In [1]:
#Dependencies
import pandas as pd

In [2]:
# Load the school-level table (School ID, name, type, size, budget) from the raw CSV
schools = "Resources/raw_data/schools_complete.csv"
schools_read = pd.read_csv(filepath_or_buffer=schools)
schools_read

Unnamed: 0,School ID,name,type,size,budget
0,0,Huang High School,District,2917,1910635
1,1,Figueroa High School,District,2949,1884411
2,2,Shelton High School,Charter,1761,1056600
3,3,Hernandez High School,District,4635,3022020
4,4,Griffin High School,Charter,1468,917500
5,5,Wilson High School,Charter,2283,1319574
6,6,Cabrera High School,Charter,1858,1081356
7,7,Bailey High School,District,4976,3124928
8,8,Holden High School,Charter,427,248087
9,9,Pena High School,Charter,962,585858


In [3]:
# Load the student-level table (one row per student, with reading and math scores)
# from the raw CSV and preview its first rows
students = "Resources/raw_data/students_complete.csv"
students_read = pd.read_csv(filepath_or_buffer=students)
students_read.head()

Unnamed: 0,Student ID,name,gender,grade,school,reading_score,math_score
0,0,Paul Bradley,M,9th,Huang High School,66,79
1,1,Victor Smith,M,12th,Huang High School,94,61
2,2,Kevin Rodriguez,M,12th,Huang High School,90,60
3,3,Dr. Richard Scott,M,12th,Huang High School,67,58
4,4,Bonnie Ray,F,9th,Huang High School,97,84


In [4]:
# Build the DataFrame objects used by the rest of the analysis.
# (pd.read_csv already returns a DataFrame, so these simply re-wrap the loaded tables.)
schools_df = pd.DataFrame(data=schools_read)
students_df = pd.DataFrame(data=students_read)

In [5]:
# Combine the student and school tables with an outer join on their shared "name" column.
# NOTE(review): "name" is the student's name in students_df but the school's name in
# schools_df, so student rows come out with empty school columns (School ID, type,
# size, budget) in the preview. Attaching school data to each student would need
# left_on="school", right_on="name" -- but the school count and budget total computed
# further down run over this frame and appear to rely on its current shape, so confirm
# and adjust those before changing the join.
student_school = students_df.merge(schools_df, how="outer", on="name")
student_school.head()

Unnamed: 0,Student ID,name,gender,grade,school,reading_score,math_score,School ID,type,size,budget
0,0.0,Paul Bradley,M,9th,Huang High School,66.0,79.0,,,,
1,1.0,Victor Smith,M,12th,Huang High School,94.0,61.0,,,,
2,19839.0,Victor Smith,M,9th,Bailey High School,76.0,93.0,,,,
3,27649.0,Victor Smith,M,11th,Rodriguez High School,91.0,57.0,,,,
4,29947.0,Victor Smith,M,11th,Rodriguez High School,96.0,64.0,,,,


In [6]:
# Student headcount: number of non-null 'Student ID' entries in the merged frame
total_students = student_school.loc[:, 'Student ID'].count()
total_students

39170

In [7]:
# total number of schools
# Counted from the schools table itself rather than from the merged student/school
# frame: counting 'School ID' on the merged frame only equals the number of schools
# while no student row carries school data. Under a join that attaches school columns
# to every student it would return the number of student rows instead.
num_schools = schools_df["School ID"].count()
num_schools

15

In [8]:
# total budget of schools
# Summed over the schools table so each school's budget is counted exactly once.
# Summing 'budget' on the merged student/school frame would repeat a school's budget
# for every student row that carries it if the join ever attaches school data to students.
# The float cast keeps the result a float (e.g. 24649428.0), matching what the sum over
# the NaN-padded merged column produced.
budget_schools = schools_df["budget"].astype(float).sum()
budget_schools

24649428.0

In [9]:
# Mean of the 'math_score' column across the merged frame (missing values are skipped)
avg_math_score = student_school.loc[:, 'math_score'].mean()
avg_math_score

78.98537145774827

In [10]:
# Mean of the 'reading_score' column across the merged frame (missing values are skipped)
avg_read_score = student_school.loc[:, 'reading_score'].mean()
avg_read_score

81.87784018381414

In [11]:
# Share of students with a passing math score (a score above 69)
# rows of the merged frame whose 'math_score' clears the passing mark
total_passing_math = student_school[student_school['math_score'].gt(69)]
# headcount of those rows, taken from their non-null 'Student ID' values
num_passing_math = total_passing_math.loc[:, 'Student ID'].count()

# passing headcount as a fraction (0-1) of all students
percent_pass_math = num_passing_math / total_students

# the same figure on a 0-100 scale; the cell displays it rounded to two decimals
percent_pass_math_final = 100 * percent_pass_math
percent_pass_math_final.round(2)

74.98

In [12]:
# Share of students with a passing reading score (a score above 69)
# rows of the merged frame whose 'reading_score' clears the passing mark
total_passing_reading = student_school[student_school['reading_score'].gt(69)]
# headcount of those rows, taken from their non-null 'Student ID' values
num_passing_reading = total_passing_reading.loc[:, 'Student ID'].count()

# passing headcount as a fraction (0-1) of all students
percent_pass_reading = num_passing_reading / total_students

# the same figure on a 0-100 scale; the cell displays it rounded to two decimals
percent_pass_reading_final = 100 * percent_pass_reading
percent_pass_reading_final.round(2)

85.81

In [13]:
# Overall passing rate: the plain average of the math and reading pass fractions,
# scaled to 0-100 and displayed rounded to two decimals
overall_passing_rate = (percent_pass_math + percent_pass_reading) / 2 * 100
overall_passing_rate.round(2)

80.39

In [14]:
# Assemble the one-row 'District Summary' table from the district-wide metrics.
# Column order is fixed through `columns=`, and every numeric value is rounded to
# two decimals before display.
# NOTE(review): overall_passing_rate is computed earlier but is not part of this
# table -- confirm whether it should be included.
district_table = pd.DataFrame(
    {
        'Total Schools': [num_schools],
        'Total Students': [total_students],
        'Total Budget': [budget_schools],
        'Average Math Scores': [avg_math_score],
        'Average Reading Scores': [avg_read_score],
        'Percent Passing Math': [percent_pass_math_final],
        'Percent Passing Reading': [percent_pass_reading_final],
    },
    columns=[
        'Total Schools',
        'Total Students',
        'Total Budget',
        'Average Math Scores',
        'Average Reading Scores',
        'Percent Passing Math',
        'Percent Passing Reading',
    ],
).round(2)
district_table

Unnamed: 0,Total Schools,Total Students,Total Budget,Average Math Scores,Average Reading Scores,Percent Passing Math,Percent Passing Reading
0,15,39170,24649428.0,78.99,81.88,74.98,85.81


In [15]:
# Format the 'District Summary' for display: thousands separators for the headcount,
# currency for the budget, and a trailing percent sign on the two pass rates.
# The formatted columns hold strings afterwards, so run this once on the numeric table.
district_table = district_table.assign(**{
    column: district_table[column].map(template.format)
    for column, template in {
        'Total Students': "{0:,}",
        'Total Budget': "${0:,.2f}",
        'Percent Passing Math': "{0:,.2f}%",
        'Percent Passing Reading': "{0:,.2f}%",
    }.items()
})
district_table

Unnamed: 0,Total Schools,Total Students,Total Budget,Average Math Scores,Average Reading Scores,Percent Passing Math,Percent Passing Reading
0,15,39170,"$24,649,428.00",78.99,81.88,74.98%,85.81%
