In [44]:
#Dependencies
import pandas as pd
import numpy as np

In [45]:
#File to load
school_csvpath = "Resources/schools_complete.csv"
students_csvpath = "Resources/students_complete.csv"

In [46]:
#Read School and Student Data File and store into Pandas Dataframes
school_data = pd.read_csv(school_csvpath)
student_data = pd.read_csv(students_csvpath)

In [47]:
#combine the data into a single dataset.
all_data = pd.merge(student_data, school_data, how="left", on=["school_name", "school_name"])


In [48]:
#calculate the total # of schools
grouped_school_names = all_data.groupby(["school_name"])
total_schools = len(grouped_school_names)
total_schools

15

In [49]:
#calculate the total # of students
total_students = len(all_data["Student ID"])
total_students

39170

In [50]:
#calculate the total budget
total_budget = school_data["budget"].sum()
total_budget

24649428

In [51]:
#calculate the average math score
math_score = student_data["math_score"].mean()
math_score

78.98537145774827

In [52]:
#calculate the average reading score
reading_score = student_data["reading_score"].mean()
reading_score

81.87784018381414

In [53]:
#calculate the percentage of students with a passing math score
good_math_students = all_data.loc[all_data["math_score"]>=70]
total_good_math = len(good_math_students)
percentage_passing_math = (total_good_math/total_students)*100
percentage_passing_math

74.9808526933878

In [54]:
#calculate the percentage of students with a passing reading score
good_readers = all_data.loc[all_data["reading_score"]>=70]
total_good_readers = len(good_readers)
percentage_passing_reading = (total_good_readers/total_students)*100
percentage_passing_reading

85.80546336482001

In [55]:
#calculate the percentage of students who passed math and reading
overall_passing = all_data[(all_data["reading_score"]>=70) & (all_data["math_score"]>=70)]["student_name"].count()
overall_passing_percentage = (overall_passing/total_students)*100
overall_passing_percentage

65.17232575950983

In [56]:
#create a dataframe to hold the results
district_summary = pd.DataFrame(
    {"Total Schools":[total_schools],
                                "Total Students":[total_students],
                                "Total Budget":[total_budget],
                                "Average Math Score":[math_score],
                                "Average Reading Score":[reading_score],
                                "% Passing Math":[percentage_passing_math],
                                "% Passing Reading":[percentage_passing_reading],
                                "% Overall Passing":[overall_passing_percentage]
    })
district_summary

Unnamed: 0,Total Schools,Total Students,Total Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing
0,15,39170,24649428,78.985371,81.87784,74.980853,85.805463,65.172326


In [107]:
#create an overview table that summarizes:
#school name
by_school = all_data.groupby(["school_name","type"])
by_school

<pandas.core.groupby.generic.DataFrameGroupBy object at 0x000001F22D95D640>

In [109]:
#total students
students_per_school = by_school["Student ID"].count()
students_per_school

school_name            type    
Bailey High School     District    4976
Cabrera High School    Charter     1858
Figueroa High School   District    2949
Ford High School       District    2739
Griffin High School    Charter     1468
Hernandez High School  District    4635
Holden High School     Charter      427
Huang High School      District    2917
Johnson High School    District    4761
Pena High School       Charter      962
Rodriguez High School  District    3999
Shelton High School    Charter     1761
Thomas High School     Charter     1635
Wilson High School     Charter     2283
Wright High School     Charter     1800
Name: Student ID, dtype: int64

In [111]:
#total school budget
budget_per_school = by_school["budget"].mean()
budget_per_school

school_name            type    
Bailey High School     District    3124928
Cabrera High School    Charter     1081356
Figueroa High School   District    1884411
Ford High School       District    1763916
Griffin High School    Charter      917500
Hernandez High School  District    3022020
Holden High School     Charter      248087
Huang High School      District    1910635
Johnson High School    District    3094650
Pena High School       Charter      585858
Rodriguez High School  District    2547363
Shelton High School    Charter     1056600
Thomas High School     Charter     1043130
Wilson High School     Charter     1319574
Wright High School     Charter     1049400
Name: budget, dtype: int64

In [113]:
#calculate per student budget
per_student_budget = (budget_per_school/students_per_school)
per_student_budget

school_name            type    
Bailey High School     District    628.0
Cabrera High School    Charter     582.0
Figueroa High School   District    639.0
Ford High School       District    644.0
Griffin High School    Charter     625.0
Hernandez High School  District    652.0
Holden High School     Charter     581.0
Huang High School      District    655.0
Johnson High School    District    650.0
Pena High School       Charter     609.0
Rodriguez High School  District    637.0
Shelton High School    Charter     600.0
Thomas High School     Charter     638.0
Wilson High School     Charter     578.0
Wright High School     Charter     583.0
dtype: float64

In [115]:
#calculate average math score
math_score2 = by_school["math_score"].mean()
math_score2

school_name            type    
Bailey High School     District    77.048432
Cabrera High School    Charter     83.061895
Figueroa High School   District    76.711767
Ford High School       District    77.102592
Griffin High School    Charter     83.351499
Hernandez High School  District    77.289752
Holden High School     Charter     83.803279
Huang High School      District    76.629414
Johnson High School    District    77.072464
Pena High School       Charter     83.839917
Rodriguez High School  District    76.842711
Shelton High School    Charter     83.359455
Thomas High School     Charter     83.418349
Wilson High School     Charter     83.274201
Wright High School     Charter     83.682222
Name: math_score, dtype: float64

In [116]:
#calculate average reading score
reading_score2 = by_school["reading_score"].mean()
reading_score2

school_name            type    
Bailey High School     District    81.033963
Cabrera High School    Charter     83.975780
Figueroa High School   District    81.158020
Ford High School       District    80.746258
Griffin High School    Charter     83.816757
Hernandez High School  District    80.934412
Holden High School     Charter     83.814988
Huang High School      District    81.182722
Johnson High School    District    80.966394
Pena High School       Charter     84.044699
Rodriguez High School  District    80.744686
Shelton High School    Charter     83.725724
Thomas High School     Charter     83.848930
Wilson High School     Charter     83.989488
Wright High School     Charter     83.955000
Name: reading_score, dtype: float64

In [118]:
#calculate % passing math
good_math_students2 = all_data[all_data["math_score"]>=70].groupby("school_name")["Student ID"].count()/students_per_school
percent_passing_math2 = good_math_students2 * 100
percent_passing_math2

school_name            type    
Bailey High School     District    66.680064
Cabrera High School    Charter     94.133477
Figueroa High School   District    65.988471
Ford High School       District    68.309602
Griffin High School    Charter     93.392371
Hernandez High School  District    66.752967
Holden High School     Charter     92.505855
Huang High School      District    65.683922
Johnson High School    District    66.057551
Pena High School       Charter     94.594595
Rodriguez High School  District    66.366592
Shelton High School    Charter     93.867121
Thomas High School     Charter     93.272171
Wilson High School     Charter     93.867718
Wright High School     Charter     93.333333
Name: Student ID, dtype: float64

In [119]:
#calcaulate % passing reading
good_readers2 = all_data[all_data["reading_score"]>=70].groupby("school_name")["Student ID"].count()/students_per_school
percent_passing_reading2 = good_readers2 * 100
percent_passing_reading2

school_name            type    
Bailey High School     District    81.933280
Cabrera High School    Charter     97.039828
Figueroa High School   District    80.739234
Ford High School       District    79.299014
Griffin High School    Charter     97.138965
Hernandez High School  District    80.862999
Holden High School     Charter     96.252927
Huang High School      District    81.316421
Johnson High School    District    81.222432
Pena High School       Charter     95.945946
Rodriguez High School  District    80.220055
Shelton High School    Charter     95.854628
Thomas High School     Charter     97.308869
Wilson High School     Charter     96.539641
Wright High School     Charter     96.611111
Name: Student ID, dtype: float64

In [120]:
#calculate overall % passing 
overall_passing2 = all_data[(all_data["reading_score"]>=70) & (all_data["math_score"]>=70)].groupby("school_name")["Student ID"].count()/students_per_school
overall_percentage_passing2 = overall_passing2 * 100
overall_percentage_passing2

school_name            type    
Bailey High School     District    54.642283
Cabrera High School    Charter     91.334769
Figueroa High School   District    53.204476
Ford High School       District    54.289887
Griffin High School    Charter     90.599455
Hernandez High School  District    53.527508
Holden High School     Charter     89.227166
Huang High School      District    53.513884
Johnson High School    District    53.539172
Pena High School       Charter     90.540541
Rodriguez High School  District    52.988247
Shelton High School    Charter     89.892107
Thomas High School     Charter     90.948012
Wilson High School     Charter     90.582567
Wright High School     Charter     90.333333
Name: Student ID, dtype: float64

In [144]:
#Create a dataframe to hold the above results
school_summary = pd.DataFrame(
    {"Total Students":[students_per_school],
     "Total School Budget":[budget_per_school],
     "Average Math Score":[math_score2],
     "Average Reading Score":[reading_score2],
     "% Passing Math":[percent_passing_math2],
     "% Passing Reading":[percent_passing_reading2],
     "% Overall Passing":[overall_percentage_passing2]
    })

school_summary

Unnamed: 0,Total Students,Total School Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing
0,school_name type Bailey High Sc...,school_name type Bailey High Sc...,school_name type Bailey High Sc...,school_name type Bailey High Sc...,school_name type Bailey High Sc...,school_name type Bailey High Sc...,school_name type Bailey High Sc...
