In [20]:
# Import the Pandas library
import pandas as pd
import numpy

In [21]:
# Locations of the two input CSV files (bare file names, so they resolve
# against the notebook's working directory): schools first, then students.
school_path, student_path = "schools_complete.csv", "students_complete.csv"

In [22]:
# Minimum score that counts as a pass. Named once so the math and reading
# flags below cannot drift apart if the threshold ever changes.
PASSING_SCORE = 70

# Load the school-level and student-level CSV files into data frames.
school_df = pd.read_csv(school_path)
student_df = pd.read_csv(student_path)

# Per-student boolean pass flags. Taking the mean of a flag column later
# yields the fraction of students who passed, i.e. a passing rate.
student_df["math_passed"] = student_df["math_score"] >= PASSING_SCORE
student_df["read_passed"] = student_df["reading_score"] >= PASSING_SCORE

# No combined school+student frame is built here: the per-school summary
# aggregates students first and then joins to school_df on "school_name".

In [23]:
# District-wide snapshot of key metrics. Each value is computed on its own
# first so it can be checked before being laid out in a table.

# --- totals ---
tot_schools = school_df["school_name"].count()
tot_students = student_df["Student ID"].count()
tot_budget = school_df["budget"].sum()

# --- mean scores across all students ---
avg_mathscore = student_df["math_score"].mean()
avg_readscore = student_df["reading_score"].mean()

# --- passing rates, in percent ---
# The *_passed columns are boolean flags, so their mean is the fraction True.
avg_mathpass = 100 * student_df["math_passed"].mean()
avg_readpass = 100 * student_df["read_passed"].mean()

# NOTE(review): this is the simple average of the two subject pass rates, not
# the share of students who passed both subjects - confirm which one the
# report is meant to show.
avg_allpass = (avg_readpass + avg_mathpass) / 2



In [24]:
# One-row district summary table. Each display heading is paired with its
# metric so the two cannot get out of step, then the pairs are split back
# into a column list and a single row of values.
district_metrics = [
    ("Total Schools", tot_schools),
    ("Total Students", tot_students),
    ("Total Budget", tot_budget),
    ("Average Math Score", avg_mathscore),
    ("Average Reading Score", avg_readscore),
    ("% Passing Math", avg_mathpass),
    ("% Passing Reading", avg_readpass),
    ("% Overall Passing Rate", avg_allpass),
]
district_headings, district_values = zip(*district_metrics)

disp_tbl1_df = pd.DataFrame([district_values], columns=list(district_headings))

display(disp_tbl1_df)

Unnamed: 0,Total Schools,Total Students,Total Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing Rate
0,15,39170,24649428,78.985371,81.87784,74.980853,85.805463,80.393158


In [None]:
# Build the per-school table of key metrics: aggregate the students of each
# school, then attach the school-level columns from school_df.
groupby_school_df = student_df.groupby(["school_name"])

# Head count per school (number of non-null "Student ID" values).
# Author's note: these counts happen to match the per-school value already
# recorded in school_df.
s1 = groupby_school_df["Student ID"].count().to_frame()

# Per-school means. For the boolean *_passed flags the mean is the fraction
# of students who passed.
per_school_mean_cols = ["math_score", "reading_score", "math_passed", "read_passed"]
s2 = groupby_school_df[per_school_mean_cols].mean()

# Join counts to means (both keyed by school_name), then to the school frame.
disp_tbl2_df = s1.merge(s2, on=["school_name"])
disp_tbl2_df = disp_tbl2_df.merge(school_df, on=["school_name"])

In [55]:
# Turn the merged per-school frame into the presentation table.
# NOTE: this cell rebinds disp_tbl2_df from itself and drops columns it reads,
# so it is not re-runnable on its own; re-run the aggregation cell first.

# Rename columns to more accurately reflect what they hold.
disp_tbl2_df = disp_tbl2_df.rename(
    columns={"Student ID": "Total Students",
             "school_name": "School Name",
             "math_score": "Average Math Score",
             "reading_score": "Average Reading Score",
            })

# Add computed columns. These are plain column arithmetic, so they are done
# as vectorized operations on whole columns rather than a Python-level
# function call per row.
disp_tbl2_df["Budget Per Student"] = disp_tbl2_df["budget"] / disp_tbl2_df["Total Students"]
disp_tbl2_df["Pct Passed Math"] = disp_tbl2_df["math_passed"] * 100
disp_tbl2_df["Pct Passed Reading"] = disp_tbl2_df["read_passed"] * 100
# NOTE(review): simple average of the two subject pass rates, not the share of
# students passing both subjects - confirm against the report spec.
disp_tbl2_df["Overall Passing Rate"] = (
    disp_tbl2_df["Pct Passed Math"] + disp_tbl2_df["Pct Passed Reading"]
) / 2

# Drop helper/extraneous columns in a single step.
disp_tbl2_df = disp_tbl2_df.drop(
    columns=["School ID", "size", "math_passed", "read_passed"]
)

# Re-order the columns according to spec.
# (The name `xyz` is kept unchanged in case later cells refer to it.)
xyz = ["School Name", "type", "Total Students", "budget", "Budget Per Student",
       "Average Math Score", "Average Reading Score",
       "Pct Passed Math", "Pct Passed Reading", "Overall Passing Rate"]
disp_tbl2_df = disp_tbl2_df[xyz]

display(disp_tbl2_df)

Unnamed: 0,School Name,type,Total Students,budget,Budget Per Student,Average Math Score,Average Reading Score,Pct Passed Math,Pct Passed Reading,Overall Passing Rate
0,Bailey High School,District,4976,3124928,628.0,77.048432,81.033963,66.680064,81.93328,74.306672
1,Cabrera High School,Charter,1858,1081356,582.0,83.061895,83.97578,94.133477,97.039828,95.586652
2,Figueroa High School,District,2949,1884411,639.0,76.711767,81.15802,65.988471,80.739234,73.363852
3,Ford High School,District,2739,1763916,644.0,77.102592,80.746258,68.309602,79.299014,73.804308
4,Griffin High School,Charter,1468,917500,625.0,83.351499,83.816757,93.392371,97.138965,95.265668
5,Hernandez High School,District,4635,3022020,652.0,77.289752,80.934412,66.752967,80.862999,73.807983
6,Holden High School,Charter,427,248087,581.0,83.803279,83.814988,92.505855,96.252927,94.379391
7,Huang High School,District,2917,1910635,655.0,76.629414,81.182722,65.683922,81.316421,73.500171
8,Johnson High School,District,4761,3094650,650.0,77.072464,80.966394,66.057551,81.222432,73.639992
9,Pena High School,Charter,962,585858,609.0,83.839917,84.044699,94.594595,95.945946,95.27027


In [57]:
# Top performing schools: order from highest to lowest overall passing rate
# and show the first five rows.
# Note that disp_tbl2_df itself is rebound to the sorted frame.
disp_tbl2_df = disp_tbl2_df.sort_values(by="Overall Passing Rate", ascending=False)
display(disp_tbl2_df.head(5))

Unnamed: 0,School Name,type,Total Students,budget,Budget Per Student,Average Math Score,Average Reading Score,Pct Passed Math,Pct Passed Reading,Overall Passing Rate
1,Cabrera High School,Charter,1858,1081356,582.0,83.061895,83.97578,94.133477,97.039828,95.586652
12,Thomas High School,Charter,1635,1043130,638.0,83.418349,83.84893,93.272171,97.308869,95.29052
9,Pena High School,Charter,962,585858,609.0,83.839917,84.044699,94.594595,95.945946,95.27027
4,Griffin High School,Charter,1468,917500,625.0,83.351499,83.816757,93.392371,97.138965,95.265668
13,Wilson High School,Charter,2283,1319574,578.0,83.274201,83.989488,93.867718,96.539641,95.203679


In [58]:
# Bottom performing schools: order from lowest to highest overall passing
# rate and show the first five rows.
# Note that disp_tbl2_df itself is rebound to the sorted frame.
disp_tbl2_df = disp_tbl2_df.sort_values(by="Overall Passing Rate", ascending=True)
display(disp_tbl2_df.head(5))

Unnamed: 0,School Name,type,Total Students,budget,Budget Per Student,Average Math Score,Average Reading Score,Pct Passed Math,Pct Passed Reading,Overall Passing Rate
10,Rodriguez High School,District,3999,2547363,637.0,76.842711,80.744686,66.366592,80.220055,73.293323
2,Figueroa High School,District,2949,1884411,639.0,76.711767,81.15802,65.988471,80.739234,73.363852
7,Huang High School,District,2917,1910635,655.0,76.629414,81.182722,65.683922,81.316421,73.500171
8,Johnson High School,District,4761,3094650,650.0,77.072464,80.966394,66.057551,81.222432,73.639992
3,Ford High School,District,2739,1763916,644.0,77.102592,80.746258,68.309602,79.299014,73.804308
