In [1]:
# Import the Pandas library
import pandas as pd
import numpy as np

In [2]:
# Relative locations of the two input CSV files (schools, then students)
school_path, student_path = "schools_complete.csv", "students_complete.csv"

In [3]:
# Load the school and student CSV files into data frames
school_df = pd.read_csv(school_path)
student_df = pd.read_csv(student_path)

# Pass flags are needed throughout: a score of 70 or more counts as a pass.
# The flags are booleans, but they average like numbers (True=1, False=0),
# so the mean of a flag column is the fraction of students who passed.
student_df = student_df.assign(
    math_passed=student_df["math_score"].ge(70),
    read_passed=student_df["reading_score"].ge(70),
)

# Give the score columns their display names once, here, so that each
# display table further down does not have to rename them again.
student_df = student_df.rename(columns={
    "math_score": "Average Math Score",
    "reading_score": "Average Reading Score",
})

display(school_df)
student_df.head()

Unnamed: 0,School ID,school_name,type,size,budget
0,0,Huang High School,District,2917,1910635
1,1,Figueroa High School,District,2949,1884411
2,2,Shelton High School,Charter,1761,1056600
3,3,Hernandez High School,District,4635,3022020
4,4,Griffin High School,Charter,1468,917500
5,5,Wilson High School,Charter,2283,1319574
6,6,Cabrera High School,Charter,1858,1081356
7,7,Bailey High School,District,4976,3124928
8,8,Holden High School,Charter,427,248087
9,9,Pena High School,Charter,962,585858


Unnamed: 0,Student ID,student_name,gender,grade,school_name,Average Reading Score,Average Math Score,math_passed,read_passed
0,0,Paul Bradley,M,9th,Huang High School,66,79,True,False
1,1,Victor Smith,M,12th,Huang High School,94,61,False,True
2,2,Kevin Rodriguez,M,12th,Huang High School,90,60,False,True
3,3,Dr. Richard Scott,M,12th,Huang High School,67,58,False,False
4,4,Bonnie Ray,F,9th,Huang High School,97,84,True,True


In [4]:
# District-wide headline figures, computed as plain scalars first so the
# values can be checked before they are assembled into a display table.

# counts and total budget
tot_schools = school_df.loc[:, "school_name"].count()
tot_students = student_df.loc[:, "Student ID"].count()
tot_budget = school_df.loc[:, "budget"].sum()

# mean scores over every student
avg_mathscore = student_df.loc[:, "Average Math Score"].mean()
avg_readscore = student_df.loc[:, "Average Reading Score"].mean()

# pass rates as percentages: mean of the boolean pass flag, times 100
avg_mathpass = student_df.loc[:, "math_passed"].mean() * 100
avg_readpass = student_df.loc[:, "read_passed"].mean() * 100

# the overall rate here is the simple average of the two subject pass rates
avg_allpass = (avg_mathpass + avg_readpass) / 2



In [5]:
# Put the district-wide figures into a one-row table for display
disp_tbl1_df = pd.DataFrame(
    [(tot_schools, tot_students, tot_budget, avg_mathscore, avg_readscore, avg_mathpass, avg_readpass, avg_allpass)],
    columns=[
        "Total Schools",
        "Total Students",
        "Total Budget",
        "Average Math Score",
        "Average Reading Score",
        "% Passing Math",
        "% Passing Reading",
        "% Overall Passing Rate",
    ],
)

# Display format for each column ("Total Schools" is left as a plain number).
# After this loop the formatted columns hold text, not numbers.
for column, template in {
    "Total Students": "{:,}",
    "Total Budget": "${:,}",
    "Average Math Score": "{:,.2f}",
    "Average Reading Score": "{:,.2f}",
    "% Passing Math": "{:,.2f}%",
    "% Passing Reading": "{:,.2f}%",
    "% Overall Passing Rate": "{:,.2f}%",
}.items():
    disp_tbl1_df[column] = disp_tbl1_df[column].map(template.format)

print("District Summary")
display(disp_tbl1_df)

District Summary


Unnamed: 0,Total Schools,Total Students,Total Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing Rate
0,15,39170,"$24,649,428",78.99,81.88,74.98%,85.81%,80.39%


In [6]:
# Per-school key metrics, starting from the student rows grouped by school
groupby_school_df = student_df.groupby(["school_name"])

# Enrollment, counted from the student records themselves
# (as it happens, this matches the "size" value held in school_df)
s1 = groupby_school_df["Student ID"].count().to_frame()

# Per-school mean scores and mean pass flags (i.e. pass fractions)
s2 = groupby_school_df[["Average Math Score","Average Reading Score","math_passed","read_passed"]].mean()

# Join the two summaries, then bring in school_df for school type and budget
disp_tbl2_df = (
    s1.merge(s2, on=["school_name"])
      .merge(school_df, on=["school_name"])
)

disp_tbl2_df.head()

Unnamed: 0,school_name,Student ID,Average Math Score,Average Reading Score,math_passed,read_passed,School ID,type,size,budget
0,Bailey High School,4976,77.048432,81.033963,0.666801,0.819333,7,District,4976,3124928
1,Cabrera High School,1858,83.061895,83.97578,0.941335,0.970398,6,Charter,1858,1081356
2,Figueroa High School,2949,76.711767,81.15802,0.659885,0.807392,1,District,2949,1884411
3,Ford High School,2739,77.102592,80.746258,0.683096,0.79299,13,District,2739,1763916
4,Griffin High School,1468,83.351499,83.816757,0.933924,0.97139,4,Charter,1468,917500


In [7]:
# rename columns to more accurately reflect what they represent
# (the score columns were already given their display names when student_df
# was loaded, so only these two need renaming here)
disp_tbl2_df = disp_tbl2_df.rename(
    columns={"Student ID":"Total Students",
             "school_name":"School Name",
            })

# add computed columns, using whole-column arithmetic rather than a
# row-by-row apply; the results are the same values
disp_tbl2_df["Budget Per Student"] = disp_tbl2_df["budget"] / disp_tbl2_df["Total Students"]

# math_passed / read_passed hold the fraction of students passing; scale to percent
disp_tbl2_df["Pct Passed Math"] = disp_tbl2_df["math_passed"] * 100
disp_tbl2_df["Pct Passed Reading"] = disp_tbl2_df["read_passed"] * 100

# overall rate is the simple average of the two subject pass rates
disp_tbl2_df["Overall Passing Rate"] = (disp_tbl2_df["Pct Passed Math"] + disp_tbl2_df["Pct Passed Reading"]) / 2

In [8]:
# drop the helper columns that the display table does not need
disp_tbl2_df = disp_tbl2_df.drop(columns=["School ID", "size", "math_passed", "read_passed"])

# arrange the remaining columns in the order the spec asks for
xyz = [
    "School Name",
    "type",
    "Total Students",
    "budget",
    "Budget Per Student",
    "Average Math Score",
    "Average Reading Score",
    "Pct Passed Math",
    "Pct Passed Reading",
    "Overall Passing Rate",
]
disp_tbl2_df = disp_tbl2_df[xyz]

In [9]:
# Apply display formats column by column.
# Note: after this loop the formatted columns hold text, not numbers.
for column, template in {
    "budget": "${:,}",
    "Budget Per Student": "${:,.2f}",
    "Average Math Score": "{:,.2f}",
    "Average Reading Score": "{:,.2f}",
    "Pct Passed Math": "{:,.2f}%",
    "Pct Passed Reading": "{:,.2f}%",
    "Overall Passing Rate": "{:,.2f}%",
}.items():
    disp_tbl2_df[column] = disp_tbl2_df[column].map(template.format)

print('School Summary')
display(disp_tbl2_df)

School Summary


Unnamed: 0,School Name,type,Total Students,budget,Budget Per Student,Average Math Score,Average Reading Score,Pct Passed Math,Pct Passed Reading,Overall Passing Rate
0,Bailey High School,District,4976,"$3,124,928",$628.00,77.05,81.03,66.68%,81.93%,74.31%
1,Cabrera High School,Charter,1858,"$1,081,356",$582.00,83.06,83.98,94.13%,97.04%,95.59%
2,Figueroa High School,District,2949,"$1,884,411",$639.00,76.71,81.16,65.99%,80.74%,73.36%
3,Ford High School,District,2739,"$1,763,916",$644.00,77.1,80.75,68.31%,79.30%,73.80%
4,Griffin High School,Charter,1468,"$917,500",$625.00,83.35,83.82,93.39%,97.14%,95.27%
5,Hernandez High School,District,4635,"$3,022,020",$652.00,77.29,80.93,66.75%,80.86%,73.81%
6,Holden High School,Charter,427,"$248,087",$581.00,83.8,83.81,92.51%,96.25%,94.38%
7,Huang High School,District,2917,"$1,910,635",$655.00,76.63,81.18,65.68%,81.32%,73.50%
8,Johnson High School,District,4761,"$3,094,650",$650.00,77.07,80.97,66.06%,81.22%,73.64%
9,Pena High School,Charter,962,"$585,858",$609.00,83.84,84.04,94.59%,95.95%,95.27%


In [10]:
# Present Top Performing Schools according to Passing Rate
# We have a display table already, we just need to sort it.
# By this point "Overall Passing Rate" holds formatted text such as "95.59%",
# and text sorts character by character (so "100.00%" would rank below
# "95.59%"). Recover the numeric value and order the rows by that instead.
overall_rate = pd.to_numeric(disp_tbl2_df["Overall Passing Rate"].astype(str).str.rstrip("%"))
# positional ordering, so the result does not depend on the index labels
top_order = overall_rate.reset_index(drop=True).sort_values(ascending=False).index
disp_tbl2_df = disp_tbl2_df.iloc[top_order]
print("Top Performing Schools")
display(disp_tbl2_df.head())



Top Performing Schools


Unnamed: 0,School Name,type,Total Students,budget,Budget Per Student,Average Math Score,Average Reading Score,Pct Passed Math,Pct Passed Reading,Overall Passing Rate
1,Cabrera High School,Charter,1858,"$1,081,356",$582.00,83.06,83.98,94.13%,97.04%,95.59%
12,Thomas High School,Charter,1635,"$1,043,130",$638.00,83.42,83.85,93.27%,97.31%,95.29%
4,Griffin High School,Charter,1468,"$917,500",$625.00,83.35,83.82,93.39%,97.14%,95.27%
9,Pena High School,Charter,962,"$585,858",$609.00,83.84,84.04,94.59%,95.95%,95.27%
13,Wilson High School,Charter,2283,"$1,319,574",$578.00,83.27,83.99,93.87%,96.54%,95.20%


In [11]:
# Present Bottom Performing Schools according to Passing Rate
# "Overall Passing Rate" holds formatted text such as "73.29%" by this point;
# sorting the text directly is character-by-character, so order the rows by
# the numeric value of the rate instead.
overall_rate = pd.to_numeric(disp_tbl2_df["Overall Passing Rate"].astype(str).str.rstrip("%"))
# positional ordering, so the result does not depend on the index labels
bottom_order = overall_rate.reset_index(drop=True).sort_values().index
disp_tbl2_df = disp_tbl2_df.iloc[bottom_order]
print("Bottom Performing Schools")
display(disp_tbl2_df.head())

# "tail" would print the same five schools without sorting, but would print them in reverse order

Bottom Performing Schools


Unnamed: 0,School Name,type,Total Students,budget,Budget Per Student,Average Math Score,Average Reading Score,Pct Passed Math,Pct Passed Reading,Overall Passing Rate
10,Rodriguez High School,District,3999,"$2,547,363",$637.00,76.84,80.74,66.37%,80.22%,73.29%
2,Figueroa High School,District,2949,"$1,884,411",$639.00,76.71,81.16,65.99%,80.74%,73.36%
7,Huang High School,District,2917,"$1,910,635",$655.00,76.63,81.18,65.68%,81.32%,73.50%
8,Johnson High School,District,4761,"$3,094,650",$650.00,77.07,80.97,66.06%,81.22%,73.64%
3,Ford High School,District,2739,"$1,763,916",$644.00,77.1,80.75,68.31%,79.30%,73.80%


In [12]:
# Shared by both the math and the reading summaries.

# one frame of student rows per grade level
g9_df = student_df[student_df["grade"].eq("9th")]
g10_df = student_df[student_df["grade"].eq("10th")]
g11_df = student_df[student_df["grade"].eq("11th")]
g12_df = student_df[student_df["grade"].eq("12th")]

# each grade's rows, grouped by school
groupby_g9_df = g9_df.groupby(by=["school_name"])
groupby_g10_df = g10_df.groupby(by=["school_name"])
groupby_g11_df = g11_df.groupby(by=["school_name"])
groupby_g12_df = g12_df.groupby(by=["school_name"])

In [13]:
# Mean math score per school for each grade; each result is a one-column
# frame whose column is named after the grade level
mathscore_g9_df = groupby_g9_df["Average Math Score"].mean().to_frame("9th")
mathscore_g10_df = groupby_g10_df["Average Math Score"].mean().to_frame("10th")
mathscore_g11_df = groupby_g11_df["Average Math Score"].mean().to_frame("11th")
mathscore_g12_df = groupby_g12_df["Average Math Score"].mean().to_frame("12th")

# Line the four grades up side by side, one row per school
disp_tbl3_df = (
    mathscore_g9_df
    .merge(mathscore_g10_df, on=["school_name"])
    .merge(mathscore_g11_df, on=["school_name"])
    .merge(mathscore_g12_df, on=["school_name"])
)

In [14]:
# Show every grade column to two decimal places (the values become text)
for grade_col in ["9th", "10th", "11th", "12th"]:
    disp_tbl3_df[grade_col] = disp_tbl3_df[grade_col].map("{:.2f}".format)

print("Average Math Scores by School")
display(disp_tbl3_df)

Average Math Scores by School


Unnamed: 0_level_0,9th,10th,11th,12th
school_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Bailey High School,77.08,77.0,77.52,76.49
Cabrera High School,83.09,83.15,82.77,83.28
Figueroa High School,76.4,76.54,76.88,77.15
Ford High School,77.36,77.67,76.92,76.18
Griffin High School,82.04,84.23,83.84,83.36
Hernandez High School,77.44,77.34,77.14,77.19
Holden High School,83.79,83.43,85.0,82.86
Huang High School,77.03,75.91,76.45,77.23
Johnson High School,77.19,76.69,77.49,76.86
Pena High School,83.63,83.37,84.33,84.12


In [15]:
# The per-grade splits and groupbys built for the math table are reused here.

# Mean reading score per school for each grade; each result is a one-column
# frame whose column is named after the grade level
readscore_g9_df = groupby_g9_df["Average Reading Score"].mean().to_frame("9th")
readscore_g10_df = groupby_g10_df["Average Reading Score"].mean().to_frame("10th")
readscore_g11_df = groupby_g11_df["Average Reading Score"].mean().to_frame("11th")
readscore_g12_df = groupby_g12_df["Average Reading Score"].mean().to_frame("12th")

# Line the four grades up side by side, one row per school
disp_tbl4_df = (
    readscore_g9_df
    .merge(readscore_g10_df, on=["school_name"])
    .merge(readscore_g11_df, on=["school_name"])
    .merge(readscore_g12_df, on=["school_name"])
)




In [16]:
# Show every grade column to two decimal places (the values become text)
for grade_col in ["9th", "10th", "11th", "12th"]:
    disp_tbl4_df[grade_col] = disp_tbl4_df[grade_col].map("{:.2f}".format)

print("Average Reading Scores by School")
display(disp_tbl4_df)

Average Reading Scores by School


Unnamed: 0_level_0,9th,10th,11th,12th
school_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Bailey High School,81.3,80.91,80.95,80.91
Cabrera High School,83.68,84.25,83.79,84.29
Figueroa High School,81.2,81.41,80.64,81.38
Ford High School,80.63,81.26,80.4,80.66
Griffin High School,83.37,83.71,84.29,84.01
Hernandez High School,80.87,80.66,81.4,80.86
Holden High School,83.68,83.32,83.82,84.7
Huang High School,81.29,81.51,81.42,80.31
Johnson High School,81.26,80.77,80.62,81.23
Pena High School,83.81,83.61,84.34,84.59


In [17]:
# assign categories to schools
# pd.cut's default intervals include their right edge, so a school spending
# exactly $600 per student lands in "up to $600".
# The top edge is open-ended (np.inf) so that "and up" really has no ceiling;
# with a finite top edge, any larger value would silently become NaN.
spending_bins = [0, 600, 630, 640, np.inf]
spending_labels = ["up to $600","$601-630","$631-640","$641 and up"]

# whole-column division instead of a row-by-row apply
school_df['Budget Per Student'] = school_df["budget"] / school_df["size"]
school_df['Spending Ranges (Per Student)'] = pd.cut(school_df["Budget Per Student"], spending_bins, labels=spending_labels)

size_bins = [0, 1775, 3000, np.inf]
size_labels = ["Small","Medium","Large"]

school_df['School Size'] = pd.cut(school_df["size"], size_bins, labels=size_labels)

# Attach the school-level columns to every student row.
# If this cell is run a second time, student_df already carries those columns
# and a plain merge would add "_x"/"_y" duplicates; dropping any that are
# present first makes the cell safe to re-run. On a first run nothing is dropped.
school_level_cols = [col for col in school_df.columns if col != "school_name"]
student_df = student_df.drop(columns=school_level_cols, errors="ignore").merge(school_df, on=["school_name"])
student_df.head(10)


Unnamed: 0,Student ID,student_name,gender,grade,school_name,Average Reading Score,Average Math Score,math_passed,read_passed,School ID,type,size,budget,Budget Per Student,Spending Ranges (Per Student),School Size
0,0,Paul Bradley,M,9th,Huang High School,66,79,True,False,0,District,2917,1910635,655.0,$641 and up,Medium
1,1,Victor Smith,M,12th,Huang High School,94,61,False,True,0,District,2917,1910635,655.0,$641 and up,Medium
2,2,Kevin Rodriguez,M,12th,Huang High School,90,60,False,True,0,District,2917,1910635,655.0,$641 and up,Medium
3,3,Dr. Richard Scott,M,12th,Huang High School,67,58,False,False,0,District,2917,1910635,655.0,$641 and up,Medium
4,4,Bonnie Ray,F,9th,Huang High School,97,84,True,True,0,District,2917,1910635,655.0,$641 and up,Medium
5,5,Bryan Miranda,M,9th,Huang High School,94,94,True,True,0,District,2917,1910635,655.0,$641 and up,Medium
6,6,Sheena Carter,F,11th,Huang High School,82,80,True,True,0,District,2917,1910635,655.0,$641 and up,Medium
7,7,Nicole Baker,F,12th,Huang High School,96,69,False,True,0,District,2917,1910635,655.0,$641 and up,Medium
8,8,Michael Roth,M,10th,Huang High School,95,87,True,True,0,District,2917,1910635,655.0,$641 and up,Medium
9,9,Matthew Greene,M,10th,Huang High School,96,84,True,True,0,District,2917,1910635,655.0,$641 and up,Medium


In [18]:
# create per-spending range of key metrics
groupby_spending_df = student_df.groupby(['Spending Ranges (Per Student)'])

# mean scores and mean pass flags (i.e. pass fractions) for each spending range
disp_tbl5_df = groupby_spending_df[["Average Math Score","Average Reading Score","math_passed","read_passed"]].mean()

# add computed columns, using whole-column arithmetic rather than a row-by-row apply
disp_tbl5_df["Pct Passed Math"] = disp_tbl5_df["math_passed"] * 100
disp_tbl5_df["Pct Passed Reading"] = disp_tbl5_df["read_passed"] * 100
# overall rate is the simple average of the two subject pass rates
disp_tbl5_df["Overall Passing Rate"] = (disp_tbl5_df["Pct Passed Math"] + disp_tbl5_df["Pct Passed Reading"]) / 2

# drop the pass-fraction columns now that the percentages exist
disp_tbl5_df = disp_tbl5_df.drop(columns=["math_passed", "read_passed"])

In [19]:
# Apply display formats column by column (the values become text)
for column, template in {
    "Average Math Score": "{:,.2f}",
    "Average Reading Score": "{:,.2f}",
    "Pct Passed Math": "{:.2f}%",
    "Pct Passed Reading": "{:.2f}%",
    "Overall Passing Rate": "{:.2f}%",
}.items():
    disp_tbl5_df[column] = disp_tbl5_df[column].map(template.format)

print("Scores by School Spending")
display(disp_tbl5_df)

Scores by School Spending


Unnamed: 0_level_0,Average Math Score,Average Reading Score,Pct Passed Math,Pct Passed Reading,Overall Passing Rate
Spending Ranges (Per Student),Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
up to $600,83.36,83.91,93.74%,96.51%,95.12%
$601-630,79.18,81.98,75.60%,86.77%,81.18%
$631-640,78.05,81.48,71.36%,83.65%,77.51%
$641 and up,77.06,80.96,66.61%,80.78%,73.69%


In [20]:
# create per-size categories of key metrics
# (named for what it groups by, so the spending groupby is not overwritten)
groupby_size_df = student_df.groupby(['School Size'])

# mean scores and mean pass flags (i.e. pass fractions) for each size category
disp_tbl6_df = groupby_size_df[["Average Math Score","Average Reading Score","math_passed","read_passed"]].mean()

# add computed columns, using whole-column arithmetic rather than a row-by-row apply
disp_tbl6_df["Pct Passed Math"] = disp_tbl6_df["math_passed"] * 100
disp_tbl6_df["Pct Passed Reading"] = disp_tbl6_df["read_passed"] * 100
# overall rate is the simple average of the two subject pass rates
disp_tbl6_df["Overall Passing Rate"] = (disp_tbl6_df["Pct Passed Math"] + disp_tbl6_df["Pct Passed Reading"]) / 2

# drop the pass-fraction columns now that the percentages exist
disp_tbl6_df = disp_tbl6_df.drop(columns=["math_passed", "read_passed"])

In [21]:
# Apply display formats column by column (the values become text)
for column, template in {
    "Average Math Score": "{:,.2f}",
    "Average Reading Score": "{:,.2f}",
    "Pct Passed Math": "{:.2f}%",
    "Pct Passed Reading": "{:.2f}%",
    "Overall Passing Rate": "{:.2f}%",
}.items():
    disp_tbl6_df[column] = disp_tbl6_df[column].map(template.format)

print("Scores by School Size")
display(disp_tbl6_df)

Scores by School Size


Unnamed: 0_level_0,Average Math Score,Average Reading Score,Pct Passed Math,Pct Passed Reading,Overall Passing Rate
School Size,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Small,83.48,83.83,93.62%,96.58%,95.10%
Medium,79.47,82.24,77.72%,87.11%,82.41%
Large,77.07,80.93,66.47%,81.11%,73.79%


In [22]:
# create per-School Type of key metrics
# (named for what it groups by, so the spending groupby is not overwritten)
groupby_type_df = student_df.groupby(['type'])

# mean scores and mean pass flags (i.e. pass fractions) for each school type
disp_tbl7_df = groupby_type_df[["Average Math Score","Average Reading Score","math_passed","read_passed"]].mean()

# add computed columns, using whole-column arithmetic rather than a row-by-row apply
disp_tbl7_df["Pct Passed Math"] = disp_tbl7_df["math_passed"] * 100
disp_tbl7_df["Pct Passed Reading"] = disp_tbl7_df["read_passed"] * 100
# overall rate is the simple average of the two subject pass rates
disp_tbl7_df["Overall Passing Rate"] = (disp_tbl7_df["Pct Passed Math"] + disp_tbl7_df["Pct Passed Reading"]) / 2

# drop the pass-fraction columns now that the percentages exist
disp_tbl7_df = disp_tbl7_df.drop(columns=["math_passed", "read_passed"])

In [23]:
# Apply display formats column by column (the values become text)
for column, template in {
    "Average Math Score": "{:,.2f}",
    "Average Reading Score": "{:,.2f}",
    "Pct Passed Math": "{:.2f}%",
    "Pct Passed Reading": "{:.2f}%",
    "Overall Passing Rate": "{:.2f}%",
}.items():
    disp_tbl7_df[column] = disp_tbl7_df[column].map(template.format)

print("Scores by School Type")
display(disp_tbl7_df)

Scores by School Type


Unnamed: 0_level_0,Average Math Score,Average Reading Score,Pct Passed Math,Pct Passed Reading,Overall Passing Rate
type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Charter,83.41,83.9,93.70%,96.65%,95.17%
District,76.99,80.96,66.52%,80.91%,73.71%


There appears to be a low variance of both math and reading scores across grades within the same school.  This would suggest that grade level is not nearly as important in affecting test scores as other factors such as school size.

As a whole, schools with higher budgets, did not yield better test results. By contrast, schools with higher spending per student actually ($645-675)  underperformed compared to schools with smaller budgets (<$585 per student).

As a whole, smaller and medium sized schools dramatically out-performed large sized schools on passing math performances (89-91% passing vs 67%).

As a whole, charter schools out-performed the public district schools across all metrics. 

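The above three points should look familiar, as they were copied verbatim from the suggested observable trends, and the tables displayed above do bear out their direction.  Note, however, that the ranges and percentages they quote (for example, 89-91% of students passing math at small and medium schools) come from the suggested text and do not match the bins and values in the tables here (93.62% for small and 77.72% for medium schools).  With that in mind, let's see table 2 (the summary by school) sorted a couple of different ways: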

In [24]:
# Table 2 again, ordered from the smallest enrollment to the largest
# ("Total Students" was never formatted, so it is still numeric here)
disp_tbl2_df = disp_tbl2_df.sort_values(by="Total Students", ascending=True)
display(disp_tbl2_df)

Unnamed: 0,School Name,type,Total Students,budget,Budget Per Student,Average Math Score,Average Reading Score,Pct Passed Math,Pct Passed Reading,Overall Passing Rate
6,Holden High School,Charter,427,"$248,087",$581.00,83.8,83.81,92.51%,96.25%,94.38%
9,Pena High School,Charter,962,"$585,858",$609.00,83.84,84.04,94.59%,95.95%,95.27%
4,Griffin High School,Charter,1468,"$917,500",$625.00,83.35,83.82,93.39%,97.14%,95.27%
12,Thomas High School,Charter,1635,"$1,043,130",$638.00,83.42,83.85,93.27%,97.31%,95.29%
11,Shelton High School,Charter,1761,"$1,056,600",$600.00,83.36,83.73,93.87%,95.85%,94.86%
14,Wright High School,Charter,1800,"$1,049,400",$583.00,83.68,83.95,93.33%,96.61%,94.97%
1,Cabrera High School,Charter,1858,"$1,081,356",$582.00,83.06,83.98,94.13%,97.04%,95.59%
13,Wilson High School,Charter,2283,"$1,319,574",$578.00,83.27,83.99,93.87%,96.54%,95.20%
3,Ford High School,District,2739,"$1,763,916",$644.00,77.1,80.75,68.31%,79.30%,73.80%
7,Huang High School,District,2917,"$1,910,635",$655.00,76.63,81.18,65.68%,81.32%,73.50%


In [25]:
# sorted by spending
# "Budget Per Student" holds formatted text such as "$628.00" by this point;
# sorting the text directly is character-by-character (so "$1,000.00" would
# come before "$578.00"). Strip the "$" and thousands separators and order the
# rows by the numeric amount instead.
spend_per_student = pd.to_numeric(
    disp_tbl2_df["Budget Per Student"].astype(str).str.lstrip("$").str.replace(",", "")
)
# positional ordering, so the result does not depend on the index labels
spend_order = spend_per_student.reset_index(drop=True).sort_values().index
disp_tbl2_df = disp_tbl2_df.iloc[spend_order]
display(disp_tbl2_df)

Unnamed: 0,School Name,type,Total Students,budget,Budget Per Student,Average Math Score,Average Reading Score,Pct Passed Math,Pct Passed Reading,Overall Passing Rate
13,Wilson High School,Charter,2283,"$1,319,574",$578.00,83.27,83.99,93.87%,96.54%,95.20%
6,Holden High School,Charter,427,"$248,087",$581.00,83.8,83.81,92.51%,96.25%,94.38%
1,Cabrera High School,Charter,1858,"$1,081,356",$582.00,83.06,83.98,94.13%,97.04%,95.59%
14,Wright High School,Charter,1800,"$1,049,400",$583.00,83.68,83.95,93.33%,96.61%,94.97%
11,Shelton High School,Charter,1761,"$1,056,600",$600.00,83.36,83.73,93.87%,95.85%,94.86%
9,Pena High School,Charter,962,"$585,858",$609.00,83.84,84.04,94.59%,95.95%,95.27%
4,Griffin High School,Charter,1468,"$917,500",$625.00,83.35,83.82,93.39%,97.14%,95.27%
0,Bailey High School,District,4976,"$3,124,928",$628.00,77.05,81.03,66.68%,81.93%,74.31%
10,Rodriguez High School,District,3999,"$2,547,363",$637.00,76.84,80.74,66.37%,80.22%,73.29%
12,Thomas High School,Charter,1635,"$1,043,130",$638.00,83.42,83.85,93.27%,97.31%,95.29%


We see that all Charter schools are smaller than any District school, and that all Charter schools (except for Thomas High School) have lower budgets per student than any District school.  Because of those correlations, we cannot be certain whether better test scores are a result of smaller school sizes or of the differences between the practices of Charter schools and District schools; the same applies to spending totals.  Given that, we can take the suggested trends as pointers to where more research could be helpful, but we should be careful not to draw immediate conclusions.