## PyCity Schools

### Analysis:
- Scores are not dependant on higher budget spent per student
- Charter Schools performed the highest in all metrics
- Students who performed well in Math scored high overall


In [7]:
# Dependencies
import numpy as np
import pandas as pd

In [8]:
#reader csvs
student_file = "Resources/students_complete.csv"
school_file = "Resources/schools_complete.csv"

studentdf = pd.read_csv(student_file)
schooldf = pd.read_csv(school_file)

#sample of student/school df
#schooldf = schooldf["School ID","School","type","size","budget"]
schooldf.columns = ['School ID','school','type','size','budget']

In [9]:
dfcomp = pd.merge(studentdf, schooldf, how="left", on=["school","school"])
dfcomp.head()

Unnamed: 0,Student ID,name,gender,grade,school,reading_score,math_score,School ID,type,size,budget
0,0,Paul Bradley,M,9th,Huang High School,66,79,0,District,2917,1910635
1,1,Victor Smith,M,12th,Huang High School,94,61,0,District,2917,1910635
2,2,Kevin Rodriguez,M,12th,Huang High School,90,60,0,District,2917,1910635
3,3,Dr. Richard Scott,M,12th,Huang High School,67,58,0,District,2917,1910635
4,4,Bonnie Ray,F,9th,Huang High School,97,84,0,District,2917,1910635


In [10]:
# Calculate Totals 
school_count = len(dfcomp["school"].unique())
student_count = dfcomp["Student ID"].count()

# Calculate Total Budget
ttl_budget = schooldf["budget"].sum()


In [11]:
# Calculate Average Scores
avg_math_score = dfcomp["math_score"].mean()
avg_reading_score = dfcomp["reading_score"].mean()
overall_pass_rate = (avg_math_score + avg_reading_score) / 2

In [13]:
# Calculate the Percentage of Pass Rates
pass_math = dfcomp[(dfcomp["math_score"] > 70)].count()["name"]
pass_math_percent = pass_math / (student_count) * 100


pass_reading = dfcomp[(dfcomp["reading_score"] > 70)].count()["name"]
pass_reading_percent = pass_reading / (student_count) * 100

In [14]:
district_summary = pd.DataFrame({"Total Schools": [school_count], 
                                 "Total Students": [student_count], 
                                 "Total Budget": [ttl_budget],
                                 "Average Math Score": [avg_math_score], 
                                 "Average Reading Score": [avg_reading_score],
                                 "% Passing Math": [pass_math_percent],
                                 "% Passing Reading": [pass_reading_percent],
                                 "% Overall Passing Rate": [overall_pass_rate]})

district_summary = district_summary[["Total Schools", "Total Students", "Total Budget",
                                     "Average Math Score", 
                                     "Average Reading Score",
                                     "% Passing Math",
                                     "% Passing Reading",
                                     "% Overall Passing Rate"]]


#format values
district_summary["Total Schools"] = district_summary["Total Schools"].map("{0:,.0f}".format)
district_summary["Total Students"] = district_summary["Total Students"].map("{0:,}".format)
district_summary["Total Budget"] = district_summary["Total Budget"].map("${0:,.0f}".format)
district_summary["Average Math Score"] = district_summary["Average Math Score"].map("{0:,.2f}%".format)
district_summary["Average Reading Score"] = district_summary["Average Reading Score"].map("{0:,.2f}%".format)
district_summary["% Passing Math"] = district_summary["% Passing Math"].map("{0:,.2f}%".format)
district_summary["% Passing Reading"] = district_summary["% Passing Reading"].map("{0:,.2f}%".format)
district_summary["% Overall Passing Rate"] = district_summary["% Overall Passing Rate"].map("{0:,.2f}%".format)

district_summary

Unnamed: 0,Total Schools,Total Students,Total Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing Rate
0,15,39170,"$24,649,428",78.99%,81.88%,72.39%,82.97%,80.43%


## School Summary

In [15]:
schooldf.head()

Unnamed: 0,School ID,school,type,size,budget
0,0,Huang High School,District,2917,1910635
1,1,Figueroa High School,District,2949,1884411
2,2,Shelton High School,Charter,1761,1056600
3,3,Hernandez High School,District,4635,3022020
4,4,Griffin High School,Charter,1468,917500


In [36]:
#School Type
school_types = schooldf.set_index(["school"])["type"]

# Total students
per_school_count = dfcomp["school"].value_counts()

# school budget and capita
per_school_budget = schooldf.groupby(["school"]).mean()["budget"]
per_school_capita = per_school_budget / per_school_count

#Average Scores
avg_school_math = dfcomp.groupby(["school"]).mean()["math_score"]
avg_school_reading = dfcomp.groupby(["school"]).mean()["reading_score"]

#Pass rate
school_pass_math = dfcomp[(dfcomp["math_score"] > 70)]
school_pass_reading = dfcomp[(dfcomp["reading_score"] > 70)]

per_school_pass_math = school_pass_math.groupby(["school"]).count()["name"] / per_school_count * 100
per_school_pass_reading = school_pass_reading.groupby(["school"]).count()["name"] / per_school_count * 100
overall_pass_rate = (per_school_pass_math + per_school_pass_reading) / 2


per_school_summary = pd.DataFrame({"School Type": school_types,
                                   "Total Students": per_school_count,
                                   "Total School Budget": per_school_budget,
                                   "Per Student Budget": per_school_capita,
                                   "Average Math Score": avg_school_math,
                                   "Average Reading Score": avg_school_reading,
                                   "% Passing Math": per_school_pass_math,
                                   "% Passing Reading": per_school_pass_reading,
                                   "% Overall Passing Rate": overall_pass_rate})

# Minor data munging
per_school_summary = per_school_summary[["School Type", "Total Students", "Total School Budget", "Per Student Budget",
                                         "Average Math Score", "Average Reading Score", 
                                         "% Passing Math", "% Passing Reading", 
                                         "% Overall Passing Rate"]]

per_school_summary["Total School Budget"] = per_school_summary["Total Students"].map("${:,.2f}".format)
per_school_summary["Per Student Budget"] = per_school_summary["Per Student Budget"].map("${:,.2f}".format)

# Display the data frame
per_school_summary

Unnamed: 0,School Type,Total Students,Total School Budget,Per Student Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing Rate
Bailey High School,District,4976,"$4,976.00",$628.00,77.048432,81.033963,64.630225,79.300643,71.965434
Cabrera High School,Charter,1858,"$1,858.00",$582.00,83.061895,83.97578,89.558665,93.86437,91.711518
Figueroa High School,District,2949,"$2,949.00",$639.00,76.711767,81.15802,63.750424,78.433367,71.091896
Ford High School,District,2739,"$2,739.00",$644.00,77.102592,80.746258,65.753925,77.51004,71.631982
Griffin High School,Charter,1468,"$1,468.00",$625.00,83.351499,83.816757,89.713896,93.392371,91.553134
Hernandez High School,District,4635,"$4,635.00",$652.00,77.289752,80.934412,64.746494,78.187702,71.467098
Holden High School,Charter,427,$427.00,$581.00,83.803279,83.814988,90.632319,92.740047,91.686183
Huang High School,District,2917,"$2,917.00",$655.00,76.629414,81.182722,63.318478,78.81385,71.066164
Johnson High School,District,4761,"$4,761.00",$650.00,77.072464,80.966394,63.852132,78.281874,71.067003
Pena High School,Charter,962,$962.00,$609.00,83.839917,84.044699,91.683992,92.203742,91.943867


## Top Performing Schools (By Passing Rate)

In [37]:
top_schools = per_school_summary.sort_values(["% Overall Passing Rate"], ascending=False)
top_schools.head(5)

Unnamed: 0,School Type,Total Students,Total School Budget,Per Student Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing Rate
Wilson High School,Charter,2283,"$2,283.00",$578.00,83.274201,83.989488,90.932983,93.25449,92.093736
Pena High School,Charter,962,$962.00,$609.00,83.839917,84.044699,91.683992,92.203742,91.943867
Wright High School,Charter,1800,"$1,800.00",$583.00,83.682222,83.955,90.277778,93.444444,91.861111
Cabrera High School,Charter,1858,"$1,858.00",$582.00,83.061895,83.97578,89.558665,93.86437,91.711518
Holden High School,Charter,427,$427.00,$581.00,83.803279,83.814988,90.632319,92.740047,91.686183


## Bottom Performing Schools (By Passing Rate)

In [38]:
bottom_schools = per_school_summary.sort_values(["% Overall Passing Rate"], ascending=True)
bottom_schools.head(5)

Unnamed: 0,School Type,Total Students,Total School Budget,Per Student Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing Rate
Rodriguez High School,District,3999,"$3,999.00",$637.00,76.842711,80.744686,64.066017,77.744436,70.905226
Huang High School,District,2917,"$2,917.00",$655.00,76.629414,81.182722,63.318478,78.81385,71.066164
Johnson High School,District,4761,"$4,761.00",$650.00,77.072464,80.966394,63.852132,78.281874,71.067003
Figueroa High School,District,2949,"$2,949.00",$639.00,76.711767,81.15802,63.750424,78.433367,71.091896
Hernandez High School,District,4635,"$4,635.00",$652.00,77.289752,80.934412,64.746494,78.187702,71.467098


## Math Scores by Grade

In [39]:
nineth_grade = dfcomp[(dfcomp["grade"] == "9th")]
tenth_grade = dfcomp[(dfcomp["grade"] == "10th")]
eleventh_grade = dfcomp[(dfcomp["grade"] == "11th")]
twelfth_grade = dfcomp[(dfcomp["grade"] == "12th")]

In [40]:
nineth_grade_scores = nineth_grade.groupby(["school"]).mean()["math_score"]
tenth_grade_scores = tenth_grade.groupby(["school"]).mean()["math_score"]
eleventh_grade_scores = eleventh_grade.groupby(["school"]).mean()["math_score"]
twelfth_grade_scores = twelfth_grade.groupby(["school"]).mean()["math_score"]

In [41]:
nineth_grade_scores = nineth_grade_scores.map("{0:,.2f}".format)
tenth_grade_scores = tenth_grade_scores.map("{0:,.2f}".format)
eleventh_grade_scores = eleventh_grade_scores.map("{0:,.2f}".format)
twelfth_grade_scores = twelfth_grade_scores.map("{0:,.2f}".format)

In [42]:
scores = pd.DataFrame({"9th": nineth_grade_scores, "10th": tenth_grade_scores,
                                "11th": eleventh_grade_scores, "12th": twelfth_grade_scores})

scores = scores[["9th", "10th", "11th", "12th"]]
scores.index.name = " "

In [43]:
scores

Unnamed: 0,9th,10th,11th,12th
,,,,
Bailey High School,77.08,77.0,77.52,76.49
Cabrera High School,83.09,83.15,82.77,83.28
Figueroa High School,76.4,76.54,76.88,77.15
Ford High School,77.36,77.67,76.92,76.18
Griffin High School,82.04,84.23,83.84,83.36
Hernandez High School,77.44,77.34,77.14,77.19
Holden High School,83.79,83.43,85.0,82.86
Huang High School,77.03,75.91,76.45,77.23
Johnson High School,77.19,76.69,77.49,76.86


## Scores by School Spending

In [44]:
# Establish the bins 
spending_bins = [0, 600, 625, 650, 675]
group_names = ["<$600", "$600-625", "$650-675", ">$675"]

per_school_summary["Spending Ranges (Per Student)"] = pd.cut(per_school_capita, spending_bins, labels=group_names)

spending_math_scores = per_school_summary.groupby(["Spending Ranges (Per Student)"]).mean()["Average Math Score"]
spending_passing_math = per_school_summary.groupby(["Spending Ranges (Per Student)"]).mean()["% Passing Math"]

spending_reading_scores = per_school_summary.groupby(["Spending Ranges (Per Student)"]).mean()["Average Reading Score"]
spending_passing_reading = per_school_summary.groupby(["Spending Ranges (Per Student)"]).mean()["% Passing Reading"]

overall_passing_rate = (spending_math_scores + spending_reading_scores) / 2

spending_summary = pd.DataFrame({"Average Math Score" : spending_math_scores,
                                 "Average Reading Score": spending_reading_scores,
                                 "% Passing Math": spending_passing_math,
                                 "% Passing Reading": spending_passing_reading,
                                 "% Overall Passing Rate": overall_passing_rate})

spending_summary = spending_summary[["Average Math Score", 
                                     "Average Reading Score", 
                                     "% Passing Math", "% Passing Reading",
                                     "% Overall Passing Rate"]]

spending_summary

Unnamed: 0_level_0,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing Rate
Spending Ranges (Per Student),Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
<$600,83.43621,83.892196,90.25877,93.184236,83.664203
$600-625,83.595708,83.930728,90.698944,92.798056,83.763218
$650-675,78.032719,81.416375,68.711132,80.695926,79.724547
>$675,76.959583,81.058567,64.032486,78.500776,79.009075


## Scores by School Spending

In [57]:
#Bins
spending_bins = [0, 585, 615, 645, 675]
group_names = ["<$585", "$585-615", "$615-645", "$645-675"]

per_school_summary["Spending Ranges (Per Student)"] = pd.cut(per_school_capita, spending_bins, labels=group_names)

spending_math_scores = per_school_summary.groupby(["Spending Ranges (Per Student)"]).mean()["Average Math Score"]
spending_passing_math = per_school_summary.groupby(["Spending Ranges (Per Student)"]).mean()["% Passing Math"]

spending_reading_scores = per_school_summary.groupby(["Spending Ranges (Per Student)"]).mean()["Average Reading Score"]
spending_passing_reading = per_school_summary.groupby(["Spending Ranges (Per Student)"]).mean()["% Passing Reading"]

overall_passing_rate = (spending_math_scores + spending_reading_scores) / 2

spending_summary = pd.DataFrame({"Average Math Score" : spending_math_scores,
                                 "Average Reading Score": spending_reading_scores,
                                 "% Passing Math": spending_passing_math,
                                 "% Passing Reading": spending_passing_reading,
                                 "% Overall Passing Rate": overall_passing_rate})

spending_summary = spending_summary[["Average Math Score", 
                                     "Average Reading Score", 
                                     "% Passing Math", "% Passing Reading",
                                     "% Overall Passing Rate"]]

In [54]:
spending_summary

Unnamed: 0_level_0,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing Rate
Spending Ranges (Per Student),Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
<$585,83.455399,83.933814,90.350436,93.325838,83.694607
$585-615,83.599686,83.885211,90.788049,92.410786,83.742449
$615-645,79.079225,81.891436,73.021426,83.214343,80.48533
$645-675,76.99721,81.027843,63.972368,78.427809,79.012526


## Scores by School Size

In [55]:
size_bins = [0, 1000, 2500, 5000]
group_names = ["Small", "Medium", "Large"]

per_school_summary["School Size"] = pd.cut(per_school_summary["Total Students"], size_bins, labels=group_names)

total_math_scores = per_school_summary.groupby(["School Size"]).mean()["Average Math Score"]
total_passing_math = per_school_summary.groupby(["School Size"]).mean()["% Passing Math"]

total_reading_scores = per_school_summary.groupby(["School Size"]).mean()["Average Reading Score"]
total_passing_reading = per_school_summary.groupby(["School Size"]).mean()["% Passing Reading"]

overall_passing_rate = (total_passing_math + total_passing_reading) / 2

size_summary = pd.DataFrame({"Average Math Score" : total_math_scores,
                             "Average Reading Score": total_reading_scores,
                             "% Passing Math": total_passing_math,
                             "% Passing Reading": total_passing_reading,
                             "% Overall Passing Rate": overall_passing_rate})

size_summary = size_summary[["Average Math Score", 
                             "Average Reading Score", 
                             "% Passing Math", "% Passing Reading",
                             "% Overall Passing Rate"]]

In [56]:
size_summary

Unnamed: 0_level_0,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing Rate
School Size,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Small,83.821598,83.929843,91.158155,92.471895,91.815025
Medium,83.357937,83.88528,90.098249,93.246451,91.67235
Large,76.956733,80.966636,64.302528,78.324559,71.313543


## Scores by School Type

In [51]:
type_math_scores = per_school_summary.groupby(["School Type"]).mean()["Average Math Score"]
type_pass_math = per_school_summary.groupby(["School Type"]).mean()["% Passing Math"]

type_reading_scores = per_school_summary.groupby(["School Type"]).mean()["Average Reading Score"]
type_pass_reading = per_school_summary.groupby(["School Type"]).mean()["% Passing Reading"]

overall_pass_rate = (type_pass_math + type_pass_reading) / 2

type_summary = pd.DataFrame({"Average Math Score" : type_math_scores,
                             "Average Reading Score": type_reading_scores,
                             "% Passing Math": type_pass_math,
                             "% Passing Reading": type_pass_reading,
                             "% Overall Passing Rate": overall_pass_rate})


type_summary = type_summary[["Average Math Score", 
                             "Average Reading Score",
                             "% Passing Math",
                             "% Passing Reading",
                             "% Overall Passing Rate"]]

type_summary

Unnamed: 0_level_0,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing Rate
School Type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Charter,83.473852,83.896421,90.363226,93.052812,91.708019
District,76.956733,80.966636,64.302528,78.324559,71.313543
