In [694]:
# import dependacies 
import os 
import csv 
import pandas as pd 
import numpy as np 

In [695]:
# set csv paths 
schools_complete = "raw_data/schools_complete.csv"
students_complete = "raw_data/students_complete.csv"

In [696]:
# read schools csv 
schools_pd = pd.read_csv(schools_complete, low_memory=False)

schools_pd.head()

Unnamed: 0,School ID,name,type,size,budget
0,0,Huang High School,District,2917,1910635
1,1,Figueroa High School,District,2949,1884411
2,2,Shelton High School,Charter,1761,1056600
3,3,Hernandez High School,District,4635,3022020
4,4,Griffin High School,Charter,1468,917500


In [697]:
# rename and format columns in schools df 
schools_organized_df = schools_pd.rename(columns={"name":"School Name", "type": "Type", 
                                                   "size":"School Size", 
                                                   "budget": "Total Budget"})
schools_organized_df.head()

Unnamed: 0,School ID,School Name,Type,School Size,Total Budget
0,0,Huang High School,District,2917,1910635
1,1,Figueroa High School,District,2949,1884411
2,2,Shelton High School,Charter,1761,1056600
3,3,Hernandez High School,District,4635,3022020
4,4,Griffin High School,Charter,1468,917500


In [698]:
# School Types 
school_types = schools_organized_df.groupby("Type").mean()
school_types

Unnamed: 0_level_0,School ID,School Size,Total Budget
Type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Charter,7.25,1524.25,912688.1
District,6.714286,3853.714286,2478275.0


In [699]:
# total distrcit budget **Actual District Budget**
total_budget = schools_pd["budget"].sum()
total_budget 

24649428

In [700]:
total_student_count = schools_pd["size"].sum()
total_student_count

39170

In [701]:
students_pd = pd.read_csv(students_complete, low_memory=False)
students_pd.head()

Unnamed: 0,Student ID,name,gender,grade,school,reading_score,math_score
0,0,Paul Bradley,M,9th,Huang High School,66,79
1,1,Victor Smith,M,12th,Huang High School,94,61
2,2,Kevin Rodriguez,M,12th,Huang High School,90,60
3,3,Dr. Richard Scott,M,12th,Huang High School,67,58
4,4,Bonnie Ray,F,9th,Huang High School,97,84


In [702]:
# rename and format columns in student df 
students_organized_df = students_pd.rename(columns={"name":"Student Name", "gender": "Gender", "school":"School Name", 
                                                    "grade": "Grade", "reading_score": "Reading Score", 
                                                    "math_score": "Math Score"})
students_organized_df.head()

Unnamed: 0,Student ID,Student Name,Gender,Grade,School Name,Reading Score,Math Score
0,0,Paul Bradley,M,9th,Huang High School,66,79
1,1,Victor Smith,M,12th,Huang High School,94,61
2,2,Kevin Rodriguez,M,12th,Huang High School,90,60
3,3,Dr. Richard Scott,M,12th,Huang High School,67,58
4,4,Bonnie Ray,F,9th,Huang High School,97,84


In [703]:
# merge extracts into new data frame using inner join 
ds_combined = pd.merge(schools_organized_df, students_organized_df, on="School Name", how="outer")
ds_combined.head()

Unnamed: 0,School ID,School Name,Type,School Size,Total Budget,Student ID,Student Name,Gender,Grade,Reading Score,Math Score
0,0,Huang High School,District,2917,1910635,0,Paul Bradley,M,9th,66,79
1,0,Huang High School,District,2917,1910635,1,Victor Smith,M,12th,94,61
2,0,Huang High School,District,2917,1910635,2,Kevin Rodriguez,M,12th,90,60
3,0,Huang High School,District,2917,1910635,3,Dr. Richard Scott,M,12th,67,58
4,0,Huang High School,District,2917,1910635,4,Bonnie Ray,F,9th,97,84


In [704]:
# School Type index 

school_type = ds_combined.groupby(["Type"]).mean()
school_type

Unnamed: 0_level_0,School ID,School Size,Total Budget,Student ID,Reading Score,Math Score
Type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Charter,6.964163,1717.352468,1024543.0,19438.475398,83.902821,83.406183
District,6.984505,4063.261195,2611175.0,19650.507711,80.962485,76.987026


In [705]:
# School names 

school_names = ds_combined["School Name"].unique()
school_names

array(['Huang High School', 'Figueroa High School', 'Shelton High School',
       'Hernandez High School', 'Griffin High School',
       'Wilson High School', 'Cabrera High School', 'Bailey High School',
       'Holden High School', 'Pena High School', 'Wright High School',
       'Rodriguez High School', 'Johnson High School', 'Ford High School',
       'Thomas High School'], dtype=object)

In [706]:
# total school count
total_school_count = len(ds_combined["School Name"].unique())
total_school_count

15

In [707]:
# total student count by school 
sc_per_school = ds_combined["School Name"].value_counts()
sc_per_school

Bailey High School       4976
Johnson High School      4761
Hernandez High School    4635
Rodriguez High School    3999
Figueroa High School     2949
Huang High School        2917
Ford High School         2739
Wilson High School       2283
Cabrera High School      1858
Wright High School       1800
Shelton High School      1761
Thomas High School       1635
Griffin High School      1468
Pena High School          962
Holden High School        427
Name: School Name, dtype: int64

In [708]:
# total student count 
total_student_count = sc_per_school.sum()
total_student_count

39170

In [709]:
tb_per_school = ds_combined.groupby(["School Name"]).mean()["Total Budget"]
tb_per_school.head()

School Name
Bailey High School      3124928.0
Cabrera High School     1081356.0
Figueroa High School    1884411.0
Ford High School        1763916.0
Griffin High School      917500.0
Name: Total Budget, dtype: float64

In [710]:
total_ds_budget = pd.DataFrame({"Total Budget": tb_per_school}).sum()
total_ds_budget

Total Budget    24649428.0
dtype: float64

In [711]:
# calculate average Reading and Math Scores (** this will be used in future tables)
avg_rs = ds_combined["Reading Score"].mean() 
avg_ms = ds_combined["Math Score"].mean() 
print(avg_rs)
print(avg_ms)

81.87784018381414
78.98537145774827


In [712]:
# variable for passing reading >= 70%

rs_passing = ds_combined.loc[ds_combined["Reading Score"] >= 70,:]
rs_passing.head()

Unnamed: 0,School ID,School Name,Type,School Size,Total Budget,Student ID,Student Name,Gender,Grade,Reading Score,Math Score
1,0,Huang High School,District,2917,1910635,1,Victor Smith,M,12th,94,61
2,0,Huang High School,District,2917,1910635,2,Kevin Rodriguez,M,12th,90,60
4,0,Huang High School,District,2917,1910635,4,Bonnie Ray,F,9th,97,84
5,0,Huang High School,District,2917,1910635,5,Bryan Miranda,M,9th,94,94
6,0,Huang High School,District,2917,1910635,6,Sheena Carter,F,11th,82,80


In [713]:
# variable for failing reading < 70%

rs_failing = ds_combined.loc[ds_combined["Reading Score"] < 70,:]
rs_failing.head()

Unnamed: 0,School ID,School Name,Type,School Size,Total Budget,Student ID,Student Name,Gender,Grade,Reading Score,Math Score
0,0,Huang High School,District,2917,1910635,0,Paul Bradley,M,9th,66,79
3,0,Huang High School,District,2917,1910635,3,Dr. Richard Scott,M,12th,67,58
12,0,Huang High School,District,2917,1910635,12,Brittney Walker,F,9th,64,79
18,0,Huang High School,District,2917,1910635,18,Kevin Stevens,M,9th,64,69
26,0,Huang High School,District,2917,1910635,26,Melanie Decker,F,9th,63,85


In [714]:
# rs passing  
rs_passing_total = rs_passing["Reading Score"].mean()
rs_passing_total

84.47325200833086

In [715]:
# rs failing  

rs_failing_total = rs_failing["Reading Score"].mean()
rs_failing_total

66.1886690647482

In [716]:
# overall rs total passing 

passing_reading_total = (rs_failing_total / rs_passing_total) * 100
passing_reading_total

78.35458857227444

In [717]:
# variable for Passing Math 
ms_passing = ds_combined.loc[ds_combined["Math Score"] >= 70,:]
ms_passing.head()

Unnamed: 0,School ID,School Name,Type,School Size,Total Budget,Student ID,Student Name,Gender,Grade,Reading Score,Math Score
0,0,Huang High School,District,2917,1910635,0,Paul Bradley,M,9th,66,79
4,0,Huang High School,District,2917,1910635,4,Bonnie Ray,F,9th,97,84
5,0,Huang High School,District,2917,1910635,5,Bryan Miranda,M,9th,94,94
6,0,Huang High School,District,2917,1910635,6,Sheena Carter,F,11th,82,80
8,0,Huang High School,District,2917,1910635,8,Michael Roth,M,10th,95,87


In [718]:
# Variable for Failing Math
ms_failing = ds_combined.loc[ds_combined["Math Score"] < 70,:]
ms_failing.head()

Unnamed: 0,School ID,School Name,Type,School Size,Total Budget,Student ID,Student Name,Gender,Grade,Reading Score,Math Score
1,0,Huang High School,District,2917,1910635,1,Victor Smith,M,12th,94,61
2,0,Huang High School,District,2917,1910635,2,Kevin Rodriguez,M,12th,90,60
3,0,Huang High School,District,2917,1910635,3,Dr. Richard Scott,M,12th,67,58
7,0,Huang High School,District,2917,1910635,7,Nicole Baker,F,12th,96,69
14,0,Huang High School,District,2917,1910635,14,Tammy Hebert,F,10th,85,67


In [719]:
# ms passing 
 
ms_passing_total = ms_passing["Math Score"].mean()
ms_passing_total

84.4712631937351

In [720]:
# ms failing 
ms_failing_total = ms_failing["Math Score"].mean()
ms_failing_total 

62.544489795918366

In [721]:
# ms passing 
passing_math_total = (ms_failing_total / ms_passing_total) * 100
passing_math_total

74.04232804293738

In [722]:
# overall % passing 
overall_passing = ((ms_passing_total + rs_passing_total)/2)
overall_passing

84.47225760103298

In [723]:
# District Summary 

# Creating a summary DataFrame using above values
summary_df = pd.DataFrame({"Total Schools":[total_school_count],
                          "Total Budget": int(total_ds_budget), 
                          "Average Math Score %": avg_rs, 
                          "Average Reading Score %": avg_ms, 
                          "Passing Reading %": passing_reading_total, 
                          "Passing Math %": passing_math_total, 
                          "Overall Passing %": overall_passing})


# format
summary_df["Total Budget"] = summary_df["Total Budget"].map("${:,}".format)
summary_df["Total Schools"] = summary_df["Total Schools"].map("{:,}".format)

# reorganize 

summary_df = summary_df[["Total Schools","Total Budget","Average Math Score %",
                         "Average Reading Score %", "Passing Reading %", "Passing Math %", "Overall Passing %"]]

summary_df

Unnamed: 0,Total Schools,Total Budget,Average Math Score %,Average Reading Score %,Passing Reading %,Passing Math %,Overall Passing %
0,15,"$24,649,428",81.87784,78.985371,78.354589,74.042328,84.472258


In [724]:
# School Summary 

schools_summary = ds_combined.groupby(["School Name"]).mean()
schools_summary.head()

Unnamed: 0_level_0,School ID,School Size,Total Budget,Student ID,Reading Score,Math Score
School Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Bailey High School,7.0,4976.0,3124928.0,20358.5,81.033963,77.048432
Cabrera High School,6.0,1858.0,1081356.0,16941.5,83.97578,83.061895
Figueroa High School,1.0,2949.0,1884411.0,4391.0,81.15802,76.711767
Ford High School,13.0,2739.0,1763916.0,36165.0,80.746258,77.102592
Griffin High School,4.0,1468.0,917500.0,12995.5,83.816757,83.351499


In [725]:
# school budget per student 
budget_per_student = (schools_summary["Total Budget"])/(ds_combined["School Size"]).mean()
budget_per_student

School Name
Bailey High School       937.584222
Cabrera High School      324.443419
Figueroa High School     565.387114
Ford High School         529.234533
Griffin High School      275.281070
Hernandez High School    906.708337
Holden High School        74.434501
Huang High School        573.255202
Johnson High School      928.499797
Pena High School         175.777239
Rodriguez High School    764.295164
Shelton High School      317.015781
Thomas High School       312.974324
Wilson High School       395.916886
Wright High School       314.855537
Name: Total Budget, dtype: float64

In [726]:
# Average Reading Score by School
avg_rs_per_school = ds_combined.groupby("School Name").mean()["Reading Score"]
avg_rs_per_school.head()

School Name
Bailey High School      81.033963
Cabrera High School     83.975780
Figueroa High School    81.158020
Ford High School        80.746258
Griffin High School     83.816757
Name: Reading Score, dtype: float64

In [727]:
# Average Math Score by School
avg_ms_per_school = ds_combined.groupby("School Name").mean()["Math Score"]
avg_ms_per_school.head()

School Name
Bailey High School      77.048432
Cabrera High School     83.061895
Figueroa High School    76.711767
Ford High School        77.102592
Griffin High School     83.351499
Name: Math Score, dtype: float64

In [728]:
# variable for # passing reading 
rs_passing_per_school = ds_combined.loc[ds_combined["Reading Score"] >= 70, "School Name"].value_counts()

In [729]:
# total reading pass % by school *******

total_rs_pass_per_school = (rs_passing_per_school/sc_per_school) * 100
total_rs_pass_per_school

Bailey High School       81.933280
Cabrera High School      97.039828
Figueroa High School     80.739234
Ford High School         79.299014
Griffin High School      97.138965
Hernandez High School    80.862999
Holden High School       96.252927
Huang High School        81.316421
Johnson High School      81.222432
Pena High School         95.945946
Rodriguez High School    80.220055
Shelton High School      95.854628
Thomas High School       97.308869
Wilson High School       96.539641
Wright High School       96.611111
Name: School Name, dtype: float64

In [730]:
# variable for failing reading by school 
# rs_failing_per_school = ds_combined.loc[ds_combined["Reading Score"] < 70, "School Name"].value_counts()


In [731]:
# variable for passing % math by school 

ms_passing_per_school = ds_combined.loc[ds_combined["Math Score"] >= 70, "School Name"].value_counts()
ms_passing_per_school

Bailey High School       3318
Johnson High School      3145
Hernandez High School    3094
Rodriguez High School    2654
Wilson High School       2143
Figueroa High School     1946
Huang High School        1916
Ford High School         1871
Cabrera High School      1749
Wright High School       1680
Shelton High School      1653
Thomas High School       1525
Griffin High School      1371
Pena High School          910
Holden High School        395
Name: School Name, dtype: int64

In [732]:
# Variable for Failing % Math by school
# ms_failing_per_school = ds_combined.loc[ds_combined["Math Score"] < 70, "School Name"].value_counts()


In [733]:
# total ms pass % by school *******

total_ms_pass_per_school = (ms_passing_per_school/sc_per_school) * 100
total_ms_pass_per_school

Bailey High School       66.680064
Cabrera High School      94.133477
Figueroa High School     65.988471
Ford High School         68.309602
Griffin High School      93.392371
Hernandez High School    66.752967
Holden High School       92.505855
Huang High School        65.683922
Johnson High School      66.057551
Pena High School         94.594595
Rodriguez High School    66.366592
Shelton High School      93.867121
Thomas High School       93.272171
Wilson High School       93.867718
Wright High School       93.333333
Name: School Name, dtype: float64

In [734]:
# Overall Pass % by school 

overall_passing_by_school = ((total_rs_pass_per_school+total_ms_pass_per_school)/2) 
overall_passing_by_school

Bailey High School       74.306672
Cabrera High School      95.586652
Figueroa High School     73.363852
Ford High School         73.804308
Griffin High School      95.265668
Hernandez High School    73.807983
Holden High School       94.379391
Huang High School        73.500171
Johnson High School      73.639992
Pena High School         95.270270
Rodriguez High School    73.293323
Shelton High School      94.860875
Thomas High School       95.290520
Wilson High School       95.203679
Wright High School       94.972222
Name: School Name, dtype: float64

In [740]:
schools_sum = pd.DataFrame({"School Type" : str(school_type),
                            "Total Students": sc_per_school,
                            "Total Budget": int(total_ds_budget),
                            "Budget per Student": budget_per_student,
                            "Average Math Score %": avg_ms_per_school, 
                            "Average Reading Score %": avg_rs_per_school,
                            "Passing Reading %": total_rs_pass_per_school,
                            "Passing Math %": total_ms_pass_per_school,
                            "Overall Passing %": overall_passing_by_school})

schools_sum

Unnamed: 0,Average Math Score %,Average Reading Score %,Budget per Student,Overall Passing %,Passing Math %,Passing Reading %,School Type,Total Budget,Total Students
Bailey High School,77.048432,81.033963,937.584222,74.306672,66.680064,81.93328,School ID School Size Total Budget...,24649428,4976
Cabrera High School,83.061895,83.97578,324.443419,95.586652,94.133477,97.039828,School ID School Size Total Budget...,24649428,1858
Figueroa High School,76.711767,81.15802,565.387114,73.363852,65.988471,80.739234,School ID School Size Total Budget...,24649428,2949
Ford High School,77.102592,80.746258,529.234533,73.804308,68.309602,79.299014,School ID School Size Total Budget...,24649428,2739
Griffin High School,83.351499,83.816757,275.28107,95.265668,93.392371,97.138965,School ID School Size Total Budget...,24649428,1468
Hernandez High School,77.289752,80.934412,906.708337,73.807983,66.752967,80.862999,School ID School Size Total Budget...,24649428,4635
Holden High School,83.803279,83.814988,74.434501,94.379391,92.505855,96.252927,School ID School Size Total Budget...,24649428,427
Huang High School,76.629414,81.182722,573.255202,73.500171,65.683922,81.316421,School ID School Size Total Budget...,24649428,2917
Johnson High School,77.072464,80.966394,928.499797,73.639992,66.057551,81.222432,School ID School Size Total Budget...,24649428,4761
Pena High School,83.839917,84.044699,175.777239,95.27027,94.594595,95.945946,School ID School Size Total Budget...,24649428,962


In [745]:
# reorganize 

schools_final = schools_sum[["Total Students", "School Type", "Total Budget", 
                               "Budget per Student","Average Math Score %",
                               "Average Reading Score %", "Passing Reading %", 
                               "Passing Math %", "Overall Passing %"]]

schools_final.head()

Unnamed: 0,Total Students,School Type,Total Budget,Budget per Student,Average Math Score %,Average Reading Score %,Passing Reading %,Passing Math %,Overall Passing %
Bailey High School,4976,School ID School Size Total Budget...,24649428,937.584222,77.048432,81.033963,81.93328,66.680064,74.306672
Cabrera High School,1858,School ID School Size Total Budget...,24649428,324.443419,83.061895,83.97578,97.039828,94.133477,95.586652
Figueroa High School,2949,School ID School Size Total Budget...,24649428,565.387114,76.711767,81.15802,80.739234,65.988471,73.363852
Ford High School,2739,School ID School Size Total Budget...,24649428,529.234533,77.102592,80.746258,79.299014,68.309602,73.804308
Griffin High School,1468,School ID School Size Total Budget...,24649428,275.28107,83.351499,83.816757,97.138965,93.392371,95.265668


In [746]:
# format
schools_final["Total Budget"] = schools_final["Total Budget"].map("${:,}".format)
schools_final["Total Students"] = schools_final["Total Students"].map("{:,}".format)
schools_final["Budget per Student"] = schools_final["Budget per Student"].map("${:,.2f}".format)
schools_final.head()

Unnamed: 0,Total Students,School Type,Total Budget,Budget per Student,Average Math Score %,Average Reading Score %,Passing Reading %,Passing Math %,Overall Passing %
Bailey High School,4976,School ID School Size Total Budget...,"$24,649,428",$937.58,77.048432,81.033963,81.93328,66.680064,74.306672
Cabrera High School,1858,School ID School Size Total Budget...,"$24,649,428",$324.44,83.061895,83.97578,97.039828,94.133477,95.586652
Figueroa High School,2949,School ID School Size Total Budget...,"$24,649,428",$565.39,76.711767,81.15802,80.739234,65.988471,73.363852
Ford High School,2739,School ID School Size Total Budget...,"$24,649,428",$529.23,77.102592,80.746258,79.299014,68.309602,73.804308
Griffin High School,1468,School ID School Size Total Budget...,"$24,649,428",$275.28,83.351499,83.816757,97.138965,93.392371,95.265668


In [747]:
# Top Performing Schools (by Passing Rate)
top_performing_schools = schools_final[["Total Students","School Type", "Total Budget", 
                               "Budget per Student","Average Math Score %",
                               "Average Reading Score %", "Passing Reading %", 
                               "Passing Math %", "Overall Passing %"]]



# sort

top_performing_schools = schools_final.sort_values('Overall Passing %', ascending=False)
top_performing_schools.head()

Unnamed: 0,Total Students,School Type,Total Budget,Budget per Student,Average Math Score %,Average Reading Score %,Passing Reading %,Passing Math %,Overall Passing %
Cabrera High School,1858,School ID School Size Total Budget...,"$24,649,428",$324.44,83.061895,83.97578,97.039828,94.133477,95.586652
Thomas High School,1635,School ID School Size Total Budget...,"$24,649,428",$312.97,83.418349,83.84893,97.308869,93.272171,95.29052
Pena High School,962,School ID School Size Total Budget...,"$24,649,428",$175.78,83.839917,84.044699,95.945946,94.594595,95.27027
Griffin High School,1468,School ID School Size Total Budget...,"$24,649,428",$275.28,83.351499,83.816757,97.138965,93.392371,95.265668
Wilson High School,2283,School ID School Size Total Budget...,"$24,649,428",$395.92,83.274201,83.989488,96.539641,93.867718,95.203679


In [751]:
# Bottom Performing Schools (by passing rate)

bottom_performing_schools = schools_final[["Total Students", "School Type", "Total Budget", 
                               "Budget per Student","Average Math Score %",
                               "Average Reading Score %", "Passing Reading %", 
                               "Passing Math %", "Overall Passing %"]]


bottom_performing_schools = schools_final.sort_values('Overall Passing %', ascending=True)
bottom_performing_schools.head()

Unnamed: 0,Total Students,School Type,Total Budget,Budget per Student,Average Math Score %,Average Reading Score %,Passing Reading %,Passing Math %,Overall Passing %
Rodriguez High School,3999,School ID School Size Total Budget...,"$24,649,428",$764.30,76.842711,80.744686,80.220055,66.366592,73.293323
Figueroa High School,2949,School ID School Size Total Budget...,"$24,649,428",$565.39,76.711767,81.15802,80.739234,65.988471,73.363852
Huang High School,2917,School ID School Size Total Budget...,"$24,649,428",$573.26,76.629414,81.182722,81.316421,65.683922,73.500171
Johnson High School,4761,School ID School Size Total Budget...,"$24,649,428",$928.50,77.072464,80.966394,81.222432,66.057551,73.639992
Ford High School,2739,School ID School Size Total Budget...,"$24,649,428",$529.23,77.102592,80.746258,79.299014,68.309602,73.804308


In [752]:
# ms and rs scores by grade  
#//////////////////////////////////////////////
#9th grade 

grades_9th = ds_combined.loc[ds_combined["Grade"] == "9th",:]

In [753]:
# avg ms 9th
avg_ms_9th = grades_9th.groupby("School Name").mean()["Math Score"]


In [754]:
# 10th grade 
grades_10th = ds_combined.loc[ds_combined["Grade"] == "10th",:]

In [755]:
# avg ms 10th 
avg_ms_10th = grades_10th.groupby("School Name").mean()["Math Score"]


In [756]:
# 11th Grade 
grades_11th = ds_combined.loc[ds_combined["Grade"] == "11th",:]


In [757]:
# avg ms 11th
avg_ms_11th = grades_11th.groupby("School Name").mean()["Math Score"]


In [758]:
# 12th Grade 
grades_12th = ds_combined.loc[ds_combined["Grade"] == "12th",:]

In [759]:
# avg ms 12th 
avg_ms_12th = grades_12th.groupby("School Name").mean()["Math Score"]

In [760]:
ms_scores_by_grade = pd.DataFrame({"9th": avg_ms_9th,
                                   "10th": avg_ms_10th, 
                                   "11th": avg_ms_11th,
                                   "12th": avg_ms_12th})

In [761]:
# MS scores by grade and school


ms_by_grade_final = ms_scores_by_grade[["9th", "10th", "11th", "12th"]]
ms_by_grade_final

Unnamed: 0_level_0,9th,10th,11th,12th
School Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Bailey High School,77.083676,76.996772,77.515588,76.492218
Cabrera High School,83.094697,83.154506,82.76556,83.277487
Figueroa High School,76.403037,76.539974,76.884344,77.151369
Ford High School,77.361345,77.672316,76.918058,76.179963
Griffin High School,82.04401,84.229064,83.842105,83.356164
Hernandez High School,77.438495,77.337408,77.136029,77.186567
Holden High School,83.787402,83.429825,85.0,82.855422
Huang High School,77.027251,75.908735,76.446602,77.225641
Johnson High School,77.187857,76.691117,77.491653,76.863248
Pena High School,83.625455,83.372,84.328125,84.121547


In [762]:
# Average Reading Score by School
avg_rs_9th = grades_9th.groupby("School Name").mean()["Reading Score"]


In [763]:
avg_rs_10th = grades_10th.groupby("School Name").mean()["Reading Score"]


In [764]:
avg_rs_11th = grades_11th.groupby("School Name").mean()["Reading Score"]


In [765]:
avg_rs_12th = grades_12th.groupby("School Name").mean()["Reading Score"]


In [766]:
rs_scores_by_grade = pd.DataFrame({"9th": avg_rs_9th,
                                   "10th": avg_rs_10th, 
                                   "11th": avg_rs_11th,
                                   "12th": avg_rs_12th})


In [767]:
# Average reading score by grade and school (Final)
rs_by_grade_final = rs_scores_by_grade[["9th", "10th", "11th", "12th"]]
rs_by_grade_final

Unnamed: 0_level_0,9th,10th,11th,12th
School Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Bailey High School,81.303155,80.907183,80.945643,80.912451
Cabrera High School,83.676136,84.253219,83.788382,84.287958
Figueroa High School,81.198598,81.408912,80.640339,81.384863
Ford High School,80.632653,81.262712,80.403642,80.662338
Griffin High School,83.369193,83.706897,84.288089,84.013699
Hernandez High School,80.86686,80.660147,81.39614,80.857143
Holden High School,83.677165,83.324561,83.815534,84.698795
Huang High School,81.290284,81.512386,81.417476,80.305983
Johnson High School,81.260714,80.773431,80.616027,81.227564
Pena High School,83.807273,83.612,84.335938,84.59116


In [768]:
# Schools by spending ranges per student 

# //////////////////////

# budget per student bin 

budget_ps_bin = [0, 300, 500, 700, 1000]

range_labels = ['0-300', '300-500', '500-700', '700-1000']

spending_rng_ps = pd.DataFrame(schools_sum)
spending_rng_ps

Unnamed: 0,Average Math Score %,Average Reading Score %,Budget per Student,Overall Passing %,Passing Math %,Passing Reading %,School Type,Total Budget,Total Students
Bailey High School,77.048432,81.033963,937.584222,74.306672,66.680064,81.93328,School ID School Size Total Budget...,24649428,4976
Cabrera High School,83.061895,83.97578,324.443419,95.586652,94.133477,97.039828,School ID School Size Total Budget...,24649428,1858
Figueroa High School,76.711767,81.15802,565.387114,73.363852,65.988471,80.739234,School ID School Size Total Budget...,24649428,2949
Ford High School,77.102592,80.746258,529.234533,73.804308,68.309602,79.299014,School ID School Size Total Budget...,24649428,2739
Griffin High School,83.351499,83.816757,275.28107,95.265668,93.392371,97.138965,School ID School Size Total Budget...,24649428,1468
Hernandez High School,77.289752,80.934412,906.708337,73.807983,66.752967,80.862999,School ID School Size Total Budget...,24649428,4635
Holden High School,83.803279,83.814988,74.434501,94.379391,92.505855,96.252927,School ID School Size Total Budget...,24649428,427
Huang High School,76.629414,81.182722,573.255202,73.500171,65.683922,81.316421,School ID School Size Total Budget...,24649428,2917
Johnson High School,77.072464,80.966394,928.499797,73.639992,66.057551,81.222432,School ID School Size Total Budget...,24649428,4761
Pena High School,83.839917,84.044699,175.777239,95.27027,94.594595,95.945946,School ID School Size Total Budget...,24649428,962


In [769]:
spending_rng_ps["Spending Ranges (per Student)"] = pd.cut(schools_sum["Budget per Student"], budget_ps_bin, labels=range_labels)
spending_rng_ps


Unnamed: 0,Average Math Score %,Average Reading Score %,Budget per Student,Overall Passing %,Passing Math %,Passing Reading %,School Type,Total Budget,Total Students,Spending Ranges (per Student)
Bailey High School,77.048432,81.033963,937.584222,74.306672,66.680064,81.93328,School ID School Size Total Budget...,24649428,4976,700-1000
Cabrera High School,83.061895,83.97578,324.443419,95.586652,94.133477,97.039828,School ID School Size Total Budget...,24649428,1858,300-500
Figueroa High School,76.711767,81.15802,565.387114,73.363852,65.988471,80.739234,School ID School Size Total Budget...,24649428,2949,500-700
Ford High School,77.102592,80.746258,529.234533,73.804308,68.309602,79.299014,School ID School Size Total Budget...,24649428,2739,500-700
Griffin High School,83.351499,83.816757,275.28107,95.265668,93.392371,97.138965,School ID School Size Total Budget...,24649428,1468,0-300
Hernandez High School,77.289752,80.934412,906.708337,73.807983,66.752967,80.862999,School ID School Size Total Budget...,24649428,4635,700-1000
Holden High School,83.803279,83.814988,74.434501,94.379391,92.505855,96.252927,School ID School Size Total Budget...,24649428,427,0-300
Huang High School,76.629414,81.182722,573.255202,73.500171,65.683922,81.316421,School ID School Size Total Budget...,24649428,2917,500-700
Johnson High School,77.072464,80.966394,928.499797,73.639992,66.057551,81.222432,School ID School Size Total Budget...,24649428,4761,700-1000
Pena High School,83.839917,84.044699,175.777239,95.27027,94.594595,95.945946,School ID School Size Total Budget...,24649428,962,0-300


In [770]:
spending_rng_sorted = spending_rng_ps.sort_values('Spending Ranges (per Student)', ascending=True)

In [771]:
spending_rng_final = spending_rng_sorted.groupby("Spending Ranges (per Student)")
spending_rng_final

<pandas.core.groupby.DataFrameGroupBy object at 0x1106a3a20>

In [772]:
spending_rng_final = pd.DataFrame({"Average Math Score %": total_ms_pass_per_school,
                                   "Average Reading Score %": avg_rs_per_school, 
                                   "Passing Math %": total_ms_pass_per_school,
                                   "Passing Reading %": total_rs_pass_per_school,
                                   "Overall Passing %": overall_passing_by_school})

spending_rng_final

Unnamed: 0_level_0,Average Math Score %,Average Reading Score %,Overall Passing %,Passing Math %,Passing Reading %
School Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Bailey High School,66.680064,81.033963,74.306672,66.680064,81.93328
Cabrera High School,94.133477,83.97578,95.586652,94.133477,97.039828
Figueroa High School,65.988471,81.15802,73.363852,65.988471,80.739234
Ford High School,68.309602,80.746258,73.804308,68.309602,79.299014
Griffin High School,93.392371,83.816757,95.265668,93.392371,97.138965
Hernandez High School,66.752967,80.934412,73.807983,66.752967,80.862999
Holden High School,92.505855,83.814988,94.379391,92.505855,96.252927
Huang High School,65.683922,81.182722,73.500171,65.683922,81.316421
Johnson High School,66.057551,80.966394,73.639992,66.057551,81.222432
Pena High School,94.594595,84.044699,95.27027,94.594595,95.945946


In [773]:
spending_rng_final = spending_rng_sorted.groupby("Spending Ranges (per Student)").mean()
spending_rng_final.head()

Unnamed: 0_level_0,Average Math Score %,Average Reading Score %,Budget per Student,Overall Passing %,Passing Math %,Passing Reading %,Total Budget,Total Students
Spending Ranges (per Student),Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
0-300,83.664898,83.892148,175.16427,94.971776,93.497607,96.445946,24649428.0,952.333333
300-500,83.359224,83.898984,333.041189,95.18279,93.694764,96.670815,24649428.0,1867.4
500-700,76.814591,81.029,555.958949,73.556111,66.660665,80.451556,24649428.0,2868.333333
700-1000,77.06334,80.919864,884.27188,73.761992,66.464293,81.059691,24649428.0,4592.75


In [774]:
spending_rng_final = spending_rng_final[["Average Math Score %",
                                   "Average Reading Score %", 
                                   "Passing Math %",
                                   "Passing Reading %",
                                   "Overall Passing %"]]

spending_rng_final

Unnamed: 0_level_0,Average Math Score %,Average Reading Score %,Passing Math %,Passing Reading %,Overall Passing %
Spending Ranges (per Student),Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
0-300,83.664898,83.892148,93.497607,96.445946,94.971776
300-500,83.359224,83.898984,93.694764,96.670815,95.18279
500-700,76.814591,81.029,66.660665,80.451556,73.556111
700-1000,77.06334,80.919864,66.464293,81.059691,73.761992


In [775]:
# Scores by school Size


# //////////////////////

# budget per student bin 

school_sizes_bin = [400, 1500, 2500, 5000]

range_labels = ['Low', 'Medium', 'High']

scores_by_school_size = pd.DataFrame(schools_sum)
scores_by_school_size.head()

Unnamed: 0,Average Math Score %,Average Reading Score %,Budget per Student,Overall Passing %,Passing Math %,Passing Reading %,School Type,Total Budget,Total Students,Spending Ranges (per Student)
Bailey High School,77.048432,81.033963,937.584222,74.306672,66.680064,81.93328,School ID School Size Total Budget...,24649428,4976,700-1000
Cabrera High School,83.061895,83.97578,324.443419,95.586652,94.133477,97.039828,School ID School Size Total Budget...,24649428,1858,300-500
Figueroa High School,76.711767,81.15802,565.387114,73.363852,65.988471,80.739234,School ID School Size Total Budget...,24649428,2949,500-700
Ford High School,77.102592,80.746258,529.234533,73.804308,68.309602,79.299014,School ID School Size Total Budget...,24649428,2739,500-700
Griffin High School,83.351499,83.816757,275.28107,95.265668,93.392371,97.138965,School ID School Size Total Budget...,24649428,1468,0-300


In [776]:
scores_by_rank = pd.cut(ds_combined["School Size"], school_sizes_bin, labels=range_labels)
scores_by_rank

0          High
1          High
2          High
3          High
4          High
5          High
6          High
7          High
8          High
9          High
10         High
11         High
12         High
13         High
14         High
15         High
16         High
17         High
18         High
19         High
20         High
21         High
22         High
23         High
24         High
25         High
26         High
27         High
28         High
29         High
          ...  
39140    Medium
39141    Medium
39142    Medium
39143    Medium
39144    Medium
39145    Medium
39146    Medium
39147    Medium
39148    Medium
39149    Medium
39150    Medium
39151    Medium
39152    Medium
39153    Medium
39154    Medium
39155    Medium
39156    Medium
39157    Medium
39158    Medium
39159    Medium
39160    Medium
39161    Medium
39162    Medium
39163    Medium
39164    Medium
39165    Medium
39166    Medium
39167    Medium
39168    Medium
39169    Medium
Name: School Size, Lengt

In [777]:
scores_by_school_size = pd.DataFrame({"Total Students": sc_per_school,
                                      "Average Math Score %": total_ms_pass_per_school,
                                         "Average Reading Score %": avg_rs_per_school, 
                                         "Passing Math %": total_ms_pass_per_school,
                                         "Passing Reading %": total_rs_pass_per_school,
                                         "Overall Passing %": overall_passing_by_school})

scores_by_school_size

Unnamed: 0,Average Math Score %,Average Reading Score %,Overall Passing %,Passing Math %,Passing Reading %,Total Students
Bailey High School,66.680064,81.033963,74.306672,66.680064,81.93328,4976
Cabrera High School,94.133477,83.97578,95.586652,94.133477,97.039828,1858
Figueroa High School,65.988471,81.15802,73.363852,65.988471,80.739234,2949
Ford High School,68.309602,80.746258,73.804308,68.309602,79.299014,2739
Griffin High School,93.392371,83.816757,95.265668,93.392371,97.138965,1468
Hernandez High School,66.752967,80.934412,73.807983,66.752967,80.862999,4635
Holden High School,92.505855,83.814988,94.379391,92.505855,96.252927,427
Huang High School,65.683922,81.182722,73.500171,65.683922,81.316421,2917
Johnson High School,66.057551,80.966394,73.639992,66.057551,81.222432,4761
Pena High School,94.594595,84.044699,95.27027,94.594595,95.945946,962


In [778]:
scores_by_school_size["Spending - Low, Medium, High"] = pd.cut(schools_sum["Total Students"], school_sizes_bin, labels=range_labels)
scores_by_school_size

Unnamed: 0,Average Math Score %,Average Reading Score %,Overall Passing %,Passing Math %,Passing Reading %,Total Students,"Spending - Low, Medium, High"
Bailey High School,66.680064,81.033963,74.306672,66.680064,81.93328,4976,High
Cabrera High School,94.133477,83.97578,95.586652,94.133477,97.039828,1858,Medium
Figueroa High School,65.988471,81.15802,73.363852,65.988471,80.739234,2949,High
Ford High School,68.309602,80.746258,73.804308,68.309602,79.299014,2739,High
Griffin High School,93.392371,83.816757,95.265668,93.392371,97.138965,1468,Low
Hernandez High School,66.752967,80.934412,73.807983,66.752967,80.862999,4635,High
Holden High School,92.505855,83.814988,94.379391,92.505855,96.252927,427,Low
Huang High School,65.683922,81.182722,73.500171,65.683922,81.316421,2917,High
Johnson High School,66.057551,80.966394,73.639992,66.057551,81.222432,4761,High
Pena High School,94.594595,84.044699,95.27027,94.594595,95.945946,962,Low


In [779]:
# scores_rng_final_sorted = scores_rng_final.sort_values("Scores By School Size", ascending=False)

scores_by_spending_final = scores_by_school_size.groupby("Spending - Low, Medium, High").mean()

scores_by_spending_final

Unnamed: 0_level_0,Average Math Score %,Average Reading Score %,Overall Passing %,Passing Math %,Passing Reading %,Total Students
"Spending - Low, Medium, High",Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
High,66.548453,80.966636,73.673757,66.548453,80.799062,3853.714286
Low,93.497607,83.892148,94.971776,93.497607,96.445946,952.333333
Medium,93.694764,83.898984,95.18279,93.694764,96.670815,1867.4


In [780]:
scores_by_spending_sorted = scores_by_spending_final.sort_values("Total Students", ascending=True)
scores_by_spending_sorted

Unnamed: 0_level_0,Average Math Score %,Average Reading Score %,Overall Passing %,Passing Math %,Passing Reading %,Total Students
"Spending - Low, Medium, High",Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Low,93.497607,83.892148,94.971776,93.497607,96.445946,952.333333
Medium,93.694764,83.898984,95.18279,93.694764,96.670815,1867.4
High,66.548453,80.966636,73.673757,66.548453,80.799062,3853.714286


In [781]:
# Scores by level of spending 


scores_by_spending_sum = scores_by_spending_sorted[["Average Math Score %",
                                   "Average Reading Score %", 
                                   "Passing Math %",
                                   "Passing Reading %",
                                   "Overall Passing %"]]

scores_by_spending_sum

Unnamed: 0_level_0,Average Math Score %,Average Reading Score %,Passing Math %,Passing Reading %,Overall Passing %
"Spending - Low, Medium, High",Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Low,93.497607,83.892148,93.497607,96.445946,94.971776
Medium,93.694764,83.898984,93.694764,96.670815,95.18279
High,66.548453,80.966636,66.548453,80.799062,73.673757


In [None]:
# Scores by Type




In [784]:
# scores_by_type = pd.DataFrame({"Average Math Score %": total_ms_pass_per_school,
#                                "Average Reading Score %": avg_rs_per_school, 
#                                "Passing Math %": total_ms_pass_per_school,
#                                "Passing Reading %": total_rs_pass_per_school,
#                                "Overall Passing %": overall_passing_by_school})

scores_by_type_sorted = scores_by_type[["Average Math Score %",
                          "Average Reading Score %", 
                          "Passing Math %",
                          "Passing Reading %",
                          "Overall Passing %"]]


scores_by_type

KeyError: "['Average Math Score %' 'Average Reading Score %' 'Passing Math %'\n 'Passing Reading %' 'Overall Passing %'] not in index"