In [61]:
import pandas as pd

In [62]:
# Load the per-school records and relabel "name" as "school name" so the
# column can serve as the shared key when students are merged in later.
schools_df = pd.read_csv("schools_complete.csv")
renamed_schools_df = schools_df.rename(columns=dict(name="school name"))
renamed_schools_df.head(5)

Unnamed: 0,School ID,school name,type,size,budget
0,0,Huang High School,District,2917,1910635
1,1,Figueroa High School,District,2949,1884411
2,2,Shelton High School,Charter,1761,1056600
3,3,Hernandez High School,District,4635,3022020
4,4,Griffin High School,Charter,1468,917500


In [63]:
# Load the per-student records; "school" becomes "school name" so the column
# label matches the key used in the school table.
students_df = pd.read_csv("students_complete.csv")
renamed_students_df = students_df.rename(columns=dict(school="school name"))

In [64]:
# Combine students with their school's attributes by matching on "school name".
# The outer join keeps rows from either table even when no match exists.
students_merged_df = renamed_students_df.merge(
    renamed_schools_df,
    how="outer",
    on="school name",
)
students_merged_df.head(5)

Unnamed: 0,Student ID,name,gender,grade,school name,reading_score,math_score,School ID,type,size,budget
0,0,Paul Bradley,M,9th,Huang High School,66,79,0,District,2917,1910635
1,1,Victor Smith,M,12th,Huang High School,94,61,0,District,2917,1910635
2,2,Kevin Rodriguez,M,12th,Huang High School,90,60,0,District,2917,1910635
3,3,Dr. Richard Scott,M,12th,Huang High School,67,58,0,District,2917,1910635
4,4,Bonnie Ray,F,9th,Huang High School,97,84,0,District,2917,1910635


In [65]:
# Total Schools: number of distinct values in the "school name" column.
unique_schools = pd.unique(renamed_schools_df["school name"])
total_schools = len(unique_schools)
total_schools

15

In [66]:
# Total Students: count of non-null entries in the student "name" column.
total_students = renamed_students_df.loc[:, "name"].count()
total_students

39170

In [67]:
# Total Budget: sum of the "budget" column over the school table (one row per school).
total_budget = renamed_schools_df.loc[:, "budget"].sum()
total_budget

24649428

In [68]:
# Average Math Score: mean of "math_score" over the merged student rows.
avg_math_score = students_merged_df.loc[:, "math_score"].mean()
avg_math_score

78.98537145774827

In [69]:
# Average Reading Score: mean of "reading_score" over the merged student rows.
avg_reading_score = students_merged_df.loc[:, "reading_score"].mean()
avg_reading_score

81.87784018381414

In [70]:
# % Passing Math (Passing Rate 70%)
# A student passes with a math score of 70 or higher. The comparison is written
# as `>= 70` so it matches the ">= 70%" label in the district summary and stays
# correct if scores are ever fractional (e.g. 69.5 must not count as a pass).
passing_math = students_merged_df.loc[students_merged_df["math_score"] >= 70]
num_passing_math = len(passing_math)
# total_students is the district-wide student count from the "Total Students" cell.
percent_passing_math = (num_passing_math / total_students) * 100
percent_passing_math

74.980852693387803

In [71]:
# % Passing Reading (Passing Rate 70%)
# A student passes with a reading score of 70 or higher. The comparison is written
# as `>= 70` so it matches the ">= 70%" label in the district summary and stays
# correct if scores are ever fractional (e.g. 69.5 must not count as a pass).
passing_reading = students_merged_df.loc[students_merged_df["reading_score"] >= 70]
num_passing_reading = len(passing_reading)
# total_students is the district-wide student count from the "Total Students" cell.
percent_passing_reading = (num_passing_reading / total_students) * 100
percent_passing_reading

85.805463364820014

In [72]:
# Overall Passing: the plain average of the two subject passing percentages
# (not the share of students who passed both subjects).
percent_overall_passing = sum([percent_passing_math, percent_passing_reading]) / 2
percent_overall_passing

80.393158029103915

In [73]:
# District Summary Raw Dictionary
# Each metric is wrapped in a one-element list so the dictionary converts
# into a single-row DataFrame.
district_summary_rawdict = {
    "Total Schools": [total_schools],
    "Total Students": [total_students],
    "Total Budget": [total_budget],
    "Average Math Score": [avg_math_score],
    "Average Reading Score": [avg_reading_score],
    "% Passing Math (>= 70%)": [percent_passing_math],
    "% Passing Reading (>= 70%)": [percent_passing_reading],
    "Overall Passing": [percent_overall_passing],
}

In [74]:
# District Summary DataFrame
# Build the one-row summary, then render the budget as currency text.
district_summary_df = pd.DataFrame(district_summary_rawdict)
# Series.map formats the single column element-wise; it replaces
# DataFrame.applymap, which newer pandas releases deprecate.
district_summary_df["Total Budget"] = district_summary_df["Total Budget"].map("${:,.2f}".format)
district_summary_df

Unnamed: 0,% Passing Math (>= 70%),% Passing Reading (>= 70%),Average Math Score,Average Reading Score,Overall Passing,Total Budget,Total Schools,Total Students
0,74.980853,85.805463,78.985371,81.87784,80.393158,"$24,649,428.00",15,39170


In [75]:
# Group the merged student rows by school; the per-school summary
# metrics below are all derived from this grouping.
school_groupby = students_merged_df.groupby(by="school name")

In [76]:
# School Type
# max() collapses each school's group of rows to a single "type" label.
school_type = school_groupby["type"].max()
school_type_df = school_type.to_frame()

In [77]:
# Total Students (per school)
# The per-school counts get their own name. `total_students` already holds the
# district-wide count that the "% Passing" cells divide by; rebinding it to a
# per-school Series here would make those cells produce wrong results whenever
# they are re-run after this one.
total_students_by_school = school_groupby["name"].count()
total_students_df = pd.DataFrame(total_students_by_school)
renamed_total_students_df = total_students_df.rename(columns={"name" : "Total Students"})
renamed_total_students_df.head()

Unnamed: 0_level_0,Total Students
school name,Unnamed: 1_level_1
Bailey High School,4976
Cabrera High School,1858
Figueroa High School,2949
Ford High School,2739
Griffin High School,1468


In [78]:
# Total School Budget
# max() reduces each school's group of rows to one budget figure.
school_budget = school_groupby["budget"].max()
school_budget_df = school_budget.to_frame()

In [79]:
# Per Student Budget: each school's budget divided by its student count.
per_student = school_budget_df["budget"].div(renamed_total_students_df["Total Students"])
# The quotient of two differently named Series is unnamed, so the frame's
# only column is labelled 0 until it is renamed.
per_student_df = per_student.to_frame()
renamed_per_student_df = per_student_df.rename(columns={0: "Per Student Budget"})

In [80]:
# Average Math Score by School: per-school mean of "math_score".
avg_math_school = school_groupby["math_score"].mean()
avg_math_school_df = avg_math_school.to_frame()
renamed_avg_math_school_df = avg_math_school.to_frame(name="Average Math Score")

In [81]:
# Average Reading Score by School: per-school mean of "reading_score".
avg_reading_school = school_groupby["reading_score"].mean()
avg_reading_school_df = avg_reading_school.to_frame()
renamed_avg_reading_school_df = avg_reading_school.to_frame(name="Average Reading Score")

In [82]:
# % Passing Math by School - Set Up
# Passing means a math score of 70 or higher; `>= 70` stays correct even if
# scores are fractional. Count the passing rows per school.
pass_math_by_school = students_merged_df.loc[students_merged_df["math_score"] >= 70]
pass_math_by_school_group = pass_math_by_school.groupby("school name")
pass_math_by_school_group_count = pass_math_by_school_group["math_score"].count()
pass_math_by_school_group_count_df = pd.DataFrame(pass_math_by_school_group_count)

# % Passing Math by School - DataFrame
joined_pass_math_by_school = renamed_total_students_df.join(pass_math_by_school_group_count_df)
# A school with no passing students is missing from the counts, so the join
# leaves NaN there; fillna(0) reports such a school as 0% instead of NaN.
percent_passing_math_school = 100 * (joined_pass_math_by_school["math_score"].fillna(0) / joined_pass_math_by_school["Total Students"])
# The quotient Series is unnamed, so its column is labelled 0 until renamed.
percent_passing_math_school_df = pd.DataFrame(percent_passing_math_school)
renamed_percent_passing_math_school_df = percent_passing_math_school_df.rename(columns={0 : "% Passing Math"})

In [83]:
# % Passing Reading by School - Set Up
# Passing means a reading score of 70 or higher; `>= 70` stays correct even if
# scores are fractional. Count the passing rows per school.
pass_reading_by_school = students_merged_df.loc[students_merged_df["reading_score"] >= 70]
pass_reading_by_school_group = pass_reading_by_school.groupby("school name")
pass_reading_by_school_group_count = pass_reading_by_school_group["reading_score"].count()
pass_reading_by_school_group_count_df = pd.DataFrame(pass_reading_by_school_group_count)

# % Passing Reading by School - DataFrame
joined_pass_reading_by_school = renamed_total_students_df.join(pass_reading_by_school_group_count_df)
# A school with no passing students is missing from the counts, so the join
# leaves NaN there; fillna(0) reports such a school as 0% instead of NaN.
percent_passing_reading_school = 100 * (joined_pass_reading_by_school["reading_score"].fillna(0) / joined_pass_reading_by_school["Total Students"])
# The quotient Series is unnamed, so its column is labelled 0 until renamed.
percent_passing_reading_school_df = pd.DataFrame(percent_passing_reading_school)
renamed_percent_passing_reading_school_df = percent_passing_reading_school_df.rename(columns={0 : "% Passing Reading"})

In [84]:
# % Passing Overall by School - DataFrame
# Per school, the plain average of the reading and math passing percentages.
percent_passing_overall_school = (
    renamed_percent_passing_reading_school_df["% Passing Reading"]
    .add(renamed_percent_passing_math_school_df["% Passing Math"])
    .div(2)
)
# The combined Series is unnamed, so its column is labelled 0 until renamed.
percent_passing_overall_school_df = percent_passing_overall_school.to_frame()
renamed_percent_passing_overall_school_df = percent_passing_overall_school_df.rename(columns={0: "% Passing Overall"})

In [85]:
# Joining All Above DFs to Create the School Summary DataFrame
# Every frame is indexed by school name, so each join adds one metric column.
first_join = school_type_df.join(renamed_total_students_df)
second_join = first_join.join(renamed_per_student_df)
third_join = second_join.join(renamed_avg_math_school_df)
fourth_join = third_join.join(renamed_avg_reading_school_df)
fifth_join = fourth_join.join(renamed_percent_passing_math_school_df)
sixth_join = fifth_join.join(renamed_percent_passing_reading_school_df)
school_summary = sixth_join.join(renamed_percent_passing_overall_school_df)
# Render the per-student budget as currency text. Series.map formats the single
# column element-wise; it replaces DataFrame.applymap, which newer pandas
# releases deprecate.
school_summary["Per Student Budget"] = school_summary["Per Student Budget"].map("${:,.2f}".format)

In [86]:
# School Summary DataFrame
# Display the joined per-school table: type, student count, per-student budget,
# average scores and passing percentages, indexed by school name.
school_summary

Unnamed: 0_level_0,type,Total Students,Per Student Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Passing Overall
school name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Bailey High School,District,4976,$628.00,77.048432,81.033963,66.680064,81.93328,74.306672
Cabrera High School,Charter,1858,$582.00,83.061895,83.97578,94.133477,97.039828,95.586652
Figueroa High School,District,2949,$639.00,76.711767,81.15802,65.988471,80.739234,73.363852
Ford High School,District,2739,$644.00,77.102592,80.746258,68.309602,79.299014,73.804308
Griffin High School,Charter,1468,$625.00,83.351499,83.816757,93.392371,97.138965,95.265668
Hernandez High School,District,4635,$652.00,77.289752,80.934412,66.752967,80.862999,73.807983
Holden High School,Charter,427,$581.00,83.803279,83.814988,92.505855,96.252927,94.379391
Huang High School,District,2917,$655.00,76.629414,81.182722,65.683922,81.316421,73.500171
Johnson High School,District,4761,$650.00,77.072464,80.966394,66.057551,81.222432,73.639992
Pena High School,Charter,962,$609.00,83.839917,84.044699,94.594595,95.945946,95.27027


In [87]:
# Top Performing Schools: order by "% Passing Overall", highest first,
# and show the leading five rows.
top_performing_schools = school_summary.sort_values(by="% Passing Overall", ascending=False)
top_performing_schools.head(5)

Unnamed: 0_level_0,type,Total Students,Per Student Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Passing Overall
school name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Cabrera High School,Charter,1858,$582.00,83.061895,83.97578,94.133477,97.039828,95.586652
Thomas High School,Charter,1635,$638.00,83.418349,83.84893,93.272171,97.308869,95.29052
Pena High School,Charter,962,$609.00,83.839917,84.044699,94.594595,95.945946,95.27027
Griffin High School,Charter,1468,$625.00,83.351499,83.816757,93.392371,97.138965,95.265668
Wilson High School,Charter,2283,$578.00,83.274201,83.989488,93.867718,96.539641,95.203679


In [88]:
# Worst Performing Schools: order by "% Passing Overall", lowest first,
# and show the leading five rows.
worst_performing_schools = school_summary.sort_values(by="% Passing Overall", ascending=True)
worst_performing_schools.head(5)

Unnamed: 0_level_0,type,Total Students,Per Student Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Passing Overall
school name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Rodriguez High School,District,3999,$637.00,76.842711,80.744686,66.366592,80.220055,73.293323
Figueroa High School,District,2949,$639.00,76.711767,81.15802,65.988471,80.739234,73.363852
Huang High School,District,2917,$655.00,76.629414,81.182722,65.683922,81.316421,73.500171
Johnson High School,District,4761,$650.00,77.072464,80.966394,66.057551,81.222432,73.639992
Ford High School,District,2739,$644.00,77.102592,80.746258,68.309602,79.299014,73.804308


In [89]:
# Re-display the first rows of the merged student/school table as a reminder
# of the available columns before the by-grade breakdowns.
students_merged_df.head()

Unnamed: 0,Student ID,name,gender,grade,school name,reading_score,math_score,School ID,type,size,budget
0,0,Paul Bradley,M,9th,Huang High School,66,79,0,District,2917,1910635
1,1,Victor Smith,M,12th,Huang High School,94,61,0,District,2917,1910635
2,2,Kevin Rodriguez,M,12th,Huang High School,90,60,0,District,2917,1910635
3,3,Dr. Richard Scott,M,12th,Huang High School,67,58,0,District,2917,1910635
4,4,Bonnie Ray,F,9th,Huang High School,97,84,0,District,2917,1910635


In [90]:
# 9th grade avg scores
# Keep only 9th-grade rows, then average each subject per school. The renamed
# frames carry the grade as the column label so the grades can sit side by side.
ninth_grade = students_merged_df[students_merged_df['grade'].eq('9th')]
ninth_grade_school_group = ninth_grade.groupby('school name')

# Reading
ninth_grade_avg_reading = ninth_grade_school_group['reading_score'].mean()
ninth_grade_avg_reading_df = ninth_grade_avg_reading.to_frame()
renamed_ninth_grade_avg_reading_df = ninth_grade_avg_reading.to_frame(name='9th')

# Math
ninth_grade_avg_math = ninth_grade_school_group['math_score'].mean()
ninth_grade_avg_math_df = ninth_grade_avg_math.to_frame()
renamed_ninth_grade_avg_math_df = ninth_grade_avg_math.to_frame(name='9th')

In [91]:
# 10th grade avg scores
# Keep only 10th-grade rows, then average each subject per school. The renamed
# frames carry the grade as the column label so the grades can sit side by side.
tenth_grade = students_merged_df[students_merged_df['grade'].eq('10th')]
tenth_grade_school_group = tenth_grade.groupby('school name')

# Reading
tenth_grade_avg_reading = tenth_grade_school_group['reading_score'].mean()
tenth_grade_avg_reading_df = tenth_grade_avg_reading.to_frame()
renamed_tenth_grade_avg_reading_df = tenth_grade_avg_reading.to_frame(name='10th')

# Math
tenth_grade_avg_math = tenth_grade_school_group['math_score'].mean()
tenth_grade_avg_math_df = tenth_grade_avg_math.to_frame()
renamed_tenth_grade_avg_math_df = tenth_grade_avg_math.to_frame(name='10th')

In [92]:
# 11th grade avg scores
# Keep only 11th-grade rows, then average each subject per school. The renamed
# frames carry the grade as the column label so the grades can sit side by side.
eleventh_grade = students_merged_df[students_merged_df['grade'].eq('11th')]
eleventh_grade_school_group = eleventh_grade.groupby('school name')

# Reading
eleventh_grade_avg_reading = eleventh_grade_school_group['reading_score'].mean()
eleventh_grade_avg_reading_df = eleventh_grade_avg_reading.to_frame()
renamed_eleventh_grade_avg_reading_df = eleventh_grade_avg_reading.to_frame(name='11th')

# Math
eleventh_grade_avg_math = eleventh_grade_school_group['math_score'].mean()
eleventh_grade_avg_math_df = eleventh_grade_avg_math.to_frame()
renamed_eleventh_grade_avg_math_df = eleventh_grade_avg_math.to_frame(name='11th')

In [93]:
# 12th grade avg scores
# Keep only 12th-grade rows, then average each subject per school. The renamed
# frames carry the grade as the column label so the grades can sit side by side.
twelfth_grade = students_merged_df[students_merged_df['grade'].eq('12th')]
twelfth_grade_school_group = twelfth_grade.groupby('school name')

# Reading
twelfth_grade_avg_reading = twelfth_grade_school_group['reading_score'].mean()
twelfth_grade_avg_reading_df = twelfth_grade_avg_reading.to_frame()
renamed_twelfth_grade_avg_reading_df = twelfth_grade_avg_reading.to_frame(name='12th')

# Math
twelfth_grade_avg_math = twelfth_grade_school_group['math_score'].mean()
twelfth_grade_avg_math_df = twelfth_grade_avg_math.to_frame()
renamed_twelfth_grade_avg_math_df = twelfth_grade_avg_math.to_frame(name='12th')

In [94]:
# Avg Math Score by Grade
# Line up the four per-grade math averages on the shared school-name index,
# adding one grade column per join (left join, the DataFrame.join default).
first_join_math_grade = renamed_ninth_grade_avg_math_df.join(renamed_tenth_grade_avg_math_df, how="left")
second_join_math_grade = first_join_math_grade.join(renamed_eleventh_grade_avg_math_df, how="left")
avg_math_score_by_grade = second_join_math_grade.join(renamed_twelfth_grade_avg_math_df, how="left")
avg_math_score_by_grade

Unnamed: 0_level_0,9th,10th,11th,12th
school name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Bailey High School,77.083676,76.996772,77.515588,76.492218
Cabrera High School,83.094697,83.154506,82.76556,83.277487
Figueroa High School,76.403037,76.539974,76.884344,77.151369
Ford High School,77.361345,77.672316,76.918058,76.179963
Griffin High School,82.04401,84.229064,83.842105,83.356164
Hernandez High School,77.438495,77.337408,77.136029,77.186567
Holden High School,83.787402,83.429825,85.0,82.855422
Huang High School,77.027251,75.908735,76.446602,77.225641
Johnson High School,77.187857,76.691117,77.491653,76.863248
Pena High School,83.625455,83.372,84.328125,84.121547


In [95]:
# Avg Reading Score by Grade
# Line up the four per-grade reading averages on the shared school-name index,
# adding one grade column per join (left join, the DataFrame.join default).
first_join_reading_grade = renamed_ninth_grade_avg_reading_df.join(renamed_tenth_grade_avg_reading_df, how="left")
second_join_reading_grade = first_join_reading_grade.join(renamed_eleventh_grade_avg_reading_df, how="left")
avg_reading_score_by_grade = second_join_reading_grade.join(renamed_twelfth_grade_avg_reading_df, how="left")
avg_reading_score_by_grade

Unnamed: 0_level_0,9th,10th,11th,12th
school name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Bailey High School,81.303155,80.907183,80.945643,80.912451
Cabrera High School,83.676136,84.253219,83.788382,84.287958
Figueroa High School,81.198598,81.408912,80.640339,81.384863
Ford High School,80.632653,81.262712,80.403642,80.662338
Griffin High School,83.369193,83.706897,84.288089,84.013699
Hernandez High School,80.86686,80.660147,81.39614,80.857143
Holden High School,83.677165,83.324561,83.815534,84.698795
Huang High School,81.290284,81.512386,81.417476,80.305983
Johnson High School,81.260714,80.773431,80.616027,81.227564
Pena High School,83.807273,83.612,84.335938,84.59116


In [98]:
# Scores by School Spending Bins
# Five edges define four per-student spending ranges; group_names supplies
# one display label per range, in the same order.
# NOTE(review): the lowest edge is 550 but the first label reads "560-585" —
# confirm which of the two is intended.
bins = [
    550,
    585,
    630,
    645,
    670,
]
group_names = [
    "560-585",
    "586-630",
    "631-645",
    "646-670",
]

In [116]:
# Scores by School Spending
# Label each school with its per-student spending range. The label column is
# added to a copy: renamed_per_student_df is also joined into the school summary,
# so adding the column to it in place would leak "Spending Ranges (Per Student)"
# into that table if its cell were re-run after this one.
per_student_with_ranges_df = renamed_per_student_df.copy()
per_student_with_ranges_df['Spending Ranges (Per Student)'] = pd.cut(per_student_with_ranges_df['Per Student Budget'], bins, labels=group_names)
# Attach the per-school passing rates and average scores, one frame per join.
insert_percent_math = per_student_with_ranges_df.join(renamed_percent_passing_math_school_df)
insert_percent_reading = insert_percent_math.join(renamed_percent_passing_reading_school_df)
school_by_spending = insert_percent_reading.join(renamed_percent_passing_overall_school_df)
insert_avg_reading = school_by_spending.join(renamed_avg_reading_school_df)
insert_avg_math = insert_avg_reading.join(renamed_avg_math_school_df)
# Keep the range label and the metrics; the raw per-student budget column is left out.
remove_per_student_budget = insert_avg_math[["Spending Ranges (Per Student)", "% Passing Math", "% Passing Reading", "% Passing Overall", "Average Reading Score", "Average Math Score"]]

In [117]:
# Display the per-school table of spending range, passing percentages and average scores.
remove_per_student_budget

Unnamed: 0_level_0,Spending Ranges (Per Student),% Passing Math,% Passing Reading,% Passing Overall,Average Reading Score,Average Math Score
school name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Bailey High School,586-630,66.680064,81.93328,74.306672,81.033963,77.048432
Cabrera High School,560-585,94.133477,97.039828,95.586652,83.97578,83.061895
Figueroa High School,631-645,65.988471,80.739234,73.363852,81.15802,76.711767
Ford High School,631-645,68.309602,79.299014,73.804308,80.746258,77.102592
Griffin High School,586-630,93.392371,97.138965,95.265668,83.816757,83.351499
Hernandez High School,646-670,66.752967,80.862999,73.807983,80.934412,77.289752
Holden High School,560-585,92.505855,96.252927,94.379391,83.814988,83.803279
Huang High School,646-670,65.683922,81.316421,73.500171,81.182722,76.629414
Johnson High School,646-670,66.057551,81.222432,73.639992,80.966394,77.072464
Pena High School,586-630,94.594595,95.945946,95.27027,84.044699,83.839917
