In [107]:
# Imports pandas and pathlib.
import pandas as pd
import pathlib

In [108]:
# Shows the file path. 
school_data_import = pathlib.Path("../Resources/schools_complete.csv")
student_data_import = pathlib.Path("../Resources/students_complete.csv")

In [109]:
# Reads the csv files.
schools_data_df = pd.read_csv(school_data_import)
students_data_df = pd.read_csv(student_data_import)

In [110]:
print("DISTRICT SUMMARY")

DISTRICT SUMMARY


In [111]:
school_students_info_df =pd.merge(schools_data_df, students_data_df, on=["school_name"]) 
school_students_info_df.head()

Unnamed: 0,School ID,school_name,type,size,budget,Student ID,student_name,gender,grade,reading_score,math_score
0,0,Huang High School,District,2917,1910635,0,Paul Bradley,M,9th,66,79
1,0,Huang High School,District,2917,1910635,1,Victor Smith,M,12th,94,61
2,0,Huang High School,District,2917,1910635,2,Kevin Rodriguez,M,12th,90,60
3,0,Huang High School,District,2917,1910635,3,Dr. Richard Scott,M,12th,67,58
4,0,Huang High School,District,2917,1910635,4,Bonnie Ray,F,9th,97,84


In [112]:
# Counts the number of total unique schools.
total_unique_schools_df = len(schools_data_df["school_name"].unique())
print("Total number of unique schools : " , total_unique_schools_df)

Total number of unique schools :  15


In [113]:
# Counts the number of total students in district.
total_students_df = len(students_data_df["Student ID"])
print("Total students: ", total_students_df)

Total students:  39170


In [114]:
# Adds all the budget to calculate the total budget in district.
total_budget_df = schools_data_df["budget"].sum()
print("Total budget: ", total_budget_df)

Total budget:  24649428


In [115]:
# Calculates the average math score.
average_math_score_df = students_data_df["math_score"].mean()
print("Average math score: ", average_math_score_df)

Average math score:  78.98537145774827


In [116]:
# Calculates the average reading score.
average_reading_score_df = students_data_df["reading_score"].mean()
print("Average reading score: ", average_reading_score_df)

Average reading score:  81.87784018381414


In [117]:
# Calculates the percentage of students who passed math.
number_of_math_pass_students = (len(students_data_df.loc[students_data_df["math_score"] >= 70]))
percentage_of_math_pass_students = ((number_of_math_pass_students)/total_students_df)* 100
print("Percentage of math pass students: ", percentage_of_math_pass_students)

Percentage of math pass students:  74.9808526933878


In [118]:
# Calculates the percentage of students who passed reading.
number_of_reading_pass_students = (len(students_data_df.loc[students_data_df["reading_score"] >= 70]))
percentage_of_reading_pass_students = ((number_of_reading_pass_students)/total_students_df)* 100
print("Percentage of reading pass students: ", percentage_of_reading_pass_students)

Percentage of reading pass students:  85.80546336482001


In [119]:
# Calculates the percentage of students who passed both math and reading.
# Displays the overall percentage who passed math and reading.
overall_pass_students= (students_data_df[(students_data_df["reading_score"] >= 70) & (students_data_df["math_score"] >= 70)]).count()
overall_percent_passed_math_and_reading = (overall_pass_students['math_score']/total_students_df)*100
print("Overall passing percentage: ", overall_percent_passed_math_and_reading)
  
# print("Overall passing percentage: ", overall_percent_passed_math_and_reading)

Overall passing percentage:  65.17232575950983


In [120]:
# Creats data frame and displays the district summary
district_summary =  {"Total number of unique schools ": [total_unique_schools_df],
                    "Total students ": total_students_df,
                    "Total budget ": total_budget_df,
                     "Average math score ": average_math_score_df,
                    "Average reading score ":  average_reading_score_df,
                    "Percentage of math pass students ": percentage_of_math_pass_students,
                    "Percentage of reading pass students ": percentage_of_reading_pass_students,
                    "Overall passing percentage ": overall_percent_passed_math_and_reading}

district_summary_df = pd.DataFrame(data=district_summary)
district_summary_df

Unnamed: 0,Total number of unique schools,Total students,Total budget,Average math score,Average reading score,Percentage of math pass students,Percentage of reading pass students,Overall passing percentage
0,15,39170,24649428,78.985371,81.87784,74.980853,85.805463,65.172326


In [121]:
print("SCHOOL SUMMARY")

SCHOOL SUMMARY


In [153]:
# lists the school type
school_names = sorted(schools_data_df["school_name"].tolist())
school_type_df = pd.DataFrame(schools_data_df["type"])
school_type_df.columns = ['School Type']
school_type_df.index = school_names
school_type_df

Unnamed: 0,School Type
Bailey High School,District
Cabrera High School,District
Figueroa High School,Charter
Ford High School,District
Griffin High School,Charter
Hernandez High School,Charter
Holden High School,Charter
Huang High School,District
Johnson High School,Charter
Pena High School,Charter


In [124]:
# Uses group buy to categorize the students per school and counts the total students per school
total_student_per_school = school_students_info_df.groupby('school_name')
total_student_per_school = pd.DataFrame((total_student_per_school["Student ID"].count()))
total_student_per_school.columns = ['Total Students']
total_student_per_school.index = school_names
total_student_per_school

Unnamed: 0,Total Students
Bailey High School,4976
Cabrera High School,1858
Figueroa High School,2949
Ford High School,2739
Griffin High School,1468
Hernandez High School,4635
Holden High School,427
Huang High School,2917
Johnson High School,4761
Pena High School,962


In [125]:
# Counts the total budget per school
total_budget_per_school_df = schools_data_df.groupby('school_name')
per_school_budget = pd.DataFrame(total_budget_per_school_df["budget"].agg('sum'))
per_school_budget.columns = ['Total School Budget']
per_school_budget.index = school_names
per_school_budget

Unnamed: 0,Total School Budget
Bailey High School,3124928
Cabrera High School,1081356
Figueroa High School,1884411
Ford High School,1763916
Griffin High School,917500
Hernandez High School,3022020
Holden High School,248087
Huang High School,1910635
Johnson High School,3094650
Pena High School,585858


In [126]:
# Counts the per student budget
total_budget_per_student_df = pd.DataFrame(per_school_budget['Total School Budget'] / total_student_per_school['Total Students'])
total_budget_per_student_df.columns = ['Budget Per Student']
total_budget_per_student_df.index = school_names
total_budget_per_student_df


Unnamed: 0,Budget Per Student
Bailey High School,628.0
Cabrera High School,582.0
Figueroa High School,639.0
Ford High School,644.0
Griffin High School,625.0
Hernandez High School,652.0
Holden High School,581.0
Huang High School,655.0
Johnson High School,650.0
Pena High School,609.0


In [127]:
# Calculates the average math score per school
average_math_score_per_school = school_students_info_df.groupby('school_name')
average_math_score_per_school = pd.DataFrame(average_math_score_per_school['math_score'].mean())
average_math_score_per_school.columns = ['Average Math Score']
average_math_score_per_school.index = school_names
average_math_score_per_school


Unnamed: 0,Average Math Score
Bailey High School,77.048432
Cabrera High School,83.061895
Figueroa High School,76.711767
Ford High School,77.102592
Griffin High School,83.351499
Hernandez High School,77.289752
Holden High School,83.803279
Huang High School,76.629414
Johnson High School,77.072464
Pena High School,83.839917


In [128]:
# Calculates the average reading score per school
average_reading_score_per_school = school_students_info_df.groupby('school_name')
average_reading_score_per_school = pd.DataFrame(average_reading_score_per_school['reading_score'].mean())
average_reading_score_per_school.columns = ['Average Reading Score']
average_reading_score_per_school.index = school_names
average_reading_score_per_school

Unnamed: 0,Average Reading Score
Bailey High School,81.033963
Cabrera High School,83.97578
Figueroa High School,81.15802
Ford High School,80.746258
Griffin High School,83.816757
Hernandez High School,80.934412
Holden High School,83.814988
Huang High School,81.182722
Johnson High School,80.966394
Pena High School,84.044699


In [129]:
# Calculates the average math passing percentage per school 
student_passing_math_per_school = school_students_info_df[(school_students_info_df["math_score"] >= 70)]
student_passing_math_per_school = pd.DataFrame(student_passing_math_per_school.groupby(["school_name"]).count())    
percentage_student_passing_math_per_school = pd.DataFrame(student_passing_math_per_school['math_score'] / total_student_per_school['Total Students'] * 100)
percentage_student_passing_math_per_school.columns = ['Math Score Percentage']
percentage_student_passing_math_per_school.index = school_names
percentage_student_passing_math_per_school

Unnamed: 0,Math Score Percentage
Bailey High School,66.680064
Cabrera High School,94.133477
Figueroa High School,65.988471
Ford High School,68.309602
Griffin High School,93.392371
Hernandez High School,66.752967
Holden High School,92.505855
Huang High School,65.683922
Johnson High School,66.057551
Pena High School,94.594595


In [130]:
# Calculates the average reading passing percentage per school 
student_passing_reading_per_school = school_students_info_df[(school_students_info_df["reading_score"] >= 70)]
student_passing_reading_per_school = student_passing_reading_per_school.groupby(["school_name"]).count()                                                  
percentage_student_passing_reading_per_school = pd.DataFrame((student_passing_reading_per_school["reading_score"]/total_student_per_school['Total Students']) * 100)
percentage_student_passing_reading_per_school.columns = ['Reading Score Percentage']
percentage_student_passing_reading_per_school.index = school_names
percentage_student_passing_reading_per_school

Unnamed: 0,Reading Score Percentage
Bailey High School,81.93328
Cabrera High School,97.039828
Figueroa High School,80.739234
Ford High School,79.299014
Griffin High School,97.138965
Hernandez High School,80.862999
Holden High School,96.252927
Huang High School,81.316421
Johnson High School,81.222432
Pena High School,95.945946


In [131]:
# Calculates the average math and reading passing percentage per school 
students_passing_math_and_reading = school_students_info_df[
    (school_students_info_df["reading_score"] >= 70) & (school_students_info_df["math_score"] >= 70)
]
students_passing_math_and_reading = (students_passing_math_and_reading.groupby(["school_name"]).count())
percentage_students_passing_math_and_reading = pd.DataFrame((students_passing_math_and_reading['reading_score']/total_student_per_school['Total Students'])*100)
percentage_students_passing_math_and_reading.columns = ['Overall Percentage']
percentage_students_passing_math_and_reading.index = school_names
percentage_students_passing_math_and_reading


Unnamed: 0,Overall Percentage
Bailey High School,54.642283
Cabrera High School,91.334769
Figueroa High School,53.204476
Ford High School,54.289887
Griffin High School,90.599455
Hernandez High School,53.527508
Holden High School,89.227166
Huang High School,53.513884
Johnson High School,53.539172
Pena High School,90.540541


In [132]:
# Creates data frame called per_school_summary_df, concatenate all the above calculation and display
per_school_summary_df = pd.concat(
    [school_type_df,
     total_student_per_school,
     per_school_budget,
     total_budget_per_student_df,
     average_math_score_per_school,
     average_reading_score_per_school,
     percentage_student_passing_math_per_school,
     percentage_student_passing_reading_per_school,
     percentage_students_passing_math_and_reading
     
], axis = 1)
per_school_summary_df

Unnamed: 0,School Type,Total Students,Total School Budget,Budget Per Student,Average Math Score,Average Reading Score,Math Score Percentage,Reading Score Percentage,Overall Percentage
Bailey High School,District,4976,3124928,628.0,77.048432,81.033963,66.680064,81.93328,54.642283
Cabrera High School,District,1858,1081356,582.0,83.061895,83.97578,94.133477,97.039828,91.334769
Figueroa High School,Charter,2949,1884411,639.0,76.711767,81.15802,65.988471,80.739234,53.204476
Ford High School,District,2739,1763916,644.0,77.102592,80.746258,68.309602,79.299014,54.289887
Griffin High School,Charter,1468,917500,625.0,83.351499,83.816757,93.392371,97.138965,90.599455
Hernandez High School,Charter,4635,3022020,652.0,77.289752,80.934412,66.752967,80.862999,53.527508
Holden High School,Charter,427,248087,581.0,83.803279,83.814988,92.505855,96.252927,89.227166
Huang High School,District,2917,1910635,655.0,76.629414,81.182722,65.683922,81.316421,53.513884
Johnson High School,Charter,4761,3094650,650.0,77.072464,80.966394,66.057551,81.222432,53.539172
Pena High School,Charter,962,585858,609.0,83.839917,84.044699,94.594595,95.945946,90.540541


In [133]:
print(("Highest-Performing School by Percentage of Overall Passing").upper())

HIGHEST-PERFORMING SCHOOL BY PERCENTAGE OF OVERALL PASSING


In [134]:
# Sort the overall percentage in descending order to find out the highest number of overall pass students
highest_math_percentage = pd.DataFrame(per_school_summary_df.sort_values(['Overall Percentage'], ascending=False))
highest_math_percentage.head(5)

Unnamed: 0,School Type,Total Students,Total School Budget,Budget Per Student,Average Math Score,Average Reading Score,Math Score Percentage,Reading Score Percentage,Overall Percentage
Cabrera High School,District,1858,1081356,582.0,83.061895,83.97578,94.133477,97.039828,91.334769
Thomas High School,District,1635,1043130,638.0,83.418349,83.84893,93.272171,97.308869,90.948012
Griffin High School,Charter,1468,917500,625.0,83.351499,83.816757,93.392371,97.138965,90.599455
Wilson High School,District,2283,1319574,578.0,83.274201,83.989488,93.867718,96.539641,90.582567
Pena High School,Charter,962,585858,609.0,83.839917,84.044699,94.594595,95.945946,90.540541


In [135]:
print(("Lowest-Performing School by Percentage of Overall Passing").upper())

LOWEST-PERFORMING SCHOOL BY PERCENTAGE OF OVERALL PASSING


In [136]:
# Sort the overall percentage in ascending order to find out the lowest number of overall pass students.
Lowest_performing_school=pd.DataFrame(per_school_summary_df.sort_values(['Overall Percentage'], ascending=True))
Lowest_performing_school.head()

Unnamed: 0,School Type,Total Students,Total School Budget,Budget Per Student,Average Math Score,Average Reading Score,Math Score Percentage,Reading Score Percentage,Overall Percentage
Rodriguez High School,Charter,3999,2547363,637.0,76.842711,80.744686,66.366592,80.220055,52.988247
Figueroa High School,Charter,2949,1884411,639.0,76.711767,81.15802,65.988471,80.739234,53.204476
Huang High School,District,2917,1910635,655.0,76.629414,81.182722,65.683922,81.316421,53.513884
Hernandez High School,Charter,4635,3022020,652.0,77.289752,80.934412,66.752967,80.862999,53.527508
Johnson High School,Charter,4761,3094650,650.0,77.072464,80.966394,66.057551,81.222432,53.539172


In [158]:
students_data_df.head(3)

Unnamed: 0,Student ID,student_name,gender,grade,school_name,reading_score,math_score
0,0,Paul Bradley,M,9th,Huang High School,66,79
1,1,Victor Smith,M,12th,Huang High School,94,61
2,2,Kevin Rodriguez,M,12th,Huang High School,90,60


In [137]:
print(("Math scores by grade").title())

Math Scores By Grade


In [184]:
# Creates a DataFrame that lists the average math score for students of each grade level (9th, 10th, 11th, 12th) at each school.
# grade_math_scores = students_data_df.groupby(['school_name', 'grade'])
# grade_math_scores= pd.DataFrame(grade_math_scores['math_score'].mean())


ninth_graders = students_data_df[(students_data_df["grade"] == "9th")]
ninth_grade_math_scores = ninth_graders.groupby(['school_name'])
ninth_grade_math_scores= pd.DataFrame(ninth_grade_math_scores['math_score'].mean())
ninth_grade_math_scores.columns = ['9th']

tenth_graders = students_data_df[(students_data_df["grade"] == "10th")]
tenth_grader_math_scores = tenth_graders.groupby(['school_name'])
tenth_grader_math_scores= pd.DataFrame(tenth_grader_math_scores['math_score'].mean())
tenth_grader_math_scores.columns = ['10th']


eleventh_graders = students_data_df[(students_data_df["grade"] == "11th")]
eleventh_grader_math_scores = eleventh_graders.groupby(['school_name'])
eleventh_grader_math_scores= pd.DataFrame(eleventh_grader_math_scores['math_score'].mean())
eleventh_grader_math_scores.columns = ['11th']


twelfth_graders = students_data_df[(students_data_df["grade"] == "12th")]
twelfth_grader_math_scores = twelfth_graders.groupby(['school_name'])
twelfth_grader_math_scores= pd.DataFrame(twelfth_grader_math_scores['math_score'].mean())
twelfth_grader_math_scores.columns = ['12th']

# Display the result
math_scores_by_grade = pd.concat(
     [ninth_grade_math_scores,
     tenth_grader_math_scores,
     eleventh_grader_math_scores,
     twelfth_grader_math_scores
], axis = 1)     
math_scores_by_grade


Unnamed: 0_level_0,9th,10th,11th,12th
school_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Bailey High School,77.083676,76.996772,77.515588,76.492218
Cabrera High School,83.094697,83.154506,82.76556,83.277487
Figueroa High School,76.403037,76.539974,76.884344,77.151369
Ford High School,77.361345,77.672316,76.918058,76.179963
Griffin High School,82.04401,84.229064,83.842105,83.356164
Hernandez High School,77.438495,77.337408,77.136029,77.186567
Holden High School,83.787402,83.429825,85.0,82.855422
Huang High School,77.027251,75.908735,76.446602,77.225641
Johnson High School,77.187857,76.691117,77.491653,76.863248
Pena High School,83.625455,83.372,84.328125,84.121547


In [None]:
print(("reading scores by grade").title())

In [185]:
#Create a DataFrame that lists the average reading score for students of each grade level (9th, 10th, 11th, 12th) at each school.
ninth_graders = students_data_df[(students_data_df["grade"] == "9th")]
ninth_grade_reading_scores = ninth_graders.groupby(['school_name'])
ninth_grade_reading_scores= pd.DataFrame(ninth_grade_reading_scores['reading_score'].mean())
ninth_grade_reading_scores.columns = ['9th']

tenth_graders = students_data_df[(students_data_df["grade"] == "10th")]
tenth_grader_reading_scores = tenth_graders.groupby(['school_name'])
tenth_grader_reading_scores= pd.DataFrame(tenth_grader_reading_scores['reading_score'].mean())
tenth_grader_reading_scores.columns = ['10th']


eleventh_graders = students_data_df[(students_data_df["grade"] == "11th")]
eleventh_grader_reading_scores = eleventh_graders.groupby(['school_name'])
eleventh_grader_reading_scores= pd.DataFrame(eleventh_grader_reading_scores['reading_score'].mean())
eleventh_grader_reading_scores.columns = ['11th']


twelfth_graders = students_data_df[(students_data_df["grade"] == "12th")]
twelfth_grader_reading_scores = twelfth_graders.groupby(['school_name'])
twelfth_grader_reading_scores= pd.DataFrame(twelfth_grader_reading_scores['reading_score'].mean())
twelfth_grader_reading_scores.columns = ['12th']

# Display the result
reading_scores_by_grade = pd.concat(
     [ninth_grade_reading_scores,
     tenth_grader_reading_scores,
     eleventh_grader_reading_scores,
     twelfth_grader_reading_scores
], axis = 1)     
reading_scores_by_grade


Unnamed: 0_level_0,9th,10th,11th,12th
school_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Bailey High School,81.303155,80.907183,80.945643,80.912451
Cabrera High School,83.676136,84.253219,83.788382,84.287958
Figueroa High School,81.198598,81.408912,80.640339,81.384863
Ford High School,80.632653,81.262712,80.403642,80.662338
Griffin High School,83.369193,83.706897,84.288089,84.013699
Hernandez High School,80.86686,80.660147,81.39614,80.857143
Holden High School,83.677165,83.324561,83.815534,84.698795
Huang High School,81.290284,81.512386,81.417476,80.305983
Johnson High School,81.260714,80.773431,80.616027,81.227564
Pena High School,83.807273,83.612,84.335938,84.59116


In [141]:
print('Scores By School Spending')

Scores By School Spending


In [142]:
#Creates a table that breaks down school performance based on average spending ranges (per student)
spending_bins = [0, 585, 630, 645, 680]
spending_range = ["<$585", "$585-630", "$630-645", "$645-680"]

In [143]:
school_spending_df = per_school_summary_df.copy()

In [144]:
 # Slice the data and place it into bins
school_spending_df["Spending Range(Per Student)"] = pd.cut(school_spending_df["Budget Per Student"], 
                                              spending_bins, labels=spending_range, 
                                              include_lowest=True)
school_spending_df

Unnamed: 0,School Type,Total Students,Total School Budget,Budget Per Student,Average Math Score,Average Reading Score,Math Score Percentage,Reading Score Percentage,Overall Percentage,Spending Range(Per Student)
Bailey High School,District,4976,3124928,628.0,77.048432,81.033963,66.680064,81.93328,54.642283,$585-630
Cabrera High School,District,1858,1081356,582.0,83.061895,83.97578,94.133477,97.039828,91.334769,<$585
Figueroa High School,Charter,2949,1884411,639.0,76.711767,81.15802,65.988471,80.739234,53.204476,$630-645
Ford High School,District,2739,1763916,644.0,77.102592,80.746258,68.309602,79.299014,54.289887,$630-645
Griffin High School,Charter,1468,917500,625.0,83.351499,83.816757,93.392371,97.138965,90.599455,$585-630
Hernandez High School,Charter,4635,3022020,652.0,77.289752,80.934412,66.752967,80.862999,53.527508,$645-680
Holden High School,Charter,427,248087,581.0,83.803279,83.814988,92.505855,96.252927,89.227166,<$585
Huang High School,District,2917,1910635,655.0,76.629414,81.182722,65.683922,81.316421,53.513884,$645-680
Johnson High School,Charter,4761,3094650,650.0,77.072464,80.966394,66.057551,81.222432,53.539172,$645-680
Pena High School,Charter,962,585858,609.0,83.839917,84.044699,94.594595,95.945946,90.540541,$585-630


In [145]:
# Performs calculation to find out average for math , reading , percentage passing math , percentage passing reading and overall by using mean function.
spending_math_scores = pd.DataFrame(school_spending_df.groupby(["Spending Range(Per Student)"])["Average Math Score"].mean())
spending_reading_scores =  pd.DataFrame(school_spending_df.groupby(["Spending Range(Per Student)"])["Average Reading Score"].mean())
spending_passing_math =  pd.DataFrame(school_spending_df.groupby(["Spending Range(Per Student)"])["Reading Score Percentage"].mean())
spending_passing_reading =  pd.DataFrame(school_spending_df.groupby(["Spending Range(Per Student)"])["Math Score Percentage"].mean())
overall_passing_spending =  pd.DataFrame(school_spending_df.groupby(["Spending Range(Per Student)"])["Overall Percentage"].mean())

In [155]:
# Creates a dataframe to hold all the above calculation
spending_summary= pd.DataFrame(pd.concat(
                           [spending_math_scores,
                            spending_reading_scores,
                            spending_passing_math,
                            spending_passing_reading,
                            overall_passing_spending
                                
                                
], axis = 1))
spending_summary

Unnamed: 0_level_0,Average Math Score,Average Reading Score,Reading Score Percentage,Math Score Percentage,Overall Percentage
Spending Range(Per Student),Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
<$585,83.455399,83.933814,96.610877,93.460096,90.369459
$585-630,81.899826,83.155286,92.718205,87.133538,81.418596
$630-645,78.518855,81.624473,84.391793,73.484209,62.857656
$645-680,76.99721,81.027843,81.133951,66.164813,53.526855


In [147]:
size_bins = [0, 1000, 2000, 5000]
labels = ["Small (<1000)", "Medium (1000-2000)", "Large (2000-5000)"]
per_school_summary_df["School Size"] = pd.cut(per_school_summary_df["Total Students"], 
                                              size_bins, labels=labels, 
                                              include_lowest=True)
per_school_summary_df

Unnamed: 0,School Type,Total Students,Total School Budget,Budget Per Student,Average Math Score,Average Reading Score,Math Score Percentage,Reading Score Percentage,Overall Percentage,School Size
Bailey High School,District,4976,3124928,628.0,77.048432,81.033963,66.680064,81.93328,54.642283,Large (2000-5000)
Cabrera High School,District,1858,1081356,582.0,83.061895,83.97578,94.133477,97.039828,91.334769,Medium (1000-2000)
Figueroa High School,Charter,2949,1884411,639.0,76.711767,81.15802,65.988471,80.739234,53.204476,Large (2000-5000)
Ford High School,District,2739,1763916,644.0,77.102592,80.746258,68.309602,79.299014,54.289887,Large (2000-5000)
Griffin High School,Charter,1468,917500,625.0,83.351499,83.816757,93.392371,97.138965,90.599455,Medium (1000-2000)
Hernandez High School,Charter,4635,3022020,652.0,77.289752,80.934412,66.752967,80.862999,53.527508,Large (2000-5000)
Holden High School,Charter,427,248087,581.0,83.803279,83.814988,92.505855,96.252927,89.227166,Small (<1000)
Huang High School,District,2917,1910635,655.0,76.629414,81.182722,65.683922,81.316421,53.513884,Large (2000-5000)
Johnson High School,Charter,4761,3094650,650.0,77.072464,80.966394,66.057551,81.222432,53.539172,Large (2000-5000)
Pena High School,Charter,962,585858,609.0,83.839917,84.044699,94.594595,95.945946,90.540541,Small (<1000)


In [148]:
size_math_scores = per_school_summary_df.groupby(["School Size"])["Average Math Score"].mean()
size_reading_scores = per_school_summary_df.groupby(["School Size"])["Average Reading Score"].mean()
size_passing_math = per_school_summary_df.groupby(["School Size"])["Math Score Percentage"].mean()
size_passing_reading = per_school_summary_df.groupby(["School Size"])["Reading Score Percentage"].mean()
size_overall_passing = per_school_summary_df.groupby(["School Size"])["Overall Percentage"].mean()

In [157]:
size_summary = pd.DataFrame(pd.concat(
                        [size_math_scores,
                         size_reading_scores,
                         size_passing_math,
                         size_passing_reading,
                         size_overall_passing
],axis = 1))
size_summary

Unnamed: 0_level_0,Average Math Score,Average Reading Score,Math Score Percentage,Reading Score Percentage,Overall Percentage
School Size,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Small (<1000),83.821598,83.929843,93.550225,96.099437,89.883853
Medium (1000-2000),83.374684,83.864438,93.599695,96.79068,90.621535
Large (2000-5000),77.746417,81.344493,69.963361,82.766634,58.286003


In [150]:
average_math_score_by_type = per_school_summary_df.groupby(["School Type"])["Average Math Score"].mean()
average_reading_score_by_type = per_school_summary_df.groupby(["School Type"])["Average Reading Score"].mean()
average_percent_passing_math_by_type = per_school_summary_df.groupby(["School Type"])["Math Score Percentage"].mean()
average_percent_passing_reading_by_type = per_school_summary_df.groupby(["School Type"])["Reading Score Percentage"].mean()
average_percent_overall_passing_by_type = per_school_summary_df.groupby(["School Type"])["Overall Percentage"].mean()

In [156]:
type_summary = pd.DataFrame(pd.concat(
                                [average_math_score_by_type,
                                 average_reading_score_by_type,
                                 average_percent_passing_math_by_type,
                                 average_percent_passing_reading_by_type,
                                 average_percent_overall_passing_by_type
], axis = 1))
type_summary

Unnamed: 0_level_0,Average Math Score,Average Reading Score,Math Score Percentage,Reading Score Percentage,Overall Percentage
School Type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Charter,80.324201,82.429369,79.873967,88.624209,71.744987
District,80.556334,82.643266,82.259154,89.898811,75.029073
