In [115]:
# Dependencies
import pandas as pd

In [116]:
# Read both source tables from CSV (relative paths under generated_data/)
student_df = pd.read_csv("generated_data/students_complete.csv")
school_df = pd.read_csv("generated_data/schools_complete.csv")

# Show a single row to check which columns the school table provides
school_df.head(n=1)

Unnamed: 0,School ID,school_name,type,size,budget
0,0,Miller High School,Charter,2424,1418040


In [117]:
# Show a single row to check which columns the student table provides
student_df.head(n=1)

Unnamed: 0,Student ID,student_name,gender,grade,school_name,reading_score,math_score
0,0,April Miller,F,9th,Miller High School,99,92


In [118]:
# Attach each student's school attributes by joining the two tables on the
# shared "school_name" column; the combined frame is reused by later cells
district_df = student_df.merge(school_df, on="school_name")
district_df.head(n=1)

Unnamed: 0,Student ID,student_name,gender,grade,school_name,reading_score,math_score,School ID,type,size,budget
0,0,April Miller,F,9th,Miller High School,99,92,0,Charter,2424,1418040


In [119]:
# DISTRICT SUMMARY
#> Create a high level snapshot (in table form) of the district's key metrics.
# Produces a one-row table built from school_df (schools, budget) and
# student_df (student count, scores).

# Minimum score that counts as passing, named once instead of repeating the
# literal in every comparison.
# NOTE(review): confirm that 60 is the intended cutoff.
PASSING_SCORE = 60

# - Total Schools
school_total = school_df["school_name"].count()

# - Total Students
student_total = student_df["student_name"].count()

# - Total Budget
budget_total = school_df["budget"].sum()

# - Average Math Score
math_average = student_df["math_score"].mean()

# - Average Reading Score
reading_average = student_df["reading_score"].mean()

# - % Passing Math
# Summing the boolean mask counts the passing rows without chained indexing
math_passing = (student_df["math_score"] >= PASSING_SCORE).sum()
math_passing = (math_passing / student_total) * 100

# - % Passing Reading
reading_passing = (student_df["reading_score"] >= PASSING_SCORE).sum()
reading_passing = (reading_passing / student_total) * 100

# - Overall Passing Rate (Average of the above two)
overall_passing = (math_passing + reading_passing) / 2

# Make that table!
# One record -> one row. The keys are listed in display order and passed as
# `columns=` so the column order does not depend on how pandas orders dict keys.
district_metrics = {
    "Total Schools": school_total,
    "Total Students": "{:,}".format(student_total),
    "Total Budget": "${:,}".format(budget_total),
    "Average Math Score": math_average,
    "Average Reading Score": reading_average,
    "% Passing Math": math_passing,
    "% Passing Reading": reading_passing,
    "% Overall Passing Rate": overall_passing,
}
district_summary_table = pd.DataFrame([district_metrics],
                                      columns=list(district_metrics))
district_summary_table

Unnamed: 0,Total Schools,Total Students,Total Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing Rate
0,11,29376,"$18,648,468",82.269846,82.865877,100.0,92.766204,96.383102


In [120]:
# SCHOOL SUMMARY
#> Create an overview table that summarizes key metrics about each school.
# Produces one row per school (indexed by school_name) from district_df and
# school_df.

# Minimum score that counts as passing; defined in this cell so the cell does
# not rely on a constant from any other cell.
PASSING_SCORE = 60


def _percent_passing_by_school(scores, students_per_school, threshold):
    """Return the percentage of students per school scoring >= threshold.

    scores: Series of scores whose index level "school_name" names the school.
    students_per_school: Series of student counts indexed by school name.
    threshold: minimum score counted as passing.

    The comparison is done before grouping, so a school with no passing
    students still appears in the result (as 0.0) instead of being dropped
    and turning into NaN when divided by the student counts.
    """
    passing_counts = (scores >= threshold).groupby(level="school_name").sum()
    return (passing_counts / students_per_school) * 100


# - School Name
# Make a nice groupby by school_name
school_group = district_df.groupby("school_name")

# - School Type
# Take the type value directly rather than stringifying the unique() array
# and stripping "[", "]" and "'" from its repr.
# Assumes each school has a single type (one row per school in school_df).
school_type = school_group["type"].first()

# - Total Students
school_students = school_group["student_name"].count()

# - Total School Budget
school_budget = school_df.groupby("school_name")["budget"].sum()

# - Per Student Budget
per_student_budget = school_budget / school_students

# - Average Math Score
school_math_average = school_group["math_score"].mean()

# - Average Reading Score
school_reading_average = school_group["reading_score"].mean()

# Frame indexed by school name; also used by the math-by-grade cell
school_index = district_df.set_index("school_name")

# - % Passing Math
math_pass = _percent_passing_by_school(school_index["math_score"],
                                       school_students,
                                       PASSING_SCORE)

# - % Passing Reading
reading_pass = _percent_passing_by_school(school_index["reading_score"],
                                          school_students,
                                          PASSING_SCORE)

# - Overall Passing Rate (Average of the above two)
overall_pass = ((math_pass + reading_pass) / 2)

# Make this table!
# Keys are listed in display order and passed as `columns=` so the column
# order does not depend on how pandas orders dict keys.
school_metrics = {
    "School Type": school_type,
    "Total Students": school_students.map("{:,}".format),
    "Total School Budget": school_budget.map("${:,}".format),
    "Per Student Budget": per_student_budget.map("${:,}".format),
    "Average Math Score": school_math_average,
    "Average Reading Score": school_reading_average,
    "% Passing Math": math_pass,
    "% Passing Reading": reading_pass,
    "Overall Passing Rate": overall_pass,
}
school_summary_table = pd.DataFrame(data=school_metrics,
                                    columns=list(school_metrics))
school_summary_table

Unnamed: 0_level_0,School Type,Total Students,Total School Budget,Per Student Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,Overall Passing Rate
school_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Campbell High School,Charter,271,"$157,993",$583.0,83.594096,93.771218,100.0,100.0,100.0
Galloway High School,Charter,2471,"$1,445,535",$585.0,83.566168,94.029543,100.0,100.0,100.0
Glass High School,District,3271,"$2,155,589",$659.0,81.293183,76.888108,100.0,88.719046,94.359523
Gomez High School,Charter,2154,"$1,288,092",$598.0,83.83844,94.027391,100.0,100.0,100.0
Gonzalez High School,Charter,1855,"$1,192,765",$643.0,83.442588,94.140701,100.0,100.0,100.0
Hawkins High School,District,4555,"$2,851,430",$626.0,81.72382,77.005928,100.0,88.715697,94.357849
Kelly High School,District,3307,"$2,225,611",$673.0,81.678258,76.829755,100.0,88.751134,94.375567
Macdonald High School,Charter,901,"$550,511",$611.0,83.779134,93.932297,100.0,100.0,100.0
Miller High School,Charter,2424,"$1,418,040",$585.0,83.610149,93.997525,100.0,100.0,100.0
Sherman High School,District,3213,"$2,152,710",$670.0,81.502023,77.290694,100.0,89.449113,94.724556


In [121]:
# TOP PERFORMING SCHOOLS (BY PASSING RATE)
#> Create a table that highlights the top 5 performing schools based on Overall Passing Rate.  Include:
# - School Name
# - School Type
# - Total Students
# - Total School Budget
# - Per Student Budget
# - Average Math Score
# - Average Reading Score
# - % Passing Math
# - % Passing Reading
# - Overall Passing Rate (Average of the above two)

# school_summary_table already holds all of these columns (school name is its
# index), so ranking is a descending sort followed by taking the first rows.
top_performing_school = school_summary_table.sort_values(
    by="Overall Passing Rate",
    ascending=False,
)
top_performing_school.head(n=5)

Unnamed: 0_level_0,School Type,Total Students,Total School Budget,Per Student Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,Overall Passing Rate
school_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Campbell High School,Charter,271,"$157,993",$583.0,83.594096,93.771218,100.0,100.0,100.0
Galloway High School,Charter,2471,"$1,445,535",$585.0,83.566168,94.029543,100.0,100.0,100.0
Gomez High School,Charter,2154,"$1,288,092",$598.0,83.83844,94.027391,100.0,100.0,100.0
Gonzalez High School,Charter,1855,"$1,192,765",$643.0,83.442588,94.140701,100.0,100.0,100.0
Macdonald High School,Charter,901,"$550,511",$611.0,83.779134,93.932297,100.0,100.0,100.0


In [122]:
# BOTTOM PERFORMING SCHOOLS
#> Same as above but the bottom 5
# Ascending sort (the default) puts the lowest Overall Passing Rate first
bottom_performing_school = school_summary_table.sort_values(
    by="Overall Passing Rate",
)
bottom_performing_school.head(n=5)

Unnamed: 0_level_0,School Type,Total Students,Total School Budget,Per Student Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,Overall Passing Rate
school_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Hawkins High School,District,4555,"$2,851,430",$626.0,81.72382,77.005928,100.0,88.715697,94.357849
Glass High School,District,3271,"$2,155,589",$659.0,81.293183,76.888108,100.0,88.719046,94.359523
Kelly High School,District,3307,"$2,225,611",$673.0,81.678258,76.829755,100.0,88.751134,94.375567
Smith High School,District,4954,"$3,210,192",$648.0,81.53916,77.146952,100.0,89.281389,94.640694
Sherman High School,District,3213,"$2,152,710",$670.0,81.502023,77.290694,100.0,89.449113,94.724556


In [171]:
# MATH SCORES BY GRADE
#> Create a table that lists the average Math Score for students of each grade level (9th, 10th, 11th, 12th) at each school
# Average math_score per (school, grade) pair, then move the grade level of
# the index into the columns so each school becomes one row.
math_by_grade = (
    school_index
    .groupby(["school_name", "grade"])["math_score"]
    .mean()
    .unstack("grade")
)
# Put the grade columns in school-year order instead of the sorted string order
math_by_grade = math_by_grade[["9th", "10th", "11th", "12th"]]
math_by_grade

grade,9th,10th,11th,12th
school_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Campbell High School,83.842857,84.269663,83.94,82.064516
Galloway High School,83.534384,83.55163,83.975425,83.204724
Glass High School,81.867647,81.044652,81.390935,80.82312
Gomez High School,83.676568,83.966817,83.874468,83.828916
Gonzalez High School,83.548263,83.952118,83.20197,82.840206
Hawkins High School,81.667758,81.475371,81.88577,81.938296
Kelly High School,81.789659,81.881168,81.497283,81.45392
Macdonald High School,84.255507,83.813953,83.482906,83.516484
Miller High School,83.823713,83.624661,83.635838,83.304183
Sherman High School,81.496614,81.526882,81.232117,81.735955
