In [1]:
# Dependencies
import pandas as pd

In [2]:
# Read the school-level and student-level CSV files into DataFrames
schools_csv = "generated_data/schools_complete.csv"
students_csv = "generated_data/students_complete.csv"
school_df, student_df = pd.read_csv(schools_csv), pd.read_csv(students_csv)

# Show a single row so the school column names are visible
school_df.iloc[:1]

Unnamed: 0,School ID,school_name,type,size,budget
0,0,Miller High School,Charter,2424,1418040


In [3]:
# Show a single row so the student column names are visible
student_df.iloc[:1]

Unnamed: 0,Student ID,student_name,gender,grade,school_name,reading_score,math_score
0,0,April Miller,F,9th,Miller High School,99,92


In [4]:
# Attach each student's school attributes by joining on the shared "school_name" column.
# No `how=` is given, so pandas' default join type applies.
district_df = student_df.merge(school_df, on="school_name")
district_df.iloc[:1]

Unnamed: 0,Student ID,student_name,gender,grade,school_name,reading_score,math_score,School ID,type,size,budget
0,0,April Miller,F,9th,Miller High School,99,92,0,Charter,2424,1418040


In [5]:
# DISTRICT SUMMARY
#> Create a high level snapshot (in table form) of the district's key metrics.

# Minimum score that counts as passing; named once instead of being repeated inline.
# NOTE(review): confirm 60 is the intended cutoff -- the saved output of this
# notebook reports 100% passing math with it.
PASSING_SCORE = 60

# - Total Schools / Total Students (counts of non-null names)
school_total = school_df["school_name"].count()
student_total = student_df["student_name"].count()

# - Total Budget
budget_total = school_df["budget"].sum()

# - Average Math Score / Average Reading Score
math_average = student_df["math_score"].mean()
reading_average = student_df["reading_score"].mean()

# - % Passing Math / % Passing Reading
# Summing the boolean mask counts the passing students directly, so each name
# only ever holds the percentage (never an intermediate count).
math_passing = ((student_df["math_score"] >= PASSING_SCORE).sum() / student_total) * 100
reading_passing = ((student_df["reading_score"] >= PASSING_SCORE).sum() / student_total) * 100

# - Overall Passing Rate (average of the two pass rates above)
overall_passing = (math_passing + reading_passing) / 2

# Make that table! Keys are written in display order; passing them as `columns`
# pins that order explicitly, so the column list is only stated once.
district_metrics = {
    "Total Schools": school_total,
    "Total Students": "{:,}".format(student_total),
    "Total Budget": "${:,}".format(budget_total),
    "Average Math Score": "{:.2f}%".format(math_average),
    "Average Reading Score": "{:.2f}%".format(reading_average),
    "% Passing Math": "{:.2f}%".format(math_passing),
    "% Passing Reading": "{:.2f}%".format(reading_passing),
    "% Overall Passing Rate": "{:.2f}%".format(overall_passing),
}
district_summary_table = pd.DataFrame([district_metrics], columns=list(district_metrics))
district_summary_table

Unnamed: 0,Total Schools,Total Students,Total Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing Rate
0,11,29376,"$18,648,468",82.27%,82.87%,100.00%,92.77%,96.38%


In [87]:
# SCHOOL SUMMARY
#> Create an overview table that summarizes key metrics about each school.

# Minimum score that counts as passing (the district summary cell uses the same 60 cutoff).
PASSING_SCORE = 60

# - School Name
# Every per-school metric below is a Series indexed by school_name.
school_group = district_df.groupby("school_name")

# - School Type
# Read the type straight from each group. The previous approach stringified the
# arrays returned by unique() and stripped "[", "]" and "'" from the text, which
# would also mangle any type value containing those characters.
school_type = school_group["type"].first()

# - Total Students
school_students = school_group["student_name"].count()

# - Total School Budget
# Taken from school_df rather than district_df, where the merge repeats the
# school's budget on every student row.
school_budget = school_df.groupby("school_name")["budget"].sum()

# - Per Student Budget
per_student_budget = school_budget / school_students

# - Average Math Score / Average Reading Score
school_math_average = school_group["math_score"].mean()
school_reading_average = school_group["reading_score"].mean()

# - % Passing Math / % Passing Reading
# Sum a boolean "passed" mask per school instead of filtering first and counting:
# a school with no passing students then yields 0 rather than vanishing from the
# result (which previously surfaced as "nan%" in the table).
school_index = district_df.set_index("school_name")
passed_counts = (
    school_index[["math_score", "reading_score"]]
    .ge(PASSING_SCORE)
    .groupby(level="school_name")
    .sum()
)
math_pass_pct = (passed_counts["math_score"] / school_students) * 100
reading_pass_pct = (passed_counts["reading_score"] / school_students) * 100

# - Overall Passing Rate (average of the two pass rates above)
overall_pass_pct = (math_pass_pct + reading_pass_pct) / 2

# Formatted copies keep the names (and string values) this cell has always exposed.
math_pass = math_pass_pct.map("{:.1f}%".format)
reading_pass = reading_pass_pct.map("{:.1f}%".format)
overall_pass = overall_pass_pct.map("{:.1f}%".format)

# Make this table! Keys are written in display order; passing them as `columns`
# pins that order explicitly, so the column list is only stated once.
school_metrics = {
    "School Type": school_type,
    "Total Students": school_students.map("{:,}".format),
    "Total School Budget": school_budget.map("${:,}".format),
    # Two decimals: the quotient is a float, so "${:,}" rendered e.g. "$583.0".
    "Per Student Budget": per_student_budget.map("${:,.2f}".format),
    "Average Math Score": school_math_average.map("{:.2f}%".format),
    "Average Reading Score": school_reading_average.map("{:.2f}%".format),
    "% Passing Math": math_pass,
    "% Passing Reading": reading_pass,
    "Overall Passing Rate": overall_pass,
}
school_summary_table = pd.DataFrame(school_metrics, columns=list(school_metrics))
school_summary_table

Unnamed: 0_level_0,School Type,Total Students,Total School Budget,Per Student Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,Overall Passing Rate
school_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Campbell High School,Charter,271,"$157,993",$583.0,83.59%,93.77%,100.0%,100.0%,100.0%
Galloway High School,Charter,2471,"$1,445,535",$585.0,83.57%,94.03%,100.0%,100.0%,100.0%
Glass High School,District,3271,"$2,155,589",$659.0,81.29%,76.89%,100.0%,88.7%,94.4%
Gomez High School,Charter,2154,"$1,288,092",$598.0,83.84%,94.03%,100.0%,100.0%,100.0%
Gonzalez High School,Charter,1855,"$1,192,765",$643.0,83.44%,94.14%,100.0%,100.0%,100.0%
Hawkins High School,District,4555,"$2,851,430",$626.0,81.72%,77.01%,100.0%,88.7%,94.4%
Kelly High School,District,3307,"$2,225,611",$673.0,81.68%,76.83%,100.0%,88.8%,94.4%
Macdonald High School,Charter,901,"$550,511",$611.0,83.78%,93.93%,100.0%,100.0%,100.0%
Miller High School,Charter,2424,"$1,418,040",$585.0,83.61%,94.00%,100.0%,100.0%,100.0%
Sherman High School,District,3213,"$2,152,710",$670.0,81.50%,77.29%,100.0%,89.4%,94.7%
