# PyCitySchools

In [None]:
# Dependencies and Setup
import pandas as pd

# Input CSV locations, relative to the notebook's working directory.
school_data_to_load = "Resources/schools_complete.csv"
student_data_to_load = "Resources/students_complete.csv"

# Load each CSV into its own DataFrame.
school_data = pd.read_csv(school_data_to_load)
student_data = pd.read_csv(student_data_to_load)

# Attach the school columns to every student row. The left join keeps every
# student row even when its school_name has no match in school_data.
# "school_name" is the only join key, so it is passed once.
school_data_complete = pd.merge(student_data, school_data, how="left", on="school_name")

### District Summary

In [None]:
# Number of distinct school_name values in the merged table.
total_schools = school_data_complete["school_name"].unique().size

# Row count of the "Student ID" column (missing values, if any, are counted too).
total_students = school_data_complete["Student ID"].shape[0]

In [None]:
# Sum the budget column of the schools table (not the merged per-student table,
# where the left join repeats each school's columns on every matching student row).
total_budget = school_data.loc[:, "budget"].sum()

In [None]:
# Mean math score over every row of the merged table.
avg_math = school_data_complete.loc[:, "math_score"].mean()

In [None]:
# Mean reading score over every row of the merged table.
avg_reading = school_data_complete.loc[:, "reading_score"].mean()

In [None]:
# Work on just the two score columns.
student_scores = school_data_complete[["math_score", "reading_score"]]

# Boolean masks: True where the score is at least 70 (the passing threshold used here).
passed_math = student_scores["math_score"].ge(70)
passed_reading = student_scores["reading_score"].ge(70)

# Counting the True entries gives the number of passing rows; int() keeps plain ints.
math_pass_count = int(passed_math.sum())
reading_pass_count = int(passed_reading.sum())
both_pass_count = int((passed_reading & passed_math).sum())

# Express each count as a percentage of total_students.
math_pass = math_pass_count / total_students * 100
reading_pass = reading_pass_count / total_students * 100
both_pass = both_pass_count / total_students * 100

In [None]:
# One record holding every district-level metric; the key order sets the column order.
district_dict = [
    {
        "Total Schools": total_schools,
        "Total Students": total_students,
        "Total Budget": total_budget,
        "Average Math Score": avg_math,
        "Average Reading Score": avg_reading,
        "% Passing Math": math_pass,
        "% Passing Reading": reading_pass,
        "% Overall Passing": both_pass,
    }
]

# A single-row summary table; the bare name on the last line displays it in the notebook.
district_summary_df = pd.DataFrame(district_dict)
district_summary_df

### School Summary

In [None]:
# Per-school grouping of the merged table, reused by the later per-school calculations.
school_calc_df = school_data_complete.groupby(["school_name"])

# Take columns 1 through 4 of the schools table by position and give three of them
# display names. NOTE(review): this presumes those positions hold school_name, type,
# size and budget -- confirm against the CSV header.
school_summary_df = school_data.iloc[:, 1:5].rename(
    columns={
        "type": "School Type",
        "size": "Total Students",
        "budget": "Total School Budget",
    }
)

In [None]:
# Budget divided by enrolment, then cast to int (the fractional part is discarded).
school_summary_df["Per Student Budget"] = (
    school_summary_df["Total School Budget"]
    .div(school_summary_df["Total Students"])
    .astype(int)
)

# Order the rows by school name.
school_summary_df = school_summary_df.sort_values(by="school_name")

In [None]:
# Mean math and reading score per school, with the columns given display names.
avg_scores = (
    school_calc_df[["math_score", "reading_score"]]
    .mean()
    .rename(
        columns={
            "math_score": "Average Math Score",
            "reading_score": "Average Reading Score",
        }
    )
)

# Bring the averages into the summary table, matching on school_name.
school_summary_df = school_summary_df.merge(avg_scores, how="left", on="school_name")

In [None]:
def _percent_passing(scores):
    """Return, per column, the percentage of non-null scores that are at least 70."""
    return scores.ge(70).sum().div(scores.count()).mul(100)


# Apply the helper to each school's math and reading scores.
pass_avgs = school_calc_df[["math_score", "reading_score"]].apply(_percent_passing)
pass_avgs = pass_avgs.rename(
    columns={
        "math_score": "% Passing Math",
        "reading_score": "% Passing Reading",
    }
)

# Bring the pass rates into the summary table, matching on school_name.
school_summary_df = school_summary_df.merge(pass_avgs, how="left", on="school_name")

In [None]:
# Keep only the rows where both scores are at least 70, then count them per school.
# NOTE: this rebinds both_pass_count, which the District Summary cell also assigns.
both_pass_count = school_data_complete.loc[
    (school_data_complete["math_score"] >= 70) & (school_data_complete["reading_score"] >= 70),
    ["school_name", "math_score", "reading_score"],
]
both_pass_count = both_pass_count.groupby(["school_name"])
both_pass_perc = pd.DataFrame(both_pass_count["math_score"].count())

both_pass_perc = both_pass_perc.rename(columns={"math_score": "# Overall Passing"})
school_summary_df = pd.merge(school_summary_df, both_pass_perc, how="left", on="school_name")

# A school with no student passing both subjects has no row in both_pass_perc, so the
# left merge leaves NaN for it. Record that as a count of 0 so the percentage derived
# from this column is 0 rather than NaN.
school_summary_df["# Overall Passing"] = school_summary_df["# Overall Passing"].fillna(0)

In [None]:
# Students passing both subjects as a percentage of each school's enrolment.
school_summary_df["% Overall Passing"] = (
    school_summary_df["# Overall Passing"]
    .div(school_summary_df["Total Students"])
    .mul(100)
)

In [None]:
# Final tidy-up: display name for the key column, drop the helper count column,
# and index the table by school. The bare name on the last line displays it.
school_summary_df = (
    school_summary_df
    .rename(columns={"school_name": "School Name"})
    .drop(columns="# Overall Passing")
    .set_index("School Name")
)
school_summary_df

### Top Performing Schools

In [None]:
# Highest overall pass rate first; show the first five rows.
top_performing_df = school_summary_df.sort_values(by="% Overall Passing", ascending=False)
top_performing_df.iloc[:5]

### Bottom Performing Schools

In [None]:
# Lowest overall pass rate first; show the first five rows.
bottom_performing_df = school_summary_df.sort_values(by="% Overall Passing", ascending=True)
bottom_performing_df.iloc[:5]

### Math Scores by Grade

In [None]:
# One filtered copy of the merged table per grade label, unpacked in label order.
scores_9th_df, scores_10th_df, scores_11th_df, scores_12th_df = (
    school_data_complete[school_data_complete["grade"].eq(grade_label)]
    for grade_label in ("9th", "10th", "11th", "12th")
)

In [None]:
# Mean math score per school, computed separately for each grade's rows.
math_9th, math_10th, math_11th, math_12th = (
    grade_rows.groupby(["school_name"])["math_score"].mean()
    for grade_rows in (scores_9th_df, scores_10th_df, scores_11th_df, scores_12th_df)
)

In [None]:
# Line the four per-grade Series up side by side, one column per grade.
math_by_grades_df = pd.DataFrame(
    {
        "9th": math_9th,
        "10th": math_10th,
        "11th": math_11th,
        "12th": math_12th,
    }
)

# Label the index for display, then show the table.
math_by_grades_df.index.name = "School Name"
math_by_grades_df

### Reading Scores by Grade

In [None]:
# Mean reading score per school, computed separately for each grade's rows.
reading_9th, reading_10th, reading_11th, reading_12th = (
    grade_rows.groupby(["school_name"])["reading_score"].mean()
    for grade_rows in (scores_9th_df, scores_10th_df, scores_11th_df, scores_12th_df)
)

In [None]:
# Line the four per-grade Series up side by side, one column per grade.
reading_by_grades_df = pd.DataFrame(
    {
        "9th": reading_9th,
        "10th": reading_10th,
        "11th": reading_11th,
        "12th": reading_12th,
    }
)

# Label the index for display, then show the table.
reading_by_grades_df.index.name = "School Name"
reading_by_grades_df

### Scores by School Spending

### Scores by School Size

### Scores by School Type

In [None]:
# Split the merged table by the value in its "type" column.
charter_df, district_df = (
    school_data_complete[school_data_complete["type"].eq(school_kind)]
    for school_kind in ("Charter", "District")
)

In [None]:
# Mean math and reading score for each school type's rows.
charter_math_avg, charter_reading_avg = (
    charter_df[subject].mean() for subject in ("math_score", "reading_score")
)
district_math_avg, district_reading_avg = (
    district_df[subject].mean() for subject in ("math_score", "reading_score")
)

In [None]:
# Percentage of each type's rows with a math score of at least 70.
# int() keeps the numerator a plain int, so the division behaves as it does with len().
charter_math_pass = int(charter_df["math_score"].ge(70).sum()) / len(charter_df) * 100
district_math_pass = int(district_df["math_score"].ge(70).sum()) / len(district_df) * 100

In [None]:
# Percentage of each type's rows with a reading score of at least 70.
# int() keeps the numerator a plain int, so the division behaves as it does with len().
charter_reading_pass = int(charter_df["reading_score"].ge(70).sum()) / len(charter_df) * 100
district_reading_pass = int(district_df["reading_score"].ge(70).sum()) / len(district_df) * 100

In [None]:
# Percentage of each type's rows where both the math and reading scores are at least 70.
charter_passed_both = charter_df["math_score"].ge(70) & charter_df["reading_score"].ge(70)
charter_both_pass = int(charter_passed_both.sum()) / len(charter_df) * 100

district_passed_both = district_df["math_score"].ge(70) & district_df["reading_score"].ge(70)
district_both_pass = int(district_passed_both.sum()) / len(district_df) * 100

In [None]:
# Two-row summary (Charter first, District second), indexed by school type.
# The bare name on the last line displays the table in the notebook.
school_type_df = pd.DataFrame(
    {
        "School Type": ["Charter", "District"],
        "Average Math Score": [charter_math_avg, district_math_avg],
        "Average Reading Score": [charter_reading_avg, district_reading_avg],
        "% Passing Math": [charter_math_pass, district_math_pass],
        "% Passing Reading": [charter_reading_pass, district_reading_pass],
        "% Overall Passing": [charter_both_pass, district_both_pass],
    }
).set_index("School Type")
school_type_df