In [None]:
# Dependencies and Setup
import pandas as pd

# Files to load (paths are relative to the notebook's working directory;
# change these if the CSVs live somewhere else).
school_data_to_load = "Resources/schools_complete.csv"
student_data_to_load = "Resources/students_complete.csv"

# Read the school and student CSV files into pandas DataFrames.
school_data = pd.read_csv(school_data_to_load)
student_data = pd.read_csv(student_data_to_load)

# Combine the data into a single dataset.
# A left join keeps every student row and attaches the columns of the
# matching school. The join key is listed once; the original passed
# "school_name" twice, which was redundant.
school_data_complete = pd.merge(student_data, school_data, how="left", on="school_name")

In [None]:
# Give the join key a display-friendly label and use it as the row index.
school_df = (
    school_data_complete
    .rename(columns={"school_name": "School Name"})
    .set_index("School Name")
)
school_df

In [None]:
# Build one group per school ("School Name" is the index of school_df).
school_group_df = school_df.groupby(by="School Name")
# Preview: the first five rows of every group.
school_group_df.head(5)

In [None]:
# Mean math score for each school, indexed by school name.
math_scores_by_school = school_group_df["math_score"]
Avg_math = math_scores_by_school.mean()


In [None]:
# Mean reading score for each school, indexed by school name.
reading_scores_by_school = school_group_df["reading_score"]
Avg_read = reading_scores_by_school.mean()


In [None]:
# Calculate the percentage of students passing math at each school.
# A student passes with a math score of 70 or higher.
passed_math = school_df["math_score"] >= 70
pass_math_df = school_df.loc[passed_math, :]
math_group = pass_math_df.groupby("School Name")

# Count passers over ALL students (sum of the boolean mask per school) so a
# school with no passing students gets 0 instead of disappearing from the result.
math_pass_count = passed_math.groupby(level="School Name").sum()
# "size" repeats the school's student total on every row; first() yields one
# scalar per school, whereas unique() would yield a 1-element array per school
# and turn the division result into an object-dtype Series of arrays.
math_school_size = school_df.groupby(level="School Name")["size"].first()
math_percent = math_pass_count / math_school_size * 100



In [None]:
# Calculate the percentage of students passing reading at each school.
# A student passes with a reading score of 70 or higher.
passed_reading = school_df["reading_score"] >= 70
pass_read_df = school_df.loc[passed_reading, :]
read_group = pass_read_df.groupby("School Name")

# Count passers over ALL students (sum of the boolean mask per school) so a
# school with no passing students gets 0 instead of disappearing from the result.
read_pass_count = passed_reading.groupby(level="School Name").sum()
# "size" repeats the school's student total on every row; first() yields one
# scalar per school, whereas unique() would yield a 1-element array per school
# and turn the division result into an object-dtype Series of arrays.
read_school_size = school_df.groupby(level="School Name")["size"].first()
read_percent = read_pass_count / read_school_size * 100


In [None]:
# Calculate the percentage of students passing overall at each school.
# A student passes overall with 70 or higher in BOTH math and reading.
passed_both = (school_df["math_score"] >= 70) & (school_df["reading_score"] >= 70)
overall_pass_df = school_df.loc[passed_both, :]
overall_group = overall_pass_df.groupby("School Name")

# Count passing rows over ALL students (sum of the boolean mask per school) so
# a school with no overall passers gets 0 instead of disappearing from the result.
overall_pass_count = passed_both.groupby(level="School Name").sum()
# "size" repeats the school's student total on every row; first() yields one
# scalar per school, whereas unique() would yield a 1-element array per school
# and turn the division result into an object-dtype Series of arrays.
overall_school_size = school_df.groupby(level="School Name")["size"].first()
overall_percent = overall_pass_count / overall_school_size * 100


In [None]:
# Assemble the per-school summary statistics into one frame; the Series are
# aligned on their shared school-name index.
summary_columns = {
    "Average Math Score": Avg_math,
    "Average Reading Score": Avg_read,
    "% Passing Math": math_percent,
    "% Passing Reading": read_percent,
    "% Overall Passing": overall_percent,
}
stats_df = pd.DataFrame(summary_columns)
stats_df

In [None]:
# Per-student budget: each school's total budget divided by its student count.
total_budget = school_data["budget"]
student_count = school_data["size"]
Per_student_budget = total_budget / student_count
Per_student_budget

In [None]:
# Attach the per-student budget as a new column, then relabel the raw column
# names with presentation-friendly headings.
school_data["Per Student Budget"] = Per_student_budget
display_names = {
    "school_name": "School Name",
    "type": "School Type",
    "size": "Total Students",
    "budget": "Total School Budget",
}
school_data = school_data.rename(columns=display_names)
school_data

In [None]:
# Merge the per-school metadata (school_data) with the per-school summary
# statistics (stats_df). "School Name" is a column of school_data and the
# index level of stats_df.
merge_df = pd.merge(school_data, stats_df, on="School Name")
# Only the last expression of a cell is echoed, so a bare `merge_df` here
# would show nothing; display it explicitly, then show the dtypes.
display(merge_df)
merge_df.dtypes

In [None]:
# Index by school name, drop the school ID, convert the budget and percentage
# columns to float, then check the resulting dtypes.
# The conversion uses astype with a column->dtype mapping, which returns a new
# frame with the new dtypes. Assigning through `.loc[:, col] = ...` writes into
# the existing column in place on recent pandas and can leave the old dtype
# unchanged, so the cast would silently have no effect.
float_columns = [
    "Total School Budget",
    "% Passing Math",
    "% Passing Reading",
    "% Overall Passing",
]
summary_table = (
    merge_df
    .set_index("School Name")
    .drop(columns="School ID")
    .astype({column: float for column in float_columns})
)
summary_table.dtypes

In [None]:
# School summary table: ordered alphabetically by school name, with the two
# money columns rendered as dollar amounts.
money_formats = {
    "Total School Budget": "${:,.2f}",
    "Per Student Budget": "${:.2f}",
}
summary_table = summary_table.sort_values(by="School Name", ascending=True)
summary_table_df = summary_table.style.format(money_formats)
summary_table_df

In [None]:
# Top performing schools: highest "% Overall Passing" first, showing the
# first five rows with the money columns rendered as dollar amounts.
money_formats = {
    "Total School Budget": "${:,.2f}",
    "Per Student Budget": "${:.2f}",
}
summary_table = summary_table.sort_values(by="% Overall Passing", ascending=False)
top_five = summary_table.head(5)
summary_table_df = top_five.style.format(money_formats)
summary_table_df

In [None]:
# Bottom performing schools: lowest "% Overall Passing" first, showing the
# first five rows with the money columns rendered as dollar amounts.
money_formats = {
    "Total School Budget": "${:,.2f}",
    "Per Student Budget": "${:.2f}",
}
summary_table = summary_table.sort_values(by="% Overall Passing", ascending=True)
bottom_five = summary_table.head(5)
summary_table_df = bottom_five.style.format(money_formats)
summary_table_df