In [54]:
# Dependencies and Setup
import pandas as pd
import numpy as np

# Input files, given as paths relative to the working directory (change these if the data moves)
school_data_to_load = "Resources/schools_complete.csv"
student_data_to_load = "Resources/students_complete.csv"

# Read the school and student CSV files into separate pandas DataFrames
school_data = pd.read_csv(school_data_to_load)
student_data = pd.read_csv(student_data_to_load)

# Combine the data into a single dataset: a left join keeps every student row and
# attaches the matching school's columns. The join key only needs to be named once.
school_data_complete = pd.merge(student_data, school_data, how="left", on="school_name")
school_data_complete.head()

Unnamed: 0,Student ID,student_name,gender,grade,school_name,reading_score,math_score,School ID,type,size,budget
0,0,Paul Bradley,M,9th,Huang High School,66,79,0,District,2917,1910635
1,1,Victor Smith,M,12th,Huang High School,94,61,0,District,2917,1910635
2,2,Kevin Rodriguez,M,12th,Huang High School,90,60,0,District,2917,1910635
3,3,Dr. Richard Scott,M,12th,Huang High School,67,58,0,District,2917,1910635
4,4,Bonnie Ray,F,9th,Huang High School,97,84,0,District,2917,1910635


In [55]:
# District summary: headline figures computed across every student row

# Number of distinct schools
total_schools = school_data_complete["school_name"].nunique()

# Number of student rows
total_students = len(school_data_complete["Student ID"])

# Total budget: reduce each school to a single budget figure (the mean of its rows),
# then add those per-school figures together
schools = school_data_complete.groupby(["school_name"])
school_budget = schools["budget"].mean()
school_budget_df = school_budget.to_frame("School Budget")
tot_school_budget = school_budget_df["School Budget"].sum()

# Average math score, rounded to 3 decimals
math_score_sum = school_data_complete["math_score"].sum()
tot_math_avg = round(math_score_sum / total_students, 3)

# Average reading score, rounded to 3 decimals
reading_score_sum = school_data_complete["reading_score"].sum()
tot_read_avg = round(reading_score_sum / total_students, 3)

# Students with a passing math score (70 or greater), as a share of all students
math_pass_mask = school_data_complete["math_score"] >= 70
passing_math_df = school_data_complete[math_pass_mask]
tot_passing_math = len(passing_math_df)
percent_passing_math = tot_passing_math / total_students * 100

# Students with a passing reading score (70 or greater), as a share of all students
read_pass_mask = school_data_complete["reading_score"] >= 70
passing_read_df = school_data_complete[read_pass_mask]
tot_passing_read = len(passing_read_df)
percent_passing_read = tot_passing_read / total_students * 100

# Students who passed both math and reading (% Overall Passing)
passing_both_df = school_data_complete[math_pass_mask & read_pass_mask]
tot_passing_both = len(passing_both_df)
percent_passing_both = tot_passing_both / total_students * 100

# Collect the results in a one-row DataFrame
summary_df = pd.DataFrame(
    {
        "Total Schools": [total_schools],
        "Total Students": [total_students],
        "Total Budget": [tot_school_budget],
        "Average Math Score": [tot_math_avg],
        "Average Reading Score": [tot_read_avg],
        "% Passing Math": [percent_passing_math],
        "% Passing Reading": [percent_passing_read],
        "% Overall Passing": [percent_passing_both],
    }
)

# Formatting: thousands separators, currency, and 3-decimal percentages
# (the formatted columns hold strings from here on)
summary_df["Total Students"] = summary_df["Total Students"].map("{:,}".format)
summary_df["Total Budget"] = summary_df["Total Budget"].astype(float).map("${:,.2f}".format)
for pct_col in ("% Passing Math", "% Passing Reading", "% Overall Passing"):
    summary_df[pct_col] = summary_df[pct_col].map("{:.3f}%".format)
summary_df


Unnamed: 0,Total Schools,Total Students,Total Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing
0,15,39170,"$24,649,428.00",78.985,81.878,74.981%,85.805%,65.172%


In [85]:
# Group by school (with its type) and compute the figures for the by-school table
schools_type = school_data_complete.groupby(["school_name","type"])
total_students_per = schools_type["Student ID"].count()
# Each group is reduced to one budget figure via the mean of its rows
# (presumably the budget is constant within a school, since it comes from the school table)
total_budget_per = schools_type["budget"].mean()
# Divide the per-school budget by the per-school head count. This previously referenced
# `total_budget`, a name no cell defines, so the cell failed on a fresh kernel.
per_student_budget = total_budget_per / total_students_per
avg_math_score = schools_type["math_score"].mean()
avg_read_score = schools_type["reading_score"].mean()

# Create the by-school table; every Series shares the (school_name, type) index
schools_type_df = pd.DataFrame({
    "Total Students": total_students_per,
    "Total School Budget": total_budget_per,
    "Per Student Budget": per_student_budget,
    "Average Math Score": avg_math_score,
    "Average Reading Score": avg_read_score,
})

# Move "type" out of the index into a leading column, leaving school_name as the index
schools_type_df = schools_type_df.reset_index(level="type")
schools_type_df = schools_type_df.rename(columns={"type":"School Type"})

# Formatting (these three columns hold display strings from here on; use
# total_students_per / total_budget_per for any further arithmetic)
schools_type_df['Total Students'] = schools_type_df['Total Students'].map("{:,}".format)
schools_type_df['Total School Budget'] = schools_type_df['Total School Budget'].astype(float).map("${:,.2f}".format)
schools_type_df['Per Student Budget'] = schools_type_df['Per Student Budget'].astype(float).map("${:,.2f}".format)

In [86]:
# Count the students with a passing math score (70 or greater) at each school.
# passing_math_df is the already-filtered frame built in the district summary cell.
passing_math_per_school = passing_math_df.groupby(["school_name"])
number_passing_math = passing_math_per_school["Student ID"].count()

passing_math_per_df = pd.DataFrame({"Number Passing Math":number_passing_math})

# Count the students with a passing reading score (70 or greater) at each school.
# The GroupBy gets its own name so passing_read_per_df only ever refers to the DataFrame.
passing_read_per_school = passing_read_df.groupby(["school_name"])
number_passing_read = passing_read_per_school["Student ID"].count()

passing_read_per_df = pd.DataFrame({"Number Passing Reading":number_passing_read})

In [87]:
# Merge the passing-count tables into the by-school table.
# A left merge keeps every school: a school with no passing students has no row in the
# counts table, so its count is filled with 0 rather than the school being dropped.
schools_type_math_df = pd.merge(
    schools_type_df, passing_math_per_df, how="left", on="school_name"
).fillna({"Number Passing Math": 0})
schools_type_math_read_df = pd.merge(
    schools_type_math_df, passing_read_per_df, how="left", on="school_name"
).fillna({"Number Passing Reading": 0})

In [88]:
# Per-school pass rates, added to the by-school table.
# Counts and head counts are matched on the school_name label rather than on row position,
# so the result does not depend on both tables having the same order and length.
students_per_school = total_students_per.droplevel("type")

# NOTE: percent_passing_math / percent_passing_read also hold the district-wide scalars
# from the district summary cell; from here on they are per-school Series.

# Calc % Passing Math & add to the table
percent_passing_math = schools_type_math_read_df["Number Passing Math"] / students_per_school * 100
schools_type_math_read_df["% Passing Math"] = percent_passing_math

# Calc % Passing Reading & add to the table
percent_passing_read = schools_type_math_read_df["Number Passing Reading"] / students_per_school * 100
schools_type_math_read_df["% Passing Reading"] = percent_passing_read

# Formatting (the percentage columns hold display strings from here on)
for pct_col in ("% Passing Math", "% Passing Reading"):
    schools_type_math_read_df[pct_col] = schools_type_math_read_df[pct_col].map("{:.3f}%".format)

# Drop the columns that were only needed for the calculation
schools_sum_df = schools_type_math_read_df.drop(columns=["Number Passing Math", "Number Passing Reading"])

schools_sum_df

Unnamed: 0_level_0,School Type,Total Students,Total School Budget,Per Student Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading
school_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Bailey High School,District,4976,"$3,124,928.00",$628.00,77.048432,81.033963,66.680%,81.933%
Cabrera High School,Charter,1858,"$1,081,356.00",$582.00,83.061895,83.97578,94.133%,97.040%
Figueroa High School,District,2949,"$1,884,411.00",$639.00,76.711767,81.15802,65.988%,80.739%
Ford High School,District,2739,"$1,763,916.00",$644.00,77.102592,80.746258,68.310%,79.299%
Griffin High School,Charter,1468,"$917,500.00",$625.00,83.351499,83.816757,93.392%,97.139%
Hernandez High School,District,4635,"$3,022,020.00",$652.00,77.289752,80.934412,66.753%,80.863%
Holden High School,Charter,427,"$248,087.00",$581.00,83.803279,83.814988,92.506%,96.253%
Huang High School,District,2917,"$1,910,635.00",$655.00,76.629414,81.182722,65.684%,81.316%
Johnson High School,District,4761,"$3,094,650.00",$650.00,77.072464,80.966394,66.058%,81.222%
Pena High School,Charter,962,"$585,858.00",$609.00,83.839917,84.044699,94.595%,95.946%
