## District Summary

In [1]:
# Dependencies and Setup
import pandas as pd

# Input files (relative to the notebook's working directory; change these to
# point at a different copy of the data).
school_data_to_load = "Resources/schools_complete.csv"
student_data_to_load = "Resources/students_complete.csv"

# Read the school and student CSV files into pandas DataFrames.
school_data = pd.read_csv(school_data_to_load)
student_data = pd.read_csv(student_data_to_load)

# Combine the data into a single dataset: one row per student row, with the
# matching school's columns attached. A left join keeps every student row even
# when no school matches.
# `validate="many_to_one"` makes pandas raise a MergeError if `school_name` is
# not unique in `school_data`, which would otherwise silently duplicate student
# rows and distort every downstream count and percentage.
school_data_complete = pd.merge(
    student_data,
    school_data,
    how="left",
    on="school_name",
    validate="many_to_one",
)

In [2]:
# Show the merged student/school frame as the cell's output to eyeball the
# join result (student columns followed by the school's columns).
school_data_complete

Unnamed: 0,Student ID,student_name,gender,grade,school_name,reading_score,math_score,School ID,type,size,budget
0,0,Paul Bradley,M,9th,Huang High School,66,79,0,District,2917,1910635
1,1,Victor Smith,M,12th,Huang High School,94,61,0,District,2917,1910635
2,2,Kevin Rodriguez,M,12th,Huang High School,90,60,0,District,2917,1910635
3,3,Dr. Richard Scott,M,12th,Huang High School,67,58,0,District,2917,1910635
4,4,Bonnie Ray,F,9th,Huang High School,97,84,0,District,2917,1910635
...,...,...,...,...,...,...,...,...,...,...,...
39165,39165,Donna Howard,F,12th,Thomas High School,99,90,14,Charter,1635,1043130
39166,39166,Dawn Bell,F,10th,Thomas High School,95,70,14,Charter,1635,1043130
39167,39167,Rebecca Tanner,F,9th,Thomas High School,73,84,14,Charter,1635,1043130
39168,39168,Desiree Kidd,F,10th,Thomas High School,99,90,14,Charter,1635,1043130


In [3]:
## District-wide key metrics, collected into a one-row summary table.

# Minimum score that counts as a pass (same threshold for math and reading).
PASSING_SCORE = 70

## Total Number of Schools
total_schools = school_data_complete['school_name'].nunique()

## Total Number of Students
# Count rows rather than non-null names, so the student total uses the same
# population as the pass rates below (which are computed over every row).
total_students = len(school_data_complete)

## Total Budget
# The budget value is repeated on every student row of a school, so keep one
# row per school before summing. De-duplicating on the school name (not on the
# budget value) keeps two schools that happen to share the same budget.
total_budget = (
    school_data_complete
    .drop_duplicates(subset='school_name')['budget']
    .sum()
)

## Average Math Score
average_math_score = school_data_complete['math_score'].mean()

## Average Reading Score
average_reading_score = school_data_complete['reading_score'].mean()

# Boolean masks, one entry per row: True where the score meets the threshold.
passed_math = school_data_complete['math_score'] >= PASSING_SCORE
passed_reading = school_data_complete['reading_score'] >= PASSING_SCORE

## Percentage of students passing math
# The mean of a boolean mask is the fraction of True rows.
per_math_pass = passed_math.mean() * 100

## Percentage of students passing reading
per_reading_pass = passed_reading.mean() * 100

## Percentage of students passing math and reading
per_math_reading_pass = (passed_math & passed_reading).mean() * 100

## District Summary Data Frame
# Build the single-row frame in one step; the dict order fixes the column order.
district_summary_df = pd.DataFrame({
    "Total Schools": [total_schools],
    "Total Students": [total_students],
    "Total Budget": [total_budget],
    "Average Math Score": [average_math_score],
    "Average Reading Score": [average_reading_score],
    "% Passing Math": [per_math_pass],
    "% Passing Reading": [per_reading_pass],
    "% Overall Passing": [per_math_reading_pass],
})

# Presentation-only formatting: these two columns become strings
# (thousands separators, plus a leading '$' for the budget).
district_summary_df['Total Students'] = district_summary_df['Total Students'].map('{:,}'.format)
district_summary_df['Total Budget'] = district_summary_df['Total Budget'].map('${:,}'.format)

district_summary_df



Unnamed: 0,Total Schools,Total Students,Total Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing
0,15,39170,"$24,649,428",78.985371,81.87784,74.980853,85.805463,65.172326


## School Summary

In [4]:
## Per-school metrics, computed in a single pandas groupby/aggregate pass.
##
## Grouping by (school_name, type, budget) carries the school's type and budget
## into the result index. The result columns are two-level:
## (source column, aggregate); pandas labels each lambda column '<lambda_0>'.

schools_summary = school_data_complete.groupby(['school_name','type','budget']).agg({
    # Number of student rows with a non-null name in each school.
    'student_name': 'count',
    # Mean score, then % passing: the share of the school's student rows with a
    # score of at least 70. The mean of the boolean mask divides by every row
    # in the group, so a student who scored 0 still counts in the denominator
    # (dividing by the number of scores > 0 would drop them and inflate the rate).
    'math_score': ['mean', lambda x: (x >= 70).mean() * 100],
    'reading_score': ['mean', lambda y: (y >= 70).mean() * 100],
    })

## NOTE(review): the original note here ("separate groupby function then
## re-merge into ...") was left unfinished -- confirm the intended follow-up step.
schools_summary




Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,student_name,math_score,math_score,reading_score,reading_score
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,count,mean,<lambda_0>,mean,<lambda_0>
school_name,type,budget,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
Bailey High School,District,3124928,4976,77.048432,66.680064,81.033963,81.93328
Cabrera High School,Charter,1081356,1858,83.061895,94.133477,83.97578,97.039828
Figueroa High School,District,1884411,2949,76.711767,65.988471,81.15802,80.739234
Ford High School,District,1763916,2739,77.102592,68.309602,80.746258,79.299014
Griffin High School,Charter,917500,1468,83.351499,93.392371,83.816757,97.138965
Hernandez High School,District,3022020,4635,77.289752,66.752967,80.934412,80.862999
Holden High School,Charter,248087,427,83.803279,92.505855,83.814988,96.252927
Huang High School,District,1910635,2917,76.629414,65.683922,81.182722,81.316421
Johnson High School,District,3094650,4761,77.072464,66.057551,80.966394,81.222432
Pena High School,Charter,585858,962,83.839917,94.594595,84.044699,95.945946
