## District Summary

In [1]:
# Dependencies and Setup
import pandas as pd

# File to Load (Remember to Change These)
school_data_to_load = "Resources/schools_complete.csv"
student_data_to_load = "Resources/students_complete.csv"

# Read School and Student Data File and store into Pandas DataFrames
school_data = pd.read_csv(school_data_to_load)
student_data = pd.read_csv(student_data_to_load)

# Combine the data into a single dataset.  
school_data_complete = pd.merge(student_data, school_data, how="left", on=["school_name", "school_name"])

In [2]:
school_data

Unnamed: 0,School ID,school_name,type,size,budget
0,0,Huang High School,District,2917,1910635
1,1,Figueroa High School,District,2949,1884411
2,2,Shelton High School,Charter,1761,1056600
3,3,Hernandez High School,District,4635,3022020
4,4,Griffin High School,Charter,1468,917500
5,5,Wilson High School,Charter,2283,1319574
6,6,Cabrera High School,Charter,1858,1081356
7,7,Bailey High School,District,4976,3124928
8,8,Holden High School,Charter,427,248087
9,9,Pena High School,Charter,962,585858


In [3]:
student_data

Unnamed: 0,Student ID,student_name,gender,grade,school_name,reading_score,math_score
0,0,Paul Bradley,M,9th,Huang High School,66,79
1,1,Victor Smith,M,12th,Huang High School,94,61
2,2,Kevin Rodriguez,M,12th,Huang High School,90,60
3,3,Dr. Richard Scott,M,12th,Huang High School,67,58
4,4,Bonnie Ray,F,9th,Huang High School,97,84
...,...,...,...,...,...,...,...
39165,39165,Donna Howard,F,12th,Thomas High School,99,90
39166,39166,Dawn Bell,F,10th,Thomas High School,95,70
39167,39167,Rebecca Tanner,F,9th,Thomas High School,73,84
39168,39168,Desiree Kidd,F,10th,Thomas High School,99,90


In [4]:
school_data_complete

Unnamed: 0,Student ID,student_name,gender,grade,school_name,reading_score,math_score,School ID,type,size,budget
0,0,Paul Bradley,M,9th,Huang High School,66,79,0,District,2917,1910635
1,1,Victor Smith,M,12th,Huang High School,94,61,0,District,2917,1910635
2,2,Kevin Rodriguez,M,12th,Huang High School,90,60,0,District,2917,1910635
3,3,Dr. Richard Scott,M,12th,Huang High School,67,58,0,District,2917,1910635
4,4,Bonnie Ray,F,9th,Huang High School,97,84,0,District,2917,1910635
...,...,...,...,...,...,...,...,...,...,...,...
39165,39165,Donna Howard,F,12th,Thomas High School,99,90,14,Charter,1635,1043130
39166,39166,Dawn Bell,F,10th,Thomas High School,95,70,14,Charter,1635,1043130
39167,39167,Rebecca Tanner,F,9th,Thomas High School,73,84,14,Charter,1635,1043130
39168,39168,Desiree Kidd,F,10th,Thomas High School,99,90,14,Charter,1635,1043130


In [5]:
## Total Number of School
total_schools = school_data_complete['school_name'].drop_duplicates().reset_index(drop=True).count()

## Total Number of Students
total_students = school_data_complete['student_name'].count()

## Total Budget
total_budget = school_data_complete['budget'].drop_duplicates().reset_index(drop=True).sum()

## Average Math Score
average_math_score = school_data_complete['math_score'].mean()

## Average Reading Score
average_reading_score = school_data_complete['reading_score'].mean()

## Percentage of students passing math
per_math_pass = school_data_complete.loc[school_data_complete['math_score']>=70].shape[0]/total_students * 100

## Percentage of students passing reading
per_reading_pass = school_data_complete.loc[school_data_complete['reading_score']>=70].shape[0]/total_students * 100

## Percentage of students passing math and reading
per_math_reading_pass = school_data_complete[
    (school_data_complete['math_score']>=70)&
    (school_data_complete['reading_score']>=70)
].shape[0]/total_students * 100

## District Summary Data Frame
district_summary_df = pd.DataFrame(columns=["Total Schools","Total Students","Total Budget", "Average Math Score","Average Reading Score", "% Passing Math", "% Passing Reading", "% Overall Passing"])

district_summary_df['Total Schools'] = [total_schools]
district_summary_df['Total Students'] = [total_students]
district_summary_df['Total Budget'] = [total_budget]
district_summary_df['Average Math Score']=[average_math_score]
district_summary_df['Average Reading Score']=[average_reading_score]
district_summary_df['% Passing Math']= [per_math_pass]
district_summary_df['% Passing Reading']=[per_reading_pass]
district_summary_df['% Overall Passing'] = [per_math_reading_pass]

district_summary_df['Total Students'] = district_summary_df['Total Students'].map('{:,}'.format)

district_summary_df['Total Budget'] = district_summary_df['Total Budget'].map('${:,}'.format)

district_summary_df



Unnamed: 0,Total Schools,Total Students,Total Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing
0,15,39170,"$24,649,428",78.985371,81.87784,74.980853,85.805463,65.172326


## School Summary

In [30]:
## Groupy function to collate data based on each school in the district
school_names = school_data_complete.set_index('school_name').groupby(['school_name'])

## Referring back to the first data frame that contained information only on the schools. This prevents the collection of duplicate data
school_types = school_data.set_index('school_name')['type']

## Counting the number of students per school based on their names
school_student_count = school_names['student_name'].count()

## Determining the budget for each school from the first school dataframe
school_budget = school_data.set_index('school_name')['budget']

## Calculating the average budget for each student
student_budget = school_budget/school_student_count

## Average maths score for each school
school_math_average = school_names['math_score'].mean()

## Average reading score for each school
school_reading_average = school_names['reading_score'].mean()

## Percentage of students that have passed maths in each school
school_math_pass = ((school_data_complete[
    (school_data_complete['math_score']>=70)
].groupby('school_name')['student_name'].count())/school_student_count)*100

## Percentage of students that have passed reading in each school
school_reading_pass = ((school_data_complete[
    (school_data_complete['reading_score']>=70)
].groupby('school_name')['student_name'].count())/school_student_count)*100

## Percentage of students that have passed both maths and reading in each school
school_overall_pass = school_math_pass = ((school_data_complete[
    (school_data_complete['math_score']>=70)&
    (school_data_complete['reading_score']>=70)
].groupby('school_name')['student_name'].count())/school_student_count)*100

## Creating a Summary Tabale in a new data frame
school_summary = pd.DataFrame({
    "School Type": school_types,
    'Total Students': school_student_count,
    'Total School Budget': school_budget,
    'Per Student Budget': student_budget,
    'Average Math Score': school_math_average,
    'Average Reading Score': school_reading_average,
    '% Passing Math': school_math_pass,
    '% Passing Reading': school_reading_pass,
    '% Overall Passing': school_overall_pass
})

## Formatting the Data Frame
school_summary['Total School Budget'] = school_summary['Total School Budget'].map('${:,}'.format)

school_summary['Per Student Budget'] = school_summary['Per Student Budget'].map('${:.2f}'.format)

school_summary


Unnamed: 0,School Type,Total Students,Total School Budget,Per Student Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing
Bailey High School,District,4976,"$3,124,928",$628.00,77.048432,81.033963,54.642283,81.93328,54.642283
Cabrera High School,Charter,1858,"$1,081,356",$582.00,83.061895,83.97578,91.334769,97.039828,91.334769
Figueroa High School,District,2949,"$1,884,411",$639.00,76.711767,81.15802,53.204476,80.739234,53.204476
Ford High School,District,2739,"$1,763,916",$644.00,77.102592,80.746258,54.289887,79.299014,54.289887
Griffin High School,Charter,1468,"$917,500",$625.00,83.351499,83.816757,90.599455,97.138965,90.599455
Hernandez High School,District,4635,"$3,022,020",$652.00,77.289752,80.934412,53.527508,80.862999,53.527508
Holden High School,Charter,427,"$248,087",$581.00,83.803279,83.814988,89.227166,96.252927,89.227166
Huang High School,District,2917,"$1,910,635",$655.00,76.629414,81.182722,53.513884,81.316421,53.513884
Johnson High School,District,4761,"$3,094,650",$650.00,77.072464,80.966394,53.539172,81.222432,53.539172
Pena High School,Charter,962,"$585,858",$609.00,83.839917,84.044699,90.540541,95.945946,90.540541
