# PyCity Schools Analysis


In [175]:
#Dependencies
import pandas as pd
import numpy as np
import os

# Paths to the two input CSVs, both located in the Resources folder
schools_file = os.path.join('Resources', 'schools_complete.csv')
students_file = os.path.join('Resources', 'students_complete.csv')

# Load each CSV into its own DataFrame (schools first, then students)
schools_df, students_df = (
    pd.read_csv(csv_path) for csv_path in (schools_file, students_file)
)

In [176]:
# Number of schools = number of distinct names in the schools file
unique_school_names = schools_df['name'].unique()
school_count = len(unique_school_names)

# District-wide totals taken from the schools file
dist_student_count = schools_df['size'].sum()
total_budget = schools_df['budget'].sum()

# Count of non-null student names in the students file; serves as a
# cross-check against dist_student_count and as the denominator below
total_student_rec = students_df['name'].count()

# District-wide mean scores
avg_math_score = students_df['math_score'].mean()
avg_reading_score = students_df['reading_score'].mean()

# A score of 70 or higher counts as passing
num_passing_reading = students_df.loc[
    students_df['reading_score'] >= 70, 'reading_score'
].count()
perc_pass_reading = num_passing_reading / total_student_rec

num_passing_math = students_df.loc[
    students_df['math_score'] >= 70, 'math_score'
].count()
perc_pass_math = num_passing_math / total_student_rec

# Overall passing rate is the mean of the two passing fractions
overall_pass = np.mean([perc_pass_reading, perc_pass_math])

# One-row summary table for the whole district
district_summary = pd.DataFrame({
    "Total Schools": [school_count],
    "Total Students": [dist_student_count],
    "Total Budget": [total_budget],
    "Average Reading Score": [avg_reading_score],
    "Average Math Score": [avg_math_score],
    "% Passing Reading": [perc_pass_reading],
    "% Passing Math": [perc_pass_math],
    "Overall Passing Rate": [overall_pass],
})

# Select the columns explicitly so the display order is fixed
dist_sum = district_summary[[
    "Total Schools",
    "Total Students",
    "Total Budget",
    "Average Reading Score",
    "Average Math Score",
    '% Passing Reading',
    '% Passing Math',
    'Overall Passing Rate',
]]

# Render with currency, one-decimal and percentage formatting
dist_sum.style.format({
    "Total Budget": "${:,.2f}",
    "Average Reading Score": "{:.1f}",
    "Average Math Score": "{:.1f}",
    "% Passing Math": "{:.1%}",
    "% Passing Reading": "{:.1%}",
    "Overall Passing Rate": "{:.1%}",
})

Unnamed: 0,Total Schools,Total Students,Total Budget,Average Reading Score,Average Math Score,% Passing Reading,% Passing Math,Overall Passing Rate
0,15,39170,"$24,649,428.00",81.9,79.0,85.8%,75.0%,80.4%


In [177]:
#groups student records by school
by_school = students_df.groupby(['school'])

#creates df of school name, type, size and budget
#.copy() makes this an independent frame, so the column added below does not
#write into a slice of schools_df (avoids pandas' SettingWithCopyWarning)
district_info = schools_df[['name','type','size','budget']].copy()

#adds budget per student
district_info['Per Student Budget'] = district_info['budget']/district_info['size']

#rename so the school-name column matches the key used in students_df
district_info.rename(columns = {'name': 'school'}, inplace = True)

#creates dataframe of avg math and reading score by school (rounded to 1 decimal)
avg_math_by_sch = by_school['math_score'].mean().round(1).reset_index()
avg_read_by_sch = by_school['reading_score'].mean().round(1).reset_index()
avg_scores = pd.merge(avg_math_by_sch, avg_read_by_sch, on='school')
avg_scores.rename(columns = {'math_score': 'Average Math Score', 'reading_score': 'Average Reading Score'}, inplace=True)

#school level passing score counts (a score of 70 or higher is passing)
pass_math = students_df.loc[students_df['math_score'] >= 70, ['school', 'math_score']]
pass_math_by_sch = pass_math.groupby('school').count().reset_index()
pass_math_by_sch.rename(columns = {"math_score": "# passing math"}, inplace=True)

pass_read = students_df.loc[students_df['reading_score'] >= 70, ['school', 'reading_score']]
pass_read_by_sch = pass_read.groupby('school').count().reset_index()
pass_read_by_sch.rename(columns = {"reading_score": "# passing reading"}, inplace=True)

pass_count = pd.merge(pass_math_by_sch, pass_read_by_sch, on='school')

#merge school info, average scores and passing counts into one table
sch_summary = pd.merge(district_info, avg_scores, on='school')
sch_summary = pd.merge(sch_summary, pass_count, on='school')

#turn passing counts into fractions of each school's size, then drop the raw counts
sch_summary['% Passing Math'] = sch_summary['# passing math']/sch_summary['size']
sch_summary['% Passing Reading'] = sch_summary['# passing reading']/sch_summary['size']
del sch_summary['# passing math']
del sch_summary['# passing reading']

#overall passing rate is the mean of the math and reading passing fractions
sch_summary['Overall Passing Rate'] = (sch_summary['% Passing Math']+sch_summary['% Passing Reading'])/2

#presentation names for the columns, indexed by school name
sch_summary.rename(columns = {'school': "School Name", "type": "School Type", "size":"Total Students", "budget": "Total School Budget"}, inplace = True)
sch_summary.set_index('School Name', inplace=True)
sch_summary.style.format({'Total Students': '{:,}', "Total School Budget": "${:,}", "Per Student Budget": "${:.0f}", "% Passing Math": "{:.1%}", "% Passing Reading": "{:.1%}", "Overall Passing Rate": "{:.1%}"})


Unnamed: 0_level_0,School Type,Total Students,Total School Budget,Per Student Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,Overall Passing Rate
School Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Huang High School,District,2917,"$1,910,635",$655,76.6,81.2,65.7%,81.3%,73.5%
Figueroa High School,District,2949,"$1,884,411",$639,76.7,81.2,66.0%,80.7%,73.4%
Shelton High School,Charter,1761,"$1,056,600",$600,83.4,83.7,93.9%,95.9%,94.9%
Hernandez High School,District,4635,"$3,022,020",$652,77.3,80.9,66.8%,80.9%,73.8%
Griffin High School,Charter,1468,"$917,500",$625,83.4,83.8,93.4%,97.1%,95.3%
Wilson High School,Charter,2283,"$1,319,574",$578,83.3,84.0,93.9%,96.5%,95.2%
Cabrera High School,Charter,1858,"$1,081,356",$582,83.1,84.0,94.1%,97.0%,95.6%
Bailey High School,District,4976,"$3,124,928",$628,77.0,81.0,66.7%,81.9%,74.3%
Holden High School,Charter,427,"$248,087",$581,83.8,83.8,92.5%,96.3%,94.4%
Pena High School,Charter,962,"$585,858",$609,83.8,84.0,94.6%,95.9%,95.3%


In [178]:
# Rank every school from highest to lowest overall passing rate;
# top_5 holds the full ranking, and only its first five rows are displayed
top_5 = sch_summary.sort_values(by="Overall Passing Rate", ascending=False)
top_5.head().style.format({
    'Total Students': '{:,}',
    "Total School Budget": "${:,}",
    "Per Student Budget": "${:.0f}",
    "% Passing Math": "{:.1%}",
    "% Passing Reading": "{:.1%}",
    "Overall Passing Rate": "{:.1%}",
})

Unnamed: 0_level_0,School Type,Total Students,Total School Budget,Per Student Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,Overall Passing Rate
School Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Cabrera High School,Charter,1858,"$1,081,356",$582,83.1,84.0,94.1%,97.0%,95.6%
Thomas High School,Charter,1635,"$1,043,130",$638,83.4,83.8,93.3%,97.3%,95.3%
Pena High School,Charter,962,"$585,858",$609,83.8,84.0,94.6%,95.9%,95.3%
Griffin High School,Charter,1468,"$917,500",$625,83.4,83.8,93.4%,97.1%,95.3%
Wilson High School,Charter,2283,"$1,319,574",$578,83.3,84.0,93.9%,96.5%,95.2%


In [179]:
# The last five rows of the descending ranking are the lowest-performing schools
bottom_5 = top_5.tail(5)
bottom_5.style.format({
    'Total Students': '{:,}',
    "Total School Budget": "${:,}",
    "Per Student Budget": "${:.0f}",
    "% Passing Math": "{:.1%}",
    "% Passing Reading": "{:.1%}",
    "Overall Passing Rate": "{:.1%}",
})

Unnamed: 0_level_0,School Type,Total Students,Total School Budget,Per Student Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,Overall Passing Rate
School Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Ford High School,District,2739,"$1,763,916",$644,77.1,80.7,68.3%,79.3%,73.8%
Johnson High School,District,4761,"$3,094,650",$650,77.1,81.0,66.1%,81.2%,73.6%
Huang High School,District,2917,"$1,910,635",$655,76.6,81.2,65.7%,81.3%,73.5%
Figueroa High School,District,2949,"$1,884,411",$639,76.7,81.2,66.0%,80.7%,73.4%
Rodriguez High School,District,3999,"$2,547,363",$637,76.8,80.7,66.4%,80.2%,73.3%


In [180]:
# Mean math score per school, computed separately for each grade level
ninth_math = (
    students_df[students_df['grade'] == '9th']
    .groupby('school')["math_score"]
    .mean()
    .reset_index()
    .rename(columns={'math_score': "9th"})
)
tenth_math = (
    students_df[students_df['grade'] == '10th']
    .groupby('school')["math_score"]
    .mean()
    .reset_index()
    .rename(columns={'math_score': "10th"})
)
eleventh_math = (
    students_df[students_df['grade'] == '11th']
    .groupby('school')["math_score"]
    .mean()
    .reset_index()
    .rename(columns={'math_score': "11th"})
)
twelfth_math = (
    students_df[students_df['grade'] == '12th']
    .groupby('school')["math_score"]
    .mean()
    .reset_index()
    .rename(columns={'math_score': "12th"})
)

# Join the four per-grade tables on school, one grade at a time
math_scores = ninth_math.merge(tenth_math, on='school')
math_scores = math_scores.merge(eleventh_math, on='school')
math_scores = math_scores.merge(twelfth_math, on='school')

# Index by school name for display
math_scores = math_scores.rename(columns={'school': 'School Name'}).set_index('School Name')

# Show every grade column with one decimal place
math_scores.style.format({'9th': '{:.1f}', "10th": '{:.1f}', "11th": "{:.1f}", "12th": "{:.1f}"})

Unnamed: 0_level_0,9th,10th,11th,12th
School Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Bailey High School,77.1,77.0,77.5,76.5
Cabrera High School,83.1,83.2,82.8,83.3
Figueroa High School,76.4,76.5,76.9,77.2
Ford High School,77.4,77.7,76.9,76.2
Griffin High School,82.0,84.2,83.8,83.4
Hernandez High School,77.4,77.3,77.1,77.2
Holden High School,83.8,83.4,85.0,82.9
Huang High School,77.0,75.9,76.4,77.2
Johnson High School,77.2,76.7,77.5,76.9
Pena High School,83.6,83.4,84.3,84.1


In [181]:
# Mean reading score per school, computed separately for each grade level
ninth_reading = (
    students_df[students_df['grade'] == '9th']
    .groupby('school')["reading_score"]
    .mean()
    .reset_index()
    .rename(columns={"reading_score": "9th"})
)
tenth_reading = (
    students_df[students_df['grade'] == '10th']
    .groupby('school')["reading_score"]
    .mean()
    .reset_index()
    .rename(columns={"reading_score": "10th"})
)
eleventh_reading = (
    students_df[students_df['grade'] == '11th']
    .groupby('school')["reading_score"]
    .mean()
    .reset_index()
    .rename(columns={"reading_score": "11th"})
)
twelfth_reading = (
    students_df[students_df['grade'] == '12th']
    .groupby('school')["reading_score"]
    .mean()
    .reset_index()
    .rename(columns={"reading_score": "12th"})
)

# Join the four per-grade tables on school, one grade at a time
reading_scores = ninth_reading.merge(tenth_reading, on='school')
reading_scores = reading_scores.merge(eleventh_reading, on='school')
reading_scores = reading_scores.merge(twelfth_reading, on='school')

# Index by school name for display
reading_scores = reading_scores.rename(columns={'school': 'School Name'}).set_index('School Name')

# Show every grade column with one decimal place
reading_scores.style.format({'9th': '{:.1f}', "10th": '{:.1f}', "11th": "{:.1f}", "12th": "{:.1f}"})


Unnamed: 0_level_0,9th,10th,11th,12th
School Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Bailey High School,81.3,80.9,80.9,80.9
Cabrera High School,83.7,84.3,83.8,84.3
Figueroa High School,81.2,81.4,80.6,81.4
Ford High School,80.6,81.3,80.4,80.7
Griffin High School,83.4,83.7,84.3,84.0
Hernandez High School,80.9,80.7,81.4,80.9
Holden High School,83.7,83.3,83.8,84.7
Huang High School,81.3,81.5,81.4,80.3
Johnson High School,81.3,80.8,80.6,81.2
Pena High School,83.8,83.6,84.3,84.6


In [198]:
# Names of the schools whose per-student budget is below 585
low_budget_mask = district_info['Per Student Budget'] < 585
bin1_df = district_info.loc[low_budget_mask, 'school']

# Display the selection with the original row labels exposed as a column
bin1_df.reset_index()

SyntaxError: invalid syntax (<ipython-input-198-02c8bb4bca14>, line 3)

In [182]:
# Display the students DataFrame that was read from students_complete.csv
students_df

Unnamed: 0,Student ID,name,gender,grade,school,reading_score,math_score
0,0,Paul Bradley,M,9th,Huang High School,66,79
1,1,Victor Smith,M,12th,Huang High School,94,61
2,2,Kevin Rodriguez,M,12th,Huang High School,90,60
3,3,Dr. Richard Scott,M,12th,Huang High School,67,58
4,4,Bonnie Ray,F,9th,Huang High School,97,84
5,5,Bryan Miranda,M,9th,Huang High School,94,94
6,6,Sheena Carter,F,11th,Huang High School,82,80
7,7,Nicole Baker,F,12th,Huang High School,96,69
8,8,Michael Roth,M,10th,Huang High School,95,87
9,9,Matthew Greene,M,10th,Huang High School,96,84
