In [87]:
#Dependencies
import pandas as pd
import numpy as np
import os

# define file path
schools_file = os.path.join('Resources','schools_complete.csv')
students_file = os.path.join('Resources', 'students_complete.csv')

# read schools file
schools_df = pd.read_csv(schools_file)

#read student file
students_df = pd.read_csv(students_file)

#create array of unique school names
unique_school_names = schools_df['name'].unique()
#gives the length of unique school names to give us how many schools
school_count = len(unique_school_names)

#district student count
dist_student_count = schools_df['size'].sum()

#student count from student file (to verify with district student count)
total_student_rec = students_df['name'].count()

#total budget
total_budget = schools_df['budget'].sum()

#calculations for number and % passing reading
num_passing_reading = students_df.loc[students_df['reading_score'] >= 70]['reading_score'].count()
perc_pass_reading = num_passing_reading/total_student_rec
perc_pass_reading

#calculations for number and % passing math
num_passing_math = students_df.loc[students_df['math_score'] >= 70]['math_score'].count()
perc_pass_math = num_passing_math/total_student_rec
perc_pass_math

#average math score calculation
avg_math_score = students_df['math_score'].mean()
avg_math_score 

#average reading score calculation
avg_reading_score = students_df['reading_score'].mean()
avg_reading_score

#Overall Passing Rate Calculations
overall_pass = np.mean([perc_pass_reading, perc_pass_math])

# district dataframe from dictionary

district_summary = pd.DataFrame({
    
    "Total Schools": [school_count],
    "Total Students": [dist_student_count],
    "Total Budget": [total_budget],
    "Average Reading Score": [avg_reading_score],
    "Average Math Score": [avg_math_score],
    "% Passing Reading":[perc_pass_reading],
    "% Passing Math": [perc_pass_math],
    "Overall Passing Rate": [overall_pass]

})

#store as different df to change order
dist_sum = district_summary[["Total Schools", "Total Students", "Total Budget", "Average Reading Score", "Average Math Score", '% Passing Reading', '% Passing Math', 'Overall Passing Rate']]

#format cells
dist_sum.style.format({"Total Budget": "${:,.2f}", "Average Reading Score": "{:.1f}", "Average Math Score": "{:.1f}", "% Passing Math": "{:.1%}", "% Passing Reading": "{:.1%}", "Overall Passing Rate": "{:.1%}"})





Unnamed: 0,Total Schools,Total Students,Total Budget,Average Reading Score,Average Math Score,% Passing Reading,% Passing Math,Overall Passing Rate
0,15,39170,"$24,649,428.00",81.9,79.0,85.8%,75.0%,80.4%


In [None]:
# Create an overview table that summarizes key metrics about each school, including:
# School Name
# School Type
# Total Students
# Total School Budget
# Per Student Budget
# Average Math Score
# Average Reading Score
# % Passing Math
# % Passing Reading
# Overall Passing Rate (Average of the above two)

In [88]:
students_df.columns

Index(['Student ID', 'name', 'gender', 'grade', 'school', 'reading_score',
       'math_score'],
      dtype='object')

In [113]:
schools_df.columns

Index(['School ID', 'name', 'type', 'size', 'budget'], dtype='object')

In [291]:
#groups by school
by_school = students_df.groupby(['school'])

#counts students per school and creates DataFrame
# students_per_school = pd.DataFrame([by_school['Student ID'].count(), by_school)

#creates df of school name and budget
district_info = schools_df[['name','type','size','budget']]
                                    
#adds budget per student
district_info['Per Student Budget'] = schools_df['budget']/schools_df['size']

district_info.rename(columns = {'name': 'school'}, inplace = True)

#creates dataframe of avg math and reading score by school
avg_math_by_sch = by_school['math_score'].mean().round(1).reset_index()
avg_read_by_sch = by_school['reading_score'].mean().round(1).reset_index()
avg_scores = pd.merge(avg_math_by_sch, avg_read_by_sch, on=('school'))
avg_scores.rename(columns = {'math_score': 'Average Math Score', 'reading_score': 'Average Reading Score'}, inplace=True)

#school level passing scores counts
pass_math = students_df.loc[students_df['math_score'] >=70][['school', 'math_score']]
pass_math_by_sch = pass_math.groupby('school').count().reset_index()
pass_math_by_sch.rename(columns = {"math_score": "# passing math"}, inplace=True)

pass_read = students_df.loc[students_df['reading_score'] >=70][['school', 'reading_score']]
pass_read_by_sch = pass_read.groupby('school').count().reset_index()
pass_read_by_sch.rename(columns = {"reading_score": "# passing reading"}, inplace=True)

pass_count = pd.merge(pass_math_by_sch, pass_read_by_sch, on=('school'))

pass_read_by_sch

#merge all so far
sch_summary = pd.merge(district_info, avg_scores, on=('school'))
sch_summary = pd.merge(sch_summary, pass_count, on=('school'))

sch_summary['% Passing Math'] = sch_summary['# passing math']/sch_summary['size']
sch_summary['% Passing Reading'] = sch_summary['# passing reading']/sch_summary['size']
del sch_summary['# passing math']
del sch_summary['# passing reading']
sch_summary['Overall Passing Rate'] = np.mean([sch_summary['% Passing Math'], sch_summary['% Passing Reading']])
sch_summary.rename(columns = {'school': "School Name", "type": "School Type", "size":"Total Students", "budget": "Total School Budget"}, inplace = True)
sch_summary.style.format({'Total Students': '{:,}', "Total School Budget": "${:,}", "Per Student Budget": "${:.0f}", "% Passing Math": "{:.1%}", "% Passing Reading": "{:.1%}", "Overall Passing Rate": "{:.1%}"})
#dist_sum.style.format({"Total Budget": "${:,.2f}", "Average Reading Score": "{:.1f}", "Average Math Score": "{:.1f}", "% Passing Math": "{:.1%}", "% Passing Reading": "{:.1%}", "Overall Passing Rate": "{:.1%}"})

Unnamed: 0,School Name,School Type,Total Students,Total School Budget,Per Student Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,Overall Passing Rate
0,Huang High School,District,2917,"$1,910,635",$655,76.6,81.2,65.7%,81.3%,85.1%
1,Figueroa High School,District,2949,"$1,884,411",$639,76.7,81.2,66.0%,80.7%,85.1%
2,Shelton High School,Charter,1761,"$1,056,600",$600,83.4,83.7,93.9%,95.9%,85.1%
3,Hernandez High School,District,4635,"$3,022,020",$652,77.3,80.9,66.8%,80.9%,85.1%
4,Griffin High School,Charter,1468,"$917,500",$625,83.4,83.8,93.4%,97.1%,85.1%
5,Wilson High School,Charter,2283,"$1,319,574",$578,83.3,84.0,93.9%,96.5%,85.1%
6,Cabrera High School,Charter,1858,"$1,081,356",$582,83.1,84.0,94.1%,97.0%,85.1%
7,Bailey High School,District,4976,"$3,124,928",$628,77.0,81.0,66.7%,81.9%,85.1%
8,Holden High School,Charter,427,"$248,087",$581,83.8,83.8,92.5%,96.3%,85.1%
9,Pena High School,Charter,962,"$585,858",$609,83.8,84.0,94.6%,95.9%,85.1%
