In [1]:
# Import Dependencies
import pandas as pd
import numpy as np

# Input files are referenced by bare file name, i.e. relative to the working
# directory (the notebook and both CSV files share one folder).
SCHOOLS_CSV = "schools_complete.csv"
STUDENTS_CSV = "students_complete.csv"

schools_df = pd.read_csv(SCHOOLS_CSV)
students_df = pd.read_csv(STUDENTS_CSV)

In [2]:
# Flag every student row with 1 (pass) or 0 (fail); a subject is passed at 70 or above.
pass_read_score = (students_df['reading_score'] >= 70).astype(int).tolist()
pass_math_score = (students_df['math_score'] >= 70).astype(int).tolist()

# "Overall" pass: the reading and math scores together reach at least 140.
# NOTE(review): with this rule a student can pass overall while failing one
# subject, which is why the district output shows an overall rate above both
# single-subject rates - confirm this is the intended definition.
total_student_score = students_df['reading_score'] + students_df['math_score']
pass_overall_score = (total_student_score >= 140).astype(int).tolist()

# Attach the flags to the student table (column order: reading, math, overall).
students_df['read_pass'] = pass_read_score
students_df['math_pass'] = pass_math_score
students_df['overall_pass'] = pass_overall_score

# Relabel schools_df's 'name' column as 'school', the key that the later cells
# group and merge on.
schools_df = schools_df.rename(columns={'name': 'school'})

In [3]:
# Scalar formatting helper (the alternative is to fix the formatting afterwards in Excel).
def value_format(adjustment, mult_by, *value):
    """Scale each value by ``mult_by`` and render it with a format template.

    Args:
        adjustment: ``str.format`` template with a single replacement field,
            e.g. ``"{:.2f}%"``.
        mult_by: Factor every value is multiplied by before formatting
            (e.g. 100 to turn a fraction into a percentage).
        *value: Any number of values to format.

    Returns:
        A list with one formatted string per value, in argument order.
    """
    return [adjustment.format(item * mult_by) for item in value]

In [4]:
# Column formatting helper (the alternative is to fix the formatting afterwards in Excel).
def series_format(adjustment, mult_by, *series):
    """Scale each Series by ``mult_by`` and render its elements as strings.

    Args:
        adjustment: ``str.format`` template with a single replacement field,
            e.g. ``"${:,.2f}"``.
        mult_by: Factor every Series is multiplied by before formatting
            (e.g. 100 to turn fractions into percentages).
        *series: Any number of pandas Series to format.

    Returns:
        A list with one Series of formatted strings per input Series, in
        argument order; each result keeps the index of its input.
    """
    formatted_columns = []
    for column in series:
        scaled = column * mult_by
        formatted_columns.append(scaled.apply(adjustment.format))
    return formatted_columns

In [5]:
# ---- District-wide totals and averages ----
total_schools = len(schools_df['school'].unique())
total_students = schools_df['size'].sum()    # head count taken from the per-school sizes
total_budget = schools_df['budget'].sum()
avg_math_score = students_df['math_score'].mean()
avg_read_score = students_df['reading_score'].mean()

# Passing rates as fractions: pass flags summed over all student rows,
# divided by the district head count computed above.
perc_pass_math = students_df['math_pass'].sum() / total_students
perc_pass_read = students_df['read_pass'].sum() / total_students
perc_pass_overall = students_df['overall_pass'].sum() / total_students

# ---- Display strings ----
total_students_frmt = "{:,}".format(total_students)
total_budget_frmt = "${:,}".format(total_budget)
ds_period_formatted = value_format("{:.2f}", 1, avg_math_score, avg_read_score)
ds_percent_formatted = value_format("{:.2f}%", 100, perc_pass_math, perc_pass_read, perc_pass_overall)

avg_math_text, avg_read_text = ds_period_formatted
pct_math_text, pct_read_text, pct_overall_text = ds_percent_formatted

# One-row summary table; the mapping's insertion order is also the column order.
district_summary_values = {
    'Total Schools': total_schools,
    'Total Students': total_students_frmt,
    'Total Budget': total_budget_frmt,
    'Average Math Score': avg_math_text,
    'Average Reading Score': avg_read_text,
    '% Passing Math': pct_math_text,
    '% Passing Reading': pct_read_text,
    '% Overall Passing Rate': pct_overall_text,
}

district_summary_df = pd.DataFrame(
    district_summary_values,
    index=['summary values'],
    columns=list(district_summary_values),
)

district_summary_df

Unnamed: 0,Total Schools,Total Students,Total Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing Rate
summary values,15,39170,"$24,649,428",78.99,81.88,74.98%,85.81%,89.39%


In [6]:
# Per-school totals of the student-level measures.
# Only the columns used below are aggregated: a bare groupby(...).sum() also
# tries to add up every other student column, and pandas >= 2.0 no longer drops
# non-numeric columns silently (numeric_only defaults to False there).
student_measure_columns = ['reading_score', 'math_score', 'read_pass', 'math_pass', 'overall_pass']
students_gb_sm_df = students_df.groupby(by='school')[student_measure_columns].sum().reset_index()

# Outer merge keeps every school from either frame, even without a match in the other.
school_summary_df = pd.merge(schools_df, students_gb_sm_df, how='outer', on='school')

# The summed columns receive their final labels here; they still hold per-school
# totals until they are divided by the school size further down.
school_summary_df = school_summary_df.rename(columns={
    'school': 'School',
    'type': 'School Type',
    'size': 'Total Students',
    'budget': 'Total School Budget',
    'reading_score': 'Average Reading Score',
    'math_score': 'Average Math Score',
    'read_pass': '% Passing Reading',
    'math_pass': '% Passing Math',
    'overall_pass': '% Overall Passing Rate'
})

school_summary_df['Per Student Budget'] = school_summary_df['Total School Budget'] / school_summary_df['Total Students']

# Turn each per-school total into a per-student figure: averages for the score
# columns, fractions between 0 and 1 for the passing columns.
for per_student_column in [
    'Average Math Score',
    'Average Reading Score',
    '% Passing Reading',
    '% Passing Math',
    '% Overall Passing Rate',
]:
    school_summary_df[per_student_column] = (
        school_summary_df[per_student_column] / school_summary_df['Total Students']
    )

In [7]:
# Copy first so that building the display table never touches the numeric
# school_summary_df.
clean_ss_df = school_summary_df.copy()

# Render the numeric columns as display strings (the alternative is to fix the
# formatting afterwards in Excel). series_format returns one formatted Series
# per input Series, in argument order.
ss_money_period_formatted = series_format("${:,.2f}", 1, clean_ss_df['Total School Budget'], clean_ss_df['Per Student Budget'])
ss_period_formatted = series_format("{:.2f}", 1, clean_ss_df['Average Math Score'], clean_ss_df['Average Reading Score'])
# Passing rates are stored as fractions, hence the factor of 100.
ss_percent_formatted = series_format("{:.2f}%", 100, clean_ss_df['% Passing Math'], clean_ss_df['% Passing Reading'], clean_ss_df['% Overall Passing Rate'])

ss_df = pd.DataFrame({
    'School': clean_ss_df['School'],
    'School Type': clean_ss_df['School Type'],
    'Total Students': clean_ss_df['Total Students'].map("{:,}".format),
    'Total School Budget': ss_money_period_formatted[0],
    'Per Student Budget': ss_money_period_formatted[1],
    'Average Math Score': ss_period_formatted[0],
    'Average Reading Score': ss_period_formatted[1],
    '% Passing Math': ss_percent_formatted[0],
    '% Passing Reading': ss_percent_formatted[1],
    '% Overall Passing Rate': ss_percent_formatted[2]
}, columns=[
    'School','School Type','Total Students','Total School Budget',
    'Per Student Budget','Average Math Score','Average Reading Score',
    '% Passing Math','% Passing Reading','% Overall Passing Rate'
]).set_index('School')

# Remove the "School" label shown above the index. Assigning None works on every
# pandas version, whereas `del ss_df.index.name` raises AttributeError on
# pandas >= 1.0 because Index.name is a property without a deleter.
ss_df.index.name = None

ss_df

Unnamed: 0,School Type,Total Students,Total School Budget,Per Student Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing Rate
Huang High School,District,2917,"$1,910,635.00",$655.00,76.63,81.18,65.68%,81.32%,84.98%
Figueroa High School,District,2949,"$1,884,411.00",$639.00,76.71,81.16,65.99%,80.74%,84.67%
Shelton High School,Charter,1761,"$1,056,600.00",$600.00,83.36,83.73,93.87%,95.85%,99.38%
Hernandez High School,District,4635,"$3,022,020.00",$652.00,77.29,80.93,66.75%,80.86%,84.88%
Griffin High School,Charter,1468,"$917,500.00",$625.00,83.35,83.82,93.39%,97.14%,99.46%
Wilson High School,Charter,2283,"$1,319,574.00",$578.00,83.27,83.99,93.87%,96.54%,99.26%
Cabrera High School,Charter,1858,"$1,081,356.00",$582.00,83.06,83.98,94.13%,97.04%,99.57%
Bailey High School,District,4976,"$3,124,928.00",$628.00,77.05,81.03,66.68%,81.93%,85.19%
Holden High School,Charter,427,"$248,087.00",$581.00,83.8,83.81,92.51%,96.25%,98.59%
Pena High School,Charter,962,"$585,858.00",$609.00,83.84,84.04,94.59%,95.95%,99.17%
