# District Summary


In [1]:
# Dependencies and Setup
import pandas as pd
import numpy as np

# File to Load (Remember to Change These)
school_data_to_load = "Resources/schools_complete.csv"
student_data_to_load = "Resources/students_complete.csv"

# Read School and Student Data File and store into Pandas DataFrames
school_data = pd.read_csv(school_data_to_load)
student_data = pd.read_csv(student_data_to_load)

# Combine the data into a single dataset.  
school_data_complete_df = pd.merge(student_data, school_data, how="left", on=["school_name", "school_name"])

In [2]:
#create variables for the desired vaules 
school_count = len(school_data_complete_df["school_name"].unique())
student_count = len(school_data_complete_df["Student ID"].unique())

# this variable pulls all 15 unique budget vaules, this is confirmed by the list returning 15 vaules. 
budget_values = school_data_complete_df["budget"].unique()
total_budget = sum(budget_values)

#average test scores
math_avg = school_data_complete_df["math_score"].mean()
read_avg = school_data_complete_df["reading_score"].mean()

# counts the number of students who score a 70 or above using df.loc
#df.loc looks at row that has that vaule above 70 and all columns
pass_math_count = len(school_data_complete_df.loc[school_data_complete_df["math_score"] >= 70,:])
pass_read_count = len(school_data_complete_df.loc[school_data_complete_df["reading_score"] >= 70,:])
# these are students passing math AND reading
pass_both_count = len(school_data_complete_df.loc[(school_data_complete_df["math_score"] >= 70) & 
                                              (school_data_complete_df["reading_score"] >= 70),:])

#calculate and store the percent variables
percent_pass_math = pass_math_count/student_count
percent_pass_read = pass_read_count/student_count    
percent_pass_both = pass_both_count/student_count

In [3]:
#calculate and store the percent variables

def percent_district(x):
    num = x/student_count
    percentage = "{:.3%}".format(num)
    return(percentage)

per_pass_math = percent_district(pass_math_count)
per_pass_read = percent_district(pass_read_count)
per_pass_both = percent_district(pass_both_count)

In [4]:
#creating a Dataframe form a list of dictionaries
summary = [{"Total Schools":school_count,
          "Total Students": "{:,}".format(student_count),
          "Total Budget": "${:,.2f}".format(total_budget),
          "Average Math Score": "{:,.2f}".format(math_avg),
          "Average Reading Score": "{:,.2f}".format(read_avg),
          "% Passing Math":per_pass_math,
          "% Passing Reading":per_pass_read,
          "% Passing Overall":per_pass_both
           }]

district_summary_df = pd.DataFrame(summary)
district_summary_df

Unnamed: 0,Total Schools,Total Students,Total Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Passing Overall
0,15,39170,"$24,649,428.00",78.99,81.88,74.981%,85.805%,65.172%


## School Summary

In [5]:
#pull types and budgets from school data file
school_data_2 = school_data.set_index('school_name')

school_type = school_data_2['type']
stucount_by_school = school_data_2['size']
budget_by_school = school_data_2['budget']
spending_per_stu = budget_by_school/stucount_by_school

In [6]:
#group data by school
grouped_by_school_df = school_data_complete_df.groupby(['school_name'])

#grab student count and type by school
math_avg_by_school = grouped_by_school_df['math_score'].mean()
read_avg_by_school = grouped_by_school_df['reading_score'].mean()

In [7]:
#calculate students % passing math by school
pass_math = school_data_complete_df.loc[(school_data_complete_df['math_score'] >= 70),:]
pass_math_by_sch = pass_math.groupby('school_name')
pass_math_by_sch = pass_math_by_sch['Student ID'].count()

per_pass_math_by_sch = pass_math_by_sch/stucount_by_school
#I am certain there is a more elegant way to gather this data but this works for the time being.

In [8]:
#calculate students % passing reading by school
pass_read = school_data_complete_df.loc[(school_data_complete_df['reading_score'] >= 70),:]
pass_read_by_sch = pass_read.groupby('school_name')
pass_read_by_sch = pass_read_by_sch['Student ID'].count()

per_pass_read_by_sch = pass_read_by_sch/stucount_by_school

In [9]:
#calculate students % passing overall by school
pass_both = school_data_complete_df.loc[(school_data_complete_df['reading_score'] >= 70) &
                                           (school_data_complete_df['math_score'] >= 70),:]
pass_both_by_school = pass_both.groupby('school_name')
pass_both_by_school = pass_both_by_school['Student ID'].count()

passing_overall = pass_both_by_school/stucount_by_school 

In [10]:
def dec_2_per(num):
    per = num*100
    return(per)

In [23]:
school_summary = pd.DataFrame({
    "School Type": school_type,
    "Total Students": stucount_by_school.map("{:,}".format),
    "Per Student Budget": spending_per_stu.map("${:,.0f}".format),
    "Total School Budget": budget_by_school.map("${:,}".format),
    "Average Math Score": math_avg_by_school,
    "Average Reading Score": read_avg_by_school,
    '% Passing Math': per_pass_math_by_sch.map(dec_2_per),
    '% Passing Reading': per_pass_read_by_sch.map(dec_2_per),
    "Overall Passing Rate": passing_overall.map(dec_2_per)
})

school_summary

Unnamed: 0_level_0,School Type,Total Students,Per Student Budget,Total School Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,Overall Passing Rate
school_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Bailey High School,District,4976,$628,"$3,124,928",77.048432,81.033963,66.680064,81.93328,54.642283
Cabrera High School,Charter,1858,$582,"$1,081,356",83.061895,83.97578,94.133477,97.039828,91.334769
Figueroa High School,District,2949,$639,"$1,884,411",76.711767,81.15802,65.988471,80.739234,53.204476
Ford High School,District,2739,$644,"$1,763,916",77.102592,80.746258,68.309602,79.299014,54.289887
Griffin High School,Charter,1468,$625,"$917,500",83.351499,83.816757,93.392371,97.138965,90.599455
Hernandez High School,District,4635,$652,"$3,022,020",77.289752,80.934412,66.752967,80.862999,53.527508
Holden High School,Charter,427,$581,"$248,087",83.803279,83.814988,92.505855,96.252927,89.227166
Huang High School,District,2917,$655,"$1,910,635",76.629414,81.182722,65.683922,81.316421,53.513884
Johnson High School,District,4761,$650,"$3,094,650",77.072464,80.966394,66.057551,81.222432,53.539172
Pena High School,Charter,962,$609,"$585,858",83.839917,84.044699,94.594595,95.945946,90.540541


## Top Performing Schools (By % Overall Passing)

In [12]:
top_schools = school_summary.sort_values(by='Overall Passing Rate', ascending=False)
top_schools.head()

Unnamed: 0_level_0,School Type,Total Students,Per Student Budget,Total School Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,Overall Passing Rate
school_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Cabrera High School,Charter,1858,$582,"$1,081,356",83.061895,83.97578,94.133477,97.039828,91.334769
Thomas High School,Charter,1635,$638,"$1,043,130",83.418349,83.84893,93.272171,97.308869,90.948012
Griffin High School,Charter,1468,$625,"$917,500",83.351499,83.816757,93.392371,97.138965,90.599455
Wilson High School,Charter,2283,$578,"$1,319,574",83.274201,83.989488,93.867718,96.539641,90.582567
Pena High School,Charter,962,$609,"$585,858",83.839917,84.044699,94.594595,95.945946,90.540541


## Bottom Performing Schools (By % Overall Passing)

In [13]:
bottom_schools = school_summary.sort_values(by='Overall Passing Rate', ascending=True)
bottom_schools.head(5)

Unnamed: 0_level_0,School Type,Total Students,Per Student Budget,Total School Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,Overall Passing Rate
school_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Rodriguez High School,District,3999,$637,"$2,547,363",76.842711,80.744686,66.366592,80.220055,52.988247
Figueroa High School,District,2949,$639,"$1,884,411",76.711767,81.15802,65.988471,80.739234,53.204476
Huang High School,District,2917,$655,"$1,910,635",76.629414,81.182722,65.683922,81.316421,53.513884
Hernandez High School,District,4635,$652,"$3,022,020",77.289752,80.934412,66.752967,80.862999,53.527508
Johnson High School,District,4761,$650,"$3,094,650",77.072464,80.966394,66.057551,81.222432,53.539172


## Math Scores by Grade

In [24]:
#pulling math grades by campus and average 
only_9th = school_data_complete_df.loc[school_data_complete_df['grade']=='9th',:]
only_9th = only_9th.groupby('school_name')
math_9th = only_9th["math_score"].mean()

only_10th = school_data_complete_df.loc[school_data_complete_df['grade']=='10th',:]
only_10th = only_10th.groupby('school_name')
math_10th = only_10th["math_score"].mean()

only_11th = school_data_complete_df.loc[school_data_complete_df['grade']=='11th',:]
only_11th = only_11th.groupby('school_name')
math_11th = only_11th["math_score"].mean()

only_12th = school_data_complete_df.loc[school_data_complete_df['grade']=='12th',:]
only_12th = only_12th.groupby('school_name')
math_12th = only_12th["math_score"].mean()

In [15]:
#creating math grades DataFrame
math_summary = pd.DataFrame({
    "9th":math_9th.map("{:,.2f}".format),
    "10th":math_10th.map("{:,.2f}".format),
    "11th":math_11th.map("{:,.2f}".format),
    "12th":math_12th.map("{:,.2f}".format)
})

math_summary

Unnamed: 0_level_0,9th,10th,11th,12th
school_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Bailey High School,77.08,77.0,77.52,76.49
Cabrera High School,83.09,83.15,82.77,83.28
Figueroa High School,76.4,76.54,76.88,77.15
Ford High School,77.36,77.67,76.92,76.18
Griffin High School,82.04,84.23,83.84,83.36
Hernandez High School,77.44,77.34,77.14,77.19
Holden High School,83.79,83.43,85.0,82.86
Huang High School,77.03,75.91,76.45,77.23
Johnson High School,77.19,76.69,77.49,76.86
Pena High School,83.63,83.37,84.33,84.12


## Reading Score by Grade 

In [16]:
#pulling reading grades by campus and average 
read_9th = only_9th["reading_score"].mean()
read_10th = only_10th["reading_score"].mean()
read_11th = only_11th['reading_score'].mean()
read_12th = only_12th['reading_score'].mean()

In [17]:
read_summary = pd.DataFrame({
    "9th":read_9th.map("{:,.2f}".format),
    "10th":read_10th.map("{:,.2f}".format),
    "11th":read_11th.map("{:,.2f}".format),
    "12th":read_12th.map("{:,.2f}".format)
})

read_summary

Unnamed: 0_level_0,9th,10th,11th,12th
school_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Bailey High School,81.3,80.91,80.95,80.91
Cabrera High School,83.68,84.25,83.79,84.29
Figueroa High School,81.2,81.41,80.64,81.38
Ford High School,80.63,81.26,80.4,80.66
Griffin High School,83.37,83.71,84.29,84.01
Hernandez High School,80.87,80.66,81.4,80.86
Holden High School,83.68,83.32,83.82,84.7
Huang High School,81.29,81.51,81.42,80.31
Johnson High School,81.26,80.77,80.62,81.23
Pena High School,83.81,83.61,84.34,84.59


## Scores by School Spending

In [18]:
#create a reduced dataframe to look into spending
spending_by_school = school_summary[['Per Student Budget', 'Average Math Score', 'Average Reading Score',
                '% Passing Math', '% Passing Reading', 'Overall Passing Rate']]

#replace budget string with float
spending_by_school['Per Student Budget'] = spending_by_school['Per Student Budget'].str.replace('$','').astype(float)

#bin the budget data
bins = [0,586,631,646,681]
spending_bins = ['Under $585','$585-630','$630-645','$645-680']

spending_by_school['Spending Range (Per Student)'] = pd.cut(spending_by_school['Per Student Budget'], bins,
                                                     labels = spending_bins, include_lowest=True)
spending_by_school = spending_by_school.groupby('Spending Range (Per Student)').mean()
score_by_spending = spending_by_school.drop(['Per Student Budget'], axis=1)
score_by_spending


  spending_by_school['Per Student Budget'] = spending_by_school['Per Student Budget'].str.replace('$','').astype(float)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  spending_by_school['Per Student Budget'] = spending_by_school['Per Student Budget'].str.replace('$','').astype(float)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  spending_by_school['Spending Range (Per Student)'] = pd.cut(spending_by_school['Per Student Budget'], bins,


Unnamed: 0_level_0,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,Overall Passing Rate
Spending Range (Per Student),Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Under $585,83.455399,83.933814,93.460096,96.610877,90.369459
$585-630,81.899826,83.155286,87.133538,92.718205,81.418596
$630-645,78.518855,81.624473,73.484209,84.391793,62.857656
$645-680,76.99721,81.027843,66.164813,81.133951,53.526855


## Scores by School Size

In [27]:
#create a reduced dataframe to look into sizes
scores_by_size = school_summary[['Total Students', 'Average Math Score', 'Average Reading Score',
                '% Passing Math', '% Passing Reading','Overall Passing Rate']]

scores_by_size['Total Students'] = scores_by_size['Total Students'].str.replace(',','').astype(float)

bins_2 = [0,999,1999,5000]
size_bins = ['Small (<1000)','Medium (1000-2000)','Large (2000-5000)']

scores_by_size['Size Cat'] = pd.cut(scores_by_size['Total Students'], bins_2,
                             labels = size_bins, include_lowest=True)
scores_by_size = scores_by_size.groupby('Size Cat').mean()
size_summary = scores_by_size.drop(['Total Students'], axis=1)
size_summary


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  scores_by_size['Total Students'] = scores_by_size['Total Students'].str.replace(',','').astype(float)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  scores_by_size['Size Cat'] = pd.cut(scores_by_size['Total Students'], bins_2,


Unnamed: 0_level_0,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,Overall Passing Rate
Size Cat,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Small (<1000),83.821598,83.929843,93.550225,96.099437,89.883853
Medium (1000-2000),83.374684,83.864438,93.599695,96.79068,90.621535
Large (2000-5000),77.746417,81.344493,69.963361,82.766634,58.286003


## Scores by School Type

In [28]:
#create a reduced dataframe to summarize
type_summary = school_summary[['School Type', 'Average Math Score', 'Average Reading Score',
                '% Passing Math', '% Passing Reading','Overall Passing Rate']]

scores_by_type = type_summary.groupby('School Type').mean()
scores_by_type

Unnamed: 0_level_0,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,Overall Passing Rate
School Type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Charter,83.473852,83.896421,93.62083,96.586489,90.432244
District,76.956733,80.966636,66.548453,80.799062,53.672208
