# Andy McRae

## PyCitySchools

In [1]:
# import pandas
import pandas as pd
import os

In [2]:
# get files
school_file = os.path.join('Resources','schools_complete.csv')
student_file = os.path.join('Resources','students_complete.csv')

In [3]:
# creating a dataframe for school csv
school_df = pd.read_csv(school_file)
school_df.head()

Unnamed: 0,School ID,school_name,type,size,budget
0,0,Huang High School,District,2917,1910635
1,1,Figueroa High School,District,2949,1884411
2,2,Shelton High School,Charter,1761,1056600
3,3,Hernandez High School,District,4635,3022020
4,4,Griffin High School,Charter,1468,917500


In [4]:
# creating dataframe for student csv
student_df = pd.read_csv(student_file)
student_df.head()

Unnamed: 0,Student ID,student_name,gender,grade,school_name,reading_score,math_score
0,0,Paul Bradley,M,9th,Huang High School,66,79
1,1,Victor Smith,M,12th,Huang High School,94,61
2,2,Kevin Rodriguez,M,12th,Huang High School,90,60
3,3,Dr. Richard Scott,M,12th,Huang High School,67,58
4,4,Bonnie Ray,F,9th,Huang High School,97,84


In [5]:
# now i feel like merging the two data frames. I think a left merge of school on student so that if there is 
# student who's school is not in the dataframe we dont lose them right off the bat. we can always drop them later
# if that comes to need
combined_df = pd.merge(student_df, school_df, how="left", on=['school_name','school_name'])
combined_df.head()

Unnamed: 0,Student ID,student_name,gender,grade,school_name,reading_score,math_score,School ID,type,size,budget
0,0,Paul Bradley,M,9th,Huang High School,66,79,0,District,2917,1910635
1,1,Victor Smith,M,12th,Huang High School,94,61,0,District,2917,1910635
2,2,Kevin Rodriguez,M,12th,Huang High School,90,60,0,District,2917,1910635
3,3,Dr. Richard Scott,M,12th,Huang High School,67,58,0,District,2917,1910635
4,4,Bonnie Ray,F,9th,Huang High School,97,84,0,District,2917,1910635


In [6]:
combined_df.count()
# there are no missing values in the data frame.

Student ID       39170
student_name     39170
gender           39170
grade            39170
school_name      39170
reading_score    39170
math_score       39170
School ID        39170
type             39170
size             39170
budget           39170
dtype: int64

In [7]:
# calculating number of TOTAL SCHOOLS
total_schools = combined_df['school_name'].nunique()
print(total_schools)

15


In [8]:
# calculating number of TOTAL STUDENTS
total_students = combined_df['student_name'].nunique()
print(total_students)

32715


In [9]:
# calculating TOTAL BUDGET
total_budget = school_df['budget'].sum()
print(total_budget)

24649428


In [10]:
# calculating AVERAGE MATH SCORE
avg_math_score = combined_df['math_score'].mean()
print(avg_math_score)

78.98537145774827


In [11]:
# calculating AVERAGE READING SCORE
avg_reading_score = combined_df['reading_score'].mean()
print(avg_reading_score)

81.87784018381414


In [12]:
#### Passing is defined as a grade of 70 or greater

In [13]:
# calculating the % PASSING MATH
passing_math_df = combined_df.loc[combined_df['math_score'] >= 70, :]
# passing_math_df.head()
passed_math = passing_math_df['math_score'].count()
# print(passed_math)
total_math = combined_df['math_score'].count()
# print(total_math)
percent_pass_math = (passed_math / total_math) * 100
print(percent_pass_math)

74.9808526933878


In [14]:
# calculating the % PASSING READING
passing_reading_df = combined_df.loc[combined_df['reading_score'] >= 70, :]
# passing_reading_df.head()
passed_reading = passing_reading_df['reading_score'].count()
# print(passed_reading)
total_reading = combined_df['reading_score'].count()
# print(total_reading)
percent_pass_reading = (passed_reading / total_reading) * 100
print(percent_pass_reading)

85.80546336482001


In [15]:
# calculating the % PASSING BOTH MATH AND READING

# making a dataframe of only students that are passing both math and reading
passing1_df = combined_df.loc[combined_df['math_score'] >= 70, :]
passing1_df.head()
passing_df = passing1_df.loc[passing1_df['reading_score'] >= 70, :]
passing_df.head()

passed = passing_df['math_score'].count()
# print(passed)
total = combined_df['math_score'].count()
# print(total)
overall_passing = (passed / total) * 100
print(overall_passing)


65.17232575950983


In [16]:
# district summary
district_summary = pd.DataFrame({
    'Total Schools': [total_schools],
    'Total Students': total_students,
    'Total Budget': total_budget,
    'Average Math Score': avg_math_score,
    'Average Reading Score': avg_reading_score,
    '% Passing Math': percent_pass_math,
    '% Passing Reading': percent_pass_reading,
    '% Overall Passing': overall_passing
})
district_summary

Unnamed: 0,Total Schools,Total Students,Total Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing
0,15,32715,24649428,78.985371,81.87784,74.980853,85.805463,65.172326


# District Summary

In [17]:
# adding formatting to Total Students and Total budget
district_summary['Total Students'] = district_summary['Total Students'].map('{:,}'.format)
district_summary['Total Budget'] = district_summary['Total Budget'].map('${:,.2f}'.format)
district_summary

Unnamed: 0,Total Schools,Total Students,Total Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing
0,15,32715,"$24,649,428.00",78.985371,81.87784,74.980853,85.805463,65.172326


In [18]:
school_df.head()

Unnamed: 0,School ID,school_name,type,size,budget
0,0,Huang High School,District,2917,1910635
1,1,Figueroa High School,District,2949,1884411
2,2,Shelton High School,Charter,1761,1056600
3,3,Hernandez High School,District,4635,3022020
4,4,Griffin High School,Charter,1468,917500


In [19]:
student_df.head()

Unnamed: 0,Student ID,student_name,gender,grade,school_name,reading_score,math_score
0,0,Paul Bradley,M,9th,Huang High School,66,79
1,1,Victor Smith,M,12th,Huang High School,94,61
2,2,Kevin Rodriguez,M,12th,Huang High School,90,60
3,3,Dr. Richard Scott,M,12th,Huang High School,67,58
4,4,Bonnie Ray,F,9th,Huang High School,97,84


In [20]:
# list/array of all unique school names for school summary
school_names = combined_df['school_name'].unique()
# print(school_names)

In [21]:
# grouping by school name
combined_group = combined_df.groupby('school_name')

In [22]:
# SCHOOL TYPE
school_type = combined_group['type'].unique()
school_type = pd.Series(school_type)
school_type = school_type.str[0]
# school_type = school_type.astype(object)
# getting rid of brackets

In [23]:
# TOTAL STUDENTS
total_students = combined_group['student_name'].count()

In [24]:
# Total School Budget
total_budget = combined_group['budget'].unique()
total_budget = total_budget.astype(float)

In [25]:
# PER STUDENT BUDGET
per_student_budget = total_budget / total_students
per_student_budget = per_student_budget.astype(float)

In [26]:
#AVERAGE MATH SCORE
avg_math = combined_group['math_score'].mean()

#AVERAGE READING SCORE
avg_reading = combined_group['reading_score'].mean()


In [27]:
# combined_df.head()

In [28]:
# pulling dataframes that i already created from before
passing_math_group = passing_math_df.groupby('school_name')
passing_reading_group = passing_reading_df.groupby('school_name')
passing_group = passing_df.groupby('school_name')
passing_group

<pandas.core.groupby.generic.DataFrameGroupBy object at 0x000001A464BCBBA8>

In [29]:
# % PASSING MATH
pass_math = (passing_math_group['math_score'].count() / combined_group['math_score'].count()) * 100

# % PASSING READING
pass_reading = (passing_reading_group['reading_score'].count() / combined_group['math_score'].count()) * 100

# % OVERALL PASSING
passing_overall = (passing_group['math_score'].count() / combined_group['math_score'].count()) * 100

In [30]:
# Create Summary
adict = {
    'School Type': school_type,
    'Total Students': total_students,
    'Total School Budget': total_budget,
    'Per Student Budget': per_student_budget,
    'Average Math Score': avg_math,
    'Average Reading Score': avg_reading,
    '% Passing Math': pass_math,
    '% Passing Reading': pass_reading,
    '% Overall Passing': passing_overall
}
school_summary = pd.DataFrame(adict,index=school_names)
# school_summary

In [31]:
side_school_df = school_summary.copy()

# School Summary

In [32]:
school_summary_sorted = school_summary.sort_index()
school_summary_sorted

Unnamed: 0,School Type,Total Students,Total School Budget,Per Student Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing
Bailey High School,District,4976,3124928.0,628.0,77.048432,81.033963,66.680064,81.93328,54.642283
Cabrera High School,Charter,1858,1081356.0,582.0,83.061895,83.97578,94.133477,97.039828,91.334769
Figueroa High School,District,2949,1884411.0,639.0,76.711767,81.15802,65.988471,80.739234,53.204476
Ford High School,District,2739,1763916.0,644.0,77.102592,80.746258,68.309602,79.299014,54.289887
Griffin High School,Charter,1468,917500.0,625.0,83.351499,83.816757,93.392371,97.138965,90.599455
Hernandez High School,District,4635,3022020.0,652.0,77.289752,80.934412,66.752967,80.862999,53.527508
Holden High School,Charter,427,248087.0,581.0,83.803279,83.814988,92.505855,96.252927,89.227166
Huang High School,District,2917,1910635.0,655.0,76.629414,81.182722,65.683922,81.316421,53.513884
Johnson High School,District,4761,3094650.0,650.0,77.072464,80.966394,66.057551,81.222432,53.539172
Pena High School,Charter,962,585858.0,609.0,83.839917,84.044699,94.594595,95.945946,90.540541


# Top Five Overall Perfoming Schools

In [33]:
top_five_summary = school_summary.sort_values('% Overall Passing', ascending=False)
top_five_summary.head()

Unnamed: 0,School Type,Total Students,Total School Budget,Per Student Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing
Cabrera High School,Charter,1858,1081356.0,582.0,83.061895,83.97578,94.133477,97.039828,91.334769
Thomas High School,Charter,1635,1043130.0,638.0,83.418349,83.84893,93.272171,97.308869,90.948012
Griffin High School,Charter,1468,917500.0,625.0,83.351499,83.816757,93.392371,97.138965,90.599455
Wilson High School,Charter,2283,1319574.0,578.0,83.274201,83.989488,93.867718,96.539641,90.582567
Pena High School,Charter,962,585858.0,609.0,83.839917,84.044699,94.594595,95.945946,90.540541


# Bottom Five Overall Performing Schools

In [34]:
bottom_five_summary = school_summary.sort_values('% Overall Passing', ascending=True)
bottom_five_summary.head()

Unnamed: 0,School Type,Total Students,Total School Budget,Per Student Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing
Rodriguez High School,District,3999,2547363.0,637.0,76.842711,80.744686,66.366592,80.220055,52.988247
Figueroa High School,District,2949,1884411.0,639.0,76.711767,81.15802,65.988471,80.739234,53.204476
Huang High School,District,2917,1910635.0,655.0,76.629414,81.182722,65.683922,81.316421,53.513884
Hernandez High School,District,4635,3022020.0,652.0,77.289752,80.934412,66.752967,80.862999,53.527508
Johnson High School,District,4761,3094650.0,650.0,77.072464,80.966394,66.057551,81.222432,53.539172


In [35]:
# combined_df.head()

### going on to math scores based on grade

In [36]:
# every ninth grader
nine_df = combined_df.loc[combined_df['grade'] == '9th', :]
nine_df.head()
nine_group = nine_df.groupby('school_name')
nine_math = nine_group['math_score'].mean()
# nine_math.head()

In [37]:
# every tenth grader
ten_df = combined_df.loc[combined_df['grade'] == '10th', :]
ten_df.head()
ten_group = ten_df.groupby('school_name')
ten_math = ten_group['math_score'].mean()
# ten_math.head()

In [38]:
# every eleventh grader
eleven_df = combined_df.loc[combined_df['grade'] == '11th', :]
eleven_df.head()
eleven_group = eleven_df.groupby('school_name')
eleven_math = eleven_group['math_score'].mean()
# eleven_math.head()

In [39]:
# every twelfth grader
twelve_df = combined_df.loc[combined_df['grade'] == '12th', :]
twelve_df.head()
twelve_group = twelve_df.groupby('school_name')
twelve_math = twelve_group['math_score'].mean()
# twelve_math.head()

In [40]:
math_score_dict = {
    '9th': nine_math,
    '10th': ten_math,
    '11th': eleven_math,
    '12th': twelve_math
}

## Math Score by Grade

In [41]:
math_score_summary = pd.DataFrame(math_score_dict,index=school_names)
math_score_summary_srt = math_score_summary.sort_index()
math_score_summary_srt

Unnamed: 0,9th,10th,11th,12th
Bailey High School,77.083676,76.996772,77.515588,76.492218
Cabrera High School,83.094697,83.154506,82.76556,83.277487
Figueroa High School,76.403037,76.539974,76.884344,77.151369
Ford High School,77.361345,77.672316,76.918058,76.179963
Griffin High School,82.04401,84.229064,83.842105,83.356164
Hernandez High School,77.438495,77.337408,77.136029,77.186567
Holden High School,83.787402,83.429825,85.0,82.855422
Huang High School,77.027251,75.908735,76.446602,77.225641
Johnson High School,77.187857,76.691117,77.491653,76.863248
Pena High School,83.625455,83.372,84.328125,84.121547


### on to reading score based on grade

In [42]:
# ninth grade
nine_read = nine_group['reading_score'].mean()
# tenth grade
ten_read = ten_group['reading_score'].mean()
#eleventh grade
eleven_read = eleven_group['reading_score'].mean()
# twelfth grade
twelve_read = twelve_group['reading_score'].mean()
# twelve_read.head()

In [43]:
# make dictionary to put in summary data frame
read_score_dict = {
    '9th': nine_read,
    '10th': ten_read,
    '11th': eleven_read,
    '12th': twelve_read
}

## Reading Score By Grade

In [44]:
# make summary table
reading_score_summary = pd.DataFrame(read_score_dict, index=school_names)
reading_score_summary_srt = reading_score_summary.sort_index()
reading_score_summary_srt

Unnamed: 0,9th,10th,11th,12th
Bailey High School,81.303155,80.907183,80.945643,80.912451
Cabrera High School,83.676136,84.253219,83.788382,84.287958
Figueroa High School,81.198598,81.408912,80.640339,81.384863
Ford High School,80.632653,81.262712,80.403642,80.662338
Griffin High School,83.369193,83.706897,84.288089,84.013699
Hernandez High School,80.86686,80.660147,81.39614,80.857143
Holden High School,83.677165,83.324561,83.815534,84.698795
Huang High School,81.290284,81.512386,81.417476,80.305983
Johnson High School,81.260714,80.773431,80.616027,81.227564
Pena High School,83.807273,83.612,84.335938,84.59116


## scores by school spending

In [45]:
# side_school_df['Per Student Budget'].head()

In [46]:
# bins are <585, 585-630, 630-645, 645-680
spending_bins = [0, 585, 630, 645, 680]
spending_groups = ['<585', '585-630', '630-645', '645-680']

side_school_df['Spending Ranges (Per Student)'] = pd.cut(side_school_df['Per Student Budget'], spending_bins,
                                           labels=spending_groups, include_lowest=True)
# side_school_df.head()

In [47]:
# create groupby object
school_spending_group = side_school_df.groupby('Spending Ranges (Per Student)')

In [48]:
# find averge math score
spending_avg_math = school_spending_group['Average Math Score'].mean()
# find average reading score
spending_avg_read = school_spending_group['Average Reading Score'].mean()
# % passing math
spending_percent_math = school_spending_group['% Passing Math'].mean()
# % passing Reading
spending_percent_read = school_spending_group['% Passing Reading'].mean()
# % overall passing
spending_overall_pass = school_spending_group['% Overall Passing'].mean()

In [49]:
# create dict for summary table
spending_dict = {
    'Average Math Score': spending_avg_math,
    'Average Reading Score': spending_avg_read,
    '% Passing Math': spending_percent_math,
    '% Passing Reading': spending_percent_read,
    '% Overall Passing': spending_overall_pass
}

In [50]:
# make a summary table for scores by school spending
scores_by_spending_summary = pd.DataFrame(spending_dict, index=spending_groups)
scores_by_spending_summary

Unnamed: 0,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing
<585,83.455399,83.933814,93.460096,96.610877,90.369459
585-630,81.899826,83.155286,87.133538,92.718205,81.418596
630-645,78.518855,81.624473,73.484209,84.391793,62.857656
645-680,76.99721,81.027843,66.164813,81.133951,53.526855


In [51]:
# side_school_df.head()

In [52]:
# making bins for scores by school size
# bins Small(<1000), Medium(1000 - 2000), Large(2000-5000)
size_bin = [0, 1000, 2000, 5000]
school_size_groups = ['Small(<1000)', 'Medium(1000-2000)', 'Large(2000-5000)']

# creating bin column
side_school_df['School Size'] = pd.cut(side_school_df['Total Students'], size_bin,
                                       labels=school_size_groups, include_lowest=True)
# side_school_df.head()


In [53]:
school_size_group = side_school_df.groupby('School Size')
# school_size_group

In [54]:
# average math
size_avg_math = school_size_group['Average Math Score'].mean()
# average reading 
size_avg_read = school_size_group['Average Reading Score'].mean()
# % Passing math
size_percent_math = school_size_group['% Passing Math'].mean()
# % Passing reading
size_percent_read = school_size_group['% Passing Reading'].mean()
# % Overall passing
size_overall_pass = school_size_group['% Overall Passing'].mean()

In [55]:
# creating dictionary to go into summary table
school_size_dict = {
    'Average Math Score': size_avg_math,
    'Average Reading Score': size_avg_read,
    '% Passing Math': size_percent_math,
    '% Passing Reading': size_percent_read,
    '% Overall Passing': size_overall_pass
}

In [56]:
# creating summary table for score by school size
scores_by_size_summary = pd.DataFrame(school_size_dict, index= school_size_groups)
scores_by_size_summary

Unnamed: 0,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing
Small(<1000),83.821598,83.929843,93.550225,96.099437,89.883853
Medium(1000-2000),83.374684,83.864438,93.599695,96.79068,90.621535
Large(2000-5000),77.746417,81.344493,69.963361,82.766634,58.286003


### now by school type

In [57]:

school_type_group = side_school_df.groupby('School Type')
# school_type_group

In [58]:
# average math
type_avg_math = school_type_group['Average Math Score'].mean()
# average reading
type_avg_read = school_type_group['Average Reading Score'].mean()
# % Passing Math
type_percent_math = school_type_group['% Passing Math'].mean()
# % passing Reading
type_percent_read = school_type_group['% Passing Reading'].mean()
# % overall passing
type_overall_pass = school_type_group['% Overall Passing'].mean()

In [59]:
# creating dictionary to go into summary table
# creating dictionary to go into summary table
school_type_dict = {
    'Average Math Score': type_avg_math,
    'Average Reading Score': type_avg_read,
    '% Passing Math': type_percent_math,
    '% Passing Reading': type_percent_read,
    '% Overall Passing': type_overall_pass
}

In [60]:
# making a list for my index
school_type_for_summary = ['Charter','District']

# creating summary table for score by school type
scores_by_type_summary = pd.DataFrame(school_type_dict, index=school_type_for_summary)
scores_by_type_summary

Unnamed: 0,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing
Charter,83.473852,83.896421,93.62083,96.586489,90.432244
District,76.956733,80.966636,66.548453,80.799062,53.672208


## time to format tables

In [61]:
# formatting school summary
school_summary['Total School Budget'] = school_summary['Total School Budget'].map('${:,.2f}'.format)
school_summary['Per Student Budget'] = school_summary['Per Student Budget'].map('${:.2f}'.format)
# school_summary

In [62]:
# scores_by_spending_summary

In [63]:
#formatting score by spending
scores_by_spending_summary['Average Math Score'] = scores_by_spending_summary['Average Math Score'].map('{:.2f}'.format)
scores_by_spending_summary['Average Reading Score'] = scores_by_spending_summary['Average Reading Score'].map('{:.2f}'.format)
scores_by_spending_summary['% Passing Math'] = scores_by_spending_summary['% Passing Math'].map('{:.2f}'.format)
scores_by_spending_summary['% Passing Reading'] = scores_by_spending_summary['% Passing Reading'].map('{:.2f}'.format)
scores_by_spending_summary['% Overall Passing'] = scores_by_spending_summary['% Overall Passing'].map('{:.2f}'.format)


# My Summary Tables

## District Summary

In [64]:
district_summary

Unnamed: 0,Total Schools,Total Students,Total Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing
0,15,32715,"$24,649,428.00",78.985371,81.87784,74.980853,85.805463,65.172326


## School Summary

In [65]:
school_summary_sorted

Unnamed: 0,School Type,Total Students,Total School Budget,Per Student Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing
Bailey High School,District,4976,3124928.0,628.0,77.048432,81.033963,66.680064,81.93328,54.642283
Cabrera High School,Charter,1858,1081356.0,582.0,83.061895,83.97578,94.133477,97.039828,91.334769
Figueroa High School,District,2949,1884411.0,639.0,76.711767,81.15802,65.988471,80.739234,53.204476
Ford High School,District,2739,1763916.0,644.0,77.102592,80.746258,68.309602,79.299014,54.289887
Griffin High School,Charter,1468,917500.0,625.0,83.351499,83.816757,93.392371,97.138965,90.599455
Hernandez High School,District,4635,3022020.0,652.0,77.289752,80.934412,66.752967,80.862999,53.527508
Holden High School,Charter,427,248087.0,581.0,83.803279,83.814988,92.505855,96.252927,89.227166
Huang High School,District,2917,1910635.0,655.0,76.629414,81.182722,65.683922,81.316421,53.513884
Johnson High School,District,4761,3094650.0,650.0,77.072464,80.966394,66.057551,81.222432,53.539172
Pena High School,Charter,962,585858.0,609.0,83.839917,84.044699,94.594595,95.945946,90.540541


## Top Performing Schools (By % Overall Passing)

In [66]:
top_five_summary.head()

Unnamed: 0,School Type,Total Students,Total School Budget,Per Student Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing
Cabrera High School,Charter,1858,1081356.0,582.0,83.061895,83.97578,94.133477,97.039828,91.334769
Thomas High School,Charter,1635,1043130.0,638.0,83.418349,83.84893,93.272171,97.308869,90.948012
Griffin High School,Charter,1468,917500.0,625.0,83.351499,83.816757,93.392371,97.138965,90.599455
Wilson High School,Charter,2283,1319574.0,578.0,83.274201,83.989488,93.867718,96.539641,90.582567
Pena High School,Charter,962,585858.0,609.0,83.839917,84.044699,94.594595,95.945946,90.540541


## Bottom Performing Schools (By % Overall Passing)

In [67]:
bottom_five_summary.head()

Unnamed: 0,School Type,Total Students,Total School Budget,Per Student Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing
Rodriguez High School,District,3999,2547363.0,637.0,76.842711,80.744686,66.366592,80.220055,52.988247
Figueroa High School,District,2949,1884411.0,639.0,76.711767,81.15802,65.988471,80.739234,53.204476
Huang High School,District,2917,1910635.0,655.0,76.629414,81.182722,65.683922,81.316421,53.513884
Hernandez High School,District,4635,3022020.0,652.0,77.289752,80.934412,66.752967,80.862999,53.527508
Johnson High School,District,4761,3094650.0,650.0,77.072464,80.966394,66.057551,81.222432,53.539172


## Math Score By Grade

In [68]:
math_score_summary_srt

Unnamed: 0,9th,10th,11th,12th
Bailey High School,77.083676,76.996772,77.515588,76.492218
Cabrera High School,83.094697,83.154506,82.76556,83.277487
Figueroa High School,76.403037,76.539974,76.884344,77.151369
Ford High School,77.361345,77.672316,76.918058,76.179963
Griffin High School,82.04401,84.229064,83.842105,83.356164
Hernandez High School,77.438495,77.337408,77.136029,77.186567
Holden High School,83.787402,83.429825,85.0,82.855422
Huang High School,77.027251,75.908735,76.446602,77.225641
Johnson High School,77.187857,76.691117,77.491653,76.863248
Pena High School,83.625455,83.372,84.328125,84.121547


## Reading Score By Grade

In [69]:
reading_score_summary_srt

Unnamed: 0,9th,10th,11th,12th
Bailey High School,81.303155,80.907183,80.945643,80.912451
Cabrera High School,83.676136,84.253219,83.788382,84.287958
Figueroa High School,81.198598,81.408912,80.640339,81.384863
Ford High School,80.632653,81.262712,80.403642,80.662338
Griffin High School,83.369193,83.706897,84.288089,84.013699
Hernandez High School,80.86686,80.660147,81.39614,80.857143
Holden High School,83.677165,83.324561,83.815534,84.698795
Huang High School,81.290284,81.512386,81.417476,80.305983
Johnson High School,81.260714,80.773431,80.616027,81.227564
Pena High School,83.807273,83.612,84.335938,84.59116


## Scores by School Spending

In [70]:
scores_by_spending_summary

Unnamed: 0,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing
<585,83.46,83.93,93.46,96.61,90.37
585-630,81.9,83.16,87.13,92.72,81.42
630-645,78.52,81.62,73.48,84.39,62.86
645-680,77.0,81.03,66.16,81.13,53.53


## Scores by School Size

In [71]:
scores_by_size_summary

Unnamed: 0,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing
Small(<1000),83.821598,83.929843,93.550225,96.099437,89.883853
Medium(1000-2000),83.374684,83.864438,93.599695,96.79068,90.621535
Large(2000-5000),77.746417,81.344493,69.963361,82.766634,58.286003


## Scores by School Type

In [72]:
scores_by_type_summary

Unnamed: 0,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing
Charter,83.473852,83.896421,93.62083,96.586489,90.432244
District,76.956733,80.966636,66.548453,80.799062,53.672208


# Observable Trends

(1): I noticed early that charter schools out perform district schools as a whole. Looking at the Scores by School Type table we see that all the values are higher for charter schools. Also When looking at Top Performing Schools, they are all Charter, while Bottom Performing Schools are all District. My hypothesis is that charter schools have the ability to be selective of who they let in so they are only letting in students who have a greater chance of passing. Meanwhile district schools have to let everyone in, so they are more representative of the real population of students in the the whole district.

(2): This is a strange one, in fact the opposite of what I would expect. Looking at the Scores by School Spending table, as the spending per student goes up the percent of overall passing average goes down. In fact every value in that table goes down. There is some weird correlation there between spending and student learning. I do not want to go as far as saying causation, because I have to evidence of such a thing. But this is a strange happenstance. I would expect Student scores to go up with spending.