In [51]:
#Taken from PyCitySchools_starter.ipynb
# Dependencies and Setup

import pandas as pd
import os
from pathlib import Path

# Build the paths to the two input CSV files, relative to the current working directory.
school_data_to_load_path = os.path.join(os.getcwd(), 'Resources', 'schools_complete.csv')
student_data_to_load_path = os.path.join(os.getcwd(), 'Resources', 'students_complete.csv')

# Create the output analysis directory if it is not already present.
# exist_ok=True makes the call a no-op when the directory exists, so no separate
# os.path.exists() check (and no gap between checking and creating) is needed.
os.makedirs('Ed_Analysis', exist_ok=True)

# Paths for the report output files inside the analysis directory.
Dist_rpt_out_path = os.path.join(os.getcwd(), 'Ed_Analysis', 'Dist_rpt.txt')
School_rpt_out_path = os.path.join(os.getcwd(), 'Ed_Analysis', 'School_rpt.txt')


# Read the school and student data files into pandas DataFrames (first row is the header).
school_data = pd.read_csv(school_data_to_load_path, header=0)
student_data = pd.read_csv(student_data_to_load_path, header=0)

# Combine the data into a single dataset: one row per student, with the columns of
# that student's school attached. The join key is listed once, and
# validate="many_to_one" makes pandas raise if a school name appears more than once
# in school_data, which would otherwise silently duplicate student rows.
school_data_complete = pd.merge(
    student_data,
    school_data,
    how="left",
    on="school_name",
    validate="many_to_one",
)
school_data_complete.head()


Unnamed: 0,Student ID,student_name,gender,grade,school_name,reading_score,math_score,School ID,type,size,budget
0,0,Paul Bradley,M,9th,Huang High School,66,79,0,District,2917,1910635
1,1,Victor Smith,M,12th,Huang High School,94,61,0,District,2917,1910635
2,2,Kevin Rodriguez,M,12th,Huang High School,90,60,0,District,2917,1910635
3,3,Dr. Richard Scott,M,12th,Huang High School,67,58,0,District,2917,1910635
4,4,Bonnie Ray,F,9th,Huang High School,97,84,0,District,2917,1910635


# District Summary

In [52]:
# Build a small DataFrame holding each school's name and budget exactly once:
# keep only those two columns, drop the repeated rows, and renumber the index from 0.
school_budget_columns = school_data_complete[['school_name', 'budget']]
dist_schools_df = school_budget_columns.drop_duplicates().reset_index(drop=True)


In [74]:
# Count the number of distinct schools in the district
school_count = school_data_complete['school_name'].nunique()
print(f'schools in district: {school_count}')

# Count the total number of students as the number of rows in school_data_complete.
# len() is used instead of Series.count() because count() skips missing (NaN)
# cells, so it would under-count if any 'school_name' value were absent.
student_count = len(school_data_complete)
print(f"District student body: {student_count} students")


# Calculate the operating budget of the district across all the schools.
# dist_schools_df holds one row per (school, budget) pair, so each budget is summed once.
dist_budget = dist_schools_df['budget'].sum()
print('District operational budget: ', '${:,.0f}'.format(dist_budget))




schools in district: 15
District student body: 39170 students
District operational budget:  $24,649,428


In [78]:
# Calculate the district average math score
dist_math_mean = school_data_complete['math_score'].mean()

# Calculate the district average reading score
dist_read_mean = school_data_complete['reading_score'].mean()

print(f"District average math score: {dist_math_mean:.3f} %")
# The second line reports the reading average, so it is labelled as reading
# (it previously repeated the "math score" label).
print(f"District average reading score: {dist_read_mean:.3f} %")

District average math score: 78.985 %
District average math score: 81.878 %


In [79]:
print(f"Total students {student_count}")

# Adapted from the starter code.
# Count how many students in the district passed math (score of 70 or better).
# Summing the boolean mask counts the passing rows directly, so the result does
# not depend on another column (such as 'student_name') being non-null.
pass_math_count = (school_data_complete["math_score"] >= 70).sum()
print(f" studnets who passed math: {pass_math_count}")

# Count how many students in the district passed reading (score of 70 or better)
pass_read_count = (school_data_complete["reading_score"] >= 70).sum()
print(f" studnets who passed reading: {pass_read_count}")


# Calculate the percentage of students passing math, and passing reading, across the district
pass_math_percentage = pass_math_count / float(student_count) * 100
print(f"Passing math: {pass_math_percentage} %")

pass_read_percent = pass_read_count / float(student_count) * 100
print(f"Passing reading: {pass_read_percent} %")

Total students 39170
 studnets who passed math: 29370
 studnets who passed reading: 33610
Passing math: 74.9808526933878 %
Passing reading: 85.80546336482001 %


In [56]:
# Adapted from the starter code.
# Count the students who passed BOTH the math and reading exams with a 70 or better.
# The two conditions are combined element-wise and the resulting boolean mask is
# summed, so the count does not depend on any other column being non-null.
passed_both_mask = (school_data_complete['math_score'] >= 70) & (school_data_complete['reading_score'] >= 70)
pass_math_read_count = passed_both_mask.sum()
print(f"Total students {student_count}")
print(f"  Students passing both math and reading: {pass_math_read_count}")

# Express the count as a percentage of the whole district student body
pass_math_read_percent = pass_math_read_count / float(student_count) * 100
print(f"Students passing both math and reading: {pass_math_read_percent}%")

Total students 39170
  Students passing both math and reading: 25528
Students passing both math and reading: 65.17232575950983%


In [98]:
# Collect the district's key metrics (computed in the earlier cells) under their report labels
district_metrics = {
    "Total Schools": school_count,
    "Total Students": student_count,
    "Total Budget": dist_budget,
    "Average Math Score": dist_math_mean,
    "Average Reading Score": dist_read_mean,
    "% Passing Math": pass_math_percentage,
    "% Passing Reading": pass_read_percent,
    "% Overall Passing": pass_math_read_percent,
}

# One-row DataFrame: every metric becomes a single-element column
district_summary = pd.DataFrame(
    {label: [value] for label, value in district_metrics.items()}
)

# Render the student count with thousands separators and the budget as currency
district_summary["Total Students"] = district_summary["Total Students"].map(
    lambda students: "{:,}".format(students)
)
district_summary["Total Budget"] = district_summary["Total Budget"].map(
    lambda dollars: "${:,.2f}".format(dollars)
)

# Display the district summary DataFrame
district_summary

Unnamed: 0,Total Schools,Total Students,Total Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing
0,15,39170,"$24,649,428.00",78.985371,81.87784,74.980853,85.805463,65.172326


# School Summary

In [85]:
# Adapted from the starter code.
# School type for each school, indexed by school name
schools_by_name = school_data.set_index("school_name")
school_types = schools_by_name['type']

# Number of student rows recorded for each school
student_school_names = school_data_complete['school_name']
per_school_counts = student_school_names.value_counts()



In [86]:

# Calculate each school's total budget and its per capita (per student) spending.
# The 'budget' column is selected BEFORE taking the mean: averaging the whole
# grouped frame would also try to average text columns (names, gender, grade),
# which raises a TypeError on pandas >= 2.0 and is wasted work on older versions.
per_school_budget = school_data_complete.groupby('school_name')['budget'].mean()

# Both Series are indexed by school name, so the division aligns school by school.
per_school_capita = per_school_budget / per_school_counts




In [87]:
# Calculate each school's average test scores for math and reading.
# The score column is selected before calling mean() so that only that column is
# averaged; calling mean() on the whole grouped frame fails on the text columns
# with pandas >= 2.0.
per_school_math = school_data_complete.groupby('school_name')['math_score'].mean()
per_school_read = school_data_complete.groupby('school_name')['reading_score'].mean()



In [61]:
# Count, for each school, the number of STUDENTS with a math score of 70 or greater
school_pass_math = school_data_complete.groupby('school_name')['math_score'].apply(lambda x: (x >= 70).sum())

# Count, for each school, the number of STUDENTS with a reading score of 70 or greater
school_pass_read = school_data_complete.groupby('school_name')['reading_score'].apply(lambda x: (x >= 70).sum())

#--------
# Author's note: the lambda turns each school's scores into a True/False "passed"
# mask and sums it, which counts the passing grades per school.
#--------

# Calculate the percent of students passing math/reading in each school
school_pass_math_percent = school_pass_math / per_school_counts * 100
school_pass_read_percent = school_pass_read / per_school_counts * 100

# Keep only the students who passed both math and reading
passing_math_and_reading = school_data_complete[
    (school_data_complete['reading_score'] >= 70) & (school_data_complete['math_score'] >= 70)
]

# Count those students per school. A school with no student passing both exams
# has no rows in the filtered frame, so it is re-added with a count of 0;
# otherwise its pass rate would come out as NaN instead of 0%.
overall_pass_count = (
    passing_math_and_reading.groupby('school_name')
    .size()
    .reindex(per_school_counts.index, fill_value=0)
)

# Calculate the percentage of each school's students passing both math and reading
overall_pass_rate = overall_pass_count / per_school_counts * 100


In [88]:
# Assemble one row per school: type, enrolment, budget figures, and exam results.
# Every value below is a Series indexed by school name, so pandas lines them up by school.
school_summary_columns = {
    "School Type": school_types,
    "Total Students": per_school_counts,
    "Total School Budget": per_school_budget,
    "Per Student Budget": per_school_capita,
    "Average Math Score": per_school_math,
    "Average Reading Score": per_school_read,
    "% Passing Math": school_pass_math_percent,
    "% Passing Reading": school_pass_read_percent,
    "% Overall Passing": overall_pass_rate,
}
per_school_summary = pd.DataFrame(school_summary_columns)

# Format both budget columns as currency text (this turns them into strings)
for currency_column in ("Total School Budget", "Per Student Budget"):
    per_school_summary[currency_column] = per_school_summary[currency_column].map(
        lambda dollars: "${:,.2f}".format(dollars)
    )

# Display the school summary
per_school_summary

Unnamed: 0,School Type,Total Students,Total School Budget,Per Student Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing
Bailey High School,District,4976,"$3,124,928.00",$628.00,77.048432,81.033963,66.680064,81.93328,54.642283
Cabrera High School,Charter,1858,"$1,081,356.00",$582.00,83.061895,83.97578,94.133477,97.039828,91.334769
Figueroa High School,District,2949,"$1,884,411.00",$639.00,76.711767,81.15802,65.988471,80.739234,53.204476
Ford High School,District,2739,"$1,763,916.00",$644.00,77.102592,80.746258,68.309602,79.299014,54.289887
Griffin High School,Charter,1468,"$917,500.00",$625.00,83.351499,83.816757,93.392371,97.138965,90.599455
Hernandez High School,District,4635,"$3,022,020.00",$652.00,77.289752,80.934412,66.752967,80.862999,53.527508
Holden High School,Charter,427,"$248,087.00",$581.00,83.803279,83.814988,92.505855,96.252927,89.227166
Huang High School,District,2917,"$1,910,635.00",$655.00,76.629414,81.182722,65.683922,81.316421,53.513884
Johnson High School,District,4761,"$3,094,650.00",$650.00,77.072464,80.966394,66.057551,81.222432,53.539172
Pena High School,Charter,962,"$585,858.00",$609.00,83.839917,84.044699,94.594595,95.945946,90.540541


# Highest and Lowest Performing Schools (by % Overall Passing)

In [63]:
# Rank the schools from highest to lowest overall passing rate and show the first five
top_schools = per_school_summary.sort_values(by='% Overall Passing', ascending=False)
top_schools.head(n=5)

Unnamed: 0,School Type,Total Students,Total School Budget,Per Student Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing
Cabrera High School,Charter,1858,"$1,081,356.00",$582.00,83.061895,83.97578,94.133477,97.039828,91.334769
Thomas High School,Charter,1635,"$1,043,130.00",$638.00,83.418349,83.84893,93.272171,97.308869,90.948012
Griffin High School,Charter,1468,"$917,500.00",$625.00,83.351499,83.816757,93.392371,97.138965,90.599455
Wilson High School,Charter,2283,"$1,319,574.00",$578.00,83.274201,83.989488,93.867718,96.539641,90.582567
Pena High School,Charter,962,"$585,858.00",$609.00,83.839917,84.044699,94.594595,95.945946,90.540541


In [64]:
# Rank the schools from lowest to highest overall passing rate and show the first five
bottom_schools = per_school_summary.sort_values(by="% Overall Passing", ascending=True)
bottom_schools.head(n=5)

Unnamed: 0,School Type,Total Students,Total School Budget,Per Student Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing
Rodriguez High School,District,3999,"$2,547,363.00",$637.00,76.842711,80.744686,66.366592,80.220055,52.988247
Figueroa High School,District,2949,"$1,884,411.00",$639.00,76.711767,81.15802,65.988471,80.739234,53.204476
Huang High School,District,2917,"$1,910,635.00",$655.00,76.629414,81.182722,65.683922,81.316421,53.513884
Hernandez High School,District,4635,"$3,022,020.00",$652.00,77.289752,80.934412,66.752967,80.862999,53.527508
Johnson High School,District,4761,"$3,094,650.00",$650.00,77.072464,80.966394,66.057551,81.222432,53.539172


# Math Scores by Grade

In [65]:
# Adapted from the starter code.
# Split the student rows by grade level.
ninth_graders = school_data_complete[school_data_complete["grade"] == "9th"]
tenth_graders = school_data_complete[school_data_complete["grade"] == "10th"]
eleventh_graders = school_data_complete[school_data_complete["grade"] == "11th"]
twelfth_graders = school_data_complete[school_data_complete["grade"] == "12th"]

# Group by "school_name" and take the mean of every numeric column.
# numeric_only=True is required on pandas >= 2.0, where averaging the text
# columns (student_name, gender, ...) raises a TypeError instead of being skipped.
ninth_graders_scores = ninth_graders.groupby('school_name').mean(numeric_only=True)
tenth_graders_scores = tenth_graders.groupby('school_name').mean(numeric_only=True)
eleventh_graders_scores = eleventh_graders.groupby('school_name').mean(numeric_only=True)
twelfth_graders_scores = twelfth_graders.groupby('school_name').mean(numeric_only=True)
# The original cell spelled this name without the "s"; keep it as an alias so
# anything that refers to the old spelling still works.
ninth_graders_cores = ninth_graders_scores

# Group by "school_name" and average only the math scores.
# Selecting the column before mean() avoids averaging unrelated columns.
ninth_graders_math_scores = ninth_graders.groupby('school_name')['math_score'].mean()
tenth_graders_math_scores = tenth_graders.groupby('school_name')['math_score'].mean()
eleventh_graders_math_scores = eleventh_graders.groupby('school_name')['math_score'].mean()
twelfth_graders_math_scores = twelfth_graders.groupby('school_name')['math_score'].mean()

# Combine each of the scores above into a single DataFrame called `math_scores_by_grade`
math_scores_by_grade = pd.DataFrame({
    "9th": ninth_graders_math_scores,
    "10th": tenth_graders_math_scores,
    "11th": eleventh_graders_math_scores,
    "12th": twelfth_graders_math_scores
})

# Minor data wrangling: drop the "school_name" index label from the displayed table
math_scores_by_grade.index.name = None

# Display the DataFrame
math_scores_by_grade



Unnamed: 0,9th,10th,11th,12th
Bailey High School,77.083676,76.996772,77.515588,76.492218
Cabrera High School,83.094697,83.154506,82.76556,83.277487
Figueroa High School,76.403037,76.539974,76.884344,77.151369
Ford High School,77.361345,77.672316,76.918058,76.179963
Griffin High School,82.04401,84.229064,83.842105,83.356164
Hernandez High School,77.438495,77.337408,77.136029,77.186567
Holden High School,83.787402,83.429825,85.0,82.855422
Huang High School,77.027251,75.908735,76.446602,77.225641
Johnson High School,77.187857,76.691117,77.491653,76.863248
Pena High School,83.625455,83.372,84.328125,84.121547


# Reading Scores by Grade

In [66]:
#--------
# Run the "Math Scores by Grade" cell before this one: the per-grade DataFrames
# (ninth_graders, tenth_graders, ...) are reused here, not regenerated.
#--------

# Group by "school_name" and average only the reading scores.
# The column is selected before mean() so that the text columns are never
# averaged (which raises a TypeError on pandas >= 2.0).
ninth_graders_read_scores = ninth_graders.groupby('school_name')['reading_score'].mean()
tenth_graders_read_scores = tenth_graders.groupby('school_name')['reading_score'].mean()
eleventh_graders_read_scores = eleventh_graders.groupby('school_name')['reading_score'].mean()
twelfth_graders_read_scores = twelfth_graders.groupby('school_name')['reading_score'].mean()

# Combine each of the scores above into a single DataFrame called `read_scores_by_grade`
read_scores_by_grade = pd.DataFrame({
    "9th": ninth_graders_read_scores,
    "10th": tenth_graders_read_scores,
    "11th": eleventh_graders_read_scores,
    "12th": twelfth_graders_read_scores
})

# Minor data wrangling: drop the "school_name" index label from the displayed table
read_scores_by_grade.index.name = None

# Display the DataFrame
read_scores_by_grade

Unnamed: 0,9th,10th,11th,12th
Bailey High School,81.303155,80.907183,80.945643,80.912451
Cabrera High School,83.676136,84.253219,83.788382,84.287958
Figueroa High School,81.198598,81.408912,80.640339,81.384863
Ford High School,80.632653,81.262712,80.403642,80.662338
Griffin High School,83.369193,83.706897,84.288089,84.013699
Hernandez High School,80.86686,80.660147,81.39614,80.857143
Holden High School,83.677165,83.324561,83.815534,84.698795
Huang High School,81.290284,81.512386,81.417476,80.305983
Johnson High School,81.260714,80.773431,80.616027,81.227564
Pena High School,83.807273,83.612,84.335938,84.59116


# Scores by School Spending  

In [101]:
# Per-student spending bin edges and their display labels (values from the starter code).
# Four labels for the four intervals formed by the five edges.
spending_bins = [
    0,
    585,
    630,
    645,
    680,
]
labels = [
    "<$585",
    "$585-630",
    "$630-645",
    "$645-680",
]

In [103]:
# Work on a copy of the school summary, which already carries "Per Student Budget"
# (approach from the starter code).
school_spending_df = per_school_summary.copy()

# "Per Student Budget" was formatted as currency text earlier, and binning needs
# numbers, not strings. Strip the "$" and "," characters and convert back to float.
# (Technique shared on Slack by Craig Gallagher.)
per_student_budget_text = school_spending_df["Per Student Budget"]
per_student_budget_digits = per_student_budget_text.replace(to_replace="[$,]", value="", regex=True)
school_spending_df["Per Student Budget"] = per_student_budget_digits.astype(float)

In [107]:
# Assign every school to a spending range with pd.cut, using the bins and labels defined above
per_student_budget = school_spending_df['Per Student Budget']
school_spending_df["Spending Ranges (Per Student)"] = pd.cut(
    per_student_budget,
    bins=spending_bins,
    labels=labels,
)

#--------
# Sort by per-student budget (lowest first) purely to make the table easier to read
#--------

school_spending_df_sorted = school_spending_df.sort_values('Per Student Budget')
school_spending_df_sorted

Unnamed: 0,School Type,Total Students,Total School Budget,Per Student Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing,School Size,Spending Ranges (Per Student)
Wilson High School,Charter,2283,"$1,319,574.00",578.0,83.274201,83.989488,93.867718,96.539641,90.582567,Large (2000-5000),<$585
Holden High School,Charter,427,"$248,087.00",581.0,83.803279,83.814988,92.505855,96.252927,89.227166,Small (<1000),<$585
Cabrera High School,Charter,1858,"$1,081,356.00",582.0,83.061895,83.97578,94.133477,97.039828,91.334769,Medium (1000-2000),<$585
Wright High School,Charter,1800,"$1,049,400.00",583.0,83.682222,83.955,93.333333,96.611111,90.333333,Medium (1000-2000),<$585
Shelton High School,Charter,1761,"$1,056,600.00",600.0,83.359455,83.725724,93.867121,95.854628,89.892107,Medium (1000-2000),$585-630
Pena High School,Charter,962,"$585,858.00",609.0,83.839917,84.044699,94.594595,95.945946,90.540541,Small (<1000),$585-630
Griffin High School,Charter,1468,"$917,500.00",625.0,83.351499,83.816757,93.392371,97.138965,90.599455,Medium (1000-2000),$585-630
Bailey High School,District,4976,"$3,124,928.00",628.0,77.048432,81.033963,66.680064,81.93328,54.642283,Large (2000-5000),$585-630
Rodriguez High School,District,3999,"$2,547,363.00",637.0,76.842711,80.744686,66.366592,80.220055,52.988247,Large (2000-5000),$630-645
Thomas High School,Charter,1635,"$1,043,130.00",638.0,83.418349,83.84893,93.272171,97.308869,90.948012,Medium (1000-2000),$630-645


In [90]:
# Calculate averages of the desired columns for each spending range (adapted from the starter code).
# The schools are grouped once and each metric column is selected before mean():
# averaging the whole grouped frame would include the text columns (school type,
# currency-formatted budget), which raises a TypeError on pandas >= 2.0.
# observed=False keeps every spending range in the result even if no school falls
# into it; that was the default when this was written and is now stated explicitly.
spending_groups = school_spending_df.groupby("Spending Ranges (Per Student)", observed=False)

spending_math_scores = spending_groups["Average Math Score"].mean()
spending_reading_scores = spending_groups["Average Reading Score"].mean()
spending_passing_math = spending_groups["% Passing Math"].mean()
spending_passing_reading = spending_groups["% Passing Reading"].mean()
overall_passing_spending = spending_groups["% Overall Passing"].mean()

In [91]:
# Assemble the per-spending-range averages into one summary table.
# Column labels match the other summary tables in this notebook
# ("Average Reading Score", "% Overall Passing"); this table previously used
# "Average Reading Scores" and "%Overall Passing".
spending_summary = pd.DataFrame({
    "Average Math Score": spending_math_scores,
    "Average Reading Score": spending_reading_scores,
    "% Passing Math": spending_passing_math,
    "% Passing Reading": spending_passing_reading,
    "% Overall Passing": overall_passing_spending
})

# Display results
spending_summary

Unnamed: 0_level_0,Average Math Score,Average Reading Scores,% Passing Math,% Passing Reading,%Overall Passing
Spending Ranges (Per Student),Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
<$585,83.455399,83.933814,93.460096,96.610877,90.369459
$585-630,81.899826,83.155286,87.133538,92.718205,81.418596
$630-645,78.518855,81.624473,73.484209,84.391793,62.857656
$645-680,76.99721,81.027843,66.164813,81.133951,53.526855


# Scores by School Size

In [92]:
# School-size bin edges (student counts) and their display labels (values from the starter code).
# Three labels for the three intervals formed by the four edges.
size_bins = [
    0,
    1000,
    2000,
    5000,
]
labels = [
    "Small (<1000)",
    "Medium (1000-2000)",
    "Large (2000-5000)",
]


In [93]:
# Categorize each school by size: pd.cut places the "Total Students" value of every
# row of per_school_summary into one of the size bins and attaches the matching label.
total_students = per_school_summary['Total Students']
school_size_category = pd.cut(total_students, bins=size_bins, labels=labels)

# Store the category as a new "School Size" column on the summary itself
per_school_summary["School Size"] = school_size_category
per_school_summary

Unnamed: 0,School Type,Total Students,Total School Budget,Per Student Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing,School Size
Bailey High School,District,4976,"$3,124,928.00",$628.00,77.048432,81.033963,66.680064,81.93328,54.642283,Large (2000-5000)
Cabrera High School,Charter,1858,"$1,081,356.00",$582.00,83.061895,83.97578,94.133477,97.039828,91.334769,Medium (1000-2000)
Figueroa High School,District,2949,"$1,884,411.00",$639.00,76.711767,81.15802,65.988471,80.739234,53.204476,Large (2000-5000)
Ford High School,District,2739,"$1,763,916.00",$644.00,77.102592,80.746258,68.309602,79.299014,54.289887,Large (2000-5000)
Griffin High School,Charter,1468,"$917,500.00",$625.00,83.351499,83.816757,93.392371,97.138965,90.599455,Medium (1000-2000)
Hernandez High School,District,4635,"$3,022,020.00",$652.00,77.289752,80.934412,66.752967,80.862999,53.527508,Large (2000-5000)
Holden High School,Charter,427,"$248,087.00",$581.00,83.803279,83.814988,92.505855,96.252927,89.227166,Small (<1000)
Huang High School,District,2917,"$1,910,635.00",$655.00,76.629414,81.182722,65.683922,81.316421,53.513884,Large (2000-5000)
Johnson High School,District,4761,"$3,094,650.00",$650.00,77.072464,80.966394,66.057551,81.222432,53.539172,Large (2000-5000)
Pena High School,Charter,962,"$585,858.00",$609.00,83.839917,84.044699,94.594595,95.945946,90.540541,Small (<1000)


In [94]:
# Calculate averages of the desired columns for each school size (adapted from the starter code).
# The schools are grouped once and each metric column is selected before mean():
# averaging the whole grouped frame would include the text columns (school type,
# currency-formatted budgets), which raises a TypeError on pandas >= 2.0.
# observed=False keeps every size category in the result even if no school falls
# into it; that was the default when this was written and is now stated explicitly.
size_groups = per_school_summary.groupby("School Size", observed=False)

size_math_scores = size_groups["Average Math Score"].mean()
size_reading_scores = size_groups["Average Reading Score"].mean()
size_passing_math = size_groups["% Passing Math"].mean()
size_passing_reading = size_groups["% Passing Reading"].mean()
size_overall_passing = size_groups["% Overall Passing"].mean()

In [95]:
# Break down school performance by school size (small, medium, large),
# using the per-size averages computed in the previous cell.
size_summary_columns = {
    "Average Math Score": size_math_scores,
    "Average Reading Score": size_reading_scores,
    "% Passing Math": size_passing_math,
    "% Passing Reading": size_passing_reading,
    "% Overall Passing": size_overall_passing,
}
size_summary = pd.DataFrame(size_summary_columns)

# Display results
size_summary

Unnamed: 0_level_0,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing
School Size,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Small (<1000),83.821598,83.929843,93.550225,96.099437,89.883853
Medium (1000-2000),83.374684,83.864438,93.599695,96.79068,90.621535
Large (2000-5000),77.746417,81.344493,69.963361,82.766634,58.286003


# Scores by School Type  

In [96]:
# Adapted from the starter code.
# Group the per_school_summary DataFrame by "School Type" and average each metric.
# The schools are grouped once and each metric column is selected before mean():
# averaging the whole grouped frame would include the currency-formatted text
# columns, which raises a TypeError on pandas >= 2.0.
type_groups = per_school_summary.groupby('School Type')

type_math_scores = type_groups['Average Math Score'].mean()
type_reading_scores = type_groups['Average Reading Score'].mean()
type_passing_math = type_groups['% Passing Math'].mean()
type_passing_reading = type_groups['% Passing Reading'].mean()
type_overall_passing = type_groups['% Overall Passing'].mean()

#--------
# The starter code's follow-up step, which selected a column from each result
# (for example type_math_scores["Average Math Score"]), is intentionally omitted:
# each result above is already a single Series indexed by school type, so there
# is no column left to select and the lookup would fail.
#--------





In [97]:
# Break down school performance by school type, using the per-type averages
# computed in the previous cell.
type_summary_columns = {
    "Average Math Score": type_math_scores,
    "Average Reading Score": type_reading_scores,
    "% Passing Math": type_passing_math,
    "% Passing Reading": type_passing_reading,
    "% Overall Passing": type_overall_passing,
}
type_summary = pd.DataFrame(type_summary_columns)

# Display results
type_summary

Unnamed: 0_level_0,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing
School Type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Charter,83.473852,83.896421,93.62083,96.586489,90.432244
District,76.956733,80.966636,66.548453,80.799062,53.672208


# Written Report

District Analysis
    Overall, a large majority of the district's students are passing their math and reading exams (75% and 86%, respectively).  However, only 65% of all district students are passing both their math and reading exams. In addition, math scores are generally lower than reading scores.

School Analysis
    Analysis by grade level:
        For each school, the average scores are fairly consistent across grade levels for both math and reading.
        Average math scores are still lower than average reading scores

    Analysis by school spending:
        Charter schools have a smaller budget (per student) than district schools
        District schools spend more per student than charter schools
        Spending more per student did not correspond to higher overall passing rates
                
    Analysis by school size:
        Schools that have 2,000 or fewer students have higher overall passing rates
        The largest charter school has fewer students than the smallest district school

    Analysis by school type:
        Charter schools have a drastically higher overall passing rate (90%) than district schools (54%)

Discussion/Conclusion
    The data analysis represented here clearly suggests two things:
        1. Schools with fewer students perform better in assessments
        2. Charter schools are better at utilizing their resources (budget) in educating their students, as evidenced by drastically higher overall passing rates compared to district schools
    Though these trends appear clear, they should be analyzed further before reporting this information to governing bodies/stakeholders.  For instance, when the average math and reading scores were calculated for each group of schools, the student population was not directly considered.  The per-school (arithmetic) means were averaged with equal weight, even though the charter schools routinely have lower student counts than the district schools.

    In addition, demographics are not included in the original student/school csv files.  There is no data citing how many teachers a school has per grade, or how many students are in a classroom.  This data is consistent with the generally held view that smaller class sizes lead to better student performance/learning.  However, this is only inferred from the smaller charter school student bodies; class size itself was not measured.

    In addition to basic school demographics, this data does not reflect any admission requirements for the charter schools:
        -Is the incoming class chosen from a lottery of applicants?
        -Is an entrance exam/aptitude test score required to gain enrollment?
        -Is admission based on the region directly surrounding the school?
    Many of these factors place undue hardships on students/families, leaving an overcrowded district school as the only option, and draw high-performing students into the charter school system, which can divert resources away from district schools.

