# PyCity Schools Analysis

* As a whole, schools with higher budgets, did not yield better test results. By contrast, schools with higher spending per student actually (\$645-675) underperformed compared to schools with smaller budgets (<\$585 per student).

* As a whole, smaller and medium sized schools dramatically out-performed large sized schools on passing math performances (89-91% passing vs 67%).

* As a whole, charter schools out-performed the public district schools across all metrics. However, more analysis will be required to glean if the effect is due to school practices or the fact that charter schools tend to serve smaller student populations per school. 
---

In [38]:
# Dependencies and Setup
import pandas as pd

# File to Load 
school_data_to_load = "Resources/schools_complete.csv"
student_data_to_load = "Resources/students_complete.csv"

# Read School and Student Data File and store into Pandas Data Frames
school_data = pd.read_csv(school_data_to_load)
student_data = pd.read_csv(student_data_to_load)

# Combine the data into a single dataset (consider using a left join)
main_df = pd.merge(student_data, school_data, on="school_name", how="left")
main_df.head()

Unnamed: 0,Student ID,student_name,gender,grade,school_name,reading_score,math_score,School ID,type,size,budget
0,0,Paul Bradley,M,9th,Huang High School,66,79,0,District,2917,1910635
1,1,Victor Smith,M,12th,Huang High School,94,61,0,District,2917,1910635
2,2,Kevin Rodriguez,M,12th,Huang High School,90,60,0,District,2917,1910635
3,3,Dr. Richard Scott,M,12th,Huang High School,67,58,0,District,2917,1910635
4,4,Bonnie Ray,F,9th,Huang High School,97,84,0,District,2917,1910635


## District Summary

In [45]:
# Total Schools
total_school = main_df["school_name"].nunique()
print(total_school)

# Total Students
total_student = main_df["Student ID"].count()
print(total_student)

# Calculate the Total Budget
total_budget = school_data["budget"].sum()
print(total_budget)

# Calculate the Average Scores
avg_reading = main_df["reading_score"].mean()
print(avg_reading)

avg_math = main_df["math_score"].mean()
print(avg_math)

# No. of Student pass Reading
pass_reading = main_df.loc[main_df["reading_score"] >= 70, "reading_score"].count()
print(pass_reading)

# No. of Student pass Math
pass_math = main_df.loc[main_df["math_score"] >= 70, "math_score"].count()
print(pass_math)

pass_both = main_df.loc[(main_df["reading_score"] >= 70)&(main_df["math_score"] >= 70)].count()["Student ID"]
print(pass_both)

# Percent Passing Reading
pc_pass_reading = (pass_reading / total_student) * 100
print(pc_pass_reading)

# Percent Passing Math
pc_pass_math = (pass_math / total_student) * 100
print(pc_pass_math)

# Percent Passing Both
pc_pass_both = (pass_both / total_student) * 100
print(pc_pass_both)

15
39170
24649428
81.87784018381414
78.98537145774827
33610
29370
25528
85.80546336482001
74.9808526933878
65.17232575950983


In [48]:
# Minor Data Cleanup
#main_df["Total_Students"] = main_df["Total_Students"].map("{:,}".format)
#main_df["Total Budget"] = main_df["Total Budget"].map("${:,.2f}".format)
#main_df["Avg. Math Score"] = main_df["Avg. Math Score"].map("{:,.2f}".format)
#main_df["Avg. Reading Score"] = main_df["Avg. Reading Score"].map("{:,.2f}".format)
#main_df["% Passing Math"] = main_df["% Passing Math"].map("{:,.2f}%".format)
#main_df["% Passing Reading"] = main_df["% Passing Reading"].map("{:,.2f}%".format)
#main_df["% Overall Passing"] = main_df["% Overall Passing"].map("{:,.2f}%".format)

In [49]:
# Display the data frame
district_cols = {
                    "Total Schools" : total_school,
                    "Total_Students" : total_student,
                    "Total Budget" : total_budget,
                    "Avg. Math Score" : avg_math,
                    "Avg. Reading Score" : avg_reading,
                    "% Passing Math" : pc_pass_math,
                    "% Passing Reading" : pc_pass_reading,
                    "% Overall Passing" : pc_pass_both
                }

district_df = pd.DataFrame([district_cols])
district_df.head()

Unnamed: 0,Total Schools,Total_Students,Total Budget,Avg. Math Score,Avg. Reading Score,% Passing Math,% Passing Reading,% Overall Passing
0,15,39170,24649428,78.985371,81.87784,74.980853,85.805463,65.172326


## School Summary

In [50]:
main_df.head(3)

Unnamed: 0,Student ID,student_name,gender,grade,school_name,reading_score,math_score,School ID,type,size,budget
0,0,Paul Bradley,M,9th,Huang High School,66,79,0,District,2917,1910635
1,1,Victor Smith,M,12th,Huang High School,94,61,0,District,2917,1910635
2,2,Kevin Rodriguez,M,12th,Huang High School,90,60,0,District,2917,1910635


In [129]:
# School Type for each school ----- group_1 : Group main_df by 'school_name'
group_1 = main_df.groupby(main_df["school_name"])

In [120]:
# Determine the School Type
sc_type = group_1["type"].value_counts()

pandas.core.series.Series

In [121]:
# Calculate the total student count
sc_total_student = group_1["Student ID"].count()

dtype('int64')

In [None]:
# Per School Budget 
sc_total_budget = group_1["budget"].mean()

In [None]:
# Per Student Budget
# per_school_budget = school_data_complete.groupby(["school_name"]).mean()["budget"]
per_std_budget = sc_total_budget / sc_total_student

In [None]:
# Calculate the average test scores
sc_avg_math = group_1["math_score"].mean()
sc_avg_reading = group_1["reading_score"].mean()

In [None]:
# No of student passing
sc_pass_math = (main_df.loc[main_df["math_score"] >= 70, "math_score"]).count()
sc_pass_reading = (main_df.loc[main_df["reading_score"] >= 70, "reading_score"]).count()
sc_pass_both = main_df.loc[(main_df["math_score"]>=70) & (main_df["reading_score"]>=70)].count()

In [None]:
## Percent Passing
sc_pc_passmath = (sc_pass_math / total_student) * 100
sc_pc_passreading = (sc_pass_reading / total_student) * 100
sc_pc_passboth = (sc_pass_both / total_student) * 100

In [113]:
# Convert to data frame
school_cols = {
                "School Type" : [sc_type],
                "Total Student" : [sc_total_student],
                "Total School Budget" : [sc_total_budget],
                "Per Student Budget" : [per_std_budget],
                "Avg. Math Score" : [sc_avg_math],
                "Avg. Passing" : [sc_avg_reading],
                "%Passing Math" : [sc_pc_passmath],
                "%Passing Reading" : [sc_pc_passreading],
                "%Overall Passing" : [sc_pc_passboth]
                
}



# Minor data munging

# Display the data frame
school_df = pd.DataFrame(school_cols)
school_df

Unnamed: 0,School Type,Total Student,Total School Budget,Per Student Budget,Avg. Math Score,Avg. Passing,%Passing Math,%Passing Reading,%Overall Passing
0,school_name type Bailey High Sc...,school_name Bailey High School 4976 Cabr...,school_name Bailey High School 3124928 C...,school_name Bailey High School 628.0 Cab...,school_name Bailey High School 77.048432...,school_name Bailey High School 81.033963...,74.980853,85.805463,Student ID 65.172326 student_name 65...


## Top Performing Schools (By Passing Rate)

In [2]:
# Sort and show top five schools


## Bottom Performing Schools (By Passing Rate)

In [45]:
# Sort and show bottom five schools


Unnamed: 0,School Type,Total Students,Total School Budget,Per Student Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing Rate
Rodriguez High School,District,3999,"$2,547,363.00",$637.00,76.842711,80.744686,66.366592,80.220055,73.293323
Figueroa High School,District,2949,"$1,884,411.00",$639.00,76.711767,81.15802,65.988471,80.739234,73.363852
Huang High School,District,2917,"$1,910,635.00",$655.00,76.629414,81.182722,65.683922,81.316421,73.500171
Johnson High School,District,4761,"$3,094,650.00",$650.00,77.072464,80.966394,66.057551,81.222432,73.639992
Ford High School,District,2739,"$1,763,916.00",$644.00,77.102592,80.746258,68.309602,79.299014,73.804308


## Math Scores by Grade

In [130]:
main_df.head(3)

Unnamed: 0,Student ID,student_name,gender,grade,school_name,reading_score,math_score,School ID,type,size,budget
0,0,Paul Bradley,M,9th,Huang High School,66,79,0,District,2917,1910635
1,1,Victor Smith,M,12th,Huang High School,94,61,0,District,2917,1910635
2,2,Kevin Rodriguez,M,12th,Huang High School,90,60,0,District,2917,1910635


In [139]:
# Create data series of scores by grade levels using conditionals
grade9 = main_df.loc[main_df["grade"] == '9th', "math_score"]
grade10 = main_df.loc[main_df["grade"] == '10th', "math_score"]
grade11 = main_df.loc[main_df["grade"] == '11th', "math_score"]
grade12 = main_df.loc[main_df["grade"] == '12th', "math_score"]


# Combine series into single data frame
grade_df = pd.DataFrame({
                        "9th" : grade9,
                        "10th" : grade10,
                        "11th" : grade11,
                        "12th" : grade12,
                        })
grade_df.head()

# Group each by school name --- group_2 --- group by school name for Math Score by Grade
group_2 = grade_df.groupby(main_df["school_name"])

# Minor data munging

# Display the data frame
group_2.mean().head()

Unnamed: 0_level_0,9th,10th,11th,12th
school_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Bailey High School,77.083676,76.996772,77.515588,76.492218
Cabrera High School,83.094697,83.154506,82.76556,83.277487
Figueroa High School,76.403037,76.539974,76.884344,77.151369
Ford High School,77.361345,77.672316,76.918058,76.179963
Griffin High School,82.04401,84.229064,83.842105,83.356164


## Reading Score by Grade 

In [141]:
# Create data series of scores by grade levels using conditionals
grade9 = main_df.loc[main_df["grade"] == '9th', "reading_score"]
grade10 = main_df.loc[main_df["grade"] == '10th', "reading_score"]
grade11 = main_df.loc[main_df["grade"] == '11th', "reading_score"]
grade12 = main_df.loc[main_df["grade"] == '12th', "reading_score"]

# Combine series into single data frame
grade_df = pd.DataFrame({
                        "9th" : grade9,
                        "10th" : grade10,
                        "11th" : grade11,
                        "12th" : grade12,
                        })
grade_df.head()

# Group each by school name --- group_2 --- group by school name for Math Score by Grade
group_2 = grade_df.groupby(main_df["school_name"])

# Minor data munging

# Display the data frame
group_2.mean().head()

Unnamed: 0_level_0,9th,10th,11th,12th
school_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Bailey High School,81.303155,80.907183,80.945643,80.912451
Cabrera High School,83.676136,84.253219,83.788382,84.287958
Figueroa High School,81.198598,81.408912,80.640339,81.384863
Ford High School,80.632653,81.262712,80.403642,80.662338
Griffin High School,83.369193,83.706897,84.288089,84.013699


## Scores by School Spending

In [142]:
# bins
spending_bins = [0, 585, 615, 645, 675]
# group labels
spending_labels = ['<$585', '$585-$615', '$615-$645','$645-$675']

spending_cut = pd.cut(school_df["Per Student Budget"], spending_bins, labels=spending_labels, include_lowest=True)

# ALSO -- Note that the values for `% Passing Math`, `% Passing Reading` and `% Overall Passing Rate`
# were computed using averages of averages -- your results may vary if you use weighted averages 

# Selected Columns used to analize for 'Scores by School Spending'



sc_avg_math = group_1["math_score"].mean()
sc_avg_reading = group_1["reading_score"].mean()
# Categorize the spending based on the bins

# Assemble into data frame

# Minor data munging

# Display results


ValueError: The truth value of a Series is ambiguous. Use a.empty, a.bool(), a.item(), a.any() or a.all().

## Scores by School Size

In [49]:
# Establish the bins 

# Categorize the spending based on the bins

# Calculate the scores based on bins

# Assemble into data frame

# Minor data munging

# Display results


Unnamed: 0_level_0,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing Rate
School Size,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Small (<1000),83.821598,83.929843,93.550225,96.099437,94.824831
Medium (1000-2000),83.374684,83.864438,93.599695,96.79068,95.195187
Large (2000-5000),77.746417,81.344493,69.963361,82.766634,76.364998


## Scores by School Type

In [50]:
# Type | Average Math Score | Average Reading Score | % Passing Math | % Passing Reading | % Overall Passing Rate

# Assemble into data frame

# Minor data munging

# Display results


Unnamed: 0_level_0,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing Rate
School Type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Charter,83.473852,83.896421,93.62083,96.586489,95.10366
District,76.956733,80.966636,66.548453,80.799062,73.673757
