In [1]:
# Dependencies and Setup
import pandas as pd
#import numpy as np

In [2]:
# File to Load
school_data_to_load = "Resources/schools_complete.csv"
student_data_to_load = "Resources/students_complete.csv"

In [3]:
# Read School and Student Data File and store into Pandas DataFrames
school_data_df = pd.read_csv(school_data_to_load, low_memory = False)
# school_data.head()

In [4]:
student_data_df = pd.read_csv(student_data_to_load)
# student_data.head()

In [5]:
# Combine the data into a single dataset (provided in starter file).  
school_data_complete_df = pd.merge(student_data_df, school_data_df, how="left", on=["school_name", "school_name"])
#school_data_complete_df

In [6]:
# Rename columns of combined df
school_data_complete_df = school_data_complete_df.rename(columns={"Student ID" : "Student ID",
                                                          "student_name" : "Student Name",
                                                          "gender" : "Student Gender",
                                                          "grade" : "Student Grade",
                                                          "school_name" : "School Name",
                                                          "reading_score" : "Student Reading Score",
                                                          "math_score" : "Student Math Score",
                                                          "School ID" : "School ID",
                                                          "type" : "School Type",
                                                          "size" : "School Size",
                                                          "budget" : "School Budget"})
# school_data_complete_df

In [7]:
# Rearrange columns
school_data_complete_df = school_data_complete_df[["Student ID", 
                                                   "Student Name", 
                                                   "Student Gender", 
                                                   "Student Grade", 
                                                   "Student Reading Score", 
                                                   "Student Math Score", 
                                                   "School Name", 
                                                   "School ID", 
                                                   "School Type", 
                                                   "School Size", 
                                                   "School Budget"]]
# school_data_complete_df

## District Summary

In [8]:
# Calculate the total number of schools
schoolcount = school_data_complete_df["School Name"].nunique()
# schoolcount

In [9]:
# Calculate the total number of students
studentcount = school_data_complete_df["Student ID"].count()
# studentcount

In [10]:
# Calculate the total budget (make sure to get unique because the budget repeats itself per student in same school)
totalbudget = school_data_complete_df["School Budget"].unique()
# totalbudget.sum()

In [11]:
# Calculate the average math score
mathaverage = school_data_complete_df["Student Math Score"].mean()
# mathaverage

In [12]:
# Calculate the average reading score
readaverage = school_data_complete_df["Student Reading Score"].mean()
# readaverage

In [13]:
# Calculate percent of students with a math score of 70 or greater 
mathpass = school_data_complete_df[(school_data_complete_df["Student Math Score"] >= 70)].count()["Student Name"]
mathpass_percent = mathpass / float(studentcount) * 100
# mathpass_percent

In [14]:
# Calculate percent of students with a reading score of 70 or greater
readpass = school_data_complete_df[(school_data_complete_df["Student Reading Score"] >= 70)].count()["Student Name"]
readpass_percent = readpass / float(studentcount) * 100
#readpass_percent

In [15]:
# Calculate percent of students who passed both reading and math
mathreadpass_df = school_data_complete_df[(school_data_complete_df['Student Math Score'] >= 70) & (school_data_complete_df['Student Reading Score'] >= 70)]
mathreadpass = (mathreadpass_df["Student ID"].count()/studentcount) * 100
# mathreadpass


In [16]:
# Create summary of district data
district_summary_df = pd.DataFrame({"Total Schools" : [schoolcount],
                                   "Total Students" : studentcount,
                                   "Total Budget" : totalbudget.sum(),
                                   "Average Math Score" : mathaverage,
                                  "Average Reading Score" : readaverage,
                                   "% Passing Math" : mathpass_percent,
                                   "% Passing Reading" : readpass_percent,
                                   "% Overall Passing" : mathreadpass
                                   })
district_summary_df

Unnamed: 0,Total Schools,Total Students,Total Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing
0,15,39170,24649428,78.985371,81.87784,74.980853,85.805463,65.172326


In [17]:
# Summary table formatting using map
district_summary_df["Total Students"] = district_summary_df["Total Students"].map("{:,}".format)
district_summary_df["Total Budget"] = district_summary_df["Total Budget"].map("${:,}".format)
district_summary_df["Average Math Score"] = district_summary_df["Average Math Score"].map("{:.2f}".format)
district_summary_df["Average Reading Score"] = district_summary_df["Average Reading Score"].map("{:.2f}".format)
district_summary_df["% Passing Math"] = district_summary_df["% Passing Math"].map("{:.2f}%".format)
district_summary_df["% Passing Reading"] = district_summary_df["% Passing Reading"].map("{:.2f}%".format)
district_summary_df["% Overall Passing"] = district_summary_df["% Overall Passing"].map("{:.2f}%".format)

district_summary_df

Unnamed: 0,Total Schools,Total Students,Total Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing
0,15,39170,"$24,649,428",78.99,81.88,74.98%,85.81%,65.17%


## School Summary

In [18]:
# Add Per Student Budget to the complete data table. Define variable then add it to the dataframe
perstudentbudget = school_data_complete_df["School Budget"] / school_data_complete_df["School Size"]

school_data_complete_df["Per Student Budget"] = perstudentbudget


In [19]:
# Add Students passing math and reading to the school_data_compete_df
school_data_complete_df["% Passing Math"] = school_data_complete_df["Student Math Score"] >= 70
school_data_complete_df["% Passing Reading"] = school_data_complete_df["Student Reading Score"] >= 70
school_data_complete_df["% Overall Passing"] = school_data_complete_df["% Passing Math"] & school_data_complete_df["% Passing Reading"]
# school_data_complete_df

In [20]:
# Modify and rename dataframe to use just the columns wanted
school_data_complete2_df = school_data_complete_df[["School ID", 
                                                           "School Name",
                                                           "School Type",
                                                           "School Size",
                                                           "School Budget",
                                                           "Per Student Budget",
                                                           "Student ID",
                                                           "Student Name", 
                                                           "Student Gender", 
                                                           "Student Grade", 
                                                           "Student Reading Score",
                                                           "Student Math Score",
                                                           "% Passing Reading",
                                                           "% Passing Math",
                                                           "% Overall Passing"
                                                          ]]
# school_data_complete2_df

In [21]:
# Group the data by School Name
grouped_school_data_df = school_data_complete2_df.groupby(["School Name"])
#grouped_school_data_df.mean()

In [22]:
# Find the school types per school
schooltype_s = grouped_school_data_df["School Type"].unique()

# To extract the string from the list
schooltype_s = schooltype_s.str[0]

In [23]:
# Find the total number of students per school
studentcount_s = grouped_school_data_df["Student ID"].count()
# studentcount_s

In [24]:
# Calculate the total budget per school (make sure to get unique because the budget repeats itself per student in same school)
totalbudget_s = grouped_school_data_df["School Budget"].unique()
#totalbudget_s

In [25]:
# Calculate the total budget per school
perstudentbudget_s = grouped_school_data_df["Per Student Budget"].unique()
#perstudentbudget_s

# To extract the string from the list
perstudentbudget_s = perstudentbudget_s.str[0]

In [26]:
# Calculate the average math score per school
mathaverage_s = grouped_school_data_df["Student Math Score"].mean()
#mathaverage_s

In [27]:
# Calculate the average reading score per school
readaverage_s = grouped_school_data_df["Student Reading Score"].mean()
#readaverage_s

In [28]:
# Calculate percent passing
mathpass_s = grouped_school_data_df["% Passing Math"].mean()*100
readpass_s = grouped_school_data_df["% Passing Reading"].mean() *100
mathreadpass_s = grouped_school_data_df["% Overall Passing"].mean() *100

In [29]:
# Create dataframe of school data
school_data_summary_df = pd.DataFrame({"School Type" : schooltype_s,
                                       "Total Students" : studentcount_s,
                                       "Total Budget" : totalbudget_s,
                                       "Per Student Budget" : perstudentbudget_s,
                                       "Average Math Score" : mathaverage_s,
                                       "Average Reading Score" : readaverage_s,
                                       "% Passing Math" : mathpass_s, 
                                       "% Passing Reading" : readpass_s,
                                      "% Overall Passing" : mathreadpass_s
                                           })
#school_data_summary_df.dtypes

In [30]:
# Change the data types
school_data_summary_df["Total Budget"] = school_data_summary_df["Total Budget"].astype(float)
school_data_summary_df["Per Student Budget"] = school_data_summary_df["Per Student Budget"].astype(float)
#school_data_summary_df

In [31]:
# Create new dataframe for summary formatting. Did this to retain school_data_summary for further analysis
# Create dataframe of school data summary
school_data_summary_format_df = pd.DataFrame({"School Type" : schooltype_s,
                                       "Total Students" : studentcount_s,
                                       "Total Budget" : totalbudget_s,
                                       "Per Student Budget" : perstudentbudget_s,
                                       "Average Math Score" : mathaverage_s,
                                       "Average Reading Score" : readaverage_s,
                                       "% Passing Math" : mathpass_s, 
                                       "% Passing Reading" : readpass_s,
                                      "% Overall Passing" : mathreadpass_s
                                           })

school_data_summary_format_df["Total Budget"] = school_data_summary_format_df["Total Budget"].astype(float)
school_data_summary_format_df["Per Student Budget"] = school_data_summary_format_df["Per Student Budget"].astype(float)
school_data_summary_format_df

Unnamed: 0_level_0,School Type,Total Students,Total Budget,Per Student Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing
School Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Bailey High School,District,4976,3124928.0,628.0,77.048432,81.033963,66.680064,81.93328,54.642283
Cabrera High School,Charter,1858,1081356.0,582.0,83.061895,83.97578,94.133477,97.039828,91.334769
Figueroa High School,District,2949,1884411.0,639.0,76.711767,81.15802,65.988471,80.739234,53.204476
Ford High School,District,2739,1763916.0,644.0,77.102592,80.746258,68.309602,79.299014,54.289887
Griffin High School,Charter,1468,917500.0,625.0,83.351499,83.816757,93.392371,97.138965,90.599455
Hernandez High School,District,4635,3022020.0,652.0,77.289752,80.934412,66.752967,80.862999,53.527508
Holden High School,Charter,427,248087.0,581.0,83.803279,83.814988,92.505855,96.252927,89.227166
Huang High School,District,2917,1910635.0,655.0,76.629414,81.182722,65.683922,81.316421,53.513884
Johnson High School,District,4761,3094650.0,650.0,77.072464,80.966394,66.057551,81.222432,53.539172
Pena High School,Charter,962,585858.0,609.0,83.839917,84.044699,94.594595,95.945946,90.540541


In [32]:
# Summary table formatting using map
school_data_summary_format_df["Total Students"] = school_data_summary_format_df["Total Students"].map("{:,}".format)
school_data_summary_format_df["Total Budget"] = school_data_summary_format_df["Total Budget"].map("${:,.0f}".format)
school_data_summary_format_df["Per Student Budget"] = school_data_summary_format_df["Per Student Budget"].map("${:.0f}".format)
school_data_summary_format_df["Average Math Score"] = school_data_summary_format_df["Average Math Score"].map("{:.2f}".format)
school_data_summary_format_df["Average Reading Score"] = school_data_summary_format_df["Average Reading Score"].map("{:.2f}".format)
school_data_summary_format_df["% Passing Math"] = school_data_summary_format_df["% Passing Math"].map("{:.2f}%".format)
school_data_summary_format_df["% Passing Reading"] = school_data_summary_format_df["% Passing Reading"].map("{:.2f}%".format)
school_data_summary_format_df["% Overall Passing"] = school_data_summary_format_df["% Overall Passing"].map("{:.2f}%".format)
school_data_summary_format_df

Unnamed: 0_level_0,School Type,Total Students,Total Budget,Per Student Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing
School Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Bailey High School,District,4976,"$3,124,928",$628,77.05,81.03,66.68%,81.93%,54.64%
Cabrera High School,Charter,1858,"$1,081,356",$582,83.06,83.98,94.13%,97.04%,91.33%
Figueroa High School,District,2949,"$1,884,411",$639,76.71,81.16,65.99%,80.74%,53.20%
Ford High School,District,2739,"$1,763,916",$644,77.1,80.75,68.31%,79.30%,54.29%
Griffin High School,Charter,1468,"$917,500",$625,83.35,83.82,93.39%,97.14%,90.60%
Hernandez High School,District,4635,"$3,022,020",$652,77.29,80.93,66.75%,80.86%,53.53%
Holden High School,Charter,427,"$248,087",$581,83.8,83.81,92.51%,96.25%,89.23%
Huang High School,District,2917,"$1,910,635",$655,76.63,81.18,65.68%,81.32%,53.51%
Johnson High School,District,4761,"$3,094,650",$650,77.07,80.97,66.06%,81.22%,53.54%
Pena High School,Charter,962,"$585,858",$609,83.84,84.04,94.59%,95.95%,90.54%


## Top Performing Schools (By % Overall Passing)

In [33]:
# Sort grouped school_data_summary_df (unmapped df) by % Overall Passing
school_data_summary_sort1_df=school_data_summary_df.sort_values("% Overall Passing", ascending=False)
school_data_summary_sort1_df.head(5)

Unnamed: 0_level_0,School Type,Total Students,Total Budget,Per Student Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing
School Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Cabrera High School,Charter,1858,1081356.0,582.0,83.061895,83.97578,94.133477,97.039828,91.334769
Thomas High School,Charter,1635,1043130.0,638.0,83.418349,83.84893,93.272171,97.308869,90.948012
Griffin High School,Charter,1468,917500.0,625.0,83.351499,83.816757,93.392371,97.138965,90.599455
Wilson High School,Charter,2283,1319574.0,578.0,83.274201,83.989488,93.867718,96.539641,90.582567
Pena High School,Charter,962,585858.0,609.0,83.839917,84.044699,94.594595,95.945946,90.540541


In [34]:
# Summary table formatting using map
school_data_summary_sort1_df["Total Students"] = school_data_summary_sort1_df["Total Students"].map("{:,}".format)
school_data_summary_sort1_df["Total Budget"] = school_data_summary_sort1_df["Total Budget"].map("${:,.0f}".format)
school_data_summary_sort1_df["Per Student Budget"] = school_data_summary_sort1_df["Per Student Budget"].map("${:.0f}".format)
school_data_summary_sort1_df["Average Math Score"] = school_data_summary_sort1_df["Average Math Score"].map("{:.2f}".format)
school_data_summary_sort1_df["Average Reading Score"] = school_data_summary_sort1_df["Average Reading Score"].map("{:.2f}".format)
school_data_summary_sort1_df["% Passing Math"] = school_data_summary_sort1_df["% Passing Math"].map("{:.2f}%".format)
school_data_summary_sort1_df["% Passing Reading"] = school_data_summary_sort1_df["% Passing Reading"].map("{:.2f}%".format)
school_data_summary_sort1_df["% Overall Passing"] = school_data_summary_sort1_df["% Overall Passing"].map("{:.2f}%".format)

school_data_summary_sort1_df.head(5)

Unnamed: 0_level_0,School Type,Total Students,Total Budget,Per Student Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing
School Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Cabrera High School,Charter,1858,"$1,081,356",$582,83.06,83.98,94.13%,97.04%,91.33%
Thomas High School,Charter,1635,"$1,043,130",$638,83.42,83.85,93.27%,97.31%,90.95%
Griffin High School,Charter,1468,"$917,500",$625,83.35,83.82,93.39%,97.14%,90.60%
Wilson High School,Charter,2283,"$1,319,574",$578,83.27,83.99,93.87%,96.54%,90.58%
Pena High School,Charter,962,"$585,858",$609,83.84,84.04,94.59%,95.95%,90.54%


## Bottom Performing Schools (By % Overall Passing)

In [35]:
# Sort grouped school_data_summary_df (unmapped df) by % Overall Passing
school_data_summary_sort2_df=school_data_summary_df.sort_values("% Overall Passing", ascending=True)
school_data_summary_sort2_df.tail(5)

Unnamed: 0_level_0,School Type,Total Students,Total Budget,Per Student Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing
School Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Pena High School,Charter,962,585858.0,609.0,83.839917,84.044699,94.594595,95.945946,90.540541
Wilson High School,Charter,2283,1319574.0,578.0,83.274201,83.989488,93.867718,96.539641,90.582567
Griffin High School,Charter,1468,917500.0,625.0,83.351499,83.816757,93.392371,97.138965,90.599455
Thomas High School,Charter,1635,1043130.0,638.0,83.418349,83.84893,93.272171,97.308869,90.948012
Cabrera High School,Charter,1858,1081356.0,582.0,83.061895,83.97578,94.133477,97.039828,91.334769


In [36]:
# Summary table formatting using map
school_data_summary_sort2_df["Total Students"] = school_data_summary_sort2_df["Total Students"].map("{:,}".format)
school_data_summary_sort2_df["Total Budget"] = school_data_summary_sort2_df["Total Budget"].map("${:,.0f}".format)
school_data_summary_sort2_df["Per Student Budget"] = school_data_summary_sort2_df["Per Student Budget"].map("${:.0f}".format)
school_data_summary_sort2_df["Average Math Score"] = school_data_summary_sort2_df["Average Math Score"].map("{:.2f}".format)
school_data_summary_sort2_df["Average Reading Score"] = school_data_summary_sort2_df["Average Reading Score"].map("{:.2f}".format)
school_data_summary_sort2_df["% Passing Math"] = school_data_summary_sort2_df["% Passing Math"].map("{:.2f}%".format)
school_data_summary_sort2_df["% Passing Reading"] = school_data_summary_sort2_df["% Passing Reading"].map("{:.2f}%".format)
school_data_summary_sort2_df["% Overall Passing"] = school_data_summary_sort2_df["% Overall Passing"].map("{:.2f}%".format)

school_data_summary_sort2_df.head(5)

Unnamed: 0_level_0,School Type,Total Students,Total Budget,Per Student Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing
School Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Rodriguez High School,District,3999,"$2,547,363",$637,76.84,80.74,66.37%,80.22%,52.99%
Figueroa High School,District,2949,"$1,884,411",$639,76.71,81.16,65.99%,80.74%,53.20%
Huang High School,District,2917,"$1,910,635",$655,76.63,81.18,65.68%,81.32%,53.51%
Hernandez High School,District,4635,"$3,022,020",$652,77.29,80.93,66.75%,80.86%,53.53%
Johnson High School,District,4761,"$3,094,650",$650,77.07,80.97,66.06%,81.22%,53.54%


## Math Scores by Grade

In [37]:
# Create df to use for the math scores
grade_data_all_df = school_data_complete_df.loc[:, ["School ID", 
                                                           "School Name",
                                                           "School Type",
                                                           "School Size",
                                                           "School Budget",
                                                           "Per Student Budget",
                                                           "Student ID",
                                                           "Student Name", 
                                                           "Student Gender", 
                                                           "Student Grade", 
                                                           "Student Reading Score",
                                                           "Student Math Score",
                                                           "% Passing Reading",
                                                           "% Passing Math",
                                                           "% Overall Passing"
                                                          ]]

In [38]:
# Modify/reduce the df
grade_data_df = grade_data_all_df[["School Name",
                                   "Student Name", 
                                   "Student Grade",  
                                   "Student Reading Score",
                                   "Student Math Score" ]]
# grade_data_df

In [39]:
# Conditional statements to get the grade data and group the series by school
# https://datatofish.com/if-condition-in-pandas-dataframe/
# https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.Series.groupby.html
grade9 = grade_data_df.loc[grade_data_df["Student Grade"] == "9th"].groupby("School Name")["Student Math Score"].mean()
grade10 = grade_data_df.loc[grade_data_df["Student Grade"] == "10th"].groupby("School Name")["Student Math Score"].mean()
grade11 = grade_data_df.loc[grade_data_df["Student Grade"] == "11th"].groupby("School Name")["Student Math Score"].mean()
grade12 = grade_data_df.loc[grade_data_df["Student Grade"] == "12th"].groupby("School Name")["Student Math Score"].mean()

In [40]:
# Create new df from group
math_average_df = pd.DataFrame({"9th" : grade9,
                               "10th" : grade10,
                               "11th" : grade11,
                               "12th" : grade12 })
math_average_df

Unnamed: 0_level_0,9th,10th,11th,12th
School Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Bailey High School,77.083676,76.996772,77.515588,76.492218
Cabrera High School,83.094697,83.154506,82.76556,83.277487
Figueroa High School,76.403037,76.539974,76.884344,77.151369
Ford High School,77.361345,77.672316,76.918058,76.179963
Griffin High School,82.04401,84.229064,83.842105,83.356164
Hernandez High School,77.438495,77.337408,77.136029,77.186567
Holden High School,83.787402,83.429825,85.0,82.855422
Huang High School,77.027251,75.908735,76.446602,77.225641
Johnson High School,77.187857,76.691117,77.491653,76.863248
Pena High School,83.625455,83.372,84.328125,84.121547


In [41]:
# Math table formatting using map
math_average_df["9th"] = math_average_df["9th"].map("{:.2f}".format)
math_average_df["10th"] = math_average_df["10th"].map("{:.2f}".format)
math_average_df["11th"] = math_average_df["11th"].map("{:.2f}".format)
math_average_df["12th"] = math_average_df["12th"].map("{:.2f}".format)

math_average_df

Unnamed: 0_level_0,9th,10th,11th,12th
School Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Bailey High School,77.08,77.0,77.52,76.49
Cabrera High School,83.09,83.15,82.77,83.28
Figueroa High School,76.4,76.54,76.88,77.15
Ford High School,77.36,77.67,76.92,76.18
Griffin High School,82.04,84.23,83.84,83.36
Hernandez High School,77.44,77.34,77.14,77.19
Holden High School,83.79,83.43,85.0,82.86
Huang High School,77.03,75.91,76.45,77.23
Johnson High School,77.19,76.69,77.49,76.86
Pena High School,83.63,83.37,84.33,84.12


## Reading Score by Grade 

In [42]:
# Conditional statements to get the grade data and group the series by school
# https://datatofish.com/if-condition-in-pandas-dataframe/
# https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.Series.groupby.html
grade9 = grade_data_df.loc[grade_data_df["Student Grade"] == "9th"].groupby("School Name")["Student Reading Score"].mean()
grade10 = grade_data_df.loc[grade_data_df["Student Grade"] == "10th"].groupby("School Name")["Student Reading Score"].mean()
grade11 = grade_data_df.loc[grade_data_df["Student Grade"] == "11th"].groupby("School Name")["Student Reading Score"].mean()
grade12 = grade_data_df.loc[grade_data_df["Student Grade"] == "12th"].groupby("School Name")["Student Reading Score"].mean()

In [43]:
# Create new df from group
read_average_df = pd.DataFrame({"9th" : grade9,
                               "10th" : grade10,
                               "11th" : grade11,
                               "12th" : grade12 })
read_average_df

Unnamed: 0_level_0,9th,10th,11th,12th
School Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Bailey High School,81.303155,80.907183,80.945643,80.912451
Cabrera High School,83.676136,84.253219,83.788382,84.287958
Figueroa High School,81.198598,81.408912,80.640339,81.384863
Ford High School,80.632653,81.262712,80.403642,80.662338
Griffin High School,83.369193,83.706897,84.288089,84.013699
Hernandez High School,80.86686,80.660147,81.39614,80.857143
Holden High School,83.677165,83.324561,83.815534,84.698795
Huang High School,81.290284,81.512386,81.417476,80.305983
Johnson High School,81.260714,80.773431,80.616027,81.227564
Pena High School,83.807273,83.612,84.335938,84.59116


In [44]:
# Reading table formatting using map
read_average_df["9th"] = read_average_df["9th"].map("{:.2f}".format)
read_average_df["10th"] = read_average_df["10th"].map("{:.2f}".format)
read_average_df["11th"] = read_average_df["11th"].map("{:.2f}".format)
read_average_df["12th"] = read_average_df["12th"].map("{:.2f}".format)

read_average_df

Unnamed: 0_level_0,9th,10th,11th,12th
School Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Bailey High School,81.3,80.91,80.95,80.91
Cabrera High School,83.68,84.25,83.79,84.29
Figueroa High School,81.2,81.41,80.64,81.38
Ford High School,80.63,81.26,80.4,80.66
Griffin High School,83.37,83.71,84.29,84.01
Hernandez High School,80.87,80.66,81.4,80.86
Holden High School,83.68,83.32,83.82,84.7
Huang High School,81.29,81.51,81.42,80.31
Johnson High School,81.26,80.77,80.62,81.23
Pena High School,83.81,83.61,84.34,84.59


## Scores by School Spending

In [45]:
# Create df to use for the spending
scores_data_all_df = school_data_complete2_df[["School ID", 
                                                           "School Name",
                                                           "School Type",
                                                           "School Size",
                                                           "School Budget",
                                                           "Per Student Budget",
                                                           "Student ID",
                                                           "Student Name", 
                                                           "Student Gender", 
                                                           "Student Grade", 
                                                           "Student Reading Score",
                                                           "Student Math Score",
                                                           "% Passing Reading",
                                                           "% Passing Math",
                                                           "% Overall Passing" ]]
# scores_data_all_df

In [46]:
# Reduce the df
scores_data_df = scores_data_all_df[["Per Student Budget",
                                                           "Student ID",
                                                          "Student Reading Score",
                                                           "Student Math Score",
                                                          "% Passing Reading",
                                                          "% Passing Math",
                                                         "% Overall Passing" ]]
#scores_data_df

In [47]:
# Define bins and groups
bins = [0, 584.9999, 629.9999, 644.9999, 680]
group_names = ["<585", "585-630", "630-645", "645-680"]

scores_data_df["Per Student Spending Ranges"] = pd.cut(scores_data_df["Per Student Budget"], bins, labels=group_names)
# scores_data_df

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """


In [48]:
# Group by spending ranges
scores_data_grouped_df = scores_data_df.groupby("Per Student Spending Ranges")
#scores_data_grouped_df.max()

In [49]:
# Recalculate averages and percentages
mathaverage = scores_data_grouped_df["Student Math Score"].mean()
readaverage = scores_data_grouped_df["Student Reading Score"].mean()
mathpass = scores_data_grouped_df["% Passing Math"].mean()*100
readpass = scores_data_grouped_df["% Passing Reading"].mean() *100
mathreadpass = scores_data_grouped_df["% Overall Passing"].mean() *100

In [50]:
# Create df for scores data summary
scores_data_summary = pd.DataFrame({"Average Math Score" : mathaverage,
                                   "Average Reading Score": readaverage,
                                   "% Passing Math": mathpass,
                                   "% Passing Reading": readpass,
                                   "% Overall Passing" : mathreadpass })
# scores_data_summary

In [51]:
# Format the df
scores_data_summary_format = scores_data_summary[["Average Math Score",
                                   "Average Reading Score",
                                   "% Passing Math",
                                   "% Passing Reading",
                                   "% Overall Passing" ]]
scores_data_summary_format

Unnamed: 0_level_0,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing
Per Student Spending Ranges,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
<585,83.363065,83.964039,93.702889,96.686558,90.640704
585-630,79.982873,82.312643,79.109851,88.513145,70.939239
630-645,77.821056,81.301007,70.623565,82.600247,58.841194
645-680,77.049297,81.005604,66.230813,81.109397,53.528791


In [52]:
# Reading table formatting using map
scores_data_summary_format["Average Math Score"] = scores_data_summary_format["Average Math Score"].map("{:.2f}".format)
scores_data_summary_format["Average Reading Score"] = scores_data_summary_format["Average Reading Score"].map("{:.2f}".format)
scores_data_summary_format["% Passing Math"] = scores_data_summary_format["% Passing Math"].map("{:.2f}%".format)
scores_data_summary_format["% Passing Reading"] = scores_data_summary_format["% Passing Reading"].map("{:.2f}%".format)
scores_data_summary_format["% Overall Passing"] = scores_data_summary_format["% Overall Passing"].map("{:.2f}%".format)

scores_data_summary_format

Unnamed: 0_level_0,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing
Per Student Spending Ranges,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
<585,83.36,83.96,93.70%,96.69%,90.64%
585-630,79.98,82.31,79.11%,88.51%,70.94%
630-645,77.82,81.3,70.62%,82.60%,58.84%
645-680,77.05,81.01,66.23%,81.11%,53.53%


## Scores by School Size

In [53]:
# Create df to use for the spending
scores2_data_all_df = school_data_complete2_df[["School ID", 
                                                           "School Name",
                                                           "School Type",
                                                           "School Size",
                                                           "School Budget",
                                                           "Per Student Budget",
                                                           "Student ID",
                                                           "Student Name", 
                                                           "Student Gender", 
                                                           "Student Grade", 
                                                           "Student Reading Score",
                                                           "Student Math Score",
                                                           "% Passing Reading",
                                                           "% Passing Math",
                                                           "% Overall Passing"  ]]
# scores_data_all_df

In [54]:
# Reduce the df
scores2_data_df = scores2_data_all_df[["School Size",
                                                           "Student ID",
                                                          "Student Reading Score",
                                                           "Student Math Score",
                                                          "% Passing Reading",
                                                          "% Passing Math",
                                                         "% Overall Passing" ]]
# scores2_data_df

In [55]:
# Define bins and groups
bins = [0, 999.9999, 1999.9999, 5000]
group_names = ["Small (<1000)", "Medium (1000-2000)", "Large (2000-5000)"]

scores2_data_df["School Size "] = pd.cut(scores2_data_df["School Size"], bins, labels=group_names)
# scores2_data_df

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """


In [56]:
# Group by spending ranges
scores2_data_grouped_df = scores2_data_df.groupby("School Size ")
# scores2_data_grouped_df.max()

In [57]:
# Recalculate averages and percentages
mathaverage = scores2_data_grouped_df["Student Math Score"].mean()
readaverage = scores2_data_grouped_df["Student Reading Score"].mean()
mathpass = scores2_data_grouped_df["% Passing Math"].mean()*100
readpass = scores2_data_grouped_df["% Passing Reading"].mean() *100
mathreadpass = scores2_data_grouped_df["% Overall Passing"].mean() *100

In [58]:
# Create df for scores data summary
scores2_data_summary = pd.DataFrame({"Average Math Score" : mathaverage,
                                   "Average Reading Score": readaverage,
                                   "% Passing Math": mathpass,
                                   "% Passing Reading": readpass,
                                   "% Overall Passing" : mathreadpass })
# scores2_data_summary

In [59]:
# Format df
scores2_data_summary_format = scores2_data_summary[["Average Math Score",
                                   "Average Reading Score",
                                   "% Passing Math",
                                   "% Passing Reading",
                                   "% Overall Passing" ]]
scores2_data_summary_format

Unnamed: 0_level_0,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing
School Size,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Small (<1000),83.828654,83.974082,93.952484,96.040317,90.136789
Medium (1000-2000),83.372682,83.867989,93.616522,96.773058,90.624267
Large (2000-5000),77.477597,81.198674,68.65238,82.125158,56.574046


In [60]:
# Reading table formatting using map
scores2_data_summary_format["Average Math Score"] = scores2_data_summary_format["Average Math Score"].map("{:.2f}".format)
scores2_data_summary_format["Average Reading Score"] = scores2_data_summary_format["Average Reading Score"].map("{:.2f}".format)
scores2_data_summary_format["% Passing Math"] = scores2_data_summary_format["% Passing Math"].map("{:.2f}%".format)
scores2_data_summary_format["% Passing Reading"] = scores2_data_summary_format["% Passing Reading"].map("{:.2f}%".format)
scores2_data_summary_format["% Overall Passing"] = scores2_data_summary_format["% Overall Passing"].map("{:.2f}%".format)

scores2_data_summary_format

Unnamed: 0_level_0,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing
School Size,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Small (<1000),83.83,83.97,93.95%,96.04%,90.14%
Medium (1000-2000),83.37,83.87,93.62%,96.77%,90.62%
Large (2000-5000),77.48,81.2,68.65%,82.13%,56.57%


## Scores by School Type

In [61]:
# Create df to use for the spending
scores3_data_all_df = school_data_complete2_df[["School ID", 
                                                           "School Name",
                                                           "School Type",
                                                           "School Size",
                                                           "School Budget",
                                                           "Per Student Budget",
                                                           "Student ID",
                                                           "Student Name", 
                                                           "Student Gender", 
                                                           "Student Grade", 
                                                           "Student Reading Score",
                                                           "Student Math Score",
                                                           "% Passing Reading",
                                                           "% Passing Math",
                                                           "% Overall Passing"  ]]
# scores_data_all_df

In [62]:
# Reduce the df
scores3_data_df = scores3_data_all_df[["School Type",
                                                           "Student ID",
                                                          "Student Reading Score",
                                                           "Student Math Score",
                                                          "% Passing Reading",
                                                          "% Passing Math",
                                                         "% Overall Passing" ]]
# scores3_data_df

In [63]:
# Group by spending ranges
scores3_data_grouped_df = scores3_data_df.groupby("School Type")
#scores3_data_grouped_df.max()

In [64]:
# Recalculate averages and percentages

mathaverage = scores3_data_grouped_df["Student Math Score"].mean()
readaverage = scores3_data_grouped_df["Student Reading Score"].mean()
mathpass = scores3_data_grouped_df["% Passing Math"].mean()*100
readpass = scores3_data_grouped_df["% Passing Reading"].mean() *100
mathreadpass = scores3_data_grouped_df["% Overall Passing"].mean() *100

In [65]:
# Create df for scores data summary
scores3_data_summary = pd.DataFrame({"Average Math Score" : mathaverage,
                                   "Average Reading Score": readaverage,
                                   "% Passing Math": mathpass,
                                   "% Passing Reading": readpass,
                                   "% Overall Passing" : mathreadpass })
# scores3_data_summary

In [66]:
# Format df
scores3_data_summary_format = scores3_data_summary[["Average Math Score",
                                   "Average Reading Score",
                                   "% Passing Math",
                                   "% Passing Reading",
                                   "% Overall Passing" ]]
scores3_data_summary_format

Unnamed: 0_level_0,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing
School Type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Charter,83.406183,83.902821,93.701821,96.645891,90.560932
District,76.987026,80.962485,66.518387,80.905249,53.695878


In [67]:
# Reading table formatting using map
scores3_data_summary_format["Average Math Score"] = scores3_data_summary_format["Average Math Score"].map("{:.2f}".format)
scores3_data_summary_format["Average Reading Score"] = scores3_data_summary_format["Average Reading Score"].map("{:.2f}".format)
scores3_data_summary_format["% Passing Math"] = scores3_data_summary_format["% Passing Math"].map("{:.2f}%".format)
scores3_data_summary_format["% Passing Reading"] = scores3_data_summary_format["% Passing Reading"].map("{:.2f}%".format)
scores3_data_summary_format["% Overall Passing"] = scores3_data_summary_format["% Overall Passing"].map("{:.2f}%".format)

scores3_data_summary_format

Unnamed: 0_level_0,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing
School Type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Charter,83.41,83.9,93.70%,96.65%,90.56%
District,76.99,80.96,66.52%,80.91%,53.70%
