In [1]:
 # Dependencies and Setup
import pandas as pd

In [2]:
# Files to Load
student_input = "Resources/students_complete.csv"
school_input = "Resources/schools_complete.csv"

# Read School and Student Data File and store into Pandas DataFrames
student_data = pd.read_csv(student_input)
school_data = pd.read_csv(school_input)

# Combine the data into a single dataset.  
df = pd.merge(student_data, school_data, how="left", on=["school_name", "school_name"])


## District Summary

**Create a high-level snapshot, in a DataFrame, of the district's key metrics, including the following:**

- Total schools
- Total students
- Total budget
- Average math score
- Average reading score
- % passing math (the percentage of students who passed math)
- % passing reading (the percentage of students who passed reading)
- % overall passing (the percentage of students who passed math AND reading)

In [3]:
# calculate totals for schools and students
total_schools = len(df["school_name"].unique())
total_students = df["Student ID"].count()

# calculate the total budget
total_budget = school_data["budget"].sum()

In [4]:
# calculate average scores
avg_math = df["math_score"].mean()
avg_reading = df["reading_score"].mean()

In [5]:
# calculate % passing for math
pass_math = df.loc[(df["math_score"] >= 70.0)].math_score.count()
percent_pass_math = (pass_math / total_students) * 100

In [6]:
# calculate % passing for reading
pass_reading = df.loc[(df["reading_score"] >= 70.0)].reading_score.count()
percent_pass_reading = (pass_reading / total_students) * 100

In [7]:
# calculate % passing for both
pass_both = df.loc[(df["math_score"] >= 70.0) & (df["reading_score"] >= 70.0)].reading_score.count()
percent_pass_both = (pass_both / total_students) * 100

In [8]:
# create new df using calculations completed above
district_df = pd.DataFrame(columns=('Total Schools','Total Students','Total Budget','Average Math Score','Average Reading Score','% Passing Math','% Passing Reading','% Overall Passing'))
district_df.loc[0] = [total_schools,total_students,total_budget,avg_math,avg_reading,percent_pass_math,percent_pass_reading,percent_pass_both]

In [9]:
# format values
district_df["Total Schools"] = district_df["Total Schools"].astype(int)
district_df["Total Students"] = district_df["Total Students"].map("{:,.0f}".format)
district_df["Total Budget"] = district_df["Total Budget"].map("${:,.2f}".format)
district_df["Average Math Score"] = district_df["Average Math Score"].map("{:,.2f}".format)
district_df["Average Reading Score"] = district_df["Average Reading Score"].map("{:,.2f}".format)
district_df["% Passing Math"] = district_df["% Passing Math"].map("{:.2f}%".format)
district_df["% Passing Reading"] = district_df["% Passing Reading"].map("{:.2f}%".format)
district_df["% Overall Passing"] = district_df["% Overall Passing"].map("{:.2f}%".format)


In [47]:
district_df

Unnamed: 0,Total Schools,Total Students,Total Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing
0,15,39170,"$24,649,428.00",78.99,81.88,74.98%,85.81%,65.17%


## School Summary

**Create a DataFrame that summarizes key metrics about each school, including the following:**

- School name
- School type
- Total students
- Total school budget
- Per student budget
- Average math score
- Average reading score
- % passing math (the percentage of students who passed math)
- % passing reading (the percentage of students who passed reading)
- % overall passing (the percentage of students who passed math AND reading)

In [12]:
# group by school / school name is index
# used groupby / aggregate to count student names and find avg for math and reading scores
studentcount_avgs_df = df.set_index(["school_name"]).sort_index()
studentcount_avgs_df = studentcount_avgs_df.groupby("school_name").agg({'student_name': 'count', 'math_score': 'mean', 'reading_score': 'mean'})
studentcount_avgs_df = studentcount_avgs_df.rename(columns={"student_name":"Total Students", "math_score":"Average Math Score", "reading_score":"Average Reading Score"})

In [13]:
# create df with school name as index, with school type and budget
grouped_school_data_df = school_data.set_index(["school_name"]).sort_index()
grouped_school_data_df = grouped_school_data_df[(["type", "budget"])]
grouped_school_data_df = grouped_school_data_df.rename(columns={"type":"School Type", "budget": "Total School Budget"})

# merge this data pulled from school_data w/ other merged dataframes
merged_dfs = pd.merge(grouped_school_data_df,studentcount_avgs_df, on="school_name")


In [14]:
# create mask to only include data with students who passed math
passed_math_df = df.loc[(df["math_score"] >= 70.0)]
# create df with school name as index, with count of test scores 70 or higher in math
math_grouped_df = passed_math_df.groupby("school_name").agg({'math_score': 'count'})
math_grouped_df = math_grouped_df.rename(columns={"math_score":"count_passed_math"})

# create mask to only include data with students who passed reading
passed_reading_df = df.loc[(df["reading_score"] >= 70.0)]
# create df with school name as index, with count of test scores 70 or higher in reading
reading_grouped_df = passed_reading_df.groupby("school_name").agg({'reading_score': 'count'})
reading_grouped_df = reading_grouped_df.rename(columns={"reading_score":"count_passed_reading"})

# create mask to only include data with students who passed both math & reading
passed_both_df = df.loc[(df["math_score"] >= 70.0) & (df["reading_score"] >= 70.0)]
# create df with school name as index, with count of test scores 70 or higher in both math & reading
both_grouped_df = passed_both_df.groupby("school_name").agg({'reading_score': 'count'})
both_grouped_df = both_grouped_df.rename(columns={"reading_score":"count_passed_overall"})


In [15]:
# merge dataframes created so far
merged_dfs = pd.merge(merged_dfs, math_grouped_df,on="school_name").join(reading_grouped_df, on="school_name").join(both_grouped_df, on="school_name")
#merged_dfs = pd.merge(merged_dfs,reading_grouped_df, on="school_name")
#merged_dfs = pd.merge(merged_dfs,both_grouped_df, on="school_name")

# reorder
merged_dfs = merged_dfs[["School Type", "Total Students", "Total School Budget", "Average Math Score","Average Reading Score", "count_passed_math", "count_passed_reading", "count_passed_overall"]]

In [16]:
# add new column, calculating per student budget
merged_dfs["Per Student Budget"] = merged_dfs["Total School Budget"] / merged_dfs["Total Students"]

# add new column, calculating % passed math
merged_dfs["% Passing Math"] = (merged_dfs["count_passed_math"] / merged_dfs["Total Students"]) * 100

# add new column, calculating % passed reading
merged_dfs["% Passing Reading"] = (merged_dfs["count_passed_reading"] / merged_dfs["Total Students"]) * 100

# add new column, calculating % passed math & reading
merged_dfs["% Overall Passing"] = (merged_dfs["count_passed_overall"] / merged_dfs["Total Students"]) * 100

In [17]:
# remove extra columns
school_summary_df = merged_dfs.drop(columns=["count_passed_math","count_passed_reading","count_passed_overall"])

# rearrange columns
formatted_school_summary_df = school_summary_df[["School Type", "Total Students", "Total School Budget", "Per Student Budget", "Average Math Score","Average Reading Score", "% Passing Math", "% Passing Reading", "% Overall Passing"]]

In [18]:
# format columns
formatted_school_summary_df["Total Students"] = formatted_school_summary_df["Total Students"].map("{:,.0f}".format)
formatted_school_summary_df["Total School Budget"] = formatted_school_summary_df["Total School Budget"].map("${:,.2f}".format)
formatted_school_summary_df["Per Student Budget"] = formatted_school_summary_df["Per Student Budget"].map("${:,.2f}".format)
formatted_school_summary_df["Average Math Score"] = formatted_school_summary_df["Average Math Score"].map("{:,.2f}".format)
formatted_school_summary_df["Average Reading Score"] = formatted_school_summary_df["Average Reading Score"].map("{:,.2f}".format)
formatted_school_summary_df["% Passing Math"] = formatted_school_summary_df["% Passing Math"].map("{:.2f}%".format)
formatted_school_summary_df["% Passing Reading"] = formatted_school_summary_df["% Passing Reading"].map("{:.2f}%".format)
formatted_school_summary_df["% Overall Passing"] = formatted_school_summary_df["% Overall Passing"].map("{:.2f}%".format)

formatted_school_summary_df.index.name = None

In [19]:
formatted_school_summary_df

Unnamed: 0,School Type,Total Students,Total School Budget,Per Student Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing
Bailey High School,District,4976,"$3,124,928.00",$628.00,77.05,81.03,66.68%,81.93%,54.64%
Cabrera High School,Charter,1858,"$1,081,356.00",$582.00,83.06,83.98,94.13%,97.04%,91.33%
Figueroa High School,District,2949,"$1,884,411.00",$639.00,76.71,81.16,65.99%,80.74%,53.20%
Ford High School,District,2739,"$1,763,916.00",$644.00,77.1,80.75,68.31%,79.30%,54.29%
Griffin High School,Charter,1468,"$917,500.00",$625.00,83.35,83.82,93.39%,97.14%,90.60%
Hernandez High School,District,4635,"$3,022,020.00",$652.00,77.29,80.93,66.75%,80.86%,53.53%
Holden High School,Charter,427,"$248,087.00",$581.00,83.8,83.81,92.51%,96.25%,89.23%
Huang High School,District,2917,"$1,910,635.00",$655.00,76.63,81.18,65.68%,81.32%,53.51%
Johnson High School,District,4761,"$3,094,650.00",$650.00,77.07,80.97,66.06%,81.22%,53.54%
Pena High School,Charter,962,"$585,858.00",$609.00,83.84,84.04,94.59%,95.95%,90.54%


## Highest-Performing Schools (by % Overall Passing)

Create a DataFrame that highlights the top 5 performing schools based on % Overall Passing. Include the following metrics:

* School name
* School type
* Total students
* Total school budget
* Per student budget
* Average math score
* Average reading score
* % passing math (the percentage of students who passed math)
* % passing reading (the percentage of students who passed reading)
* % overall passing (the percentage of students who passed math AND reading)

In [20]:
#high_sorted_school_summary_df = school_summary_df.sort_values('% Overall Passing', ascending=False).head(5)
high_sorted_school_summary_df = school_summary_df.sort_values('% Overall Passing', ascending=False).iloc[:5,:]
high_sorted_school_summary_df = high_sorted_school_summary_df[["School Type", "Total Students", "Total School Budget", "Per Student Budget", "Average Math Score","Average Reading Score", "% Passing Math", "% Passing Reading", "% Overall Passing"]]

In [21]:
# formatting values
high_sorted_school_summary_df["Total Students"] = high_sorted_school_summary_df["Total Students"].map("{:,.0f}".format)
high_sorted_school_summary_df["Total School Budget"] = high_sorted_school_summary_df["Total School Budget"].map("${:,.2f}".format)
high_sorted_school_summary_df["Per Student Budget"] = high_sorted_school_summary_df["Per Student Budget"].map("${:,.2f}".format)
high_sorted_school_summary_df["Average Math Score"] = high_sorted_school_summary_df["Average Math Score"].map("{:,.2f}".format)
high_sorted_school_summary_df["Average Reading Score"] = high_sorted_school_summary_df["Average Reading Score"].map("{:,.2f}".format)
high_sorted_school_summary_df["% Passing Math"] = high_sorted_school_summary_df["% Passing Math"].map("{:.2f}%".format)
high_sorted_school_summary_df["% Passing Reading"] = high_sorted_school_summary_df["% Passing Reading"].map("{:.2f}%".format)
high_sorted_school_summary_df["% Overall Passing"] = high_sorted_school_summary_df["% Overall Passing"].map("{:.2f}%".format)


In [22]:
high_sorted_school_summary_df

Unnamed: 0,School Type,Total Students,Total School Budget,Per Student Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing
Cabrera High School,Charter,1858,"$1,081,356.00",$582.00,83.06,83.98,94.13%,97.04%,91.33%
Thomas High School,Charter,1635,"$1,043,130.00",$638.00,83.42,83.85,93.27%,97.31%,90.95%
Griffin High School,Charter,1468,"$917,500.00",$625.00,83.35,83.82,93.39%,97.14%,90.60%
Wilson High School,Charter,2283,"$1,319,574.00",$578.00,83.27,83.99,93.87%,96.54%,90.58%
Pena High School,Charter,962,"$585,858.00",$609.00,83.84,84.04,94.59%,95.95%,90.54%


## Lowest-Performing Schools (by % Overall Passing)

Create a DataFrame that highlights the bottom 5 performing schools based on % Overall Passing. Include the following metrics:

* School name
* School type
* Total students
* Total school budget
* Per student budget
* Average math score
* Average reading score
* % passing math (the percentage of students who passed math)
* % passing reading (the percentage of students who passed reading)
* % overall passing (the percentage of students who passed math AND reading)

In [23]:
low_sorted_school_summary_df = school_summary_df.sort_values('% Overall Passing').iloc[:5,:]
low_sorted_school_summary_df = low_sorted_school_summary_df[["School Type", "Total Students", "Total School Budget", "Per Student Budget", "Average Math Score","Average Reading Score", "% Passing Math", "% Passing Reading", "% Overall Passing"]]

In [24]:
# formatting values
low_sorted_school_summary_df["Total Students"] = low_sorted_school_summary_df["Total Students"].map("{:,.0f}".format)
low_sorted_school_summary_df["Total School Budget"] = low_sorted_school_summary_df["Total School Budget"].map("${:,.2f}".format)
low_sorted_school_summary_df["Per Student Budget"] = low_sorted_school_summary_df["Per Student Budget"].map("${:,.2f}".format)
low_sorted_school_summary_df["Average Math Score"] = low_sorted_school_summary_df["Average Math Score"].map("{:,.2f}".format)
low_sorted_school_summary_df["Average Reading Score"] = low_sorted_school_summary_df["Average Reading Score"].map("{:,.2f}".format)
low_sorted_school_summary_df["% Passing Math"] = low_sorted_school_summary_df["% Passing Math"].map("{:.2f}%".format)
low_sorted_school_summary_df["% Passing Reading"] = low_sorted_school_summary_df["% Passing Reading"].map("{:.2f}%".format)
low_sorted_school_summary_df["% Overall Passing"] = low_sorted_school_summary_df["% Overall Passing"].map("{:.2f}%".format)

In [26]:
low_sorted_school_summary_df

Unnamed: 0,School Type,Total Students,Total School Budget,Per Student Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing
Rodriguez High School,District,3999,"$2,547,363.00",$637.00,76.84,80.74,66.37%,80.22%,52.99%
Figueroa High School,District,2949,"$1,884,411.00",$639.00,76.71,81.16,65.99%,80.74%,53.20%
Huang High School,District,2917,"$1,910,635.00",$655.00,76.63,81.18,65.68%,81.32%,53.51%
Hernandez High School,District,4635,"$3,022,020.00",$652.00,77.29,80.93,66.75%,80.86%,53.53%
Johnson High School,District,4761,"$3,094,650.00",$650.00,77.07,80.97,66.06%,81.22%,53.54%


## Math Scores by Grade

Create a DataFrame that lists the average math score for students of each grade level (9th, 10th, 11th, 12th) at each school.

In [27]:
#create df for each grade level
math_mask9th = df[df.grade == "9th"].groupby("school_name").agg({"math_score":"mean"}).rename(columns={"math_score": "9th"})
math_mask10th = df[df.grade == "10th"].groupby("school_name").agg({"math_score":"mean"}).rename(columns={"math_score": "10th"})
math_mask11th = df[df.grade == "11th"].groupby("school_name").agg({"math_score":"mean"}).rename(columns={"math_score": "11th"})
math_mask12th = df[df.grade == "12th"].groupby("school_name").agg({"math_score":"mean"}).rename(columns={"math_score": "12th"})

#merge grade levels data into one df
math_merged_grades = pd.merge(math_mask9th,math_mask10th, on="school_name").join(math_mask11th,on="school_name").join(math_mask12th,on="school_name")
math_merged_grades.index.name = None


In [28]:
# formatting values
math_merged_grades["9th"] = math_merged_grades["9th"].map("{:,.2f}".format)
math_merged_grades["10th"] = math_merged_grades["10th"].map("{:,.2f}".format)
math_merged_grades["11th"] = math_merged_grades["11th"].map("{:,.2f}".format)
math_merged_grades["12th"] = math_merged_grades["12th"].map("{:,.2f}".format)


In [49]:
math_merged_grades

Unnamed: 0,9th,10th,11th,12th
Bailey High School,77.08,77.0,77.52,76.49
Cabrera High School,83.09,83.15,82.77,83.28
Figueroa High School,76.4,76.54,76.88,77.15
Ford High School,77.36,77.67,76.92,76.18
Griffin High School,82.04,84.23,83.84,83.36
Hernandez High School,77.44,77.34,77.14,77.19
Holden High School,83.79,83.43,85.0,82.86
Huang High School,77.03,75.91,76.45,77.23
Johnson High School,77.19,76.69,77.49,76.86
Pena High School,83.63,83.37,84.33,84.12


## Reading Scores by Grade

Create a DataFrame that lists the average reading score for students of each grade level (9th, 10th, 11th, 12th) at each school.

In [30]:
#create df for each grade level
reading_math_mask9th = df[df.grade == "9th"].groupby("school_name").agg({"reading_score":"mean"}).rename(columns={"reading_score": "9th"})
reading_math_mask10th = df[df.grade == "10th"].groupby("school_name").agg({"reading_score":"mean"}).rename(columns={"reading_score": "10th"})
reading_math_mask11th = df[df.grade == "11th"].groupby("school_name").agg({"reading_score":"mean"}).rename(columns={"reading_score": "11th"})
reading_math_mask12th = df[df.grade == "12th"].groupby("school_name").agg({"reading_score":"mean"}).rename(columns={"reading_score": "12th"})

#merge grade levels data into one df
reading_merged_grades = pd.merge(reading_math_mask9th,reading_math_mask10th, on="school_name").join(reading_math_mask11th,on="school_name").join(reading_math_mask12th,on="school_name")
reading_merged_grades.index.name = None

In [31]:
# formatting values
reading_merged_grades["9th"] = reading_merged_grades["9th"].map("{:,.2f}".format)
reading_merged_grades["10th"] = reading_merged_grades["10th"].map("{:,.2f}".format)
reading_merged_grades["11th"] = reading_merged_grades["11th"].map("{:,.2f}".format)
reading_merged_grades["12th"] = reading_merged_grades["12th"].map("{:,.2f}".format)


In [32]:
reading_merged_grades

Unnamed: 0,9th,10th,11th,12th
Bailey High School,81.3,80.91,80.95,80.91
Cabrera High School,83.68,84.25,83.79,84.29
Figueroa High School,81.2,81.41,80.64,81.38
Ford High School,80.63,81.26,80.4,80.66
Griffin High School,83.37,83.71,84.29,84.01
Hernandez High School,80.87,80.66,81.4,80.86
Holden High School,83.68,83.32,83.82,84.7
Huang High School,81.29,81.51,81.42,80.31
Johnson High School,81.26,80.77,80.62,81.23
Pena High School,83.81,83.61,84.34,84.59


## Scores by School Spending

Create a table that breaks down school performance based on average spending ranges (per student). Use your judgment to create four bins with reasonable cutoff values to group school spending. Include the following metrics in the table:

* Average math score
* Average reading score
* % passing math (the percentage of students who passed math)
* % passing reading (the percentage of students who passed reading)
* % overall passing (the percentage of students who passed math AND reading)

In [33]:
# using df created above as starting point, setting with new name
scores_by_spending_df = school_summary_df[["School Type", "Total Students", "Total School Budget", "Per Student Budget", "Average Math Score","Average Reading Score", "% Passing Math", "% Passing Reading", "% Overall Passing"]]


In [34]:
# create bins for values
bins = 0, 585, 630, 645, 680
# create labels for bins
labels = "<$585", "$585-630", "$630-645", "$645-680"

In [35]:
# Slice the data and place it into bins
scores_by_spending_df["Spending Ranges (Per Student)"] = pd.cut(scores_by_spending_df["Per Student Budget"], bins, labels=labels,include_lowest=True)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  scores_by_spending_df["Spending Ranges (Per Student)"] = pd.cut(scores_by_spending_df["Per Student Budget"], bins, labels=labels,include_lowest=True)


In [36]:
# resetting index to be the new bin labels, sorting low to high
scores_by_spending_df = scores_by_spending_df.set_index(["Spending Ranges (Per Student)"]).sort_index()
# calculating avg of amounts within each bin
scores_by_spending_df = scores_by_spending_df.groupby("Spending Ranges (Per Student)").agg({'Average Math Score': 'mean', 'Average Reading Score': 'mean', '% Passing Math':'mean', '% Passing Reading': 'mean','% Overall Passing': 'mean'})


In [37]:
# formatting to 2 places after the decimal
scores_by_spending_df["Average Math Score"] = scores_by_spending_df["Average Math Score"].map("{:,.2f}".format)
scores_by_spending_df["Average Reading Score"] = scores_by_spending_df["Average Reading Score"].map("{:,.2f}".format)
scores_by_spending_df["% Passing Math"] = scores_by_spending_df["% Passing Math"].map("{:,.2f}".format)
scores_by_spending_df["% Passing Reading"] = scores_by_spending_df["% Passing Reading"].map("{:,.2f}".format)
scores_by_spending_df["% Overall Passing"] = scores_by_spending_df["% Overall Passing"].map("{:,.2f}".format)

scores_by_spending_df

Unnamed: 0_level_0,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing
Spending Ranges (Per Student),Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
<$585,83.46,83.93,93.46,96.61,90.37
$585-630,81.9,83.16,87.13,92.72,81.42
$630-645,78.52,81.62,73.48,84.39,62.86
$645-680,77.0,81.03,66.16,81.13,53.53


## Scores by School Size

Create a table that breaks down school performance based on school size (small, medium, or large).

In [38]:
# using df created above as starting point, setting with new name
scores_by_size_df = school_summary_df[["Total Students", "Average Math Score", "Average Reading Score", "% Passing Math", "% Passing Reading", "% Overall Passing"]]



In [39]:
# create bins for values
bins = 0, 1000, 2000, 5000
# create labels for bins
labels = "Small (<1000)", "Medium (1000-2000)", "Large (2000-5000)"

In [40]:
# Slice the data and place it into bins
scores_by_size_df["School Size"] = pd.cut(scores_by_size_df["Total Students"], bins, labels=labels,include_lowest=True)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  scores_by_size_df["School Size"] = pd.cut(scores_by_size_df["Total Students"], bins, labels=labels,include_lowest=True)


In [41]:
# resetting index to be the new bin labels, sorting descending
scores_by_size_df = scores_by_size_df.set_index(["School Size"]).sort_index()
# calculating avg of amounts within each bin
scores_by_size_df = scores_by_size_df.groupby("School Size").agg({'Average Math Score': 'mean', 'Average Reading Score': 'mean', '% Passing Math':'mean', '% Passing Reading': 'mean','% Overall Passing': 'mean'})

In [42]:
# formatting to 2 places after the decimal
scores_by_size_df["Average Math Score"] = scores_by_size_df["Average Math Score"].map("{:,.2f}".format)
scores_by_size_df["Average Reading Score"] = scores_by_size_df["Average Reading Score"].map("{:,.2f}".format)
scores_by_size_df["% Passing Math"] = scores_by_size_df["% Passing Math"].map("{:,.2f}".format)
scores_by_size_df["% Passing Reading"] = scores_by_size_df["% Passing Reading"].map("{:,.2f}".format)
scores_by_size_df["% Overall Passing"] = scores_by_size_df["% Overall Passing"].map("{:,.2f}".format)

scores_by_size_df

Unnamed: 0_level_0,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing
School Size,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Small (<1000),83.82,83.93,93.55,96.1,89.88
Medium (1000-2000),83.37,83.86,93.6,96.79,90.62
Large (2000-5000),77.75,81.34,69.96,82.77,58.29


## Scores by School Type

Create a table that breaks down school performance based on type of school (district or charter).

In [43]:
# using df created above as starting point, setting with new name
scores_by_type_df = school_summary_df[["School Type", "Total Students", "Average Math Score", "Average Reading Score", "% Passing Math", "% Passing Reading", "% Overall Passing"]]


In [44]:
# resetting index to be the new bin labels, sorting descending
scores_by_type_df = scores_by_type_df.set_index(["School Type"]).sort_index()
# calculating avg of amounts within each bin
scores_by_type_df = scores_by_type_df.groupby("School Type").agg({'Average Math Score': 'mean', 'Average Reading Score': 'mean', '% Passing Math':'mean', '% Passing Reading': 'mean','% Overall Passing': 'mean'})

In [45]:
# formatting to 2 places after the decimal
scores_by_type_df["Average Math Score"] = scores_by_type_df["Average Math Score"].map("{:,.2f}".format)
scores_by_type_df["Average Reading Score"] = scores_by_type_df["Average Reading Score"].map("{:,.2f}".format)
scores_by_type_df["% Passing Math"] = scores_by_type_df["% Passing Math"].map("{:,.2f}".format)
scores_by_type_df["% Passing Reading"] = scores_by_type_df["% Passing Reading"].map("{:,.2f}".format)
scores_by_type_df["% Overall Passing"] = scores_by_type_df["% Overall Passing"].map("{:,.2f}".format)

scores_by_type_df

Unnamed: 0_level_0,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing
School Type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Charter,83.47,83.9,93.62,96.59,90.43
District,76.96,80.97,66.55,80.8,53.67
