### Note
* Instructions have been included for each segment. You do not have to follow them exactly, but they are included to help you think through the steps.

In [1]:
# Dependencies and Setup
import pandas as pd

# File to Load (Remember to Change These)
school_data_to_load = "Resources/schools_complete.csv"
student_data_to_load = "Resources/students_complete.csv"

# Read School and Student Data File and store into Pandas DataFrames
school_data = pd.read_csv(school_data_to_load)
student_data = pd.read_csv(student_data_to_load)

# Combine the data into a single dataset.  
school_data_complete = pd.merge(student_data, school_data, how="left", on=["school_name", "school_name"])

## District Summary

* Calculate the total number of schools

* Calculate the total number of students

* Calculate the total budget

* Calculate the average math score 

* Calculate the average reading score

* Calculate the percentage of students with a passing math score (70 or greater)

* Calculate the percentage of students with a passing reading score (70 or greater)

* Calculate the percentage of students who passed math **and** reading (% Overall Passing)

* Create a dataframe to hold the above results

* Optional: give the displayed data cleaner formatting

In [2]:
# Calculate number of schools and number of students
total_schools = len(school_data_complete["school_name"].unique())
total_students = len(school_data_complete["Student ID"].unique())

# Calculate total budget (using school data dataframe only, as budget is per school not per student)
total_budget = school_data["budget"].sum(axis=0)

# Calculate average math and reading scores
avg_math = school_data_complete["math_score"].mean()
avg_reading = school_data_complete["reading_score"].mean()

# Filter by math sores >= 70, calculate number of passing scores, divide by total students to get %
passing_math_df = school_data_complete.loc[school_data_complete["math_score"] >= 70]
passing_math_count = passing_math_df["Student ID"].count()
passing_math_pcnt = passing_math_count / total_students

# Filter by reading scores >=70, calculate number of passing scores, divide by total students to get % 
passing_reading_df = school_data_complete.loc[school_data_complete["reading_score"] >= 70]
passing_reading_count = passing_reading_df["Student ID"].count()
passing_reading_pcnt = passing_reading_count / total_students

# Filter by math scores >= 70 AND reading scores >=70, calculate number of passing scores, divide by total students to get % 
passing_overall_df = school_data_complete.loc[(school_data_complete["math_score"] >= 70) & 
                                                      (school_data_complete["reading_score"] >= 70)]
passing_overall_count = passing_overall_df["Student ID"].count()
passing_overall_pcnt = passing_overall_count / total_students

In [3]:
# Create summary data frame
summary_df = pd.DataFrame({"Total Schools": [total_schools],
                           "Total Students": [total_students],
                          "Total Budget": [total_budget],
                          "Average Math Score": [avg_math],
                          "Average Reading Score": [avg_reading],
                          "% Passing Math": [passing_math_pcnt],
                          "% Passing Reading": [passing_reading_pcnt],
                          "% Passing Overall": [passing_overall_pcnt]})


# Format colulmns in data frame
summary_df["Total Students"] = summary_df["Total Students"].astype(float).map("{:,}".format)
summary_df["Total Budget"] = summary_df["Total Budget"].astype(float).map("${:,}".format)
summary_df["Average Math Score"] = summary_df["Average Math Score"].astype(float).map("{:.2f}".format)
summary_df["Average Reading Score"] = summary_df["Average Reading Score"].astype(float).map("{:.2f}".format)
summary_df["% Passing Math"] = summary_df["% Passing Math"].astype(float).map("{:.2%}".format)
summary_df["% Passing Reading"] = summary_df["% Passing Reading"].astype(float).map("{:.2%}".format)
summary_df["% Passing Overall"] = summary_df["% Passing Overall"].astype(float).map("{:.2%}".format)

summary_df
                

Unnamed: 0,Total Schools,Total Students,Total Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Passing Overall
0,15,39170.0,"$24,649,428.0",78.99,81.88,74.98%,85.81%,65.17%


## School Summary

* Create an overview table that summarizes key metrics about each school, including:
  * School Name
  * School Type
  * Total Students
  * Total School Budget
  * Per Student Budget
  * Average Math Score
  * Average Reading Score
  * % Passing Math
  * % Passing Reading
  * % Overall Passing (The percentage of students that passed math **and** reading.)
  
* Create a dataframe to hold the above results

In [4]:
# Create a data frame that is grouped by school
school_grouped_df = school_data_complete.groupby(["school_name"])

# Calculate number of students per school
sch_total_students = school_grouped_df["Student ID"].count()

# Calculate school budget and budget per student
sch_budget = school_grouped_df["budget"].mean()
sch_budget_per_student = sch_budget / sch_total_students

# Calculate average math and reading scores per school
sch_avg_math = school_grouped_df["math_score"].mean()
sch_avg_reading = school_grouped_df["reading_score"].mean()

# Group by school the previously created dataframe that filters on math scores >= 70
passing_math_sch_grouped_df = passing_math_df.groupby(["school_name"])
sch_passing_math_count = passing_math_sch_grouped_df["Student ID"].count()
sch_passing_math_pcnt = sch_passing_math_count / sch_total_students

# Group by school the previously created dataframe that filters on reading scores >= 70
passing_reading_sch_grouped_df = passing_reading_df.groupby(["school_name"])
sch_passing_reading_count = passing_reading_sch_grouped_df["Student ID"].count()
sch_passing_reading_pcnt = sch_passing_reading_count / sch_total_students

# Group by school the previously created dataframe that filters on math AND reading scores >= 70
passing_overall_sch_grouped_df = passing_overall_df.groupby(["school_name"])
sch_passing_overall_count = passing_overall_sch_grouped_df["Student ID"].count()
sch_passing_overall_pcnt = sch_passing_overall_count / sch_total_students

In [6]:
# Create summary data frame of calculated values
school_summary_df = pd.DataFrame({"Total Students": sch_total_students,
                                  "Total School Budget": sch_budget,
                                  "Per Student Budget": sch_budget_per_student,
                                  "Average Math Score": sch_avg_math,
                                  "Average Reading Score": sch_avg_reading,
                                  "% Passing Math": sch_passing_math_pcnt,
                                  "% Passing Reading": sch_passing_reading_pcnt,
                                  "% Passing Overall": sch_passing_overall_pcnt})

# Create a copy of the school dataframe with just school name and school type
school_data_reduced = school_data[["school_name", "type"]]

# Combine summary data frame and school data frame copy to pull in "type" column
school_summary_df = pd.merge(school_summary_df, school_data_reduced, on="school_name")

# Rename "type" column to "School Type"
school_summary_df = school_summary_df.rename(columns={"type": "School Type"})

# Set index to school name and remove index name
school_summary_df = school_summary_df.set_index("school_name")
school_summary_df.index.name = None

# Re-order columns
school_summary_df = school_summary_df[["School Type", "Total Students", "Total School Budget", "Per Student Budget", 
                                      "Average Math Score", "Average Reading Score", "% Passing Math", "% Passing Reading",
                                      "% Passing Overall"]]


# Format colulmns in data frame
school_summary_df["Total Students"] = school_summary_df["Total Students"].astype(float).map("{:,}".format)
school_summary_df["Total School Budget"] = school_summary_df["Total School Budget"].astype(float).map("${:,.2f}".format)
school_summary_df["Per Student Budget"] = school_summary_df["Per Student Budget"].astype(float).map("${:,.2f}".format)
school_summary_df["Average Math Score"] = school_summary_df["Average Math Score"].astype(float).map("{:.2f}".format)
school_summary_df["Average Reading Score"] = school_summary_df["Average Reading Score"].astype(float).map("{:.2f}".format)
school_summary_df["% Passing Math"] = school_summary_df["% Passing Math"].astype(float).map("{:.2%}".format)
school_summary_df["% Passing Reading"] = school_summary_df["% Passing Reading"].astype(float).map("{:.2%}".format)
school_summary_df["% Passing Overall"] = school_summary_df["% Passing Overall"].astype(float).map("{:.2%}".format)

# Display summary data frame
school_summary_df

Unnamed: 0,School Type,Total Students,Total School Budget,Per Student Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Passing Overall
Bailey High School,District,4976.0,"$3,124,928.00",$628.00,77.05,81.03,66.68%,81.93%,54.64%
Cabrera High School,Charter,1858.0,"$1,081,356.00",$582.00,83.06,83.98,94.13%,97.04%,91.33%
Figueroa High School,District,2949.0,"$1,884,411.00",$639.00,76.71,81.16,65.99%,80.74%,53.20%
Ford High School,District,2739.0,"$1,763,916.00",$644.00,77.1,80.75,68.31%,79.30%,54.29%
Griffin High School,Charter,1468.0,"$917,500.00",$625.00,83.35,83.82,93.39%,97.14%,90.60%
Hernandez High School,District,4635.0,"$3,022,020.00",$652.00,77.29,80.93,66.75%,80.86%,53.53%
Holden High School,Charter,427.0,"$248,087.00",$581.00,83.8,83.81,92.51%,96.25%,89.23%
Huang High School,District,2917.0,"$1,910,635.00",$655.00,76.63,81.18,65.68%,81.32%,53.51%
Johnson High School,District,4761.0,"$3,094,650.00",$650.00,77.07,80.97,66.06%,81.22%,53.54%
Pena High School,Charter,962.0,"$585,858.00",$609.00,83.84,84.04,94.59%,95.95%,90.54%


## Top Performing Schools (By % Overall Passing)

* Sort and display the top five performing schools by % overall passing.

In [7]:
schools_sorted_top = school_summary_df.sort_values("% Passing Overall", ascending=False)
schools_sorted_top.head()

Unnamed: 0,School Type,Total Students,Total School Budget,Per Student Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Passing Overall
Cabrera High School,Charter,1858.0,"$1,081,356.00",$582.00,83.06,83.98,94.13%,97.04%,91.33%
Thomas High School,Charter,1635.0,"$1,043,130.00",$638.00,83.42,83.85,93.27%,97.31%,90.95%
Griffin High School,Charter,1468.0,"$917,500.00",$625.00,83.35,83.82,93.39%,97.14%,90.60%
Wilson High School,Charter,2283.0,"$1,319,574.00",$578.00,83.27,83.99,93.87%,96.54%,90.58%
Pena High School,Charter,962.0,"$585,858.00",$609.00,83.84,84.04,94.59%,95.95%,90.54%


## Bottom Performing Schools (By % Overall Passing)

* Sort and display the five worst-performing schools by % overall passing.

In [8]:
schools_sorted_bottom = school_summary_df.sort_values("% Passing Overall")
schools_sorted_bottom.head()

Unnamed: 0,School Type,Total Students,Total School Budget,Per Student Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Passing Overall
Rodriguez High School,District,3999.0,"$2,547,363.00",$637.00,76.84,80.74,66.37%,80.22%,52.99%
Figueroa High School,District,2949.0,"$1,884,411.00",$639.00,76.71,81.16,65.99%,80.74%,53.20%
Huang High School,District,2917.0,"$1,910,635.00",$655.00,76.63,81.18,65.68%,81.32%,53.51%
Hernandez High School,District,4635.0,"$3,022,020.00",$652.00,77.29,80.93,66.75%,80.86%,53.53%
Johnson High School,District,4761.0,"$3,094,650.00",$650.00,77.07,80.97,66.06%,81.22%,53.54%


## Math Scores by Grade

* Create a table that lists the average Reading Score for students of each grade level (9th, 10th, 11th, 12th) at each school.

  * Create a pandas series for each grade. Hint: use a conditional statement.
  
  * Group each series by school
  
  * Combine the series into a dataframe
  
  * Optional: give the displayed data cleaner formatting

In [9]:
# Create data frame for each grade with math score and school name
grade_nine_math = school_data_complete[["math_score", "school_name"]].loc[school_data_complete["grade"] == "9th"]
grade_ten_math = school_data_complete[["math_score", "school_name"]].loc[school_data_complete["grade"] == "10th"]
grade_eleven_math = school_data_complete[["math_score", "school_name"]].loc[school_data_complete["grade"] == "11th"]
grade_twelve_math = school_data_complete[["math_score", "school_name"]].loc[school_data_complete["grade"] == "12th"]

# Group each data frame by school / calculate average score
sch_grade_nine_math = grade_nine_math.groupby("school_name").mean()
sch_grade_ten_math = grade_ten_math.groupby("school_name").mean()
sch_grade_eleven_math = grade_eleven_math.groupby("school_name").mean()
sch_grade_twelve_math = grade_twelve_math.groupby("school_name").mean()

# Merge data frames together one at a time, renaming columns as we go
math_by_grade = pd.merge(sch_grade_nine_math, sch_grade_ten_math, on=["school_name"])
math_by_grade = math_by_grade.rename(columns={"math_score_x": "9th", "math_score_y": "10th"})
math_by_grade = pd.merge(math_by_grade, sch_grade_eleven_math, on=["school_name"])
math_by_grade = math_by_grade.rename(columns={"math_score": "11th"})
math_by_grade = pd.merge(math_by_grade, sch_grade_twelve_math, on=["school_name"])
math_by_grade = math_by_grade.rename(columns={"math_score": "12th"})

# Apply formatting
math_by_grade.index.name = None
math_by_grade["9th"] = math_by_grade["9th"].astype(float).map("{:.2f}".format)
math_by_grade["10th"] = math_by_grade["10th"].astype(float).map("{:.2f}".format)
math_by_grade["11th"] = math_by_grade["11th"].astype(float).map("{:.2f}".format)
math_by_grade["12th"] = math_by_grade["12th"].astype(float).map("{:.2f}".format)

# Display data frame
math_by_grade

Unnamed: 0,9th,10th,11th,12th
Bailey High School,77.08,77.0,77.52,76.49
Cabrera High School,83.09,83.15,82.77,83.28
Figueroa High School,76.4,76.54,76.88,77.15
Ford High School,77.36,77.67,76.92,76.18
Griffin High School,82.04,84.23,83.84,83.36
Hernandez High School,77.44,77.34,77.14,77.19
Holden High School,83.79,83.43,85.0,82.86
Huang High School,77.03,75.91,76.45,77.23
Johnson High School,77.19,76.69,77.49,76.86
Pena High School,83.63,83.37,84.33,84.12


## Reading Score by Grade 

* Perform the same operations as above for reading scores

In [10]:
# Create data frame for each grade with reading score and school name
grade_nine_reading = school_data_complete[["reading_score", "school_name"]].loc[school_data_complete["grade"] == "9th"]
grade_ten_reading = school_data_complete[["reading_score", "school_name"]].loc[school_data_complete["grade"] == "10th"]
grade_eleven_reading = school_data_complete[["reading_score", "school_name"]].loc[school_data_complete["grade"] == "11th"]
grade_twelve_reading = school_data_complete[["reading_score", "school_name"]].loc[school_data_complete["grade"] == "12th"]

# Group each data frame by school / calculate average score
sch_grade_nine_reading = grade_nine_reading.groupby("school_name").mean()
sch_grade_ten_reading = grade_ten_reading.groupby("school_name").mean()
sch_grade_eleven_reading = grade_eleven_reading.groupby("school_name").mean()
sch_grade_twelve_reading = grade_twelve_reading.groupby("school_name").mean()

# Merge data frames together one at a time, renaming columns as we go
reading_by_grade = pd.merge(sch_grade_nine_reading, sch_grade_ten_reading, on=["school_name"])
reading_by_grade = reading_by_grade.rename(columns={"reading_score_x": "9th", "reading_score_y": "10th"})
reading_by_grade = pd.merge(reading_by_grade, sch_grade_eleven_reading, on=["school_name"])
reading_by_grade = reading_by_grade.rename(columns={"reading_score": "11th"})
reading_by_grade = pd.merge(reading_by_grade, sch_grade_twelve_reading, on=["school_name"])
reading_by_grade = reading_by_grade.rename(columns={"reading_score": "12th"})

# Apply formatting
reading_by_grade.index.name = None
reading_by_grade["9th"] = reading_by_grade["9th"].astype(float).map("{:.2f}".format)
reading_by_grade["10th"] = reading_by_grade["10th"].astype(float).map("{:.2f}".format)
reading_by_grade["11th"] = reading_by_grade["11th"].astype(float).map("{:.2f}".format)
reading_by_grade["12th"] = reading_by_grade["12th"].astype(float).map("{:.2f}".format)

# Display data frame
reading_by_grade

Unnamed: 0,9th,10th,11th,12th
Bailey High School,81.3,80.91,80.95,80.91
Cabrera High School,83.68,84.25,83.79,84.29
Figueroa High School,81.2,81.41,80.64,81.38
Ford High School,80.63,81.26,80.4,80.66
Griffin High School,83.37,83.71,84.29,84.01
Hernandez High School,80.87,80.66,81.4,80.86
Holden High School,83.68,83.32,83.82,84.7
Huang High School,81.29,81.51,81.42,80.31
Johnson High School,81.26,80.77,80.62,81.23
Pena High School,83.81,83.61,84.34,84.59


## Scores by School Spending

* Create a table that breaks down school performances based on average Spending Ranges (Per Student). Use 4 reasonable bins to group school spending. Include in the table each of the following:
  * Average Math Score
  * Average Reading Score
  * % Passing Math
  * % Passing Reading
  * Overall Passing Rate (Average of the above two)

In [11]:
# Determine min and max

print(school_summary_df["Per Student Budget"].min())
print(school_summary_df["Per Student Budget"].max())

$578.00
$655.00


In [12]:
# Make a copy of the original data frame
school_spending_df = school_data_complete

# Add columns to data frame with boolean values showing whether student passed math, passed reading, and passed overall
school_spending_df["pass_math"] = school_spending_df["math_score"] >= 70
school_spending_df["pass_reading"] = school_spending_df["reading_score"] >= 70
school_spending_df["pass_overall"] = (school_spending_df["math_score"] >= 70) & (school_spending_df["reading_score"] >= 70)

# Create a new column for budget per student
school_spending_df["budget_per_student"] = school_spending_df["budget"] / school_spending_df["size"]

# Create four bins of equal size and group names
bins = [578, 597.25, 616.5, 635.75, 655]
groups = ["<$597.25", "$597.25-616.5", "$616.5-637.5", "$637.5-655"]

# Slice the data into bins based on budget per student
school_spending_df["budget_range"] = pd.cut(school_spending_df["budget_per_student"], bins, labels=groups, include_lowest = True)

# Create a groupby object based on the budget range
school_spending_df = school_spending_df.groupby("budget_range")

# Get the average math and reading scores as well as % passing math, reading, and overall
# Note that since the pass_math, pass_reading, and pass_overal columns are booleans where True=1 and False=0, taking the mean
# will give us the % that passed
school_spending_summary = school_spending_df[["math_score", "reading_score", "pass_math", "pass_reading", "pass_overall"]].mean()

# Rename columns
school_spending_summary = school_spending_summary.rename(columns={"math_score": "Average Math Score", 
                                                                  "reading_score": "Average Reading Score",
                                                                  "pass_math": "% Passing Math", 
                                                                  "pass_reading": "% Passing Reading",
                                                                  "pass_overall": "% Passing Overall"})
                                                                 
# Format colulmns
school_spending_summary["Average Math Score"] = school_spending_summary["Average Math Score"].astype(float).map("{:.2f}".format)
school_spending_summary["Average Reading Score"] = school_spending_summary["Average Reading Score"].astype(float).map("{:.2f}".format)
school_spending_summary["% Passing Math"] = school_spending_summary["% Passing Math"].astype(float).map("{:.2%}".format)
school_spending_summary["% Passing Reading"] = school_spending_summary["% Passing Reading"].astype(float).map("{:.2%}".format)
school_spending_summary["% Passing Overall"] = school_spending_summary["% Passing Overall"].astype(float).map("{:.2%}".format)

# Display data frame
school_spending_summary

Unnamed: 0_level_0,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Passing Overall
budget_range,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
<$597.25,83.36,83.96,93.70%,96.69%,90.64%
$597.25-616.5,83.53,83.84,94.12%,95.89%,90.12%
$616.5-637.5,78.48,81.67,72.77%,85.40%,62.83%
$637.5-655,77.42,81.15,68.34%,81.82%,56.07%


## Scores by School Size

* Perform the same operations as above, based on school size.

In [13]:
# Make a copy of the original data frame
school_size_df = school_data_complete

# Add columns to data frame with boolean values showing whether student passed math, passed reading, and passed overall
school_size_df["pass_math"] = school_size_df["math_score"] >= 70
school_size_df["pass_reading"] = school_size_df["reading_score"] >= 70
school_size_df["pass_overall"] = (school_size_df["math_score"] >= 70) & (school_size_df["reading_score"] >= 70)

# Create a new column for budget per student
school_size_df["budget_per_student"] = school_size_df["budget"] / school_size_df["size"]

# Create four bins of equal size and group names
bins = [0, 1000, 2000, 5000]
groups = ["Small (<1000)", "Medium (1000-2000)", "Large (2000-3000)"]

# Slice the data into bins based on budget per student
school_size_df["size_category"] = pd.cut(school_size_df["size"], bins, labels=groups, include_lowest = True)

# Create a groupby object based on the budget range
school_size_df = school_size_df.groupby("size_category")

# Get the average math and reading scores
school_size_summary = school_size_df[["math_score", "reading_score", "pass_math", "pass_reading", "pass_overall"]].mean()

# Rename columns
school_size_summary = school_size_summary.rename(columns={"math_score": "Average Math Score", 
                                                          "reading_score": "Average Reading Score",
                                                          "pass_math": "% Passing Math",
                                                          "pass_reading": "% Passing Reading",
                                                          "pass_overall": "% Passing Overall"})
                                                                                                                                   
# Format colulmns
school_size_summary["Average Math Score"] = school_size_summary["Average Math Score"].astype(float).map("{:.2f}".format)
school_size_summary["Average Reading Score"] = school_size_summary["Average Reading Score"].astype(float).map("{:.2f}".format)
school_size_summary["% Passing Math"] = school_size_summary["% Passing Math"].astype(float).map("{:.2%}".format)
school_size_summary["% Passing Reading"] = school_size_summary["% Passing Reading"].astype(float).map("{:.2%}".format)
school_size_summary["% Passing Overall"] = school_size_summary["% Passing Overall"].astype(float).map("{:.2%}".format)
    
# Display dataframe
school_size_summary

Unnamed: 0_level_0,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Passing Overall
size_category,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Small (<1000),83.83,83.97,93.95%,96.04%,90.14%
Medium (1000-2000),83.37,83.87,93.62%,96.77%,90.62%
Large (2000-3000),77.48,81.2,68.65%,82.13%,56.57%


## Scores by School Type

* Perform the same operations as above, based on school type

In [14]:
# Make a copy of the original dataframe
school_type_df = school_data_complete
                                                                                         
# Add columns to data frame with boolean values showing whether student passed math, passed reading, and passed overall
school_type_df["pass_math"] = school_type_df["math_score"] >= 70
school_type_df["pass_reading"] = school_type_df["reading_score"] >= 70
school_type_df["pass_overall"] = (school_type_df["math_score"] >= 70) & (school_type_df["reading_score"] >= 70)                                               

# Create a groupby object based on type
school_type_df = school_type_df.groupby("type")

# Get the average math and reading scores as well as % passing math, reading, and overall
# Note that since the pass_math, pass_reading, and pass_overal columns are booleans where True=1 and False=0, taking the mean
# will give us the % that passed
school_type_summary = school_type_df[["math_score", "reading_score", "pass_math", "pass_reading", "pass_overall"]].mean()


# Rename columns
school_type_summary = school_type_summary.rename(columns={"math_score": "Average Math Score", 
                                                          "reading_score": "Average Reading Score",
                                                          "pass_math": "% Passing Math",
                                                          "pass_reading": "% Passing Reading",
                                                          "pass_overall": "% Passing Overall"})
                                                                                                                                   
# Format colulmns
school_type_summary["Average Math Score"] = school_type_summary["Average Math Score"].astype(float).map("{:.2f}".format)
school_type_summary["Average Reading Score"] = school_type_summary["Average Reading Score"].astype(float).map("{:.2f}".format)
school_type_summary["% Passing Math"] = school_type_summary["% Passing Math"].astype(float).map("{:.2%}".format)
school_type_summary["% Passing Reading"] = school_type_summary["% Passing Reading"].astype(float).map("{:.2%}".format)
school_type_summary["% Passing Overall"] = school_type_summary["% Passing Overall"].astype(float).map("{:.2%}".format)

school_type_summary

Unnamed: 0_level_0,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Passing Overall
type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Charter,83.41,83.9,93.70%,96.65%,90.56%
District,76.99,80.96,66.52%,80.91%,53.70%


# Report

1. Schools with higher per student budgets have lower passing rates. Conventional wisdom might suggest that well funded schools have better student outcomes, but the data suggests that the opposite is true. Increasing school funding seems unlikely to produce worse results; there is a stronger possibility that schools with poor outcomes receive increased funding to try to improve student outcomes.
2. Large schools have meaningfully lower passing rates than small and medium schools. In order to understand why that is, we might want to look at other factors that might vary meaningfully between large schools and small/medium schools such as class size or population density.
