### Note
* Instructions have been included for each segment. You do not have to follow them exactly, but they are included to help you think through the steps.

In [1]:
# Dependencies and Setup
import pandas as pd

# File to Load
school_data_to_load = "schools_complete.csv"
student_data_to_load = "students_complete.csv"

# Read School and Student Data File and store into Pandas DataFrames
school_data = pd.read_csv(school_data_to_load)
student_data = pd.read_csv(student_data_to_load)
#print(school_data) #check to see if file has been loaded properly 
#print(student_data) check to see if file has been loaded properly 

# Combine the data into a single dataset.  
school_data_complete = pd.merge(student_data, school_data, how="left", on=["school_name", "school_name"])
school_data_complete.head()  #.head is a method that displays your dataframe

Unnamed: 0,Student ID,student_name,gender,grade,school_name,reading_score,math_score,School ID,type,size,budget
0,0,Paul Bradley,M,9th,Huang High School,66,79,0,District,2917,1910635
1,1,Victor Smith,M,12th,Huang High School,94,61,0,District,2917,1910635
2,2,Kevin Rodriguez,M,12th,Huang High School,90,60,0,District,2917,1910635
3,3,Dr. Richard Scott,M,12th,Huang High School,67,58,0,District,2917,1910635
4,4,Bonnie Ray,F,9th,Huang High School,97,84,0,District,2917,1910635


## District Summary

* Calculate the total number of schools

* Calculate the total number of students

* Calculate the total budget

* Calculate the average math score 

* Calculate the average reading score

* Calculate the percentage of students with a passing math score (70 or greater)

* Calculate the percentage of students with a passing reading score (70 or greater)

* Calculate the percentage of students who passed math **and** reading (% Overall Passing)

* Create a dataframe to hold the above results

* Optional: give the displayed data cleaner formatting

In [2]:
# Calculate the total number of schools
#total_schools_df= len(school_data_complete["school_name"].unique()) #use length function of unique school names #method 1
#try accomplishing the above with loc method
total_schools_df=len(school_data_complete.loc[:,'school_name'].unique())
total_schools_df #use hanging object check to test

15

In [3]:
# Calculate the total number of students
total_students_df= len(school_data_complete["Student ID"].unique()) #len global function to find number of items in a list
total_students_df #use hanging object check to test

39170

In [4]:
# Calculate the total budget (make sure to find it for unique school values only as to not have duplicates)  
total_budget_df= school_data["budget"].sum() #.sum() global method to add values up
total_budget_df #use hanging object check to test

24649428

In [5]:
# Calculate the average math score
average_math_score= student_data["math_score"].mean()
average_math_score #test 

78.98537145774827

In [6]:
# Calculate the average reading score
average_reading_score= student_data["reading_score"].mean()
average_reading_score #test

81.87784018381414

In [7]:
# Calculate the percentage of students with a passing math score (70 or greater), therefore greater than or equal to 70
students_passing_math = student_data.loc[student_data["math_score"] >= 70,:]
percent_passing_math = round(float(students_passing_math["math_score"].count()/total_students_df)*100,6)
percent_passing_math #test

74.980853

In [8]:
# Calculate the percentage of students with a passing reading score (70 or greater), therefore greater than or equal to 70
students_passing_reading = student_data.loc[student_data["reading_score"] >= 70,:]
percent_passing_reading = round(float(students_passing_reading["reading_score"].count()/total_students_df)*100,6)
percent_passing_reading #test

85.805463

In [9]:
# Calculate the percentage of students who passed math and reading (% Overall Passing)
#overall_passing_rate =round((percent_passing_math + percent_passing_reading)/2,6) #this was my original thought process 
#overall_passing_rate      #this number should be 65.172326; probably a weighted average  
student_data["Passing Reading"] = student_data["reading_score"] >= 70
student_data["Passing Math"] = student_data["math_score"] >= 70
student_data["Passing Overall"] = student_data["Passing Math"] & student_data["Passing Reading"] #both conditions must be met to pass
pass_total = student_data[student_data["Passing Overall"]]["Student ID"].count()
pass_total #test- this will be the number of students who passed with 70 or greater in both math AND reading
# Total-level group includes count of students and average math and reading scores.
funcs = {"Student ID": "count", "math_score": "mean", "reading_score": "mean"} #this was not learned in class 
ss_total_group = student_data.groupby(by=lambda x: 0) #using a hack method to groupby; ss will be summary statisics
ss_total = ss_total_group.agg(funcs) #.agg method will return an aggregated data frame of the specified funcs i have defined
ss_total["% Passing Overall"] = pass_total / ss_total['Student ID'] #unique number of students
ss_total #test
overall_pass=ss_total["% Passing Overall"]
overall_pass #not yet converted a to percent
percent_passing_overall= round(float(overall_pass)*100,6) #recall: round to 6 decimal places
percent_passing_overall #test if percentage has been accomplished

65.172326

In [10]:
# Create a dataframe to hold the above results; use the pd.DataFrame method which expects a dictionary to be passed to it
district_summary_df = pd.DataFrame({"Total Schools":[total_schools_df],   #the right-hand side in red will be the string values as column names
                          "Total Students":[total_students_df],
                          "Total Budget ($)":[total_budget_df],
                          "Average Math Score":[average_math_score],
                          "Average Reading Score":[average_reading_score],
                          "% Passing Math":[percent_passing_math],
                          "% Passing Reading":[percent_passing_reading],
                          "% Overall Passing":[percent_passing_overall]})
district_summary_df #test

Unnamed: 0,Total Schools,Total Students,Total Budget ($),Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing
0,15,39170,24649428,78.985371,81.87784,74.980853,85.805463,65.172326


## School Summary

* Create an overview table that summarizes key metrics about each school, including:
  * School Name
  * School Type
  * Total Students
  * Total School Budget
  * Per Student Budget
  * Average Math Score
  * Average Reading Score
  * % Passing Math
  * % Passing Reading
  * % Overall Passing (The percentage of students that passed math **and** reading.)
  
* Create a dataframe to hold the above results

In [11]:
# School Name
# Change header name from the dataframe from name to school
new_header_name= {"school_name": "school"}
school_data_complete = school_data_complete.rename(columns=new_header_name) #when i change it here why cant i call on it below?
school_data_complete.head() #test if rename has been achieved

Unnamed: 0,Student ID,student_name,gender,grade,school,reading_score,math_score,School ID,type,size,budget
0,0,Paul Bradley,M,9th,Huang High School,66,79,0,District,2917,1910635
1,1,Victor Smith,M,12th,Huang High School,94,61,0,District,2917,1910635
2,2,Kevin Rodriguez,M,12th,Huang High School,90,60,0,District,2917,1910635
3,3,Dr. Richard Scott,M,12th,Huang High School,67,58,0,District,2917,1910635
4,4,Bonnie Ray,F,9th,Huang High School,97,84,0,District,2917,1910635


In [12]:
# Merge the two data frames together on = school_name
school_data_complete = pd.merge(student_data, school_data, on="school_name") #pd.merge()
school_data_complete.head() #test

Unnamed: 0,Student ID,student_name,gender,grade,school_name,reading_score,math_score,Passing Reading,Passing Math,Passing Overall,School ID,type,size,budget
0,0,Paul Bradley,M,9th,Huang High School,66,79,False,True,False,0,District,2917,1910635
1,1,Victor Smith,M,12th,Huang High School,94,61,True,False,False,0,District,2917,1910635
2,2,Kevin Rodriguez,M,12th,Huang High School,90,60,True,False,False,0,District,2917,1910635
3,3,Dr. Richard Scott,M,12th,Huang High School,67,58,False,False,False,0,District,2917,1910635
4,4,Bonnie Ray,F,9th,Huang High School,97,84,True,True,True,0,District,2917,1910635


In [13]:
# Changing the school type into a str you need to do this in order for the last problem to use the groupby
school_type = school_data_complete.groupby("school_name")["type"].unique()
school_type = school_type.str[0] #covert to a .str[]
school_type #test

school_name
Bailey High School       District
Cabrera High School       Charter
Figueroa High School     District
Ford High School         District
Griffin High School       Charter
Hernandez High School    District
Holden High School        Charter
Huang High School        District
Johnson High School      District
Pena High School          Charter
Rodriguez High School    District
Shelton High School       Charter
Thomas High School        Charter
Wilson High School        Charter
Wright High School        Charter
Name: type, dtype: object

In [14]:
# Count number of students in each school
student_count = school_data_complete["school_name"].value_counts() #.value_counts() will give us all non-null values for each school
student_count.head() #test

Bailey High School       4976
Johnson High School      4761
Hernandez High School    4635
Rodriguez High School    3999
Figueroa High School     2949
Name: school_name, dtype: int64

In [15]:
# Calculate Total School Budget
budget_per_school = school_data_complete.groupby("school_name")["budget"].mean() #.count() this will not allow you to perform calculations
budget_per_school #test

school_name
Bailey High School       3124928
Cabrera High School      1081356
Figueroa High School     1884411
Ford High School         1763916
Griffin High School       917500
Hernandez High School    3022020
Holden High School        248087
Huang High School        1910635
Johnson High School      3094650
Pena High School          585858
Rodriguez High School    2547363
Shelton High School      1056600
Thomas High School       1043130
Wilson High School       1319574
Wright High School       1049400
Name: budget, dtype: int64

In [16]:
# Budget for each student
budget_per_student= budget_per_school/student_count
budget_per_student #test

Bailey High School       628.0
Cabrera High School      582.0
Figueroa High School     639.0
Ford High School         644.0
Griffin High School      625.0
Hernandez High School    652.0
Holden High School       581.0
Huang High School        655.0
Johnson High School      650.0
Pena High School         609.0
Rodriguez High School    637.0
Shelton High School      600.0
Thomas High School       638.0
Wilson High School       578.0
Wright High School       583.0
dtype: float64

In [17]:
# Calculate the average math and reading scores for each school
school_average_math = round(school_data_complete.groupby("school_name")["math_score"].mean(),6)
school_average_reading = round(school_data_complete.groupby("school_name")["reading_score"].mean(),6)
school_average_math #test
school_average_reading #test

school_name
Bailey High School       81.033963
Cabrera High School      83.975780
Figueroa High School     81.158020
Ford High School         80.746258
Griffin High School      83.816757
Hernandez High School    80.934412
Holden High School       83.814988
Huang High School        81.182722
Johnson High School      80.966394
Pena High School         84.044699
Rodriguez High School    80.744686
Shelton High School      83.725724
Thomas High School       83.848930
Wilson High School       83.989488
Wright High School       83.955000
Name: reading_score, dtype: float64

In [27]:
# Calculate the % of students Passing Math
# Calculate the % of students Passing Reading
# Calculate the % of students Overall Passing 
school_result = school_data[["school_name", "type", "budget"]]

# student-level data by school
school_students_gb = school_data_complete.groupby(by="School ID")
funcs = {"Student ID": "count", "math_score": "mean", "reading_score": "mean", "Passing Math": "sum", "Passing Reading": "sum", "Passing Overall": "sum"} #Functions i have defined

school_totals = school_students_gb.agg(funcs) #using .agg() like in district summary to get an aggregated summary table 
# Create the dateframe to hold above results:
school_result_summary_df = pd.merge(school_data[["school_name", "type", "budget"]], school_totals, left_index=True, right_index=True)
school_result_summary_df["Per Student Budget ($)"] = school_result_summary_df["budget"] / school_result_summary_df["Student ID"]
school_result_summary_df["% Passing Math"] = (school_result_summary_df["Passing Math"] / school_result_summary_df["Student ID"])*100
school_result_summary_df["% Passing Reading"] = (school_result_summary_df["Passing Reading"] / school_result_summary_df["Student ID"])*100
school_result_summary_df["% Overall Passing"] = (school_result_summary_df["Passing Overall"] / school_result_summary_df["Student ID"])*100
# Make school name the index, rename columns, and create a dataframe to hold the above results
school_result_summary_df.rename(columns={"type": "School Type", 
                              "Student ID": "Total Students",
                              "budget": "Total School Budget($)", 
                              "math_score": "Average Math Score",
                              "reading_score": "Average Reading Score"}, inplace=True)
school_result_summary_df.set_index(keys="school_name", inplace=True) #setting the index to school_name
school_result_summary_df #test

#the way below does not calculate the correct percentage of students passing math and reading together; this was my original thought process
#passing_df = school_data_complete.loc[(school_data_complete['math_score'] >=70) 
                                          #& (school_data_complete['reading_score'] >=70)]
#passing_math_df = school_data_complete.loc[(school_data_complete['math_score'] >=70)]
#passing_reading_df = school_data_complete.loc[(school_data_complete['reading_score'] >=70)]

#passing_math_df.head() #test
#passing_reading_df.head() #test

Unnamed: 0_level_0,School Type,Total School Budget($),Total Students,Average Math Score,Average Reading Score,Passing Math,Passing Reading,Passing Overall,Per Student Budget ($),% Passing Math,% Passing Reading,% Overall Passing
school_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
Huang High School,District,1910635,2917,76.629414,81.182722,1916,2372,1561,655.0,65.683922,81.316421,53.513884
Figueroa High School,District,1884411,2949,76.711767,81.15802,1946,2381,1569,639.0,65.988471,80.739234,53.204476
Shelton High School,Charter,1056600,1761,83.359455,83.725724,1653,1688,1583,600.0,93.867121,95.854628,89.892107
Hernandez High School,District,3022020,4635,77.289752,80.934412,3094,3748,2481,652.0,66.752967,80.862999,53.527508
Griffin High School,Charter,917500,1468,83.351499,83.816757,1371,1426,1330,625.0,93.392371,97.138965,90.599455
Wilson High School,Charter,1319574,2283,83.274201,83.989488,2143,2204,2068,578.0,93.867718,96.539641,90.582567
Cabrera High School,Charter,1081356,1858,83.061895,83.97578,1749,1803,1697,582.0,94.133477,97.039828,91.334769
Bailey High School,District,3124928,4976,77.048432,81.033963,3318,4077,2719,628.0,66.680064,81.93328,54.642283
Holden High School,Charter,248087,427,83.803279,83.814988,395,411,381,581.0,92.505855,96.252927,89.227166
Pena High School,Charter,585858,962,83.839917,84.044699,910,923,871,609.0,94.594595,95.945946,90.540541


## Top Performing Schools (By % Overall Passing)

* Sort and display the top five performing schools by % overall passing.

In [19]:
# Find the top 5 schools by % overall passing
# Sort_values method
top_performing_schools= school_result_summary_df.sort_values("% Overall Passing", ascending=False, inplace=False) #will put in descending order when ascending=false
top_performing_schools.head() #test

Unnamed: 0_level_0,School Type,Total School Budget($),Total Students,Average Math Score,Average Reading Score,Passing Math,Passing Reading,Passing Overall,Per Student Budget ($),% Passing Math,% Passing Reading,% Overall Passing
school_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
Cabrera High School,Charter,1081356,1858,83.061895,83.97578,1749,1803,1697,582.0,94.133477,97.039828,91.334769
Thomas High School,Charter,1043130,1635,83.418349,83.84893,1525,1591,1487,638.0,93.272171,97.308869,90.948012
Griffin High School,Charter,917500,1468,83.351499,83.816757,1371,1426,1330,625.0,93.392371,97.138965,90.599455
Wilson High School,Charter,1319574,2283,83.274201,83.989488,2143,2204,2068,578.0,93.867718,96.539641,90.582567
Pena High School,Charter,585858,962,83.839917,84.044699,910,923,871,609.0,94.594595,95.945946,90.540541


## Bottom Performing Schools (By % Overall Passing)

* Sort and display the five worst-performing schools by % overall passing.

In [20]:
# Sort and display the 5 worst-performing schools by % overall passing
Bottom_performing_schools=school_result_summary_df.sort_values("% Overall Passing", ascending=True, inplace=False) #will put in ascending order when ascending=true
Bottom_performing_schools.head() #test

Unnamed: 0_level_0,School Type,Total School Budget($),Total Students,Average Math Score,Average Reading Score,Passing Math,Passing Reading,Passing Overall,Per Student Budget ($),% Passing Math,% Passing Reading,% Overall Passing
school_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
Rodriguez High School,District,2547363,3999,76.842711,80.744686,2654,3208,2119,637.0,66.366592,80.220055,52.988247
Figueroa High School,District,1884411,2949,76.711767,81.15802,1946,2381,1569,639.0,65.988471,80.739234,53.204476
Huang High School,District,1910635,2917,76.629414,81.182722,1916,2372,1561,655.0,65.683922,81.316421,53.513884
Hernandez High School,District,3022020,4635,77.289752,80.934412,3094,3748,2481,652.0,66.752967,80.862999,53.527508
Johnson High School,District,3094650,4761,77.072464,80.966394,3145,3867,2549,650.0,66.057551,81.222432,53.539172


## Math Scores by Grade

* Create a table that lists the average Reading Score for students of each grade level (9th, 10th, 11th, 12th) at each school.

  * Create a pandas series for each grade. Hint: use a conditional statement.
  
  * Group each series by school
  
  * Combine the series into a dataframe
  
  * Optional: give the displayed data cleaner formatting

In [21]:
# Math score by grade
from pandas.api.types import CategoricalDtype #haven't learned this yet in class
#Reset the grade order in the original students data frame "students_data". 
student_data["grade"] = student_data["grade"].astype(CategoricalDtype(["9th", "10th","11th","12th"]))
student_data

math_scores_grade = round(student_data.pivot_table(index="school_name", columns="grade", values="math_score"),6)
math_scores_grade.index.name = None 
math_scores_grade #test

grade,9th,10th,11th,12th
Bailey High School,77.083676,76.996772,77.515588,76.492218
Cabrera High School,83.094697,83.154506,82.76556,83.277487
Figueroa High School,76.403037,76.539974,76.884344,77.151369
Ford High School,77.361345,77.672316,76.918058,76.179963
Griffin High School,82.04401,84.229064,83.842105,83.356164
Hernandez High School,77.438495,77.337408,77.136029,77.186567
Holden High School,83.787402,83.429825,85.0,82.855422
Huang High School,77.027251,75.908735,76.446602,77.225641
Johnson High School,77.187857,76.691117,77.491653,76.863248
Pena High School,83.625455,83.372,84.328125,84.121547


## Reading Score by Grade 

* Perform the same operations as above for reading scores

In [22]:
# Reading score by grade
# Reset the grade order in the original students data frame "student_data". 
student_data["grade"] = student_data["grade"].astype(CategoricalDtype(["9th", "10th","11th","12th"]))
student_data

reading_scores_grade = round(student_data.pivot_table(index="school_name", columns="grade", values="reading_score"),6)
reading_scores_grade.index.name = None
reading_scores_grade #test

grade,9th,10th,11th,12th
Bailey High School,81.303155,80.907183,80.945643,80.912451
Cabrera High School,83.676136,84.253219,83.788382,84.287958
Figueroa High School,81.198598,81.408912,80.640339,81.384863
Ford High School,80.632653,81.262712,80.403642,80.662338
Griffin High School,83.369193,83.706897,84.288089,84.013699
Hernandez High School,80.86686,80.660147,81.39614,80.857143
Holden High School,83.677165,83.324561,83.815534,84.698795
Huang High School,81.290284,81.512386,81.417476,80.305983
Johnson High School,81.260714,80.773431,80.616027,81.227564
Pena High School,83.807273,83.612,84.335938,84.59116


## Scores by School Spending

* Create a table that breaks down school performances based on average Spending Ranges (Per Student). Use 4 reasonable bins to group school spending. Include in the table each of the following:
  * Average Math Score
  * Average Reading Score
  * % Passing Math
  * % Passing Reading
  * Overall Passing Rate (Average of the above two)

In [23]:
# Scores by school spending
# Start by creating names and labels for the bins
spending_range = ["<$585", "$585-615", "$615-645", "$645-675"] #these will be the labels for my bins
spending_bins = [0, 585, 615, 645, 675]

#schools_spending_df = school_summary_df
school_result_summary_df["Spending Ranges(Per Student)"] = pd.cut(budget_per_student, bins=spending_bins, labels=spending_range)
school_result_summary_df.head() #test; pd.cut() allows us to use the newly create bins to cut our data into

spending_math_score = round(school_result_summary_df.groupby(["Spending Ranges(Per Student)"])["Average Math Score"].mean(),2)
spending_reading_score = round(school_result_summary_df.groupby(["Spending Ranges(Per Student)"])["Average Reading Score"].mean(),2)
spending_passing_math = round(school_result_summary_df.groupby(["Spending Ranges(Per Student)"])["% Passing Math"].mean(),2)
spending_passing_reading = round(school_result_summary_df.groupby(["Spending Ranges(Per Student)"])["% Passing Reading"].mean(),2)
overall_passing_rate = round((spending_passing_math + spending_passing_reading)/2,2)

# Creating dataframe to hold the results; recall using the pd.DataFrame method which expects a mapper-like object in the argument (ie dictionary)
scores_by_spending = pd.DataFrame ({"Average Math Score": spending_math_score,
    "Average Reading Score": spending_reading_score,
    "% Passing Math": spending_passing_math,
    "% Passing Reading": spending_passing_reading,
    "% Overall Passing": overall_passing_rate})
scores_by_spending #test

Unnamed: 0_level_0,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing
Spending Ranges(Per Student),Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
<$585,83.46,83.93,93.46,96.61,95.04
$585-615,83.6,83.89,94.23,95.9,95.06
$615-645,79.08,81.89,75.67,86.11,80.89
$645-675,77.0,81.03,66.16,81.13,73.64


## Scores by School Size

* Perform the same operations as above, based on school size.

In [28]:
# Scores by school size
# First start by creating the bins based on school size
size_bins = [0, 1000, 2000, 5000]

# Labels for the bins
group_names = ["Small (<1000)", "Medium(1000-2000)", "Large(2000-5000)"]

school_size_df = school_result_summary_df
school_size_df["School Size"] = pd.cut(student_count, bins=size_bins, labels=group_names) #pd.cut() to cut our data into our newly created bins
school_size_df

# Define how to calculate each variable 
spending_math_score = school_size_df.groupby(["School Size"])["Average Math Score"].mean()
spending_reading_score = school_size_df.groupby(["School Size"])["Average Reading Score"].mean()
spending_passing_math = school_size_df.groupby(["School Size"])["% Passing Math"].mean()
spending_passing_reading = school_size_df.groupby(["School Size"])["% Passing Reading"].mean()
overall_passing_rate = (spending_passing_math + spending_passing_reading)/2

# Create a dataframe to hold the above; use pd.DataFrame()
scores_by_size = pd.DataFrame ({"Average Math Score": spending_math_score,
    "Average Reading Score": spending_reading_score,
    "% Passing Math": spending_passing_math,
    "% Passing Reading": spending_passing_reading,
    "% Overall Passing": overall_passing_rate})
scores_by_size #test

Unnamed: 0_level_0,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing
School Size,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Small (<1000),83.821598,83.929843,93.550225,96.099437,94.824831
Medium(1000-2000),83.374684,83.864438,93.599695,96.79068,95.195187
Large(2000-5000),77.746417,81.344493,69.963361,82.766634,76.364998


# Scores by School Type
* Perform the same operations as above, based on school type

In [25]:
# Scores by school type
# Repeat the above breakdown, but this time group schools based on school type
scores_school_type = school_result_summary_df[["School Type","Average Math Score", #defining our header names meant to hold string valued as denoted by ""
                                        "Average Reading Score", 
                                        "% Passing Math", 
                                        "% Passing Reading", 
                                        "% Overall Passing"]]

scores_school_type = scores_school_type.groupby("School Type").mean() #group it by the mean score of each school type
scores_school_type #test

Unnamed: 0_level_0,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing
School Type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Charter,83.473852,83.896421,93.62083,96.586489,90.432244
District,76.956733,80.966636,66.548453,80.799062,53.672208
