In [195]:
#import dependencies
import pandas as pd
import numpy as np

In [196]:
# Locations of the two input CSV files, relative to the notebook's working directory.
school_data_to_load, student_data_to_load = (
    "Resources/schools_complete.csv",
    "Resources/students_complete.csv",
)

In [197]:
# Read the school CSV into a pandas DataFrame (pd.read_csv parses the file and
# returns a DataFrame), then display it as the cell output.
school_data_df = pd.read_csv(school_data_to_load)
school_data_df

Unnamed: 0,School ID,school_name,type,size,budget
0,0,Huang High School,District,2917,1910635
1,1,Figueroa High School,District,2949,1884411
2,2,Shelton High School,Charter,1761,1056600
3,3,Hernandez High School,District,4635,3022020
4,4,Griffin High School,Charter,1468,917500
5,5,Wilson High School,Charter,2283,1319574
6,6,Cabrera High School,Charter,1858,1081356
7,7,Bailey High School,District,4976,3124928
8,8,Holden High School,Charter,427,248087
9,9,Pena High School,Charter,962,585858


In [198]:
# Read the student CSV into a pandas DataFrame and preview the first 10 rows.
student_data_df = pd.read_csv(student_data_to_load)
student_data_df.head(10)

Unnamed: 0,Student ID,student_name,gender,grade,school_name,reading_score,math_score
0,0,Paul Bradley,M,9th,Huang High School,66,79
1,1,Victor Smith,M,12th,Huang High School,94,61
2,2,Kevin Rodriguez,M,12th,Huang High School,90,60
3,3,Dr. Richard Scott,M,12th,Huang High School,67,58
4,4,Bonnie Ray,F,9th,Huang High School,97,84
5,5,Bryan Miranda,M,9th,Huang High School,94,94
6,6,Sheena Carter,F,11th,Huang High School,82,80
7,7,Nicole Baker,F,12th,Huang High School,96,69
8,8,Michael Roth,M,10th,Huang High School,95,87
9,9,Matthew Greene,M,10th,Huang High School,96,84


In [199]:
# count() reports the number of non-null entries per column of the school data;
# a column with a lower count than the others contains missing values.
school_data_df.count()

School ID      15
school_name    15
type           15
size           15
budget         15
dtype: int64

In [200]:
# count() reports the number of non-null entries per column of the student data;
# a column with a lower count than the others contains missing values.
student_data_df.count()

Student ID       39170
student_name     39170
gender           39170
grade            39170
school_name      39170
reading_score    39170
math_score       39170
dtype: int64

In [201]:
# Within each dataset every column has the same non-null count (15 for schools, 39,170 for students), which indicates that no column has missing values.

In [202]:
# isnull() returns a same-shaped frame of booleans: True marks a missing value
# in the school dataset, False marks a present one.
school_data_df.isnull()

Unnamed: 0,School ID,school_name,type,size,budget
0,False,False,False,False,False
1,False,False,False,False,False
2,False,False,False,False,False
3,False,False,False,False,False
4,False,False,False,False,False
5,False,False,False,False,False
6,False,False,False,False,False
7,False,False,False,False,False
8,False,False,False,False,False
9,False,False,False,False,False


In [203]:
# isnull() returns a same-shaped frame of booleans: True marks a missing value
# in the student dataset, False marks a present one.
student_data_df.isnull()

Unnamed: 0,Student ID,student_name,gender,grade,school_name,reading_score,math_score
0,False,False,False,False,False,False,False
1,False,False,False,False,False,False,False
2,False,False,False,False,False,False,False
3,False,False,False,False,False,False,False
4,False,False,False,False,False,False,False
...,...,...,...,...,...,...,...
39165,False,False,False,False,False,False,False
39166,False,False,False,False,False,False,False
39167,False,False,False,False,False,False,False
39168,False,False,False,False,False,False,False


In [204]:
# The student frame has too many rows to inspect by eye, so sum the boolean mask
# per column: each True (missing value) counts as 1, so a total of 0 means the
# column has no missing values.
student_data_df.isnull().sum()

Student ID       0
student_name     0
gender           0
grade            0
school_name      0
reading_score    0
math_score       0
dtype: int64

In [205]:
# Finally, cross-check with notnull(), the inverse of isnull():
# True marks a present value, False marks a missing one.
school_data_df.notnull()

Unnamed: 0,School ID,school_name,type,size,budget
0,True,True,True,True,True
1,True,True,True,True,True
2,True,True,True,True,True
3,True,True,True,True,True
4,True,True,True,True,True
5,True,True,True,True,True
6,True,True,True,True,True
7,True,True,True,True,True
8,True,True,True,True,True
9,True,True,True,True,True


In [206]:
# Count the present (non-null) values per column of the student data by summing
# the notnull() mask, where each True counts as 1.
student_data_df.notnull().sum()

Student ID       39170
student_name     39170
gender           39170
grade            39170
school_name      39170
reading_score    39170
math_score       39170
dtype: int64

In [207]:
# Show the dtype pandas assigned to each column of the school DataFrame.
school_data_df.dtypes

School ID       int64
school_name    object
type           object
size            int64
budget          int64
dtype: object

In [208]:
# Show the dtype pandas assigned to each column of the student DataFrame.
student_data_df.dtypes

Student ID        int64
student_name     object
gender           object
grade            object
school_name      object
reading_score     int64
math_score        int64
dtype: object

In [209]:
# Clean the student names by removing professional/honorific prefixes and suffixes.
# regex=False makes every affix match as literal text on all pandas versions:
# if the pattern were interpreted as a regular expression, the "." in "Dr. " would
# match any character, and leaving the argument implicit makes the behavior depend
# on the installed pandas version (the original call also emitted a warning).
all_prefix_suffix = ["Dr. ", "Mrs. ", "Mr. ", "Ms. ", "Miss ", " MD", " PhD", " DDS", " DVM"]

for word in all_prefix_suffix:
    student_data_df["student_name"] = student_data_df["student_name"].str.replace(word, "", regex=False)

# Preview the cleaned names.
student_data_df.head(10)

  student_data_df["student_name"] = student_data_df["student_name"].str.replace(word, "")


Unnamed: 0,Student ID,student_name,gender,grade,school_name,reading_score,math_score
0,0,Paul Bradley,M,9th,Huang High School,66,79
1,1,Victor Smith,M,12th,Huang High School,94,61
2,2,Kevin Rodriguez,M,12th,Huang High School,90,60
3,3,Richard Scott,M,12th,Huang High School,67,58
4,4,Bonnie Ray,F,9th,Huang High School,97,84
5,5,Bryan Miranda,M,9th,Huang High School,94,94
6,6,Sheena Carter,F,11th,Huang High School,82,80
7,7,Nicole Baker,F,12th,Huang High School,96,69
8,8,Michael Roth,M,10th,Huang High School,95,87
9,9,Matthew Greene,M,10th,Huang High School,96,84


In [210]:
# Combine the student and school records into one frame for the district analysis.
# - on="school_name": the shared key (listing it once is sufficient).
# - how="left": keep every student row even if its school is absent from the school
#   table, so a failed lookup shows up as missing values instead of silently
#   removing students from the totals.
# - validate="many_to_one": raise if a school name occurs more than once in the
#   school table, which would otherwise duplicate student rows.
complete_data_df = pd.merge(
    student_data_df,
    school_data_df,
    on="school_name",
    how="left",
    validate="many_to_one",
)
complete_data_df.head(10)

Unnamed: 0,Student ID,student_name,gender,grade,school_name,reading_score,math_score,School ID,type,size,budget
0,0,Paul Bradley,M,9th,Huang High School,66,79,0,District,2917,1910635
1,1,Victor Smith,M,12th,Huang High School,94,61,0,District,2917,1910635
2,2,Kevin Rodriguez,M,12th,Huang High School,90,60,0,District,2917,1910635
3,3,Richard Scott,M,12th,Huang High School,67,58,0,District,2917,1910635
4,4,Bonnie Ray,F,9th,Huang High School,97,84,0,District,2917,1910635
5,5,Bryan Miranda,M,9th,Huang High School,94,94,0,District,2917,1910635
6,6,Sheena Carter,F,11th,Huang High School,82,80,0,District,2917,1910635
7,7,Nicole Baker,F,12th,Huang High School,96,69,0,District,2917,1910635
8,8,Michael Roth,M,10th,Huang High School,95,87,0,District,2917,1910635
9,9,Matthew Greene,M,10th,Huang High School,96,84,0,District,2917,1910635


In [211]:
# Before analyzing the merged frame, confirm nothing went missing in the merge:
# every column should report the same non-null count.
check_count = complete_data_df.count()
check_count

Student ID       39170
student_name     39170
gender           39170
grade            39170
school_name      39170
reading_score    39170
math_score       39170
School ID        39170
type             39170
size             39170
budget           39170
dtype: int64

In [212]:
# Total number of students: the count of non-null Student ID values in the merged frame.
student_count = complete_data_df['Student ID'].count()
print(f"There are {student_count:,} students in the school district.")

There are 39,170 students in the school district.


In [213]:
# Total number of schools: the number of distinct School ID values in the merged frame.
# dropna=False keeps the result equal to the length of the unique() array.
school_count = complete_data_df['School ID'].nunique(dropna=False)
print(f"There are {school_count} schools in the district.")

There are 15 schools in the district.


In [214]:
# Total district budget. Each school's budget is repeated on every one of its
# student rows in the merged frame, so reduce to one row per school before summing.
# (Summing the unique budget *values* would drop a school whenever two schools
# happen to have the same budget.)
one_row_per_school = complete_data_df.drop_duplicates(subset="school_name")
total_budget = one_row_per_school['budget'].sum()
print("The total budget for the district is $"+ "{:,}".format(total_budget)+ ".")

# extra to practice knowledge - make a dataframe that shows the budget for each individual school. Don't use groupby here.
print("\nSee below for a breakdown of each individual school budget.")

# Pair every student's school name with that school's budget, then keep the first
# row per school and sort alphabetically by school name.
school_budgets = complete_data_df['budget']
school_names = complete_data_df['school_name']
indv_budgets = {"School Name": school_names,
                "Budget": school_budgets}
indv_budget_df = pd.DataFrame(indv_budgets, columns=["School Name", "Budget"])
indv_budget_df = indv_budget_df.drop_duplicates(subset="School Name")
indv_budget_df = indv_budget_df.sort_values(by="School Name", ascending=True)
indv_budget_df

The total budget for the district is $24,649,428.

See below for a breakdown of each individual school budget.


Unnamed: 0,School Name,Budget
17871,Bailey High School,3124928
16013,Cabrera High School,1081356
2917,Figueroa High School,1884411
34796,Ford High School,1763916
12262,Griffin High School,917500
7627,Hernandez High School,3022020
22847,Holden High School,248087
0,Huang High School,1910635
30035,Johnson High School,3094650
23274,Pena High School,585858


In [215]:
# District-wide mean reading and math scores across all students.
mean_reading_score = complete_data_df['reading_score'].mean()
print("Overall Average Reading Score: "+ '{:,.1f}'.format(mean_reading_score))
mean_math_score = complete_data_df['math_score'].mean()
print("Overall Average Math Score: "+'{:,.1f}'.format(mean_math_score))

# extra to practice knowledge - find the average score for each school. Use groupby - easier.
print("\nSee below for a breakdown of each individual school's average scores for reading and math.")

# The school names are read straight from the merged frame so this cell does not
# rely on a variable created as a by-product of the budget breakdown cell.
reading_scores = complete_data_df['reading_score']
math_scores = complete_data_df['math_score']
scores_dict = {"School Name": complete_data_df['school_name'],
               "Average Reading Score": reading_scores,
               "Average Math Score": math_scores}
scores_df = pd.DataFrame(scores_dict, columns=["School Name", "Average Reading Score", "Average Math Score"])
# Group the per-student scores by school and average each score column.
avg_scores = scores_df.groupby(["School Name"]).mean()
avg_scores

Overall Average Reading Score: 81.9
Overall Average Math Score: 79.0

See below for a breakdown of each individual school's average scores for reading and math.


Unnamed: 0_level_0,Average Reading Score,Average Math Score
School Name,Unnamed: 1_level_1,Unnamed: 2_level_1
Bailey High School,81.033963,77.048432
Cabrera High School,83.97578,83.061895
Figueroa High School,81.15802,76.711767
Ford High School,80.746258,77.102592
Griffin High School,83.816757,83.351499
Hernandez High School,80.934412,77.289752
Holden High School,83.814988,83.803279
Huang High School,81.182722,76.629414
Johnson High School,80.966394,77.072464
Pena High School,84.044699,83.839917


In [216]:
# Work out how many students passed each subject; a score of 70 or higher is a pass.
# Boolean masks, one entry per student row.
passing_math = complete_data_df["math_score"].ge(70)
passing_reading = complete_data_df["reading_score"].ge(70)

# Student rows that satisfy each mask (and both masks together).
passing_math_df = complete_data_df.loc[passing_math]
passing_reading_df = complete_data_df.loc[passing_reading]
passing_both_df = complete_data_df.loc[passing_math & passing_reading]

# Number of students in each filtered frame, counted via the student_name column.
passing_math_count = passing_math_df["student_name"].count()
passing_reading_count = passing_reading_df["student_name"].count()
passing_both_count = passing_both_df["student_name"].count()

print(f"The number of students in the district who passed math is: {passing_math_count:,d}")
print(f"The number of students in the district who passed reading is: {passing_reading_count:,d}")
print(f"The number of students in the district who passed both math and reading is: {passing_both_count:,d}")

The number of students in the district who passed math is: 29,370
The number of students in the district who passed reading is: 33,610
The number of students in the district who passed both math and reading is: 25,528


In [217]:
# Express each passing count as a fraction of all students (true division yields a float).
passing_math_percentage = passing_math_count / student_count
passing_reading_percentage = passing_reading_count / student_count
passing_both_percentage = passing_both_count / student_count

# Report the counts together with their percentages.
print(f"The number of students in the district who passed math is: {passing_math_count:,d} ({passing_math_percentage:,.1%})")
print(f"The number of students in the district who passed reading is: {passing_reading_count:,d} ({passing_reading_percentage:,.1%})")
print(f"The number of students in the district who passed both math and reading is: {passing_both_count:,d} ({passing_both_percentage:,.1%})")

The number of students in the district who passed math is: 29,370 (75.0%)
The number of students in the district who passed reading is: 33,610 (85.8%)
The number of students in the district who passed both math and reading is: 25,528 (65.2%)


In [218]:
# Collect the district-level metrics computed above into a one-row summary frame.
district_summary_row = {
    "Total Schools": school_count,
    "Total Students": student_count,
    "Total Budget": total_budget,
    "Average Reading Score": mean_reading_score,
    "Average Math Score": mean_math_score,
    "% Passing Math": passing_math_percentage,
    "% Passing Reading": passing_reading_percentage,
    "% Overall Passing": passing_both_percentage,
}
district_summary_df = pd.DataFrame([district_summary_row])
district_summary_df

Unnamed: 0,Total Schools,Total Students,Total Budget,Average Reading Score,Average Math Score,% Passing Math,% Passing Reading,% Overall Passing
0,15,39170,24649428,81.87784,78.985371,0.749809,0.858055,0.651723


In [219]:
# Format the student total with thousands separators.
# Note: this turns the column into strings, so the cell cannot be re-run on its own output.
district_summary_df["Total Students"] = district_summary_df["Total Students"].map(
    lambda total: f"{total:,}"
)
district_summary_df["Total Students"]

0    39,170
Name: Total Students, dtype: object

In [220]:
# Format the budget as dollars with thousands separators and two decimals.
# Note: this turns the column into strings, so the cell cannot be re-run on its own output.
district_summary_df["Total Budget"] = district_summary_df["Total Budget"].map(
    lambda amount: f"${amount:,.2f}"
)
district_summary_df["Total Budget"]

0    $24,649,428.00
Name: Total Budget, dtype: object

In [221]:
# Display the district summary with the student and budget columns formatted.
district_summary_df

Unnamed: 0,Total Schools,Total Students,Total Budget,Average Reading Score,Average Math Score,% Passing Math,% Passing Reading,% Overall Passing
0,15,39170,"$24,649,428.00",81.87784,78.985371,0.749809,0.858055,0.651723


In [222]:
# Format the remaining columns: scores to one decimal place, pass rates as whole percentages.
district_column_formats = {
    "Average Math Score": "{:.1f}",
    "Average Reading Score": "{:.1f}",
    "% Passing Math": "{:.0%}",
    "% Passing Reading": "{:.0%}",
    "% Overall Passing": "{:.0%}",
}
for column_name, template in district_column_formats.items():
    district_summary_df[column_name] = district_summary_df[column_name].map(template.format)
district_summary_df

Unnamed: 0,Total Schools,Total Students,Total Budget,Average Reading Score,Average Math Score,% Passing Math,% Passing Reading,% Overall Passing
0,15,39170,"$24,649,428.00",81.9,79.0,75%,86%,65%


In [223]:
# Desired left-to-right column order for the district summary.
new_column_order = [
    "Total Schools",
    "Total Students",
    "Total Budget",
    "Average Math Score",
    "Average Reading Score",
    "% Passing Math",
    "% Passing Reading",
    "% Overall Passing",
]

# Select the columns in that order (a missing column raises KeyError).
district_summary_df = district_summary_df.loc[:, new_column_order]
district_summary_df

Unnamed: 0,Total Schools,Total Students,Total Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing
0,15,39170,"$24,649,428.00",79.0,81.9,75%,86%,65%


In [224]:
# Start the per-school summary: a Series of each school's type, indexed by
# school name instead of the default 0, 1, 2, ... row numbers.
schools_by_name = school_data_df.set_index("school_name")
each_school_type = schools_by_name["type"]
each_school_type

school_name
Huang High School        District
Figueroa High School     District
Shelton High School       Charter
Hernandez High School    District
Griffin High School       Charter
Wilson High School        Charter
Cabrera High School       Charter
Bailey High School       District
Holden High School        Charter
Pena High School          Charter
Wright High School        Charter
Rodriguez High School    District
Johnson High School      District
Ford High School         District
Thomas High School        Charter
Name: type, dtype: object

In [225]:
# Convert the school-type Series into a single-column DataFrame.
df = each_school_type.to_frame()
df

Unnamed: 0_level_0,type
school_name,Unnamed: 1_level_1
Huang High School,District
Figueroa High School,District
Shelton High School,Charter
Hernandez High School,District
Griffin High School,Charter
Wilson High School,Charter
Cabrera High School,Charter
Bailey High School,District
Holden High School,Charter
Pena High School,Charter


In [226]:
# Number of student rows per school, taken from the merged frame.
students_school_column = complete_data_df["school_name"]
each_school_studentsize = students_school_column.value_counts()
# Each school's budget, indexed by school name.
each_school_budget = school_data_df.set_index("school_name")["budget"]

In [227]:
# Per-student spending: budget divided by student count, aligned on school name.
spendingcapita_per_school = each_school_budget.div(each_school_studentsize)
spendingcapita_per_school

Bailey High School       628.0
Cabrera High School      582.0
Figueroa High School     639.0
Ford High School         644.0
Griffin High School      625.0
Hernandez High School    652.0
Holden High School       581.0
Huang High School        655.0
Johnson High School      650.0
Pena High School         609.0
Rodriguez High School    637.0
Shelton High School      600.0
Thomas High School       638.0
Wilson High School       578.0
Wright High School       583.0
dtype: float64

In [228]:
# Per-student scores indexed by school name.
student_school_math = student_data_df.set_index(['school_name'])['math_score']
student_school_reading = student_data_df.set_index(['school_name'])['reading_score']

# Average reading and math score per school. The score column is selected BEFORE
# aggregating: calling .mean() on the whole grouped frame also tries to average the
# text columns (names, gender, grade, type), which raises TypeError on pandas >= 2.0
# and wastes work on older versions.
students_by_school = complete_data_df.groupby('school_name')
each_school_reading_averages = students_by_school['reading_score'].mean()
each_school_math_averages = students_by_school['math_score'].mean()

In [229]:
# Count, per school, the students who passed math, reading, and both subjects.
# The boolean pass masks are summed within each school (True counts as 1). Compared
# with filtering first and then grouping, this keeps a school with zero passing
# students in the result with a count of 0 (instead of dropping it, which later
# turns its percentage into NaN), and each name holds one kind of object only.
school_of_each_student = complete_data_df["school_name"]
each_school_passing_math = passing_math.groupby(school_of_each_student).sum()
each_school_passing_reading = passing_reading.groupby(school_of_each_student).sum()
each_school_passing_both = (passing_reading & passing_math).groupby(school_of_each_student).sum()

In [230]:
# Fraction of each school's students who passed: passing count divided by the
# school's student total, aligned on school name.
each_school_passing_math_percentage = each_school_passing_math.div(each_school_studentsize)
each_school_passing_reading_percentage = each_school_passing_reading.div(each_school_studentsize)
each_school_passing_both_percentage = each_school_passing_both.div(each_school_studentsize)

In [231]:
# Assemble the per-school summary: every value is a Series indexed by school name,
# so the DataFrame constructor aligns them into one row per school.
per_school_columns = {
    "School Type": each_school_type,
    "Total Students": each_school_studentsize,
    "Total School Budget": each_school_budget,
    "Per Student Budget": spendingcapita_per_school,
    "Average Math Score": each_school_math_averages,
    "Average Reading Score": each_school_reading_averages,
    "% Passing Math": each_school_passing_math_percentage,
    "% Passing Reading": each_school_passing_reading_percentage,
    "% Overall Passing": each_school_passing_both_percentage,
}
per_school_summary_df = pd.DataFrame(per_school_columns)
per_school_summary_df.head()

Unnamed: 0,School Type,Total Students,Total School Budget,Per Student Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing
Bailey High School,District,4976,3124928,628.0,77.048432,81.033963,0.666801,0.819333,0.546423
Cabrera High School,Charter,1858,1081356,582.0,83.061895,83.97578,0.941335,0.970398,0.913348
Figueroa High School,District,2949,1884411,639.0,76.711767,81.15802,0.659885,0.807392,0.532045
Ford High School,District,2739,1763916,644.0,77.102592,80.746258,0.683096,0.79299,0.542899
Griffin High School,Charter,1468,917500,625.0,83.351499,83.816757,0.933924,0.97139,0.905995


In [232]:
# Format the per-school summary for display: budgets as dollars, scores to one
# decimal place, pass rates as percentages with one decimal place.
# Note: the formatted columns become strings.
per_school_column_formats = {
    "Total School Budget": '${:,.2f}',
    "Per Student Budget": '${:,.2f}',
    "Average Math Score": '{:.1f}',
    "Average Reading Score": '{:.1f}',
    "% Passing Math": '{:.1%}',
    "% Passing Reading": '{:.1%}',
    "% Overall Passing": '{:.1%}',
}
for column_name, template in per_school_column_formats.items():
    per_school_summary_df[column_name] = per_school_summary_df[column_name].map(template.format)
per_school_summary_df

Unnamed: 0,School Type,Total Students,Total School Budget,Per Student Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing
Bailey High School,District,4976,"$3,124,928.00",$628.00,77.0,81.0,66.7%,81.9%,54.6%
Cabrera High School,Charter,1858,"$1,081,356.00",$582.00,83.1,84.0,94.1%,97.0%,91.3%
Figueroa High School,District,2949,"$1,884,411.00",$639.00,76.7,81.2,66.0%,80.7%,53.2%
Ford High School,District,2739,"$1,763,916.00",$644.00,77.1,80.7,68.3%,79.3%,54.3%
Griffin High School,Charter,1468,"$917,500.00",$625.00,83.4,83.8,93.4%,97.1%,90.6%
Hernandez High School,District,4635,"$3,022,020.00",$652.00,77.3,80.9,66.8%,80.9%,53.5%
Holden High School,Charter,427,"$248,087.00",$581.00,83.8,83.8,92.5%,96.3%,89.2%
Huang High School,District,2917,"$1,910,635.00",$655.00,76.6,81.2,65.7%,81.3%,53.5%
Johnson High School,District,4761,"$3,094,650.00",$650.00,77.1,81.0,66.1%,81.2%,53.5%
Pena High School,Charter,962,"$585,858.00",$609.00,83.8,84.0,94.6%,95.9%,90.5%


In [233]:
# Rank schools by overall passing rate, best first, and show the top 5.
# The "% Overall Passing" column has been formatted into text such as "91.3%", so
# sorting it compares strings: "100.0%" would sort before "91.3%", and schools that
# round to the same displayed value are ordered arbitrarily. Rank on the unrounded
# numeric rates instead; reindex keeps every summary row (missing rates sort last).
overall_passing_rate = each_school_passing_both_percentage.reindex(per_school_summary_df.index)
top_schools = per_school_summary_df.loc[overall_passing_rate.sort_values(ascending=False).index]
top_schools.head(5)

Unnamed: 0,School Type,Total Students,Total School Budget,Per Student Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing
Cabrera High School,Charter,1858,"$1,081,356.00",$582.00,83.1,84.0,94.1%,97.0%,91.3%
Thomas High School,Charter,1635,"$1,043,130.00",$638.00,83.4,83.8,93.3%,97.3%,90.9%
Griffin High School,Charter,1468,"$917,500.00",$625.00,83.4,83.8,93.4%,97.1%,90.6%
Wilson High School,Charter,2283,"$1,319,574.00",$578.00,83.3,84.0,93.9%,96.5%,90.6%
Pena High School,Charter,962,"$585,858.00",$609.00,83.8,84.0,94.6%,95.9%,90.5%


In [234]:
# Rank schools by overall passing rate, worst first, and show the bottom 5.
# The "% Overall Passing" column has been formatted into text such as "53.5%", so
# sorting it compares strings: "9.5%" would sort after "53.5%", and schools that
# round to the same displayed value are ordered arbitrarily. Rank on the unrounded
# numeric rates instead; reindex keeps every summary row (missing rates sort last).
overall_passing_rate = each_school_passing_both_percentage.reindex(per_school_summary_df.index)
bottom_schools = per_school_summary_df.loc[overall_passing_rate.sort_values(ascending=True).index]
bottom_schools.head(5)

Unnamed: 0,School Type,Total Students,Total School Budget,Per Student Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing
Rodriguez High School,District,3999,"$2,547,363.00",$637.00,76.8,80.7,66.4%,80.2%,53.0%
Figueroa High School,District,2949,"$1,884,411.00",$639.00,76.7,81.2,66.0%,80.7%,53.2%
Hernandez High School,District,4635,"$3,022,020.00",$652.00,77.3,80.9,66.8%,80.9%,53.5%
Huang High School,District,2917,"$1,910,635.00",$655.00,76.6,81.2,65.7%,81.3%,53.5%
Johnson High School,District,4761,"$3,094,650.00",$650.00,77.1,81.0,66.1%,81.2%,53.5%


In [235]:
# Split the merged frame into one frame per grade level.
grade_column = complete_data_df["grade"]
ninth = complete_data_df.loc[grade_column.eq("9th")]
tenth = complete_data_df.loc[grade_column.eq("10th")]
eleventh = complete_data_df.loc[grade_column.eq("11th")]
twelfth = complete_data_df.loc[grade_column.eq("12th")]

In [236]:
# Average math and reading score per school within each grade level.
# The score column is selected BEFORE aggregating: calling .mean() on the whole
# grouped frame also tries to average the text columns (names, gender, grade, type),
# which raises TypeError on pandas >= 2.0 and wastes work on older versions.
ninth_math = ninth.groupby('school_name')['math_score'].mean()
tenth_math = tenth.groupby('school_name')['math_score'].mean()
eleventh_math = eleventh.groupby('school_name')['math_score'].mean()
twelfth_math = twelfth.groupby('school_name')['math_score'].mean()

ninth_reading = ninth.groupby('school_name')['reading_score'].mean()
tenth_reading = tenth.groupby('school_name')['reading_score'].mean()
eleventh_reading = eleventh.groupby('school_name')['reading_score'].mean()
twelfth_reading = twelfth.groupby('school_name')['reading_score'].mean()

In [237]:
# One row per school, one column per grade: average math score.
math_columns_by_grade = {
    "9th": ninth_math,
    "10th": tenth_math,
    "11th": eleventh_math,
    "12th": twelfth_math,
}
math_scores_by_grade = pd.DataFrame(math_columns_by_grade)
math_scores_by_grade

Unnamed: 0_level_0,9th,10th,11th,12th
school_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Bailey High School,77.083676,76.996772,77.515588,76.492218
Cabrera High School,83.094697,83.154506,82.76556,83.277487
Figueroa High School,76.403037,76.539974,76.884344,77.151369
Ford High School,77.361345,77.672316,76.918058,76.179963
Griffin High School,82.04401,84.229064,83.842105,83.356164
Hernandez High School,77.438495,77.337408,77.136029,77.186567
Holden High School,83.787402,83.429825,85.0,82.855422
Huang High School,77.027251,75.908735,76.446602,77.225641
Johnson High School,77.187857,76.691117,77.491653,76.863248
Pena High School,83.625455,83.372,84.328125,84.121547


In [238]:
# One row per school, one column per grade: average reading score.
reading_columns_by_grade = {
    "9th": ninth_reading,
    "10th": tenth_reading,
    "11th": eleventh_reading,
    "12th": twelfth_reading,
}
reading_scores_by_grade = pd.DataFrame(reading_columns_by_grade)
reading_scores_by_grade

Unnamed: 0_level_0,9th,10th,11th,12th
school_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Bailey High School,81.303155,80.907183,80.945643,80.912451
Cabrera High School,83.676136,84.253219,83.788382,84.287958
Figueroa High School,81.198598,81.408912,80.640339,81.384863
Ford High School,80.632653,81.262712,80.403642,80.662338
Griffin High School,83.369193,83.706897,84.288089,84.013699
Hernandez High School,80.86686,80.660147,81.39614,80.857143
Holden High School,83.677165,83.324561,83.815534,84.698795
Huang High School,81.290284,81.512386,81.417476,80.305983
Johnson High School,81.260714,80.773431,80.616027,81.227564
Pena High School,83.807273,83.612,84.335938,84.59116


In [239]:
# Format every grade column of the math table to one decimal place (values become strings).
for grade_label in ("9th", "10th", "11th", "12th"):
    math_scores_by_grade[grade_label] = math_scores_by_grade[grade_label].map("{:.1f}".format)
# To enforce the column order if needed:
#   math_scores_by_grade = math_scores_by_grade[["9th", "10th", "11th", "12th"]]
# Drop the index label ("school_name") so it is not shown above the school names.
math_scores_by_grade.index.name = None
# Preview the formatted table.
math_scores_by_grade.head()

Unnamed: 0,9th,10th,11th,12th
Bailey High School,77.1,77.0,77.5,76.5
Cabrera High School,83.1,83.2,82.8,83.3
Figueroa High School,76.4,76.5,76.9,77.2
Ford High School,77.4,77.7,76.9,76.2
Griffin High School,82.0,84.2,83.8,83.4


In [240]:
# Format every grade column of the reading table to one decimal place (values become strings).
for grade_label in ("9th", "10th", "11th", "12th"):
    reading_scores_by_grade[grade_label] = reading_scores_by_grade[grade_label].map("{:.1f}".format)
# To enforce the column order if needed:
#   reading_scores_by_grade = reading_scores_by_grade[["9th", "10th", "11th", "12th"]]
# Drop the index label ("school_name") so it is not shown above the school names.
reading_scores_by_grade.index.name = None
# Preview the formatted table.
reading_scores_by_grade.head()

Unnamed: 0,9th,10th,11th,12th
Bailey High School,81.3,80.9,80.9,80.9
Cabrera High School,83.7,84.3,83.8,84.3
Figueroa High School,81.2,81.4,80.6,81.4
Ford High School,80.6,81.3,80.4,80.7
Griffin High School,83.4,83.7,84.3,84.0
