In [1]:
# Dependencies and Setup
import pandas as pd

# File to Load (Remember to Change These)
school_data_to_load = "Resources/schools_complete.csv"
student_data_to_load = "Resources/students_complete.csv"

# Read School and Student Data File and store into Pandas DataFrames
school_data = pd.read_csv(school_data_to_load)
student_data = pd.read_csv(student_data_to_load)

# Combine the data into a single dataset.  
school_data_complete = pd.merge(student_data, school_data, how="left", on=["school_name", "school_name"])

In [2]:
school_data_complete.head()

Unnamed: 0,Student ID,student_name,gender,grade,school_name,reading_score,math_score,School ID,type,size,budget
0,0,Paul Bradley,M,9th,Huang High School,66,79,0,District,2917,1910635
1,1,Victor Smith,M,12th,Huang High School,94,61,0,District,2917,1910635
2,2,Kevin Rodriguez,M,12th,Huang High School,90,60,0,District,2917,1910635
3,3,Dr. Richard Scott,M,12th,Huang High School,67,58,0,District,2917,1910635
4,4,Bonnie Ray,F,9th,Huang High School,97,84,0,District,2917,1910635


In [14]:
bins = [0, 1000, 2000, 5000]
group_names = ["<1000", "1000-1999", "2000-5000"]
school_data_complete["School Size"] = pd.cut(school_data_complete["size"], bins, labels=group_names, include_lowest = True)
school_data_complete.head()

Unnamed: 0,Student ID,student_name,gender,grade,school_name,reading_score,math_score,School ID,type,size,budget,School Size
0,0,Paul Bradley,M,9th,Huang High School,66,79,0,District,2917,1910635,2000-5000
1,1,Victor Smith,M,12th,Huang High School,94,61,0,District,2917,1910635,2000-5000
2,2,Kevin Rodriguez,M,12th,Huang High School,90,60,0,District,2917,1910635,2000-5000
3,3,Dr. Richard Scott,M,12th,Huang High School,67,58,0,District,2917,1910635,2000-5000
4,4,Bonnie Ray,F,9th,Huang High School,97,84,0,District,2917,1910635,2000-5000


In [15]:
school_data_group = school_data_complete.groupby("School Size")
school_data_group.head()
students_per_bin = school_data_group["student_name"].count()
students_per_bin

School Size
<1000         1389
1000-1999     8522
2000-5000    29259
Name: student_name, dtype: int64

In [16]:
Avg_math = school_data_group["math_score"].mean()
Avg_math

School Size
<1000        83.828654
1000-1999    83.372682
2000-5000    77.477597
Name: math_score, dtype: float64

In [17]:
Avg_read = school_data_group["reading_score"].mean()
Avg_read

School Size
<1000        83.974082
1000-1999    83.867989
2000-5000    81.198674
Name: reading_score, dtype: float64

In [18]:
#Calculate percent passing math
pass_math_df = school_data_complete.loc[school_data_complete["math_score"] >= 70, :]
math_group = pass_math_df.groupby("School Size")
math_percent = math_group["math_score"].count() /students_per_bin * 100
math_percent




School Size
<1000        93.952484
1000-1999    93.616522
2000-5000    68.652380
dtype: float64

In [19]:
#calculate percent passing reading
pass_read_df = school_data_complete.loc[school_data_complete["reading_score"] >= 70, :]
read_group = pass_read_df.groupby("School Size")
read_percent = read_group["reading_score"].count() / students_per_bin * 100
read_percent

School Size
<1000        96.040317
1000-1999    96.773058
2000-5000    82.125158
dtype: float64

In [20]:
#calculate percent passing overall
overall = school_data_complete.loc[((school_data_complete["math_score"] >= 70) & (school_data_complete["reading_score"] >= 70)) ,:]
overall_group = overall.groupby("School Size")
overall_percent = overall_group["student_name"].count() / students_per_bin * 100
overall_percent

School Size
<1000        90.136789
1000-1999    90.624267
2000-5000    56.574046
Name: student_name, dtype: float64

In [21]:
math = pd.Series(Avg_math)
read = pd.Series(Avg_read)
math_percent = pd.Series(math_percent)
read_percent = pd.Series(read_percent)
over_percent = pd.Series(overall_percent)
summary = pd.DataFrame([math, read, math_percent, read_percent, over_percent])
summary = summary.T
summary = summary.rename(columns = {"math_score": "Avgerage Math Score", "reading_score": "Avgerage Reading Score",
                                   "Unnamed 0":"% Passing Math", "Unnamed 1":"% Passing Reading", "student_name":"% Passing Overall"})
summary_table = summary.style.format({"Average Math Score": "{:.2f}","Average Reading Score": "{:.2f}", 
                              "% Passing Math": "{:.2f}", "% Passing Reading": "{:.2f}", "% Passing Overall": "{:.2f}"})
summary_table

Unnamed: 0_level_0,Avgerage Math Score,Avgerage Reading Score,% Passing Math,% Passing Reading,% Passing Overall
School Size,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
<1000,83.828654,83.974082,93.95,96.04,90.14
1000-1999,83.372682,83.867989,93.62,96.77,90.62
2000-5000,77.477597,81.198674,68.65,82.13,56.57
