In [5]:
# Import dependencies
import pandas as pd


# Set up merged DataFrame

In [6]:
# Load the school-level data and preview the first rows

csv_schools = "Resources/schools_complete.csv"

schools_df = pd.read_csv(filepath_or_buffer=csv_schools, encoding="utf-8")
schools_df.head(n=5)



Unnamed: 0,School ID,school_name,type,size,budget
0,0,Huang High School,District,2917,1910635
1,1,Figueroa High School,District,2949,1884411
2,2,Shelton High School,Charter,1761,1056600
3,3,Hernandez High School,District,4635,3022020
4,4,Griffin High School,Charter,1468,917500


In [7]:
# Load the student-level data and preview the first rows

csv_students = "Resources/students_complete.csv"

students_df = pd.read_csv(filepath_or_buffer=csv_students, encoding="utf-8")
students_df.head(n=5)


Unnamed: 0,Student ID,student_name,gender,grade,school_name,reading_score,math_score
0,0,Paul Bradley,M,9th,Huang High School,66,79
1,1,Victor Smith,M,12th,Huang High School,94,61
2,2,Kevin Rodriguez,M,12th,Huang High School,90,60
3,3,Dr. Richard Scott,M,12th,Huang High School,67,58
4,4,Bonnie Ray,F,9th,Huang High School,97,84


In [15]:
# Merge dataframes: attach the matching school's columns to every student row.
# how="left" keeps every student row even if no school record matches its name.
school_student_df = pd.merge(
    students_df,
    schools_df,
    how="left",
    on="school_name",        # single join key (it was previously listed twice)
    validate="many_to_one",  # raise MergeError if a school name repeats in schools_df,
                             # which would otherwise silently duplicate student rows
)

# Persist the merged table for inspection outside the notebook, then preview it
school_student_df.to_csv("merged data.csv")
school_student_df.head()


Unnamed: 0,Student ID,student_name,gender,grade,school_name,reading_score,math_score,School ID,type,size,budget
0,0,Paul Bradley,M,9th,Huang High School,66,79,0,District,2917,1910635
1,1,Victor Smith,M,12th,Huang High School,94,61,0,District,2917,1910635
2,2,Kevin Rodriguez,M,12th,Huang High School,90,60,0,District,2917,1910635
3,3,Dr. Richard Scott,M,12th,Huang High School,67,58,0,District,2917,1910635
4,4,Bonnie Ray,F,9th,Huang High School,97,84,0,District,2917,1910635


# District Data

In [17]:
# Number of distinct (non-null) school names in the merged data
school_count = school_student_df["school_name"].nunique()
school_count

15

In [18]:
# District-wide enrollment and budget.
# size and budget sit on every student row of the merged frame, so collapse to
# one row per school before summing.
students_budget_df = school_student_df.loc[:, ["school_name", "size", "budget"]]
students_budget_byschool_df = students_budget_df.groupby("school_name").last()

student_count, budget_total = (
    students_budget_byschool_df[column].sum() for column in ("size", "budget")
)

print(student_count)
print(budget_total)

24649428

In [23]:
# District-wide mean reading and math scores, taken over all student rows
scores_df = school_student_df.loc[:, ["school_name", "reading_score", "math_score"]]

reading_avg = scores_df.loc[:, "reading_score"].mean()
math_avg = scores_df.loc[:, "math_score"].mean()

print(reading_avg, math_avg, sep="\n")


81.87784018381414
78.98537145774827


In [34]:
# Percentage of students passing math (score of 70 or higher),
# measured against the district enrollment total
math_pass_mask = scores_df["math_score"] >= 70
passing_math = scores_df.loc[math_pass_mask, "school_name"].count()

math_percent = passing_math / student_count * 100

print(math_percent)

74.9808526933878


In [35]:
# Percentage of students passing reading (score of 70 or higher),
# measured against the district enrollment total
reading_pass_mask = scores_df["reading_score"] >= 70
passing_reading = scores_df.loc[reading_pass_mask, "school_name"].count()

reading_percent = passing_reading / student_count * 100

print(reading_percent)

85.80546336482001


In [38]:
# Percentage of students passing BOTH subjects (each score 70 or higher),
# measured against the district enrollment total
reading_ok = scores_df["reading_score"] >= 70
math_ok = scores_df["math_score"] >= 70
passing_math_reading = scores_df.loc[reading_ok & math_ok, "school_name"].count()

math_reading_percent = passing_math_reading / student_count * 100

print(math_reading_percent)

65.17232575950983


In [52]:
# One-row district summary holding the raw numeric values
district_summary_df = pd.DataFrame(
    {
        "Total Schools": [school_count],
        "Total Students": [student_count],
        "Total Budget": [budget_total],
        "Average Math Score": [math_avg],
        "Average Reading Score": [reading_avg],
        "% Passing Math": [math_percent],
        "% Passing Reading": [reading_percent],
        "% Overall Passing": [math_reading_percent],
    }
)

# Display-only copy: values become formatted strings, so the numeric frame
# above stays usable for further calculations.
column_formats = {
    "Total Students": "{:,}",
    "Total Budget": "${:,.2f}",
    "Average Math Score": "{:,.2f}",
    "Average Reading Score": "{:,.2f}",
    "% Passing Math": "{:.2f}%",
    "% Passing Reading": "{:.2f}%",
    "% Overall Passing": "{:.2f}%",
}

district_summary_formatted = district_summary_df.copy()
for column, template in column_formats.items():
    district_summary_formatted[column] = district_summary_formatted[column].map(template.format)

district_summary_formatted

Unnamed: 0,Total Schools,Total Students,Total Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing
0,15,39170,"$24,649,428.00",78.99,81.88,74.98%,85.81%,65.17%


# School Data

In [56]:
# Non-null value count for every column of the merged frame
school_student_df.count()

Student ID       39170
student_name     39170
gender           39170
grade            39170
school_name      39170
reading_score    39170
math_score       39170
School ID        39170
type             39170
size             39170
budget           39170
dtype: int64

In [114]:
# Per-school type, enrollment, budget, and budget per student.
# The school-level columns sit on every student row of the merged frame, so
# take one value per school.
school_info_last_df = school_student_df.groupby("school_name").last()

school_type, school_size, school_budget = (
    school_info_last_df[column] for column in ("type", "size", "budget")
)

budget_student = school_budget / school_size
print(budget_student)

school_name
Bailey High School       628.0
Cabrera High School      582.0
Figueroa High School     639.0
Ford High School         644.0
Griffin High School      625.0
Hernandez High School    652.0
Holden High School       581.0
Huang High School        655.0
Johnson High School      650.0
Pena High School         609.0
Rodriguez High School    637.0
Shelton High School      600.0
Thomas High School       638.0
Wilson High School       578.0
Wright High School       583.0
dtype: float64


In [137]:
# Per-school average scores and passing rates (passing = score of 70 or higher).

# numeric_only=True: the merged frame also holds text columns (student_name,
# gender, grade, type), and pandas >= 2.0 raises TypeError when asked to
# average them instead of silently dropping them.
school_info_mean_df = school_student_df.groupby(["school_name"]).mean(numeric_only=True)

school_math = school_info_mean_df["math_score"]
school_reading = school_info_mean_df["reading_score"]

# For each rate: count passing students per school, then divide by the school's
# enrollment. A school with no passing students is absent from the grouped
# count, so reindex against school_size with 0 to report 0% rather than NaN.
school_math_passing = school_student_df[(school_student_df["math_score"] >= 70)]
school_math_percent = (
    school_math_passing.groupby(["school_name"])["student_name"].count()
    .reindex(school_size.index, fill_value=0)
    / school_size * 100
)

school_reading_passing = school_student_df[(school_student_df["reading_score"] >= 70)]
school_reading_percent = (
    school_reading_passing.groupby(["school_name"])["student_name"].count()
    .reindex(school_size.index, fill_value=0)
    / school_size * 100
)

school_overall_passing = school_student_df[
    (school_student_df["math_score"] >= 70) & (school_student_df["reading_score"] >= 70)
]
school_overall_percent = (
    school_overall_passing.groupby(["school_name"])["student_name"].count()
    .reindex(school_size.index, fill_value=0)
    / school_size * 100
)



In [138]:
# Assemble the per-school summary; every Series is indexed by school_name, so
# the DataFrame constructor aligns them into one row per school.
summary_columns = {
    "School Type": school_type,
    "Total Students": school_size,
    "Total School Budget": school_budget,
    "Per Student Budget": budget_student,
    "Average Math Score": school_math,
    "Average Reading Score": school_reading,
    "% Passing Math": school_math_percent,
    "% Passing Reading": school_reading_percent,
    "% Overall Passing": school_overall_percent,
}
school_summary_df = pd.DataFrame(summary_columns)

school_summary_df

Unnamed: 0_level_0,School Type,Total Students,Total School Budget,Per Student Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing
school_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Bailey High School,District,4976,3124928,628.0,77.048432,81.033963,66.680064,81.93328,54.642283
Cabrera High School,Charter,1858,1081356,582.0,83.061895,83.97578,94.133477,97.039828,91.334769
Figueroa High School,District,2949,1884411,639.0,76.711767,81.15802,65.988471,80.739234,53.204476
Ford High School,District,2739,1763916,644.0,77.102592,80.746258,68.309602,79.299014,54.289887
Griffin High School,Charter,1468,917500,625.0,83.351499,83.816757,93.392371,97.138965,90.599455
Hernandez High School,District,4635,3022020,652.0,77.289752,80.934412,66.752967,80.862999,53.527508
Holden High School,Charter,427,248087,581.0,83.803279,83.814988,92.505855,96.252927,89.227166
Huang High School,District,2917,1910635,655.0,76.629414,81.182722,65.683922,81.316421,53.513884
Johnson High School,District,4761,3094650,650.0,77.072464,80.966394,66.057551,81.222432,53.539172
Pena High School,Charter,962,585858,609.0,83.839917,84.044699,94.594595,95.945946,90.540541
