In [1]:
#import modules
import pandas as pd


In [2]:
# Relative paths of the two input CSV files
schools_filepath = "Resources/schools_complete.csv"
students_filepath = "Resources/students_complete.csv"

# Load each CSV into its own DataFrame
schools_df = pd.read_csv(schools_filepath)
students_df = pd.read_csv(students_filepath)

# Attach every student row to the row of the school it names.
# "school_name" is the join key and only needs to be listed once.
# validate="one_to_many" makes the merge raise if a school name is repeated in
# schools_df, which would otherwise silently duplicate student rows and inflate
# every per-school count computed from combined_df.
combined_df = pd.merge(
    schools_df,
    students_df,
    how="left",
    on="school_name",
    validate="one_to_many",
)

In [3]:
# Preview the first five rows of the schools table
schools_df.head(n=5)

Unnamed: 0,School ID,school_name,type,size,budget
0,0,Huang High School,District,2917,1910635
1,1,Figueroa High School,District,2949,1884411
2,2,Shelton High School,Charter,1761,1056600
3,3,Hernandez High School,District,4635,3022020
4,4,Griffin High School,Charter,1468,917500


In [4]:
# Preview the first five rows of the students table
students_df.head(n=5)

Unnamed: 0,Student ID,student_name,gender,grade,school_name,reading_score,math_score
0,0,Paul Bradley,M,9th,Huang High School,66,79
1,1,Victor Smith,M,12th,Huang High School,94,61
2,2,Kevin Rodriguez,M,12th,Huang High School,90,60
3,3,Dr. Richard Scott,M,12th,Huang High School,67,58
4,4,Bonnie Ray,F,9th,Huang High School,97,84


In [5]:
# Preview the first five rows of the merged schools + students table
combined_df.head(n=5)

Unnamed: 0,School ID,school_name,type,size,budget,Student ID,student_name,gender,grade,reading_score,math_score
0,0,Huang High School,District,2917,1910635,0,Paul Bradley,M,9th,66,79
1,0,Huang High School,District,2917,1910635,1,Victor Smith,M,12th,94,61
2,0,Huang High School,District,2917,1910635,2,Kevin Rodriguez,M,12th,90,60
3,0,Huang High School,District,2917,1910635,3,Dr. Richard Scott,M,12th,67,58
4,0,Huang High School,District,2917,1910635,4,Bonnie Ray,F,9th,97,84


In [6]:
# District-wide school count: number of non-missing entries in school_name
total_schools = schools_df["school_name"].notna().sum()

In [7]:
# Total Number of Students in District
# Count rows rather than non-null student names: the pass counts that are
# divided by this total are themselves row counts, so a missing name would
# otherwise shrink the denominator but not the numerators.
# NOTE(review): assumes students_df holds one row per student - confirm.
total_students = len(students_df)

In [8]:
# District budget: sum of the budget column across all schools
total_budget = schools_df.loc[:, "budget"].sum()

In [9]:
# District-wide mean of every student's math score
avg_math_dist = students_df.loc[:, "math_score"].mean()

In [10]:
# District-wide mean of every student's reading score
avg_reading_dist = students_df.loc[:, "reading_score"].mean()

In [11]:
# Share of students passing math, where passing means a math score of 70 or more
# Summing the boolean mask counts the rows that meet the threshold.
num_stu_passmath = int(students_df["math_score"].ge(70).sum())
per_stu_passmath = num_stu_passmath / total_students * 100

In [12]:
# Share of students passing reading, where passing means a reading score of 70 or more
# Summing the boolean mask counts the rows that meet the threshold.
num_stu_passreading = int(students_df["reading_score"].ge(70).sum())
per_stu_passreading = num_stu_passreading / total_students * 100

In [13]:
# Share of students passing both subjects (70 or more in math AND in reading)
# The two masks are combined element-wise, then summed to count matching rows.
num_stu_passmathandreading = int(
    (students_df["math_score"].ge(70) & students_df["reading_score"].ge(70)).sum()
)
per_stu_passmathandreading = num_stu_passmathandreading / total_students * 100

In [14]:
# Create District Summary Dataframe
# A single-row frame: each key is a column label and each value is one of the
# district-wide metrics computed in the preceding cells.
district_summary_df = pd.DataFrame([
    {"Total Schools": total_schools,
     "Total Students": total_students,
     "Total Budget": total_budget,
     "Average Math Score": avg_math_dist,
     "Average Reading Score": avg_reading_dist,
     "% Passing Math": per_stu_passmath,
     # Label spelled with a space after "%" so it matches the other "% ..." columns
     "% Passing Reading": per_stu_passreading,
     "% Overall Passing": per_stu_passmathandreading
    }
])
# Render the budget as a currency string with thousands separators and two
# decimals; after this line the column holds text, not numbers.
district_summary_df["Total Budget"] = district_summary_df["Total Budget"].map("${:,.2f}".format)

district_summary_df


Unnamed: 0,Total Schools,Total Students,Total Budget,Average Math Score,Average Reading Score,% Passing Math,%Passing Reading,% Overall Passing
0,15,39170,"$24,649,428.00",78.985371,81.87784,74.980853,85.805463,65.172326


Schools Summary

In [21]:
# Mean math score per school, indexed by school_name
school_avg_mathscores = combined_df.groupby("school_name")["math_score"].mean()

In [23]:
# Mean reading score per school, indexed by school_name
school_avg_readingscores = combined_df.groupby("school_name")["reading_score"].mean()

In [24]:
# Each school's budget, taken as the per-school mean of the budget column in
# the merged table (where the school's budget appears on each of its rows)
school_budget = combined_df.groupby("school_name")["budget"].mean()

In [25]:
# Student head count per school: non-missing student_name entries in each group
school_total_students = combined_df.groupby("school_name")["student_name"].count()

In [26]:
# Budget per student: school budget divided by that school's head count
# (the two Series are aligned on their school_name index)
school_budget_perstudent = school_budget.div(school_total_students)

In [28]:
# School type looked up by name: index the schools table by school_name and
# keep only the type column
school_type = schools_df.set_index("school_name").loc[:, "type"]

In [48]:
# Get percent of students who passed math by each school
# Keep only the rows whose math score is 70 or more.
school_students_passmath_df = combined_df[(combined_df["math_score"] >= 70)]
# Count the passers per school, then align the counts to every school present
# in school_total_students. A school with no passing student is missing from
# the filtered groupby and would otherwise yield NaN instead of 0%.
school_students_passmath = (
    school_students_passmath_df.groupby(["school_name"])["student_name"]
    .count()
    .reindex(school_total_students.index, fill_value=0)
)
per_school_students_passmath = (school_students_passmath / school_total_students) * 100

In [49]:
# Get percent of students who passed reading by each school
# Keep only the rows whose reading score is 70 or more.
school_students_passreading_df = combined_df[(combined_df["reading_score"] >= 70)]
# Count the passers per school, then align the counts to every school present
# in school_total_students. A school with no passing student is missing from
# the filtered groupby and would otherwise yield NaN instead of 0%.
school_students_passreading = (
    school_students_passreading_df.groupby(["school_name"])["student_name"]
    .count()
    .reindex(school_total_students.index, fill_value=0)
)
per_school_students_passreading = (school_students_passreading / school_total_students) * 100

In [50]:
# Get percent of students who passed reading and math by school
# Keep only the rows where both the reading and the math score are 70 or more.
school_students_passboth_df = combined_df[(combined_df["reading_score"] >= 70) & (combined_df["math_score"] >= 70)]
# Count the passers per school, then align the counts to every school present
# in school_total_students. A school with no student passing both subjects is
# missing from the filtered groupby and would otherwise yield NaN instead of 0%.
school_students_passboth = (
    school_students_passboth_df.groupby(["school_name"])["student_name"]
    .count()
    .reindex(school_total_students.index, fill_value=0)
)
per_school_students_passboth = (school_students_passboth / school_total_students) * 100

In [51]:
# Per-school summary table: one row per school, one column per metric.
# The Series passed in are aligned on their shared school_name index.
school_summary_df = pd.DataFrame(
    {
        "School Type": school_type,
        "Total Students": school_total_students,
        "Total School Budget": school_budget,
        "Per Student Budget": school_budget_perstudent,
        "Average Math Score": school_avg_mathscores,
        "Average Reading Score": school_avg_readingscores,
        "% Passing Math": per_school_students_passmath,
        "% Passing Reading": per_school_students_passreading,
        "% Overall Passing": per_school_students_passboth,
    }
)
# Turn both budget columns into currency strings (thousands separators, two
# decimals); after this they hold text rather than numbers.
for budget_column in ("Total School Budget", "Per Student Budget"):
    school_summary_df[budget_column] = school_summary_df[budget_column].map("${:,.2f}".format)
school_summary_df

Unnamed: 0,School Type,Total Students,Total School Budget,Per Student Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing
Bailey High School,District,4976,"$3,124,928.00",$628.00,77.048432,81.033963,66.680064,81.93328,54.642283
Cabrera High School,Charter,1858,"$1,081,356.00",$582.00,83.061895,83.97578,94.133477,97.039828,91.334769
Figueroa High School,District,2949,"$1,884,411.00",$639.00,76.711767,81.15802,65.988471,80.739234,53.204476
Ford High School,District,2739,"$1,763,916.00",$644.00,77.102592,80.746258,68.309602,79.299014,54.289887
Griffin High School,Charter,1468,"$917,500.00",$625.00,83.351499,83.816757,93.392371,97.138965,90.599455
Hernandez High School,District,4635,"$3,022,020.00",$652.00,77.289752,80.934412,66.752967,80.862999,53.527508
Holden High School,Charter,427,"$248,087.00",$581.00,83.803279,83.814988,92.505855,96.252927,89.227166
Huang High School,District,2917,"$1,910,635.00",$655.00,76.629414,81.182722,65.683922,81.316421,53.513884
Johnson High School,District,4761,"$3,094,650.00",$650.00,77.072464,80.966394,66.057551,81.222432,53.539172
Pena High School,Charter,962,"$585,858.00",$609.00,83.839917,84.044699,94.594595,95.945946,90.540541
