Analysis

In [11]:
# Setup 
import pandas as pd
from pathlib import Path

# Locate the two input CSV files relative to the notebook's working directory
school_data_path = Path("Resources/schools_complete.csv")
student_data_path = Path("Resources/students_complete.csv")

# Load each CSV into its own DataFrame
school_data = pd.read_csv(school_data_path)
student_data = pd.read_csv(student_data_path)

# Attach each school's attributes to every student row (left join keeps all
# students). The join key is the single column "school_name".
# validate="many_to_one" makes pandas raise a MergeError if a school name
# appears more than once in school_data, which would otherwise silently
# duplicate student rows and inflate every count computed from merged_data.
merged_data = pd.merge(
    student_data,
    school_data,
    how="left",
    on="school_name",
    validate="many_to_one",
)
merged_data.head()

Unnamed: 0,Student ID,student_name,gender,grade,school_name,reading_score,math_score,School ID,type,size,budget
0,0,Paul Bradley,M,9th,Huang High School,66,79,0,District,2917,1910635
1,1,Victor Smith,M,12th,Huang High School,94,61,0,District,2917,1910635
2,2,Kevin Rodriguez,M,12th,Huang High School,90,60,0,District,2917,1910635
3,3,Dr. Richard Scott,M,12th,Huang High School,67,58,0,District,2917,1910635
4,4,Bonnie Ray,F,9th,Huang High School,97,84,0,District,2917,1910635


District Summary

In [49]:
# Number of distinct schools present in the merged data.
# dropna=False keeps a missing school name (if any) counted as its own entry.
total_schools = merged_data["school_name"].nunique(dropna=False)
print(total_schools)

15


In [52]:
# Number of distinct students, identified by their "Student ID".
# dropna=False keeps a missing ID (if any) counted as its own entry.
total_students = merged_data["Student ID"].nunique(dropna=False)
print(total_students)

39170


In [24]:
# District budget: sum of the per-school budgets. Taken from school_data
# (one row per school) rather than merged_data, where each school's budget
# is repeated on every student row.
total_budget = school_data["budget"].sum()
print(total_budget)

24649428


In [25]:
# Mean math score across all student rows
avg_math = merged_data["math_score"].mean()
print(avg_math)

78.98537145774827


In [26]:
# Mean reading score across all student rows
avg_reading = merged_data["reading_score"].mean()
print(avg_reading)

81.87784018381414


In [47]:
# % passing math (the percentage of students who passed math)
# A score of 70 or higher counts as passing. Summing the boolean mask counts
# the passing rows directly, so the result depends only on math_score and not
# on whether student_name happens to be populated for those rows.
passing_math_mask = merged_data["math_score"] >= 70
passing_mcount = passing_math_mask.sum()
print(passing_mcount)
passing_mpercentage = passing_mcount / total_students * 100
print(passing_mpercentage)

29370
74.9808526933878


In [40]:
# % passing reading (the percentage of students who passed reading)
# A score of 70 or higher counts as passing. Summing the boolean mask counts
# the passing rows directly, so the result depends only on reading_score and
# not on whether student_name happens to be populated for those rows.
passing_reading_mask = merged_data["reading_score"] >= 70
passing_rcount = passing_reading_mask.sum()
print(passing_rcount)
passing_rpercentage = passing_rcount / total_students * 100
print(passing_rpercentage)

33610
85.80546336482001


In [53]:
# % overall passing (the percentage of students who passed math AND reading)
# A row passes overall only when both scores are 70 or higher. Summing the
# combined boolean mask counts those rows directly, independent of whether
# student_name is populated.
passing_both_mask = (merged_data["reading_score"] >= 70) & (merged_data["math_score"] >= 70)
pass_count = passing_both_mask.sum()
print(pass_count)
pass_percentage = pass_count / total_students * 100
print(pass_percentage)

25528
65.17232575950983


In [63]:
# Assemble the district-wide metrics into a one-row summary table.
# Every value is pre-formatted as display text (thousands separators, two
# decimals, "$" / "%" markers), so the resulting columns hold strings rather
# than numbers.
district_summary = {
    "Total Schools": f"{total_schools}",
    "Total Students": f"{total_students:,}",
    "Total Budget": f"${total_budget:,}",
    "Average Math Score": f"{avg_math:.2f}",
    "Average Reading Score": f"{avg_reading:.2f}",
    "% Passing Math": f"{passing_mpercentage:.2f}%",
    "% Passing Reading": f"{passing_rpercentage:.2f}%",
    "% Overall Passing": f"{pass_percentage:.2f}%",
}

# The dict holds scalars only, so an explicit single-row index is required.
district_df = pd.DataFrame(district_summary, index=[0])

district_df

Unnamed: 0,Total Schools,Total Students,Total Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing
0,15,39170,"$24,649,428",78.99,81.88,74.98%,85.81%,65.17%
