In [2]:
# Dependencies and Setup
import pandas as pd
from pathlib import Path

# File to Load (Remember to Change These)
# Both paths are relative to the notebook's working directory.
school_data_to_load = Path("..", "Resources/schools_complete.csv")
student_data_to_load = Path("..", "Resources/students_complete.csv")

# Read School and Student Data File and store into Pandas DataFrames
school_data_df = pd.read_csv(school_data_to_load)
student_data_df = pd.read_csv(student_data_to_load)

# Combine the data into a single dataset.
# Left join: every student row is kept and gains the columns of the school
# whose "school_name" matches. The key is named once -- listing the same
# column twice in `on` only repeats the same comparison.
school_data_complete_df = pd.merge(student_data_df, school_data_df, how="left", on="school_name")
school_data_complete_df.head()

Unnamed: 0,Student ID,student_name,gender,grade,school_name,reading_score,math_score,School ID,type,size,budget
0,0,Paul Bradley,M,9th,Huang High School,66,79,0,District,2917,1910635
1,1,Victor Smith,M,12th,Huang High School,94,61,0,District,2917,1910635
2,2,Kevin Rodriguez,M,12th,Huang High School,90,60,0,District,2917,1910635
3,3,Dr. Richard Scott,M,12th,Huang High School,67,58,0,District,2917,1910635
4,4,Bonnie Ray,F,9th,Huang High School,97,84,0,District,2917,1910635


DISTRICT SUMMARY

In [3]:
# Number of distinct school names present in the merged data
# (nunique ignores missing values by default)
school_count = school_data_complete_df.loc[:, "school_name"].nunique()
school_count

15

In [4]:
# Total students = number of non-null "Student ID" entries in the merged data
student_count = school_data_complete_df.loc[:, "Student ID"].count()
student_count

39170

In [5]:
# Total district budget.
# The merged frame repeats a school's budget on each of its student rows, so
# keep only the first row seen for each school name before summing.
individual_school_df = school_data_complete_df.drop_duplicates(subset=["school_name"])
total_budget = individual_school_df.loc[:, "budget"].sum()
total_budget

24649428

In [6]:
# Mean math score across every row of the merged data
average_math_score = school_data_complete_df.loc[:, "math_score"].mean()
average_math_score

78.98537145774827

In [7]:
# Mean reading score across every row of the merged data
average_reading_score = school_data_complete_df.loc[:, "reading_score"].mean()
average_reading_score

81.87784018381414

In [8]:
# Percentage of students passing math (score of 70 or higher).
# Passing rows are counted through their non-null "student_name" values.
passed_math_count = school_data_complete_df.loc[
    school_data_complete_df["math_score"].ge(70), "student_name"
].count()
passed_math_percentage = passed_math_count / float(student_count) * 100
passed_math_percentage

74.9808526933878

In [9]:
# Percentage of students passing reading (score of 70 or higher).
# Passing rows are counted through their non-null "student_name" values.
passed_reading_count = school_data_complete_df.loc[
    school_data_complete_df["reading_score"].ge(70), "student_name"
].count()
passed_reading_percentage = passed_reading_count / float(student_count) * 100
passed_reading_percentage

85.80546336482001

In [11]:
# Percentage of students passing BOTH subjects (70 or higher in reading and in math).
# Only the "student_name" column is selected, so a single column is counted.
passed_math_and_reading_count = school_data_complete_df.loc[
    school_data_complete_df["reading_score"].ge(70)
    & school_data_complete_df["math_score"].ge(70),
    "student_name",
].count()
passed_math_and_reading_percentage = passed_math_and_reading_count / float(student_count) * 100
passed_math_and_reading_percentage

65.17232575950983

In [None]:
#Compile overall metrics for the district into one data frame
# Each value comes from the variable computed in the district-summary cells
# above (not from pasted outputs), so the table stays correct whenever the
# input files change and the notebook is re-run.
district_summary = pd.DataFrame({
    "Total Schools": [school_count],
    "Total Students": [student_count],
    "Total Budget": [total_budget],
    "Average Math Score": [average_math_score],
    "Average Reading Score": [average_reading_score],
    "% Passing Math": [passed_math_percentage],
    "% Passing Reading": [passed_reading_percentage],
    "% Overall Passing": [passed_math_and_reading_percentage],
})

# Format columns to include commas, $, and round to the proper decimal places
# (after this step both columns hold display strings, not numbers)
district_summary["Total Students"] = district_summary["Total Students"].map("{:,}".format)
district_summary["Total Budget"] = district_summary["Total Budget"].map("${:,.2f}".format)

district_summary

Unnamed: 0,Total Schools,Total Students,Total Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing
0,15,39170,"$24,649,428.00",78.985371,81.87784,74.980853,85.805463,65.172326


SCHOOL SUMMARY

In [12]:
# School type ("type" column) as a Series indexed by school name
school_types = school_data_df.set_index("school_name").loc[:, "type"]

In [None]:
#calculate the total student count per school
per_school_counts = 