In [4]:
# Dependencies and Setup
import pandas as pd
import numpy as np

# Files to load (paths are relative to the notebook; change them if the data lives elsewhere)
school_data_to_load = "Resources/schools_complete.csv"
student_data_to_load = "Resources/students_complete.csv"

# Read the school and student data files into pandas DataFrames
school_data = pd.read_csv(school_data_to_load)
student_data = pd.read_csv(student_data_to_load)

# Combine the data into a single dataset: one row per student record, with the
# matching school's columns attached. A left merge keeps every student row even
# when its school_name has no match in the school file (those school columns
# are then NaN). The join key is given once; listing it twice was redundant.
school_data_complete = pd.merge(student_data, school_data, how="left", on="school_name")

In [5]:
# District-wide metrics computed from the merged student/school DataFrame.

# Minimum score that counts as passing a subject
PASSING_SCORE = 70

# Calculate the total number of schools. nunique() skips missing values, so a
# student row whose school was not matched by the left merge (NaN "School ID")
# is not counted as an extra school.
school_total = school_data_complete["School ID"].nunique()

# Calculate the total number of students
student_total = school_data_complete["Student ID"].nunique()

# Calculate the total budget: keep one row per school and add up those budgets.
# Summing the distinct budget *values* instead would silently drop a school
# whenever two schools happen to have the same budget amount.
school_budget_total = (
    school_data_complete.drop_duplicates(subset="School ID")["budget"].sum()
)

# Calculate the average math score
avg_math = round(school_data_complete["math_score"].mean(), 2)

# Calculate the average reading score
avg_reading = round(school_data_complete["reading_score"].mean(), 2)

# Calculate the percentage of students with a passing math score (70 or greater)
math_passing = school_data_complete["math_score"] >= PASSING_SCORE
st_math_passing = round((math_passing.sum() / student_total) * 100, 2)

# Calculate the percentage of students with a passing reading score (70 or greater)
reading_passing = school_data_complete["reading_score"] >= PASSING_SCORE
st_reading_passing = round((reading_passing.sum() / student_total) * 100, 2)

# Calculate the percentage of students who passed math and reading (% Overall Passing)
both_passing = reading_passing & math_passing
all_passing = round((both_passing.sum() / student_total) * 100, 2)

In [6]:
# Collect the district-wide results under their display labels, in column order
district_metrics = {
    "Total Schools": school_total,
    "Total Students": student_total,
    "Total Budget": school_budget_total,
    "Average Math Score": avg_math,
    "Average Reading Score": avg_reading,
    "% Passing Math": st_math_passing,
    "% Passing Reading": st_reading_passing,
    "% Overall Passing": all_passing,
}

# Build a one-row summary table: every metric becomes a single-element column
summary_df = pd.DataFrame(
    {label: [value] for label, value in district_metrics.items()}
)
summary_df.head()

Unnamed: 0,Total Schools,Total Students,Total Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing
0,15,39170,24649428,78.99,81.88,74.98,85.81,65.17


In [7]:
# Optional: give the displayed data cleaner formatting.
# This rewrites the columns of summary_df in place as strings. Each column is
# formatted only while it is still numeric, so running this cell a second time
# is a no-op instead of raising when the format spec is applied to a string.
display_formats = {
    "Total Students": "{:,}",      # thousands separators
    "Total Budget": "${:,.2f}",    # dollars with two decimal places
}
for column, template in display_formats.items():
    if pd.api.types.is_numeric_dtype(summary_df[column]):
        summary_df[column] = summary_df[column].map(template.format)
summary_df.head()

Unnamed: 0,Total Schools,Total Students,Total Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing
0,15,"39,170","$24,649,428.00",78.99,81.88,74.98,85.81,65.17
