### Perform the necessary calculations and then create a high-level snapshot of the district's key metrics in a DataFrame.
#### Include the following:
- Total number of unique schools
- Total students
- Total budget
- Average math score
- Average reading score
- % passing math (the percentage of students who passed math)
- % passing reading (the percentage of students who passed reading)
- % overall passing (the percentage of students who passed math AND reading)

In [1]:
#dependencies
import pandas as pd
from pathlib import Path

In [3]:
# Input CSV locations, relative to this notebook (update these if the Resources folder moves)
school_data_to_load, student_data_to_load = (
    Path("../Resources/schools_complete.csv"),
    Path("../Resources/students_complete.csv"),
)
# Load both files into pandas DataFrames (schools first, then students)
school_data, student_data = map(pd.read_csv, (school_data_to_load, student_data_to_load))

In [4]:
# Combine the data into a single dataset: attach each student's school
# attributes (type, size, budget, ...) by matching on the school name.
# how="left" keeps every student row. validate="many_to_one" makes pandas raise
# a MergeError if a school name appears more than once in school_data, which
# would otherwise silently duplicate student rows and inflate the counts
# derived from this frame.
school_data_complete = pd.merge(
    student_data,
    school_data,
    how="left",
    on="school_name",
    validate="many_to_one",
)
school_data_complete.head()

Unnamed: 0,Student ID,student_name,gender,grade,school_name,reading_score,math_score,School ID,type,size,budget
0,0,Paul Bradley,M,9th,Huang High School,66,79,0,District,2917,1910635
1,1,Victor Smith,M,12th,Huang High School,94,61,0,District,2917,1910635
2,2,Kevin Rodriguez,M,12th,Huang High School,90,60,0,District,2917,1910635
3,3,Dr. Richard Scott,M,12th,Huang High School,67,58,0,District,2917,1910635
4,4,Bonnie Ray,F,9th,Huang High School,97,84,0,District,2917,1910635


##### District Summary

In [23]:
# Total number of unique schools: collect the distinct school names found in
# the merged data, then count them.
un_school_count = pd.unique(school_data_complete["school_name"])
school_count = len(un_school_count)
school_count

15

In [11]:
# Total number of students: the number of rows in the merged dataset
# (one row per student record).
student_count = school_data_complete.shape[0]
student_count

39170

In [24]:
# Total budget: summed from the schools table (schools_complete.csv) only.
# The merged frame repeats each school's budget on every student row, so
# summing there would count each budget once per student.
total_budget = school_data.loc[:, "budget"].sum()
total_budget

24649428

In [14]:
# District-wide average (mean) math score across all student rows
average_math_score = school_data_complete.loc[:, "math_score"].mean()
average_math_score

78.98537145774827

In [17]:
# District-wide average (mean) reading score across all student rows
average_reading_score = school_data_complete.loc[:, "reading_score"].mean()
average_reading_score

81.87784018381414

In [15]:
# Percentage of students who passed math (math score greater than or equal to 70).
# Summing the boolean mask counts the passing rows directly, so the count does
# not depend on any other column (e.g. student_name) being non-null.
passing_math_count = (school_data_complete["math_score"] >= 70).sum()
passing_math_percentage = passing_math_count / student_count * 100
passing_math_percentage

74.9808526933878

In [19]:
# Percentage of students who passed reading (reading score greater than or equal to 70).
# Summing the boolean mask counts the passing rows directly, so the count does
# not depend on any other column (e.g. student_name) being non-null.
passing_reading_count = (school_data_complete["reading_score"] >= 70).sum()
passing_reading_percentage = passing_reading_count / student_count * 100
passing_reading_percentage

85.80546336482001

In [20]:
# Percentage of students who passed BOTH math and reading (each score >= 70).
# The two boolean masks are combined element-wise with & and summed, which
# counts the rows where both conditions hold without depending on any other
# column (e.g. student_name) being non-null.
passing_math_reading_count = (
    (school_data_complete["math_score"] >= 70)
    & (school_data_complete["reading_score"] >= 70)
).sum()
overall_passing_rate = passing_math_reading_count / student_count * 100
overall_passing_rate

65.17232575950983

In [38]:
# High-level snapshot of the district's key metrics: one row, one column per metric.
district_metrics = {
    "Total School": school_count,
    "Total Students": student_count,
    "Total Budget": total_budget,
    "Average Math Score": average_math_score,
    "Average Reading Score": average_reading_score,
    "% Passing Math": passing_math_percentage,
    "% Passing Reading": passing_reading_percentage,
    "% Overall Passing": overall_passing_rate,
}

# Wrap each value in a one-element list to get a single-row frame, then replace
# the two display columns with formatted strings: thousands separators for the
# student total, and a $ sign with two decimals for the budget.
district_summary_df = pd.DataFrame(
    {label: [value] for label, value in district_metrics.items()}
).assign(
    **{
        "Total Students": lambda frame: frame["Total Students"].map("{:,}".format),
        "Total Budget": lambda frame: frame["Total Budget"].map("${:,.2f}".format),
    }
)

# Display the DataFrame
district_summary_df

Unnamed: 0,Total School,Total Students,Total Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing
0,15,39170,"$24,649,428.00",78.985371,81.87784,74.980853,85.805463,65.172326


##### School Summary