In [128]:
# Dependencies and Setup
import pandas as pd

# File to Load (Remember to Change These)
school_data_to_load = "Resources/schools_complete.csv"
student_data_to_load = "Resources/students_complete.csv"

# Read School and Student Data File and store into Pandas DataFrames
school_data = pd.read_csv(school_data_to_load)
student_data = pd.read_csv(student_data_to_load)

# Combine the data into a single dataset.  
school_data_complete = pd.merge(student_data, school_data, how="left", on=["school_name", "school_name"])
school_data_complete.head()

Unnamed: 0,Student ID,student_name,gender,grade,school_name,reading_score,math_score,School ID,type,size,budget
0,0,Paul Bradley,M,9th,Huang High School,66,79,0,District,2917,1910635
1,1,Victor Smith,M,12th,Huang High School,94,61,0,District,2917,1910635
2,2,Kevin Rodriguez,M,12th,Huang High School,90,60,0,District,2917,1910635
3,3,Dr. Richard Scott,M,12th,Huang High School,67,58,0,District,2917,1910635
4,4,Bonnie Ray,F,9th,Huang High School,97,84,0,District,2917,1910635


# District Summary

In [129]:
# Calculate the total number of unique schools
school_count = len(school_data.value_counts('school_name'))
school_count

15

In [130]:
# Calculate the total number of students
student_count = len(student_data['student_name'])
student_count

39170

In [131]:
# Calculate the total budget
total_budget = school_data['budget'].sum()
total_budget

24649428

In [132]:
# Calculate the average (mean) math score
average_math_score = school_data_complete['math_score'].mean()
average_math_score

78.98537145774827

In [133]:
# Calculate the average (mean) reading score
average_reading_score = school_data_complete['reading_score'].mean()
average_reading_score

81.87784018381414

In [134]:
# Use the following to calculate the percentage of students who passed math (math scores greather than or equal to 70)
passing_math_count = school_data_complete[(school_data_complete["math_score"] >= 70)].count()["student_name"]
passing_math_percentage = passing_math_count / float(student_count) * 100
passing_math_percentage

74.9808526933878

In [135]:
# Calculate the percentage of students who passeed reading (hint: look at how the math percentage was calculated)  
passing_reading_count = school_data_complete[(school_data_complete["reading_score"] >= 70)].count()["student_name"]
passing_reading_percentage = passing_reading_count / float(student_count) * 100
passing_reading_percentage

85.80546336482001

In [136]:
# Use the following to calculate the percentage of students that passed math and reading
passing_math_reading_count = school_data_complete[
    (school_data_complete["math_score"] >= 70) & (school_data_complete["reading_score"] >= 70)].count()["student_name"]
overall_passing_rate = passing_math_reading_count /  float(student_count) * 100
overall_passing_rate

65.17232575950983

In [137]:
# Create a high-level snapshot of the district's key metrics in a DataFrame
district_summary = pd.DataFrame({"Total Schools": school_count, 
                                 "Total Students": student_count,
                                 "Total Budget": total_budget, 
                                 "Average Math Score": average_math_score, 
                                 "Average Reading Score": average_reading_score, 
                                 "% Passing Math": passing_math_percentage,
                                 "% Passing Reading": passing_reading_percentage,
                                 "% Overall Passing": overall_passing_rate}, index=[0])

# Formatting
district_summary["Total Students"] = district_summary["Total Students"].map("{:,}".format)
district_summary["Total Budget"] = district_summary["Total Budget"].map("${:,.2f}".format)

# Display the DataFrame
district_summary

Unnamed: 0,Total Schools,Total Students,Total Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing
0,15,39170,"$24,649,428.00",78.985371,81.87784,74.980853,85.805463,65.172326


# School Summary

In [138]:
# Use the code provided to select the school type
school_types = school_data.set_index(["school_name"])["type"]

In [139]:
# Calculate the total student count
per_school_counts = school_data_complete["student_name"].value_counts()

In [140]:
# Calculate the total school budget and per capita spending
per_school_budget = school_data_complete.groupby(["school_name"]).sum()["budget"]
per_school_capita = per_school_budget / per_school_counts

In [141]:
# Calculate the average test scores
per_school_math = school_data_complete["math_score"].mean()
per_school_reading = school_data_complete["reading_score"].mean()

In [142]:
# Calculate the number of schools with math scores of 70 or higher
school_passing_math = school_data_complete[(school_data_complete["math_score"] > 70)]

In [143]:
# Calculate the number of schools with reading scores of 70 or higher
school_passing_reading = school_data_complete[(school_data_complete["reading_score"] > 70)]

In [144]:
# Use the provided code to calculate the schools that passed both math and reading with scores of 70 or higher
passing_math_and_reading = school_data_complete[
    (school_data_complete["reading_score"] >= 70) & (school_data_complete["math_score"] >= 70)]

In [145]:
# Use the provided code to calculate the passing rates
per_school_passing_math = school_passing_math.groupby(["school_name"]).count()["student_name"] / per_school_counts * 100
per_school_passing_reading = school_passing_reading.groupby(["school_name"]).count()["student_name"] / per_school_counts * 100
overall_passing_rate = passing_math_and_reading.groupby(["school_name"]).count()["student_name"] / per_school_counts * 100

In [152]:
# Create a DataFrame called `per_school_summary` with columns for the calculations above.
per_school_summary = pd.DataFrame.columns({"School Type": school_types,
                                   "Total Students": per_school_counts,
                                   "Total School Budget": per_school_budget, 
                                   "Per Student Budget": per_school_capita,
                                   "Average Math Score": per_school_math,
                                   "Average Reading Score": per_school_reading,
                                   "% Passing Math": school_passing_math,
                                   "% Passing Reading": school_passing_reading,
                                   "% Overall Passing": overall_passing_rate})

# Formatting
per_school_summary["Total School Budget"] = per_school_summary["Total School Budget"].map("${:,.2f}".format)
per_school_summary["Per Student Budget"] = per_school_summary["Per Student Budget"].map("${:,.2f}".format)

# Display the DataFrame
per_school_summary

TypeError: 'pandas._libs.properties.AxisProperty' object is not callable

In [127]:
# #per_school_summary = per_school_summary["School Type",
#                                    "Total Students",
#                                    "Total School Budget", 
#                                    "Per Student Budget",
#                                    "Average Math Score",
#                                    "Average Reading Score",
#                                    "% Passing Math",
#                                    "% Passing Reading",
#                                    "% Overall Passing"]

# Highest-Performing Schools (by % Overall Passing)

In [150]:
# Sort the schools by `% Overall Passing` in descending order and display the top 5 rows.
high_performing_schools = per_school_summary.sort_values(["% Overall Passing"], descending=True)
high_performing_schools.head(5)

NameError: name 'per_school_summary' is not defined

# Bottom Performing Schools (By % Overall Passing)

In [None]:
# Sort the schools by `% Overall Passing` in ascending order and display the top 5 rows.
bottom_performing_schools = per_school_summary.sort_values(["% Overall Passing"], ascending=True)
bottom_performing_schools.head(5)