# PyCity Schools Analysis

- Analysis here

---

In [1]:
# Dependencies and Setup
import pandas as pd
from pathlib import Path


In [2]:
# File to Load 
school_data_to_load = Path("Resources/schools_complete.csv")
student_data_to_load = Path("Resources/students_complete.csv")

# Read School and Student Data File and store into Pandas DataFrames
school_data = pd.read_csv(school_data_to_load)
student_data = pd.read_csv(student_data_to_load)

# Combine the data into a single dataset.
school_data_complete = pd.merge(student_data, school_data, how="left", on=["school_name", "school_name"])
school_data_complete.head()

Unnamed: 0,Student ID,student_name,gender,grade,school_name,reading_score,math_score,School ID,type,size,budget
0,0,Paul Bradley,M,9th,Huang High School,66,79,0,District,2917,1910635
1,1,Victor Smith,M,12th,Huang High School,94,61,0,District,2917,1910635
2,2,Kevin Rodriguez,M,12th,Huang High School,90,60,0,District,2917,1910635
3,3,Dr. Richard Scott,M,12th,Huang High School,67,58,0,District,2917,1910635
4,4,Bonnie Ray,F,9th,Huang High School,97,84,0,District,2917,1910635


# District Summary

In [3]:
# Calculate the total number of unique schools
school_count= school_data_complete['school_name'].nunique()
print("Total number of unique schools:", school_count)

Total number of unique schools: 15


In [4]:
# Calculate the total number of students
student_count=school_data_complete['student_name'].count()
print("Total number of students:",student_count)

Total number of students: 39170


In [5]:
# Calculate the total budget

# Step 1. Remove duplicates based on 'School ID' to avoid counting budgets multiple times
unique_schools = school_data_complete.drop_duplicates(subset='School ID')

# Step 2. Calculate the total budget
total_budget = unique_schools['budget'].sum()
print("Total budget:",total_budget)

Total budget: 24649428


In [6]:
# Calculate the average (mean) math score
average_math_score = school_data_complete['math_score'].mean()
print(f"The average math score is: {average_math_score:.2f}")

The average math score is: 78.99


In [7]:
# Calculate the average (mean) reading score
average_reading_score = school_data_complete['reading_score'].mean()
print(f"The average math score is: {average_reading_score:.2f}")

The average math score is: 81.88


In [8]:
#Calculate the percentage of students who passed math (math scores greather than or equal to 70)

# Filter the DataFrame to include only students who passed math
passed_math = school_data_complete[school_data_complete['math_score'] >= 70]

# Calculate the total number of students who passed math
total_passed_math = passed_math['student_name'].count()

# Calculate the percentage of students that passed math
percentage_passed_math = (total_passed_math / student_count) * 100

print(f"The percentage of students that passed math is: {percentage_passed_math:.2f}%")


The percentage of students that passed math is: 74.98%


In [9]:
#Calculate the percentage of students who passed reading (math scores greather than or equal to 70)

# Filter the DataFrame to include only students who passed reading
passed_reading = school_data_complete[school_data_complete['reading_score'] >= 70]

# Calculate the total number of students who passed reading
total_passed_reading = passed_reading['student_name'].count()

# Calculate the percentage of students that passed readint
percentage_passed_reading = (total_passed_reading / student_count) * 100

print(f"The percentage of students that passed reading is: {percentage_passed_reading:.2f}%")

The percentage of students that passed reading is: 85.81%


In [10]:
# calculate the percentage of students that passed math and reading
passing_math_reading_count = school_data_complete[
    (school_data_complete["math_score"] >= 70) & (school_data_complete["reading_score"] >= 70)
].count()["student_name"]
overall_passing_rate = passing_math_reading_count /  float(student_count) * 100

print(f"The percentage of students that passed math and reading is: {overall_passing_rate:.2f}%")

The percentage of students that passed math and reading is: 65.17%


In [14]:
# Create a high-level snapshot of the district's key metrics in a DataFrame
district_summary = pd.DataFrame(
    {
    "Total Schools":[school_count],
    "Total Students":[student_count],
    "Total Budget":[total_budget],
    "Average Math Score":[f"{average_math_score:.2f}"],
    "Average Reading Score":[f"{average_reading_score:.2f}"],
    "% Passing Math":[f"{percentage_passed_math:.2f}%"],
    "% Passing Reading":[f"{percentage_passed_reading:.2f}%"],
    "% Overall Passing":[f"{overall_passing_rate:.2f}%"]
    
}
)

# Display the DataFrame
district_summary

Unnamed: 0,Total Schools,Total Students,Total Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing
0,15,39170,24649428,78.99,81.88,74.98%,85.81%,65.17%


# School Summary


In [16]:
# Use the code provided to select the type per school from school_data
school_types = school_data.set_index(["school_name"])["type"]
school_types

school_name
Huang High School        District
Figueroa High School     District
Shelton High School       Charter
Hernandez High School    District
Griffin High School       Charter
Wilson High School        Charter
Cabrera High School       Charter
Bailey High School       District
Holden High School        Charter
Pena High School          Charter
Wright High School        Charter
Rodriguez High School    District
Johnson High School      District
Ford High School         District
Thomas High School        Charter
Name: type, dtype: object

In [15]:
# Calculate the total student count per school from school_data
per_school_counts = school_data["school_name"].count()
print(per_school_counts)

15


In [20]:
# Calculate the total school budget and per capita spending per school from school_data

# Calculate the total school budget per school
per_school_budget = school_data.groupby("school_name")["budget"].sum()


#Calculate the per capita spending
#first, we need to get the totla students per school
students_per_school = school_data.groupby("school_name")["School ID"].count()
#Then we can calculate per capita spending
per_school_capita = per_school_budget / students_per_school

school_budget_per_capita_summary = pd.DataFrame({
    "Total School Budget": per_school_budget,
    "Per Student Budget": per_school_capita
})

print(school_budget_per_capita_summary)

                       Total School Budget  Per Student Budget
school_name                                                   
Bailey High School                 3124928           3124928.0
Cabrera High School                1081356           1081356.0
Figueroa High School               1884411           1884411.0
Ford High School                   1763916           1763916.0
Griffin High School                 917500            917500.0
Hernandez High School              3022020           3022020.0
Holden High School                  248087            248087.0
Huang High School                  1910635           1910635.0
Johnson High School                3094650           3094650.0
Pena High School                    585858            585858.0
Rodriguez High School              2547363           2547363.0
Shelton High School                1056600           1056600.0
Thomas High School                 1043130           1043130.0
Wilson High School                 1319574           13