In [5]:
import pandas as pd
from pathlib import Path

# Locate the two source CSVs (paths are relative to the working directory)
school_data_csv = Path("Resources") / "schools_complete.csv"
student_data_csv = Path("Resources") / "students_complete.csv"

# Load the school-level and student-level tables into their own DataFrames
school_data = pd.read_csv(school_data_csv)
student_data = pd.read_csv(student_data_csv)

# Build the combined dataset in one chain:
#   1. left-join the school attributes onto every student row via school_name
#      (a left join keeps every student row)
#   2. swap the snake_case headers for display-friendly titles; columns that
#      are not listed in the mapping keep their existing names
school_df_1 = (
    student_data
    .merge(school_data, how="left", on=["school_name"])
    .rename(
        columns={
            "student_name": "Student Name",
            "gender": "Gender",
            "grade": "Grade",
            "school_name": "School Name",
            "reading_score": "Reading Score",
            "math_score": "Math Score",
            "type": "Type",
            "size": "Size",
            "budget": "Budget",
        }
    )
)

# Preview the first ten merged rows
school_df_1.head(10)

Unnamed: 0,Student ID,Student Name,Gender,Grade,School Name,Reading Score,Math Score,School ID,Type,Size,Budget
0,0,Paul Bradley,M,9th,Huang High School,66,79,0,District,2917,1910635
1,1,Victor Smith,M,12th,Huang High School,94,61,0,District,2917,1910635
2,2,Kevin Rodriguez,M,12th,Huang High School,90,60,0,District,2917,1910635
3,3,Dr. Richard Scott,M,12th,Huang High School,67,58,0,District,2917,1910635
4,4,Bonnie Ray,F,9th,Huang High School,97,84,0,District,2917,1910635
5,5,Bryan Miranda,M,9th,Huang High School,94,94,0,District,2917,1910635
6,6,Sheena Carter,F,11th,Huang High School,82,80,0,District,2917,1910635
7,7,Nicole Baker,F,12th,Huang High School,96,69,0,District,2917,1910635
8,8,Michael Roth,M,10th,Huang High School,95,87,0,District,2917,1910635
9,9,Matthew Greene,M,10th,Huang High School,96,84,0,District,2917,1910635


In [6]:
# Number of distinct schools present in the merged data
school_count = school_df_1["School Name"].nunique()
print(f'School count is: {school_count}')

# Number of distinct students, identified by their Student ID
total_student = school_df_1["Student ID"].nunique()
print(f'Student count is: {total_student}')

# District budget: one budget figure per school.
# The merged frame repeats a school's budget on every one of its student rows,
# so the rows must be collapsed before summing. Collapse on the school name,
# not on the budget value itself: de-duplicating the values would silently drop
# a school whenever two schools happen to share the same budget.
# total_budget is kept as a per-school collection (not a scalar) so that code
# calling total_budget.sum() continues to work.
total_budget = school_df_1.drop_duplicates(subset="School Name")["Budget"]
print(f'Total budget is: {total_budget.sum()}')


School count is: 15
Student count is: 39170
Total budget is: 24649428


In [7]:
# District-wide mean score per subject, taken over every student row
avg_reading_score = school_df_1["Reading Score"].mean()
avg_math_score = school_df_1["Math Score"].mean()

# Report both averages rounded to two decimals, one per line under a heading
print(
    'The average scores for the district were:',
    f'Math: {avg_math_score:,.2f}%',
    f'Reading: {avg_reading_score:,.2f}%',
    sep="\n",
)

The average scores for the district were:
Math: 78.99%
Reading: 81.88%


In [8]:
# % passing math: share of students with a math score of 70 or higher.
# Summing the boolean mask counts the passing rows directly. Counting non-null
# "Student Name" values on a filtered frame would tally every column just to
# read one, and would miss any passing student whose name is missing.
math_count = (school_df_1["Math Score"] >= 70).sum()
passing_percent_math = (math_count / total_student) * 100
print(f'Percent of students passing math: {passing_percent_math:,.2f}%')

# % passing reading: share of students with a reading score of 70 or higher,
# computed the same way as the math figure above.
read_count = (school_df_1["Reading Score"] >= 70).sum()
passing_percent_read = (read_count / total_student) * 100
print(f'Percent of students passing reading: {passing_percent_read:,.2f}%')


Percent of students passing math: 74.98%
Percent of students passing reading: 85.81%


In [9]:
# % overall passing: share of students who scored 70 or higher in BOTH math
# and reading. The two conditions are combined element-wise and the resulting
# boolean mask is summed, so the count does not depend on any other column
# (such as "Student Name") being non-null.
overall_count = (
    (school_df_1["Math Score"] >= 70) & (school_df_1["Reading Score"] >= 70)
).sum()
passing_both = (overall_count / total_student) * 100

print(f'Percent of students passing both: {passing_both:,.2f}%')

Percent of students passing both: 65.17%


In [21]:
# District snapshot: a one-row DataFrame gathering the key metrics computed in
# the earlier cells. A single record (dict) becomes a single row, with the
# columns appearing in the order the keys are listed.
list_df = pd.DataFrame(
    [
        {
            "School Count": school_count,
            "Total Student": total_student,
            "Total Budget": total_budget.sum(),
            "Average Math Score": avg_math_score,
            "Average Reading Score": avg_reading_score,
            "Students Passing Math": passing_percent_math,
            "Students Passing Reading": passing_percent_read,
            "Overall Passing": passing_both,
        }
    ]
)

# Show the summary table (values are left unformatted)
list_df

Unnamed: 0,School Count,Total Student,Total Budget,Average Math Score,Average Reading Score,Students Passing Math,Students Passing Reading,Overall Passing
0,15,39170,24649428,78.985371,81.87784,74.980853,85.805463,65.172326
