In [1]:
import pandas as pd
from pathlib import Path

# Locations of the two input CSV files, relative to the notebook's working directory
school_data_csv = Path("Resources/schools_complete.csv")
student_data_csv = Path("Resources/students_complete.csv")

# Load the school-level table into a DataFrame
# (the student-level table is read from student_data_csv in a later cell)
school_data = pd.read_csv(filepath_or_buffer=school_data_csv)

# Show the loaded school table as the cell output
school_data

Unnamed: 0,Student ID,Student Name,Gender,Grade,School Name,Reading Score,Math Score,School ID,Type,Size,Budget
0,0,Paul Bradley,M,9th,Huang High School,66,79,0,District,2917,1910635
1,1,Victor Smith,M,12th,Huang High School,94,61,0,District,2917,1910635
2,2,Kevin Rodriguez,M,12th,Huang High School,90,60,0,District,2917,1910635
3,3,Dr. Richard Scott,M,12th,Huang High School,67,58,0,District,2917,1910635
4,4,Bonnie Ray,F,9th,Huang High School,97,84,0,District,2917,1910635
5,5,Bryan Miranda,M,9th,Huang High School,94,94,0,District,2917,1910635
6,6,Sheena Carter,F,11th,Huang High School,82,80,0,District,2917,1910635
7,7,Nicole Baker,F,12th,Huang High School,96,69,0,District,2917,1910635
8,8,Michael Roth,M,10th,Huang High School,95,87,0,District,2917,1910635
9,9,Matthew Greene,M,10th,Huang High School,96,84,0,District,2917,1910635


In [39]:
# Load the student-level table from the path defined alongside the school path
student_data = pd.read_csv(filepath_or_buffer=student_data_csv)

# Preview the first five rows
student_data.head(5)

Unnamed: 0,Student ID,student_name,gender,grade,school_name,reading_score,math_score
0,0,Paul Bradley,M,9th,Huang High School,66,79
1,1,Victor Smith,M,12th,Huang High School,94,61
2,2,Kevin Rodriguez,M,12th,Huang High School,90,60
3,3,Dr. Richard Scott,M,12th,Huang High School,67,58
4,4,Bonnie Ray,F,9th,Huang High School,97,84


In [40]:
# Attach each student's school attributes (left join on the shared "school_name"
# column), then give every column its presentation-style title in one chained step.
school_df_1 = (
    student_data
    .merge(school_data, how="left", on="school_name")
    .rename(
        columns={
            "student_name": "Student Name",
            "gender": "Gender",
            "grade": "Grade",
            "school_name": "School Name",
            "reading_score": "Reading Score",
            "math_score": "Math Score",
            "type": "Type",
            "size": "Size",
            "budget": "Budget",
        }
    )
)

# Preview the first ten merged rows
school_df_1.head(10)

Unnamed: 0,Student ID,Student Name,Gender,Grade,School Name,Reading Score,Math Score,School ID,Type,Size,Budget
0,0,Paul Bradley,M,9th,Huang High School,66,79,0,District,2917,1910635
1,1,Victor Smith,M,12th,Huang High School,94,61,0,District,2917,1910635
2,2,Kevin Rodriguez,M,12th,Huang High School,90,60,0,District,2917,1910635
3,3,Dr. Richard Scott,M,12th,Huang High School,67,58,0,District,2917,1910635
4,4,Bonnie Ray,F,9th,Huang High School,97,84,0,District,2917,1910635
5,5,Bryan Miranda,M,9th,Huang High School,94,94,0,District,2917,1910635
6,6,Sheena Carter,F,11th,Huang High School,82,80,0,District,2917,1910635
7,7,Nicole Baker,F,12th,Huang High School,96,69,0,District,2917,1910635
8,8,Michael Roth,M,10th,Huang High School,95,87,0,District,2917,1910635
9,9,Matthew Greene,M,10th,Huang High School,96,84,0,District,2917,1910635


In [None]:
##DISTRICT SUMMARY

In [32]:
# Number of distinct schools in the district
school_count = school_df_1["School Name"].nunique()
print(f'School count is: {school_count}')

# Number of distinct students (one Student ID per student)
total_student = school_df_1["Student ID"].nunique()
print(f'Student count is: {total_student:,}')

# Per-school budgets: exactly one value per school.
# The merged frame repeats a school's budget on every student row, so rows are
# de-duplicated by school. De-duplicating on the budget *values* (``.unique()``)
# would silently drop a school whenever two schools share the same budget and
# understate the district total.
# Kept as a NumPy array so later cells can still call ``total_budget.sum()``.
total_budget = (
    school_df_1.drop_duplicates(subset="School Name")["Budget"].to_numpy()
)
print(f'Total budget is: {total_budget.sum():,}')

# District budget per student: each school's budget divided by the district
# head count; the sum of these shares is the district-wide figure.
per_student_budget = total_budget / total_student
print(f'Budget per student is: ${per_student_budget.sum():,.2f}')

School count is: 15
Student count is: 39,170
Total budget is: 24,649,428
Budget per student is: $629.29


In [6]:
# District-wide mean score for each subject, computed column by column
avg_math_score, avg_reading_score = (
    school_df_1[subject].mean() for subject in ("Math Score", "Reading Score")
)

# Report both averages, one line each, rounded to two decimals
print(
    'The average scores for the district were:',
    f'Math: {avg_math_score:,.2f}%',
    f'Reading: {avg_reading_score:,.2f}%',
    sep="\n",
)

The average scores for the district were:
Math: 78.99%
Reading: 81.88%


In [7]:
# Minimum score that counts as a pass
PASSING_SCORE = 70

# % passing math (the percentage of students who passed math)
# The boolean mask is summed directly so the tally counts passing rows rather
# than non-null "Student Name" values, which would undercount if a name is missing.
math_count = (school_df_1["Math Score"] >= PASSING_SCORE).sum()
passing_percent_math = (math_count / total_student) * 100
print(f'Percent of students passing math: {passing_percent_math:,.2f}%')

# % passing reading (the percentage of students who passed reading)
read_count = (school_df_1["Reading Score"] >= PASSING_SCORE).sum()
passing_percent_read = (read_count / total_student) * 100
print(f'Percent of students passing reading: {passing_percent_read:,.2f}%')


Percent of students passing math: 74.98%
Percent of students passing reading: 85.81%


In [8]:
# Minimum score that counts as a pass
PASSING_SCORE = 70

# % overall passing (the percentage of students who passed math AND reading)
# The combined boolean mask is summed directly so the tally counts passing rows
# rather than non-null "Student Name" values.
passed_math = school_df_1["Math Score"] >= PASSING_SCORE
passed_reading = school_df_1["Reading Score"] >= PASSING_SCORE
overall_count = (passed_math & passed_reading).sum()
passing_both = (overall_count / total_student) * 100

print(f'Percent of students passing both: {passing_both:,.2f}%')

Percent of students passing both: 65.17%


In [30]:
# One-row snapshot of the district's headline metrics
list_df = pd.DataFrame(
    {
        "School Count": [school_count],
        "Total Student": [total_student],
        "Total Budget": [total_budget.sum()],
        "Average Math Score": [avg_math_score],
        "Average Reading Score": [avg_reading_score],
        "Students Passing Math": [passing_percent_math],
        "Students Passing Reading": [passing_percent_read],
        "Students Passing All": [passing_both],
    }
)

# Convert every metric except the school count to display text:
# thousands separators for the head count, a dollar sign for the budget,
# and two decimals plus "%" for the score and pass-rate columns.
list_df = list_df.assign(
    **{
        column: list_df[column].map(template.format)
        for column, template in {
            "Total Student": "{:,}",
            "Total Budget": "${:,}",
            "Average Math Score": "{:,.2f}%",
            "Average Reading Score": "{:,.2f}%",
            "Students Passing Math": "{:,.2f}%",
            "Students Passing Reading": "{:,.2f}%",
            "Students Passing All": "{:,.2f}%",
        }.items()
    }
)

# Show the formatted summary
list_df

Unnamed: 0,School Count,Total Student,Total Budget,Average Math Score,Average Reading Score,Students Passing Math,Students Passing Reading,Students Passing All
0,15,39170,"$24,649,428",78.99%,81.88%,74.98%,85.81%,65.17%


In [None]:
##SCHOOL SUMMARY

In [66]:
# School-level table with presentation-style column titles
school_df_2 = school_data.rename(
    columns={
        "school_name": "School Name",
        "type": "Type",
        "size": "Size",
        "budget": "Budget",
    }
)

# Column of school names, shown as the cell output
school_name = school_df_2.loc[:, "School Name"]
school_name

0         Huang High School
1      Figueroa High School
2       Shelton High School
3     Hernandez High School
4       Griffin High School
5        Wilson High School
6       Cabrera High School
7        Bailey High School
8        Holden High School
9          Pena High School
10       Wright High School
11    Rodriguez High School
12      Johnson High School
13         Ford High School
14       Thomas High School
Name: School Name, dtype: object

In [68]:
# Re-key the school table by school name so per-school values can be looked up by name
school_df_3 = school_df_2.set_index("School Name")

# School type for each school, indexed by school name
school_type = school_df_3.loc[:, "Type"]
school_type


School Name
Huang High School        District
Figueroa High School     District
Shelton High School       Charter
Hernandez High School    District
Griffin High School       Charter
Wilson High School        Charter
Cabrera High School       Charter
Bailey High School       District
Holden High School        Charter
Pena High School          Charter
Wright High School        Charter
Rodriguez High School    District
Johnson High School      District
Ford High School         District
Thomas High School        Charter
Name: Type, dtype: object

In [52]:
# Total students and total school budget per school: shown here as the
# "Size" and "Budget" columns of the school-indexed frame.
school_df_3

Unnamed: 0_level_0,School ID,Type,Size,Budget
School Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Huang High School,0,District,2917,1910635
Figueroa High School,1,District,2949,1884411
Shelton High School,2,Charter,1761,1056600
Hernandez High School,3,District,4635,3022020
Griffin High School,4,Charter,1468,917500
Wilson High School,5,Charter,2283,1319574
Cabrera High School,6,Charter,1858,1081356
Bailey High School,7,District,4976,3124928
Holden High School,8,Charter,427,248087
Pena High School,9,Charter,962,585858


In [61]:
# Budget per student for each school: the school's budget divided by its enrolment
per_student_budget = school_df_3["Budget"].div(school_df_3["Size"])

# Add the ratio as a new column of the school-indexed frame
school_df_3 = school_df_3.assign(**{"Per Student Budget": per_student_budget})

# Preview the first five schools
school_df_3.head(5)



Unnamed: 0_level_0,School ID,Type,Size,Budget,Per Student Budget
School Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Huang High School,0,District,2917,1910635,655.0
Figueroa High School,1,District,2949,1884411,639.0
Shelton High School,2,Charter,1761,1056600,600.0
Hernandez High School,3,District,4635,3022020,652.0
Griffin High School,4,Charter,1468,917500,625.0


In [69]:
#Average math score

#Average reading score

#% passing math (the percentage of students who passed math)

#% passing reading (the percentage of students who passed reading)

#% overall passing (the percentage of students who passed math AND reading)

KeyError: 'passing_percent_math'

In [None]:
#Highest-Performing Schools (by % Overall Passing)
#Sort the schools by % Overall Passing in descending order and display the top 5 rows.

#top_schools_df = 


#Save the results in a DataFrame called "top_schools".




#Lowest-Performing Schools (by % Overall Passing)
#Sort the schools by % Overall Passing in ascending order and display the top 5 rows.



#Save the results in a DataFrame called "bottom_schools".



#Math Scores by Grade
#Perform the necessary calculations to create a DataFrame that lists the average math 
#score for students of each grade level (9th, 10th, 11th, 12th) at each school.


