# PyCity Schools Analysis
### Sullivan Shave

In [1]:
import pandas as pd
from pathlib import Path

### Load the CSV files from resources

In [2]:
# Read the two source tables. Both paths are relative, so the CSV files are
# looked up in the notebook's current working directory.
school_data, student_data = [
    pd.read_csv(csv_name)
    for csv_name in ("schools_complete.csv", "students_complete.csv")
]

#### School Data (df1)

In [3]:
# Preview the first five rows of the school table.
school_data.head(n=5)

Unnamed: 0,School ID,school_name,type,size,budget
0,0,Huang High School,District,2917,1910635
1,1,Figueroa High School,District,2949,1884411
2,2,Shelton High School,Charter,1761,1056600
3,3,Hernandez High School,District,4635,3022020
4,4,Griffin High School,Charter,1468,917500


In [4]:
# (number of rows, number of columns) of the school table.
school_data.shape

(15, 5)

In [5]:
# Re-wrap the school table as a DataFrame under the short name df1.
df1 = pd.DataFrame(data=school_data)

In [6]:
# Preview the first five rows of df1 (head() defaults to five rows).
df1.head(n=5)

Unnamed: 0,School ID,school_name,type,size,budget
0,0,Huang High School,District,2917,1910635
1,1,Figueroa High School,District,2949,1884411
2,2,Shelton High School,Charter,1761,1056600
3,3,Hernandez High School,District,4635,3022020
4,4,Griffin High School,Charter,1468,917500


#### Student Data (df2)

In [7]:
# Preview the first five rows of the student table.
student_data.head(n=5)

Unnamed: 0,Student ID,student_name,gender,grade,school_name,reading_score,math_score
0,0,Paul Bradley,M,9th,Huang High School,66,79
1,1,Victor Smith,M,12th,Huang High School,94,61
2,2,Kevin Rodriguez,M,12th,Huang High School,90,60
3,3,Dr. Richard Scott,M,12th,Huang High School,67,58
4,4,Bonnie Ray,F,9th,Huang High School,97,84


In [8]:
# (number of rows, number of columns) of the student table.
student_data.shape

(39170, 7)

In [9]:
# Re-wrap the student table as a DataFrame under the short name df2.
df2 = pd.DataFrame(data=student_data)

In [10]:
# Preview the first five rows of df2 (head() defaults to five rows).
df2.head(n=5)

Unnamed: 0,Student ID,student_name,gender,grade,school_name,reading_score,math_score
0,0,Paul Bradley,M,9th,Huang High School,66,79
1,1,Victor Smith,M,12th,Huang High School,94,61
2,2,Kevin Rodriguez,M,12th,Huang High School,90,60
3,3,Dr. Richard Scott,M,12th,Huang High School,67,58
4,4,Bonnie Ray,F,9th,Huang High School,97,84


#### Student_and_School_Data (df)

In [11]:
# Attach each student's school attributes (School ID, type, size, budget) by
# joining the two tables on their shared "school_name" column.
#
# how="left" keeps every student row. validate="many_to_one" makes pandas
# raise a MergeError if a school name appears more than once in school_data;
# without that check, a duplicated school row would silently duplicate its
# students and inflate every count and average computed from the result.
student_and_school = pd.merge(
    student_data,
    school_data,
    how="left",
    on="school_name",
    validate="many_to_one",
)

In [12]:
# Spot-check the first two merged rows to confirm the school columns came through.
student_and_school.head(n=2)

Unnamed: 0,Student ID,student_name,gender,grade,school_name,reading_score,math_score,School ID,type,size,budget
0,0,Paul Bradley,M,9th,Huang High School,66,79,0,District,2917,1910635
1,1,Victor Smith,M,12th,Huang High School,94,61,0,District,2917,1910635


In [13]:
# Re-wrap the merged student/school table as a DataFrame under the short name
# df, which the summary cells below read from.
df = pd.DataFrame(data=student_and_school)

In [14]:
# Preview the first five rows of the merged table.
df.head(n=5)

Unnamed: 0,Student ID,student_name,gender,grade,school_name,reading_score,math_score,School ID,type,size,budget
0,0,Paul Bradley,M,9th,Huang High School,66,79,0,District,2917,1910635
1,1,Victor Smith,M,12th,Huang High School,94,61,0,District,2917,1910635
2,2,Kevin Rodriguez,M,12th,Huang High School,90,60,0,District,2917,1910635
3,3,Dr. Richard Scott,M,12th,Huang High School,67,58,0,District,2917,1910635
4,4,Bonnie Ray,F,9th,Huang High School,97,84,0,District,2917,1910635


## District Summary

In [44]:
# Number of distinct schools in the district. Missing school names, if any,
# are excluded from the count.
school_count = len(df["school_name"].dropna().unique())
school_count

15

In [45]:
# Number of students in the district, taken as the number of entries in the
# "Student ID" column of the merged table.
student_count = df["Student ID"].size
student_count

39170

In [54]:
# Total budget across all schools in the district.
#
# The merged table repeats each school's budget on every one of its student
# rows, so reduce it to one row per school before summing. De-duplicate on the
# school name rather than on the budget value: two schools that happen to
# share the same budget must both contribute to the total.
budget_per_school = df.drop_duplicates(subset=["school_name"])
total_budget = budget_per_school["budget"].sum()
total_budget

24649428

In [64]:
# District-wide mean of the math scores, rounded to two decimal places.
average_math_score = round(
    df.loc[:, "math_score"].mean(),
    2,
)
average_math_score


78.99

In [65]:
# District-wide mean of the reading scores, rounded to two decimal places.
average_reading_score = round(
    df.loc[:, "reading_score"].mean(),
    2,
)
average_reading_score

81.88

In [71]:
# Percentage of students who received a passing grade (70 or above) in math.
#
# Sum the boolean mask to count passing rows directly. Counting via
# .count()["student_name"] would skip any passing student whose name is
# missing, while the denominator (student_count) counts every row.
students_math_passed = (df["math_score"] >= 70).sum()
students_math_passed_percent = round(students_math_passed / student_count * 100, 2)
print(f"{students_math_passed_percent}%")

74.98%


In [72]:
# Percentage of students who received a passing grade (70 or above) in reading.
#
# Sum the boolean mask to count passing rows directly. Counting via
# .count()["student_name"] would skip any passing student whose name is
# missing, while the denominator (student_count) counts every row.
students_reading_passed = (df["reading_score"] >= 70).sum()
students_reading_passed_percent = round(students_reading_passed / student_count * 100, 2)
print(f"{students_reading_passed_percent}%")


85.81%


In [78]:
# Percentage of students who passed both math AND reading (70 or above in
# each), i.e. the overall passing rate.
#
# Combine the two boolean masks and sum the result to count passing rows
# directly. Counting via .count()["student_name"] would skip any passing
# student whose name is missing, while the denominator (student_count) counts
# every row.
passed_both = (df["math_score"] >= 70) & (df["reading_score"] >= 70)
number_of_students_passing = passed_both.sum()
percent_passing_students = round(number_of_students_passing / student_count * 100, 2)
print(f"{percent_passing_students}%")

65.17%


In [84]:
# Gather the district-wide metrics computed in the cells above into a
# single-row summary table.
# NOTE(review): "% Passing English" is filled from the reading pass rate; the
# label is kept unchanged here — confirm whether it should say "Reading".
summary = [
    {
        "Total Schools": school_count,
        "Total Students": student_count,
        "Total Budget": total_budget,
        "Average Math Score": average_math_score,
        "Average Reading Score": average_reading_score,
        "% Passing Math": students_math_passed_percent,
        "% Passing English": students_reading_passed_percent,
        "% Overall Passing": percent_passing_students,
    }
]

dfS = pd.DataFrame(data=summary)
dfS

Unnamed: 0,Total Schools,Total Students,Total Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing English,% Overall Passing
0,15,39170,24649428,78.99,81.88,74.98,85.81,65.17


## School Summary