# PyCity Schools Analysis


In [2]:
# import and read files/merge files
import pandas as pd
from pathlib import Path

# Input CSVs, resolved relative to the notebook's working directory
school_data_raw = Path("schools_complete.csv")
student_data_raw = Path("students_complete.csv")

school_data = pd.read_csv(school_data_raw)
student_data = pd.read_csv(student_data_raw)

# Attach each student's school attributes. The left join keeps every student
# row even when its school is missing from the schools file. The join key is
# given once (it was previously listed twice), and `validate` makes the merge
# raise if a school name repeats in the schools file, which would otherwise
# silently duplicate student rows and inflate every count below.
school_data_complete = pd.merge(
    student_data,
    school_data,
    how="left",
    on="school_name",
    validate="many_to_one",
)
school_data_complete.head()

Unnamed: 0,Student ID,student_name,gender,grade,school_name,reading_score,math_score,School ID,type,size,budget
0,0,Paul Bradley,M,9th,Huang High School,66,79,0,District,2917,1910635
1,1,Victor Smith,M,12th,Huang High School,94,61,0,District,2917,1910635
2,2,Kevin Rodriguez,M,12th,Huang High School,90,60,0,District,2917,1910635
3,3,Dr. Richard Scott,M,12th,Huang High School,67,58,0,District,2917,1910635
4,4,Bonnie Ray,F,9th,Huang High School,97,84,0,District,2917,1910635


## District Summary

In [3]:
# Number of distinct school names in the merged frame (missing names are not counted)
school_name_column = school_data_complete["school_name"]
schools_unique = school_name_column.nunique()
schools_unique


15

In [4]:
# Number of student rows that carry a non-null name
total_students = school_data_complete["student_name"].notna().sum()
total_students

39170

In [5]:
# total budget
# Sum exactly one budget per school. De-duplicating on the budget *values*
# (budget.unique()) would collapse two schools that happen to share the same
# budget and under-report the total, so de-duplicate on the school name instead.
budget_total = school_data_complete.drop_duplicates(subset="school_name")["budget"].sum()

budget_total

24649428

In [6]:
# Mean math score across every student row
math_scores = school_data_complete["math_score"]
math_average = math_scores.mean()
math_average

78.98537145774827

In [7]:
# Mean reading score across every student row
reading_scores = school_data_complete["reading_score"]
reading_average = reading_scores.mean()
reading_average

81.87784018381414

In [8]:
# Percentage of students whose math score is 70 or higher
passed_math = school_data_complete["math_score"] >= 70
passing_math_count = school_data_complete.loc[passed_math, "student_name"].count()
passing_math_percentage = passing_math_count / total_students * 100
passing_math_percentage

74.9808526933878

In [9]:
# Percentage of students whose reading score is 70 or higher
passed_reading = school_data_complete["reading_score"] >= 70
passing_reading_count = school_data_complete.loc[passed_reading, "student_name"].count()
passing_reading_percentage = passing_reading_count / total_students * 100
passing_reading_percentage

85.80546336482001

In [10]:
# Percentage of students scoring 70 or higher in BOTH math and reading
passed_both = (school_data_complete["math_score"] >= 70) & (
    school_data_complete["reading_score"] >= 70
)
passing_overall_count = school_data_complete.loc[passed_both, "student_name"].count()
overall_passing_rate = passing_overall_count / total_students * 100
overall_passing_rate

65.17232575950983

In [11]:
# Collect the district-level metrics computed above into a one-row table
district_create = {
    "Total Schools": [schools_unique],
    "Total Students": [total_students],
    "Total Budget": [budget_total],
    "Average Math Score": [math_average],
    "Average Reading Score": [reading_average],
    "% Passing Math": [passing_math_percentage],
    "% Passing Reading": [passing_reading_percentage],
    "% Overall Passing": [overall_passing_rate],
}
district_summary = pd.DataFrame(district_create)

# Display formatting: each listed column is replaced by its formatted string,
# so these columns hold text (not numbers) from here on.
column_formats = {
    "Total Students": "{:,}",
    "Total Budget": "${:,.2f}",
    "Average Math Score": "{:,.2f}",
    "Average Reading Score": "{:,.2f}",
    "% Passing Math": "{:,.2f}%",
    "% Passing Reading": "{:,.2f}%",
    "% Overall Passing": "{:,.2f}%",
}
for column_label, template in column_formats.items():
    district_summary[column_label] = district_summary[column_label].map(template.format)

district_summary


Unnamed: 0,Total Schools,Total Students,Total Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing
0,15,39170,"$24,649,428.00",78.99,81.88,74.98%,85.81%,65.17%


## School Summary

In [18]:
# Distinct school names, in order of first appearance in the merged frame
school_name_values = school_data_complete["school_name"]
school_name = school_name_values.unique()
school_name

array(['Huang High School', 'Figueroa High School', 'Shelton High School',
       'Hernandez High School', 'Griffin High School',
       'Wilson High School', 'Cabrera High School', 'Bailey High School',
       'Holden High School', 'Pena High School', 'Wright High School',
       'Rodriguez High School', 'Johnson High School', 'Ford High School',
       'Thomas High School'], dtype=object)