# PyCitySchools Analysis
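- District-wide, students perform better in reading than in math: the average reading score is 81.88 versus 78.99 for math, and 85.81% of students pass reading compared with 74.98% for math.
- Only 65.17% of the 39,170 students across the 15 schools pass both subjects; because the math pass rate is the lower of the two, it is the larger constraint on the overall passing rate.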
---

In [15]:
#import dependencies
import pandas as pd
from pathlib import Path

# Locate the raw CSV inputs relative to the notebook's working directory.
loadSchoolData = Path("../Resources/schools_complete.csv")
loadStudentData = Path("../Resources/students_complete.csv")

# Read each file into its own DataFrame.
schoolData = pd.read_csv(loadSchoolData)
studentData = pd.read_csv(loadStudentData)

# Left-join the school attributes onto every student row, keyed on the school
# name. validate="many_to_one" makes pandas raise a MergeError if a school name
# appears more than once in schoolData; without it, a repeated school would
# silently duplicate student rows and skew every count and average below.
testDatadf = pd.merge(
    studentData,
    schoolData,
    how="left",
    on="school_name",
    validate="many_to_one",
)
testDatadf.head()

Unnamed: 0,Student ID,student_name,gender,grade,school_name,reading_score,math_score,School ID,type,size,budget
0,0,Paul Bradley,M,9th,Huang High School,66,79,0,District,2917,1910635
1,1,Victor Smith,M,12th,Huang High School,94,61,0,District,2917,1910635
2,2,Kevin Rodriguez,M,12th,Huang High School,90,60,0,District,2917,1910635
3,3,Dr. Richard Scott,M,12th,Huang High School,67,58,0,District,2917,1910635
4,4,Bonnie Ray,F,9th,Huang High School,97,84,0,District,2917,1910635


# District Summary
---

In [16]:
# Number of distinct schools present in the merged data.
# dropna=False counts a missing school name as its own value instead of skipping it.
schoolCount = testDatadf["school_name"].nunique(dropna=False)
schoolCount

15

In [17]:
# Total number of students, counted by distinct Student ID.
# IDs are used because some students share the same name.
studentCount = testDatadf["Student ID"].unique().size
studentCount

39170

In [18]:
# District-wide budget: the sum of the per-school budgets.
# Taken from schoolData rather than the merged frame, where each school's
# budget is repeated on every one of its student rows.
totalBudget = schoolData.loc[:, "budget"].sum()
totalBudget

24649428

In [19]:
# Mean math score across all student rows.
avgMathScore = testDatadf.loc[:, "math_score"].mean()
avgMathScore

78.98537145774827

In [20]:
# Mean reading score across all student rows.
avgReadScore = testDatadf.loc[:, "reading_score"].mean()
avgReadScore

81.87784018381414

In [21]:
# Percentage of students who passed math (math score of 70 or greater).
# Count the Student IDs on passing rows, then express that count as a share
# of the total student count.
passMathCount = testDatadf.loc[testDatadf["math_score"] >= 70, "Student ID"].count()
passMathPercent = passMathCount / studentCount * 100
passMathPercent

74.9808526933878

In [22]:
# Percentage of students who passed reading (reading score of 70 or greater).
# Count the Student IDs on passing rows, then express that count as a share
# of the total student count.
passReadCount = testDatadf.loc[testDatadf["reading_score"] >= 70, "Student ID"].count()
passReadPercent = passReadCount / studentCount * 100
passReadPercent

85.80546336482001

In [23]:
# Percentage of students who passed both subjects: a row qualifies only when
# the math score and the reading score are each 70 or greater.
passTestsCount = testDatadf.loc[
    testDatadf[["math_score", "reading_score"]].ge(70).all(axis=1),
    "Student ID",
].count()
passTestsPercent = passTestsCount / studentCount * 100
passTestsPercent

65.17232575950983

In [24]:
# One-row snapshot of the district's key metrics.
districtSummary = pd.DataFrame(
    {
        "Total Number of Unique Schools": [schoolCount],
        "Total Students": [studentCount],
        "Total Budget": [totalBudget],
        "Average Math Score": [avgMathScore],
        "Average Reading Score": [avgReadScore],
        "% Passing Math": [passMathPercent],
        "% Passing Reading": [passReadPercent],
        "% Overall Passing": [passTestsPercent],
    }
)

# Display template for each column that gets formatted; the school count has
# no entry here and is left as a plain number.
summaryFormats = {
    "Total Students": "{:,}",
    "Total Budget": "${:,.2f}",
    "Average Math Score": "{:,.2f}",
    "Average Reading Score": "{:,.2f}",
    "% Passing Math": "{:,.2f}%",
    "% Passing Reading": "{:,.2f}%",
    "% Overall Passing": "{:,.2f}%",
}

# Apply each template; the formatted columns hold text afterwards, not numbers.
for columnName, template in summaryFormats.items():
    districtSummary[columnName] = districtSummary[columnName].map(template.format)

districtSummary

Unnamed: 0,Total Number of Unique Schools,Total Students,Total Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing
0,15,39170,"$24,649,428.00",78.99,81.88,74.98%,85.81%,65.17%
