PyCity School Analysis
February 8, 2019
Scott McEachern

In [5]:
#- Load Datasets into DataFrame
import os
import pandas as pd


# Schools DataFrame
schoolPath = os.path.join(".", "Resources", "schools_complete.csv")

school_df = pd.read_csv(schoolPath)


# Student DataFrame
studentPath = os.path.join(".", "Resources", "students_complete.csv")

student_df = pd.read_csv(studentPath)

In [6]:
#-- District Summary

#- Calculate Total Schools
totalNumSchools = school_df.shape[0]


#- Calculate Total Students
totalNumStudents = student_df.shape[0]
totalNumStudentsFormat = f"{'{:,}'.format(totalNumStudents)}"


#- Total School Budget
totalSchoolBudget = school_df["budget"].sum()
totalSchoolBudgetFormat = f"${'{:,.2f}'.format(totalSchoolBudget)}"


#- Average Math Score
aveMathScore = student_df["math_score"].mean()
aveMathScoreFormat = f"{'{:,.6f}'.format(aveMathScore)}"


#- Average Reading Score
aveReadingScore = student_df["reading_score"].mean()
aveReadingScoreFormat = f"{'{:,.6f}'.format(aveReadingScore)}"


#- Overall Passing Rate
overallAveScore = (aveMathScore + aveReadingScore) / 2
overallAveScoreFormat = f"{'{:,.6f}'.format(overallAveScore)}"


#- Percentage students with passing math score 
totalStudentsPassingMath = student_df[student_df['math_score'] >= 70].shape[0]
percentStudentsPassingMath = (totalStudentsPassingMath / totalNumStudents) * 100

percentStudentsPassingMathFormat = f"{'{:.6f}'.format(percentStudentsPassingMath)}"


#- Percentage students with passing reading score
totalStudentsPassingReading = student_df[student_df['reading_score'] >= 70].shape[0]
percentStudentsPassingReading = (totalStudentsPassingReading / totalNumStudents) * 100

percentStudentsPassingReadingFormat = f"{'{:.6f}'.format(percentStudentsPassingReading)}"


# Result Dataframe
districtSummaryResults = {
    'Total Schools': [totalNumSchools],
    'Total Students': [totalNumStudentsFormat],
    'Total Budget': [totalSchoolBudgetFormat],
    'Average Match Score': [aveMathScoreFormat],
    'Average Reading Score': [aveReadingScoreFormat],
    '% Passing Math': [percentStudentsPassingMathFormat],
    '% Passing Reading': [percentStudentsPassingReadingFormat],
    '% Overall Passing Rate': [overallAveScoreFormat]
                }

districtSummary_df = pd.DataFrame(districtSummaryResults)

#print(percentStudentsPassingMatchFormat)

districtSummary_df

Unnamed: 0,Total Schools,Total Students,Total Budget,Average Match Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing Rate
0,15,39170,"$24,649,428.00",78.985371,81.87784,74.980853,85.805463,80.431606


In [21]:
school_df.head()

Unnamed: 0,School ID,school_name,type,size,budget
0,0,Huang High School,District,2917,1910635
1,1,Figueroa High School,District,2949,1884411
2,2,Shelton High School,Charter,1761,1056600
3,3,Hernandez High School,District,4635,3022020
4,4,Griffin High School,Charter,1468,917500


In [31]:
#-- School Summary



#- Merge into single dataframe
studentWithSchool_df = pd.merge(student_df, school_df, how="left", on=["school_name", "school_name"])


#- Group by the school name
studentGroupBySchool_dfGroupBy = studentWithSchool_df.groupby('school_name')


#- Create Dictionary to store summary information
schoolSummaryData = {
    'School Name': [],
    'School Type': [],
    'Total Students': [],
    'Total School Budget': [],
    'Per Student Budget': [],
    'Average Math Score': [],
    'Average Reading Score': [],
    '% Passing Math': [],
    '% Passing Reading': [],
    '% Overall Passing Rate': []
    }


#- Calculate Summary for each school
#  Loop through the groupby and calculate the summary for each school
for schoolName, groupedSchool_df in studentGroupBySchool_dfGroupBy:
    
    # Set School Name
    schoolSummaryData['School Name'].append(schoolName)
    
    
    # Set School Type
    schoolSummaryData['School Type'].append(groupedSchool_df.iloc[0]['type'] )
    
    
    # Calculate Total Students
    schoolStudentTotal = groupedSchool_df.shape[0]
    schoolSummaryData['Total Students'].append(schoolStudentTotal)

    
    # Total School Budget
    schoolBudget = groupedSchool_df.iloc[0]['budget']
    schoolSummaryData['Total School Budget'].append(schoolBudget)
#TODO - format that school budget before adding to data frame


    # Average Student Budget
    perStudentBudget = (schoolBudget/studentsAtSchool)
    schoolSummaryData['Per Student Budget'].append(perStudentBudget)
    
    
    # Average Math Score
    schoolAveMathScore = groupedSchool_df['math_score'].mean()
    schoolSummaryData['Average Math Score'].append(schoolAveMathScore)
    
    
    # Average Reading Score
    schoolAveReadingScore = groupedSchool_df['reading_score'].mean()
    schoolSummaryData['Average Reading Score'].append(schoolAveReadingScore)                                                            

    
    # Percent Passing Math
    schoolStudentsPassingMath = groupedSchool_df[groupedSchool_df['math_score'] >= 70].shape[0]
    schoolPercentPassingMatch = (schoolStudentsPassingMath / schoolStudentTotal) * 100
    
    schoolSummaryData['% Passing Math'].append(schoolPercentPassingMatch)
    
    
    # Percent Passing Reading
    schoolStudentsPassingReading = groupedSchool_df[groupedSchool_df['reading_score'] >= 70].shape[0]
    schoolPercentPassingReading = (schoolStudentsPassingReading / schoolStudentTotal) * 100
    
    schoolSummaryData['% Passing Reading'].append(schoolPercentPassingReading)
    
    
    # Overall Passing Rate
    schoolOverallPassingRate = (schoolPercentPassingMatch + schoolPercentPassingReading) / 2
    
    schoolSummaryData['% Overall Passing Rate'].append(schoolOverallPassingRate)
    
    
#- Create DataFrame of results
schoolSummary_df = pd.DataFrame(schoolSummaryData)

schoolSummary_df.head()

#Summary_df.head()
#TEST
#print(schoolNames)
#print(schoolTotalStudents)

Unnamed: 0,School Name,School Type,Total Students,Total School Budget,Per Student Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing Rate
0,Bailey High School,District,4976,3124928,1736.071111,77.048432,81.033963,66.680064,81.93328,74.306672
1,Cabrera High School,Charter,1858,1081356,600.753333,83.061895,83.97578,94.133477,97.039828,95.586652
2,Figueroa High School,District,2949,1884411,1046.895,76.711767,81.15802,65.988471,80.739234,73.363852
3,Ford High School,District,2739,1763916,979.953333,77.102592,80.746258,68.309602,79.299014,73.804308
4,Griffin High School,Charter,1468,917500,509.722222,83.351499,83.816757,93.392371,97.138965,95.265668
