In [1]:
# Option 2: Academy of Py
# Analyze the district-wide standardized test results
# Aggregate the data to showcase
#  obvious trends in school performance.


# Import pandas, numpy and the csv files
import pandas as pd
import numpy as np

# Input CSV paths, resolved relative to the notebook's working directory.
schools_file = "schools_complete.csv"
student_file = "students_complete.csv"

# Load the school-level and student-level tables.
school_data = pd.read_csv(schools_file, encoding="ISO-8859-1")
student_data = pd.read_csv(student_file, encoding="ISO-8859-1")

# Attach each student's school attributes by joining on the school name.
# how="left" keeps every student row even if its school is missing from
# school_data. validate="many_to_one" makes the merge raise if a school name
# appears more than once in school_data, which would otherwise silently
# duplicate student rows and inflate every count computed downstream.
school_student_data = pd.merge(student_data, school_data, how="left",
                               on="school_name", validate="many_to_one")


In [2]:
# District summary of key metrics (a single-row table)

# Score columns from the merged student/school frame
math_score = school_student_data.loc[:, 'math_score']
reading_score = school_student_data.loc[:, 'reading_score']

# Headcounts: non-null school names and non-null student names
total_schools = school_data.loc[:, 'school_name'].count()
total_students = student_data.loc[:, 'student_name'].count()

# District budget is the sum of every school's budget
total_budget = school_data.loc[:, 'budget'].sum()

# Mean scores across all students
average_math = math_score.mean()
average_reading = reading_score.mean()

# A score of 70 or higher counts as passing
passed_math = math_score >= 70
passed_reading = reading_score >= 70

# Number of passing students per subject, then as a % of all students
more_70_math = math_score.loc[passed_math].count()
more_70_reading = reading_score.loc[passed_reading].count()
pass_math_per = more_70_math / total_students * 100
pass_reading_per = more_70_reading / total_students * 100

# Overall passing rate is the plain average of the two subject pass rates
overall_pass = (pass_math_per + pass_reading_per) / 2

# One record holding every district-level metric
district_summary = [
    {
        "Total Schools": total_schools,
        "Total Students": total_students,
        "Total Budget": total_budget,
        "Average Math Score": average_math,
        "Average Reading Score": average_reading,
        "% Passing Math": pass_math_per,
        "% Passing Reading": pass_reading_per,
        "% Overall Passing Rate": overall_pass,
    }
]

# Build the summary frame with the presentation column order fixed up front
district_df = pd.DataFrame(
    district_summary,
    columns=[
        "Total Schools",
        "Total Students",
        "Total Budget",
        "Average Math Score",
        "Average Reading Score",
        "% Passing Math",
        "% Passing Reading",
        "% Overall Passing Rate",
    ],
)


In [5]:
# School summary of key metrics - each school

# Drop the per-student columns that are not needed for the school summary
schools_sum = school_student_data.drop(
    columns=['Student ID', 'gender', 'grade', 'School ID', 'student_name'])

# Rename headers to the labels used in the final summary table
schools_sum = schools_sum.rename(columns={"size": "Total Students",
                                          "math_score": "Average Math Score",
                                          "reading_score": "Average Reading Score",
                                          "type": "School Type",
                                          "budget": "Total School Budget"})

# Per-student budget = school budget / school size (repeated on every student row)
schools_sum['Per Student Budget'] = (
    schools_sum['Total School Budget'] / schools_sum['Total Students'])

# Passing markers: keep the score where it is >= 70 and NaN otherwise, so that
# a groupby count() of these columns tallies only the passing students.
schools_sum['% Passing Reading'] = schools_sum['Average Reading Score'].where(
    schools_sum['Average Reading Score'] >= 70)
schools_sum['% Passing Math'] = schools_sum['Average Math Score'].where(
    schools_sum['Average Math Score'] >= 70)

######

# Group the student-level rows by school name
schools_grpd = schools_sum.groupby(['school_name'])

# Per-school means, used for the average reading and math scores.
# numeric_only=True skips the text column 'School Type'; without it,
# pandas >= 2.0 raises TypeError when asked to average strings.
schools_grpd_mean = schools_grpd.mean(numeric_only=True)

# Per-school non-null counts: student totals and number of passing scores
schools_grpd_count = schools_grpd.count()

# First value per school, for attributes that are constant within a school
schools_grpd_first = schools_grpd.first()

#####
# Assembling all the summary information per school in one dataframe

# Start from a copy of the counts ('Total Students' is the per-school row
# count) so that filling in the other columns does not overwrite
# schools_grpd_count through a shared reference.
schools_full_summary = schools_grpd_count.copy()

# Percentage of students who passed reading and math at each school
schools_full_summary['% Passing Reading'] = (
    schools_grpd_count['% Passing Reading'] / schools_grpd_count['Total Students']) * 100
schools_full_summary['% Passing Math'] = (
    schools_grpd_count['% Passing Math'] / schools_grpd_count['Total Students']) * 100

# Use average scores in math and reading
schools_full_summary['Average Reading Score'] = schools_grpd_mean['Average Reading Score']
schools_full_summary['Average Math Score'] = schools_grpd_mean['Average Math Score']

# Use 'Per Student Budget', 'School Type', and 'Total School Budget' values per school
schools_full_summary['Per Student Budget'] = schools_grpd_first['Per Student Budget']
schools_full_summary['School Type'] = schools_grpd_first['School Type']
schools_full_summary['Total School Budget'] = schools_grpd_first['Total School Budget']

# Overall passing rate is the plain average of the reading and math pass rates
schools_full_summary['% Overall Passing Rate'] = (
    schools_full_summary['% Passing Reading'] + schools_full_summary['% Passing Math']) / 2

# Reorganize columns into presentation order
full_school_summary_org = schools_full_summary[["School Type", "Total Students", "Total School Budget",
                                               "Per Student Budget", "Average Math Score", "Average Reading Score",
                                               "% Passing Math", "% Passing Reading", "% Overall Passing Rate"]]


In [None]:
# Bottom 5 performing schools (by overall passing rate)
# Same columns to include as above

In [None]:
# Top 5 performing schools (by overall passing rate)
# Same columns to include as above

In [None]:
# Average math scores by grade for each grade level (9th-12th)
# Average reading scores by grade for each grade level (9th-12th)

In [None]:
# Scores by school spending
# School performance based on average per-student spending ranges
# Use 4 bins to group school spending
# Include in table: "Average Math Score, Average Reading Score, % Passing Math, % Passing Reading, Overall Passing Rate (Average of the above two)"


In [None]:
# Scores by school size
# Same as above, except group by approximation of school size
# Small, medium, large


In [None]:
# Scores by school type
# Same as above, except group by school type (Charter, District)