In [65]:
# import dependencies
import pandas as pd

In [66]:
# Locations of the two input CSV files: the school roster and the student records.
# NOTE(review): both are absolute paths on one user's machine; they have to be
# edited before the notebook can run anywhere else.
schools_path = (
    "/Users/micahvandersteen/Desktop/pandas-challenge/Resources/PyCitySchools_Resources_schools_complete.csv"
)
students_path = (
    "/Users/micahvandersteen/Desktop/pandas-challenge/Resources/04-Pandas_Homework_PyCitySchools_Resources_students_complete.csv"
)

In [67]:
# Load each CSV into its own DataFrame; all parsing options stay at the
# pandas defaults.
schools_df = pd.read_csv(filepath_or_buffer=schools_path)
students_df = pd.read_csv(filepath_or_buffer=students_path)

In [68]:
# Give the schools frame readable, title-cased column headers.
# rename() returns a new frame, so schools_df itself is left unchanged.
# "School ID" maps to itself and is listed only so the mapping shows every column.
schools_df_clean = schools_df.rename(
    {
        "School ID": "School ID",
        "school_name": "School Name",
        "type": "School Type",
        "size": "School Size",
        "budget": "Budget",
    },
    axis="columns",
)

In [69]:
# Give the students frame readable, title-cased column headers.
# rename() returns a new frame, so students_df itself is left unchanged.
# "Student ID" maps to itself and is listed only so the mapping shows every column.
students_df_clean = students_df.rename(
    {
        "Student ID": "Student ID",
        "student_name": "Student Name",
        "gender": "Gender",
        "grade": "Grade",
        "school_name": "School Name",
        "reading_score": "Reading Score",
        "math_score": "Math Score",
    },
    axis="columns",
)

In [70]:
# Combine the two cleaned frames so that each student row also carries the
# attributes of that student's school.
# Left join with the schools frame on the left: every school is kept, and the
# rows are matched on the shared "School Name" column.
# The key is passed once; repeating the same column name in the `on` list adds
# nothing to the join condition.
school_data_complete = pd.merge(
    schools_df_clean,
    students_df_clean,
    how="left",
    on="School Name",
)


In [71]:
# District-wide headline figures, all derived from the merged frame
# school_data_complete.
total_schools = school_data_complete["School Name"].nunique()

# Sum the budget once per school. Keeping one row per school name (instead of
# summing the distinct budget values) still counts a school whose budget is
# exactly equal to another school's.
total_budget = (
    school_data_complete.drop_duplicates(subset="School Name")["Budget"].sum()
)

# One row of the merged frame per student record.
total_students = len(school_data_complete)
average_math_score = school_data_complete["Math Score"].mean()
average_reading_score = school_data_complete["Reading Score"].mean()

print(f"Total Budget: ${total_budget:,.2f}")
print(f"Total Schools: {total_schools}")
print(f"Total Students: {total_students}")
print(f"The average math score: {round(average_math_score,2)}")
print(f"The average reading score: {round(average_reading_score,2)}")

Total Budget: $24,649,428.00
Total Schools: 15
Total Students: 39170
The average math score: 78.99
The average reading score: 81.88


In [72]:
# Percent of all students passing math.
# A math score of 70 or higher counts as passing; keep only those rows.
students_passing_math = school_data_complete[school_data_complete["Math Score"].ge(70)]

# Share of passing rows among all rows of the merged frame, as a percentage.
percent_passing_math = students_passing_math.shape[0] / total_students * 100

print("percent passing math:", round(percent_passing_math, 2))

percent passing math: 74.98


In [73]:
# Percent of all students passing reading.
# A reading score of 70 or higher counts as passing; keep only those rows.
students_passing_reading = school_data_complete[school_data_complete["Reading Score"].ge(70)]

# Share of passing rows among all rows of the merged frame, as a percentage.
percent_passing_reading = students_passing_reading.shape[0] / total_students * 100

print("percent passing reading:", round(percent_passing_reading, 2))

percent passing reading: 85.81


In [74]:
# Overall passing rate for the district, defined here as the plain average of
# the math and reading passing percentages.
overall_passing_rate = (percent_passing_reading + percent_passing_math) / 2

print("Passing rate overall:", round(overall_passing_rate, 2))

Passing rate overall: 80.39


In [75]:
# Column -> single-element list mapping that becomes the one-row district
# summary table. The budget and the three percentages are stored as
# pre-formatted strings; the remaining values stay numeric.
district_summary_dict = {
    "Total Budget": [f"${total_budget:,.2f}"],
    "Total Schools": [total_schools],
    "Total Students": [total_students],
    "Average Math Score": [round(average_math_score, 3)],
    "Percent Passing Math": [str(round(percent_passing_math, 3)) + "%"],
    "Average Reading Score": [round(average_reading_score, 3)],
    "Percent Passing Reading": [str(round(percent_passing_reading, 3)) + "%"],
    "Overall Passing Rate": [str(round(overall_passing_rate, 3)) + "%"],
}

In [76]:
# Turn the summary mapping into a one-row DataFrame (one column per key).
district_summary_df = pd.DataFrame(data=district_summary_dict)

# Last expression of the cell, so the notebook renders the table.
district_summary_df

Unnamed: 0,Total Budget,Total Schools,Total Students,Average Math Score,Percent Passing Math,Average Reading Score,Percent Passing Reading,Overall Passing Rate
0,"$24,649,428.00",15,39170,78.985,74.981%,81.878,85.805%,80.393%


In [77]:
# Per-school summary: preparation.
# Distinct school names found in the merged frame, in order of first appearance.
school_names = list(school_data_complete["School Name"].unique())

# Empty container for the per-school summary: one fresh, empty list per column.
# NOTE(review): none of the later cells visible here read school_names or
# school_summary_dict; the summary frame is built directly from Series.
school_summary_dict = {
    column_label: []
    for column_label in (
        "School Type",
        "Total Students",
        "Budget",
        "Budget per Student",
        "Average Math Score",
        "Average Reading Score",
        "Percent Passing Math",
        "Percent Passing Reading",
        "Overall Passing Rate",
    )
}

In [78]:
# School type per school.
# Index the schools frame by school name and keep the "School Type" column;
# the result is a Series keyed by school name (no grouping is involved).
school_type_list = schools_df_clean.set_index("School Name")["School Type"]


In [79]:
# Total students per school.
# Taken from the "School Size" column of the schools frame, keyed by school
# name; nothing is counted from the student rows here.
school_size_list = schools_df_clean.set_index("School Name")["School Size"]


In [80]:
# Total budget per school.
# The "Budget" column of the schools frame as a Series keyed by school name.
school_budget_list = schools_df_clean.set_index("School Name")["Budget"]


In [81]:
# Budget per student for each school: element-wise budget / size, with the two
# Series matched up by their school-name index.
school_budget_per_student_list = school_budget_list.div(school_size_list)


In [82]:
# Average math score per school, computed from the merged (per-student) frame.
# The groupby object is kept in its own variable so later cells can reuse it.
grouped_school_data = school_data_complete.groupby(by="School Name")

# Mean math score of each school's rows, rounded to 3 decimal places.
mean_math_score_list = grouped_school_data["Math Score"].mean().round(3)


In [83]:
# Average reading score per school: mean of each school's rows in the
# existing groupby object, rounded to 3 decimal places.
mean_reading_score_list = grouped_school_data["Reading Score"].mean().round(3)


In [84]:
# % Passing Math, per school.
# Keep only the rows whose math score is at or above the passing mark of 70.
students_passing_math_in_school = school_data_complete.loc[school_data_complete["Math Score"] >= 70, :]

# Number of passing students per school (non-null "Student Name" entries in
# each group). Selecting the column before counting avoids counting every
# other column as well.
# A school with no passing student has no group at all, so the counts are
# re-indexed against the full school list with 0 for the missing schools;
# without this the division below would give NaN instead of 0%.
students_passing_math_per_school = (
    students_passing_math_in_school.groupby("School Name")["Student Name"]
    .count()
    .reindex(school_size_list.index, fill_value=0)
)

# Passing count / school size as a percentage, rounded to 3 decimal places.
# The result follows the row order of school_size_list.
percentage_passing_math_list = round(students_passing_math_per_school / school_size_list * 100, 3)

In [85]:
# % Passing Reading, per school.
# Keep only the rows whose reading score is at or above the passing mark of 70.
students_passing_reading_in_school = school_data_complete.loc[school_data_complete["Reading Score"] >= 70, :]

# Number of passing students per school (non-null "Student Name" entries in
# each group). Selecting the column before counting avoids counting every
# other column as well.
# A school with no passing student has no group at all, so the counts are
# re-indexed against the full school list with 0 for the missing schools;
# without this the division below would give NaN instead of 0%.
students_passing_reading_per_school = (
    students_passing_reading_in_school.groupby("School Name")["Student Name"]
    .count()
    .reindex(school_size_list.index, fill_value=0)
)

# Passing count / school size as a percentage, rounded to 3 decimal places.
# The result follows the row order of school_size_list.
percentage_passing_reading_list = round(students_passing_reading_per_school / school_size_list * 100, 3)

In [86]:
# Overall passing rate per school: the plain average of the (already rounded)
# reading and math passing percentages, matched up by school name.
# The average itself is not rounded again.
overall_passing_rate_list = percentage_passing_reading_list.add(percentage_passing_math_list) / 2


In [96]:
# Assemble the per-school summary table: one row per school name, one column
# per metric. Each value is a Series keyed by school name, so pandas lines the
# columns up by that index.
school_summary_df = pd.DataFrame(
    data={
        "School Type": school_type_list,
        "Total Students": school_size_list,
        "Budget": school_budget_list,
        "Budget per Student": school_budget_per_student_list,
        "Average Math Score": mean_math_score_list,
        "Average Reading Score": mean_reading_score_list,
        "Percent Passing Math": percentage_passing_math_list,
        "Percent Passing Reading": percentage_passing_reading_list,
        "Overall Passing Rate": overall_passing_rate_list,
    }
)

# Last expression of the cell, so the notebook renders the table.
school_summary_df

Unnamed: 0,School Type,Total Students,Budget,Budget per Student,Average Math Score,Average Reading Score,Percent Passing Math,Percent Passing Reading,Overall Passing Rate
Bailey High School,District,4976,3124928,628.0,77.048,81.034,66.68,81.933,74.3065
Cabrera High School,Charter,1858,1081356,582.0,83.062,83.976,94.133,97.04,95.5865
Figueroa High School,District,2949,1884411,639.0,76.712,81.158,65.988,80.739,73.3635
Ford High School,District,2739,1763916,644.0,77.103,80.746,68.31,79.299,73.8045
Griffin High School,Charter,1468,917500,625.0,83.351,83.817,93.392,97.139,95.2655
Hernandez High School,District,4635,3022020,652.0,77.29,80.934,66.753,80.863,73.808
Holden High School,Charter,427,248087,581.0,83.803,83.815,92.506,96.253,94.3795
Huang High School,District,2917,1910635,655.0,76.629,81.183,65.684,81.316,73.5
Johnson High School,District,4761,3094650,650.0,77.072,80.966,66.058,81.222,73.64
Pena High School,Charter,962,585858,609.0,83.84,84.045,94.595,95.946,95.2705


In [93]:
# Five best-performing schools: order the summary by overall passing rate,
# highest first, and keep the first five rows.
top_five_schools = school_summary_df.sort_values(
    by="Overall Passing Rate", ascending=False
).iloc[:5]

top_five_schools

Unnamed: 0,School Type,Total Students,Budget,Budget per Student,Average Math Score,Average Reading Score,Percent Passing Math,Percent Passing Reading,Overall Passing Rate
Cabrera High School,Charter,1858,1081356,582.0,83.062,83.976,94.133,97.04,95.5865
Thomas High School,Charter,1635,1043130,638.0,83.418,83.849,93.272,97.309,95.2905
Pena High School,Charter,962,585858,609.0,83.84,84.045,94.595,95.946,95.2705
Griffin High School,Charter,1468,917500,625.0,83.351,83.817,93.392,97.139,95.2655
Wilson High School,Charter,2283,1319574,578.0,83.274,83.989,93.868,96.54,95.204


In [95]:
# Five worst-performing schools: order the summary by overall passing rate,
# lowest first, and keep the first five rows.
bottom_five_schools = school_summary_df.sort_values(
    by="Overall Passing Rate", ascending=True
).iloc[:5]

bottom_five_schools

Unnamed: 0,School Type,Total Students,Budget,Budget per Student,Average Math Score,Average Reading Score,Percent Passing Math,Percent Passing Reading,Overall Passing Rate
Rodriguez High School,District,3999,2547363,637.0,76.843,80.745,66.367,80.22,73.2935
Figueroa High School,District,2949,1884411,639.0,76.712,81.158,65.988,80.739,73.3635
Huang High School,District,2917,1910635,655.0,76.629,81.183,65.684,81.316,73.5
Johnson High School,District,4761,3094650,650.0,77.072,80.966,66.058,81.222,73.64
Ford High School,District,2739,1763916,644.0,77.103,80.746,68.31,79.299,73.8045
