In [483]:
#import dependencies
import pandas as pd

In [484]:
#locate the two raw CSV inputs inside the raw_data folder
school_file, students_file = (
    "raw_data/" + csv_name
    for csv_name in ("schools_complete.csv", "students_complete.csv")
)

In [485]:
#load both CSV files into pandas DataFrames (schools first, then students)
schools_df, students_df = (
    pd.read_csv(csv_path) for csv_path in (school_file, students_file)
)

In [486]:
#district summary -- number of schools, counted as the non-null entries in the "name" column
total_schools = schools_df.loc[:, "name"].count()
total_schools

15

In [487]:
#district summary -- number of students, counted as the non-null entries in the "name" column
total_students = students_df.loc[:, "name"].count()
total_students

39170

In [488]:
#district summary -- total budget is the sum of every school's "budget" value
total_budget = schools_df.loc[:, "budget"].sum()
total_budget

24649428

In [489]:
#district summary -- mean reading and math scores taken over every student row
district_average_reading, district_average_math = (
    students_df[score_column].mean()
    for score_column in ("reading_score", "math_score")
)
#reading is printed first, then math
print(district_average_reading)
print(district_average_math)

81.87784018381414
78.98537145774827


In [490]:
#number of students passing math: a score strictly greater than 69 counts as a pass
#summing the boolean mask counts the True entries
pass_count_math = (students_df["math_score"] > 69).sum()
pass_count_math

29370

In [491]:
#number of students passing reading: a score strictly greater than 69 counts as a pass
#summing the boolean mask counts the True entries
pass_count_reading = (students_df["reading_score"] > 69).sum()
pass_count_reading

33610

In [492]:
#district summary -- share of all students passing each subject (stored as a fraction, 0-1)
percent_passing_math = pass_count_math / total_students
percent_passing_reading = pass_count_reading / total_students
#show math first, then reading, as percentages with two decimals
print(format(percent_passing_math, '.2%'))
print(format(percent_passing_reading, '.2%'))

74.98%
85.81%


In [493]:
#overall passing rate is defined here as the simple average of the math and reading pass rates
overall_pass = (percent_passing_reading + percent_passing_math) / 2
format(overall_pass, '.2%')

'80.39%'

In [494]:
#assemble the district's key metrics into a one-row summary table
#every value is pre-formatted as a display string (counts, currency, scores, percentages)
district_metrics = {
    "Total Schools": format(total_schools, ',.0f'),
    "Total Students": format(total_students, ',.0f'),
    "Total Budget": '$' + format(total_budget, ',.2f'),
    "Average Math Score": format(district_average_math, ',.2f'),
    "Average Reading Score": format(district_average_reading, ',.2f'),
    "% Passing Math": format(percent_passing_math, '.2%'),
    "% Passing Reading": format(percent_passing_reading, '.2%'),
    "Overall Passing Rate": format(overall_pass, '.2%'),
}
#index=[0] is required because every value in the dict is a scalar
district_summary_table = pd.DataFrame(district_metrics, index=[0])
district_summary_table

Unnamed: 0,% Passing Math,% Passing Reading,Average Math Score,Average Reading Score,Overall Passing Rate,Total Budget,Total Schools,Total Students
0,74.98%,85.81%,78.99,81.88,80.39%,"$24,649,428.00",15,39170


In [428]:
#District Summary complete
#Begin analysis of the individual schools
#build upon the schools_df by adding the summary columns from the students_df

In [429]:
#give the student columns presentation-friendly names, then preview the first rows
#NOTE: this rebinds students_df, so earlier cells that read students_df["name"] must run before this one
student_column_names = {"name": "Student Name", "school": "School Name"}
students_df = students_df.rename(columns=student_column_names)
students_df.head(5)

Unnamed: 0,Student ID,Student Name,gender,grade,School Name,reading_score,math_score
0,0,Paul Bradley,M,9th,Huang High School,66,79
1,1,Victor Smith,M,12th,Huang High School,94,61
2,2,Kevin Rodriguez,M,12th,Huang High School,90,60
3,3,Dr. Richard Scott,M,12th,Huang High School,67,58
4,4,Bonnie Ray,F,9th,Huang High School,97,84


In [430]:
#preview the first five rows of the schools dataset to get familiar with its columns
schools_df.head(5)

Unnamed: 0,School ID,name,type,size,budget
0,0,Huang High School,District,2917,1910635
1,1,Figueroa High School,District,2949,1884411
2,2,Shelton High School,Charter,1761,1056600
3,3,Hernandez High School,District,4635,3022020
4,4,Griffin High School,Charter,1468,917500


In [431]:
#start the per-school summary table from schools_df with presentation-friendly column names
#("size" becomes "Total Students"); schools_df itself is left unchanged
schools_summary_table = schools_df.rename(
    columns={
        "name": "School Name",
        'type': "School Type",
        'size': 'Total Students',
        'budget': "Total Budget",
    }
)

In [432]:
#budget per student = total budget divided by enrollment, added as a new summary column
schools_summary_table["Per Student Budget"] = schools_summary_table["Total Budget"].div(
    schools_summary_table["Total Students"]
)

In [433]:
#calculate the average scores for each school
#group students_df by school and average the numeric columns only:
#students_df also holds string columns (student name, gender, grade), and
#recent pandas versions raise a TypeError when .mean() is asked to average them
#"Student ID" is numeric, so its (meaningless) mean is still produced here;
#it is kept on purpose because a later cell drops that column by name
df = students_df.groupby("School Name").mean(numeric_only=True)
#move "School Name" from the index back to a regular column so it can be merged on
df = df.reset_index()

In [434]:
#attach the per-school score means to the summary table (inner join on "School Name")
#and label the score columns as averages
schools_summary_table = schools_summary_table.merge(df, on="School Name").rename(
    columns={
        "reading_score": "Average Reading Score",
        "math_score": "Average Math Score",
    }
)

In [435]:
#Count, per school, the students who passed reading (score above 69);
#the counts feed the percent-passing calculation later on
df2 = (
    students_df.loc[students_df["reading_score"] > 69, ["School Name", "reading_score"]]
    .groupby("School Name")
    .count()
    .rename(columns={"reading_score": "# of Pass Reading"})
    .reset_index()
)

In [436]:
#Count, per school, the students who passed math (score above 69);
#the counts feed the percent-passing calculation later on
df3 = (
    students_df.loc[students_df["math_score"] > 69, ["School Name", "math_score"]]
    .groupby("School Name")
    .count()
    .rename(columns={"math_score": "# of Pass Math"})
    .reset_index()
)

In [437]:
#merge the pass-count dataframes into the summary df in order to perform calculations
#a left join keeps every school in the summary: df2/df3 only contain schools with at
#least one passing student, so an inner join would silently drop a school with none
schools_summary_table = pd.merge(schools_summary_table, df2, on="School Name", how="left")
schools_summary_table = pd.merge(schools_summary_table, df3, on="School Name", how="left")
#a school missing from df2/df3 had zero passing students, so record 0 rather than NaN
schools_summary_table[["# of Pass Reading", "# of Pass Math"]] = (
    schools_summary_table[["# of Pass Reading", "# of Pass Math"]].fillna(0)
)

In [438]:
#Passing rate per subject = passing students / enrollment, stored as fractions (0-1) in new columns
schools_summary_table["% Passing Reading"] = schools_summary_table["# of Pass Reading"].div(
    schools_summary_table["Total Students"]
)
schools_summary_table["% Passing Math"] = schools_summary_table["# of Pass Math"].div(
    schools_summary_table["Total Students"]
)
#overall rate is the simple average of the two subject rates
schools_summary_table["Overall Passing Rate"] = (
    schools_summary_table["% Passing Reading"]
    .add(schools_summary_table["% Passing Math"])
    .div(2)
)

In [439]:
#remove the id columns and the intermediate pass counts, which are not part of the final summary
schools_summary_table = schools_summary_table.drop(
    labels=["School ID", "# of Pass Reading", "# of Pass Math", "Student ID"],
    axis="columns",
)
schools_summary_table.head(5)

Unnamed: 0,School ID,School Name,School Type,Total Students,Total Budget,Per Student Budget,Average Reading Score,Average Math Score,% Passing Reading,% Passing Math,Overall Passing Rate
0,0,Huang High School,District,2917,1910635,655.0,81.182722,76.629414,0.813164,0.656839,0.735002
1,1,Figueroa High School,District,2949,1884411,639.0,81.15802,76.711767,0.807392,0.659885,0.733639
2,2,Shelton High School,Charter,1761,1056600,600.0,83.725724,83.359455,0.958546,0.938671,0.948609
3,3,Hernandez High School,District,4635,3022020,652.0,80.934412,77.289752,0.80863,0.66753,0.73808
4,4,Griffin High School,Charter,1468,917500,625.0,83.816757,83.351499,0.97139,0.933924,0.952657


In [441]:
#apply display formatting to the columns in the df
#after this cell the formatted columns hold strings, not numbers
format_percent = "{0:.2f}%".format
format_round2 =  '{:,.2f}'.format
format_round0 = '{:,.0f}'.format
format_currency = '${:,.2f}'.format

#the passing rates are stored as fractions (0-1) and are scaled by 100 before formatting
percent_columns = ['% Passing Reading', '% Passing Math', 'Overall Passing Rate']
column_formatters = {
    'Total Budget': format_currency,
    'Per Student Budget': format_currency,
    'Average Reading Score': format_round2,
    'Average Math Score': format_round2,
    '% Passing Reading': format_percent,
    '% Passing Math': format_percent,
    'Overall Passing Rate': format_percent,
    'Total Students': format_round0,
}

#Series.map is used column by column instead of DataFrame.applymap, which is
#deprecated in recent pandas releases
for column_name, formatter in column_formatters.items():
    column_values = schools_summary_table[column_name]
    #skip columns that are already strings so re-running the cell is harmless
    #(otherwise the percentages would be scaled again and the formatters would fail)
    if not pd.api.types.is_numeric_dtype(column_values):
        continue
    if column_name in percent_columns:
        column_values = column_values * 100
    schools_summary_table[column_name] = column_values.map(formatter)

In [445]:
#preview the first five rows of the formatted per-school summary
schools_summary_table.head(5)

Unnamed: 0,School ID,School Name,School Type,Total Students,Total Budget,Per Student Budget,Average Reading Score,Average Math Score,% Passing Reading,% Passing Math,Overall Passing Rate
0,0,Huang High School,District,2917,"$1,910,635.00",$655.00,81.18,76.63,81.32%,65.68%,73.50%
1,1,Figueroa High School,District,2949,"$1,884,411.00",$639.00,81.16,76.71,80.74%,65.99%,73.36%
2,2,Shelton High School,Charter,1761,"$1,056,600.00",$600.00,83.73,83.36,95.85%,93.87%,94.86%
3,3,Hernandez High School,District,4635,"$3,022,020.00",$652.00,80.93,77.29,80.86%,66.75%,73.81%
4,4,Griffin High School,Charter,1468,"$917,500.00",$625.00,83.82,83.35,97.14%,93.39%,95.27%


In [None]:
# Summary Table for Individual Schools complete
# Begin creating table for the top 5 performing schools based on overall pass rate