# Import dependencies and setup

In [1]:
#import dependencies
import pandas as pd

In [2]:
#create a reference to cvs schools file and import it into a pandas df
schools="Resources/schools_complete.csv"
schools_df=pd.read_csv(schools)
schools_df.head()

Unnamed: 0,School ID,school_name,type,size,budget
0,0,Huang High School,District,2917,1910635
1,1,Figueroa High School,District,2949,1884411
2,2,Shelton High School,Charter,1761,1056600
3,3,Hernandez High School,District,4635,3022020
4,4,Griffin High School,Charter,1468,917500


In [3]:
#create a reference to cvs students file and import it into a pandas df
students="Resources/students_complete.csv"
students_df=pd.read_csv(students)
students_df.head()

Unnamed: 0,Student ID,student_name,gender,grade,school_name,reading_score,math_score
0,0,Paul Bradley,M,9th,Huang High School,66,79
1,1,Victor Smith,M,12th,Huang High School,94,61
2,2,Kevin Rodriguez,M,12th,Huang High School,90,60
3,3,Dr. Richard Scott,M,12th,Huang High School,67,58
4,4,Bonnie Ray,F,9th,Huang High School,97,84


In [4]:
#merge df on school_name
city_df=pd.merge(schools_df, students_df, on="school_name")
city_df.head()

Unnamed: 0,School ID,school_name,type,size,budget,Student ID,student_name,gender,grade,reading_score,math_score
0,0,Huang High School,District,2917,1910635,0,Paul Bradley,M,9th,66,79
1,0,Huang High School,District,2917,1910635,1,Victor Smith,M,12th,94,61
2,0,Huang High School,District,2917,1910635,2,Kevin Rodriguez,M,12th,90,60
3,0,Huang High School,District,2917,1910635,3,Dr. Richard Scott,M,12th,67,58
4,0,Huang High School,District,2917,1910635,4,Bonnie Ray,F,9th,97,84


# 1. District Summary
* Total Schools
* Total Students
* Total Budget
* Average Math Score
* Average Reading Score
* % Passing Math
* % Passing Reading
* Overall Passing Rate (Average of the above two)


In [5]:
#calculate total number of schools
tot_schools= city_df["school_name"].nunique()

#calculate total number of studens
tot_students= city_df["student_name"].count()

#calculate total budget
tot_budget= schools_df["budget"].sum()

#print results to check
"total number of schools: "+ str(tot_schools), "total number of studens: "+ str(tot_students),\
"total budget: " + str(tot_budget)

('total number of schools: 15',
 'total number of studens: 39170',
 'total budget: 24649428')

In [6]:
# MATH
#calculate average math score
avg_math_score= city_df["math_score"].mean()

#calculate percentage of students with a passing math score (70 or greater)
passing_math_df= city_df.loc[city_df["math_score"] >= 70]
passing_math= passing_math_df["student_name"].count()
passing_math_perc= (passing_math/tot_students)*100

#print results to check
"average math score: "+ str(round(avg_math_score,2)), "passing math %: "+ str(round(passing_math_perc,2)),


('average math score: 78.99', 'passing math %: 74.98')

In [7]:
#READING
#calculate average reading score
avg_read_score= city_df["reading_score"].mean()

#calculate the percentage of students with a passing reading score (70 or greater)
passing_read_df= city_df.loc[city_df["reading_score"] >= 70]
passing_read= passing_read_df["student_name"].count()
passing_read_perc= (passing_read/tot_students)*100

#print results to check
"average reading score: "+ str(round(avg_read_score,2)), "passing reading %: "+ str(round(passing_read_perc,2)),

('average reading score: 81.88', 'passing reading %: 85.81')

In [8]:
#Calculate the overall passing rate 
avg_score= (passing_math_perc+passing_read_perc)/2

#print results to check
"overall passing rate : "+ str(round(avg_score,2)),

('overall passing rate : 80.39',)

In [9]:
#district summary table

district_summary_df=pd.DataFrame({
    "Total schools" : [tot_schools],
    "Total students" : [tot_students],
    "Total budget ($)" : [tot_budget],
    "Average math score" : round(avg_math_score,2),
    "Average reading score" : round(avg_read_score,2),
    "Passing math (%)" : round(passing_math_perc,2),
    "Passing reading (%)" : round(passing_read_perc,2),
    "Overall passing rate (%)" : round(avg_score,2)})

district_summary_df

Unnamed: 0,Total schools,Total students,Total budget ($),Average math score,Average reading score,Passing math (%),Passing reading (%),Overall passing rate (%)
0,15,39170,24649428,78.99,81.88,74.98,85.81,80.39


# 2. School Summary

* School Name
* School Type
* Total Students
* Total School Budget
* Per Student Budget
* Average Math Score
* Average Reading Score
* % Passing Math
* % Passing Reading
* Overall Passing Rate (Average of the above two)

In [10]:
#Group by school
grouped_df=city_df.groupby("school_name")

#retrieve school type and make df with correct index and column name
school_type=schools_df["type"]
school_type_df=pd.DataFrame(school_type).set_index(schools_df["school_name"]).rename(columns={"type":"Type"})

#calculate total students and make df with correct column name
school_students=grouped_df["student_name"].count()
school_students_df=pd.DataFrame(school_students).rename(columns={"student_name":"Total students"})

#retrieve total budget and make df with correct index and column name
school_budget= schools_df["budget"]
school_budget_df=pd.DataFrame(school_budget).set_index(schools_df["school_name"])\
                        .rename(columns={"budget":"Total budget"})

#calculate budget per student and make df with correct index and column name
budget_per_student= school_budget_df["Total budget"]/school_students_df["Total students"]
budget_per_student_df=pd.DataFrame(budget_per_student).rename(columns={0:"Per student budget"})

#printing results to check
school_type_df, school_students_df, school_budget_df, budget_per_student_df



(                           Type
 school_name                    
 Huang High School      District
 Figueroa High School   District
 Shelton High School     Charter
 Hernandez High School  District
 Griffin High School     Charter
 Wilson High School      Charter
 Cabrera High School     Charter
 Bailey High School     District
 Holden High School      Charter
 Pena High School        Charter
 Wright High School      Charter
 Rodriguez High School  District
 Johnson High School    District
 Ford High School       District
 Thomas High School      Charter,
                        Total students
 school_name                          
 Bailey High School               4976
 Cabrera High School              1858
 Figueroa High School             2949
 Ford High School                 2739
 Griffin High School              1468
 Hernandez High School            4635
 Holden High School                427
 Huang High School                2917
 Johnson High School              4761
 Pena Hig

In [11]:
#MATH
avg_math_score= round(grouped_df["math_score"].mean(),2)
avg_math_score_df=pd.DataFrame(avg_math_score).rename(columns={"math_score":"Average math score"})

passing_math= city_df.loc[city_df["math_score"] >= 70]
grouped_math= passing_math_df.groupby("school_name")
passing_math= grouped_math["student_name"].count()

passing_math_perc= round((passing_math/school_students)*100,2)
passing_math_perc_df=pd.DataFrame(passing_math_perc).rename(columns={"student_name":"Passing math (%)"})

#printing results to check
avg_math_score_df, passing_math_perc_df

(                       Average math score
 school_name                              
 Bailey High School                  77.05
 Cabrera High School                 83.06
 Figueroa High School                76.71
 Ford High School                    77.10
 Griffin High School                 83.35
 Hernandez High School               77.29
 Holden High School                  83.80
 Huang High School                   76.63
 Johnson High School                 77.07
 Pena High School                    83.84
 Rodriguez High School               76.84
 Shelton High School                 83.36
 Thomas High School                  83.42
 Wilson High School                  83.27
 Wright High School                  83.68,
                        Passing math (%)
 school_name                            
 Bailey High School                66.68
 Cabrera High School               94.13
 Figueroa High School              65.99
 Ford High School                  68.31
 Griffin High School  

In [12]:
#READING
avg_read_score= round(grouped_df["reading_score"].mean(),2)
avg_read_score_df=pd.DataFrame(avg_read_score).rename(columns={"reading_score":"Average reading score"})

passing_read= city_df.loc[city_df["reading_score"] >= 70]
grouped_read= passing_read_df.groupby("school_name")
passing_read= grouped_read["student_name"].count()

passing_read_perc= round((passing_read/school_students)*100,2)
passing_read_perc_df=pd.DataFrame(passing_read_perc).rename(columns={"student_name":"Passing reading (%)"})

#printing results to check
avg_read_score_df, passing_read_perc_df


(                       Average reading score
 school_name                                 
 Bailey High School                     81.03
 Cabrera High School                    83.98
 Figueroa High School                   81.16
 Ford High School                       80.75
 Griffin High School                    83.82
 Hernandez High School                  80.93
 Holden High School                     83.81
 Huang High School                      81.18
 Johnson High School                    80.97
 Pena High School                       84.04
 Rodriguez High School                  80.74
 Shelton High School                    83.73
 Thomas High School                     83.85
 Wilson High School                     83.99
 Wright High School                     83.96,
                        Passing reading (%)
 school_name                               
 Bailey High School                   81.93
 Cabrera High School                  97.04
 Figueroa High School                 80.

In [13]:
#Calculate the overall passing rate 
avg_score= round((passing_math_perc+passing_read_perc)/2,2)
avg_score_df=pd.DataFrame(avg_score).rename(columns={"student_name":"Overall passing rate (%)"})

#printing results to check
avg_score_df

Unnamed: 0_level_0,Overall passing rate (%)
school_name,Unnamed: 1_level_1
Bailey High School,74.31
Cabrera High School,95.58
Figueroa High School,73.36
Ford High School,73.81
Griffin High School,95.26
Hernandez High School,73.81
Holden High School,94.38
Huang High School,73.5
Johnson High School,73.64
Pena High School,95.27


In [14]:
#school summary
school_summary_df=pd.merge(school_type_df, school_students_df, on="school_name").merge(school_budget_df, 
                    on="school_name").merge(budget_per_student_df, on="school_name").merge(avg_math_score_df, 
                    on= "school_name").merge(avg_read_score_df, on= "school_name").merge(passing_math_perc_df, 
                    on= "school_name").merge(passing_read_perc_df, on= "school_name").merge(avg_score_df,on= "school_name")

school_summary_df

Unnamed: 0_level_0,Type,Total students,Total budget,Per student budget,Average math score,Average reading score,Passing math (%),Passing reading (%),Overall passing rate (%)
school_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Huang High School,District,2917,1910635,655.0,76.63,81.18,65.68,81.32,73.5
Figueroa High School,District,2949,1884411,639.0,76.71,81.16,65.99,80.74,73.36
Shelton High School,Charter,1761,1056600,600.0,83.36,83.73,93.87,95.85,94.86
Hernandez High School,District,4635,3022020,652.0,77.29,80.93,66.75,80.86,73.81
Griffin High School,Charter,1468,917500,625.0,83.35,83.82,93.39,97.14,95.26
Wilson High School,Charter,2283,1319574,578.0,83.27,83.99,93.87,96.54,95.21
Cabrera High School,Charter,1858,1081356,582.0,83.06,83.98,94.13,97.04,95.58
Bailey High School,District,4976,3124928,628.0,77.05,81.03,66.68,81.93,74.31
Holden High School,Charter,427,248087,581.0,83.8,83.81,92.51,96.25,94.38
Pena High School,Charter,962,585858,609.0,83.84,84.04,94.59,95.95,95.27


# 3. Top Performing Schools (By Passing Rate)

In [15]:
top_school_df= school_summary_df.sort_values("Overall passing rate (%)", ascending=False)
top_school_df.head()

Unnamed: 0_level_0,Type,Total students,Total budget,Per student budget,Average math score,Average reading score,Passing math (%),Passing reading (%),Overall passing rate (%)
school_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Cabrera High School,Charter,1858,1081356,582.0,83.06,83.98,94.13,97.04,95.58
Thomas High School,Charter,1635,1043130,638.0,83.42,83.85,93.27,97.31,95.29
Pena High School,Charter,962,585858,609.0,83.84,84.04,94.59,95.95,95.27
Griffin High School,Charter,1468,917500,625.0,83.35,83.82,93.39,97.14,95.26
Wilson High School,Charter,2283,1319574,578.0,83.27,83.99,93.87,96.54,95.21


# 4. Bottom Performing Schools (By Passing Rate)

In [16]:
bottom_school_df= school_summary_df.sort_values("Overall passing rate (%)")
bottom_school_df.head()

Unnamed: 0_level_0,Type,Total students,Total budget,Per student budget,Average math score,Average reading score,Passing math (%),Passing reading (%),Overall passing rate (%)
school_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Rodriguez High School,District,3999,2547363,637.0,76.84,80.74,66.37,80.22,73.3
Figueroa High School,District,2949,1884411,639.0,76.71,81.16,65.99,80.74,73.36
Huang High School,District,2917,1910635,655.0,76.63,81.18,65.68,81.32,73.5
Johnson High School,District,4761,3094650,650.0,77.07,80.97,66.06,81.22,73.64
Hernandez High School,District,4635,3022020,652.0,77.29,80.93,66.75,80.86,73.81


# 5. Math Scores by Grade**

In [27]:
#9th grade
math_df=city_df.loc["math_score"]
math_df.groupby["school_name"]


#city_df["math_score"](["school_name","grade"])
#math_sum = math_9th_df["math_score"]
#math_9th_df= math_9th_df.sum("math_score")
#math_scores_9th= pd.mean(math_9th_df["math_score"])
#avg_math_9th= math_9th_df.sum("math_score")
#avg_math_9th
math_df

KeyError: 'math_score'