In [190]:
# DEPENDENCIES AND SETUP
import pandas as pd
import numpy as np

# LOAD FILES
school_csv = "Resources/schools.csv"
student_csv = "Resources/students.csv"

In [191]:
school_data = pd.read_csv(school_csv)
school_data.head()

Unnamed: 0,School ID,school_name,type,size,budget
0,0,Huang High School,District,2917,1910635
1,1,Figueroa High School,District,2949,1884411
2,2,Shelton High School,Charter,1761,1056600
3,3,Hernandez High School,District,4635,3022020
4,4,Griffin High School,Charter,1468,917500


In [192]:
student_data = pd.read_csv(student_csv)
student_data.head()

Unnamed: 0,Student ID,student_name,gender,grade,school_name,reading_score,math_score
0,0,Paul Bradley,M,9th,Huang High School,66,79
1,1,Victor Smith,M,12th,Huang High School,94,61
2,2,Kevin Rodriguez,M,12th,Huang High School,90,60
3,3,Dr. Richard Scott,M,12th,Huang High School,67,58
4,4,Bonnie Ray,F,9th,Huang High School,97,84


In [193]:
# COMBINE THE DATA INTO A SINGLE DATA SET
school_data_complete = pd.merge(student_data, school_data, how="left", on=["school_name", "school_name"])
school_data_complete.head()

Unnamed: 0,Student ID,student_name,gender,grade,school_name,reading_score,math_score,School ID,type,size,budget
0,0,Paul Bradley,M,9th,Huang High School,66,79,0,District,2917,1910635
1,1,Victor Smith,M,12th,Huang High School,94,61,0,District,2917,1910635
2,2,Kevin Rodriguez,M,12th,Huang High School,90,60,0,District,2917,1910635
3,3,Dr. Richard Scott,M,12th,Huang High School,67,58,0,District,2917,1910635
4,4,Bonnie Ray,F,9th,Huang High School,97,84,0,District,2917,1910635


In [194]:
# ------------------------------- DISTRICT SUMMARY ------------------------------------------------------------------------------------------------------------
# CALCULATE THE TOTAL NUMBER OF SCHOOLS 
total_of_schools = len(school_data_complete["school_name"].unique())

# CALCULATE THE TOTAL NUMBER OF STUDENTS - by counting how many students are listed
total_of_students = (school_data_complete["student_name"].count())

# CALCULATE THE TOTAL BODGET
the_total_budget = school_data_complete["budget"].unique().sum()

# CALCULATE THE AVERAGE MATH SCORE
average_math_score = school_data_complete["math_score"].mean()

# CALCULATE THE AVERAGE READING SCORE
average_reading_score = school_data_complete["reading_score"].mean()

# CALCULATE THE PERCENTAGE OF STUDENTS WITH A PASSING MATH SCORE OF =/+ 70
passing_math = len(school_data_complete[school_data_complete["math_score"]>=70])

# CALCULATE THE PERCENTAGE OF STUDENTS WITH A PASSING READING SCORE OF =/+ 70
passing_reading = len(school_data_complete[school_data_complete["reading_score"]>=70])

# CALCULATE THE PERCENTAGE OF STUDENTS WHO HAD AN OVERALL PASSING (MATH AND READING)
overall_passing = len(school_data_complete[(school_data_complete["math_score"] >=70) & (school_data_complete["reading_score"] >= 70)])

In [195]:
# CREATE A DATA FRAME TO HOLD THE ABOVE RESULTS
district_summary = pd.DataFrame({"Total Schools" : [total_of_schools],
    "Total Students" : f'{total_of_students:,}',
    "Total Budget" : f'${the_total_budget:,}' ,
    "Average Math Score" : average_math_score,
    "Average Reading Score" : average_reading_score,
    "% Passing Math" : (passing_math/total_of_students) * 100,
    "% Passing Reading" : (passing_reading/total_of_students) * 100,
    "% Overall Passing" : (overall_passing/total_of_students) * 100
})
district_summary
# OPTIONAL: GIVE DISPLAYED DATA CLEANER FORMATTING

Unnamed: 0,Total Schools,Total Students,Total Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing
0,15,39170,"$24,649,428",78.985371,81.87784,74.980853,85.805463,65.172326


In [196]:
# ------------------------------- SCHOOL SUMMARY : CREATE A TABLE ---------------------------------------------------------------------------------------------
# SCHOOL TYPE
school_type = school_data.set_index(["school_name"])["type"]
# TOTAL STUDENTS

#merge both the dataframes
combined_df = pd.merge(school_data, student_data, on="school_name")
combined_df.head()

#total students per school
total_students_per_school = combined_df["school_name"].value_counts()

# TOTAL SCHOOL BUDGET
school_budget = combined_df.groupby(["school_name"]).mean()["budget"]

# PER STUDENT BUDGET
budget_per_student = school_budget/total_students_per_school
# AVERAGE MATH SCORE
average_math_school = round(combined_df.groupby(["school_name"]).mean()["math_score"],2)
# AVERAGE READING SCORE
average_reading_school = round(combined_df.groupby(["school_name"]).mean()["reading_score"],2)

# % PASSING MATH
students_passing_math = combined_df[(combined_df["math_score"]>70)]
students_passing_math_per_school = students_passing_math.groupby(["school_name"]).count()["student_name"]
percentage_passing_math_schools = round((students_passing_math_per_school/total_students_per_school)*100,2)
percentage_passing_math_schools

# % PASSING READING
students_passing_reading = combined_df[(combined_df["reading_score"]>70)]
students_passing_reading_per_school = students_passing_reading.groupby(["school_name"]).count()["student_name"]
percentage_passing_reading_schools = round((students_passing_reading_per_school/total_students_per_school)*100,2)
percentage_passing_reading_schools


# % PASSING BOTH MATH AND READING
overall_passing_rate_schoolwise = (percentage_passing_math_schools + percentage_passing_reading_schools)/2
overall_passing_rate_schoolwise

Bailey High School       71.965
Cabrera High School      91.710
Figueroa High School     71.090
Ford High School         71.630
Griffin High School      91.550
Hernandez High School    71.470
Holden High School       91.685
Huang High School        71.065
Johnson High School      71.065
Pena High School         91.940
Rodriguez High School    70.905
Shelton High School      91.255
Thomas High School       91.560
Wilson High School       92.090
Wright High School       91.860
dtype: float64

In [197]:
# DATA FRAME TO HOLD RESULTS
# Creating a summary DataFrame using above values
schools = ({"School Type": school_type,
    "Total Students": total_students_per_school, 
    "Total School Budget": school_budget, 
    "Per Student Budget": budget_per_student, 
    "Average Math Score": average_math_school,
    "Average Reading Score": average_reading_school,
    '% Passing Math': passing_math_school,
    '% Passing Reading': percentage_passing_reading_schools,
    "Overall Passing": overall_passing_rate_schoolwise
})

school_summary = pd.DataFrame(schools)
school_summary

Unnamed: 0,School Type,Total Students,Total School Budget,Per Student Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,Overall Passing
Bailey High School,District,4976,3124928.0,628.0,77.05,81.03,3318,79.3,71.965
Cabrera High School,Charter,1858,1081356.0,582.0,83.06,83.98,1749,93.86,91.71
Figueroa High School,District,2949,1884411.0,639.0,76.71,81.16,1946,78.43,71.09
Ford High School,District,2739,1763916.0,644.0,77.1,80.75,1871,77.51,71.63
Griffin High School,Charter,1468,917500.0,625.0,83.35,83.82,1371,93.39,91.55
Hernandez High School,District,4635,3022020.0,652.0,77.29,80.93,3094,78.19,71.47
Holden High School,Charter,427,248087.0,581.0,83.8,83.81,395,92.74,91.685
Huang High School,District,2917,1910635.0,655.0,76.63,81.18,1916,78.81,71.065
Johnson High School,District,4761,3094650.0,650.0,77.07,80.97,3145,78.28,71.065
Pena High School,Charter,962,585858.0,609.0,83.84,84.04,910,92.2,91.94


In [198]:
# ------------------------------- TOP PERFORMING SCHOOLS (BY % OVERALL PASSING) -------------------------------------------------------------------------------
# SET AND DISPLAY THE TOP 5 PERFORMING SCHOOLS BY % OVERALL PASSING

In [None]:
# --------------------------------- BOTTOM PERFORMING SCHOOLS (BY % OVERALL PASSING) --------------------------------------------------------------------------
# SET AND DISPLAY THE TOP 5 WORST PERFORMING SCHOOLS BY % OVERALL PASSING

In [None]:
# ------------------------------- MATH SCORES BY GRADE ---------------------------------------------------------------------------------------------------------
# CREATE A TABLE LISTING MATH READING SCORE FOR STUDENTS GRADE 9,10,11,12 AT EACH SCHOOL

# CREATE A PANDAS SERIES FOR EACH GRADE (USE CONDITIONAL STATEMENT)

# GROUP EACH SERIES BY SCHOOL

# COMBINE THE SERIES INTO A DATA FRAME

# OPTIONAL : GIVE THE DISPLAY CLEANER FORMATTING

In [None]:
# ------------------------------- READING SCORES BY GRADE ------------------------------------------------------------------------------------------------------
# CREATE A TABLE LISTING AVERAGE READING SCORE FOR STUDENTS GRADE 9,10,11,12 AT EACH SCHOOL

# CREATE A PANDAS SERIES FOR EACH GRADE (USE CONDITIONAL STATEMENT)

# GROUP EACH SERIES BY SCHOOL

# COMBINE THE SERIES INTO A DATA FRAME

# OPTIONAL : GIVE THE DISPLAY CLEANER FORMATTING

In [None]:
# ------------------------------- SCORES BY SCHOOL SPENDING ----------------------------------------------------------------------------------------------------
# CREATE A TABLE BREAKING DOWN SCHOOLS PERFORMANCES BASED ON AVERAGE SPENDING RANGES PER STUDENT. USE 4 REASONABLE BINS TO GROUP SCHOOL SPENDING
# INCLUDE:
# AVERAGE MATH SCORES

# AVERAFE READING SCORES

# % PASSING MATH

# % PASSING ENGLISH

# OVERALL PASSING RATE (AVERAGE OF ABOVE TWO)

In [None]:
# ------------------------------- SCORES BY SCHOOL TYPE --------------------------------------------------------------------------------------------------------
# CREATE A TABLE BREAKING DOWN SCHOOLS PERFORMANCES BASED ON SCHOOL SIZE USE 4 REASONABLE BINS TO GROUP SCHOOL SPENDING
# INCLUDE:
# AVERAGE MATH SCORES

# AVERAFE READING SCORES

# % PASSING MATH

# % PASSING ENGLISH

# OVERALL PASSING RATE (AVERAGE OF ABOVE TWO)

In [None]:
# ------------------------------- SCORES BY SCHOOL TYPE--------------------------------------------------------------------------------------------------------
# CREATE A TABLE BREAKING DOWN SCHOOLS PERFORMANCES BASED ON SCHOOL TYPE. USE 4 REASONABLE BINS TO GROUP SCHOOL SPENDING
# INCLUDE:
# AVERAGE MATH SCORES

# AVERAFE READING SCORES

# % PASSING MATH

# % PASSING ENGLISH

# OVERALL PASSING RATE (AVERAGE OF ABOVE TWO)