In [1]:
# Imports pandas and pathlib.
import pandas as pd
import pathlib

In [2]:
# Shows the file path. 
school_data_import = pathlib.Path("../Resources/schools_complete.csv")
student_data_import = pathlib.Path("../Resources/students_complete.csv")

In [3]:
# Reads the csv files.
schools_data_df = pd.read_csv(school_data_import)
students_data_df = pd.read_csv(student_data_import)

In [4]:
print("DISTRICT SUMMARY")

DISTRICT SUMMARY


In [5]:
# Displays the head of the csv file (schools data's header and 5 rows of data).
schools_data_df.head()

Unnamed: 0,School ID,school_name,type,size,budget
0,0,Huang High School,District,2917,1910635
1,1,Figueroa High School,District,2949,1884411
2,2,Shelton High School,Charter,1761,1056600
3,3,Hernandez High School,District,4635,3022020
4,4,Griffin High School,Charter,1468,917500


In [6]:
# Displays the head of the csv file (student data's header and 5 rows of data).
students_data_df.head()

Unnamed: 0,Student ID,student_name,gender,grade,school_name,reading_score,math_score
0,0,Paul Bradley,M,9th,Huang High School,66,79
1,1,Victor Smith,M,12th,Huang High School,94,61
2,2,Kevin Rodriguez,M,12th,Huang High School,90,60
3,3,Dr. Richard Scott,M,12th,Huang High School,67,58
4,4,Bonnie Ray,F,9th,Huang High School,97,84


In [7]:
school_students_info_df =pd.merge(schools_data_df, students_data_df, on=["school_name"]) 
school_students_info_df.head()

Unnamed: 0,School ID,school_name,type,size,budget,Student ID,student_name,gender,grade,reading_score,math_score
0,0,Huang High School,District,2917,1910635,0,Paul Bradley,M,9th,66,79
1,0,Huang High School,District,2917,1910635,1,Victor Smith,M,12th,94,61
2,0,Huang High School,District,2917,1910635,2,Kevin Rodriguez,M,12th,90,60
3,0,Huang High School,District,2917,1910635,3,Dr. Richard Scott,M,12th,67,58
4,0,Huang High School,District,2917,1910635,4,Bonnie Ray,F,9th,97,84


In [8]:
# Counts the number of total unique schools.
total_unique_schools_df = len(schools_data_df["school_name"].unique())
print("Total number of unique schools : " , total_unique_schools_df)

Total number of unique schools :  15


In [9]:
# Counts the number of total students in district.
total_students_df = len(students_data_df["Student ID"])
print("Total students: ", total_students_df)

Total students:  39170


In [10]:
# Adds all the budget to calculate the total budget in district.
total_budget_df = schools_data_df["budget"].sum()
print("Total budget: ", total_budget_df)

Total budget:  24649428


In [11]:
# Calculates the average math score.
average_math_score_df = students_data_df["math_score"].mean()
print("Average math score: ", average_math_score_df)

Average math score:  78.98537145774827


In [12]:
# Calculates the average reading score.
average_reading_score_df = students_data_df["reading_score"].mean()
print("Average reading score: ", average_reading_score_df)

Average reading score:  81.87784018381414


In [13]:
# Calculates the percentage of students who passed math.
number_of_math_pass_students = (len(students_data_df.loc[students_data_df["math_score"] >= 70]))
percentage_of_math_pass_students = ((number_of_math_pass_students)/total_students_df)* 100
print("Percentage of math pass students: ", percentage_of_math_pass_students)

Percentage of math pass students:  74.9808526933878


In [14]:
# Calculates the percentage of students who passed reading.
number_of_reading_pass_students = (len(students_data_df.loc[students_data_df["reading_score"] >= 70]))
percentage_of_reading_pass_students = ((number_of_reading_pass_students)/total_students_df)* 100
print("Percentage of reading pass students: ", percentage_of_reading_pass_students)

Percentage of reading pass students:  85.80546336482001


In [15]:
# Calculates the percentage of students who passed both math and reading.
# Displays the overall percentage who passed math and reading.
overall_pass_students= (students_data_df[(students_data_df["reading_score"] >= 70) & (students_data_df["math_score"] >= 70)]).count()
overall_percent_passed_math_and_reading = (overall_pass_students['math_score']/total_students_df)*100
print("Overall passing percentage: ", overall_percent_passed_math_and_reading)
  
# print("Overall passing percentage: ", overall_percent_passed_math_and_reading)

Overall passing percentage:  65.17232575950983


In [16]:
district_summary =  {"Total number of unique schools ": [total_unique_schools_df],
                    "Total students ": total_students_df,
                    "Total budget ": total_budget_df,
                     "Average math score ": average_math_score_df,
                    "Average reading score ":  average_reading_score_df,
                    "Percentage of math pass students ": percentage_of_math_pass_students,
                    "Percentage of reading pass students ": percentage_of_reading_pass_students,
                    "Overall passing percentage ": overall_percent_passed_math_and_reading}

district_summary_df = pd.DataFrame(data=district_summary)
district_summary_df

Unnamed: 0,Total number of unique schools,Total students,Total budget,Average math score,Average reading score,Percentage of math pass students,Percentage of reading pass students,Overall passing percentage
0,15,39170,24649428,78.985371,81.87784,74.980853,85.805463,65.172326


In [17]:
print("SCHOOL SUMMARY")

SCHOOL SUMMARY


In [18]:
schools_data_df.head()

Unnamed: 0,School ID,school_name,type,size,budget
0,0,Huang High School,District,2917,1910635
1,1,Figueroa High School,District,2949,1884411
2,2,Shelton High School,Charter,1761,1056600
3,3,Hernandez High School,District,4635,3022020
4,4,Griffin High School,Charter,1468,917500


In [19]:
students_data_df.head()

Unnamed: 0,Student ID,student_name,gender,grade,school_name,reading_score,math_score
0,0,Paul Bradley,M,9th,Huang High School,66,79
1,1,Victor Smith,M,12th,Huang High School,94,61
2,2,Kevin Rodriguez,M,12th,Huang High School,90,60
3,3,Dr. Richard Scott,M,12th,Huang High School,67,58
4,4,Bonnie Ray,F,9th,Huang High School,97,84


In [20]:
school_students_info_df =pd.merge(schools_data_df, students_data_df, on=["school_name"]) 
school_students_info_df.head()

Unnamed: 0,School ID,school_name,type,size,budget,Student ID,student_name,gender,grade,reading_score,math_score
0,0,Huang High School,District,2917,1910635,0,Paul Bradley,M,9th,66,79
1,0,Huang High School,District,2917,1910635,1,Victor Smith,M,12th,94,61
2,0,Huang High School,District,2917,1910635,2,Kevin Rodriguez,M,12th,90,60
3,0,Huang High School,District,2917,1910635,3,Dr. Richard Scott,M,12th,67,58
4,0,Huang High School,District,2917,1910635,4,Bonnie Ray,F,9th,97,84


In [21]:
school_names = sorted(schools_data_df["school_name"].tolist())
school_type_df = pd.DataFrame(schools_data_df["type"])
school_type_df.columns = ['School Type']
school_type_df.index = school_names
school_type_df

Unnamed: 0,School Type
Bailey High School,District
Cabrera High School,District
Figueroa High School,Charter
Ford High School,District
Griffin High School,Charter
Hernandez High School,Charter
Holden High School,Charter
Huang High School,District
Johnson High School,Charter
Pena High School,Charter


In [22]:
school_name_df = pd.DataFrame(schools_data_df["school_name"])
school_name_df.index = school_names
school_name_df

Unnamed: 0,school_name
Bailey High School,Huang High School
Cabrera High School,Figueroa High School
Figueroa High School,Shelton High School
Ford High School,Hernandez High School
Griffin High School,Griffin High School
Hernandez High School,Wilson High School
Holden High School,Cabrera High School
Huang High School,Bailey High School
Johnson High School,Holden High School
Pena High School,Pena High School


In [23]:
total_student_per_school = school_students_info_df.groupby('school_name')
total_student_per_school = pd.DataFrame((total_student_per_school["Student ID"].count()))
total_student_per_school.columns = ['Total Students']
total_student_per_school.index = school_names
total_student_per_school

Unnamed: 0,Total Students
Bailey High School,4976
Cabrera High School,1858
Figueroa High School,2949
Ford High School,2739
Griffin High School,1468
Hernandez High School,4635
Holden High School,427
Huang High School,2917
Johnson High School,4761
Pena High School,962


In [32]:
total_budget_per_school_df = schools_data_df.groupby('school_name')
per_school_budget = pd.DataFrame(total_budget_per_school_df["budget"].agg('sum'))
per_school_budget.columns = ['Total School Budget']
per_school_budget.index = school_names
per_school_budget

Unnamed: 0,Total School Budget
Bailey High School,3124928
Cabrera High School,1081356
Figueroa High School,1884411
Ford High School,1763916
Griffin High School,917500
Hernandez High School,3022020
Holden High School,248087
Huang High School,1910635
Johnson High School,3094650
Pena High School,585858


In [33]:
total_budget_per_student_df = pd.DataFrame(per_school_budget['Total School Budget'] / total_student_per_school['Total Students'])
total_budget_per_student_df.columns = ['Budget Per Student']
total_budget_per_student_df.index = school_names
total_budget_per_student_df


Unnamed: 0,Budget Per Student
Bailey High School,628.0
Cabrera High School,582.0
Figueroa High School,639.0
Ford High School,644.0
Griffin High School,625.0
Hernandez High School,652.0
Holden High School,581.0
Huang High School,655.0
Johnson High School,650.0
Pena High School,609.0


In [26]:
average_math_score_per_school = school_students_info_df.groupby('school_name')
average_math_score_per_school = pd.DataFrame(average_math_score_per_school['math_score'].mean())
average_math_score_per_school.columns = ['Math Score']
average_math_score_per_school.index = school_names
average_math_score_per_school


Unnamed: 0,Math Score
Bailey High School,77.048432
Cabrera High School,83.061895
Figueroa High School,76.711767
Ford High School,77.102592
Griffin High School,83.351499
Hernandez High School,77.289752
Holden High School,83.803279
Huang High School,76.629414
Johnson High School,77.072464
Pena High School,83.839917


In [27]:
average_reading_score_per_school = school_students_info_df.groupby('school_name')
average_reading_score_per_school = pd.DataFrame(average_reading_score_per_school['reading_score'].mean())
average_reading_score_per_school.columns = ['Reading Score']
average_reading_score_per_school.index = school_names
average_reading_score_per_school

Unnamed: 0,Reading Score
Bailey High School,81.033963
Cabrera High School,83.97578
Figueroa High School,81.15802
Ford High School,80.746258
Griffin High School,83.816757
Hernandez High School,80.934412
Holden High School,83.814988
Huang High School,81.182722
Johnson High School,80.966394
Pena High School,84.044699


In [28]:
student_passing_math_per_school = school_students_info_df[(school_students_info_df["math_score"] >= 70)]
student_passing_math_per_school = pd.DataFrame(student_passing_math_per_school.groupby(["school_name"]).count())    
percentage_student_passing_math_per_school = pd.DataFrame(student_passing_math_per_school['math_score'] / total_student_per_school['Total Students'] * 100)
percentage_student_passing_math_per_school.columns = ['Math Score Percentage']
percentage_student_passing_math_per_school.index = school_names
percentage_student_passing_math_per_school

Unnamed: 0,Math Score Percentage
Bailey High School,66.680064
Cabrera High School,94.133477
Figueroa High School,65.988471
Ford High School,68.309602
Griffin High School,93.392371
Hernandez High School,66.752967
Holden High School,92.505855
Huang High School,65.683922
Johnson High School,66.057551
Pena High School,94.594595


In [29]:
student_passing_reading_per_school = school_students_info_df[(school_students_info_df["reading_score"] >= 70)]
student_passing_reading_per_school = student_passing_reading_per_school.groupby(["school_name"]).count()                                                  
percentage_student_passing_reading_per_school = pd.DataFrame((student_passing_reading_per_school["reading_score"]/total_student_per_school['Total Students']) * 100)
percentage_student_passing_reading_per_school.columns = ['Reading Score Percentage']
percentage_student_passing_reading_per_school.index = school_names
percentage_student_passing_reading_per_school

Unnamed: 0,Reading Score Percentage
Bailey High School,81.93328
Cabrera High School,97.039828
Figueroa High School,80.739234
Ford High School,79.299014
Griffin High School,97.138965
Hernandez High School,80.862999
Holden High School,96.252927
Huang High School,81.316421
Johnson High School,81.222432
Pena High School,95.945946


In [30]:
students_passing_math_and_reading = school_students_info_df[
    (school_students_info_df["reading_score"] >= 70) & (school_students_info_df["math_score"] >= 70)
]
students_passing_math_and_reading = (students_passing_math_and_reading.groupby(["school_name"]).count())
percentage_students_passing_math_and_reading = pd.DataFrame((students_passing_math_and_reading['reading_score']/total_student_per_school['Total Students'])*100)
percentage_students_passing_math_and_reading.columns = ['Overall Percentage']
percentage_students_passing_math_and_reading.index = school_names
percentage_students_passing_math_and_reading


Unnamed: 0,Overall Percentage
Bailey High School,54.642283
Cabrera High School,91.334769
Figueroa High School,53.204476
Ford High School,54.289887
Griffin High School,90.599455
Hernandez High School,53.527508
Holden High School,89.227166
Huang High School,53.513884
Johnson High School,53.539172
Pena High School,90.540541


In [31]:
per_school_summary_df = pd.concat(
    [school_type_df,
     total_student_per_school,
     per_school_budget,
     total_budget_per_student_df,
     average_math_score_per_school,
     average_reading_score_per_school,
     percentage_student_passing_math_per_school,
     percentage_student_passing_reading_per_school,
     percentage_students_passing_math_and_reading
     
], axis = 1)
per_school_summary_df

Unnamed: 0,School Type,Total Students,Budget Per Student,Math Score,Reading Score,Math Score Percentage,Reading Score Percentage,Overall Percentage
Bailey High School,District,4976,628.0,77.048432,81.033963,66.680064,81.93328,54.642283
Cabrera High School,District,1858,582.0,83.061895,83.97578,94.133477,97.039828,91.334769
Figueroa High School,Charter,2949,639.0,76.711767,81.15802,65.988471,80.739234,53.204476
Ford High School,District,2739,644.0,77.102592,80.746258,68.309602,79.299014,54.289887
Griffin High School,Charter,1468,625.0,83.351499,83.816757,93.392371,97.138965,90.599455
Hernandez High School,Charter,4635,652.0,77.289752,80.934412,66.752967,80.862999,53.527508
Holden High School,Charter,427,581.0,83.803279,83.814988,92.505855,96.252927,89.227166
Huang High School,District,2917,655.0,76.629414,81.182722,65.683922,81.316421,53.513884
Johnson High School,Charter,4761,650.0,77.072464,80.966394,66.057551,81.222432,53.539172
Pena High School,Charter,962,609.0,83.839917,84.044699,94.594595,95.945946,90.540541
