In [1]:
# Import Dependencies
import os
import pandas as pd
import numpy as np

In [2]:
# Import and Read CSV Files
file_one = os.path.join('schools_complete.csv')
file_two = os.path.join('students_complete.csv')

schools_df = pd.read_csv(file_one, encoding='utf-8')
students_df = pd.read_csv(file_two, encoding='utf-8')
schools_df.head()

Unnamed: 0,School ID,name,type,size,budget
0,0,Huang High School,District,2917,1910635
1,1,Figueroa High School,District,2949,1884411
2,2,Shelton High School,Charter,1761,1056600
3,3,Hernandez High School,District,4635,3022020
4,4,Griffin High School,Charter,1468,917500


In [3]:
students_df.head()

Unnamed: 0,Student ID,name,gender,grade,school,reading_score,math_score
0,0,Paul Bradley,M,9th,Huang High School,66,79
1,1,Victor Smith,M,12th,Huang High School,94,61
2,2,Kevin Rodriguez,M,12th,Huang High School,90,60
3,3,Dr. Richard Scott,M,12th,Huang High School,67,58
4,4,Bonnie Ray,F,9th,Huang High School,97,84


In [4]:
# Print out the required information for the District Summary DataFrame
total_schools = schools_df['name'].count()
total_students = students_df['name'].count()
total_budget = schools_df['budget'].sum()
avg_math_score = students_df['math_score'].mean()
avg_reading_score = students_df['reading_score'].mean()
print('Total Schools: ' + str(total_schools))
print('Total Students: ' + str(total_students))
print('Total Budget: ' + str(total_budget))
print('Average Math Score: ' + str(avg_math_score))
print('Average Reading Score: ' + str(avg_reading_score))

Total Schools: 15
Total Students: 39170
Total Budget: 24649428
Average Math Score: 78.98537145774827
Average Reading Score: 81.87784018381414


In [5]:
# The amount of students who passed math and reading
pass_math = (students_df["math_score"] > 70)
pass_read = (students_df["reading_score"] > 70)
pass_math.head()

0     True
1    False
2    False
3    False
4     True
Name: math_score, dtype: bool

In [6]:
#Calculate the percentage of students passing math and reading
avg_math_percent = np.sum(pass_math)/total_students * 100
avg_read_percent = np.sum(pass_read)/total_students * 100
print('% Passing Math: ' + str(round(avg_math_percent, 2)))
print('% Passing Reading: ' + str(round(avg_read_percent, 2)))

% Passing Math: 72.39
% Passing Reading: 82.97


In [7]:
#Overall passing is the average of the two % Passing Math and Reading
overall_passing = (avg_math_percent + avg_read_percent) / 2
overall_passing

77.68189941281594

In [8]:
district_summary_df = pd.DataFrame([{
    'Total Schools': round(total_schools,2),
    'Total Students': round(total_students,2),
    'Total Budget': round(total_budget,2),
    'Average Math Score': round(avg_math_score,2),
    'Average Reading Score': round(avg_reading_score, 2),
    '% Passing Math': round(avg_math_percent,2),
    '% Passing Reading': round(avg_read_percent, 2),
    '% Overall Passing Rate': round(overall_passing, 2)
}])
district_summary_df = district_summary_df[['Total Schools', 'Total Students', 'Total Budget', 'Average Math Score', 'Average Reading Score', '% Passing Math', '% Passing Reading', "% Overall Passing Rate"]]
district_summary_df

Unnamed: 0,Total Schools,Total Students,Total Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing Rate
0,15,39170,24649428,78.99,81.88,72.39,82.97,77.68


In [9]:
#Count how many students are at each school
school_counts = students_df["school"].value_counts()
school_counts

Bailey High School       4976
Johnson High School      4761
Hernandez High School    4635
Rodriguez High School    3999
Figueroa High School     2949
Huang High School        2917
Ford High School         2739
Wilson High School       2283
Cabrera High School      1858
Wright High School       1800
Shelton High School      1761
Thomas High School       1635
Griffin High School      1468
Pena High School          962
Holden High School        427
Name: school, dtype: int64

In [10]:
#Isolate the school names and districts
school_type = schools_df.loc[:, ['name', 'type']]
school_type

Unnamed: 0,name,type
0,Huang High School,District
1,Figueroa High School,District
2,Shelton High School,Charter
3,Hernandez High School,District
4,Griffin High School,Charter
5,Wilson High School,Charter
6,Cabrera High School,Charter
7,Bailey High School,District
8,Holden High School,Charter
9,Pena High School,Charter


In [11]:
grouped_budget = schools_df.groupby('name')
budget = grouped_budget['budget'].sum()
budget

name
Bailey High School       3124928
Cabrera High School      1081356
Figueroa High School     1884411
Ford High School         1763916
Griffin High School       917500
Hernandez High School    3022020
Holden High School        248087
Huang High School        1910635
Johnson High School      3094650
Pena High School          585858
Rodriguez High School    2547363
Shelton High School      1056600
Thomas High School       1043130
Wilson High School       1319574
Wright High School       1049400
Name: budget, dtype: int64

In [12]:
budget_student = budget / school_counts
budget_student

Bailey High School       628.0
Cabrera High School      582.0
Figueroa High School     639.0
Ford High School         644.0
Griffin High School      625.0
Hernandez High School    652.0
Holden High School       581.0
Huang High School        655.0
Johnson High School      650.0
Pena High School         609.0
Rodriguez High School    637.0
Shelton High School      600.0
Thomas High School       638.0
Wilson High School       578.0
Wright High School       583.0
dtype: float64

In [13]:
bailey = students_df.loc[students_df["school"] == "Bailey High School"]
bailey.head()

Unnamed: 0,Student ID,name,gender,grade,school,reading_score,math_score
17871,17871,Blake Martin,M,9th,Bailey High School,75,59
17872,17872,Kathryn Kane,F,12th,Bailey High School,84,58
17873,17873,Richard Haas,M,11th,Bailey High School,79,86
17874,17874,Frank Marsh,M,9th,Bailey High School,71,89
17875,17875,Charles Goodman Jr.,M,9th,Bailey High School,90,61


In [14]:
#Calculate Total Bailey High School Students
total_students_bailey = bailey['name'].count()
total_students_bailey

4976

In [15]:
#Calculate the Math and Reading Score for Bailey
bailey_math = (bailey["math_score"] > 70)
bailey_read = (bailey["reading_score"] > 70)
bailey_math_percent = np.sum(bailey_math)/total_students_bailey * 100
bailey_read_percent = np.sum(bailey_read)/total_students_bailey * 100
bailey_passing = (bailey_math_percent + bailey_read_percent) / 2

In [16]:
cabrera = students_df.loc[students_df["school"] == "Cabrera High School"]
cabrera.head()

Unnamed: 0,Student ID,name,gender,grade,school,reading_score,math_score
16013,16013,Olivia Short,F,11th,Cabrera High School,94,94
16014,16014,Kerry Jones,F,9th,Cabrera High School,98,97
16015,16015,Bruce Jackson,M,12th,Cabrera High School,78,83
16016,16016,Amy Green,F,12th,Cabrera High School,89,92
16017,16017,Melanie Mason,F,12th,Cabrera High School,96,71


In [17]:
#Do the same for Cabera High School
total_students_cabrera = cabrera['name'].count()
cabrera_math = (cabrera["math_score"] > 70)
cabrera_read = (cabrera["reading_score"] > 70)
cabrera_math_percent = np.sum(cabrera_math)/total_students_cabrera * 100
cabrera_read_percent = np.sum(cabrera_read)/total_students_cabrera * 100
cabrera_passing = (cabrera_math_percent + cabrera_read_percent) / 2

In [18]:
#Do the same for Figueroa High School
figueroa = students_df.loc[students_df["school"] == "Figueroa High School"]
figueroa.head()

Unnamed: 0,Student ID,name,gender,grade,school,reading_score,math_score
2917,2917,Amy Jacobs,F,10th,Figueroa High School,85,87
2918,2918,Nathan Campbell,M,12th,Figueroa High School,97,84
2919,2919,Randall Stewart,M,12th,Figueroa High School,67,77
2920,2920,Jennifer Brown,F,9th,Figueroa High School,97,64
2921,2921,Denise Lopez,F,10th,Figueroa High School,79,64


In [19]:
total_students_figueroa = figueroa['name'].count()
figueroa_math = (figueroa["math_score"] > 70)
figueroa_read = (figueroa["reading_score"] > 70)
figueroa_math_percent = np.sum(figueroa_math)/total_students_figueroa * 100
figueroa_read_percent = np.sum(figueroa_read)/total_students_figueroa * 100
figueroa_passing = (figueroa_math_percent + figueroa_read_percent) / 2

In [20]:
#Ford High School
ford = students_df.loc[students_df["school"] == "Ford High School"]
ford.head()

Unnamed: 0,Student ID,name,gender,grade,school,reading_score,math_score
34796,34796,Michael Mercado,M,9th,Ford High School,66,94
34797,34797,Stephen Wolf,M,11th,Ford High School,68,63
34798,34798,Bonnie Hughes,F,12th,Ford High School,73,59
34799,34799,Melissa Smith,F,11th,Ford High School,88,58
34800,34800,Brian Mitchell,M,10th,Ford High School,96,55


In [21]:
total_students_ford = ford['name'].count()
ford_math = (ford["math_score"] > 70)
ford_read = (ford["reading_score"] > 70)
ford_math_percent = np.sum(ford_math)/total_students_ford * 100
ford_read_percent = np.sum(ford_read)/total_students_ford * 100
ford_passing = (ford_math_percent + ford_read_percent) / 2

In [22]:
#Griffin High School
griffin = students_df.loc[students_df["school"] == "Griffin High School"]
griffin.head()

Unnamed: 0,Student ID,name,gender,grade,school,reading_score,math_score
12262,12262,Heather Wright,F,11th,Griffin High School,79,68
12263,12263,Elizabeth Goodwin,F,10th,Griffin High School,91,81
12264,12264,Michelle Wong,F,9th,Griffin High School,78,89
12265,12265,Scott Roth MD,M,11th,Griffin High School,91,85
12266,12266,Billy Wilson,M,12th,Griffin High School,76,83


In [23]:
total_students_griffin = griffin['name'].count()
griffin_math = (griffin["math_score"] > 70)
griffin_read = (griffin["reading_score"] > 70)
griffin_math_percent = np.sum(griffin_math)/total_students_griffin * 100
griffin_read_percent = np.sum(griffin_read)/total_students_griffin * 100
griffin_passing = (griffin_math_percent + griffin_read_percent) / 2

In [24]:
#Hernandez High School
hern = students_df.loc[students_df["school"] == "Hernandez High School"]
hern.head()

Unnamed: 0,Student ID,name,gender,grade,school,reading_score,math_score
7627,7627,Russell Davis,M,10th,Hernandez High School,70,88
7628,7628,Timothy Walker,M,12th,Hernandez High School,97,93
7629,7629,Katie Johnston,F,12th,Hernandez High School,83,81
7630,7630,Joann Oconnell,F,12th,Hernandez High School,77,91
7631,7631,Sarah Alexander,F,10th,Hernandez High School,84,93


In [25]:
total_students_hern = hern['name'].count()
hern_math = (hern["math_score"] > 70)
hern_read = (hern["reading_score"] > 70)
hern_math_percent = np.sum(hern_math)/total_students_hern * 100
hern_read_percent = np.sum(hern_read)/total_students_hern * 100
hern_passing = (hern_math_percent + hern_read_percent) / 2

In [26]:
#Huang High School
huang = students_df.loc[students_df["school"] == "Huang High School"]
hern.head()

Unnamed: 0,Student ID,name,gender,grade,school,reading_score,math_score
7627,7627,Russell Davis,M,10th,Hernandez High School,70,88
7628,7628,Timothy Walker,M,12th,Hernandez High School,97,93
7629,7629,Katie Johnston,F,12th,Hernandez High School,83,81
7630,7630,Joann Oconnell,F,12th,Hernandez High School,77,91
7631,7631,Sarah Alexander,F,10th,Hernandez High School,84,93


In [27]:
total_students_huang = huang['name'].count()
huang_math = (huang["math_score"] > 70)
huang_read = (huang["reading_score"] > 70)
huang_math_percent = np.sum(huang_math)/total_students_huang * 100
huang_read_percent = np.sum(huang_read)/total_students_huang * 100
huang_passing = (huang_math_percent + huang_read_percent) / 2

In [28]:
#Johnson High School
john = students_df.loc[students_df["school"] == "Johnson High School"]
john.head()

Unnamed: 0,Student ID,name,gender,grade,school,reading_score,math_score
30035,30035,Lisa Casey,F,12th,Johnson High School,87,87
30036,30036,Jessica Lopez,F,9th,Johnson High School,98,62
30037,30037,Anna Wilkins,F,11th,Johnson High School,89,77
30038,30038,Andrew Smith,M,9th,Johnson High School,66,85
30039,30039,Robert Allison,M,11th,Johnson High School,63,85


In [29]:
total_students_john = john['name'].count()
john_math = (john["math_score"] > 70)
john_read = (john["reading_score"] > 70)
john_math_percent = np.sum(john_math)/total_students_john * 100
john_read_percent = np.sum(john_read)/total_students_john * 100
john_passing = (john_math_percent + john_read_percent) / 2

In [30]:
#Pena High School
pena = students_df.loc[students_df["school"] == "Pena High School"]
pena.head()

Unnamed: 0,Student ID,name,gender,grade,school,reading_score,math_score
23274,23274,Alec Davis,M,9th,Pena High School,91,75
23275,23275,Michael Meyer,M,10th,Pena High School,94,76
23276,23276,Donald Gutierrez,M,11th,Pena High School,98,91
23277,23277,Travis Chavez,M,11th,Pena High School,78,71
23278,23278,Sheena Ball,F,12th,Pena High School,87,92


In [31]:
total_students_pena = pena['name'].count()
pena_math = (pena["math_score"] > 70)
pena_read = (pena["reading_score"] > 70)
pena_math_percent = np.sum(pena_math)/total_students_pena * 100
pena_read_percent = np.sum(pena_read)/total_students_pena * 100
pena_passing = (pena_math_percent + pena_read_percent) / 2

In [32]:
#Rodriguez High School
rod = students_df.loc[students_df["school"] == "Rodriguez High School"]
rod.head()

Unnamed: 0,Student ID,name,gender,grade,school,reading_score,math_score
26036,26036,Sherry Jenkins,F,11th,Rodriguez High School,74,81
26037,26037,Kimberly Calderon,F,10th,Rodriguez High School,80,86
26038,26038,William Brady,M,11th,Rodriguez High School,97,62
26039,26039,Jacob Padilla,M,11th,Rodriguez High School,79,73
26040,26040,Paula Maldonado,F,10th,Rodriguez High School,96,92


In [33]:
total_students_rod = rod['name'].count()
rod_math = (rod["math_score"] > 70)
rod_read = (rod["reading_score"] > 70)
rod_math_percent = np.sum(rod_math)/total_students_rod * 100
rod_read_percent = np.sum(rod_read)/total_students_rod * 100
rod_passing = (rod_math_percent + rod_read_percent) / 2

In [34]:
#Shelton High School
shel = students_df.loc[students_df["school"] == "Shelton High School"]
shel.head()

Unnamed: 0,Student ID,name,gender,grade,school,reading_score,math_score
5866,5866,Jamie Montgomery,F,12th,Shelton High School,70,91
5867,5867,Shannon Phillips,F,10th,Shelton High School,84,71
5868,5868,Todd Barber,M,11th,Shelton High School,95,99
5869,5869,Desiree King,F,12th,Shelton High School,76,95
5870,5870,Melissa Roberts,F,10th,Shelton High School,71,82


In [35]:
total_students_shel = shel['name'].count()
shel_math = (shel["math_score"] > 70)
shel_read = (shel["reading_score"] > 70)
shel_math_percent = np.sum(shel_math)/total_students_shel * 100
shel_read_percent = np.sum(shel_read)/total_students_shel * 100
shel_passing = (shel_math_percent + shel_read_percent) / 2

In [36]:
#Thomas High School
tom = students_df.loc[students_df["school"] == "Thomas High School"]
tom.head()

Unnamed: 0,Student ID,name,gender,grade,school,reading_score,math_score
37535,37535,Norma Mata,F,10th,Thomas High School,76,76
37536,37536,Cody Miller,M,11th,Thomas High School,84,82
37537,37537,Erik Snyder,M,9th,Thomas High School,80,90
37538,37538,Tanya Martinez,F,9th,Thomas High School,71,69
37539,37539,Noah Erickson,M,9th,Thomas High School,86,76


In [37]:
total_students_tom = tom['name'].count()
tom_math = (tom["math_score"] > 70)
tom_read = (tom["reading_score"] > 70)
tom_math_percent = np.sum(tom_math)/total_students_tom * 100
tom_read_percent = np.sum(tom_read)/total_students_tom * 100
tom_passing = (tom_math_percent + tom_read_percent) / 2

In [38]:
#Wilson High School
wil = students_df.loc[students_df["school"] == "Wilson High School"]
wil.head()

Unnamed: 0,Student ID,name,gender,grade,school,reading_score,math_score
13730,13730,Kelli Anderson,F,10th,Wilson High School,84,71
13731,13731,Russell Ramirez,M,10th,Wilson High School,72,87
13732,13732,Eric Butler,M,10th,Wilson High School,97,82
13733,13733,Warren Kerr,M,11th,Wilson High School,93,68
13734,13734,Gail Hall,F,9th,Wilson High School,79,72


In [39]:
total_students_wil = wil['name'].count()
wil_math = (wil["math_score"] > 70)
wil_read = (wil["reading_score"] > 70)
wil_math_percent = np.sum(wil_math)/total_students_wil * 100
wil_read_percent = np.sum(wil_read)/total_students_wil * 100
wil_passing = (wil_math_percent + wil_read_percent) / 2

In [40]:
#Wright High School
wright = students_df.loc[students_df["school"] == "Wright High School"]
wright.head()

Unnamed: 0,Student ID,name,gender,grade,school,reading_score,math_score
24236,24236,Aaron Johnson,M,10th,Wright High School,89,72
24237,24237,Kimberly Hamilton,F,10th,Wright High School,84,93
24238,24238,Ashley Johns,F,10th,Wright High School,88,88
24239,24239,Stephanie Donovan,F,10th,Wright High School,75,84
24240,24240,Cynthia Guzman,F,11th,Wright High School,93,82


In [41]:
total_students_wright = wright['name'].count()
wright_math = (wright["math_score"] > 70)
wright_read = (wright["reading_score"] > 70)
wright_math_percent = np.sum(wright_math)/total_students_wright * 100
wright_read_percent = np.sum(wright_read)/total_students_wright * 100
wright_passing = (wright_math_percent + wright_read_percent) / 2

In [44]:
school_summary = [{
    'School Type': school_type,
    'Total Students': [round(total_students_bailey,2), round(total_students_cabrera,2), round(total_students_figueroa,2), round(total_students_ford,2), round(total_students_griffin,2), round(total_students_hern,2), round(total_students_huang,2), round(total_students_john,2), round(total_students_pena,2), round(total_students_rod,2), round(total_students_shel,2), round(total_students_tom,2), round(total_students_wil,2), round(total_students_wright,2)],
    'Total Budget': round(total_budget,2),
    'Average Math Score': round(avg_math_score,2),
    'Average Reading Score': round(avg_reading_score, 2),
    '% Passing Math': round(avg_math_percent,2),
    '% Passing Reading': round(avg_read_percent, 2),
    '% Overall Passing Rate': round(overall_passing, 2)
}])

school_summary_df = pd.DataFrame([{

school_summary_df

Unnamed: 0,% Overall Passing Rate,% Passing Math,% Passing Reading,Average Math Score,Average Reading Score,School Type,Total Budget,Total Students
0,77.68,72.39,82.97,78.99,81.88,name type 0 Hu...,24649428,"[4976, 1858, 2949, 2739, 1468, 4635, 2917, 476..."
