In [1]:
## Importing the Pandas Module for Analysis
import pandas as pd


In [2]:
## Loading the required files for analyses
schools_load_df = pd.read_csv("Resources\schools_complete.csv")
students_load_df = pd.read_csv("Resources\students_complete.csv")


In [3]:
## Viewing the Schools Data Frame
schools_load_df.head()

Unnamed: 0,School ID,school_name,type,size,budget
0,0,Huang High School,District,2917,1910635
1,1,Figueroa High School,District,2949,1884411
2,2,Shelton High School,Charter,1761,1056600
3,3,Hernandez High School,District,4635,3022020
4,4,Griffin High School,Charter,1468,917500


In [4]:
## Viewing the Students Data Frame
students_load_df.head()

Unnamed: 0,Student ID,student_name,gender,grade,school_name,reading_score,math_score
0,0,Paul Bradley,M,9th,Huang High School,66,79
1,1,Victor Smith,M,12th,Huang High School,94,61
2,2,Kevin Rodriguez,M,12th,Huang High School,90,60
3,3,Dr. Richard Scott,M,12th,Huang High School,67,58
4,4,Bonnie Ray,F,9th,Huang High School,97,84


In [5]:
## Merging the two dataframes
schools_complete_df = pd.merge(students_load_df, schools_load_df, on=["school_name","school_name"], how="left")
schools_complete_df.head()


Unnamed: 0,Student ID,student_name,gender,grade,school_name,reading_score,math_score,School ID,type,size,budget
0,0,Paul Bradley,M,9th,Huang High School,66,79,0,District,2917,1910635
1,1,Victor Smith,M,12th,Huang High School,94,61,0,District,2917,1910635
2,2,Kevin Rodriguez,M,12th,Huang High School,90,60,0,District,2917,1910635
3,3,Dr. Richard Scott,M,12th,Huang High School,67,58,0,District,2917,1910635
4,4,Bonnie Ray,F,9th,Huang High School,97,84,0,District,2917,1910635


# District Summary

In [6]:
## Total Number of Schools
## In this code, we first determine how many different schools there are followed by a count of these unique values. 
total_schools = (schools_complete_df["school_name"].value_counts()).count()

## Total Students
# using the .count() function will provide the total number of students
total_students = schools_complete_df["student_name"].count()

## Total Budget calculated using the .sum() function
## Since the new data frame will contain a the budget value for every row, we will need to re-examine the schools data frame to get the total value of the budget across all schools

total_budget = schools_load_df['budget'].sum()

## Calculating the average math score
average_math_score = schools_complete_df['math_score'].mean()

## Calculating the average reading score
average_reading_score = schools_complete_df['reading_score'].mean()


## Calculating the percentage of students who passed math

passing_math_count = (schools_complete_df.loc[schools_complete_df['math_score']>=70].shape[0])

total_math_count = schools_complete_df.loc[schools_complete_df['math_score']].shape[0]


passing_math_percentage = int(passing_math_count)/int(total_math_count) * 100

## Calculating the percentage of students who passed reading

passing_reading_count = (schools_complete_df.loc[schools_complete_df['reading_score']>=70].shape[0])

total_reading_count = schools_complete_df.loc[schools_complete_df['reading_score']].shape[0]


passing_reading_percentage = int(passing_reading_count)/int(total_reading_count) * 100

## Overall Passing rate for both Math and Reading

math_reading_pass_count = schools_complete_df.loc[(schools_complete_df['math_score']>=70) & (schools_complete_df['reading_score']>=70)].shape[0]

math_reading_pass_percentage = int(math_reading_pass_count)/int(total_students) * 100



In [7]:
## Creating a Summary Table Data Frame

district_summary_df = pd.DataFrame({
    'Total Schools':[total_schools],
    'Total Students': [total_students],
    'Total Budget': [total_budget],
    'Average Math Score': [average_math_score],
    'Average Reading Score': [average_reading_score],
    '% Passing Math': [math_reading_pass_percentage],
    '% Passing Reading': [math_reading_pass_percentage],
    '% Overall Pass': [math_reading_pass_percentage]
})

district_summary_df

Unnamed: 0,Total Schools,Total Students,Total Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Pass
0,15,39170,24649428,78.985371,81.87784,65.172326,65.172326,65.172326


In [8]:
district_summary_df = district_summary_df.style.format({    'Total Schools':'{:.0f}',
'Total Students':'{:,}',
'Total Budget':'${:,}'
})

In [9]:
district_summary_df

Unnamed: 0,Total Schools,Total Students,Total Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Pass
0,15,39170,"$24,649,428",78.985371,81.87784,65.172326,65.172326,65.172326


# School Summary

## School Names and Types

In [10]:
## School Names and Types
school_summary_df = schools_complete_df[['school_name','type','budget','size']].drop_duplicates().reset_index(drop=True)

school_summary_df

Unnamed: 0,school_name,type,budget,size
0,Huang High School,District,1910635,2917
1,Figueroa High School,District,1884411,2949
2,Shelton High School,Charter,1056600,1761
3,Hernandez High School,District,3022020,4635
4,Griffin High School,Charter,917500,1468
5,Wilson High School,Charter,1319574,2283
6,Cabrera High School,Charter,1081356,1858
7,Bailey High School,District,3124928,4976
8,Holden High School,Charter,248087,427
9,Pena High School,Charter,585858,962


In [11]:
## Calculating the Budget per student and adding the column to the Data Frame
school_summary_df['Per Student Budget'] = school_summary_df['budget']/school_summary_df['size']

school_summary_df

Unnamed: 0,school_name,type,budget,size,Per Student Budget
0,Huang High School,District,1910635,2917,655.0
1,Figueroa High School,District,1884411,2949,639.0
2,Shelton High School,Charter,1056600,1761,600.0
3,Hernandez High School,District,3022020,4635,652.0
4,Griffin High School,Charter,917500,1468,625.0
5,Wilson High School,Charter,1319574,2283,578.0
6,Cabrera High School,Charter,1081356,1858,582.0
7,Bailey High School,District,3124928,4976,628.0
8,Holden High School,Charter,248087,427,581.0
9,Pena High School,Charter,585858,962,609.0


### Data Frame for Each School

In [12]:
huang_school = schools_complete_df.loc[schools_complete_df['school_name']=='Huang High School']

figueroa_school = schools_complete_df.loc[schools_complete_df['school_name']=='Figueroa High School']

shelton_school = schools_complete_df.loc[schools_complete_df['school_name']=='Shelton High School']

hernandez_school = schools_complete_df.loc[schools_complete_df['school_name']=='Griffin High School']

griffin_school = schools_complete_df.loc[schools_complete_df['school_name']=='Griffin High School']

wilson_school = schools_complete_df.loc[schools_complete_df['school_name']=='Wilson High School']

cabrera_school = schools_complete_df.loc[schools_complete_df['school_name']=='Cabrera High School'] 

bailey_school = schools_complete_df.loc[schools_complete_df['school_name']=='Bailey High School']

holden_school = schools_complete_df.loc[schools_complete_df['school_name']=='Holden High School'] 

pena_school = schools_complete_df.loc[schools_complete_df['school_name']=='Pena High School']  

wright_school = schools_complete_df.loc[schools_complete_df['school_name']=='Wright High School']

rodriguez_school = schools_complete_df.loc[schools_complete_df['school_name']=='Rodriguez High School'] 

johnson_school = schools_complete_df.loc[schools_complete_df['school_name']=='Johnson High School']

ford_school = schools_complete_df.loc[schools_complete_df['school_name']=='Ford High School'] 

thomas_school = schools_complete_df.loc[schools_complete_df['school_name']=='Thomas High School']


### Calculating the Average Math Score for Each School

In [13]:
huang_math_score_average = huang_school['math_score'].mean() 

figueroa_math_score_average = figueroa_school['math_score'].mean()

shelton_math_score_average = shelton_school['math_score'].mean() 

hernandez_math_score_average = hernandez_school['math_score'].mean()

griffin_math_score_average = griffin_school['math_score'].mean()

wilson_math_score_average = wilson_school['math_score'].mean() 
cabrera_math_score_average = cabrera_school['math_score'].mean()

bailey_math_score_average = bailey_school['math_score'].mean() 
holden_math_score_average = holden_school['math_score'].mean() 
pena_math_score_average = pena_school['math_score'].mean()

wright_math_score_average = wright_school['math_score'].mean() 
rodriguez_math_score_average = rodriguez_school['math_score'].mean()

johnson_math_score_average = johnson_school['math_score'].mean()

ford_math_score_average = ford_school['math_score'].mean()

thomas_math_score_average = thomas_school['math_score'].mean()



### Combinding the Math Average to a Data Frame

In [14]:
school_math_average = [huang_math_score_average, figueroa_math_score_average, shelton_math_score_average, hernandez_math_score_average, griffin_math_score_average, wilson_math_score_average, cabrera_math_score_average, bailey_math_score_average, holden_math_score_average, pena_math_score_average, wright_math_score_average, rodriguez_math_score_average, johnson_math_score_average, ford_math_score_average, thomas_math_score_average]




### Average Reading Score for Each School

In [15]:
huang_reading_score_average = huang_school['reading_score'].mean() 

figueroa_reading_score_average = figueroa_school['reading_score'].mean()

shelton_reading_score_average = shelton_school['reading_score'].mean() 

hernandez_reading_score_average = hernandez_school['reading_score'].mean()

griffin_reading_score_average = griffin_school['reading_score'].mean()

wilson_reading_score_average = wilson_school['reading_score'].mean() 
cabrera_reading_score_average = cabrera_school['reading_score'].mean()

bailey_reading_score_average = bailey_school['reading_score'].mean() 
holden_reading_score_average = holden_school['reading_score'].mean() 
pena_reading_score_average = pena_school['reading_score'].mean()

wright_reading_score_average = wright_school['reading_score'].mean() 
rodriguez_reading_score_average = rodriguez_school['reading_score'].mean()

johnson_reading_score_average = johnson_school['reading_score'].mean()

ford_reading_score_average = ford_school['reading_score'].mean()

thomas_reading_score_average = thomas_school['reading_score'].mean()



### Combining the Reading Scores into a single data frame

In [16]:
school_reading_average = [huang_reading_score_average, figueroa_reading_score_average, shelton_reading_score_average, hernandez_reading_score_average, griffin_reading_score_average, wilson_reading_score_average, cabrera_reading_score_average, bailey_reading_score_average, holden_reading_score_average, pena_reading_score_average, wright_reading_score_average, rodriguez_reading_score_average, johnson_reading_score_average, ford_reading_score_average, thomas_reading_score_average]




### % Passing Math (The percentage of students that passed math.)

In [17]:
huang_math_percentage_pass = (huang_school.loc[huang_school['math_score']>=70].shape[0]/huang_school.loc[huang_school['math_score']>=0].shape[0])*100

figueroa_math_percentage_pass = (figueroa_school.loc[figueroa_school['math_score']>=70].shape[0]/figueroa_school.loc[figueroa_school['math_score']>=0].shape[0])*100

shelton_math_percentage_pass = (shelton_school.loc[shelton_school['math_score']>=70].shape[0]/shelton_school.loc[shelton_school['math_score']>=0].shape[0])*100

hernandez_math_percentage_pass = (hernandez_school.loc[hernandez_school['math_score']>=70].shape[0]/hernandez_school.loc[hernandez_school['math_score']>=0].shape[0])*100

griffin_math_percentage_pass = (griffin_school.loc[griffin_school['math_score']>=70].shape[0]/griffin_school.loc[griffin_school['math_score']>=0].shape[0])*100

wilson_math_percentage_pass = (wilson_school.loc[wilson_school['math_score']>=70].shape[0]/wilson_school.loc[wilson_school['math_score']>=0].shape[0])*100

cabrera_math_percentage_pass = (cabrera_school.loc[cabrera_school['math_score']>=70].shape[0]/cabrera_school.loc[cabrera_school['math_score']>=0].shape[0])*100

bailey_math_percentage_pass = (bailey_school.loc[bailey_school['math_score']>=70].shape[0]/bailey_school.loc[bailey_school['math_score']>=0].shape[0])*100

holden_math_percentage_pass = (holden_school.loc[holden_school['math_score']>=70].shape[0]/holden_school.loc[holden_school['math_score']>=0].shape[0])*100

pena_math_percentage_pass = (pena_school.loc[pena_school['math_score']>=70].shape[0]/pena_school.loc[pena_school['math_score']>=0].shape[0])*100

wright_math_percentage_pass =  (wright_school.loc[wright_school['math_score']>=70].shape[0]/wright_school.loc[wright_school['math_score']>=0].shape[0])*100

rodriguez_math_percentage_pass = (rodriguez_school.loc[rodriguez_school['math_score']>=70].shape[0]/rodriguez_school.loc[rodriguez_school['math_score']>=0].shape[0])*100

johnson_math_percentage_pass = (johnson_school.loc[johnson_school['math_score']>=70].shape[0]/johnson_school.loc[johnson_school['math_score']>=0].shape[0])*100


ford_math_percentage_pass = (ford_school.loc[ford_school['math_score']>=70].shape[0]/ford_school.loc[ford_school['math_score']>=0].shape[0])*100

thomas_math_percentage_pass = (thomas_school.loc[thomas_school['math_score']>=70].shape[0]/thomas_school.loc[thomas_school['math_score']>=0].shape[0])*100




### Combining The lists for Math Percentage Pass for each school

In [18]:
school_math_percentage_pass = [huang_math_percentage_pass,figueroa_math_percentage_pass,shelton_math_percentage_pass,hernandez_math_percentage_pass,griffin_math_percentage_pass,wilson_math_percentage_pass,cabrera_math_percentage_pass,bailey_math_percentage_pass,holden_math_percentage_pass,pena_math_percentage_pass,wright_math_percentage_pass,rodriguez_math_percentage_pass,johnson_math_percentage_pass,ford_math_percentage_pass,thomas_math_percentage_pass]





### % Passing Reading (The percentage of students that passed reading.)

In [19]:
huang_reading_percentage_pass = (huang_school.loc[huang_school['reading_score']>=70].shape[0]/huang_school.loc[huang_school['reading_score']>=0].shape[0])*100

figueroa_reading_percentage_pass = (figueroa_school.loc[figueroa_school['reading_score']>=70].shape[0]/figueroa_school.loc[figueroa_school['reading_score']>=0].shape[0])*100

shelton_reading_percentage_pass = (shelton_school.loc[shelton_school['reading_score']>=70].shape[0]/shelton_school.loc[shelton_school['reading_score']>=0].shape[0])*100

hernandez_reading_percentage_pass = (hernandez_school.loc[hernandez_school['reading_score']>=70].shape[0]/hernandez_school.loc[hernandez_school['reading_score']>=0].shape[0])*100

griffin_reading_percentage_pass = (griffin_school.loc[griffin_school['reading_score']>=70].shape[0]/griffin_school.loc[griffin_school['reading_score']>=0].shape[0])*100

wilson_reading_percentage_pass = (wilson_school.loc[wilson_school['reading_score']>=70].shape[0]/wilson_school.loc[wilson_school['reading_score']>=0].shape[0])*100

cabrera_reading_percentage_pass = (cabrera_school.loc[cabrera_school['reading_score']>=70].shape[0]/cabrera_school.loc[cabrera_school['reading_score']>=0].shape[0])*100

bailey_reading_percentage_pass = (bailey_school.loc[bailey_school['reading_score']>=70].shape[0]/bailey_school.loc[bailey_school['reading_score']>=0].shape[0])*100

holden_reading_percentage_pass = (holden_school.loc[holden_school['reading_score']>=70].shape[0]/holden_school.loc[holden_school['reading_score']>=0].shape[0])*100

pena_reading_percentage_pass = (pena_school.loc[pena_school['reading_score']>=70].shape[0]/pena_school.loc[pena_school['reading_score']>=0].shape[0])*100

wright_reading_percentage_pass =  (wright_school.loc[wright_school['reading_score']>=70].shape[0]/wright_school.loc[wright_school['reading_score']>=0].shape[0])*100

rodriguez_reading_percentage_pass = (rodriguez_school.loc[rodriguez_school['reading_score']>=70].shape[0]/rodriguez_school.loc[rodriguez_school['reading_score']>=0].shape[0])*100

johnson_reading_percentage_pass = (johnson_school.loc[johnson_school['reading_score']>=70].shape[0]/johnson_school.loc[johnson_school['reading_score']>=0].shape[0])*100


ford_reading_percentage_pass = (ford_school.loc[ford_school['reading_score']>=70].shape[0]/ford_school.loc[ford_school['reading_score']>=0].shape[0])*100

thomas_reading_percentage_pass = (thomas_school.loc[thomas_school['reading_score']>=70].shape[0]/thomas_school.loc[thomas_school['reading_score']>=0].shape[0])*100




### Combining The lists for Reading Percentage Pass for each school

In [20]:
school_reading_percentage_pass =[huang_reading_percentage_pass,figueroa_reading_percentage_pass,shelton_reading_percentage_pass,hernandez_reading_percentage_pass,griffin_reading_percentage_pass,wilson_reading_percentage_pass,cabrera_reading_percentage_pass,bailey_reading_percentage_pass,holden_reading_percentage_pass,pena_reading_percentage_pass,wright_reading_percentage_pass,rodriguez_reading_percentage_pass,johnson_reading_percentage_pass,ford_reading_percentage_pass,thomas_reading_percentage_pass]





### % Overall Passing (The percentage of students that passed math **and** reading.

In [21]:
huang_math_reading_percentage = (huang_school.loc[(huang_school['math_score']>=70) & (huang_school['reading_score']>=70)].shape[0])/(huang_school.loc[(huang_school['math_score']>=0) & (huang_school['reading_score']>=0)].shape[0])*100

figueroa_math_reading_percentage = (figueroa_school.loc[(figueroa_school['math_score']>=70) & (figueroa_school['reading_score']>=70)].shape[0])/(figueroa_school.loc[(figueroa_school['math_score']>=0) & (figueroa_school['reading_score']>=0)].shape[0])*100

shelton_math_reading_percentage = (shelton_school.loc[(shelton_school['math_score']>=70) & (shelton_school['reading_score']>=70)].shape[0])/(shelton_school.loc[(shelton_school['math_score']>=0) & (shelton_school['reading_score']>=0)].shape[0])*100

hernandez_math_reading_percentage = (hernandez_school.loc[(hernandez_school['math_score']>=70) & (hernandez_school['reading_score']>=70)].shape[0])/(hernandez_school.loc[(hernandez_school['math_score']>=0) & (hernandez_school['reading_score']>=0)].shape[0])*100

griffin_math_reading_percentage = (griffin_school.loc[(griffin_school['math_score']>=70) & (griffin_school['reading_score']>=70)].shape[0])/(griffin_school.loc[(griffin_school['math_score']>=0) & (griffin_school['reading_score']>=0)].shape[0])*100

wilson_math_reading_percentage = (wilson_school.loc[(wilson_school['math_score']>=70) & (wilson_school['reading_score']>=70)].shape[0])/(wilson_school.loc[(wilson_school['math_score']>=0) & (wilson_school['reading_score']>=0)].shape[0])*100

cabrera_math_reading_percentage = (cabrera_school.loc[(cabrera_school['math_score']>=70) & (cabrera_school['reading_score']>=70)].shape[0])/(cabrera_school.loc[(cabrera_school['math_score']>=0) & (cabrera_school['reading_score']>=0)].shape[0])*100

bailey_math_reading_percentage = (bailey_school.loc[(bailey_school['math_score']>=70) & (bailey_school['reading_score']>=70)].shape[0])/(bailey_school.loc[(bailey_school['math_score']>=0) & (bailey_school['reading_score']>=0)].shape[0])*100

holden_math_reading_percentage = (holden_school.loc[(holden_school['math_score']>=70) & (holden_school['reading_score']>=70)].shape[0])/(holden_school.loc[(holden_school['math_score']>=0) & (holden_school['reading_score']>=0)].shape[0])*100

pena_math_reading_percentage = (pena_school.loc[(pena_school['math_score']>=70) & (pena_school['reading_score']>=70)].shape[0])/(pena_school.loc[(pena_school['math_score']>=0) & (pena_school['reading_score']>=0)].shape[0])*100

wright_math_reading_percentage = (wright_school.loc[(wright_school['math_score']>=70) & (wright_school['reading_score']>=70)].shape[0])/(wright_school.loc[(wright_school['math_score']>=0) & (wright_school['reading_score']>=0)].shape[0])*100

rodriguez_math_reading_percentage = (rodriguez_school.loc[(rodriguez_school['math_score']>=70) & (rodriguez_school['reading_score']>=70)].shape[0])/(rodriguez_school.loc[(rodriguez_school['math_score']>=0) & (rodriguez_school['reading_score']>=0)].shape[0])*100

johnson_math_reading_percentage = (johnson_school.loc[(johnson_school['math_score']>=70) & (johnson_school['reading_score']>=70)].shape[0])/(johnson_school.loc[(johnson_school['math_score']>=0) & (johnson_school['reading_score']>=0)].shape[0])*100

ford_math_reading_percentage = (ford_school.loc[(ford_school['math_score']>=70) & (ford_school['reading_score']>=70)].shape[0])/(ford_school.loc[(ford_school['math_score']>=0) & (ford_school['reading_score']>=0)].shape[0])*100

thomas_math_reading_percentage = (thomas_school.loc[(thomas_school['math_score']>=70) & (thomas_school['reading_score']>=70)].shape[0])/(thomas_school.loc[(thomas_school['math_score']>=0) & (thomas_school['reading_score']>=0)].shape[0])*100




### Combining the Overall Pass List

In [22]:
school_math_reading_percentage =[huang_math_reading_percentage,figueroa_math_reading_percentage,shelton_math_reading_percentage,hernandez_math_reading_percentage,griffin_math_reading_percentage,wilson_math_reading_percentage,cabrera_math_reading_percentage,bailey_math_reading_percentage,holden_math_reading_percentage,pena_math_reading_percentage,wright_math_reading_percentage,rodriguez_math_reading_percentage,johnson_math_reading_percentage,ford_math_reading_percentage,thomas_math_reading_percentage]



