In [1]:
## Importing the Pandas Module for Analysis
import pandas as pd


In [2]:
## Loading the required files for analyses
schools_load_df = pd.read_csv("Resources\schools_complete.csv")
students_load_df = pd.read_csv("Resources\students_complete.csv")


In [3]:
## Viewing the Schools Data Frame
schools_load_df.head()

Unnamed: 0,School ID,school_name,type,size,budget
0,0,Huang High School,District,2917,1910635
1,1,Figueroa High School,District,2949,1884411
2,2,Shelton High School,Charter,1761,1056600
3,3,Hernandez High School,District,4635,3022020
4,4,Griffin High School,Charter,1468,917500


In [4]:
## Viewing the Students Data Frame
students_load_df.head()

Unnamed: 0,Student ID,student_name,gender,grade,school_name,reading_score,math_score
0,0,Paul Bradley,M,9th,Huang High School,66,79
1,1,Victor Smith,M,12th,Huang High School,94,61
2,2,Kevin Rodriguez,M,12th,Huang High School,90,60
3,3,Dr. Richard Scott,M,12th,Huang High School,67,58
4,4,Bonnie Ray,F,9th,Huang High School,97,84


In [5]:
## Merging the two dataframes
schools_complete_df = pd.merge(students_load_df, schools_load_df, on=["school_name","school_name"], how="left")
schools_complete_df.head()


Unnamed: 0,Student ID,student_name,gender,grade,school_name,reading_score,math_score,School ID,type,size,budget
0,0,Paul Bradley,M,9th,Huang High School,66,79,0,District,2917,1910635
1,1,Victor Smith,M,12th,Huang High School,94,61,0,District,2917,1910635
2,2,Kevin Rodriguez,M,12th,Huang High School,90,60,0,District,2917,1910635
3,3,Dr. Richard Scott,M,12th,Huang High School,67,58,0,District,2917,1910635
4,4,Bonnie Ray,F,9th,Huang High School,97,84,0,District,2917,1910635


# District Summary

In [6]:
## Total Number of Schools
## In this code, we first determine how many different schools there are followed by a count of these unique values. 
total_schools = (schools_complete_df["school_name"].value_counts()).count()

## Total Students
# using the .count() function will provide the total number of students
total_students = schools_complete_df["student_name"].count()

## Total Budget calculated using the .sum() function
## Since the new data frame will contain a the budget value for every row, we will need to re-examine the schools data frame to get the total value of the budget across all schools

total_budget = schools_load_df['budget'].sum()

## Calculating the average math score
average_math_score = schools_complete_df['math_score'].mean()

## Calculating the average reading score
average_reading_score = schools_complete_df['reading_score'].mean()


## Calculating the percentage of students who passed math

passing_math_count = (schools_complete_df.loc[schools_complete_df['math_score']>=70].shape[0])

total_math_count = schools_complete_df.loc[schools_complete_df['math_score']].shape[0]


passing_math_percentage = int(passing_math_count)/int(total_math_count) * 100

## Calculating the percentage of students who passed reading

passing_reading_count = (schools_complete_df.loc[schools_complete_df['reading_score']>=70].shape[0])

total_reading_count = schools_complete_df.loc[schools_complete_df['reading_score']].shape[0]


passing_reading_percentage = int(passing_reading_count)/int(total_reading_count) * 100

## Overall Passing rate for both Math and Reading

math_reading_pass_count = schools_complete_df.loc[(schools_complete_df['math_score']>=70) & (schools_complete_df['reading_score']>=70)].shape[0]

math_reading_pass_percentage = int(math_reading_pass_count)/int(total_students) * 100



In [7]:
## Creating a Summary Table Data Frame

district_summary_df = pd.DataFrame({
    'Total Schools':[total_schools],
    'Total Students': [total_students],
    'Total Budget': [total_budget],
    'Average Math Score': [average_math_score],
    'Average Reading Score': [average_reading_score],
    '% Passing Math': [math_reading_pass_percentage],
    '% Passing Reading': [math_reading_pass_percentage],
    '% Overall Pass': [math_reading_pass_percentage]
})

district_summary_df

Unnamed: 0,Total Schools,Total Students,Total Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Pass
0,15,39170,24649428,78.985371,81.87784,65.172326,65.172326,65.172326


In [8]:
district_summary_df = district_summary_df.style.format({    'Total Schools':'{:.0f}',
'Total Students':'{:,}',
'Total Budget':'${:,}'
})

In [9]:
district_summary_df

Unnamed: 0,Total Schools,Total Students,Total Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Pass
0,15,39170,"$24,649,428",78.985371,81.87784,65.172326,65.172326,65.172326


# School Summary

In [12]:
## Creating a seperate Data Frame for Each School
huang_school = schools_complete_df.loc[schools_complete_df['school_name']=='Huang High School']

figueroa_school = schools_complete_df.loc[schools_complete_df['school_name']=='Figueroa High School']

shelton_school = schools_complete_df.loc[schools_complete_df['school_name']=='Shelton High School']

hernandez_school = schools_complete_df.loc[schools_complete_df['school_name']=='Griffin High School']

griffin_school = schools_complete_df.loc[schools_complete_df['school_name']=='Griffin High School']

wilson_school = schools_complete_df.loc[schools_complete_df['school_name']=='Wilson High School']

cabrera_school = schools_complete_df.loc[schools_complete_df['school_name']=='Cabrera High School'] 

bailey_school = schools_complete_df.loc[schools_complete_df['school_name']=='Bailey High School']

holden_school = schools_complete_df.loc[schools_complete_df['school_name']=='Holden High School'] 

pena_school = schools_complete_df.loc[schools_complete_df['school_name']=='Pena High School']  

wright_school = schools_complete_df.loc[schools_complete_df['school_name']=='Wright High School']

rodriguez_school = schools_complete_df.loc[schools_complete_df['school_name']=='Rodriguez High School'] 

johnson_school = schools_complete_df.loc[schools_complete_df['school_name']=='Johnson High School']

ford_school = schools_complete_df.loc[schools_complete_df['school_name']=='Ford High School'] 

thomas_school = schools_complete_df.loc[schools_complete_df['school_name']=='Thomas High School']


In [19]:
## School Name List

school_names_types_budget = schools_complete_df[['school_name','type','budget']].drop_duplicates().reset_index(drop=True)

school_names_types_budget



Unnamed: 0,school_name,type,budget
0,Huang High School,District,1910635
1,Figueroa High School,District,1884411
2,Shelton High School,Charter,1056600
3,Hernandez High School,District,3022020
4,Griffin High School,Charter,917500
5,Wilson High School,Charter,1319574
6,Cabrera High School,Charter,1081356
7,Bailey High School,District,3124928
8,Holden High School,Charter,248087
9,Pena High School,Charter,585858
