### Note
* Instructions have been included for each segment. You do not have to follow them exactly, but they are included to help you think through the steps.

In [31]:
# Dependencies and Setup
import pandas as pd

# File to Load (Remember to Change These)
school_data_to_load = "Resources/schools_complete.csv"
student_data_to_load = "Resources/students_complete.csv"

# Read School and Student Data File and store into Pandas DataFrames
school_data = pd.read_csv(school_data_to_load)
student_data = pd.read_csv(student_data_to_load)

# Combine the data into a single dataset.  
schools_and_students_df = pd.merge(student_data, school_data, how="left", on=["school_name", "school_name"])

schools_and_students_df.tail(20)

Unnamed: 0,Student ID,student_name,gender,grade,school_name,reading_score,math_score,School ID,type,size,budget
39150,39150,Jennifer Hamilton,F,11th,Thomas High School,80,75,14,Charter,1635,1043130
39151,39151,Shannon Williams,F,10th,Thomas High School,84,73,14,Charter,1635,1043130
39152,39152,Lori Moore,F,9th,Thomas High School,98,84,14,Charter,1635,1043130
39153,39153,William Hubbard,M,9th,Thomas High School,80,75,14,Charter,1635,1043130
39154,39154,Bradley Johnson,M,12th,Thomas High School,91,71,14,Charter,1635,1043130
39155,39155,John Brooks,M,10th,Thomas High School,92,98,14,Charter,1635,1043130
39156,39156,Stephanie Contreras,F,11th,Thomas High School,79,95,14,Charter,1635,1043130
39157,39157,Kristen Gonzalez,F,9th,Thomas High School,79,94,14,Charter,1635,1043130
39158,39158,Kari Holloway,F,10th,Thomas High School,87,90,14,Charter,1635,1043130
39159,39159,Kimberly Cabrera,F,11th,Thomas High School,85,72,14,Charter,1635,1043130


## District Summary

* Calculate the total number of schools

* Calculate the total number of students

* Calculate the total budget

* Calculate the average math score 

* Calculate the average reading score

* Calculate the percentage of students with a passing math score (70 or greater)

* Calculate the percentage of students with a passing reading score (70 or greater)

* Calculate the percentage of students who passed math **and** reading (% Overall Passing)

* Create a dataframe to hold the above results

* Optional: give the displayed data cleaner formatting

In [32]:
#number of schools
#listing out all the unique schools
list_of_schools = schools_and_students_df['school_name'].unique()
#using len to find the number of schools w/in the unique list
num_schools = len(list_of_schools)
num_schools

15

In [33]:
#number of students
#using length of student ID column
num_students = len(schools_and_students_df['Student ID'])
num_students

39170

In [34]:
#total budget
#sum the budget column
total_budget = schools_and_students_df['budget'].sum()
total_budget

82932329558

In [35]:
#avg mathematics score
avg_maths = schools_and_students_df['math_score'].mean()
avg_maths 

78.98537145774827

In [36]:
#avg reading
avg_reading = schools_and_students_df['reading_score'].mean()
avg_reading 

81.87784018381414

In [37]:
#% passing maths
# number of students passing/total students
#boolean mask to filter students >70
passing_maths_mask = schools_and_students_df['math_score'] > 70
#creating a df with that mask
students_passing_maths_df = schools_and_students_df.loc[passing_maths_mask, :]
#finding the length of the df of students passing (=number of students passing)
students_passing_maths = len(schools_and_students_df.loc[passing_maths_mask, :])
#applying % formula
maths_percent_passing = students_passing_maths/num_students * 100
maths_percent_passing 

72.39213683941792

In [38]:
#% passing reading
# number of students passing/total students
#boolean mask to filter students >70
passing_reading_mask = schools_and_students_df['reading_score'] > 70
#creating a df with that mask
students_passing_reading_df = schools_and_students_df.loc[passing_reading_mask, :]
#finding the length of the df of students passing (=number of students passing)
students_passing_reading = len(schools_and_students_df.loc[passing_reading_mask, :])
#applying % formula
reading_percent_passing = students_passing_reading/num_students * 100
reading_percent_passing 

82.97166198621395

In [39]:
#% passing both
# number of students passing/total students
#boolean mask to filter students >70
passing_both_mask = (schools_and_students_df['math_score'] > 70) & (schools_and_students_df['reading_score'] > 70)
#creating a df with that mask
students_passing_both_df = schools_and_students_df.loc[passing_both_mask, :]
#finding the length of the df of students passing (=number of students passing)
students_passing_both = len(schools_and_students_df.loc[passing_both_mask, :])
#applying % formula
overall_percent_passing = students_passing_both/num_students * 100
overall_percent_passing 

60.801633903497574

In [40]:
#district summary
#creating a dictionary of lists to hold the columns
summary_dict = {'total_schools': [num_schools], 'total_students': [num_students], 'total_budget': [total_budget], 'average_math_score': [avg_maths], 'average_reading_score': [avg_reading], 'percent_passing_math': [maths_percent_passing], 'percent_passing_reading': [reading_percent_passing], 'overall_percent_passing': [overall_percent_passing]}
#creating the df
district_summary_df = pd.DataFrame(summary_dict)
#formatting the columns
#integers needing commas
district_summary_df.iloc[:, 1:3] = district_summary_df.iloc[:, 1:3].applymap('{:,}'.format)
#decimals needing rounding
district_summary_df.iloc[:, 3:5] = district_summary_df.iloc[:, 3:5].applymap('{:.2f}'.format)
#decimals needing percent signs and rounding
district_summary_df.iloc[:, 5:8] = district_summary_df.iloc[:, 5:8].applymap('{:.2f}%'.format)
district_summary_df

Unnamed: 0,total_schools,total_students,total_budget,average_math_score,average_reading_score,percent_passing_math,percent_passing_reading,overall_percent_passing
0,15,39170,82932329558,78.99,81.88,72.39%,82.97%,60.80%


## School Summary

* Create an overview table that summarizes key metrics about each school, including:
  * School Name
  * School Type
  * Total Students
  * Total School Budget
  * Per Student Budget
  * Average Math Score
  * Average Reading Score
  * % Passing Math
  * % Passing Reading
  * % Overall Passing (The percentage of students that passed math **and** reading.)
  
* Create a dataframe to hold the above results

In [105]:
school_info_dict = {}
for x in range(0, num_students):
    if schools_and_students_df.iloc[x, 4] != schools_and_students_df.iloc[(x-1), 4]:
        school_name = schools_and_students_df.iloc[x, 4]
        school_type = schools_and_students_df.iloc[x, 8]
        students_per_school = schools_and_students_df.iloc[x, 9]
        total_budget = schools_and_students_df.iloc[x, 10] 
        per_student_budget = total_budget/students_per_school
        school_info_dict[school_name] = [school_type, students_per_school, total_budget, per_student_budget]
school_info_dict

15


15

In [160]:
#grouped by object
grouped_schools_df = schools_and_students_df.groupby('school_name')
#collecting school name
school_name = grouped_schools_df['school_name'].unique()
#school type
school_type = grouped_schools_df['type'].unique()
#total students
total_students = grouped_schools_df['Student ID'].count()
#total budget
total_budget = grouped_schools_df['budget'].unique()
#converting total budget to a number
total_budget = total_budget.astype('float64')
#budget per student
budget_per_student = total_budget/total_students
#finding avg maths scores - sum of scores over total students
maths_scores_sum = grouped_schools_df['math_score'].sum()
avg_maths_score = maths_scores_sum/total_students
#finding avg reading scores - sum of scores over total students
reading_scores_sum = grouped_schools_df['reading_score'].sum()
avg_reading_score = reading_scores_sum/total_students

grouped_schools_df['math_score'].describe()

Unnamed: 0_level_0,count,mean,std,min,25%,50%,75%,max
school_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Bailey High School,4976.0,77.048432,12.908393,55.0,66.0,77.0,88.0,99.0
Cabrera High School,1858.0,83.061895,9.245293,68.0,75.0,83.0,91.0,99.0
Figueroa High School,2949.0,76.711767,12.986908,55.0,65.0,76.0,88.0,99.0
Ford High School,2739.0,77.102592,12.789984,55.0,66.0,77.0,88.0,99.0
Griffin High School,1468.0,83.351499,9.236004,68.0,75.0,83.0,91.0,99.0
Hernandez High School,4635.0,77.289752,13.178957,55.0,66.0,77.0,89.0,99.0
Holden High School,427.0,83.803279,9.34215,68.0,76.0,84.0,93.0,99.0
Huang High School,2917.0,76.629414,12.947801,55.0,66.0,76.0,88.0,99.0
Johnson High School,4761.0,77.072464,13.101575,55.0,66.0,77.0,89.0,99.0
Pena High School,962.0,83.839917,9.326506,68.0,75.0,84.0,92.0,99.0


In [101]:
school_names = schools_and_students_df['school_name']
number_of_students = schools_and_students_df['school_name'].value_counts()
number_of_students
#schools_and_students_df['size'].unique()
#schools_and_students_df['type'].unique()
#grouped_schools_df = schools_and_students_df.groupby('school_name')
#grouped_schools_df["type"].unique()
#grouped_schools_df['Student ID'].count()

0         Huang High School
1         Huang High School
2         Huang High School
3         Huang High School
4         Huang High School
                ...        
39165    Thomas High School
39166    Thomas High School
39167    Thomas High School
39168    Thomas High School
39169    Thomas High School
Name: school_name, Length: 39170, dtype: object

## Top Performing Schools (By % Overall Passing)

* Sort and display the top five performing schools by % overall passing.

## Bottom Performing Schools (By % Overall Passing)

* Sort and display the five worst-performing schools by % overall passing.

## Math Scores by Grade

* Create a table that lists the average Math Score for students of each grade level (9th, 10th, 11th, 12th) at each school.

  * Create a pandas series for each grade. Hint: use a conditional statement.
  
  * Group each series by school
  
  * Combine the series into a dataframe
  
  * Optional: give the displayed data cleaner formatting

## Reading Score by Grade 

* Perform the same operations as above for reading scores

## Scores by School Spending

* Create a table that breaks down school performances based on average Spending Ranges (Per Student). Use 4 reasonable bins to group school spending. Include in the table each of the following:
  * Average Math Score
  * Average Reading Score
  * % Passing Math
  * % Passing Reading
  * Overall Passing Rate (Average of the above two)

## Scores by School Size

* Perform the same operations as above, based on school size.

## Scores by School Type

* Perform the same operations as above, based on school type