In [457]:
 # Dependencies and Setup
import pandas as pd

In [479]:
# Input CSVs, given relative to the notebook's working directory.
student_input = "Resources/students_complete.csv"
school_input = "Resources/schools_complete.csv"

# Load the per-student and per-school tables into DataFrames.
student_data = pd.read_csv(student_input)
school_data = pd.read_csv(school_input)

# Attach each school's attributes to every student row by joining on the
# shared "school_name" column. `validate` makes the merge raise if a school
# name appears more than once in school_data, which would otherwise silently
# duplicate student rows and inflate every count computed from `df`.
df = pd.merge(
    student_data,
    school_data,
    how="left",
    on="school_name",
    validate="many_to_one",
)


## District Summary

**Create a high-level snapshot, in a DataFrame, of the district's key metrics, including the following:**

- Total schools
- Total students
- Total budget
- Average math score
- Average reading score
- % passing math (the percentage of students who passed math)
- % passing reading (the percentage of students who passed reading)
- % overall passing (the percentage of students who passed math AND reading)

In [480]:
# District-wide head counts: distinct school names and non-null student IDs.
total_schools = len(pd.unique(df["school_name"]))
total_students = df["Student ID"].count()

# Budgets are summed from school_data rather than from the merged frame,
# where a school's budget is repeated on each of its student rows.
total_budget = school_data["budget"].sum()

In [481]:
# District-wide mean score for each subject, taken over all student rows.
avg_math = df.loc[:, "math_score"].mean()
avg_reading = df.loc[:, "reading_score"].mean()

In [482]:
# Number of students with a math score of 70 or higher, then that number
# as a percentage of all students in the district.
pass_math = df["math_score"].ge(70.0).sum()
percent_pass_math = pass_math / total_students * 100

In [483]:
# Number of students with a reading score of 70 or higher, then that number
# as a percentage of all students in the district.
pass_reading = df["reading_score"].ge(70.0).sum()
percent_pass_reading = pass_reading / total_students * 100

In [484]:
# Number of students scoring 70 or higher in BOTH subjects, then that number
# as a percentage of all students in the district.
pass_both = (df["math_score"].ge(70.0) & df["reading_score"].ge(70.0)).sum()
percent_pass_both = pass_both / total_students * 100

In [485]:
# Build the one-row district summary directly from a single record, pairing
# each label with its value. This avoids enlarging an empty frame through
# .loc and then casting columns back to integers afterwards.
district_df = pd.DataFrame(
    [
        {
            "Total Schools": int(total_schools),
            "Total Students": int(total_students),
            "Total Budget": total_budget,
            "Average Math Score": avg_math,
            "Average Reading Score": avg_reading,
            "% Passing Math": percent_pass_math,
            "% Passing Reading": percent_pass_reading,
            "% Overall Passing": percent_pass_both,
        }
    ]
)

# Presentation formatting: after this point these columns hold strings,
# so any further arithmetic must use the underlying scalar variables.
district_df["Total Budget"] = district_df["Total Budget"].map("${:,.2f}".format)
for pct_column in ("% Passing Math", "% Passing Reading", "% Overall Passing"):
    district_df[pct_column] = district_df[pct_column].map("{:.5f}%".format)

district_df.head()

Unnamed: 0,Total Schools,Total Students,Total Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing
0,15,39170,"$24,649,428.00",78.985371,81.87784,74.98085%,85.80546%,65.17233%


## School Summary

**Create a DataFrame that summarizes key metrics about each school, including the following:**

- School name
- School type
- Total students
- Total school budget
- Per student budget
- Average math score
- Average reading score
- % passing math (the percentage of students who passed math)
- % passing reading (the percentage of students who passed reading)
- % overall passing (the percentage of students who passed math AND reading)

In [531]:
# Per-school enrollment and mean scores, indexed by school name.
# Grouping directly on the "school_name" column already produces a result
# indexed (and, by groupby's default, sorted) by school name, so there is no
# need to set and sort the index of the full student table first.
# Enrollment counts "Student ID", the same column the district-wide student
# total is computed from, so the per-school totals add up to the district
# total even if a student_name value is missing.
studentcount_avgs_df = (
    df.groupby("school_name")
    .agg({"Student ID": "count", "math_score": "mean", "reading_score": "mean"})
    .rename(
        columns={
            "Student ID": "Total Students",
            "math_score": "Average Math Score",
            "reading_score": "Average Reading Score",
        }
    )
)

In [532]:
# School type and budget per school: indexed by school name in sorted order,
# restricted to the two columns of interest, and relabelled for display.
grouped_school_data_df = (
    school_data.set_index("school_name")
    .sort_index()
    .loc[:, ["type", "budget"]]
    .rename(columns={"type": "School Type", "budget": "Total School Budget"})
)

# Combine the school attributes with the per-school student counts and
# score averages, matching rows on school_name.
merged_dfs = pd.merge(
    grouped_school_data_df,
    studentcount_avgs_df,
    on="school_name",
)


In [533]:
# --- Math: student rows with a math score of 70 or higher ---
passed_math_df = df[df["math_score"] >= 70.0]
# Per-school count of those passing math scores, indexed by school name.
math_grouped_df = (
    passed_math_df.groupby("school_name")["math_score"]
    .count()
    .to_frame("count_passed_math")
)

# --- Reading: student rows with a reading score of 70 or higher ---
passed_reading_df = df[df["reading_score"] >= 70.0]
# Per-school count of those passing reading scores, indexed by school name.
reading_grouped_df = (
    passed_reading_df.groupby("school_name")["reading_score"]
    .count()
    .to_frame("count_passed_reading")
)

# --- Overall: student rows scoring 70 or higher in both subjects ---
passed_both_df = df[(df["math_score"] >= 70.0) & (df["reading_score"] >= 70.0)]
# Per-school count of students passing both, indexed by school name
# (reading_score is simply the column being counted).
both_grouped_df = (
    passed_both_df.groupby("school_name")["reading_score"]
    .count()
    .to_frame("count_passed_overall")
)


In [534]:
# Attach the three per-school pass-count frames one after another, matching
# on school_name each time.
# NOTE: this rebinds merged_dfs to its own merge result, so running the cell
# a second time without re-running the cell that first creates merged_dfs
# would merge the count columns in again.
for pass_counts in (math_grouped_df, reading_grouped_df, both_grouped_df):
    merged_dfs = pd.merge(merged_dfs, pass_counts, on="school_name")

# Fix the column order: school attributes, averages, then the raw pass counts.
merged_dfs = merged_dfs.loc[
    :,
    [
        "School Type",
        "Total Students",
        "Total School Budget",
        "Average Math Score",
        "Average Reading Score",
        "count_passed_math",
        "count_passed_reading",
        "count_passed_overall",
    ],
]

In [535]:
# Append the derived per-school metrics in one step. The new columns are
# added after the existing ones, in the order listed here:
#   - budget divided by enrollment
#   - each pass count as a percentage of enrollment
merged_dfs = merged_dfs.assign(
    **{
        "Per Student Budget": merged_dfs["Total School Budget"] / merged_dfs["Total Students"],
        "% Passing Math": (merged_dfs["count_passed_math"] / merged_dfs["Total Students"]) * 100,
        "% Passing Reading": (merged_dfs["count_passed_reading"] / merged_dfs["Total Students"]) * 100,
        "% Overall Passing": (merged_dfs["count_passed_overall"] / merged_dfs["Total Students"]) * 100,
    }
)

In [536]:
# Discard the raw pass-count helper columns, then lay out the remaining
# columns in the order the school summary should present them.
school_summary_df = merged_dfs.drop(
    columns=["count_passed_math", "count_passed_reading", "count_passed_overall"]
)[
    [
        "School Type",
        "Total Students",
        "Total School Budget",
        "Per Student Budget",
        "Average Math Score",
        "Average Reading Score",
        "% Passing Math",
        "% Passing Reading",
        "% Overall Passing",
    ]
]

In [537]:
# Render money and percentage columns as display strings, one template per
# column. After this cell these columns hold text rather than numbers.
for column_label, template in {
    "Total School Budget": "${:,.2f}",
    "Per Student Budget": "${:,.2f}",
    "% Passing Math": "{:.5f}%",
    "% Passing Reading": "{:.5f}%",
    "% Overall Passing": "{:.5f}%",
}.items():
    school_summary_df[column_label] = school_summary_df[column_label].map(template.format)

In [538]:
# Preview the first five rows of the formatted school summary.
school_summary_df.head(n=5)

Unnamed: 0_level_0,School Type,Total Students,Total School Budget,Per Student Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing
school_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Bailey High School,District,4976,"$3,124,928.00",$628.00,77.048432,81.033963,66.68006%,81.93328%,54.64228%
Cabrera High School,Charter,1858,"$1,081,356.00",$582.00,83.061895,83.97578,94.13348%,97.03983%,91.33477%
Figueroa High School,District,2949,"$1,884,411.00",$639.00,76.711767,81.15802,65.98847%,80.73923%,53.20448%
Ford High School,District,2739,"$1,763,916.00",$644.00,77.102592,80.746258,68.30960%,79.29901%,54.28989%
Griffin High School,Charter,1468,"$917,500.00",$625.00,83.351499,83.816757,93.39237%,97.13896%,90.59946%


## Highest-Performing Schools (by % Overall Passing)

Create a DataFrame that highlights the top 5 performing schools based on % Overall Passing. Include the following metrics:

* School name
* School type
* Total students
* Total school budget
* Per student budget
* Average math score
* Average reading score
* % passing math (the percentage of students who passed math)
* % passing reading (the percentage of students who passed reading)
* % overall passing (the percentage of students who passed math AND reading)

## Lowest-Performing Schools (by % Overall Passing)

Create a DataFrame that highlights the bottom 5 performing schools based on % Overall Passing. Include the following metrics:

* School name
* School type
* Total students
* Total school budget
* Per student budget
* Average math score
* Average reading score
* % passing math (the percentage of students who passed math)
* % passing reading (the percentage of students who passed reading)
* % overall passing (the percentage of students who passed math AND reading)

## Math Scores by Grade

Create a DataFrame that lists the average math score for students of each grade level (9th, 10th, 11th, 12th) at each school.

## Reading Scores by Grade

Create a DataFrame that lists the average reading score for students of each grade level (9th, 10th, 11th, 12th) at each school.

## Scores by School Spending

Create a table that breaks down school performance based on average spending ranges (per student). Use your judgment to create four bins with reasonable cutoff values to group school spending. Include the following metrics in the table:

* Average math score
* Average reading score
* % passing math (the percentage of students who passed math)
* % passing reading (the percentage of students who passed reading)
* % overall passing (the percentage of students who passed math AND reading)

## Scores by School Size

Create a table that breaks down school performance based on school size (small, medium, or large).

In [None]:
# TODO: group schools by size (small, medium, large) and summarize their performance

## Scores by School Type

Create a table that breaks down school performance based on type of school (district or charter).

In [None]:
# TODO: group schools by type (district or charter) and summarize their performance