# PyCity Schools Analysis

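- The district has 15 schools and 39,170 students, with a combined budget of $24,649,428.
- District-wide, the average math score is 78.99 and the average reading score is 81.88; 74.98% of students pass math, 85.81% pass reading, and 65.17% pass both (a passing score is 70 or higher).
- Among the schools shown in the per-school averages below, the Charter schools (Cabrera, Griffin, Holden, Pena) average about 83 in math, while the District schools (Bailey, Figueroa, Ford, Hernandez, Huang, Johnson) average about 77.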


In [229]:
# Import appropriate modules

import pandas as pd
from pathlib import Path

# Read the csv files and merge them into a single DataFrame

school_data_path = Path("Resources/schools_complete.csv")
student_data_path = Path("Resources/students_complete.csv")

# Load the school and student records into separate DataFrames
school_data = pd.read_csv(school_data_path)
student_data = pd.read_csv(student_data_path)

# Left-join the school columns onto every student row, keyed on the school name.
# The key is given once; listing "school_name" twice added nothing to the join.
school_student_df = pd.merge(student_data, school_data, how="left", on="school_name")
school_student_df.head()

Unnamed: 0,Student ID,student_name,gender,grade,school_name,reading_score,math_score,School ID,type,size,budget
0,0,Paul Bradley,M,9th,Huang High School,66,79,0,District,2917,1910635
1,1,Victor Smith,M,12th,Huang High School,94,61,0,District,2917,1910635
2,2,Kevin Rodriguez,M,12th,Huang High School,90,60,0,District,2917,1910635
3,3,Dr. Richard Scott,M,12th,Huang High School,67,58,0,District,2917,1910635
4,4,Bonnie Ray,F,9th,Huang High School,97,84,0,District,2917,1910635


# District Summary

In [230]:
# Number of distinct school names present in the merged data

school_count = school_student_df["school_name"].unique().size
school_count

15

In [231]:
# Total students: one per row of the merged data (names are not de-duplicated)

student_count = school_student_df["student_name"].size
student_count

39170

In [232]:
# Total budget: sum of the budget column in the per-school table

total_budget = school_data.budget.sum()
total_budget

24649428

In [233]:
# Mean math score over all student rows, rounded to two decimals

average_math_score = round(school_student_df.math_score.mean(), ndigits=2)
average_math_score

78.99

In [234]:
# Mean reading score over all student rows, rounded to two decimals

average_reading_score = round(school_student_df.reading_score.mean(), ndigits=2)
average_reading_score

81.88

In [235]:
# Percentage of students passing math (a score of 70 or higher passes)

# Summing the boolean mask counts the rows that meet the passing threshold
math_pass = (school_student_df["math_score"] >= 70).sum()
per_math_pass = round(100 * math_pass / student_count, 2)
per_math_pass

74.98

In [236]:
# Percentage of students passing reading (a score of 70 or higher passes)

# Summing the boolean mask counts the rows that meet the passing threshold
read_pass = school_student_df["reading_score"].ge(70).sum()
per_read_pass = round(100 * read_pass / student_count, 2)
per_read_pass

85.81

In [237]:
# Percentage of students who passed both math and reading (70 or higher in each)

# A row counts only when both score conditions hold
overall_pass = (
    school_student_df["reading_score"].ge(70)
    & school_student_df["math_score"].ge(70)
).sum()

per_overall_pass = round(100 * overall_pass / student_count, 2)

per_overall_pass

65.17

In [238]:
# Collect the district-wide metrics computed above into a single-row DataFrame.
# NOTE(review): "%Passing Reading" has no space after "%", unlike the other two
# percentage labels; left unchanged here because later code may use this column name.

district_summary = pd.DataFrame(
    {
        "Total Schools": [school_count],
        "Total Students": [student_count],
        "Total Budget": [total_budget],
        "Average Math Score": [average_math_score],
        "Average Reading Score": [average_reading_score],
        "% Passing Math": [per_math_pass],
        "%Passing Reading": [per_read_pass],
        "% Overall Passing": [per_overall_pass],
    }
)

# Display formatting: thousands separators for the head count, currency for the budget.
# Both columns hold strings after this step.
district_summary["Total Students"] = district_summary["Total Students"].map(lambda value: f"{value:,}")
district_summary["Total Budget"] = district_summary["Total Budget"].map(lambda value: f"${value:,.2f}")

district_summary

Unnamed: 0,Total Schools,Total Students,Total Budget,Average Math Score,Average Reading Score,% Passing Math,%Passing Reading,% Overall Passing
0,15,39170,"$24,649,428.00",78.99,81.88,74.98,85.81,65.17


# School Summary

In [239]:
# Unique school names from the per-school table, kept for any later per-school looping

school_list = school_data["school_name"].unique()

# Group the merged student rows by school name.
# Despite the "_df" suffix, this is a pandas GroupBy object rather than a DataFrame;
# the per-school cells below aggregate from it.
school_summary_df = school_student_df.groupby("school_name")


#### School type

In [240]:
# Map each school name to its school type (keys are school names, values are types).
# Pairing the two columns directly avoids row-label lookups, so the result does not
# depend on school_data having a default 0..n-1 index.
school_type = dict(zip(school_data["school_name"], school_data["type"]))

# Store it in a Series indexed by school name
school_types = pd.Series(school_type)
school_types

Huang High School        District
Figueroa High School     District
Shelton High School       Charter
Hernandez High School    District
Griffin High School       Charter
Wilson High School        Charter
Cabrera High School       Charter
Bailey High School       District
Holden High School        Charter
Pena High School          Charter
Wright High School        Charter
Rodriguez High School    District
Johnson High School      District
Ford High School         District
Thomas High School        Charter
dtype: object

#### Total Students

In [241]:
# Number of student rows per school, from value_counts on the school name column

stu_per_school = school_student_df.loc[:, "school_name"].value_counts()
stu_per_school


Bailey High School       4976
Johnson High School      4761
Hernandez High School    4635
Rodriguez High School    3999
Figueroa High School     2949
Huang High School        2917
Ford High School         2739
Wilson High School       2283
Cabrera High School      1858
Wright High School       1800
Shelton High School      1761
Thomas High School       1635
Griffin High School      1468
Pena High School          962
Holden High School        427
Name: school_name, dtype: int64

In [242]:
# Unused alternative, kept commented out in case it is useful later.

# Counts the students in each school with a for loop and builds a DataFrame from the result

# per_school_count = {}
# per_school_count = {"School": [], "Count": []}

# for school in school_list:
#     per_school_count["School"].append(school)
#     per_school_count["Count"].append(school_student_df.loc[school_student_df["school_name"] == school, :]
#                                      ["student_name"].count())

# per_school_count["Count"]
# per_school_count_df = pd.DataFrame(per_school_count)

# per_school_count_df

#### Budget per School

In [243]:
# # Create dictionaries to store data for the budget per school. Keys are the school names, values are budget
# # and budget per capita respectively
# budg_schools = {}          # Dictionary for budget
# budg_schools_capita = {}   # Dictionary for budget per capita

# # Loop through school_data and add to the school budget and school budget per capita 
# for i in range(len(school_data)):
#     budg_schools[school_data.at[i, "school_name"]] = school_data.at[i, "budget"]
#     budg_schools_capita[school_data.at[i, "school_name"]] = int(school_data.at[i, "budget"]) / stu_per_school[school_data.at[i, "school_name"]]

# # Create a Series for each of budget per school and budget per capita per school
# per_school_budget = pd.Series(budg_schools)
# per_school_capita = pd.Series(budg_schools_capita)

# # Inspect the resulting Series
# per_school_budget
# per_school_capita




#### Average math score

In [248]:
# Mean math score per school, as a one-column DataFrame indexed by school name.
# The double brackets keep the result a DataFrame so the column can be renamed.

per_school_av_math = (
    school_summary_df[["math_score"]]
    .mean()
    .rename(columns={"math_score": "Average Math Score"})
)
per_school_av_math

Unnamed: 0_level_0,Average Math Score
school_name,Unnamed: 1_level_1
Bailey High School,77.048432
Cabrera High School,83.061895
Figueroa High School,76.711767
Ford High School,77.102592
Griffin High School,83.351499
Hernandez High School,77.289752
Holden High School,83.803279
Huang High School,76.629414
Johnson High School,77.072464
Pena High School,83.839917


#### Average Reading score

In [249]:
# Mean reading score per school, as a one-column DataFrame indexed by school name.
# The double brackets keep the result a DataFrame so the column can be renamed.
per_school_av_read = (
    school_summary_df[["reading_score"]]
    .mean()
    .rename(columns={"reading_score": "Average Reading Score"})
)
per_school_av_read

Unnamed: 0_level_0,Average Reading Score
school_name,Unnamed: 1_level_1
Bailey High School,81.033963
Cabrera High School,83.97578
Figueroa High School,81.15802
Ford High School,80.746258
Griffin High School,83.816757
Hernandez High School,80.934412
Holden High School,83.814988
Huang High School,81.182722
Johnson High School,80.966394
Pena High School,84.044699
