In [1]:
# Dependencies and Setup
import pandas as pd

# Relative paths of the two input CSVs (adjust if your layout differs)
school_data_to_load = "Resources/schools_complete.csv"
student_data_to_load = "Resources/students_complete.csv"

# Parse each CSV into its own pandas DataFrame (schools first, then students)
school_data, student_data = (
    pd.read_csv(csv_path)
    for csv_path in (school_data_to_load, student_data_to_load)
)

# Combine the data into a single dataset: a left join that keeps every student
# row and attaches the matching school's columns via the shared "school_name"
# key (named once; repeating it in `on` adds nothing).
# validate="many_to_one" makes pandas raise a MergeError if a school name
# appears more than once in school_data, instead of silently duplicating
# student rows and inflating every count computed from the merged frame.
school_data_complete = pd.merge(
    student_data,
    school_data,
    how="left",
    on="school_name",
    validate="many_to_one",
)

In [2]:
# Preview the merged frame: display its first five rows
school_data_complete.head(n=5)

Unnamed: 0,Student ID,student_name,gender,grade,school_name,reading_score,math_score,School ID,type,size,budget
0,0,Paul Bradley,M,9th,Huang High School,66,79,0,District,2917,1910635
1,1,Victor Smith,M,12th,Huang High School,94,61,0,District,2917,1910635
2,2,Kevin Rodriguez,M,12th,Huang High School,90,60,0,District,2917,1910635
3,3,Dr. Richard Scott,M,12th,Huang High School,67,58,0,District,2917,1910635
4,4,Bonnie Ray,F,9th,Huang High School,97,84,0,District,2917,1910635


In [3]:
# Non-null entries per column; a column whose total is below the row count
# contains missing values
school_data_complete.notna().sum()

Student ID       39170
student_name     39170
gender           39170
grade            39170
school_name      39170
reading_score    39170
math_score       39170
School ID        39170
type             39170
size             39170
budget           39170
dtype: int64

In [4]:
# Number of distinct schools: one value_counts entry per school name
schools = school_data_complete["school_name"].value_counts()
Total_schools = schools.shape[0]
Total_schools

15

In [5]:
# Total number of students = number of rows in the merged frame.
# Rows are counted directly rather than counting non-null "student_name"
# values: the pass counts in the percentage cells are row counts (len of a
# filtered frame), so the denominator must be a row count too, and a blank
# name must not shrink it.
Total_students = len(school_data_complete)
Total_students

39170

In [6]:
# District-wide budget: sum the "budget" column of the schools table
# (not the merged frame, where each school's budget repeats on every student row)
Total_budget = school_data.loc[:, "budget"].sum()
Total_budget

24649428

In [7]:
# Mean math score across all student rows
avg_math_score = school_data_complete.loc[:, "math_score"].mean()
avg_math_score

78.98537145774827

In [8]:
# Mean reading score across all student rows
avg_reading_score = school_data_complete.loc[:, "reading_score"].mean()
avg_reading_score

81.87784018381414

In [9]:
# Percentage of students with a passing math score (70 or above)
pass_math_df = school_data_complete[school_data_complete["math_score"] >= 70]
math_pass = pass_math_df.shape[0]
math_percent = math_pass / Total_students * 100
math_percent

74.9808526933878

In [10]:
# Percentage of students with a passing reading score (70 or above)
pass_reading_df = school_data_complete[school_data_complete["reading_score"] >= 70]
reading_pass = pass_reading_df.shape[0]
reading_percent = reading_pass / Total_students * 100
reading_percent

85.80546336482001

In [11]:
# Percentage of students who pass both subjects (70 or above in math AND reading)
math_ok = school_data_complete["math_score"] >= 70
reading_ok = school_data_complete["reading_score"] >= 70
over_all_pass_df = school_data_complete[math_ok & reading_ok]
over_all_pass = over_all_pass_df.shape[0]
overall_percent = over_all_pass / Total_students * 100
overall_percent

65.17232575950983

In [12]:
# Gather the district-level figures, keyed by their display labels.
# "Total Schools" is wrapped in a list so that pd.DataFrame can build a
# one-row table from a dict whose other values are all scalars.
dist_dict = {
    "Total Schools": [Total_schools],
    "Total Students": Total_students,
    "Total Budget": Total_budget,
    "Average Math Score": avg_math_score,
    "Average Reading Score": avg_reading_score,
    "% Passing Math": math_percent,
    "% Passing Reading": reading_percent,
    "% Overall Passing": overall_percent,
}

In [13]:
# Build the one-row district summary table and a formatted view of it.
# The raw DataFrame keeps its own name so its numeric values stay available
# for further computation; `district_summary` is the pandas Styler used for
# display (thousands separators for students, currency format for budget).
district_summary_df = pd.DataFrame(dist_dict)
district_summary = district_summary_df.style.format(
    {"Total Students": "{:,}", "Total Budget": "${:,.2f}"}
)
district_summary

Unnamed: 0,Total Schools,Total Students,Total Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing
0,15,39170,"$24,649,428.00",78.985371,81.87784,74.980853,85.805463,65.172326
