In [1]:
# Import pandas for DataFrame
import pandas as pd

In [2]:
# Identify files
schools_file = "Resources/schools_complete.csv"
students_file = "Resources/students_complete.csv"

In [3]:
# Load schools csv into a DataFrame
schools_df = pd.read_csv(schools_file, encoding="utf8")

schools_df.head()

Unnamed: 0,School ID,school_name,type,size,budget
0,0,Huang High School,District,2917,1910635
1,1,Figueroa High School,District,2949,1884411
2,2,Shelton High School,Charter,1761,1056600
3,3,Hernandez High School,District,4635,3022020
4,4,Griffin High School,Charter,1468,917500


In [4]:
# Load students csv into a DataFrame
students_df = pd.read_csv(students_file, encoding="utf8")

students_df.head()

Unnamed: 0,Student ID,student_name,gender,grade,school_name,reading_score,math_score
0,0,Paul Bradley,M,9th,Huang High School,66,79
1,1,Victor Smith,M,12th,Huang High School,94,61
2,2,Kevin Rodriguez,M,12th,Huang High School,90,60
3,3,Dr. Richard Scott,M,12th,Huang High School,67,58
4,4,Bonnie Ray,F,9th,Huang High School,97,84


In [5]:
# Rename columns
renamed_schools_df = schools_df.rename(columns= {"school_name": "School Name",
                                                "type": "Type",
                                                "size": "Size",
                                                "budget": "Budget"})
renamed_schools_df.head()

Unnamed: 0,School ID,School Name,Type,Size,Budget
0,0,Huang High School,District,2917,1910635
1,1,Figueroa High School,District,2949,1884411
2,2,Shelton High School,Charter,1761,1056600
3,3,Hernandez High School,District,4635,3022020
4,4,Griffin High School,Charter,1468,917500


In [6]:
# Rename columns
renamed_students_df = students_df.rename(columns= {"student_name": "Student Name",
                                                  "gender": "Gender",
                                                  "grade": "Grade",
                                                  "school_name": "School Name",
                                                  "reading_score":"Reading Score",
                                                  "math_score": "Math Score"})
renamed_students_df.head()

Unnamed: 0,Student ID,Student Name,Gender,Grade,School Name,Reading Score,Math Score
0,0,Paul Bradley,M,9th,Huang High School,66,79
1,1,Victor Smith,M,12th,Huang High School,94,61
2,2,Kevin Rodriguez,M,12th,Huang High School,90,60
3,3,Dr. Richard Scott,M,12th,Huang High School,67,58
4,4,Bonnie Ray,F,9th,Huang High School,97,84


In [7]:
# District: Total Schools
total_schools = renamed_schools_df["School ID"].count()

print(total_schools)

15


In [8]:
# District: Total Students
total_students = renamed_students_df["Student ID"].count()

print(total_students)

39170


In [9]:
# District: Total Budget
total_budget = renamed_schools_df["Budget"].sum()

print(total_budget)

24649428


In [10]:
# District: Average Math Score
avg_math = renamed_students_df["Math Score"].mean()

print(avg_math)

78.98537145774827


In [11]:
# District: Average Reading Score
avg_reading = renamed_students_df["Reading Score"].mean()

print(avg_reading)

81.87784018381414


In [12]:
# Find min and max of math score
renamed_students_df["Math Score"].describe()

count    39170.000000
mean        78.985371
std         12.309968
min         55.000000
25%         69.000000
50%         79.000000
75%         89.000000
max         99.000000
Name: Math Score, dtype: float64

In [13]:
# Find min and max of reading score
renamed_students_df["Reading Score"].describe()

count    39170.00000
mean        81.87784
std         10.23958
min         63.00000
25%         73.00000
50%         82.00000
75%         91.00000
max         99.00000
Name: Reading Score, dtype: float64

In [14]:
# District: % Passing Math
# Passing Score: 70?
passed_math_df = renamed_students_df.loc[renamed_students_df["Math Score"] > 70, ["Student ID", "Student Name", "Gender", "Grade", 
                                                              "School Name", "Math Score", "Reading Score"]]
    
passed_math_df.head()

Unnamed: 0,Student ID,Student Name,Gender,Grade,School Name,Math Score,Reading Score
0,0,Paul Bradley,M,9th,Huang High School,79,66
4,4,Bonnie Ray,F,9th,Huang High School,84,97
5,5,Bryan Miranda,M,9th,Huang High School,94,94
6,6,Sheena Carter,F,11th,Huang High School,80,82
8,8,Michael Roth,M,10th,Huang High School,87,95


In [15]:
# Number of Students: Passed Math
stu_passed_math = passed_math_df["Student ID"].count()

print(stu_passed_math)

28356


In [16]:
# District: % Passing Reading
# Passing Score: 70?
passed_reading_df = renamed_students_df.loc[renamed_students_df["Reading Score"] > 70, ["Student ID", "Student Name", "Gender", "Grade", 
                                                              "School Name", "Reading Score", "Math Score"]]
    
passed_reading_df.head()

Unnamed: 0,Student ID,Student Name,Gender,Grade,School Name,Reading Score,Math Score
1,1,Victor Smith,M,12th,Huang High School,94,61
2,2,Kevin Rodriguez,M,12th,Huang High School,90,60
4,4,Bonnie Ray,F,9th,Huang High School,97,84
5,5,Bryan Miranda,M,9th,Huang High School,94,94
6,6,Sheena Carter,F,11th,Huang High School,82,80


In [17]:
# Number of Students: Passed Reading
stu_passed_reading = passed_reading_df["Student ID"].count()

print(stu_passed_reading)

32500


In [18]:
# District: % Overall Passing
# Passing Score: 70?
passed_both_df = passed_math_df.loc[passed_math_df["Reading Score"] > 70, ["Student ID", "Student Name", "Gender", "Grade", 
                                          "School Name", "Reading Score", "Math Score"]]
passed_both_df.head()

Unnamed: 0,Student ID,Student Name,Gender,Grade,School Name,Reading Score,Math Score
4,4,Bonnie Ray,F,9th,Huang High School,97,84
5,5,Bryan Miranda,M,9th,Huang High School,94,94
6,6,Sheena Carter,F,11th,Huang High School,82,80
8,8,Michael Roth,M,10th,Huang High School,95,87
9,9,Matthew Greene,M,10th,Huang High School,96,84


In [19]:
# Number of Students: Passed Both Math & Reading
stu_passed_both = passed_both_df["Student ID"].count()

print(stu_passed_both)

23816


In [20]:
# District: % Passing Math
per_passing_math = stu_passed_math / total_students

print(per_passing_math)

0.7239213683941792


In [21]:
# District: % Passing Reading
per_passing_reading = stu_passed_reading / total_students

print(per_passing_reading)

0.8297166198621394


In [22]:
# District: % Overall Passing
per_passing_both = stu_passed_both / total_students

print(per_passing_both)

0.6080163390349758


In [27]:
# District Metrics Table
district_metrics_df = pd.DataFrame({"Total Schools": [total_schools], 
                                    "Total Students": [total_students],
                                    "Total Budget": [total_budget], 
                                    "Average Math Score": [avg_math], 
                                    "Average Reading Score": [avg_reading],
                                    "% Passing Math": [per_passing_math], 
                                    "% Passing Reading": [per_passing_reading], 
                                    "% Passing Both": [per_passing_both]})

district_metrics_df.head()

Unnamed: 0,Total Schools,Total Students,Total Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Passing Both
0,15,39170,24649428,78.985371,81.87784,0.723921,0.829717,0.608016
