In [1]:
# Import pandas for DataFrame
import pandas as pd

In [2]:
# Identify files
schools_file = "Resources/schools_complete.csv"
students_file = "Resources/students_complete.csv"

In [3]:
# Load schools csv into a DataFrame
og_schools_df = pd.read_csv(schools_file, encoding="utf8")

og_schools_df.columns

Index(['School ID', 'school_name', 'type', 'size', 'budget'], dtype='object')

In [4]:
# Load students csv into a DataFrame
og_students_df = pd.read_csv(students_file, encoding="utf8")

og_students_df.columns

Index(['Student ID', 'student_name', 'gender', 'grade', 'school_name',
       'reading_score', 'math_score'],
      dtype='object')

In [5]:
# Rename columns
renamed_schools_df = og_schools_df.rename(columns= {"school_name": "School Name",
                                                "type": "Type",
                                                "size": "Size",
                                                "budget": "Budget"})
renamed_schools_df.head()

Unnamed: 0,School ID,School Name,Type,Size,Budget
0,0,Huang High School,District,2917,1910635
1,1,Figueroa High School,District,2949,1884411
2,2,Shelton High School,Charter,1761,1056600
3,3,Hernandez High School,District,4635,3022020
4,4,Griffin High School,Charter,1468,917500


In [6]:
# Rename columns
renamed_students_df = og_students_df.rename(columns= {"student_name": "Student Name",
                                                  "gender": "Gender",
                                                  "grade": "Grade",
                                                  "school_name": "School Name",
                                                  "reading_score":"Reading Score",
                                                  "math_score": "Math Score"})
renamed_students_df.head()

Unnamed: 0,Student ID,Student Name,Gender,Grade,School Name,Reading Score,Math Score
0,0,Paul Bradley,M,9th,Huang High School,66,79
1,1,Victor Smith,M,12th,Huang High School,94,61
2,2,Kevin Rodriguez,M,12th,Huang High School,90,60
3,3,Dr. Richard Scott,M,12th,Huang High School,67,58
4,4,Bonnie Ray,F,9th,Huang High School,97,84


In [7]:
# District: TOTAL SCHOOLS
total_schools = renamed_schools_df["School ID"].count()

print(total_schools)

15


In [8]:
# District: TOTAL STUDENTS
total_students = renamed_students_df["Student ID"].count()

print(total_students)

39170


In [9]:
# District: TOTAL BUDGET
total_budget = renamed_schools_df["Budget"].sum()

print(total_budget)

24649428


In [10]:
# District: AVERAGE MATH SCORE
avg_math = round(renamed_students_df["Math Score"].mean(), 2)

print(avg_math)

78.99


In [11]:
# District: AVERAGE READING SCORE
avg_reading = round(renamed_students_df["Reading Score"].mean(), 2)

print(avg_reading)

81.88


In [12]:
# Find min and max of math scores
print(renamed_students_df["Math Score"].min())
print(renamed_students_df["Math Score"].max())

55
99


In [13]:
# Find min and max of reading scores
print(renamed_students_df["Reading Score"].min())
print(renamed_students_df["Reading Score"].max())

63
99


In [14]:
# District: PASSING MATH
# Passing Score: 70
passed_math_df = renamed_students_df.loc[renamed_students_df["Math Score"] >= 70, ["Student ID", "Student Name", 
                                                                                  "Gender", "Grade", "School Name", 
                                                                                  "Math Score", "Reading Score"]]
passed_math_df.head(2)

Unnamed: 0,Student ID,Student Name,Gender,Grade,School Name,Math Score,Reading Score
0,0,Paul Bradley,M,9th,Huang High School,79,66
4,4,Bonnie Ray,F,9th,Huang High School,84,97


In [15]:
# Number of Students: PASSED MATH
stu_passed_math = passed_math_df["Student ID"].count()

print(stu_passed_math)

29370


In [16]:
# District: PASSING READING
# Passing Score: 70
passed_reading_df = renamed_students_df.loc[renamed_students_df["Reading Score"] >= 70, ["Student ID", "Student Name", 
                                                                                        "Gender", "Grade", "School Name", 
                                                                                        "Reading Score", "Math Score"]]
passed_reading_df.head(2)

Unnamed: 0,Student ID,Student Name,Gender,Grade,School Name,Reading Score,Math Score
1,1,Victor Smith,M,12th,Huang High School,94,61
2,2,Kevin Rodriguez,M,12th,Huang High School,90,60


In [17]:
# Number of Students: PASSED READING
stu_passed_reading = passed_reading_df["Student ID"].count()

print(stu_passed_reading)

33610


In [18]:
# District: OVERALL PASSING
# Passing Score: 70
passed_both_df = passed_math_df.loc[passed_math_df["Reading Score"] >= 70, ["Student ID", "Student Name", 
                                                                           "Gender", "Grade", "School Name", 
                                                                           "Reading Score", "Math Score"]]
passed_both_df.head(2)

Unnamed: 0,Student ID,Student Name,Gender,Grade,School Name,Reading Score,Math Score
4,4,Bonnie Ray,F,9th,Huang High School,97,84
5,5,Bryan Miranda,M,9th,Huang High School,94,94


In [19]:
# Number of Students: PASSED BOTH
stu_passed_both = passed_both_df["Student ID"].count()

print(stu_passed_both)

25528


In [20]:
# District: % PASSING MATH
per_passing_math = round((stu_passed_math / total_students) * 100, 2)

print(per_passing_math)

74.98


In [21]:
# District: % PASSING READING
per_passing_reading = round((stu_passed_reading / total_students) * 100, 2)

print(per_passing_reading)

85.81


In [22]:
# District: % OVERALL PASSING
per_passing_both = round((stu_passed_both / total_students) * 100, 2)

print(per_passing_both)

65.17


## District Summary:

In [23]:
# DISTRICT METRICS SUMMARY TABLE
district_metrics_df = pd.DataFrame({"Total Schools": [total_schools], 
                                    "Total Students": [total_students],
                                    "Total Budget": [total_budget], 
                                    "Average Math Score": [avg_math], 
                                    "Average Reading Score": [avg_reading],
                                    "% Passing Math": f"{per_passing_math}%", 
                                    "% Passing Reading": f"{per_passing_reading}%", 
                                    "% Overall Passing": f"{per_passing_both}%"})

# Change formats of numbers
district_metrics_df["Total Budget"] = district_metrics_df["Total Budget"].map("${:,.2f}".format)
district_metrics_df["Total Students"] = district_metrics_df["Total Students"].map("{:,}".format)

district_metrics_df.head()

Unnamed: 0,Total Schools,Total Students,Total Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing
0,15,39170,"$24,649,428.00",78.99,81.88,74.98%,85.81%,65.17%


In [24]:
# All Students
all_grouped_metrics = renamed_students_df.groupby(["School Name"])

# all_grouped_metrics["Student ID"].count().head(15)

In [25]:
# Put student ID count into a DataFrame
student_ID_metrics_df = pd.DataFrame(all_grouped_metrics["Student ID"].count())
# Rename column
student_ID_metrics_df = student_ID_metrics_df.rename(columns={"Student ID": "Total Students"})
student_ID_metrics_df.head()

Unnamed: 0_level_0,Total Students
School Name,Unnamed: 1_level_1
Bailey High School,4976
Cabrera High School,1858
Figueroa High School,2949
Ford High School,2739
Griffin High School,1468


In [26]:
# Put average Scores into a DataFrame
scores_metrics_df = round(pd.DataFrame(all_grouped_metrics.mean()), 2)
# Remove student ID average
scores_metrics_df = scores_metrics_df[["Reading Score", "Math Score"]]
# Rename Columns
scores_metrics_df = scores_metrics_df.rename(columns={"Reading Score": "Average Reading Score",
                                             "Math Score": "Average Math Score"})
scores_metrics_df.head()

Unnamed: 0_level_0,Average Reading Score,Average Math Score
School Name,Unnamed: 1_level_1,Unnamed: 2_level_1
Bailey High School,81.03,77.05
Cabrera High School,83.98,83.06
Figueroa High School,81.16,76.71
Ford High School,80.75,77.1
Griffin High School,83.82,83.35


In [27]:
# Merge total students with average scores
grouped_metrics_df = pd.merge(student_ID_metrics_df, scores_metrics_df, on="School Name")
grouped_metrics_df.head()

Unnamed: 0_level_0,Total Students,Average Reading Score,Average Math Score
School Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Bailey High School,4976,81.03,77.05
Cabrera High School,1858,83.98,83.06
Figueroa High School,2949,81.16,76.71
Ford High School,2739,80.75,77.1
Griffin High School,1468,83.82,83.35


In [28]:
# Merge previous grouped metrics with school metrics
merged_df = pd.merge(grouped_metrics_df, renamed_schools_df, on="School Name")
# Rename columns
merged_df = merged_df.rename(columns={"Type": "School Type", "Budget": "Total School Budget"})  
# Reorder columns
merged_df = merged_df[["School Name", "School Type", "Total Students", "Total School Budget", 
                       "Average Math Score", "Average Reading Score"]]
# Create 'per student budget' column
merged_df["Per Student Budget"] = merged_df["Total School Budget"] / merged_df["Total Students"]

merged_df.head()

Unnamed: 0,School Name,School Type,Total Students,Total School Budget,Average Math Score,Average Reading Score,Per Student Budget
0,Bailey High School,District,4976,3124928,77.05,81.03,628.0
1,Cabrera High School,Charter,1858,1081356,83.06,83.98,582.0
2,Figueroa High School,District,2949,1884411,76.71,81.16,639.0
3,Ford High School,District,2739,1763916,77.1,80.75,644.0
4,Griffin High School,Charter,1468,917500,83.35,83.82,625.0


In [29]:
# Create a groupby for passing math, passing reading, and overall passing metrics
passed_math_groupby = passed_math_df.groupby(["School Name"])

passed_reading_groupby = passed_reading_df.groupby(["School Name"])

passed_both_groupby = passed_both_df.groupby(["School Name"])


In [30]:
# Put Passing Math metrics into a DataFrame
passed_math_metrics_df = pd.DataFrame(passed_math_groupby["Student ID"].count())
# Rename column
passed_math_metrics_df = passed_math_metrics_df.rename(columns={"Student ID": "Num Passing Math"})
#passed_math_metrics_df

# Put Passing Reading metrics into a DataFrame
passed_reading_metrics_df = pd.DataFrame(passed_reading_groupby["Student ID"].count())
# Rename column
passed_reading_metrics_df = passed_reading_metrics_df.rename(columns={"Student ID": "Num Passing Reading"})
#passed_reading_metrics_df


# Put Overall Passing metrics into a DataFrame
passed_both_metrics_df = pd.DataFrame(passed_both_groupby["Student ID"].count())
# Rename column
passed_both_metrics_df = passed_both_metrics_df.rename(columns={"Student ID": "Num Overall Passing"})
#passed_both_metrics_df


In [31]:
# Merge Passing number Metrics
passing_metrics_df = pd.merge(passed_math_metrics_df, passed_reading_metrics_df, on="School Name")
passing_metrics_df = pd.merge(passing_metrics_df, passed_both_metrics_df, on="School Name")

passing_metrics_df.head()

Unnamed: 0_level_0,Num Passing Math,Num Passing Reading,Num Overall Passing
School Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Bailey High School,3318,4077,2719
Cabrera High School,1749,1803,1697
Figueroa High School,1946,2381,1569
Ford High School,1871,2172,1487
Griffin High School,1371,1426,1330


In [32]:
# Merged passing number metrics with previous merged dataframe
school_merged_df = pd.merge(merged_df, passing_metrics_df, on="School Name")

# Calculate % passing
school_merged_df["% Passing Math"] = round((school_merged_df["Num Passing Math"] / school_merged_df["Total Students"])* 100, 2)
school_merged_df["% Passing Reading"] = round((school_merged_df["Num Passing Reading"] / school_merged_df["Total Students"]) * 100, 2)
school_merged_df["% Overall Passing"] = round((school_merged_df["Num Overall Passing"] / school_merged_df["Total Students"]) * 100, 2)

school_merged_df.head()

Unnamed: 0,School Name,School Type,Total Students,Total School Budget,Average Math Score,Average Reading Score,Per Student Budget,Num Passing Math,Num Passing Reading,Num Overall Passing,% Passing Math,% Passing Reading,% Overall Passing
0,Bailey High School,District,4976,3124928,77.05,81.03,628.0,3318,4077,2719,66.68,81.93,54.64
1,Cabrera High School,Charter,1858,1081356,83.06,83.98,582.0,1749,1803,1697,94.13,97.04,91.33
2,Figueroa High School,District,2949,1884411,76.71,81.16,639.0,1946,2381,1569,65.99,80.74,53.2
3,Ford High School,District,2739,1763916,77.1,80.75,644.0,1871,2172,1487,68.31,79.3,54.29
4,Griffin High School,Charter,1468,917500,83.35,83.82,625.0,1371,1426,1330,93.39,97.14,90.6


## School Summary:

In [69]:
# SCHOOL SUMMARY:
school_merged_df = school_merged_df[["School Name", "School Type", "Total Students", "Total School Budget", 
                                     "Per Student Budget", "Average Math Score", "Average Reading Score", 
                                     "% Passing Math", "% Passing Reading", "% Overall Passing"]]
# Set index to School Name
school_merged_df = school_merged_df.set_index("School Name")
school_merged_df

Unnamed: 0_level_0,School Type,Total Students,Total School Budget,Per Student Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing
School Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Bailey High School,District,4976,3124928,628.0,77.05,81.03,66.68,81.93,54.64
Cabrera High School,Charter,1858,1081356,582.0,83.06,83.98,94.13,97.04,91.33
Figueroa High School,District,2949,1884411,639.0,76.71,81.16,65.99,80.74,53.2
Ford High School,District,2739,1763916,644.0,77.1,80.75,68.31,79.3,54.29
Griffin High School,Charter,1468,917500,625.0,83.35,83.82,93.39,97.14,90.6
Hernandez High School,District,4635,3022020,652.0,77.29,80.93,66.75,80.86,53.53
Holden High School,Charter,427,248087,581.0,83.8,83.81,92.51,96.25,89.23
Huang High School,District,2917,1910635,655.0,76.63,81.18,65.68,81.32,53.51
Johnson High School,District,4761,3094650,650.0,77.07,80.97,66.06,81.22,53.54
Pena High School,Charter,962,585858,609.0,83.84,84.04,94.59,95.95,90.54


## Top 5 Performing Schools:

In [68]:
# TOP 5 PERFORMING SCHOOLS (BY % OVERALL PASSING)
overall_highest_df = school_merged_df.sort_values("% Overall Passing", ascending=False)

# Reset indexes
overall_highest_df = overall_highest_df.reset_index(drop=True)

# Drop rows except for Top 5 
top_5_schools_df = overall_highest_df.iloc[0:5, :]

# Set index to school name
top_5_schools_df = top_5_schools_df.set_index("School Name")
top_5_schools_df

Unnamed: 0_level_0,School Type,Total Students,Total School Budget,Per Student Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing
School Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Cabrera High School,Charter,1858,1081356,582.0,83.06,83.98,94.13,97.04,91.33
Thomas High School,Charter,1635,1043130,638.0,83.42,83.85,93.27,97.31,90.95
Griffin High School,Charter,1468,917500,625.0,83.35,83.82,93.39,97.14,90.6
Wilson High School,Charter,2283,1319574,578.0,83.27,83.99,93.87,96.54,90.58
Pena High School,Charter,962,585858,609.0,83.84,84.04,94.59,95.95,90.54


## Bottom 5 Performing Schools:

In [66]:
# BOTTOM 5 PERFORMING SCHOOLS (BY % OVERALL PASSING)
overall_lowest_df = school_merged_df.sort_values("% Overall Passing", ascending=True)

# Reset indexes
overall_lowest_df = overall_lowest_df.reset_index(drop=True)

# Drop all rows except for Bottom 5
bottom_5_schools_df = overall_lowest_df.iloc[0:5, :]

# Set Index to school name
bottom_5_schools_df = bottom_5_schools_df.set_index("School Name")
bottom_5_schools_df

Unnamed: 0_level_0,School Type,Total Students,Total School Budget,Per Student Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing
School Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Rodriguez High School,District,3999,2547363,637.0,76.84,80.74,66.37,80.22,52.99
Figueroa High School,District,2949,1884411,639.0,76.71,81.16,65.99,80.74,53.2
Huang High School,District,2917,1910635,655.0,76.63,81.18,65.68,81.32,53.51
Hernandez High School,District,4635,3022020,652.0,77.29,80.93,66.75,80.86,53.53
Johnson High School,District,4761,3094650,650.0,77.07,80.97,66.06,81.22,53.54


In [36]:
# Isolate 9th Grade Math Scores
math_9th = renamed_students_df.loc[renamed_students_df["Grade"] == "9th", ["School Name", "Math Score"]]
# Reset Index
math_9th = math_9th.reset_index(drop=True)
# Rename column
math_9th = math_9th.rename(columns={"Math Score": "9th"})
# Groupby School Name with average scores
math_9th_groupby = round(math_9th.groupby(["School Name"], as_index=False).mean(), 2)

# Isolate 10th Grade Math Scores
math_10th = renamed_students_df.loc[renamed_students_df["Grade"] == "10th", ["School Name", "Math Score"]]
# Reset Index
math_10th = math_10th.reset_index(drop=True)
# Rename column
math_10th = math_10th.rename(columns={"Math Score": "10th"})
# Groupby School Name with average scores
math_10th_groupby = round(math_10th.groupby(["School Name"], as_index=False).mean(), 2)

# Isolate 11th Grade Math Scores
math_11th = renamed_students_df.loc[renamed_students_df["Grade"] == "11th", ["School Name", "Math Score"]]
# Reset Index
math_11th = math_11th.reset_index(drop=True)
# Rename column
math_11th = math_11th.rename(columns={"Math Score": "11th"})
# Groupby School Name with averages scores
math_11th_groupby = round(math_11th.groupby(["School Name"], as_index=False).mean(), 2)

# Isolate 12th Grade Math Scores
math_12th = renamed_students_df.loc[renamed_students_df["Grade"] == "12th", ["School Name", "Math Score"]]
# Reset Index
math_12th = math_12th.reset_index(drop=True)
# Rename column
math_12th = math_12th.rename(columns={"Math Score": "12th"})
# Groupby School Name with average scores
math_12th_groupby = round(math_12th.groupby(["School Name"], as_index=False).mean(), 2)

In [37]:
# Isolate 9th Grade Reading Scores
reading_9th = renamed_students_df.loc[renamed_students_df["Grade"] == "9th", ["School Name", "Reading Score"]]
# Reset Index
reading_9th = reading_9th.reset_index(drop=True)
# Rename column
reading_9th = reading_9th.rename(columns={"Reading Score": "9th"})
# Groupby School Name with average scores
reading_9th_groupby = round(reading_9th.groupby(["School Name"], as_index=False).mean(), 2)

# Isolate 10th Grade Reading Scores
reading_10th = renamed_students_df.loc[renamed_students_df["Grade"] == "10th", ["School Name", "Reading Score"]]
# Reset Index
reading_10th = reading_10th.reset_index(drop=True)
# Rename column
reading_10th = reading_10th.rename(columns={"Reading Score": "10th"})
# Groupby School Name with average scores
reading_10th_groupby = round(reading_10th.groupby(["School Name"], as_index=False).mean(), 2)

# Isolate 11th Grade Reading Scores
reading_11th = renamed_students_df.loc[renamed_students_df["Grade"] == "11th", ["School Name", "Reading Score"]]
# Reset Index
reading_11th = reading_11th.reset_index(drop=True)
# Rename column
reading_11th = reading_11th.rename(columns={"Reading Score": "11th"})
# Groupby School Name with average scores
reading_11th_groupby = round(reading_11th.groupby(["School Name"], as_index=False).mean(), 2)

# Isolate 12th Grade Reading Scores
reading_12th = renamed_students_df.loc[renamed_students_df["Grade"] == "12th", ["School Name", "Reading Score"]]
# Reset Index
reading_12th = reading_12th.reset_index(drop=True)
# Rename column
reading_12th = reading_12th.rename(columns={"Reading Score": "12th"})
# Groupby School Name with average scores
reading_12th_groupby = round(reading_12th.groupby(["School Name"], as_index=False).mean(), 2)

## Math Scores by Grade:

In [65]:
# Merge first two math DataFrames
math_merge_1 = pd.merge(math_9th_groupby, math_10th_groupby, on="School Name")

# Merge last two math DataFrames
math_merge_2 = pd.merge(math_11th_groupby, math_12th_groupby, on="School Name")

# Merge all DataFrames
math_merge_df = pd.merge(math_merge_1, math_merge_2, on="School Name")

# Set Index to school name
math_merge_df = math_merge_df.set_index("School Name")
math_merge_df

Unnamed: 0_level_0,9th,10th,11th,12th
School Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Bailey High School,77.08,77.0,77.52,76.49
Cabrera High School,83.09,83.15,82.77,83.28
Figueroa High School,76.4,76.54,76.88,77.15
Ford High School,77.36,77.67,76.92,76.18
Griffin High School,82.04,84.23,83.84,83.36
Hernandez High School,77.44,77.34,77.14,77.19
Holden High School,83.79,83.43,85.0,82.86
Huang High School,77.03,75.91,76.45,77.23
Johnson High School,77.19,76.69,77.49,76.86
Pena High School,83.63,83.37,84.33,84.12


## Reading Scores by Grade:

In [63]:
# Merge first two reading DataFrames
reading_merge_1 = pd.merge(reading_9th_groupby, reading_10th_groupby, on="School Name")

# Merge last two reading DataFrames
reading_merge_2 = pd.merge(reading_11th_groupby, reading_12th_groupby, on="School Name")

# Merge all DataFrames
reading_merge_df = pd.merge(reading_merge_1, reading_merge_2, on="School Name")

# Set Index to School Name
reading_merge_df = reading_merge_df.set_index("School Name")

reading_merge_df

Unnamed: 0_level_0,9th,10th,11th,12th
School Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Bailey High School,81.3,80.91,80.95,80.91
Cabrera High School,83.68,84.25,83.79,84.29
Figueroa High School,81.2,81.41,80.64,81.38
Ford High School,80.63,81.26,80.4,80.66
Griffin High School,83.37,83.71,84.29,84.01
Hernandez High School,80.87,80.66,81.4,80.86
Holden High School,83.68,83.32,83.82,84.7
Huang High School,81.29,81.51,81.42,80.31
Johnson High School,81.26,80.77,80.62,81.23
Pena High School,83.81,83.61,84.34,84.59


In [40]:
# Minimum per student budget
print(school_merged_df["Per Student Budget"].min())

# Maxiumum per student budget
print(school_merged_df["Per Student Budget"].max())

578.0
655.0


In [41]:
# Minimum School Size
print(renamed_schools_df["Size"].min())

# Maximum School Size
print(renamed_schools_df["Size"].max())

427
4976


In [42]:
# Create spending DataFrame
spending_df = school_merged_df[["Average Math Score", "Average Reading Score", "% Passing Math", "% Passing Reading",
                               "% Overall Passing", "Per Student Budget"]]

In [58]:
# Bins for school spending per student
spending_bins = [0, 600, 630, 645, 660]
spending_names = ["<$600", "\$601-630", "\$631-645", "\$646-660"]

In [59]:
# Create bins for spending
spending_df["Spending Range (Per Student)"] = pd.cut(spending_df["Per Student Budget"], spending_bins, labels=spending_names, include_lowest=True)
# Remove unneccessary columns
spending_range_df = spending_df[["Average Math Score", "Average Reading Score", "% Passing Math", 
                                 "% Passing Reading","% Overall Passing", "Spending Range (Per Student)"]]
spending_range_df.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


Unnamed: 0,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing,Spending Range (Per Student)
0,77.05,81.03,66.68,81.93,54.64,\$601-630
1,83.06,83.98,94.13,97.04,91.33,<$600
2,76.71,81.16,65.99,80.74,53.2,\$631-645
3,77.1,80.75,68.31,79.3,54.29,\$631-645
4,83.35,83.82,93.39,97.14,90.6,\$601-630


## Scores by School Spending:

In [60]:
# OUTPUT: SCORES BY SCHOOL SPENDING (PER STUDENT)
spending_groupby = spending_range_df.groupby(["Spending Range (Per Student)"])
round(spending_groupby.mean().head(15),2)

Unnamed: 0_level_0,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing
Spending Range (Per Student),Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
<$600,83.43,83.89,93.54,96.46,90.27
\$601-630,81.41,82.96,84.89,91.67,78.59
\$631-645,78.52,81.62,73.48,84.39,62.86
\$646-660,77.0,81.03,66.16,81.13,53.53


In [61]:
# Count amount in Spending Range bins
spending_df["Spending Range (Per Student)"].value_counts()

<$600        5
\$631-645    4
\$646-660    3
\$601-630    3
Name: Spending Range (Per Student), dtype: int64

In [47]:
# Create size DataFrame
size_df = pd.merge(school_merged_df, renamed_schools_df, on="School Name")
size_df = size_df[["School Name", "Average Math Score", "Average Reading Score", "% Passing Math", "% Passing Reading",
                    "% Overall Passing", "Size"]]
#size_df

In [48]:
# Bins for school size
size_bins = [0, 2000, 3000, 5000]
size_names = ["Small (<2000)", "Medium (2000-3000)", "Large (3000-4000)"]

In [49]:
# Create Bins for Size Range
size_df["Size Range"] = pd.cut(size_df["Size"], size_bins, labels=size_names, include_lowest=True)
# Remove unneccessary columns
size_range_df = size_df[["Average Math Score", "Average Reading Score", "% Passing Math", "% Passing Reading",
                  "% Overall Passing", "Size Range"]]
size_range_df.head()

Unnamed: 0,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing,Size Range
0,77.05,81.03,66.68,81.93,54.64,Large (3000-4000)
1,83.06,83.98,94.13,97.04,91.33,Small (<2000)
2,76.71,81.16,65.99,80.74,53.2,Medium (2000-3000)
3,77.1,80.75,68.31,79.3,54.29,Medium (2000-3000)
4,83.35,83.82,93.39,97.14,90.6,Small (<2000)


In [50]:
# Check amount in each bin
size_range_df["Size Range"].value_counts()

Small (<2000)         7
Large (3000-4000)     4
Medium (2000-3000)    4
Name: Size Range, dtype: int64

## Scores by School Size:

In [51]:
# OUTPUT: SCORES BY SCHOOL SIZE
size_groupby = size_range_df.groupby(["Size Range"])
round(size_groupby.mean().head(15), 2)

Unnamed: 0_level_0,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing
Size Range,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Small (<2000),83.5,83.88,93.58,96.59,90.41
Medium (2000-3000),78.43,81.77,73.46,84.48,62.9
Large (3000-4000),77.06,80.92,66.46,81.06,53.68


In [52]:
# Merge school tables for scores by school type
type_df = pd.merge(school_merged_df, renamed_schools_df, on="School Name")

school_type_df = type_df[["Average Math Score", "Average Reading Score", "% Passing Math", "% Passing Reading",
                  "% Overall Passing", "Type"]]
school_type_df = school_type_df.rename(columns={"Type": "School Type"})
school_type_df.head()

Unnamed: 0,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing,School Type
0,77.05,81.03,66.68,81.93,54.64,District
1,83.06,83.98,94.13,97.04,91.33,Charter
2,76.71,81.16,65.99,80.74,53.2,District
3,77.1,80.75,68.31,79.3,54.29,District
4,83.35,83.82,93.39,97.14,90.6,Charter


In [53]:
# Find breakdown of school types
school_type_df["School Type"].value_counts()

Charter     8
District    7
Name: School Type, dtype: int64

## Scores by School Type:

In [54]:
# OUTPUT: SCORES BY SCHOOL TYPE
school_type_groupby = school_type_df.groupby(["School Type"])
round(school_type_groupby.mean().head(15), 2)

Unnamed: 0_level_0,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing
School Type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Charter,83.47,83.9,93.62,96.59,90.43
District,76.96,80.97,66.55,80.8,53.67


In [55]:
# Mapping for dollar notation
#Df[column].map(<“${:,.2f}”>.format)

In [56]:
# Mapping for comma notation
#Df[column].map(<“{:,}”>.format)

## Trends Observed:

1. Charter schools have better scores