In [1]:
# Import pandas for DataFrame
import pandas as pd

In [2]:
# Identify files
schools_file = "Resources/schools_complete.csv"
students_file = "Resources/students_complete.csv"

In [3]:
# Load schools csv into a DataFrame
og_schools_df = pd.read_csv(schools_file, encoding="utf8")

og_schools_df.columns

Index(['School ID', 'school_name', 'type', 'size', 'budget'], dtype='object')

In [4]:
# Load students csv into a DataFrame
og_students_df = pd.read_csv(students_file, encoding="utf8")

og_students_df.columns

Index(['Student ID', 'student_name', 'gender', 'grade', 'school_name',
       'reading_score', 'math_score'],
      dtype='object')

In [5]:
# Rename columns
renamed_schools_df = og_schools_df.rename(columns= {"school_name": "School Name",
                                                "type": "Type",
                                                "size": "Size",
                                                "budget": "Budget"})
renamed_schools_df.head()

Unnamed: 0,School ID,School Name,Type,Size,Budget
0,0,Huang High School,District,2917,1910635
1,1,Figueroa High School,District,2949,1884411
2,2,Shelton High School,Charter,1761,1056600
3,3,Hernandez High School,District,4635,3022020
4,4,Griffin High School,Charter,1468,917500


In [6]:
# Rename columns
renamed_students_df = og_students_df.rename(columns= {"student_name": "Student Name",
                                                  "gender": "Gender",
                                                  "grade": "Grade",
                                                  "school_name": "School Name",
                                                  "reading_score":"Reading Score",
                                                  "math_score": "Math Score"})
renamed_students_df.head()

Unnamed: 0,Student ID,Student Name,Gender,Grade,School Name,Reading Score,Math Score
0,0,Paul Bradley,M,9th,Huang High School,66,79
1,1,Victor Smith,M,12th,Huang High School,94,61
2,2,Kevin Rodriguez,M,12th,Huang High School,90,60
3,3,Dr. Richard Scott,M,12th,Huang High School,67,58
4,4,Bonnie Ray,F,9th,Huang High School,97,84


In [7]:
# District: Total Schools
total_schools = renamed_schools_df["School ID"].count()

print(total_schools)

15


In [8]:
# District: Total Students
total_students = renamed_students_df["Student ID"].count()

print(total_students)

39170


In [9]:
# District: Total Budget
total_budget = renamed_schools_df["Budget"].sum()

print(total_budget)

24649428


In [10]:
# District: Average Math Score
avg_math = renamed_students_df["Math Score"].mean()

print(avg_math)

78.98537145774827


In [11]:
# District: Average Reading Score
avg_reading = renamed_students_df["Reading Score"].mean()

print(avg_reading)

81.87784018381414


In [12]:
# Find min and max of math score
renamed_students_df["Math Score"].describe()

count    39170.000000
mean        78.985371
std         12.309968
min         55.000000
25%         69.000000
50%         79.000000
75%         89.000000
max         99.000000
Name: Math Score, dtype: float64

In [13]:
# Find min and max of reading score
renamed_students_df["Reading Score"].describe()

count    39170.00000
mean        81.87784
std         10.23958
min         63.00000
25%         73.00000
50%         82.00000
75%         91.00000
max         99.00000
Name: Reading Score, dtype: float64

In [14]:
# District: % Passing Math
# Passing Score: 70?
passed_math_df = renamed_students_df.loc[renamed_students_df["Math Score"] > 70, ["Student ID", "Student Name", "Gender", "Grade", 
                                                                "School Name", "Math Score", "Reading Score"]]
passed_math_df.head()

Unnamed: 0,Student ID,Student Name,Gender,Grade,School Name,Math Score,Reading Score
0,0,Paul Bradley,M,9th,Huang High School,79,66
4,4,Bonnie Ray,F,9th,Huang High School,84,97
5,5,Bryan Miranda,M,9th,Huang High School,94,94
6,6,Sheena Carter,F,11th,Huang High School,80,82
8,8,Michael Roth,M,10th,Huang High School,87,95


In [15]:
# Number of Students: Passed Math
stu_passed_math = passed_math_df["Student ID"].count()

print(stu_passed_math)

28356


In [16]:
# District: % Passing Reading
# Passing Score: 70?
passed_reading_df = renamed_students_df.loc[renamed_students_df["Reading Score"] > 70, ["Student ID", "Student Name", "Gender", "Grade", 
                                                               "School Name", "Reading Score", "Math Score"]]
passed_reading_df.head()

Unnamed: 0,Student ID,Student Name,Gender,Grade,School Name,Reading Score,Math Score
1,1,Victor Smith,M,12th,Huang High School,94,61
2,2,Kevin Rodriguez,M,12th,Huang High School,90,60
4,4,Bonnie Ray,F,9th,Huang High School,97,84
5,5,Bryan Miranda,M,9th,Huang High School,94,94
6,6,Sheena Carter,F,11th,Huang High School,82,80


In [17]:
# Number of Students: Passed Reading
stu_passed_reading = passed_reading_df["Student ID"].count()

print(stu_passed_reading)

32500


In [18]:
# District: % Overall Passing
# Passing Score: 70?
passed_both_df = passed_math_df.loc[passed_math_df["Reading Score"] > 70, ["Student ID", "Student Name", "Gender", "Grade", 
                                           "School Name", "Reading Score", "Math Score"]]
passed_both_df.head()

Unnamed: 0,Student ID,Student Name,Gender,Grade,School Name,Reading Score,Math Score
4,4,Bonnie Ray,F,9th,Huang High School,97,84
5,5,Bryan Miranda,M,9th,Huang High School,94,94
6,6,Sheena Carter,F,11th,Huang High School,82,80
8,8,Michael Roth,M,10th,Huang High School,95,87
9,9,Matthew Greene,M,10th,Huang High School,96,84


In [19]:
# Number of Students: Passed Both Math & Reading
stu_passed_both = passed_both_df["Student ID"].count()

print(stu_passed_both)

23816


In [20]:
# District: % Passing Math
per_passing_math = stu_passed_math / total_students

print(per_passing_math)

0.7239213683941792


In [21]:
# District: % Passing Reading
per_passing_reading = stu_passed_reading / total_students

print(per_passing_reading)

0.8297166198621394


In [22]:
# District: % Overall Passing
per_passing_both = stu_passed_both / total_students

print(per_passing_both)

0.6080163390349758


In [23]:
# District Metrics Summary Table
district_metrics_df = pd.DataFrame({"Total Schools": [total_schools], 
                                    "Total Students": [total_students],
                                    "Total Budget": [total_budget], 
                                    "Average Math Score": [avg_math], 
                                    "Average Reading Score": [avg_reading],
                                    "% Passing Math": [per_passing_math], 
                                    "% Passing Reading": [per_passing_reading], 
                                    "% Passing Both": [per_passing_both]})

district_metrics_df.head()

Unnamed: 0,Total Schools,Total Students,Total Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Passing Both
0,15,39170,24649428,78.985371,81.87784,0.723921,0.829717,0.608016


In [24]:
# All Students
all_grouped_metrics = renamed_students_df.groupby(["School Name"])

# all_grouped_metrics["Student ID"].count().head(15)

In [25]:
# Put student ID count into a Data Frame
student_ID_metrics_df = pd.DataFrame(all_grouped_metrics["Student ID"].count())
# Rename column
student_ID_metrics_df = student_ID_metrics_df.rename(columns={"Student ID": "Total Students"})
student_ID_metrics_df

Unnamed: 0_level_0,Total Students
School Name,Unnamed: 1_level_1
Bailey High School,4976
Cabrera High School,1858
Figueroa High School,2949
Ford High School,2739
Griffin High School,1468
Hernandez High School,4635
Holden High School,427
Huang High School,2917
Johnson High School,4761
Pena High School,962


In [26]:
# Put average Scores into a DataFrame
scores_metrics_df = pd.DataFrame(all_grouped_metrics.mean())
# Remove student ID average
scores_metrics_df = scores_metrics_df[["Reading Score", "Math Score"]]
# Rename Columns
scores_metrics_df = scores_metrics_df.rename(columns={"Reading Score": "Average Reading Score",
                                             "Math Score": "Average Math Score"})
scores_metrics_df

Unnamed: 0_level_0,Average Reading Score,Average Math Score
School Name,Unnamed: 1_level_1,Unnamed: 2_level_1
Bailey High School,81.033963,77.048432
Cabrera High School,83.97578,83.061895
Figueroa High School,81.15802,76.711767
Ford High School,80.746258,77.102592
Griffin High School,83.816757,83.351499
Hernandez High School,80.934412,77.289752
Holden High School,83.814988,83.803279
Huang High School,81.182722,76.629414
Johnson High School,80.966394,77.072464
Pena High School,84.044699,83.839917


In [27]:
# Merge total students with average scores
grouped_metrics_df = pd.merge(student_ID_metrics_df, scores_metrics_df, on="School Name")
grouped_metrics_df

Unnamed: 0_level_0,Total Students,Average Reading Score,Average Math Score
School Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Bailey High School,4976,81.033963,77.048432
Cabrera High School,1858,83.97578,83.061895
Figueroa High School,2949,81.15802,76.711767
Ford High School,2739,80.746258,77.102592
Griffin High School,1468,83.816757,83.351499
Hernandez High School,4635,80.934412,77.289752
Holden High School,427,83.814988,83.803279
Huang High School,2917,81.182722,76.629414
Johnson High School,4761,80.966394,77.072464
Pena High School,962,84.044699,83.839917


In [28]:
# Merge previous grouped metrics with school metrics
merged_df = pd.merge(grouped_metrics_df, renamed_schools_df, on="School Name")
# Rename columns
merged_df = merged_df.rename(columns={"Type": "School Type", "Budget": "Total School Budget"})  
# Reorder columns
merged_df = merged_df[["School Name", "School Type", "Total Students", "Total School Budget", 
                       "Average Math Score", "Average Reading Score"]]
# Create 'per student budget' column
merged_df["Per Student Budget"] = merged_df["Total School Budget"] / merged_df["Total Students"]

merged_df

Unnamed: 0,School Name,School Type,Total Students,Total School Budget,Average Math Score,Average Reading Score,Per Student Budget
0,Bailey High School,District,4976,3124928,77.048432,81.033963,628.0
1,Cabrera High School,Charter,1858,1081356,83.061895,83.97578,582.0
2,Figueroa High School,District,2949,1884411,76.711767,81.15802,639.0
3,Ford High School,District,2739,1763916,77.102592,80.746258,644.0
4,Griffin High School,Charter,1468,917500,83.351499,83.816757,625.0
5,Hernandez High School,District,4635,3022020,77.289752,80.934412,652.0
6,Holden High School,Charter,427,248087,83.803279,83.814988,581.0
7,Huang High School,District,2917,1910635,76.629414,81.182722,655.0
8,Johnson High School,District,4761,3094650,77.072464,80.966394,650.0
9,Pena High School,Charter,962,585858,83.839917,84.044699,609.0


In [29]:
# Create a groupby for passing math, passing reading, and overall passing metrics
passed_math_groupby = passed_math_df.groupby(["School Name"])

passed_reading_groupby = passed_reading_df.groupby(["School Name"])

passed_both_groupby = passed_both_df.groupby(["School Name"])


In [30]:
# Put passing math metrics into Data Frames
passed_math_metrics_df = pd.DataFrame(passed_math_groupby["Student ID"].count())
# Rename column
passed_math_metrics_df = passed_math_metrics_df.rename(columns={"Student ID": "Num Passing Math"})
#passed_math_metrics_df

# Put passing reading metrics into Data Frames
passed_reading_metrics_df = pd.DataFrame(passed_reading_groupby["Student ID"].count())
# Rename column
passed_reading_metrics_df = passed_reading_metrics_df.rename(columns={"Student ID": "Num Passing Reading"})
#passed_reading_metrics_df

# Put overall passing metrics into Data Frames
passed_both_metrics_df = pd.DataFrame(passed_both_groupby["Student ID"].count())
# Rename column
passed_both_metrics_df = passed_both_metrics_df.rename(columns={"Student ID": "Num Overall Passing"})
#passed_both_metrics_df


In [31]:
# Merge Passing number Metrics
passing_metrics_df = pd.merge(passed_math_metrics_df, passed_reading_metrics_df, on="School Name")
passing_metrics_df = pd.merge(passing_metrics_df, passed_both_metrics_df, on="School Name")
passing_metrics_df

Unnamed: 0_level_0,Num Passing Math,Num Passing Reading,Num Overall Passing
School Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Bailey High School,3216,3946,2545
Cabrera High School,1664,1744,1561
Figueroa High School,1880,2313,1472
Ford High School,1801,2123,1405
Griffin High School,1317,1371,1228
Hernandez High School,3001,3624,2325
Holden High School,387,396,359
Huang High School,1847,2299,1456
Johnson High School,3040,3727,2371
Pena High School,882,887,816


In [32]:
# Merged passing number metrics with previous merged dataframe
school_merged_df = pd.merge(merged_df, passing_metrics_df, on="School Name")
# Calculated % passing
school_merged_df["% Passing Math"] = school_merged_df["Num Passing Math"] / school_merged_df["Total Students"]
school_merged_df["% Passing Reading"] = school_merged_df["Num Passing Reading"] / school_merged_df["Total Students"]
school_merged_df["% Overall Passing"] = school_merged_df["Num Overall Passing"] / school_merged_df["Total Students"]

school_merged_df

Unnamed: 0,School Name,School Type,Total Students,Total School Budget,Average Math Score,Average Reading Score,Per Student Budget,Num Passing Math,Num Passing Reading,Num Overall Passing,% Passing Math,% Passing Reading,% Overall Passing
0,Bailey High School,District,4976,3124928,77.048432,81.033963,628.0,3216,3946,2545,0.646302,0.793006,0.511455
1,Cabrera High School,Charter,1858,1081356,83.061895,83.97578,582.0,1664,1744,1561,0.895587,0.938644,0.840151
2,Figueroa High School,District,2949,1884411,76.711767,81.15802,639.0,1880,2313,1472,0.637504,0.784334,0.499152
3,Ford High School,District,2739,1763916,77.102592,80.746258,644.0,1801,2123,1405,0.657539,0.7751,0.512961
4,Griffin High School,Charter,1468,917500,83.351499,83.816757,625.0,1317,1371,1228,0.897139,0.933924,0.836512
5,Hernandez High School,District,4635,3022020,77.289752,80.934412,652.0,3001,3624,2325,0.647465,0.781877,0.501618
6,Holden High School,Charter,427,248087,83.803279,83.814988,581.0,387,396,359,0.906323,0.9274,0.840749
7,Huang High School,District,2917,1910635,76.629414,81.182722,655.0,1847,2299,1456,0.633185,0.788138,0.499143
8,Johnson High School,District,4761,3094650,77.072464,80.966394,650.0,3040,3727,2371,0.638521,0.782819,0.498005
9,Pena High School,Charter,962,585858,83.839917,84.044699,609.0,882,887,816,0.91684,0.922037,0.848233


In [33]:
# SCHOOL SUMMARY METRICS
school_merged_df = school_merged_df[["School Name", "School Type", "Total Students", "Total School Budget", "Per Student Budget",
                                    "Average Math Score", "Average Reading Score", "% Passing Math", "% Passing Reading", "% Overall Passing"]]

school_merged_df

Unnamed: 0,School Name,School Type,Total Students,Total School Budget,Per Student Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing
0,Bailey High School,District,4976,3124928,628.0,77.048432,81.033963,0.646302,0.793006,0.511455
1,Cabrera High School,Charter,1858,1081356,582.0,83.061895,83.97578,0.895587,0.938644,0.840151
2,Figueroa High School,District,2949,1884411,639.0,76.711767,81.15802,0.637504,0.784334,0.499152
3,Ford High School,District,2739,1763916,644.0,77.102592,80.746258,0.657539,0.7751,0.512961
4,Griffin High School,Charter,1468,917500,625.0,83.351499,83.816757,0.897139,0.933924,0.836512
5,Hernandez High School,District,4635,3022020,652.0,77.289752,80.934412,0.647465,0.781877,0.501618
6,Holden High School,Charter,427,248087,581.0,83.803279,83.814988,0.906323,0.9274,0.840749
7,Huang High School,District,2917,1910635,655.0,76.629414,81.182722,0.633185,0.788138,0.499143
8,Johnson High School,District,4761,3094650,650.0,77.072464,80.966394,0.638521,0.782819,0.498005
9,Pena High School,Charter,962,585858,609.0,83.839917,84.044699,0.91684,0.922037,0.848233


In [34]:
# TOP 5 SCHOOLS
top_5_schools_df = school_merged_df.sort_values("% Overall Passing", ascending=False)

# Reset indexes
top_5_schools_df = top_5_schools_df.reset_index(drop=True)
top_5_schools_df.head(5)

Unnamed: 0,School Name,School Type,Total Students,Total School Budget,Per Student Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing
0,Wilson High School,Charter,2283,1319574,578.0,83.274201,83.989488,0.90933,0.932545,0.848883
1,Pena High School,Charter,962,585858,609.0,83.839917,84.044699,0.91684,0.922037,0.848233
2,Wright High School,Charter,1800,1049400,583.0,83.682222,83.955,0.902778,0.934444,0.844444
3,Thomas High School,Charter,1635,1043130,638.0,83.418349,83.84893,0.902141,0.929052,0.842813
4,Holden High School,Charter,427,248087,581.0,83.803279,83.814988,0.906323,0.9274,0.840749


In [35]:
# BOTTOM 5 SCHOOLS
bottom_5_schools_df = school_merged_df.sort_values("% Overall Passing", ascending=True)

# Reset indexes
bottom_5_schools_df = bottom_5_schools_df.reset_index(drop=True)
bottom_5_schools_df.head(5)

Unnamed: 0,School Name,School Type,Total Students,Total School Budget,Per Student Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing
0,Rodriguez High School,District,3999,2547363,637.0,76.842711,80.744686,0.64066,0.777444,0.494374
1,Johnson High School,District,4761,3094650,650.0,77.072464,80.966394,0.638521,0.782819,0.498005
2,Huang High School,District,2917,1910635,655.0,76.629414,81.182722,0.633185,0.788138,0.499143
3,Figueroa High School,District,2949,1884411,639.0,76.711767,81.15802,0.637504,0.784334,0.499152
4,Hernandez High School,District,4635,3022020,652.0,77.289752,80.934412,0.647465,0.781877,0.501618


In [36]:
# Group by school & grade
school_grade_grouped = renamed_students_df.groupby(["School Name", "Grade"])

# Convert to data frame
school_grade_math_df = pd.DataFrame(school_grade_grouped["Math Score"].mean())
#school_grade_math_df

school_grade_reading_df = pd.DataFrame(school_grade_grouped["Reading Score"].mean())
#school_grade_reading_df

In [37]:
# Rename Column: MATH SCORES BY GRADE
school_grade_math_df = school_grade_math_df.rename(columns={"Math Score": "Average Math Score"})
school_grade_math_df

Unnamed: 0_level_0,Unnamed: 1_level_0,Average Math Score
School Name,Grade,Unnamed: 2_level_1
Bailey High School,10th,76.996772
Bailey High School,11th,77.515588
Bailey High School,12th,76.492218
Bailey High School,9th,77.083676
Cabrera High School,10th,83.154506
Cabrera High School,11th,82.76556
Cabrera High School,12th,83.277487
Cabrera High School,9th,83.094697
Figueroa High School,10th,76.539974
Figueroa High School,11th,76.884344


In [38]:
# Rename Column: READING SCORES BY GRADE
school_grade_reading_df = school_grade_reading_df.rename(columns={"Reading Score": "Average Reading Score"})
school_grade_reading_df

Unnamed: 0_level_0,Unnamed: 1_level_0,Average Reading Score
School Name,Grade,Unnamed: 2_level_1
Bailey High School,10th,80.907183
Bailey High School,11th,80.945643
Bailey High School,12th,80.912451
Bailey High School,9th,81.303155
Cabrera High School,10th,84.253219
Cabrera High School,11th,83.788382
Cabrera High School,12th,84.287958
Cabrera High School,9th,83.676136
Figueroa High School,10th,81.408912
Figueroa High School,11th,80.640339


In [39]:
# Minimum per student Budget
school_merged_df["Per Student Budget"].min()

578.0

In [40]:
# Maxiumum per student Budget
school_merged_df["Per Student Budget"].max()

655.0

In [41]:
# Mean per student Budget
school_merged_df["Per Student Budget"].mean()

620.0666666666667

In [42]:
# Minimum School Size
renamed_schools_df["Size"].min()

427

In [43]:
# Maximum School Size
renamed_schools_df["Size"].max()

4976

In [44]:
# Create spending DataFrame
spending_df = school_merged_df[["Average Math Score", "Average Reading Score", "% Passing Math", "% Passing Reading",
                               "% Overall Passing", "Per Student Budget"]]

In [45]:
# Bins for spending
bins = [0, 600, 630, 645, 660]
budget_names = ["0-600", "601-630", "631-645", "646-660"]

In [46]:
# Create Bins
pd.cut(spending_df["Per Student Budget"], bins, labels=budget_names, include_lowest=True)

0     601-630
1       0-600
2     631-645
3     631-645
4     601-630
5     646-660
6       0-600
7     646-660
8     646-660
9     601-630
10    631-645
11      0-600
12    631-645
13      0-600
14      0-600
Name: Per Student Budget, dtype: category
Categories (4, object): [0-600 < 601-630 < 631-645 < 646-660]

In [47]:
# Create Bins
spending_df["Spending Range"] = pd.cut(spending_df["Per Student Budget"], bins, labels=budget_names, include_lowest=True)
spending_df

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


Unnamed: 0,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing,Per Student Budget,Spending Range
0,77.048432,81.033963,0.646302,0.793006,0.511455,628.0,601-630
1,83.061895,83.97578,0.895587,0.938644,0.840151,582.0,0-600
2,76.711767,81.15802,0.637504,0.784334,0.499152,639.0,631-645
3,77.102592,80.746258,0.657539,0.7751,0.512961,644.0,631-645
4,83.351499,83.816757,0.897139,0.933924,0.836512,625.0,601-630
5,77.289752,80.934412,0.647465,0.781877,0.501618,652.0,646-660
6,83.803279,83.814988,0.906323,0.9274,0.840749,581.0,0-600
7,76.629414,81.182722,0.633185,0.788138,0.499143,655.0,646-660
8,77.072464,80.966394,0.638521,0.782819,0.498005,650.0,646-660
9,83.839917,84.044699,0.91684,0.922037,0.848233,609.0,601-630


In [59]:
# Set Index to Spending Range
spending_df.set_index("Spending Range")

Unnamed: 0_level_0,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing,Per Student Budget
Spending Range,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
601-630,77.048432,81.033963,0.646302,0.793006,0.511455,628.0
0-600,83.061895,83.97578,0.895587,0.938644,0.840151,582.0
631-645,76.711767,81.15802,0.637504,0.784334,0.499152,639.0
631-645,77.102592,80.746258,0.657539,0.7751,0.512961,644.0
601-630,83.351499,83.816757,0.897139,0.933924,0.836512,625.0
646-660,77.289752,80.934412,0.647465,0.781877,0.501618,652.0
0-600,83.803279,83.814988,0.906323,0.9274,0.840749,581.0
646-660,76.629414,81.182722,0.633185,0.788138,0.499143,655.0
646-660,77.072464,80.966394,0.638521,0.782819,0.498005,650.0
601-630,83.839917,84.044699,0.91684,0.922037,0.848233,609.0


In [61]:
# Group By Spending Range
#spending_groupby = spending_df.groupby(["Spending Range"])
#spending_groupby.head(15)

In [49]:
# Count amount in Spending Range bins
spending_df["Spending Range"].value_counts()

0-600      5
631-645    4
646-660    3
601-630    3
Name: Spending Range, dtype: int64

In [50]:
# Create size DataFrame
size_df = pd.merge(school_merged_df, renamed_schools_df, on="School Name")
size_df = size_df[["School Name", "Average Math Score", "Average Reading Score", "% Passing Math", "% Passing Reading",
                    "% Overall Passing", "Size"]]
#size_df

In [51]:
# Bins for school size
bins = [0, 2000, 3000, 5000]
size_names = ["Small", "Medium", "Large"]

In [52]:
# Create Bins
size_df["Size Range"] = pd.cut(size_df["Size"], bins, labels=size_names, include_lowest=True)
size_df

Unnamed: 0,School Name,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing,Size,Size Range
0,Bailey High School,77.048432,81.033963,0.646302,0.793006,0.511455,4976,Large
1,Cabrera High School,83.061895,83.97578,0.895587,0.938644,0.840151,1858,Small
2,Figueroa High School,76.711767,81.15802,0.637504,0.784334,0.499152,2949,Medium
3,Ford High School,77.102592,80.746258,0.657539,0.7751,0.512961,2739,Medium
4,Griffin High School,83.351499,83.816757,0.897139,0.933924,0.836512,1468,Small
5,Hernandez High School,77.289752,80.934412,0.647465,0.781877,0.501618,4635,Large
6,Holden High School,83.803279,83.814988,0.906323,0.9274,0.840749,427,Small
7,Huang High School,76.629414,81.182722,0.633185,0.788138,0.499143,2917,Medium
8,Johnson High School,77.072464,80.966394,0.638521,0.782819,0.498005,4761,Large
9,Pena High School,83.839917,84.044699,0.91684,0.922037,0.848233,962,Small


In [53]:
# Count amount in each bin
size_df["Size Range"].value_counts()

Small     7
Large     4
Medium    4
Name: Size Range, dtype: int64

In [58]:
# Set index to Size Range
size_df.set_index("Size Range")

Unnamed: 0_level_0,School Name,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing,Size
Size Range,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Large,Bailey High School,77.048432,81.033963,0.646302,0.793006,0.511455,4976
Small,Cabrera High School,83.061895,83.97578,0.895587,0.938644,0.840151,1858
Medium,Figueroa High School,76.711767,81.15802,0.637504,0.784334,0.499152,2949
Medium,Ford High School,77.102592,80.746258,0.657539,0.7751,0.512961,2739
Small,Griffin High School,83.351499,83.816757,0.897139,0.933924,0.836512,1468
Large,Hernandez High School,77.289752,80.934412,0.647465,0.781877,0.501618,4635
Small,Holden High School,83.803279,83.814988,0.906323,0.9274,0.840749,427
Medium,Huang High School,76.629414,81.182722,0.633185,0.788138,0.499143,2917
Large,Johnson High School,77.072464,80.966394,0.638521,0.782819,0.498005,4761
Small,Pena High School,83.839917,84.044699,0.91684,0.922037,0.848233,962


In [62]:
# Group By Spending Range
#size_groupby = size_df.groupby(["Size Range"])
#size_groupby.head(15)