In [990]:
# Dependencies and Setup
import pandas as pd
import numpy as np

# File to Load (Remember to Change These)
school_data_to_load = "Resources/schools_complete.csv"
student_data_to_load = "Resources/students_complete.csv"

# Read School and Student Data File and store into Pandas DataFrames
school_data = pd.read_csv(school_data_to_load)
student_data = pd.read_csv(student_data_to_load)

# Combine the data into a single dataset.  
school_data_complete = pd.merge(student_data, school_data, how="left", on=["school_name", "school_name"])

In [991]:
school_data_complete.head() #displaying chart for visualization

Unnamed: 0,Student ID,student_name,gender,grade,school_name,reading_score,math_score,School ID,type,size,budget
0,0,Paul Bradley,M,9th,Huang High School,66,79,0,District,2917,1910635
1,1,Victor Smith,M,12th,Huang High School,94,61,0,District,2917,1910635
2,2,Kevin Rodriguez,M,12th,Huang High School,90,60,0,District,2917,1910635
3,3,Dr. Richard Scott,M,12th,Huang High School,67,58,0,District,2917,1910635
4,4,Bonnie Ray,F,9th,Huang High School,97,84,0,District,2917,1910635


In [992]:
school_count=school_data_complete['school_name'].nunique() #getting total number of schools
school_count

15

In [993]:
student_count=school_data_complete['Student ID'].nunique() #getting total number of students
student_count

39170

In [994]:
budget_sum=school_data['budget'].sum() #getting total budget amount
budget_sum

24649428

In [995]:
average_math=school_data_complete['math_score'].mean() #getting average math score
average_math

78.98537145774827

In [996]:
average_reading=school_data_complete['reading_score'].mean() #getting average reading score
average_reading

81.87784018381414

In [997]:
#getting percentage passing math
percentage_math=len(school_data_complete[school_data_complete['math_score']>=70])/student_count*100
percentage_math

74.9808526933878

In [998]:
#getting percentage passing reading
percentage_reading=len(school_data_complete[school_data_complete['reading_score']>=70])/student_count*100
percentage_reading

85.80546336482001

In [999]:
#getting percentage passing both
percentage_both=len(school_data_complete[(school_data_complete['math_score']>=70) & (school_data_complete['reading_score']>=70)])/student_count*100
percentage_both

65.17232575950983

In [1000]:
pd.DataFrame({'Total Schools':school_count,
             'Total Students':student_count,
             'Total Budget':budget_sum,
             'Average Math Score':average_math,
             'Average Reading Score':average_reading,
              '% Passing Math':percentage_math,
              '% Passing Reading':percentage_reading,
             '% Overall Passing':percentage_both},index=[0])

Unnamed: 0,Total Schools,Total Students,Total Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing
0,15,39170,24649428,78.985371,81.87784,74.980853,85.805463,65.172326


In [1001]:
averages=school_data_complete.groupby('school_name')[['math_score','reading_score']].mean()
averages

Unnamed: 0_level_0,math_score,reading_score
school_name,Unnamed: 1_level_1,Unnamed: 2_level_1
Bailey High School,77.048432,81.033963
Cabrera High School,83.061895,83.97578
Figueroa High School,76.711767,81.15802
Ford High School,77.102592,80.746258
Griffin High School,83.351499,83.816757
Hernandez High School,77.289752,80.934412
Holden High School,83.803279,83.814988
Huang High School,76.629414,81.182722
Johnson High School,77.072464,80.966394
Pena High School,83.839917,84.044699


In [1002]:
averages=averages.reset_index()
school_data=school_data.reset_index()
school_data

Unnamed: 0,index,School ID,school_name,type,size,budget
0,0,0,Huang High School,District,2917,1910635
1,1,1,Figueroa High School,District,2949,1884411
2,2,2,Shelton High School,Charter,1761,1056600
3,3,3,Hernandez High School,District,4635,3022020
4,4,4,Griffin High School,Charter,1468,917500
5,5,5,Wilson High School,Charter,2283,1319574
6,6,6,Cabrera High School,Charter,1858,1081356
7,7,7,Bailey High School,District,4976,3124928
8,8,8,Holden High School,Charter,427,248087
9,9,9,Pena High School,Charter,962,585858


In [1003]:
school_data['per student budget']=school_data['budget']/school_data['size']

In [1004]:
#school_data=school_data.set_index('school_name')

In [1005]:
school_data=pd.merge(school_data,averages)

In [1006]:
school_data_complete['math_dummy']=np.where(school_data_complete['math_score']>=70,1,0)
school_data_complete

Unnamed: 0,Student ID,student_name,gender,grade,school_name,reading_score,math_score,School ID,type,size,budget,math_dummy
0,0,Paul Bradley,M,9th,Huang High School,66,79,0,District,2917,1910635,1
1,1,Victor Smith,M,12th,Huang High School,94,61,0,District,2917,1910635,0
2,2,Kevin Rodriguez,M,12th,Huang High School,90,60,0,District,2917,1910635,0
3,3,Dr. Richard Scott,M,12th,Huang High School,67,58,0,District,2917,1910635,0
4,4,Bonnie Ray,F,9th,Huang High School,97,84,0,District,2917,1910635,1
...,...,...,...,...,...,...,...,...,...,...,...,...
39165,39165,Donna Howard,F,12th,Thomas High School,99,90,14,Charter,1635,1043130,1
39166,39166,Dawn Bell,F,10th,Thomas High School,95,70,14,Charter,1635,1043130,1
39167,39167,Rebecca Tanner,F,9th,Thomas High School,73,84,14,Charter,1635,1043130,1
39168,39168,Desiree Kidd,F,10th,Thomas High School,99,90,14,Charter,1635,1043130,1


In [1007]:
math_passing=school_data_complete.groupby('school_name', as_index=False)['math_dummy'].sum()
math_passing=(math_passing)

In [1008]:
school_data=pd.merge(school_data, math_passing)
school_data['% passing math']=school_data['math_dummy']/school_data['size']*100
school_data=school_data.drop('math_dummy',axis=1) #removing math_dummy
school_data

Unnamed: 0,index,School ID,school_name,type,size,budget,per student budget,math_score,reading_score,% passing math
0,0,0,Huang High School,District,2917,1910635,655.0,76.629414,81.182722,65.683922
1,1,1,Figueroa High School,District,2949,1884411,639.0,76.711767,81.15802,65.988471
2,2,2,Shelton High School,Charter,1761,1056600,600.0,83.359455,83.725724,93.867121
3,3,3,Hernandez High School,District,4635,3022020,652.0,77.289752,80.934412,66.752967
4,4,4,Griffin High School,Charter,1468,917500,625.0,83.351499,83.816757,93.392371
5,5,5,Wilson High School,Charter,2283,1319574,578.0,83.274201,83.989488,93.867718
6,6,6,Cabrera High School,Charter,1858,1081356,582.0,83.061895,83.97578,94.133477
7,7,7,Bailey High School,District,4976,3124928,628.0,77.048432,81.033963,66.680064
8,8,8,Holden High School,Charter,427,248087,581.0,83.803279,83.814988,92.505855
9,9,9,Pena High School,Charter,962,585858,609.0,83.839917,84.044699,94.594595


In [1009]:
school_data_complete['reading_dummy']=np.where(school_data_complete['reading_score']>=70,1,0)
school_data_complete

Unnamed: 0,Student ID,student_name,gender,grade,school_name,reading_score,math_score,School ID,type,size,budget,math_dummy,reading_dummy
0,0,Paul Bradley,M,9th,Huang High School,66,79,0,District,2917,1910635,1,0
1,1,Victor Smith,M,12th,Huang High School,94,61,0,District,2917,1910635,0,1
2,2,Kevin Rodriguez,M,12th,Huang High School,90,60,0,District,2917,1910635,0,1
3,3,Dr. Richard Scott,M,12th,Huang High School,67,58,0,District,2917,1910635,0,0
4,4,Bonnie Ray,F,9th,Huang High School,97,84,0,District,2917,1910635,1,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...
39165,39165,Donna Howard,F,12th,Thomas High School,99,90,14,Charter,1635,1043130,1,1
39166,39166,Dawn Bell,F,10th,Thomas High School,95,70,14,Charter,1635,1043130,1,1
39167,39167,Rebecca Tanner,F,9th,Thomas High School,73,84,14,Charter,1635,1043130,1,1
39168,39168,Desiree Kidd,F,10th,Thomas High School,99,90,14,Charter,1635,1043130,1,1


In [1010]:
reading_passing=school_data_complete.groupby('school_name', as_index=False)['reading_dummy'].sum()
reading_passing=(reading_passing)

In [1011]:
school_data=pd.merge(school_data, reading_passing)
school_data['% passing reading']=school_data['reading_dummy']/school_data['size']*100
school_data=school_data.drop('reading_dummy',axis=1) #removing reading_dummy
school_data

Unnamed: 0,index,School ID,school_name,type,size,budget,per student budget,math_score,reading_score,% passing math,% passing reading
0,0,0,Huang High School,District,2917,1910635,655.0,76.629414,81.182722,65.683922,81.316421
1,1,1,Figueroa High School,District,2949,1884411,639.0,76.711767,81.15802,65.988471,80.739234
2,2,2,Shelton High School,Charter,1761,1056600,600.0,83.359455,83.725724,93.867121,95.854628
3,3,3,Hernandez High School,District,4635,3022020,652.0,77.289752,80.934412,66.752967,80.862999
4,4,4,Griffin High School,Charter,1468,917500,625.0,83.351499,83.816757,93.392371,97.138965
5,5,5,Wilson High School,Charter,2283,1319574,578.0,83.274201,83.989488,93.867718,96.539641
6,6,6,Cabrera High School,Charter,1858,1081356,582.0,83.061895,83.97578,94.133477,97.039828
7,7,7,Bailey High School,District,4976,3124928,628.0,77.048432,81.033963,66.680064,81.93328
8,8,8,Holden High School,Charter,427,248087,581.0,83.803279,83.814988,92.505855,96.252927
9,9,9,Pena High School,Charter,962,585858,609.0,83.839917,84.044699,94.594595,95.945946


In [1012]:
school_data_complete['both_dummy']=np.where((school_data_complete['reading_score']) & (school_data_complete['math_score'])>=70,1,0)
school_data_complete #both_dummy is not accurate

Unnamed: 0,Student ID,student_name,gender,grade,school_name,reading_score,math_score,School ID,type,size,budget,math_dummy,reading_dummy,both_dummy
0,0,Paul Bradley,M,9th,Huang High School,66,79,0,District,2917,1910635,1,0,0
1,1,Victor Smith,M,12th,Huang High School,94,61,0,District,2917,1910635,0,1,0
2,2,Kevin Rodriguez,M,12th,Huang High School,90,60,0,District,2917,1910635,0,1,0
3,3,Dr. Richard Scott,M,12th,Huang High School,67,58,0,District,2917,1910635,0,0,0
4,4,Bonnie Ray,F,9th,Huang High School,97,84,0,District,2917,1910635,1,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
39165,39165,Donna Howard,F,12th,Thomas High School,99,90,14,Charter,1635,1043130,1,1,0
39166,39166,Dawn Bell,F,10th,Thomas High School,95,70,14,Charter,1635,1043130,1,1,1
39167,39167,Rebecca Tanner,F,9th,Thomas High School,73,84,14,Charter,1635,1043130,1,1,0
39168,39168,Desiree Kidd,F,10th,Thomas High School,99,90,14,Charter,1635,1043130,1,1,0


In [1013]:
both_passing=school_data_complete.groupby('school_name', as_index=False)['both_dummy'].sum()
both_passing=(both_passing) #both Passing is not accurate

In [1014]:
school_data=pd.merge(school_data, both_passing)
school_data['% overall passing']=school_data['both_dummy']/school_data['size']*100
school_data=school_data.drop('both_dummy',axis=1) #removing both_dummy
school_data #% Overall Passing is not accurate because the dummy itself wasn't accurate

Unnamed: 0,index,School ID,school_name,type,size,budget,per student budget,math_score,reading_score,% passing math,% passing reading,% overall passing
0,0,0,Huang High School,District,2917,1910635,655.0,76.629414,81.182722,65.683922,81.316421,29.516627
1,1,1,Figueroa High School,District,2949,1884411,639.0,76.711767,81.15802,65.988471,80.739234,30.111902
2,2,2,Shelton High School,Charter,1761,1056600,600.0,83.359455,83.725724,93.867121,95.854628,49.744463
3,3,3,Hernandez High School,District,4635,3022020,652.0,77.289752,80.934412,66.752967,80.862999,31.024811
4,4,4,Griffin High School,Charter,1468,917500,625.0,83.351499,83.816757,93.392371,97.138965,50.13624
5,5,5,Wilson High School,Charter,2283,1319574,578.0,83.274201,83.989488,93.867718,96.539641,49.890495
6,6,6,Cabrera High School,Charter,1858,1081356,582.0,83.061895,83.97578,94.133477,97.039828,50.861141
7,7,7,Bailey High School,District,4976,3124928,628.0,77.048432,81.033963,66.680064,81.93328,30.345659
8,8,8,Holden High School,Charter,427,248087,581.0,83.803279,83.814988,92.505855,96.252927,49.648712
9,9,9,Pena High School,Charter,962,585858,609.0,83.839917,84.044699,94.594595,95.945946,49.89605


In [1015]:
school_data=school_data.set_index('school_name') #setting school name as the index to mimic the style of the example

In [1016]:
school_data #% Overall Passing is not accurate

Unnamed: 0_level_0,index,School ID,type,size,budget,per student budget,math_score,reading_score,% passing math,% passing reading,% overall passing
school_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
Huang High School,0,0,District,2917,1910635,655.0,76.629414,81.182722,65.683922,81.316421,29.516627
Figueroa High School,1,1,District,2949,1884411,639.0,76.711767,81.15802,65.988471,80.739234,30.111902
Shelton High School,2,2,Charter,1761,1056600,600.0,83.359455,83.725724,93.867121,95.854628,49.744463
Hernandez High School,3,3,District,4635,3022020,652.0,77.289752,80.934412,66.752967,80.862999,31.024811
Griffin High School,4,4,Charter,1468,917500,625.0,83.351499,83.816757,93.392371,97.138965,50.13624
Wilson High School,5,5,Charter,2283,1319574,578.0,83.274201,83.989488,93.867718,96.539641,49.890495
Cabrera High School,6,6,Charter,1858,1081356,582.0,83.061895,83.97578,94.133477,97.039828,50.861141
Bailey High School,7,7,District,4976,3124928,628.0,77.048432,81.033963,66.680064,81.93328,30.345659
Holden High School,8,8,Charter,427,248087,581.0,83.803279,83.814988,92.505855,96.252927,49.648712
Pena High School,9,9,Charter,962,585858,609.0,83.839917,84.044699,94.594595,95.945946,49.89605


In [1017]:
school_data=school_data.drop('index',axis=1) #removing index

In [1018]:
school_data=school_data.drop('School ID',axis=1) #removing school id

In [1019]:
school_data #% Overall Passing is not accurate

Unnamed: 0_level_0,type,size,budget,per student budget,math_score,reading_score,% passing math,% passing reading,% overall passing
school_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Huang High School,District,2917,1910635,655.0,76.629414,81.182722,65.683922,81.316421,29.516627
Figueroa High School,District,2949,1884411,639.0,76.711767,81.15802,65.988471,80.739234,30.111902
Shelton High School,Charter,1761,1056600,600.0,83.359455,83.725724,93.867121,95.854628,49.744463
Hernandez High School,District,4635,3022020,652.0,77.289752,80.934412,66.752967,80.862999,31.024811
Griffin High School,Charter,1468,917500,625.0,83.351499,83.816757,93.392371,97.138965,50.13624
Wilson High School,Charter,2283,1319574,578.0,83.274201,83.989488,93.867718,96.539641,49.890495
Cabrera High School,Charter,1858,1081356,582.0,83.061895,83.97578,94.133477,97.039828,50.861141
Bailey High School,District,4976,3124928,628.0,77.048432,81.033963,66.680064,81.93328,30.345659
Holden High School,Charter,427,248087,581.0,83.803279,83.814988,92.505855,96.252927,49.648712
Pena High School,Charter,962,585858,609.0,83.839917,84.044699,94.594595,95.945946,49.89605


In [1020]:
school_data=school_data.rename(columns={'school_name':'School Name',
                                       'type': 'School Type',
                                       'size': 'Total Students',
                                       'budget': 'Total School Budget',
                                       'per student budget': 'Per Student Budget',
                                       'math_score': 'Average Math Score',
                                       'reading_score': 'Average Reading Score',
                                       '% passing math':'% Passing Math',
                                       '% passing reading': '% Passing Reading',
                                       '% overall passing': '% Overall Passing'})

In [1021]:
school_data=school_data.sort_values('school_name') #reorganizing alphabetically by school name

In [1022]:
school_data #% Overall Passing is not accurate

Unnamed: 0_level_0,School Type,Total Students,Total School Budget,Per Student Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing
school_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Bailey High School,District,4976,3124928,628.0,77.048432,81.033963,66.680064,81.93328,30.345659
Cabrera High School,Charter,1858,1081356,582.0,83.061895,83.97578,94.133477,97.039828,50.861141
Figueroa High School,District,2949,1884411,639.0,76.711767,81.15802,65.988471,80.739234,30.111902
Ford High School,District,2739,1763916,644.0,77.102592,80.746258,68.309602,79.299014,30.485579
Griffin High School,Charter,1468,917500,625.0,83.351499,83.816757,93.392371,97.138965,50.13624
Hernandez High School,District,4635,3022020,652.0,77.289752,80.934412,66.752967,80.862999,31.024811
Holden High School,Charter,427,248087,581.0,83.803279,83.814988,92.505855,96.252927,49.648712
Huang High School,District,2917,1910635,655.0,76.629414,81.182722,65.683922,81.316421,29.516627
Johnson High School,District,4761,3094650,650.0,77.072464,80.966394,66.057551,81.222432,30.182735
Pena High School,Charter,962,585858,609.0,83.839917,84.044699,94.594595,95.945946,49.89605


In [1023]:
school_data = school_data.sort_values("% Overall Passing", ascending=False) #Top Performing School per % Overall Passing
school_data.head()#% Overall Passing is not accurate

Unnamed: 0_level_0,School Type,Total Students,Total School Budget,Per Student Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing
school_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Wright High School,Charter,1800,1049400,583.0,83.682222,83.955,93.333333,96.611111,51.555556
Thomas High School,Charter,1635,1043130,638.0,83.418349,83.84893,93.272171,97.308869,51.192661
Cabrera High School,Charter,1858,1081356,582.0,83.061895,83.97578,94.133477,97.039828,50.861141
Griffin High School,Charter,1468,917500,625.0,83.351499,83.816757,93.392371,97.138965,50.13624
Pena High School,Charter,962,585858,609.0,83.839917,84.044699,94.594595,95.945946,49.89605


In [1024]:
school_data = school_data.sort_values("% Overall Passing", ascending=True) #Bottom Performing School per % Overall Passing
school_data.head() #% Overall Passing is not accurate

Unnamed: 0_level_0,School Type,Total Students,Total School Budget,Per Student Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing
school_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Huang High School,District,2917,1910635,655.0,76.629414,81.182722,65.683922,81.316421,29.516627
Rodriguez High School,District,3999,2547363,637.0,76.842711,80.744686,66.366592,80.220055,29.607402
Figueroa High School,District,2949,1884411,639.0,76.711767,81.15802,65.988471,80.739234,30.111902
Johnson High School,District,4761,3094650,650.0,77.072464,80.966394,66.057551,81.222432,30.182735
Bailey High School,District,4976,3124928,628.0,77.048432,81.033963,66.680064,81.93328,30.345659


In [1025]:
#Per School Math Scores by Grade
school_data_grouped=school_data_complete.groupby(['school_name','grade']).mean()
school_data_grouped['math_score'].mean()
school_data_grouped

Unnamed: 0_level_0,Unnamed: 1_level_0,Student ID,reading_score,math_score,School ID,size,budget,math_dummy,reading_dummy,both_dummy
school_name,grade,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
Bailey High School,10th,20365.058918,80.907183,76.996772,7.0,4976.0,3124928.0,0.663438,0.835351,0.32042
Bailey High School,11th,20345.148681,80.945643,77.515588,7.0,4976.0,3124928.0,0.684253,0.805755,0.315747
Bailey High School,12th,20386.724708,80.912451,76.492218,7.0,4976.0,3124928.0,0.642996,0.81323,0.285019
Bailey High School,9th,20344.481481,81.303155,77.083676,7.0,4976.0,3124928.0,0.671468,0.821674,0.291495
Cabrera High School,10th,16909.487124,84.253219,83.154506,6.0,1858.0,1081356.0,0.939914,0.974249,0.484979
Cabrera High School,11th,16955.047718,83.788382,82.76556,6.0,1858.0,1081356.0,0.923237,0.970954,0.514523
Cabrera High School,12th,16924.570681,84.287958,83.277487,6.0,1858.0,1081356.0,0.950262,0.968586,0.497382
Cabrera High School,9th,16969.63447,83.676136,83.094697,6.0,1858.0,1081356.0,0.952652,0.967803,0.532197
Figueroa High School,10th,4332.703801,81.408912,76.539974,1.0,2949.0,1884411.0,0.665793,0.812582,0.302752
Figueroa High School,11th,4424.478138,80.640339,76.884344,1.0,2949.0,1884411.0,0.653032,0.781382,0.294781


In [1026]:
math_per_grade=school_data_grouped.pivot_table('math_score',['school_name'],'grade')
#math_per_grade.reindex_axis(['9th','10th','11th','12th'],axis=1) reorganizing DOES NOT WORK
math_per_grade

grade,10th,11th,12th,9th
school_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Bailey High School,76.996772,77.515588,76.492218,77.083676
Cabrera High School,83.154506,82.76556,83.277487,83.094697
Figueroa High School,76.539974,76.884344,77.151369,76.403037
Ford High School,77.672316,76.918058,76.179963,77.361345
Griffin High School,84.229064,83.842105,83.356164,82.04401
Hernandez High School,77.337408,77.136029,77.186567,77.438495
Holden High School,83.429825,85.0,82.855422,83.787402
Huang High School,75.908735,76.446602,77.225641,77.027251
Johnson High School,76.691117,77.491653,76.863248,77.187857
Pena High School,83.372,84.328125,84.121547,83.625455


In [1027]:
#Per School Reading Scores by Grade
reading_per_grade=school_data_grouped.pivot_table('reading_score',['school_name'],'grade')
#math_per_grade.reindex_axis(['9th','10th','11th','12th'],axis=1) reorganizing DOES NOT WORK
reading_per_grade

grade,10th,11th,12th,9th
school_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Bailey High School,80.907183,80.945643,80.912451,81.303155
Cabrera High School,84.253219,83.788382,84.287958,83.676136
Figueroa High School,81.408912,80.640339,81.384863,81.198598
Ford High School,81.262712,80.403642,80.662338,80.632653
Griffin High School,83.706897,84.288089,84.013699,83.369193
Hernandez High School,80.660147,81.39614,80.857143,80.86686
Holden High School,83.324561,83.815534,84.698795,83.677165
Huang High School,81.512386,81.417476,80.305983,81.290284
Johnson High School,80.773431,80.616027,81.227564,81.260714
Pena High School,83.612,84.335938,84.59116,83.807273


In [1028]:
school_data.head() #to help me with visualization because i keep getting errors when trying to bin

Unnamed: 0_level_0,School Type,Total Students,Total School Budget,Per Student Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing
school_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Huang High School,District,2917,1910635,655.0,76.629414,81.182722,65.683922,81.316421,29.516627
Rodriguez High School,District,3999,2547363,637.0,76.842711,80.744686,66.366592,80.220055,29.607402
Figueroa High School,District,2949,1884411,639.0,76.711767,81.15802,65.988471,80.739234,30.111902
Johnson High School,District,4761,3094650,650.0,77.072464,80.966394,66.057551,81.222432,30.182735
Bailey High School,District,4976,3124928,628.0,77.048432,81.033963,66.680064,81.93328,30.345659


In [1029]:
#Scores by School Spending
bins = [0, 585, 630, 645, 680]
group_names = ["<$585", "$585-630", "$630-645", "$645-680"]
school_data['Spending Ranges per Student']=pd.cut(school_data["Per Student Budget"], bins, labels=group_names).head()
#school_data.groupby('Spending Ranges per Student')
#print(school_data["Average Math Score"].count())
#school_data[["Average Math Score", "Average Reading Score", "% Passing Math", "% Passing Reading", "% Overall Passing"]].mean()
#school_data=school_data.set_index('Spending Ranges per Student') 
school_data #can't figure out how to make make the chart appear as the one in PyCitySchools_starter

Unnamed: 0_level_0,School Type,Total Students,Total School Budget,Per Student Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing,Spending Ranges per Student
school_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
Huang High School,District,2917,1910635,655.0,76.629414,81.182722,65.683922,81.316421,29.516627,$645-680
Rodriguez High School,District,3999,2547363,637.0,76.842711,80.744686,66.366592,80.220055,29.607402,$630-645
Figueroa High School,District,2949,1884411,639.0,76.711767,81.15802,65.988471,80.739234,30.111902,$630-645
Johnson High School,District,4761,3094650,650.0,77.072464,80.966394,66.057551,81.222432,30.182735,$645-680
Bailey High School,District,4976,3124928,628.0,77.048432,81.033963,66.680064,81.93328,30.345659,$585-630
Ford High School,District,2739,1763916,644.0,77.102592,80.746258,68.309602,79.299014,30.485579,
Hernandez High School,District,4635,3022020,652.0,77.289752,80.934412,66.752967,80.862999,31.024811,
Holden High School,Charter,427,248087,581.0,83.803279,83.814988,92.505855,96.252927,49.648712,
Shelton High School,Charter,1761,1056600,600.0,83.359455,83.725724,93.867121,95.854628,49.744463,
Wilson High School,Charter,2283,1319574,578.0,83.274201,83.989488,93.867718,96.539641,49.890495,


In [1032]:
#Scores by School Size
bins = [0, 1000, 2000, 5000]
group_names = ["Small <1000", "Medium 1000-2000", "Large 2000-5000"]
school_data['School Size']=pd.cut(school_data["Total Students"], bins, labels=group_names).head()
school_data #can't figure out how to make make the chart appear as the one in PyCitySchools_starter

Unnamed: 0_level_0,School Type,Total Students,Total School Budget,Per Student Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing,Spending Ranges per Student,School Size
school_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
Huang High School,District,2917,1910635,655.0,76.629414,81.182722,65.683922,81.316421,29.516627,$645-680,Large 2000-5000
Rodriguez High School,District,3999,2547363,637.0,76.842711,80.744686,66.366592,80.220055,29.607402,$630-645,Large 2000-5000
Figueroa High School,District,2949,1884411,639.0,76.711767,81.15802,65.988471,80.739234,30.111902,$630-645,Large 2000-5000
Johnson High School,District,4761,3094650,650.0,77.072464,80.966394,66.057551,81.222432,30.182735,$645-680,Large 2000-5000
Bailey High School,District,4976,3124928,628.0,77.048432,81.033963,66.680064,81.93328,30.345659,$585-630,Large 2000-5000
Ford High School,District,2739,1763916,644.0,77.102592,80.746258,68.309602,79.299014,30.485579,,
Hernandez High School,District,4635,3022020,652.0,77.289752,80.934412,66.752967,80.862999,31.024811,,
Holden High School,Charter,427,248087,581.0,83.803279,83.814988,92.505855,96.252927,49.648712,,
Shelton High School,Charter,1761,1056600,600.0,83.359455,83.725724,93.867121,95.854628,49.744463,,
Wilson High School,Charter,2283,1319574,578.0,83.274201,83.989488,93.867718,96.539641,49.890495,,


In [1031]:
#Scores by School Type
bins = ["Charter","District"] #how do you bin strings?
group_names = ["Charter", "District"]
school_data['School Type']=pd.cut(school_data["School Type"], bins, labels=group_names).head() #can't figure out why column names school_data don't work

ValueError: could not convert string to float: 'Charter'

In [None]:
#Since i couldn't figure out where to find the data after pd.cut in the binning portion of the homework, im basing my
#observations from PyCitySchools_starter:

#1.Charter schools perform better than District schools overall
#2.The more money a school spends per student, the poorer their students perform (save your money!)
#3.Students are consistently better at reading than math