In [1]:
import pandas as pd
import numpy as np

In [2]:
#start with achievement file
achievement_file = pd.read_excel("data/bldgAchievement_1718.xlsx")
achievement_file.head()
achievement_file.dtypes

Building IRN                                 int64
Building Name                               object
District IRN                                 int64
District Name                               object
County                                      object
Region                                      object
Address                                     object
City and Zip                                object
Phone Number                                object
Principal                                   object
Performance Index Score 2017-18             object
Performance Index Score 2016-17             object
Performance Index Score 2015-16             object
Performance Index Percent 2017-18           object
Letter Grade of Performance Index           object
Percent of Students Not Tested              object
Percent of Students Limited                 object
Percent of Students Basic                   object
Percent of Students Proficient              object
Percent of Students Accelerated

In [3]:
# discard unnecessary columns
achievement_reduced = pd.DataFrame({"Building_IRN": achievement_file["Building IRN"], "Building_Name": achievement_file["Building Name"], "District_IRN": achievement_file["District IRN"], "performanceIndexPercent_2017_2018": achievement_file["Performance Index Score 2017-18"], "performanceIndexPercent_2016_2017": achievement_file["Performance Index Score 2016-17"], "performanceIndexPercent_2015_2016": achievement_file["Performance Index Score 2015-16"]})
#make sure it worked
achievement_reduced.head()

Unnamed: 0,Building_IRN,Building_Name,District_IRN,performanceIndexPercent_2017_2018,performanceIndexPercent_2016_2017,performanceIndexPercent_2015_2016
0,59,Ada Elementary School,45187,99.326,98.476,96.125
1,67,Ada High School,45187,90.416,92.066,91.667
2,83,Sandusky Middle School,44743,70.178,66.398,62.772
3,102,Meigs Primary School,48520,NC,NC,NC
4,105,Meigs Intermediate School,48520,71.376,74.145,70.394


In [4]:
#filter for cmsd schooles only 
cmsd_achieve = achievement_reduced[achievement_reduced["District_IRN"] == 43786]

In [5]:
#reorder
cmsd_achieve = cmsd_achieve[["Building_IRN", "Building_Name", "District_IRN", "performanceIndexPercent_2015_2016", "performanceIndexPercent_2016_2017", "performanceIndexPercent_2017_2018"]]

cmsd_achieve.replace('NC', np.nan, inplace=True)

In [6]:
#helper function that filters cmsd schools based on IRN so i don't need to keep repeating
def cmsd_filter(df):
    return df[df["District IRN"] == 43786]


In [7]:
# read in the necessary excel files
valueAdded_1516 = pd.read_excel("data/valueAdded_1516.xls")
valueAdded_1617 = pd.read_excel("data/valueAdded_1617.xls")
valueAdded_1718 = pd.read_excel("data/valueAdded_1718.xlsx")

In [8]:
print("1516 dtypes =====================")
print(valueAdded_1516.dtypes)
print("1617 dtypes ======================")
print(valueAdded_1617.dtypes)
print("1718 dtypes =======================")
print(valueAdded_1718.dtypes)

District IRN                                     int64
District Name                                   object
Building IRN                                     int64
Building Name                                   object
County                                          object
Region                                          object
Overall Value Added Grade                       object
Overall Composite                               object
Gifted Value Added Grade                        object
Gifted Composite                                object
Students with Disabilities Value Added Grade    object
Students with Disabilities composite            object
Lowest 20% Value Added Grade                    object
Lowest 20% Value Added Composite                object
High Mobility Value Added Grade                 object
High Mobility Composite                         object
Watermark                                       object
dtype: object
District IRN                                     in

In [9]:
#call the cmsd filter function on the value added files, then save the filtered files to a new dataframe 
cmsd_value1516 = cmsd_filter(valueAdded_1516)

In [10]:
cmsd_value1617 = cmsd_filter(valueAdded_1617)
cmsd_value1718 = cmsd_filter(valueAdded_1718)

In [11]:
#cmsd_value1516 = pd.DataFrame({"Building_IRN": cmsd_value1516["Building IRN"], "District_IRN": cmsd_value1516["District IRN"], "Overall Value Added Grade 2015-16": cmsd_value1516["Overall Value Added Grade"]})

In [12]:
#another function that reduces dataframes to only the necessary columns
def value_reduce(df, years):
    year = years
    return pd.DataFrame({"Building_IRN": df["Building IRN"], "District_IRN": df["District IRN"], f"Overall Value Added Grade {year}": df["Overall Value Added Grade"]})
    

In [13]:
cmsdValReduced_1516 = value_reduce(cmsd_value1516, 1516)

In [14]:
#make sure the function actually worked
cmsdValReduced_1516


Unnamed: 0,Building_IRN,District_IRN,Overall Value Added Grade 1516
447,224,43786,F
448,318,43786,C
449,489,43786,F
450,729,43786,F
451,828,43786,F
452,930,43786,A
453,1040,43786,F
454,2378,43786,F
455,3137,43786,F
456,4234,43786,F


In [15]:
cmsdValReduced_1617 = value_reduce(cmsd_value1617, 1617)
cmsdValReduced_1718 = value_reduce(cmsd_value1718, 1718)

In [16]:
#duplicate columns will end with _y
cmsd_valAdded_merge1 = pd.merge(cmsdValReduced_1516, cmsdValReduced_1617, how="outer", on="Building_IRN", suffixes=('', '_y'))

In [17]:
cmsd_valAdded_merge2 = pd.merge(cmsd_valAdded_merge1, cmsdValReduced_1718, how='outer', on='Building_IRN', suffixes=('', '_y'))

In [18]:
def drop_y(df):
    # list comprehension of the columns that end with _y
    to_drop = [x for x in df if x.endswith('_y')]
    df.drop(to_drop, axis=1, inplace=True)

In [19]:
drop_y(cmsd_valAdded_merge2)

In [20]:
cmsd_valAdded_merge2.head()

Unnamed: 0,Building_IRN,District_IRN,Overall Value Added Grade 1516,Overall Value Added Grade 1617,Overall Value Added Grade 1718
0,224,43786.0,F,F,F
1,318,43786.0,C,F,F
2,489,43786.0,F,F,F
3,729,43786.0,F,F,F
4,828,43786.0,F,F,F


In [21]:
#==========================================================================================================================
#in reviewing the spreadsheets, i noticed that enrollment data appeared on several tabs, so need to specify first three sheets, otherwise it only loads in first sheet
oh_enroll1516 = pd.read_excel("data/enrollment_1516.xls", sheet_name=[0, 1, 2])

#oh_enroll1516[0].head()
oh_enroll1516[0].columns.values

array(['District IRN', 'District Name', 'Building IRN', 'Building Name',
       'County', 'Region', 'City and Zip Code', 'Phone # ', 'Principal',
       'School Type', 'Pre-School Enrollment', 'Kindergarten Enrollment',
       'First Grade Enrollment', 'Second Grade Enrollment',
       'Third Grade Enrollment', 'Fourth Grade Enrollment',
       'Fifth Grade Enrollment', 'Sixth Grade Enrollment',
       'Seventh Grade Enrollment', 'Eighth Grade Enrollment',
       'Ninth Grade Enrollment ', 'Tenth Grade Enrollment',
       'Eleventh Grade Enrollment', 'Twelfth Grade Enrollment',
       'Thirteenth Grade Enrollment',
       'Enrollment Past Twelfth Grade (Students with Disabilities)',
       'Watermark'], dtype=object)

In [22]:
#write a helper function that does cleanup for me
def enroll_replaceReduce(df, years):
    #call function within function
    cmsd_df = cmsd_filter(df)
    #replace NC with zero
    cmsd_df.replace('NC', '0', inplace=True)
    #replace <10 value with zero
    cmsd_df.replace('^.*<.*$', '0', regex=True, inplace=True)
    #create list of columns to drop, then drop
    cols = cmsd_df.columns
    #force them to be integers (object_infer and other methods did not work)
    cmsd_df[cols] = cmsd_df[cols].apply(pd.to_numeric, errors='coerce')
    year = years
    #create a new column for enrollment total and use iloc to add
    cmsd_df[f'Enrollment_Total_{year}'] = cmsd_df.iloc[:, -17:-1].sum(1)
    #drop columns i'll no longer need
    return pd.DataFrame({"Building_IRN": cmsd_df["Building IRN"], "District_IRN": cmsd_df["District IRN"], f'Enrollment_Total_{year}': cmsd_df[f'Enrollment_Total_{year}']})


In [23]:
oh_enroll1617 = pd.read_excel("data/enrollment_1617.xls", sheet_name=[0, 1, 2])

In [24]:
oh_enroll1617[0].columns.values

array(['District IRN', 'District Name', 'Building IRN', 'Building Name',
       'County', 'Region', 'City and Zip Code', 'Phone # ', 'Principal',
       'School Type', 'Pre-School Enrollment', 'Kindergarten Enrollment',
       'First Grade Enrollment', 'Second Grade Enrollment',
       'Third Grade Enrollment', 'Fourth Grade Enrollment',
       'Fifth Grade Enrollment', 'Sixth Grade Enrollment',
       'Seventh Grade Enrollment', 'Eighth Grade Enrollment',
       'Ninth Grade Enrollment ', 'Tenth Grade Enrollment',
       'Eleventh Grade Enrollment', 'Twelfth Grade Enrollment',
       'Thirteenth Grade Enrollment',
       'Enrollment Past Twelfth Grade (Students with Disabilities)',
       'Watermark'], dtype=object)

In [25]:
oh_enroll1718 = pd.read_excel("data/enrollment_1718.xlsx", sheet_name=[0, 1, 2])

In [26]:
oh_enroll1718[0].columns.values

array(['Building IRN', 'Building Name', 'District IRN', 'District Name',
       'County', 'Region', 'City and Zip Code', 'Phone Number',
       'Principal', 'Pre-School Enrollment', 'Kindergarten Enrollment',
       'First Grade Enrollment', 'Second Grade Enrollment',
       'Third Grade Enrollment', 'Fourth Grade Enrollment',
       'Fifth Grade Enrollment', 'Sixth Grade Enrollment',
       'Seventh Grade Enrollment', 'Eighth Grade Enrollment',
       'Ninth Grade Enrollment', 'Tenth Grade Enrollment',
       'Eleventh Grade Enrollment', 'Twelfth Grade Enrollment',
       'Thirteenth Grade Enrollment',
       'Enrollment Past Twelfth Grade (Students with Disabilities)',
       'Watermark'], dtype=object)

In [27]:
def concat_function(df1, df2, df3):
    df_concat1 = pd.concat([df1, df2])
    return pd.concat([df_concat1, df3])

In [28]:
cmsd_enroll1516_0 = enroll_replaceReduce(oh_enroll1516[0], 1516)
cmsd_enroll1516_1 = enroll_replaceReduce(oh_enroll1516[1], 1516)
cmsd_enroll1516_2 = enroll_replaceReduce(oh_enroll1516[2], 1516)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  method=method)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self[k1] = value[k2]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  from ipykernel import kernelapp as app


In [29]:
cmsd_enrollment_1516 = concat_function(cmsd_enroll1516_0, cmsd_enroll1516_1, cmsd_enroll1516_2)

In [30]:
cmsd_enrollment_1516

Unnamed: 0,Building_IRN,District_IRN,Enrollment_Total_1516
271,224,43786,378.0
272,489,43786,424.0
273,729,43786,406.0
274,828,43786,325.0
275,1040,43786,441.0
276,2378,43786,533.0
277,3137,43786,300.0
278,4234,43786,346.0
279,5066,43786,315.0
280,5637,43786,325.0


In [31]:
cmsd_enroll1617_0 = enroll_replaceReduce(oh_enroll1617[0], 1617)
cmsd_enroll1617_1 = enroll_replaceReduce(oh_enroll1617[1], 1617)
cmsd_enroll1617_2 = enroll_replaceReduce(oh_enroll1617[2], 1617)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  from ipykernel import kernelapp as app


In [32]:
cmsd_enrollment_1617 = concat_function(cmsd_enroll1617_0, cmsd_enroll1617_1, cmsd_enroll1617_2)

cmsd_enrollment_1617

In [33]:
cmsd_enrollment_1617

Unnamed: 0,Building_IRN,District_IRN,Enrollment_Total_1617
265,224,43786,395.0
266,489,43786,439.0
267,729,43786,386.0
268,828,43786,311.0
269,1040,43786,465.0
270,2378,43786,581.0
271,3137,43786,277.0
272,4234,43786,343.0
273,5066,43786,297.0
274,5637,43786,299.0


In [34]:
cmsd_enroll1718_0 = enroll_replaceReduce(oh_enroll1718[0], 1718)
cmsd_enroll1718_1 = enroll_replaceReduce(oh_enroll1718[1], 1718)
cmsd_enroll1718_2 = enroll_replaceReduce(oh_enroll1718[2], 1718)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  from ipykernel import kernelapp as app


In [35]:
cmsd_enrollment_1718 = concat_function(cmsd_enroll1718_0, cmsd_enroll1718_1, cmsd_enroll1718_2)

In [36]:
cmsd_enrollment_1718

Unnamed: 0,Building_IRN,District_IRN,Enrollment_Total_1718
24,224,43786,443
49,318,43786,419
88,489,43786,547
143,729,43786,442
160,828,43786,395
188,930,43786,323
190,936,43786,210
202,1040,43786,513
283,2378,43786,615
325,3137,43786,329


In [37]:
df_merge1 = pd.merge(cmsd_enrollment_1516, cmsd_enrollment_1617, how="outer", on="Building_IRN", suffixes=('', '_y'))

In [38]:
cmsd_all_enrollment = pd.merge(df_merge1, cmsd_enrollment_1718, how="outer", on="Building_IRN", suffixes=('', '_y'))

In [39]:
cmsd_all_enrollment.head()

Unnamed: 0,Building_IRN,District_IRN,Enrollment_Total_1516,District_IRN_y,Enrollment_Total_1617,District_IRN_y.1,Enrollment_Total_1718
0,224,43786.0,378.0,43786.0,395.0,43786.0,443.0
1,489,43786.0,424.0,43786.0,439.0,43786.0,547.0
2,729,43786.0,406.0,43786.0,386.0,43786.0,442.0
3,828,43786.0,325.0,43786.0,311.0,43786.0,395.0
4,1040,43786.0,441.0,43786.0,465.0,43786.0,513.0


In [40]:
drop_y(cmsd_all_enrollment)

In [41]:
cmsd_all_enrollment

Unnamed: 0,Building_IRN,District_IRN,Enrollment_Total_1516,Enrollment_Total_1617,Enrollment_Total_1718
0,224,43786.0,378.0,395.0,443.0
1,489,43786.0,424.0,439.0,547.0
2,729,43786.0,406.0,386.0,442.0
3,828,43786.0,325.0,311.0,395.0
4,1040,43786.0,441.0,465.0,513.0
5,2378,43786.0,533.0,581.0,615.0
6,3137,43786.0,300.0,277.0,329.0
7,4234,43786.0,346.0,343.0,395.0
8,5066,43786.0,315.0,297.0,301.0
9,5637,43786.0,325.0,299.0,319.0


In [42]:
cmsd_all_enrollment.drop_duplicates(subset ="Building_IRN", keep='first', inplace = True)

In [43]:
cmsd_all_enrollment

Unnamed: 0,Building_IRN,District_IRN,Enrollment_Total_1516,Enrollment_Total_1617,Enrollment_Total_1718
0,224,43786.0,378.0,395.0,443.0
1,489,43786.0,424.0,439.0,547.0
2,729,43786.0,406.0,386.0,442.0
3,828,43786.0,325.0,311.0,395.0
4,1040,43786.0,441.0,465.0,513.0
5,2378,43786.0,533.0,581.0,615.0
6,3137,43786.0,300.0,277.0,329.0
7,4234,43786.0,346.0,343.0,395.0
8,5066,43786.0,315.0,297.0,301.0
9,5637,43786.0,325.0,299.0,319.0


In [44]:
val_enroll_merge = pd.merge(cmsd_all_enrollment, cmsd_valAdded_merge2, how="outer", on="Building_IRN", suffixes=('', '_y'))

In [45]:
val_enroll_merge

Unnamed: 0,Building_IRN,District_IRN,Enrollment_Total_1516,Enrollment_Total_1617,Enrollment_Total_1718,District_IRN_y,Overall Value Added Grade 1516,Overall Value Added Grade 1617,Overall Value Added Grade 1718
0,224,43786.0,378.0,395.0,443.0,43786.0,F,F,F
1,489,43786.0,424.0,439.0,547.0,43786.0,F,F,F
2,729,43786.0,406.0,386.0,442.0,43786.0,F,F,F
3,828,43786.0,325.0,311.0,395.0,43786.0,F,F,F
4,1040,43786.0,441.0,465.0,513.0,43786.0,F,F,C
5,2378,43786.0,533.0,581.0,615.0,43786.0,F,F,F
6,3137,43786.0,300.0,277.0,329.0,43786.0,F,F,F
7,4234,43786.0,346.0,343.0,395.0,43786.0,F,F,F
8,5066,43786.0,315.0,297.0,301.0,43786.0,F,F,F
9,5637,43786.0,325.0,299.0,319.0,43786.0,F,F,F


In [46]:
single_cmsd_dataset = pd.merge(cmsd_achieve, val_enroll_merge, how="outer", on="Building_IRN", suffixes=('', '_y'))

In [47]:
single_cmsd_dataset

Unnamed: 0,Building_IRN,Building_Name,District_IRN,performanceIndexPercent_2015_2016,performanceIndexPercent_2016_2017,performanceIndexPercent_2017_2018,District_IRN_y,Enrollment_Total_1516,Enrollment_Total_1617,Enrollment_Total_1718,District_IRN_y.1,Overall Value Added Grade 1516,Overall Value Added Grade 1617,Overall Value Added Grade 1718
0,224,Adlai Stevenson School,43786.0,43.903,49.271,48.158,43786.0,378.0,395.0,443.0,43786.0,F,F,F
1,318,Menlo Park Academy,43786.0,110.76,106.422,108.824,43786.0,328.0,360.0,419.0,43786.0,C,F,F
2,489,Almira,43786.0,46.384,46.791,49.955,43786.0,424.0,439.0,547.0,43786.0,F,F,F
3,729,Andrew J Rickoff,43786.0,44.327,47.131,49.875,43786.0,406.0,386.0,442.0,43786.0,F,F,F
4,828,Anton Grdina,43786.0,38.904,40.361,42.672,43786.0,325.0,311.0,395.0,43786.0,F,F,F
5,930,Cleveland Entrepreneurship Preparatory School,43786.0,73.607,75.062,76.657,43786.0,295.0,311.0,323.0,43786.0,A,A,A
6,936,Promise Academy,43786.0,,,44.667,,,,210.0,,,,NR
7,1040,Artemus Ward,43786.0,56.816,60.942,65.324,43786.0,441.0,465.0,513.0,43786.0,F,F,C
8,2378,Benjamin Franklin,43786.0,64.892,64.372,69.865,43786.0,533.0,581.0,615.0,43786.0,F,F,F
9,3137,Bolton,43786.0,36.583,41.123,38.271,43786.0,300.0,277.0,329.0,43786.0,F,F,F


In [48]:
drop_y(single_cmsd_dataset)

In [49]:
single_cmsd_dataset

Unnamed: 0,Building_IRN,Building_Name,District_IRN,performanceIndexPercent_2015_2016,performanceIndexPercent_2016_2017,performanceIndexPercent_2017_2018,Enrollment_Total_1516,Enrollment_Total_1617,Enrollment_Total_1718,Overall Value Added Grade 1516,Overall Value Added Grade 1617,Overall Value Added Grade 1718
0,224,Adlai Stevenson School,43786.0,43.903,49.271,48.158,378.0,395.0,443.0,F,F,F
1,318,Menlo Park Academy,43786.0,110.76,106.422,108.824,328.0,360.0,419.0,C,F,F
2,489,Almira,43786.0,46.384,46.791,49.955,424.0,439.0,547.0,F,F,F
3,729,Andrew J Rickoff,43786.0,44.327,47.131,49.875,406.0,386.0,442.0,F,F,F
4,828,Anton Grdina,43786.0,38.904,40.361,42.672,325.0,311.0,395.0,F,F,F
5,930,Cleveland Entrepreneurship Preparatory School,43786.0,73.607,75.062,76.657,295.0,311.0,323.0,A,A,A
6,936,Promise Academy,43786.0,,,44.667,,,210.0,,,NR
7,1040,Artemus Ward,43786.0,56.816,60.942,65.324,441.0,465.0,513.0,F,F,C
8,2378,Benjamin Franklin,43786.0,64.892,64.372,69.865,533.0,581.0,615.0,F,F,F
9,3137,Bolton,43786.0,36.583,41.123,38.271,300.0,277.0,329.0,F,F,F


In [50]:
single_cmsd_dataset.dtypes

Building_IRN                           int64
Building_Name                         object
District_IRN                         float64
performanceIndexPercent_2015_2016     object
performanceIndexPercent_2016_2017     object
performanceIndexPercent_2017_2018     object
Enrollment_Total_1516                float64
Enrollment_Total_1617                float64
Enrollment_Total_1718                float64
Overall Value Added Grade 1516        object
Overall Value Added Grade 1617        object
Overall Value Added Grade 1718        object
dtype: object

In [51]:
#convert to numeric
single_cmsd_dataset['performanceIndexPercent_2015_2016'] = single_cmsd_dataset['performanceIndexPercent_2015_2016'].astype(np.float64)
single_cmsd_dataset['performanceIndexPercent_2016_2017'] = single_cmsd_dataset['performanceIndexPercent_2016_2017'].astype(np.float64)
single_cmsd_dataset['performanceIndexPercent_2017_2018'] = single_cmsd_dataset['performanceIndexPercent_2017_2018'].astype(np.float64)


In [52]:
single_cmsd_dataset

Unnamed: 0,Building_IRN,Building_Name,District_IRN,performanceIndexPercent_2015_2016,performanceIndexPercent_2016_2017,performanceIndexPercent_2017_2018,Enrollment_Total_1516,Enrollment_Total_1617,Enrollment_Total_1718,Overall Value Added Grade 1516,Overall Value Added Grade 1617,Overall Value Added Grade 1718
0,224,Adlai Stevenson School,43786.0,43.903,49.271,48.158,378.0,395.0,443.0,F,F,F
1,318,Menlo Park Academy,43786.0,110.760,106.422,108.824,328.0,360.0,419.0,C,F,F
2,489,Almira,43786.0,46.384,46.791,49.955,424.0,439.0,547.0,F,F,F
3,729,Andrew J Rickoff,43786.0,44.327,47.131,49.875,406.0,386.0,442.0,F,F,F
4,828,Anton Grdina,43786.0,38.904,40.361,42.672,325.0,311.0,395.0,F,F,F
5,930,Cleveland Entrepreneurship Preparatory School,43786.0,73.607,75.062,76.657,295.0,311.0,323.0,A,A,A
6,936,Promise Academy,43786.0,,,44.667,,,210.0,,,NR
7,1040,Artemus Ward,43786.0,56.816,60.942,65.324,441.0,465.0,513.0,F,F,C
8,2378,Benjamin Franklin,43786.0,64.892,64.372,69.865,533.0,581.0,615.0,F,F,F
9,3137,Bolton,43786.0,36.583,41.123,38.271,300.0,277.0,329.0,F,F,F


In [53]:
single_cmsd_dataset.dtypes

Building_IRN                           int64
Building_Name                         object
District_IRN                         float64
performanceIndexPercent_2015_2016    float64
performanceIndexPercent_2016_2017    float64
performanceIndexPercent_2017_2018    float64
Enrollment_Total_1516                float64
Enrollment_Total_1617                float64
Enrollment_Total_1718                float64
Overall Value Added Grade 1516        object
Overall Value Added Grade 1617        object
Overall Value Added Grade 1718        object
dtype: object

In [54]:
single_cmsd_dataset.to_csv("data/single_cmsd_dataset.csv")