In [1]:
import pandas as pd
import numpy as np
import re
import seaborn as sns
import matplotlib.pyplot as plt

#PATH = "C:/Users/cahib/Documents/Code/CPS_Report_Card/SQRP_Ratings"
PATH = 'C:/Users/sronkowski/Documents/GitHub/CPS_Report_Card/SQRP_Ratings'

In [2]:
#set sheet name and input document names
sheet_name = 'Elem Schools (grds PreK-8 only)'
sy15_16_doc = 'SY15_SQRP_Report_CPSEDU_FINAL_20151023.xlsx'
sy16_17_doc = 'Accountability_SQRPratings_2016-2017_SchoolLevel.xls'
sy17_18_doc = 'Accountability_SQRPratings_2017-2018_SchoolLevel.xls'
sy18_19_doc = 'Accountability_SQRPratings_2018-2019_SchoolLevel.xls'
sy19_20_doc = 'Accountability_SQRPratings_2019-2020_SchoolLevel_v20200305.xls'

#load first file
df_15_16 = pd.read_excel(f'{PATH}/{sy15_16_doc}',sheet_name = sheet_name)
#df_15_16 = pd.read_excel(f'{PATH}/{sy15_16_doc}',sheet_name = sheet_name, header = [0,1,2]) 

In [3]:
df_15_16.tail().T

Unnamed: 0,478,479,480,481,482
School ID,609977,610345,610234,610235,609973
School Name,WOODLAWN,WOODSON,YATES,YOUNG ES,ZAPATA
Network,NETWORK 9,NETWORK 9,NETWORK 5,NETWORK 3,ISP
SQRP Total Points Earned,2.6,2,3.2,2.6,3.4
SY 2015-2016 SQRP Rating,Level 2,Level 2,Level 2+,Level 2,Level 2+
...,...,...,...,...,...
Unnamed: 74,5,4,5,4,5
Unnamed: 75,10,10,10,10,10
Unnamed: 76,100,99.5,99.9,99.8,98.5
Unnamed: 77,5,5,5,5,4


In [4]:
df_15_16.tail().T

Unnamed: 0,478,479,480,481,482
School ID,609977,610345,610234,610235,609973
School Name,WOODLAWN,WOODSON,YATES,YOUNG ES,ZAPATA
Network,NETWORK 9,NETWORK 9,NETWORK 5,NETWORK 3,ISP
SQRP Total Points Earned,2.6,2,3.2,2.6,3.4
SY 2015-2016 SQRP Rating,Level 2,Level 2,Level 2+,Level 2,Level 2+
...,...,...,...,...,...
Unnamed: 74,5,4,5,4,5
Unnamed: 75,10,10,10,10,10
Unnamed: 76,100,99.5,99.9,99.8,98.5
Unnamed: 77,5,5,5,5,4


For this phase of the analysis, we only want the top-line SQRP scores, so we drop the other columns.

In [5]:
cols_to_keep = [c for c in df_15_16.columns if c[0:8] != 'Unnamed:']
cols_to_keep.remove('2015-2016 SQRP Individual Indicator Scores (Based on SY 2014-2015 Data)')
df_15_16 = df_15_16[cols_to_keep]

In [6]:
df_15_16.tail().T

Unnamed: 0,478,479,480,481,482
School ID,609977,610345,610234,610235,609973
School Name,WOODLAWN,WOODSON,YATES,YOUNG ES,ZAPATA
Network,NETWORK 9,NETWORK 9,NETWORK 5,NETWORK 3,ISP
SQRP Total Points Earned,2.6,2,3.2,2.6,3.4
SY 2015-2016 SQRP Rating,Level 2,Level 2,Level 2+,Level 2,Level 2+
SY 2015-2016 Accountability Status,Provisional Support,Intensive Support,Good Standing,Provisional Support,Good Standing


Before we merge multiple years, we will align column names to a general standard, and also add in a year column so we can match results against a given SY.

In [7]:
col_dict = {
    'SY 2015-2016 SQRP Rating':'SQRP Rating', 
    'SY 2015-2016 Accountability Status':'Accountability Status',
    '2015-2016 SQRP Individual Indicator Scores (Based on SY 2014-2015 Data)':'SQRP Individual Indicator Scores',
    'SY 2016-2017 SQRP Rating':'SQRP Rating',
    'SY 2016-2017 Accountability Status':'Accountability Status',
    'SY 2017-2018 SQRP Rating': 'SQRP Rating',
    'SY 2017-2018 Accountability Status': 'Accountability Status',
    'SY 2018-2019 SQRP Rating': 'SQRP Rating',
    'SY 2018-2019 Accountability Status': 'Accountability Status',
    'SY 2019-2020 SQRP Rating': 'SQRP Rating',
    'SY 2019-2020 Accountability Status': 'Accountability Status'
    }

In [8]:
df_15_16.rename(columns = col_dict, inplace = True)
df_15_16['School Year'] = '2015-2016'

In [9]:
df_15_16.tail().T

Unnamed: 0,478,479,480,481,482
School ID,609977,610345,610234,610235,609973
School Name,WOODLAWN,WOODSON,YATES,YOUNG ES,ZAPATA
Network,NETWORK 9,NETWORK 9,NETWORK 5,NETWORK 3,ISP
SQRP Total Points Earned,2.6,2,3.2,2.6,3.4
SQRP Rating,Level 2,Level 2,Level 2+,Level 2,Level 2+
Accountability Status,Provisional Support,Intensive Support,Good Standing,Provisional Support,Good Standing
School Year,2015-2016,2015-2016,2015-2016,2015-2016,2015-2016


We can now repeat this process over each of the school years within our time range.

In [10]:
df_16_17 = pd.read_excel(f'{PATH}/{sy16_17_doc}',sheet_name = sheet_name, header = [0,1,2])

In [11]:
df_16_17.tail().T

Unnamed: 0,Unnamed: 1,Unnamed: 2,473,474,475,476,477
2016-2017 SQRP Individual Indicator Scores (Based on SY 2015-2016 Data),School ID,Unnamed: 0_level_2,609977,610345,610234,610235,609973
2016-2017 SQRP Individual Indicator Scores (Based on SY 2015-2016 Data),School Name,Unnamed: 1_level_2,WOODLAWN,WOODSON,YATES,YOUNG ES,ZAPATA
2016-2017 SQRP Individual Indicator Scores (Based on SY 2015-2016 Data),Network,Unnamed: 2_level_2,NETWORK 9,NETWORK 9,NETWORK 5,NETWORK 3,ISP
2016-2017 SQRP Individual Indicator Scores (Based on SY 2015-2016 Data),SQRP Total Points Earned,Unnamed: 3_level_2,3.5,3.5,3.7,3.6,4.4
2016-2017 SQRP Individual Indicator Scores (Based on SY 2015-2016 Data),SY 2016-2017 SQRP Rating,Unnamed: 4_level_2,Level 1,Level 1,Level 1,Level 1,Level 1+
...,...,...,...,...,...,...,...
Other Indicators,"My Voice, My School 5 Essentials Survey",Points,4,4,5,5,5
Other Indicators,"My Voice, My School 5 Essentials Survey",Weight,10,10,10,10,10
Other Indicators,Data Quality Index Score,Score,100,99,99,100,98.6
Other Indicators,Data Quality Index Score,Points,5,5,5,5,4


Since we need to load the Excel with a Multi-index, we first slice the columns of interest, then collapse the indexing.

In [12]:
df_16_17 = df_16_17.loc[:,'2016-2017 SQRP Individual Indicator Scores (Based on SY 2015-2016 Data)']
df_16_17.columns = df_16_17.columns.get_level_values(0)

#align col names, add year col
df_16_17.rename(columns = col_dict, inplace = True)
df_16_17['School Year'] = '2016-2017'

In [13]:
df_16_17.tail().T

Unnamed: 0,473,474,475,476,477
School ID,609977,610345,610234,610235,609973
School Name,WOODLAWN,WOODSON,YATES,YOUNG ES,ZAPATA
Network,NETWORK 9,NETWORK 9,NETWORK 5,NETWORK 3,ISP
SQRP Total Points Earned,3.5,3.5,3.7,3.6,4.4
SQRP Rating,Level 1,Level 1,Level 1,Level 1,Level 1+
Accountability Status,Good Standing,Intensive Support,Good Standing,Good Standing,Good Standing
School Year,2016-2017,2016-2017,2016-2017,2016-2017,2016-2017


In [14]:
df_17_18 = pd.read_excel(f'{PATH}/{sy17_18_doc}',sheet_name = sheet_name, header = [0,1,2])

In [15]:
df_17_18 = df_17_18.loc[:,'SQRP SY2018 Individual Indicator Scores (Based on 2016-2017 Data)  Updated January 2018']
df_17_18.columns = df_17_18.columns.get_level_values(0)

#align col names, add year col
df_17_18.rename(columns = col_dict, inplace = True)
df_17_18['School Year'] = '2017-2018'

In [16]:
df_17_18.tail().T

Unnamed: 0,468,469,470,471,472
School ID,610542,610544,610548,610559,610586
School Name,WEST RIDGE,AZUELA,STEM,SHIELDS MIDDLE,SOUTHEAST
Network,Network 2,ISP,Network 6,Network 8,Network 13
SQRP Total Points Earned,4.2,3.8,3.8,3.5,3.2
SQRP Rating,Level 1+,Level 1,Level 1+,Level 1,Level 2+
Accountability Status,Good Standing,Good Standing,Good Standing,Good Standing,Good Standing
School Year,2017-2018,2017-2018,2017-2018,2017-2018,2017-2018


In [17]:
df_18_19 = pd.read_excel(f'{PATH}/{sy18_19_doc}',sheet_name = sheet_name, header = [0,1,2])

In [18]:
df_18_19 = df_18_19.loc[:,'SQRP SY2019 Individual Indicator Scores (Based on 2017-2018 Data)']
df_18_19.columns = df_18_19.columns.get_level_values(0)

#align col names, add year col
df_18_19.rename(columns = col_dict, inplace = True)
df_18_19['School Year'] = '2018-2019'

In [19]:
df_18_19.tail().T

Unnamed: 0,468,469,470,471,472
School ID,610544,610548,610559,610586,610588
School Name,AZUELA,STEM,SHIELDS MIDDLE,SOUTHEAST,RICHARDSON
Network,ISP,ISP,Network 8,Network 13,Network 10
SQRP Total Points Earned,3.5,4,3.5,3.5,3.4
SQRP Rating,Level 1,Level 1+,Level 1,Level 1,Level 2+
Accountability Status,Good Standing,Good Standing,Good Standing,Good Standing,Good Standing
School Year,2018-2019,2018-2019,2018-2019,2018-2019,2018-2019


In [20]:
df_19_20 = pd.read_excel(f'{PATH}/{sy19_20_doc}',sheet_name = sheet_name, header = [0,1,2])

In [21]:
df_19_20 = df_19_20.loc[:,'SQRP SY2020 Individual Indicator Scores (Based on 2018-2019 Data)']
df_19_20.columns = df_19_20.columns.get_level_values(0)

#align col names, add year col
df_19_20.rename(columns = col_dict, inplace = True)
df_19_20['School Year'] = '2019-2020'

In [22]:
df_19_20.tail().T

Unnamed: 0,468,469,470,471,472
School ID,610559,610586,610588,610589,610590
School Name,SHIELDS MIDDLE,SADLOWSKI,RICHARDSON,SOR JUANA,BRONZEVILLE CLASSICAL
Network,Network 8,Network 13,Network 10,Network 8,Network 9
SQRP Total Points Earned,3.7,4.1,4,.,.
SQRP Rating,Level 1,Level 1+,Level 1+,Inability to Rate,Inability to Rate
Accountability Status,Good Standing,Good Standing,Good Standing,Good Standing,Good Standing
School Year,2019-2020,2019-2020,2019-2020,2019-2020,2019-2020


In [23]:
df = df_15_16.append(df_16_17).append(df_17_18).append(df_18_19).append(df_19_20)

#reset index, drop cols with missing data
df.reset_index(drop = True, inplace = True)
df.dropna(inplace = True)

#purge errors in SQRP col, force typing of SQRP points
df = df[~(df['SQRP Total Points Earned'] == '.')]
df['SQRP Total Points Earned'] = df['SQRP Total Points Earned'].astype('float')

#force typing of school id
df['School ID'] = df['School ID'].astype('int')

In [24]:
df.tail().T

Unnamed: 0,2373,2374,2375,2376,2377
School ID,610544,610548,610559,610586,610588
School Name,AZUELA,STEM,SHIELDS MIDDLE,SADLOWSKI,RICHARDSON
Network,ISP,ISP,Network 8,Network 13,Network 10
SQRP Total Points Earned,3.7,4.2,3.7,4.1,4
SQRP Rating,Level 1,Level 1+,Level 1,Level 1+,Level 1+
Accountability Status,Good Standing,Good Standing,Good Standing,Good Standing,Good Standing
School Year,2019-2020,2019-2020,2019-2020,2019-2020,2019-2020


To filter LEARN schools, we will use the School IDs, which are as follows:

* 400046 - Romano Butler
* 400047 - Campbell
* 400048 - Excel
* 400107 - South Chicago
* 400111 - Perkins
* 400151 - LEARN 7
* 400165 - LEARN Middle School

In [25]:
id_list = ['400046', '400047', '400048', '400107', '400111', '400151', '400165']
learn_df = df[df['School ID'].isin(id_list)]

#push raw output to csv file
learn_df.to_csv(f'{PATH}/LEARN SQRP Ratings 2015-2020.csv', index = False)

## Comparison Data Gathering

To generate the most accurate possible apples-to-apples comparison, we will compare each of our Chicago schools against four schools in the same or nearby neighborhoods.

* Romano Butler
    - Johnson Elementary (AUSL) - 610274
    - Faraday Elementary (Network 5) - 610055
    - Gregory Math & Science Academy (Network 5) - 609954
    - Lawdale Elementary Community Academy (Network 5) - 610034
    
* Excel
    - Cather Elementary (Network 5) - 610251
    - Ward L Elementary (Network 5) - 610133
    - Morton Elementary Career Academy (AUSL) - 610257
    - Gregory Math & Science Academy (Network 5) - 609954

* Hunter-Perkins
    - Joplin (Network 11) - 609805
    - Barton (Network 11) - 609790
    - Foster Park (Network 11) - 609927
    - Cook (Network 11) - 609864

* Campbell
    - Cather Elementary (Network 5) - 610251
    - Dett (Network 6) - 610252
    - Spencer Technology Academy (Network 3) - 610183
    - Faraday (Network 5) - 610055

* South Chicago
    - Thorp J N (Network 12) - 610200
    - Mireles (Network 12) - 610171
    - Powell (Network 12) - 610281
    - Burnham (Network 13) - 609821
    
* LEARN 7 
    - Leland (Network 3) - 610305
    - Spencer (Network 3) - 610183
    - Ellington (Network 3) - 609904
    - Ward L Elementary (Network 5) - 610133

* LEARN Middle School
    - Cather Elementary (Network 5) - 610251
    - Ward L Elementary (Network 5) - 610133
    - Ellington (Network 3) - 609904
    - Beidler (Network 5) - 609797
    
* LEARN 9 Waukegan
    - Oakdale
    - Whittier
    - North
    - Glenwood
    - Carman-Buckner

* LEARN 6 & 10 North Chicago
    - D187 - Neal Math and Science
    - Forrestal
    - Katzenmaier 
    - Evelyn Alexander

In [26]:
romano_butler_comps = [400046, 610274, 610055, 609954, 610034]
excel_comps = [400048, 610251, 610133, 610257, 609954]
hunter_perkins_comps = [400111, 609805, 609790, 609927, 609864]
campbell_comps = [400047, 610251, 610252, 610183, 610055]
south_chicago_comps = [400107, 610200, 610171, 610281, 609821]
learn_seven_comps = [400151, 610305, 610183, 609904, 610133]
learn_middle_comps = [400165, 610305, 610183, 609904, 610133]

In [27]:
#reduce year entry to second year in entry, force typing
df['School Year'] = df['School Year'].apply(lambda x: re.split('-',x)[0])
df['School Year'] = df['School Year'].astype('int')

In [28]:
df1 = df[df['School ID'].isin(romano_butler_comps)].copy()
df2 = df[df['School ID'].isin(excel_comps)].copy()
df3 = df[df['School ID'].isin(hunter_perkins_comps)].copy()
df4 = df[df['School ID'].isin(campbell_comps)].copy()
df5 = df[df['School ID'].isin(south_chicago_comps)].copy()
df6 = df[df['School ID'].isin(learn_seven_comps)].copy()
df7 = df[df['School ID'].isin(learn_middle_comps)].copy()

We will now write a function that will allow us to generate each sheet of the output Excel file programmatically.  This function will create a pivot of the data with schools as rows, years as columns, and SQRP ratings values; it will also generate a line graph representing the same data.

In [29]:
def generate_graph_color(school, idx_count):
    '''
    Returns LEARN purple if the school variable contains 'LEARN',
    otherwise returns colors within same shade for other schools
    '''
    gray_dict = {
        2: '#D3D3D3', #lightgray
        3: '#A9A9A9', #darkgray
        4: '#696969', #dimgray
        5: '#708090', #slategray
        6: '#D3D3D3' #lightgray
    }
    if 'LEARN' in school:
        return "#542D81"
    
    else:
        return gray_dict[idx_count]    
    

In [30]:
##manual creation of a test spreadsheet

# with pd.ExcelWriter(f'{PATH}/pivot_test.xlsx') as writer:
#     output = pd.pivot(to_graph, index = 'School Name', values = 'SQRP Total Points Earned', columns = 'School Year')
    
#     sheet_name = 'test'
#     output.to_excel(writer, sheet_name=sheet_name)
    
#     workbook = writer.book
#     worksheet = writer.sheets[sheet_name]
    
#     #initialize chart
#     chart = workbook.add_chart({'type': 'line'})
    
#     #establish starting col in Excel output
#     col = 2
    
#     #generate lines for graph
#     for school in output.index:
#         chart.add_series({
#             'name': school,
#             'categories': f'={sheet_name}!$B$1:$F$1',
#             'values': f'={sheet_name}!$B${col}:$F${col}' ,
#             'line':       {'color': generate_graph_color(school, col)},
#             })
#         col += 1
    
#     #set axis values
#     chart.set_x_axis({'name': 'Year'})
#     chart.set_y_axis({
#         'name': 'SQPR Rating', 
#         'major_gridlines': {'visible': False},
#         'min': 1.5    
#         })
    
#     #set size of chart in pixels
#     chart.set_size({'width': 720, 'height': 576})
    
#     #add graph to sheet
#     worksheet.insert_chart('I3', chart)

In [31]:
def make_sqrp_excel_sheet_with_graph(df, sheet_name):
    '''
    Generates a Pivot of SQRP ratings and a 
    line graph within an active ExcelWriter session.
    '''
    sheet_name_ref = f"'{sheet_name}'"
    output = pd.pivot(df, index = 'School Name', values = 'SQRP Total Points Earned', columns = 'School Year')
    
    output.to_excel(writer, sheet_name=sheet_name)
    
    workbook = writer.book
    worksheet = writer.sheets[sheet_name]
    
    #initialize chart
    chart = workbook.add_chart({'type': 'line'})
    
    #establish starting col in Excel output
    col = 2
    
    #generate lines for graph
    for school in output.index:
        chart.add_series({
            'name': school,
            'categories': f'={sheet_name_ref}!$B$1:$F$1',
            'values': f'={sheet_name_ref}!$B${col}:$F${col}' ,
            'line':       {'color': generate_graph_color(school, col)},
            })
        col += 1
    
    #set axis values
    chart.set_x_axis({'name': 'Year'})
    chart.set_y_axis({
        'name': 'SQPR Rating', 
        'major_gridlines': {'visible': False},
        'min': 1.5    
        })
    
    #set size of chart in pixels
    chart.set_size({'width': 720, 'height': 576})
    
    #add graph to sheet
    worksheet.insert_chart('I3', chart)

In [35]:
with pd.ExcelWriter(f'{PATH}/SQRP Rating Comps, 2015-2020.xlsx') as writer:
    make_sqrp_excel_sheet_with_graph(df1, 'Romano Butler')
    make_sqrp_excel_sheet_with_graph(df2, 'Excel')
    make_sqrp_excel_sheet_with_graph(df3, 'Hunter Perkins')
    make_sqrp_excel_sheet_with_graph(df4, 'Campbell')
    make_sqrp_excel_sheet_with_graph(df5, 'South Chicago')
    make_sqrp_excel_sheet_with_graph(df6, 'LEARN 7')
    make_sqrp_excel_sheet_with_graph(df7, 'LEARN Middle School')

Here is code to graph the data within Python

In [33]:
to_graph = df[df['School ID'].isin(hunter_perkins_comps)].copy()

# #reduce year entry to second year in entry, force typing
to_graph['School Year'] = to_graph['School Year'].apply(lambda x: re.split('-',x)[0])
to_graph['School Year'] = to_graph['School Year'].astype('int')

palette ={
    "LEARN - PERKINS":"#542D81",
    "FOSTER PARK":"#7F7F7F",
    "COOK":"#7F7F7F",
    "JOPLIN":"#7F7F7F",
    "BARTON":"#7F7F7F"
    }

fig, ax = plt.subplots(1, 1, figsize = (6, 6), dpi=300)

ax = sns.lineplot(x="School Year", y="SQRP Total Points Earned", hue = 'School Name', data=to_graph)



TypeError: expected string or bytes-like object