## Import Libraries

In [1]:
# import libraries

import numpy as np
import pandas as pd 
from io import StringIO
from sklearn import preprocessing
from pandas import ExcelWriter
from datetime import datetime
 
from os import listdir
from os.path import isfile, join

import openpyxl

import math

import matplotlib.pyplot as plt

from sklearn.linear_model import LinearRegression

import scipy

In [2]:
# raise the pandas display limits so wide / long frames are printed with less truncation
pd.set_option('display.max_rows', 750)
pd.set_option('display.max_columns', 500)

## Functions

In [3]:
def plot_compute_r_squared(feature_list, y, x_list):
    """Regress ``y`` on each feature separately and collect the fit statistics.

    Despite the name, no figure is produced: the function only runs one
    single-variable regression per feature with ``scipy.stats.linregress``.

    Args:
        feature_list: Feature names; ``feature_list[i]`` labels ``x_list[i]``.
        y: Dependent-variable values, one per observation.
        x_list: One array of x-values per feature, each the same length as ``y``.

    Returns:
        Tuple ``(r2_table, r2_list, p_value_list, correlation_sign)``.
        ``r2_table`` is the frame built by ``make_r2_table``. The three lists
        hold, per feature and in input order, the r-squared and p-value
        (both rounded to 6 decimals) and the sign label: ``'negative'``,
        ``'positive'``, or ``'undefined'`` when the correlation is NaN.
    """
    # Import the submodule explicitly: a bare `import scipy` does not
    # guarantee that `scipy.stats` has been loaded.
    from scipy import stats

    r2_list = []
    p_value_list = []
    correlation_sign = []

    # one independent regression per feature
    for i in range(len(feature_list)):
        slope, intercept, r_value, p_value, std_err = stats.linregress(x_list[i], y)

        # r_value is the correlation coefficient; its square is r-squared
        r2_list.append(np.around(r_value * r_value, 6))
        p_value_list.append(np.around(p_value, 6))

        # Always append exactly one label so the three lists stay aligned;
        # a NaN correlation compares False both ways and used to be skipped.
        if np.isnan(r_value):
            correlation_sign.append('undefined')
        elif r_value < 0:
            correlation_sign.append('negative')
        else:
            correlation_sign.append('positive')

    r2_table = make_r2_table(r2_list, p_value_list, feature_list, correlation_sign)

    return (r2_table, r2_list, p_value_list, correlation_sign)

In [4]:
def make_r2_table(r2_list, p_value_list, feature_list, correlation_sign):
    """Assemble the per-feature regression results into one ranked table.

    Args:
        r2_list: r-squared value per feature.
        p_value_list: p-value per feature.
        feature_list: feature names.
        correlation_sign: sign label per feature.

    Returns:
        DataFrame with one row per feature (name, r2, p-value, sign), ordered
        by r2 from highest to lowest and re-indexed from 0.
    """
    headers = ['feature______________________________', 'r2___', 'p-value__', 'correlation']

    # each input list is one row here; transposing turns the lists into columns
    table = pd.DataFrame(
        data=[feature_list, r2_list, p_value_list, correlation_sign],
        index=headers,
    ).T

    ranked = table.sort_values(by=['r2___'], ascending=False)
    return ranked.reset_index(drop=True)

In [5]:
def calculate_adj_assessment_scores(all_data_raw, min_sum_proficiency_percentages, method,
                                    min_adj_assessment_score=0.1):
    """Compute a weighted assessment score per row and flag rows to discard.

    The raw score weights the four 'Percent Proficiency Level N' columns by
    their level (1 to 4) and divides by 100. Because the four percentages do
    not always sum to 100, the raw score is then adjusted according to ``method``.

    Args:
        all_data_raw: DataFrame holding the columns
            'Percent Proficiency Level 1' ... 'Percent Proficiency Level 4'.
        min_sum_proficiency_percentages: rows whose four percentages sum to
            less than this are flagged for removal.
        method: 1 rescales the raw score so the percentages total 100;
            2 credits the missing share of percentages at a weight of 2.5.
        min_adj_assessment_score: rows scoring below this are flagged for
            removal (this also catches rows forced to 0 for having no records).

    Returns:
        Tuple ``(adj_assessment_score_list, assessment_rows_to_drop)``: the
        adjusted score of every row as a list of floats, and the list of row
        positions (0-based) that fail a threshold or have missing percentages.

    Raises:
        ValueError: if ``method`` is neither 1 nor 2.
    """
    if method not in (1, 2):
        raise ValueError(
            "method must be 1 (scale to 100%) or 2 (fill with 2.5 avg score), got " + repr(method)
        )

    level_1 = np.asarray(all_data_raw['Percent Proficiency Level 1'], dtype=float)
    level_2 = np.asarray(all_data_raw['Percent Proficiency Level 2'], dtype=float)
    level_3 = np.asarray(all_data_raw['Percent Proficiency Level 3'], dtype=float)
    level_4 = np.asarray(all_data_raw['Percent Proficiency Level 4'], dtype=float)

    # weight each proficiency tier by its level number
    raw_score = (4 * level_4 + 3 * level_3 + 2 * level_2 + 1 * level_1) / 100
    pct_sum = level_1 + level_2 + level_3 + level_4

    # rows with no test records would divide by zero; they are scored 0 below
    no_records = pct_sum == 0

    with np.errstate(divide='ignore', invalid='ignore'):
        if method == 1:
            # scale score so that the percentages total 100%
            adjusted = raw_score * (100 / pct_sum)
        else:
            # assume the unreported share gets an average score (2.5 weighting)
            adjusted = raw_score + (2.5 * (100 - pct_sum)) / 100
        adjusted = np.where(no_records, 0.0, adjusted)

        # NaN compares False against any threshold, so missing data has to be
        # flagged explicitly or it would survive as a bogus score downstream.
        missing = np.isnan(adjusted) | np.isnan(pct_sum)
        drop_mask = (
            missing
            | (adjusted < min_adj_assessment_score)
            | (pct_sum < min_sum_proficiency_percentages)
        )

    adj_assessment_score_list = adjusted.tolist()
    assessment_rows_to_drop = np.flatnonzero(drop_mask).tolist()

    # report how many rows were scored and how many will be dropped
    print('Total Rows: ' + str(len(adj_assessment_score_list)))
    print('Assessment Score Outliers: ' + str(len(assessment_rows_to_drop)))

    return (adj_assessment_score_list, assessment_rows_to_drop)

In [6]:
def make_assess_df(all_data_raw, adj_assessment_score_list):
    """Build the assessment frame: identifier columns, proficiency levels, adjusted score.

    Rows are taken by position, so the result does not depend on the index
    of ``all_data_raw``; the returned frame is indexed 0..n-1.

    Args:
        all_data_raw: DataFrame with the columns 'Charter_District_Name',
            'Type', 'Year', 'ctds_year_key', 'ctds_id_str' and
            'Percent Proficiency Level 1' ... 'Percent Proficiency Level 4'.
        adj_assessment_score_list: one adjusted score per row of
            ``all_data_raw``, in row order.

    Returns:
        DataFrame with columns lea_name, lea_type, year, ctds_year_key,
        ctds_id, proficiency_level_1..4 and adj_assessment_score.
    """
    # positional copy of the input; the caller's frame is left untouched
    source = all_data_raw.reset_index(drop=True)

    assess_df = pd.DataFrame({
        'lea_name': source['Charter_District_Name'],
        'lea_type': source['Type'],
        'year': source['Year'],
        'ctds_year_key': source['ctds_year_key'],
        # ctds_id is ctds_id_str without its first character
        'ctds_id': source['ctds_id_str'].str[1:],
        'proficiency_level_1': source['Percent Proficiency Level 1'],
        'proficiency_level_2': source['Percent Proficiency Level 2'],
        'proficiency_level_3': source['Percent Proficiency Level 3'],
        'proficiency_level_4': source['Percent Proficiency Level 4'],
        'adj_assessment_score': pd.Series(adj_assessment_score_list, dtype=float),
    })

    return(assess_df)

In [7]:
def remove_outliers(analysis_df_raw, assessment_rows_to_drop, school_type, year, outlier_list):
    """Filter the analysis frame by LEA type, year, assessment flags and value bounds.

    Args:
        analysis_df_raw: DataFrame with 'lea_type' and 'year' columns plus
            every column named in ``outlier_list``.
        assessment_rows_to_drop: index labels of rows to remove because of
            their assessment scores.
        school_type: keep only rows whose 'lea_type' equals this value;
            'all' disables the filter.
        year: keep only rows whose 'year' equals this value; 'all' disables
            the filter.
        outlier_list: entries of the form ``[column_name, min_val, max_val]``;
            a row is removed when its value is below ``min_val`` or above
            ``max_val`` (values equal to a bound are kept).

    Returns:
        The surviving rows as a new DataFrame, re-indexed from 0.
    """
    drop_mask = np.zeros(len(analysis_df_raw), dtype=bool)

    if school_type != 'all':
        drop_mask |= np.asarray(analysis_df_raw['lea_type'] != school_type, dtype=bool)

    if year != 'all':
        drop_mask |= np.asarray(analysis_df_raw['year'] != year, dtype=bool)

    # one vectorised comparison per rule instead of a rows x rules Python loop
    for rule in outlier_list:
        column = analysis_df_raw[rule[0]]
        drop_mask |= np.asarray((column < rule[1]) | (column > rule[2]), dtype=bool)

    # rows already rejected on their assessment scores, matched by index label
    drop_mask |= np.asarray(analysis_df_raw.index.isin(assessment_rows_to_drop), dtype=bool)

    analysis_df = analysis_df_raw.loc[~drop_mask].reset_index(drop=True)

    return(analysis_df)

In [8]:
def make_x_and_y(analysis_df, y_col_name, feature_list):
    """Split the analysis frame into the dependent series and per-feature arrays.

    Args:
        analysis_df: cleaned DataFrame used for the analysis.
        y_col_name: name of the dependent-variable column.
        feature_list: names of the feature columns to extract.

    Returns:
        Tuple ``(x_list, y)``: ``x_list`` holds one array of values per entry
        of ``feature_list`` (same order); ``y`` is the ``y_col_name`` column.
    """
    # one values-array per requested feature, in the order given
    x_list = [analysis_df.loc[:, feature_name].values for feature_name in feature_list]

    y = analysis_df.loc[:, y_col_name]

    return(x_list, y)

In [9]:
def compute_quartiles(metric_series, analysis_df, quantile_list):
    """Bucket every row of ``analysis_df`` by where its metric falls among three cutoffs.

    Args:
        metric_series: pd.Series with the metric to bucket; it is aligned to
            ``analysis_df`` by index.
        analysis_df: DataFrame for analysis (outliers already removed).
        quantile_list: three quantile levels (e.g. ``[0.25, 0.50, 0.75]``)
            that define the cutoff values c0, c1, c2.

    Returns:
        Tuple ``(quartile_cutoffs, metric_df)``: the cutoff values as returned
        by ``metric_series.quantile(quantile_list)``, and a copy of
        ``analysis_df`` with two added columns, 'metric' and
        'cutoff_quartile' (float), where the bucket is

            1  if metric <= c0
            2  if c0 < metric < c1
            3  if c1 <= metric < c2
            4  otherwise (this includes a missing metric).
    """
    # work on a copy so the caller's frame is not modified
    metric_df = analysis_df.copy().assign(metric=metric_series)

    quartile_cutoffs = metric_series.quantile(quantile_list)
    cut_0 = quartile_cutoffs.values[0]
    cut_1 = quartile_cutoffs.values[1]
    cut_2 = quartile_cutoffs.values[2]

    # Positional, vectorised bucketing: no dependence on the frame's index
    # labels. np.select takes the first matching condition, like if/elif.
    metric_values = np.asarray(metric_df['metric'], dtype=float)
    buckets = np.select(
        [
            metric_values <= cut_0,
            (metric_values > cut_0) & (metric_values < cut_1),
            (metric_values >= cut_1) & (metric_values < cut_2),
        ],
        [1.0, 2.0, 3.0],
        default=4.0,
    )
    metric_df = metric_df.assign(cutoff_quartile=buckets)

    return(quartile_cutoffs, metric_df)

In [10]:
def run_analysis(analysis_df, feature_list, y_col_name, quantile_list, target_quartile, target_col_name):
    """Slice the data to one bucket of a target column and regress y on each feature.

    Args:
        analysis_df: DataFrame with outliers already removed.
        feature_list: names of the feature columns to regress against y.
        y_col_name: name of the dependent-variable column.
        quantile_list: quantile levels handed to ``compute_quartiles``.
        target_quartile: bucket number whose rows are analysed.
        target_col_name: column whose values decide the bucket of each row.

    Returns:
        Tuple ``(metric_df, metric_df_raw, quartiles_df, quartile_cutoffs_df)``:
        the rows of the chosen bucket, a copy of all bucketed rows, the
        r-squared table for the chosen bucket, and a two-column frame pairing
        each quantile level with its cutoff value.
    """
    # bucket every row by the target column
    cutoff_values, bucketed = compute_quartiles(
        analysis_df.loc[:, target_col_name], analysis_df, quantile_list
    )

    # keep an independent copy of the full bucketed frame before slicing
    metric_df_raw = bucketed.copy()

    in_target_bucket = bucketed.loc[:, 'cutoff_quartile'] == target_quartile
    metric_df = bucketed[in_target_bucket]

    # quantile level next to the metric value it corresponds to
    quartile_cutoffs_df = pd.DataFrame([quantile_list, cutoff_values]).T
    quartile_cutoffs_df.columns = ['quartile_pct_cutoff', 'quartile_value']

    # regress y on each feature within the chosen bucket
    x_list, y = make_x_and_y(metric_df, y_col_name, feature_list)
    quartiles_df, _r2, _p_values, _signs = plot_compute_r_squared(feature_list, y, x_list)

    return(metric_df, metric_df_raw, quartiles_df, quartile_cutoffs_df)

In [11]:
def make_assumptions_tab(feature_list, outlier_list, y_col_name, year, school_type, quantile_list, target_quartile, target_col_name, metric_df_raw):
    """Lay out every input of one analysis run as a table, one assumption per row.

    Args:
        feature_list, outlier_list, y_col_name, year, school_type,
        quantile_list, target_quartile, target_col_name: the settings the
            run was executed with; each becomes one row of the table.
        metric_df_raw: frame with a 'cutoff_quartile' column; the number of
            its rows whose bucket equals ``target_quartile`` is reported as
            the sample size.

    Returns:
        DataFrame with a leading 'fields' column naming each assumption,
        followed by that assumption's value(s) spread over numbered columns.
    """
    # sample size of the analysed bucket
    n_in_target = int((metric_df_raw.loc[:, 'cutoff_quartile'] == target_quartile).sum())

    # (row label, value) pairs in output order
    rows = [
        ('features_analyzed', feature_list),
        ('outlier_params', outlier_list),
        ('y_variable', y_col_name),
        ('year', year),
        ('school_type', school_type),
        ('quantile_values', quantile_list),
        ('target_quartile_num', target_quartile),
        ('target_col_name', target_col_name),
        ('samples (n = ', n_in_target),
    ]

    # one Series per assumption, side by side, then flipped so each is a row
    assumptions_list = pd.concat([pd.Series(value) for _, value in rows], axis=1).T

    assumptions_list.insert(0, 'fields', [label for label, _ in rows])

    return(assumptions_list)

In [12]:
def make_unique_tab(analysis_df_raw, analysis_df, metric_df):
    """Summarise the sample sizes of the frames involved in one analysis run.

    Args:
        analysis_df_raw: the unfiltered frame (needs a 'lea_type' column).
        analysis_df: the frame after filters and outlier removal (needs 'ctds_id').
        metric_df: the rows of the analysed bucket (needs 'ctds_id').

    Returns:
        DataFrame with columns 'fields' and 'n samples': row counts of the
        three frames, distinct 'ctds_id' counts of the latter two, and the
        number of 'district' and 'charter' rows in the unfiltered frame.
    """
    lea_type_raw = analysis_df_raw['lea_type']

    # (row label, count) pairs in output order
    summary = [
        ('raw_dataset', len(analysis_df_raw)),
        ('after_filters_outliers', len(analysis_df)),
        ('unique_lea_all_quartiles', len(np.unique(analysis_df['ctds_id']))),
        ('length_target_quartile', len(metric_df)),
        ('unique_lea_target_quartile', len(np.unique(metric_df['ctds_id']))),
        ('districts_raw', int((lea_type_raw == 'district').sum())),
        ('charters_raw', int((lea_type_raw == 'charter').sum())),
    ]

    unique_list = pd.DataFrame({
        'fields': [label for label, _ in summary],
        'n samples': [count for _, count in summary],
    })

    return(unique_list)

In [13]:
def create_teacher_salary_analysis_df(analysis_df, inflation_rate):
    """Build the 2018/2019 subset used for the teacher-salary analysis.

    Only rows with a positive 'avg_teacher_salary' and a 'year' of 2018 or
    2019 are kept. The column 'avg_teacher_salary_infl_x' is set to the 2018
    salary grown by ``inflation_rate`` for 2018 rows and to the unchanged
    salary for 2019 rows.

    Args:
        analysis_df: DataFrame with 'year' and 'avg_teacher_salary' columns
            (outliers already removed).
        inflation_rate: 2018-to-2019 wage growth as a fraction (0.05 = 5%).

    Returns:
        New DataFrame with the 2018 rows followed by the 2019 rows, indexed
        0..n-1.
    """
    # rows without a positive salary carry no usable salary information
    with_salary = analysis_df[analysis_df['avg_teacher_salary'] > 0]

    data_2018 = with_salary[with_salary['year'] == 2018]
    data_2019 = with_salary[with_salary['year'] == 2019]

    # express the 2018 salaries at the 2019 level; 2019 salaries stay as they are
    data_2018 = data_2018.assign(
        avg_teacher_salary_infl_x=(1 + inflation_rate) * data_2018['avg_teacher_salary']
    )
    data_2019 = data_2019.assign(avg_teacher_salary_infl_x=data_2019['avg_teacher_salary'])

    # Re-index after stacking: the concatenated pieces would otherwise keep
    # their scattered, reordered labels, while downstream code walks rows by
    # label over range(len(df)).
    analysis_df_teach = pd.concat([data_2018, data_2019]).reset_index(drop=True)

    return(analysis_df_teach)

## Import Raw Data

In [14]:
# folder holding the exported CSV inputs
import_path = './jupyter_data/'

# joined raw data set
all_data_raw = pd.read_csv(join(import_path, 'joined_data.csv'))

# afr metrics and the ratios frame
afr_data_df = pd.read_csv(join(import_path, 'afr_data_df.csv'))
ratios_df = pd.read_csv(join(import_path, 'ratios_df.csv'))

## Scale Assessment Scores

In [15]:
# rows whose four proficiency percentages sum to less than this are discarded
min_sum_proficiency_percentages = 90

# how to treat percentages that do not sum to 100:
#   1 = scale to 100%, 2 = fill with 2.5 avg score
method = 1

# adjusted score for every row, plus the rows that fail the thresholds
adj_assessment_score_list, assessment_rows_to_drop = calculate_adj_assessment_scores(
    all_data_raw,
    min_sum_proficiency_percentages,
    method,
)

# identifier columns, proficiency levels and adjusted score in one frame
assess_df = make_assess_df(all_data_raw, adj_assessment_score_list)

# place the three frames side by side, then turn +/-inf and NaN into zeroes
analysis_df_raw = (
    pd.concat([assess_df, afr_data_df, ratios_df], axis=1)
    .replace([np.inf, -np.inf], np.nan)
    .fillna(0)
)

Total Rows: 2698
Assessment Score Outliers: 274


In [16]:
# start the inflation-adjusted salary column as a plain copy of the average
# teacher salary; the inflation adjustment is applied later
analysis_df_raw = analysis_df_raw.assign(
    avg_teacher_salary_infl_x=analysis_df_raw['avg_teacher_salary']
)

## Inputs

In [17]:
# old outlier_list = [
#     ['total_students', 20, 1000000],
#     ['total_teachers', 4, 1000000],
#     ['exp_to_rev_ratio', 0.60, 1.40],
#     ['students_per_teacher', 4, 35],
#     ['other_6800_per_total_exp', 0, 1],
#     ['purch_serv_6300_6400_6500_per_total_exp', 0, 0.65],
#     ['supplies_6600_per_total_exp', 0, 0.25],
#     ['pct_free_reduced', 0, 1]
#     ]    

# school_type = 'charter'   # 'charter', 'district', 'all'

# feature_list = [
#     'pct_free_reduced',
#     'pct_white',
#     'instruction_1000_per_total_exp',
#     'salaries_6100_per_total_exp',
#     'salary_benefits_instruction_1000_6100_6200_per_total_exp',
#     'salary_benefits_instruction_1000_6100_6200_class_site_per_total_exp',
#     'admin_expenses_all_per_total_exp',
#     'students_per_teacher'
#     ]

# y_col_name = 'adj_assessment_score'

In [18]:
# columns regressed (one at a time) against the dependent variable
feature_list = [
    'pct_free_reduced', 'pct_non_white', 'pct_swd',
    'instruction_1000_per_total_exp',
    'salaries_6100_per_total_exp',
    'salary_benefits_instruction_1000_6100_6200_per_total_exp',
    'salary_benefits_instruction_1000_6100_6200_class_site_per_total_exp',
    'admin_expenses_all_per_total_exp',
    'instruction_1000_reg_sped_per_teacher',
    'students_per_teacher',
    'total_instruction_related_personnel_per_total_exp',
    'building_related_expenses_per_total_exp',
    'avg_teacher_salary_infl_x',
    'total_instruction_related_personnel_per_student',
    'salary_benefits_instruction_1000_6100_6200_class_site_per_student',
    'total_students',
]

# outlier rules, each [column, min_val, max_val]; rows outside the bounds are removed
outlier_list = [
    ['total_students',                                    20,      1000000],
    ['total_teachers',                                    4,       1000000],
    ['exp_to_rev_ratio',                                  0.60,    1.40],
    ['students_per_teacher',                              4,       50],
    ['other_6800_per_total_exp',                          0,       10],
    ['purch_serv_6300_6400_6500_per_total_exp',           0,       0.65],
    ['supplies_6600_per_total_exp',                       0,       0.25],
    ['pct_free_reduced',                                  0,       1.05],
    ['pct_non_white',                                     0,       1.05],
    ['pct_swd',                                           0.01,    0.25],
    ['instruction_1000_reg_sped_per_teacher',             10000.0, 120000.0],
    ['total_instruction_related_personnel_per_total_exp', 0.15,    0.65],
    # rules currently switched off:
    #   ['admin_expenses_all_per_total_exp', 0.05, 0.25]
    #   ['building_related_expenses_per_total_exp', 0.05, 0.25]
    #   ['salary_benefits_instruction_1000_6100_6200_class_site_per_total_exp', 0.2, 0.6]
    #   ['total_instruction_related_personnel_per_student', 2000, 7000]
    #   ['regular_to_sped_instructional_related_ratio', 0, 50]
]

# wage inflation rate for teacher salary (actual average salary increase was 8% from 2018 to 2019)
inflation_rate = 0.05

## Run Scenarios for Excel Output

In [19]:
## Sample Scenario X (for testing)

In [23]:
# run analysis X

# dependent variable whose correlation with each feature is computed
y_col_name = 'adj_assessment_score'

year = 'all'   # 2015 to 2019, or 'all'
school_type = 'district'   # 'charter', 'district', 'all'

# quartile slicing assumptions
quantile_list = [1, 1, 1]
target_quartile = 1
target_col_name = 'total_students'

# apply the year / school-type filters and the outlier rules
analysis_df = remove_outliers(
    analysis_df_raw=analysis_df_raw,
    assessment_rows_to_drop=assessment_rows_to_drop,
    school_type=school_type,
    year=year,
    outlier_list=outlier_list,
)

# regression results and the supporting tables for the excel export
metric_df, metric_df_raw, quartiles_df_x, quartile_cutoffs_df_x = run_analysis(
    analysis_df=analysis_df,
    feature_list=feature_list,
    y_col_name=y_col_name,
    quantile_list=quantile_list,
    target_quartile=target_quartile,
    target_col_name=target_col_name,
)
assumptions_list_x = make_assumptions_tab(
    feature_list=feature_list,
    outlier_list=outlier_list,
    y_col_name=y_col_name,
    year=year,
    school_type=school_type,
    quantile_list=quantile_list,
    target_quartile=target_quartile,
    target_col_name=target_col_name,
    metric_df_raw=metric_df_raw,
)
unique_list_x = make_unique_tab(
    analysis_df_raw=analysis_df_raw,
    analysis_df=analysis_df,
    metric_df=metric_df,
)

In [24]:
# display the r-squared table produced by scenario X
quartiles_df_x

Unnamed: 0,feature______________________________,r2___,p-value__,correlation
0,pct_non_white,0.362874,0.0,negative
1,total_instruction_related_personnel_per_total_exp,0.313179,0.0,positive
2,instruction_1000_per_total_exp,0.267815,0.0,positive
3,salary_benefits_instruction_1000_6100_6200_cla...,0.204995,0.0,positive
4,salaries_6100_per_total_exp,0.20447,0.0,positive
5,salary_benefits_instruction_1000_6100_6200_per...,0.202614,0.0,positive
6,pct_free_reduced,0.122957,0.0,negative
7,instruction_1000_reg_sped_per_teacher,0.054096,0.0,positive
8,total_instruction_related_personnel_per_student,0.019993,0.000111,positive
9,total_students,0.019232,0.000151,positive


In [25]:
# number of rows left after filtering
print(analysis_df.shape[0])

742


#### Run Scenarios for Research Paper

In [26]:
## All LEAs, All Quartiles

In [27]:
# run analysis 1

# dependent variable
y_col_name = 'adj_assessment_score'

year = 'all'   # 2015 to 2019, or 'all'
school_type = 'all'   # 'charter', 'district', 'all'

# quartile slicing assumptions
# all three cutoffs sit at quantile 1 (the maximum), so every row lands in
# bucket 1 and nothing is sliced away
quantile_list = [1, 1, 1]
target_quartile = 1
target_col_name = 'pct_non_white'

# remove outliers
analysis_df = remove_outliers(analysis_df_raw, assessment_rows_to_drop, school_type, year, outlier_list)

# run analysis and data tables for excel export
metric_df, metric_df_raw, quartiles_df_1, quartile_cutoffs_df_1 = run_analysis(analysis_df, feature_list, y_col_name, quantile_list, target_quartile, target_col_name)
assumptions_list_1 = make_assumptions_tab(feature_list, outlier_list, y_col_name, year, school_type, quantile_list, target_quartile, target_col_name, metric_df_raw)
unique_list_1 = make_unique_tab(analysis_df_raw, analysis_df, metric_df)

# do same for teacher salary data, 2018 and 2019 only
analysis_df_teach = create_teacher_salary_analysis_df(analysis_df, inflation_rate)
metric_df_teach, metric_df_raw_teach, quartiles_df_1_teach, quartile_cutoffs_df_1_teach = run_analysis(analysis_df_teach, feature_list, y_col_name, quantile_list, target_quartile, target_col_name)
# the sample count on this tab must come from the teacher-salary subset
# (metric_df_raw_teach), not from the full data set analysed above
assumptions_list_1_teach = make_assumptions_tab(feature_list, outlier_list, y_col_name, year, school_type, quantile_list, target_quartile, target_col_name, metric_df_raw_teach)
unique_list_1_teach = make_unique_tab(analysis_df_raw, analysis_df_teach, metric_df_teach)

In [28]:
## District, All Quartiles

In [29]:
# run analysis 2

# dependent variable
y_col_name = 'adj_assessment_score'

year = 'all'   # 2015 to 2019, or 'all'
school_type = 'district'   # 'charter', 'district', 'all'

# quartile slicing assumptions
# all three cutoffs sit at quantile 1 (the maximum), so every row lands in
# bucket 1 and nothing is sliced away
quantile_list = [1, 1, 1]
target_quartile = 1
target_col_name = 'pct_non_white'

# remove outliers
analysis_df = remove_outliers(analysis_df_raw, assessment_rows_to_drop, school_type, year, outlier_list)

# run analysis and data tables for excel export
metric_df, metric_df_raw, quartiles_df_2, quartile_cutoffs_df_2 = run_analysis(analysis_df, feature_list, y_col_name, quantile_list, target_quartile, target_col_name)
assumptions_list_2 = make_assumptions_tab(feature_list, outlier_list, y_col_name, year, school_type, quantile_list, target_quartile, target_col_name, metric_df_raw)
unique_list_2 = make_unique_tab(analysis_df_raw, analysis_df, metric_df)

# do same for teacher salary data, 2018 and 2019 only
analysis_df_teach = create_teacher_salary_analysis_df(analysis_df, inflation_rate)
metric_df_teach, metric_df_raw_teach, quartiles_df_2_teach, quartile_cutoffs_df_2_teach = run_analysis(analysis_df_teach, feature_list, y_col_name, quantile_list, target_quartile, target_col_name)
# the sample count on this tab must come from the teacher-salary subset
# (metric_df_raw_teach), not from the full data set analysed above
assumptions_list_2_teach = make_assumptions_tab(feature_list, outlier_list, y_col_name, year, school_type, quantile_list, target_quartile, target_col_name, metric_df_raw_teach)
unique_list_2_teach = make_unique_tab(analysis_df_raw, analysis_df_teach, metric_df_teach)

In [30]:
## Charters, All Quartiles

In [31]:
# run analysis 3

# dependent variable
y_col_name = 'adj_assessment_score'

year = 'all'   # 2015 to 2019, or 'all'
school_type = 'charter'   # 'charter', 'district', 'all'

# quartile slicing assumptions
# all three cutoffs sit at quantile 1 (the maximum), so every row lands in
# bucket 1 and nothing is sliced away
quantile_list = [1, 1, 1]
target_quartile = 1
target_col_name = 'pct_non_white'

# remove outliers
analysis_df = remove_outliers(analysis_df_raw, assessment_rows_to_drop, school_type, year, outlier_list)

# run analysis and data tables for excel export
metric_df, metric_df_raw, quartiles_df_3, quartile_cutoffs_df_3 = run_analysis(analysis_df, feature_list, y_col_name, quantile_list, target_quartile, target_col_name)
assumptions_list_3 = make_assumptions_tab(feature_list, outlier_list, y_col_name, year, school_type, quantile_list, target_quartile, target_col_name, metric_df_raw)
unique_list_3 = make_unique_tab(analysis_df_raw, analysis_df, metric_df)

# do same for teacher salary data, 2018 and 2019 only
analysis_df_teach = create_teacher_salary_analysis_df(analysis_df, inflation_rate)
metric_df_teach, metric_df_raw_teach, quartiles_df_3_teach, quartile_cutoffs_df_3_teach = run_analysis(analysis_df_teach, feature_list, y_col_name, quantile_list, target_quartile, target_col_name)
# the sample count on this tab must come from the teacher-salary subset
# (metric_df_raw_teach), not from the full data set analysed above
assumptions_list_3_teach = make_assumptions_tab(feature_list, outlier_list, y_col_name, year, school_type, quantile_list, target_quartile, target_col_name, metric_df_raw_teach)
unique_list_3_teach = make_unique_tab(analysis_df_raw, analysis_df_teach, metric_df_teach)

In [32]:
## District, Ethnicity

In [33]:
# run analysis 4

# choose dependent variable for computing correlation
y_col_name = 'adj_assessment_score'

year = 'all'   # 2015 to 2019, or 'all'
school_type = 'district'   # 'charter', 'district', 'all'

# quartile slicing assumptions
# cutoffs at the 25th / 50th / 75th percentile of the target column;
# only the rows in bucket `target_quartile` are analysed
quantile_list = [0.25, 0.50, 0.75]
target_quartile = 1
target_col_name = 'pct_non_white'

# remove outliers
analysis_df = remove_outliers(analysis_df_raw, assessment_rows_to_drop, school_type, year, outlier_list)

# run analysis and data tables for excel export
metric_df, metric_df_raw, quartiles_df_4, quartile_cutoffs_df_4 = run_analysis(analysis_df, feature_list, y_col_name, quantile_list, target_quartile, target_col_name)
assumptions_list_4 = make_assumptions_tab(feature_list, outlier_list, y_col_name, year, school_type, quantile_list, target_quartile, target_col_name, metric_df_raw)
unique_list_4 = make_unique_tab(analysis_df_raw, analysis_df, metric_df)

# do same for teacher salary data, 2018 and 2019 only
analysis_df_teach = create_teacher_salary_analysis_df(analysis_df, inflation_rate)
metric_df_teach, metric_df_raw_teach, quartiles_df_4_teach, quartile_cutoffs_df_4_teach = run_analysis(analysis_df_teach, feature_list, y_col_name, quantile_list, target_quartile, target_col_name)
# the sample count on this tab must come from the teacher-salary subset
# (metric_df_raw_teach), not from the full data set analysed above
assumptions_list_4_teach = make_assumptions_tab(feature_list, outlier_list, y_col_name, year, school_type, quantile_list, target_quartile, target_col_name, metric_df_raw_teach)
unique_list_4_teach = make_unique_tab(analysis_df_raw, analysis_df_teach, metric_df_teach)

In [34]:
# run analysis 5

# dependent variable
y_col_name = 'adj_assessment_score'

year = 'all'   # 2015 to 2019, or 'all'
school_type = 'district'   # 'charter', 'district', 'all'

# quartile slicing assumptions
# cutoffs at the 25th / 50th / 75th percentile of the target column;
# only the rows in bucket `target_quartile` are analysed
quantile_list = [0.25, 0.50, 0.75]
target_quartile = 2
target_col_name = 'pct_non_white'

# remove outliers
analysis_df = remove_outliers(analysis_df_raw, assessment_rows_to_drop, school_type, year, outlier_list)

# run analysis and data tables for excel export
metric_df, metric_df_raw, quartiles_df_5, quartile_cutoffs_df_5 = run_analysis(analysis_df, feature_list, y_col_name, quantile_list, target_quartile, target_col_name)
assumptions_list_5 = make_assumptions_tab(feature_list, outlier_list, y_col_name, year, school_type, quantile_list, target_quartile, target_col_name, metric_df_raw)
unique_list_5 = make_unique_tab(analysis_df_raw, analysis_df, metric_df)

# do same for teacher salary data, 2018 and 2019 only
analysis_df_teach = create_teacher_salary_analysis_df(analysis_df, inflation_rate)
metric_df_teach, metric_df_raw_teach, quartiles_df_5_teach, quartile_cutoffs_df_5_teach = run_analysis(analysis_df_teach, feature_list, y_col_name, quantile_list, target_quartile, target_col_name)
# the sample count on this tab must come from the teacher-salary subset
# (metric_df_raw_teach), not from the full data set analysed above
assumptions_list_5_teach = make_assumptions_tab(feature_list, outlier_list, y_col_name, year, school_type, quantile_list, target_quartile, target_col_name, metric_df_raw_teach)
unique_list_5_teach = make_unique_tab(analysis_df_raw, analysis_df_teach, metric_df_teach)

In [35]:
# run analysis 6

# dependent variable
y_col_name = 'adj_assessment_score'

year = 'all'   # 2015 to 2019, or 'all'
school_type = 'district'   # 'charter', 'district', 'all'

# quartile slicing assumptions
# cutoffs at the 25th / 50th / 75th percentile of the target column;
# only the rows in bucket `target_quartile` are analysed
quantile_list = [0.25, 0.50, 0.75]
target_quartile = 3
target_col_name = 'pct_non_white'

# remove outliers
analysis_df = remove_outliers(analysis_df_raw, assessment_rows_to_drop, school_type, year, outlier_list)

# run analysis and data tables for excel export
metric_df, metric_df_raw, quartiles_df_6, quartile_cutoffs_df_6 = run_analysis(analysis_df, feature_list, y_col_name, quantile_list, target_quartile, target_col_name)
assumptions_list_6 = make_assumptions_tab(feature_list, outlier_list, y_col_name, year, school_type, quantile_list, target_quartile, target_col_name, metric_df_raw)
unique_list_6 = make_unique_tab(analysis_df_raw, analysis_df, metric_df)

# do same for teacher salary data, 2018 and 2019 only
analysis_df_teach = create_teacher_salary_analysis_df(analysis_df, inflation_rate)
metric_df_teach, metric_df_raw_teach, quartiles_df_6_teach, quartile_cutoffs_df_6_teach = run_analysis(analysis_df_teach, feature_list, y_col_name, quantile_list, target_quartile, target_col_name)
# the sample count on this tab must come from the teacher-salary subset
# (metric_df_raw_teach), not from the full data set analysed above
assumptions_list_6_teach = make_assumptions_tab(feature_list, outlier_list, y_col_name, year, school_type, quantile_list, target_quartile, target_col_name, metric_df_raw_teach)
unique_list_6_teach = make_unique_tab(analysis_df_raw, analysis_df_teach, metric_df_teach)

In [36]:
# run analysis 7

# dependent variable
y_col_name = 'adj_assessment_score'

year = 'all'   # 2015 to 2019, or 'all'
school_type = 'district'   # 'charter', 'district', 'all'

# quartile slicing assumptions
# cutoffs at the 25th / 50th / 75th percentile of the target column;
# only the rows in bucket `target_quartile` are analysed
quantile_list = [0.25, 0.50, 0.75]
target_quartile = 4
target_col_name = 'pct_non_white'

# remove outliers
analysis_df = remove_outliers(analysis_df_raw, assessment_rows_to_drop, school_type, year, outlier_list)

# run analysis and data tables for excel export
metric_df, metric_df_raw, quartiles_df_7, quartile_cutoffs_df_7 = run_analysis(analysis_df, feature_list, y_col_name, quantile_list, target_quartile, target_col_name)
assumptions_list_7 = make_assumptions_tab(feature_list, outlier_list, y_col_name, year, school_type, quantile_list, target_quartile, target_col_name, metric_df_raw)
unique_list_7 = make_unique_tab(analysis_df_raw, analysis_df, metric_df)

# do same for teacher salary data, 2018 and 2019 only
analysis_df_teach = create_teacher_salary_analysis_df(analysis_df, inflation_rate)
metric_df_teach, metric_df_raw_teach, quartiles_df_7_teach, quartile_cutoffs_df_7_teach = run_analysis(analysis_df_teach, feature_list, y_col_name, quantile_list, target_quartile, target_col_name)
# the sample count on this tab must come from the teacher-salary subset
# (metric_df_raw_teach), not from the full data set analysed above
assumptions_list_7_teach = make_assumptions_tab(feature_list, outlier_list, y_col_name, year, school_type, quantile_list, target_quartile, target_col_name, metric_df_raw_teach)
unique_list_7_teach = make_unique_tab(analysis_df_raw, analysis_df_teach, metric_df_teach)

In [37]:
## District, FRL

In [38]:
# run analysis 8

# dependent variable
y_col_name = 'adj_assessment_score'

year = 'all'   # 2015 to 2019, or 'all'
school_type = 'district'   # 'charter', 'district', 'all'

# quartile slicing assumptions
# cutoffs at the 25th / 50th / 75th percentile of the target column;
# only the rows in bucket `target_quartile` are analysed
quantile_list = [0.25, 0.50, 0.75]
target_quartile = 1
target_col_name = 'pct_free_reduced'

# remove outliers
analysis_df = remove_outliers(analysis_df_raw, assessment_rows_to_drop, school_type, year, outlier_list)

# run analysis and data tables for excel export
metric_df, metric_df_raw, quartiles_df_8, quartile_cutoffs_df_8 = run_analysis(analysis_df, feature_list, y_col_name, quantile_list, target_quartile, target_col_name)
assumptions_list_8 = make_assumptions_tab(feature_list, outlier_list, y_col_name, year, school_type, quantile_list, target_quartile, target_col_name, metric_df_raw)
unique_list_8 = make_unique_tab(analysis_df_raw, analysis_df, metric_df)

# do same for teacher salary data, 2018 and 2019 only
analysis_df_teach = create_teacher_salary_analysis_df(analysis_df, inflation_rate)
metric_df_teach, metric_df_raw_teach, quartiles_df_8_teach, quartile_cutoffs_df_8_teach = run_analysis(analysis_df_teach, feature_list, y_col_name, quantile_list, target_quartile, target_col_name)
# the sample count on this tab must come from the teacher-salary subset
# (metric_df_raw_teach), not from the full data set analysed above
assumptions_list_8_teach = make_assumptions_tab(feature_list, outlier_list, y_col_name, year, school_type, quantile_list, target_quartile, target_col_name, metric_df_raw_teach)
unique_list_8_teach = make_unique_tab(analysis_df_raw, analysis_df_teach, metric_df_teach)

In [39]:
# run analysis 9: school_type = 'district', target_quartile = 2 of 'pct_free_reduced'

# dependent variable
y_col_name = 'adj_assessment_score'

year = 'all'   # 2015 to 2019, or 'all'
school_type = 'district'   # 'charter', 'district', 'all'

# quartile slicing assumptions
quantile_list = [0.25, 0.50, 0.75]
target_quartile = 2
target_col_name = 'pct_free_reduced'

# remove outliers
analysis_df = remove_outliers(analysis_df_raw, assessment_rows_to_drop, school_type, year, outlier_list)

# run analysis and data tables for excel export
metric_df, metric_df_raw, quartiles_df_9, quartile_cutoffs_df_9 = run_analysis(analysis_df, feature_list, y_col_name, quantile_list, target_quartile, target_col_name)
assumptions_list_9 = make_assumptions_tab(feature_list, outlier_list, y_col_name, year, school_type, quantile_list, target_quartile, target_col_name, metric_df_raw)
unique_list_9 = make_unique_tab(analysis_df_raw, analysis_df, metric_df)

# do same for teacher salary data, 2018 and 2019 only
analysis_df_teach = create_teacher_salary_analysis_df(analysis_df, inflation_rate)
metric_df_teach, metric_df_raw_teach, quartiles_df_9_teach, quartile_cutoffs_df_9_teach = run_analysis(analysis_df_teach, feature_list, y_col_name, quantile_list, target_quartile, target_col_name)
# build the teacher-salary assumptions tab from the teacher-salary run's own raw metrics (metric_df_raw_teach), not the main run's metric_df_raw
assumptions_list_9_teach = make_assumptions_tab(feature_list, outlier_list, y_col_name, year, school_type, quantile_list, target_quartile, target_col_name, metric_df_raw_teach)
unique_list_9_teach = make_unique_tab(analysis_df_raw, analysis_df_teach, metric_df_teach)

In [40]:
# run analysis 10: school_type = 'district', target_quartile = 3 of 'pct_free_reduced'

# dependent variable
y_col_name = 'adj_assessment_score'

year = 'all'   # 2015 to 2019, or 'all'
school_type = 'district'   # 'charter', 'district', 'all'

# quartile slicing assumptions
quantile_list = [0.25, 0.50, 0.75]
target_quartile = 3
target_col_name = 'pct_free_reduced'

# remove outliers
analysis_df = remove_outliers(analysis_df_raw, assessment_rows_to_drop, school_type, year, outlier_list)

# run analysis and data tables for excel export
metric_df, metric_df_raw, quartiles_df_10, quartile_cutoffs_df_10 = run_analysis(analysis_df, feature_list, y_col_name, quantile_list, target_quartile, target_col_name)
assumptions_list_10 = make_assumptions_tab(feature_list, outlier_list, y_col_name, year, school_type, quantile_list, target_quartile, target_col_name, metric_df_raw)
unique_list_10 = make_unique_tab(analysis_df_raw, analysis_df, metric_df)

# do same for teacher salary data, 2018 and 2019 only
analysis_df_teach = create_teacher_salary_analysis_df(analysis_df, inflation_rate)
metric_df_teach, metric_df_raw_teach, quartiles_df_10_teach, quartile_cutoffs_df_10_teach = run_analysis(analysis_df_teach, feature_list, y_col_name, quantile_list, target_quartile, target_col_name)
# build the teacher-salary assumptions tab from the teacher-salary run's own raw metrics (metric_df_raw_teach), not the main run's metric_df_raw
assumptions_list_10_teach = make_assumptions_tab(feature_list, outlier_list, y_col_name, year, school_type, quantile_list, target_quartile, target_col_name, metric_df_raw_teach)
unique_list_10_teach = make_unique_tab(analysis_df_raw, analysis_df_teach, metric_df_teach)

In [41]:
# run analysis 11: school_type = 'district', target_quartile = 4 of 'pct_free_reduced'

# dependent variable
y_col_name = 'adj_assessment_score'

year = 'all'   # 2015 to 2019, or 'all'
school_type = 'district'   # 'charter', 'district', 'all'

# quartile slicing assumptions
quantile_list = [0.25, 0.50, 0.75]
target_quartile = 4
target_col_name = 'pct_free_reduced'

# remove outliers
analysis_df = remove_outliers(analysis_df_raw, assessment_rows_to_drop, school_type, year, outlier_list)

# run analysis and data tables for excel export
metric_df, metric_df_raw, quartiles_df_11, quartile_cutoffs_df_11 = run_analysis(analysis_df, feature_list, y_col_name, quantile_list, target_quartile, target_col_name)
assumptions_list_11 = make_assumptions_tab(feature_list, outlier_list, y_col_name, year, school_type, quantile_list, target_quartile, target_col_name, metric_df_raw)
unique_list_11 = make_unique_tab(analysis_df_raw, analysis_df, metric_df)

# do same for teacher salary data, 2018 and 2019 only
analysis_df_teach = create_teacher_salary_analysis_df(analysis_df, inflation_rate)
metric_df_teach, metric_df_raw_teach, quartiles_df_11_teach, quartile_cutoffs_df_11_teach = run_analysis(analysis_df_teach, feature_list, y_col_name, quantile_list, target_quartile, target_col_name)
# build the teacher-salary assumptions tab from the teacher-salary run's own raw metrics (metric_df_raw_teach), not the main run's metric_df_raw
assumptions_list_11_teach = make_assumptions_tab(feature_list, outlier_list, y_col_name, year, school_type, quantile_list, target_quartile, target_col_name, metric_df_raw_teach)
unique_list_11_teach = make_unique_tab(analysis_df_raw, analysis_df_teach, metric_df_teach)

In [42]:
## District, SWD

In [43]:
# run analysis 12: school_type = 'district', target_quartile = 1 of 'pct_swd'

# dependent variable
y_col_name = 'adj_assessment_score'

year = 'all'   # 2015 to 2019, or 'all'
school_type = 'district'   # 'charter', 'district', 'all'

# quartile slicing assumptions
quantile_list = [0.25, 0.50, 0.75]
target_quartile = 1
target_col_name = 'pct_swd'

# remove outliers
analysis_df = remove_outliers(analysis_df_raw, assessment_rows_to_drop, school_type, year, outlier_list)

# run analysis and data tables for excel export
metric_df, metric_df_raw, quartiles_df_12, quartile_cutoffs_df_12 = run_analysis(analysis_df, feature_list, y_col_name, quantile_list, target_quartile, target_col_name)
assumptions_list_12 = make_assumptions_tab(feature_list, outlier_list, y_col_name, year, school_type, quantile_list, target_quartile, target_col_name, metric_df_raw)
unique_list_12 = make_unique_tab(analysis_df_raw, analysis_df, metric_df)

# do same for teacher salary data, 2018 and 2019 only
analysis_df_teach = create_teacher_salary_analysis_df(analysis_df, inflation_rate)
metric_df_teach, metric_df_raw_teach, quartiles_df_12_teach, quartile_cutoffs_df_12_teach = run_analysis(analysis_df_teach, feature_list, y_col_name, quantile_list, target_quartile, target_col_name)
# build the teacher-salary assumptions tab from the teacher-salary run's own raw metrics (metric_df_raw_teach), not the main run's metric_df_raw
assumptions_list_12_teach = make_assumptions_tab(feature_list, outlier_list, y_col_name, year, school_type, quantile_list, target_quartile, target_col_name, metric_df_raw_teach)
unique_list_12_teach = make_unique_tab(analysis_df_raw, analysis_df_teach, metric_df_teach)

In [44]:
# run analysis 13: school_type = 'district', target_quartile = 2 of 'pct_swd'

# dependent variable
y_col_name = 'adj_assessment_score'

year = 'all'   # 2015 to 2019, or 'all'
school_type = 'district'   # 'charter', 'district', 'all'

# quartile slicing assumptions
quantile_list = [0.25, 0.50, 0.75]
target_quartile = 2
target_col_name = 'pct_swd'

# remove outliers
analysis_df = remove_outliers(analysis_df_raw, assessment_rows_to_drop, school_type, year, outlier_list)

# run analysis and data tables for excel export
metric_df, metric_df_raw, quartiles_df_13, quartile_cutoffs_df_13 = run_analysis(analysis_df, feature_list, y_col_name, quantile_list, target_quartile, target_col_name)
assumptions_list_13 = make_assumptions_tab(feature_list, outlier_list, y_col_name, year, school_type, quantile_list, target_quartile, target_col_name, metric_df_raw)
unique_list_13 = make_unique_tab(analysis_df_raw, analysis_df, metric_df)

# do same for teacher salary data, 2018 and 2019 only
analysis_df_teach = create_teacher_salary_analysis_df(analysis_df, inflation_rate)
metric_df_teach, metric_df_raw_teach, quartiles_df_13_teach, quartile_cutoffs_df_13_teach = run_analysis(analysis_df_teach, feature_list, y_col_name, quantile_list, target_quartile, target_col_name)
# build the teacher-salary assumptions tab from the teacher-salary run's own raw metrics (metric_df_raw_teach), not the main run's metric_df_raw
assumptions_list_13_teach = make_assumptions_tab(feature_list, outlier_list, y_col_name, year, school_type, quantile_list, target_quartile, target_col_name, metric_df_raw_teach)
unique_list_13_teach = make_unique_tab(analysis_df_raw, analysis_df_teach, metric_df_teach)

In [45]:
# run analysis 14: school_type = 'district', target_quartile = 3 of 'pct_swd'

# dependent variable
y_col_name = 'adj_assessment_score'

year = 'all'   # 2015 to 2019, or 'all'
school_type = 'district'   # 'charter', 'district', 'all'

# quartile slicing assumptions
quantile_list = [0.25, 0.50, 0.75]
target_quartile = 3
target_col_name = 'pct_swd'

# remove outliers
analysis_df = remove_outliers(analysis_df_raw, assessment_rows_to_drop, school_type, year, outlier_list)

# run analysis and data tables for excel export
metric_df, metric_df_raw, quartiles_df_14, quartile_cutoffs_df_14 = run_analysis(analysis_df, feature_list, y_col_name, quantile_list, target_quartile, target_col_name)
assumptions_list_14 = make_assumptions_tab(feature_list, outlier_list, y_col_name, year, school_type, quantile_list, target_quartile, target_col_name, metric_df_raw)
unique_list_14 = make_unique_tab(analysis_df_raw, analysis_df, metric_df)

# do same for teacher salary data, 2018 and 2019 only
analysis_df_teach = create_teacher_salary_analysis_df(analysis_df, inflation_rate)
metric_df_teach, metric_df_raw_teach, quartiles_df_14_teach, quartile_cutoffs_df_14_teach = run_analysis(analysis_df_teach, feature_list, y_col_name, quantile_list, target_quartile, target_col_name)
# build the teacher-salary assumptions tab from the teacher-salary run's own raw metrics (metric_df_raw_teach), not the main run's metric_df_raw
assumptions_list_14_teach = make_assumptions_tab(feature_list, outlier_list, y_col_name, year, school_type, quantile_list, target_quartile, target_col_name, metric_df_raw_teach)
unique_list_14_teach = make_unique_tab(analysis_df_raw, analysis_df_teach, metric_df_teach)

In [46]:
# run analysis 15: school_type = 'district', target_quartile = 4 of 'pct_swd'

# dependent variable
y_col_name = 'adj_assessment_score'

year = 'all'   # 2015 to 2019, or 'all'
school_type = 'district'   # 'charter', 'district', 'all'

# quartile slicing assumptions
quantile_list = [0.25, 0.50, 0.75]
target_quartile = 4
target_col_name = 'pct_swd'

# remove outliers
analysis_df = remove_outliers(analysis_df_raw, assessment_rows_to_drop, school_type, year, outlier_list)

# run analysis and data tables for excel export
metric_df, metric_df_raw, quartiles_df_15, quartile_cutoffs_df_15 = run_analysis(analysis_df, feature_list, y_col_name, quantile_list, target_quartile, target_col_name)
assumptions_list_15 = make_assumptions_tab(feature_list, outlier_list, y_col_name, year, school_type, quantile_list, target_quartile, target_col_name, metric_df_raw)
unique_list_15 = make_unique_tab(analysis_df_raw, analysis_df, metric_df)

# do same for teacher salary data, 2018 and 2019 only
analysis_df_teach = create_teacher_salary_analysis_df(analysis_df, inflation_rate)
metric_df_teach, metric_df_raw_teach, quartiles_df_15_teach, quartile_cutoffs_df_15_teach = run_analysis(analysis_df_teach, feature_list, y_col_name, quantile_list, target_quartile, target_col_name)
# build the teacher-salary assumptions tab from the teacher-salary run's own raw metrics (metric_df_raw_teach), not the main run's metric_df_raw
assumptions_list_15_teach = make_assumptions_tab(feature_list, outlier_list, y_col_name, year, school_type, quantile_list, target_quartile, target_col_name, metric_df_raw_teach)
unique_list_15_teach = make_unique_tab(analysis_df_raw, analysis_df_teach, metric_df_teach)

In [47]:
## Charter, Non-White

In [48]:
# run analysis 16: school_type = 'charter', target_quartile = 1 of 'pct_non_white'

# dependent variable
y_col_name = 'adj_assessment_score'

year = 'all'   # 2015 to 2019, or 'all'
school_type = 'charter'   # 'charter', 'district', 'all'

# quartile slicing assumptions
quantile_list = [0.25, 0.50, 0.75]
target_quartile = 1
target_col_name = 'pct_non_white'

# remove outliers
analysis_df = remove_outliers(analysis_df_raw, assessment_rows_to_drop, school_type, year, outlier_list)

# run analysis and data tables for excel export
metric_df, metric_df_raw, quartiles_df_16, quartile_cutoffs_df_16 = run_analysis(analysis_df, feature_list, y_col_name, quantile_list, target_quartile, target_col_name)
assumptions_list_16 = make_assumptions_tab(feature_list, outlier_list, y_col_name, year, school_type, quantile_list, target_quartile, target_col_name, metric_df_raw)
unique_list_16 = make_unique_tab(analysis_df_raw, analysis_df, metric_df)

# do same for teacher salary data, 2018 and 2019 only
analysis_df_teach = create_teacher_salary_analysis_df(analysis_df, inflation_rate)
metric_df_teach, metric_df_raw_teach, quartiles_df_16_teach, quartile_cutoffs_df_16_teach = run_analysis(analysis_df_teach, feature_list, y_col_name, quantile_list, target_quartile, target_col_name)
# build the teacher-salary assumptions tab from the teacher-salary run's own raw metrics (metric_df_raw_teach), not the main run's metric_df_raw
assumptions_list_16_teach = make_assumptions_tab(feature_list, outlier_list, y_col_name, year, school_type, quantile_list, target_quartile, target_col_name, metric_df_raw_teach)
unique_list_16_teach = make_unique_tab(analysis_df_raw, analysis_df_teach, metric_df_teach)

In [49]:
# run analysis 17: school_type = 'charter', target_quartile = 2 of 'pct_non_white'

# dependent variable
y_col_name = 'adj_assessment_score'

year = 'all'   # 2015 to 2019, or 'all'
school_type = 'charter'   # 'charter', 'district', 'all'

# quartile slicing assumptions
quantile_list = [0.25, 0.50, 0.75]
target_quartile = 2
target_col_name = 'pct_non_white'

# remove outliers
analysis_df = remove_outliers(analysis_df_raw, assessment_rows_to_drop, school_type, year, outlier_list)

# run analysis and data tables for excel export
metric_df, metric_df_raw, quartiles_df_17, quartile_cutoffs_df_17 = run_analysis(analysis_df, feature_list, y_col_name, quantile_list, target_quartile, target_col_name)
assumptions_list_17 = make_assumptions_tab(feature_list, outlier_list, y_col_name, year, school_type, quantile_list, target_quartile, target_col_name, metric_df_raw)
unique_list_17 = make_unique_tab(analysis_df_raw, analysis_df, metric_df)

# do same for teacher salary data, 2018 and 2019 only
analysis_df_teach = create_teacher_salary_analysis_df(analysis_df, inflation_rate)
metric_df_teach, metric_df_raw_teach, quartiles_df_17_teach, quartile_cutoffs_df_17_teach = run_analysis(analysis_df_teach, feature_list, y_col_name, quantile_list, target_quartile, target_col_name)
# build the teacher-salary assumptions tab from the teacher-salary run's own raw metrics (metric_df_raw_teach), not the main run's metric_df_raw
assumptions_list_17_teach = make_assumptions_tab(feature_list, outlier_list, y_col_name, year, school_type, quantile_list, target_quartile, target_col_name, metric_df_raw_teach)
unique_list_17_teach = make_unique_tab(analysis_df_raw, analysis_df_teach, metric_df_teach)

In [50]:
# run analysis 18: school_type = 'charter', target_quartile = 3 of 'pct_non_white'

# dependent variable
y_col_name = 'adj_assessment_score'

year = 'all'   # 2015 to 2019, or 'all'
school_type = 'charter'   # 'charter', 'district', 'all'

# quartile slicing assumptions
quantile_list = [0.25, 0.50, 0.75]
target_quartile = 3
target_col_name = 'pct_non_white'

# remove outliers
analysis_df = remove_outliers(analysis_df_raw, assessment_rows_to_drop, school_type, year, outlier_list)

# run analysis and data tables for excel export
metric_df, metric_df_raw, quartiles_df_18, quartile_cutoffs_df_18 = run_analysis(analysis_df, feature_list, y_col_name, quantile_list, target_quartile, target_col_name)
assumptions_list_18 = make_assumptions_tab(feature_list, outlier_list, y_col_name, year, school_type, quantile_list, target_quartile, target_col_name, metric_df_raw)
unique_list_18 = make_unique_tab(analysis_df_raw, analysis_df, metric_df)

# do same for teacher salary data, 2018 and 2019 only
analysis_df_teach = create_teacher_salary_analysis_df(analysis_df, inflation_rate)
metric_df_teach, metric_df_raw_teach, quartiles_df_18_teach, quartile_cutoffs_df_18_teach = run_analysis(analysis_df_teach, feature_list, y_col_name, quantile_list, target_quartile, target_col_name)
# build the teacher-salary assumptions tab from the teacher-salary run's own raw metrics (metric_df_raw_teach), not the main run's metric_df_raw
assumptions_list_18_teach = make_assumptions_tab(feature_list, outlier_list, y_col_name, year, school_type, quantile_list, target_quartile, target_col_name, metric_df_raw_teach)
unique_list_18_teach = make_unique_tab(analysis_df_raw, analysis_df_teach, metric_df_teach)

In [51]:
# run analysis 19: school_type = 'charter', target_quartile = 4 of 'pct_non_white'

# dependent variable
y_col_name = 'adj_assessment_score'

year = 'all'   # 2015 to 2019, or 'all'
school_type = 'charter'   # 'charter', 'district', 'all'

# quartile slicing assumptions
quantile_list = [0.25, 0.50, 0.75]
target_quartile = 4
target_col_name = 'pct_non_white'

# remove outliers
analysis_df = remove_outliers(analysis_df_raw, assessment_rows_to_drop, school_type, year, outlier_list)

# run analysis and data tables for excel export
metric_df, metric_df_raw, quartiles_df_19, quartile_cutoffs_df_19 = run_analysis(analysis_df, feature_list, y_col_name, quantile_list, target_quartile, target_col_name)
assumptions_list_19 = make_assumptions_tab(feature_list, outlier_list, y_col_name, year, school_type, quantile_list, target_quartile, target_col_name, metric_df_raw)
unique_list_19 = make_unique_tab(analysis_df_raw, analysis_df, metric_df)

# do same for teacher salary data, 2018 and 2019 only
analysis_df_teach = create_teacher_salary_analysis_df(analysis_df, inflation_rate)
metric_df_teach, metric_df_raw_teach, quartiles_df_19_teach, quartile_cutoffs_df_19_teach = run_analysis(analysis_df_teach, feature_list, y_col_name, quantile_list, target_quartile, target_col_name)
# build the teacher-salary assumptions tab from the teacher-salary run's own raw metrics (metric_df_raw_teach), not the main run's metric_df_raw
assumptions_list_19_teach = make_assumptions_tab(feature_list, outlier_list, y_col_name, year, school_type, quantile_list, target_quartile, target_col_name, metric_df_raw_teach)
unique_list_19_teach = make_unique_tab(analysis_df_raw, analysis_df_teach, metric_df_teach)

In [52]:
## Charter, FRL

In [53]:
# run analysis 20: school_type = 'charter', target_quartile = 1 of 'pct_free_reduced'

# dependent variable
y_col_name = 'adj_assessment_score'

year = 'all'   # 2015 to 2019, or 'all'
school_type = 'charter'   # 'charter', 'district', 'all'

# quartile slicing assumptions
quantile_list = [0.25, 0.50, 0.75]
target_quartile = 1
target_col_name = 'pct_free_reduced'

# remove outliers
analysis_df = remove_outliers(analysis_df_raw, assessment_rows_to_drop, school_type, year, outlier_list)

# run analysis and data tables for excel export
metric_df, metric_df_raw, quartiles_df_20, quartile_cutoffs_df_20 = run_analysis(analysis_df, feature_list, y_col_name, quantile_list, target_quartile, target_col_name)
assumptions_list_20 = make_assumptions_tab(feature_list, outlier_list, y_col_name, year, school_type, quantile_list, target_quartile, target_col_name, metric_df_raw)
unique_list_20 = make_unique_tab(analysis_df_raw, analysis_df, metric_df)

# do same for teacher salary data, 2018 and 2019 only
analysis_df_teach = create_teacher_salary_analysis_df(analysis_df, inflation_rate)
metric_df_teach, metric_df_raw_teach, quartiles_df_20_teach, quartile_cutoffs_df_20_teach = run_analysis(analysis_df_teach, feature_list, y_col_name, quantile_list, target_quartile, target_col_name)
# build the teacher-salary assumptions tab from the teacher-salary run's own raw metrics (metric_df_raw_teach), not the main run's metric_df_raw
assumptions_list_20_teach = make_assumptions_tab(feature_list, outlier_list, y_col_name, year, school_type, quantile_list, target_quartile, target_col_name, metric_df_raw_teach)
unique_list_20_teach = make_unique_tab(analysis_df_raw, analysis_df_teach, metric_df_teach)

In [54]:
# run analysis 21: school_type = 'charter', target_quartile = 2 of 'pct_free_reduced'

# dependent variable
y_col_name = 'adj_assessment_score'

year = 'all'   # 2015 to 2019, or 'all'
school_type = 'charter'   # 'charter', 'district', 'all'

# quartile slicing assumptions
quantile_list = [0.25, 0.50, 0.75]
target_quartile = 2
target_col_name = 'pct_free_reduced'

# remove outliers
analysis_df = remove_outliers(analysis_df_raw, assessment_rows_to_drop, school_type, year, outlier_list)

# run analysis and data tables for excel export
metric_df, metric_df_raw, quartiles_df_21, quartile_cutoffs_df_21 = run_analysis(analysis_df, feature_list, y_col_name, quantile_list, target_quartile, target_col_name)
assumptions_list_21 = make_assumptions_tab(feature_list, outlier_list, y_col_name, year, school_type, quantile_list, target_quartile, target_col_name, metric_df_raw)
unique_list_21 = make_unique_tab(analysis_df_raw, analysis_df, metric_df)

# do same for teacher salary data, 2018 and 2019 only
analysis_df_teach = create_teacher_salary_analysis_df(analysis_df, inflation_rate)
metric_df_teach, metric_df_raw_teach, quartiles_df_21_teach, quartile_cutoffs_df_21_teach = run_analysis(analysis_df_teach, feature_list, y_col_name, quantile_list, target_quartile, target_col_name)
# build the teacher-salary assumptions tab from the teacher-salary run's own raw metrics (metric_df_raw_teach), not the main run's metric_df_raw
assumptions_list_21_teach = make_assumptions_tab(feature_list, outlier_list, y_col_name, year, school_type, quantile_list, target_quartile, target_col_name, metric_df_raw_teach)
unique_list_21_teach = make_unique_tab(analysis_df_raw, analysis_df_teach, metric_df_teach)

In [55]:
# run analysis 22: school_type = 'charter', target_quartile = 3 of 'pct_free_reduced'

# dependent variable
y_col_name = 'adj_assessment_score'

year = 'all'   # 2015 to 2019, or 'all'
school_type = 'charter'   # 'charter', 'district', 'all'

# quartile slicing assumptions
quantile_list = [0.25, 0.50, 0.75]
target_quartile = 3
target_col_name = 'pct_free_reduced'

# remove outliers
analysis_df = remove_outliers(analysis_df_raw, assessment_rows_to_drop, school_type, year, outlier_list)

# run analysis and data tables for excel export
metric_df, metric_df_raw, quartiles_df_22, quartile_cutoffs_df_22 = run_analysis(analysis_df, feature_list, y_col_name, quantile_list, target_quartile, target_col_name)
assumptions_list_22 = make_assumptions_tab(feature_list, outlier_list, y_col_name, year, school_type, quantile_list, target_quartile, target_col_name, metric_df_raw)
unique_list_22 = make_unique_tab(analysis_df_raw, analysis_df, metric_df)

# do same for teacher salary data, 2018 and 2019 only
analysis_df_teach = create_teacher_salary_analysis_df(analysis_df, inflation_rate)
metric_df_teach, metric_df_raw_teach, quartiles_df_22_teach, quartile_cutoffs_df_22_teach = run_analysis(analysis_df_teach, feature_list, y_col_name, quantile_list, target_quartile, target_col_name)
# build the teacher-salary assumptions tab from the teacher-salary run's own raw metrics (metric_df_raw_teach), not the main run's metric_df_raw
assumptions_list_22_teach = make_assumptions_tab(feature_list, outlier_list, y_col_name, year, school_type, quantile_list, target_quartile, target_col_name, metric_df_raw_teach)
unique_list_22_teach = make_unique_tab(analysis_df_raw, analysis_df_teach, metric_df_teach)

In [56]:
# run analysis 23: school_type = 'charter', target_quartile = 4 of 'pct_free_reduced'

# dependent variable
y_col_name = 'adj_assessment_score'

year = 'all'   # 2015 to 2019, or 'all'
school_type = 'charter'   # 'charter', 'district', 'all'

# quartile slicing assumptions
quantile_list = [0.25, 0.50, 0.75]
target_quartile = 4
target_col_name = 'pct_free_reduced'

# remove outliers
analysis_df = remove_outliers(analysis_df_raw, assessment_rows_to_drop, school_type, year, outlier_list)

# run analysis and data tables for excel export
metric_df, metric_df_raw, quartiles_df_23, quartile_cutoffs_df_23 = run_analysis(analysis_df, feature_list, y_col_name, quantile_list, target_quartile, target_col_name)
assumptions_list_23 = make_assumptions_tab(feature_list, outlier_list, y_col_name, year, school_type, quantile_list, target_quartile, target_col_name, metric_df_raw)
unique_list_23 = make_unique_tab(analysis_df_raw, analysis_df, metric_df)

# do same for teacher salary data, 2018 and 2019 only
analysis_df_teach = create_teacher_salary_analysis_df(analysis_df, inflation_rate)
metric_df_teach, metric_df_raw_teach, quartiles_df_23_teach, quartile_cutoffs_df_23_teach = run_analysis(analysis_df_teach, feature_list, y_col_name, quantile_list, target_quartile, target_col_name)
# build the teacher-salary assumptions tab from the teacher-salary run's own raw metrics (metric_df_raw_teach), not the main run's metric_df_raw
assumptions_list_23_teach = make_assumptions_tab(feature_list, outlier_list, y_col_name, year, school_type, quantile_list, target_quartile, target_col_name, metric_df_raw_teach)
unique_list_23_teach = make_unique_tab(analysis_df_raw, analysis_df_teach, metric_df_teach)

In [57]:
## Charter, SWD

In [58]:
# run analysis 24: school_type = 'charter', target_quartile = 1 of 'pct_swd'

# dependent variable
y_col_name = 'adj_assessment_score'

year = 'all'   # 2015 to 2019, or 'all'
school_type = 'charter'   # 'charter', 'district', 'all'

# quartile slicing assumptions
quantile_list = [0.25, 0.50, 0.75]
target_quartile = 1
target_col_name = 'pct_swd'

# remove outliers
analysis_df = remove_outliers(analysis_df_raw, assessment_rows_to_drop, school_type, year, outlier_list)

# run analysis and data tables for excel export
metric_df, metric_df_raw, quartiles_df_24, quartile_cutoffs_df_24 = run_analysis(analysis_df, feature_list, y_col_name, quantile_list, target_quartile, target_col_name)
assumptions_list_24 = make_assumptions_tab(feature_list, outlier_list, y_col_name, year, school_type, quantile_list, target_quartile, target_col_name, metric_df_raw)
unique_list_24 = make_unique_tab(analysis_df_raw, analysis_df, metric_df)

# do same for teacher salary data, 2018 and 2019 only
analysis_df_teach = create_teacher_salary_analysis_df(analysis_df, inflation_rate)
metric_df_teach, metric_df_raw_teach, quartiles_df_24_teach, quartile_cutoffs_df_24_teach = run_analysis(analysis_df_teach, feature_list, y_col_name, quantile_list, target_quartile, target_col_name)
# build the teacher-salary assumptions tab from the teacher-salary run's own raw metrics (metric_df_raw_teach), not the main run's metric_df_raw
assumptions_list_24_teach = make_assumptions_tab(feature_list, outlier_list, y_col_name, year, school_type, quantile_list, target_quartile, target_col_name, metric_df_raw_teach)
unique_list_24_teach = make_unique_tab(analysis_df_raw, analysis_df_teach, metric_df_teach)

In [59]:
# run analysis 25: school_type = 'charter', target_quartile = 2 of 'pct_swd'

# dependent variable
y_col_name = 'adj_assessment_score'

year = 'all'   # 2015 to 2019, or 'all'
school_type = 'charter'   # 'charter', 'district', 'all'

# quartile slicing assumptions
quantile_list = [0.25, 0.50, 0.75]
target_quartile = 2
target_col_name = 'pct_swd'

# remove outliers
analysis_df = remove_outliers(analysis_df_raw, assessment_rows_to_drop, school_type, year, outlier_list)

# run analysis and data tables for excel export
metric_df, metric_df_raw, quartiles_df_25, quartile_cutoffs_df_25 = run_analysis(analysis_df, feature_list, y_col_name, quantile_list, target_quartile, target_col_name)
assumptions_list_25 = make_assumptions_tab(feature_list, outlier_list, y_col_name, year, school_type, quantile_list, target_quartile, target_col_name, metric_df_raw)
unique_list_25 = make_unique_tab(analysis_df_raw, analysis_df, metric_df)

# do same for teacher salary data, 2018 and 2019 only
analysis_df_teach = create_teacher_salary_analysis_df(analysis_df, inflation_rate)
metric_df_teach, metric_df_raw_teach, quartiles_df_25_teach, quartile_cutoffs_df_25_teach = run_analysis(analysis_df_teach, feature_list, y_col_name, quantile_list, target_quartile, target_col_name)
# build the teacher-salary assumptions tab from the teacher-salary run's own raw metrics (metric_df_raw_teach), not the main run's metric_df_raw
assumptions_list_25_teach = make_assumptions_tab(feature_list, outlier_list, y_col_name, year, school_type, quantile_list, target_quartile, target_col_name, metric_df_raw_teach)
unique_list_25_teach = make_unique_tab(analysis_df_raw, analysis_df_teach, metric_df_teach)

In [60]:
# run analysis 26: school_type = 'charter', target_quartile = 3 of 'pct_swd'

# dependent variable
y_col_name = 'adj_assessment_score'

year = 'all'   # 2015 to 2019, or 'all'
school_type = 'charter'   # 'charter', 'district', 'all'

# quartile slicing assumptions
quantile_list = [0.25, 0.50, 0.75]
target_quartile = 3
target_col_name = 'pct_swd'

# remove outliers
analysis_df = remove_outliers(analysis_df_raw, assessment_rows_to_drop, school_type, year, outlier_list)

# run analysis and data tables for excel export
metric_df, metric_df_raw, quartiles_df_26, quartile_cutoffs_df_26 = run_analysis(analysis_df, feature_list, y_col_name, quantile_list, target_quartile, target_col_name)
assumptions_list_26 = make_assumptions_tab(feature_list, outlier_list, y_col_name, year, school_type, quantile_list, target_quartile, target_col_name, metric_df_raw)
unique_list_26 = make_unique_tab(analysis_df_raw, analysis_df, metric_df)

# do same for teacher salary data, 2018 and 2019 only
analysis_df_teach = create_teacher_salary_analysis_df(analysis_df, inflation_rate)
metric_df_teach, metric_df_raw_teach, quartiles_df_26_teach, quartile_cutoffs_df_26_teach = run_analysis(analysis_df_teach, feature_list, y_col_name, quantile_list, target_quartile, target_col_name)
# build the teacher-salary assumptions tab from the teacher-salary run's own raw metrics (metric_df_raw_teach), not the main run's metric_df_raw
assumptions_list_26_teach = make_assumptions_tab(feature_list, outlier_list, y_col_name, year, school_type, quantile_list, target_quartile, target_col_name, metric_df_raw_teach)
unique_list_26_teach = make_unique_tab(analysis_df_raw, analysis_df_teach, metric_df_teach)

In [61]:
# run analysis 27: school_type = 'charter', target_quartile = 4 of 'pct_swd'

# dependent variable
y_col_name = 'adj_assessment_score'

year = 'all'   # 2015 to 2019, or 'all'
school_type = 'charter'   # 'charter', 'district', 'all'

# quartile slicing assumptions
quantile_list = [0.25, 0.50, 0.75]
target_quartile = 4
target_col_name = 'pct_swd'

# remove outliers
analysis_df = remove_outliers(analysis_df_raw, assessment_rows_to_drop, school_type, year, outlier_list)

# run analysis and data tables for excel export
metric_df, metric_df_raw, quartiles_df_27, quartile_cutoffs_df_27 = run_analysis(analysis_df, feature_list, y_col_name, quantile_list, target_quartile, target_col_name)
assumptions_list_27 = make_assumptions_tab(feature_list, outlier_list, y_col_name, year, school_type, quantile_list, target_quartile, target_col_name, metric_df_raw)
unique_list_27 = make_unique_tab(analysis_df_raw, analysis_df, metric_df)

# do same for teacher salary data, 2018 and 2019 only
analysis_df_teach = create_teacher_salary_analysis_df(analysis_df, inflation_rate)
metric_df_teach, metric_df_raw_teach, quartiles_df_27_teach, quartile_cutoffs_df_27_teach = run_analysis(analysis_df_teach, feature_list, y_col_name, quantile_list, target_quartile, target_col_name)
# build the teacher-salary assumptions tab from the teacher-salary run's own raw metrics (metric_df_raw_teach), not the main run's metric_df_raw
assumptions_list_27_teach = make_assumptions_tab(feature_list, outlier_list, y_col_name, year, school_type, quantile_list, target_quartile, target_col_name, metric_df_raw_teach)
unique_list_27_teach = make_unique_tab(analysis_df_raw, analysis_df_teach, metric_df_teach)

In [62]:
## District, School Size

In [63]:
# run analysis 28: school_type = 'district', target_quartile = 1 of 'total_students'

# dependent variable
y_col_name = 'adj_assessment_score'

year = 'all'   # 2015 to 2019, or 'all'
school_type = 'district'   # 'charter', 'district', 'all'

# quartile slicing assumptions
quantile_list = [0.25, 0.50, 0.75]
target_quartile = 1
target_col_name = 'total_students'

# remove outliers
analysis_df = remove_outliers(analysis_df_raw, assessment_rows_to_drop, school_type, year, outlier_list)

# run analysis and data tables for excel export
metric_df, metric_df_raw, quartiles_df_28, quartile_cutoffs_df_28 = run_analysis(analysis_df, feature_list, y_col_name, quantile_list, target_quartile, target_col_name)
assumptions_list_28 = make_assumptions_tab(feature_list, outlier_list, y_col_name, year, school_type, quantile_list, target_quartile, target_col_name, metric_df_raw)
unique_list_28 = make_unique_tab(analysis_df_raw, analysis_df, metric_df)

# do same for teacher salary data, 2018 and 2019 only
analysis_df_teach = create_teacher_salary_analysis_df(analysis_df, inflation_rate)
metric_df_teach, metric_df_raw_teach, quartiles_df_28_teach, quartile_cutoffs_df_28_teach = run_analysis(analysis_df_teach, feature_list, y_col_name, quantile_list, target_quartile, target_col_name)
# build the teacher-salary assumptions tab from the teacher-salary run's own raw metrics (metric_df_raw_teach), not the main run's metric_df_raw
assumptions_list_28_teach = make_assumptions_tab(feature_list, outlier_list, y_col_name, year, school_type, quantile_list, target_quartile, target_col_name, metric_df_raw_teach)
unique_list_28_teach = make_unique_tab(analysis_df_raw, analysis_df_teach, metric_df_teach)

In [64]:
# run analysis 29: school_type = 'district', target_quartile = 2 of 'total_students'

# dependent variable
y_col_name = 'adj_assessment_score'

year = 'all'   # 2015 to 2019, or 'all'
school_type = 'district'   # 'charter', 'district', 'all'

# quartile slicing assumptions
quantile_list = [0.25, 0.50, 0.75]
target_quartile = 2
target_col_name = 'total_students'

# remove outliers
analysis_df = remove_outliers(analysis_df_raw, assessment_rows_to_drop, school_type, year, outlier_list)

# run analysis and data tables for excel export
metric_df, metric_df_raw, quartiles_df_29, quartile_cutoffs_df_29 = run_analysis(analysis_df, feature_list, y_col_name, quantile_list, target_quartile, target_col_name)
assumptions_list_29 = make_assumptions_tab(feature_list, outlier_list, y_col_name, year, school_type, quantile_list, target_quartile, target_col_name, metric_df_raw)
unique_list_29 = make_unique_tab(analysis_df_raw, analysis_df, metric_df)

# do same for teacher salary data, 2018 and 2019 only
analysis_df_teach = create_teacher_salary_analysis_df(analysis_df, inflation_rate)
metric_df_teach, metric_df_raw_teach, quartiles_df_29_teach, quartile_cutoffs_df_29_teach = run_analysis(analysis_df_teach, feature_list, y_col_name, quantile_list, target_quartile, target_col_name)
# build the teacher-salary assumptions tab from the teacher-salary run's own raw metrics (metric_df_raw_teach), not the main run's metric_df_raw
assumptions_list_29_teach = make_assumptions_tab(feature_list, outlier_list, y_col_name, year, school_type, quantile_list, target_quartile, target_col_name, metric_df_raw_teach)
unique_list_29_teach = make_unique_tab(analysis_df_raw, analysis_df_teach, metric_df_teach)

In [65]:
# analysis 30: school_type 'district', year 'all', target quartile 3 on 'total_students'

# dependent variable
y_col_name = 'adj_assessment_score'

# row selection -- year: 2015 to 2019, or 'all'; school_type: 'charter', 'district', 'all'
year, school_type = 'all', 'district'

# quartile slicing assumptions
quantile_list = [0.25, 0.50, 0.75]
target_quartile, target_col_name = 3, 'total_students'

# remove outliers
analysis_df = remove_outliers(
    analysis_df_raw,
    assessment_rows_to_drop,
    school_type,
    year,
    outlier_list,
)

# run the analysis and build the data tables for the excel export
(
    metric_df,
    metric_df_raw,
    quartiles_df_30,
    quartile_cutoffs_df_30,
) = run_analysis(
    analysis_df,
    feature_list,
    y_col_name,
    quantile_list,
    target_quartile,
    target_col_name,
)
assumptions_list_30 = make_assumptions_tab(
    feature_list, outlier_list, y_col_name, year, school_type,
    quantile_list, target_quartile, target_col_name, metric_df_raw,
)
unique_list_30 = make_unique_tab(analysis_df_raw, analysis_df, metric_df)

# same steps again for the teacher salary data (2018 and 2019 only)
analysis_df_teach = create_teacher_salary_analysis_df(analysis_df, inflation_rate)
(
    metric_df_teach,
    metric_df_raw_teach,
    quartiles_df_30_teach,
    quartile_cutoffs_df_30_teach,
) = run_analysis(
    analysis_df_teach,
    feature_list,
    y_col_name,
    quantile_list,
    target_quartile,
    target_col_name,
)
# NOTE(review): the last argument is metric_df_raw from the run above, not
# metric_df_raw_teach -- confirm this is intended.
assumptions_list_30_teach = make_assumptions_tab(
    feature_list, outlier_list, y_col_name, year, school_type,
    quantile_list, target_quartile, target_col_name, metric_df_raw,
)
unique_list_30_teach = make_unique_tab(analysis_df_raw, analysis_df_teach, metric_df_teach)

In [66]:
# analysis 31: school_type 'district', year 'all', target quartile 4 on 'total_students'

# dependent variable
y_col_name = 'adj_assessment_score'

# row selection -- year: 2015 to 2019, or 'all'; school_type: 'charter', 'district', 'all'
year, school_type = 'all', 'district'

# quartile slicing assumptions
quantile_list = [0.25, 0.50, 0.75]
target_quartile, target_col_name = 4, 'total_students'

# remove outliers
analysis_df = remove_outliers(
    analysis_df_raw,
    assessment_rows_to_drop,
    school_type,
    year,
    outlier_list,
)

# run the analysis and build the data tables for the excel export
(
    metric_df,
    metric_df_raw,
    quartiles_df_31,
    quartile_cutoffs_df_31,
) = run_analysis(
    analysis_df,
    feature_list,
    y_col_name,
    quantile_list,
    target_quartile,
    target_col_name,
)
assumptions_list_31 = make_assumptions_tab(
    feature_list, outlier_list, y_col_name, year, school_type,
    quantile_list, target_quartile, target_col_name, metric_df_raw,
)
unique_list_31 = make_unique_tab(analysis_df_raw, analysis_df, metric_df)

# same steps again for the teacher salary data (2018 and 2019 only)
analysis_df_teach = create_teacher_salary_analysis_df(analysis_df, inflation_rate)
(
    metric_df_teach,
    metric_df_raw_teach,
    quartiles_df_31_teach,
    quartile_cutoffs_df_31_teach,
) = run_analysis(
    analysis_df_teach,
    feature_list,
    y_col_name,
    quantile_list,
    target_quartile,
    target_col_name,
)
# NOTE(review): the last argument is metric_df_raw from the run above, not
# metric_df_raw_teach -- confirm this is intended.
assumptions_list_31_teach = make_assumptions_tab(
    feature_list, outlier_list, y_col_name, year, school_type,
    quantile_list, target_quartile, target_col_name, metric_df_raw,
)
unique_list_31_teach = make_unique_tab(analysis_df_raw, analysis_df_teach, metric_df_teach)

In [67]:
## Charter, School Size

In [68]:
# analysis 32: school_type 'charter', year 'all', target quartile 1 on 'total_students'

# dependent variable
y_col_name = 'adj_assessment_score'

# row selection -- year: 2015 to 2019, or 'all'; school_type: 'charter', 'district', 'all'
year, school_type = 'all', 'charter'

# quartile slicing assumptions
quantile_list = [0.25, 0.50, 0.75]
target_quartile, target_col_name = 1, 'total_students'

# remove outliers
analysis_df = remove_outliers(
    analysis_df_raw,
    assessment_rows_to_drop,
    school_type,
    year,
    outlier_list,
)

# run the analysis and build the data tables for the excel export
(
    metric_df,
    metric_df_raw,
    quartiles_df_32,
    quartile_cutoffs_df_32,
) = run_analysis(
    analysis_df,
    feature_list,
    y_col_name,
    quantile_list,
    target_quartile,
    target_col_name,
)
assumptions_list_32 = make_assumptions_tab(
    feature_list, outlier_list, y_col_name, year, school_type,
    quantile_list, target_quartile, target_col_name, metric_df_raw,
)
unique_list_32 = make_unique_tab(analysis_df_raw, analysis_df, metric_df)

# same steps again for the teacher salary data (2018 and 2019 only)
analysis_df_teach = create_teacher_salary_analysis_df(analysis_df, inflation_rate)
(
    metric_df_teach,
    metric_df_raw_teach,
    quartiles_df_32_teach,
    quartile_cutoffs_df_32_teach,
) = run_analysis(
    analysis_df_teach,
    feature_list,
    y_col_name,
    quantile_list,
    target_quartile,
    target_col_name,
)
# NOTE(review): the last argument is metric_df_raw from the run above, not
# metric_df_raw_teach -- confirm this is intended.
assumptions_list_32_teach = make_assumptions_tab(
    feature_list, outlier_list, y_col_name, year, school_type,
    quantile_list, target_quartile, target_col_name, metric_df_raw,
)
unique_list_32_teach = make_unique_tab(analysis_df_raw, analysis_df_teach, metric_df_teach)

In [69]:
# analysis 33: school_type 'charter', year 'all', target quartile 2 on 'total_students'

# dependent variable
y_col_name = 'adj_assessment_score'

# row selection -- year: 2015 to 2019, or 'all'; school_type: 'charter', 'district', 'all'
year, school_type = 'all', 'charter'

# quartile slicing assumptions
quantile_list = [0.25, 0.50, 0.75]
target_quartile, target_col_name = 2, 'total_students'

# remove outliers
analysis_df = remove_outliers(
    analysis_df_raw,
    assessment_rows_to_drop,
    school_type,
    year,
    outlier_list,
)

# run the analysis and build the data tables for the excel export
(
    metric_df,
    metric_df_raw,
    quartiles_df_33,
    quartile_cutoffs_df_33,
) = run_analysis(
    analysis_df,
    feature_list,
    y_col_name,
    quantile_list,
    target_quartile,
    target_col_name,
)
assumptions_list_33 = make_assumptions_tab(
    feature_list, outlier_list, y_col_name, year, school_type,
    quantile_list, target_quartile, target_col_name, metric_df_raw,
)
unique_list_33 = make_unique_tab(analysis_df_raw, analysis_df, metric_df)

# same steps again for the teacher salary data (2018 and 2019 only)
analysis_df_teach = create_teacher_salary_analysis_df(analysis_df, inflation_rate)
(
    metric_df_teach,
    metric_df_raw_teach,
    quartiles_df_33_teach,
    quartile_cutoffs_df_33_teach,
) = run_analysis(
    analysis_df_teach,
    feature_list,
    y_col_name,
    quantile_list,
    target_quartile,
    target_col_name,
)
# NOTE(review): the last argument is metric_df_raw from the run above, not
# metric_df_raw_teach -- confirm this is intended.
assumptions_list_33_teach = make_assumptions_tab(
    feature_list, outlier_list, y_col_name, year, school_type,
    quantile_list, target_quartile, target_col_name, metric_df_raw,
)
unique_list_33_teach = make_unique_tab(analysis_df_raw, analysis_df_teach, metric_df_teach)

In [70]:
# analysis 34: school_type 'charter', year 'all', target quartile 3 on 'total_students'

# dependent variable
y_col_name = 'adj_assessment_score'

# row selection -- year: 2015 to 2019, or 'all'; school_type: 'charter', 'district', 'all'
year, school_type = 'all', 'charter'

# quartile slicing assumptions
quantile_list = [0.25, 0.50, 0.75]
target_quartile, target_col_name = 3, 'total_students'

# remove outliers
analysis_df = remove_outliers(
    analysis_df_raw,
    assessment_rows_to_drop,
    school_type,
    year,
    outlier_list,
)

# run the analysis and build the data tables for the excel export
(
    metric_df,
    metric_df_raw,
    quartiles_df_34,
    quartile_cutoffs_df_34,
) = run_analysis(
    analysis_df,
    feature_list,
    y_col_name,
    quantile_list,
    target_quartile,
    target_col_name,
)
assumptions_list_34 = make_assumptions_tab(
    feature_list, outlier_list, y_col_name, year, school_type,
    quantile_list, target_quartile, target_col_name, metric_df_raw,
)
unique_list_34 = make_unique_tab(analysis_df_raw, analysis_df, metric_df)

# same steps again for the teacher salary data (2018 and 2019 only)
analysis_df_teach = create_teacher_salary_analysis_df(analysis_df, inflation_rate)
(
    metric_df_teach,
    metric_df_raw_teach,
    quartiles_df_34_teach,
    quartile_cutoffs_df_34_teach,
) = run_analysis(
    analysis_df_teach,
    feature_list,
    y_col_name,
    quantile_list,
    target_quartile,
    target_col_name,
)
# NOTE(review): the last argument is metric_df_raw from the run above, not
# metric_df_raw_teach -- confirm this is intended.
assumptions_list_34_teach = make_assumptions_tab(
    feature_list, outlier_list, y_col_name, year, school_type,
    quantile_list, target_quartile, target_col_name, metric_df_raw,
)
unique_list_34_teach = make_unique_tab(analysis_df_raw, analysis_df_teach, metric_df_teach)

In [71]:
# analysis 35: school_type 'charter', year 'all', target quartile 4 on 'total_students'

# dependent variable
y_col_name = 'adj_assessment_score'

# row selection -- year: 2015 to 2019, or 'all'; school_type: 'charter', 'district', 'all'
year, school_type = 'all', 'charter'

# quartile slicing assumptions
quantile_list = [0.25, 0.50, 0.75]
target_quartile, target_col_name = 4, 'total_students'

# remove outliers
analysis_df = remove_outliers(
    analysis_df_raw,
    assessment_rows_to_drop,
    school_type,
    year,
    outlier_list,
)

# run the analysis and build the data tables for the excel export
(
    metric_df,
    metric_df_raw,
    quartiles_df_35,
    quartile_cutoffs_df_35,
) = run_analysis(
    analysis_df,
    feature_list,
    y_col_name,
    quantile_list,
    target_quartile,
    target_col_name,
)
assumptions_list_35 = make_assumptions_tab(
    feature_list, outlier_list, y_col_name, year, school_type,
    quantile_list, target_quartile, target_col_name, metric_df_raw,
)
unique_list_35 = make_unique_tab(analysis_df_raw, analysis_df, metric_df)

# same steps again for the teacher salary data (2018 and 2019 only)
analysis_df_teach = create_teacher_salary_analysis_df(analysis_df, inflation_rate)
(
    metric_df_teach,
    metric_df_raw_teach,
    quartiles_df_35_teach,
    quartile_cutoffs_df_35_teach,
) = run_analysis(
    analysis_df_teach,
    feature_list,
    y_col_name,
    quantile_list,
    target_quartile,
    target_col_name,
)
# NOTE(review): the last argument is metric_df_raw from the run above, not
# metric_df_raw_teach -- confirm this is intended.
assumptions_list_35_teach = make_assumptions_tab(
    feature_list, outlier_list, y_col_name, year, school_type,
    quantile_list, target_quartile, target_col_name, metric_df_raw,
)
unique_list_35_teach = make_unique_tab(analysis_df_raw, analysis_df_teach, metric_df_teach)

## Export Data to Excel File

In [72]:
# Create and Write Excel File
#
# Writes one workbook containing:
#   * 'analysis_df_raw'  -- the raw analysis frame
#   * 'remove_outliers'  -- metric_df_raw
#   * seven sheets for each numbered analysis 1..35 (see export_sheet_specs)
#
# Sheet names and sheet order are identical to the previous hand-written
# list of to_excel calls; only the mechanics changed:
#   * the writer is used as a context manager, so the file is finalized and
#     closed even if one of the to_excel calls raises (ExcelWriter.save() was
#     deprecated in pandas 1.5 and removed in pandas 2.0);
#   * the per-analysis sheets are driven by a small table instead of 245
#     copy-pasted lines.

export_path = './jupyter_data/'

# number of numbered analyses whose tables are exported (analysis 1 .. 35)
num_exported_analyses = 35

# (notebook variable name, excel sheet name) templates, in the order the
# sheets are written for every analysis number
export_sheet_specs = [
    ('assumptions_list_{num}', 'slicing_assumptions_{num}'),
    ('quartiles_df_{num}', 'quartiles_analysis_{num}'),
    ('quartile_cutoffs_df_{num}', 'quartile_cutoffs_{num}'),
    ('unique_list_{num}', 'unique_{num}'),
    ('quartiles_df_{num}_teach', 'quartiles_analysis_{num}_teach'),
    ('quartile_cutoffs_df_{num}_teach', 'quartile_cutoffs_{num}_teach'),
    ('unique_list_{num}_teach', 'unique_{num}_teach'),
]

# the per-analysis tables live in numbered notebook-level variables, so they
# are looked up by name; a missing table raises KeyError naming the variable
notebook_namespace = globals()

with pd.ExcelWriter(export_path + 'test_23.xlsx', engine='xlsxwriter') as writer:

    analysis_df_raw.to_excel(writer, sheet_name='analysis_df_raw')
    # NOTE(review): metric_df_raw is reassigned by every analysis cell, so this
    # sheet holds whatever the most recently executed analysis left behind.
    metric_df_raw.to_excel(writer, sheet_name='remove_outliers')

    for analysis_num in range(1, num_exported_analyses + 1):
        for var_template, sheet_template in export_sheet_specs:
            table = notebook_namespace[var_template.format(num=analysis_num)]
            table.to_excel(writer, sheet_name=sheet_template.format(num=analysis_num))