In placebo tests, we run the synthetic control method on each PSA in the donor pool (i.e., all PSAs not in 7D) as if it were a treated PSA in 7D. These placebo PSAs are then compared with their synthetic counterparts; any deviations between a placebo PSA and its synthetic match would not be due to the implementation of CGIC 2.0, since the placebo PSAs were not treated. By then comparing the difference between the treated PSAs and their corresponding synthetic controls to the difference between placebo PSAs and their controls, we can evaluate whether the deviation between the outcomes in treated PSAs and their synthetic counterparts is likely to reflect the impacts of CGIC 2.0 rather than random chance.

This notebook uses the rolling means that came from `4_Quasi_Experimental_Analysis_Synthetic_Control.ipynb` to create real and synthetic rolling means for each metric, for every PSA, to prepare data to run placebo tests.

Note to reader: The author is aware that it is not best practice to name variables starting with a digit; however, the following R script uses this naming convention, so the author conformed to this standard.

In [1]:
import sys
sys.path.append('..')

import os
import re #for regex

import math
import geopandas as gpd
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt

from cgic_scripts import gis, plots, shotspotter, synth, synth_placebo

%matplotlib inline

# Hex colour codes intended for plotting (blue, pink, grey).
lab_blue, lab_pink, lab_grey = '#2b4888', '#de4057', '#595959'


In [2]:
# PSA numbers 701-708, kept as their own list of column names.
cols = list(np.arange(701, 709))

# District labels '1D' .. '7D'; these become the prefixes of the dynamically
# named result variables created further down.
districts = [str(number) + 'D' for number in np.arange(1, 8)]

# PSA numbers per district. The districts do not all have the same number of
# PSAs (4D runs to 409, 5D stops at 507).
d1 = list(np.arange(101, 109))
d2 = list(np.arange(201, 209))
d3 = list(np.arange(301, 309))
d4 = list(np.arange(401, 410))
d5 = list(np.arange(501, 508))
d6 = list(np.arange(601, 609))
d7 = list(np.arange(701, 709))

# One entry per district, in the same order as `districts`.
psa_list = [d1, d2, d3, d4, d5, d6, d7]

In [3]:
DATA_DIR = os.path.join('..', 'data')


def _read_rolling_means(csv_name):
    """Load one rolling-means CSV from DATA_DIR into a DataFrame."""
    return pd.read_csv(os.path.join(DATA_DIR, csv_name))


# Rolling means, one frame per metric.
aggregate_shotspotter_df = _read_rolling_means('aggregate_shotspotter_df.csv')
aggregate_calls_df = _read_rolling_means('aggregate_calls_df.csv')
agg_violent_crime_df = _read_rolling_means('aggregate_violent_crime_df.csv')
agg_gun_crime_df = _read_rolling_means('aggregate_gun_crime_df.csv')
agg_top_broad_arrests_df = _read_rolling_means('aggregate_broad_arrests_df.csv')
agg_gun_arrests_df = _read_rolling_means('aggregate_gun_arrests_df.csv')
# Despite the "crime" in its name, this frame is read from the violent gun
# *arrests* file.
agg_top_violent_gun_crime_df = _read_rolling_means('aggregate_violent_gun_arrests_df.csv')

## Calls for Service

In [4]:
# Calls for service: every district's PSAs take a turn as the treated group
# handed to perform_control.
for district, district_psas in zip(districts, psa_list):
    (synth_post_vals, real_post_vals,
     synth_pre_vals, real_pre_vals, gap_vals) = synth_placebo.perform_control(
        ddf=aggregate_calls_df,
        p=1,
        fig=False,
        plot_all_periods=True,
        soft_convex=False,
        treatment_psas=district_psas,
        exclude_psas=[],
    )

    # Output suffix -> matching return value. The fifth return value
    # (gap_vals) is not stored.
    outputs = {
        '_calls_synth_post': synth_post_vals,
        '_calls_synth_pre': synth_pre_vals,
        '_calls_real_post': real_post_vals,
        '_calls_real_pre': real_pre_vals,
    }

    # Names such as '1D_calls_synth_post' begin with a digit, so they have to
    # be created through the namespace dict instead of a plain assignment.
    for suffix, values in outputs.items():
        globals()[district + suffix] = pd.DataFrame(values)

    # Use the district's PSA numbers as the column labels of each frame.
    for suffix in outputs:
        globals()[district + suffix].columns = district_psas

## ShotSpotter

In [5]:
# ShotSpotter needs its own PSA/district lists: per the original note it has no
# PSAs in 2D and is completely missing PSAs 101, 102, and 401.
d1_alt = list(np.arange(103, 109))
d4_alt = list(np.arange(402, 410))
shotspot_psa_list = [d1_alt, d3, d4_alt, d5, d6, d7]
shotspot_districts = [label for label in districts if label != '2D']

for district, district_psas in zip(shotspot_districts, shotspot_psa_list):
    (synth_post_vals, real_post_vals,
     synth_pre_vals, real_pre_vals, gap_vals) = synth_placebo.perform_control(
        ddf=aggregate_shotspotter_df,
        p=1,
        fig=False,
        plot_all_periods=True,
        soft_convex=False,
        treatment_psas=district_psas,
        exclude_psas=[],
    )

    # Output suffix -> matching return value. The fifth return value
    # (gap_vals) is not stored.
    outputs = {
        '_shotspotter_synth_post': synth_post_vals,
        '_shotspotter_synth_pre': synth_pre_vals,
        '_shotspotter_real_post': real_post_vals,
        '_shotspotter_real_pre': real_pre_vals,
    }

    # Digit-leading names can only be created through the namespace dict.
    for suffix, values in outputs.items():
        globals()[district + suffix] = pd.DataFrame(values)

    # Use the district's PSA numbers as the column labels of each frame.
    for suffix in outputs:
        globals()[district + suffix].columns = district_psas

## Violent Crimes

In [6]:
# Violent crimes: every district's PSAs take a turn as the treated group
# handed to perform_control.
for district, district_psas in zip(districts, psa_list):
    (synth_post_vals, real_post_vals,
     synth_pre_vals, real_pre_vals, gap_vals) = synth_placebo.perform_control(
        ddf=agg_violent_crime_df,
        p=1,
        fig=False,
        plot_all_periods=True,
        soft_convex=False,
        treatment_psas=district_psas,
        exclude_psas=[],
    )

    # Output suffix -> matching return value. The fifth return value
    # (gap_vals) is not stored.
    outputs = {
        '_dcr_violent_synth_post': synth_post_vals,
        '_dcr_violent_synth_pre': synth_pre_vals,
        '_dcr_violent_real_post': real_post_vals,
        '_dcr_violent_real_pre': real_pre_vals,
    }

    # Digit-leading names can only be created through the namespace dict.
    for suffix, values in outputs.items():
        globals()[district + suffix] = pd.DataFrame(values)

    # Use the district's PSA numbers as the column labels of each frame.
    for suffix in outputs:
        globals()[district + suffix].columns = district_psas

## Gun Crimes

In [7]:
# Gun crimes: every district's PSAs take a turn as the treated group handed to
# perform_control.
for district, district_psas in zip(districts, psa_list):
    (synth_post_vals, real_post_vals,
     synth_pre_vals, real_pre_vals, gap_vals) = synth_placebo.perform_control(
        ddf=agg_gun_crime_df,
        p=1,
        fig=False,
        plot_all_periods=True,
        soft_convex=False,
        treatment_psas=district_psas,
        exclude_psas=[],
    )

    # Output suffix -> matching return value. The fifth return value
    # (gap_vals) is not stored.
    outputs = {
        '_dcr_gun_synth_post': synth_post_vals,
        '_dcr_gun_synth_pre': synth_pre_vals,
        '_dcr_gun_real_post': real_post_vals,
        '_dcr_gun_real_pre': real_pre_vals,
    }

    # Digit-leading names can only be created through the namespace dict.
    for suffix, values in outputs.items():
        globals()[district + suffix] = pd.DataFrame(values)

    # Use the district's PSA numbers as the column labels of each frame.
    for suffix in outputs:
        globals()[district + suffix].columns = district_psas

## "Broad" Arrests

In [8]:
# "Broad" arrests: every district's PSAs take a turn as the treated group
# handed to perform_control.
for district, district_psas in zip(districts, psa_list):
    (synth_post_vals, real_post_vals,
     synth_pre_vals, real_pre_vals, gap_vals) = synth_placebo.perform_control(
        ddf=agg_top_broad_arrests_df,
        p=1,
        fig=False,
        plot_all_periods=True,
        soft_convex=False,
        treatment_psas=district_psas,
        exclude_psas=[],
    )

    # Output suffix -> matching return value. The fifth return value
    # (gap_vals) is not stored.
    outputs = {
        '_arrest_broad_synth_post': synth_post_vals,
        '_arrest_broad_synth_pre': synth_pre_vals,
        '_arrest_broad_real_post': real_post_vals,
        '_arrest_broad_real_pre': real_pre_vals,
    }

    # Digit-leading names can only be created through the namespace dict.
    for suffix, values in outputs.items():
        globals()[district + suffix] = pd.DataFrame(values)

    # Use the district's PSA numbers as the column labels of each frame.
    for suffix in outputs:
        globals()[district + suffix].columns = district_psas

## Gun Arrests

In [9]:
# NOTE(review): d2_alt is assigned here but never read in the visible cells.
d2_alt = []

# Gun arrests: every district's PSAs take a turn as the treated group handed
# to perform_control.
for district, district_psas in zip(districts, psa_list):
    (synth_post_vals, real_post_vals,
     synth_pre_vals, real_pre_vals, gap_vals) = synth_placebo.perform_control(
        ddf=agg_gun_arrests_df,
        p=1,
        fig=False,
        plot_all_periods=True,
        soft_convex=False,
        treatment_psas=district_psas,
        exclude_psas=[],
    )

    # Output suffix -> matching return value. The fifth return value
    # (gap_vals) is not stored.
    outputs = {
        '_arrest_violation_synth_post': synth_post_vals,
        '_arrest_violation_synth_pre': synth_pre_vals,
        '_arrest_violation_real_post': real_post_vals,
        '_arrest_violation_real_pre': real_pre_vals,
    }

    # Digit-leading names can only be created through the namespace dict.
    for suffix, values in outputs.items():
        globals()[district + suffix] = pd.DataFrame(values)

    # Use the district's PSA numbers as the column labels of each frame.
    for suffix in outputs:
        globals()[district + suffix].columns = district_psas

## Violent Gun Arrests

In [10]:
# Per the original note, PSAs 201 and 202 are missing for this metric, so 2D
# is narrowed to 203-208.
# NOTE: this rebinds the notebook-level `d2` and `psa_list`; any cell run
# after this one sees the shortened 2D list.
d2 = list(np.arange(203, 209))
psa_list = [d1, d2, d3, d4, d5, d6, d7]

# Violent gun arrests: every district's PSAs take a turn as the treated group
# handed to perform_control.
for district, district_psas in zip(districts, psa_list):
    (synth_post_vals, real_post_vals,
     synth_pre_vals, real_pre_vals, gap_vals) = synth_placebo.perform_control(
        ddf=agg_top_violent_gun_crime_df,
        p=1,
        fig=False,
        plot_all_periods=True,
        soft_convex=False,
        treatment_psas=district_psas,
        exclude_psas=[],
    )

    # Output suffix -> matching return value. The fifth return value
    # (gap_vals) is not stored.
    outputs = {
        '_arrest_violent_synth_post': synth_post_vals,
        '_arrest_violent_synth_pre': synth_pre_vals,
        '_arrest_violent_real_post': real_post_vals,
        '_arrest_violent_real_pre': real_pre_vals,
    }

    # Digit-leading names can only be created through the namespace dict.
    for suffix, values in outputs.items():
        globals()[district + suffix] = pd.DataFrame(values)

    # Use the district's PSA numbers as the column labels of each frame.
    for suffix in outputs:
        globals()[district + suffix].columns = district_psas

# Write to CSVs for plotting in R

In [11]:
# Middle part of each result-variable name: one entry per metric that is
# available for all seven districts (ShotSpotter is handled separately).
metrics_list = [
    '_calls',
    '_dcr_violent',
    '_dcr_gun',
    '_arrest_broad',
    '_arrest_violation',
    '_arrest_violent',
]

# Trailing part of each result-variable name: synthetic vs. real series, and
# post vs. pre period.
data_types = [
    '_synth_post',
    '_synth_pre',
    '_real_post',
    '_real_pre',
]

In [12]:
DATA_DIR = os.path.join('..', 'data/data_placebos')

# DataFrame.to_csv does not create missing parent folders, so make sure the
# output directory exists before the first write.
os.makedirs(DATA_DIR, exist_ok=True)

# Every result frame lives in the notebook namespace under a name assembled as
# <district><metric><data_type>.
# Example:
#     district:  '3D'
#     metric:    '_dcr_violent'
#     data_type: '_synth_pre'
# becomes:
#     varname = '3D_dcr_violent_synth_pre'
# The frame is looked up by that name and written to '<varname>.csv' (without
# the index) so it can be plotted in R.
for district in districts:
    for metric in metrics_list:
        for data_type in data_types:
            varname = district + metric + data_type
            globals()[varname].to_csv(os.path.join(DATA_DIR, varname + '.csv'), index=False)

# ShotSpotter gets special treatment bc it doesn't contain 2D
for district in shotspot_districts:
    for data_type in data_types:
        varname = district + '_shotspotter' + data_type
        globals()[varname].to_csv(os.path.join(DATA_DIR, varname + '.csv'), index=False)