In [None]:
import warnings
warnings.filterwarnings('ignore')

import pandas as pd
import numpy as np
import statsmodels.formula.api as smf

import time

from collections import Counter
from datetime import datetime
%matplotlib inline

In [None]:
from tqdm.notebook import tqdm

## analysis file

In [None]:
# Read the panel data; the first CSV column is used as the row index.
analysisdf = pd.read_csv('data/paneldata.csv', index_col=0)
# Preview the first 50 rows.
analysisdf.head(50)

## trusts exposed in 2016

In [None]:
# Trust codes that have at least one row with scr_year == 2016.
# Series.unique() returns values in order of first appearance, so the list is
# identical on every run; iterating a set of strings is not, because string
# hashing is randomised per Python process.
all_exposed = analysisdf.loc[analysisdf["scr_year"] == 2016, "trust_code"].unique().tolist()
all_exposed

## acute district general hospitals subgroup

In [None]:
target_types = ["ACUTE - DGH"]

# Row mask for the target trust type(s); computed once and reused for both splits.
is_target_type = analysisdf["trust_type"].isin(target_types)

analysisdf_target = analysisdf[is_target_type]
analysisdf_other = analysisdf[~is_target_type]

# Distinct trust codes in each split. Series.unique() keeps first-appearance
# order, so these lists (and everything derived from them, such as
# exposed_target[0]) are the same on every run, unlike list(set(...)).
target_trusts = analysisdf_target["trust_code"].unique().tolist()
other_trusts = analysisdf_other["trust_code"].unique().tolist()

In [None]:
def common_items(list1, list2):
    """Return the items of ``list1`` that also occur in ``list2``.

    The result follows the order of ``list1`` and keeps its duplicates.
    """
    overlap = []
    for candidate in list1:
        if candidate in list2:
            overlap.append(candidate)
    return overlap

# Target-type trusts that also appear in the 2016 exposed list.
exposed_target = common_items(target_trusts, all_exposed)

## included in analysis
print('cases:', exposed_target, len(exposed_target), sep='\n')

In [None]:
# Start from every target-type trust and strike out each exposed case;
# what remains is the control (placebo) group.
exposed_placebo = list(target_trusts)

for exposed_code in exposed_target:
    exposed_placebo.remove(exposed_code)

print('controls:', exposed_placebo, len(exposed_placebo), sep='\n')

In [None]:
# Rows of the target-type panel whose trust is in the control (placebo) list.
analysisdf_placebo = analysisdf_target.loc[analysisdf_target['trust_code'].isin(exposed_placebo)]

## synthetic control

In [None]:
from SyntheticControlMethods import Synth, DiffSynth

In [None]:
def to_seconds(date):
    """Convert a date/datetime to POSIX seconds, reading its fields as UTC.

    The previous implementation used ``time.mktime``, which interprets the
    fields in the machine's local time zone. The treatment period used with
    these values in this notebook is the fixed number 1483142400
    (2016-12-31 00:00:00 UTC), so the conversion has to be time-zone
    independent for that constant to line up with the 2016-12-31 row.

    Parameters
    ----------
    date : datetime.date, datetime.datetime or pandas.Timestamp
        Any object exposing ``timetuple()``.

    Returns
    -------
    float
        Seconds since 1970-01-01 00:00:00 UTC.
    """
    # Local import: `calendar` is not among the notebook's top-level imports.
    import calendar

    # float() keeps the return type of the original time.mktime-based version.
    return float(calendar.timegm(date.timetuple()))

In [None]:
# Date axis used for the output tables: the 'date' values of the first exposed trust.
dates = analysisdf.loc[analysisdf['trust_code'] == exposed_target[0], 'date'].to_list()

# Outcome column, then the five covariate columns.
yvar = 'breach_%_type1'
xvar1, xvar2, xvar3, xvar4, xvar5 = (
    'ae_scaled',
    'occupied_%_general',
    'inpatient_%_casemix',
    'deprivation_q1_q2',
    'nurses_per_bed',
)

# Columns kept from the panel before it is handed to Synth.
features = ['trust_code', 'date', yvar] + [xvar1, xvar2, xvar3, xvar4, xvar5]

## exposed units

In [None]:
exposed_group = exposed_target
dataset = analysisdf_target

# Treatment period passed to Synth, in epoch seconds (2016-12-31 = 1483142400).
TREATMENT_PERIOD_SECONDS = 1483142400

# One synthetic-control fit per exposed trust; results are written to CSV
# files under outcomes/ named after the trust code.
for trust in tqdm(exposed_group):

    # Drop the *other* exposed trusts, so the panel passed to Synth contains
    # this trust plus the trusts that are not in exposed_group.
    other_exposed = [code for code in exposed_group if code != trust]
    single_exposed_case = dataset[~dataset['trust_code'].isin(other_exposed)]

    # .copy(): the column edits below must act on an independent frame, not
    # on a slice of `dataset` (chained assignment).
    syntheticdf = single_exposed_case[features].copy()

    # Replace the 'date' column by integer epoch seconds in a 'seconds' column.
    syntheticdf['seconds'] = pd.to_datetime(syntheticdf['date']).apply(to_seconds).astype(int)
    syntheticdf = syntheticdf.drop(columns=['date'])
    syntheticdf['trust_code'] = syntheticdf['trust_code'].astype(str)

    sc = Synth(syntheticdf, yvar, "trust_code", "seconds", TREATMENT_PERIOD_SECONDS, trust, n_optim=25, pen="auto")

    sc.in_space_placebo(25)

    fit = sc.original_data

    # Weights and fit
    fit.weight_df.to_csv('outcomes/weights/{}.csv'.format(trust))
    fit.rmspe_df.to_csv('outcomes/rmspe/{}.csv'.format(trust))

    synth_outcomes = fit.synth_outcome[0].tolist()
    exposed_outcomes = [row[0].round(6) for row in fit.treated_outcome_all]

    # NOTE(review): `dates` comes from the first exposed trust; this assumes
    # every trust has the same dates in the same order as Synth's output -- confirm.
    treated_v_synth = pd.DataFrame(
        list(zip(dates, exposed_outcomes, synth_outcomes)),
        columns=['date', 'exposed', 'synth'],
    )
    treated_v_synth['difference'] = treated_v_synth['exposed'] - treated_v_synth['synth']

    # Outcome variable
    treated_v_synth.to_csv('outcomes/treat_v_synth/{}.csv'.format(trust))

    # Treatment effect: the exposed trust's gap, then one column per control
    # unit holding that unit's in-space placebo series.
    treatment_effect = pd.DataFrame(dates, columns=['date'])
    treatment_effect['exposed'] = treated_v_synth['difference']

    control_units = fit.control_units.tolist()
    for position, placebo_series in enumerate(fit.in_space_placebos):
        treatment_effect[control_units[position]] = placebo_series

    treatment_effect.to_csv('outcomes/treatment_effect/{}.csv'.format(trust))

## placebo units 

In [None]:
exposed_group = exposed_placebo
dataset = analysisdf_placebo

# Treatment period passed to Synth, in epoch seconds (2016-12-31 = 1483142400).
TREATMENT_PERIOD_SECONDS = 1483142400

# The panel does not depend on which placebo trust is being fitted, so it is
# prepared once here instead of on every iteration.
placebo_cases = dataset[~dataset['trust_code'].isin(exposed_target)]

# .copy(): the column edits below must act on an independent frame, not on a
# slice of `dataset` (chained assignment).
placebo_panel = placebo_cases[features].copy()

# Replace the 'date' column by integer epoch seconds in a 'seconds' column.
placebo_panel['seconds'] = pd.to_datetime(placebo_panel['date']).apply(to_seconds).astype(int)
placebo_panel = placebo_panel.drop(columns=['date'])
placebo_panel['trust_code'] = placebo_panel['trust_code'].astype(str)

# One synthetic-control fit per placebo trust, each treated in turn as if it
# had been exposed; results go to the *_placebo folders under outcomes/.
for trust in tqdm(exposed_group):

    # Fresh copy per fit so nothing done to the frame during one fit can
    # carry over to the next.
    syntheticdf = placebo_panel.copy()

    sc = Synth(syntheticdf, yvar, "trust_code", "seconds", TREATMENT_PERIOD_SECONDS, trust, n_optim=25, pen="auto")

    sc.in_space_placebo(25)

    fit = sc.original_data

    # Weights and fit
    fit.weight_df.to_csv('outcomes/weights_placebo/{}.csv'.format(trust))
    fit.rmspe_df.to_csv('outcomes/rmspe_placebo/{}.csv'.format(trust))

    synth_outcomes = fit.synth_outcome[0].tolist()
    exposed_outcomes = [row[0].round(6) for row in fit.treated_outcome_all]

    # NOTE(review): `dates` comes from the first exposed trust; this assumes
    # every trust has the same dates in the same order as Synth's output -- confirm.
    treated_v_synth = pd.DataFrame(
        list(zip(dates, exposed_outcomes, synth_outcomes)),
        columns=['date', 'exposed', 'synth'],
    )
    treated_v_synth['difference'] = treated_v_synth['exposed'] - treated_v_synth['synth']

    # Outcome variable
    treated_v_synth.to_csv('outcomes/treat_v_synth_placebo/{}.csv'.format(trust))

    # Treatment effect: this trust's gap, then one column per control unit
    # holding that unit's in-space placebo series.
    treatment_effect = pd.DataFrame(dates, columns=['date'])
    treatment_effect['exposed'] = treated_v_synth['difference']

    control_units = fit.control_units.tolist()
    for position, placebo_series in enumerate(fit.in_space_placebos):
        treatment_effect[control_units[position]] = placebo_series

    treatment_effect.to_csv('outcomes/treatment_effect_placebo/{}.csv'.format(trust))

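#### timelookup (epoch seconds → month-end date; the keys are UK local-time values, so British Summer Time months are 3600 s earlier than UTC midnight)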

{1398812400.0: Timestamp('2014-04-30 00:00:00'),
 1401490800.0: Timestamp('2014-05-31 00:00:00'),
 1404082800.0: Timestamp('2014-06-30 00:00:00'),
 1406761200.0: Timestamp('2014-07-31 00:00:00'),
 1409439600.0: Timestamp('2014-08-31 00:00:00'),
 1412031600.0: Timestamp('2014-09-30 00:00:00'),
 1414713600.0: Timestamp('2014-10-31 00:00:00'),
 1417305600.0: Timestamp('2014-11-30 00:00:00'),
 1419984000.0: Timestamp('2014-12-31 00:00:00'),
 1422662400.0: Timestamp('2015-01-31 00:00:00'),
 1425081600.0: Timestamp('2015-02-28 00:00:00'),
 1427756400.0: Timestamp('2015-03-31 00:00:00'),
 1430348400.0: Timestamp('2015-04-30 00:00:00'),
 1433026800.0: Timestamp('2015-05-31 00:00:00'),
 1435618800.0: Timestamp('2015-06-30 00:00:00'),
 1438297200.0: Timestamp('2015-07-31 00:00:00'),
 1440975600.0: Timestamp('2015-08-31 00:00:00'),
 1443567600.0: Timestamp('2015-09-30 00:00:00'),
 1446249600.0: Timestamp('2015-10-31 00:00:00'),
 1448841600.0: Timestamp('2015-11-30 00:00:00'),
 1451520000.0: Timestamp('2015-12-31 00:00:00'),
 1454198400.0: Timestamp('2016-01-31 00:00:00'),
 1456704000.0: Timestamp('2016-02-29 00:00:00'),
 1459378800.0: Timestamp('2016-03-31 00:00:00'),
 1461970800.0: Timestamp('2016-04-30 00:00:00'),
 1464649200.0: Timestamp('2016-05-31 00:00:00'),
 1467241200.0: Timestamp('2016-06-30 00:00:00'),
 1469919600.0: Timestamp('2016-07-31 00:00:00'),
 1472598000.0: Timestamp('2016-08-31 00:00:00'),
 1475190000.0: Timestamp('2016-09-30 00:00:00'),
 1477872000.0: Timestamp('2016-10-31 00:00:00'),
 1480464000.0: Timestamp('2016-11-30 00:00:00'),
 1483142400.0: Timestamp('2016-12-31 00:00:00'),
 1485820800.0: Timestamp('2017-01-31 00:00:00'),
 1488240000.0: Timestamp('2017-02-28 00:00:00'),
 1490914800.0: Timestamp('2017-03-31 00:00:00'),
 1493506800.0: Timestamp('2017-04-30 00:00:00'),
 1496185200.0: Timestamp('2017-05-31 00:00:00'),
 1498777200.0: Timestamp('2017-06-30 00:00:00'),
 1501455600.0: Timestamp('2017-07-31 00:00:00'),
 1504134000.0: Timestamp('2017-08-31 00:00:00'),
 1506726000.0: Timestamp('2017-09-30 00:00:00'),
 1509408000.0: Timestamp('2017-10-31 00:00:00'),
 1512000000.0: Timestamp('2017-11-30 00:00:00'),
 1514678400.0: Timestamp('2017-12-31 00:00:00'),
 1517356800.0: Timestamp('2018-01-31 00:00:00'),
 1519776000.0: Timestamp('2018-02-28 00:00:00'),
 1522450800.0: Timestamp('2018-03-31 00:00:00'),
 1525042800.0: Timestamp('2018-04-30 00:00:00'),
 1527721200.0: Timestamp('2018-05-31 00:00:00'),
 1530313200.0: Timestamp('2018-06-30 00:00:00'),
 1532991600.0: Timestamp('2018-07-31 00:00:00'),
 1535670000.0: Timestamp('2018-08-31 00:00:00'),
 1538262000.0: Timestamp('2018-09-30 00:00:00'),
 1540944000.0: Timestamp('2018-10-31 00:00:00'),
 1543536000.0: Timestamp('2018-11-30 00:00:00'),
 1546214400.0: Timestamp('2018-12-31 00:00:00'),
 1548892800.0: Timestamp('2019-01-31 00:00:00'),
 1551312000.0: Timestamp('2019-02-28 00:00:00'),
 1553990400.0: Timestamp('2019-03-31 00:00:00'),
 1556578800.0: Timestamp('2019-04-30 00:00:00'),
 1559257200.0: Timestamp('2019-05-31 00:00:00'),
 1561849200.0: Timestamp('2019-06-30 00:00:00'),
 1564527600.0: Timestamp('2019-07-31 00:00:00'),
 1567206000.0: Timestamp('2019-08-31 00:00:00')}
