In [24]:
import numpy as np
import pandas as pd
from joblib import Parallel, delayed
from utils import load_CPS_data, load_PENN_data, load_Germany_data, load_basque_data, load_smoking_data, load_boatlift_data, generate_simulation_components, generate_data

In [25]:
# Simulation settings shared by every experiment cell in this notebook.
# num_experiments: number of simulated panels drawn per dataset.
# num_cores: passed to joblib as n_jobs; set it to the number of cores available.
num_experiments = 1000
num_cores = 36

### Get Share of Top Time and Unit Weights

In [26]:
def TROP_weights(data,TROP_parameters):
    """Summarize how concentrated the distance-based unit and time weights are.

    Parameters
    ----------
    data : sequence of four items
        ``(Y, W, treated_units, treated_periods)``.
        ``Y`` is a 2-D array with one row per unit and one column per period.
        ``treated_units`` indexes the rows of ``Y`` that are averaged into the
        treated path. ``treated_periods`` is the number of trailing columns
        left out of the unit-distance calculation. ``W`` is unpacked but not
        used by this function.
    TROP_parameters : sequence of three items
        ``(lambda_unit, lambda_time, lambda_nn)``. Only the first two are
        used here; ``lambda_nn`` is unpacked and ignored.

    Returns
    -------
    tuple
        ``(top_5_time, top_5_unit, top_half_unit)``: the share of the total
        time weight carried by the 5 largest time weights, the share of the
        total unit weight carried by the 5 largest unit weights, and the share
        of the total unit weight carried by the ``round(N/2)`` largest unit
        weights.
    """
    Y, _W, treated_units, treated_periods = data
    lambda_unit, lambda_time, _lambda_nn = TROP_parameters

    N, T = Y.shape

    # Distance of each period index from T - treated_periods/2.
    dist_time = np.absolute(np.arange(T) - (T - treated_periods / 2))

    # Per-period average outcome across the treated rows.
    average_treated = np.mean(Y[treated_units, :], axis=0)

    # 1.0 for the first T - treated_periods columns, 0.0 for the trailing ones.
    # Built by comparison instead of `mask[:, -treated_periods:] = 0`, because
    # a `-0:` slice selects every column and would blank the whole mask (and
    # yield NaN distances) when treated_periods is 0.
    pre_mask = (np.arange(T) < T - treated_periods).astype(float)

    # Root-mean-square gap to the treated average over the unmasked periods.
    squared_gap = np.square(average_treated - Y)
    dist_unit = np.sqrt(np.sum(squared_gap * pre_mask, axis=1) / np.sum(pre_mask))

    # Exponential-decay weights in each distance.
    delta_unit = np.exp(-lambda_unit * dist_unit)
    delta_time = np.exp(-lambda_time * dist_time)

    # Sort and total the unit weights once; both unit shares reuse them.
    unit_sorted = np.sort(delta_unit)
    unit_total = np.sum(delta_unit)
    top_5_unit = np.sum(unit_sorted[-5:]) / unit_total
    # round() is Python's built-in; when it returns 0 the slice takes every unit.
    top_half_unit = np.sum(unit_sorted[-round(N / 2):]) / unit_total
    top_5_time = np.sum(np.sort(delta_time)[-5:]) / np.sum(delta_time)

    return top_5_time, top_5_unit, top_half_unit

## CPS

In [48]:
# Seed NumPy's global RNG before simulating, as the Germany, Basque and Smoking
# sections do, so a fresh run draws the same simulated panels.
# NOTE(review): assumes the utils helpers draw from np.random's global state — confirm.
np.random.seed(0)

# CPS: outcome and treatment columns, then the simulation components.
outcome = 'urate'
treatment = 'min_wage'
data = load_CPS_data(outcome, treatment)
F, M, cov_mat, pi = generate_simulation_components(data)

In [49]:
# One TROP_weights call per simulated CPS panel. The generate_data(...) argument
# is evaluated in this process as each task is created; TROP_weights is the
# function handed to joblib.
estimates = Parallel(n_jobs=num_cores, prefer='processes')(
    delayed(TROP_weights)(
        generate_data(F, M, cov_mat, pi, None),
        [1.6, 0.35, 0.011],  # lambda_unit, lambda_time, lambda_nn
    )
    for _ in range(num_experiments)
)

In [50]:
# Average over experiments of (top_5_time, top_5_unit, top_half_unit).
np.asarray(estimates).mean(axis=0)

array([0.65634955, 0.22508191, 0.66936961])

## PENN

In [56]:
# Seed NumPy's global RNG before simulating, as the Germany, Basque and Smoking
# sections do, so a fresh run draws the same simulated panels.
# NOTE(review): assumes the utils helpers draw from np.random's global state — confirm.
np.random.seed(0)

# PENN: outcome and treatment columns, then the simulation components.
outcome = 'log_gdp'
treatment = 'dem'
data = load_PENN_data(outcome, treatment)
F, M, cov_mat, pi = generate_simulation_components(data)

# One TROP_weights call per simulated panel. The generate_data(...) argument is
# evaluated in this process as each task is created.
estimates = Parallel(n_jobs=num_cores, prefer='processes')(
    delayed(TROP_weights)(
        generate_data(F, M, cov_mat, pi, None),
        [0.3, 0.4, 0.006],  # lambda_unit, lambda_time, lambda_nn
    )
    for _ in range(num_experiments)
)

# Average over experiments of (top_5_time, top_5_unit, top_half_unit).
np.mean(np.array(estimates), axis=0)

array([0.69572784, 0.06052287, 0.59315102])

## Germany

In [52]:
# Germany: seed the global NumPy RNG, then build the simulation components.
np.random.seed(0)
outcome, treatment = 'gdp', None
data = load_Germany_data(outcome, treatment)
F, M, cov_mat, pi = generate_simulation_components(data)

# One TROP_weights call per simulated panel. The generate_data(...) argument is
# evaluated in this process as each task is created.
estimates = Parallel(n_jobs=num_cores, prefer='processes')(
    delayed(TROP_weights)(
        generate_data(F, M, cov_mat, pi, 'Random'),
        [1.2, 0.2, 0.011],  # lambda_unit, lambda_time, lambda_nn
    )
    for _ in range(num_experiments)
)

# Average over experiments of (top_5_time, top_5_unit, top_half_unit).
np.asarray(estimates).mean(axis=0)

array([0.49713864, 0.36623512, 0.55457779])

## Basque

In [53]:
# Basque: seed the global NumPy RNG, then build the simulation components.
np.random.seed(0)
outcome, treatment = 'gdpcap', None
data = load_basque_data(outcome, treatment)
F, M, cov_mat, pi = generate_simulation_components(data)

# One TROP_weights call per simulated panel. With lambda_unit = 0 every unit
# weight is exp(0) = 1 whenever the unit distances are finite.
estimates = Parallel(n_jobs=num_cores, prefer='processes')(
    delayed(TROP_weights)(
        generate_data(F, M, cov_mat, pi, 'Random'),
        [0, 0.35, 0.006],  # lambda_unit, lambda_time, lambda_nn
    )
    for _ in range(num_experiments)
)

# Average over experiments of (top_5_time, top_5_unit, top_half_unit).
np.asarray(estimates).mean(axis=0)

array([0.65634861, 0.27777778, 0.5       ])

## Smoking

In [54]:
# Smoking: seed the global NumPy RNG, then build the simulation components.
np.random.seed(0)
outcome, treatment = 'PacksPerCapita', None
data = load_smoking_data(outcome, treatment)
F, M, cov_mat, pi = generate_simulation_components(data)

# One TROP_weights call per simulated panel. The generate_data(...) argument is
# evaluated in this process as each task is created.
estimates = Parallel(n_jobs=num_cores, prefer='processes')(
    delayed(TROP_weights)(
        generate_data(F, M, cov_mat, pi, 'Random'),
        [0.25, 0.4, 0.011],  # lambda_unit, lambda_time, lambda_nn
    )
    for _ in range(num_experiments)
)

# Average over experiments of (top_5_time, top_5_unit, top_half_unit).
np.asarray(estimates).mean(axis=0)

array([0.69573708, 0.14958593, 0.56080483])

## Boatlift

In [55]:
# Seed re-enabled (it was commented out) so this section is seeded like the
# Germany, Basque and Smoking sections and a fresh run draws the same panels.
# NOTE(review): assumes the utils helpers draw from np.random's global state — confirm.
np.random.seed(0)

# Boatlift: outcome column (no treatment column), then the simulation components.
outcome = 'loguearnhre'
treatment = None
data = load_boatlift_data(outcome, treatment)
F, M, cov_mat, pi = generate_simulation_components(data)

# One TROP_weights call per simulated panel. The generate_data(...) argument is
# evaluated in this process as each task is created.
estimates = Parallel(n_jobs=num_cores, prefer='processes')(
    delayed(TROP_weights)(
        generate_data(F, M, cov_mat, pi, 'Random'),
        [0.2, 0.2, 0.151],  # lambda_unit, lambda_time, lambda_nn
    )
    for _ in range(num_experiments)
)

# Average over experiments of (top_5_time, top_5_unit, top_half_unit).
np.mean(np.array(estimates), axis=0)

array([0.51468552, 0.13129882, 0.54117832])