In [1]:
import numpy as np
import pandas as pd
from joblib import Parallel, delayed
from utils import load_CPS_data, load_PENN_data, load_Germany_data, load_basque_data, load_smoking_data, load_boatlift_data, generate_simulation_components, parallel_experiments_variants

In [2]:
# Shared run settings for every simulation cell in this notebook.
# Both values are forwarded positionally to parallel_experiments_variants:
#   num_experiments - second argument of each call
#   num_cores       - first argument of each call; set it to the number of
#                     cores available on the machine running the notebook
num_experiments = 1_000
num_cores = 36

## CPS

In [12]:
# CPS simulation with outcome 'log_wage' and treatment 'min_wage'.
# Results are collected per setting label in two fresh dicts.
RMSE, bias = {}, {}

# setting label -> [outcome, treatment, option]
# outcome/treatment are passed to the data loader; option is forwarded
# unchanged to parallel_experiments_variants.
configs = {
    'Row 2': ['log_wage', 'min_wage', None],
}

# setting label -> list of eight parameter triples handed to
# parallel_experiments_variants (their meaning is defined in utils).
TROP_dict = {
    'Row 2': [
        [0, 0.2, 0.105],
        [1.5, 0.1, np.inf],
        [0, 0.2, 0.105],
        [0.4, 0, 1.8],
        [0, 0, 1],
        [0, 0.1, np.inf],
        [20, 0, np.inf],
        [0, 0, np.inf],
    ],
}

for setting, (outcome, treatment, option) in configs.items():
    print(setting)

    # load the panel and derive the inputs the simulator needs
    data = load_CPS_data(outcome, treatment)
    simulation_components = generate_simulation_components(data)

    # reset NumPy's global RNG immediately before each batch of experiments
    np.random.seed(0)
    setting_rmse, setting_bias = parallel_experiments_variants(
        num_cores,
        num_experiments,
        simulation_components,
        TROP_dict[setting],
        option,
    )
    RMSE[setting] = setting_rmse
    bias[setting] = setting_bias

Row 2


In [13]:
# Display the RMSE dict filled by the CPS log_wage cell above: one array per
# setting, one entry per TROP_dict triple (presumably in the same order - confirm in utils).
RMSE

{'Row 2': array([0.11145627, 0.11050784, 0.11145627, 0.11191598, 0.11496164,
        0.11150798, 0.13671533, 0.15612956])}

In [14]:
# Display the bias dict filled by the CPS log_wage cell above: one array per
# setting, one entry per TROP_dict triple (presumably in the same order - confirm in utils).
bias

{'Row 2': array([0.01954518, 0.02017071, 0.01954518, 0.01036247, 0.00820314,
        0.02361052, 0.01660517, 0.03339014])}

In [6]:
# CPS simulation with outcome 'urate' and treatment 'min_wage'.
# Author's original note for this cell: "1 period and unit"
# (NOTE(review): what this refers to is not evident from the cell - confirm).
RMSE, bias = {}, {}

# setting label -> [outcome, treatment, option]
configs = {
    'Row 2': ['urate', 'min_wage', None],
}

# setting label -> list of eight parameter triples handed to
# parallel_experiments_variants (their meaning is defined in utils).
TROP_dict = {
    'Row 2': [
        [2, 0.03, 0.325],
        [2.25, 0, np.inf],
        [0, 0.2, 0.023],
        [2, 0, 0.805],
        [0, 0, 0.005],
        [0, 0, np.inf],
        [2.25, 0, np.inf],
        [0, 0, np.inf],
    ],
}

for setting, (outcome, treatment, option) in configs.items():
    print(setting)

    # load the panel and derive the inputs the simulator needs
    data = load_CPS_data(outcome, treatment)
    simulation_components = generate_simulation_components(data)

    # reset NumPy's global RNG immediately before each batch of experiments
    np.random.seed(0)
    setting_rmse, setting_bias = parallel_experiments_variants(
        num_cores,
        num_experiments,
        simulation_components,
        TROP_dict[setting],
        option,
    )
    RMSE[setting] = setting_rmse
    bias[setting] = setting_bias

Row 2


In [7]:
# Display the RMSE dict filled by the CPS urate cell above: one array per
# setting, one entry per TROP_dict triple (presumably in the same order - confirm in utils).
RMSE

{'Row 2': array([0.63674679, 0.63162418, 0.65533419, 0.63222749, 0.69556678,
        0.62871657, 0.63162418, 0.62871657])}

In [8]:
# Display the bias dict filled by the CPS urate cell above: one array per
# setting, one entry per TROP_dict triple (presumably in the same order - confirm in utils).
bias

{'Row 2': array([-0.02150047, -0.02706777, -0.09861005, -0.02414906,  0.02265348,
        -0.05460891, -0.02706777, -0.05460891])}

## PENN

In [26]:
# PENN simulation; only 'Row 1' is active, the other rows are kept as
# disabled (commented-out) entries.
RMSE, bias = {}, {}

# setting label -> [outcome, treatment, option]
configs = {
    'Row 1': ['log_gdp', 'dem', None],
    # disabled: 'Row 2': ['log_gdp', 'educ', None],
    # disabled: 'Row 3': ['log_gdp', 'dem', 'Random'],
}

# setting label -> list of eight parameter triples handed to
# parallel_experiments_variants (their meaning is defined in utils).
TROP_dict = {
    'Row 1': [
        [0.3, 0.4, 0.006],
        [0.3, 0.4, np.inf],
        [0, 0.4, 0.011],
        [0.1, 0, 0.011],
        [0, 0, 0.011],
        [0, 0.4, np.inf],
        [1.5, 0, np.inf],
        [0, 0, np.inf],
    ],
    # disabled:
    # 'Row 2': [[0.75, 0.275, 0.026], [0.4,0.1,np.inf], [0, 0.1, 0.011], [1.6,0,0.2014], [0,0,0.3517],
    #           [0,0.1,np.inf], [0.6,0,np.inf], [0,0,np.inf]],
    # 'Row 3': [[0, 0.2, 0.081], [0,1,np.inf], [0, 0.2, 0.081], [0.2, 0, 0.1012], [0, 0, 0.1819],
    #           [0, 1, np.inf], [0, 0, np.inf], [0,0,np.inf]],
}

for setting, (outcome, treatment, option) in configs.items():
    print(setting)

    # load the panel and derive the inputs the simulator needs
    data = load_PENN_data(outcome, treatment)
    simulation_components = generate_simulation_components(data)

    # reset NumPy's global RNG immediately before each batch of experiments
    np.random.seed(0)
    setting_rmse, setting_bias = parallel_experiments_variants(
        num_cores,
        num_experiments,
        simulation_components,
        TROP_dict[setting],
        option,
    )
    RMSE[setting] = setting_rmse
    bias[setting] = setting_bias

Row 1


In [27]:
# Display the RMSE dict filled by the PENN cell above: one array per setting,
# one entry per TROP_dict triple (presumably in the same order - confirm in utils).
RMSE

{'Row 1': array([0.02309235, 0.03740921, 0.02402459, 0.04426083, 0.04438792,
        0.0385671 , 0.12523482, 0.20328096])}

In [28]:
# Display the bias dict filled by the PENN cell above: one array per setting,
# one entry per TROP_dict triple (presumably in the same order - confirm in utils).
bias

{'Row 1': array([0.00922668, 0.01292837, 0.01121029, 0.03420112, 0.03432329,
        0.01575241, 0.07742634, 0.18056043])}

## Germany

In [6]:
# Germany simulation: outcome 'gdp', no treatment column, option 'Random'.
RMSE, bias = {}, {}

# setting label -> [outcome, treatment, option]
configs = {
    'Row 2': ['gdp', None, 'Random'],
}

# setting label -> list of eight parameter triples handed to
# parallel_experiments_variants (their meaning is defined in utils).
TROP_dict = {
    'Row 2': [
        [0.1, 4.4, 0.005],
        [0.1, 4.5, np.inf],
        [0, 4.5, 0.005],
        [1.96, 0, 0.025],
        [0, 0, 0.01],
        [0, 2.5, np.inf],
        [18, 0, np.inf],
        [0, 0, np.inf],
    ],
}

for setting, (outcome, treatment, option) in configs.items():
    print(setting)

    # load the panel; this dataset builds its components with sc_weights=True
    data = load_Germany_data(outcome, treatment)
    simulation_components = generate_simulation_components(data, sc_weights=True)

    # reset NumPy's global RNG immediately before each batch of experiments
    np.random.seed(0)
    setting_rmse, setting_bias = parallel_experiments_variants(
        num_cores,
        num_experiments,
        simulation_components,
        TROP_dict[setting],
        option,
    )
    RMSE[setting] = setting_rmse
    bias[setting] = setting_bias

Row 2


In [7]:
# Display the RMSE dict filled by the Germany cell above: one array per setting,
# one entry per TROP_dict triple (presumably in the same order - confirm in utils).
RMSE

{'Row 2': array([0.0370448 , 0.03704458, 0.03701305, 0.05648064, 0.05646206,
        0.03706606, 0.11421741, 0.13642975])}

In [8]:
# Display the bias dict filled by the Germany cell above: one array per setting,
# one entry per TROP_dict triple (presumably in the same order - confirm in utils).
bias

{'Row 2': array([0.00028733, 0.00028726, 0.00044423, 0.00973657, 0.00926653,
        0.00046008, 0.0079063 , 0.01291603])}

## Basque

In [47]:
# Basque simulation: outcome 'gdpcap', run once with option None ('Row 1')
# and once with option 'Random' ('Row 2').
RMSE, bias = {}, {}

# setting label -> [outcome, treatment, option]
configs = {
    'Row 1': ['gdpcap', None, None],
    'Row 2': ['gdpcap', None, 'Random'],
}

# setting label -> list of eight parameter triples handed to
# parallel_experiments_variants (their meaning is defined in utils).
TROP_dict = {
    'Row 1': [
        [2.5, 0.15, 0.0212],
        [4.8, 0.2, np.inf],
        [0, 0.2, 0.0212],
        [1.6, 0, 0.011],
        [0, 0, 0.092],
        [0, 0.2, np.inf],
        [7, 0, np.inf],
        [0, 0, np.inf],
    ],
    'Row 2': [
        [0, 0.35, 0.0061],
        [0, 1, np.inf],
        [0, 0.35, 0.0061],
        [0.2, 0, 0.1012],
        [0, 0, 0.1819],
        [0, 1, np.inf],
        [0, 0, np.inf],
        [0, 0, np.inf],
    ],
}

for setting, (outcome, treatment, option) in configs.items():
    print(setting)

    # load the panel; this dataset builds its components with sc_weights=True
    data = load_basque_data(outcome, treatment)
    simulation_components = generate_simulation_components(data, sc_weights=True)

    # reset NumPy's global RNG immediately before each batch of experiments,
    # so both rows start from the same seed
    np.random.seed(0)
    setting_rmse, setting_bias = parallel_experiments_variants(
        num_cores,
        num_experiments,
        simulation_components,
        TROP_dict[setting],
        option,
    )
    RMSE[setting] = setting_rmse
    bias[setting] = setting_bias

Row 1
Row 2


In [48]:
# Display the RMSE dict filled by the Basque cell above: one array per setting,
# one entry per TROP_dict triple (presumably in the same order - confirm in utils).
RMSE

{'Row 1': array([0.08320382, 0.10664037, 0.05595315, 0.07587755, 0.07254015,
        0.10387575, 0.22954979, 0.17145903]),
 'Row 2': array([0.0407426 , 0.07018293, 0.0407426 , 0.06177894, 0.06235253,
        0.07018293, 0.1450159 , 0.1450159 ])}

In [49]:
# Display the bias dict filled by the Basque cell above: one array per setting,
# one entry per TROP_dict triple (presumably in the same order - confirm in utils).
bias

{'Row 1': array([-0.04956133, -0.06475552,  0.00030999,  0.0016426 ,  0.00038248,
         0.03036596, -0.16056401, -0.06897776]),
 'Row 2': array([-0.00165733, -0.00445536, -0.00165733, -0.00256912, -0.00250639,
        -0.00445536, -0.00672046, -0.00672046])}

## Smoking

In [9]:
# Smoking simulation: outcome 'PacksPerCapita', run once with option None
# ('Row 1') and once with option 'Random' ('Row 2').
RMSE, bias = {}, {}

# setting label -> [outcome, treatment, option]
configs = {
    'Row 1': ['PacksPerCapita', None, None],
    'Row 2': ['PacksPerCapita', None, 'Random'],
}

# setting label -> list of eight parameter triples handed to
# parallel_experiments_variants (their meaning is defined in utils).
TROP_dict = {
    'Row 1': [
        [0.3, 0.4, 0.0111],
        [4.8, 0.2, np.inf],
        [0, 0.4, 0.0163],
        [1.6, 0, 0.011],
        [0, 0, 0.092],
        [0, 0.2, np.inf],
        [7, 0, np.inf],
        [0, 0, np.inf],
    ],
    'Row 2': [
        [0.25, 0.4, 0.0111],
        [0, 1, np.inf],
        [0, 0.4, 0.0111],
        [0.2, 0, 0.1012],
        [0, 0, 0.1819],
        [0, 1, np.inf],
        [0, 0, np.inf],
        [0, 0, np.inf],
    ],
}

for setting, (outcome, treatment, option) in configs.items():
    print(setting)

    # load the panel; this dataset builds its components with sc_weights=True
    data = load_smoking_data(outcome, treatment)
    simulation_components = generate_simulation_components(data, sc_weights=True)

    # reset NumPy's global RNG immediately before each batch of experiments,
    # so both rows start from the same seed
    np.random.seed(0)
    setting_rmse, setting_bias = parallel_experiments_variants(
        num_cores,
        num_experiments,
        simulation_components,
        TROP_dict[setting],
        option,
    )
    RMSE[setting] = setting_rmse
    bias[setting] = setting_bias

Row 1
Row 2


In [10]:
# Display the RMSE dict filled by the Smoking cell above: one array per setting,
# one entry per TROP_dict triple (presumably in the same order - confirm in utils).
RMSE

{'Row 1': array([0.20694489, 0.27793307, 0.22465186, 0.30165156, 0.29703819,
        0.26872825, 0.6188283 , 0.59547192]),
 'Row 2': array([0.08454273, 0.13070133, 0.08786352, 0.0983582 , 0.09833617,
        0.13070133, 0.18119248, 0.18119248])}

In [11]:
# Display the bias dict filled by the Smoking cell above: one array per setting,
# one entry per TROP_dict triple (presumably in the same order - confirm in utils).
bias

{'Row 1': array([ 0.01288943,  0.01338637,  0.01335515, -0.06315621, -0.03845511,
         0.01468054, -0.11614082, -0.12300272]),
 'Row 2': array([-0.00502394, -0.00812667, -0.00399778, -0.00250334, -0.00247098,
        -0.00812667, -0.00032765, -0.00032765])}

## Boatlift

In [3]:
# Boatlift simulation: outcome 'loguearnhre', no treatment column, option 'Random'.
RMSE, bias = {}, {}

# setting label -> [outcome, treatment, option]
configs = {
    'Row 1': ['loguearnhre', None, 'Random'],
    # disabled: 'Row 2': ['PacksPerCapita', None, 'Random'],
    # NOTE(review): 'PacksPerCapita' is the outcome used in the Smoking cell;
    # this disabled row looks like a copy-paste leftover - confirm before enabling.
}

# setting label -> list of eight parameter triples handed to
# parallel_experiments_variants (their meaning is defined in utils).
TROP_dict = {
    'Row 1': [
        [0.2, 0.2, 0.1513],
        [4.8, 0.2, np.inf],
        [0, 0.2, 0.2014],
        [1.6, 0, 0.011],
        [0, 0, 0.092],
        [0, 0.2, np.inf],
        [7, 0, np.inf],
        [0, 0, np.inf],
    ],
    # disabled:
    # 'Row 2': [[0, 0.4, 0.0212], [0,1,np.inf], [0, 0.2, 0.081], [0.2, 0, 0.1012], [0, 0, 0.1819],
    #           [0, 1, np.inf], [0, 0, np.inf], [0,0,np.inf]],
}

for setting, (outcome, treatment, option) in configs.items():
    print(setting)

    # load the panel; this dataset builds its components with sc_weights=True
    data = load_boatlift_data(outcome, treatment)
    simulation_components = generate_simulation_components(data, sc_weights=True)

    # reset NumPy's global RNG immediately before each batch of experiments
    np.random.seed(0)
    setting_rmse, setting_bias = parallel_experiments_variants(
        num_cores,
        num_experiments,
        simulation_components,
        TROP_dict[setting],
        option,
    )
    RMSE[setting] = setting_rmse
    bias[setting] = setting_bias

Row 1


In [4]:
# Display the RMSE dict filled by the Boatlift cell above: one array per setting,
# one entry per TROP_dict triple (presumably in the same order - confirm in utils).
RMSE

{'Row 1': array([0.11516436, 0.33777453, 0.1152545 , 0.12178052, 0.11928901,
        0.17231356, 0.33146225, 0.15223907])}

In [5]:
# Display the bias dict filled by the Boatlift cell above: one array per setting,
# one entry per TROP_dict triple (presumably in the same order - confirm in utils).
bias

{'Row 1': array([ 9.56985803e-04, -1.79816366e-03,  1.42181802e-03,  6.89932269e-04,
         6.94133799e-04,  1.27963193e-03,  6.30565878e-03, -6.82509354e-05])}