In [3]:
import pandas as pd
import numpy as np
from random import random
from CTL2.causal_tree_learn import CausalTree
from DGP.DGP import dgp2
from DGP.DGP import dgp4
from DGP.DGP import dgp8
from MC.mc import mc_ate2
from MC.mc import mc_ate4
from MC.mc import mc_ate8
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error as mse
import matplotlib.pyplot as plt
import pickle as pkl
import datetime
from utility.utility import transposer as tp
from utility.utility import writer
from utility.utility import reader

## Main Notebook: see sections below to run simulations for 3 Data Generation Processes:

#### Parameters:

In [14]:
n = 500 #n_tr + n_est size
n_test = 1000 #n_test
reps = 2 
test_sizes = [0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8]
var_e = 0.01

<h2><center>Data Generation Process 1: </center></h2>
$$
Y_i = -1.5D +  3D \cdot \mathcal{I}_{\{x_1 \geq 0\}} + \sum_{k=2}^5 x_k + e_i
$$

<h2><center>Treatment effect: </center></h2>
$$\gamma(x) = -1.5 + 3 \cdot \mathcal{I}_{\{x_1 \geq 0\}}
$$

<h2><center> Mean effect: </center></h2>
$$ 
\eta(x) = \sum_{k=2}^5 x_k$
$$
<h2><center><img src="other/t1.png"></center></h2>

In [17]:
print("start time: ", str(datetime.datetime.now())[11:16])
df = pd.DataFrame()
k = 1
for size in test_sizes:
    results = mc_ate2(n, n_test, var_e, reps, size)
    df = pd.concat([df, pd.DataFrame(results).transpose()])
    print('iter' + str(k) + " end time: ", str(datetime.datetime.now())[11:16])
    k = k + 1
df.columns=['test_sizes', 'Mean1', 'Mean2', 'MSE1', 'MSE2', 'BIAS1', 'BIAS2', 'VAR1', 'VAR2', 'MSE_T_BIAS', 'MSE_T_VAR', 'MSE_TOTAL']
print("end time: ", str(datetime.datetime.now())[11:16])
df

start time:  12:13
end time:  12:13


Unnamed: 0,test_sizes,Mean1,Mean2,MSE1,MSE2,BIAS1,BIAS2,VAR1,VAR2,MSE_T_BIAS,MSE_T_VAR,MSE_TOTAL
0,0.2,0.903453,-1.001994,0.596547,-0.498006,0.004108,0.310658,0.359976,0.558668,0.758821,0.193995,0.952816
0,0.3,1.535075,-1.460184,-0.035075,-0.039816,0.019345,0.006176,0.020575,0.007762,0.325549,0.124793,0.450342
0,0.4,1.304105,-1.4093,0.195895,-0.0907,0.000281,0.004684,0.038656,0.012911,0.401717,0.095579,0.497296
0,0.5,1.693096,-1.311728,-0.193096,-0.188272,0.079575,0.054006,0.116861,0.089452,0.283518,0.090859,0.374376
0,0.6,1.221227,-1.437273,0.278773,-0.062727,0.066196,0.007147,0.143911,0.011082,0.437895,0.16685,0.604745
0,0.7,1.502629,-1.500497,-0.002629,0.000497,0.011155,0.001986,0.011162,0.001986,0.269181,0.050085,0.319266
0,0.8,1.359099,-1.257747,0.140901,-0.242253,0.00377,0.002097,0.023623,0.060783,0.425508,0.084936,0.510445


<h2><center>Data Generation Process 2: </center></h2>
$$
Y_i = -2D + 3D \cdot \mathcal{I}_{\{x_1 \geq 0\}} + D \cdot \mathcal{I}_{\{x_2 \geq 0\}} + D \cdot \mathcal{I}_{\{x_1 \geq 0\ \&\;x_2 \geq 0\}} + \sum_{k=3}^5 x_k + e_i
$$
<h2><center>Treatment effect: </center></h2>
$$
\gamma(x) = -2 + 3 \cdot \mathcal{I}_{\{x_1 \geq 0\}} + \mathcal{I}_{\{x_2 \geq 0\}} + \mathcal{I}_{\{x_1 \geq 0\ \&\;x_2 \geq 0\}}
$$
<h2><center> Mean effect: </center></h2>
$$
\eta(x) = \sum_{k=3}^5 x_k
$$
<h2><center><img src="other/t2.png"></center></h2>

In [12]:
print("start time: ", str(datetime.datetime.now())[11:16])
df = pd.DataFrame()
k = 1
for size in test_sizes:
    results = mc_ate4(n, n_test, var_e, reps, size)
    df = pd.concat([df, pd.DataFrame(results).transpose()])
    print('iter' + str(k) + " end time: ", str(datetime.datetime.now())[11:16])
    k = k + 1
df.columns=['test_sizes', 'Mean1', 'Mean2', 'Mean3', 'Mean4', 'MSE1', 'MSE2','MSE3', 'MSE4', 'BIAS1', 'BIAS2','BIAS3', 'BIAS4', 'VAR1', 'VAR2','VAR3', 'VAR4',  'MSE_T_BIAS', 'MSE_T_VAR', 'MSE_TOTAL']    
print("end time: ", str(datetime.datetime.now())[11:16])

start time:  11:48
iter1 end time:  11:48
iter2 end time:  11:48
iter3 end time:  11:48
iter4 end time:  11:48
iter5 end time:  11:48
iter6 end time:  11:48
iter7 end time:  11:48
end time:  11:48


<h2><center>Data Generation Process 3: </center></h2>
$$
Y_i = -5D + 6D \cdot \mathcal{I}_{\{x_1 \geq 0\}} + 2.5D \cdot \mathcal{I}_{\{x_2 \geq 0\}} + 1.5D \cdot \mathcal{I}_{\{x_3 \geq 0\}} + \sum_{k=4}^5 x_k + e_i
$$
<h2><center>Treatment effect: </center></h2>
$$
\gamma(x) = -5 + 6 \cdot \mathcal{I}_{\{x_1 \geq 0\}} + 2.5 \cdot \mathcal{I}_{\{x_2 \geq 0\}} + 1.5 \cdot \mathcal{I}_{\{x_3 \geq 0\}}
$$
<h2><center>Mean effect: </center></h2>
$$
\eta(x) = \sum_{k=4}^5 x_k
$$
<h2><center><img src="other/t3.png"></center></h2>

In [3]:
print("start time: ", str(datetime.datetime.now())[11:16])
df = pd.DataFrame()
k = 1
for size in test_sizes:
    results = mc_ate4(n, n_test, var_e, reps, size)
    df = pd.concat([df, pd.DataFrame(results).transpose()])
    #rint('iter' + str(k) + " end time: ", str(datetime.datetime.now())[11:16])
    k = k + 1
df.columns=['test_sizes', 'Mean1', 'Mean2', 'Mean3', 'Mean4','Mean5', 'Mean6', 'Mean7', 'Mean8', 
            'MSE1', 'MSE2','MSE3', 'MSE4','MSE5', 'MSE6','MSE7', 'MSE8',
            'BIAS1', 'BIAS2','BIAS3', 'BIAS4', 'BIAS5', 'BIAS6','BIAS7', 'BIAS8',
            'VAR1', 'VAR2','VAR3', 'VAR4',  'VAR5', 'VAR6','VAR7', 'VAR8', 
            'MSE_T_BIAS', 'MSE_T_VAR', 'MSE_TOTAL']   
print("end time: ", str(datetime.datetime.now())[11:16])

array([ 0.5       , -0.33112802, -0.33112802,  1.83112802, -1.16887198,
        0.45701865,  0.45701865,  3.81004848,  1.82328035,  2.30138048,
        0.45701865,  2.75839912])