In [1]:
import pandas as pd
import numpy as np
from random import random

from CTL2.causal_tree_learn import CausalTree
from DGP.DGP import dgp2
from DGP.DGP import dgp4
from DGP.DGP import dgp4int
from DGP.DGP import dgp8

from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error as mse

import matplotlib.pyplot as plt
import pickle as pkl
#from notify_run import Notify 
#notify = Notify()
import datetime

from utility.utility import transposer as tp
from utility.utility import writer
from utility.utility import reader

### Parameters:

### ATE2

In [48]:
def mc_ate2(n, tsize, var_e, reps, nomin_test):
    #true ATE
    ATE1 = np.ones(reps)*1.5
    ATE2 = np.ones(reps)*-1.5
    #predicted ATE
    ate1 = []
    ate2 = []
    #ITE dummy
    tauhat = np.zeros((tsize, 1))
    #true ITE

    for i in range(reps):
        x_train, x_test, y_train, y_test, treat_train, treat_test = dgp2(n, tsize, var_e)
        tau = np.where((x_test[:,0] >= 0),1.5, -1.5)
        ctl = CausalTree(honest=True, weight=0.0, split_size=0.0, max_depth = 3) #which type of tree to call
        ctl.fit(x_train, y_train, treat_train, nomin_test = nomin_test) #select est size when fitting
        ctl.prune()
        ctl_predict = ctl.predict(x_test)
        #predicted ATE
        ate1 = np.append(ate1, np.mean(ctl_predict[np.where(x_test[:,0] >= 0)]))
        ate2 = np.append(ate2, np.mean(ctl_predict[np.where(x_test[:,0] < 0)]))
        #predicted ITE
        tauhat = np.append(tauhat, ctl_predict.reshape(-1,1), axis = 1)

    mean_ate = np.array([np.mean(ate1), np.mean(ate2)])    
    mse_ate = np.array([mse(ATE1, ate1), mse(ATE2, ate2)])
    bias_ate = np.array([np.sum(np.subtract(ATE1,ate1))/reps, np.sum(np.subtract(ATE2,ate2))/reps])
    var_ate = np.array([np.var(ate1), np.var(ate2)])

    #TOTAL MSE's
    ind_var = np.var(tauhat[:, 1:], axis = 1)
    ind_mean = np.mean(tauhat, axis = 1)

    total_var = np.sum(ind_var)/tsize
    total_bias = np.sum(np.square(np.subtract(ind_mean, tau)))/tsize
    total_mse = total_var + total_bias

    results = np.concatenate([np.array([nomin_test]), mean_ate, bias_ate, var_ate, mse_ate, np.array([total_bias]), np.array([total_var]), np.array([total_mse])])
    return results

In [76]:
n = 1000
tsize = 500
reps = 500
test_sizes = [0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8]
res = []
var = [0.01, 1.0, 2.5]

print("start time: ", str(datetime.datetime.now())[11:16])

for v in var:
    df = pd.DataFrame()
    for size in test_sizes:
        results = mc_ate2(n, tsize, v, reps, size)
        df = pd.concat([df, pd.DataFrame(results).transpose()])
    df.columns=['test_sizes', 'ATE1', 'ATE2', 'BIAS1', 'BIAS2', 'VAR1', 'VAR2', 'MSE1', 'MSE2', 'MSE_T_BIAS', 'MSE_T_VAR', 'MSE_TOTAL']
    res.append([v])
    res.append([df])
    
print("end time: ", str(datetime.datetime.now())[11:16])

start time:  23:04
end time:  23:05


In [77]:
res

[[0.01],
 [   test_sizes      ATE1      ATE2     BIAS1     BIAS2      VAR1      VAR2  \
  0         0.2  1.647573 -1.478776 -0.147573 -0.021224  0.066542  0.245101   
  0         0.3  1.655121 -1.537957 -0.155121  0.037957  0.045598  0.229949   
  0         0.4  1.418657 -1.420378  0.081343 -0.079622  0.016348  0.172079   
  0         0.5  1.565922 -1.569382 -0.065922  0.069382  0.080787  0.019727   
  0         0.6  1.230856 -1.627577  0.269144  0.127577  0.069614  0.075625   
  0         0.7  1.272454 -1.306883  0.227546 -0.193117  0.190105  0.045871   
  0         0.8  1.294532 -1.134064  0.205468 -0.365936  0.073650  0.034548   
  
         MSE1      MSE2  MSE_T_BIAS  MSE_T_VAR  MSE_TOTAL  
  0  0.088320  0.245552    1.535282   2.313963   3.849245  
  0  0.069660  0.231389    1.659940   2.322508   3.982448  
  0  0.022965  0.178418    1.942561   1.829489   3.772050  
  0  0.085133  0.024540    2.091723   2.146626   4.238349  
  0  0.142052  0.091901    1.725758   1.831880   3.55763