In [3]:
import pandas as pd
import numpy as np
from random import random
from CTL2.causal_tree_learn import CausalTree
from DGP.dgp4 import dgp4
from MC.mc2 import MC_MSE_2
from MC.mc2_maxd import MC_MSE_2max
from utility.utility import transposer as tp
from utility.utility import writer
from utility.utility import reader
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error as mse
import matplotlib.pyplot as plt
import pickle as pkl
#from notify_run import Notify 
#notify = Notify()
import datetime

In [13]:
def MC_ate2(n_train, n_test, var_e, reps, nomin_test):    
    #true ATE
    ATE1 = np.ones(reps)*1.5
    ATE2 = np.ones(reps)*-1.5
    #predicted ate
    ate1 = []
    ate2 = []
    #TOTAL MSE, Bias, Var
    MSE = []
    bias = []
    var = []
    tauhat = np.zeros((n_test, 1))
    x_test, y_test, treat_test = dgp4(n_test,var_e)
    tau = np.where((x_test[:,0] >= 0),1.5, -1.5)
    for i in range(reps):
        x_train, y_train, treat_train = dgp4(n_train,var_e)  
        #true individual treatment effect
        ctl_predict = MC_MSE_2max(x_train, x_test, y_train, y_test, treat_train, treat_test, nomin_test)
        #predicted ATE
        ate1 = np.append(ate1, np.mean(ctl_predict[np.where(x_test[:,0] >= 0)]))
        ate2 = np.append(ate2, np.mean(ctl_predict[np.where(x_test[:,0] < 0)]))
        #predicted ITE
        tauhat = np.append(tauhat, ctl_predict.reshape(-1,1), axis = 1)

    #ATE MSE's
    mean_ate = np.array([np.mean(ate1), np.mean(ate2)])    
    mse_ate = np.array([mse(ATE1, ate1), mse(ATE2, ate2)])
    bias_ate = np.array([np.sum(np.subtract(ATE1,ate1))/reps, np.sum(np.subtract(ATE2,ate2))/reps])
    var_ate = np.array([np.var(ate1), np.var(ate2)])
    
    #TOTAL MSE's
    ind_var = np.var(tauhat[:, 1:], axis = 1)
    ind_mean = np.mean(tauhat, axis = 1)
    
    total_var = np.sum(ind_var)/n_test
    total_bias = np.sum(np.square(np.subtract(ind_mean, tau)))/n_test
    
    return mean_ate, mse_ate, bias_ate, var_ate, total_bias, total_var, total_var+total_bias

In [14]:
print("start time: ", str(datetime.datetime.now())[11:16])
test_sizes = [0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8]

mean_ate_ = []
mse_ate_ = []
bias_ate_ = []
var_ate_ = []
MSE_ = []
bias_ = []
var_ = []

var_e = 0.01
n_train = 500
n_test = 1000
reps = 100

for size in test_sizes:
    mean_ate, mse_ate, bias_ate, var_ate, bias, var, MSE = MC_ate2(n_train, n_test, var_e, reps, size)
    mean_ate_ = np.append(mean_ate_,mean_ate)
    mse_ate_ = np.append(mse_ate_,mse_ate)
    bias_ate_ = np.append(bias_ate_, bias_ate)
    var_ate_ = np.append(var_ate_, var_ate)
    MSE_ = np.append(MSE_, MSE)
    bias_ = np.append(bias_, bias)
    var_ = np.append(var_, var)
    
#notify.send('Done running the simulations')
print("end time: ", str(datetime.datetime.now())[11:16])

start time:  12:04
end time:  12:09


In [15]:
var_

array([0.4726828 , 0.31493713, 0.30249119, 0.3067664 , 0.26631748,
       0.30797231, 0.33882941])

In [12]:
var_

array([2.52559618, 2.43423637, 2.24331104, 2.27172494, 2.22914882,
       2.10109462, 1.96547467])

In [11]:
mean_ate_ = mean_ate_.reshape(len(test_sizes),2).transpose()
mse_ate_ = mse_ate_.reshape(len(test_sizes),2).transpose()
bias_ate_ = bias_ate_.reshape(len(test_sizes),2).transpose()
var_ate_ = var_ate_.reshape(len(test_sizes),2).transpose()

In [13]:
df = pd.DataFrame([test_sizes, mean_ate_[0], mean_ate_[1], mse_ate_[0], mse_ate_[1], bias_ate_[0], bias_ate_[1],  var_ate_[0], var_ate_[1], MSE_, bias_, var_]).transpose().set_index(0)
df.columns=['Mean1', 'Mean2', 'MSE1', 'MSE2', 'BIAS1', 'BIAS2', 'VAR1', 'VAR2','MSE_TOTAL', 'MSE_T_BIAS', 'MSE_T_VAR']

In [14]:
df

Unnamed: 0_level_0,Mean1,Mean2,MSE1,MSE2,BIAS1,BIAS2,VAR1,VAR2,MSE_TOTAL,MSE_T_BIAS,MSE_T_VAR
0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
0.2,1.422224,-1.413748,0.25799,0.300417,0.077776,-0.086252,0.251941,0.292978,0.547649,0.03411,0.513539
0.3,1.393653,-1.418372,0.165283,0.174286,0.106347,-0.081628,0.153974,0.167623,0.376346,0.031813,0.344533
0.4,1.409997,-1.405045,0.121275,0.156033,0.090003,-0.094955,0.113175,0.147016,0.348066,0.038454,0.309612
0.5,1.370651,-1.388524,0.122559,0.133086,0.129349,-0.111476,0.105828,0.120659,0.326989,0.043275,0.283714
0.6,1.344313,-1.334922,0.135448,0.135821,0.155687,-0.165078,0.111209,0.10857,0.382264,0.074626,0.307638
0.7,1.31054,-1.31946,0.131799,0.145576,0.18946,-0.18054,0.095904,0.112982,0.41931,0.092108,0.327202
0.8,1.199728,-1.181456,0.231265,0.28252,0.300272,-0.318544,0.141102,0.181049,0.545813,0.147093,0.39872


In [9]:
mean_ate_, mse_ate_, bias_ate_, var_ate_ = tp(test_sizes, 2)

array([[ 1.43122283,  1.25630012,  1.35907062,  1.41398531,  1.28420573,
         1.31286666,  1.24466099],
       [-1.13615603, -1.42202702, -1.41922277, -1.38199356, -1.36327013,
        -1.36280911, -1.00897824]])

In [None]:
mean_ate_ = mean_ate_.reshape(len(test_sizes),4).transpose()
mse_ate_ = mse_ate_.reshape(len(test_sizes),4).transpose()
bias_ate_ = bias_ate_.reshape(len(test_sizes),4).transpose()
var_ate_ = var_ate_.reshape(len(test_sizes),4).transpose()

In [22]:
mean_ate_ = mean_ate_.reshape(len(test_sizes),4).transpose()
mse_ate_ = mse_ate_.reshape(len(test_sizes),4).transpose()
bias_ate_ = bias_ate_.reshape(len(test_sizes),4).transpose()
var_ate_ = var_ate_.reshape(len(test_sizes),4).transpose()
df = pd.DataFrame([test_sizes, mean_ate_[0], mean_ate_[1],mean_ate_[2], mean_ate_[3], mse_ate_[0], mse_ate_[1],mse_ate_[2], mse_ate_[3], bias_ate_[0], bias_ate_[1], bias_ate_[2], bias_ate_[3], var_ate_[0], var_ate_[1],var_ate_[2], var_ate_[3], MSE_, bias_, var_]).transpose().set_index(0)
df.columns=['Mean1', 'Mean2', 'Mean3', 'Mean4', 'MSE1', 'MSE2','MSE3', 'MSE4', 'BIAS1', 'BIAS2','BIAS3', 'BIAS4', 'VAR1', 'VAR2','VAR3', 'VAR4', 'MSE_TOTAL', 'MSE_T_BIAS', 'MSE_T_VAR']

In [23]:
df

Unnamed: 0_level_0,Mean1,Mean2,MSE1,MSE2,BIAS1,BIAS2,VAR1,VAR2,MSE_TOTAL,MSE_T_BIAS,MSE_T_VAR
0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
0.2,1.691971,-1.481534,0.101903,0.03832,-0.191971,-0.018466,0.06505,0.037979,0.239861,-0.107464,0.222237
0.3,1.41015,-1.452264,0.024322,0.023429,0.08985,-0.047736,0.016249,0.021151,0.287408,0.022775,0.281377
0.4,1.169408,-1.195357,0.143118,0.1998,0.330592,-0.304643,0.033827,0.106993,0.337511,0.016832,0.290455
0.5,1.507453,-1.388777,0.082896,0.15945,-0.007453,-0.111223,0.08284,0.14708,0.268964,-0.055953,0.181218
0.6,1.239287,-1.173621,0.186332,0.245199,0.260713,-0.326379,0.118361,0.138676,0.412817,-0.02903,0.389061
0.7,1.385854,-1.350041,0.034143,0.123779,0.114146,-0.149959,0.021114,0.101292,0.381972,-0.020549,0.361521
0.8,0.839287,-1.011267,0.739157,0.724798,0.660713,-0.488733,0.302615,0.485938,0.963052,0.072313,0.949001
0.9,0.875679,-0.782582,0.689882,0.831784,0.624321,-0.717418,0.300106,0.317095,1.070842,-0.053193,1.042217


In [31]:
results = [['var = 0.1, reps = 500, n = 1000, with int.'],[mean_ate_], [mse_ate_], [bias_ate_], [var_ate_], [MSE_], [bias_], [var_]]


In [12]:
df = pd.DataFrame([test_sizes, MSE1, MSE2, BIAS1, BIAS2, VAR1, VAR2, MSE_TOTAL, MSEB, MSEV]).transpose().set_index(0)
df.columns=['MSE1', 'MSE2', 'BIAS1', 'BIAS2', 'VAR1', 'VAR2', 'MSE_TOTAL', 'MSE_T_BIAS', 'MSE_T_VAR']

NameError: name 'MSE1' is not defined

In [2]:
import pickle as pkl
with open('te5_r1.pkl', 'rb') as f:
    df1 = pkl.load(f)

In [17]:
writer('ate2_ntrain500_vare001_reps500', df)