In [None]:
import sys
print(sys.executable) #check jupyter kernel
import os
module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path:
    sys.path.append(module_path)

In [2]:
from data.simu_setting import prox_data
from src.proxITR import proxITR
import numpy as np
import pandas as pd
import sklearn
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import RobustScaler as Scaler
from sklearn.kernel_approximation import Nystroem

### Value Function

In [3]:
def value_fun(d,A,Y,X,U):
    A = A.to_numpy().reshape(-1)
    Y = Y.to_numpy().reshape(-1)
    U = U.to_numpy().reshape(-1)
    X = X.to_numpy()
    return np.mean((np.abs((np.sign(A-0.5)-d))<0.5)*Y*(1. + np.exp(np.sign(A-0.5)*(0.09375+X @ [0.1875, 0.1875] -0.25*U))))

## Examples for Different Simulation Settings

### L1, Assumption 10 holds

In [4]:
# Setting
linearity    = 'linear'
indepOutProx = 'WindepY'
Xonly        = 'XW'

In [5]:
# Generate Test Data
test = prox_data(500000, add_noise=False).gen_Y(linearity=linearity, indepOutProx= indepOutProx, Xonly = Xonly)
value_fun(test[["GOR"]].to_numpy(int).reshape(-1), test[["A"]],test[["Y"]],test[["X0","X1"]],test[["U"]])

4.821781763821496

In [6]:
# Generate Training Data
samp_size = 2000
train = prox_data(samp_size=samp_size, add_noise=True).gen_Y(linearity=linearity, indepOutProx= indepOutProx, Xonly = Xonly)

In [7]:
# Learn ITRs
proxy = proxITR(A=train[["A"]], X=train[["X0","X1","X2"]], Z=train[["Z"]], W=train[["W"]], Y=train[["Y"]], learning_rate=0.1, n_epoch=2000, opt='LBFGS', verbose=True)
d1_X  = proxy.fit_d1_X_cv(gamma_f='auto', n_gamma_hs=10, linearity=linearity, n_components=int(2*np.sqrt(samp_size)), rhos = np.power(10.,-np.arange(1,6)))
p_d1_X  = d1_X(test[["X0","X1","X2"]])
d1_XZ = proxy.fit_d1_XZ_cv(gamma_f='auto', n_gamma_hs=10, linearity=linearity, n_components=int(2*np.sqrt(samp_size)), rhos = np.power(10.,-np.arange(1,6)))
p_d1_XZ = d1_XZ(test[["X0","X1","X2","Z"]])
d2_XW = proxy.fit_d2_XW_cv(gamma_f='auto', n_gamma_hs=10, linearity=linearity, n_components=int(2*np.sqrt(samp_size)), rhos = np.power(10.,-np.arange(1,6)))
p_d2_XW = d2_XW(test[["X0","X1","X2","W"]])
d3_X  = proxy.fit_d3_X_cv(gamma_f='auto', n_gamma_hs=10, linearity=linearity, n_components=int(2*np.sqrt(samp_size)), rhos = np.power(10.,-np.arange(1,6)))
p_d3_X  = d3_X(test[["X0","X1","X2"]])
d_DR  = proxy.fit_d_DR_cv(gamma_f='auto', n_gamma_hs=10, linearity=linearity, n_components=int(2*np.sqrt(samp_size)), rhos = np.power(10.,-np.arange(1,6)), learning_rate=0.01)
p_d_DR  = d_DR(test[["X0","X1","X2"]])

d1_X
rho:  [1.e-01 1.e-02 1.e-03 1.e-04 1.e-05]
value:  [4.6199927 4.620571  4.6166573 4.6156683 4.6156683]
rho_best: 0.01
d1_XZ
rho:  [1.e-01 1.e-02 1.e-03 1.e-04 1.e-05]
value:  [4.632531  4.629891  4.63208   4.6317472 4.6321497]
rho_best: 0.1
d2_XW
rho:  [1.e-01 1.e-02 1.e-03 1.e-04 1.e-05]
value:  [4.69487389 4.75974807 4.72378523 4.72953349 4.73446469]
rho_best: 0.01
d3_X
rho:  [1.e-01 1.e-02 1.e-03 1.e-04 1.e-05]
value:  [4.4139306  4.65816047 4.64694045 4.64581986 4.65575417]
rho_best: 0.01
d_DR
rho:  [1.e-01 1.e-02 1.e-03 1.e-04 1.e-05]
value:  [4.39773466 4.50440144 4.52716255 4.53894663 4.51059279]
rho_best: 0.0001


In [8]:
# Calculate Value
v_d1_X  = value_fun(p_d1_X, test[["A"]],test[["Y"]],test[["X0","X1"]],test[["U"]])
v_d1_XZ = value_fun(p_d1_XZ,test[["A"]],test[["Y"]],test[["X0","X1"]],test[["U"]])
v_d2_XW = value_fun(p_d2_XW,test[["A"]],test[["Y"]],test[["X0","X1"]],test[["U"]])
v_d3_X  = value_fun(p_d3_X, test[["A"]],test[["Y"]],test[["X0","X1"]],test[["U"]])
v_d_DR  = value_fun(p_d_DR, test[["A"]],test[["Y"]],test[["X0","X1"]],test[["U"]])
if proxy.cv_d1_XZ > proxy.cv_d2_XW:
    v_d4 = v_d1_XZ
else:
    v_d4 = v_d2_XW
pd.DataFrame.from_dict({'method':['d1_X','d1_XZ','d2_XW','d3_X','d_DR','d4'],
                        'value':[v_d1_X,v_d1_XZ,v_d2_XW,v_d3_X,v_d_DR,v_d4]})

Unnamed: 0,method,value
0,d1_X,4.650971
1,d1_XZ,4.776976
2,d2_XW,4.693326
3,d3_X,4.651836
4,d_DR,4.637801
5,d4,4.693326


### L1, Assumption 10 fails

In [9]:
# Setting
linearity    = 'linear'
indepOutProx = 'WdepY'
Xonly        = 'XW'

In [10]:
# Generate Test Data
test = prox_data(500000, add_noise=False).gen_Y(linearity=linearity, indepOutProx= indepOutProx, Xonly = Xonly)
value_fun(test[["GOR"]].to_numpy(int).reshape(-1), test[["A"]],test[["Y"]],test[["X0","X1"]],test[["U"]])

4.83827125339926

In [11]:
# Generate Training Data
samp_size = 2000
train = prox_data(samp_size=samp_size, add_noise=True).gen_Y(linearity=linearity, indepOutProx= indepOutProx, Xonly = Xonly)

In [12]:
# Learn ITRs
proxy = proxITR(A=train[["A"]], X=train[["X0","X1","X2"]], Z=train[["Z"]], W=train[["W"]], Y=train[["Y"]], learning_rate=0.1, n_epoch=2000, opt='LBFGS', verbose=True)
d1_X  = proxy.fit_d1_X_cv(gamma_f='auto', n_gamma_hs=10, linearity=linearity, n_components=int(2*np.sqrt(samp_size)), rhos = np.power(10.,-np.arange(1,6)))
p_d1_X  = d1_X(test[["X0","X1","X2"]])
d1_XZ = proxy.fit_d1_XZ_cv(gamma_f='auto', n_gamma_hs=10, linearity=linearity, n_components=int(2*np.sqrt(samp_size)), rhos = np.power(10.,-np.arange(1,6)))
p_d1_XZ = d1_XZ(test[["X0","X1","X2","Z"]])
d2_XW = proxy.fit_d2_XW_cv(gamma_f='auto', n_gamma_hs=10, linearity=linearity, n_components=int(2*np.sqrt(samp_size)), rhos = np.power(10.,-np.arange(1,6)))
p_d2_XW = d2_XW(test[["X0","X1","X2","W"]])
d3_X  = proxy.fit_d3_X_cv(gamma_f='auto', n_gamma_hs=10, linearity=linearity, n_components=int(2*np.sqrt(samp_size)), rhos = np.power(10.,-np.arange(1,6)))
p_d3_X  = d3_X(test[["X0","X1","X2"]])
d_DR  = proxy.fit_d_DR_cv(gamma_f='auto', n_gamma_hs=10, linearity=linearity, n_components=int(2*np.sqrt(samp_size)), rhos = np.power(10.,-np.arange(1,6)), learning_rate=0.01)
p_d_DR  = d_DR(test[["X0","X1","X2"]])

d1_X
rho:  [1.e-01 1.e-02 1.e-03 1.e-04 1.e-05]
value:  [4.821906  4.8192954 4.818541  4.822725  4.8222136]
rho_best: 0.0001
d1_XZ
rho:  [1.e-01 1.e-02 1.e-03 1.e-04 1.e-05]
value:  [4.833283  4.8340816 4.838622  4.8381047 4.838045 ]
rho_best: 0.001
d2_XW
rho:  [1.e-01 1.e-02 1.e-03 1.e-04 1.e-05]
value:  [4.81301842 4.77651586 4.77425711 4.78162299 4.78430967]
rho_best: 0.1
d3_X
rho:  [1.e-01 1.e-02 1.e-03 1.e-04 1.e-05]
value:  [4.55759692 4.73673337 4.72354019 4.74214892 4.72640199]
rho_best: 0.0001
d_DR
rho:  [1.e-01 1.e-02 1.e-03 1.e-04 1.e-05]
value:  [4.73473551 4.80566572 4.79716629 4.80163369 4.80233798]
rho_best: 0.01


In [13]:
# Calculate Value
v_d1_X  = value_fun(p_d1_X, test[["A"]],test[["Y"]],test[["X0","X1"]],test[["U"]])
v_d1_XZ = value_fun(p_d1_XZ,test[["A"]],test[["Y"]],test[["X0","X1"]],test[["U"]])
v_d2_XW = value_fun(p_d2_XW,test[["A"]],test[["Y"]],test[["X0","X1"]],test[["U"]])
v_d3_X  = value_fun(p_d3_X, test[["A"]],test[["Y"]],test[["X0","X1"]],test[["U"]])
v_d_DR  = value_fun(p_d_DR, test[["A"]],test[["Y"]],test[["X0","X1"]],test[["U"]])
if proxy.cv_d1_XZ > proxy.cv_d2_XW:
    v_d4 = v_d1_XZ
else:
    v_d4 = v_d2_XW
pd.DataFrame.from_dict({'method':['d1_X','d1_XZ','d2_XW','d3_X','d_DR','d4'],
                        'value':[v_d1_X,v_d1_XZ,v_d2_XW,v_d3_X,v_d_DR,v_d4]})

Unnamed: 0,method,value
0,d1_X,4.677209
1,d1_XZ,4.771229
2,d2_XW,4.722843
3,d3_X,4.682077
4,d_DR,4.675412
5,d4,4.771229


### L2, Assumption 10 holds

In [14]:
# Setting
linearity    = 'linear'
indepOutProx = 'WindepY'
Xonly        = 'X'

In [15]:
# Generate Test Data
test = prox_data(500000, add_noise=False).gen_Y(linearity=linearity, indepOutProx= indepOutProx, Xonly = Xonly)
value_fun(test[["GOR"]].to_numpy(int).reshape(-1), test[["A"]],test[["Y"]],test[["X0","X1"]],test[["U"]])

6.184434626485812

In [16]:
# Generate Training Data
samp_size = 2000
train = prox_data(samp_size=samp_size, add_noise=True).gen_Y(linearity=linearity, indepOutProx= indepOutProx, Xonly = Xonly)

In [17]:
# Learn ITRs
proxy = proxITR(A=train[["A"]], X=train[["X0","X1","X2"]], Z=train[["Z"]], W=train[["W"]], Y=train[["Y"]], learning_rate=0.1, n_epoch=2000, opt='LBFGS', verbose=True)
d1_X  = proxy.fit_d1_X_cv(gamma_f='auto', n_gamma_hs=10, linearity=linearity, n_components=int(2*np.sqrt(samp_size)), rhos = np.power(10.,-np.arange(1,6)))
p_d1_X  = d1_X(test[["X0","X1","X2"]])
d1_XZ = proxy.fit_d1_XZ_cv(gamma_f='auto', n_gamma_hs=10, linearity=linearity, n_components=int(2*np.sqrt(samp_size)), rhos = np.power(10.,-np.arange(1,6)))
p_d1_XZ = d1_XZ(test[["X0","X1","X2","Z"]])
d2_XW = proxy.fit_d2_XW_cv(gamma_f='auto', n_gamma_hs=10, linearity=linearity, n_components=int(2*np.sqrt(samp_size)), rhos = np.power(10.,-np.arange(1,6)))
p_d2_XW = d2_XW(test[["X0","X1","X2","W"]])
d3_X  = proxy.fit_d3_X_cv(gamma_f='auto', n_gamma_hs=10, linearity=linearity, n_components=int(2*np.sqrt(samp_size)), rhos = np.power(10.,-np.arange(1,6)))
p_d3_X  = d3_X(test[["X0","X1","X2"]])
d_DR  = proxy.fit_d_DR_cv(gamma_f='auto', n_gamma_hs=10, linearity=linearity, n_components=int(2*np.sqrt(samp_size)), rhos = np.power(10.,-np.arange(1,6)), learning_rate=0.01)
p_d_DR  = d_DR(test[["X0","X1","X2"]])

d1_X
rho:  [1.e-01 1.e-02 1.e-03 1.e-04 1.e-05]
value:  [6.1722665 6.160823  6.1612873 6.1623726 6.161882 ]
rho_best: 0.1
d1_XZ
rho:  [1.e-01 1.e-02 1.e-03 1.e-04 1.e-05]
value:  [6.181573  6.1812096 6.1794796 6.1776013 6.1776013]
rho_best: 0.1
d2_XW
rho:  [1.e-01 1.e-02 1.e-03 1.e-04 1.e-05]
value:  [5.87849351 6.2441227  6.19932502 6.19981075 6.20428991]
rho_best: 0.01
d3_X
rho:  [1.e-01 1.e-02 1.e-03 1.e-04 1.e-05]
value:  [5.87849351 6.31321941 6.30763356 6.31503143 6.32378834]
rho_best: 1e-05
d_DR
rho:  [1.e-01 1.e-02 1.e-03 1.e-04 1.e-05]
value:  [6.07456526 6.12133585 6.109575   6.11026246 6.11409357]
rho_best: 0.01


In [18]:
# Calculate Value
v_d1_X  = value_fun(p_d1_X, test[["A"]],test[["Y"]],test[["X0","X1"]],test[["U"]])
v_d1_XZ = value_fun(p_d1_XZ,test[["A"]],test[["Y"]],test[["X0","X1"]],test[["U"]])
v_d2_XW = value_fun(p_d2_XW,test[["A"]],test[["Y"]],test[["X0","X1"]],test[["U"]])
v_d3_X  = value_fun(p_d3_X, test[["A"]],test[["Y"]],test[["X0","X1"]],test[["U"]])
v_d_DR  = value_fun(p_d_DR, test[["A"]],test[["Y"]],test[["X0","X1"]],test[["U"]])
if proxy.cv_d1_XZ > proxy.cv_d2_XW:
    v_d4 = v_d1_XZ
else:
    v_d4 = v_d2_XW
pd.DataFrame.from_dict({'method':['d1_X','d1_XZ','d2_XW','d3_X','d_DR','d4'],
                        'value':[v_d1_X,v_d1_XZ,v_d2_XW,v_d3_X,v_d_DR,v_d4]})

Unnamed: 0,method,value
0,d1_X,6.162302
1,d1_XZ,6.211354
2,d2_XW,6.157277
3,d3_X,6.155118
4,d_DR,6.14716
5,d4,6.157277


### L2, Assumption 10 fails

In [19]:
# Setting
linearity    = 'linear'
indepOutProx = 'WdepY'
Xonly        = 'X'

In [20]:
# Generate Test Data
test = prox_data(500000, add_noise=False).gen_Y(linearity=linearity, indepOutProx= indepOutProx, Xonly = Xonly)
value_fun(test[["GOR"]].to_numpy(int).reshape(-1), test[["A"]],test[["Y"]],test[["X0","X1"]],test[["U"]])

6.1655366102378295

In [21]:
# Generate Training Data
samp_size = 2000
train = prox_data(samp_size=samp_size, add_noise=True).gen_Y(linearity=linearity, indepOutProx= indepOutProx, Xonly = Xonly)

In [22]:
# Learn ITRs
proxy = proxITR(A=train[["A"]], X=train[["X0","X1","X2"]], Z=train[["Z"]], W=train[["W"]], Y=train[["Y"]], learning_rate=0.1, n_epoch=2000, opt='LBFGS', verbose=True)
d1_X  = proxy.fit_d1_X_cv(gamma_f='auto', n_gamma_hs=10, linearity=linearity, n_components=int(2*np.sqrt(samp_size)), rhos = np.power(10.,-np.arange(1,6)))
p_d1_X  = d1_X(test[["X0","X1","X2"]])
d1_XZ = proxy.fit_d1_XZ_cv(gamma_f='auto', n_gamma_hs=10, linearity=linearity, n_components=int(2*np.sqrt(samp_size)), rhos = np.power(10.,-np.arange(1,6)))
p_d1_XZ = d1_XZ(test[["X0","X1","X2","Z"]])
d2_XW = proxy.fit_d2_XW_cv(gamma_f='auto', n_gamma_hs=10, linearity=linearity, n_components=int(2*np.sqrt(samp_size)), rhos = np.power(10.,-np.arange(1,6)))
p_d2_XW = d2_XW(test[["X0","X1","X2","W"]])
d3_X  = proxy.fit_d3_X_cv(gamma_f='auto', n_gamma_hs=10, linearity=linearity, n_components=int(2*np.sqrt(samp_size)), rhos = np.power(10.,-np.arange(1,6)))
p_d3_X  = d3_X(test[["X0","X1","X2"]])
d_DR  = proxy.fit_d_DR_cv(gamma_f='auto', n_gamma_hs=10, linearity=linearity, n_components=int(2*np.sqrt(samp_size)), rhos = np.power(10.,-np.arange(1,6)), learning_rate=0.01)
p_d_DR  = d_DR(test[["X0","X1","X2"]])

d1_X
rho:  [1.e-01 1.e-02 1.e-03 1.e-04 1.e-05]
value:  [6.138482  6.1452875 6.146569  6.14742   6.147792 ]
rho_best: 1e-05
d1_XZ
rho:  [1.e-01 1.e-02 1.e-03 1.e-04 1.e-05]
value:  [6.1387854 6.144933  6.1451716 6.14682   6.146674 ]
rho_best: 0.0001
d2_XW
rho:  [1.e-01 1.e-02 1.e-03 1.e-04 1.e-05]
value:  [5.79736335 6.02528882 6.00128173 5.96028181 6.02061166]
rho_best: 0.01
d3_X
rho:  [1.e-01 1.e-02 1.e-03 1.e-04 1.e-05]
value:  [5.80035587 5.94738515 5.99370644 5.98606595 5.99587982]
rho_best: 1e-05
d_DR
rho:  [1.e-01 1.e-02 1.e-03 1.e-04 1.e-05]
value:  [6.10500359 6.10811508 6.10340724 6.10045115 6.09929773]
rho_best: 0.01


In [23]:
# Calculate Value
v_d1_X  = value_fun(p_d1_X, test[["A"]],test[["Y"]],test[["X0","X1"]],test[["U"]])
v_d1_XZ = value_fun(p_d1_XZ,test[["A"]],test[["Y"]],test[["X0","X1"]],test[["U"]])
v_d2_XW = value_fun(p_d2_XW,test[["A"]],test[["Y"]],test[["X0","X1"]],test[["U"]])
v_d3_X  = value_fun(p_d3_X, test[["A"]],test[["Y"]],test[["X0","X1"]],test[["U"]])
v_d_DR  = value_fun(p_d_DR, test[["A"]],test[["Y"]],test[["X0","X1"]],test[["U"]])
if proxy.cv_d1_XZ > proxy.cv_d2_XW:
    v_d4 = v_d1_XZ
else:
    v_d4 = v_d2_XW
pd.DataFrame.from_dict({'method':['d1_X','d1_XZ','d2_XW','d3_X','d_DR','d4'],
                        'value':[v_d1_X,v_d1_XZ,v_d2_XW,v_d3_X,v_d_DR,v_d4]})

Unnamed: 0,method,value
0,d1_X,6.149491
1,d1_XZ,6.136608
2,d2_XW,6.077733
3,d3_X,6.078348
4,d_DR,6.144255
5,d4,6.136608


### N1, Assumption 10 holds

In [24]:
# Setting
linearity    = 'nonlinear'
indepOutProx = 'WindepY'
Xonly        = 'XW'

In [25]:
# Generate Test Data
test = prox_data(500000, add_noise=False).gen_Y(linearity=linearity, indepOutProx= indepOutProx, Xonly = Xonly)
value_fun(test[["GOR"]].to_numpy(int).reshape(-1), test[["A"]],test[["Y"]],test[["X0","X1"]],test[["U"]])

4.869601683270521

In [26]:
# Generate Training Data
samp_size = 2000
train = prox_data(samp_size=samp_size, add_noise=True).gen_Y(linearity=linearity, indepOutProx= indepOutProx, Xonly = Xonly)

In [27]:
proxy = proxITR(A=train[["A"]], X=train[["X0","X1","X2"]], Z=train[["Z"]], W=train[["W"]], Y=train[["Y"]], learning_rate=0.1, n_epoch=2000, batch_size=200, opt='SGD', verbose=True)
d1_X  = proxy.fit_d1_X_cv(gamma_f='auto', n_gamma_hs=10, linearity=linearity, n_components=int(2*np.sqrt(samp_size)), rhos = np.power(10.,-np.arange(1,6)))
p_d1_X  = d1_X(test[["X0","X1","X2"]])
d1_XZ = proxy.fit_d1_XZ_cv(gamma_f='auto', n_gamma_hs=10, linearity=linearity, n_components=int(2*np.sqrt(samp_size)), rhos = np.power(10.,-np.arange(1,6)))
p_d1_XZ = d1_XZ(test[["X0","X1","X2","Z"]])
d2_XW = proxy.fit_d2_XW_cv(gamma_f='auto', n_gamma_hs=10, linearity=linearity, n_components=int(2*np.sqrt(samp_size)), rhos = np.power(10.,-np.arange(1,6)))
p_d2_XW = d2_XW(test[["X0","X1","X2","W"]])
d3_X  = proxy.fit_d3_X_cv(gamma_f='auto', n_gamma_hs=10, linearity=linearity, n_components=int(2*np.sqrt(samp_size)), rhos = np.power(10.,-np.arange(1,6)))
p_d3_X  = d3_X(test[["X0","X1","X2"]])
d_DR  = proxy.fit_d_DR_cv(gamma_f='auto', n_gamma_hs=10, linearity=linearity, n_components=int(2*np.sqrt(samp_size)), rhos = np.power(10.,-np.arange(1,6)))
p_d_DR  = d_DR(test[["X0","X1","X2"]])

d1_X
rho:  [1.e-01 1.e-02 1.e-03 1.e-04 1.e-05]
value:  [4.241563  4.225907  4.2017784 4.199397  4.2041225]
rho_best: 0.1
d1_XZ
rho:  [1.e-01 1.e-02 1.e-03 1.e-04 1.e-05]
value:  [4.3725405 4.375687  4.341916  4.3449464 4.3452387]
rho_best: 0.01
d2_XW
rho:  [1.e-01 1.e-02 1.e-03 1.e-04 1.e-05]
value:  [4.28426898 4.31679384 4.32546732 4.27950828 4.29533601]
rho_best: 0.001
d3_X
rho:  [1.e-01 1.e-02 1.e-03 1.e-04 1.e-05]
value:  [4.03926064 4.01211581 3.96293243 3.97098322 3.9958759 ]
rho_best: 0.1
d_DR
rho:  [1.e-01 1.e-02 1.e-03 1.e-04 1.e-05]
value:  [4.24081969 4.2445701  4.26305154 4.20272578 4.17363331]
rho_best: 0.001


In [28]:
# Calculate Value
v_d1_X  = value_fun(p_d1_X, test[["A"]],test[["Y"]],test[["X0","X1"]],test[["U"]])
v_d1_XZ = value_fun(p_d1_XZ,test[["A"]],test[["Y"]],test[["X0","X1"]],test[["U"]])
v_d2_XW = value_fun(p_d2_XW,test[["A"]],test[["Y"]],test[["X0","X1"]],test[["U"]])
v_d3_X  = value_fun(p_d3_X, test[["A"]],test[["Y"]],test[["X0","X1"]],test[["U"]])
v_d_DR  = value_fun(p_d_DR, test[["A"]],test[["Y"]],test[["X0","X1"]],test[["U"]])
if proxy.cv_d1_XZ > proxy.cv_d2_XW:
    v_d4 = v_d1_XZ
else:
    v_d4 = v_d2_XW
pd.DataFrame.from_dict({'method':['d1_X','d1_XZ','d2_XW','d3_X','d_DR','d4'],
                        'value':[v_d1_X,v_d1_XZ,v_d2_XW,v_d3_X,v_d_DR,v_d4]})

Unnamed: 0,method,value
0,d1_X,4.208487
1,d1_XZ,4.304459
2,d2_XW,4.229064
3,d3_X,4.152721
4,d_DR,4.193878
5,d4,4.304459


### N1, Assumption 10 fails

In [29]:
# Setting
linearity    = 'nonlinear'
indepOutProx = 'WdepY'
Xonly        = 'XW'

In [30]:
# Generate Test Data
test = prox_data(500000, add_noise=False).gen_Y(linearity=linearity, indepOutProx= indepOutProx, Xonly = Xonly)
value_fun(test[["GOR"]].to_numpy(int).reshape(-1), test[["A"]],test[["Y"]],test[["X0","X1"]],test[["U"]])

4.874850908443785

In [31]:
# Generate Training Data
samp_size = 2000
train = prox_data(samp_size=samp_size, add_noise=True).gen_Y(linearity=linearity, indepOutProx= indepOutProx, Xonly = Xonly)

In [32]:
proxy = proxITR(A=train[["A"]], X=train[["X0","X1","X2"]], Z=train[["Z"]], W=train[["W"]], Y=train[["Y"]], learning_rate=0.1, n_epoch=2000, batch_size=200, opt='SGD', verbose=True)
d1_X  = proxy.fit_d1_X_cv(gamma_f='auto', n_gamma_hs=10, linearity=linearity, n_components=int(2*np.sqrt(samp_size)), rhos = np.power(10.,-np.arange(1,6)))
p_d1_X  = d1_X(test[["X0","X1","X2"]])
d1_XZ = proxy.fit_d1_XZ_cv(gamma_f='auto', n_gamma_hs=10, linearity=linearity, n_components=int(2*np.sqrt(samp_size)), rhos = np.power(10.,-np.arange(1,6)))
p_d1_XZ = d1_XZ(test[["X0","X1","X2","Z"]])
d2_XW = proxy.fit_d2_XW_cv(gamma_f='auto', n_gamma_hs=10, linearity=linearity, n_components=int(2*np.sqrt(samp_size)), rhos = np.power(10.,-np.arange(1,6)))
p_d2_XW = d2_XW(test[["X0","X1","X2","W"]])
d3_X  = proxy.fit_d3_X_cv(gamma_f='auto', n_gamma_hs=10, linearity=linearity, n_components=int(2*np.sqrt(samp_size)), rhos = np.power(10.,-np.arange(1,6)))
p_d3_X  = d3_X(test[["X0","X1","X2"]])
d_DR  = proxy.fit_d_DR_cv(gamma_f='auto', n_gamma_hs=10, linearity=linearity, n_components=int(2*np.sqrt(samp_size)), rhos = np.power(10.,-np.arange(1,6)))
p_d_DR  = d_DR(test[["X0","X1","X2"]])

d1_X
rho:  [1.e-01 1.e-02 1.e-03 1.e-04 1.e-05]
value:  [4.180254  4.186386  4.188185  4.1538887 4.158159 ]
rho_best: 0.001
d1_XZ
rho:  [1.e-01 1.e-02 1.e-03 1.e-04 1.e-05]
value:  [4.382603  4.3883386 4.4043045 4.3653617 4.37253  ]
rho_best: 0.001
d2_XW
rho:  [1.e-01 1.e-02 1.e-03 1.e-04 1.e-05]
value:  [4.25116586 4.25343246 4.19198985 4.22786002 4.24295947]
rho_best: 0.01
d3_X
rho:  [1.e-01 1.e-02 1.e-03 1.e-04 1.e-05]
value:  [4.08864948 4.12087624 4.08027184 4.096299   4.08724806]
rho_best: 0.01
d_DR
rho:  [1.e-01 1.e-02 1.e-03 1.e-04 1.e-05]
value:  [4.19199998 4.16319099 4.16298307 4.13728774 4.14298121]
rho_best: 0.1


In [33]:
# Calculate Value
v_d1_X  = value_fun(p_d1_X, test[["A"]],test[["Y"]],test[["X0","X1"]],test[["U"]])
v_d1_XZ = value_fun(p_d1_XZ,test[["A"]],test[["Y"]],test[["X0","X1"]],test[["U"]])
v_d2_XW = value_fun(p_d2_XW,test[["A"]],test[["Y"]],test[["X0","X1"]],test[["U"]])
v_d3_X  = value_fun(p_d3_X, test[["A"]],test[["Y"]],test[["X0","X1"]],test[["U"]])
v_d_DR  = value_fun(p_d_DR, test[["A"]],test[["Y"]],test[["X0","X1"]],test[["U"]])
if proxy.cv_d1_XZ > proxy.cv_d2_XW:
    v_d4 = v_d1_XZ
else:
    v_d4 = v_d2_XW
pd.DataFrame.from_dict({'method':['d1_X','d1_XZ','d2_XW','d3_X','d_DR','d4'],
                        'value':[v_d1_X,v_d1_XZ,v_d2_XW,v_d3_X,v_d_DR,v_d4]})

Unnamed: 0,method,value
0,d1_X,4.195265
1,d1_XZ,4.291929
2,d2_XW,4.287518
3,d3_X,4.128514
4,d_DR,4.220117
5,d4,4.291929


### N2, Assumption 10 holds

In [34]:
# Setting
linearity    = 'nonlinear'
indepOutProx = 'WindepY'
Xonly        = 'X'

In [35]:
# Generate Test Data
test = prox_data(500000, add_noise=False).gen_Y(linearity=linearity, indepOutProx= indepOutProx, Xonly = Xonly)
value_fun(test[["GOR"]].to_numpy(int).reshape(-1), test[["A"]],test[["Y"]],test[["X0","X1"]],test[["U"]])

4.594705056662993

In [36]:
# Generate Training Data
samp_size = 2000
train = prox_data(samp_size=samp_size, add_noise=True).gen_Y(linearity=linearity, indepOutProx= indepOutProx, Xonly = Xonly)

In [37]:
proxy = proxITR(A=train[["A"]], X=train[["X0","X1","X2"]], Z=train[["Z"]], W=train[["W"]], Y=train[["Y"]], learning_rate=0.1, n_epoch=2000, batch_size=200, opt='SGD', verbose=True)
d1_X  = proxy.fit_d1_X_cv(gamma_f='auto', n_gamma_hs=10, linearity=linearity, n_components=int(2*np.sqrt(samp_size)), rhos = np.power(10.,-np.arange(1,6)))
p_d1_X  = d1_X(test[["X0","X1","X2"]])
d1_XZ = proxy.fit_d1_XZ_cv(gamma_f='auto', n_gamma_hs=10, linearity=linearity, n_components=int(2*np.sqrt(samp_size)), rhos = np.power(10.,-np.arange(1,6)))
p_d1_XZ = d1_XZ(test[["X0","X1","X2","Z"]])
d2_XW = proxy.fit_d2_XW_cv(gamma_f='auto', n_gamma_hs=10, linearity=linearity, n_components=int(2*np.sqrt(samp_size)), rhos = np.power(10.,-np.arange(1,6)))
p_d2_XW = d2_XW(test[["X0","X1","X2","W"]])
d3_X  = proxy.fit_d3_X_cv(gamma_f='auto', n_gamma_hs=10, linearity=linearity, n_components=int(2*np.sqrt(samp_size)), rhos = np.power(10.,-np.arange(1,6)))
p_d3_X  = d3_X(test[["X0","X1","X2"]])
d_DR  = proxy.fit_d_DR_cv(gamma_f='auto', n_gamma_hs=10, linearity=linearity, n_components=int(2*np.sqrt(samp_size)), rhos = np.power(10.,-np.arange(1,6)))
p_d_DR  = d_DR(test[["X0","X1","X2"]])

d1_X
rho:  [1.e-01 1.e-02 1.e-03 1.e-04 1.e-05]
value:  [4.6684628 4.6699    4.6683125 4.659698  4.6627827]
rho_best: 0.01
d1_XZ
rho:  [1.e-01 1.e-02 1.e-03 1.e-04 1.e-05]
value:  [4.684244  4.6841354 4.6926494 4.685613  4.682728 ]
rho_best: 0.001
d2_XW
rho:  [1.e-01 1.e-02 1.e-03 1.e-04 1.e-05]
value:  [4.40133253 4.39883021 4.4294202  4.53306437 4.48397412]
rho_best: 0.0001
d3_X
rho:  [1.e-01 1.e-02 1.e-03 1.e-04 1.e-05]
value:  [4.31879445 4.32138342 4.33485795 4.37505705 4.42874238]
rho_best: 1e-05
d_DR
rho:  [1.e-01 1.e-02 1.e-03 1.e-04 1.e-05]
value:  [4.41752935 4.43586425 4.39836469 4.37956475 4.35421008]
rho_best: 0.01


In [38]:
# Calculate Value
v_d1_X  = value_fun(p_d1_X, test[["A"]],test[["Y"]],test[["X0","X1"]],test[["U"]])
v_d1_XZ = value_fun(p_d1_XZ,test[["A"]],test[["Y"]],test[["X0","X1"]],test[["U"]])
v_d2_XW = value_fun(p_d2_XW,test[["A"]],test[["Y"]],test[["X0","X1"]],test[["U"]])
v_d3_X  = value_fun(p_d3_X, test[["A"]],test[["Y"]],test[["X0","X1"]],test[["U"]])
v_d_DR  = value_fun(p_d_DR, test[["A"]],test[["Y"]],test[["X0","X1"]],test[["U"]])
if proxy.cv_d1_XZ > proxy.cv_d2_XW:
    v_d4 = v_d1_XZ
else:
    v_d4 = v_d2_XW
pd.DataFrame.from_dict({'method':['d1_X','d1_XZ','d2_XW','d3_X','d_DR','d4'],
                        'value':[v_d1_X,v_d1_XZ,v_d2_XW,v_d3_X,v_d_DR,v_d4]})

Unnamed: 0,method,value
0,d1_X,4.565069
1,d1_XZ,4.441181
2,d2_XW,4.472548
3,d3_X,4.450704
4,d_DR,4.533154
5,d4,4.441181


### N2, Assumption 10 fails

In [39]:
# Setting
linearity    = 'nonlinear'
indepOutProx = 'WdepY'
Xonly        = 'X'

In [40]:
# Generate Test Data
test = prox_data(500000, add_noise=False).gen_Y(linearity=linearity, indepOutProx= indepOutProx, Xonly = Xonly)
value_fun(test[["GOR"]].to_numpy(int).reshape(-1), test[["A"]],test[["Y"]],test[["X0","X1"]],test[["U"]])

4.614626155350836

In [41]:
proxy = proxITR(A=train[["A"]], X=train[["X0","X1","X2"]], Z=train[["Z"]], W=train[["W"]], Y=train[["Y"]], learning_rate=0.1, n_epoch=2000, batch_size=200, opt='SGD', verbose=True)
d1_X  = proxy.fit_d1_X_cv(gamma_f='auto', n_gamma_hs=10, linearity=linearity, n_components=int(2*np.sqrt(samp_size)), rhos = np.power(10.,-np.arange(1,6)))
p_d1_X  = d1_X(test[["X0","X1","X2"]])
d1_XZ = proxy.fit_d1_XZ_cv(gamma_f='auto', n_gamma_hs=10, linearity=linearity, n_components=int(2*np.sqrt(samp_size)), rhos = np.power(10.,-np.arange(1,6)))
p_d1_XZ = d1_XZ(test[["X0","X1","X2","Z"]])
d2_XW = proxy.fit_d2_XW_cv(gamma_f='auto', n_gamma_hs=10, linearity=linearity, n_components=int(2*np.sqrt(samp_size)), rhos = np.power(10.,-np.arange(1,6)))
p_d2_XW = d2_XW(test[["X0","X1","X2","W"]])
d3_X  = proxy.fit_d3_X_cv(gamma_f='auto', n_gamma_hs=10, linearity=linearity, n_components=int(2*np.sqrt(samp_size)), rhos = np.power(10.,-np.arange(1,6)))
p_d3_X  = d3_X(test[["X0","X1","X2"]])
d_DR  = proxy.fit_d_DR_cv(gamma_f='auto', n_gamma_hs=10, linearity=linearity, n_components=int(2*np.sqrt(samp_size)), rhos = np.power(10.,-np.arange(1,6)))
p_d_DR  = d_DR(test[["X0","X1","X2"]])

d1_X
rho:  [1.e-01 1.e-02 1.e-03 1.e-04 1.e-05]
value:  [4.6738777 4.6665425 4.672474  4.6639752 4.658164 ]
rho_best: 0.1
d1_XZ
rho:  [1.e-01 1.e-02 1.e-03 1.e-04 1.e-05]
value:  [4.697095  4.685781  4.6937284 4.6809034 4.6842465]
rho_best: 0.1
d2_XW
rho:  [1.e-01 1.e-02 1.e-03 1.e-04 1.e-05]
value:  [4.41575689 4.50141368 4.44443955 4.49868504 4.50019947]
rho_best: 0.01
d3_X
rho:  [1.e-01 1.e-02 1.e-03 1.e-04 1.e-05]
value:  [4.31170609 4.33214946 4.36974403 4.34463265 4.40702808]
rho_best: 1e-05
d_DR
rho:  [1.e-01 1.e-02 1.e-03 1.e-04 1.e-05]
value:  [4.4479797  4.41102671 4.42644016 4.38523492 4.36829802]
rho_best: 0.1


In [42]:
# Calculate Value
v_d1_X  = value_fun(p_d1_X, test[["A"]],test[["Y"]],test[["X0","X1"]],test[["U"]])
v_d1_XZ = value_fun(p_d1_XZ,test[["A"]],test[["Y"]],test[["X0","X1"]],test[["U"]])
v_d2_XW = value_fun(p_d2_XW,test[["A"]],test[["Y"]],test[["X0","X1"]],test[["U"]])
v_d3_X  = value_fun(p_d3_X, test[["A"]],test[["Y"]],test[["X0","X1"]],test[["U"]])
v_d_DR  = value_fun(p_d_DR, test[["A"]],test[["Y"]],test[["X0","X1"]],test[["U"]])
if proxy.cv_d1_XZ > proxy.cv_d2_XW:
    v_d4 = v_d1_XZ
else:
    v_d4 = v_d2_XW
pd.DataFrame.from_dict({'method':['d1_X','d1_XZ','d2_XW','d3_X','d_DR','d4'],
                        'value':[v_d1_X,v_d1_XZ,v_d2_XW,v_d3_X,v_d_DR,v_d4]})

Unnamed: 0,method,value
0,d1_X,4.591785
1,d1_XZ,4.451697
2,d2_XW,4.54079
3,d3_X,4.469727
4,d_DR,4.550059
5,d4,4.451697
