In [1]:
import sys
import os
# Make the directory one level above this notebook's folder importable so
# that `ylearn` can be imported from the source checkout.
notebook_path = os.path.abspath('./test_score.ipynb')
notebook_dir = os.path.dirname(notebook_path)
project_root = os.path.dirname(notebook_dir)
sys.path.append(project_root)

In [2]:
import pandas as pd
import numpy as np

from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor

from ylearn.estimator_model import RLoss
from ylearn.estimator_model import meta_learner, double_ml, doubly_robust, causal_tree
from ylearn.exp_dataset.exp_data import single_binary_treatment

## Single binary treatment

In [3]:
# Generate the example dataset with the helper imported from
# ylearn.exp_dataset.exp_data. It returns three objects: train1 is the
# DataFrame the estimators are fitted on, val1 is the data the R-loss scorer is
# fitted on, and treatment_effect1 is kept for reference (presumably the
# ground-truth effect of the generator -- TODO confirm).
train1, val1, treatment_effect1 = single_binary_treatment()
def exp_te(x):
    """Return the reference treatment effect exp(2 * x[0]) for one covariate row.

    Parameters
    ----------
    x : sequence or numpy.ndarray
        Covariate values; only the first entry, ``x[0]``, is used.

    Returns
    -------
    The value of ``np.exp(2 * x[0])``.
    """
    return np.exp(2 * x[0])


n = 1000  # number of test points
n_x = 4   # number of covariate columns (c_0 ... c_3)

# A dedicated, seeded generator keeps the test covariates identical on every
# "Restart & Run All" and leaves NumPy's global random state untouched.
test_rng = np.random.default_rng(2022)
X_test1 = test_rng.uniform(0, 1, size=(n, n_x))
# The effect only depends on the first covariate, so sweep it over an evenly
# spaced grid on [0, 1]; the remaining columns stay random.
X_test1[:, 0] = np.linspace(0, 1, n)

# One column per covariate, named c_0 ... c_{n_x - 1}.
data_test_dict = {f'c_{i}': X_test1[:, i] for i in range(n_x)}
data_test1 = pd.DataFrame(data_test_dict)

# Vectorised equivalent of calling exp_te on every row of X_test1.
true_te = np.exp(2 * X_test1[:, 0])

In [4]:
# Assign a causal role to each column of train1. Columns are picked by name
# prefix rather than by negative positional offsets, so the selection keeps
# working if columns are added, removed or reordered. Both results are pandas
# Index objects, exactly as positional slicing of `train1.columns` would give.
adjustment = train1.columns[train1.columns.str.startswith('w_')]  # w_0 ... w_29
covariate = train1.columns[train1.columns.str.startswith('c_')]   # c_0 ... c_3
treatment = 'treatment'
outcome = 'outcome'
# Preview the training frame (the last expression is the cell output).
train1.head()

Unnamed: 0,w_0,w_1,w_2,w_3,w_4,w_5,w_6,w_7,w_8,w_9,...,w_27,w_28,w_29,c_0,c_1,c_2,c_3,treatment,outcome,TE
226,0.84069,-0.897341,-1.691907,-0.228597,0.219336,0.358384,0.358747,-0.620275,-1.281643,1.092314,...,-1.313111,-0.30247,-2.097424,0.542969,0.053986,0.802841,0.599849,1,3.519415,2.962217
159,2.444159,-0.007502,-0.034569,0.072856,-0.156477,-0.024344,-0.059101,-0.243882,0.944915,1.292537,...,-0.674241,0.964401,1.060411,0.747179,0.536196,0.818296,0.179472,0,-0.908731,4.45647
579,1.707901,-0.744127,-0.541328,-1.260657,-0.031214,1.717055,0.087732,-0.061014,-1.358938,-0.630703,...,-1.568256,0.916513,-1.188164,0.292859,0.465193,0.285123,0.998249,1,2.668287,1.796279
631,-0.876409,1.180617,1.782721,0.635121,-0.381472,-1.067087,-1.303075,1.717317,-1.43616,0.058888,...,-0.939212,-0.097768,0.286155,0.059515,0.685887,0.186069,0.102843,1,0.38758,1.126405
578,0.928512,0.973593,1.896839,-1.617485,0.744122,0.089856,0.714048,0.052563,0.512447,0.939993,...,-0.534567,-1.139242,-0.210185,0.528878,0.12988,0.990413,0.514765,1,4.464414,2.879903


In [5]:
# R-loss scorer used further down to rate every fitted estimator through
# `rloss.score(model)`. Its nuisance models are random forests: a classifier
# for the binary treatment and a regressor for the outcome.
rloss = RLoss(
    is_discrete_treatment=True,
    cf_fold=1,
    y_model=RandomForestRegressor(),
    x_model=RandomForestClassifier(),
)
# The scorer is fitted on the validation split, not on the training split.
# Keeping the fit call as the last expression displays the fitted scorer.
rloss.fit(
    data=val1,
    treatment=treatment,
    outcome=outcome,
    covariate=covariate,
    adjustment=adjustment,
)

06-23 11:20:01 I ylearn.e.double_ml.py 684 - _fit_1st_stage: fitting x_model RandomForestClassifier
06-23 11:20:01 I ylearn.e.double_ml.py 690 - _fit_1st_stage: fitting y_model RandomForestRegressor


RLoss

In [6]:
# Candidate effect estimators that are compared with the R-loss score.
# All learners that need a base model use a random forest.

# Double machine learning with a discrete treatment.
dml = double_ml.DoubleML(
    is_discrete_treatment=True,
    cf_fold=1,
    y_model=RandomForestRegressor(),
    x_model=RandomForestClassifier(),
)

# Meta-learners (S-, T- and X-learner), each with its own regressor instance.
slearner = meta_learner.SLearner(model=RandomForestRegressor())
tlearner = meta_learner.TLearner(model=RandomForestRegressor())
xlearner = meta_learner.XLearner(model=RandomForestRegressor())

# Doubly robust estimator.
dr = doubly_robust.DoublyRobust(
    yx_model=RandomForestRegressor(),
    y_model=RandomForestRegressor(),
    x_model=RandomForestClassifier(),
)

# Causal tree with its default settings.
ct = causal_tree.CausalTree()

# The list order is the order in which the models are fitted and scored.
models = [dml, slearner, tlearner, xlearner, ct, dr]

In [7]:
# Every candidate is fitted on the training split with the same column roles,
# so the keyword arguments are collected once and reused for each model.
fit_kwargs = dict(
    data=train1,
    outcome=outcome,
    treatment=treatment,
    covariate=covariate,
    adjustment=adjustment,
)
for model in models:
    model.fit(**fit_kwargs)


06-23 11:20:01 I ylearn.e.double_ml.py 684 - _fit_1st_stage: fitting x_model RandomForestClassifier
06-23 11:20:02 I ylearn.e.double_ml.py 690 - _fit_1st_stage: fitting y_model RandomForestRegressor
06-23 11:20:03 I ylearn.e.double_ml.py 716 - _fit_2nd_stage: fitting yx_model LinearRegression
06-23 11:20:06 I ylearn.e.causal_tree.py 561 - Start building the causal tree with criterion HonestCMSE
06-23 11:20:06 I ylearn.e.causal_tree.py 577 - Building the causal tree with splitter BestSplitter
06-23 11:20:06 I ylearn.e.causal_tree.py 620 - Building the causal tree with builder DepthFirstTreeBuilder


In [8]:
# Report the R-loss score of each fitted estimator, in list order.
for model in models:
    model_description = repr(model)
    model_score = rloss.score(model)
    print(f'The score of {model_description} is {model_score}')

06-23 11:20:07 I ylearn.e.effect_score.py 374 - Calculating the score: DoubleML(x_model=RandomForestClassifier(), y_model=RandomForestRegressor(), yx_model=LinearRegression(), is_discrete_treatment=True) finished estimating.
06-23 11:20:07 I ylearn.e.effect_score.py 378 - using combined treat technique for discrete treatment.
The score of DoubleML(x_model=RandomForestClassifier(), y_model=RandomForestRegressor(), yx_model=LinearRegression(), is_discrete_treatment=True) is [0.19140699]
06-23 11:20:07 I ylearn.e.effect_score.py 374 - Calculating the score: SLearner(model=RandomForestRegressor(), args=None, kwargs=None) finished estimating.
06-23 11:20:07 I ylearn.e.effect_score.py 378 - using combined treat technique for discrete treatment.
The score of SLearner(model=RandomForestRegressor(), args=None, kwargs=None) is [0.20676972]
06-23 11:20:07 I ylearn.e.effect_score.py 374 - Calculating the score: TLearner(model=None, kwargs=None) finished estimating.
06-23 11:20:07 I ylearn.e.effect