## Setup (Colab only)

In [None]:
# Mount Google Drive inside the Colab VM at /content/drive; the project
# directory used by the following cells lives under that mount point.

from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Switch to the project directory on the mounted Drive; the install cells below
# read requirements.txt and setup.py relative to it. The backslash escapes the
# space in "Colab Notebooks".
%cd /content/drive/MyDrive/Colab\ Notebooks/hidden_mediators

In [None]:
# List the contents of the current directory (quick check that the %cd above
# landed in the expected folder).
%ls

In [None]:
from IPython.display import clear_output

In [None]:
import time
!pip install -r requirements.txt
time.sleep(2)
clear_output()

In [None]:
import time
# Install the library from the current directory in `develop` mode.
# Replace `develop` with `install` if you won't make library code changes.
!python setup.py develop
# Pause briefly, then wipe the (long) installer output from the cell.
time.sleep(2)
clear_output()
# Restart the session after running this cell.

In [None]:
# Change the working directory to the parent "Colab Notebooks" folder
# (one level above the project directory used by the install cells).
%cd /content/drive/MyDrive/Colab\ Notebooks

# Main Logic

In [None]:
# Load IPython's autoreload extension. Mode 2 re-imports all modules before each
# cell is executed, so edits to library source files take effect without
# restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import scipy.stats
from joblib import Parallel, delayed
from proximalde.proximal import proximal_direct_effect, ProximalDE, residualizeW
from sklearn.linear_model import LinearRegression
from proximalde.crossfit import fit_predict
from proximalde.gen_synthetic_data import gen_data
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
from xgboost import XGBRegressor, XGBClassifier
from sklearn.base import BaseEstimator, ClassifierMixin, RegressorMixin
from sklearn.model_selection import train_test_split
from proximalde.utilities import GridSearchCVList
from sklearn.linear_model import Lasso, LogisticRegression

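# Using custom ML models for regressing W
Custom regression models must subclass `BaseEstimator` and `RegressorMixin`; the classification wrapper below analogously subclasses `BaseEstimator` and `ClassifierMixin`.

## Example 1: Simple XGB wrapper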

In [None]:
class XGBRegressorWrapper(BaseEstimator, RegressorMixin):
    """scikit-learn compatible XGBoost regressor with built-in early stopping.

    On every ``fit`` call 20% of the supplied data is held out as a validation
    set; it is passed to XGBoost as ``eval_set`` so that boosting can stop
    early. The hold-out split and the booster share ``random_state`` so that
    repeated fits on the same data produce the same model.

    Parameters
    ----------
    max_depth : int, default=3
        Maximum tree depth forwarded to ``XGBRegressor``.
    early_stopping_rounds : int, default=50
        Early-stopping patience forwarded to ``XGBRegressor``.
    learning_rate : float, default=.1
        Boosting learning rate forwarded to ``XGBRegressor``.
    random_state : int or None, default=123
        Seed used for the train/validation split and for ``XGBRegressor``.

    Attributes
    ----------
    model_ : XGBRegressor
        The fitted underlying model (set by ``fit``).
    """

    def __init__(self, *, max_depth=3, early_stopping_rounds=50, learning_rate=.1,
                 random_state=123):
        self.max_depth = max_depth
        self.early_stopping_rounds = early_stopping_rounds
        self.learning_rate = learning_rate
        self.random_state = random_state

    def fit(self, X, y):
        """Fit the booster on 80% of ``(X, y)``, early-stopping on the other 20%.

        Returns
        -------
        self
        """
        # Seed the split: an unseeded split makes the validation set (and hence
        # the early-stopped model) change from run to run.
        Xtrain, Xval, ytrain, yval = train_test_split(
            X, y, test_size=.2, random_state=self.random_state)
        self.model_ = XGBRegressor(max_depth=self.max_depth,
                                   early_stopping_rounds=self.early_stopping_rounds,
                                   learning_rate=self.learning_rate,
                                   random_state=self.random_state)
        self.model_.fit(Xtrain, ytrain, eval_set=[(Xval, yval)], verbose=False)
        return self

    def predict(self, X):
        """Return the fitted booster's predictions for ``X``."""
        return self.model_.predict(X)


class XGBClassifierWrapper(BaseEstimator, ClassifierMixin):
    """scikit-learn compatible XGBoost classifier with built-in early stopping.

    On every ``fit`` call 20% of the supplied data is held out as a validation
    set; it is passed to XGBoost as ``eval_set`` (scored with log-loss) so that
    boosting can stop early. The hold-out split and the booster share
    ``random_state`` so that repeated fits on the same data produce the same
    model.

    Parameters
    ----------
    max_depth : int, default=3
        Maximum tree depth forwarded to ``XGBClassifier``.
    early_stopping_rounds : int, default=50
        Early-stopping patience forwarded to ``XGBClassifier``.
    learning_rate : float, default=.1
        Boosting learning rate forwarded to ``XGBClassifier``.
    random_state : int or None, default=123
        Seed used for the train/validation split and for ``XGBClassifier``.

    Attributes
    ----------
    model_ : XGBClassifier
        The fitted underlying model (set by ``fit``).
    classes_ : array-like
        Class labels, copied from ``model_.classes_`` after fitting.
    """

    def __init__(self, *, max_depth=3, early_stopping_rounds=50, learning_rate=.1,
                 random_state=123):
        self.max_depth = max_depth
        self.early_stopping_rounds = early_stopping_rounds
        self.learning_rate = learning_rate
        self.random_state = random_state

    def fit(self, X, y):
        """Fit the booster on 80% of ``(X, y)``, early-stopping on the other 20%.

        Returns
        -------
        self
        """
        # Seed the split: an unseeded split makes the validation set (and hence
        # the early-stopped model) change from run to run.
        Xtrain, Xval, ytrain, yval = train_test_split(
            X, y, test_size=.2, random_state=self.random_state)
        self.model_ = XGBClassifier(max_depth=self.max_depth,
                                    early_stopping_rounds=self.early_stopping_rounds,
                                    learning_rate=self.learning_rate,
                                    eval_metric='logloss',
                                    random_state=self.random_state)
        self.model_.fit(Xtrain, ytrain, eval_set=[(Xval, yval)], verbose=False)
        # Expose the fitted labels on the wrapper itself.
        self.classes_ = self.model_.classes_
        return self

    def predict(self, X):
        """Return the fitted booster's predicted class labels for ``X``."""
        return self.model_.predict(X)

    def predict_proba(self, X):
        """Return the fitted booster's predicted class probabilities for ``X``."""
        return self.model_.predict_proba(X)

In [None]:
# ---- Coefficients passed to the synthetic data generator ----
# The indirect effect through the mediator is the product a*b.
a = 1.0
b = 1.0
# Direct effect: the quantity we want to estimate.
c = 0.5
# d and g can be zero; that does not hurt.
d = 0
g = 0
# A small product e*f means we have a weak instrument.
e = 0.5
f = 0.5

# ---- Sample size and dimensions ----
n = 50_000  # number of samples
pw = 5      # dimension of controls / confounders; should be > 0 for this notebook
pz = 5      # dimension of Z
px = 5      # dimension of X
pm = 1      # dimension of the mediator M; should not be more than max(pz,px)

# Strength of the mediator noise; needs to be non-zero for identifiability
# and is only used when pm=1.
sm = 1.0

In [None]:
# Generate one synthetic dataset from the parameters configured above (seed=42).
W, X, Z, D, Y = gen_data(
    a, b, c, d, e, f, g,
    pm, pz, px, pw, n,
    sm=sm, seed=42,
)

# Example 1: plain XGBoost wrappers, 3-fold cross-fitting, no semi-crossfitting.
est = ProximalDE(
    model_regression=XGBRegressorWrapper(),
    model_classification=XGBClassifierWrapper(),
    cv=3,
    semi=False,
    n_jobs=-1,
    random_state=3,
    verbose=3,
)
est.fit(W, D, Z, X, Y)

In [None]:
# Show the summary of the Example 1 fit (last expression in the cell, so its
# return value is displayed).
est.summary()

## Example 2: Adding Hyperparameter Tuning and Semi-Crossfitting

In [None]:
# Tune the learning rate of each XGBoost wrapper by cross-validated grid search:
# RMSE selects the regressor, log-loss selects the classifier.
regression = GridSearchCV(
    estimator=XGBRegressorWrapper(),
    param_grid={'learning_rate': [.01, .1, 1]},
    scoring='neg_root_mean_squared_error',
)
classification = GridSearchCV(
    estimator=XGBClassifierWrapper(),
    param_grid={'learning_rate': [.01, .1, 1]},
    scoring='neg_log_loss',
)

In [None]:
# Example 2: grid-searched models with semi-crossfitting switched on (semi=True).
est = ProximalDE(
    model_regression=regression,
    model_classification=classification,
    cv=3,
    semi=True,
    n_jobs=-1,
    random_state=3,
    verbose=3,
)
est.fit(W, D, Z, X, Y)

In [None]:
# Show the summary of the Example 2 fit (last expression in the cell, so its
# return value is displayed).
est.summary()

## Example 3: Adding grid search among many types of models

In [None]:
# Regression candidates: the XGBoost wrapper (learning-rate grid) and a Lasso
# (alpha grid), compared by RMSE.
regression = GridSearchCVList(
    [XGBRegressorWrapper(), Lasso()],
    [
        {'learning_rate': [.01, .1, 1]},
        {'alpha': np.logspace(-4, 2, 20)},
    ],
    scoring='neg_root_mean_squared_error',
)

# Classification candidates: the XGBoost wrapper (learning-rate grid) and an
# L1-penalised logistic regression (C grid), compared by log-loss.
classification = GridSearchCVList(
    [
        XGBClassifierWrapper(),
        LogisticRegression(penalty='l1', solver='liblinear',
                           tol=1e-6, intercept_scaling=100),
    ],
    [
        {'learning_rate': [.01, .1, 1]},
        {'C': np.logspace(-4, 4, 10)},
    ],
    scoring='neg_log_loss',
)

In [None]:
# Example 3: models chosen across several estimator families, with
# semi-crossfitting switched on (semi=True).
est = ProximalDE(
    model_regression=regression,
    model_classification=classification,
    cv=3,
    semi=True,
    n_jobs=-1,
    random_state=3,
    verbose=3,
)
est.fit(W, D, Z, X, Y)

In [None]:
# Show the summary of the Example 3 fit (last expression in the cell, so its
# return value is displayed).
est.summary()