In [1]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import KFold
import statsmodels.api as sm
from sklearn.linear_model import LogisticRegression
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.metrics import matthews_corrcoef

## Summary of Results:

$\hat Q$ is the outcome estimator, $\hat G$ is the propensity score estimator. Their respective columns tell us which estimators are use e.g. NN means a neural network was used.

'Reduction' is the relative percent error reduction when compared against the plug-in estimator using the outcome model alone. The results are averages over 60 simulations.


| Method | $\hat Q$ | $\hat G$ | Reduction $\%$ | Rel. Error $\%$ |
| --- | --- | --- | --- |--- |
| Naive | $NN$ | - |- |  4.059|
| TMLE | $NN$ | $NN$ | 1.450 | 2.608 |



## Problem Setup:

This example is taken from https://arxiv.org/abs/2107.00681 by Hines, Dukes, Diaz-Ordaz, and Vansteelandt (2021) and the empirical evaluation follows https://onlinelibrary.wiley.com/doi/full/10.1002/sim.7628 by Miguel Angel Luque-Fernandez, Michael Schomaker, Bernard Rachet, Mireille E. Schnitzer (2018).


The following experiments are very similar to the ones in ATE-NN.ipynb, but this time we will attempt to fit the IF during the training of the NN itself.

## 1. Define the DGP and some helper functions:

In [2]:

def sigm(x):
    return 1/(1 + np.exp(-x))

def generate_data(N, seed):
    np.random.seed(seed=seed)
    z1 = np.random.binomial(1, 0.5, (N,1))
    z2 = np.random.binomial(1, 0.65, (N,1))
    z3 = np.round(np.random.uniform(0, 4, (N,1)),3)
    z4 = np.round(np.random.uniform(0, 5, (N,1)),3)
    X = np.random.binomial(1, sigm(-0.4 + 0.2*z2 + 0.15*z3 + 0.2*z4 + 0.15*z2*z4), (N,1))
    Y1 = np.random.binomial(1, sigm(-1 + 1 - 0.1*z1 + 0.3*z2 + 0.25*z3 + 0.2*z4 + 0.15*z2*z4), (N,1))
    Y0 = np.random.binomial(1, sigm(-1 + 0 - 0.1*z1 + 0.3*z2 + 0.25*z3 + 0.2*z4 + 0.15*z2*z4), (N,1))
    Y = Y1 * X + Y0 * (1-X)
    Z = np.concatenate([z1,z2,z3,z4],1)
    return Z, X, Y, Y1, Y0

## 2. Define the Neural Network Objects/Classes

In [3]:


def init_weights(m):
    if isinstance(m, nn.Linear):
        torch.nn.init.xavier_normal_(m.weight)
        m.bias.data.fill_(0.01)     

class QNet(nn.Module):
    def __init__(self, input_size, num_layers, layers_size, output_size, output_type, dropout):
        super(QNet, self).__init__()      
        
        layers = []
        layers.extend([nn.Linear(input_size, layers_size), nn.LeakyReLU()])
        for i in range(num_layers-1):
            layers.extend([nn.Linear(layers_size, layers_size), nn.LeakyReLU(), nn.Dropout(p=dropout)])
        self.net = nn.Sequential(*layers)
        
        pos_arm = []
        pos_arm.extend([nn.Linear(layers_size, layers_size), nn.LeakyReLU()])
        pos_arm.extend([nn.Linear(layers_size, output_size)])     
        
        neg_arm = []
        neg_arm.extend([nn.Linear(layers_size, layers_size), nn.LeakyReLU()])
        neg_arm.extend([nn.Linear(layers_size, output_size)])    
        
        if output_type == 'categorical':
            pos_arm.append(nn.Sigmoid())
            neg_arm.append(nn.Sigmoid())
        elif output_type == 'continuous':
            pass
        self.pos_arm = nn.Sequential(*pos_arm)
        self.neg_arm = nn.Sequential(*neg_arm)
    
        self.net.apply(init_weights) 
        self.neg_arm.apply(init_weights) 
        self.pos_arm.apply(init_weights) 
        
        self.epsilon = nn.Parameter(torch.tensor([0.0]), requires_grad=True)


    def forward(self, X, Z):
        
        out = self.net(torch.cat([X,Z],1))
        out0 = self.neg_arm(out)
        out1 = self.pos_arm(out)
        cond = X.bool()
        return torch.where(cond, out1, out0)


class GNet(nn.Module):
    def __init__(self, input_size, num_layers, layers_size, output_size, output_type, dropout):
        super(GNet, self).__init__()      
        
        layers = []
        layers.extend([nn.Linear(input_size, layers_size), nn.LeakyReLU()])
        for i in range(num_layers-1):
            layers.extend([nn.Linear(layers_size, layers_size), nn.LeakyReLU(), nn.Dropout(p=dropout)])
        layers.extend([nn.Linear(layers_size, output_size)])

        if output_type == 'categorical':
            layers.append(nn.Sigmoid())
        elif output_type == 'continuous':
            pass
        self.net = nn.Sequential(*layers)
        self.net.apply(init_weights) 
        
    def forward(self, Z):
        return self.net(Z)

## 3. Create a Neural Network training class

In [4]:
def logit_(p):
    return torch.log(p / (1 - p))

class Trainer(object):
    def __init__(self, qnet, gnet, iterations=None, outcome_type='categorical', batch_size=None, test_iter=None, lr=None, treg_lr=None):
        self.qnet = qnet
        self.gnet = gnet
        self.iterations = iterations
        self.batch_size = batch_size
        self.test_iter = test_iter
        self.outcome_type = outcome_type
        
        if lr is not None:
            self.treg_optimizer = optim.Adam(qnet.parameters(), lr=treg_lr)
            self.q_optimizer = optim.Adam(qnet.parameters(), lr=lr)
            self.g_optimizer = optim.Adam(gnet.parameters(), lr=lr)
        self.bce_loss = nn.BCELoss(reduction='none')
        self.mse_loss = nn.MSELoss()
    
    def treg(self, x, pred_x, y, pred_y): 
        pred_x = torch.clip(pred_x, 0.05, 0.99)
        h = x / pred_x.detach() - (1 - x) / (1 - pred_x.detach())
        y_pert = torch.sigmoid(logit_(p=pred_y) + self.qnet.epsilon * h)
        t_reg = torch.sum(
                - y * torch.log(y_pert) - (1 - y) * torch.log(1 - y_pert))
        return t_reg
        
    def train(self, x, y, z):
        
        # create a small validation set
        indices = np.arange(len(x))
        np.random.shuffle(indices)
        val_inds = indices[:len(x)//8]
        train_inds = indices[len(x)//8:]
        x_val, y_val, z_val = x[val_inds], y[val_inds], z[val_inds]
        x_train, y_train, z_train = x[train_inds], y[train_inds], z[train_inds]
        
        indices = np.arange(len(x_train))
        
        train_losses_q = []
        train_losses_g = []
        test_losses_q = []
        test_losses_g = []
        
        for it in range(self.iterations):
            inds = np.random.choice(indices, self.batch_size)
            x_batch, y_batch, z_batch = x_train[inds], y_train[inds], z_train[inds]
            
            x_pred = self.gnet(z_batch)
            y_pred = self.qnet(x_batch, z_batch)
            
            if self.outcome_type == 'categorical':
                q_loss = self.bce_loss(y_pred, y_batch).mean()
            else:
                q_loss = self.mse_loss(y_pred, y_batch)
                       
            weight = torch.tensor([0.7, 0.3])
            weight_ = weight[x_batch.data.view(-1).long()].view_as(x_batch)
            g_loss = (self.bce_loss(x_pred, x_batch) * weight_).mean()
            
            treg_loss = self.treg(x_batch, x_pred, y_batch, y_pred)
            
            treg_loss.backward(retain_graph=True)
            q_loss.backward()
            g_loss.backward()
            
            self.treg_optimizer.step()
            self.q_optimizer.step()
            self.g_optimizer.step()
            self.treg_optimizer.zero_grad()
            self.q_optimizer.zero_grad()
            self.g_optimizer.zero_grad()
            
            if (it % self.test_iter == 0) or (it == (self.iterations-1)):
                self.qnet.eval()
                self.gnet.eval()
                x_pred = self.gnet(z_train[:800])
                y_pred = self.qnet(x_train[:800], z_train[:800])

                if self.outcome_type == 'categorical':
                    q_loss = self.bce_loss(y_pred, y_train[:800]).mean()
                else:
                    q_loss = self.mse_loss(y_pred, y_train[:800])
                    
                g_loss = self.bce_loss(x_pred, x_train[:800]).mean()
                train_losses_q.append(q_loss.item())
                train_losses_g.append(g_loss.item())
                
                q_loss_test, g_loss_test, _, _ = self.test(x_val, y_val, z_val)
                test_losses_q.append(q_loss_test.item())
                test_losses_g.append(g_loss_test.item())
#                 print('== Iteration {} =='.format(it))
#                 print('Test Loss Q:', q_loss_test.item(), '  Test Loss G:', g_loss_test.item())
                
                self.qnet.train()
                self.gnet.train()
        
        return train_losses_q, train_losses_g, test_losses_q, test_losses_g
    
    
    def test(self, x, y, z):
        self.qnet.eval()
        self.gnet.eval()
        
        x_pred = self.gnet(z)
        y_pred = self.qnet(x,z)

        if self.outcome_type == 'categorical':
            q_loss = self.bce_loss(y_pred, y).mean()
        else:
            q_loss = self.mse_loss(y_pred, y)
            
        g_loss = self.bce_loss(x_pred, x).mean()
        
        
        return q_loss, g_loss, x_pred, y_pred
    

## 4. Create a hyperparameter tuning class

In [5]:
class Tuner(object):
    def __init__(self, x, y, z, trials, best_params=None):
        self.best_params = best_params
        self.x = x
        self.y = y
        self.z = z
        self.trials = trials
        self.test_iter = 500
        self.best_params = best_params
        self.qnet = None
        self.gnet = None
        self.best_model_q = None
        self.best_model_g = None
        
    def tune(self):

        output_type_Q = 'categorical'
        output_size_Q = 1
        output_type_G = 'categorical'
        output_size_G = 1
        input_size_Q = z.shape[-1] + 1  # we will concatenate the treatment var inside the qnet class
        input_size_G = z.shape[-1]

        train_loss_q = []
        train_loss_g = []
        val_loss_q = []
        val_loss_g = []
        bs_ = []
        iters_ = []
        lr_ = []
        treg_lr_ = []
        layers_ = []
        dropout_ = []
        layer_size_ = []
        best_loss = 1e10
        j = 0
        while j < self.trials:
            
            try: 
                # sample hyper params and store the history
                bs = np.random.randint(30,120) if self.best_params == None else self.best_params['batch_size']
                bs_.append(bs)
                iters = np.random.randint(5000,100000) if self.best_params == None else self.best_params['iters']
                iters_.append(iters)
                lr = np.random.uniform(0.0001, 0.005) if self.best_params == None else self.best_params['lr']
                lr_.append(lr)
                treg_lr = np.random.uniform(0.0001, 0.005) if self.best_params == None else self.best_params['treg_lr']
                treg_lr_.append(treg_lr)
                layers = np.random.randint(2, 4) if self.best_params == None else self.best_params['layers']
                layers_.append(layers)
                dropout = np.random.uniform(0.1,0.4) if self.best_params == None else self.best_params['dropout']
                dropout_.append(dropout)
                layer_size = np.random.randint(16, 32) if self.best_params == None else self.best_params['layer_size']
                layer_size_.append(layer_size)
                print('======== Trial {} of {} ========='.format(j, self.trials-1))
                print('Batch size', bs, ' Iters', iters, ' Lr', lr, ' tlreg Lr', treg_lr, ' Layers', layers,
                     ' Dropout', dropout, ' Layer Size', layer_size)



                self.qnet = QNet(input_size=input_size_Q, num_layers=layers,
                          layers_size=layer_size, output_size=output_size_Q,
                         output_type=output_type_Q, dropout=dropout)

                self.gnet = GNet(input_size=input_size_G, num_layers=layers,
                          layers_size=layer_size, output_size=output_size_G,
                         output_type=output_type_G, dropout=dropout)


                trainer = Trainer(qnet=self.qnet, gnet=self.gnet, iterations=iters, outcome_type=output_type_Q,
                              batch_size=bs, test_iter=self.test_iter, lr=lr, treg_lr=treg_lr)
                train_loss_q_, train_loss_g_, val_loss_q_, val_loss_g_ = trainer.train(self.x,
                                                                                      self.y,
                                                                                      self.z)
                train_loss_q.append(train_loss_q_[-1])
                train_loss_g.append(train_loss_g_[-1])
                val_loss_q.append(val_loss_q_[-1])
                val_loss_g.append(val_loss_g_[-1])

                total_val_loss = val_loss_q_[-1] + val_loss_g_[-1]

                if total_val_loss < best_loss:
                    print('epsilon:', self.qnet.epsilon.item())
                    print('old loss:', best_loss)
                    print('new loss:', total_val_loss)
                    print('best model updated')
                    best_loss = total_val_loss
                    self.best_model_q = self.qnet
                    self.best_model_g = self.gnet
                j += 1
            except:
                print('Error at trial {}:', j)

        tuning_dict = {'batch_size': bs_, 'layers':layers_, 'dropout':dropout_,
                      'layer_size':layer_size_,'lr':lr_, 'iters':iters_, 'treg_lr':treg_lr_,
                      'train_loss_q':train_loss_q, 'train_loss_g':train_loss_g,
                      'val_loss_q':val_loss_q, 'val_loss_g':val_loss_g}
        
        return tuning_dict, self.best_model_q, self.best_model_g
        

## 5. Run Hyperparameter Search

Now we have everything we need, we can initialize the neural networks, run hyperparameter search to identify the best parameters.

In [6]:
# First establish ground truth treatment effect:
N = 5000000
Z, x, y, Y1, Y0 = generate_data(N, seed=0)
true_psi = (Y1-Y0).mean()


# Set some params
N = 10000
seed = 0
num_tuning_trials = 60

# data generation:
z, x, y, _, _ = generate_data(N, 0)
x = torch.tensor(x).type(torch.float32)
z = torch.tensor(z).type(torch.float32)
y = torch.tensor(y).type(torch.float32)
    
tuner = Tuner(x=x,y=y,z=z,trials=num_tuning_trials)
tuning_history, best_q, best_g = tuner.tune()

total_losses = np.asarray(tuning_history['val_loss_g']) + np.asarray(tuning_history['val_loss_q'])
best_index = np.argmin(total_losses)

best_params = {}
for key in tuning_history.keys():
    best_params[key] = tuning_history[key][best_index]

Batch size 78  Iters 43624  Lr 0.0003733993182942967  tlreg Lr 0.004495198013541288  Layers 2  Dropout 0.3225257649898322  Layer Size 19
epsilon: -0.05302128940820694
old loss: 10000000000.0
new loss: 1.2240602374076843
best model updated
Batch size 115  Iters 16712  Lr 0.002682077660233514  tlreg Lr 0.00420576216575615  Layers 3  Dropout 0.33822910047391863  Layer Size 19
Batch size 106  Iters 37992  Lr 0.0002518623386282587  tlreg Lr 0.003420400889094646  Layers 3  Dropout 0.12461431079113315  Layer Size 21
Batch size 66  Iters 60646  Lr 0.0009308256863955367  tlreg Lr 0.0005458583177761694  Layers 3  Dropout 0.16532860546429634  Layer Size 31
Batch size 66  Iters 74285  Lr 0.00356284820658788  tlreg Lr 0.001050807657046484  Layers 3  Dropout 0.1045584928737894  Layer Size 17
Batch size 41  Iters 9828  Lr 0.0048158086010098985  tlreg Lr 0.00013241558545377678  Layers 3  Dropout 0.1645543948149314  Layer Size 24
Batch size 114  Iters 92925  Lr 0.0042919854831173394  tlreg Lr 0.0037524

Batch size 82  Iters 44086  Lr 0.0004173980574009601  tlreg Lr 0.002225389107623747  Layers 3  Dropout 0.3555496966066629  Layer Size 21
Batch size 75  Iters 32394  Lr 0.0004673959770171352  tlreg Lr 0.004183748692291637  Layers 3  Dropout 0.197529351017468  Layer Size 28
Error at trial {}: 27
Batch size 83  Iters 9078  Lr 0.0010920617380885882  tlreg Lr 0.004397415580614684  Layers 2  Dropout 0.21131258286108978  Layer Size 19
Batch size 73  Iters 36453  Lr 0.0024159189565179573  tlreg Lr 0.004745579382886737  Layers 2  Dropout 0.13434110266876273  Layer Size 16
Batch size 59  Iters 96293  Lr 0.002242574385689902  tlreg Lr 0.00030159591941358985  Layers 2  Dropout 0.19071508603954257  Layer Size 16
Batch size 36  Iters 58343  Lr 0.0031387193092859283  tlreg Lr 0.002774892916405316  Layers 2  Dropout 0.12891424775825344  Layer Size 27
Error at trial {}: 30
Batch size 96  Iters 41982  Lr 0.0021040370751510508  tlreg Lr 0.0028776784497805247  Layers 3  Dropout 0.15760555048553643  Layer 

Batch size 53  Iters 67419  Lr 0.001817597188798792  tlreg Lr 0.003978974173343584  Layers 3  Dropout 0.23900659020376588  Layer Size 27
Error at trial {}: 51
Batch size 89  Iters 30167  Lr 0.003385654400760559  tlreg Lr 0.0016584953996469861  Layers 3  Dropout 0.26142818419802905  Layer Size 20
Batch size 66  Iters 60194  Lr 0.001607084061873295  tlreg Lr 0.0002746588151037081  Layers 2  Dropout 0.3704963698643874  Layer Size 17
Batch size 103  Iters 36289  Lr 0.0024182545270691434  tlreg Lr 0.0004696498500233234  Layers 2  Dropout 0.26031168874859156  Layer Size 19
Batch size 107  Iters 31501  Lr 0.004405111097911275  tlreg Lr 0.0031031193099003607  Layers 3  Dropout 0.37016295267981214  Layer Size 19
Batch size 68  Iters 17418  Lr 0.0010031635190881683  tlreg Lr 0.0013432215529990464  Layers 2  Dropout 0.19647375425279506  Layer Size 26
Batch size 59  Iters 73559  Lr 0.0028569563863496353  tlreg Lr 0.001526297516520008  Layers 3  Dropout 0.13233202452130452  Layer Size 31
Error at t

## 6. Run Simulation

Now we have the best hyperparameters, we will run the simulations accordingly

In [11]:
print(best_params)
N = 10000
seed = 0
num_runs = 60

output_type_Q = 'categorical'
output_size_Q = 1
output_type_G = 'categorical'
output_size_G = 1
input_size_Q = z.shape[-1] + 1  # we will concatenate the treatment var inside the qnet class
input_size_G = z.shape[-1]
layers = best_params['layers']
dropout = best_params['dropout']
layer_size = best_params['layer_size']
iters = best_params['iters']
lr = best_params['lr']
lr = best_params['lr']
treg_lr = best_params['treg_lr']
batch_size = best_params['batch_size']

estimates_upd = []
i = 0
while i < num_runs:
    try:
        print('=====================RUN {}==================='.format(i))
        seed += 1
        # data generation:
        z, x, y, _, _ = generate_data(N, seed=seed)
        x = torch.tensor(x).type(torch.float32)
        z = torch.tensor(z).type(torch.float32)
        y = torch.tensor(y).type(torch.float32)
        x_int1 = torch.ones_like(x)  # this is the 'intervention data'
        x_int0 = torch.zeros_like(x)    

        qnet = QNet(input_size=input_size_Q, num_layers=layers,
                              layers_size=layer_size, output_size=output_size_Q,
                             output_type=output_type_Q, dropout=dropout)

        gnet = GNet(input_size=input_size_G, num_layers=layers,
                              layers_size=layer_size, output_size=output_size_G,
                             output_type=output_type_G, dropout=dropout)


        trainer = Trainer(qnet=qnet, gnet=gnet, iterations=iters, outcome_type=output_type_Q,
                          batch_size=batch_size, test_iter=500, lr=lr, treg_lr=treg_lr)

        train_loss_q_, train_loss_g_, val_loss_q_, val_loss_g_ = trainer.train(x, y, z)

        _, _, x_pred, y_pred = trainer.test(x, y, z)
        x_pred, y_pred = x_pred.detach().numpy(), y_pred.detach().numpy()

        _, _, G10, Q1 = trainer.test(x_int1, y, z)
        _, _, _, Q0 = trainer.test(x_int0, y, z)

        Q1 = Q1.detach().numpy()
        Q0 = Q0.detach().numpy()

        upd_psi = (Q1 - Q0).mean()


        estimates_upd.append(upd_psi)
        i += 1
    except:
        print('Problem with run {}'.format(i))
    


{'batch_size': 72, 'layers': 3, 'dropout': 0.13808530460038354, 'layer_size': 16, 'lr': 0.0008474089244162126, 'iters': 74072, 'treg_lr': 0.0033217797238777216, 'train_loss_q': 0.5656583309173584, 'train_loss_g': 0.665006160736084, 'val_loss_q': 0.5169773697853088, 'val_loss_g': 0.6679068207740784}
Problem with run 0
Problem with run 6
Problem with run 9
Problem with run 11
Problem with run 12
Problem with run 14
Problem with run 14
Problem with run 18
Problem with run 21
Problem with run 21
Problem with run 21
Problem with run 22
Problem with run 26
Problem with run 28
Problem with run 29
Problem with run 31
Problem with run 33
Problem with run 34
Problem with run 35
Problem with run 40
Problem with run 42
Problem with run 42
Problem with run 42
Problem with run 45
Problem with run 46
Problem with run 48
Problem with run 49
Problem with run 49
Problem with run 49
Problem with run 52


In [12]:
estimates_upd = np.asarray(estimates_upd)


print('True psi: ', true_psi)
print('updated TMLE psi: ', estimates_upd.mean(), ' relative bias:',
      (estimates_upd.mean() - true_psi)/true_psi * 100, '%')
print('updated psi var:', estimates_upd.var())



True psi:  0.1956508
updated TMLE psi:  0.24102733  relative bias: 23.192609175004343 %
updated psi var: 0.0007042262
