In [1]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import KFold
import statsmodels.api as sm
from sklearn.linear_model import LogisticRegression
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.metrics import matthews_corrcoef

## Summary of Results:

$\hat Q$ is the outcome estimator, $\hat G$ is the propensity score estimator. Their respective columns tell us which estimators are use e.g. NN means a neural network was used.

'Reduction' is the relative percent error reduction when compared against the plug-in estimator using the outcome model alone. The results are averages over 60 simulations.


| Method | $\hat Q$ | $\hat G$ | Reduction $\%$ | Rel. Error $\%$ |
| --- | --- | --- | --- |--- |
| Naive | $NN$ | - |- |  4.059|
| TMLE | $NN$ | $NN$ | 1.450 | 2.608 |



## Problem Setup:

This example is taken from https://arxiv.org/abs/2107.00681 by Hines, Dukes, Diaz-Ordaz, and Vansteelandt (2021) and the empirical evaluation follows https://onlinelibrary.wiley.com/doi/full/10.1002/sim.7628 by Miguel Angel Luque-Fernandez, Michael Schomaker, Bernard Rachet, Mireille E. Schnitzer (2018).


The following experiments are very similar to the ones in ATE.ipynb, but this time we will fit the estimators using a neural network.

## 1. Define the DGP and some helper functions:

In [36]:
eps = 1e-4
def sigm(x):
    return 1/(1 + np.exp(-x))

def inv_sigm(x):
    return np.log((x + eps) / (1 - x + eps))

def generate_data(N, seed):
    np.random.seed(seed=seed)
    z1 = np.random.binomial(1, 0.5, (N,1))
    z2 = np.random.binomial(1, 0.65, (N,1))
    z3 = np.round(np.random.uniform(0, 4, (N,1)),3)
    z4 = np.round(np.random.uniform(0, 5, (N,1)),3)
    X = np.random.binomial(1, sigm(-0.4 + 0.2*z2 + 0.15*z3 + 0.2*z4 + 0.15*z2*z4), (N,1))
    Y1 = np.random.binomial(1, sigm(-1 + 1 - 0.1*z1 + 0.3*z2 + 0.25*z3 + 0.2*z4 + 0.15*z2*z4), (N,1))
    Y0 = np.random.binomial(1, sigm(-1 + 0 - 0.1*z1 + 0.3*z2 + 0.25*z3 + 0.2*z4 + 0.15*z2*z4), (N,1))
    Y = Y1 * X + Y0 * (1-X)
    Z = np.concatenate([z1,z2,z3,z4],1)
    return Z, X, Y, Y1, Y0

## 2. Define the Neural Network Objects/Classes

In [3]:


def init_weights(m):
    if isinstance(m, nn.Linear):
        torch.nn.init.xavier_normal_(m.weight)
        m.bias.data.fill_(0.01)     

class QNet(nn.Module):
    def __init__(self, input_size, num_layers, layers_size, output_size, output_type, dropout):
        super(QNet, self).__init__()      
        
        layers = []
        layers.extend([nn.Linear(input_size, layers_size), nn.LeakyReLU()])
        for i in range(num_layers-1):
            layers.extend([nn.Linear(layers_size, layers_size), nn.LeakyReLU(), nn.Dropout(p=dropout)])
        self.net = nn.Sequential(*layers)
        
        pos_arm = []
        pos_arm.extend([nn.Linear(layers_size, layers_size), nn.LeakyReLU()])
        pos_arm.extend([nn.Linear(layers_size, output_size)])     
        
        neg_arm = []
        neg_arm.extend([nn.Linear(layers_size, layers_size), nn.LeakyReLU()])
        neg_arm.extend([nn.Linear(layers_size, output_size)])    
        
        if output_type == 'categorical':
            pos_arm.append(nn.Sigmoid())
            neg_arm.append(nn.Sigmoid())
        elif output_type == 'continuous':
            pass
        self.pos_arm = nn.Sequential(*pos_arm)
        self.neg_arm = nn.Sequential(*neg_arm)
    
        self.net.apply(init_weights) 
        self.neg_arm.apply(init_weights) 
        self.pos_arm.apply(init_weights) 


    def forward(self, X, Z):
        
        out = self.net(torch.cat([X,Z],1))
        out0 = self.neg_arm(out)
        out1 = self.pos_arm(out)
        cond = X.bool()
        return torch.where(cond, out1, out0)

    
    
class GNet(nn.Module):
    def __init__(self, input_size, num_layers, layers_size, output_size, output_type, dropout):
        super(GNet, self).__init__()      
        
        layers = []
        layers.extend([nn.Linear(input_size, layers_size), nn.LeakyReLU()])
        for i in range(num_layers-1):
            layers.extend([nn.Linear(layers_size, layers_size), nn.LeakyReLU(), nn.Dropout(p=dropout)])
        layers.extend([nn.Linear(layers_size, output_size)])

        if output_type == 'categorical':
            layers.append(nn.Sigmoid())
        elif output_type == 'continuous':
            pass
        self.net = nn.Sequential(*layers)
        self.net.apply(init_weights) 
        
    def forward(self, Z):
        return self.net(Z)

## 3. Create a Neural Network training class

In [4]:
class Trainer(object):
    def __init__(self, qnet, gnet, iterations=None, batch_size=None, outcome_type='categorical', test_iter=None, lr=None):
        self.qnet = qnet
        self.gnet = gnet
        self.iterations = iterations
        self.batch_size = batch_size
        self.test_iter = test_iter
        self.outcome_type = outcome_type
        
        if lr is not None:
            self.q_optimizer = optim.Adam(qnet.parameters(), lr=lr)
            self.g_optimizer = optim.Adam(gnet.parameters(), lr=lr)
        self.bce_loss = nn.BCELoss(reduction='none')
        self.mse_loss = nn.MSELoss()
    
        
    def train(self, x, y, z):
        
        # create a small validation set
        indices = np.arange(len(x))
        np.random.shuffle(indices)
        val_inds = indices[:len(x)//8]
        train_inds = indices[len(x)//8:]
        x_val, y_val, z_val = x[val_inds], y[val_inds], z[val_inds]
        x_train, y_train, z_train = x[train_inds], y[train_inds], z[train_inds]
        
        indices = np.arange(len(x_train))
        
        train_losses_q = []
        train_losses_g = []
        test_losses_q = []
        test_losses_g = []
        
        for it in range(self.iterations):
            inds = np.random.choice(indices, self.batch_size)
            x_batch, y_batch, z_batch = x_train[inds], y_train[inds], z_train[inds]
            
            x_pred = self.gnet(z_batch)
            y_pred = self.qnet(x_batch, z_batch)
            
            if self.outcome_type == 'categorical':
                q_loss = self.bce_loss(y_pred, y_batch).mean()
            else:
                q_loss = self.mse_loss(y_pred, y_batch)
                       
            weight = torch.tensor([0.7, 0.3])
            weight_ = weight[x_batch.data.view(-1).long()].view_as(x_batch)
            g_loss = (self.bce_loss(x_pred, x_batch) * weight_).mean()
            
            q_loss.backward()
            g_loss.backward()
            
            self.q_optimizer.step()
            self.g_optimizer.step()
            self.q_optimizer.zero_grad()
            self.g_optimizer.zero_grad()
            
            if (it % self.test_iter == 0) or (it == (self.iterations-1)):
                self.qnet.eval()
                self.gnet.eval()
                x_pred = self.gnet(z_train[:800])
                y_pred = self.qnet(x_train[:800], z_train[:800])
                
                if self.outcome_type == 'categorical':
                    q_loss = self.bce_loss(y_pred, y_train[:800]).mean()
                else:
                    q_loss = self.mse_loss(y_pred, y_train[:800])
                    
                g_loss = self.bce_loss(x_pred, x_train[:800]).mean()
                train_losses_q.append(q_loss.item())
                train_losses_g.append(g_loss.item())
                
                q_loss_test, g_loss_test, _, _ = self.test(x_val, y_val, z_val)
                test_losses_q.append(q_loss_test.item())
                test_losses_g.append(g_loss_test.item())
#                 print('== Iteration {} =='.format(it))
#                 print('Test Loss Q:', q_loss_test.item(), '  Test Loss G:', g_loss_test.item())
                
                self.qnet.train()
                self.gnet.train()
        
        return train_losses_q, train_losses_g, test_losses_q, test_losses_g
    
    
    def test(self, x, y, z):
        self.qnet.eval()
        self.gnet.eval()
        
        x_pred = self.gnet(z)
        y_pred = self.qnet(x,z)
        
        if self.outcome_type == 'categorical':
            q_loss = self.bce_loss(y_pred, y).mean()
        else:
            q_loss = self.mse_loss(y_pred, y)
            
        g_loss = self.bce_loss(x_pred, x).mean()
        
        
        return q_loss, g_loss, x_pred, y_pred
    

## 4. Create a hyperparameter tuning class

In [5]:
class Tuner(object):
    def __init__(self, x, y, z, trials, best_params=None):
        self.best_params = best_params
        self.x = x
        self.y = y
        self.z = z
        self.trials = trials
        self.test_iter = 500
        self.best_params = best_params
        self.qnet = None
        self.gnet = None
        self.best_model_q = None
        self.best_model_g = None
        
    def tune(self):

        output_type_Q = 'categorical'
        output_size_Q = 1
        output_type_G = 'categorical'
        output_size_G = 1
        input_size_Q = z.shape[-1] + 1  # we will concatenate the treatment var inside the qnet class
        input_size_G = z.shape[-1]

        train_loss_q = []
        train_loss_g = []
        val_loss_q = []
        val_loss_g = []
        bs_ = []
        iters_ = []
        lr_ = []
        layers_ = []
        dropout_ = []
        layer_size_ = []
        best_loss = 1e10
        for trial in range(self.trials):
            # sample hyper params and store the history
            bs = np.random.randint(30,120) if self.best_params == None else self.best_params['batch_size']
            bs_.append(bs)
            iters = np.random.randint(5000,100000) if self.best_params == None else self.best_params['iters']
            iters_.append(iters)
            lr = np.random.uniform(0.0001, 0.01) if self.best_params == None else self.best_params['lr']
            lr_.append(lr)
            layers = np.random.randint(2, 6) if self.best_params == None else self.best_params['layers']
            layers_.append(layers)
            dropout = np.random.uniform(0.1,0.4) if self.best_params == None else self.best_params['dropout']
            dropout_.append(dropout)
            layer_size = np.random.randint(16, 128) if self.best_params == None else self.best_params['layer_size']
            layer_size_.append(layer_size)
            print('======== Trial {} of {} ========='.format(trial, self.trials-1))
            print('Batch size', bs, ' Iters', iters, ' Lr', lr, ' Layers', layers,
                 ' Dropout', dropout, ' Layer Size', layer_size)

            

            self.qnet = QNet(input_size=input_size_Q, num_layers=layers,
                      layers_size=layer_size, output_size=output_size_Q,
                     output_type=output_type_Q, dropout=dropout)
        
            self.gnet = GNet(input_size=input_size_G, num_layers=layers,
                      layers_size=layer_size, output_size=output_size_G,
                     output_type=output_type_G, dropout=dropout)


            trainer = Trainer(qnet=self.qnet, gnet=self.gnet, iterations=iters, outcome_type=output_type_Q,
                          batch_size=bs, test_iter=self.test_iter, lr=lr)
            train_loss_q_, train_loss_g_, val_loss_q_, val_loss_g_ = trainer.train(self.x,
                                                                                  self.y,
                                                                                  self.z)
            train_loss_q.append(train_loss_q_[-1])
            train_loss_g.append(train_loss_g_[-1])
            val_loss_q.append(val_loss_q_[-1])
            val_loss_g.append(val_loss_g_[-1])
            
            total_val_loss = val_loss_q_[-1] + val_loss_g_[-1]
            
            if total_val_loss < best_loss:
                print('old loss:', best_loss)
                print('new loss:', total_val_loss)
                print('best model updated')
                best_loss = total_val_loss
                self.best_model_q = self.qnet
                self.best_model_g = self.gnet

        tuning_dict = {'batch_size': bs_, 'layers':layers_, 'dropout':dropout_,
                      'layer_size':layer_size_,'lr':lr_, 'iters':iters_,
                      'train_loss_q':train_loss_q, 'train_loss_g':train_loss_g,
                      'val_loss_q':val_loss_q, 'val_loss_g':val_loss_g}
        
        return tuning_dict, self.best_model_q, self.best_model_g
        

## 5. Run Hyperparameter Search

Now we have everything we need, we can initialize the neural networks, run hyperparameter search to identify the best parameters.

In [6]:
# First establish ground truth treatment effect:
N = 5000000
Z, x, y, Y1, Y0 = generate_data(N, seed=0)
true_psi = (Y1-Y0).mean()


# Set some params
N = 10000
seed = 0
num_tuning_trials = 60

# data generation:
z, x, y, _, _ = generate_data(N, 0)
x = torch.tensor(x).type(torch.float32)
z = torch.tensor(z).type(torch.float32)
y = torch.tensor(y).type(torch.float32)
    
tuner = Tuner(x=x,y=y,z=z,trials=num_tuning_trials)
tuning_history, best_q, best_g = tuner.tune()

total_losses = np.asarray(tuning_history['val_loss_g']) + np.asarray(tuning_history['val_loss_q'])
best_index = np.argmin(total_losses)

best_params = {}
for key in tuning_history.keys():
    best_params[key] = tuning_history[key][best_index]

Batch size 78  Iters 43624  Lr 0.000652378214512967  Layers 4  Dropout 0.24005866222108652  Layer Size 62
old loss: 10000000000.0
new loss: 1.2859086394309998
best model updated
Batch size 45  Iters 48618  Lr 0.005221090611224163  Layers 3  Dropout 0.2993613094025411  Layer Size 100
old loss: 1.2859086394309998
new loss: 1.2085352540016174
best model updated
Batch size 97  Iters 39623  Lr 0.005642744726642037  Layers 5  Dropout 0.31929503500296186  Layer Size 81
old loss: 1.2085352540016174
new loss: 1.179295301437378
best model updated
Batch size 83  Iters 36428  Lr 0.004064598302245466  Layers 3  Dropout 0.38967239147412736  Layer Size 31
Batch size 49  Iters 53707  Lr 0.004296106901307403  Layers 4  Dropout 0.13701222446235628  Layer Size 55
Batch size 57  Iters 34124  Lr 0.0009893456171323955  Layers 5  Dropout 0.349807948126327  Layer Size 69
Batch size 61  Iters 91262  Lr 0.009624812957643857  Layers 5  Dropout 0.13738362131548867  Layer Size 94
Batch size 100  Iters 67242  Lr 0.

Batch size 84  Iters 82067  Lr 0.008625168192042103  Layers 4  Dropout 0.39576504862312833  Layer Size 113
Batch size 76  Iters 20404  Lr 0.003991116597253959  Layers 3  Dropout 0.28379441335774314  Layer Size 66
Batch size 70  Iters 77967  Lr 0.0016992870041747721  Layers 5  Dropout 0.3926752310771079  Layer Size 114


## 6. Run Simulation

Now we have the best hyperparameters, we will run the simulations accordingly

In [37]:
print(best_params)
N = 10000
seed = 0
num_runs = 60

output_type_Q = 'categorical'
output_size_Q = 1
output_type_G = 'categorical'
output_size_G = 1
input_size_Q = z.shape[-1] + 1  # we will concatenate the treatment var inside the qnet class
input_size_G = z.shape[-1]
layers = best_params['layers']
dropout = best_params['dropout']
layer_size = best_params['layer_size']
iters = best_params['iters']
lr = best_params['lr']
batch_size = best_params['batch_size']

estimates_naive = []
estimates_upd = []
for i in range(num_runs):
    print('=====================RUN {}==================='.format(i))
    seed += 1
    # data generation:
    z, x, y, _, _ = generate_data(N, seed=seed)
    x = torch.tensor(x).type(torch.float32)
    z = torch.tensor(z).type(torch.float32)
    y = torch.tensor(y).type(torch.float32)
    x_int1 = torch.ones_like(x)  # this is the 'intervention data'
    x_int0 = torch.zeros_like(x)    

    qnet = QNet(input_size=input_size_Q, num_layers=layers,
                          layers_size=layer_size, output_size=output_size_Q,
                         output_type=output_type_Q, dropout=dropout)

    gnet = GNet(input_size=input_size_G, num_layers=layers,
                          layers_size=layer_size, output_size=output_size_G,
                         output_type=output_type_G, dropout=dropout)


    trainer = Trainer(qnet=qnet, gnet=gnet, iterations=iters, outcome_type=output_type_Q,
                      batch_size=batch_size, test_iter=500, lr=lr)

    train_loss_q_, train_loss_g_, val_loss_q_, val_loss_g_ = trainer.train(x, y, z)

    _, _, x_pred, y_pred = trainer.test(x, y, z)
    x_pred, y_pred = x_pred.detach().numpy(), y_pred.detach().numpy()
    
    _, _, _, Q10 = trainer.test(x, y, z)
    _, _, G10, Q1 = trainer.test(x_int1, y, z)
    _, _, _, Q0 = trainer.test(x_int0, y, z)
    Q10 = Q10.detach().numpy()
    Q1 = Q1.detach().numpy()
    Q0 = Q0.detach().numpy()
    biased_psi = (Q1-Q0).mean()

    G10 = np.clip(G10.detach().numpy(), a_min=0.01, a_max=0.99)

    x_ = x.detach().numpy()
    y_ = y.detach().numpy()

    H1 = x_/(G10)
    H0 = (1-x_) / (1 - G10)

    eps0, eps1 = sm.GLM(y_, np.concatenate([H0, H1], 1), offset=inv_sigm(Q10[:,0]),
                        family=sm.families.Binomial()).fit().params

    Q0_star = sigm(inv_sigm(Q0) + eps0 * H0)
    Q1_star = sigm(inv_sigm(Q1) + eps1 * H1)

    upd_psi = (Q1_star - Q0_star).mean()

    estimates_naive.append(biased_psi)
    estimates_upd.append(upd_psi)

{'batch_size': 50, 'layers': 4, 'dropout': 0.1556149275710231, 'layer_size': 110, 'lr': 0.00835623860287358, 'iters': 77539, 'train_loss_q': 0.5121873617172241, 'train_loss_g': 0.675466001033783, 'val_loss_q': 0.5054511427879333, 'val_loss_g': 0.6706466674804688}


In [38]:
estimates_upd = np.asarray(estimates_upd)
estimates_naive = np.asarray(estimates_naive)

print('True psi: ', true_psi)
print('naive psi: ', estimates_naive.mean(), ' relative bias:',
      (estimates_naive.mean() - true_psi)/true_psi * 100, '%')
print('updated TMLE psi: ', estimates_upd.mean(), ' relative bias:',
      (estimates_upd.mean() - true_psi)/true_psi * 100, '%')
print('Reduction in bias:', np.abs(estimates_naive.mean() - true_psi)/true_psi * 100 - 
     np.abs(estimates_upd.mean() - true_psi)/true_psi * 100, '%')



True psi:  0.1956508
naive psi:  0.20950554  relative bias: 7.081362873422857 %
updated TMLE psi:  0.20527059  relative bias: 4.916815263714599 %
Reduction in bias: 2.164547609708258 %


In [39]:
# This takes the reduction in relative bias for each simulation first, then takes an average
# (Owing to the nonlinearity of the ||x|| function, this gives different results which are
# worth considering.)
print('naive psi var:', estimates_naive.var())
print('updated psi var:', estimates_upd.var())
errors_naive = (estimates_naive - true_psi)/true_psi *100
errors_updated = (estimates_upd - true_psi)/true_psi *100
diff_errors = np.abs(errors_naive) - np.abs(errors_updated)
print('Average of reductions:', diff_errors.mean(), '%')

naive psi var: 0.0018162876
updated psi var: 0.0008233825
Average of reductions: 6.0977077 %
