In [1]:
import sys
sys.path.insert(0, '../../')
import numpy as np
import pandas as pd
from scipy.stats import norm

from kernels.nn import ImplicitDenseNetKernel
from model.ick import ICK
from model.ick_cmgp import ICK_CMGP
from utils.train import CMGPEnsembleTrainer
from utils.helpers import *

# To make this notebook's output stable across runs
np.random.seed(2020)
torch.manual_seed(2020)
torch.cuda.manual_seed(2020)
torch.cuda.manual_seed_all(2020)
torch.backends.cudnn.deterministic = True



# 1. Load IHDP data

In [2]:
# Load data repetition function
def load_data_rep(data_train, data_test, i):
    # Training data
    X_train = data_train['x'][:, :, i]
    T_train = data_train['t'][:, i:i + 1]
    Y_train = data_train['yf'][:, i:i + 1]
    mu0_train = data_train['mu0'][:, i:i + 1]  # mu0 is the mean of control outcome
    mu1_train = data_train['mu1'][:, i:i + 1]  # mu1 is the mean of treatment outcome
    
    # Test data
    X_test = data_test['x'][:, :, i]
    T_test = data_test['t'][:, i:i + 1]
    Y0_test = data_test['yf'][:, i:i + 1] * (1.0 - data_test['t'][:, i:i + 1])  # Y0 is the outcome for control
    Y0_test += data_test['ycf'][:, i:i + 1] * data_test['t'][:, i:i + 1]
    Y1_test = data_test['ycf'][:, i:i + 1] * (1.0 - data_test['t'][:, i:i + 1])  # Y1 is the outcome for treatment
    Y1_test += data_test['yf'][:, i:i + 1] * data_test['t'][:, i:i + 1]
    mu0_test = data_test['mu0'][:, i:i + 1]
    mu1_test = data_test['mu1'][:, i:i + 1]
    
    # Log-likelihood
    ll_test = np.mean(np.log(norm.cdf(Y0_test - mu0_test + 0.5) - norm.cdf(Y0_test - mu0_test - 0.5))) + \
    np.mean(np.log(norm.cdf(Y1_test - mu1_test + 0.5) - norm.cdf(Y1_test - mu1_test - 0.5)))
    
    # Return data
    return X_train, T_train, Y_train, mu0_train, mu1_train, X_test, \
    T_test, Y0_test, Y1_test, mu0_test, mu1_test, ll_test

data_train = np.load('../../data/ihdp_npci_1-100.train.npz', allow_pickle=True)
data_test = np.load('../../data/ihdp_npci_1-100.test.npz', allow_pickle=True)

X_train, T_train, Y_train, mu0_train, mu1_train, X_test, T_test, \
Y0_test, Y1_test, mu0_test, mu1_test, ll_test = load_data_rep(
    data_train=data_train,
    data_test=data_test,
    i=0)
mu_test = mu1_test - mu0_test

# Define dataset and dataloaders
data_train = [X_train, T_train]
data_test = [X_test, T_test]
Y_test = np.concatenate((Y0_test, Y1_test), axis=1)[range(len(Y0_test)),T_test.astype(np.int32).reshape(-1)].reshape(-1,1)
data_generators = create_generators_from_data(data_train, Y_train, data_test, Y_test)

# 2. Define ICK-CMGP model

$Y_0(x) = \alpha_1 f_1(x) + \alpha_2 f_2(x) + 0 \\$
$Y_1(x) = 0 + \alpha_2 f_2(x) + \alpha_3 f_3(x)$

In [3]:
# Y0(x) = alpha1 * f1(x) + alpha2 * f2(x) + 0 * f3(x)
# Y1(x) = 0 * f1(x) + alpha2 * f2(x) + alpha3 * f3(x)
alpha1, alpha2, alpha3 = 1.0, 1.0, 1.0
f1 = ICK(
    kernel_assignment=['ImplicitDenseNetKernel'],
    kernel_params={
        'ImplicitDenseNetKernel':{
            'input_dim': X_train.shape[1],
            'latent_feature_dim': 16,
            'num_blocks': 1, 
            'num_layers_per_block': 1, 
            'num_units': 64
        }
    }
)
f2 = ICK(
    kernel_assignment=['ImplicitDenseNetKernel'],
    kernel_params={
        'ImplicitDenseNetKernel':{
            'input_dim': X_train.shape[1],
            'latent_feature_dim': 16,
            'num_blocks': 1, 
            'num_layers_per_block': 1, 
            'num_units': 64
        }
    }
)
f3 = ICK(
    kernel_assignment=['ImplicitDenseNetKernel'],
    kernel_params={
        'ImplicitDenseNetKernel':{
            'input_dim': X_train.shape[1],
            'latent_feature_dim': 16,
            'num_blocks': 2, 
            'num_layers_per_block': 1, 
            'num_units': 64
        }
    }
)
num_estimators = 100
ensemble = [ICK_CMGP(
    control_components=[f1], treatment_components=[f2], shared_components=[f3],
    control_coeffs=[alpha1], treatment_coeffs=[alpha2], shared_coeffs=[alpha3]
) for _ in range(num_estimators)]

# 3. Training and evaluation of ICK-CMGP model

In [None]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
optim = 'sgd'
optim_params = {
    'lr': 5e-6, 
    'momentum': 0.9,
    'weight_decay': 0
}
epochs, patience = 200, 10
trainer = CMGPEnsembleTrainer(
    model=ensemble,
    data_generators=data_generators,
    optim=optim,
    optim_params=optim_params, 
    model_save_dir=None,
    device=device,
    epochs=epochs,
    patience=patience, 
    treatment_index=1   # The index of "T_train" in "data_train" is 1
)
trainer.train()

In [5]:
mean_test_pred, std_test_pred, y_test_true = trainer.predict()
mu_test_pred = mean_test_pred[:,1] - mean_test_pred[:,0]

# PEHE
pehe_test = np.sqrt(np.mean((mu_test_pred - mu_test) ** 2))
print('PEHE:             %.4f' % (pehe_test))

PEHE:             0.9740
