In [1]:
import sys
sys.path.insert(0, '../../')
import numpy as np
import pandas as pd
from scipy.stats import norm

from kernels.nn import ImplicitDenseNetKernel
from model.ick import ICK
from model.ick_cmgp import ICK_CMGP
from utils.train import CMGPEnsembleTrainer
from utils.helpers import *

# To make this notebook's output stable across runs
np.random.seed(2020)
torch.manual_seed(2020)
torch.cuda.manual_seed(2020)
torch.cuda.manual_seed_all(2020)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False



# 1. Load IHDP data

In [2]:
# Load data repetition function
def load_data_rep(data_train, data_test, i):
    # Training data
    X_train = data_train['x'][:, :, i]
    T_train = data_train['t'][:, i:i + 1]
    Y_train = data_train['yf'][:, i:i + 1]
    mu0_train = data_train['mu0'][:, i:i + 1]  # mu0 is the mean of control outcome
    mu1_train = data_train['mu1'][:, i:i + 1]  # mu1 is the mean of treatment outcome
    
    # Test data
    X_test = data_test['x'][:, :, i]
    T_test = data_test['t'][:, i:i + 1]
    Y0_test = data_test['yf'][:, i:i + 1] * (1.0 - data_test['t'][:, i:i + 1])  # Y0 is the outcome for control
    Y0_test += data_test['ycf'][:, i:i + 1] * data_test['t'][:, i:i + 1]
    Y1_test = data_test['ycf'][:, i:i + 1] * (1.0 - data_test['t'][:, i:i + 1])  # Y1 is the outcome for treatment
    Y1_test += data_test['yf'][:, i:i + 1] * data_test['t'][:, i:i + 1]
    mu0_test = data_test['mu0'][:, i:i + 1]
    mu1_test = data_test['mu1'][:, i:i + 1]
    
    # Log-likelihood
    ll_test = np.mean(np.log(norm.cdf(Y0_test - mu0_test + 0.5) - norm.cdf(Y0_test - mu0_test - 0.5))) + \
    np.mean(np.log(norm.cdf(Y1_test - mu1_test + 0.5) - norm.cdf(Y1_test - mu1_test - 0.5)))
    
    # Return data
    return X_train, T_train, Y_train, mu0_train, mu1_train, X_test, \
    T_test, Y0_test, Y1_test, mu0_test, mu1_test, ll_test

data_train = np.load('../../data/IHDP/ihdp_npci_1-100.train.npz', allow_pickle=True)
data_test = np.load('../../data/IHDP/ihdp_npci_1-100.test.npz', allow_pickle=True)

X_train, T_train, Y_train, mu0_train, mu1_train, X_test, T_test, \
Y0_test, Y1_test, mu0_test, mu1_test, ll_test = load_data_rep(
    data_train=data_train,
    data_test=data_test,
    i=0)
mu_test = mu1_test - mu0_test

# Define dataset and dataloaders
data_train = [X_train, T_train]
data_test = [X_test, T_test]
Y_test = np.concatenate((Y0_test, Y1_test), axis=1)[range(len(Y0_test)),T_test.astype(np.int32).reshape(-1)].reshape(-1,1)
data_generators = create_generators_from_data(data_train, Y_train, data_test, Y_test)

# 2. Define ICK-CMGP model

\begin{align*}
&f_1^1, f_2^1, f_3^1 \sim K_1 \\
&f_1^2, f_2^2, f_3^2 \sim K_2 \\
&Y_0(x) = \alpha_1^1 f_1^1(x) + \alpha_3^1 f_3^1(x) + 0 + \alpha_1^2 f_1^2(x) + \alpha_3^2 f_3^2(x) + 0 \\
&Y_1(x) = 0 + \alpha_2^1 f_2^1(x) + \alpha_3^1 f_3^1(x) + 0 + \alpha_2^2 f_2^2(x) + \alpha_3^2 f_3^2(x)
\end{align*}

In [3]:
alpha11, alpha12, alpha13 = 1.0, 1.0, 1.0
alpha21, alpha22, alpha23 = 1.0, 1.0, 1.0
num_estimators = 10

ensemble = []
for _ in range(num_estimators):
    f11 = ICK(
        kernel_assignment=['ImplicitDenseNetKernel'],
        kernel_params={
            'ImplicitDenseNetKernel':{
                'input_dim': X_train.shape[1],
                'latent_feature_dim': 512,
                'num_blocks': 1, 
                'num_layers_per_block': 1, 
                'num_units': 512, 
                'activation': 'softplus'
            }
        }
    )
    f12 = ICK(
        kernel_assignment=['ImplicitDenseNetKernel'],
        kernel_params={
            'ImplicitDenseNetKernel':{
                'input_dim': X_train.shape[1],
                'latent_feature_dim': 512,
                'num_blocks': 1, 
                'num_layers_per_block': 1, 
                'num_units': 512, 
                'activation': 'softplus'
            }
        }
    )
    f13 = ICK(
        kernel_assignment=['ImplicitDenseNetKernel'],
        kernel_params={
            'ImplicitDenseNetKernel':{
                'input_dim': X_train.shape[1],
                'latent_feature_dim': 512,
                'num_blocks': 1, 
                'num_layers_per_block': 1, 
                'num_units': 512, 
                'activation': 'softplus'
            }
        }
    )
    f21 = ICK(
        kernel_assignment=['ImplicitDenseNetKernel'],
        kernel_params={
            'ImplicitDenseNetKernel':{
                'input_dim': X_train.shape[1],
                'latent_feature_dim': 512,
                'num_blocks': 1, 
                'num_layers_per_block': 1, 
                'num_units': 512, 
                'activation': 'softplus'
            }
        }
    )
    f22 = ICK(
        kernel_assignment=['ImplicitDenseNetKernel'],
        kernel_params={
            'ImplicitDenseNetKernel':{
                'input_dim': X_train.shape[1],
                'latent_feature_dim': 512,
                'num_blocks': 1, 
                'num_layers_per_block': 1, 
                'num_units': 512, 
                'activation': 'softplus'
            }
        }
    )
    f23 = ICK(
        kernel_assignment=['ImplicitDenseNetKernel'],
        kernel_params={
            'ImplicitDenseNetKernel':{
                'input_dim': X_train.shape[1],
                'latent_feature_dim': 512,
                'num_blocks': 1, 
                'num_layers_per_block': 1, 
                'num_units': 512, 
                'activation': 'softplus'
            }
        }
    )
#     f11.kernels[0].reset_parameters_normal(w_std=np.sqrt(0.1),b_std=np.sqrt(0.1))
#     f12.kernels[0].reset_parameters_normal(w_std=np.sqrt(0.1),b_std=np.sqrt(0.1))
#     f13.kernels[0].reset_parameters_normal(w_std=np.sqrt(0.1),b_std=np.sqrt(0.1))
#     f21.kernels[0].reset_parameters_normal(w_std=np.sqrt(0.2),b_std=np.sqrt(0.2))
#     f22.kernels[0].reset_parameters_normal(w_std=np.sqrt(0.2),b_std=np.sqrt(0.2))
#     f23.kernels[0].reset_parameters_normal(w_std=np.sqrt(0.2),b_std=np.sqrt(0.2))
    baselearner = ICK_CMGP(
        control_components=[f11,f21], treatment_components=[f12,f22], shared_components=[f13,f23],
        control_coeffs=[alpha11,alpha21], treatment_coeffs=[alpha12,alpha22], shared_coeffs=[alpha13,alpha23], 
        coeff_trainable=True
    )
    ensemble.append(baselearner)

# 3. Training and evaluation of ICK-CMGP model

In [None]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
optim = 'sgd'
optim_params = {
    'lr': 1e-4, 
    'momentum': 0.99,
    'weight_decay': 1e-3
}
epochs, patience = 1000, 10
trainer = CMGPEnsembleTrainer(
    model=ensemble,
    data_generators=data_generators,
    optim=optim,
    optim_params=optim_params, 
    model_save_dir=None,
    device=device,
    epochs=epochs,
    patience=patience, 
    treatment_index=1   # The index of "T_train" in "data_train" is 1
)
trainer.train()

In [5]:
mean_test_pred, std_test_pred, y_test_true = trainer.predict()
mu_test_pred = mean_test_pred[:,1] - mean_test_pred[:,0]

# PEHE
pehe_test = np.sqrt(np.mean((mu_test_pred - mu_test) ** 2))
print('PEHE:             %.4f' % (pehe_test))

PEHE:             0.9260


# 4. Apply original CMGP framework on the same dataset

In [9]:
import GPy
from benchmarks.cmgp_modified import CMGP

K0 = GPy.kern.MLP(X_train.shape[1], weight_variance=0.1, bias_variance=0.1, ARD=False)
K1 = GPy.kern.MLP(X_train.shape[1], weight_variance=0.2, bias_variance=0.2, ARD=False)
cmgp_model = CMGP(X_train, T_train, Y_train, kernels=[K0,K1], initialize_params=False)
mu0_test_pred, mu1_test_pred = cmgp_model.predict(X_test, return_var=False)
mu_test_pred = mu1_test_pred - mu0_test_pred

# PEHE
pehe_test = np.sqrt(np.mean((mu_test_pred - mu_test) ** 2))
print('PEHE:             %.4f' % (pehe_test))

PEHE:             0.2566


In [10]:
cmgp_model = CMGP(X_train, T_train, Y_train)
mu0_test_pred, mu1_test_pred = cmgp_model.predict(X_test, return_var=False)
mu_test_pred = mu1_test_pred - mu0_test_pred

# PEHE
pehe_test = np.sqrt(np.mean((mu_test_pred - mu_test) ** 2))
print('PEHE:             %.4f' % (pehe_test))

PEHE:             0.2438
