In [1]:
import os
import pickle as pkl
import sys
sys.path.insert(0, '../../')
import numpy as np
import pandas as pd
from scipy.stats import norm
from sklearn.preprocessing import StandardScaler

import torch
from kernels.nn import ImplicitDenseNetKernel
from benchmarks.cmgp_modified import CMGP
from model.ick import ICK
from model.cmick import CMICK
from utils.train import CMICKEnsembleTrainer
from utils.helpers import *

# To make this notebook's output stable across runs: seed every random number
# generator with the same fixed value and configure cuDNN for repeatability.
np.random.seed(2020)  # NumPy's global RNG
torch.manual_seed(2020)  # PyTorch's default RNG
torch.cuda.manual_seed(2020)  # RNG of the current CUDA device
torch.cuda.manual_seed_all(2020)  # RNGs of all CUDA devices
# Request deterministic cuDNN algorithms and disable the cuDNN benchmark
# auto-tuner.
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False



# 1. Load IHDP data

In [2]:
def load_and_preprocess_data(rep, batch_size=32, data_dir='../../data/IHDP'):
    """Load one repetition of the IHDP benchmark and build its data generators.

    Parameters
    ----------
    rep : int
        Zero-based index of the repetition to extract (last axis of the
        arrays stored in the ``.npz`` files).
    batch_size : int, default 32
        Forwarded to ``create_generators_from_data`` as ``train_batch_size``.
    data_dir : str, default '../../data/IHDP'
        Directory containing ``ihdp_npci_1-100.train.npz`` and
        ``ihdp_npci_1-100.test.npz``.

    Returns
    -------
    data_generators
        The object returned by ``create_generators_from_data`` for the inputs
        ``[X, T]`` and the factual outcome of the train and test splits.
    data : dict
        Arrays for the chosen repetition under the keys ``X_train``,
        ``T_train``, ``Y_train``, ``X_test``, ``mu0_test``, ``mu1_test`` and
        ``mu_test`` (``mu1_test - mu0_test``).

    Raises
    ------
    IndexError
        If ``rep`` is not a valid repetition index. Without this check a
        negative ``rep`` would silently produce empty ``(n, 0)`` columns.
    """
    train_path = os.path.join(data_dir, 'ihdp_npci_1-100.train.npz')
    test_path = os.path.join(data_dir, 'ihdp_npci_1-100.test.npz')

    # An NpzFile keeps its archive open and re-reads an array on every key
    # lookup, so pull each needed array exactly once and close both archives.
    with np.load(train_path, allow_pickle=True) as train_npz, \
            np.load(test_path, allow_pickle=True) as test_npz:
        train = {key: train_npz[key] for key in ('x', 't', 'yf')}
        test = {key: test_npz[key] for key in ('x', 't', 'yf', 'ycf', 'mu0', 'mu1')}

    n_reps = min(train['t'].shape[1], test['t'].shape[1])
    if not 0 <= rep < n_reps:
        raise IndexError('rep must be in [0, %d), got %r' % (n_reps, rep))

    # Slicing (rather than integer indexing) keeps every column as shape (n, 1)
    rep_col = slice(rep, rep + 1)

    # Training data
    X_train = train['x'][:, :, rep]
    T_train = train['t'][:, rep_col]
    Y_train = train['yf'][:, rep_col]

    # Test data
    X_test = test['x'][:, :, rep]
    T_test = test['t'][:, rep_col]
    yf_test = test['yf'][:, rep_col]
    ycf_test = test['ycf'][:, rep_col]
    # Y0 is the outcome under control, Y1 the outcome under treatment: take
    # the factual value where it was observed and the counterfactual otherwise.
    Y0_test = yf_test * (1.0 - T_test) + ycf_test * T_test
    Y1_test = ycf_test * (1.0 - T_test) + yf_test * T_test
    mu0_test = test['mu0'][:, rep_col]  # mean of control outcome
    mu1_test = test['mu1'][:, rep_col]  # mean of treatment outcome
    mu_test = mu1_test - mu0_test

    data = {'X_train': X_train, 'T_train': T_train, 'Y_train': Y_train, 'X_test': X_test,
            'mu0_test': mu0_test, 'mu1_test': mu1_test, 'mu_test': mu_test}

    # Pick, per row, the potential outcome matching the received treatment
    # (column 0 = control, column 1 = treatment); result has shape (n, 1).
    Y_test = np.take_along_axis(
        np.concatenate((Y0_test, Y1_test), axis=1), T_test.astype(np.int32), axis=1
    )
    data_generators = create_generators_from_data(
        [X_train, T_train], Y_train, [X_test, T_test], Y_test, train_batch_size=batch_size
    )
    return data_generators, data

# 2. Define CMNN model

\begin{align*}
&f_1^1, f_2^1, f_3^1 \sim K_1 \\
&f_1^2, f_2^2, f_3^2 \sim K_2 \\
&Y_0(x) = \alpha_1^1 f_1^1(x) + 0 + \alpha_3^1 f_3^1(x) + \alpha_1^2 f_1^2(x) + 0 + \alpha_3^2 f_3^2(x) \\
&Y_1(x) = 0 + \alpha_2^1 f_2^1(x) + \alpha_3^1 f_3^1(x) + 0 + \alpha_2^2 f_2^2(x) + \alpha_3^2 f_3^2(x)
\end{align*}

In [3]:
def build_cmnn_ensemble(input_dim, load_weights=False, num_estimators=10,
                        checkpoint_path='./checkpoints/cmick_ihdp.pt'):
    """Build an ensemble of CMICK base learners with dense-network kernels.

    Every base learner owns six ``ICK`` components named ``f<k><j>``, where
    ``k`` (1 or 2) is the kernel index and ``j`` selects the role:
    1 = control, 2 = treatment, 3 = shared. All coefficients start at 1.0 and
    are passed to ``CMICK`` with ``coeff_trainable=True``.

    Parameters
    ----------
    input_dim : int
        Value passed as ``input_dim`` to each ``ImplicitDenseNetKernel``.
    load_weights : bool, default False
        If True, restore the kernel weights of every component from
        ``checkpoint_path``. If False, save the freshly initialised weights
        of the whole ensemble to ``checkpoint_path``.
    num_estimators : int, default 10
        Number of base learners in the ensemble.
    checkpoint_path : str, default './checkpoints/cmick_ihdp.pt'
        File the initial weights are read from or written to.

    Returns
    -------
    list
        The ``CMICK`` base learners, in creation order.
    """
    component_names = ('f11', 'f12', 'f13', 'f21', 'f22', 'f23')

    def make_component():
        # A fresh parameter dict per call so components never share config objects
        return ICK(
            kernel_assignment=['ImplicitDenseNetKernel'],
            kernel_params={
                'ImplicitDenseNetKernel': {
                    'input_dim': input_dim,
                    'latent_feature_dim': 2048,
                    'num_blocks': 0,
                    'num_layers_per_block': 1,
                    'num_units': 2048,
                    'dropout_ratio': 0.1,
                    'activation': 'softplus'
                }
            }
        )

    # Read the checkpoint once instead of once per component per estimator
    saved_weights = torch.load(checkpoint_path) if load_weights else None

    ensemble, ensemble_weights = [], {}
    for i in range(num_estimators):
        model_key = 'model_' + str(i + 1)
        # Components are created in the fixed order f11, f12, f13, f21, f22, f23;
        # this matters if their initial weights come from the seeded global RNG.
        components = {name: make_component() for name in component_names}
        if load_weights:
            for name, component in components.items():
                component.kernels[0].load_state_dict(saved_weights[model_key][name])
        else:
            ensemble_weights[model_key] = {
                name: component.kernels[0].state_dict()
                for name, component in components.items()
            }
        baselearner = CMICK(
            control_components=[components['f11'], components['f21']],
            treatment_components=[components['f12'], components['f22']],
            shared_components=[components['f13'], components['f23']],
            control_coeffs=[1.0, 1.0], treatment_coeffs=[1.0, 1.0], shared_coeffs=[1.0, 1.0],
            coeff_trainable=True
        )
        ensemble.append(baselearner)

    if not load_weights:
        checkpoint_dir = os.path.dirname(checkpoint_path)
        if checkpoint_dir:
            # exist_ok avoids the check-then-create race of exists() + makedirs()
            os.makedirs(checkpoint_dir, exist_ok=True)
        torch.save(ensemble_weights, checkpoint_path)

    return ensemble

# 3. Training and evaluation of CMNN model

In [4]:
def fit_and_evaluate_cmnn(ensemble, data_generators, mu_test, lr, treatment_index=1,
                          epochs=1000, patience=30, weight_decay=1e-3):
    """Train the CMNN ensemble and report its PEHE on the test split.

    Parameters
    ----------
    ensemble : list
        Base learners handed to ``CMICKEnsembleTrainer`` as ``model``.
    data_generators
        Data generators handed to the trainer unchanged.
    mu_test : array-like
        True treatment effect per test sample; flattened before use.
    lr : float
        Learning rate for the Adam optimiser.
    treatment_index : int, default 1
        Position of the treatment array in the model inputs (``T_train`` is at
        index 1 of ``data_train``).
    epochs, patience : int, defaults 1000 and 30
        Forwarded to the trainer.
    weight_decay : float, default 1e-3
        Weight decay for the optimiser.

    Returns
    -------
    float
        Square root of the mean squared difference between predicted and true
        treatment effects.

    Raises
    ------
    ValueError
        If the number of predicted effects differs from ``mu_test``.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    trainer = CMICKEnsembleTrainer(
        model=ensemble,
        data_generators=data_generators,
        optim='adam',
        optim_params={'lr': lr, 'weight_decay': weight_decay},
        model_save_dir=None,
        device=device,
        epochs=epochs,
        patience=patience,
        treatment_index=treatment_index
    )
    trainer.train()

    mean_test_pred, _, _ = trainer.predict()
    # Column 1 holds the treatment prediction and column 0 the control one.
    mu_test_pred = np.asarray(mean_test_pred[:, 1] - mean_test_pred[:, 0]).reshape(-1)
    # Flatten both sides: subtracting an (n, 1) array from an (n,) array would
    # broadcast to (n, n) and silently average over all sample pairs.
    mu_test_true = np.asarray(mu_test).reshape(-1)
    if mu_test_pred.shape != mu_test_true.shape:
        raise ValueError('predicted effects have shape %s but mu_test has shape %s'
                         % (mu_test_pred.shape, mu_test_true.shape))

    # PEHE
    pehe_test = np.sqrt(np.mean((mu_test_pred - mu_test_true) ** 2))
    print('PEHE (CMNN):             %.4f' % (pehe_test))

    return pehe_test

# 4. Benchmark 1: original CMGP

In [5]:
def fit_and_evaluate_original_cmgp(data):
    """Fit the CMGP benchmark and report its PEHE on the test split.

    Parameters
    ----------
    data : dict
        Must provide ``X_train``, ``T_train``, ``Y_train`` (passed to the
        ``CMGP`` constructor), ``X_test`` (passed to ``predict``) and
        ``mu_test`` (true treatment effect per test sample).

    Returns
    -------
    float
        Square root of the mean squared difference between predicted and true
        treatment effects.

    Raises
    ------
    ValueError
        If the number of predicted effects differs from ``mu_test``.
    """
    cmgp_model = CMGP(data['X_train'], data['T_train'], data['Y_train'])

    mu0_test_pred, mu1_test_pred = cmgp_model.predict(data['X_test'], return_var=False)
    # Flatten everything to 1-D so that mixing (n,) and (n, 1) arrays can never
    # broadcast the error into an (n, n) matrix.
    mu_test_pred = np.asarray(mu1_test_pred).reshape(-1) - np.asarray(mu0_test_pred).reshape(-1)
    mu_test_true = np.asarray(data['mu_test']).reshape(-1)
    if mu_test_pred.shape != mu_test_true.shape:
        raise ValueError('predicted effects have shape %s but mu_test has shape %s'
                         % (mu_test_pred.shape, mu_test_true.shape))

    pehe_test = np.sqrt(np.mean((mu_test_pred - mu_test_true) ** 2))
    print('PEHE (CMGP):             %.4f' % (pehe_test))
    return pehe_test

# 5. Main function

In [None]:
def main(n_reps=5, lr=1e-3, batch_size=32, results_dir='./results'):
    """Run the CMNN vs. CMGP comparison over several IHDP repetitions.

    For each repetition the data are loaded once, a fresh CMNN ensemble is
    built, trained and evaluated, and the CMGP benchmark is fitted on the same
    arrays. Mean and standard deviation of the per-repetition PEHE values are
    printed and pickled to ``<results_dir>/ihdp_results.pkl``.

    Parameters
    ----------
    n_reps : int, default 5
        Number of repetitions to evaluate (indices ``0 .. n_reps - 1``).
    lr : float, default 1e-3
        Learning rate passed to ``fit_and_evaluate_cmnn``.
    batch_size : int, default 32
        Training batch size passed to ``load_and_preprocess_data``.
    results_dir : str, default './results'
        Directory the result file is written to; created if missing.

    Returns
    -------
    dict
        ``{'sqrt_pehe_mean': {'cmnn': ..., 'cmgp': ...},
        'sqrt_pehe_std': {'cmnn': ..., 'cmgp': ...}}``, the same object that
        is pickled.
    """
    sqrt_pehe_cmnn_arr = np.zeros(n_reps)
    sqrt_pehe_cmgp_arr = np.zeros(n_reps)
    for i in range(n_reps):
        data_generators, data = load_and_preprocess_data(rep=i, batch_size=batch_size)
        input_dim = data['X_train'].shape[1]
        ensemble = build_cmnn_ensemble(input_dim, load_weights=False)
        sqrt_pehe_cmnn_arr[i] = fit_and_evaluate_cmnn(
            ensemble, data_generators, data['mu_test'], lr=lr)
        # The benchmark reuses the arrays loaded above instead of reading the
        # same files a second time.
        # NOTE(review): assumes generator creation and training leave the
        # arrays in `data` unmodified - confirm.
        sqrt_pehe_cmgp_arr[i] = fit_and_evaluate_original_cmgp(data)

    # Compute each summary statistic once and reuse it for printing and saving
    res = {
        'sqrt_pehe_mean': {
            'cmnn': np.mean(sqrt_pehe_cmnn_arr),
            'cmgp': np.mean(sqrt_pehe_cmgp_arr),
        },
        'sqrt_pehe_std': {
            'cmnn': np.std(sqrt_pehe_cmnn_arr),
            'cmgp': np.std(sqrt_pehe_cmgp_arr),
        },
    }
    print('PEHE (CMNN):             %.4f +/- %.4f'
          % (res['sqrt_pehe_mean']['cmnn'], res['sqrt_pehe_std']['cmnn']))
    print('PEHE (CMGP):             %.4f +/- %.4f'
          % (res['sqrt_pehe_mean']['cmgp'], res['sqrt_pehe_std']['cmgp']))

    os.makedirs(results_dir, exist_ok=True)
    with open(os.path.join(results_dir, 'ihdp_results.pkl'), 'wb') as fp:
        pkl.dump(res, fp)
    return res

# Run the full experiment only when this code is executed directly (as a
# script or notebook cell), not when it is imported as a module.
if __name__ == "__main__":
    main()