In [1]:
import os, sys
sys.path.insert(0, '../../')
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler

from kernels.nn import ImplicitDenseNetKernel
from model.ick import ICK
from model.ick_cmgp import ICK_CMGP
from utils.train import CMGPEnsembleTrainer
from utils.losses import *
from utils.helpers import *

# To make this notebook's output stable across runs
np.random.seed(2020)
torch.manual_seed(2020)
torch.cuda.manual_seed(2020)
torch.cuda.manual_seed_all(2020)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False



# 1. Load and preprocess data

In [2]:
N_train = 200   # The original CMGP framework does not support large datasets so we limit the dataset size
N_test = 800
train_dir = '../../data/ACIC_challenge/high_dimensional_datasets/'
train_filenames = [x for x in os.listdir(train_dir) if x.split('.')[-1] == 'csv']
train_df = pd.concat([pd.read_csv(train_dir+x) for x in train_filenames], ignore_index=True)
test_df = pd.read_csv('../../data/ACIC_challenge/TestDatasets_highD/highDim_testdataset1.csv')
test_eval_df = pd.read_csv('../../data/ACIC_challenge/TestDatasets_highD/highDim_testdataset1_cf.csv')
for c in train_df.columns:
    if c != 'Y' and c != 'A':
        scaler = StandardScaler()
        train_df[c] = scaler.fit_transform(train_df[c].to_numpy().reshape(-1,1)).reshape(-1)
for c in test_df.columns:
    if c != 'Y' and c != 'A':
        scaler = StandardScaler()
        test_df[c] = scaler.fit_transform(test_df[c].to_numpy().reshape(-1,1)).reshape(-1)

# Training data
X_train = train_df.to_numpy()[:N_train, 2:]
T_train = train_df.to_numpy()[:N_train, 1][:,None]
Y_train = train_df.to_numpy()[:N_train, 0][:,None]

# Test data
X_test = test_df.to_numpy()[:N_test, 2:]
T_test = test_df.to_numpy()[:N_test, 1][:,None]
Y_test = test_df.to_numpy()[:N_test, 0][:,None]
mu0_test = test_eval_df.to_numpy()[:N_test, -1][:,None]
mu1_test = test_eval_df.to_numpy()[:N_test, -2][:,None]
mu_test = mu1_test - mu0_test

# Initialize dataloaders
data_train = [X_train, T_train]
data_test = [X_test, T_test]
data_generators = create_generators_from_data(data_train, Y_train, data_test, Y_test, 
                                              train_batch_size=256, test_batch_size=1000)

# 2. Define ICK-CMGP model

In [3]:
alpha11, alpha12, alpha13 = 0.1, 0.1, 1.0
alpha21, alpha22, alpha23 = 0.1, 0.1, 1.0
num_estimators = 10

ensemble = []
for _ in range(num_estimators):
    f11 = ICK(
        kernel_assignment=['ImplicitDenseNetKernel'],
        kernel_params={
            'ImplicitDenseNetKernel':{
                'input_dim': X_train.shape[1],
                'latent_feature_dim': 512,
                'num_blocks': 1, 
                'num_layers_per_block': 1, 
                'num_units': 512, 
                'activation': 'relu'
            }
        }
    )
    f12 = ICK(
        kernel_assignment=['ImplicitDenseNetKernel'],
        kernel_params={
            'ImplicitDenseNetKernel':{
                'input_dim': X_train.shape[1],
                'latent_feature_dim': 512,
                'num_blocks': 1, 
                'num_layers_per_block': 1, 
                'num_units': 512, 
                'activation': 'relu'
            }
        }
    )
    f13 = ICK(
        kernel_assignment=['ImplicitDenseNetKernel'],
        kernel_params={
            'ImplicitDenseNetKernel':{
                'input_dim': X_train.shape[1],
                'latent_feature_dim': 512,
                'num_blocks': 1, 
                'num_layers_per_block': 1, 
                'num_units': 512, 
                'activation': 'relu'
            }
        }
    )
    f21 = ICK(
        kernel_assignment=['ImplicitDenseNetKernel'],
        kernel_params={
            'ImplicitDenseNetKernel':{
                'input_dim': X_train.shape[1],
                'latent_feature_dim': 512,
                'num_blocks': 1, 
                'num_layers_per_block': 1, 
                'num_units': 512, 
                'activation': 'relu'
            }
        }
    )
    f22 = ICK(
        kernel_assignment=['ImplicitDenseNetKernel'],
        kernel_params={
            'ImplicitDenseNetKernel':{
                'input_dim': X_train.shape[1],
                'latent_feature_dim': 512,
                'num_blocks': 1, 
                'num_layers_per_block': 1, 
                'num_units': 512, 
                'activation': 'relu'
            }
        }
    )
    f23 = ICK(
        kernel_assignment=['ImplicitDenseNetKernel'],
        kernel_params={
            'ImplicitDenseNetKernel':{
                'input_dim': X_train.shape[1],
                'latent_feature_dim': 512,
                'num_blocks': 1, 
                'num_layers_per_block': 1, 
                'num_units': 512, 
                'activation': 'relu'
            }
        }
    )
    baselearner = ICK_CMGP(
        control_components=[f11,f21], treatment_components=[f12,f22], shared_components=[f13,f23],
        control_coeffs=[alpha11,alpha21], treatment_coeffs=[alpha12,alpha22], shared_coeffs=[alpha13,alpha23], 
        coeff_trainable=True, output_binary=True
    )
    ensemble.append(baselearner)

# 3. Training and evaluation of ICK-CMGP model

In [None]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
optim = 'sgd'
optim_params = {
    'lr': 5e-3, 
    'momentum': 0.99,
    'weight_decay': 0.00
}
epochs, patience = 1000, 10
trainer = CMGPEnsembleTrainer(
    model=ensemble,
    data_generators=data_generators,
    optim=optim,
    optim_params=optim_params, 
    model_save_dir=None,
    device=device,
    epochs=epochs,
    patience=patience, 
    treatment_index=1   # The index of "T_train" in "data_train" is 1
)
trainer.train()

In [5]:
mean_test_pred, std_test_pred, y_test_true = trainer.predict()
mu_test_pred = mean_test_pred[:,1] - mean_test_pred[:,0]

# PEHE
pehe_test = np.sqrt(np.mean((mu_test_pred - mu_test) ** 2))
print('PEHE:             %.4f' % (pehe_test))

PEHE:             0.1543


# 4. Apply original CMGP framework on the same dataset

In [3]:
from benchmarks.cmgp_modified import CMGP
cmgp_model = CMGP(X_train, T_train, Y_train)
mu0_test_pred, mu1_test_pred = cmgp_model.predict(X_test, return_var=False)
mu_test_pred = mu1_test_pred - mu0_test_pred

# PEHE
pehe_test = np.sqrt(np.mean((mu_test_pred - mu_test) ** 2))
print('PEHE:             %.4f' % (pehe_test))



PEHE:             0.1551
