In [1]:
import sys
sys.path.insert(0, '../../')
import argparse
import pandas as pd
import numpy as np
import torch
from sklearn.preprocessing import StandardScaler

from model.ick import ICK
from utils.helpers import create_generators_from_data
from kernels.kernel_fn import matern_type1_kernel_nys, periodic_kernel_nys
from utils.train import EnsembleTrainer

# To make outputs stable across runs: seed every RNG used below and force
# cuDNN onto deterministic code paths.
SEED = 2020
np.random.seed(SEED)
torch.manual_seed(SEED)
# The CUDA seeding calls are silently ignored when CUDA is unavailable.
torch.cuda.manual_seed(SEED)
torch.cuda.manual_seed_all(SEED)
torch.backends.cudnn.deterministic = True
# cuDNN autotuning (benchmark mode) may select different convolution
# algorithms from run to run, so it must be off for reproducible results.
torch.backends.cudnn.benchmark = False

In [2]:
def _extract_features_and_target(frame):
    """Return columns 1, 2 and 3 of ``frame`` as NumPy arrays ``(x1, x2, y)``."""
    return tuple(np.array(frame[frame.columns[idx]]) for idx in (1, 2, 3))


def _standardize_splits(train, val, test):
    """Standardize three 1-D arrays with a scaler fitted on ``train`` only."""
    scaler = StandardScaler()
    train_scaled = scaler.fit_transform(train.reshape(-1, 1)).squeeze()
    val_scaled = scaler.transform(val.reshape(-1, 1)).squeeze()
    test_scaled = scaler.transform(test.reshape(-1, 1)).squeeze()
    return train_scaled, val_scaled, test_scaled


def preprocess_data(batch_size, random_state, data_path='../../data/UCI_ml_repo/3DRoad.txt'):
    """Load the 3DRoad CSV, split it, standardize it and build data generators.

    The rows are split without replacement into train (4/9 of all rows),
    validation (2/9 of all rows) and test (the remaining rows). Columns 1 and
    2 are used as the two inputs and column 3 as the target; column 0 is not
    used. Every variable is standardized with statistics fitted on the
    training split only.

    Args:
        batch_size: Batch size used for the train, validation and test generators.
        random_state: Seed passed to ``DataFrame.sample`` for both sampling steps.
        data_path: Path of the comma-separated, header-less data file. Defaults
            to the location used originally.

    Returns:
        Whatever ``create_generators_from_data`` returns for the three splits.
    """
    full = pd.read_csv(data_path, delimiter=',', header=None)
    num_rows = len(full)

    # Sequential sampling: train first, then validation from what is left,
    # and the test split is everything that remains.
    df_train = full.sample(n=int(num_rows * 4 / 9), replace=False, random_state=random_state)
    remaining = full.drop(df_train.index, axis=0)
    df_val = remaining.sample(n=int(num_rows * 2 / 9), replace=False, random_state=random_state)
    df_test = remaining.drop(df_val.index, axis=0)

    X1_train, X2_train, y_train = _extract_features_and_target(df_train)
    X1_val, X2_val, y_val = _extract_features_and_target(df_val)
    X1_test, X2_test, y_test = _extract_features_and_target(df_test)

    # Each variable gets its own scaler so no fitted state is shared between
    # the inputs and the target.
    X1_train, X1_val, X1_test = _standardize_splits(X1_train, X1_val, X1_test)
    X2_train, X2_val, X2_test = _standardize_splits(X2_train, X2_val, X2_test)
    y_train, y_val, y_test = _standardize_splits(y_train, y_val, y_test)

    # NOTE(review): positional order is train, test, val (unchanged from the
    # original call) -- confirm against create_generators_from_data's signature.
    data_generators = create_generators_from_data(
        [X1_train, X2_train], y_train, [X1_test, X2_test], y_test, [X1_val, X2_val], y_val,
        train_batch_size=batch_size, val_batch_size=batch_size, test_batch_size=batch_size
    )
    return data_generators

In [3]:
def train_ick_ensemble(data_generators, input_dim, depth, width, latent_feature_dim, lr, weight_decay, 
                       epochs, patience, verbose, num_estimators=10):
    """Build an ensemble of ICK models, train it and return its predictions.

    Each ensemble member combines an ``ImplicitDenseNetKernel`` with an
    ``ImplicitNystromKernel`` that uses ``matern_type1_kernel_nys``. Training
    uses SGD with momentum 0.9 on CUDA when available, otherwise on CPU.

    Args:
        data_generators: Data generators handed to ``EnsembleTrainer``.
        input_dim: ``input_dim`` of the dense-net kernel.
        depth: Number of blocks in the dense-net kernel (one layer per block).
        width: Number of units per layer in the dense-net kernel.
        latent_feature_dim: Latent feature size of the dense-net kernel; also
            used as the number of Nystrom inducing points.
        lr: SGD learning rate.
        weight_decay: SGD weight decay.
        epochs: Maximum number of training epochs.
        patience: Patience value forwarded to ``EnsembleTrainer``.
        verbose: Verbosity level forwarded to ``EnsembleTrainer``.
        num_estimators: Number of ICK models in the ensemble (default 10, the
            previously hard-coded value).

    Returns:
        The result of ``trainer.predict()``.

    Raises:
        ValueError: If ``num_estimators`` is smaller than 1.
    """
    if num_estimators < 1:
        raise ValueError("num_estimators must be at least 1, got {}".format(num_estimators))

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    kernel_assignment = ['ImplicitDenseNetKernel', 'ImplicitNystromKernel']
    kernel_params = { 
        'ImplicitDenseNetKernel': {
            'input_dim': input_dim, 
            'latent_feature_dim': latent_feature_dim, 
            'num_blocks': depth,
            'num_layers_per_block': 1, 
            'num_units': width,
        }, 
        'ImplicitNystromKernel': {
            'kernel_func': matern_type1_kernel_nys, 
            # Initial values for 'std', 'lengthscale' and 'noise'; all three are trainable.
            'params': ['std','lengthscale','noise'], 
            'vals': [1., 0.5, 0.1], 
            'trainable': [True,True,True], 
            'alpha': 1e-5, 
            'num_inducing_points': latent_feature_dim, 
            # NOTE(review): presumably the range of the standardized second
            # input over which inducing points are placed -- confirm.
            'nys_space': [[-1.73,2.3]]
        }
    }
    ensemble = [ICK(kernel_assignment, kernel_params) for _ in range(num_estimators)]
    optim = 'sgd'
    optim_params = {
        'lr': lr,
        'momentum': 0.9, 
        'weight_decay': weight_decay
    }
    trainer = EnsembleTrainer(
        ensemble, 
        data_generators, 
        optim, 
        optim_params, 
        num_jobs=1,
        device=device, 
        epochs=epochs, 
        patience=patience, 
        verbose=verbose
    )
    trainer.train()
    return trainer.predict()

In [4]:
def main(args):
    """Train and evaluate the ICK ensemble over several data splits.

    For each random state, the data is re-split, an ensemble is trained, and
    the test RMSE and Gaussian negative log-likelihood are printed. After all
    runs, the mean and standard deviation of both metrics across the runs are
    printed.

    Args:
        args: Namespace providing ``batch_size``, ``input_dim``, ``depth``,
            ``width``, ``latent_feature_dim``, ``lr``, ``weight_decay``,
            ``epochs``, ``patience`` and ``verbose``.
    """
    random_states, eps = [42, 43, 44], 1e-6
    rmse_arr, nll_arr = [], []
    for random_state in random_states:
        print("random state = {}".format(random_state))
        data_generators = preprocess_data(args.batch_size, random_state=random_state)
        y_test_pred_mean, y_test_pred_std, y_test_true = train_ick_ensemble(
            data_generators, 
            args.input_dim, 
            args.depth, 
            args.width, 
            args.latent_feature_dim, 
            args.lr, 
            args.weight_decay, 
            args.epochs, 
            args.patience, 
            args.verbose
        )
        squared_error = (y_test_pred_mean - y_test_true)**2
        # Clamp the predictive variance from below so neither the log nor the
        # division blows up when the ensemble members agree almost exactly.
        variance = np.maximum(y_test_pred_std**2, eps)
        rmse = np.sqrt(np.mean(squared_error))
        # Gaussian NLL up to the additive constant 0.5*log(2*pi).
        nll = np.mean(0.5 * (np.log(variance) + squared_error/variance))
        print("RMSE (ICKy) = {:.4f}".format(rmse))
        print("NLL (ICKy) = {:.4f}".format(nll))
        rmse_arr.append(rmse)
        nll_arr.append(nll)
    print("Final results:")
    # Aggregate over all runs, not just the metrics of the last iteration.
    print("Final RMSE (ICKy) = {:.4f} +/- {:.4f}".format(np.mean(rmse_arr), np.std(rmse_arr)))
    print("Final NLL (ICKy) = {:.4f} +/- {:.4f}".format(np.mean(nll_arr), np.std(nll_arr)))

In [5]:
# Entry point: collect hyperparameters from the command line (falling back to
# the defaults below) and run the full train/evaluate loop.
if __name__ == '__main__':
    arg_parser = argparse.ArgumentParser(description='Train an ICK ensemble model on 3DRoad data.')
    # Architecture of the dense-net kernel.
    arg_parser.add_argument('--input_dim', type=int, default=1)
    arg_parser.add_argument('--depth', type=int, default=2)
    arg_parser.add_argument('--width', type=int, default=20)
    arg_parser.add_argument('--latent_feature_dim', type=int, default=16)
    # Optimization settings.
    arg_parser.add_argument('--lr', type=float, default=1e-4)
    arg_parser.add_argument('--batch_size', type=int, default=512)
    arg_parser.add_argument('--weight_decay', type=float, default=0)
    arg_parser.add_argument('--epochs', type=int, default=100)
    arg_parser.add_argument('--patience', type=int, default=15)
    arg_parser.add_argument('--verbose', type=int, default=0)
    # parse_known_args ignores unrecognized argv entries instead of exiting;
    # presumably needed because the notebook kernel passes its own arguments.
    args = arg_parser.parse_known_args()[0]
    main(args)

random state = 42
Training started:

Epoch 1/100
Learning rate: 0.000100


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


Training time - 28s - loss 0.9937
Validation:


[Parallel(n_jobs=1)]: Done  10 out of  10 | elapsed:   27.7s finished


9s - loss 0.9574

Epoch 2/100
Learning rate: 0.000100


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


Training time - 25s - loss 0.9507
Validation:


[Parallel(n_jobs=1)]: Done  10 out of  10 | elapsed:   24.9s finished


9s - loss 0.9288

Epoch 3/100
Learning rate: 0.000100


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


Training time - 25s - loss 0.9240
Validation:


[Parallel(n_jobs=1)]: Done  10 out of  10 | elapsed:   24.9s finished


9s - loss 0.9065

Epoch 4/100
Learning rate: 0.000100


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


Training time - 25s - loss 0.9024
Validation:


[Parallel(n_jobs=1)]: Done  10 out of  10 | elapsed:   25.2s finished


9s - loss 0.8863

Epoch 5/100
Learning rate: 0.000100


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


Training time - 25s - loss 0.8829
Validation:


[Parallel(n_jobs=1)]: Done  10 out of  10 | elapsed:   25.2s finished


9s - loss 0.8671

Epoch 6/100
Learning rate: 0.000100


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


Training time - 25s - loss 0.8645
Validation:


[Parallel(n_jobs=1)]: Done  10 out of  10 | elapsed:   25.2s finished


9s - loss 0.8483

Epoch 7/100
Learning rate: 0.000100


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


Training time - 25s - loss 0.8468
Validation:


[Parallel(n_jobs=1)]: Done  10 out of  10 | elapsed:   25.1s finished


9s - loss 0.8301

Epoch 8/100
Learning rate: 0.000100


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


Training time - 25s - loss 0.8299
Validation:


[Parallel(n_jobs=1)]: Done  10 out of  10 | elapsed:   25.2s finished


9s - loss 0.8124

Epoch 9/100
Learning rate: 0.000100


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


Training time - 25s - loss 0.8139
Validation:


[Parallel(n_jobs=1)]: Done  10 out of  10 | elapsed:   25.2s finished


9s - loss 0.7956

Epoch 10/100
Learning rate: 0.000100


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


Training time - 25s - loss 0.7989
Validation:


[Parallel(n_jobs=1)]: Done  10 out of  10 | elapsed:   25.1s finished


9s - loss 0.7799

Epoch 11/100
Learning rate: 0.000100


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


Training time - 26s - loss 0.7849
Validation:


[Parallel(n_jobs=1)]: Done  10 out of  10 | elapsed:   25.5s finished


9s - loss 0.7656

Epoch 12/100
Learning rate: 0.000100


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


Training time - 26s - loss 0.7720
Validation:


[Parallel(n_jobs=1)]: Done  10 out of  10 | elapsed:   25.6s finished


9s - loss 0.7526

Epoch 13/100
Learning rate: 0.000100


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


Training time - 26s - loss 0.7602
Validation:


[Parallel(n_jobs=1)]: Done  10 out of  10 | elapsed:   25.7s finished


9s - loss 0.7411

Epoch 14/100
Learning rate: 0.000100


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


Training time - 26s - loss 0.7494
Validation:


[Parallel(n_jobs=1)]: Done  10 out of  10 | elapsed:   25.7s finished


9s - loss 0.7308

Epoch 15/100
Learning rate: 0.000100


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


Training time - 26s - loss 0.7397
Validation:


[Parallel(n_jobs=1)]: Done  10 out of  10 | elapsed:   26.1s finished


9s - loss 0.7218

Epoch 16/100
Learning rate: 0.000100


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


Training time - 25s - loss 0.7310
Validation:


[Parallel(n_jobs=1)]: Done  10 out of  10 | elapsed:   25.2s finished


9s - loss 0.7140

Epoch 17/100
Learning rate: 0.000100


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


Training time - 25s - loss 0.7231
Validation:


[Parallel(n_jobs=1)]: Done  10 out of  10 | elapsed:   25.0s finished


9s - loss 0.7071

Epoch 18/100
Learning rate: 0.000100


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


Training time - 25s - loss 0.7161
Validation:


[Parallel(n_jobs=1)]: Done  10 out of  10 | elapsed:   25.1s finished


9s - loss 0.7009

Epoch 19/100
Learning rate: 0.000100


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


Training time - 25s - loss 0.7098
Validation:


[Parallel(n_jobs=1)]: Done  10 out of  10 | elapsed:   25.1s finished


9s - loss 0.6955

Epoch 20/100
Learning rate: 0.000100


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


Training time - 25s - loss 0.7042
Validation:


[Parallel(n_jobs=1)]: Done  10 out of  10 | elapsed:   25.1s finished


9s - loss 0.6906

Epoch 21/100
Learning rate: 0.000100


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


Training time - 25s - loss 0.6992
Validation:


[Parallel(n_jobs=1)]: Done  10 out of  10 | elapsed:   25.0s finished


9s - loss 0.6861

Epoch 22/100
Learning rate: 0.000100


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


Training time - 25s - loss 0.6946
Validation:


[Parallel(n_jobs=1)]: Done  10 out of  10 | elapsed:   25.0s finished


9s - loss 0.6821

Epoch 23/100
Learning rate: 0.000100


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


Training time - 25s - loss 0.6904
Validation:


[Parallel(n_jobs=1)]: Done  10 out of  10 | elapsed:   25.0s finished


9s - loss 0.6783

Epoch 24/100
Learning rate: 0.000100


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


Training time - 25s - loss 0.6865
Validation:


[Parallel(n_jobs=1)]: Done  10 out of  10 | elapsed:   24.8s finished


9s - loss 0.6747

Epoch 25/100
Learning rate: 0.000100


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


Training time - 25s - loss 0.6829
Validation:


[Parallel(n_jobs=1)]: Done  10 out of  10 | elapsed:   24.8s finished


9s - loss 0.6714

Epoch 26/100
Learning rate: 0.000100


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


Training time - 24s - loss 0.6795
Validation:


[Parallel(n_jobs=1)]: Done  10 out of  10 | elapsed:   24.5s finished


9s - loss 0.6682

Epoch 27/100
Learning rate: 0.000100


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


KeyboardInterrupt: 