This notebook is only used to measure the actual training time of ICKy and compare it with that of vanilla neural networks (NNs) and Gaussian processes (GPs).

In [1]:
import sys
import time
sys.path.insert(0, '../../')
import torch
import numpy as np
from tqdm.notebook import tqdm

from model.ick import ICK
from utils.train import Trainer
from kernels.kernel_fn import *
from utils.helpers import *

import gpytorch
from gpytorch.kernels import LinearKernel, SpectralMixtureKernel
from torch.distributions.multivariate_normal import MultivariateNormal

# Pin every random number generator used below to one seed so that repeated
# runs of the notebook produce the same numbers.
SEED = 2020
np.random.seed(SEED)
for seed_fn in (torch.manual_seed, torch.cuda.manual_seed, torch.cuda.manual_seed_all):
    seed_fn(SEED)
# Ask cuDNN to pick deterministic algorithms
torch.backends.cudnn.deterministic = True

### 1. Generate synthetic data

In [2]:
# Build a synthetic regression problem with two input sources (x and t) whose
# target y is one draw from a zero-mean Gaussian with a product-kernel covariance.
# NOTE: statement order matters here because the calls consume the seeded RNG
# stream in sequence; reordering them would change the generated data.

# Number of synthetic samples
N = 1500
# First input source: N values drawn uniformly from [0, 1), shaped (N, 1)
x = torch.rand(N,1)
# N x N Gram matrix of a linear kernel evaluated on x (hyperparameters as constructed)
cov_module = LinearKernel()
cov_lin = cov_module(x).evaluate().detach()
# Second input source: N evenly spaced points on [0, 2], shaped (N, 1)
t = torch.linspace(0,2,N).reshape(-1,1)
# N x N Gram matrix of a 2-component spectral mixture kernel evaluated on t.
# `cov_module` is rebound here; the linear kernel object is no longer referenced.
cov_module = SpectralMixtureKernel(num_mixtures=2)
cov_sm = cov_module(t).evaluate().detach()
# Elementwise product of the two Gram matrices plus 1e-3 on the diagonal
# (presumably jitter/noise so the matrix is positive definite for sampling — TODO confirm)
cov = cov_lin*cov_sm + torch.eye(cov_lin.shape[0])*1e-3
# Draw a single length-N sample from the zero-mean multivariate normal
mv_normal_mul = MultivariateNormal(loc=torch.zeros(N),covariance_matrix=cov)
y = mv_normal_mul.sample()

# The inputs are kept as a list of two arrays (x first, then t); the target is a NumPy array
data, target = [x.numpy(), t.numpy()], y.numpy()
# NOTE(review): train_val_test_split and create_generators_from_data come from the
# wildcard imports above; the meaning of train_range and the split proportions
# are not visible in this notebook — confirm against the helper definitions.
x_train, y_train, x_val, y_val, x_test, y_test = train_val_test_split(
    data, target, shuffle_data=True, train_range=(0.,1.), random_seed=2020)
# Note the positional order passed here: train, then test, then val.
data_generators = create_generators_from_data(x_train, y_train, x_test, y_test, x_val, y_val, train_batch_size=100)

### 2. Train ICKy and record runtime

In [3]:
# Configure ICKy with two kernels. The list order is presumably matched to the
# order of the input sources in `data` ([x, t]) — TODO confirm against model.ick.
kernel_assignment = ['ImplicitDenseNetKernel', 'ImplicitNystromKernel']
kernel_params = {
    # Dense-network kernel configuration (1-D input, 16-D latent features)
    'ImplicitDenseNetKernel':{
        'input_dim': 1,
        'latent_feature_dim': 16,
        'num_blocks': 1, 
        'num_layers_per_block': 1, 
        'num_units': 64
    }, 
    # Nystrom kernel built on a 1-D spectral mixture kernel function.
    # 'nys_space' is [0, 2], the same interval `t` was generated on
    # (presumably the range the inducing points are placed in — TODO confirm).
    'ImplicitNystromKernel': {
        'kernel_func': spectral_mixture_kernel_1d_nys, 
        'params': ['weight','mean','cov','noise'], 
        'vals': [[1.0,0.0],[0.0,0.0], [0.5,0.5], 0.1], 
        'trainable': [True,True,True,True], 
        'alpha': 1e-5, 
        'num_inducing_points': 16, 
        'nys_space': [[0.,2.]]
    }
}

model = ICK(kernel_assignment, kernel_params)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
optim = 'adam'
optim_params = {
    'lr': 5e-5, 
    'weight_decay': 0.1
}
# patience equals epochs, so early stopping presumably cannot cut the run short
# and the timing covers all 100 epochs (depends on Trainer's semantics).
epochs, patience = 100, 100
trainer = Trainer(
    model=model,
    data_generators=data_generators,
    optim=optim,
    optim_params=optim_params, 
    model_save_dir=None,
    device=device,
    epochs=epochs,
    patience=patience
)

# Benchmark the training call.
# - perf_counter() is a monotonic, high-resolution clock intended for measuring
#   elapsed time; time.time() can jump if the system clock is adjusted.
# - CUDA kernels are launched asynchronously, so pending GPU work is drained
#   before the clock is read on either side of the timed region.
if device.type == "cuda":
    torch.cuda.synchronize()
start = time.perf_counter()
trainer.train()
if device.type == "cuda":
    torch.cuda.synchronize()
print("Total training time: {:.4f} seconds".format(time.perf_counter()-start))

Training started:

Epoch 1/100
Learning rate: 0.000050
0s for 15 steps - 4ms/step - loss 0.2850
Validation:
0s - loss 0.2465

Epoch 2/100
Learning rate: 0.000050
0s for 15 steps - 4ms/step - loss 0.2765
Validation:
0s - loss 0.2382

Epoch 3/100
Learning rate: 0.000050
0s for 15 steps - 4ms/step - loss 0.2686
Validation:
0s - loss 0.2303

Epoch 4/100
Learning rate: 0.000050
0s for 15 steps - 4ms/step - loss 0.2613
Validation:
0s - loss 0.2232

Epoch 5/100
Learning rate: 0.000050
0s for 15 steps - 4ms/step - loss 0.2544
Validation:
0s - loss 0.2166

Epoch 6/100
Learning rate: 0.000050
0s for 15 steps - 4ms/step - loss 0.2481
Validation:
0s - loss 0.2103

Epoch 7/100
Learning rate: 0.000050
0s for 15 steps - 4ms/step - loss 0.2421
Validation:
0s - loss 0.2046

Epoch 8/100
Learning rate: 0.000050
0s for 15 steps - 4ms/step - loss 0.2365
Validation:
0s - loss 0.1992

Epoch 9/100
Learning rate: 0.000050
0s for 15 steps - 3ms/step - loss 0.2313
Validation:
0s - loss 0.1942

Epoch 10/100
Learn

0s - loss 0.0707

Epoch 77/100
Learning rate: 0.000050
0s for 15 steps - 4ms/step - loss 0.0867
Validation:
0s - loss 0.0700

Epoch 78/100
Learning rate: 0.000050
0s for 15 steps - 3ms/step - loss 0.0858
Validation:
0s - loss 0.0692

Epoch 79/100
Learning rate: 0.000050
0s for 15 steps - 4ms/step - loss 0.0848
Validation:
0s - loss 0.0685

Epoch 80/100
Learning rate: 0.000050
0s for 15 steps - 7ms/step - loss 0.0839
Validation:
0s - loss 0.0677

Epoch 81/100
Learning rate: 0.000050
0s for 15 steps - 7ms/step - loss 0.0829
Validation:
0s - loss 0.0670

Epoch 82/100
Learning rate: 0.000050
0s for 15 steps - 4ms/step - loss 0.0820
Validation:
0s - loss 0.0663

Epoch 83/100
Learning rate: 0.000050
0s for 15 steps - 4ms/step - loss 0.0811
Validation:
0s - loss 0.0656

Epoch 84/100
Learning rate: 0.000050
0s for 15 steps - 5ms/step - loss 0.0802
Validation:
0s - loss 0.0649

Epoch 85/100
Learning rate: 0.000050
0s for 15 steps - 4ms/step - loss 0.0794
Validation:
0s - loss 0.0642

Epoch 86/1

### 3. Train a plain NN with the same architecture as the ICKy backbone and record runtime

In [4]:
# Concatenate x and t and input them into the plain NN
# NOTE: this rebinds data/target and the x_*/y_* splits created for ICKy.
# From here on x_train holds the two-column (x, t) array, which later cells
# that reuse x_train/y_train depend on.
data, target = np.concatenate((x.numpy(), t.numpy()), axis=1), y.numpy()
x_train, y_train, x_val, y_val, x_test, y_test = train_val_test_split(
    data, target, shuffle_data=True, train_range=(0.,1.), random_seed=2020)
data_generators = create_generators_from_data(x_train, y_train, x_test, y_test, x_val, y_val, train_batch_size=100)

# A single dense-network kernel over the concatenated 2-D input serves as the
# plain NN baseline; its sizes mirror the dense-network kernel used for ICKy.
kernel_assignment = ['ImplicitDenseNetKernel']
kernel_params = {
    'ImplicitDenseNetKernel':{
        'input_dim': 2,
        'latent_feature_dim': 16,
        'num_blocks': 1, 
        'num_layers_per_block': 1, 
        'num_units': 64
    }
}

model = ICK(kernel_assignment, kernel_params)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
optim = 'adam'
optim_params = {
    'lr': 5e-5, 
    'weight_decay': 0.1
}
# Same optimizer settings and epoch budget as the ICKy run so the timings are comparable
epochs, patience = 100, 100
trainer = Trainer(
    model=model,
    data_generators=data_generators,
    optim=optim,
    optim_params=optim_params, 
    model_save_dir=None,
    device=device,
    epochs=epochs,
    patience=patience
)

# Benchmark the training call.
# - perf_counter() is a monotonic, high-resolution clock intended for measuring
#   elapsed time; time.time() can jump if the system clock is adjusted.
# - CUDA kernels are launched asynchronously, so pending GPU work is drained
#   before the clock is read on either side of the timed region.
if device.type == "cuda":
    torch.cuda.synchronize()
start = time.perf_counter()
trainer.train()
if device.type == "cuda":
    torch.cuda.synchronize()
print("Total training time: {:.4f} seconds".format(time.perf_counter()-start))

Training started:

Epoch 1/100
Learning rate: 0.000050
0s for 15 steps - 2ms/step - loss 0.3797
Validation:
0s - loss 0.4008

Epoch 2/100
Learning rate: 0.000050
0s for 15 steps - 2ms/step - loss 0.3743
Validation:
0s - loss 0.3957

Epoch 3/100
Learning rate: 0.000050
0s for 15 steps - 2ms/step - loss 0.3693
Validation:
0s - loss 0.3909

Epoch 4/100
Learning rate: 0.000050
0s for 15 steps - 1ms/step - loss 0.3647
Validation:
0s - loss 0.3866

Epoch 5/100
Learning rate: 0.000050
0s for 15 steps - 1ms/step - loss 0.3606
Validation:
0s - loss 0.3827

Epoch 6/100
Learning rate: 0.000050
0s for 15 steps - 1ms/step - loss 0.3568
Validation:
0s - loss 0.3794

Epoch 7/100
Learning rate: 0.000050
0s for 15 steps - 1ms/step - loss 0.3537
Validation:
0s - loss 0.3764

Epoch 8/100
Learning rate: 0.000050
0s for 15 steps - 1ms/step - loss 0.3508
Validation:
0s - loss 0.3738

Epoch 9/100
Learning rate: 0.000050
0s for 15 steps - 1ms/step - loss 0.3484
Validation:
0s - loss 0.3715

Epoch 10/100
Learn

0s - loss 0.3574

Epoch 77/100
Learning rate: 0.000050
0s for 15 steps - 1ms/step - loss 0.3325
Validation:
0s - loss 0.3574

Epoch 78/100
Learning rate: 0.000050
0s for 15 steps - 1ms/step - loss 0.3325
Validation:
0s - loss 0.3574

Epoch 79/100
Learning rate: 0.000050
0s for 15 steps - 2ms/step - loss 0.3325
Validation:
0s - loss 0.3574

Epoch 80/100
Learning rate: 0.000050
0s for 15 steps - 1ms/step - loss 0.3325
Validation:
0s - loss 0.3574

Epoch 81/100
Learning rate: 0.000050
0s for 15 steps - 1ms/step - loss 0.3325
Validation:
0s - loss 0.3573

Epoch 82/100
Learning rate: 0.000050
0s for 15 steps - 1ms/step - loss 0.3325
Validation:
0s - loss 0.3573

Epoch 83/100
Learning rate: 0.000050
0s for 15 steps - 2ms/step - loss 0.3325
Validation:
0s - loss 0.3573

Epoch 84/100
Learning rate: 0.000050
0s for 15 steps - 2ms/step - loss 0.3325
Validation:
0s - loss 0.3573

Epoch 85/100
Learning rate: 0.000050
0s for 15 steps - 2ms/step - loss 0.3325
Validation:
0s - loss 0.3573

Epoch 86/1

### 4. Train a GP with spectral mixture kernel and record runtime

In [5]:
class SpectralMixtureGPModel(gpytorch.models.ExactGP):
    """Exact GP regression model with a constant mean and a spectral mixture kernel.

    Parameters
    ----------
    train_x : torch.Tensor
        Training inputs; also used to initialize the kernel hyperparameters.
    train_y : torch.Tensor
        Training targets; also used to initialize the kernel hyperparameters.
    likelihood : gpytorch likelihood
        Likelihood passed through to ``ExactGP``.
    num_mixtures : int, optional
        Number of spectral mixture components (default 2, the value that was
        previously hard-coded).
    ard_num_dims : int or None, optional
        Input dimensionality of the kernel. When ``None`` (default) it is taken
        from the last dimension of ``train_x`` (1 if ``train_x`` is 1-D), so a
        two-column ``train_x`` yields 2 exactly as before.
    """

    def __init__(self, train_x, train_y, likelihood, num_mixtures=2, ard_num_dims=None):
        super().__init__(train_x, train_y, likelihood)
        if ard_num_dims is None:
            # Infer the dimensionality from the data instead of assuming two columns
            ard_num_dims = train_x.shape[-1] if train_x.dim() > 1 else 1
        self.mean_module = gpytorch.means.ConstantMean()
        self.covar_module = gpytorch.kernels.SpectralMixtureKernel(
            num_mixtures=num_mixtures, ard_num_dims=ard_num_dims)
        # Data-driven initialization of the spectral mixture hyperparameters
        self.covar_module.initialize_from_data(train_x, train_y)

    def forward(self, x):
        """Return the GP prior at ``x`` as a multivariate normal (mean and covariance)."""
        mean_x = self.mean_module(x)
        covar_x = self.covar_module(x)
        return gpytorch.distributions.MultivariateNormal(mean_x, covar_x)
    
# Train the exact GP on the full training set and record the runtime.
# NOTE: x_train / y_train are the arrays left behind by the plain-NN cell
# (x_train has the x and t columns concatenated).
# NOTE(review): unlike the ICKy / NN runs, the GP is never moved to `device`,
# so this timing is on CPU even when CUDA is available — confirm this is intended.
likelihood = gpytorch.likelihoods.GaussianLikelihood()

# Convert the training arrays once, outside the timed loop, and use one 1-D
# target tensor for both the model and the loss (previously the model was
# given an (n, 1) column while the loss received a separately built tensor).
train_x_gp = torch.tensor(x_train)
train_y_gp = torch.tensor(y_train).float().reshape(-1)
gp = SpectralMixtureGPModel(train_x_gp, train_y_gp, likelihood)

epochs, patience = 100, 100
gp.train()
likelihood.train()
optimizer = torch.optim.Adam(gp.parameters(), lr=5e-5)
mll = gpytorch.mlls.ExactMarginalLogLikelihood(likelihood, gp)

best_loss, best_model_state_dict, trigger_times = 1e9, None, 0
num_epochs = tqdm(range(epochs), position=0, leave=True)
# perf_counter() is the monotonic clock intended for measuring elapsed time
start = time.perf_counter()
for i in num_epochs:
    optimizer.zero_grad()
    output = gp(train_x_gp)
    loss = -mll(output, train_y_gp)
    loss.backward()
    loss_value = loss.item()
    num_epochs.set_description('Iter %d/%d - Loss: %.3f' % (
        i + 1, epochs, loss_value
    ))
    if loss_value > best_loss:
        # No improvement: count towards early stopping
        trigger_times += 1
        if trigger_times >= patience:
            if best_model_state_dict is not None:
                gp.load_state_dict(best_model_state_dict)
            break
    else:
        trigger_times = 0
        best_loss = loss_value
        # state_dict() returns tensors that share storage with the live
        # parameters, so they must be cloned; otherwise later optimizer steps
        # silently overwrite the "best" snapshot. The snapshot is taken before
        # optimizer.step() so it holds the parameters that produced best_loss.
        best_model_state_dict = {
            key: value.detach().clone() for key, value in gp.state_dict().items()
        }
    optimizer.step()
print("Total training time: {:.4f} seconds".format(time.perf_counter()-start))

  0%|          | 0/100 [00:00<?, ?it/s]

Total training time: 18.4007 seconds
