In [3]:
import math
import numpy as np
import torch
import gpytorch
import tqdm
import random
import time
from matplotlib import pyplot as plt
from torch.utils.data import TensorDataset, DataLoader
import sys
sys.path.append("../")
sys.path.append("../directionalvi/utils")
sys.path.append("../directionalvi")
import traditional_vi
from RBFKernelDirectionalGrad import RBFKernelDirectionalGrad
#from DirectionalGradVariationalStrategy import DirectionalGradVariationalStrategy
from dfree_directional_vi import train_gp, eval_gp
from metrics import MSE
import testfun
from csv_dataset import csv_dataset

In [4]:
dataset = csv_dataset("../experiments/real_data/kin40k.csv", gradients=False, rescale=True)

[0 1 2 3 4 5 6 7]
[8]


In [5]:
dataset.dim

8

In [6]:
# data parameters
n   = dataset.n
print("n is: ", n)
dim = dataset.dim
print("dims is: ", dim)

# training params
num_inducing = 500
num_directions = 1
minibatch_size = 200
num_epochs = 100

# seed
torch.random.manual_seed(0)
# use tqdm or just have print statements
tqdm = False
# use data to initialize inducing stuff
inducing_data_initialization = False
# use natural gradients and/or CIQ
use_ngd = False
use_ciq = False
num_contour_quadrature=15
# learning rate
learning_rate_hypers = 0.01
learning_rate_ngd    = 0.1
gamma  = 10.0
#levels = np.array([20,150,300])
#def lr_sched(epoch):
#  a = np.sum(levels > epoch)
#  return (1./gamma)**a
lr_sched = None

n is:  40000
dims is:  8


In [7]:
# train-test split
n_train = int(0.8*dataset.n)
n_test  = n - n_train
train_dataset,test_dataset = torch.utils.data.random_split(dataset,[n_train,n_test])

In [8]:
#loaders
train_loader = DataLoader(train_dataset, batch_size=minibatch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=n_test, shuffle=False)

In [9]:
test_y = [item[1] for item in test_loader]
test_x = [item[0] for item in test_loader]

# D-Free Grad SVGP

In [None]:
# train
print("\n\n---DirectionalGradVGP---")
print(f"Start training with {n} trainig data of dim {dim}")
print(f"VI setups: {num_inducing} inducing points, {num_directions} inducing directions")
args={"verbose":True}
t1 = time.time()	
model,likelihood = train_gp(train_dataset,
                      num_inducing=num_inducing,
                      num_directions=num_directions,
                      minibatch_size = minibatch_size,
                      minibatch_dim = num_directions,
                      num_epochs =num_epochs, 
                      learning_rate_hypers=learning_rate_hypers,
                      learning_rate_ngd=learning_rate_ngd,
                      inducing_data_initialization=inducing_data_initialization,
                      use_ngd = use_ngd,
                      use_ciq = use_ciq,
                      lr_sched=lr_sched,
                      num_contour_quadrature=num_contour_quadrature,
                      tqdm=tqdm,**args
                      )
t2 = time.time()	

# save the model
# torch.save(model.state_dict(), "../data/test_dvi_basic.model")

# test
means, variances = eval_gp( test_dataset,model,likelihood,
                            num_directions=num_directions,
                            minibatch_size=n_test,
                            minibatch_dim=num_directions)
t3 = time.time()	




---DirectionalGradVGP---
Start training with 40000 trainig data of dim 8
VI setups: 500 inducing points, 1 inducing directions
All parameters to learn:
      variational_strategy.inducing_points
      torch.Size([500, 8])
      variational_strategy.inducing_directions
      torch.Size([500, 8])
      variational_strategy._variational_distribution.variational_mean
      torch.Size([1000])
      variational_strategy._variational_distribution.chol_variational_covar
      torch.Size([1000, 1000])
      mean_module.constant
      torch.Size([1])
      covar_module.raw_outputscale
      torch.Size([])
      covar_module.base_kernel.raw_lengthscale
      torch.Size([1, 1])
      noise_covar.raw_noise
      torch.Size([1])
Total number of parameters:  1009004.0
Epoch: 0; total_step: 0, loss: 2.3583228058187573, nll: 1.3929881315318045
Epoch: 0; total_step: 50, loss: 1.707945131244776, nll: 1.2066330661288887
Epoch: 0; total_step: 100, loss: 1.5104992874536232, nll: 0.9781890618114101
Epoch: 

Epoch: 29; total_step: 4700, loss: 0.5429617104588167, nll: -0.1343379990317302
Epoch: 29; total_step: 4750, loss: 0.7516069027322844, nll: 0.22613092887109595
Epoch: 30; total_step: 4800, loss: 0.6201627862872725, nll: 0.14684258623605037
Epoch: 30; total_step: 4850, loss: 0.5505099981111404, nll: 0.0075199864000499335
Epoch: 30; total_step: 4900, loss: 0.6105842906172958, nll: -0.02006576932385422
Epoch: 30; total_step: 4950, loss: 0.5309468367620472, nll: -0.08067179105470602
Epoch: 31; total_step: 5000, loss: 0.5761926240967695, nll: 0.04654691550475429
Epoch: 31; total_step: 5050, loss: 0.6582110252818575, nll: 0.1205556700821975
Epoch: 31; total_step: 5100, loss: 0.5412674871328451, nll: -0.016346521774201834
Epoch: 32; total_step: 5150, loss: 0.46626278020266637, nll: -0.11242512790312381
Epoch: 32; total_step: 5200, loss: 0.6438555378279649, nll: 0.10343979743750417
Epoch: 32; total_step: 5250, loss: 0.5626047948747147, nll: 0.05963453602746371
Epoch: 33; total_step: 5300, loss

In [None]:

# compute MSE
#test_y = test_y.cpu()
test_mse = MSE(test_y[0],means)
# compute mean negative predictive density
test_nll = -torch.distributions.Normal(means, variances.sqrt()).log_prob(test_y[0]).mean()
print(f"At {n_test} testing points, MSE: {test_mse:.4e}, nll: {test_nll:.4e}.")
print(f"Training time: {(t2-t1):.2f} sec, testing time: {(t3-t2):.2f} sec")

#plot=1
#if plot == 1:
#    from mpl_toolkits.mplot3d import axes3d
#    import matplotlib.pyplot as plt
#    fig = plt.figure(figsize=(12,6))
#    ax = fig.add_subplot(111, projection='3d')
#    ax.scatter(test_x[0][:,0],test_x[:,1],test_y, color='k')
#    ax.scatter(test_x[0][:,0],test_x[:,1],means, color='b')
#    plt.title("f(x,y) variational fit; actual curve is black, variational is blue")
#    plt.show()

In [None]:
# training params
#num_inducing = 50
#num_directions = 6
#minibatch_size = 200
#num_epochs = 100


# 2 directions
#At 104 testing points, MSE: 2.9133e+00, nll: 3.3945e+00. 
# 3 directions
#At 104 testing points, MSE: 2.9455e+00, nll: 3.3617e+00.
#Training time: 70.29 sec, testing time: 0.10 sec
# 4 directions
#At 104 testing points, MSE: 2.9810e+00, nll: 3.0743e+00.
#Training time: 57.68 sec, testing time: 0.08 sec
# 5 directions
#At 104 testing points, MSE: 2.9440e+00, nll: 3.6124e+00.
#Training time: 104.46 sec, testing time: 0.12 sec
# 6 directions
#At 104 testing points, MSE: 2.9795e+00, nll: 3.1092e+00.
#Training time: 127.73 sec, testing time: 0.10 sec
# 7 directions
#At 104 testing points, MSE: 2.9272e+00, nll: 3.6537e+00.
#Training time: 153.38 sec, testing time: 0.12 sec
# 8 directions
#At 104 testing points, MSE: 2.9503e+00, nll: 3.3300e+00.
#Training time: 173.86 sec, testing time: 0.15 sec
# 9 directions
# 10 directions

# Traditional SVGP

In [None]:
model_t,likelihood_t = traditional_vi.train_gp(train_dataset,dim,
                                                   num_inducing=num_inducing,
                                                   minibatch_size=minibatch_size,
                                                   num_epochs=num_epochs,
                                                   use_ngd=use_ngd, use_ciq=use_ciq,
                                                   learning_rate_hypers=learning_rate_hypers,
                                                   learning_rate_ngd=learning_rate_ngd,
                                                   lr_sched=lr_sched,
                                                   num_contour_quadrature=num_contour_quadrature,gamma=gamma, verbose=True)

In [None]:
means_t, variances_t = traditional_vi.eval_gp(test_dataset, model_t, likelihood_t, minibatch_size=n_test)

In [None]:
# compute MSE
#test_y = test_y.cpu()
test_mse = MSE(test_y[0],means_t)
# compute mean negative predictive density
test_nll = -torch.distributions.Normal(means_t, variances_t.sqrt()).log_prob(test_y[0]).mean()
print(f"At {n_test} testing points, MSE: {test_mse:.4e}, nll: {test_nll:.4e}.")
print(f"Training time: {(t2-t1):.2f} sec, testing time: {(t3-t2):.2f} sec")