In [48]:
import math
import numpy as np
import torch
import gpytorch
import tqdm
import random
import time
from matplotlib import pyplot as plt
from torch.utils.data import TensorDataset, DataLoader
import sys
sys.path.append("../")
sys.path.append("../directionalvi/utils")
sys.path.append("../directionalvi")
import traditional_vi
from RBFKernelDirectionalGrad import RBFKernelDirectionalGrad
#from DirectionalGradVariationalStrategy import DirectionalGradVariationalStrategy
from dfree_directional_vi import train_gp, eval_gp
from metrics import MSE
import testfun
from csv_dataset import csv_dataset

In [49]:
dataset = csv_dataset("../experiments/real_data/WECs_DataSet/Adelaide_Data.csv", gradients=False, rescale=True)

[ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25 26 27 28 29 30 31]
[48]


In [51]:
dataset.n

71999

In [52]:
# data parameters
n   = dataset.n
print("n is: ", n)
dim = dataset.dim
print("dims is: ", dim)

# training params
num_inducing = 500
num_directions = 1
minibatch_size = 500
num_epochs = 100

# seed
torch.random.manual_seed(0)
# use tqdm or just have print statements
tqdm = False
# use data to initialize inducing stuff
inducing_data_initialization = False
# use natural gradients and/or CIQ
use_ngd = False
use_ciq = False
num_contour_quadrature=15
# learning rate
learning_rate_hypers = 0.01
learning_rate_ngd    = 0.1
gamma  = 10.0
#levels = np.array([20,150,300])
#def lr_sched(epoch):
#  a = np.sum(levels > epoch)
#  return (1./gamma)**a
lr_sched = None

n is:  71999
dims is:  32


In [37]:
# train-test split
n_train = int(0.8*dataset.n)
n_test  = n - n_train
train_dataset,test_dataset = torch.utils.data.random_split(dataset,[n_train,n_test])

In [38]:
#loaders
train_loader = DataLoader(train_dataset, batch_size=minibatch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=n_test, shuffle=False)

In [39]:
test_y = [item[1] for item in test_loader]
test_x = [item[0] for item in test_loader]

# D-Free Grad SVGP

In [40]:
# train
print("\n\n---DirectionalGradVGP---")
print(f"Start training with {n} trainig data of dim {dim}")
print(f"VI setups: {num_inducing} inducing points, {num_directions} inducing directions")
args={"verbose":True}
t1 = time.time()	
model,likelihood = train_gp(train_dataset,
                      num_inducing=num_inducing,
                      num_directions=num_directions,
                      minibatch_size = minibatch_size,
                      minibatch_dim = num_directions,
                      num_epochs =num_epochs, 
                      learning_rate_hypers=learning_rate_hypers,
                      learning_rate_ngd=learning_rate_ngd,
                      inducing_data_initialization=inducing_data_initialization,
                      use_ngd = use_ngd,
                      use_ciq = use_ciq,
                      lr_sched=lr_sched,
                      num_contour_quadrature=num_contour_quadrature,
                      tqdm=tqdm,**args
                      )
t2 = time.time()	

# save the model
# torch.save(model.state_dict(), "../data/test_dvi_basic.model")

# test
means, variances = eval_gp( test_dataset,model,likelihood,
                            num_directions=num_directions,
                            minibatch_size=n_test,
                            minibatch_dim=num_directions)
t3 = time.time()	




---DirectionalGradVGP---
Start training with 517 trainig data of dim 12
VI setups: 400 inducing points, 2 inducing directions
All parameters to learn:
      variational_strategy.inducing_points
      torch.Size([400, 12])
      variational_strategy.inducing_directions
      torch.Size([800, 12])
      variational_strategy._variational_distribution.variational_mean
      torch.Size([1200])
      variational_strategy._variational_distribution.chol_variational_covar
      torch.Size([1200, 1200])
      mean_module.constant
      torch.Size([1])
      covar_module.raw_outputscale
      torch.Size([])
      covar_module.base_kernel.raw_lengthscale
      torch.Size([1, 1])
      noise_covar.raw_noise
      torch.Size([1])
Total number of parameters:  1455604.0
Epoch: 0; total_step: 0, loss: 2.3042598725043044, nll: 1.132673509154352
Epoch: 16; total_step: 50, loss: 1.2527715682622307, nll: 0.7283301913095945
Epoch: 33; total_step: 100, loss: 1.4360152891531113, nll: 0.7191127906908803
Epoc

In [41]:

# compute MSE
#test_y = test_y.cpu()
test_mse = MSE(test_y[0],means)
# compute mean negative predictive density
test_nll = -torch.distributions.Normal(means, variances.sqrt()).log_prob(test_y[0]).mean()
print(f"At {n_test} testing points, MSE: {test_mse:.4e}, nll: {test_nll:.4e}.")
print(f"Training time: {(t2-t1):.2f} sec, testing time: {(t3-t2):.2f} sec")

#plot=1
#if plot == 1:
#    from mpl_toolkits.mplot3d import axes3d
#    import matplotlib.pyplot as plt
#    fig = plt.figure(figsize=(12,6))
#    ax = fig.add_subplot(111, projection='3d')
#    ax.scatter(test_x[0][:,0],test_x[:,1],test_y, color='k')
#    ax.scatter(test_x[0][:,0],test_x[:,1],means, color='b')
#    plt.title("f(x,y) variational fit; actual curve is black, variational is blue")
#    plt.show()

At 104 testing points, MSE: 3.0218e+00, nll: 4.2752e+00.
Training time: 837.60 sec, testing time: 0.31 sec


In [42]:
# training params
#num_inducing = 50
#num_directions = 6
#minibatch_size = 200
#num_epochs = 100


# 2 directions
#At 104 testing points, MSE: 2.9133e+00, nll: 3.3945e+00. 
# 3 directions
#At 104 testing points, MSE: 2.9455e+00, nll: 3.3617e+00.
#Training time: 70.29 sec, testing time: 0.10 sec
# 4 directions
#At 104 testing points, MSE: 2.9810e+00, nll: 3.0743e+00.
#Training time: 57.68 sec, testing time: 0.08 sec
# 5 directions
#At 104 testing points, MSE: 2.9440e+00, nll: 3.6124e+00.
#Training time: 104.46 sec, testing time: 0.12 sec
# 6 directions
#At 104 testing points, MSE: 2.9795e+00, nll: 3.1092e+00.
#Training time: 127.73 sec, testing time: 0.10 sec
# 7 directions
#At 104 testing points, MSE: 2.9272e+00, nll: 3.6537e+00.
#Training time: 153.38 sec, testing time: 0.12 sec
# 8 directions
#At 104 testing points, MSE: 2.9503e+00, nll: 3.3300e+00.
#Training time: 173.86 sec, testing time: 0.15 sec
# 9 directions
# 10 directions

# Traditional SVGP

In [43]:
model_t,likelihood_t = traditional_vi.train_gp(train_dataset,dim,
                                                   num_inducing=num_inducing,
                                                   minibatch_size=minibatch_size,
                                                   num_epochs=num_epochs,
                                                   use_ngd=use_ngd, use_ciq=use_ciq,
                                                   learning_rate_hypers=learning_rate_hypers,
                                                   learning_rate_ngd=learning_rate_ngd,
                                                   lr_sched=lr_sched,
                                                   num_contour_quadrature=num_contour_quadrature,gamma=gamma, verbose=True)

All parameters to learn:
      variational_strategy.inducing_points
      torch.Size([400, 12])
      variational_strategy._variational_distribution.variational_mean
      torch.Size([400])
      variational_strategy._variational_distribution.chol_variational_covar
      torch.Size([400, 400])
      mean_module.constant
      torch.Size([1])
      covar_module.raw_outputscale
      torch.Size([])
      covar_module.base_kernel.raw_lengthscale
      torch.Size([1, 1])
      noise_covar.raw_noise
      torch.Size([1])
Total number of parameters:  165204.0
Using ELBO
Epoch: 0; total_step: 0, loss: 2.325866372370561, nll: 1.3773836596656395
Epoch: 16; total_step: 50, loss: 1.412803470859386, nll: 0.8634456532284903
Epoch: 33; total_step: 100, loss: 1.3417703412237887, nll: 0.7925601768467943
Epoch: 50; total_step: 150, loss: 1.304523028078541, nll: 0.7550020453570607
Epoch: 66; total_step: 200, loss: 1.1726882573731852, nll: 0.6361890493291381
Epoch: 83; total_step: 250, loss: 1.8273089577

In [44]:
means_t, variances_t = traditional_vi.eval_gp(test_dataset, model_t, likelihood_t, minibatch_size=n_test)

In [45]:
# compute MSE
#test_y = test_y.cpu()
test_mse = MSE(test_y[0],means_t)
# compute mean negative predictive density
test_nll = -torch.distributions.Normal(means_t, variances_t.sqrt()).log_prob(test_y[0]).mean()
print(f"At {n_test} testing points, MSE: {test_mse:.4e}, nll: {test_nll:.4e}.")
print(f"Training time: {(t2-t1):.2f} sec, testing time: {(t3-t2):.2f} sec")

At 104 testing points, MSE: 2.8974e+00, nll: 3.5117e+00.
Training time: 837.60 sec, testing time: 0.31 sec


In [46]:
# protein
# dfree and svgp
# At 9146 testing points, MSE: 5.9555e-01, nll: 1.1599e+00.
# At 9146 testing points, MSE: 6.2736e-01, nll: 1.1856e+00.

In [None]:
#forest fire

# At 104 testing points, MSE: 3.0218e+00, nll: 4.2752e+00.
#Training time: 837.60 sec, testing time: 0.31 sec

#At 104 testing points, MSE: 2.8974e+00, nll: 3.5117e+00.
#Training time: 837.60 sec, testing time: 0.31 sec