In [43]:
import math
import numpy as np

import torch
import gpytorch
from gpytorch.models import AbstractVariationalGP
from gpytorch.variational import (CholeskyVariationalDistribution,
                                VariationalStrategy)
from gpytorch.mlls.variational_elbo import VariationalELBO
from gpytorch.utils.grid import choose_grid_size
import torch.utils.data as utils

from torch.optim import SGD, Adam
from torch.optim.lr_scheduler import MultiStepLR
import torch.nn.functional as F
from torch import nn

from sklearn.cross_validation import train_test_split

from matplotlib import pyplot as plt
%matplotlib inline

SEED = 199

# Seed the global NumPy and PyTorch generators as well: SEED was previously
# only handed to train_test_split, leaving weight initialisation and
# DataLoader shuffling different on every kernel restart.
# NOTE(review): some CUDA kernels may still be non-deterministic.
np.random.seed(SEED)
torch.manual_seed(SEED)

In [44]:
# Load the feature matrix and label vector from the working directory.
X, y = np.load('X.npy'), np.load('y.npy')

# Quick look at the first row and at the array shapes.
print(X[:1, :])
print(X.shape, y.shape, sep='\n')

# Hold out 33% of the rows for testing; the split is fixed by SEED.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.33,
    random_state=SEED,
)

[[0.67261774 0.85714286 0.         0.99999471 0.         0.59663866
  0.         1.         0.96969697 0.74348697 0.03006012 0.07630522
  0.82828283 0.35836034 0.46073662 0.         1.         1.
  0.         0.         0.         0.         1.         0.
  1.         0.         1.         0.         0.         0.
  1.         0.         0.         0.         0.         0.
  0.         0.         0.         0.         0.         0.
  0.         0.         0.         0.         0.         0.
  0.         0.         0.         0.         0.         0.
  0.         0.         0.         0.         0.         0.
  0.         0.         0.         0.         0.         0.
  0.         0.         0.         0.         0.         0.
  0.         0.         0.         0.         0.         1.
  0.         0.         0.         1.         0.         1.
  0.         0.         1.         0.         0.         1.
  0.        ]]
(20468, 91)
(20468,)


In [81]:
class NeuralnetLayer(torch.nn.Sequential):
    """Fully connected feature extractor: data_dim -> 500 -> 50 -> output_dim.

    ReLU activations follow the first two linear layers; the final linear
    layer has no activation.

    Args:
        data_dim: number of input features per sample.
        output_dim: number of features produced per sample.
    """

    def __init__(self,data_dim, output_dim):
        super().__init__()
        # Names start at 'linear2' because an earlier, wider first layer
        # (data_dim -> 1000, 'linear1'/'relu1') is currently disabled.
        stack = (
            ('linear2', torch.nn.Linear(data_dim, 500)),
            ('relu2', torch.nn.ReLU()),
            ('linear3', torch.nn.Linear(500, 50)),
            ('relu3', torch.nn.ReLU()),
            ('linear4', torch.nn.Linear(50, output_dim)),
        )
        for module_name, module in stack:
            self.add_module(module_name, module)

In [82]:
class GaussianProcessLayer(gpytorch.models.AdditiveGridInducingVariationalGP):
    """Variational GP on a fixed grid with a constant mean and a scaled RBF kernel.

    Args:
        num_dim: forwarded to the base class as ``num_dim``.
        grid_bounds: ``(lower, upper)`` pair; wrapped in a one-element list
            for the base class and also stored on the instance.
        grid_size: forwarded to the base class as ``grid_size``.

    NOTE(review): the base class is built with ``mixing_params=False`` and
    ``sum_output=False``; see the GPyTorch docs for their exact effect.
    """

    def __init__(self, num_dim, grid_bounds, grid_size):
        super().__init__(
            grid_size=grid_size,
            grid_bounds=[grid_bounds],
            num_dim=num_dim,
            mixing_params=False,
            sum_output=False,
        )

        # Smoothed box prior on the RBF lengthscale between e^-1 and e^1.
        lengthscale_prior = gpytorch.priors.SmoothedBoxPrior(
            math.exp(-1), math.exp(1), sigma=0.1, transform=torch.exp
        )
        rbf_kernel = gpytorch.kernels.RBFKernel(lengthscale_prior=lengthscale_prior)

        self.covar_module = gpytorch.kernels.ScaleKernel(rbf_kernel)
        self.mean_module = gpytorch.means.ConstantMean()
        self.grid_bounds = grid_bounds

    def forward(self, x):
        """Return the multivariate normal built from the mean and kernel evaluated at ``x``."""
        return gpytorch.distributions.MultivariateNormal(
            self.mean_module(x),
            self.covar_module(x),
        )

In [83]:
class DKLModel(gpytorch.Module):
    """Chains a neural feature extractor with a ``GaussianProcessLayer``.

    Args:
        neuralnet_layer: module mapping raw inputs to ``num_dim`` features.
        num_dim: passed to ``GaussianProcessLayer`` as ``num_dim``.
        grid_bounds: ``(lower, upper)`` range the features are rescaled to
            before entering the GP layer.
        grid_size: passed to ``GaussianProcessLayer`` as ``grid_size``.
    """

    def __init__(self, neuralnet_layer, num_dim, grid_bounds,grid_size):
        super().__init__()
        self.grid_bounds = grid_bounds
        self.num_dim = num_dim
        self.neuralnet_layer = neuralnet_layer
        self.gp_layer = GaussianProcessLayer(
            num_dim=num_dim,
            grid_bounds=grid_bounds,
            grid_size=grid_size,
        )

    def forward(self, x):
        """Extract features from ``x``, rescale them into the grid bounds, and apply the GP layer."""
        lower = self.grid_bounds[0]
        upper = self.grid_bounds[1]
        scaled = gpytorch.utils.grid.scale_to_bounds(self.neuralnet_layer(x), lower, upper)
        return self.gp_layer(scaled)



In [84]:
# Convert the training split to float tensors and wrap it for mini-batching.
X_train = torch.FloatTensor(X_train)
y_train = torch.FloatTensor(y_train)

dataset = utils.TensorDataset(X_train, y_train)
# Batches of 128 samples, reshuffled every epoch.
dataloader = utils.DataLoader(dataset, batch_size=128, shuffle=True)

In [85]:
# Size of the feature space the network produces and the GP layer consumes.
latent_dim = 2
# Fixed grid size; choose_grid_size(X_train) is the disabled alternative.
grid_size = 64
print(grid_size)
grid_bounds = (-1., 1.)

# Feature extractor, moved to the GPU.
nnet_layer = NeuralnetLayer(data_dim=X_train.size(1), output_dim=latent_dim)
nnet_layer = nnet_layer.cuda()

# Full model (network + GP layer) and the Bernoulli likelihood, both on the GPU.
model = DKLModel(
    nnet_layer,
    num_dim=latent_dim,
    grid_bounds=grid_bounds,
    grid_size=grid_size,
)
model = model.cuda()
likelihood = gpytorch.likelihoods.BernoulliLikelihood().cuda()

64


In [88]:
n_epochs = 100
lr = 0.1

# One parameter group per model component. The GP hyperparameters use a
# learning rate 100x smaller than the base rate; every other group uses `lr`.
optimizer = SGD([
    {'params': model.neuralnet_layer.parameters()},
    {'params': model.gp_layer.hyperparameters(), 'lr': lr * 0.01},
    {'params': model.gp_layer.variational_parameters()},
    {'params': likelihood.parameters()},
], lr=lr, momentum=0.9, nesterov=True, weight_decay=0)

# Multiply every group's learning rate by 0.1 at 50% and again at 75% of training.
# Milestones are integer epoch indices: the previous float form
# (0.5 * n_epochs) becomes fractional when n_epochs is not a multiple of 4,
# and depending on the PyTorch version such a milestone may never be matched.
scheduler = MultiStepLR(
    optimizer,
    milestones=[n_epochs // 2, (3 * n_epochs) // 4],
    gamma=0.1,
)

def train(epoch):
    """Run one pass over ``dataloader`` and return the mean per-batch loss.

    Uses the notebook-level ``model``, ``likelihood``, ``optimizer`` and
    ``dataloader``. The loss is the negative variational ELBO of the GP layer.
    Progress is printed every 20 batches.

    Args:
        epoch: epoch number, used only in the progress message.

    Returns:
        Mean of the per-batch losses for this epoch (0.0 for an empty loader).
    """
    model.train()
    likelihood.train()

    mll = gpytorch.mlls.VariationalELBO(likelihood, model.gp_layer, num_data=len(dataloader.dataset))

    num_batches = len(dataloader)
    train_loss = 0.
    for batch_idx, (data, target) in enumerate(dataloader):
        data, target = data.cuda(), target.cuda()
        optimizer.zero_grad()
        output = model(data)
        loss = -mll(output, target)
        loss.backward()
        optimizer.step()

        # Accumulate the running total that was previously initialised but never used.
        batch_loss = loss.item()
        train_loss += batch_loss
        if (batch_idx + 1) % 20 == 0:
            print('Train Epoch: %d [%03d/%03d], Loss: %.6f' % (epoch, batch_idx + 1, num_batches, batch_loss))

    # max(..., 1) avoids a ZeroDivisionError when the loader yields no batches.
    return train_loss / max(num_batches, 1)

In [None]:
# Train for n_epochs epochs, numbered from 1 for the progress messages.
# NOTE(review): the scheduler is stepped *before* each epoch. From PyTorch 1.1
# onwards the documented order is optimizer.step() first, then scheduler.step();
# confirm the target PyTorch version before changing this.
first_epoch, last_epoch = 1, n_epochs
for epoch in range(first_epoch, last_epoch + 1):
    scheduler.step()
    train(epoch)

Train Epoch: 1 [020/108], Loss: 1.410385
Train Epoch: 1 [040/108], Loss: 1.394321
Train Epoch: 1 [060/108], Loss: 1.380714
Train Epoch: 1 [080/108], Loss: 1.362059
Train Epoch: 1 [100/108], Loss: 1.364481
Train Epoch: 2 [020/108], Loss: 1.341274
Train Epoch: 2 [040/108], Loss: 1.336330
Train Epoch: 2 [060/108], Loss: 1.319339
Train Epoch: 2 [080/108], Loss: 1.310478
Train Epoch: 2 [100/108], Loss: 1.335119
Train Epoch: 3 [020/108], Loss: 1.335832
Train Epoch: 3 [040/108], Loss: 1.291161
Train Epoch: 3 [060/108], Loss: 1.315436
Train Epoch: 3 [080/108], Loss: 1.295143
Train Epoch: 3 [100/108], Loss: 1.309992
Train Epoch: 4 [020/108], Loss: 1.287485
Train Epoch: 4 [040/108], Loss: 1.312078
Train Epoch: 4 [060/108], Loss: 1.326498
Train Epoch: 4 [080/108], Loss: 1.324845
Train Epoch: 4 [100/108], Loss: 1.293575
Train Epoch: 5 [020/108], Loss: 1.334522
Train Epoch: 5 [040/108], Loss: 1.272888
Train Epoch: 5 [060/108], Loss: 1.285204
Train Epoch: 5 [080/108], Loss: 1.348242
Train Epoch: 5 [

Train Epoch: 40 [060/108], Loss: 1.228277
Train Epoch: 40 [080/108], Loss: 1.259711
Train Epoch: 40 [100/108], Loss: 1.217582
Train Epoch: 41 [020/108], Loss: 1.239269
Train Epoch: 41 [040/108], Loss: 1.260476
Train Epoch: 41 [060/108], Loss: 1.249617
Train Epoch: 41 [080/108], Loss: 1.249351
Train Epoch: 41 [100/108], Loss: 1.292289
Train Epoch: 42 [020/108], Loss: 1.260226
Train Epoch: 42 [040/108], Loss: 1.334671
Train Epoch: 42 [060/108], Loss: 1.302510
Train Epoch: 42 [080/108], Loss: 1.302796
Train Epoch: 42 [100/108], Loss: 1.292064
Train Epoch: 43 [020/108], Loss: 1.249354
Train Epoch: 43 [040/108], Loss: 1.270616
Train Epoch: 43 [060/108], Loss: 1.249219
Train Epoch: 43 [080/108], Loss: 1.195734
Train Epoch: 43 [100/108], Loss: 1.259824
Train Epoch: 44 [020/108], Loss: 1.281700
Train Epoch: 44 [040/108], Loss: 1.292273
Train Epoch: 44 [060/108], Loss: 1.238421
Train Epoch: 44 [080/108], Loss: 1.238400
Train Epoch: 44 [100/108], Loss: 1.217451
Train Epoch: 45 [020/108], Loss: 1