In [166]:
import math
import numpy as np

import torch
import gpytorch
from gpytorch.models import AbstractVariationalGP
from gpytorch.variational import CholeskyVariationalDistribution
from gpytorch.variational import VariationalStrategy
from gpytorch.mlls.variational_elbo import VariationalELBO
import torch.utils.data as utils

from torch.optim import SGD, Adam
from torch.optim.lr_scheduler import MultiStepLR
import torch.nn.functional as F
from torch import nn

from sklearn.cross_validation import train_test_split

from matplotlib import pyplot as plt
%matplotlib inline

SEED = 199 

In [148]:
# Load the feature matrix and the label vector from the working directory.
X = np.load('X.npy')
y = np.load('y.npy')

# Peek at the first row, then report each array's shape on its own line.
print(X[0:1, :])
print(X.shape)
print(y.shape)

# Hold out 33% of the rows for testing; SEED makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=SEED, test_size=.33)

[[0.67261774 0.85714286 0.         0.99999471 0.         0.59663866
  0.         1.         0.96969697 0.74348697 0.03006012 0.07630522
  0.82828283 0.35836034 0.46073662 0.         1.         1.
  0.         0.         0.         0.         1.         0.
  1.         0.         1.         0.         0.         0.
  1.         0.         0.         0.         0.         0.
  0.         0.         0.         0.         0.         0.
  0.         0.         0.         0.         0.         0.
  0.         0.         0.         0.         0.         0.
  0.         0.         0.         0.         0.         0.
  0.         0.         0.         0.         0.         0.
  0.         0.         0.         0.         0.         0.
  0.         0.         0.         0.         0.         1.
  0.         0.         0.         1.         0.         1.
  0.         0.         1.         0.         0.         1.
  0.        ]]
(20468, 91)
(20468,)


In [159]:
class NeuralnetLayer(torch.nn.Sequential):
    """Fully connected feature extractor: data_dim -> 1000 -> 500 -> 50 -> 10.

    A ReLU follows every linear layer except the last one. Sub-modules are
    registered as ``linear1, relu1, ..., relu3, linear4``.
    """

    def __init__(self, data_dim):
        super().__init__()
        # Consecutive pairs of widths give the (in, out) sizes of each linear layer.
        layer_widths = (data_dim, 1000, 500, 50, 10)
        n_linear = len(layer_widths) - 1
        for idx, (fan_in, fan_out) in enumerate(zip(layer_widths, layer_widths[1:]), start=1):
            self.add_module('linear%d' % idx, torch.nn.Linear(fan_in, fan_out))
            # No activation after the final linear layer.
            if idx < n_linear:
                self.add_module('relu%d' % idx, torch.nn.ReLU())

In [160]:
class GaussianProcessLayer(gpytorch.models.AdditiveGridInducingVariationalGP):
    """GP layer built on gpytorch's AdditiveGridInducingVariationalGP.

    Uses a constant mean and a scaled RBF kernel whose lengthscale carries a
    SmoothedBoxPrior between exp(-1) and exp(1).
    """

    def __init__(self, num_dim, grid_bounds=(-10., 10.), grid_size=64):
        # The base class receives the bounds wrapped in a one-element list.
        super().__init__(
            grid_size=grid_size,
            grid_bounds=[grid_bounds],
            num_dim=num_dim,
            mixing_params=False,
            sum_output=False,
        )
        lengthscale_prior = gpytorch.priors.SmoothedBoxPrior(
            math.exp(-1), math.exp(1), sigma=0.1, transform=torch.exp
        )
        rbf_kernel = gpytorch.kernels.RBFKernel(lengthscale_prior=lengthscale_prior)
        self.covar_module = gpytorch.kernels.ScaleKernel(rbf_kernel)
        self.mean_module = gpytorch.means.ConstantMean()
        self.grid_bounds = grid_bounds

    def forward(self, x):
        """Return the MultivariateNormal given by the mean and kernel evaluated at ``x``."""
        return gpytorch.distributions.MultivariateNormal(
            self.mean_module(x),
            self.covar_module(x),
        )

In [161]:
class DKLModel(gpytorch.Module):
    """Deep kernel learning model: a neural feature extractor followed by a GP layer."""

    def __init__(self, neuralnet_layer, num_dim, grid_bounds=(-10., 10.)):
        super().__init__()
        self.neuralnet_layer = neuralnet_layer
        self.gp_layer = GaussianProcessLayer(num_dim=num_dim, grid_bounds=grid_bounds)
        self.grid_bounds = grid_bounds
        self.num_dim = num_dim

    def forward(self, x):
        """Extract features from ``x``, rescale them to the grid bounds, and apply the GP layer."""
        lower = self.grid_bounds[0]
        upper = self.grid_bounds[1]
        extracted = self.neuralnet_layer(x)
        # Rescale the network output to the interval used for the GP grid.
        scaled = gpytorch.utils.grid.scale_to_bounds(extracted, lower, upper)
        return self.gp_layer(scaled)



In [162]:
# Turn the training split into float tensors. The names are rebound on purpose:
# later cells call tensor methods on X_train.
X_train = torch.FloatTensor(X_train)
y_train = torch.FloatTensor(y_train)

# Pair features with labels and serve them in shuffled mini-batches of 32.
dataset = utils.TensorDataset(X_train, y_train)
dataloader = utils.DataLoader(dataset, batch_size=32, shuffle=True)

In [164]:
# Seed torch's global RNG so the randomly initialised network weights are the
# same every time this cell runs (SEED was previously used only for the
# train/test split, leaving the torch side of the experiment unseeded).
torch.manual_seed(SEED)

# The feature extractor takes one input per column of the training matrix.
nnet_layer = NeuralnetLayer(data_dim=X_train.size(1))
# num_dim is kept equal to the 10 outputs of NeuralnetLayer's last linear
# layer, whose (rescaled) features are what DKLModel feeds into its GP layer.
model = DKLModel(nnet_layer, num_dim=10)
likelihood = gpytorch.likelihoods.BernoulliLikelihood()

In [170]:
n_epochs = 300
lr = 0.1

# One parameter group per model component; only the GP hyperparameters get a
# reduced learning rate (1% of the base rate).
optimizer = SGD([
    {'params': model.neuralnet_layer.parameters()},
    {'params': model.gp_layer.hyperparameters(), 'lr': lr * 0.01},
    {'params': model.gp_layer.variational_parameters()},
    {'params': likelihood.parameters()},
], lr=lr, momentum=0.9, nesterov=True, weight_decay=0)

# Decay every group's learning rate by 10x at 50% and 75% of training.
# Milestones are computed with integer arithmetic: MultiStepLR takes epoch
# indices, and a fractional milestone (e.g. 0.5 * 5 == 2.5) can never equal the
# scheduler's integer epoch counter, so the decay would silently be skipped.
# For n_epochs = 300 this yields the same milestones as before (150 and 225).
scheduler = MultiStepLR(optimizer, milestones=[n_epochs // 2, (3 * n_epochs) // 4], gamma=0.1)

def train(epoch):
    """Run one pass over ``dataloader``, optimising the negative variational ELBO.

    Uses the notebook-level ``model``, ``likelihood``, ``optimizer`` and
    ``dataloader``. A progress line is printed every 25 batches.

    Args:
        epoch: Epoch number; only used in the printed progress lines.

    Returns:
        The mean of the per-batch losses for this epoch, or ``float('nan')``
        when the dataloader yields no batches.
    """
    model.train()
    likelihood.train()

    mll = gpytorch.mlls.VariationalELBO(likelihood, model.gp_layer, num_data=len(dataloader.dataset))

    n_batches = len(dataloader)
    train_loss = 0.
    for batch_idx, (data, target) in enumerate(dataloader):
        optimizer.zero_grad()
        output = model(data)
        # The ELBO is maximised, so its negation is the loss to minimise.
        loss = -mll(output, target)
        loss.backward()
        optimizer.step()

        # Accumulate the running total that the original initialised but never used.
        batch_loss = loss.item()
        train_loss += batch_loss
        if (batch_idx + 1) % 25 == 0:
            print('Train Epoch: %d [%03d/%03d], Loss: %.6f' % (epoch, batch_idx + 1, n_batches, batch_loss))

    # Guard against an empty loader so the average never divides by zero.
    return train_loss / n_batches if n_batches else float('nan')

In [171]:
# Epochs are numbered from 1 so the progress lines printed by train() start at "Epoch: 1".
for epoch in range(1, n_epochs + 1):
    # The LR scheduler is advanced once per epoch, before that epoch's batches run.
    # NOTE(review): PyTorch >= 1.1 documents calling scheduler.step() after the
    # epoch's optimizer.step() calls instead — confirm against the installed version
    # before reordering, since moving it shifts when the LR decay takes effect.
    scheduler.step()
    train(epoch)

Train Epoch: 1 [025/429], Loss: 7.025413
Train Epoch: 1 [050/429], Loss: 6.965981
Train Epoch: 1 [075/429], Loss: 6.976180
Train Epoch: 1 [100/429], Loss: 6.766372
Train Epoch: 1 [125/429], Loss: 6.807305
Train Epoch: 1 [150/429], Loss: 6.600054
Train Epoch: 1 [175/429], Loss: 6.754462
Train Epoch: 1 [200/429], Loss: 6.160501
Train Epoch: 1 [225/429], Loss: 5.778797
Train Epoch: 1 [250/429], Loss: 5.920123
Train Epoch: 1 [275/429], Loss: 6.343127
Train Epoch: 1 [300/429], Loss: 6.939916
Train Epoch: 1 [325/429], Loss: 6.531673
Train Epoch: 1 [350/429], Loss: 6.314705
Train Epoch: 1 [375/429], Loss: 6.112271
Train Epoch: 1 [400/429], Loss: 5.900776
Train Epoch: 1 [425/429], Loss: 6.947797
Train Epoch: 2 [025/429], Loss: 6.322464
Train Epoch: 2 [050/429], Loss: 6.306827
Train Epoch: 2 [075/429], Loss: 6.523249
Train Epoch: 2 [100/429], Loss: 6.300926
Train Epoch: 2 [125/429], Loss: 6.319786
Train Epoch: 2 [150/429], Loss: 6.086827
Train Epoch: 2 [175/429], Loss: 5.890824
Train Epoch: 2 [

KeyboardInterrupt: 