In [416]:
import math
import numpy as np

import torch
import gpytorch
from gpytorch.models import AbstractVariationalGP
from gpytorch.variational import (CholeskyVariationalDistribution,
                                VariationalStrategy,
                                 WhitenedVariationalStrategy,
                                 AdditiveGridInterpolationVariationalStrategy)
from gpytorch.mlls.variational_elbo import VariationalELBO
from gpytorch.utils.grid import choose_grid_size
import torch.utils.data as utils

from torch.optim import SGD, Adam
from torch.optim.lr_scheduler import MultiStepLR
import torch.nn.functional as F
from torch import nn

from sklearn.cross_validation import train_test_split
from sklearn.metrics import auc

from matplotlib import pyplot as plt
%matplotlib inline

SEED = 199 

In [417]:
X = np.load('X.npy')
y = np.load('y.npy')
print(X[0:1,:])
print(X.shape, y.shape,sep='\n')
X_train, X_test, y_train, y_test = train_test_split(
     X, y, test_size=.33, random_state=SEED)

[[0.67261774 0.85714286 0.         0.99999471 0.         0.59663866
  0.         1.         0.96969697 0.74348697 0.03006012 0.07630522
  0.82828283 0.35836034 0.46073662 0.         1.         1.
  0.         0.         0.         0.         1.         0.
  1.         0.         1.         0.         0.         0.
  1.         0.         0.         0.         0.         0.
  0.         0.         0.         0.         0.         0.
  0.         0.         0.         0.         0.         0.
  0.         0.         0.         0.         0.         0.
  0.         0.         0.         0.         0.         0.
  0.         0.         0.         0.         0.         0.
  0.         0.         0.         0.         0.         0.
  0.         0.         0.         0.         0.         1.
  0.         0.         0.         1.         0.         1.
  0.         0.         1.         0.         0.         1.
  0.        ]]
(20468, 91)
(20468,)


In [418]:
class NeuralnetLayer(torch.nn.Sequential):
    def __init__(self,data_dim, output_dim):
        super(NeuralnetLayer, self).__init__()
        self.add_module('linear1', torch.nn.Linear(data_dim, 1000))
        self.add_module('bn1', torch.nn.BatchNorm1d(1000))
        self.add_module('relu1', torch.nn.ReLU())                    
        self.add_module('linear3', torch.nn.Linear(1000, 500))
        self.add_module('bn3', torch.nn.BatchNorm1d(500))
        self.add_module('relu3', torch.nn.ReLU())                  
        self.add_module('linear4', torch.nn.Linear(500, 50))       
        self.add_module('bn4', torch.nn.BatchNorm1d(50))
        self.add_module('relu4', torch.nn.ReLU())   
        self.add_module('linear5', torch.nn.Linear(50,output_dim))

In [419]:
class GaussianProcessLayer(AbstractVariationalGP):
    def __init__(self, num_dim, grid_bounds, grid_size):
        variational_distribution = CholeskyVariationalDistribution(num_inducing_points=grid_size,
                                                                  batch_size=num_dim)
       
        variational_strategy = AdditiveGridInterpolationVariationalStrategy(self,
                                                                            grid_size=grid_size,
                                                                            grid_bounds=[grid_bounds],
                                                                            num_dim=num_dim,
                                                                            variational_distribution=variational_distribution)
        super(GaussianProcessLayer,self).__init__(variational_strategy)

        '''super(GaussianProcessLayer, self).__init__(grid_size=grid_size, grid_bounds=grid_bounds,
                                                   num_dim=num_dim, mixing_params=False, sum_output=False)'''
        self.covar_module = gpytorch.kernels.ScaleKernel(
            gpytorch.kernels.RBFKernel(
                lengthscale_prior=gpytorch.priors.SmoothedBoxPrior(
                    math.exp(-1), math.exp(1), sigma=0.1, transform=torch.exp
                )
            )
        )
        self.mean_module = gpytorch.means.ConstantMean()
        self.grid_bounds = grid_bounds

    def forward(self, x):
        mean = self.mean_module(x)
        covar = self.covar_module(x)
        return gpytorch.distributions.MultivariateNormal(mean, covar)

In [420]:
class DKLModel(gpytorch.Module):
    def __init__(self, nnet_layer, num_dim, 
                 grid_bounds,grid_size):
        super(DKLModel, self).__init__()
        self.nnet_layer = nnet_layer
        self.gp_layer = GaussianProcessLayer(num_dim=num_dim, 
                                             grid_bounds=grid_bounds,
                                             grid_size=grid_size)
        self.grid_bounds = grid_bounds

    def forward(self, x):
        features = self.nnet_layer(x)
        features = gpytorch.utils.grid.scale_to_bounds(features, self.grid_bounds[0], self.grid_bounds[1])
        res = self.gp_layer(features)
        return res

In [429]:
X_train,y_train = torch.FloatTensor(X_train), torch.FloatTensor(y_train)
batch_size = 1024
dataset = utils.TensorDataset(X_train,y_train)
dataloader = utils.DataLoader(dataset, 
                              batch_size = batch_size,
                              shuffle = True)

In [456]:
latent_dim = 2
grid_size = 64 #choose_grid_size(X_train)
print(grid_size)
grid_bounds=(-1,1)

nnet_layer = NeuralnetLayer(data_dim=X_train.size(1),
                            output_dim=latent_dim).cuda()

model = DKLModel(nnet_layer,num_dim=latent_dim,
                           grid_bounds=grid_bounds,
                           grid_size=grid_size).cuda()
likelihood = gpytorch.likelihoods.BernoulliLikelihood().cuda()

64


In [461]:
n_epochs = 100
lr =0.1
optimizer = SGD([
    {'params': model.nnet_layer.parameters(),'lr':1e-4, 'weight_decay': 1e-3},
    {'params': model.gp_layer.hyperparameters(), 'lr': lr * 0.01},
    {'params': model.gp_layer.variational_parameters()},
    {'params': likelihood.parameters()},
], lr=lr)#,momentum=0.9, nesterov=True, weight_decay=0)

scheduler = MultiStepLR(optimizer, milestones=[0.5 * n_epochs, 0.75 * n_epochs], gamma=0.1)

def train(epoch):
    model.train()
    likelihood.train()

    mll = gpytorch.mlls.VariationalELBO(likelihood, model.gp_layer, num_data=y_train.numel())

    train_loss = 0.
    for batch_idx, (data, target) in enumerate(dataloader):
        data, target = data.cuda(), target.cuda()
        optimizer.zero_grad()
        output = model(data)
        loss = -mll(output, target)
        loss.backward()
        optimizer.step()
        if (batch_idx + 1) % 2 == 0:
            print('Train Epoch: %d [%03d/%03d], Loss: %.6f' % (epoch, batch_idx + 1, len(dataloader), loss.item()))

In [462]:
for epoch in range(1, n_epochs + 1):
        scheduler.step()
        train(epoch)

Train Epoch: 1 [002/014], Loss: 0.634688
Train Epoch: 1 [004/014], Loss: 0.629350
Train Epoch: 1 [006/014], Loss: 0.635888
Train Epoch: 1 [008/014], Loss: 0.644027
Train Epoch: 1 [010/014], Loss: 0.617084
Train Epoch: 1 [012/014], Loss: 0.634751
Train Epoch: 1 [014/014], Loss: 0.629391
Train Epoch: 2 [002/014], Loss: 0.636655
Train Epoch: 2 [004/014], Loss: 0.628432
Train Epoch: 2 [006/014], Loss: 0.633204
Train Epoch: 2 [008/014], Loss: 0.627791
Train Epoch: 2 [010/014], Loss: 0.640163
Train Epoch: 2 [012/014], Loss: 0.629969
Train Epoch: 2 [014/014], Loss: 0.626084
Train Epoch: 3 [002/014], Loss: 0.633129
Train Epoch: 3 [004/014], Loss: 0.629900
Train Epoch: 3 [006/014], Loss: 0.635258
Train Epoch: 3 [008/014], Loss: 0.623762
Train Epoch: 3 [010/014], Loss: 0.628447
Train Epoch: 3 [012/014], Loss: 0.640689
Train Epoch: 3 [014/014], Loss: 0.619145
Train Epoch: 4 [002/014], Loss: 0.628312
Train Epoch: 4 [004/014], Loss: 0.634480
Train Epoch: 4 [006/014], Loss: 0.643353
Train Epoch: 4 [

Train Epoch: 29 [006/014], Loss: 0.631776
Train Epoch: 29 [008/014], Loss: 0.644962
Train Epoch: 29 [010/014], Loss: 0.636654
Train Epoch: 29 [012/014], Loss: 0.627081
Train Epoch: 29 [014/014], Loss: 0.627756
Train Epoch: 30 [002/014], Loss: 0.633075
Train Epoch: 30 [004/014], Loss: 0.627094
Train Epoch: 30 [006/014], Loss: 0.634445
Train Epoch: 30 [008/014], Loss: 0.625195
Train Epoch: 30 [010/014], Loss: 0.633738
Train Epoch: 30 [012/014], Loss: 0.634777
Train Epoch: 30 [014/014], Loss: 0.638148
Train Epoch: 31 [002/014], Loss: 0.638690
Train Epoch: 31 [004/014], Loss: 0.635842
Train Epoch: 31 [006/014], Loss: 0.633265
Train Epoch: 31 [008/014], Loss: 0.622351
Train Epoch: 31 [010/014], Loss: 0.621696
Train Epoch: 31 [012/014], Loss: 0.635353
Train Epoch: 31 [014/014], Loss: 0.625988
Train Epoch: 32 [002/014], Loss: 0.638653
Train Epoch: 32 [004/014], Loss: 0.619006
Train Epoch: 32 [006/014], Loss: 0.624441
Train Epoch: 32 [008/014], Loss: 0.638604
Train Epoch: 32 [010/014], Loss: 0

Train Epoch: 57 [008/014], Loss: 0.622062
Train Epoch: 57 [010/014], Loss: 0.637211
Train Epoch: 57 [012/014], Loss: 0.633086
Train Epoch: 57 [014/014], Loss: 0.638193
Train Epoch: 58 [002/014], Loss: 0.632646
Train Epoch: 58 [004/014], Loss: 0.624598
Train Epoch: 58 [006/014], Loss: 0.637356
Train Epoch: 58 [008/014], Loss: 0.631152
Train Epoch: 58 [010/014], Loss: 0.639878
Train Epoch: 58 [012/014], Loss: 0.642697
Train Epoch: 58 [014/014], Loss: 0.627925
Train Epoch: 59 [002/014], Loss: 0.631192
Train Epoch: 59 [004/014], Loss: 0.635158
Train Epoch: 59 [006/014], Loss: 0.638561
Train Epoch: 59 [008/014], Loss: 0.616585
Train Epoch: 59 [010/014], Loss: 0.632052
Train Epoch: 59 [012/014], Loss: 0.632480
Train Epoch: 59 [014/014], Loss: 0.622627
Train Epoch: 60 [002/014], Loss: 0.620523
Train Epoch: 60 [004/014], Loss: 0.639923
Train Epoch: 60 [006/014], Loss: 0.633224
Train Epoch: 60 [008/014], Loss: 0.634460
Train Epoch: 60 [010/014], Loss: 0.634659
Train Epoch: 60 [012/014], Loss: 0

Train Epoch: 85 [010/014], Loss: 0.622553
Train Epoch: 85 [012/014], Loss: 0.634601
Train Epoch: 85 [014/014], Loss: 0.631189
Train Epoch: 86 [002/014], Loss: 0.634040
Train Epoch: 86 [004/014], Loss: 0.640888
Train Epoch: 86 [006/014], Loss: 0.631850
Train Epoch: 86 [008/014], Loss: 0.638060
Train Epoch: 86 [010/014], Loss: 0.619138
Train Epoch: 86 [012/014], Loss: 0.629097
Train Epoch: 86 [014/014], Loss: 0.627756
Train Epoch: 87 [002/014], Loss: 0.641956
Train Epoch: 87 [004/014], Loss: 0.636731
Train Epoch: 87 [006/014], Loss: 0.621635
Train Epoch: 87 [008/014], Loss: 0.636807
Train Epoch: 87 [010/014], Loss: 0.625470
Train Epoch: 87 [012/014], Loss: 0.636612
Train Epoch: 87 [014/014], Loss: 0.610670
Train Epoch: 88 [002/014], Loss: 0.637948
Train Epoch: 88 [004/014], Loss: 0.638689
Train Epoch: 88 [006/014], Loss: 0.621212
Train Epoch: 88 [008/014], Loss: 0.644140
Train Epoch: 88 [010/014], Loss: 0.631334
Train Epoch: 88 [012/014], Loss: 0.626480
Train Epoch: 88 [014/014], Loss: 0

In [463]:
test_dataset = utils.TensorDataset(torch.FloatTensor(X_test),
                                   torch.FloatTensor(y_test))
test_dataloader = utils.DataLoader(test_dataset,batch_size=24)

y_pred_lst = []
y_truth_lst = []

with torch.no_grad():
    for i, (X, y) in enumerate(test_dataloader):
        
        output = likelihood(model(X.cuda()))
        
        y_pred = output.mean.ge(0.5).float().cpu().numpy()
        
        
        y_pred_lst.append(y_pred)
        y_truth_lst.append(y.numpy())
    truth = np.concatenate(y_truth_lst)
    pred =  np.concatenate(y_pred_lst)
    print(truth)
    print(pred)
    auc = roc_auc_score(truth,pred)
                   
print(auc)



[0. 0. 0. ... 1. 0. 0.]
[1. 1. 1. ... 1. 1. 1.]
0.5
