In [1]:
import numpy as np
import matplotlib.pyplot as plt
import math
import torch
import gpytorch
import tqdm
from torch.utils.data import TensorDataset, DataLoader

%matplotlib inline
%load_ext autoreload
%autoreload 2

In [2]:
all_clusters = np.loadtxt('all_clusters.txt')
all_labs = np.loadtxt('all_labs.txt')
all_labs_cp = np.loadtxt('all_labs_cleaned.txt')
print(all_clusters.shape, all_labs.shape, all_labs_cp.shape)

(694764, 7) (694764,) (694764,)


In [3]:
train_x = all_clusters[all_labs_cp!=-1.]
train_y = all_labs_cp[all_labs_cp!=-1.]
train_x = torch.tensor(train_x, dtype=torch.float32)
train_y = torch.tensor(train_y, dtype=torch.int64)

train_y = torch.nn.functional.one_hot(train_y)
# convert y to one-hot vector for each event
train_y = torch.tensor(train_y, dtype=torch.float32)

print(train_x.shape, train_y.shape)
print(train_x.dtype, train_y.dtype)
# print(train_x[:-10], train_y[:-10])

torch.Size([692025, 7]) torch.Size([692025, 13])
torch.float32 torch.float32


  train_y = torch.tensor(train_y, dtype=torch.float32)


In [4]:
print(torch.rand(10).dtype)

torch.float32


In [5]:
train_dataset = TensorDataset(train_x, train_y)
train_loader = DataLoader(train_dataset, batch_size=1024, shuffle=True)

In [6]:
class MultitaskBernoulliLikelihood(gpytorch.likelihoods.Likelihood):
    def forward(self, function_samples, **kwargs):

#         prob = torch.exp(function_samples)
        prob = function_samples
#         print(prob.shape, prob.dtype)
        output_probs = torch.distributions.Normal(0, 1).cdf(prob)
        out = torch.distributions.Independent(torch.distributions.Bernoulli(probs=output_probs), 1)
#         out = torch.distributions.Independent(torch.distributions.Bernoulli(logits=output_probs), 1)
#         print(out)
    
        return out

In [7]:
num_latents = 9
num_tasks = 13
input_dim=train_x.shape[-1]
num_ind_points = 1000

class MultitaskGPModel(gpytorch.models.ApproximateGP):
    def __init__(self):
        # Let's use a different set of inducing points for each latent function
        inducing_points = torch.rand(num_latents, num_ind_points, input_dim)
        print(inducing_points.shape)

        # We have to mark the CholeskyVariationalDistribution as batch
        # so that we learn a variational distribution for each task
        variational_distribution = gpytorch.variational.CholeskyVariationalDistribution(
            inducing_points.size(-2), batch_shape=torch.Size([num_latents])
        )

        # We have to wrap the VariationalStrategy in a LMCVariationalStrategy
        # so that the output will be a MultitaskMultivariateNormal rather than a batch output
        variational_strategy = gpytorch.variational.LMCVariationalStrategy(
            gpytorch.variational.VariationalStrategy(
                self, inducing_points, variational_distribution, learn_inducing_locations=True
            ),
            num_tasks=num_tasks,
            num_latents=num_latents,
            latent_dim=-1
        )

        super().__init__(variational_strategy)

        # The mean and covariance modules should be marked as batch
        # so we learn a different set of hyperparameters
        self.mean_module = gpytorch.means.ConstantMean(batch_shape=torch.Size([num_latents]))
        self.covar_module = gpytorch.kernels.ScaleKernel(
            gpytorch.kernels.MaternKernel(batch_shape=torch.Size([num_latents])),
            batch_shape=torch.Size([num_latents])
        )

    def forward(self, x):
        # The forward function should be written as if we were dealing with each output
        # dimension in batch
        mean_x = self.mean_module(x)
        covar_x = self.covar_module(x)
        return gpytorch.distributions.MultivariateNormal(mean_x, covar_x)


model = MultitaskGPModel()
# likelihood = gpytorch.likelihoods.MultitaskGaussianLikelihood(num_tasks=num_tasks)
likelihood = MultitaskBernoulliLikelihood()

if torch.cuda.is_available():
    model = model.cuda()
    likelihood = likelihood.cuda()

torch.Size([9, 1000, 7])


In [8]:
num_epochs = 1


model.train()
likelihood.train()

optimizer = torch.optim.Adam([
    {'params': model.parameters()},
    {'params': likelihood.parameters()},
], lr=0.1)

# Our loss object. We're using the VariationalELBO, which essentially just computes the ELBO
# print(train_y.size(0))
mll = gpytorch.mlls.VariationalELBO(likelihood, model, num_data=train_y.size(0))

losses = []

# We use more CG iterations here because the preconditioner introduced in the NeurIPS paper seems to be less
# effective for VI.
epochs_iter = tqdm.tqdm_notebook(range(num_epochs), desc="Epoch")
for i in epochs_iter:
    # Within each iteration, we will go over each minibatch of data
    minibatch_iter = tqdm.notebook.tqdm(train_loader, desc="Minibatch", leave=False)
    for x_batch, y_batch in minibatch_iter:
        if torch.cuda.is_available():
                x_batch, y_batch = x_batch.cuda(), y_batch.cuda()
        
        optimizer.zero_grad()
        
#         model.float()
#         likelihood.double()
    
#         print(x_batch.dtype)
#         print(y_batch.dtype)
        
        output = model(x_batch)
        loss = -mll(output, y_batch)
        losses.append(loss)
        minibatch_iter.set_postfix(loss=loss.item())
        loss.backward()
        optimizer.step()
        
        predictions = likelihood(output)
    
    
plt.plot(losses)  

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  epochs_iter = tqdm.tqdm_notebook(range(num_epochs), desc="Epoch")


HBox(children=(HTML(value='Epoch'), FloatProgress(value=0.0, max=1.0), HTML(value='')))

HBox(children=(HTML(value='Minibatch'), FloatProgress(value=0.0, max=676.0), HTML(value='')))




KeyboardInterrupt: 

In [None]:
# Set into eval mode
model.eval()
likelihood.eval()

# Initialize plots
fig, axs = plt.subplots(input_dim, (num_tasks-5), figsize=(3 * input_dim, 4 * (num_tasks -5)))

with torch.no_grad():
    for x_batch, y_batch in train_loader:
        if torch.cuda.is_available():
                x_batch, y_batch = x_batch.cuda(), y_batch.cuda()
                
        predictions = likelihood(model(x_batch))
#         mean = predictions.mean.ge(0.5).float()
        mean = predictions.mean
        
        for xdim in range(7):
            for task in range(num_tasks - 5):
                ax = axs[xdim][task]
                
                
                ax.plot(x_batch[:, xdim].detach().numpy(), mean[:, task].detach().numpy(), '*b')
                ax.plot(x_batch[:, xdim].detach().numpy(), y_batch[:, task].detach().numpy(), 'xr', alpha=0.2)

                # Shade in confidence
        #         ax.fill_between(
        #             test_x[:, xdim].detach().numpy(),
        #             lower[:, task].detach().numpy(),
        #             upper[:, task].detach().numpy(),
        #             alpha=0.5,
        #         )
                ax.set_ylim([-0.1, 1.1])
                ax.legend([ 'Mean', 'Observed Data','Confidence'])
                ax.set_title(f'Task {task + 1}')
        break

fig.tight_layout()
None

In [None]:
model.eval()
likelihood.eval()
maes = []
with torch.no_grad():
    for x_batch, y_batch in train_loader:
        if torch.cuda.is_available():
                x_batch, y_batch = x_batch.cuda(), y_batch.cuda()
        model.double()
        x_batch = x_batch.double()
        predictions = likelihood(model(x_batch))
        preds = predictions.mean.ge(0.5).float()
        
#         print(y_batch[0])
        if torch.rand(1) > 0.95:
#             print(preds[0:5])
            print(predictions.mean[0])
        
        mae = torch.mean(torch.abs(preds - y_batch))
#         print(means.shape, preds.shape,y_batch.shape)
        maes.append(mae.numpy())
        break
    
maes = np.array(maes)
print(maes.mean(), maes.min(), maes.max())

In [None]:
# SVGP https://docs.gpytorch.ai/en/stable/examples/04_Variational_and_Approximate_GPs/SVGP_Regression_CUDA.html
# SVGP CLass https://docs.gpytorch.ai/en/stable/examples/04_Variational_and_Approximate_GPs/Non_Gaussian_Likelihoods.html
# DKL Multiclass https://docs.gpytorch.ai/en/stable/examples/06_PyTorch_NN_Integration_DKL/Deep_Kernel_Learning_DenseNet_CIFAR_Tutorial.html
# Exact Dirichlet https://docs.gpytorch.ai/en/stable/examples/01_Exact_GPs/GP_Regression_on_Classification_Labels.html?highlight=dirichlet

# https://github.com/cornellius-gp/gpytorch/issues/1396

In [None]:
# 0.07692308
# tensor([0.1282, 0.1795, 0.0893, 0.0634, 0.0562, 0.0672, 0.0699, 0.0698, 0.0961,
#         0.0345, 0.0263, 0.0643, 0.0166], dtype=torch.float64)