In [None]:
import numpy as np
import matplotlib.pyplot as plt

In [None]:
# Read the feature matrix and the two label vectors from plain-text files
# located in the current working directory.
# NOTE(review): 'all_labs_cleaned.txt' is presumably a filtered copy of
# 'all_labs.txt' in which rejected rows are marked with -1 -- confirm.
all_clusters = np.loadtxt('all_clusters.txt')
all_labs = np.loadtxt('all_labs.txt')
all_labs_cp = np.loadtxt('all_labs_cleaned.txt')

# Quick sanity check: the three arrays are expected to agree on their first axis.
print(*(arr.shape for arr in (all_clusters, all_labs, all_labs_cp)))

In [None]:
import tqdm
import math
import torch
import gpytorch
from matplotlib import pyplot as plt

# Make plots inline
%matplotlib inline

In [None]:
# Keep only the rows whose cleaned label is not the sentinel value -1.
# The mask is built once so features and labels are guaranteed to stay aligned.
labelled_mask = all_labs_cp != -1.
train_x = torch.tensor(all_clusters[labelled_mask])
train_y = torch.tensor(all_labs_cp[labelled_mask])
print(train_x.shape, train_y.shape)

# Preview the first 10 samples. The previous slice `[:-10]` selected every
# row *except* the last 10, i.e. almost the whole dataset, not a preview.
print(train_x[:10], train_y[:10])

In [None]:
from torch.utils.data import TensorDataset, DataLoader
# Pair features with labels and serve them as shuffled minibatches of 1024.
train_dataset = TensorDataset(train_x, train_y)
train_loader = DataLoader(
    dataset=train_dataset,
    batch_size=1024,
    shuffle=True,
)

# Template for a held-out loader (test_x / test_y are not defined in this notebook yet):
# test_dataset = TensorDataset(test_x, test_y)
# test_loader = DataLoader(test_dataset, batch_size=1024, shuffle=False)

In [None]:
from gpytorch.models import ApproximateGP
from gpytorch.variational import CholeskyVariationalDistribution
from gpytorch.variational import VariationalStrategy


class GPModel(ApproximateGP):
    """Sparse variational GP with a constant mean and a scaled Matern kernel.

    Args:
        inducing_points: Tensor of initial inducing locations; its first
            dimension sets the size of the variational distribution.
    """

    def __init__(self, inducing_points):
        num_inducing = inducing_points.size(0)
        q_u = CholeskyVariationalDistribution(num_inducing)

        # The inducing locations are registered as learnable so they are
        # optimised together with the other model parameters.
        strategy = VariationalStrategy(
            self,
            inducing_points,
            q_u,
            learn_inducing_locations=True,
        )
        super().__init__(strategy)

        self.mean_module = gpytorch.means.ConstantMean()
        base_kernel = gpytorch.kernels.MaternKernel()
        self.covar_module = gpytorch.kernels.ScaleKernel(base_kernel)

    def forward(self, x):
        """Return the GP prior at `x` as a MultivariateNormal."""
        prior_mean = self.mean_module(x)
        prior_covar = self.covar_module(x)
        return gpytorch.distributions.MultivariateNormal(prior_mean, prior_covar)

# Initialise the inducing points with every 1000th training row.
inducing_points = train_x[::1000, :]
print(inducing_points.shape)

model = GPModel(inducing_points=inducing_points)
likelihood = gpytorch.likelihoods.GaussianLikelihood()

# Move both modules to the GPU when one is present.
if torch.cuda.is_available():
    model, likelihood = model.cuda(), likelihood.cuda()

In [None]:
num_epochs = 20

# Cast the modules to float64 once, before the optimizer is built, instead of
# on every minibatch: the cast is loop-invariant.
model.double()
likelihood.double()

model.train()
likelihood.train()

optimizer = torch.optim.Adam(
    [
        {'params': model.parameters()},
        {'params': likelihood.parameters()},
    ],
    lr=0.1,
)

# Our loss object. We're using the VariationalELBO
mll = gpytorch.mlls.VariationalELBO(likelihood, model, num_data=train_y.size(0))

# One Python float per minibatch. Storing the loss tensor itself would keep
# each minibatch's autograd graph alive and cannot be plotted, because tensors
# that require grad (or live on the GPU) do not convert to NumPy.
losses = []
epochs_iter = tqdm.notebook.tqdm(range(num_epochs), desc="Epoch")
for i in epochs_iter:
    # Within each iteration, we will go over each minibatch of data
    minibatch_iter = tqdm.notebook.tqdm(train_loader, desc="Minibatch", leave=False)
    for x_batch, y_batch in minibatch_iter:
        if torch.cuda.is_available():
            x_batch, y_batch = x_batch.cuda(), y_batch.cuda()
        # Match the float64 precision of the model parameters.
        x_batch = x_batch.double()
        y_batch = y_batch.double()

        optimizer.zero_grad()
        output = model(x_batch)
        loss = -mll(output, y_batch)
        loss.backward()
        optimizer.step()

        loss_value = loss.item()
        losses.append(loss_value)
        minibatch_iter.set_postfix(loss=loss_value)

plt.plot(losses)
plt.xlabel('Minibatch step')
plt.ylabel('Negative ELBO')
plt.show()

In [None]:
# Convert every entry to a plain float first: this works whether `losses`
# holds Python numbers or scalar tensors (which matplotlib cannot convert to
# NumPy while they still require grad or live on the GPU).
plt.plot([float(loss_value) for loss_value in losses])

In [None]:
model.eval()
likelihood.eval()
model.double()

# train_loader shuffles, so predictions must be paired with the targets of the
# *same* minibatch. Comparing the concatenated predictions against `train_y`
# in its original order would compare unrelated samples.
pred_chunks = []
target_chunks = []
with torch.no_grad():
    for x_batch, y_batch in train_loader:
        if torch.cuda.is_available():
            x_batch, y_batch = x_batch.cuda(), y_batch.cuda()
        x_batch = x_batch.double()
        # Round the posterior mean to the nearest integer label; the model is
        # evaluated once per batch.
        mean = torch.round(model(x_batch).mean)
        pred_chunks.append(mean.cpu())
        target_chunks.append(y_batch.double().cpu())

means = torch.cat(pred_chunks)
targets = torch.cat(target_chunks)
# NOTE(review): this metric is computed on the training data even though the
# message says "Test" -- rename once a held-out split exists.
print('Test MAE: {}'.format(torch.mean(torch.abs(means - targets))))

In [None]:
# SVGP https://docs.gpytorch.ai/en/stable/examples/04_Variational_and_Approximate_GPs/SVGP_Regression_CUDA.html
# SVGP classification https://docs.gpytorch.ai/en/stable/examples/04_Variational_and_Approximate_GPs/Non_Gaussian_Likelihoods.html
# DKL Multiclass https://docs.gpytorch.ai/en/stable/examples/06_PyTorch_NN_Integration_DKL/Deep_Kernel_Learning_DenseNet_CIFAR_Tutorial.html
# Exact Dirichlet https://docs.gpytorch.ai/en/stable/examples/01_Exact_GPs/GP_Regression_on_Classification_Labels.html?highlight=dirichlet

# https://github.com/cornellius-gp/gpytorch/issues/1396

In [None]:
# Set into eval mode
model.eval()
likelihood.eval()

# Number of input dimensions to visualise, one subplot per dimension.
num_dims = 7

# Initialize plots
fig, axs = plt.subplots(num_dims, 1, figsize=(4, 3 * num_dims))

with torch.no_grad():
    for x_batch, y_batch in train_loader:
        if torch.cuda.is_available():
            x_batch, y_batch = x_batch.cuda(), y_batch.cuda()

        mean = torch.round(model(x_batch.double()).mean)

        # Bring everything back to host memory before handing it to
        # matplotlib: .numpy() raises on CUDA tensors.
        x_np = x_batch.detach().cpu().numpy()
        y_np = y_batch.detach().cpu().numpy()
        mean_np = mean.detach().cpu().numpy()

        for xdim in range(num_dims):
            ax = axs[xdim]

            ax.plot(x_np[:, xdim], mean_np, '*b')
            ax.plot(x_np[:, xdim], y_np, 'xr', alpha=0.5)
            # Only two artists are drawn, so only two legend entries.
            ax.legend(['Mean', 'Observed Data'])
            # Each panel shows a different input dimension, so label it as such
            # (previously every panel was titled "Task 2").
            ax.set_title(f'Input dimension {xdim + 1}')
        # Only the first minibatch is plotted.
        break

fig.tight_layout()
None