## try building a gpytorch model

In [1]:
import gpytorch
import torch
from torch.optim import SGD, Adam
from torch.optim.lr_scheduler import MultiStepLR
from torch import nn

from src.models.base_cnn import BaseCNN
from src.data.data_loader import get_splits, create_dataloaders

import math

import tqdm

Get some data

In [2]:
X_train, y_train, X_test, y_test = get_splits()

Train X dimensions: (238624, 101, 4) Test X dimensions: (26513, 101, 4)


In [31]:
test = torch.from_numpy(X_train)
torch.equal(test.reshape(-1, 404).reshape(-1, 101, 4), test)

True

In [3]:
start_size = 1000

device = torch.device('cuda')
train_loader, test_loader, dim_size = create_dataloaders(X_train=X_train.reshape((-1,))[:start_size], y_train=y_train[:start_size], X_test=X_test, y_test=y_test, device=device, batch_size=1, test_batch_size=1)

define a model

In [45]:
class BaseCNN(nn.Module):
    def __init__(self, seq_len: int = 101, 
                       dropout_prob: float = 0.15,
                       MLP_out_dim: int = 50) -> None:
        super().__init__()

        # configs
        self.dropout_prob = dropout_prob
        self.seq_len = seq_len

        # layers
        self.conv1 = nn.Conv1d(in_channels=4, out_channels=self.seq_len, kernel_size=4)
        self.pool1 = nn.MaxPool1d(kernel_size=3)
        self.conv2 = nn.Conv1d(in_channels=self.seq_len, out_channels=self.seq_len//2, kernel_size=4)
        self.pool2 = nn.MaxPool1d(kernel_size=3)
        self.dense = nn.Linear(in_features=450, out_features=MLP_out_dim)
        self.output = nn.Linear(in_features=MLP_out_dim, out_features=2)
    
    def forward(self, x: torch.Tensor) -> torch.Tensor:
        x = x.reshape(-1, 101, 4)
        x = x.transpose(1, 2)
        x = self.conv1(x)
        x = nn.functional.relu(x)
        x = self.pool1(x)
        
        x = self.conv2(x)
        x = nn.functional.relu(x)
        x = self.pool2(x)
        x = nn.functional.dropout(x, p=self.dropout_prob)        

        x = x.reshape((x.size(0), -1))
        x = self.dense(x)
        x = nn.functional.relu(x)
        x = nn.functional.dropout(x, p=self.dropout_prob)        

        x = self.output(x)

        return x

In [33]:
class GPRegressionModel(gpytorch.models.ExactGP):
        def __init__(self, train_x, train_y, likelihood):
            super(GPRegressionModel, self).__init__(train_x, train_y, likelihood)
            self.mean_module = gpytorch.means.ConstantMean()
            self.covar_module = gpytorch.kernels.GridInterpolationKernel(
                gpytorch.kernels.ScaleKernel(gpytorch.kernels.RBFKernel(ard_num_dims=2)),
                num_dims=2, grid_size=100
            )
            self.feature_extractor = BaseCNN()

            # This module will scale the NN features so that they're nice values
            self.scale_to_bounds = gpytorch.utils.grid.ScaleToBounds(-1., 1.)

        def forward(self, x):
            # We're first putting our data through a deep net (feature extractor)
            projected_x = self.feature_extractor(x)
            projected_x = self.scale_to_bounds(projected_x)  # Make the NN values "nice"

            mean_x = self.mean_module(projected_x)
            covar_x = self.covar_module(projected_x)
            mvn = gpytorch.distributions.MultivariateNormal(mean_x, covar_x)

            return mvn

In [34]:
likelihood = gpytorch.likelihoods.GaussianLikelihood()
model = GPRegressionModel(torch.from_numpy(X_train[:start_size]).reshape(-1, 404).float().to(device), torch.from_numpy(y_train[:start_size]).float().to(device), likelihood)

if torch.cuda.is_available():
    model = model.cuda()
    likelihood = likelihood.cuda()

In [46]:
training_iterations = 100

# Find optimal model hyperparameters
model.train()
likelihood.train()

# Use the adam optimizer
optimizer = Adam([
    {'params': model.feature_extractor.parameters()},
    {'params': model.covar_module.parameters()},
    {'params': model.mean_module.parameters()},
    {'params': model.likelihood.parameters()},
], lr=0.001)

# "Loss" for GPs - the marginal log likelihood
mll = gpytorch.mlls.ExactMarginalLogLikelihood(likelihood, model)

def train():
    iterator = tqdm.tqdm(range(training_iterations))
    for i in iterator:
        # Zero backprop gradients
        optimizer.zero_grad()
        # Get output from model
        output = model(torch.from_numpy(X_train[:start_size]).reshape(-1, 404).float().to(device))
        # Calc loss and backprop derivatives
        loss = -mll(output, torch.from_numpy(y_train[:start_size]).float().to(device))
        loss.backward()
        iterator.set_postfix(loss=loss.item())
        optimizer.step()

In [47]:
train()

  2%|▏         | 2/100 [00:00<00:08, 11.42it/s, loss=0.846]

input shape: torch.Size([1000, 404])
shape before densetorch.Size([1000, 450])
input shape: torch.Size([1000, 404])
shape before densetorch.Size([1000, 450])
input shape: torch.Size([1000, 404])
shape before densetorch.Size([1000, 450])


  4%|▍         | 4/100 [00:00<00:08, 11.89it/s, loss=0.84] 

input shape: torch.Size([1000, 404])
shape before densetorch.Size([1000, 450])
input shape: torch.Size([1000, 404])
shape before densetorch.Size([1000, 450])
input shape: torch.Size([1000, 404])
shape before densetorch.Size([1000, 450])


  8%|▊         | 8/100 [00:00<00:07, 12.60it/s, loss=0.845]

input shape: torch.Size([1000, 404])
shape before densetorch.Size([1000, 450])
input shape: torch.Size([1000, 404])
shape before densetorch.Size([1000, 450])
input shape: torch.Size([1000, 404])
shape before densetorch.Size([1000, 450])


 10%|█         | 10/100 [00:00<00:07, 12.72it/s, loss=0.844]

input shape: torch.Size([1000, 404])
shape before densetorch.Size([1000, 450])
input shape: torch.Size([1000, 404])
shape before densetorch.Size([1000, 450])
input shape: torch.Size([1000, 404])
shape before densetorch.Size([1000, 450])


 14%|█▍        | 14/100 [00:01<00:06, 12.94it/s, loss=0.844]

input shape: torch.Size([1000, 404])
shape before densetorch.Size([1000, 450])
input shape: torch.Size([1000, 404])
shape before densetorch.Size([1000, 450])
input shape: torch.Size([1000, 404])
shape before densetorch.Size([1000, 450])


 16%|█▌        | 16/100 [00:01<00:06, 12.93it/s, loss=0.842]

input shape: torch.Size([1000, 404])
shape before densetorch.Size([1000, 450])
input shape: torch.Size([1000, 404])
shape before densetorch.Size([1000, 450])
input shape: torch.Size([1000, 404])
shape before densetorch.Size([1000, 450])


 20%|██        | 20/100 [00:01<00:06, 13.01it/s, loss=0.844]

input shape: torch.Size([1000, 404])
shape before densetorch.Size([1000, 450])
input shape: torch.Size([1000, 404])
shape before densetorch.Size([1000, 450])
input shape: torch.Size([1000, 404])
shape before densetorch.Size([1000, 450])


 22%|██▏       | 22/100 [00:01<00:06, 12.98it/s, loss=0.837]

input shape: torch.Size([1000, 404])
shape before densetorch.Size([1000, 450])
input shape: torch.Size([1000, 404])
shape before densetorch.Size([1000, 450])
input shape: torch.Size([1000, 404])
shape before densetorch.Size([1000, 450])


 26%|██▌       | 26/100 [00:02<00:05, 12.92it/s, loss=0.837]

input shape: torch.Size([1000, 404])
shape before densetorch.Size([1000, 450])
input shape: torch.Size([1000, 404])
shape before densetorch.Size([1000, 450])
input shape: torch.Size([1000, 404])
shape before densetorch.Size([1000, 450])


 28%|██▊       | 28/100 [00:02<00:05, 12.99it/s, loss=0.838]

input shape: torch.Size([1000, 404])
shape before densetorch.Size([1000, 450])
input shape: torch.Size([1000, 404])
shape before densetorch.Size([1000, 450])
input shape: torch.Size([1000, 404])
shape before densetorch.Size([1000, 450])


 32%|███▏      | 32/100 [00:02<00:05, 13.09it/s, loss=0.836]

input shape: torch.Size([1000, 404])
shape before densetorch.Size([1000, 450])
input shape: torch.Size([1000, 404])
shape before densetorch.Size([1000, 450])
input shape: torch.Size([1000, 404])
shape before densetorch.Size([1000, 450])


 34%|███▍      | 34/100 [00:02<00:05, 13.09it/s, loss=0.836]

input shape: torch.Size([1000, 404])
shape before densetorch.Size([1000, 450])
input shape: torch.Size([1000, 404])
shape before densetorch.Size([1000, 450])
input shape: torch.Size([1000, 404])
shape before densetorch.Size([1000, 450])


 38%|███▊      | 38/100 [00:02<00:04, 12.99it/s, loss=0.834]

input shape: torch.Size([1000, 404])
shape before densetorch.Size([1000, 450])
input shape: torch.Size([1000, 404])
shape before densetorch.Size([1000, 450])
input shape: torch.Size([1000, 404])
shape before densetorch.Size([1000, 450])


 40%|████      | 40/100 [00:03<00:04, 12.93it/s, loss=0.831]

input shape: torch.Size([1000, 404])
shape before densetorch.Size([1000, 450])
input shape: torch.Size([1000, 404])
shape before densetorch.Size([1000, 450])
input shape: torch.Size([1000, 404])
shape before densetorch.Size([1000, 450])


 44%|████▍     | 44/100 [00:03<00:04, 13.08it/s, loss=0.831]

input shape: torch.Size([1000, 404])
shape before densetorch.Size([1000, 450])
input shape: torch.Size([1000, 404])
shape before densetorch.Size([1000, 450])
input shape: torch.Size([1000, 404])
shape before densetorch.Size([1000, 450])


 46%|████▌     | 46/100 [00:03<00:04, 13.09it/s, loss=0.833]

input shape: torch.Size([1000, 404])
shape before densetorch.Size([1000, 450])
input shape: torch.Size([1000, 404])
shape before densetorch.Size([1000, 450])
input shape: torch.Size([1000, 404])
shape before densetorch.Size([1000, 450])


 50%|█████     | 50/100 [00:03<00:03, 13.08it/s, loss=0.83] 

input shape: torch.Size([1000, 404])
shape before densetorch.Size([1000, 450])
input shape: torch.Size([1000, 404])
shape before densetorch.Size([1000, 450])
input shape: torch.Size([1000, 404])
shape before densetorch.Size([1000, 450])


 52%|█████▏    | 52/100 [00:04<00:03, 12.99it/s, loss=0.829]

input shape: torch.Size([1000, 404])
shape before densetorch.Size([1000, 450])
input shape: torch.Size([1000, 404])
shape before densetorch.Size([1000, 450])
input shape: torch.Size([1000, 404])
shape before densetorch.Size([1000, 450])


 56%|█████▌    | 56/100 [00:04<00:03, 13.05it/s, loss=0.827]

input shape: torch.Size([1000, 404])
shape before densetorch.Size([1000, 450])
input shape: torch.Size([1000, 404])
shape before densetorch.Size([1000, 450])
input shape: torch.Size([1000, 404])
shape before densetorch.Size([1000, 450])


 58%|█████▊    | 58/100 [00:04<00:03, 13.03it/s, loss=0.826]

input shape: torch.Size([1000, 404])
shape before densetorch.Size([1000, 450])
input shape: torch.Size([1000, 404])
shape before densetorch.Size([1000, 450])
input shape: torch.Size([1000, 404])
shape before densetorch.Size([1000, 450])


 62%|██████▏   | 62/100 [00:04<00:02, 12.82it/s, loss=0.827]

input shape: torch.Size([1000, 404])
shape before densetorch.Size([1000, 450])
input shape: torch.Size([1000, 404])
shape before densetorch.Size([1000, 450])
input shape: torch.Size([1000, 404])
shape before densetorch.Size([1000, 450])


 64%|██████▍   | 64/100 [00:05<00:02, 12.85it/s, loss=0.825]

input shape: torch.Size([1000, 404])
shape before densetorch.Size([1000, 450])
input shape: torch.Size([1000, 404])
shape before densetorch.Size([1000, 450])
input shape: torch.Size([1000, 404])
shape before densetorch.Size([1000, 450])


 66%|██████▌   | 66/100 [00:05<00:02, 12.72it/s, loss=0.825]

input shape: torch.Size([1000, 404])
shape before densetorch.Size([1000, 450])





KeyboardInterrupt: 

In [44]:
model.eval()
likelihood.eval()
with torch.no_grad(), gpytorch.settings.use_toeplitz(False), gpytorch.settings.fast_pred_var():
    preds = model(torch.from_numpy(X_test).reshape(-1, 404).float().to(device))

print('Test MSE: {}'.format(torch.mean((preds.mean.cpu() - y_test) ** 2)))

input shape: torch.Size([27513, 404])
shape before densetorch.Size([27513, 450])
Test MSE: 0.14424868101405325
