In [None]:
#| hide
#| default_exp simple_gp_imputation

# Simple GP Imputation

> Imputation using simple Gaussian Processes (1 per variable)

In [None]:
from gpfa_imputation.data_preparation import Normalizer
from gpfa_imputation.learner import NormParam
import gpytorch
import torch
from torch import Tensor
from tqdm.auto import tqdm

## GP

In [None]:
#| export
class SimpleGP(gpytorch.models.ExactGP):
    """Exact GP with a constant mean and a scaled RBF kernel."""

    def __init__(self, train_x, train_y, likelihood):
        super().__init__(train_x, train_y, likelihood)
        # Mean is registered before the kernel, same as the attribute order used so far
        self.mean_module = gpytorch.means.ConstantMean()
        rbf = gpytorch.kernels.RBFKernel()
        self.covar_module = gpytorch.kernels.ScaleKernel(rbf)

    def forward(self, x, **params):
        """Return the multivariate normal built from the mean and covariance evaluated at `x`.

        Extra keyword arguments are forwarded to the covariance module only.
        """
        return gpytorch.distributions.MultivariateNormal(
            self.mean_module(x),
            self.covar_module(x, **params),
        )

## Learner

In [None]:
#| export
class SimpleGPLearner():
    """Wraps one `SimpleGP`: normalizes the data, fits the GP and predicts in the original scale."""

    def __init__(self,
                 X: Tensor, # (n_features * n_obs) Multivariate time series
                 T: Tensor = None, # (n_obs) Vector of time of observations.
                 # If None, observation times default to consecutive integers (see `default_time`)
                 latent_dims: int = 1 # Number of latent variables in GPFA (not read by this class)
                ):
        self.prepare_X(X)
        # Use the supplied times when given, otherwise fall back to the default ones
        if T is not None:
            self.T = T
        else:
            self.default_time(X)
        self.T = self.T.to(X.device) # keep times on the same device as the data (GPU support)

        self.likelihood = gpytorch.likelihoods.GaussianLikelihood()
        self.model = SimpleGP(self.T, self.X, self.likelihood)

    @torch.no_grad()
    def prepare_X(self, X):
        """Build a `Normalizer` from `X`; store it as `self.norm` and the normalized data as `self.X`."""
        normalizer = Normalizer(X)
        self.norm = normalizer
        self.X = normalizer.normalize(X)

    def default_time(self, X):
        """Set `self.T` to `0, 1, ..., X.shape[0] - 1`."""
        n_steps = X.shape[0]
        self.T = torch.arange(n_steps)

    def train(self, n_iter=100, lr=0.1):
        """Run `n_iter` Adam steps (learning rate `lr`) on the negative exact marginal log likelihood.

        The loss of every iteration is stored in `self.losses`.
        """
        # Both the model and the likelihood must be in training mode
        for module in (self.model, self.likelihood):
            module.train()

        adam = torch.optim.Adam(self.model.parameters(), lr=lr)

        self.losses = torch.zeros(n_iter)
        # The marginal log likelihood plays the role of the "loss" for GPs
        marginal_ll = gpytorch.mlls.ExactMarginalLogLikelihood(self.likelihood, self.model)
        for step in tqdm(range(n_iter)):
            adam.zero_grad()  # drop gradients left over from the previous step
            model_out = self.model(self.T)
            neg_mll = -marginal_ll(model_out, self.X)
            self.losses[step] = neg_mll.detach()
            neg_mll.backward()
            # Hook runs after backprop and before the parameter update
            self.printer(step)

            adam.step()

    def printer(self, i):
        """Per-iteration hook called from `train`; does nothing here."""
        return None

    @torch.no_grad() # predictions don't need gradients
    def predict_raw(self, T):
        """Switch to eval mode and return the likelihood applied to the model output at `T`."""
        self.model.eval()
        self.likelihood.eval()
        model_out = self.model(T)
        return self.likelihood(model_out)

    @torch.no_grad() # the raw output may still carry gradient information
    def prediction_from_raw(self, raw_mean, raw_std):
        """Turn a raw prediction into a final one by reversing the normalization of mean and std."""
        mean = self.norm.reverse_normalize(raw_mean).detach()
        std = self.norm.reverse_normalize_std(raw_std).detach()
        return NormParam(mean, std)

    def predict(self, T):
        """Predict at times `T` and return the result in the original data scale."""
        raw = self.predict_raw(T)
        raw_mean, raw_std = raw.mean, raw.stddev
        return self.prediction_from_raw(raw_mean, raw_std)

In [None]:
# Toy 1-D series of four increasing values used to try out the learner
X = torch.tensor([1.,2,3,4])

In [None]:
# No T is passed, so the learner uses default_time: torch.arange(X.shape[0])
l = SimpleGPLearner(X)

In [None]:
# Fit with the default settings (n_iter=100, lr=0.1)
l.train()

  0%|          | 0/100 [00:00<?, ?it/s]

torch.linalg.solve_triangular has its arguments reversed and does not return a copy of one of the inputs.
X = torch.triangular_solve(B, A).solution
should be replaced with
X = torch.linalg.solve_triangular(A, B). (Triggered internally at  ../aten/src/ATen/native/BatchLinearAlgebra.cpp:2183.)
  res = torch.triangular_solve(right_tensor, self.evaluate(), upper=self.upper).solution


In [None]:
# Predict at t=5, which lies beyond the default training times 0..3
l.predict(torch.tensor([5]))

NormalParameters(mean=tensor([5.5955]), std=tensor([0.1980]))

## Imputation

        self.models = []
        self.likelihoods = []
        
        for i in range(X.shape[1]):
            self.make_var_model(i)
        
    def make_var_model(self, i):
        """Create the GP for variable `i` and register it on the instance.

        Takes `self.X[i]` as the training targets and `self.T` as the training
        inputs, builds a fresh `GaussianLikelihood` and a `SimpleGP`, and appends
        them to `self.likelihoods` and `self.models` respectively, so both lists
        stay index-aligned.
        """
        # NOTE(review): this indexes the first axis of self.X, while the visible
        # caller loops over range(X.shape[1]) — confirm the axes agree.
        x = self.X[i]
        # Fix: the variable was misspelled `lihelihood`, so the two uses of
        # `likelihood` below raised NameError.
        likelihood = gpytorch.likelihoods.GaussianLikelihood()
        model = SimpleGP(self.T, x, likelihood)
        self.models.append(model)
        self.likelihoods.append(likelihood)