In [165]:
import gpytorch
import torch
from gpytorch.kernels import RBFKernel
from torch.distributions import MultivariateNormal
from fastprogress.fastprogress import progress_bar

In [71]:
X = torch.tensor(
    [[1, 3],
     [2, 5],
     [3, 4]], dtype=torch.float32)

In [72]:
X = X.view(-1) 

In [73]:
T = torch.tensor([1,2,3], dtype=torch.float32)

In [74]:
X = X - X.mean() # important the mean is assumed to be zero

In [112]:
class GPFAKernel(gpytorch.kernels.Kernel):
    has_lengthscale = False
    def __init__(self, n_features, latent_kernel, psi = None, **kwargs):
        super(GPFAKernel, self).__init__(**kwargs)
        
        # Number of features in the X for each time step
        self.n_features = n_features
        self.latent_dims = 1 # More than 1 features is not implemented yet
        
        self.register_parameter(
            name = "Lambda",
            parameter = torch.nn.Parameter(torch.ones(self.n_features, self.latent_dims)))
        
        self.latent_kernel = latent_kernel
        
        self.register_parameter(
            name = "raw_psi_diag",
            parameter = torch.nn.Parameter(torch.zeros(self.n_features) if psi is None else psi)) # check that is is actually working as intented
        self.register_constraint("raw_psi_diag", gpytorch.constraints.Positive())
    
    # now set up the 'actual' parameter
    @property
    def psi(self):
        # when accessing the parameter, apply the constraint transform
        return self.raw_psi_diag_constraint.transform(self.raw_psi_diag)

    @psi.setter
    def psi(self, value):
        return self._set_length(value)

    def _set_length(self, value):
        if not torch.is_tensor(value):
            value = torch.as_tensor(value).to(self.raw_psi_diag)
        # when setting the paramater, transform the actual value to a raw one by applying the inverse transform
        self.initialize(raw_length=self.raw_psi_diag_constraint.inverse_transform(value))

    def forward(self, t1, t2, diag = False, last_dim_is_batch=False, **params):

        # not implemented yet
        assert diag is False
        assert last_dim_is_batch is False

        # taken the number of observations in the input
        n_obs = t1.shape[0]

        # compute the latent kernel
        kT = self.latent_kernel(t1, t2, diag, last_dim_is_batch, **params)
        # pre allocate covariance matrix
        X = torch.empty(self.n_features * n_obs, self.n_features * n_obs)
        
        for i in range(n_obs):
            for j in range(n_obs):
                # since `latent_dim=1 kT[i,j]` is a scalar so the matrix multiplication can be expressed in this way
                cov = kT[i,j] * self.Lambda @ self.Lambda.T
                # on diagonals add the noise
                if i == j: cov += torch.diag(self.psi) 
                X[i*self.n_features:(i*self.n_features + self.n_features),j*self.n_features:(j*self.n_features+self.n_features)] = cov
                
        return X
    
    def num_outputs_per_input(self, x1,x2):
        return self.n_features

In [113]:
gp_k = GPFAKernel(n_features=2, latent_kernel=RBFKernel())

In [114]:
gp_k

GPFAKernel(
  (latent_kernel): RBFKernel(
    (raw_lengthscale_constraint): Positive()
  )
  (raw_psi_diag_constraint): Positive()
)

In [115]:
list(gp_k.named_sub_kernels())

[('latent_kernel',
  RBFKernel(
    (raw_lengthscale_constraint): Positive()
  ))]

In [116]:
list(gp_k.named_parameters())

[('Lambda',
  Parameter containing:
  tensor([[1.],
          [1.]], requires_grad=True)),
 ('raw_psi_diag',
  Parameter containing:
  tensor([0., 0.], requires_grad=True)),
 ('latent_kernel.raw_lengthscale',
  Parameter containing:
  tensor([[0.]], requires_grad=True))]

In [117]:
cov = gp_k(T,T).evaluate()

In [118]:
MultivariateNormal(torch.zeros(cov[0].shape), cov)

MultivariateNormal(loc: torch.Size([6]), covariance_matrix: torch.Size([6, 6]))

This is a valid normal distribution!

In [119]:
class GPFAZeroMean(gpytorch.means.Mean):
    """
    Supports no batches
    """
    def __init__(self, n_features):
        super().__init__()
        self.n_features = n_features
    def forward(self, input, *params):
        shape = input.shape[0] * self.n_features
        return torch.zeros(shape)

In [120]:
class GPFA(gpytorch.models.ExactGP):
    def __init__(self, train_x, train_y, likelihood):
        super(GPFA, self).__init__(train_x, train_y, likelihood)
        self.mean_module = GPFAZeroMean(2)
        self.covar_module = GPFAKernel(2, RBFKernel())

    def forward(self, x, **params):
        mean_x = self.mean_module(x, **params)
        covar_x = self.covar_module(x, **params)
        return gpytorch.distributions.MultivariateNormal(mean_x, covar_x)

In [121]:

# initialize likelihood and model
likelihood = gpytorch.likelihoods.GaussianLikelihood()
model = GPFA(T, X, likelihood)

In [122]:
model

GPFA(
  (likelihood): GaussianLikelihood(
    (noise_covar): HomoskedasticNoise(
      (raw_noise_constraint): GreaterThan(1.000E-04)
    )
  )
  (mean_module): GPFAZeroMean()
  (covar_module): GPFAKernel(
    (latent_kernel): RBFKernel(
      (raw_lengthscale_constraint): Positive()
    )
    (raw_psi_diag_constraint): Positive()
  )
)

In [123]:
model(T).covariance_matrix

tensor([[1.6931, 1.0000, 0.3532, 0.3532, 0.0156, 0.0156],
        [1.0000, 1.6931, 0.3532, 0.3532, 0.0156, 0.0156],
        [0.3532, 0.3532, 1.6931, 1.0000, 0.3532, 0.3532],
        [0.3532, 0.3532, 1.0000, 1.6931, 0.3532, 0.3532],
        [0.0156, 0.0156, 0.3532, 0.3532, 1.6931, 1.0000],
        [0.0156, 0.0156, 0.3532, 0.3532, 1.0000, 1.6931]],
       grad_fn=<CopySlices>)

In [124]:
X.shape

torch.Size([6])

In [125]:
model.covar_module.num_outputs_per_input(T, T)

2

In [126]:
MultivariateNormal(torch.zeros(cov[0].shape), cov)

MultivariateNormal(loc: torch.Size([6]), covariance_matrix: torch.Size([6, 6]))

Trying to optimize it

In [127]:
# this is for running the notebook in our testing framework
training_iter = 200

# Find optimal model hyperparameters
model.train()
likelihood.train()

# Use the adam optimizer
optimizer = torch.optim.Adam(model.parameters(), lr=0.1)  # Includes GaussianLikelihood parameters

# "Loss" for GPs - the marginal log likelihood
mll = gpytorch.mlls.ExactMarginalLogLikelihood(likelihood, model)
losses = []
for i in range(training_iter):
    # Zero gradients from previous iteration
    optimizer.zero_grad()
    # Output from model
    output = model(T)
    # Calc loss and backprop gradients
    loss = -mll(output, X)
    losses.append(loss.item())
    loss.backward()
    if i % 10 == 0:
        print('Iter %d/%d - Loss: %.3f   lengthscale: %.3f, Lambda: %.3f   noise: %.3f' % (
            i + 1, training_iter, loss.item(),
            model.covar_module.latent_kernel.lengthscale.item(),
            model.covar_module.Lambda.mean().item(),
            model.likelihood.noise.item()
        ))
    optimizer.step()

Iter 1/200 - Loss: 1.800   lengthscale: 0.693, Lambda: 1.000   noise: 0.693
Iter 11/200 - Loss: 1.676   lengthscale: 0.672, Lambda: 0.052   noise: 0.897
Iter 21/200 - Loss: 1.684   lengthscale: 0.465, Lambda: -0.268   noise: 0.795
Iter 31/200 - Loss: 1.675   lengthscale: 0.314, Lambda: 0.014   noise: 0.854
Iter 41/200 - Loss: 1.675   lengthscale: 0.263, Lambda: 0.097   noise: 0.821
Iter 51/200 - Loss: 1.668   lengthscale: 0.247, Lambda: -0.023   noise: 0.811
Iter 61/200 - Loss: 1.633   lengthscale: 0.261, Lambda: -0.015   noise: 0.577
Iter 71/200 - Loss: 1.595   lengthscale: 0.588, Lambda: -0.016   noise: 0.424
Iter 81/200 - Loss: 1.500   lengthscale: 1.675, Lambda: 0.058   noise: 0.391
Iter 91/200 - Loss: 1.490   lengthscale: 2.590, Lambda: 0.121   noise: 0.336
Iter 101/200 - Loss: 1.488   lengthscale: 2.877, Lambda: 0.084   noise: 0.383
Iter 111/200 - Loss: 1.486   lengthscale: 2.980, Lambda: 0.037   noise: 0.472
Iter 121/200 - Loss: 1.485   lengthscale: 3.134, Lambda: 0.040   noise:

In [None]:
#losses

In [129]:
model.covar_module.Lambda

Parameter containing:
tensor([[ 0.9126],
        [-0.8667]], requires_grad=True)

In [130]:
model.covar_module.psi

tensor([0.2549, 0.3439], grad_fn=<SoftplusBackward0>)

In [131]:
X

tensor([-2.,  0., -1.,  2.,  0.,  1.])

# Hainich data

### Load data

Use 1 month of data from the Hainich site and using a subset of variables

Note for simplicity gap filled values are not excluded

In [132]:
import pandas as pd
import numpy as np
from pathlib import Path
from datetime import datetime, date

In [133]:
hai_path = Path("FLX_DE-Hai_FLUXNET2015_FULLSET_HH_2000-2012_1-4.csv")
hai_raw = pd.read_csv("../MeteoECGapFilling/data" / hai_path,
                      na_values=["-9999", "-9999.99"],
                      parse_dates=[0, 1],
                      nrows=100)

In [134]:
meteo_vars = {
    "TA_F": "TA",
    "SW_IN_F": "SW_IN",
    "LW_IN_F": "LW_IN",
    "VPD_F": "VPD",
    "PA": "PA"
}

hai = (hai_raw[
           hai_raw.TIMESTAMP_START.between(
               datetime(2000, 1, 1),
               datetime(2000, 1, 31)
           )]
       .rename(columns=meteo_vars)
       .set_index("TIMESTAMP_END")
       .loc[:, meteo_vars.values()])

In [135]:
hai["time"] = np.arange(0, len(hai))

In [136]:
hai

Unnamed: 0_level_0,TA,SW_IN,LW_IN,VPD,PA,time
TIMESTAMP_END,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2000-01-01 00:30:00,-0.60,0.0,302.475,0.222,96.63,0
2000-01-01 01:00:00,-0.65,0.0,302.475,0.122,96.58,1
2000-01-01 01:30:00,-0.58,0.0,301.677,0.090,96.56,2
2000-01-01 02:00:00,-0.51,0.0,301.677,0.110,96.56,3
2000-01-01 02:30:00,-0.49,0.0,301.677,0.102,96.57,4
...,...,...,...,...,...,...
2000-01-03 00:00:00,0.48,0.0,300.274,0.021,97.09,95
2000-01-03 00:30:00,0.41,0.0,300.274,0.013,97.09,96
2000-01-03 01:00:00,0.29,0.0,300.274,0.004,97.10,97
2000-01-03 01:30:00,0.31,0.0,304.148,0.000,97.07,98


The data need to be reshaped to be a vector, where the oberservations for each time step are next to each other. Or in other words you can find the same values every `n_features` elements in the vector.
We can visually check that this is the case by comparing with the df above

In [137]:
hai_X = torch.tensor(hai.drop("time", axis=1).to_numpy(), dtype=torch.float32)
n_features = hai_X.shape[-1]
hai_X[:10, :].reshape(-1)

tensor([-6.0000e-01,  0.0000e+00,  3.0248e+02,  2.2200e-01,  9.6630e+01,
        -6.5000e-01,  0.0000e+00,  3.0248e+02,  1.2200e-01,  9.6580e+01,
        -5.8000e-01,  0.0000e+00,  3.0168e+02,  9.0000e-02,  9.6560e+01,
        -5.1000e-01,  0.0000e+00,  3.0168e+02,  1.1000e-01,  9.6560e+01,
        -4.9000e-01,  0.0000e+00,  3.0168e+02,  1.0200e-01,  9.6570e+01,
        -4.0000e-01,  0.0000e+00,  3.0168e+02,  1.1100e-01,  9.6600e+01,
        -3.6000e-01,  0.0000e+00,  3.0168e+02,  1.0900e-01,  9.6600e+01,
        -3.5000e-01,  0.0000e+00,  3.0168e+02,  1.0700e-01,  9.6590e+01,
        -2.8000e-01,  0.0000e+00,  3.0805e+02,  1.2200e-01,  9.6600e+01,
        -2.7000e-01,  0.0000e+00,  3.0805e+02,  1.3800e-01,  9.6600e+01])

For time is easier as we need only to extract the vector of the time

In [138]:
hai_mean = hai_X.mean(0)
hai_std = hai_X.std(0)
hai_X = (hai_X - hai_mean)/hai_std
hai_X = hai_X.reshape(-1) #flatten input data

In [139]:
hai_X.mean()

tensor(9.0561e-06)

In [140]:
hai_T = torch.as_tensor(hai.time.to_numpy(), dtype=torch.float32)

In [141]:
hai_T = (hai_T - hai_T.mean())/hai_T.std() # do we really need to do this?
hai_T

tensor([-1.7062, -1.6718, -1.6373, -1.6028, -1.5683, -1.5339, -1.4994, -1.4649,
        -1.4305, -1.3960, -1.3615, -1.3271, -1.2926, -1.2581, -1.2237, -1.1892,
        -1.1547, -1.1202, -1.0858, -1.0513, -1.0168, -0.9824, -0.9479, -0.9134,
        -0.8790, -0.8445, -0.8100, -0.7756, -0.7411, -0.7066, -0.6721, -0.6377,
        -0.6032, -0.5687, -0.5343, -0.4998, -0.4653, -0.4309, -0.3964, -0.3619,
        -0.3275, -0.2930, -0.2585, -0.2240, -0.1896, -0.1551, -0.1206, -0.0862,
        -0.0517, -0.0172,  0.0172,  0.0517,  0.0862,  0.1206,  0.1551,  0.1896,
         0.2240,  0.2585,  0.2930,  0.3275,  0.3619,  0.3964,  0.4309,  0.4653,
         0.4998,  0.5343,  0.5687,  0.6032,  0.6377,  0.6721,  0.7066,  0.7411,
         0.7756,  0.8100,  0.8445,  0.8790,  0.9134,  0.9479,  0.9824,  1.0168,
         1.0513,  1.0858,  1.1202,  1.1547,  1.1892,  1.2237,  1.2581,  1.2926,
         1.3271,  1.3615,  1.3960,  1.4305,  1.4649,  1.4994,  1.5339,  1.5683,
         1.6028,  1.6373,  1.6718,  1.70

In [142]:
class GPFA(gpytorch.models.ExactGP):
    def __init__(self, train_x, train_y, likelihood, n_features, latent_kernel=RBFKernel()):
        super(GPFA, self).__init__(train_x, train_y, likelihood)
        self.mean_module = GPFAZeroMean(n_features)
        self.covar_module = GPFAKernel(n_features, latent_kernel)

    def forward(self, x, **params):
        mean_x = self.mean_module(x, **params)
        covar_x = self.covar_module(x, **params)
        return gpytorch.distributions.MultivariateNormal(mean_x, covar_x)

In [160]:
# initialize likelihood and model
hai_lh = gpytorch.likelihoods.GaussianLikelihood()
hai_model = GPFA(hai_T, hai_X, hai_lh, n_features)

In [161]:
hai_model(hai_T)

MultivariateNormal(loc: torch.Size([500]))

In [162]:
#hai_T = hai_T.cuda()
#hai_X = hai_X.cuda()
#hai_model = hai_model.cuda()
#hai_lh = hai_lh.cuda()

In [167]:
# TODO convert this to a function
# this is for running the notebook in our testing framework
training_iter = 100

# Find optimal model hyperparameters
hai_model.train()
hai_lh.train()

# Use the adam optimizer
optimizer = torch.optim.Adam(hai_model.parameters(), lr=0.1)  # Includes GaussianLikelihood parameters
losses = torch.empty(training_iter)
# "Loss" for GPs - the marginal log likelihood
mll = gpytorch.mlls.ExactMarginalLogLikelihood(hai_lh, hai_model)

for i in progress_bar(range(training_iter)):
    # Zero gradients from previous iteration
    optimizer.zero_grad()
    # Output from model
    output = hai_model(hai_T)
    # Calc loss and backprop gradients
    loss = -mll(output, hai_X)
    losses[i] = loss.detach()
    loss.backward()
    print('Iter %02d/%d - Loss: %.3f   lengthscale: %.3f, Lambda: %.3f   noise: %.3f' % (
        i + 1, training_iter, loss.item(),
        hai_model.covar_module.latent_kernel.lengthscale.item(),
        hai_model.covar_module.Lambda.mean().item(),
        hai_model.likelihood.noise.item()
    ))
    optimizer.step()

Iter 01/100 - Loss: 1.424   lengthscale: 0.760, Lambda: 1.000   noise: 0.693
Iter 02/100 - Loss: 1.402   lengthscale: 0.814, Lambda: 1.020   noise: 0.644
Iter 03/100 - Loss: 1.381   lengthscale: 0.860, Lambda: 1.040   noise: 0.598
Iter 04/100 - Loss: 1.360   lengthscale: 0.912, Lambda: 1.057   noise: 0.555
Iter 05/100 - Loss: 1.341   lengthscale: 0.964, Lambda: 1.068   noise: 0.513
Iter 06/100 - Loss: 1.324   lengthscale: 0.994, Lambda: 1.072   noise: 0.475
Iter 07/100 - Loss: 1.309   lengthscale: 0.984, Lambda: 1.075   noise: 0.439
Iter 08/100 - Loss: 1.295   lengthscale: 0.953, Lambda: 1.078   noise: 0.405
Iter 09/100 - Loss: 1.282   lengthscale: 0.913, Lambda: 1.080   noise: 0.375
Iter 10/100 - Loss: 1.271   lengthscale: 0.869, Lambda: 1.080   noise: 0.347
Iter 11/100 - Loss: 1.260   lengthscale: 0.823, Lambda: 1.081   noise: 0.322
Iter 12/100 - Loss: 1.250   lengthscale: 0.776, Lambda: 1.083   noise: 0.300
Iter 13/100 - Loss: 1.239   lengthscale: 0.729, Lambda: 1.083   noise: 0.281

In [168]:
hai_model.covar_module.Lambda

Parameter containing:
tensor([[ 0.4329],
        [ 0.5256],
        [-1.4773],
        [ 0.7756],
        [ 1.6950]], requires_grad=True)