# Additional latent kernel

In [None]:
# Enable IPython's autoreload extension in mode 2, so edited modules are
# reloaded before each cell runs.
%load_ext autoreload
%autoreload 2

In [None]:
from meteo_imp.gpfa.imputation import *
from meteo_imp.gpfa.data_preparation import *
from meteo_imp.gpfa.results import *
from meteo_imp.gpfa.gpfa import *
from meteo_imp.gpfa.results import _display_as_row

import torch
import pandas as pd
import numpy as np
from pathlib import Path
from pyprojroot import here
import matplotlib.pyplot as plt

from meteo_imp.utils import *
from gpytorch.kernels import *
import gpytorch

# Pickle path under the project root's .cache directory; it is handed to
# the cache_disk decorator on compute_large further down.
cp = here() / ".cache" / "add_kernel_gpu.pickle"

## Setup

### Load Data

Take the first 1000 rows of the Hainich dataset.

In [None]:
# Read only the first 1000 records of the Hainich FLUXNET file.
# "-9999" / "-9999.99" are turned into NaN and the first two columns are parsed as dates.
hai_path = Path("FLX_DE-Hai_FLUXNET2015_FULLSET_HH_2000-2012_1-4.csv")
hai_raw = pd.read_csv(
    here("data") / hai_path,
    na_values=["-9999", "-9999.99"],
    parse_dates=[0, 1],
    nrows=1000,
)

In [None]:
# Columns to keep from the raw file: key = column name in the CSV,
# value = shorter name used in the rest of the notebook.
meteo_vars = {
    "TA_F": "TA",
    "SW_IN_F": "SW_IN",
    # "LW_IN_F": "LW_IN",
    "VPD_F": "VPD",
    #"PA": "PA"
}

# Units per short variable name (LW_IN is listed although it is not selected above).
units = dict(TA='°C', SW_IN='W m-2', LW_IN='W m-2', VPD='hPa')

# Rename the selected columns, index by the end timestamp, keep only those columns.
hai = hai_raw.rename(columns=meteo_vars).set_index("TIMESTAMP_END")
hai = hai.loc[:, list(meteo_vars.values())]
hai

Unnamed: 0_level_0,TA,SW_IN,VPD
TIMESTAMP_END,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2000-01-01 00:30:00,-0.60,0.0,0.222
2000-01-01 01:00:00,-0.65,0.0,0.122
2000-01-01 01:30:00,-0.58,0.0,0.090
2000-01-01 02:00:00,-0.51,0.0,0.110
2000-01-01 02:30:00,-0.49,0.0,0.102
...,...,...,...
2000-01-21 18:00:00,-0.81,0.0,1.892
2000-01-21 18:30:00,-0.81,0.0,1.744
2000-01-21 19:00:00,-0.90,0.0,2.118
2000-01-21 19:30:00,-1.09,0.0,2.528


### Custom kernel

In [None]:
def _get_lengthscale_info(kernel: RBFKernel, suffix=''):
    """Return a one-row DataFrame with the lengthscale of `kernel`.

    The single column is named ``'lengthscale' + suffix``. The value is read
    with ``.item()``, so the lengthscale tensor must hold exactly one element.
    """
    column = 'lengthscale' + suffix
    value = kernel.lengthscale.detach().item()
    return pd.DataFrame({column: [value]})

In [None]:
def _get_outscale_info(kernel: ScaleKernel, suffix=''):
    """Return a one-row DataFrame with the output scale of `kernel`.

    The single column is named ``'outscale' + suffix``. The value is read
    with ``.item()``, so the outputscale tensor must hold exactly one element.
    """
    column = 'outscale' + suffix
    value = kernel.outputscale.detach().item()
    return pd.DataFrame({column: [value]})

In [None]:
class GPFAMultiRbf(GPFA):
    "GPFA whose latent kernel is the sum of a plain RBF kernel and a scaled RBF kernel"
    latent_kernel = lambda x: AdditiveKernel(RBFKernel(), ScaleKernel(RBFKernel()))

    def get_info(self: GPFA,
             var_names = None # Optional variable names for better printing
            ) -> dict[str, pd.DataFrame]:
        """Collect the model parameters into one DataFrame per parameter group.

        Returns a dict with the keys "Lambda", "Lengthscale", "Outscale",
        "Psi" and "Likelihood". When `var_names` is given it is attached as
        an extra column to "Lambda" and used as the 'variable' column of "Psi".
        """
        covar = self.covar_module
        latent_names = [f"z{i}" for i in range(covar.latent_dims)]

        # Lambda as a frame with one column per latent, optionally preceded
        # by the variable names
        loadings = pd.DataFrame(covar.Lambda.detach().cpu().numpy(), columns=latent_names)
        lambda_parts = [loadings] if var_names is None else [pd.Series(var_names), loadings]
        lambda_info = pd.concat(lambda_parts, axis=1)

        # every latent kernel is an AdditiveKernel: kernels[0] is the plain RBF,
        # kernels[1] is the ScaleKernel wrapping the second RBF
        additive_kernels = list(covar.latent_kernels)

        # one row per latent, one lengthscale column per additive component
        lengthscale_info = pd.concat([
            pd.concat([
                _get_lengthscale_info(add_k.kernels[0], "_k0"),
                _get_lengthscale_info(add_k.kernels[1].base_kernel, "_k1"),
            ], axis=1)
            for add_k in additive_kernels
        ])
        lengthscale_info.insert(0, 'latent', latent_names)

        # same layout for the output scales; the plain RBF is unscaled, hence the constant 1
        outscale_info = pd.concat([
            pd.concat([
                pd.DataFrame({'outscale_k0': [1]}),
                _get_outscale_info(add_k.kernels[1], "_k1"),
            ], axis=1)
            for add_k in additive_kernels
        ])
        outscale_info.insert(0, 'latent', latent_names)

        psi_info = pd.DataFrame({
            'variable': var_names,
            'psi': covar.psi.detach().cpu().numpy(),
        })

        noise_info = pd.DataFrame({'noise': [self.likelihood.noise_covar.noise.item()]})

        return {
            "Lambda": lambda_info,
            "Lengthscale": lengthscale_info,
            "Outscale": outscale_info,
            "Psi": psi_info,
            "Likelihood": noise_info,
        }

In [None]:
# Build a throwaway model on dummy 1-D tensors to inspect its structure and initial parameters.
# NOTE(review): the meaning of the positional arguments (including the trailing 2)
# is defined by GPFA's constructor, which is not shown in this notebook — confirm there.
k = GPFAMultiRbf(torch.tensor([1,2,3]), torch.tensor([1,2,3]), gpytorch.likelihoods.GaussianLikelihood(), 2)

In [None]:
# Show the module repr of the model, including the additive latent kernel.
k

GPFAMultiRbf(
  (likelihood): GaussianLikelihood(
    (noise_covar): HomoskedasticNoise(
      (raw_noise_constraint): GreaterThan(1.000E-04)
    )
  )
  (mean_module): GPFAZeroMean()
  (covar_module): GPFAKernel(
    (latent_kernels): ModuleList(
      (0): AdditiveKernel(
        (kernels): ModuleList(
          (0): RBFKernel(
            (raw_lengthscale_constraint): Positive()
          )
          (1): ScaleKernel(
            (base_kernel): RBFKernel(
              (raw_lengthscale_constraint): Positive()
            )
            (raw_outputscale_constraint): Positive()
          )
        )
      )
    )
    (raw_psi_diag_constraint): Positive()
  )
)

In [None]:
# Display the parameter tables returned by get_info (called without variable names).
_display_as_row(k.get_info())

z0
0.199
0.826

latent,lengthscale_k0,lengthscale_k1
z0,0.6931,0.6931

latent,outscale_k0,outscale_k1
z0,1,0.6931

variable,psi
,0.6931
,0.6931

noise
0.6932


In [None]:
# Reset the random seed (presumably so the random gaps are reproducible — confirm in
# meteo_imp.utils), then build a test set from the first 150 rows with randomly missing values.
# NOTE(review): `data` is reassigned by the benchmark cells further down.
reset_seed()
data = GPFADataTest(hai[:150]).add_random_missing()

### Compute 

In [None]:
# Benchmark setup: first 500 rows with random gaps, 2 latent dimensions, cuda=True.
# NOTE(review): this rebinds the globals `data` and `imp`, and the seed is not reset in this cell.
data = GPFADataTest(hai[:500]).add_random_missing()
imp = GPFAImputationExplorer(data.data, latent_dims=2, model=GPFAMultiRbf, cuda=True)

In [None]:
# Time one call to fit with argument 1 (the progress bar in the output counts to 1).
%time imp.fit(1)

  0%|          | 0/1 [00:00<?, ?it/s]

CPU times: user 1min 1s, sys: 1.4 s, total: 1min 2s
Wall time: 1min 3s


GPFA Imputation Explorer:
    N obs: 500
    N features 3 (TA, SW_IN, VPD)
    N missing observations 457
    N latent: 2

In [None]:
# Benchmark setup: first 1000 rows with random gaps, 2 latent dimensions, cuda=True.
# NOTE(review): this rebinds the globals `data` and `imp` again, and the seed is not reset in this cell.
data = GPFADataTest(hai[:1000]).add_random_missing()
imp = GPFAImputationExplorer(data.data, latent_dims=2, model=GPFAMultiRbf, cuda=True)

In [None]:
# Time one call to fit with argument 1 on the 1000-row explorer built in the previous cell.
%time imp.fit(1)

  0%|          | 0/1 [00:00<?, ?it/s]

CPU times: user 5min 1s, sys: 4.26 s, total: 5min 5s
Wall time: 5min 9s


GPFA Imputation Explorer:
    N obs: 1000
    N features 3 (TA, SW_IN, VPD)
    N missing observations 865
    N latent: 2

In [None]:
@cache_disk(cp)
def compute_large():
    """Fit a 2-latent GPFAMultiRbf explorer on the first 500 rows of `hai`.

    The seed is reset before the random gaps are added and the explorer is
    created with cuda=True. Returns the tuple ``(result of fit(), test data)``.
    The function is wrapped by cache_disk(cp) — presumably caching the
    result in the pickle at `cp`; confirm in meteo_imp.utils.
    """
    reset_seed()
    test_data = GPFADataTest(hai[:500]).add_random_missing()
    explorer = GPFAImputationExplorer(
        test_data.data,
        latent_dims=2,
        model=GPFAMultiRbf,
        cuda=True,
    )
    fitted = explorer.fit()
    return fitted, test_data

## Small

In [None]:
# Rebuild the small 150-row test set in this cell: the benchmark cells above rebind
# `data` to 500-row and then 1000-row sets, so relying on the leftover global would
# silently fit this section's model on the 1000-row data.
reset_seed()
data = GPFADataTest(hai[:150]).add_random_missing()
imp = GPFAImputation(data.data, latent_dims=2, model=GPFAMultiRbf)

In [None]:
# Fit the imputation model using fit()'s default arguments.
imp.fit()

In [None]:
# Turn the fitted imputation into a result object, passing the complete tidy data from `data`
# (presumably as the reference to compare against — confirm in meteo_imp.gpfa.results),
# and display it with 'bind_interaction' switched off in the plot arguments.
imp.to_result(data.data_compl_tidy).display_results(plot_args={'bind_interaction': False})