# Multiple Latent

This notebook tries fitting the imputation model with more than one latent variable.

In [None]:
%load_ext autoreload
%autoreload 2

%matplotlib inline

In [None]:
from gpfa_imputation.imputation import *
from gpfa_imputation.data_preparation import *
from gpfa_imputation.results import *

import torch
import pandas as pd
import numpy as np
from pathlib import Path
from pyprojroot import here
import matplotlib.pyplot as plt

from gpfa_imputation.utils import cache_disk

In [None]:
def reset_seed(seed=27):
    """Reset the global PyTorch and NumPy random number generators.

    Call this right before any step that draws random numbers so that
    re-running a single cell gives the same result as a full run.

    Args:
        seed: Integer seed passed to both `torch.manual_seed` and
            `np.random.seed`. Defaults to 27, the value used throughout
            this notebook.
    """
    torch.manual_seed(seed)
    np.random.seed(seed)

In [None]:
# Seed the global RNGs through the shared helper so the seed value is
# defined in a single place.
reset_seed()

# Directory under the project root that holds the on-disk caches.
cache_path = here() / ".cache"

## Load Data

Take the first 200 rows from the Hainich dataset.

In [None]:
# FLUXNET2015 file for the Hainich site, located in the project's `data` folder.
hai_path = Path("FLX_DE-Hai_FLUXNET2015_FULLSET_HH_2000-2012_1-4.csv")

hai_raw = pd.read_csv(
    here("data") / hai_path,
    na_values=["-9999", "-9999.99"],  # these markers are read as missing values
    parse_dates=[0, 1],  # the first two columns are parsed as dates
    nrows=200,  # only the first 200 rows are needed
)

In [None]:
# Map the `*_F` column names of the raw file to the short names used in this
# notebook. The "PA" -> "PA" entry is currently disabled.
meteo_vars = {
    f"{short_name}_F": short_name
    for short_name in ("TA", "SW_IN", "LW_IN", "VPD")
}

# Unit of each variable, keyed by its short name; passed to `to_result`.
units = dict(
    TA="°C",
    SW_IN="W m-2",
    LW_IN="W m-2",
    VPD="hPa",
)

# Rename the selected columns to their short names, index the frame by
# `TIMESTAMP_END` and keep only the selected meteorological variables.
hai = (
    hai_raw
    .rename(columns=meteo_vars)
    .set_index("TIMESTAMP_END")
    # `.loc` is documented for lists/arrays of labels, so materialise the
    # dict view instead of relying on implicit coercion.
    .loc[:, list(meteo_vars.values())]
)
hai

Unnamed: 0_level_0,TA,SW_IN,LW_IN,VPD
TIMESTAMP_END,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2000-01-01 00:30:00,-0.60,0.0,302.475,0.222
2000-01-01 01:00:00,-0.65,0.0,302.475,0.122
2000-01-01 01:30:00,-0.58,0.0,301.677,0.090
2000-01-01 02:00:00,-0.51,0.0,301.677,0.110
2000-01-01 02:30:00,-0.49,0.0,301.677,0.102
...,...,...,...,...
2000-01-05 02:00:00,4.74,0.0,330.202,1.191
2000-01-05 02:30:00,4.75,0.0,330.202,1.057
2000-01-05 03:00:00,4.76,0.0,330.202,0.935
2000-01-05 03:30:00,4.62,0.0,330.202,1.162


### Computation

All the slow computations are run here and cached on disk.

In [None]:
# Reseed first so that any randomness used when creating the gaps is
# reproducible when this cell is re-run on its own.
reset_seed()

# Both test datasets are built from the first 150 rows of `hai`.
# Random gaps (default settings of `add_random_missing`):
data_r_gaps = GPFADataTest(hai.iloc[:150]).add_random_missing()
# One gap created with `add_gap(15, ...)` over all selected variables
# (presumably 15 is the gap length -- TODO confirm):
data_c_gaps = GPFADataTest(hai.iloc[:150]).add_gap(15, meteo_vars.values())

In [None]:
# Pickle file in which the fitted results of this notebook are cached.
cache_file_gaps = cache_path.joinpath("hai_diff_latents.pickle")
# To reset the cache and force a recomputation, uncomment the next line:
# cache_file_gaps.unlink()

In [None]:
@cache_disk(cache_file_gaps)
def compute_multiple_latent():
    """Fit the imputation model with 1, 2 and 3 latent variables.

    The models are fitted on both gap datasets defined in the notebook
    (`data_r_gaps` and `data_c_gaps`), and each fit is converted to a result
    object with `to_result`, using the complete tidy data and `units`.

    NOTE(review): the random-gap fits use `GPFAImputation` while the
    continuous-gap fits use `GPFAImputationExplorer` -- confirm that this
    difference is intended.

    Returns:
        A tuple `(hai_r_gaps, hai_c_gaps)` of two lists, each holding one
        result per number of latent variables, in the order 1, 2, 3.
    """

    def _fit_to_result(model_cls, gap_data, n_latent):
        # Build the model on the gappy data, fit it, then convert the fit.
        model = model_cls(gap_data.data, latent_dims=n_latent)
        return model.fit().to_result(gap_data.data_compl_tidy, units=units)

    # All random-gap fits run before the continuous-gap ones.
    random_gap_results = []
    for n_latent in range(1, 4):
        random_gap_results.append(
            _fit_to_result(GPFAImputation, data_r_gaps, n_latent)
        )

    continuous_gap_results = []
    for n_latent in range(1, 4):
        continuous_gap_results.append(
            _fit_to_result(GPFAImputationExplorer, data_c_gaps, n_latent)
        )

    return random_gap_results, continuous_gap_results

In [None]:
# Slow step: six fits in total (3 per gap dataset). The function is wrapped in
# `cache_disk`, which presumably reloads the results from `cache_file_gaps` on
# later runs -- TODO confirm.
hai_r_gaps, hai_c_gaps = compute_multiple_latent()

  0%|          | 0/100 [00:00<?, ?it/s]

torch.linalg.solve_triangular has its arguments reversed and does not return a copy of one of the inputs.
X = torch.triangular_solve(B, A).solution
should be replaced with
X = torch.linalg.solve_triangular(A, B). (Triggered internally at  ../aten/src/ATen/native/BatchLinearAlgebra.cpp:2183.)
  res = torch.triangular_solve(right_tensor, self.evaluate(), upper=self.upper).solution
 does not have profile information (Triggered internally at  ../torch/csrc/jit/codegen/cuda/graph_fuser.cpp:104.)
  Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass


  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

## Random Gaps

In [None]:
# One result per number of latent variables (1, 2 and 3), in that order.
hai_r_gaps

In [None]:
# Random gaps, model with 1 latent variable.
hai_r_gaps[0].display_results()

In [None]:
# Random gaps, model with 1 latent variable: `plot_pred` output.
hai_r_gaps[0].plot_pred()

In [None]:
# Random gaps, model with 2 latent variables.
hai_r_gaps[1].display_results()

In [None]:
# Random gaps, model with 3 latent variables.
hai_r_gaps[2].display_results()

## Continuous gap

In [None]:
# Continuous gap, model with 1 latent variable.
hai_c_gaps[0].display_results()

In [None]:
# Continuous gap, model with 2 latent variables.
hai_c_gaps[1].display_results()

In [None]:
# Continuous gap, model with 3 latent variables.
hai_c_gaps[2].display_results()