In [None]:
# Reload imported modules automatically before each cell is executed, so edits
# to the gpfa_imputation package are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

# Render matplotlib figures inline in the cell output.
%matplotlib inline

In [None]:
from gpfa_imputation.imputation import *
from gpfa_imputation.data_preparation import *

import torch
import pandas as pd
import numpy as np
from pathlib import Path
from pyprojroot import here
import matplotlib.pyplot as plt

from gpfa_imputation.utils import cache_disk

In [None]:
def reset_seed(seed: int = 27) -> None:
    """Re-seed the global PyTorch and NumPy random number generators.

    Call this right before a stochastic step so that re-running the cell
    reproduces the same draws regardless of what ran earlier in the kernel.

    Args:
        seed: Value passed to both ``torch.manual_seed`` and
            ``np.random.seed``. Defaults to 27, the seed used in this notebook.
    """
    torch.manual_seed(seed)
    np.random.seed(seed)

In [None]:
# Seed torch and numpy once up front (reset_seed() uses seed 27 for both).
reset_seed()

# NOTE(review): `cache` is rebound to a pickle path in the "Compute" section
# before anything reads this boolean.
cache = True
# Directory for on-disk caches, located under the project root.
cache_path = here() / ".cache"

## Load Data

Take the first 200 rows of the Hainich (DE-Hai) FLUXNET2015 dataset.

In [None]:
# File name of the DE-Hai FLUXNET2015 export, resolved against the project's data/ folder.
hai_path = Path("FLX_DE-Hai_FLUXNET2015_FULLSET_HH_2000-2012_1-4.csv")
hai_raw = pd.read_csv(
    here("data") / hai_path,
    na_values=["-9999", "-9999.99"],  # these sentinel strings are read as NaN
    parse_dates=[0, 1],  # the first two columns are parsed as datetimes
    nrows=200,  # only the first 200 rows are loaded
)

In [None]:
# Source column name -> short variable name used in the rest of the notebook.
meteo_vars = dict(
    TA_F="TA",
    SW_IN_F="SW_IN",
    LW_IN_F="LW_IN",
    VPD_F="VPD",
    # PA="PA",  # currently left out
)

# Unit label for each short variable name (passed to `to_result` further down).
units = dict(
    TA="°C",
    SW_IN="W m-2",
    LW_IN="W m-2",
    VPD="hPa",
)

# Rename, index by the interval end timestamp, then keep only the renamed columns.
hai = hai_raw.rename(columns=meteo_vars).set_index("TIMESTAMP_END")
hai = hai[list(meteo_vars.values())]
hai

Unnamed: 0_level_0,TA,SW_IN,LW_IN,VPD
TIMESTAMP_END,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2000-01-01 00:30:00,-0.60,0.0,302.475,0.222
2000-01-01 01:00:00,-0.65,0.0,302.475,0.122
2000-01-01 01:30:00,-0.58,0.0,301.677,0.090
2000-01-01 02:00:00,-0.51,0.0,301.677,0.110
2000-01-01 02:30:00,-0.49,0.0,301.677,0.102
...,...,...,...,...
2000-01-05 02:00:00,4.74,0.0,330.202,1.191
2000-01-05 02:30:00,4.75,0.0,330.202,1.057
2000-01-05 03:00:00,4.76,0.0,330.202,0.935
2000-01-05 03:30:00,4.62,0.0,330.202,1.162


In [None]:
# Re-seed first, presumably so that add_random_missing() produces the same gaps
# on every run. Only the first 150 of the loaded rows are used.
reset_seed()
data = GPFADataTest(hai.iloc[:150]).add_random_missing()

In [None]:
# Build an explorer on the data with gaps, using two latent dimensions.
imp = GPFAImputationExplorer(data.data, latent_dims=2)

In [None]:
# Show the explorer's summary (observations, features, missing values, latents).
imp

GPFA Imputation:
    N obs: 150
    N features 4 (TA, SW_IN, LW_IN, VPD)
    N missing observations 168
    N latent: 2

In [None]:
# imp.learner.model.covar_module.Lambda = torch.nn.Parameter(torch.ones(4,2))

# imp.fit_predict()

# res = imp.to_result(data.data_compl_tidy, units=units)
# res.display_results()

### Compute

In [None]:
# Pickle file handed to @cache_disk for compute(); it lives in the shared cache directory.
# NOTE(review): this rebinds `cache`, which the configuration cell set to True.
cache = cache_path / "hai_test_init_values.pickle"

In [None]:
@cache_disk(cache)
def compute():
    """Fit the GPFA imputation once per ``Lambda`` initialisation strategy.

    Reads the notebook globals ``data`` (gappy data plus the complete tidy
    reference) and ``units``. For every strategy a fresh
    ``GPFAImputationExplorer`` with two latent dimensions is created, its
    ``covar_module.Lambda`` is optionally replaced, and the model is fitted.

    Returns:
        dict: strategy name -> object returned by ``to_result``. Keys, in
        order: ``"normal"`` (library default initialisation), ``"ones"``,
        ``"zeros"``, ``"rand1"``, ``"rand2"``, ``"rand3"``.

    Notes:
        * No seed is set here, so the ``torch.rand`` draws depend on the state
          of the global torch RNG when this function runs.
        * NOTE(review): ``cache_disk`` presumably stores the returned dict at
          ``cache`` and reuses it on later calls; if so, delete that file after
          changing this function -- confirm against ``gpfa_imputation.utils``.
    """
    # Lambda has one row per feature and one column per latent dimension.
    n_features, n_latent = 4, 2

    # Factories instead of ready-made tensors: each random tensor must be drawn
    # *after* its explorer has been constructed, exactly as in the unrolled
    # version, so the sequence of RNG calls is unchanged.
    lambda_inits = {
        "normal": None,  # keep whatever the library initialises Lambda to
        "ones": lambda: torch.ones(n_features, n_latent),
        "zeros": lambda: torch.zeros(n_features, n_latent),
        "rand1": lambda: torch.rand(n_features, n_latent),
        "rand2": lambda: torch.rand(n_features, n_latent),
        "rand3": lambda: torch.rand(n_features, n_latent),
    }

    def _fit_one(make_lambda):
        # Fit a fresh explorer, overriding Lambda when a factory is given.
        imp = GPFAImputationExplorer(data.data, latent_dims=n_latent)
        if make_lambda is not None:
            imp.learner.model.covar_module.Lambda = torch.nn.Parameter(make_lambda())
        return imp.fit().to_result(data.data_compl_tidy, units=units)

    # Dict comprehensions evaluate in insertion order, so runs happen in the
    # same order as before.
    return {name: _fit_one(make_lambda) for name, make_lambda in lambda_inits.items()}

In [None]:
# Results keyed by initialisation name: "normal", "ones", "zeros", "rand1", "rand2", "rand3".
results = compute()

In [None]:
# Prediction plot for the baseline run (Lambda left at its default initialisation).
results["normal"].plot_pred()

  for col_name, dtype in df.dtypes.iteritems():


In [None]:
# Baseline: Lambda left at the library's default initialisation.
results["normal"].display_results()

  for col_name, dtype in df.dtypes.iteritems():


variable,r2
TA,0.9818
SW_IN,0.1506
LW_IN,0.1046
VPD,0.944

variable,z0,z1
time,0.9164,0.1246
variable,0.1252,0.4658
mean,0.0715,-0.3495
std,0.6051,0.8228

latent,lengthscale
z0,6.7173
z1,4.7379


In [None]:
# Lambda initialised to all ones. In the saved output the two latent columns
# (and their lengthscales) come out identical, i.e. z0 == z1.
results["ones"].display_results()

  for col_name, dtype in df.dtypes.iteritems():


variable,r2
TA,0.6805
SW_IN,0.0751
LW_IN,-0.0431
VPD,0.9393

variable,z0,z1
time,0.5621,0.5621
variable,0.2967,0.2967
mean,-0.1298,-0.1298
std,0.742,0.742

latent,lengthscale
z0,5.5764
z1,5.5764


In [None]:
# Lambda initialised to all zeros. In the saved output Lambda is still all
# zeros after fitting and every r2 is negative.
results["zeros"].display_results()

  for col_name, dtype in df.dtypes.iteritems():


variable,r2
TA,-0.1121
SW_IN,-0.0709
LW_IN,-0.0378
VPD,-0.1479

variable,z0,z1
time,0.0,0.0
variable,0.0,0.0
mean,0.0,0.0
std,0.0,0.0

latent,lengthscale
z0,0.6931
z1,0.6931


In [None]:
# First random (torch.rand) initialisation of Lambda.
results["rand1"].display_results()

  for col_name, dtype in df.dtypes.iteritems():


variable,r2
TA,0.9821
SW_IN,0.1829
LW_IN,0.9475
VPD,0.5743

variable,z0,z1
time,0.1689,0.9448
variable,-0.2678,0.2128
mean,0.7869,-0.0733
std,-0.0433,0.7077

latent,lengthscale
z0,7.8515
z1,6.8527


In [None]:
# Second random initialisation. In the saved output the r2 values are nearly
# identical to rand1, while the Lambda columns look swapped and/or sign-flipped.
results["rand2"].display_results()

  for col_name, dtype in df.dtypes.iteritems():


variable,r2
TA,0.9821
SW_IN,0.1838
LW_IN,0.9475
VPD,0.5743

variable,z0,z1
time,0.9444,-0.2064
variable,0.223,0.2587
mean,-0.0948,-0.7806
std,0.7125,0.0134

latent,lengthscale
z0,6.8546
z1,7.7147


In [None]:
# Third random initialisation. In the saved output the r2 values again nearly
# match rand1 and rand2; the Lambda columns are close to rand1's in swapped order.
results["rand3"].display_results()

  for col_name, dtype in df.dtypes.iteritems():


variable,r2
TA,0.9821
SW_IN,0.1835
LW_IN,0.9475
VPD,0.5742

variable,z0,z1
time,0.9374,0.1708
variable,0.2093,-0.269
mean,-0.0658,0.7892
std,0.7009,-0.0414

latent,lengthscale
z0,6.8167
z1,7.8779
