# Simple GP Hainich

Imputation of the Hainich meteorological variables using a simple Gaussian process (GP).

In [None]:
# Load IPython's autoreload extension; mode 2 re-imports all loaded modules before
# each cell executes, so edits to the project package are picked up without a kernel restart.
%load_ext autoreload
%autoreload 2

In [None]:
from gpfa_imputation.imputation import *
from gpfa_imputation.data_preparation import *
from gpfa_imputation.simple_gp_imputation import *

import torch
import pandas as pd
import numpy as np
from pathlib import Path
from pyprojroot import here
import matplotlib.pyplot as plt

from gpfa_imputation.utils import *

In [None]:
# Cache directory located directly under the project root returned by here().
cache_path = here().joinpath(".cache")

## Load Data

Take the first 200 rows of the Hainich dataset.

In [None]:
# CSV file name, resolved against the project's "data" directory.
hai_path = Path("FLX_DE-Hai_FLUXNET2015_FULLSET_HH_2000-2012_1-4.csv")

# Read only the first 200 rows. The strings "-9999" and "-9999.99" are treated as
# missing values, and the first two columns are parsed as datetimes.
hai_raw = pd.read_csv(
    here("data") / hai_path,
    na_values=["-9999", "-9999.99"],
    parse_dates=[0, 1],
    nrows=200,
)

In [None]:
# Source column name -> short variable name used in the rest of the notebook.
meteo_vars = {
    "TA_F": "TA",
    "SW_IN_F": "SW_IN",
    "LW_IN_F": "LW_IN",
    "VPD_F": "VPD",
    # "PA": "PA"  (currently disabled)
}

# Unit string for each renamed variable; passed as `units=` to `to_result` in later cells.
units = {
    "TA": "°C",
    "SW_IN": "W m-2",
    "LW_IN": "W m-2",
    "VPD": "hPa",
}

# Rename the selected columns, index by the interval end timestamp and keep
# only the renamed variables, in the order given by `meteo_vars`.
hai = (
    hai_raw
    .rename(columns=meteo_vars)
    .set_index("TIMESTAMP_END")
    .loc[:, list(meteo_vars.values())]
)
hai

Unnamed: 0_level_0,TA,SW_IN,LW_IN,VPD
TIMESTAMP_END,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2000-01-01 00:30:00,-0.60,0.0,302.475,0.222
2000-01-01 01:00:00,-0.65,0.0,302.475,0.122
2000-01-01 01:30:00,-0.58,0.0,301.677,0.090
2000-01-01 02:00:00,-0.51,0.0,301.677,0.110
2000-01-01 02:30:00,-0.49,0.0,301.677,0.102
...,...,...,...,...
2000-01-05 02:00:00,4.74,0.0,330.202,1.191
2000-01-05 02:30:00,4.75,0.0,330.202,1.057
2000-01-05 03:00:00,4.76,0.0,330.202,0.935
2000-01-05 03:30:00,4.62,0.0,330.202,1.162


## First Test

In [None]:
# Seed before building the test data; the statements below must stay in this order
# (assumes add_random_missing draws from the RNG that reset_seed resets — TODO confirm).
reset_seed()
# Both test sets are built from the first 150 rows of `hai`.
data_r_gaps = GPFADataTest(hai[:150]).add_random_missing()
# NOTE(review): presumably one gap of length 15 applied to all four variables — confirm against add_gap.
data_c_gaps = GPFADataTest(hai[:150]).add_gap(15, meteo_vars.values())

### Random gaps

In [None]:
# Fit the explorer on the random-gap data, then build the result object from the fit.
# NOTE(review): data_compl_tidy is presumably the gap-free data in long format — confirm.
res_r_gaps = (
    SimpleGPImputationExplorer(data_r_gaps.data)
    .fit()
    .to_result(data_r_gaps.data_compl_tidy, units=units)
)

  0%|          | 0/100 [00:00<?, ?it/s]

torch.linalg.solve_triangular has its arguments reversed and does not return a copy of one of the inputs.
X = torch.triangular_solve(B, A).solution
should be replaced with
X = torch.linalg.solve_triangular(A, B). (Triggered internally at  ../aten/src/ATen/native/BatchLinearAlgebra.cpp:2183.)
  res = torch.triangular_solve(right_tensor, self.evaluate(), upper=self.upper).solution


  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

In [None]:
# Render the results of the random-gap run (this cell's output includes the per-variable r2 table).
res_r_gaps.display_results(
    plot_args={'bind_interaction': False},
)

  for col_name, dtype in df.dtypes.iteritems():


variable,r2
TA,0.9821
SW_IN,0.9538
LW_IN,0.9484
VPD,0.9493


### Continuous Gaps

In [None]:
# Fit the explorer on the data built with add_gap, then build the result object from the fit.
# NOTE(review): data_compl_tidy is presumably the gap-free data in long format — confirm.
res_c_gaps = (
    SimpleGPImputationExplorer(data_c_gaps.data)
    .fit()
    .to_result(data_c_gaps.data_compl_tidy, units=units)
)

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

In [None]:
# Render the results of the add_gap run (this cell's output includes the per-variable r2 table).
res_c_gaps.display_results(
    plot_args={'bind_interaction': False},
)

  for col_name, dtype in df.dtypes.iteritems():


variable,r2
TA,-8.4622
SW_IN,0.1705
LW_IN,-37.5523
VPD,-1065.0841


## Second Test

Try again with a different random seed.

In [None]:
# Re-seed with 101 before rebuilding the test data; the statements below must stay in this order
# (assumes add_random_missing draws from the RNG that reset_seed resets — TODO confirm).
reset_seed(101)
# These assignments overwrite the `data_r_gaps` / `data_c_gaps` built for the first test.
data_r_gaps = GPFADataTest(hai[:150]).add_random_missing()
# NOTE(review): presumably one gap of length 15 applied to all four variables — confirm against add_gap.
data_c_gaps = GPFADataTest(hai[:150]).add_gap(15, meteo_vars.values())

### Random gaps

In [None]:
# Fit the explorer on the random-gap data of the second test, then build the result object.
# This rebinds `res_r_gaps`, replacing the result of the first test.
res_r_gaps = (
    SimpleGPImputationExplorer(data_r_gaps.data)
    .fit()
    .to_result(data_r_gaps.data_compl_tidy, units=units)
)

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

In [None]:
# Render the results of the second random-gap run (this cell's output includes the per-variable r2 table).
res_r_gaps.display_results(
    plot_args={'bind_interaction': False},
)

  for col_name, dtype in df.dtypes.iteritems():


variable,r2
TA,0.9908
SW_IN,0.966
LW_IN,0.9693
VPD,0.9676


### Continuous Gaps

In [None]:
# Fit the explorer on the add_gap data of the second test, then build the result object.
# This rebinds `res_c_gaps`, replacing the result of the first test.
res_c_gaps = (
    SimpleGPImputationExplorer(data_c_gaps.data)
    .fit()
    .to_result(data_c_gaps.data_compl_tidy, units=units)
)

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

In [None]:
# Render the results of the second add_gap run (this cell's output includes the per-variable r2 table).
# NOTE(review): this is the only display call that also passes 'properties': {} — confirm it is intentional.
res_c_gaps.display_results(
    plot_args={
        'bind_interaction': False,
        'properties': {},
    },
)

  for col_name, dtype in df.dtypes.iteritems():


variable,r2
TA,0.9849
SW_IN,0.9654
LW_IN,0.9809
VPD,0.9291
