# Imputation Kalman Model
> Imputation using Kalman Models

In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
#| hide
#| default_exp kalman.imputation

In [None]:
#| export
import pandas as pd
import numpy as np
from meteo_imp.kalman.model import KalmanModel
from meteo_imp.results import ImputationResult
from meteo_imp.utils import *
from fastcore.basics import store_attr, patch
from meteo_imp.data_preparation import StandardScaler

import torch
from torch import Tensor

## Imputation

In [None]:
#| export
class KalmanImputation:
    """Imputation using a kalman model

    Rows of `data` with at least one missing value are treated as gaps: the model
    is fitted only on the complete rows and the gap rows are filled with the
    model predictions.
    """
    def __init__(self, data: pd.DataFrame,
                 model: KalmanModel = KalmanModel, # a subclass of KalmanModel to be used as model
                ):
        self.data = data
        # Convert through numpy explicitly: `torch.tensor` on a pandas Series goes through
        # the sequence protocol, whose integer item access is label based and can fail
        # when the index of `data` is not 0..n-1
        complete_rows = (~self.data.isna().any(axis=1)).to_numpy()
        self.train_idx = torch.tensor(complete_rows) # True for rows without any missing value

        # the scaler is built from the whole dataset (gaps included) and the scaled data is stored
        train_data = torch.tensor(data.to_numpy())
        self.scaler = StandardScaler(train_data)
        train_data = self.scaler.transform(train_data)
        self.train_data = train_data

        # time is the row position in `data`
        self.T = torch.arange(self.data.shape[0])
        self.model = model(self.train_data)

    def fit(self, n_iter=10, lr=.1) -> 'KalmanImputation':
        """Fit model parameters using only the rows without missing values"""
        times = self.T[self.train_idx]
        obs_train = self.train_data[self.train_idx]
        self.model.train(times, obs_train, n_iter, lr)
        return self # allow chaining, e.g. `KalmanImputation(df).fit().impute()`

    def impute(self,
               pred_all = False, # If the dataset should be replaced by the model predictions
                                # or only the gaps imputed using the model
              ):
        """Impute data in tidy format using model

        Returns a DataFrame with columns `time`, `variable`, `mean` and `std`.
        Rows that are not predicted keep the observed value as `mean` and get `std` 0.

        NOTE(review): with `pred_all=False` a row is replaced as a whole as soon as one
        of its variables is missing, so observed values in partially missing rows are
        overwritten by the prediction too.
        """
        # predict either on all dataset or only on part
        if pred_all:
            time_mask = self.T
            data_mask = torch.ones_like(self.train_idx, dtype=bool)
        else:
            time_mask = self.T[~self.train_idx]
            data_mask = ~self.train_idx

        # pandas gets plain numpy arrays, both as row indexer and as `time` column
        rows = data_mask.cpu().numpy()
        time = self.T.cpu().numpy()

        imp_mean = self.data.copy()
        # for observations std is 0
        imp_std = pd.DataFrame(np.zeros_like(self.data), columns=self.data.columns)

        if rows.any(): # without gaps there is nothing to ask the model for
            pred = self.model.predict(time_mask)

            # predictions are in the scaled space, bring them back to the data units
            mean = self.scaler.inverse_transform(pred.mean)
            imp_mean.iloc[rows, :] = mean.detach().cpu().numpy()

            # get the diagonal of the covariance matrices (the variance) and transform to std
            std = cov2std(pred.cov)
            std = self.scaler.inverse_transform_std(std)
            imp_std.iloc[rows, :] = std.detach().cpu().numpy()

        # `var_name` is explicit so both frames share the merge key even if `data.columns` has a name
        imp_mean = imp_mean.assign(time=time).melt('time', var_name='variable', value_name='mean')
        imp_std = imp_std.assign(time=time).melt('time', var_name='variable', value_name='std')

        return pd.merge(imp_mean, imp_std, on=['time', 'variable'])

In [None]:
from meteo_imp.data_preparation import MeteoDataTest

In [None]:
# Fixed seed so that the generated data and gaps are the same on every run
# (presumably seeds the random generators — confirm in `meteo_imp.utils`)
reset_seed(1)
# Small synthetic dataset with randomly removed values; the table printed by the
# next cell shows 5 rows, variables `x0` and `x1`, and the resulting gaps
data = MeteoDataTest.generate_gpfa(2, 5).add_random_missing()

In [None]:
data.data

Unnamed: 0,x0,x1
0,0.023263,
1,0.219627,0.268028
2,-0.039892,0.063075
3,,
4,-0.64549,-0.144866


In [None]:
# Imputer with the default `KalmanModel`; the constructor scales the data and
# marks the rows without missing values as training rows
k_imp = KalmanImputation(data.data)

In [None]:
k_imp.train_data

tensor([[ 0.3586,     nan],
        [ 0.8847,  0.9976],
        [ 0.1895,  0.0048],
        [    nan,     nan],
        [-1.4328, -1.0024]])

In [None]:
k_imp.train_data[k_imp.train_idx]

tensor([[ 0.8847,  0.9976],
        [ 0.1895,  0.0048],
        [-1.4328, -1.0024]])

In [None]:
# n_iter=10 and lr=0.1, i.e. the defaults of `fit` spelled out
k_imp.fit(10, lr=0.1)

starting


  0%|          | 0/10 [00:00<?, ?it/s]





















































<__main__.KalmanImputation>

In [None]:
# Default `pred_all=False`: only the rows with a missing value are replaced by
# the model prediction, the other rows keep the observation with std 0
k_imp.impute()








Unnamed: 0,time,variable,mean,std
0,0,x0,-0.031254,0.473841
1,1,x0,0.219627,0.0
2,2,x0,-0.039892,0.0
3,3,x0,-0.280452,0.514002
4,4,x0,-0.64549,0.0
5,0,x1,0.10962,0.259775
6,1,x1,0.268028,0.0
7,2,x1,0.063075,0.0
8,3,x1,-0.000474,0.280925
9,4,x1,-0.144866,0.0


In [None]:
data.data.shape

In [None]:
# `pred_all=True`: every row is replaced by the model prediction
k_imp.impute(pred_all=True)

#### Result

In [None]:
#| export
@patch
def to_result(self: KalmanImputation, data_compl, var_names=None, units=None, pred_all=False):
    """Run the imputation and bundle it with the complete data and the model info in an `ImputationResult`"""
    imputed = self.impute(pred_all)
    # description of the fitted filter, `var_names` is forwarded untouched
    model_info = self.model.filter.get_info(var_names)
    return ImputationResult(imputed, data_compl, model_info, units)

In [None]:
# Column vector (6, 1) with the values 0, 1, 2, 3, 2, 1
# NOTE(review): `X` is not used by any other cell visible in this notebook
X = np.hstack([np.arange(0,3.), np.arange(3., 0, -1)]).reshape(6, 1)

In [None]:
# Imputation (gap rows only, `pred_all` defaults to False) together with the
# complete data — presumably in tidy format, judging by the attribute name
res = k_imp.to_result(data.data_compl_tidy)

In [None]:
res.display_results()

## Debug

In [None]:
from meteo_imp.data import hai

In [None]:
td = MeteoDataTest(hai)

In [None]:
# Add an artificial gap for the variable 'TA'
# NOTE(review): the meaning of the two numeric arguments is not visible here —
# confirm against `MeteoDataTest.add_gap`
td.add_gap(10, 'TA', 10)

In [None]:
# Same imputer as above (default `KalmanModel`), now on the `hai` dataset with the added gap
i_hai = KalmanImputation(td.data)

In [None]:
# Uses the `fit` defaults: n_iter=10, lr=0.1
i_hai.fit()

## Export 

In [None]:
#| hide
# Run the nbdev export; the cells marked `#| export` above are the ones meant for
# the module named by `#| default_exp` (kalman.imputation)
from nbdev import nbdev_export
nbdev_export()