# Tests for Kalman Filter
> Compare implementation of Kalman filters using pytorch with `pykalman` implementation

In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
from fastcore.test import *
from fastcore.basics import *
from meteo_imp.utils import *
from meteo_imp.gaussian import *
from meteo_imp.data_preparation import MeteoDataTest
from meteo_imp.kalman.filter import *

import pykalman
from typing import *

import numpy as np
import pandas as pd
import torch
from torch import Tensor
from torch.distributions import MultivariateNormal

## Introduction

## KalmanFilter

The Kalman Filter is an algorithm designed to estimate $P(x_t | y_{0:t})$. As all state transitions and observations are linear with Gaussian distributed noise, these distributions can be represented exactly as Gaussian distributions with mean `filt_state_means[t]` and covariance `filt_state_covs[t]`.
Similarly, the Kalman Smoother is an algorithm designed to estimate $P(x_t | y_{0:T-1})$.



### Main class

In [None]:
k = KalmanFilter.init_random(1,1)

In [None]:
k.obs_cov

tensor([[0.2187]], grad_fn=<AddBackward0>)

In [None]:
k.init_state_cov

tensor([[0.4346]], grad_fn=<AddBackward0>)

In [None]:
list(k.named_parameters())

In [None]:
k = KalmanFilter()
# pykalman reference implementation
pyk = pykalman.KalmanFilter()

In [None]:
X = torch.tensor([1.,2,3])
nX = X.numpy()
X

2-dimensional observations

In [None]:
X2 = torch.tensor([[i, 10. * i] for i in range(1,5)])
nX2 = X2.numpy()

In [None]:
k2 = KalmanFilter(transition_matrices = torch.eye(2), obs_matrices=torch.eye(2))

pyk2 = pykalman.KalmanFilter(n_dim_obs = 2, n_dim_state=2)

In [None]:
X2

In [None]:
obs_mask = torch.ones_like(X2, dtype=torch.bool)
obs_mask[0, 1] = False # only one value missing
obs_mask[2, :] = False # whole row missing
# Xm X missing
X2m = X2.clone()
X2m[~obs_mask] = torch.nan
# nXm Numpy X missing
nX2m = np.ma.masked_array(X2.numpy(), mask = ~obs_mask.numpy())

In [None]:
obs_mask

In [None]:
X2m

In [None]:
nX2m

In [None]:
KalmanFilter.init_random(3,6, dtype=torch.float64)

### Tester

In [None]:
from fastcore.basics import *

In [None]:

to_posdef = PosDef().transform

class KalmanFilterTester():
    """Utility class to compare the meteo_imp torch implementation with the pykalman one.

    On construction it draws one random set of model parameters, builds a torch
    `KalmanFilter` and a `pykalman` filter from the same values, and generates
    random observations together with a random missing-value mask. Parameters
    and data are stored both as torch tensors and as numpy arrays.
    """

    # parameter names used for the torch filter -> keyword names of the pykalman filter
    torch2pyk = {
        'trans_matrix':    'transition_matrices',
        'trans_off':       'transition_offsets',
        'trans_cov':       'transition_covariance',
        'obs_matrix':      'observation_matrices',
        'obs_off':         'observation_offsets',
        'obs_cov':         'observation_covariance',
        'init_state_mean': 'initial_state_mean',
        'init_state_cov':  'initial_state_covariance',
    }

    def __init__(self,
                 n_dim_state = 3,        # size of the hidden state
                 n_dim_obs = 3,          # size of one observation
                 n_obs = 10,             # number of time steps in the random data
                 p_missing = .3,         # probability that a single value is marked as missing
                 seed=None,              # if not None, passed to `reset_seed` before drawing
                 dtype=torch.float32,    # dtype of the random parameters and data
                 nan_mask = True         # overwrite missing values with nan
                 ):
        store_attr(but='seed')
        # `seed=0` is a legitimate seed: test against None instead of truthiness
        if seed is not None: reset_seed(seed)

        self.random_init()

    def random_init(self):
        """Draw random parameters and data and build both filters from them."""
        n_state, n_obs_dim, dtype = self.n_dim_state, self.n_dim_obs, self.dtype

        # the order of the random draws is kept fixed so that a given seed stays reproducible;
        # `to_posdef` is presumably there to make the covariances positive definite
        self.params = {
            'trans_matrix':    torch.rand(n_state, n_state, dtype=dtype),
            'trans_off':       torch.rand(n_state, dtype=dtype),
            'trans_cov':       to_posdef(torch.rand(n_state, n_state, dtype=dtype)),
            'obs_matrix':      torch.rand(n_obs_dim, n_state, dtype=dtype),
            'obs_off':         torch.rand(n_obs_dim, dtype=dtype),
            'obs_cov':         to_posdef(torch.rand(n_obs_dim, n_obs_dim, dtype=dtype)),
            'init_state_mean': torch.rand(n_state, dtype=dtype),
            'init_state_cov':  to_posdef(torch.rand(n_state, n_state, dtype=dtype)),
        }
        # same values, numpy arrays, pykalman keyword names
        self.params_pyk = {self.torch2pyk[name]: param.numpy() for name, param in self.params.items()}

        self.filter = KalmanFilter(**self.params)
        self.filter_pyk = pykalman.standard.KalmanFilter(**self.params_pyk)

        # torch data carries a leading batch dimension: [1, n_obs, n_dim_obs]
        self.data = torch.rand(self.n_obs, n_obs_dim, dtype=dtype).unsqueeze(0)
        # True = value is observed, False = value is missing
        self.mask = (torch.rand(self.n_obs, n_obs_dim) > self.p_missing).unsqueeze(0)
        if self.nan_mask: self.data[~self.mask] = torch.nan # ensure that the data cannot be used
        # pykalman has no batch dimension and marks missing values with mask=True
        self.data_pyk = np.ma.masked_array(self.data.squeeze(0).numpy(), mask = ~self.mask.squeeze(0).numpy())
    
    

In [None]:
tst = KalmanFilterTester()

In [None]:
tst64 = KalmanFilterTester(dtype=torch.float64)

In [None]:
tst.data.shape

torch.Size([1, 10, 3])

In [None]:
tst.data_pyk.shape

(10, 3)

In [None]:
test_close(tst.params.values(), tst.params_pyk.values())

### Filter

#### Filter predict

In [None]:
from datetime import datetime
def _filter_predict(transition_matrix, transition_cov,
                    transition_offset, current_state_mean,
                    current_state_cov, check_args=None):
    r"""Calculate the mean and cov of $P(x_{t+1} | z_{0:t})$"""
    pred_state_mean = transition_matrix @ current_state_mean + transition_offset
    pred_state_cov =  transition_matrix @ current_state_cov @ transition_matrix.T + transition_cov

    if check_args is not None: check_posdef(pred_state_cov, 'filter_predict', **check_args)
    
    return (pred_state_mean, pred_state_cov)

In [None]:
def _filter_predict2(transition_matrix, transition_covariance,
                    transition_offset, current_state_mean,
                    current_state_covariance):
    predicted_state_mean = (
        torch.matmul(transition_matrix, current_state_mean)
        + transition_offset
    )
    predicted_state_covariance = (
        torch.matmul(transition_matrix,
               torch.matmul(current_state_covariance,
                      transition_matrix.T))
        + transition_covariance
    )

    return (predicted_state_mean, predicted_state_covariance)

In [None]:
(
    (tst.params['transition_matrices'] @ tst.params['initial_state_cov'] ) -
    torch.matmul(tst.params['transition_matrices'], tst.params['initial_state_cov'])
)

In [None]:
(
    (tst.params['transition_matrices'] @ tst.params['initial_state_cov']  @ tst.params['transition_matrices'].T) -
    torch.matmul(tst.params['transition_matrices'], torch.matmul(tst.params['initial_state_cov'], tst.params['transition_matrices'].T))
)

In [None]:
(
    (tst.params['transition_matrices'] @ tst.params['initial_state_cov']  @ tst.params['transition_matrices'].T + tst.params['transition_cov']) -
    (torch.matmul(tst.params['transition_matrices'], torch.matmul(tst.params['initial_state_cov'], tst.params['transition_matrices'].T)) + tst.params['transition_cov'])
)

In [None]:
trans_m = np.eye(2)
trans_cov = np.eye(2)
trans_off = np.zeros((2,2))
curr_mean = np.ones((2,1))
curr_cov = np.zeros((2,2))

In [None]:
_filter_predict(torch.tensor(trans_m) , torch.tensor(trans_cov), torch.tensor(trans_off), torch.tensor(curr_mean), torch.tensor(curr_cov))

In [None]:
test_close(
    pykalman.standard._filter_predict(trans_m , trans_cov, trans_off, curr_mean, curr_cov),
    _filter_predict(torch.tensor(trans_m) , torch.tensor(trans_cov), torch.tensor(trans_off), torch.tensor(curr_mean), torch.tensor(curr_cov)))

In [None]:
pred_pyk = pykalman.standard._filter_predict(
   tst.params_pyk['transition_matrices'],
   tst.params_pyk['transition_covariance'],
   tst.params_pyk['transition_offsets'],
   tst.params_pyk['initial_state_mean'],
   tst.params_pyk['initial_state_covariance'],
)

In [None]:
pred_torch = _filter_predict(
   tst.params['transition_matrices'],
   tst.params['transition_cov'],
   tst.params['transition_offsets'],
   tst.params['initial_state_mean'],
   tst.params['initial_state_cov'],

)

In [None]:
pred_pyk[0] - pred_torch[0].numpy()

In [None]:
test_close(pred_pyk[0], pred_torch[0])

In [None]:
pred_pyk[1] - pred_torch[1].numpy()

In [None]:
test_close(pred_pyk[1], pred_torch[1])

In [None]:
pykalman.standard._filter_predict??

In [None]:
tst.params_pyk['transition_matrices'].shape

In [None]:
test_close(
   pykalman.standard._filter_predict(
       tst.params_pyk['transition_matrices'],
       tst.params_pyk['transition_covariance'],
       tst.params_pyk['transition_offsets'],
       tst.params_pyk['initial_state_mean'],
       tst.params_pyk['initial_state_covariance'],
    ),
    
    _filter_predict2(
       tst.params['transition_matrices'],
       tst.params['transition_cov'],
       tst.params['transition_offsets'],
       tst.params['initial_state_mean'],
       tst.params['initial_state_cov'],
    
    )
)

In [None]:
test_close(
    
    _filter_predict2(
       tst.params['transition_matrices'],
       tst.params['transition_cov'],
       tst.params['transition_offsets'],
       tst.params['initial_state_mean'],
       tst.params['initial_state_cov'],
    ),
    _filter_predict(
       tst.params['transition_matrices'],
       tst.params['transition_cov'],
       tst.params['transition_offsets'],
       tst.params['initial_state_mean'],
       tst.params['initial_state_cov'],
    )
)

In [None]:
test_close(
   pykalman.standard._filter_predict(
       tst.params_pyk['transition_matrices'],
       tst.params_pyk['transition_covariance'],
       tst.params_pyk['transition_offsets'],
       tst.params_pyk['initial_state_mean'],
       tst.params_pyk['initial_state_covariance'],
    ),
    
    _filter_predict(
       tst.params['transition_matrices'],
       tst.params['transition_cov'],
       tst.params['transition_offsets'],
       tst.params['initial_state_mean'],
       tst.params['initial_state_cov'],
    
    )
)

The issue seems to have come from `check_posdef`; `torch.matmul` and `@` both work as intended.

#### Filter correct

In [None]:
pykalman.standard._filter_correct??

In [None]:
tst = KalmanFilterTester(p_missing=0, nan_mask=False)

In [None]:
observation_matrix = tst.params_pyk['observation_matrices']
observation_covariance = tst.params_pyk['observation_covariance']
predicted_state_mean = tst.params_pyk['initial_state_mean']
predicted_state_covariance = tst.params_pyk['initial_state_covariance']
observation_offset = tst.params_pyk['observation_offsets']
observation = tst.data_pyk[0]

In [None]:
observation

In [None]:
obs_matrix = tst.params['obs_matrices']
obs_cov = tst.params['obs_cov']
pred_state_mean = tst.params['initial_state_mean']
pred_state_cov = tst.params['initial_state_cov']
obs_offset = tst.params['obs_offsets']
obs = tst.data[0]

Pred obs mean

In [None]:
predicted_observation_mean = (
    np.dot(observation_matrix,
           predicted_state_mean)
    + observation_offset
)

In [None]:
pred_obs_mean = obs_matrix @ pred_state_mean + obs_offset

In [None]:
pred_obs_mean

In [None]:
test_close(pred_obs_mean, predicted_observation_mean)

Pred obs cov

In [None]:
predicted_observation_covariance = (
    np.dot(observation_matrix,
           np.dot(predicted_state_covariance,
                  observation_matrix.T))
    + observation_covariance
)

In [None]:
pred_obs_cov = obs_matrix @ pred_state_cov @ obs_matrix.T + obs_cov

In [None]:
pred_obs_cov

In [None]:
test_close(predicted_observation_covariance, pred_obs_cov)

Kalman gain

In [None]:
kalman_gain = (
    np.dot(predicted_state_covariance,
           np.dot(observation_matrix.T,
                  np.linalg.pinv(predicted_observation_covariance)))
)

In [None]:
kalman_gain_torch = pred_state_cov @ obs_matrix.T @ torch.inverse(pred_obs_cov)

In [None]:
kalman_gain_torch

In [None]:
test_close(kalman_gain, kalman_gain_torch)

corr state mean

In [None]:
corrected_state_mean = (
    predicted_state_mean
    + np.dot(kalman_gain, observation - predicted_observation_mean))

In [None]:
corr_state_mean = pred_state_mean + kalman_gain_torch @ (obs - pred_obs_mean)

In [None]:
corr_state_mean

In [None]:
test_close(corrected_state_mean, corr_state_mean)

corr state cov

In [None]:
corrected_state_covariance = (
    predicted_state_covariance
    - np.dot(kalman_gain,
             np.dot(observation_matrix,
                    predicted_state_covariance))
)

In [None]:
corr_state_cov = pred_state_cov - kalman_gain_torch @ obs_matrix @ pred_state_cov

In [None]:
corr_state_cov

In [None]:
test_close(corrected_state_covariance, corr_state_cov)

In [None]:
def print_info(xs, name=''):
    """Print one line with shape, dtype and mean for each element of `listify(xs)`, prefixed by `name`."""
    items = listify(xs)
    for x in items:
        summary = f"{name} - shape: {x.shape}, type {x.dtype}, mean {x.mean()}"
        print(summary)

In [None]:

def _filter_correct(obs_matrix,
                    obs_cov,
                    obs_offset,
                    pred_state_mean,
                    pred_state_cov,
                    obs,
                    mask,
                    check_args=None):
    if mask.all():
        pred_obs_mean = obs_matrix @ pred_state_mean + obs_offset
        pred_obs_cov = obs_matrix @ pred_state_cov @ obs_matrix.T + obs_cov
        
        kalman_gain = pred_state_cov @ obs_matrix.T @ torch.cholesky_inverse(torch.linalg.cholesky(pred_obs_cov))

        corrected_state_mean = pred_state_mean + kalman_gain @ (obs - pred_obs_mean)
        corrected_state_cov = pred_state_cov - kalman_gain @ obs_matrix @ pred_state_cov
    else:
        n_dim_state = pred_state_cov.shape[0]
        n_dim_obs = obs_matrix.shape[0]
        kalman_gain = torch.zeros((n_dim_state, n_dim_obs))

        corrected_state_mean = pred_state_mean
        corrected_state_cov = pred_state_cov
        
    if check_args is not None: check_posdef(pred_state_cov, 'filter_correct', **check_args)
    return (kalman_gain, corrected_state_mean,
            corrected_state_cov)

In [None]:
np.any(np.ma.getmask(observation))

In [None]:
def _pyk_filter_correct(observation_matrix, observation_covariance,
                    observation_offset, predicted_state_mean,
                    predicted_state_covariance, observation):
    if not np.any(np.ma.getmask(observation)):
        predicted_observation_mean = (
            np.dot(observation_matrix,
                   predicted_state_mean)
            + observation_offset
        )
        print_info(predicted_observation_mean, 'pred_obs_mean')
        predicted_observation_covariance = (
            np.dot(observation_matrix,
                   np.dot(predicted_state_covariance,
                          observation_matrix.T))
            + observation_covariance
        )
        print_info(predicted_observation_covariance, 'pred_obs_cov')

        kalman_gain = (
            np.dot(predicted_state_covariance,
                   np.dot(observation_matrix.T,
                          np.linalg.pinv(predicted_observation_covariance)))
        )
        print_info(kalman_gain, 'kalman_gain')

        corrected_state_mean = (
            predicted_state_mean
            + np.dot(kalman_gain, observation - predicted_observation_mean)
        )
        print_info(corrected_state_mean, 'corr_state_mean')
        corrected_state_covariance = (
            predicted_state_covariance
            - np.dot(kalman_gain,
                     np.dot(observation_matrix,
                            predicted_state_covariance))
        )
        print_info(corrected_state_covariance, 'corr_state_cov')
    else:
        n_dim_state = predicted_state_covariance.shape[0]
        n_dim_obs = observation_matrix.shape[0]
        kalman_gain = np.zeros((n_dim_state, n_dim_obs))

        corrected_state_mean = predicted_state_mean
        corrected_state_covariance = predicted_state_covariance

    return (kalman_gain, corrected_state_mean,
            corrected_state_covariance)

In [None]:
tst = KalmanFilterTester() # need nan

In [None]:
_filter_correct(
   tst.params['obs_matrices'],
   tst.params['obs_cov'],
   tst.params['obs_offsets'],
   tst.params['initial_state_mean'],
   tst.params['initial_state_cov'],
   tst.data[0],
   tst.mask[0]
)

In [None]:
tst.data

In [None]:
tst.mask

In [None]:
tst.mask

In [None]:
pykalman.standard._filter_correct(
       tst.params_pyk['observation_matrices'],
       tst.params_pyk['observation_covariance'],
       tst.params_pyk['observation_offsets'],
       tst.params_pyk['initial_state_mean'],
       tst.params_pyk['initial_state_covariance'],
       tst.data_pyk[0]
    )

In [None]:
_pyk_filter_correct(
       tst.params_pyk['observation_matrices'],
       tst.params_pyk['observation_covariance'],
       tst.params_pyk['observation_offsets'],
       tst.params_pyk['initial_state_mean'],
       tst.params_pyk['initial_state_covariance'],
       tst.data_pyk[0]
    )

In [None]:
pykalman.standard._filter_correct(
   tst.params['obs_matrices'].numpy(),
   tst.params['obs_cov'].numpy(),
   tst.params['obs_offsets'].numpy(),
   tst.params['initial_state_mean'].numpy(),
   tst.params['initial_state_cov'].numpy(),
   np.ma.masked_array(tst.data[0], mask=[True, True, True])
)

In [None]:
_pyk_filter_correct(
   tst.params['obs_matrices'].numpy(),
   tst.params['obs_cov'].numpy(),
   tst.params['obs_offsets'].numpy(),
   tst.params['initial_state_mean'].numpy(),
   tst.params['initial_state_cov'].numpy(),
   np.ma.masked_array(tst.data[0], mask=[True, True, True])
)

In [None]:
   (pykalman.standard._filter_correct(
       tst.params_pyk['observation_matrices'],
       tst.params_pyk['observation_covariance'],
       tst.params_pyk['observation_offsets'],
       tst.params_pyk['initial_state_mean'],
       tst.params_pyk['initial_state_covariance'],
       tst.data_pyk[0]
    )[0] -
    
    _filter_correct(
       tst.params['obs_matrices'],
       tst.params['obs_cov'],
       tst.params['obs_offsets'],
       tst.params['initial_state_mean'],
       tst.params['initial_state_cov'],
       tst.data[0],
       tst.mask[0]
    )[0].numpy())

In [None]:
args_np = {
    'obs_m': np.eye(2),
    'obs_cov': np.eye(2),
    'obs_off': np.zeros((2,1)),
    'pred_state_mean': np.ones(2),
    'pred_state_cov': np.eye(2),
    'obs': np.ones((2,1)),
}

args_torch = {k: torch.tensor(v) for k,v in args_np.items()}

In [None]:
_filter_correct(*args_torch.values(), mask=torch.ones_like(args_torch['obs'], dtype=torch.bool))

In [None]:
test_close(
    pykalman.standard._filter_correct(*args_np.values()),
    _filter_correct(*args_torch.values(), mask=torch.ones_like(args_torch['obs'], dtype=torch.bool)))

In [None]:
test_close(
   pykalman.standard._filter_correct(
       tst.params_pyk['observation_matrices'],
       tst.params_pyk['observation_covariance'],
       tst.params_pyk['observation_offsets'],
       tst.params_pyk['initial_state_mean'],
       tst.params_pyk['initial_state_covariance'],
       tst.data_pyk[0]
    ),
    
    _filter_correct(
       tst.params['obs_matrices'],
       tst.params['obs_cov'],
       tst.params['obs_offsets'],
       tst.params['initial_state_mean'],
       tst.params['initial_state_cov'],
       tst.data[0],
       tst.mask[0]
    )
)

In [None]:
obs_matrix, obs_cov, obs_offset,pred_state_mean,pred_state_cov,obs, mask = k.obs_matrices, k.obs_cov, k.obs_offsets, k.init_state_mean, k.init_state_cov, data, mask

In [None]:
mask.all(-1)

tensor([[ True, False,  True, False, False,  True, False, False,  True, False],
        [False, False,  True, False, False,  True, False, False,  True, False]])

In [None]:
mask

tensor([[[ True,  True,  True],
         [False,  True,  True],
         [ True,  True,  True],
         [False, False,  True],
         [False,  True, False],
         [ True,  True,  True],
         [False,  True,  True],
         [False,  True,  True],
         [ True,  True,  True],
         [False, False, False]],

        [[False,  True,  True],
         [ True, False,  True],
         [ True,  True,  True],
         [False,  True,  True],
         [False, False, False],
         [ True,  True,  True],
         [ True, False, False],
         [False,  True,  True],
         [ True,  True,  True],
         [ True,  True, False]]])

In [None]:
obs

tensor([[[0.9847, 0.0852, 0.5334],
         [   nan, 0.2617, 0.7972],
         [0.2088, 0.4545, 0.1455],
         [   nan,    nan, 0.2881],
         [   nan, 0.9087,    nan],
         [0.5610, 0.9079, 0.2507],
         [   nan, 0.7851, 0.0212],
         [   nan, 0.6513, 0.3955],
         [0.8111, 0.2558, 0.7570],
         [   nan,    nan,    nan]],

        [[   nan, 0.2511, 0.4720],
         [0.6684,    nan, 0.1489],
         [0.6714, 0.4719, 0.5053],
         [   nan, 0.7793, 0.3246],
         [   nan,    nan,    nan],
         [0.8191, 0.7040, 0.3264],
         [0.0842,    nan,    nan],
         [   nan, 0.3308, 0.7610],
         [0.3228, 0.0961, 0.3075],
         [0.0947, 0.4745,    nan]]])

In [None]:
obs[:, 0, :]

tensor([[0.9847, 0.0852, 0.5334],
        [   nan, 0.2511, 0.4720]])

In [None]:
(obs[:, 0, :] - pred_obs_mean).unsqueeze(-1).shape

torch.Size([2, 3, 1])

In [None]:
kalman_gain.shape

torch.Size([4, 3])

In [None]:
kalman_gain @ (obs[:, 0, :] - pred_obs_mean).unsqueeze(-1)

tensor([[[-0.1700],
         [-0.1658],
         [-0.7621],
         [ 0.4827]],

        [[    nan],
         [    nan],
         [    nan],
         [    nan]]], dtype=torch.float64, grad_fn=<TransposeBackward0>)

In [None]:
pred_obs_mean = obs_matrix @ pred_state_mean + obs_offset

In [None]:
pred_obs_cov = obs_matrix @ pred_state_cov @ obs_matrix.T + obs_cov

In [None]:
pred_obs_mean, pred_obs_cov

(tensor([1.1307, 1.4181, 0.4970], dtype=torch.float64, grad_fn=<AddBackward0>),
 tensor([[2.2690, 1.4801, 0.5773],
         [1.4801, 1.3001, 0.8774],
         [0.5773, 0.8774, 1.0938]], dtype=torch.float64, grad_fn=<AddBackward0>))

In [None]:
obs_matrix.shape

torch.Size([3, 4])

In [None]:
(pred_state_cov @ obs_matrix.T).shape

torch.Size([4, 3])

In [None]:
torch.cholesky_inverse(torch.linalg.cholesky(pred_state_cov)).shape

torch.Size([4, 4])

In [None]:
r.shape

torch.Size([4, 3])

In [None]:
kalman_gain = pred_state_cov @ obs_matrix.T @ torch.cholesky_inverse(torch.linalg.cholesky(pred_obs_cov))

In [None]:
kalman_gain

tensor([[ 0.0210,  0.1246, -0.0214],
        [ 0.2535,  0.1004,  0.1394],
        [ 0.3844,  0.5251, -0.1642],
        [ 1.0030, -0.4705,  0.0583]], dtype=torch.float64,
       grad_fn=<MmBackward0>)

In [None]:
corrected_state_mean = pred_state_mean + kalman_gain @ (obs - pred_obs_mean)

RuntimeError: mat1 and mat2 shapes cannot be multiplied (6x10 and 3x4)

In [None]:
pred_obs_mean

tensor([1.1307, 1.4181, 0.4970], dtype=torch.float64, grad_fn=<AddBackward0>)

In [None]:
kalman_gain.shape

torch.Size([4, 3])

In [None]:
kalman_gain @ (obs[0] - pred_obs_mean)

RuntimeError: mat1 and mat2 shapes cannot be multiplied (4x3 and 10x3)

In [None]:
pred_state_mean

Parameter containing:
tensor([0.8424, 0.0816, 0.8791, 0.3892], dtype=torch.float64,
       requires_grad=True)

In [None]:

corrected_state_cov = pred_state_cov - kalman_gain @ obs_matrix @ pred_state_cov

RuntimeError: mat1 and mat2 shapes cannot be multiplied (6x10 and 3x4)

In [None]:
k.obs_matrices @ k.init_state_mean

tensor([0.6928, 0.9289, 0.2174], dtype=torch.float64, grad_fn=<MvBackward0>)

trying multiple random values ...

In [None]:
# compare the correction step of both implementations on 100 random parameter sets
for _ in range(100):
    tst = KalmanFilterTester(nan_mask=False)
    test_close(
       pykalman.standard._filter_correct(
           tst.params_pyk['observation_matrices'],
           tst.params_pyk['observation_covariance'],
           tst.params_pyk['observation_offsets'],
           tst.params_pyk['initial_state_mean'],
           tst.params_pyk['initial_state_covariance'],
           tst.data_pyk[0]
        ),

        # torch parameters use the key names defined in KalmanFilterTester.params;
        # data and mask carry a leading batch dimension, so [0, 0] is the first time step
        _filter_correct(
           tst.params['obs_matrix'],
           tst.params['obs_cov'],
           tst.params['obs_off'],
           tst.params['init_state_mean'],
           tst.params['init_state_cov'],
           tst.data[0, 0],
           tst.mask[0, 0]
        )
    )

#### Filter

In [None]:

def _filter(transition_matrices, obs_matrices, transition_cov,
            obs_cov, transition_offsets, obs_offsets,
            initial_state_mean, initial_state_cov, obs, obs_mask, check_args={}):
    """Apply the Kalman Filter

    Calculate posterior distribution over hidden states given observations up
    to and including the current time step.

    Parameters
    ----------
    transition_matrices : [n_timesteps-1,n_dim_state,n_dim_state] or
    [n_dim_state,n_dim_state] array-like
        state transition matrices
    obs_matrices : [n_timesteps, n_dim_obs, n_dim_state] or [n_dim_obs, \
    n_dim_state] array-like
        observation matrix
    transition_cov : [n_timesteps-1,n_dim_state,n_dim_state] or
    [n_dim_state,n_dim_state] array-like
        state transition covariance matrix
    obs_cov : [n_timesteps, n_dim_obs, n_dim_obs] or [n_dim_obs,
    n_dim_obs] array-like
        observation covariance matrix
    transition_offsets : [n_timesteps-1, n_dim_state] or [n_dim_state] \
    array-like
        state offset
    obs_offsets : [n_timesteps, n_dim_obs] or [n_dim_obs] array-like
        observation offset
    initial_state_mean : [n_dim_state] array-like
        mean of initial state distribution
    initial_state_cov : [n_dim_state, n_dim_state] array-like
        covariance of initial state distribution
    obs : [n_timesteps, n_dim_obs] array
        observations from times [0...n_timesteps-1]
    obs_mask : [n_timesteps, ...] boolean array
        `obs_mask[t]` is handed to `_filter_correct` together with `obs[t]`
    check_args : dict or None
        extra keyword arguments for the positive-definiteness checks of the
        predict and correct steps (the time step `t` is added automatically).
        Pass None to disable the checks. The default dict is never mutated.

    Returns
    -------
    pred_state_means : list of n_timesteps items
        `pred_state_means[t]` = mean of hidden state at time t given
        observations from times [0...t-1]
    pred_state_covs : list of n_timesteps items
        `pred_state_covs[t]` = covariance of hidden state at time t
        given observations from times [0...t-1]
    filt_state_means : list of n_timesteps items
        `filt_state_means[t]` = mean of hidden state at time t given
        observations from times [0...t]
    filt_state_covs : list of n_timesteps items
        `filt_state_covs[t]` = covariance of hidden state at time t
        given observations from times [0...t]
    """
    n_timesteps = obs.shape[0]

    # those variables need to be lists and not Tensors,
    # otherwise pytorch tries to compute the gradient for the whole tensor and it breaks due to the in place operations
    pred_state_means = [None] * n_timesteps
    pred_state_covs = [None] * n_timesteps
    filt_state_means = [None] * n_timesteps
    filt_state_covs = [None] * n_timesteps

    for t in range(n_timesteps):
        step_check_args = {'t': t, **check_args} if check_args is not None else None

        if t == 0:
            pred_state_means[t] = initial_state_mean
            pred_state_covs[t] = initial_state_cov
        else:
            # The per-step values get their own names: overwriting the
            # `transition_cov` parameter with its slice would make every later
            # step reuse that slice instead of selecting from the full input.
            transition_matrix_t = _last_dims(transition_matrices, t - 1)
            transition_cov_t = _last_dims(transition_cov, t - 1)
            transition_offset_t = _last_dims(transition_offsets, t - 1, ndims=1)
            pred_state_means[t], pred_state_covs[t] = _filter_predict(
                transition_matrix_t,
                transition_cov_t,
                transition_offset_t,
                filt_state_means[t - 1],
                filt_state_covs[t - 1],
                check_args=step_check_args,
            )

        # same reasoning as above for `obs_cov`
        obs_matrix_t = _last_dims(obs_matrices, t)
        obs_cov_t = _last_dims(obs_cov, t)
        obs_offset_t = _last_dims(obs_offsets, t, ndims=1)
        # the Kalman gain is not part of the return value, so it is discarded
        _, filt_state_means[t], filt_state_covs[t] = _filter_correct(
            obs_matrix_t,
            obs_cov_t,
            obs_offset_t,
            pred_state_means[t],
            pred_state_covs[t],
            obs[t],
            obs_mask[t],
            check_args=step_check_args,
        )

    return (pred_state_means, pred_state_covs, filt_state_means,
            filt_state_covs)

In [None]:
def test_filter():
    """Check that `_filter` agrees with `pykalman.standard._filter` on the first time step.

    Uses the parameters and data of the global `tst` (a `KalmanFilterTester`).
    """
    pred_s, pred_s_cov, _kal, filt_s, filt_s_cov =  pykalman.standard._filter(
       tst.params_pyk['transition_matrices'],
       tst.params_pyk['observation_matrices'],
       tst.params_pyk['transition_covariance'],
       tst.params_pyk['observation_covariance'],
       tst.params_pyk['transition_offsets'],
       tst.params_pyk['observation_offsets'],
       tst.params_pyk['initial_state_mean'],
       tst.params_pyk['initial_state_covariance'],
       tst.data_pyk[0:1]
    )

    pyk = (pred_s, pred_s_cov, filt_s, filt_s_cov,) # results without kalman gain

    filter_torch = tuple(map(
        torch.stack, # each result is a list with one tensor per time step; stack keeps the matrix shape
          _filter(
           # keys as defined in KalmanFilterTester.params
           tst.params['trans_matrix'],
           tst.params['obs_matrix'],
           tst.params['trans_cov'],
           tst.params['obs_cov'],
           tst.params['trans_off'],
           tst.params['obs_off'],
           tst.params['init_state_mean'],
           tst.params['init_state_cov'],
           # drop the batch dimension, then keep the same single time step given to pykalman
           tst.data[0, 0:1],
           tst.mask[0, 0:1]
    )))

    test_close(pyk, filter_torch)

In [None]:
test_filter()

##### Old testing

In [None]:
args_filt_np = {
    'trans_m': np.eye(2),
    'obs_m': np.eye(2),
    'trans_cov': np.eye(2),
    'obs_cov': np.eye(2),
    'trans_off': np.zeros((2,1)),
    'obs_off': np.zeros((3,2)),
    'init_state_mean': np.ones(2),
    'init_state_cov': np.eye(2),
    'obs': np.ones((3, 2)),
}

obs_mask = np.ones(3)

args_filt_torch = {k: torch.tensor(v, dtype = torch.float32) for k,v in args_filt_np.items()}

In [None]:
_filter(*args_filt_torch.values(), obs_mask=obs_mask)

In [None]:
filt_pyk = list(pykalman.standard._filter(*args_filt_np.values()))
del filt_pyk[2] # remove kalman gain that is not returned py _filter

filt =  list(map(lambda x: torch.stack(x), _filter(*args_filt_torch.values(), obs_mask=obs_mask)))

test_close( filt_pyk, filt)

missing data

In [None]:
obs_mask = np.array([True, False, True])
args_filt_np['obs'] = np.ma.masked_array(args_filt_np['obs'], mask = np.vstack([~obs_mask] * 2).T)

In [None]:
args_filt_np['obs']

In [None]:
filt_pyk = list(pykalman.standard._filter(*args_filt_np.values()))
del filt_pyk[2] # remove kalman gain that is not returned py _filter

filt =  list(map(lambda x: torch.stack(x), _filter(*args_filt_torch.values(), obs_mask=torch.tensor(obs_mask))))

test_close( filt_pyk, filt)

#### KalmanFilter method

In [None]:

@patch
def _filter_all(self: KalmanFilter, obs, mask=None, check_args=None) -> Tuple:
    """Run `_filter` with the parameters of this filter and return its full output.

    `obs` and `mask` are first normalised by `self._parse_obs`.
    """
    obs, obs_mask = self._parse_obs(obs, mask)

    # positional order expected by `_filter`
    model_params = (
        self.transition_matrices,
        self.obs_matrices,
        self.transition_cov,
        self.obs_cov,
        self.transition_offsets,
        self.obs_offsets,
        self.initial_state_mean,
        self.initial_state_cov,
    )
    return _filter(*model_params, obs, obs_mask, check_args)

@patch
def filter(self: KalmanFilter,
          obs: Tensor, # [n_timesteps, n_dim_obs] obs for times [0...n_timesteps-1]
          mask = None,
          check_args=None
          ) -> ListMNormal: # Filtered state
    """Filter the observations and return the filtered state means and covariances."""
    filter_output = self._filter_all(obs, mask, check_args)
    # the predicted states come first in the output and are not needed here
    _pred_means, _pred_covs, means, covs = filter_output
    # the per-time-step lists of tensors (with gradients) become two stacked, detached tensors
    stacked_means = _stack_detach(means)
    stacked_covs = _stack_detach(covs)
    return ListMNormal(stacked_means, stacked_covs)


#### Final Testing

Due to numerical issues the error is bigger than `1e-5`, which is the default tolerance.

In [None]:
((tst.filter.filter(tst.data, tst.mask)[1] -
np.float32(tst.filter_pyk.filter(tst.data_pyk)[1]))).max()

In [None]:
tst = KalmanFilterTester()

In [None]:
test_close(
    tst.filter.filter(tst.data, tst.mask),
    tst.filter_pyk.filter(tst.data_pyk), eps=4e-2) # need to increase the resolution

using `float64` the error is smaller, which confirms that it is only a numerical issue

In [None]:
# float64 comparison: both implementations must receive the float64 data of `tst64`
# (not the float32 data of `tst`), otherwise the check does not exercise float64 inputs
test_close(
    tst64.filter.filter(tst64.data, tst64.mask),
    tst64.filter_pyk.filter(tst64.data_pyk))

In [None]:
((tst64.filter.filter(tst64.data, tst64.mask)[1] -
tst64.filter_pyk.filter(tst64.data_pyk)[1])).max()

##### Old Testing

In [None]:
test_close(
    pyk.filter(X.numpy()),
    k.filter(X)
)

In [None]:
test_close(
    pyk2.filter(nX2),
    k2.filter(X2)
)

In [None]:
k2.filter(X2m)

In [None]:
test_close(
    pyk.filter(X.numpy()),
    k.filter(X)
)

In [None]:
test_close(
    pyk2.filter(nX2),
    k2.filter(X2)
)

In [None]:
test_close(
    pyk2.filter(nX2m),
    k2.filter(X2m)
)

### Smooth

#### Smooth step

In [None]:

def _smooth_update(transition_matrix,      # [n_dim_state, n_dim_state]
                   filt_state: Normal, # [n_dim_state] filtered state at time `t`
                   pred_state: Normal,        # [n_dim_state] state before filtering at time `t + 1` (= using the observation until time t)
                   next_smoothed_state: Normal, # [n_dim_state] smoothed state at time  `t+1`
                   check_args: dict|None = None # if not None checks that the result is positive definite
                   ) -> Normal: # mean and cov of smoothed state at time `t`
    r"""Correct a filtered state with a Kalman Smoother update

    Calculates the posterior distribution of the hidden state at time `t` given all the
    observations via Kalman Smoothing. The result is returned as a `ListMNormal(mean, cov)`.
    """
    # smoothing gain: filt_cov @ A^T @ pred_cov^{-1}
    # the inverse of `pred_state.cov` is obtained from its Cholesky factor,
    # which requires `pred_state.cov` to be symmetric positive definite
    kalman_smoothing_gain = filt_state.cov @ transition_matrix.T @ torch.cholesky_inverse(torch.linalg.cholesky(pred_state.cov))

    smoothed_state_mean = filt_state.mean + kalman_smoothing_gain @ (next_smoothed_state.mean - pred_state.mean)
    smoothed_state_cov = (filt_state.cov
                      + kalman_smoothing_gain @ (next_smoothed_state.cov - pred_state.cov) @ kalman_smoothing_gain.T)

    # check the covariance computed here (the previous code referenced an undefined
    # `pred_state_cov`, raising NameError whenever `check_args` was provided)
    if check_args is not None: check_posdef(smoothed_state_cov, 'smooth_update', **check_args)

    return ListMNormal(smoothed_state_mean, smoothed_state_cov)

In [None]:
args_np_sm = {
    'pred_state_m': np.zeros(2),
    'pred_state_cov': np.eye(2),
    'filt_state_m': np.zeros(2),
    'filt_state_cov': np.eye(2),
    'next_state_m': np.zeros(2),
    'next_state_cov': np.eye(2),
    'trans_m': np.eye(2),
}

args_torch_sm = {k: torch.tensor(v) for k,v in args_np_sm.items()}

In [None]:
pyk_mean, pyk_cov, _ = pykalman.standard._smooth_update(
    args_np_sm['trans_m'],
    args_np_sm['filt_state_m'],
    args_np_sm['filt_state_cov'],
    args_np_sm['pred_state_m'],
    args_np_sm['pred_state_cov'],
    args_np_sm['next_state_m'],
    args_np_sm['next_state_cov'],
)

In [None]:
torch_k = _smooth_update(
    args_torch_sm['trans_m'],
    Normal(args_torch_sm['filt_state_m'], args_torch_sm['filt_state_cov']),
    Normal(args_torch_sm['pred_state_m'], args_torch_sm['pred_state_cov']),
    Normal(args_torch_sm['next_state_m'], args_torch_sm['next_state_cov']),
)

In [None]:
test_close((pyk_mean, pyk_cov, ), torch_k)

In [None]:

def _smooth(transition_matrices, # `[n_timesteps-1, n_dim_state, n_dim_state]` or `[n_dim_state, n_dim_state]`
            filt_state: ListMNormal, # `[n_timesteps, n_dim_state]`
                # `filt_state_means[t]` = mean state estimate for time t given obs from times `[0...t]`
            pred_state: ListMNormal, # `[n_timesteps, n_dim_state]`
                # `pred_state_means[t]` = mean state estimate for time t given obs from times `[0...t-1]`
           check_args: dict|None = None # if not None checks that the result is positive definite
           ) -> ListMNormal: # `[n_timesteps, n_dim_state]` Smoothed state 
    """Run the backward (smoothing) pass over the filtered and predicted states"""
    first_pred_mean = pred_state.mean[0]
    n_timesteps = len(pred_state.mean)
    n_dim_state = first_pred_mean.shape[0]
    # output buffers share dtype and device with the predicted means
    tensor_opts = dict(dtype=first_pred_mean.dtype, device=first_pred_mean.device)

    smoothed_state = ListMNormal(
        torch.zeros((n_timesteps, n_dim_state), **tensor_opts),
        torch.zeros((n_timesteps, n_dim_state, n_dim_state), **tensor_opts),
    )

    # at the last time step the smoothed state is the filtered state
    smoothed_state.mean[-1] = filt_state.mean[-1]
    smoothed_state.cov[-1] = filt_state.cov[-1]

    # walk backwards in time: each step needs the smoothed state at `t + 1`
    for t in range(n_timesteps - 2, -1, -1):
        step_check_args = None if check_args is None else {'t': t, **check_args}
        smoothed_state.mean[t], smoothed_state.cov[t] = _smooth_update(
            _last_dims(transition_matrices, t),
            filt_state.get_nth(t),
            pred_state.get_nth(t + 1),
            smoothed_state.get_nth(t + 1),
            check_args=step_check_args,
        )
    return smoothed_state

In [None]:
(pred_state_means, pred_state_covs, filt_state_means, filt_state_covs ) = k2._filter_all(X2m)

In [None]:
torch_smooth = _smooth(k2.transition_matrices,  ListMNormal(filt_state_means, filt_state_covs), ListMNormal(pred_state_means, pred_state_covs))

In [None]:
pyk_sm_mean, pyk_sm_cov, _ = pykalman.standard._smooth(k2.transition_matrices.detach().numpy(),
                          _stack_detach(filt_state_means).numpy(), _stack_detach(filt_state_covs).numpy(),
                          _stack_detach(pred_state_means).numpy(), _stack_detach(pred_state_covs).numpy())

In [None]:
test_close((pyk_sm_mean, pyk_sm_cov,), torch_smooth)

In [None]:
torch_smooth

#### KalmanFilter method

In [None]:

@patch
def smooth(self: KalmanFilter,
           obs: Tensor, # dataset
           mask = None,
           check_args=None
          ) -> ListMNormal: # `[n_timesteps, n_dim_state]` smoothed hidden state distributions for times `[0...n_timesteps-1]`
    """Kalman smoothing: forward filter pass followed by the backward smoothing pass"""
    pred_means, pred_covs, filt_means, filt_covs = self._filter_all(obs, mask, check_args)

    filtered = ListMNormal(filt_means, filt_covs)
    predicted = ListMNormal(pred_means, pred_covs)
    return _smooth(self.transition_matrices, filtered, predicted, check_args)

  

In [None]:
k.smooth(X)

#### Final Testing

In [None]:
test_close(
    tst.filter.smooth(tst.data, tst.mask),
    tst.filter_pyk.smooth(tst.data_pyk), eps=4e-2) # need to increase the resolution

In [None]:
test_close(
    tst64.filter.smooth(tst64.data, tst64.mask),
    tst64.filter_pyk.smooth(tst64.data_pyk))

In [None]:
test_close(
    pyk.smooth(nX),
    k.smooth(X).detach()
)

In [None]:
test_close(
    pyk2.smooth(nX2),
    k2.smooth(X2).detach()
)

In [None]:
test_close(
    pyk2.smooth(nX2m),
    k2.smooth(X2m)
)

### Predict

In [None]:
from meteo_imp.gaussian import conditional_guassian

In order to have conditional predictions that make sense it's not possible to return the full covariance matrix for the predictions but only the standard deviations

In [None]:
def _get_cond_pred(pred: ListMNormal,
                  obs,
                  mask
                  ) -> ListNormal:
    """Condition the prediction on the observed values and keep only the diagonal of the covariance

    Entries where `mask` is False are replaced by the conditional distribution given the
    entries where `mask` is True; the other entries keep the values from `pred`.
    """
    missing = ~mask
    # only the actually observed values are used for conditioning
    cond = conditional_guassian(pred.mean, pred.cov, obs[mask], mask)

    merged_mean = pred.mean.clone()
    merged_mean[missing] = cond.mean

    # writable diagonal view on a copy of the covariance, `pred.cov` itself is untouched
    # NOTE(review): these are covariance diagonals (variances), no square root is taken
    # even though they are called "std" - confirm this is intended
    merged_diag = torch.diagonal(pred.cov.clone(), dim1=-2, dim2=-1)
    merged_diag[missing] = torch.diagonal(cond.cov, dim1=-2, dim2=-1)

    return ListMNormal(merged_mean, merged_diag)

In [None]:
obs = tst.data[1]

In [None]:
mask = tst.mask[1]

In [None]:
obs, mask

In [None]:
conditional_guassian

The conditional Gaussian returns a distribution over 2 variables, which are the unobserved ones (you can see the `nan` in the observation vector)

In [None]:
conditional_guassian(tst.params['initial_state_mean'], tst.params['initial_state_cov'], obs[mask], mask)

which are correctly merged with the predictions

In [None]:
_get_cond_pred(ListMNormal(tst.params['initial_state_mean'], tst.params['initial_state_cov']), obs, mask)

In [None]:

@patch
def _obs_from_state(self: KalmanFilter, state_mean, state_cov, check_args=None):
    """Distribution of the observations implied by a state distribution

    Returns `ListMNormal(mean, cov)`. If `check_args` is not None the covariance is
    passed to `check_posdef` with `check_args` as keyword arguments.
    """
    # the observation offsets are part of the model (they are passed to `_filter`),
    # so they must be included in the predicted observation mean as well
    mean = self.obs_matrices @ state_mean + self.obs_offsets
    cov = self.obs_matrices @ state_cov @ self.obs_matrices.mT + self.obs_cov
    
    if check_args is not None: check_posdef(cov, 'predict',  **check_args)
    
    return ListMNormal(mean, cov)

@patch
def predict(self: KalmanFilter, obs, mask=None, smooth=True, check_args=None):
    """Predicted observations at all times

    The state is estimated with the smoother (or only the filter if `smooth` is False),
    mapped to the observation space and then conditioned on the values that were
    actually observed at each time step.
    Returns `ListNormal(means, stds)`, each with the same shape as the parsed `obs`.
    """
    state = self.smooth(obs, mask, check_args) if smooth else self.filter(obs, mask, check_args)
    obs, mask = self._parse_obs(obs, mask)
    
    means = torch.empty_like(obs)
    stds = torch.empty_like(obs)
                             
    for t in range(obs.shape[0]):
        mean, cov = self._obs_from_state(
            state.mean[t],
            state.cov[t],
            {'t': t, **check_args} if check_args is not None else None
        )
        
        # `_get_cond_pred` reads `.mean` and `.cov`, so it needs the full covariance (ListMNormal)
        means[t], stds[t] = _get_cond_pred(ListMNormal(mean, cov), obs[t], mask[t])
    
    return ListNormal(means, stds)

In [None]:
k.predict(obs=X)

In [None]:
@patch
def predict_times(self: KalmanFilter, times, obs, mask=None, smooth=True, check_args=None):
    """Predicted observations at specific times

    `times` are indices into the time steps of `obs`, valid values are `0 ... n_timesteps - 1`.
    Returns `ListNormal(means, stds)` with one row for each element of `times`.
    Raises `ValueError` if any of `times` is outside the valid range.
    """
    state = self.smooth(obs, mask, check_args) if smooth else self.filter(obs, mask, check_args)
    obs, mask = self._parse_obs(obs, mask)
    times = array1d(times)
    
    n_timesteps = obs.shape[0]
    n_features = obs.shape[1] if len(obs.shape) > 1 else 1
    
    # `t == n_timesteps` is already out of range, so the upper bound is exclusive;
    # with no times requested there is nothing to validate (and max/min would fail)
    if times.shape[0] > 0 and (times.max() >= n_timesteps or times.min() < 0):
        raise ValueError(f"provided times range from {times.min()} to {times.max()}, which is outside allowed range : 0 to {n_timesteps - 1}")

    means = torch.empty((times.shape[0], n_features), dtype=obs.dtype, device=obs.device)
    stds = torch.empty((times.shape[0], n_features), dtype=obs.dtype, device=obs.device) 
    for i, t in enumerate(times):
        mean, cov = self._obs_from_state(
            state.mean[t],
            state.cov[t],
            {'t': t, **check_args} if check_args is not None else None
        )
        
        # `_get_cond_pred` reads `.mean` and `.cov`, so it needs the full covariance (ListMNormal)
        means[i], stds[i] = _get_cond_pred(ListMNormal(mean, cov), obs[t], mask[t])
    
    return ListNormal(means, stds)  

pykalman doesn't support a predict method so cannot test it

In [None]:
tst.filter.predict(obs = tst.data, mask = tst.mask)

In [None]:
mean, std = tst.filter.predict_times(obs = tst.data, times = torch.tensor([0,1]), mask = tst.mask)

In [None]:
mean, std

In [None]:
print_info((mean, std))

In [None]:
mean, cov = tst64.filter.smooth(tst64.data, tst64.mask)

In [None]:
tst64.filter._obs_from_state(mean[0], cov[0])

### Log Likelihood

This code is old now as the log likelihood is not computed here

TODO: open issue in pykalman for error in ll missing data

In [None]:
pykalman.standard.KalmanFilter.loglikelihood??

In [None]:
pykalman.standard._loglikelihoods??

In [None]:

@patch
def filter_loglikelihood(self: KalmanFilter, obs, mask=None):
    """Compute log likelihood using only filter step

    Only the time steps where all the variables are observed contribute to the sum,
    time steps with any missing value are skipped.
    """
    # Those are the means and covs before the updating step,
    # otherwise the model would have already seen the observation that we are predicting 
    pred_state_mean, pred_state_cov, _, _ = self._filter_all(obs, mask)
    obs, obs_mask = self._parse_obs(obs, mask)

    # collect the log likelihoods instead of writing them into a pre-allocated float32 cpu
    # tensor, so that the result keeps the dtype (eg. float64) and device of the computation
    lls = []
    for t in range(obs.shape[0]):
        if not obs_mask[t].all(): continue
        pred_obs_mean, pred_obs_cov = self._obs_from_state(pred_state_mean[t], pred_state_cov[t])
        lls.append(MultivariateNormal(pred_obs_mean, pred_obs_cov, validate_args=False).log_prob(obs[t]))

    # no fully observed time step: the sum is zero (same result as before)
    if not lls: return torch.zeros(())
    return torch.stack(lls).sum()

In [None]:
k.filter_loglikelihood(X)

In [None]:
test_close(k.filter_loglikelihood(X), pyk.loglikelihood(nX))

In [None]:
pred_state, pred_state_cov, _, _ = tuple(map(_stack_detach, tst.filter._filter_all(tst.data, tst.mask)))

In [None]:
tst = KalmanFilterTester(nan_mask = False, p_missing=0)

In [None]:
pykalman.standard._loglikelihoods(
    tst.params_pyk['observation_matrices'],
    tst.params_pyk['observation_offsets'],
    tst.params_pyk['observation_covariance'],
    pred_state.numpy(),
    pred_state_cov.numpy(),
    np.array(tst.data_pyk)
)

In [None]:
test_close(
    tst.filter.filter_loglikelihood(tst.data, tst.mask),
    tst.filter_pyk.loglikelihood(tst.data_pyk)) # need to increase the resolution

In [None]:
tst64 = KalmanFilterTester(p_missing=0, dtype=torch.float64)

In [None]:
(tst64.filter.filter_loglikelihood(tst64.data, tst64.mask),
tst64.filter_pyk.loglikelihood(tst64.data_pyk))

In [None]:
k2.filter_loglikelihood(X2)

In [None]:
test_close(k2.filter_loglikelihood(X2), pyk2.loglikelihood(nX2), eps=1e-4)

since the goal is to fill gaps we want the log likelihood for the whole gap and only for it

In [None]:

@patch
def loglikelihood(self: KalmanFilter,
                  obs_train: Tensor, # [n_timesteps, n_dim_obs] Observations used for the filter (can contain missing data)
                  times: Tensor, # [n_pred_timesteps] time at which to calculate the log likelihood
                  obs_test: Tensor, # [n_pred_timesteps, n_dim_obs] observed data to compute log likelihood
                  mask: Tensor=None, # [n_timesteps, n_dim_obs]
                 ) -> Tensor: # scalar that is sum of log likelihoods for all `times`
    "Log likelihood only for the `obs_test` at given times"
    means, stds = self.predict(obs_train, mask=mask)
    # same dtype/device as the predictions to avoid a silent downcast (eg. float64 -> float32)
    lls = torch.zeros(len(times), dtype=means.dtype, device=means.device)
    for i, t in enumerate(times):
        # the prediction is selected by the requested time `t`, while `obs_test` has
        # one row per element of `times` and is indexed by position `i`
        # the slice keeps the event dimension also when `obs_test` is 1D
        lls[i] = MultivariateNormal(means[t], torch.diag(stds[t]), validate_args=False).log_prob(obs_test[i:i+1])
    return lls.sum() 
        

In [None]:
torch.diag(std[0]).dtype

In [None]:
# the second positional argument is `times`, the mask must be passed by keyword
tst.filter.loglikelihood(tst.data, range(len(tst.data)), tst.data, mask=tst.mask)

In [None]:
X2.dtype

In [None]:
k.loglikelihood(X, [1,2], X[[1,2]])

In [None]:
k2.loglikelihood(X2, [1,2], X2[[1,2]])

In [None]:
k2.loglikelihood(X2m, [1,2], X2[[1,2]])

### Get Info

In [None]:

@patch
def get_info(self: KalmanFilter, var_names=None):
    """Model parameters as a dict of labelled tables built with `array2df`

    Keys: `A` transition matrices, `H` observation matrices, `R` observation covariance,
    `Q` transition covariance. If `var_names` is given it is stored on the model,
    otherwise the names stored by a previous call (if any) are reused.
    """
    out = {}
    if var_names is not None: self.var_names = var_names
    # previously the stored names were never read back
    else: var_names = getattr(self, 'var_names', None)
    latent_names = [f"z_{i}" for i in range(self.transition_matrices.shape[0])]
    out['A'] = array2df(self.transition_matrices, latent_names, latent_names, 'latent')
    out['H'] = array2df(self.obs_matrices,        var_names,    latent_names, 'variable')
    out['R'] = array2df(self.obs_cov,             var_names,    var_names,     'variable')
    out['Q'] = array2df(self.transition_cov,      latent_names, latent_names, 'latent')
    return out

In [None]:
display_as_row(k.get_info())

In [None]:
display_as_row(k2.get_info())

## Train Parameters

This implementation of `KalmanFilter` allows finding the optimal parameters by maximising the log-likelihood using gradient descent

In [None]:
training_iter = 200
k = KalmanFilter()
k.train()

optimizer = torch.optim.Adam(k.parameters(), lr=0.005) 

# negative log likelihood recorded at every iteration
losses = []
for _ in range(training_iter):
    # reset the gradients accumulated in the previous iteration
    optimizer.zero_grad()
    # the optimizer minimises, so the loss is the negative log likelihood of the data
    log_lik = k.loglikelihood(X, range(len(X)), X)
    loss = -log_lik
    losses.append(loss.item())
    # compute the gradients and update the parameters
    loss.backward()
    optimizer.step()

In [None]:
import matplotlib.pyplot as plt

In [None]:
plt.plot(losses)

In [None]:
list(k.parameters())

## Other

### Testing

### Fuzzing smoother

Run the filter many times to see whether some of the matrices are not symmetric

In [None]:
from torch._C import _LinAlgError

In [None]:
def fuzz_symmetric(n_iter=10, n_obs=100, **kwargs):
    """Smooth the data of a new `KalmanFilterTester` and run `check_posdef` on every smoothed covariance

    The results of the checks are concatenated with `pd.concat`. `n_iter` is not used.
    """
    tester = KalmanFilterTester(n_obs=n_obs, **kwargs)
    _smoothed_means, smoothed_covs = tester.filter.smooth(tester.data, tester.mask)
    return pd.concat([check_posdef(cov) for cov in smoothed_covs])
    

In [None]:
def fuzz_smooth(n_obs=100, use_pykalman=False, **kwargs):
    """Smooth the data of a new `KalmanFilterTester`, with the `pykalman` filter if `use_pykalman`"""
    tester = KalmanFilterTester(n_obs=n_obs, **kwargs)
    if use_pykalman:
        return tester.filter_pyk.smooth(tester.data_pyk)
    return tester.filter.smooth(tester.data, tester.mask)
    

In [None]:
def find_max_obs(start=100_000, end=100, steps=10, **kwargs):
    for n in torch.logspace(torch.log10(torch.tensor(start)), torch.log10(torch.tensor(end)), 10):
        try:
            %time fuzz_smooth(n_obs=int(n), **kwargs)
            print(n, "working")
            break
        except _LinAlgError:
            print(n, "not working")
    

#### PyKalman

In [None]:
sm_mean, sm_cov = fuzz_smooth(use_pykalman=True)

In [None]:
tst.data_pyk.dtype

dtype('float32')

In [None]:
tst.filter_pyk.transition_matrices

array([[0.9299797 , 0.29107332, 0.8075413 ],
       [0.09118336, 0.87263364, 0.37048805],
       [0.10431129, 0.9359443 , 0.9556711 ]], dtype=float32)

In [None]:
sm_mean.dtype

dtype('float64')

In [None]:
sm_cov.shape

(100, 3, 3)

In [None]:
pyk_pd = CheckPosDef(do_check=True).check(torch.tensor(sm_cov))

pyk_pd[['is_pd_eigv', 'is_pd_chol', 'is_sym']].all()

is_pd_eigv    True
is_pd_chol    True
is_sym        True
dtype: bool

In [None]:
sm_mean, sm_cov = fuzz_smooth(use_pykalman=False)

In [None]:
tst.filter

Kalman Filter
        N dim obs: 3, N dim state: 3

In [None]:
sm_cov.shape

torch.Size([1, 100, 3, 3])

In [None]:
pyk_pd = CheckPosDef(do_check=True).check(sm_cov.squeeze(0))

pyk_pd[['is_pd_eigv', 'is_pd_chol', 'is_sym']].all()

_LinAlgError: linalg.eigh: The algorithm failed to converge because the input matrix is ill-conditioned or has too many repeated eigenvalues (error code: 1).

In [None]:
find_max_obs(use_pykalman=True)

CPU times: user 15.7 s, sys: 61.5 ms, total: 15.8 s
Wall time: 14.9 s
tensor(100000.) working


In [None]:
import warnings

In [None]:
posdef_log = pd.DataFrame()

In [None]:
# For increasing numbers of observations run the smoother and record how many
# warnings were emitted, as (n_obs, n_warnings) tuples
total_warn = []
for n in range(2, 200):
    # record=True collects the warnings in the list `w` instead of showing them
    with warnings.catch_warnings(record=True) as w:
        try:
            fuzz_smooth(n_obs=n)
        except _LinAlgError:
            # stop at the first number of observations where the linear algebra fails
            print(n)
            break
        finally:
            # runs also for the failing iteration, before the `break` leaves the loop
            total_warn.append((n, len(w)))
    

In [None]:
posdef_log

In [None]:
import altair as alt

In [None]:
alt.Chart(pd.DataFrame(total_warn, columns=["n_obs", "n_not_posdef"])).mark_line().encode(alt.X("n_obs"), alt.Y("n_not_posdef"))

In [None]:
alt.data_transformers.enable('data_server')

In [None]:
alt.Chart(posdef_log).mark_line().encode(alt.X("t"), alt.Y("average(sym_upto)"))

In [None]:
alt.Chart(posdef_log).mark_point().encode(alt.X("t"), alt.Y("count(is_sym)"))

In [None]:
posdef_log[["t", "name"]]

In [None]:
plt.scatter(posdef_log.reset_index().index, posdef_log.sym_upto)

In [None]:
for i in range(3):
    %time 1+1

In [None]:
# find_max_obs(dtype=torch.float64)

The function takes 5 min to run so this is the output saved

With `float64` there is no problem with positive definite matrices, even with 100k observations:
```
CPU times: user 30min 43s, sys: 28.6 s, total: 31min 11s
Wall time: 5min 26s
tensor(100000.) working
```

In [None]:
tst = KalmanFilterTester(n_obs=100)

In [None]:
is_posdef(tst.params['obs_cov'])

In [None]:
is_posdef(tst.params['transition_cov'])

In [None]:
is_posdef(tst.params['initial_state_cov'])

In [None]:
tst.filter.smooth(tst.data, tst.mask);

#### Random Testing

The goal is to generate a random set of data and parameters and check that the `meteo_imp` implementation gives the same results as the `pykalman` implementation

In [None]:
n_dim_state = 3 
n_dim_obs = 3
n_obs = 10
p_missing = .3

In [None]:
to_posdef = PosDef().transform

In [None]:
data = torch.rand(n_obs, n_dim_obs)
mask = torch.rand(n_obs, n_dim_obs) > p_missing
mask = mask.all(1)

In [None]:
mask[:10]

In [None]:
mask[:10]

In [None]:
params = {
    'transition_matrices': torch.rand(n_dim_state, n_dim_state),
    'transition_offsets':  torch.rand(n_dim_state),        
    'transition_cov':      to_posdef(torch.rand(n_dim_state, n_dim_state)),        
    'obs_matrices':        torch.rand(n_dim_obs, n_dim_state),
    'obs_offsets':         torch.rand(n_dim_obs),          
    'obs_cov':             to_posdef(torch.rand(n_dim_obs, n_dim_obs)),            
    'initial_state_mean':  torch.rand(n_dim_state),        
    'initial_state_cov':   to_posdef(torch.rand(n_dim_state, n_dim_state)),
}

In [None]:
params2pyk = {
    'transition_matrices': 'transition_matrices',
    'transition_offsets':  'transition_offsets',        
    'transition_cov':      'transition_covariance',        
    'obs_matrices':        'observation_matrices',
    'obs_offsets':         'observation_offsets',          
    'obs_cov':             'observation_covariance',            
    'initial_state_mean':  'initial_state_mean',        
    'initial_state_cov':   'initial_state_covariance',
}

In [None]:
params

In [None]:
k = KalmanFilter(**params)

In [None]:
pred = k.smooth(data, mask)

make a `pykalman` model using the same parameters

In [None]:
# `mask` is True where the data is observed and has one entry per time step, while a numpy
# masked array needs True where the data is missing and a mask with the same shape as the data:
# invert the mask and repeat it across the observation columns
data_pyk = np.ma.masked_array(data.numpy(), mask = np.repeat(~mask.numpy()[:, None], data.shape[1], axis=1))

In [None]:
pyk_k = pykalman.standard.KalmanFilter(

    transition_matrices=k.transition_matrices.detach().numpy(),
    transition_offsets=k.transition_offsets.detach().numpy(),
    transition_covariance=k.transition_cov.detach().numpy(),
    observation_matrices=k.obs_matrices.detach().numpy(),
    observation_offsets=k.obs_offsets.detach().numpy(),
    observation_covariance=k.obs_cov.detach().numpy(),
    initial_state_mean=k.initial_state_mean.detach().numpy(),
    initial_state_covariance=k.initial_state_cov.detach().numpy()
)

In [None]:
pred_pyk = pyk_k.smooth(data_pyk)

In [None]:
for p in params.keys():
    print(p, getattr(k, p))

In [None]:
for p in params.keys():
    print(p, getattr(pyk_k, params2pyk[p]))

### Compare Statsmodels

In [None]:
import statsmodels.api as sm
import statsmodels

In [None]:
# sm_kf = statsmodels.tsa.statespace.kalman_filter.KalmanFilter(
#     k_endog = 3,
#     k_states = 3,
#     initialization = 'known',
#     initial_state = pyk_ncov.initial_state_mean,
#     initial_state_cov = pyk_ncov.initial_state_covariance,
#     design = pyk_ncov.observation_matrices,
#     obs_cov = pyk_ncov.observation_covariance,
#     transition = pyk_ncov.transition_matrices,
#     state_cov = pyk_ncov.transition_covariance)

In [None]:
# sm_kf.bind(X_ncov.detach().numpy())

In [None]:
# sm_pred = sm_kf.filter() 

In [None]:
# sm_pred.predicted_state.shape

In [None]:
# sm_pred.predicted_state_cov.shape

In [None]:
# mean = MultivariateNormal(torch.tensor(sm_pred.predicted_state[:, 0]), torch.tensor(sm_pred.predicted_state_cov[:, :, 0]))

In [None]:
# sm_kf.loglikeobs()