In [1]:
import pathlib
import sys

# Add the parent of the current working directory to the import search path.
# NOTE(review): presumably this lets project modules such as `utils` be imported
# when the notebook is started from a sub-folder -- confirm the intended layout.
sys.path.append("../")

# Absolute path of the current working directory; being the last expression of
# the cell, its value is echoed as the cell output.
local_path = pathlib.Path().resolve()
local_path

WindowsPath('C:/Users/NielsOta/Code/StateSpace/statespace')

In [2]:
import torch
import pathlib

from typing import List, Optional, Dict, Tuple
from torch.autograd import Variable
from scipy.optimize import minimize
from collections import deque

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [3]:
from utils import _mm, _bmm, _inv, _mm3, _bmm3, _initiate_variables, _map_vector_to_matrices, _get_nan_positions, _remove_nan_tensor, _remove_inf_tensor, _get_bounds

In [4]:
# Will be forward method

def _kalman_step(T, Z, R, Q, H, a, P, y):
    """
    Run one Kalman recursion for a time point whose observation is available.

    Given the predicted state `a` and its variance `P`, the observation `y`
    is used to form the filtered ("incasted") state, after which the state is
    propagated one period ahead with the transition matrix `T`.

    Returns the tuple
        (a_next, P_next, att, Ptt, M, K, F, v)
    i.e. next-period prediction and variance, filtered state and variance,
    filtering gain, Kalman gain, prediction-error variance, prediction error.
    """

    # variance of the one-step-ahead prediction error: Z P Z' + H
    innovation_var = _mm3(Z, P, Z.T) + H

    # one-step-ahead prediction error: y - Z a
    innovation = y - _mm(Z, a)

    # gain used for the filtered ("incasted") update: P Z' F^-1
    filter_gain = _mm3(P, Z.T, _inv(innovation_var))

    # gain used for the one-step-ahead prediction: T M
    predict_gain = _mm(T, filter_gain)

    # filtered state and its variance
    state_filtered = a + _mm(filter_gain, innovation)
    var_filtered = P - _mm3(filter_gain, innovation_var, filter_gain.T)

    # prediction for the next period: T att and T Ptt T' + R Q R'
    state_next = _mm(T, state_filtered)
    var_next = _mm3(T, var_filtered, T.T) + _mm3(R, Q, R.T)

    return (
        state_next,
        var_next,
        state_filtered,
        var_filtered,
        filter_gain,
        predict_gain,
        innovation_var,
        innovation,
    )

In [5]:
# Will be forward method

def _kalman_step_missing(T, Z, R, Q, H, a, P, y, dtype=np.float64):
    """
    Perform one step of the Kalman filter when the observation is missing.

    No information arrives at this time point, so the filtered state and
    variance equal the predicted ones (`att = a`, `Ptt = P`) and the state is
    simply propagated with `T`.

    Parameters
    ----------
    T, Z, R, Q, H : state-space system matrices.
    a, P          : predicted state and predicted state variance.
    y             : observation slice; only `y.shape[0]` (the observation
                    dimension) is used.
    dtype         : dtype of the arrays created here.

    Returns
    -------
    (a_next, P_next, att, Ptt, M, K, F, v) in the same order as
    `_kalman_step`, where `v` is the scalar `np.nan`, `F` has `np.inf` on its
    diagonal and `K` is all zeros.
    """

    # dimension of observation vector
    s = int(y.shape[0])

    # prediction error is undefined for a missing observation
    v = np.nan

    # prediction error variance: infinite on the diagonal, zero elsewhere.
    # `np.eye(s) * np.inf` must not be used: 0 * inf evaluates to NaN, which
    # fills the off-diagonal with NaN whenever s > 1.
    F = np.zeros((s, s), dtype=dtype)
    np.fill_diagonal(F, np.inf)

    # incast-kalman gain: M (relies on `_inv` mapping the infinite variance to
    # a zero precision -- TODO confirm for s > 1)
    M = _mm3(P, Z.T, _inv(F).astype(dtype))

    # kalman gain: K is zero because nothing is learned at this time point
    K = np.zeros_like(_mm(T, M))

    # incasted updates: filtered moments equal the predicted moments
    att = a
    Ptt = P

    # one-step-ahead prediction
    a_next = _mm(T, att)
    P_next = _mm3(T, Ptt, T.T) + _mm3(R, Q, R.T)

    return a_next, P_next, att, Ptt, M, K, F, v

In [25]:
def kalman_filter(*args):
    """
    Run the Kalman filter over the whole sample.

    Positional arguments (in order): T, Z, R, Q, H, y, diffuse.
    `y` is indexed as [observation dim, 1, time]; `diffuse` switches on a
    large-variance initialisation of the first predicted state.

    Returns (a, att, P, Ptt, F, v, K, M): predicted states, filtered states,
    their variances, prediction-error variances, prediction errors and the
    two gains, each stacked along the last (time) axis.
    """
    T, Z, R, Q, H, y, diffuse = args

    # sizes: sample length, state dimension, observation dimension
    n_obs = int(y.shape[2])
    state_dim = int(T.shape[0])
    obs_dim = int(y.shape[0])

    # storage for every filter quantity
    a, att, P, Ptt, v, F, K, M = _initiate_variables(state_dim, obs_dim, n_obs)

    # time indices at which the observation is missing
    nan_pos_list = _get_nan_positions(y)

    # diffuse start: zero mean and a very large variance for the first state
    if diffuse:
        a[:, :, 0] = 0
        P[:, :, 0] = P[:, :, 0] + 1e5 * np.eye(state_dim, state_dim)

    def _step_at(idx):
        # pick the recursion that matches the availability of y at `idx`
        step = _kalman_step if idx not in nan_pos_list else _kalman_step_missing
        return step(T, Z, R, Q, H, a[:, :, idx], P[:, :, idx], y[:, :, idx])

    # index 0 holds a_1 / y_1; every step but the last also fills the next prediction
    for idx in range(n_obs - 1):
        (
            a[:, :, idx + 1],
            P[:, :, idx + 1],
            att[:, :, idx],
            Ptt[:, :, idx],
            M[:, :, idx],
            K[:, :, idx],
            F[:, :, idx],
            v[:, :, idx],
        ) = _step_at(idx)

    # final time point: only the filtered quantities are stored
    last = n_obs - 1
    (
        _,
        _,
        att[:, :, last],
        Ptt[:, :, last],
        M[:, :, last],
        K[:, :, last],
        F[:, :, last],
        v[:, :, last],
    ) = _step_at(last)

    return a, att, P, Ptt, F, v, K, M


def kalman_forecast(*args, time=10, dtype=np.float64):
    """
    Forecast the state and its variance beyond the end of the sample.

    Positional arguments (in order): T, Z, R, Q, H, att, Ptt. Only T, R, Q and
    the last time slice of `att` / `Ptt` are used; Z and H are accepted so the
    call mirrors the other routines.

    Parameters
    ----------
    time  : number of periods to forecast ahead.
    dtype : dtype of the returned arrays.

    Returns
    -------
    (a_forecast, P_forecast) with shapes (p, 1, time) and (p, p, time), where
    slice k holds the (k+1)-step-ahead state forecast and forecast variance,
    obtained from
        a_{t+1} = T a_t,      P_{t+1} = T P_t T' + R Q R'.
    """
    T, Z, R, Q, H, att, Ptt = args

    # dimension of state vector
    p = int(T.shape[0])

    # slot 0 holds the starting point, slots 1..time hold the forecasts.
    # Both arrays must be sized by the horizon: sizing P by the sample length
    # breaks as soon as the horizon exceeds the sample.
    horizon = time + 1
    a_forecast = np.zeros((p, 1, horizon), dtype=dtype)
    P_forecast = np.zeros((p, p, horizon), dtype=dtype)

    # start from the last filtered state and variance
    a_forecast[:, :, 0] = att[:, :, -1]
    P_forecast[:, :, 0] = Ptt[:, :, -1]

    # state disturbance variance R Q R' does not change over the horizon
    state_noise = _mm3(R, Q, R.T)

    for t in range(1, horizon):
        a_forecast[:, :, t] = _mm(T, a_forecast[:, :, t - 1])
        P_forecast[:, :, t] = _mm3(T, P_forecast[:, :, t - 1], T.T) + state_noise

    # drop the starting point, keep only genuine forecasts
    return a_forecast[:, :, 1:], P_forecast[:, :, 1:]


def kalman_smoother(*args, dtype=np.float64):
    """
    Perform Kalman state smoothing with a backward recursion.

    Positional arguments (in order): T, Z, R, Q, H, a, P, v, F, K, where
    a, P, v, F, K are the filter outputs stacked along the last (time) axis.
    R, Q and H are accepted but not used here.

    Recursion (slot t of `r` stores r_{t-1}; the extra last slot is r_n = 0):
        observed:  r_{t-1} = Z' F_t^{-1} v_t + L_t' r_t,   L_t = T - K_t Z
        missing :  r_{t-1} = T' r_t
        a_hat_t  = a_t + P_t r_{t-1}

    A time point is treated as missing when any entry of `v[:, :, t]` is NaN.

    Returns
    -------
    (a_hat, r, L, T) where `T` is the transition matrix repeated along the
    time axis (shape (p, p, n)).
    """
    T, Z, R, Q, H, a, P, v, F, K = args

    # number of observations
    n = int(a.shape[2])

    # dimension of state vector
    p = int(T.shape[0])

    # instantiate a_hat
    a_hat = np.zeros_like(a)

    # repeat T along time so that L can be formed for every t in one einsum
    T = np.repeat(T[:, :, None], n, axis=2)

    # L_t = T - K_t Z
    L = T - np.einsum('ijn,jk->ikn', K, Z)

    # r[n] = r_n = 0
    r = np.zeros((p, 1, n + 1)).astype(dtype)

    # a[0] contains a_1, y[0] contains y_1; iterate from the last time point to the first
    for t in range(n - 1, -1, -1):

        # `.any()` keeps the test valid for observation dimension > 1
        if np.isnan(v[:, :, t]).any():
            # no observation: the innovation term vanishes and the gain is
            # zero, so only the transition carries r backwards
            r[:, :, t] = _mm(T[:, :, t].T, r[:, :, t + 1])
        else:
            r[:, :, t] = _mm3(Z.T, _inv(F[:, :, t]), v[:, :, t]) + _mm(L[:, :, t].T, r[:, :, t + 1])

        a_hat[:, :, t] = a[:, :, t] + _mm(P[:, :, t], r[:, :, t])

    return a_hat, r, L, T


def log_likelihood(params, *args):
    """
    Negative Gaussian log-likelihood of the model, for use as a minimiser objective.

    `params` is the optimiser's parameter vector; `args` holds, in order,
    T, Z, R, Q, H, y, param_map, diffuse. The parameters are first written
    into the system matrices, the Kalman filter is run, and the prediction
    errors `v` with variances `F` are combined as

        -loglik = 1/2 * sum_t ( s*log(2*pi) + log|F_t| + v_t' F_t^{-1} v_t ).

    With a diffuse start the first `T.shape[0]` time points are left out.
    Returns a Python float.
    """
    T, Z, R, Q, H, y, param_map, diffuse = args
    T, Z, R, Q, H = _map_vector_to_matrices(params, param_map, T, Z, R, Q, H)

    # only the prediction errors and their variances are needed
    _a, _att, _P, _Ptt, F, v, _K, _M = kalman_filter(T, Z, R, Q, H, y, diffuse)

    # skip the initial stretch affected by the diffuse prior
    if diffuse:
        burn_in = T.shape[0]
        F, v = F[:, :, burn_in:], v[:, :, burn_in:]

    # time points without an observation do not contribute
    v = _remove_nan_tensor(v)
    F = _remove_inf_tensor(F)

    # dimension of observation vector
    obs_dim = int(y.shape[0])

    # put time first so the linear-algebra routines work batch-wise
    F_batched = F.transpose(2, 0, 1)

    # slogdet gives (sign, log|det|); the two are multiplied per time point
    signs, log_abs_dets = np.linalg.slogdet(F_batched)
    F_logdets = signs * log_abs_dets

    # quadratic form v' F^-1 v per time point, flattened to one value each
    quad_form = np.squeeze(_bmm3(v.transpose(2, 1, 0), _inv(F_batched), v.transpose(2, 0, 1)))

    # constant term, one copy per time point
    const = obs_dim * np.log(2 * np.pi) * np.ones_like(quad_form)

    llik = -(1/2) * np.sum(const + quad_form + F_logdets)

    # flip the sign: the optimiser minimises
    return -llik.item()

In [26]:
from dataclasses import dataclass

In [34]:
@dataclass
class LinearGaussianModel:

    """
    class implementing a linear Gaussian model in StateSpace form

        y_t         = Z_t * alpha_t + epsilon_t              epsilon_t ~ N(0, H_t)
        alpha_{t+1} = T_t * alpha_t + R_t * eta_t            eta_t ~ N(0, Q_t)

    After construction the filter outputs (a, att, P, Ptt, v, F, K, M) are
    allocated via `_initiate_variables`; `fit` estimates the free parameters
    and fills those outputs with the results of the Kalman filter.
    """

    # Observation array, indexed as [observation dim, 1, time]
    y: np.ndarray

    # State matrices of the model equations above
    T: np.ndarray
    Z: np.ndarray
    R: np.ndarray
    Q: np.ndarray
    H: np.ndarray

    # Whether the initial state gets a diffuse (very large variance) prior
    diffuse: bool

    # Map from position in the optimiser's parameter vector to a state-matrix entry; e.g. 0: {"matrix" : "Q", "index": (0, 0, 0), "constant": True, "bounds": (None, None)}
    param_map: Dict[int, Dict]

    def __post_init__(self):
        """Derive the model dimensions and allocate the filter outputs."""

        # Get p, s, n: dimension state, dimension observation and length of y (time)
        self.p = self.T.shape[0]
        self.s = self.y.shape[0]
        self.n = self.y.shape[2]

        # Allocate filtered and incasted signals, their variances, errors and gains
        self.a, self.att, self.P, self.Ptt, self.v, self.F, self.K, self.M = _initiate_variables(self.p, self.s, self.n)

    def fit(self):
        """
        Estimate the parameters in `param_map` by maximum likelihood.

        The negative log-likelihood is minimised with L-BFGS-B, starting from
        a vector of ones and subject to the bounds derived from `param_map`.
        The estimates are printed, written back into the state matrices on
        the instance, and the Kalman filter is run once more so that
        a, att, P, Ptt, v, F, K and M reflect the fitted model.

        Returns None; all results are stored on the instance.
        """

        # Get State Matrices
        T = self.T
        Z = self.Z
        R = self.R
        Q = self.Q
        H = self.H

        # Get whether init is diffuse
        diffuse = self.diffuse

        # Get map for mapping param vector of optimizer to state matrices
        param_map = self.param_map

        # Get observation vector
        y = self.y

        # set options for minimization
        options = {
            'eps': 1e-7,
            'disp': True,
            'maxiter': 500
        }

        # Get bounds for optimization
        bounds = _get_bounds(param_map)

        # starting values; the optimiser requires a one-dimensional vector
        params_ini = np.ones(len(param_map))

        # maximize log-likelihood (by minimising its negative)
        res = minimize(log_likelihood, params_ini, args=(T, Z, R, Q, H, y, param_map, diffuse), method='L-BFGS-B', options=options, bounds=bounds)

        # extract params
        params = res.x
        print(params)

        # Update instance state matrices
        T, Z, R, Q, H = _map_vector_to_matrices(params, param_map, T, Z, R, Q, H)
        self.T, self.Z, self.R, self.Q, self.H = T, Z, R, Q, H

        # Get filtered and incasted signals and signal variances.
        # kalman_filter returns F before v, so unpack in exactly that order.
        a, att, P, Ptt, F, v, K, M = kalman_filter(T, Z, R, Q, H, y, diffuse)
        self.a, self.att, self.P, self.Ptt, self.v, self.F, self.K, self.M = a, att, P, Ptt, v, F, K, M

In [35]:
def read_target_from_path(path: pathlib.Path, dtype=np.float64) -> np.ndarray:
    """
    Load an observation series from a delimited text file.

    The file is read with `pandas.read_csv` using its defaults (so the first
    row is taken as the header); every column becomes one observed variable
    and every row one time point.

    Parameters
    ----------
    path  : location of the file.
    dtype : dtype of the returned array.

    Returns
    -------
    np.ndarray of shape (s, 1, n), with s the number of columns and n the
    number of rows.

    Raises
    ------
    FileNotFoundError
        If `path` does not exist. Raising (rather than returning a message
        string) keeps a bad path from surfacing later as an unrelated error.
    """
    if not path.exists():
        raise FileNotFoundError(f"path: {path} does not exist")

    # read data: DataFrame.values is always two-dimensional, shape (n, s)
    data = pd.read_csv(path).values

    # variables first, then a singleton axis, then time: (s, 1, n)
    y = data.T[:, None, :]

    return y.astype(dtype)

In [36]:
# numeric precision used throughout the example
dtype = np.float64

# the data folder sits next to the current working directory
base_dir = pathlib.Path().resolve().parent
data_dir = base_dir / 'data'
nile_path = data_dir / 'Nile.txt'

# observations in shape [s, 1, n]
y = read_target_from_path(nile_path, dtype)
# to try the missing-data path, blank out a stretch of the series:
# y[:, :, 20:40] = np.nan

# Local Level Model: every state matrix is the 1x1 matrix [[1]]
# (each name gets its own array, so they can be updated independently)
T, Z, R, Q, H = (np.ones((1, 1), dtype=dtype) for _ in range(5))
diffuse = True

# optimiser parameter 0 -> Q[0, 0], parameter 1 -> H[0, 0], both bounded below by 0.1
dict_param_llm = {
    position: {"matrix": matrix_name, "index": (0, 0, 0), "constant": True, "bounds": (0.1, None)}
    for position, matrix_name in enumerate(("Q", "H"))
}

llm = LinearGaussianModel(
    y=y,
    T=T,
    Z=Z,
    R=R,
    Q=Q,
    H=H,
    diffuse=diffuse,
    param_map=dict_param_llm,
)
llm.fit()

[ 1616.27809536 14663.97320479]
