# Filter Performance and Stability
> Compare performance across Standard Filter vs. Square Root Filter, CPU vs. GPU, and batched vs. non-batched execution

In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
from fastcore.test import *
from fastcore.basics import *
from meteo_imp.utils import *
from meteo_imp.gaussian import *
from meteo_imp.data_preparation import MeteoDataTest
from meteo_imp.kalman.filter import *
from meteo_imp.kalman.filter import get_test_data

import pykalman
from typing import *

import numpy as np
import pandas as pd
import torch
from torch import Tensor
from torch.distributions import MultivariateNormal

from timeit import timeit
import polars as pl
import altair as alt

from tqdm.auto import tqdm

In [None]:
class KalmanFilterPerformance():
    """Benchmark harness: a randomly initialised Kalman filter plus matching test data.

    The constructor builds either a `KalmanFilter` or a `KalmanFilterSR`
    (selected by `use_sr_filter`) via `init_random`, moves it to `device`, and
    generates test data with `get_test_data`. Any extra keyword arguments are
    accepted through `**kwargs` and are not used to build the filter or the data,
    which lets callers pass bookkeeping keys (e.g. a repetition index).
    """

    def __init__(self, n_obs=100, n_dim_obs=4, n_dim_state=3, n_dim_contr=3, bs=5, p_missing=.3,
                 use_sr_filter=True, device='cpu', use_conditional=True, use_batch=True, **kwargs):
        # Must stay the first statement: fastcore's store_attr copies the
        # constructor arguments onto `self` (self.n_obs, self.device, ...).
        store_attr()
        filter_cls = KalmanFilterSR if use_sr_filter else KalmanFilter
        self.filter = filter_cls.init_random(self.n_dim_obs, self.n_dim_state, self.n_dim_contr)

        self.filter.to(device)
        self.filter.use_conditional = self.use_conditional
        self.data = get_test_data(self.n_obs, n_dim_obs=n_dim_obs, n_dim_contr=n_dim_contr,
                                  p_missing=p_missing, bs=bs, device=device)

    def get_method(self, method):
        """Return a zero-argument callable that runs `self.filter.<method>` on the test data.

        With `use_batch` the method is called once on the whole
        `(data, mask, control)` triple; otherwise it is called once per item
        obtained by zipping the three together, and the results are collected
        in a list.
        """
        data, mask, control = self.data
        bound = getattr(self.filter, method)
        if self.use_batch:
            return lambda: bound(data, mask, control)
        return lambda: [bound(d, m, c) for d, m, c in zip(data, mask, control)]

    def time_method(self, method, rep=1):
        """Return the mean wall-clock time in seconds of `rep` calls to `method`.

        CUDA kernels are launched asynchronously, so on a CUDA device the timed
        callable waits for the device to finish; otherwise the measurement could
        stop before the GPU work is actually done.
        """
        run = self.get_method(method)
        if str(getattr(self, 'device', 'cpu')).startswith('cuda'):
            def timed():
                result = run()
                torch.cuda.synchronize()  # wait for queued kernels before stopping the clock
                return result
            torch.cuda.synchronize()  # do not bill previously queued work to this run
        else:
            timed = run
        total = timeit('method()', globals={'method': timed}, number=rep)
        return total / rep
        
        
    

In [None]:
# Default-sized benchmark fixture. p_missing=0 is forwarded to get_test_data
# (presumably producing data without missing observations — confirm).
kf = KalmanFilterPerformance(p_missing=0)

In [None]:
# Mean seconds per call of the filter's `filter` method (rep defaults to 1, so a single call).
kf.time_method('filter')

0.14900339799987705

In [None]:
import itertools
from fastcore.meta import delegates

In [None]:
# Adapted from https://stackoverflow.com/a/5228294
def product_dict(**kwargs):
    """Lazily yield one dict per element of the Cartesian product of the keyword values.

    Every keyword value must be iterable. Each yielded dict maps every keyword
    name to one element drawn from that keyword's iterable, e.g.
    `product_dict(a=(1,), b=(1, 2))` yields `{'a': 1, 'b': 1}` then `{'a': 1, 'b': 2}`.
    """
    names = tuple(kwargs)
    for combo in itertools.product(*kwargs.values()):
        yield dict(zip(names, combo))

In [None]:
# Baseline benchmark configuration; perf_comb_params merges caller overrides on top of it.
default_kwargs = dict(
    n_obs=100,
    n_dim_obs=4,
    n_dim_state=3,
    n_dim_contr=3,
    bs=5,
    use_sr_filter=True,
    device='cpu',
    use_conditional=True,
    use_batch=True,
)

In [None]:
@delegates(KalmanFilterPerformance)
def perf_comb_params(method,  **kwargs):
    """Time `method` for every combination of the given benchmark parameters.

    `kwargs` override `default_kwargs`. Each value is passed through `tuplify`,
    so a parameter may be given as a single value or as a collection of values
    to sweep over. One `KalmanFilterPerformance` is built per combination and
    timed with `time_method(method)`.

    Returns a polars DataFrame with one row per combination: a `time` column
    plus one column per parameter.
    """
    grid = {name: tuplify(value) for name, value in {**default_kwargs, **kwargs}.items()}
    rows = []
    for params in tqdm(product_dict(**grid)):
        bench = KalmanFilterPerformance(**params)
        elapsed = bench.time_method(method)
        rows.append({'time': elapsed, **params})
    return pl.DataFrame(rows)

In [None]:
# Smoke test: no overrides, so exactly one run using the values in default_kwargs.
perf_comb_params('filter')

0it [00:00, ?it/s]

bs,device,n_dim_contr,n_dim_obs,n_dim_state,n_obs,time,use_batch,use_conditional,use_sr_filter
i64,str,i64,i64,i64,i64,f64,bool,bool,bool
5,"""cpu""",3,4,3,100,0.254086,True,True,True


### SR vs Normal Filter

In [None]:
# Compare both filter implementations. `rep` is not a named parameter of
# KalmanFilterPerformance: it is absorbed by the constructor's **kwargs and only
# repeats each configuration 100 times, giving one timing row per repetition.
perf1 = perf_comb_params('filter', use_sr_filter=[True, False], rep=range(100)) 

0it [00:00, ?it/s]

In [None]:
perf1

bs,device,n_dim_contr,n_dim_obs,n_dim_state,n_obs,rep,time,use_batch,use_conditional,use_sr_filter
i64,str,i64,i64,i64,i64,i64,f64,bool,bool,bool
5,"""cpu""",3,4,3,100,0,0.274235,true,true,true
5,"""cpu""",3,4,3,100,1,0.265272,true,true,true
5,"""cpu""",3,4,3,100,2,0.261474,true,true,true
5,"""cpu""",3,4,3,100,3,0.257838,true,true,true
5,"""cpu""",3,4,3,100,4,0.262304,true,true,true
5,"""cpu""",3,4,3,100,5,0.269207,true,true,true
5,"""cpu""",3,4,3,100,6,0.252871,true,true,true
5,"""cpu""",3,4,3,100,7,0.262902,true,true,true
5,"""cpu""",3,4,3,100,8,0.320611,true,true,true
5,"""cpu""",3,4,3,100,9,0.359268,true,true,true


In [None]:
# Mean run time per filter implementation, plus a readable "Filter type" label for each group.
perf1.groupby('use_sr_filter').agg(pl.col("time").mean()).with_column(pl.when(pl.col("use_sr_filter")).then(pl.lit("Square Root Filter")).otherwise(pl.lit("Standard Filter")).alias("Filter type"))

use_sr_filter,time,Filter type
bool,f64,str
True,0.268333,"""Square Root Fi..."
False,0.250166,"""Standard Filte..."


In [None]:
# Add a readable "Filter type" label to every timing row (derived from the use_sr_filter flag).
perf1 = perf1.with_column(pl.when(pl.col("use_sr_filter")).then(pl.lit("Square Root Filter")).otherwise(pl.lit("Standard Filter")).alias("Filter type"))

In [None]:
# Box plot of the per-run timings, one box per filter type.
# The chart is built from a pandas copy of the polars frame (to_pandas()).
plot_perf_sr = alt.Chart(perf1.to_pandas()).mark_boxplot(size = 50).encode(
    x=alt.X('Filter type', axis=alt.Axis(labelAngle=0)),
    # zero=False: do not force the time axis to include 0
    y=alt.Y('time', scale=alt.Scale(zero=False), title="time [s]"),
    color=alt.Color('Filter type',
                    scale = alt.Scale(scheme = 'accent'))
).properties(width=300)

In [None]:
plot_perf_sr

In [None]:
import vl_convert as vlc
from pyprojroot import here
# Directory that save_plot writes figures into, located via pyprojroot's `here`.
base_path_img = here("manuscript/Master Thesis - Meteorological time series imputation using Kalman filters - Simone Massaro/images/")

def save_plot(plot, path):
    """Render `plot` to PNG at 3x scale and write it to `base_path_img / (path + ".png")`.

    `plot` must provide `to_json()` returning a Vega-Lite spec; `path` is the
    file name without extension. An existing file is overwritten.
    """
    rendered = vlc.vegalite_to_png(vl_spec=plot.to_json(), scale=3)
    destination = base_path_img / (path + ".png")
    destination.write_bytes(rendered)

In [None]:
# Writes the box plot to perf_sr.png inside base_path_img.
save_plot(plot_perf_sr, "perf_sr")

### CPU vs GPU

In [None]:
# Sweep 2 devices x 2 filter types x 2 batching modes, 20 repetitions each (160 runs),
# with a batch size of 100. `rep` only repeats the configuration; `p_missing` reaches
# get_test_data through the KalmanFilterPerformance constructor.
# The 'cuda' entries presumably require a CUDA-capable GPU to be available.
gpu_best = perf_comb_params('filter', bs=100, n_obs=50, n_dim_contr=5, n_dim_obs=5, n_dim_state=5,
                            device=['cpu', 'cuda'], use_sr_filter=[True, False], p_missing=0, rep=list(range(20)), use_batch=[True, False])

0it [00:00, ?it/s]

In [None]:
gpu_best

bs,device,n_dim_contr,n_dim_obs,n_dim_state,n_obs,p_missing,rep,time,use_batch,use_conditional,use_sr_filter
i64,str,i64,i64,i64,i64,i64,i64,f64,bool,bool,bool
100,"""cpu""",5,5,5,50,0,0,0.134151,true,true,true
100,"""cpu""",5,5,5,50,0,1,0.097733,true,true,true
100,"""cpu""",5,5,5,50,0,2,0.074773,true,true,true
100,"""cpu""",5,5,5,50,0,3,0.07477,true,true,true
100,"""cpu""",5,5,5,50,0,4,0.092972,true,true,true
100,"""cpu""",5,5,5,50,0,5,0.112495,true,true,true
100,"""cpu""",5,5,5,50,0,6,0.106605,true,true,true
100,"""cpu""",5,5,5,50,0,7,0.103665,true,true,true
100,"""cpu""",5,5,5,50,0,8,0.094165,true,true,true
100,"""cpu""",5,5,5,50,0,9,0.105327,true,true,true


In [None]:
# Mean run time for each (device, batching mode) pair, averaged over filter types and repetitions.
gpu_best.groupby(['device', 'use_batch']).agg(pl.col("time").mean())

device,use_batch,time
str,bool,f64
"""cuda""",False,9.602944
"""cpu""",False,4.560856
"""cuda""",True,0.274758
"""cpu""",True,0.083738


In [None]:
# Scratch check of the grid expansion used in perf_comb_params: after tuplify every
# value (scalar or tuple) can be iterated by product_dict.
kwargs = {'a': 1, 'b': (1,2)}
kwargs ={key:tuplify(arg) for key, arg in kwargs.items()}

In [None]:
list(product_dict(**kwargs))

[{'a': 1, 'b': 1}, {'a': 1, 'b': 2}]

In [None]:
# Zero-argument callable that runs kf.filter.filter on kf's test data (batched or per item, per kf.use_batch).
method = kf.get_method('filter')

In [None]:
from timeit import timeit

In [None]:
# Total seconds for 10 calls — unlike KalmanFilterPerformance.time_method this is not divided by the call count.
timeit('method()', globals={'method': method}, number=10)

0.15532574900134932

## Performance

In [None]:
def compare_performance(n_obs, n_dim_obs, n_dim_state, n_dim_contr, bs, dtype=torch.float64):
    kf_cuda = KalmanFilter.init_random(n_dim_obs,n_dim_state, dtype=dtype).cuda()
    data_cuda, mask_cuda = get_test_data(n_dim_obs,n_dim_state, bs=bs, device="cuda", dtype=dtype)
    
    print("GPU")
    %timeit -n 1 -r 1 kf_cuda.predict(data_cuda, mask_cuda);

    kf_cuda = KalmanFilter.init_random(n_dim_obs,n_dim_state, dtype=dtype)
    data_cuda, mask_cuda = get_test_data(n_dim_obs,n_dim_state, bs=bs, dtype=dtype)
    print("CPU")
    %timeit -n 1 -r 1 kf.predict(data, mask)
    print("No batches CPU")
    %timeit -n 1 -r 1 [kf.predict(d.unsqueeze(0), m.unsqueeze(0)) for d,m in zip(data, mask)] 
    print("No batches GPU")
    %timeit -n 1 -r 1 [kf_cuda.predict(d.unsqueeze(0), m.unsqueeze(0)) for d,m in zip(data_cuda, mask_cuda)] 

In [None]:
# NOTE(review): compare_performance as defined in this notebook requires five positional
# arguments (n_obs, n_dim_obs, n_dim_state, n_dim_contr, bs); this call passes four.
# The recorded output presumably comes from an earlier signature — confirm which value is missing.
compare_performance(100, 2,2,100)

GPU
87.9 ms ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)
CPU
7.83 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)
No batches CPU
12.9 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)
No batches GPU
154 ms ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


In [None]:
compare_performance(200, 10,10,200)

GPU
2.04 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)
CPU
7.9 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)
No batches CPU
13.5 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)
No batches GPU
2.07 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


### Float64

In [None]:
compare_performance(100, 2,2,100, dtype=torch.float64)

GPU
100 ms ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)
CPU
8.29 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)
No batches CPU
13.9 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)
No batches GPU
159 ms ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


In [None]:
compare_performance(200, 10,10,200, dtype=torch.float64)

GPU
2.22 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)
CPU
8.35 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)
No batches CPU
13.7 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)
No batches GPU
2.01 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


## Stability

In [None]:
import polars as pl
import altair as alt
from altair import datum


        kSR.Q_raw = torch.nn.Parameter(kSR.Q_raw + eye_like(kSR.Q_raw) * torch.sqrt(torch.tensor(1e-5)))
        kSR.R_raw = torch.nn.Parameter(kSR.R_raw + eye_like(kSR.R_raw) * torch.sqrt(torch.tensor(1e-5)))
        kSR.P0_raw = torch.nn.Parameter(kSR.P0_raw + eye_like(kSR.P0_raw) * torch.sqrt(torch.tensor(1e-5)))

In [None]:
def fuzz_filter_SR(n_iter=10, n_obs=50):
    """Measure how far the square-root filter's covariances drift from the standard filter's.

    The seed is reset to 27, then for each of `n_iter` trials a random
    `KalmanFilter` is created, a `KalmanFilterSR` is initialised from it, and
    both filter the same freshly generated test data of length `n_obs`.
    For every time step the standard covariance is compared with the
    square-root filter's `cov` multiplied by its own transpose (that `cov` is
    presumably a factor of the covariance — confirm), and the mean absolute
    difference is recorded.

    Returns a polars DataFrame with columns `t` (time step), `n` (trial index)
    and `MAE`.
    """
    reset_seed(27)
    records = []
    for trial in tqdm(range(n_iter)):
        standard = KalmanFilter.init_random(10,5,8)
        square_root = KalmanFilterSR.init_from(standard)
        data, mask, control = get_test_data(n_obs,10,8)
        filtered = standard.filter(data, mask, control)
        filtered_sr = square_root.filter(data, mask, control)
        for step in range(n_obs):
            cov = filtered.cov[:,step]
            factor = filtered_sr.cov[:,step]
            gap = (cov - factor @ factor.mT).abs().mean().item()
            records.append(dict(t=step, n=trial, MAE=gap))
    return pl.DataFrame(records)

In [None]:
# 100 random filter pairs (n_iter), each run for 100 time steps (n_obs).
err_raw = fuzz_filter_SR(100, 100)

  0%|          | 0/100 [00:00<?, ?it/s]

In [None]:
# Summarise the error across trials for each time step t:
# median, the quartiles (Q1 and Q3) and the worst case (max).
err = err_raw.groupby('t').agg([
    pl.col('MAE').median().alias("median"),
    pl.col('MAE').quantile(.75).alias("Q3"),
    pl.col('MAE').quantile(.25).alias("Q1"),
    pl.col('MAE').max().alias("max")
])

In [None]:
# Line chart of the per-time-step error summary: median, quartiles and maximum.
# Convert once; every layer shares the same pandas frame.
err_df = err.to_pandas()

# The median layer carries the axis configuration (log-scaled y axis, titles).
median = alt.Chart(err_df).mark_line(color="black").encode(
    x = alt.X('t', title="Number of Iterations"),
    y = alt.Y('median', axis=alt.Axis(format=".1e"), scale=alt.Scale(type="log"), title="log MAE"),
    strokeDash = datum("median")
)

Q1 = alt.Chart(err_df).mark_line(color='dimgray', strokeDash=[4,6]).encode(x = 't', y = 'Q1', strokeDash=datum("quantile"))
Q3 = alt.Chart(err_df).mark_line(color='dimgray', strokeDash=[4,6]).encode(x = 't', y = 'Q3', strokeDash=datum("quantile"))
# Named max_line rather than max so the builtin max() stays usable in later cells.
max_line = alt.Chart(err_df).mark_line(color='black', strokeDash=[2,2]).encode(x = 't', y = 'max', strokeDash=datum("max"))
p = (Q1 + Q3 + max_line + median).interactive().properties(title="Standard Filter vs Square Root Filter (Mean Absolute Error of state covariances)")
p

In [None]:
import vl_convert as vlc
from pyprojroot import here
# Export the stability chart `p` as numerical_stability.png (rendered at 2x scale)
# into the thesis image directory.
base_path = here("manuscript/Master Thesis - Meteorological time series imputation using Kalman filters - Simone Massaro/images/")
path = base_path / "numerical_stability.png"
png_data = vlc.vegalite_to_png(vl_spec=p.to_json(), scale=2)
# Binary mode: vegalite_to_png's result is written to the file unchanged.
with open(path, "wb") as f:
    f.write(png_data)