### Preprocessor

In [None]:
# %load imports.py
%load_ext autoreload
%autoreload 2
%reload_kedro
%config Completer.use_jedi = False  ## (To fix autocomplete)
%matplotlib inline

import pandas as pd
import matplotlib.pyplot as plt
from src.visualization.plot import track_plots, plot, captive_plot
import kedro
import numpy as np
import os.path
import anyconfig

import matplotlib.pyplot as plt
import matplotlib
# Global figure appearance for this notebook.
# 'paper' is not one of matplotlib's bundled style names -- presumably a
# custom style sheet that must be installed on this machine (TODO confirm).
plt.style.use('paper')

# Inline figures are emitted as PDF.
# NOTE(review): IPython documents this helper as deprecated in favour of
# matplotlib_inline.backend_inline.set_matplotlib_formats -- confirm against
# the IPython version in use before upgrading.
from IPython.display import set_matplotlib_formats
set_matplotlib_formats('pdf')

# Serif text, with Times as the only listed serif face.
plt.rcParams["font.family"] = "serif"
plt.rcParams["font.serif"] = ["Times"]

from myst_nb import glue
from src.symbols import *
import src.symbols as symbols
from src.system_equations import *

from IPython.display import display, Math, Latex, Markdown
from sympy.physics.vector.printing import vpprint, vlatex

from src.parameters import df_parameters
# Shorthand for the "symbol" entry of the parameter table.
p = df_parameters["symbol"]

# Read configs (paths are relative to the notebook's working directory):
conf_path = os.path.join("../../conf/base/")

# Run-level globals: model test ids and the join definitions.
runs_globals_path = os.path.join(conf_path, "runs_globals.yml")
runs_globals = anyconfig.load(runs_globals_path)
model_test_ids = runs_globals["model_test_ids"]
joins = runs_globals["joins"]

# Join globals.
join_globals_path = os.path.join(conf_path, "join_globals.yml")
join_runs_dict = anyconfig.load(join_globals_path)

# Project-wide globals, from which the "vmms" entry is taken.
globals_path = os.path.join(conf_path, "globals.yml")
global_variables = anyconfig.load(globals_path)
vmms = global_variables["vmms"]
from sklearn.metrics import r2_score
from pathlib import Path

In [None]:
# --- Selection of ship, run and vessel manoeuvring model -------------------
# Alternatives kept for quick switching:
#   ship = "kvlcc2_hsva"
#   id = model_test_ids[ship][0], "22774" or "22611"
ship = "wpcc"
id = "22771"  # NOTE: shadows the builtin `id`; name kept because later cells read it
vmm_name = "vmm_martins_simple"

# --- Load the selected run from the kedro catalog ---------------------------
# `catalog` is not defined in this cell; it is expected to come from the
# `%reload_kedro` magic in the first cell.
raw_data = catalog.load(f"{ship}.{id}.data")
ship_data = catalog.load(f"{ship}.ship_data")

lowpass_simulations = catalog.load(f"{ship}.lowpass_simulation")
# The dataset name is built from `vmm_name` instead of repeating the literal,
# so changing the model above is enough to switch the EKF resimulation too.
EKF_simulation = catalog.load(f"{ship}.updated.{vmm_name}.joined.{id}.data_resimulate")

# --- Index the low-pass resimulations ---------------------------------------
# Every key is treated as a path: its last part is used as the run id and the
# second-to-last part as the cut-off; each value is called later as a loader.
df_lowpass_simulations = pd.DataFrame(index=lowpass_simulations.keys())
df_lowpass_simulations['id'] = [Path(key).parts[-1] for key in lowpass_simulations.keys()]
df_lowpass_simulations['cuttoff'] = [Path(key).parts[-2] for key in lowpass_simulations.keys()]
df_lowpass_simulations['loader'] = lowpass_simulations.values()
# Both sort columns hold strings (path parts), so the order is lexicographic
# (e.g. "10" sorts before "7"). Reassigned instead of sorting in place so the
# cell stays idempotent on re-run.
df_lowpass_simulations = df_lowpass_simulations.sort_values(by=["id", "cuttoff"])

# --- Accuracy tables, with some runs excluded -------------------------------
# NOTE(review): these two dataset names are hard-coded to "wpcc" and do not
# follow `ship`; the excluded run numbers look wPCC-specific -- confirm before
# switching ship.
# NOTE(review): `exclude_runs` holds ints; this only filters anything if
# r2_lowpass['id'] and r2_EKF.index are integer-typed as well -- TODO confirm.
exclude_runs = [22611, 22635, 22639]

r2_lowpass = catalog.load("wpcc.r2_lowpass")
mask = ~r2_lowpass['id'].isin(exclude_runs)
r2_lowpass = r2_lowpass.loc[mask].copy()

r2_EKF = catalog.load("wpcc.r2_EKF")
mask = ~r2_EKF.index.isin(exclude_runs)
r2_EKF = r2_EKF.loc[mask].copy()


In [None]:
# Keep only the low-pass resimulations that belong to the selected run.
mask = df_lowpass_simulations['id'] == id
selected_lowpass = df_lowpass_simulations.loc[mask]

# Call every stored loader and key the result by its cut-off label.
dataframes_lowpass = {
    f"{cutoff} [Hz]": load()
    for cutoff, load in zip(selected_lowpass['cuttoff'], selected_lowpass['loader'])
}

In [None]:
# Signals to compare in the track plot: the model test data, the EKF
# resimulation and one entry per low-pass cut-off frequency.
dataframes = {
    'Model test': raw_data,
    'EKF': EKF_simulation,
    **dataframes_lowpass,
}

# Explicit line styles for the two references; every low-pass entry gets its
# own {'alpha': 0.5} dict.
styles = {
    'Model test': dict(style='k-', zorder=20),
    'EKF': dict(style='b-', zorder=19),
}
styles.update({label: {'alpha': 0.5} for label in dataframes_lowpass})

ax = track_plots(
    dataframes=dataframes,
    lpp=ship_data['L'],
    beam=ship_data['B'],
    N=2,
    styles=styles,
    flip=True,
    time_window=[0, 50],
)
# Optional manual zoom, currently disabled:
#   ax.set_ylim(-1, 5)
#   ax.set_xlim(-5, 40)
ax.legend(loc='upper left')

In [None]:
# Mean of the 'RMSE' column for each distinct 'cuttoff' value in r2_lowpass.
r2_lowpass_average = (
    r2_lowpass
    .groupby(by=['cuttoff'])[['RMSE']]
    .mean()
)

Instead of the EKF, which the proposed PIT uses, a low-pass filter is a very common choice for preprocessing model test data.
To study which of the filters works best, the proposed PIT has been run on the wPCC model test data with the EKF + RTS smoother replaced by a low-pass filter. The low-pass filter applies a first-order linear digital Butterworth filter twice, once forward and once backward, to obtain zero phase {cite:p}`virtanen_scipy_2020`. {numref}`fig_lowpass_accuracy` shows the average simulation error $ \overline{RMSE} $ with low-pass filters at various cut-off frequencies for all wPCC model tests. The corresponding error for the PIT using EKF + RTS is also shown in the figure. The simulation error for each model test is expressed as the Root Mean Square Error $RMSE$ [eq](eq_RMSE) of the distance between the positions from the model test and the simulation. The low-pass filter with a 7 Hz cut-off frequency has the lowest error among the low-pass filters, but EKF + RTS in the PIT has an even lower error.

```{glue:math} eq_RMSE
:label: "eq_RMSE"
```


In [None]:
# Build the RMSE definition as a LaTeX display object and glue it under the
# key "eq_RMSE", which the glue:math directive in the text refers to.
eq_RMSE = Latex(
    r"$RMSE=\sqrt{ \frac{\sum_{n=1}^{N} (d_n^2) }{N}} $"
)
glue("eq_RMSE", eq_RMSE)

where $d_n$ is the Euclidean distance at each time step between the model test position ($x_0$, $y_0$) and the predicted position.

```{glue:figure} fig_lowpass_accuracy
:name: "fig_lowpass_accuracy"

Average simulation error with MA VMM fitted on wPCC model test data using low-pass filters with various cut-off frequencies or the EKF.
```

In [None]:
# Figure glued as "fig_lowpass_accuracy": mean RMSE per low-pass cut-off
# frequency, with the EKF mean drawn as a constant reference line.
y = 'RMSE'
fig, ax = plt.subplots()
r2_lowpass_average.plot(y=y, ax=ax, label='Low-pass', style='.-')

# EKF statistics over the rows of r2_EKF.
accuracy_EKF = r2_EKF[y].mean()
y_err_EKF = r2_EKF[y].std()  # computed here but not drawn in this cell

# Constant line from the first to the last cut-off value on the x axis.
y0 = np.full(2, accuracy_EKF)
cutoffs = r2_lowpass_average.index
ax.plot([cutoffs[0], cutoffs[-1]], y0, label='EKF + smoother')

# The upper y limit is half of the largest low-pass mean.
ax.set_ylim(0, 0.5 * r2_lowpass_average[y].max())

ax.set_xlabel('Low-pass cut off frequency [Hz]')
ax.set_ylabel(r'Average simulation error, $\overline{RMSE}$ $[m]$')
ax.legend(loc='best')
ax.grid(True)
ax.set_xticks(cutoffs.values);
glue("fig_lowpass_accuracy", fig, display=False)