# Measure accuracy
How should the accuracy of a model be measured?

In [None]:
# %load imports.py
%load_ext autoreload
%autoreload 2
%reload_kedro
%config Completer.use_jedi = False  ## (To fix autocomplete)
import pandas as pd
from src.models.vmm import ModelSimulator
import matplotlib.pyplot as plt
from src.visualization.plot import track_plots, plot, captive_plot
import kedro
import numpy as np
import os.path
import anyconfig

import matplotlib
matplotlib.rcParams["figure.figsize"] = (15,4)
from src.symbols import *

# Configuration: build the paths of the three YAML files in the base conf
# folder, then load each file and pick out the entries used further down.
conf_path = "../conf/base/"
runs_globals_path = os.path.join(conf_path, "runs_globals.yml")
join_globals_path = os.path.join(conf_path, "join_globals.yml")
globals_path = os.path.join(conf_path, "globals.yml")

# Entries read from runs_globals.yml.
runs_globals = anyconfig.load(runs_globals_path)
model_test_ids = runs_globals["model_test_ids"]
joins = runs_globals["joins"]

# Whole content of join_globals.yml.
join_runs_dict = anyconfig.load(join_globals_path)

# Entries read from globals.yml.
global_variables = anyconfig.load(globals_path)
vmms = global_variables["vmms"]
# only_joined: regress/predict with only models from joined runs.
only_joined = global_variables["only_joined"]

In [None]:
from wPCC_pipeline.pipelines.prediction.nodes import simulation_accuracy
from wPCC_pipeline.pipelines.filter_data_extended_kalman.nodes import extended_kalman_filter
from src.extended_kalman_vmm import ExtendedKalman


In [None]:
# Number of the model test to study; it is the prefix of the catalog entries
# loaded below.
# NOTE(review): `id` shadows the Python builtin; the name is kept because
# other cells may refer to it.
id = 22773
df_smooth = catalog.load("{}.data_ek_smooth".format(id))
df_data = catalog.load("{}.data".format(id))

# Add U = sqrt(u^2 + v^2) as a new column of df_data.
df_data["U"] = np.sqrt(np.square(df_data["u"]) + np.square(df_data["v"]))

# The two models that are compared in this notebook.
model1 = catalog.load("vmm_martin.motion_regression.joined.model")
model2 = catalog.load("vmm_linear.motion_regression.joined.model")

ship_data = catalog.load("ship_data")

In [None]:
# Simulate both models, passing df_smooth as the `df_` argument.
# The simulated frames are read later from the `.result` attribute.
result1 = model1.simulate(df_=df_smooth)
result2 = model2.simulate(df_=df_smooth)

In [None]:
# Frames to compare: df_smooth (labelled "model test") and the result of
# each of the two simulations.
dataframes = {
    "model test": df_smooth,
    "simulation (model1)": result1.result,
    "simulation (model2)": result2.result,
}

# All tracks are drawn into one axes; L and B from ship_data are passed as
# lpp and beam. Trailing semicolons suppress the notebook cell output.
fig, ax = plt.subplots()
track_plots(
    dataframes=dataframes,
    lpp=ship_data["L"],
    beam=ship_data["B"],
    ax=ax,
    plot_boats=False,
);

# plot() is given every column of the model1 simulation result as keys.
plot(dataframes, keys=result1.result.columns);

In [None]:
# simulation_accuracy for the model1 simulation against df_smooth; the return
# value is not stored, it is only shown as the cell output.
simulation_accuracy(data=df_smooth, results=result1.result)

In [None]:
# simulation_accuracy for the model2 simulation against df_smooth; the return
# value is not stored, it is only shown as the cell output.
simulation_accuracy(data=df_smooth, results=result2.result)

In [None]:
from wPCC_pipeline.pipelines.extended_kalman.nodes import create_extended_kalman
from wPCC_pipeline.pipelines.filter_data_extended_kalman.nodes import resimulate_extended_kalman

In [None]:


# One object from create_extended_kalman per model, each built from that
# model's own parameters together with the shared ship_data.
ek1 = create_extended_kalman(
    parameters=model1.parameters,
    ship_data=ship_data,
    vmm=model1,
)
ek2 = create_extended_kalman(
    parameters=model2.parameters,
    ship_data=ship_data,
    vmm=model2,
)

# Unused alternative, kept for reference:
# ek1 = ExtendedKalman(vmm=model1, parameters=model1.parameters, ship_parameters=ship_data)

In [None]:
# Call ek1.simulate on df_smooth with delta and thrust as the input columns.
# The return value is not stored; it is only shown as the cell output.
ek1.simulate(data=df_smooth, input_columns=['delta','thrust'])

In [None]:
# Add U = sqrt(u^2 + v^2) as a new column of df_smooth (in place).
df_smooth["U"] = np.sqrt(np.square(df_smooth["u"]) + np.square(df_smooth["v"]))

# Columns that make up the state, in the order used by the arrays below.
state_columns = ["x0", "y0", "psi", "u", "v", "r"]

# The state columns as a DataFrame (one row per sample) ...
data = df_smooth[state_columns]

# ... and as a transposed array (one row per state column).
x = np.transpose(df_smooth[state_columns].values)

# Step length taken from the first two index values.
# NOTE(review): assumes a constant step over the whole index - confirm.
t = df_smooth.index
h = t[1] - t[0]


def predict(ek, input_columns=None):
    """Predict the state one step ahead for every sample of ``df_smooth``.

    The state derivative is evaluated with ``ek.lambda_f`` for all samples at
    once, and one explicit Euler step of length ``h`` is added to the current
    state: ``data + x_dot * h``.

    Reads the notebook-level variables ``df_smooth``, ``x``, ``data`` and
    ``h``.

    Parameters
    ----------
    ek
        Object exposing ``lambda_f(x, input)``. The transposed return value
        is expected to have the same shape as ``data``.
    input_columns : list of str, optional
        Columns of ``df_smooth`` passed to ``lambda_f`` as input. Defaults to
        ``["delta", "thrust"]``.

    Returns
    -------
    pandas.DataFrame
        ``data`` plus the increment ``x_dot * h``, with the index and columns
        of ``data``.
    """
    # None instead of a list literal: a mutable default would be shared
    # between calls.
    if input_columns is None:
        input_columns = ["delta", "thrust"]

    inputs = df_smooth[input_columns]

    # Transposed so that rows are samples, matching the layout of `data`.
    x_dot = ek.lambda_f(x, inputs).T

    # NOTE(review): the previous version concatenated a zero-row array in
    # front of x_dot, which had no effect on the values. If shifting the
    # prediction by one sample was intended, that is not done here - confirm.
    return data + x_dot * h

In [None]:
# model1 uses predict's default input columns; model2 is given delta and U.
df_predict1 = predict(ek1)
df_predict2 = predict(ek2, input_columns=["delta","U"])

# Difference between each prediction and `data`, row by row.
# NOTE(review): predict() returns data + x_dot*h, so this difference is the
# increment x_dot*h itself. Confirm that comparing against the same row
# (rather than against the next sample) is what is intended.
df_error1 = df_predict1 - data
df_error2 = df_predict2 - data


In [None]:
# Error frames of both models, plotted for every column of `data`.
# The trailing semicolon suppresses the notebook cell output.
dataframes = dict(model1=df_error1, model2=df_error2)
plot(dataframes, keys=data.columns);

In [None]:
from sklearn.metrics import r2_score, mean_squared_error

def mean_squared_errors(df_pred, data, keys=("u", "v", "r")):
    """Return the root-mean-square error between ``df_pred`` and ``data``.

    Despite the function name, the value stored for each key is the square
    root of ``mean_squared_error``, i.e. an RMSE.

    Parameters
    ----------
    df_pred
        Predicted values, indexable by the entries of ``keys``.
    data
        Reference values, indexable by the entries of ``keys``.
    keys : iterable of str, optional
        Columns to evaluate. Defaults to ``("u", "v", "r")``.

    Returns
    -------
    dict
        ``{key: rmse}``. A key is left out when its predicted column contains
        null values or when the two columns differ in length.
    """
    accuracies = {}
    for key in keys:
        predicted = df_pred[key]
        # Columns with missing predictions are skipped, not scored.
        if not predicted.notnull().all():
            continue

        # Looked up only after the null check, as in the original condition.
        reference = data[key]
        if len(reference) != len(predicted):
            continue

        accuracies[key] = np.sqrt(
            mean_squared_error(y_true=reference, y_pred=predicted)
        )
    return accuracies

In [None]:
# One row per model, one column per key returned by mean_squared_errors.
# The frame is built in a single constructor call because DataFrame.append
# was deprecated in pandas 1.4 and removed in pandas 2.0.
accuracies_model1 = pd.Series(
    mean_squared_errors(df_predict1, data=data), name="model1"
)
accuracies_model2 = pd.Series(
    mean_squared_errors(df_predict2, data=data), name="model2"
)
# The Series names become the row labels; keys missing from a Series are NaN.
df_accuracies = pd.DataFrame(
    [accuracies_model1, accuracies_model2],
    columns=["u", "v", "r"],
)
# The scratch name stays bound to the model2 row, as it was before.
accuracies = accuracies_model2

In [None]:
# Bar chart of df_accuracies: one group per row, one bar per column.
df_accuracies.plot.bar()

In [None]:
# Model1 error in u, v and r plotted against u, v, r and delta:
# one subplot per x quantity, one line per error column.
keys_error = ["u", "v", "r"]
keys_x = ["u", "v", "r", "delta"]

fig, axes = plt.subplots(ncols=len(keys_x))

for key, ax in zip(keys_x, axes):
    for key_error in keys_error:
        ax.plot(df_smooth[key], df_error1[key_error], "-", label=key_error)
    ax.set_xlabel(key)
    ax.legend()
    
    


In [None]:
from src.extended_kalman_filter import loglikelihood

In [None]:
# Loaded from the catalog and passed to extended_kalman_filter in later cells.
covariance_matrixes = catalog.load("covariance_matrixes")

In [None]:
# Run extended_kalman_filter on df_data for each model's Kalman object.
# The return values are not stored; later cells read ek1.time_steps and
# ek2.time_steps (presumably filled in by these calls - confirm).
extended_kalman_filter(
    ek=ek1,
    data=df_data,
    covariance_matrixes=covariance_matrixes,
)
extended_kalman_filter(
    ek=ek2,
    data=df_data,
    covariance_matrixes=covariance_matrixes,
    input_columns=["delta", "U"],
)

In [None]:
# Log-likelihood per model, divided by the number of rows in df_smooth.
# The dtype is given explicitly: a bare pd.Series() emits a warning on
# pandas 1.x and defaults to object dtype on pandas 2.x.
# NOTE(review): the filter was run on df_data while the normalisation uses
# len(df_smooth) - confirm that both frames have the same length.
loglikelihoods = pd.Series(dtype=float)
loglikelihoods['model1'] = loglikelihood(ek1.time_steps)/len(df_smooth)

In [None]:
# loglikelihood of ek2.time_steps divided by len(df_smooth), stored under
# 'model2'. The bare name on the last line shows the Series as cell output.
loglikelihoods['model2'] = loglikelihood(ek2.time_steps)/len(df_smooth)
loglikelihoods

In [None]:
# Bar chart with one bar per entry of loglikelihoods.
loglikelihoods.plot.bar()

In [None]:
from wPCC_pipeline.pipelines.brix.nodes import initial_parameters
from wPCC_pipeline.pipelines.vessel_manoeuvring_models.nodes import martins_model
# Third case for the comparison: martins_model() combined with the parameters
# returned by initial_parameters for this ship_data
# (presumably not regressed against the model tests - confirm).
vmm = martins_model()
parameters = initial_parameters(ship_data=ship_data)

In [None]:
# Built the same way as ek1 and ek2, but from `parameters` and `vmm`.
ek = create_extended_kalman(parameters=parameters, ship_data=ship_data, vmm=vmm)

In [None]:
# Run the filter on df_data with `ek`; no input_columns argument is passed.
# The return value is not stored; it is only shown as the cell output.
extended_kalman_filter(ek=ek, data=df_data, covariance_matrixes=covariance_matrixes)

In [None]:
# loglikelihood of ek.time_steps divided by len(df_smooth), stored under 'ek'.
loglikelihoods['ek'] = loglikelihood(ek.time_steps)/len(df_smooth)

In [None]:
# Show the collected values (model1, model2 and ek) as the cell output.
loglikelihoods