(case_wpcc)=
## wPCC test case

In [None]:
# %load imports.py
%load_ext autoreload
%autoreload 2
%reload_kedro
%config Completer.use_jedi = False  ## (To fix autocomplete)
%matplotlib inline

import warnings
warnings.filterwarnings('ignore')

import pandas as pd
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
from src.models.vmm import ModelSimulator
import matplotlib.pyplot as plt
from src.visualization.plot import track_plots, plot, captive_plot
import kedro
import numpy as np
import os.path
import anyconfig

import matplotlib
matplotlib.rcParams["figure.figsize"] = (10,7)
from src.symbols import *

# Read the project configuration files (YAML) from the base conf folder.
# NOTE: the path is relative, so this cell assumes the notebook is executed
# from a directory two levels below the project root.
conf_path = os.path.join("../../conf/base/")
runs_globals_path = os.path.join(
    conf_path,
    "runs_globals.yml",
)

# runs_globals.yml: provides the "model_test_ids" and "joins" entries read below.
runs_globals = anyconfig.load(runs_globals_path)
model_test_ids = runs_globals["model_test_ids"]

join_globals_path = os.path.join(
    conf_path,
    "join_globals.yml",
)

joins = runs_globals["joins"]
join_runs_dict = anyconfig.load(join_globals_path)

# globals.yml: provides the "vmms" entry read below.
globals_path = os.path.join(
    conf_path,
    "globals.yml",
)
global_variables = anyconfig.load(globals_path)



vmm_names = global_variables["vmms"]

from wPCC_pipeline.pipelines.motion_regression.nodes import predict_force, fit_motions, create_model_from_motion_regression
from wPCC_pipeline.pipelines.prediction.nodes import simulate_euler
from src.prime_system import PrimeSystem

from sklearn.linear_model import LinearRegression
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import RepeatedKFold
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import f_regression
from sklearn.pipeline import Pipeline
from src.parameters import df_parameters
p = df_parameters['symbol']
from src.feature_selection import drop_multicollinearity
from src.bias_variance_tradeoff import train_test_split_run
from sklearn.metrics import r2_score
from wPCC_pipeline.pipelines.motion_regression.nodes import fit_motions, create_model_from_motion_regression
from src.models.regression import Regression
from myst_nb import glue
import src.symbols as s
from IPython.display import Latex
from src.substitute_dynamic_symbols import run
import seaborn as sns
from src.feature_selection import feature_imporance, BestFeatures, DropCorrelation
from wPCC_pipeline.turning_circle import TurningCircle

In [None]:
# Select ship and vessel manoeuvring model (VMM) and load the inputs from the
# kedro data catalog (`catalog` is presumably injected by %reload_kedro in the first cell).
ship="wpcc"
#vmm_name = "vmm_martins_simple"
vmm_name = "vmm_abkowitz"
vmm = catalog.load(vmm_name)

ship_data = catalog.load(f"{ship}.ship_data")

#regression = catalog.load(f"{ship}.updated.{vmm_name}.joined.regression")
#regression.diff_eq_Y.exclude_parameters.pop('Ydelta')
#regression.diff_eq_Y.exclude_parameters.pop('Ythrustdelta')


# Joined model test data (dataset name suggests EKF + smoother output -- TODO confirm).
data = catalog.load(f"{ship}.updated.joined.data_ek_smooth")

added_masses = catalog.load(f"{ship}.added_masses")
exclude_parameters = catalog.load(f"params:{ship}.motion_regression.exclude_parameters")
# Only these columns are handed to predict_force; its result is used below as the
# frame that also carries the force columns ('fx', 'fy', 'mz') used by later cells.
columns = ['u','v','r','u1d','v1d','r1d','delta','thrust','id','x0','y0','psi']
data_with_force = predict_force(data=data[columns], added_masses=added_masses, ship_parameters=ship_data, vmm=vmm)

A turning circle manoeuvre should be predicted for the wPCC test case based on a series of model tests including ZigZag10/10 and ZigZag20/20 tests to port and starboard as well as self propulsion and yaw rate tests. The turning circle test contains much larger drift angles, rudder angles and yaw rates compared to the model tests used for training, so that the prediction ability of the VMMs outside the training data is tested.

This test case focuses on the prediction of forces and moments from the ship hull and rudders. The propeller force is therefore not part of the prediction model and is instead taken from the model test measurements.

The model test data that is used for training is split into a training and a validation dataset. The training dataset contains self propulsion, yaw rate tests and ZigZag10/10 tests to starboard and port. The validation dataset consists of three ZigZag20/20 tests, so that the validation set contains larger drift angles, rudder angles and yaw rates than the training set, in a similar way as for the real prediction case.
The training and validation datasets as well as the turning circle test case are shown in [fig](fig_traintest).

```{glue:figure} fig_traintest
:figwidth: 1000px
:name: "fig_traintest"

wPCC training, validation and testing datasets.
```

In [None]:
def break_plot(df, **kwargs):
    """Plot ``df`` with the lines interrupted wherever the index has a gap.

    Every row whose index value is more than 1 larger than the index value of
    the previous row is set to NaN in a copy of ``df``, so that the plotted
    line is broken at that position instead of bridging the gap.
    The input frame is not modified.

    Args:
        df: DataFrame with a numeric index (the index differences are compared with 1).
        **kwargs: passed on unchanged to ``DataFrame.plot``.

    Returns:
        The return value of ``DataFrame.plot``.
    """
    df_ = df.copy()

    # The first row can never follow a gap. Allocating the mask with the length of
    # the frame (instead of concatenating) also works for an empty frame.
    mask = np.zeros(len(df_), dtype=bool)
    mask[1:] = np.diff(df_.index) > 1

    # np.nan rather than np.NaN: the upper-case alias was removed in NumPy 2.0.
    df_.loc[mask] = np.nan
    return df_.plot(**kwargs)


In [None]:
# Hold out the run used as final test case (id_test; it is loaded further down and
# evaluated as a turning circle) and keep all other model tests for training/validation.
ids = list(data_with_force['id'].unique())
ids_train = ids.copy()
id_test = 22774
ids_train.remove(id_test)
mask = data_with_force['id'].isin(ids_train)
data_train = data_with_force.loc[mask].copy()
# Guard against leaking the test run into the training data.
assert not id_test in data_train['id'].unique()

# Convert the training data with the project's PrimeSystem.
# NOTE(review): U is taken from the full frame (data_with_force) while the data is the
# subset data_train -- presumably this relies on index alignment; confirm.
ps = PrimeSystem(**ship_data)
data_prime = ps.prime(data_train, U=data_with_force['U'])

In [None]:
# Runs held out from the training data for validation.
# (The name ids_test is kept because later cells refer to it.)
ids_test = [
    22771,
    22772,
    22773,
]

# Split the training data into validation runs and the remaining ("sub") training runs.
mask = data_train['id'].isin(ids_test)
data_validation = data_train.loc[mask].copy()
data_sub_train = data_train.loc[~mask].copy()

# Final test run: build the mask from the same frame that is indexed (previously the
# mask came from `data` and was applied to `data_with_force`) and take a copy, like the
# other splits, so that later modifications never act on a view of data_with_force.
mask = data_with_force['id'] == id_test
data_testing = data_with_force.loc[mask].copy()

In [None]:
def rotate(df, dpsi=None):
    """Return a copy of a track rotated around the origin of the (x0, y0) plane.

    The positions ``x0``/``y0`` are rotated by the angle ``dpsi`` and the same
    angle is added to the heading ``psi``. The input frame is left untouched
    (the previous implementation modified the caller's frame in place).

    Args:
        df: DataFrame with the columns 'x0', 'y0' and 'psi'.
        dpsi: rotation angle [rad]. If None (default) a random angle in
            [0, 2*pi) is drawn from NumPy's global random state with a single
            ``np.random.random()`` call, so results are reproducible after
            ``np.random.seed``.

    Returns:
        A new DataFrame with rotated 'x0', 'y0' and shifted 'psi'.
    """
    if dpsi is None:
        dpsi = 2*np.pi*np.random.random()

    rotated = df.copy()
    x0 = df['x0']
    y0 = df['y0']

    rotated['psi'] = df['psi'] + dpsi
    # Standard 2D rotation, evaluated from the unrotated coordinates.
    rotated['x0'] = np.cos(dpsi)*x0 - np.sin(dpsi)*y0
    rotated['y0'] = np.sin(dpsi)*x0 + np.cos(dpsi)*y0

    return rotated
    
    

In [None]:
# rotate() draws its angle from NumPy's global random state; the fixed seed keeps the
# figure reproducible.
np.random.seed(3)
fig, ax = plt.subplots()

dataframes = {}
styles = {}

# One colour per dataset. The order (training, validation, testing) is kept because
# every rotate() call consumes one random number.
for dataset_, line_style_ in (
    (data_sub_train, 'b-'),
    (data_validation, 'g-'),
    (data_testing, 'r-'),
):
    dataframes.update({run_id: rotate(df_) for run_id, df_ in dataset_.groupby(by='id')})
    styles.update(
        {run_id: {'style': line_style_, 'label': '_nolegend_'} for run_id, _ in dataset_.groupby(by='id')}
    )

track_plots(dataframes, lpp=ship_data['L'], beam=ship_data['B'],  styles=styles, N=2, ax=ax);

# Empty proxy lines: one legend entry per dataset instead of one per run.
for line_style_, label_ in (('b-', 'Training'), ('g-', 'Validation'), ('r-', 'Testing')):
    ax.plot([], line_style_, label=label_)

ax.legend();
glue('fig_traintest',fig, display=False)

The regression to identify a model for the wPCC test case is conducted on forces from the inverse dynamics calculated on states predicted with the recursive EKF.

In [None]:
# Symbolic equation expressing the X thrust coefficient through the thrust deduction
# factor t_df; glued so that the markdown below can render it.
# (`sp`, `thrust` and other symbols presumably come from `from src.symbols import *`.)
t_df = sp.symbols('t_df')
eq_Xthrust = sp.Eq(p.Xthrust, thrust*(1-t_df))
glue("eqXthrust", eq_Xthrust)

In [None]:
# Regression equation with the excluded X_T term moved to the left hand side,
# written directly as LaTeX and glued for the markdown below.
eq_exclude = Latex(r"$y-X_T \cdot T = X \beta + \epsilon$")
glue("eqexclude", eq_exclude)

In [None]:
# Relation between a rudder coefficient in the Y equation and its N equation
# counterpart through the rudder lever arm x_r; glued for the markdown below.
eq_Y_R = sp.Eq(Y_R, N_R/x_r)
glue("eqyr", eq_Y_R)

Strong multicollinearity is a known problem for the VMMs {cite:p}`luo_parameter_2016`, {cite:p}`wang_quantifying_2018`.
The thrust coefficient $X_T$ in the hydrodynamic function $X_D$ in [eq](eqXabkowitz) introduces multicollinearity to the regression. This coefficient is therefore instead calculated from the thrust deduction factor $t_{df}$:

```{glue:math} eqXthrust
:label: eqXthrust
```

The $X_T$ coefficient is excluded from the regression by moving it to the left hand side of the regression equation [eq](eqregression):

```{glue:math} eqexclude
:label: eqexclude
```

Rudder coefficients ($Y_R$) from the $Y_D$ equation ([eq](eqYabkowitz)) such as $Y_{\delta}$, $Y_{\delta T}$ etc. have been excluded in the same way by assuming a connection with their $N_D$ equation counterpart through the rudder lever arm $x_r$:

```{glue:math} eqyr
:label: eqyr
```



In [None]:
# Regression of the selected VMM on all training data (data_train, i.e. including the
# validation runs). exclude_parameters and connect_equations_Y_N_rudder correspond to
# the exclusions described in the markdown above.
ps = PrimeSystem(**ship_data)
regression = Regression(
        vmm=vmm,
        data=data_train,
        added_masses=added_masses,
        ship_parameters=ship_data,
        prime_system=ps,
        exclude_parameters=exclude_parameters, 
        connect_equations_Y_N_rudder=True
    )

In [None]:
# Feature matrices (Xs) and labels (ys) of the three regressions, keyed by degree of
# freedom: 'X' (label fx), 'Y' (label fy) and 'N' (label mz), from the primed training data.
Xs = {}
ys = {}


Xs['X'], ys['X'] = regression.diff_eq_X.calculate_features_and_label(data=data_prime, y=data_prime['fx'])
Xs['Y'], ys['Y'] = regression.diff_eq_Y.calculate_features_and_label(data=data_prime, y=data_prime['fy'])
Xs['N'], ys['N'] = regression.diff_eq_N.calculate_features_and_label(data=data_prime, y=data_prime['mz'])

dofs = list(Xs.keys())

In [None]:
# Regressed N coefficients ('mean') with the model's `bse` values ('std'), sorted so
# that the most uncertain coefficients come first.
# NOTE(review): this rebinds `df_parameters`, which the first cell imported from
# src.parameters; cells that need the imported table must run before this one.
df_parameters = pd.DataFrame()
df_parameters['mean'] = regression.model_N.params
df_parameters['std'] = regression.model_N.bse
df_parameters.sort_values(by='std', ascending=False, inplace=True)

In [None]:
# First five rows of the table, i.e. the coefficients with the largest 'std'
# given the descending sort in the previous cell.
df_table = df_parameters.iloc[0:5]
# Glue rounded integer values so the markdown text can quote them inline.
glue('Ndelta',int(df_parameters.loc['Ndelta','mean'].round(0)))
glue('eNdelta',int(df_parameters.loc['Ndelta','std'].round(0)))

glue('Nvvdelta',int(df_parameters.loc['Nvvdelta','mean'].round(0)))
glue('eNvvdelta',int(df_parameters.loc['Nvvdelta','std'].round(0)))

df_table

The regression of $N_D$ gives huge values for some of the coefficients, for instance $N_{\delta}$: {glue}`Ndelta`, which also has a huge uncertainty expressed as standard deviation: {glue}`eNdelta`. The regressed coefficients are however mathematically correct, in the sense that the regressed polynomials fit the training data well. But the predicted polynomial is the sum of very large counteracting coefficients. There are other coefficients such as $N_{vv \delta}$: {glue}`Nvvdelta` that balance $N_{\delta}$.
The model works as long as the states are similar to the training data. But when extrapolating, it is easy to imagine that the balance between these huge coefficients is disturbed, giving large extrapolation errors very quickly. This is an example showing that coefficients found in the regression do not have to be physically correct, only mathematically correct {cite:p}`ittc_maneuvering_2008`. 

In [None]:
# Absolute correlation between the features of the yaw moment (N) regression.
X = Xs['N']
corr = X.corr().abs()

# Keep only the strictly lower triangle (k=-1 also zeroes the diagonal), so each
# feature pair is shown once.
corr_ = pd.DataFrame(np.tril(corr, k=-1), index=X.columns, columns=X.columns)

fig, ax = plt.subplots()
grid = sns.heatmap(corr_, cmap='gray_r', ax=ax)
glue('fig_Ncorr',fig, display=False)

The absolute correlation coefficients between the features in the wPCC yaw moment regression of the Abkowitz model are shown in [fig](fig_Ncorr). It can be seen that most of the features have very high absolute correlation, indicated in black. 

```{glue:figure} fig_Ncorr
:figwidth: 1000px
:name: "fig_Ncorr"

Absolute correlation between the features in the wPCC yaw moment regression of the Abkowitz model.
```
The number of parameters in the Abkowitz model needs to be reduced for the wPCC test case to lower the multicollinearity and increase the robustness of the model, so that it can make predictions outside the known data. The authors have proposed a simplified Abkowitz model for this purpose ([eq](eqXmartins_simple),[eq](eqYmartins_simple),[eq](eqNmartins_simple)) where only the most essential parameters are kept. 

In [None]:
# Model stored in the kedro catalog for the selected ship and VMM
# (not used by the cells visible in this section).
ship_model = catalog.load(f"{ship}.updated.{vmm_name}.joined.model")

## VMM Martin

In [None]:
# Simplified Abkowitz VMM ("vmm_martins_simple") and the corresponding `ek` catalog entry.
vmm_martin = catalog.load("vmm_martins_simple")
ek_martin = catalog.load(f"{ship}.vmm_martins_simple.ek")

In [None]:
# Fit the simplified Abkowitz model on the sub training set only (validation runs
# excluded), so that it can be checked against the validation runs below.
regression_martin, _ = fit_motions(data=data_sub_train, 
                                   added_masses=added_masses, 
                                   ship_data=ship_data, 
                                   vmm=vmm_martin, 
                                   exclude_parameters=exclude_parameters)

ship_model_martin = create_model_from_motion_regression(regression=regression_martin)
#result_martin = ship_model_martin.simulate(data_validation)
#df_predict_martin = result_martin.result.copy()

In [None]:
# Same fit as for the simplified model, but with the full Abkowitz VMM (`vmm`),
# again on the sub training set only.
regression_abkowitz, _ = fit_motions(data=data_sub_train, 
                                   added_masses=added_masses, 
                                   ship_data=ship_data, 
                                   vmm=vmm, 
                                   exclude_parameters=exclude_parameters)

ship_model_abkowitz = create_model_from_motion_regression(regression=regression_abkowitz)

Forces and moments predicted with the VMMs fitted on the training set are shown in [fig](fig_validation_forces). It can be seen that the full Abkowitz model overpredicts the forces by far due to the anticipated extrapolation error. Simulations of the validation cases were therefore only possible with the simplified Abkowitz model, as shown for one of the ZigZag20/20 validation cases in [fig](fig_validation_sim).

```{glue:figure} fig_validation_forces
:figwidth: 1000px
:name: "fig_validation_forces"

Validation of force models for wPCC.
```

```{glue:figure} fig_validation_sim
:figwidth: 1000px
:name: "fig_validation_sim"

Validation with simulations for wPCC.
```

In [None]:
# Predict forces/moment for the validation runs with both fitted regressions and
# compare them with the values in data_validation, one subplot per component.
validation_martin = regression_martin.predict(data_validation)
validation_abkowitz = regression_abkowitz.predict(data_validation)

fig,axes=plt.subplots(nrows=3)

for ax,dof in zip(axes,['fx','fy','mz']):
    
    validation_abkowitz.plot(y=dof,ax=ax, label='Validation Abkowitz')
    validation_martin.plot(y=dof,ax=ax, label='Validation Simplified Abkowitz')
    data_validation.plot(y=dof,ax=ax, label='True')
    
    ax.set_ylabel(dof)
    ax.set_xlabel('sample')
    # Hide the per-axes legends; a single legend is drawn on the first axes below.
    ax.get_legend().set_visible(False)
    
axes[0].legend();
glue("fig_validation_forces",fig, display=False)

In [None]:
# Simulate the first validation run with the models fitted on the sub training set
# and compare the simulation with the model test.
for id_validation in ids_test[0:1]:

    df_test = catalog.load(f'{ship}.updated.{id_validation}.data_ek_smooth')
    result_martin = ship_model_martin.simulate(df_test)

    dataframes = {'True':df_test,
                 'Validation Simplified Abkowitz':result_martin.result,
                 }

    try:
        result_abkowitz = ship_model_abkowitz.simulate(df_test)
    except Exception:
        # Best effort: the full Abkowitz simulation is expected to fail for the
        # validation cases (see the text above), in which case it is left out of the
        # plots. `Exception` instead of a bare `except` so that KeyboardInterrupt and
        # SystemExit are no longer swallowed.
        pass
    else:
        dataframes['Abkowitz']=result_abkowitz.result

    track_plots(dataframes, lpp=ship_data['L'], beam=ship_data['B'],  N=2);
    fig = plot(dataframes=dataframes, keys=['u','v','r'], ncols=1, zero_origo=True);

# `fig` is the time series figure of the last (here: only) simulated validation run.
glue("fig_validation_sim",fig, display=False)

In [None]:
# Load the held-out test run (id_test). Note that this rebinds `df_test`, which the
# validation loop above used for a validation run.
df_test = catalog.load(f'{ship}.updated.{id_test}.data_ek_smooth')
ek = catalog.load(f"{ship}.{vmm_name}.ek")

In [None]:
# Final model: refit the simplified Abkowitz model on all training data (data_train,
# i.e. including the validation runs) and simulate the held-out test run.
# This rebinds regression_martin / ship_model_martin / result_martin from the cells above.
regression_martin, _ = fit_motions(data=data_train, 
                                   added_masses=added_masses, 
                                   ship_data=ship_data, 
                                   vmm=vmm_martin, 
                                   exclude_parameters=exclude_parameters)

ship_model_martin = create_model_from_motion_regression(regression=regression_martin)
result_martin = ship_model_martin.simulate(df_test) 

In [None]:
# Model test and simulation of the test run, keyed by the label used in the plots.
dataframes = {
    'True' : df_test,
    'Test Simplified Abkowitz' : result_martin.result,
        
}

# NOTE(review): none of these keys matches a key in `dataframes` above
# ('True', 'Test Simplified Abkowitz'), so these styles are presumably never applied
# by the plotting functions -- confirm the intended labels.
styles = {
    'Experiment' : {'style':'r--'},
    'Prediction' : {'style':'g-'},
    'Prediction2' : {'style':'b-'},
    'Prediction Martin' : {'style':'m-'},
    'Prediction Abkowitz' : {'style':'y-'},
    
}

In [None]:
# Track plot of the test run (model test vs. simulation), glued for the figure below.
fig = track_plots(dataframes, lpp=ship_data['L'], beam=ship_data['B'],  styles=styles, N=2);
glue("fig_track_plot_testing_sim", fig, display=False)

In [None]:
# Time series of u, v and r for the test run (model test vs. simulation).
fig = plot(dataframes=dataframes, keys=['u','v','r'], ncols=1, styles=styles, zero_origo=False);

glue("fig_testing_sim", fig, display=False)


In [None]:
# Evaluate the turning circle for the model test ('True') and for the simulation with
# the simplified Abkowitz model; one column per source in result_turning_circle.
result_turning_circle = pd.DataFrame()
turning_circle = TurningCircle(angle=35, nominal_speed=df_test['U'].iloc[0], lpp=ship_data['L'], df=df_test)
result_turning_circle['True'] = pd.Series(turning_circle.evaluate(), name='True')
df_result = result_martin.result.copy()
# Total speed V from the simulated velocity components (presumably required by
# TurningCircle.evaluate -- TODO confirm).
df_result['V'] = np.sqrt(df_result['u']**2 + df_result['v']**2)
# The nominal speed is taken from the model test for both evaluations.
turning_circle = TurningCircle(angle=35, nominal_speed=df_test['U'].iloc[0], lpp=ship_data['L'], df=df_result)
result_turning_circle['Test Simplified Abkowitz'] = pd.Series(turning_circle.evaluate(), name='Test Simplified Abkowitz')

In [None]:
# Remove the 'units' row so that only the evaluated quantities remain.
# Done without inplace and with errors='ignore' so that the cell is idempotent:
# re-running it no longer raises a KeyError once the row is gone.
result_turning_circle = result_turning_circle.drop(index='units', errors='ignore')

In [None]:
# Relative change between the columns, i.e. the deviation of the simulation
# ('Test Simplified Abkowitz') from the model test ('True') for each row.
result_turning_circle.pct_change(axis=1)

The result from the final prediction of the turning circle test is shown in [fig](fig_track_plot_testing_sim) and [fig](fig_testing_sim).

```{glue:figure} fig_track_plot_testing_sim
:figwidth: 1000px
:name: "fig_track_plot_testing_sim"

Turning circle test case for wPCC, track plots from model test and simulation.
```

```{glue:figure} fig_testing_sim
:figwidth: 1000px
:name: "fig_testing_sim"

Turning circle test case for wPCC, time series from model test and simulation.
```
