# Cross validation

In [None]:
# %load imports.py
%load_ext autoreload
%autoreload 2
%reload_kedro
%config Completer.use_jedi = False  ## (To fix autocomplete)
import warnings
warnings.filterwarnings('ignore')
import pandas as pd
from src.models.vmm import ModelSimulator
import matplotlib.pyplot as plt
from src.visualization.plot import track_plots, plot, captive_plot
import kedro
import numpy as np
import os.path
import anyconfig

import matplotlib
matplotlib.rcParams["figure.figsize"] = (15,4)
from src.symbols import *

# Read the YAML configuration files that live in the base conf directory.
conf_path = "../conf/base/"

# Run globals: provides `model_test_ids` and `joins`.
runs_globals_path = os.path.join(conf_path, "runs_globals.yml")
runs_globals = anyconfig.load(runs_globals_path)
model_test_ids = runs_globals["model_test_ids"]
joins = runs_globals["joins"]

# Join globals.
join_globals_path = os.path.join(conf_path, "join_globals.yml")
join_runs_dict = anyconfig.load(join_globals_path)

# General globals: provides `vmms` and `only_joined`.
globals_path = os.path.join(conf_path, "globals.yml")
global_variables = anyconfig.load(globals_path)

vmms = global_variables["vmms"]
# (regress/predict with only models from joined runs)
only_joined = global_variables["only_joined"]

In [None]:
from wPCC_pipeline.pipelines.motion_regression.nodes import fit_motions
from sklearn.model_selection import KFold
from sklearn.model_selection import RepeatedKFold
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import LinearRegression
from sklearn.pipeline import Pipeline
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import f_regression

In [None]:
# Name of the join whose data and regression are cross-validated below.
join = "joined"
data = catalog.load(f"{join}.data_ek_smooth")

added_masses = catalog.load("added_masses")
ship_data = catalog.load("ship_data")

# Model to reduce (alternative tried earlier: "vmm_martins_simple").
vmm_name = "vmm_abkowitz"
vmm = catalog.load(vmm_name)

# The regression must belong to the same join as `data`: the cross-validation
# cell uses data['id'] as fold labels for the regression's X/y matrices, so the
# join name is taken from `join` instead of being hard-coded a second time.
regression = catalog.load(f"{vmm_name}.motion_regression.{join}.regression")
exclude_parameters = catalog.load("params:motion_regression.exclude_parameters")
# Same catalog entry as `ship_data`, loaded under the name MotionRegression expects.
ship_parameters = catalog.load("ship_data")
ek = catalog.load(f"{vmm_name}.ek")

In [None]:
# Preview the first rows of the loaded data.
data.head()

In [None]:
# One figure per column (presumably the time derivatives of u, v and r — confirm).
for key in ['u1d','v1d','r1d']:
    fig, ax = plt.subplots(figsize=(10, 5))
    data.plot(y=key, ax=ax)
    ax.set_title(key)
    

In [None]:
# Show the summary of the `model_Y` fit held by the regression loaded from the catalog.
regression.model_Y.summary()

In [None]:
# Feature selection followed by ordinary least squares.
# k=4 is only the initial value; the grid search sets `select_k_best__k` itself.
select_k_best = SelectKBest(score_func=f_regression, k=4)
linear_regression = LinearRegression()

steps = [('select_k_best', select_k_best), ('linear_regression', linear_regression)]
pipeline = Pipeline(steps=steps)

In [None]:
from src.parameters import df_parameters
from src.models.vmm import VMM
from src.models.regression import MotionRegression
from sklearn.model_selection import PredefinedSplit
# 'symbol' column of the parameter table; looked up by parameter name (`p[key]`)
# further down to get the symbol to substitute.
p = df_parameters['symbol']

In [None]:
# Single run id (alternative: hold out only this run with `data['id'] == id`).
# NOTE(review): `id` shadows the Python builtin of the same name.
id = 22774

# Rows belonging to these runs are held out from training.
held_out_ids = [22771, 22772, 22773]
test_mask = data['id'].isin(held_out_ids)

In [None]:
from copy import deepcopy

# Alternative splitter (repeated 2-fold); only takes effect if `cv=cv` is passed below.
cv = RepeatedKFold(n_splits=2, n_repeats=5, random_state=1)

# Use the run id of every row as its fold label, so each run forms one test fold.
# NOTE(review): assumes the rows of regression.X_*/y_* line up one-to-one with
# the rows of `data` — confirm.
test_fold = data['id']
ps = PredefinedSplit(test_fold)

# Names of the parameters (design-matrix columns) that the search drops,
# collected over all degrees of freedom. May contain duplicates.
removes_all = []

for dof in ['X','Y','N']:

    X_key = f"X_{dof}"
    y_key = f"y_{dof}"

    X = getattr(regression, X_key)
    y = getattr(regression, y_key)

    # Try keeping between (n_ - 15) and (n_ - 1) features. The lower bound is
    # clamped to 1 because SelectKBest cannot fit with k <= 0, which the plain
    # range would produce for matrices with 15 or fewer columns.
    n_ = len(X.columns)
    k_candidates = list(np.arange(max(1, n_ - 15), n_))
    if not k_candidates:
        # A single-column matrix leaves nothing to deselect.
        continue

    grid = dict()
    grid['select_k_best__k'] = k_candidates

    search = GridSearchCV(estimator=pipeline,
                          param_grid=grid,
                          scoring='neg_mean_absolute_error',
                          n_jobs=-1,
                          cv=ps,
                         )
    # perform the search
    search_result = search.fit(X, y)

    # Boolean mask of the columns kept by the best estimator.
    mask = search_result.best_estimator_['select_k_best'].get_support()

    # Parameters that must never be removed (none at the moment; 'Y0'/'N0' were tried).
    exclude = set()

    removes = set(X.columns) - set(X.columns[mask]) - exclude
    removes_all += list(removes)

In [None]:
# Keep the Y and N equations symmetric: a parameter dropped from one of them is
# also dropped from the other (same name with the leading letter swapped).
# The result is de-duplicated and sorted so that its order does not depend on
# string-hash randomisation between kernel restarts.
removes_all = sorted(
    set(removes_all)
    | {f"Y{key[1:]}" for key in removes_all if key.startswith('N')}
    | {f"N{key[1:]}" for key in removes_all if key.startswith('Y')}
)

In [None]:
# Display the final list of parameter names to drop.
removes_all

In [None]:
# Substitutions that set every removed parameter symbol to zero
# (the same list applies to all three equations).
subs = [(p[key], 0) for key in removes_all]

# Reduced X/Y/N equations, keyed like the attributes of `vmm`.
eqs = {}
for dof in ['X','Y','N']:
    key_eq = f"{dof}_eq"
    eq = getattr(vmm, key_eq)
    eqs[key_eq] = eq.subs(subs)

In [None]:
# Inspect the N equation after the removed parameters were substituted with zero.
eqs['N_eq']

In [None]:
# Build the reduced model from the substituted X/Y/N equations.
vmm2 = VMM(**eqs)

In [None]:
# Hand-picked variant: the original equations with only the
# 'rdeltadelta' parameter zeroed in the Y and N equations.
# NOTE(review): `vmm3` is not used by the other cells shown here
# (`regression3` is built from `vmm`) — confirm whether that is intended.
eqs3 = dict(
    X_eq=vmm.X_eq,
    Y_eq=vmm.Y_eq.subs([('Y_{rdeltadelta}', 0)]),
    N_eq=vmm.N_eq.subs([('N_{rdeltadelta}', 0)]),
)
vmm3 = VMM(**eqs3)

In [None]:
# Inputs shared by both regressions below.
shared_inputs = dict(
    added_masses=added_masses,
    ship_parameters=ship_parameters,
    exclude_parameters=exclude_parameters,
)

# Reduced model, fitted on the training rows only (held-out runs masked away).
regression2 = MotionRegression(vmm=vmm2, data=data.loc[~test_mask], **shared_inputs)

# Original model, fitted on the same training rows for comparison.
regression3 = MotionRegression(vmm=vmm, data=data.loc[~test_mask], **shared_inputs)

In [None]:
# Symbols on the right-hand side of the original X equation that are absent from the reduced one.
vmm.X_eq.rhs.free_symbols - vmm2.X_eq.rhs.free_symbols

In [None]:
# Symbols on the right-hand side of the original Y equation that are absent from the reduced one.
vmm.Y_eq.rhs.free_symbols - vmm2.Y_eq.rhs.free_symbols

In [None]:
# Symbols on the right-hand side of the original N equation that are absent from the reduced one.
vmm.N_eq.rhs.free_symbols - vmm2.N_eq.rhs.free_symbols

In [None]:
# Render the three reduced equations, one after another.
display(vmm2.X_eq, vmm2.Y_eq, vmm2.N_eq)


In [None]:
# Show the reduced N equation on its own.
vmm2.N_eq

In [None]:
# Control inputs of the simulation models; each call gets its own copy of the list.
control_keys = ['delta', 'thrust']

model = regression.create_model(control_keys=list(control_keys))    # catalog regression
model2 = regression2.create_model(control_keys=list(control_keys))  # reduced, training rows
model3 = regression3.create_model(control_keys=list(control_keys))  # original, training rows

In [None]:
# Parameters present in the full model but not in the reduced one are the ones
# that were removed; give them an explicit value of zero in the reduced model.
full_keys = set(model.parameters.keys())
reduced_keys = set(model2.parameters.keys())
missing = full_keys - reduced_keys
for key in missing:
    model2.parameters[key] = 0

In [None]:
from wPCC_pipeline.pipelines.prediction.nodes import simulate_euler

# Run to simulate: 22771 is one of the ids held out via `test_mask`
# (22774 was the earlier choice).
# NOTE(review): `id` shadows the Python builtin of the same name.
id = 22771
df_smooth = catalog.load(f"{id}.data_ek_smooth")

In [None]:
# Models to simulate against the measured run, keyed by plot label.
simulated_models = {
    'Abkowitz': model,
    'Reduced': model2,
    'Abkowitz train': model3,
}

# The measured data comes first, followed by one simulation per model.
dataframes = {'model test': df_smooth}
dataframes.update({
    label: simulate_euler(data=df_smooth, model=simulated_model, ek=ek, solver='Radau')
    for label, simulated_model in simulated_models.items()
})


    

In [None]:
# Track plot on a square figure, followed by the plots produced by `plot`.
fig, ax = plt.subplots(figsize=(15, 15))
track_plots(
    dataframes,
    lpp=ship_data['L'],
    beam=ship_data['B'],
    plot_boats=False,
    ax=ax,
)
plot(dataframes);

In [None]:
# Side-by-side table of the parameter values of the full and the reduced model.
# NOTE(review): the first assignment defines the index of the frame and the second
# is aligned to it — presumably both `parameters` are keyed by parameter name; confirm.
df_parameters_compare = pd.DataFrame()
df_parameters_compare['Abkowitz'] = model.parameters
df_parameters_compare['Reduced'] = model2.parameters

In [None]:
# Bar chart comparing the parameter values of the two models.
df_parameters_compare.plot.bar()

In [None]:
## Retrain on all data
# The reduced model is refitted on every row (no held-out runs). This rebinds
# `regression2` and `model2`, replacing the training-only versions from earlier cells.
regression2 = MotionRegression(
    vmm=vmm2,
    data=data,
    added_masses=added_masses,
    ship_parameters=ship_parameters,
    exclude_parameters=exclude_parameters,
)
model2 = regression2.create_model(control_keys=['delta','thrust'])

# Persist the model, the regression and the reduced equations to the catalog.
catalog_prefix = "vmm_abkowitz_simple"
catalog.save(f"{catalog_prefix}.motion_regression.joined.model", model2)
catalog.save(f"{catalog_prefix}.motion_regression.joined.regression", regression2)
catalog.save(catalog_prefix, vmm2)