# Train/Test wPCC

In [None]:
# %load imports.py
%load_ext autoreload
%autoreload 2
%reload_kedro
%config Completer.use_jedi = False  ## (To fix autocomplete)
%matplotlib inline

import warnings
warnings.filterwarnings('ignore')

import pandas as pd
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
from src.models.vmm import ModelSimulator
import matplotlib.pyplot as plt
from src.visualization.plot import track_plots, plot, captive_plot, plot_parameter_contributions, parameter_contributions
import kedro
import numpy as np
import os.path
import anyconfig

import matplotlib
matplotlib.rcParams["figure.figsize"] = (10,7)
from src.symbols import *

# Read configs:
# All three YAML files live in the same base configuration folder.
conf_path = "../conf/base/"

runs_globals_path = os.path.join(conf_path, "runs_globals.yml")
runs_globals = anyconfig.load(runs_globals_path)
model_test_ids = runs_globals["model_test_ids"]

join_globals_path = os.path.join(conf_path, "join_globals.yml")
joins = runs_globals["joins"]
join_runs_dict = anyconfig.load(join_globals_path)

globals_path = os.path.join(conf_path, "globals.yml")
global_variables = anyconfig.load(globals_path)

# Names listed under the "vmms" key of globals.yml.
vmm_names = global_variables["vmms"]

from wPCC_pipeline.pipelines.motion_regression.nodes import predict_force, fit_motions, create_model_from_motion_regression
from wPCC_pipeline.pipelines.prediction.nodes import simulate_euler
from src.prime_system import PrimeSystem

from sklearn.linear_model import LinearRegression
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import RepeatedKFold
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import f_regression
from sklearn.pipeline import Pipeline
from src.parameters import df_parameters
p = df_parameters['symbol']
from src.feature_selection import drop_multicollinearity
from src.bias_variance_tradeoff import train_test_split_run
from sklearn.metrics import r2_score
from wPCC_pipeline.pipelines.motion_regression.nodes import fit_motions, create_model_from_motion_regression
from src.models.regression import Regression
from myst_nb import glue
import src.symbols as s
from IPython.display import Latex
from src.substitute_dynamic_symbols import run
import seaborn as sns
from src.feature_selection import feature_imporance, BestFeatures, DropCorrelation

In [None]:
# Load the inputs for one ship / one VMM from the catalog and run predict_force
# on the selected columns of the joined data.
# NOTE(review): `catalog` is not defined in the visible code — presumably it is
# injected by the %reload_kedro magic in the first cell; confirm.
ship="wpcc"
#vmm_name = "vmm_martins_simple"
vmm_name = "vmm_abkowitz"
vmm = catalog.load(vmm_name)

ship_data = catalog.load(f"{ship}.ship_data")

#regression = catalog.load(f"{ship}.updated.{vmm_name}.joined.regression")
#regression.diff_eq_Y.exclude_parameters.pop('Ydelta')
#regression.diff_eq_Y.exclude_parameters.pop('Ythrustdelta')


data = catalog.load(f"{ship}.updated.joined.data_ek_smooth")

added_masses = catalog.load(f"{ship}.added_masses")
exclude_parameters = catalog.load(f"params:{ship}.motion_regression.exclude_parameters")
# Only these columns of the joined data are passed on to predict_force.
columns = ['u','v','r','u1d','v1d','r1d','delta','thrust','id']
data_with_force = predict_force(data=data[columns], added_masses=added_masses, ship_parameters=ship_data, vmm=vmm)

In [None]:
# Hold out one run (id_test) for the final simulation comparison; every other
# run id found in the data goes into the training set.
ids = list(data_with_force['id'].unique())
ids_train = ids.copy()
id_test = 22774
# list.remove raises ValueError if id_test is not among the loaded runs, which
# guards against silently training on everything.
ids_train.remove(id_test)
mask = data_with_force['id'].isin(ids_train)
data_train = data_with_force.loc[mask].copy()

ps = PrimeSystem(**ship_data)
# Take U from the training rows themselves so the speed used for priming has
# exactly the same rows as the frame being primed (previously U came from the
# full data set, which also contains the held-out run).
data_prime = ps.prime(data_train, U=data_train['U'])

In [None]:
def break_plot(y, **kwargs):
    """Plot ``y`` with NaN inserted wherever its index jumps by more than 1.

    A copy of ``y`` is made; every element whose index is more than 1 larger
    than the previous element's index is replaced by NaN, and the copy is then
    plotted. The caller's object is left unchanged.

    Note that the first sample after each gap is overwritten (not preserved)
    in the plotted copy.

    Parameters
    ----------
    y : object with ``copy()``, ``index``, item assignment by boolean mask and
        ``plot(**kwargs)`` — a pandas Series in this notebook.
    **kwargs
        Passed through unchanged to ``y.plot``.

    Returns
    -------
    None
    """
    y_ = y.copy()
    # The first element has no predecessor, so it can never start a gap.
    mask = np.concatenate([[False], np.diff(y_.index) > 1])
    # np.nan instead of np.NaN: the capitalised alias was removed in NumPy 2.0.
    y_[mask] = np.nan
    y_.plot(**kwargs)
    

In [None]:
# Run ids passed as `ids` to train_test_split_run; the plot below labels the
# resulting y_test as 'validation' and y_train as 'sub training'.
ids_test = [
    22771,
    22772,
    22773,
]
   
# Here the whole primed frame is used as X and its 'fx' column as y; the
# outputs are only used for the illustration figure in this cell and are
# overwritten by later cells.
X_train, y_train, X_test, y_test, train_data = train_test_split_run(X=data_prime,
                                                                    y=data_prime['fx'],
                                                                    id=data_prime.id, 
                                                                    ids=ids_test)

fig,ax=plt.subplots()
break_plot(y_train, ax=ax,label='sub training')
break_plot(y_test, ax=ax,label='validation')

ax.legend();
# Register the figure under the key 'fig_traintest' without displaying it here.
glue('fig_traintest',fig, display=False)

In [None]:
#def create_model(limit_corr, min_importance, **kwargs):
#    
#    drop_correlation = DropCorrelation(limit_corr=limit_corr)
#    best_features = BestFeatures(min_importance=min_importance)
#    linear_regression = LinearRegression(fit_intercept=False)
#    
#    steps = [
#        ('drop_correlation',drop_correlation),
#        ('best_features',best_features),
#        ('regression',linear_regression),
#    ]
#    
#    pipeline = Pipeline(steps)
#    return pipeline

def create_model(limit_corr, k):
    """Build the three-step regression pipeline used for the N and Y fits.

    Steps, in order:
      1. 'drop_correlation' : DropCorrelation(limit_corr=limit_corr)
      2. 'best_features'    : SelectKBest(score_func=f_regression, k=k)
      3. 'regression'       : LinearRegression(fit_intercept=False)

    Returns the unfitted Pipeline.
    """
    return Pipeline(
        [
            ('drop_correlation', DropCorrelation(limit_corr=limit_corr)),
            ('best_features', SelectKBest(score_func=f_regression, k=k)),
            ('regression', LinearRegression(fit_intercept=False)),
        ]
    )

def create_model2(k):
    """Build a two-step pipeline: k-best feature selection, then regression.

    Steps, in order:
      1. 'best_features' : SelectKBest(score_func=f_regression, k=k)
      2. 'regression'    : LinearRegression(fit_intercept=False)

    Returns the unfitted Pipeline.
    """
    return Pipeline(
        [
            ('best_features', SelectKBest(score_func=f_regression, k=k)),
            ('regression', LinearRegression(fit_intercept=False)),
        ]
    )
    
    
    

In [None]:
# Build the Regression object for the selected VMM. It is created with the
# Y/N rudder connection switched off; later cells switch it on explicitly.
# Note that `data_train` (not the primed `data_prime`) is passed as data.
ps = PrimeSystem(**ship_data)
regression = Regression(
        vmm=vmm,
        data=data_train,
        added_masses=added_masses,
        ship_parameters=ship_data,
        prime_system=ps,
        exclude_parameters=exclude_parameters, 
        connect_equations_Y_N_rudder=False
    )

In [None]:
def get_derivatives(model):
    """Map the names of the features kept by the pipeline to the fitted coefficients.

    Parameters
    ----------
    model : mapping-like fitted pipeline with the steps 'drop_correlation'
        (exposing ``features``), 'best_features' (exposing ``get_support()``)
        and 'regression' (exposing ``coef_``), as built by ``create_model``.

    Returns
    -------
    dict
        ``{feature name: coefficient}`` for the features selected by
        'best_features' out of ``drop_correlation.features``. Names and
        coefficients are paired positionally.
    """
    # Names of the features that survived the 'drop_correlation' step; the
    # selector mask refers to these, not to the original input columns.
    features = np.array(model['drop_correlation'].features)
    mask = model['best_features'].get_support()
    parameter_names = list(features[mask])
    parameter_values = model['regression'].coef_
    return dict(zip(parameter_names, parameter_values))

def get_derivatives2(model,X):
    """Map the columns of ``X`` kept by 'best_features' to the fitted coefficients.

    For pipelines without a 'drop_correlation' step (see ``create_model2``),
    so the selector mask is applied directly to ``X.columns``.

    Returns a dict ``{column name: coefficient}``, paired positionally.
    """
    selected = model['best_features'].get_support()
    names = list(X.columns[selected])
    coefficients = model['regression'].coef_
    return dict(zip(names, coefficients))

In [None]:
# Accumulates the coefficients fitted in this and the following cells
# (N here, then Y and X).
derivatives = {}

# Features/label of the N equation, with the 'mz' column as label input.
X, y = regression.diff_eq_N.calculate_features_and_label(data=data_prime, 
                                                         y=data_prime['mz'])

X_train, y_train, X_test, y_test, train_data = train_test_split_run(X=X,
                                                                    y=y,
                                                                    id=data_prime.id, 
                                                                    ids=ids_test)

model = create_model(limit_corr=0.93, k=3)
model.fit(X_train, y_train)
derivatives_n = get_derivatives(model)
derivatives.update(derivatives_n)

# Keep the test index on the prediction so both series share the same x-axis.
y_pred = pd.Series(model.predict(X_test), index=X_test.index)


fig,ax=plt.subplots()
y_test.plot(ax=ax, label='True')
y_pred.plot(ax=ax, label='Pred', alpha=0.5)
# The title reports model.score on the test split, rounded to 2 decimals.
fig.suptitle(f'N ($r^2$: {np.round(model.score(X_test, y_test),2)})');

In [None]:
# Compute features/label of the Y equation twice: (X1, y1) before and (X2, y2)
# after the parameters derived from the N fit are merged into the Y equation's
# exclude_parameters. The two labels are plotted against each other further down.
#regression.diff_eq_Y.exclude_parameters.update(derivatives_n)  # Note!
X1, y1 = regression.diff_eq_Y.calculate_features_and_label(data=data_prime, 
                                                         y=data_prime['fy'])

# This mutates `regression`; the change persists for all later cells.
regression.connect_equations_Y_N_rudder = True
connected_parameters = regression.calculate_connected_parameters_N(derivatives_n)
# combine_first: entries of connected_parameters are kept, and the existing
# exclude_parameters fill in the rest (presumably both are pandas Series — confirm).
regression.diff_eq_Y.exclude_parameters = (
                connected_parameters.combine_first(regression.diff_eq_Y.exclude_parameters)
)


X2, y2 = regression.diff_eq_Y.calculate_features_and_label(data=data_prime, 
                                                         y=data_prime['fy'])



In [None]:
# Display the exclude_parameters currently set on the Y equation.
regression.diff_eq_Y.exclude_parameters

In [None]:
# Display the regression object's connected_parameters_Y attribute.
regression.connected_parameters_Y

In [None]:
# Overlay the Y-equation label before (y1) and after (y2) the connected
# parameters were added to exclude_parameters. No labels/legend are set, so the
# two lines are only distinguishable by colour.
fig,ax=plt.subplots()
y1.plot(ax=ax)
y2.plot(ax=ax)

In [None]:
# Fit the Y equation. The first three statements repeat the connection step of
# an earlier cell, so this cell does not depend on that cell having been run
# (it still needs derivatives_n from the N fit).
regression.connect_equations_Y_N_rudder = True
connected_parameters = regression.calculate_connected_parameters_N(derivatives_n)
regression.diff_eq_Y.exclude_parameters = (
                connected_parameters.combine_first(regression.diff_eq_Y.exclude_parameters)
)

# Features/label of the Y equation, with the 'fy' column as label input.
X, y = regression.diff_eq_Y.calculate_features_and_label(data=data_prime, 
                                                         y=data_prime['fy'])

X_train, y_train, X_test, y_test, train_data = train_test_split_run(X=X,
                                                                    y=y,
                                                                    id=data_prime.id, 
                                                                    ids=ids_test)

model = create_model(limit_corr=0.95, k=2)
model.fit(X_train, y_train)
derivatives_y = get_derivatives(model)
derivatives.update(derivatives_y)

# Keep the test index on the prediction so both series share the same x-axis.
y_pred = pd.Series(model.predict(X_test), index=X_test.index)


fig,ax=plt.subplots()
y_test.plot(ax=ax, label='True')
y_pred.plot(ax=ax, label='Pred', alpha=0.5)
# The title reports model.score on the test split, rounded to 2 decimals.
fig.suptitle(f'Y ($r^2$: {np.round(model.score(X_test, y_test),2)})');

In [None]:
# Fit the X equation on the training split using only the two hand-picked
# features below (no feature-selection pipeline), then compare the prediction
# with the test split.
X, y = regression.diff_eq_X.calculate_features_and_label(
    data=data_prime,
    y=data_prime['fx'],
)

X_train, y_train, X_test, y_test, train_data = train_test_split_run(
    X=X,
    y=y,
    id=data_prime.id,
    ids=ids_test,
)

#model = create_model2(k=10)
#model.fit(X_train, y_train)

model = LinearRegression(fit_intercept=False)
features = ['Xu', 'Xvr']
model.fit(X_train[features], y_train)

# Coefficients come back in the same order as the feature columns.
derivatives_x = dict(zip(features, model.coef_))
derivatives.update(derivatives_x)

y_pred = pd.Series(model.predict(X_test[features]), index=X_test.index)

fig, ax = plt.subplots()
y_test.plot(ax=ax, label='True')
y_pred.plot(ax=ax, label='Pred', alpha=0.5)
fig.suptitle(f'X ($r^2$: {np.round(model.score(X_test[features], y_test),2)})');

In [None]:
# Display the coefficients fitted for the X equation.
derivatives_x

In [None]:
# Load the stored model for this ship/VMM from the catalog; its parameters
# serve as the 'original' column in the comparison table built next.
ship_model = catalog.load(f"{ship}.updated.{vmm_name}.joined.model")

In [None]:
# Build the parameter table: 'original' holds the stored model's parameters,
# 'selected' holds the parameter set assembled in this notebook (0 for every
# parameter that none of the sources below provides).
# NOTE: this rebinds `df_parameters`, which the first cell imported from
# src.parameters; re-run the first cell if the imported table is needed again.
df_parameters = pd.DataFrame()
df_parameters['original'] = ship_model.parameters

# Assemble the values in a standalone float Series and assign it once.
# `df_parameters['selected'].update(...)` is chained assignment: with pandas
# copy-on-write it updates a temporary and leaves the frame at 0.
selected_parameters = pd.Series(0.0, index=df_parameters.index)
# Later updates override earlier ones for parameters present in several sources.
selected_parameters.update(added_masses)
selected_parameters.update(derivatives)
selected_parameters.update(regression.exclude_parameters)
selected_parameters.update(regression.diff_eq_Y.exclude_parameters)
df_parameters['selected'] = selected_parameters

# Copy of the stored model that uses the selected parameter set instead.
ship_model2 = ship_model.copy()
ship_model2.parameters = df_parameters['selected']

In [None]:
# Bar plot of the selected parameters that are non-zero, leaving out 'Xthrust'.
mask = df_parameters['selected'].ne(0)
mask['Xthrust'] = False
df_parameters.loc[mask, 'selected'].plot.bar()

In [None]:
# Load the data of the held-out run (id_test) and the `ek` entry for this
# ship/VMM. `ek` is only referenced by the commented-out simulate_euler calls.
df_test = catalog.load(f'{ship}.updated.{id_test}.data_ek_smooth')
ek = catalog.load(f"{ship}.{vmm_name}.ek")

In [None]:
# Simulate the held-out run with the stored model and keep a copy of the
# result frame.
#df_predict = simulate_euler(data=df_test, model=ship_model, ek=ek)
result = ship_model.simulate(df_test)
df_predict=result.result.copy()

In [None]:
# Features of the X equation evaluated on the simulated data, together with
# the stored model's parameters. Note that this rebinds the notebook-global X.
X = regression.diff_eq_X.calculate_features(df_predict)
parameters = ship_model.parameters

In [None]:
# Names present both as feature columns and as model parameters.
# Built from a set, so the order of `keys` is not defined.
keys = list(set(X.columns) & set(parameters.keys()))

In [None]:
# Multiply each feature column by its parameter value.
# NOTE(review): X still has all of its columns here; assuming X is a DataFrame
# and `parameters` a Series, columns of X that are not in `keys` come out as NaN
# after alignment. `X[keys]` may have been intended — confirm.
forces = X.multiply(parameters[keys])

In [None]:
# Simulate the held-out run with the model that uses the parameters selected
# in this notebook and keep a copy of the result frame.
#df_predict2 = simulate_euler(data=df_test, model=ship_model2, ek=ek)
result2 = ship_model2.simulate(df_test)
df_predict2=result2.result.copy()


## VMM Martin

In [None]:
# Load the "vmm_martins_simple" VMM and its `ek` entry for comparison.
# `ek_martin` is only referenced by the commented-out simulate_euler call.
vmm_martin = catalog.load("vmm_martins_simple")
ek_martin = catalog.load(f"{ship}.vmm_martins_simple.ek")

In [None]:
# Fit the Martin VMM on the same training data with the project's fit_motions
# node (the second return value is discarded), build a model from that
# regression and simulate the held-out run with it.
regression_martin, _ = fit_motions(data=data_train, 
                                   added_masses=added_masses, 
                                   ship_data=ship_data, 
                                   vmm=vmm_martin, 
                                   exclude_parameters=exclude_parameters)

model_martin = create_model_from_motion_regression(regression=regression_martin)
#df_predict_martin = simulate_euler(data=df_test, model=model_martin, ek=ek_martin)
result_martin = model_martin.simulate(df_test)
df_predict_martin=result_martin.result.copy()

In [None]:
# Data sets to compare in the plots below, keyed by legend label.
dataframes = {
    'Experiment' : df_test,
    'Prediction' : df_predict,
    'Prediction2' : df_predict2,
    'Prediction Martin' : df_predict_martin,
    #'Prediction Abkowitz' : df_predict_abkowitz,
    
}

# Plot style per label. 'Prediction Abkowitz' has a style entry but no entry
# in `dataframes` (it is commented out above).
styles = {
    'Experiment' : {'style':'r--'},
    'Prediction' : {'style':'g-'},
    'Prediction2' : {'style':'b-'},
    'Prediction Martin' : {'style':'m-'},
    'Prediction Abkowitz' : {'style':'y-'},
    
}

In [None]:
# Track plot of all data sets; `time_window` is also reused by the next plot cell.
# The trailing semicolon suppresses the notebook's echo of the return value.
time_window=[0,40]
track_plots(dataframes, 
            lpp=ship_data['L'], 
            beam=ship_data['B'],  
            styles=styles, N=2,
            time_window=time_window,
           );

In [None]:
# Plot the 'u', 'v' and 'r' columns of all data sets in a single column,
# using the same time window as the track plot.
plot(dataframes=dataframes, 
     keys=['u','v','r'], 
     ncols=1, 
     styles=styles, 
     zero_origo=False,
     time_window=time_window,
    );

In [None]:
# Parameter contributions for the Martin model. Pass the regression that
# model_martin was created from (regression_martin); the Abkowitz `regression`
# used before belongs to a different VMM than the model being plotted.
plot_parameter_contributions(data=df_predict_martin, model=model_martin, regression=regression_martin)

In [None]:
# Parameter contributions for the model that uses the parameters selected in
# this notebook, evaluated on its own simulation result.
plot_parameter_contributions(data=df_predict2, model=ship_model2, regression=regression)

In [None]:
# Display the regression object's X_eq attribute.
regression.X_eq