# Train test split KVLCC2 HSVA

In [None]:
# %load imports.py
%load_ext autoreload
%autoreload 2
%reload_kedro
%config Completer.use_jedi = False  ## (To fix autocomplete)
%matplotlib inline

import warnings
warnings.filterwarnings('ignore')

import pandas as pd
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
from src.models.vmm import ModelSimulator
import matplotlib.pyplot as plt
from src.visualization.plot import track_plots, plot, captive_plot
import kedro
import numpy as np
import os.path
import anyconfig

import matplotlib
matplotlib.rcParams["figure.figsize"] = (10,7)
from src.symbols import *

# Build the paths of the three global YAML files in the base configuration
# directory, then load them.
conf_path = os.path.join("../conf/base/")
runs_globals_path = os.path.join(conf_path, "runs_globals.yml")
join_globals_path = os.path.join(conf_path, "join_globals.yml")
globals_path = os.path.join(conf_path, "globals.yml")

# Run-level settings: model test ids and the configured joins.
runs_globals = anyconfig.load(runs_globals_path)
model_test_ids = runs_globals["model_test_ids"]
joins = runs_globals["joins"]

# Join definitions.
join_runs_dict = anyconfig.load(join_globals_path)

# Project-wide variables; "vmms" holds the VMM names.
global_variables = anyconfig.load(globals_path)
vmm_names = global_variables["vmms"]

from wPCC_pipeline.pipelines.motion_regression.nodes import predict_force, fit_motions, create_model_from_motion_regression
from wPCC_pipeline.pipelines.prediction.nodes import simulate_euler
from src.prime_system import PrimeSystem

from sklearn.linear_model import LinearRegression
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import RepeatedKFold
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import f_regression
from sklearn.pipeline import Pipeline

In [None]:
# Ship and VMM names; both are used to build the catalog keys below.
ship="kvlcc2_hsva"
#vmm_name = "vmm_martins_simple"
vmm_name = "vmm_abkowitz"
vmm = catalog.load(vmm_name)

# Ship data: later unpacked into PrimeSystem and read for 'L' and 'B' when plotting.
ship_data = catalog.load(f"{ship}.ship_data")

# Regression object and dataset stored for this ship / VMM combination.
regression = catalog.load(f"{ship}.updated.{vmm_name}.joined.regression")
data = catalog.load(f"{ship}.updated.joined.data_ek_smooth")

added_masses = catalog.load(f"{ship}.added_masses")
exclude_parameters = catalog.load(f"params:{ship}.motion_regression.exclude_parameters")
# NOTE(review): presumably adds the force columns (e.g. 'fx', read from
# data_prime later in the notebook) -- confirm in predict_force.
data_with_force = predict_force(data=data, added_masses=added_masses, ship_parameters=ship_data, vmm=vmm)

In [None]:
from src.bias_variance_tradeoff import train_test_split_exteme, train_test_split_run
from sklearn.metrics import r2_score

In [None]:
# Convert the force-augmented data with PrimeSystem.prime, using its own 'U'
# column as the velocity argument.
# NOTE(review): presumably this is the non-dimensional ("prime") form -- confirm
# in src.prime_system.
ps = PrimeSystem(**ship_data)
#data['U'] = np.sqrt(data['u']**2 + data['v']**2)
data_prime = ps.prime(data_with_force, U=data_with_force['U'])

In [None]:
def create_model(k):
    """Return an unfitted feature-selection + linear-regression pipeline.

    Args:
        k: forwarded to ``SelectKBest`` (scored with ``f_regression``) as the
            number of features to keep.

    Returns:
        A ``Pipeline`` with the steps ``'select_k_best'`` and
        ``'linear_regression'`` (a default-constructed ``LinearRegression``).
    """
    return Pipeline(
        steps=[
            ('select_k_best', SelectKBest(score_func=f_regression, k=k)),
            ('linear_regression', LinearRegression()),
        ]
    )
        

def predict(model, X_test, dof, data):
    """Predict with ``model`` and return the result indexed like ``X_test``.

    Args:
        model: object exposing ``predict(X)``.
        X_test: feature table; its ``index`` becomes the index of the result.
        dof: accepted for call compatibility, not used.
        data: accepted for call compatibility, not used.

    Returns:
        ``pd.Series`` holding the predictions.
    """
    return pd.Series(model.predict(X_test), index=X_test.index)
        
def vary_k(X_train, y_train, X_test, y_test, dof, data):
    """Score the selection/regression pipeline for an increasing number of features.

    For every ``k`` a fresh pipeline from ``create_model(k)`` is fitted on the
    training split and its R2 score is computed on the test split.

    Args:
        X_train: training features (columns are the candidate features).
        y_train: training label.
        X_test: test features.
        y_test: test label.
        dof: forwarded to ``predict``.
        data: forwarded to ``predict``.

    Returns:
        ``pd.Series`` of R2 scores indexed by ``k``; empty when ``X_train`` has
        fewer than two columns.
    """
    scores = {}
    # The feature count comes from the training data that was passed in, not
    # from a notebook-global ``X``.
    # NOTE(review): the upper bound is exclusive (as before), so the model using
    # all features is not scored -- confirm this is intended.
    for k in range(1, X_train.shape[1]):
        model = create_model(k=k)
        model.fit(X=X_train, y=y_train)

        y_pred = predict(model=model, X_test=X_test, dof=dof, data=data)
        scores[k] = r2_score(y_true=y_test, y_pred=y_pred)

    # Convert once after the loop so the return type is a Series even when the
    # loop body never runs.
    return pd.Series(scores, dtype=float)

In [None]:
# Display the exclude-parameters loaded from the catalog.
exclude_parameters

In [None]:
# Set the X equation's exclude_parameters to {'Xthrust': 1}, compute its
# features and label from data_prime (label: 'fx'), and display the label.
exclude_parameters_ = {'Xthrust':1}
regression.diff_eq_X.exclude_parameters = pd.Series(exclude_parameters_)
X_X, y_X = regression.diff_eq_X.calculate_features_and_label(data=data_prime, y=data_prime['fx'])
y_X

In [None]:
# Point the regression at data_prime and rebind exclude_parameters_ to a Series
# with Xthrust = 0.90 (replacing the dict defined in the previous cell).
regression.data_prime = data_prime
exclude_parameters_ = pd.Series({'Xthrust':0.90})

# Apply the same exclusion to the regression and to its X equation, then
# recompute features and labels.
# NOTE(review): presumably this populates regression.X_X / y_X, X_Y / y_Y and
# X_N / y_N, which the next cell reads -- confirm.
regression.exclude_parameters = exclude_parameters_
regression.diff_eq_X.exclude_parameters = exclude_parameters_
regression.calculate_features_and_labels()

In [None]:
# Thrust term scaled by the catalog's 'Xthrust' value; only referenced by the
# commented-out 'fx' alternative in `xys` below.
xthrust = exclude_parameters['Xthrust']*data_prime['thrust']

# Features and label per degree of freedom. The optional 'k' is the number of
# features SelectKBest keeps for the final model; without it all are kept.
xys = {
    #'fx' : {'data' : (regression.X_X, regression.y_X-xthrust),},
    'fx': {'data': (regression.X_X, regression.y_X), 'k': 13},
    'fy': {'data': (regression.X_Y, regression.y_Y)},
    'mz': {'data': (regression.X_N, regression.y_N)},
}

parameters = {}
for dof, items in xys.items():

    X, y = items['data']

    # NOTE(review): presumably holds out the listed run id as the test set --
    # confirm in train_test_split_run.
    #X_train, y_train, X_test, y_test, train_data = train_test_split_exteme(X=X, y=y, data=data)
    X_train, y_train, X_test, y_test, train_data = train_test_split_run(
        X=X, y=y, id=data.id, ids=["HSVA_CPMC_KVLCC2_Z_35_05"]
    )

    k = items.get('k', X_train.shape[1])

    # Test-set R2 as a function of the number of selected features.
    scores = vary_k(X_train, y_train, X_test, y_test, dof, data_prime)

    fig, ax = plt.subplots()
    scores.plot(ax=ax, style='.-')
    ax.grid(True)
    # One tick per evaluated k; np.arange(first, last, 1) left out the last one.
    ax.set_xticks(scores.index)
    ax.set_title(dof)
    ax.set_ylim(0.2, 1)

    # Final model for this degree of freedom.
    model = create_model(k=k)
    model.fit(X=X_train, y=y_train)

    # Pair the names of the selected features with the fitted coefficients.
    # NOTE(review): LinearRegression() fits an intercept by default and only
    # coef_ is read here, so the intercept is dropped -- confirm this is intended.
    select_k_best = model['select_k_best']
    parameter_names = X_train.columns[select_k_best.get_support()]
    linear_regression = model['linear_regression']
    parameter_values = linear_regression.coef_
    parameters.update(dict(zip(parameter_names, parameter_values)))

    y_pred = predict(model=model, X_test=X_test, dof=dof, data=data_prime)

    # Training label, test label and test prediction in one figure.
    fig, ax = plt.subplots()
    y_train.plot(style='.', ax=ax)
    y_test.plot(style='.', ax=ax)
    y_pred.plot(style='o', ax=ax, alpha=0.2)
    ax.set_title(dof)

In [None]:
ship_model = catalog.load(f"{ship}.updated.{vmm_name}.joined.model")
df_parameters = pd.DataFrame()
df_parameters['original'] = ship_model.parameters

# Build the 'selected' column as a standalone float Series and assign it once.
# Calling .update() on df_parameters['selected'] is a chained in-place update
# that does not reach the frame under pandas Copy-on-Write, and starting from
# the integer 0 makes the column int64 before float values are written to it.
# Later updates win: added masses, then regressed parameters, then the
# excluded (fixed) parameters.
selected = pd.Series(0.0, index=df_parameters.index)
selected.update(added_masses)
selected.update(parameters)
selected.update(exclude_parameters_)
df_parameters['selected'] = selected

# Copy of the loaded model that uses the selected parameter set.
ship_model2 = ship_model.copy()
ship_model2.parameters = df_parameters['selected']

df_parameters['selected'].plot.bar()

In [None]:
# Load the data of the run used as test set in the split above, plus the 'ek'
# object stored for this ship / VMM.
test = "HSVA_CPMC_KVLCC2_Z_35_05"
df_test = catalog.load(f'{ship}.updated.{test}.data_ek_smooth')
ek = catalog.load(f"{ship}.{vmm_name}.ek")

In [None]:
# Run simulate_euler on the test run with the selected-parameter model; the
# return value is displayed as the cell output.
simulate_euler(data=df_test, model=ship_model2, ek=ek)

In [None]:
model = ship_model2
df_ = df_test.copy()
# Zero column 'r' in the first row before simulating. A single .iloc call is
# used because `df_.iloc[0]['r'] = 0` assigns to a temporary row object and can
# leave df_ unchanged.
df_.iloc[0, df_.columns.get_loc('r')] = 0
result = model.simulate(df_=df_)
df_result = result.result.copy()

# Add degree versions of 'psi' and 'delta' to both frames for plotting.
df_result['psi_deg'] = np.rad2deg(df_result['psi'])
df_test['psi_deg'] = np.rad2deg(df_test['psi'])
df_result['delta_deg'] = np.rad2deg(df_result['delta'])
df_test['delta_deg'] = np.rad2deg(df_test['delta'])


In [None]:
# Frames to compare in the plots below, keyed by legend label.
dataframes = {'Experiment': df_test, 'Prediction': df_result}

# Line style per label, keyed like `dataframes`.
styles = {
    label: {'style': fmt}
    for label, fmt in (('Experiment', 'r--'), ('Prediction', 'b-'))
}

In [None]:
# Track plot of experiment vs. prediction, passing ship_data['L'] and
# ship_data['B'] as lpp and beam.
track_plots(dataframes, lpp=ship_data['L'], beam=ship_data['B'],  styles=styles);

In [None]:
# Time series of the listed keys: first with time_window=[0,2], then without a
# time window.
plot(dataframes=dataframes, keys=['delta_deg','psi_deg','y0','u','v','r'], ncols=1, styles=styles, time_window=[0,2]);
plot(dataframes=dataframes, keys=['delta_deg','psi_deg','y0','u','v','r'], ncols=1, styles=styles);

In [None]:
# Display the regressed parameters collected in the per-dof loop.
parameters

In [None]:
import statsmodels.api as sm
import seaborn as sns

# Synthetic example with two nearly collinear regressors: x2 is x1 plus small
# noise, and y is their sum plus noise.
# NOTE: this rebinds the notebook globals X, y and data used by earlier cells.
N = 1000
x1 = np.linspace(0, 1, N)
X = pd.DataFrame({'x1': x1})
x2 = X['x1'] + np.random.normal(scale=0.05, size=N)
X['x2'] = x2
y = x1 + x2 + np.random.normal(scale=0.1, size=N)

# Regressors and response side by side for the pair plot.
data = X.assign(y=y)

sns.pairplot(data=data)

In [None]:
# Correlation matrix of x1, x2 and y.
data.corr()

In [None]:
# OLS of y on both regressors (no constant column is added to X).
# NOTE: `result` is rebound here; it previously held the simulation result.
model1 = sm.OLS(y,X)
result = model1.fit()
result.summary()

In [None]:
# Same regression on first differences; the first row is dropped because
# diff() leaves it as NaN.
y2 = y.diff()[1:].copy()
X2 = X.diff().iloc[1:].copy()
model2 = sm.OLS(y2,X2)
result2 = model2.fit()
result2.summary()

In [None]:
# OLS of y on x1 only, i.e. with x2 left out.
X3 = X[['x1']]
model3 = sm.OLS(y,X3)
result3 = model3.fit()
result3.summary()

In [None]:
# Correlation between the regressors in levels.
X.corr()

In [None]:
# Correlation between the regressors after differencing.
X2.corr()

In [None]:
# Condition number reported for the fit in levels.
result.condition_number

In [None]:
# Condition number reported for the fit on first differences.
result2.condition_number