## SI corrector box

In [None]:
# Notebook setup: interactive matplotlib backend plus automatic module reloading.
# Comments stay on their own lines: text after a line magic is parsed as its argument.
%matplotlib notebook
%load_ext autoreload
# Mode 2: re-import all modules before each cell is executed.
%autoreload 2

In [None]:
import pandas as pd
# Display limits for rich DataFrame output.
# max_columns was previously set to 999 and then immediately overwritten with None;
# only the effective setting is kept (None = never truncate columns).
pd.set_option("display.max_rows", 999)
pd.set_option("display.max_columns", None)
import numpy as np
import os
import matplotlib.pyplot as plt
#plt.style.use('paper')

#import data
import copy
from rolldecay.bis_system import BisSystem
from rolldecay import database

import rolldecayestimators.lambdas as lambdas
from rolldecayestimators.substitute_dynamic_symbols import run, lambdify, significant_numbers
from rolldecayestimators.ikeda_estimator import IkedaQuadraticEstimator

from rolldecay.paper_writing import save_fig
from rolldecay.froude_scaling import froude_scale
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split
from latex_helpers import pylatex_extenders
import rolldecay
from rolldecay.paper_writing import save_fig
import rolldecayestimators.simplified_ikeda as si
import rolldecayestimators.sensitivity as sensitivity

from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
from sklearn.pipeline import Pipeline
from rolldecayestimators.polynom_estimator import Polynom
from rolldecayestimators import symbols

from sklearn.model_selection import cross_val_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.feature_selection import VarianceThreshold
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import chi2
from sklearn.feature_selection import f_regression

from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Ridge
from sklearn.linear_model import Lasso

from sklearn.model_selection import cross_validate

In [None]:
# Database handle; its `engine` is used below to read the `description` table.
db = database.get_db()

In [None]:
# Load the Simplified Ikeda results and the quadratic roll-decay results from the
# database tables named below, both with the same exclusion table.
# NOTE(review): limit_score presumably filters on a fit-quality score (0.5 vs 0.9) —
# confirm in database.load.
df_ikeda = database.load(rolldecay_table_name='rolldecay_simplified_ikeda', limit_score=0.5, 
                             exclude_table_name='rolldecay_exclude')

df_rolldecay = database.load(rolldecay_table_name='rolldecay_quadratic_b', limit_score=0.9, 
                             exclude_table_name='rolldecay_exclude')

# 1.852/3.6 is the knots -> m/s factor; assumes ship_speed is stored in knots — TODO confirm.
df_rolldecay['ship_speed']*=1.852/3.6
df_ikeda['ship_speed']*=1.852/3.6

# Per-variable description/unit table that is handed to froude_scale below.
# The speed units are set to m/s and a row is added for the 'Disp' column created further down.
# NOTE(review): 'discplacement' is a typo in the stored description text.
description = pd.read_sql_table('description', con=db.engine, index_col='id')
description.loc['ship_speed','unit']='m/s'
description.loc['VDES','unit']='m/s'
description.loc['Disp'] = {'description':'Ship discplacement','unit':'m3'}

# Trim angle in radians from (TA - TF) / lpp (presumably aft/fore draught and ship length),
# then keep only the model tests with |trim| < 0.3 degrees.
# Only df_rolldecay is filtered here; df_ikeda is restricted later by the inner merge.
T_f=df_rolldecay['TF']
T_a=df_rolldecay['TA']
L_pp=df_rolldecay['lpp']
df_rolldecay['trim']=np.arctan((T_a-T_f)/L_pp)
mask = df_rolldecay['trim'].abs() < np.deg2rad(0.3)
df_rolldecay=df_rolldecay.loc[mask].copy()

# 'Disp' is a plain copy of 'Volume'.
df_ikeda['Disp']=df_ikeda['Volume']
df_rolldecay['Disp']=df_rolldecay['Volume']

# First scaling pass with an empty skip list.
# NOTE(review): froude_scale presumably rescales each column according to its unit in
# `description` and the row's existing scale_factor — confirm in rolldecay.froude_scaling.
skip=[]
df_ikeda = froude_scale(data=df_ikeda, description=description, skip=skip)
df_rolldecay = froude_scale(data=df_rolldecay, description=description, skip=skip)

# Fixed roll amplitude of 3 degrees (in radians), passed to B_e_lambda as phi_a.
phi_a = np.deg2rad(3)
#phi_a = df_rolldecay['phi_start'].abs()
#phi_a=np.deg2rad(3.5)

# Constants passed to the *_hat lambdas (presumably gravity in m/s^2 and water density in kg/m^3).
g = 9.81
rho=1000
df_ikeda['B_e'] = run(function=lambdas.B_e_lambda, inputs=df_ikeda, phi_a=phi_a)
df_ikeda['B_e_hat'] = run(function=lambdas.B_e_hat_lambda, inputs=df_ikeda, g=g, rho=rho)
df_rolldecay['B_e'] = run(function=lambdas.B_e_lambda, inputs=df_rolldecay, phi_a=phi_a)
df_rolldecay['B_e_hat'] = run(function=lambdas.B_e_hat_lambda, inputs=df_rolldecay, g=g, rho=rho)

df_rolldecay['omega0_hat'] = run(function=lambdas.omega0_lambda, inputs=df_rolldecay, g=g)
df_ikeda['omega0_hat'] = run(function=lambdas.omega0_lambda, inputs=df_ikeda, g=g)

# Second scaling pass with scale_factor overwritten by lpp.
# NOTE(review): presumably rescales every ship to unit length so hulls are comparable — verify;
# the order of the two froude_scale passes matters and must not be changed.
df_ikeda['scale_factor']=df_ikeda['lpp']
df_ikeda = froude_scale(data=df_ikeda, description=description, skip=skip)
df_rolldecay['scale_factor']=df_rolldecay['lpp']
df_rolldecay = froude_scale(data=df_rolldecay, description=description, skip=skip)

In [None]:
# Pair every model test with its Simplified Ikeda result via an index-aligned inner join;
# columns present in both frames get the '_ikeda' suffix on the Ikeda side.
df_compare = df_rolldecay.merge(
    df_ikeda,
    how='inner',
    left_index=True,
    right_index=True,
    suffixes=('', '_ikeda'),
)

# Signed difference: model test minus Simplified Ikeda.
df_compare['error'] = df_compare['B_e_hat'] - df_compare['B_e_hat_ikeda']

In [None]:
# Scatter of the model-test damping against the Simplified Ikeda prediction.
# Points on the red diagonal are perfect predictions.
fig, ax = plt.subplots()
x = df_compare['B_e_hat']
y = df_compare['B_e_hat_ikeda']
ax.plot(x, y, '.', alpha=0.5, label='limited')

# Raw strings keep the LaTeX backslash literal; '\h' in a normal string literal is an
# invalid escape sequence and triggers a warning on modern Python.
ax.set_xlabel(r'$\hat{B_e}$ (model test)')
ax.set_ylabel(r'$\hat{B_e}$ (Simplified Ikeda)')

# Use one common upper limit so both axes share the same range and the diagonal is at 45 degrees.
xlim = ax.get_xlim()
ylim = ax.get_ylim()
lim = np.max([xlim[1], ylim[1]])
ax.set_xlim(0, lim)
ax.set_ylim(0, lim)
ax.plot([0, lim], [0, lim], 'r-')
ax.set_aspect('equal', 'box')
ax.legend()
ax.grid(True)

In [None]:
# Baseline: R^2 of the uncorrected Simplified Ikeda prediction against the model-test value.
r2_score(y_true=df_compare['B_e_hat'], y_pred=df_compare['B_e_hat_ikeda'])

## SI corrector box: regression on the Simplified Ikeda components

In [None]:
# Regressors (presumably the Simplified Ikeda damping components) and the regression target.
features = ['B_W_HAT', 'B_F_HAT', 'B_BK_HAT', 'B_E_HAT', 'B_L_HAT']
labels = ['B_e_hat']

# Work on an independent frame that holds only complete rows of the selected columns.
X = df_compare[features + labels].dropna()

In [None]:
# Reproducible 80/20 split: sample the training rows with a fixed seed,
# the remaining index labels form the test set.
train_dataset = X.sample(frac=0.8, random_state=0)
test_dataset = X.drop(index=train_dataset.index)

## Split features from labels
Separate the target value, or "label", from the features. This label is the value that you will train the model to predict.

In [None]:
# pop() removes the target column from each split in place and returns it,
# leaving only the feature columns behind in train_dataset / test_dataset.
train_labels, test_labels = (
    dataset.pop(labels[0]) for dataset in (train_dataset, test_dataset)
)

In [None]:
# Summary statistics of the training features, transposed so that each feature is a row
# and 'mean' / 'std' are columns (this is the layout norm() reads).
train_stats = train_dataset.describe().T
train_stats

In [None]:
def norm(x, stats=None):
    """Standardise ``x`` column-wise as ``(x - mean) / std``.

    Parameters
    ----------
    x : pandas.DataFrame
        Data whose columns match the index of ``stats``.
    stats : pandas.DataFrame, optional
        Table indexed by column name with ``'mean'`` and ``'std'`` columns.
        Defaults to the notebook-level ``train_stats`` so existing calls
        ``norm(x)`` behave exactly as before.

    Returns
    -------
    pandas.DataFrame
        ``x`` shifted by the mean and divided by the standard deviation.
        A column whose std is zero yields inf/NaN, as with any division by zero.
    """
    if stats is None:
        # Resolved at call time, so the global only has to exist when norm() is used.
        stats = train_stats
    return (x - stats['mean']) / stats['std']

# Standardise both splits with the training-set statistics.
normed_train_data, normed_test_data = norm(train_dataset), norm(test_dataset)

# Rebuild the full data set (all complete rows), split off the target and standardise
# the features. Note that norm() still uses the statistics of the 80 % training sample.
X = df_compare[features + labels].dropna()
y = X.pop(labels[0])
X = norm(X)

In [None]:
# Shared pipeline building blocks; the same instances are reused by the pipelines below.
linear_regression = LinearRegression()            # final least-squares estimator
variance_treshold = VarianceThreshold()           # default threshold: drops constant columns
polynomial_features = PolynomialFeatures(degree=2)  # second-order feature expansion

In [None]:
# Sweep the number of selected polynomial terms k and record the cross-validated score.
# Cross-validation is run ONCE per k and both mean and std are taken from the same fold
# scores (previously it was run twice per k). The separate model.fit() call was dropped
# as well: cross_val_score clones and refits the pipeline for every fold.
cv = 5  # number of folds; also reused by the scoring cells further down

ks = np.arange(1, 11)
scores = []
stds = []
for k in ks:
    select_k_best = SelectKBest(k=k, score_func=f_regression)
    steps = [
        ('polynomial_feature', polynomial_features),
        ('variance_treshold', variance_treshold),
        ('select_k_best', select_k_best),
        ('linear_regression', linear_regression),
    ]
    model = Pipeline(steps=steps)

    fold_scores = cross_val_score(estimator=model, X=X, y=y, cv=cv)
    scores.append(fold_scores.mean())
    stds.append(fold_scores.std())

scores = np.array(scores)
stds = np.array(stds)


In [None]:
# Cross-validated score versus number of selected terms, with a +/- one std band.
# Axis labels and a legend are added so the figure can be read on its own.
fig, ax = plt.subplots()
ax.plot(ks, scores - stds, '.-', label='mean - std')
ax.plot(ks, scores, '.-', label='mean')
ax.plot(ks, scores + stds, '.-', label='mean + std')
ax.set_xlabel('k (number of selected features)')
ax.set_ylabel('cross-validated score ($R^2$)')
ax.legend()
ax.grid(True)

In [None]:
# Degree-2 candidate: expand to second-order terms, drop constant columns,
# keep the 4 terms ranked best by f_regression, then fit ordinary least squares.
polynomial_features = PolynomialFeatures(degree=2)
select_k_best = SelectKBest(score_func=f_regression, k=4)
steps = [
    ('polynomial_feature', polynomial_features),
    ('variance_treshold', variance_treshold),
    ('select_k_best', select_k_best),
    ('linear_regression', linear_regression),
]
model = Pipeline(steps=steps)
model.fit(X=X, y=y)

In [None]:
# Mean cross-validated score of the degree-2, k=4 pipeline.
cross_val_score(model, X, y, cv=cv).mean()

In [None]:
# Linear candidate: degree 1 only adds a bias column (removed again as constant by the
# variance threshold); all original features are kept by SelectKBest.
select_k_best = SelectKBest(score_func=f_regression, k=len(features))
polynomial_features = PolynomialFeatures(degree=1)
steps = [
    ('polynomial_feature', polynomial_features),
    ('variance_treshold', variance_treshold),
    ('select_k_best', select_k_best),
    ('linear_regression', linear_regression),
]
model = Pipeline(steps=steps)
model.fit(X=X, y=y)

In [None]:
# Mean cross-validated score of the linear (degree-1) pipeline.
cross_val_score(model, X, y, cv=cv).mean()

In [None]:
# Regularised linear estimators with default settings.
# NOTE(review): neither instance is referenced by any pipeline in the visible cells.
ridge_regression = Ridge()
lasso_regression = Lasso()

In [None]:
# NOTE(review): this cell repeats the earlier degree-1 pipeline unchanged and still ends in
# `linear_regression`, although `ridge_regression` / `lasso_regression` were just created.
# If a regularised fit was intended, the final step has to be swapped — confirm the intent.
polynomial_features = PolynomialFeatures(degree=1)
select_k_best = SelectKBest(k=len(features), score_func=f_regression)
steps=[
        ('polynomial_feature', polynomial_features),
        #('standard_scaler', standard_scaler),
        ('variance_treshold',variance_treshold),
        ('select_k_best',select_k_best),
        ('linear_regression', linear_regression)
]
# This `model` is the one that the later cells refit, plot and pass to Polynom.
model = Pipeline(steps=steps)
model.fit(X=X, y=y)

In [None]:
# Mean cross-validated score of the pipeline built in the previous cell.
cross_val_score(model, X, y, cv=cv).mean()

In [None]:
# Un-normalised counterpart of X / y: complete rows taken straight from df_compare.
df = df_compare[features + labels].dropna()
X_, y_ = df[features], df[labels[0]]


In [None]:
# Refit the same pipeline on the raw (un-normalised) features X_.
model.fit(X=X_, y=y_)

In [None]:
# In-sample score: evaluated on the same data the model was just fitted on (no cross-validation).
model.score(X=X_, y=y_)

In [None]:
# Compare the refitted ("improved") model and the raw Simplified Ikeda prediction against
# the model-test damping. Points on the red diagonal are perfect predictions.
fig, ax = plt.subplots()
ax.plot(y_, model.predict(X_), '.', label='improved')
# labels[0] selects the target as a 1-D Series; df_compare[labels] is a one-column DataFrame.
ax.plot(df_compare[labels[0]], df_compare['B_e_hat_ikeda'], 'x', label='Simplified Ikeda')

# Raw strings keep the LaTeX backslash literal; '\h' in a normal string literal is an
# invalid escape sequence and triggers a warning on modern Python.
# NOTE(review): the y axis carries both series, yet its label only names Simplified Ikeda.
ax.set_xlabel(r'$\hat{B_e}$ (model test)')
ax.set_ylabel(r'$\hat{B_e}$ (Simplified Ikeda)')

# Use one common upper limit so both axes share the same range and the diagonal is at 45 degrees.
xlim = ax.get_xlim()
ylim = ax.get_ylim()
lim = np.max([xlim[1], ylim[1]])
ax.set_xlim(0, lim)
ax.set_ylim(0, lim)
ax.plot([0, lim], [0, lim], 'r-')
ax.set_aspect('equal', 'box')
ax.legend()
ax.grid(True)

In [None]:
# Wrap the fitted pipeline in a Polynom estimator, fit it on the same raw data and display
# its `equation` attribute as the cell output.
# NOTE(review): presumably this yields a symbolic expression for B_e_hat in terms of the
# feature columns — confirm in rolldecayestimators.polynom_estimator.
polynom = Polynom(model=model, columns=X_.columns, y_symbol=symbols.B_e_hat)
polynom.fit(X=X_, y=y_)
polynom.equation

In [None]:
# In-sample score of the Polynom estimator on the data it was fitted on.
polynom.score(X=X_,y=y_)