## Compare Simplified Ikeda with database

In [None]:
# %load ../../imports.py
"""
This is the standard setup for the notebooks.
"""

%matplotlib inline
%load_ext autoreload
%autoreload 2

from jupyterthemes import jtplot
jtplot.style(theme='onedork', context='notebook', ticks=True, grid=False)

import pandas as pd
pd.options.display.max_rows = 999
pd.options.display.max_columns = 999
pd.set_option("display.max_columns", None)
import numpy as np
import os
import matplotlib.pyplot as plt
#plt.style.use('paper')

#import data
import copy
from rolldecay.bis_system import BisSystem
from rolldecay import database
from mdldb.tables import Run

from sklearn.pipeline import Pipeline
from rolldecayestimators.transformers import CutTransformer, LowpassFilterDerivatorTransformer, ScaleFactorTransformer, OffsetTransformer
from rolldecayestimators.direct_estimator_cubic import EstimatorQuadraticB, EstimatorCubic
from rolldecayestimators.ikeda_estimator import IkedaQuadraticEstimator
import rolldecayestimators.equations as equations
import rolldecayestimators.lambdas as lambdas
from rolldecayestimators.substitute_dynamic_symbols import lambdify
import rolldecayestimators.symbols as symbols
import sympy as sp

from sklearn.metrics import r2_score



In [None]:
import rolldecayestimators.simplified_ikeda as si

In [None]:
# Start a Dask distributed client with 7 workers, 2 threads per worker and a
# 5GB memory limit. Evaluating `client` on its own displays it in the notebook.
from dask.distributed import Client, progress
client = Client(n_workers=7, threads_per_worker=2, memory_limit='5GB')
client

import dask.dataframe as dd

In [None]:
# Load the roll decay results from the 'rolldecay_quadratic_b' table, with a
# score limit of 0.99 and the 'rolldecay_exclude' table as exclusion list.
df_rolldecay = database.load(
    rolldecay_table_name='rolldecay_quadratic_b',
    limit_score=0.99,
    exclude_table_name='rolldecay_exclude',
)

# Untouched copy taken before any column is rescaled below.
df_rolldecay_raw = df_rolldecay.copy()

scale_factor = df_rolldecay['scale_factor']

# Columns that are divided by the scale factor (first power).
# 'A0' is deliberately absent: the original cell had its assignment
# (df_rolldecay['A0']=db_run.loading_condition.A0) commented out.
for length_column in ('lpp', 'TA', 'TF', 'beam', 'BKL', 'BKB', 'kg', 'gm'):
    df_rolldecay[length_column] /= scale_factor

# Volume is divided by the cube of the scale factor.
df_rolldecay['Volume'] /= (scale_factor**3)

# Speed: factor 1.852/3.6 (knots -> m/s, assuming 'ship_speed' is in knots —
# TODO confirm), then divided by sqrt(scale_factor).
df_rolldecay['V'] = df_rolldecay['ship_speed']*1.852/3.6/np.sqrt(scale_factor)  #[m/s]

# Constant columns used by the estimators.
df_rolldecay['rho'] = 1000
df_rolldecay['g'] = 9.81

In [None]:
# Wrap the rescaled pandas frame in a Dask DataFrame split into 10 partitions.
df = dd.from_pandas(df_rolldecay, npartitions=10)

In [None]:
def run_ikeda(df_rolldecay, verify_input=True, limit_inputs=True, **kwargs):
    """Fit an ``IkedaQuadraticEstimator`` for every row of *df_rolldecay*.

    Each row is unpacked as keyword arguments to ``IkedaQuadraticEstimator``
    together with ``verify_input`` and ``limit_inputs``. Rows for which
    ``fit`` raises ``si.SimplifiedIkedaInputError`` are skipped.

    Parameters
    ----------
    df_rolldecay
        Frame-like object providing ``iterrows()``; one estimator is built
        per row.
    verify_input, limit_inputs
        Passed straight through to the ``IkedaQuadraticEstimator`` constructor.
    **kwargs
        Passed straight through to ``IkedaQuadraticEstimator.fit``.

    Returns
    -------
    pandas.DataFrame
        One row per successfully fitted run, indexed by the row label of the
        input, holding ``result_for_database(score=False)``. Empty when no
        row could be fitted.
    """
    # Collect the rows in a list and build the frame once: DataFrame.append
    # was removed in pandas 2.0 and copied the whole frame on every call.
    results = []
    for run_id, data in df_rolldecay.iterrows():
        ikeda_estimator = IkedaQuadraticEstimator(**data, verify_input=verify_input,
                                                  limit_inputs=limit_inputs)
        try:
            ikeda_estimator.fit(**kwargs)
        except si.SimplifiedIkedaInputError:
            # Input rejected by the simplified Ikeda method: skip this run.
            continue

        result = ikeda_estimator.result_for_database(score=False)
        results.append(pd.Series(result, name=run_id))

    return pd.DataFrame(results)
    


In [None]:
# Simplified Ikeda prediction for every run, with no extra fit arguments.
# NOTE(review): `df` is the Dask DataFrame here; run_ikeda only calls
# .iterrows() on it — confirm this is intended rather than passing df_rolldecay.
df_si = run_ikeda(df_rolldecay=df, verify_input=False, limit_inputs=True, )

In [None]:
# Result frames to compare, keyed by a display name.
dataframes = {
                'model test': df_rolldecay,
                'si': df_si, 
             }

# Column suffix used for each frame when they are merged into one table;
# the model test keeps the unsuffixed column names.
suffixes={
    'model test': '',
    'si': '_si', 
}

In [None]:
# Repeat the prediction with alternative_bilge_keel=True for three RdivB
# values evenly spaced between 0.01 and 0.08, registering every result in
# `dataframes` / `suffixes` under a name that contains the RdivB value.
Rs = np.linspace(0.01,0.08,3)
for R in Rs:
    
    df_si_bk = run_ikeda(df_rolldecay=df, verify_input=False, limit_inputs=True, 
                     alternative_bilge_keel=True, RdivB=R)
    
    name = 'si alt. BK RdivB=%0.2f'%R
    dataframes[name] = df_si_bk
    suffixes[name] = '_si_bk_%0.2f' % R

In [None]:
# Persist the Dask DataFrame on the cluster.
# NOTE(review): this runs after the run_ikeda calls above have already
# consumed `df` — confirm whether it should happen earlier.
df = df.persist()

In [None]:
# Volume and beam are always taken from the model test frame.
# NOTE(review): presumably pandas aligns them with each frame on the index
# inside B_hat_lambda — confirm.
Disp=df_rolldecay['Volume']
beam=df_rolldecay['beam']

g=9.81
rho=1000
phi_a=np.deg2rad(4)  # 4 degrees expressed in radians



for name, dataframe in dataframes.items():
    # 'B_e' is computed from B_1, B_2 and omega0 at phi_a
    # (presumably the equivalent linearized damping — confirm in lambdas).
    dataframe['B_e'] = lambdas.B_e_lambda(B_1=dataframe['B_1'], B_2=dataframe['B_2'], 
                                         omega0=dataframe['omega0'], phi_a=phi_a)

    # 'B_e_hat' is derived from 'B_e' (presumably its nondimensional form).
    dataframe['B_e_hat'] = lambdas.B_hat_lambda(B=dataframe['B_e'], Disp=Disp, beam=beam, 
                                                   g=g, rho=rho)
    
    # Rows without a B_e_hat are removed in place; since df_rolldecay is one of
    # the values in `dataframes`, it is modified as well.
    dataframe.dropna(subset=['B_e_hat'], inplace=True)


In [None]:


# Join every prediction frame onto the model test frame by index.
# pd.merge defaults to an inner join, so only runs present in all frames
# remain. Columns that clash get the frame's suffix on the right-hand side.
df_compare=df_rolldecay.copy()
for name, dataframe in dataframes.items():
    if name=='model test':
        continue
    
    df_compare=pd.merge(left=df_compare, right=dataframe, left_index=True, right_index=True, 
                    suffixes=('',suffixes[name]))

In [None]:
# Scatter every predicted B_e_hat column against the model test B_e_hat.
fig,ax=plt.subplots()
key='B_e_hat'
# One column name per non-empty suffix, e.g. 'B_e_hat_si'.
keys = ['%s%s'% (key,suffix) for suffix in suffixes.values() if not suffix=='']

df_compare.plot(x='B_e_hat', y=keys, style='.', ax=ax)

# Use the same 0..lim range on both axes and draw the y = x line in red.
xlim = ax.get_xlim()
ylim = ax.get_ylim()
lim = np.max([xlim[1],ylim[1]])
ax.set_xlim(0,lim)
ax.set_ylim(0,lim)
ax.plot([0,lim],[0,lim],'r-')

ax.grid(True)
ax.set_aspect('equal', 'box')

In [None]:
# R2 score of every predicted B_e_hat column against the model test B_e_hat.
# The rows are collected in a list and the frame is built once, because
# DataFrame.append was removed in pandas 2.0.
score_rows = []
for name, suffix in suffixes.items():
    if name=='model test':
        continue
    score = pd.Series(
        {'r2': r2_score(y_true=df_compare['B_e_hat'], y_pred=df_compare['B_e_hat%s'%suffix])},
        name=name,
    )
    score_rows.append(score)

# One row per prediction variant (indexed by its name), one column 'r2'.
scores = pd.DataFrame(score_rows)

<a id='scores'></a>

In [None]:
# Display the R2 score table.
scores

In [None]:
# Signed and absolute difference between the 'si' prediction and the model test.
df_compare['residual'] = df_compare['B_e_hat_si'] - df_compare['B_e_hat']
df_compare['residual_abs']=df_compare['residual'].abs()

In [None]:
# Keep the rows whose absolute residual lies between the alpha and
# (1 - alpha) quantiles.
# NOTE(review): the lower bound also drops the 10% of runs with the SMALLEST
# absolute residual (the best matches) — confirm this is intended.
alpha=0.10
mask=((df_compare['residual_abs']>=df_compare['residual_abs'].quantile(alpha)) &
      (df_compare['residual_abs']<=df_compare['residual_abs'].quantile(1-alpha)))

df_compare_good = df_compare.loc[mask].copy()

In [None]:
# Scatter the 'si' prediction against the model test for the trimmed selection.
fig,ax=plt.subplots()
df_compare_good.plot(x='B_e_hat', y='B_e_hat_si', style='.', ax=ax)

# Use the same 0..lim range on both axes and draw the y = x line in red.
xlim = ax.get_xlim()
ylim = ax.get_ylim()
lim = np.max([xlim[1],ylim[1]])
ax.set_xlim(0,lim)
ax.set_ylim(0,lim)
ax.plot([0,lim],[0,lim],'r-')

ax.grid(True)
ax.set_aspect('equal', 'box')

In [None]:
# Signed and absolute residual of the 'si' prediction (same expressions as
# the earlier residual cell, so the columns are simply recomputed).
df_compare['residual']=df_compare['B_e_hat_si']-df_compare['B_e_hat']
df_compare['residual_abs']=df_compare['residual'].abs()

In [None]:
# Signed residual as a function of the model test B_e_hat.
fig,ax=plt.subplots()
df_compare.plot(x='B_e_hat', y='residual', style='.', ax=ax)
ax.grid(True)

In [None]:
# Trim angle in degrees: arctan2 of (TA - TF) over lpp.
df_compare['trim'] = np.rad2deg(np.arctan2(df_compare['TA']-df_compare['TF'], df_compare['lpp']))
df_compare.plot(x='trim', y='residual_abs', style='.')
df_compare.plot(x='trim', y='B_e_hat', style='.')

# Mean of TA and TF divided by the beam.
df_compare['T/B'] = (df_compare['TA']+df_compare['TF'])/2/df_compare['beam']
df_compare.plot(x='T/B', y='residual_abs', style='.')


In [None]:
# The ten rows with the largest absolute residual.
df_worst=df_compare.sort_values(by='residual_abs', ascending=False).iloc[0:10].copy()

In [None]:
# Display the key columns of the worst cases.
df_worst[['residual_abs','B_e_hat','project_path']]

In [None]:
# Row with the largest absolute residual; its name is the index label.
meta_data = df_worst.iloc[0]

In [None]:
# Look up the Run whose id is the index label of the worst case and load it.
# NOTE: this rebinds `df`, which previously held the Dask DataFrame.
db = database.get_db()
db_run = db.session.query(Run).get(int(meta_data.name))
df = database.load_run(db_run)

In [None]:
# Preprocessing applied to the loaded run: a low-pass filter / derivator step
# followed by a cut step that takes roll limits of 9 and 0.25 degrees.
lowpass_filter = LowpassFilterDerivatorTransformer(cutoff=2, minimum_score=0.99)
cutter = CutTransformer(phi_max=np.deg2rad(9), phi_min=np.deg2rad(0.25), phi1d_start_tolerance=0.015)
# Created but not part of the pipeline (its step is commented out below).
offset_transformer = OffsetTransformer()

steps = [
    ('filter',lowpass_filter),
#    ('scaler',scaler),  # Is froude scaling a good idea??
    ('cutter', cutter), 
#    ('offset_transformer',offset_transformer)
]
        
preprocessor = Pipeline(steps) # define the pipeline object.
preprocessor.fit(df)
X = preprocessor.transform(df)

In [None]:
# Load an EstimatorQuadraticB from the worst case's row together with the
# preprocessed signal, then plot its fit.
direct_estimator = EstimatorQuadraticB.load(data=meta_data, X=X)

fig,ax=plt.subplots()
direct_estimator.plot_fit(ax=ax)

In [None]:
# Unscaled database values for the worst case.
df_rolldecay_raw.loc[meta_data.name]

In [None]:
# Histogram of the 'B_E_HAT' column of the simplified Ikeda results.
# NOTE(review): other cells use the lower-case 'B_e_hat' column — confirm
# that 'B_E_HAT' is a separate column returned by result_for_database.
df_si['B_E_HAT'].hist()

In [None]:
# 'B_E_HAT' plotted against the signed residual.
# NOTE(review): relies on an unsuffixed 'B_E_HAT' column surviving the merges
# into df_compare — confirm the column name.
fig,ax=plt.subplots()
df_compare.plot(x='residual', y = 'B_E_HAT', style='.', ax=ax)
ax.grid(True)
