# Compare SI-method with roll damping DB

In [None]:
#%matplotlib notebook
%matplotlib inline
%load_ext autoreload
%autoreload 2

In [None]:
#from jupyterthemes import jtplot
#jtplot.style(theme='onedork', context='notebook', ticks=True, grid=False)

In [None]:
import pandas as pd
pd.options.display.max_rows = 999
pd.options.display.max_columns = 999
pd.set_option("display.max_columns", None)
import numpy as np
import os
import matplotlib.pyplot as plt
#plt.style.use('paper')

#import data
import copy
from rolldecay.bis_system import BisSystem
from rolldecay import database

import rolldecayestimators.lambdas as lambdas
from rolldecayestimators.substitute_dynamic_symbols import run, lambdify, significant_numbers
from rolldecayestimators.ikeda_estimator import IkedaQuadraticEstimator

from rolldecay.paper_writing import save_fig
from rolldecay.froude_scaling import froude_scale
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split
from latex_helpers import pylatex_extenders
import rolldecay
from rolldecay.paper_writing import save_fig
import rolldecayestimators.simplified_ikeda as si
import rolldecayestimators.sensitivity as sensitivity


In [None]:
import matplotlib
from rolldecayestimators.measure import linearized_matrix


print(matplotlib.matplotlib_fname())

In [None]:
db = database.get_db()

In [None]:
# Load the two result tables that are compared in this notebook:
#   df_ikeda     - rows from table 'rolldecay_simplified_ikeda'
#   df_rolldecay - rows from table 'rolldecay_quadratic_b' (model test fits)
# NOTE(review): limit_score is presumably a minimum fit score and
# exclude_table_name a table of runs to drop - confirm in database.load.
df_ikeda = database.load(rolldecay_table_name='rolldecay_simplified_ikeda', limit_score=0.5, 
                             exclude_table_name='rolldecay_exclude')

df_rolldecay = database.load(rolldecay_table_name='rolldecay_quadratic_b', limit_score=0.9, 
                             exclude_table_name='rolldecay_exclude')

#df_rolldecay = database.load(rolldecay_table_name='rolldecay_linear_b', limit_score=0.9, 
#                             exclude_table_name='rolldecay_exclude')
#df_rolldecay['B_2']=0

# 1.852/3.6 is the knots -> m/s factor (1 knot = 1.852 km/h).
# NOTE(review): assumes the database stores ship_speed in knots - confirm.
df_rolldecay['ship_speed']*=1.852/3.6
df_ikeda['ship_speed']*=1.852/3.6

# The description table carries the unit of each column; it is patched here so
# that it agrees with the speed conversion above and knows about 'Disp'.
description = pd.read_sql_table('description', con=db.engine, index_col='id')
description.loc['ship_speed','unit']='m/s'
description.loc['VDES','unit']='m/s'
description.loc['Disp'] = {'description':'Ship discplacement','unit':'m3'}

# Trim angle [rad] from the draught difference TA-TF over lpp.
# Only model tests with |trim| < 0.3 deg are kept; df_ikeda is NOT filtered here.
T_f=df_rolldecay['TF']
T_a=df_rolldecay['TA']
L_pp=df_rolldecay['lpp']
df_rolldecay['trim']=np.arctan((T_a-T_f)/L_pp)
mask = df_rolldecay['trim'].abs() < np.deg2rad(0.3)
df_rolldecay=df_rolldecay.loc[mask].copy()

# 'Disp' is a plain copy of the 'Volume' column.
df_ikeda['Disp']=df_ikeda['Volume']
df_rolldecay['Disp']=df_rolldecay['Volume']

# Rescale both frames with froude_scale, leaving 'omega0' untouched.
# NOTE(review): presumably this converts to model scale using the units in
# `description` - confirm in rolldecay.froude_scaling.
skip=['omega0']
df_ikeda = froude_scale(data=df_ikeda, description=description, skip=skip)
df_rolldecay = froude_scale(data=df_rolldecay, description=description, skip=skip)

#phi_a = np.deg2rad(3)

g = 9.81
rho=1000

#phi_a = df_ikeda['phi_start'].abs()
#phi_a = np.deg2rad(2)
#df_ikeda['B_e'] = run(function=lambdas.B_e_lambda, inputs=df_ikeda, phi_a=phi_a)
#df_ikeda['B_e_hat'] = run(function=lambdas.B_e_hat_lambda, inputs=df_ikeda, g=g, rho=rho)
#
#phi_a = df_rolldecay['phi_start'].abs()
#phi_a = np.deg2rad(2)
#df_rolldecay['B_e'] = run(function=lambdas.B_e_lambda, inputs=df_rolldecay, phi_a=phi_a)
#df_rolldecay['B_e_hat'] = run(function=lambdas.B_e_hat_lambda, inputs=df_rolldecay, g=g, rho=rho)
#
#df_rolldecay['omega0_hat'] = run(function=lambdas.omega0_lambda, inputs=df_rolldecay, g=g)
#df_ikeda['omega0_hat'] = run(function=lambdas.omega0_lambda, inputs=df_ikeda, g=g)
#
df_rolldecay['V']=df_rolldecay['ship_speed']
df_ikeda['V']=df_ikeda['ship_speed']


#df_ikeda['scale_factor']=df_ikeda['lpp']
#df_ikeda = froude_scale(data=df_ikeda, description=description, skip=skip)
#df_rolldecay['scale_factor']=df_rolldecay['lpp']
#df_rolldecay = froude_scale(data=df_rolldecay, description=description, skip=skip)

In [None]:
#df_compare = pd.merge(left=df_rolldecay, right=df_ikeda, how = 'inner',left_index=True, right_index=True,
#                      suffixes=('','_ikeda'))

## Zero speed

In [None]:
def run_ikeda(df_rolldecay, verify_input=True, limit_inputs=False):
    """Run ``IkedaQuadraticEstimator`` on every row of ``df_rolldecay``.

    Parameters
    ----------
    df_rolldecay : pd.DataFrame
        One row per roll decay test. Every column of a row is passed to the
        estimator constructor as a keyword argument.
    verify_input : bool
        Forwarded to ``IkedaQuadraticEstimator``.
    limit_inputs : bool
        Forwarded to ``IkedaQuadraticEstimator``.

    Returns
    -------
    pd.DataFrame
        One row per successfully fitted test, indexed by the index label of the
        input row. Rows whose fit raises ``si.SimplifiedIkedaInputError`` are
        skipped. An empty frame is returned when nothing could be fitted.
    """
    # Collect the rows in a list and build the frame once: DataFrame.append was
    # removed in pandas 2.0 and re-allocates the whole frame on every call.
    results = []
    for run_id, data in df_rolldecay.iterrows():
        ikeda_estimator = IkedaQuadraticEstimator(**data, verify_input=verify_input,
                                                  limit_inputs=limit_inputs)
        try:
            ikeda_estimator.fit()
        except si.SimplifiedIkedaInputError:
            # Inputs rejected by the simplified Ikeda method: leave this test out.
            continue

        result = ikeda_estimator.result_for_database(score=False)
        results.append(pd.Series(result, name=run_id))

    return pd.DataFrame(results)
    

In [None]:
mask = df_rolldecay['ship_speed'].round(decimals=2)==0
df_roll_decay_zero = df_rolldecay.loc[mask].copy()

In [None]:
result_no_verify=run_ikeda(df_rolldecay=df_roll_decay_zero, verify_input=False)
result_limited=run_ikeda(df_rolldecay=df_roll_decay_zero, verify_input=True, limit_inputs=True)

In [None]:
steal=['beam','Disp']
df_no_verify = pd.merge(left=result_no_verify, right=df_roll_decay_zero[steal], how='inner', 
                        left_index=True, right_index=True)

df_limited = pd.merge(left=result_limited, right=df_roll_decay_zero[steal], how='inner', 
                        left_index=True, right_index=True)


In [None]:
df_no_verify = linearized_matrix(df_rolldecay=df_rolldecay, df_ikeda=df_no_verify)
df_limited = linearized_matrix(df_rolldecay=df_rolldecay, df_ikeda=df_limited)

In [None]:
#df_no_verify['B_e_hat'] = run(function=lambdas.B_e_hat_lambda, inputs=df_no_verify, g=g, rho=rho)
#df_limited['B_e_hat'] = run(function=lambdas.B_e_hat_lambda, inputs=df_limited, g=g, rho=rho)

In [None]:
#df_no_verify = pd.merge(left=df_no_verify, right=df_roll_decay_zero, how='inner', 
#                        left_index=True, right_index=True, suffixes=('_ikeda',''))
#
#df_limited = pd.merge(left=df_limited, right=df_roll_decay_zero, how='inner', 
#                        left_index=True, right_index=True, suffixes=('_ikeda',''))

In [None]:
#df_no_verify['error']=df_no_verify['B_e_hat']-df_no_verify['B_e_hat_ikeda']
#df_limited['error']=df_limited['B_e_hat']-df_limited['B_e_hat_ikeda']


In [None]:
# Zero speed: Simplified Ikeda prediction against the model test value of
# B_e_hat, for the unverified and the limited inputs. Points on the red
# diagonal are perfect predictions.
fig,ax=plt.subplots()
x=df_no_verify['B_e_hat']
y=df_no_verify['B_e_hat_ikeda']
ax.plot(x, y, 'o', alpha=0.5, label='no verify')

x=df_limited['B_e_hat']
y=df_limited['B_e_hat_ikeda']
ax.plot(x, y, 'x', alpha=0.5, label='limited')

# Raw strings: '\h' is an invalid escape sequence in a normal string literal.
ax.set_xlabel(r'$\hat{B_e}$ (model test)')
ax.set_ylabel(r'$\hat{B_e}$ (Simplified Ikeda)')

# Same upper limit on both axes so that the diagonal is the 1:1 line.
xlim = ax.get_xlim()
ylim = ax.get_ylim()
lim = np.max([xlim[1],ylim[1]])
ax.set_xlim(0,lim)
ax.set_ylim(0,lim)
ax.plot([0,lim],[0,lim],'r-')
ax.set_aspect('equal', 'box')
ax.legend()
ax.grid(True)

In [None]:
r2_score(y_true=df_no_verify['B_e_hat'], y_pred=df_no_verify['B_e_hat_ikeda'])

In [None]:
r2_score(y_true=df_limited['B_e_hat'], y_pred=df_limited['B_e_hat_ikeda'])

## Speed

In [None]:
#df = df_rolldecay.copy()
#scale_factor=df.scale_factor
#df['lpp']/=scale_factor
#df['TA']/=scale_factor 
#df['TF']/=scale_factor
#df['beam']/=scale_factor
#df['BKL']/=scale_factor
#df['BKB']/=scale_factor
##df['A0']=db_run.loading_condition.A0
#df['kg']/=scale_factor
#df['Volume']/=(scale_factor**3)
#df['gm']/=scale_factor 
#df['V']=df['ship_speed']*1.852/3.6/np.sqrt(scale_factor)  #[m/s]

In [None]:
df_rolldecay['V']=df_rolldecay['ship_speed']
result_no_verify=run_ikeda(df_rolldecay=df_rolldecay, verify_input=False)
result_limited=run_ikeda(df_rolldecay=df_rolldecay, verify_input=True, limit_inputs=True)

In [None]:
steal=['beam','Disp']
df_no_verify = pd.merge(left=result_no_verify, right=df_rolldecay[steal], how='inner', 
                        left_index=True, right_index=True)

df_limited = pd.merge(left=result_limited, right=df_rolldecay[steal], how='inner', 
                        left_index=True, right_index=True)


In [None]:
df_no_verify = linearized_matrix(df_rolldecay=df_rolldecay, df_ikeda=df_no_verify)
df_limited = linearized_matrix(df_rolldecay=df_rolldecay, df_ikeda=df_limited)

In [None]:
#df_no_verify['B_e_hat'] = run(function=lambdas.B_e_hat_lambda, inputs=df_no_verify, g=g, rho=rho)
#df_limited['B_e_hat'] = run(function=lambdas.B_e_hat_lambda, inputs=df_limited, g=g, rho=rho)

In [None]:
#df_no_verify = pd.merge(left=df_no_verify, right=df_rolldecay, how='inner', 
#                        left_index=True, right_index=True, suffixes=('_ikeda',''))
#
#df_limited = pd.merge(left=df_limited, right=df_rolldecay, how='inner', 
#                        left_index=True, right_index=True, suffixes=('_ikeda',''))

In [None]:
#df_no_verify['error']=df_no_verify['B_e_hat']-df_no_verify['B_e_hat_ikeda']
#df_limited['error']=df_limited['B_e_hat']-df_limited['B_e_hat_ikeda']


In [None]:
# All speeds: Simplified Ikeda prediction against the model test value of
# B_e_hat, for the unverified ('unlimited') and the limited inputs.
fig,ax=plt.subplots()
x=df_no_verify['B_e_hat']
y=df_no_verify['B_e_hat_ikeda']
ax.plot(x, y, '.', label='unlimited')

x=df_limited['B_e_hat']
y=df_limited['B_e_hat_ikeda']
ax.plot(x, y, 'x',label='limited')

# Raw strings: '\h' is an invalid escape sequence in a normal string literal.
ax.set_xlabel(r'$\hat{B}_e^{Model}$')
ax.set_ylabel(r'$\hat{B}_e^{SI}$')

# Same upper limit on both axes so that the diagonal is the 1:1 line.
xlim = ax.get_xlim()
ylim = ax.get_ylim()
lim = np.max([xlim[1],ylim[1]])
ax.set_xlim(0,lim)
ax.set_ylim(0,lim)
ax.plot([0,lim],[0,lim],'r-')
ax.set_aspect('equal', 'box')
ax.legend()
ax.grid(True)

save_fig(fig=fig, name='ikeda_limited')

In [None]:
# Rank the limited-input results by absolute error (largest first) and export
# them for inspection.
df = (df_limited
      .assign(**{'abs(error)': df_limited['error'].abs()})
      .sort_values(by='abs(error)', ascending=False))
df.to_csv('bad_simplified_ikeda.csv',sep=';')
# Last expression of the cell so that the preview is actually displayed.
df.head()

In [None]:
fig,ax=plt.subplots()
x=df_limited['B_e_hat']
y=df_limited['B_e_hat_ikeda']
ax.plot(x, y, '.', alpha=0.5, label='limited')

ax.set_xlabel('$\hat{B_e}$ (model test)')
ax.set_ylabel('$\hat{B_e}$ (Simplified Ikeda)')

xlim = ax.get_xlim()
ylim = ax.get_ylim()
lim = np.max([xlim[1],ylim[1]])
ax.set_xlim(0,lim)
ax.set_ylim(0,lim)
ax.plot([0,lim],[0,lim],'r-')
ax.set_aspect('equal', 'box')
ax.legend()
ax.grid(True)

In [None]:
df_limited.head()

In [None]:
fig,ax=plt.subplots()
x = df_no_verify['B_W_HAT']/df_limited['B_W_HAT']
y = df_no_verify['error']
ax.plot(x, y,'x', alpha=0.5)

fig,ax=plt.subplots()
x = df_no_verify['B_E_HAT']/df_limited['B_E_HAT']
y = df_no_verify['error']
ax.plot(x, y,'x', alpha=0.5)

fig,ax=plt.subplots()
x = df_no_verify['B_L_HAT']/df_limited['B_L_HAT']
y = df_no_verify['error']
ax.plot(x, y,'x', alpha=0.5)




In [None]:
# Damping components of the Simplified Ikeda prediction plotted against the
# relative prediction error, one panel for the unlimited and one for the
# limited inputs.
fig,axes=plt.subplots(ncols=2)

# (column, marker, legend label); raw strings because '\h' is an invalid
# escape sequence in a normal string literal.
components = [
    ('B_W_HAT', '.', r'$\hat{B_W}$'),
    ('B_E_HAT', 'x', r'$\hat{B_E}$'),
    ('B_L_HAT', '+', r'$\hat{B_L}$'),
    ('B_BK_HAT', '*', r'$\hat{B_{BK}}$'),
]
x_label = r'$ \frac{\hat{B_e}(Ikeda) - \hat{B_e}(model)}{\hat{B_e}(model)} $'

panels = zip(axes, [df_no_verify, df_limited], ['unlimited', 'limited'])
for ax, df_result, title in panels:
    x=-df_result['error']/df_result['B_e_hat']
    for key, marker, label in components:
        ax.plot(x, df_result[key], marker, label=label)
    ax.set_xlabel(x_label)
    ax.grid()
    ax.set_title(title)

# Only the right panel gets a legend; it reuses the axis limits of the left
# panel so that the two panels can be compared directly.
ax=axes[1]
ax.legend()
ax.set_xlim(axes[0].get_xlim())
ax.set_ylim(axes[0].get_ylim())
save_fig(fig=fig, name='ikeda_components')


In [None]:
fig,ax=plt.subplots()
df_limited['B_1'].hist(ax=ax)

fig,ax=plt.subplots()
df_limited['B_2'].hist(ax=ax)

In [None]:
fig,ax=plt.subplots()
df_limited['B_1_ikeda'].hist(ax=ax)

fig,ax=plt.subplots()
df_limited['B_2_ikeda'].hist(ax=ax)

In [None]:
fig,ax=plt.subplots()
df_limited.plot(x='ship_speed', y='error', ax=ax, style='o', alpha=0.5)

fig,ax=plt.subplots()
df_limited[r'T/B']=df_limited['TA']/df_limited['beam']
df_limited.plot(x=r'T/B', y='error', ax=ax, style='o', alpha=0.5)

fig,ax=plt.subplots()
df_limited.plot(x=r'omega0_hat', y='error', ax=ax, style='o', alpha=0.5)

fig,ax=plt.subplots()
df_limited['phi_max']=df_limited['phi_start'].abs()
df_limited.plot(x=r'phi_max', y='error', ax=ax, style='o', alpha=0.5)

fig,ax=plt.subplots()
df_limited.plot(x=r'B_W_HAT', y='error', ax=ax, style='o', alpha=0.5)

fig,ax=plt.subplots()
df_limited['Cb']=df_limited['Disp']/(df_limited['lpp']*df_limited['beam']*df_limited['TA'])
df_limited.plot(x='Cb', y='error', ax=ax, style='o', alpha=0.5)

fig,ax=plt.subplots()
df_limited['OG']=df_limited['kg']-df_limited['TA']
df_limited.plot(x='OG', y='error', ax=ax, style='o', alpha=0.5)

In [None]:
r2_score(y_true=df_no_verify['B_e_hat'], y_pred=df_no_verify['B_e_hat_ikeda'])

In [None]:
r2_score(y_true=df_limited['B_e_hat'], y_pred=df_limited['B_e_hat_ikeda'])

In [None]:
def calculate_limits(row):
    """Return the quantities from ``si._calculate_limit_value`` for one test.

    Parameters
    ----------
    row : pd.Series
        Must contain 'TA', 'TF', 'lpp' and 'beam'. The draught passed on is the
        mean of 'TA' and 'TF'.

    Returns
    -------
    pd.Series
        The result of ``si._calculate_limit_value``, named after ``row.name``
        so that ``DataFrame.apply(axis=1)`` keeps the original index.
    """
    # The previously computed OG and CB were never used and have been removed.
    draught=(row['TA']+row['TF'])/2
    limits = si._calculate_limit_value(LPP=row['lpp'], Beam=row['beam'], DRAFT=draught)
    return pd.Series(limits, name=row.name)
    

In [None]:
limits = df_limited.apply(func=calculate_limits, axis=1)
df=pd.concat([df_limited,limits], axis=1)
draught=(df['TA']+df['TF'])/2
df['OG']=df['kg']-draught
df['CB']=df['Disp']/(df['lpp']*df['beam']*draught)
df['CMID']=df['A0']

In [None]:
# Apply the validity intervals in si.limits_kawahara one after the other.
# For each key: histogram of all model tests, histogram of the tests that are
# still accepted after this (and all previous) intervals, and the valid
# interval as a green band.
df_selection = df.copy()
# The loop variable is called key_limits so that it does not overwrite the
# `limits` frame created in an earlier cell.
for key,key_limits in si.limits_kawahara.items():
    fig,ax=plt.subplots()
    bins=np.linspace(df[key].min(), df[key].max(),30)
    df[key].hist(ax=ax, bins=bins, label='model test data')
    # Inside the closed interval, or exactly zero (zero values are always kept).
    mask = (df_selection[key].between(key_limits[0], key_limits[1]) |
            (df_selection[key] == 0))

    df_selection = df_selection.loc[mask].copy()
    df_selection[key].hist(ax=ax, bins=bins, label='ok', alpha=0.3)

    ylims = ax.get_ylim()
    ax.fill_between(key_limits, [ylims[1],ylims[1]], y2=0, color='green', alpha=0.2, label='valid')
    ax.set_xlabel(key)
    # Number of tests removed by this interval alone.
    ax.set_title('Removed: %i' % (len(mask)-mask.sum()))
    ax.legend()

In [None]:
def verify_limits(row):
    """Tell whether one test passes ``si.verify_inputs``.

    The draught is the mean of 'TA' and 'TF'; OG is 'kg' minus that draught and
    CB is 'Disp' divided by lpp*beam*draught. PHI is fixed to 1.

    Returns True when ``si.verify_inputs`` accepts the inputs and False when it
    raises ``si.SimplifiedIkedaInputError``. Any other exception propagates.
    """
    mean_draught=(row['TA']+row['TF'])/2
    og=row['kg']-mean_draught
    block_coefficient=row['Disp']/(row['lpp']*row['beam']*mean_draught)

    inputs = dict(LPP=row['lpp'], Beam=row['beam'], CB=block_coefficient, CMID=row['A0'],
                  OG=og, PHI=1, lBK=row['BKL'], bBK=row['BKB'], OMEGA=row['omega0'],
                  DRAFT=mean_draught)
    try:
        si.verify_inputs(**inputs)
    except si.SimplifiedIkedaInputError:
        return False
    return True
        
    

In [None]:
# Keep only the tests whose inputs pass si.verify_inputs.
# (The unused debugging assignment `row = df_limited.iloc[0]` was removed; it
# raised an IndexError on an empty frame.)
mask = df_limited.apply(func=verify_limits, axis=1)
df_compare_zero_limits = df_limited.loc[mask].copy()

In [None]:
fig,ax=plt.subplots()
x=df_compare_zero_limits['B_e_hat']
y=df_compare_zero_limits['B_e_hat_ikeda']
ax.plot(x, y, 'o', alpha=0.5)

ax.set_xlabel('$\hat{B_e}$ (model test)')
ax.set_ylabel('$\hat{B_e}$ (Simplified Ikeda)')

xlim = ax.get_xlim()
ylim = ax.get_ylim()
lim = np.max([xlim[1],ylim[1]])
ax.set_xlim(0,lim)
ax.set_ylim(0,lim)
ax.plot([0,lim],[0,lim],'r-')
ax.set_aspect('equal', 'box')
ax.grid(True)

In [None]:
fig,ax=plt.subplots()
x=df_compare['B_e_hat']
y=df_compare['B_e_hat_ikeda']
ax.plot(x, y, 'o', alpha=0.5)

ax.set_xlabel('$\hat{B_e}$ (model test)')
ax.set_ylabel('$\hat{B_e}$ (Simplified Ikeda)')

xlim = ax.get_xlim()
ylim = ax.get_ylim()
lim = np.max([xlim[1],ylim[1]])
ax.set_xlim(0,lim)
ax.set_ylim(0,lim)
ax.plot([0,lim],[0,lim],'r-')
ax.set_aspect('equal', 'box')
ax.grid(True)
save_fig(fig=fig, name='B_e_hat_ikeda')

In [None]:
df_compare['error'] = (df_compare['B_e_hat']-df_compare['B_e_hat_ikeda']).abs()

In [None]:
fig,axes=plt.subplots(nrows=2)
ax=axes[0]
df_compare.plot(x='TA', y='error', style='o', alpha=0.5,ax=ax)
ax.set_xlabel('$T/L_{pp}$')
ax.grid(True)

ax=axes[1]
df_compare.plot(x='omega0_hat', y='error', style='o', alpha=0.5, ax=ax)
ax.set_xlabel('$\hat{\omega_0}$')
ax.grid(True)
save_fig(fig=fig, name='B_e_hat_error')

In [None]:
fig,ax=plt.subplots()
mask=((df_compare['TA']>0.035) & (df_compare['omega0_hat']<0.63))
df_compare_good = df_compare.loc[mask].copy()
df_compare_good.plot(x='B_e_hat', y='B_e_hat_ikeda', ax=ax, style='o', alpha=0.5)
ax.set_xlabel('$\hat{B_e}$')

xlim = ax.get_xlim()
ylim = ax.get_ylim()
lim = np.max([xlim[1],ylim[1]])
ax.set_xlim(0,lim)
ax.set_ylim(0,lim)
ax.plot([0,lim],[0,lim],'r-')
ax.set_aspect('equal', 'box')
ax.grid(True)
save_fig(fig=fig, name='B_e_hat_good')              



In [None]:
# df_compare_good is already the filtered frame; re-applying the global `mask`
# was redundant and breaks as soon as `mask` is reassigned by another cell.
r2_score(y_true=df_compare_good['B_e_hat'], y_pred=df_compare_good['B_e_hat_ikeda'])

In [None]:
df_ikeda.head()

In [None]:
data = df_compare.copy()
data['T'] = (data['TA']+data['TF'] )/2
data['CB'] = data['Disp']/(data['lpp']*data['T']*data['beam'])
data['OG'] = (-data.kg + data['T'])
renamers = {
    'CP' : 'C_p',
    'CB' : 'C_b',
    'IRUD' : 'I_RUD', 
    'BKL' : 'BK_L', 
    'gm' : 'GM', 
    'A0' : 'A_0', 
    'ship_type_id' : 'ship_type_id', 
    'Volume' : 'Disp', 
    'Ixx' : 'I_xx', 
    'BKB' : 'BK_B',
    'KXX' : 'K_xx', 
    'RH' : 'R_h', 
    'AR' : 'A_R', 
    'TWIN' : 'TWIN', 
    'kg': 'kg', 
    'CW' : 'C_W', 
    'beam' : 'beam', 
    'TF' : 'T_F', 
    'ship_speed' : 'V', 
    'TA' : 'T_A',
    'lpp' : 'L_pp',
}
data.rename(columns=renamers, inplace=True)

In [None]:
ikeda_parameters = [
        'beam',
        'T',
        'BK_L',
        'BK_B',
        'OG',
        'omega0_hat',        
        'C_b',
        'A_0',
        'V']

In [None]:
data[ikeda_parameters].head()

## Pure polynomial regression on the Ikeda parameters

In [None]:
# Sweep the number of selected features k for a degree-2 polynomial regression
# of B_e_hat on the Ikeda parameters, and record the mean and standard
# deviation of the 5-fold cross-validation score for each k.
variance_treshold = VarianceThreshold(0.000)
#standard_scaler = StandardScaler()

y = data['B_e_hat']
X = data[ikeda_parameters]

polynomial_features = PolynomialFeatures(degree=2)
linear_regression = LinearRegression()

ks = np.arange(1,11,1)
scores = []
stds = []
cv=5
for k in ks:
    select_k_best = SelectKBest(k=k, score_func=f_regression)
    steps=[
            ('polynomial_feature', polynomial_features),
            #('standard_scaler', standard_scaler),
            ('variance_treshold',variance_treshold),
            ('select_k_best',select_k_best),
            ('linear_regression', linear_regression)
    ]

    model = Pipeline(steps=steps)
    model.fit(X=X, y=y)
    # One cross-validation run gives both statistics; the folds are the same
    # so running it twice only doubled the cost.
    cv_scores = cross_val_score(estimator=model,X=X,y=y,cv=cv)
    score = cv_scores.mean()
    std = cv_scores.std()

    scores.append(score)
    stds.append(std)

scores = np.array(scores)
stds = np.array(stds)

In [None]:
fig,ax = plt.subplots()
ax.plot(ks,scores-stds,'.-')
ax.plot(ks,scores,'.-')
ax.plot(ks,scores+stds,'.-')


In [None]:
# Final degree-2 model with the 10 best features, scored with 5-fold
# cross-validation.
select_k_best = SelectKBest(k=10, score_func=f_regression)
steps=[
        ('polynomial_feature', polynomial_features),
        #('standard_scaler', standard_scaler),
        ('variance_treshold',variance_treshold),
        ('select_k_best',select_k_best),
        ('linear_regression', linear_regression)
]

model = Pipeline(steps=steps)
model.fit(X=X, y=y)
cv=5
# Single cross-validation run; mean and std are taken from the same folds.
cv_scores = cross_val_score(estimator=model,X=X,y=y,cv=cv)
score = cv_scores.mean()
std = cv_scores.std()

In [None]:
score

In [None]:
std

In [None]:
polynom = Polynom(model=model, columns=X.columns, y_symbol=symbols.B_e_hat)
polynom.fit(X=X, y=y)
polynom.equation

In [None]:
data['B_e_regression_polynom'] = model.predict(X=X)


In [None]:
fig,ax=plt.subplots()
data.plot(x='B_e_hat', y=['B_e_hat_ikeda','B_e_regression_polynom'], ax=ax, style='o', alpha=0.4)

xlim = ax.get_xlim()
ylim = ax.get_ylim()
lim = np.max([xlim[1],ylim[1]])
ax.set_xlim(0,lim)
ax.set_ylim(0,lim)
ax.plot([0,lim],[0,lim],'r-')
ax.set_aspect('equal', 'box')

In [None]:
fig,ax=plt.subplots()
mask = ((data['B_e_regression_polynom'] < 0.0029) & 
        (data['B_e_regression_polynom'] > 0.0026))
df_strange=data.loc[mask].copy()
df_strange.loc[mask].plot(x='B_e_hat', y=['B_e_regression_polynom'], ax=ax, style='o', alpha=0.4)

xlim = ax.get_xlim()
ylim = ax.get_ylim()
lim = np.max([xlim[1],ylim[1]])
ax.set_xlim(0,lim)
ax.set_ylim(0,lim)
ax.plot([0,lim],[0,lim],'r-')
ax.set_aspect('equal', 'box')
ax.grid()

In [None]:
df_strange.head()

In [None]:
df_strange['T'].max()

In [None]:
df_strange['omega0_hat'].min()

In [None]:
df_strange['V'].hist()

In [None]:
mask = ((data['V'].round(decimals=2)==0) )
data_zero = data.loc[mask].copy()
data_zero['error'] = data_zero['B_e_hat']-data_zero['B_e_hat_ikeda']

In [None]:
fig,ax=plt.subplots()
data_zero.plot(x='B_e_hat', y=['B_e_hat_ikeda'], ax=ax, style='o', alpha=0.4)
ax.set_xlabel('$\hat{B_e}$')

xlim = ax.get_xlim()
ylim = ax.get_ylim()
lim = np.max([xlim[1],ylim[1]])
ax.set_xlim(0,lim)
ax.set_ylim(0,lim)
ax.plot([0,lim],[0,lim],'r-')
ax.set_aspect('equal', 'box')
ax.grid()
save_fig(fig=fig, name='B_e_hat_ikeda_zero')  

In [None]:
data_zero.head()

In [None]:
data_zero.plot(x='T', y='error', style='o', alpha=0.4)
data_zero.plot(x='error', y=['B_W_HAT','B_E_HAT','B_F_HAT','B_BK_HAT'], style='o', alpha=0.4)

In [None]:
r2_score(y_true=data_zero['B_e_hat'], y_pred=data_zero['B_e_hat_ikeda'])

In [None]:
data_zero['B_e_hat_ikeda2'] = data_zero['B_e_hat_ikeda']-data_zero['B_W_HAT']

In [None]:
r2_score(y_true=data_zero['B_e_hat'], y_pred=data_zero['B_e_hat_ikeda2'])

In [None]:
fig,ax=plt.subplots()
data_zero.plot(x='B_e_hat', y=['B_e_hat_ikeda2'], ax=ax, style='o', alpha=0.4)
ax.set_xlabel('$\hat{B_e}$')

xlim = ax.get_xlim()
ylim = ax.get_ylim()
lim = np.max([xlim[1],ylim[1]])
ax.set_xlim(0,lim)
ax.set_ylim(0,lim)
ax.plot([0,lim],[0,lim],'r-')
ax.set_aspect('equal', 'box')
ax.grid()

In [None]:
additional_parameters = [
'C_p',
'C_b',
'I_RUD', 
'BK_L', 
'GM', 
'A_0',  
'K_xx', 
'A_R', 
'TWIN', 
'kg', 
'C_W', 
'beam', 
]

#parameters = list(set(ikeda_parameters) | set(additional_parameters) | set(['B_F_HAT']))


In [None]:
# Zero speed: sweep the number of selected features k for a linear (degree 1)
# regression of B_e_hat on the Ikeda parameters plus the additional hull
# parameters. Rows with missing values in any used column are dropped.
parameters = list(set(ikeda_parameters) | set(additional_parameters) )

variance_treshold = VarianceThreshold(0.000)

y_key='B_e_hat'
data_=data_zero[parameters+[y_key]].copy()
data_.dropna(inplace=True)
y = data_[y_key]
X = data_[parameters].copy()

polynomial_features = PolynomialFeatures(degree=1)
linear_regression = LinearRegression()

ks = np.arange(1,17,1)
scores = []
stds = []
cv=5
for k in ks:
    select_k_best = SelectKBest(k=k, score_func=f_regression)
    steps=[
            ('polynomial_feature', polynomial_features),
            ('variance_treshold',variance_treshold),
            ('select_k_best',select_k_best),
            ('linear_regression', linear_regression)
    ]

    model = Pipeline(steps=steps)
    model.fit(X=X, y=y)
    # One cross-validation run gives both statistics; the folds are the same
    # so running it twice only doubled the cost.
    cv_scores = cross_val_score(estimator=model,X=X,y=y,cv=cv)
    score = cv_scores.mean()
    std = cv_scores.std()

    scores.append(score)
    stds.append(std)

scores = np.array(scores)
stds = np.array(stds)

In [None]:
np.max(scores)

In [None]:
fig,ax = plt.subplots()
ax.plot(ks,scores-stds,'.-')
ax.plot(ks,scores,'.-')
ax.plot(ks,scores+stds,'.-')
ax.grid()

In [None]:
index = np.argmax(scores)
k=ks[index]
k

In [None]:
# Zero speed model with a fixed k=5, scored with 5-fold cross-validation.
# NOTE(review): k is hard-coded here and not taken from the sweep above.
select_k_best = SelectKBest(k=5, score_func=f_regression)
steps=[
        ('polynomial_feature', polynomial_features),
        #('standard_scaler', standard_scaler),
        ('variance_treshold',variance_treshold),
        ('select_k_best',select_k_best),
        ('linear_regression', linear_regression)
]

model_zero = Pipeline(steps=steps)
model_zero.fit(X=X, y=y)
cv=5
# Single cross-validation run; mean and std are taken from the same folds.
cv_scores = cross_val_score(estimator=model_zero,X=X,y=y,cv=cv)
score = cv_scores.mean()
std = cv_scores.std()

In [None]:
score

In [None]:
polynom_zero = Polynom(model=model_zero, columns=X.columns, y_symbol=symbols.B_e_hat)
polynom_zero.fit(X=X, y=y)
polynom_zero.equation

In [None]:
polynom_zero.score(X=X, y=y)

In [None]:
data_['B_e_hat_regression'] = polynom_zero.predict(data_[parameters])

In [None]:
fig,ax=plt.subplots()
data_.plot(x='B_e_hat', y=['B_e_hat_regression'], ax=ax, style='o', alpha=0.4)
ax.set_xlabel('$\hat{B_e}$')

xlim = ax.get_xlim()
ylim = ax.get_ylim()
lim = np.max([xlim[1],ylim[1]])
ax.set_xlim(0,lim)
ax.set_ylim(0,lim)
ax.plot([0,lim],[0,lim],'r-')
ax.set_aspect('equal', 'box')
ax.grid()

save_fig(fig=fig, name='B_e_hat0_regression')


In [None]:
data['B_e_hat0']=polynom_zero.predict(data)

In [None]:
mask=data['V'].round(decimals=2)>0
data_speed=data.loc[mask].copy()

In [None]:
data_speed['B_e_hat0'].hist(bins=30)

In [None]:
#data_speed['speed_factor']=data_speed['B_e_hat']/(data_speed['B_e_hat0']*data_speed['V'])
data_speed['speed_factor']=data_speed['B_e_hat']/data_speed['B_e_hat0']

In [None]:
data_speed['speed_factor'].hist(bins=30)

In [None]:
mask = data_speed['speed_factor'] < data_speed['speed_factor'].quantile(0.90)
data_speed=data_speed.loc[mask].copy()

In [None]:
data_speed['speed_factor'].hist(bins=30)

In [None]:
parameters = list(set(ikeda_parameters) | set(additional_parameters) | set(['B_L_HAT']))

In [None]:
# Speed factor: sweep the number of selected features k for a linear
# (degree 1) regression of 'speed_factor' on `parameters`. Rows with missing
# values in any used column are dropped.
variance_treshold = VarianceThreshold(0.000)

y_key='speed_factor'
data_=data_speed[parameters+[y_key]].copy()
data_.dropna(inplace=True)
y = data_[y_key]
X = data_[parameters].copy()

polynomial_features = PolynomialFeatures(degree=1)
linear_regression = LinearRegression()

ks = np.arange(1,6,1)
scores = []
stds = []
cv=5
for k in ks:
    select_k_best = SelectKBest(k=k, score_func=f_regression)
    steps=[
            ('polynomial_feature', polynomial_features),
            ('variance_treshold',variance_treshold),
            ('select_k_best',select_k_best),
            ('linear_regression', linear_regression)
    ]

    model = Pipeline(steps=steps)
    model.fit(X=X, y=y)
    # One cross-validation run gives both statistics; the folds are the same
    # so running it twice only doubled the cost.
    cv_scores = cross_val_score(estimator=model,X=X,y=y,cv=cv)
    score = cv_scores.mean()
    std = cv_scores.std()

    scores.append(score)
    stds.append(std)

scores = np.array(scores)
stds = np.array(stds)

In [None]:
np.max(scores)

In [None]:
fig,ax = plt.subplots()
ax.plot(ks,scores-stds,'.-')
ax.plot(ks,scores,'.-')
ax.plot(ks,scores+stds,'.-')
ax.grid()

In [None]:
# Speed factor model with a fixed k=5, scored with 5-fold cross-validation.
select_k_best = SelectKBest(k=5, score_func=f_regression)
steps=[
        ('polynomial_feature', polynomial_features),
        #('standard_scaler', standard_scaler),
        ('variance_treshold',variance_treshold),
        ('select_k_best',select_k_best),
        ('linear_regression', linear_regression)
]

model_speed = Pipeline(steps=steps)
model_speed.fit(X=X, y=y)
cv=5
# Single cross-validation run; mean and std are taken from the same folds.
cv_scores = cross_val_score(estimator=model_speed,X=X,y=y,cv=cv)
score = cv_scores.mean()
std = cv_scores.std()

In [None]:
score

In [None]:
polynom_speed = Polynom(model=model_speed, columns=X.columns, y_symbol=symbols.B_e_hat)
polynom_speed.fit(X=X, y=y)
polynom_speed.equation

In [None]:
data_speed['speed_factor_regression'] = polynom_speed.predict(data_speed)

In [None]:
fig,ax=plt.subplots()
data_speed.plot(x='speed_factor', y=['speed_factor_regression'], ax=ax, style='o', alpha=0.4)

xlim = ax.get_xlim()
ylim = ax.get_ylim()
lim = np.max([xlim[1],ylim[1]])
ax.set_xlim(0,lim)
ax.set_ylim(0,lim)
ax.plot([0,lim],[0,lim],'r-')
ax.set_aspect('equal', 'box')
ax.grid()
save_fig(fig=fig, name='B_e_factor_regression')


In [None]:
data['speed_factor_regression'] = polynom_speed.predict(data)

In [None]:
data['B_e_hat_speed_regression']=data['B_e_hat0']*data['speed_factor_regression'] 

In [None]:
data[['B_e_hat_speed_regression','B_e_hat0','speed_factor_regression','V','B_L_HAT']]

In [None]:
data['error'] = data['B_e_hat'] - data['B_e_hat_speed_regression']

In [None]:
fig,axes=plt.subplots(ncols=2)
ax=axes[0]
data.plot(x='B_e_hat', y=['B_e_hat_speed_regression'], ax=ax, style='o', alpha=0.4)
ax.set_xlabel('$\hat{B_e}$')

xlim = ax.get_xlim()
ylim = ax.get_ylim()
lim = np.max([xlim[1],ylim[1]])
ax.set_xlim(0,lim)
ax.set_ylim(0,lim)
ax.plot([0,lim],[0,lim],'r-')
ax.set_aspect('equal', 'box')
ax.grid()


ax=axes[1]
data.plot(x='T', y=['error'], ax=ax, style='o', alpha=0.4)
ax.set_xlabel('$T/L_{pp}$')
ax.grid()

save_fig(fig=fig, name='B_e_factor_regression_total')

In [None]:
mask = data['B_e_hat_speed_regression'].notnull()
data_ = data.loc[mask].copy()

r2_score(y_true=data_['B_e_hat'], y_pred=data_['B_e_hat_speed_regression'])

In [None]:
r2_score(y_true=data_['B_e_hat'], y_pred=data_['B_e_hat_ikeda'])

In [None]:
df_speed_and_no_speed = data.groupby(by='loading_condition_id').filter(lambda x: (((x['V'].round(decimals=2)==0).sum()>0) &
                                                          ((x['V'].round(decimals=2)!=0).sum()>0))
                                              )

In [None]:

# One figure per model: B_e_hat as a function of speed, one line per loading
# condition. All figures share the same axis limits so they can be compared.
for model_number, group_model in df_speed_and_no_speed.groupby(by='model_number'):
    fig,ax=plt.subplots()
    for loading_condition_id, group_loading_condition in group_model.groupby(by='loading_condition_id'):
        # Sort into a new frame: an in-place sort on a groupby sub-frame
        # triggers SettingWithCopyWarning.
        group_sorted = group_loading_condition.sort_values(by='V')
        group_sorted.plot(x='V', y='B_e_hat', ax=ax, style='.-',
                          label='loading condition %s' % loading_condition_id)

    ax.set_title('model %s' % model_number)
    ax.set_ylabel('B_e_hat')
    ax.set_xlim(df_speed_and_no_speed['V'].min(), df_speed_and_no_speed['V'].max())
    ax.set_ylim(df_speed_and_no_speed['B_e_hat'].min(), df_speed_and_no_speed['B_e_hat'].max())

In [None]:
def test_split(df_speed_and_no_speed, test_part = 0.20):
    df_speed_and_no_speed=df_speed_and_no_speed.copy()    
    
    model_groups = df_speed_and_no_speed.groupby(by='model_number')
    
    number_of_tests=int(len(model_groups)*test_part)
    random_index = np.random.permutation(len(model_groups))[0:number_of_tests]
    test_model_numbers = np.array(list(model_groups.groups.keys()))[random_index]
    
    data_test = model_groups.filter(lambda x:x.iloc[0]['model_number'] in test_model_numbers)
    
    index_train = list(set(data.index) - set(data_test.index))
    data_train=data.loc[index_train].copy()
    
    return data_train,data_test

In [None]:
data_train,data_test = test_split(df_speed_and_no_speed=df_speed_and_no_speed)

In [None]:
len(data_test)

In [None]:
len(data_train)

In [None]:
def fit(data):
    """Fit the zero speed polynomial and the speed factor polynomial.

    The zero speed polynomial is fitted first (``fit_zero``) and evaluated for
    every row as 'B_e_hat0'. For the rows with V > 0 (after rounding to two
    decimals) the ratio B_e_hat / B_e_hat0 is the 'speed_factor' that the
    second polynomial is fitted to (``fit_speed_factor``).

    Parameters
    ----------
    data : pd.DataFrame
        Training data. It is copied, so the caller's frame is not modified.

    Returns
    -------
    (polynom_zero, polynom_speed)
    """
    data=data.copy()
    polynom_zero= fit_zero(data)
    data['B_e_hat0']=polynom_zero.predict(data)

    mask = data['V'].round(decimals=2)>0
    data_speed = data.loc[mask].copy()
    data_speed['speed_factor']=data_speed['B_e_hat']/data_speed['B_e_hat0']

    polynom_speed = fit_speed_factor(data_speed)
    # The former prediction of 'speed_factor_regression' was written to the
    # local copy only and then discarded, so it has been removed.

    return polynom_zero,polynom_speed


def fit_zero(data):
    """Fit the zero speed regression of B_e_hat and return it as a ``Polynom``.

    Only rows with V == 0 (after rounding to two decimals) are used, and rows
    with a missing value in any feature or in 'B_e_hat' are dropped. The
    features are the union of the notebook-level lists ``ikeda_parameters``
    and ``additional_parameters``; the five best are selected with
    ``SelectKBest(f_regression)``.

    The notebook-level ``polynomial_features``, ``variance_treshold`` and
    ``linear_regression`` are used as configuration templates only.
    """
    # Local import, as sklearn.base is not imported at the top of the notebook.
    from sklearn.base import clone

    mask = data['V'].round(decimals=2)==0
    data_zero = data.loc[mask]
    # Sorted so that the column order is the same in every kernel session
    # (the iteration order of a set of strings is not).
    parameters = sorted(set(ikeda_parameters) | set(additional_parameters))

    y_key='B_e_hat'
    data_ = data_zero[parameters+[y_key]].dropna()
    y = data_[y_key]
    X = data_[parameters].copy()

    # Pipeline fits its steps in place. Cloning the shared templates keeps this
    # model independent of every other pipeline built from the same instances.
    steps=[
            ('polynomial_feature', clone(polynomial_features)),
            #('standard_scaler', standard_scaler),
            ('variance_treshold', clone(variance_treshold)),
            ('select_k_best', SelectKBest(k=5, score_func=f_regression)),
            ('linear_regression', clone(linear_regression))
    ]

    model_zero = Pipeline(steps=steps)
    model_zero.fit(X=X, y=y)

    polynom_zero = Polynom(model=model_zero, columns=X.columns, y_symbol=symbols.B_e_hat_0)
    polynom_zero.fit(X=X, y=y)

    return polynom_zero

def fit_speed_factor(data):
    """Fit the regression of 'speed_factor' and return it as a ``Polynom``.

    Parameters
    ----------
    data : pd.DataFrame
        Rows at forward speed with a 'speed_factor' column. Rows with a missing
        value in any feature or in 'speed_factor' are dropped.

    The features are the union of the notebook-level lists
    ``ikeda_parameters`` and ``additional_parameters`` plus 'B_L_HAT'; the
    five best are selected with ``SelectKBest(f_regression)``.

    NOTE: this function used to ignore ``data`` and read the notebook-level
    frame ``data_speed`` instead, so a hold-out evaluation trained on the test
    rows as well. That frame had its largest 10 % speed factors removed in an
    earlier cell; a caller that wants this filter must now apply it to
    ``data`` before calling.
    """
    # Local import, as sklearn.base is not imported at the top of the notebook.
    from sklearn.base import clone

    # Sorted so that the column order is the same in every kernel session.
    parameters = sorted(set(ikeda_parameters) | set(additional_parameters) | set(['B_L_HAT']))

    y_key='speed_factor'
    data_ = data[parameters+[y_key]].dropna()
    y = data_[y_key]
    X = data_[parameters].copy()

    # Pipeline fits its steps in place. Cloning the shared templates keeps this
    # model independent of every other pipeline built from the same instances.
    steps=[
        ('polynomial_feature', clone(polynomial_features)),
        #('standard_scaler', standard_scaler),
        ('variance_treshold', clone(variance_treshold)),
        ('select_k_best', SelectKBest(k=5, score_func=f_regression)),
        ('linear_regression', clone(linear_regression))
    ]

    model_speed = Pipeline(steps=steps)
    model_speed.fit(X=X, y=y)
    polynom_speed = Polynom(model=model_speed, columns=X.columns, y_symbol=symbols.B_e_factor)
    polynom_speed.fit(X=X, y=y)

    return polynom_speed
    



In [None]:
polynom_zero,polynom_speed = fit(data)
y_pred=polynom_zero.predict(data)*polynom_speed.predict(data)
r2_score(y_true=data['B_e_hat'], y_pred=y_pred)

In [None]:
name='polynom_zero'
equation = significant_numbers(polynom_zero.equation, precision=3)
eq=pylatex_extenders.Equation(equation,label='eq:%s'%name)
file_path = os.path.join(rolldecay.equations_path,name)
eq.generate_tex(file_path)

name='polynom_speed'
equation = significant_numbers(polynom_speed.equation, precision=3)
eq=pylatex_extenders.Equation(equation,label='eq:%s'%name)
file_path = os.path.join(rolldecay.equations_path,name)
eq.generate_tex(file_path)

In [None]:
polynom_zero,polynom_speed = fit(data_train)
y_pred=polynom_zero.predict(data_test)*polynom_speed.predict(data_test)
mask=y_pred.notnull()
r2_score(y_true=data_test.loc[mask]['B_e_hat'], y_pred=y_pred.loc[mask])


In [None]:
scores = []
for i in range(100):
    data_train,data_test = test_split(df_speed_and_no_speed=df_speed_and_no_speed)
    polynom_zero,polynom_speed = fit(data_train)
    y_pred=polynom_zero.predict(data_test)*polynom_speed.predict(data_test)
    mask=y_pred.notnull()
    score = r2_score(y_true=data_test.loc[mask]['B_e_hat'], y_pred=y_pred.loc[mask])
    scores.append(score)
    
scores=np.array(scores)

In [None]:
mask = ((np.quantile(scores, 0.10) < scores) & (scores< np.quantile(scores, 0.90)))
scores=scores[mask]

In [None]:
scores.mean()

In [None]:
scores.std()

In [None]:
scores