# $\hat{B_1}$ damping regression with a little help from Ikeda

In [None]:
%matplotlib inline
%load_ext autoreload
%autoreload 2

In [None]:
#from jupyterthemes import jtplot
#jtplot.style(theme='onedork', context='notebook', ticks=True, grid=False)

In [None]:
import pandas as pd
pd.options.display.max_rows = 999
pd.options.display.max_columns = 999
pd.set_option("display.max_columns", None)
import numpy as np
import os
import matplotlib.pyplot as plt
from pylab import rcParams
rcParams['figure.figsize'] = 15, 5

#import data
import copy
from rolldecay.bis_system import BisSystem
from rolldecay import database

from sklearn.pipeline import Pipeline
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.feature_selection import VarianceThreshold
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import chi2
from sklearn.feature_selection import f_regression

from sklearn.linear_model import LinearRegression
from sklearn.model_selection import cross_validate
from sklearn.preprocessing import PolynomialFeatures
from sklearn.pipeline import Pipeline
from sklearn.pipeline import make_pipeline

import signal_lab
from sqlalchemy.inspection import inspect
import seaborn as sns
import docs
import pickle
from rolldecayestimators.polynom_estimator import Polynom
from rolldecayestimators import symbols
from rolldecay import database
import rolldecayestimators.simplified_ikeda as simplified_ikeda
from rolldecayestimators.ikeda_estimator import IkedaQuadraticEstimator
import rolldecayestimators.lambdas as lambdas
from rolldecayestimators.substitute_dynamic_symbols import run, lambdify
import rolldecayestimators.equations as equations
import rolldecayestimators.symbols as symbols
import sympy as sp
from rolldecay.paper_writing import save_fig

In [None]:
# Load the feature table (`data`) and the regression targets (`y_s`) from the
# ../05_new_method directory.
# NOTE(review): unpickling can execute arbitrary code; only load trusted files.
data = pd.read_pickle('../05_new_method/data.sav')
y_s = pd.read_pickle('../05_new_method/y.sav')

In [None]:
#df_rolldecay = database.load(rolldecay_table_name='rolldecay_cubic_b', limit_score=0.90, 
#                             exclude_table_name='rolldecay_exclude')

# Load the 'rolldecay_simplified_ikeda' results, excluding the runs listed in
# 'rolldecay_exclude'.
# NOTE(review): limit_score=-np.inf presumably disables the score filter --
# confirm in database.load.
df_rolldecay = database.load(rolldecay_table_name='rolldecay_simplified_ikeda', limit_score=-np.inf, 
                             exclude_table_name='rolldecay_exclude')

In [None]:
T_f=df_rolldecay['TF']
T_a=df_rolldecay['TA']
L_pp=df_rolldecay['lpp']
mask=T_f!=T_a

df_rolldecay['trim']=np.arctan((T_a-T_f)/L_pp)
df_rolldecay.loc[mask][['TF','TA','trim']]

In [None]:
mask = df_rolldecay['trim'].abs() < np.deg2rad(0.3)
df_rolldecay=df_rolldecay.loc[mask].copy()

In [None]:
# Recompute the trim on the filtered table and list the runs that still have
# different fore and aft draughts.
T_f, T_a, L_pp = (df_rolldecay[column] for column in ('TF', 'TA', 'lpp'))
mask = T_f.ne(T_a)

draught_difference = T_a - T_f
df_rolldecay['trim'] = np.arctan(draught_difference.div(L_pp))
df_rolldecay.loc[mask, ['TF', 'TA', 'trim']]

In [None]:
# Keep only the runs that are present in all three tables.
# Index.intersection keeps the row order of `data` (a set-based intersection
# returns the labels in arbitrary order), and `y_s` is filtered with the same
# labels so that features and targets stay aligned row by row: scikit-learn
# pairs X and y by position, not by index label.
index = data.index.intersection(df_rolldecay.index).intersection(y_s.index)
df_rolldecay = df_rolldecay.loc[index].copy()
data = data.loc[index].copy()
y_s = y_s.loc[index].copy()

# Natural frequency comes from the roll decay results (aligned on the index).
data['omega0'] = df_rolldecay['omega0']
# The single draught column T is used for both ends, i.e. the estimator input
# has zero trim.
data['TA'] = data['T']
data['TF'] = data['T']

In [None]:
# Columns of the target table.
y_s.columns

In [None]:
# Columns of the feature table.
data.columns

In [None]:
# Roll amplitude used throughout the notebook, in degrees (it is converted
# with np.deg2rad wherever radians are needed).
phi_max = 8

In [None]:
def calculate_ikeda(row, phi_max):
    """Run ``IkedaQuadraticEstimator`` for one row of the feature table.

    Parameters
    ----------
    row : pd.Series
        One row of the feature table. The attributes ``L_pp``, ``TA``, ``TF``,
        ``beam``, ``BK_L``, ``BK_B``, ``A_0``, ``kg``, ``Disp``, ``GM``, ``V``
        and ``omega0`` are read.
    phi_max
        Roll amplitude, passed on unchanged to the estimator.

    Returns
    -------
    pd.Series
        The estimator's ``result_for_database(score=False)`` output, named
        after ``row.name`` so that ``DataFrame.apply`` keeps the index.
    """
    # rho and g are fixed here.
    # NOTE(review): presumably fresh water density and SI units -- confirm.
    estimator = IkedaQuadraticEstimator(lpp=row.L_pp,
                                        TA=row.TA,
                                        TF=row.TF,
                                        beam=row.beam,
                                        BKL=row.BK_L,
                                        BKB=row.BK_B,
                                        A0=row.A_0,
                                        kg=row.kg,
                                        Volume=row.Disp,
                                        gm=row.GM,
                                        V=row.V,
                                        rho=1000,
                                        g=9.81,
                                        phi_max=phi_max,
                                        omega0=row.omega0)
    estimator.fit()
    s = estimator.result_for_database(score=False)

    return pd.Series(s, name=row.name)

In [None]:
result = data.apply(func=calculate_ikeda, phi_max=phi_max, axis=1)

In [None]:
result.head()

In [None]:
df_ikeda = pd.concat([data,result], sort=False, axis=1)

In [None]:
df_ikeda.head()

In [None]:
# Constants needed by the lambda functions.
for column, value in (('g', 9.81), ('rho', 1000)):
    df_ikeda[column] = value

# Evaluate the B_1_hat and B_2_hat lambdas on the Ikeda results.
for column, damping_lambda in (('B_1_hat', lambdas.B_1_hat_lambda),
                               ('B_2_hat', lambdas.B_2_hat_lambda)):
    df_ikeda[column] = run(function=damping_lambda, inputs=df_ikeda)

In [None]:
# Display the B_e equation defined in rolldecayestimators.equations.
equations.B_e_equation

In [None]:
# Reuse the B44_hat equation for the equivalent damping by substituting
# B_44_hat -> B_e_hat and B_44 -> B_e.
B_e_hat_equation = equations.B44_hat_equation.subs([(symbols.B_44_hat, symbols.B_e_hat),
                                 (symbols.B_44, symbols.B_e),
                                ])
B_e_hat_equation

In [None]:
# Solve the four equations simultaneously for B_1, B_2, B_e and B_e_hat and
# keep only the expression for B_e_hat.
# NOTE: this binds a notebook-level name B_e_equation; the attribute
# equations.B_e_equation itself is not modified.
eqs=[equations.B_1_hat_equation,
equations.B_2_hat_equation,
equations.B_e_equation,
B_e_hat_equation]
B_e_equation = sp.Eq(symbols.B_e_hat,sp.solve(eqs,symbols.B_1,symbols.B_2,symbols.B_e,
                                          symbols.B_e_hat)[symbols.B_e_hat])
B_e_equation

In [None]:
# Turn the B_e_hat expression into a numerical function.
# NOTE(review): presumably lambdify names the function arguments after the
# free symbols, which is what `run` matches against column names -- confirm
# in rolldecayestimators.substitute_dynamic_symbols.
B_e_lambda = lambdify(sp.solve(B_e_equation,symbols.B_e_hat)[0])

In [None]:
inputs = y_s.copy()
inputs['phi_a'] = np.deg2rad(phi_max)
inputs['g']=9.81
inputs['omega0']=data['omega0']
inputs['beam']=data['beam']

y_s['B_e_hat'] = run(function=B_e_lambda, inputs=inputs)

In [None]:
y_s['B_e_hat'].hist()

In [None]:
df_ikeda.head()

In [None]:
 df_ikeda = df_ikeda.loc[:, ~df_ikeda.columns.duplicated()]

In [None]:
df_ikeda['phi_a'] = np.deg2rad(phi_max)
df_ikeda['B_e_hat'] = run(function=B_e_lambda, inputs=df_ikeda)

In [None]:
df_compare = pd.merge(left=y_s, right=df_ikeda, how = 'inner',left_index=True, right_index=True,
                      suffixes=('','_ikeda'))

In [None]:
fig,ax=plt.subplots()
df_compare.plot(x='B_e_hat', y='B_e_hat_ikeda', style='o', alpha=0.5, ax=ax)
#save_fig(fig=fig, name='B_e_hat_ikeda')

In [None]:
from sklearn.metrics import r2_score
r2_score(y_true=df_compare['B_e_hat'], y_pred=df_compare['B_e_hat_ikeda'])

In [None]:
df_compare['error'] = (df_compare['B_e_hat']-df_compare['B_e_hat_ikeda']).abs()

In [None]:
# Copy of the comparison table with lengths divided by L_pp, V divided by
# sqrt(L_pp) and omega0 multiplied by sqrt(L_pp).
df_compare2 = df_compare.copy()
lpp = df_compare2['L_pp']
sqrt_lpp = np.sqrt(lpp)

for length_column in ('BK_B', 'BK_L', 'T', 'kg', 'GM', 'beam'):
    df_compare2[length_column] = df_compare2[length_column] / lpp
df_compare2['V'] = df_compare2['V'] / sqrt_lpp
df_compare2['omega0'] = df_compare2['omega0'] * sqrt_lpp
#df_compare2['Cb']=df_compare2['Disp']

# Error against one scaled quantity per figure.
error_plot = dict(y='error', style='o', alpha=0.5)
df_compare2.plot(x='kg', **error_plot)

fig, ax = plt.subplots()
df_compare2.plot(x='T', ax=ax, **error_plot)
#save_fig(fig=fig, name='B_e_hat_error')

df_compare2.plot(x='GM', **error_plot)
df_compare2.plot(x='V', **error_plot)
df_compare2.plot(x='C_b', **error_plot)


In [None]:
fig,ax=plt.subplots()
mask = ((df_compare2['T']>0.035) & (df_compare2['beam']<0.23))
df_compare_good = df_compare.loc[mask]
df_compare_good.plot(x='B_e_hat', y='B_e_hat_ikeda', style='o', alpha=0.5, ax=ax)
#save_fig(fig=fig, name='B_e_hat_good')


In [None]:
df_compare_good.plot(x='V', y='error', style='o', alpha=0.5)

In [None]:
name = df_compare_good.sort_values(by='error', ascending=False).iloc[0].name
meta_data = df_rolldecay.loc[name]

In [None]:
print(meta_data.project_path)

In [None]:
meta_data

In [None]:
# R2 of the Ikeda B_e_hat within the 'good' subset. df_compare_good is already
# filtered, so both arguments are taken from it directly; re-applying the
# notebook-level `mask` to one side only was redundant and would fail as soon
# as `mask` is rebound to a series with a different index.
r2_score(y_true=df_compare_good['B_e_hat'], y_pred=df_compare_good['B_e_hat_ikeda'])

In [None]:
df_compare_good.plot(x='B_1_hat', y='B_1_hat_ikeda', style='o', alpha=0.5)

In [None]:
r2_score(y_true=df_compare_good['B_1_hat'], y_pred=df_compare_good['B_1_hat_ikeda'])

In [None]:
df_compare_good.plot(x='B_2_hat', y='B_2_hat_ikeda', style='o', alpha=0.5)

In [None]:
r2_score(y_true=df_compare_good['B_2_hat'], y_pred=df_compare_good['B_2_hat_ikeda'])

In [None]:
df_ikeda.head()

In [None]:
df_ikeda['phi_max']=phi_max
ikeda_parameters = ['L_pp','beam','T','phi_max','BK_L','BK_B','omega0','kg','C_b','A_0','V']
df_ikeda[ikeda_parameters].head()

In [None]:
data['phi_max']=phi_max
data[ikeda_parameters].head()

In [None]:
from rolldecay.froude_scaling import froude_scale

# Read the parameter descriptions (including units) from the database.
db = database.get_db()
description = pd.read_sql_table('description', con=db.engine, index_col='id')

# Override the speed units and add descriptions for parameters that are
# missing in the database table.
description.loc['ship_speed','unit']='m/s'
description.loc['VDES','unit']='m/s'

description.loc['phi_start'] = {'description':'roll at start of test','unit':'rad'}
description.loc['phi_stop'] = {'description':'roll at end of test','unit':'rad'}
description.loc['Disp'] = {'description':'Ship displacement','unit':'m3'}
description.loc['L_pp'] = {'description':'Perpendicular length','unit':'m'}
description.loc['A_R'] = {'description':'Rudder area','unit':'m2'}

# Columns handed to froude_scale as `skip` (each name listed once).
skip = [
    'model_number',
    'loading_condition_id',
    'B_1A',
    'B_2A',
    'B_3A',
    'C_1A',
    'C_3A',
    'C_5A',
    'B_1',
    'B_2',
    'B_3',
    'C_1',
    'C_3',
    'C_5',
    'A_44',
    'omega0_fft',
    'omega0',
    'score',
    'id',
    'project_number',
    'series_number',
    'run_number',
    'test_number',
    'scale_factor',
    'g',
    'rho',
    'B_1_hat',
    'B_2_hat',
    'CB',
    'I_RUD',
    'C_W',
    'K_xx',
    'C_p',
    'A_0',
]
# The scale factor is set equal to L_pp.
# NOTE(review): presumably this scales every ship to unit length -- confirm
# in froude_scale.
data['scale_factor']=data['L_pp']
df_model = froude_scale(data=data, description=description, skip=skip)

In [None]:
# All Ikeda parameters except the length itself, in the order they appear in
# ikeda_parameters. (A set difference gives a column order that can change
# between sessions because of string hash randomisation, which makes the
# feature order of the fitted models irreproducible.)
parameters = [key for key in ikeda_parameters if key != 'L_pp']
X = data[parameters].copy()

# Lengths are divided by L_pp, V by sqrt(L_pp) and omega0 is multiplied by
# sqrt(L_pp).
lpp = data['L_pp']
X['BK_B'] /= lpp
X['BK_L'] /= lpp
X['T'] /= lpp
X['V'] /= np.sqrt(lpp)
X['omega0'] *= np.sqrt(lpp)
X['kg'] /= lpp
X['beam'] /= lpp
X.head()

In [None]:
variance_treshold = VarianceThreshold(0.001)
standard_scaler = StandardScaler()


y = y_s['B_e_hat']
#X = data[important]
#X=data[ikeda_parameters]
       
polynomial_features = PolynomialFeatures(degree=2)
linear_regression = LinearRegression()

ks = np.arange(1,30)
scores = []
for k in ks:
    select_k_best = SelectKBest(k=k, score_func=f_regression)

    steps=[
        ('polynomial_feature', polynomial_features),
        #('standard_scaler', standard_scaler),
        ('variance_treshold',variance_treshold),
        ('select_k_best',select_k_best),
        ('linear_regression', linear_regression)
    ]
    
    model = Pipeline(steps=steps)
    model.fit(X=X, y=y)
    scores.append(cross_val_score(estimator=model,X=X,y=y,cv=5).mean())

In [None]:
fig,ax=plt.subplots()
ax.plot(ks,scores)

In [None]:
# Same pipeline with a fixed number of selected features (k=10).
scores = []
select_k_best = SelectKBest(score_func=f_regression, k=10)
polynomial_features = PolynomialFeatures(degree=2)
linear_regression = LinearRegression()

steps = [
    ('polynomial_feature', polynomial_features),
    #('standard_scaler', standard_scaler),
    ('variance_treshold', variance_treshold),
    ('select_k_best', select_k_best),
    ('linear_regression', linear_regression),
]

model = Pipeline(steps=steps)
model.fit(X=X, y=y)
fold_scores = cross_val_score(estimator=model, X=X, y=y, cv=5)
fold_scores.mean()

In [None]:
df_ikeda.head()

In [None]:
X2 = X.copy()
X2['B_1_hat'] = df_ikeda['B_1_hat']
X2['B_2_hat'] = df_ikeda['B_2_hat']

polynomial_features = PolynomialFeatures(degree=2)
linear_regression = LinearRegression()


scores = []
select_k_best = SelectKBest(k=10, score_func=f_regression)

steps=[
    ('polynomial_feature', polynomial_features),
    #('standard_scaler', standard_scaler),
    ('variance_treshold',variance_treshold),
    ('select_k_best',select_k_best),
    ('linear_regression', linear_regression)
]

model = Pipeline(steps=steps)
model.fit(X=X2, y=y)
cross_val_score(estimator=model,X=X2,y=y,cv=5).mean()

In [None]:
r2_score(y_true=df_compare['B_e_hat'], y_pred=df_compare['B_e_hat_ikeda'])

In [None]:
# Target column for the regressions in the rest of the notebook.
y_key = 'B_1_hat'

# Distribution of the target (title typo 'Historgram' corrected).
fig,ax=plt.subplots()
y_s[y_key].hist(bins=50, ax=ax)
ax.set_title('Histogram: %s' % y_key)

In [None]:
# Rebind df_ikeda to the 'rolldecay_simplified_ikeda' table loaded with
# limit_score=0.90. This replaces the df_ikeda that was computed earlier in
# the notebook.
# NOTE(review): df_ikeda is not referenced again in the rest of the visible
# notebook -- confirm whether this cell is still needed.
df_ikeda = database.load(rolldecay_table_name='rolldecay_simplified_ikeda', limit_score=0.90, 
                             exclude_table_name='rolldecay_exclude')

In [None]:
data.head()

In [None]:
variance_treshold = VarianceThreshold(0.000)
standard_scaler = StandardScaler()


y = y_s[y_key]
#X = data[important]
X=data.drop(columns=['I_RUD','TWIN']).copy()  # Handle categorical data later
       
polynomial_features = PolynomialFeatures(degree=2)
linear_regression = LinearRegression()

ks = np.arange(1,30)
scores = []
for k in ks:
    select_k_best = SelectKBest(k=k, score_func=f_regression)

    steps=[
        ('polynomial_feature', polynomial_features),
        ('standard_scaler', standard_scaler),
        ('variance_treshold',variance_treshold),
        ('select_k_best',select_k_best),
        ('linear_regression', linear_regression)
    ]
    
    model = Pipeline(steps=steps)
    model.fit(X=X, y=y)
    scores.append(model.score(X=X, y=y))
    #scores.append(cross_val_score(estimator=model,X=X,y=y,cv=4).mean())
    
    

In [None]:
fig,ax=plt.subplots()
ax.plot(ks,scores)

In [None]:
# Features and target.
y = y_s[y_key]
#X = data[important]
X = data.drop(['I_RUD', 'TWIN'], axis='columns').copy()  # Handle categorical data later

# Degree-2 polynomial regression on the 20 best features.
polynomial_features = PolynomialFeatures(degree=2)
standard_scaler = StandardScaler()
variance_treshold = VarianceThreshold(0.000)
select_k_best = SelectKBest(score_func=f_regression, k=20)
linear_regression = LinearRegression()

steps = [
    ('polynomial_feature', polynomial_features),
    ('standard_scaler', standard_scaler),
    ('variance_treshold', variance_treshold),
    ('select_k_best', select_k_best),
    ('linear_regression', linear_regression),
]

model = Pipeline(steps=steps)
#cross_val_score(estimator=model,X=X,y=y,cv=5).mean()
model.fit(X=X, y=y)
# Score on the training data.
model.score(X=X, y=y)

In [None]:
fig,ax = plt.subplots()
ax.plot(y,model.predict(X),'o', alpha=0.5)
ax.set_title('Prediction of %s' % y_key)
ax.set_xlabel('test: %s' % y_key)
ax.set_ylabel('predicted: %s' % y_key)

In [None]:
polynom = Polynom(model=model, columns=X.columns, y_symbol=symbols.B_1_hat)
polynom.fit(X=X, y=y)
polynom.equation

In [None]:
# Grid search over the number of selected features (k) and the polynomial
# degree, scored with the mean of a 5-fold cross validation.
ks = np.arange(1,10)
degrees = np.arange(1,3)

variance_treshold = VarianceThreshold(0.0001)
standard_scaler = StandardScaler()

y = y_s[y_key]
#X = data[important]
X=data.drop(columns=['I_RUD','TWIN']).copy()  # Handle categorical data later

# One dict per (k, degree) combination. The table is built once after the
# loop: DataFrame.append was removed in pandas 2.0 and copied the whole frame
# on every call.
records = []
for k in ks:
    for degree in degrees:
        select_k_best = SelectKBest(k=k, score_func=f_regression)

        polynomial_features = PolynomialFeatures(degree=degree)
        linear_regression = LinearRegression()

        steps=[
            ('polynomial_feature', polynomial_features),
            ('standard_scaler', standard_scaler),
            ('variance_treshold',variance_treshold),
            ('select_k_best',select_k_best),
            ('linear_regression', linear_regression)
        ]

        model = Pipeline(steps=steps)
        score = cross_val_score(estimator=model,X=X,y=y,cv=5).mean()
        records.append({'k': k, 'degree': degree, 'score': score})

results = pd.DataFrame(records, columns=['k', 'degree', 'score'])
        
        

In [None]:
results.sort_values(by='score', ascending=False, inplace=True)

In [None]:
results.head()

In [None]:
k = int(results.iloc[0]['k'])
degree = int(results.iloc[0]['degree'])

In [None]:
select_k_best = SelectKBest(k=k, score_func=f_regression)

standard_scaler = StandardScaler()
polynomial_features = PolynomialFeatures(degree=degree)
linear_regression = LinearRegression()

steps=[
    ('polynomial_feature', polynomial_features),
    ('standard_scaler', standard_scaler),
    ('variance_treshold',variance_treshold),
    ('select_k_best',select_k_best),
    ('linear_regression', linear_regression)
]

model = Pipeline(steps=steps)

In [None]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20)
model.fit(X=X_train, y=y_train)
score = model.score(X=X_test, y=y_test)
score

In [None]:
fig,ax = plt.subplots()
ax.plot(y_test,model.predict(X_test),'.', alpha=0.5)
ax.set_title('Prediction of %s' % y_key)
ax.set_xlabel('test: %s' % y_key)
ax.set_ylabel('predicted: %s' % y_key)


In [None]:
fig,ax = plt.subplots()
ax.plot(y,model.predict(X),'o', alpha=0.6)
ax.set_title('Prediction of %s' % y_key)
ax.set_xlabel('test: %s' % y_key)
ax.set_ylabel('predicted: %s' % y_key)

In [None]:
import pickle
filename = 'B_1_hat_model.sav'
# Refit on all rows before saving.
model.fit(X=X, y=y)
# Store the feature names (and their order) on the model object.
model.keys=list(X.columns)
# The with-block closes the file even if pickling fails.
with open(filename, 'wb') as file:
    pickle.dump(model, file)

In [None]:
# Score of the refitted model on the data it was fitted to.
model.score(X=X,y=y)

In [None]:
# Wrap the fitted pipeline in a Polynom with B_1_hat as the left-hand-side
# symbol and score it on the same data.
polynom = Polynom(model=model, columns=X.columns, y_symbol=symbols.B_1_hat)
polynom.fit(X=X, y=y)

score = polynom.score(X=X, y=y)
score

In [None]:
polynom.equation

In [None]:
# Save the polynom to 'B_1_hat_polynom.sym'.
polynom.save('B_1_hat_polynom.sym')

In [None]:
X.describe()

In [None]:
select_k_best = SelectKBest(k=9, score_func=f_regression)
polynomial_features = PolynomialFeatures(degree=2)
linear_regression = LinearRegression()
variance_treshold = VarianceThreshold(0.000001)

steps=[
    ('polynomial_feature', polynomial_features),
    ('standard_scaler', standard_scaler),
    ('variance_treshold',variance_treshold),
    ('select_k_best',select_k_best),
    ('linear_regression', linear_regression)
]

model = Pipeline(steps=steps)
score = cross_val_score(estimator=model,X=X,y=y,cv=5).mean()
score

In [None]:
model.fit(X=X,y=y)

In [None]:
model.score(X=X,y=y)

In [None]:
polynom = Polynom(model=model, columns=X.columns, y_symbol=symbols.B_1_hat)
polynom.fit(X=X, y=y)

In [None]:
polynom.equation

In [None]:
X.head()

In [None]:
mask = X['V']==0
X_0=X.loc[mask]
y_0=y.loc[mask]

In [None]:
y_0.hist()

In [None]:
model.fit(X=X_0,y=y_0)

In [None]:
polynom = Polynom(model=model, columns=X.columns, y_symbol=symbols.B_1_hat)
polynom.fit(X=X_0, y=y_0)

In [None]:
polynom.equation

In [None]:
fig,ax = plt.subplots()
ax.plot(y_0,model.predict(X_0),'o', alpha=0.6)
ax.set_title('Prediction of %s' % y_key)
ax.set_xlabel('test: %s' % y_key)
ax.set_ylabel('predicted: %s' % y_key)

In [None]:
X['B_1_hat0']=model.predict(X=X)

In [None]:
select_k_best = SelectKBest(k=9, score_func=f_regression)
polynomial_features = PolynomialFeatures(degree=2)
linear_regression = LinearRegression()

steps=[
    ('polynomial_feature', polynomial_features),
    ('standard_scaler', standard_scaler),
    ('variance_treshold',variance_treshold),
    ('select_k_best',select_k_best),
    ('linear_regression', linear_regression)
]

model = Pipeline(steps=steps)
score = cross_val_score(estimator=model,X=X,y=y,cv=5).mean()
score

In [None]:
model.fit(X=X,y=y)

In [None]:
polynom = Polynom(model=model, columns=X.columns, y_symbol=symbols.B_1_hat)
polynom.fit(X=X, y=y)

In [None]:
polynom.equation

In [None]:
model.score(X=X,y=y)