## SI corrector box

In [None]:
%matplotlib notebook
%load_ext autoreload
%autoreload 2

In [None]:
import pandas as pd
pd.options.display.max_rows = 999
pd.options.display.max_columns = 999
pd.set_option("display.max_columns", None)
import numpy as np
import os
import matplotlib.pyplot as plt
plt.style.use('paper')

#import data
import copy
from rolldecay.bis_system import BisSystem
from rolldecay import database

import rolldecayestimators.lambdas as lambdas
from rolldecayestimators.substitute_dynamic_symbols import run, lambdify, significant_numbers
from rolldecayestimators.ikeda_estimator import IkedaQuadraticEstimator

from rolldecay.paper_writing import save_fig, save_table
from rolldecay.froude_scaling import froude_scale
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split
from latex_helpers import pylatex_extenders
import rolldecay
from rolldecay.paper_writing import save_fig
import rolldecayestimators.simplified_ikeda as si
import rolldecayestimators.sensitivity as sensitivity

from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
from sklearn.pipeline import Pipeline
from rolldecayestimators.polynom_estimator import Polynom
from rolldecayestimators import symbols

from sklearn.model_selection import cross_val_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.feature_selection import VarianceThreshold
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import chi2
from sklearn.feature_selection import f_regression
from sklearn.metrics import mean_squared_error

from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Ridge
from sklearn.linear_model import Lasso

#from sklearn.model_selection import cross_validate
from rolldecayestimators import cross_validation

In [None]:
db = database.get_db()

In [None]:
def run_ikeda(df_rolldecay, verify_input=True, limit_inputs=False):
    """Fit a quadratic Ikeda estimator for every row of ``df_rolldecay``.

    Each row is unpacked as keyword arguments to ``IkedaQuadraticEstimator``
    (together with ``verify_input``, ``limit_inputs`` and a fixed
    ``phi_max=2``). Rows whose fit raises ``si.SimplifiedIkedaInputError``
    are skipped silently.

    Parameters
    ----------
    df_rolldecay : pd.DataFrame
        One row per run; the index value becomes the row label of the result.
    verify_input, limit_inputs : bool
        Forwarded unchanged to ``IkedaQuadraticEstimator``.

    Returns
    -------
    pd.DataFrame
        One row per successfully fitted run, holding
        ``result_for_database(score=False)``. Empty if nothing could be fitted.
    """
    results = []
    for run_id, data in df_rolldecay.iterrows():
        ikeda_estimator = IkedaQuadraticEstimator(**data, verify_input=verify_input,
                                                  limit_inputs=limit_inputs, phi_max=2)
        try:
            ikeda_estimator.fit()
        except si.SimplifiedIkedaInputError:
            # Inputs rejected by the simplified Ikeda method: skip this run.
            continue

        result = ikeda_estimator.result_for_database(score=False)
        results.append(pd.Series(result, name=run_id))

    # Build the frame once: DataFrame.append was removed in pandas 2.0 and
    # copied the whole frame on every iteration.
    return pd.DataFrame(results)

In [None]:
# Load the Simplified Ikeda results and the quadratic roll-decay fits from the
# database; both calls use the same exclusion table.
df_ikeda = database.load(rolldecay_table_name='rolldecay_simplified_ikeda', limit_score=0.1, 
                             exclude_table_name='rolldecay_exclude')

df_rolldecay = database.load(rolldecay_table_name='rolldecay_quadratic_b', limit_score=0.99, 
                             exclude_table_name='rolldecay_exclude')

#df_rolldecay = database.load(rolldecay_table_name='rolldecay_linear_b', limit_score=0.99, 
#                             exclude_table_name='rolldecay_exclude')
#df_rolldecay['B_2']=0

# 1.852/3.6 is the knots -> m/s factor (assumes ship_speed is stored in knots — TODO confirm);
# the unit in the description table is updated to 'm/s' below to match.
df_rolldecay['ship_speed']*=1.852/3.6
df_ikeda['ship_speed']*=1.852/3.6

# Per-column metadata (description, unit) that is passed to froude_scale below.
description = pd.read_sql_table('description', con=db.engine, index_col='id')
description.loc['ship_speed','unit']='m/s'
description.loc['VDES','unit']='m/s'
description.loc['Disp'] = {'description':'Ship discplacement','unit':'m3'}

# Trim angle [rad] from the draught difference over lpp; keep only runs with
# |trim| below 0.3 degrees.
T_f=df_rolldecay['TF']
T_a=df_rolldecay['TA']
L_pp=df_rolldecay['lpp']
df_rolldecay['trim']=np.arctan((T_a-T_f)/L_pp)
mask = df_rolldecay['trim'].abs() < np.deg2rad(0.3)
df_rolldecay=df_rolldecay.loc[mask].copy()

# 'Disp' is a plain copy of 'Volume' (it has its own row in `description`, added above).
df_ikeda['Disp']=df_ikeda['Volume']
df_rolldecay['Disp']=df_rolldecay['Volume']

skip=['omega0']  # This one is already model scale...
df_ikeda = froude_scale(data=df_ikeda, description=description, skip=skip)
df_rolldecay = froude_scale(data=df_rolldecay, description=description, skip=skip)

# Rerunning ikeda to get better values for B_W_HAT etc.
# NOTE(review): the rerun itself is commented out, so `df` is only prepared here
# and not consumed in this cell.
df = df_ikeda.copy()
df['V']=df['ship_speed']
#result_ikeda_rerun = run_ikeda(df, verify_input=True, limit_inputs=True)
#reruns = ['B_W_HAT','B_F_HAT','B_BK_HAT','B_E_HAT','B_L_HAT']
#for rerun in reruns:
#    df_ikeda[rerun]=result_ikeda_rerun[rerun]

# Amplitude [rad] passed as phi_a to B_e_lambda below; the commented lines are
# alternative choices that were tried.
#phi_a = np.deg2rad(2.5)
phi_a = np.deg2rad(4.3)

#phi_a = df_rolldecay['phi_start'].abs()
#phi_a=np.deg2rad(3.5)

# Constants handed to the *_hat lambdas (presumably gravity [m/s2] and water
# density [kg/m3] — confirm against the lambdas module).
g = 9.81
rho=1000
df_ikeda['B_e'] = run(function=lambdas.B_e_lambda, inputs=df_ikeda, phi_a=phi_a)
df_ikeda['B_e_hat'] = run(function=lambdas.B_e_hat_lambda, inputs=df_ikeda, g=g, rho=rho)
df_ikeda['B_1_hat'] = run(function=lambdas.B_1_hat_lambda, inputs=df_ikeda, g=g, rho=rho)
df_ikeda['B_2_hat'] = run(function=lambdas.B_2_hat_lambda, inputs=df_ikeda, g=g, rho=rho)

# Same derived columns for the model-test (roll decay) data.
df_rolldecay['B_e'] = run(function=lambdas.B_e_lambda, inputs=df_rolldecay, phi_a=phi_a)
df_rolldecay['B_e_hat'] = run(function=lambdas.B_e_hat_lambda, inputs=df_rolldecay, g=g, rho=rho)
df_rolldecay['B_1_hat'] = run(function=lambdas.B_1_hat_lambda, inputs=df_rolldecay, g=g, rho=rho)
df_rolldecay['B_2_hat'] = run(function=lambdas.B_2_hat_lambda, inputs=df_rolldecay, g=g, rho=rho)

df_rolldecay['omega0_hat'] = run(function=lambdas.omega0_lambda, inputs=df_rolldecay, g=g)
df_ikeda['omega0_hat'] = run(function=lambdas.omega0_lambda, inputs=df_ikeda, g=g)

# Snapshots used by later cells that re-linearize with other amplitudes.
df1 = df_rolldecay.copy()
df2 = df_ikeda.copy()

#skip=['omega0']  # This one is already model scale...
#df_ikeda['scale_factor']=df_ikeda['lpp']
#df_ikeda = froude_scale(data=df_ikeda, description=description, skip=skip)
#df_rolldecay['scale_factor']=df_rolldecay['lpp']
#df_rolldecay = froude_scale(data=df_rolldecay, description=description, skip=skip)

In [None]:
lambdas.B_e_hat_lambda

In [None]:
df_compare = pd.merge(left=df_rolldecay, right=df_ikeda, how = 'inner',left_index=True, right_index=True,
                      suffixes=('','_ikeda'))

df_compare['error'] = df_compare['B_e_hat']-df_compare['B_e_hat_ikeda']

In [None]:
fig,ax=plt.subplots()
x=df_compare['B_1_hat']
y=df_compare['B_1_hat_ikeda']
ax.plot(x, y, '.', alpha=0.5, label='limited')

In [None]:
fig,ax=plt.subplots()
x=df_compare['B_2_hat']
y=df_compare['B_2_hat_ikeda']
ax.plot(x, y, '.', alpha=0.5, label='limited')

In [None]:
# Measured vs. Simplified Ikeda B_e_hat for the merged runs.
fig,ax=plt.subplots()
x=df_compare['B_e_hat']
y=df_compare['B_e_hat_ikeda']
ax.plot(x, y, '.', alpha=0.5, label='limited')

# Raw strings keep the LaTeX backslashes without relying on '\h' being an
# unrecognised escape sequence (a SyntaxWarning on recent Python versions).
ax.set_xlabel(r'$\hat{B_e}$ (model test)')
ax.set_ylabel(r'$\hat{B_e}$ (Simplified Ikeda)')

# Square axes starting at the origin plus a y=x reference line: points on the
# line are perfect predictions.
xlim = ax.get_xlim()
ylim = ax.get_ylim()
lim = np.max([xlim[1],ylim[1]])
ax.set_xlim(0,lim)
ax.set_ylim(0,lim)
ax.plot([0,lim],[0,lim],'r-')
ax.set_aspect('equal', 'box')
ax.legend()
ax.grid(True)

In [None]:
r2_score(y_true=df_compare['B_e_hat'], y_pred=df_compare['B_e_hat_ikeda'])

In [None]:
r2_score(y_true=df_compare['B_1_hat'], y_pred=df_compare['B_1_hat_ikeda'])

In [None]:
r2_score(y_true=df_compare['B_2_hat'], y_pred=df_compare['B_2_hat_ikeda'])

In [None]:
def linearize(phi_a, df_rolldecay, df_ikeda):
    """Recompute ``B_e`` / ``B_e_hat`` at amplitude ``phi_a`` and merge both sources.

    Works on copies, so the frames passed in are left untouched. Relies on the
    notebook-level ``g`` and ``rho`` for the ``B_e_hat`` computation.

    Parameters
    ----------
    phi_a :
        Value forwarded as ``phi_a`` to ``lambdas.B_e_lambda``.
    df_rolldecay, df_ikeda : pd.DataFrame
        Model-test results and Simplified Ikeda results, joined on their index.

    Returns
    -------
    pd.DataFrame
        Inner join on the index; columns from ``df_ikeda`` that clash with
        ``df_rolldecay`` get the suffix ``_ikeda``.
    """
    df_rolldecay = df_rolldecay.copy()
    df_ikeda = df_ikeda.copy()

    # The two frames are independent, so each one is processed in turn.
    for frame in (df_rolldecay, df_ikeda):
        frame['B_e'] = run(function=lambdas.B_e_lambda, inputs=frame, phi_a=phi_a)
        frame['B_e_hat'] = run(function=lambdas.B_e_hat_lambda, inputs=frame, g=g, rho=rho)

    # Returned directly: the former local alias `df_compare` was never used and
    # only shadowed the notebook-level frame of the same name.
    return pd.merge(left=df_rolldecay, right=df_ikeda, how='inner',
                    left_index=True, right_index=True, suffixes=('', '_ikeda'))

In [None]:

# Layout: one wide axis on top (RMSE vs. phi_a) and three scatter plots below.
fig = plt.figure()
gs = fig.add_gridspec(2,3)
axes=[
fig.add_subplot(gs[0, :]),
fig.add_subplot(gs[1,0]),
fig.add_subplot(gs[1,1]),
fig.add_subplot(gs[1,2])    
]

r2s = []
rms_SI = []

# Sweep the linearization amplitude and score Simplified Ikeda against the
# model tests at each amplitude.
phi_as = np.deg2rad(np.linspace(0,10,20))
for phi_a in phi_as:
    
    df3 = linearize(phi_a=phi_a, df_rolldecay=df_rolldecay, df_ikeda=df_ikeda)
    r2s.append(r2_score(y_true=df3['B_e_hat'], y_pred=df3['B_e_hat_ikeda']))
    # RMSE as sqrt(MSE): the `squared=False` keyword was deprecated in
    # scikit-learn 1.4 and later removed.
    rms_SI.append(np.sqrt(mean_squared_error(y_true=df3['B_e_hat'], y_pred=df3['B_e_hat_ikeda'])))
    
ax=axes[0]
#ax.plot(np.rad2deg(phi_as), r2s)
ax.plot(np.rad2deg(phi_as), rms_SI)

# Raw strings: '\p' and '\h' are not valid escape sequences.
ax.set_xlabel(r'$\phi_a$ [deg]')
#ax.set_ylabel(r'$R^2(\hat{B_e})$')
ax.set_ylabel(r'$RMSE(\hat{B_e})$')
ax.grid()

# Scatter plots at three selected amplitudes.
phi_as = np.deg2rad([0,5,10])
for phi_a,ax in zip(phi_as,axes[1:]):
    
    df3 = linearize(phi_a=phi_a, df_rolldecay=df_rolldecay, df_ikeda=df_ikeda)
    df3['phi_a']=phi_a

    x=df3['B_e_hat']
    y=df3['B_e_hat_ikeda']
    ax.plot(x, y, '.', alpha=0.5)
    ax.set_title(label=r'$\phi_a$:%0.0f deg' % np.rad2deg(phi_a))

    ax.set_xlabel(r'$\hat{B_e}$ (model test)')
    
    # y=x reference line drawn up to the auto-scaled limit ...
    xlim = ax.get_xlim()
    ylim = ax.get_ylim()
    lim = np.max([xlim[1],ylim[1]])
    ax.set_xlim(0,lim)
    ax.set_ylim(0,lim)
    ax.plot([0,lim],[0,lim],'r-')
    ax.set_aspect('equal', 'box')
    ax.grid(True)
    # ... then all three panels are clipped to the same fixed range.
    ax.set_xlim(0,0.02)
    ax.set_ylim(0,0.02)
    
axes[1].set_ylabel(r'$\hat{B_e}$ (Simplified Ikeda)')
plt.tight_layout()
save_fig(fig, name='ikeda_phi_a')

In [None]:
r2s

In [None]:
r2s = []

# The model-test damping is linearized at a fixed 2.5 deg amplitude. It does not
# depend on the loop variable, so it is computed once instead of per iteration.
df1['B_e'] = run(function=lambdas.B_e_lambda, inputs=df1, phi_a=np.deg2rad(2.5))
df1['B_e_hat'] = run(function=lambdas.B_e_hat_lambda, inputs=df1, g=g, rho=rho)

phi_as = np.deg2rad(np.linspace(0,6,20))
for phi_a in phi_as:
    # Simplified Ikeda with its B_1 scaled by 0.8, linearized at the swept amplitude.
    df2_=df2.copy()
    df2_['B_1']*=0.8
    df2_['B_e'] = run(function=lambdas.B_e_lambda, inputs=df2_, phi_a=phi_a)
    df2_['B_e_hat'] = run(function=lambdas.B_e_hat_lambda, inputs=df2_, g=g, rho=rho)
    
    df3 = pd.merge(left=df1, right=df2_, how = 'inner',left_index=True, right_index=True,
                      suffixes=('','_ikeda'))
        
    r2s.append(r2_score(y_true=df3['B_e_hat'], y_pred=df3['B_e_hat_ikeda']))
    #rms.append(mean_squared_error(y_true=df3['B_e_hat'], y_pred=df3['B_e_hat_ikeda']))
    
fig,ax=plt.subplots()
ax.plot(np.rad2deg(phi_as), r2s)
# Raw string: '\p' is not a valid escape sequence.
ax.set_xlabel(r'$\phi_a$ [deg]')
ax.set_ylabel('$R^2$')
ax.grid()


In [None]:
phi_a=np.deg2rad(1.6)
df1['B_e'] = run(function=lambdas.B_e_lambda, inputs=df1, phi_a=np.deg2rad(2.5))
df1['B_e_hat'] = run(function=lambdas.B_e_hat_lambda, inputs=df1, g=g, rho=rho)

df2_=df2.copy()
df2_['B_1']*=0.8
df2_['B_e'] = run(function=lambdas.B_e_lambda, inputs=df2_, phi_a=phi_a)
df2_['B_e_hat'] = run(function=lambdas.B_e_hat_lambda, inputs=df2_, g=g, rho=rho)

df3 = pd.merge(left=df1, right=df2_, how = 'inner',left_index=True, right_index=True,
                  suffixes=('','_ikeda'))

r2_score(y_true=df3['B_e_hat'], y_pred=df3['B_e_hat_ikeda'])

In [None]:
# Measured vs. Simplified Ikeda B_e_hat for the most recently merged frame `df3`.
fig,ax=plt.subplots()
x=df3['B_e_hat']
y=df3['B_e_hat_ikeda']
ax.plot(x, y, '.', alpha=0.5, label='limited')

# Raw strings: '\h' is not a valid escape sequence (SyntaxWarning on recent
# Python versions); the label text itself is unchanged.
ax.set_xlabel(r'$\hat{B_e}$ (model test)')
ax.set_ylabel(r'$\hat{B_e}$ (Simplified Ikeda)')

# Square axes from the origin with a y=x reference line.
xlim = ax.get_xlim()
ylim = ax.get_ylim()
lim = np.max([xlim[1],ylim[1]])
ax.set_xlim(0,lim)
ax.set_ylim(0,lim)
ax.plot([0,lim],[0,lim],'r-')
ax.set_aspect('equal', 'box')
ax.legend()
ax.grid(True)

In [None]:
r2_score(y_true=1000*df3['B_e_hat'], y_pred=1000*df3['B_e_hat_ikeda'])

## SI corrector box

In [None]:
#skip=['omega0']  # This one is already model scale...
#df_ikeda['scale_factor']=df_ikeda['lpp']
#df_ikeda = froude_scale(data=df_ikeda, description=description, skip=skip)
#df_rolldecay['scale_factor']=df_rolldecay['lpp']
#df_rolldecay = froude_scale(data=df_rolldecay, description=description, skip=skip)

## $\phi_a$ approach

In [None]:
# Stack the merged model-test / Simplified Ikeda frame for five amplitudes so
# that phi_a can be used as a regression feature.
phi_as = np.deg2rad(np.linspace(0,10,5))
frames = []
for phi_a in phi_as:
    
    df3 = linearize(phi_a=phi_a, df_rolldecay=df_rolldecay, df_ikeda=df_ikeda)
    df3['phi_a']=phi_a
    frames.append(df3)

# Concatenate once: DataFrame.append was removed in pandas 2.0 and re-copied
# the accumulated frame on every iteration.
X2 = pd.concat(frames, ignore_index=True)
  
features = ['B_W_HAT','B_F_HAT','B_BK_HAT','B_E_HAT','B_L_HAT','phi_a']
labels = ['B_e_hat']
# Keep only rows where every feature and the label are present.
mask=X2[features+labels].notnull().all(axis=1)
data = X2.loc[mask].copy()

# Derived hull parameters: mean of TA and TF, block coefficient, and OG = T - kg.
data['T'] = (data['TA']+data['TF'] )/2
data['CB'] = data['Disp']/(data['lpp']*data['T']*data['beam'])
data['OG'] = (-data.kg + data['T'])

# Database column names -> the symbol-style names used further down.
# NOTE(review): 'Disp' was already added as a copy of 'Volume' when the data was
# loaded, so renaming 'Volume' -> 'Disp' may leave two columns called 'Disp' —
# confirm this is harmless for the downstream estimators.
renamers = {
    'CP' : 'C_p',
    'CB' : 'C_b',
    'IRUD' : 'I_RUD', 
    'BKL' : 'BK_L', 
    'gm' : 'GM', 
    'A0' : 'A_0', 
    'ship_type_id' : 'ship_type_id', 
    'Volume' : 'Disp', 
    'Ixx' : 'I_xx', 
    'BKB' : 'BK_B',
    'KXX' : 'K_xx', 
    'RH' : 'R_h', 
    'AR' : 'A_R', 
    'TWIN' : 'TWIN', 
    'kg': 'kg', 
    'CW' : 'C_W', 
    'beam' : 'beam', 
    'TF' : 'T_F', 
    'ship_speed' : 'V', 
    'TA' : 'T_A',
    'lpp' : 'L_pp',
}
data.rename(columns=renamers, inplace=True)

# Froude scale:
# lengths are divided by L_pp and the speed by sqrt(L_pp).
scale_factor=data['L_pp']
data['beam']/=scale_factor
data['T']/=scale_factor
data['BK_L']/=scale_factor
data['BK_B']/=scale_factor
data['OG']/=scale_factor
data['V']/=np.sqrt(scale_factor)

In [None]:
X = data[features+labels].copy()
y = X.pop(labels[0])

In [None]:
X2['phi_a'].unique()

In [None]:
polynomial_features = PolynomialFeatures(degree=1)
variance_treshold = VarianceThreshold()
linear_regression = LinearRegression()

select_k_best = SelectKBest(k=len(features), score_func=f_regression)
#polynomial_features = PolynomialFeatures(degree=2)
steps=[
        ('polynomial_feature', polynomial_features),
        #('standard_scaler', standard_scaler),
        ('variance_treshold',variance_treshold),
        ('select_k_best',select_k_best),
        ('linear_regression', linear_regression)
]
model_correction = Pipeline(steps=steps)
model_correction.fit(X=X, y=y)   
cv=5
#cross_val_score(estimator=model,X=X,y=y,cv=cv).mean()
scores = cross_validation.cross_validates(model=model_correction, data=data, features=features, 
                                          itterations=20)

In [None]:
scores

In [None]:
np.mean(scores)

In [None]:
np.std(scores)

In [None]:
fig=cross_validation.plot_validate(model=model_correction, data=data, features=features)

In [None]:
polynom_correction = Polynom(model=model_correction, columns=X.columns, y_symbol=symbols.B_e_hat)
polynom_correction.fit(X=X, y=y)
polynom_correction.equation

In [None]:
# Layout: one wide axis on top (RMSE vs. phi_a) and three scatter plots below.
fig = plt.figure()
gs = fig.add_gridspec(2,3)
axes=[
fig.add_subplot(gs[0, :]),
fig.add_subplot(gs[1,0]),
fig.add_subplot(gs[1,1]),
fig.add_subplot(gs[1,2])    
]

r2s = []
rms_corrected = []
phi_as = np.deg2rad(np.linspace(0,10,20))
for phi_a in phi_as:
    
    # Re-linearize the model-test damping at this amplitude and replace the
    # Simplified Ikeda prediction with the corrected polynomial's prediction.
    df=df_compare.copy()
    df['B_e'] = run(function=lambdas.B_e_lambda, inputs=df, phi_a=phi_a)
    df['B_e_hat'] = run(function=lambdas.B_e_hat_lambda, inputs=df, g=g, rho=rho)  
    df['phi_a']=phi_a
    df['B_e_hat_ikeda']=polynom_correction.predict(df[features])
    r2s.append(r2_score(y_true=df['B_e_hat'], y_pred=df['B_e_hat_ikeda']))
    # RMSE as sqrt(MSE): the `squared=False` keyword was deprecated in
    # scikit-learn 1.4 and later removed.
    rms_corrected.append(np.sqrt(mean_squared_error(y_true=df['B_e_hat'], y_pred=df['B_e_hat_ikeda'])))
    
ax=axes[0]
#ax.plot(np.rad2deg(phi_as), r2s)
#ax.plot(np.rad2deg(phi_as), rms, label='SI')
ax.plot(np.rad2deg(phi_as), rms_corrected, '--', label='SI-corrected')
ax.legend()

# Raw strings: '\p' and '\h' are not valid escape sequences.
ax.set_xlabel(r'$\phi_a$ [deg]')
#ax.set_ylabel(r'$R^2(\hat{B_e})$')
ax.set_ylabel(r'$RMSE(\hat{B_e})$')

ax.grid()

# Scatter plots at three selected amplitudes.
phi_as = np.deg2rad([0,5,10])

for phi_a,ax in zip(phi_as,axes[1:]):
    
    df=df_compare.copy()
    df['B_e'] = run(function=lambdas.B_e_lambda, inputs=df, phi_a=phi_a)
    df['B_e_hat'] = run(function=lambdas.B_e_hat_lambda, inputs=df, g=g, rho=rho)  
    df['phi_a']=phi_a
    df['B_e_hat_ikeda']=polynom_correction.predict(df[features])
    
    x_=df['B_e_hat']
    y_=df['B_e_hat_ikeda']
    ax.plot(x_, y_, '.', alpha=0.5)
    ax.set_title(label=r'$\phi_a$:%0.0f deg' % np.rad2deg(phi_a))

    ax.set_xlabel(r'$\hat{B_e}$ (model test)')
    
    # y=x reference line drawn up to the auto-scaled limit, then all panels
    # are clipped to the same fixed range.
    xlim = ax.get_xlim()
    ylim = ax.get_ylim()
    lim = np.max([xlim[1],ylim[1]])
    ax.set_xlim(0,lim)
    ax.set_ylim(0,lim)
    ax.plot([0,lim],[0,lim],'r-')
    ax.set_aspect('equal', 'box')
    ax.grid(True)
    ax.set_xlim(0,0.02)
    ax.set_ylim(0,0.02)


In [None]:
equation_simple = significant_numbers(polynom_correction.equation, precision=4)
equation_simple

In [None]:
polynom_simple = Polynom(model=model_correction, columns=X.columns, y_symbol=symbols.B_e_hat)
polynom_simple.fit(X=X, y=y)
polynom_simple.equation = equation_simple

In [None]:
polynom_simple.score(X=X, y=y)

In [None]:
name='polynom_correction'
equation = pylatex_extenders.hatify(polynom_simple.equation)
eq=pylatex_extenders.Equation(equation,label='eq:%s'%name)
file_path = os.path.join(rolldecay.equations_path,name)
eq.generate_tex(file_path)

In [None]:
equation

In [None]:
for symbol in equation.free_symbols:
    pass

In [None]:
symbol.name

In [None]:
# Layout: one wide axis on top (RMSE vs. phi_a) and three scatter plots below.
fig = plt.figure()
gs = fig.add_gridspec(2,3)
axes=[
fig.add_subplot(gs[0, :]),
fig.add_subplot(gs[1,0]),
fig.add_subplot(gs[1,1]),
fig.add_subplot(gs[1,2])    
]

r2s = []
rms_corrected = []
phi_as = np.deg2rad(np.linspace(0,10,20))
for phi_a in phi_as:
    
    # Re-linearize the model-test damping at this amplitude and predict with the
    # rounded-coefficient polynomial (polynom_simple).
    df=df_compare.copy()
    df['B_e'] = run(function=lambdas.B_e_lambda, inputs=df, phi_a=phi_a)
    df['B_e_hat'] = run(function=lambdas.B_e_hat_lambda, inputs=df, g=g, rho=rho)  
    df['phi_a']=phi_a
    df['B_e_hat_ikeda']=polynom_simple.predict(df[features])
    r2s.append(r2_score(y_true=df['B_e_hat'], y_pred=df['B_e_hat_ikeda']))
    # RMSE as sqrt(MSE): the `squared=False` keyword was deprecated in
    # scikit-learn 1.4 and later removed.
    rms_corrected.append(np.sqrt(mean_squared_error(y_true=df['B_e_hat'], y_pred=df['B_e_hat_ikeda'])))

    
ax=axes[0]
#ax.plot(np.rad2deg(phi_as), r2s)
# rms_SI comes from the uncorrected sweep cell and uses the same 20 amplitudes.
ax.plot(np.rad2deg(phi_as), rms_SI, label='SI')
ax.plot(np.rad2deg(phi_as), rms_corrected, '--', label='SI-corrected')
ax.legend()

# Raw strings: '\p' and '\h' are not valid escape sequences.
ax.set_xlabel(r'$\phi_a$ [deg]')
ax.set_ylabel(r'$RMSE(\hat{B_e})$')
ax.grid()

# Scatter plots at three selected amplitudes.
phi_as = np.deg2rad([0,5,10])

for phi_a,ax in zip(phi_as,axes[1:]):
    
    df=df_compare.copy()
    df['B_e'] = run(function=lambdas.B_e_lambda, inputs=df, phi_a=phi_a)
    df['B_e_hat'] = run(function=lambdas.B_e_hat_lambda, inputs=df, g=g, rho=rho)  
    df['phi_a']=phi_a
    df['B_e_hat_ikeda']=polynom_simple.predict(df[features])
    
    x_=df['B_e_hat']
    y_=df['B_e_hat_ikeda']
    ax.plot(x_, y_, '.', alpha=0.5)
    ax.set_title(label=r'$\phi_a$:%0.0f deg' % np.rad2deg(phi_a))

    ax.set_xlabel(r'$\hat{B_e}$ (model test)')
    
    # y=x reference line drawn up to the auto-scaled limit, then all panels
    # are clipped to the same fixed range.
    xlim = ax.get_xlim()
    ylim = ax.get_ylim()
    lim = np.max([xlim[1],ylim[1]])
    ax.set_xlim(0,lim)
    ax.set_ylim(0,lim)
    ax.plot([0,lim],[0,lim],'r-')
    ax.set_aspect('equal', 'box')
    ax.grid(True)
    ax.set_xlim(0,0.02)
    ax.set_ylim(0,0.02)

axes[1].set_ylabel(r'$\hat{B_e}$ (Simplified Ikeda)')
plt.tight_layout()
save_fig(fig, name='ikeda_corrected_phi_a')

## Pure polynom ikeda parameters

In [None]:
ikeda_parameters = [
        'beam',
        'T',
        'BK_L',
        'BK_B',
        'OG',
        'omega0_hat',        
        'C_b',
        'A_0',
        'V',
        'phi_a',
#        'B_L_HAT'
]

In [None]:
# Sweep the number of selected polynomial terms (k) and record the mean and
# spread of the 5-fold cross-validation score for each k.
variance_treshold = VarianceThreshold(0.000)
#standard_scaler = StandardScaler()

y = data['B_e_hat']
X = data[ikeda_parameters]
       
polynomial_features = PolynomialFeatures(degree=2)
linear_regression = LinearRegression()

ks = np.arange(1,30,1)
scores = []
stds = []
for k in ks:
    select_k_best = SelectKBest(k=k, score_func=f_regression)
    steps=[
            ('polynomial_feature', polynomial_features),
            #('standard_scaler', standard_scaler),
            ('variance_treshold',variance_treshold),
            ('select_k_best',select_k_best),
            ('linear_regression', linear_regression)
    ]
    
    model = Pipeline(steps=steps)
    # Leaves `model` fitted on the full data set for the cells that follow.
    model.fit(X=X, y=y)
    cv=5
    # One cross-validation run gives both statistics; the identical run used to
    # be repeated just to obtain the std.
    cv_scores = cross_val_score(estimator=model,X=X,y=y,cv=cv)
    score = cv_scores.mean()
    std = cv_scores.std()
    
    scores.append(score)
    stds.append(std)
    
scores = np.array(scores)
stds = np.array(stds)

In [None]:
fig,ax = plt.subplots()
ax.plot(ks,scores-stds,'.-')
ax.plot(ks,scores,'.-')
ax.plot(ks,scores+stds,'.-')

In [None]:
models_=data['model_number'].unique()
models_
np.random.shuffle(models_)

In [None]:
np.random.shuffle([[1,2]])

In [None]:
np.random.seed(seed=0)
scores = cross_validation.cross_validate(model=model, data=data, features=ikeda_parameters)

In [None]:
scores

In [None]:
np.random.seed(seed=0)
scores = cross_validation.cross_validates(model=model, data=data, features=ikeda_parameters, itterations=10)
scores

In [None]:
variance_treshold = VarianceThreshold(0.000)
np.random.seed(seed=0)
#standard_scaler = StandardScaler()

y = data['B_e_hat']
X = data[ikeda_parameters]
       
polynomial_features = PolynomialFeatures(degree=2)
linear_regression = LinearRegression()

ks = np.arange(1,20,1)
scores = []
stds = []
for k in ks:
    select_k_best = SelectKBest(k=k, score_func=f_regression)
    steps=[
            ('polynomial_feature', polynomial_features),
            #('standard_scaler', standard_scaler),
            ('variance_treshold',variance_treshold),
            ('select_k_best',select_k_best),
            ('linear_regression', linear_regression)
    ]
    
    model = Pipeline(steps=steps)
    model.fit(X=X, y=y)
    cv=5
    scores_ = cross_validation.cross_validates(model=model, data=data, features=ikeda_parameters, 
                                               itterations=20) 
    score = np.mean(scores_.flatten())
    std = np.std(scores_.flatten())
    scores.append(score)
    stds.append(std)
    
scores = np.array(scores)
stds = np.array(stds)

In [None]:
fig,ax = plt.subplots()
#ax.set_xlim(0,22)
ax.plot(ks,scores-stds,'.-')
ax.plot(ks,scores,'.-')
ax.plot(ks,scores+stds,'.-')
ax.grid()

In [None]:
select_k_best = SelectKBest(k=12, score_func=f_regression)
steps=[
        ('polynomial_feature', polynomial_features),
        #('standard_scaler', standard_scaler),
        ('variance_treshold',variance_treshold),
        ('select_k_best',select_k_best),
        ('linear_regression', linear_regression)
]

model_complex = Pipeline(steps=steps)
X=data[ikeda_parameters]
y=data['B_e_hat']
model_complex.fit(X=X, y=y)

In [None]:
scores = cross_validation.cross_validates(model=model_complex, data=data, features=ikeda_parameters, 
                                          itterations=20)
scores

In [None]:
np.mean(scores)

In [None]:
np.std(scores)

In [None]:
fig=cross_validation.plot_validate(model=model_complex, data=data, features=ikeda_parameters)

In [None]:
polynom_pure_complex = Polynom(model=model_complex, columns=X.columns, y_symbol=symbols.B_e_hat)
polynom_pure_complex.fit(X=X, y=y)
polynom_pure_complex.equation

In [None]:
equation_pure_complex_simple = significant_numbers(polynom_pure_complex.equation, precision=4)
equation_pure_complex_simple

In [None]:
polynom_pure_complex_simple = Polynom(model=model_complex, columns=X.columns, y_symbol=symbols.B_e_hat)
polynom_pure_complex_simple.fit(X=X, y=y)
polynom_pure_complex_simple.equation = equation_pure_complex_simple

In [None]:
polynom_pure_complex.score(X=X, y=y)

In [None]:
polynom_pure_complex_simple.score(X=X, y=y)

In [None]:
name='polynom_complex'
equation = pylatex_extenders.hatify(polynom_pure_complex_simple.equation)
eq=pylatex_extenders.Multiline(equation,label='eq:%s'%name)
file_path = os.path.join(rolldecay.equations_path,name)
eq.generate_tex(file_path)

In [None]:
select_k_best = SelectKBest(k=8, score_func=f_regression)
steps=[
        ('polynomial_feature', polynomial_features),
        #('standard_scaler', standard_scaler),
        ('variance_treshold',variance_treshold),
        ('select_k_best',select_k_best),
        ('linear_regression', linear_regression)
]

model_simple = Pipeline(steps=steps)
X=data[ikeda_parameters]
y=data['B_e_hat']
model_simple.fit(X=X, y=y)

In [None]:
scores = cross_validation.cross_validates(model=model_simple, data=data, features=ikeda_parameters,
                                          itterations=40)
scores

In [None]:
np.mean(scores)

In [None]:
fig=cross_validation.plot_validate(model=model_simple, data=data, features=ikeda_parameters)

In [None]:
polynom_pure_simple = Polynom(model=model_simple, columns=X.columns, y_symbol=symbols.B_e_hat)
polynom_pure_simple.fit(X=X, y=y)
polynom_pure_simple.equation

## Cross validation

### Uncorrected

In [None]:
from sklearn.model_selection import KFold
kf = KFold(n_splits=5)

In [None]:
scores_w=[]
for train_index, test_index in kf.split(data):

    #X_train, X_test = X.iloc[train_index], X.iloc[test_index]
    X2_test = data.iloc[test_index]
    y_true=X2_test['B_e_hat']
    y_pred=X2_test['B_e_hat_ikeda']
    score = r2_score(y_true=y_true, y_pred=y_pred)
    scores_w.append(score)
    
np.mean(scores_w)  

In [None]:
r2_uncorrected = r2_score(y_true=data['B_e_hat'], y_pred=data['B_e_hat_ikeda'])
r2_uncorrected

## Correction

In [None]:
np.random.seed(seed=0)
scores_correction = cross_validation.cross_validates(model=model_correction, data=data, features=features, 
                                          itterations=20)

In [None]:
r2_correction = np.mean(scores_correction)
r2_correction

In [None]:
np.std(scores_correction)

## Polynom complex

In [None]:
np.random.seed(seed=0)
scores_polynom = cross_validation.cross_validates(model=model_complex, data=data, features=ikeda_parameters, 
                                          itterations=20)

In [None]:
r2_polynom = np.mean(scores_polynom)
r2_polynom

In [None]:
# Predicted vs. measured B_e_hat for the three approaches on the same axes.
fig,ax=plt.subplots()
ax.plot(data[labels],data['B_e_hat_ikeda'], '.', label='Simplified Ikeda', alpha=0.8)
ax.plot(y,polynom_simple.predict(data), 'x', label='SI-corrected', alpha=0.7)
ax.plot(y,polynom_pure_complex.predict(data), '+', label='Polynomial', alpha=0.5)


#ax.plot(y,polynom_pure_simple.predict(X), '+', label='simple',alpha=0.5)


# Raw strings: '\h' is not a valid escape sequence (SyntaxWarning on recent
# Python versions); the label text itself is unchanged.
ax.set_xlabel(r'$\hat{B_e}$ (model test)')
ax.set_ylabel(r'$\hat{B_e}$ (Simplified Ikeda)')

# Square axes from the origin with a y=x reference line.
xlim = ax.get_xlim()
ylim = ax.get_ylim()
lim = np.max([xlim[1],ylim[1]])
ax.set_xlim(0,lim)
ax.set_ylim(0,lim)
ax.plot([0,lim],[0,lim],'r-')
ax.set_aspect('equal', 'box')
ax.legend()
ax.grid(True)

In [None]:
mean_='$mean(R^2)$'
std_='$std(R^2)$'

# One record per model. The uncorrected Simplified Ikeda only has a single R^2
# (no cross-validation spread), so its std entry is left missing (NaN).
rows = {
    'Simplified Ikeda': {mean_: r2_uncorrected},
    'Simplified Ikeda corrected': {mean_: np.mean(scores_correction),
                                   std_: np.std(scores_correction)},
    'New regression': {mean_: np.mean(scores_polynom),
                       std_: np.std(scores_polynom)},
}

# Built in one step: DataFrame.append was removed in pandas 2.0, and an empty
# pd.Series(name=...) without an explicit dtype triggers a warning.
df_cross_validation = pd.DataFrame(list(rows.values()),
                                   index=pd.Index(list(rows), name='model'),
                                   columns=[mean_, std_])



In [None]:
df_cross_validation

In [None]:
latex = df_cross_validation.to_latex(float_format='%0.2f', na_rep='')
name='cross_validation'
file_path = os.path.join(rolldecay.equations_path,name)

# (Uncomment this one if you want to regenerate this table)
#save_table(file_path=file_path, tabular_tex=latex, label='tab:crossvalidation', 
#           caption='Statistics from cross validations with all models')


In [None]:
df_rolldecay['date'].min()

In [None]:
df_rolldecay['date'].max()

In [None]:
polynom_pure_complex.save('polynom_complex')