## This is a preview of how the "new regression" was developed.

In [None]:
# Select matplotlib's interactive "notebook" backend for the figures in this notebook.
%matplotlib notebook
# Load IPython's autoreload extension; mode 2 reloads imported modules before each cell is executed.
%load_ext autoreload
%autoreload 2

In [None]:
import pandas as pd
pd.options.display.max_rows = 999
pd.options.display.max_columns = 999
pd.set_option("display.max_columns", None)
import numpy as np
import os
import matplotlib.pyplot as plt

# Own stuff:
from rolldecayestimators.polynom_estimator import Polynom
from rolldecayestimators import symbols
from rolldecayestimators import cross_validation
from rolldecayestimators.substitute_dynamic_symbols import run, lambdify, significant_numbers


# Scikit-learn:
from sklearn.feature_selection import VarianceThreshold
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
from sklearn.pipeline import Pipeline
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import f_regression
from sklearn.model_selection import cross_val_score

## Pure polynom ikeda parameters

In [None]:
# Names of the data columns used as input features for the regression.
ikeda_parameters = [
    'beam',
    'T',
    'BK_L',
    'BK_B',
    'OG',
    'omega0_hat',
    'C_b',
    'A_0',
    'V',
    'phi_a',
    # 'B_L_HAT',  # currently disabled
]

In [None]:
# Read the semicolon-separated data set, using its first column as the index,
# and preview the first rows.
data = pd.read_csv(
    'data_new_method.csv',
    sep=';',
    index_col=0,
)
data.head()

In [None]:
# Sweep over the number of features kept by SelectKBest and record the mean
# and standard deviation of the cross-validation scores for each setting.
np.random.seed(seed=0)

X = data[ikeda_parameters]
y = data['B_e_hat']

# Estimator instances that are reused by every pipeline built in the sweep.
#standard_scaler = StandardScaler()
variance_treshold = VarianceThreshold(0.000)
polynomial_features = PolynomialFeatures(degree=2)
linear_regression = LinearRegression()

# Not read anywhere in this cell; kept so the name stays defined as before.
cv = 5

ks = np.arange(1, 20, 1)
scores = []
stds = []
for k in ks:
    select_k_best = SelectKBest(score_func=f_regression, k=k)
    steps = [
        ('polynomial_feature', polynomial_features),
        #('standard_scaler', standard_scaler),
        ('variance_treshold', variance_treshold),
        ('select_k_best', select_k_best),
        ('linear_regression', linear_regression),
    ]
    model = Pipeline(steps=steps)
    model.fit(X=X, y=y)

    scores_ = cross_validation.cross_validates(
        model=model,
        data=data,
        features=ikeda_parameters,
        itterations=20,
    )

    # Flatten once and reuse the result for both statistics.
    flat_scores = scores_.flatten()
    score = np.mean(flat_scores)
    std = np.std(flat_scores)
    scores.append(score)
    stds.append(std)

scores = np.array(scores)
stds = np.array(stds)

In [None]:
# Plot the mean cross-validation score against k, together with the curves
# one standard deviation below and above the mean.
fig, ax = plt.subplots()
#ax.set_xlim(0,22)
for curve in (scores - stds, scores, scores + stds):
    ax.plot(ks, curve, '.-')
ax.grid()

In [None]:
# Build the final pipeline, keeping the 12 best-scoring features, and fit it
# on the full data set.
X = data[ikeda_parameters]
y = data['B_e_hat']

select_k_best = SelectKBest(score_func=f_regression, k=12)
steps = [
    ('polynomial_feature', polynomial_features),
    #('standard_scaler', standard_scaler),
    ('variance_treshold', variance_treshold),
    ('select_k_best', select_k_best),
    ('linear_regression', linear_regression),
]

model_complex = Pipeline(steps=steps)
model_complex.fit(X=X, y=y)

In [None]:
# Reset NumPy's global random seed, then draw the validation plot for the
# fitted pipeline.
np.random.seed(seed=0)
fig = cross_validation.plot_validate(
    model=model_complex,
    data=data,
    features=ikeda_parameters,
)

## Showing the model as a polynomial expression:

In [None]:
# Wrap the fitted pipeline in a Polynom, fit it on the same X and y,
# and display the resulting equation.
polynom_pure_complex = Polynom(
    model=model_complex,
    columns=X.columns,
    y_symbol=symbols.B_e_hat,
)
polynom_pure_complex.fit(X=X, y=y)
polynom_pure_complex.equation

#### Simplify the expression for the paper:

In [None]:
# Pass the equation through significant_numbers with precision=4
# (presumably this shortens the numeric coefficients; confirm in the helper).
equation_pure_complex_simple = significant_numbers(
    polynom_pure_complex.equation,
    precision=4,
)
equation_pure_complex_simple

## Cross validation
The cross validation uses 5-fold splits and is repeated 20 times, with the folds selected at random each time. The folds are constructed so that all data belonging to a given ship model ends up in the same fold.

### Polynom complex

In [None]:
# Reset NumPy's global random seed, then collect the cross-validation scores
# of the fitted pipeline over 20 iterations.
np.random.seed(seed=0)
scores_polynom = cross_validation.cross_validates(
    model=model_complex,
    data=data,
    features=ikeda_parameters,
    itterations=20,
)

In [None]:
# Mean of all collected cross-validation scores; the bare name on the last
# line makes the notebook display the value.
r2_polynom = np.mean(scores_polynom)
r2_polynom

In [None]:
# Summary table of the cross-validation results: one row per model, with the
# mean and standard deviation of its scores as columns.
mean_ = '$mean(R^2)$'
std_ = '$std(R^2)$'

# Row for this model. The Series is created with its values directly, since
# an empty, dtype-less pd.Series(name=...) triggers a warning on pandas 1.x.
s = pd.Series(
    {
        mean_: np.mean(scores_polynom),
        std_: np.std(scores_polynom),
    },
    name='New regression',
)

# DataFrame.append was removed in pandas 2.0, so the table is constructed
# from the row directly; the Series name becomes the row label.
df_cross_validation = pd.DataFrame([s], columns=[mean_, std_])
df_cross_validation.index.name = 'model'

In [None]:
# Display the cross-validation summary table.
df_cross_validation