# Regression simulation

In [None]:
import warnings
warnings.filterwarnings("ignore")

In [None]:
%matplotlib inline
%load_ext autoreload
%autoreload 2

In [None]:
from jupyterthemes import jtplot
jtplot.style(theme='onedork', context='notebook', ticks=True, grid=False)

In [None]:
import pandas as pd
# Display settings: show up to 999 rows and never truncate columns.
# (max_columns used to be set to 999 and then immediately overridden with None;
# only the effective value is kept.)
pd.set_option("display.max_rows", 999)
pd.set_option("display.max_columns", None)  # None = no column limit
import numpy as np
import os
import matplotlib.pyplot as plt
from pylab import rcParams
rcParams['figure.figsize'] = 15, 5

import data
import copy
from rolldecay.bis_system import BisSystem

from rolldecay import database
from mdldb.tables import Run, RolldecayQuadraticBRegression
from rolldecayestimators.direct_estimator_cubic import EstimatorQuadraticB
from rolldecayestimators.ikeda_estimator import IkedaQuadraticEstimator
from rolldecayestimators import symbols, equations
from rolldecayestimators.substitute_dynamic_symbols import lambdify
from rolldecayestimators.transformers import CutTransformer, LowpassFilterDerivatorTransformer, ScaleFactorTransformer, OffsetTransformer


import sympy as sp

from sklearn.pipeline import Pipeline
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.feature_selection import VarianceThreshold
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import chi2
from sklearn.feature_selection import f_regression

from sklearn.linear_model import LinearRegression
from sklearn.model_selection import cross_validate
from sklearn.preprocessing import PolynomialFeatures
from sklearn.pipeline import Pipeline
from sklearn.pipeline import make_pipeline

import signal_lab
from sqlalchemy.inspection import inspect
import seaborn as sns
import docs
import pickle
import gc
import dill
from rolldecayestimators.substitute_dynamic_symbols import lambdify,run
from rolldecayestimators.polynom_estimator import Polynom

## omega0

In [None]:
# Load the omega0_hat polynomial from disk; it is used to predict 'omega_hat'.
omega_hat_polynom = Polynom.load('omega0_hat_polynom.sym')

# NOTE(review): this rebinds `data`, shadowing the `data` module imported at the
# top of the notebook. From here on `data` is the object unpickled from 'data.sav'
# (it is used like a DataFrame in the cells below).
data = pd.read_pickle('data.sav')
data['omega_hat'] = omega_hat_polynom.predict(data)
# Targets for the regressions; indexed by column name (e.g. 'B_1_hat') further down.
y_s = pd.read_pickle('y.sav')

## $\hat{B_1}$

In [None]:
filename = 'B_1_hat_model.sav'
# Open through a context manager so the file handle is always closed.
# NOTE(review): pickle can execute arbitrary code while loading - only load trusted files.
with open(filename, 'rb') as model_file:
    B_1_hat_model = pickle.load(model_file)

In [None]:
y = y_s['B_1_hat']
#X=data[zeta_model.keys].copy()  # Handle categorical data later
#zeta_model.score(X=X,y=y)

In [None]:
B_1_hat_polynom = Polynom.load('B_1_hat_polynom.sym')
B_1_hat_polynom.score(X=data,y=y)

## $\hat{B_2}$

In [None]:
filename = 'B_2_hat_model.sav'
# Open through a context manager so the file handle is always closed.
# NOTE(review): pickle can execute arbitrary code while loading - only load trusted files.
with open(filename, 'rb') as model_file:
    B_2_hat_model = pickle.load(model_file)

In [None]:
B_2_hat_polynom = Polynom.load('B_2_hat_polynom.sym')
# Score against the B_2_hat target. The notebook-level `y` still holds
# y_s['B_1_hat'], so reusing it here would score B_2 against the wrong target.
# NOTE(review): assumes y_s has a 'B_2_hat' column next to 'B_1_hat' - confirm.
B_2_hat_polynom.score(X=data, y=y_s['B_2_hat'])

## Predict parameters

In [None]:
# Copy of the input data with one column per polynomial prediction.
predict = data.copy()
polynom_by_column = (
    ('omega_hat', omega_hat_polynom),
    ('B_1_hat', B_1_hat_polynom),
    ('B_2_hat', B_2_hat_polynom),
)
for column, polynom in polynom_by_column:
    # Every polynomial predicts from the untouched `data`, not from `predict`.
    predict[column] = polynom.predict(data)


## Simulate

In [None]:
df_rolldecay = database.load(rolldecay_table_name='rolldecay_quadratic_b', limit_score=0.90)
db = database.get_db()

In [None]:
df_rolldecay_ikeda = database.load(rolldecay_table_name='rolldecay_simplified_ikeda', limit_score=0.0)

In [None]:
predict.head()

In [None]:
meta_data = predict.iloc[0]

In [None]:
meta_data_full_scale = df_rolldecay.loc[meta_data.name]

In [None]:
db_run = db.session.query(Run).get(int(meta_data.name))
df = database.load_run(db_run)

In [None]:
df.plot(y='phi')

In [None]:
# Transformers for the preprocessing pipeline.
lowpass_filter = LowpassFilterDerivatorTransformer(cutoff=2, minimum_score=0.99)
cutter = CutTransformer(
    phi_max=np.deg2rad(9),
    phi_min=np.deg2rad(0.25),
    phi1d_start_tolerance=0.015,
)
offset_transformer = OffsetTransformer()

# Order of the steps: filter -> cutter -> offset_transformer.
# A ScaleFactorTransformer ('scaler') step is deliberately not included;
# whether Froude scaling is a good idea is still an open question.
steps = [
    ('filter', lowpass_filter),
    ('cutter', cutter),
    ('offset_transformer', offset_transformer),
]

preprocessor = Pipeline(steps)
# Fit on the loaded run, then transform that same run.
X = preprocessor.fit(df).transform(df)

In [None]:
X.plot(y='phi')

In [None]:
omega_hat_equation = equations.omega_hat_equation.subs(symbols.omega,symbols.omega0)
omega_hat_equation

In [None]:
omega0_equation = sp.Eq(symbols.omega0,sp.solve(omega_hat_equation,symbols.omega0)[0])
omega0_equation

In [None]:
meta_data['omega_hat']

In [None]:
omega0_lambda = lambdify(sp.solve(omega0_equation,symbols.omega0)[0])

In [None]:
C_1A = equations.new_symbols[symbols.C_1]
C_1A_equation = sp.Eq(C_1A,sp.solve(EstimatorQuadraticB.omgea0_equation,C_1A)[0])
C_1A_equation

In [None]:
C_1A_lambda = lambdify(sp.solve(C_1A_equation,C_1A)[0])

In [None]:
B_1_equation= sp.Eq(symbols.B_1,sp.solve(equations.B_1_hat_equation,symbols.B_1)[0])
B_1_equation

In [None]:
B_2_equation= sp.Eq(symbols.B_2,sp.solve(equations.B_2_hat_equation,symbols.B_2)[0])
B_2_equation

In [None]:
B_1_lambda=lambdify(sp.solve(B_1_equation,symbols.B_1)[0])
B_2_lambda=lambdify(sp.solve(B_2_equation,symbols.B_2)[0])

In [None]:
equations.normalize_equations[symbols.B_1]

In [None]:
B_1A = equations.new_symbols[symbols.B_1]
B_1A_lambda = lambdify(sp.solve(equations.normalize_equations[symbols.B_1],B_1A)[0])

B_2A = equations.new_symbols[symbols.B_2]
B_2A_lambda = lambdify(sp.solve(equations.normalize_equations[symbols.B_2],B_2A)[0])

In [None]:
# Parameters of the selected run, extended with constants and derived quantities.
run_data = meta_data.copy()
run_data['g'] = 9.81
run_data['rho'] = 1000
run_data['m'] = run_data['Disp'] * run_data['rho']

# Evaluated strictly in this order; each call receives run_data as updated so far.
derived_parameters = (
    ('omega0', omega0_lambda),
    ('C_1A', C_1A_lambda),
    ('A_44', EstimatorQuadraticB.functions['A44']),
    ('B_1', B_1_lambda),
    ('B_2', B_2_lambda),
    ('B_1A', B_1A_lambda),
    ('B_2A', B_2A_lambda),
)
for key, function in derived_parameters:
    run_data[key] = run(function=function, inputs=run_data)


In [None]:
run_data['Disp']

In [None]:
direct_estimator = EstimatorQuadraticB.load(data=run_data, X=X)

In [None]:
direct_estimator.plot_fit()

In [None]:
run_data['omega0']

In [None]:
direct_estimator.omega0

In [None]:
direct_estimator.plot_error()

In [None]:
direct_estimator.score()

### Cutting

In [None]:
direct_estimator = EstimatorQuadraticB.load(data=run_data, X=X.loc[0:400])

In [None]:
direct_estimator.plot_fit()

In [None]:
direct_estimator.plot_error()

In [None]:
direct_estimator.score()

### Using "real" omega0

In [None]:
# Replace the predicted omega0 by the value stored for the model test,
# then recompute everything that is derived after it.
run_data['omega0'] = meta_data_full_scale.omega0

# Evaluated strictly in this order; each call receives run_data as updated so far.
derived_parameters = (
    ('C_1A', C_1A_lambda),
    ('A_44', EstimatorQuadraticB.functions['A44']),
    ('B_1', B_1_lambda),
    ('B_2', B_2_lambda),
    ('B_1A', B_1A_lambda),
    ('B_2A', B_2A_lambda),
)
for key, function in derived_parameters:
    run_data[key] = run(function=function, inputs=run_data)

In [None]:
direct_estimator = EstimatorQuadraticB.load(data=run_data, X=X)

In [None]:
# Plot the direct estimator and the Ikeda estimator in the same axes.
fig, ax = plt.subplots()
direct_estimator.plot_fit(ax=ax)

# Copy the row before changing it: a row selected with .loc may be a view,
# and df_rolldecay_ikeda itself must not be altered by the omega0 override.
meta_data_ikeda = df_rolldecay_ikeda.loc[meta_data.name].copy()
meta_data_ikeda['omega0'] = meta_data_full_scale.omega0
ikeda_estimator = IkedaQuadraticEstimator.load(data=meta_data_ikeda, X=X)
ikeda_estimator.plot_fit(ax=ax, model_test=False)

In [None]:
direct_estimator.score()

In [None]:
ikeda_estimator.score()

## Do it for many runs

In [None]:
def evaluate(meta_data):
    """Score the predicted parameters of one run and store the result in the database.

    The run is loaded from the database and passed through the notebook-level
    ``preprocessor``. An ``EstimatorQuadraticB`` is created from ``meta_data``,
    its ``result_for_database`` output is merged into the database as a
    ``RolldecayQuadraticBRegression`` row, and the estimator score is returned.

    Parameters
    ----------
    meta_data : pandas.Series
        One row of parameters; ``meta_data.name`` is used as the run id.
        The caller's object is not modified.

    Returns
    -------
    The value of ``direct_estimator.score(X=X)``, or ``None`` when the run id
    is not in ``df_rolldecay.index``.
    """
    if meta_data.name not in df_rolldecay.index:
        return None

    # Work on a private copy: 'Volume' is added below, and the row object handed
    # over by DataFrame.apply should not be mutated.
    meta_data = meta_data.copy()

    db_run = db.session.query(Run).get(int(meta_data.name))
    df = database.load_run(db_run)

    # NOTE: this refits the shared notebook-level preprocessor for every run.
    preprocessor.fit(df)
    X = preprocessor.transform(df)

    direct_estimator = EstimatorQuadraticB.load(data=meta_data, X=X)
    meta_data['Volume'] = meta_data['Disp']
    s = direct_estimator.result_for_database(meta_data=meta_data)
    rolldecay_db = RolldecayQuadraticBRegression(run_id=db_run.id, **s)

    try:
        db.session.merge(rolldecay_db)
        db.session.commit()
    except Exception:
        # Leave the session usable for the next call, then surface the error.
        db.session.rollback()
        raise

    return direct_estimator.score(X=X)
    

In [None]:
# Constants and the omega0 stored in df_rolldecay (aligned on the index).
predict['omega0'] = df_rolldecay.omega0
predict['g'] = 9.81
predict['rho'] = 1000
predict['m'] = predict['Disp'] * predict['rho']

# Evaluated strictly in this order; each call receives `predict` as updated so far.
derived_parameters = (
    ('C_1A', C_1A_lambda),
    ('A_44', EstimatorQuadraticB.functions['A44']),
    ('B_1', B_1_lambda),
    ('B_2', B_2_lambda),
    ('B_1A', B_1A_lambda),
    ('B_2A', B_2A_lambda),
)
for key, function in derived_parameters:
    predict[key] = run(function=function, inputs=predict)

In [None]:
predict['score'] = predict.apply(func=evaluate, axis=1)

In [None]:
predict['score'].hist(bins=40)

In [None]:
df_rolldecay['score'].hist(bins=40)

## Faster way to calculate score

In [None]:
predict.head()

In [None]:
B_e_equation =sp.Eq(symbols.B_e,sp.solve(equations.B_e_equation, symbols.B_e)[0])
B_e_equation

In [None]:
B_ep, B_1p, B_2p = sp.symbols('B_ep B_1p B_2p')

B_ep_equation = B_e_equation.subs([
    (symbols.B_e,B_ep),
    (symbols.B_1, B_1p),
    (symbols.B_2, B_2p),
])
B_ep_equation

In [None]:
Be_lambda = lambdify(sp.solve(B_e_equation,symbols.B_e)[0])

In [None]:
Be_lambda

In [None]:
# Absolute value of 'phi_start', converted with np.rad2deg, stored in both frames.
# NOTE(review): phi_a ends up in degrees here - confirm this is the unit Be_lambda expects.
phi_a_from_start = np.rad2deg(df_rolldecay['phi_start'].abs())
predict['phi_a'] = phi_a_from_start
df_rolldecay['phi_a'] = phi_a_from_start

In [None]:
predict['B_e']=run(function=Be_lambda, inputs=predict)

In [None]:
df_rolldecay['B_e']=run(function=Be_lambda, inputs=df_rolldecay)

In [None]:
epsilon,SSR,SST,R = sp.symbols('epsilon SSR SST R')
phi_min, phi_max, = sp.symbols('phi_min phi_max')
residual_equation = sp.Eq(epsilon, symbols.B_e-B_ep)
residual_equation

In [None]:
SSR_equation = sp.Eq(SSR,sp.Integral(epsilon**2,(symbols.phi_a,phi_min,phi_max)))
SSR_equation

In [None]:
SSR_equation = SSR_equation.subs(epsilon, sp.solve(residual_equation, epsilon)[0]).subs(
    symbols.B_e,sp.solve(B_e_equation, symbols.B_e)[0]).subs(
    B_ep,sp.solve(B_ep_equation, B_ep)[0])
SSR_equation

In [None]:
SSR_equation2 = SSR_equation.doit()
SSR_equation2

In [None]:
B_mean = sp.symbols('B_mean')

# Mean of B_e over [phi_min, phi_max]: the integral of B_e divided by the
# interval length. B_e must not be squared here, because SST is built from
# (B_e - B_mean)**2 and therefore needs the mean of B_e itself.
B_mean_equation = sp.Eq(
    B_mean,
    sp.Integral(
        sp.solve(B_e_equation, symbols.B_e)[0],
        (symbols.phi_a, phi_min, phi_max),
    ) / (phi_max - phi_min),
)
B_mean_equation

In [None]:
B_mean_equation2 = B_mean_equation.doit()
B_mean_equation2

In [None]:
SST_equation = sp.Eq(SST,sp.Integral((symbols.B_e-B_mean)**2,(symbols.phi_a,phi_min,phi_max)))
SST_equation

In [None]:
# Insert the evaluated mean and the B_e expression into SST, then evaluate the integral.
B_mean_solution = sp.solve(B_mean_equation2, B_mean)[0]
B_e_solution = sp.solve(B_e_equation, symbols.B_e)[0]

SST_substitutions = [
    (B_mean, B_mean_solution),
    (symbols.B_e, B_e_solution),
]
SST_equation2 = SST_equation.subs(SST_substitutions).doit()
SST_equation2

In [None]:
r2_equation = sp.Eq(R**2, 1-SSR/SST)
r2_equation

In [None]:
# R**2 with the evaluated SSR and SST expressions substituted in (SSR first, then SST).
SSR_solution = sp.solve(SSR_equation2, SSR)[0]
r2_with_SSR = r2_equation.subs(SSR, SSR_solution)

SST_solution = sp.solve(SST_equation2, SST)[0]
r2_equation2 = r2_with_SSR.subs(SST, SST_solution)

r2_equation2

In [None]:
r2_lambda = lambdify(r2_equation2.rhs)

In [None]:
r2_lambda

In [None]:
phi_min, phi_max, Be_tot = sp.symbols('phi_min phi_max B_e_tot')
Be_integral = sp.Eq(Be_tot,sp.Integral(B_e_equation.rhs,(symbols.phi_a,phi_min,phi_max)))
Be_integral

In [None]:
Be_tot_equation = sp.Eq(Be_tot,Be_integral.rhs.simplify())
Be_tot_equation

In [None]:
Be_tot_lambda = lambdify(sp.solve(Be_tot_equation, Be_tot)[0])

In [None]:
# Same treatment for the regression frame and the model test frame:
# integration limits 0..phi_a, then Be_tot from Be_tot_lambda.
for frame in (predict, df_rolldecay):
    frame['phi_min'] = 0
    frame['phi_max'] = frame['phi_a']
    frame['Be_tot'] = run(function=Be_tot_lambda, inputs=frame)

In [None]:
# Keep rows whose simulation score is above 0.5 and join them with the model
# test results on the index. Overlapping columns get the suffixes
# '_regression' (from predict) and '_model_test' (from df_rolldecay).
mask = predict['score'] > 0.5
predict_good = predict[mask].copy()
df_compare = pd.merge(
    left=predict_good,
    right=df_rolldecay,
    how='left',
    left_index=True,
    right_index=True,
    suffixes=('_regression', '_model_test'),
)

Be_tot_true = df_compare['Be_tot_model_test']
Be_tot_predicted = df_compare['Be_tot_regression']

# Unsuffixed aliases used as inputs to r2_lambda: model test values keep the
# plain name, regression values get the 'p' names (B_1p, B_2p).
alias_sources = {
    'B_1': 'B_1_model_test',
    'B_2': 'B_2_model_test',
    'B_1p': 'B_1_regression',
    'B_2p': 'B_2_regression',
    'omega0': 'omega0_model_test',
    'phi_max': 'phi_max_model_test',
    'phi_min': 'phi_min_model_test',
}
for alias, source in alias_sources.items():
    df_compare[alias] = df_compare[source]

df_compare['score_simple'] = run(function=r2_lambda, inputs=df_compare)

# Second filter: only keep rows whose analytical score is above 0.7.
mask = df_compare['score_simple'] > 0.7
df_compare = df_compare[mask].copy()

fig, ax = plt.subplots()
df_compare.plot(x='B_e_model_test', y='B_e_regression', ax=ax, style='.')

fig, ax = plt.subplots()
df_compare.plot(x='score_regression', y='score_simple', ax=ax, style='.')

In [None]:
fig,ax=plt.subplots()
df_compare.plot(x='Be_tot_model_test', y='Be_tot_regression', ax=ax, style='.')

fig,ax=plt.subplots()
df_compare.plot(x='Be_tot_regression', y='score_regression', ax=ax, style='.')

In [None]:
def evaluate_plot(meta_data):
    """Plot the estimator fit for one run, titled with its score.

    The run is loaded from the database and passed through the notebook-level
    ``preprocessor``. An ``EstimatorQuadraticB`` is created from ``meta_data``
    and its ``plot_fit`` is drawn in a new figure whose title shows
    ``direct_estimator.score(X=X)``. When the run id is also present in
    ``df_rolldecay_ikeda``, an ``IkedaQuadraticEstimator`` is plotted in the
    same axes, using the omega0 stored in ``df_rolldecay``.

    Parameters
    ----------
    meta_data : pandas.Series
        One row of parameters; ``meta_data.name`` is used as the run id.

    Returns
    -------
    None. Nothing is plotted when the run id is not in ``df_rolldecay.index``.
    """
    if meta_data.name not in df_rolldecay.index:
        return

    meta_data_full_scale = df_rolldecay.loc[meta_data.name]
    db_run = db.session.query(Run).get(int(meta_data.name))
    df = database.load_run(db_run)

    # NOTE: this refits the shared notebook-level preprocessor for every run.
    preprocessor.fit(df)
    X = preprocessor.transform(df)

    direct_estimator = EstimatorQuadraticB.load(data=meta_data, X=X)
    fig, ax = plt.subplots()
    direct_estimator.plot_fit(ax=ax)
    ax.set_title('Score:%0.2f' % direct_estimator.score(X=X))

    if meta_data.name in df_rolldecay_ikeda.index:
        # Copy the row before changing it: a row selected with .loc may be a
        # view, and df_rolldecay_ikeda itself must not be altered.
        meta_data_ikeda = df_rolldecay_ikeda.loc[meta_data.name].copy()
        meta_data_ikeda['omega0'] = meta_data_full_scale.omega0
        ikeda_estimator = IkedaQuadraticEstimator.load(data=meta_data_ikeda, X=X)
        ikeda_estimator.plot_fit(ax=ax, model_test=False)
    

In [None]:
# Call evaluate_plot for the first 50 rows of `predict`, running the garbage
# collector after each one.
# NOTE: the loop rebinds the notebook-level names `run_id` and `meta_data`.
for run_id,meta_data in predict.iloc[0:50].iterrows():
    evaluate_plot(meta_data)
    gc.collect()
    