# MDL DB roll damping classification
This notebook compares a cubic, a quadratic and a linear roll decay model on the whole database.

In [None]:
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Automatically reload imported modules before running code, so that edits to
# the project packages are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
# Apply the jupyterthemes 'onedork' plotting style (with ticks, without grid)
# to the figures produced in this notebook.
from jupyterthemes import jtplot
jtplot.style(theme='onedork', context='notebook', ticks=True, grid=False)

In [None]:
import pandas as pd
# Display options for DataFrames: show up to 999 rows and never truncate
# columns. (An earlier, conflicting max_columns = 999 setting was immediately
# overridden by the line below and has been dropped.)
pd.options.display.max_rows = 999
pd.set_option("display.max_columns", None)
import numpy as np
import os
import matplotlib.pyplot as plt
from pylab import rcParams
rcParams['figure.figsize'] = 15, 5

import rolldecay.database as database
from mdldb.tables import Run
from rolldecayestimators.transformers import CutTransformer, LowpassFilterDerivatorTransformer, ScaleFactorTransformer, OffsetTransformer
from rolldecayestimators.direct_estimator_cubic import EstimatorCubic, EstimatorQuadraticB, EstimatorLinear

import data
import copy


from sklearn.pipeline import Pipeline
import seaborn as sns

from sympy.physics.vector.printing import vpprint, vlatex
from IPython.display import display, Math, Latex

## Cubic model
Has cubic damping and stiffness:

In [None]:
# Render the cubic estimator's roll decay equation as LaTeX in the cell output.
Math(vlatex(EstimatorCubic.roll_decay_equation))

## Quadratic model
Has quadratic damping and stiffness:

In [None]:
# Render the quadratic estimator's roll decay equation as LaTeX in the cell output.
Math(vlatex(EstimatorQuadraticB.roll_decay_equation))

## Linear model
Has linear damping and stiffness:

In [None]:
# Render the linear estimator's roll decay equation as LaTeX in the cell output.
Math(vlatex(EstimatorLinear.roll_decay_equation))

In [None]:
# Score limit passed to database.load for all three result tables
# (presumably a lower bound on the fit score -- confirm against database.load).
limit_score = 0.8


def _load_results(table_name):
    """Load one roll decay result table using the shared score limit and exclusion table."""
    return database.load(
        rolldecay_table_name=table_name,
        limit_score=limit_score,
        exclude_table_name='rolldecay_exclude',
    )


# One result table per model.
df_cubic = _load_results('rolldecay_cubic_b')
df_quadratic = _load_results('rolldecay_quadratic_b')
df_linear = _load_results('rolldecay_linear_b')

In [None]:
# Summary statistics of the cubic model results.
df_cubic.describe()

In [None]:
# Summary statistics of the quadratic model results.
df_quadratic.describe()

In [None]:
# Summary statistics of the linear model results.
df_linear.describe()

## Comparison of the r2 score for the three models

In [None]:
# Number of bin edges shared by the three score histograms.
N=60

# Tag every result table with the model that produced it, so that the rows can
# be told apart after stacking. Note that this adds the 'estimator' column to
# the three tables themselves.
df_cubic['estimator'] = 'cubic'
df_quadratic['estimator'] = 'quadratic'
df_linear['estimator'] = 'linear'

# Stack the three tables into one long frame with a fresh index.
# DataFrame.append was removed in pandas 2.0; pd.concat produces the same
# result and works across pandas versions.
df_comparison = pd.concat([df_cubic, df_quadratic, df_linear], ignore_index=True, sort=False)

# Common bin edges over the full score range make the histograms comparable.
bins = np.linspace(df_comparison['score'].min(), df_comparison['score'].max(), N)

# One histogram per model, stacked vertically in the order the models were added.
groups = df_comparison.groupby(by='estimator', sort=False)
fig, axes = plt.subplots(nrows=len(groups))
for ax, (estimator, group) in zip(axes, groups):
    label = '%s' % estimator
    group['score'].hist(bins=bins, ax=ax, label=label)
    ax.set_title(label)
# Only the last (bottom) histogram gets an x-label.
ax.set_xlabel('score')

# Give all histograms the same y-range: from zero to the largest upper limit.
ymaxs = [ax.get_ylim()[1] for ax in axes]
for ax in axes:
    ax.set_ylim(0, np.max(ymaxs))

fig.tight_layout()
    

In [None]:
# Align the three result tables on their index, keeping only the rows that are
# present in all of them. Overlapping column names keep their plain name for
# the cubic table and get a '_quadratic' / '_linear' suffix for the others.
df_comparison2 = df_cubic.merge(
    df_quadratic, how='inner', left_index=True, right_index=True, suffixes=('', '_quadratic')
).merge(
    df_linear, how='inner', left_index=True, right_index=True, suffixes=('', '_linear')
)

# Pairwise score differences: (new column, minuend column, subtrahend column).
score_differences = [
    ('score_cubic-score_quadratic', 'score', 'score_quadratic'),
    ('score_cubic-score_linear', 'score', 'score_linear'),
    ('score_quadratic-score_linear', 'score_quadratic', 'score_linear'),
]
for difference, minuend, subtrahend in score_differences:
    df_comparison2[difference] = df_comparison2[minuend] - df_comparison2[subtrahend]

# Plot how much the cubic model gains over the quadratic and the linear model.
fig, ax = plt.subplots()
for column, marker in [('score_cubic-score_quadratic', 'o'), ('score_cubic-score_linear', '*')]:
    df_comparison2.plot(y=column, style=marker, ax=ax, alpha=0.50)

ax.set_title('Score comparison')
ax.grid(True)
ax.legend()

... it looks like the cubic model is not much more accurate than the quadratic one.

## A closer look at the cubic coefficients

In [None]:
# Histograms of the cubic model coefficients: one figure for the B_1A, B_2A,
# B_3A columns and one for the C_1A, C_3A, C_5A columns.
coefficient_groups = (
    ['B_%iA' % order for order in (1, 2, 3)],
    ['C_%iA' % order for order in (1, 3, 5)],
)
for keys in coefficient_groups:
    fig, axes = plt.subplots(ncols=len(keys))
    for ax, key in zip(axes, keys):
        df_cubic[key].hist(bins=40, ax=ax)
        ax.set_title(key)
    

## Compare signals
Select the run where the cubic and the quadratic model differ the most.

In [None]:
# Index label of the row with the largest absolute score difference between
# the cubic and the quadratic model.
run_id = df_comparison2['score_cubic-score_quadratic'].abs().idxmax()

In [None]:
# Score difference (cubic minus quadratic) for the selected run. A single
# .loc[row, column] lookup is used instead of chained indexing, which would
# first build the whole row as an intermediate Series.
df_comparison2.loc[run_id, 'score_cubic-score_quadratic']

In [None]:
# Database handle; its session is used below to look up individual runs.
db = database.get_db()

In [None]:
# Look up the Run row by primary key (run_id is cast to a plain int first).
# NOTE(review): Query.get() is a legacy API in recent SQLAlchemy versions;
# Session.get(Run, id) is the replacement -- confirm the installed version before changing.
db_run = db.session.query(Run).get(int(run_id))
# Load the data of this run.
# NOTE(review): save_as_example=True presumably also stores the run as an example -- confirm.
df = database.load_run(db_run, save_as_example=True)

In [None]:
# Plot the 'phi' column of the loaded run before any pre-processing.
df.plot(y='phi')

In [None]:
# Transformers applied to the loaded run, listed in pipeline order.
lowpass_filter = LowpassFilterDerivatorTransformer(cutoff=2, minimum_score=0.99)
cutter = CutTransformer(
    phi_max=np.deg2rad(9),      # limits are given in degrees and converted to radians
    phi_min=np.deg2rad(0.25),
)
offset_transformer = OffsetTransformer()

# A ScaleFactorTransformer step (Froude scaling with db_run.model.scale_factor)
# is left out for now; whether scaling is a good idea is still an open question.
steps = [
    ('filter', lowpass_filter),
    ('cutter', cutter),
    ('offset_transformer', offset_transformer),
]

# Fit the pipeline on the loaded run and transform that same run.
preprocessor = Pipeline(steps)
X = preprocessor.fit(df).transform(df)

In [None]:
# Plot the 'phi' column after pre-processing.
X.plot(y='phi')

In [None]:
# Stored cubic-model results row for the selected run.
meta_data = df_cubic.loc[run_id]
# Build the cubic estimator from the stored row and the pre-processed signal,
# then plot its fit.
cubic_estimator = EstimatorCubic.load(data=meta_data, X=X)
cubic_estimator.plot_fit()

In [None]:
# Stored quadratic-model results row for the selected run.
meta_data = df_quadratic.loc[run_id]
# Build the quadratic estimator from the stored row and the pre-processed
# signal, then plot its fit. EstimatorQuadraticB is the class imported at the
# top of the notebook; the previously used name EstimatorQuadratic is not
# imported and raised a NameError.
quadratic_estimator = EstimatorQuadraticB.load(data=meta_data, X=X)
quadratic_estimator.plot_fit()


In [None]:
# Stored linear-model results row for the selected run.
meta_data = df_linear.loc[run_id]
# Build the linear estimator from the stored row and the pre-processed signal,
# then plot its fit.
linear_estimator = EstimatorLinear.load(data=meta_data, X=X)
linear_estimator.plot_fit()

In [None]:
# FFT plot provided by the cubic estimator for the selected run.
cubic_estimator.plot_fft()

In [None]:
# Show the estimator's omega0 value (presumably the natural roll frequency -- confirm).
cubic_estimator.omega0

In [None]:
# Compute amplitudes and damping for the cubic estimator, then plot the
# 'omega0' column of the resulting X_amplitudes table against 'phi'.
cubic_estimator.calculate_amplitudes_and_damping()
cubic_estimator.X_amplitudes.plot(x='phi',y='omega0')

In [None]:
# Peak plot provided by the cubic estimator for the selected run.
cubic_estimator.plot_peaks()

Select the run where the quadratic and the linear model differ the most.

In [None]:
# Index label of the row with the largest absolute score difference between
# the quadratic and the linear model; load that run from the database.
run_id = df_comparison2['score_quadratic-score_linear'].abs().idxmax()
db_run = db.session.query(Run).get(int(run_id))
df = database.load_run(db_run, save_as_example=True)

#preprocessor['scaler'].scale_factor=db_run.model.scale_factor
# NOTE(review): the preprocessor is only transform()-ed here; it was fitted on
# the previously selected run. Confirm that none of its transformers keep
# run-specific state from fit(), otherwise it should be re-fitted on this run.
X = preprocessor.transform(df)

# For each model: take the stored results row of this run, build the estimator
# from it together with the pre-processed signal, and plot the fit.
meta_data = df_cubic.loc[run_id]
cubic_estimator = EstimatorCubic.load(data=meta_data, X=X)
cubic_estimator.plot_fit()

meta_data = df_quadratic.loc[run_id]
quadratic_estimator = EstimatorQuadraticB.load(data=meta_data, X=X)
quadratic_estimator.plot_fit()

meta_data = df_linear.loc[run_id]
linear_estimator = EstimatorLinear.load(data=meta_data, X=X)
linear_estimator.plot_fit()

In [None]:
# Stored score of the linear model for the selected run.
meta_data = df_linear.loc[run_id]
meta_data['score']

In [None]:
# Compute amplitudes and damping for the linear estimator, then plot the
# 'omega0' column of the resulting X_amplitudes table against 'phi'.
linear_estimator.calculate_amplitudes_and_damping()
linear_estimator.X_amplitudes.plot(x='phi',y='omega0')