# MDL DB outliers
Check runs with low score...

In [None]:
%matplotlib inline
%load_ext autoreload
%autoreload 2

In [None]:
import pandas as pd
pd.set_option("display.max_columns", None)
import numpy as np
import matplotlib.pyplot as plt
from pylab import rcParams
rcParams['lines.linewidth'] = 1.5

import os
import copy

import data
from mdldb.mdl_db import MDLDataBase
from rolldecay import database
from mdldb.tables import Run
from rolldecayestimators.transformers import CutTransformer, LowpassFilterDerivatorTransformer, ScaleFactorTransformer, OffsetTransformer
from rolldecayestimators.analytical_linear_estimator import AnalyticalLinearEstimator
from rolldecayestimators.direct_linear_estimator import DirectLinearEstimator
from rolldecayestimators.direct_estimator_cubic import DirectEstimatorCubic
from rolldecayestimators.direct_estimator import DirectEstimator
from rolldecayestimators.norwegian_estimator import NorwegianEstimator



from mdldb import mdl_to_evaluation
from evaluation.run_dynamic import RunDynamic
from evaluation.run_manoeuvring import RunZigZag
from sklearn.pipeline import Pipeline
import signal_lab

In [None]:
# Load the roll decay results from the 'rolldecay_direct' table (latest runs
# only, score limit 0.0) and get a handle to the underlying database.
df_rolldecay = database.load(
    rolldecay_table_name='rolldecay_direct',
    only_latest_runs=True,
    limit_score=0.0,
)
db = database.get_db()

In [None]:
# Preview the first rows of the loaded roll decay table.
df_rolldecay.head()

In [None]:
# Distribution of the 'score' column over the loaded runs, in 30 bins.
df_rolldecay['score'].hist(bins=30)

In [None]:
# Join the 'std' table with run, project, loading condition, model and ship
# information, indexed by run_id.
# NOTE(review): `==` as the equality operator is accepted by SQLite but is not
# standard SQL; use `=` if this query must run on another engine.
sql = """
SELECT * from
std
INNER JOIN run
ON std.run_id == run.id
    INNER JOIN projects
    ON run.project_number==projects.project_number
        INNER JOIN loading_conditions
        ON (run.loading_condition_id == loading_conditions.id)
            INNER JOIN models
            ON run.model_number == models.model_number
                INNER JOIN ships
                ON models.ship_name == ships.name
        
"""
df_std = pd.read_sql_query(sql=sql, con=db.engine, index_col='run_id')

# Left-join onto the roll decay table by index so every roll decay run is
# kept; clashing column names from the SQL side get the '_std' suffix.
df_std = df_rolldecay.merge(
    df_std,
    how='left',
    left_index=True,
    right_index=True,
    suffixes=('', '_std'),
)


In [None]:
# Plot 'psi' against 'score' for the merged table as semi-transparent dots.
df_std.plot(x='score',y='psi', style='.', alpha=0.5)

In [None]:
# Boolean mask selecting the runs whose score is below 0.90.
mask = df_rolldecay['score'] < 0.90

In [None]:
# Keep only the masked runs; copy() gives an independent frame to modify.
df_rolldecay=df_rolldecay.loc[mask].copy()

In [None]:
# Order the remaining runs by ascending score.
df_rolldecay = df_rolldecay.sort_values(by='score')

In [None]:
# Summary statistics of the remaining runs.
df_rolldecay.describe()

In [None]:
# Preview the first rows of the remaining runs.
df_rolldecay.head()

In [None]:
# Pick the run at position 2 of the table and fetch its database record.
row = df_rolldecay.iloc[2]
run_id = int(row.name)  # the row's index label is used as the run id
db_run = db.session.query(Run).get(run_id)
assert db_run is not None

In [None]:
# Show the id of the selected run.
run_id

In [None]:
# Load the selected run and take its channels as the raw data.
ascii_file = db_run.load()
df_raw = ascii_file.channels

# NOTE(review): `mdl_to_evaluation` is also imported from `mdldb` at the top of
# the notebook, but is reached through `signal_lab` here — confirm which module
# is intended.
df = signal_lab.mdl_to_evaluation.do_transforms(df=df_raw)
# Rename the 'MA/Roll' channel to 'phi', the column name used by the
# preprocessing and plotting cells below.
df.rename(columns={'MA/Roll':'phi'}, inplace=True)

In [None]:
# Score stored for the selected run.
row['score']

In [None]:
# 'phi' signal of the selected run, drawn on its own axes with a grid.
fig, ax = plt.subplots()
df.plot(y='phi', ax=ax)
ax.grid(True)

In [None]:
# Plot the 'Carriage/Psip' channel of the transformed run data.
df.plot(y='Carriage/Psip')

In [None]:
# Preprocessing pipeline for the selected run. The steps are applied in this
# order: low-pass filter/derivator, offset, scale factor, cut.
lowpass_filter = LowpassFilterDerivatorTransformer(cutoff=1, minimum_score=0)
offset_transformer = OffsetTransformer()
# The scale factor is read from the model that belongs to the selected run.
scaler = ScaleFactorTransformer(scale_factor=db_run.model.scale_factor)
# Cut limits are given in degrees and converted to radians.
cutter = CutTransformer(phi_max=np.deg2rad(9), phi_min=np.deg2rad(1))

steps = [
    ('filter', lowpass_filter),
    ('offset', offset_transformer),
    ('scaler', scaler),
    ('cutter', cutter),
]

preprocess = Pipeline(steps)
X = preprocess.fit_transform(df)

In [None]:
# 'phi' after preprocessing, drawn on its own axes with a grid.
fig, ax = plt.subplots()
X.plot(y='phi', ax=ax)
ax.grid(True)

In [None]:
# Same preprocessed 'phi' plot, with the x axis limited to the range 0-200.
fig, ax = plt.subplots()
X.plot(y='phi', ax=ax)
ax.grid(True)
ax.set_xlim(0, 200)

In [None]:
# Plot the 'phi1d' column of the preprocessed data
# (presumably the first time derivative of phi from the filter step — confirm).
X.plot(y='phi1d')

In [None]:
# Fit the preprocessed signal with DirectEstimator in all four combinations of
# fit method ('derivation', 'integration') and omega_regression (True, False),
# and plot each fit with its score in the title.
#
# Other estimators imported by this notebook can be added to the list for
# comparison: DirectLinearEstimator, AnalyticalLinearEstimator and
# DirectEstimatorCubic (each with an omega_regression argument), and
# NorwegianEstimator().
estimators = [
    DirectEstimator(omega_regression=omega_regression, fit_method=fit_method)
    for fit_method in ('derivation', 'integration')
    for omega_regression in (True, False)
]

for estimator in estimators:
    estimator.fit(X)

    fig, ax = plt.subplots()
    fig.set_size_inches(14, 10)
    estimator.plot_fit(ax=ax)
    ax.grid(True)
    score = estimator.score()

    # Title: omega mode label, fit method and the score with two decimals.
    omega_label = 'Omega regression ' if estimator.omega_regression else 'Omega fft '
    title = '%s%s Score:%0.2f' % (omega_label, estimator.fit_method, score)
    ax.set_title(title)