# Roll damping data exploration

In [None]:
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Load the autoreload extension; mode 2 reloads all modules before running code,
# so edits to the imported project packages are picked up without a kernel restart.
%load_ext autoreload
%autoreload 2

In [None]:
import pandas as pd
# Notebook display settings: show (almost) all rows, every column and wide cells.
# "display.max_columns" is set once to None (no limit); the earlier value of 999
# was immediately overridden by this setting and has been dropped.
pd.set_option("display.max_rows", 999)
pd.set_option("display.max_colwidth", 100)
pd.set_option("display.max_columns", None)
import numpy as np
import os
import matplotlib.pyplot as plt
from pylab import rcParams
# Default figure size (width, height) in inches for all plots in this notebook.
rcParams['figure.figsize'] = 15, 5

import data
import copy
from mdldb.mdl_db import MDLDataBase
from mdldb.tables import Base, Model, LoadingCondition, Run, RolldecayLinear, RolldecayDirect, RolldecayNorwegian
from mdldb.tables import Min, Mean, Max, Std, Ship
from mdldb import mdl_to_evaluation
from evaluation.run_dynamic import RunDynamic
from evaluation.run_manoeuvring import RunZigZag
from rolldecay.bis_system import BisSystem

from rolldecayestimators.direct_estimator import DirectEstimator
from rolldecayestimators.direct_linear_estimator import DirectLinearEstimator
from rolldecayestimators.norwegian_estimator import NorwegianEstimator
from rolldecayestimators.transformers import CutTransformer, LowpassFilterDerivatorTransformer, ScaleFactorTransformer, OffsetTransformer
#from rolldecay.equations_lambdify import calculate_acceleration, calculate_velocity
from sklearn.pipeline import Pipeline
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.feature_selection import VarianceThreshold
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import chi2
from sklearn.feature_selection import f_regression

from sklearn.linear_model import LinearRegression
from sklearn.model_selection import cross_validate
from sklearn.preprocessing import PolynomialFeatures
from sklearn.pipeline import Pipeline
from sklearn.pipeline import make_pipeline

import signal_lab
from sqlalchemy.inspection import inspect
import seaborn as sns
import docs

In [None]:
from sqlalchemy import create_engine

# Open the SQLite database whose file path is given by data.mdl_db_path and
# wrap the engine in the project's MDLDataBase helper.
database_url = 'sqlite:///' + data.mdl_db_path
engine = create_engine(database_url)
db = MDLDataBase(engine=engine)

In [None]:
# Join the roll decay results with run, project, loading condition, model and
# ship data, so that every row describes one run together with its metadata.
sql="""
SELECT * from
rolldecay_direct_improved
INNER JOIN run
ON rolldecay_direct_improved.run_id == run.id
    INNER JOIN projects
    ON (run.project_number == projects.project_number)
        INNER JOIN loading_conditions
        ON (run.loading_condition_id == loading_conditions.id)
            INNER JOIN models
            ON run.model_number == models.model_number
                INNER JOIN ships
                ON models.ship_name == ships.name

"""
df_rolldecay = pd.read_sql(sql, con=engine, index_col='run_id')

# "SELECT *" over several joined tables returns repeated column names;
# keep only the first occurrence of each name.
is_duplicate_column = df_rolldecay.columns.duplicated()
df_rolldecay = df_rolldecay.loc[:, ~is_duplicate_column]
#df_rolldecay = remove_outliers(df_rolldecay)
df_rolldecay.describe()

In [None]:
# List the columns available after the join.
df_rolldecay.columns

In [None]:
# Load the 'description' table, indexed by its 'id' column.
# NOTE(review): presumably this table documents the column names — confirm.
description = pd.read_sql_table('description', con=db.engine, index_col='id')

In [None]:
# Keep only the runs that have an 'omega0' value.
df = df_rolldecay.dropna(subset=['omega0'])

In [None]:
# Number of runs left after dropping rows without 'omega0'.
len(df)

In [None]:
# Keep only the runs whose 'score' exceeds 0.95.
mask = df['score'].gt(0.95)
df = df[mask]

In [None]:
#df.sort_values(by='ship_speed', inplace=True)
#for model_number, model_group in df.groupby(by='model_number'):
#    
#    fig,ax=plt.subplots()
#    fig.set_dpi(50)
#    fig.set_size_inches(10,5)
#    model_group.plot(x='ship_speed', y='mean_damping', style='o-', ax=ax)
#    ax.set_title(model_number)
    

In [None]:
# Keep only the runs with a known 'Volume'; copy so that df2 can be modified
# without touching df.
mask = df['Volume'].notnull()
df2 = df[mask].copy()

In [None]:
# Replace missing 'BKL'/'BKB' values with 0.
# NOTE(review): presumably NaN means "no bilge keel" — confirm.
# Assign the result back instead of calling fillna(inplace=True) on the
# selected column: the chained in-place form operates on a temporary object,
# emits a FutureWarning in pandas 2.x and does not update df2 under
# copy-on-write.
df2['BKL'] = df2['BKL'].fillna(0)
df2['BKB'] = df2['BKB'].fillna(0)

In [None]:
# Plot 'mean_damping' against 'ship_speed':
# one figure per model, one subplot per (BKL, BKB) combination and
# one line per loading condition.
df2.sort_values(by='ship_speed', inplace=True)
by = ['BKL','BKB']
y = 'mean_damping'
y_max = df2[y].max()  # Shared upper y-limit, computed once so all subplots are comparable

# Group on a scalar key (not a one-element list) so that model_number is a
# plain value rather than a 1-tuple in newer pandas versions.
for model_number, model_group in df2.groupby(by='model_number'):

    if len(model_group)==1:
        continue  # Don't do a plot with only one point

    bk_groups = model_group.groupby(by=by)

    # squeeze=False always gives a 2D array of axes, also for a single subplot,
    # so no special case is needed.
    fig,axes=plt.subplots(ncols=len(bk_groups), squeeze=False)
    fig.set_dpi(100)
    fig.set_size_inches(15,5)

    for ax,(index, bk_group) in zip(axes.flat,bk_groups):

        # Title: model number followed by the values of the grouping columns.
        title = '%s' % model_number
        for key,value in zip(by,index):
            title+=' %s:%0.1f' % (key,value)

        ax.set_title(title)
        ax.set_ylim(0,y_max)
        ax.set_ylabel(y)

        # Sort into a new frame rather than mutating the group in place.
        bk_group = bk_group.sort_values(by=['TA','ship_speed'])
        for _, df_loading_condition in bk_group.groupby(by='loading_condition_id'):
            # All rows share the loading condition, so the first row provides the label.
            row = df_loading_condition.iloc[0]
            label = 'ta:%0.1f, tf:%0.1f' % (row['TA'],row['TF'])
            df_loading_condition.plot(x='ship_speed', y=y, style='o-', label=label, ax=ax)

In [None]:
# Pick out all runs for model '3416-A' as an example.
runs_per_model = df2.groupby(by='model_number')
df_model = runs_per_model.get_group('3416-A')

In [None]:
# Show the project path stored on the first run of the selected model.
first_run = df_model.iloc[0]
print(first_run['project_path'])

## This model illustrates that there can be many "false" runs:

In [None]:
# List the runs of the selected model per loading condition and speed,
# sorted in descending order so the latest date and run number come first.
sort_keys = ['loading_condition_id','ship_speed','date','run_number',]
shown_columns = ['series_number','run_number','TA','TF','kg','gm','comment','date']
runs_by_condition = df_model.set_index(['loading_condition_id','ship_speed'])
runs_by_condition.sort_values(by=sort_keys, ascending=False)[shown_columns]

## Removing "false" runs by assuming that the latest run (by date, then run number) for each loading condition and ship speed is the correct one:

In [None]:
def get_latest(group: pd.DataFrame) -> pd.Series:
    """Return the latest run (row) of *group*.

    The rows are ordered by 'date' and then 'run_number', both descending,
    and the first row is returned. The index label of that row is also
    stored in the returned series under the key 'run_id', so that it
    survives a groupby-apply where the index is replaced by the group keys.

    Args:
        group: Runs to choose from; must have the columns 'date' and
            'run_number' and at least one row.

    Returns:
        A copy of the selected row with the extra entry 'run_id'.
    """
    # Copy the row before adding 'run_id': .iloc[0] may return a view of the
    # sorted frame, and writing into it can raise SettingWithCopyWarning.
    latest = group.sort_values(by=['date','run_number'], ascending=False).iloc[0].copy()
    latest['run_id'] = latest.name
    return latest


# For the selected model: keep only the latest run for every combination of
# loading condition and ship speed, then show the columns identifying the run.
condition_keys = ['loading_condition_id','ship_speed']
runs_per_condition = df_model.groupby(by=condition_keys)
df_latest = runs_per_condition.apply(func=get_latest)
df_latest[['date','series_number','run_number','comment']]

In [None]:
# Same selection for all models: keep the latest run for every combination of
# model, loading condition and ship speed.
group_keys = ['model_number','loading_condition_id','ship_speed']
latest_runs = df2.groupby(by=group_keys).apply(func=get_latest)

# The group keys are present both as index levels and as columns; drop the
# columns, turn the index levels back into columns and index by 'run_id' again.
df3 = (
    latest_runs
    .drop(columns=group_keys)
    .reset_index()
    .set_index('run_id')
)

In [None]:
# Plot 'mean_damping' against 'ship_speed' for df3:
# one figure per model, one subplot per (BKL, BKB) combination and
# one line per loading condition.
df3.sort_values(by='ship_speed', inplace=True)
by = ['BKL','BKB']
y = 'mean_damping'
y_max = df3[y].max()  # Shared upper y-limit, computed once so all subplots are comparable

# Group on a scalar key (not a one-element list) so that model_number is a
# plain value rather than a 1-tuple in newer pandas versions.
for model_number, model_group in df3.groupby(by='model_number'):

    if len(model_group)==1:
        continue  # Don't do a plot with only one point

    bk_groups = model_group.groupby(by=by)

    # squeeze=False always gives a 2D array of axes, also for a single subplot,
    # so no special case is needed.
    fig,axes=plt.subplots(ncols=len(bk_groups), squeeze=False)
    fig.set_dpi(100)
    fig.set_size_inches(15,5)

    for ax,(index, bk_group) in zip(axes.flat,bk_groups):

        # Title: model number followed by the values of the grouping columns.
        title = '%s' % model_number
        for key,value in zip(by,index):
            title+=' %s:%0.1f' % (key,value)

        ax.set_title(title)
        ax.set_ylim(0,y_max)
        ax.set_ylabel(y)

        # Sort into a new frame rather than mutating the group in place.
        bk_group = bk_group.sort_values(by=['TA','ship_speed'])
        for _, df_loading_condition in bk_group.groupby(by='loading_condition_id'):
            # All rows share the loading condition, so the first row provides the label.
            row = df_loading_condition.iloc[0]
            label = 'ta:%0.1f, tf:%0.1f' % (row['TA'],row['TF'])
            df_loading_condition.plot(x='ship_speed', y=y, style='o-', label=label, ax=ax)