# Roll damping regression
Development of an empirical regression method to predict roll damping

In [None]:
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Load the autoreload extension; mode 2 reloads all imported modules before
# each cell is executed, so edits to project modules are picked up.
%load_ext autoreload
%autoreload 2

In [None]:
from jupyterthemes import jtplot

# Apply the 'onedork' jupyterthemes plotting style to all following figures.
jtplot.style(
    theme='onedork',
    context='notebook',
    ticks=True,
    grid=False,
)

In [None]:
import pandas as pd
# Display up to 999 rows when a DataFrame is shown in the notebook.
pd.options.display.max_rows = 999
# None means no limit on the number of displayed columns.
pd.set_option("display.max_columns", None)
import numpy as np
import os
import matplotlib.pyplot as plt
from pylab import rcParams
rcParams['figure.figsize'] = 15, 5

import data
import copy
from rolldecay.bis_system import BisSystem
from rolldecay import database
from rolldecayestimators.substitute_dynamic_symbols import lambdify,run

from sklearn.pipeline import Pipeline
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.feature_selection import VarianceThreshold
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import chi2
from sklearn.feature_selection import f_regression

from sklearn.linear_model import LinearRegression
from sklearn.model_selection import cross_validate
from sklearn.preprocessing import PolynomialFeatures
from sklearn.pipeline import Pipeline
from sklearn.pipeline import make_pipeline

import signal_lab
from sqlalchemy.inspection import inspect
import seaborn as sns
import docs
from sympy.parsing.sympy_parser import parse_expr
import sympy as sp
from rolldecayestimators import symbols
from rolldecayestimators.polynom_estimator import Polynom


In [None]:
# Load the feature table and the target table from pickle files in the
# working directory.
# NOTE(review): `data` rebinds the name of the module brought in by
# `import data` above, so that module is no longer reachable after this cell.
# NOTE(review): unpickling executes arbitrary code; only load files that come
# from a trusted source.
data = pd.read_pickle('data.sav')
y_s = pd.read_pickle('y.sav')

In [None]:
# Name of the target column in y_s; also used for plot titles and axis labels.
y_key = 'omega0_hat'

# Histogram (50 bins) of the target values.
fig, ax = plt.subplots()
y_s[y_key].hist(bins=50, ax=ax)
ax.set_title('Histogram: %s' % y_key)

In [None]:
# Grid search over the number of selected features (k = 1..9) and the
# polynomial degree (1..2). Every combination is scored with 5-fold cross
# validation using the pipeline's default scorer.
ks = np.arange(1, 10)
degrees = np.arange(1, 3)

# These transformers are shared between all candidate pipelines.
# cross_val_score fits clones of the estimator, so the shared instances are
# not fitted in place by the search.
variance_treshold = VarianceThreshold(0.0001)
standard_scaler = StandardScaler()

y = y_s[y_key]
# All columns are candidate features (alternative kept from the original
# notebook: X = data[important]).
X = data

# Collect one record per (k, degree) combination and build the DataFrame once
# at the end; DataFrame.append was removed in pandas 2.0.
rows = []
for k in ks:
    for degree in degrees:
        select_k_best = SelectKBest(k=k, score_func=f_regression)
        polynomial_features = PolynomialFeatures(degree=degree)
        linear_regression = LinearRegression()

        steps = [
            ('polynomial_feature', polynomial_features),
            ('standard_scaler', standard_scaler),
            ('variance_treshold', variance_treshold),
            ('select_k_best', select_k_best),
            ('linear_regression', linear_regression),
        ]

        model = Pipeline(steps=steps)
        score = cross_val_score(estimator=model, X=X, y=y, cv=5).mean()
        rows.append({'k': int(k), 'degree': int(degree), 'score': score})

# Explicit columns keep the frame well-formed even if no rows were produced.
results = pd.DataFrame(rows, columns=['k', 'degree', 'score'])

In [None]:
# Order the grid-search results so the best cross-validation score comes first.
results = results.sort_values(by='score', ascending=False)

In [None]:
# Show the first rows of the grid-search results (sorted by descending score
# in the previous cell).
results.head()

In [None]:
# Take k and degree from the first row of `results`, i.e. the best-scoring
# combination once the frame has been sorted by descending score.
k, degree = (int(results.iloc[0][column]) for column in ('k', 'degree'))

In [None]:
# Rebuild the pipeline with the selected k and degree. The steps run in the
# order listed below; `variance_treshold` is the instance created in the
# grid-search cell.
polynomial_features = PolynomialFeatures(degree=degree)
standard_scaler = StandardScaler()
select_k_best = SelectKBest(score_func=f_regression, k=k)
linear_regression = LinearRegression()

steps = [
    ('polynomial_feature', polynomial_features),
    ('standard_scaler', standard_scaler),
    ('variance_treshold', variance_treshold),
    ('select_k_best', select_k_best),
    ('linear_regression', linear_regression),
]

model = Pipeline(steps=steps)

In [None]:
# Hold out 20 % of the samples for testing and fit the pipeline on the rest.
# NOTE(review): no random_state is passed, so the split (and therefore the
# score below) changes between runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20)
model.fit(X=X_train, y=y_train)

# Score of the fitted pipeline on the held-out samples; displayed as the
# cell output.
score = model.score(X=X_test, y=y_test)
score

In [None]:
# Scatter of predicted versus measured target values on the held-out samples.
predicted = model.predict(X_test)

fig, ax = plt.subplots()
ax.plot(y_test, predicted, '.', alpha=0.5)
ax.set_title('Prediction of %s' % y_key)
ax.set_xlabel('test: %s' % y_key)
ax.set_ylabel('predicted: %s' % y_key)

In [None]:
import pickle

filename = 'omega0_hat_model.sav'

# Refit the pipeline on the complete data set before it is saved.
model.fit(X=X, y=y)
# Attach the feature column names to the model as `keys`, so they are stored
# in the pickle together with the fitted pipeline.
model.keys = list(X.columns)

# The context manager guarantees the file is flushed and closed after dumping.
with open(filename, 'wb') as file:
    pickle.dump(model, file)

In [None]:
# Wrap the pipeline in a Polynom (rolldecayestimators.polynom_estimator),
# passing the feature column names and the `symbols.omega_hat` symbol.
# NOTE(review): presumably Polynom expresses the pipeline as a symbolic
# polynomial - confirm against the Polynom implementation.
polynom = Polynom(model=model, columns=X.columns, y_symbol=symbols.omega_hat)
# New random 80/20 split; this rebinds X_train/X_test/y_train/y_test from the
# earlier hold-out cell (again without a random_state).
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20)
polynom.fit(X=X_train, y=y_train)

# Score of the polynomial on the held-out samples; displayed as cell output.
score = polynom.score(X=X_test, y=y_test)
score

In [None]:
# Display the `good_feature_equations` attribute of the fitted Polynom.
# NOTE(review): presumably the symbolic expressions of the selected
# features - confirm in Polynom.
polynom.good_feature_equations

In [None]:
# Display the `equation` attribute of the fitted Polynom.
polynom.equation

In [None]:
# Display the `lamda` attribute of the fitted Polynom.
# NOTE(review): presumably the lambdified (callable) form of the equation;
# the attribute is spelled `lamda` here - confirm it matches Polynom.
polynom.lamda

In [None]:
# Refit the polynomial on the complete data set (the previous fit used only
# the training split).
polynom.fit(X=X,y=y)

In [None]:
# Save the polynomial to 'omega0_hat_polynom.sym' in the working directory.
polynom.save('omega0_hat_polynom.sym')

In [None]:
# Compare the two predictors on the full data set: pipeline predictions on
# the x-axis, polynomial predictions on the y-axis. Points on the diagonal
# mean both give the same value.
fig,ax=plt.subplots()
ax.plot(model.predict(X),polynom.predict(X),'o')

In [None]:
# Score of the pipeline on the full data set. This is the same data it was
# last fitted on, so it is not an out-of-sample estimate.
model.score(X,y)

In [None]:
# Score of the polynomial on the full data set, for comparison with the
# pipeline score in the previous cell.
polynom.score(X,y)