# Regression on simulated data with better numerical gradient

## Purpose
* The regression of simulated data works extremely well with nonlinear data, as seen in: [12.05_regression_simulated_data_simple_nonlinear.ipynb](12.05_regression_simulated_data_simple_nonlinear.ipynb)
* But what will happen if we add Gaussian noise to the measurement?

## Methodology
* Load simulated data generated by: [12.05_regression_simulated_data_simple_nonlinear.ipynb](12.05_regression_simulated_data_simple_nonlinear.ipynb)
* Add measurement noise
* Regress the noisy data
* What is the accuracy?
* Are the coefficients the same?

## Results
Describe and comment the most important results.

## Setup

In [None]:
# %load imports.py
## Local packages:

%matplotlib inline
%load_ext autoreload
%autoreload 2
%config Completer.use_jedi = False  ## (To fix autocomplete)

## External packages:
import pandas as pd
pd.options.display.max_rows = 999
pd.options.display.max_columns = 999
pd.set_option("display.max_columns", None)

import numpy as np
import os
import matplotlib.pyplot as plt
#if os.name == 'nt':
#    plt.style.use('presentation.mplstyle')  # Windows

import plotly.express as px 
import plotly.graph_objects as go

import seaborn as sns
import sympy as sp
from sympy.physics.mechanics import (dynamicsymbols, ReferenceFrame,
                                      Particle, Point)
from sympy.physics.vector.printing import vpprint, vlatex
from IPython.display import display, Math, Latex
from vessel_manoeuvring_models.substitute_dynamic_symbols import run, lambdify

import pyro

import sklearn
import pykalman
from statsmodels.sandbox.regression.predstd import wls_prediction_std
import statsmodels.api as sm

from scipy.integrate import solve_ivp

## Local packages:
from vessel_manoeuvring_models.data import mdl

from vessel_manoeuvring_models.symbols import *
from vessel_manoeuvring_models.parameters import *
import vessel_manoeuvring_models.symbols as symbols
from vessel_manoeuvring_models import prime_system
from vessel_manoeuvring_models.models import regression
from vessel_manoeuvring_models.visualization.regression import show_pred
from vessel_manoeuvring_models.visualization.plot import track_plot

## Load models:
# (Uncomment these for faster loading):
import vessel_manoeuvring_models.models.vmm_simple_nonlinear  as vmm
from vessel_manoeuvring_models.data.case_1 import ship_parameters, df_parameters, ps, ship_parameters_prime
from vessel_manoeuvring_models.data.transform import transform_to_ship

## Ship parameters

In [None]:
ship_parameters

## Brix parameters

In [None]:
# Compare the coefficient names returned by the simulator with the parameters
# that have a non-null 'prime' value in df_parameters.
mask = df_parameters['prime'].notnull()
index = df_parameters.loc[mask,'prime'].index
coefficients=vmm.simulator.get_all_coefficients(sympy_symbols=False)
# Coefficients returned by the simulator that lack a 'prime' value (shown as cell output).
missing_coefficients = set(coefficients) - set(index)
missing_coefficients

In [None]:
# Show every parameter that has a non-null 'prime' value.
mask = df_parameters['prime'].notnull()
df_parameters.loc[mask,'prime']

## Load simulated data

In [None]:
# Load the stored simulation; the first CSV column becomes the index.
# NOTE(review): later cells use this index as the time vector (presumably seconds) - confirm.
df_result = pd.read_csv('../data/processed/simple_simulation.csv', index_col=0)
df_result['z0']=0  # constant zero 'z0' column; it is differentiated together with x0, y0 and psi further down
# The "measurement" keeps everything except velocities and accelerations,
# which are re-derived by numerical differentiation in a later cell.
df_measurement = df_result.drop(columns=['u','v','r','u1d','v1d','r1d'])  # Removing vel and acc

In [None]:
ship_parameters['x_G']

### Check accelerations

In [None]:
import scipy.integrate

In [None]:
# Sanity check: integrating the simulated acceleration 'u1d' over the index
# (trapezoidal rule) and adding the initial 'u' should reproduce the simulated 'u'.
# `cumtrapz` was deprecated and later removed from SciPy (1.14); use its
# replacement `cumulative_trapezoid` and fall back only on old SciPy versions.
try:
    cumulative_trapezoid = scipy.integrate.cumulative_trapezoid
except AttributeError:  # SciPy older than 1.6
    cumulative_trapezoid = scipy.integrate.cumtrapz

u_integrated = df_result.iloc[0]['u'] + cumulative_trapezoid(y=df_result['u1d'],
                                                             x=df_result.index)
fig,ax=plt.subplots()
df_result.plot(y='u', ax=ax)
# Without `initial`, the cumulative integral is one sample shorter than the
# input, so it is plotted against the index from the second sample onwards.
ax.plot(df_result.index[1:], u_integrated, '--', label='u_integrated')
ax.legend();

In [None]:
def _central_difference(y:np.ndarray,t_:np.ndarray):
    """Differentiate samples with a two-sided (central) difference.

    For every interior sample ``i`` the derivative is estimated as
    ``(y[i+1] - y[i-1]) / (t_[i+1] - t_[i-1])``.

    Args:
        y: Sampled values; any 1-D array-like (list, ndarray, pandas object).
        t_: Sample instants belonging to ``y``; any 1-D array-like.

    Returns:
        Tuple ``(y1d_diff, t_diff)`` of numpy arrays: the derivative estimates
        and the instants ``t_[1:-1]`` they belong to. Both are two samples
        shorter than the input because the end points lack a neighbour on one
        side; fewer than three samples therefore give empty arrays.

    Raises:
        ValueError: If ``y`` and ``t_`` do not have the same shape.
    """
    # Coerce up front so that lists and pandas Index/Series objects behave
    # exactly like plain numpy arrays in the slicing arithmetic below.
    y = np.asarray(y)
    t_ = np.asarray(t_)
    if y.shape != t_.shape:
        raise ValueError(
            f"y and t_ must have the same shape, got {y.shape} and {t_.shape}"
        )

    dt_diff = t_[2:] - t_[:-2]
    y1d_diff = (y[2:] - y[:-2]) / dt_diff
    t_diff = t_[1:-1]
    return y1d_diff, t_diff


def central_difference(y:pd.Series):
    """Central-difference derivative of a series with respect to its index.

    Args:
        y: Samples, indexed by the variable to differentiate with respect to.

    Returns:
        pd.Series holding the derivative estimates, indexed by the interior
        index values ``y.index[1:-1]`` (two samples shorter than ``y``).
    """
    y1d_diff, _ = _central_difference(y=y.values, t_=y.index)

    # Reuse the interior part of the original index so its type and name are kept.
    return pd.Series(y1d_diff, index=y.index[1:-1])

    
    
# Demonstration on a coarsely sampled sine wave (10 samples over one period),
# for which the exact derivative cos(t) is known and stored in 'y1d'.
t_ = np.linspace(0,2*np.pi,10)
df_ = pd.DataFrame(index=t_)
df_['y'] = np.sin(t_)
df_['y1d'] = np.cos(t_)

# Three numerical estimates of the derivative:
# 1) central_difference defined above (interior samples only),
# 2) np.gradient (same length as the input),
y1d_central = central_difference(df_['y'])
y1d_gradient = pd.Series(np.gradient(df_['y'], df_.index), index=df_.index)


# 3) one-sided difference between consecutive samples, attributed to the
#    earlier sample of each pair (hence the last time stamp is dropped).
y1d_diff = np.diff(df_['y'])/np.diff(t_)
t_diff = t_[0:-1]

# Figure 1: the sampled signal itself.
fig,ax=plt.subplots()
df_.plot(y='y', style='.-', label='raw', ax=ax);
ax.set_ylabel('y')
ax.grid()

# Figure 2: exact derivative ('raw') together with the three estimates.
fig,ax=plt.subplots()
df_.plot(y='y1d', style='.-', label='raw', ax=ax);
y1d_central.plot(style='o--', label='central difference', ax=ax);
y1d_gradient.plot(style='--', label='gradient', ax=ax);

ax.plot(t_diff, y1d_diff, ':', label='diff')

ax.grid(True)
ax.set_ylabel('y1d')
ax.legend()
    

In [None]:
# Figure 1: the simulated velocity 'u'.
fig,ax=plt.subplots()
df_result.plot(y='u',ax=ax, label='simulation')


# Differentiate the simulated 'u' with the same three methods as in the
# sine-wave demonstration: np.gradient, central_difference and a one-sided
# np.diff attributed to the earlier sample of each pair.
u_ = df_result['u']
t_ = df_result.index
u1d_ = np.gradient(u_, t_)
u1d_central = central_difference(u_)
u1d_diff = np.diff(u_)/np.diff(t_)
t_diff = t_[0:-1]


# Figure 2: the simulated acceleration 'u1d' against the three numerical estimates.
fig,ax=plt.subplots()
df_result.plot(y='u1d',ax=ax, label='simulation')
ax.plot(t_, u1d_, '--', label='gradient')
ax.plot(t_diff, u1d_diff, '-', label='diff')

u1d_central.plot(style=':', label='central difference', ax=ax)
ax.legend()
ax.grid(True)

In [None]:
# Re-create first ('1d') and second ('2d') derivatives of the measured
# x0, y0, z0 and psi by applying np.gradient twice with respect to the index.
t_ = df_measurement.index

suffix = ['','1d','2d']  # column-name suffix per derivative order; also read by a later plotting cell
for i in range(2):
    for key in ['x0','y0','z0','psi']:
        # Pass i differentiates the columns with suffix[i] into columns with suffix[i+1].
        df_measurement[f'{key}{suffix[i+1]}'] = np.gradient(df_measurement[f'{key}{suffix[i]}'], t_) 
        
# NOTE(review): transform_to_ship is a project helper; the following cells read
# 'u', 'v', 'r', their '1d' derivatives and 'U' from its result - confirm that it creates them.
df_measurement = transform_to_ship(df=df_measurement)
# Drop two samples at each end.
# NOTE(review): presumably because np.gradient falls back to one-sided differences
# at the boundaries, and it has been applied twice - confirm the intent.
df_measurement=df_measurement.iloc[2:-2].copy()

In [None]:
df_result['u1d'].tail()

In [None]:
# One figure per signal comparing the simulation with the numerically derived
# measurement: u, v, r (i=0) and u1d, v1d, r1d (i=1).
# `suffix` is the list defined in the differentiation cell above.
for i in range(2):
    for key in ['u','v','r']:
        y = f'{key}{suffix[i]}'
        
        fig,ax=plt.subplots()
        df_result.plot(y=y,label='sim', ax=ax)
        df_measurement.plot(y=y,label='measurement', style='--', ax=ax)
        ax.set_ylabel(y)

## Regression

In [None]:
# Smooth the measurement by averaging it in 3-second bins.
# NOTE(review): this treats the index as time in seconds - confirm against the simulation output.
df_measurement_smooth = df_measurement.copy()
# `pd.to_timedelta(..., unit='s')` and the lowercase '3s' alias replace
# `pd.TimedeltaIndex(..., unit='S')` and '3S', which are deprecated since pandas 2.2.
df_measurement_smooth.index = pd.to_timedelta(df_measurement_smooth.index, unit='s')
df_measurement_smooth = df_measurement_smooth.resample('3s').mean()
# Back to a plain numeric index (seconds) for the regression and plots below.
df_measurement_smooth.index = df_measurement_smooth.index.total_seconds()

# Convert to the prime system, using the 'U' column as the scaling speed.
df = ps.prime(df_measurement_smooth, U=df_measurement_smooth['U'])  # Note primed!
#df = ps.prime(df_result, U=df_result['U'])  # Note primed!


In [None]:
df_measurement_smooth.plot(y='u1d')

## N

In [None]:
# Set up the regression problem for the N equation: N_qs in vmm.N_qs_eq is
# replaced by the plain symbol N_, which is then declared as the label.
N_ = sp.symbols('N_')

diff_eq_N = regression.DiffEqToMatrix(ode=vmm.N_qs_eq.subs(N_qs,N_), 
                                      label=N_, base_features=[delta,u,v,r])

In [None]:
Math(vlatex(diff_eq_N.acceleration_equation))

In [None]:
# Build features and label from the primed data (label from 'r1d') and fit
# them with ordinary least squares.
X = diff_eq_N.calculate_features(data=df)
y = diff_eq_N.calculate_label(y=df['r1d'])

model_N = sm.OLS(y,X)
results_N = model_N.fit()

# NOTE(review): show_pred is a project helper; presumably it plots the model
# prediction against the label - confirm.
show_pred(X=X,y=y,results=results_N, label=r'$\dot{r}$')

## Y

In [None]:
# Set up the regression problem for the Y equation: Y_qs in vmm.Y_qs_eq is
# replaced by the plain symbol Y_, which is then declared as the label.
Y_ = sp.symbols('Y_')
diff_eq_Y = regression.DiffEqToMatrix(ode=vmm.Y_qs_eq.subs(Y_qs,Y_), 
                                      label=Y_, base_features=[delta,u,v,r])

In [None]:
Math(vlatex(diff_eq_Y.acceleration_equation))

In [None]:
# Build features and label from the primed data (label from 'v1d') and fit
# them with ordinary least squares. Note that X and y are overwritten per equation.
X = diff_eq_Y.calculate_features(data=df)
y = diff_eq_Y.calculate_label(y=df['v1d'])


model_Y = sm.OLS(y,X)
results_Y = model_Y.fit()

# NOTE(review): show_pred is a project helper; presumably it plots the model
# prediction against the label - confirm.
show_pred(X=X,y=y,results=results_Y, label=r'$\dot{v}$')

## X

In [None]:
# Set up the regression problem for the X equation: X_qs in vmm.X_qs_eq is
# replaced by the plain symbol X_, which is then declared as the label.
# Unlike the N and Y equations, thrust is included among the base features.
X_ = sp.symbols('X_')
diff_eq_X = regression.DiffEqToMatrix(ode=vmm.X_qs_eq.subs(X_qs,X_), 
                                      label=X_, base_features=[delta,u,v,r,thrust])

In [None]:
diff_eq_X.acceleration_equation

In [None]:
# Build features and label from the primed data (label from 'u1d') and fit
# them with ordinary least squares. Note that X and y are overwritten per equation.
X = diff_eq_X.calculate_features(data=df)
y = diff_eq_X.calculate_label(y=df['u1d'])

model_X = sm.OLS(y,X)
results_X = model_X.fit()

# The label previously read r'$\dot{u}}$'; the unbalanced '}' is invalid
# math text and differed from the labels used for the N and Y equations.
show_pred(X=X,y=y,results=results_X, label=r'$\dot{u}$')

In [None]:
# Convert each fit result into a DataFrame; later cells read its 'coeff' column
# and add a 'decoupled' column to it.
results_summary_X = regression.results_summary_to_dataframe(results_X)
results_summary_Y = regression.results_summary_to_dataframe(results_Y)
results_summary_N = regression.results_summary_to_dataframe(results_N)

### Decoupling

In [None]:
# Write the three equations of motion as a linear system A*[u1d, v1d, r1d]^T = b.
#
# Plain sympy symbols for accelerations and velocities.
# NOTE(review): u1d, v1d, r1d rebind names that may already have been provided
# by the star imports in the setup cell. linear_eq_to_matrix below only finds
# them if the vmm equations use these very symbols - confirm.
u1d,v1d,r1d = sp.symbols('u1d, v1d, r1d')
u_,v_,r_ = sp.symbols('u, v, r')

# Only referenced by the commented-out substitutions further down.
X_qs_, Y_qs_, N_qs_ = sp.symbols('X_qs, Y_qs, N_qs')


# Insert the force/moment expressions (fx_eq, fy_eq, mz_eq solved for
# X_force, Y_force, N_force) into the respective equation of motion.
X_eq = vmm.X_eom.subs([
    (X_force,sp.solve(vmm.fx_eq,X_force)[0]),
])

Y_eq = vmm.Y_eom.subs([
    (Y_force,sp.solve(vmm.fy_eq,Y_force)[0]),
])


N_eq = vmm.N_eom.subs([
    (N_force,sp.solve(vmm.mz_eq,N_force)[0]),
])


subs = [
    #(X_qs,X_qs_), 
    #(Y_qs,Y_qs_), 
    #(N_qs,N_qs_),
   

    # These three map each symbol onto itself and therefore change nothing.
    # NOTE(review): possibly meant to map the imported acceleration symbols
    # onto the plain ones defined above - confirm.
    (u1d,u1d),
    (v1d,v1d),
    (r1d,r1d),

    # Replace the imported u, v, r by the plain symbols named 'u', 'v', 'r'.
    (u,u_),
    (v,v_),
    (r,r_),
    
]
eq_X_ = X_eq.subs(subs)
eq_Y_ = Y_eq.subs(subs)
eq_N_ = N_eq.subs(subs)

# A holds the coefficients of the accelerations, b the remaining terms.
A,b = sp.linear_eq_to_matrix([eq_X_,eq_Y_,eq_N_],[u1d,v1d,r1d])

In [None]:
# Invert the dictionary `p` so that its values become the keys.
# NOTE(review): presumably `p` maps parameter name -> symbol, making this a
# symbol -> name mapping - confirm.
subs = {value:key for key,value in p.items()}
# Multiply A by the stacked vector [A_coeff, B_coeff, C_coeff].
# NOTE(review): A_coeff, B_coeff and C_coeff are not defined in the cells shown
# here; presumably they come from the star imports in the setup cell - confirm.
A_ = A*sp.matrices.MutableDenseMatrix([A_coeff,B_coeff,C_coeff])
# Turn the substituted expression into a numerical function (project `lambdify` helper).
A_lambda=lambdify(A_.subs(subs))

In [None]:
# Regressed coefficient vectors of the X, Y and N equations.
A_coeff_ = results_summary_X['coeff']
B_coeff_ = results_summary_Y['coeff']
C_coeff_ = results_summary_N['coeff']

# Evaluate A_lambda with the regressed coefficients; the remaining inputs are
# supplied from the 'prime' parameter column and the primed ship parameters.
# NOTE(review): `run` is a project helper; presumably it passes on only the
# keyword arguments that A_lambda actually needs - confirm.
coeffs = run(A_lambda,A_coeff=A_coeff_.values, B_coeff=B_coeff_.values, C_coeff=C_coeff_.values, 
    **df_parameters['prime'], **ship_parameters_prime)

The way the regression is formulated, inertial forces such as the centrifugal force will (I think) be absorbed into the regressed derivatives. This means that the centrifugal force $-m \cdot r \cdot u$ will be included in the $Y_{ur}$ coefficient. This coefficient is therefore not purely hydrodynamic, and the inertial contribution could potentially be counted twice.
The coefficients are recalculated below to avoid this:

In [None]:
# Store the decoupled coefficients next to the regressed ones.
# NOTE(review): assumes coeffs[0], coeffs[1], coeffs[2] belong to the X, Y and
# N equations, in the order the coefficient vectors were stacked - confirm.
results_summary_X['decoupled'] = coeffs[0][0]
results_summary_Y['decoupled'] = coeffs[1][0]
results_summary_N['decoupled'] = coeffs[2][0]

x_G_ = ship_parameters_prime['x_G']
m_ = ship_parameters_prime['m']

# Adjust the coefficients that also contain inertial contributions by the
# corresponding mass terms (see the explanation in the text above).
# NOTE(review): signs and the choice of coefficients should be verified
# against the equations of motion in vmm.
results_summary_X.loc['Xrr','decoupled']+=(-m_*x_G_)
results_summary_X.loc['Xvr','decoupled']+=(-m_)
results_summary_Y.loc['Yur','decoupled']+=m_
results_summary_N.loc['Nur','decoupled']+=m_*x_G_

#results_summary_X.loc['Xr','decoupled']+=(-m_*x_G_)
#results_summary_Y.loc['Yr','decoupled']+=m_
#results_summary_N.loc['Nr','decoupled']+=m_*x_G_


In [None]:
results_summary_N

## Add the regressed parameters
Hydrodynamic derivatives that depend on acceleration cannot be obtained from the VCT regression. They are, however, essential if a time simulation is to be conducted. These values have therefore been taken from Brix semi-empirical formulas for the simulations below.

In [None]:
# Merge the three regression summaries into a copy of the parameter table.
# combine_first keeps the values already present and fills missing entries
# (including new rows and columns) from `other`.
df_parameters_all = df_parameters.copy()
for other in [results_summary_X, results_summary_Y, results_summary_N]:
    df_parameters_all = df_parameters_all.combine_first(other)

df_parameters_all.rename(columns={'decoupled':'regressed'}, inplace=True)
df_parameters_all.drop(columns=['brix_lambda'], inplace=True)

# Parameters without a regressed value fall back to their 'prime' value.
df_parameters_all['regressed'] = df_parameters_all['regressed'].combine_first(df_parameters_all['prime'])  # prefer regressed
#df_parameters_all['regressed'].fillna(0,inplace=True)

# Simulation

In [None]:
df_result.head()

In [None]:
# Re-run the simulation with the regressed parameter set.
parameters=df_parameters_all['regressed'].copy()

# NOTE(review): df_result is passed as `df_`; presumably the simulator takes the
# initial state and the 'delta'/'thrust' control signals from it - confirm.
# The parameters are flagged as primed and `ps` is passed as the prime system.
result_regression = vmm.simulator.simulate(df_=df_result, parameters=parameters, ship_parameters=ship_parameters, 
                                  control_keys=['delta','thrust'], 
                                  primed_parameters=True,
                                  prime_system=ps,
                                  name='regressed')


In [None]:
result_regression.track_plot()

In [None]:
result_regression.plot();

In [None]:
# Compare the regressed parameters with the original 'prime' values in bar
# charts, split by magnitude so that large values do not hide the small ones.
df_plot_parameters = df_parameters_all[['regressed','prime']].copy()

# Non-zero regressed values below 0.8 in magnitude (NaN fails both comparisons).
mask = ((df_plot_parameters['regressed'].abs() > 0) &
        (df_plot_parameters['regressed'].abs() < 0.8) 
       )

# Guarded like the second chart so that an empty selection is skipped.
if mask.any():
    df_plot_parameters.loc[mask].plot(kind='bar')


# Remaining large values; `>=` so that a magnitude of exactly 0.8 is not
# left out of both charts.
mask = (
        (df_plot_parameters['regressed'].abs() >= 0.8) 
       )

if mask.any():
    df_plot_parameters.loc[mask].plot(kind='bar')