(results)=
## Results

In [None]:
# %load imports.py
# %load ../imports.py
%matplotlib inline
%load_ext autoreload
%autoreload 2
%config Completer.use_jedi = False  ## (To fix autocomplete)

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import seaborn as sns
plt.style.use('paper')
from IPython.display import set_matplotlib_formats
set_matplotlib_formats('pdf')

#import seaborn as sns
import os
from collections import OrderedDict

from IPython.display import display

pd.options.display.max_rows = 999
pd.options.display.max_columns = 999
pd.set_option("display.max_columns", None)

import folium
import plotly.express as px
import plotly.graph_objects as go

import sys
import os

from sklearn.metrics import r2_score

import scipy.integrate
import seaborn as sns

import pyarrow as pa
import pyarrow.parquet as pq

import dask.dataframe
import statsmodels.api as sm

from d2e2f.visualization import visualize
import scipy.integrate

from d2e2f.pipelines.trip_statistics import clean_statistics
from scipy.stats import norm
from myst_nb import glue
#plt.style.use('presentation')
from IPython.display import display, Markdown, Latex
import sympy as sp
from d2e2f.visualization.visualize import plot_map, plot_trips
import statsmodels.api as sm
import geopandas as gp
from d2e2f.reporting import pop_index, reload_kedro
import yaml
catalog = reload_kedro()
from d2e2f.analyze import arange_trip_matrix

In [None]:
# Trip statistics for the steaming part of the trips.
df_stat = catalog.load('uraniborg_experiment.steaming.trip_statistics')

# Parse both timestamp columns as UTC and express them in Europe/Berlin time.
# The vectorised .dt accessor converts the whole column at once and keeps a
# proper timezone-aware datetime dtype (no per-element lambda needed).
for column in ('start_time', 'end_time'):
    df_stat[column] = pd.to_datetime(df_stat[column], utc=True).dt.tz_convert('Europe/Berlin')

Statistics for steaming data (excluding in port manoeuvres) between the points in {numref}`fig:steaming`.

```{figure} steaming.png
---
name: "fig:steaming"
height: 200px
---
The trips are cut at longitudes indicated by the black lines. 
```


![](steaming.png)

In [None]:
# Load the steaming data set and plot the trips it contains.
df = catalog.load('uraniborg_experiment.data_steaming')
visualize.plot_trips(df)

In [None]:
# Work on a copy so that the loaded trip statistics (df_stat) stay untouched.
df_clean = df_stat.copy()

In [None]:
def extract(df_clean, start, end):
    """Select the trips between two points in time and label the operator.

    The first and the last selected trip are the ones whose ``start_time`` is
    closest to ``start`` and ``end`` respectively; both are included.
    Counting from the first selected trip, the label alternates every second
    trip: 'ÖST' for the first two trips, 'BFH' for the next two, and so on.

    Parameters
    ----------
    df_clean : pandas.DataFrame
        Trip statistics with (at least) the columns ``start_time`` and
        ``trip_no``. It is not modified.
    start, end : timestamp
        Limits of the period; they must be subtractable from ``start_time``
        (e.g. timezone-aware if ``start_time`` is timezone-aware).

    Returns
    -------
    pandas.DataFrame
        Copy of the selected rows with an added column ``operation``.
    """
    start_index = (df_clean['start_time'] - start).abs().idxmin()
    end_index = (df_clean['start_time'] - end).abs().idxmin()
    df = df_clean.loc[start_index:end_index].copy()

    # Only the selected trips need a label, so the trip counter is computed on
    # the slice instead of on the whole input frame.
    steps = df['trip_no'] - df_clean.loc[start_index, 'trip_no']
    super_trips = np.floor(steps / 2)
    df['operation'] = np.where(super_trips % 2 == 0, 'ÖST', 'BFH')
    return df

In [None]:
# Start and end of the experiment and of the reference period before it,
# given with a +2 h UTC offset and expressed in Europe/Berlin time.
start_experiment = pd.to_datetime("2022-08-19 14:15:00+2", utc=False).tz_convert(tz='Europe/Berlin')
end_experiment = pd.to_datetime("2022-08-22 06:50:00+2", utc=False).tz_convert(tz='Europe/Berlin')
start_pre_experiment = pd.to_datetime("2022-07-08 14:15:00+2", utc=False).tz_convert(tz='Europe/Berlin')
end_pre_experiment = pd.to_datetime("2022-07-11 06:50:00+2", utc=False).tz_convert(tz='Europe/Berlin')

df_experiment = extract(df_clean, start=start_experiment, end=end_experiment)
df_experiment['group'] = 'experiment'
df_pre_experiment = extract(df_clean, start=start_pre_experiment, end=end_pre_experiment)
df_pre_experiment['group'] = 'before experiment'

# DataFrame.append was removed in pandas 2.0; pd.concat stacks the two periods
# in the same order (reference period first, then the experiment).
df_data = pd.concat([df_pre_experiment, df_experiment])
df_data_raw = df_data.copy()

Data was collected from the experiment and also from a period before the experiment. This period is intended as a reference period for the experiment. The same captain and chief mate were operating the ship during both the experiment and the reference period. {numref}`fig:overview` shows the time frames of the experiment and the reference period.  

```{glue:figure} fig:overview
:name: "fig:overview"

Fuel consumption for trips before (reference period) and during the experiment.
```

In [None]:
# Fuel consumption per trip over time, coloured by period (reference / experiment).
fig, ax = plt.subplots()
facegrid = sns.scatterplot(
    data=df_data_raw,
    x='start_time',
    y='consumption',
    hue='group',
    ax=ax,
)
ax.set_xlabel('Trip start time')
ax.set_ylabel('Fuel consumption per trip [liters]')

# Five evenly spaced ticks between the first and the last trip in the table.
first_start = df_data_raw['start_time'].iloc[0]
last_start = df_data_raw['start_time'].iloc[-1]
ax.set_xticks(pd.date_range(start=first_start, end=last_start, periods=5))

ax.legend(loc='upper center')
fig = ax.get_figure()
glue("fig:overview", fig, display=False)

In [None]:
# Time table from the data catalog; its index is converted to Europe/Berlin time.
time_table = catalog.load("uraniborg_experiment.time_table")
time_table.index = time_table.index.tz_convert(tz='Europe/Berlin')
# Sanity checks: operator and direction derived from the trip data must agree
# with the time table. `.values` drops the time table index, so the comparison
# is made row by row in the existing order.
assert (df_data_raw['operation'] == time_table['operation'].values).all()
assert (df_data_raw['trip_direction'] == time_table['direction'].values).all()

In [None]:
# Store the time table timestamps (Europe/Berlin) next to the trips.
# NOTE(review): presumably assigned row by row in the existing order, which
# relies on the two tables having equal length (checked by the asserts on the
# time table) — confirm.
df_data_raw['time_table'] = time_table.index.tz_convert(tz='Europe/Berlin')

It can be noted in {numref}`fig:steaming` that there are some trips where the ship makes long detours. The crew reported that this was due to other ships crossing the normal route. These trips were removed from the data as shown in {numref}`fig:clean_distance` and {numref}`fig:steaming_clean`. 

```{glue:figure} fig:clean_distance
:name: "fig:clean_distance"

Trips with long distances are excluded in the data cleaning.
```

```{figure} steaming_clean.png
---
name: "fig:steaming_clean"
height: 200px
---
Remaining trips after data cleaning.
```

In [None]:
# Trips with a distance above the 97 % quantile are dropped (long detours).
mask = df_data_raw['distance'] <= df_data_raw['distance'].quantile(0.97)
df_data = df_data_raw.loc[mask].copy()

# Rename the operator codes. The result is assigned back to the column:
# calling replace(..., inplace=True) on a selected column is a chained
# in-place operation that newer pandas versions warn about and that has no
# effect on the frame when Copy-on-Write is enabled.
df_data['operation'] = df_data['operation'].replace({'ÖST': 'Mate', 'BFH': 'Capt.'})
# One label per operator/period combination, e.g. "Capt. experiment".
df_data['case'] = df_data['operation'] + ' ' + df_data['group']
df_data_all = df_data.copy()

# Trip distance histograms before (top) and after (bottom) the cleaning,
# drawn with common bins and a common y-range so they can be compared.
fig, axes = plt.subplots(nrows=2)
bins = np.linspace(df_data_raw['distance'].min(), df_data_raw['distance'].max(), 30)

ax = axes[0]
df_data_raw.hist('distance', ax=ax, bins=bins, label='Raw')
ax.set_title('')
ax.set_xticklabels([])
ax.set_ylabel('Number of trips')
ax.legend()

ax = axes[1]
df_data_all.hist('distance', ax=ax, bins=bins, color='g', label='Cleaned')
axes[1].set_ylim(axes[0].get_ylim())
ax.set_title('')
ax.legend()
ax.set_xlabel('Trip distance [m]')
ax.set_ylabel('Number of trips')
plt.tight_layout()
glue("fig:clean_distance", fig, display=False)

In [None]:
# Inspect which columns are available in the cleaned trip data.
df_data.columns

In [None]:
# Histograms of the columns delta1 and delta2 for the cleaned trips.
# NOTE(review): the meaning of delta1/delta2 is not defined in this notebook.
df_data['delta1'].hist(bins=20)
df_data['delta2'].hist(bins=20)

In [None]:
# Plot only the trips that remain after the distance cleaning.
# Assumes df_data_all is indexed by trip number — TODO confirm.
mask = df['trip_no'].isin(df_data_all.index)
visualize.plot_trips(df.loc[mask])

In [None]:
# Experiment trips are removed when the value of PR does not match the
# operator: below 0.9 for the captain, 0.9 or above for the mate.
# Trips from before the experiment are never removed here.
operated_by = df_data_all['operation']
during_experiment = df_data_all['group'] == 'experiment'
utilisation = df_data_all['PR']

mask_remove1 = (operated_by == 'Capt.') & during_experiment & (utilisation < 0.9)
mask_remove2 = (operated_by == 'Mate') & during_experiment & (utilisation >= 0.9)
mask_remove = mask_remove1 | mask_remove2

df_data_remove = df_data_all.loc[mask_remove].copy()
df_data = df_data_all.loc[~mask_remove].copy()


{numref}`fig:operation_group` shows the four datasets of fuel consumption during and before the experiment with the ship operated by either the captain or the chief mate. The captain managed to run the ship with almost full utilization of the aft thruster ($r_{aft}=1$) for most of the trips, as seen in the lower right plot. From this plot it can also be seen that three of the captain's experiment trips had a utilization that was too low and were therefore excluded from the experiment. No points were removed from the data before the experiment regardless of which utilization was used, since this data was not part of the experiment and $r_{aft}$ was thus not a controlled variable.

```{glue:figure} fig:operation_group
:name: "fig:operation_group"

Fuel consumption for trips with the ship operated by the captain or the chief mate, before and during the experiment.
```

In [None]:
# One panel per case (operator + period): consumption against PR, with the
# removed experiment trips marked in red where there are any.
df_data = df_data.sort_values(by=['start_time', 'operation'])

fig, axes = plt.subplots(nrows=2, ncols=2)
axes = axes.flatten()

# Common axis limits for all panels, 2 % outside the range of the kept data.
x_limits = (0.98 * df_data['PR'].min(), 1.02 * df_data['PR'].max())
y_limits = (0.98 * df_data['consumption'].min(), 1.02 * df_data['consumption'].max())

remove_groups = df_data_remove.groupby(by='case', sort=False)
for (case, group), ax in zip(df_data.groupby('case', sort=False), axes):

    group.plot(x='PR', y='consumption', style='.', label='Data', ax=ax)
    if case in remove_groups.groups:
        removed = remove_groups.get_group(case)
        removed.plot(x='PR', y='consumption', style='r.', label='Removed data', ax=ax)
        ax.legend(loc='lower left')
    else:
        # Nothing removed for this case: a legend with one entry adds nothing.
        ax.get_legend().set_visible(False)

    ax.set_xlim(*x_limits)
    ax.set_ylim(*y_limits)
    ax.grid(True)
    ax.set_title(case)

# Tick labels and axis labels only on the outer edges of the 2x2 grid.
for right_panel in (axes[1], axes[3]):
    right_panel.set_yticklabels([])
for top_panel in (axes[0], axes[1]):
    top_panel.set_xticklabels([])
    top_panel.set_xlabel('')
for bottom_panel in (axes[2], axes[3]):
    bottom_panel.set_xlabel('$r_{aft}$')
for left_panel in (axes[0], axes[2]):
    left_panel.set_ylabel('F. consumption [l]')

plt.tight_layout()

glue('fig:operation_group', fig, display=False)

In [None]:
def means_and_comparison(df_data):
    """Mean values per operator and period, compared with the captain's experiment.

    Parameters
    ----------
    df_data : pandas.DataFrame
        Trip data with the columns ``operation``, ``group`` and ``sog`` plus
        the numeric columns to average. Must contain trips for the
        combination ('Capt.', 'experiment').

    Returns
    -------
    means : pandas.DataFrame
        Mean of every numeric column for each (operation, group) combination,
        in order of first appearance, plus a column ``trips`` with the number
        of trips (non-null ``sog`` values) in each combination.
    comparison : pandas.DataFrame
        Relative difference ``(captain_experiment - means) / means`` for every
        column of ``means``.
    """
    grouped = df_data.groupby(by=['operation', 'group'], sort=False)
    # numeric_only=True: text and timestamp columns cannot be averaged and
    # make a plain .mean() raise a TypeError in pandas >= 2.0.
    means = grouped.mean(numeric_only=True)
    means['trips'] = grouped['sog'].count()
    mean_BFH_experiment = means.loc[('Capt.', 'experiment')]
    comparison = (mean_BFH_experiment - means) / means
    return means, comparison

# Mean values and relative differences for the data without any speed correction.
means, comparison = means_and_comparison(df_data)

{numref}`tab:means_1` shows the mean values of the four datasets, where the mean fuel consumption obtained for the captain during the experiment is 20-25% lower than for the other datasets. The speed is around 5-10% lower for the captain's experimental data, which definitely contributes to the fuel consumption reduction. The fuel consumption reduction in {numref}`tab:means_1` should therefore be considered too optimistic. Different speed correction methods are applied in the next section, to come up with a more realistic estimate of the fuel saving potential.

```{glue:figure} tab:means_1
:name: "tab:means_1"

Mean values before and during the experiment with ship operated by the captain and the first mate.
```


In [None]:
# Columns shown in the tables of mean values and the decimals to round them to.
interesting = ['sog', 'PR', 'consumption', 'trips']
decimals = {'sog': 1, 'PR': 1, 'consumption': 0}

table = means[interesting].reset_index().round(decimals)
glue('tab:means_1', table)

In [None]:
# Relative differences to the captain's experiment trips, in whole percent.
interesting_pct = ['sog', 'PR', 'consumption']
table = (100 * comparison[interesting_pct]).reset_index()
table = table.round(decimals=0)
glue('tab:pct_1', table)

## Speed correction by manual selection

In [None]:
# Manually chosen speed windows (and, for the captain's experiment trips, a
# lower limit on PR) for each of the four datasets.
by_captain = df_data['operation'] == 'Capt.'
by_mate = df_data['operation'] == 'Mate'
before = df_data['group'] == 'before experiment'
during = df_data['group'] == 'experiment'
sog = df_data['sog']

mask_BFH_pre_experiment = by_captain & before & (sog > 3.5) & (sog < 10.0)
mask_BFH_experiment = by_captain & during & (sog > 4.5) & (sog < 10.0) & (df_data['PR'] > 0.9)
mask_ÖST_pre_experiment = by_mate & before & (sog > 3.0) & (sog < 5.15)
mask_ÖST_experiment = by_mate & during & (sog > 3.0) & (sog < 4.92)

selections = [
    mask_BFH_pre_experiment,
    mask_BFH_experiment,
    mask_ÖST_pre_experiment,
    mask_ÖST_experiment,
]
df_data_select = pd.concat([df_data.loc[selection] for selection in selections])
df_data_select = df_data_select.sort_values(by='start_time')
means_select, comparison_select = means_and_comparison(df_data_select)

The four datasets can be forced to have almost the same average speed by manually excluding some trips from the datasets as seen in {numref}`fig:remove_speed_average`.

```{glue:figure} fig:remove_speed_average
:name: "fig:remove_speed_average"

Average speed for all datasets, before (top) and after (bottom) some trips have been excluded.
```

In [None]:
# Average speed per dataset before (top) and after (bottom) the manual
# selection; the dashed line marks the highest of the averages in each panel.
fig, axes = plt.subplots(nrows=2)

for ax, table_of_means in zip(axes, (means, means_select)):
    table_of_means['sog'].plot.bar(ax=ax)
    table_of_means['sog_max'] = table_of_means['sog'].max()
    table_of_means['sog_max'].plot(style='k--', ax=ax)
    ax.set_ylim(4, 5)
    ax.set_xlabel('')
    ax.set_ylabel('Average sog [m/s]')

# Only the bottom panel carries the (slightly rotated) dataset names.
axes[0].set_xticklabels([])
axes[1].set_xticklabels(axes[1].get_xmajorticklabels(), rotation=-10)

plt.tight_layout()
glue("fig:remove_speed_average", fig, display=False)

The mean values after speed correction by manual selection are shown in {numref}`tab:means_2`.

```{glue:figure} tab:means_2
:name: "tab:means_2"

Mean values before and during the experiment with ship operated by the captain and the first mate with speeds corrected by manual selections.
```

In [None]:
# Mean values after the manual speed selection, rounded for presentation.
decimals = {'sog': 1, 'PR': 1, 'consumption': 0}

table = means_select[interesting].reset_index().round(decimals)
glue('tab:means_2', table)

In [None]:
# Relative differences after the manual speed selection, in whole percent.
table = (100 * comparison_select[interesting_pct]).reset_index()
table = table.round(decimals=0)
glue('tab:pct_2', table)

## Speed model correction

In [None]:
# Larger set of trip statistics used to fit the speed model; trips at or
# above the 96 % distance quantile are left out.
df_statistics_big = catalog.load('uraniborg.steaming.trip_statistics_clean')
distance_limit = df_statistics_big['distance'].quantile(0.96)
mask = df_statistics_big['distance'] < distance_limit
df_statistics_big_clean = df_statistics_big.loc[mask].copy()

In [None]:
# Histograms of the columns delta1 and delta2 for the larger data set.
# NOTE(review): the meaning of delta1/delta2 is not defined in this notebook.
df_statistics_big_clean['delta1'].hist(bins=100)
df_statistics_big_clean['delta2'].hist(bins=100)

In [None]:
def features_uraniborg(df):
    """Build the regression features and target for the fuel consumption model.

    The features are the columns ``sog**2`` and ``PR``, a constant column
    ``intercept`` (all ones) and, when present in ``df``, ``aw_x**2*sog``.
    Rows where any feature or the ``consumption`` target is null are dropped.

    Parameters
    ----------
    df : pandas.DataFrame
        Trip statistics with the columns ``sog**2``, ``PR`` and ``consumption``.

    Returns
    -------
    X : pandas.DataFrame
        Feature matrix for the complete rows, indexed like ``df``.
    y : pandas.Series
        ``consumption`` for the same rows.
    """
    X = df[['sog**2', 'PR']].copy()
    X['intercept'] = 1

    # Optional feature, only used when the data provides it.
    optional = 'aw_x**2*sog'
    if optional in df:
        X[optional] = df[optional]

    complete = X.notnull().all(axis=1) & df['consumption'].notnull()
    return X.loc[complete], df.loc[complete, 'consumption']

In [None]:
# Ordinary least squares fit of fuel consumption on the features from
# features_uraniborg. hasconst=True because X already holds a constant
# column ('intercept').
X,y=features_uraniborg(df_statistics_big_clean)
model_speed = sm.OLS(y,X,hasconst=True)

result_speed = model_speed.fit()
result_speed.summary()

In [None]:
from statsmodels.sandbox.regression.predstd import wls_prediction_std

In [None]:
# Measured and predicted consumption for the first 70 rows of the larger data set.
fig, ax = plt.subplots()
X, y = features_uraniborg(df_statistics_big_clean.iloc[0:70])
prstd, iv_l, iv_u = wls_prediction_std(result_speed, X)
y_pred = result_speed.predict(X)

for series, series_label in ((y, 'True'), (y_pred, 'Prediction')):
    series.plot(ax=ax, style='.-', label=series_label)

# Grey band between the lower and upper bounds returned by wls_prediction_std.
ax.fill_between(
    y.index,
    iv_l,
    iv_u,
    color='grey',
    label=r'Confidence intervall (95\%)',
    zorder=-10,
)
ax.set_xlabel('Trip number')
ax.set_ylabel('Fuel consumption per trip [l]')
ax.legend(loc='upper right')

In [None]:
# Measured consumption against speed for the first 200 rows of the larger
# data set, sorted by speed.
fig,ax=plt.subplots()
df_ = df_statistics_big_clean.iloc[0:200].sort_values(by='sog').copy()
X,y=features_uraniborg(df_)
# Keep only the rows that survived the null filtering in features_uraniborg.
df_=df_.loc[y.index].copy()
#prstd, iv_l, iv_u = wls_prediction_std(result_speed, X)
# NOTE(review): y_pred is only needed by the disabled prediction plot below.
y_pred = result_speed.predict(X)

ax.plot(df_['sog'], y, '.', label='True')
#ax.plot(df_['sog'], y_pred, label='Prediction')

#ax.fill_between(y.index, iv_l, iv_u, color='grey', label=r'Confidence intervall (95\%)', zorder=-10)
ax.set_ylabel('Fuel consumption per trip [l]')
ax.set_xlabel('sog [m/s]')
ax.legend(loc='upper right')

In [None]:
# Copy of the trip data in which every trip gets the same speed terms (the
# means over df_data), so that the model can be evaluated at one common speed.
# NOTE(review): 'sog**2' is set to the mean of the squared speed, which is not
# the square of the mean speed — confirm this is intended.
df_corrected = df_data.copy()
df_corrected['sog'] = df_data['sog'].mean()
df_corrected['sog**2'] = df_data['sog**2'].mean()
X_corrected,_ = features_uraniborg(df_corrected)
# The feature 'aw_x**2*sog' is set to zero for all trips (marked as missing).
# NOTE(review): this presumes the fitted model has this feature as its last
# column — confirm against the data used for the fit.
X_corrected["aw_x**2*sog"]=0 #missing"
X,_ = features_uraniborg(df_data)
X["aw_x**2*sog"]=0 #missing"
y = df_data['consumption']
# Difference between measured and predicted consumption at the actual speed
# of each trip.
model_error = y - result_speed.predict(X)

In [None]:
# Corrected consumption: model prediction at the common speed plus the
# difference between measurement and model for each trip.
df_corrected['consumption'] = result_speed.predict(X_corrected) + model_error
means_corrected, comparison_corrected = means_and_comparison(df_corrected)

```{glue:figure} fig:speed_corrections
:name: "fig:speed_corrections"

Fuel consumption per trip as a function of speed over ground: original data and data corrected to the average speed with the regression model.
```

In [None]:
# Original and speed-corrected consumption, both drawn at the original speed
# of each trip, with a dotted line joining the two values of every trip.
fig, ax = plt.subplots()
mean_sog = df_data['sog'].mean()

df_data.plot(x='sog', y='consumption', label='Original data', ax=ax, style='g.')
ax.plot(df_data['sog'], df_corrected['consumption'], 'r.', label='Corrected data')
# The legend is created before the average line is drawn, so that line is
# annotated in the plot instead of listed in the legend.
ax.legend()
ax.plot([mean_sog, mean_sog], [20, 85], 'b--', label='average sog')
ax.annotate('Average sog', xy=(mean_sog + 0.02, 20))
ax.set_ylabel('Fuel consumption per trip [l]')
ax.set_xlabel('Ship speed over ground (sog) [m/s]')

trip_values = zip(df_data['sog'], df_data['consumption'], df_corrected['consumption'])
for speed, measured, corrected in trip_values:
    ax.plot([speed, speed], [measured, corrected], 'k:', zorder=-10)

glue("fig:speed_corrections", fig, display=False)

The mean values after speed correction with the regression model are shown in {numref}`tab:means_3`.

```{glue:figure} tab:means_3
:name: "tab:means_3"

Mean values before and during the experiment with ship operated by the captain and the first mate with speeds corrected by regression model.
```

In [None]:
# Mean values after the regression-based speed correction, rounded for presentation.
decimals = {'sog': 1, 'PR': 1, 'consumption': 0}

table = means_corrected[interesting].reset_index().round(decimals)
glue('tab:means_3', table)

In [None]:
# Relative differences after the regression-based speed correction, in whole percent.
table = (100 * comparison_corrected[interesting_pct]).reset_index()
table = table.round(decimals=0)
glue('tab:pct_3', table)