In [None]:
# %load imports.py
# %load ../imports.py
%matplotlib inline
%load_ext autoreload
%autoreload 2
%config Completer.use_jedi = False  ## (To fix autocomplete)

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
plt.style.use('paper')
from IPython.display import set_matplotlib_formats
set_matplotlib_formats('pdf')

import seaborn as sns

#import seaborn as sns
import os
from collections import OrderedDict

from IPython.display import display

pd.options.display.max_rows = 999
pd.options.display.max_columns = 999
pd.set_option("display.max_columns", None)

import folium
import plotly.express as px
import plotly.graph_objects as go

import sys
import os

from sklearn.metrics import r2_score

import scipy.integrate
import seaborn as sns

import pyarrow as pa
import pyarrow.parquet as pq

import dask.dataframe
import statsmodels.api as sm

from d2e2f.visualization import visualize
import scipy.integrate

from d2e2f.pipelines.trip_statistics import clean_statistics
from scipy.stats import norm
from myst_nb import glue
#plt.style.use('presentation')
from IPython.display import display, Markdown, Latex
import sympy as sp
from d2e2f.visualization.visualize import plot_map, plot_trips
import statsmodels.api as sm
import geopandas as gp
from d2e2f.pipelines.data_preprocessing.prepare import prepare
from operator import add
from functools import reduce
from d2e2f.reporting import pop_index, reload_kedro
import yaml
# `reload_kedro` is a project helper imported from d2e2f.reporting; its return value is
# kept as `catalog` and is what the following cells call `.load(...)` on.
# NOTE(review): presumably this (re)creates the kedro session/data catalog — confirm.
catalog = reload_kedro()

In [None]:
# The ship that this report covers is read from the metadata file next to the notebook.
with open('metadata.yml', mode='r', encoding='utf8') as file:
    metadata = yaml.safe_load(file)
ship, shipname = metadata['ship'], metadata['shipname']

# Data sets for the selected ship.
df = catalog.load(f'{ship}.data_with_trip_numbers')

# The raw data is loaded as a mapping of keys to loader callables; only the entry
# under the last key is materialised here.
loader_raw = catalog.load(f'{ship}.raw_data')
last_raw_key = list(loader_raw.keys())[-1]
df_raw = loader_raw[last_raw_key]()

# Mean time difference between consecutive raw timestamps.
raw_timestamps = pd.to_datetime(df_raw['Timestamp'])
dt = raw_timestamps.diff().mean()

trip_statistics = catalog.load(f'{ship}.trip_statistics')
trip_statistics_clean = catalog.load(f'{ship}.trip_statistics_clean')

# Ship specific parameters, loaded in the listed order.
(
    harbours,
    min_time,
    max_time,
    min_distance,
    min_start_speed,
    max_time_diff,
) = (
    catalog.load(f'params:{ship}.{parameter_name}')
    for parameter_name in (
        'harbours',
        'min_time',
        'max_time',
        'min_distance',
        'min_start_speed',
        'max_time_diff',
    )
)

In [None]:
# Values that the markdown text refers to through glue roles, keyed by glue name.
# The first two harbour keys are exposed as 'harbour1' and 'harbour2'.
harbour_names = list(harbours.keys())
glued_values = {
    'harbour1': harbour_names[0],
    'harbour2': harbour_names[1],
    'min_time': min_time,
    'max_time': max_time,
    'min_distance': min_distance,
    'min_start_speed': min_start_speed,
    'shipname': shipname,
    'max_time_diff': max_time_diff,
    'dt': dt,
}

# Dicts preserve insertion order, so the values are glued in the order listed above.
for glue_name, glue_value in glued_values.items():
    glue(glue_name, glue_value)

# Method
(method)=
To investigate the energy efficiency of {glue:text}`shipname`, the data is divided into trips from {glue:text}`harbour1` to {glue:text}`harbour2` or in the reverse direction. The method used to define the trips is explained in the next section.

(sec:trips)=
## Trips
The speed over ground (sog) signal is used to define the start and end of the trips. Starts and ends are defined by up-crossings and down-crossings of $sog$ relative to a minimum speed $sog_{min}$ ({glue:text}`min_start_speed` m/s). This simple algorithm will, however, also identify false starts and ends, as seen in {numref}`fig:crossings`. Starts and ends that enclose trips with too short a duration, as seen in {numref}`fig:crossings`, are disregarded as false starts and ends.

```{glue:figure} fig:crossings
:name: "fig:crossings"

Downcrossings and upcrossings of the speed signal are used to identify trip starts and ends.
```


In [None]:
# Illustrate how possible trip starts/ends are found, using a short example time window.
# .copy() gives an independent frame, so the helper column added further down is not
# written into a slice of `df` (which triggers pandas' SettingWithCopyWarning).
df_ = df.loc["2022-06-13 09:45":"2022-06-13 11:00"].copy()
sog = df_["sog"]

# Previous/next sample of the speed signal. shift() leaves NaN at the window edges and
# comparisons with NaN are False, so the first and last sample are never flagged.
# (np.roll wraps around instead: it compares the first sample with the last one and
# vice versa, which can produce spurious crossings at the edges of the window.)
sog_previous = sog.shift(1)
sog_next = sog.shift(-1)
is_below = sog < min_start_speed

# Down-crossing: previous sample at/above the limit, this one and the next one below it.
mask_downcross = (sog_previous >= min_start_speed) & is_below & (sog_next < min_start_speed)
# Up-crossing: previous sample and this one below the limit, next one at/above it.
mask_upcross = (sog_previous < min_start_speed) & is_below & (sog_next >= min_start_speed)

downcrossings = df_.loc[mask_downcross]
upcrossings = df_.loc[mask_upcross]

fig, ax = plt.subplots()
df_.plot(y='sog', style='.-', ax=ax)
downcrossings.plot(y='sog', style='rv', label='downcrossings', ax=ax)
upcrossings.plot(y='sog', style='g^', label='upcrossings', ax=ax)

# Constant helper column, only used to draw the speed limit as a dashed line.
df_['min_start_speed'] = min_start_speed
df_.plot(y='min_start_speed', style='--', label='minimum start speed', ax=ax)

ax.legend()
ax.set_ylabel('Ship speed sog [m/s]')
ax.grid(True)

# Store the figure under the name used by the glue:figure directive; do not display it here.
glue("fig:crossings", fig, display=False)

Examples of trip starts and ends are shown in {numref}`fig:clean_starts`.

```{glue:figure} fig:clean_starts
:name: "fig:clean_starts"

Examples of trip starts and ends marked on the speed signal.
```

In [None]:
# Plot the speed signal of one example day, split into `nrows` consecutive pieces
# (one subplot per piece), with the trip starts and ends marked.
nrows = 4
# squeeze=False always returns a 2-D array of axes, so flatten() also works for nrows=1.
fig, axes = plt.subplots(ncols=1, nrows=nrows, squeeze=False)
axes = axes.flatten()

index = df.loc["2022-06-13":"2022-06-13"].index
indexes = np.array_split(index, nrows)

# The loop variable has its own name so that it does not overwrite the full-day `index`.
for chunk_index, ax in zip(indexes, axes):

    df_ = df.loc[chunk_index]

    # Build the masks from the chunk itself; masks built from the full `df` only work
    # through implicit index alignment and are recomputed over all data every iteration.
    starts = df_.loc[df_['state'] == 'start']
    ends = df_.loc[df_['state'] == 'end']

    df_.plot(y='sog', ax=ax)
    starts.plot(y='sog', style='gs', ax=ax, label='start')
    ends.plot(y='sog', style='r^', ax=ax, label='end')

    # Per-axes legends are hidden; one shared legend is added to the top subplot below.
    ax.get_legend().set_visible(False)
    ax.set_xlabel('')

axes[0].legend(loc='upper center', bbox_to_anchor=(0.5, 2.7), ncol=3)

# Only the bottom subplot gets an x label (explicit, instead of the leaked loop variable).
axes[-1].set_xlabel('Time')
plt.tight_layout()

# Store the figure under the name used by the glue:figure directive; do not display it here.
glue("fig:clean_starts", fig, display=False)

(sec:datacleaning)=
## Data cleaning
Some of the trips are removed, for instance if there is missing data or the trip is too long, based on the following criteria:

* trips should have time gaps that are smaller than {glue:text}`max_time_diff` s
* trips should have a duration between {glue:text}`min_time` and {glue:text}`max_time` s
* trips should be longer than {glue:text}`min_distance` m