In [None]:
# %load imports.py
# %load ../imports.py
%matplotlib inline
%load_ext autoreload
%autoreload 2
%config Completer.use_jedi = False  ## (To fix autocomplete)

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
plt.style.use('paper')
from IPython.display import set_matplotlib_formats
set_matplotlib_formats('pdf')

import seaborn as sns

#import seaborn as sns
import os
from collections import OrderedDict

from IPython.display import display

pd.options.display.max_rows = 999
pd.options.display.max_columns = 999
pd.set_option("display.max_columns", None)

import folium
import plotly.express as px
import plotly.graph_objects as go

import sys
import os

from sklearn.metrics import r2_score

import scipy.integrate
import seaborn as sns

import pyarrow as pa
import pyarrow.parquet as pq

import dask.dataframe
import statsmodels.api as sm

from d2e2f.visualization import visualize
import scipy.integrate

from d2e2f.pipelines.trip_statistics import clean_statistics
from scipy.stats import norm
from myst_nb import glue
#plt.style.use('presentation')
from IPython.display import display, Markdown, Latex
import sympy as sp
from d2e2f.visualization.visualize import plot_map, plot_trips
import statsmodels.api as sm
import geopandas as gp
from d2e2f.pipelines.data_preprocessing.prepare import prepare
from operator import add
from functools import reduce

# Method
(method)=
In order to investigate the energy efficiency of {glue:text}`shipname`, the data is divided into trips from {glue:text}`harbour1` to {glue:text}`harbour2` or in the reverse direction. The method used to define the trips is further explained in the [trip section](sec:trips).

In [None]:
%reload_kedro
# Ship identifier used as prefix for the catalog entries, and the ship name shown in the text.
# NOTE(review): `catalog` is not defined in this notebook; presumably it is provided by
# the %reload_kedro magic above — confirm.
ship='vitaskar'
shipname = 'Vitaskär'
df = catalog.load(f'{ship}.preprocessed_data')
# The raw data is loaded as a mapping from key to a load function
# (presumably a kedro partitioned dataset — TODO confirm); only the entry
# belonging to the last key is actually loaded here.
loader_raw = catalog.load(f'{ship}.raw_data')
df_raw = loader_raw[list(loader_raw.keys())[-1]]()
# Mean time step between consecutive 'Timestamp' values of the loaded raw data:
dt = pd.to_datetime(df_raw['Timestamp']).diff().mean()
trip_statistics = catalog.load(f'{ship}.trip_statistics')
trip_statistics_clean = catalog.load(f'{ship}.trip_statistics_clean')


# Ship specific parameters (these are glued into the text further down):
harbours = catalog.load(f'params:{ship}.harbours')
min_time = catalog.load(f'params:{ship}.min_time')
max_time = catalog.load(f'params:{ship}.max_time')
min_distance = catalog.load(f'params:{ship}.min_distance')
min_start_speed = catalog.load(f'params:{ship}.min_start_speed')
max_time_diff = catalog.load(f"params:{ship}.max_time_diff")

In [None]:
# Make the harbour names and the parameters available to the {glue:} roles in the text.
harbour_names = list(harbours.keys())
glue('harbour1', harbour_names[0])
glue('harbour2', harbour_names[1])

for glue_name, glue_value in (
    ('min_time', min_time),
    ('max_time', max_time),
    ('min_distance', min_distance),
    ('min_start_speed', min_start_speed),
    ('shipname', shipname),
    ('max_time_diff', max_time_diff),
    ('dt', dt),
):
    glue(glue_name, glue_value)

(sec:trips)=
## Trips
The speed over ground (sog) signal is used to define the start and end of the trips. Starts and ends are defined by upcrossings and downcrossings of $sog$ compared to a minimum speed $sog_{min}$. This simple algorithm will, however, also identify false starts of trips, since {glue:text}`shipname` starts each trip by reversing from the pier and then turning to go ahead, as seen in {numref}`fig:false_start`.


```{glue:figure} fig:false_start
:name: "fig:false_start"

The initial reversing may identify false starts.
```


In [None]:
# Find possible starts of trips: an upcrossing of the minimum start speed, i.e. the
# sample is below the threshold and the next sample is at or above it.
sog = df["sog"]
# shift(-1) gives the next sample WITHOUT wrapping around: the last row gets NaN, so the
# comparison is False there. (np.roll would compare the last sample with the first one
# and could flag a start at the very end of the data.)
sog_next = sog.shift(-1)
mask_start = (sog < min_start_speed) & (sog_next >= min_start_speed)
df.loc[mask_start, "state"] = "start"

# Collect the start events, with the index stored in a "time" column, so that the time
# between consecutive starts can be used to identify false starts:
mask = df["state"].isin(["start"])
df["time"] = df.index
events = df.loc[mask]



In [None]:
# A start event that follows the previous start event by less than the minimum trip
# time is relabelled as a false start.
# An explicit Timedelta is used for the threshold instead of the string f'{min_time}S':
# this does not rely on implicit string parsing or on the upper-case 'S' unit alias,
# which is deprecated in recent pandas versions.
mask = events["time"].diff() < pd.Timedelta(min_time, unit='s')
df.loc[events.loc[mask].index,'state'] = 'false start'

# Use the first false start as the example; show 100 s before and 300 s after it.
false_start = events.loc[mask].iloc[0]
t0 = false_start.name - pd.Timedelta(100,unit='s')
t1 = false_start.name + pd.Timedelta(300,unit='s')

df_ = df.loc[t0:t1]

with plt.style.context('paper'):
    fig,ax=plt.subplots()
    df_.plot(y='sog', label=r'$sog$', ax=ax)
    # Accepted starts are drawn as green squares...
    mask = (df_['state'] == 'start')
    df_.loc[mask].plot(y='sog', label=r'$t_{start}$', style='sg', ax=ax)
    # ...and false starts as black squares.
    mask = (df_['state'] == 'false start')
    df_.loc[mask].plot(y='sog', label=r'False $t_{start}$', style='sk', ax=ax)
    ax.set_ylabel('Ship speed over ground $sog$ [m/s]')
    ax.legend();
    # display=False: the figure is only stored for the {glue:figure} directive in the text.
    glue('fig:false_start', fig, display=False)

A minimum trip time $t_{min}$ is introduced to exclude these false starts, giving the following algorithm:


**Inputs** $sog$, $sog_{min}$, $t_{min}$

**Output** $t_{start}$, $t_{end}$

1. Identify possible trip starts: $t_{start}$: $(sog_i < sog_{min}) \wedge (sog_{i+1} >= sog_{min})$
2. Keep only valid trip starts: $t_{start,n+1} - t_{start,n} > t_{min}$
3. Identify trip ends: $t_{end}$: $(sog_{i-1} >= sog_{min}) \wedge (sog_{i} < sog_{min}) \wedge ((t_i - t_{i-1}) > t_{min})$

where $sog_{min}$={glue:}`min_start_speed` m/s and $t_{min}$={glue:}`min_time` s. The algorithm now disregards the initial reversing as seen in {numref}`fig:divide_trips`.  

```{glue:figure} fig:divide_trips
:name: "fig:divide_trips"

The data is divided into trips based on the speed over ground (sog) signal.
```

In [None]:
# Wider window than in the false start figure: from t0 up to 4000 s after the false start.
t2 = false_start.name + pd.Timedelta(4000,unit='s')

df_cut = df.loc[t0:t2]

# Plot style and legend label for the states that are marked in the figure;
# rows with any other state are only part of the sog line.
state_markers = {
    'start': ('sg', r'$t_{start}$'),
    'end': ('^r', r'$t_{end}$'),
}

with plt.style.context('paper'):

    fig,ax=plt.subplots()
    df_cut.plot(y='sog', label=r'$sog$', ax=ax)

    for state, df__ in df_cut.groupby(by='state'):
        if state not in state_markers:
            continue
        style, label = state_markers[state]
        df__.plot(y='sog', style=style, label=label, ax=ax)

    ax.set_ylabel('Ship speed over ground $sog$ [m/s]')
    ax.legend();
    # display=False: the figure is only stored for the {glue:figure} directive in the text.
    glue('fig:divide_trips', fig, display=False)

In [None]:
# Number of trips before and after the cleaning, referred to in the text below.
for glue_name, statistics in (
    ('no_trips', trip_statistics),
    ('no_trips_clean', trip_statistics_clean),
):
    glue(glue_name, len(statistics))


## Data cleaning
Some of the trips are removed, for instance if there is missing data or the trip is too long, based on the following criteria:

* trips should have time gaps that are smaller than {glue:}`max_time_diff` s
* trips should have a duration between {glue:}`min_time` and {glue:}`max_time` s
* trips should be longer than {glue:}`min_distance` m

Of the {glue:}`no_trips` initial trips, {glue:}`no_trips_clean` trips fulfill the above criteria.