# Create trip statistics

# Purpose
Before looking at the dynamics of the ferries in the time series, it is a good idea to first look at some longer-term trends. As a first data reduction, statistics will be generated and saved for each trip, in order to spot trends over the day, week, month and year.

# Methodology
* Trip statistics will be generated for each trip containing:
   * for all columns: min/mean/max/median/std
   * energy consumption for all thrusters
* The statistics will be stored in an [xarray](http://xarray.pydata.org/en/stable/) dataset

# Setup

In [None]:
# %load imports.py
#%load imports.py
%matplotlib inline
%load_ext autoreload
%autoreload 2

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.pyplot as plt
plt.rcParams["figure.figsize"] = (20,3)

#import seaborn as sns
import os
from collections import OrderedDict

from IPython.display import display

pd.options.display.max_rows = 999
pd.options.display.max_columns = 999
pd.set_option("display.max_columns", None)

import folium
import plotly.express as px
import plotly.graph_objects as go

import sys
import os
sys.path.append('../')
from src.visualization import visualize
from src.data import get_dataset
from src.data import trips
import scipy.integrate
import seaborn as sns
import xarray as xr


In [None]:
# Load the data set through the project's loader.
df = get_dataset.get()

# Drop the columns delta_1 ... delta_4; they are not used in this notebook.
deltas = [f'delta_{i}' for i in range(1, 5)]
df.drop(deltas, axis='columns', inplace=True)

# Replace trip_time by the elapsed time expressed in seconds.
elapsed = pd.TimedeltaIndex(df['trip_time'])
df['trip_time'] = elapsed.total_seconds()

In [None]:
# Preview the first rows of the loaded data.
df.head()

In [None]:
# Pick trip number 11 as the example trip used by the cells below.
trip = df.groupby(by='trip_no').get_group(11)

In [None]:
# Show the example trip with the project's map helper
# (presumably draws the trip's track on a map -- see src/visualization).
visualize.plot_map(trip)

In [None]:
def integrate_time(trip):
    """Integrate every column of a trip over time with Simpson's rule.

    Parameters
    ----------
    trip : pd.DataFrame
        Samples of one trip. Must contain a ``trip_time`` column; numeric
        values are interpreted as seconds. All other columns are integrated
        with ``trip_time`` as the integration variable. The frame is not
        modified.

    Returns
    -------
    pd.Series
        Series named ``'integral'``, indexed by the integrated column names
        (``trip_time`` itself is excluded).
    """
    # pd.to_timedelta replaces TimedeltaIndex(..., unit='s'): the ``unit``
    # keyword of the TimedeltaIndex constructor is deprecated in pandas.
    seconds = pd.to_timedelta(trip['trip_time'], unit='s').dt.total_seconds().to_numpy()

    signals = trip.drop(columns=['trip_time'])

    # scipy.integrate.simps was removed in SciPy 1.14; prefer ``simpson`` and
    # only fall back to the old name on SciPy versions that predate it.
    simpson = getattr(scipy.integrate, 'simpson', None) or scipy.integrate.simps

    # Transpose so that each row is one signal; integration runs along the
    # last axis, i.e. along the time samples.
    integrals = simpson(y=signals.to_numpy().T, x=seconds)

    return pd.Series(data=integrals, name='integral', index=signals.columns)

In [None]:
# Time integral of every column of the example trip.
integrate_time(trip)

In [None]:
# Cross-check: integrate a single column of the example trip directly.
# Uses `trip` (the example trip selected above); `trip_` is only created
# later inside the statistics loop, so it is not available here on a
# top-to-bottom run.
t = pd.to_timedelta(trip['trip_time'], unit='s').dt.total_seconds().to_numpy()
# scipy.integrate.simps was removed in SciPy 1.14; fall back to it only on
# SciPy versions that do not provide `simpson` yet.
simpson_rule = getattr(scipy.integrate, 'simpson', None) or scipy.integrate.simps
simpson_rule(y=trip['power_em_thruster_total'].to_numpy(), x=t)

In [None]:
def trip_statistic(trip):
    """Summarise one trip: descriptive statistics plus time integrals.

    Parameters
    ----------
    trip : pd.DataFrame
        Samples of one trip, including the ``trip_time`` column required by
        ``integrate_time``.

    Returns
    -------
    pd.DataFrame
        The rows produced by ``trip.describe()`` followed by one extra row
        labelled ``'integral'`` holding the time integral of each column.
        Columns without an integral (``trip_time``) are NaN in that row.
    """
    stats = trip.describe()  # General statistics

    # DataFrame.append was removed in pandas 2.0. Turn the integral Series
    # into a one-row frame (row label = Series name) and concatenate instead.
    integral_row = integrate_time(trip).to_frame().T

    return pd.concat([stats, integral_row])

In [None]:

# Build one single-trip Dataset per trip and concatenate them all at once.
# Concatenating inside the loop re-copies the accumulated data on every
# iteration, which grows quadratically with the number of trips.
trip_datasets = []
for trip_no, trip in df.groupby(by='trip_no'):

    # trip_no is constant within a group; it becomes a coordinate instead.
    trip_ = trip.drop(columns=['trip_no'])

    stats = trip_statistic(trip_)
    stats.index.name = 'statistic'  # becomes the name of the Dataset dimension

    ds = xr.Dataset.from_dataframe(stats)
    ds = ds.expand_dims('trip_no')
    ds = ds.assign_coords(trip_no=np.array([trip_no], dtype=np.int64))

    trip_datasets.append(ds)

# Keep the previous behaviour of leaving ds_stats as None when there are no trips.
ds_stats = xr.concat(trip_datasets, dim="trip_no") if trip_datasets else None


In [None]:
# Display the combined per-trip statistics.
ds_stats

In [None]:
# Labels available along the 'statistic' dimension.
ds_stats.coords['statistic']

In [None]:
# `ds` is the leftover loop variable: the single-trip dataset of the last trip processed.
ds

In [None]:
# Mean value of every variable for trip number 17.
ds_stats.sel(trip_no=17, statistic='mean')

In [None]:
# Per-trip mean of 'sog' against per-trip mean of 'trip_time'.
ds_stats.sel(statistic='mean').plot.scatter(x="sog",y="trip_time")

In [None]:
# Per-trip maximum of 'sog' against per-trip maximum of 'power_propulsion_total'.
ds_stats.sel(statistic='max').plot.scatter(x="sog",y="power_propulsion_total")

In [None]:
# Same scatter for min, mean and max together, coloured by statistic.
ds_stats.sel(statistic=['min','mean','max']).plot.scatter(x="sog",y="power_propulsion_total", hue='statistic');


In [None]:
# Distribution of the per-trip mean 'sog' over all trips.
xr.plot.hist(ds_stats.sel(statistic='mean')['sog'], bins=20);

In [None]:
# Distribution of the time integral of 'power_propulsion_total' per trip
# (presumably the propulsion energy used per trip -- units not visible here).
xr.plot.hist(ds_stats.sel(statistic='integral')["power_propulsion_total"], bins=20);

In [None]:
# Time integral of 'sog' against time integral of 'power_propulsion_total', one point per trip.
ds_stats.sel(statistic='integral').plot.scatter(x="sog",y="power_propulsion_total")