# Data extraction of trips

# Purpose
As a first step, the time series data will be divided into trips as a form of data reduction. The energy consumption can then be calculated for each trip, together with other aggregated quantities such as mean values and standard deviations. These will be used to analyze how much the trips differ from each other over the year.

# Methodology
* Find a good logical condition to distinguish between the various trips in the time series.

# Setup

In [None]:
#%load imports.py
%matplotlib inline
%load_ext autoreload
%autoreload 2

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
#import seaborn as sns
import os
from collections import OrderedDict

from IPython.display import display

# Raise the row limit used when a DataFrame is displayed.
pd.options.display.max_rows = 999
# None removes the column limit altogether. The earlier assignment of 999 to
# the same option was overridden by this call and has been dropped.
pd.set_option("display.max_columns", None)

import folium
import plotly.express as px
import plotly.graph_objects as go

import sys
import os
sys.path.append('../')
from src.visualization import visualize
from src.data.get_dataset import get_dataset



In [None]:
# Load the 'tycho_short' dataset through the project helper.
# NOTE(review): n_rows presumably caps the number of rows read - confirm
# against src.data.get_dataset.
dataset_name = 'tycho_short'
row_limit = 200000
df = get_dataset(name=dataset_name, n_rows=row_limit)

## Plot maps

In [None]:
# Draw the loaded data on a map with the project's plotting helper.
# The call is kept as the last bare expression so the notebook renders
# whatever plot_map returns.
map_options = dict(width=1000, height=600, zoom_start=14)
visualize.plot_map(df=df, **map_options)

## Identify trips

In [None]:
# Work on the first 5000 samples only. An explicit copy is taken so that the
# in-place sort below and the columns added to ``df_`` in later cells operate
# on an independent frame instead of on a slice of ``df`` (which is what
# triggers pandas' SettingWithCopyWarning).
#df_ = df.resample('20S').mean()
#df_.dropna(inplace=True)
#df_ = df_.iloc[0:1000]
df_ = df.iloc[0:5000].copy()
df_.sort_index(inplace=True)
#mask = df_['sog'] > 0.5
#df_ = df_.loc[mask]

# A pause of more than 20 s between two consecutive samples is used as the
# condition that separates one trip from the next.
# Assumes the index holds time stamps, so that diff() yields timedeltas.
max_gap = pd.Timedelta(seconds=20)
gap_before = df_.index.to_series().diff() > max_gap

# diff() flags the first sample *after* each gap; rolling the flags one step
# back marks the last sample *before* the gap, i.e. the end of a trip.
mask = np.roll(gap_before.to_numpy(), -1)
# np.roll wraps the first flag around to the last position; clear it.
mask[-1] = False

# Copy so that columns can be added to df_ends without touching df_.
df_ends = df_.loc[mask].copy()

In [None]:
# Overlay the detected trip end points (markers) on the 'sog' signal (line).
shared_options = dict(template="plotly_dark", width=1500, height=400)

signal_fig = px.line(df_, y='sog', **shared_options)
ends_fig = px.scatter(df_ends, y='sog', **shared_options)

# Only the traces of the two figures are combined into the new figure.
combined_fig = go.Figure(data=signal_fig.data + ends_fig.data)
combined_fig.show()

In [None]:
# Number the trips consecutively, in the order of their end points.
df_ends['trip_no'] = np.arange(len(df_ends), dtype=int)

# Assign every sample to the trip whose end point is the first one at or
# after the sample's own time stamp. With side='left', searchsorted returns
# for each time stamp the position of the first end time >= that time stamp,
# which is exactly the trip number given above. This replaces a per-trip loop
# that used a strict lower bound and therefore left the very first sample
# without a trip number.
# Relies on df_ (and hence df_ends) being sorted by index.
end_times = df_ends.index.values
end_pos = np.searchsorted(end_times, df_.index.values, side='left')

# Samples after the last detected end point belong to a trip whose end is not
# part of the data; they stay NaN so that they can be dropped later on.
trip_numbers = end_pos.astype(float)
trip_numbers[end_pos >= len(end_times)] = np.nan

# The column is always created, also when no end point was found.
df_['trip_no'] = trip_numbers

In [None]:
# Keep only the samples that have been assigned a trip number.
has_trip = df_['trip_no'].notna()
df_2 = df_[has_trip]

# One coloured line per trip.
fig = px.line(
    df_2,
    y='sog',
    color='trip_no',
    template="plotly_dark",
    width=1500,
    height=400,
)
fig.show()

In [None]:
# Add a 'trip_time' column: the index value of each sample relative to the
# first sample of its own trip. Samples without a trip number are not
# touched, since groupby skips NaN keys by default.
for _, segment in df_.groupby(by='trip_no'):
    segment_start = segment.index[0]
    df_.loc[segment.index, 'trip_time'] = segment.index - segment_start
    

In [None]:
# Keep only the samples that have been assigned a trip number.
has_trip = df_['trip_no'].notna()
df_2 = df_[has_trip]

# Plot all trips against the time since their own start so that they overlap
# and can be compared directly.
fig = px.line(
    df_2,
    x='trip_time',
    y='sog',
    color='trip_no',
    template="plotly_dark",
    width=1500,
    height=400,
)
fig.show()