In [None]:
from fbprophet import Prophet
import fbprophet.diagnostics as Prophet_Diagnostics

import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd
import seaborn as sns
from datetime import datetime
import warnings
import scipy.stats as stats
warnings.filterwarnings('ignore')

mpl.rcParams['figure.figsize'] = (8, 6)
mpl.rcParams['axes.grid'] = False

# Reading data

In [None]:
def evo_cleaning(data):
    """
        Clean the raw Evo travel records and return a new DataFrame.

        Steps:
          1. keep rows whose ``duration`` is above 30 OR whose ``distance`` is above 3;
          2. drop rows whose ``Id`` contains a dash (uncommon ids,
             e.g. 4c5865a3-4b03-40f6-a3a8-d4e94aae3b17);
          3. parse ``Start_time`` / ``End_time`` as datetimes truncated to whole seconds.

        Parameters
        ----------
        data : pandas.DataFrame
            Needs the columns ``duration``, ``distance``, ``Id``, ``Start_time``
            and ``End_time``.

        Returns
        -------
        pandas.DataFrame
            An independent, filtered copy; the frame passed in is left untouched.
    """
    # 30 is the reservation cancellation limit in minutes (per the original note).
    # NOTE(review): the original note only mentions the duration rule, but the
    # condition is an OR with distance > 3 -- confirm this is intended.
    long_enough = (data['duration'] > 30) | (data['distance'] > 3)

    # Vectorized dash check; astype(str) / na=False keep non-string or missing ids
    # from raising (they contain no dash, so they are kept).
    regular_id = ~data['Id'].astype(str).str.contains('-', regex=False, na=False)

    # Explicit copy: the assignments below must never write into a slice of `data`
    cleaned = data.loc[long_enough & regular_id].copy()

    # Parse the dates and remove the sub-second part
    for column in ('Start_time', 'End_time'):
        cleaned[column] = pd.to_datetime(cleaned[column]).dt.floor('s')

    return cleaned

In [None]:
# Reading data: load the raw trip tables of the three services from the local data/ folder
evo_data = pd.read_csv('data/evo_travels.csv')
modo_data = pd.read_csv('data/modo_travels.csv')
# index_col=0: the first column of the Car2Go CSV becomes the row index
c2g_data = pd.read_csv('data/car2go_travels.csv', index_col=0)

In [None]:
# Cleaning process
# Evo: row filtering, date parsing and id clean-up all happen inside evo_cleaning
evo_data = evo_cleaning(evo_data)

# Modo and Car2Go: the time columns are treated as POSIX timestamps and turned into datetimes.
# NOTE(review): datetime.fromtimestamp converts to the local timezone of the machine
# running the notebook, so results can differ between machines -- confirm this is intended.
modo_data['start'] = modo_data['start'].map(datetime.fromtimestamp)
modo_data['end'] = modo_data['end'].map(datetime.fromtimestamp)

c2g_data['init_time'] = c2g_data['init_time'].map(datetime.fromtimestamp)
c2g_data['final_time'] = c2g_data['final_time'].map(datetime.fromtimestamp)

In [None]:
# Analysis window given as month-day; the year is prepended per service below
init_period = '05-25'
end_period = '06-15'

# Keep trips that start on/after the window start and end on/before the window end.
# NOTE(review): Evo and Modo use 2018 while Car2Go uses 2017 -- presumably because the
# Car2Go data was collected a year earlier; confirm.
evo_data = evo_data.loc[
    lambda trips: (trips.Start_time >= f'2018-{init_period}') & (trips.End_time <= f'2018-{end_period}')
]
modo_data = modo_data.loc[
    lambda trips: (trips.start >= f'2018-{init_period}') & (trips.end <= f'2018-{end_period}')
]
c2g_data = c2g_data.loc[
    lambda trips: (trips.init_time >= f'2017-{init_period}') & (trips.final_time <= f'2017-{end_period}')
]

In [None]:
def to_prophet_data(time_list):
    """
        Turn a Series of time values into the two-column frame used by Prophet.

        Each distinct value of `time_list` becomes one row: `ds` holds the value
        and `y` the number of times it occurs. Rows are ordered by `ds` and the
        index is renumbered from 0.
    """
    occurrences = time_list.value_counts(sort=False)
    rows = list(occurrences.items())  # (value, count) pairs

    return (
        pd.DataFrame(rows, columns=['ds', 'y'])
        .sort_values(by='ds')
        .reset_index(drop=True)
    )

In [None]:
# Prophet data structure: one (ds, y) frame per service, built from each
# service's trip start-time column (y = number of trips sharing that start time)

evo_prophet = to_prophet_data(evo_data.Start_time)
modo_prophet = to_prophet_data(modo_data.start)
c2g_prophet = to_prophet_data(c2g_data.init_time)

In [None]:
# Preview the first rows of the Evo frame to check the ds / y layout
evo_prophet.head()

## Training data used

In [None]:
def plot_time_points(data, title=None):
    """
        Scatter-plot the `y` values of a Prophet-style frame against `ds`.

        Parameters
        ----------
        data : pandas.DataFrame
            Frame with the columns `ds` (x axis) and `y` (y axis).
        title : str, optional
            Figure title; when omitted no title is drawn.

        The figure is displayed with `plt.show()`; nothing is returned.
    """
    fig, ax = plt.subplots(1, 1)

    ax.scatter(data.ds, data.y, alpha=0.3)

    # Clamp the x axis to the observed range so no padding is added on either side
    ax.set_xlim(data.ds.min(), data.ds.max())

    # Label the axes with the plotted column names so the figure stands on its own
    ax.set_xlabel('ds')
    ax.set_ylabel('y')
    if title is not None:
        ax.set_title(title)

    # Rotate through the Axes object rather than the pyplot "current axes" state
    ax.tick_params(axis='x', labelrotation=45)

    plt.show()

### EVO

In [None]:
# Raw per-timestamp trip counts for Evo
plot_time_points(evo_prophet)

### Modo

In [None]:
# Raw per-timestamp trip counts for Modo
plot_time_points(modo_prophet)

### Car2Go

In [None]:
# Raw per-timestamp trip counts for Car2Go
plot_time_points(c2g_prophet)

## Cleaning + Normalizing Datasets

In [None]:
# Evo: drop outliers, then scale y to a maximum of 1.
evo_z_scores = stats.zscore(evo_prophet.y)
abs_evo_z_scores = np.abs(evo_z_scores)
# Boolean mask of the rows to KEEP: counts less than 3 standard deviations from the mean
filtered_evo_entries = (abs_evo_z_scores < 3)
# Explicit copy so the normalization below never writes into a slice of evo_prophet
evo_filtered_prophet = evo_prophet[filtered_evo_entries].copy()
# Divide by the largest remaining count
evo_filtered_prophet['y'] = evo_filtered_prophet['y'] / evo_filtered_prophet['y'].max()
plot_time_points(evo_filtered_prophet)

In [None]:
# Modo: drop outliers, then scale y to a maximum of 1.
modo_z_scores = stats.zscore(modo_prophet.y)
abs_modo_z_scores = np.abs(modo_z_scores)
# Boolean mask of the rows to KEEP: counts less than 3 standard deviations from the mean
filtered_modo_entries = (abs_modo_z_scores < 3)
# Explicit copy so the normalization below never writes into a slice of modo_prophet
modo_filtered_prophet = modo_prophet[filtered_modo_entries].copy()
# Divide by the largest remaining count
modo_filtered_prophet['y'] = modo_filtered_prophet['y'] / modo_filtered_prophet['y'].max()
plot_time_points(modo_filtered_prophet)

In [None]:
# Car2Go: drop outliers, then scale y to a maximum of 1.
c2g_z_scores = stats.zscore(c2g_prophet.y)
abs_c2g_z_scores = np.abs(c2g_z_scores)
# Boolean mask of the rows to KEEP: counts less than 3 standard deviations from the mean
filtered_c2g_entries = (abs_c2g_z_scores < 3)
# Explicit copy so the normalization below never writes into a slice of c2g_prophet
c2g_filtered_prophet = c2g_prophet[filtered_c2g_entries].copy()
# Divide by the largest remaining count
c2g_filtered_prophet['y'] = c2g_filtered_prophet['y'] / c2g_filtered_prophet['y'].max()
plot_time_points(c2g_filtered_prophet)

## Prophet forecast

In [None]:
def prophet_predict(data, freq='min', periods=5):
    """
        Fit a Prophet model (weekly seasonality enabled) on `data` and forecast ahead.

        `data` is passed straight to `Prophet.fit`; `freq` and `periods` are
        forwarded to `make_future_dataframe` to build the frame that is predicted.

        Returns the fitted model together with the forecast frame.
    """
    fitted = Prophet(weekly_seasonality=True)
    fitted.fit(data)

    horizon_frame = fitted.make_future_dataframe(periods=periods, freq=freq)

    return fitted, fitted.predict(horizon_frame)

In [None]:
# Forecast horizon in steps; prophet_predict defaults to freq='min', so 60*24 steps = one day
PREDICT_PERIOD = 60*24  # One day
# Fit one model per service on the unfiltered counts; the prints mark progress between fits
evo_model, evo_forecast = prophet_predict(evo_prophet, periods=PREDICT_PERIOD)
print('EVO done.')
modo_model, modo_forecast = prophet_predict(modo_prophet, periods=PREDICT_PERIOD)
print('Modo done.')
c2g_model, c2g_forecast = prophet_predict(c2g_prophet, periods=PREDICT_PERIOD)
print('Car2Go done.')

In [None]:
# Same forecast as above, but fitted on the outlier-filtered, max-normalized frames
evo_filtered_model, evo_filtered_forecast = prophet_predict(evo_filtered_prophet, periods=PREDICT_PERIOD)
print('EVO filtered done.')
modo_filtered_model, modo_filtered_forecast = prophet_predict(modo_filtered_prophet, periods=PREDICT_PERIOD)
print('Modo filtered done.')
c2g_filtered_model, c2g_filtered_forecast = prophet_predict(c2g_filtered_prophet, periods=PREDICT_PERIOD)
print('Car2Go filtered done.')

### EVO

In [None]:
# Evo forecast from the model fitted on the unfiltered counts
fig1 = evo_model.plot(evo_forecast)

In [None]:
# Evo forecast from the model fitted on the filtered, normalized counts
fig1 = evo_filtered_model.plot(evo_filtered_forecast)

In [None]:
# Component breakdown of the Evo forecast (unfiltered model)
fig1 = evo_model.plot_components(evo_forecast)

In [None]:
# Component breakdown of the Evo forecast (filtered, normalized model)
fig1 = evo_filtered_model.plot_components(evo_filtered_forecast)

In [None]:
# Cross-validate the filtered Evo model with a 24-hour forecast horizon
# (initial training window and cutoff spacing are left at the library defaults)
cv_filtered_results_evo = Prophet_Diagnostics.cross_validation(
    evo_filtered_model,
    horizon="24 hours",
)
# rolling_window=1 -- presumably aggregates the metrics over all forecast points; see Prophet diagnostics docs
Prophet_Diagnostics.performance_metrics(cv_filtered_results_evo, rolling_window=1)

### Modo

In [None]:
# Modo forecast from the model fitted on the unfiltered counts
fig1 = modo_model.plot(modo_forecast)

In [None]:
# Modo forecast from the model fitted on the filtered, normalized counts
fig1 = modo_filtered_model.plot(modo_filtered_forecast)

In [None]:
# Component breakdown of the Modo forecast (unfiltered model)
fig1 = modo_model.plot_components(modo_forecast)

In [None]:
# Component breakdown of the Modo forecast (filtered, normalized model)
fig1 = modo_filtered_model.plot_components(modo_filtered_forecast)

In [None]:
# Cross-validate the filtered Modo model with a 24-hour forecast horizon
# (initial training window and cutoff spacing are left at the library defaults)
cv_filtered_results_modo = Prophet_Diagnostics.cross_validation(
    modo_filtered_model,
    horizon="24 hours",
)
# rolling_window=1 -- presumably aggregates the metrics over all forecast points; see Prophet diagnostics docs
Prophet_Diagnostics.performance_metrics(cv_filtered_results_modo, rolling_window=1)

### Car2Go

In [None]:
# Car2Go forecast from the model fitted on the unfiltered counts
fig1 = c2g_model.plot(c2g_forecast)

In [None]:
# Car2Go forecast from the model fitted on the filtered, normalized counts
fig1 = c2g_filtered_model.plot(c2g_filtered_forecast)

In [None]:
# Component breakdown of the Car2Go forecast (unfiltered model)
fig1 = c2g_model.plot_components(c2g_forecast)

In [None]:
# Component breakdown of the Car2Go forecast (filtered, normalized model)
fig1 = c2g_filtered_model.plot_components(c2g_filtered_forecast)

In [None]:
# Cross-validate the filtered Car2Go model with a 24-hour forecast horizon
# (initial training window and cutoff spacing are left at the library defaults)
cv_filtered_results_c2g = Prophet_Diagnostics.cross_validation(
    c2g_filtered_model,
    horizon="24 hours",
)
# rolling_window=1 -- presumably aggregates the metrics over all forecast points; see Prophet diagnostics docs
Prophet_Diagnostics.performance_metrics(cv_filtered_results_c2g, rolling_window=1)