In [None]:
from __future__ import absolute_import, division, print_function, unicode_literals
import tensorflow as tf

import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd
import seaborn as sns
from datetime import datetime
import warnings
warnings.filterwarnings('ignore')

# Notebook-wide matplotlib defaults (8x6 figures, grid off) and a fixed TensorFlow seed.
mpl.rcParams.update({
    'figure.figsize': (8, 6),
    'axes.grid': False,
})
tf.random.set_seed(13)

## Reading data

In [None]:
# Load the three demand tables; the first CSV column becomes each frame's index.
evo_data, modo_data, c2g_data = (
    pd.read_csv(csv_path, index_col=0)
    for csv_path in (
        'data/demand_datasets/evo_demand.csv',
        'data/demand_datasets/modo_demand.csv',
        'data/demand_datasets/c2g_demand.csv',
    )
)

In [None]:
# Display the column labels of the Evo table (a bare last expression is the cell's output).
evo_data.columns

In [None]:
# The 24 columns 'hour_0' ... 'hour_23' are removed from every table.
hour_columns = ['hour_{}'.format(hour) for hour in range(24)]

# DataFrame.drop returns a NEW frame and leaves the original untouched, so the
# result has to be bound back to the name; otherwise the drop has no effect.
# errors='ignore' keeps the cell re-runnable once the columns are already gone.
evo_data = evo_data.drop(columns=hour_columns, errors='ignore')
modo_data = modo_data.drop(columns=hour_columns, errors='ignore')
c2g_data = c2g_data.drop(columns=hour_columns, errors='ignore')

In [None]:
# Month-day bounds of the study window; the year prefix is added per dataset below.
init_period = '06-23'
end_period = '07-12'

# Evo and Modo are filtered on the 2018 window, Car2Go on the 2017 window.
# Each mask keeps rows whose index compares >= the start and <= the end label.
evo_window = (evo_data.index >= '2018-' + init_period) & (evo_data.index <= '2018-' + end_period)
evo_data = evo_data[evo_window]

modo_window = (modo_data.index >= '2018-' + init_period) & (modo_data.index <= '2018-' + end_period)
modo_data = modo_data[modo_window]

c2g_window = (c2g_data.index >= '2017-' + init_period) & (c2g_data.index <= '2017-' + end_period)
c2g_data = c2g_data[c2g_window]

In [None]:
def train_size(data, size=0.7):
    """
    Return how many leading rows of ``data`` belong to the training split.

    data - any sized container; only ``len(data)`` is used
    size - fraction of the rows kept for training (70% by default)

    The product ``len(data) * size`` is truncated with ``int``.
    """
    return int(len(data) * size)

In [None]:
def norm_data(features, train_split):
    """
    Min-max scale a feature table using bounds taken from its training rows only.

    features    - object exposing its contents as an array through ``.values``
                  (e.g. a pandas DataFrame)
    train_split - number of leading rows that form the training set; only these
                  rows are used to compute the bounds

    Returns the whole table rescaled as ``(x - min) / (max - min)``. ``min`` and
    ``max`` are single scalars taken over every column of the training rows, so
    all columns share the same scale. Rows after ``train_split`` can fall
    outside [0, 1].

    Raises ValueError when every training value is identical, because the
    scaling would divide by zero and silently fill the result with NaN/inf.
    """
    dataset = features.values
    train_rows = dataset[:train_split]

    data_min = train_rows.min()
    data_range = train_rows.max() - data_min
    if data_range == 0:
        raise ValueError('cannot min-max scale: all training values are equal')

    return (dataset - data_min) / data_range

In [None]:
# Scale each operator's table with bounds computed from its own training rows.
evo_norm = norm_data(evo_data, train_split=train_size(evo_data))
modo_norm = norm_data(modo_data, train_split=train_size(modo_data))
c2g_norm = norm_data(c2g_data, train_split=train_size(c2g_data))

## LSTM Data preparation

The aim of this analysis is to predict the number of travels with a multivariate LSTM model.

In [None]:
def multivariate_data(dataset, target, start_index, end_index, history_size,
                      target_size, step, single_step=False):
    """
    Cut a series into (history window, label) pairs for supervised training.

    dataset      - array indexed by row; the history windows are taken from it
    target       - array the labels are read from, indexed like ``dataset``
    start_index  - first row that may appear in a window
    end_index    - position (exclusive) of the last label start; ``None`` means
                   ``len(dataset) - target_size``
    history_size - number of rows spanned by each history window
    target_size  - number of future values per label (or, in single-step mode,
                   how many positions past the window the single label lies)
    step         - stride used when sampling rows inside a history window
    single_step  - True: each label is the single value ``target[i + target_size]``;
                   False: each label is the slice ``target[i:i + target_size]``

    Returns a pair of numpy arrays ``(windows, labels)``.
    """
    # The first usable position needs a full history window behind it.
    first = start_index + history_size
    last = len(dataset) - target_size if end_index is None else end_index

    windows, futures = [], []
    for pos in range(first, last):
        windows.append(dataset[range(pos - history_size, pos, step)])
        if single_step:
            futures.append(target[pos + target_size])
        else:
            futures.append(target[pos:pos + target_size])

    return np.array(windows), np.array(futures)

In [None]:
def train_val_split(data, train_split, history_length=72, future_target=1, 
                    step=1, single_step=False, batch_size=256, buffer_size=10000):
    """
    Build the batched training and validation ``tf.data`` pipelines.

    data           - 2-D array; column 0 is used as the prediction target
    train_split    - row index separating training rows from validation rows
    history_length - rows per history window
    future_target  - label length, forwarded to ``multivariate_data``
    step           - stride inside each history window
    single_step    - forwarded to ``multivariate_data``
    batch_size     - batch size of both datasets
    buffer_size    - shuffle buffer size of the training dataset

    Returns ``(train, val, shape)``: the training dataset (cached, shuffled,
    batched, repeated), the validation dataset (batched, repeated) and the last
    two dimensions of the training windows.
    """
    target = data[:, 0]
    window_args = (history_length, future_target, step, single_step)

    # Training windows come from rows before train_split, validation windows
    # from train_split up to the end of the data.
    x_train, y_train = multivariate_data(data, target, 0, train_split, *window_args)
    x_val, y_val = multivariate_data(data, target, train_split, None, *window_args)

    print ('\nSingle window of past history : {}'.format(x_train[0].shape))
    print ('Target feature to predict : {}'.format(y_train[0].shape))

    train = (
        tf.data.Dataset.from_tensor_slices((x_train, y_train))
        .cache()
        .shuffle(buffer_size)
        .batch(batch_size)
        .repeat()
    )
    val = (
        tf.data.Dataset.from_tensor_slices((x_val, y_val))
        .batch(batch_size)
        .repeat()
    )

    return train, val, x_train.shape[-2:]

In [None]:
def lstm_model(train_data, validation_data, shape, epochs=10, evaluation_interval=200, node_number=50,
               output_size=12, validation_steps=50, dropout_rate=0.2):
    """
    Build, compile and fit a two-layer LSTM forecaster.

    train_data          - dataset passed to ``model.fit`` for training
    validation_data     - dataset used for validation; one batch is also run
                          through the untrained model to print the output shape
    shape               - input shape of a single history window
    epochs              - number of training epochs
    evaluation_interval - ``steps_per_epoch`` given to ``model.fit``
    node_number         - units in each LSTM layer
    output_size         - units of the final Dense layer, i.e. values predicted
                          per window; it has to match the label length the
                          datasets were built with (default 12, the previously
                          hard-coded value)
    validation_steps    - validation batches evaluated per epoch (default 50,
                          the previously hard-coded value)
    dropout_rate        - rate of the Dropout layer between the two LSTM layers
                          (default 0.2, the previously hard-coded value)

    Returns ``(model, history)`` where ``history`` is the object returned by
    ``model.fit``.
    """
    model = tf.keras.models.Sequential()
    model.add(tf.keras.layers.LSTM(node_number, return_sequences=True,
                                   input_shape=shape))
    model.add(tf.keras.layers.Dropout(dropout_rate))
    model.add(tf.keras.layers.LSTM(node_number, activation='relu'))
    model.add(tf.keras.layers.Dense(output_size))

    # MAE loss; RMSprop with element-wise gradient clipping at 1.0.
    model.compile(optimizer=tf.keras.optimizers.RMSprop(clipvalue=1.0),
                  loss='mae', metrics=['mse', 'mae'])

    # Print the prediction shape for one validation batch before training.
    for x, _ in validation_data.take(1):
        print(model.predict(x).shape)

    history = model.fit(train_data, epochs=epochs,
                        steps_per_epoch=evaluation_interval,
                        validation_data=validation_data,
                        validation_steps=validation_steps)

    return model, history

In [None]:
# Build the train / validation pipelines: 24-step history windows, 12-step labels.
evo_train, evo_val, evo_shape = train_val_split(
    evo_norm, train_size(evo_norm), history_length=24, future_target=12)
modo_train, modo_val, modo_shape = train_val_split(
    modo_norm, train_size(modo_norm), history_length=24, future_target=12)
c2g_train, c2g_val, c2g_shape = train_val_split(
    c2g_norm, train_size(c2g_norm), history_length=24, future_target=12)

In [None]:
# NOTE(review): stand-alone slicing check (prints the first four items of a literal list);
# it does not touch any notebook variable and looks like a leftover scratch cell.
print([0, 1, 2, 3, 4][:4])

# Plot Generation

In [None]:
def performance_representation_plot(hour_skip, data_val, data_model, data_regular_max, data_regular_min, title,
                                    title_in_plot=True, save_file=False, print_plot=True, upper_border=True, legend_font_size="medium"):
    """
    Plot de-normalised predictions against the true values for one validation batch.

    hour_skip        - stride between the windows that are plotted; from every
                       ``hour_skip``-th window the first ``hour_skip`` predicted
                       steps are kept, and the kept pieces are concatenated
    data_val         - validation dataset; only its first batch is used
    data_model       - model exposing ``predict``
    data_regular_max - upper bound used to undo the min-max scaling
    data_regular_min - lower bound used to undo the min-max scaling
    title            - caption of the plot and stem of the saved file name
    title_in_plot    - True: draw the caption on the figure; False: print it
    save_file        - True: save a PNG under the ``plots`` directory
    print_plot       - True: show the figure; False: close it without showing
    upper_border     - False: hide the top and right axes spines
    legend_font_size - font size passed to the legend

    The scaling is undone with ``x * (max - min) + min``, the inverse of
    ``(x - min) / (max - min)``. The bounds therefore have to be the ones the
    data was normalised with.
    """
    data_predictions_array = np.empty(0)
    data_truth_array = np.empty(0)

    for x, y in data_val.take(1):
        result = np.asarray(data_model.predict(x)).astype(float)
        truth = np.asarray(y).astype(float)
        # Windows 0, hour_skip, 2*hour_skip, ... limited to whole blocks.
        blocks = len(result) // hour_skip
        picked = slice(0, blocks * hour_skip, hour_skip)
        data_predictions_array = result[picked, :hour_skip].ravel()
        data_truth_array = truth[picked, :hour_skip].ravel()

    # Inverse of min-max scaling: multiply by the range, not by the maximum.
    value_range = data_regular_max - data_regular_min
    data_predictions_array = data_predictions_array * value_range + data_regular_min
    data_truth_array = data_truth_array * value_range + data_regular_min

    fig, ax = plt.subplots(figsize=(20, 10))
    ax.set_xlabel("Hours")
    ax.set_ylabel("Travels Requested Per Hour")

    caption = title + " (Hours Skiped = " + str(hour_skip) + ")"
    if title_in_plot:
        ax.set_title(caption)
    else:
        print(caption)

    ax.plot(data_truth_array, marker="o", label="True Values")
    ax.plot(data_predictions_array, marker="D", label="Predicted Values")
    if not upper_border:
        ax.spines['top'].set_visible(False)
        ax.spines['right'].set_visible(False)
    ax.legend(fontsize=legend_font_size)

    if save_file:
        # os.path.join keeps the path portable; a literal backslash only acts
        # as a directory separator on Windows.
        file_name = title.replace(" ", "_") + "_" + str(hour_skip) + "_HrsBetweenPredictions.png"
        out_path = os.path.join("plots", file_name)
        out_dir = os.path.dirname(out_path)
        if not os.path.isdir(out_dir):
            os.makedirs(out_dir)
        fig.savefig(out_path, bbox_inches='tight')

    if print_plot:
        plt.show()
    else:
        # Release the figure so repeated calls do not accumulate open figures.
        plt.close(fig)

# Training Models

In [None]:
def multi_step_plot(history, true_future, prediction):
    """
    Draw one history window followed by its true and predicted future values.

    history     - 2-D window; column 0 is drawn as the past series
    true_future - true values following the window, drawn as blue dots
    prediction  - predicted values; drawn (red crosses joined by a line) only
                  when ``prediction.any()`` is true
    """
    past_len = len(history)
    # The future points continue on the x axis right after the history.
    future_axis = np.arange(past_len, past_len + len(true_future))

    plt.figure(figsize=(12, 6))
    plt.plot(range(past_len), np.array(history[:, 0]), label='History')
    plt.plot(future_axis, np.array(true_future), 'bo', label='True Future')
    if prediction.any():
        plt.plot(future_axis, np.array(prediction), 'rX-', label='Predicted Future')
    plt.legend(loc='upper left')
    plt.show()

In [None]:
def plot_train_history(history, title, save_file=False):
    """
    Plot every metric recorded in a training history and optionally save the figure.

    history   - object whose ``history`` attribute maps metric names to
                per-epoch values (as returned by ``model.fit``)
    title     - stem of the PNG file name (spaces become underscores); it is
                only used when ``save_file`` is True
    save_file - True: save the figure under the ``plots`` directory
    """
    pd.DataFrame(history.history).plot(figsize=(8, 5))
    plt.grid(True)

    if save_file:
        # os.path.join keeps the path portable; a literal backslash only acts
        # as a directory separator on Windows.
        out_path = os.path.join("plots", title.replace(" ", "_") + ".png")
        out_dir = os.path.dirname(out_path)
        if not os.path.isdir(out_dir):
            os.makedirs(out_dir)
        plt.savefig(out_path, bbox_inches='tight')

    plt.show()

Next, a model is trained for each dataset.

## Evo Model

In [None]:
# Train the Evo forecaster for 30 epochs.
print('Evo Model')
evo_model, evo_history = lstm_model(
    train_data=evo_train, validation_data=evo_val, shape=evo_shape, epochs=30)

### Plotting prediction

In [None]:
# Metric curves recorded while fitting the Evo model.
plot_train_history(evo_history, title='Training and validation loss')

In [None]:
# Plot the first window of one validation batch against the Evo model's forecast.
for history_batch, future_batch in evo_val.take(1):
    evo_forecast = evo_model.predict(history_batch)
    multi_step_plot(history_batch[0], future_batch[0], evo_forecast[0])

In [None]:
# Evo predictions vs. truth, sampled every 12 windows; the figure is saved to disk.
performance_representation_plot(
    hour_skip=12,
    data_val=evo_val,
    data_model=evo_model,
    data_regular_max=evo_data.travels.max(),
    data_regular_min=evo_data.travels.min(),
    title="Evo Performance",
    title_in_plot=False,
    save_file=True,
    upper_border=False,
    legend_font_size="x-large",
)

In [None]:
# Disabled experiment: the Evo epoch sweep (20, 30, 40, 50 epochs) below is kept
# inside a string bound to `_`, so none of it is executed.
_ = """
histories = []

for n_epochs in range(20, 51, 10):
    evo_model, evo_history = lstm_model(evo_train, evo_val, evo_shape, epochs=n_epochs)
    histories.append(evo_history)
    performance_representation_plot(12 ,evo_val, evo_model, evo_data.travels.max(), evo_data.travels.min(),
                                    "Epoch_Tests\\Evo\\Evo Performance " + str(n_epochs) + "epochs",
                                    upper_border=False, title_in_plot=False, legend_font_size="x-large", save_file=True, print_plot=False)

for history , n_epochs in zip(histories, list(range(20, 51, 10))):
    plot_train_history(history, 'Epoch_Tests\\Evo\\Training and validation loss Evo - (' + str(n_epochs) + " Epochs )", save_file=True)
"""

## Modo model

In [None]:
# Train the Modo forecaster for 30 epochs.
print('Modo Model')
modo_model, modo_history = lstm_model(
    train_data=modo_train, validation_data=modo_val, shape=modo_shape, epochs=30)

In [None]:
# Metric curves recorded while fitting the Modo model.
plot_train_history(modo_history, title='Training and validation loss')

In [None]:
# Plot the first window of one validation batch against the Modo model's forecast.
for history_batch, future_batch in modo_val.take(1):
    modo_forecast = modo_model.predict(history_batch)
    multi_step_plot(history_batch[0], future_batch[0], modo_forecast[0])

In [None]:
# Modo predictions vs. truth, sampled every 12 windows; the figure is saved to disk.
performance_representation_plot(
    hour_skip=12,
    data_val=modo_val,
    data_model=modo_model,
    data_regular_max=modo_data.travels.max(),
    data_regular_min=modo_data.travels.min(),
    title="Modo Performance",
    title_in_plot=False,
    save_file=True,
    upper_border=False,
    legend_font_size="x-large",
)

In [None]:
# Disabled experiment: the Modo epoch sweep (20, 30, 40, 50 epochs) below is kept
# inside a string bound to `_`, so none of it is executed.
_ = """histories = []

for n_epochs in range(20, 51, 10):
    modo_model, modo_history = lstm_model(modo_train, modo_val, modo_shape, epochs=n_epochs)
    histories.append(modo_history)
    performance_representation_plot(12 ,modo_val, modo_model, modo_data.travels.max(), modo_data.travels.min(),
                                    "Epoch_Tests\\Modo\\Modo Performance " + str(n_epochs) + "epochs",
                                    upper_border=False, title_in_plot=False, legend_font_size="x-large", save_file=True, print_plot=False)

for history , n_epochs in zip(histories, list(range(20, 51, 10))):
    plot_train_history(history, 'Epoch_Tests\\Modo\\Training and validation loss modo - (' + str(n_epochs) + " Epochs )", save_file=True)
    """

## Car2Go Model

In [None]:
# Train the Car2Go forecaster for 30 epochs.
print('Car2Go Model')
c2g_model, c2g_history = lstm_model(
    train_data=c2g_train, validation_data=c2g_val, shape=c2g_shape, epochs=30)

In [None]:
# Metric curves recorded while fitting the Car2Go model.
plot_train_history(c2g_history, title='Training and validation loss')

In [None]:
# Plot the first window of one validation batch against the Car2Go model's forecast.
for history_batch, future_batch in c2g_val.take(1):
    c2g_forecast = c2g_model.predict(history_batch)
    multi_step_plot(history_batch[0], future_batch[0], c2g_forecast[0])

In [None]:
# Car2Go predictions vs. truth, sampled every 12 windows; the figure is saved to disk.
performance_representation_plot(
    hour_skip=12,
    data_val=c2g_val,
    data_model=c2g_model,
    data_regular_max=c2g_data.travels.max(),
    data_regular_min=c2g_data.travels.min(),
    title="Car2Go Performance",
    title_in_plot=False,
    save_file=True,
    upper_border=False,
    legend_font_size="x-large",
)

In [None]:
# Disabled experiment: the Car2Go epoch sweep (20, 30, 40, 50 epochs) below is kept
# inside a string bound to `_`, so none of it is executed.
# NOTE(review): the loss-plot title in the last line still says "modo" although it
# is saved under the Car2Go folder - fix the label before re-enabling this sweep.
_ = """histories = []

for n_epochs in range(20, 51, 10):
    c2g_model, c2g_history = lstm_model(c2g_train, c2g_val, c2g_shape, epochs=n_epochs)
    histories.append(c2g_history)
    performance_representation_plot(12 ,c2g_val, c2g_model, c2g_data.travels.max(), c2g_data.travels.min(),
                                    "Epoch_Tests\\Car2Go\\c2g Performance " + str(n_epochs) + "epochs",
                                    upper_border=False, title_in_plot=False, legend_font_size="x-large", save_file=True, print_plot=False)

for history , n_epochs in zip(histories, list(range(20, 51, 10))):
    plot_train_history(history, 'Epoch_Tests\\Car2Go\\Training and validation loss modo - (' + str(n_epochs) + " Epochs )", save_file=True)
    """

## References

* https://www.tensorflow.org/tutorials/structured_data/time_series#part_2_forecast_a_multivariate_time_series
* http://netlab.ice.ufjf.br/index.php/carsharingdata/