# LSTM Model
Reservations are aggregated by grid cells (1000m x 1000m).

Inspired by: https://github.com/HAKO411/Deep-Learning-for-Short-term-bike-sharing-demand-prediction

In [None]:
# imports
import pandas as pd

import tensorflow as tf

## Data Preparation
Preparing data to train the LSTM model

In [None]:
# variables
# Input pickles: one is read for the training frame, the other for the validation frame.
training_data_filepath = '../../pickles/reservations_training.pickle'
validation_data_filepath = '../../pickles/reservations_validation.pickle'

# Output pickle: the table of trained per-grid-cell models is written here.
results_filepath = '../../pickles/results/lstm_results.pickle'

In [None]:
# training variables
num_units = 50  # units used for every LSTM layer
batch_size = 32  # batch size passed to model.fit
epochs = 20  # number of epochs passed to model.fit
learning_rate = 0.001  # learning rate of the Adam optimizer
loss_function = 'mean_squared_error'  # loss passed to model.compile
seed = 42  # seed handed to the Dropout layers
dropout = 0.1  # dropout rate applied after every LSTM layer

# number of leading columns (month, day, hour) that are used as model inputs
num_input_features = 3

In [None]:
# Read the pickled training records and index them by their start time.
input_training_data = pd.read_pickle(training_data_filepath).set_index('startTime')
# Convert the index to datetimes so it can be resampled by hour later on.
input_training_data.index = pd.to_datetime(input_training_data.index)
input_training_data.head()

In [None]:
# Read the pickled validation records and index them by their start time.
input_validation_data = pd.read_pickle(validation_data_filepath).set_index('startTime')
# Convert the index to datetimes so it can be resampled by hour later on.
input_validation_data.index = pd.to_datetime(input_validation_data.index)
input_validation_data.head()

In [None]:
# Count rows per hourly bin and grid cell id ...
hourly_training_counts = input_training_data.resample('h')['large_grid_id'].value_counts()
# ... then pivot to one column per grid cell (0 where a cell has no rows) with sorted columns.
training_data = hourly_training_counts.unstack().fillna(0).sort_index(axis=1)
training_data.head()

In [None]:
# Derive calendar features from the timestamp index.
training_timestamps = training_data.index
training_data = training_data.assign(
    hour=training_timestamps.hour,
    day=training_timestamps.dayofweek,
    month=training_timestamps.month,
)
# Use string labels for every column so feature names and grid ids can be sorted together.
training_data.columns = training_data.columns.astype(str)
# Order the columns by label, descending.
# NOTE(review): presumably this places month/hour/day ahead of the grid-cell columns -- verify
# that every grid id sorts below 'day' as a string.
training_labels_descending = sorted(training_data.columns, reverse=True)
training_data = training_data.reindex(training_labels_descending, axis=1)
training_data.head()

In [None]:
# Count rows per hourly bin and grid cell id ...
hourly_validation_counts = input_validation_data.resample('h')['large_grid_id'].value_counts()
# ... then pivot to one column per grid cell (0 where a cell has no rows) with sorted columns.
validation_data = hourly_validation_counts.unstack().fillna(0).sort_index(axis=1)
validation_data.head()

In [None]:
# Derive calendar features from the timestamp index.
validation_timestamps = validation_data.index
validation_data = validation_data.assign(
    hour=validation_timestamps.hour,
    day=validation_timestamps.dayofweek,
    month=validation_timestamps.month,
)
# Use string labels for every column so feature names and grid ids can be sorted together.
validation_data.columns = validation_data.columns.astype(str)
# Order the columns by label, descending.
# NOTE(review): presumably this places month/hour/day ahead of the grid-cell columns -- verify
# that every grid id sorts below 'day' as a string.
validation_labels_descending = sorted(validation_data.columns, reverse=True)
validation_data = validation_data.reindex(validation_labels_descending, axis=1)
validation_data.head()

In [None]:
# index is timestamp (hourly)
# columns are the month/hour/day features plus one column per grid cell
# grid-cell values are the demand e.g. pickup in the timeframe
# NOTE: full_demand is another name for training_data, not a copy
full_demand = training_data
full_demand.head()

In [None]:
# pick the feature columns plus one grid cell's column, and report that cell's label
def get_value_name(all_cells_demand, index):
    """Select the data of a single grid cell.

    Args:
        all_cells_demand: DataFrame containing 'month', 'day' and 'hour'
            columns alongside the grid-cell columns.
        index: positional index of the grid-cell column to select.

    Returns:
        A tuple ``(frame, label)`` where ``frame`` holds the columns
        'month', 'day', 'hour' and the selected cell (in that order) and
        ``label`` is the selected column's label.
    """
    cell_label = all_cells_demand.columns[index]
    wanted_columns = ['month', 'day', 'hour', cell_label]
    return all_cells_demand[wanted_columns], cell_label

In [None]:
def prepare_data_for_model(demand_at_cell):
    """Split one cell's frame into model inputs and targets.

    The first ``num_input_features`` columns become the inputs; all
    remaining columns become the targets.

    Args:
        demand_at_cell: DataFrame whose leading columns are the features.

    Returns:
        A tuple ``(x, y)``: ``x`` has shape (rows, 1, feature columns) and
        ``y`` is the 2-D array of the remaining columns.
    """
    feature_frame = demand_at_cell.iloc[:, :num_input_features]
    target_frame = demand_at_cell.iloc[:, num_input_features:]

    features = feature_frame.values
    row_count, feature_count = features.shape
    # Insert a middle axis of length 1 between rows and features.
    model_input = features.reshape((row_count, 1, feature_count))

    return model_input, target_frame.values

In [None]:
def lstm_model(name, units, training_x, validation_x, training_y, validation_y):
    """Build, compile and fit a stacked LSTM regressor.

    The network is five LSTM layers (each followed by Dropout) and a final
    Dense layer with a single output. It is compiled with Adam and the
    module-level ``loss_function`` and fitted silently (``verbose=0``).

    Args:
        name: name given to the Keras model.
        units: number of units in every LSTM layer.
        training_x: training inputs.
        validation_x: validation inputs.
        training_y: training targets.
        validation_y: validation targets.

    Returns:
        The fitted Keras model.
    """
    lstm_layer_count = 5

    model = tf.keras.models.Sequential(name=name)
    model.add(tf.keras.layers.Input(shape=(1, num_input_features)))

    for layer_number in range(1, lstm_layer_count + 1):
        # Every LSTM except the last one passes its full sequence on to the next LSTM.
        passes_sequence_on = layer_number < lstm_layer_count
        model.add(tf.keras.layers.LSTM(units, return_sequences=passes_sequence_on))
        model.add(tf.keras.layers.Dropout(dropout, seed=seed))

    model.add(tf.keras.layers.Dense(1))

    optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)
    model.compile(optimizer=optimizer, loss=loss_function)

    model.fit(
        training_x,
        training_y,
        batch_size=batch_size,
        epochs=epochs,
        validation_data=(validation_x, validation_y),
        verbose=0,
    )
    return model

In [None]:
# Input data
all_grid_cells_temp = full_demand
# Align the validation frame to the training columns BY LABEL. The loop below
# addresses columns by position, so without this a grid cell that occurs in
# only one of the two periods would pair a training cell with a different
# validation cell (or raise an IndexError). Cells without any validation rows
# get a demand of 0; cells that only occur in the validation data are dropped
# because no model is trained for them. reindex returns a new frame, so
# validation_data itself is left untouched.
val_grid_cells_temp = validation_data.reindex(columns=all_grid_cells_temp.columns, fill_value=0)

# run the model
grid_cells = []
models = []

# loop through all the grid cells; the columns before position
# num_input_features are treated as the feature columns and are skipped
for column_position in range(num_input_features, len(all_grid_cells_temp.columns)):
    # preprocessing: same column position == same grid cell after the alignment above
    grid_values, grid_name = get_value_name(all_grid_cells_temp, column_position)
    val_grid_values, _ = get_value_name(val_grid_cells_temp, column_position)

    train_x, train_y = prepare_data_for_model(grid_values)
    val_x, val_y = prepare_data_for_model(val_grid_values)

    model_name = 'LSTM_GridCell_' + str(grid_name)
    print('Training model ' + model_name)

    # LSTM modelling: one independent model per grid cell
    current_model = lstm_model(model_name, num_units, train_x, val_x, train_y, val_y)

    # Save result
    grid_cells.append(grid_name)
    models.append(current_model)


# one row per grid cell: its label and the fitted model
results = pd.DataFrame({'grid_cell': grid_cells, 'model': models}, columns=['grid_cell', 'model'])

results.head()

In [None]:
# save results
# Writes the grid_cell/model table to results_filepath as a pickle.
# NOTE(review): the 'model' column holds Keras model objects -- confirm that they
# pickle and unpickle correctly with the installed TensorFlow version.
results.to_pickle(results_filepath)