# Traffic Flow Predictions with LSTM model

## The goal is to predict traffic flow for multiple steps ahead for all the highways in Belgium

## General Import

In [None]:
import numpy as np # linear algebra
import os # accessing directory structure
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

from math import sqrt

import gc
import time

from sklearn.preprocessing import MinMaxScaler

import tensorflow as tf
from tensorflow import keras



In [None]:
import geojson
import geopandas as gpd
from fiona.crs import from_epsg
import os, json
from shapely.geometry import shape, Point, Polygon, MultiPoint
from geopandas.tools import sjoin
import matplotlib.cm as cm
import matplotlib.pyplot as plt # plotting
import seaborn as sns; sns.set()

from IPython.display import Image

import folium

from branca.colormap import  linear
import json
import branca.colormap as cm

### SEED

In [None]:
from numpy.random import seed

# Reproducibility
def set_seed(seed=31415):
    """Seed NumPy and TensorFlow and export the seed-related environment flags.

    Args:
        seed: value passed to ``np.random.seed`` and ``tf.random.set_seed``;
            its string form is also stored in ``PYTHONHASHSEED``.
    """
    # Seed both random number generators with the same value.
    for seeder in (np.random.seed, tf.random.set_seed):
        seeder(seed)

    # Environment flags: hash seed and the TensorFlow deterministic-ops switch.
    os.environ.update({
        'PYTHONHASHSEED': str(seed),
        'TF_DETERMINISTIC_OPS': '1',
    })
    
set_seed(31415)

## Check files

In [None]:
# Print the full path of every file found under /kaggle/input.
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# Visualize Streets Network

In [None]:
# Load the street geometries from the GeoJSON file into a GeoDataFrame.
df_belgium = gpd.read_file('/kaggle/input/belgium-obu/Belgium_streets.json')

# Base map centred on [50.85045, 4.34878] with the street geometries drawn on top.
m = folium.Map([50.85045, 4.34878], zoom_start=9, tiles='cartodbpositron')
folium.GeoJson(df_belgium).add_to(m)
m  # last expression of the cell: the notebook renders the map

In [None]:
# Load the whole traffic-flow table from Flow_BEL_street_30min.csv (no row limit is applied) and report its size.
new_table = pd.read_csv('../input/obu-data-preprocessing/Flow_BEL_street_30min.csv')
nRow, nCol = new_table.shape
print(f'There are {nRow} rows and {nCol} columns')

# SELECT STREETS BASED ON AVERAGE TRAFFIC FLOW

In [None]:
# Minimum mean traffic flow a street must reach to be kept by the selection step.
mean_value = 10

In [None]:
# Every column after the first one is treated as a street series.
table_index = new_table.iloc[:, 1:]
ALL_STREETS = list(table_index.columns.values)

# Mean flow of each street, in the same order as ALL_STREETS.
new_street = list(ALL_STREETS)
mean_flow = [np.mean(table_index[name]) for name in new_street]

df_mean_flow = pd.DataFrame({'street_index': new_street, 'mean_flow': mean_flow})
print('')
print(df_mean_flow.head())
print('')

# Keep the streets whose mean flow reaches the threshold, ordered by street index.
STREETS = list(
    df_mean_flow[df_mean_flow['mean_flow'] >= mean_value]
    .sort_values(by=['street_index'])
    .street_index
)

print('considering a average traffic flow of ' + str(mean_value)+' per street')
print('')
print('mean traffic flow '+str(mean_value)+ ' ---> number of street segments: ' + str(len(STREETS)))


# ADD Auxiliary Temporal Features

In [None]:
# Parse the timestamp column once; the parsed values are kept on new_table.
new_table['Datetime'] = pd.to_datetime(new_table['datetime'])

DATAFRAME = new_table
DATAFRAME = DATAFRAME.drop(['datetime'],axis=1)
# Keep only the selected street columns. The explicit copy makes DATAFRAME an
# independent frame, so the column assignments below do not write into a
# slice of new_table.
DATAFRAME = DATAFRAME[DATAFRAME.columns.intersection(STREETS)].copy()

# Auxiliary temporal features

# Hour of day, used only to build the cyclic encoding and dropped afterwards.
DATAFRAME['hour'] = new_table['Datetime'].dt.hour

# Cyclic encoding of the hour. The period is 24 so that hour 23 and hour 0 are
# neighbours on the circle but not the same point (a period of 23 would map
# both of them to the same (sin, cos) pair).
DATAFRAME['hour_x'] = np.sin(DATAFRAME.hour*(2.*np.pi/24))
DATAFRAME['hour_y'] = np.cos(DATAFRAME.hour*(2.*np.pi/24))

DATAFRAME['DayOfWeek'] = new_table['Datetime'].dt.dayofweek
# Day-type code: 2 for dayofweek < 5, 1 for dayofweek == 5, 0 for dayofweek == 6.
DATAFRAME['WorkingDays'] = DATAFRAME['DayOfWeek'].apply(
    lambda d: 2 if d < 5 else (1 if d == 5 else 0)
)

DATAFRAME = DATAFRAME.drop(['hour'],axis=1)

# temporal features = 4 (hour_x, hour_y, DayOfWeek, WorkingDays), stored as the last columns
feat_time = 4

DATAFRAME.head()



# Visualize Traffic Flow at particular time

In [None]:
# Street identifiers are converted to ints before being matched against df_belgium's index.
STREETS = [int(float(s)) for s in STREETS]


# Keep only the selected streets. The explicit copy makes the filtered frame
# independent, so adding a column below does not write into a slice.
df_belgium = df_belgium[df_belgium.index.isin(STREETS)].copy()
# Flow of every street at row 2182; the trailing temporal-feature columns are excluded.
# NOTE(review): the values are assigned by position, which assumes the street
# columns of DATAFRAME are in the same order as the rows of df_belgium - confirm.
df_belgium['Trucks_Flow'] =  DATAFRAME.iloc[2182,:-feat_time].astype(float).values

nbh_count_colormap = linear.YlOrRd_09.scale(0,200)

# Six colour steps between 0 and 200, with the step bounds listed in `index`.
colormap_dept = cm.StepColormap(
    colors=['#00ae53', '#86dc76', '#daf8aa',
            '#ffe6a4', '#ff9a61', '#ee0028'],
    vmin = 0,
    vmax = 200,
    index=[0, 20, 50, 80, 110, 150, 180])

polygons = df_belgium
m = folium.Map([50.85045, 4.34878], zoom_start= 9, tiles='cartodbpositron')

# Colour each street by its Trucks_Flow value.
style_function = lambda x: {
    'fillColor': colormap_dept(x['properties']['Trucks_Flow']),
    'color': colormap_dept(x['properties']['Trucks_Flow']),
    'weight': 1.5,
    'fillOpacity': 1
}
folium.GeoJson(polygons,
    style_function=style_function).add_to(m)


colormap_dept.caption = 'Traffic Flow (N#Trucks/30min) at (not real) 12:00 a.m.'
colormap_dept.add_to(m)

m

# SPLITTING Training/Testing

In [None]:
# Number of trailing rows held out for testing: 168*2*2 = 672.
# NOTE(review): the input file name suggests 30-minute rows, in which case 672
# rows span two weeks (one week would be 168*2 = 336) - confirm the intended length.
test_step = 168*2*2 # labelled "1 WEEK" originally - see NOTE(review) above

# ATTENTION: anything you learn and is not known in advance, must be learnt only from training data!
scaler = MinMaxScaler(feature_range=(0, 1))
scaler_aux = MinMaxScaler(feature_range=(0, 1))

# TRAINING --- (scaler/scaler_aux).fit_transform()
# TESTING --- (scaler/scaler_aux).transform()

# TRAINING SET: every row except the last test_step; the street columns are scaled with a scaler fitted here.
TRAIN = DATAFRAME[: -test_step ]
train_feat = scaler.fit_transform(TRAIN.values[:,:-feat_time])


# TESTING SET: the last test_step rows, scaled with the scaler fitted on the training rows.
TEST = DATAFRAME[-test_step:]
test_feat = scaler.transform(TEST.values[:,:-feat_time])


# AUX are known in advance, so their scaler is fitted on all rows before splitting.
AUX = scaler_aux.fit_transform(DATAFRAME.values[:,-feat_time:])
train_aux = AUX[: -test_step ]
test_aux = AUX[-test_step:]


# Concatenate the scaled street columns with the scaled auxiliary columns (auxiliary columns last).
train_feat = np.hstack([train_feat, train_aux])
test_feat = np.hstack([test_feat, test_aux])

In [None]:
def inverse_transform(forecasts, scaler):
    """Undo the scaling of `forecasts`.

    Args:
        forecasts: scaled values, passed unchanged to the scaler.
        scaler: object exposing an ``inverse_transform`` method.

    Returns:
        Whatever ``scaler.inverse_transform(forecasts)`` returns.
    """
    return scaler.inverse_transform(forecasts)

In [None]:
nRow, nCol = DATAFRAME.shape

# Plot, for every test row, the mean over the street columns (temporal features excluded).
plt.figure(figsize=(20,10))
plt.plot(np.mean(TEST.iloc[:,:-feat_time],axis=1))
plt.title('TESTING SET')
plt.show()

# Note: nCol counts every column of DATAFRAME, including the trailing temporal features.
print(f'Consider {nRow} instances (rows) and {nCol} streets segments (columns)')
print('')
print('TRAIN SIZE: '+ str(TRAIN.shape))
print('')
print('TEST SIZE: '+ str(TEST.shape))



# LSTM model - Multivariate Multiple-step ahead Prediction Model

## Autoregressive Approach

In [None]:
# Display the illustration stored in Autoregressive.png.
Image("/kaggle/input/image-lstm/Autoregressive.png")

## DATA PREPARATION
### * {BATCH_SIZE, INPUT_SEQUENCE (OUTPUT), FEATURES_SIZE}

In [None]:
# Display the illustration stored in DATAPREP.png.
Image("/kaggle/input/image-lstm/DATAPREP.png")

In [None]:
def prep_data(dataframe, INPUT, OUTPUT, AUX, BATCH):
    """Build a batched dataset of ``((feat, aux), label)`` sliding windows.

    Args:
        dataframe: 2-D array sliced row by row; its last ``AUX`` columns are
            also used on their own as the auxiliary series.
        INPUT: number of consecutive rows in each ``feat`` window.
        OUTPUT: number of consecutive rows in each ``aux`` and ``label`` window.
        AUX: number of trailing columns taken as auxiliary features.
        BATCH: batch size of the returned dataset.

    Returns:
        A ``tf.data.Dataset`` whose elements are ``((feat, aux), label)``
        batches, with prefetching enabled.
    """

    def windows_of(source, length):
        # One window per starting row (shift=1); incomplete windows are dropped.
        return source.window(length, shift=1, stride=1, drop_remainder=True)

    def as_tensors(windows, length):
        # Turn every window (itself a small dataset) into a single tensor.
        return windows.flat_map(lambda w: w.batch(length))

    rows = tf.data.Dataset.from_tensor_slices(dataframe)
    aux_rows = tf.data.Dataset.from_tensor_slices(dataframe[:, -AUX:])

    # features: windows of INPUT rows starting at row 0, 1, 2, ...
    feat = as_tensors(windows_of(rows, INPUT), INPUT)

    # aux / labels: the first INPUT windows are skipped, so the i-th aux and
    # label windows start right after the i-th feature window ends.
    aux = as_tensors(windows_of(aux_rows, OUTPUT).skip(INPUT), OUTPUT)
    label = as_tensors(windows_of(rows, OUTPUT).skip(INPUT), OUTPUT)

    paired = tf.data.Dataset.zip(((feat, aux), label))

    return paired.batch(BATCH).prefetch(tf.data.experimental.AUTOTUNE)

## PARAMETERS

In [None]:
# Number of columns of DATAFRAME (street columns plus temporal features).
n_total_features = len(DATAFRAME.columns) 

size_input = 12      # rows in each encoder input window
size_forecast = 12   # rows in each auxiliary / label window
size_total = size_input + size_forecast
size_aux = feat_time  # number of trailing auxiliary columns

batch_size = 256
batch_train = batch_size
batch_test = 1  # the test dataset is consumed one window at a time

# Windowed datasets of ((encoder window, aux window), label window) for training and testing.
windowed_train = prep_data(train_feat, size_input, size_forecast, size_aux, batch_train)
windowed_test = prep_data(test_feat, size_input, size_forecast, size_aux, batch_test)

# Number of units of the LSTM cells.
latent_dim = 150


## LSTM Cell

In [None]:
# Display the illustration stored in The-structure-of-the-LSTM-unit.png.
Image("/kaggle/input/image-lstm/The-structure-of-the-LSTM-unit.png")

## LSTM Architecture

In [None]:
# Display the illustration stored in ECDEC.jpg.
Image("/kaggle/input/image-lstm/ECDEC.jpg")


In [None]:
from tensorflow.keras import regularizers

class FeedBack_LSTM(tf.keras.Model):
    """Encoder-decoder LSTM whose decoder feeds its own output back in.

    The encoder reads the input window. The decoder cell is then stepped
    manually: at every step it receives the auxiliary features of that step
    concatenated with the previous decoder output (the encoder output for the
    first step), and a two-layer dense head maps the cell output to
    ``tot_feat`` values.
    """

    def __init__(self, units, sz_input, tot_feat):
        """Create the encoder, the decoder cell and the dense output head.

        Args:
            units: number of units of the encoder and decoder LSTM cells.
            sz_input: number of decoding steps run by ``call``/``inference``.
            tot_feat: number of values predicted at every step.
        """
        super(FeedBack_LSTM, self).__init__()
        self.tot_feat = tot_feat
        self.units = units
        self.inp = sz_input

        # encoder
        self.cell_encoder = tf.keras.layers.LSTMCell(self.units, kernel_initializer='glorot_uniform',
                                                     recurrent_initializer='glorot_uniform',
                                                     kernel_regularizer=regularizers.l2(0.001),
                                                     bias_initializer='zeros')

        self.encoder = tf.keras.layers.RNN(self.cell_encoder, return_state = True)

        # decoder
        self.cell_decoder = tf.keras.layers.LSTMCell(self.units,
                                                     kernel_initializer='glorot_uniform',
                                                     recurrent_initializer='glorot_uniform',
                                                     kernel_regularizer=regularizers.l2(0.001),
                                                     bias_initializer='zeros')

        # RNN wrapper around the decoder cell. The methods of this class step
        # `cell_decoder` directly and do not call this wrapper.
        self.decoder = tf.keras.layers.RNN(self.cell_decoder, return_state = True)

        self.dense_0 = tf.keras.layers.Dense(150, activation='relu',
                                           kernel_regularizer=regularizers.l2(0.001))

        self.dense = tf.keras.layers.Dense(self.tot_feat,
                                           kernel_regularizer=regularizers.l2(0.001))

    def warmup(self, inp_encoder):
        """Run the encoder and return ``(output, state)``.

        ``state`` is the list of state tensors returned by the encoder RNN
        after its output.
        """
        out_encoder, *state = self.encoder(inp_encoder)

        return out_encoder, state

    def _decode(self, inputs_enc, inputs_dec, training):
        """Shared decoding loop behind ``call`` and ``inference``.

        Args:
            inputs_enc: encoder input window.
            inputs_dec: auxiliary features, indexed as ``[:, step, :]``.
            training: flag forwarded to every decoder-cell step.

        Returns:
            Predictions stacked as (batch, time, features).
        """
        # The encoder output is the first feedback input and the encoder
        # state initialises the decoder state.
        feedback, state = self.warmup(inputs_enc)

        # Plain Python list: the loop is unrolled step by step.
        predictions = []

        # At least one step is always decoded.
        for n in range(max(1, self.inp)):

            aux = tf.dtypes.cast(inputs_dec[:, n, :], tf.float32)

            # Known-in-advance features of this step + previous decoder output.
            x = tf.concat([aux, feedback], -1)

            # Execute one lstm step.
            feedback, state = self.cell_decoder(x, states=state, training=training)

            predictions.append(self.dense(self.dense_0(feedback)))

        # predictions.shape => (time, batch, features)
        predictions = tf.stack(predictions)

        # predictions.shape => (batch, time, features)
        return tf.transpose(predictions, [1, 0, 2])

    def call(self, inputs_enc, inputs_dec):
        """Forward pass used for training: every decoder step runs with ``training=True``."""
        return self._decode(inputs_enc, inputs_dec, training=True)

    def inference(self, inputs_enc, inputs_dec):
        """Forward pass used for prediction: every decoder step runs with ``training=False``.

        All steps use the same flag, so the first step and the following ones
        are treated consistently.
        """
        return self._decode(inputs_enc, inputs_dec, training=False)
    
    

    
    


### Define LSTM for training

The batch size for training the LSTM model is 256 (`batch_train`). It differs from the batch size used for testing, as we will see below.

In [None]:
# Symbolic input for the encoder: size_input steps, each with all n_total_features columns.
encoder_inputs = tf.keras.Input(shape=(size_input, n_total_features), name = 'enc_inputs')
encoder_inputs

In [None]:
# Symbolic input for the decoder: size_aux auxiliary features per step.
# NOTE(review): the step count is size_input, not size_forecast; both are 12 here - confirm this is intended.
decoder_inputs = tf.keras.Input(shape=(size_input, size_aux), name='aux_inputs')
decoder_inputs

In [None]:
# Instantiate the model and call it once on the symbolic inputs
# (presumably so that the layers are built before training - confirm).
FEEDBACK_lstm = FeedBack_LSTM(latent_dim, size_input, n_total_features)
FEEDBACK_lstm(encoder_inputs, decoder_inputs)

# TRAIN MODEL

### optimizer

In [None]:
# Adam optimizer with a learning rate of 1e-4. `learning_rate` is the
# documented keyword; the `lr` spelling is a deprecated alias.
optimizer = tf.keras.optimizers.Adam(learning_rate=0.0001)

# comment for now
# checkpoint_dir = './training_checkpoints'
# checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt")
# checkpoint = tf.train.Checkpoint(optimizer = optimizer, lstm = lstm )

### loss function

In [None]:
# Mean absolute error between targets and predictions.
loss_object = tf.keras.losses.MeanAbsoluteError()


def loss_function(real, pred):
    """Return the mean of ``loss_object(real, pred)``."""
    return tf.reduce_mean(loss_object(real, pred))

### - *@tf.function decorator* - to speed-up training

In [None]:

@tf.function
def train_step(inp_enc, inp_dec, targ):
    """Run one optimisation step on a batch and return its MAE.

    Args:
        inp_enc: encoder input batch.
        inp_dec: auxiliary (decoder) input batch.
        targ: target batch compared with the model predictions.

    Returns:
        The MAE of the batch, without the regularisation penalty, so the value
        logged by the callers remains a plain MAE.
    """
    variables = FEEDBACK_lstm.trainable_variables

    with tf.GradientTape() as tape:

        predictions = FEEDBACK_lstm(inp_enc, inp_dec)

        batch_loss = loss_function(targ, predictions)

        # The layers declare L2 kernel regularisers. In a custom training loop
        # those penalties only take effect when they are added to the
        # optimised loss explicitly.
        loss = batch_loss
        penalties = FEEDBACK_lstm.losses
        if penalties:
            loss = loss + sum(penalties)

    gradients = tape.gradient(loss, variables)

    optimizer.apply_gradients(zip(gradients, variables))

    return batch_loss

In [None]:
EPOCHS = 200

# Number of batches taken from the training dataset in every epoch.
steps_per_epoch = len(TRAIN) // batch_size

# Keep results for plotting
train_loss_results = []
train_rmse_accuracy_results = []  # not filled in this cell

print('')
print('TRAINING')
print('')

for epoch in range(EPOCHS):
    
    start = time.time()
    
    # Fresh running mean, so every epoch's loss is averaged on its own.
    epoch_loss_avg = tf.keras.metrics.Mean()

    total_loss = 0  # not updated in this loop

    for (batch, (inp, targ)) in enumerate(windowed_train.take(steps_per_epoch)):
        
        # Each element is ((encoder window, auxiliary window), label window).
        inp_enc = inp[0] 
        inp_dec = inp[1]

        batch_loss = train_step(inp_enc, inp_dec, targ)

        # Track progress
        epoch_loss_avg.update_state(batch_loss)  # Add current batch loss
        
    # End epoch
    train_loss_results.append(epoch_loss_avg.result())
    
    

    # Report the epoch loss every 10 epochs.
    if epoch % 10 == 0:
        print("Epoch {}: Loss MAE: {:.3f}".format(epoch, epoch_loss_avg.result()))

          
          
# Printed once, after the loop: `start` holds the start time of the last epoch.
print('Time taken for 1 epoch {} sec\n'.format(time.time() - start))

# Plot Training Progress

In [None]:
# Plot the per-epoch training loss collected in train_loss_results.
fig, axes = plt.subplots(1, sharex=True, figsize=(12, 8))
fig.suptitle('Training Metrics')

axes.set_ylabel("Loss (MAE)", fontsize=14)
axes.plot(train_loss_results)
axes.set_xlabel("Epoch", fontsize=14)
plt.show()

# TEST and UPDATE MODEL

### Define LSTM for prediction

We reuse the same LSTM model for prediction; the only difference here is the batch size, `batch_test = 1`.

In [None]:
# Symbolic input with the encoder shape (size_input steps, n_total_features columns).
# NOTE(review): `inputs_test` is not referenced by the other visible cells - confirm it is still needed.
inputs_test = tf.keras.Input(shape=(size_input, n_total_features), name='inputs')
print(' INPUT SHAPE for LSTM: { batch size, input sequence, features size}')
inputs_test


In [None]:
def evaluate_forecasts(targets, forecasts, n_seq):
    """Compute RMSE and MAE for each forecast step and each feature.

    Args:
        targets: sequence of true windows; ``targets[k][i]`` is the feature
            row of window ``k`` at step ``i``.
        forecasts: sequence of predicted windows, laid out like ``targets``.
        n_seq: number of steps to evaluate.

    Returns:
        ``(list_rmse, list_mae)``: two arrays with one row per step and one
        column per feature, each error being averaged over all windows.
    """

    def step_errors(step):
        # Gather the rows of this step from every window: (windows, features).
        actual = np.vstack([window[step] for window in targets])
        guess = np.vstack([window[step] for window in forecasts])
        gap = actual - guess
        return np.sqrt(np.square(gap).mean(axis=0)), np.absolute(gap).mean(axis=0)

    per_step = [step_errors(step) for step in range(n_seq)]

    list_rmse = np.vstack([rmse for rmse, _ in per_step])
    list_mae = np.vstack([mae for _, mae in per_step])

    return list_rmse, list_mae

In [None]:
forecasts = []
targets = []

rmse_list = []
mae_list = []

# Number of passes over the enlarged training set after every test step.
UPDATE = 2

for (step, (inp, targ)) in enumerate(windowed_test):

    # Each element is ((encoder window, auxiliary window), label window).
    inp_enc = inp[0]
    inp_dec = inp[1]

    pred = FEEDBACK_lstm.inference(inp_enc, inp_dec)

    # Take the first element of the batch, drop the trailing auxiliary
    # columns and map the street columns back to their original scale.
    truth = inverse_transform(targ[0][:,:-feat_time],  scaler)
    pred = inverse_transform(pred[0][:,:-feat_time],  scaler)

    forecasts.append(pred)
    targets.append(truth)

    # Errors per forecast step over all windows seen so far. The horizon comes
    # from size_forecast instead of a hard-coded 12.
    rmse, mae = evaluate_forecasts(targets, forecasts, size_forecast)

    rmse_list.append(rmse)
    mae_list.append(mae)

    # NOTE(review): the curves are sums over the street columns, while the
    # title says "Average" - confirm which one is intended.
    plt.plot(np.sum(pred, axis=1), label='Prediction')
    plt.plot(np.sum(truth, axis=1), label='Truth')
    plt.title('Average Prediction on all highways in Belgium')
    plt.legend()
    plt.show()

    print('* Time step '+str(step))
    print('* Prediction Accuracy (MAE) '+ str(np.absolute(truth - pred).mean()))
    print('----')
    print('* After prediction UPDATE model with new streets observations')

    # Append the test row of this step to the training data and rebuild the
    # windowed training dataset from the enlarged array.
    new_instance = test_feat[step,:].reshape(1,-1)

    train_feat = np.vstack([train_feat, new_instance])

    windowed_new = prep_data(train_feat, size_input, size_forecast, size_aux, batch_train)

    update_steps_per_epoch = len(train_feat)//batch_train

    for epoch in range(UPDATE):

        # Fresh running mean for every update epoch, so the reported loss is
        # not averaged together with earlier epochs.
        epoch_loss_avg = tf.keras.metrics.Mean()

        for (batch, (inp_new, targ_new)) in enumerate(windowed_new.take(update_steps_per_epoch)):

            inp_enc = inp_new[0]
            inp_dec = inp_new[1]

            batch_loss = train_step(inp_enc, inp_dec, targ_new)

            # Track progress
            epoch_loss_avg.update_state(batch_loss)  # Add current batch loss

        # End epoch
        train_loss_results.append(epoch_loss_avg.result())

        # Only the first update epoch of every test step is reported.
        if epoch % UPDATE == 0:
            print("UPDATE - Epoch {}: Loss MAE: {:.3f}".format(epoch, epoch_loss_avg.result()))



In [None]:
# Per forecast step: mean over the test steps (axis 0), then mean over the columns (axis 1).
RMSE_MEAN = np.mean(rmse_list,axis=0).mean(axis=1)
# NOTE(review): this is the std (across columns) of the std (across test steps),
# not the std of the values averaged above - confirm this is the intended spread.
RMSE_STD =  np.std(rmse_list,axis=0).std(axis=1)

for i in range(len(RMSE_MEAN)):
    print('t+'+str(i+1)+' RMSE MEAN ' +str(np.round(RMSE_MEAN[i],3))+' +- '+str(np.round(RMSE_STD[i],3)))
    print('')

In [None]:
# Per forecast step: mean over the test steps (axis 0), then mean over the columns (axis 1).
MAE_MEAN = np.mean(mae_list,axis=0).mean(axis=1)
# NOTE(review): this is the std (across columns) of the std (across test steps),
# not the std of the values averaged above - confirm this is the intended spread.
MAE_STD =  np.std(mae_list,axis=0).std(axis=1)

for i in range(len(MAE_MEAN)):
    print('t+'+str(i+1)+' MAE MEAN ' +str(np.round(MAE_MEAN[i],3))+' +- '+str(np.round(MAE_STD[i],3)))
    print('')

In [None]:
import pickle

# Save the per-step RMSE and MAE lists together, as one pickled two-element list.
with open('save_predictions_results.pkl', 'wb') as f: 
    pickle.dump([rmse_list, mae_list], f)