# Seasonal Persistence Model Predictions

![Animated seasonal persistence baseline predictions](https://i.imgur.com/jSMD7Gj.gif)

In [1]:
# from PIL import Image
# import imageio

# list_images = []

# for i in range(649):
#     image = Image.open('../input/seasonal-persistence-model/'+str(i)+'_plot.png')
#     list_images.append(image)

# list_images[0]

# imageio.mimwrite('baseline_predictions.gif', list_images, fps=3)

## General Import

In [1]:
from mpl_toolkits.mplot3d import Axes3D
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt # plotting
import numpy as np # linear algebra
import os # accessing directory structure
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

from math import sqrt

import gc
import time
import seaborn as sns; sns.set()


In [1]:
import geojson
import geopandas as gpd
from fiona.crs import from_epsg
import os, json
from shapely.geometry import shape, Point, Polygon, MultiPoint
from geopandas.tools import sjoin
import matplotlib.cm as cm
import matplotlib.pyplot as plt # plotting
import seaborn as sns; sns.set()
from IPython.display import Image

import pickle
import folium


from branca.colormap import  linear
import json
import branca.colormap as cm

from tqdm.notebook import tqdm

# Visualize Streets Network

In [1]:
# Read the street-segment geometries and draw them on a light base map.
df_belgium = gpd.read_file('/kaggle/input/belgium-obu/Belgium_streets.json')

m = folium.Map(
    location=[50.85045, 4.34878],
    zoom_start=9,
    tiles='cartodbpositron',
)
street_layer = folium.GeoJson(df_belgium)
street_layer.add_to(m)
# Last expression of the cell: renders the interactive map.
m

In [1]:
# Load the complete Flow_BEL_street_30min.csv table (no row limit is applied) and report its size.
# Later cells read its 'datetime' column and treat every column after the first as one street segment.
new_table = pd.read_csv('../input/obu-data-preprocessing/Flow_BEL_street_30min.csv')
nRow, nCol = new_table.shape
print(f'There are {nRow} rows and {nCol} columns')


# SELECT STREETS BASED ON AVERAGE TRAFFIC FLOW

In [1]:
# Minimum mean flow a street column must reach (>=) to be kept in the analysis.
mean_value = 10

In [1]:
# Every column after the first one is treated as a street segment.
table_index = new_table.iloc[:, 1:]
ALL_STREETS = list(table_index.columns.values)

# Mean flow of each street column, kept in the same order as ALL_STREETS.
new_street = list(ALL_STREETS)
mean_flow = [np.mean(table_index[name]) for name in new_street]

df_mean_flow = pd.DataFrame({'street_index': new_street, 'mean_flow': mean_flow})
print('')
print(df_mean_flow.head())
print('')

# Keep only the streets whose mean flow reaches the threshold, ordered by identifier.
above_threshold = df_mean_flow['mean_flow'] >= mean_value
STREETS = df_mean_flow[above_threshold].sort_values(by=['street_index'])
STREETS = list(STREETS.street_index)

print('considering a average traffic flow of ' + str(mean_value)+' per street')
print('')
print('mean traffic flow '+str(mean_value)+ ' ---> number of street segments: ' + str(len(STREETS)))


# Prepare Data for Multistep-Ahead Forecasting and Split into Training and Testing Sets

In [1]:
def split_sequences_multistep(sequences, n_steps_in, n_steps_out):
    """Cut a 2-D series into sliding (input, output) window pairs.

    Window ``s`` uses rows ``s : s + n_steps_in`` as input and the
    ``n_steps_out`` rows that follow as output. Only windows that fit
    entirely inside ``sequences`` are produced.

    Parameters
    ----------
    sequences : 2-D array indexed as ``[row, column]``.
    n_steps_in : number of rows in each input window.
    n_steps_out : number of rows in each output window.

    Returns
    -------
    (X, y) : two arrays built with ``np.array`` from the collected windows;
        both are empty when no complete window fits.
    """
    total = len(sequences)
    span = n_steps_in + n_steps_out
    # Start positions run from 0 while the full span still fits (and never past the data).
    starts = range(min(total, total - span + 1))

    inputs = [sequences[s:s + n_steps_in, :] for s in starts]
    outputs = [sequences[s + n_steps_in:s + span, :] for s in starts]
    return np.array(inputs), np.array(outputs)

In [1]:
# transform series into train and test sets for supervised learning

def data_mimo_persistent(dataframe, n_train, n_lag, n_output):
    """Window a dataframe and split the windows into a head part and a tail part.

    The raw values are turned into (input, output) window pairs with
    ``split_sequences_multistep``; both window arrays are then cut at index
    ``23 - n_train``: everything before the cut is returned as the training
    part, everything from the cut onwards as the validation part.

    NOTE(review): the constant 23 presumably equals ``n_lag + n_output - 1``
    for the 12/12 configuration used in this notebook -- confirm before
    calling with other window sizes.

    Returns
    -------
    X_tr, Y_tr, X_val, Y_val
    """
    windows_in, windows_out = split_sequences_multistep(dataframe.values, n_lag, n_output)
    cut = 23 - n_train
    return windows_in[:cut], windows_out[:cut], windows_in[cut:], windows_out[cut:]

In [1]:
# Window sizes: 12 input steps and 12 output steps per sample.
n_LAGS = 12
n_OUTPUT = 12

# table_date is an alias of new_table, so the datetime conversion is applied to new_table itself.
table_date = new_table
table_date['datetime'] = pd.to_datetime(table_date['datetime'])

# Keep only the selected street columns and put 'datetime' back as the LAST column.
# Building the frame with assign() returns a new object, which avoids the
# SettingWithCopyWarning raised when a column is added to a column-subset frame.
street_columns = table_date.columns.drop('datetime').intersection(STREETS)
DATAFRAME = table_date[street_columns].assign(datetime=table_date['datetime'])

nRow, nCol = DATAFRAME.shape
print(f'Consider {nRow} instances (rows) and {nCol} streets segments (columns)')
print('')

n_TRAIN = 168*2*2 # 2 weeks

X_tr_pers, Y_tr_pers, X_val_pers, Y_val_pers = data_mimo_persistent(DATAFRAME, n_TRAIN, n_LAGS, n_OUTPUT)

# The last feature of every window is the 'datetime' column appended above:
# keep the validation timestamps for plot labels, then strip it from the model arrays.
datetime = X_val_pers[:,:,-1]

x_tr_pers = X_tr_pers[:,:,:-1]
y_tr_pers = Y_tr_pers[:,:,:-1]
x_val_pers = X_val_pers[:,:,:-1]
y_val_pers = Y_val_pers[:,:,:-1]

print('X_Train: %s, Y_Train: %s' % (x_tr_pers.shape, y_tr_pers.shape))
print('')
print('X_Valid: %s, Y_Valid: %s' % (x_val_pers.shape, y_val_pers.shape))

# Visualize Traffic Flow at time t

In [1]:

# Convert the street identifiers to ints so they can be matched against df_belgium's index
# (presumably they are numeric strings taken from the CSV header -- confirm).
STREETS = [int(float(s)) for s in STREETS]

# Take an explicit copy of the filtered rows: adding a column to a filtered
# slice would otherwise raise pandas' SettingWithCopyWarning.
df_belgium = df_belgium[df_belgium.index.isin(STREETS)].copy()
# Flow values of validation sample 12, output step 5, assigned positionally.
# NOTE(review): this assumes the filtered rows have the same length and order as the
# street columns of y_val_pers -- confirm the alignment.
df_belgium['Trucks_Flow'] = y_val_pers[12][5]

nbh_count_colormap = linear.YlOrRd_09.scale(0,200)

# Six colour steps (green -> red) delimited by the seven boundaries in `index`.
colormap_dept = cm.StepColormap(
    colors=['#00ae53', '#86dc76', '#daf8aa',
            '#ffe6a4', '#ff9a61', '#ee0028'],
    vmin = 0,
    vmax = 200,
    index=[0, 20, 50, 80, 110, 150, 180])

polygons = df_belgium
m = folium.Map([50.85045, 4.34878], zoom_start= 9, tiles='cartodbpositron')

# Colour both the fill and the outline of each feature by its truck flow.
style_function = lambda x: {
    'fillColor': colormap_dept(x['properties']['Trucks_Flow']),
    'color': colormap_dept(x['properties']['Trucks_Flow']),
    'weight': 1.5,
    'fillOpacity': 1
}
folium.GeoJson(polygons,
    style_function=style_function).add_to(m)


colormap_dept.caption = 'Traffic Flow (N#Trucks/30min) at (not real) 12:00 a.m.'
colormap_dept.add_to(m)

m

# Function defining Seasonal Persistence Model

In [1]:
def seasonal_mean(X, seas, n_seq):
    """Seasonal persistence forecast: average of the same slot over the past 4 weeks.

    Only the last lag step of every window (``X[:, -1]``) is used. The most
    recent 4 weeks of those values (4 x 336 half-hour samples) are split into
    four consecutive weekly blocks which are averaged element-wise; the first
    ``n_seq`` rows of that average are returned as the forecast.

    When fewer than ``seas`` windows (or fewer than the 4 weeks needed for the
    average) are available, the function falls back to daily persistence and
    returns the first ``n_seq`` rows of the last 48 samples instead.

    Parameters
    ----------
    X : ndarray indexed as ``[window, lag step, street]``.
    seas : minimum number of windows required to use the 4-week average.
    n_seq : number of forecast steps to return.

    Returns
    -------
    ndarray with at most ``n_seq`` rows and one column per street.
    """
    week = 168 * 2          # half-hour samples in one week
    n_weeks = 4
    day = 24 * 2            # half-hour samples in one day
    history = n_weeks * week

    n_windows = X.shape[0]

    if seas > n_windows or history > n_windows:
        # Not enough history for the weekly average: repeat the most recent day.
        # (The original code appended to an undefined list here and crashed.)
        recent_day = X[-day:, -1]
        return np.asarray(recent_day)[:n_seq]

    # Last lag step of the most recent 4 weeks, oldest sample first.
    last_steps = X[-history:, -1]
    # -> (week number, slot within week, street); average the same slot over the weeks.
    per_week = last_steps.reshape(n_weeks, week, -1)
    return per_week.mean(axis=0)[:n_seq]

# Test Model

In [1]:
import math


def evaluate_forecasts(targets, forecasts, n_seq):
    """Per-horizon RMSE and MAE over all collected forecast/target pairs.

    For each of the first ``n_seq`` horizons, the matching row of every target
    and every forecast is stacked into one matrix (one row per sample) and the
    errors are averaged over the samples, column by column.

    Parameters
    ----------
    targets, forecasts : sequences of arrays indexed as ``[horizon, column]``.
    n_seq : number of horizons to evaluate.

    Returns
    -------
    (rmse, mae) : two arrays with one row per horizon.
    """
    rmse_rows = []
    mae_rows = []

    for step in range(n_seq):
        actual = np.vstack([sample[step] for sample in targets])
        guess = np.vstack([sample[step] for sample in forecasts])
        error = actual - guess

        rmse_rows.append(np.sqrt(np.square(error).mean(axis=0)))
        mae_rows.append(np.absolute(error).mean(axis=0))

    return np.vstack(rmse_rows), np.vstack(mae_rows)

In [1]:
# Walk-forward evaluation of the seasonal persistence baseline over the validation windows.
seas = 168*2*2*2 # weekly season - past 4 weeks

forecasts = []
targets = []

# One entry per validation step; each entry holds the metrics computed over
# ALL forecasts made so far (cumulative), not just the current one.
rmse_list = []
mae_list = []

img_list = []

# Forecast Horizons - H = {1, 2, ..., n_OUTPUT}
horizon_labels = ['t+' + str(h) for h in range(1, n_OUTPUT + 1)]

# Stack training and validation inputs once. At step i the model may see the
# training windows plus validation windows 0..i, i.e. history[:n_train_windows + i + 1].
# This replaces a per-iteration np.insert that copied the whole array every step
# and permanently grew x_tr_pers, which made the cell give different results on re-run.
n_train_windows = x_tr_pers.shape[0]
history = np.concatenate([x_tr_pers, x_val_pers], axis=0)

for i in tqdm(range(len(y_val_pers))):

    # make forecast
    available = history[:n_train_windows + i + 1]
    forecast = seasonal_mean(available, seas, n_OUTPUT).astype(np.int32)

    # retrieve true value
    true_value = y_val_pers[i].astype(np.int32)

    forecasts.append(forecast)
    targets.append(true_value)

    # evaluate model performance each period
    rmse, mae = evaluate_forecasts(targets, forecasts, n_OUTPUT)

    rmse_list.append(rmse)
    mae_list.append(mae)

    total_forecast = np.sum(forecast, axis=1)
    total_truth = np.sum(true_value, axis=1)

    fig, (ax_total, ax_zoom, ax_err) = plt.subplots(3, 1, figsize=(15,7))

    # Fixed y-range so consecutive frames are comparable.
    ax_total.set_title('Sum of predictions for all highways in Belgium')
    ax_total.plot(total_forecast, label='Baseline Prediction')
    ax_total.plot(total_truth, label='Truth')
    ax_total.set_ylabel('Trucks Flow')
    ax_total.set_ylim(-1, 350000)
    ax_total.legend()

    # Same curves with automatic y-limits.
    ax_zoom.plot(total_forecast, label='Baseline Prediction')
    ax_zoom.plot(total_truth, label='Truth')
    ax_zoom.set_ylabel('Zoom ')
    ax_zoom.legend()

    # Current-step MAE per horizon; bars show the spread of the cumulative MAE across streets.
    ax_err.errorbar(horizon_labels, np.absolute(true_value - forecast).mean(axis=1), mae_list[-1].std(axis=1),
                    fmt='o', color='grey',
                    ecolor='lightblue', elinewidth=3, capsize=0)
    ax_err.set_ylabel('error MAE +- std')
    ax_err.tick_params(axis='x', labelrotation=45)
    ax_err.set_xlabel('Time (t): '+str(datetime[i][-1]))

    fig.savefig(str(i)+'_plot.png')
    # Release the figure explicitly; hundreds are created in this loop.
    fig.clear()
    plt.close(fig)

    gc.collect()


# # Saving the objects:
# with open('list_plots.pickle', 'wb') as f:
#     pickle.dump([img_list], f)

In [1]:
# Summarise the RMSE snapshots: aggregate over evaluation steps first (axis 0),
# then over street segments, leaving one value per forecast horizon.
# NOTE(review): the spread printed after '+-' is the std across streets of the
# per-street std over steps (a std of a std) -- confirm this is the intended statistic.
rmse_stack = np.asarray(rmse_list)
RMSE_MEAN = rmse_stack.mean(axis=0).mean(axis=1)
RMSE_STD = rmse_stack.std(axis=0).std(axis=1)

for horizon, (avg, spread) in enumerate(zip(RMSE_MEAN, RMSE_STD), start=1):
    line = 't+' + str(horizon) + ' RMSE MEAN ' + str(np.round(avg, 3)) + ' +- ' + str(np.round(spread, 3))
    print(line)
    print('')
    

In [1]:
# Summarise the MAE snapshots: aggregate over evaluation steps first (axis 0),
# then over street segments, leaving one value per forecast horizon.
# NOTE(review): the spread printed after '+-' is the std across streets of the
# per-street std over steps (a std of a std) -- confirm this is the intended statistic.
mae_stack = np.asarray(mae_list)
MAE_MEAN = mae_stack.mean(axis=0).mean(axis=1)
MAE_STD = mae_stack.std(axis=0).std(axis=1)

for horizon, (avg, spread) in enumerate(zip(MAE_MEAN, MAE_STD), start=1):
    line = 't+' + str(horizon) + ' MAE MEAN ' + str(np.round(avg, 3)) + ' +- ' + str(np.round(spread, 3))
    print(line)
    print('')
    

In [1]:
import pickle

# Persist the per-step metric snapshots as a two-element list: [rmse_list, mae_list].
saved_metrics = [rmse_list, mae_list]
with open('save_predictions_results.pkl', 'wb') as out_file:
    pickle.dump(saved_metrics, out_file)

# Results Comparison between the Seasonal Model (Baseline) and the LSTM Encoder-Decoder Model

In [1]:
# mean_rmse_seas = [12.432, 12.42, 12.421, 12.425, 12.437, 12.463, 12.522, 10.868, 10.387, 10.321, 10.264, 10.199]
# std_rmse_seas = [2.34, 2.448, 2.584, 2.74, 2.936, 3.23, 3.815, 2.107, 1.749, 1.767, 1.759, 1.7]

# mean_rmse_ed = [11.618, 11.485, 11.646, 11.626, 11.693, 11.576, 11.625, 9.712, 9.081, 9.035, 9.009, 8.988]
# std_rmse_ed = [2.286, 2.57, 2.998, 3.143, 3.281, 3.472, 4.122, 2.175, 1.526, 1.528, 1.517, 1.471]

# mean_mae_seas = [7.592, 7.566, 7.552, 7.541, 7.536, 7.55, 7.614, 6.977, 6.727, 6.677, 6.636, 6.587]
# std_mae_seas = [1.674, 1.746, 1.861, 1.998, 2.153, 2.465, 3.361, 1.865, 1.395, 1.379, 1.343, 1.214]

# mean_mae_ed = [7.266, 6.988, 6.997, 6.983, 6.992, 6.967, 7.028, 6.336, 6.052, 6.027, 6.02, 6.013]
# std_mae_ed = [1.65, 1.699, 1.983, 2.155, 2.312, 2.563, 3.529, 1.919, 1.26, 1.235, 1.201, 1.125]

# ax = ['t+1', 't+2', 't+3', 't+4', 't+5', 't+6', 't+7', 't+8','t+9','t+10','t+11','t+12']


# mean_rmse = pd.DataFrame(list(zip(mean_rmse_seas, mean_rmse_ed)), 
#                columns = ['SW', 'ED'], index=ax)


# std_rmse = pd.DataFrame(list(zip(std_rmse_seas, std_rmse_ed)),
#                    columns = ['SW', 'ED'],index=ax)


# mean_mae = pd.DataFrame(list(zip(mean_mae_seas, mean_mae_ed)), 
#                columns = ['SW', 'ED'], index=ax)


# std_mae = pd.DataFrame(list(zip(std_mae_seas, std_mae_ed)),
#                    columns = ['SW', 'ED'],index=ax)


# fig, ax = plt.subplots(figsize=(12, 5))
# mean_rmse.plot.bar(yerr=std_rmse, ax=ax, capsize=3, rot=45, grid=True, color=['green', 'orange'])
# fig.suptitle('Multi-horizon RMSE', fontsize=20)
# plt.show()
 
# fig, ax = plt.subplots(figsize=(12, 5))
# mean_mae.plot.bar(yerr=std_mae, ax=ax, capsize=3, rot=45, grid=True, color=['limegreen', 'goldenrod'])
# fig.suptitle('Multi-horizon MAE', fontsize=20)
# plt.show()