# Long Short-Term Memory (LSTM)

In [2]:
import math
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from keras.models import Sequential
from keras.layers import *
from keras.losses import MeanSquaredError
from keras.metrics import RootMeanSquaredError
from keras.optimizers import Adam
from keras.callbacks import EarlyStopping
from utils import train_test_split, X_Y_split_DL, last_x_y_generator_DL, insert_end_DL

In [3]:
# Read the dataset from the CSV file into a DataFrame.
data = pd.read_csv(filepath_or_buffer='data_droped_nov18_dummy_final.csv')

In [5]:
# Use the 'datetime' column as the index and parse it into timestamps.
data = data.set_index(keys='datetime')
data.index = pd.to_datetime(data.index)
# Remove the leftover 'Unnamed: *' columns and 'diff', then put the remaining
# columns into a fixed, explicit order.
data = data.drop(columns=['Unnamed: 0.2', 'Unnamed: 0.1', 'Unnamed: 0', 'diff'])
data = data.reindex(columns=[
    'lots_available', 'total_lot', 'carpark_number', 'x_coord', 'y_coord',
    'car_park_decks', 'gantry_height',
    'BASEMENT CAR PARK', 'COVERED CAR PARK', 'MECHANISED AND SURFACE CAR PARK',
    'MULTI-STOREY CAR PARK', 'SURFACE CAR PARK',
    '7AM-10.30PM', '7AM-7PM', 'NO', 'WHOLE DAY',
    'NO.1', 'SUN & PH FR 1PM-10.30PM', 'SUN & PH FR 7AM-10.30PM', 'NO.2',
    'YES', 'N', 'Y',
])

In [None]:
# Derive two calendar features from the DatetimeIndex:
# weekday number (Monday = 0) and hour of the day.
data["day_of_week"], data["hour_of_day"] = data.index.dayofweek, data.index.hour

In [None]:
# Feature subset chosen by the RFE (recursive feature elimination) algorithm.
# The order matters: later cells address columns by position
# (0 = lots_available, 1 = day_of_week, 2 = hour_of_day).
features = [
    'lots_available',
    'day_of_week',
    'hour_of_day',
    'total_lot',
    'carpark_number',
    'x_coord',
    'y_coord',
    'car_park_decks',
    'gantry_height',
    'MULTI-STOREY CAR PARK',
    'WHOLE DAY',
    'NO.1',
    'SUN & PH FR 7AM-10.30PM',
]

In [None]:
# Keep only the selected features, in the order given by `features`.
# (The selection raises KeyError if a feature is missing; the reindex then
# just restates the same column order.)
data = data[features].reindex(columns=features)

In [None]:
# Blank out every row stamped 2016-02-19 11:15:00, then remove all rows that
# contain at least one NaN (which includes the rows just blanked).
data.loc['2016-02-19 11:15:00', :] = np.nan
data.dropna(axis=0, how='any', inplace=True)

In [None]:
# Two-stage split using the project helper from utils:
#   data  -> Train / Test  (test_step_size=673)
#   Train -> train / val   (test_step_size=480)
# NOTE(review): presumably the last `test_step_size` time steps are held out —
# confirm against utils.train_test_split.
Train, Test = train_test_split(data, test_step_size=673)
train, val = train_test_split(Train, test_step_size=480)

## Data normalization using MinMaxScaler (values are rescaled to the interval from 0 to 1)

In [None]:
# Scale every column independently. The scaler is fitted on the training split
# only and then applied unchanged to the validation and test splits.
# Note that `Train` only supplies the column names; it is not scaled itself.
for i in Train.columns:
    scaler = MinMaxScaler()

    # Column vectors of shape (n, 1), as required by the scaler.
    train_col, val_col, test_col = (
        frame[i].values.reshape((-1, 1)) for frame in (train, val, Test)
    )

    # Flatten the (n, 1) results back to 1-D before writing them to the frames.
    s_train = scaler.fit_transform(train_col).ravel()
    s_val = scaler.transform(val_col).ravel()
    s_test = scaler.transform(test_col).ravel()

    train[i], val[i], Test[i] = s_train, s_val, s_test

## performance on different time window using entire dataset

### 15 minutes (the previous timestep is used to predict the next timestep; 1 timestep = 15 minutes)

In [None]:
# Best model; hyperparameters selected from the literature and a Keras Tuner random search.
def LSTM_best():
  """Build and compile the stacked-LSTM regressor for the 15-minute window.

  The input shape (timesteps, features) is read from the global `X_train`,
  which must therefore exist before this function is called.
  Architecture: LSTM(150, sequences) -> Dropout(0.1) -> LSTM(100) -> Dense(1, linear).
  Compiled with MSE loss, Adam (learning rate 0.001) and an RMSE metric.
  """
  timesteps, n_features = X_train.shape[1], X_train.shape[2]
  model = Sequential([
      LSTM(150, return_sequences=True, input_shape=(timesteps, n_features)),
      Dropout(0.1),
      LSTM(100, return_sequences=False),
      Dense(1, activation='linear'),
  ])
  model.compile(
      loss=MeanSquaredError(),
      optimizer=Adam(learning_rate=0.001),
      metrics=[RootMeanSquaredError()],
  )
  return model

In [None]:
# Input/target arrays for the 15-minute setting (window_size=1).
# label_col_no=0 — presumably the target is column 0, i.e. lots_available;
# confirm against utils.X_Y_split_DL.
X_train, Y_train = X_Y_split_DL(train, window_size=1, label_col_no=0)
X_val, Y_val = X_Y_split_DL(val, window_size=1, label_col_no=0)
X_test, Y_test = X_Y_split_DL(Test, window_size=1, label_col_no=0)

In [None]:
# Build and train the 15-minute (window_size=1) model.
# The model is bound to `lstm_best_15min` because the test-prediction cell and
# all regional / single-car-park 15-minute cells refer to it by that name;
# previously only `lstm_best` was defined, so those cells raised NameError.
# `lstm_best` is kept as an alias for backward compatibility.
lstm_best_15min = LSTM_best()
lstm_best = lstm_best_15min
# Stop training once val_loss has not improved for 3 consecutive epochs.
stop_early = EarlyStopping(monitor='val_loss', patience=3)
history_15m = lstm_best_15min.fit(
    X_train, Y_train,
    validation_data=(X_val, Y_val),
    epochs=10,
    callbacks=[stop_early],
)

In [None]:
# Predictions of the 15-minute model on the test windows.
yp_lstm_15m = lstm_best_15min.predict(X_test)

In [None]:
# Whole dataset, 15-minute window: MSE, MAE, RMSE and R^2 on the test split,
# printed one per line, rounded to 5 decimals.
mse = mean_squared_error(Y_test, yp_lstm_15m)
mae = mean_absolute_error(Y_test, yp_lstm_15m)
rmse = math.sqrt(mse)
r2 = r2_score(Y_test, yp_lstm_15m)
print(round(mse, 5), round(mae, 5), round(rmse, 5), round(r2, 5), sep='\n')

### 1 hour (4 timesteps are used to predict the next timestep; 1 timestep = 15 minutes, 4 timesteps = 1 hour)

In [None]:
def LSTM_best_1hr():
  """Build and compile the stacked-LSTM regressor for the 1-hour window.

  Same architecture and compile settings as the 15-minute model; only the
  input shape differs, which is read from the global `X_train_1hr`.
  """
  timesteps, n_features = X_train_1hr.shape[1], X_train_1hr.shape[2]
  model = Sequential([
      LSTM(150, return_sequences=True, input_shape=(timesteps, n_features)),
      Dropout(0.1),
      LSTM(100, return_sequences=False),
      Dense(1, activation='linear'),
  ])
  model.compile(
      loss=MeanSquaredError(),
      optimizer=Adam(learning_rate=0.001),
      metrics=[RootMeanSquaredError()],
  )
  return model

In [None]:
# Input/target arrays for the 1-hour setting (window_size=4, label column 0).
X_train_1hr, Y_train_1hr = X_Y_split_DL(train, window_size=4, label_col_no=0)
X_val_1hr, Y_val_1hr = X_Y_split_DL(val, window_size=4, label_col_no=0)
X_test_1hr, Y_test_1hr = X_Y_split_DL(Test, window_size=4, label_col_no=0)

In [None]:
# Build and train the 1-hour model; training stops early once val_loss has
# not improved for 3 consecutive epochs.
lstm_best_1hr = LSTM_best_1hr()
stop_early = EarlyStopping(monitor='val_loss', patience=3)
history_1hr = lstm_best_1hr.fit(
    X_train_1hr, Y_train_1hr,
    validation_data=(X_val_1hr, Y_val_1hr),
    epochs=10,
    callbacks=[stop_early],
)

In [None]:
# Predictions of the 1-hour model on the test windows.
yp_lstm_1hr = lstm_best_1hr.predict(X_test_1hr)

In [None]:
# Whole dataset, 1-hour window: MSE, MAE, RMSE and R^2 on the test split,
# printed one per line, rounded to 5 decimals.
mse = mean_squared_error(Y_test_1hr, yp_lstm_1hr)
mae = mean_absolute_error(Y_test_1hr, yp_lstm_1hr)
rmse = math.sqrt(mse)
r2 = r2_score(Y_test_1hr, yp_lstm_1hr)
print(round(mse, 5), round(mae, 5), round(rmse, 5), round(r2, 5), sep='\n')

### 4 hours (16 timesteps are used to predict the next timestep; 1 timestep = 15 minutes, 16 timesteps = 4 hours)

In [None]:
def LSTM_best_4hr():
  """Build and compile the stacked-LSTM regressor for the 4-hour window.

  Same architecture and compile settings as the other window sizes; only the
  input shape differs, which is read from the global `X_train_4hr`.
  """
  timesteps, n_features = X_train_4hr.shape[1], X_train_4hr.shape[2]
  model = Sequential([
      LSTM(150, return_sequences=True, input_shape=(timesteps, n_features)),
      Dropout(0.1),
      LSTM(100, return_sequences=False),
      Dense(1, activation='linear'),
  ])
  model.compile(
      loss=MeanSquaredError(),
      optimizer=Adam(learning_rate=0.001),
      metrics=[RootMeanSquaredError()],
  )
  return model

In [None]:
# Input/target arrays for the 4-hour setting (window_size=16, label column 0).
X_train_4hr, Y_train_4hr = X_Y_split_DL(train, window_size=16, label_col_no=0)
X_val_4hr, Y_val_4hr = X_Y_split_DL(val, window_size=16, label_col_no=0)
X_test_4hr, Y_test_4hr = X_Y_split_DL(Test, window_size=16, label_col_no=0)

In [None]:
# Build and train the 4-hour model (hyperparameter configuration taken from
# the literature and a Keras Tuner random search); training stops early once
# val_loss has not improved for 3 consecutive epochs.
lstm_best_4hr = LSTM_best_4hr()
stop_early = EarlyStopping(monitor='val_loss', patience=3)
history_4hr = lstm_best_4hr.fit(
    X_train_4hr, Y_train_4hr,
    validation_data=(X_val_4hr, Y_val_4hr),
    epochs=10,
    callbacks=[stop_early],
)

In [None]:
# Predictions of the 4-hour model on the test windows.
yp_lstm_4hr = lstm_best_4hr.predict(X_test_4hr)

In [None]:
# Whole dataset, 4-hour window: MSE, MAE, RMSE and R^2 on the test split,
# printed one per line, rounded to 5 decimals.
mse = mean_squared_error(Y_test_4hr, yp_lstm_4hr)
mae = mean_absolute_error(Y_test_4hr, yp_lstm_4hr)
rmse = math.sqrt(mse)
r2 = r2_score(Y_test_4hr, yp_lstm_4hr)
print(round(mse, 5), round(mae, 5), round(rmse, 5), round(r2, 5), sep='\n')

## performance on different time window in five regions (group level)

- The dataset was divided into five subsets based on a k-means clustering analysis with five clusters. Each subset consists of 10 to 12 parking lots.

### Select 5 subsets based on the 5 centroids from the k-means clustering analysis

In [None]:
# Cut five regional subsets out of `data` with rectangular (strict) bounds on
# the x/y coordinates. North, west and east are open on one side.
central = data.loc[
    data['x_coord'].gt(30500) & data['x_coord'].lt(32500)
    & data['y_coord'].gt(35000) & data['y_coord'].lt(36000)
]
north_area = data.loc[
    data['x_coord'].gt(25000) & data['x_coord'].lt(26500)
    & data['y_coord'].gt(44000)
]
west_area = data.loc[
    data['x_coord'].lt(20000)
    & data['y_coord'].gt(37500) & data['y_coord'].lt(38500)
]
east_area = data.loc[
    data['x_coord'].gt(35000)
    & data['y_coord'].gt(38000) & data['y_coord'].lt(39000)
]
south_area = data.loc[
    data['x_coord'].gt(25000) & data['x_coord'].lt(26000)
    & data['y_coord'].gt(30000) & data['y_coord'].lt(31000)
]

### train-test split

In [None]:
# Split every regional subset with the same test_step_size (673) that is used
# for the whole dataset.
TRAIN_central, TEST_central = train_test_split(central, test_step_size=673)
TRAIN_north, TEST_north = train_test_split(north_area, test_step_size=673)
TRAIN_west, TEST_west = train_test_split(west_area, test_step_size=673)
TRAIN_east, TEST_east = train_test_split(east_area, test_step_size=673)
TRAIN_south, TEST_south = train_test_split(south_area, test_step_size=673)

### data normalization

In [None]:
def _minmax_scale_pair(train_df, test_df):
    """Min-max scale a train/test pair column by column.

    For each column a fresh MinMaxScaler is fitted on `train_df` only and then
    applied to `test_df`, mirroring the per-column scaling loop used for the
    whole dataset. The inputs are left untouched.

    Returns:
        (scaled_train, scaled_test) as new DataFrames.
    """
    scaled_train, scaled_test = train_df.copy(), test_df.copy()
    for col in train_df.columns:
        col_scaler = MinMaxScaler()
        scaled_train[col] = col_scaler.fit_transform(
            train_df[col].values.reshape((-1, 1))).ravel()
        scaled_test[col] = col_scaler.transform(
            test_df[col].values.reshape((-1, 1))).ravel()
    return scaled_train, scaled_test


# Previously these lines called `scaler(...)`, but `scaler` is the
# MinMaxScaler instance left behind by the whole-dataset scaling loop and is
# not callable (TypeError). The helper above performs the pairwise scaling.
# NOTE(review): each region is scaled with its own min/max, whereas the models
# were trained on data scaled with the whole training split — confirm that
# this is the intended normalization.
train_central, test_central = _minmax_scale_pair(TRAIN_central, TEST_central)
train_north, test_north = _minmax_scale_pair(TRAIN_north, TEST_north)
train_west, test_west = _minmax_scale_pair(TRAIN_west, TEST_west)
train_east, test_east = _minmax_scale_pair(TRAIN_east, TEST_east)
train_south, test_south = _minmax_scale_pair(TRAIN_south, TEST_south)

### 15 minute

In [None]:
# Regional test windows for the 15-minute setting (window_size=1, label column 0).
xtest_central, ytest_central = X_Y_split_DL(test_central, window_size=1, label_col_no=0)
xtest_east, ytest_east = X_Y_split_DL(test_east, window_size=1, label_col_no=0)
xtest_west, ytest_west = X_Y_split_DL(test_west, window_size=1, label_col_no=0)
xtest_south, ytest_south = X_Y_split_DL(test_south, window_size=1, label_col_no=0)
xtest_north, ytest_north = X_Y_split_DL(test_north, window_size=1, label_col_no=0)

In [None]:
# 15-minute window: all five regions are evaluated with the same trained
# model object (these are aliases, not copies).
m_central = m_east = m_west = m_south = m_north = lstm_best_15min

In [None]:
# Regional predictions, 15-minute window.
yp_central = m_central.predict(x=xtest_central)
yp_east = m_east.predict(x=xtest_east)
yp_west = m_west.predict(x=xtest_west)
yp_south = m_south.predict(x=xtest_south)
yp_north = m_north.predict(x=xtest_north)

In [None]:
# Central area, 15-minute window: MSE, MAE, RMSE, R^2 (one per line, 5 decimals).
mse = mean_squared_error(ytest_central, yp_central)
mae = mean_absolute_error(ytest_central, yp_central)
rmse = math.sqrt(mse)
r2 = r2_score(ytest_central, yp_central)
print(round(mse, 5), round(mae, 5), round(rmse, 5), round(r2, 5), sep='\n')

In [None]:
# East area, 15-minute window: MSE, MAE, RMSE, R^2 (one per line, 5 decimals).
mse = mean_squared_error(ytest_east, yp_east)
mae = mean_absolute_error(ytest_east, yp_east)
rmse = math.sqrt(mse)
r2 = r2_score(ytest_east, yp_east)
print(round(mse, 5), round(mae, 5), round(rmse, 5), round(r2, 5), sep='\n')

In [None]:
# West area, 15-minute window: MSE, MAE, RMSE, R^2 (one per line, 5 decimals).
mse = mean_squared_error(ytest_west, yp_west)
mae = mean_absolute_error(ytest_west, yp_west)
rmse = math.sqrt(mse)
r2 = r2_score(ytest_west, yp_west)
print(round(mse, 5), round(mae, 5), round(rmse, 5), round(r2, 5), sep='\n')

In [None]:
# South area, 15-minute window: MSE, MAE, RMSE, R^2 (one per line, 5 decimals).
mse = mean_squared_error(ytest_south, yp_south)
mae = mean_absolute_error(ytest_south, yp_south)
rmse = math.sqrt(mse)
r2 = r2_score(ytest_south, yp_south)
print(round(mse, 5), round(mae, 5), round(rmse, 5), round(r2, 5), sep='\n')

In [None]:
# North area, 15-minute window: MSE, MAE, RMSE, R^2 (one per line, 5 decimals).
mse = mean_squared_error(ytest_north, yp_north)
mae = mean_absolute_error(ytest_north, yp_north)
rmse = math.sqrt(mse)
r2 = r2_score(ytest_north, yp_north)
print(round(mse, 5), round(mae, 5), round(rmse, 5), round(r2, 5), sep='\n')

### 1 hour

In [None]:
# Regional test windows for the 1-hour setting (window_size=4, label column 0).
xtest_central, ytest_central = X_Y_split_DL(test_central, window_size=4, label_col_no=0)
xtest_east, ytest_east = X_Y_split_DL(test_east, window_size=4, label_col_no=0)
xtest_west, ytest_west = X_Y_split_DL(test_west, window_size=4, label_col_no=0)
xtest_south, ytest_south = X_Y_split_DL(test_south, window_size=4, label_col_no=0)
xtest_north, ytest_north = X_Y_split_DL(test_north, window_size=4, label_col_no=0)

In [None]:
# 1-hour window: all five regions are evaluated with the same trained
# model object (these are aliases, not copies).
m_central = m_east = m_west = m_south = m_north = lstm_best_1hr

In [None]:
# Regional predictions, 1-hour window.
yp_central = m_central.predict(x=xtest_central)
yp_east = m_east.predict(x=xtest_east)
yp_west = m_west.predict(x=xtest_west)
yp_south = m_south.predict(x=xtest_south)
yp_north = m_north.predict(x=xtest_north)

In [None]:
# Central area, 1-hour window: MSE, MAE, RMSE, R^2 (one per line, 5 decimals).
mse = mean_squared_error(ytest_central, yp_central)
mae = mean_absolute_error(ytest_central, yp_central)
rmse = math.sqrt(mse)
r2 = r2_score(ytest_central, yp_central)
print(round(mse, 5), round(mae, 5), round(rmse, 5), round(r2, 5), sep='\n')

In [None]:
# East area, 1-hour window: MSE, MAE, RMSE, R^2 (one per line, 5 decimals).
mse = mean_squared_error(ytest_east, yp_east)
mae = mean_absolute_error(ytest_east, yp_east)
rmse = math.sqrt(mse)
r2 = r2_score(ytest_east, yp_east)
print(round(mse, 5), round(mae, 5), round(rmse, 5), round(r2, 5), sep='\n')

In [None]:
# West area, 1-hour window: MSE, MAE, RMSE, R^2 (one per line, 5 decimals).
mse = mean_squared_error(ytest_west, yp_west)
mae = mean_absolute_error(ytest_west, yp_west)
rmse = math.sqrt(mse)
r2 = r2_score(ytest_west, yp_west)
print(round(mse, 5), round(mae, 5), round(rmse, 5), round(r2, 5), sep='\n')

In [None]:
# South area, 1-hour window: MSE, MAE, RMSE, R^2 (one per line, 5 decimals).
mse = mean_squared_error(ytest_south, yp_south)
mae = mean_absolute_error(ytest_south, yp_south)
rmse = math.sqrt(mse)
r2 = r2_score(ytest_south, yp_south)
print(round(mse, 5), round(mae, 5), round(rmse, 5), round(r2, 5), sep='\n')

In [None]:
# North area, 1-hour window: MSE, MAE, RMSE, R^2 (one per line, 5 decimals).
mse = mean_squared_error(ytest_north, yp_north)
mae = mean_absolute_error(ytest_north, yp_north)
rmse = math.sqrt(mse)
r2 = r2_score(ytest_north, yp_north)
print(round(mse, 5), round(mae, 5), round(rmse, 5), round(r2, 5), sep='\n')

### 4 hour

In [None]:
# Regional test windows for the 4-hour setting (window_size=16, label column 0).
xtest_central, ytest_central = X_Y_split_DL(test_central, window_size=16, label_col_no=0)
xtest_east, ytest_east = X_Y_split_DL(test_east, window_size=16, label_col_no=0)
xtest_west, ytest_west = X_Y_split_DL(test_west, window_size=16, label_col_no=0)
xtest_south, ytest_south = X_Y_split_DL(test_south, window_size=16, label_col_no=0)
xtest_north, ytest_north = X_Y_split_DL(test_north, window_size=16, label_col_no=0)

In [None]:
# 4-hour window: all five regions are evaluated with the same trained
# model object (these are aliases, not copies).
m_central = m_east = m_west = m_south = m_north = lstm_best_4hr

In [None]:
# Regional predictions, 4-hour window.
yp_central = m_central.predict(x=xtest_central)
yp_east = m_east.predict(x=xtest_east)
yp_west = m_west.predict(x=xtest_west)
yp_south = m_south.predict(x=xtest_south)
yp_north = m_north.predict(x=xtest_north)

In [None]:
# Central area, 4-hour window: MSE, MAE, RMSE, R^2 (one per line, 5 decimals).
mse = mean_squared_error(ytest_central, yp_central)
mae = mean_absolute_error(ytest_central, yp_central)
rmse = math.sqrt(mse)
r2 = r2_score(ytest_central, yp_central)
print(round(mse, 5), round(mae, 5), round(rmse, 5), round(r2, 5), sep='\n')

In [None]:
# East area, 4-hour window: MSE, MAE, RMSE, R^2 (one per line, 5 decimals).
mse = mean_squared_error(ytest_east, yp_east)
mae = mean_absolute_error(ytest_east, yp_east)
rmse = math.sqrt(mse)
r2 = r2_score(ytest_east, yp_east)
print(round(mse, 5), round(mae, 5), round(rmse, 5), round(r2, 5), sep='\n')

In [None]:
# West area, 4-hour window: MSE, MAE, RMSE, R^2 (one per line, 5 decimals).
mse = mean_squared_error(ytest_west, yp_west)
mae = mean_absolute_error(ytest_west, yp_west)
rmse = math.sqrt(mse)
r2 = r2_score(ytest_west, yp_west)
print(round(mse, 5), round(mae, 5), round(rmse, 5), round(r2, 5), sep='\n')

In [None]:
# South area, 4-hour window: MSE, MAE, RMSE, R^2 (one per line, 5 decimals).
mse = mean_squared_error(ytest_south, yp_south)
mae = mean_absolute_error(ytest_south, yp_south)
rmse = math.sqrt(mse)
r2 = r2_score(ytest_south, yp_south)
print(round(mse, 5), round(mae, 5), round(rmse, 5), round(r2, 5), sep='\n')

In [None]:
# North area, 4-hour window: MSE, MAE, RMSE, R^2 (one per line, 5 decimals).
mse = mean_squared_error(ytest_north, yp_north)
mae = mean_absolute_error(ytest_north, yp_north)
rmse = math.sqrt(mse)
r2 = r2_score(ytest_north, yp_north)
print(round(mse, 5), round(mae, 5), round(rmse, 5), round(r2, 5), sep='\n')

## performance on different time window in five regions (individual level)

In [None]:
# Individual car parks: from every scaled regional split keep the rows whose
# (scaled) carpark_number equals 0.
# NOTE(review): the numeric suffixes (41, 547, 22, 437, 514) are presumably
# the original car-park ids — confirm.
# central
train_41 = train_central.loc[train_central['carpark_number'] == 0]
test_41 = test_central.loc[test_central['carpark_number'] == 0]
# north
train_547 = train_north.loc[train_north['carpark_number'] == 0]
test_547 = test_north.loc[test_north['carpark_number'] == 0]
# west
train_22 = train_west.loc[train_west['carpark_number'] == 0]
test_22 = test_west.loc[test_west['carpark_number'] == 0]
# east
train_437 = train_east.loc[train_east['carpark_number'] == 0]
test_437 = test_east.loc[test_east['carpark_number'] == 0]
# south
train_514 = train_south.loc[train_south['carpark_number'] == 0]
test_514 = test_south.loc[test_south['carpark_number'] == 0]

### 15 minute

In [None]:
# `window_generator_DL` is used below but was never imported (NameError).
# NOTE(review): it is assumed to live in the project's `utils` module next to
# the other *_DL helpers — confirm.
from utils import window_generator_DL

# Single-car-park test windows, 15-minute setting (window_size=1, label column 0).
#central
xtest41, ytest41 = window_generator_DL(test_41, window_size=1, label_col_no=0)
#north
xtest547, ytest547 = window_generator_DL(test_547, window_size=1, label_col_no=0)
#west
xtest22, ytest22 = window_generator_DL(test_22, window_size=1, label_col_no=0)
#east
xtest437, ytest437 = window_generator_DL(test_437, window_size=1, label_col_no=0)
#south
xtest514, ytest514 = window_generator_DL(test_514, window_size=1, label_col_no=0)

In [None]:
# 15-minute window: the single-car-park evaluation reuses the same trained
# model object for every region (aliases, not copies).
m_central_single = m_east_single = m_west_single = m_south_single = m_north_single = lstm_best_15min

In [None]:
# Single-car-park predictions, 15-minute window (one car park per region).
yp_central = m_central_single.predict(x=xtest41)
yp_east = m_east_single.predict(x=xtest437)
yp_west = m_west_single.predict(x=xtest22)
yp_south = m_south_single.predict(x=xtest514)
yp_north = m_north_single.predict(x=xtest547)

In [None]:
# Central car park, 15-minute window: MSE, MAE, RMSE, R^2 (one per line, 5 decimals).
mse = mean_squared_error(ytest41, yp_central)
mae = mean_absolute_error(ytest41, yp_central)
rmse = math.sqrt(mse)
r2 = r2_score(ytest41, yp_central)
print(round(mse, 5), round(mae, 5), round(rmse, 5), round(r2, 5), sep='\n')

In [None]:
# East car park, 15-minute window: MSE, MAE, RMSE, R^2 (one per line, 5 decimals).
mse = mean_squared_error(ytest437, yp_east)
mae = mean_absolute_error(ytest437, yp_east)
rmse = math.sqrt(mse)
r2 = r2_score(ytest437, yp_east)
print(round(mse, 5), round(mae, 5), round(rmse, 5), round(r2, 5), sep='\n')

In [None]:
# West car park, 15-minute window: MSE, MAE, RMSE, R^2 (one per line, 5 decimals).
mse = mean_squared_error(ytest22, yp_west)
mae = mean_absolute_error(ytest22, yp_west)
rmse = math.sqrt(mse)
r2 = r2_score(ytest22, yp_west)
print(round(mse, 5), round(mae, 5), round(rmse, 5), round(r2, 5), sep='\n')

In [None]:
# South car park, 15-minute window: MSE, MAE, RMSE, R^2 (one per line, 5 decimals).
mse = mean_squared_error(ytest514, yp_south)
mae = mean_absolute_error(ytest514, yp_south)
rmse = math.sqrt(mse)
r2 = r2_score(ytest514, yp_south)
print(round(mse, 5), round(mae, 5), round(rmse, 5), round(r2, 5), sep='\n')

In [None]:
# North car park, 15-minute window: MSE, MAE, RMSE, R^2 (one per line, 5 decimals).
mse = mean_squared_error(ytest547, yp_north)
mae = mean_absolute_error(ytest547, yp_north)
rmse = math.sqrt(mse)
r2 = r2_score(ytest547, yp_north)
print(round(mse, 5), round(mae, 5), round(rmse, 5), round(r2, 5), sep='\n')

### 1 hour

In [None]:
# `window_generator_DL` is used below but was never imported (NameError).
# NOTE(review): it is assumed to live in the project's `utils` module next to
# the other *_DL helpers — confirm.
from utils import window_generator_DL

# Single-car-park test windows, 1-hour setting (window_size=4, label column 0).
#central
xtest41, ytest41 = window_generator_DL(test_41, window_size=4, label_col_no=0)
#north
xtest547, ytest547 = window_generator_DL(test_547, window_size=4, label_col_no=0)
#west
xtest22, ytest22 = window_generator_DL(test_22, window_size=4, label_col_no=0)
#east
xtest437, ytest437 = window_generator_DL(test_437, window_size=4, label_col_no=0)
#south
xtest514, ytest514 = window_generator_DL(test_514, window_size=4, label_col_no=0)

In [None]:
# 1-hour window: the single-car-park evaluation reuses the same trained
# model object for every region (aliases, not copies).
m_central_single = m_east_single = m_west_single = m_south_single = m_north_single = lstm_best_1hr

In [None]:
# Single-car-park predictions, 1-hour window (one car park per region).
yp_central = m_central_single.predict(x=xtest41)
yp_east = m_east_single.predict(x=xtest437)
yp_west = m_west_single.predict(x=xtest22)
yp_south = m_south_single.predict(x=xtest514)
yp_north = m_north_single.predict(x=xtest547)

In [None]:
# Central car park, 1-hour window: MSE, MAE, RMSE, R^2 (one per line, 5 decimals).
mse = mean_squared_error(ytest41, yp_central)
mae = mean_absolute_error(ytest41, yp_central)
rmse = math.sqrt(mse)
r2 = r2_score(ytest41, yp_central)
print(round(mse, 5), round(mae, 5), round(rmse, 5), round(r2, 5), sep='\n')

In [None]:
# East car park, 1-hour window: MSE, MAE, RMSE, R^2 (one per line, 5 decimals).
mse = mean_squared_error(ytest437, yp_east)
mae = mean_absolute_error(ytest437, yp_east)
rmse = math.sqrt(mse)
r2 = r2_score(ytest437, yp_east)
print(round(mse, 5), round(mae, 5), round(rmse, 5), round(r2, 5), sep='\n')

In [None]:
# West car park, 1-hour window: MSE, MAE, RMSE, R^2 (one per line, 5 decimals).
mse = mean_squared_error(ytest22, yp_west)
mae = mean_absolute_error(ytest22, yp_west)
rmse = math.sqrt(mse)
r2 = r2_score(ytest22, yp_west)
print(round(mse, 5), round(mae, 5), round(rmse, 5), round(r2, 5), sep='\n')

In [None]:
# South car park, 1-hour window: MSE, MAE, RMSE, R^2 (one per line, 5 decimals).
mse = mean_squared_error(ytest514, yp_south)
mae = mean_absolute_error(ytest514, yp_south)
rmse = math.sqrt(mse)
r2 = r2_score(ytest514, yp_south)
print(round(mse, 5), round(mae, 5), round(rmse, 5), round(r2, 5), sep='\n')

In [None]:
# North car park, 1-hour window: MSE, MAE, RMSE, R^2 (one per line, 5 decimals).
mse = mean_squared_error(ytest547, yp_north)
mae = mean_absolute_error(ytest547, yp_north)
rmse = math.sqrt(mse)
r2 = r2_score(ytest547, yp_north)
print(round(mse, 5), round(mae, 5), round(rmse, 5), round(r2, 5), sep='\n')

### 4 hour

In [None]:
# `window_generator_DL` is used below but was never imported (NameError).
# NOTE(review): it is assumed to live in the project's `utils` module next to
# the other *_DL helpers — confirm.
from utils import window_generator_DL

# Single-car-park test windows, 4-hour setting (window_size=16, label column 0).
#central
xtest41, ytest41 = window_generator_DL(test_41, window_size=16, label_col_no=0)
#north
xtest547, ytest547 = window_generator_DL(test_547, window_size=16, label_col_no=0)
#west
xtest22, ytest22 = window_generator_DL(test_22, window_size=16, label_col_no=0)
#east
xtest437, ytest437 = window_generator_DL(test_437, window_size=16, label_col_no=0)
#south
xtest514, ytest514 = window_generator_DL(test_514, window_size=16, label_col_no=0)

In [None]:
# 4-hour window: the single-car-park evaluation reuses the same trained
# model object for every region (aliases, not copies).
m_central_single = m_east_single = m_west_single = m_south_single = m_north_single = lstm_best_4hr

In [None]:
# Single-car-park predictions, 4-hour window (one car park per region).
yp_central = m_central_single.predict(x=xtest41)
yp_east = m_east_single.predict(x=xtest437)
yp_west = m_west_single.predict(x=xtest22)
yp_south = m_south_single.predict(x=xtest514)
yp_north = m_north_single.predict(x=xtest547)

In [None]:
# Central car park, 4-hour window: MSE, MAE, RMSE, R^2 (one per line, 5 decimals).
mse = mean_squared_error(ytest41, yp_central)
mae = mean_absolute_error(ytest41, yp_central)
rmse = math.sqrt(mse)
r2 = r2_score(ytest41, yp_central)
print(round(mse, 5), round(mae, 5), round(rmse, 5), round(r2, 5), sep='\n')

In [None]:
# East car park, 4-hour window: MSE, MAE, RMSE, R^2 (one per line, 5 decimals).
mse = mean_squared_error(ytest437, yp_east)
mae = mean_absolute_error(ytest437, yp_east)
rmse = math.sqrt(mse)
r2 = r2_score(ytest437, yp_east)
print(round(mse, 5), round(mae, 5), round(rmse, 5), round(r2, 5), sep='\n')

In [None]:
# West car park, 4-hour window: MSE, MAE, RMSE, R^2 (one per line, 5 decimals).
mse = mean_squared_error(ytest22, yp_west)
mae = mean_absolute_error(ytest22, yp_west)
rmse = math.sqrt(mse)
r2 = r2_score(ytest22, yp_west)
print(round(mse, 5), round(mae, 5), round(rmse, 5), round(r2, 5), sep='\n')

In [None]:
# South car park, 4-hour window: MSE, MAE, RMSE, R^2 (one per line, 5 decimals).
mse = mean_squared_error(ytest514, yp_south)
mae = mean_absolute_error(ytest514, yp_south)
rmse = math.sqrt(mse)
r2 = r2_score(ytest514, yp_south)
print(round(mse, 5), round(mae, 5), round(rmse, 5), round(r2, 5), sep='\n')

In [None]:
# North car park, 4-hour window: MSE, MAE, RMSE, R^2 (one per line, 5 decimals).
mse = mean_squared_error(ytest547, yp_north)
mae = mean_absolute_error(ytest547, yp_north)
rmse = math.sqrt(mse)
r2 = r2_score(ytest547, yp_north)
print(round(mse, 5), round(mae, 5), round(rmse, 5), round(r2, 5), sep='\n')

## Performance over different time horizons using the model with the best time window size (4 hours = 16 timesteps)

### entire dataset

In [None]:
# Alias (not a copy) of the trained 4-hour model, used for the whole-dataset horizon forecast.
lstm_4hr_whole = lstm_best_4hr

In [None]:
# Seed data for the recursive forecast, taken from the validation split.
# NOTE(review): presumably returns the last 16-step window (and its label) per
# car park — confirm against utils.last_x_y_generator_DL.
x,y = last_x_y_generator_DL(val, window_size=16,label_col_no=0)

In [None]:
# Drop the first entry along axis 1 (the time axis of the window, presumably
# the oldest step); axes 0 and 2 are kept in full.
current_batch = x[:,1:,:]

In [None]:
# Entire dataset: build `L`, the table of the first `future_len` test rows of
# every car park, ordered step by step (step 0 for all car parks, then step 1, ...).
future_len = 40
Test_new = Test[['lots_available', 'carpark_number','day_of_week','hour_of_day']]
Test_new = Test_new.sort_values(by=['carpark_number', 'datetime'])
l=[]
for i in sorted(Test_new.carpark_number.value_counts().keys()):
  inner = Test_new[Test_new.carpark_number == i]
  # After reset_index the positional index 0..n-1 is the step number.
  inner = inner.reset_index()
  inner = inner.iloc[0:future_len,:]
  l.append(inner)
# DataFrame.append was removed in pandas 2.0; pd.concat stacks the per-car-park
# frames the same way and keeps their 0..future_len-1 indices.
L = pd.concat(l)
# 'value' holds the step number, so rows can be ordered by (step, car park).
L['value'] = L.index.values
L = L.sort_index()
L = L.sort_values(by=['value','carpark_number'])

In [None]:
# Recursive multi-step forecast over the whole dataset: predict one step,
# feed the prediction back into the input window, repeat `future` times.
future=40
forecast = []
Xin = current_batch

for i in range(future):
    # NOTE(review): 855 is hard-coded both as the predict batch size and as the
    # number of rows of `L` consumed per step — presumably the number of car
    # parks; confirm that it equals Xin.shape[0].
    out = lstm_4hr_whole.predict(Xin, batch_size=855)    
    forecast.append(out) 
    # NOTE(review): prints the whole accumulated list on every iteration.
    print(forecast)
    # insert_end_DL comes from utils; presumably it appends `out` as the newest
    # step of a 16-step window — TODO confirm.
    Xin = insert_end_DL(Xin,out,16)
    # Overwrite feature columns 1 and 2 of step index 15 with the day_of_week
    # and hour_of_day values taken from `L` for forecast step i.
    Xin[:,15,1:2] = L[['day_of_week']][i*855:(i*855)+855]
    Xin[:,15,2:3] = L[['hour_of_day']][i*855:(i*855)+855]

In [None]:
# Ground truth per forecast step: 40 consecutive chunks of 855 rows of
# L['lots_available'].
Y_t = L['lots_available']

l = [Y_t[step * 855:(step * 855) + 855] for step in range(40)]

In [None]:
# RMSE of every one of the 40 forecast steps against its ground-truth chunk.
rmse40step = [
    math.sqrt(mean_squared_error(y_true=l[step], y_pred=forecast[step]))
    for step in range(40)
]

In [None]:
# Store the per-step RMSE values as a one-column table in a CSV file.
rmse40step = pd.DataFrame(data=rmse40step)
rmse40step.to_csv(path_or_buf='rmse40step855.csv')

### group level (five regions, 10 to 12 parking lots per region)

In [None]:
# Every region forecasts with the same trained 4-hour model (aliases, not copies).
lstm_4hr_central = lstm_4hr_north = lstm_4hr_east = lstm_4hr_west = lstm_4hr_south = lstm_best_4hr

In [None]:
# Seed data for the regional forecasts, taken from each region's scaled
# training split (window_size=16, label column 0). The labels are discarded.
x_central, _ = last_x_y_generator_DL(train_central, window_size=16, label_col_no=0)
x_north, _ = last_x_y_generator_DL(train_north, window_size=16, label_col_no=0)
x_west, _ = last_x_y_generator_DL(train_west, window_size=16, label_col_no=0)
x_east, _ = last_x_y_generator_DL(train_east, window_size=16, label_col_no=0)
x_south, _ = last_x_y_generator_DL(train_south, window_size=16, label_col_no=0)

In [None]:
# Drop the first entry along axis 1 of every regional seed window.
current_batch_central = x_central[:, 1:, :]
current_batch_north = x_north[:, 1:, :]
current_batch_west = x_west[:, 1:, :]
current_batch_east = x_east[:, 1:, :]
current_batch_south = x_south[:, 1:, :]

In [None]:
# Build `L`, the table of the first `future_len` test rows of every car park in
# the SOUTH region, ordered step by step.
# NOTE(review): only `test_south` is used here, although the following cells
# forecast all five regions with this same `L`.
future_len = 40
Test_new = test_south[['lots_available', 'carpark_number','day_of_week','hour_of_day']]
Test_new = Test_new.sort_values(by=['carpark_number', 'datetime'])
l=[]
for i in sorted(Test_new.carpark_number.value_counts().keys()):
  inner = Test_new[Test_new.carpark_number == i]
  # After reset_index the positional index 0..n-1 is the step number.
  inner = inner.reset_index()
  inner = inner.iloc[0:future_len,:]
  l.append(inner)
# DataFrame.append was removed in pandas 2.0; pd.concat stacks the per-car-park
# frames the same way and keeps their 0..future_len-1 indices.
L = pd.concat(l)
# 'value' holds the step number, so rows can be ordered by (step, car park).
L['value'] = L.index.values
L = L.sort_index()
L = L.sort_values(by=['value','carpark_number'])

In [None]:
#central
# Recursive 40-step forecast for the central region (batch of 11 hard-coded).
# NOTE(review): `L` is built from `test_south` in the preceding cell, so the
# calendar features written below come from the south subset, sliced in chunks
# of 11 — confirm this is intended. `forcast` is reset by every region cell, so
# only the most recently run region is scored afterwards.
future=40
forcast = []
Xin = current_batch_central

for i in range(future):
    out = lstm_4hr_central.predict(Xin, batch_size=11)    
    forcast.append(out) 
    # NOTE(review): prints the whole accumulated list on every iteration.
    print(forcast)
    # insert_end_DL (utils) presumably appends `out` as the newest step — TODO confirm.
    Xin = insert_end_DL(Xin,out,16)
    # Overwrite feature columns 1 and 2 of step index 15 with the
    # day_of_week / hour_of_day values for forecast step i.
    Xin[:,15,1:2] = L[['day_of_week']][i*11:(i*11)+11]
    Xin[:,15,2:3] = L[['hour_of_day']][i*11:(i*11)+11]

In [None]:
#north
# Recursive 40-step forecast for the north region (batch of 11 hard-coded).
# NOTE(review): `L` is built from `test_south` in an earlier cell, so the
# calendar features written below come from the south subset, sliced in chunks
# of 11 — confirm this is intended. `forcast` is reset by every region cell, so
# only the most recently run region is scored afterwards.
future=40
forcast = []
Xin = current_batch_north

for i in range(future):
    out = lstm_4hr_north.predict(Xin, batch_size=11)    
    forcast.append(out) 
    # NOTE(review): prints the whole accumulated list on every iteration.
    print(forcast)
    # insert_end_DL (utils) presumably appends `out` as the newest step — TODO confirm.
    Xin = insert_end_DL(Xin,out,16)
    # Overwrite feature columns 1 and 2 of step index 15 with the
    # day_of_week / hour_of_day values for forecast step i.
    Xin[:,15,1:2] = L[['day_of_week']][i*11:(i*11)+11]
    Xin[:,15,2:3] = L[['hour_of_day']][i*11:(i*11)+11]

In [None]:
#west
# Recursive 40-step forecast for the west region (batch of 12 hard-coded).
# NOTE(review): `L` is built from `test_south` in an earlier cell, so the
# calendar features written below come from the south subset, sliced in chunks
# of 12 — confirm this is intended. `forcast` is reset by every region cell, so
# only the most recently run region is scored afterwards.
future=40
forcast = []
Xin = current_batch_west

for i in range(future):
    out = lstm_4hr_west.predict(Xin, batch_size=12)    
    forcast.append(out) 
    # NOTE(review): prints the whole accumulated list on every iteration.
    print(forcast)
    # insert_end_DL (utils) presumably appends `out` as the newest step — TODO confirm.
    Xin = insert_end_DL(Xin,out,16)
    # Overwrite feature columns 1 and 2 of step index 15 with the
    # day_of_week / hour_of_day values for forecast step i.
    Xin[:,15,1:2] = L[['day_of_week']][i*12:(i*12)+12]
    Xin[:,15,2:3] = L[['hour_of_day']][i*12:(i*12)+12]

In [None]:
#east
# Recursive 40-step forecast for the east region (batch of 11 hard-coded).
# NOTE(review): `L` is built from `test_south` in an earlier cell, so the
# calendar features written below come from the south subset, sliced in chunks
# of 11 — confirm this is intended. `forcast` is reset by every region cell, so
# only the most recently run region is scored afterwards.
future=40
forcast = []
Xin = current_batch_east

for i in range(future):
    out = lstm_4hr_east.predict(Xin, batch_size=11)    
    forcast.append(out) 
    # NOTE(review): prints the whole accumulated list on every iteration.
    print(forcast)
    # insert_end_DL (utils) presumably appends `out` as the newest step — TODO confirm.
    Xin = insert_end_DL(Xin,out,16)
    # Overwrite feature columns 1 and 2 of step index 15 with the
    # day_of_week / hour_of_day values for forecast step i.
    Xin[:,15,1:2] = L[['day_of_week']][i*11:(i*11)+11]
    Xin[:,15,2:3] = L[['hour_of_day']][i*11:(i*11)+11]

In [None]:
#south
# Recursive 40-step forecast for the south region (batch of 10 hard-coded).
# This is the region `L` is built from, and the one scored by the cells below
# when the notebook is run top to bottom.
future=40
forcast = []
Xin = current_batch_south

for i in range(future):
    out = lstm_4hr_south.predict(Xin, batch_size=10)    
    forcast.append(out) 
    # NOTE(review): prints the whole accumulated list on every iteration.
    print(forcast)
    # insert_end_DL (utils) presumably appends `out` as the newest step — TODO confirm.
    Xin = insert_end_DL(Xin,out,16)
    # Overwrite feature columns 1 and 2 of step index 15 with the
    # day_of_week / hour_of_day values for forecast step i.
    Xin[:,15,1:2] = L[['day_of_week']][i*10:(i*10)+10]
    Xin[:,15,2:3] = L[['hour_of_day']][i*10:(i*10)+10]

In [None]:
# Ground truth per forecast step: 40 consecutive chunks of 10 rows of
# L['lots_available'] (10 matches the south-region batch size).
Y_t = L['lots_available']

l = [Y_t[step * 10:(step * 10) + 10] for step in range(40)]

In [None]:
# RMSE of every one of the 40 forecast steps against its ground-truth chunk.
rmse40step = [
    math.sqrt(mean_squared_error(y_true=l[step], y_pred=forcast[step]))
    for step in range(40)
]

In [None]:
# Store the per-step RMSE values as a one-column table in a CSV file.
rmse40step = pd.DataFrame(data=rmse40step)
rmse40step.to_csv(path_or_buf='rmse40step_south_lstm.csv')

### individual level (five regions, 1 parking lot per region)

In [None]:
# Every single-car-park forecast uses the same trained 4-hour model (aliases, not copies).
lstm_best_4hr_41 = lstm_best_4hr_547 = lstm_best_4hr_22 = lstm_best_4hr_437 = lstm_best_4hr_514 = lstm_best_4hr

In [None]:
# `window_generator_DL` is used below but was never imported (NameError).
# NOTE(review): it is assumed to live in the project's `utils` module next to
# the other *_DL helpers — confirm.
from utils import window_generator_DL

# 16-step training windows per single car park. The helper returns an (x, y)
# pair (it is unpacked that way for the test windows), so the inputs must be
# unpacked here too: the next cell slices them as 3-D arrays, which fails on a tuple.
#central
xtrain41, _ = window_generator_DL(train_41, window_size=16, label_col_no=0)
#north
xtrain547, _ = window_generator_DL(train_547, window_size=16, label_col_no=0)
#west
xtrain22, _ = window_generator_DL(train_22, window_size=16, label_col_no=0)
#east
xtrain437, _ = window_generator_DL(train_437, window_size=16, label_col_no=0)
#south
xtrain514, _ = window_generator_DL(train_514, window_size=16, label_col_no=0)

In [None]:
# Keep only the last window of every car park; slicing with -1: preserves the
# leading axis, so each result still has a batch dimension of length 1.
last_central = xtrain41[-1:, :, :]
last_north = xtrain547[-1:, :, :]
last_west = xtrain22[-1:, :, :]
last_east = xtrain437[-1:, :, :]
last_south = xtrain514[-1:, :, :]

In [None]:
# Drop the first entry along axis 1 of every single-car-park seed window.
current_batch_41 = last_central[:, 1:, :]
current_batch_547 = last_north[:, 1:, :]
current_batch_22 = last_west[:, 1:, :]
current_batch_437 = last_east[:, 1:, :]
current_batch_514 = last_south[:, 1:, :]

In [None]:
# Build `L`, the table of the first `future_len` test rows of the single SOUTH
# car park (`test_514`), ordered step by step.
# NOTE(review): only `test_514` is used here, although the following cells
# forecast the single car parks of all five regions with this same `L`.
future_len = 40
Test_new = test_514[['lots_available', 'carpark_number','day_of_week','hour_of_day']]
Test_new = Test_new.sort_values(by=['carpark_number', 'datetime'])
l=[]
for i in sorted(Test_new.carpark_number.value_counts().keys()):
  inner = Test_new[Test_new.carpark_number == i]
  # After reset_index the positional index 0..n-1 is the step number.
  inner = inner.reset_index()
  inner = inner.iloc[0:future_len,:]
  l.append(inner)
# DataFrame.append was removed in pandas 2.0; pd.concat stacks the per-car-park
# frames the same way and keeps their 0..future_len-1 indices.
L = pd.concat(l)
# 'value' holds the step number, so rows can be ordered by (step, car park).
L['value'] = L.index.values
L = L.sort_index()
L = L.sort_values(by=['value','carpark_number'])

In [None]:
#central
# Recursive 40-step forecast for the single central car park (batch of 1).
# NOTE(review): `L` is built from `test_514` (south) in the preceding cell, so
# the calendar features written below come from that car park — confirm this is
# intended. `forcast` is reset by every cell of this group, so only the most
# recently run one is scored afterwards.
future=40
forcast = []
Xin = current_batch_41
# NOTE(review): `time` is assigned but never used in this cell.
time=[]
for i in range(future):
    out = lstm_best_4hr_41.predict(Xin, batch_size=1)    
    forcast.append(out) 
    # NOTE(review): prints the whole accumulated list on every iteration.
    print(forcast)
    # insert_end_DL (utils) presumably appends `out` as the newest step — TODO confirm.
    Xin = insert_end_DL(Xin,out,16)
    # Overwrite feature columns 1 and 2 of step index 15 with the
    # day_of_week / hour_of_day values for forecast step i.
    Xin[:,15,1:2] = L[['day_of_week']][i*1:(i*1)+1]
    Xin[:,15,2:3] = L[['hour_of_day']][i*1:(i*1)+1]

In [None]:
#north
# Recursive 40-step forecast for the single north car park (batch of 1).
# NOTE(review): `L` is built from `test_514` (south) in an earlier cell, so the
# calendar features written below come from that car park — confirm this is
# intended. `forcast` is reset by every cell of this group, so only the most
# recently run one is scored afterwards.
future=40
forcast = []
Xin = current_batch_547
# NOTE(review): `time` is assigned but never used in this cell.
time=[]
for i in range(future):
    out = lstm_best_4hr_547.predict(Xin, batch_size=1)    
    forcast.append(out) 
    # NOTE(review): prints the whole accumulated list on every iteration.
    print(forcast)
    # insert_end_DL (utils) presumably appends `out` as the newest step — TODO confirm.
    Xin = insert_end_DL(Xin,out,16)
    # Overwrite feature columns 1 and 2 of step index 15 with the
    # day_of_week / hour_of_day values for forecast step i.
    Xin[:,15,1:2] = L[['day_of_week']][i*1:(i*1)+1]
    Xin[:,15,2:3] = L[['hour_of_day']][i*1:(i*1)+1]

In [None]:
#west
# Recursive 40-step forecast for the single west car park (batch of 1).
# NOTE(review): `L` is built from `test_514` (south) in an earlier cell, so the
# calendar features written below come from that car park — confirm this is
# intended. `forcast` is reset by every cell of this group, so only the most
# recently run one is scored afterwards.
future=40
forcast = []
Xin = current_batch_22
# NOTE(review): `time` is assigned but never used in this cell.
time=[]
for i in range(future):
    out = lstm_best_4hr_22.predict(Xin, batch_size=1)    
    forcast.append(out) 
    # NOTE(review): prints the whole accumulated list on every iteration.
    print(forcast)
    # insert_end_DL (utils) presumably appends `out` as the newest step — TODO confirm.
    Xin = insert_end_DL(Xin,out,16)
    # Overwrite feature columns 1 and 2 of step index 15 with the
    # day_of_week / hour_of_day values for forecast step i.
    Xin[:,15,1:2] = L[['day_of_week']][i*1:(i*1)+1]
    Xin[:,15,2:3] = L[['hour_of_day']][i*1:(i*1)+1]

In [None]:
#east
# Recursive 40-step forecast for the single east car park (batch of 1).
# NOTE(review): `L` is built from `test_514` (south) in an earlier cell, so the
# calendar features written below come from that car park — confirm this is
# intended. `forcast` is reset by every cell of this group, so only the most
# recently run one is scored afterwards.
future=40
forcast = []
Xin = current_batch_437
# NOTE(review): `time` is assigned but never used in this cell.
time=[]
for i in range(future):
    out = lstm_best_4hr_437.predict(Xin, batch_size=1)    
    forcast.append(out) 
    # NOTE(review): prints the whole accumulated list on every iteration.
    print(forcast)
    # insert_end_DL (utils) presumably appends `out` as the newest step — TODO confirm.
    Xin = insert_end_DL(Xin,out,16)
    # Overwrite feature columns 1 and 2 of step index 15 with the
    # day_of_week / hour_of_day values for forecast step i.
    Xin[:,15,1:2] = L[['day_of_week']][i*1:(i*1)+1]
    Xin[:,15,2:3] = L[['hour_of_day']][i*1:(i*1)+1]

In [None]:
#south
# Recursive 40-step forecast for the single south car park (batch of 1).
# This is the car park `L` is built from, and the one scored by the cells below
# when the notebook is run top to bottom.
future=40
forcast = []
Xin = current_batch_514
# NOTE(review): `time` is assigned but never used in this cell.
time=[]
for i in range(future):
    out = lstm_best_4hr_514.predict(Xin, batch_size=1)    
    forcast.append(out) 
    # NOTE(review): prints the whole accumulated list on every iteration.
    print(forcast)
    # insert_end_DL (utils) presumably appends `out` as the newest step — TODO confirm.
    Xin = insert_end_DL(Xin,out,16)
    # Overwrite feature columns 1 and 2 of step index 15 with the
    # day_of_week / hour_of_day values for forecast step i.
    Xin[:,15,1:2] = L[['day_of_week']][i*1:(i*1)+1]
    Xin[:,15,2:3] = L[['hour_of_day']][i*1:(i*1)+1]

In [None]:
# Ground truth per forecast step: 40 consecutive one-row chunks of
# L['lots_available'].
Y_t = L['lots_available']

l = [Y_t[step * 1:(step * 1) + 1] for step in range(40)]

In [None]:
# RMSE of every one of the 40 forecast steps against its ground-truth chunk.
rmse40step = [
    math.sqrt(mean_squared_error(y_true=l[step], y_pred=forcast[step]))
    for step in range(40)
]

In [None]:
# Store the per-step RMSE values as a one-column table in a CSV file.
rmse40step = pd.DataFrame(data=rmse40step)
rmse40step.to_csv(path_or_buf='rmse40step_single_south_lstm.csv')

## robustness check (train-test error)

- Based on the performance across different time windows and sample sizes, the study determined that the best time window size is 16 timesteps (4 hours).
- To check the model fit, the study compared the model's performance on the training and validation sets.

In [None]:
# Diagnostic learning curves of the 4-hour model, per epoch.
# Both panels are drawn on ONE figure (top: MSE loss, bottom: RMSE).
# Previously each panel was placed on its own figure while still using a
# 2x1 subplot grid, which left half of each figure empty.
fig, (loss_ax, rmse_ax) = plt.subplots(2, 1, figsize=(8, 6))

# Training vs. validation loss.
loss_ax.set_title('MSE Loss')
loss_ax.plot(history_4hr.history['loss'], color='blue', label='training')
loss_ax.plot(history_4hr.history['val_loss'], color='orange', label='validation')
loss_ax.legend()

# Training vs. validation RMSE (the metric tracked during fit; not an accuracy).
rmse_ax.set_title(' RMSE')
rmse_ax.plot(history_4hr.history['root_mean_squared_error'], color='blue', label='training')
rmse_ax.plot(history_4hr.history['val_root_mean_squared_error'], color='orange', label='validation')
rmse_ax.legend()

# Keep the lower panel's title from overlapping the upper panel.
fig.tight_layout()
plt.show()