# Bagging Regressor

In [None]:
import math
import numpy as np
import pandas as pd
from sklearn.ensemble import BaggingRegressor
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from utils import train_test_split, X_Y_split, scaler, window_generator, last_x_y_generator, insert_end, insert_end_multi

In [None]:
# Load the raw car-park availability table from disk.
data = pd.read_csv(filepath_or_buffer='data_droped_nov18_dummy_final.csv')

In [None]:
# Use the timestamp column as a DatetimeIndex.
data = data.set_index('datetime')
data.index = pd.to_datetime(data.index)

# Drop leftover CSV index columns and the 'diff' column.
data = data.drop(columns=['Unnamed: 0.2', 'Unnamed: 0.1', 'Unnamed: 0', 'diff'])

# Fix the column order: target first, then static car-park attributes and dummies.
data = data.reindex(
    columns=[
        'lots_available',
        'total_lot',
        'carpark_number',
        'x_coord',
        'y_coord',
        'car_park_decks',
        'gantry_height',
        'BASEMENT CAR PARK',
        'COVERED CAR PARK',
        'MECHANISED AND SURFACE CAR PARK',
        'MULTI-STOREY CAR PARK',
        'SURFACE CAR PARK',
        '7AM-10.30PM',
        '7AM-7PM',
        'NO',
        'WHOLE DAY',
        'NO.1',
        'SUN & PH FR 1PM-10.30PM',
        'SUN & PH FR 7AM-10.30PM',
        'NO.2',
        'YES',
        'N',
        'Y',
    ]
)

In [None]:
# Calendar features read off the DatetimeIndex: weekday number and hour of day.
data["day_of_week"] = data.index.dayofweek
data["hour_of_day"] = data.index.hour

In [None]:
# Model inputs, in column order. The target ('lots_available') must stay first
# because the windowing helpers are called with label_col_no=0.
features = [
    'lots_available',
    'day_of_week',
    'hour_of_day',
    'total_lot',
    'carpark_number',
    'x_coord',
    'y_coord',
    'car_park_decks',
    'gantry_height',
    'MULTI-STOREY CAR PARK',
    'WHOLE DAY',
    'NO.1',
    'SUN & PH FR 7AM-10.30PM',
]

In [None]:
# Keep only the selected feature columns, in the order given by `features`.
data = data[features].reindex(columns=features)

In [None]:
# Blank out every column of the row(s) at this timestamp, then drop all rows
# that contain a NaN. The net effect is to delete that timestamp's rows, along
# with any other row that already had a missing value.
data.loc['2016-02-19 11:15:00',:] = np.nan
data.dropna(inplace=True)

In [None]:
# Train/test split using the project helper imported from utils (this is not
# scikit-learn's train_test_split).
# NOTE(review): test_step_size=673 presumably holds out the last 673 time steps
# for testing — confirm against utils.
Train, Test = train_test_split(data, test_step_size=673)

## Data normalization with MinMaxScaler: each column is rescaled to the [0, 1] range, with the scaler fitted on the training set.

In [None]:
# Min-max scale every column independently. The scaler is fitted on the
# training column only and then applied to the matching test column, so no
# test-set statistics leak into the scaling.
#
# The MinMaxScaler instance is deliberately NOT named `scaler`: that name is
# the helper imported from utils and is called again further down for the
# regional subsets. Rebinding it here would make those calls fail with
# "'MinMaxScaler' object is not callable".
for column_name in Train.columns:
    column_scaler = MinMaxScaler()
    # scikit-learn expects a 2-D (n_samples, 1) array; flatten back afterwards.
    Train[column_name] = column_scaler.fit_transform(
        Train[column_name].values.reshape((-1, 1))
    ).ravel()
    Test[column_name] = column_scaler.transform(
        Test[column_name].values.reshape((-1, 1))
    ).ravel()

## best model with selected hyperparameters from RandomizedSearchCV

In [None]:
# Bagging ensemble configured with the hyper-parameters selected by the
# RandomizedSearchCV run: 500 base estimators, each trained on 70% of the samples.
bagg_best = BaggingRegressor(n_estimators=500, max_samples=0.7)

## performance on different time window using entire dataset

### 15 minutes (used previous timestep to predict next timestep, 1 timestep = 15 minute)

In [None]:
# 15-minute window: one previous timestep as input, column 0 as the label.
(X_train_15min, Y_train_15min), (X_test_15min, Y_test_15min) = (
    X_Y_split(frame, window_size=1, label_col_no=0) for frame in (Train, Test)
)

In [None]:
from sklearn.base import clone

# Work on an independent, unfitted copy of the template estimator. A plain
# assignment would only alias `bagg_best`, so fitting any other window size
# later would silently replace this model too.
bagg_best_15min = clone(bagg_best)

# Fit on the TRAINING windows; fitting on the test windows (as before) leaks
# the evaluation data into the model and makes the reported scores meaningless.
bagg_best_15min.fit(X_train_15min, Y_train_15min)
yp_15min_bagg = bagg_best_15min.predict(X_test_15min)

In [None]:
# Whole data set, 15-minute window: MSE, MAE, RMSE and R2 on the test set (5 d.p.).
mse = mean_squared_error(y_true=Y_test_15min, y_pred=yp_15min_bagg)
mae = mean_absolute_error(y_true=Y_test_15min, y_pred=yp_15min_bagg)
rmse = math.sqrt(mse)
r2 = r2_score(y_true=Y_test_15min, y_pred=yp_15min_bagg)
print(round(mse, 5), round(mae, 5), round(rmse, 5), round(r2, 5), sep="\n")

### 1 hour (used 4 timesteps to predict next timestep, 1 timestep = 15 minute, 4 timestep = 1hour)

In [None]:
# 1-hour window: four 15-minute timesteps as input, column 0 as the label.
(X_train_1hour, Y_train_1hour), (X_test_1hour, Y_test_1hour) = (
    X_Y_split(frame, window_size=4, label_col_no=0) for frame in (Train, Test)
)

In [None]:
from sklearn.base import clone

# Independent, unfitted copy of the template estimator. Aliasing `bagg_best`
# would make this the very same object as the other window-size models, so
# whichever one is fitted last would overwrite the rest.
bagg_best_1hour = clone(bagg_best)
bagg_best_1hour.fit(X_train_1hour, Y_train_1hour)
yp_1hour_bagg = bagg_best_1hour.predict(X_test_1hour)

In [None]:
# Whole data set, 1-hour window: MSE, MAE, RMSE and R2 on the test set (5 d.p.).
mse = mean_squared_error(y_true=Y_test_1hour, y_pred=yp_1hour_bagg)
mae = mean_absolute_error(y_true=Y_test_1hour, y_pred=yp_1hour_bagg)
rmse = math.sqrt(mse)
r2 = r2_score(y_true=Y_test_1hour, y_pred=yp_1hour_bagg)
print(round(mse, 5), round(mae, 5), round(rmse, 5), round(r2, 5), sep="\n")

### 4 hour (used 16 timesteps to predict next timestep, 1 timestep = 15 minute, 16 timestep = 4 hour)

In [None]:
# 4-hour window: sixteen 15-minute timesteps as input, column 0 as the label.
(X_train_4hour, Y_train_4hour), (X_test_4hour, Y_test_4hour) = (
    X_Y_split(frame, window_size=16, label_col_no=0) for frame in (Train, Test)
)

In [None]:
from sklearn.base import clone

# Independent, unfitted copy of the template estimator, so fitting this model
# cannot overwrite (or be overwritten by) the models for other window sizes.
bagg_best_4hour = clone(bagg_best)
bagg_best_4hour.fit(X_train_4hour, Y_train_4hour)

# Predict with the model fitted just above. The previous code referenced
# `bagg_best_4hour_test`, which is only defined in the robustness-check
# section at the end of the notebook and raised NameError here.
yp_4hour_bagg = bagg_best_4hour.predict(X_test_4hour)

In [None]:
# Whole data set, 4-hour window: MSE, MAE, RMSE and R2 on the test set (5 d.p.).
mse_test = mean_squared_error(y_true=Y_test_4hour, y_pred=yp_4hour_bagg)
mae_test = mean_absolute_error(y_true=Y_test_4hour, y_pred=yp_4hour_bagg)
rmse_test = math.sqrt(mse_test)
r2_test = r2_score(y_true=Y_test_4hour, y_pred=yp_4hour_bagg)
print(round(mse_test, 5), round(mae_test, 5), round(rmse_test, 5), round(r2_test, 5), sep="\n")

## performance on different time window in five regions (group level)

- The dataset was divided into five subsets based on a k-means clustering analysis with five clusters; each subset consists of 10 to 12 parking lots.

### select 5 subsets based on 5 centroids from 5-fold k-mean clustering analysis

In [None]:
# Carve out five regional subsets with rectangular bounds on the (unscaled)
# x/y coordinates. All bounds are exclusive.
_xc, _yc = data['x_coord'], data['y_coord']
central = data[(_xc > 30500) & (_xc < 32500) & (_yc > 35000) & (_yc < 36000)]
north_area = data[(_xc > 25000) & (_xc < 26500) & (_yc > 44000)]
west_area = data[(_xc < 20000) & (_yc > 37500) & (_yc < 38500)]
east_area = data[(_xc > 35000) & (_yc > 38000) & (_yc < 39000)]
south_area = data[(_xc > 25000) & (_xc < 26000) & (_yc > 30000) & (_yc < 31000)]

### train-test split

In [None]:
# Same train/test split as for the whole data set, applied to each region.
(
    (TRAIN_central, TEST_central),
    (TRAIN_north, TEST_north),
    (TRAIN_west, TEST_west),
    (TRAIN_east, TEST_east),
    (TRAIN_south, TEST_south),
) = (
    train_test_split(region_frame, test_step_size=673)
    for region_frame in (central, north_area, west_area, east_area, south_area)
)

### data normalization

In [None]:
# Normalize each region's train/test pair with the scaler helper from utils.
(
    (train_central, test_central),
    (train_north, test_north),
    (train_west, test_west),
    (train_east, test_east),
    (train_south, test_south),
) = (
    scaler(region_train, region_test)
    for region_train, region_test in (
        (TRAIN_central, TEST_central),
        (TRAIN_north, TEST_north),
        (TRAIN_west, TEST_west),
        (TRAIN_east, TEST_east),
        (TRAIN_south, TEST_south),
    )
)

### 15 minute 

In [None]:
# 15-minute (1-step) test windows for every region.
(
    (xtest_central, ytest_central),
    (xtest_east, ytest_east),
    (xtest_west, ytest_west),
    (xtest_south, ytest_south),
    (xtest_north, ytest_north),
) = (
    X_Y_split(region_test, window_size=1, label_col_no=0)
    for region_test in (test_central, test_east, test_west, test_south, test_north)
)

In [None]:
# Every region is scored with the same 15-minute model (these are aliases, not copies).
m_central = m_east = m_west = m_south = m_north = bagg_best_15min

In [None]:
# Predict each region's 15-minute test windows.
yp_central, yp_east, yp_west, yp_south, yp_north = (
    model.predict(inputs)
    for model, inputs in (
        (m_central, xtest_central),
        (m_east, xtest_east),
        (m_west, xtest_west),
        (m_south, xtest_south),
        (m_north, xtest_north),
    )
)

In [None]:
# Central region, 15-minute window: MSE, MAE, RMSE and R2 on the test windows (5 d.p.).
mse = mean_squared_error(y_true=ytest_central, y_pred=yp_central)
mae = mean_absolute_error(y_true=ytest_central, y_pred=yp_central)
rmse = math.sqrt(mse)
r2 = r2_score(y_true=ytest_central, y_pred=yp_central)
print(round(mse, 5), round(mae, 5), round(rmse, 5), round(r2, 5), sep="\n")

In [None]:
# East region, 15-minute window: MSE, MAE, RMSE and R2 on the test windows (5 d.p.).
mse = mean_squared_error(y_true=ytest_east, y_pred=yp_east)
mae = mean_absolute_error(y_true=ytest_east, y_pred=yp_east)
rmse = math.sqrt(mse)
r2 = r2_score(y_true=ytest_east, y_pred=yp_east)
print(round(mse, 5), round(mae, 5), round(rmse, 5), round(r2, 5), sep="\n")

In [None]:
# West region, 15-minute window: MSE, MAE, RMSE and R2 on the test windows (5 d.p.).
mse = mean_squared_error(y_true=ytest_west, y_pred=yp_west)
mae = mean_absolute_error(y_true=ytest_west, y_pred=yp_west)
rmse = math.sqrt(mse)
r2 = r2_score(y_true=ytest_west, y_pred=yp_west)
print(round(mse, 5), round(mae, 5), round(rmse, 5), round(r2, 5), sep="\n")

In [None]:
# South region, 15-minute window: MSE, MAE, RMSE and R2 on the test windows (5 d.p.).
mse = mean_squared_error(y_true=ytest_south, y_pred=yp_south)
mae = mean_absolute_error(y_true=ytest_south, y_pred=yp_south)
rmse = math.sqrt(mse)
r2 = r2_score(y_true=ytest_south, y_pred=yp_south)
print(round(mse, 5), round(mae, 5), round(rmse, 5), round(r2, 5), sep="\n")

In [None]:
# North region, 15-minute window: MSE, MAE, RMSE and R2 on the test windows (5 d.p.).
mse = mean_squared_error(y_true=ytest_north, y_pred=yp_north)
mae = mean_absolute_error(y_true=ytest_north, y_pred=yp_north)
rmse = math.sqrt(mse)
r2 = r2_score(y_true=ytest_north, y_pred=yp_north)
print(round(mse, 5), round(mae, 5), round(rmse, 5), round(r2, 5), sep="\n")

### 1 hour

In [None]:
# 1-hour (4-step) test windows for every region.
(
    (xtest_central, ytest_central),
    (xtest_east, ytest_east),
    (xtest_west, ytest_west),
    (xtest_south, ytest_south),
    (xtest_north, ytest_north),
) = (
    X_Y_split(region_test, window_size=4, label_col_no=0)
    for region_test in (test_central, test_east, test_west, test_south, test_north)
)

In [None]:
# Every region is scored with the same 1-hour model (these are aliases, not copies).
m_central = m_east = m_west = m_south = m_north = bagg_best_1hour

In [None]:
# Predict each region's 1-hour test windows.
yp_central, yp_east, yp_west, yp_south, yp_north = (
    model.predict(inputs)
    for model, inputs in (
        (m_central, xtest_central),
        (m_east, xtest_east),
        (m_west, xtest_west),
        (m_south, xtest_south),
        (m_north, xtest_north),
    )
)

In [None]:
# Central region, 1-hour window: MSE, MAE, RMSE and R2 on the test windows (5 d.p.).
mse = mean_squared_error(y_true=ytest_central, y_pred=yp_central)
mae = mean_absolute_error(y_true=ytest_central, y_pred=yp_central)
rmse = math.sqrt(mse)
r2 = r2_score(y_true=ytest_central, y_pred=yp_central)
print(round(mse, 5), round(mae, 5), round(rmse, 5), round(r2, 5), sep="\n")

In [None]:
# East region, 1-hour window: MSE, MAE, RMSE and R2 on the test windows (5 d.p.).
mse = mean_squared_error(y_true=ytest_east, y_pred=yp_east)
mae = mean_absolute_error(y_true=ytest_east, y_pred=yp_east)
rmse = math.sqrt(mse)
r2 = r2_score(y_true=ytest_east, y_pred=yp_east)
print(round(mse, 5), round(mae, 5), round(rmse, 5), round(r2, 5), sep="\n")

In [None]:
# West region, 1-hour window: MSE, MAE, RMSE and R2 on the test windows (5 d.p.).
mse = mean_squared_error(y_true=ytest_west, y_pred=yp_west)
mae = mean_absolute_error(y_true=ytest_west, y_pred=yp_west)
rmse = math.sqrt(mse)
r2 = r2_score(y_true=ytest_west, y_pred=yp_west)
print(round(mse, 5), round(mae, 5), round(rmse, 5), round(r2, 5), sep="\n")

In [None]:
# South region, 1-hour window: MSE, MAE, RMSE and R2 on the test windows (5 d.p.).
mse = mean_squared_error(y_true=ytest_south, y_pred=yp_south)
mae = mean_absolute_error(y_true=ytest_south, y_pred=yp_south)
rmse = math.sqrt(mse)
r2 = r2_score(y_true=ytest_south, y_pred=yp_south)
print(round(mse, 5), round(mae, 5), round(rmse, 5), round(r2, 5), sep="\n")

In [None]:
# North region, 1-hour window: MSE, MAE, RMSE and R2 on the test windows (5 d.p.).
mse = mean_squared_error(y_true=ytest_north, y_pred=yp_north)
mae = mean_absolute_error(y_true=ytest_north, y_pred=yp_north)
rmse = math.sqrt(mse)
r2 = r2_score(y_true=ytest_north, y_pred=yp_north)
print(round(mse, 5), round(mae, 5), round(rmse, 5), round(r2, 5), sep="\n")

### 4 hour

In [None]:
# 4-hour (16-step) test windows for every region.
(
    (xtest_central, ytest_central),
    (xtest_east, ytest_east),
    (xtest_west, ytest_west),
    (xtest_south, ytest_south),
    (xtest_north, ytest_north),
) = (
    X_Y_split(region_test, window_size=16, label_col_no=0)
    for region_test in (test_central, test_east, test_west, test_south, test_north)
)

In [None]:
# Every region is scored with the same 4-hour model (these are aliases, not copies).
m_central = m_east = m_west = m_south = m_north = bagg_best_4hour

In [None]:
# Predict each region's 4-hour test windows.
yp_central, yp_east, yp_west, yp_south, yp_north = (
    model.predict(inputs)
    for model, inputs in (
        (m_central, xtest_central),
        (m_east, xtest_east),
        (m_west, xtest_west),
        (m_south, xtest_south),
        (m_north, xtest_north),
    )
)

In [None]:
# Central region, 4-hour window: MSE, MAE, RMSE and R2 on the test windows (5 d.p.).
mse = mean_squared_error(y_true=ytest_central, y_pred=yp_central)
mae = mean_absolute_error(y_true=ytest_central, y_pred=yp_central)
rmse = math.sqrt(mse)
r2 = r2_score(y_true=ytest_central, y_pred=yp_central)
print(round(mse, 5), round(mae, 5), round(rmse, 5), round(r2, 5), sep="\n")

In [None]:
# East region, 4-hour window: MSE, MAE, RMSE and R2 on the test windows (5 d.p.).
mse = mean_squared_error(y_true=ytest_east, y_pred=yp_east)
mae = mean_absolute_error(y_true=ytest_east, y_pred=yp_east)
rmse = math.sqrt(mse)
r2 = r2_score(y_true=ytest_east, y_pred=yp_east)
print(round(mse, 5), round(mae, 5), round(rmse, 5), round(r2, 5), sep="\n")

In [None]:
# West region, 4-hour window: MSE, MAE, RMSE and R2 on the test windows (5 d.p.).
mse = mean_squared_error(y_true=ytest_west, y_pred=yp_west)
mae = mean_absolute_error(y_true=ytest_west, y_pred=yp_west)
rmse = math.sqrt(mse)
r2 = r2_score(y_true=ytest_west, y_pred=yp_west)
print(round(mse, 5), round(mae, 5), round(rmse, 5), round(r2, 5), sep="\n")

In [None]:
# South region, 4-hour window: MSE, MAE, RMSE and R2 on the test windows (5 d.p.).
mse = mean_squared_error(y_true=ytest_south, y_pred=yp_south)
mae = mean_absolute_error(y_true=ytest_south, y_pred=yp_south)
rmse = math.sqrt(mse)
r2 = r2_score(y_true=ytest_south, y_pred=yp_south)
print(round(mse, 5), round(mae, 5), round(rmse, 5), round(r2, 5), sep="\n")

In [None]:
# North region, 4-hour window: MSE, MAE, RMSE and R2 on the test windows (5 d.p.).
mse = mean_squared_error(y_true=ytest_north, y_pred=yp_north)
mae = mean_absolute_error(y_true=ytest_north, y_pred=yp_north)
rmse = math.sqrt(mse)
r2 = r2_score(y_true=ytest_north, y_pred=yp_north)
print(round(mse, 5), round(mae, 5), round(rmse, 5), round(r2, 5), sep="\n")

## performance on different time window in five regions (individual level)

In [None]:
# One car park per region: the rows whose scaled carpark_number equals 0.
# NOTE(review): after per-region min-max scaling, 0 is presumably the
# lowest-numbered car park of the region (41, 547, 22, 437, 514) — confirm.

# central
train_41 = train_central.loc[train_central['carpark_number'] == 0]
test_41 = test_central.loc[test_central['carpark_number'] == 0]

# north
train_547 = train_north.loc[train_north['carpark_number'] == 0]
test_547 = test_north.loc[test_north['carpark_number'] == 0]

# west
train_22 = train_west.loc[train_west['carpark_number'] == 0]
test_22 = test_west.loc[test_west['carpark_number'] == 0]

# east
train_437 = train_east.loc[train_east['carpark_number'] == 0]
test_437 = test_east.loc[test_east['carpark_number'] == 0]

# south
train_514 = train_south.loc[train_south['carpark_number'] == 0]
test_514 = test_south.loc[test_south['carpark_number'] == 0]

### 15 minute

In [None]:
# 15-minute (1-step) test windows for each single car park.
(
    (xtest41, ytest41),
    (xtest547, ytest547),
    (xtest22, ytest22),
    (xtest437, ytest437),
    (xtest514, ytest514),
) = (
    window_generator(park_test, window_size=1, label_col_no=0)
    for park_test in (test_41, test_547, test_22, test_437, test_514)
)

In [None]:
# Every single car park is scored with the same 15-minute model (aliases, not copies).
m_central_single = m_east_single = m_west_single = m_south_single = m_north_single = bagg_best_15min

In [None]:
# Predict each single car park's 15-minute test windows.
yp_central, yp_east, yp_west, yp_south, yp_north = (
    model.predict(inputs)
    for model, inputs in (
        (m_central_single, xtest41),
        (m_east_single, xtest437),
        (m_west_single, xtest22),
        (m_south_single, xtest514),
        (m_north_single, xtest547),
    )
)

In [None]:
# Central single car park (test_41), 15-minute window: MSE, MAE, RMSE and R2 (5 d.p.).
mse = mean_squared_error(y_true=ytest41, y_pred=yp_central)
mae = mean_absolute_error(y_true=ytest41, y_pred=yp_central)
rmse = math.sqrt(mse)
r2 = r2_score(y_true=ytest41, y_pred=yp_central)
print(round(mse, 5), round(mae, 5), round(rmse, 5), round(r2, 5), sep="\n")

In [None]:
# East single car park (test_437), 15-minute window: MSE, MAE, RMSE and R2 (5 d.p.).
mse = mean_squared_error(y_true=ytest437, y_pred=yp_east)
mae = mean_absolute_error(y_true=ytest437, y_pred=yp_east)
rmse = math.sqrt(mse)
r2 = r2_score(y_true=ytest437, y_pred=yp_east)
print(round(mse, 5), round(mae, 5), round(rmse, 5), round(r2, 5), sep="\n")

In [None]:
# West single car park (test_22), 15-minute window: MSE, MAE, RMSE and R2 (5 d.p.).
mse = mean_squared_error(y_true=ytest22, y_pred=yp_west)
mae = mean_absolute_error(y_true=ytest22, y_pred=yp_west)
rmse = math.sqrt(mse)
r2 = r2_score(y_true=ytest22, y_pred=yp_west)
print(round(mse, 5), round(mae, 5), round(rmse, 5), round(r2, 5), sep="\n")

In [None]:
# South single car park (test_514), 15-minute window: MSE, MAE, RMSE and R2 (5 d.p.).
mse = mean_squared_error(y_true=ytest514, y_pred=yp_south)
mae = mean_absolute_error(y_true=ytest514, y_pred=yp_south)
rmse = math.sqrt(mse)
r2 = r2_score(y_true=ytest514, y_pred=yp_south)
print(round(mse, 5), round(mae, 5), round(rmse, 5), round(r2, 5), sep="\n")

In [None]:
# North single car park (test_547), 15-minute window: MSE, MAE, RMSE and R2 (5 d.p.).
mse = mean_squared_error(y_true=ytest547, y_pred=yp_north)
mae = mean_absolute_error(y_true=ytest547, y_pred=yp_north)
rmse = math.sqrt(mse)
r2 = r2_score(y_true=ytest547, y_pred=yp_north)
print(round(mse, 5), round(mae, 5), round(rmse, 5), round(r2, 5), sep="\n")

### 1 hour

In [None]:
# 1-hour (4-step) test windows for each single car park.
(
    (xtest41, ytest41),
    (xtest547, ytest547),
    (xtest22, ytest22),
    (xtest437, ytest437),
    (xtest514, ytest514),
) = (
    window_generator(park_test, window_size=4, label_col_no=0)
    for park_test in (test_41, test_547, test_22, test_437, test_514)
)

In [None]:
# Every single car park is scored with the same 1-hour model (aliases, not copies).
m_central_single = m_east_single = m_west_single = m_south_single = m_north_single = bagg_best_1hour

In [None]:
# Predict each single car park's 1-hour test windows.
yp_central, yp_east, yp_west, yp_south, yp_north = (
    model.predict(inputs)
    for model, inputs in (
        (m_central_single, xtest41),
        (m_east_single, xtest437),
        (m_west_single, xtest22),
        (m_south_single, xtest514),
        (m_north_single, xtest547),
    )
)

In [None]:
# Central single car park (test_41), 1-hour window: MSE, MAE, RMSE and R2 (5 d.p.).
mse = mean_squared_error(y_true=ytest41, y_pred=yp_central)
mae = mean_absolute_error(y_true=ytest41, y_pred=yp_central)
rmse = math.sqrt(mse)
r2 = r2_score(y_true=ytest41, y_pred=yp_central)
print(round(mse, 5), round(mae, 5), round(rmse, 5), round(r2, 5), sep="\n")

In [None]:
# East single car park (test_437), 1-hour window: MSE, MAE, RMSE and R2 (5 d.p.).
mse = mean_squared_error(y_true=ytest437, y_pred=yp_east)
mae = mean_absolute_error(y_true=ytest437, y_pred=yp_east)
rmse = math.sqrt(mse)
r2 = r2_score(y_true=ytest437, y_pred=yp_east)
print(round(mse, 5), round(mae, 5), round(rmse, 5), round(r2, 5), sep="\n")

In [None]:
# West single car park (test_22), 1-hour window: MSE, MAE, RMSE and R2 (5 d.p.).
mse = mean_squared_error(y_true=ytest22, y_pred=yp_west)
mae = mean_absolute_error(y_true=ytest22, y_pred=yp_west)
rmse = math.sqrt(mse)
r2 = r2_score(y_true=ytest22, y_pred=yp_west)
print(round(mse, 5), round(mae, 5), round(rmse, 5), round(r2, 5), sep="\n")

In [None]:
# South single car park (test_514), 1-hour window: MSE, MAE, RMSE and R2 (5 d.p.).
mse = mean_squared_error(y_true=ytest514, y_pred=yp_south)
mae = mean_absolute_error(y_true=ytest514, y_pred=yp_south)
rmse = math.sqrt(mse)
r2 = r2_score(y_true=ytest514, y_pred=yp_south)
print(round(mse, 5), round(mae, 5), round(rmse, 5), round(r2, 5), sep="\n")

In [None]:
# North single car park (test_547), 1-hour window: MSE, MAE, RMSE and R2 (5 d.p.).
mse = mean_squared_error(y_true=ytest547, y_pred=yp_north)
mae = mean_absolute_error(y_true=ytest547, y_pred=yp_north)
rmse = math.sqrt(mse)
r2 = r2_score(y_true=ytest547, y_pred=yp_north)
print(round(mse, 5), round(mae, 5), round(rmse, 5), round(r2, 5), sep="\n")

### 4 hour

In [None]:
# 4-hour (16-step) test windows for each single car park.
(
    (xtest41, ytest41),
    (xtest547, ytest547),
    (xtest22, ytest22),
    (xtest437, ytest437),
    (xtest514, ytest514),
) = (
    window_generator(park_test, window_size=16, label_col_no=0)
    for park_test in (test_41, test_547, test_22, test_437, test_514)
)

In [None]:
# Every single car park is scored with the same 4-hour model (aliases, not copies).
m_central_single = m_east_single = m_west_single = m_south_single = m_north_single = bagg_best_4hour

In [None]:
# Predict each single car park's 4-hour test windows.
yp_central, yp_east, yp_west, yp_south, yp_north = (
    model.predict(inputs)
    for model, inputs in (
        (m_central_single, xtest41),
        (m_east_single, xtest437),
        (m_west_single, xtest22),
        (m_south_single, xtest514),
        (m_north_single, xtest547),
    )
)

In [None]:
# Central single car park (test_41), 4-hour window: MSE, MAE, RMSE and R2 (5 d.p.).
mse = mean_squared_error(y_true=ytest41, y_pred=yp_central)
mae = mean_absolute_error(y_true=ytest41, y_pred=yp_central)
rmse = math.sqrt(mse)
r2 = r2_score(y_true=ytest41, y_pred=yp_central)
print(round(mse, 5), round(mae, 5), round(rmse, 5), round(r2, 5), sep="\n")

In [None]:
# East single car park (test_437), 4-hour window: MSE, MAE, RMSE and R2 (5 d.p.).
mse = mean_squared_error(y_true=ytest437, y_pred=yp_east)
mae = mean_absolute_error(y_true=ytest437, y_pred=yp_east)
rmse = math.sqrt(mse)
r2 = r2_score(y_true=ytest437, y_pred=yp_east)
print(round(mse, 5), round(mae, 5), round(rmse, 5), round(r2, 5), sep="\n")

In [None]:
# West single car park (test_22), 4-hour window: MSE, MAE, RMSE and R2 (5 d.p.).
mse = mean_squared_error(y_true=ytest22, y_pred=yp_west)
mae = mean_absolute_error(y_true=ytest22, y_pred=yp_west)
rmse = math.sqrt(mse)
r2 = r2_score(y_true=ytest22, y_pred=yp_west)
print(round(mse, 5), round(mae, 5), round(rmse, 5), round(r2, 5), sep="\n")

In [None]:
# South single car park (test_514), 4-hour window: MSE, MAE, RMSE and R2 (5 d.p.).
mse = mean_squared_error(y_true=ytest514, y_pred=yp_south)
mae = mean_absolute_error(y_true=ytest514, y_pred=yp_south)
rmse = math.sqrt(mse)
r2 = r2_score(y_true=ytest514, y_pred=yp_south)
print(round(mse, 5), round(mae, 5), round(rmse, 5), round(r2, 5), sep="\n")

In [None]:
# North single car park (test_547), 4-hour window: MSE, MAE, RMSE and R2 (5 d.p.).
mse = mean_squared_error(y_true=ytest547, y_pred=yp_north)
mae = mean_absolute_error(y_true=ytest547, y_pred=yp_north)
rmse = math.sqrt(mse)
r2 = r2_score(y_true=ytest547, y_pred=yp_north)
print(round(mse, 5), round(mae, 5), round(rmse, 5), round(r2, 5), sep="\n")

## performance on different time horizon using model with best time window size (4 hour = 16 timestep)

### entire dataset

In [None]:
# Alias (not a copy) of the 4-hour-window model, used for the whole-data-set
# multi-step forecast.
bagg_best_4hr_whole = bagg_best_4hour

In [None]:
# NOTE(review): last_x_y_generator is a utils helper; presumably it returns the
# final 16-step window(s) of Train and the matching label(s) — confirm.
x,y = last_x_y_generator(Train, window_size=16,label_col_no=0)

In [None]:
# Seed input for the recursive forecast.
# NOTE(review): insert_end (utils) presumably shifts the 16-step window x forward
# by one step and appends y — confirm against utils.
current_batch = insert_end(x,y,16)

In [None]:
# Entire data set: build L, the first `future_len` test rows of every car
# park, ordered step by step (all car parks at step 0, then step 1, ...), so
# that consecutive fixed-size slices of L line up with the forecast steps.
future_len = 40
Test_new = Test[['lots_available', 'carpark_number', 'day_of_week', 'hour_of_day']]
Test_new = Test_new.sort_values(by=['carpark_number', 'datetime'])

l = []
for i in sorted(Test_new.carpark_number.value_counts().keys()):
    # reset_index() turns the timestamp index into a column and gives each car
    # park a fresh 0..n-1 index, i.e. the step number within that car park.
    inner = Test_new[Test_new.carpark_number == i].reset_index().iloc[0:future_len, :]
    l.append(inner)

# DataFrame.append was removed in pandas 2.0; pd.concat is the supported
# equivalent and keeps each frame's per-car-park step index. An empty frame is
# kept for the (degenerate) no-car-park case, which pd.concat would reject.
L = pd.concat(l) if l else pd.DataFrame()
L['value'] = L.index.values  # step number within the car park
L = L.sort_index()
L = L.sort_values(by=['value', 'carpark_number'])

In [None]:
# Recursive 40-step forecast on the whole data set: predict one step, store it,
# rebuild the model input from the prediction via insert_end_multi, then
# overwrite two column ranges of the input with the day_of_week / hour_of_day
# values taken from the i-th block of L.
# NOTE(review): 855 rows per step is hard-coded (presumably the number of car
# parks in the data set) — confirm.
# NOTE(review): column positions 41:42 and 57: are assumed to be where
# day_of_week and hour_of_day live in the flattened window — confirm against
# utils.insert_end_multi.
future = 40
Xin = current_batch
forcast = []
for i in range(0,future):
  out = bagg_best_4hr_whole.predict(Xin)    
  forcast.append(out)
  Xin = insert_end_multi(Xin,out, timestep=16)
  Xin[:,41:42] =  L[['day_of_week']][i*855:(i*855)+855]
  Xin[:,57:] = L[['hour_of_day']][i*855:(i*855)+855]

In [None]:
# Ground truth per forecast step: 40 consecutive blocks of 855 rows from L.
Y_t = L['lots_available']
l = [Y_t[start:start + 855] for start in range(0, 40 * 855, 855)]

In [None]:
# RMSE of the forecast at each of the 40 horizons.
rmse40step = [
    math.sqrt(mean_squared_error(y_true=l[step], y_pred=forcast[step]))
    for step in range(40)
]

In [None]:
# Persist the per-horizon RMSE values for the whole data set.
rmse40step = pd.DataFrame(data=rmse40step)
rmse40step.to_csv(path_or_buf='rmse40step855_bagging.csv')

### group level (five regions, 10 to 12 parking lots per region)

In [None]:
# All regional forecasts use the same 4-hour model (aliases, not copies).
bagg_4hr_central = bagg_4hr_north = bagg_4hr_west = bagg_4hr_east = bagg_4hr_south = bagg_best_4hour

In [None]:
# Output of last_x_y_generator (16-step window) for each region's scaled training set.
(
    (x_central, y_central),
    (x_north, y_north),
    (x_west, y_west),
    (x_east, y_east),
    (x_south, y_south),
) = (
    last_x_y_generator(region_train, window_size=16, label_col_no=0)
    for region_train in (train_central, train_north, train_west, train_east, train_south)
)

In [None]:
# Seed inputs for the regional recursive forecasts.
(
    current_batch_central,
    current_batch_north,
    current_batch_west,
    current_batch_east,
    current_batch_south,
) = (
    insert_end(last_x, last_y, 16)
    for last_x, last_y in (
        (x_central, y_central),
        (x_north, y_north),
        (x_west, y_west),
        (x_east, y_east),
        (x_south, y_south),
    )
)

In [None]:
# Region level (built from the SOUTH test set): L holds the first `future_len`
# test rows of every car park, ordered step by step, so that consecutive
# fixed-size slices of L line up with the forecast steps.
future_len = 40
Test_new = test_south[['lots_available', 'carpark_number', 'day_of_week', 'hour_of_day']]
Test_new = Test_new.sort_values(by=['carpark_number', 'datetime'])

l = []
for i in sorted(Test_new.carpark_number.value_counts().keys()):
    # reset_index() turns the timestamp index into a column and gives each car
    # park a fresh 0..n-1 index, i.e. the step number within that car park.
    inner = Test_new[Test_new.carpark_number == i].reset_index().iloc[0:future_len, :]
    l.append(inner)

# DataFrame.append was removed in pandas 2.0; pd.concat is the supported
# equivalent and keeps each frame's per-car-park step index. An empty frame is
# kept for the (degenerate) no-car-park case, which pd.concat would reject.
L = pd.concat(l) if l else pd.DataFrame()
L['value'] = L.index.values  # step number within the car park
L = L.sort_index()
L = L.sort_values(by=['value', 'carpark_number'])

In [None]:
#central
# Recursive 40-step forecast for the central region: predict, store, rebuild
# the input with insert_end_multi, then overwrite the calendar columns from L.
# NOTE(review): L is built from test_south in the preceding cell, not from the
# central test set, and the result goes into the shared name `forcast`, which
# every regional loop overwrites; only the loop run last is scored afterwards.
# Rebuild L for this region (and keep the result) before relying on it.
# NOTE(review): step width 11 is presumably this region's car-park count; the
# column positions 41:42 and 57: are unverified — confirm against utils.
future = 40
Xin = current_batch_central
forcast = []
for i in range(0,future):
  out = bagg_4hr_central.predict(Xin)    
  forcast.append(out)
  Xin = insert_end_multi(Xin,out, timestep=16)
  Xin[:,41:42] =  L[['day_of_week']][i*11:(i*11)+11]
  Xin[:,57:] = L[['hour_of_day']][i*11:(i*11)+11]

In [None]:
#north
# Recursive 40-step forecast for the north region: predict, store, rebuild
# the input with insert_end_multi, then overwrite the calendar columns from L.
# NOTE(review): L is built from test_south in an earlier cell, not from the
# north test set, and the result goes into the shared name `forcast`, which
# every regional loop overwrites; only the loop run last is scored afterwards.
# Rebuild L for this region (and keep the result) before relying on it.
# NOTE(review): step width 11 is presumably this region's car-park count; the
# column positions 41:42 and 57: are unverified — confirm against utils.
future = 40
Xin = current_batch_north
forcast = []
for i in range(0,future):
  out = bagg_4hr_north.predict(Xin)    
  forcast.append(out)
  Xin = insert_end_multi(Xin,out, timestep=16)
  Xin[:,41:42] =  L[['day_of_week']][i*11:(i*11)+11]
  Xin[:,57:] = L[['hour_of_day']][i*11:(i*11)+11]

In [None]:
#west
# Recursive 40-step forecast for the west region: predict, store, rebuild
# the input with insert_end_multi, then overwrite the calendar columns from L.
# NOTE(review): L is built from test_south in an earlier cell, not from the
# west test set, and the result goes into the shared name `forcast`, which
# every regional loop overwrites; only the loop run last is scored afterwards.
# Rebuild L for this region (and keep the result) before relying on it.
# NOTE(review): step width 12 is presumably this region's car-park count; the
# column positions 41:42 and 57: are unverified — confirm against utils.
future = 40
Xin = current_batch_west
forcast = []
for i in range(0,future):
  out = bagg_4hr_west.predict(Xin)    
  forcast.append(out)
  Xin = insert_end_multi(Xin,out, timestep=16)
  Xin[:,41:42] =  L[['day_of_week']][i*12:(i*12)+12]
  Xin[:,57:] = L[['hour_of_day']][i*12:(i*12)+12]

In [None]:
#east
# Recursive 40-step forecast for the east region: predict, store, rebuild
# the input with insert_end_multi, then overwrite the calendar columns from L.
# NOTE(review): L is built from test_south in an earlier cell, not from the
# east test set, and the result goes into the shared name `forcast`, which
# every regional loop overwrites; only the loop run last is scored afterwards.
# Rebuild L for this region (and keep the result) before relying on it.
# NOTE(review): step width 11 is presumably this region's car-park count; the
# column positions 41:42 and 57: are unverified — confirm against utils.
future = 40
Xin = current_batch_east
forcast = []
for i in range(0,future):
  out = bagg_4hr_east.predict(Xin)    
  forcast.append(out)
  Xin = insert_end_multi(Xin,out, timestep=16)
  Xin[:,41:42] =  L[['day_of_week']][i*11:(i*11)+11]
  Xin[:,57:] = L[['hour_of_day']][i*11:(i*11)+11]

In [None]:
#south
# Recursive 40-step forecast for the south region: predict, store, rebuild
# the input with insert_end_multi, then overwrite the calendar columns from L
# (L is built from test_south, so it matches this region). The result in
# `forcast` is what the scoring cells that follow evaluate, in steps of 10 rows.
# NOTE(review): step width 10 is presumably this region's car-park count; the
# column positions 41:42 and 57: are unverified — confirm against utils.
future = 40
Xin = current_batch_south
forcast = []
for i in range(0,future):
  out = bagg_4hr_south.predict(Xin)    
  forcast.append(out)
  Xin = insert_end_multi(Xin,out, timestep=16)
  Xin[:,41:42] =  L[['day_of_week']][i*10:(i*10)+10]
  Xin[:,57:] = L[['hour_of_day']][i*10:(i*10)+10]

In [None]:
# Ground truth per forecast step: 40 consecutive blocks of 10 rows from L.
Y_t = L['lots_available']
l = [Y_t[start:start + 10] for start in range(0, 40 * 10, 10)]

In [None]:
# RMSE of the regional forecast at each of the 40 horizons.
rmse40step = [
    math.sqrt(mean_squared_error(y_true=l[step], y_pred=forcast[step]))
    for step in range(40)
]

In [None]:
# Persist the per-horizon RMSE values for the south region.
rmse40step = pd.DataFrame(data=rmse40step)
rmse40step.to_csv(path_or_buf='rmse40step_south_bagging.csv')

### individual level (five regions, 1 parking lot per region)

In [None]:
# All single-car-park forecasts use the same 4-hour model (aliases, not copies).
bagg_4hr_41 = bagg_4hr_547 = bagg_4hr_22 = bagg_4hr_437 = bagg_4hr_514 = bagg_best_4hour

In [None]:
# 16-step training windows for each single car park.
(
    (xtrain41, ytrain41),
    (xtrain547, ytrain547),
    (xtrain22, ytrain22),
    (xtrain437, ytrain437),
    (xtrain514, ytrain514),
) = (
    window_generator(park_train, window_size=16, label_col_no=0)
    for park_train in (train_41, train_547, train_22, train_437, train_514)
)

In [None]:
# Keep only the final training window and its label for each car park
# (sliced with -1: so the leading dimension is preserved).
last_x41, last_y41 = xtrain41[-1:, :], ytrain41[-1:]
last_x547, last_y547 = xtrain547[-1:, :], ytrain547[-1:]
last_x22, last_y22 = xtrain22[-1:, :], ytrain22[-1:]
last_x437, last_y437 = xtrain437[-1:, :], ytrain437[-1:]
last_x514, last_y514 = xtrain514[-1:, :], ytrain514[-1:]

In [None]:
# Seed inputs for the single-car-park recursive forecasts.
(
    current_batch_41,
    current_batch_547,
    current_batch_22,
    current_batch_437,
    current_batch_514,
) = (
    insert_end(last_x, last_y, 16)
    for last_x, last_y in (
        (last_x41, last_y41),
        (last_x547, last_y547),
        (last_x22, last_y22),
        (last_x437, last_y437),
        (last_x514, last_y514),
    )
)

In [None]:
# Individual level (built from test_514, the SOUTH car park): L holds the
# first `future_len` test rows of every car park present, ordered step by
# step, so that consecutive slices of L line up with the forecast steps.
future_len = 40
Test_new = test_514[['lots_available', 'carpark_number', 'day_of_week', 'hour_of_day']]
Test_new = Test_new.sort_values(by=['carpark_number', 'datetime'])

l = []
for i in sorted(Test_new.carpark_number.value_counts().keys()):
    # reset_index() turns the timestamp index into a column and gives each car
    # park a fresh 0..n-1 index, i.e. the step number within that car park.
    inner = Test_new[Test_new.carpark_number == i].reset_index().iloc[0:future_len, :]
    l.append(inner)

# DataFrame.append was removed in pandas 2.0; pd.concat is the supported
# equivalent and keeps each frame's per-car-park step index. An empty frame is
# kept for the (degenerate) no-car-park case, which pd.concat would reject.
L = pd.concat(l) if l else pd.DataFrame()
L['value'] = L.index.values  # step number within the car park
L = L.sort_index()
L = L.sort_values(by=['value', 'carpark_number'])

In [None]:
#central
# Recursive 40-step forecast for the central single car park: predict, store,
# rebuild the input with insert_end_multi, then overwrite the calendar columns
# with the i-th row of L.
# NOTE(review): L is built from test_514 (the south car park) in the preceding
# cell, and the result goes into the shared name `forcast`, which every loop
# overwrites; only the loop run last is scored afterwards. Rebuild L for this
# car park (and keep the result) before relying on it.
# NOTE(review): column positions 41:42 and 57: are unverified — confirm against utils.
future = 40
Xin = current_batch_41
forcast = []
for i in range(0,future):
  out = bagg_4hr_41.predict(Xin)    
  forcast.append(out)
  Xin = insert_end_multi(Xin,out, timestep=16)
  Xin[:,41:42] =  L[['day_of_week']][i*1:(i*1)+1]
  Xin[:,57:] = L[['hour_of_day']][i*1:(i*1)+1]

In [None]:
#north
# Recursive 40-step forecast for the north single car park: predict, store,
# rebuild the input with insert_end_multi, then overwrite the calendar columns
# with the i-th row of L.
# NOTE(review): L is built from test_514 (the south car park) in an earlier
# cell, and the result goes into the shared name `forcast`, which every loop
# overwrites; only the loop run last is scored afterwards. Rebuild L for this
# car park (and keep the result) before relying on it.
# NOTE(review): column positions 41:42 and 57: are unverified — confirm against utils.
future = 40
Xin = current_batch_547
forcast = []
for i in range(0,future):
  out = bagg_4hr_547.predict(Xin)    
  forcast.append(out)
  Xin = insert_end_multi(Xin,out, timestep=16)
  Xin[:,41:42] =  L[['day_of_week']][i*1:(i*1)+1]
  Xin[:,57:] = L[['hour_of_day']][i*1:(i*1)+1]

In [None]:
#west
# Recursive 40-step forecast for the west single car park: predict, store,
# rebuild the input with insert_end_multi, then overwrite the calendar columns
# with the i-th row of L.
# NOTE(review): L is built from test_514 (the south car park) in an earlier
# cell, and the result goes into the shared name `forcast`, which every loop
# overwrites; only the loop run last is scored afterwards. Rebuild L for this
# car park (and keep the result) before relying on it.
# NOTE(review): column positions 41:42 and 57: are unverified — confirm against utils.
future = 40
Xin = current_batch_22
forcast = []
for i in range(0,future):
  out = bagg_4hr_22.predict(Xin)    
  forcast.append(out)
  Xin = insert_end_multi(Xin,out, timestep=16)
  Xin[:,41:42] =  L[['day_of_week']][i*1:(i*1)+1]
  Xin[:,57:] = L[['hour_of_day']][i*1:(i*1)+1]

In [None]:
#east
# Recursive 40-step forecast for the east single car park: predict, store,
# rebuild the input with insert_end_multi, then overwrite the calendar columns
# with the i-th row of L.
# NOTE(review): L is built from test_514 (the south car park) in an earlier
# cell, and the result goes into the shared name `forcast`, which every loop
# overwrites; only the loop run last is scored afterwards. Rebuild L for this
# car park (and keep the result) before relying on it.
# NOTE(review): column positions 41:42 and 57: are unverified — confirm against utils.
future = 40
Xin = current_batch_437
forcast = []
for i in range(0,future):
  out = bagg_4hr_437.predict(Xin)    
  forcast.append(out)
  Xin = insert_end_multi(Xin,out, timestep=16)
  Xin[:,41:42] =  L[['day_of_week']][i*1:(i*1)+1]
  Xin[:,57:] = L[['hour_of_day']][i*1:(i*1)+1]

In [None]:
#south
# Recursive 40-step forecast for the south single car park: predict, store,
# rebuild the input with insert_end_multi, then overwrite the calendar columns
# with the i-th row of L (L is built from test_514, so it matches this car
# park). The result in `forcast` is what the scoring cells that follow evaluate.
# NOTE(review): column positions 41:42 and 57: are unverified — confirm against utils.
future = 40
Xin = current_batch_514
forcast = []
for i in range(0,future):
  out = bagg_4hr_514.predict(Xin)    
  forcast.append(out)
  Xin = insert_end_multi(Xin,out, timestep=16)
  Xin[:,41:42] =  L[['day_of_week']][i*1:(i*1)+1]
  Xin[:,57:] = L[['hour_of_day']][i*1:(i*1)+1]

In [None]:
# Ground truth per forecast step: 40 consecutive single-row slices of L.
Y_t = L['lots_available']
l = [Y_t[start:start + 1] for start in range(0, 40, 1)]

In [None]:
# RMSE of the single-car-park forecast at each of the 40 horizons.
rmse40step = [
    math.sqrt(mean_squared_error(y_true=l[step], y_pred=forcast[step]))
    for step in range(40)
]

In [None]:
# Persist the per-horizon RMSE values for the single south car park.
rmse40step = pd.DataFrame(data=rmse40step)
rmse40step.to_csv(path_or_buf='rmse40step_single_south_bagging.csv')

## robustness check (train-test error)

- Based on the performance across different time windows and sample sizes, the study determined that the best time-window size is 16 timesteps (4 hours).
- To check the model fit, the study compared the model's performance on the training set and on the test set.

In [None]:
# Train- and test-side names for the same fitted 4-hour model (aliases, not copies).
bagg_best_4hour_train = bagg_best_4hour_test = bagg_best_4hour

In [None]:
# Predictions on the training windows and on the held-out test windows.
yp_4hour_bagg_train, yp_4hour_bagg_test = (
    fitted_model.predict(inputs)
    for fitted_model, inputs in (
        (bagg_best_4hour_train, X_train_4hour),
        (bagg_best_4hour_test, X_test_4hour),
    )
)

In [None]:
# Training error of the 4-hour model: MSE, MAE, RMSE and R2 (5 d.p.).
mse_train = mean_squared_error(y_true=Y_train_4hour, y_pred=yp_4hour_bagg_train)
mae_train = mean_absolute_error(y_true=Y_train_4hour, y_pred=yp_4hour_bagg_train)
rmse_train = math.sqrt(mse_train)
r2_train = r2_score(y_true=Y_train_4hour, y_pred=yp_4hour_bagg_train)
print(round(mse_train, 5), round(mae_train, 5), round(rmse_train, 5), round(r2_train, 5), sep="\n")

In [None]:
# Test error of the 4-hour model: MSE, MAE, RMSE and R2 (5 d.p.).
mse_test = mean_squared_error(y_true=Y_test_4hour, y_pred=yp_4hour_bagg_test)
mae_test = mean_absolute_error(y_true=Y_test_4hour, y_pred=yp_4hour_bagg_test)
rmse_test = math.sqrt(mse_test)
r2_test = r2_score(y_true=Y_test_4hour, y_pred=yp_4hour_bagg_test)
print(round(mse_test, 5), round(mae_test, 5), round(rmse_test, 5), round(r2_test, 5), sep="\n")