In [1]:
from typing import Callable
from time import time
import pandas as pd

# for neural networks
from keras.models import Sequential
from keras.layers import Dense, Dropout
import tensorflow as tf

# for evaluation & preprocessing
from sklearn.model_selection import (
    train_test_split,
    ParameterGrid,
)
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import (
    mean_squared_error,
    mean_absolute_error,
)
import sys, os
sys.path.append(os.path.abspath(os.path.join("..")))

# for displaying results & feedback
# from tabulate import tabulate
import matplotlib.pyplot as plt
from tqdm.notebook import tqdm

In [2]:
import sys, os

sys.path.append(os.path.abspath('..'))
%load_ext autoreload
%autoreload 2
from modules.config import *

In [3]:
model_data = pd.read_pickle(MODEL_DATA_PATH)
model_data.head(2)

Unnamed: 0,demand,hour,day,weekday,month,sustenance_poi_start,public_transport_poi_start,education_poi_start,arts_and_culture_poi_start,sports_poi_start,...,end_881f1abb2dfffff,end_881f1abb31fffff,end_881f1abb35fffff,end_881f1abb39fffff,end_881f1abb61fffff,end_881f1abb63fffff,end_881f1abb65fffff,end_881f1abb67fffff,end_881f1abb69fffff,end_881f1abb6bfffff
0,3,0,20,6,1,82,80,11,5,5,...,0,0,0,0,0,0,0,0,0,0
1,1,12,30,2,1,82,80,11,5,5,...,0,0,0,0,0,0,0,0,0,0


In [4]:
y = model_data["demand"]
X = model_data.drop(columns=["demand"])

X_train, X_rest, y_train, y_rest = train_test_split(X, y, train_size=0.7, random_state=42)
X_valid, X_test, y_valid, y_test = train_test_split(X_rest, y_rest, test_size=0.5, random_state=42)

print(f"Size of the train dataset is: {X_train.shape[0]}")
print(f"Size of the validation dataset is: {X_valid.shape[0]}")
print(f"Size of the test dataset is: {X_test.shape[0]}")

Size of the train dataset is: 260545
Size of the validation dataset is: 55831
Size of the test dataset is: 55832


In [5]:
X_train = StandardScaler().fit_transform(X_train)
X_valid = StandardScaler().fit_transform(X_valid)
X_test = StandardScaler().fit_transform(X_test)

In [6]:
X_train.shape

(260545, 468)

In [7]:
def mean_average_percentage_error(y_true, y_pred):
    return mean_absolute_error(y_true, y_pred) / y_true.mean()


def root_mean_squared_error(y_true, y_pred):
    return mean_squared_error(y_true, y_pred) ** 0.5

In [8]:
# hyperparameters
# n

In [9]:
model = Sequential()
model.add(Dense(100, activation='relu', input_shape=(X_train.shape[1],)))
model.add(Dense(100, activation='relu'))
model.add(Dense(1))

model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mae'])

model.fit(X_train, y_train, epochs=20, batch_size=500)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x1544cdfff70>

In [10]:
y_pred = model.predict(X_test)



In [11]:
print(f"MSE: {mean_squared_error(y_test, y_pred)}")
print(f"MAE: {mean_absolute_error(y_test, y_pred)}")
print(f"MAPE: {mean_average_percentage_error(y_test, y_pred)}")
print(f"RMSE: {root_mean_squared_error(y_test, y_pred)}")

MSE :0.8567572350345711
MAE :0.5365295392882945
MAPE :0.3485956016099248
RMSE :0.9256118166027112
