In [1]:
import keras

# set random seed to ensure reproducibility
keras.utils.set_random_seed(42)

In [2]:
import pandas as pd
from train_val_test_split import train_val_test_split

data = pd.read_csv("datasets/processed_dataset.csv")
data = data.drop(columns=['MONTH', 'CITY'])

X_train, X_val, X_test, y_train, y_val, y_test = train_val_test_split(data)

In [3]:
import tensorflow as tf
from tensorflow.keras import Input, Sequential
from tensorflow.keras.layers import Dense

model = Sequential()
model.add(Input(shape=(10,)))
model.add(Dense(15, activation="relu"))
model.add(Dense(1, activation="sigmoid"))
model.compile(
    loss="binary_crossentropy",
    optimizer="adam",
    metrics=["accuracy"],
)

# use Early Stopping to combat potential overfitting.
earlystop = tf.keras.callbacks.EarlyStopping(monitor="val_loss", patience=10)
train_history = model.fit(
    X_train,
    y_train,
    epochs=200,
    validation_data=(X_val, y_val),
    callbacks=earlystop,
)

Epoch 1/200
[1m343/343[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 780us/step - accuracy: 0.6755 - loss: 0.6594 - val_accuracy: 0.7249 - val_loss: 0.5799
Epoch 2/200
[1m343/343[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 584us/step - accuracy: 0.7493 - loss: 0.5186 - val_accuracy: 0.7384 - val_loss: 0.5494
Epoch 3/200
[1m343/343[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 576us/step - accuracy: 0.7599 - loss: 0.4989 - val_accuracy: 0.7460 - val_loss: 0.5350
Epoch 4/200
[1m343/343[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 582us/step - accuracy: 0.7642 - loss: 0.4892 - val_accuracy: 0.7490 - val_loss: 0.5255
Epoch 5/200
[1m343/343[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 606us/step - accuracy: 0.7672 - loss: 0.4834 - val_accuracy: 0.7537 - val_loss: 0.5185
Epoch 6/200
[1m343/343[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 650us/step - accuracy: 0.7692 - loss: 0.4782 - val_accuracy: 0.7556 - val_loss: 0.5165
Epoch 7/20

In [4]:
from sklearn.metrics import accuracy_score, fbeta_score

y_val_pred = model.predict(X_val)
y_val_pred = y_val_pred > 0.5

print(f"Accuracy on validation set: {accuracy_score(y_val, y_val_pred)}")
print(f"F2-score on validation set: {fbeta_score(y_val, y_val_pred, beta=2)}")

[1m115/115[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step
Accuracy on validation set: 0.7838356164383562
F2-score on validation set: 0.8404802744425386
