In [1]:
import numpy as np
import pandas as pd
from pathlib import Path

from spark.preprocessor import preprocessor
from tensorflow.keras.models import Sequential
from tensorflow.keras import Model, regularizers, layers
import tensorflow.keras as keras
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.metrics import accuracy_score, roc_auc_score, confusion_matrix, classification_report, balanced_accuracy_score


In [2]:
# Location of the merged, preprocessed CSV handed to the project preprocessor.
# NOTE(review): this path is specific to one user's home directory — consider
# moving it to a configurable DATA_DIR.
DATA_CSV = "~/code/marzecovaa/spark/processed_data/merged_dfq_v2.csv"

# The preprocessor returns the four train/test arrays in this order.
X_train, X_test, y_train, y_test = preprocessor(DATA_CSV)

In [3]:
def initialize_model(
    input_shape: tuple,
    n_classes: int = 3,
    l1: float = 0.005,
    dropout_rate: float = 0.1,
) -> Model:
    """
    Build the (uncompiled) feed-forward classifier.

    Architecture, in order:
        Input(input_shape)
        Dense(100, relu, kernel-regularized) -> BatchNormalization -> Dropout
        Dense(50, relu)                      -> BatchNormalization -> Dropout
        Dense(n_classes, softmax)

    Parameters
    ----------
    input_shape : tuple
        Shape of a single sample (without the batch dimension), passed
        straight to ``layers.Input``.
    n_classes : int, default 3
        Width of the softmax output layer. Must be at least 2.
    l1 : float, default 0.005
        L1 factor applied to the kernel of the first Dense layer. Only the
        ``l1`` argument is passed to ``regularizers.l1_l2``; the L2 factor is
        left at whatever the installed Keras version defaults to.
    dropout_rate : float, default 0.1
        Drop probability used by both Dropout layers.

    Returns
    -------
    Model
        A ``Sequential`` model that still has to be compiled by the caller.

    Raises
    ------
    ValueError
        If ``n_classes`` is smaller than 2 (a one-unit softmax always
        outputs 1.0 and cannot discriminate anything).
    """
    if n_classes < 2:
        raise ValueError(f"n_classes must be >= 2, got {n_classes}")

    # Only the first hidden layer is regularized, as in the original design.
    reg = regularizers.l1_l2(l1=l1)

    model = Sequential()
    model.add(layers.Input(shape=input_shape))
    model.add(layers.Dense(100, activation="relu", kernel_regularizer=reg))
    model.add(layers.BatchNormalization(momentum=0.9))
    model.add(layers.Dropout(rate=dropout_rate))
    model.add(layers.Dense(50, activation="relu"))
    model.add(layers.BatchNormalization(momentum=0.9))
    model.add(layers.Dropout(rate=dropout_rate))
    # One probability per class; pairs with a (sparse) categorical loss.
    model.add(layers.Dense(n_classes, activation="softmax"))

    return model

In [4]:
# Seed the random number generators before any weights are created so that
# weight initialization, dropout masks and batch shuffling repeat from one
# Restart & Run All to the next (as far as seeding alone allows).
SEED = 42
keras.utils.set_random_seed(SEED)

# The per-sample shape is everything after the leading (sample) axis.
model = initialize_model(input_shape=X_train.shape[1:])
model.summary()


In [5]:
# Mini-batch size; read again by the model.fit call in the training cell.
batch_size = 4

# Sparse categorical cross-entropy is the loss for integer class ids scored
# against the model's softmax output; accuracy is tracked for monitoring only.
model.compile(
    optimizer="adam",
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"],
)


In [6]:
# Halt training once validation loss has gone 50 epochs without improving,
# and roll the model back to the weights from its best epoch.
es = EarlyStopping(monitor="val_loss", patience=50, restore_best_weights=True, verbose=0)

# NOTE(review): validation_split carves the validation set off the end of the
# arrays as passed — confirm the preprocessor returns shuffled rows.
history = model.fit(
    x=X_train,
    y=y_train,
    batch_size=batch_size,
    epochs=500,
    validation_split=0.2,
    callbacks=[es],
    verbose=1,
)


Epoch 1/500
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.4500 - loss: 3.1344 - val_accuracy: 0.5733 - val_loss: 2.8534
Epoch 2/500
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 735us/step - accuracy: 0.5300 - loss: 2.8392 - val_accuracy: 0.6133 - val_loss: 2.7337
Epoch 3/500
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 728us/step - accuracy: 0.6367 - loss: 2.6140 - val_accuracy: 0.6400 - val_loss: 2.5827
Epoch 4/500
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 757us/step - accuracy: 0.6933 - loss: 2.5877 - val_accuracy: 0.6400 - val_loss: 2.4994
Epoch 5/500
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 727us/step - accuracy: 0.6533 - loss: 2.5138 - val_accuracy: 0.7200 - val_loss: 2.4850
Epoch 6/500
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 727us/step - accuracy: 0.6800 - loss: 2.4451 - val_accuracy: 0.6933 - val_loss: 2.4694
Epoch 7/500
[1m75/75[0

In [7]:
# Softmax scores for the held-out set: one row per sample, one column per class.
y_proba = model.predict(X_test)

# Collapse each row to the index of its highest-scoring class.
y_pred = np.argmax(y_proba, axis=1)

# Metrics
bal_acc = balanced_accuracy_score(y_test, y_pred)
print("Balanced Accuracy:", bal_acc)

[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step
Balanced Accuracy: 0.5698287220026351
