# Load Dataset

In [66]:
# Import Statements
import os
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, roc_auc_score, average_precision_score, precision_recall_curve, confusion_matrix
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.utils.class_weight import compute_class_weight
from tensorflow.keras.regularizers import l2
from sklearn.utils import class_weight
import tensorflow as tf

In [67]:
# Build dataset directory path
base_dir = os.path.dirname(os.getcwd())  
data_dir = os.path.join(base_dir, "datasets")

# Load CSVs
X_train_resampled = pd.read_csv(os.path.join(data_dir, "X_train_resampled.csv"))
y_train_resampled = pd.read_csv(os.path.join(data_dir, "y_train_resampled.csv")).values.ravel()

X_train = pd.read_csv(os.path.join(data_dir, "X_train.csv"))
y_train = pd.read_csv(os.path.join(data_dir, "y_train.csv")).values.ravel()

X_val = pd.read_csv(os.path.join(data_dir, "X_val.csv"))
y_val = pd.read_csv(os.path.join(data_dir, "y_val.csv")).values.ravel()

X_test = pd.read_csv(os.path.join(data_dir, "X_test.csv"))
y_test = pd.read_csv(os.path.join(data_dir, "y_test.csv")).values.ravel()

In [68]:
print("Shapes BEFORE dropping columns:")
print(X_train_resampled.shape, y_train_resampled.shape)
print(X_train.shape, y_train.shape)
print(X_val.shape, y_val.shape)
print(X_test.shape, y_test.shape)

Shapes BEFORE dropping columns:
(1573676, 68) (1573676,)
(794989, 68) (794989,)
(108168, 68) (108168,)
(96843, 68) (96843,)


# Use Log-Transformed Features

In [69]:
original_columns = ['days_since_request', 'intended_balcon_amount_clean', 'zip_count_4w', 'velocity_24h', 'velocity_4w', 'date_of_birth_distinct_emails_4w', 'session_length_in_minutes_cleaned']

## Use SMOTE Resampled Training Data

In [70]:
X_train_resampled_log = X_train_resampled.drop(columns=original_columns, errors='ignore')
X_val_log = X_val.drop(columns=original_columns, errors='ignore')
X_test_log = X_test.drop(columns=original_columns, errors='ignore')

In [71]:
print("\nShapes AFTER dropping columns:")
print(X_train_resampled_log.shape, y_train_resampled.shape)
print(X_val_log.shape, y_val.shape)
print(X_test_log.shape, y_test.shape)


Shapes AFTER dropping columns:
(1573676, 61) (1573676,)
(108168, 61) (108168,)
(96843, 61) (96843,)


### Baseline

In [72]:
scaler = StandardScaler()
X_train_resampled_log = scaler.fit_transform(X_train_resampled_log)
X_val_log = scaler.transform(X_val_log)
X_test_log = scaler.transform(X_test_log)

In [73]:
# Build baseline model
input_dim = X_train_resampled_log.shape[1]

baseline_model_log = Sequential([
    Dense(32, activation='relu', input_dim=input_dim),
    Dense(16, activation='relu'),
    Dense(1, activation='sigmoid')
])

baseline_model_log.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy']
)

early_stop_baseline_log = EarlyStopping(
    monitor='val_loss',
    patience=8,
    restore_best_weights=True
)

history_baseline_log = baseline_model_log.fit(
    X_train_resampled_log, y_train_resampled,
    validation_data=(X_val_log, y_val),
    epochs=50,
    batch_size=512,
    callbacks=[early_stop_baseline_log],
    verbose=1
)


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/50
[1m3074/3074[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 7ms/step - accuracy: 0.9851 - loss: 0.0471 - val_accuracy: 0.9861 - val_loss: 0.0596
Epoch 2/50
[1m3074/3074[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 7ms/step - accuracy: 0.9894 - loss: 0.0358 - val_accuracy: 0.9797 - val_loss: 0.0710
Epoch 3/50
[1m3074/3074[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 7ms/step - accuracy: 0.9890 - loss: 0.0373 - val_accuracy: 0.9861 - val_loss: 0.0591
Epoch 4/50
[1m3074/3074[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 7ms/step - accuracy: 0.9884 - loss: 0.0396 - val_accuracy: 0.9865 - val_loss: 0.0664
Epoch 5/50
[1m3074/3074[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 7ms/step - accuracy: 0.9879 - loss: 0.0414 - val_accuracy: 0.9840 - val_loss: 0.0640
Epoch 6/50
[1m3074/3074[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 7ms/step - accuracy: 0.9869 - loss: 0.0463 - val_accuracy: 0.9833 - val_loss: 0.0838
Epoch 7/50

In [74]:
# Evaluate on Train Set
y_train_proba = baseline_model_log.predict(X_train_resampled_log).ravel()
y_train_pred = (y_train_proba > 0.5).astype(int)

# Classification Report
print("Final Model Training Classification Report:\n")
print(classification_report(y_train_resampled, y_train_pred, digits=4))

# ROC-AUC
roc_auc_train = roc_auc_score(y_train_resampled, y_train_proba)
print(f"Final Model Training ROC-AUC Score: {roc_auc_train:.4f}")

# PR-AUC
pr_auc_train = average_precision_score(y_train_resampled, y_train_proba)
print(f"Final Model Training PR-AUC Score: {pr_auc_train:.4f}")

# Confusion Matrix
print("Final Model Training Confusion Matrix:\n", confusion_matrix(y_train_resampled, y_train_pred))

[1m49178/49178[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m45s[0m 906us/step
Final Model Training Classification Report:

              precision    recall  f1-score   support

           0     0.9814    0.9982    0.9897    786838
           1     0.9982    0.9811    0.9896    786838

    accuracy                         0.9896   1573676
   macro avg     0.9898    0.9896    0.9896   1573676
weighted avg     0.9898    0.9896    0.9896   1573676

Final Model Training ROC-AUC Score: 0.9982
Final Model Training PR-AUC Score: 0.9986
Final Model Training Confusion Matrix:
 [[785433   1405]
 [ 14892 771946]]


In [75]:
# Evaluate on Validation Set
y_val_proba = baseline_model_log.predict(X_val_log).ravel()
y_val_pred = (y_val_proba > 0.5).astype(int)

# Classification Report
print("Final Model Validation Classification Report:\n")
print(classification_report(y_val, y_val_pred, digits=4))

# ROC-AUC
roc_auc_val = roc_auc_score(y_val, y_val_proba)
print(f"Final Model Validation ROC-AUC Score: {roc_auc_val:.4f}")

# PR-AUC
pr_auc_val = average_precision_score(y_val, y_val_proba)
print(f"Final Model Validation PR-AUC Score: {pr_auc_val:.4f}")

# Confusion Matrix
print("Final Model Validation Confusion Matrix:\n", confusion_matrix(y_val, y_val_pred))

[1m3381/3381[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 940us/step
Final Model Validation Classification Report:

              precision    recall  f1-score   support

           0     0.9872    0.9989    0.9930    106718
           1     0.3607    0.0455    0.0808      1450

    accuracy                         0.9861    108168
   macro avg     0.6739    0.5222    0.5369    108168
weighted avg     0.9788    0.9861    0.9808    108168

Final Model Validation ROC-AUC Score: 0.8605
Final Model Validation PR-AUC Score: 0.1292
Final Model Validation Confusion Matrix:
 [[106601    117]
 [  1384     66]]


In [76]:
# Evaluate on Testing Set
y_test_proba = baseline_model_log.predict(X_test_log).ravel()
y_test_pred = (y_test_proba > 0.5).astype(int)

# Classification Report
print("Final Model Testing Classification Report:\n")
print(classification_report(y_test, y_test_pred, digits=4))

# ROC-AUC
roc_auc_test = roc_auc_score(y_test, y_test_proba)
print(f"Final Model Testing ROC-AUC Score: {roc_auc_test:.4f}")

# PR-AUC
pr_auc_test = average_precision_score(y_test, y_test_proba)
print(f"Final Model Testing PR-AUC Score: {pr_auc_test:.4f}")

# Confusion Matrix
print("Final Model Testing Confusion Matrix:\n", confusion_matrix(y_test, y_test_pred))

[1m3027/3027[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 963us/step
Final Model Testing Classification Report:

              precision    recall  f1-score   support

           0     0.9860    0.9989    0.9924     95415
           1     0.4124    0.0511    0.0910      1428

    accuracy                         0.9849     96843
   macro avg     0.6992    0.5250    0.5417     96843
weighted avg     0.9775    0.9849    0.9791     96843

Final Model Testing ROC-AUC Score: 0.8635
Final Model Testing PR-AUC Score: 0.1513
Final Model Testing Confusion Matrix:
 [[95311   104]
 [ 1355    73]]


### Regularization Tuning

In [77]:
# Build Regularized Model
reg_model_log = Sequential([
    Dense(32, activation='relu', input_dim=input_dim, kernel_regularizer=l2(0.001)),
    BatchNormalization(),
    Dropout(0.3),

    Dense(16, activation='relu', kernel_regularizer=l2(0.001)),
    BatchNormalization(),
    Dropout(0.3),

    Dense(1, activation='sigmoid')
])

reg_model_log.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy']
)

early_stop_reg_log = EarlyStopping(
    monitor='val_loss',
    patience=8,
    restore_best_weights=True
)

history_reg_log = reg_model_log.fit(
    X_train_resampled_log, y_train_resampled,
    validation_data=(X_val_log, y_val),
    epochs=50,
    batch_size=512,
    callbacks=[early_stop_reg_log],
    verbose=1
)


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/50
[1m3074/3074[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 12ms/step - accuracy: 0.9823 - loss: 0.0731 - val_accuracy: 0.9820 - val_loss: 0.0726
Epoch 2/50
[1m3074/3074[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m37s[0m 12ms/step - accuracy: 0.9889 - loss: 0.0459 - val_accuracy: 0.9852 - val_loss: 0.0661
Epoch 3/50
[1m3074/3074[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m37s[0m 12ms/step - accuracy: 0.9890 - loss: 0.0450 - val_accuracy: 0.9849 - val_loss: 0.0667
Epoch 4/50
[1m3074/3074[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m37s[0m 12ms/step - accuracy: 0.9889 - loss: 0.0447 - val_accuracy: 0.9852 - val_loss: 0.0666
Epoch 5/50
[1m3074/3074[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m37s[0m 12ms/step - accuracy: 0.9891 - loss: 0.0444 - val_accuracy: 0.9837 - val_loss: 0.0679
Epoch 6/50
[1m3074/3074[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m37s[0m 12ms/step - accuracy: 0.9890 - loss: 0.0440 - val_accuracy: 0.9854 - val_loss: 0.0657
Epoc

In [78]:
# Evaluate on Train Set
y_train_proba = reg_model_log.predict(X_train_resampled_log).ravel()
y_train_pred = (y_train_proba > 0.5).astype(int)

# Classification Report
print("Final Model Training Classification Report:\n")
print(classification_report(y_train_resampled, y_train_pred, digits=4))

# ROC-AUC
roc_auc_train = roc_auc_score(y_train_resampled, y_train_proba)
print(f"Final Model Training ROC-AUC Score: {roc_auc_train:.4f}")

# PR-AUC
pr_auc_train = average_precision_score(y_train_resampled, y_train_proba)
print(f"Final Model Training PR-AUC Score: {pr_auc_train:.4f}")

# Confusion Matrix
print("Final Model Training Confusion Matrix:\n", confusion_matrix(y_train_resampled, y_train_pred))

[1m49178/49178[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m58s[0m 1ms/step
Final Model Training Classification Report:

              precision    recall  f1-score   support

           0     0.9809    0.9983    0.9895    786838
           1     0.9983    0.9806    0.9893    786838

    accuracy                         0.9894   1573676
   macro avg     0.9896    0.9894    0.9894   1573676
weighted avg     0.9896    0.9894    0.9894   1573676

Final Model Training ROC-AUC Score: 0.9981
Final Model Training PR-AUC Score: 0.9986
Final Model Training Confusion Matrix:
 [[785524   1314]
 [ 15299 771539]]


In [79]:
# Evaluate on Validation Set
y_val_proba = reg_model_log.predict(X_val_log).ravel()
y_val_pred = (y_val_proba > 0.5).astype(int)

# Classification Report
print("Final Model Validation Classification Report:\n")
print(classification_report(y_val, y_val_pred, digits=4))

# ROC-AUC
roc_auc_val = roc_auc_score(y_val, y_val_proba)
print(f"Final Model Validation ROC-AUC Score: {roc_auc_val:.4f}")

# PR-AUC
pr_auc_val = average_precision_score(y_val, y_val_proba)
print(f"Final Model Validation PR-AUC Score: {pr_auc_val:.4f}")

# Confusion Matrix
print("Final Model Validation Confusion Matrix:\n", confusion_matrix(y_val, y_val_pred))

[1m3381/3381[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 1ms/step
Final Model Validation Classification Report:

              precision    recall  f1-score   support

           0     0.9870    0.9995    0.9932    106718
           1     0.4184    0.0283    0.0530      1450

    accuracy                         0.9864    108168
   macro avg     0.7027    0.5139    0.5231    108168
weighted avg     0.9793    0.9864    0.9806    108168

Final Model Validation ROC-AUC Score: 0.8577
Final Model Validation PR-AUC Score: 0.1349
Final Model Validation Confusion Matrix:
 [[106661     57]
 [  1409     41]]


In [80]:
# Evaluate on Testing Set
y_test_proba = reg_model_log.predict(X_test_log).ravel()
y_test_pred = (y_test_proba > 0.5).astype(int)

# Classification Report
print("Final Model Testing Classification Report:\n")
print(classification_report(y_test, y_test_pred, digits=4))

# ROC-AUC
roc_auc_test = roc_auc_score(y_test, y_test_proba)
print(f"Final Model Testing ROC-AUC Score: {roc_auc_test:.4f}")

# PR-AUC
pr_auc_test = average_precision_score(y_test, y_test_proba)
print(f"Final Model Testing PR-AUC Score: {pr_auc_test:.4f}")

# Confusion Matrix
print("Final Model Testing Confusion Matrix:\n", confusion_matrix(y_test, y_test_pred))

[1m3027/3027[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 1ms/step
Final Model Testing Classification Report:

              precision    recall  f1-score   support

           0     0.9855    0.9998    0.9926     95415
           1     0.6000    0.0189    0.0367      1428

    accuracy                         0.9853     96843
   macro avg     0.7928    0.5094    0.5146     96843
weighted avg     0.9798    0.9853    0.9785     96843

Final Model Testing ROC-AUC Score: 0.8563
Final Model Testing PR-AUC Score: 0.1504
Final Model Testing Confusion Matrix:
 [[95397    18]
 [ 1401    27]]


## Use original train with class weighting

In [81]:
X_train_log = X_train.drop(columns=original_columns, errors='ignore')
X_val_log = X_val.drop(columns=original_columns, errors='ignore')
X_test_log = X_test.drop(columns=original_columns, errors='ignore')

In [82]:
print("\nShapes AFTER dropping columns:")
print(X_train_log.shape, y_train.shape)
print(X_val_log.shape, y_val.shape)
print(X_test_log.shape, y_test.shape)


Shapes AFTER dropping columns:
(794989, 61) (794989,)
(108168, 61) (108168,)
(96843, 61) (96843,)


### Baseline

In [83]:
scaler = StandardScaler()
X_train_log = scaler.fit_transform(X_train_log)
X_val = scaler.transform(X_val_log)
X_test = scaler.transform(X_test_log)

In [84]:
# Compute class weights automatically
classes = np.unique(y_train)
class_weights = compute_class_weight(
    class_weight='balanced',
    classes=classes,
    y=y_train
)
class_weights = dict(zip(classes, class_weights))

print("Class weights:", class_weights)

# Build baseline model
input_dim = X_train_log.shape[1]

baseline_model_log = Sequential([
    Dense(32, activation='relu', input_dim=input_dim),
    Dense(16, activation='relu'),
    Dense(1, activation='sigmoid')
])

baseline_model_log.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy']
)

early_stop_baseline_log = EarlyStopping(
    monitor='val_loss',
    patience=8,
    restore_best_weights=True
)

history_baseline_log = baseline_model_log.fit(
    X_train_log, y_train,
    validation_data=(X_val_log, y_val),
    epochs=50,
    batch_size=512,
    callbacks=[early_stop_baseline_log],
    class_weight=class_weights, 
    verbose=1
)


Class weights: {0: 0.5051795922413508, 1: 48.76634768740032}


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/50
[1m1553/1553[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 9ms/step - accuracy: 0.7775 - loss: 0.4650 - val_accuracy: 0.5869 - val_loss: 35.0102
Epoch 2/50
[1m1553/1553[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 8ms/step - accuracy: 0.8002 - loss: 0.4522 - val_accuracy: 0.7443 - val_loss: 19.9747
Epoch 3/50
[1m1553/1553[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 8ms/step - accuracy: 0.7943 - loss: 0.4583 - val_accuracy: 0.0136 - val_loss: 216.9610
Epoch 4/50
[1m1553/1553[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 7ms/step - accuracy: 0.7917 - loss: 0.4669 - val_accuracy: 0.9103 - val_loss: 7.2724
Epoch 5/50
[1m1553/1553[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 7ms/step - accuracy: 0.7880 - loss: 0.4757 - val_accuracy: 0.8374 - val_loss: 12.4783
Epoch 6/50
[1m1553/1553[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 8ms/step - accuracy: 0.7855 - loss: 0.4783 - val_accuracy: 0.8592 - val_loss: 8.9478
Epoch

In [85]:
# Evaluate on Train Set
y_train_proba = baseline_model_log.predict(X_train_log).ravel()
y_train_pred = (y_train_proba > 0.5).astype(int)

# Classification Report
print("Final Model Training Classification Report:\n")
print(classification_report(y_train, y_train_pred, digits=4))

# ROC-AUC
roc_auc_train = roc_auc_score(y_train, y_train_proba)
print(f"Final Model Training ROC-AUC Score: {roc_auc_train:.4f}")

# PR-AUC
pr_auc_train = average_precision_score(y_train, y_train_proba)
print(f"Final Model Training PR-AUC Score: {pr_auc_train:.4f}")

# Confusion Matrix
print("Final Model Training Confusion Matrix:\n", confusion_matrix(y_train, y_train_pred))

[1m24844/24844[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m24s[0m 960us/step
Final Model Training Classification Report:

              precision    recall  f1-score   support

           0     0.9967    0.8035    0.8897    786838
           1     0.0375    0.7400    0.0715      8151

    accuracy                         0.8028    794989
   macro avg     0.5171    0.7717    0.4806    794989
weighted avg     0.9868    0.8028    0.8813    794989

Final Model Training ROC-AUC Score: 0.8508
Final Model Training PR-AUC Score: 0.1000
Final Model Training Confusion Matrix:
 [[632190 154648]
 [  2119   6032]]


In [86]:
# Evaluate on Validation Set
y_val_proba = baseline_model_log.predict(X_val_log).ravel()
y_val_pred = (y_val_proba > 0.5).astype(int)

# Classification Report
print("Final Model Validation Classification Report:\n")
print(classification_report(y_val, y_val_pred, digits=4))

# ROC-AUC
roc_auc_val = roc_auc_score(y_val, y_val_proba)
print(f"Final Model Validation ROC-AUC Score: {roc_auc_val:.4f}")

# PR-AUC
pr_auc_val = average_precision_score(y_val, y_val_proba)
print(f"Final Model Validation PR-AUC Score: {pr_auc_val:.4f}")

# Confusion Matrix
print("Final Model Validation Confusion Matrix:\n", confusion_matrix(y_val, y_val_pred))

[1m3381/3381[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 993us/step
Final Model Validation Classification Report:

              precision    recall  f1-score   support

           0     0.9876    0.9292    0.9575    106718
           1     0.0267    0.1428    0.0449      1450

    accuracy                         0.9186    108168
   macro avg     0.5071    0.5360    0.5012    108168
weighted avg     0.9747    0.9186    0.9453    108168

Final Model Validation ROC-AUC Score: 0.5643
Final Model Validation PR-AUC Score: 0.0178
Final Model Validation Confusion Matrix:
 [[99160  7558]
 [ 1243   207]]


In [87]:
# Evaluate on Testing Set
y_test_proba = baseline_model_log.predict(X_test_log).ravel()
y_test_pred = (y_test_proba > 0.5).astype(int)

# Classification Report
print("Final Model Testing Classification Report:\n")
print(classification_report(y_test, y_test_pred, digits=4))

# ROC-AUC
roc_auc_test = roc_auc_score(y_test, y_test_proba)
print(f"Final Model Testing ROC-AUC Score: {roc_auc_test:.4f}")

# PR-AUC
pr_auc_test = average_precision_score(y_test, y_test_proba)
print(f"Final Model Testing PR-AUC Score: {pr_auc_test:.4f}")

# Confusion Matrix
print("Final Model Testing Confusion Matrix:\n", confusion_matrix(y_test, y_test_pred))

[1m3027/3027[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 1ms/step
Final Model Testing Classification Report:

              precision    recall  f1-score   support

           0     0.9871    0.9229    0.9539     95415
           1     0.0361    0.1933    0.0609      1428

    accuracy                         0.9121     96843
   macro avg     0.5116    0.5581    0.5074     96843
weighted avg     0.9731    0.9121    0.9407     96843

Final Model Testing ROC-AUC Score: 0.5866
Final Model Testing PR-AUC Score: 0.0224
Final Model Testing Confusion Matrix:
 [[88055  7360]
 [ 1152   276]]


### Regularization Tuning

In [88]:
# Compute class weights automatically
classes = np.unique(y_train)
class_weights = compute_class_weight(
    class_weight='balanced',
    classes=classes,
    y=y_train
)
class_weights = dict(zip(classes, class_weights))

print("Class weights:", class_weights)

# Build Regularized Model
reg_model_log = Sequential([
    Dense(32, activation='relu', input_dim=input_dim, kernel_regularizer=l2(0.001)),
    BatchNormalization(),
    Dropout(0.3),

    Dense(16, activation='relu', kernel_regularizer=l2(0.001)),
    BatchNormalization(),
    Dropout(0.3),

    Dense(1, activation='sigmoid')
])

reg_model_log.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy']
)

early_stop_reg_log = EarlyStopping(
    monitor='val_loss',
    patience=8,
    restore_best_weights=True
)

history_reg_log = reg_model_log.fit(
    X_train_log, y_train,
    validation_data=(X_val_log, y_val),
    epochs=50,
    batch_size=512,
    callbacks=[early_stop_reg_log],
    class_weight=class_weights,
    verbose=1
)


Class weights: {0: 0.5051795922413508, 1: 48.76634768740032}


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/50
[1m1553/1553[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m24s[0m 14ms/step - accuracy: 0.7317 - loss: 0.5801 - val_accuracy: 0.7853 - val_loss: 16.5392
Epoch 2/50
[1m1553/1553[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 13ms/step - accuracy: 0.7930 - loss: 0.5011 - val_accuracy: 0.8019 - val_loss: 15.4795
Epoch 3/50
[1m1553/1553[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 14ms/step - accuracy: 0.8003 - loss: 0.4889 - val_accuracy: 0.7505 - val_loss: 18.8328
Epoch 4/50
[1m1553/1553[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 13ms/step - accuracy: 0.8006 - loss: 0.4852 - val_accuracy: 0.1258 - val_loss: 62.6123
Epoch 5/50
[1m1553/1553[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 13ms/step - accuracy: 0.7997 - loss: 0.4893 - val_accuracy: 0.2160 - val_loss: 64.4714
Epoch 6/50
[1m1553/1553[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 13ms/step - accuracy: 0.7977 - loss: 0.4945 - val_accuracy: 0.7289 - val_loss: 23.189

In [89]:
# Evaluate on Train Set
y_train_proba = reg_model_log.predict(X_train_log).ravel()
y_train_pred = (y_train_proba > 0.5).astype(int)

# Classification Report
print("Final Model Training Classification Report:\n")
print(classification_report(y_train, y_train_pred, digits=4))

# ROC-AUC
roc_auc_train = roc_auc_score(y_train, y_train_proba)
print(f"Final Model Training ROC-AUC Score: {roc_auc_train:.4f}")

# PR-AUC
pr_auc_train = average_precision_score(y_train, y_train_proba)
print(f"Final Model Training PR-AUC Score: {pr_auc_train:.4f}")

# Confusion Matrix
print("Final Model Training Confusion Matrix:\n", confusion_matrix(y_train, y_train_pred))

[1m24844/24844[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31s[0m 1ms/step
Final Model Training Classification Report:

              precision    recall  f1-score   support

           0     0.9973    0.7992    0.8874    786838
           1     0.0392    0.7913    0.0748      8151

    accuracy                         0.7992    794989
   macro avg     0.5183    0.7953    0.4811    794989
weighted avg     0.9875    0.7992    0.8790    794989

Final Model Training ROC-AUC Score: 0.8734
Final Model Training PR-AUC Score: 0.1243
Final Model Training Confusion Matrix:
 [[628877 157961]
 [  1701   6450]]


In [90]:
# Evaluate on Validation Set
y_val_proba = reg_model_log.predict(X_val_log).ravel()
y_val_pred = (y_val_proba > 0.5).astype(int)

# Classification Report
print("Final Model Validation Classification Report:\n")
print(classification_report(y_val, y_val_pred, digits=4))

# ROC-AUC
roc_auc_val = roc_auc_score(y_val, y_val_proba)
print(f"Final Model Validation ROC-AUC Score: {roc_auc_val:.4f}")

# PR-AUC
pr_auc_val = average_precision_score(y_val, y_val_proba)
print(f"Final Model Validation PR-AUC Score: {pr_auc_val:.4f}")

# Confusion Matrix
print("Final Model Validation Confusion Matrix:\n", confusion_matrix(y_val, y_val_pred))

[1m3381/3381[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 1ms/step
Final Model Validation Classification Report:

              precision    recall  f1-score   support

           0     0.9892    0.8458    0.9119    106718
           1     0.0274    0.3200    0.0505      1450

    accuracy                         0.8388    108168
   macro avg     0.5083    0.5829    0.4812    108168
weighted avg     0.9763    0.8388    0.9004    108168

Final Model Validation ROC-AUC Score: 0.6111
Final Model Validation PR-AUC Score: 0.0208
Final Model Validation Confusion Matrix:
 [[90267 16451]
 [  986   464]]


In [91]:
# Evaluate on Testing Set
y_test_proba = reg_model_log.predict(X_test_log).ravel()
y_test_pred = (y_test_proba > 0.5).astype(int)

# Classification Report
print("Final Model Testing Classification Report:\n")
print(classification_report(y_test, y_test_pred, digits=4))

# ROC-AUC
roc_auc_test = roc_auc_score(y_test, y_test_proba)
print(f"Final Model Testing ROC-AUC Score: {roc_auc_test:.4f}")

# PR-AUC
pr_auc_test = average_precision_score(y_test, y_test_proba)
print(f"Final Model Testing PR-AUC Score: {pr_auc_test:.4f}")

# Confusion Matrix
print("Final Model Testing Confusion Matrix:\n", confusion_matrix(y_test, y_test_pred))

[1m3027/3027[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 1ms/step
Final Model Testing Classification Report:

              precision    recall  f1-score   support

           0     0.9886    0.8479    0.9129     95415
           1     0.0332    0.3487    0.0606      1428

    accuracy                         0.8405     96843
   macro avg     0.5109    0.5983    0.4867     96843
weighted avg     0.9745    0.8405    0.9003     96843

Final Model Testing ROC-AUC Score: 0.6354
Final Model Testing PR-AUC Score: 0.0251
Final Model Testing Confusion Matrix:
 [[80899 14516]
 [  930   498]]


# Use Non Log-Transformed Features

In [92]:
# Build dataset directory path
base_dir = os.path.dirname(os.getcwd())  
data_dir = os.path.join(base_dir, "datasets")

# Load CSVs
X_train_resampled = pd.read_csv(os.path.join(data_dir, "X_train_resampled.csv"))
y_train_resampled = pd.read_csv(os.path.join(data_dir, "y_train_resampled.csv")).values.ravel()

X_train = pd.read_csv(os.path.join(data_dir, "X_train.csv"))
y_train = pd.read_csv(os.path.join(data_dir, "y_train.csv")).values.ravel()

X_val = pd.read_csv(os.path.join(data_dir, "X_val.csv"))
y_val = pd.read_csv(os.path.join(data_dir, "y_val.csv")).values.ravel()

X_test = pd.read_csv(os.path.join(data_dir, "X_test.csv"))
y_test = pd.read_csv(os.path.join(data_dir, "y_test.csv")).values.ravel()

In [93]:
log_columns = ['days_since_request_log', 'intended_balcon_amount_log', 'zip_count_4w_log', 'velocity_24h_log', 'velocity_4w_log', 'date_of_birth_distinct_emails_4w_log', 'session_length_in_minutes_log']

## Use SMOTE Resampled Training Data

In [94]:
X_train_resampled_orig = X_train_resampled.drop(columns=log_columns, errors='ignore')
X_val_orig = X_val.drop(columns=log_columns, errors='ignore')
X_test_orig = X_test.drop(columns=log_columns, errors='ignore')

In [95]:
print("\nShapes AFTER dropping columns:")
print(X_train_resampled_orig.shape, y_train_resampled.shape)
print(X_val_orig.shape, y_val.shape)
print(X_test_orig.shape, y_test.shape)


Shapes AFTER dropping columns:
(1573676, 61) (1573676,)
(108168, 61) (108168,)
(96843, 61) (96843,)


### Baseline

In [96]:
scaler = StandardScaler()
X_train_resampled_orig = scaler.fit_transform(X_train_resampled_orig)
X_val_orig = scaler.transform(X_val_orig)
X_test_orig = scaler.transform(X_test_orig)

In [97]:
# Build baseline model
input_dim = X_train_resampled_orig.shape[1]

baseline_model_orig = Sequential([
    Dense(32, activation='relu', input_dim=input_dim),
    Dense(16, activation='relu'),
    Dense(1, activation='sigmoid')
])

baseline_model_orig.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy']
)

early_stop_baseline_orig = EarlyStopping(
    monitor='val_loss',
    patience=8,
    restore_best_weights=True
)

history_baseline_orig = baseline_model_orig.fit(
    X_train_resampled_orig, y_train_resampled,
    validation_data=(X_val_orig, y_val),
    epochs=50,
    batch_size=512,
    callbacks=[early_stop_baseline_orig],
    verbose=1
)


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/50
[1m3074/3074[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m25s[0m 8ms/step - accuracy: 0.9848 - loss: 0.0484 - val_accuracy: 0.9858 - val_loss: 0.0599
Epoch 2/50
[1m3074/3074[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m25s[0m 8ms/step - accuracy: 0.9895 - loss: 0.0358 - val_accuracy: 0.9826 - val_loss: 0.0687
Epoch 3/50
[1m3074/3074[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m24s[0m 8ms/step - accuracy: 0.9890 - loss: 0.0382 - val_accuracy: 0.9764 - val_loss: 0.0810
Epoch 4/50
[1m3074/3074[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 8ms/step - accuracy: 0.9887 - loss: 0.0398 - val_accuracy: 0.9865 - val_loss: 0.0642
Epoch 5/50
[1m3074/3074[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 8ms/step - accuracy: 0.9882 - loss: 0.0436 - val_accuracy: 0.9860 - val_loss: 0.0709
Epoch 6/50
[1m3074/3074[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 8ms/step - accuracy: 0.9884 - loss: 0.0431 - val_accuracy: 0.9738 - val_loss: 0.1009
Epoch 7/50

In [98]:
# Evaluate on Train Set
y_train_proba = baseline_model_orig.predict(X_train_resampled_orig).ravel()
y_train_pred = (y_train_proba > 0.5).astype(int)

# Classification Report
print("Final Model Training Classification Report:\n")
print(classification_report(y_train_resampled, y_train_pred, digits=4))

# ROC-AUC
roc_auc_train = roc_auc_score(y_train_resampled, y_train_proba)
print(f"Final Model Training ROC-AUC Score: {roc_auc_train:.4f}")

# PR-AUC
pr_auc_train = average_precision_score(y_train_resampled, y_train_proba)
print(f"Final Model Training PR-AUC Score: {pr_auc_train:.4f}")

# Confusion Matrix
print("Final Model Training Confusion Matrix:\n", confusion_matrix(y_train_resampled, y_train_pred))

[1m49178/49178[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m48s[0m 972us/step
Final Model Training Classification Report:

              precision    recall  f1-score   support

           0     0.9828    0.9975    0.9901    786838
           1     0.9975    0.9826    0.9900    786838

    accuracy                         0.9900   1573676
   macro avg     0.9902    0.9900    0.9900   1573676
weighted avg     0.9902    0.9900    0.9900   1573676

Final Model Training ROC-AUC Score: 0.9982
Final Model Training PR-AUC Score: 0.9986
Final Model Training Confusion Matrix:
 [[784873   1965]
 [ 13706 773132]]


In [99]:
# Evaluate on Validation Set
y_val_proba = baseline_model_orig.predict(X_val_orig).ravel()
y_val_pred = (y_val_proba > 0.5).astype(int)

# Classification Report
print("Final Model Validation Classification Report:\n")
print(classification_report(y_val, y_val_pred, digits=4))

# ROC-AUC
roc_auc_val = roc_auc_score(y_val, y_val_proba)
print(f"Final Model Validation ROC-AUC Score: {roc_auc_val:.4f}")

# PR-AUC
pr_auc_val = average_precision_score(y_val, y_val_proba)
print(f"Final Model Validation PR-AUC Score: {pr_auc_val:.4f}")

# Confusion Matrix
print("Final Model Validation Confusion Matrix:\n", confusion_matrix(y_val, y_val_pred))

[1m3381/3381[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 1ms/step
Final Model Validation Classification Report:

              precision    recall  f1-score   support

           0     0.9874    0.9983    0.9928    106718
           1     0.3270    0.0593    0.1004      1450

    accuracy                         0.9858    108168
   macro avg     0.6572    0.5288    0.5466    108168
weighted avg     0.9785    0.9858    0.9809    108168

Final Model Validation ROC-AUC Score: 0.8527
Final Model Validation PR-AUC Score: 0.1260
Final Model Validation Confusion Matrix:
 [[106541    177]
 [  1364     86]]


In [100]:
# Evaluate on Testing Set
y_test_proba = baseline_model_orig.predict(X_test_orig).ravel()
y_test_pred = (y_test_proba > 0.5).astype(int)

# Classification Report
print("Final Model Testing Classification Report:\n")
print(classification_report(y_test, y_test_pred, digits=4))

# ROC-AUC
roc_auc_test = roc_auc_score(y_test, y_test_proba)
print(f"Final Model Testing ROC-AUC Score: {roc_auc_test:.4f}")

# PR-AUC
pr_auc_test = average_precision_score(y_test, y_test_proba)
print(f"Final Model Testing PR-AUC Score: {pr_auc_test:.4f}")

# Confusion Matrix
print("Final Model Testing Confusion Matrix:\n", confusion_matrix(y_test, y_test_pred))

[1m3027/3027[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 1ms/step
Final Model Testing Classification Report:

              precision    recall  f1-score   support

           0     0.9860    0.9986    0.9923     95415
           1     0.3610    0.0518    0.0906      1428

    accuracy                         0.9847     96843
   macro avg     0.6735    0.5252    0.5414     96843
weighted avg     0.9768    0.9847    0.9790     96843

Final Model Testing ROC-AUC Score: 0.8574
Final Model Testing PR-AUC Score: 0.1477
Final Model Testing Confusion Matrix:
 [[95284   131]
 [ 1354    74]]


### Regularization Tuning

In [101]:
# Build Regularized Model
reg_model_orig = Sequential([
    Dense(32, activation='relu', input_dim=input_dim, kernel_regularizer=l2(0.001)),
    BatchNormalization(),
    Dropout(0.3),

    Dense(16, activation='relu', kernel_regularizer=l2(0.001)),
    BatchNormalization(),
    Dropout(0.3),

    Dense(1, activation='sigmoid')
])

reg_model_orig.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy']
)

early_stop_reg_orig = EarlyStopping(
    monitor='val_loss',
    patience=8,
    restore_best_weights=True
)

history_reg_orig = reg_model_orig.fit(
    X_train_resampled_orig, y_train_resampled,
    validation_data=(X_val_orig, y_val),
    epochs=50,
    batch_size=512,
    callbacks=[early_stop_reg_orig],
    verbose=1
)


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/50
[1m3074/3074[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m44s[0m 13ms/step - accuracy: 0.9768 - loss: 0.0861 - val_accuracy: 0.9841 - val_loss: 0.0692
Epoch 2/50
[1m3074/3074[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 13ms/step - accuracy: 0.9889 - loss: 0.0459 - val_accuracy: 0.9821 - val_loss: 0.0705
Epoch 3/50
[1m3074/3074[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 13ms/step - accuracy: 0.9890 - loss: 0.0446 - val_accuracy: 0.9846 - val_loss: 0.0659
Epoch 4/50
[1m3074/3074[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 13ms/step - accuracy: 0.9889 - loss: 0.0442 - val_accuracy: 0.9806 - val_loss: 0.0730
Epoch 5/50
[1m3074/3074[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 13ms/step - accuracy: 0.9890 - loss: 0.0441 - val_accuracy: 0.9860 - val_loss: 0.0638
Epoch 6/50
[1m3074/3074[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 13ms/step - accuracy: 0.9890 - loss: 0.0440 - val_accuracy: 0.9842 - val_loss: 0.0671
Epoc

In [102]:
# Evaluate on Train Set
y_train_proba = reg_model_orig.predict(X_train_resampled_orig).ravel()
y_train_pred = (y_train_proba > 0.5).astype(int)

# Classification Report
print("Final Model Training Classification Report:\n")
print(classification_report(y_train_resampled, y_train_pred, digits=4))

# ROC-AUC
roc_auc_train = roc_auc_score(y_train_resampled, y_train_proba)
print(f"Final Model Training ROC-AUC Score: {roc_auc_train:.4f}")

# PR-AUC
pr_auc_train = average_precision_score(y_train_resampled, y_train_proba)
print(f"Final Model Training PR-AUC Score: {pr_auc_train:.4f}")

# Confusion Matrix
print("Final Model Training Confusion Matrix:\n", confusion_matrix(y_train_resampled, y_train_pred))

[1m49178/49178[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m60s[0m 1ms/step
Final Model Training Classification Report:

              precision    recall  f1-score   support

           0     0.9825    0.9978    0.9901    786838
           1     0.9977    0.9822    0.9899    786838

    accuracy                         0.9900   1573676
   macro avg     0.9901    0.9900    0.9900   1573676
weighted avg     0.9901    0.9900    0.9900   1573676

Final Model Training ROC-AUC Score: 0.9982
Final Model Training PR-AUC Score: 0.9986
Final Model Training Confusion Matrix:
 [[785070   1768]
 [ 13982 772856]]


In [103]:
# Evaluate on Validation Set
y_val_proba = reg_model_orig.predict(X_val_orig).ravel()
y_val_pred = (y_val_proba > 0.5).astype(int)

# Classification Report
print("Final Model Validation Classification Report:\n")
print(classification_report(y_val, y_val_pred, digits=4))

# ROC-AUC
roc_auc_val = roc_auc_score(y_val, y_val_proba)
print(f"Final Model Validation ROC-AUC Score: {roc_auc_val:.4f}")

# PR-AUC
pr_auc_val = average_precision_score(y_val, y_val_proba)
print(f"Final Model Validation PR-AUC Score: {pr_auc_val:.4f}")

# Confusion Matrix
print("Final Model Validation Confusion Matrix:\n", confusion_matrix(y_val, y_val_pred))

[1m3381/3381[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 1ms/step
Final Model Validation Classification Report:

              precision    recall  f1-score   support

           0     0.9872    0.9987    0.9929    106718
           1     0.3366    0.0469    0.0823      1450

    accuracy                         0.9860    108168
   macro avg     0.6619    0.5228    0.5376    108168
weighted avg     0.9785    0.9860    0.9807    108168

Final Model Validation ROC-AUC Score: 0.8559
Final Model Validation PR-AUC Score: 0.1221
Final Model Validation Confusion Matrix:
 [[106584    134]
 [  1382     68]]


In [106]:
# Evaluate on Testing Set
y_test_proba = reg_model_orig.predict(X_test_orig).ravel()
y_test_pred = (y_test_proba > 0.5).astype(int)

# Classification Report
print("Final Model Testing Classification Report:\n")
print(classification_report(y_test, y_test_pred, digits=4))

# ROC-AUC
roc_auc_test = roc_auc_score(y_test, y_test_proba)
print(f"Final Model Testing ROC-AUC Score: {roc_auc_test:.4f}")

# PR-AUC
pr_auc_test = average_precision_score(y_test, y_test_proba)
print(f"Final Model Testing PR-AUC Score: {pr_auc_test:.4f}")

# Confusion Matrix
print("Final Model Testing Confusion Matrix:\n", confusion_matrix(y_test, y_test_pred))

[1m3027/3027[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 1ms/step
Final Model Testing Classification Report:

              precision    recall  f1-score   support

           0     0.9857    0.9995    0.9925     95415
           1     0.4574    0.0301    0.0565      1428

    accuracy                         0.9852     96843
   macro avg     0.7216    0.5148    0.5245     96843
weighted avg     0.9779    0.9852    0.9787     96843

Final Model Testing ROC-AUC Score: 0.8605
Final Model Testing PR-AUC Score: 0.1433
Final Model Testing Confusion Matrix:
 [[95364    51]
 [ 1385    43]]


## Use original train with class weighting

In [107]:
X_train_orig = X_train.drop(columns=log_columns, errors='ignore')
X_val_orig = X_val.drop(columns=log_columns, errors='ignore')
X_test_orig = X_test.drop(columns=log_columns, errors='ignore')

In [108]:
print("\nShapes AFTER dropping columns:")
print(X_train_orig.shape, y_train.shape)
print(X_val_orig.shape, y_val.shape)
print(X_test_orig.shape, y_test.shape)


Shapes AFTER dropping columns:
(794989, 61) (794989,)
(108168, 61) (108168,)
(96843, 61) (96843,)


### Baseline

In [109]:
scaler = StandardScaler()
X_train_orig = scaler.fit_transform(X_train_orig)
X_val_orig = scaler.transform(X_val_orig)
X_test_orig = scaler.transform(X_test_orig)

In [110]:
# Compute class weights automatically
classes = np.unique(y_train)
class_weights = compute_class_weight(
    class_weight='balanced',
    classes=classes,
    y=y_train
)
class_weights = dict(zip(classes, class_weights))

print("Class weights:", class_weights)

# Build baseline model
input_dim = X_train_orig.shape[1]

baseline_model_orig = Sequential([
    Dense(32, activation='relu', input_dim=input_dim),
    Dense(16, activation='relu'),
    Dense(1, activation='sigmoid')
])

baseline_model_orig.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy']
)

early_stop_baseline_orig = EarlyStopping(
    monitor='val_loss',
    patience=8,
    restore_best_weights=True
)

history_baseline_orig = baseline_model_orig.fit(
    X_train_orig, y_train,
    validation_data=(X_val_orig, y_val),
    epochs=50,
    batch_size=512,
    callbacks=[early_stop_baseline_orig],
    class_weight=class_weights, 
    verbose=1
)


Class weights: {0: 0.5051795922413508, 1: 48.76634768740032}


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/50
[1m1553/1553[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 8ms/step - accuracy: 0.7732 - loss: 0.4695 - val_accuracy: 0.8477 - val_loss: 0.3601
Epoch 2/50
[1m1553/1553[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 8ms/step - accuracy: 0.7969 - loss: 0.4529 - val_accuracy: 0.8258 - val_loss: 0.3999
Epoch 3/50
[1m1553/1553[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 7ms/step - accuracy: 0.7961 - loss: 0.4566 - val_accuracy: 0.8743 - val_loss: 0.3171
Epoch 4/50
[1m1553/1553[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 7ms/step - accuracy: 0.7929 - loss: 0.4660 - val_accuracy: 0.7140 - val_loss: 0.6267
Epoch 5/50
[1m1553/1553[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 7ms/step - accuracy: 0.7934 - loss: 0.4699 - val_accuracy: 0.8864 - val_loss: 0.2853
Epoch 6/50
[1m1553/1553[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 7ms/step - accuracy: 0.7898 - loss: 0.4746 - val_accuracy: 0.8818 - val_loss: 0.3085
Epoch 7/50

In [111]:
# Evaluate on Train Set
y_train_proba = baseline_model_orig.predict(X_train_orig).ravel()
y_train_pred = (y_train_proba > 0.5).astype(int)

# Classification Report
print("Final Model Training Classification Report:\n")
print(classification_report(y_train, y_train_pred, digits=4))

# ROC-AUC
roc_auc_train = roc_auc_score(y_train, y_train_proba)
print(f"Final Model Training ROC-AUC Score: {roc_auc_train:.4f}")

# PR-AUC
pr_auc_train = average_precision_score(y_train, y_train_proba)
print(f"Final Model Training PR-AUC Score: {pr_auc_train:.4f}")

# Confusion Matrix
print("Final Model Training Confusion Matrix:\n", confusion_matrix(y_train, y_train_pred))

[1m24844/24844[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 933us/step
Final Model Training Classification Report:

              precision    recall  f1-score   support

           0     0.9955    0.7821    0.8760    786838
           1     0.0303    0.6570    0.0579      8151

    accuracy                         0.7808    794989
   macro avg     0.5129    0.7195    0.4669    794989
weighted avg     0.9856    0.7808    0.8676    794989

Final Model Training ROC-AUC Score: 0.7923
Final Model Training PR-AUC Score: 0.0461
Final Model Training Confusion Matrix:
 [[615355 171483]
 [  2796   5355]]


In [112]:
# Evaluate on Validation Set
y_val_proba = baseline_model_orig.predict(X_val_orig).ravel()
y_val_pred = (y_val_proba > 0.5).astype(int)

# Classification Report
print("Final Model Validation Classification Report:\n")
print(classification_report(y_val, y_val_pred, digits=4))

# ROC-AUC
roc_auc_val = roc_auc_score(y_val, y_val_proba)
print(f"Final Model Validation ROC-AUC Score: {roc_auc_val:.4f}")

# PR-AUC
pr_auc_val = average_precision_score(y_val, y_val_proba)
print(f"Final Model Validation PR-AUC Score: {pr_auc_val:.4f}")

# Confusion Matrix
print("Final Model Validation Confusion Matrix:\n", confusion_matrix(y_val, y_val_pred))

[1m3381/3381[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 965us/step
Final Model Validation Classification Report:

              precision    recall  f1-score   support

           0     0.9900    0.9647    0.9772    106718
           1     0.0985    0.2841    0.1463      1450

    accuracy                         0.9556    108168
   macro avg     0.5443    0.6244    0.5618    108168
weighted avg     0.9781    0.9556    0.9660    108168

Final Model Validation ROC-AUC Score: 0.8263
Final Model Validation PR-AUC Score: 0.0695
Final Model Validation Confusion Matrix:
 [[102949   3769]
 [  1038    412]]


In [113]:
# Evaluate on Testing Set
y_test_proba = baseline_model_orig.predict(X_test_orig).ravel()
y_test_pred = (y_test_proba > 0.5).astype(int)

# Classification Report
print("Final Model Testing Classification Report:\n")
print(classification_report(y_test, y_test_pred, digits=4))

# ROC-AUC
roc_auc_test = roc_auc_score(y_test, y_test_proba)
print(f"Final Model Testing ROC-AUC Score: {roc_auc_test:.4f}")

# PR-AUC
pr_auc_test = average_precision_score(y_test, y_test_proba)
print(f"Final Model Testing PR-AUC Score: {pr_auc_test:.4f}")

# Confusion Matrix
print("Final Model Testing Confusion Matrix:\n", confusion_matrix(y_test, y_test_pred))

[1m3027/3027[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 955us/step
Final Model Testing Classification Report:

              precision    recall  f1-score   support

           0     0.9866    0.9917    0.9892     95415
           1     0.1564    0.1029    0.1242      1428

    accuracy                         0.9786     96843
   macro avg     0.5715    0.5473    0.5567     96843
weighted avg     0.9744    0.9786    0.9764     96843

Final Model Testing ROC-AUC Score: 0.8392
Final Model Testing PR-AUC Score: 0.0883
Final Model Testing Confusion Matrix:
 [[94622   793]
 [ 1281   147]]


### Regularization Tuning

In [114]:
# Compute class weights automatically
classes = np.unique(y_train)
class_weights = compute_class_weight(
    class_weight='balanced',
    classes=classes,
    y=y_train
)
class_weights = dict(zip(classes, class_weights))

print("Class weights:", class_weights)

# Build Regularized Model
reg_model_orig = Sequential([
    Dense(32, activation='relu', input_dim=input_dim, kernel_regularizer=l2(0.001)),
    BatchNormalization(),
    Dropout(0.3),

    Dense(16, activation='relu', kernel_regularizer=l2(0.001)),
    BatchNormalization(),
    Dropout(0.3),

    Dense(1, activation='sigmoid')
])

reg_model_orig.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy']
)

early_stop_reg_orig = EarlyStopping(
    monitor='val_loss',
    patience=8,
    restore_best_weights=True
)

history_reg_orig = reg_model_orig.fit(
    X_train_orig, y_train,
    validation_data=(X_val_orig, y_val),
    epochs=50,
    batch_size=512,
    callbacks=[early_stop_reg_orig],
    class_weight=class_weights,
    verbose=1
)


Class weights: {0: 0.5051795922413508, 1: 48.76634768740032}


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/50
[1m1553/1553[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m24s[0m 13ms/step - accuracy: 0.7322 - loss: 0.5718 - val_accuracy: 0.8269 - val_loss: 0.4407
Epoch 2/50
[1m1553/1553[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 12ms/step - accuracy: 0.7955 - loss: 0.4983 - val_accuracy: 0.8037 - val_loss: 0.4643
Epoch 3/50
[1m1553/1553[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 12ms/step - accuracy: 0.7996 - loss: 0.4885 - val_accuracy: 0.7784 - val_loss: 0.5040
Epoch 4/50
[1m1553/1553[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 12ms/step - accuracy: 0.7982 - loss: 0.4890 - val_accuracy: 0.8037 - val_loss: 0.4629
Epoch 5/50
[1m1553/1553[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 13ms/step - accuracy: 0.7998 - loss: 0.4870 - val_accuracy: 0.8198 - val_loss: 0.4327
Epoch 6/50
[1m1553/1553[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 12ms/step - accuracy: 0.7990 - loss: 0.4896 - val_accuracy: 0.7870 - val_loss: 0.4889
Epoc

In [115]:
# Evaluate on Train Set
y_train_proba = reg_model_orig.predict(X_train_orig).ravel()
y_train_pred = (y_train_proba > 0.5).astype(int)

# Classification Report
print("Final Model Training Classification Report:\n")
print(classification_report(y_train, y_train_pred, digits=4))

# ROC-AUC
roc_auc_train = roc_auc_score(y_train, y_train_proba)
print(f"Final Model Training ROC-AUC Score: {roc_auc_train:.4f}")

# PR-AUC
pr_auc_train = average_precision_score(y_train, y_train_proba)
print(f"Final Model Training PR-AUC Score: {pr_auc_train:.4f}")

# Confusion Matrix
print("Final Model Training Confusion Matrix:\n", confusion_matrix(y_train, y_train_pred))

[1m24844/24844[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m30s[0m 1ms/step
Final Model Training Classification Report:

              precision    recall  f1-score   support

           0     0.9973    0.8055    0.8912    786838
           1     0.0402    0.7857    0.0764      8151

    accuracy                         0.8053    794989
   macro avg     0.5187    0.7956    0.4838    794989
weighted avg     0.9874    0.8053    0.8828    794989

Final Model Training ROC-AUC Score: 0.8733
Final Model Training PR-AUC Score: 0.1247
Final Model Training Confusion Matrix:
 [[633771 153067]
 [  1747   6404]]


In [116]:
# Evaluate on Validation Set
y_val_proba = reg_model_orig.predict(X_val_orig).ravel()
y_val_pred = (y_val_proba > 0.5).astype(int)

# Classification Report
print("Final Model Validation Classification Report:\n")
print(classification_report(y_val, y_val_pred, digits=4))

# ROC-AUC
roc_auc_val = roc_auc_score(y_val, y_val_proba)
print(f"Final Model Validation ROC-AUC Score: {roc_auc_val:.4f}")

# PR-AUC
pr_auc_val = average_precision_score(y_val, y_val_proba)
print(f"Final Model Validation PR-AUC Score: {pr_auc_val:.4f}")

# Confusion Matrix
print("Final Model Validation Confusion Matrix:\n", confusion_matrix(y_val, y_val_pred))

[1m3381/3381[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 1ms/step
Final Model Validation Classification Report:

              precision    recall  f1-score   support

           0     0.9963    0.8204    0.8998    106718
           1     0.0553    0.7738    0.1032      1450

    accuracy                         0.8198    108168
   macro avg     0.5258    0.7971    0.5015    108168
weighted avg     0.9837    0.8198    0.8892    108168

Final Model Validation ROC-AUC Score: 0.8763
Final Model Validation PR-AUC Score: 0.1504
Final Model Validation Confusion Matrix:
 [[87553 19165]
 [  328  1122]]


In [117]:
# Evaluate on Testing Set
y_test_proba = reg_model_orig.predict(X_test_orig).ravel()
y_test_pred = (y_test_proba > 0.5).astype(int)

# Classification Report
print("Final Model Testing Classification Report:\n")
print(classification_report(y_test, y_test_pred, digits=4))

# ROC-AUC
roc_auc_test = roc_auc_score(y_test, y_test_proba)
print(f"Final Model Testing ROC-AUC Score: {roc_auc_test:.4f}")

# PR-AUC
pr_auc_test = average_precision_score(y_test, y_test_proba)
print(f"Final Model Testing PR-AUC Score: {pr_auc_test:.4f}")

# Confusion Matrix
print("Final Model Testing Confusion Matrix:\n", confusion_matrix(y_test, y_test_pred))

[1m3027/3027[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 1ms/step
Final Model Testing Classification Report:

              precision    recall  f1-score   support

           0     0.9953    0.8644    0.9252     95415
           1     0.0743    0.7276    0.1348      1428

    accuracy                         0.8623     96843
   macro avg     0.5348    0.7960    0.5300     96843
weighted avg     0.9817    0.8623    0.9136     96843

Final Model Testing ROC-AUC Score: 0.8799
Final Model Testing PR-AUC Score: 0.1811
Final Model Testing Confusion Matrix:
 [[82472 12943]
 [  389  1039]]
