# Load Dataset

In [60]:
# Import Statements
import os
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, roc_auc_score, average_precision_score, precision_recall_curve, confusion_matrix
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.utils.class_weight import compute_class_weight
from tensorflow.keras.regularizers import l2
from sklearn.utils import class_weight
import tensorflow as tf

In [61]:
# Build dataset directory path
base_dir = os.path.dirname(os.getcwd())  
data_dir = os.path.join(base_dir, "datasets")

# Load CSVs
X_train_resampled = pd.read_csv(os.path.join(data_dir, "X_train_resampled.csv"))
y_train_resampled = pd.read_csv(os.path.join(data_dir, "y_train_resampled.csv")).values.ravel()

X_train = pd.read_csv(os.path.join(data_dir, "X_train.csv"))
y_train = pd.read_csv(os.path.join(data_dir, "y_train.csv")).values.ravel()

X_val = pd.read_csv(os.path.join(data_dir, "X_val.csv"))
y_val = pd.read_csv(os.path.join(data_dir, "y_val.csv")).values.ravel()

X_test = pd.read_csv(os.path.join(data_dir, "X_test.csv"))
y_test = pd.read_csv(os.path.join(data_dir, "y_test.csv")).values.ravel()

In [62]:
print("Shapes BEFORE dropping columns:")
print(X_train_resampled.shape, y_train_resampled.shape)
print(X_train.shape, y_train.shape)
print(X_val.shape, y_val.shape)
print(X_test.shape, y_test.shape)

Shapes BEFORE dropping columns:
(1573676, 68) (1573676,)
(794989, 68) (794989,)
(108168, 68) (108168,)
(96843, 68) (96843,)


# Use Log-Transformed Features

In [63]:
original_columns = ['days_since_request', 'intended_balcon_amount_clean', 'zip_count_4w', 'velocity_24h', 'velocity_4w', 'date_of_birth_distinct_emails_4w', 'session_length_in_minutes_cleaned']

## Use SMOTE Resampled Training Data

In [64]:
X_train_resampled_log = X_train_resampled.drop(columns=original_columns, errors='ignore')
X_val_log = X_val.drop(columns=original_columns, errors='ignore')
X_test_log = X_test.drop(columns=original_columns, errors='ignore')

In [65]:
print("\nShapes AFTER dropping columns:")
print(X_train_resampled_log.shape, y_train_resampled.shape)
print(X_val_log.shape, y_val.shape)
print(X_test_log.shape, y_test.shape)


Shapes AFTER dropping columns:
(1573676, 61) (1573676,)
(108168, 61) (108168,)
(96843, 61) (96843,)


### Baseline

In [66]:
scaler = StandardScaler()
X_train_resampled_log = scaler.fit_transform(X_train_resampled_log)
X_val_log = scaler.transform(X_val_log)
X_test_log = scaler.transform(X_test_log)

In [67]:
# Build baseline model
input_dim = X_train_resampled_log.shape[1]

baseline_model_log = Sequential([
    Dense(32, activation='relu', input_dim=input_dim),
    Dense(16, activation='relu'),
    Dense(1, activation='sigmoid')
])

baseline_model_log.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy']
)

early_stop_baseline_log = EarlyStopping(
    monitor='val_loss',
    patience=8,
    restore_best_weights=True
)

history_baseline_log = baseline_model_log.fit(
    X_train_resampled_log, y_train_resampled,
    validation_data=(X_val_log, y_val),
    epochs=50,
    batch_size=512,
    callbacks=[early_stop_baseline_log],
    verbose=1
)


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/50
[1m3074/3074[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m26s[0m 8ms/step - accuracy: 0.9857 - loss: 0.0462 - val_accuracy: 0.9825 - val_loss: 0.0647
Epoch 2/50
[1m3074/3074[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m24s[0m 8ms/step - accuracy: 0.9893 - loss: 0.0363 - val_accuracy: 0.9866 - val_loss: 0.0630
Epoch 3/50
[1m3074/3074[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m24s[0m 8ms/step - accuracy: 0.9888 - loss: 0.0390 - val_accuracy: 0.9866 - val_loss: 0.0616
Epoch 4/50
[1m3074/3074[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 8ms/step - accuracy: 0.9886 - loss: 0.0401 - val_accuracy: 0.9866 - val_loss: 0.0685
Epoch 5/50
[1m3074/3074[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 8ms/step - accuracy: 0.9884 - loss: 0.0425 - val_accuracy: 0.9850 - val_loss: 0.0682
Epoch 6/50
[1m3074/3074[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m24s[0m 8ms/step - accuracy: 0.9880 - loss: 0.0464 - val_accuracy: 0.9825 - val_loss: 0.0764
Epoch 7/50

In [68]:
# Evaluate on Train Set
y_train_proba = baseline_model_log.predict(X_train_resampled_log).ravel()
y_train_pred = (y_train_proba > 0.5).astype(int)

# Classification Report
print("Final Model Training Classification Report:\n")
print(classification_report(y_train_resampled, y_train_pred, digits=4))

# ROC-AUC
roc_auc_train = roc_auc_score(y_train_resampled, y_train_proba)
print(f"Final Model Training ROC-AUC Score: {roc_auc_train:.4f}")

# PR-AUC
pr_auc_train = average_precision_score(y_train_resampled, y_train_proba)
print(f"Final Model Training PR-AUC Score: {pr_auc_train:.4f}")

# Confusion Matrix
print("Final Model Training Confusion Matrix:\n", confusion_matrix(y_train_resampled, y_train_pred))

[1m49178/49178[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m52s[0m 1ms/step
Final Model Training Classification Report:

              precision    recall  f1-score   support

           0     0.9792    0.9993    0.9891    786838
           1     0.9992    0.9788    0.9889    786838

    accuracy                         0.9890   1573676
   macro avg     0.9892    0.9890    0.9890   1573676
weighted avg     0.9892    0.9890    0.9890   1573676

Final Model Training ROC-AUC Score: 0.9981
Final Model Training PR-AUC Score: 0.9986
Final Model Training Confusion Matrix:
 [[786249    589]
 [ 16689 770149]]


In [69]:
# Evaluate on Validation Set
y_val_proba = baseline_model_log.predict(X_val_log).ravel()
y_val_pred = (y_val_proba > 0.5).astype(int)

# Classification Report
print("Final Model Validation Classification Report:\n")
print(classification_report(y_val, y_val_pred, digits=4))

# ROC-AUC
roc_auc_val = roc_auc_score(y_val, y_val_proba)
print(f"Final Model Validation ROC-AUC Score: {roc_auc_val:.4f}")

# PR-AUC
pr_auc_val = average_precision_score(y_val, y_val_proba)
print(f"Final Model Validation PR-AUC Score: {pr_auc_val:.4f}")

# Confusion Matrix
print("Final Model Validation Confusion Matrix:\n", confusion_matrix(y_val, y_val_pred))

[1m3381/3381[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 960us/step
Final Model Validation Classification Report:

              precision    recall  f1-score   support

           0     0.9868    0.9998    0.9933    106718
           1     0.5000    0.0145    0.0282      1450

    accuracy                         0.9866    108168
   macro avg     0.7434    0.5071    0.5107    108168
weighted avg     0.9803    0.9866    0.9803    108168

Final Model Validation ROC-AUC Score: 0.8532
Final Model Validation PR-AUC Score: 0.1315
Final Model Validation Confusion Matrix:
 [[106697     21]
 [  1429     21]]


In [70]:
# Evaluate on Testing Set
y_test_proba = baseline_model_log.predict(X_test_log).ravel()
y_test_pred = (y_test_proba > 0.5).astype(int)

# Classification Report
print("Final Model Testing Classification Report:\n")
print(classification_report(y_test, y_test_pred, digits=4))

# ROC-AUC
roc_auc_test = roc_auc_score(y_test, y_test_proba)
print(f"Final Model Testing ROC-AUC Score: {roc_auc_test:.4f}")

# PR-AUC
pr_auc_test = average_precision_score(y_test, y_test_proba)
print(f"Final Model Testing PR-AUC Score: {pr_auc_test:.4f}")

# Confusion Matrix
print("Final Model Testing Confusion Matrix:\n", confusion_matrix(y_test, y_test_pred))

[1m3027/3027[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 930us/step
Final Model Testing Classification Report:

              precision    recall  f1-score   support

           0     0.9854    0.9999    0.9926     95415
           1     0.7143    0.0105    0.0207      1428

    accuracy                         0.9853     96843
   macro avg     0.8498    0.5052    0.5067     96843
weighted avg     0.9814    0.9853    0.9783     96843

Final Model Testing ROC-AUC Score: 0.8563
Final Model Testing PR-AUC Score: 0.1557
Final Model Testing Confusion Matrix:
 [[95409     6]
 [ 1413    15]]


### Regularization Tuning

In [71]:
# Build Regularized Model
reg_model_log = Sequential([
    Dense(32, activation='relu', input_dim=input_dim, kernel_regularizer=l2(0.001)),
    BatchNormalization(),
    Dropout(0.3),

    Dense(16, activation='relu', kernel_regularizer=l2(0.001)),
    BatchNormalization(),
    Dropout(0.3),

    Dense(1, activation='sigmoid')
])

reg_model_log.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy']
)

early_stop_reg_log = EarlyStopping(
    monitor='val_loss',
    patience=8,
    restore_best_weights=True
)

history_reg_log = reg_model_log.fit(
    X_train_resampled_log, y_train_resampled,
    validation_data=(X_val_log, y_val),
    epochs=50,
    batch_size=512,
    callbacks=[early_stop_reg_log],
    verbose=1
)


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/50
[1m3074/3074[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 13ms/step - accuracy: 0.9797 - loss: 0.0795 - val_accuracy: 0.9864 - val_loss: 0.0693
Epoch 2/50
[1m3074/3074[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m38s[0m 13ms/step - accuracy: 0.9889 - loss: 0.0455 - val_accuracy: 0.9846 - val_loss: 0.0677
Epoch 3/50
[1m3074/3074[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 13ms/step - accuracy: 0.9890 - loss: 0.0442 - val_accuracy: 0.9855 - val_loss: 0.0635
Epoch 4/50
[1m3074/3074[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m44s[0m 14ms/step - accuracy: 0.9889 - loss: 0.0439 - val_accuracy: 0.9859 - val_loss: 0.0648
Epoch 5/50
[1m3074/3074[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m39s[0m 13ms/step - accuracy: 0.9889 - loss: 0.0436 - val_accuracy: 0.9859 - val_loss: 0.0642
Epoch 6/50
[1m3074/3074[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m39s[0m 13ms/step - accuracy: 0.9890 - loss: 0.0432 - val_accuracy: 0.9847 - val_loss: 0.0656
Epoc

In [72]:
# Evaluate on Validation Set
y_val_proba = baseline_model_log.predict(X_val_log).ravel()
y_val_pred = (y_val_proba > 0.5).astype(int)

# Classification Report
print("Final Model Validation Classification Report:\n")
print(classification_report(y_val, y_val_pred, digits=4))

# ROC-AUC
roc_auc_val = roc_auc_score(y_val, y_val_proba)
print(f"Final Model Validation ROC-AUC Score: {roc_auc_val:.4f}")

# PR-AUC
pr_auc_val = average_precision_score(y_val, y_val_proba)
print(f"Final Model Validation PR-AUC Score: {pr_auc_val:.4f}")

# Confusion Matrix
print("Final Model Validation Confusion Matrix:\n", confusion_matrix(y_val, y_val_pred))

[1m3381/3381[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 959us/step
Final Model Validation Classification Report:

              precision    recall  f1-score   support

           0     0.9868    0.9998    0.9933    106718
           1     0.5000    0.0145    0.0282      1450

    accuracy                         0.9866    108168
   macro avg     0.7434    0.5071    0.5107    108168
weighted avg     0.9803    0.9866    0.9803    108168

Final Model Validation ROC-AUC Score: 0.8532
Final Model Validation PR-AUC Score: 0.1315
Final Model Validation Confusion Matrix:
 [[106697     21]
 [  1429     21]]


In [73]:
# Evaluate on Train Set
y_train_proba = reg_model_log.predict(X_train_resampled_log).ravel()
y_train_pred = (y_train_proba > 0.5).astype(int)

# Classification Report
print("Final Model Training Classification Report:\n")
print(classification_report(y_train_resampled, y_train_pred, digits=4))

# ROC-AUC
roc_auc_train = roc_auc_score(y_train_resampled, y_train_proba)
print(f"Final Model Training ROC-AUC Score: {roc_auc_train:.4f}")

# PR-AUC
pr_auc_train = average_precision_score(y_train_resampled, y_train_proba)
print(f"Final Model Training PR-AUC Score: {pr_auc_train:.4f}")

# Confusion Matrix
print("Final Model Training Confusion Matrix:\n", confusion_matrix(y_train_resampled, y_train_pred))

[1m49178/49178[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m61s[0m 1ms/step
Final Model Training Classification Report:

              precision    recall  f1-score   support

           0     0.9821    0.9974    0.9897    786838
           1     0.9974    0.9818    0.9895    786838

    accuracy                         0.9896   1573676
   macro avg     0.9897    0.9896    0.9896   1573676
weighted avg     0.9897    0.9896    0.9896   1573676

Final Model Training ROC-AUC Score: 0.9982
Final Model Training PR-AUC Score: 0.9986
Final Model Training Confusion Matrix:
 [[784800   2038]
 [ 14290 772548]]


In [74]:
# Evaluate on Validation Set
y_val_proba = reg_model_log.predict(X_val_log).ravel()
y_val_pred = (y_val_proba > 0.5).astype(int)

# Classification Report
print("Final Model Validation Classification Report:\n")
print(classification_report(y_val, y_val_pred, digits=4))

# ROC-AUC
roc_auc_val = roc_auc_score(y_val, y_val_proba)
print(f"Final Model Validation ROC-AUC Score: {roc_auc_val:.4f}")

# PR-AUC
pr_auc_val = average_precision_score(y_val, y_val_proba)
print(f"Final Model Validation PR-AUC Score: {pr_auc_val:.4f}")

# Confusion Matrix
print("Final Model Validation Confusion Matrix:\n", confusion_matrix(y_val, y_val_pred))

[1m3381/3381[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 1ms/step
Final Model Validation Classification Report:

              precision    recall  f1-score   support

           0     0.9876    0.9979    0.9927    106718
           1     0.3253    0.0745    0.1212      1450

    accuracy                         0.9855    108168
   macro avg     0.6564    0.5362    0.5570    108168
weighted avg     0.9787    0.9855    0.9810    108168

Final Model Validation ROC-AUC Score: 0.8611
Final Model Validation PR-AUC Score: 0.1297
Final Model Validation Confusion Matrix:
 [[106494    224]
 [  1342    108]]


In [75]:
# Evaluate on Testing Set
y_test_proba = reg_model_log.predict(X_test_log).ravel()
y_test_pred = (y_test_proba > 0.5).astype(int)

# Classification Report
print("Final Model Testing Classification Report:\n")
print(classification_report(y_test, y_test_pred, digits=4))

# ROC-AUC
roc_auc_test = roc_auc_score(y_test, y_test_proba)
print(f"Final Model Testing ROC-AUC Score: {roc_auc_test:.4f}")

# PR-AUC
pr_auc_test = average_precision_score(y_test, y_test_proba)
print(f"Final Model Testing PR-AUC Score: {pr_auc_test:.4f}")

# Confusion Matrix
print("Final Model Testing Confusion Matrix:\n", confusion_matrix(y_test, y_test_pred))

[1m3027/3027[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 1ms/step
Final Model Testing Classification Report:

              precision    recall  f1-score   support

           0     0.9862    0.9982    0.9922     95415
           1     0.3523    0.0651    0.1099      1428

    accuracy                         0.9844     96843
   macro avg     0.6692    0.5317    0.5510     96843
weighted avg     0.9768    0.9844    0.9791     96843

Final Model Testing ROC-AUC Score: 0.8634
Final Model Testing PR-AUC Score: 0.1460
Final Model Testing Confusion Matrix:
 [[95244   171]
 [ 1335    93]]


## Use original train with class weighting

In [76]:
X_train_log = X_train.drop(columns=original_columns, errors='ignore')
X_val_log = X_val.drop(columns=original_columns, errors='ignore')
X_test_log = X_test.drop(columns=original_columns, errors='ignore')

In [77]:
print("\nShapes AFTER dropping columns:")
print(X_train_log.shape, y_train.shape)
print(X_val_log.shape, y_val.shape)
print(X_test_log.shape, y_test.shape)


Shapes AFTER dropping columns:
(794989, 61) (794989,)
(108168, 61) (108168,)
(96843, 61) (96843,)


### Baseline

In [78]:
scaler = StandardScaler()
X_train_log = scaler.fit_transform(X_train_log)
X_val_log = scaler.transform(X_val_log)
X_test_log = scaler.transform(X_test_log)

In [79]:
# Compute class weights automatically
classes = np.unique(y_train)
class_weights = compute_class_weight(
    class_weight='balanced',
    classes=classes,
    y=y_train
)
class_weights = dict(zip(classes, class_weights))

print("Class weights:", class_weights)

# Build baseline model
input_dim = X_train_log.shape[1]

baseline_model_log = Sequential([
    Dense(32, activation='relu', input_dim=input_dim),
    Dense(16, activation='relu'),
    Dense(1, activation='sigmoid')
])

baseline_model_log.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy']
)

early_stop_baseline_log = EarlyStopping(
    monitor='val_loss',
    patience=8,
    restore_best_weights=True
)

history_baseline_log = baseline_model_log.fit(
    X_train_log, y_train,
    validation_data=(X_val_log, y_val),
    epochs=50,
    batch_size=512,
    callbacks=[early_stop_baseline_log],
    class_weight=class_weights, 
    verbose=1
)


Class weights: {0: 0.5051795922413508, 1: 48.76634768740032}


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/50
[1m1553/1553[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 8ms/step - accuracy: 0.7748 - loss: 0.4696 - val_accuracy: 0.7791 - val_loss: 0.4877
Epoch 2/50
[1m1553/1553[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 7ms/step - accuracy: 0.7955 - loss: 0.4528 - val_accuracy: 0.7830 - val_loss: 0.4729
Epoch 3/50
[1m1553/1553[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 7ms/step - accuracy: 0.7918 - loss: 0.4585 - val_accuracy: 0.7869 - val_loss: 0.4719
Epoch 4/50
[1m1553/1553[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 7ms/step - accuracy: 0.7852 - loss: 0.4672 - val_accuracy: 0.8315 - val_loss: 0.3962
Epoch 5/50
[1m1553/1553[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 7ms/step - accuracy: 0.7828 - loss: 0.4751 - val_accuracy: 0.7046 - val_loss: 0.6114
Epoch 6/50
[1m1553/1553[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 7ms/step - accuracy: 0.7799 - loss: 0.4915 - val_accuracy: 0.7340 - val_loss: 0.6255
Epoch 7/50

In [80]:
# Evaluate on Train Set
y_train_proba = baseline_model_log.predict(X_train_log).ravel()
y_train_pred = (y_train_proba > 0.5).astype(int)

# Classification Report
print("Final Model Training Classification Report:\n")
print(classification_report(y_train, y_train_pred, digits=4))

# ROC-AUC
roc_auc_train = roc_auc_score(y_train, y_train_proba)
print(f"Final Model Training ROC-AUC Score: {roc_auc_train:.4f}")

# PR-AUC
pr_auc_train = average_precision_score(y_train, y_train_proba)
print(f"Final Model Training PR-AUC Score: {pr_auc_train:.4f}")

# Confusion Matrix
print("Final Model Training Confusion Matrix:\n", confusion_matrix(y_train, y_train_pred))

[1m24844/24844[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m24s[0m 963us/step
Final Model Training Classification Report:

              precision    recall  f1-score   support

           0     0.9970    0.8053    0.8910    786838
           1     0.0392    0.7676    0.0747      8151

    accuracy                         0.8049    794989
   macro avg     0.5181    0.7865    0.4828    794989
weighted avg     0.9872    0.8049    0.8826    794989

Final Model Training ROC-AUC Score: 0.8652
Final Model Training PR-AUC Score: 0.0852
Final Model Training Confusion Matrix:
 [[633665 153173]
 [  1894   6257]]


In [81]:
# Evaluate on Validation Set
y_val_proba = baseline_model_log.predict(X_val_log).ravel()
y_val_pred = (y_val_proba > 0.5).astype(int)

# Classification Report
print("Final Model Validation Classification Report:\n")
print(classification_report(y_val, y_val_pred, digits=4))

# ROC-AUC
roc_auc_val = roc_auc_score(y_val, y_val_proba)
print(f"Final Model Validation ROC-AUC Score: {roc_auc_val:.4f}")

# PR-AUC
pr_auc_val = average_precision_score(y_val, y_val_proba)
print(f"Final Model Validation PR-AUC Score: {pr_auc_val:.4f}")

# Confusion Matrix
print("Final Model Validation Confusion Matrix:\n", confusion_matrix(y_val, y_val_pred))

[1m3381/3381[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 947us/step
Final Model Validation Classification Report:

              precision    recall  f1-score   support

           0     0.9957    0.8328    0.9070    106718
           1     0.0563    0.7338    0.1045      1450

    accuracy                         0.8315    108168
   macro avg     0.5260    0.7833    0.5058    108168
weighted avg     0.9831    0.8315    0.8962    108168

Final Model Validation ROC-AUC Score: 0.8656
Final Model Validation PR-AUC Score: 0.1066
Final Model Validation Confusion Matrix:
 [[88876 17842]
 [  386  1064]]


In [82]:
# Evaluate on Testing Set
y_test_proba = baseline_model_log.predict(X_test_log).ravel()
y_test_pred = (y_test_proba > 0.5).astype(int)

# Classification Report
print("Final Model Testing Classification Report:\n")
print(classification_report(y_test, y_test_pred, digits=4))

# ROC-AUC
roc_auc_test = roc_auc_score(y_test, y_test_proba)
print(f"Final Model Testing ROC-AUC Score: {roc_auc_test:.4f}")

# PR-AUC
pr_auc_test = average_precision_score(y_test, y_test_proba)
print(f"Final Model Testing PR-AUC Score: {pr_auc_test:.4f}")

# Confusion Matrix
print("Final Model Testing Confusion Matrix:\n", confusion_matrix(y_test, y_test_pred))

[1m3027/3027[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 971us/step
Final Model Testing Classification Report:

              precision    recall  f1-score   support

           0     0.9945    0.8861    0.9372     95415
           1     0.0812    0.6730    0.1450      1428

    accuracy                         0.8830     96843
   macro avg     0.5379    0.7795    0.5411     96843
weighted avg     0.9810    0.8830    0.9255     96843

Final Model Testing ROC-AUC Score: 0.8773
Final Model Testing PR-AUC Score: 0.1378
Final Model Testing Confusion Matrix:
 [[84547 10868]
 [  467   961]]


### Regularization Tuning

In [83]:
# Compute class weights automatically
classes = np.unique(y_train)
class_weights = compute_class_weight(
    class_weight='balanced',
    classes=classes,
    y=y_train
)
class_weights = dict(zip(classes, class_weights))

print("Class weights:", class_weights)

# Build Regularized Model
reg_model_log = Sequential([
    Dense(32, activation='relu', input_dim=input_dim, kernel_regularizer=l2(0.001)),
    BatchNormalization(),
    Dropout(0.3),

    Dense(16, activation='relu', kernel_regularizer=l2(0.001)),
    BatchNormalization(),
    Dropout(0.3),

    Dense(1, activation='sigmoid')
])

reg_model_log.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy']
)

early_stop_reg_log = EarlyStopping(
    monitor='val_loss',
    patience=8,
    restore_best_weights=True
)

history_reg_log = reg_model_log.fit(
    X_train_log, y_train,
    validation_data=(X_val_log, y_val),
    epochs=50,
    batch_size=512,
    callbacks=[early_stop_reg_log],
    class_weight=class_weights,
    verbose=1
)


Class weights: {0: 0.5051795922413508, 1: 48.76634768740032}


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/50
[1m1553/1553[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m24s[0m 14ms/step - accuracy: 0.7409 - loss: 0.5663 - val_accuracy: 0.8220 - val_loss: 0.4477
Epoch 2/50
[1m1553/1553[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 13ms/step - accuracy: 0.7949 - loss: 0.4980 - val_accuracy: 0.8267 - val_loss: 0.4261
Epoch 3/50
[1m1553/1553[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 14ms/step - accuracy: 0.7985 - loss: 0.4854 - val_accuracy: 0.8030 - val_loss: 0.4619
Epoch 4/50
[1m1553/1553[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 13ms/step - accuracy: 0.7998 - loss: 0.4847 - val_accuracy: 0.7745 - val_loss: 0.5108
Epoch 5/50
[1m1553/1553[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 13ms/step - accuracy: 0.8001 - loss: 0.4865 - val_accuracy: 0.8049 - val_loss: 0.4595
Epoch 6/50
[1m1553/1553[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 13ms/step - accuracy: 0.8014 - loss: 0.4873 - val_accuracy: 0.8163 - val_loss: 0.4409
Epoc

In [84]:
# Evaluate on Train Set
y_train_proba = reg_model_log.predict(X_train_log).ravel()
y_train_pred = (y_train_proba > 0.5).astype(int)

# Classification Report
print("Final Model Training Classification Report:\n")
print(classification_report(y_train, y_train_pred, digits=4))

# ROC-AUC
roc_auc_train = roc_auc_score(y_train, y_train_proba)
print(f"Final Model Training ROC-AUC Score: {roc_auc_train:.4f}")

# PR-AUC
pr_auc_train = average_precision_score(y_train, y_train_proba)
print(f"Final Model Training PR-AUC Score: {pr_auc_train:.4f}")

# Confusion Matrix
print("Final Model Training Confusion Matrix:\n", confusion_matrix(y_train, y_train_pred))

[1m24844/24844[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m30s[0m 1ms/step
Final Model Training Classification Report:

              precision    recall  f1-score   support

           0     0.9972    0.8068    0.8920    786838
           1     0.0404    0.7843    0.0768      8151

    accuracy                         0.8066    794989
   macro avg     0.5188    0.7956    0.4844    794989
weighted avg     0.9874    0.8066    0.8836    794989

Final Model Training ROC-AUC Score: 0.8750
Final Model Training PR-AUC Score: 0.1252
Final Model Training Confusion Matrix:
 [[634824 152014]
 [  1758   6393]]


In [85]:
# Evaluate on Validation Set
y_val_proba = reg_model_log.predict(X_val_log).ravel()
y_val_pred = (y_val_proba > 0.5).astype(int)

# Classification Report
print("Final Model Validation Classification Report:\n")
print(classification_report(y_val, y_val_pred, digits=4))

# ROC-AUC
roc_auc_val = roc_auc_score(y_val, y_val_proba)
print(f"Final Model Validation ROC-AUC Score: {roc_auc_val:.4f}")

# PR-AUC
pr_auc_val = average_precision_score(y_val, y_val_proba)
print(f"Final Model Validation PR-AUC Score: {pr_auc_val:.4f}")

# Confusion Matrix
print("Final Model Validation Confusion Matrix:\n", confusion_matrix(y_val, y_val_pred))

[1m3381/3381[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 1ms/step
Final Model Validation Classification Report:

              precision    recall  f1-score   support

           0     0.9961    0.8276    0.9041    106718
           1     0.0567    0.7621    0.1055      1450

    accuracy                         0.8267    108168
   macro avg     0.5264    0.7948    0.5048    108168
weighted avg     0.9835    0.8267    0.8934    108168

Final Model Validation ROC-AUC Score: 0.8768
Final Model Validation PR-AUC Score: 0.1520
Final Model Validation Confusion Matrix:
 [[88321 18397]
 [  345  1105]]


In [86]:
# Evaluate on Testing Set
y_test_proba = reg_model_log.predict(X_test_log).ravel()
y_test_pred = (y_test_proba > 0.5).astype(int)

# Classification Report
print("Final Model Testing Classification Report:\n")
print(classification_report(y_test, y_test_pred, digits=4))

# ROC-AUC
roc_auc_test = roc_auc_score(y_test, y_test_proba)
print(f"Final Model Testing ROC-AUC Score: {roc_auc_test:.4f}")

# PR-AUC
pr_auc_test = average_precision_score(y_test, y_test_proba)
print(f"Final Model Testing PR-AUC Score: {pr_auc_test:.4f}")

# Confusion Matrix
print("Final Model Testing Confusion Matrix:\n", confusion_matrix(y_test, y_test_pred))

[1m3027/3027[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 1ms/step
Final Model Testing Classification Report:

              precision    recall  f1-score   support

           0     0.9953    0.8710    0.9290     95415
           1     0.0774    0.7234    0.1399      1428

    accuracy                         0.8688     96843
   macro avg     0.5364    0.7972    0.5345     96843
weighted avg     0.9817    0.8688    0.9174     96843

Final Model Testing ROC-AUC Score: 0.8822
Final Model Testing PR-AUC Score: 0.1815
Final Model Testing Confusion Matrix:
 [[83109 12306]
 [  395  1033]]


# Use Non Log-Transformed Features

In [87]:
log_columns = ['days_since_request_log', 'intended_balcon_amount_log', 'zip_count_4w_log', 'velocity_24h_log', 'velocity_4w_log', 'date_of_birth_distinct_emails_4w_log', 'session_length_in_minutes_log']

## Use SMOTE Resampled Training Data

In [88]:
X_train_resampled_orig = X_train_resampled.drop(columns=log_columns, errors='ignore')
X_val_orig = X_val.drop(columns=log_columns, errors='ignore')
X_test_orig = X_test.drop(columns=log_columns, errors='ignore')

In [89]:
print("\nShapes AFTER dropping columns:")
print(X_train_resampled_orig.shape, y_train_resampled.shape)
print(X_val_orig.shape, y_val.shape)
print(X_test_orig.shape, y_test.shape)


Shapes AFTER dropping columns:
(1573676, 61) (1573676,)
(108168, 61) (108168,)
(96843, 61) (96843,)


### Baseline

In [90]:
scaler = StandardScaler()
X_train_resampled_orig = scaler.fit_transform(X_train_resampled_orig)
X_val_orig = scaler.transform(X_val_orig)
X_test_orig = scaler.transform(X_test_orig)

In [91]:
# Build baseline model
input_dim = X_train_resampled_orig.shape[1]

baseline_model_orig = Sequential([
    Dense(32, activation='relu', input_dim=input_dim),
    Dense(16, activation='relu'),
    Dense(1, activation='sigmoid')
])

baseline_model_orig.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy']
)

early_stop_baseline_orig = EarlyStopping(
    monitor='val_loss',
    patience=8,
    restore_best_weights=True
)

history_baseline_orig = baseline_model_orig.fit(
    X_train_resampled_orig, y_train_resampled,
    validation_data=(X_val_orig, y_val),
    epochs=50,
    batch_size=512,
    callbacks=[early_stop_baseline_orig],
    verbose=1
)


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/50
[1m3074/3074[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 7ms/step - accuracy: 0.9820 - loss: 0.0537 - val_accuracy: 0.9840 - val_loss: 0.0617
Epoch 2/50
[1m3074/3074[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 7ms/step - accuracy: 0.9895 - loss: 0.0356 - val_accuracy: 0.9861 - val_loss: 0.0646
Epoch 3/50
[1m3074/3074[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 7ms/step - accuracy: 0.9891 - loss: 0.0371 - val_accuracy: 0.9866 - val_loss: 0.0591
Epoch 4/50
[1m3074/3074[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 7ms/step - accuracy: 0.9885 - loss: 0.0392 - val_accuracy: 0.9827 - val_loss: 0.0659
Epoch 5/50
[1m3074/3074[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 7ms/step - accuracy: 0.9883 - loss: 0.0409 - val_accuracy: 0.9863 - val_loss: 0.0606
Epoch 6/50
[1m3074/3074[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 7ms/step - accuracy: 0.9876 - loss: 0.0447 - val_accuracy: 0.9866 - val_loss: 0.0617
Epoch 7/50

In [92]:
# Evaluate on Train Set
y_train_proba = baseline_model_orig.predict(X_train_resampled_orig).ravel()
y_train_pred = (y_train_proba > 0.5).astype(int)

# Classification Report
print("Final Model Training Classification Report:\n")
print(classification_report(y_train_resampled, y_train_pred, digits=4))

# ROC-AUC
roc_auc_train = roc_auc_score(y_train_resampled, y_train_proba)
print(f"Final Model Training ROC-AUC Score: {roc_auc_train:.4f}")

# PR-AUC
pr_auc_train = average_precision_score(y_train_resampled, y_train_proba)
print(f"Final Model Training PR-AUC Score: {pr_auc_train:.4f}")

# Confusion Matrix
print("Final Model Training Confusion Matrix:\n", confusion_matrix(y_train_resampled, y_train_pred))

[1m49178/49178[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m48s[0m 973us/step
Final Model Training Classification Report:

              precision    recall  f1-score   support

           0     0.9794    0.9992    0.9892    786838
           1     0.9992    0.9789    0.9890    786838

    accuracy                         0.9891   1573676
   macro avg     0.9893    0.9891    0.9891   1573676
weighted avg     0.9893    0.9891    0.9891   1573676

Final Model Training ROC-AUC Score: 0.9981
Final Model Training PR-AUC Score: 0.9986
Final Model Training Confusion Matrix:
 [[786194    644]
 [ 16569 770269]]


In [93]:
# Evaluate on Validation Set
y_val_proba = baseline_model_orig.predict(X_val_orig).ravel()
y_val_pred = (y_val_proba > 0.5).astype(int)

# Classification Report
print("Final Model Validation Classification Report:\n")
print(classification_report(y_val, y_val_pred, digits=4))

# ROC-AUC
roc_auc_val = roc_auc_score(y_val, y_val_proba)
print(f"Final Model Validation ROC-AUC Score: {roc_auc_val:.4f}")

# PR-AUC
pr_auc_val = average_precision_score(y_val, y_val_proba)
print(f"Final Model Validation PR-AUC Score: {pr_auc_val:.4f}")

# Confusion Matrix
print("Final Model Validation Confusion Matrix:\n", confusion_matrix(y_val, y_val_pred))

[1m3381/3381[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 1ms/step
Final Model Validation Classification Report:

              precision    recall  f1-score   support

           0     0.9869    0.9997    0.9932    106718
           1     0.4688    0.0207    0.0396      1450

    accuracy                         0.9866    108168
   macro avg     0.7278    0.5102    0.5164    108168
weighted avg     0.9799    0.9866    0.9804    108168

Final Model Validation ROC-AUC Score: 0.8555
Final Model Validation PR-AUC Score: 0.1267
Final Model Validation Confusion Matrix:
 [[106684     34]
 [  1420     30]]


In [94]:
# Evaluate on Testing Set
y_test_proba = baseline_model_orig.predict(X_test_orig).ravel()
y_test_pred = (y_test_proba > 0.5).astype(int)

# Classification Report
print("Final Model Testing Classification Report:\n")
print(classification_report(y_test, y_test_pred, digits=4))

# ROC-AUC
roc_auc_test = roc_auc_score(y_test, y_test_proba)
print(f"Final Model Testing ROC-AUC Score: {roc_auc_test:.4f}")

# PR-AUC
pr_auc_test = average_precision_score(y_test, y_test_proba)
print(f"Final Model Testing PR-AUC Score: {pr_auc_test:.4f}")

# Confusion Matrix
print("Final Model Testing Confusion Matrix:\n", confusion_matrix(y_test, y_test_pred))

[1m3027/3027[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 1ms/step  
Final Model Testing Classification Report:

              precision    recall  f1-score   support

           0     0.9854    0.9999    0.9926     95415
           1     0.5938    0.0133    0.0260      1428

    accuracy                         0.9853     96843
   macro avg     0.7896    0.5066    0.5093     96843
weighted avg     0.9797    0.9853    0.9783     96843

Final Model Testing ROC-AUC Score: 0.8570
Final Model Testing PR-AUC Score: 0.1529
Final Model Testing Confusion Matrix:
 [[95402    13]
 [ 1409    19]]


### Regularization Tuning

In [95]:
# Build Regularized Model
reg_model_orig = Sequential([
    Dense(32, activation='relu', input_dim=input_dim, kernel_regularizer=l2(0.001)),
    BatchNormalization(),
    Dropout(0.3),

    Dense(16, activation='relu', kernel_regularizer=l2(0.001)),
    BatchNormalization(),
    Dropout(0.3),

    Dense(1, activation='sigmoid')
])

reg_model_orig.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy']
)

early_stop_reg_orig = EarlyStopping(
    monitor='val_loss',
    patience=8,
    restore_best_weights=True
)

history_reg_orig = reg_model_orig.fit(
    X_train_resampled_orig, y_train_resampled,
    validation_data=(X_val_orig, y_val),
    epochs=50,
    batch_size=512,
    callbacks=[early_stop_reg_orig],
    verbose=1
)


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/50
[1m3074/3074[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m43s[0m 13ms/step - accuracy: 0.9774 - loss: 0.0851 - val_accuracy: 0.9854 - val_loss: 0.0668
Epoch 2/50
[1m3074/3074[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 13ms/step - accuracy: 0.9887 - loss: 0.0462 - val_accuracy: 0.9865 - val_loss: 0.0671
Epoch 3/50
[1m3074/3074[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m38s[0m 12ms/step - accuracy: 0.9889 - loss: 0.0447 - val_accuracy: 0.9862 - val_loss: 0.0644
Epoch 4/50
[1m3074/3074[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m38s[0m 12ms/step - accuracy: 0.9888 - loss: 0.0446 - val_accuracy: 0.9852 - val_loss: 0.0679
Epoch 5/50
[1m3074/3074[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m39s[0m 13ms/step - accuracy: 0.9888 - loss: 0.0454 - val_accuracy: 0.9854 - val_loss: 0.0644
Epoch 6/50
[1m3074/3074[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m48s[0m 15ms/step - accuracy: 0.9889 - loss: 0.0444 - val_accuracy: 0.9856 - val_loss: 0.0637
Epoc

In [96]:
# Evaluate on Train Set
y_train_proba = reg_model_orig.predict(X_train_resampled_orig).ravel()
y_train_pred = (y_train_proba > 0.5).astype(int)

# Classification Report
print("Final Model Training Classification Report:\n")
print(classification_report(y_train_resampled, y_train_pred, digits=4))

# ROC-AUC
roc_auc_train = roc_auc_score(y_train_resampled, y_train_proba)
print(f"Final Model Training ROC-AUC Score: {roc_auc_train:.4f}")

# PR-AUC
pr_auc_train = average_precision_score(y_train_resampled, y_train_proba)
print(f"Final Model Training PR-AUC Score: {pr_auc_train:.4f}")

# Confusion Matrix
print("Final Model Training Confusion Matrix:\n", confusion_matrix(y_train_resampled, y_train_pred))

[1m49178/49178[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m58s[0m 1ms/step
Final Model Training Classification Report:

              precision    recall  f1-score   support

           0     0.9810    0.9983    0.9896    786838
           1     0.9983    0.9806    0.9894    786838

    accuracy                         0.9895   1573676
   macro avg     0.9896    0.9895    0.9895   1573676
weighted avg     0.9896    0.9895    0.9895   1573676

Final Model Training ROC-AUC Score: 0.9982
Final Model Training PR-AUC Score: 0.9986
Final Model Training Confusion Matrix:
 [[785499   1339]
 [ 15247 771591]]


In [97]:
# Evaluate on Validation Set
y_val_proba = reg_model_orig.predict(X_val_orig).ravel()
y_val_pred = (y_val_proba > 0.5).astype(int)

# Classification Report
print("Final Model Validation Classification Report:\n")
print(classification_report(y_val, y_val_pred, digits=4))

# ROC-AUC
roc_auc_val = roc_auc_score(y_val, y_val_proba)
print(f"Final Model Validation ROC-AUC Score: {roc_auc_val:.4f}")

# PR-AUC
pr_auc_val = average_precision_score(y_val, y_val_proba)
print(f"Final Model Validation PR-AUC Score: {pr_auc_val:.4f}")

# Confusion Matrix
print("Final Model Validation Confusion Matrix:\n", confusion_matrix(y_val, y_val_pred))

[1m3381/3381[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 1ms/step
Final Model Validation Classification Report:

              precision    recall  f1-score   support

           0     0.9871    0.9992    0.9931    106718
           1     0.4069    0.0407    0.0740      1450

    accuracy                         0.9863    108168
   macro avg     0.6970    0.5199    0.5336    108168
weighted avg     0.9793    0.9863    0.9808    108168

Final Model Validation ROC-AUC Score: 0.8561
Final Model Validation PR-AUC Score: 0.1307
Final Model Validation Confusion Matrix:
 [[106632     86]
 [  1391     59]]


In [98]:
# Evaluate on Testing Set
y_test_proba = reg_model_orig.predict(X_test_orig).ravel()
y_test_pred = (y_test_proba > 0.5).astype(int)

# Classification Report
print("Final Model Testing Classification Report:\n")
print(classification_report(y_test, y_test_pred, digits=4))

# ROC-AUC
roc_auc_test = roc_auc_score(y_test, y_test_proba)
print(f"Final Model Testing ROC-AUC Score: {roc_auc_test:.4f}")

# PR-AUC
pr_auc_test = average_precision_score(y_test, y_test_proba)
print(f"Final Model Testing PR-AUC Score: {pr_auc_test:.4f}")

# Confusion Matrix
print("Final Model Testing Confusion Matrix:\n", confusion_matrix(y_test, y_test_pred))

[1m3027/3027[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 1ms/step
Final Model Testing Classification Report:

              precision    recall  f1-score   support

           0     0.9856    0.9996    0.9925     95415
           1     0.4688    0.0210    0.0402      1428

    accuracy                         0.9852     96843
   macro avg     0.7272    0.5103    0.5164     96843
weighted avg     0.9779    0.9852    0.9785     96843

Final Model Testing ROC-AUC Score: 0.8577
Final Model Testing PR-AUC Score: 0.1481
Final Model Testing Confusion Matrix:
 [[95381    34]
 [ 1398    30]]


## Use original train with class weighting

In [99]:
X_train_orig = X_train.drop(columns=log_columns, errors='ignore')
X_val_orig = X_val.drop(columns=log_columns, errors='ignore')
X_test_orig = X_test.drop(columns=log_columns, errors='ignore')

In [100]:
print("\nShapes AFTER dropping columns:")
print(X_train_orig.shape, y_train.shape)
print(X_val_orig.shape, y_val.shape)
print(X_test_orig.shape, y_test.shape)


Shapes AFTER dropping columns:
(794989, 61) (794989,)
(108168, 61) (108168,)
(96843, 61) (96843,)


### Baseline

In [101]:
scaler = StandardScaler()
X_train_orig = scaler.fit_transform(X_train_orig)
X_val_orig = scaler.transform(X_val_orig)
X_test_orig = scaler.transform(X_test_orig)

In [102]:
# Compute class weights automatically
classes = np.unique(y_train)
class_weights = compute_class_weight(
    class_weight='balanced',
    classes=classes,
    y=y_train
)
class_weights = dict(zip(classes, class_weights))

print("Class weights:", class_weights)

# Build baseline model
input_dim = X_train_orig.shape[1]

baseline_model_orig = Sequential([
    Dense(32, activation='relu', input_dim=input_dim),
    Dense(16, activation='relu'),
    Dense(1, activation='sigmoid')
])

baseline_model_orig.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy']
)

early_stop_baseline_orig = EarlyStopping(
    monitor='val_loss',
    patience=8,
    restore_best_weights=True
)

history_baseline_orig = baseline_model_orig.fit(
    X_train_orig, y_train,
    validation_data=(X_val_orig, y_val),
    epochs=50,
    batch_size=512,
    callbacks=[early_stop_baseline_orig],
    class_weight=class_weights, 
    verbose=1
)


Class weights: {0: 0.5051795922413508, 1: 48.76634768740032}


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/50
[1m1553/1553[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 7ms/step - accuracy: 0.7790 - loss: 0.4690 - val_accuracy: 0.8193 - val_loss: 0.4167
Epoch 2/50
[1m1553/1553[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 7ms/step - accuracy: 0.7986 - loss: 0.4553 - val_accuracy: 0.8022 - val_loss: 0.4359
Epoch 3/50
[1m1553/1553[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 7ms/step - accuracy: 0.7925 - loss: 0.4612 - val_accuracy: 0.8194 - val_loss: 0.4118
Epoch 4/50
[1m1553/1553[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 7ms/step - accuracy: 0.7898 - loss: 0.4685 - val_accuracy: 0.7658 - val_loss: 0.5103
Epoch 5/50
[1m1553/1553[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 7ms/step - accuracy: 0.7842 - loss: 0.4787 - val_accuracy: 0.8537 - val_loss: 0.3520
Epoch 6/50
[1m1553/1553[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 7ms/step - accuracy: 0.7788 - loss: 0.4929 - val_accuracy: 0.7440 - val_loss: 0.5517
Epoch 7/50

In [103]:
# Evaluate on Train Set
y_train_proba = baseline_model_orig.predict(X_train_orig).ravel()
y_train_pred = (y_train_proba > 0.5).astype(int)

# Classification Report
print("Final Model Training Classification Report:\n")
print(classification_report(y_train, y_train_pred, digits=4))

# ROC-AUC
roc_auc_train = roc_auc_score(y_train, y_train_proba)
print(f"Final Model Training ROC-AUC Score: {roc_auc_train:.4f}")

# PR-AUC
pr_auc_train = average_precision_score(y_train, y_train_proba)
print(f"Final Model Training PR-AUC Score: {pr_auc_train:.4f}")

# Confusion Matrix
print("Final Model Training Confusion Matrix:\n", confusion_matrix(y_train, y_train_pred))

[1m24844/24844[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m25s[0m 992us/step
Final Model Training Classification Report:

              precision    recall  f1-score   support

           0     0.9970    0.8073    0.8921    786838
           1     0.0393    0.7620    0.0748      8151

    accuracy                         0.8068    794989
   macro avg     0.5181    0.7846    0.4835    794989
weighted avg     0.9871    0.8068    0.8838    794989

Final Model Training ROC-AUC Score: 0.8653
Final Model Training PR-AUC Score: 0.0941
Final Model Training Confusion Matrix:
 [[635182 151656]
 [  1940   6211]]


In [104]:
# Evaluate on Validation Set
y_val_proba = baseline_model_orig.predict(X_val_orig).ravel()
y_val_pred = (y_val_proba > 0.5).astype(int)

# Classification Report
print("Final Model Validation Classification Report:\n")
print(classification_report(y_val, y_val_pred, digits=4))

# ROC-AUC
roc_auc_val = roc_auc_score(y_val, y_val_proba)
print(f"Final Model Validation ROC-AUC Score: {roc_auc_val:.4f}")

# PR-AUC
pr_auc_val = average_precision_score(y_val, y_val_proba)
print(f"Final Model Validation PR-AUC Score: {pr_auc_val:.4f}")

# Confusion Matrix
print("Final Model Validation Confusion Matrix:\n", confusion_matrix(y_val, y_val_pred))

[1m3381/3381[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 1ms/step
Final Model Validation Classification Report:

              precision    recall  f1-score   support

           0     0.9953    0.8558    0.9203    106718
           1     0.0620    0.7021    0.1140      1450

    accuracy                         0.8537    108168
   macro avg     0.5287    0.7789    0.5171    108168
weighted avg     0.9828    0.8537    0.9095    108168

Final Model Validation ROC-AUC Score: 0.8667
Final Model Validation PR-AUC Score: 0.1145
Final Model Validation Confusion Matrix:
 [[91324 15394]
 [  432  1018]]


In [105]:
# Evaluate on Testing Set
y_test_proba = baseline_model_orig.predict(X_test_orig).ravel()
y_test_pred = (y_test_proba > 0.5).astype(int)

# Classification Report
print("Final Model Testing Classification Report:\n")
print(classification_report(y_test, y_test_pred, digits=4))

# ROC-AUC
roc_auc_test = roc_auc_score(y_test, y_test_proba)
print(f"Final Model Testing ROC-AUC Score: {roc_auc_test:.4f}")

# PR-AUC
pr_auc_test = average_precision_score(y_test, y_test_proba)
print(f"Final Model Testing PR-AUC Score: {pr_auc_test:.4f}")

# Confusion Matrix
print("Final Model Testing Confusion Matrix:\n", confusion_matrix(y_test, y_test_pred))

[1m3027/3027[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 962us/step
Final Model Testing Classification Report:

              precision    recall  f1-score   support

           0     0.9939    0.9053    0.9475     95415
           1     0.0907    0.6317    0.1587      1428

    accuracy                         0.9012     96843
   macro avg     0.5423    0.7685    0.5531     96843
weighted avg     0.9806    0.9012    0.9359     96843

Final Model Testing ROC-AUC Score: 0.8774
Final Model Testing PR-AUC Score: 0.1428
Final Model Testing Confusion Matrix:
 [[86377  9038]
 [  526   902]]


### Regularization Tuning

In [106]:
# Compute class weights automatically
classes = np.unique(y_train)
class_weights = compute_class_weight(
    class_weight='balanced',
    classes=classes,
    y=y_train
)
class_weights = dict(zip(classes, class_weights))

print("Class weights:", class_weights)

# Build Regularized Model
reg_model_orig = Sequential([
    Dense(32, activation='relu', input_dim=input_dim, kernel_regularizer=l2(0.001)),
    BatchNormalization(),
    Dropout(0.3),

    Dense(16, activation='relu', kernel_regularizer=l2(0.001)),
    BatchNormalization(),
    Dropout(0.3),

    Dense(1, activation='sigmoid')
])

reg_model_orig.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy']
)

early_stop_reg_orig = EarlyStopping(
    monitor='val_loss',
    patience=8,
    restore_best_weights=True
)

history_reg_orig = reg_model_orig.fit(
    X_train_orig, y_train,
    validation_data=(X_val_orig, y_val),
    epochs=50,
    batch_size=512,
    callbacks=[early_stop_reg_orig],
    class_weight=class_weights,
    verbose=1
)


Class weights: {0: 0.5051795922413508, 1: 48.76634768740032}


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/50
[1m1553/1553[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m24s[0m 14ms/step - accuracy: 0.7233 - loss: 0.5912 - val_accuracy: 0.8124 - val_loss: 0.4671
Epoch 2/50
[1m1553/1553[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 14ms/step - accuracy: 0.7908 - loss: 0.5037 - val_accuracy: 0.8128 - val_loss: 0.4504
Epoch 3/50
[1m1553/1553[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 14ms/step - accuracy: 0.7984 - loss: 0.4910 - val_accuracy: 0.8058 - val_loss: 0.4599
Epoch 4/50
[1m1553/1553[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 13ms/step - accuracy: 0.7976 - loss: 0.4920 - val_accuracy: 0.7958 - val_loss: 0.4797
Epoch 5/50
[1m1553/1553[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 13ms/step - accuracy: 0.7943 - loss: 0.4940 - val_accuracy: 0.8131 - val_loss: 0.4464
Epoch 6/50
[1m1553/1553[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 13ms/step - accuracy: 0.7964 - loss: 0.4984 - val_accuracy: 0.7995 - val_loss: 0.4720
Epoc

In [107]:
# Evaluate on Train Set
y_train_proba = reg_model_orig.predict(X_train_orig).ravel()
y_train_pred = (y_train_proba > 0.5).astype(int)

# Classification Report
print("Final Model Training Classification Report:\n")
print(classification_report(y_train, y_train_pred, digits=4))

# ROC-AUC
roc_auc_train = roc_auc_score(y_train, y_train_proba)
print(f"Final Model Training ROC-AUC Score: {roc_auc_train:.4f}")

# PR-AUC
pr_auc_train = average_precision_score(y_train, y_train_proba)
print(f"Final Model Training PR-AUC Score: {pr_auc_train:.4f}")

# Confusion Matrix
print("Final Model Training Confusion Matrix:\n", confusion_matrix(y_train, y_train_pred))

[1m24844/24844[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m30s[0m 1ms/step
Final Model Training Classification Report:

              precision    recall  f1-score   support

           0     0.9973    0.7937    0.8839    786838
           1     0.0384    0.7962    0.0733      8151

    accuracy                         0.7937    794989
   macro avg     0.5179    0.7950    0.4786    794989
weighted avg     0.9875    0.7937    0.8756    794989

Final Model Training ROC-AUC Score: 0.8730
Final Model Training PR-AUC Score: 0.1259
Final Model Training Confusion Matrix:
 [[624511 162327]
 [  1661   6490]]


In [108]:
# Evaluate on Validation Set
y_val_proba = reg_model_orig.predict(X_val_orig).ravel()
y_val_pred = (y_val_proba > 0.5).astype(int)

# Classification Report
print("Final Model Validation Classification Report:\n")
print(classification_report(y_val, y_val_pred, digits=4))

# ROC-AUC
roc_auc_val = roc_auc_score(y_val, y_val_proba)
print(f"Final Model Validation ROC-AUC Score: {roc_auc_val:.4f}")

# PR-AUC
pr_auc_val = average_precision_score(y_val, y_val_proba)
print(f"Final Model Validation PR-AUC Score: {pr_auc_val:.4f}")

# Confusion Matrix
print("Final Model Validation Confusion Matrix:\n", confusion_matrix(y_val, y_val_pred))

[1m3381/3381[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 1ms/step
Final Model Validation Classification Report:

              precision    recall  f1-score   support

           0     0.9964    0.8135    0.8957    106718
           1     0.0539    0.7814    0.1008      1450

    accuracy                         0.8131    108168
   macro avg     0.5251    0.7975    0.4983    108168
weighted avg     0.9837    0.8131    0.8851    108168

Final Model Validation ROC-AUC Score: 0.8771
Final Model Validation PR-AUC Score: 0.1534
Final Model Validation Confusion Matrix:
 [[86819 19899]
 [  317  1133]]


In [109]:
# Evaluate on Testing Set
y_test_proba = reg_model_orig.predict(X_test_orig).ravel()
y_test_pred = (y_test_proba > 0.5).astype(int)

# Classification Report
print("Final Model Testing Classification Report:\n")
print(classification_report(y_test, y_test_pred, digits=4))

# ROC-AUC
roc_auc_test = roc_auc_score(y_test, y_test_proba)
print(f"Final Model Testing ROC-AUC Score: {roc_auc_test:.4f}")

# PR-AUC
pr_auc_test = average_precision_score(y_test, y_test_proba)
print(f"Final Model Testing PR-AUC Score: {pr_auc_test:.4f}")

# Confusion Matrix
print("Final Model Testing Confusion Matrix:\n", confusion_matrix(y_test, y_test_pred))

[1m3027/3027[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 1ms/step
Final Model Testing Classification Report:

              precision    recall  f1-score   support

           0     0.9958    0.8442    0.9138     95415
           1     0.0683    0.7633    0.1254      1428

    accuracy                         0.8430     96843
   macro avg     0.5321    0.8038    0.5196     96843
weighted avg     0.9821    0.8430    0.9021     96843

Final Model Testing ROC-AUC Score: 0.8809
Final Model Testing PR-AUC Score: 0.1769
Final Model Testing Confusion Matrix:
 [[80551 14864]
 [  338  1090]]
