In [11]:
!pip install kagglehub --upgrade --quiet
import kagglehub
import pandas as pd
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from sklearn.preprocessing import RobustScaler
from sklearn.utils.class_weight import compute_class_weight
import numpy as np
import pandas as pd
# Download the Kaggle credit-card-fraud dataset through kagglehub. The returned
# location is kept in `path`; the next cell looks for creditcard.csv beneath it.
DATASET_HANDLE = "mlg-ulb/creditcardfraud"
path = kagglehub.dataset_download(DATASET_HANDLE)

Using Colab cache for faster access to the 'creditcardfraud' dataset.


In [14]:
# Point `path` at the CSV inside the downloaded dataset directory and load it.
# Re-running this cell must not append the file name a second time (that would
# give ".../creditcard.csv/creditcard.csv" and make read_csv fail), so `path`
# is only extended when it does not already end with the CSV name.
CSV_NAME = 'creditcard.csv'
if not str(path).endswith(CSV_NAME):
    path = f'{path}/{CSV_NAME}'
df = pd.read_csv(path)

In [47]:

def _as_float32(data):
    """Return a pandas object as a NumPy array cast to float32."""
    return data.to_numpy().astype(np.float32)


# Target column and feature matrix (everything except the label).
y = df['Class']
X = df.drop('Class', axis=1)

# 70 % of the rows go to training; the remaining 30 % are halved into
# validation and test. Both splits are stratified on the label and seeded.
train_x, temp_x, train_y, temp_y = train_test_split(
    X, y, test_size=0.3, random_state=42, stratify=y
)
val_x, test_x, val_y, test_y = train_test_split(
    temp_x, temp_y, test_size=0.5, random_state=42, stratify=temp_y
)

# Fit the scaler on the training rows only, then apply the same fitted
# transform to the validation and test rows.
scaler = RobustScaler()
scaled_cols = ['Amount', 'Time']
train_x[scaled_cols] = scaler.fit_transform(train_x[scaled_cols])
for held_out in (val_x, test_x):
    held_out[scaled_cols] = scaler.transform(held_out[scaled_cols])

# float32 NumPy copies of every split for Keras.
train_x_np, train_y_np = _as_float32(train_x), _as_float32(train_y)
val_x_np, val_y_np = _as_float32(val_x), _as_float32(val_y)
test_x_np, test_y_np = _as_float32(test_x), _as_float32(test_y)

# Per-class weights from scikit-learn's 'balanced' setting, computed on the
# training labels and keyed by class label for `model.fit(class_weight=...)`.
observed_classes = np.unique(train_y)
class_weight = compute_class_weight(
    class_weight='balanced', classes=observed_classes, y=train_y
)
class_weight_dict = {label: class_weight[label] for label in (0, 1)}



In [48]:
# Seed Python, NumPy and the Keras backend before any weights are created so
# that weight initialisation, dropout and batch shuffling repeat across runs.
keras.utils.set_random_seed(42)

# Fully connected binary classifier: three Dense -> BatchNormalization ->
# Dropout(0.2) stages (256, 128, 64 units) followed by one sigmoid output unit.
model = keras.Sequential([
    layers.Input(shape=(train_x_np.shape[1],)),
    layers.Dense(256, activation='relu'),
    layers.BatchNormalization(),
    layers.Dropout(0.2),
    layers.Dense(128, activation='relu'),
    layers.BatchNormalization(),
    layers.Dropout(0.2),
    layers.Dense(64, activation='relu'),
    layers.BatchNormalization(),
    layers.Dropout(0.2),
    layers.Dense(1, activation='sigmoid'),
])

model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=0.001),
    # Pass a loss *instance*. Handing over the bare class only works on Keras
    # versions that instantiate it implicitly.
    loss=keras.losses.BinaryCrossentropy(),
    metrics=[
        keras.metrics.AUC(name='pr_auc', curve='PR'),
        keras.metrics.AUC(name='roc_auc', curve='ROC'),
        keras.metrics.BinaryAccuracy(name='binary_accuracy'),
        # Explicit names: unnamed metrics are auto-numbered per kernel session
        # (the logs showed "precision_5" / "recall_5"), which makes the keys in
        # the training logs and in `history.history` unstable between runs.
        keras.metrics.Precision(name='precision'),
        keras.metrics.Recall(name='recall'),
    ],
)

# Both callbacks watch validation PR-AUC (higher is better). ReduceLROnPlateau
# waits 12 epochs without improvement; EarlyStopping waits 18 and then restores
# the weights from the best epoch.
early_stop = keras.callbacks.EarlyStopping(
    monitor='val_pr_auc',
    mode='max',
    patience=18,
    restore_best_weights=True,
    verbose=1,
)
reduce_lr = keras.callbacks.ReduceLROnPlateau(
    monitor='val_pr_auc',
    mode='max',
    patience=12,
)


In [49]:
# Train for up to 60 epochs in mini-batches of 2048 rows. The loss is weighted
# per class, and the validation split is passed so the callbacks can monitor it.
training_callbacks = [early_stop, reduce_lr]
history = model.fit(
    x=train_x_np,
    y=train_y_np,
    batch_size=2048,
    epochs=60,
    verbose=1,
    validation_data=(val_x_np, val_y_np),
    class_weight=class_weight_dict,
    callbacks=training_callbacks,
)

Epoch 1/60
[1m98/98[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 48ms/step - binary_accuracy: 0.5888 - loss: 0.7057 - pr_auc: 0.1124 - precision_5: 0.0039 - recall_5: 0.8626 - roc_auc: 0.8642 - val_binary_accuracy: 0.9772 - val_loss: 0.3482 - val_pr_auc: 0.5951 - val_precision_5: 0.0615 - val_recall_5: 0.8514 - val_roc_auc: 0.9500 - learning_rate: 0.0010
Epoch 2/60
[1m98/98[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 41ms/step - binary_accuracy: 0.8634 - loss: 0.2660 - pr_auc: 0.2530 - precision_5: 0.0118 - recall_5: 0.9449 - roc_auc: 0.9764 - val_binary_accuracy: 0.9863 - val_loss: 0.1901 - val_pr_auc: 0.5574 - val_precision_5: 0.0976 - val_recall_5: 0.8378 - val_roc_auc: 0.9604 - learning_rate: 0.0010
Epoch 3/60
[1m98/98[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 52ms/step - binary_accuracy: 0.9417 - loss: 0.1869 - pr_auc: 0.3335 - precision_5: 0.0291 - recall_5: 0.9454 - roc_auc: 0.9845 - val_binary_accuracy: 0.9911 - val_loss: 0.1255 - val_pr_auc: 0

In [50]:
# Score the held-out test split. The result is unpacked as the loss followed by
# the metrics in the order they were passed to `model.compile`.
test_scores = model.evaluate(test_x_np, test_y_np)
loss, pr_auc, roc_auc, accuracy, precision, recall = test_scores

[1m1336/1336[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - binary_accuracy: 0.9936 - loss: 0.0241 - pr_auc: 0.6672 - precision_5: 0.2052 - recall_5: 0.9188 - roc_auc: 0.9674


In [51]:
# Echo the test-set results on one line, space-separated, in evaluation order.
test_results = (loss, pr_auc, roc_auc, accuracy, precision, recall)
print(*test_results)

0.022992033511400223 0.6704648733139038 0.9745488166809082 0.9936800599098206 0.20121951401233673 0.8918918967247009
