In [None]:
import pandas as pd
df = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/ctgan_t2_balanced.csv')

In [None]:
X = df.drop(columns=['Fraud_Label'])
y = df['Fraud_Label']

In [None]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.20, stratify=y, random_state=42
)

print("Training set shape:", X_train.shape)
print("Test set shape:", X_test.shape)


Training set shape: (53600, 19)
Test set shape: (13400, 19)


In [None]:
X_train_nonfraud = X_train[y_train == 0]

In [None]:
import tensorflow as tf
from tensorflow.keras import layers, models

input_dim = X_train_nonfraud.shape[1]

autoencoder = models.Sequential([
    layers.Input(shape=(input_dim,)),
    layers.Dense(64, activation='relu'),
    layers.Dense(32, activation='relu'),
    layers.Dense(64, activation='relu'),
    layers.Dense(input_dim, activation='linear')
])

autoencoder.compile(optimizer='adam', loss='mse')
autoencoder.summary()

In [None]:
history = autoencoder.fit(
    X_train_nonfraud,
    X_train_nonfraud,
    epochs=100,
    batch_size=256,
    validation_split=0.2,
    verbose=1
)


Epoch 1/100
[1m85/85[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - loss: 133867792.0000 - val_loss: 489133.0625
Epoch 2/100
[1m85/85[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 198672.6719 - val_loss: 21115.8184
Epoch 3/100
[1m85/85[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 14690.3604 - val_loss: 3920.5222
Epoch 4/100
[1m85/85[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 3164.7527 - val_loss: 1870.0887
Epoch 5/100
[1m85/85[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 1805.0991 - val_loss: 1600.0939
Epoch 6/100
[1m85/85[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 1563.4843 - val_loss: 1378.5371
Epoch 7/100
[1m85/85[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 1386.8721 - val_loss: 1285.6663
Epoch 8/100
[1m85/85[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 1239.5374 - val_loss:

In [None]:
reconstructions = autoencoder.predict(X_test)
import numpy as np

reconstruction_errors = ((X_test - reconstructions) ** 2).mean(axis=1)

[1m419/419[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 735us/step


In [None]:
threshold = np.percentile(reconstruction_errors[y_test == 0], 95)
print("Reconstruction error threshold:", threshold)

Reconstruction error threshold: 42.22437536124513


In [None]:
autoencoder_preds = (reconstruction_errors > threshold).astype(int)

In [None]:
from sklearn.metrics import (
    accuracy_score, precision_score, recall_score, f1_score,
    confusion_matrix, classification_report, roc_auc_score
)


autoencoder_preds = (reconstruction_errors > threshold).astype(int)


accuracy = accuracy_score(y_test, autoencoder_preds)
precision = precision_score(y_test, autoencoder_preds)
recall = recall_score(y_test, autoencoder_preds)
f1 = f1_score(y_test, autoencoder_preds)
roc_auc = roc_auc_score(y_test, reconstruction_errors)

print(f"✅ Autoencoder Performance:")
print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1 Score: {f1:.4f}")
print(f"ROC-AUC Score (on reconstruction errors): {roc_auc:.4f}")
print("\nClassification Report:\n", classification_report(y_test, autoencoder_preds))
print("Confusion Matrix:\n", confusion_matrix(y_test, autoencoder_preds))


✅ Autoencoder Performance:
Accuracy: 0.5039
Precision: 0.4729
Recall: 0.0461
F1 Score: 0.0840
ROC-AUC Score (on reconstruction errors): 0.4974

Classification Report:
               precision    recall  f1-score   support

           0       0.51      0.95      0.66      6787
           1       0.47      0.05      0.08      6613

    accuracy                           0.50     13400
   macro avg       0.49      0.50      0.37     13400
weighted avg       0.49      0.50      0.38     13400

Confusion Matrix:
 [[6447  340]
 [6308  305]]
