In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense

# a. Load and preprocess dataset
data = pd.read_csv('/Users/akshay/Downloads/demo_dl/datasets1/creditcardfraud-csv/creditcardfraud-csv/creditcard.csv')
data['Amount'] = StandardScaler().fit_transform(data[['Amount']])
data = data.drop(['Time'], axis=1)

# Separate normal transactions (Class = 0) for training
x_train = data[data['Class'] == 0].drop(['Class'], axis=1).values

# Test data (normal + fraud)
x_test = data.drop(['Class'], axis=1).values
y_test = data['Class'].values

# b. Build Autoencoder model
inp = Input((29,))
enc = Dense(16, activation='relu')(inp)
lat = Dense(8, activation='relu')(enc)
dec = Dense(16, activation='relu')(lat)
out = Dense(29, activation='linear')(dec)
model = Model(inp, out)

# c. Compile & Train
model.compile(optimizer='adam', loss='mse', metrics=['mae'])
model.fit(x_train, x_train, epochs=5, batch_size=32, validation_split=0.2, verbose=1)

# d. Reconstruction on test data
reconstructions = model.predict(x_test)

# e. Calculate reconstruction error
mse = np.mean(np.power(x_test - reconstructions, 2), axis=1)

# f. Set threshold for anomaly detection
threshold = np.percentile(mse, 95)  # top 5% errors = fraud
print("Threshold: ", threshold)

# g. Predict anomalies
y_pred = (mse > threshold).astype(int)

# h. Evaluation
print("\nConfusion Matrix:")
print(confusion_matrix(y_test, y_pred))

print("\nClassification Report:")
print(classification_report(y_test, y_pred, target_names=['Normal', 'Fraud']))

print("Accuracy: ", accuracy_score(y_test, y_pred))

Epoch 1/5
[1m7108/7108[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 2ms/step - loss: 0.4419 - mae: 0.4253 - val_loss: 0.3454 - val_mae: 0.3819
Epoch 2/5
[1m7108/7108[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 2ms/step - loss: 0.3216 - mae: 0.3647 - val_loss: 0.3065 - val_mae: 0.3659
Epoch 3/5
[1m7108/7108[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 2ms/step - loss: 0.2911 - mae: 0.3472 - val_loss: 0.2828 - val_mae: 0.3499
Epoch 4/5
[1m7108/7108[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 2ms/step - loss: 0.2758 - mae: 0.3395 - val_loss: 0.2747 - val_mae: 0.3453
Epoch 5/5
[1m7108/7108[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 2ms/step - loss: 0.2690 - mae: 0.3337 - val_loss: 0.2707 - val_mae: 0.3436
[1m8901/8901[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 882us/step
Threshold:  0.647574812027537

Confusion Matrix:
[[270506  13809]
 [    60    432]]

Classification Report:
              precision    recall  f1-score