In [4]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, models
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import roc_auc_score, roc_curve
import matplotlib.pyplot as plt

In [6]:
data = pd.read_csv('/content/creditcard.csv')

In [7]:
X = data.drop(columns=['Class'])
y = data['Class']

In [8]:
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

In [9]:
normal_data = X_scaled[y == 0]
anomalous_data = X_scaled[y == 1]


X_train, X_val = train_test_split(normal_data, test_size=0.2, random_state=42)


input_dim = X_train.shape[1]

In [10]:
autoencoder = models.Sequential([
    layers.Input(shape=(input_dim,)),
    layers.Dense(16, activation='relu'),
    layers.Dense(8, activation='relu'),
    layers.Dense(16, activation='relu'),
    layers.Dense(input_dim, activation='linear')
])

autoencoder.compile(optimizer='adam', loss='mse')
autoencoder.summary()

In [11]:
history = autoencoder.fit(
    X_train, X_train,
    epochs=10,
    batch_size=32,
    validation_data=(X_val, X_val),
    verbose=1
)

Epoch 1/10
[1m7108/7108[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 3ms/step - loss: 0.6976 - val_loss: 0.5406
Epoch 2/10
[1m7108/7108[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 2ms/step - loss: 0.5193 - val_loss: 0.5128
Epoch 3/10
[1m7108/7108[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 2ms/step - loss: 0.4939 - val_loss: 0.4933
Epoch 4/10
[1m7108/7108[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 2ms/step - loss: 0.4774 - val_loss: 0.4857
Epoch 5/10
[1m7108/7108[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 2ms/step - loss: 0.4748 - val_loss: 0.4874
Epoch 6/10
[1m7108/7108[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 2ms/step - loss: 0.4723 - val_loss: 0.4820
Epoch 7/10
[1m7108/7108[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 2ms/step - loss: 0.4709 - val_loss: 0.4768
Epoch 8/10
[1m7108/7108[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 2ms/step - loss: 0.4706 - val_loss: 0.4783
Epoch 9/10
[1m7

In [12]:
X_all = np.concatenate([normal_data, anomalous_data])
y_all = np.concatenate([np.zeros(len(normal_data)), np.ones(len(anomalous_data))])
reconstructions = autoencoder.predict(X_all)

[1m8901/8901[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 1ms/step


In [15]:
reconstruction_errors = np.mean(np.square(X_all - reconstructions), axis=1)

normal_errors = reconstruction_errors[y_all == 0]
threshold = np.percentile(normal_errors, 95)
print(f'Reconstruction error threshold: {threshold}')

y_pred = (reconstruction_errors > threshold).astype(int)


print(f"Detected anomalies: {np.sum(y_pred)} out of {len(y_pred)} total samples.")

Reconstruction error threshold: 1.0596076407052495
Detected anomalies: 14654 out of 284807 total samples.
