In [2]:
# ------------------------------------------------------------
# (a) Import required libraries
# ------------------------------------------------------------
import pandas as pd
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense
import numpy as np


# ------------------------------------------------------------
# (b) Upload / access the dataset + preprocessing
# ------------------------------------------------------------
data = pd.read_csv('creditcard.csv')     # Load dataset

# Normalize the "Amount" feature
data['Amount'] = StandardScaler().fit_transform(data[['Amount']])

# Drop the "Time" column (not needed)
data = data.drop(['Time'], axis=1)

# Use only normal transactions (Class = 0) for training
# Autoencoder learns normal pattern, anomalies → high error
x_train = data[data['Class'] == 0].drop(['Class'], axis=1).values


# ------------------------------------------------------------
# (c) Encoder converts it into latent representation
# ------------------------------------------------------------
inp = Input((29,))                      # Input layer with 29 features
enc = Dense(16, activation='relu')(inp)  # Encoder layer
lat = Dense(8, activation='relu')(enc)   # Latent (compressed) representation


# ------------------------------------------------------------
# (d) Decoder networks convert it back to original input
# ------------------------------------------------------------
dec = Dense(16, activation='relu')(lat)   # Decoder layer
out = Dense(29, activation='linear')(dec) # Output layer reconstructing input

# Build the complete Autoencoder model
model = Model(inp, out)


# ------------------------------------------------------------
# (e) Compile the model with Optimizer, Loss, Evaluation Metrics
# ------------------------------------------------------------
model.compile(optimizer='adam', loss='mse', metrics=['mae'])


# Train the autoencoder
# Training input = output, since goal is reconstruction
model.fit(x_train, x_train, epochs=5, batch_size=32)


X = data.drop(["Class"], axis=1).values
y_true = data["Class"].values

recon = model.predict(X)
mse = np.mean(np.square(X - recon), axis=1)
threshold = np.percentile(mse, 95)
y_pred = mse > threshold


from sklearn.metrics import classification_report, confusion_matrix

print(confusion_matrix(y_true, y_pred))
print(classification_report(y_true, y_pred))


Epoch 1/5
[1m8885/8885[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 1ms/step - loss: 0.4281 - mae: 0.4139
Epoch 2/5
[1m8885/8885[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 998us/step - loss: 0.3050 - mae: 0.3495 
Epoch 3/5
[1m8885/8885[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 1ms/step - loss: 0.2847 - mae: 0.3301   
Epoch 4/5
[1m8885/8885[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 1ms/step - loss: 0.2754 - mae: 0.3194
Epoch 5/5
[1m8885/8885[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 1ms/step - loss: 0.2703 - mae: 0.3146
[1m8901/8901[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 672us/step
[[270494  13821]
 [    72    420]]
              precision    recall  f1-score   support

           0       1.00      0.95      0.97    284315
           1       0.03      0.85      0.06       492

    accuracy                           0.95    284807
   macro avg       0.51      0.90      0.52    284807
weighted avg       1.00  