# Implement anomaly detection for given credit card dataset using Autoencoder and  build the model by using the following steps:
* a. Import required libraries
* b. Upload / access the dataset
* c. Encoder converts it into latent representation
* d. Decoder networks convert it back to the original input
* e. Compile the models with Optimizer, Loss, and Evaluation Metrics

In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split

In [2]:
data = pd.read_csv("../datasets/ECGdataset(Ass4)/ecg_autoencoder_dataset.csv")

In [3]:
x = data.values
x

array([[-1.1008778 , -3.9968398 , -4.2858426 , ...,  1.1196209 ,
        -1.4362499 ,  1.        ],
       [-0.56708802, -2.5934502 , -3.8742297 , ...,  0.90422673,
        -0.42179659,  1.        ],
       [ 0.49047253, -1.9144071 , -3.6163638 , ...,  1.403011  ,
        -0.38356426,  1.        ],
       ...,
       [-1.1229693 , -2.2529248 , -2.8676281 , ..., -2.0083694 ,
        -1.8083338 ,  0.        ],
       [-0.54770461, -1.8895451 , -2.8397786 , ...,  1.1504486 ,
         0.80493225,  0.        ],
       [-1.3517791 , -2.2090058 , -2.5202247 , ..., -1.577823  ,
        -0.68453092,  0.        ]])

In [4]:
y = np.zeros(x.shape[0])
y

array([0., 0., 0., ..., 0., 0., 0.])

In [5]:
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.25, random_state=40)

In [7]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, InputLayer
from sklearn.preprocessing import StandardScaler

In [8]:
scaler = StandardScaler()
x_train = scaler.fit_transform(x_train)
x_test = scaler.transform(x_test)

In [11]:
x_train.shape

(3747, 141)

In [18]:
input_dim = x_train.shape[1]
embedding_dim = 16

input_dim

141

In [19]:
encoder = Sequential([
    InputLayer(input_dim),
    Dense(32, "relu"),
    Dense(embedding_dim, "relu")
])

In [20]:
decoder = Sequential([
    InputLayer(embedding_dim),
    Dense(32, "relu"),
    Dense(input_dim, "sigmoid")
])

In [21]:
autoencoder = Sequential([encoder, decoder])

In [22]:
from tensorflow.keras.optimizers import Adam

In [23]:
autoencoder.compile("adam", "mean_squared_error")

In [26]:
autoencoder.fit(x_train, y_train, 120, 10, validation_data=(x_test, y_test))

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.src.callbacks.History at 0x7f22a01dd8d0>

In [15]:
autoencoder.summary()

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 sequential (Sequential)     (None, 16)                5072      
                                                                 
 sequential_1 (Sequential)   (None, 141)               5197      
                                                                 
Total params: 10269 (40.11 KB)
Trainable params: 10269 (40.11 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [28]:
predictions = autoencoder.predict(x_test)
mse = np.mean((x_test - predictions) * 2, 1)
mse



array([ 0.62063868,  0.15085741, -0.15821668, ...,  0.29980554,
        0.14342884, -0.47071824])

In [23]:
threshold = np.percentile(mse, 95)
y_test_pred = np.where(mse > threshold, 1, 0)
y_test_pred

array([1, 0, 0, ..., 0, 0, 0])

In [28]:
from sklearn.metrics import accuracy_score
accuracy_score(y_test, y_test_pred)

0.9496