In [1]:
#imports
import numpy as np
import tensorflow as tf
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.layers import Input,Dense
from tensorflow.keras.models import Model

In [2]:
#create a synethetic data
def generate_data(samples=1000,features=20,anomalies=10):
    normal_size= (samples-anomalies,features)
    anomalies_size=(anomalies,features)
    normal_data=np.random.normal(0,1,size=normal_size)
    anomalies_data=np.random.uniform(-5,5,size=anomalies_size)
    data=np.vstack([normal_data,anomalies_data])
    np.random.shuffle(data)
    return data

data=generate_data()


In [3]:
type(data)

numpy.ndarray

In [4]:
data.shape

(1000, 20)

In [5]:
data[0,:]

array([-0.38914054,  0.30147886, -2.30717223, -0.42715849, -0.52494079,
       -0.45451444, -0.36694853, -1.72845302, -0.3179811 , -0.62416868,
       -1.47684611, -0.58005341,  1.48745178,  0.93788449, -1.44028006,
        1.08261865,  0.05505279,  0.96059046, -1.01417098, -0.38947207])

In [6]:
#splitting the data into train and test sets
x_tr,x_test=train_test_split(data,test_size=0.25,random_state=42)
x_tr.shape

(750, 20)

In [7]:
x_test.shape

(250, 20)

In [8]:
scaler=MinMaxScaler()
x_tr=scaler.fit_transform(x_tr)
x_test=scaler.transform(x_test)

In [9]:
input_dim=x_tr.shape[1]
input_dim

20

In [10]:
input_layer=Input(shape=(input_dim,))
#Encoder
encoded=Dense(128,activation='relu')(input_layer)
encoded=Dense(64,activation='relu')(encoded)
encoded=Dense(32,activation='relu')(encoded)
#Decoder
decoded=Dense(64,activation='relu')(encoded)
decoded=Dense(128,activation='relu')(decoded)
decoded=Dense(input_dim,activation='sigmoid')(decoded)

autoencoder=Model(inputs=input_layer,outputs=decoded)


In [11]:
autoencoder.compile(optimizer='adam',loss='mse')

In [12]:
#training the model
autoencoder.fit(x_tr,x_tr,epochs=100,validation_split=0.2)

Epoch 1/100
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 99ms/step - loss: 0.0222 - val_loss: 0.0149
Epoch 2/100
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 0.0165 - val_loss: 0.0147
Epoch 3/100
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 0.0159 - val_loss: 0.0140
Epoch 4/100
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 0.0148 - val_loss: 0.0131
Epoch 5/100
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 0.0139 - val_loss: 0.0123
Epoch 6/100
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 0.0132 - val_loss: 0.0113
Epoch 7/100
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 0.0126 - val_loss: 0.0107
Epoch 8/100
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 0.0115 - val_loss: 0.0100
Epoch 9/100
[1m19/19[0m [32m━━━━━━━━━━━━━━━━

<keras.src.callbacks.history.History at 0x7ec2d95062f0>

In [13]:
#Evaluate the model on test data
loss=autoencoder.evaluate(x_test,x_test)
print(f"mean squared error = {loss}")

[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 52ms/step - loss: 4.4048e-04
mean squared error = 0.0008017347427085042


In [22]:
#calculating the reconstruction errors for the anomalous data
preds=autoencoder.predict(x_test)
reconstruction_errors=preds-x_test
anomaly_threshold=0.1
anomaly_indices=np.where(reconstruction_errors>anomaly_threshold)[0]
unique_anomaly_indices=np.unique(anomaly_indices)
print(f"the first two reconstruction errors = {reconstruction_errors[:2]}")
print(f"anomaly indices = {unique_anomaly_indices}")
print(f"the number of the anomaly elements = {len(unique_anomaly_indices)}")


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 
the first two reconstruction errors = [[ 0.00089734  0.00223528 -0.01643368 -0.0112798   0.00290935  0.00567957
  -0.00317595  0.00428614 -0.00396277  0.01852792 -0.00408206  0.02143476
   0.00642485 -0.01065798 -0.00437566 -0.00176496  0.01197668  0.0103968
  -0.01117392 -0.0044121 ]
 [-0.00188437  0.00762674  0.00574002 -0.01328129  0.00321044  0.00508281
   0.00969081  0.00365943 -0.00188123 -0.003774   -0.00524873  0.00432992
   0.00267401 -0.0031334   0.00614341  0.0006414   0.00060645  0.00995017
  -0.0070885   0.00229623]]
anomaly indices = [ 12  52 152 157 192 219 223 230 248]
the number of the anomaly elements = 9


In [19]:
reconstruction_errors.shape

(250, 20)

References

*blog post : https://readmedium.com/en/https:/levelup.gitconnected.com/build-deep-autoencoders-model-for-anomaly-detection-in-python-a-complete-guide-a7d0ec0e688

*chatgpt conversation : https://chatgpt.com/share/67328e03-fb04-800f-a08b-97f467d444ae