In [1]:
# Import Required Libraries
import numpy as np
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense


In [2]:
#Step 1: Create Synthetic Data
# Normal data (mean=0, std=1)
normal_data = np.random.normal(0, 1, (1000, 10))

# Anomaly data (mean=5, std=1)
anomaly_data = np.random.normal(5, 1, (250, 10))

# Combine both into one dataset
data = np.vstack([normal_data, anomaly_data])
labels = np.hstack([np.zeros(1000), np.ones(250)])        # 0 = normal, 1 = anomaly


In [3]:
#Step 2: Split into Training and Testing Sets
x_train, x_test, y_train, y_test = train_test_split(
    data, labels, test_size=0.3, random_state=42
)


In [4]:
#Step 3: Define Autoencoder Architecture
input_dim = x_train.shape[1]   # number of features = 10
encoding_dim = 3               # compressed latent dimension

# Input Layer
input_layer = Input(shape=(input_dim,))

# Encoder Layer
encoder_layer = Dense(encoding_dim, activation='relu')(input_layer)

# Decoder Layer
decoder_layer = Dense(input_dim, activation='linear')(encoder_layer)

# Build the Autoencoder Model
autoencoder = Model(input_layer, decoder_layer)
autoencoder.compile(optimizer='adam', loss='mse')


In [5]:
#Step 4: Train Autoencoder (Only on Normal Data)
autoencoder.fit(
    x_train[y_train == 0],
    x_train[y_train == 0],
    epochs=10
)


Epoch 1/10
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 9ms/step - loss: 1.3007 
Epoch 2/10
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - loss: 1.2346 
Epoch 3/10
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 9ms/step - loss: 1.1827  
Epoch 4/10
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - loss: 1.1392 
Epoch 5/10
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step - loss: 1.1046 
Epoch 6/10
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 9ms/step - loss: 1.0759  
Epoch 7/10
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 1.0515  
Epoch 8/10
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - loss: 1.0303 
Epoch 9/10
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - loss: 1.0119 
Epoch 10/10
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 10ms/step - loss

<keras.src.callbacks.history.History at 0x1cb0764a660>

In [6]:
autoencoder.summary()

In [7]:
#Step 5: Predict and Detect Anomalies
x_pred = autoencoder.predict(x_test)
mse = np.mean((x_test - x_pred) ** 2, axis=1)


[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step 


In [8]:
#Step 6: Calculate Dynamic Threshold using traning normal data
x_train_pred = autoencoder.predict(x_train[y_train == 0])
mse_train = np.mean((x_train[y_train == 0] - x_train_pred) ** 2, axis=1)

threshold = np.mean(mse_train) + 3 * np.std(mse_train)   # Dynamic threshold


[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step 


In [9]:
#Step 7: Predict Anomalies Using Dynamic Threshold
y_pred = (mse > threshold).astype(int)   # 1 = anomaly, 0 = normal


In [10]:
#Step 8: Display results
print("Calculated Threshold:", threshold)
print("True labels (first 20):", y_test[:20])
print("Predicted labels (first 20):", y_pred[:20])
print("MSE (first 20 samples):", mse[:20])


Calculated Threshold: 2.3875919564560046
True labels (first 20): [0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 1. 1. 1. 0. 1. 1.]
Predicted labels (first 20): [0 1 0 0 0 0 0 0 0 0 1 0 0 0 1 1 1 0 1 1]
MSE (first 20 samples): [ 0.58027076 22.5873353   0.89013684  0.64223884  0.90497145  0.98601801
  1.07587829  0.75333876  1.22855914  1.21505652 29.0016159   0.90708263
  0.7679564   1.34090763 33.14597612 24.67664935 30.63511163  1.4356781
 27.8983201  26.95805844]
