<a href="https://colab.research.google.com/github/suba0712200-oss/time-series-forecasting-attention/blob/main/Implementing_Variational_Autoencoders_for_Anomaly_Detection_on_Synthetic_High_Dimensional_Data.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [5]:
# 1. Import Libraries

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.datasets import make_blobs
from sklearn.metrics import roc_auc_score
from sklearn.preprocessing import StandardScaler
from torch.utils.data import DataLoader, TensorDataset
import matplotlib.pyplot as plt


In [6]:
# 2. Synthetic Dataset Generation

np.random.seed(42)

# Normal data
X_normal, _ = make_blobs(
    n_samples=9800,
    n_features=50,
    centers=1,
    cluster_std=1.0
)

# Anomalies
X_anomaly, _ = make_blobs(
    n_samples=200,
    n_features=50,
    centers=1,
    cluster_std=5.0
)

X = np.vstack([X_normal, X_anomaly])
y = np.hstack([np.zeros(9800), np.ones(200)])

# Scaling
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

X_tensor = torch.tensor(X_scaled, dtype=torch.float32)
y_tensor = torch.tensor(y, dtype=torch.int64)


In [7]:
# 3. DataLoader

dataset = TensorDataset(X_tensor)
loader = DataLoader(dataset, batch_size=128, shuffle=True)


In [8]:
# 4. VAE Model Architecture

class VAE(nn.Module):
    def __init__(self, input_dim, latent_dim):
        super(VAE, self).__init__()

        self.encoder = nn.Sequential(
            nn.Linear(input_dim, 128),
            nn.ReLU(),
            nn.Linear(128, 64),
            nn.ReLU()
        )

        self.mu = nn.Linear(64, latent_dim)
        self.log_var = nn.Linear(64, latent_dim)

        self.decoder = nn.Sequential(
            nn.Linear(latent_dim, 64),
            nn.ReLU(),
            nn.Linear(64, 128),
            nn.ReLU(),
            nn.Linear(128, input_dim)
        )

    def reparameterize(self, mu, log_var):
        std = torch.exp(0.5 * log_var)
        eps = torch.randn_like(std)
        return mu + eps * std

    def forward(self, x):
        encoded = self.encoder(x)
        mu = self.mu(encoded)
        log_var = self.log_var(encoded)
        z = self.reparameterize(mu, log_var)
        reconstructed = self.decoder(z)
        return reconstructed, mu, log_var


In [9]:
# 5. Loss Function (ELBO)

def vae_loss(recon_x, x, mu, log_var):
    recon_loss = nn.MSELoss(reduction='sum')(recon_x, x)
    kl_loss = -0.5 * torch.sum(1 + log_var - mu.pow(2) - log_var.exp())
    return recon_loss + kl_loss


In [10]:
# 6. Model Training

def train_vae(latent_dim, epochs=20):
    model = VAE(input_dim=50, latent_dim=latent_dim)
    optimizer = optim.Adam(model.parameters(), lr=0.001)

    model.train()
    for epoch in range(epochs):
        total_loss = 0
        for batch in loader:
            data = batch[0]
            optimizer.zero_grad()
            recon, mu, log_var = model(data)
            loss = vae_loss(recon, data, mu, log_var)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
        print(f"Epoch {epoch+1}, Loss: {total_loss:.2f}")
    return model


In [11]:
# 7. Anomaly Score Calculation

def evaluate(model):
    model.eval()
    with torch.no_grad():
        recon, mu, log_var = model(X_tensor)
        reconstruction_error = torch.mean((X_tensor - recon) ** 2, dim=1)
        return reconstruction_error.numpy()


In [12]:
# 8. AUC-ROC Evaluation

latent_dims = [5, 10, 20]
auc_scores = {}

for ld in latent_dims:
    print(f"\nTraining VAE with latent dimension = {ld}")
    model = train_vae(ld)
    scores = evaluate(model)
    auc = roc_auc_score(y, scores)
    auc_scores[ld] = auc
    print(f"AUC-ROC for latent dim {ld}: {auc:.4f}")



Training VAE with latent dimension = 5
Epoch 1, Loss: 415000.89
Epoch 2, Loss: 354424.99
Epoch 3, Loss: 351043.66
Epoch 4, Loss: 349251.65
Epoch 5, Loss: 348662.45
Epoch 6, Loss: 348804.04
Epoch 7, Loss: 346669.26
Epoch 8, Loss: 346186.73
Epoch 9, Loss: 345744.48
Epoch 10, Loss: 344725.47
Epoch 11, Loss: 342948.11
Epoch 12, Loss: 341884.76
Epoch 13, Loss: 341713.31
Epoch 14, Loss: 340270.94
Epoch 15, Loss: 339694.47
Epoch 16, Loss: 339078.85
Epoch 17, Loss: 338786.58
Epoch 18, Loss: 338511.79
Epoch 19, Loss: 337761.15
Epoch 20, Loss: 337274.19
AUC-ROC for latent dim 5: 1.0000

Training VAE with latent dimension = 10
Epoch 1, Loss: 425081.31
Epoch 2, Loss: 356425.61
Epoch 3, Loss: 352114.73
Epoch 4, Loss: 350447.93
Epoch 5, Loss: 348859.42
Epoch 6, Loss: 349395.42
Epoch 7, Loss: 348846.52
Epoch 8, Loss: 347732.18
Epoch 9, Loss: 348096.87
Epoch 10, Loss: 347612.59
Epoch 11, Loss: 347763.23
Epoch 12, Loss: 346834.04
Epoch 13, Loss: 346721.28
Epoch 14, Loss: 347045.69
Epoch 15, Loss: 3465

In [13]:
# 9. Results & Analysis

final_scores = scores.tolist()
print(",".join(map(str, final_scores[:200])))  # preview


0.5040157437324524,0.39200425148010254,0.35280197858810425,0.47212550044059753,0.3792875409126282,0.5617756843566895,0.5473516583442688,0.57037353515625,0.49684181809425354,0.5089572668075562,0.5743149518966675,0.26335370540618896,0.3197672367095947,0.41882583498954773,0.512352705001831,0.30921050906181335,0.3356395363807678,0.4801691770553589,0.4136938452720642,0.44558510184288025,0.4841136634349823,0.5334721207618713,0.4089968502521515,0.7923189401626587,0.5038527846336365,0.38222819566726685,0.3677014112472534,0.42980507016181946,0.330659955739975,0.5835064053535461,0.45232436060905457,0.4643898010253906,0.6415038108825684,0.4338971674442291,0.5476923584938049,0.3894232213497162,0.4097849130630493,0.5171718001365662,0.3540676236152649,0.5395333766937256,0.6163232326507568,0.4045148491859436,0.2983090877532959,0.447322815656662,0.4261983633041382,0.3382141888141632,0.5015267729759216,0.470851868391037,0.5047181844711304,0.5178002715110779,0.4025498330593109,0.3747885227203369,0.42937