In [1]:
import os
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.preprocessing import StandardScaler
from torch.utils.data import DataLoader, TensorDataset

In [2]:
# ---------- CONFIGURATION ----------
preprocessed_folder = "/home/smartdragon/Videos/SWAT/SWaT.A1 & A2_Dec 2015/Data"  # Same folder where preprocessed batches were saved
output_folder = "/home/smartdragon/Videos/SWAT/SWaT.A1 & A2_Dec 2015/Data/Results"
os.makedirs(output_folder, exist_ok=True)

In [3]:
batch_files = sorted([os.path.join(preprocessed_folder, f) for f in os.listdir(preprocessed_folder) if f.startswith("preprocessed_batch_")])

In [4]:
# ---------- Autoencoder Model ----------
class Autoencoder(nn.Module):
    def __init__(self, input_dim):
        super(Autoencoder, self).__init__()
        self.encoder = nn.Sequential(
            nn.Linear(input_dim, 32),
            nn.ReLU(),
            nn.Linear(32, 16),
            nn.ReLU(),
            nn.Linear(16, 8),
            nn.ReLU()
        )
        self.decoder = nn.Sequential(
            nn.Linear(8, 16),
            nn.ReLU(),
            nn.Linear(16, 32),
            nn.ReLU(),
            nn.Linear(32, input_dim),
            nn.Sigmoid()
        )

    def forward(self, x):
        encoded = self.encoder(x)
        decoded = self.decoder(encoded)
        return decoded

In [5]:
# ---------- Training Function ----------
def train_autoencoder(model, dataloader, epochs=30, device="cpu"):
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)

    model.train()
    for epoch in range(epochs):
        epoch_loss = 0
        for data in dataloader:
            inputs = data[0].to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, inputs)
            loss.backward()
            optimizer.step()
            epoch_loss += loss.item()
        print(f"Epoch [{epoch+1}/{epochs}] Loss: {epoch_loss/len(dataloader):.6f}")

In [6]:
# ---------- Batchwise Processing ----------
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [None]:
for batch_num, batch_file in enumerate(batch_files, 1):
    print(f"\n🔹 Processing Batch {batch_num}: {batch_file}")

    # Load Data
    df = pd.read_csv(batch_file)
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(df)

    # Convert to PyTorch Dataset
    X_tensor = torch.tensor(X_scaled, dtype=torch.float32)
    dataloader = DataLoader(TensorDataset(X_tensor), batch_size=256, shuffle=True)

    # Initialize Autoencoder
    input_dim = X_tensor.shape[1]
    autoencoder = Autoencoder(input_dim).to(device)

    # Train Autoencoder
    train_autoencoder(autoencoder, dataloader, epochs=30, device=device)

    # Save Model
    model_path = os.path.join(output_folder, f"autoencoder_batch_{batch_num}.pt")
    torch.save(autoencoder.state_dict(), model_path)
    print(f"✅ Model for Batch {batch_num} saved to {model_path}")

    # ---------- Anomaly Detection for the Batch ----------
    autoencoder.eval()
    with torch.no_grad():
        X_reconstructed = autoencoder(X_tensor.to(device)).cpu().numpy()

    # Reconstruction Error
    reconstruction_errors = np.mean((X_scaled - X_reconstructed) ** 2, axis=1)

    # Anomaly Detection (Top 5% as Anomalies)
    threshold = np.percentile(reconstruction_errors, 95)
    anomalies = reconstruction_errors > threshold

    # Save Anomaly Results
    df['reconstruction_error'] = reconstruction_errors
    df['anomaly'] = anomalies.astype(int)

    result_file = os.path.join(output_folder, f"anomaly_results_batch_{batch_num}.csv")
    df.to_csv(result_file, index=False)
    print(f"✅ Anomaly results for Batch {batch_num} saved to {result_file}")
    print(f"🔔 Detected {df['anomaly'].sum()} anomalies out of {len(df)} rows in Batch {batch_num}")

print("\n✅ All batches processed for anomaly detection.")



🔹 Processing Batch 1: /home/smartdragon/Videos/SWAT/SWaT.A1 & A2_Dec 2015/Data/preprocessed_batch_1.csv
Epoch [1/30] Loss: 0.813959
Epoch [2/30] Loss: 0.810594
Epoch [3/30] Loss: 0.810574
Epoch [4/30] Loss: 0.810660
Epoch [5/30] Loss: 0.794926
Epoch [6/30] Loss: 0.766134
Epoch [7/30] Loss: 0.766114
Epoch [8/30] Loss: 0.766122
Epoch [9/30] Loss: 0.766148
Epoch [10/30] Loss: 0.766114
Epoch [11/30] Loss: 0.766115
Epoch [12/30] Loss: 0.766111
Epoch [13/30] Loss: 0.766171
Epoch [14/30] Loss: 0.766292
Epoch [15/30] Loss: 0.766300
Epoch [16/30] Loss: 0.766298
Epoch [17/30] Loss: 0.766296
Epoch [18/30] Loss: 0.766295
Epoch [19/30] Loss: 0.766295
Epoch [20/30] Loss: 0.766294
Epoch [21/30] Loss: 0.766122
Epoch [22/30] Loss: 0.766100
Epoch [23/30] Loss: 0.766101
Epoch [24/30] Loss: 0.766101
Epoch [25/30] Loss: 0.766100
Epoch [26/30] Loss: 0.766103
Epoch [27/30] Loss: 0.766116
Epoch [28/30] Loss: 0.766179
Epoch [29/30] Loss: 0.766154
Epoch [30/30] Loss: 0.766090
✅ Model for Batch 1 saved to /home