In [1]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
from torch.utils.data import DataLoader, TensorDataset

In [2]:
# ---------- CONFIGURATION ----------
preprocessed_folder = "/home/smartdragon/Videos/SWAT/SWaT.A1 & A2_Dec 2015/Data"  # Same folder where preprocessed batches were saved
output_folder = "/home/smartdragon/Videos/SWAT/SWaT.A1 & A2_Dec 2015/Data/Results"
os.makedirs(output_folder, exist_ok=True)

In [3]:
batch_files = sorted([os.path.join(preprocessed_folder, f) for f in os.listdir(preprocessed_folder) if f.startswith("preprocessed_batch_")])

In [4]:
# ---------- Autoencoder Model ----------
class Autoencoder(nn.Module):
    def __init__(self, input_dim):
        super(Autoencoder, self).__init__()
        self.encoder = nn.Sequential(
            nn.Linear(input_dim, 32),
            nn.ReLU(),
            nn.Linear(32, 16),
            nn.ReLU(),
            nn.Linear(16, 8),
            nn.ReLU()
        )
        self.decoder = nn.Sequential(
            nn.Linear(8, 16),
            nn.ReLU(),
            nn.Linear(16, 32),
            nn.ReLU(),
            nn.Linear(32, input_dim),
            nn.Sigmoid()
        )

    def forward(self, x):
        encoded = self.encoder(x)
        decoded = self.decoder(encoded)
        return decoded

In [5]:
# ---------- Training Function ----------
def train_autoencoder(model, dataloader, epochs=1, device="cpu"):
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)

    model.train()
    for epoch in range(epochs):
        total_loss = 0
        for data in dataloader:
            inputs = data[0].to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, inputs)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
        print(f"Epoch [{epoch+1}/{epochs}] Loss: {total_loss / len(dataloader):.6f}")

In [6]:
# ---------- Compute Reconstruction Error ----------
def compute_reconstruction_error(model, loader, device):
    model.eval()
    errors = []
    with torch.no_grad():
        for batch in loader:
            inputs = batch[0].to(device)
            outputs = model(inputs)
            error = torch.mean((inputs - outputs) ** 2, dim=1).cpu().numpy()
            errors.extend(error)
    return np.array(errors)

In [7]:
# ---------- Train Model Batchwise with Train Accuracy ----------
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
autoencoder = None  # Model will be initialized on first batch
threshold = None  # Stores anomaly detection threshold after first batch

In [8]:
for batch_num, batch_file in enumerate(batch_files, 1):
    print(f"\n🔹 Processing Batch {batch_num}: {batch_file}")

    # Load Data
    df = pd.read_csv(batch_file)

    # Scale Data
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(df.drop(columns=['Tag'], errors='ignore'))  # Drop label if exists

    # Convert to PyTorch Tensors
    X_tensor = torch.tensor(X_scaled, dtype=torch.float32)
    dataloader = DataLoader(TensorDataset(X_tensor), batch_size=256, shuffle=True)

    # Initialize Model on First Batch
    if autoencoder is None:
        input_dim = X_tensor.shape[1]
        autoencoder = Autoencoder(input_dim).to(device)

    # Train Autoencoder on This Batch
    train_autoencoder(autoencoder, dataloader, epochs=1, device=device)

    # Save Model After Each Batch
    model_path = os.path.join(output_folder, f"autoencoder_progressive.pth")
    torch.save(autoencoder.state_dict(), model_path)
    print(f"✅ Model saved: {model_path}")

    model_path = os.path.join(output_folder, f"autoencoder_progressive.h5")
    torch.save(autoencoder.state_dict(), model_path)
    print(f"✅ Model saved: {model_path}")

    # Compute Reconstruction Error for Train Data
    reconstruction_errors = compute_reconstruction_error(autoencoder, dataloader, device)

    # Set Threshold Based on First Batch (95th Percentile)
    if threshold is None:
        threshold = np.percentile(reconstruction_errors, 95)
        print(f"🔹 Set Anomaly Threshold: {threshold:.6f}")

    # Detect Anomalies in Training Data
    train_predictions = (reconstruction_errors > threshold).astype(int)  # 1 = anomaly, 0 = normal

    # Compute Train Accuracy
    train_accuracy = accuracy_score(np.zeros_like(train_predictions), train_predictions)  # Assuming all training data is normal
    print(f"✅ Train Accuracy for Batch {batch_num}: {train_accuracy:.4f}")

    # Save Train Accuracy
    with open(os.path.join(output_folder, "train_accuracy_log.txt"), "a") as log_file:
        log_file.write(f"Batch {batch_num}: Train Accuracy = {train_accuracy:.4f}\n")

    # Save Anomaly Results
    df['reconstruction_error'] = reconstruction_errors
    df['anomaly'] = train_predictions

    result_file = os.path.join(output_folder, f"anomaly_results_batch_{batch_num}.csv")
    df.to_csv(result_file, index=False)
    print(f"✅ Anomalies saved to {result_file}")
    print(f"🔔 Detected {df['anomaly'].sum()} anomalies out of {len(df)} rows in Batch {batch_num}")

print("\n✅ Batchwise Anomaly Detection Completed.")



🔹 Processing Batch 1: /home/smartdragon/Videos/SWAT/SWaT.A1 & A2_Dec 2015/Data/preprocessed_batch_1.csv
Epoch [1/1] Loss: 0.832700
✅ Model saved: /home/smartdragon/Videos/SWAT/SWaT.A1 & A2_Dec 2015/Data/Results/autoencoder_progressive.pth
✅ Model saved: /home/smartdragon/Videos/SWAT/SWaT.A1 & A2_Dec 2015/Data/Results/autoencoder_progressive.h5
🔹 Set Anomaly Threshold: 0.662226
✅ Train Accuracy for Batch 1: 0.9500
✅ Anomalies saved to /home/smartdragon/Videos/SWAT/SWaT.A1 & A2_Dec 2015/Data/Results/anomaly_results_batch_1.csv
🔔 Detected 346463 anomalies out of 6929245 rows in Batch 1

🔹 Processing Batch 2: /home/smartdragon/Videos/SWAT/SWaT.A1 & A2_Dec 2015/Data/preprocessed_batch_10.csv
Epoch [1/1] Loss: 0.836621
✅ Model saved: /home/smartdragon/Videos/SWAT/SWaT.A1 & A2_Dec 2015/Data/Results/autoencoder_progressive.pth
✅ Model saved: /home/smartdragon/Videos/SWAT/SWaT.A1 & A2_Dec 2015/Data/Results/autoencoder_progressive.h5
✅ Train Accuracy for Batch 2: 0.8993
✅ Anomalies saved to /ho