In [31]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import IsolationForest
from sklearn.svm import OneClassSVM
from sklearn.metrics import classification_report, confusion_matrix


# Read the synthetic sensor dataset from disk
df = pd.read_csv("../data/synthetic_data.csv")

# Model inputs are the three sensor channels only; no label column is passed
# to the unsupervised detectors.
sensor_columns = ['temperature', 'pressure', 'vibration']
X = df[sensor_columns]

# Fit a StandardScaler on the sensor features and transform them in one call
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

In [32]:
# Fit an Isolation Forest on the standardized sensor features
iso_forest = IsolationForest(
    n_estimators=1000,
    contamination=0.05,  # assumed share of anomalous rows (0.05 = 5%)
    random_state=42,     # fixed seed for the forest
)

# fit() hands back the estimator, so training and scoring can be chained.
# Predictions use -1 for anomaly and 1 for normal.
iso_pred = iso_forest.fit(X_scaled).predict(X_scaled)

# Recode to a 0/1 flag where 1 marks a row the forest considers anomalous
flagged_by_forest = iso_pred == -1
df['iso_forest_anomaly'] = np.where(flagged_by_forest, 1, 0)

# Compare the flags with the synthetic "failure" labels
print("Isolation Forest Performance:")
iso_report = classification_report(df['failure'], df['iso_forest_anomaly'])
print(iso_report)

Isolation Forest Performance:
              precision    recall  f1-score   support

           0       0.97      0.96      0.97     12405
           1       0.32      0.37      0.34       555

    accuracy                           0.94     12960
   macro avg       0.64      0.67      0.65     12960
weighted avg       0.94      0.94      0.94     12960



In [33]:
# Fit a One-Class SVM with an RBF kernel on the standardized sensor features
oc_svm = OneClassSVM(
    kernel='rbf',
    nu=0.05,  # set to the assumed outlier share (0.05 = 5%)
)

# fit() hands back the estimator, so training and scoring can be chained
svm_pred = oc_svm.fit(X_scaled).predict(X_scaled)

# Recode predictions: -1 becomes 1 (anomaly), anything else becomes 0
flagged_by_svm = svm_pred == -1
df['oc_svm_anomaly'] = np.where(flagged_by_svm, 1, 0)

# Compare the flags with the synthetic "failure" labels
print("One-Class SVM Performance:")
svm_report = classification_report(df['failure'], df['oc_svm_anomaly'])
print(svm_report)

One-Class SVM Performance:
              precision    recall  f1-score   support

           0       0.97      0.96      0.96     12405
           1       0.21      0.25      0.23       555

    accuracy                           0.93     12960
   macro avg       0.59      0.60      0.59     12960
weighted avg       0.93      0.93      0.93     12960



In [34]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset

# Copy the scaled features into a float32 tensor
X_tensor = torch.tensor(X_scaled, dtype=torch.float32)

# An autoencoder's target is its own input, so each sample is paired with itself
dataset = TensorDataset(X_tensor, X_tensor)

# Mini-batches of 64 rows, reshuffled on every pass over the data
dataloader = DataLoader(dataset, batch_size=64, shuffle=True)

# Define Autoencoder architecture
class Autoencoder(nn.Module):
    """Single-hidden-layer autoencoder.

    The encoder is one linear layer followed by ReLU that maps
    ``input_dim`` features to ``encoding_dim`` units; the decoder is one
    linear layer (followed by a pass-through ``nn.Identity``) that maps the
    code back to ``input_dim`` features.

    Args:
        input_dim: Number of features per input row (default 3).
        encoding_dim: Width of the bottleneck layer (default 2).
    """

    def __init__(self, input_dim=3, encoding_dim=2):
        super().__init__()
        # Bottleneck: input_dim -> encoding_dim with a ReLU non-linearity
        self.encoder = nn.Sequential(
            nn.Linear(input_dim, encoding_dim),
            nn.ReLU(),
        )
        # Reconstruction head: encoding_dim -> input_dim, no squashing applied
        self.decoder = nn.Sequential(
            nn.Linear(encoding_dim, input_dim),
            nn.Identity(),  # Linear activation for reconstruction
        )

    def forward(self, x):
        """Encode ``x`` and return its reconstruction (same last-dim size as ``x``)."""
        return self.decoder(self.encoder(x))

# Seed PyTorch's random number generators before any weights are created or
# any batches are shuffled. Without this, weight initialisation and the
# shuffle order of the DataLoader differ on every run, so the reported
# losses and the downstream anomaly flags are not repeatable.
# (Some CUDA kernels can still be non-deterministic; CPU runs are repeatable.)
torch.manual_seed(42)

# Initialize model, loss, and optimizer
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = Autoencoder().to(device)
criterion = nn.MSELoss()  # mean squared reconstruction error over the batch
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training loop
num_epochs = 50
num_samples = len(dataloader.dataset)
model.train()
for epoch in range(num_epochs):
    total_loss = 0.0
    for batch_x, _ in dataloader:
        # The target equals the input, so the second element of the pair is unused
        batch_x = batch_x.to(device)
        optimizer.zero_grad()
        outputs = model(batch_x)
        loss = criterion(outputs, batch_x)
        loss.backward()
        optimizer.step()
        # criterion returns a per-batch mean; weight it by the batch size so a
        # shorter final batch does not count as much as a full one
        total_loss += loss.item() * batch_x.size(0)
    # Per-sample average of the reconstruction loss over the whole epoch
    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {total_loss/num_samples:.4f}")

Epoch 1/50, Loss: 0.9283
Epoch 2/50, Loss: 0.8146
Epoch 3/50, Loss: 0.7689
Epoch 4/50, Loss: 0.7444
Epoch 5/50, Loss: 0.7285
Epoch 6/50, Loss: 0.7107
Epoch 7/50, Loss: 0.6977
Epoch 8/50, Loss: 0.6854
Epoch 9/50, Loss: 0.6744
Epoch 10/50, Loss: 0.6642
Epoch 11/50, Loss: 0.6551
Epoch 12/50, Loss: 0.6461
Epoch 13/50, Loss: 0.6383
Epoch 14/50, Loss: 0.6310
Epoch 15/50, Loss: 0.6239
Epoch 16/50, Loss: 0.6178
Epoch 17/50, Loss: 0.6127
Epoch 18/50, Loss: 0.6071
Epoch 19/50, Loss: 0.6018
Epoch 20/50, Loss: 0.5969
Epoch 21/50, Loss: 0.5932
Epoch 22/50, Loss: 0.5893
Epoch 23/50, Loss: 0.5871
Epoch 24/50, Loss: 0.5823
Epoch 25/50, Loss: 0.5793
Epoch 26/50, Loss: 0.5767
Epoch 27/50, Loss: 0.5740
Epoch 28/50, Loss: 0.5713
Epoch 29/50, Loss: 0.5690
Epoch 30/50, Loss: 0.5666
Epoch 31/50, Loss: 0.5653
Epoch 32/50, Loss: 0.5629
Epoch 33/50, Loss: 0.5616
Epoch 34/50, Loss: 0.5593
Epoch 35/50, Loss: 0.5576
Epoch 36/50, Loss: 0.5561
Epoch 37/50, Loss: 0.5548
Epoch 38/50, Loss: 0.5539
Epoch 39/50, Loss: 0.

In [35]:
# Put the model in evaluation mode and reconstruct every row without
# recording gradients
model.eval()
with torch.no_grad():
    X_tensor = X_tensor.to(device)
    reconstructions = model(X_tensor).cpu().numpy()

# Per-row reconstruction error: squared difference averaged across the features
squared_error = (X_scaled - reconstructions) ** 2
mse = squared_error.mean(axis=1)

# Rows whose error lies above the 0.95 quantile are flagged (top 5% as anomalies)
error_cutoff = np.quantile(mse, 0.95)
df['autoencoder_anomaly'] = (mse > error_cutoff).astype(int)

# Compare the flags with the synthetic "failure" labels
print("PyTorch Autoencoder Performance:")
autoencoder_report = classification_report(df['failure'], df['autoencoder_anomaly'])
print(autoencoder_report)

PyTorch Autoencoder Performance:
              precision    recall  f1-score   support

           0       0.96      0.95      0.95     12405
           1       0.07      0.08      0.07       555

    accuracy                           0.91     12960
   macro avg       0.51      0.52      0.51     12960
weighted avg       0.92      0.91      0.92     12960

