In [None]:
import torch
import torchaudio
import torchvision.transforms as transforms
from torchvision.models import vgg16, VGG16_Weights
from torch.utils.data import DataLoader, TensorDataset
import torch.nn as nn
import torch.optim as optim
from PIL import Image
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.model_selection import  StratifiedKFold
from sklearn.metrics import f1_score, precision_score, recall_score, accuracy_score, confusion_matrix, auc, classification_report, roc_auc_score


# Choose the compute device once; later cells move tensors and models onto it.
if torch.cuda.is_available():
    device = torch.device("cuda")
    print(device)
    print(torch.cuda.get_device_name(0))
else:
    device = torch.device("cpu")
    print(device)
    print("No GPU available")

# Load the raw signals and the label table from disk.
# NOTE(review): allow_pickle=True lets np.load unpickle arbitrary Python objects,
# which can execute code -- only point this at trusted files.
data = np.load("hvcm/RFQ.npy", allow_pickle=True)
# Column 1 of the label table is assumed to hold the label string;
# "Fault" becomes 1 and every other value becomes 0.
label = (np.load("hvcm/RFQ_labels.npy", allow_pickle=True)[:, 1] == "Fault").astype(int)
print(data.shape, label.shape)

# Index tuples (as returned by single-argument np.where) for each class.
anomalous_indices = np.where(label == 1)
normal_indices = np.where(label == 0)


# Processing: Mel Spec > Resizing > Feature Extraction

In [None]:
# Turn a spectrogram tensor into a 224x224, 3-channel image tensor
def resize_spectrogram(spectrogram):
    """Render a spectrogram as a 224x224 three-channel image tensor.

    The input is min-max scaled (with a 1e-6 guard against a zero range),
    quantised to 8-bit, copied into three identical channels, resized with
    torchvision and converted back to a tensor with ``transforms.ToTensor``.

    Args:
        spectrogram: torch tensor; ``.cpu().numpy()`` is called on it.
            Presumably 2-D (mel bins x frames) -- TODO confirm with caller.

    Returns:
        The tensor produced by ``transforms.ToTensor`` for the resized image.
    """
    # NOTE(review): no ImageNet mean/std normalisation is applied here; confirm
    # that is intended for the pretrained VGG16 that consumes these images.
    lowest = spectrogram.min()
    highest = spectrogram.max()
    scaled = (spectrogram - lowest) / (highest - lowest + 1e-6)

    # 8-bit grey levels, then replicate the plane along a trailing channel axis.
    grey = np.uint8(scaled.cpu().numpy() * 255)
    rgb = np.stack([grey, grey, grey], axis=-1)

    resized = transforms.Resize((224, 224))(Image.fromarray(rgb))
    to_tensor = transforms.ToTensor()
    return to_tensor(resized)

# Process dataset
def process_dataset(data, sample_rate=2500000, n_mels=128):
    """Extract one VGG16 feature vector per (sample, channel) signal.

    Every 1-D signal ``data[i, :, j]`` is turned into a mel spectrogram,
    rendered as an image by ``resize_spectrogram`` and passed through an
    ImageNet-pretrained VGG16 whose classifier has its last three layers
    removed. All channels of one sample go through the network as a single
    batch instead of one forward pass per channel.

    Args:
        data: array-like of shape (num_samples, signal_length, num_channels).
        sample_rate: sample rate handed to ``MelSpectrogram``
            (default keeps the previously hard-coded 2.5 MHz).
        n_mels: number of mel bins handed to ``MelSpectrogram``.

    Returns:
        ``np.ndarray`` (float64) of shape
        (num_samples, num_channels, feature_dim), where ``feature_dim`` is the
        ``out_features`` of the last remaining classifier layer
        (4096 for torchvision's VGG16).

    Raises:
        ValueError: if ``data`` is not 3-dimensional.
    """
    data = np.asarray(data)
    if data.ndim != 3:
        raise ValueError(
            f"data must have shape (num_samples, signal_length, num_channels), got {data.shape}"
        )
    num_samples, _, num_channels = data.shape

    mel_transform = torchaudio.transforms.MelSpectrogram(
        sample_rate=sample_rate, n_mels=n_mels
    ).to(device)
    model = vgg16(weights=VGG16_Weights.IMAGENET1K_V1).to(device)
    model.classifier = model.classifier[:-3]
    model.eval()

    # Read the output width from the truncated classifier instead of hard-coding it.
    feature_dim = model.classifier[-1].out_features
    features = np.zeros((num_samples, num_channels, feature_dim))
    if num_channels == 0:
        return features

    with torch.no_grad():
        for i in range(num_samples):
            # (num_channels, signal_length): MelSpectrogram treats leading dims as batch.
            signals = torch.tensor(data[i].T, dtype=torch.float32).to(device)
            mels = mel_transform(signals)
            # Each spectrogram is still scaled on its own min/max inside resize_spectrogram.
            images = torch.stack([resize_spectrogram(mel) for mel in mels])
            feats = model(images.to(device))
            features[i] = feats.cpu().numpy()
    return features

# AE Class

In [None]:
# Autoencoder model
class Autoencoder(nn.Module):
    """Fully connected autoencoder with a 4-unit bottleneck.

    The encoder narrows ``input_size -> 64 -> 32 -> 16 -> 8 -> 4`` with a ReLU
    after every linear layer (including the bottleneck). The decoder mirrors
    those widths back up to ``input_size``; its hidden layers use ReLU and its
    final layer uses Sigmoid, so every reconstructed value is bounded by the
    sigmoid's output range.
    """

    def __init__(self, input_size=4096):
        super().__init__()
        widths = [input_size, 64, 32, 16, 8, 4]

        # Encoder: one Linear + ReLU pair per consecutive pair of widths.
        encoder_layers = []
        for fan_in, fan_out in zip(widths[:-1], widths[1:]):
            encoder_layers.append(nn.Linear(fan_in, fan_out))
            encoder_layers.append(nn.ReLU())
        self.encoder = nn.Sequential(*encoder_layers)

        # Decoder: same widths in reverse; the last activation is Sigmoid, not ReLU.
        mirrored = widths[::-1]
        last_index = len(mirrored) - 2
        decoder_layers = []
        for index, (fan_in, fan_out) in enumerate(zip(mirrored[:-1], mirrored[1:])):
            decoder_layers.append(nn.Linear(fan_in, fan_out))
            decoder_layers.append(nn.Sigmoid() if index == last_index else nn.ReLU())
        self.decoder = nn.Sequential(*decoder_layers)

    def forward(self, x):
        """Encode ``x`` to the bottleneck and decode it back to the input width."""
        latent = self.encoder(x)
        return self.decoder(latent)



# Train autoencoder
def train_autoencoder(features, epochs=20, batch_size=128, input_size=4096, lr=1e-3):
    """Fit an ``Autoencoder`` to reconstruct the given feature vectors.

    Args:
        features: array-like that is reshaped to ``(-1, input_size)``; every
            resulting row is one training example.
        epochs: number of passes over the data.
        batch_size: mini-batch size for the shuffled ``DataLoader``.
        input_size: width of one feature vector; also the autoencoder's
            input/output width (default keeps the previous fixed 4096).
        lr: Adam learning rate (default keeps the previous fixed 1e-3).

    Returns:
        The trained ``Autoencoder``, living on ``device``.

    Raises:
        ValueError: if ``features`` contains no rows; previously this only
            surfaced as a ZeroDivisionError when the epoch loss was printed.
    """
    x = torch.tensor(
        np.asarray(features).reshape(-1, input_size), dtype=torch.float32
    ).to(device)
    if x.shape[0] == 0:
        raise ValueError("train_autoencoder received no training rows")

    loader = DataLoader(TensorDataset(x), batch_size=batch_size, shuffle=True)
    model = Autoencoder(input_size=input_size).to(device)
    optimizer = optim.Adam(model.parameters(), lr=lr)
    criterion = nn.MSELoss()

    for epoch in range(epochs):
        total_loss = 0.0
        for (inputs,) in loader:
            # The autoencoder's target is its own input.
            loss = criterion(model(inputs), inputs)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
        # Mean of the per-batch losses for this epoch.
        print(f"Epoch {epoch+1}/{epochs}, Loss: {total_loss / len(loader):.6f}")
    return model


def print_eval(predictions, labels):
    """Print accuracy, precision, recall, F1 and the confusion matrix.

    Every scikit-learn metric is called with ``labels`` first and
    ``predictions`` second. Nothing is returned.
    """
    scorers = (
        ("Accuracy", accuracy_score),
        ("Precision", precision_score),
        ("Recall", recall_score),
        ("F1", f1_score),
    )
    for title, scorer in scorers:
        print(f"{title} = {scorer(labels, predictions)}")
    print(confusion_matrix(labels, predictions))

# Plot reconstruction error histogram
def plot_reconstruction_error(model, features, percentile=95):
    """Histogram per-row reconstruction errors and report a percentile cut-off.

    ``features`` is flattened to rows of 4096 values. Each row is reconstructed
    by ``model`` and its mean squared error recorded. The requested percentile
    of those row errors is drawn as a dashed threshold line, and the number of
    rows above it is printed. Nothing is returned.

    Args:
        model: callable mapping a (batch, 4096) tensor to a same-shaped tensor.
        features: array with a ``reshape`` method whose size is a multiple of 4096.
        percentile: percentile of the row errors used as the threshold.
    """
    flat_rows = torch.tensor(features.reshape(-1, 4096), dtype=torch.float32).to(device)
    elementwise_mse = nn.MSELoss(reduction='none')
    errors = []

    with torch.no_grad():
        for (inputs,) in DataLoader(TensorDataset(flat_rows), batch_size=64):
            # Average the element-wise squared error over the feature axis.
            row_errors = elementwise_mse(model(inputs), inputs).mean(dim=1)
            errors.extend(row_errors.cpu().numpy())

    threshold = np.percentile(errors, percentile)
    anomalies = (np.array(errors) > threshold).sum()

    plt.hist(errors, bins=50, alpha=0.75)
    plt.axvline(threshold, color='r', linestyle='--', label=f'Threshold ({percentile}%)')
    plt.title('Reconstruction Error Histogram')
    plt.xlabel('Reconstruction Error')
    plt.ylabel('Frequency')
    plt.grid(True)
    plt.legend()
    plt.show()

    print(f"Anomaly threshold: {threshold:.6f}")
    print(f"Detected anomalies: {anomalies}")


# Cross Validation without Scalers

In [None]:
# 5-fold stratified cross-validation on features extracted from the raw (unscaled) signals.
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
features = process_dataset(data)
print("Features shape:", features.shape)
for fold, (train_idx, val_idx) in enumerate(skf.split(features, label)):
    print(f"Fold {fold + 1}")
    train_fold_data, val_fold_data = features[train_idx], features[val_idx]
    train_fold_labels, val_fold_labels = label[train_idx], label[val_idx]

    # Fix the torch RNG so weight initialisation and batch shuffling are repeatable per fold.
    torch.manual_seed(42 + fold)

    # Fit only on the normal samples of this fold's TRAINING split, so that no
    # validation sample is seen while the autoencoder is trained.
    model = train_autoencoder(train_fold_data[train_fold_labels == 0], epochs=15, batch_size=64)
    model.eval()

    # Reconstruct every (sample, channel) row of the validation fold without tracking gradients.
    x_val = torch.tensor(val_fold_data.reshape(-1, 4096), dtype=torch.float32).to(device)
    with torch.no_grad():
        reconstructed = model(x_val).cpu().numpy()
    errors = np.mean((reconstructed - val_fold_data.reshape(-1, 4096)) ** 2, axis=1)

    # One row per validation sample, one column per channel.
    errors = errors.reshape(val_fold_data.shape[0], val_fold_data.shape[1])

    # Aggregate per sample (mean across channels)
    sample_errors = np.mean(errors, axis=1)

    percentile = 90
    # NOTE(review): the cut-off is a percentile of the validation fold's own errors,
    # so roughly (100 - percentile)% of validation samples are always flagged;
    # consider deriving it from training-fold errors instead.
    threshold = np.percentile(sample_errors, percentile)
    predictions = (sample_errors > threshold).astype(int)

    # Plot with the same percentile that produced the predictions.
    plot_reconstruction_error(model, val_fold_data, percentile=percentile)
    print_eval(predictions, val_fold_labels)

# Cross Validation with StandardScaler

In [None]:
# 5-fold stratified cross-validation on features extracted from StandardScaler-scaled signals.
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
# Scale each channel (last axis) over all samples and time steps, then restore the original shape.
# NOTE(review): the scaler is fitted on the full dataset, validation folds included.
scaled_data = StandardScaler().fit_transform(data.reshape(-1, data.shape[-1])).reshape(data.shape)
features = process_dataset(scaled_data)
print("Features shape:", features.shape)
for fold, (train_idx, val_idx) in enumerate(skf.split(features, label)):
    print(f"Fold {fold + 1}")
    train_fold_data, val_fold_data = features[train_idx], features[val_idx]
    train_fold_labels, val_fold_labels = label[train_idx], label[val_idx]

    # Fix the torch RNG so weight initialisation and batch shuffling are repeatable per fold.
    torch.manual_seed(42 + fold)

    # Fit only on the normal samples of this fold's TRAINING split, so that no
    # validation sample is seen while the autoencoder is trained.
    model = train_autoencoder(train_fold_data[train_fold_labels == 0], epochs=15, batch_size=64)
    model.eval()

    # Reconstruct every (sample, channel) row of the validation fold without tracking gradients.
    x_val = torch.tensor(val_fold_data.reshape(-1, 4096), dtype=torch.float32).to(device)
    with torch.no_grad():
        reconstructed = model(x_val).cpu().numpy()
    errors = np.mean((reconstructed - val_fold_data.reshape(-1, 4096)) ** 2, axis=1)

    # One row per validation sample, one column per channel.
    errors = errors.reshape(val_fold_data.shape[0], val_fold_data.shape[1])

    # Aggregate per sample (mean across channels)
    sample_errors = np.mean(errors, axis=1)

    percentile = 90
    # NOTE(review): the cut-off is a percentile of the validation fold's own errors,
    # so roughly (100 - percentile)% of validation samples are always flagged;
    # consider deriving it from training-fold errors instead.
    threshold = np.percentile(sample_errors, percentile)
    predictions = (sample_errors > threshold).astype(int)

    plot_reconstruction_error(model, val_fold_data, percentile=percentile)
    print_eval(predictions, val_fold_labels)

# Cross Validation with MinMax

In [None]:
# 5-fold stratified cross-validation on features extracted from MinMaxScaler-scaled signals.
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
# Scale each channel (last axis) over all samples and time steps, then restore the original shape.
# NOTE(review): the scaler is fitted on the full dataset, validation folds included.
scaled_data = MinMaxScaler().fit_transform(data.reshape(-1, data.shape[-1])).reshape(data.shape)
features = process_dataset(scaled_data)
print("Features shape:", features.shape)
for fold, (train_idx, val_idx) in enumerate(skf.split(features, label)):
    print(f"Fold {fold + 1}")
    train_fold_data, val_fold_data = features[train_idx], features[val_idx]
    train_fold_labels, val_fold_labels = label[train_idx], label[val_idx]

    # Fix the torch RNG so weight initialisation and batch shuffling are repeatable per fold.
    torch.manual_seed(42 + fold)

    # Fit only on the normal samples of this fold's TRAINING split, so that no
    # validation sample is seen while the autoencoder is trained.
    model = train_autoencoder(train_fold_data[train_fold_labels == 0], epochs=15, batch_size=64)
    model.eval()

    # Reconstruct every (sample, channel) row of the validation fold without tracking gradients.
    x_val = torch.tensor(val_fold_data.reshape(-1, 4096), dtype=torch.float32).to(device)
    with torch.no_grad():
        reconstructed = model(x_val).cpu().numpy()
    errors = np.mean((reconstructed - val_fold_data.reshape(-1, 4096)) ** 2, axis=1)

    # One row per validation sample, one column per channel.
    errors = errors.reshape(val_fold_data.shape[0], val_fold_data.shape[1])

    # Aggregate per sample (mean across channels)
    sample_errors = np.mean(errors, axis=1)

    percentile = 90
    # NOTE(review): the cut-off is a percentile of the validation fold's own errors,
    # so roughly (100 - percentile)% of validation samples are always flagged;
    # consider deriving it from training-fold errors instead.
    threshold = np.percentile(sample_errors, percentile)
    predictions = (sample_errors > threshold).astype(int)

    plot_reconstruction_error(model, val_fold_data, percentile=percentile)
    print_eval(predictions, val_fold_labels)

# Observation:
Comparison of the results with and without normalizing the data.

### MinMax-scaled scores

Accuracy = 0.7126436781609196

Precision = 0.1111111111111111

Recall = 0.05555555555555555

F1 = 0.07407407407407407

[[122  16]

[ 34   2]]

---

### StandardScaler-scaled scores


Accuracy = 0.896551724137931

Precision = 1.0

Recall = 0.5

F1 = 0.6666666666666666

[[138   0]

[ 18  18]]

---

### Scores without any normalization (highest in cross-validation):

Accuracy = 0.896551724137931

Precision = 1.0

Recall = 0.5

F1 = 0.6666666666666666

[[138   0]

[ 18  18]]