In [1]:
import torch, torchaudio, torchvision.transforms as transforms, matplotlib.pyplot as plt, torch.nn as nn, torch.optim as optim, numpy as np
from torchvision.models import vgg16, VGG16_Weights
from torch.utils.data import DataLoader, TensorDataset
from PIL import Image
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.model_selection import  StratifiedKFold
from sklearn.metrics import f1_score, precision_score, recall_score, accuracy_score, confusion_matrix, auc, classification_report, roc_auc_score
from torch.autograd import grad


cuda0 = torch.device("cuda:0")
cuda1 = torch.device("cuda:1")
device = cuda1
print(torch.cuda.get_device_name(device) if torch.cuda.is_available() else "No GPU available")

data = np.load("../../hvcm/RFQ.npy", allow_pickle=True)
label = np.load("../../hvcm/RFQ_labels.npy", allow_pickle=True)
label = label[:, 1]  # Assuming the second column is the label
label = (label == "Fault").astype(int)  # Convert to binary labels
print(data.shape, label.shape)

normal_data = data[label == 0]
faulty_data = data[label == 1]

normal_label = label[label == 0]
faulty_label = label[label == 1]


NVIDIA A30
(872, 4500, 14) (872,)


# Wasserstein GAN

In [2]:
class Conv1DGenerator(nn.Module):
    def __init__(self, latent_dim=100, n_features=14, seq_len=4500):
        super(Conv1DGenerator, self).__init__()
        self.latent_dim = latent_dim
        self.n_features = n_features
        self.seq_len = seq_len
        
        # Calculate initial sequence length after upsampling
        self.init_size = seq_len // 64  # Will be upsampled 6 times (2^6 = 64)
        
        # Project latent to initial feature map
        self.fc = nn.Sequential(
            nn.Linear(latent_dim, 256 * self.init_size),
            nn.BatchNorm1d(256 * self.init_size),
            nn.ReLU(True)
        )
        
        # Transposed convolutions for upsampling
        self.conv_blocks = nn.Sequential(
            # 256 -> 128 channels, 2x upsampling
            nn.ConvTranspose1d(256, 128, kernel_size=4, stride=2, padding=1),
            nn.BatchNorm1d(128),
            nn.ReLU(True),
            
            # 128 -> 64 channels, 2x upsampling  
            nn.ConvTranspose1d(128, 64, kernel_size=4, stride=2, padding=1),
            nn.BatchNorm1d(64),
            nn.ReLU(True),
            
            # 64 -> 32 channels, 2x upsampling
            nn.ConvTranspose1d(64, 32, kernel_size=4, stride=2, padding=1),
            nn.BatchNorm1d(32),
            nn.ReLU(True),
            
            # 32 -> 16 channels, 2x upsampling
            nn.ConvTranspose1d(32, 16, kernel_size=4, stride=2, padding=1),
            nn.BatchNorm1d(16),
            nn.ReLU(True),
            
            # 16 -> 8 channels, 2x upsampling
            nn.ConvTranspose1d(16, 8, kernel_size=4, stride=2, padding=1),
            nn.BatchNorm1d(8),
            nn.ReLU(True),
            
            # 8 -> n_features, final upsampling
            nn.ConvTranspose1d(8, n_features, kernel_size=4, stride=2, padding=1),
            nn.Tanh()
        )

    def forward(self, z):
        # Project and reshape
        out = self.fc(z)
        out = out.view(out.shape[0], 256, self.init_size)
        
        # Apply conv blocks
        out = self.conv_blocks(out)
        
        # Adjust to exact sequence length if needed
        if out.shape[2] != self.seq_len:
            out = nn.functional.interpolate(out, size=self.seq_len, mode='linear', align_corners=False)
        
        # Transpose to (batch, seq_len, features)
        return out.transpose(1, 2)

class Conv1DDiscriminator(nn.Module):
    def __init__(self, n_features=14, seq_len=4500):
        super(Conv1DDiscriminator, self).__init__()
        
        # Convolutional feature extraction
        self.conv_blocks = nn.Sequential(
            # Input: (batch, 14, 4500)
            nn.Conv1d(n_features, 16, kernel_size=4, stride=2, padding=1),
            nn.LeakyReLU(0.2),
            nn.Dropout(0.25),
            
            nn.Conv1d(16, 32, kernel_size=4, stride=2, padding=1),
            nn.BatchNorm1d(32),
            nn.LeakyReLU(0.2),
            nn.Dropout(0.25),
            
            nn.Conv1d(32, 64, kernel_size=4, stride=2, padding=1),
            nn.BatchNorm1d(64),
            nn.LeakyReLU(0.2),
            nn.Dropout(0.25),
            
            nn.Conv1d(64, 128, kernel_size=4, stride=2, padding=1),
            nn.BatchNorm1d(128),
            nn.LeakyReLU(0.2),
            nn.Dropout(0.25),
            
            nn.Conv1d(128, 256, kernel_size=4, stride=2, padding=1),
            nn.BatchNorm1d(256),
            nn.LeakyReLU(0.2),
            nn.Dropout(0.25),
            
            nn.Conv1d(256, 512, kernel_size=4, stride=2, padding=1),
            nn.BatchNorm1d(512),
            nn.LeakyReLU(0.2),
        )
        
        # Calculate the size after convolutions
        self.conv_output_size = self._get_conv_output_size(seq_len)
        
        # Final classification layer
        self.classifier = nn.Sequential(
            nn.Linear(512 * self.conv_output_size, 1024),
            nn.LeakyReLU(0.2),
            nn.Dropout(0.5),
            nn.Linear(1024, 1)
        )

    def _get_conv_output_size(self, seq_len):
        # Calculate output size after 6 conv layers with stride 2
        size = seq_len
        for _ in range(6):
            size = (size - 4 + 2) // 2 + 1
        return size

    def forward(self, x):
        # Transpose from (batch, seq_len, features) to (batch, features, seq_len)
        x = x.transpose(1, 2)
        
        # Apply conv blocks
        features = self.conv_blocks(x)
        
        # Flatten and classify
        features = features.view(features.shape[0], -1)
        return self.classifier(features)
    
# Improved Gradient Penalty
def compute_gradient_penalty(discriminator, real_samples, fake_samples, device):
    batch_size = real_samples.size(0)
    alpha = torch.rand(batch_size, 1, 1).to(device)
    
    # Expand alpha to match sample dimensions
    alpha = alpha.expand_as(real_samples)
    
    interpolates = (alpha * real_samples + (1 - alpha) * fake_samples).requires_grad_(True)
    d_interpolates = discriminator(interpolates)
    
    gradients = grad(
        outputs=d_interpolates,
        inputs=interpolates,
        grad_outputs=torch.ones_like(d_interpolates).to(device),
        create_graph=True,
        retain_graph=True,
        only_inputs=True
    )[0]
    
    gradients = gradients.view(batch_size, -1)
    gradient_penalty = ((gradients.norm(2, dim=1) - 1) ** 2).mean()
    
    return gradient_penalty

# WGANS Training

In [None]:
def train_conv1d_wgan(normal_data, device, n_epochs=100, batch_size=16, lr_g=0.0001, lr_d=0.0004):
    """
    Train WGAN with Conv1D architecture
    """
    latent_dim = 100
    
    # Initialize models
    generator = Conv1DGenerator(latent_dim).to(device)
    discriminator = Conv1DDiscriminator().to(device)
    
    # Initialize weights
    def weights_init(m):
        if isinstance(m, (nn.Conv1d, nn.ConvTranspose1d, nn.Linear)):
            nn.init.xavier_uniform_(m.weight)
            if m.bias is not None:
                nn.init.zeros_(m.bias)
    
    generator.apply(weights_init)
    discriminator.apply(weights_init)
    
    # Optimizers
    optimizer_G = optim.Adam(generator.parameters(), lr=lr_g, betas=(0.5, 0.999))
    optimizer_D = optim.Adam(discriminator.parameters(), lr=lr_d, betas=(0.5, 0.999))
    
    # Training parameters
    lambda_gp = 10
    n_critic = 5
    
    # Convert to tensor and create dataloader
    dataset = TensorDataset(torch.tensor(normal_data, dtype=torch.float32))
    dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True, drop_last=True)
    
    print("Starting Conv1D WGAN training...")
    
    for epoch in range(n_epochs):
        d_losses = []
        g_losses = []
        
        for i, (real_samples,) in enumerate(dataloader):
            real_samples = real_samples.to(device)
            batch_size_actual = real_samples.size(0)
            
            # Train Discriminator
            for _ in range(n_critic):
                optimizer_D.zero_grad()
                
                # Real samples
                real_validity = discriminator(real_samples)
                
                # Fake samples
                z = torch.randn(batch_size_actual, latent_dim).to(device)
                fake_samples = generator(z).detach()
                fake_validity = discriminator(fake_samples)
                
                # Gradient penalty
                gp = compute_gradient_penalty(discriminator, real_samples, fake_samples, device)
                
                # Discriminator loss
                d_loss = -torch.mean(real_validity) + torch.mean(fake_validity) + lambda_gp * gp
                d_loss.backward()
                optimizer_D.step()
            
            # Train Generator
            if i % n_critic == 0:
                optimizer_G.zero_grad()
                
                z = torch.randn(batch_size_actual, latent_dim).to(device)
                fake_samples = generator(z)
                fake_validity = discriminator(fake_samples)
                
                g_loss = -torch.mean(fake_validity)
                g_loss.backward()
                optimizer_G.step()
                
                g_losses.append(g_loss.item())
            
            d_losses.append(d_loss.item())
        
        # Print progress
        if epoch % 10 == 0:
            avg_d_loss = np.mean(d_losses)
            avg_g_loss = np.mean(g_losses) if g_losses else 0
            print(f"Epoch [{epoch}/{n_epochs}], D Loss: {avg_d_loss:.4f}, G Loss: {avg_g_loss:.4f}")
    
    return generator, discriminator

# Use the Conv1D version
trained_generator, trained_discriminator = train_conv1d_wgan(normal_data, device)

Starting Conv1D WGAN training...


RuntimeError: view size is not compatible with input tensor's size and stride (at least one dimension spans across two contiguous subspaces). Use .reshape(...) instead.

# Generate and Combine

In [None]:
generator = Generator(latent_dim).to(device)
num_samples = len(data[label == 0])  # Number of synthetic samples to generate
batch_size = 64  # Generate in batches to avoid memory issues

generated_list = []
for i in range(0, num_samples, batch_size):
	current_batch = min(batch_size, num_samples - i)
	z = torch.randn(current_batch, latent_dim).to(device)
	with torch.no_grad():
		batch_samples = generator(z).cpu().numpy()
	# batch_samples shape: (current_batch, 14, 10, 15, 30)
	batch_samples = batch_samples.reshape(current_batch, 14, -1).transpose(0, 2, 1)  # (current_batch, 4500, 14)
	generated_list.append(batch_samples)

generated_samples = np.concatenate(generated_list, axis=0)  # (num_samples, 4500, 14)

combine_data_normal = np.concatenate((generated_samples, normal_data), axis=0)  # Combine real and generated data
combine_labels_normal = np.concatenate((np.zeros(num_samples), normal_label), axis=0)  # Labels: 0 for generated, 1 for real

# Processing: Mel Spec > Resizing > Feature Extraction

In [None]:
# Resize and convert to 3-channel image
def resize_spectrogram(spectrogram):
    spectrogram = (spectrogram - spectrogram.min()) / (spectrogram.max() - spectrogram.min() + 1e-6)
    spectrogram = np.uint8(spectrogram.cpu().numpy() * 255)
    spectrogram = np.stack([spectrogram] * 3, axis=-1)
    image = Image.fromarray(spectrogram)
    image = transforms.Resize((224, 224))(image)
    return transforms.ToTensor()(image)

# Process dataset
def process_dataset(data):
    num_samples, _, num_channels = data.shape
    features = np.zeros((num_samples, num_channels, 4096))
    mel_transform = torchaudio.transforms.MelSpectrogram(sample_rate=2500000, n_mels=128).to(device)
    model = vgg16(weights=VGG16_Weights.IMAGENET1K_V1).to(device)
    model.classifier = model.classifier[:-3]
    model.eval()

    for i in range(num_samples):
        for j in range(num_channels):
            ts = torch.tensor(data[i, :, j], dtype=torch.float32).to(device)
            mel = mel_transform(ts)
            img = resize_spectrogram(mel)
            with torch.no_grad():
                feat = model(img.unsqueeze(0).to(device))
            features[i, j, :] = feat.squeeze().cpu().numpy()
    return features

# AE Class

In [None]:
# Autoencoder model
class Autoencoder(nn.Module):
    def __init__(self, input_size=4096):
        super().__init__()
        self.encoder = nn.Sequential(
            nn.Linear(input_size, 64), 
            nn.Tanh(),
            nn.Linear(64, 32), 
            nn.Tanh(),
            nn.Linear(32, 16), 
            nn.Tanh(),
            nn.Linear(16, 8), 
            nn.Tanh(),
            nn.Linear(8, 4), 
            nn.Tanh()
        )
        self.decoder = nn.Sequential(
            nn.Linear(4, 8),
            nn.ReLU(),
            nn.Linear(8, 16), 
            nn.ReLU(),
            nn.Linear(16, 32), 
            nn.ReLU(),
            nn.Linear(32, 64), 
            nn.ReLU(),
            nn.Linear(64, input_size), 
            nn.Sigmoid()
        )

    def forward(self, x):
        return self.decoder(self.encoder(x))


# Train autoencoder
def train_autoencoder(features, epochs=20, batch_size=128):
    x = torch.tensor(features.reshape(-1, 4096), dtype=torch.float32).to(device)
    loader = DataLoader(TensorDataset(x), batch_size=batch_size, shuffle=True)
    model = Autoencoder().to(device)
    optimizer = optim.Adam(model.parameters(), lr=1e-3)
    criterion = nn.L1Loss()

    for epoch in range(epochs):
        total_loss = 0
        for batch in loader:
            inputs = batch[0]
            outputs = model(inputs)
            loss = criterion(outputs, inputs)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
        print(f"Epoch {epoch+1}/{epochs}, Loss: {total_loss / len(loader):.6f}")
    return model

# Compute reconstruction errors
def compute_reconstruction_loss(model, data):
    model.eval()
    x = torch.tensor(data.reshape(-1, 4096), dtype=torch.float32).to(next(model.parameters()).device)
    loader = DataLoader(TensorDataset(x), batch_size=64)
    loss = []
    criterion = torch.nn.MSELoss(reduction='none')
    with torch.no_grad():
        for batch in loader:
            inputs = batch[0]
            outputs = model(inputs)
            batch_errors = criterion(outputs, inputs).mean(dim=1)
            loss.extend(batch_errors.cpu().numpy())
    return np.array(loss)

# 2. Find best threshold based on F1 score
def find_best_threshold(errors, labels):
    best_f1 = 0
    best_threshold = 0
    for threshold in np.linspace(min(errors), max(errors), 100):
        preds = (errors > threshold).astype(int)
        f1 = f1_score(labels, preds)
        if f1 > best_f1:
            best_f1 = f1
            best_threshold = threshold
    return best_threshold, best_f1

def evaluate_on_test_with_threshold_search(model, X_test, y_test):
    # Step 1: Flatten the test data
    n_samples, n_segments, n_features = X_test.shape
    X_test_flat = X_test.reshape(-1, n_features)

    # Step 2: Compute reconstruction errors per segment
    segment_errors = compute_reconstruction_loss(model, X_test_flat)

    # Step 3: Aggregate errors per sample (mean over segments)
    sample_errors = segment_errors.reshape(n_samples, n_segments).mean(axis=1)

    # Step 4: Find best threshold based on F1 score
    best_threshold = 0
    best_f1 = 0
    for threshold in np.linspace(sample_errors.min(), sample_errors.max(), 100):
        preds = (sample_errors > threshold).astype(int)
        f1 = f1_score(y_test, preds)
        if f1 > best_f1:
            best_f1 = f1
            best_threshold = threshold

    # Step 5: Predict using best threshold
    test_preds = (sample_errors > best_threshold).astype(int)

    # Step 6: Evaluate
    print(f"Best Threshold = {best_threshold:.6f}, Best F1 Score = {best_f1:.4f}")
    print("Evaluation on Test Set:")
    print("Accuracy =", accuracy_score(y_test, test_preds))
    print("Precision =", precision_score(y_test, test_preds))
    print("Recall =", recall_score(y_test, test_preds))
    print("F1 Score =", f1_score(y_test, test_preds))
    print("Confusion Matrix:\n", confusion_matrix(y_test, test_preds))



# Preprocessing

In [None]:
combine_data = np.concatenate((combine_data_normal, data[label == 1]), axis=0)  # Combine real and generated data
combine_label = np.concatenate((np.zeros(len(combine_labels_normal)), label[label == 1]), axis=0)  # Labels: 0 for real, 0 for generated

skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
scaled_data = StandardScaler().fit_transform(combine_data.reshape(-1, combine_data.shape[-1])).reshape(combine_data.shape)
features = process_dataset(scaled_data)
print("Features shape:", features.shape)

# Cross Validation with StandardScaler

In [None]:

for fold, (train_idx, val_idx) in enumerate(skf.split(features, combine_label)):
    
    print(f"Fold {fold + 1}")
    
    # Get full fold data
    fold_data, fold_labels = features[train_idx], combine_label[train_idx]
    val_data, val_labels = features[val_idx], combine_label[val_idx]
   
    # Split into training and validation folds
    X_train, X_temp, y_train, y_temp = train_test_split(fold_data, fold_labels, test_size=0.2, shuffle=True, random_state=42)
    X_val, X_test, y_val, y_test = train_test_split(val_data, val_labels, test_size=0.5, shuffle=True, random_state=42)

    # Train autoencoder on the training fold
    model = train_autoencoder(X_train[y_train == 0], epochs=15, batch_size=32)

    # Evaluate on validation fold
    val_errors_normal = compute_reconstruction_loss(model, X_val[y_val == 0])
    val_errors_abnormal = compute_reconstruction_loss(model, X_val[y_val == 1])
    val_errors = np.concatenate([val_errors_normal, val_errors_abnormal])
    y_val_combined = np.concatenate([np.zeros(len(val_errors_normal)), np.ones(len(val_errors_abnormal))])
    
    threshold, best_f1 = find_best_threshold(val_errors, y_val_combined)
    print(f"Best threshold: {threshold}, Best F1 Score: {best_f1}")

    # Plot histogram of reconstruction errors on both normal and abnormal samples
    plt.figure(figsize=(12, 6))
    plt.hist(val_errors_normal, bins=50, alpha=0.5, label='Normal Samples', color='blue')
    plt.hist(val_errors_abnormal, bins=50, alpha=0.5, label='Abnormal Samples', color='red')
    plt.axvline(threshold, color='black', linestyle='--', label='Threshold')
    plt.title('Reconstruction Errors on Validation Set')
    plt.xlabel('Reconstruction Error')
    plt.ylabel('Frequency')
    plt.legend()
    plt.show()

    

    val_errors_test = compute_reconstruction_loss(model, X_test)
    

    # Evaluate on test set
    evaluate_on_test_with_threshold_search(model, X_test, y_test)



# Observation:

Accuracy = 0.9166666666666666


Precision = 0.7142857142857143

Recall = 0.3125

F1 Score = 0.43478260869565216

Confusion Matrix:

 [[138   2]

 [ 11   5]]



