In [None]:
import torch
import torchaudio
import torchvision.transforms as transforms
from torchvision.models import vgg16, VGG16_Weights
from torch.utils.data import DataLoader, TensorDataset
import torch.nn as nn
import torch.optim as optim
from PIL import Image
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import f1_score, precision_score, recall_score, accuracy_score, confusion_matrix, auc, classification_report, roc_auc_score
from sklearn.svm import OneClassSVM
from sklearn.ensemble import IsolationForest
from sklearn.neighbors import LocalOutlierFactor
import pandas as pd
import seaborn as sns
from scipy import stats
import warnings
warnings.filterwarnings('ignore')


# Device selection: prefer the second GPU (the original configuration), but
# fall back to the first GPU or the CPU so the notebook still runs elsewhere.
cuda0 = torch.device("cuda:0")
cuda1 = torch.device("cuda:1")
if torch.cuda.is_available() and torch.cuda.device_count() > 1:
    device = cuda1
elif torch.cuda.is_available():
    device = cuda0
else:
    device = torch.device("cpu")
print(torch.cuda.get_device_name(device) if device.type == "cuda" else "No GPU available")

# NOTE(review): allow_pickle=True unpickles arbitrary objects; only load trusted files.
data = np.load("../../hvcm/RFQ.npy", allow_pickle=True)
label = np.load("../../hvcm/RFQ_labels.npy", allow_pickle=True)
label = label[:, 1]  # Assuming the second column is the label
label = (label == "Fault").astype(int)  # Binary labels: 1 = "Fault", 0 = anything else
print(data.shape, label.shape)

# Standardise every feature column over all rows of all samples.
# NOTE(review): the scaler is fitted on the full dataset (normal and faulty,
# before any train/test split), so test statistics leak into the scaling.
scaler = StandardScaler()
data = scaler.fit_transform(data.reshape(-1, data.shape[-1])).reshape(data.shape)

# Split by class
normal_data = data[label == 0]
faulty_data = data[label == 1]

normal_label = label[label == 0]
faulty_label = label[label == 1]

# Hold out 20% of the normal samples
X_train, X_test, y_train, y_test = train_test_split(normal_data, normal_label, test_size=0.2, random_state=42, shuffle=True)

# Conditional GAN

In [None]:
# Enhanced Conv1D Generator for Time Series
class Conv1DConditionalGenerator(nn.Module):
    """Class-conditional generator that upsamples noise into a multivariate series.

    A noise vector is concatenated with a 50-dimensional label embedding,
    projected to a (256, seq_len // 64) feature map, and upsampled by six
    stride-2 transposed convolutions. The result is linearly interpolated to
    exactly ``seq_len`` steps when the upsampled length differs.

    ``forward(z, labels)`` returns a tensor of shape
    (batch_size, seq_len, num_features); the final activation is Tanh.
    """

    def __init__(self, latent_dim=100, num_classes=2, num_features=14, seq_len=4500):
        super().__init__()
        self.latent_dim = latent_dim
        self.num_classes = num_classes
        self.num_features = num_features
        self.seq_len = seq_len

        embed_dim = 50
        self.label_emb = nn.Embedding(num_classes, embed_dim)

        # Six stride-2 stages multiply the length by 64, so start 64x shorter.
        self.init_size = seq_len // 64
        projected_width = 256 * self.init_size

        # Dense projection of (noise + label embedding) to the initial feature map
        self.fc = nn.Sequential(
            nn.Linear(latent_dim + embed_dim, projected_width),
            nn.BatchNorm1d(projected_width),
            nn.LeakyReLU(0.2, inplace=True),
        )

        # Channel schedule of the normalised upsampling stages: 256 -> ... -> 8
        channel_plan = (256, 128, 64, 32, 16, 8)
        stages = []
        for in_ch, out_ch in zip(channel_plan[:-1], channel_plan[1:]):
            stages += [
                nn.ConvTranspose1d(in_ch, out_ch, kernel_size=4, stride=2, padding=1),
                nn.BatchNorm1d(out_ch),
                nn.LeakyReLU(0.2, inplace=True),
            ]
        # Output stage: map to the requested number of features, no normalisation
        stages += [
            nn.ConvTranspose1d(channel_plan[-1], num_features, kernel_size=4, stride=2, padding=1),
            nn.Tanh(),
        ]
        self.conv_blocks = nn.Sequential(*stages)

    def forward(self, z, labels):
        # (batch_size, latent_dim + 50): noise conditioned on the label embedding
        conditioned = torch.cat((z, self.label_emb(labels)), dim=1)

        # (batch_size, 256, init_size)
        projected = self.fc(conditioned)
        feature_map = projected.view(projected.shape[0], 256, self.init_size)

        # (batch_size, num_features, upsampled_length)
        series = self.conv_blocks(feature_map)

        # 64 * init_size is generally not seq_len; stretch to the exact length
        if series.shape[2] != self.seq_len:
            series = nn.functional.interpolate(
                series, size=self.seq_len, mode='linear', align_corners=False
            )

        # Channels-last layout: (batch_size, seq_len, num_features)
        return series.transpose(1, 2)

# Enhanced Conv1D Discriminator for Time Series
class Conv1DConditionalDiscriminator(nn.Module):
    """Class-conditional critic for multivariate series.

    The label is embedded (50 dims), projected to one value per time step and
    appended to the input as an extra channel. Six stride-2 convolutions then
    reduce the sequence, and a two-layer head emits one raw logit per sample.

    ``forward(x, labels)`` takes ``x`` of shape (batch_size, seq_len,
    num_features) and returns logits of shape (batch_size, 1).
    """

    def __init__(self, num_classes=2, num_features=14, seq_len=4500):
        super().__init__()
        self.num_classes = num_classes
        self.num_features = num_features
        self.seq_len = seq_len

        # Label -> embedding -> one conditioning value per time step
        self.label_emb = nn.Embedding(num_classes, 50)
        self.label_proj = nn.Linear(50, seq_len)

        # Input has num_features data channels plus one label channel
        channel_plan = (num_features + 1, 16, 32, 64, 128, 256, 512)
        last_stage = len(channel_plan) - 2
        stages = []
        for stage, (in_ch, out_ch) in enumerate(zip(channel_plan[:-1], channel_plan[1:])):
            stages.append(nn.Conv1d(in_ch, out_ch, kernel_size=4, stride=2, padding=1))
            if stage > 0:
                # The first stage is left without batch normalisation
                stages.append(nn.BatchNorm1d(out_ch))
            stages.append(nn.LeakyReLU(0.2, inplace=True))
            if stage < last_stage:
                # No dropout after the final feature stage
                stages.append(nn.Dropout(0.2))
        self.conv_blocks = nn.Sequential(*stages)

        # Temporal length remaining after the six strided convolutions
        self.conv_output_size = self._get_conv_output_size()

        # Logit head on the flattened feature map
        self.classifier = nn.Sequential(
            nn.Linear(512 * self.conv_output_size, 256),
            nn.LeakyReLU(0.2),
            nn.Dropout(0.5),
            nn.Linear(256, 1),
        )

    def _get_conv_output_size(self):
        """Return the sequence length left after six k=4, s=2, p=1 convolutions."""
        kernel, stride, padding = 4, 2, 1
        length = self.seq_len
        remaining = 6
        while remaining > 0:
            length = (length + 2 * padding - kernel) // stride + 1
            remaining -= 1
        return length

    def forward(self, x, labels):
        n = x.size(0)

        # (batch_size, 1, seq_len): label rendered as an extra input channel
        label_channel = self.label_proj(self.label_emb(labels)).unsqueeze(1)

        # (batch_size, num_features + 1, seq_len): channels-first data plus label
        stacked = torch.cat([x.transpose(1, 2), label_channel], dim=1)

        # (batch_size, 512, conv_output_size)
        feature_map = self.conv_blocks(stacked)

        # One raw logit per sample
        return self.classifier(feature_map.view(n, -1))

# Enhanced training function with improved stability
def train_conditional_gan_conv1d(normal_data, normal_labels, device, epochs=50, batch_size=64, lr_g=0.0002, lr_d=0.0001):
    """
    Train the Conv1D conditional GAN on time-series windows.

    Args:
        normal_data: array of shape (num_samples, seq_len, num_features).
        normal_labels: integer class label per sample, shape (num_samples,).
        device: torch device on which the models and batches are placed.
        epochs: number of passes over the data.
        batch_size: mini-batch size; incomplete final batches are dropped.
        lr_g: Adam learning rate of the generator.
        lr_d: Adam learning rate of the discriminator.

    Returns:
        (generator, discriminator, d_losses, g_losses, (data_min, data_max)),
        where the loss lists hold one average per epoch and the last element
        is the min/max of ``normal_data``.

    Raises:
        ValueError: if there are fewer samples than ``batch_size``; with
            ``drop_last=True`` no batch would ever be produced and the models
            would be returned untrained.
    """
    print(f"Training data shape: {normal_data.shape}")
    print(f"Labels shape: {normal_labels.shape}")

    if len(normal_data) < batch_size:
        raise ValueError(
            f"Need at least batch_size={batch_size} samples to form one batch, "
            f"got {len(normal_data)}"
        )

    # Model parameters
    latent_dim = 100
    num_classes = 2  # Binary classification: normal (0) and fault (1)
    num_features = normal_data.shape[-1]
    seq_len = normal_data.shape[1]

    print(f"Model parameters: latent_dim={latent_dim}, num_classes={num_classes}, num_features={num_features}, seq_len={seq_len}")

    # Initialize models
    generator = Conv1DConditionalGenerator(latent_dim, num_classes, num_features, seq_len).to(device)
    discriminator = Conv1DConditionalDiscriminator(num_classes, num_features, seq_len).to(device)

    # DCGAN-style weight initialisation: N(0, 0.02) for conv/linear weights,
    # N(1, 0.02) for batch-norm scales, zero biases.
    def weights_init(m):
        if isinstance(m, (nn.Conv1d, nn.ConvTranspose1d)):
            nn.init.normal_(m.weight, 0.0, 0.02)
            if m.bias is not None:
                nn.init.constant_(m.bias, 0)
        elif isinstance(m, nn.Linear):
            nn.init.normal_(m.weight, 0.0, 0.02)
            if m.bias is not None:
                nn.init.constant_(m.bias, 0)
        elif isinstance(m, (nn.BatchNorm1d)):
            nn.init.normal_(m.weight, 1.0, 0.02)
            nn.init.constant_(m.bias, 0)

    generator.apply(weights_init)
    discriminator.apply(weights_init)

    # Optimizers
    optimizer_G = optim.Adam(generator.parameters(), lr=lr_g, betas=(0.5, 0.999), weight_decay=1e-5)
    optimizer_D = optim.Adam(discriminator.parameters(), lr=lr_d, betas=(0.5, 0.999), weight_decay=1e-5)

    # Learning rate schedulers. The deprecated `verbose` argument is not used;
    # reductions are reported by _step_scheduler below instead.
    scheduler_G = optim.lr_scheduler.ReduceLROnPlateau(optimizer_G, mode='min', factor=0.8, patience=15)
    scheduler_D = optim.lr_scheduler.ReduceLROnPlateau(optimizer_D, mode='min', factor=0.8, patience=15)

    def _step_scheduler(scheduler, optimizer, metric, tag):
        """Step a plateau scheduler and print a line when it lowers the rate."""
        previous_lr = optimizer.param_groups[0]['lr']
        scheduler.step(metric)
        current_lr = optimizer.param_groups[0]['lr']
        if current_lr < previous_lr:
            print(f"  {tag} learning rate reduced: {previous_lr:.2e} -> {current_lr:.2e}")

    # The discriminator emits raw logits, so use the logit form of BCE
    criterion = nn.BCEWithLogitsLoss()

    # Create dataloader
    dataset = TensorDataset(
        torch.tensor(normal_data, dtype=torch.float32),
        torch.tensor(normal_labels, dtype=torch.long)
    )
    dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True, drop_last=True)

    print("Starting Conv1D Conditional GAN training with improved stability...")
    print(f"Learning rates - Generator: {lr_g}, Discriminator: {lr_d}")

    # Training history (one average loss per epoch)
    d_losses = []
    g_losses = []

    # Update cadence, adapted below when the losses drift apart
    d_train_freq = 1  # Train discriminator every iteration
    g_train_freq = 2  # Train generator every 2 iterations

    for epoch in range(epochs):
        epoch_d_losses = []
        epoch_g_losses = []

        for i, (real_data, real_labels) in enumerate(dataloader):
            real_data = real_data.to(device)
            real_labels = real_labels.to(device)
            current_batch_size = real_data.size(0)

            # Two-sided label smoothing
            valid = torch.ones(current_batch_size, 1, device=device) * 0.9  # Real label = 0.9
            fake = torch.zeros(current_batch_size, 1, device=device) + 0.1   # Fake label = 0.1

            # ---------------------
            #  Train Discriminator
            # ---------------------
            if i % d_train_freq == 0:
                optimizer_D.zero_grad()

                # Real data loss
                real_pred = discriminator(real_data, real_labels)
                d_real_loss = criterion(real_pred, valid)

                # Fake data loss - labels are drawn uniformly from all classes
                z = torch.randn(current_batch_size, latent_dim, device=device)
                gen_labels = torch.randint(0, num_classes, (current_batch_size,), device=device)
                fake_data = generator(z, gen_labels)
                # detach(): the discriminator step must not update the generator
                fake_pred = discriminator(fake_data.detach(), gen_labels)
                d_fake_loss = criterion(fake_pred, fake)

                # Total discriminator loss
                d_loss = (d_real_loss + d_fake_loss) / 2
                d_loss.backward()

                # Gradient clipping for stability
                torch.nn.utils.clip_grad_norm_(discriminator.parameters(), 0.5)

                optimizer_D.step()
                epoch_d_losses.append(d_loss.item())

            # ---------------------
            #  Train Generator
            # ---------------------
            if i % g_train_freq == 0:
                optimizer_G.zero_grad()

                # Generate fake data
                z = torch.randn(current_batch_size, latent_dim, device=device)
                gen_labels = torch.randint(0, num_classes, (current_batch_size,), device=device)
                fake_data = generator(z, gen_labels)

                # Generator loss (want discriminator to classify fake as real)
                fake_pred = discriminator(fake_data, gen_labels)
                g_loss = criterion(fake_pred, valid)  # Use smoothed real labels

                g_loss.backward()

                # Gradient clipping for stability
                torch.nn.utils.clip_grad_norm_(generator.parameters(), 0.5)

                optimizer_G.step()
                epoch_g_losses.append(g_loss.item())

        # Calculate average losses
        avg_d_loss = np.mean(epoch_d_losses) if epoch_d_losses else 0
        avg_g_loss = np.mean(epoch_g_losses) if epoch_g_losses else 0

        d_losses.append(avg_d_loss)
        g_losses.append(avg_g_loss)

        # Update learning rate schedulers
        if avg_d_loss > 0:
            _step_scheduler(scheduler_D, optimizer_D, avg_d_loss, "Discriminator")
        if avg_g_loss > 0:
            _step_scheduler(scheduler_G, optimizer_G, avg_g_loss, "Generator")

        # Print progress every 10 epochs
        if epoch % 10 == 0 or epoch == epochs - 1:
            print(f"Epoch {epoch+1}/{epochs} | D Loss: {avg_d_loss:.4f} | G Loss: {avg_g_loss:.4f}")

            # Stability monitoring over the last 20 epochs
            if len(d_losses) >= 20:
                recent_d_std = np.std(d_losses[-20:])
                recent_g_std = np.std(g_losses[-20:]) if len(g_losses) >= 20 else 0
                d_g_ratio = avg_d_loss / (avg_g_loss + 1e-8)

                if recent_d_std < 0.1 and recent_g_std < 0.2 and 0.1 < d_g_ratio < 2.0:
                    print("  ✅ Training highly stable with balanced losses")
                elif recent_d_std < 0.15 and recent_g_std < 0.3 and 0.05 < d_g_ratio < 5.0:
                    print("  🔄 Training moderately stable")
                else:
                    print(f"  ⚠️  Training instability detected (D/G ratio: {d_g_ratio:.2f})")

                    # Adaptive training frequency adjustment
                    if d_g_ratio < 0.1:  # Discriminator too strong
                        g_train_freq = max(1, g_train_freq - 1)
                        d_train_freq = min(3, d_train_freq + 1)
                        print(f"    Adjusting training freq: G={g_train_freq}, D={d_train_freq}")
                    elif d_g_ratio > 3.0:  # Generator too strong
                        d_train_freq = max(1, d_train_freq - 1)
                        g_train_freq = min(3, g_train_freq + 1)
                        print(f"    Adjusting training freq: G={g_train_freq}, D={d_train_freq}")

    return generator, discriminator, d_losses, g_losses, (normal_data.min(), normal_data.max())

# Enhanced sample generation
def generate_conditional_samples(generator, num_samples, target_class, seq_len, latent_dim, device, data_range):
    """
    Generate conditional samples for a specific class.

    Args:
        generator: module called as ``generator(z, labels)``; it is switched
            to eval mode and left in that mode.
        num_samples: number of samples to draw. Values <= 0 yield an empty
            array whose trailing dimensions match the generator output.
        target_class: integer class label used for every sample.
        seq_len: accepted for interface compatibility; not used here.
        latent_dim: size of the noise vector fed to the generator.
        device: torch device on which noise and labels are created.
        data_range: accepted for interface compatibility; not used here.
            Samples are returned on the generator's own output scale, with
            no denormalisation applied.

    Returns:
        NumPy array with ``num_samples`` rows of generator output.
    """
    generator.eval()
    batch_size = 32  # Generate in chunks to bound memory use

    def _draw(count):
        """Generate `count` samples of the target class and move them to the CPU."""
        z = torch.randn(count, latent_dim, device=device)
        labels = torch.full((count,), target_class, dtype=torch.long, device=device)
        return generator(z, labels).cpu()

    with torch.no_grad():
        if num_samples <= 0:
            # torch.cat on an empty list raises, so draw a single probe sample
            # and slice it to zero rows to keep the trailing shape and dtype.
            return _draw(1)[:0].numpy()

        generated_batches = [
            _draw(min(batch_size, num_samples - start))
            for start in range(0, num_samples, batch_size)
        ]

    return torch.cat(generated_batches, dim=0).numpy()

# Train and generate

In [None]:
# Train the conditional GAN on the normal training windows
generator, discriminator, d_history, g_history, data_range = train_conditional_gan_conv1d(
    X_train,
    y_train,
    device,
    epochs=200,
    batch_size=64,
    lr_g=0.0002,   # generator learning rate
    lr_d=0.0001,   # discriminator learning rate (half the generator's)
)

# Draw one synthetic class-0 (normal) window per real training window
num_samples = len(X_train)
generated_data = generate_conditional_samples(
    generator,
    num_samples=num_samples,
    target_class=0,
    seq_len=normal_data.shape[1],
    latent_dim=100,
    device=device,
    data_range=data_range,
)

# Training curves: discriminator loss, generator loss, and their ratio
fig, axes = plt.subplots(1, 3, figsize=(15, 5))

for ax, history, series_label, colour, heading in (
    (axes[0], d_history, 'Discriminator', 'blue', 'Discriminator Loss'),
    (axes[1], g_history, 'Generator', 'red', 'Generator Loss'),
):
    ax.plot(history, label=series_label, color=colour, alpha=0.7)
    ax.set_title(heading)
    ax.set_xlabel('Epoch')
    ax.set_ylabel('Loss')
    ax.legend()
    ax.grid(True, alpha=0.3)

# The third panel stays empty when either history is empty
ratio_ax = axes[2]
if len(d_history) > 0 and len(g_history) > 0:
    loss_ratio = np.array(d_history) / (np.array(g_history) + 1e-8)
    ratio_ax.plot(loss_ratio, label='D/G Loss Ratio', color='green', alpha=0.7)
    ratio_ax.axhline(y=1.0, color='black', linestyle='--', alpha=0.5, label='Ideal Balance')
    ratio_ax.axhline(y=0.5, color='orange', linestyle=':', alpha=0.5, label='Acceptable Range')
    ratio_ax.axhline(y=2.0, color='orange', linestyle=':', alpha=0.5)
    ratio_ax.set_title('Training Balance (D/G Ratio)')
    ratio_ax.set_xlabel('Epoch')
    ratio_ax.set_ylabel('Ratio')
    ratio_ax.legend()
    ratio_ax.grid(True, alpha=0.3)

fig.tight_layout()
plt.show()

# Pool the synthetic windows with all real normal windows (label 0 for both)
combine_data_normal = np.concatenate((generated_data, normal_data), axis=0)
combine_labels_normal = np.concatenate((np.zeros(len(generated_data)), normal_label), axis=0)

print(f"Generated data shape: {generated_data.shape}")
print(f"Generated data range: [{generated_data.min():.4f}, {generated_data.max():.4f}]")
print(f"Real data range: [{normal_data.min():.4f}, {normal_data.max():.4f}]")

# Compare first- and second-order statistics of synthetic vs. real data
generated_mean, generated_std = generated_data.mean(), generated_data.std()
real_mean, real_std = normal_data.mean(), normal_data.std()

print("\n=== Data Quality Analysis ===")
print(f"Generated data mean: {generated_mean:.4f}, std: {generated_std:.4f}")
print(f"Real data mean: {real_mean:.4f}, std: {real_std:.4f}")
print(f"Mean difference: {abs(generated_mean - real_mean):.4f}")
print(f"Std difference: {abs(generated_std - real_std):.4f}")


# Processing: Mel Spec > Resizing > Feature Extraction

In [None]:
def resize_spectrogram(spectrogram, global_min=None, global_max=None):
    """
    Convert a 2-D spectrogram into a 3-channel 224x224 image tensor.

    Args:
        spectrogram: 2-D tensor (or array) of spectrogram values; it may live
            on any device.
        global_min, global_max: optional dataset-wide bounds. When both are
            given they are used for min-max scaling so that all spectrograms
            share one scale; otherwise the spectrogram's own min/max are used.

    Returns:
        Tensor produced by ``transforms.ToTensor`` from the resized RGB image
        (the grayscale spectrogram replicated over three channels).
    """
    spectrogram = torch.as_tensor(spectrogram, dtype=torch.float32)

    if global_min is not None and global_max is not None:
        # Plain floats avoid mixing NumPy scalars with (possibly CUDA) tensors
        low, high = float(global_min), float(global_max)
    else:
        low, high = spectrogram.min(), spectrogram.max()
    scaled = (spectrogram - low) / (high - low + 1e-8)

    # Clamp with torch before leaving the device: NumPy cannot operate on CUDA
    # tensors, and values outside the global bounds must saturate at 0 or 1.
    scaled = scaled.clamp(0.0, 1.0)
    pixels = (scaled.detach().cpu().numpy() * 255).astype(np.uint8)
    pixels = np.stack([pixels] * 3, axis=-1)  # replicate grayscale to RGB

    image = Image.fromarray(pixels)
    image = transforms.Resize((224, 224))(image)
    return transforms.ToTensor()(image)

def process_dataset_improved(data, sample_rate=1000):
    """
    Extract one 4096-dimensional VGG16 feature vector per sample and channel.

    Each channel is turned into a mel spectrogram, scaled with dataset-wide
    bounds (1st/99th percentile over the first 100 samples at most), rendered
    as a 224x224 image by ``resize_spectrogram`` and passed through VGG16 with
    its last three classifier layers removed.

    Args:
        data: array of shape (num_samples, seq_len, num_channels).
        sample_rate: sample rate handed to the mel-spectrogram transform.

    Returns:
        Array of shape (num_samples, num_channels, 4096).
    """
    num_samples, seq_len, num_channels = data.shape
    features = np.zeros((num_samples, num_channels, 4096))

    mel_transform = torchaudio.transforms.MelSpectrogram(
        sample_rate=sample_rate,
        n_mels=128,
        n_fft=512,
        hop_length=256,     # half of n_fft, i.e. 50% overlap
        win_length=512,
        window_fn=torch.hann_window,
    ).to(device)

    # Pretrained VGG16 truncated so that it outputs 4096 features
    model = vgg16(weights=VGG16_Weights.IMAGENET1K_V1).to(device)
    model.classifier = model.classifier[:-3]
    model.eval()

    def channel_mel(sample_idx, channel_idx):
        """Mel spectrogram of one channel of one sample, computed on `device`."""
        series = torch.tensor(data[sample_idx, :, channel_idx], dtype=torch.float32).to(device)
        return mel_transform(series)

    # Robust global bounds from a subset; percentiles damp the effect of outliers
    print("Computing global spectrogram statistics...")
    calibration_values = [
        channel_mel(sample_idx, channel_idx).cpu().numpy().flatten()
        for sample_idx in range(min(100, num_samples))
        for channel_idx in range(num_channels)
    ]
    global_min, global_max = np.percentile(np.concatenate(calibration_values), [1, 99])

    print(f"Processing {num_samples} samples...")
    for sample_idx in range(num_samples):
        if sample_idx % 100 == 0:
            print(f"Processed {sample_idx}/{num_samples} samples")

        for channel_idx in range(num_channels):
            image = resize_spectrogram(channel_mel(sample_idx, channel_idx), global_min, global_max)

            with torch.no_grad():
                embedding = model(image.unsqueeze(0).to(device))
            features[sample_idx, channel_idx, :] = embedding.squeeze().cpu().numpy()

    return features

# Alternative: Multi-channel processing
def process_dataset_multichannel(data, sample_rate=1000):
    """
    Extract one 4096-dimensional VGG16 feature vector per sample.

    The mel spectrograms of up to the first three channels are min-max scaled
    individually, resized to 224x224 and stacked as the three planes of one
    image, which is passed through VGG16 with its last three classifier
    layers removed. Channels beyond the third are ignored.

    Args:
        data: array of shape (num_samples, seq_len, num_channels).
        sample_rate: sample rate handed to the mel-spectrogram transform.

    Returns:
        Array of shape (num_samples, 4096).
    """
    num_samples, seq_len, num_channels = data.shape
    features = np.zeros((num_samples, 4096))

    mel_transform = torchaudio.transforms.MelSpectrogram(
        sample_rate=sample_rate,
        n_mels=128,
        n_fft=512,
        hop_length=256,
        win_length=512,
    ).to(device)

    model = vgg16(weights=VGG16_Weights.IMAGENET1K_V1).to(device)
    model.classifier = model.classifier[:-3]
    model.eval()

    used_channels = min(3, num_channels)  # one image plane per channel, three at most

    print(f"Processing {num_samples} samples with multi-channel approach...")
    for sample_idx in range(num_samples):
        if sample_idx % 100 == 0:
            print(f"Processed {sample_idx}/{num_samples} samples")

        planes = []
        for channel_idx in range(used_channels):
            series = torch.tensor(data[sample_idx, :, channel_idx], dtype=torch.float32).to(device)
            mel = mel_transform(series)

            # Per-spectrogram min-max scaling
            lowest, highest = mel.min(), mel.max()
            mel_unit = (mel - lowest) / (highest - lowest + 1e-8)

            # interpolate expects (N, C, H, W); add and then drop the two leading axes
            plane = torch.nn.functional.interpolate(
                mel_unit[None, None],
                size=(224, 224),
                mode='bilinear',
            ).squeeze()
            planes.append(plane.cpu().numpy())

        # Always build three planes, repeating channels when fewer are available
        if len(planes) == 2:
            rgb_planes = [planes[0], planes[1], planes[0]]
        elif len(planes) == 1:
            rgb_planes = planes * 3
        else:
            rgb_planes = planes[:3]
        rgb_img = np.stack(rgb_planes, axis=0)

        img_tensor = torch.tensor(rgb_img, dtype=torch.float32).unsqueeze(0).to(device)

        with torch.no_grad():
            embedding = model(img_tensor)
        features[sample_idx, :] = embedding.squeeze().cpu().numpy()

    return features

# AE Class

In [None]:
# Autoencoder model
class Autoencoder(nn.Module):
    """Fully connected autoencoder with a 4-unit bottleneck.

    The encoder narrows ``input_size -> 64 -> 32 -> 16 -> 8 -> 4`` with Tanh
    after every layer. The decoder mirrors those widths back to
    ``input_size``, with Tanh between layers and a Sigmoid on the output.
    """

    def __init__(self, input_size=4096):
        super().__init__()
        widths = [input_size, 64, 32, 16, 8, 4]

        # Encoder: every linear layer is followed by Tanh
        encoder_layers = []
        for fan_in, fan_out in zip(widths[:-1], widths[1:]):
            encoder_layers += [nn.Linear(fan_in, fan_out), nn.Tanh()]
        self.encoder = nn.Sequential(*encoder_layers)

        # Decoder: same widths in reverse; the last activation is Sigmoid
        mirrored = widths[::-1]
        decoder_layers = []
        for fan_in, fan_out in zip(mirrored[:-1], mirrored[1:]):
            decoder_layers += [nn.Linear(fan_in, fan_out), nn.Tanh()]
        decoder_layers[-1] = nn.Sigmoid()
        self.decoder = nn.Sequential(*decoder_layers)

    def forward(self, x):
        code = self.encoder(x)
        return self.decoder(code)


# Train autoencoder
def train_autoencoder(features, epochs=20, batch_size=128):
    """
    Train a denoising autoencoder on feature vectors.

    Args:
        features: array whose last axis is the feature dimension; all leading
            axes are flattened into independent training rows.
        epochs: number of passes over the rows.
        batch_size: mini-batch size.

    Returns:
        The trained ``Autoencoder``, placed on the global ``device``.

    Raises:
        ValueError: if ``features`` contains no rows.
    """
    # Infer the width from the data instead of assuming 4096
    input_size = features.shape[-1]
    x = torch.tensor(features.reshape(-1, input_size), dtype=torch.float32).to(device)
    if x.shape[0] == 0:
        raise ValueError("train_autoencoder needs at least one feature vector")

    loader = DataLoader(TensorDataset(x), batch_size=batch_size, shuffle=True)
    model = Autoencoder(input_size).to(device)
    optimizer = optim.Adam(model.parameters(), lr=1e-3, weight_decay=1e-4)
    criterion = nn.MSELoss()

    model.train()
    for epoch in range(epochs):
        total_loss = 0
        for batch in loader:
            inputs = batch[0]
            # Denoising objective: corrupt the input, reconstruct the clean target
            noisy_inputs = inputs + 0.1 * torch.randn_like(inputs)
            outputs = model(noisy_inputs)
            loss = criterion(outputs, inputs)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
        print(f"Epoch {epoch+1}/{epochs}, Loss: {total_loss / len(loader):.6f}")
    return model

# Compute reconstruction errors
def compute_reconstruction_loss(model, data, add_noise=True):
    """
    Mean-squared reconstruction error per sample.

    Args:
        model: autoencoder-like module; it is switched to eval mode and its
            first parameter determines the device used.
        data: array of shape (n_samples, n_channels, n_features).
        add_noise: when True, Gaussian noise (std 0.1) is added to the model
            input; the error is always measured against the clean input.

    Returns:
        Array of shape (n_samples,): the per-row MSE averaged over channels.
    """
    model.eval()
    n_samples, n_channels, n_features = data.shape

    # Every (sample, channel) pair becomes one row of width n_features
    target_device = next(model.parameters()).device
    rows = torch.tensor(data.reshape(-1, n_features), dtype=torch.float32).to(target_device)

    elementwise_mse = torch.nn.MSELoss(reduction='none')
    row_error_chunks = []

    with torch.no_grad():
        for (inputs,) in DataLoader(TensorDataset(rows), batch_size=64):
            model_input = inputs + 0.1 * torch.randn_like(inputs) if add_noise else inputs
            reconstruction = model(model_input)
            # Average the squared error over the feature axis -> one value per row
            row_errors = elementwise_mse(reconstruction, inputs).mean(dim=1)
            row_error_chunks.append(row_errors.cpu().numpy())

    flat_errors = np.concatenate(row_error_chunks) if row_error_chunks else np.array([])

    # Back to (n_samples, n_channels), then average the channels of each sample
    return flat_errors.reshape(n_samples, n_channels).mean(axis=1)

# Find the best reconstruction-error threshold by F1, recall, or precision
def _search_best_threshold(errors, labels, metric_fn):
    """Scan 100 evenly spaced thresholds and keep the first one with the top score.

    A sample is predicted positive when its error is strictly above the
    threshold. Returns ``(0, 0)`` when no threshold scores above zero.
    """
    errors = np.asarray(errors)
    best_score = 0
    best_threshold = 0
    for threshold in np.linspace(errors.min(), errors.max(), 100):
        preds = (errors > threshold).astype(int)
        score = metric_fn(labels, preds)
        # Strict comparison: ties keep the earlier (lower) threshold
        if score > best_score:
            best_score = score
            best_threshold = threshold
    return best_threshold, best_score


def find_best_threshold(errors, labels):
    """Return ``(threshold, f1)`` for the threshold that maximises the F1 score.

    Args:
        errors: per-sample reconstruction errors.
        labels: binary ground truth (1 = anomaly), aligned with ``errors``.
    """
    return _search_best_threshold(errors, labels, f1_score)


def find_best_threshold_using_recall(errors, labels):
    """Return ``(threshold, recall)`` for the threshold that maximises recall.

    Args:
        errors: per-sample reconstruction errors.
        labels: binary ground truth (1 = anomaly), aligned with ``errors``.
    """
    return _search_best_threshold(errors, labels, recall_score)


def find_best_threshold_using_precision(errors, labels):
    """Return ``(threshold, precision)`` for the threshold that maximises precision.

    Args:
        errors: per-sample reconstruction errors.
        labels: binary ground truth (1 = anomaly), aligned with ``errors``.
    """
    return _search_best_threshold(errors, labels, precision_score)


def evaluate_on_test_with_threshold_search(model, threshold, X_test, y_test):
    """
    Score a fixed reconstruction-error threshold on a test set.

    Args:
        model: trained autoencoder passed to ``compute_reconstruction_loss``.
        threshold: samples whose error is strictly above it are predicted 1.
        X_test: array of shape (n_samples, n_channels, n_features).
        y_test: binary ground truth of shape (n_samples,).

    Returns:
        Dict with keys 'accuracy', 'precision', 'recall', 'f1' and
        'confusion_matrix', holding the same values that are printed.
    """
    # NOTE: compute_reconstruction_loss adds input noise by default, so the
    # errors (and the metrics) vary between calls; reuse the returned dict
    # instead of recomputing them.
    test_errors = compute_reconstruction_loss(model, X_test)

    test_preds = (test_errors > threshold).astype(int)

    metrics = {
        'accuracy': accuracy_score(y_test, test_preds),
        'precision': precision_score(y_test, test_preds),
        'recall': recall_score(y_test, test_preds),
        'f1': f1_score(y_test, test_preds),
        'confusion_matrix': confusion_matrix(y_test, test_preds),
    }

    print("Evaluation on Test Set:")
    print("Accuracy =", metrics['accuracy'])
    print("Precision =", metrics['precision'])
    print("Recall =", metrics['recall'])
    print("F1 Score =", metrics['f1'])
    print("Confusion Matrix:\n", metrics['confusion_matrix'])

    return metrics


# Cross Validation

## Best F1 score as threshold

In [None]:
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

acc = []
prec = []
rec = []
f1 = []

# The synthetic samples are the same in every fold, so extract their VGG
# features once instead of once per fold.
generated_features = process_dataset_multichannel(generated_data)

# NOTE(review): the StratifiedKFold indices were never used. Each "fold" is an
# independent 80/20 split of the normal and faulty samples seeded by the fold
# number; only the number of folds is taken from `skf`.
for fold in range(skf.get_n_splits()):

    print(f"Fold {fold + 1}")
    random_state = fold + 1
    X_train_normal, X_test_normal, y_train_normal, y_test_normal = train_test_split(normal_data, normal_label, test_size=0.2, shuffle=True, random_state=random_state)
    X_train_faulty, X_test_faulty, y_train_faulty, y_test_faulty = train_test_split(faulty_data, faulty_label, test_size=0.2, shuffle=True, random_state=random_state)

    # Feature extraction (each split is processed exactly once)
    X_train_normal = process_dataset_multichannel(X_train_normal)
    X_train_faulty = process_dataset_multichannel(X_train_faulty)
    X_test_normal = process_dataset_multichannel(X_test_normal)
    X_test_faulty = process_dataset_multichannel(X_test_faulty)

    # Autoencoder training set: synthetic normals plus this fold's real normals
    combine_data_normal = np.concatenate((generated_features, X_train_normal), axis=0)

    # Train autoencoder on the training fold
    model = train_autoencoder(combine_data_normal, epochs=15, batch_size=32)

    X_train_normal = X_train_normal[:, np.newaxis, :]  # Add channel dimension
    X_train_faulty = X_train_faulty[:, np.newaxis, :]  # Add channel dimension

    # Tune the threshold on the training-split normal and faulty samples
    val_errors_normal = compute_reconstruction_loss(model, X_train_normal)
    val_errors_abnormal = compute_reconstruction_loss(model, X_train_faulty)
    val_errors = np.concatenate([val_errors_normal, val_errors_abnormal])
    y_val_combined = np.concatenate([np.zeros(len(val_errors_normal)), np.ones(len(val_errors_abnormal))])

    threshold, best_f1 = find_best_threshold(val_errors, y_val_combined)
    print(f"Best threshold: {threshold}, Best F1 Score: {best_f1}")

    # Plot histogram of reconstruction errors on both normal and abnormal samples
    plt.figure(figsize=(12, 6))
    plt.hist(val_errors_normal, bins=50, alpha=0.5, label='Normal Samples', color='blue')
    plt.hist(val_errors_abnormal, bins=50, alpha=0.5, label='Abnormal Samples', color='red')
    plt.axvline(threshold, color='black', linestyle='--', label='Threshold')
    plt.title('Reconstruction Errors on Validation Set')
    plt.xlabel('Reconstruction Error')
    plt.ylabel('Frequency')
    plt.legend()
    plt.show()

    X_test = np.concatenate((X_test_normal, X_test_faulty), axis=0)[:, np.newaxis, :]  # Add channel dimension
    y_test = np.concatenate((y_test_normal, y_test_faulty), axis=0)

    # Compute the (noisy) test errors once, so that the printed and the
    # recorded metrics describe the same predictions.
    val_errors_test = compute_reconstruction_loss(model, X_test)
    test_preds = (val_errors_test > threshold).astype(int)

    fold_acc = accuracy_score(y_test, test_preds)
    fold_prec = precision_score(y_test, test_preds)
    fold_rec = recall_score(y_test, test_preds)
    fold_f1 = f1_score(y_test, test_preds)

    print("Evaluation on Test Set:")
    print("Accuracy =", fold_acc)
    print("Precision =", fold_prec)
    print("Recall =", fold_rec)
    print("F1 Score =", fold_f1)
    print("Confusion Matrix:\n", confusion_matrix(y_test, test_preds))

    acc.append(fold_acc)
    prec.append(fold_prec)
    rec.append(fold_rec)
    f1.append(fold_f1)

print(f"Average scores\nAccuracy: {np.mean(acc)}\nPrecision: {np.mean(prec)}\nRecall: {np.mean(rec)}\nF1 Score: {np.mean(f1)}")
print()
print(f"Standard Deviation\nAccuracy: {np.std(acc)}\nPrecision: {np.std(prec)}\nRecall: {np.std(rec)}\nF1 Score: {np.std(f1)}")

# Comprehensive anomaly detection evaluation framework
class AnomalyDetectionMethods:
    """Static strategies that turn per-sample errors into anomaly predictions.

    Every method returns a ``(threshold, metrics)`` tuple. ``metrics`` always
    carries the keys ``'accuracy'``, ``'precision'``, ``'recall'`` and ``'f1'``.
    For the threshold-based methods a sample is predicted anomalous (label 1)
    when its error is strictly greater than the threshold; the One-Class SVM
    has no scalar cut-off and returns ``None`` as threshold.
    """

    @staticmethod
    def _zero_metrics():
        """Return a complete all-zero metrics dict (placeholder when no threshold qualifies)."""
        return {'accuracy': 0.0, 'precision': 0.0, 'recall': 0.0, 'f1': 0.0}

    @staticmethod
    def _score(labels, preds):
        """Compute the four reported metrics for binary ``preds`` against ``labels``."""
        return {
            'accuracy': accuracy_score(labels, preds),
            'precision': precision_score(labels, preds, zero_division=0),
            'recall': recall_score(labels, preds, zero_division=0),
            'f1': f1_score(labels, preds, zero_division=0)
        }

    @staticmethod
    def _candidate_thresholds(errors):
        """Return 100 evenly spaced thresholds between the 5th and 95th error percentiles."""
        return np.linspace(np.percentile(errors, 5), np.percentile(errors, 95), 100)

    @staticmethod
    def threshold_based_f1(errors, labels):
        """Find the threshold that maximises the F1 score.

        Args:
            errors: 1-D array-like of per-sample errors.
            labels: binary ground-truth labels (1 = anomaly), same length as ``errors``.

        Returns:
            ``(best_threshold, best_metrics)``. When no candidate threshold
            yields a positive F1 (or every candidate puts all samples in one
            class) the threshold is ``0`` and the metrics are all zero, so
            callers can always index the four metric keys.
        """
        errors = np.asarray(errors)
        best_f1 = 0
        best_threshold = 0
        # Start from a complete dict: previously an empty dict was returned when
        # no threshold improved on 0, which broke callers reading metrics['f1'].
        best_metrics = AnomalyDetectionMethods._zero_metrics()

        for threshold in AnomalyDetectionMethods._candidate_thresholds(errors):
            preds = (errors > threshold).astype(int)
            # Ignore degenerate thresholds that assign every sample to one class.
            if len(np.unique(preds)) > 1:
                f1 = f1_score(labels, preds, zero_division=0)
                if f1 > best_f1:
                    best_f1 = f1
                    best_threshold = threshold
                    best_metrics = AnomalyDetectionMethods._score(labels, preds)

        return best_threshold, best_metrics

    @staticmethod
    def threshold_based_accuracy(errors, labels):
        """Find the threshold that maximises accuracy.

        Args:
            errors: 1-D array-like of per-sample errors.
            labels: binary ground-truth labels (1 = anomaly), same length as ``errors``.

        Returns:
            ``(best_threshold, best_metrics)``. If every candidate has accuracy
            0 the threshold is ``0`` and the metrics are all zero (which is
            exact in that case, since no prediction was correct).
        """
        errors = np.asarray(errors)
        best_acc = 0
        best_threshold = 0
        best_metrics = AnomalyDetectionMethods._zero_metrics()

        for threshold in AnomalyDetectionMethods._candidate_thresholds(errors):
            preds = (errors > threshold).astype(int)
            acc = accuracy_score(labels, preds)
            if acc > best_acc:
                best_acc = acc
                best_threshold = threshold
                best_metrics = AnomalyDetectionMethods._score(labels, preds)

        return best_threshold, best_metrics

    @staticmethod
    def percentile_based(errors, labels, percentile=95):
        """Flag every sample whose error exceeds the given percentile of ``errors``.

        Args:
            errors: 1-D array-like of per-sample errors.
            labels: binary ground-truth labels (1 = anomaly).
            percentile: percentile of ``errors`` used as the cut-off.

        Returns:
            ``(threshold, metrics)``.
        """
        errors = np.asarray(errors)
        threshold = np.percentile(errors, percentile)
        preds = (errors > threshold).astype(int)

        return threshold, AnomalyDetectionMethods._score(labels, preds)

    @staticmethod
    def one_class_svm(train_errors, test_errors, test_labels, nu=0.1):
        """Fit a One-Class SVM on training errors and classify the test errors.

        The errors are treated as a single feature, standardised with a scaler
        fitted on ``train_errors`` only.

        Args:
            train_errors: 1-D array-like of errors used to fit the scaler and the SVM.
            test_errors: 1-D array-like of errors to classify.
            test_labels: binary ground-truth labels (1 = anomaly) for ``test_errors``.
            nu: ``nu`` parameter forwarded to ``OneClassSVM``.

        Returns:
            ``(None, metrics)`` -- there is no scalar threshold for this method.
        """
        train_errors_reshaped = np.asarray(train_errors).reshape(-1, 1)
        test_errors_reshaped = np.asarray(test_errors).reshape(-1, 1)

        scaler = StandardScaler()
        train_errors_scaled = scaler.fit_transform(train_errors_reshaped)
        test_errors_scaled = scaler.transform(test_errors_reshaped)

        clf = OneClassSVM(nu=nu, kernel='rbf', gamma='scale')
        clf.fit(train_errors_scaled)

        # OneClassSVM.predict returns -1 for outliers; map those to label 1.
        preds_raw = clf.predict(test_errors_scaled)
        preds = (preds_raw == -1).astype(int)

        return None, AnomalyDetectionMethods._score(test_labels, preds)

# Enhanced Autoencoder for anomaly detection
class EnhancedAutoencoder(nn.Module):
    """Fully connected autoencoder with a 32-dimensional bottleneck.

    The encoder narrows ``input_size -> 1024 -> 512 -> 256 -> 128 -> 64 -> 32``
    and ends in ``Tanh``; the decoder mirrors the widths back up to
    ``input_size`` and ends in ``Sigmoid``. Hidden layers use
    Linear + BatchNorm1d + ReLU, with Dropout on the three widest layers of
    each half.

    NOTE(review): the final Sigmoid bounds reconstructions to (0, 1) -- confirm
    the features fed to this model live in that range.
    """

    @staticmethod
    def _dense_block(n_in, n_out, dropout=None):
        """Return ``[Linear, BatchNorm1d, ReLU]`` plus an optional trailing Dropout."""
        layers = [nn.Linear(n_in, n_out), nn.BatchNorm1d(n_out), nn.ReLU()]
        if dropout is not None:
            layers.append(nn.Dropout(dropout))
        return layers

    def __init__(self, input_size=4096):
        super().__init__()
        block = self._dense_block

        # Blocks are created strictly in layer order so that parameter
        # initialisation consumes the RNG in the same sequence as a flat listing.
        self.encoder = nn.Sequential(
            *block(input_size, 1024, dropout=0.2),
            *block(1024, 512, dropout=0.2),
            *block(512, 256, dropout=0.1),
            *block(256, 128),
            *block(128, 64),
            nn.Linear(64, 32),
            nn.Tanh(),
        )

        self.decoder = nn.Sequential(
            *block(32, 64),
            *block(64, 128),
            *block(128, 256, dropout=0.1),
            *block(256, 512, dropout=0.2),
            *block(512, 1024, dropout=0.2),
            nn.Linear(1024, input_size),
            nn.Sigmoid(),
        )

    def forward(self, x):
        """Encode ``x`` to the bottleneck and return its reconstruction."""
        return self.decoder(self.encoder(x))

def train_enhanced_autoencoder(features, epochs=30, batch_size=128, lr=1e-3, input_size=4096):
    """Train an ``EnhancedAutoencoder`` as a denoising autoencoder on ``features``.

    ``features`` is flattened to rows of ``input_size`` values and moved to the
    module-level ``device``. Each step feeds the input plus Gaussian noise
    (std 0.1) and minimises the MSE against the clean input. The learning rate
    is halved by ``ReduceLROnPlateau`` when the mean epoch loss stops improving.

    Args:
        features: array whose total size is a multiple of ``input_size``
            (it must support ``.reshape``).
        epochs: number of passes over the data.
        batch_size: mini-batch size.
        lr: initial Adam learning rate.
        input_size: width of one feature row; defaults to 4096, the value that
            was previously hard-coded.

    Returns:
        The trained model (left in training mode; callers switch to ``eval``).

    Raises:
        ValueError: if fewer than two feature rows are supplied, because
            BatchNorm1d cannot compute batch statistics from a single row.
    """
    x = torch.tensor(features.reshape(-1, input_size), dtype=torch.float32).to(device)
    n_rows = x.shape[0]
    if n_rows < 2:
        raise ValueError(
            f"train_enhanced_autoencoder needs at least 2 feature rows, got {n_rows}"
        )

    # BatchNorm1d raises in training mode when a batch holds a single row.
    # That happens for the trailing batch whenever n_rows % batch_size == 1,
    # so drop the trailing batch in exactly that case and keep it otherwise.
    drop_single_row_tail = batch_size > 1 and n_rows % batch_size == 1
    loader = DataLoader(
        TensorDataset(x),
        batch_size=batch_size,
        shuffle=True,
        drop_last=drop_single_row_tail,
    )
    model = EnhancedAutoencoder(input_size=input_size).to(device)
    optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=1e-4)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=5, factor=0.5)
    criterion = nn.MSELoss()

    for epoch in range(epochs):
        model.train()
        total_loss = 0
        for batch in loader:
            inputs = batch[0]
            # Denoising objective: corrupt the input, reconstruct the clean one.
            noisy_inputs = inputs + 0.1 * torch.randn_like(inputs)
            outputs = model(noisy_inputs)
            loss = criterion(outputs, inputs)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            total_loss += loss.item()

        avg_loss = total_loss / len(loader)
        scheduler.step(avg_loss)

        if epoch % 5 == 0:
            print(f"Epoch {epoch+1}/{epochs}, Loss: {avg_loss:.6f}")

    return model

def compute_reconstruction_loss(model, data, batch_size=64):
    """Return one mean-squared reconstruction error per sample.

    ``data`` may be 2-D ``(samples, features)`` or 3-D
    ``(samples, channels, features)``. For 3-D input every (sample, channel)
    row is passed through ``model`` on its own and the per-row errors are then
    averaged over the channel axis. The model is switched to eval mode and
    evaluated without gradients on the device of its first parameter.

    Args:
        model: module mapping a ``(batch, features)`` tensor to a tensor of the same shape.
        data: NumPy array of rank 2 or 3.
        batch_size: number of rows evaluated per forward pass.

    Returns:
        1-D NumPy array with one error value per sample.
    """
    model.eval()

    multichannel = len(data.shape) == 3
    if multichannel:
        n_samples, n_channels, n_features = data.shape
        rows = data.reshape(-1, n_features)
    else:
        n_samples, n_features = data.shape
        n_channels = 1
        rows = data
    x = torch.tensor(rows, dtype=torch.float32).to(next(model.parameters()).device)

    row_errors = []
    with torch.no_grad():
        for (inputs,) in DataLoader(TensorDataset(x), batch_size=batch_size):
            # Element-wise squared error, averaged over the feature axis.
            squared = torch.nn.functional.mse_loss(model(inputs), inputs, reduction='none')
            row_errors.extend(squared.mean(dim=1).cpu().numpy())
    row_errors = np.array(row_errors)

    if not multichannel:
        return row_errors
    # Collapse the per-channel rows back to one value per sample.
    return row_errors.reshape(n_samples, n_channels).mean(axis=1)

def comprehensive_anomaly_evaluation(model, train_data, test_data, test_labels, method_name="Method"):
    """Score several anomaly-detection strategies on one model's reconstruction errors.

    Reconstruction errors are computed for ``train_data`` and ``test_data``
    with ``compute_reconstruction_loss``; each strategy from
    ``AnomalyDetectionMethods`` is then evaluated on the test errors.

    NOTE(review): the threshold strategies pick their threshold using
    ``test_errors`` together with ``test_labels``, i.e. they are tuned on the
    same data they are scored on, so their metrics are optimistic.

    Args:
        model: trained autoencoder.
        train_data: data whose errors are used to fit the One-Class SVM.
        test_data: data to evaluate.
        test_labels: binary ground-truth labels (1 = anomaly) for ``test_data``.
        method_name: accepted for backward compatibility; currently unused.

    Returns:
        Dict keyed by strategy name. Each value holds ``'threshold'``,
        ``'metrics'`` (always with the keys accuracy/precision/recall/f1) and
        ``'test_errors'``. A strategy that raises is reported on stdout and
        recorded with a ``None`` threshold and all-zero metrics.
    """
    train_errors = compute_reconstruction_loss(model, train_data)
    test_errors = compute_reconstruction_loss(model, test_data)

    zero_metrics = {'accuracy': 0, 'precision': 0, 'recall': 0, 'f1': 0}

    methods = {
        'Threshold-F1': AnomalyDetectionMethods.threshold_based_f1,
        'Threshold-Accuracy': AnomalyDetectionMethods.threshold_based_accuracy,
        'Percentile-95': lambda e, l: AnomalyDetectionMethods.percentile_based(e, l, 95),
        'One-Class SVM': lambda e, l: AnomalyDetectionMethods.one_class_svm(train_errors, e, l)
    }

    results = {}
    for method_name_inner, method_func in methods.items():
        try:
            # All strategies share the (errors, labels) call shape; the SVM
            # lambda already closes over train_errors.
            threshold, metrics = method_func(test_errors, test_labels)
            # A strategy may hand back an incomplete dict when no threshold
            # qualified; fill the gaps so callers can index every metric key.
            metrics = {**zero_metrics, **metrics}
        except Exception as e:
            # Deliberate best effort: one failing strategy must not abort the fold.
            print(f"Error in {method_name_inner}: {e}")
            threshold, metrics = None, dict(zero_metrics)

        results[method_name_inner] = {
            'threshold': threshold,
            'metrics': metrics,
            'test_errors': test_errors
        }

    return results

# Comprehensive Cross-Validation with Conditional GAN
# This section trains the conditional GAN once and generates the synthetic
# samples that are later mixed into the autoencoder's training data.
print("="*80)
print("COMPREHENSIVE ANOMALY DETECTION EVALUATION WITH CONDITIONAL GAN")
print("="*80)

# Train the Conditional GAN first.
# At the top of the notebook X_train / y_train are the 80% split of the
# normal-only data (all labels 0) -- verify that no cell in between rebinds them.
# NOTE(review): the GAN is trained a single time, outside the cross-validation
# folds, so it may have seen normal samples that end up in a validation fold.
print("Training Conditional GAN...")
generator, discriminator, d_history, g_history, data_range = train_conditional_gan_conv1d(
    X_train, y_train, device, epochs=80, batch_size=32, lr_g=1e-4, lr_d=2e-4
)

# Generate synthetic data for the normal class (target_class=0; labels were
# binarised so that 1 means "Fault"). The second argument is len(normal_data),
# presumably the number of samples to generate -- confirm against
# generate_conditional_samples.
print("Generating conditional synthetic data...")
generated_data = generate_conditional_samples(
    generator, len(normal_data), target_class=0, seq_len=normal_data.shape[1], 
    latent_dim=100, device=device, data_range=data_range
)

print(f"Generated data shape: {generated_data.shape}")

# Visualization of conditional generation:
# top row shows randomly picked real normal samples, bottom row shows randomly
# picked generated samples; only the first feature (index 0 on the last axis)
# of each sequence is drawn.
fig, axes = plt.subplots(2, 3, figsize=(18, 10))
n_viz = 3
real_indices = np.random.choice(len(normal_data), n_viz, replace=False)
fake_indices = np.random.choice(len(generated_data), n_viz, replace=False)

for i, (_real_idx, _fake_idx) in enumerate(zip(real_indices, fake_indices)):
    _panels = (
        (axes[0, i], normal_data[_real_idx, :, 0], 'Real', {}),
        (axes[1, i], generated_data[_fake_idx, :, 0], 'Generated', {'color': 'red'}),
    )
    for _ax, _series, _kind, _line_style in _panels:
        _ax.plot(_series, alpha=0.7, label=_kind, **_line_style)
        _ax.set_title(f'{_kind} Sample {i+1} (Feature 1)')
        _ax.legend()
        _ax.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

# Cross-validation evaluation
# 5-fold stratified split over the full dataset (normal and faulty samples).
# Per fold: train an autoencoder on generated + real normal training samples,
# then score every anomaly-detection strategy on the fold's validation split.
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
all_fold_results = []  # one {strategy name -> result dict} entry per fold

for fold, (train_idx, val_idx) in enumerate(skf.split(data, label)):
    print(f"\n{'='*20} FOLD {fold + 1} {'='*20}")
    
    # Split data for this fold
    X_fold_train = data[train_idx]
    X_fold_val = data[val_idx] 
    y_fold_train = label[train_idx]
    y_fold_val = label[val_idx]
    
    # Separate normal (label 0) and faulty (label 1) data via boolean masks
    normal_indices = y_fold_train == 0
    faulty_indices = y_fold_train == 1
    
    X_train_normal = X_fold_train[normal_indices]
    # The faulty training samples are only counted in the printout below;
    # the autoencoder is never trained on them.
    X_train_faulty = X_fold_train[faulty_indices]
    
    val_normal_indices = y_fold_val == 0
    val_faulty_indices = y_fold_val == 1
    
    X_val_normal = X_fold_val[val_normal_indices]
    X_val_faulty = X_fold_val[val_faulty_indices]
    
    print(f"Training - Normal: {len(X_train_normal)}, Faulty: {len(X_train_faulty)}")
    print(f"Validation - Normal: {len(X_val_normal)}, Faulty: {len(X_val_faulty)}")
    
    # Combine generated data with real normal data.
    # generated_data is the same array in every fold (it is not regenerated here).
    combine_data_normal = np.concatenate((generated_data, X_train_normal), axis=0)
    
    # Process datasets
    # NOTE(review): process_dataset_multichannel is defined elsewhere; the code
    # below appears to expect a 2-D (rows, 4096) array from it -- confirm.
    print("Processing datasets...")
    combine_data_processed = process_dataset_multichannel(combine_data_normal)
    X_val_normal_processed = process_dataset_multichannel(X_val_normal)
    X_val_faulty_processed = process_dataset_multichannel(X_val_faulty)
    
    # Combine validation data: normal rows first, then faulty rows, with
    # labels (0 / 1) built from the lengths of the processed arrays.
    X_val_combined = np.concatenate([X_val_normal_processed, X_val_faulty_processed])
    y_val_combined = np.concatenate([np.zeros(len(X_val_normal_processed)), 
                                   np.ones(len(X_val_faulty_processed))])
    
    # Train autoencoder on the processed normal (generated + real) data only
    print("Training Enhanced Autoencoder...")
    model = train_enhanced_autoencoder(combine_data_processed, epochs=25, batch_size=32)
    
    # Insert a length-1 axis at position 1 so that compute_reconstruction_loss
    # takes its 3-D (samples, channels, features) branch
    X_val_combined_expanded = X_val_combined[:, np.newaxis, :]
    combine_data_processed_expanded = combine_data_processed[:, np.newaxis, :]
    
    # Comprehensive evaluation.
    # NOTE(review): the threshold strategies inside comprehensive_anomaly_evaluation
    # are tuned on this same validation split and its labels, so the reported
    # scores are optimistic.
    print("Performing comprehensive evaluation...")
    fold_results = comprehensive_anomaly_evaluation(
        model, combine_data_processed_expanded, X_val_combined_expanded, 
        y_val_combined, f"ConditionalGAN-Fold-{fold+1}"
    )
    
    all_fold_results.append(fold_results)
    
    # Print fold summary (F1 and accuracy per strategy)
    print(f"\nFold {fold+1} Results:")
    for method, result in fold_results.items():
        metrics = result['metrics']
        print(f"{method:20s} | F1: {metrics['f1']:.4f} | Acc: {metrics['accuracy']:.4f}")

# Statistical analysis
def perform_statistical_analysis(all_fold_results):
    """Aggregate per-fold metrics into summary statistics.

    Args:
        all_fold_results: list with one dict per fold, shaped
            ``{method: {'metrics': {'accuracy', 'precision', 'recall', 'f1'}}}``.
            The method names are taken from the first fold.

    Returns:
        ``{method: {metric: {'mean', 'std', 'min', 'max', 'median'}}}`` where
        each statistic is computed with NumPy over the folds.
    """
    metric_names = ('accuracy', 'precision', 'recall', 'f1')
    reducers = (
        ('mean', np.mean),
        ('std', np.std),
        ('min', np.min),
        ('max', np.max),
        ('median', np.median),
    )

    def _summarise(method, metric):
        # Collect this metric across folds, then apply every reducer to it.
        per_fold = [fold[method]['metrics'][metric] for fold in all_fold_results]
        return {label: reduce_fn(per_fold) for label, reduce_fn in reducers}

    return {
        method: {metric: _summarise(method, metric) for metric in metric_names}
        for method in list(all_fold_results[0].keys())
    }

def rank_methods(stats_summary):
    """Print and return the methods ordered by mean F1 score, best first.

    Args:
        stats_summary: mapping ``{method: {'f1': {'mean': ..., 'std': ...}, ...}}``
            as produced by ``perform_statistical_analysis``.

    Returns:
        List of ``(method, mean_f1)`` tuples in descending order of mean F1;
        methods with equal scores keep their original relative order.
    """
    ranking = sorted(
        ((name, summary['f1']['mean']) for name, summary in stats_summary.items()),
        key=lambda pair: pair[1],
        reverse=True,
    )

    rule = "="*60
    print("\n" + rule)
    print("METHOD RANKING (Based on Mean F1 Score)")
    print(rule)

    for position, (name, mean_f1) in enumerate(ranking, start=1):
        spread = stats_summary[name]['f1']['std']
        print(f"{position}. {name:20s} | F1: {mean_f1:.4f} ± {spread:.4f}")

    return ranking

_rule = "="*80

print("\n" + _rule)
print("STATISTICAL ANALYSIS ACROSS ALL FOLDS")
print(_rule)

stats_summary = perform_statistical_analysis(all_fold_results)
method_ranking = rank_methods(stats_summary)

# Summary table: one row per method, every metric rendered as "mean ± std".
_table_columns = (
    ('F1 Score', 'f1'),
    ('Accuracy', 'accuracy'),
    ('Precision', 'precision'),
    ('Recall', 'recall'),
)
summary_data = []
for method in stats_summary:
    row = {'Method': method}
    for _column, _metric in _table_columns:
        _cell = stats_summary[method][_metric]
        row[_column] = f"{_cell['mean']:.4f} ± {_cell['std']:.4f}"
    summary_data.append(row)

summary_df = pd.DataFrame(summary_data)
print("\nFinal Summary:")
print(summary_df.to_string(index=False))

print("\n" + _rule)
print("CONDITIONAL GAN ANOMALY DETECTION RECOMMENDATIONS")
print(_rule)

# The ranking is sorted best-first, so its head is the winning method.
best_method, best_f1 = method_ranking[0]
print(f"🏆 BEST METHOD: {best_method}")
print(f"   F1 Score: {best_f1:.4f} ± {stats_summary[best_method]['f1']['std']:.4f}")

for _line in (
    "\n🎯 CONDITIONAL GAN BENEFITS:",
    "   • Label-aware generation improves synthetic data quality",
    "   • Condition on normal class for better anomaly detection",
    "   • Enhanced stability with label smoothing and adaptive training",
    "   • Multi-channel processing captures cross-feature relationships",
):
    print(_line)

print(_rule)
