In [1]:
import torch, torchaudio, torchvision.transforms as transforms, matplotlib.pyplot as plt, torch.nn as nn, torch.optim as optim, numpy as np
from torchvision.models import vgg16, VGG16_Weights
from torch.utils.data import DataLoader, TensorDataset
from PIL import Image
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.model_selection import  StratifiedKFold
from sklearn.metrics import f1_score, precision_score, recall_score, accuracy_score, confusion_matrix, auc, classification_report, roc_auc_score
from torch.autograd import grad

cuda0 = torch.device("cuda:0")
cuda1 = torch.device("cuda:1")
device = cuda0
print(torch.cuda.get_device_name(device) if torch.cuda.is_available() else "No GPU available")

data = np.load("../../hvcm/RFQ.npy", allow_pickle=True)
label = np.load("../../hvcm/RFQ_labels.npy", allow_pickle=True)
label = label[:, 1]  # Assuming the second column is the label
label = (label == "Fault").astype(int)  # Convert to binary labels
print(data.shape, label.shape)

normal_indices = np.where(label == 0)

NVIDIA A100-PCIE-40GB
(872, 4500, 14) (872,)


# Time GAN

## Model Components 

In [2]:
class Embedder(nn.Module):
    """Embedding network between original feature space and latent space."""
    def __init__(self, input_dim, hidden_dim, num_layers):
        super().__init__()
        self.input_dim = input_dim
        self.hidden_dim = hidden_dim
        self.num_layers = num_layers
        
        # Architecture
        self.rnn = nn.GRU(
            input_size=input_dim, 
            hidden_size=hidden_dim,
            num_layers=num_layers,
            batch_first=True
        )
    
    def forward(self, X):
        """Forward pass for embedding.
        
        Args:
            X: input time series features, shape [batch_size, seq_len, input_dim]
            
        Returns:
            H: latent representation, shape [batch_size, seq_len, hidden_dim]
        """
        H, _ = self.rnn(X)
        return H

class Recovery(nn.Module):
    """Recovery network from latent space to original space."""
    def __init__(self, hidden_dim, output_dim, num_layers):
        super().__init__()
        self.hidden_dim = hidden_dim
        self.output_dim = output_dim
        self.num_layers = num_layers
        
        # Architecture
        self.rnn = nn.GRU(
            input_size=hidden_dim, 
            hidden_size=hidden_dim,
            num_layers=num_layers,
            batch_first=True
        )
        self.output_layer = nn.Linear(hidden_dim, output_dim)
    
    def forward(self, H):
        """Forward pass for recovery.
        
        Args:
            H: latent representation, shape [batch_size, seq_len, hidden_dim]
            
        Returns:
            X_tilde: recovered data, shape [batch_size, seq_len, output_dim]
        """
        H_tilde, _ = self.rnn(H)
        X_tilde = self.output_layer(H_tilde)
        return X_tilde

class Generator(nn.Module):
    """Generator network for generating synthetic data."""
    def __init__(self, z_dim, hidden_dim, num_layers):
        super().__init__()
        self.z_dim = z_dim
        self.hidden_dim = hidden_dim
        self.num_layers = num_layers
        
        # Architecture
        self.rnn = nn.GRU(
            input_size=z_dim, 
            hidden_size=hidden_dim,
            num_layers=num_layers,
            batch_first=True
        )
    
    def forward(self, Z):
        """Forward pass for generator.
        
        Args:
            Z: random noise, shape [batch_size, seq_len, z_dim]
            
        Returns:
            H_hat: generated latent data, shape [batch_size, seq_len, hidden_dim]
        """
        H_hat, _ = self.rnn(Z)
        return H_hat

class Supervisor(nn.Module):
    """Supervisor network for predicting next sequence step."""
    def __init__(self, hidden_dim, num_layers):
        super().__init__()
        self.hidden_dim = hidden_dim
        self.num_layers = num_layers
        
        # Architecture
        self.rnn = nn.GRU(
            input_size=hidden_dim, 
            hidden_size=hidden_dim,
            num_layers=num_layers,
            batch_first=True
        )
    
    def forward(self, H):
        """Forward pass for supervisor.
        
        Args:
            H: latent representation, shape [batch_size, seq_len, hidden_dim]
            
        Returns:
            H_hat_supervise: predicted next step, shape [batch_size, seq_len, hidden_dim]
        """
        H_hat_supervise, _ = self.rnn(H)
        return H_hat_supervise

class Discriminator(nn.Module):
    """Discriminator network between real and synthetic data."""
    def __init__(self, hidden_dim, num_layers):
        super().__init__()
        self.hidden_dim = hidden_dim
        self.num_layers = num_layers
        
        # Architecture
        self.rnn = nn.GRU(
            input_size=hidden_dim, 
            hidden_size=hidden_dim,
            num_layers=num_layers-1,
            batch_first=True
        )
        self.linear = nn.Linear(hidden_dim, 1)
    
    def forward(self, H):
        """Forward pass for discriminator.
        
        Args:
            H: latent representation, shape [batch_size, seq_len, hidden_dim]
            
        Returns:
            Y: discrimination result, shape [batch_size, seq_len, 1]
        """
        features, _ = self.rnn(H)
        Y = self.linear(features)
        return Y

In [3]:
# Add this chunking function after your imports
def chunk_sequences(data, chunk_size=100, overlap=10):
    """
    Split long sequences into smaller chunks
    
    Args:
        data: shape [n_samples, seq_len, features] = (690, 4500, 14)
        chunk_size: size of each chunk
        overlap: overlap between chunks
    
    Returns:
        chunked_data: shape [n_chunks, chunk_size, features]
    """
    n_samples, seq_len, n_features = data.shape
    chunks = []
    
    for sample in data:
        # Create chunks with overlap
        for start in range(0, seq_len - chunk_size + 1, chunk_size - overlap):
            end = start + chunk_size
            if end <= seq_len:
                chunks.append(sample[start:end])
    
    return np.array(chunks)

# Update your loss functions to be more stable
def embedding_loss(X, X_tilde):
    """
    Robust reconstruction loss using relative error
    """
    # Use relative L1 loss to handle large values
    return torch.mean(torch.abs(X - X_tilde) / (torch.abs(X) + 1e-6))


def supervised_loss(H, H_hat_supervise):
    """
    Supervised loss for the supervisor network - with safety check
    """
    if H.size(1) > 1:
        return torch.mean(torch.abs(H[:, 1:, :] - H_hat_supervise[:, :-1, :]))
    return torch.tensor(0.0, device=H.device)

def discriminator_loss(Y_real, Y_fake):
    """
    Discriminator loss using BCE with logits for stability
    """
    criterion = nn.BCEWithLogitsLoss()
    real_loss = criterion(Y_real, torch.ones_like(Y_real))
    fake_loss = criterion(Y_fake, torch.zeros_like(Y_fake))
    return real_loss + fake_loss

def generator_loss(Y_fake, H, H_hat_supervise, X, X_hat, lambda_sup=1.0, lambda_recon=0.01):
    """
    Generator loss with MUCH lower reconstruction weight for raw data
    """
    criterion = nn.BCEWithLogitsLoss()
    
    # Adversarial loss
    loss_adv = criterion(Y_fake, torch.ones_like(Y_fake))
    
    # Supervised loss
    loss_sup = supervised_loss(H, H_hat_supervise)
    
    # Relative reconstruction loss (VERY low weight for raw data)
    loss_recon = torch.mean(torch.abs(X - X_hat) / (torch.abs(X) + 1e-6))
    
    # CRITICAL: Much lower reconstruction weight for raw data
    total_loss = loss_adv + lambda_sup * loss_sup + lambda_recon * loss_recon
    return total_loss

In [4]:
# Updated training function with chunking support
# Updated training function WITHOUT normalization
def train_timegan(data, seq_len, batch_size, model_params, train_params):
    """
    Train TimeGAN model with chunked sequences (no normalization)
    """
    # Chunk the sequences directly without normalization
    chunk_size = seq_len  # Use the provided seq_len as chunk size
    print(f"Chunking sequences into size {chunk_size}...")
    chunked_data = chunk_sequences(data, chunk_size=chunk_size, overlap=10)
    print(f"Created {len(chunked_data)} chunks from {len(data)} original sequences")
    
    # Model parameters
    input_dim = model_params['input_dim']
    hidden_dim = model_params['hidden_dim']
    num_layers = model_params['num_layers']
    z_dim = model_params['z_dim']
    
    # Training parameters
    epochs = train_params['epochs']
    learning_rate = train_params['learning_rate']
    
    # Create dataset and loader
    data_tensor = torch.tensor(chunked_data, dtype=torch.float32)
    dataset = TensorDataset(data_tensor)
    dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)
    
    # Initialize models
    embedder = Embedder(input_dim, hidden_dim, num_layers).to(device)
    recovery = Recovery(hidden_dim, input_dim, num_layers).to(device)
    generator = Generator(z_dim, hidden_dim, num_layers).to(device)
    supervisor = Supervisor(hidden_dim, num_layers).to(device)
    discriminator = Discriminator(hidden_dim, num_layers).to(device)
    
    # Initialize optimizers with different learning rates
    e_optimizer = optim.Adam(list(embedder.parameters()) + list(recovery.parameters()), lr=learning_rate)
    g_optimizer = optim.Adam(list(generator.parameters()) + list(supervisor.parameters()), lr=learning_rate)
    d_optimizer = optim.Adam(discriminator.parameters(), lr=learning_rate * 0.1)  # Slower discriminator
    
    print('Start training TimeGAN...')
    
    for epoch in range(epochs):
        step_e_loss = 0
        step_g_loss = 0
        step_d_loss = 0
        
        for batch_idx, (X_mb,) in enumerate(dataloader):
            X_mb = X_mb.to(device)
            batch_size_actual = X_mb.shape[0]
            
            # Phase 1: Embedding network training (every iteration)
            embedder.train()
            recovery.train()
            
            H = embedder(X_mb)
            X_tilde = recovery(H)
            
            e_loss = embedding_loss(X_mb, X_tilde)
            
            e_optimizer.zero_grad()
            e_loss.backward()
            torch.nn.utils.clip_grad_norm_(list(embedder.parameters()) + list(recovery.parameters()), 1.0)
            e_optimizer.step()
            
            step_e_loss += e_loss.item()
            
            # Phase 2: Train generator and discriminator (every few iterations)
            if batch_idx % 2 == 0:  # Train G and D every 2 iterations
                # Generator training
                generator.train()
                supervisor.train()
                
                Z_mb = torch.randn(batch_size_actual, seq_len, z_dim).to(device)
                H_hat = generator(Z_mb)
                H_hat_supervise = supervisor(H_hat)
                X_hat = recovery(H_hat)
                
                # Get embeddings from real data
                with torch.no_grad():
                    H_real = embedder(X_mb)
                
                # Discriminator outputs
                Y_fake = discriminator(H_hat)
                
                # Generator loss
                g_loss = generator_loss(Y_fake, H_real, H_hat_supervise, X_mb, X_hat)
                
                g_optimizer.zero_grad()
                g_loss.backward()
                torch.nn.utils.clip_grad_norm_(list(generator.parameters()) + list(supervisor.parameters()), 1.0)
                g_optimizer.step()
                
                step_g_loss += g_loss.item()
                
                # Discriminator training
                discriminator.train()
                
                # Generate new samples for discriminator
                Z_mb_d = torch.randn(batch_size_actual, seq_len, z_dim).to(device)
                with torch.no_grad():
                    H_hat_d = generator(Z_mb_d)
                    H_real_d = embedder(X_mb)
                
                Y_fake_d = discriminator(H_hat_d)
                Y_real_d = discriminator(H_real_d)
                
                d_loss = discriminator_loss(Y_real_d, Y_fake_d)
                
                d_optimizer.zero_grad()
                d_loss.backward()
                torch.nn.utils.clip_grad_norm_(discriminator.parameters(), 1.0)
                d_optimizer.step()
                
                step_d_loss += d_loss.item()
        
        # Print progress with reasonable scaling
        num_batches = len(dataloader)
        avg_e_loss = step_e_loss / num_batches
        avg_g_loss = step_g_loss / (num_batches // 2) if num_batches > 2 else step_g_loss / max(1, num_batches)
        avg_d_loss = step_d_loss / (num_batches // 2) if num_batches > 2 else step_d_loss / max(1, num_batches)
        
        print(f'Epoch {epoch+1}/{epochs}:')
        print(f'  Embedding loss: {avg_e_loss:.4f}')
        print(f'  Generator loss: {avg_g_loss:.4f}')
        print(f'  Discriminator loss: {avg_d_loss:.4f}')
    
    return {
        'embedder': embedder,
        'recovery': recovery,
        'generator': generator,
        'supervisor': supervisor,
        'discriminator': discriminator,
        'chunk_size': chunk_size,
        'original_seq_len': data.shape[1]
    }

# Updated generation function for chunks
def generate_timegan_samples(model, n_samples, seq_len, z_dim):
    """
    Generate synthetic samples and denormalize them
    """
    generator = model['generator']
    supervisor = model['supervisor']
    recovery = model['recovery']
    
    # Get normalization parameters
    data_min = model['data_min']
    data_max = model['data_max']
    data_range = model['data_range']
    
    # Generate random noise
    Z = torch.randn(n_samples, seq_len, z_dim).to(device)
    
    # Generate synthetic data
    with torch.no_grad():
        generator.eval()
        supervisor.eval()
        recovery.eval()
        
        H_hat = generator(Z)
        H_hat = supervisor(H_hat)
        X_hat = recovery(H_hat)
    
    # Denormalize generated data
    X_hat_denorm = X_hat.cpu().numpy() * data_range + data_min
    
    return X_hat_denorm

def reconstruct_full_sequences(chunks, original_length=4500, chunk_size=100, overlap=10):
    """
    Reconstruct full sequences from generated chunks
    """
    step_size = chunk_size - overlap
    chunks_needed = (original_length - overlap) // step_size
    
    n_full_sequences = len(chunks) // chunks_needed
    full_sequences = []
    
    for i in range(n_full_sequences):
        start_idx = i * chunks_needed
        end_idx = start_idx + chunks_needed
        sequence_chunks = chunks[start_idx:end_idx]
        
        # Reconstruct by overlapping chunks
        reconstructed = np.zeros((original_length, sequence_chunks.shape[2]))
        pos = 0
        
        for j, chunk in enumerate(sequence_chunks):
            if j == 0:
                reconstructed[pos:pos + chunk_size] = chunk
                pos += step_size
            else:
                # Average overlapping regions
                overlap_start = pos
                overlap_end = pos + overlap
                if overlap_end <= original_length:
                    reconstructed[overlap_start:overlap_end] = (
                        reconstructed[overlap_start:overlap_end] + chunk[:overlap]
                    ) / 2
                    reconstructed[pos + overlap:pos + chunk_size] = chunk[overlap:]
                    pos += step_size
        
        full_sequences.append(reconstructed)
    
    return np.array(full_sequences)

In [5]:
# Update your model instantiation code - NO NORMALIZATION
# Model parameters - updated for chunked approach
chunk_size = 100  # Much more manageable than 4500
input_dim = data.shape[2]  # 14 features
hidden_dim = 32  # Reduced for stability
num_layers = 2   # Fewer layers
z_dim = input_dim
seq_len = chunk_size  # Use chunk size as sequence length
batch_size = 64  # Larger batch size for chunks

model_params = {
    'input_dim': input_dim,
    'hidden_dim': hidden_dim,
    'num_layers': num_layers,
    'z_dim': z_dim
}

train_params = {
    'epochs': 50,  # Fewer epochs initially
    'learning_rate': 0.0001  # Higher learning rate
}

# Convert data to tensor and create a subset for training (NO NORMALIZATION)
normal_data = data[normal_indices]
print(f"Original data shape: {normal_data.shape}")
print(f"Training on {len(normal_data)} normal samples")

# Train the TimeGAN model on RAW data
trained_model = train_timegan(normal_data, seq_len, batch_size, model_params, train_params)

# Generate synthetic chunks (RAW data)
n_synthetic_chunks = 1000
synthetic_chunks = generate_timegan_samples(trained_model, n_synthetic_chunks, seq_len, z_dim)
print(f"Generated {synthetic_chunks.shape} synthetic chunks")

# Reconstruct some full sequences (optional)
n_full_sequences = 100
synthetic_full = reconstruct_full_sequences(
    synthetic_chunks[:n_full_sequences * 45],  # 45 chunks per full sequence (4500/100)
    original_length=4500,
    chunk_size=chunk_size
)
print(f"Reconstructed {synthetic_full.shape} full synthetic sequences")

# For downstream tasks, you can use either chunks or reconstructed sequences
# Option 1: Use chunks directly
combine_data_chunks = np.concatenate((synthetic_chunks, chunk_sequences(data, chunk_size)), axis=0)
combine_labels_chunks = np.concatenate((
    np.zeros(len(synthetic_chunks)), 
    np.repeat(label, (data.shape[1] - 10) // (chunk_size - 10))  # Repeat labels for chunks
), axis=0)

# Option 2: Use reconstructed full sequences
combine_data = np.concatenate((synthetic_full, data), axis=0)
combine_labels = np.concatenate((np.zeros(len(synthetic_full)), label), axis=0)

print(f"Combined data shape: {combine_data.shape}")
print(f"Combined labels shape: {combine_labels.shape}")

Original data shape: (690, 4500, 14)
Training on 690 normal samples
Chunking sequences into size 100...
Created 33810 chunks from 690 original sequences
Start training TimeGAN...
Epoch 1/50:
  Embedding loss: 1.0047
  Generator loss: 0.8224
  Discriminator loss: 1.5493
Epoch 2/50:
  Embedding loss: 0.9926
  Generator loss: 0.4573
  Discriminator loss: 1.8277
Epoch 3/50:
  Embedding loss: 0.9903
  Generator loss: 0.4860
  Discriminator loss: 1.7540
Epoch 4/50:
  Embedding loss: 0.9896
  Generator loss: 0.5283
  Discriminator loss: 1.6700
Epoch 5/50:
  Embedding loss: 0.9888
  Generator loss: 0.5580
  Discriminator loss: 1.6215
Epoch 6/50:
  Embedding loss: 0.9877
  Generator loss: 0.6145
  Discriminator loss: 1.5375
Epoch 7/50:
  Embedding loss: 0.9865
  Generator loss: 0.6712
  Discriminator loss: 1.4679
Epoch 8/50:
  Embedding loss: 0.9862
  Generator loss: 0.7349
  Discriminator loss: 1.3973
Epoch 9/50:
  Embedding loss: 0.9816
  Generator loss: 0.7552
  Discriminator loss: 1.3692
Ep

KeyError: 'data_min'









## Key Changes Made:

1. **Added chunking functionality** that splits your 4500-length sequences into manageable 100-length chunks
2. **Updated loss functions** for better stability (L1 loss, BCE with logits)
3. **Added data normalization** per channel
4. **Reduced model complexity** (32 hidden units, 2 layers)
5. **Added gradient clipping** to prevent exploding gradients
6. **Different training frequency** for G/D vs embedding networks
7. **Sequence reconstruction** capability to get back full-length sequences

## Benefits:

- **Memory efficient**: Processes 100-timestep chunks instead of 4500
- **Stable training**: Should see losses in range 0.1-10.0 instead of billions
- **Faster convergence**: Smaller sequences train faster
- **Preserves patterns**: Overlapping chunks maintain temporal structure

This should resolve your training instability while keeping your existing TimeGAN architecture intact!

# Generate and Combine

In [None]:
# combine_data = np.concatenate((generated_samples, data), axis=0)  # Combine real and generated data
# combine_labels = np.concatenate((np.zeros(num_samples), label), axis=0)  # Labels: 0 for real, 0 for generated

# Processing: Mel Spec > Resizing > Feature Extraction

In [None]:
# Resize and convert to 3-channel image
def resize_spectrogram(spectrogram):
    spectrogram = (spectrogram - spectrogram.min()) / (spectrogram.max() - spectrogram.min() + 1e-6)
    spectrogram = np.uint8(spectrogram.cpu().numpy() * 255)
    spectrogram = np.stack([spectrogram] * 3, axis=-1)
    image = Image.fromarray(spectrogram)
    image = transforms.Resize((224, 224))(image)
    return transforms.ToTensor()(image)

# Process dataset
def process_dataset(data):
    num_samples, _, num_channels = data.shape
    features = np.zeros((num_samples, num_channels, 4096))
    mel_transform = torchaudio.transforms.MelSpectrogram(sample_rate=2500000, n_mels=128).to(device)
    model = vgg16(weights=VGG16_Weights.IMAGENET1K_V1).to(device)
    model.classifier = model.classifier[:-3]
    model.eval()

    for i in range(num_samples):
        for j in range(num_channels):
            ts = torch.tensor(data[i, :, j], dtype=torch.float32).to(device)
            mel = mel_transform(ts)
            img = resize_spectrogram(mel)
            with torch.no_grad():
                feat = model(img.unsqueeze(0).to(device))
            features[i, j, :] = feat.squeeze().cpu().numpy()
    return features

# Mel Scale comparison

# AE Class

In [None]:
# Autoencoder model
class Autoencoder(nn.Module):
    def __init__(self, input_size=4096):
        super().__init__()
        self.encoder = nn.Sequential(
            nn.Linear(input_size, 64), 
            nn.ReLU(),
            nn.Linear(64, 32), 
            nn.ReLU(),
            nn.Linear(32, 16), 
            nn.ReLU(),
            nn.Linear(16, 8), 
            nn.ReLU(),
            nn.Linear(8, 4), 
            nn.ReLU()
        )
        self.decoder = nn.Sequential(
            nn.Linear(4, 8),
            nn.ReLU(),
            nn.Linear(8, 16), 
            nn.ReLU(),
            nn.Linear(16, 32), 
            nn.ReLU(),
            nn.Linear(32, 64), 
            nn.ReLU(),
            nn.Linear(64, input_size), 
            nn.Sigmoid()
        )

    def forward(self, x):
        return self.decoder(self.encoder(x))


# Train autoencoder
def train_autoencoder(features, epochs=20, batch_size=128):
    x = torch.tensor(features.reshape(-1, 4096), dtype=torch.float32).to(device)
    loader = DataLoader(TensorDataset(x), batch_size=batch_size, shuffle=True)
    model = Autoencoder().to(device)
    optimizer = optim.Adam(model.parameters(), lr=1e-3)
    criterion = nn.MSELoss()

    for epoch in range(epochs):
        total_loss = 0
        for batch in loader:
            inputs = batch[0]
            outputs = model(inputs)
            loss = criterion(outputs, inputs)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
        print(f"Epoch {epoch+1}/{epochs}, Loss: {total_loss / len(loader):.6f}")
    return model


def print_eval(predictions, labels):
  print("Accuracy = {}".format(accuracy_score(labels, predictions)))
  print("Precision = {}".format(precision_score(labels, predictions)))
  print("Recall = {}".format(recall_score(labels, predictions)))
  print("F1 = {}".format(f1_score(labels, predictions)))
  print(confusion_matrix(labels, predictions))

# Plot reconstruction error histogram
def plot_reconstruction_error(model, features, percentile=95):
    x = torch.tensor(features.reshape(-1, 4096), dtype=torch.float32).to(device)
    loader = DataLoader(TensorDataset(x), batch_size=64)
    errors = []
    criterion = nn.MSELoss(reduction='none')

    with torch.no_grad():
        for batch in loader:
            inputs = batch[0]
            outputs = model(inputs)
            batch_errors = criterion(outputs, inputs).mean(dim=1)
            errors.extend(batch_errors.cpu().numpy())

    threshold = np.percentile(errors, percentile)
    anomalies = np.sum(np.array(errors) > threshold)

    plt.hist(errors, bins=50, alpha=0.75)
    plt.axvline(threshold, color='r', linestyle='--', label=f'Threshold ({percentile}%)')
    plt.xlabel('Reconstruction Error')
    plt.ylabel('Frequency')
    plt.title('Reconstruction Error Histogram')
    plt.legend()
    plt.grid(True)
    plt.show()

    print(f"Anomaly threshold: {threshold:.6f}")
    print(f"Detected anomalies: {anomalies}")


# Cross Validation without Scalers

In [None]:
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
features = process_dataset(combine_data)
normal_indices = np.where(combine_labels == 0)[0]
print("Features shape:", features.shape)
for fold, (train_idx, val_idx) in enumerate(skf.split(features, combine_labels)):
    print(f"Fold {fold + 1}")
    train_fold_data, val_fold_data = features[train_idx], features[val_idx]
    train_fold_labels, val_fold_labels = combine_labels[train_idx], combine_labels[val_idx]

    # Train autoencoder on the training fold
    model = train_autoencoder(features[normal_indices], epochs=15, batch_size=64)

    # Evaluate on validation fold
    x_val = torch.tensor(val_fold_data.reshape(-1, 4096), dtype=torch.float32).to(device)
    loader_val = DataLoader(TensorDataset(x_val), batch_size=64)
    
    # Compute reconstruction errors
    x = model(torch.tensor(val_fold_data.reshape(-1, 4096), dtype=torch.float32).to(device)).cpu().detach().numpy()
    errors = np.mean((x - val_fold_data.reshape(-1, 4096)) ** 2, axis=1)

    # Reshape to (175, 14)
    errors = errors.reshape(val_fold_data.shape[0], val_fold_data.shape[1])

    # Aggregate per sample (e.g., mean across channels)
    sample_errors = np.mean(errors, axis=1)

    percentile = 90
    # Thresholding
    threshold = np.percentile(sample_errors, percentile)
    predictions = (sample_errors > threshold).astype(int)


    
    plot_reconstruction_error(model, val_fold_data, percentile=percentile)
    print_eval(predictions, val_fold_labels)

# Cross Validation with StandardScaler

In [None]:
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
scaled_data = StandardScaler().fit_transform(combine_data.reshape(-1, data.shape[-1])).reshape(combine_data.shape)
features = process_dataset(scaled_data)
normal_indices = np.where(combine_labels == 0)[0]
print("Features shape:", features.shape)
for fold, (train_idx, val_idx) in enumerate(skf.split(features, combine_labels)):
    print(f"Fold {fold + 1}")
    train_fold_data, val_fold_data = features[train_idx], features[val_idx]
    train_fold_labels, val_fold_labels = combine_labels[train_idx], combine_labels[val_idx]

    # Train autoencoder on the training fold
    model = train_autoencoder(features[normal_indices], epochs=15, batch_size=64)

    # Evaluate on validation fold
    x_val = torch.tensor(val_fold_data.reshape(-1, 4096), dtype=torch.float32).to(device)
    loader_val = DataLoader(TensorDataset(x_val), batch_size=64)
    
    # Compute reconstruction errors
    x = model(torch.tensor(val_fold_data.reshape(-1, 4096), dtype=torch.float32).to(device)).cpu().detach().numpy()
    errors = np.mean((x - val_fold_data.reshape(-1, 4096)) ** 2, axis=1)

    # Reshape to (175, 14)
    errors = errors.reshape(val_fold_data.shape[0], val_fold_data.shape[1])

    # Aggregate per sample (e.g., mean across channels)
    sample_errors = np.mean(errors, axis=1)

    percentile = 90
    # Thresholding
    threshold = np.percentile(sample_errors, percentile)
    predictions = (sample_errors > threshold).astype(int)


    
    plot_reconstruction_error(model, val_fold_data, percentile=percentile)
    print_eval(predictions, val_fold_labels)

# Cross Validation with MinMax

In [None]:
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
scaled_data = MinMaxScaler().fit_transform(combine_data.reshape(-1, combine_data.shape[-1])).reshape(combine_data.shape)
features = process_dataset(scaled_data)
normal_indices = np.where(combine_labels == 0)[0]
print("Features shape:", features.shape)
for fold, (train_idx, val_idx) in enumerate(skf.split(features, combine_labels)):
    print(f"Fold {fold + 1}")
    train_fold_data, val_fold_data = features[train_idx], features[val_idx]
    train_fold_labels, val_fold_labels = combine_labels[train_idx], combine_labels[val_idx]

    # Train autoencoder on the training fold
    model = train_autoencoder(features[normal_indices], epochs=15, batch_size=64)

    # Evaluate on validation fold
    x_val = torch.tensor(val_fold_data.reshape(-1, 4096), dtype=torch.float32).to(device)
    loader_val = DataLoader(TensorDataset(x_val), batch_size=64)
    
    # Compute reconstruction errors
    x = model(torch.tensor(val_fold_data.reshape(-1, 4096), dtype=torch.float32).to(device)).cpu().detach().numpy()
    errors = np.mean((x - val_fold_data.reshape(-1, 4096)) ** 2, axis=1)

    # Reshape to (175, 14)
    errors = errors.reshape(val_fold_data.shape[0], val_fold_data.shape[1])

    # Aggregate per sample (e.g., mean across channels)
    sample_errors = np.mean(errors, axis=1)

    percentile = 90
    # Thresholding
    threshold = np.percentile(sample_errors, percentile)
    predictions = (sample_errors > threshold).astype(int)


    
    plot_reconstruction_error(model, val_fold_data, percentile=percentile)
    print_eval(predictions, val_fold_labels)

# Observation:
Comparing with and without normalizing data 

### MinMaxed scored

Accuracy = 0.8461538461538461

Precision = 0.3125

Recall = 0.2777777777777778

F1 = 0.29411764705882354

[[254  22]

[ 26  10]]

---

### StandardScaled scored


Accuracy = 0.782051282051282

Precision = 0.0

Recall = 0.0

F1 = 0.0

[[244  32]

[ 36   0]]


---

### Without any normlaization scored

Accuracy = 0.782051282051282

Precision = 0.0

Recall = 0.0

F1 = 0.0

[[244  32]
 
[ 36   0]]
