<a href="https://colab.research.google.com/github/ken00H/Csharp-windows-form-sample/blob/master/Voice.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import os
import kagglehub
import shutil

# Define the target directory
target_dir = "/content/DataSet"
os.makedirs(target_dir, exist_ok=True)

# Download and extract the dataset to the default KaggleHub cache location
# The 'path' argument in dataset_download seems to be causing issues when trying to set the destination directly
downloaded_path_in_cache = kagglehub.dataset_download("birdy654/deep-voice-deepfake-voice-recognition")

# Move the contents from the downloaded cache path to the target directory
for item in os.listdir(downloaded_path_in_cache):
    source_item = os.path.join(downloaded_path_in_cache, item)
    destination_item = os.path.join(target_dir, item)
    # Use shutil.move to move files and directories
    shutil.move(source_item, destination_item)

# Optional: Remove the now empty cache directory if it's not the root of the cache
# Check if the directory is empty before removing
if not os.listdir(downloaded_path_in_cache):
    os.rmdir(downloaded_path_in_cache)

# The final path where the dataset contents are located is the target_dir
path = target_dir

print(f"Dataset downloaded and extracted to: {path}")
print(f"Contents of {path}: {os.listdir(path)}")

In [None]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
# torchaudio, librosa are no longer needed as we're using pre-extracted features
import numpy as np
import warnings
import random
import time
import logging
import signal
import sys
from torch.utils.data import Dataset, DataLoader
# torchaudio.transforms, MFCC, FrequencyMasking, TimeMasking are no longer needed
# torchvision.models.resnet18 is no longer needed
import optuna
import pandas as pd

# --- Configuration & Device ---
warnings.filterwarnings("ignore")
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Detect if running on Google Colab
IS_COLAB = False
try:
    import google.colab  # type: ignore
    IS_COLAB = True
except (ImportError, ModuleNotFoundError):
    IS_COLAB = False

# Update this to your path
# For Colab: Use "/content/your_data_path" or mount Google Drive
# For local: Use your local path
if IS_COLAB:
    # Colab path - update this to your Colab data path
    DATA_PATH = "/content/DataSet/KAGGLE/AUDIO"  # Update this! (though now it points to parent of CSV)
    # Or mount Google Drive:
    # from google.colab import drive
    # drive.mount('/content/drive')
    # DATA_PATH = "/content/drive/MyDrive/your_data_path"
else:
    # Local Windows path
    DATA_PATH = r"C:\Users\Eng Jamal\Downloads\Compressed\DeepFake Voice Recognition\KAGGLE\AUDIO"

# Segment extraction settings (no longer relevant but kept for now)
SEGMENT_DURATION = 3  # seconds
SEGMENT_STRIDE = 0.5  # Fraction of window length (0.5 = 50% overlap). Set to 1.0 for no overlap.

# Optuna hyperparameter search settings
if IS_COLAB:
    OPTUNA_N_TRIALS = 15  # More trials on Colab (faster GPUs, more time)
else:
    OPTUNA_N_TRIALS = 5  # Fewer trials locally (slower, time-constrained)

# Colab-optimized settings
if IS_COLAB:
    # Colab typically has better GPUs (T4/V100/A100) with 16GB+ VRAM
    DEFAULT_BATCH_SIZE = 16  # Can go higher (32-64) depending on GPU
    USE_PIN_MEMORY = True  # Faster data transfer on Colab
    NUM_WORKERS = 2  # Can use multiple workers on Colab
else:
    # Local settings (conservative for GTX 1050 Ti with 4GB VRAM)
    DEFAULT_BATCH_SIZE = 4
    USE_PIN_MEMORY = False
    NUM_WORKERS = 0  # Avoid CUDA context issues on Windows

# Basic validation to fail fast if the path is incorrect
if not os.path.exists(DATA_PATH):
    logger.warning(f"DATA_PATH does not exist: {DATA_PATH!r}. This might be acceptable if DATASET-balanced.csv is in a parent directory.")

# Configure logging
logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s: %(message)s")
logger = logging.getLogger(__name__)

# Log CUDA / device info at startup
if IS_COLAB:
    logger.info("Running on Google Colab - using optimized settings")
if torch.cuda.is_available():
    logger.info("CUDA available: %d devices", torch.cuda.device_count())
    for i in range(torch.cuda.device_count()):
        try:
            gpu_name = torch.cuda.get_device_name(i)
            gpu_memory = torch.cuda.get_device_properties(i).total_memory / (1024**3)
            logger.info("GPU %d: %s (%.1f GB)", i, gpu_name, gpu_memory)
        except Exception:
            logger.info("GPU %d: (name unavailable)", i)
else:
    logger.info("CUDA not available — using CPU")

# --- Reproducibility seeds ---
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(SEED)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False
logger.info("Random seed set to %d", SEED)

# --- Global model for interrupt saving ---
current_model = None

def save_model_on_interrupt(signum, frame):
    if current_model is not None:
        checkpoint_path = "interrupted_model.pth"
        torch.save(current_model.state_dict(), checkpoint_path)
        logger.info("Training interrupted. Model saved to %s", checkpoint_path)
    sys.exit(0)

# --- Segment Generation Helper --- (NO LONGER USED AND CAN BE REMOVED)
def generate_segments_from_files(file_list, sample_rate=16000, duration=3, stride=0.5):
    """
    This function is no longer used as features are pre-extracted.
    Keeping a stub for compatibility until fully removed.
    """
    logger.warning("generate_segments_from_files is deprecated and should not be called with CSV features.")
    return file_list # Simply return the list of (feature, label) tuples

# --- Dataset Class --- UPDATED!
class UltimateForensicDataset(Dataset):
    def __init__(self, data_list, is_train=True, device=device):
        """
        data_list: List of (feature_vector, label) tuples
        Each tuple represents one entry from the CSV.
        """
        self.data_list = data_list
        self.is_train = is_train
        self.device = device

        # Audio transforms are no longer needed
        # self.mfcc_transform = MFCC(...)
        # self.freq_mask = FrequencyMasking(...)
        # self.time_mask = TimeMasking(...)
        # logger.info("Removed audio transforms as using pre-extracted features.")

    def __len__(self):
        return len(self.data_list)

    # simulate_compression is no longer needed
    # def simulate_compression(self, audio):
    #     ...

    def __getitem__(self, idx):
        features, label = self.data_list[idx]

        # Features are already processed and ready
        features = torch.from_numpy(features).float()

        # No more audio loading or MFCC extraction
        # if getattr(self, "device", None) is not None and getattr(self.device, "type", "cpu") == "cuda":
        #     features = features.to(self.device)

        return features, torch.tensor(label, dtype=torch.float32)

# --- Model Architecture --- UPDATED to UltimateMLP!
class UltimateMLP(nn.Module):
    def __init__(self, input_size=26): # 26 features as per task
        super(UltimateMLP, self).__init__()
        self.network = nn.Sequential(
            nn.Linear(input_size, 128),
            nn.BatchNorm1d(128),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(128, 64),
            nn.BatchNorm1d(64),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(64, 1),
            nn.Sigmoid()
        )

    def forward(self, x):
        return self.network(x)

# --- File Collection Helper --- (Already updated, included for completeness)
def collect_files_from_csv_or_folders(data_path):
    """
    Reads specified feature columns and the 'LABEL' column from DATASET-balanced.csv.
    Returns a list of (feature_vector, label) pairs.
    """
    all_data = []

    feature_columns = [
        'chroma_stft', 'rms', 'spectral_centroid', 'spectral_bandwidth', 'rolloff', 'zero_crossing_rate',
    ] + [f'mfcc{i}' for i in range(1, 21)]

    # Try multiple possible CSV locations (Colab and local compatible)
    csv_candidates = [
        os.path.join(data_path, "DATASET-balanced.csv"),  # In data folder
        os.path.join(os.path.dirname(data_path), "DATASET-balanced.csv"),  # Parent folder
    ]

    # Colab-specific paths
    if IS_COLAB:
        csv_candidates.extend([
            "/content/DataSet/KAGGLE/DATASET-balanced.csv",  # Specific Colab path from analysis
            "/content/drive/MyDrive/DataSet/KAGGLE/DATASET-balanced.csv",  # If using Google Drive
        ])
    else:
        # Local Windows paths
        csv_candidates.append(r"C:\Users\Eng Jamal\Downloads\Compressed\DeepFake Voice Recognition\KAGGLE\DATASET-balanced.csv")

    csv_path = None
    for candidate in csv_candidates:
        if os.path.exists(candidate):
            csv_path = candidate
            break

    if csv_path:
        try:
            logger.info("Found CSV file, loading from: %s", csv_path)
            df = pd.read_csv(csv_path)
            logger.info("CSV columns: %s", df.columns.tolist())
            logger.info("CSV shape: %d rows", len(df))

            # Validate required columns exist
            required_cols = feature_columns + ['LABEL']
            if not all(col in df.columns for col in required_cols):
                raise ValueError(f"Missing one or more required columns in CSV. Expected: {required_cols}")

            for _, row in df.iterrows():
                feature_vector = row[feature_columns].values.astype(np.float32)

                # Handle different label formats (string to int mapping)
                label_val = row['LABEL']
                if isinstance(label_val, (int, float)):
                    label = int(label_val)  # 0=REAL, 1=FAKE
                elif isinstance(label_val, str):
                    label_val_upper = label_val.upper()
                    if 'FAKE' in label_val_upper or label_val_upper == '1' or label_val_upper == 'TRUE':
                        label = 1
                    elif 'REAL' in label_val_upper or label_val_upper == '0' or label_val_upper == 'FALSE':
                        label = 0
                    else:
                        logger.warning(f"Unknown label format: {label_val}. Skipping row.")
                        continue
                else:
                    logger.warning(f"Unknown label type: {type(label_val)}. Skipping row.")
                    continue

                all_data.append((feature_vector, label))

            logger.info("Loaded %d feature vectors and labels from CSV", len(all_data))
            if len(all_data) > 0:
                return all_data
        except Exception as e:
            logger.error("Error reading CSV file or extracting features: %s. Cannot proceed without CSV data.", str(e))
            raise
    else:
        logger.error("No DATASET-balanced.csv found in expected locations. Cannot proceed without CSV data.")
        raise FileNotFoundError("DATASET-balanced.csv not found.")

    return all_data # Should ideally be reached only if an error occurs above

# --- Training Logic --- UPDATED!
def run_training_session(trial, data_path, is_optuna=True, lr=None, epochs=10):
    all_data = collect_files_from_csv_or_folders(data_path)

    if not all_data:
        raise ValueError(f"No data found. Check your path and CSV.")

    # Log label distribution
    label_counts = {}
    for _, label in all_data:
        label_counts[label] = label_counts.get(label, 0) + 1
    logger.info("Label distribution: %s", label_counts)

    # CRITICAL: Split data first to avoid data leakage
    random.shuffle(all_data)
    split = int(0.8 * len(all_data))
    train_data = all_data[:split]
    val_data = all_data[split:]

    logger.info("Data split: %d train samples, %d val samples", len(train_data), len(val_data))

    # We directly use train_data and val_data which now contain (feature_vector, label)
    train_ds = UltimateForensicDataset(train_data, is_train=True, device=device)
    val_ds = UltimateForensicDataset(val_data, is_train=False, device=device)

    logger.info("Creating DataLoaders...")
    # Adjust batch size based on environment
    batch_size = DEFAULT_BATCH_SIZE
    if IS_COLAB:
        # Try to detect GPU memory and adjust batch size
        if torch.cuda.is_available():
            gpu_memory_gb = torch.cuda.get_device_properties(0).total_memory / (1024**3)
            logger.info("GPU Memory: %.1f GB", gpu_memory_gb)
            if gpu_memory_gb >= 16:  # V100/A100
                batch_size = 32
            elif gpu_memory_gb >= 8:  # T4
                batch_size = 16
            else:
                batch_size = 8

    logger.info("Using batch_size=%d, num_workers=%d, pin_memory=%s",
                batch_size, NUM_WORKERS, USE_PIN_MEMORY)
    train_loader = DataLoader(train_ds, batch_size=batch_size, shuffle=True,
                             num_workers=NUM_WORKERS, pin_memory=USE_PIN_MEMORY)
    val_loader = DataLoader(val_ds, batch_size=batch_size,
                           num_workers=NUM_WORKERS, pin_memory=USE_PIN_MEMORY)
    logger.info("DataLoaders created. Total batches: train=%d, val=%d", len(train_loader), len(val_loader))

    # Test loading one sample to identify any bottlenecks
    logger.info("Testing data loading with first sample...")
    try:
        test_sample, test_label = train_ds[0] # This now correctly unpacks 2 values
        logger.info("First sample loaded successfully. Shape: %s, Label: %s", test_sample.shape, test_label)
        if test_sample.dim() > 1: # If features are 2D (like MFCCs), flatten them for MLP
            logger.warning("Input features are not 1D for MLP. Flattening.")
            test_sample = test_sample.view(-1)
        logger.info("Input size for MLP: %d", test_sample.shape[0])
        input_feature_size = test_sample.shape[0]
    except Exception as e:
        logger.error("Failed to load first sample: %s", str(e))
        raise

    model = UltimateMLP(input_size=input_feature_size).to(device) # Use the new MLP model
    global current_model
    current_model = model

    if is_optuna:
        lr = trial.suggest_float("lr", 1e-6, 1e-3, log=True)

    optimizer = optim.AdamW(model.parameters(), lr=lr)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', patience=5)
    criterion = nn.BCELoss()

    best_acc = 0
    for epoch in range(epochs):
        model.train()
        train_loss = 0
        total_batches = len(train_loader)
        logger.info("Epoch %d/%d: Starting training (%d batches)...", epoch + 1, epochs, total_batches)
        for batch_idx, (inputs, labels) in enumerate(train_loader):
            if batch_idx == 0:
                logger.info("First batch loaded successfully, continuing training...")
            elif batch_idx % 500 == 0:  # Log progress every 500 batches
                logger.info("Epoch %d: Processed %d/%d batches (%.1f%%)",
                           epoch + 1, batch_idx, total_batches, 100 * batch_idx / total_batches)

            # Ensure inputs are flattened for MLP if they aren't already 1D
            if inputs.dim() > 2: # e.g., if still getting (Batch, Channels, Freq, Time)
                 inputs = inputs.view(inputs.size(0), -1) # Flatten to (Batch, Features)
            elif inputs.dim() == 1: # Case where input is already a single feature vector, but DataLoader might add a batch dim
                 inputs = inputs.unsqueeze(0) # Ensure it's (1, Features)

            inputs, labels = inputs.to(device), labels.to(device).view(-1, 1)

            if batch_idx == 0:
                logger.info("Batch %d: Inputs moved to device, shape: %s", batch_idx, inputs.shape)
            optimizer.zero_grad()
            if batch_idx == 0:
                logger.info("Batch %d: Starting forward pass...", batch_idx)
            outputs = model(inputs)
            if batch_idx == 0:
                logger.info("Batch %d: Forward pass complete, starting loss calculation...", batch_idx)
            loss = criterion(outputs, labels)
            if batch_idx == 0:
                logger.info("Batch %d: Loss calculated: %.4f, starting backward pass...", batch_idx, loss.item())
            loss.backward()
            if batch_idx == 0:
                logger.info("Batch %d: Backward pass complete, updating weights...", batch_idx)
            optimizer.step()
            if batch_idx == 0:
                logger.info("Batch %d: Weights updated, batch complete!", batch_idx)
            train_loss += loss.item()

        model.eval()
        correct, val_loss = 0, 0
        logger.info("Epoch %d: Starting validation (%d batches)...", epoch + 1, len(val_loader))
        with torch.no_grad():
            for val_batch_idx, (inputs, labels) in enumerate(val_loader):
                if val_batch_idx == 0:
                    logger.info("First validation batch loaded...")

                # Ensure inputs are flattened for MLP if they aren't already 1D
                if inputs.dim() > 2:
                    inputs = inputs.view(inputs.size(0), -1)
                elif inputs.dim() == 1:
                    inputs = inputs.unsqueeze(0)

                inputs, labels = inputs.to(device), labels.to(device).view(-1, 1)
                outputs = model(inputs)
                val_loss += criterion(outputs, labels).item()
                correct += ((outputs > 0.5).float() == labels).sum().item()

        acc = 100 * correct / len(val_ds)
        scheduler.step(val_loss)

        print(f"Epoch {epoch+1:02d} | Val Acc: {acc:.2f}% | LR: {optimizer.param_groups[0]['lr']:.1e}")

        # Save checkpoint after each epoch
        torch.save(model.state_dict(), f"checkpoint_epoch_{epoch+1}.pth")

        if is_optuna:
            trial.report(acc, epoch)
            if trial.should_prune():
                raise optuna.exceptions.TrialPruned()
        else:
            if acc >= best_acc:
                best_acc = acc
                torch.save(model.state_dict(), "best_model.pth")

    return acc if is_optuna else best_acc

# --- Execution ---
if __name__ == "__main__":
    # Set up interrupt handler to save model on Ctrl+C
    signal.signal(signal.SIGINT, save_model_on_interrupt)

    print("Starting Optuna Hyperparameter Search...")
    print(f"Number of trials: {OPTUNA_N_TRIALS}")
    print(f"Each trial trains for 5 epochs to find optimal learning rate")
    study = optuna.create_study(direction="maximize")
    study.optimize(lambda t: run_training_session(t, DATA_PATH, is_optuna=True, epochs=5), n_trials=OPTUNA_N_TRIALS)

    print("\n--- Optuna Study Results ---")
    best_lr = study.best_trial.params['lr']
    print(f"Best LR Found: {best_lr:.2e}")

    print("\nStarting Final Training with Best Parameters...")
    final_acc = run_training_session(None, DATA_PATH, is_optuna=False, lr=best_lr, epochs=5)
    print(f"\nFinal Model Saved. Best Validation Accuracy: {final_acc:.2f}%")


In [None]:
import torch
import torch.nn as nn
# torchaudio, librosa, numpy are no longer needed for inference as features are read from CSV
import pandas as pd
import os

# Detect if running on Google Colab
IS_COLAB = False
try:
    import google.colab  # type: ignore
    IS_COLAB = True
except (ImportError, ModuleNotFoundError):
    IS_COLAB = False

# --- 1. CONFIGURATION (Must Match Training) ---
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
MODEL_PATH = "best_model.pth"

# --- 2. UPDATED MODEL DEFINITION (Must Match Training) ---
# The MLP model used for training
class UltimateMLP(nn.Module):
    def __init__(self, input_size=26): # 26 features as per task
        super(UltimateMLP, self).__init__()
        self.network = nn.Sequential(
            nn.Linear(input_size, 128),
            nn.BatchNorm1d(128),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(128, 64),
            nn.BatchNorm1d(64),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(64, 1),
            nn.Sigmoid()
        )

    def forward(self, x):
        return self.network(x)

# --- 3. UPDATED ANALYSIS LOGIC ---
def analyze_audio_forensics(csv_path):
    # Define feature columns, must match training
    feature_columns = [
        'chroma_stft', 'rms', 'spectral_centroid', 'spectral_bandwidth', 'rolloff', 'zero_crossing_rate',
    ] + [f'mfcc{i}' for i in range(1, 21)]

    # Load the entire CSV
    try:
        df = pd.read_csv(csv_path)
    except FileNotFoundError:
        print(f"Error: CSV file not found at {csv_path}")
        return

    if df.empty:
        print(f"Error: The CSV file at {csv_path} is empty.")
        return

    # We cannot lookup a specific audio file by name as the CSV lacks filename columns.
    # For demonstration, we will take the first row's features for prediction.
    print(f"Warning: The CSV lacks audio file path/name metadata. Performing inference on the first entry in {csv_path} for demonstration.")
    file_row = df.iloc[[0]]  # Take the first row

    # Extract features and label
    feature_vector_np = file_row[feature_columns].values.astype(float).flatten() # Ensure 1D numpy array
    label_val = file_row['LABEL'].iloc[0]

    # Determine input_size based on the extracted features
    input_size = len(feature_vector_np)
    model = UltimateMLP(input_size=input_size).to(DEVICE)

    try:
        # Load the weights from your robust training
        model.load_state_dict(torch.load(MODEL_PATH, map_location=DEVICE))
    except Exception as e:
        print(f"Error loading model from {MODEL_PATH}: {e}")
        return
    model.eval()

    # Convert features to tensor and move to device
    features = torch.from_numpy(feature_vector_np).float().unsqueeze(0).to(DEVICE) # Add batch dimension

    with torch.no_grad():
        prob = model(features).item()

    # Verdict Calculation
    # 0 = REAL, 1 = FAKE
    verdict = "FAKE (AI Generated)" if prob > 0.5 else "REAL (Human Voice)"
    confidence = max(prob, 1 - prob) * 100

    print("\n" + "=" * 35)
    print(f"Performing inference on the first data entry from: {csv_path}")
    print(f"Features from CSV Label (for reference): {label_val}")
    print(f"RESULT: {verdict}")
    print(f"AI Detection Confidence: {confidence:.2f}%")
    print("=" * 35)


if __name__ == "__main__":
    # Configure file path based on environment
    DATA_ROOT = "/content/DataSet/KAGGLE"
    CSV_FILE_PATH = os.path.join(DATA_ROOT, "DATASET-balanced.csv")

    if IS_COLAB:
        print("Running on Google Colab")
    else:
        print("Running on local machine")

    print(f"Device: {DEVICE}")
    if torch.cuda.is_available():
        print(f"GPU: {torch.cuda.get_device_name(0)}")
        gpu_memory = torch.cuda.get_device_properties(0).total_memory / (1024**3)
        print(f"GPU Memory: {gpu_memory:.1f} GB")
    print(f"Model path: {MODEL_PATH}")
    print(f"CSV file for feature lookup: {CSV_FILE_PATH}\n")

    analyze_audio_forensics(CSV_FILE_PATH)


In [None]:
import torch
import torch.nn as nn
import torchaudio
import librosa
import numpy as np
from torchaudio.transforms import MFCC
from torchvision.models import resnet18

# Detect if running on Google Colab
IS_COLAB = False
try:
    import google.colab  # type: ignore
    IS_COLAB = True
except (ImportError, ModuleNotFoundError):
    IS_COLAB = False

# --- 1. CONFIGURATION (Must Match Training) ---
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
MODEL_PATH = "/content/best_model.pth"
SAMPLE_RATE = 16000
WINDOW_DURATION = 3
STRIDE = 0.5  # Fraction of window length (0.5 = 50% overlap). Set <1.0 for overlap.
NUM_SAMPLES = int(SAMPLE_RATE * WINDOW_DURATION)


# --- 2. UPDATED MODEL DEFINITION (Must Match Training) ---
class UltimateResNet(nn.Module):
    def __init__(self):
        super(UltimateResNet, self).__init__()
        self.resnet = resnet18(weights=None)
        # Adapt first layer for 3-channel MFCC features and preserve time-frequency resolution
        # Use a smaller kernel and stride and remove the initial max-pool to avoid excessive downsampling
        self.resnet.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
        self.resnet.maxpool = nn.Identity()
        self.resnet.fc = nn.Sequential(
            nn.Linear(512, 256),
            nn.BatchNorm1d(256),
            nn.ReLU(),
            nn.Dropout(0.7),
            nn.Linear(256, 1),
            nn.Sigmoid()
        )

    def forward(self, x):
        return self.resnet(x)


# --- 3. UPDATED ANALYSIS LOGIC ---
def analyze_audio_forensics(file_path):
    model = UltimateResNet().to(DEVICE)
    try:
        # Load the weights from your robust training
        model.load_state_dict(torch.load(MODEL_PATH, map_location=DEVICE))
    except Exception as e:
        print(f"Error loading model: {e}")
        return
    model.eval()

    mfcc_transform = MFCC(
        sample_rate=SAMPLE_RATE,
        n_mfcc=40,
        melkwargs={"n_fft": 400, "hop_length": 160, "n_mels": 40}
    ).to(DEVICE)

    try:
        # Load with Librosa to handle .mp3 correctly
        audio, _ = librosa.load(file_path, sr=SAMPLE_RATE)
        total_len = len(audio)

        chunk_scores = []
        # Match training: step_size = stride * sample_rate * duration
        step_size = int(STRIDE * SAMPLE_RATE * WINDOW_DURATION)

        print(f"Forensic Scan: {file_path}")

        # Sliding Window Loop (overlap controlled by `STRIDE`)
        for start in range(0, total_len - NUM_SAMPLES + 1, step_size):
            chunk = audio[start: start + NUM_SAMPLES]

            # Same normalization as training
            chunk = chunk / (np.max(np.abs(chunk)) + 1e-9)
            waveform = torch.from_numpy(chunk).float().unsqueeze(0).to(DEVICE)

            with torch.no_grad():
                mfcc = mfcc_transform(waveform)
                # Feature sync: MFCC + Delta + Delta2
                delta = torchaudio.functional.compute_deltas(mfcc)
                delta2 = torchaudio.functional.compute_deltas(delta)
                features = torch.cat([mfcc, delta, delta2], dim=0).unsqueeze(0)

                prob = model(features).item()
                chunk_scores.append(prob)

        if not chunk_scores:
            print("Audio too short for 3-second window analysis.")
            return

        # Verdict Calculation
        final_average = np.mean(chunk_scores)
        # 0 = REAL, 1 = FAKE
        verdict = "FAKE (AI Generated)" if final_average > 0.5 else "REAL (Human Voice)"
        confidence = max(final_average, 1 - final_average) * 100

        print("\n" + "=" * 35)
        print(f"RESULT: {verdict}")
        print(f"AI Detection Confidence: {confidence:.2f}%")
        print(f"Total Segments Scanned: {len(chunk_scores)}")
        print("=" * 35)

    except Exception as e:
        print(f"Inference Error: {e}")


if __name__ == "__main__":
    # Configure file path based on environment
    if IS_COLAB:
        # For Colab: Update this path to your audio file location
        # Option 1: Upload file to Colab
        # TEST_FILE = "/content/your_audio_file.mp3"

        # Option 2: Use Google Drive (mount first: from google.colab import drive; drive.mount('/content/drive'))
        # TEST_FILE = "/content/drive/MyDrive/your_audio_file.mp3"

        # Option 3: Download from URL
        # import urllib.request
        # urllib.request.urlretrieve("https://example.com/audio.mp3", "/content/audio.mp3")
        # TEST_FILE = "/content/audio.mp3"

        TEST_FILE = "/content/DataSet/DEMONSTRATION/DEMONSTRATION/linus-original-DEMO.mp3"  # UPDATE THIS PATH!
        print("Running on Google Colab")
    else:
        # Local Windows path
        TEST_FILE = r"C:\Users\Eng Jamal\Downloads\Compressed\DeepFake Voice Recognition\DEMONSTRATION\DEMONSTRATION\linus-to-musk-DEMO.mp3"
        print("Running on local machine")

    print(f"Device: {DEVICE}")
    if torch.cuda.is_available():
        print(f"GPU: {torch.cuda.get_device_name(0)}")
        gpu_memory = torch.cuda.get_device_properties(0).total_memory / (1024**3)
        print(f"GPU Memory: {gpu_memory:.1f} GB")
    print(f"Model path: {MODEL_PATH}")
    print(f"Test file: {TEST_FILE}\n")

    analyze_audio_forensics(TEST_FILE)

# Task
Modify the deepfake voice recognition system to support dual-mode training, allowing the user to switch between using pre-extracted CSV features (with `UltimateMLP`) and raw audio features (with `UltimateResNet`) for model training. The task includes re-enabling audio processing components (`torchaudio`, `librosa`, `MFCC`, `FrequencyMasking`, `TimeMasking`), implementing a `TRAINING_MODE` configuration to control data loading and model selection, adapting data collection and dataset classes (`collect_files_from_csv_or_folders`, `UltimateForensicDataset`), and modifying the `run_training_session` function to dynamically instantiate the appropriate model based on the chosen mode. Finally, summarize the implemented changes, explain how to switch between the two training modes, and report the training results for both configurations.

## Re-enable Audio Processing Components

### Subtask:
Reintroduce necessary imports for audio processing (`torchaudio`, `librosa`) and feature extraction transforms (`MFCC`, `FrequencyMasking`, `TimeMasking`). Also, bring back the `UltimateResNet` model architecture into the training script.


**Reasoning**:
The subtask requires reintroducing audio processing imports and the `UltimateResNet` model into the training script. I will modify the `f75e1922` cell to include the necessary imports and the `UltimateResNet` class definition, which is currently present in the inference script `Ldf0xkDukMFV`.



In [14]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
import torchaudio # Re-enabled
import librosa # Re-enabled
import numpy as np
import warnings
import random
import time
import logging
import signal
import sys
from torch.utils.data import Dataset, DataLoader
from torchaudio.transforms import MFCC, FrequencyMasking, TimeMasking # Re-enabled
from torchvision.models import resnet18 # Re-enabled
import optuna
import pandas as pd

# --- Configuration & Device ---
warnings.filterwarnings("ignore")
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Detect if running on Google Colab
IS_COLAB = False
try:
    import google.colab  # type: ignore
    IS_COLAB = True
except (ImportError, ModuleNotFoundError):
    IS_COLAB = False

# Update this to your path
# For Colab: Use "/content/your_data_path" or mount Google Drive
# For local: Use your local path
if IS_COLAB:
    # Colab path - update this to your Colab data path
    DATA_PATH = "/content/DataSet/KAGGLE/AUDIO"  # Update this! (though now it points to parent of CSV)
    # Or mount Google Drive:
    # from google.colab import drive
    # drive.mount('/content/drive')
    # DATA_PATH = "/content/drive/MyDrive/your_data_path"
else:
    # Local Windows path
    DATA_PATH = r"C:\Users\Eng Jamal\Downloads\Compressed\DeepFake Voice Recognition\KAGGLE\AUDIO"

# Segment extraction settings (no longer relevant but kept for now)
SEGMENT_DURATION = 3  # seconds
SEGMENT_STRIDE = 0.5  # Fraction of window length (0.5 = 50% overlap). Set to 1.0 for no overlap.
SAMPLE_RATE = 16000 # Added for audio processing
NUM_SAMPLES = int(SAMPLE_RATE * SEGMENT_DURATION) # Added for audio processing

# Optuna hyperparameter search settings
if IS_COLAB:
    OPTUNA_N_TRIALS = 15  # More trials on Colab (faster GPUs, more time)
else:
    OPTUNA_N_TRIALS = 5  # Fewer trials locally (slower, time-constrained)

# Colab-optimized settings
if IS_COLAB:
    # Colab typically has better GPUs (T4/V100/A100) with 16GB+ VRAM
    DEFAULT_BATCH_SIZE = 16  # Can go higher (32-64) depending on GPU
    USE_PIN_MEMORY = True  # Faster data transfer on Colab
    NUM_WORKERS = 2  # Can use multiple workers on Colab
else:
    # Local settings (conservative for GTX 1050 Ti with 4GB VRAM)
    DEFAULT_BATCH_SIZE = 4
    USE_PIN_MEMORY = False
    NUM_WORKERS = 0  # Avoid CUDA context issues on Windows

# Basic validation to fail fast if the path is incorrect
if not os.path.exists(DATA_PATH):
    logger.warning(f"DATA_PATH does not exist: {DATA_PATH!r}. This might be acceptable if DATASET-balanced.csv is in a parent directory.")

# Configure logging
logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s: %(message)s")
logger = logging.getLogger(__name__)

# Log CUDA / device info at startup
if IS_COLAB:
    logger.info("Running on Google Colab - using optimized settings")
if torch.cuda.is_available():
    logger.info("CUDA available: %d devices", torch.cuda.device_count())
    for i in range(torch.cuda.device_count()):
        try:
            gpu_name = torch.cuda.get_device_name(i)
            gpu_memory = torch.cuda.get_device_properties(i).total_memory / (1024**3)
            logger.info("GPU %d: %s (%.1f GB)", i, gpu_name, gpu_memory)
        except Exception:
            logger.info("GPU %d: (name unavailable)", i)
else:
    logger.info("CUDA not available — using CPU")

# --- Reproducibility seeds ---
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(SEED)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False
logger.info("Random seed set to %d", SEED)

# --- Global model for interrupt saving ---
current_model = None

def save_model_on_interrupt(signum, frame):
    if current_model is not None:
        checkpoint_path = "interrupted_model.pth"
        torch.save(current_model.state_dict(), checkpoint_path)
        logger.info("Training interrupted. Model saved to %s", checkpoint_path)
    sys.exit(0)

# --- Segment Generation Helper --- (NOW USED AGAIN for audio loading)
def generate_segments_from_files(file_list, sample_rate=16000, duration=3, stride=0.5):
    """
    Generates audio segments from a list of audio file paths.
    Returns a list of (processed_audio_segment, label) tuples.
    """
    all_segments = []
    for audio_path, label in file_list:
        try:
            # Load audio using librosa to handle various formats (e.g., mp3)
            audio, sr = librosa.load(audio_path, sr=sample_rate, mono=True)
            if sr != sample_rate:
                logger.warning(f"Resampling {audio_path} from {sr} to {sample_rate}.")

            # Calculate segment length and step size
            segment_length = int(duration * sample_rate)
            step_size = int(stride * segment_length)

            # Ensure the audio is long enough for at least one segment
            if len(audio) < segment_length:
                logger.warning(f"Audio file {audio_path} too short ({len(audio)} samples) for {duration}-second segment ({segment_length} samples). Skipping.")
                continue

            # Generate overlapping segments
            for i in range(0, len(audio) - segment_length + 1, step_size):
                segment = audio[i : i + segment_length]

                # Apply normalization consistent with analysis/inference
                segment = segment / (np.max(np.abs(segment)) + 1e-9)
                all_segments.append((segment, label))

        except Exception as e:
            logger.error(f"Error processing audio file {audio_path}: {e}")
            continue
    return all_segments

# --- Dataset Class --- UPDATED!
class UltimateForensicDataset(Dataset):
    def __init__(self, data_list, is_train=True, device=device):
        """
        data_list: List of (feature_vector, label) tuples for MLP or
                   (audio_segment, label) tuples for ResNet
        Each tuple represents one entry from the CSV.
        """
        self.data_list = data_list
        self.is_train = is_train
        self.device = device

        # Audio transforms are now needed
        self.mfcc_transform = MFCC(
            sample_rate=SAMPLE_RATE,
            n_mfcc=40,
            melkwargs={"n_fft": 400, "hop_length": 160, "n_mels": 40}
        ).to(self.device)
        self.freq_mask = FrequencyMasking(freq_mask_param=15).to(self.device) # Example params
        self.time_mask = TimeMasking(time_mask_param=30).to(self.device) # Example params
        logger.info("Audio transforms initialized for ResNet model.")

    def __len__(self):
        return len(self.data_list)

    # simulate_compression is no longer needed
    # def simulate_compression(self, audio):
    #     ...

    def __getitem__(self, idx):
        item, label = self.data_list[idx]

        # item can be either a numpy feature vector or an audio segment
        if isinstance(item, np.ndarray) and item.ndim == 1 and len(item) == 26: # Assuming fixed 26 features for MLP
            # It's an MLP feature vector
            features = torch.from_numpy(item).float()
        else:
            # It's an audio segment for ResNet
            waveform = torch.from_numpy(item).float().unsqueeze(0).to(self.device)

            mfcc = self.mfcc_transform(waveform)
            delta = torchaudio.functional.compute_deltas(mfcc)
            delta2 = torchaudio.functional.compute_deltas(delta)
            features = torch.cat([mfcc, delta, delta2], dim=0)

            if self.is_train:
                # Apply augmentations if training
                features = self.freq_mask(features)
                features = self.time_mask(features)

            # ResNet expects (batch, channels, height, width)
            # features is (channels, height, width), so add batch dim later in DataLoader
            # No need for .to(self.device) here if mfcc_transform is already on device

        return features, torch.tensor(label, dtype=torch.float32)

# --- Model Architecture --- UltimateMLP (kept for comparison/fallback) and UltimateResNet!
class UltimateMLP(nn.Module):
    def __init__(self, input_size=26): # 26 features as per task
        super(UltimateMLP, self).__init__()
        self.network = nn.Sequential(
            nn.Linear(input_size, 128),
            nn.BatchNorm1d(128),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(128, 64),
            nn.BatchNorm1d(64),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(64, 1),
            nn.Sigmoid()
        )

    def forward(self, x):
        return self.network(x)

class UltimateResNet(nn.Module):
    def __init__(self):
        super(UltimateResNet, self).__init__()
        self.resnet = resnet18(weights=None)
        # Adapt first layer for 3-channel MFCC features and preserve time-frequency resolution
        # Use a smaller kernel and stride and remove the initial max-pool to avoid excessive downsampling
        self.resnet.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
        self.resnet.maxpool = nn.Identity()
        self.resnet.fc = nn.Sequential(
            nn.Linear(512, 256),
            nn.BatchNorm1d(256),
            nn.ReLU(),
            nn.Dropout(0.7),
            nn.Linear(256, 1),
            nn.Sigmoid()
        )

    def forward(self, x):
        return self.resnet(x)

# --- File Collection Helper --- (Modified to collect audio paths)
def collect_files_from_csv_or_folders(data_path, use_audio_files=True):
    """
    Reads specified feature columns and the 'LABEL' column from DATASET-balanced.csv.
    If use_audio_files is True, it attempts to find and return audio file paths.
    Returns a list of (feature_vector, label) pairs or (audio_path, label) pairs.
    """
    all_data = []

    feature_columns = [
        'chroma_stft', 'rms', 'spectral_centroid', 'spectral_bandwidth', 'rolloff', 'zero_crossing_rate',
    ] + [f'mfcc{i}' for i in range(1, 21)]

    # Try multiple possible CSV locations (Colab and local compatible)
    csv_candidates = [
        os.path.join(data_path, "DATASET-balanced.csv"),  # In data folder
        os.path.join(os.path.dirname(data_path), "DATASET-balanced.csv"),  # Parent folder
    ]

    # Colab-specific paths
    if IS_COLAB:
        csv_candidates.extend([
            "/content/DataSet/KAGGLE/DATASET-balanced.csv",  # Specific Colab path from analysis
            "/content/drive/MyDrive/DataSet/KAGGLE/DATASET-balanced.csv",  # If using Google Drive
        ])
    else:
        # Local Windows paths
        csv_candidates.append(r"C:\Users\Eng Jamal\Downloads\Compressed\DeepFake Voice Recognition\KAGGLE\DATASET-balanced.csv")

    csv_path = None
    for candidate in csv_candidates:
        if os.path.exists(candidate):
            csv_path = candidate
            break

    if csv_path:
        try:
            logger.info("Found CSV file, loading from: %s", csv_path)
            df = pd.read_csv(csv_path)
            logger.info("CSV columns: %s", df.columns.tolist())
            logger.info("CSV shape: %d rows", len(df))

            if use_audio_files:
                if 'FILE_NAME' not in df.columns:
                    logger.error("CSV does not contain 'FILE_NAME' column. Cannot use audio files.")
                    use_audio_files = False # Fallback to features if no file name
                else:
                    audio_base_path = os.path.join(data_path, "AUDIO") # Assuming 'AUDIO' subfolder
                    if not os.path.exists(audio_base_path):
                        audio_base_path = data_path # Fallback if AUDIO not in subfolder
                    logger.info(f"Attempting to find audio files in: {audio_base_path}")

            # Validate required columns exist
            required_cols = feature_columns + ['LABEL']
            if not use_audio_files:
                if not all(col in df.columns for col in required_cols):
                    raise ValueError(f"Missing one or more required columns in CSV for feature-based training. Expected: {required_cols}")

            for _, row in df.iterrows():
                # Handle different label formats (string to int mapping)
                label_val = row['LABEL']
                if isinstance(label_val, (int, float)):
                    label = int(label_val)  # 0=REAL, 1=FAKE
                elif isinstance(label_val, str):
                    label_val_upper = label_val.upper()
                    if 'FAKE' in label_val_upper or label_val_upper == '1' or label_val_upper == 'TRUE':
                        label = 1
                    elif 'REAL' in label_val_upper or label_val_upper == '0' or label_val_upper == 'FALSE':
                        label = 0
                    else:
                        logger.warning(f"Unknown label format: {label_val}. Skipping row.")
                        continue
                else:
                    logger.warning(f"Unknown label type: {type(label_val)}. Skipping row.")
                    continue

                if use_audio_files and 'FILE_NAME' in row and pd.notna(row['FILE_NAME']):
                    audio_file_name = row['FILE_NAME']
                    audio_full_path = os.path.join(audio_base_path, audio_file_name)
                    if os.path.exists(audio_full_path):
                        all_data.append((audio_full_path, label))
                    else:
                        logger.warning(f"Audio file not found: {audio_full_path}. Skipping.")
                elif not use_audio_files:
                    feature_vector = row[feature_columns].values.astype(np.float32)
                    all_data.append((feature_vector, label))
                else:
                    logger.warning(f"Skipping row due to missing audio file name or non-existent audio file: {row['FILE_NAME']}")

            logger.info("Loaded %d items (audio paths or feature vectors) and labels from CSV", len(all_data))
            if len(all_data) > 0:
                return all_data
        except Exception as e:
            logger.error("Error reading CSV file or extracting data: %s. Cannot proceed without data.", str(e))
            raise
    else:
        logger.error("No DATASET-balanced.csv found in expected locations. Cannot proceed without CSV data.")
        raise FileNotFoundError("DATASET-balanced.csv not found.")

    return all_data # Should ideally be reached only if an error occurs above

# --- Training Logic ---
def run_training_session(trial, data_path, is_optuna=True, lr=None, epochs=10):
    # Determine if we should use ResNet (audio files) or MLP (features)
    # For this subtask, we assume we are switching to ResNet
    USE_RESNET_MODEL = True # Set to True to use ResNet with audio files
    if not USE_RESNET_MODEL:
        logger.info("Using MLP model with pre-extracted features.")
        all_data_raw = collect_files_from_csv_or_folders(data_path, use_audio_files=False)
    else:
        logger.info("Using ResNet model with raw audio files.")
        # First, collect audio file paths and labels from CSV
        audio_file_labels = collect_files_from_csv_or_folders(data_path, use_audio_files=True)
        # Then, generate segments from these audio files
        all_data_raw = generate_segments_from_files(audio_file_labels, sample_rate=SAMPLE_RATE, duration=SEGMENT_DURATION, stride=SEGMENT_STRIDE)

    if not all_data_raw:
        raise ValueError(f"No data found. Check your path, CSV, and audio files.")

    # Log label distribution
    label_counts = {}
    for _, label in all_data_raw:
        label_counts[label] = label_counts.get(label, 0) + 1
    logger.info("Label distribution: %s", label_counts)

    # CRITICAL: Split data first to avoid data leakage
    random.shuffle(all_data_raw)
    split = int(0.8 * len(all_data_raw))
    train_data = all_data_raw[:split]
    val_data = all_data_raw[split:]

    logger.info("Data split: %d train samples, %d val samples", len(train_data), len(val_data))

    train_ds = UltimateForensicDataset(train_data, is_train=True, device=device)
    val_ds = UltimateForensicDataset(val_data, is_train=False, device=device)

    logger.info("Creating DataLoaders...")
    # Adjust batch size based on environment
    batch_size = DEFAULT_BATCH_SIZE
    if IS_COLAB:
        # Try to detect GPU memory and adjust batch size
        if torch.cuda.is_available():
            gpu_memory_gb = torch.cuda.get_device_properties(0).total_memory / (1024**3)
            logger.info("GPU Memory: %.1f GB", gpu_memory_gb)
            if gpu_memory_gb >= 16:  # V100/A100
                batch_size = 32
            elif gpu_memory_gb >= 8:  # T4
                batch_size = 16
            else:
                batch_size = 8

    logger.info("Using batch_size=%d, num_workers=%d, pin_memory=%s",
                batch_size, NUM_WORKERS, USE_PIN_MEMORY)
    train_loader = DataLoader(train_ds, batch_size=batch_size, shuffle=True,
                             num_workers=NUM_WORKERS, pin_memory=USE_PIN_MEMORY)
    val_loader = DataLoader(val_ds, batch_size=batch_size,
                           num_workers=NUM_WORKERS, pin_memory=USE_PIN_MEMORY)
    logger.info("DataLoaders created. Total batches: train=%d, val=%d", len(train_loader), len(val_loader))

    # Test loading one sample to identify any bottlenecks
    logger.info("Testing data loading with first sample...")
    try:
        test_sample, test_label = train_ds[0] # This now correctly unpacks 2 values
        logger.info("First sample loaded successfully. Shape: %s, Label: %s", test_sample.shape, test_label)
        if USE_RESNET_MODEL: # ResNet expects (channels, height, width)
            logger.info("Input features for ResNet: %s", test_sample.shape)
            input_model_type = "ResNet"
            # No need to flatten for ResNet
            model = UltimateResNet().to(device) # Use the new ResNet model
        else: # MLP expects (features,)
            if test_sample.dim() > 1: # If features are 2D (like MFCCs), flatten them for MLP
                logger.warning("Input features are not 1D for MLP. Flattening.")
                test_sample = test_sample.view(-1)
            logger.info("Input size for MLP: %d", test_sample.shape[0])
            input_feature_size = test_sample.shape[0]
            input_model_type = "MLP"
            model = UltimateMLP(input_size=input_feature_size).to(device) # Use the new MLP model
        logger.info(f"Using {input_model_type} model.")

    except Exception as e:
        logger.error("Failed to load first sample: %s", str(e))
        raise

    global current_model
    current_model = model

    if is_optuna:
        lr = trial.suggest_float("lr", 1e-6, 1e-3, log=True)

    optimizer = optim.AdamW(model.parameters(), lr=lr)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', patience=5)
    criterion = nn.BCELoss() # Binary Cross-Entropy Loss for binary classification

    best_acc = 0
    for epoch in range(epochs):
        model.train() # Set model to training mode
        train_loss = 0
        total_batches = len(train_loader)
        logger.info("Epoch %d/%d: Starting training (%d batches)...", epoch + 1, epochs, total_batches)
        for batch_idx, (inputs, labels) in enumerate(train_loader):
            if batch_idx == 0:
                logger.info("First batch loaded successfully, continuing training...")
            elif batch_idx % 500 == 0:
                logger.info("Epoch %d: Processed %d/%d batches (%.1f%%)",
                           epoch + 1, batch_idx, total_batches, 100 * batch_idx / total_batches)

            # For ResNet, inputs should be (Batch, Channels, Height, Width)
            # For MLP, inputs should be (Batch, Features)
            if USE_RESNET_MODEL:
                inputs = inputs.to(device) # Already (Batch, Channels, Height, Width)
            else:
                # Ensure inputs are flattened for MLP if they aren't already 1D per sample
                if inputs.dim() > 2:
                    inputs = inputs.view(inputs.size(0), -1) # Flatten to (Batch, Features)
                elif inputs.dim() == 1: # Case where input is already a single feature vector, but DataLoader might add a batch dim
                    inputs = inputs.unsqueeze(0) # Ensure it's (1, Features)
                inputs = inputs.to(device)

            labels = labels.to(device).view(-1, 1)

            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            train_loss += loss.item()

        model.eval() # Set model to evaluation mode
        correct, val_loss = 0, 0
        logger.info("Epoch %d: Starting validation (%d batches)...", epoch + 1, len(val_loader))
        with torch.no_grad(): # Disable gradient calculations for validation
            for val_batch_idx, (inputs, labels) in enumerate(val_loader):
                if val_batch_idx == 0:
                    logger.info("First validation batch loaded...")

                if USE_RESNET_MODEL:
                    inputs = inputs.to(device)
                else:
                    # Ensure inputs are flattened for MLP if they aren't already 1D per sample
                    if inputs.dim() > 2:
                        inputs = inputs.view(inputs.size(0), -1)
                    elif inputs.dim() == 1:
                        inputs = inputs.unsqueeze(0)
                    inputs = inputs.to(device)

                labels = labels.to(device).view(-1, 1)
                outputs = model(inputs)
                val_loss += criterion(outputs, labels).item()
                correct += ((outputs > 0.5).float() == labels).sum().item()

        acc = 100 * correct / len(val_ds)
        scheduler.step(val_loss) # Adjust learning rate based on validation loss

        print(f"Epoch {epoch+1:02d} | Val Acc: {acc:.2f}% | LR: {optimizer.param_groups[0]['lr']:.1e}")

        # Save checkpoint after each epoch
        torch.save(model.state_dict(), f"checkpoint_epoch_{epoch+1}.pth")

        if is_optuna:
            trial.report(acc, epoch)
            if trial.should_prune():
                raise optuna.exceptions.TrialPruned()
        else:
            if acc >= best_acc:
                best_acc = acc
                torch.save(model.state_dict(), "best_model.pth")

    return acc if is_optuna else best_acc

# --- Execution ---
if __name__ == "__main__":
    # Set up interrupt handler to save model on Ctrl+C
    signal.signal(signal.SIGINT, save_model_on_interrupt)

    print("Starting Optuna Hyperparameter Search...")
    print(f"Number of trials: {OPTUNA_N_TRIALS}")
    print(f"Each trial trains for 5 epochs to find optimal learning rate")
    study = optuna.create_study(direction="maximize")
    study.optimize(lambda t: run_training_session(t, DATA_PATH, is_optuna=True, epochs=5), n_trials=OPTUNA_N_TRIALS)

    print("\n--- Optuna Study Results ---")
    best_lr = study.best_trial.params['lr']
    print(f"Best LR Found: {best_lr:.2e}")

    print("\nStarting Final Training with Best Parameters...")
    final_acc = run_training_session(None, DATA_PATH, is_optuna=False, lr=best_lr, epochs=5)
    print(f"\nFinal Model Saved. Best Validation Accuracy: {final_acc:.2f}%")


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
 -6.3728232e+00 -1.0248235e+01 -4.2494669e+00 -5.2755132e+00
 -1.7221420e+00 -5.6171598e+00 -9.9598789e+00 -7.7617602e+00
 -6.3805490e+00 -4.8528452e+00]: Invalid file: array([ 3.6647299e-01,  4.1283999e-02,  2.3950974e+03,  2.4967014e+03,
        3.7544880e+03,  6.8998002e-02, -3.3918155e+02,  1.5546649e+02,
       -5.2281063e+01,  4.1197920e+00, -1.3023973e+01,  1.2778051e+01,
       -5.5034199e+00, -1.1286263e+01, -5.6594400e+00, -1.6539711e+01,
       -6.3728232e+00, -1.0248235e+01, -4.2494669e+00, -5.2755132e+00,
       -1.7221420e+00, -5.6171598e+00, -9.9598789e+00, -7.7617602e+00,
       -6.3805490e+00, -4.8528452e+00], dtype=float32)
ERROR:__main__:Error processing audio file [ 3.4675699e-01  3.4212001e-02  1.6987218e+03  2.1867212e+03
  2.5118018e+03  4.5880001e-02 -3.6051089e+02  1.8207094e+02
 -3.9593647e+01 -1.9317734e+01 -1.1833890e+01 -2.5261619e+00
 -8.5193596e+00 -1.1080178e+01 -6.2052398e+00 -9.9788427e+0

ValueError: No data found. Check your path, CSV, and audio files.

# Task
The previous execution failed because `librosa.load` received a NumPy array (feature vector) instead of a file path. This happened because the `collect_files_from_csv_or_folders` function, when set to use audio files, fell back to extracting features from the CSV if it didn't find a `FILE_NAME` column. The traceback suggests that the `DATASET-balanced.csv` file, despite the initial plan's assumption, does not contain a `FILE_NAME` column, leading to this fallback.

To resolve this and correctly implement dual-mode training, I need to:
1.  **Introduce a global `TRAINING_MODE` variable** (e.g., `'csv_features'` or `'audio_features'`) to explicitly control the data source and model type.
2.  **Modify `collect_files_from_csv_or_folders`** to be strict about its mode: if `TRAINING_MODE` is `'audio_features'`, it must find filenames and associated audio paths, raising an error if the `FILE_NAME` column is missing. If `TRAINING_MODE` is `'csv_features'`, it should extract feature vectors.
3.  **Adjust the `audio_base_path` logic** within `collect_files_from_csv_or_folders` to correctly use `DATA_PATH` (which already points to the audio directory).
4.  **Update `run_training_session`** to dynamically select `UltimateMLP` or `UltimateResNet` and call `collect_files_from_csv_or_folders` based on the `TRAINING_MODE`.

Since the current `DATASET-balanced.csv` appears to lack a `FILE_NAME` column, and without access to individual audio files, the 'audio_features' mode will likely fail during data collection. I will implement the strict check and set `TRAINING_MODE` to `'csv_features'` for the next execution to ensure the system can run with the available data (features from CSV). Once confirmed, I will modify it to `'audio_features'` and highlight the need for a CSV with file paths or direct audio file scanning.

**Current Task**:
Modify the training script in cell `cbee40a5` to introduce a global `TRAINING_MODE` variable, update `collect_files_from_csv_or_folders` for strict mode adherence and correct audio path handling, and adapt `run_training_session` to dynamically select the model (`UltimateMLP` for 'csv_features', `UltimateResNet` for 'audio_features') and data loading strategy based on `TRAINING_MODE`. Initially, set `TRAINING_MODE = 'csv_features'` to use existing data. This is an incremental step towards the overall goal of dual-mode training.

## Re-enable Audio Processing Components

### Subtask:
Reintroduce necessary imports for audio processing (torchaudio, librosa) and feature extraction transforms (MFCC, FrequencyMasking, TimeMasking). Also, bring back the UltimateResNet model architecture into the training script.


## Implement Training Mode Configuration

### Subtask:
Introduce a global `TRAINING_MODE` variable, update `collect_files_from_csv_or_folders` for strict mode adherence and correct audio path handling, and adapt `run_training_session` to dynamically select the model (`UltimateMLP` for 'csv_features', `UltimateResNet` for 'audio_features') and data loading strategy based on `TRAINING_MODE`. Initially, set `TRAINING_MODE = 'csv_features'` to use existing data.


**Reasoning**:
First, I need to update the `run_training_session` function to dynamically select the model type and data loading strategy based on the `TRAINING_MODE`. This includes modifying the function signature, removing the `USE_RESNET_MODEL` variable, updating the call to `collect_files_from_csv_or_folders`, and adapting the model instantiation logic.

