## Preprocessing Audio Files for ECAPA-TDNN Model
The following code provides a step-by-step guide to preprocessing audio files for compatibility with the ECAPA-TDNN model. The main goals are to ensure uniform sampling rates, consistent audio lengths, and properly formatted file paths.

In [None]:
import os
import pandas as pd
import librosa
import numpy as np
import soundfile as sf
from tqdm import tqdm

def preprocess_audio(file_path, target_sr=16000, target_duration=3.0):
    """
    Load an audio file and force it to a fixed sample rate and length.

    The file is loaded as mono at ``target_sr``; the result is then cut off
    or zero-padded at the end so that it holds exactly
    ``int(target_sr * target_duration)`` samples.

    Args:
        file_path (str): Path to the audio file.
        target_sr (int): Target sample rate (e.g., 16000 Hz). Must be positive.
        target_duration (float): Target duration in seconds (e.g., 3.0 seconds).
            Must not be negative.
    Returns:
        np.ndarray: Preprocessed audio array of the target length.
    Raises:
        ValueError: If ``target_sr`` is not positive or ``target_duration``
            is negative.
    """
    # Validate before touching the file: a negative duration would otherwise
    # yield a negative target length, and ``audio[:negative]`` silently drops
    # samples from the END of the clip instead of failing.
    if target_sr <= 0:
        raise ValueError(f"target_sr must be positive, got {target_sr!r}")
    if target_duration < 0:
        raise ValueError(
            f"target_duration must not be negative, got {target_duration!r}"
        )

    # The loader is asked to resample to target_sr and downmix to mono
    audio, _ = librosa.load(file_path, sr=target_sr, mono=True)

    # Target length in samples
    target_length = int(target_sr * target_duration)

    if len(audio) > target_length:
        return audio[:target_length]  # Truncate
    if len(audio) < target_length:
        # Pad the tail with zeros (np.pad's default constant mode)
        return np.pad(audio, (0, target_length - len(audio)))
    return audio


def _with_wav_extension(file_name):
    """Return file_name with its extension (if any) replaced by ``.wav``."""
    return os.path.splitext(file_name)[0] + ".wav"


def preprocess_dataset(data_dir, output_dir, label_csv_path, target_sr=16000, target_duration=3.0):
    """
    Preprocess all audio files in a dataset and save them to a new directory.

    Every file named in the CSV's ``filename`` column is read from
    ``data_dir``, passed through ``preprocess_audio`` and written under
    ``output_dir`` with a ``.wav`` extension (sub-directories in the filename
    are recreated). A copy of the CSV whose ``filename`` column points at the
    new ``.wav`` names is saved inside ``output_dir``; all other columns are
    written back unchanged.

    Args:
        data_dir (str): Directory containing the input audio files.
        output_dir (str): Directory to save the preprocessed audio files.
        label_csv_path (str): Path to the CSV file with filenames and labels.
            Only the ``filename`` column is read here.
        target_sr (int): Target sample rate (e.g., 16000 Hz).
        target_duration (float): Target duration in seconds (e.g., 3.0 seconds).
    """
    # Create output directory if it doesn't exist
    os.makedirs(output_dir, exist_ok=True)

    # Load the CSV file
    labels_df = pd.read_csv(label_csv_path)

    # New (.wav) relative paths, in the same order as the CSV rows
    updated_filenames = []

    # Iterate the column directly; building a full row object per file
    # (iterrows) is unnecessary when only the filename is used.
    progress = tqdm(
        labels_df['filename'],
        total=len(labels_df),
        desc=f"Processing {os.path.basename(label_csv_path)}",
    )
    for file_name in progress:
        # Swap the real extension rather than string-replacing ".flac":
        # that would miss ".FLAC"/other formats and could also rewrite a
        # ".flac" that appears in a directory name.
        wav_name = _with_wav_extension(file_name)
        input_path = os.path.join(data_dir, file_name)
        output_path = os.path.join(output_dir, wav_name)

        # Filenames may contain sub-directories; make sure they exist
        os.makedirs(os.path.dirname(output_path), exist_ok=True)

        # Preprocess the audio
        processed_audio = preprocess_audio(input_path, target_sr=target_sr, target_duration=target_duration)

        # Save the preprocessed audio as WAV using soundfile
        sf.write(output_path, processed_audio, samplerate=target_sr)

        # Record the updated filename
        updated_filenames.append(wav_name)

    # Update the CSV with new file paths
    labels_df['filename'] = updated_filenames
    new_csv_path = os.path.join(output_dir, os.path.basename(label_csv_path))
    labels_df.to_csv(new_csv_path, index=False)

    print(f"Preprocessed audio saved to: {output_dir}")
    print(f"Updated labels CSV saved to: {new_csv_path}")


# Root of the raw dataset and root of the preprocessed copy
base_dir = "/Users/samruddhikale/Desktop/CAIR/OG1/main_data"
output_base_dir = "/Users/samruddhikale/Desktop/CAIR/OG1/Preprocessed_Dataset2"

# Each split has its own sub-folder plus a "<split>_labels.csv" file in base_dir
folders = ["train", "eval", "dev"]
for folder in folders:
    label_csv_path = os.path.join(base_dir, f"{folder}_labels.csv")
    input_dir = os.path.join(base_dir, folder)
    output_dir = os.path.join(output_base_dir, folder)

    preprocess_dataset(
        data_dir=input_dir,
        output_dir=output_dir,
        label_csv_path=label_csv_path,
    )




Processing train_labels.csv:   0%|                    | 0/18386 [00:00<?, ?it/s]

### Single-File Preprocessing for ECAPA-TDNN

In [2]:
import os
import librosa
import numpy as np
import soundfile as sf

def preprocess_audio(file_path, output_path, target_sr=16000, target_duration=3.0):
    """
    Preprocess an audio file for compatibility with the ECAPA-TDNN model.

    The file is loaded as mono, resampled to ``target_sr`` when its native
    rate differs, trimmed or zero-padded at the end to exactly
    ``int(target_sr * target_duration)`` samples, peak-normalized and written
    to ``output_path``.

    Args:
        file_path (str): Path to the input audio file.
        output_path (str): Path to save the preprocessed audio file.
        target_sr (int): Target sample rate (e.g., 16000 Hz). Must be positive.
        target_duration (float): Target duration in seconds (e.g., 3.0 seconds).
            Must cover at least one sample at ``target_sr``.
    Raises:
        ValueError: If ``target_sr`` is not positive, or if the requested
            duration amounts to less than one sample.
    """
    # Validate before any I/O. A zero target length would otherwise fail
    # later inside the peak computation with an unrelated-looking error, and
    # a negative one would silently trim samples off the end of the clip.
    if target_sr <= 0:
        raise ValueError(f"target_sr must be positive, got {target_sr!r}")
    target_length = int(target_sr * target_duration)
    if target_length <= 0:
        raise ValueError(
            "target_duration must cover at least one sample at target_sr, "
            f"got target_duration={target_duration!r}"
        )

    # Load at the file's native rate (sr=None), downmixed to mono
    audio, sr = librosa.load(file_path, sr=None, mono=True)

    # Resample to the target sample rate if necessary
    if sr != target_sr:
        audio = librosa.resample(audio, orig_sr=sr, target_sr=target_sr)

    # Adjust the audio duration to match the target duration
    if len(audio) < target_length:
        # Pad with zeros if the audio is too short
        audio = np.pad(audio, (0, target_length - len(audio)), mode="constant")
    elif len(audio) > target_length:
        # Trim the audio if it is too long
        audio = audio[:target_length]

    # Peak-normalize to the range [-1, 1]; an all-zero signal is left as-is
    # to avoid dividing by zero.
    peak = np.abs(audio).max()
    if peak > 0:
        audio = audio / peak

    # Save the preprocessed audio
    sf.write(output_path, audio, samplerate=target_sr)

    print(f"Preprocessed audio saved to: {output_path}")

# Example: preprocess one file and write the result next to it
file_path = "/Users/samruddhikale/Downloads/margot-to-trump.wav"  # input audio; point this at your own file
output_path = "/Users/samruddhikale/Downloads/s2.wav"  # where the preprocessed copy is written
preprocess_audio(file_path=file_path, output_path=output_path)

Preprocessed audio saved to: /Users/samruddhikale/Downloads/s2.wav
