In [4]:
import os
import librosa
import numpy as np
import torch
from glob import glob
from scipy.signal import butter, filtfilt
from torch.utils.data import DataLoader, TensorDataset


In [5]:
def load_and_normalize(file_path, target_sr=16000):
    """Load a FLAC file, resample to 16kHz, and normalize."""
    audio, sr = librosa.load(file_path, sr=target_sr)
    audio = audio / np.max(np.abs(audio))  # Normalize to [-1, 1]
    return audio


In [6]:
def pad_or_trim(audio, target_length=32000):
    """Pad or trim audio to the target length."""
    if len(audio) < target_length:
        audio = np.pad(audio, (0, target_length - len(audio)))
    else:
        audio = audio[:target_length]
    return audio


In [7]:
def bandpass_filter(audio, lowcut, highcut, sr, order=5):
    """Apply a bandpass filter to isolate specific frequency ranges."""
    nyquist = 0.5 * sr
    low = lowcut / nyquist
    high = highcut / nyquist
    b, a = butter(order, [low, high], btype='band')
    return filtfilt(b, a, audio)

# Example: Split into low and high frequency bands
def split_into_bands(audio, sr=16000):
    low_band = bandpass_filter(audio, 20, 2000, sr)
    high_band = bandpass_filter(audio, 2000, 8000, sr)
    return low_band, high_band
def segment_audio(audio, segment_size=8000, hop_size=4000):
    """Segment audio into overlapping chunks."""
    segments = []
    for i in range(0, len(audio) - segment_size + 1, hop_size):
        segments.append(audio[i:i + segment_size])
    return np.array(segments)



In [15]:
def preprocess_dataset(root_dir, target_sr=16000, target_length=32000, split_bands=False, segment=False):
    """Load and preprocess all audio files in the dataset."""
    
    # Update to use os.path.join for compatibility across systems
    flac_files = glob(os.path.join(root_dir, '**', '*.flac'), recursive=True)
    
    # Check if files are found
    if not flac_files:
        print("No FLAC files found. Check the directory path.")
    
    dataset = []
    for file in flac_files:
        audio = load_and_normalize(file, target_sr)
        audio = pad_or_trim(audio, target_length)
        
        if split_bands:
            # Split into frequency bands for multiple generators
            low_band, high_band = split_into_bands(audio, target_sr)
            dataset.append((low_band, high_band))
        elif segment:
            # Segment audio for time-segmented generators
            audio_segments = segment_audio(audio)
            dataset.extend(audio_segments)
        else:
            # Standard single generator processing
            dataset.append(audio)

    return dataset
    

In [19]:
data_dir = 'voice/data/LibriSpeech/dev-clean'
flac_files = glob(os.path.join(data_dir, '**', '*.flac'), recursive=True)
print(flac_files)


[]


In [10]:
def audio_to_tensor(audio_list):
    """Convert the audio list to PyTorch tensors."""
    if isinstance(audio_list[0], tuple):
        # For split bands (tuple of bands)
        low_band_tensors = [torch.tensor(x[0], dtype=torch.float32) for x in audio_list]
        high_band_tensors = [torch.tensor(x[1], dtype=torch.float32) for x in audio_list]
        return torch.stack(low_band_tensors), torch.stack(high_band_tensors)
    else:
        # For standard or segmented audio
        audio_tensors = [torch.tensor(audio, dtype=torch.float32) for audio in audio_list]
        return torch.stack(audio_tensors)


In [11]:
def create_dataloader(audio_tensors, batch_size=32):
    """Create a DataLoader for batch processing."""
    if isinstance(audio_tensors, tuple):
        # If using frequency bands
        dataset = TensorDataset(audio_tensors[0], audio_tensors[1])
    else:
        dataset = TensorDataset(audio_tensors)
        
    return DataLoader(dataset, batch_size=batch_size, shuffle=True)


In [12]:
data_dir = 'voice\data\LibriSpeech\dev-clean'
audio_dataset = preprocess_dataset(data_dir, split_bands=True)  # Using frequency bands

audio_tensors = audio_to_tensor(audio_dataset)


dataloader = create_dataloader(audio_tensors, batch_size=32)


IndexError: list index out of range