In [None]:
import os
import torch
import torchaudio
from torchaudio import save
import glob
from tqdm import tqdm

# Restrict PyTorch to a single thread before the VAD model is used.
torch.set_num_threads(1)

# Fetch the Silero VAD model plus its bundle of helper functions via torch.hub.
model, utils = torch.hub.load(
    repo_or_dir='snakers4/silero-vad',
    model='silero_vad',
)
# `utils` is unpacked as exactly five items; only the timestamp finder
# (position 0) and the audio reader (position 2) are kept.
(get_speech_timestamps, _, read_audio, _, _) = utils

# Read speaker mapping: one "<discord_id>=<name>" entry per line.
mapping_file = "../recordings/mapping.txt"
id_to_name = {}
# "utf-8-sig" decodes plain UTF-8 and also drops a leading BOM, so the first
# ID is not corrupted when the file was saved by a Windows editor.
with open(mapping_file, "r", encoding="utf-8-sig") as f:
    for line_number, line in enumerate(f, start=1):
        entry = line.strip()
        if not entry:
            # Blank lines (e.g. a trailing newline at end of file) carry no mapping.
            continue
        # Split on the first "=" only, so a name may itself contain "=".
        discord_id, separator, name = entry.partition("=")
        if not separator:
            print(f"Skipping malformed mapping line {line_number}: {entry!r}")
            continue
        id_to_name[discord_id.strip()] = name.strip()

print("Speaker ID to Name Mapping:")
for discord_id, name in id_to_name.items():
    print(f"{discord_id} -> {name}")

# Make sure the root folder for the extracted clips exists.
os.makedirs("clean_chunks", exist_ok=True)

# Recordings are looked up in the sub-folders of data_root.
data_root = "../recordings/async"
# Filled while processing: saved chunk path -> speaker name.
speaker_audio_dict = {}

# Gather every recording up front so the progress bar knows its total.
# NOTE: despite the "wav" naming, the pattern matched here is "*.mp3".
all_wav_files = []
for speaker_folder in os.listdir(data_root):
    full_path = os.path.join(data_root, speaker_folder)
    if not os.path.isdir(full_path):
        continue
    mp3_pattern = os.path.join(full_path, "*.mp3")
    all_wav_files.extend(glob.glob(mp3_pattern))

print(f"🧹 Processing {len(all_wav_files)} audio files...")

# Sample rate used for decoding, for VAD and for the saved chunks.
SAMPLE_RATE = 16000
# Segments shorter than one second are not exported.
MIN_CHUNK_SAMPLES = SAMPLE_RATE

for wav_file in tqdm(all_wav_files, desc="Processing"):
    try:
        # The speaker ID is the parent folder name up to the first "_".
        speaker_id = os.path.basename(os.path.dirname(wav_file)).split("_")[0]
        # Fall back to the raw ID when the mapping has no name for it.
        speaker_name = id_to_name.get(speaker_id, speaker_id)

        # Prepare output location and the per-file name stem once per file.
        output_dir = os.path.join("clean_chunks", speaker_name)
        os.makedirs(output_dir, exist_ok=True)
        base_name = os.path.splitext(os.path.basename(wav_file))[0]

        # Read and VAD. Timestamps are requested in samples (the default)
        # rather than seconds: the seconds form is rounded by the library,
        # which would shift the cut points and blur the length filter below.
        wav = read_audio(wav_file, sampling_rate=SAMPLE_RATE)
        speech_timestamps = get_speech_timestamps(
            wav, model, sampling_rate=SAMPLE_RATE
        )

        for i, segment in enumerate(speech_timestamps):
            start = int(segment['start'])
            end = int(segment['end'])

            if end - start < MIN_CHUNK_SAMPLES:  # Skip too-short clips
                continue

            # `i` counts every detected segment, including skipped ones, so
            # chunk numbers in file names may have gaps.
            chunk_filename = f"{base_name}_chunk{i}.wav"
            chunk_path = os.path.join(output_dir, chunk_filename)
            # A channel dimension is added in front before saving.
            save(chunk_path, wav[start:end].unsqueeze(0), SAMPLE_RATE)

            speaker_audio_dict[chunk_path] = speaker_name

    except Exception as e:
        # Best-effort batch run: report the failing file and keep going.
        print(f"❌ Error processing {wav_file}: {e}")

print("✅ All files processed!")
print(f"🎙️ Valid speech clips saved: {len(speaker_audio_dict)}")

Using cache found in C:\Users\kflin/.cache\torch\hub\snakers4_silero-vad_master


Speaker ID to Name Mapping:
188496350789369856 -> Ethan
210818185623109638 -> Luke
274340078015217666 -> Keller
293246585674924035 -> Mitch
332042555497644033 -> Seth
🧹 Processing 4052 audio files...


Processing: 100%|██████████| 4052/4052 [09:38<00:00,  7.01it/s] 

✅ All files processed!
🎙️ Valid speech clips saved: 3023



