In [3]:
import os
import librosa
import numpy as np
import random
import soundfile as sf
import shutil
from librosa import effects

# ------------ Augmentation Functions ------------
def add_noise(audio, noise_level=0.005):
    noise = np.random.randn(len(audio)) * noise_level
    return audio + noise

def my_time_stretch(audio, rate=1.2):
    if len(audio.shape) > 1:
        audio = librosa.to_mono(audio)
    audio = np.ascontiguousarray(audio)
    return librosa.effects.time_stretch(y=audio, rate=rate)

def pitch_shift(audio, sr, n_steps=None):
    if n_steps is None:
        n_steps = random.randint(-2, 2)
    return effects.pitch_shift(audio, sr=sr, n_steps=n_steps)

def change_volume(audio, gain_range=(-6, 6)):
    gain = random.uniform(*gain_range)
    return audio * (10 ** (gain / 20))

# ------------ Generate 7 Random Augmentations ------------
def generate_augmented_versions(audio, sr, count=7):
    augmented = []
    methods_used = []  # Track which methods were used for each augmentation
    
    for _ in range(count):
        method = random.choice(['noise', 'stretch', 'pitch', 'volume', 'combo1', 'combo2', 'combo3'])
        methods_used.append(method)
        
        if method == 'noise':
            aug = add_noise(audio)
        elif method == 'stretch':
            aug = my_time_stretch(audio)
        elif method == 'pitch':
            aug = pitch_shift(audio, sr)
        elif method == 'volume':
            aug = change_volume(audio)
        elif method == 'combo1':
            aug = pitch_shift(add_noise(audio), sr)
        elif method == 'combo2':
            aug = change_volume(my_time_stretch(audio))
        elif method == 'combo3':
            aug = add_noise(change_volume(audio))
        augmented.append(aug)
    
    return augmented, methods_used

# ------------ Create TXT file for augmented audio ------------
def create_augmented_txt_file(original_txt_path, new_txt_path, augmentation_method):
    try:
        # Read original TXT file
        with open(original_txt_path, 'r') as f:
            content = f.read()
        
        # Add augmentation information to the TXT file
        augmented_content = f"{content}\n\n# Augmentation: {augmentation_method}"
        
        # Write to new TXT file
        with open(new_txt_path, 'w') as f:
            f.write(augmented_content)
            
    except Exception as e:
        print(f"Error processing TXT file {original_txt_path}: {e}")
        # Create a minimal TXT file if the original can't be read
        with open(new_txt_path, 'w') as f:
            f.write(f"# Augmented from: {os.path.basename(original_txt_path)}\n# Augmentation: {augmentation_method}")

# ------------ Paths ------------
input_base_folder = "C:\\Users\\HOME\\Downloads\\Respiratory_Sound_Database\\audio_and_txt_files"
output_base_folder = "C:\\Users\\HOME\\Downloads\\Respiratory_Sound_Database\\audio_and_txt_files\\augmented"

os.makedirs(output_base_folder, exist_ok=True)

# ------------ Process Each File ------------
wav_files = [f for f in os.listdir(input_base_folder) if f.endswith(".wav") and not f.startswith("._")]
txt_files = [f for f in os.listdir(input_base_folder) if f.endswith(".txt") and not f.startswith("._")]

print(f"Found {len(wav_files)} WAV files and {len(txt_files)} TXT files")

for filename in wav_files:
    file_path = os.path.join(input_base_folder, filename)
    base_name = os.path.splitext(filename)[0]
    
    # Check if corresponding TXT file exists
    txt_filename = f"{base_name}.txt"
    txt_file_path = os.path.join(input_base_folder, txt_filename)
    
    # Load original audio
    try:
        audio, sr = librosa.load(file_path, sr=None)
        print(f"Processing: {filename}")
    except Exception as e:
        print(f"Error loading {filename}: {e}")
        continue

    # Save original WAV
    sf.write(os.path.join(output_base_folder, filename), audio, sr)
    print(f"✅ Saved original: {filename}")
    
    # Copy original TXT if it exists
    if os.path.exists(txt_file_path):
        shutil.copy2(txt_file_path, os.path.join(output_base_folder, txt_filename))
        print(f"✅ Copied: {txt_filename}")
    else:
        print(f"⚠️  No TXT file found for: {filename}")

    # Create 7 augmented versions
    augmented_audios, methods_used = generate_augmented_versions(audio, sr, count=7)

    for i, (aug, method) in enumerate(zip(augmented_audios, methods_used)):
        # Save augmented WAV
        new_wav_name = f"{base_name}_aug{i+1}.wav"
        sf.write(os.path.join(output_base_folder, new_wav_name), aug, sr)
        print(f"✅ Saved augmented: {new_wav_name} (Method: {method})")
        
        # Create corresponding TXT file for augmented WAV
        new_txt_name = f"{base_name}_aug{i+1}.txt"
        if os.path.exists(txt_file_path):
            create_augmented_txt_file(
                txt_file_path, 
                os.path.join(output_base_folder, new_txt_name),
                method
            )
            print(f"✅ Created: {new_txt_name}")
        else:
            # Create a basic TXT file if original doesn't exist
            with open(os.path.join(output_base_folder, new_txt_name), 'w') as f:
                f.write(f"# Augmented from: {filename}\n# Augmentation: {method}")
            print(f"✅ Created basic: {new_txt_name}")

print(f"\n🎉 All files created in folder: {output_base_folder}")
print(f"Total WAV files: {len(wav_files) * 8} ({len(wav_files)} originals + {len(wav_files) * 7} augmented)")
print(f"Total TXT files: {len(wav_files) * 8} ({len(wav_files)} originals + {len(wav_files) * 7} augmented)")

Found 3960 WAV files and 3979 TXT files
Processing: 101_1b1_Al_sc_Meditron.wav
✅ Saved original: 101_1b1_Al_sc_Meditron.wav
✅ Copied: 101_1b1_Al_sc_Meditron.txt
✅ Saved augmented: 101_1b1_Al_sc_Meditron_aug1.wav (Method: noise)
✅ Created: 101_1b1_Al_sc_Meditron_aug1.txt
✅ Saved augmented: 101_1b1_Al_sc_Meditron_aug2.wav (Method: combo2)
✅ Created: 101_1b1_Al_sc_Meditron_aug2.txt
✅ Saved augmented: 101_1b1_Al_sc_Meditron_aug3.wav (Method: combo1)
✅ Created: 101_1b1_Al_sc_Meditron_aug3.txt
✅ Saved augmented: 101_1b1_Al_sc_Meditron_aug4.wav (Method: stretch)
✅ Created: 101_1b1_Al_sc_Meditron_aug4.txt
✅ Saved augmented: 101_1b1_Al_sc_Meditron_aug5.wav (Method: pitch)
✅ Created: 101_1b1_Al_sc_Meditron_aug5.txt
✅ Saved augmented: 101_1b1_Al_sc_Meditron_aug6.wav (Method: noise)
✅ Created: 101_1b1_Al_sc_Meditron_aug6.txt
✅ Saved augmented: 101_1b1_Al_sc_Meditron_aug7.wav (Method: stretch)
✅ Created: 101_1b1_Al_sc_Meditron_aug7.txt
Processing: 101_1b1_Al_sc_Meditron_aug1.wav
✅ Saved original: 1

KeyboardInterrupt: 