In [None]:

import os
import librosa
import numpy as np
import soundfile as sf
import random

# Constants
# Per-class size threshold: classes holding fewer .wav files than this are
# augmented toward it; classes at or above it are only copied.
AUGMENT_TARGET_COUNT = 200  # We want at least 200 samples per minority class
# Upper bound on the number of augmented variants generated from one source
# file. Because of this cap a very small class may end below the target.
AUGMENT_COUNT_PER_FILE = 10  # Max augmentations per file to avoid redundancy

def augment_randomly(y, sr):
    """Apply one randomly chosen augmentation to an audio signal.

    One of three transforms is picked uniformly at random:

    * time stretch by a factor drawn uniformly from [0.8, 1.2],
    * pitch shift by -2, -1, +1 or +2 semitones,
    * additive Gaussian noise (mean 0, standard deviation 0.005).

    Parameters
    ----------
    y : np.ndarray
        Audio samples.
    sr : int
        Sampling rate of ``y``; only used by the pitch-shift transform.

    Returns
    -------
    np.ndarray
        The augmented signal. Time stretching changes the number of samples;
        the other two transforms keep the shape of ``y``.
    """
    choice = random.choice(['time_stretch', 'pitch', 'noise'])

    if choice == 'time_stretch':
        rate = random.uniform(0.8, 1.2)
        # `rate` is keyword-only in librosa >= 0.10; the keyword form is also
        # accepted by older releases.
        return librosa.effects.time_stretch(y, rate=rate)

    if choice == 'pitch':
        # Zero is excluded on purpose: a 0-semitone shift would just produce a
        # duplicate of the source recording.
        steps = random.choice([-2, -1, 1, 2])
        # `sr` and `n_steps` are keyword-only in librosa >= 0.10.
        return librosa.effects.pitch_shift(y, sr=sr, n_steps=steps)

    # Remaining option: 'noise'.
    noise = np.random.normal(0, 0.005, y.shape)
    return y + noise

def count_audio_files(directory):
    """Return how many entries directly inside ``directory`` end in '.wav'.

    The match is case-sensitive and non-recursive.
    """
    total = 0
    for entry in os.listdir(directory):
        if entry.endswith('.wav'):
            total += 1
    return total

def smart_augment_dataset(input_dir, output_dir):
    """Copy a class-per-folder audio dataset and augment under-represented classes.

    ``input_dir`` is expected to contain one sub-directory per class, each
    holding ``.wav`` files. Every original file is decoded with librosa and
    re-written under ``output_dir/<class>/``. Classes with fewer than
    ``AUGMENT_TARGET_COUNT`` files additionally receive randomly augmented
    variants (see ``augment_randomly``) until the target is reached or every
    file has produced its per-file quota, whichever comes first. The quota is
    capped by ``AUGMENT_COUNT_PER_FILE``, so very small classes can end up
    below the target.

    Parameters
    ----------
    input_dir : str
        Root directory of the source dataset.
    output_dir : str
        Root directory for the balanced dataset; created if missing.

    Notes
    -----
    * Entries of ``input_dir`` that are not directories are ignored.
    * Class folders without any ``.wav`` file are skipped (an empty output
      folder is still created for them).
    * Augmented files are named ``<stem>_aug<i>.wav`` where ``<stem>`` is the
      source file name minus its final extension.
    """
    os.makedirs(output_dir, exist_ok=True)

    # Sorted so that, with seeded RNGs, repeated runs give the same output.
    for class_label in sorted(os.listdir(input_dir)):
        class_path = os.path.join(input_dir, class_label)
        if not os.path.isdir(class_path):
            # Stray files (e.g. .DS_Store, README) are not classes.
            continue

        out_class_path = os.path.join(output_dir, class_label)
        os.makedirs(out_class_path, exist_ok=True)

        existing_files = sorted(f for f in os.listdir(class_path) if f.endswith('.wav'))
        n_existing = len(existing_files)

        print(f"Class '{class_label}' has {n_existing} original files.")

        if n_existing == 0:
            # Nothing to copy or augment; also avoids dividing by zero below.
            print(f"Skipping class '{class_label}': no .wav files found.")
            continue

        needed_augments = max(0, AUGMENT_TARGET_COUNT - n_existing)
        if needed_augments > 0:
            # Spread the missing samples evenly over the source files, rounding
            # up, but never exceed the per-file cap.
            augments_per_file = max(
                1, min(AUGMENT_COUNT_PER_FILE, needed_augments // n_existing + 1)
            )
            print(f"Augmenting class '{class_label}' with approx {augments_per_file}x per file")
        else:
            # Class is already large enough: copy only.
            augments_per_file = 0

        for file in existing_files:
            src_path = os.path.join(class_path, file)
            # Decode once and reuse the samples for both the copy and the
            # augmented variants.
            y, sr = librosa.load(src_path, sr=None)
            sf.write(os.path.join(out_class_path, file), y, sr)

            # splitext keeps dots inside the name ("take.01.wav" -> "take.01"),
            # so distinct sources never map to the same augmented file name.
            stem = os.path.splitext(file)[0]
            for i in range(augments_per_file):
                if needed_augments <= 0:
                    break
                y_aug = augment_randomly(y, sr)
                out_path = os.path.join(out_class_path, f"{stem}_aug{i}.wav")
                sf.write(out_path, y_aug, sr)
                needed_augments -= 1

        if augments_per_file:
            print(f"Finished class '{class_label}' with total ~{count_audio_files(out_class_path)} files.")

# To run:
# smart_augment_dataset("audio_data", "balanced_data")


Collecting librosa
  Downloading librosa-0.11.0-py3-none-any.whl.metadata (8.7 kB)
Collecting audioread>=2.1.9 (from librosa)
  Using cached audioread-3.0.1-py3-none-any.whl.metadata (8.4 kB)
Collecting numba>=0.51.0 (from librosa)
  Using cached numba-0.61.2-cp311-cp311-win_amd64.whl.metadata (2.9 kB)
Collecting numpy>=1.22.3 (from librosa)
  Downloading numpy-2.3.1-cp311-cp311-win_amd64.whl.metadata (60 kB)
Collecting scipy>=1.6.0 (from librosa)
  Using cached scipy-1.16.0-cp311-cp311-win_amd64.whl.metadata (60 kB)
Collecting scikit-learn>=1.1.0 (from librosa)
  Using cached scikit_learn-1.7.1-cp311-cp311-win_amd64.whl.metadata (11 kB)
Collecting joblib>=1.0 (from librosa)
  Using cached joblib-1.5.1-py3-none-any.whl.metadata (5.6 kB)
Collecting soundfile>=0.12.1 (from librosa)
  Using cached soundfile-0.13.1-py2.py3-none-win_amd64.whl.metadata (16 kB)
Collecting pooch>=1.1 (from librosa)
  Using cached pooch-1.8.2-py3-none-any.whl.metadata (10 kB)
Collecting soxr>=0.3.2 (from libros

In [6]:
%pip install numpy==1.23.5 librosa==0.10.1


import tensorflow_hub as hub
import numpy as np
import librosa

# TF Hub handle of the YAMNet model.
yamnet_model_handle = 'https://tfhub.dev/google/yamnet/1'
# Loaded once when the cell runs; extract_yamnet_embedding() calls this object
# for every file.
yamnet_model = hub.load(yamnet_model_handle)

def extract_yamnet_embedding(wav_path):
    """Return a single clip-level YAMNet embedding for an audio file.

    The file is loaded with librosa at 16 kHz, truncated to its first
    10 seconds, passed through ``yamnet_model`` and the per-frame embeddings
    are averaged over the frame axis.

    Parameters
    ----------
    wav_path : str
        Path of the audio file to embed.

    Returns
    -------
    np.ndarray
        Mean of the model's frame embeddings along axis 0.
    """
    waveform, sr = librosa.load(wav_path, sr=16000)
    waveform = waveform[:sr * 10]  # Limit to 10 seconds
    # YAMNet takes a 1-D float32 waveform; a leading batch axis ([1, N]) does
    # not match the model's input signature, so keep the signal flat.
    waveform = np.asarray(waveform, dtype=np.float32).reshape(-1)
    _scores, embeddings, _spectrogram = yamnet_model(waveform)
    return np.mean(embeddings.numpy(), axis=0)


Collecting numpy==1.23.5
  Using cached numpy-1.23.5-cp311-cp311-win_amd64.whl.metadata (2.3 kB)
Collecting librosa==0.10.1
  Using cached librosa-0.10.1-py3-none-any.whl.metadata (8.3 kB)
INFO: pip is looking at multiple versions of numba to determine which version is compatible with other requirements. This could take a while.
Collecting numba>=0.51.0 (from librosa==0.10.1)
  Downloading numba-0.61.0-cp311-cp311-win_amd64.whl.metadata (2.8 kB)
  Downloading numba-0.60.0-cp311-cp311-win_amd64.whl.metadata (2.8 kB)
Collecting llvmlite<0.44,>=0.43.0dev0 (from numba>=0.51.0->librosa==0.10.1)
  Downloading llvmlite-0.43.0-cp311-cp311-win_amd64.whl.metadata (4.9 kB)
INFO: pip is looking at multiple versions of scipy to determine which version is compatible with other requirements. This could take a while.
Collecting scipy>=1.2.0 (from librosa==0.10.1)
  Downloading scipy-1.15.3-cp311-cp311-win_amd64.whl.metadata (60 kB)
Downloading numpy-1.23.5-cp311-cp311-win_amd64.whl (14.6 MB)
   ------

  You can safely remove it manually.
  You can safely remove it manually.
  You can safely remove it manually.
ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
tensorflow 2.19.0 requires numpy<2.2.0,>=1.26.0, but you have numpy 1.23.5 which is incompatible.













