### Adjust Configuration

If needed, adjust the learning configuration in [src/config.py](src/config.py).

### Create Python Virtual Environment

In [None]:
from pathlib import Path
if not (Path.cwd() / '.venv').exists():
    !python -m venv .venv

### Install packages

In [None]:
# Install the packages listed in requirements.txt using IPython's %pip magic.
%pip install -r requirements.txt

### Check Learning Samples

In [None]:
# Run the sample-check script as a shell command (the leading "!" hands the
# line to the system shell); its output is shown below the cell.
!python learn/01.check-samples.py

### Download Room Impulse Responses

In [None]:
# Run the room-impulse-response download script as a shell command.
# NOTE(review): presumably fills `data/mit_rirs`, which the augmentation cell
# reads as `ir_path` - confirm against the script.
!python learn/02.download-rirs.py

### Download Background Sounds

In [None]:
# Run the background-sound download script as a shell command.
# NOTE(review): presumably fills `data/esc50`, which the augmentation cell
# reads as `sounds_path` - confirm against the script.
!python learn/03.download-background.py

In [None]:

import os
import sys
import math
from tqdm import tqdm
import numpy as np
from scipy.io import wavfile
from src.common import find_samples
from IPython.display import Audio, display
import src.config as cfg
import audiomentations
import random
from scipy.signal import resample

# Preview cell: for the first PREVIEW_LIMIT samples, play the original audio,
# optionally stretch it by resampling (scaling the labels to match), then run
# the augmentation pipeline and play the augmented result.
PREVIEW_LIMIT = 20


def _resample_bounds(labels):
    """Return the ``(rate_min, rate_max)`` stretch bounds for one sample.

    Only labels whose text starts with ``p`` (case-insensitive) are
    considered. The bounds keep the shortest such label at or above
    ``cfg.MIN_TARGET_LENGTH_SEC`` and the longest at or below
    ``cfg.MAX_TARGET_LENGTH_SEC`` after stretching, clamped to the configured
    ``resample_min_rate`` / ``resample_max_rate``. The caller must check
    ``rate_min <= rate_max``; an inverted pair means no valid rate exists.

    Labels with a non-positive length are ignored so that they can neither
    cause a division by zero nor produce a negative bound. When no usable
    label remains, the midpoint of the target length range stands in for both
    the shortest and the longest label.
    """
    lengths = [
        label.end - label.begin
        for label in labels
        if label.text.lower().startswith('p')
    ]
    lengths = [length for length in lengths if length > 0]
    if lengths:
        min_length, max_length = min(lengths), max(lengths)
    else:
        min_length = max_length = (
            cfg.MAX_TARGET_LENGTH_SEC + cfg.MIN_TARGET_LENGTH_SEC
        ) / 2
    rate_min = max(
        cfg.modifications.resample_min_rate,
        cfg.MIN_TARGET_LENGTH_SEC / min_length,
    )
    rate_max = min(
        cfg.modifications.resample_max_rate,
        cfg.MAX_TARGET_LENGTH_SEC / max_length,
    )
    return rate_min, rate_max


# Every argument of the pipeline comes from the configuration, so it is built
# once instead of once per sample. Each call still draws fresh random
# parameters for the individual transforms.
tr = audiomentations.Compose([
    audiomentations.AddBackgroundNoise(
        p=cfg.modifications.background_noise_probability,
        min_snr_db=cfg.modifications.background_noise_min_db,
        max_snr_db=cfg.modifications.background_noise_max_db,
        sounds_path='data/esc50',
        noise_rms='relative',
    ),
    audiomentations.AddColorNoise(
        p=cfg.modifications.color_noise_probability,
        min_snr_db=cfg.modifications.color_noise_min_db,
        max_snr_db=cfg.modifications.color_noise_max_db,
    ),
    audiomentations.ApplyImpulseResponse(
        p=cfg.modifications.impulse_response_probability,
        ir_path='data/mit_rirs',
    ),
    # Always applied: bring every sample to the same loudness before the
    # optional random gain.
    audiomentations.LoudnessNormalization(
        p=1.0,
        min_lufs=-22.0,
        max_lufs=-22.0,
    ),
    audiomentations.Gain(
        p=cfg.modifications.gain_probability,
        min_gain_db=cfg.modifications.gain_min_db,
        max_gain_db=cfg.modifications.gain_max_db,
    ),
    # Always applied as the final step.
    audiomentations.Clip(
        p=1.0,
    ),
])

for i, sample in enumerate(tqdm(find_samples(cfg.SAMPLE_DIR)), start=1):
    sample_rate, data = wavfile.read(sample.wav)
    # NOTE(review): the divisor assumes 16-bit PCM input - confirm that all
    # sample files use that format.
    data = data.astype(np.float32) / 32767
    display(Audio(data=data, rate=sample_rate))

    if random.random() < cfg.modifications.resample_probability:
        rate_min, rate_max = _resample_bounds(sample.labels)
        if rate_min <= rate_max:
            rate = random.uniform(rate_min, rate_max)
            new_sample_count = int(math.ceil(data.shape[0] * rate))
            # Use the ratio actually realised after rounding up to a whole
            # number of samples, so the labels stay aligned with the audio.
            rate = new_sample_count / data.shape[0]
            data = resample(data, new_sample_count)
            for label in sample.labels:
                label.begin *= rate
                label.end *= rate
            print(f'Resampling {sample.wav} with rate {rate_min:.2f} - {rate_max:.2f}: {rate:.2f}')
        else:
            print(f'Skipping resampling of {sample.wav}: empty rate range {rate_min:.2f} - {rate_max:.2f}')

    data2 = tr(samples=data, sample_rate=sample_rate)
    # normalize=False so the player reflects the loudness/gain/clip stages
    # instead of rescaling the signal.
    display(Audio(data=data2, rate=sample_rate, normalize=False))
    if i == PREVIEW_LIMIT:
        break