In [1]:
import math
import torch

from denoiser.data.audio import Audio
from denoiser.data.augmentations import BackgroundNoise, BatchAugmentParameters
from IPython.display import display
from IPython.display import Audio as AudioPlayer

In [18]:
x = Audio("/data/denoising/speech/daps/clean/f10_script1_clean.wav")
x = x.resample(24_000)
bnoise = BackgroundNoise(
    "/data/denoising/noise/records/DEMAND/48k/index.train.json",
    min_snr=-15.0,
    max_snr=5.0,
    p=0.8,
)

In [19]:
AudioPlayer(x.random_excerpt(1.7).waveform.numpy(), rate=x.sample_rate)

In [20]:
excerpt = x.random_excerpt(1)
AudioPlayer(excerpt.waveform.numpy(), rate=excerpt.sample_rate)

In [21]:
bnoise_params = bnoise.sample_augment_parameters(excerpt)
bnoise_params = BatchAugmentParameters([bnoise_params])

In [22]:
print(bnoise_params.clean_loudness)
print(bnoise_params.noise_loudness)
print(bnoise_params.snr)
print(bnoise_params.clean_loudness - bnoise_params.noise_loudness - bnoise_params.snr)
print((math.log(10) / 20 * (bnoise_params.clean_loudness - bnoise_params.noise_loudness - bnoise_params.snr)).exp())
print(bnoise_params.noise.abs().max())


tensor([-20.3694])
tensor([-52.5533])
tensor([-1.2403])
tensor([33.4241])
tensor([46.9035])
tensor(0.0402)


In [23]:
augmented = bnoise.augment(excerpt.waveform, bnoise_params)
AudioPlayer(augmented[0], rate=excerpt.sample_rate)

In [24]:
noise = bnoise_params.noise[0]
print(bnoise_params.snr[0])
AudioPlayer(noise.numpy(), rate=excerpt.sample_rate)

tensor(-1.2403)


In [10]:
from concurrent.futures import ThreadPoolExecutor

with ThreadPoolExecutor(4) as executor:
    futures = [executor.submit(x.random_excerpt, 1) for _ in range(16)]
    excerpts = [future.result() for future in futures]

In [11]:
bnoise_params = [bnoise.sample_augment_parameters(excerpt) for excerpt in excerpts]
bnoise_params = BatchAugmentParameters(bnoise_params)

In [12]:
excerpts_waveforms = torch.stack([excerpt.waveform for excerpt in excerpts])
augmented = bnoise.augment(excerpts_waveforms, bnoise_params)

In [13]:
augmented.shape

torch.Size([16, 1, 24000])

In [14]:
((augmented - excerpts_waveforms).abs() < 1e-8).sum(dim=(1, 2))

tensor([24000,     0,     0,     0,     0,     0,     0, 24000,     0, 24000,
            0,     0,     0,     0, 24000,     0])

In [15]:
for i in range(8):
    print(bnoise_params.clean_loudness[i], bnoise_params.noise_loudness[i], bnoise_params.snr[i])
    print((bnoise_params.clean_loudness[i] - bnoise_params.noise_loudness[i] - bnoise_params.snr[i]), (bnoise_params.clean_loudness[i] - bnoise_params.noise_loudness[i] - bnoise_params.snr[i]).exp())
    display(AudioPlayer(augmented[i], rate=24000))

tensor(-20.3694) tensor(-70.) tensor(3.1060)
tensor(46.5246) tensor(1.6047e+20)


tensor(-20.3694) tensor(-30.8167) tensor(-9.0863)
tensor(19.5336) tensor(3.0432e+08)


tensor(-20.3694) tensor(-28.3574) tensor(2.9739)
tensor(5.0142) tensor(150.5282)


tensor(-20.3694) tensor(-31.5960) tensor(-9.0408)
tensor(20.2674) tensor(6.3390e+08)


tensor(-20.3694) tensor(-29.7731) tensor(-4.9483)
tensor(14.3519) tensor(1709868.1250)


tensor(-20.3694) tensor(-29.5912) tensor(-4.3710)
tensor(13.5927) tensor(800279.4375)


tensor(-20.3694) tensor(-32.4926) tensor(-9.5915)
tensor(21.7146) tensor(2.6949e+09)


tensor(-20.3694) tensor(-70.) tensor(-5.5628)
tensor(55.1934) tensor(9.3366e+23)


In [16]:
for i in range(8):
    print(bnoise_params.noise[i].abs().max())
    display(AudioPlayer(bnoise_params.noise[i], rate=24000))
    print()

tensor(0.)


  scaled = data / normalization_factor * 32767
  return scaled.astype("<h").tobytes(), nchan



tensor(0.1353)



tensor(0.1334)



tensor(0.1361)



tensor(0.1415)



tensor(0.0924)



tensor(0.0941)



tensor(0.)



