In [1]:
import tensorflow as tf
import malaya_speech.augmentation.waveform as augmentation
import malaya_speech
from glob import glob
import random
import numpy as np
import IPython.display as ipd

# Make NumPy raise FloatingPointError for every floating-point condition
# (divide, overflow, underflow, invalid) instead of warning or ignoring it.
# The value echoed below the cell is the previous configuration that
# np.seterr returns.
# NOTE(review): this also turns underflow (previously 'ignore') into an
# exception — confirm that is intended for the audio maths that follows.
np.seterr(all='raise')

{'divide': 'warn', 'over': 'warn', 'under': 'ignore', 'invalid': 'warn'}

In [2]:
# Seed both generators up front so the shuffles (and the sampling done by later
# cells) start from a known state. Helpers in malaya_speech may draw from either
# `random` or `np.random`, so both are seeded.
SEED = 1234
random.seed(SEED)
np.random.seed(SEED)

# glob returns paths in arbitrary (filesystem-dependent) order; sorting before
# the seeded shuffle makes the resulting order repeatable across machines.
files = sorted(glob('../youtube/clean-wav/*.wav'))
random.shuffle(files)

# Noise pool: recorded noise clips plus the bass / drums / other stems found
# under HHDS/Sources (searched recursively).
noises = sorted(
    glob('../noise-44k/noise/*.wav') + glob('../noise-44k/clean-wav/*.wav')
)
basses = sorted(glob('HHDS/Sources/**/*bass.wav', recursive = True))
drums = sorted(glob('HHDS/Sources/**/*drums.wav', recursive = True))
others = sorted(glob('HHDS/Sources/**/*other.wav', recursive = True))
noises = noises + basses + drums + others
random.shuffle(noises)

In [3]:
def read_wav(f, sr = 44100):
    """Load an audio file through ``malaya_speech.load``.

    Parameters
    ----------
    f : path of the audio file to read.
    sr : sample rate forwarded to ``malaya_speech.load``. Defaults to 44100,
        the value this notebook previously hard-coded.

    Returns
    -------
    Whatever ``malaya_speech.load`` returns. Callers in this notebook take
    element ``[0]`` as the waveform — presumably the result is a
    ``(samples, sample_rate)`` pair; confirm against the library.
    """
    return malaya_speech.load(f, sr = sr)


def random_sampling(s, length, sr = 44100):
    """Take a random excerpt of ``s`` via ``augmentation.random_sampling``.

    Parameters
    ----------
    s : waveform to sample from.
    length : excerpt length, forwarded unchanged to the library helper
        (NOTE(review): unit is defined by malaya_speech — confirm whether it
        is milliseconds or samples).
    sr : sample rate forwarded to the helper. Defaults to 44100, the value
        this notebook previously hard-coded.

    Returns
    -------
    The excerpt returned by ``augmentation.random_sampling``.
    """
    return augmentation.random_sampling(s, sr = sr, length = length)


def combine_speakers(files, n = 5):
    """Overlay ``n`` randomly chosen clips into a single mixture.

    ``n`` paths are drawn from ``files`` with ``random.sample``; each one is
    loaded with ``read_wav`` and cut with ``random_sampling`` using a random
    length between ``20000 // n`` and ``240_000 // n`` that is capped at
    ``100_000 // n``. Every clip is scaled by a random gain in [0.5, 1.0].
    Clips after the first are shifted right by a random fraction
    (0.01 to 1.25) of the current mixture length and added to it, with
    whichever side is shorter zero-padded at the end.

    Returns
    -------
    (mixture, tracks)
        ``mixture`` is the summed signal. ``tracks[0]`` is the first clip as
        sampled (not gain-scaled); each later entry is the scaled, shifted and
        padded clip exactly as it was added to the mixture at that step, so
        entries may differ in length.
    """
    clips = []
    for path in random.sample(files, n):
        # Load first, then draw the length, matching the order of RNG use.
        wave = read_wav(path)[0]
        span = random.randint(20000 // n, 240_000 // n)
        clips.append(random_sampling(wave, length = min(span, 100_000 // n)))

    tracks = [clips[0]]
    mix = clips[0].copy() * random.uniform(0.5, 1.0)

    for clip in clips[1:]:
        voice = clip.copy() * random.uniform(0.5, 1.0)

        # Start offset of this clip, as a fraction of the mixture so far.
        offset = int(random.uniform(0.01, 1.25) * len(mix))
        voice = np.pad(voice, (offset, 0))

        # Zero-pad the tail of whichever signal is shorter.
        shortfall = len(mix) - len(voice)
        if shortfall > 0:
            voice = np.pad(voice, (0, shortfall))
        else:
            mix = np.pad(mix, (0, -shortfall))

        tracks.append(voice)
        mix = mix + voice

    return mix, tracks


def calc(signal, seed, add_uniform = False):
    """Apply one randomly selected sox-based augmentation to ``signal``.

    The module-level ``random`` generator is re-seeded with ``seed`` first, so
    two calls with the same seed pick the same branch and draw the same
    parameters. A choice in 0..12 is drawn: 0 and 1 use ``sox_augment_high``
    (with different bass-gain ranges and ``negate`` values), 2 uses
    ``sox_augment_low``, 3 uses ``sox_augment_combine``, 4 uses ``sox_reverb``
    and 5..12 leave the signal untouched.

    Parameters
    ----------
    signal : waveform to augment.
    seed : value passed to ``random.seed``.
    add_uniform : when true, uniform noise is additionally applied if a further
        random draw exceeds 0.7.

    Returns
    -------
    The augmented signal, or ``signal`` itself when no augmentation was picked.
    """
    random.seed(seed)
    choice = random.randint(0, 12)

    if choice in (0, 1):
        # The two "high" variants differ only in gain ceiling and negate flag.
        gain_cap, negate = (50, 1) if choice == 0 else (70, 0)
        x = augmentation.sox_augment_high(
            signal,
            min_bass_gain = random.randint(25, gain_cap),
            reverberance = random.randint(0, 80),
            hf_damping = 10,
            room_scale = random.randint(0, 50),
            negate = negate,
        )
    elif choice == 2:
        x = augmentation.sox_augment_low(
            signal,
            min_bass_gain = random.randint(5, 30),
            reverberance = random.randint(0, 80),
            hf_damping = 10,
            room_scale = random.randint(0, 50),
            negate = random.randint(0, 1),
        )
    elif choice == 3:
        x = augmentation.sox_augment_combine(
            signal,
            min_bass_gain_high = random.randint(25, 70),
            min_bass_gain_low = random.randint(5, 30),
            reverberance = random.randint(0, 80),
            hf_damping = 10,
            room_scale = random.randint(0, 90),
        )
    elif choice == 4:
        x = augmentation.sox_reverb(
            signal,
            reverberance = random.randint(10, 80),
            hf_damping = 10,
            room_scale = random.randint(10, 90),
        )
    else:
        x = signal

    # The draw happens regardless of add_uniform, so the generator advances
    # identically for both settings of the flag.
    above_threshold = random.random() > 0.7
    if above_threshold and add_uniform:
        x = augmentation.add_uniform_noise(
            x, power = random.uniform(0.005, 0.015)
        )

    return x

In [4]:
def parallel(f):
    """Build one (noisy, clean, noise) example from a clean speech file.

    A random excerpt of ``f`` is mixed with a random combination of clips from
    the module-level ``noises`` pool. Both the clean excerpt and the noise
    mixture go through ``calc`` with the same seed, so they receive the same
    augmentation choice; only the noise may additionally get uniform noise.

    Parameters
    ----------
    f : path of the clean speech file.

    Returns
    -------
    (combined, y, noise)
        ``combined`` and ``noise`` are the two values returned by
        ``augmentation.add_noise(..., return_noise = True)``; ``y`` is the
        augmented clean excerpt.

    Raises
    ------
    ValueError
        If the ``noises`` pool is empty.
    """
    y = random_sampling(
        read_wav(f)[0], length = random.randint(30000, 100_000)
    )

    # combine_speakers draws with random.sample, which raises ValueError when
    # asked for more items than the pool holds; cap the count at the pool size
    # so a small noise pool no longer fails intermittently.
    n_clips = random.randint(1, min(20, len(noises)))
    n = combine_speakers(noises, n_clips)[0]

    # One shared seed keeps the augmentation applied to speech and noise aligned.
    seed = random.randint(0, 100_000_000)
    y = calc(y, seed)
    n = calc(n, seed, True)
    combined, noise = augmentation.add_noise(
        y, n, factor = random.uniform(0.1, 0.9), return_noise = True
    )
    return combined, y, noise

In [5]:
# Smoke-test the pipeline on a single clip. r is the (combined, y, noise)
# tuple returned by parallel.
r = parallel(files[0])

In [6]:
# sr = 44100
# ipd.Audio(r[0][:10 * sr], rate = sr)

In [7]:
# ipd.Audio(r[1][:10 * sr], rate = sr)

In [8]:
# ipd.Audio(r[2][:10 * sr], rate = sr)

In [9]:
from tqdm import tqdm

# Generate up to 100 examples. A clip that fails is skipped (best effort), but
# the failure is recorded instead of being silently discarded, and only
# Exception subclasses are caught so Ctrl-C / kernel interrupt still works.
results = []
failures = []
for i in tqdm(range(min(100, len(files)))):
    try:
        results.append(parallel(files[i]))
    except Exception as exc:
        failures.append((files[i], repr(exc)))

if failures:
    print(
        f'skipped {len(failures)} clip(s); first failure: '
        f'{failures[0][0]} -> {failures[0][1]}'
    )

100%|██████████| 100/100 [02:32<00:00,  1.53s/it]


In [10]:
import pickle

# Persist the generated examples: `results` is the list of tuples collected
# from parallel(), written as a single pickle in the working directory.
with open('test-set-noise-reduction.pkl', 'wb') as fopen:
    pickle.dump(results, fopen)