# Generate the multilingual noise for MuAViC

In [2]:
import os
import numpy as np
from scipy.io import wavfile
from tqdm import tqdm
import IPython

In [3]:
# Root directory of the MuAViC dataset on disk; every path below is built from it.
muavic_root = '/data/sls/scratch/roudi/datasets/muavic/'

# Language codes whose training manifests are sampled for the babble mix.
langs = [
    'en', 'ar', 'de',
    'el', 'es', 'fr',
    'it', 'pt', 'ru',
]

# Sample rate (Hz) used when writing and playing back the mixed audio.
sample_rate = 16_000

# Number of utterances combined into one babble track.
num_samples = 30

# Minimum-length threshold: 15 seconds' worth of samples at `sample_rate`.
min_len = sample_rate * 15

In [4]:
def mix_audio(wav_fns):
    """Average several WAV files sample-by-sample into one int16 signal.

    Every path in ``wav_fns`` is loaded with ``scipy.io.wavfile.read``; the
    sample rate returned by the reader is discarded. All signals are cut to
    the length of the shortest one, stacked, averaged along the file axis and
    the mean is cast to ``np.int16``.

    Args:
        wav_fns: iterable of paths to WAV files.

    Returns:
        The averaged signal as an ``np.int16`` array.
    """
    signals = []
    for path in wav_fns:
        _, samples = wavfile.read(path)  # the rate is intentionally ignored
        signals.append(samples)

    # Trim everything to the shortest clip so the arrays can be stacked.
    shortest = min(len(sig) for sig in signals)
    trimmed = [sig[:shortest] for sig in signals]

    mixed = np.mean(np.stack(trimmed), axis=0)
    return mixed.astype(np.int16)

# Generate multilingual babble (30 speakers, cycling through the languages in order, random utterances per language)

In [5]:
def mix_audio_all(langs, num_samples, min_len, seed=None):
    """Mix randomly chosen training utterances from several languages into one track.

    For each language the manifest ``<muavic_root>/muavic/<lang>/train.tsv`` is
    read (``muavic_root`` is the notebook-level setting). The first line of the
    manifest is skipped. On every remaining non-blank line the third
    tab-separated field is used as the WAV path and the last field is parsed
    as an integer length; only entries whose length is strictly greater than
    ``min_len`` are kept. Up to ``num_samples`` of the kept paths are drawn at
    random per language.
    NOTE(review): the last column is presumably the clip length in audio
    samples, and the skipped first line presumably a header/root line --
    confirm against the manifest format.

    Files are then taken from the languages in round-robin order (the order of
    ``langs``) until ``num_samples`` paths are collected. The chosen paths are
    printed so the exact composition of the mix can be recorded, and the files
    are combined with ``mix_audio``.

    Args:
        langs: sequence of language codes; one manifest is read per code.
        num_samples: total number of utterances to mix (must be >= 1).
        min_len: length threshold; entries must exceed it to be eligible.
        seed: optional seed for the random selection. ``None`` (the default)
            keeps the previous behaviour of drawing from NumPy's global
            random state.

    Returns:
        Whatever ``mix_audio`` returns for the selected files.

    Raises:
        ValueError: if ``langs`` is empty or ``num_samples`` is less than 1.
        IndexError: if a language runs out of eligible files before
            ``num_samples`` paths have been collected.
    """
    langs = list(langs)
    if not langs:
        # The round-robin below would otherwise never make progress.
        raise ValueError("langs must contain at least one language")
    if num_samples < 1:
        raise ValueError(f"num_samples must be >= 1, got {num_samples}")

    if seed is None:
        permute = np.random.permutation
    else:
        permute = np.random.default_rng(seed).permutation

    lang_to_fns = {}
    for lang in langs:
        tsv_fn = os.path.join(muavic_root, 'muavic', lang, 'train.tsv')
        candidates = []
        # Context manager so the manifest handle is closed deterministically.
        with open(tsv_fn) as fi:
            next(fi, None)  # skip the first manifest line
            for ln in fi:
                ln = ln.strip()
                if not ln:
                    continue  # tolerate blank lines (e.g. a trailing newline)
                fields = ln.split('\t')
                if int(fields[-1]) > min_len:
                    candidates.append(fields[2])
        indexes = permute(len(candidates))[:num_samples]
        lang_to_fns[lang] = [candidates[i] for i in indexes]

    # Round-robin over the languages: sample i comes from langs[i % len(langs)].
    multi_fns = []
    for i in range(num_samples):
        lang = langs[i % len(langs)]
        pool = lang_to_fns[lang]
        if not pool:
            raise IndexError(
                f"language {lang!r} has too few files longer than {min_len} "
                f"to collect {num_samples} samples"
            )
        multi_fns.append(pool.pop())

    # Log the exact files used so the mix can be documented.
    print(len(multi_fns))
    for fn in multi_fns:
        print(fn)
    wav_data = mix_audio(multi_fns)
    return wav_data

In [None]:
# Noise manifests (validation and test) that will point at the generated babble file.
multilingual_tsvs = [os.path.join(muavic_root, 'noise', 'tsv', 'babble_multilingual', 'valid.tsv'),
                     os.path.join(muavic_root, 'noise', 'tsv', 'babble_multilingual', 'test.tsv')]

# Use the settings from the configuration cell instead of repeating their
# values here, so changing the config actually changes the generated mix.
mixed = mix_audio_all(langs, num_samples=num_samples, min_len=min_len)
IPython.display.display(IPython.display.Audio(data=mixed, rate=sample_rate))

output_wav = os.path.join(muavic_root, 'noise', 'babble', 'babble_all_2nd.wav')
# Make sure the target directory exists before writing the WAV file.
os.makedirs(os.path.dirname(output_wav), exist_ok=True)
wavfile.write(output_wav, sample_rate, mixed)

# Each manifest consists of a single line: the absolute path of the babble file.
for tsv in multilingual_tsvs:
    os.makedirs(os.path.dirname(tsv), exist_ok=True)
    with open(tsv, 'w') as fo:
        fo.write(os.path.abspath(output_wav)+'\n')

The precise list of files used was:
/data/sls/scratch/roudi/datasets/muavic/muavic/en/audio/train/tsFKSfItBoE/00011_0.wav
/data/sls/scratch/roudi/datasets/muavic/muavic/ar/audio/train/AgoTCRmuWiA/AgoTCRmuWiA_0026.wav
/data/sls/scratch/roudi/datasets/muavic/muavic/de/audio/train/6eNxVeS6Cvo/6eNxVeS6Cvo_0037.wav
/data/sls/scratch/roudi/datasets/muavic/muavic/el/audio/train/lGJGgz2OB2o/lGJGgz2OB2o_0069.wav
/data/sls/scratch/roudi/datasets/muavic/muavic/es/audio/train/Az69dGCXG10/Az69dGCXG10_0066.wav
/data/sls/scratch/roudi/datasets/muavic/muavic/fr/audio/train/5OAaHpG1fUI/5OAaHpG1fUI_0048.wav
/data/sls/scratch/roudi/datasets/muavic/muavic/it/audio/train/5E3WKFetiUs/5E3WKFetiUs_0055.wav
/data/sls/scratch/roudi/datasets/muavic/muavic/pt/audio/train/EZ8xfYRkDTQ/EZ8xfYRkDTQ_0054.wav
/data/sls/scratch/roudi/datasets/muavic/muavic/ru/audio/train/N0TkD0BmjV4/N0TkD0BmjV4_0033.wav
/data/sls/scratch/roudi/datasets/muavic/muavic/en/audio/train/hBOVuIIQUFc/00008_3.wav
/data/sls/scratch/roudi/datasets/muavic/muavic/ar/audio/train/40xEYy37DWM/40xEYy37DWM_0028.wav
/data/sls/scratch/roudi/datasets/muavic/muavic/de/audio/train/nPAPM8URceE/nPAPM8URceE_0136.wav
/data/sls/scratch/roudi/datasets/muavic/muavic/el/audio/train/B9HIoXVmdAc/B9HIoXVmdAc_0073.wav
/data/sls/scratch/roudi/datasets/muavic/muavic/es/audio/train/Vg5sfCX8B8I/Vg5sfCX8B8I_0085.wav
/data/sls/scratch/roudi/datasets/muavic/muavic/fr/audio/train/UPc_xK2ZSJs/UPc_xK2ZSJs_0013.wav
/data/sls/scratch/roudi/datasets/muavic/muavic/it/audio/train/WIKqmn61mEY/WIKqmn61mEY_0026.wav
/data/sls/scratch/roudi/datasets/muavic/muavic/pt/audio/train/jl88rYfvR6A/jl88rYfvR6A_0144.wav
/data/sls/scratch/roudi/datasets/muavic/muavic/ru/audio/train/6KVYHOm7a3g/6KVYHOm7a3g_0077.wav
/data/sls/scratch/roudi/datasets/muavic/muavic/en/audio/train/rPh3c8Sa37M/00012_7.wav
/data/sls/scratch/roudi/datasets/muavic/muavic/ar/audio/train/HaPQY8CsT0g/HaPQY8CsT0g_0017.wav
/data/sls/scratch/roudi/datasets/muavic/muavic/de/audio/train/50pno3SGSPc/50pno3SGSPc_0039.wav
/data/sls/scratch/roudi/datasets/muavic/muavic/el/audio/train/yb6SuLhQAy8/yb6SuLhQAy8_0013.wav
/data/sls/scratch/roudi/datasets/muavic/muavic/es/audio/train/u_694JfFUXA/u_694JfFUXA_0031.wav
/data/sls/scratch/roudi/datasets/muavic/muavic/fr/audio/train/EaaXptKfIf0/EaaXptKfIf0_0039.wav
/data/sls/scratch/roudi/datasets/muavic/muavic/it/audio/train/DVrbPt91jgg/DVrbPt91jgg_0014.wav
/data/sls/scratch/roudi/datasets/muavic/muavic/pt/audio/train/paB2pg9pB98/paB2pg9pB98_0067.wav
/data/sls/scratch/roudi/datasets/muavic/muavic/ru/audio/train/WaExIHbpQXs/WaExIHbpQXs_0021.wav
/data/sls/scratch/roudi/datasets/muavic/muavic/en/audio/train/S0hxl03JoA0/00026_5.wav
/data/sls/scratch/roudi/datasets/muavic/muavic/ar/audio/train/mPGJb1GYcjM/mPGJb1GYcjM_0031.wav
/data/sls/scratch/roudi/datasets/muavic/muavic/de/audio/train/V-urvHlqGkE/V-urvHlqGkE_0016.wav