In [1]:
import os
import librosa 
import librosa.core
import librosa.util
import numpy as np
import scipy as sp
import scipy.io
import soundfile as sf
import matplotlib.pyplot as plt
import pandas as pd
from multiprocessing import Pool, cpu_count
from functools import partial
import tqdm

# Add noise to original dataset

In [2]:
def add_noise(in_path, dataset_name, noise_type, snr,
              noise_dir='/nas/home/cborrelli/speech_forensics/dataset/noise'):
    """Mix a noise recording into one clean audio file and save it as WAV.

    The output path is derived from ``in_path`` by replacing ``dataset_name``
    with ``'<dataset_name>_<noise_type>_<snr>'`` and switching the file
    extension to ``.wav``. If that file already exists the function returns
    immediately without reading any audio.

    Parameters
    ----------
    in_path : str
        Path of the clean audio file; expected to contain ``dataset_name``.
    dataset_name : str
        Substring of ``in_path`` that identifies the dataset folder.
    noise_type : str
        Base name (without ``.wav``) of the noise file inside ``noise_dir``.
    snr : int
        Target signal-to-noise ratio in dB. Must be an ``int`` because it is
        formatted with ``{:d}`` when building the output folder name.
    noise_dir : str, optional
        Directory that holds the noise WAV files.

    Raises
    ------
    ValueError
        If the noise sampling rate is lower than that of the clean signal,
        or if the noise has fewer samples than the clean signal.
    """
    # generate output file path; splitext only touches the real extension,
    # so directories that happen to contain the extension text stay intact
    noisy_name = '{:s}_{:s}_{:d}'.format(dataset_name, noise_type, snr)
    dest_path = os.path.splitext(in_path.replace(dataset_name, noisy_name))[0] + '.wav'

    # skip work that was already done in a previous run
    if os.path.exists(dest_path):
        return

    # read clean signal
    clean, samplerate = sf.read(in_path)
    clean = np.asarray(clean)

    # read and normalize noise
    # NOTE(review): the 2**15 divisor assumes 16-bit PCM noise files - confirm
    noise_path = os.path.join(noise_dir, noise_type + '.wav')
    noise_samplerate, noise = scipy.io.wavfile.read(noise_path)
    noise = np.float32(noise) / np.float32(2**(16-1))

    # resample only when the rates differ; equal rates need no processing
    if noise_samplerate < samplerate:
        raise ValueError('Noise sampling rate is too low.')
    if noise_samplerate > samplerate:
        # keyword arguments are accepted by both old and new librosa releases
        noise = librosa.resample(noise, orig_sr=noise_samplerate, target_sr=samplerate)

    # trim the noise to the clean length; an exact-length noise is acceptable
    if noise.shape[0] < clean.shape[0]:
        raise ValueError('Noise is too short.')
    noise = noise[:clean.shape[0]]

    # normalize to 0 dB nominal level: give the noise the same mean power
    # as the clean signal
    norm_factor = np.sqrt(np.mean(np.abs(clean)**2) / np.mean(np.abs(noise)**2))
    noise = noise * norm_factor

    # attenuate the noise so that the clean-to-noise ratio equals snr dB
    noise_gain = 1 / (10**(snr/20))
    noise_scaled = noise * noise_gain

    # mix the two signals (the sum is halved, presumably to leave headroom)
    noisy = 0.5*(clean + noise_scaled)

    # save file to disk; exist_ok tolerates another worker creating the
    # directory first, while real failures (e.g. permissions) still surface
    dest_dir = os.path.dirname(dest_path)
    if dest_dir:
        os.makedirs(dest_dir, exist_ok=True)
    scipy.io.wavfile.write(dest_path, samplerate, noisy)

    return

In [3]:
def add_noise_dataset(dataset_csv_path, noise_type, snr):
    """Create the noisy version of every file listed in a dataset CSV.

    The CSV must provide a ``path`` column (one clean audio file per row) and
    a ``librispeech_folder`` column. Every path is handed to ``add_noise`` in
    a pool with one worker process per CPU reported by ``cpu_count()``, and a
    progress bar is shown while the results arrive.

    Parameters
    ----------
    dataset_csv_path : str
        Path of the CSV file describing the clean dataset.
    noise_type : str
        Noise name forwarded to ``add_noise``.
    snr : int
        Target signal-to-noise ratio in dB forwarded to ``add_noise``.
    """
    orig_db = pd.read_csv(dataset_csv_path)
    in_path_list = orig_db['path'].to_list()

    # an empty listing means there is nothing to do; returning here also
    # avoids indexing into an empty unique() result below
    if not in_path_list:
        return

    # NOTE(review): only the first folder name is used for every row, so the
    # CSV is assumed to describe a single dataset - confirm
    dataset_name = orig_db['librispeech_folder'].unique()[0]

    f_part = partial(add_noise, dataset_name=dataset_name, noise_type=noise_type, snr=snr)

    # the context manager terminates the workers if anything below raises,
    # so a failing file no longer leaves orphaned processes behind
    with Pool(cpu_count()) as pool:
        for _ in tqdm.tqdm(pool.imap_unordered(f_part, in_path_list), total=len(in_path_list)):
            pass
        # normal path: shut the pool down gracefully, as before
        pool.close()
        pool.join()
    return

## Run from here

In [4]:
# Datasets to process; each name selects the listing '../csv/<name>.csv'.
dataset_name = [
    'dev-clean',
]

# Noise recordings to mix in, given as file base names (no extension).
noise_type = [
    'train',
]

# Target signal-to-noise ratios, in dB.
snr = [0, 5, 10, 15]

In [None]:
# Sweep every dataset / noise type / SNR combination, one dataset CSV at a time.
for a in dataset_name:
    in_dataset_csv = '../csv/' + a + '.csv'
    # Same ordering as nested loops: noise type outermost, SNR innermost.
    for n, s in [(n, s) for n in noise_type for s in snr]:
        add_noise_dataset(in_dataset_csv, n, s)