## Mixing Audio - LibriSpeech
This file was used to mix the LibriSpeech corpus with background noise to create a paired dataset.

In [2]:
import os,sys,inspect
currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
parentdir = os.path.dirname(currentdir)
sys.path.insert(0,parentdir)

import numpy as np
import soundfile as sf
import librosa
import IPython.display as ipd
from sklearn.preprocessing import minmax_scale
from glob import glob

In [13]:
def mix_audio(voice_file, background_file):
    """
    Takes the location of two audio files (a voice file and a background noise file),
    loads them, and mixes them with a 60:40 ratio.

    A random segment of the background, with the same number of samples as the
    voice, is cut out. Both signals are min-max scaled to [-1, 1] and summed
    with weights 0.6 (voice) and 0.4 (background).

    Args:
    voice_file: String - The location of the voice file.
    background_file: String - The location of the background noise file. After
        resampling to the voice sample rate it must be at least as long as the
        voice file.

    Output:
    norm_mix: 1D array - Normalised 60:40 mixture of voice and background_segment.
    voice_sr: Int - The sample rate of the voice file (which the returned background noise also matches).
    background_segment: 1D array - The (un-normalised) background noise segment with len = len(voice).
    voice: 1D array - The (un-normalised) voice audio.

    Raises:
    ValueError: If the background is shorter than the voice.
    """
    # Load the files
    # NOTE(review): the code below assumes mono (1D) audio - confirm for the inputs used.
    voice, voice_sr = sf.read(voice_file)
    background, background_sr = sf.read(background_file)

    # Bring the background to the voice sample rate. librosa.resample returns
    # a new array, so the result has to be assigned back.
    if voice_sr != background_sr:
        background = librosa.resample(
            background, orig_sr=background_sr, target_sr=voice_sr
        )

    # Select a segment of the background track to use
    max_start = len(background) - len(voice)
    if max_start < 0:
        raise ValueError(
            'Background file {b!r} ({nb} samples) is shorter than voice file '
            '{v!r} ({nv} samples).'.format(
                b=background_file, nb=len(background),
                v=voice_file, nv=len(voice),
            )
        )
    # randint's upper bound is exclusive; +1 keeps the last valid offset
    # reachable and makes equal-length inputs work (start_point == 0).
    start_point = np.random.randint(max_start + 1)
    background_segment = background[start_point:start_point + len(voice)]

    # Normalise both sources and mix
    norm_voice = minmax_scale(voice, (-1, 1))
    norm_bgs = minmax_scale(background_segment, (-1, 1))
    norm_mix = (0.6 * norm_voice) + (0.4 * norm_bgs)

    return norm_mix, voice_sr, background_segment, voice

In [12]:
# Copy the file structure
# Recreates every (sub)folder found under inputpath beneath outputpath.
# No files are copied, only directories.

inputpath = 'C:/Users/Toby/Speech_Data/BG_test/Voice/'
outputpath = 'C:/Users/Toby/Speech_Data/BG_test/Background/'

for dirpath, _dirnames, _filenames in os.walk(inputpath):
    # relpath is independent of whether inputpath ends with a separator;
    # slicing by len(inputpath) would leave a leading separator in that case
    # and os.path.join would then discard outputpath.
    relative_dir = os.path.relpath(dirpath, inputpath)
    structure = os.path.normpath(os.path.join(outputpath, relative_dir))
    if not os.path.isdir(structure):
        # makedirs also creates missing parents (e.g. outputpath itself).
        os.makedirs(structure, exist_ok=True)
    else:
        print("Folder does already exits!")

In [16]:
# Mix every voice file under `directory` with a randomly chosen background
# file. For each voice file two files are written, mirroring the sub-folder
# layout of the Voice tree:
#   <root>/Mixed/...       - the normalised 60:40 mixture
#   <root>/Background/...  - the background segment that was mixed in
# where <root> is the parent folder of `directory`.
background_file_list = glob('C:/Users/Toby/Speech_Data/CHiME3/data/audio/16kHz/backgrounds/*.wav')
directory = 'C:/Users/Toby/Speech_Data/BG_test/Voice/'
file_list = glob(directory+'/**/*.flac', recursive=True)
count = 0

# Derive the output roots from the parent of the Voice folder rather than by
# splitting each path on the substring 'Voice', which breaks if 'Voice'
# occurs anywhere else in a path.
output_root = os.path.dirname(os.path.normpath(directory))
mixed_root = os.path.join(output_root, 'Mixed')
background_root = os.path.join(output_root, 'Background')

# Loop over files
for voice_path in file_list:
    # Work out the name and destination for the new file
    relative_path = os.path.relpath(voice_path, directory)
    new_mix_file_path = os.path.join(mixed_root, relative_path)
    new_bg_file_path = os.path.join(background_root, relative_path)
    # Check if file already exists
    if os.path.isfile(new_mix_file_path):
        print('File already exists!')
        count += 1
        continue
    # Select a background file
    if not background_file_list:
        raise FileNotFoundError('No background .wav files were found to mix with.')
    background_file = background_file_list[np.random.randint(len(background_file_list))]
    # Mix them
    mixture, sample_rate, background_noise, _ = mix_audio(voice_path, background_file)
    # Make sure the destination folders exist, then save to file
    for out_path in (new_mix_file_path, new_bg_file_path):
        os.makedirs(os.path.dirname(out_path), exist_ok=True)
    sf.write(new_mix_file_path, mixture, sample_rate)
    sf.write(new_bg_file_path, background_noise, sample_rate)
    count += 1
    if count % 1000 == 0:
        print('{c} files processed.'.format(c=count))
