# Binaural MUSDB18 Synthesis

Synthesize binaural mixtures and stems from [MUSDB18HQ](https://sigsep.github.io/datasets/musdb.html#musdb18-hq-uncompressed-wav) using Head-Related Impulse Responses (HRIRs) from the [SADIE II](https://www.york.ac.uk/sadie-project/database.html) database.

In [None]:
import os
import json
import random
import numpy as np
import soundfile as sf
import librosa
from IPython.display import display, Audio
from tqdm import tqdm

### HRIRs

For this dataset, we use the Head-Related Impulse Responses (HRIRs) associated with the Neumann KU100 Binaural Head as documented on the SADIE II website. These measurements correspond to subject D1.

In [None]:
# sample rate (Hz) that every input stem must have and that all
# output files are written with
SAMPLE_RATE = 44_100

# folder containing the HRIR WAV files, one file per source position
HRIR_DIR = "../data/D1_HRIR_WAV/44K_16bit/"

In [None]:
# azimuths available for the random distribution of instruments:
# 10-degree steps over 0-90 and 270-350, i.e. in front of the listener
RANDOM_ANGLES = [*range(0, 91, 10), *range(270, 351, 10)]

# azimuths (1-degree steps) for the traditional placement of instruments,
# keyed by stem name
STANDARD_ANGLES = {
    # vocals and drums share the same band around 0 degrees: 0-15 and 345-359
    "vocals": [*range(0, 16), *range(345, 360)],
    "drums": [*range(0, 16), *range(345, 360)],
    # bass: 16-90
    "bass": list(range(16, 91)),
    # other: 270-344
    "other": list(range(270, 345)),
}

In [None]:
def make_binaural(y, angle):
    '''
    Turn a monophonic signal into a binaural 2-channel signal by
    convolving it with the left and right HRIRs for a given angle
    on the horizontal plane. The elevation for all locations is
    0 degrees.

    Parameters
    ----------
    y : monophonic (1-dimensional) input signal

    angle : target location of the source along the azimuth; an
            HRIR file for this angle must exist in HRIR_DIR

    Returns
    -------
    y_binaural : 2-dimensional array (channels, samples) with the
                 binaural left and right channels; each channel is
                 the full convolution, so it is len(hrir) - 1
                 samples longer than y

    Raises
    ------
    ValueError : if the HRIR file's sample rate is not SAMPLE_RATE
    '''
    # load HRIR
    # (samples, channels)
    hrir_path = os.path.join(HRIR_DIR, f'azi_{angle},0_ele_0,0.wav')
    hrir, hrir_sr = sf.read(hrir_path)

    # the convolution below is only meaningful when the impulse response
    # and the signal share a sample rate, so refuse a mismatched HRIR
    # instead of silently producing a wrong result
    if hrir_sr != SAMPLE_RATE:
        raise ValueError(
            f"HRIR '{hrir_path}' has sample rate {hrir_sr}, "
            f"expected {SAMPLE_RATE}"
        )

    # convolve the mono signal with each ear's impulse response
    left = np.convolve(y, hrir[:, 0])
    right = np.convolve(y, hrir[:, 1])

    # combine into array
    # (channels, samples)
    return np.vstack((left, right))

### MUSDB18

In [None]:
def _choose_stem_angles(angle_type):
    '''
    Pick one azimuth for every stem in STEMS, in STEMS order.

    Parameters
    ----------
    angle_type : either 'standard' (each stem gets an angle from its
                 own STANDARD_ANGLES range) or 'random' (angles are
                 drawn from RANDOM_ANGLES without replacement, so no
                 two stems share an angle)

    Returns
    -------
    stem_loc : list with one angle per stem

    Raises
    ------
    ValueError : if angle_type is neither 'standard' nor 'random'
    '''
    if angle_type == "standard":
        # choose one angle which is appropriate for the type of instrument
        return [random.sample(STANDARD_ANGLES[stem_name], k=1)[0]
                for stem_name in STEMS]
    if angle_type == "random":
        # randomly choose one distinct angle for each stem
        return random.sample(RANDOM_ANGLES, k=len(STEMS))
    raise ValueError("Invalid angle_type")


def process_song(song_dir, output_dir, angle_type="standard"):
    '''
    Turn all of the stems from a song in the MUSDB18 dataset
    into binaural 2-channel signals. The resulting binaural
    mixture is the normalized sum of each binaural stem.

    Written to output_dir: one '<stem>.wav' per stem, 'mixture.wav'
    and 'metadata.json' (mapping of stem name to chosen angle).
    Only the mixture is peak-normalized; the stems are written
    exactly as produced by make_binaural.

    Parameters
    ----------
    song_dir : path to directory with the song's original stems

    output_dir : path to target directory where the binaural
                 stems and mixture should be saved

    angle_type : how the instruments should be placed spatially,
                 either 'standard' or 'random'

    Raises
    ------
    ValueError : if angle_type is invalid or a stem file does not
                 have the sample rate SAMPLE_RATE
    '''
    # make the output directory, if it doesn't exist
    os.makedirs(output_dir, exist_ok=True)

    # set angles
    stem_loc = _choose_stem_angles(angle_type)

    # save metadata of stem locations
    metadata = dict(zip(STEMS, stem_loc))

    # initialize mixture
    mixture = None

    for stem_name, angle in zip(STEMS, stem_loc):
        # load stem
        # (samples, channels)
        in_file = os.path.join(song_dir, f"{stem_name}.wav")
        orig_stem, sr = sf.read(in_file)

        # check that sample rates match
        if sr != SAMPLE_RATE:
            raise ValueError("The file has the incorrect sample rate!")

        # convert to mono first
        # (channels, samples)
        mono_stem = librosa.to_mono(orig_stem.T)

        # make binaural
        # (channels, samples)
        binaural_stem = make_binaural(mono_stem, angle)

        # save
        # (samples, channels)
        out_file = os.path.join(output_dir, f"{stem_name}.wav")
        sf.write(out_file, binaural_stem.T, SAMPLE_RATE)

        # create mixture by summing stems; the first stem's array is
        # reused as the accumulator, which is safe because that stem
        # has already been written to disk
        if mixture is None:
            mixture = binaural_stem
        else:
            mixture += binaural_stem

    # normalize mixture to -1/+1; an all-silent mixture has a peak of 0
    # and is left untouched to avoid dividing by zero (NaN output)
    peak = np.max(np.abs(mixture))
    if peak > 0:
        mixture_norm = mixture / peak
    else:
        mixture_norm = mixture

    # save mixture
    # (samples, channels)
    out_file = os.path.join(output_dir, "mixture.wav")
    sf.write(out_file, mixture_norm.T, SAMPLE_RATE)

    # dump json
    out_file = os.path.join(output_dir, "metadata.json")
    with open(out_file, 'w') as f:
        json.dump(metadata, f)

In [None]:
# names of the stems; every song folder holds one "<name>.wav" per stem
STEMS = ["vocals", "bass", "drums", "other"]

# folders of the original MUSDB18-HQ train and test songs
DB_TRAIN_DIR = "../data/musdb18hq/train"
DB_TEST_DIR = "../data/musdb18hq/test"

In [None]:
# get names of songs (every sub-directory of a split is one song)
# os.listdir returns entries in arbitrary order, so the names are sorted:
# the seeded random angle choices made during synthesis then land on the
# same songs on every machine and every run
train_songs = sorted(
    f for f in os.listdir(DB_TRAIN_DIR)
    if os.path.isdir(os.path.join(DB_TRAIN_DIR, f))
)
test_songs = sorted(
    f for f in os.listdir(DB_TEST_DIR)
    if os.path.isdir(os.path.join(DB_TEST_DIR, f))
)

#### Standard

In [None]:
# destinations for the dataset synthesized with 'standard' angles
OUTPUT_TRAIN_DIR = "../data/binaural_musdb18/standard/train"
OUTPUT_TEST_DIR = "../data/binaural_musdb18/standard/test"

# make sure both destination folders exist
for _out_dir in (OUTPUT_TEST_DIR, OUTPUT_TRAIN_DIR):
    os.makedirs(_out_dir, exist_ok=True)

In [None]:
# fixed seed so that the angle choices are repeatable
random.seed(25)

# the training split is synthesized before the testing split so that the
# seeded random stream is always consumed in the same order
for message, db_dir, out_root, songs in (
    ("SYNTHESIZING TRAINING DATA...", DB_TRAIN_DIR, OUTPUT_TRAIN_DIR, train_songs),
    ("SYNTHESIZING TESTING DATA...", DB_TEST_DIR, OUTPUT_TEST_DIR, test_songs),
):
    print(message)
    # progress bar, as in the synthesis cell of the 'random' dataset
    for song in tqdm(songs):
        input_dir = os.path.join(db_dir, song)
        output_dir = os.path.join(out_root, song)

        process_song(input_dir, output_dir, angle_type="standard")

#### Random

In [None]:
# destinations for the dataset synthesized with 'random' angles
# (rebinds the names used for the 'standard' dataset)
OUTPUT_TRAIN_DIR = "../data/binaural_musdb18/random/train"
OUTPUT_TEST_DIR = "../data/binaural_musdb18/random/test"

# make sure both destination folders exist
for _out_dir in (OUTPUT_TEST_DIR, OUTPUT_TRAIN_DIR):
    os.makedirs(_out_dir, exist_ok=True)

In [None]:
# fixed seed so that the angle choices are repeatable
random.seed(64)

# training split first, then testing split
for message, db_dir, out_root, songs in (
    ("SYNTHESIZING TRAINING DATA...", DB_TRAIN_DIR, OUTPUT_TRAIN_DIR, train_songs),
    ("SYNTHESIZING TESTING DATA...", DB_TEST_DIR, OUTPUT_TEST_DIR, test_songs),
):
    print(message)
    for song in tqdm(songs):
        # source folder with the original stems and target folder for
        # the binaural version of this song
        input_dir = os.path.join(db_dir, song)
        output_dir = os.path.join(out_root, song)

        process_song(input_dir, output_dir, angle_type='random')

#### Playback

In [None]:
# pick one test song at random and locate its 'standard' binaural output
song_name = random.choice(test_songs)
dir_path = os.path.join("../data/binaural_musdb18/standard/test", song_name)

# load the binaural mixture
# (samples, channels)
mixture_path = os.path.join(dir_path, 'mixture.wav')
y, sr = sf.read(mixture_path)

# load the stem angles that were saved next to the audio
metadata_path = os.path.join(dir_path, 'metadata.json')
with open(metadata_path) as f:
    metadata = json.load(f)

In [None]:
# show which song was picked and the angle assigned to each stem
print(song_name, metadata, sep="\n")

# audio player for the mixture; kept as the last expression of the cell
# so that the notebook displays it
# (channels, samples)
Audio(y.T, rate=sr)