# Binaural MUSDB18 Synthesis

In [59]:
import os
import json
import random
import numpy as np
import soundfile as sf
import librosa
from IPython.display import display, Audio

### HRIRs

In [8]:
# constants
HRIR_DIR = "../data/D1_HRIR_WAV/44K_16bit/"
SAMPLE_RATE = 44100

In [10]:
# heads = ['D1', 'D2'] + [f'H{i}' for i in range(3, 21)]

In [45]:
# get possible angles in front of the listener
ANGLES = np.concatenate((np.arange(0, 91, 10), np.arange(270, 351, 10))).tolist()

In [34]:
def make_binaural(y, angle):
    '''
    Turn a monophonic signal into a binaural 2-channel signal by
    convolving it with the left and right HRIRs for a given angle
    on the horizontal plane. The elevation for all locations is
    0 degrees.

    Parameters
    ----------
    y : monophonic input signal

    angle : target location of the source along the azimuth
        
    Returns
    -------
    y_binaural : 2-dimensional array with the
                 binaural left and right channels
    '''
    # load HRIR
    hrir_path = os.path.join(HRIR_DIR, f'azi_{angle},0_ele_0,0.wav')
    hrir, sr = sf.read(hrir_path)

    # convolve each channel with mono signal
    left = np.convolve(y, hrir[:, 0])
    right = np.convolve(y, hrir[:, 1])

    # combine into array
    binaural = np.vstack((left, right))
    
    return binaural

### MUSDB18

In [97]:
# constants
DB_TEST_DIR = "../data/musdb18hq/test"
DB_TRAIN_DIR = "../data/musdb18hq/train"

STEMS = ["vocals", "bass", "drums", "other"]

OUTPUT_TEST_DIR = "../data/binaural_musdb18/test"
OUTPUT_TRAIN_DIR = "../data/binaural_musdb18/train"

In [98]:
# create output directories
os.makedirs(OUTPUT_TEST_DIR, exist_ok=True)
os.makedirs(OUTPUT_TRAIN_DIR, exist_ok=True)

In [99]:
# get names of songs
train_songs = [f for f in os.listdir(DB_TRAIN_DIR) if os.path.isdir(os.path.join(DB_TRAIN_DIR, f))]
test_songs = [f for f in os.listdir(DB_TEST_DIR) if os.path.isdir(os.path.join(DB_TEST_DIR, f))]

In [104]:
def process_song(song_dir, output_dir):
    '''
    Turn all of the stems from a song in the MUSDB18 dataset
    into binaural 2-channel signals. The resulting binaural
    mixture is the normalized sum of each binaural stem.
    
    Parameters
    ----------
    song_dir : path to directory with the songs original stems

    output_dir : path to target directory where the binaural
                 stems and mixture should be saved
    '''
    print(f"\nProcessing: {os.path.split(song_dir)[-1]}...")
    # make the output directory, if it doesn't exist
    os.makedirs(output_dir, exist_ok=True)

    # randomly choose one angle for each stem for its
    # binaural placement
    stem_loc = random.sample(ANGLES, k=len(STEMS))
    print(f"Angles: {stem_loc}")

    # initialize mixture
    mixture = None

    for i in range(len(STEMS)):
        # load stem
        # (samples, channels)
        in_file = os.path.join(song_dir, f"{STEMS[i]}.wav")
        orig_stem, sr = sf.read(in_file)

        # check that sample rates match
        if not sr == SAMPLE_RATE:
            raise ValueError("The file has the incorrect sample rate!")

        # convert to mono first
        # (channels, samples)
        mono_stem = librosa.to_mono(orig_stem.T)

        # make binaural
        # (channels, samples)
        binaural_stem = make_binaural(mono_stem, stem_loc[i])

        # save
        # (samples, channels)
        out_file = os.path.join(output_dir, f"{STEMS[i]}.wav")
        sf.write(out_file, binaural_stem.T, SAMPLE_RATE)

        # create mixture by summing stems
        if mixture is None:
            mixture = binaural_stem
        else:
            mixture += binaural_stem

    # normalize mixture to -1/+1
    mixture_norm = mixture / np.max(np.abs(mixture))

    # save mixture
    # (samples, channels)
    out_file = os.path.join(output_dir, "mixture.wav")
    sf.write(out_file, mixture_norm.T, SAMPLE_RATE)

    # save metadata of stem locations
    metadata = dict(zip(STEMS, stem_loc))

    # dump json
    out_file = os.path.join(output_dir, "metadata.json")
    with open(out_file, 'w') as f:
        json.dump(metadata, f)

In [105]:
# song = 'Al James - Schoolboy Facination'
# process_song(os.path.join(DB_TEST_DIR, song), os.path.join(OUTPUT_TEST_DIR, song))

In [106]:
random.seed(25)

# synthesize training data
for song in train_songs:
    input_dir = os.path.join(DB_TRAIN_DIR, song)
    output_dir = os.path.join(OUTPUT_TRAIN_DIR, song)

    process_song(input_dir, output_dir)

# synthesize testing data
for song in test_songs:
    input_dir = os.path.join(DB_TEST_DIR, song)
    output_dir = os.path.join(OUTPUT_TEST_DIR, song)

    process_song(input_dir, output_dir)


Processing: The Districts - Vermont...
Angles: [290, 0, 60, 90]

Processing: BigTroubles - Phantom...
Angles: [320, 10, 80, 340]

Processing: Music Delta - Rockabilly...
Angles: [90, 300, 30, 330]

Processing: Johnny Lokke - Promises & Lies...
Angles: [350, 60, 330, 270]

Processing: Hezekiah Jones - Borrowed Heart...
Angles: [50, 340, 280, 320]

Processing: Steven Clark - Bounty...
Angles: [330, 30, 340, 280]

Processing: Young Griffo - Pennies...
Angles: [300, 280, 60, 50]

Processing: Phre The Eon - Everybody's Falling Apart...
Angles: [310, 20, 340, 290]

Processing: Music Delta - Gospel...
Angles: [350, 20, 330, 40]

Processing: Remember December - C U Next Time...
Angles: [10, 340, 50, 20]

Processing: Actions - South Of The Water...
Angles: [350, 70, 300, 310]

Processing: Jokers, Jacks & Kings - Sea Of Leaves...
Angles: [300, 320, 330, 340]

Processing: Alexander Ross - Velvet Curtain...
Angles: [310, 10, 320, 90]

Processing: Alexander Ross - Goodbye Bolero...
Angles: [50, 10