In [104]:
import numpy as np
import matplotlib.pyplot as plt
import glob
import os
import subprocess
from tqdm import tqdm_notebook
import pprint
import yaml
from multiprocessing import Pool
from IPython.display import clear_output

In [105]:
# Root directory of the MSMD dataset. processPiece looks for each piece's
# performances under <DATA_ROOT_MSMD>/<piece>/performances/.
# NOTE(review): this is a relative path, so it resolves against the kernel's
# working directory -- confirm the notebook is started from the expected folder.
DATA_ROOT_MSMD = '../msmd_aug/msmd_aug'

In [106]:
# obtain all the pieces in the dataset
# yaml.safe_load is used instead of a bare yaml.load(f): calling yaml.load
# without a Loader is deprecated since PyYAML 5.1 and raises TypeError on
# PyYAML >= 6, and safe_load only ever builds plain Python objects.
with open('../msmd/msmd/splits/all_split.yaml', 'r') as f:
    doc = yaml.safe_load(f)

# the piece names to process are stored under the 'test' key of the split file
pieces = doc['test']
# sanity check on the split size; report the actual count if it is off
assert len(pieces) == 100, 'expected 100 pieces, found {}'.format(len(pieces))

In [107]:
def getSoundFontLoc(performance, base_loc='/data/mirlab/msmd/salience_sheet_following/soundfonts'):
    """
    Look up the soundfont file that belongs to a performance.

    The instrument is recognised from the suffix of `performance`; the
    matching '.sf2' file name is joined onto `base_loc` and returned.
    Raises ValueError when `performance` ends with none of the known suffixes.
    """
    # (performance suffix, soundfont base name) pairs, checked in this order
    suffix_to_font = (
        ('acoustic_piano_imis_1', 'acoustic_piano_imis_1'),
        ('grand-piano-YDP-20160804', 'grand-piano-YDP-20160804'),
        ('YamahaGrandPiano', 'FluidR3_GM'),
        ('ElectricPiano', 'FluidR3_GM'),
    )

    for suffix, font_name in suffix_to_font:
        if performance.endswith(suffix):
            return os.path.join(base_loc, font_name + '.sf2')

    raise ValueError('Error to parse {}'.format(performance))
    
# getSoundFontLoc('/data/mirlab/msmd/msmd_aug/msmd_aug/Traditional__traditioner_af_swenska_folk_dansar.3.18__traditioner_af_swenska_folk_dansar.3.18/performances/Traditional__traditioner_af_swenska_folk_dansar.3.18__traditioner_af_swenska_folk_dansar.3.18_tempo-900_acoustic_piano_imis_1')

In [108]:
def processPiece(piece_idx):
    """
    Render every performance of one piece to a FLAC file with fluidsynth.

    `piece_idx` indexes the module-level `pieces` list. Each entry found under
    <DATA_ROOT_MSMD>/<piece>/performances/ is treated as one performance: its
    soundfont is chosen by getSoundFontLoc, its first '*.midi' file (in sorted
    order) is the input, and the result is written to
    <performance>/audio/<performance basename>.flac.

    Raises IndexError if a performance contains no '*.midi' file, and
    ValueError (from getSoundFontLoc) if the performance name is not
    recognised. Returns None.
    """
    piece = pieces[piece_idx]
    performances = glob.glob(os.path.join(DATA_ROOT_MSMD, piece, 'performances', '*'))
    print(piece)
    for performance in performances:
        soundfont_file = getSoundFontLoc(performance)

        # Sort so the chosen file does not depend on filesystem listing order,
        # and fail with a message that names the offending folder instead of a
        # bare "list index out of range".
        midi_files = sorted(glob.glob(os.path.join(performance, '*.midi')))
        if not midi_files:
            raise IndexError('No .midi file found in {}'.format(performance))
        midi_file = midi_files[0]

        # exist_ok avoids the separate exists-then-mkdir check
        audio_dir = os.path.join(performance, 'audio')
        os.makedirs(audio_dir, exist_ok=True)

        output_file = os.path.join(audio_dir, os.path.basename(performance) + '.flac')

        # Generate audio file
        return_code = subprocess.call(["fluidsynth", '-F', output_file, '-O', 's16', '-T', 'flac',
                                       soundfont_file, midi_file])
        # Keep the batch best-effort (no raise), but do not hide a failed render.
        if return_code != 0:
            print('fluidsynth exited with code {} for {}'.format(return_code, midi_file))

In [109]:
# Render all pieces in parallel on 12 worker processes. imap is lazy, so
# exhausting it through the progress bar is what actually drives the work.
n_pieces = len(pieces)
with Pool(12) as workers:
    rendered = workers.imap(processPiece, range(n_pieces))
    list(tqdm_notebook(rendered, total=n_pieces))

Traditional__traditioner_af_swenska_folk_dansar.1.26__traditioner_af_swenska_folk_dansar.1.26
Traditional__traditioner_af_swenska_folk_dansar.3.14__traditioner_af_swenska_folk_dansar.3.14
BachJS__BWV924a__bach-prelude-bwv924a
BachJS__BWV825__15title-hub
SidwellA__little-toy-lost__little-toy-lost
BachJS__BWV988__bwv-988-v13
Traditional__traditioner_af_swenska_folk_dansar.1.14__traditioner_af_swenska_folk_dansar.1.14
ChopinFF__O9__nocturne_in_b-flat_minor
MozartWA__KV331__KV331_1_2_var1
BachJS__BWVAnh120__BWV-120
SchumannR__O68__schumann-op68-01-melodie
Traditional__traditioner_af_swenska_folk_dansar.3.22__traditioner_af_swenska_folk_dansar.3.22
MussorgskyM__pictures-at-an-exhibition__promenade-3
MussorgskyM__pictures-at-an-exhibition__promenade-5
StraussJJ__O314__blue_danube
MozartWA__KV331__KV331_1_5_var4
BachJS__BWVAnh113__anna-magdalena-03
Traditional__traditioner_af_swenska_folk_dansar.1.12__traditioner_af_swenska_folk_dansar.1.12
BachJS__BWVAnh116__anna-magdalena-07
MussorgskyM__pi