In [1]:
import numpy as np
import os
import cPickle
import pandas as pd
import yaml
import wave
import struct
import gc
from scipy.io import wavfile
from scipy.io import savemat
import copy
import patch_label

In [2]:
def backup_wavfile_reader(fpath):
    """Read a 16-bit PCM wav file when scipy wavfile fails to read it.

    All frames are fetched with a single read and decoded as little-endian
    signed 16-bit samples (the byte order of the WAV format), so the result
    does not depend on the byte order of the machine running the code.

    Args:
        fpath (str): path to the wav file to read
    Returns:
        numpy array: integer samples of shape (n_frames, n_channels), one row
                     per frame and one column per channel
    Raises:
        ValueError: if the file does not store 16-bit samples
    """
    f = wave.open(fpath, 'rb')
    try:
        n_channels = f.getnchannels()
        sample_width = f.getsampwidth()
        if sample_width != 2:
            raise ValueError('only 16-bit wav files are supported, {} has '
                             '{}-bit samples'.format(fpath, 8 * sample_width))
        raw = f.readframes(f.getnframes())
    finally:
        # wave objects are not context managers on Python 2, so close by hand
        f.close()
    # ignore a trailing partial frame left behind by a truncated file
    frame_size = sample_width * n_channels
    raw = raw[:len(raw) - len(raw) % frame_size]
    samples = np.frombuffer(raw, dtype='<i2').reshape(-1, n_channels)
    # cast to the platform integer, which is the dtype the previous
    # list-of-Python-ints implementation produced
    return samples.astype(np.int_)


In [3]:
def read_mixed_from_files(dpath, dlist, pickle_file=None):
    """Read the mixed track files and return as dictionary
    Args:
        dpath (str): path to the directory "MedleyDB/Audio"
        dlist (list): list of str, each the name of one track directory; the
                      mix is read from <dpath>/<name>/<name>_MIX.wav
        pickle_file (str): optional path; when given, the resulting
                           dictionary is also pickled to this file
    Returns:
        dict: in the format of {song_name(string): song_data(numpy array)}
              song_data is float32 and is the transpose of what the reader
              returns, i.e. for a stereo file two rows and n cols: each row
              is a channel, each col is a time frame.
    """
    res = dict()
    for name in dlist:
        fpath = os.path.join(dpath, name, '{}_MIX.wav'.format(name))
        try:
            data = wavfile.read(fpath)[1].T
        except Exception:
            # Best effort: any read failure falls back to the slower reader.
            # Catching Exception (not a bare except) lets KeyboardInterrupt
            # and SystemExit propagate instead of being swallowed here.
            print("Warning: can't read {}, switch to backup reader".
                  format(fpath))
            data = backup_wavfile_reader(fpath).T
        res[name] = np.float32(data)
    if pickle_file is not None:
        # a pickle is binary data, so the file must be opened in binary mode
        with open(pickle_file, 'wb') as f:
            cPickle.dump(res, f)
    return res


In [4]:
def normalize_data(data):
    """Normalize data with respect to each file in place

    For each file, every row (channel) is standardized on its own: its mean
    is subtracted and the result is divided by its standard deviation. Any
    number of channel rows is accepted. A constant row (standard deviation
    of zero) is left at all zeros instead of turning into NaN.

    Args:
        data (dict): in format of {song_name(string): song_data(numpy array)}
                     with one row per channel
    Returns:
        N/A (the arrays stored in data are replaced by float32 arrays)
    """
    for k in data.keys():
        # keepdims keeps one column per row so the statistics broadcast
        # against the data whatever the number of channels is
        mean = data[k].mean(axis=1, keepdims=True)
        std = data[k].std(axis=1, keepdims=True)
        # avoid 0 / 0 for constant rows; they end up as zeros
        std[std == 0] = 1
        data[k] = np.float32((data[k] - mean) / std)


In [5]:
def read_activation_confs(path, pickle_file=None):
    """Read the annotation files of activation confidence, return as dictionary

    The dictionary key of a file is derived from its name: the last 20
    characters are removed (presumably the '_ACTIVATION_CONF.lab' suffix --
    confirm against the dataset), then everything from the first '(' on is
    dropped, then apostrophes and hyphens are removed.

    Args:
        path (string): path to the directory "MedleyDB"
        pickle_file (string): optional path; when given, the resulting
                              dictionary is also pickled to this file
    Returns:
        dict: in the format of {song_name(string): annotation(pandas df)}
    """
    dpath = os.path.join(path, 'Annotations', 'Instrument_Activations',
                         'ACTIVATION_CONF')
    res = dict()
    for fname in os.listdir(dpath):
        annotation = pd.read_csv(os.path.join(dpath, fname), index_col=False)
        k = fname[:-20].split('(')[0]
        # str.replace behaves the same on Python 2 and 3, unlike the
        # two-argument str.translate(None, chars) which is Python 2 only
        k = k.replace("'", '').replace('-', '')
        res[k] = annotation
    if pickle_file is not None:
        # a pickle is binary data, so the file must be opened in binary mode
        with open(pickle_file, 'wb') as f:
            cPickle.dump(res, f)
    return res

In [6]:
def read_meta_data(path, pickle_file=None):
    """Read the metadata for instrument info, return as dictionary
    Args:
        path (string): path to the directory "MedleyDB"
        pickle_file (string): optional path; when given, the resulting
                              dictionary is also pickled to this file
    Returns:
        dict: in the format of {song_name(string): instrument_map(dict)}
              instrument_map is of the format eg: {'S01': 'piano'}
    """
    dpath = os.path.join(path, "Audio")
    res = dict()
    for name in os.listdir(dpath):
        fpath = os.path.join(dpath, name, '{}_METADATA.yaml'.format(name))
        with open(fpath, 'r') as f:
            # NOTE(review): this used to be yaml.load(f) with no Loader, which
            # can construct arbitrary Python objects and is rejected by
            # recent PyYAML releases. safe_load only builds plain data
            # types, which is all that is read from the file below.
            meta = yaml.safe_load(f)
        # keep only the stem id -> instrument name part of the metadata
        res[name] = {stem: info['instrument']
                     for stem, info in meta['stems'].items()}
    if pickle_file is not None:
        # a pickle is binary data, so the file must be opened in binary mode
        with open(pickle_file, 'wb') as f:
            cPickle.dump(res, f)
    return res

In [7]:
def groupMetaData(meta, instGroup):
    """Replace every instrument name in the metadata by its instrument group.

    The input is left untouched; a deep copy is relabelled and returned.

    Args:
        meta (dict): in the format of {song_name(string): instrument_map(dict)}
                     instrument_map is of the format eg: {'S01': 'piano'}
        instGroup (dict): {instrument: instrumentGroup} eg: {'piano': 'struck'}
    Returns:
        groupedMeta (dict): same layout as meta, with each instrument name
                            replaced by instGroup[instrument name],
                            eg: {'S01': 'struck'}
    """
    relabelled = copy.deepcopy(meta)
    for stem_map in relabelled.values():
        # only existing keys are reassigned, so the dict size never changes
        for stem in stem_map:
            instrument = stem_map[stem]
            stem_map[stem] = instGroup[instrument]
    return relabelled


In [8]:
def match_meta_annotation(meta, annotation):
    """Rename the stem columns of each annotation to their instrument names.

    The data frames stored in annotation are modified in place: every column
    whose name is a key of the song's instrument_map is renamed to the mapped
    value.

    Note: In the annotation of one mixed track, there can be multiple instances
          of the same instrument, in which case the same column name appears
          multiple times in the pandas df

    Args:
        meta (dict): in the format of {song_name(string): instrument_map(dict)}
                     instrument_map is of the format eg: {'S01': 'piano'}
        annotation (dict): {song_name(string): annotation(pandas df)}
    Returns:
        list: all column names found after the first column of any
              annotation, without duplicates, sorted in alphabetic order
    """
    assert(len(meta) == len(annotation))
    seen = set()
    for song in annotation:
        frame = annotation[song]
        frame.rename(columns=meta[song], inplace=True)
        # the first column is skipped (it is not an instrument column)
        seen |= set(frame.columns[1:])
    return sorted(seen)


In [9]:
def split_music_to_patches(data, annotation, inst_map, label_aggr, length=1,
                           sr=44100, time_window=100.0, binary=False,
                           threshold=None):
    """Split each music file into (length) second patches and label each patch

    Note: a trailing remainder that is shorter than one patch is abandoned; a
          track whose length is an exact multiple of the patch size keeps
          its last patch.
          Each patch is raveled to have only one row (the samples of the
          first channel row followed by those of the next row).

    Args:
        data(dict): {song_name: numpy array}, one row per channel and one
                    column per sample
        annotation(dict): {song_name: pandas df} with a 'time' column (in
                          seconds) followed by one confidence column per
                          instrument
        inst_map(dict): a dictionary that maps an instrument name to its
                        position in the sorted list of all instruments
        label_aggr(function): aggregates the annotation rows that fall inside
                              a patch into one value per column, eg np.mean;
                              if None, patch_label.patch_label is used instead
        length(int): length of each patch, in seconds
        sr (int): sample rate of raw audio
        time_window(float): time windows for average (in milliseconds); only
                            forwarded to patch_label.patch_label
        binary(bool): only forwarded to patch_label.patch_label
        threshold: only forwarded to patch_label.patch_label
    Returns:
        dict: {'X': np array for X, 'y': np array for y, 'present': np array
                of indicators for whether the instrument is present in the
                track from which the patch is taken, 'song_name': tuple with
                the song name of each patch, 'time': float32 np array with
                the (start, end) of each patch in seconds}
    Raises:
        ValueError: if not a single full patch could be taken from data
    """
    res = []
    patch_size = sr * length
    for k, v in data.items():
        # The "+ 1" makes the stop include the start of the last full patch;
        # without it a track of exactly n * patch_size samples would lose
        # its final patch.
        starts = range(0, v.shape[1] - patch_size + 1, patch_size)
        for i, e in enumerate(starts):
            song_anno = annotation[k]
            patch = v[:, e:patch_size+e].ravel()
            # annotation rows whose time stamp lies inside this patch
            sub_df = song_anno[(i * length <= song_anno.time) &
                               (song_anno.time < (i + 1) * length)]
            if label_aggr is not None:
                inst_conf = sub_df.apply(label_aggr, 0).drop('time')
            else:
                inst_conf = patch_label.patch_label(0, length, time_window,
                                                    sub_df, binary,
                                                    threshold).iloc[0]
            label = np.zeros(len(inst_map), dtype='float32')
            is_present = np.zeros(len(inst_map), dtype='float32')
            for j in inst_conf.index:
                temp = inst_conf[j]
                # if there are two columns of the same instrument, take maximum
                if isinstance(temp, pd.Series):
                    temp = temp.max()
                label[inst_map[j]] = temp
                is_present[inst_map[j]] = 1.0
            res.append((patch, label, is_present, k, (i*length, (i+1)*length)))
    if not res:
        # unpacking zip(*[]) below would fail with an unhelpful message
        raise ValueError('no patch of {} samples could be taken from the '
                         'given data'.format(patch_size))
    X, y, present, song_name, time = zip(*res)
    return {'X': np.array(X), 'y': np.array(y), 'present': np.array(present),
            'song_name': song_name, 'time': np.array(time, dtype='float32')}


In [10]:
# Run configuration: every tunable value of this notebook is set here.
in_path = '../../data'
length=1
time_window=100.0
binary=False
threshold=None
out_path=os.curdir
save_size=20
norm_channel=False
label_aggr=None
start_from=0
groupID='Group 4'
#prep_data(in_path, length=1, time_window=100.0, binary=False, threshold=None)

# save parameters for this run
# Only the values listed here are written. Iterating over locals() at the top
# level of a notebook would also dump every imported module, every function
# and all other kernel variables into the config file.
run_params = [('in_path', in_path), ('length', length),
              ('time_window', time_window), ('binary', binary),
              ('threshold', threshold), ('out_path', out_path),
              ('save_size', save_size), ('norm_channel', norm_channel),
              ('label_aggr', label_aggr), ('start_from', start_from),
              ('groupID', groupID)]
to_write = ['{} = {}'.format(k, v) for k, v in run_params]
with open(os.path.join(out_path, 'config.txt'), 'wb') as f:
    f.write('\n'.join(to_write))

# read annotations and match with metadata
anno_pkl = os.path.join(out_path, 'anno_label.pkl')
annotation = read_activation_confs(in_path)
meta = read_meta_data(in_path)

In [11]:
# Inspect the {song_name: {stem id: instrument name}} mapping that was read.
meta

{'MusicDelta_Beethoven': {'S01': 'violin section',
  'S02': 'violin section',
  'S03': 'viola',
  'S04': 'cello',
  'S05': 'double bass',
  'S06': 'flute',
  'S07': 'flute',
  'S08': 'clarinet',
  'S09': 'clarinet',
  'S10': 'oboe',
  'S11': 'oboe',
  'S12': 'bassoon',
  'S13': 'bassoon',
  'S14': 'trumpet',
  'S15': 'trumpet',
  'S16': 'french horn',
  'S17': 'french horn',
  'S18': 'timpani'}}

In [12]:
# group instruments in metadata
# instGroup.csv must provide an 'Instrument' column and a column named after
# groupID; together they form the instrument -> group lookup table.
instGrouping = pd.read_csv('./instGroup.csv')
groupLookup = dict(zip(instGrouping['Instrument'].values,
                       instGrouping[groupID].values))
# meta is rebound to the grouped copy: from here on it holds group names
meta = groupMetaData(meta, groupLookup)


In [13]:
# Rename the stem columns of every annotation frame using meta; the frames in
# `annotation` are modified in place and the sorted label list is returned.
all_instruments = match_meta_annotation(meta, annotation)
# Cache the relabelled annotations; an already existing file is left as is.
if not os.path.exists(anno_pkl):
    # a pickle is binary data, so the file must be opened in binary mode
    with open(anno_pkl, 'wb') as f:
        cPickle.dump(annotation, f)

In [14]:
# Sanity check: (number of annotation rows, number of columns incl. 'time').
annotation['MusicDelta_Beethoven'].shape


(590, 19)

In [15]:
# Inspect the relabelled annotations.
# NOTE(review): this displays every frame in full; showing
# annotation[...].head() would keep the output short.
annotation

{'MusicDelta_Beethoven':         time  violin  violin   viola   cello  double bass   flute   flute  \
 0     0.0000  0.0900  0.2477  0.0763  0.1025       0.9391  0.0778  0.1190   
 1     0.0464  0.5586  0.6530  0.2931  0.6460       0.9965  0.4534  0.4759   
 2     0.0929  0.9307  0.9034  0.6448  0.9597       0.9998  0.8831  0.8531   
 3     0.1393  0.9907  0.9737  0.8662  0.9957       1.0000  0.9836  0.9711   
 4     0.1858  0.9983  0.9910  0.9474  0.9994       1.0000  0.9975  0.9941   
 5     0.2322  0.9995  0.9959  0.9748  0.9999       1.0000  0.9995  0.9986   
 6     0.2786  0.9998  0.9975  0.9848  1.0000       1.0000  0.9999  0.9996   
 7     0.3251  0.9999  0.9981  0.9887  1.0000       1.0000  1.0000  0.9999   
 8     0.3715  0.9999  0.9981  0.9900  1.0000       1.0000  1.0000  0.9999   
 9     0.4180  0.9999  0.9979  0.9901  1.0000       1.0000  1.0000  1.0000   
 10    0.4644  0.9998  0.9974  0.9895  1.0000       1.0000  1.0000  1.0000   
 11    0.5108  0.9997  0.9967  0.9890  1

In [16]:
# create and save song_instr mapping
# song_instr maps each song name to the set of its annotation column names
# after the first column (duplicate names collapse into one entry).
song_instr = {}
for k, v in annotation.items():
    song_instr[k] = set(v.columns[1:])
with open(os.path.join(out_path, 'song_instr.pkl'), 'wb') as f:
    cPickle.dump(song_instr, f)

In [17]:
# save all instrument list to file
# written under out_path like every other artefact of this run, instead of
# whatever the current working directory happens to be
with open(os.path.join(out_path, 'all_instruments.txt'), 'wb') as f:
    f.write('\n'.join(all_instruments))

# get a dictionary mapping all instrument to sorted order
all_instruments_map = {e: i for i, e in enumerate(all_instruments)}
print('Total number of labels = {}'.format(len(all_instruments)))

Total number of labels = 11


In [18]:
# read mixed tracks
# dpath is the "Audio" directory below in_path; every entry in it is treated
# as one track.
dpath = os.path.join(in_path, "Audio")
dlist = sorted(os.listdir(dpath))  # get list of tracks in sorted order
# write the list to file as reference for song_names in data
with open(os.path.join(out_path, 'song_name_list.txt'), 'wb') as f:
    f.write('\n'.join(dlist))

In [24]:
# Inspect the song name -> index mapping.
# NOTE(review): song_name_map is only assigned in the cell that follows this
# one, so on a fresh top-to-bottom run this cell raises NameError.
song_name_map

{'MusicDelta_Beethoven': 0}

In [22]:
# get a mapping of song names to their sorted order
# (the index of each name in dlist, which was built with sorted())
song_name_map = {e: i for i, e in enumerate(dlist)}

In [23]:
# Read one batch of at most save_size tracks, starting at index i of dlist.
# NOTE(review): start_from from the configuration cell is not used here; the
# batch always starts at 0.
i=0
tdlist = dlist[i:i+save_size]
data = read_mixed_from_files(dpath, tdlist)
print 'finished reading file'

# optional per-channel standardization, done in place on `data`
if norm_channel:
    normalize_data(data)
    print 'finished normalizing data'

finished reading file


In [25]:
# take the first (song_name, audio) pair of the batch; next(iter(...)) gives
# the same pair as items()[0] and also works where items() is not a list
(k,v)=next(iter(data.items()))
# Forward the run configuration explicitly: relying on the defaults of
# split_music_to_patches would silently ignore any change made to length,
# time_window, binary or threshold in the configuration cell.
patched_data = split_music_to_patches({k: v}, annotation,
                                      all_instruments_map,
                                      label_aggr, length=length,
                                      time_window=time_window,
                                      binary=binary, threshold=threshold)

In [26]:
# Inspect the patch dictionary (keys 'X', 'y', 'present', 'song_name', 'time').
patched_data

{'X': array([[  211.,   221.,   222., ..., -4922., -6341., -7588.],
        [-5019., -5997., -7054., ...,  3270.,  2884.,  2609.],
        [ 2635.,  2478.,  2328., ..., -7558., -7166., -6760.],
        ...,
        [ -818.,  -459.,  -204., ...,  -105.,  -118.,  -162.],
        [  532.,   584.,   585., ...,   150.,   147.,   137.],
        [  192.,   182.,   156., ...,    37.,    38.,    39.]],
       dtype=float32),
 'present': array([[1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1., 

In [25]:
# Inspect the presence indicators of the patch at index 26.
patched_data['present'][26]

11

In [44]:
# Replace the song names of the patches by their numeric index from
# song_name_map, stored as a float32 column vector of shape (n_patches, 1).
# NOTE(review): this overwrites patched_data['song_name'], so running the
# cell a second time looks the numeric values up in song_name_map -- confirm
# the cell is only ever run once per patched_data.
temp_l = len(patched_data['song_name'])
patched_data['song_name'] = np.array([song_name_map[e] for e in
                                      patched_data['song_name']],
                                     dtype='float32'). \
    reshape(temp_l, 1)
# save patches to file
# (one .mat file per song, named after k; an existing file is not overwritten)
patches_save_path = os.path.join(out_path, '{}_patched.mat'.
                                 format(k))
if not os.path.exists(patches_save_path):
    savemat(patches_save_path, patched_data)
#del patched_data
print 'finished taking patches of {}'.format(k)


finished taking patches of MusicDelta_Beethoven


In [33]:
# release the raw audio of the current batch and force a garbage collection
# NOTE: `data` no longer exists after this cell; cells that read it have to
# reload the batch first.
del data
gc.collect()
# progress report: number of tracks handled so far out of the total
print 'finished {} of {}'.format(min(i+save_size, len(dlist)),
                                 len(dlist))

finished 1 of 1


In [34]:
# Load a saved patch file back from disk to check what was written.
# NOTE(review): the file name is hard-coded and the import sits outside the
# import cell at the top of the notebook.
import scipy.io as sio
df=sio.loadmat('MusicDelta_Beethoven_patched.mat')

In [64]:
# Take the first (song_name, audio) pair of `data` again for inspection.
# NOTE(review): an earlier cell deletes `data`, so the batch must have been
# read again before this cell can run.
(k,v)=data.items()[0]

In [69]:
# Print the audio array of the selected track.
# NOTE: the loop variables rebind the notebook-level names i and j.
for (i,j) in {k:v}.items():
    print j

[[ 211.  221.  222. ...,    0.    0.    0.]
 [ 323.  337.  343. ...,   -1.    0.    0.]]


In [70]:
# Number of columns (time frames) of the selected track.
v.shape[1]

1207232

In [81]:
# NOTE: each row of X holds 88200 values because ravel() concatenates the two
# channel rows of a 1 s patch (44100 samples each).
patched_data['X'][0]

array([  211.,   221.,   222., ..., -4922., -6341., -7588.], dtype=float32)

In [87]:
# Scratch check: how os.path.join combines two path components.
os.path.join('m','n')

'm/n'

In [88]:
# Scratch check: the path separator of this platform.
os.sep

'/'

In [None]:
 # Scratch: path of the metadata file of track i (this cell was never run).
 os.path.join(dpath, i, '{}_METADATA.yaml'.format(i))

In [89]:
# Scratch: list the track directories again.
# NOTE: this rebinds dlist to an unsorted listing with a hard-coded path,
# replacing the sorted list built from in_path earlier.
dlist = os.listdir('../../data/Audio')

In [90]:
# Inspect the list of track names.
dlist

['MusicDelta_Beethoven']

In [91]:
# Scratch: build the metadata path of every track; only the path of the last
# track is left in fpath after the loop.
for i in dlist:
        fpath = os.path.join(dpath, i, '{}_METADATA.yaml'.format(i))

In [92]:
# Inspect the metadata path produced by the loop in the previous cell.
fpath

'../../data/Audio/MusicDelta_Beethoven/MusicDelta_Beethoven_METADATA.yaml'