In [47]:
import os
from glob import glob

from joblib import Parallel, delayed
from tqdm import tqdm_notebook as tqdm
import pickle
import pandas as pd

import pumpp
import jams
import numpy as np

## IMPORTANT: set `working` in the next cell to the directory that contains your `audio/` and `annotations/` folders

In [None]:
# Root directory of the data tree; the cells below read from and write to
# sub-folders of it. To point the notebook at another location without
# editing this cell, export CHORDS_WORKING_DIR before starting the kernel;
# when the variable is unset the original default location is used.
working = os.environ.get('CHORDS_WORKING_DIR', '/Users/manuel/working')

In [50]:
def root(x):
    """Return the file name of path `x` without its directory and last extension."""
    filename = os.path.basename(x)
    stem, _extension = os.path.splitext(filename)
    return stem

# Collect the audio files and their annotation files from the working tree.
AUDIO = jams.util.find_with_extension(working+'/audio/', 'mp3')
ANNOS = jams.util.find_with_extension(working+'/annotations/', 'jams')

print(AUDIO)
print(ANNOS)

# Two empty listings would satisfy both checks below and every later cell
# would then silently process nothing, so fail early with a clear message.
assert AUDIO, 'No mp3 files found under {}'.format(working+'/audio/')

# Make sure there are the same number of files
assert len(AUDIO) == len(ANNOS), (
    '{} audio files but {} annotation files'.format(len(AUDIO), len(ANNOS))
)

# And that they're in agreement
# (files are paired by position, so this relies on both listings coming back
# in the same order)
mismatched = [(_1, _2) for (_1, _2) in zip(AUDIO, ANNOS) if root(_1) != root(_2)]
assert not mismatched, (
    'Audio/annotation file names disagree, e.g. {}'.format(mismatched[:5])
)

['/Users/manuel/working/audio/multisonidos.mp3']
['/Users/manuel/working/annotations/multisonidos.jams']


In [52]:
# Build a pump
sr = 44100
hop_length = 4096

# One input feature and two chord targets, all configured with the same
# sampling rate and hop length.
p_feature = pumpp.feature.CQTMag(name='cqt', sr=sr, hop_length=hop_length, log=True, conv='tf', n_octaves=6)
p_chord_tag = pumpp.task.ChordTagTransformer(name='chord_tag', sr=sr, hop_length=hop_length, sparse=True)
p_chord_struct = pumpp.task.ChordTransformer(name='chord_struct', sr=sr, hop_length=hop_length, sparse=True)

pump = pumpp.Pump(p_feature, p_chord_tag, p_chord_struct)

# Save the pump
pump_path = working+'/chords/pump.pkl'

# open(..., 'wb') does not create missing parent directories, so make sure
# the target folder exists before writing.
os.makedirs(os.path.dirname(pump_path), exist_ok=True)

with open(pump_path, 'wb') as fd:
    pickle.dump(pump, fd)

In [53]:
def convert(aud, jam, pump, outdir):
    """Run `pump` on one audio/annotation pair and save the result as an .npz file.

    Parameters
    ----------
    aud : str
        Path to the audio file. Its base name (without extension) names the
        output file.
    jam
        Annotation for `aud`; passed unchanged to `pump.transform`.
    pump
        Object exposing `transform(aud, jam)`. The returned value is unpacked
        as keyword arguments to `np.savez`, so it must be a mapping from
        string keys to array-like values.
    outdir : str
        Directory that receives `<root(aud)>.npz`. Created if it is missing.
    """
    # np.savez does not create missing directories. Create the folder before
    # the transform so a bad output location fails fast; exist_ok=True keeps
    # this safe when several parallel workers reach this line at once.
    os.makedirs(outdir, exist_ok=True)

    data = pump.transform(aud, jam)

    fname = os.path.extsep.join([root(aud), 'npz'])

    np.savez(os.path.join(outdir, fname), **data)

In [54]:
# Destination directory handed to convert() by the cells below
OUTDIR = ''.join([working, '/chords/pump/'])

In [55]:
# Convert every (audio, annotation) pair, spread over 20 worker processes
Parallel(n_jobs=20, verbose=10)(
    delayed(convert)(audio_path, jams_path, pump, OUTDIR)
    for audio_path, jams_path in zip(AUDIO, ANNOS)
);

[Parallel(n_jobs=20)]: Using backend LokyBackend with 20 concurrent workers.


[Parallel(n_jobs=20)]: Done   1 tasks      | elapsed:    8.3s


In [17]:
# Augmented data
# Look under the same `working` tree as the rest of the notebook instead of a
# path hard-coded to one particular machine's home directory.
AUGMENTATION_DIR = working+'/chords/augmentation'

AUDIO_A = sorted(glob(os.path.join(AUGMENTATION_DIR, '*.flac')))
ANNOS_A = sorted(glob(os.path.join(AUGMENTATION_DIR, '*.jams')))

# Make sure there are the same number of files
assert len(AUDIO_A) == len(ANNOS_A), (
    '{} augmented audio files but {} annotation files'.format(len(AUDIO_A), len(ANNOS_A))
)

# And that they're in agreement (files are paired by sorted position)
assert all(root(_1) == root(_2) for (_1, _2) in zip(AUDIO_A, ANNOS_A)), (
    'Augmented audio/annotation file names disagree'
)

Parallel(n_jobs=20, verbose=10)(delayed(convert)(aud, jam, pump, OUTDIR) for (aud, jam) in zip(AUDIO_A, ANNOS_A));

[Parallel(n_jobs=20)]: Using backend LokyBackend with 20 concurrent workers.


In [18]:
# Synthetic data
# Look under the same `working` tree as the rest of the notebook instead of a
# path hard-coded to one particular machine's home directory.
SYNTHETIC_DIR = working+'/chords/synthetic'

# Note: the annotations here use the .jamz extension, unlike the .jams files
# matched elsewhere in this notebook.
AUDIO_S = sorted(glob(os.path.join(SYNTHETIC_DIR, '*.flac')))
ANNOS_S = sorted(glob(os.path.join(SYNTHETIC_DIR, '*.jamz')))

# Make sure there are the same number of files
assert len(AUDIO_S) == len(ANNOS_S), (
    '{} synthetic audio files but {} annotation files'.format(len(AUDIO_S), len(ANNOS_S))
)

# And that they're in agreement (files are paired by sorted position)
assert all(root(_1) == root(_2) for (_1, _2) in zip(AUDIO_S, ANNOS_S)), (
    'Synthetic audio/annotation file names disagree'
)


In [19]:
# Convert every synthetic (audio, annotation) pair, spread over 20 worker processes
Parallel(n_jobs=20, verbose=10)(
    delayed(convert)(audio_path, jams_path, pump, OUTDIR)
    for audio_path, jams_path in zip(AUDIO_S, ANNOS_S)
);

[Parallel(n_jobs=20)]: Using backend LokyBackend with 20 concurrent workers.


In [None]:
# Make the artist index

In [44]:
# Map each track (annotation file name without extension) to its artist.
# The entries are collected in a dict and the Series is built once at the
# end: this avoids growing a Series element by element and avoids the
# dtype-less empty `pd.Series()` constructor.
artist_by_track = {}

null_artist = 0

for ann in tqdm(ANNOS):
    J = jams.load(ann, validate=False)
    if not J.file_metadata.artist:
        # No artist in the metadata: give the track its own numbered
        # placeholder so it is not grouped with any other track.
        artist = 'artist_{:05d}'.format(null_artist)
        null_artist += 1
    else:
        artist = J.file_metadata.artist

    artist_by_track[root(ann)] = artist

# dtype=object keeps the dtype explicit even when no annotations were found
index = pd.Series(artist_by_track, dtype=object)

# Write next to the other outputs under `working` rather than to a path
# hard-coded to one particular machine's home directory.
index_path = working+'/chords/artist_index.json'
os.makedirs(os.path.dirname(index_path), exist_ok=True)
index.to_json(index_path)

0it [00:00, ?it/s]


In [45]:
# For each annotation file, print its name and the size of its
# `annotations['chord']` selection
for ann in tqdm(ANNOS):
    J = jams.load(ann, validate=False)
    track_name = root(ann)
    n_chord = len(J.annotations['chord'])
    print('{}: {}'.format(track_name, n_chord))

0it [00:00, ?it/s]
