In [None]:
import os

from joblib import Parallel, delayed
import pickle

import pumpp
import jams
import numpy as np

## IMPORTANT: define your dataset path and your working directory

In [None]:
# Root location of the dataset. The audio and annotation sub-directory names
# are appended to this string verbatim (plain concatenation, no separator is
# inserted automatically).
dataset = "ADD_YOUR_DATASET_PATH_HERE"
# Directory that receives the generated artifacts (the pickled pump and the
# "pump" sub-directory holding the converted feature files).
working = "ADD_YOUR_WORKING_DIR_HERE"

In [None]:
# Locate the audio files and their matching JAMS annotations
def root(x):
    """Return the file name of path ``x`` without its directory or last extension.

    Only the final extension is stripped, so ``"a/b/track.tar.gz"`` yields
    ``"track.tar"``.
    """
    filename = os.path.basename(x)
    stem, _extension = os.path.splitext(filename)
    return stem


# Collect every audio file and every JAMS annotation of the dataset.
# NOTE: the sub-directory placeholders are appended to `dataset` verbatim, so
# the path separator has to be part of one of the two strings.
AUDIO = jams.util.find_with_extension(dataset + "ADD_WAV_DIRECTORY", "wav")
ANNOS = jams.util.find_with_extension(dataset + "ADD_JAMS_DIRECTORY", "jams")

# Filter AUDIO and ANNOS to only keep files that exist in both folders
common_files = set(map(root, AUDIO)) & set(map(root, ANNOS))

# Sort both lists by base name so that the i-th audio file is always paired
# with the i-th annotation, regardless of the order in which the two lists
# were returned or how the two directory trees are laid out.
AUDIO = sorted((file for file in AUDIO if root(file) in common_files), key=root)
ANNOS = sorted((file for file in ANNOS if root(file) in common_files), key=root)

print("Number of audio files:", len(AUDIO))
# Make sure there are the same number of files
# (this can only differ if several files share the same base name)
assert len(AUDIO) == len(ANNOS), "audio/annotation counts differ: {} vs {}".format(
    len(AUDIO), len(ANNOS)
)

# And that they're in agreement
assert all(
    root(_1) == root(_2) for (_1, _2) in zip(AUDIO, ANNOS)
), "audio files and annotations are not aligned by base name"

In [None]:
# Build a pump
# Both values are shared by every operator below so that the feature frames
# and the chord label frames use the same analysis parameters.
sr = 44100  # sampling rate handed to every operator (presumably in Hz)
hop_length = 4096  # hop length handed to every operator (presumably in samples)

# Input feature: log-scaled CQT magnitude.
p_feature = pumpp.feature.CQTMag(
    name="cqt", sr=sr, hop_length=hop_length, log=True, conv="tf", n_octaves=6
)
# Targets: chord tags and the structured chord encoding.
p_chord_tag = pumpp.task.ChordTagTransformer(
    name="chord_tag", sr=sr, hop_length=hop_length, sparse=True
)
p_chord_struct = pumpp.task.ChordTransformer(
    name="chord_struct", sr=sr, hop_length=hop_length, sparse=True
)

pump = pumpp.Pump(p_feature, p_chord_tag, p_chord_struct)

# Save the pump
# The working directory may not exist yet on a fresh run; create it first so
# that opening the pickle file for writing cannot fail with FileNotFoundError.
os.makedirs(working, exist_ok=True)
with open(os.path.join(working, "pump.pkl"), "wb") as fd:
    pickle.dump(pump, fd)

In [None]:
def convert(aud, jam, pump, outdir):
    """Transform one audio/annotation pair and store the result as ``.npz``.

    Parameters
    ----------
    aud : str
        Path of the audio file. Its base name (without extension) is used as
        the name of the output file.
    jam : str
        Path of the matching annotation file, passed to ``pump.transform``.
    pump : object
        Object exposing ``transform(aud, jam)`` that returns a mapping of
        names to arrays.
    outdir : str
        Existing directory in which ``<base name>.npz`` is written.

    Returns
    -------
    None
        On failure of ``pump.transform`` the error is printed and the track
        is skipped; no file is written.
    """
    try:
        data = pump.transform(aud, jam)
    except Exception as e:
        print("Error processing {}: {}".format(aud, e))
        # Skip this track: without the early return `data` would be unbound
        # below and the worker would crash with an UnboundLocalError.
        return

    fname = os.path.extsep.join([root(aud), "npz"])

    np.savez(os.path.join(outdir, fname), **data)

In [None]:
# Directory that receives the converted .npz feature files.
OUTDIR = working + "/pump/"
# exist_ok=True replaces the check-then-create pattern: it has no race between
# the existence test and the creation, and keeps the cell safe to re-run.
os.makedirs(OUTDIR, exist_ok=True)

In [None]:
# Lazily describe one conversion task per (audio, annotation) pair, then hand
# the whole batch to joblib, which runs it with 20 jobs and verbose progress.
conversion_jobs = (
    delayed(convert)(audio_path, jams_path, pump, OUTDIR)
    for audio_path, jams_path in zip(AUDIO, ANNOS)
)
Parallel(n_jobs=20, verbose=10)(conversion_jobs)