In [1]:
import os

from joblib import Parallel, delayed
import pickle

import pumpp
import jams
import numpy as np

## IMPORTANT: set the dataset and working directory paths below before running

In [None]:
# Root folder of the dataset. The trailing slash is required: later cells build
# sub-folder paths from it by plain string concatenation.
dataset = "/Users/theo/School/2/MIR/final-project/datasets/Beethoven_Piano_Sonata_Dataset_v2/"
# Folder that receives the pickled pump and, underneath it, the converted files.
working = "/Users/theo/School/2/MIR/final-project/FinalProjectMIR/working/jazznet_2"

In [None]:
# Collect the audio and chord-annotation files that will be fed to the pump
def root(x):
    """Return the base name of path `x` with its last extension stripped."""
    filename = os.path.basename(x)
    stem, _extension = os.path.splitext(filename)
    return stem


# Gather every audio recording and every chord annotation found in the dataset
AUDIO = jams.util.find_with_extension(dataset + "1_Audio", "wav")
ANNOS = jams.util.find_with_extension(dataset + "2_Annotations/ann_audio_chord", "jams")

# A track is usable only when both its audio and its annotation are present
common_files = {root(path) for path in AUDIO}.intersection(map(root, ANNOS))

AUDIO = [path for path in AUDIO if root(path) in common_files]
ANNOS = [path for path in ANNOS if root(path) in common_files]

# Sanity check: both lists must have the same length ...
assert len(AUDIO) == len(ANNOS)

# ... and must line up entry by entry, since they are zipped together later
assert all(root(aud) == root(ann) for aud, ann in zip(AUDIO, ANNOS))

In [3]:
# Build a pump
# Sampling rate and hop length are shared by the feature extractor and both
# chord tasks so that all three are configured with the same framing.
sr = 16000
hop_length = 4096

# Input features: CQT magnitude operator (log=True, 4 octaves, conv="tf").
p_feature = pumpp.feature.CQTMag(
    name="cqt", sr=sr, hop_length=hop_length, log=True, conv="tf", n_octaves=4
)
# Targets: chord tags and chord structure, both with sparse=True.
p_chord_tag = pumpp.task.ChordTagTransformer(
    name="chord_tag", sr=sr, hop_length=hop_length, sparse=True
)
p_chord_struct = pumpp.task.ChordTransformer(
    name="chord_struct", sr=sr, hop_length=hop_length, sparse=True
)

pump = pumpp.Pump(p_feature, p_chord_tag, p_chord_struct)

# Save the pump
# Create the working directory first: open(..., "wb") does not create missing
# parent folders, so a fresh machine would otherwise fail here.
os.makedirs(working, exist_ok=True)

with open(os.path.join(working, "pump.pkl"), "wb") as fd:
    pickle.dump(pump, fd)

In [6]:
def convert(aud, jam, pump, outdir):
    """Transform one audio/annotation pair with `pump` and save it as an .npz file.

    Parameters
    ----------
    aud : str
        Path of the audio file. Its base name without extension names the output.
    jam : str
        Path of the matching annotation file; passed unchanged to `pump.transform`.
    pump : object
        Object exposing `transform(aud, jam)` whose result can be unpacked as
        keyword arguments to `np.savez`.
    outdir : str
        Directory that receives `<root(aud)>.npz`. It is created if missing.

    Returns
    -------
    None
        On success the file is written. If `pump.transform` raises, the error is
        printed and nothing is written for this pair.
    """
    try:
        data = pump.transform(aud, jam)
    except Exception as e:
        print("Error processing {}: {}".format(aud, e))
        # Stop here: there is nothing to save, and falling through would raise
        # UnboundLocalError on `data`, hiding the real error and aborting the
        # whole parallel run.
        return

    fname = os.path.extsep.join([root(aud), "npz"])

    # np.savez does not create parent folders; exist_ok keeps this safe when
    # several workers reach this line at the same time.
    os.makedirs(outdir, exist_ok=True)
    np.savez(os.path.join(outdir, fname), **data)

In [7]:
# Destination folder handed to convert() for the .npz files; built from
# `working` by plain string concatenation, so it keeps its trailing slash.
OUTDIR = working + "/beethoven/pump/"

In [9]:
# Convert every (audio, annotation) pair using 20 joblib workers; each task
# calls convert(), which writes one file into OUTDIR.
Parallel(n_jobs=20, verbose=10)(
    delayed(convert)(audio_path, jams_path, pump, OUTDIR)
    for audio_path, jams_path in zip(AUDIO, ANNOS)
)

[Parallel(n_jobs=20)]: Using backend LokyBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done   1 tasks      | elapsed:    5.8s
[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:   10.7s
[Parallel(n_jobs=20)]: Done  21 tasks      | elapsed:   12.0s
[Parallel(n_jobs=20)]: Done  32 tasks      | elapsed:   13.7s
[Parallel(n_jobs=20)]: Done  45 tasks      | elapsed:   15.4s
[Parallel(n_jobs=20)]: Done  58 tasks      | elapsed:   17.3s
[Parallel(n_jobs=20)]: Done  73 tasks      | elapsed:   18.4s
[Parallel(n_jobs=20)]: Done  88 tasks      | elapsed:   20.4s
[Parallel(n_jobs=20)]: Done 102 out of 128 | elapsed:   21.4s remaining:    5.5s
[Parallel(n_jobs=20)]: Done 115 out of 128 | elapsed:   22.3s remaining:    2.5s
[Parallel(n_jobs=20)]: Done 128 out of 128 | elapsed:   23.2s finished


[None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None]