In [1]:
import pickle
import pumpp
import numpy as np
import librosa
import os
from glob import glob

import tensorflow as tf
import keras as K
import pescador
import pandas as pd

from sklearn.model_selection import GroupShuffleSplit

2023-06-26 06:54:12.418702: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-06-26 06:54:13.636053: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/local/cuda11.3/lib64:/usr/local/cuda11.3/lib64
2023-06-26 06:54:13.636183: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/local/cuda11.3/lib64:/usr/local/cuda11.3/lib64


In [2]:
# Fixed random seed, passed as `random_state` to the sampler, the
# train/test/validation splitters and the data generators below.
SEED = 20170401

In [3]:
# Directory containing the per-track feature archives (`<track>.npz`) that
# `data_generator` streams from.
DATA_DIR = 'working/chords/pump'

In [4]:
# Reload the pickled `pump` object; the rest of the notebook uses it for the
# input layer, field metadata, sampling and decoding predictions.
# NOTE(review): `pickle.load` can execute arbitrary code -- only load a
# `pump.pkl` that comes from a trusted source.
with open('working/chords/pump.pkl', 'rb') as fd:
    pump = pickle.load(fd)

In [5]:
# Calculate the number of frames per training patch and build the sampler.

# First positional argument of `pump.sampler`.
# NOTE(review): presumably the maximum number of patches drawn per track --
# confirm against the pumpp documentation.
MAX_SAMPLES = 128

# Patch duration, in seconds.
duration = 8.0

# Convert the duration to a frame count using the sample rate and hop length
# of the pump's 'cqt' operator.
n_frames = int(librosa.time_to_frames(duration,
                                      sr=pump['cqt'].sr,
                                      hop_length=pump['cqt'].hop_length))

sampler = pump.sampler(MAX_SAMPLES, n_frames, random_state=SEED)

In [6]:
# Display the patch length, in frames, computed in the previous cell.
n_frames

86

In [7]:
def data_sampler(fname, sampler):
    """Stream samples drawn from one ``.npz`` feature archive.

    Parameters
    ----------
    fname : str
        Path handed to ``np.load``.
    sampler : callable
        Called as ``sampler(data)`` with the opened archive; must return an
        iterable of samples.

    Yields
    ------
    object
        Every item produced by ``sampler(data)``, unchanged.

    Notes
    -----
    A later cell of this notebook defines ``data_sampler`` again; when the
    cells are run in order, that later definition replaces this one.
    """
    data = np.load(fname)
    try:
        yield from sampler(data)
    finally:
        # Runs when the sampler is exhausted, when it raises, and when this
        # generator is closed early (e.g. a consumer that stops iterating),
        # so the archive handle is always released.
        data.close()
    

In [8]:
def data_sampler(fname, sampler):
    """Yield samples from an ``.npz`` archive after reading it into memory.

    Every array stored in ``fname`` is copied into a plain ``dict`` and the
    archive is closed *before* ``sampler`` is invoked, so no file handle
    stays open while samples are being streamed.

    Parameters
    ----------
    fname : str
        Path handed to ``np.load``.
    sampler : callable
        Called as ``sampler(arrays)`` with the in-memory ``dict``; must
        return an iterable of samples.

    Yields
    ------
    object
        Every item produced by ``sampler``, unchanged.
    """
    archive = np.load(fname)
    arrays = {key: archive[key] for key in archive.keys()}
    archive.close()

    yield from sampler(arrays)

In [9]:
def data_generator(tracks, sampler, k, batch_size=16, **kwargs):
    """Build a multiplexed, optionally batched, stream over a set of tracks.

    Parameters
    ----------
    tracks : iterable of str
        Track identifiers; each one is read from ``<DATA_DIR>/<track>.npz``.
    sampler : callable
        Forwarded to ``data_sampler`` for every track.
    k : int
        Second positional argument of ``pescador.Mux``.
        NOTE(review): presumably the number of simultaneously active
        streamers -- confirm against the pescador documentation.
    batch_size : int
        When equal to 1 the mux itself is returned; otherwise the mux is
        wrapped in a ``pescador.BufferedStreamer`` of this size.
    **kwargs
        Forwarded verbatim to ``pescador.Mux``.

    Returns
    -------
    pescador.Mux or pescador.BufferedStreamer
    """
    # One streamer per track.
    seeds = [
        pescador.Streamer(
            data_sampler,
            os.path.join(DATA_DIR, os.path.extsep.join([track, 'npz'])),
            sampler,
        )
        for track in tracks
    ]

    # Send it all to a mux
    mux = pescador.Mux(seeds, k, **kwargs)

    if batch_size != 1:
        return pescador.BufferedStreamer(mux, batch_size)
    return mux

In [10]:
def data_generator(tracks, sampler, k, batch_size=16, augmentation=False, **kwargs):
    """Build a multiplexed, optionally batched, stream over a set of tracks.

    This definition has the same name as the one in the previous cell and
    replaces it; with ``augmentation=False`` it builds the same streamers.

    Parameters
    ----------
    tracks : iterable of str
        Track identifiers; each one is read from ``<DATA_DIR>/<track>.npz``.
    sampler : callable
        Forwarded to ``data_sampler`` for every archive.
    k : int
        Second positional argument of ``pescador.Mux``.
        NOTE(review): presumably the number of simultaneously active
        streamers -- confirm against the pescador documentation.
    batch_size : int
        When equal to 1 the mux itself is returned; otherwise the mux is
        wrapped in a ``pescador.BufferedStreamer`` of this size.
    augmentation : bool
        When true, every file matching ``<DATA_DIR>/<track>.*.npz`` is added
        as an extra streamer, in sorted order, after the track's own file.
    **kwargs
        Forwarded verbatim to ``pescador.Mux``.

    Returns
    -------
    pescador.Mux or pescador.BufferedStreamer
    """
    # Local import: the notebook imports only the `glob` function.
    from glob import escape as glob_escape

    seeds = []
    for track in tracks:

        fname = os.path.join(DATA_DIR, os.path.extsep.join([track, 'npz']))
        seeds.append(pescador.Streamer(data_sampler, fname, sampler))

        if augmentation:
            # Escape glob metacharacters in the fixed part of the pattern so
            # that a track id containing `[`, `*` or `?` only matches its
            # own files.
            pattern = glob_escape(os.path.join(DATA_DIR, track)) + '.*.npz'
            for aug_fname in sorted(glob(pattern)):
                seeds.append(pescador.Streamer(data_sampler, aug_fname, sampler))

    # Send it all to a mux
    mux = pescador.Mux(seeds, k, **kwargs)

    if batch_size == 1:
        return mux
    else:
        return pescador.BufferedStreamer(mux, batch_size)

In [11]:
def wrap(gen):
    """Reshape each batch into an ``(inputs, targets)`` pair.

    For every ``batch`` produced by ``gen``, the first element is yielded as
    the inputs and the remaining elements are collected into a list of
    targets.
    """
    for batch in gen:
        inputs = batch[0]
        targets = list(batch[1:])
        yield inputs, targets

# Construct the model

In [12]:
# Inspect the fields (name -> shape/dtype) that the pump produces.
pump.fields

{'cqt/mag': Tensor(shape=(None, 216, 1), dtype=dtype('float32')),
 'chord_tag/chord': Tensor(shape=(None, 1), dtype=<class 'int'>),
 'chord_struct/pitch': Tensor(shape=(None, 12), dtype=<class 'bool'>),
 'chord_struct/root': Tensor(shape=(None, 1), dtype=<class 'int'>),
 'chord_struct/bass': Tensor(shape=(None, 1), dtype=<class 'int'>)}

In [13]:
# Size of the chord-tag vocabulary; the model below uses the same expression
# as the width of its 'chord_tag' output layer.
len(pump['chord_tag'].vocabulary())

170

In [14]:
# Input layer for the 'cqt/mag' field, as provided by the pump.
# `pump.fields` reports this field with shape (None, 216, 1).
# NOTE(review): presumably (frames, CQT bins, channel) with the batch axis
# added by Keras -- confirm.
x = pump.layers()['cqt/mag']

# Normalize the input.
b = K.layers.BatchNormalization()(x)

# A single 5x5 filter with 'same' padding.
c0 = K.layers.Convolution2D(1, (5, 5), padding='same', activation='relu',
                            data_format='channels_last')(b)

# 36 filters whose kernel is one step wide on axis 1 and spans the whole of
# axis 2 of `c0`; with 'valid' padding, axis 2 collapses to length 1.
c1 = K.layers.Convolution2D(36, (1, int(c0.shape[2])), padding='valid', activation='relu',
                            data_format='channels_last')(c0)

# Drop the now-singleton axis 2 so the tensor can be fed to the recurrent
# layer. (The lambda's parameter `x` is local and unrelated to the input `x`.)
r1 = K.layers.Lambda(lambda x: K.backend.squeeze(x, axis=2))(c1)

# Bidirectional GRU (64 units per direction) returning one output per step.
rs = K.layers.Bidirectional(K.layers.GRU(64,
                                         return_sequences=True))(r1)

# 1: pitch class predictor -- one sigmoid unit per entry of the
# 'chord_struct/pitch' field, applied at every step.
pc_p = K.layers.TimeDistributed(K.layers.Dense(pump.fields['chord_struct/pitch'].shape[1], activation='sigmoid'),
                       name='chord_pitch')(rs)

# 2: root predictor -- 13-way softmax at every step.
# NOTE(review): 13 is presumably 12 pitch classes plus one "no root" class;
# confirm against the pump's chord_struct encoding.
root_p = K.layers.TimeDistributed(K.layers.Dense(13, activation='softmax'),
                         name='chord_root')(rs)

# 3: bass predictor -- same layout as the root predictor.
bass_p = K.layers.TimeDistributed(K.layers.Dense(13, activation='softmax'),
                         name='chord_bass')(rs)

# 4: merge layer -- the chord-tag classifier sees the recurrent features
# together with the three structured predictions.
codec = K.layers.concatenate([rs, pc_p, root_p, bass_p])


# Softmax over the chord-tag vocabulary, with an L2 penalty on the bias.
p0 = K.layers.Dense(len(pump['chord_tag'].vocabulary()), activation='softmax',
                    bias_regularizer=K.regularizers.l2())

tag = K.layers.TimeDistributed(p0, name='chord_tag')(codec)


# Output order (tag, pitch, root, bass) must match the order of the targets
# fed to the model during training.
model = K.models.Model(x, [tag, pc_p, root_p, bass_p])

2023-06-26 06:54:16.729448: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-06-26 06:54:17.561999: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1613] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 22293 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 3090, pci bus id: 0000:01:00.0, compute capability: 8.6


# Run a train-test split and train the model

In [15]:
# Series indexed by track id; its values are used as the grouping key so that
# all tracks of one group land on the same side of each split.
# NOTE(review): judging by the file name the values are artist names -- confirm.
index = pd.read_json('working/chords/artist_index.json', typ='series')

# Outer split: hold out the test set. `n_splits=1`, so the splitter yields
# exactly one (train, test) pair of positional indices.
splitter_tt = GroupShuffleSplit(n_splits=1, random_state=SEED)
train_, test = next(splitter_tt.split(index, groups=list(index)))
idx_train_ = index.iloc[train_]
idx_test = index.iloc[test]

# Inner split: 25% of the remaining groups are used for validation.
splitter_tv = GroupShuffleSplit(n_splits=1, test_size=0.25, random_state=SEED)
train, val = next(splitter_tv.split(idx_train_, groups=list(idx_train_)))
idx_train = idx_train_.iloc[train]
idx_val = idx_train_.iloc[val]

# Training stream: original plus augmented archives.
gen_train = data_generator(idx_train.index, sampler, 1024, augmentation=True,
                           lam=8, batch_size=32, revive=True, random_state=SEED)

# Validation stream: original archives only.
gen_val = data_generator(idx_val.index, sampler, len(idx_val), batch_size=32,
                         revive=True, random_state=SEED)

model.compile(K.optimizers.Adam(),
              loss={'chord_tag': 'sparse_categorical_crossentropy',
                    'chord_pitch': 'binary_crossentropy',
                    'chord_root': 'sparse_categorical_crossentropy',
                    'chord_bass': 'sparse_categorical_crossentropy'},
              metrics={'chord_tag': 'sparse_categorical_accuracy'})

# Input field first, then the targets in the same order as the model outputs
# (tag, pitch, root, bass).
stream_fields = ('cqt/mag',
                 'chord_tag/chord',
                 'chord_struct/pitch',
                 'chord_struct/root',
                 'chord_struct/bass')

# `Model.fit` accepts generators directly; `Model.fit_generator` is deprecated.
# The trailing semicolon keeps the returned History object from being echoed.
# Per the training log, the checkpoint path is written as a SavedModel
# directory despite its `.pkl` suffix.
model.fit(wrap(gen_train.tuples(*stream_fields)),
          steps_per_epoch=512,
          epochs=100,
          validation_data=wrap(gen_val.tuples(*stream_fields)),
          validation_steps=1024,
          callbacks=[K.callbacks.ModelCheckpoint('working/chords/model_direct_ckpt.pkl',
                                                 save_best_only=True,
                                                 verbose=1,
                                                 monitor='val_chord_tag_loss'),
                     K.callbacks.ReduceLROnPlateau(monitor='val_chord_tag_loss', patience=5, verbose=1),
                     K.callbacks.EarlyStopping(monitor='val_chord_tag_loss', patience=15, verbose=1)]);



Epoch 1/100
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: 'arguments' object has no attribute 'posonlyargs'
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: 'arguments' object has no attribute 'posonlyargs'


2023-06-26 06:54:27.576668: I tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:428] Loaded cuDNN version 8401
Could not load symbol cublasGetSmCountTarget from libcublas.so.11. Error: /usr/local/cuda-11.3/targets/x86_64-linux/lib/libcublas.so.11: undefined symbol: cublasGetSmCountTarget
2023-06-26 06:54:35.413431: I tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:630] TensorFloat-32 will be used for the matrix multiplication. This will only be logged once.


Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: 'arguments' object has no attribute 'posonlyargs'
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: 'arguments' object has no attribute 'posonlyargs'

Epoch 1: val_chord_tag_loss improved from inf to 3.37458, saving model to working/chords/model_direct_ckpt.pkl
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: 'arguments' object has no attribute 'posonlyargs'
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: 'arguments' object has no attribute 'posonlyargs'




Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: 'arguments' object has no attribute 'posonlyargs'


Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: 'arguments' object has no attribute 'posonlyargs'


Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: 'arguments' object has no attribute 'posonlyargs'
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: 'arguments' object has no attribute 'posonlyargs'


Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: 'arguments' object has no attribute 'posonlyargs'


Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: 'arguments' object has no attribute 'posonlyargs'




INFO:tensorflow:Assets written to: working/chords/model_direct_ckpt.pkl/assets


INFO:tensorflow:Assets written to: working/chords/model_direct_ckpt.pkl/assets


Epoch 2/100
Epoch 2: val_chord_tag_loss did not improve from 3.37458
Epoch 3/100
Epoch 3: val_chord_tag_loss improved from 3.37458 to 3.37229, saving model to working/chords/model_direct_ckpt.pkl
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: 'arguments' object has no attribute 'posonlyargs'


Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: 'arguments' object has no attribute 'posonlyargs'


Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: 'arguments' object has no attribute 'posonlyargs'




Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: 'arguments' object has no attribute 'posonlyargs'


Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: 'arguments' object has no attribute 'posonlyargs'


Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: 'arguments' object has no attribute 'posonlyargs'




INFO:tensorflow:Assets written to: working/chords/model_direct_ckpt.pkl/assets


INFO:tensorflow:Assets written to: working/chords/model_direct_ckpt.pkl/assets


Epoch 4/100
Epoch 4: val_chord_tag_loss did not improve from 3.37229
Epoch 5/100
Epoch 5: val_chord_tag_loss did not improve from 3.37229
Epoch 6/100
Epoch 6: val_chord_tag_loss did not improve from 3.37229
Epoch 7/100
Epoch 7: val_chord_tag_loss did not improve from 3.37229
Epoch 8/100
Epoch 8: val_chord_tag_loss did not improve from 3.37229

Epoch 8: ReduceLROnPlateau reducing learning rate to 0.00010000000474974513.
Epoch 9/100
Epoch 9: val_chord_tag_loss did not improve from 3.37229
Epoch 10/100
Epoch 10: val_chord_tag_loss did not improve from 3.37229
Epoch 11/100
Epoch 11: val_chord_tag_loss did not improve from 3.37229
Epoch 12/100
Epoch 12: val_chord_tag_loss did not improve from 3.37229
Epoch 13/100
Epoch 13: val_chord_tag_loss did not improve from 3.37229

Epoch 13: ReduceLROnPlateau reducing learning rate to 1.0000000474974514e-05.
Epoch 14/100
Epoch 14: val_chord_tag_loss did not improve from 3.37229
Epoch 15/100
Epoch 15: val_chord_tag_loss did not improve from 3.37229
Epo

In [16]:
# Restore the weights from the path the training cell's `ModelCheckpoint`
# callback writes to (`save_best_only=True`, so this is the best epoch by
# `val_chord_tag_loss`).
model.load_weights('working/chords/model_direct_ckpt.pkl')

2023-06-26 07:03:40.459738: W tensorflow/core/util/tensor_slice_reader.cc:96] Could not open working/chords/model_direct_ckpt.pkl: FAILED_PRECONDITION: working/chords/model_direct_ckpt.pkl; Is a directory: perhaps your file is in a different file format and you need to use a different restore operator?


<tensorflow.python.checkpoint.checkpoint.CheckpointLoadStatus at 0x7ff7b5d1b350>

# Diagnostics

In [30]:
import jams.display
import matplotlib.pyplot as plt
import pandas as pd
%matplotlib nbagg
import librosa.display

In [32]:
# Per-epoch metrics recorded by Keras during the last fit, one row per epoch.
history = pd.DataFrame.from_dict(model.history.history)
plt.figure()

plt.plot(history['chord_tag_sparse_categorical_accuracy'], label='Training accuracy')
plt.plot(history['val_chord_tag_sparse_categorical_accuracy'], label='Validation accuracy')
# Label the axes so the saved figure can be read on its own.
plt.xlabel('Epoch')
plt.ylabel('Chord tag accuracy')
plt.title('Chord tag accuracy per epoch')
plt.legend(loc='best')
plt.savefig('02_accuracy.png')

<IPython.core.display.Javascript object>

In [None]:
# Show the full per-epoch history table.
history

In [19]:
# Fifth weight array of the model; the plotting cell below treats it as a
# 4-D array and indexes it as l1[:, :, c, f].
# NOTE(review): presumably the kernel of the first convolution (`c0`), coming
# after the four BatchNormalization arrays -- verify against `model.weights`.
l1 = model.get_weights()[4]

In [33]:
# Draw one panel per (axis-2 index, axis-3 index) slice of `l1`, laid out as
# shape[3] rows by shape[2] columns, with a colour scale shared by all panels.
l1_cols, l1_rows = l1.shape[2], l1.shape[3]
l1_min, l1_max = l1.min(), l1.max()

plt.figure(figsize=2 * np.asarray(l1.shape[2:]))

pi = 0
for f in range(l1_rows):
    for c in range(l1_cols):
        pi += 1
        plt.subplot(l1_rows, l1_cols, pi)
        librosa.display.specshow(l1[:, :, c, f].T, vmin=l1_min, vmax=l1_max)

plt.tight_layout()
plt.savefig('02_unknown.png')

<IPython.core.display.Javascript object>

In [21]:
# Seventh weight array of the model, with singleton axes removed.
# NOTE(review): presumably the kernel of the second convolution (`c1`) --
# verify against `model.weights`.
l2 = model.get_weights()[6].squeeze()
# Reorder the array with librosa's `axis_sort` before display (default axis
# and ordering; see the librosa documentation).
l2 = librosa.util.axis_sort(l2)

In [34]:
# Display `l2` with a note-labelled CQT frequency axis.
plt.figure(figsize=(8,4))
# Look the CQT operator up by name, as the rest of the notebook does, rather
# than relying on its position in `pump.ops`.
librosa.display.specshow(l2, y_axis='cqt_note', sr=pump['cqt'].sr, bins_per_octave=36)
plt.tight_layout()
plt.savefig('02_cqt_note.png')

<IPython.core.display.Javascript object>

---
# Validation visualization

In [23]:
import pandas as pd
import jams

from tqdm import tqdm_notebook as tqdm

from IPython.display import Audio

import jams

import librosa

In [28]:
def score_model(pump, model, idx,
                features='working/chords/pump',
                refs='working/chords/augmentation'):
    """Score the model's chord-tag predictions for every track in ``idx``.

    Parameters
    ----------
    pump
        Pump object; its 'chord_tag' operator decodes the predictions.
    model
        Trained model; the first element of ``model.predict(...)`` is taken
        as the chord-tag output, and its first batch item is decoded.
    idx : pandas object
        Its ``.index`` supplies the track identifiers.
    features : str
        Directory containing ``<track>.npz`` archives with a 'cqt/mag' entry.
    refs : str
        Directory containing the reference ``<track>.jams`` files.
        NOTE(review): the recorded run of this notebook raised
        FileNotFoundError with this default, and another cell reads the
        references from 'data/eric_chords/references_v2' -- confirm which
        directory is intended.

    Returns
    -------
    pandas.DataFrame
        One row per track, one column per metric returned by
        ``jams.eval.chord``.
    """
    # Looked up by name, consistent with the per-track cells of this
    # notebook, instead of by position in `pump.ops`.
    chord_op = pump['chord_tag']

    results = {}
    for item in idx.index:
        jam = jams.load('{}/{}.jams'.format(refs, item), validate=False)

        # Close the archive as soon as the features are in memory, so one
        # file handle is not leaked per track.
        with np.load('{}/{}.npz'.format(features, item)) as archive:
            datum = archive['cqt/mag']

        ann = chord_op.inverse(model.predict(datum)[0][0])
        results[item] = jams.eval.chord(jam.annotations['chord', 0], ann)

    return pd.DataFrame.from_dict(results, orient='index')

In [29]:
# Score every track of the held-out test split.
df = score_model(pump, model, idx_test)

FileNotFoundError: [Errno 2] No such file or directory: 'working/chords/augmentation/06_-_Ask_Me_Why.jams'

In [None]:
# Keep only a subset of the metric columns returned by the evaluation.
dfr = df[['thirds', 'triads', 'tetrads', 'root', 'mirex', 'majmin', 'sevenths']]

In [None]:
# Summary statistics of the selected metrics across tracks.
dfr.describe()

In [None]:
# Distribution of each selected metric across tracks.
plt.figure()
dfr.boxplot();  # trailing `;` suppresses the echoed return value

In [None]:
# Mean of each selected metric across tracks.
dfr.describe().loc['mean']

In [None]:
#F = idx_val.index[99] #F = 'TRDAJDG149E3784BF8'

In [None]:
# Track id with the lowest 'mirex' score. `idxmin` returns the index *label*
# (the track id used in the file names below); `argmin` returns an integer
# position in current pandas.
F = df['mirex'].idxmin()

In [None]:
# Open the feature archive of track `F`.
# NOTE(review): the archive is left open; the following cells index into it
# ('chord_tag/chord', 'cqt/mag').
datum = np.load('working/chords/pump/{}.npz'.format(F))

In [None]:
# Reference JAMS file for track `F`.
J = jams.load('data/eric_chords/references_v2/{}.jams'.format(F))

In [None]:
# Decode the stored chord-tag targets (first item along axis 0) back into an
# annotation; plotted below as 'Reference'.
ann_true = pump['chord_tag'].inverse(datum['chord_tag/chord'][0])

In [None]:
# Decode the model's chord-tag prediction: first model output, first batch
# item; plotted below as 'Estimate'.
ann = pump['chord_tag'].inverse(model.predict(datum['cqt/mag'])[0][0])

In [None]:
# Score the estimate against the first chord annotation of the reference
# JAMS and show the metrics as a single Series.
pd.DataFrame.from_records([jams.eval.chord(J.annotations['chord', 0], ann)]).loc[0]

In [None]:
# Audio player for the original recording of track `F`.
Audio(filename='data/eric_chords/audio/{}.mp3'.format(F))

In [None]:
# Two stacked panels sharing the x axis: the input features on top, the
# reference and estimated annotations below.
plt.figure(figsize=(10, 8))

# Top panel: first batch item and first channel of 'cqt/mag', transposed for
# display with a time axis derived from the CQT operator's sr / hop length.
ax = plt.subplot(2,1,1)
librosa.display.specshow(datum['cqt/mag'][0, :, :, 0].T,
                         sr=pump['cqt'].sr,
                         hop_length=pump['cqt'].hop_length,
                         x_axis='time')

# Bottom panel: both annotations overlaid, semi-transparent.
plt.subplot(2,1,2, sharex=ax)
jams.display.display(ann_true, meta=False, label='Reference', alpha=0.5)
jams.display.display(ann, meta=False, label='Estimate', alpha=0.5)
plt.legend(loc='best')
plt.tight_layout()

In [None]:
# Load the recording of track `F`; `sr` is the sample rate returned by librosa.
y, sr = librosa.load('data/eric_chords/audio/{}.mp3'.format(F))

In [None]:
# Play the recording stacked with a sonification of the estimated chords.
# The sonification is rendered for the duration rounded up to whole seconds,
# then trimmed to `len(y)` samples so that both rows have the same length.
Audio(data=np.vstack([y, jams.sonify.sonify(ann, sr=sr, duration=int(np.ceil(len(y) / sr)))[:len(y)]]),
      rate=sr)