In [None]:
import os
import pickle
import numpy as np
import tensorflow as tf
import keras as K
import librosa
import sys

import builtins

# Publish TensorFlow under the builtin name `tf`, so that code executed in any
# module can resolve `tf` without importing it.
# NOTE(review): presumably needed because the pickled model spec contains
# layer code that refers to a bare `tf` — confirm before removing.
builtins.tf = tf

In [None]:
# Absolute location of the "code" directory that sits next to the current
# working directory (relative paths are resolved against os.getcwd()).
project_root = os.path.abspath("../code")

# Give that directory top priority on the import search path, adding it
# only once so re-running this cell does not create duplicate entries.
if sys.path.count(project_root) == 0:
    sys.path.insert(0, project_root)

# Project-local helpers; this import relies on the path entry added above.
from utils import (
    rename_slashes_in_pump_opmap,
    rename_slashes_in_pump_ops_list,
)

In [None]:
# User configuration: edit these values to match your environment.
# `working` is the base directory under which the model and pump files are
# looked up by the cells below.
working = "ADD_YOUR_WORKING_DIRECTORY_HERE"
split = 0  # index of the split (fold) whose saved model should be loaded
epochs = 5  # epoch count that appears in the saved model's file names

In [None]:
def load_model(model_spec_path, weights_path):
    """Rebuild a model from a pickled spec and load its saved weights.

    Parameters
    ----------
    model_spec_path : str
        Path of the pickle file holding the serialized model spec.
    weights_path : str
        Path handed to the rebuilt model's ``load_weights`` method.

    Returns
    -------
    object
        Whatever ``K.utils.deserialize_keras_object`` produces for the spec,
        after ``load_weights(weights_path)`` has been called on it.

    Notes
    -----
    SECURITY: ``pickle.load`` can execute arbitrary code, and deserialization
    is performed with ``safe_mode=False``. Only use files from a trusted
    source.
    """
    # Read the serialized spec back from disk.
    with open(model_spec_path, mode="rb") as spec_file:
        spec = pickle.load(spec_file)

    # Turn the spec into a live model object, then restore its parameters.
    restored = K.utils.deserialize_keras_object(spec, safe_mode=False)
    restored.load_weights(weights_path)
    return restored


# Locate the saved artefacts for the selected split / epoch count.
# Both files live in the "model_deep" folder under the working directory.
output_path = os.path.join(working, "model_deep")

spec_filename = "fold{:02d}_model_{:03d}_epochs.pkl".format(split, epochs)
weights_filename = "fold{:02d}_weights_{:03d}_epochs.keras".format(split, epochs)

model_spec_path = os.path.join(output_path, spec_filename)
weights_path = os.path.join(output_path, weights_filename)

# Rebuild the network with its weights and print the layer overview
model = load_model(model_spec_path=model_spec_path, weights_path=weights_path)
model.summary()

In [None]:
# Restore the pickled pump object from the working directory.
# SECURITY: unpickling can run arbitrary code — only load files you trust.
pump_path = os.path.join(working, "chords/pump.pkl")
with open(pump_path, mode="rb") as pump_file:
    pump = pickle.load(pump_file)

# Apply the project's slash-renaming helpers to the pump's operator map and
# operator list. The return values are not used, so these presumably modify
# `pump` in place — see `utils` to confirm.
rename_slashes_in_pump_opmap(pump)
rename_slashes_in_pump_ops_list(pump)

In [None]:
def compute_cqt_mag(
    audio_path,
    sr=44100,
    hop_length=4096,
    n_bins=216,
    bins_per_octave=36,
    use_log=True,
):
    """Compute a constant-Q magnitude feature for a single audio file.

    Parameters
    ----------
    audio_path : str
        File to read; forwarded to ``librosa.load``.
    sr : int
        Sampling rate requested from ``librosa.load`` and passed to
        ``librosa.cqt``.
    hop_length : int
        Hop length passed to ``librosa.cqt``.
    n_bins : int
        Number of CQT bins passed to ``librosa.cqt``.
    bins_per_octave : int
        Bins per octave passed to ``librosa.cqt``.
    use_log : bool
        When true, ``numpy.log1p`` is applied to the magnitudes.

    Returns
    -------
    numpy.ndarray
        The transposed magnitude array with one trailing axis of length 1
        appended. With ``librosa.cqt`` returning ``(n_bins, n_frames)`` this
        is ``(n_frames, n_bins, 1)``.

    Notes
    -----
    NOTE(review): the compression applied here must match the features the
    model was trained on — confirm that ``log1p`` is what the training
    pipeline used.
    """
    # Decode the file at the requested sampling rate; the returned rate is unused.
    signal, _ = librosa.load(audio_path, sr=sr)

    # Complex CQT -> magnitudes.
    magnitude = np.abs(
        librosa.cqt(
            signal,
            sr=sr,
            hop_length=hop_length,
            n_bins=n_bins,
            bins_per_octave=bins_per_octave,
        )
    )

    # Optional log(1 + x) compression.
    if use_log:
        magnitude = np.log1p(magnitude)

    # Frames first, then bins, then a singleton channel axis.
    return magnitude.T[..., np.newaxis]


# Path of the audio file to analyse. Replace the placeholder with a real file
# path before running the cells below.
new_audio_path = "ADD_YOUR_AUDIO_FILE_PATH_HERE"

In [None]:
# Extract the model input feature from the selected audio file
cqt_mag_feature = compute_cqt_mag(audio_path=new_audio_path)

# Sanity check: with the default settings the layout is (n_frames, 216, 1)
print("Feature shape:", cqt_mag_feature.shape)

In [None]:
# Prepend a batch axis of length 1: (1, n_frames, 216, 1)
input_data = cqt_mag_feature[np.newaxis, ...]

# Run inference and keep element 0 of the result.
# NOTE(review): for a single-output model this is the only batch item
# (per-frame predictions); for a multi-output model it would be the first
# output instead — confirm against the loaded model.
predictions = model.predict(input_data)[0]

# Decode the predictions into chord labels with the pump's "chord_tag" operator
chord_annotation = pump["chord_tag"].inverse(predictions)

print("Predicted chord annotation:", chord_annotation, sep="\n")