### Evaluate the trained model on audio examples from the training set and from outside of it

In [1]:
import os
import librosa
import tensorflow
from tensorflow.keras.models import model_from_json

import numpy as np
import IPython.display as ipd

In [8]:
# load and compile model
model_path = "trained_model/"
model_name = "drumsep_full"

# The architecture is read from <model_name>.json; the context manager closes the
# file even if reading fails.
with open(os.path.join(model_path, model_name + ".json"), 'r') as json_file:
    loaded_model_json = json_file.read()
model = model_from_json(loaded_model_json)
# load weights into new model (stored separately in <model_name>.h5)
model.load_weights(os.path.join(model_path, model_name + ".h5"))
print("Loaded model %s from disk" % model_name)

# compile with the loss and metrics so the model could also be evaluated, not only used for predict()
precision = tensorflow.keras.metrics.Precision()
recall = tensorflow.keras.metrics.Recall()
model.compile(loss=tensorflow.keras.losses.BinaryCrossentropy(), optimizer='adam', metrics=[precision, recall])

Loaded model drumsep_full from disk


In [9]:
# read one training-set mixture; librosa returns the samples together with their sample rate
song, sr = librosa.load(path="data/musdb18hq/train/Auctioneer - Our Future Faces/mixture.wav")
# keep a 200000-sample excerpt (indices are in samples, not seconds) to keep the demo short
song = song[750000:950000]

In [4]:
def process_song(song, hop_length=512, n_fft=1024, context_size=25):
    """
    Turns one audio signal into a stack of spectrogram context windows (the network input).

    The magnitude STFT of the song is computed and, for every STFT frame i, a window of
    context_size consecutive frames is cut out so that frame i sits at column
    context_size // 2. Window columns that would fall before the first or after the last
    frame are left as zeros.

    parameters:
        song: (ndarray) audio to be processed
        hop_length, n_fft: STFT parameters handed to librosa.stft
        context_size: (int) number of spectrogram frames per window, must be >= 1
        make sure all three are the same as the ones used in training

    returns:
        (ndarray) float64 array of shape (n_frames, n_bins, context_size, 1)

    raises:
        ValueError: if context_size is smaller than 1
    """

    if context_size < 1:
        raise ValueError("context_size must be at least 1, got %r" % (context_size,))

    mix_spec = np.abs(librosa.stft(song, hop_length=hop_length, n_fft=n_fft))

    n_bins, n_frames = mix_spec.shape

    # Zero-pad the time axis once, so that every context window becomes a plain run of
    # consecutive columns of `padded` (float64, like the per-frame buffers used before).
    left_pad = context_size // 2
    padded = np.zeros(shape=(n_bins, n_frames + context_size - 1))
    padded[:, left_pad:left_pad + n_frames] = mix_spec

    # window_cols[i, j] = column of `padded` that ends up at position j of window i
    window_cols = np.arange(n_frames)[:, np.newaxis] + np.arange(context_size)[np.newaxis, :]

    # Gather all windows in one indexing operation; the index arrays broadcast to
    # (n_frames, n_bins, context_size), which is directly the layout we return.
    bin_rows = np.arange(n_bins)[np.newaxis, :, np.newaxis]
    frames = padded[bin_rows, window_cols[:, np.newaxis, :]]

    # trailing singleton axis = channel dimension
    return np.expand_dims(frames, axis=-1)

In [5]:
def evaluate_song(song, model, hop_length=512, n_fft=1024, context_size=25):
    """
    Convenience method for quick application of the drum separation network.

    The song is cut into context windows with process_song, the model predicts one mask
    value per spectrogram bin for every window, and the mask is multiplied onto the complex
    STFT of the song before transforming back to audio.

    parameters:
        song: (ndarray) audio to be processed
        model: trained Keras model
        hop_length, n_fft, context_size: preprocessing parameters; they are used for the
        network input AND for the STFT/inverse STFT here, so they always stay in sync.
        Make sure they are the same as the ones used in training

    returns:
        (ndarray) the masked audio as returned by librosa.istft
    """

    song_data = process_song(song, hop_length=hop_length, n_fft=n_fft, context_size=context_size)

    # assumes model.predict returns (n_frames, n_bins); transposed to line up with the
    # (n_bins, n_frames) layout of the STFT -- TODO confirm against the model definition
    mask = model.predict(song_data).T

    # the complex STFT (with phase) is needed for reconstruction, process_song only
    # works on magnitudes
    mixspec = librosa.stft(song, hop_length=hop_length, n_fft=n_fft)
    reconst = librosa.istft(mixspec * mask, hop_length=hop_length)

    return reconst

#### Evaluate songs from the training set

In [22]:
# listen to the unprocessed mixture excerpt first
ipd.Audio(data=song, rate=sr)

In [40]:
# run the network on the excerpt; `sepped` is the masked audio returned by evaluate_song
sepped = evaluate_song(song=song, model=model)

In [41]:
# listen to the network output
ipd.Audio(data=sepped, rate=sr)

In [44]:
# ground truth for comparison: the drum stem of the same song
drum, sr = librosa.load(path="data/musdb18hq/train/Auctioneer - Our Future Faces/drums.wav")
# same sample range as the mixture excerpt, so both clips cover the same passage
drum = drum[750000:950000]
ipd.Audio(data=drum, rate=sr)

In [45]:
# second example from the training set
song, sr = librosa.load(path="data/musdb18hq/train/Fergessen - Back From The Start/mixture.wav")
# keep a 200000-sample excerpt (indices are in samples, not seconds) to keep the demo short
song = song[750000:950000]

In [46]:
# listen to the unprocessed mixture excerpt first
ipd.Audio(data=song, rate=sr)

In [47]:
# run the network on the excerpt and listen to the result
sepped = evaluate_song(song=song, model=model)
ipd.Audio(data=sepped, rate=sr)

In [48]:
# ground truth for comparison: the drum stem of the same song
drum, sr = librosa.load(path="data/musdb18hq/train/Fergessen - Back From The Start/drums.wav")
# same sample range as the mixture excerpt, so both clips cover the same passage
drum = drum[750000:950000]
ipd.Audio(data=drum, rate=sr)

#### Evaluate a song from the test set

In [53]:
# example from the test split
song, sr = librosa.load(path="data/musdb18hq/test/Detsky Sad - Walkie Talkie/mixture.wav")
# keep a 200000-sample excerpt (indices are in samples, not seconds) to keep the demo short
song = song[1150000:1350000]
ipd.Audio(data=song, rate=sr)

In [54]:
# run the network on the test excerpt and listen to the result
sepped = evaluate_song(song=song, model=model)
ipd.Audio(data=sepped, rate=sr)

#### Evaluate one of my own songs (outside the dataset)

In [2]:
# a recording from outside the dataset; it is used in full, no excerpt is cut
song, sr = librosa.load(path="liqueed.wav")
ipd.Audio(data=song, rate=sr)

In [9]:
# run the network on the whole recording and listen to the result
sepped = evaluate_song(song=song, model=model)
ipd.Audio(data=sepped, rate=sr)

Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: Bad argument number for Name: 4, expecting 3
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: Bad argument number for Name: 4, expecting 3
