In [0]:
pip install -U PyYAML
pip install keras-vis
pip install -I scipy==1.2.* # for imresize function to work properly

In [0]:
%matplotlib inline
import vis
import keras
import tensorflow 
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import librosa
import librosa.display
import soundfile as sf

In [0]:
# --- dataset locations and audio format ---
TrainFile = '/...your directory.../fold1_train.csv'
ValFile = '/...your directory.../fold1_evaluate.csv'
sr = 44100           # sampling rate
audio_channels = 1
SampleDuration = 10  # sec

# --- pre-processing parameters ---
FreqBins = 128                 # number of mel bands (passed as n_mels)
NumFFTPoints = 2048            # FFT size (passed as n_fft)
HopLength = NumFFTPoints // 2  # hop of half the FFT size
# frames per clip: number of samples divided by the hop, rounded up, as a plain int
TimeBins = int(np.ceil((SampleDuration * sr) / HopLength))

In [0]:
# Load filenames and labels of the training and validation folds
# (tab-separated files providing 'filename' and 'scene_label' columns).
dev_train = pd.read_csv(TrainFile, sep='\t', encoding='ASCII')
dev_val = pd.read_csv(ValFile, sep='\t', encoding='ASCII')
wav_train = dev_train['filename'].tolist()
wav_val = dev_val['filename'].tolist()

# Class vocabulary: the sorted unique scene labels of the training fold.
ClassNames = np.unique(dev_train['scene_label'])
NumClasses = len(ClassNames)

# Encode BOTH folds against the same vocabulary. Letting each dataframe infer
# its own categories would shift the validation codes whenever a class is
# missing from the validation fold, so they would no longer index ClassNames.
y_train_labels = pd.Categorical(dev_train['scene_label'], categories=ClassNames).codes.copy()
y_val_labels = pd.Categorical(dev_val['scene_label'], categories=ClassNames).codes.copy()

# Labels outside the vocabulary are coded as -1, which to_categorical would
# silently interpret as the last class; fail loudly instead.
if (y_val_labels < 0).any():
    unknown_labels = dev_val.loc[y_val_labels < 0, 'scene_label'].unique().tolist()
    raise ValueError(
        "Validation labels not present in the training set: {}".format(unknown_labels))

# Converts a class vector (integers) to a binary class matrix, e.g. for use with categorical_crossentropy
y_train = keras.utils.to_categorical(y_train_labels, NumClasses, dtype='float32')
y_val = keras.utils.to_categorical(y_val_labels, NumClasses, dtype='float32')

In [0]:
# Restore the trained model from disk
from keras.models import load_model

model_path = 'DCASE2019_1b_mixup.h5'  # use appropriate name
model = load_model(model_path)

In [0]:
# View the class names. The position of a name in this array is the class index
# used when reporting predictions (ClassNames[idx]).
ClassNames

In [0]:
# Look for class-specific wav files: collect the positions of all validation
# files whose name contains the selected class.
SelectedClass = 'street_pedestrian'
indices = [i for i, s in enumerate(wav_val) if SelectedClass in s]
if not indices:
    # Without this guard an empty match surfaces as a bare IndexError below.
    raise ValueError(
        "No validation file name contains '{}'".format(SelectedClass))
index = indices[0] # first wav of the selected class. Set your own index to browse through the available wav files.
wav_val[index]

In [0]:
# Log-mel spectrogram of the selected validation clip, stored as a batch of one
# with layout (1, FreqBins, TimeBins, audio_channels). Load one instance per
# class for visualization. Switch to PCEN if necessary.
Spec_val = np.zeros((1,FreqBins,TimeBins,audio_channels),'float32')

# Read at most SampleDuration seconds of audio.
# NOTE(review): assumes the file is mono — confirm for other datasets.
y, fs = sf.read('your directory' + wav_val[index], stop=SampleDuration*sr)
if fs != sr:
    # All spectrogram parameters below are derived from `sr`; a file recorded
    # at another rate would silently produce mis-scaled features.
    raise ValueError(
        "Sample rate of '{}' is {} Hz, expected {} Hz".format(wav_val[index], fs, sr))

# `y` is passed by keyword: newer librosa releases no longer accept it positionally.
Spec_val[0,:,:,0] = librosa.feature.melspectrogram(y=y, sr=sr, n_fft=NumFFTPoints, hop_length=HopLength, n_mels=FreqBins, fmin=0.0, fmax=sr/2, htk=True, norm=None)
# Log compression; the small offset avoids log10(0) on silent bins.
Spec_val[0,:,:,0] = np.log10(Spec_val[0,:,:,0]+1e-8)

In [0]:
# Classify the given spectrogram and report the most probable classes
y_pred = model.predict(Spec_val) # or PCEN_val

# class indices ordered from most to least probable
class_idxs_sorted = np.argsort(y_pred.flatten())[::-1]

topNclass = 3 # the 3 most probable classes
for rank, idx in enumerate(class_idxs_sorted[:topNclass], start=1):
    print("Top {} predicted class:     Pr(Class={:18} [index={}])={:5.3f}".format(
          rank, ClassNames[idx], idx, y_pred[0,idx]))

In [0]:
# See the predictions / probabilities of each class
# (row 0 is the single input; column k corresponds to ClassNames[k]).
print(y_pred)

In [0]:
# The layer index within model.layers whose filters need to be visualized (usually the last layer).
# NOTE(review): 20 is hard-coded — confirm it matches the intended layer of the loaded model.
layer_idx=20

In [0]:
# Swap softmax activation with linear if necessary
from vis.utils import utils as vis_utils

model.layers[layer_idx].activation = keras.activations.linear
# Assigning the attribute alone does not change the already-built graph;
# keras-vis requires the model to be rebuilt for the new activation to take effect.
model = vis_utils.apply_modifications(model)

In [0]:
# Visualize spectrogram/PCEN as a borderless image (no margins, no axes)
fig,ax = plt.subplots(1)
fig.subplots_adjust(left=0,right=1,bottom=0,top=1)
# hop_length must match the value used to compute the spectrogram; otherwise
# specshow derives its time coordinates from its own default hop.
librosa.display.specshow(Spec_val[0,:,:,0], x_axis='time',
                         y_axis='mel', sr=sr,
                         hop_length=HopLength,
                         fmax=sr/2)
# Hide the axes once, after drawing; the trailing ';' suppresses the cell's text output.
ax.axis('off');

In [0]:
# Calculate grad-CAM for the most probable class
from vis.visualization import visualize_cam

# penultimate_layer_idx: The pre-layer to layer_idx whose feature maps should be used
# to compute gradients wrt filter output. If not provided, it is set to the nearest
# penultimate Conv or Pooling layer.
#penultimate_layer_idx = ...

class_idx = class_idxs_sorted[0]  # index of the top-ranked class
seed_input = Spec_val  # or PCEN_val
grad_top = visualize_cam(
    model,
    layer_idx,
    filter_indices=class_idx,
    seed_input=seed_input,
)  # , penultimate_layer_idx = penultimate_layer_idx

In [0]:
# Inspect the dimensions of the computed grad-CAM map
grad_top.shape

In [0]:
# Visualization of the most probable class
def plot_map(grads):
    """Show the spectrogram beside the same spectrogram overlaid with a heatmap.

    Reads the notebook-level variables ``Spec_val``, ``ClassNames``,
    ``class_idx`` and ``y_pred``; the figure title reports the predicted
    probability of ``class_idx``.

    Parameters
    ----------
    grads : image array accepted by ``imshow``
        Heatmap drawn semi-transparently over the right-hand panel.
    """
    spectrogram = Spec_val[0,:,:,0]  # or PCEN_val
    fig, (ax_plain, ax_overlay) = plt.subplots(1, 2, figsize=(14, 5))  # horizontally stacked
    ax_plain.imshow(spectrogram, origin='lower')
    ax_overlay.imshow(spectrogram, origin='lower')
    heatmap = ax_overlay.imshow(grads, cmap="jet", alpha=0.8, origin='lower')
    fig.colorbar(heatmap)
    title = "Pr(class={}) = {:5.2f}".format(ClassNames[class_idx], y_pred[0,class_idx])
    plt.suptitle(title)

plot_map(grad_top)