# Evaluate a trained model

In [1]:
import pandas as pd
import numpy as np
import random
from tqdm import tqdm
import h5py
import os

import tensorflow as tf
import keras
import tensorflow_io as tfio
import tensorflow_probability as tfp
import tensorflow_extra as tfe

import matplotlib.pyplot as plt
import librosa
import librosa.display
from IPython.display import Audio

In [2]:
df = pd.read_csv("../../data/dataset_train.csv")
df["fullfilename"] = "../" + df["fullfilename"]

In [3]:
from sklearn.model_selection import train_test_split

df_train, df_val = train_test_split(df, test_size=0.2, random_state=42)

df_train.reset_index(inplace = True, drop = True)
df_val.reset_index(inplace = True, drop = True)

In [4]:
class cfg:
    # random seed
    seed = 42

    # audio clip settings
    sr = 22050
    duration = 15 # the duration of the clips
    
    n_samples = duration*sr
    
    hop_length = 2048 # "stepsize" of the fft for the melspectrograms
    nfft = 4096 # windowsize of the fft for the melspectrograms
    n_mels = 128 # number of mel frequency bins
    fmax = sr/2 # maximum frequency in the melspectrograms
    input_dim = (n_mels, int(duration*sr//hop_length + 1))
    
    # training settings
    batch_size = 32
    n_epochs = 50
    
    # class labels/names
    names = list(np.unique(df.en))

In [5]:
from tensorflow.keras.models import load_model

model = load_model('../models/Best_Model_1.keras')
model.summary()

In [6]:
def random_int(shape=[], minval=0, maxval=1):
    return tf.random.uniform(shape=shape, minval=minval, maxval=maxval, dtype=tf.int32)

# Generats random float
def random_float(shape=[], minval=0.0, maxval=1.0):
    rnd = tf.random.uniform(shape=shape, minval=minval, maxval=maxval, dtype=tf.float32)
    return rnd

def load_spectrogram_slice(hdf5_path, name, start_row = 0, end_row =None, start_col = 0, end_col = None):
    with h5py.File(hdf5_path, 'r') as f:
        spectrogram_slice = f[name][start_row:end_row, start_col:end_col]
    return spectrogram_slice

def pad_spectrogram(spec, shape = cfg.input_dim, random = False):
    _ = np.zeros(shape)
    if random:
        rdm = random_int(maxval=shape[1]-spec.shape[1])
        _[:,rdm: rdm + spec.shape[1]] = spec 
    else:
        _[:,:spec.shape[1]] = spec
    return _

def predict_file(df, ID, model):
    name = df.spectrogram.iloc[ID]
    hdf5_path = os.path.dirname(df.fullfilename.iloc[ID]) + "/spectrograms.h5"
    spec_length = df.length_spectrogram.iloc[ID]
    spec = load_spectrogram_slice(hdf5_path, name)
    if spec_length < cfg.input_dim[1]:
        spec = pad_spectrogram(spec, shape = cfg.input_dim, random = True)
        preds = model.predict(np.expand_dims([spec], axis = -1), verbose=0)
        return np.mean(preds, axis = 0) # return mean prediction
    slices = []
    k = 0
    for i in range(spec_length//cfg.input_dim[1]):
        k = i
        slices.append(spec[:,i*cfg.input_dim[1]:(i+1)*cfg.input_dim[1]])
    if spec_length%cfg.input_dim[1]/cfg.input_dim[1] > 5/cfg.duration:
        # consider last slice, only if it is longer than the shortest clips in the dataset 
        slices.append(pad_spectrogram(spec[:, (k+1)*cfg.input_dim[1]:None], random = True))
    
    
    preds = model.predict(np.expand_dims(np.array(slices), axis = -1), verbose=0)
    
    return np.mean(preds, axis = 0) # return mean prediction


In [7]:
predictions_train = []
for j in tqdm(df_train.index):
    predictions_train.append(predict_file(df_train, j, model))

100%|█████████████████████████████████████| 11100/11100 [10:20<00:00, 17.89it/s]


In [8]:
predictions_val = []
for j in tqdm(df_val.index):
    predictions_val.append(predict_file(df_val, j, model))

100%|███████████████████████████████████████| 2776/2776 [02:57<00:00, 15.61it/s]


In [10]:
preds_train = np.array(predictions_train)
preds_class_train = np.argmax(preds_train, axis=1)
one_hot_pred_train = np.zeros_like(preds_train)
one_hot_pred_train[np.arange(len(preds_class_train)), preds_class_train] = 1

In [11]:
preds_val = np.array(predictions_val)
preds_class_val = np.argmax(preds_val, axis=1)
one_hot_pred_val = np.zeros_like(preds_val)
one_hot_pred_val[np.arange(len(preds_class_val)), preds_class_val] = 1

In [12]:
true_label_train = df_train.label
true_label_train = np.array(true_label_train)
from tensorflow.keras.utils import to_categorical
trues_train = to_categorical(true_label_train)

In [13]:
true_label_val = df_val.label
true_label_val = np.array(true_label_val)
from tensorflow.keras.utils import to_categorical
trues_val = to_categorical(true_label_val)

In [14]:
from sklearn.metrics import confusion_matrix, classification_report

In [57]:
len(preds_class_val[preds_class_val == true_label_val])/len(preds_class_val)

0.8505043227665706

In [64]:
99/len(df_val)

0.035662824207492796

In [None]:
def plot_confusion_matrix(cm, classes,
                          normalize=True,
                          title='Confusion matrix',
                          cmap=plt.cm.Blues):
    """
    This function prints and plots the confusion matrix.
    Normalization can be applied by setting `normalize=True`.
    """
    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    #tick_marks = np.arange(len(classes))
    #plt.xticks(tick_marks, classes, rotation=45)
    #plt.yticks(tick_marks, classes)

    if normalize:
        cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]

    # Loop over data dimensions and create text annotations.
    fmt = '.2f' if normalize else 'd'
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        #plt.text(j, i, format(cm[i, j], fmt),
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")

    plt.tight_layout()
    #plt.ylabel('True label')
    #plt.xlabel('Predicted label')

In [53]:
cm = confusion_matrix(true_label_val, np.argmax(preds_val, axis = 1), normalize = "true")

plt.imshow(cm, interpolation='nearest', cmap="viridis")
plt.colorbar()
plt.ylabel('True label')
plt.xlabel('Predicted label')

plt.savefig("../../plots/Confusion.pdf")
plt.close()

In [None]:
import itertools
plt.figure(figsize = (30, 30))
plot_confusion_matrix(confusion_mtx, cfg.names,
                          normalize=True,
                          title='Confusion matrix',
                          cmap=plt.cm.Blues)

In [34]:
plt.figure(figsize=(6.5, 4.5))

classnr = 2

_, bins, _ = plt.hist(preds_val[:, classnr][trues[:, classnr] == 1], bins = 10, color = "firebrick", edgecolor = "r", range = [0, 1], density = True,
                          label = f"True class {classnr}", alpha = .8)
plt.hist(preds_val[:, classnr][trues[:, classnr] != 1], bins = bins, color = "royalblue", edgecolor = "b", density = True, label = "Not class 0", alpha = .8)
#plt.yscale("log")
plt.xlabel(f"Probability of being class {classnr}")
plt.ylabel("Counts [a.u.]")
plt.title("Probability distribution", fontsize = 18)
plt.legend()
plt.tight_layout()

NameError: name 'trues' is not defined

<Figure size 650x450 with 0 Axes>

In [None]:
print(len(preds_class[preds_class == true_label])/len(preds_class))

In [None]:
def compute_spec(filepath, sr=cfg.sr, duration=cfg.duration, nfft=cfg.nfft, hop_length=cfg.hop_length, n_mels=cfg.n_mels, fmax=cfg.fmax):
    audio, sr = librosa.load(filepath, sr = sr)
    # randomly pad clip if shorter
    if len(audio) < duration*sr:
        _ = np.zeros(duration*sr)
        rand_idx = np.random.randint(0, duration*sr-len(audio))
        _[rand_idx:rand_idx + len(audio)] = audio
        audio = _
    mel_spectrogram = librosa.feature.melspectrogram(y=audio, sr=sr, n_fft=nfft, hop_length=hop_length, n_mels=n_mels, fmin = 0, fmax=fmax)
    log_mel_spectrogram = librosa.power_to_db(mel_spectrogram)
    return log_mel_spectrogram

In [None]:
file = "../../data/own_recordings/Vogel_Terasse.mp3"

In [None]:
spec = compute_spec(file)
audio, sr = librosa.load(file, sr = cfg.sr)

In [None]:
fig, ax = plt.subplots(1,1)
librosa.display.specshow(spec, sr=cfg.sr, hop_length=cfg.hop_length,
                         x_axis='time', ax=ax)

In [None]:
Audio(audio, rate = sr)

In [None]:
def predict_spec(spec, model):
    slices = []
    spec_length = spec.shape[1]
    for i in range(spec_length//cfg.input_dim[1]):
        slices.append(spec[:,i*cfg.input_dim[1]:(i+1)*cfg.input_dim[1]])
    if spec_length%cfg.input_dim[1]/cfg.input_dim[1] > 5/cfg.duration:
        # consider last slice, only if it is longer than the shortest clips in the dataset 
        slices.append(pad_spectrogram(spec[:, (i+1)*cfg.input_dim[1]:None], random = True))
    
    preds = model.predict(np.expand_dims(np.array(slices), axis = -1))
    return np.mean(preds, axis = 0) # return mean prediction

In [None]:
np.argsort(predict_spec(spec, model))

In [None]:
cfg.label2name[16]

In [None]:
df[df.label == 4]

In [35]:
df_new = pd.read_csv("../models/Best_Model_1_log.csv")

In [38]:
df_new.head()

Unnamed: 0,epoch,accuracy,loss,val_accuracy,val_loss
0,0,0.079039,3.518565,0.252907,2.641062
1,1,0.298659,2.457076,0.459666,2.014435
2,2,0.402681,2.093218,0.506177,1.788356
3,3,0.464485,1.886614,0.567587,1.581876
4,4,0.510098,1.714994,0.615189,1.413678


In [54]:
fig, ax = plt.subplots()

ax.plot(df_new["epoch"], df_new.val_accuracy, label = "Validation Accuracy", color = "chartreuse")
ax.plot(df_new["epoch"], df_new.accuracy, label = "Accuracy", color = "fuchsia")

ax.set_xlabel("Epoch")
ax.set_ylabel("Accuracy")
ax.legend()

ax.set_xlim(0,42)

fig.savefig("../../plots/acc.pdf")
plt.close()
