In [4]:
import tensorflow as tf
import seaborn as sns
import pandas as pd
import numpy as np 
import matplotlib.pyplot as plt

In [2]:
# Restore the saved Keras model. The relative path is resolved against the
# kernel's current working directory.
model = tf.keras.models.load_model(
    '../models/model_cnn.h5'
)

2023-02-10 09:22:05.927193: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory
2023-02-10 09:22:05.927729: W tensorflow/compiler/xla/stream_executor/cuda/cuda_driver.cc:265] failed call to cuInit: UNKNOWN ERROR (303)
2023-02-10 09:22:05.927794: I tensorflow/compiler/xla/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (Student-laptop): /proc/driver/nvidia/version does not exist
2023-02-10 09:22:05.928967: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [None]:
def decode_audio(audio_binary):
    """Decode WAV-encoded bytes into a waveform tensor.

    Args:
        audio_binary: Contents of a WAV file, as passed to
            `tf.audio.decode_wav` (e.g. the result of `tf.io.read_file`).

    Returns:
        The decoded `float32` audio, normalized to the [-1.0, 1.0] range,
        with the trailing `channels` axis removed, or `None` when
        TensorFlow cannot decode the contents as WAV.
    """
    try:
        # The sample rate returned alongside the audio is not needed here.
        audio, _ = tf.audio.decode_wav(contents=audio_binary)
    except tf.errors.OpError:
        # Only decoding failures are treated as "skip this file". A bare
        # `except:` would also swallow KeyboardInterrupt/SystemExit and hide
        # genuine programming errors.
        return None
    # Since all the data is single channel (mono), drop the `channels`
    # axis from the array.
    return tf.squeeze(audio, axis=-1)

def get_spectrogram(waveform, input_len=16000):
    """Turn a waveform into a fixed-size magnitude spectrogram.

    The waveform is truncated, or right-padded with zeros, to exactly
    `input_len` samples so that every clip produces a spectrogram of the
    same shape. It is then converted with a short-time Fourier transform.

    Args:
        waveform: 1-D tensor of audio samples
            (presumably the output of `decode_audio` - verify against caller).
        input_len: Number of samples every clip is brought to before the
            STFT. Defaults to 16000, the value previously hard-coded.

    Returns:
        The STFT magnitude with an extra trailing `channels` axis of size 1.
    """
    # Truncate clips that are longer than `input_len` samples.
    waveform = waveform[:input_len]
    # Cast the waveform tensors' dtype to float32.
    waveform = tf.cast(waveform, dtype=tf.float32)
    # Zero-padding for clips shorter than `input_len`. The same value drives
    # both the truncation and the padding so the two cannot drift apart.
    zero_padding = tf.zeros(
        [input_len] - tf.shape(waveform),
        dtype=tf.float32)
    # Concatenate the waveform with `zero_padding`, which ensures all audio
    # clips are of the same length.
    equal_length = tf.concat([waveform, zero_padding], 0)
    # Convert the waveform to a spectrogram via a STFT.
    spectrogram = tf.signal.stft(
        equal_length, frame_length=255, frame_step=128)
    # Obtain the magnitude of the STFT.
    spectrogram = tf.abs(spectrogram)
    # Add a `channels` dimension, so that the spectrogram can be used
    # as image-like input data with convolution layers (which expect
    # shape (`batch_size`, `height`, `width`, `channels`).
    return spectrogram[..., tf.newaxis]

In [None]:
# Class names in the order used for the confusion-matrix axes: the ten
# keywords followed by the catch-all "unknown" class.
TARGETS = [
    "yes", "no", "up", "down", "left", "right", "on", "off", "stop", "go",
    "unknown",
]
# Dataset labels that keep their own name: the ten keywords plus the
# background-noise label.
COMMANDS = TARGETS[:-1] + ["_background_noise_"]
# Class name -> integer id: ids follow the TARGETS order, and
# "_background_noise_" takes the next free id after them.
map_class_to_id = {
    class_name: class_id
    for class_id, class_name in enumerate(TARGETS + ["_background_noise_"])
}

In [None]:
# --- Configuration -----------------------------------------------------------
# CSV index of the clips (the `set`, `label` and `filename` columns are used).
DATASET_PATH = "../dataframes/full_df.csv"
# NOTE(review): absolute, machine-specific path - consider making it relative
# or configurable so the notebook runs on other machines.
DATA_PATH = "/home/lagarrueal/voice_commands/data/"
# Same location the model is loaded from at the top of the notebook.
MODEL_PATH = "../models/model_cnn.h5"

# --- Load the testing split --------------------------------------------------
dataset = pd.read_csv(DATASET_PATH)
test_ds = dataset[dataset['set'] == 'testing'].reset_index(drop=True)
# The full frame is no longer needed once the testing rows are extracted.
del dataset

# --- Decode audio and build spectrograms -------------------------------------
test_ds['waveform'] = test_ds.apply(
    lambda row: decode_audio(
        tf.io.read_file(DATA_PATH + row['label'] + "/" + row['filename'])),
    axis=1)
# decode_audio returns None for files it cannot decode. Drop only those rows,
# not rows that merely have a missing value in some unrelated column.
test_ds = test_ds.dropna(subset=['waveform']).reset_index(drop=True)
test_ds['spectrogram'] = test_ds['waveform'].apply(get_spectrogram)

# --- Encode labels -----------------------------------------------------------
# Any label outside COMMANDS is folded into the "unknown" class.
# NOTE(review): '_background_noise_' maps to id 11, which lies outside the
# len(TARGETS) classes of the confusion matrix below. Presumably the testing
# split contains no such rows - confirm.
label_names = test_ds['label'].where(test_ds['label'].isin(COMMANDS), "unknown")
test_ds['label'] = label_names.map(map_class_to_id)

# --- Assemble model inputs ---------------------------------------------------
test_audio = np.array([spec.numpy() for spec in test_ds['spectrogram']])
test_labels = test_ds['label'].to_numpy()

# --- Predict -----------------------------------------------------------------
model = tf.keras.models.load_model(MODEL_PATH)

y_pred = np.argmax(model.predict(test_audio), axis=1)
y_true = test_labels

# --- Confusion matrix --------------------------------------------------------
# tf.math.confusion_matrix returns a Tensor. Convert it to a NumPy array
# before using NumPy-style methods such as `.astype` and `.sum(axis=...)`.
mat_conf = tf.math.confusion_matrix(
    y_true, y_pred, num_classes=len(TARGETS)).numpy()
# Normalise each row by the number of true samples of that class. Rows for
# classes with no samples are left at zero instead of dividing by zero.
row_totals = mat_conf.sum(axis=1, keepdims=True)
mat_conf_normalized = np.divide(
    mat_conf.astype('float'),
    row_totals,
    out=np.zeros(mat_conf.shape, dtype=float),
    where=row_totals != 0)

# --- Plot --------------------------------------------------------------------
fig, ax = plt.subplots(figsize=(10, 10))
# Plot the normalised matrix, which is what the '.2f' format and the output
# file name describe.
sns.heatmap(mat_conf_normalized, annot=True, fmt='.2f',
            xticklabels=TARGETS, yticklabels=TARGETS, ax=ax)
ax.set_ylabel('Actual')
ax.set_xlabel('Predicted')
fig.savefig('matconf_normalized.png')