In [1]:
%load_ext autoreload
%autoreload 2

In [3]:
import sys
import os
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), '../../')))

import tensorflow as tf
import numpy as np
import tensorflow_io as tfio
import time

from notebooks.helper_functions import (
    create_spectrogram_features,
    create_spectrogram_patches,
    lite_model_from_file_predicts_dataset,
    get_file_size, 
    convert_bytes,
    evaluate_prediction,
    full_int_model_predict
)


from sklearn.metrics import confusion_matrix

In [4]:
desired_length_of_audio = 48000
sample_rate = 16000

In [6]:
# Take all audio from testing dataset and create spectrograms from them
# We will use spectrograms for models testing
directory = '../../dataset/testing'

x_data = []
y_data = []
spectrogram_creation_times = []
for root, dirs, files in os.walk(directory):
    for file in files:
        full_file_name = os.path.join(root, file)

        if "non_target" in str(full_file_name):
            class_encoded = 0
        elif "target" in str(full_file_name):
            class_encoded = 1

        audio, sr = tf.audio.decode_wav(tf.io.read_file(full_file_name))
        audio = tf.squeeze(audio, axis=-1)
        resampled_audio = tfio.audio.resample(audio, rate_in=48000, rate_out=sample_rate)
        # Prepare log mel spectrogram from audio

        start_time = time.time()
        spectrogram_feature = create_spectrogram_features(resampled_audio, desired_length=48000, sample_rate = 16000)
        end_time = time.time() - start_time
        spectrogram_creation_times.append(end_time)
        x_data.append(spectrogram_feature)
        y_data.append(class_encoded)

# input data should be in numpy array, not in list
x_data_np = np.array(x_data)
y_data_np = np.array(y_data)

2024-10-02 13:38:50.031392: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:887] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2024-10-02 13:38:50.031884: W tensorflow/core/common_runtime/gpu/gpu_device.cc:2256] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform.
Skipping registering GPU devices...
2024-10-02 13:38:50.170079: I tensorflow_io/core/kernels/cpu_check.cc:128] Your CPU supports instructions that this TensorFlow IO binary was not compiled to use: SSE3 SSE4.1 SSE4.2 AVX AVX2 FMA
2024-10-02 13:38:50.240903: W tensorflow_io/core/kernels/audio_video_mp3_kernels.cc:271] libmp3lame.so.0 or lame functions are not available




In [8]:
spectrogram_creation_times

[0.08681416511535645,
 0.024813175201416016,
 0.02083563804626465,
 0.022597312927246094,
 0.02030158042907715,
 0.02045297622680664,
 0.018914461135864258,
 0.023183345794677734,
 0.022968292236328125,
 0.023601531982421875,
 0.0199277400970459,
 0.023296356201171875,
 0.02593088150024414,
 0.02510857582092285,
 0.021811723709106445,
 0.023431062698364258,
 0.02404046058654785,
 0.021683216094970703,
 0.020762205123901367,
 0.023074865341186523,
 0.020532608032226562,
 0.01690673828125,
 0.017788171768188477,
 0.01849079132080078,
 0.020221710205078125,
 0.018039226531982422,
 0.02202916145324707,
 0.020884990692138672,
 0.021429777145385742,
 0.019423484802246094,
 0.017507553100585938,
 0.018895864486694336,
 0.023931264877319336,
 0.02218031883239746,
 0.02314019203186035,
 0.02678823471069336,
 0.02536749839782715,
 0.02436351776123047,
 0.024509906768798828,
 0.02455282211303711,
 0.023014545440673828,
 0.024571657180786133,
 0.02535223960876465,
 0.0211181640625,
 0.018918275833

In [11]:
np.mean(spectrogram_creation_times) * 1000

22.18232938734987

In [5]:
len(x_data_np)

1393

In [6]:
for i in x_data_np:
    print(i.shape)
    break

(184, 80, 1)


### CNN

##### CNN initial model in keras format

In [7]:
cnn_initial_model_path = "../spectrogram_models_from_notebooks/cnn/hpo/cnn_mel_spec_baseline.keras"
convert_bytes(get_file_size(cnn_initial_model_path), "KB")

File size: 335.825 Kilobytes


In [16]:
cnn_initial_model = tf.keras.models.load_model(cnn_initial_model_path)
# input data should be in numpy array
y_pred_prob = cnn_initial_model.predict(x_data_np, verbose=0)
y_pred = tf.argmax(y_pred_prob, axis=1).numpy()

# Evaluate
evaluate_prediction(y_data_np, y_pred)

Accuracy: 98.99%
Recall: 99.13%
Precision: 97.84%
F1-score: 98.48%


##### CNN tf Lite model (without any additional quantization techniques)

In [18]:
cnn_tflite_model_path = '../spectrogram_models_from_notebooks/cnn/quantized/cnn_mel_spec.tflite'
convert_bytes(get_file_size(cnn_tflite_model_path), "KB")

y_pred = lite_model_from_file_predicts_dataset(cnn_tflite_model_path, x_data_np, y_data_np)

File size: 103.875 Kilobytes
Accuracy: 98.99%
Recall: 99.13%
Precision: 97.84%
F1-score: 98.48%


##### CNN tf Lite model + Post Training Dynamic range quantization

In [21]:
cnn_drq_model_path = '../spectrogram_models_from_notebooks/cnn/quantized/cnn_mel_spec_drq.tflite'
convert_bytes(get_file_size(cnn_drq_model_path), "KB")

y_pred = lite_model_from_file_predicts_dataset(cnn_drq_model_path, x_data_np, y_data_np)

File size: 29.703 Kilobytes
Accuracy: 98.99%
Recall: 99.13%
Precision: 97.84%
F1-score: 98.48%


##### CNN tf Lite model + Float 16 quantization

In [22]:
cnn_float16q_model_path = '../spectrogram_models_from_notebooks/cnn/quantized/cnn_mel_spec_float16q.tflite'
convert_bytes(get_file_size(cnn_float16q_model_path), "KB")

y_pred = lite_model_from_file_predicts_dataset(cnn_float16q_model_path, x_data_np, y_data_np)

File size: 55.07 Kilobytes
Accuracy: 98.99%
Recall: 99.13%
Precision: 97.84%
F1-score: 98.48%


##### CNN tf Lite model + Float Fallback quantization

In [26]:
cnn_float_fallback_model_path = '../spectrogram_models_from_notebooks/cnn/quantized/cnn_mel_spec_fallback_q.tflite'
convert_bytes(get_file_size(cnn_float_fallback_model_path), "KB")

y_pred = lite_model_from_file_predicts_dataset(cnn_float_fallback_model_path, x_data_np, y_data_np)

File size: 30.109 Kilobytes
Accuracy: 98.85%
Recall: 98.47%
Precision: 98.04%
F1-score: 98.26%


##### CNN tf Lite model + Full int quantization

In [23]:
cnn_full_int_q_model_path = '../spectrogram_models_from_notebooks/cnn/quantized/cnn_mel_spec_full_int_q.tflite'
convert_bytes(get_file_size(cnn_full_int_q_model_path), "KB")

y_pred = full_int_model_predict(cnn_full_int_q_model_path, x_data_np)
evaluate_prediction(y_data_np, y_pred)

File size: 30.125 Kilobytes
Accuracy: 98.78%
Recall: 98.47%
Precision: 97.83%
F1-score: 98.15%


### SqueezeNet

##### SqueezeNet initial model in keras format

In [12]:
squeezenet_initial_model_path = '../spectrogram_models_from_notebooks/squeezenet/squeezenet_spec_16kHz_baseline.keras'
convert_bytes(get_file_size(squeezenet_initial_model_path), "KB")

File size: 8687.448 Kilobytes


In [29]:
squeezenet_initial_model = tf.keras.models.load_model(squeezenet_initial_model_path)
# input data should be in numpy array
y_pred_prob = squeezenet_initial_model.predict(x_data_np, verbose=0)
y_pred = tf.argmax(y_pred_prob, axis=1).numpy()

# Evaluate
evaluate_prediction(y_data_np, y_pred)

Accuracy: 98.56%
Recall: 99.56%
Precision: 96.20%
F1-score: 97.85%


##### SqueezeNet tf Lite model (without any additional quantization techniques)

In [13]:
squeezenet_tflite_model_path = '../spectrogram_models_from_notebooks/squeezenet/squeezenet_spec_16kHz.tflite'
convert_bytes(get_file_size(squeezenet_tflite_model_path), "KB")
y_pred = lite_model_from_file_predicts_dataset(squeezenet_tflite_model_path, x_data_np, y_data_np)

File size: 2856.07 Kilobytes
Accuracy: 98.56%
Recall: 99.56%
Precision: 96.20%
F1-score: 97.85%


##### SqueezeNet tf Lite model + Post Training Dynamic range quantization

In [31]:
squeezenet_tflite_drq_model_path = '../spectrogram_models_from_notebooks/squeezenet/squeezenet_spec_16kHz_drq.tflite'
convert_bytes(get_file_size(squeezenet_tflite_drq_model_path), "KB")
y_pred = lite_model_from_file_predicts_dataset(squeezenet_tflite_drq_model_path, x_data_np, y_data_np)

File size: 770.258 Kilobytes
Accuracy: 98.56%
Recall: 99.56%
Precision: 96.20%
F1-score: 97.85%


##### SqueezeNet tf Lite model + Float 16 quantization

In [32]:
squeezenet_float16q_model_path = '../spectrogram_models_from_notebooks/squeezenet/squeezenet_spec_16kHz_float16q.tflite'
convert_bytes(get_file_size(squeezenet_float16q_model_path), "KB")

y_pred = lite_model_from_file_predicts_dataset(squeezenet_float16q_model_path, x_data_np, y_data_np)

File size: 1442.977 Kilobytes
Accuracy: 98.56%
Recall: 99.56%
Precision: 96.20%
F1-score: 97.85%


##### SqueezeNet tf Lite model + Full integer quantization

In [33]:
squeezenet_full_int_q_model_path = '../spectrogram_models_from_notebooks/squeezenet/squeezenet_spec_16kHz_full_int_q.tflite'
convert_bytes(get_file_size(squeezenet_full_int_q_model_path), "KB")

y_pred = full_int_model_predict(squeezenet_full_int_q_model_path, x_data_np)
evaluate_prediction(y_data_np, y_pred)

File size: 807.539 Kilobytes
Accuracy: 98.21%
Recall: 97.82%
Precision: 96.76%
F1-score: 97.29%


### BNN 

##### BNN initial model in keras format

In [46]:
bnn_initial_model_path = '../spectrogram_models_from_notebooks/bnn/hpo/bnn_mel_spec_3_conv_layer_model_binary_weights.keras'
convert_bytes(get_file_size(bnn_initial_model_path), "KB")

File size: 531.234 Kilobytes


In [47]:
from larq.layers import QuantConv2D

In [48]:
bnn_initial_model = tf.keras.models.load_model(bnn_initial_model_path)
# input data should be in numpy array
y_pred_prob = bnn_initial_model.predict(x_data_np, verbose=0)
y_pred = tf.argmax(y_pred_prob, axis=1).numpy()

# Evaluate
evaluate_prediction(y_data_np, y_pred)

Accuracy: 94.97%
Recall: 87.12%
Precision: 97.32%
F1-score: 91.94%


##### BNN larq tflite

I dont know how to read this model!!!

This model gave F1-score of 92.04%

In [50]:
bnn_larq_tflite_model_path = '../spectrogram_models_from_notebooks/bnn/hpo/bnn_mel_spec_lq.tflite'
convert_bytes(get_file_size(bnn_larq_tflite_model_path), "KB")

# y_pred = full_int_model_predict(bnn_larq_tflite_model_path, x_data_np)
# evaluate_prediction(y_data_np, y_pred)

File size: 156.035 Kilobytes


##### BNN tf tflite

In [19]:
bnn_tf_tflite_model_path = '../spectrogram_models_from_notebooks/bnn/hpo/bnn_mel_spec_tf_lite.tflite'
convert_bytes(get_file_size(bnn_tf_tflite_model_path), "KB")

y_pred = lite_model_from_file_predicts_dataset(bnn_tf_tflite_model_path, x_data_np, y_data_np)

File size: 157.906 Kilobytes
Accuracy: 94.97%
Recall: 87.12%
Precision: 97.32%
F1-score: 91.94%


##### BNN Float16 quant

In [20]:
bnn_tf_tflite_model_path = '../spectrogram_models_from_notebooks/bnn/hpo/bnn_mel_spec_float16q.tflite'
convert_bytes(get_file_size(bnn_tf_tflite_model_path), "KB")

y_pred = lite_model_from_file_predicts_dataset(bnn_tf_tflite_model_path, x_data_np, y_data_np)

File size: 85.387 Kilobytes
Accuracy: 95.12%
Recall: 87.55%
Precision: 97.33%
F1-score: 92.18%


##### BNN full int quant

In [52]:
bnn_full_int_model_path = '../spectrogram_models_from_notebooks/bnn/hpo/bnn_mel_spec_full_int_q.tflite'
convert_bytes(get_file_size(bnn_full_int_model_path), "KB")

y_pred = full_int_model_predict(bnn_full_int_model_path, x_data_np)
evaluate_prediction(y_data_np, y_pred)

File size: 49.953 Kilobytes
Accuracy: 95.19%
Recall: 88.21%
Precision: 96.88%
F1-score: 92.34%


### ViT

For ViT we need dataset in patches

In [12]:
# Take all audio from testing dataset and create spectrograms from them
# We will use spectrograms for models testing
directory = '../dataset/testing'

x_data = []
y_data = []

patches_creation_times = []
for root, dirs, files in os.walk(directory):
    for file in files:
        full_file_name = os.path.join(root, file)

        if "non_target" in str(full_file_name):
            class_encoded = 0
        elif "target" in str(full_file_name):
            class_encoded = 1

        audio, sr = tf.audio.decode_wav(tf.io.read_file(full_file_name))
        audio = tf.squeeze(audio, axis=-1)
        resampled_audio = tfio.audio.resample(audio, rate_in=48000, rate_out=sample_rate)
        # Prepare log mel spectrogram from audio
        start_time = time.time()
        spectrogram_feature = create_spectrogram_patches(resampled_audio, desired_length=48000, sample_rate = 16000)
        end_time = time.time() - start_time
        patches_creation_times.append(end_time)
        x_data.append(spectrogram_feature)
        y_data.append(class_encoded)

# input data should be in numpy array, not in list
x_data_patches_np = np.array(x_data)
y_data_patches_np = np.array(y_data)

In [14]:
np.mean(patches_creation_times)*1000

22.31225707251993

In [22]:
for i in x_data_patches_np:
    print(i.shape)
    break

(23, 10, 64)


In [23]:
len(x_data_patches_np)

1393

##### ViT initial model in keras format

In [24]:
vit_initial_model_path = '../spectrogram_models_from_notebooks/vit/hpo/vit_mel_spec_final.keras'
convert_bytes(get_file_size(vit_initial_model_path), "KB")

File size: 493.881 Kilobytes


In [25]:
from vit_for_tflite_compartible import PatchEncoder

In [26]:
vit_initial_model = tf.keras.models.load_model(vit_initial_model_path, compile=True, custom_objects={ "PatchEncoder": PatchEncoder})
# input data should be in numpy array
y_pred_prob = vit_initial_model.predict(x_data_patches_np, verbose=0)
y_pred = tf.argmax(y_pred_prob, axis=1).numpy()

# Evaluate
evaluate_prediction(y_data_patches_np, y_pred)

Accuracy: 97.85%
Recall: 96.51%
Precision: 96.93%
F1-score: 96.72%


##### ViT tflite

In [74]:
vit_tflite_model_path = '../spectrogram_models_from_notebooks/vit/quantized/vit_mel_spec.tflite'
convert_bytes(get_file_size(vit_tflite_model_path), "KB")

y_pred = lite_model_from_file_predicts_dataset(vit_tflite_model_path, x_data_np, y_data_np)

File size: 154.797 Kilobytes
Accuracy: 97.85%
Recall: 96.51%
Precision: 96.93%
F1-score: 96.72%


##### ViT Dynamic range quantization

In [75]:
vit_drq_model_path = '../spectrogram_models_from_notebooks/vit/quantized/vit_mel_spec_drq.tflite'
convert_bytes(get_file_size(vit_drq_model_path), "KB")

y_pred = lite_model_from_file_predicts_dataset(vit_drq_model_path, x_data_np, y_data_np)

File size: 68.609 Kilobytes
Accuracy: 97.85%
Recall: 96.51%
Precision: 96.93%
F1-score: 96.72%


##### ViT Float16

In [76]:
vit_float16_model_path = '../spectrogram_models_from_notebooks/vit/quantized/vit_mel_spec_float16q.tflite'
convert_bytes(get_file_size(vit_float16_model_path), "KB")

y_pred = lite_model_from_file_predicts_dataset(vit_float16_model_path, x_data_np, y_data_np)

File size: 93.582 Kilobytes
Accuracy: 97.85%
Recall: 96.51%
Precision: 96.93%
F1-score: 96.72%


##### ViT Float Fallback

In [79]:
vit_float_fallback_model_path = '../spectrogram_models_from_notebooks/vit/quantized/vit_mel_spec_fallback_q.tflite'
convert_bytes(get_file_size(vit_float_fallback_model_path), "KB")

y_pred = lite_model_from_file_predicts_dataset(vit_float_fallback_model_path, x_data_np, y_data_np)

File size: 60.828 Kilobytes
Accuracy: 96.41%
Recall: 92.58%
Precision: 96.36%
F1-score: 94.43%


##### ViT Full Int

In [90]:
vit_full_int_model_path = '../spectrogram_models_from_notebooks/vit/quantized/vit_mel_spec_full_int_q.tflite'
convert_bytes(get_file_size(vit_full_int_model_path), "KB")

y_pred = full_int_model_predict(vit_full_int_model_path, x_data_patches_np)
evaluate_prediction(y_data_patches_np, y_pred)

File size: 60.836 Kilobytes
Accuracy: 96.63%
Recall: 92.36%
Precision: 97.24%
F1-score: 94.74%
