In [1]:
# Enable IPython's autoreload extension in mode 2, so imported modules (such as
# the local helper_functions module) are reloaded before each cell runs and
# edits to them take effect without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [16]:
import tensorflow as tf
import os
import numpy as np
import tensorflow_io as tfio

from helper_functions import (
    convert_prefetchdataset_to_numpy_arrays,
    lite_model_from_file_predicts_dataset,
    get_file_size, 
    convert_bytes,
    evaluate_prediction,
)


from sklearn.metrics import confusion_matrix

In [17]:
# Rate (Hz) every clip is resampled to before being fed to the models.
sample_rate = 16000
# Fixed clip length in samples after resampling: 3 seconds at 16 kHz = 48000.
desired_length_of_audio = 3 * 16000

In [62]:
# Load every WAV file of the testing dataset and convert it into a fixed-length
# waveform (time series, not a spectrogram). These waveforms are the input used
# to test all models below.
directory = 'dataset/testing'

x_data = []
y_data = []
for root, dirs, files in os.walk(directory):
    # Sort in place so the traversal order (and thus the sample order) is
    # reproducible across runs and platforms.
    dirs.sort()
    for file in sorted(files):
        # Skip anything that is not audio (e.g. OS metadata files), which would
        # otherwise make the WAV decoder fail.
        if not file.lower().endswith('.wav'):
            continue

        full_file_name = os.path.join(root, file)

        # "non_target" has to be tested first because it contains "target".
        if "non_target" in full_file_name:
            class_encoded = 0
        elif "target" in full_file_name:
            class_encoded = 1
        else:
            # Without this branch the label of the previous file would be
            # silently reused (or a NameError raised on the very first file).
            raise ValueError(
                f"Cannot infer class from path (expected 'target' or 'non_target'): {full_file_name}"
            )

        # desired_channels=1 keeps only the first channel, so the squeeze below
        # is valid for multi-channel recordings as well.
        audio, sr = tf.audio.decode_wav(tf.io.read_file(full_file_name), desired_channels=1)
        audio = tf.squeeze(audio, axis=-1)

        # Resample from the file's real sample rate instead of assuming 48 kHz;
        # a wrong rate_in would silently change the speed/pitch of the clip.
        resampled_audio = tfio.audio.resample(
            audio, rate_in=tf.cast(sr, tf.int64), rate_out=sample_rate
        )

        # Zero-pad short clips and truncate long ones to a common length.
        audio_length = tf.shape(resampled_audio)[0]
        if audio_length < desired_length_of_audio:
            resampled_audio = tf.pad(resampled_audio, [[0, desired_length_of_audio - audio_length]], mode='CONSTANT')
        else:
            resampled_audio = resampled_audio[:desired_length_of_audio]
        # Add a trailing channel axis -> shape (desired_length_of_audio, 1).
        resampled_audio = tf.expand_dims(resampled_audio, axis=-1).numpy()

        x_data.append(resampled_audio)
        y_data.append(class_encoded)

# os.walk yields nothing for a missing or empty directory; fail here with a
# clear message rather than later inside model.predict.
if not x_data:
    raise FileNotFoundError(f"No .wav files found under '{directory}'")

# input data should be in numpy array, not in list
x_data_np = np.array(x_data)
y_data_np = np.array(y_data)

In [63]:
# Sanity check: print the shape of the first sample only (nothing is printed
# when the dataset is empty).
for first_sample in x_data_np[:1]:
    print(first_sample.shape)

(48000, 1)


### CNN

##### CNN initial model in keras format

In [64]:
# Baseline CNN stored in Keras format; print its on-disk size in kilobytes.
cnn_initial_model_path = 'time_series_models_from_notebooks/cnn/cnn_time_series_16kHz_baseline.keras'
convert_bytes(
    get_file_size(cnn_initial_model_path),
    "KB",
)

File size: 712.659 Kilobytes


In [65]:
# Load the baseline Keras CNN and score the whole test set in one call
# (predict is given the numpy array built above).
cnn_initial_model = tf.keras.models.load_model(cnn_initial_model_path)
y_pred_prob = cnn_initial_model.predict(x_data_np, verbose=0)

# Predicted class = index of the largest output along axis 1.
y_pred = np.argmax(y_pred_prob, axis=1)

# Compare the predictions against the ground-truth labels.
evaluate_prediction(y_data_np, y_pred)

Accuracy: 96.12%
Recall: 92.14%
Precision: 95.91%
F1-score: 93.99%


##### CNN tf Lite model (without any additional quantization techniques)

In [66]:
# CNN converted to TF Lite without extra quantization: print the file size,
# then pass the test arrays to the TF Lite prediction helper.
cnn_tflite_model_path = 'time_series_models_from_notebooks/cnn/cnn_time_series_16kHz.tflite'
convert_bytes(get_file_size(cnn_tflite_model_path), "KB")

y_pred = lite_model_from_file_predicts_dataset(
    cnn_tflite_model_path,
    x_data_np,
    y_data_np,
)

File size: 231.473 Kilobytes
Accuracy: 96.12%
Recall: 92.14%
Precision: 95.91%
F1-score: 93.99%


##### CNN tf Lite model + Post Training Dynamic range quantization

In [67]:
# CNN TF Lite model with post-training dynamic range quantization: print the
# file size, then pass the test arrays to the TF Lite prediction helper.
cnn_drq_model_path = 'time_series_models_from_notebooks/cnn/cnn_time_series_16kHz_drq.tflite'
convert_bytes(get_file_size(cnn_drq_model_path), "KB")

y_pred = lite_model_from_file_predicts_dataset(
    cnn_drq_model_path,
    x_data_np,
    y_data_np,
)

File size: 63.078 Kilobytes
Accuracy: 96.12%
Recall: 92.14%
Precision: 95.91%
F1-score: 93.99%


##### CNN tf Lite model + Float 16 quantization

In [68]:
# CNN TF Lite model with float16 quantization: print the file size, then pass
# the test arrays to the TF Lite prediction helper.
cnn_float16q_model_path = 'time_series_models_from_notebooks/cnn/cnn_time_series_16kHz_float16q.tflite'
convert_bytes(get_file_size(cnn_float16q_model_path), "KB")

y_pred = lite_model_from_file_predicts_dataset(
    cnn_float16q_model_path,
    x_data_np,
    y_data_np,
)

File size: 119.863 Kilobytes
Accuracy: 96.12%
Recall: 92.14%
Precision: 95.91%
F1-score: 93.99%


##### CNN tf Lite model + Full integer quantization

In [69]:
# CNN TF Lite model with full integer quantization: print the file size, then
# run the prediction helper with the uint8-input flag switched on.
# NOTE(review): the recorded run shows 0% recall and precision, i.e. apparently
# no sample was predicted as class 1. Presumably the float waveforms are not
# mapped through the model's input scale/zero-point before being fed as uint8
# - TODO confirm inside lite_model_from_file_predicts_dataset.
cnn_full_int_q_model_path = 'time_series_models_from_notebooks/cnn/cnn_time_series_16kHz_full_int_q.tflite'
convert_bytes(get_file_size(cnn_full_int_q_model_path), "KB")

y_pred = lite_model_from_file_predicts_dataset(
    cnn_full_int_q_model_path,
    x_data_np,
    y_data_np,
    input_data_uint8_type=True,
)

File size: 63.898 Kilobytes
Accuracy: 67.12%
Recall: 0.00%
Precision: 0.00%
F1-score: 0.00%


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


### SqueezeNet

##### SqueezeNet initial model in keras format

In [71]:
# Baseline SqueezeNet stored in Keras format; print its on-disk size in kilobytes.
squeezenet_initial_model_path = 'time_series_models_from_notebooks/squeezenet/squeezenet30%_time_series_16kHz_baseline.keras'
convert_bytes(
    get_file_size(squeezenet_initial_model_path),
    "KB",
)

File size: 531.003 Kilobytes


In [72]:
# Load the baseline Keras SqueezeNet and score the whole test set in one call
# (predict is given the numpy array built above).
squeezenet_initial_model = tf.keras.models.load_model(squeezenet_initial_model_path)
y_pred_prob = squeezenet_initial_model.predict(x_data_np, verbose=0)

# Predicted class = index of the largest output along axis 1.
y_pred = np.argmax(y_pred_prob, axis=1)

# Compare the predictions against the ground-truth labels.
evaluate_prediction(y_data_np, y_pred)

Accuracy: 86.22%
Recall: 96.07%
Precision: 71.66%
F1-score: 82.09%


##### SqueezeNet tf Lite model (without any additional quantization techniques)

In [73]:
# SqueezeNet converted to TF Lite without extra quantization: print the file
# size, then pass the test arrays to the TF Lite prediction helper.
squeezenet_tflite_model_path = 'time_series_models_from_notebooks/squeezenet/squeezenet30%_time_series_16kHz.tflite'
convert_bytes(get_file_size(squeezenet_tflite_model_path), "KB")
y_pred = lite_model_from_file_predicts_dataset(
    squeezenet_tflite_model_path,
    x_data_np,
    y_data_np,
)

File size: 150.871 Kilobytes
Accuracy: 86.22%
Recall: 96.07%
Precision: 71.66%
F1-score: 82.09%


##### SqueezeNet tf Lite model + Post Training Dynamic range quantization

In [74]:
# SqueezeNet TF Lite model with post-training dynamic range quantization: print
# the file size, then pass the test arrays to the TF Lite prediction helper.
squeezenet_tflite_drq_model_path = 'time_series_models_from_notebooks/squeezenet/squeezenet30%_time_series_16kHz_drq.tflite'
convert_bytes(get_file_size(squeezenet_tflite_drq_model_path), "KB")
y_pred = lite_model_from_file_predicts_dataset(
    squeezenet_tflite_drq_model_path,
    x_data_np,
    y_data_np,
)

File size: 89.656 Kilobytes
Accuracy: 86.22%
Recall: 96.07%
Precision: 71.66%
F1-score: 82.09%


##### SqueezeNet tf Lite model + Float 16 quantization

In [75]:
# SqueezeNet TF Lite model with float16 quantization: print the file size, then
# pass the test arrays to the TF Lite prediction helper.
squeezenet_float16q_model_path = 'time_series_models_from_notebooks/squeezenet/squeezenet30%_time_series_16kHz_float16q.tflite'
convert_bytes(get_file_size(squeezenet_float16q_model_path), "KB")

y_pred = lite_model_from_file_predicts_dataset(
    squeezenet_float16q_model_path,
    x_data_np,
    y_data_np,
)

File size: 97.117 Kilobytes
Accuracy: 86.22%
Recall: 96.07%
Precision: 71.66%
F1-score: 82.09%


##### SqueezeNet tf Lite model + Full integer quantization

In [76]:
# SqueezeNet TF Lite model with full integer quantization: print the file size,
# then run the prediction helper with the uint8-input flag switched on.
# NOTE(review): the recorded run shows 0% recall and precision, i.e. apparently
# no sample was predicted as class 1. Presumably the float waveforms are not
# mapped through the model's input scale/zero-point before being fed as uint8
# - TODO confirm inside lite_model_from_file_predicts_dataset.
squeezenet_full_int_q_model_path = 'time_series_models_from_notebooks/squeezenet/squeezenet30%_time_series_16kHz_full_int_q.tflite'
convert_bytes(get_file_size(squeezenet_full_int_q_model_path), "KB")

y_pred = lite_model_from_file_predicts_dataset(
    squeezenet_full_int_q_model_path,
    x_data_np,
    y_data_np,
    input_data_uint8_type=True,
)

File size: 87.039 Kilobytes
Accuracy: 67.12%
Recall: 0.00%
Precision: 0.00%
F1-score: 0.00%


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
