# Homework 2

In [None]:
import tensorflow as tf

2024-01-02 14:50:35.045290: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-01-02 14:50:35.047102: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2024-01-02 14:50:35.086538: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2024-01-02 14:50:35.087250: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


## Hyperparameters

We compute the MFCCs (Mel-frequency cepstral coefficients) as a key step of the pre-processing phase.

In [None]:
# Framing and mel filter-bank settings shared by the feature extractors.
PREPROCESSING_ARGS = dict(
    sampling_rate=16000,
    frame_length_in_s=0.05,
    frame_step_in_s=0.028,
    num_mel_bins=20,
    lower_frequency=20,
    upper_frequency=5000,
)

# MFCC settings: everything above plus the number of coefficients to keep.
MFCC_ARGS = dict(PREPROCESSING_ARGS, num_coefficients=10)

# Optimisation settings: the learning rate is decayed from the initial value
# to the end value over the course of training.
TRAINING_ARGS = dict(
    batch_size=10,
    initial_learning_rate=1.e-1,
    end_learning_rate=1.e-4,
    epochs=40,
)



## Create datasets

In [None]:
# Datasets of file paths (not audio yet); decoding happens later in the pipeline.
train_files = tf.data.Dataset.list_files('yn-train/*')
# Size the shuffle buffer from the actual number of files instead of a
# hard-coded constant, so the shuffle stays a full shuffle if the dataset
# grows or shrinks.
train_data = train_files.shuffle(buffer_size=len(train_files))
test_data = tf.data.Dataset.list_files('yn-test/*')

print("Train-set size: ", len(train_data))
print("Test-set size: ", len(test_data))


Train-set size:  1600
Test-set size:  200


## Pre-processing

Modify the file preprocessing.py in order to calculate the MFCCs

In [None]:
from preprocessing import LABELS
from preprocessing import AudioReader
from preprocessing import MelSpectrogram
from preprocessing import MFCC


audio_reader = AudioReader(tf.int16, 16000)
# NOTE(review): the MFCC pipeline below does not use mel_spec_processor;
# it is kept so the name stays defined for any cell that relies on it.
mel_spec_processor = MelSpectrogram(**PREPROCESSING_ARGS)
mfccs = MFCC(**MFCC_ARGS)


def prepare_for_training(feature, label):
    """Turn one (feature, label) pair into a model-ready example.

    Args:
        feature: feature tensor produced by the MFCC step.
        label: label of the example, compared element-wise against LABELS
            (presumably a string tensor - TODO confirm in preprocessing.py).

    Returns:
        A tuple ``(feature, label_id)`` where ``feature`` has an extra trailing
        axis of size 1 and ``label_id`` is the arg-max of ``label == LABELS``,
        i.e. the position of the matching entry in LABELS.
    """
    feature = tf.expand_dims(feature, -1)
    label_id = tf.argmax(label == LABELS)
    return feature, label_id


def _extract_features(files):
    """Map a dataset of audio file paths to (feature, label_id) examples."""
    return (files
            .map(audio_reader.get_audio_and_label)
            .map(mfccs.get_mfccs_and_label)
            .map(prepare_for_training))


batch_size = TRAINING_ARGS['batch_size']
epochs = TRAINING_ARGS['epochs']


# Order matters here: cache() stores the extracted per-example features once,
# and shuffle() placed *after* the cache (it reshuffles on every iteration by
# default) gives each epoch a new example order and new batch composition.
# Caching after batch() would replay identical batches in identical order on
# every epoch.
train_ds = (_extract_features(train_data)
            .cache()
            .shuffle(buffer_size=len(train_data))
            .batch(batch_size))
test_ds = _extract_features(test_data).batch(batch_size)



In [None]:
# Peek at a single training batch to check tensor shapes and label encoding.
for example_batch, example_labels in train_ds.take(1):
    batch_shape = example_batch.shape
    print('Batch Shape:', batch_shape)
    # Dropping the leading (batch) axis leaves the per-example input shape.
    print('Data Shape:', batch_shape[1:])
    print('Labels:', example_labels)

Batch Shape: (10, 34, 10, 1)
Data Shape: (34, 10, 1)
Labels: tf.Tensor([1 1 1 1 1 0 0 0 1 0], shape=(10,), dtype=int64)
2024-01-02 14:56:05.221388: W tensorflow/core/kernels/data/cache_dataset_ops.cc:854] The calling iterator did not fully read the dataset being cached. In order to avoid unexpected truncation of the dataset, the partially cached contents of the dataset  will be discarded. This can happen if you have an input pipeline similar to `dataset.cache().take(k).repeat()`. You should use `dataset.take(k).cache().repeat()` instead.


## Create model

We used a basic sequential model with two convolutional layers and two dropout layers; the dropout layers are there to improve generalization to new data.

In [None]:
import os

# Width multiplier applied to the 128-filter baseline.
alpha = 0.25

# Conv2D expects an integer filter count. 128 * alpha is the float 32.0 and
# 128 * alpha * 0.8 is 25.6, so cast explicitly (truncating to 32 and 25)
# instead of relying on the layer to coerce a float.
first_conv_filters = int(128 * alpha)
second_conv_filters = int(128 * alpha * 0.8)


# Two Conv -> BatchNorm -> ReLU -> Dropout stages, followed by global average
# pooling and a 2-unit softmax classifier.
ref_model = tf.keras.Sequential([
    tf.keras.layers.Input(shape=[34, 10, 1]),
    tf.keras.layers.Conv2D(filters=first_conv_filters, kernel_size=[3, 3], strides=[1, 1], use_bias=False, padding='valid'),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.ReLU(),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Conv2D(filters=second_conv_filters, kernel_size=[3, 3], strides=[1, 1], use_bias=False, padding='valid'),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.ReLU(),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.GlobalAveragePooling2D(),
    tf.keras.layers.Dense(units=2),
    tf.keras.layers.Softmax()
])

ref_model.build()

In [None]:
# Print a layer-by-layer summary of the model defined above.
ref_model.summary()

## Train

In [None]:
from tensorflow.keras.callbacks import EarlyStopping

batch_size = TRAINING_ARGS['batch_size']
epochs = TRAINING_ARGS['epochs']

# The model ends with a Softmax layer, so the loss receives probabilities.
loss = tf.losses.SparseCategoricalCrossentropy(from_logits=False)
initial_learning_rate = TRAINING_ARGS['initial_learning_rate']
end_learning_rate = TRAINING_ARGS['end_learning_rate']
# Decay the learning rate over the maximum number of optimizer steps:
# len(train_ds) is the number of batches per epoch.
linear_decay = tf.keras.optimizers.schedules.PolynomialDecay(
    initial_learning_rate=initial_learning_rate,
    end_learning_rate=end_learning_rate,
    decay_steps=len(train_ds) * epochs,
)
optimizer = tf.optimizers.Adam(learning_rate=linear_decay)
metrics = [tf.metrics.sparse_categorical_accuracy]
ref_model.compile(loss=loss, optimizer=optimizer, metrics=metrics)

# Stop when the training loss has not improved for 3 consecutive epochs and
# roll the weights back to the best epoch seen.
early_stopping = EarlyStopping(monitor='loss', patience=3, restore_best_weights=True)

# `callbacks` is documented as a list of callbacks, so pass one explicitly.
# `shuffle` is not passed: Keras ignores it when the input is a
# tf.data.Dataset, so any shuffling has to happen in the input pipeline.
history = ref_model.fit(train_ds, epochs=epochs, callbacks=[early_stopping])


Epoch 1/40
Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40
Epoch 6/40
Epoch 7/40
Epoch 8/40
Epoch 9/40
Epoch 10/40
Epoch 11/40
Epoch 12/40
Epoch 13/40
Epoch 14/40
Epoch 15/40
Epoch 16/40
Epoch 17/40
Epoch 18/40
Epoch 19/40
Epoch 20/40
Epoch 21/40
Epoch 22/40
Epoch 23/40
Epoch 24/40
Epoch 25/40


## Evaluation on Test-set

In [None]:
# Measure both splits with the *final* model in inference mode.
# The last entry of history.history is not a reliable training score here:
# it is a running average collected during the last epoch in training mode
# (dropout active), and EarlyStopping(restore_best_weights=True) may have
# rolled the weights back to an earlier epoch afterwards.
training_loss, training_accuracy = ref_model.evaluate(train_ds)
test_loss, test_accuracy = ref_model.evaluate(test_ds)

print(f'Training Loss: {training_loss:.4f}')
print(f'Training Accuracy: {training_accuracy*100.:.2f}%')
print()
print(f'Test Loss: {test_loss:.4f}')
print(f'Test Accuracy: {test_accuracy*100.:.2f}%')

Training Loss: 0.0683
Training Accuracy: 97.19%

Test Loss: 0.0599
Test Accuracy: 99.00%


## Save model

In [None]:
import os

# Directory that receives the trained Keras model.
saved_model_dir = './saved_models/model4'
# exist_ok=True replaces the separate existence check and is safe on re-runs.
os.makedirs(saved_model_dir, exist_ok=True)
ref_model.save(saved_model_dir)


INFO:tensorflow:Assets written to: ./saved_models/cavalloPazzoRabiot/assets
INFO:tensorflow:Assets written to: ./saved_models/cavalloPazzoRabiot/assets


## Convert the model to TFLite and quantize it

We applied the default optimizations provided by tf.lite 

In [None]:
import tensorflow as tf
from tensorflow_model_optimization.quantization.keras import quantize_model

# Convert the saved Keras model to a TFLite flatbuffer with the converter's
# default optimizations enabled.
converter_opt = tf.lite.TFLiteConverter.from_saved_model('./saved_models/model4')
converter_opt.optimizations = [tf.lite.Optimize.DEFAULT]
tflite_model_opt = converter_opt.convert()

tflite_models_dir = './tflite_models'
# exist_ok=True replaces the separate existence check and is safe on re-runs.
os.makedirs(tflite_models_dir, exist_ok=True)
tflite_model_name = os.path.join(tflite_models_dir, 'model4.tflite')

# convert() returns the serialized model as bytes; write it out unchanged.
with open(tflite_model_name, 'wb') as fp:
    fp.write(tflite_model_opt)


2024-01-02 15:09:51.057684: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:364] Ignored output_format.
2024-01-02 15:09:51.057718: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:367] Ignored drop_control_dependency.
2024-01-02 15:09:51.240448: I tensorflow/cc/saved_model/reader.cc:45] Reading SavedModel from: ./saved_models/cavalloPazzoRabiot
2024-01-02 15:09:51.404083: I tensorflow/cc/saved_model/reader.cc:91] Reading meta graph with tags { serve }
2024-01-02 15:09:51.404116: I tensorflow/cc/saved_model/reader.cc:132] Reading SavedModel debug info (if present) from: ./saved_models/cavalloPazzoRabiot
2024-01-02 15:09:51.406280: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:375] MLIR V1 optimization pass is not enabled
2024-01-02 15:09:51.407026: I tensorflow/cc/saved_model/loader.cc:231] Restoring SavedModel bundle.
2024-01-02 15:09:51.768389: I tensorflow/cc/saved_model/loader.cc:215] Running initialization op on SavedModel bundle

## Reference latency

Define the reference model and convert it to TFLite (only if the converted file does not already exist).

In [None]:
import tensorflow as tf
import os


# Pre-processing settings of the reference (baseline) pipeline.
REF_PREPROCESSING_ARGS = {
    'sampling_rate': 16000,
    'frame_length_in_s': 0.04,
    'frame_step_in_s': 0.02,
    'num_mel_bins': 40,
    'lower_frequency': 20,
    'upper_frequency': 4000,
}

tflite_models_dir = './tflite_models'
# exist_ok=True replaces the separate existence check and is safe on re-runs.
os.makedirs(tflite_models_dir, exist_ok=True)
tflite_model_name = os.path.join(tflite_models_dir, 'ref_model.tflite')

# Build and convert the reference network only when its TFLite file is missing.
# The network is converted without being trained here: it is only needed as a
# latency baseline.
if not os.path.exists(tflite_model_name):
    # Use dedicated names so the trained model (`ref_model`) and its export
    # directory (`saved_model_dir`) from the earlier cells are not overwritten
    # when this cell runs.
    reference_model = tf.keras.Sequential([
        tf.keras.layers.Input(shape=[49, 40, 1]),
        tf.keras.layers.Conv2D(filters=128, kernel_size=[3, 3], strides=[2, 2], use_bias=False, padding='valid'),
        tf.keras.layers.BatchNormalization(),
        tf.keras.layers.ReLU(),
        tf.keras.layers.Conv2D(filters=128, kernel_size=[3, 3], strides=[1, 1], use_bias=False, padding='same'),
        tf.keras.layers.BatchNormalization(),
        tf.keras.layers.ReLU(),
        tf.keras.layers.Conv2D(filters=128, kernel_size=[3, 3], strides=[1, 1], use_bias=False, padding='same'),
        tf.keras.layers.BatchNormalization(),
        tf.keras.layers.ReLU(),
        tf.keras.layers.GlobalAveragePooling2D(),
        tf.keras.layers.Dense(units=2),
        tf.keras.layers.Softmax()
    ])

    reference_model.build()

    reference_saved_model_dir = './saved_models/ref_model'
    os.makedirs(reference_saved_model_dir, exist_ok=True)
    reference_model.save(reference_saved_model_dir)

    # Plain conversion: no optimizations are set for the reference model.
    converter = tf.lite.TFLiteConverter.from_saved_model(reference_saved_model_dir)
    tflite_model = converter.convert()

    with open(tflite_model_name, 'wb') as fp:
        fp.write(tflite_model)

Evaluate the latency of ref_model.tflite

In [None]:
import numpy as np
from time import time
from preprocessing import MelSpectrogram

# perf_counter() is the clock intended for timing short intervals; time()
# follows the system wall clock, which can be adjusted while measuring.
from time import perf_counter

mel_spec_processor = MelSpectrogram(**REF_PREPROCESSING_ARGS)
interpreter = tf.lite.Interpreter(model_path='tflite_models/ref_model.tflite')
interpreter.allocate_tensors()

input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()
# Tensor indices do not change between runs, so look them up once.
input_index = input_details[0]['index']
output_index = output_details[0]['index']

# Synthetic input: 16000 random samples; only timing matters here, not accuracy.
audio = tf.random.normal((16000,))

ref_latencies = []

# Time the full pipeline (feature extraction + inference) 100 times.
for _ in range(100):
    start_preprocess = perf_counter()

    log_mel_spectrogram = mel_spec_processor.get_mel_spec(audio)
    # Add the leading batch axis and the trailing channel axis the model expects.
    log_mel_spectrogram = tf.expand_dims(log_mel_spectrogram, 0)
    log_mel_spectrogram = tf.expand_dims(log_mel_spectrogram, -1)
    interpreter.set_tensor(input_index, log_mel_spectrogram)
    interpreter.invoke()
    output = interpreter.get_tensor(output_index)

    end_inference = perf_counter()

    ref_latencies.append(end_inference - start_preprocess)

# The median is robust to outliers such as a slow first iteration.
median_ref_latency = np.median(ref_latencies)

INFO: Created TensorFlow Lite XNNPACK delegate for CPU.


Evaluate latency of our optimized model 

In [None]:
import numpy as np
from time import time
from preprocessing import MFCC

# perf_counter() is the clock intended for timing short intervals; time()
# follows the system wall clock, which can be adjusted while measuring.
from time import perf_counter

mfcc = MFCC(**MFCC_ARGS)
interpreter_opt = tf.lite.Interpreter(model_path='tflite_models/model4.tflite') # insert our model name 
interpreter_opt.allocate_tensors()

input_details = interpreter_opt.get_input_details()
output_details = interpreter_opt.get_output_details()
# Tensor indices do not change between runs, so look them up once.
input_index = input_details[0]['index']
output_index = output_details[0]['index']

# Synthetic input: 16000 random samples; only timing matters here, not accuracy.
audio = tf.random.normal((16000,))

optimized_latencies = []

# Time the full pipeline (feature extraction + inference) 100 times.
for _ in range(100):

    start_preprocess = perf_counter()

    # Named `mfcc_features` rather than `mfccs` so the MFCC processor object
    # bound to `mfccs` in the pre-processing cell is not overwritten.
    mfcc_features = mfcc.get_mfccs(audio)
    # Add the leading batch axis and the trailing channel axis the model expects.
    mfcc_features = tf.expand_dims(mfcc_features, 0)
    mfcc_features = tf.expand_dims(mfcc_features, -1)
    interpreter_opt.set_tensor(input_index, mfcc_features)
    interpreter_opt.invoke()
    output = interpreter_opt.get_tensor(output_index)

    end_inference = perf_counter()

    optimized_latencies.append(end_inference - start_preprocess)

# The median is robust to outliers such as a slow first iteration.
median_opt_latency = np.median(optimized_latencies)


Compute total latency saving 

In [None]:
# Latency reduction of the optimized pipeline relative to the reference one,
# expressed as a percentage of the reference median latency.
latency_delta = median_ref_latency - median_opt_latency
latency_saving = 100 * latency_delta / median_ref_latency

print("Total latency saving: ", latency_saving)

Total latency saving:  38.33637990784112


## Save compressed model 

In [None]:
import zipfile
import os

tflite_models_dir = './tflite_models'
tflite_model_name = os.path.join(tflite_models_dir, 'model4.tflite')
zip_path = f'{tflite_model_name}.zip'

# Compress the TFLite file with DEFLATE into a sibling .zip archive.
with zipfile.ZipFile(zip_path, 'w', compression=zipfile.ZIP_DEFLATED) as archive:
    archive.write(tflite_model_name)

# File sizes on disk, converted from bytes to KiB.
bytes_per_kib = 1024.0
tflite_size = os.path.getsize(tflite_model_name) / bytes_per_kib
zipped_size = os.path.getsize(zip_path) / bytes_per_kib

print("Original size ", tflite_size)
print()
print("New size: ", zipped_size)

Original size  11.515625

New size:  9.9169921875


<a style='text-decoration:none;line-height:16px;display:flex;color:#5B5B62;padding:10px;justify-content:end;' href='https://deepnote.com?utm_source=created-in-deepnote-cell&projectId=42dec124-2522-4d70-b81d-1e692b6f25c0' target="_blank">
 </img>
Created in <span style='font-weight:600;margin-left:4px;'>Deepnote</span></a>