In [None]:
import os
import tensorflow as tf

from time import time
from preprocessing import MFCC, AudioReader, LABELS

2024-01-03 11:59:24.578512: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2024-01-03 11:59:24.607809: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2024-01-03 11:59:24.608556: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [None]:
# Feature-extraction settings; the whole mapping is unpacked as keyword
# arguments into MFCC(...) further down the notebook.
# At 16000 samples/s, 0.032 s is 512 samples and 0.016 s is 256 samples.
PREPROCESSING_ARGS = dict(
    sampling_rate=16000,
    frame_length_in_s=0.032,
    frame_step_in_s=0.016,
    num_mel_bins=14,
    lower_frequency=0,
    upper_frequency=4000,
    num_coefficients=8,
)

# Optimisation settings read by the pipeline and training cells: batch size,
# the two end points of the learning-rate schedule, and the number of epochs.
TRAINING_ARGS = dict(
    batch_size=16,
    initial_learning_rate=1e-3,
    end_learning_rate=1e-5,
    epochs=60,
)

In [None]:
# Datasets of file names matching each glob; they are turned into
# (feature, label) pipelines in the next cell.
train_ds = tf.data.Dataset.list_files('/tmp/yn-train/*', shuffle=True)
# `list_files` shuffles unless told otherwise. The held-out files are listed in
# a fixed order so evaluation sees the same sequence on every run.
test_ds = tf.data.Dataset.list_files('/tmp/yn-test/*', shuffle=False)

In [None]:
# Helpers from the project's `preprocessing` module.
# NOTE(review): the second AudioReader argument equals
# PREPROCESSING_ARGS['sampling_rate'] and is presumably the sampling rate —
# confirm before changing either value.
audio_reader = AudioReader(tf.int16, 16000)
mfcc_processor = MFCC(**PREPROCESSING_ARGS)

batch_size = TRAINING_ARGS['batch_size']
epochs = TRAINING_ARGS['epochs']


def prepare_for_training(feature, label):
    """Shape one example for the network and encode its label as an integer.

    Args:
        feature: Feature tensor emitted by the preceding map step
            (``mfcc_processor.get_mfccs_and_label``).
        label: Label value emitted alongside the feature; it is compared
            element-wise against ``LABELS``.

    Returns:
        A ``(feature, label_id)`` pair: the feature with one extra trailing
        axis, and the position in ``LABELS`` at which ``label == LABELS`` holds.
    """
    feature = tf.expand_dims(feature, -1)
    label_id = tf.argmax(label == LABELS)

    return feature, label_id


def _extract_examples(files_ds):
    """Map a dataset of file names to unbatched (feature, label_id) examples."""
    return (files_ds
            .map(audio_reader.get_audio_and_label)
            .map(mfcc_processor.get_mfccs_and_label)
            .map(prepare_for_training))


# Read the number of training files before `train_ds` is rebound below, so the
# shuffle buffer can hold the entire training set.
num_train_files = train_ds.cardinality()

# NOTE: this cell rebinds `train_ds` / `test_ds` from file-name datasets to
# example pipelines; re-run the file-listing cell before re-running this one.

# Cache the per-example features, then shuffle and batch on top of the cache.
# Caching after batching would freeze the first epoch's batches and replay the
# exact same batches, in the same order, for every later epoch.
train_ds = (_extract_examples(train_ds)
            .cache()
            .shuffle(num_train_files)
            .batch(batch_size))

# Evaluation needs no shuffling, so the finished batches are cached directly.
test_ds = (_extract_examples(test_ds)
           .batch(batch_size)
           .cache())

In [None]:
# Pull a single batch to inspect its shape and labels. `example_batch` is
# reused by the model-definition cell to size the input layer.
for example_batch, example_labels in train_ds.take(1):
    print(example_batch.shape)
    print(example_labels)

(16, 61, 8, 1)
tf.Tensor([0 1 1 1 1 1 1 1 1 0 0 1 1 0 0 1], shape=(16,), dtype=int64)
2024-01-03 12:49:20.177710: W tensorflow/core/kernels/data/cache_dataset_ops.cc:854] The calling iterator did not fully read the dataset being cached. In order to avoid unexpected truncation of the dataset, the partially cached contents of the dataset  will be discarded. This can happen if you have an input pipeline similar to `dataset.cache().take(k).repeat()`. You should use `dataset.take(k).cache().repeat()` instead.


In [None]:
# Small convolutional classifier, assembled layer by layer.
model = tf.keras.Sequential()

# Per-example input shape, taken from the inspected batch without its batch
# axis (the recorded run printed a batch shape of (16, 61, 8, 1)).
model.add(tf.keras.layers.Input(shape=example_batch.shape[1:]))

# Stage 1: strided 2x2 convolution (no padding) -> batch norm -> ReLU.
# Bias is omitted because batch normalisation follows immediately.
model.add(tf.keras.layers.Conv2D(
    filters=32,
    kernel_size=(2, 2),
    strides=(2, 2),
    padding='valid',
    use_bias=False,
    kernel_regularizer='l2',
))
model.add(tf.keras.layers.BatchNormalization())
model.add(tf.keras.layers.ReLU())

# Stage 2: a second strided 2x2 convolution, this time with 'same' padding.
model.add(tf.keras.layers.Conv2D(
    filters=32,
    kernel_size=(2, 2),
    strides=(2, 2),
    padding='same',
    use_bias=False,
    kernel_regularizer='l2',
))
model.add(tf.keras.layers.BatchNormalization())
model.add(tf.keras.layers.ReLU())

# Stage 3: 3x3 depthwise convolution at stride 1, keeping the channel count.
model.add(tf.keras.layers.DepthwiseConv2D(
    kernel_size=(3, 3),
    strides=(1, 1),
    padding='same',
    depth_multiplier=1,
    use_bias=False,
))
model.add(tf.keras.layers.BatchNormalization())
model.add(tf.keras.layers.ReLU())

# Head: average over the spatial axes, then a 2-unit dense layer followed by
# softmax, so the model outputs probabilities rather than logits.
model.add(tf.keras.layers.GlobalAveragePooling2D())
model.add(tf.keras.layers.Dense(2))
model.add(tf.keras.layers.Softmax())

In [None]:
# Print the layer-by-layer summary (output shapes and parameter counts).
model.summary()

Model: "sequential_80"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_160 (Conv2D)         (None, 30, 4, 32)         128       
                                                                 
 batch_normalization_240 (B  (None, 30, 4, 32)         128       
 atchNormalization)                                              
                                                                 
 re_lu_240 (ReLU)            (None, 30, 4, 32)         0         
                                                                 
 conv2d_161 (Conv2D)         (None, 15, 2, 32)         4096      
                                                                 
 batch_normalization_241 (B  (None, 15, 2, 32)         128       
 atchNormalization)                                              
                                                                 
 re_lu_241 (ReLU)            (None, 15, 2, 32)       

In [None]:

# Training is stochastic, so separate runs can settle in different local minima
# of the loss and report different accuracies. Nevertheless, this architecture
# is able to reach >= 99% accuracy, as shown by the accompanying trained model.

# Learning-rate schedule: quadratic (power=2) polynomial decay from the initial
# to the final rate, spread over every optimizer step of the whole run
# (batches per epoch * number of epochs).
initial_learning_rate = TRAINING_ARGS['initial_learning_rate']
end_learning_rate = TRAINING_ARGS['end_learning_rate']
decay_steps = epochs * len(train_ds)
lr_decay = tf.keras.optimizers.schedules.PolynomialDecay(
    initial_learning_rate=initial_learning_rate,
    decay_steps=decay_steps,
    end_learning_rate=end_learning_rate,
    power=2,
)

# The model ends in a Softmax layer, so the loss consumes probabilities
# (from_logits=False) together with integer class ids.
loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False)
optimizer = tf.keras.optimizers.RMSprop(learning_rate=lr_decay)
metrics = [tf.keras.metrics.SparseCategoricalAccuracy()]

model.compile(optimizer=optimizer, loss=loss, metrics=metrics)

# `test_ds` doubles as the validation set that is monitored after each epoch.
history = model.fit(train_ds, validation_data=test_ds, epochs=epochs)

Epoch 1/60
Epoch 2/60
Epoch 3/60
Epoch 4/60
Epoch 5/60
Epoch 6/60
Epoch 7/60
Epoch 8/60
Epoch 9/60
Epoch 10/60
Epoch 11/60
Epoch 12/60
Epoch 13/60
Epoch 14/60
Epoch 15/60
Epoch 16/60
Epoch 17/60
Epoch 18/60
Epoch 19/60
Epoch 20/60
Epoch 21/60
Epoch 22/60
Epoch 23/60
Epoch 24/60
Epoch 25/60
Epoch 26/60
Epoch 27/60
Epoch 28/60
Epoch 29/60
Epoch 30/60
Epoch 31/60
Epoch 32/60
Epoch 33/60
Epoch 34/60
Epoch 35/60
Epoch 36/60
Epoch 37/60
Epoch 38/60
Epoch 39/60
Epoch 40/60
Epoch 41/60
Epoch 42/60
Epoch 43/60
Epoch 44/60
Epoch 45/60
Epoch 46/60
Epoch 47/60
Epoch 48/60
Epoch 49/60
Epoch 50/60
Epoch 51/60
Epoch 52/60
Epoch 53/60
Epoch 54/60
Epoch 55/60
Epoch 56/60
Epoch 57/60
Epoch 58/60
Epoch 59/60
Epoch 60/60


In [None]:
# Score the trained model on `test_ds`. With the single metric passed to
# compile(), evaluate() yields the loss followed by the accuracy.
# NOTE: `test_ds` was also used as validation data during fit(), so this is not
# an evaluation on data unseen by the training loop's monitoring.
test_loss, test_accuracy = model.evaluate(test_ds)
print(f'Test accuracy={100 * test_accuracy:.2f}%')

Test accuracy=99.00%


In [None]:
# Base name shared by the exported artifacts.
MODEL_NAME = 'model2'
# NOTE: despite the `tflite_` prefix and the `.tflite` suffix, this path is the
# target of `model.save(...)` in the next cell, whose log shows an `assets`
# sub-folder being written, so it ends up as a directory rather than a TFLite file.
# Both names are rebound further down to point at './tflite_models'.
tflite_models_dir = './saved_models'
tflite_model_name = os.path.join(tflite_models_dir, f'{MODEL_NAME}.tflite')
# Bare expression: displays the resulting path as the cell output.
tflite_model_name

'./saved_models/model2.tflite'

In [None]:
# Export the trained Keras model to the path built above; the converter cell
# below reads it back with `from_saved_model`.
model.save(tflite_model_name)

INFO:tensorflow:Assets written to: ./saved_models/model2.tflite/assets
INFO:tensorflow:Assets written to: ./saved_models/model2.tflite/assets


In [None]:
# Load the model exported above and convert it to TensorFlow Lite. No converter
# options are set here, so the conversion uses the converter's defaults.
converter = tf.lite.TFLiteConverter.from_saved_model(tflite_model_name)
# `tflite_model` holds the converted model; it is written to disk in binary
# mode by a later cell.
tflite_model = converter.convert()

2024-01-03 12:55:03.346023: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:364] Ignored output_format.
2024-01-03 12:55:03.346062: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:367] Ignored drop_control_dependency.
2024-01-03 12:55:03.518707: I tensorflow/cc/saved_model/reader.cc:45] Reading SavedModel from: ./saved_models/model2.tflite
2024-01-03 12:55:03.646754: I tensorflow/cc/saved_model/reader.cc:91] Reading meta graph with tags { serve }
2024-01-03 12:55:03.646791: I tensorflow/cc/saved_model/reader.cc:132] Reading SavedModel debug info (if present) from: ./saved_models/model2.tflite
2024-01-03 12:55:03.650022: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:375] MLIR V1 optimization pass is not enabled
2024-01-03 12:55:03.651570: I tensorflow/cc/saved_model/loader.cc:231] Restoring SavedModel bundle.
2024-01-03 12:55:03.958199: I tensorflow/cc/saved_model/loader.cc:215] Running initialization op on SavedModel bundle at path: 

In [None]:
# Output directory for the converted TensorFlow Lite file. This rebinds
# `tflite_models_dir`, which previously pointed at './saved_models'.
tflite_models_dir = './tflite_models'
# exist_ok=True makes the cell safe to re-run and avoids the gap between a
# separate existence check and the directory creation.
os.makedirs(tflite_models_dir, exist_ok=True)

In [None]:
# Rebind `tflite_model_name` to the destination of the converted model inside
# the directory created in the previous cell.
tflite_model_name = os.path.join(tflite_models_dir, f'{MODEL_NAME}.tflite')
# Bare expression: displays the resulting path as the cell output.
tflite_model_name

'./tflite_models/model2.tflite'

In [None]:
# Write the converted model to disk in binary mode; the `with` block closes the
# file even if the write fails.
with open(tflite_model_name, 'wb') as fp:
    fp.write(tflite_model)

In [None]:
# On-disk size of the written model file, in units of 1024 bytes.
tflite_size = os.stat(tflite_model_name).st_size / 1024.0

print(f'Size: {tflite_size:.3f} KB')

Size: 21.574 KB


<a style='text-decoration:none;line-height:16px;display:flex;color:#5B5B62;padding:10px;justify-content:end;' href='https://deepnote.com?utm_source=created-in-deepnote-cell&projectId=89ae1351-331c-4819-b269-03e7ee869ec8' target="_blank">
 </img>
Created in <span style='font-weight:600;margin-left:4px;'>Deepnote</span></a>