In [12]:
import tensorflow as tf
yes_no_train_ds = tf.data.Dataset.list_files('/tmp/yn-train/*')
yes_no_test_ds = tf.data.Dataset.list_files('/tmp/yn-test/*')

# Pre-processing & Training Hyper-parameters

In [13]:
PREPROCESSING_ARGS = {
    'sampling_rate': 16000,
    'frame_length_in_s': 0.016,
    'frame_step_in_s': 0.012,
    'num_mel_bins': 10,
    'lower_frequency': 20,
    'upper_frequency': 4000,
    'num_coefficients': 30,
}

TRAINING_ARGS = {
    'batch_size': 30,
    'initial_learning_rate': 0.005,
    'end_learning_rate': 1.e-5,
    'epochs': 20
}
final_sparsity = 0.70

In [14]:
from preprocessing import LABELS
from preprocessing import AudioReader
from preprocessing import MFCC


audio_reader = AudioReader(tf.int16, 16000)
mfcc_processor = MFCC(**PREPROCESSING_ARGS)

def prepare_for_training(feature, label):
    feature = tf.expand_dims(feature, -1)
    label_id = tf.argmax(label == LABELS)

    return feature, label_id


batch_size = TRAINING_ARGS['batch_size']
epochs = TRAINING_ARGS['epochs']

train_ds = (yes_no_train_ds
            .map(audio_reader.get_audio_and_label)
            .map(mfcc_processor.get_mfccs_and_label)
            .map(prepare_for_training)
            .batch(batch_size)
            .cache())

test_ds = (yes_no_test_ds
            .map(audio_reader.get_audio_and_label)
            .map(mfcc_processor.get_mfccs_and_label)
            .map(prepare_for_training)
            .batch(batch_size))

In [15]:
for example_batch, example_labels in train_ds.take(1):
  print('Batch Shape:', example_batch.shape)
  print('Data Shape:', example_batch.shape[1:])
  print('Labels:', example_labels)

Batch Shape: (30, 83, 10, 1)
Data Shape: (83, 10, 1)
Labels: tf.Tensor([0 1 1 0 0 0 1 1 0 0 1 1 1 0 0 1 0 1 1 1 0 0 1 0 1 1 1 1 0 1], shape=(30,), dtype=int64)
2024-01-04 03:59:45.297888: W tensorflow/core/kernels/data/cache_dataset_ops.cc:854] The calling iterator did not fully read the dataset being cached. In order to avoid unexpected truncation of the dataset, the partially cached contents of the dataset  will be discarded. This can happen if you have an input pipeline similar to `dataset.cache().take(k).repeat()`. You should use `dataset.take(k).cache().repeat()` instead.


In [16]:
model = tf.keras.Sequential([
    tf.keras.layers.Input(shape=example_batch.shape[1:]),
    tf.keras.layers.Conv2D(filters=32, kernel_size=[3, 3], strides=[2, 2], use_bias=False, padding='valid'),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.ReLU(),
    tf.keras.layers.Conv2D(filters=16, kernel_size=[3, 3], strides=[1, 1], use_bias=False, padding='same'),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.ReLU(),
    tf.keras.layers.Conv2D(filters=16, kernel_size=[3, 3], strides=[1, 1], use_bias=False, padding='same'),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.ReLU(),
    tf.keras.layers.Conv2D(filters=16, kernel_size=[3, 3], strides=[1, 1], use_bias=False, padding='same'),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.ReLU(),
    tf.keras.layers.GlobalAveragePooling2D(),
    tf.keras.layers.Dense(units=len(LABELS)),
    tf.keras.layers.Softmax()
])

# SETUP MAGNITUDE BASED WEIGHT PRUNING

In [17]:
import tensorflow_model_optimization as tfmot

prune_low_magnitude = tfmot.sparsity.keras.prune_low_magnitude

begin_step = int(len(train_ds) * epochs * 0.2)
end_step = int(len(train_ds) * epochs)

pruning_params = {
    'pruning_schedule': tfmot.sparsity.keras.PolynomialDecay(
        initial_sparsity=0.20,
        final_sparsity=final_sparsity,
        begin_step=begin_step,
        end_step=end_step
    )
}

model_for_pruning = prune_low_magnitude(model, **pruning_params)

In [18]:
model_for_pruning.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 prune_low_magnitude_conv2d  (None, 41, 4, 32)         578       
 _4 (PruneLowMagnitude)                                          
                                                                 
 prune_low_magnitude_batch_  (None, 41, 4, 32)         129       
 normalization_4 (PruneLowM                                      
 agnitude)                                                       
                                                                 
 prune_low_magnitude_re_lu_  (None, 41, 4, 32)         1         
 4 (PruneLowMagnitude)                                           
                                                                 
 prune_low_magnitude_conv2d  (None, 41, 4, 16)         9218      
 _5 (PruneLowMagnitude)                                          
                                                      

In [19]:
loss = tf.losses.SparseCategoricalCrossentropy(from_logits=False)
initial_learning_rate = TRAINING_ARGS['initial_learning_rate']
end_learning_rate = TRAINING_ARGS['end_learning_rate']

linear_decay = tf.keras.optimizers.schedules.PolynomialDecay(
    initial_learning_rate=initial_learning_rate,
    end_learning_rate=end_learning_rate,
    decay_steps=len(train_ds) * epochs,
)
optimizer = tf.optimizers.Adam(learning_rate=linear_decay)
metrics = [tf.metrics.SparseCategoricalAccuracy()]
callbacks = [tfmot.sparsity.keras.UpdatePruningStep()]

model_for_pruning.compile(loss=loss, optimizer=optimizer, metrics=metrics)

history = model_for_pruning.fit(train_ds, epochs=epochs,callbacks=callbacks)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [20]:
test_loss, test_accuracy = model_for_pruning.evaluate(test_ds)



In [21]:
training_loss = history.history['loss'][-1]
training_accuracy = history.history['sparse_categorical_accuracy'][-1]

print(f'Training Loss: {training_loss:.4f}')
print(f'Training Accuracy: {training_accuracy*100.:.2f}%')
print()
print(f'Test Loss: {test_loss:.4f}')
print(f'Test Accuracy: {test_accuracy*100.:.2f}%')

Training Loss: 0.0246
Training Accuracy: 99.69%

Test Loss: 0.0390
Test Accuracy: 99.00%


In [22]:
import numpy as np


for layer in model_for_pruning.layers:
    if isinstance(layer, tf.keras.layers.Wrapper):
        weights = layer.trainable_weights
    else:
        weights = layer.weights
    for weight in weights:        
        weight_size = weight.numpy().size
        zero_num = np.count_nonzero(weight == 0)
        print(
            f'{weight.name}: {zero_num/weight_size:.2%} sparsity ',
            f'({zero_num}/{weight_size})',
        )

conv2d_4/kernel:0: 70.14% sparsity  (202/288)
batch_normalization_4/gamma:0: 0.00% sparsity  (0/32)
batch_normalization_4/beta:0: 0.00% sparsity  (0/32)
conv2d_5/kernel:0: 69.99% sparsity  (3225/4608)
batch_normalization_5/gamma:0: 0.00% sparsity  (0/16)
batch_normalization_5/beta:0: 0.00% sparsity  (0/16)
conv2d_6/kernel:0: 69.97% sparsity  (1612/2304)
batch_normalization_6/gamma:0: 0.00% sparsity  (0/16)
batch_normalization_6/beta:0: 0.00% sparsity  (0/16)
conv2d_7/kernel:0: 69.97% sparsity  (1612/2304)
batch_normalization_7/gamma:0: 0.00% sparsity  (0/16)
batch_normalization_7/beta:0: 0.00% sparsity  (0/16)
dense_1/kernel:0: 68.75% sparsity  (22/32)
dense_1/bias:0: 0.00% sparsity  (0/2)


# Save The Model

In [23]:
import os
from time import time

timestamp = int(time())

saved_model_dir = f'./saved_models/{timestamp}'
if not os.path.exists(saved_model_dir):
    os.makedirs(saved_model_dir)
model.save(saved_model_dir)

INFO:tensorflow:Assets written to: ./saved_models/1704340898/assets
INFO:tensorflow:Assets written to: ./saved_models/1704340898/assets


Save Hyperparameter

In [24]:
import pandas as pd

output_dict = {
    'timestamp': timestamp,
    **PREPROCESSING_ARGS,
    **TRAINING_ARGS,
    'test_accuracy': test_accuracy,
    'final_sparsity': final_sparsity
}

df = pd.DataFrame([output_dict])

output_path='./mel_spectrogram_results.csv'
df.to_csv(output_path, mode='a', header=not os.path.exists(output_path), index=False)

# TFLite Conversion

In [25]:
!ls saved_models

1703604642  1704235763	1704237560  1704238806	1704339337  1704340898
1703608656  1704236809	1704238096  1704239753	1704340294


In [26]:
MODEL_NAME = 1704340898

In [28]:
converter = tf.lite.TFLiteConverter.from_saved_model(f'./saved_models/{MODEL_NAME}')
tflite_model = converter.convert()

2024-01-04 04:02:36.313420: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:364] Ignored output_format.
2024-01-04 04:02:36.313483: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:367] Ignored drop_control_dependency.
2024-01-04 04:02:36.579773: I tensorflow/cc/saved_model/reader.cc:45] Reading SavedModel from: ./saved_models/1704340898
2024-01-04 04:02:36.846891: I tensorflow/cc/saved_model/reader.cc:91] Reading meta graph with tags { serve }
2024-01-04 04:02:36.846944: I tensorflow/cc/saved_model/reader.cc:132] Reading SavedModel debug info (if present) from: ./saved_models/1704340898
2024-01-04 04:02:36.855268: I tensorflow/cc/saved_model/loader.cc:231] Restoring SavedModel bundle.
2024-01-04 04:02:37.512521: I tensorflow/cc/saved_model/loader.cc:215] Running initialization op on SavedModel bundle at path: ./saved_models/1704340898
2024-01-04 04:02:37.535019: I tensorflow/cc/saved_model/loader.cc:314] SavedModel load for tags { serve }; Status

In [29]:
tflite_models_dir = './tflite_models'
if not os.path.exists(tflite_models_dir):
    os.makedirs(tflite_models_dir)

In [30]:
tflite_model_name = os.path.join(tflite_models_dir, f'{MODEL_NAME}.tflite')
tflite_model_name

'./tflite_models/1704340898.tflite'

In [31]:
with open(tflite_model_name, 'wb') as fp:
    fp.write(tflite_model)

In [32]:
!ls tflite_models

1703604642.tflite      1704237560.tflite.zip  1704340294.tflite
1703604642.tflite.zip  1704238096.tflite      1704340294.tflite.zip
1703608656.tflite      1704238096.tflite.zip  1704340898.tflite
1703608656.tflite.zip  1704239753.tflite      MODEL_NAME.tflite.zip
1704236809.tflite      1704239753.tflite.zip  ref_model.tflite
1704236809.tflite.zip  1704339337.tflite
1704237560.tflite      1704339337.tflite.zip


# Zip File

In [33]:
import zipfile

with zipfile.ZipFile(f'{tflite_model_name}.zip', 'w', compression=zipfile.ZIP_DEFLATED) as f:
    f.write(tflite_model_name)

In [34]:
import zipfile

not_pruned_tflite = os.path.join(tflite_models_dir, '1704340898.tflite')

with zipfile.ZipFile(f'{not_pruned_tflite}.zip', 'w', compression=zipfile.ZIP_DEFLATED) as f:
    f.write(not_pruned_tflite) 

# Zipped tflite size

In [35]:
not_pruned_tflite_size = os.path.getsize(not_pruned_tflite) / 1024.0
tflite_size = os.path.getsize(tflite_model_name) / 1024.0
not_pruned_zipped_size = os.path.getsize(f'{not_pruned_tflite}.zip') / 1024.0
zipped_size = os.path.getsize(f'{tflite_model_name}.zip') / 1024.0

print(f'Original tflite size (not pruned model): {tflite_size:.3f} KB')
print(f'Original tflite size (pruned model): {tflite_size:.3f} KB')
print(f'Zipped tflite size (not pruned model): {not_pruned_zipped_size:.3f} KB')
print(f'Zipped tflite size (pruned model): {zipped_size:.3f} KB')


Original tflite size (not pruned model): 41.148 KB
Original tflite size (pruned model): 41.148 KB
Zipped tflite size (not pruned model): 16.509 KB
Zipped tflite size (pruned model): 16.509 KB


# Latency

In [36]:
import tensorflow as tf
import numpy as np
from time import time
from preprocessing import MelSpectrogram

# Configuration for Mel Spectrogram
SPEC_CONFIG = {
    'sampling_rate': 16000,
    'frame_length_in_s': 0.04,
    'frame_step_in_s': 0.02,
    'num_mel_bins': 40,
    'lower_frequency': 20,
    'upper_frequency': 4000,
}

# Initialize Mel Spectrogram Processor
mel_spectrogram_generator = MelSpectrogram(**SPEC_CONFIG)

# Load TensorFlow Lite model
model_interpreter = tf.lite.Interpreter(model_path='tflite_models/ref_model.tflite')
model_interpreter.allocate_tensors()

# Get input and output details
model_input_details = model_interpreter.get_input_details()
model_output_details = model_interpreter.get_output_details()

# Generate random audio sample
random_audio_sample = tf.random.normal((16000,))

# List to store latency times
latency_times = []

# Measure latency over 100 iterations
for _ in range(100):
    start_time = time()

    # Process audio to log Mel Spectrogram
    log_mel_spectrogram = mel_spectrogram_generator.get_mel_spec(random_audio_sample)
    log_mel_spectrogram = tf.expand_dims(log_mel_spectrogram, 0)
    log_mel_spectrogram = tf.expand_dims(log_mel_spectrogram, -1)

    # Run inference
    model_interpreter.set_tensor(model_input_details[0]['index'], log_mel_spectrogram)
    model_interpreter.invoke()
    inference_output = model_interpreter.get_tensor(model_output_details[0]['index'])

    end_time = time()

    # Calculate and store latency
    latency_times.append(end_time - start_time)

# Calculate median latency
median_latency = np.median(latency_times)
median_latency_ms = 1000 * median_latency
print(median_latency_ms)


INFO: Created TensorFlow Lite XNNPACK delegate for CPU.
27.88376808166504


In [37]:
import tensorflow as tf
import numpy as np
from time import time
from preprocessing import MFCC

mfcc_creator = MFCC(**PREPROCESSING_ARGS)

# Load the TensorFlow Lite model
tflite_model_path = '/work/tflite_models/1704340898.tflite'
model_interpreter = tf.lite.Interpreter(model_path=tflite_model_path)
model_interpreter.allocate_tensors()

# Get the input and output details from the model
input_info = model_interpreter.get_input_details()
output_info = model_interpreter.get_output_details()

# Create a random audio sample
audio_sample = tf.random.normal((16000,))

# List to hold latency measurements
latency_measurements = []

# Measure latency for 100 iterations
for _ in range(100):
    preprocessing_start = time()

    # Generate MFCC features from the audio sample
    mfcc_features = mfcc_creator.get_mfccs(audio_sample)
    mfcc_features = tf.expand_dims(mfcc_features, 0)  # Adding batch dimension
    mfcc_features = tf.expand_dims(mfcc_features, -1) # Adjusting for model input

    # Set the tensor for model inference
    model_interpreter.set_tensor(input_info[0]['index'], mfcc_features)
    model_interpreter.invoke()

    # Retrieve the output from the model
    model_output = model_interpreter.get_tensor(output_info[0]['index'])

    inference_end = time()

    # Calculate and append latency
    latency_measurements.append(inference_end - preprocessing_start)

# Compute the median latency
median_latency1 = np.median(latency_measurements)
median_latency2_ms = 1000 * median_latency1
print(median_latency2_ms)


11.198759078979492


# LATENCY_PERCENTAGE_REDUCTION

In [38]:
# Calculate percentage reduction in latency
LATENCY_PERCENTAGE_REDUCTION = 100 * (median_latency_ms - median_latency2_ms) / median_latency_ms
print(LATENCY_PERCENTAGE_REDUCTION)


59.837712585397554


<a style='text-decoration:none;line-height:16px;display:flex;color:#5B5B62;padding:10px;justify-content:end;' href='https://deepnote.com?utm_source=created-in-deepnote-cell&projectId=38c11a1c-cd40-4740-9cf4-b8aa568707a4' target="_blank">
 </img>
Created in <span style='font-weight:600;margin-left:4px;'>Deepnote</span></a>