# **Exercise 2 - Homework 2**

## Download dataset

In [38]:
# !pip install -U tensorflow-io==0.23.1
import argparse 
import tensorflow_io as tfio
from scipy import signal
from scipy.io import wavfile
import numpy as np
import os
import zlib
import tensorflow as tf
import tensorflow_model_optimization as tfmot

# from subprocess import Popen
# Popen('sudo sh -c "echo performance >" /sys/devices/system/cpu/cpufreq/policy0/scaling_governor"',
#       shell=True).wait()

# Model version to train/export. It is read by the MODELS lookups and by the
# final report cell, so it must be defined for a fresh-kernel run.
args_version = "a"
# Feature front-end selector: True -> MFCC options, False -> STFT options.
args_mfcc = True

# Seed TensorFlow and NumPy so runs are repeatable.
seed = 42
tf.random.set_seed(seed)
np.random.seed(seed)

# Fetch the mini speech commands archive and extract it (cached under ./data).
zip_path = tf.keras.utils.get_file(
    origin="http://storage.googleapis.com/download.tensorflow.org/data/mini_speech_commands.zip",
    fname='mini_speech_commands.zip',
    extract=True,
    cache_dir='.', cache_subdir='data')

## Train/validation/test splits

In [39]:
# Each split file is read as one entry per line (presumably audio file paths,
# since the entries are later fed to SignalGenerator.make_dataset).
# `with` guarantees the handles are closed once the lines have been read.
with open("kws_train_split.txt", "r") as kws_train_split:
    train_lines = kws_train_split.read().splitlines()
train_files = tf.convert_to_tensor(train_lines)
num_samples_train = train_files.shape

with open("kws_val_split.txt", "r") as kws_val_split:
    val_lines = kws_val_split.read().splitlines()
val_files = tf.convert_to_tensor(val_lines)
num_samples_val = val_files.shape

with open("kws_test_split.txt", "r") as kws_test_split:
    test_lines = kws_test_split.read().splitlines()
test_files = tf.convert_to_tensor(test_lines)
num_samples_test = test_files.shape

# Total number of entries across the three splits.
num_samples = num_samples_train[0] + num_samples_val[0] + num_samples_test[0]

## Label mapping

In [40]:
# labels.txt is parsed as a comma-separated list; brackets, quotes and spaces
# are stripped before splitting (presumably a printed Python list -- verify
# against the file).
with open("labels.txt", "r") as labels_file:
    labels = str(labels_file.read())
# The previous literal "[]''""  " was two adjacent strings ("[]''" and "  "),
# so the double quote it was meant to strip was never in the set.
characters_to_remove = "[]'\" "
for character in characters_to_remove:
    labels = labels.replace(character, "")
LABELS = labels.split(",")

## Classes definition

In [41]:
def res(audio, sampling_rate, input_rate=16000):
    """Resample `audio` from `input_rate` Hz to `sampling_rate` Hz.

    Args:
        audio: array of samples; resampling is applied along axis 0.
        sampling_rate: target rate in Hz (Python int or NumPy integer scalar).
        input_rate: rate of the incoming audio in Hz. Defaults to 16000,
            the value that was previously hard-coded.

    Returns:
        The resampled audio as a float32 NumPy array.
    """
    target_rate = int(sampling_rate)
    source_rate = int(input_rate)
    # Reduce the ratio target/source to lowest terms so that rates which do not
    # divide the source rate (e.g. 6000 from 16000) are resampled correctly;
    # the old `16000 // sampling_rate` silently rounded the factor down.
    common = int(np.gcd(source_rate, target_rate))
    up = target_rate // common
    down = source_rate // common
    audio = signal.resample_poly(audio, up, down)
    # tf.numpy_function is declared with Tout=tf.float32, so force the dtype.
    return np.array(audio, dtype = np.float32)
    
def tf_function(audio, sampling_rate):
    """Run `res` on `audio` through tf.numpy_function, returning a float32 tensor."""
    return tf.numpy_function(func=res, inp=[audio, sampling_rate], Tout=tf.float32)

In [42]:
class SignalGenerator:
    """Builds tf.data pipelines that turn audio file paths into (features, label).

    Depending on `mfcc`, `self.preprocess` is bound to either the MFCC or the
    STFT-spectrogram pipeline; `make_dataset` maps it over a list of paths.
    """

    def __init__(self, labels, sampling_rate, frame_length, frame_step,
            num_mel_bins=None, lower_frequency=None, upper_frequency=None,
            num_coefficients=None, mfcc=False):
        """Store the preprocessing options and select the preprocessing function.

        Args:
            labels: collection of label names; a file's label id is the index
                of its parent directory name in this collection.
            sampling_rate: target sampling rate in Hz; also the length (in
                samples) every clip is padded to.
            frame_length: STFT frame (and FFT) length in samples.
            frame_step: STFT hop in samples.
            num_mel_bins, lower_frequency, upper_frequency: mel filterbank
                parameters, used only when `mfcc` is True.
            num_coefficients: number of MFCCs kept, used only when `mfcc` is True.
            mfcc: when exactly True use MFCC features, otherwise STFT magnitudes.
        """
        self.labels = labels
        self.sampling_rate = sampling_rate
        self.frame_length = frame_length
        self.frame_step = frame_step
        self.num_mel_bins = num_mel_bins
        self.lower_frequency = lower_frequency
        self.upper_frequency = upper_frequency
        self.num_coefficients = num_coefficients
        # fft_length == frame_length in get_spectrogram, hence this bin count.
        num_spectrogram_bins = (frame_length) // 2 + 1

        if mfcc is True:
            # The mel weight matrix is constant, so build it once here.
            self.linear_to_mel_weight_matrix = tf.signal.linear_to_mel_weight_matrix(
                    self.num_mel_bins, num_spectrogram_bins, self.sampling_rate,
                    self.lower_frequency, self.upper_frequency)
            self.preprocess = self.preprocess_with_mfcc
        else:
            self.preprocess = self.preprocess_with_stft

    def read(self, file_path):
        """Load one WAV file, resample it and derive its label id.

        Args:
            file_path: string tensor holding the path; the label is taken from
                the second-to-last path component (the parent directory).

        Returns:
            (audio, label_id): the resampled waveform with axis 1 squeezed
            out, and the argmax of `label == self.labels`.
        """
        parts = tf.strings.split(file_path, os.path.sep)
        label = parts[-2]
        label_id = tf.argmax(label == self.labels)
        audio_binary = tf.io.read_file(file_path)
        audio, _ = tf.audio.decode_wav(audio_binary)

        # Use the instance's rate: the previous code read the notebook-level
        # global `sampling_rate`, silently ignoring the constructor argument.
        audio = tf_function(audio, self.sampling_rate)

        audio = tf.squeeze(audio, axis=1)
        return audio, label_id

    def pad(self, audio):
        """Zero-pad `audio` at the end to exactly `self.sampling_rate` samples.

        NOTE(review): assumes the clip is not longer than `self.sampling_rate`
        samples; a longer clip would request a negative padding size.
        """
        zero_padding = tf.zeros([self.sampling_rate] - tf.shape(audio), dtype=tf.float32)
        audio = tf.concat([audio, zero_padding], 0)
        # Restore the static shape for downstream ops.
        audio.set_shape([self.sampling_rate])
        return audio

    def get_spectrogram(self, audio):
        """Return the magnitude of the STFT of `audio`."""
        stft = tf.signal.stft(audio, frame_length=self.frame_length,
                frame_step=self.frame_step, fft_length=self.frame_length)
        spectrogram = tf.abs(stft)
        return spectrogram

    def get_mfccs(self, spectrogram):
        """Convert a magnitude spectrogram into the first `num_coefficients` MFCCs."""
        mel_spectrogram = tf.tensordot(spectrogram,
                self.linear_to_mel_weight_matrix, 1)
        # Small offset keeps the logarithm finite where the mel energy is zero.
        log_mel_spectrogram = tf.math.log(mel_spectrogram + 1.e-6)
        mfccs = tf.signal.mfccs_from_log_mel_spectrograms(log_mel_spectrogram)
        mfccs = mfccs[..., :self.num_coefficients]
        return mfccs

    def preprocess_with_stft(self, file_path):
        """Path -> (spectrogram resized to 32x32 with a channel axis, label id)."""
        audio, label = self.read(file_path)
        audio = self.pad(audio)
        spectrogram = self.get_spectrogram(audio)
        spectrogram = tf.expand_dims(spectrogram, -1)
        spectrogram = tf.image.resize(spectrogram, [32, 32])
        return spectrogram, label

    def preprocess_with_mfcc(self, file_path):
        """Path -> (MFCCs with a trailing channel axis, label id)."""
        audio, label = self.read(file_path)
        audio = self.pad(audio)
        spectrogram = self.get_spectrogram(audio)
        mfccs = self.get_mfccs(spectrogram)
        mfccs = tf.expand_dims(mfccs, -1)
        return mfccs, label

    def make_dataset(self, files, train):
        """Build a batched, cached dataset of preprocessed examples.

        Args:
            files: tensor (or list) of file paths.
            train: when exactly True, shuffle on every iteration.

        Returns:
            A tf.data.Dataset of (features, label) batches of size 32. The
            shuffle comes after `batch`, so it reorders whole batches.
        """
        ds = tf.data.Dataset.from_tensor_slices(files)
        ds = ds.map(self.preprocess, num_parallel_calls=4)
        ds = ds.batch(32)
        ds = ds.cache()
        if train is True:
            ds = ds.shuffle(100, reshuffle_each_iteration=True)
        return ds

In [43]:
# Calibration data source used for weight-and-activation quantization.
def representative_dataset_generator():
    """Yield the features of up to 1000 elements of `train_ds`, each wrapped in a list."""
    calibration_batches = train_ds.take(1000)
    for batch in calibration_batches:
        features = batch[0]
        yield [features]

## Options for the Signal Generator

In [44]:
# Target sampling rate (Hz) handed to SignalGenerator.
sampling_rate = 8000
# frame_length = 16e-3 * sampling_rate
# frame_step = 8e-3 * sampling_rate
# frame_length = int(frame_length)
# frame_step = int(frame_step)

# Keyword arguments for SignalGenerator, one set per feature front-end.
STFT_OPTIONS = dict(frame_length=256, frame_step=128, mfcc=False)
MFCC_OPTIONS = dict(
    frame_length=320,
    frame_step=160,
    mfcc=True,
    lower_frequency=20,
    upper_frequency=4000,
    num_mel_bins=16,
    num_coefficients=10,
)

# `strides` is the stride of the first convolution in the CNN / DS-CNN models.
if args_mfcc is not True:
    options, strides = STFT_OPTIONS, [2, 2]
    print('stft')
else:
    options, strides = MFCC_OPTIONS, [2, 1]
    print('mfcc')

# Number of output units of every model: one per label.
units = len(LABELS)

mfcc


## Create datasets

In [45]:
# One generator shared by all splits; only the training split is shuffled.
generator = SignalGenerator(LABELS, sampling_rate, **options)
train_ds, val_ds, test_ds = (
    generator.make_dataset(split_files, is_training)
    for split_files, is_training in (
        (train_files, True),
        (val_files, False),
        (test_files, False),
    )
)

## Loading and evaluating TFLite models

In [33]:
# Load and evaluate quantized models
def load_and_evaluation(path, dataset):
    """Evaluate a zlib-compressed TFLite model on `dataset`.

    Args:
        path: path of the file holding the zlib-compressed TFLite flatbuffer.
        dataset: batched dataset of (features, label) pairs; it is re-batched
            to batch size 1 before inference.

    Returns:
        The fraction of samples whose argmax prediction equals the label.

    Raises:
        ValueError: if `dataset` yields no samples.
    """
    # Context manager so the file handle is closed even if decompression fails.
    with open(path, 'rb') as f:
        decompressed_model = zlib.decompress(f.read())
    interpreter = tf.lite.Interpreter(model_content=decompressed_model)
    interpreter.allocate_tensors()
    input_index = interpreter.get_input_details()[0]['index']
    output_index = interpreter.get_output_details()[0]['index']

    # set batch size to 1 when running inference with TFLite models
    dataset = dataset.unbatch().batch(1)

    outputs = []
    labels = []

    for features, label in dataset:
        my_input = np.array(features, dtype = np.float32)
        labels.append(np.array(label, dtype = np.float32))

        interpreter.set_tensor(input_index, my_input)
        interpreter.invoke()
        my_output = interpreter.get_tensor(output_index)
        # Drop the batch axis (size 1) of the model output.
        outputs.append(my_output[0])

    if not outputs:
        raise ValueError('Cannot evaluate the model on an empty dataset')

    predictions = np.argmax(np.array(outputs), axis=1)
    # One label per sample, flattened to match `predictions`.
    labels = np.array(labels).reshape(-1)

    acc = np.mean(np.equal(labels, predictions))

    return acc

# Training without optimization

## Define models

In [27]:
def _conv_bn_relu(filters, conv_strides):
    """Return [3x3 bias-free Conv2D, BatchNormalization(momentum=0.1), ReLU]."""
    return [
        tf.keras.layers.Conv2D(filters=filters, kernel_size=[3,3], strides=conv_strides, use_bias=False),
        tf.keras.layers.BatchNormalization(momentum=0.1),
        tf.keras.layers.ReLU(),
    ]


def _depthwise_pointwise_bn_relu(filters):
    """Return [3x3 DepthwiseConv2D, 1x1 Conv2D, BatchNormalization(momentum=0.1), ReLU]."""
    return [
        tf.keras.layers.DepthwiseConv2D(kernel_size=[3, 3], strides=[1, 1], use_bias=False),
        tf.keras.layers.Conv2D(filters=filters, kernel_size=[1,1], strides=[1,1], use_bias=False),
        tf.keras.layers.BatchNormalization(momentum=0.1),
        tf.keras.layers.ReLU(),
    ]


# Multi-layer perceptron: three hidden layers of 256 units, then the logits.
mlp = tf.keras.Sequential([
    tf.keras.layers.Flatten(),
    *[tf.keras.layers.Dense(units = 256, activation='relu') for _ in range(3)],
    tf.keras.layers.Dense(units = units),
])

# Plain CNN: three conv blocks of 128 filters; only the first one uses `strides`.
cnn = tf.keras.Sequential([
    *_conv_bn_relu(128, strides),
    *_conv_bn_relu(128, [1,1]),
    *_conv_bn_relu(128, [1,1]),
    tf.keras.layers.GlobalAveragePooling2D(),
    tf.keras.layers.Dense(units = units),
])

# Depthwise-separable CNN: one standard conv block, then two separable blocks.
ds_cnn = tf.keras.Sequential([
    *_conv_bn_relu(256, strides),
    *_depthwise_pointwise_bn_relu(256),
    *_depthwise_pointwise_bn_relu(256),
    tf.keras.layers.GlobalAveragePooling2D(),
    tf.keras.layers.Dense(units = units),
])

## Parameters for training

In [28]:
# Version -> model lookup. The previous literal used the key 'b' twice, so the
# MLP entry was silently overwritten by the CNN. 'a' and 'b' keep their
# effective mapping; the MLP is now reachable under its own key.
MODELS = {'a': ds_cnn, 'b': cnn, 'c': mlp}
model = MODELS[args_version]

# The models output raw logits (no softmax layer), hence from_logits=True.
loss = tf.losses.SparseCategoricalCrossentropy(from_logits=True)
optimizer = tf.optimizers.Adam()
metrics = [tf.keras.metrics.SparseCategoricalAccuracy()]

## Training

In [14]:
model.compile(loss = loss, optimizer = optimizer, metrics = metrics)
model.fit(train_ds, validation_data=val_ds, epochs=1)

# summary() prints the table itself and returns None; wrapping it in print()
# only appended a stray "None" to the output.
model.summary()

# The compiled metric is SparseCategoricalAccuracy, so the second value
# returned by evaluate() is an accuracy, not an error rate.
test_loss, test_accuracy = model.evaluate(test_ds)
print('Test accuracy: ', test_accuracy)

InvalidArgumentError:  0-th value returned by pyfunc_0 is double, but expects float
	 [[{{node PyFunc}}]]
	 [[IteratorGetNext]] [Op:__inference_train_function_7628]

# Optimization of the model

## Structured pruning

In [11]:
# Width multiplier: every hidden layer keeps int(width * alpha) units/filters.
alpha = 0.7

# Same MLP layout as the unscaled model, with scaled hidden widths.
pruned_mlp = tf.keras.Sequential()
pruned_mlp.add(tf.keras.layers.Flatten())
pruned_mlp.add(tf.keras.layers.Dense(units = int(256*alpha), activation='relu'))
pruned_mlp.add(tf.keras.layers.Dense(units = int(256*alpha), activation='relu'))
pruned_mlp.add(tf.keras.layers.Dense(units = int(256*alpha), activation='relu'))
pruned_mlp.add(tf.keras.layers.Dense(units = units))

# Same CNN layout, with scaled filter counts.
pruned_cnn = tf.keras.Sequential()
pruned_cnn.add(tf.keras.layers.Conv2D(filters=int(128*alpha), kernel_size=[3,3], strides=strides, use_bias=False))
pruned_cnn.add(tf.keras.layers.BatchNormalization(momentum=0.1))
pruned_cnn.add(tf.keras.layers.ReLU())
pruned_cnn.add(tf.keras.layers.Conv2D(filters=int(128*alpha), kernel_size=[3,3], strides=[1,1], use_bias=False))
pruned_cnn.add(tf.keras.layers.BatchNormalization(momentum=0.1))
pruned_cnn.add(tf.keras.layers.ReLU())
pruned_cnn.add(tf.keras.layers.Conv2D(filters=int(128*alpha), kernel_size=[3,3], strides=[1,1], use_bias=False))
pruned_cnn.add(tf.keras.layers.BatchNormalization(momentum=0.1))
pruned_cnn.add(tf.keras.layers.ReLU())
pruned_cnn.add(tf.keras.layers.GlobalAveragePooling2D())
pruned_cnn.add(tf.keras.layers.Dense(units = units))

# Same DS-CNN layout, with scaled filter counts.
pruned_ds_cnn = tf.keras.Sequential()
pruned_ds_cnn.add(tf.keras.layers.Conv2D(filters=int(256*alpha), kernel_size=[3,3], strides=strides, use_bias=False))
pruned_ds_cnn.add(tf.keras.layers.BatchNormalization(momentum=0.1))
pruned_ds_cnn.add(tf.keras.layers.ReLU())
pruned_ds_cnn.add(tf.keras.layers.DepthwiseConv2D(kernel_size=[3, 3], strides=[1, 1], use_bias=False))
pruned_ds_cnn.add(tf.keras.layers.Conv2D(filters=int(256*alpha), kernel_size=[1,1], strides=[1,1], use_bias=False))
pruned_ds_cnn.add(tf.keras.layers.BatchNormalization(momentum=0.1))
pruned_ds_cnn.add(tf.keras.layers.ReLU())
pruned_ds_cnn.add(tf.keras.layers.DepthwiseConv2D(kernel_size=[3, 3], strides=[1, 1], use_bias=False))
pruned_ds_cnn.add(tf.keras.layers.Conv2D(filters=int(256*alpha), kernel_size=[1,1], strides=[1,1], use_bias=False))
pruned_ds_cnn.add(tf.keras.layers.BatchNormalization(momentum=0.1))
pruned_ds_cnn.add(tf.keras.layers.ReLU())
pruned_ds_cnn.add(tf.keras.layers.GlobalAveragePooling2D())
pruned_ds_cnn.add(tf.keras.layers.Dense(units = units))

In [12]:
# Version -> width-scaled model lookup. The previous literal used the key 'b'
# twice, so the MLP entry was silently overwritten by the CNN. 'a' and 'b'
# keep their effective mapping; the MLP is now reachable under its own key.
MODELS = {'a': pruned_ds_cnn, 'b': pruned_cnn, 'c': pruned_mlp}
model = MODELS[args_version]

# The models output raw logits (no softmax layer), hence from_logits=True.
loss = tf.losses.SparseCategoricalCrossentropy(from_logits=True)
optimizer = tf.optimizers.Adam()
metrics = [tf.keras.metrics.SparseCategoricalAccuracy()]

In [None]:
# if args_mfcc is True:
#     input_shape = [1, 32, 32, 1]
# else:
#     input_shape = [1, 49, 10, 1]

# # model.build(input_shape)

# # Early stopping callback
# es_callback = tf.keras.callbacks.EarlyStopping(monitor="val_loss", mode = 'min', min_delta = 0, patience = 6,)# restore_best_weights = True)

# model.compile(loss = loss, optimizer = optimizer, metrics = metrics)
# model.fit(train_ds, validation_data=val_ds, epochs = 15,)#callbacks=es_callback)

# print(model.summary())

# test_loss, test_error = model.evaluate(test_ds)
# print('Test error: ', test_error)

# model = tfmot.sparsity.keras.strip_pruning(model)

## Magnitude based pruning

In [13]:
epochs = 15

# Sparsity ramps from 20% to 75% between the end of the second epoch and the
# end of training. `end_step` is derived from `epochs` (it was a hard-coded 15)
# so the schedule follows the training length.
pruning_params = {'pruning_schedule':
    tfmot.sparsity.keras.PolynomialDecay(
        initial_sparsity=0.20,
        final_sparsity=0.75,
        begin_step=2*len(train_ds),
        end_step=epochs*len(train_ds)
        )
    }

prune_low_magnitude = tfmot.sparsity.keras.prune_low_magnitude
model = prune_low_magnitude(model, **pruning_params)

# Early stopping callback (defined but currently not passed to fit()).
es_callback = tf.keras.callbacks.EarlyStopping(monitor="val_loss", mode = 'min', min_delta = 0, patience = 6,) #restore_best_weights = True)

# Define the pruning callback
callbacks = [tfmot.sparsity.keras.UpdatePruningStep(),]# es_callback]

# Input shape follows the active front-end: the STFT pipeline resizes its
# spectrograms to 32x32, while the MFCC pipeline yields 49 frames x 10
# coefficients with the current MFCC_OPTIONS at 8 kHz. It was previously
# hard-coded to the MFCC shape regardless of `args_mfcc`.
if args_mfcc is True:
    input_shape = [1, 49, 10, 1]
else:
    input_shape = [1, 32, 32, 1]
model.build(input_shape) # needed for ds_cnn
model.compile(loss = loss, optimizer = optimizer, metrics = metrics)
model.fit(train_ds, epochs=epochs, validation_data=val_ds, callbacks=callbacks)

# summary() prints the table itself and returns None; print() added a stray "None".
model.summary()

# The compiled metric is SparseCategoricalAccuracy, so this is an accuracy.
test_loss, test_accuracy = model.evaluate(test_ds)
print('Test accuracy: ', test_accuracy)

# Strip the pruning wrappers before export.
model = tfmot.sparsity.keras.strip_pruning(model)

  mask = self.add_variable(
  threshold = self.add_variable(
  self.pruning_step = self.add_variable(


Epoch 1/15


2022-01-05 16:50:35.359870: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:380] Filling up shuffle buffer (this may take a while): 66 of 100


  1/200 [..............................] - ETA: 1:11:47 - loss: 2.0542 - sparse_categorical_accuracy: 0.1250

2022-01-05 16:50:40.913972: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:405] Shuffle buffer filled.


Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15
Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 prune_low_magnitude_conv2d_  (None, 24, 8, 179)       3224      
 3 (PruneLowMagnitude)                                           
                                                                 
 prune_low_magnitude_batch_n  (None, 24, 8, 179)       717       
 ormalization_3 (PruneLowMag                                     
 nitude)                                                         
                                                                 
 prune_low_magnitude_re_lu_3  (None, 24, 8, 179)       1         
  (PruneLowMagnitude)                                            
                                                                 
 prune_low_magnitude_depth

## Post Training Quantization

### Weight 

In [12]:
# Weight quantization: default optimizations, no representative dataset.
converter = tf.lite.TFLiteConverter.from_keras_model(model)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
# converter.target_spec.supported_types = [tf.float16]

# Allow both TensorFlow Lite built-in ops and selected TensorFlow ops.
converter.target_spec.supported_ops = [
    tf.lite.OpsSet.TFLITE_BUILTINS,
    tf.lite.OpsSet.SELECT_TF_OPS,
]

### Weight and activation

In [None]:
# # integer quantization with float fallback ( Reduce too much the accuracy)
# converter = tf.lite.TFLiteConverter.from_keras_model(model)
# converter.optimizations = [tf.lite.Optimize.DEFAULT]
# converter.representative_dataset = representative_dataset_generator

In [14]:
# integer only quantization (ERROR wrong input type)
# INT8 built-in ops calibrated on the representative dataset; the model's
# input and output tensors are requested as float32.
converter = tf.lite.TFLiteConverter.from_keras_model(model)
converter.representative_dataset = representative_dataset_generator
converter.optimizations = [tf.lite.Optimize.DEFAULT]
converter.inference_input_type = tf.float32
converter.inference_output_type = tf.float32
converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]

In [None]:
# experimental integer-only quantization
# 16-bit activations with 8-bit weights, calibrated on the representative
# dataset; input and output tensors are requested as float32.
converter = tf.lite.TFLiteConverter.from_keras_model(model)
converter.representative_dataset = representative_dataset_generator
converter.optimizations = [tf.lite.Optimize.DEFAULT]
converter.target_spec.supported_ops = [
    tf.lite.OpsSet.EXPERIMENTAL_TFLITE_BUILTINS_ACTIVATIONS_INT16_WEIGHTS_INT8
]
converter.inference_input_type = tf.float32
converter.inference_output_type = tf.float32

### No quantization

In [None]:
# converter = tf.lite.TFLiteConverter.from_keras_model(model)

# Save model as TFLite model

In [46]:
# Conversion and saving are currently disabled: this cell evaluates a model
# file that already exists on disk.
# tflite_model = converter.convert()

# if not os.path.exists('./models/'):
#     os.makedirs('./models/')

model_dir = os.path.join('.', 'models', 'drive_C_notOK.zlib')
# with open(model_dir, 'wb') as fp:
#         tflite_compressed = zlib.compress(tflite_model)
#         fp.write(tflite_compressed)

# Size of the final tflite.zlib model. The version label now comes from
# `args_version` (it was a hard-coded 'a'), matching the accuracy line below.
print('Model size version {}: {:.2f}kB'.format(args_version, os.path.getsize(model_dir)/1000))

# Evaluation of the tflite.zlib model
acc = load_and_evaluation(model_dir, test_ds)
print('Accuracy of model version {} = {:.3f}'.format(args_version, acc))

Model size version a: 24.72kB
Accuracy of model version a = 0.907
