Due to difficulties converting a Pytorch model to a Tensorflow model, this notebook uses the same CNN model as before, but written in Tensorflow. This allows it to be portable to an Android device.

In [1]:
# imports 
import numpy as np
import os
import os.path as path
import librosa

import build.pybind_modules.dsp_module as cu
import build.pybind_modules.matrix_module as myMatrix

import keras
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Input, Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras import backend as K

import tensorflow as tf
from tensorflow.python.tools import freeze_graph
from tensorflow.python.tools import optimize_for_inference_lib
from tensorflow.keras.models import Model
from tensorflow.math import confusion_matrix
# tf.compat.v1.disable_eager_execution()

from tflite_runtime.interpreter import Interpreter

print('TensorFlow version:',tf.__version__)
physical_devices = tf.config.list_physical_devices()
for dev in physical_devices:
    print(dev)

2023-04-23 19:44:51.275892: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-04-23 19:44:51.830271: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /home/jorgejc2/.local/lib/python3.8/site-packages/nvidia/cublas/lib/:/usr/local/cuda-11.0/lib64
2023-04-23 19:44:51.830323: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /home/jorgejc2/.local/lib/python3.

TensorFlow version: 2.11.0
PhysicalDevice(name='/physical_device:CPU:0', device_type='CPU')


2023-04-23 19:44:52.466718: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2023-04-23 19:44:52.471902: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcusolver.so.11'; dlerror: libcusolver.so.11: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /home/jorgejc2/.local/lib/python3.8/site-packages/nvidia/cublas/lib/:/usr/local/cuda-11.0/lib64
2023-04-23 19:44:52.472410: W tensorflow/core/common_runtime/gpu/gpu_device.cc:1934] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform.
Skipping registering GPU devices...


In [2]:
# Parameters 
MODEL_NAME = 'audio_mnist'
EPOCHS = 20
BATCH_SIZE = 64

FS = 48000
DOWNSAMPLED_FS = 8000
NFFT = 256
NOVERLAP = -1
NFILT = 40
NUM_CEPS = 13
NN_DATA_COLS = 28
NN_DATA_ROWS = 12

In [3]:
def relu6(x):
    return tf.keras.activations.relu(x, max_value=6)

def compute_accuracies(predicted_labels, dev_set, dev_labels):
    yhats = predicted_labels
    assert predicted_labels.dtype == int, "Your predicted labels have type {}, but they should have type np.int (consider using .astype(int) on your output)".format(predicted_labels.dtype)

    if len(yhats) != len(dev_labels):
        print("Lengths of predicted labels don't match length of actual labels", len(yhats), len(dev_labels))
        return 0., 0., 0., 0.
    accuracy = np.mean(yhats == dev_labels)
    conf_m = np.zeros((len(np.unique(dev_labels)),len(np.unique(dev_labels))))
    for i,j in zip(dev_labels,predicted_labels):
        conf_m[i,j] +=1

    return accuracy, conf_m

def export_model(saver, model, input_node_names, output_node_name):
    tf.train.write_graph(K.get_session().graph_def, 'out', \
        MODEL_NAME + '_graph.pbtxt')

    saver.save(K.get_session(), 'out/' + MODEL_NAME + '.chkp')

    freeze_graph.freeze_graph('out/' + MODEL_NAME + '_graph.pbtxt', None, \
        False, 'out/' + MODEL_NAME + '.chkp', output_node_name, \
        "save/restore_all", "save/Const:0", \
        'out/frozen_' + MODEL_NAME + '.pb', True, "")

    input_graph_def = tf.GraphDef()
    with tf.gfile.Open('out/frozen_' + MODEL_NAME + '.pb', "rb") as f:
        input_graph_def.ParseFromString(f.read())

    output_graph_def = optimize_for_inference_lib.optimize_for_inference(
            input_graph_def, input_node_names, [output_node_name],
            tf.float32.as_datatype_enum)

    with tf.gfile.FastGFile('out/opt_' + MODEL_NAME + '.pb', "wb") as f:
        f.write(output_graph_def.SerializeToString())

    print("graph saved!")


class NeuralNet(tf.keras.Model):
    def __init__(self, out_size):
        super(NeuralNet, self).__init__()

        self.conv1 = tf.keras.layers.Conv2D(filters=10, kernel_size=(3,3), activation=relu6, padding='same', kernel_initializer='he_uniform')
        self.maxpool = tf.keras.layers.MaxPooling2D(pool_size=(3,3), padding='same')
        self.conv2 = tf.keras.layers.Conv2D(filters=20, kernel_size=(3,3), activation=relu6, padding='same', kernel_initializer='he_uniform')
        self.dropout_1 = tf.keras.layers.Dropout(rate=0.1)
        self.dropout_2 = tf.keras.layers.Dropout(rate=0.16)
        self.dropout_3 = tf.keras.layers.Dropout(rate=0.12)

        self.flatten = tf.keras.layers.Flatten()
        self.dense_1 = tf.keras.layers.Dense(units=5000, activation='relu', kernel_initializer='he_uniform')
        self.dense_2 = tf.keras.layers.Dense(units=1000, activation='relu', kernel_initializer='he_uniform')
        self.dense_3 = tf.keras.layers.Dense(units=out_size, activation='softmax', kernel_initializer='he_uniform')

        # self.loss_fn = loss_fn
        # self.optimizer = tf.keras.optimizers.SGD(learning_rate=lrate, momentum=0.9, weight_decay=weight_decay)

    def call(self, x):
        x = self.conv1(x)
        x = self.maxpool(x)
        x = self.conv2(x)
        x = self.maxpool(x)
        x = self.dropout_1(x, training=True)
        x = self.flatten(x)
        x = self.dense_1(x)
        x = self.dropout_2(x, training=True)
        x = self.dense_2(x)
        x = self.dropout_3(x, training=True)
        x = self.dense_3(x)
        return x

In [4]:
# Load the data (should be trained and uploaded using the other spoken_digit_recognition notebook)

dev_set_labels = np.loadtxt("l_dev_set_labels.csv", delimiter=",", dtype=np.int32)
train_labels = np.loadtxt("train_labels.csv", delimiter=",", dtype=np.int32)
train_set = np.loadtxt("train_set.csv", delimiter=",", dtype=np.float64)
dev_set = np.loadtxt("dev_set.csv", delimiter=",", dtype=np.float64)

# normalize the data between 0 and 1 in float64
max_val = None
min_val = None
for arr in train_set:
    for sample in arr:
        if max_val == None or sample > max_val:
            max_val = sample
        if min_val == None or sample < min_val:
            min_val = sample


train_set -= min_val
dev_set -= min_val
max_val -= min_val

train_set = train_set / max_val
dev_set = dev_set / max_val

print("Len dev_set_labels: {}".format(len(dev_set_labels)))
print("Len dev_set: {}".format(len(dev_set_labels)))
print("Len train_set_labels: {}".format(len(train_labels)))
print("Len train_set: {}".format(len(train_set)))

print("\nTrainset shape: {}".format(train_set.shape))

Len dev_set_labels: 200
Len dev_set: 200
Len train_set_labels: 1000
Len train_set: 1000

Trainset shape: (1000, 336)


In [5]:
# reshaping data to desired shape
reshaped_dev_set = np.zeros((len(dev_set), NN_DATA_ROWS, NN_DATA_COLS))
reshaped_train_set = np.zeros((len(train_set), NN_DATA_ROWS, NN_DATA_COLS))
for i in range(len(dev_set)):
    reshaped_dev_set[i] = np.reshape(dev_set[i], (NN_DATA_ROWS, NN_DATA_COLS))

for i in range(len(train_set)):
    reshaped_train_set[i] = np.reshape(train_set[i], (NN_DATA_ROWS, NN_DATA_COLS))

In [6]:
# running dataset on model 
batch_size = 64
epochs = 20
input_dim = 12 # num ceps - 1
output_dim = 10 # number of genres
weight_decay = 1e-2
learning_rate = 1e-2

input_layer = Input(shape=(NN_DATA_ROWS, NN_DATA_COLS, 1))
x = NeuralNet(output_dim)(input_layer)

model = Model(inputs=input_layer, outputs=x)

# printing a view of the model
print(model.summary(expand_nested=True))

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 12, 28, 1)]       0         
                                                                 
 neural_net (NeuralNet)      (None, 10)                5817930   
|¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯|
| conv2d (Conv2D)           multiple                  100       |
|                                                               |
| max_pooling2d (MaxPooling2D  multiple               0         |
| )                                                             |
|                                                               |
| conv2d_1 (Conv2D)         multiple                  1820      |
|                                                               |
| dropout (Dropout)         multiple                  0         |
|                                                            

2023-04-23 19:44:52.723406: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


|                                                               |
| dropout_2 (Dropout)       multiple                  0         |
|                                                               |
| flatten (Flatten)         multiple                  0         |
|                                                               |
| dense (Dense)             multiple                  805000    |
|                                                               |
| dense_1 (Dense)           multiple                  5001000   |
|                                                               |
| dense_2 (Dense)           multiple                  10010     |
¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯
Total params: 5,817,930
Trainable params: 5,817,930
Non-trainable params: 0
_________________________________________________________________
None


In [7]:
# training the model now
# model.compile(optimizer=tf.keras.optimizers.SGD(learning_rate=learning_rate, weight_decay=weight_decay), loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True), metrics="acc")
model.compile(optimizer=tf.keras.optimizers.SGD(learning_rate=learning_rate, weight_decay=weight_decay), loss=tf.keras.losses.SparseCategoricalCrossentropy(), metrics="acc")
history = model.fit(x=reshaped_train_set, y=train_labels, batch_size=BATCH_SIZE, epochs=EPOCHS, validation_data=(reshaped_dev_set, dev_set_labels))

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [8]:

# confusion = confusion_matrix(labels=dev_set_labels, predictions=, num_classes=num_classes)
# print(confusion)

In [9]:
# testing a prediction on the model
dev_set_1_idx = 20
dev_set_2_idx = 180
single_test = tf.convert_to_tensor(np.array([reshaped_dev_set[dev_set_1_idx], reshaped_dev_set[dev_set_2_idx]]))
predictions = model(single_test, training=False).numpy()

print("Top 3 prections by (value, label)")
print(sorted(zip(predictions[0], [i for i in range(10)]), reverse=True)[:3])
print("Actual label was: {}\n".format(dev_set_labels[dev_set_1_idx]))

print("Top 3 prections by (value, label)")
print(sorted(zip(predictions[1], [i for i in range(10)]), reverse=True)[:3])
print("Actual label was: {}\n".format(dev_set_labels[dev_set_2_idx]))

Top 3 prections by (value, label)
[(0.29388922, 7), (0.2321858, 8), (0.18663992, 2)]
Actual label was: 6

Top 3 prections by (value, label)
[(0.8033189, 5), (0.12497926, 7), (0.034471214, 4)]
Actual label was: 5



In [10]:
def displayFIR(filt):
    coef_str = "{" 
    for val in filt: 
        coef_str += str(val) + ", " 
    coef_str = coef_str[:-2] 
    coef_str += "};" 
    print("FIR a Coefficients")
    print(coef_str) 

In [11]:
displayFIR(reshaped_dev_set[dev_set_1_idx][11])

FIR a Coefficients
{0.37957937228225425, 0.3714986117842557, 0.3756342901368514, 0.36749962020527693, 0.39516165384576263, 0.4006051142599922, 0.3827173966199189, 0.3852082101473901, 0.39247934066041756, 0.37916899309645696, 0.31506156766613364, 0.3308236359151576, 0.2981971620403313, 0.2737523694189852, 0.28268743288092174, 0.3152678413998494, 0.32207365229684426, 0.3467714748818549, 0.3602618748521197, 0.3784991428750813, 0.40540582797327557, 0.36648110179403093, 0.3696725785255151, 0.35372701412876756, 0.3747523309532439, 0.41644297670673497, 0.36024640990222756, 0.377333245085688};


In [12]:
model.save('my_model')

loaded_model = tf.keras.models.load_model('my_model')

# Convert the model to the TensorFlow Lite format
converter = tf.lite.TFLiteConverter.from_keras_model(loaded_model)
tflite_model = converter.convert()

# Save the converted model
with open('my_model.tflite', 'wb') as f:
    f.write(tflite_model)



INFO:tensorflow:Assets written to: my_model/assets


INFO:tensorflow:Assets written to: my_model/assets


INFO:tensorflow:Assets written to: /tmp/tmpp_x3rylq/assets


INFO:tensorflow:Assets written to: /tmp/tmpp_x3rylq/assets
2023-04-23 19:45:10.381848: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:362] Ignored output_format.
2023-04-23 19:45:10.381872: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:365] Ignored drop_control_dependency.
2023-04-23 19:45:10.382273: I tensorflow/cc/saved_model/reader.cc:45] Reading SavedModel from: /tmp/tmpp_x3rylq
2023-04-23 19:45:10.383198: I tensorflow/cc/saved_model/reader.cc:89] Reading meta graph with tags { serve }
2023-04-23 19:45:10.383211: I tensorflow/cc/saved_model/reader.cc:130] Reading SavedModel debug info (if present) from: /tmp/tmpp_x3rylq
2023-04-23 19:45:10.386094: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:357] MLIR V1 optimization pass is not enabled
2023-04-23 19:45:10.386798: I tensorflow/cc/saved_model/loader.cc:229] Restoring SavedModel bundle.
2023-04-23 19:45:10.417347: I tensorflow/cc/saved_model/loader.cc:213] Running initializatio

In [13]:
model_path = 'my_model.tflite'
interpreter = Interpreter(model_path=model_path)
interpreter.allocate_tensors()
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()
single_test = np.reshape(np.array(reshaped_dev_set[dev_set_1_idx], dtype=np.float32), (1, NN_DATA_ROWS, NN_DATA_COLS, 1))
interpreter.set_tensor(input_details[0]['index'], single_test)
interpreter.invoke()
output_data = interpreter.get_tensor(output_details[0]['index'])[0]

print("Top 3 prections by (value, label)")
print(sorted(zip(output_data, [i for i in range(10)]), reverse=True)[:3])
print("Actual label was: {}\n".format(dev_set_labels[dev_set_1_idx]))

Top 3 prections by (value, label)
[(0.36168578, 6), (0.23412475, 7), (0.17962642, 5)]
Actual label was: 6



INFO: Created TensorFlow Lite XNNPACK delegate for CPU.
