Because converting the PyTorch model to TensorFlow proved difficult, this notebook re-implements the same CNN as before directly in TensorFlow. The TensorFlow model can then be converted to TensorFlow Lite so that it is portable to an Android device.

In [16]:
# imports 
import numpy as np
import os
import os.path as path
import librosa

import build.pybind_modules.dsp_module as cu
import build.pybind_modules.matrix_module as myMatrix

import keras
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Input, Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras import backend as K

import tensorflow as tf
from tensorflow.python.tools import freeze_graph
from tensorflow.python.tools import optimize_for_inference_lib
from tensorflow.keras.models import Model
from tensorflow.math import confusion_matrix
# tf.compat.v1.disable_eager_execution()

from tflite_runtime.interpreter import Interpreter

# Report the TensorFlow build in use, then every device TensorFlow can see.
print('TensorFlow version:', tf.__version__)
physical_devices = tf.config.list_physical_devices()
for device in physical_devices:
    print(device)

TensorFlow version: 2.12.0
PhysicalDevice(name='/physical_device:CPU:0', device_type='CPU')


In [17]:
# Parameters

# --- Model / training ---
MODEL_NAME = 'audio_mnist'  # file-name stem used by export_model
EPOCHS = 20                 # passed to model.fit
BATCH_SIZE = 64             # passed to model.fit

# --- Audio front end ---
# NOTE(review): none of these six values is referenced by the cells in this
# notebook; presumably they mirror the feature-extraction settings used when
# the CSV files were produced -- confirm against that notebook.
FS = 48_000
DOWNSAMPLED_FS = 8_000
NFFT = 256
NOVERLAP = -1
NFILT = 40
NUM_CEPS = 13

# --- Network input layout ---
# Each flat feature row is reshaped to NN_DATA_ROWS x NN_DATA_COLS.
NN_DATA_COLS = 48
NN_DATA_ROWS = 12

In [18]:
def relu6(x):
    """Apply ReLU to ``x`` with the output capped at 6.

    Used as the activation of the convolution layers in ``NeuralNet``.
    """
    capped = tf.keras.activations.relu(x, max_value=6)
    return capped

def compute_accuracies(predicted_labels, dev_set, dev_labels):
    """Score integer class predictions against the true labels.

    Args:
        predicted_labels: 1-D array-like of predicted class indices. Must have
            an integer dtype (any width, e.g. int32 or int64).
        dev_set: Unused; kept so existing call sites keep working.
        dev_labels: 1-D array-like of true class indices, one per prediction.

    Returns:
        A 2-tuple ``(accuracy, conf_m)``:
          * ``accuracy`` -- fraction of positions where prediction == label.
          * ``conf_m`` -- square float matrix where ``conf_m[i, j]`` counts the
            samples whose true label is ``i`` and predicted label is ``j``.
        If the two inputs differ in length (a message is printed) or are
        empty, ``(0.0, np.zeros((0, 0)))`` is returned.

    Raises:
        AssertionError: if ``predicted_labels`` does not have an integer dtype.
    """
    yhats = np.asarray(predicted_labels)
    labels = np.asarray(dev_labels)

    # ``dtype == int`` only matches the platform default integer, so it would
    # reject e.g. int32 labels; accept every integer width instead.
    assert np.issubdtype(yhats.dtype, np.integer), "Your predicted labels have type {}, but they should have an integer dtype (consider using .astype(int) on your output)".format(yhats.dtype)

    if len(yhats) != len(labels):
        print("Lengths of predicted labels don't match length of actual labels", len(yhats), len(labels))
        # Same arity as the success path so callers can always unpack 2 values.
        return 0., np.zeros((0, 0))
    if len(labels) == 0:
        return 0., np.zeros((0, 0))

    accuracy = np.mean(yhats == labels)

    # Size the matrix so that every observed class index is addressable, even
    # when a class is missing from dev_labels or only appears in predictions.
    n_classes = max(len(np.unique(labels)), int(labels.max()) + 1, int(yhats.max()) + 1)
    conf_m = np.zeros((n_classes, n_classes))
    # Unbuffered add: repeated (label, prediction) pairs are each counted.
    np.add.at(conf_m, (labels, yhats), 1)

    return accuracy, conf_m

def export_model(saver, model, input_node_names, output_node_name):
    """Freeze the Keras backend session graph and write an optimized copy.

    Four artifacts are written under ``out/``, all named after MODEL_NAME:
    the text graph definition (``*_graph.pbtxt``), a checkpoint (``*.chkp``),
    the frozen graph (``frozen_*.pb``) and the inference-optimized graph
    (``opt_*.pb``).

    Uses the ``tf.io`` / ``tf.compat.v1`` spellings of the graph and file
    APIs, because the bare TF1 names (``tf.GraphDef``, ``tf.gfile``,
    ``tf.train.write_graph``) are not available in TensorFlow 2.

    NOTE(review): this still exports the graph held by ``K.get_session()``;
    under TF2 that presumably only contains the model when eager execution
    is disabled -- confirm before relying on this function.

    Args:
        saver: Object whose ``save(session, path)`` writes the checkpoint
            (presumably a ``tf.compat.v1.train.Saver`` -- confirm).
        model: Unused; kept so existing call sites keep working.
        input_node_names: List of input node names handed to
            ``optimize_for_inference``.
        output_node_name: Name of the graph's output node.
    """
    session = K.get_session()

    graph_file_name = MODEL_NAME + '_graph.pbtxt'
    graph_path = 'out/' + graph_file_name
    checkpoint_path = 'out/' + MODEL_NAME + '.chkp'
    frozen_path = 'out/frozen_' + MODEL_NAME + '.pb'
    optimized_path = 'out/opt_' + MODEL_NAME + '.pb'

    tf.io.write_graph(session.graph_def, 'out', graph_file_name)

    saver.save(session, checkpoint_path)

    freeze_graph.freeze_graph(
        graph_path,          # input_graph
        None,                # input_saver
        False,               # input_binary (the graph above is text)
        checkpoint_path,     # input_checkpoint
        output_node_name,    # output_node_names
        "save/restore_all",  # restore_op_name
        "save/Const:0",      # filename_tensor_name
        frozen_path,         # output_graph
        True,                # clear_devices
        "")                  # initializer_nodes

    input_graph_def = tf.compat.v1.GraphDef()
    with tf.io.gfile.GFile(frozen_path, "rb") as f:
        input_graph_def.ParseFromString(f.read())

    output_graph_def = optimize_for_inference_lib.optimize_for_inference(
            input_graph_def, input_node_names, [output_node_name],
            tf.float32.as_datatype_enum)

    with tf.io.gfile.GFile(optimized_path, "wb") as f:
        f.write(output_graph_def.SerializeToString())

    print("graph saved!")


class NeuralNet(tf.keras.Model):
    """CNN classifier: two conv + max-pool stages followed by three dense layers.

    The final dense layer uses a softmax activation, so the model outputs
    class probabilities rather than logits.

    Args:
        out_size: Number of units in the final (softmax) layer, i.e. the
            number of classes.
    """

    def __init__(self, out_size):
        super().__init__()

        self.conv1 = tf.keras.layers.Conv2D(filters=10, kernel_size=(3,3), activation=relu6, padding='same', kernel_initializer='he_uniform')
        # One pooling layer instance is applied after both convolutions.
        self.maxpool = tf.keras.layers.MaxPooling2D(pool_size=(3,3), padding='same')
        self.conv2 = tf.keras.layers.Conv2D(filters=20, kernel_size=(3,3), activation=relu6, padding='same', kernel_initializer='he_uniform')
        self.dropout_1 = tf.keras.layers.Dropout(rate=0.1)
        self.dropout_2 = tf.keras.layers.Dropout(rate=0.16)
        self.dropout_3 = tf.keras.layers.Dropout(rate=0.12)

        self.flatten = tf.keras.layers.Flatten()
        self.dense_1 = tf.keras.layers.Dense(units=5000, activation='relu', kernel_initializer='he_uniform')
        self.dense_2 = tf.keras.layers.Dense(units=1000, activation='relu', kernel_initializer='he_uniform')
        self.dense_3 = tf.keras.layers.Dense(units=out_size, activation='softmax', kernel_initializer='he_uniform')

    def call(self, x, training=None):
        """Run the forward pass.

        Args:
            x: Input batch for the first Conv2D layer.
            training: Forwarded to the dropout layers. Keras supplies it
                automatically (True inside ``fit``, False for
                ``predict``/``evaluate``), so dropout is no longer applied at
                inference time and predictions are deterministic.

        Returns:
            The softmax output of the last dense layer.
        """
        x = self.conv1(x)
        x = self.maxpool(x)
        x = self.conv2(x)
        x = self.maxpool(x)
        x = self.dropout_1(x, training=training)
        x = self.flatten(x)
        x = self.dense_1(x)
        x = self.dropout_2(x, training=training)
        x = self.dense_2(x)
        x = self.dropout_3(x, training=training)
        x = self.dense_3(x)
        return x

In [19]:
# Load the features and labels (the CSV files are expected to have been
# exported by the other spoken_digit_recognition notebook).

dev_set_labels = np.loadtxt("l_dev_set_labels.csv", delimiter=",", dtype=np.int32)
train_labels = np.loadtxt("train_labels.csv", delimiter=",", dtype=np.int32)
train_set = np.loadtxt("train_set.csv", delimiter=",", dtype=np.float64)
dev_set = np.loadtxt("dev_set.csv", delimiter=",", dtype=np.float64)

# Every sample needs exactly one label; fail here rather than deep inside fit().
assert len(dev_set) == len(dev_set_labels), "dev_set and dev_set_labels differ in length"
assert len(train_set) == len(train_labels), "train_set and train_labels differ in length"

print("Len dev_set_labels: {}".format(len(dev_set_labels)))
# Report the feature array's own length (this line used to repeat the label count).
print("Len dev_set: {}".format(len(dev_set)))
print("Len train_set_labels: {}".format(len(train_labels)))
print("Len train_set: {}".format(len(train_set)))

print("\nTrainset shape: {}".format(train_set.shape))

Len dev_set_labels: 200
Len dev_set: 200
Len train_set_labels: 1000
Len train_set: 1000

Trainset shape: (1000, 576)


In [20]:
# Turn every flat feature row into an (NN_DATA_ROWS, NN_DATA_COLS) matrix.
# The whole array is reshaped in one call; astype(np.float64) then yields a
# fresh float64 array, so the result does not alias the loaded data.
reshaped_dev_set = np.reshape(
    dev_set, (len(dev_set), NN_DATA_ROWS, NN_DATA_COLS)
).astype(np.float64)
reshaped_train_set = np.reshape(
    train_set, (len(train_set), NN_DATA_ROWS, NN_DATA_COLS)
).astype(np.float64)

In [21]:
# Build the model.
# batch_size / epochs mirror the constants from the parameter cell so the two
# spellings cannot drift apart (model.fit uses BATCH_SIZE / EPOCHS).
batch_size = BATCH_SIZE
epochs = EPOCHS
input_dim = NUM_CEPS - 1  # = 12
output_dim = 10  # number of digit classes (0-9)
weight_decay = 1e-2
learning_rate = 1e-2

# Single-channel input of NN_DATA_ROWS x NN_DATA_COLS features per sample.
input_layer = Input(shape=(NN_DATA_ROWS, NN_DATA_COLS, 1))
x = NeuralNet(output_dim)(input_layer)

model = Model(inputs=input_layer, outputs=x)

# Print a view of the model. summary() prints by itself and returns None, so
# wrapping it in print() would emit a stray "None" line.
model.summary(expand_nested=True)

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 12, 48, 1)]       0         
                                                                 
 neural_net (NeuralNet)      (None, 10)                6217930   
|¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯|
| conv2d (Conv2D)           multiple                  100       |
|                                                               |
| max_pooling2d (MaxPooling2D  multiple               0         |
| )                                                             |
|                                                               |
| conv2d_1 (Conv2D)         multiple                  1820      |
|                                                               |
| dropout (Dropout)         multiple                  0         |
|                                                            

In [22]:
# Train the model.
# The network's last layer already applies softmax, so the loss is given
# probabilities (from_logits keeps its default of False). Accuracy is tracked
# so that per-epoch train/validation accuracy is logged and stored in `history`.
model.compile(
    optimizer=tf.keras.optimizers.SGD(learning_rate=learning_rate, weight_decay=weight_decay),
    loss=tf.keras.losses.SparseCategoricalCrossentropy(),
    metrics=["accuracy"],
)
history = model.fit(
    x=reshaped_train_set,
    y=train_labels,
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
    validation_data=(reshaped_dev_set, dev_set_labels),
)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [23]:

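# TODO: compute the dev-set confusion matrix once per-sample class predictions
# are available, e.g.:
# confusion = confusion_matrix(labels=dev_set_labels, predictions=<predicted labels>, num_classes=output_dim)
# print(confusion)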

In [24]:
# Spot-check the trained model on two dev-set samples.
dev_set_1_idx = 20
dev_set_2_idx = 180
sample_indices = [dev_set_1_idx, dev_set_2_idx]
# Despite its name, `single_test` holds one row per index in sample_indices.
single_test = tf.convert_to_tensor(reshaped_dev_set[sample_indices])
predictions = model(single_test, training=False).numpy()

for sample_idx, class_probs in zip(sample_indices, predictions):
    # Pair every probability with its class index, then sort highest first.
    ranked = sorted(zip(class_probs, range(len(class_probs))), reverse=True)
    print("Top 3 predictions by (value, label)")
    print(ranked[:3])
    print("Actual label was: {}\n".format(dev_set_labels[sample_idx]))

Top 3 prections by (value, label)
[(0.98286, 8), (0.012504478, 9), (0.0035975103, 0)]
Actual label was: 8

Top 3 prections by (value, label)
[(0.9820525, 1), (0.012819302, 4), (0.0034329419, 3)]
Actual label was: 1



In [25]:
def displayFIR(filt):
    """Print the values of ``filt`` as a brace-delimited, comma-separated list.

    Two lines are printed: the header ``FIR a Coefficients`` and then
    ``{v0, v1, ...};`` where each value is rendered with ``str``. An empty
    ``filt`` prints ``{};``.

    Args:
        filt: Iterable of values to print.
    """
    # join() places separators only between items, so nothing has to be
    # sliced off afterwards and the empty case keeps its opening brace.
    coef_str = "{" + ", ".join(str(val) for val in filt) + "};"
    print("FIR a Coefficients")
    print(coef_str)

In [26]:
# Print row 11 of the first spot-check sample in displayFIR's brace-delimited form.
displayFIR(reshaped_dev_set[dev_set_1_idx, 11])

FIR a Coefficients
{4.608795255997088, 6.698753287757723, 8.236016873501589, 7.700499116598714, 5.245252962919166, 3.8466566539754545, 0.2557723577548203, -2.224424702878995, -1.7538192820271359, -1.6108894693193423, -1.5526189819315854, -1.4217778875485052, 0.6884055890191354, 0.19267831195985646, 2.3121828630044594, 4.709698465608981, 4.254232469349811, 4.867784266241455, 3.6521102731532706, 5.176660526987325, 5.1262866046084, 7.18305450651508, 7.307006051256813, 9.797304678514347, 6.722168772648709, 7.086903804628322, 2.1571777229279383, 2.0079520449434693, -0.6501586804137662, -3.51383108746593, -3.809193133424525, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0};


In [27]:
# Save the trained model under 'my_model' and read it straight back.
model.save('my_model')
loaded_model = tf.keras.models.load_model('my_model')

# Build the TensorFlow Lite representation from the reloaded Keras model.
converter = tf.lite.TFLiteConverter.from_keras_model(loaded_model)
tflite_model = converter.convert()

# Write the converted model to disk.
with open('my_model.tflite', 'wb') as tflite_file:
    tflite_file.write(tflite_model)

2023-04-20 19:48:35.673139: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'inputs' with dtype float and shape [?,2,6,20]
	 [[{{node inputs}}]]
2023-04-20 19:48:35.694500: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'inputs' with dtype float and shape [?,5000]
	 [[{{node inputs}}]]
2023-04-20 19:48:35.711266: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'inputs' with dtype float and shape [?,1000]
	 [[{{node inputs}}]]
2023-04-20 

INFO:tensorflow:Assets written to: my_model/assets


INFO:tensorflow:Assets written to: my_model/assets


INFO:tensorflow:Assets written to: /tmp/tmpuubgckse/assets


INFO:tensorflow:Assets written to: /tmp/tmpuubgckse/assets
2023-04-20 19:48:38.537047: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:364] Ignored output_format.
2023-04-20 19:48:38.537076: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:367] Ignored drop_control_dependency.
2023-04-20 19:48:38.537471: I tensorflow/cc/saved_model/reader.cc:45] Reading SavedModel from: /tmp/tmpuubgckse
2023-04-20 19:48:38.538943: I tensorflow/cc/saved_model/reader.cc:89] Reading meta graph with tags { serve }
2023-04-20 19:48:38.538963: I tensorflow/cc/saved_model/reader.cc:130] Reading SavedModel debug info (if present) from: /tmp/tmpuubgckse
2023-04-20 19:48:38.542368: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:353] MLIR V1 optimization pass is not enabled
2023-04-20 19:48:38.543484: I tensorflow/cc/saved_model/loader.cc:231] Restoring SavedModel bundle.
2023-04-20 19:48:38.587917: I tensorflow/cc/saved_model/loader.cc:215] Running initializatio

In [34]:
# Run the converted model on one dev-set sample with the TFLite interpreter.
model_path = 'my_model.tflite'
interpreter = Interpreter(model_path=model_path)
interpreter.allocate_tensors()
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()

# Batch of one float32 sample with a trailing channel axis. The shape is
# built from the shared constants instead of repeating the literals 12 and 48.
single_test = np.reshape(
    np.array(reshaped_dev_set[dev_set_1_idx], dtype=np.float32),
    (1, NN_DATA_ROWS, NN_DATA_COLS, 1),
)
interpreter.set_tensor(input_details[0]['index'], single_test)
interpreter.invoke()
output_data = interpreter.get_tensor(output_details[0]['index'])
print("Output data: {}".format(output_data))
# Index of the highest-scoring class.
print(np.argmax(output_data))

Output data: [[1.3611508e-03 3.5230021e-04 2.9098062e-06 2.4490946e-06 3.4021759e-05
  5.2604580e-04 8.8414885e-03 1.0675494e-04 9.8448426e-01 4.2886338e-03]]
8
