# Training Keyword Spotting

## Preparation

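- Download and unzip the [tensorflow-2.10.0](https://github.com/tensorflow/tensorflow/archive/v2.10.0.zip) source code.
- Point the `sys.path.append(...)` call in the cell below, and the `train.py` / `freeze.py` script paths in the later cells, at the `tensorflow/examples/speech_commands` directory of the source tree you unzipped (the version in those paths must match the one you downloaded).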


In [62]:
import tensorflow.compat.v1 as tf
import sys
sys.path.append("E:\\data\\tensorflow-2.18.0\\tensorflow\\examples\\speech_commands\\")
import models
import input_data
import numpy as np
import pickle

In [63]:
# A comma-delimited list of the words you want to train for.
# All the other words you do not select will be used to train 
# an "unknown" label so that the model does not just recognize
# speech but your specific words. Audio data with no spoken 
# words will be used to train a "silence" label.

WANTED_WORDS = "yes,no"
TRAINING_STEPS = "12000,3000"
LEARNING_RATE = "0.001,0.0001"
MODEL_ARCHITECTURE = 'tiny_conv'
# Print the configuration to confirm it
print("Spotting these words: %s" % WANTED_WORDS)

# Calculate the total number of steps, which is used to identify the checkpoint
# file name.
TOTAL_STEPS = str(sum(map(lambda string: int(string), TRAINING_STEPS.split(","))))

# Print the configuration to confirm it
print("Training these words: %s" % WANTED_WORDS)
print("Training steps in each stage: %s" % TRAINING_STEPS)
print("Learning rate in each stage: %s" % LEARNING_RATE)
print("Total number of training steps: %s" % TOTAL_STEPS)

Spotting these words: yes,no
Training these words: yes,no
Training steps in each stage: 12000,3000
Learning rate in each stage: 0.001,0.0001
Total number of training steps: 15000


In [64]:
# Calculate the percentage of 'silence' and 'unknown' training samples required
# to ensure that we have equal number of samples for each label.
number_of_labels = WANTED_WORDS.count(',') + 1
number_of_total_labels = number_of_labels + 2 # for 'silence' and 'unknown' label
equal_percentage_of_training_samples = int(100.0/(number_of_total_labels))
SILENT_PERCENTAGE = equal_percentage_of_training_samples
UNKNOWN_PERCENTAGE = equal_percentage_of_training_samples

# Constants which are shared during training and inference
PREPROCESS = 'micro'
WINDOW_STRIDE = 20

# Constants used during training only
VERBOSITY = 'DEBUG'
EVAL_STEP_INTERVAL = '1000'
SAVE_STEP_INTERVAL = '1000'

# Constants for training directories and filepaths
DATASET_DIR =  'dataset/'
LOGS_DIR = 'logs/'
TRAIN_DIR = 'train/' # for training checkpoints and other files.

# Constants for inference directories and filepaths
import os
MODELS_DIR = 'models'
if not os.path.exists(MODELS_DIR):
  os.mkdir(MODELS_DIR)
MODEL_TF = os.path.join(MODELS_DIR, 'model.pb')
MODEL_TFLITE = os.path.join(MODELS_DIR, 'model.tflite')
FLOAT_MODEL_TFLITE = os.path.join(MODELS_DIR, 'float_model.tflite')
MODEL_TFLITE_MICRO = os.path.join(MODELS_DIR, 'model.cc')
SAVED_MODEL = os.path.join(MODELS_DIR, 'saved_model')

# Constants for Quantization
QUANT_INPUT_MIN = 0.0
QUANT_INPUT_MAX = 26.0
QUANT_INPUT_RANGE = QUANT_INPUT_MAX - QUANT_INPUT_MIN

# Constants for audio process during Quantization and Evaluation
SAMPLE_RATE = 16000
CLIP_DURATION_MS = 1000
WINDOW_SIZE_MS = 30.0
FEATURE_BIN_COUNT = 40
BACKGROUND_FREQUENCY = 0.8
BACKGROUND_VOLUME_RANGE = 0.1
TIME_SHIFT_MS = 100.0

# URL for the dataset and train/val/test split
DATA_URL = 'https://storage.googleapis.com/download.tensorflow.org/data/speech_commands_v0.02.tar.gz'
VALIDATION_PERCENTAGE = 10
TESTING_PERCENTAGE = 10

In [65]:
# Calculate the correct flattened input data shape for later use in model conversion
# since the model takes a flattened version of the spectrogram. The shape is number of 
# overlapping windows times the number of frequency bins. For the default settings we have
# 40 bins (as set above) times 49 windows (as calculated below) so the shape is (1,1960)
def window_counter(total_samples, window_size, stride):
  '''helper function to count the number of full-length overlapping windows'''
  window_count = 0
  sample_index = 0
  while True:
    window = range(sample_index,sample_index+stride)
    if window.stop < total_samples:
      window_count += 1
    else:
      break
    
    sample_index += stride
  return window_count

OVERLAPPING_WINDOWS = window_counter(CLIP_DURATION_MS, int(WINDOW_SIZE_MS), WINDOW_STRIDE)
FLATTENED_SPECTROGRAM_SHAPE = (1, OVERLAPPING_WINDOWS * FEATURE_BIN_COUNT)

## Train the model


In [66]:
!python E:/data/tensorflow-2.18.0/tensorflow/examples/speech_commands/train.py \
--data_dir={DATASET_DIR} \
--wanted_words={WANTED_WORDS} \
--silence_percentage={SILENT_PERCENTAGE} \
--unknown_percentage={UNKNOWN_PERCENTAGE} \
--preprocess={PREPROCESS} \
--window_stride={WINDOW_STRIDE} \
--model_architecture={MODEL_ARCHITECTURE} \
--how_many_training_steps={TRAINING_STEPS} \
--learning_rate={LEARNING_RATE} \
--train_dir={TRAIN_DIR} \
--summaries_dir={LOGS_DIR} \
--verbosity={VERBOSITY} \
--eval_step_interval={EVAL_STEP_INTERVAL} \
--save_step_interval={SAVE_STEP_INTERVAL}

2025-03-05 20:49:42.503255: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'cudart64_110.dll'; dlerror: cudart64_110.dll not found
2025-03-05 20:49:42.503621: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.
2025-03-05 20:49:45.455723: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'cudart64_110.dll'; dlerror: cudart64_110.dll not found
2025-03-05 20:49:45.457381: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'cublas64_11.dll'; dlerror: cublas64_11.dll not found
2025-03-05 20:49:45.458921: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'cublasLt64_11.dll'; dlerror: cublasLt64_11.dll not found
2025-03-05 20:49:45.460337: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'cu

In [67]:
# manually delete the {SAVED_MODEL} directory
!python E:/data/tensorflow-2.18.0/tensorflow/examples/speech_commands/freeze.py \
--wanted_words={WANTED_WORDS} \
--window_stride_ms={WINDOW_STRIDE} \
--preprocess={PREPROCESS} \
--model_architecture={MODEL_ARCHITECTURE} \
--start_checkpoint={TRAIN_DIR}{MODEL_ARCHITECTURE}.ckpt-{TOTAL_STEPS} \
--save_format=saved_model \
--output_file={SAVED_MODEL}

2025-03-05 22:10:38.528869: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'cudart64_110.dll'; dlerror: cudart64_110.dll not found
2025-03-05 22:10:38.529114: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.
2025-03-05 22:10:46.762560: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'cudart64_110.dll'; dlerror: cudart64_110.dll not found
2025-03-05 22:10:46.764007: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'cublas64_11.dll'; dlerror: cublas64_11.dll not found
2025-03-05 22:10:46.765818: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'cublasLt64_11.dll'; dlerror: cublasLt64_11.dll not found
2025-03-05 22:10:46.767499: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'cu

In [71]:
model_settings = models.prepare_model_settings(
    len(input_data.prepare_words_list(WANTED_WORDS.split(','))),
    SAMPLE_RATE, CLIP_DURATION_MS, WINDOW_SIZE_MS,
    WINDOW_STRIDE, FEATURE_BIN_COUNT, PREPROCESS)
audio_processor = input_data.AudioProcessor(
    DATA_URL, DATASET_DIR,
    SILENT_PERCENTAGE, UNKNOWN_PERCENTAGE,
    WANTED_WORDS.split(','), VALIDATION_PERCENTAGE,
    TESTING_PERCENTAGE, model_settings, LOGS_DIR)

In [72]:
with tf.compat.v1.Session() as sess: #replaces the above line for use with TF2.x
  float_converter = tf.lite.TFLiteConverter.from_saved_model(SAVED_MODEL)
  float_tflite_model = float_converter.convert()
  float_tflite_model_size = open(FLOAT_MODEL_TFLITE, "wb").write(float_tflite_model)
  print("Float model is %d bytes" % float_tflite_model_size)

  converter = tf.lite.TFLiteConverter.from_saved_model(SAVED_MODEL)
  converter.optimizations = [tf.lite.Optimize.DEFAULT]
  #converter.inference_input_type = tf.lite.constants.INT8
  converter.inference_input_type = tf.compat.v1.lite.constants.INT8 #replaces the above line for use with TF2.x   
  #converter.inference_output_type = tf.lite.constants.INT8
  converter.inference_output_type = tf.compat.v1.lite.constants.INT8 #replaces the above line for use with TF2.x
  def representative_dataset_gen():
    for i in range(100):
      data, _ = audio_processor.get_data(1, i*1, model_settings,
                                         BACKGROUND_FREQUENCY, 
                                         BACKGROUND_VOLUME_RANGE,
                                         TIME_SHIFT_MS,
                                         'testing',
                                         sess)
      flattened_data = np.array(data.flatten(), dtype=np.float32).reshape(FLATTENED_SPECTROGRAM_SHAPE)
      yield [flattened_data]
  converter.representative_dataset = representative_dataset_gen  
  tflite_model = converter.convert()
  tflite_model_size = open(MODEL_TFLITE, "wb").write(tflite_model)
  print("Quantized model is %d bytes" % tflite_model_size)


INFO:tensorflow:Restoring parameters from models\saved_model\variables\variables


INFO:tensorflow:Restoring parameters from models\saved_model\variables\variables


INFO:tensorflow:The given SavedModel MetaGraphDef contains SignatureDefs with the following keys: {'serving_default'}


INFO:tensorflow:The given SavedModel MetaGraphDef contains SignatureDefs with the following keys: {'serving_default'}


INFO:tensorflow:input tensors info: 


INFO:tensorflow:input tensors info: 


INFO:tensorflow:Tensor's key in saved_model's tensor_map: input


INFO:tensorflow:Tensor's key in saved_model's tensor_map: input


INFO:tensorflow: tensor name: Reshape_1:0, shape: (1, 1960), type: DT_FLOAT


INFO:tensorflow: tensor name: Reshape_1:0, shape: (1, 1960), type: DT_FLOAT


INFO:tensorflow:output tensors info: 


INFO:tensorflow:output tensors info: 


INFO:tensorflow:Tensor's key in saved_model's tensor_map: output


INFO:tensorflow:Tensor's key in saved_model's tensor_map: output


INFO:tensorflow: tensor name: labels_softmax:0, shape: (1, 4), type: DT_FLOAT


INFO:tensorflow: tensor name: labels_softmax:0, shape: (1, 4), type: DT_FLOAT


INFO:tensorflow:Restoring parameters from models\saved_model\variables\variables


INFO:tensorflow:Restoring parameters from models\saved_model\variables\variables


INFO:tensorflow:Restoring parameters from models\saved_model\variables\variables


INFO:tensorflow:Restoring parameters from models\saved_model\variables\variables


INFO:tensorflow:The given SavedModel MetaGraphDef contains SignatureDefs with the following keys: {'serving_default'}


INFO:tensorflow:The given SavedModel MetaGraphDef contains SignatureDefs with the following keys: {'serving_default'}


INFO:tensorflow:input tensors info: 


INFO:tensorflow:input tensors info: 


INFO:tensorflow:Tensor's key in saved_model's tensor_map: input


INFO:tensorflow:Tensor's key in saved_model's tensor_map: input


INFO:tensorflow: tensor name: Reshape_1:0, shape: (1, 1960), type: DT_FLOAT


INFO:tensorflow: tensor name: Reshape_1:0, shape: (1, 1960), type: DT_FLOAT


INFO:tensorflow:output tensors info: 


INFO:tensorflow:output tensors info: 


INFO:tensorflow:Tensor's key in saved_model's tensor_map: output


INFO:tensorflow:Tensor's key in saved_model's tensor_map: output


INFO:tensorflow: tensor name: labels_softmax:0, shape: (1, 4), type: DT_FLOAT


INFO:tensorflow: tensor name: labels_softmax:0, shape: (1, 4), type: DT_FLOAT


INFO:tensorflow:Restoring parameters from models\saved_model\variables\variables


INFO:tensorflow:Restoring parameters from models\saved_model\variables\variables


Float model is 68356 bytes
INFO:tensorflow:Restoring parameters from models\saved_model\variables\variables


INFO:tensorflow:Restoring parameters from models\saved_model\variables\variables


INFO:tensorflow:The given SavedModel MetaGraphDef contains SignatureDefs with the following keys: {'serving_default'}


INFO:tensorflow:The given SavedModel MetaGraphDef contains SignatureDefs with the following keys: {'serving_default'}


INFO:tensorflow:input tensors info: 


INFO:tensorflow:input tensors info: 


INFO:tensorflow:Tensor's key in saved_model's tensor_map: input


INFO:tensorflow:Tensor's key in saved_model's tensor_map: input


INFO:tensorflow: tensor name: Reshape_1:0, shape: (1, 1960), type: DT_FLOAT


INFO:tensorflow: tensor name: Reshape_1:0, shape: (1, 1960), type: DT_FLOAT


INFO:tensorflow:output tensors info: 


INFO:tensorflow:output tensors info: 


INFO:tensorflow:Tensor's key in saved_model's tensor_map: output


INFO:tensorflow:Tensor's key in saved_model's tensor_map: output


INFO:tensorflow: tensor name: labels_softmax:0, shape: (1, 4), type: DT_FLOAT


INFO:tensorflow: tensor name: labels_softmax:0, shape: (1, 4), type: DT_FLOAT


INFO:tensorflow:Restoring parameters from models\saved_model\variables\variables


INFO:tensorflow:Restoring parameters from models\saved_model\variables\variables


INFO:tensorflow:Restoring parameters from models\saved_model\variables\variables


INFO:tensorflow:Restoring parameters from models\saved_model\variables\variables


INFO:tensorflow:The given SavedModel MetaGraphDef contains SignatureDefs with the following keys: {'serving_default'}


INFO:tensorflow:The given SavedModel MetaGraphDef contains SignatureDefs with the following keys: {'serving_default'}


INFO:tensorflow:input tensors info: 


INFO:tensorflow:input tensors info: 


INFO:tensorflow:Tensor's key in saved_model's tensor_map: input


INFO:tensorflow:Tensor's key in saved_model's tensor_map: input


INFO:tensorflow: tensor name: Reshape_1:0, shape: (1, 1960), type: DT_FLOAT


INFO:tensorflow: tensor name: Reshape_1:0, shape: (1, 1960), type: DT_FLOAT


INFO:tensorflow:output tensors info: 


INFO:tensorflow:output tensors info: 


INFO:tensorflow:Tensor's key in saved_model's tensor_map: output


INFO:tensorflow:Tensor's key in saved_model's tensor_map: output


INFO:tensorflow: tensor name: labels_softmax:0, shape: (1, 4), type: DT_FLOAT


INFO:tensorflow: tensor name: labels_softmax:0, shape: (1, 4), type: DT_FLOAT


INFO:tensorflow:Restoring parameters from models\saved_model\variables\variables


INFO:tensorflow:Restoring parameters from models\saved_model\variables\variables


InvalidArgumentError: Graph execution error:

Detected at node 'data_13/wav_filename' defined at (most recent call last):
    File "f:\Anaconda3\envs\tf2.10\lib\runpy.py", line 196, in _run_module_as_main
      return _run_code(code, main_globals, None,
    File "f:\Anaconda3\envs\tf2.10\lib\runpy.py", line 86, in _run_code
      exec(code, run_globals)
    File "f:\Anaconda3\envs\tf2.10\lib\site-packages\ipykernel_launcher.py", line 18, in <module>
      app.launch_new_instance()
    File "f:\Anaconda3\envs\tf2.10\lib\site-packages\traitlets\config\application.py", line 1075, in launch_instance
      app.start()
    File "f:\Anaconda3\envs\tf2.10\lib\site-packages\ipykernel\kernelapp.py", line 739, in start
      self.io_loop.start()
    File "f:\Anaconda3\envs\tf2.10\lib\site-packages\tornado\platform\asyncio.py", line 205, in start
      self.asyncio_loop.run_forever()
    File "f:\Anaconda3\envs\tf2.10\lib\asyncio\base_events.py", line 603, in run_forever
      self._run_once()
    File "f:\Anaconda3\envs\tf2.10\lib\asyncio\base_events.py", line 1909, in _run_once
      handle._run()
    File "f:\Anaconda3\envs\tf2.10\lib\asyncio\events.py", line 80, in _run
      self._context.run(self._callback, *self._args)
    File "f:\Anaconda3\envs\tf2.10\lib\site-packages\ipykernel\kernelbase.py", line 545, in dispatch_queue
      await self.process_one()
    File "f:\Anaconda3\envs\tf2.10\lib\site-packages\ipykernel\kernelbase.py", line 534, in process_one
      await dispatch(*args)
    File "f:\Anaconda3\envs\tf2.10\lib\site-packages\ipykernel\kernelbase.py", line 437, in dispatch_shell
      await result
    File "f:\Anaconda3\envs\tf2.10\lib\site-packages\ipykernel\ipkernel.py", line 362, in execute_request
      await super().execute_request(stream, ident, parent)
    File "f:\Anaconda3\envs\tf2.10\lib\site-packages\ipykernel\kernelbase.py", line 778, in execute_request
      reply_content = await reply_content
    File "f:\Anaconda3\envs\tf2.10\lib\site-packages\ipykernel\ipkernel.py", line 449, in do_execute
      res = shell.run_cell(
    File "f:\Anaconda3\envs\tf2.10\lib\site-packages\ipykernel\zmqshell.py", line 549, in run_cell
      return super().run_cell(*args, **kwargs)
    File "f:\Anaconda3\envs\tf2.10\lib\site-packages\IPython\core\interactiveshell.py", line 3077, in run_cell
      result = self._run_cell(
    File "f:\Anaconda3\envs\tf2.10\lib\site-packages\IPython\core\interactiveshell.py", line 3132, in _run_cell
      result = runner(coro)
    File "f:\Anaconda3\envs\tf2.10\lib\site-packages\IPython\core\async_helpers.py", line 128, in _pseudo_sync_runner
      coro.send(None)
    File "f:\Anaconda3\envs\tf2.10\lib\site-packages\IPython\core\interactiveshell.py", line 3336, in run_cell_async
      has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
    File "f:\Anaconda3\envs\tf2.10\lib\site-packages\IPython\core\interactiveshell.py", line 3519, in run_ast_nodes
      if await self.run_code(code, result, async_=asy):
    File "f:\Anaconda3\envs\tf2.10\lib\site-packages\IPython\core\interactiveshell.py", line 3579, in run_code
      exec(code_obj, self.user_global_ns, self.user_ns)
    File "C:\Users\Linh B Ngo\AppData\Local\Temp\ipykernel_23572\638176283.py", line 1, in <module>
      run_tflite_inference_singleFile(FLOAT_MODEL_TFLITE, audio_loud, sr_loud, model_type="Quantized")
    File "C:\Users\Linh B Ngo\AppData\Local\Temp\ipykernel_23572\4213617393.py", line 18, in run_tflite_inference_singleFile
      custom_audio_processor = input_data.AudioProcessor(None, None, 0, 0, '', 0, 0,
    File "E:\data\tensorflow-2.10.0\tensorflow\examples\speech_commands\input_data.py", line 198, in __init__
      self.prepare_processing_graph(model_settings, summaries_dir)
    File "E:\data\tensorflow-2.10.0\tensorflow\examples\speech_commands\input_data.py", line 392, in prepare_processing_graph
      self.wav_filename_placeholder_ = tf.compat.v1.placeholder(
Node: 'data_13/wav_filename'
You must feed a value for placeholder tensor 'data_13/wav_filename' with dtype string
	 [[{{node data_13/wav_filename}}]]

Original stack trace for 'data_13/wav_filename':
  File "f:\Anaconda3\envs\tf2.10\lib\runpy.py", line 196, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "f:\Anaconda3\envs\tf2.10\lib\runpy.py", line 86, in _run_code
    exec(code, run_globals)
  File "f:\Anaconda3\envs\tf2.10\lib\site-packages\ipykernel_launcher.py", line 18, in <module>
    app.launch_new_instance()
  File "f:\Anaconda3\envs\tf2.10\lib\site-packages\traitlets\config\application.py", line 1075, in launch_instance
    app.start()
  File "f:\Anaconda3\envs\tf2.10\lib\site-packages\ipykernel\kernelapp.py", line 739, in start
    self.io_loop.start()
  File "f:\Anaconda3\envs\tf2.10\lib\site-packages\tornado\platform\asyncio.py", line 205, in start
    self.asyncio_loop.run_forever()
  File "f:\Anaconda3\envs\tf2.10\lib\asyncio\base_events.py", line 603, in run_forever
    self._run_once()
  File "f:\Anaconda3\envs\tf2.10\lib\asyncio\base_events.py", line 1909, in _run_once
    handle._run()
  File "f:\Anaconda3\envs\tf2.10\lib\asyncio\events.py", line 80, in _run
    self._context.run(self._callback, *self._args)
  File "f:\Anaconda3\envs\tf2.10\lib\site-packages\ipykernel\kernelbase.py", line 545, in dispatch_queue
    await self.process_one()
  File "f:\Anaconda3\envs\tf2.10\lib\site-packages\ipykernel\kernelbase.py", line 534, in process_one
    await dispatch(*args)
  File "f:\Anaconda3\envs\tf2.10\lib\site-packages\ipykernel\kernelbase.py", line 437, in dispatch_shell
    await result
  File "f:\Anaconda3\envs\tf2.10\lib\site-packages\ipykernel\ipkernel.py", line 362, in execute_request
    await super().execute_request(stream, ident, parent)
  File "f:\Anaconda3\envs\tf2.10\lib\site-packages\ipykernel\kernelbase.py", line 778, in execute_request
    reply_content = await reply_content
  File "f:\Anaconda3\envs\tf2.10\lib\site-packages\ipykernel\ipkernel.py", line 449, in do_execute
    res = shell.run_cell(
  File "f:\Anaconda3\envs\tf2.10\lib\site-packages\ipykernel\zmqshell.py", line 549, in run_cell
    return super().run_cell(*args, **kwargs)
  File "f:\Anaconda3\envs\tf2.10\lib\site-packages\IPython\core\interactiveshell.py", line 3077, in run_cell
    result = self._run_cell(
  File "f:\Anaconda3\envs\tf2.10\lib\site-packages\IPython\core\interactiveshell.py", line 3132, in _run_cell
    result = runner(coro)
  File "f:\Anaconda3\envs\tf2.10\lib\site-packages\IPython\core\async_helpers.py", line 128, in _pseudo_sync_runner
    coro.send(None)
  File "f:\Anaconda3\envs\tf2.10\lib\site-packages\IPython\core\interactiveshell.py", line 3336, in run_cell_async
    has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
  File "f:\Anaconda3\envs\tf2.10\lib\site-packages\IPython\core\interactiveshell.py", line 3519, in run_ast_nodes
    if await self.run_code(code, result, async_=asy):
  File "f:\Anaconda3\envs\tf2.10\lib\site-packages\IPython\core\interactiveshell.py", line 3579, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "C:\Users\Linh B Ngo\AppData\Local\Temp\ipykernel_23572\638176283.py", line 1, in <module>
    run_tflite_inference_singleFile(FLOAT_MODEL_TFLITE, audio_loud, sr_loud, model_type="Quantized")
  File "C:\Users\Linh B Ngo\AppData\Local\Temp\ipykernel_23572\4213617393.py", line 18, in run_tflite_inference_singleFile
    custom_audio_processor = input_data.AudioProcessor(None, None, 0, 0, '', 0, 0,
  File "E:\data\tensorflow-2.10.0\tensorflow\examples\speech_commands\input_data.py", line 198, in __init__
    self.prepare_processing_graph(model_settings, summaries_dir)
  File "E:\data\tensorflow-2.10.0\tensorflow\examples\speech_commands\input_data.py", line 392, in prepare_processing_graph
    self.wav_filename_placeholder_ = tf.compat.v1.placeholder(
  File "f:\Anaconda3\envs\tf2.10\lib\site-packages\tensorflow\python\ops\array_ops.py", line 3345, in placeholder
    return gen_array_ops.placeholder(dtype=dtype, shape=shape, name=name)
  File "f:\Anaconda3\envs\tf2.10\lib\site-packages\tensorflow\python\ops\gen_array_ops.py", line 6897, in placeholder
    _, _, _op, _outputs = _op_def_library._apply_op_helper(
  File "f:\Anaconda3\envs\tf2.10\lib\site-packages\tensorflow\python\framework\op_def_library.py", line 797, in _apply_op_helper
    op = g._create_op_internal(op_type_name, inputs, dtypes=None,
  File "f:\Anaconda3\envs\tf2.10\lib\site-packages\tensorflow\python\framework\ops.py", line 3800, in _create_op_internal
    ret = Operation(


In [73]:
# Helper function to run inference
def run_tflite_inference_testSet(tflite_model_path, model_type="Float"):
  #
  # Load test data
  #
  np.random.seed(0) # set random seed for reproducible test results.
  #with tf.Session() as sess:
  with tf.compat.v1.Session() as sess: #replaces the above line for use with TF2.x
    test_data, test_labels = audio_processor.get_data(
        -1, 0, model_settings, BACKGROUND_FREQUENCY, BACKGROUND_VOLUME_RANGE,
        TIME_SHIFT_MS, 'testing', sess)
  test_data = np.expand_dims(test_data, axis=1).astype(np.float32)

  #
  # Initialize the interpreter
  #
  interpreter = tf.lite.Interpreter(tflite_model_path)
  interpreter.allocate_tensors()
  input_details = interpreter.get_input_details()[0]
  output_details = interpreter.get_output_details()[0]
  
  #
  # For quantized models, manually quantize the input data from float to integer
  #
  if model_type == "Quantized":
    input_scale, input_zero_point = input_details["quantization"]
    test_data = test_data / input_scale + input_zero_point
    test_data = test_data.astype(input_details["dtype"])

  #
  # Evaluate the predictions
  #
  correct_predictions = 0
  for i in range(len(test_data)):
    interpreter.set_tensor(input_details["index"], test_data[i])
    interpreter.invoke()
    output = interpreter.get_tensor(output_details["index"])[0]
    top_prediction = output.argmax()
    correct_predictions += (top_prediction == test_labels[i])

  print('%s model accuracy is %f%% (Number of test samples=%d)' % (
      model_type, (correct_predictions * 100) / len(test_data), len(test_data)))

In [None]:
# Compute float model accuracy
run_tflite_inference_testSet(FLOAT_MODEL_TFLITE)

# Compute quantized model accuracy
run_tflite_inference_testSet(MODEL_TFLITE, model_type='Quantized')

## Test using live audio recording

In [38]:
import pyaudio
import wave
from io import BytesIO

def record_audio_to_variable(record_seconds=5, 
                             rate=44100, 
                             chunk=1024, 
                             channels=1):
    """
    Records audio from the default microphone for `record_seconds` seconds
    and returns the audio data as WAV bytes (i.e., a complete WAV file in memory).
    """
    FORMAT = pyaudio.paInt16
    audio_interface = pyaudio.PyAudio()

    # Open the microphone stream
    stream = audio_interface.open(
        format=FORMAT,
        channels=channels,
        rate=rate,
        input=True,
        frames_per_buffer=chunk
    )

    print("Recording...")
    frames = []

    # Capture data from the mic for the specified duration
    for _ in range(int(rate / chunk * record_seconds)):
        data = stream.read(chunk)
        frames.append(data)

    print("Done recording!")

    # Stop & close the stream
    stream.stop_stream()
    stream.close()
    audio_interface.terminate()

    # Combine all frames into a single bytes object and write to an in-memory WAV
    wav_io = BytesIO()
    with wave.open(wav_io, 'wb') as wf:
        wf.setnchannels(channels)
        wf.setsampwidth(audio_interface.get_sample_size(FORMAT))
        wf.setframerate(rate)
        wf.writeframes(b''.join(frames))

    # Reset buffer pointer to start
    wav_io.seek(0)

    # Return the entire WAV file as bytes
    return wav_io.read()


In [39]:
from scipy.io.wavfile import read as wav_read
import io
import numpy as np
import tensorflow as tf
import tensorflow_io as tfio
import matplotlib.pyplot as plt
import numpy as np
from matplotlib import cm
import librosa
import scipy.io.wavfile

In [41]:
def get_audio(seconds):
    output = record_audio_to_variable(record_seconds=seconds)
    riff_chunk_size = len(output) - 8

    # Break up the chunk size into four bytes, held in b.
    q = riff_chunk_size
    b = []
    for i in range(4):
        q, r = divmod(q, 256)
        b.append(r)

    # Replace bytes 4:8 in proc.stdout with the actual size of the RIFF chunk.
    riff = output[:4] + bytes(b) + output[8:]

    sr, audio = wav_read(io.BytesIO(riff))
    return audio, sr

In [44]:
# Say a keyword ['no' 'yes']
audio_loud, sr_loud = get_audio(1)
print("DONE")

Recording...
Done recording!
DONE


In [45]:
# Helper function to run inference (on a single input this time)
# Note: this also includes additional manual pre-processing
TF_SESS = tf.compat.v1.InteractiveSession()
def run_tflite_inference_singleFile(tflite_model_path, custom_audio, sr_custom_audio, model_type="Float"):
  #
  # Preprocess the sample to get the features we pass to the model
  #
  # First re-sample to the needed rate (and convert to mono if needed)
  custom_audio_resampled = librosa.resample(y = librosa.to_mono(np.float64(custom_audio)), orig_sr = sr_custom_audio, target_sr = SAMPLE_RATE)
  # Then extract the loudest one second
  scipy.io.wavfile.write('custom_audio.wav', SAMPLE_RATE, np.int16(custom_audio_resampled))
  !/tmp/extract_loudest_section/gen/bin/extract_loudest_section custom_audio.wav ./trimmed
  # Finally pass it through the TFLiteMicro preprocessor to produce the 
  # spectrogram/MFCC input that the model expects
  custom_model_settings = models.prepare_model_settings(
      0, SAMPLE_RATE, CLIP_DURATION_MS, WINDOW_SIZE_MS,
      WINDOW_STRIDE, FEATURE_BIN_COUNT, PREPROCESS)
  custom_audio_processor = input_data.AudioProcessor(None, None, 0, 0, '', 0, 0,
                                                    model_settings, None)
  custom_audio_preprocessed = custom_audio_processor.get_features_for_wav(
                                        'custom_audio.wav', model_settings, TF_SESS)
  # Reshape the output into a 1,1960 matrix as that is what the model expects
  custom_audio_input = custom_audio_preprocessed[0].flatten()
  test_data = np.reshape(custom_audio_input,(1,len(custom_audio_input)))

  #
  # Initialize the interpreter
  #
  interpreter = tf.lite.Interpreter(tflite_model_path)
  interpreter.allocate_tensors()
  input_details = interpreter.get_input_details()[0]
  output_details = interpreter.get_output_details()[0]

  #
  # For quantized models, manually quantize the input data from float to integer
  #
  if model_type == "Quantized":
    input_scale, input_zero_point = input_details["quantization"]
    test_data = test_data / input_scale + input_zero_point
    test_data = test_data.astype(input_details["dtype"])

  #
  # Run the interpreter
  #
  interpreter.set_tensor(input_details["index"], test_data)
  interpreter.invoke()
  output = interpreter.get_tensor(output_details["index"])[0]
  top_prediction = output.argmax()

  #
  # Translate the output
  #
  top_prediction_str = ''
  if top_prediction >= 2:
    top_prediction_str = WANTED_WORDS.split(',')[top_prediction-2]
  elif top_prediction == 0:
    top_prediction_str = 'silence'
  else:
    top_prediction_str = 'unknown'

  print('%s model guessed the value to be %s' % (model_type, top_prediction_str))

In [60]:
# Say a keyword ['no' 'yes']
audio_loud, sr_loud = get_audio(1)
print("DONE")

Recording...
Done recording!
DONE


In [61]:
run_tflite_inference_singleFile(FLOAT_MODEL_TFLITE, audio_loud, sr_loud)#, model_type="Quantized")

Float model guessed the value to be yes


The system cannot find the path specified.
