In [28]:
pip install -q tensorflow-model-optimization

In [56]:
import numpy as np
from numpy import random
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.datasets import fashion_mnist
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dropout, Dense
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import load_model
import os
import tensorflow_model_optimization as tfmot
import pathlib
import tempfile


In [30]:
(X_train, y_train), (X_test, y_test) = fashion_mnist.load_data()

In [31]:
num_class = 10
X_train = X_train.astype('float32')
X_test = X_test.astype('float32')

In [32]:
X_train /= 255
X_test /= 255
X_train = X_train.reshape(-1, X_train.shape[1], X_train.shape[2], 1)
X_test = X_test.reshape(-1, X_test.shape[1], X_test.shape[2], 1)
y_train = to_categorical(y_train, num_class)
y_test = to_categorical(y_test, num_class)

In [33]:
model = Sequential()
model.add(Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape = (28,28,1)))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Conv2D(64, kernel_size=(3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Flatten())
model.add(Dropout(0.5))
model.add(Dense(100, activation = 'relu'))
model.add(Dense(num_class, activation='softmax'))

In [34]:
model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_2 (Conv2D)           (None, 26, 26, 32)        320       
                                                                 
 max_pooling2d_2 (MaxPoolin  (None, 13, 13, 32)        0         
 g2D)                                                            
                                                                 
 conv2d_3 (Conv2D)           (None, 11, 11, 64)        18496     
                                                                 
 max_pooling2d_3 (MaxPoolin  (None, 5, 5, 64)          0         
 g2D)                                                            
                                                                 
 flatten_1 (Flatten)         (None, 1600)              0         
                                                                 
 dropout_1 (Dropout)         (None, 1600)             

In [35]:
opt = Adam(learning_rate=0.002)
model.compile(loss='categorical_crossentropy', optimizer = opt, metrics=['accuracy'])

In [36]:
model.fit(X_train, y_train, batch_size=128, epochs=15)

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


<keras.src.callbacks.History at 0x7bc88856c760>

In [37]:
score = model.evaluate(X_test, y_test)
print('accuracy on test data:', score[1])

accuracy on test data: 0.9151999950408936


In [38]:
model.save('/content/drive/MyDrive/Colab_Notebooks/Hands_on_TinyML/01_Understanding_TF_Lite/baseline_model.h5')

  saving_api.save_model(


In [39]:
baseline_model = load_model('/content/drive/MyDrive/Colab_Notebooks/Hands_on_TinyML/01_Understanding_TF_Lite/baseline_model.h5')

## **Converting to TFLite model**

### **Way to Convert**
* We can do the conversion using
 **tf.lite.TFLiteConverter**. We will call the **from_keras_model()** method under the
 **tf.lite.TFLiteConverte**r class and pass the baseline model as a function argument.

In [40]:
convertor = tf.lite.TFLiteConverter.from_keras_model(baseline_model)
tflite_model = convertor.convert()

**Note :**
* We can further convert the baseline model to a TensorFlow graph using **tf.function**,
which contains all the computational operations, variables, and weights. This can be
achieved by exporting the model as a concrete function. Finally, the concrete function
is converted into a TFLite model using the method from_concrete_functions().

In [41]:
func = tf.function(baseline_model).get_concrete_function(tf.TensorSpec(baseline_model.inputs[0].shape , baseline_model.inputs[0].dtype))
func.graph.as_graph_def()
converter =  tf.lite.TFLiteConverter.from_concrete_functions([func])
tflite_model = converter.convert()



In [42]:
path = "/content/drive/MyDrive/Colab_Notebooks/Hands_on_TinyML/01_Understanding_TF_Lite/tg_lite_model"

tflite_models_dir = pathlib.Path(path)
tflite_models_dir.mkdir(exist_ok=True, parents=True)
tflite_model_file = tflite_models_dir/'model.tflite'
tflite_model_file.write_bytes(tflite_model)

723092

**Now, we have a TFLite model. We will use it to make inferences in Python**. This can
be done by using the tf.lite.Interpreter class.
The following steps are done to make an inference in TensorFlow Lite:
1. First, we create an instance of the Interpreter class. It takes the path
containing the .TFLIE file as an input.
2. Allocate memory to the Interpreter by calling the function allocate_
tensors().
3. After memory allocation, call get_input_details() and get_output_
details() to get some details about the input and the output tensor.
4. Now, we are ready to make inferences. Get an image from the test data and
reshape it according to the desired input shape to the model.
5. Set the input tensor by copying the input data. Use the method set_tensor().
6. Invoke the interpreter to make an inference by calling Interpreter.invoke().
7. Get the value of the output tensor.
8. Covert it into the predicted class label

In [43]:
tflite_model = "/content/drive/MyDrive/Colab_Notebooks/Hands_on_TinyML/01_Understanding_TF_Lite/tg_lite_model/model.tflite"
interpreter = tf.lite.Interpreter(model_path = tflite_model )
interpreter.allocate_tensors()
input_index = interpreter.get_input_details()[0]['index']
output_index = interpreter.get_output_details()[0]['index']
pred_list = []
for images in X_test:
  input_data = np.array(images, dtype=np.float32)

  input_data = input_data.reshape(1, input_data.shape[0], input_data.shape[1], 1)

  interpreter.set_tensor(input_index, input_data)
  interpreter.invoke()
  prediction = interpreter.get_tensor(output_index)
  prediction = np.argmax(prediction)
  pred_list.append(prediction)

accurate_count = 0
for index in range(len(pred_list)):
  if pred_list[index] == np.argmax(y_test[index]):
      accurate_count += 1
accuracy = accurate_count * 1.0 / len(pred_list)

print('accuracy = ', accuracy)

accuracy =  0.9152


---

### **Tensorflow model optimization**
The TensorFlow Model Optimization Toolkit consists of a set of libraries for the
effective optimization of large neural networks. The primary goal of optimization is
to enable a large machine learning model to seamlessly run on smaller edge devices
having restricted hardware resources in terms of memory and computational
capacity. They also need to consume lower battery power on the target hardware.
Such applications are particularly useful in scenarios where we require continuous
24 × 7 monitoring, for example, machine condition monitoring in large industries,
on-device cardiac health monitoring systems, smart voice assistant devices, and so
on. A few popular model optimizations techniques are as follows:

• Lowering the precision of model weights and activations

• Reducing some of the lesser important parameters in the model

• Updating the model topology

### **Optimization Techniques**
* **Quantization** is an optimization strategy used to lower the precision of a machine
learning model. Both model weights and activation outputs can be quantized in the
process. Integer-based quantization is particularly common in TinyML. It converts
the weights and activation outputs from the original 32-bit floating point numbers
to the nearest 8-bit fixed-point numbers. As a result, the model size is reduced by
a factor of 4. The resulting model also has a faster inference speed. Quantization is
particularly common in low-powered microcontroller devices, as many of them do
not have floating-point units in the hardware.
  *  post-training quantization
  * quantization-aware training



In [44]:
converter = tf.lite.TFLiteConverter.from_keras_model(baseline_model)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
tflite_model_ptq = converter.convert()

In [45]:
tflite_models_dir = pathlib.Path("/content/drive/MyDrive/Colab_Notebooks/Hands_on_TinyML/01_Understanding_TF_Lite/tg_lite_model")
tflite_models_dir.mkdir(exist_ok=True, parents=True)
tflite_model_file = tflite_models_dir/"model_ptq.tflite"
tflite_model_file.write_bytes(tflite_model_ptq)

188760

In [46]:
tflite_model_file = "/content/drive/MyDrive/Colab_Notebooks/Hands_on_TinyML/01_Understanding_TF_Lite/tg_lite_model/model_ptq.tflite"
interpreter = tf.lite.Interpreter(model_path=tflite_model_file)
interpreter.allocate_tensors()

input_index = interpreter.get_input_details()[0]["index"]
output_index = interpreter.get_output_details()[0]["index"]

pred_list = []
for images in X_test:
  input_data = np.array(images, dtype=np.float32)

  input_data = input_data.reshape(1, input_data.shape[0], input_data.shape[1], 1)

  interpreter.set_tensor(input_index, input_data)
  interpreter.invoke()
  prediction = interpreter.get_tensor(output_index)
  prediction = np.argmax(prediction)
  pred_list.append(prediction)

accurate_count = 0
for index in range(len(pred_list)):
  if pred_list[index] == np.argmax(y_test[index]):
      accurate_count += 1
accuracy = accurate_count * 1.0 / len(pred_list)

print(accuracy)

0.9151


**Quantization-aware training**
In post-training quantization, we take a pre-trained model and convert the weights
and activation output into 8-bit integers. One major disadvantage is that we do not
fine-tune the model after quantization. In most of the cases, lowering the precision
of the model weights will introduce a loss called quantization error. This can have
a negative impact on model performance. Quantization-aware training tries to
minimize the loss via backpropagation by retraining the model for few epochs. By
doing this, it mitigates the impact of quantization error to some extent.

In [47]:
baseline_model = load_model('/content/drive/MyDrive/Colab_Notebooks/Hands_on_TinyML/01_Understanding_TF_Lite/baseline_model.h5')

quantized_model = tfmot.quantization.keras.quantize_model
q_aware_model = quantized_model(baseline_model)
q_aware_model.compile(optimizer='adam',
              loss=tf.keras.losses.CategoricalCrossentropy(from_logits=True),
              metrics=['accuracy'])

q_aware_model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 quantize_layer_1 (Quantize  (None, 28, 28, 1)         3         
 Layer)                                                          
                                                                 
 quant_conv2d_2 (QuantizeWr  (None, 26, 26, 32)        387       
 apperV2)                                                        
                                                                 
 quant_max_pooling2d_2 (Qua  (None, 13, 13, 32)        1         
 ntizeWrapperV2)                                                 
                                                                 
 quant_conv2d_3 (QuantizeWr  (None, 11, 11, 64)        18627     
 apperV2)                                                        
                                                                 
 quant_max_pooling2d_3 (Qua  (None, 5, 5, 64)         

**Note** that the resulting model is
only quantization-aware but not yet quantized. The floating point model weights
and activations are rounded to mimic integer values. Before converting them into
full-integer, we will retrain to fine-tune the model.

In [48]:
q_aware_model.fit(X_train, y_train, batch_size=500, epochs=2, validation_split=0.1)

Epoch 1/2


  output, from_logits = _get_logits(


Epoch 2/2


<keras.src.callbacks.History at 0x7bc88915fac0>

In [49]:
converter = tf.lite.TFLiteConverter.from_keras_model(q_aware_model)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
tflite_model_qat = converter.convert()

tflite_models_dir = pathlib.Path("/content/drive/MyDrive/Colab_Notebooks/Hands_on_TinyML/01_Understanding_TF_Lite/tg_lite_model")
tflite_models_dir.mkdir(exist_ok=True, parents=True)
tflite_model_file = tflite_models_dir/"model_qat.tflite"
tflite_model_file.write_bytes(tflite_model_qat)



187888

In [50]:
tflite_model_file = '/content/drive/MyDrive/Colab_Notebooks/Hands_on_TinyML/01_Understanding_TF_Lite/tg_lite_model/model_qat.tflite'
interpreter = tf.lite.Interpreter(model_path=tflite_model_file)
interpreter.allocate_tensors()

input_index = interpreter.get_input_details()[0]["index"]
output_index = interpreter.get_output_details()[0]["index"]

pred_list = []
for images in X_test:
  input_data = np.array(images, dtype=np.float32)

  input_data = input_data.reshape(1, input_data.shape[0], input_data.shape[1], 1)

  interpreter.set_tensor(input_index, input_data)
  interpreter.invoke()
  prediction = interpreter.get_tensor(output_index)
  prediction = np.argmax(prediction)
  pred_list.append(prediction)

accurate_count = 0
for index in range(len(pred_list)):
  if pred_list[index] == np.argmax(y_test[index]):
      accurate_count += 1
accuracy = accurate_count * 1.0 / len(pred_list)

print('accuracy = ', accuracy)

accuracy =  0.9201


**Important Note** : A fully quantized
model can often be less accurate compared to the baseline model, even after
retraining. To mitigate that risk, the critical feature extraction layers are often not
quantized in a deep neural network. For example, you may prefer to quantize only
the first few convolutional layers of a CNN.

***we will quantize only the dense layers of the baseline CNN.
We will first define a function apply_quantization(layer) to define which layers
will be quantized. Next, we will use tf.keras.models.clone_model to apply
quantization to the dense layers by calling the function.***

In [51]:
baseline_model = load_model('/content/drive/MyDrive/Colab_Notebooks/Hands_on_TinyML/01_Understanding_TF_Lite/baseline_model.h5')

def apply_quantization(layer):
      if isinstance(layer, tf.keras.layers.Dense):
        return tfmot.quantization.keras.quantize_annotate_layer(layer)
      return layer
annotated_model = tf.keras.models.clone_model(baseline_model,clone_function=apply_quantization,)
q_aware_model_dense = tfmot.quantization.keras.quantize_apply(annotated_model)
q_aware_model_dense.summary()


Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_2 (Conv2D)           (None, 26, 26, 32)        320       
                                                                 
 max_pooling2d_2 (MaxPoolin  (None, 13, 13, 32)        0         
 g2D)                                                            
                                                                 
 conv2d_3 (Conv2D)           (None, 11, 11, 64)        18496     
                                                                 
 max_pooling2d_3 (MaxPoolin  (None, 5, 5, 64)          0         
 g2D)                                                            
                                                                 
 flatten_1 (Flatten)         (None, 1600)              0         
                                                                 
 quant_dropout_1 (QuantizeW  (None, 1600)             

In [52]:
q_aware_model_dense.compile(optimizer='adam',
              loss=tf.keras.losses.CategoricalCrossentropy(from_logits=True),
              metrics=['accuracy'])

q_aware_model_dense.fit(X_train, y_train,
                  batch_size=500, epochs=2, validation_split=0.1)

Epoch 1/2


  output, from_logits = _get_logits(


Epoch 2/2


<keras.src.callbacks.History at 0x7bc88856d6c0>

In [53]:
converter = tf.lite.TFLiteConverter.from_keras_model(q_aware_model_dense)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
tflite_model_qat_dense = converter.convert()

tflite_models_dir = pathlib.Path("/content/drive/MyDrive/Colab_Notebooks/Hands_on_TinyML/01_Understanding_TF_Lite/tg_lite_model")
tflite_models_dir.mkdir(exist_ok=True, parents=True)
tflite_model_file = tflite_models_dir/"model_qat_dense.tflite"
tflite_model_file.write_bytes(tflite_model_qat_dense)



721152

In [54]:
tflite_model_file = '/content/drive/MyDrive/Colab_Notebooks/Hands_on_TinyML/01_Understanding_TF_Lite/tg_lite_model/model_qat_dense.tflite'
interpreter = tf.lite.Interpreter(model_path=tflite_model_file)
interpreter.allocate_tensors()

input_index = interpreter.get_input_details()[0]["index"]
output_index = interpreter.get_output_details()[0]["index"]

pred_list = []
for images in X_test:
  input_data = np.array(images, dtype=np.float32)

  input_data = input_data.reshape(1, input_data.shape[0], input_data.shape[1], 1)

  interpreter.set_tensor(input_index, input_data)
  interpreter.invoke()
  prediction = interpreter.get_tensor(output_index)
  prediction = np.argmax(prediction)
  pred_list.append(prediction)

accurate_count = 0
for index in range(len(pred_list)):
  if pred_list[index] == np.argmax(y_test[index]):
      accurate_count += 1
accuracy = accurate_count * 1.0 / len(pred_list)

print('accuracy = ', accuracy)

accuracy =  0.9236


---

**Weight pruning** is another popular model optimization technique that zeros out
some of the less significant model weights. The pruned elements are trimmed
from the model to introduce sparsity. Such sparse models are easy to compress and
occupy lesser memory space in the target device. During inference, the zero weights
are skipped, resulting in an improved latency due to lesser mathematical operations.


**Process:**

1. **Magnitude-based Pruning:**
   - Consider absolute values of weights.
   - Absolute values help focus on the importance of the weight, regardless of its direction (positive or negative).

2. **Setting a Threshold:**
   - Choose a threshold value.
   - Connections with weights below this threshold are considered less important.

3. **Zeroing Out Weights:**
   - Set to zero (prune) connections with weights below the threshold.
   - These pruned connections won't contribute to the network's computations.

**Example:**

Let's illustrate with a connection having a weight of -0.2. The absolute value is 0.2. If the threshold is set at 0.3, this connection would be pruned because 0.2 is below the threshold.

**Trade-off:**

- Weight pruning creates a sparser network, reducing the number of connections.
- Too much pruning can impact model performance.
- A balance between model size and accuracy must be maintained.

In summary, weight pruning is like cleaning up unnecessary connections, keeping important ones, and finding a trade-off between a compact model and good performance.

---

In [55]:
prune_low_magnitude = tfmot.sparsity.keras.prune_low_magnitude

batch_size = 128
epochs = 2
validation_split = 0.1

num_samples = X_train.shape[0] * (1 - validation_split)
end_step = np.ceil(num_samples / batch_size).astype(np.int32) * epochs

pruning_params = {
      'pruning_schedule': tfmot.sparsity.keras.PolynomialDecay(initial_sparsity=0.40,
                                                               final_sparsity=0.75,
                                                               begin_step=0,
                                                               end_step=end_step) # This function is used to define the loop iterations
}

model_for_pruning = prune_low_magnitude(baseline_model, **pruning_params)

model_for_pruning.compile(optimizer='adam',
              loss=tf.keras.losses.CategoricalCrossentropy(from_logits=True),
              metrics=['accuracy'])

model_for_pruning.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 prune_low_magnitude_conv2d  (None, 26, 26, 32)        610       
 _2 (PruneLowMagnitude)                                          
                                                                 
 prune_low_magnitude_max_po  (None, 13, 13, 32)        1         
 oling2d_2 (PruneLowMagnitu                                      
 de)                                                             
                                                                 
 prune_low_magnitude_conv2d  (None, 11, 11, 64)        36930     
 _3 (PruneLowMagnitude)                                          
                                                                 
 prune_low_magnitude_max_po  (None, 5, 5, 64)          1         
 oling2d_3 (PruneLowMagnitu                                      
 de)                                                  

In [57]:
log_dir = tempfile.mkdtemp()
callbacks = [
    tfmot.sparsity.keras.UpdatePruningStep(),
    tfmot.sparsity.keras.PruningSummaries(log_dir=log_dir)
]

model_for_pruning.fit(X_train, y_train,
                  batch_size=batch_size, epochs=epochs, validation_split=validation_split,
                  callbacks=callbacks)

Epoch 1/2


  output, from_logits = _get_logits(


  1/422 [..............................] - ETA: 55:54 - loss: 0.2571 - accuracy: 0.9219



Epoch 2/2


<keras.src.callbacks.History at 0x7bc8894b48e0>

In [58]:
model_for_export = tfmot.sparsity.keras.strip_pruning(model_for_pruning)

In [60]:
converter = tf.lite.TFLiteConverter.from_keras_model(model_for_export)
tflite_model_pruned_dense = converter.convert()

tflite_models_dir = pathlib.Path("/content/drive/MyDrive/Colab_Notebooks/Hands_on_TinyML/01_Understanding_TF_Lite/tg_lite_model")
tflite_models_dir.mkdir(exist_ok=True, parents=True)
tflite_model_file = tflite_models_dir/"model_pruned.tflite"
tflite_model_file.write_bytes(tflite_model_pruned_dense)

723188

In [61]:
tflite_model_file = '/content/drive/MyDrive/Colab_Notebooks/Hands_on_TinyML/01_Understanding_TF_Lite/tg_lite_model/model_pruned.tflite'
interpreter = tf.lite.Interpreter(model_path=tflite_model_file)
interpreter.allocate_tensors()

input_index = interpreter.get_input_details()[0]["index"]
output_index = interpreter.get_output_details()[0]["index"]

pred_list = []
for images in X_test:
  input_data = np.array(images, dtype=np.float32)

  input_data = input_data.reshape(1, input_data.shape[0], input_data.shape[1], 1)

  interpreter.set_tensor(input_index, input_data)
  interpreter.invoke()
  prediction = interpreter.get_tensor(output_index)
  prediction = np.argmax(prediction)
  pred_list.append(prediction)

accurate_count = 0
for index in range(len(pred_list)):
  if pred_list[index] == np.argmax(y_test[index]):
      accurate_count += 1
accuracy = accurate_count * 1.0 / len(pred_list)

print(accuracy)

0.902


In [62]:
def apply_pruning(layer):
  if isinstance(layer, tf.keras.layers.Dense):
    return tfmot.sparsity.keras.prune_low_magnitude(layer)
  return layer

model_for_pruning = tf.keras.models.clone_model(
    baseline_model,
    clone_function=apply_pruning)

model_for_pruning.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_2 (Conv2D)           (None, 26, 26, 32)        320       
                                                                 
 max_pooling2d_2 (MaxPoolin  (None, 13, 13, 32)        0         
 g2D)                                                            
                                                                 
 conv2d_3 (Conv2D)           (None, 11, 11, 64)        18496     
                                                                 
 max_pooling2d_3 (MaxPoolin  (None, 5, 5, 64)          0         
 g2D)                                                            
                                                                 
 flatten_1 (Flatten)         (None, 1600)              0         
                                                                 
 dropout_1 (Dropout)         (None, 1600)             

In [63]:
model_for_pruning.compile(optimizer='adam',
              loss=tf.keras.losses.CategoricalCrossentropy(from_logits=True),
              metrics=['accuracy'])

log_dir = tempfile.mkdtemp()
callbacks = [
    tfmot.sparsity.keras.UpdatePruningStep(),
    tfmot.sparsity.keras.PruningSummaries(log_dir=log_dir)
]

model_for_pruning.fit(X_train, y_train,
                  batch_size=batch_size, epochs=epochs, validation_split=validation_split,
                  callbacks=callbacks)

Epoch 1/2


  output, from_logits = _get_logits(


  6/422 [..............................] - ETA: 4s - loss: 0.3222 - accuracy: 0.8802   



Epoch 2/2


<keras.src.callbacks.History at 0x7bc889283df0>

In [64]:
model_for_export = tfmot.sparsity.keras.strip_pruning(model_for_pruning)

converter = tf.lite.TFLiteConverter.from_keras_model(model_for_export)
tflite_model_pruned_dense = converter.convert()

In [65]:
tflite_models_dir = pathlib.Path("/content/drive/MyDrive/Colab_Notebooks/Hands_on_TinyML/01_Understanding_TF_Lite/tg_lite_model")
tflite_models_dir.mkdir(exist_ok=True, parents=True)
tflite_model_file = tflite_models_dir/"model_pruned_dense.tflite"
tflite_model_file.write_bytes(tflite_model_pruned_dense)

723188

In [66]:
tflite_model_file = '/content/drive/MyDrive/Colab_Notebooks/Hands_on_TinyML/01_Understanding_TF_Lite/tg_lite_model/model_pruned_dense.tflite'
interpreter = tf.lite.Interpreter(model_path=tflite_model_file)
interpreter.allocate_tensors()

input_index = interpreter.get_input_details()[0]["index"]
output_index = interpreter.get_output_details()[0]["index"]

pred_list = []
for images in X_test:
  input_data = np.array(images, dtype=np.float32)

  input_data = input_data.reshape(1, input_data.shape[0], input_data.shape[1], 1)

  interpreter.set_tensor(input_index, input_data)
  interpreter.invoke()
  prediction = interpreter.get_tensor(output_index)
  prediction = np.argmax(prediction)
  pred_list.append(prediction)

accurate_count = 0
for index in range(len(pred_list)):
  if pred_list[index] == np.argmax(y_test[index]):
      accurate_count += 1
accuracy = accurate_count * 1.0 / len(pred_list)

print('accuracy = ', accuracy)

accuracy =  0.9103
