# MNIST for TensorFlow Lite for Microcontrollers

***

## Define & Train Model

In [1]:
import tensorflow as tf
import numpy as np

In [2]:
# Show the installed TensorFlow version so the recorded outputs below can be
# tied to a specific release.
print('TensorFlow version: ', tf.__version__)

TensorFlow version:  2.4.0


In [3]:
# Fetch MNIST through Keras; load_data() returns a (train, test) pair,
# each of which is an (images, labels) pair.
mnist_train, mnist_test = tf.keras.datasets.mnist.load_data()
x_train, y_train = mnist_train
x_test, y_test = mnist_test

In [4]:
# Cast both image sets to float32 and divide by 255.0 so the converter later
# sees floating-point inputs to quantize (presumably mapping 8-bit pixel
# values into [0, 1]).
x_train, x_test = (
    pixels.astype(np.float32) / 255.0
    for pixels in (x_train, x_test)
)

In [5]:
# Append a trailing single-channel axis: (N, 28, 28) -> (N, 28, 28, 1),
# matching the input_shape=(28, 28, 1) declared on the first Conv2D layer.
x_train = np.reshape(x_train, (len(x_train), 28, 28, 1))
x_test = np.reshape(x_test, (len(x_test), 28, 28, 1))

- Why the extra channel dimension is needed (`Conv2D` expects 4-D input): https://stackoverflow.com/questions/53249386/valueerror-input-0-is-incompatible-with-layer-conv2d-1-expected-ndim-4-found

In [6]:
# Small CNN sized for a microcontroller target.
# Per-layer output shapes (see model.summary() below):
#   Conv2D(4, 3x3, stride 2) -> (13, 13, 4)
#   Conv2D(6, 3x3, stride 2) -> (6, 6, 6)
#   Flatten                  -> 216
#   Dense(200) -> Dropout(0.2) -> Dense(10, softmax)
model = tf.keras.models.Sequential([
    # Only the first layer declares the input shape; every later layer infers it.
    tf.keras.layers.Conv2D(4, (3,3), strides=(2,2), activation='relu', input_shape=(28, 28, 1)),
    tf.keras.layers.Conv2D(6, (3,3), strides=(2,2), activation='relu'),
    # No input_shape here: the previous (32, 7, 7) value did not match the real
    # (6, 6, 6) feature map and has no effect on a non-first layer.
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(200, activation=tf.nn.relu),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(10, activation=tf.nn.softmax)
])

In [7]:
# Configure training: Adam optimizer, sparse categorical cross-entropy loss
# (labels are passed to fit() as-is, without one-hot encoding), and accuracy
# as the reported metric.
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

In [8]:
# Print the per-layer output shapes and parameter counts to sanity-check the
# architecture and its size before training.
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 13, 13, 4)         40        
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 6, 6, 6)           222       
_________________________________________________________________
flatten (Flatten)            (None, 216)               0         
_________________________________________________________________
dense (Dense)                (None, 200)               43400     
_________________________________________________________________
dropout (Dropout)            (None, 200)               0         
_________________________________________________________________
dense_1 (Dense)              (None, 10)                2010      
Total params: 45,672
Trainable params: 45,672
Non-trainable params: 0
____________________________________________________

In [9]:
# Train on the full training set for 40 epochs. No random seed is set in the
# cells shown, so results may vary slightly between runs. The returned History
# object is displayed as the cell output.
model.fit(x_train, y_train, epochs=40)

Epoch 1/40
Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40
Epoch 6/40
Epoch 7/40
Epoch 8/40
Epoch 9/40
Epoch 10/40
Epoch 11/40
Epoch 12/40
Epoch 13/40
Epoch 14/40
Epoch 15/40
Epoch 16/40
Epoch 17/40
Epoch 18/40
Epoch 19/40
Epoch 20/40
Epoch 21/40
Epoch 22/40
Epoch 23/40
Epoch 24/40
Epoch 25/40
Epoch 26/40
Epoch 27/40
Epoch 28/40
Epoch 29/40
Epoch 30/40
Epoch 31/40
Epoch 32/40
Epoch 33/40
Epoch 34/40
Epoch 35/40
Epoch 36/40
Epoch 37/40
Epoch 38/40
Epoch 39/40
Epoch 40/40


<tensorflow.python.keras.callbacks.History at 0x7f52b139d7b8>

In [10]:
# Evaluate the trained float model on the test set.
# With the loss and metrics configured in compile(), the result is [loss, accuracy].
model.evaluate(x_test, y_test)



[0.07255595922470093, 0.9868000149726868]

In [11]:
# Export the trained model to the 'saved_model' directory; the TFLite converter
# in the next section loads it from there.
model.save('saved_model')

INFO:tensorflow:Assets written to: saved_model/assets


## Quantize the model to int8

In [12]:
# Load the SavedModel exported by the training section
converter = tf.lite.TFLiteConverter.from_saved_model('saved_model')


def representative_dataset(num_samples=500):
    """Yield calibration inputs for post-training quantization.

    Each yielded item is a one-element list holding a single training image,
    sliced as ``x_train[i:i+1]`` so the leading batch dimension of 1 is kept.

    Args:
        num_samples: Maximum number of training images to yield. Capped at
            ``len(x_train)`` so no empty slices are produced.
    """
    for i in range(min(num_samples, len(x_train))):
        yield [x_train[i:i+1]]


# Set the optimization flag.
converter.optimizations = [tf.lite.Optimize.DEFAULT]

# Enforce integer only quantization (int8 ops, int8 input and output tensors)
converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
converter.inference_input_type = tf.int8
converter.inference_output_type = tf.int8

# Provide a representative dataset to ensure we quantize correctly.
converter.representative_dataset = representative_dataset
model_tflite = converter.convert()

# Save the model to disk; the context manager closes the file handle
# deterministically instead of leaving it to the garbage collector.
with open('model_quant.tflite', "wb") as model_file:
    model_file.write(model_tflite)

# Display the size of the quantized model in bytes
len(model_tflite)

49720

## Evaluate the quantized model

In [13]:
# Initialize the TFLite interpreter from the in-memory flatbuffer
interpreter = tf.lite.Interpreter(model_content=model_tflite)
interpreter.allocate_tensors()

input_details = interpreter.get_input_details()[0]
output_details = interpreter.get_output_details()[0]

# Start from the float test images so x_test_ is always defined, even when the
# model reports no input quantization (scale 0.0, zero point 0).
x_test_ = x_test

# If required, quantize the input layer (from float to integer)
input_scale, input_zero_point = input_details["quantization"]
if (input_scale, input_zero_point) != (0.0, 0):
  x_test_ = x_test / input_scale + input_zero_point
# NOTE(review): astype truncates toward zero rather than rounding; consider
# np.round first if exact parity with the on-device quantizer matters.
x_test_ = x_test_.astype(input_details["dtype"])

# Invoke the interpreter one image at a time.
# y_pred stores class indices (argmax results), so it uses an integer index
# dtype rather than the model's output tensor dtype.
num_samples = x_test_.shape[0]
y_pred = np.empty(num_samples, dtype=np.int64)
for i in range(num_samples):
  # Slice with i:i+1 to keep the leading batch dimension of 1
  interpreter.set_tensor(input_details["index"], x_test_[i:i+1])
  interpreter.invoke()
  y_pred[i] = np.argmax(interpreter.get_tensor(output_details["index"]))

In [14]:
# Report how many quantized-model predictions match the labels, as
# "<correct> / <total>".
print('Accuracy: {} / {}'.format(np.sum(y_pred == y_test), x_test_.shape[0]))

Accuracy: 9863 / 10000


## Generate a TensorFlow Lite for Microcontrollers Model

In [15]:
# Make sure the xxd tool is available; it is used in the next cell to turn the
# .tflite file into C source. Requires sudo and an apt-based system.
!sudo apt install -q xxd

Reading package lists...
Building dependency tree...
Reading state information...
xxd is already the newest version (2:8.0.1453-1ubuntu1.4).
0 upgraded, 0 newly installed, 0 to remove and 13 not upgraded.


In [16]:
# Dump the quantized model as a C array definition (xxd -i "include" output
# style) into model_quant.cc, for compiling into microcontroller firmware.
!xxd -i model_quant.tflite > model_quant.cc