# Create inference graph

So, we have trained our model, but we are not done yet. We still have to create the inference graph, freeze it, and convert the model to the TFLite format.

In [1]:
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
from tensorflow import InteractiveSession, ConfigProto
# Session configuration with GPU memory growth enabled
# (presumably so TensorFlow does not reserve all GPU memory up front).
config = ConfigProto()
config.gpu_options.allow_growth = True
# Open an interactive session that uses this configuration.
session = InteractiveSession(config=config)
# Restrict TensorFlow's Python logging to ERROR-level messages.
tf.logging.set_verbosity(tf.logging.ERROR)

In [2]:
def cnn_model(x, training=False):
    """Build the CNN and return its unnormalized class scores.

    Args:
        x: Input tensor; it is reshaped to [-1, 28, 28, 1] before the
            first convolution.
        training: Forwarded to both dropout layers.

    Returns:
        The output of the final 10-unit dense layer (no activation applied).
    """
    net = tf.reshape(x, shape=[-1, 28, 28, 1])

    # Two conv + max-pool stages (32 filters, then 64). Layers are created
    # in a fixed order because default layer names depend on creation order.
    for filters in (32, 64):
        net = tf.layers.conv2d(net, filters, 3, activation=tf.nn.relu)
        net = tf.layers.max_pooling2d(net, 2, 2)

    net = tf.layers.dropout(net, rate=0.5, training=training)

    # Fully connected head: flatten -> 1024 ReLU units -> dropout -> 10 outputs.
    net = tf.contrib.layers.flatten(net)
    net = tf.layers.dense(net, 1024, activation=tf.nn.relu)
    net = tf.layers.dropout(net, rate=0.5, training=training)

    return tf.layers.dense(net, 10)

Re-create the model's graph for inference:

In [3]:
# Session used by the following cells to restore the checkpoint and run the evaluation.
sess = tf.Session()

# Graph inputs: images as 784-value rows and labels as 10-value rows.
# The explicit names matter: 'input' is the name passed to tflite_convert as --input_arrays.
x = tf.placeholder(tf.float32, shape=[None, 784], name='input')
y = tf.placeholder(tf.float32, shape=[None, 10], name='label')

# Build the network with training=False (the flag is forwarded to the dropout layers).
logits = cnn_model(x, False)
# The softmax node is named 'prob'; freeze_graph and tflite_convert refer to it by that name.
y_pred = tf.nn.softmax(logits, name='prob')

# Apply tf.contrib.quantize's eval-graph rewrite; no graph argument is passed,
# so this is expected to act on the default graph built above.
tf.contrib.quantize.create_eval_graph()

Load the previous model:

In [4]:
# Restore the graph's variables into `sess` from the checkpoint at ./data/trained_model.ckpt.
saver = tf.train.Saver()
saver.restore(sess, './data/trained_model.ckpt')

Save the inference graph!

In [5]:
# Serialize the default graph's GraphDef with str() and store that text in
# data/eval.pb (the file is opened in text mode, so it holds the textual form).
g = tf.get_default_graph()
graph_def_text = str(g.as_graph_def())
with open('data/eval.pb', 'w') as f:
    f.write(graph_def_text)

Evaluate the model to ensure that it still works:

In [6]:
# Reduce labels and predicted probabilities to class indices along axis 1,
# then compare them element-wise.
true_class = tf.argmax(y, 1)
predicted_class = tf.argmax(y_pred, 1)
correct_prediction = tf.equal(true_class, predicted_class)
# The mean of the matches (cast to float) is the accuracy.
accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))

# Load MNIST with one-hot labels and score the restored model on the test split.
mnist = input_data.read_data_sets('MNIST-data', one_hot=True)
test_feed = {x: mnist.test.images, y: mnist.test.labels}
acc = sess.run(accuracy, feed_dict=test_feed)
print("Test acc = ", acc)

Extracting MNIST-data/train-images-idx3-ubyte.gz
Extracting MNIST-data/train-labels-idx1-ubyte.gz
Extracting MNIST-data/t10k-images-idx3-ubyte.gz
Extracting MNIST-data/t10k-labels-idx1-ubyte.gz
Test acc =  0.9928


Freeze the inference graph:

In [7]:
# Run the freeze_graph tool on ./data/eval.pb and the ./data/trained_model.ckpt checkpoint,
# writing ./data/frozen_graph.pb with 'prob' as the output node.
!freeze_graph --input_graph=./data/eval.pb --input_checkpoint=./data/trained_model.ckpt --output_graph=./data/frozen_graph.pb  --output_node_names=prob

Instructions for updating:
Use tf.gfile.GFile.
Instructions for updating:
Use standard file APIs to check for files with this prefix.
2019-05-07 08:38:36.174683: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2 FMA
2019-05-07 08:38:36.257304: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:998] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2019-05-07 08:38:36.260916: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:998] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2019-05-07 08:38:36.261717: I tensorflow/compiler/xla/service/service.cc:150] XLA service 0x3d7b890 executing computations on platform CUDA. Devices:
2019-05-07 08:38:36.261731: I tensorflow/compiler/xla/service/service.cc:158]   StreamExecutor device 

Convert frozen graph to TFLite:

In [8]:
# Convert the frozen graph to a uint8-quantized TFLite model with input node 'input' and output node 'prob'.
# NOTE(review): with --mean_values=128 / --std_dev_values=127 a uint8 input q is interpreted as
# (q - 128) / 127, i.e. roughly [-1, 1]. Confirm this matches the input range the model was trained
# and evaluated on; for inputs scaled to [0, 1] the matching values would be mean 0 / std 255.
# NOTE(review): the documented converter flag appears to be --inference_input_type; confirm that
# --input_type is actually recognised by this tflite_convert version.
!tflite_convert --output_file=./data/mnist_cnn_quant.tflite  --graph_def_file=./data/frozen_graph.pb --inference_type=QUANTIZED_UINT8 --input_type=QUANTIZED_UINT8 --input_arrays=input --output_arrays=prob --mean_values=128  --std_dev_values=127

2019-05-07 08:38:39.734319: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2 FMA
2019-05-07 08:38:39.814759: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:998] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2019-05-07 08:38:39.817607: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:998] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2019-05-07 08:38:39.818319: I tensorflow/compiler/xla/service/service.cc:150] XLA service 0x132d930 executing computations on platform CUDA. Devices:
2019-05-07 08:38:39.818332: I tensorflow/compiler/xla/service/service.cc:158]   StreamExecutor device (0): GeForce RTX 2060, Compute Capability 7.5
2019-05-07 08:38:39.818336: I tensorflow/compiler/xla/service/service.cc:158]   StreamEx

In [9]:
# List the files under data/ whose names start with mnist_cnn.
!ls data/mnist_cnn*

data/mnist_cnn_quant_edgetpu.tflite  data/mnist_cnn_quant.tflite


Upload the TFLite model to the [online Edge TPU compiler](https://coral.withgoogle.com/web-compiler/) to get your final model:

![](edge_compiler.png)