In [None]:
import tensorflow as tf
import numpy as np
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, SeparableConv2D
import model_architectures as model_archs
import time

# %matplotlib notebook
%matplotlib inline
import matplotlib.pyplot as plt


In [None]:
## Print out some system information
import subprocess


def _print_command_output(argv, echo=True):
    """Run a command and print what it wrote to standard output.

    Args:
        argv: Command and its arguments as a list of strings; passed to
            ``subprocess.run`` directly (no shell).
        echo: When true, first print the command line as ``$ ...``.

    A missing executable is reported in the output instead of being raised,
    so one absent tool does not abort the rest of this report.
    """
    if echo:
        print('$ ' + ' '.join(argv))
    try:
        result = subprocess.run(argv, stdout=subprocess.PIPE)
    except FileNotFoundError:
        print('(%s not found)' % argv[0])
        print('')
        return
    print(result.stdout.decode('utf-8'))


print('Linux kernel version:')
_print_command_output(['uname', '-r'])

print('Linux release:')
_print_command_output(['lsb_release', '-a'])

print('Tensorflow python module version')
print(tf.__version__)
print('')

# The edgetpu package is only required when running on Edge TPU hardware
# (use_edgetpu = True further down); keep this report usable without it.
try:
    import edgetpu
    import edgetpu.basic.edgetpu_utils
    edgetpu_available = True
except ImportError:
    edgetpu_available = False

print('Edge TPU python module version:')
if edgetpu_available:
    print(edgetpu.__version__)
else:
    print('(edgetpu module not installed)')
print('')

print('Edge TPU compiler version:')
_print_command_output(['edgetpu_compiler', '--version'], echo=False)

print('Edge TPU runtime version:')
if edgetpu_available:
    print(edgetpu.basic.edgetpu_utils.GetRuntimeVersion())
else:
    print('(edgetpu module not installed)')
print('')

print('Paths of available Edge TPU devices, if any:')
if edgetpu_available:
    print(edgetpu.basic.edgetpu_utils.ListEdgeTpuPaths(edgetpu.basic.edgetpu_utils.EDGE_TPU_STATE_NONE))
else:
    print('(edgetpu module not installed)')

In [None]:
## Build a very simple model
# Shape of one input image (no batch axis). It is passed to `Input(shape=...)`
# below and also sizes the random calibration and test images.
# Other sizes that have been tried:
# image_shape = (512, 512, 3)
# image_shape = (384, 384, 3)
image_shape = (300, 300, 3)

def representative_dataset_gen():
    """Yield random single-image batches used as the converter's calibration data.

    Each item is a one-element list holding a ``tf.random.normal`` tensor of
    shape ``[1] + list(image_shape)``.
    """
    num_calibration_images = 10  #  Slow. About 1 second per calibration image.
    batch_shape = [1, *image_shape]
    for _ in range(num_calibration_images):
        # Alternative calibration distribution that was tried:
        #   tf.random.uniform(batch_shape, minval=0, maxval=1, dtype=tf.dtypes.float32)
        yield [tf.random.normal(batch_shape)]

# Minimal network: the input feeds one 29-filter, 3x3, 'same'-padded convolution.
x = Input(shape=image_shape)
conv_layer = Conv2D(filters=29, kernel_size=(3, 3), padding='same')
y = conv_layer(x)
# Other single-layer variants to swap in for the convolution above:
# y = MaxPooling2D(pool_size=(2, 2))(x)
# y = SeparableConv2D(32, (3, 3), padding='same')(x)
model = Model(inputs=x, outputs=y)

# Alternative architectures from the `model_architectures` module:
# model = model_archs.build_test(image_shape)
# model = model_archs.build_vgg16_novel(image_shape, n_gt_chans=5,
#                                       resolution='2s', separable_conv=True,
#                                       batchnorm='none')

model.summary()

In [None]:
## Convert to tensorflow lite model and save...
converter = tf.lite.TFLiteConverter.from_keras_model(model) # TF2.0
# model.save('model_keras', include_optimizer=False) # TF1.15
# converter = tf.lite.TFLiteConverter.from_keras_model_file('model_keras') # TF1.15

# Quantize using the calibration batches produced by representative_dataset_gen().
converter.optimizations = [tf.lite.Optimize.DEFAULT]
converter.representative_dataset = tf.lite.RepresentativeDataset(representative_dataset_gen)
converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8] # For EdgeTPU, no float ops allowed

tflite_model = converter.convert()

# Write through a context manager so the file is flushed and closed before
# the Edge TPU compiler cell reads 'model.tflite'.
with open('model.tflite', 'wb') as tflite_file:
    tflite_file.write(tflite_model)

In [None]:
##================================================================================================
## Set the variable below to choose where inference runs. If not using an Edge TPU model and
## hardware, also comment out the `edgetpu_compiler` line in the next (%%bash) cell.
##================================================================================================
# True:  Use EdgeTPU model ('model_edgetpu.tflite') and process on the Edge TPU (assumes one is available)
# False: Use TFLite model ('model.tflite') and process on CPU
use_edgetpu = True

In [None]:
%%bash
## Compile model for edge TPU
# Input is 'model.tflite', written by the conversion cell above.
# Note that the output file name has '_edgetpu' appended to the root filename of the input
# TFLite model, i.e. 'model_edgetpu.tflite', which the interpreter cell loads when use_edgetpu is True.
# NOTE(review): this runs regardless of use_edgetpu; comment it out when no Edge TPU compiler is installed.
edgetpu_compiler --min_runtime_version 12 --show_operations 'model.tflite'

In [None]:
## Load TFLite model and allocate tensors.
if not use_edgetpu:
    # CPU path: plain TFLite model with the interpreter bundled in TensorFlow.
    interpreter = tf.lite.Interpreter(model_path='model.tflite')
else:
    # Edge TPU path: compiled model, using the interpreter from the
    # tflite_runtime package with the Edge TPU delegate library.
    from tflite_runtime.interpreter import Interpreter, load_delegate
    edgetpu_delegate = load_delegate('libedgetpu.so.1.0')
    interpreter = Interpreter(
        model_path='model_edgetpu.tflite',
        model_content=None,
        experimental_delegates=[edgetpu_delegate],
    )

#     # If using interpreter from full TensorFlow package instead...
#     from tensorflow.lite.python.interpreter import load_delegate
#     interpreter = tf.lite.Interpreter(model_path='model_edgetpu.tflite',
#                                       experimental_delegates=[load_delegate('libedgetpu.so.1.0')])

interpreter.allocate_tensors()

# Input/output tensor descriptions; their 'index' entries are what
# set_tensor()/get_tensor() are called with later.
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()

In [None]:
## Put some random data through the model and show results
# Create a batch of images
batch_size = 2
image = tf.random.uniform([batch_size] + list(image_shape))
# set_tensor() is documented to take a numpy array, so convert the eager
# tensor once here rather than passing tensor slices inside the timed loop.
image_np = image.numpy()

# Process the images with the network model, one invocation per image.
t_all = time.time()
t_individual = np.zeros(batch_size)
model_outputs = []  # output array for each image, in batch order
for i_im in range(batch_size):
    t_one = time.time()
    # Set input tensor and invoke model
    interpreter.set_tensor(input_details[0]['index'], image_np[i_im:i_im+1])
    interpreter.invoke()   # Can be slow if running on CPU

    # The function `get_tensor()` returns a copy of the tensor data.
    # Use `tensor()` in order to get a pointer to the tensor.
    model_output = interpreter.get_tensor(output_details[0]['index'])
    model_outputs.append(model_output)
    t_individual[i_im] = time.time() - t_one
print('Model processing took %f seconds.' % (time.time() - t_all))
print('Individual image processing times:')
print(t_individual)

# Plot results for first channel of input and output, of the first
# image in the batch. The output is taken from model_outputs[0]; after the
# loop `model_output` holds the result for the LAST image, so it must not be
# paired with the input of image 0.
in_chan0 = image_np[0, :, :, 0]
out_chan0 = model_outputs[0][0, :, :, 0]
plt.figure(figsize=(16, 8))

for i_panel, (label, chan0) in enumerate((('Input', in_chan0), ('Output', out_chan0)), start=1):
    plt.subplot(1, 2, i_panel)
    plt.imshow(chan0, aspect='equal')
    plt.title('%s max chan0 value: %f' % (label, tf.reduce_max(chan0)))
    plt.clim(0, 1)  # same fixed color range on both panels
    plt.colorbar()