https://github.com/tensorflow/tensorflow/tree/master/tensorflow/lite/tools/evaluation/tasks/coco_object_detection

## Model Convert

### Set up

In [3]:
import os
os.chdir(r'c:/tensorflow1/models/research/object_detection')
os.getcwd()

'c:\\tensorflow1\\models\\research\\object_detection'

In [4]:
import tensorflow as tf

### Export

In [None]:
!python export_tflite_ssd_graph.py \
        --pipeline_config_path=training/ssd_mobilenet_v2.config \
        --trained_checkpoint_prefix=training/ssd_mobilenet/model.ckpt-9105 \
        --output_directory=tflite \
        --add_postprocessing_op=true

### Convert

In [None]:
in_model = "./tflite/tflite_graph.pb"
out_model = "./tflite/tflite_gprah.tflite"

# check input_arrays and output_arrays of model
gf = tf.GraphDef()   
m_file = open(in_model,'rb')
gf.ParseFromString(m_file.read())

with open('./tflite/graph.txt', 'a') as the_file:
    for n in gf.node:
        the_file.write(n.name+'\n')

file = open('./tflite/graph.txt','r')
data = file.readlines()
print ("output name = ")
print (data[len(data)-1])

print ("Input name = ")
file.seek ( 0 )
print (file.readline())

In [None]:
input_arrays = ["normalized_input_image_tensor"]
output_arrays = ['TFLite_Detection_PostProcess','TFLite_Detection_PostProcess:1',
                 'TFLite_Detection_PostProcess:2','TFLite_Detection_PostProcess:3']
input_shapes = {"normalized_input_image_tensor":[1,300,300,3]}

In [None]:
converter = tf.lite.TFLiteConverter.from_frozen_graph(in_model, input_arrays,
                                                      output_arrays, input_shapes)
#converter.post_training_quantize = True
converter.allow_custom_ops = True

In [None]:
tflite_model = converter.convert()

with open(out_model, "wb") as f:
    f.write(tflite_model)

## Inference

In [2]:
import os
import argparse
import cv2
import numpy as np
import sys
import glob
import importlib.util
import time

os.chdir(r'c:/tensorflow1/models/research/object_detection')
os.getcwd()

'c:\\tensorflow1\\models\\research\\object_detection'

In [3]:
# Minimum confidence threshold for displaying detected objects
min_conf_threshold = float(0.5)

In [49]:
# Path to .tflite file, which contains the model that is used for object detection
PATH_TO_CKPT = 'tflite/ssd_mobilenet_q/tflite_graph.tflite'

# Path to label map file
PATH_TO_LABELS = 'tflite/labelmap.txt'
# Load the label map
with open(PATH_TO_LABELS, 'r') as f:
    labels = [line.strip() for line in f.readlines()]

# Have to do a weird fix for label map if using the COCO "starter model" from
# https://www.tensorflow.org/lite/models/object_detection/overview
# First label is '???', which has to be removed.
if labels[0] == '???':
    del(labels[0])

In [50]:
labels

['thistle']

In [52]:
# Load the Tensorflow Lite model.
# If using Edge TPU, use special load_delegate argument
from tensorflow.lite.python.interpreter import Interpreter

interpreter = Interpreter(model_path=PATH_TO_CKPT)

interpreter.allocate_tensors()

In [53]:
# Get model details
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()
height = input_details[0]['shape'][1]
width = input_details[0]['shape'][2]

floating_model = (input_details[0]['dtype'] == np.float32)

input_mean = 127.5
input_std = 127.5

In [54]:
floating_model

True

### Image Detection

In [55]:
# images to perform detection on
images = 'test/'
os.listdir(images)

['DJI_0032.JPG',
 'DJI_0033.JPG',
 'DJI_0034.JPG',
 'DJI_0036.JPG',
 'DJI_0037.JPG',
 'DJI_0038.JPG',
 'DJI_0039.JPG',
 'DJI_0040.JPG',
 'DJI_0041.JPG',
 'DJI_0042.JPG',
 'weed.jpg',
 'weeds.jpg']

In [56]:
latency = list()
# Loop over every image and perform detection
for img in os.listdir(images):
    start = time.clock()
    image_path = images + img
    # Load image and resize to expected shape [1xHxWx3]
    image = cv2.imread(image_path)
    image = cv2.resize(image, (640, 480), interpolation=cv2.INTER_LINEAR)
    image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    imH, imW, _ = image.shape 
    image_resized = cv2.resize(image_rgb, (width, height))
    input_data = np.expand_dims(image_resized, axis=0)

    # Normalize pixel values if using a floating model (i.e. if model is non-quantized)
    if floating_model:
        input_data = (np.float32(input_data) - input_mean) / input_std

    # Perform the actual detection by running the model with the image as input
    interpreter.set_tensor(input_details[0]['index'],input_data)
    interpreter.invoke()

    # Retrieve detection results
    boxes = interpreter.get_tensor(output_details[0]['index'])[0] # Bounding box coordinates of detected objects
    classes = interpreter.get_tensor(output_details[1]['index'])[0] # Class index of detected objects
    scores = interpreter.get_tensor(output_details[2]['index'])[0] # Confidence of detected objects
    #num = interpreter.get_tensor(output_details[3]['index'])[0]  # Total number of detected objects (inaccurate and not needed)

    # Loop over all detections and draw detection box if confidence is above minimum threshold
    for i in range(len(scores)):
        if ((scores[i] > min_conf_threshold) and (scores[i] <= 1.0)):

            # Get bounding box coordinates and draw box
            # Interpreter can return coordinates that are outside of image dimensions, need to force them to be within image using max() and min()
            ymin = int(max(1,(boxes[i][0] * imH)))
            xmin = int(max(1,(boxes[i][1] * imW)))
            ymax = int(min(imH,(boxes[i][2] * imH)))
            xmax = int(min(imW,(boxes[i][3] * imW)))
            
            cv2.rectangle(image, (xmin,ymin), (xmax,ymax), (10, 255, 0), 2)

            # Draw label
            #object_name = labels[0] 
            object_name = labels[int(classes[i])] # Look up object name from "labels" array using class index
            label = '%s: %d%%' % (object_name, int(scores[i]*100)) # Example: 'person: 72%'
            labelSize, baseLine = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.7, 2) # Get font size
            label_ymin = max(ymin, labelSize[1] + 10) # Make sure not to draw label too close to top of window
            # Draw white box to put label text in
            cv2.rectangle(image, (xmin, label_ymin-labelSize[1]-10), (xmin+labelSize[0], label_ymin+baseLine-10), (255, 255, 255), cv2.FILLED) 
            cv2.putText(image, label, (xmin, label_ymin-7), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 0), 2) # Draw label text

    end = time.clock()
    latency.append(end - start)
    # All the results have been drawn on the image, now display the image
    cv2.imshow('Object detector', image)

    # Press any key to continue to next image, or press 'q' to quit
    if cv2.waitKey(0) == ord('q'):
        break

# Clean up
cv2.destroyAllWindows()

In [57]:
classes

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], dtype=float32)

In [58]:
scores

array([0.9976064 , 0.987175  , 0.12670368, 0.12670368, 0.12670368,
       0.05341607, 0.0523698 , 0.04061228, 0.02609569, 0.02609569],
      dtype=float32)

In [59]:
print('average latency is: ', np.mean(latency))
print('latency per image is: ', latency)

average latency is:  0.9892215583333458
latency per image is:  [0.990690000000086, 1.001221600000008, 1.0038567000001422, 1.0172973999999613, 1.0194687999999132, 0.9990970999999718, 0.9912228000000596, 1.0103646000000026, 1.0030853000000661, 1.0219426999999541, 0.802298300000075, 1.0101133999999092]


### Video Detection

In [60]:
i = 0
latency = 0
fps = 0

# Open video file
video = cv2.VideoCapture('weed.mp4')
#video.set(CV_CAP_PROP_FOURCC, CV_FOURCC('A', 'V', 'C', '1'))
imW = video.get(cv2.CAP_PROP_FRAME_WIDTH)
imH = video.get(cv2.CAP_PROP_FRAME_HEIGHT)

while(video.isOpened()):
    start = time.clock()
    ret, frame = video.read()
    if not ret:
        pass
    # resize to expected shape [1xHxWx3]
    frame = cv2.resize(frame, (640, 480), interpolation=cv2.INTER_LINEAR)
    frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    frame_resized = cv2.resize(frame_rgb, (width, height))
    input_data = np.expand_dims(frame_resized, axis=0)

    # Normalize pixel values if using a floating model (i.e. if model is non-quantized)
    if floating_model:
        input_data = (np.float32(input_data) - input_mean) / input_std

    # Perform the actual detection by running the model with the image as input
    interpreter.set_tensor(input_details[0]['index'], input_data)
    interpreter.invoke()

    # Retrieve detection results
    boxes = interpreter.get_tensor(output_details[0]['index'])[0] # Bounding box coordinates of detected objects
    classes = interpreter.get_tensor(output_details[1]['index'])[0] # Class index of detected objects
    scores = interpreter.get_tensor(output_details[2]['index'])[0] # Confidence of detected objects
    #num = interpreter.get_tensor(output_details[3]['index'])[0]  # Total number of detected objects (inaccurate and not needed)

    # Loop over all detections and draw detection box if confidence is above minimum threshold
    for i in range(len(scores)):
        if ((scores[i] > min_conf_threshold) and (scores[i] <= 1.0)):

            # Get bounding box coordinates and draw box
            # Interpreter can return coordinates that are outside of image dimensions, need to force them to be within image using max() and min()
            ymin = int(max(1,(boxes[i][0] * imH)))
            xmin = int(max(1,(boxes[i][1] * imW)))
            ymax = int(min(imH,(boxes[i][2] * imH)))
            xmax = int(min(imW,(boxes[i][3] * imW)))
            
            cv2.rectangle(frame, (xmin,ymin), (xmax,ymax), (10, 255, 0), 4)

            # Draw label
            #object_name = labels[0] 
            object_name = labels[int(classes[i])] # Look up object name from "labels" array using class index
            label = '%s: %d%%' % (object_name, int(scores[i]*100)) # Example: 'person: 72%'
            labelSize, baseLine = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.7, 2) # Get font size
            label_ymin = max(ymin, labelSize[1] + 10) # Make sure not to draw label too close to top of window
            # Draw white box to put label text in
            cv2.rectangle(frame, (xmin, label_ymin-labelSize[1]-10), (xmin+labelSize[0], label_ymin+baseLine-10), (255, 255, 255), cv2.FILLED) 
            cv2.putText(frame, label, (xmin, label_ymin-7), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 0), 2) # Draw label text

    end = time.clock()
    i = i + 1
    latency = latency + (end - start)
    fps = fps + 1.0 / (end - start)
    # All the results have been drawn on the image, now display the image
    cv2.imshow('Object detector', frame)


    # Press 'q' to quit
    if cv2.waitKey(1) == ord('q'):
        break

# Clean up
video.release()
cv2.destroyAllWindows()

In [38]:
print('average latency is: ', (latency / i))
print('average fps is:', (fps / i))

average latency is:  6.7429033800000635
average fps is: 109.68058377653487
