# Train a detector with TensorFlow Lite Model Maker

## Prerequisites


### Install the required packages
Start by installing the required packages, including the Model Maker package from the [GitHub repo](https://github.com/tensorflow/examples/tree/master/tensorflow_examples/lite/model_maker) and the pycocotools library you'll use for evaluation.

In [None]:
#!pip install -q tflite-model-maker
#!pip install -q pycocotools

Import the required packages.

In [1]:
import numpy as np
import os

import tflite_model_maker
from tflite_model_maker.config import ExportFormat
from tflite_model_maker import model_spec
from tflite_model_maker import object_detector


import tensorflow as tf
assert tf.__version__.startswith('2')

tf.get_logger().setLevel('ERROR')
from absl import logging
logging.set_verbosity(logging.ERROR)

 The versions of TensorFlow you are currently using is 2.4.0 and is not supported. 
Some things might work, some things might not.
If you were to encounter a bug, do not file an issue.
If you want to make sure you're using a tested and supported configuration, either change the TensorFlow version or the TensorFlow Addons's version. 
You can find the compatibility matrix in TensorFlow Addon's readme:
https://github.com/tensorflow/addons


### Prepare the dataset

## Train your CARLA object detection model

There are six steps to training an object detection model:

**Step 1. Choose an object detection model architecture.**

This tutorial uses the EfficientDet-Lite2 model. EfficientDet-Lite[0-4] are a family of mobile/IoT-friendly object detection models derived from the [EfficientDet](https://arxiv.org/abs/1911.09070) architecture. 

Here is how the EfficientDet-Lite models compare with one another.

| Model architecture | Size(MB)* | Latency(ms)** | Average Precision*** |
|--------------------|-----------|---------------|----------------------|
| EfficientDet-Lite0 | 4.4       | 37            | 25.69%               |
| EfficientDet-Lite1 | 5.8       | 49            | 30.55%               |
| EfficientDet-Lite2 | 7.2       | 69            | 33.97%               |
| EfficientDet-Lite3 | 11.4      | 116           | 37.70%               |
| EfficientDet-Lite4 | 19.9      | 260           | 41.96%               |

<i> * Size of the integer quantized models. <br/>
** Latency measured on Pixel 4 using 4 threads on CPU. <br/>
*** Average Precision is the mAP (mean Average Precision) on the COCO 2017 validation dataset.
</i>


In [2]:
# Alternative (disabled): fetch the stock spec by name instead of building it.
#spec1 = model_spec.get('efficientdet_lite2')
# Build the EfficientDet-Lite2 spec explicitly so every setting is visible.
# Note: batch_size=64 is set here, while the later object_detector.create(...)
# call in this notebook passes batch_size=8.
spec2 = tflite_model_maker.object_detector.EfficientDetLite2Spec(
    model_name='efficientdet-lite2',
    uri='https://tfhub.dev/tensorflow/efficientdet/lite2/feature-vector/1', 
    hparams='', model_dir=None, epochs=10, batch_size=64,
    steps_per_execution=1, moving_average_decay=0,
    var_freeze_expr='(efficientnet|fpn_cells|resample_p6)',
    tflite_max_detections=25, strategy=None, tpu=None, gcp_project=None,
    tpu_zone=None, use_xla=False, profile=False, debug=False, tf_random_seed=111111,
    verbose=0)
# Simple marker that the cell finished without raising.
print("ok")

ok


**Step 2. Load the dataset.**

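Model Maker can take input data in the CSV format. Use the `object_detector.DataLoader.from_csv` method to load the dataset and split it into training, validation and test images. In this notebook, the CARLA annotations are instead converted to TFRecord files and loaded with the `object_detector.DataLoader` constructor (see the cells below).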

* Training images: These images are used to train the object detection model to recognize the target classes (here: vehicle, bike, motobike, traffic light and traffic sign).
* Validation images: These are images that the model didn't see during the training process. You'll use them to decide when you should stop the training, to avoid [overfitting](https://en.wikipedia.org/wiki/Overfitting).
* Test images: These images are used to evaluate the final model performance.

You can load the CSV file directly from Google Cloud Storage, but you don't need to keep your images on Google Cloud to use Model Maker. You can specify a local CSV file on your computer, and Model Maker will work just fine.

In [None]:
#!git clone https://github.com/marcoruizrueda/Carla-Object-Detection-Dataset.git carla_training/

In [None]:
# Create CSV from XMLs (works OK!)
import os
import glob
import pandas as pd
import xml.etree.ElementTree as ET

# source and credits:
# https://raw.githubusercontent.com/datitran/raccoon_dataset/master/xml_to_csv.py

def xml_to_csv(path):
    """Collect Pascal-VOC style XML annotations from a folder into a DataFrame.

    Every ``<object>`` element of every ``*.xml`` file directly inside *path*
    becomes one row.

    Fields are looked up by tag name (``size/width``, ``bndbox/xmin``, ...)
    rather than by child position, so files whose elements are ordered
    differently are still read correctly.

    Arguments:
        path: Directory containing the ``*.xml`` annotation files.

    Returns:
        pandas.DataFrame with the columns ``filename, width, height, class,
        xmin, ymin, xmax, ymax``. It is empty (but has these columns) when no
        objects are found.
    """
    column_name = ['filename', 'width', 'height', 'class', 'xmin', 'ymin', 'xmax', 'ymax']
    box_tags = ('xmin', 'ymin', 'xmax', 'ymax')
    xml_list = []
    # Sorted so the row order is reproducible; glob order is arbitrary.
    for xml_file in sorted(glob.glob(os.path.join(path, '*.xml'))):
        root = ET.parse(xml_file).getroot()
        members = root.findall('object')
        if not members:
            # Nothing to emit for this file; do not require its other fields.
            continue

        filename = root.findtext('filename')
        # The images are referenced as .jpeg; swap only a trailing .png
        # extension, not a ".png" that appears elsewhere in the name.
        if filename.endswith('.png'):
            filename = filename[:-len('.png')] + '.jpeg'

        size = root.find('size')
        width = int(size.findtext('width'))
        height = int(size.findtext('height'))

        for member in members:
            box = member.find('bndbox')
            # int(float(...)) also accepts coordinates written as "12.0".
            coords = [int(float(box.findtext(tag))) for tag in box_tags]
            xml_list.append((filename, width, height, member.findtext('name'), *coords))
    return pd.DataFrame(xml_list, columns=column_name)


def train():
    """Write the training XML annotations to train/annotations_dir/train.csv."""
    annotations_dir = 'train/annotations_dir'
    csv_path = 'train/annotations_dir/train.csv'
    # index=None keeps the DataFrame index out of the CSV.
    xml_to_csv(annotations_dir).to_csv(csv_path, index=None)
    print('> tf_wider_train - Successfully converted xml to csv.')

def val():
    """Write the validation XML annotations to test/annotations_dir/val.csv."""
    annotations_dir = 'test/annotations_dir/'
    csv_path = 'test/annotations_dir/val.csv'
    # index=None keeps the DataFrame index out of the CSV.
    xml_to_csv(annotations_dir).to_csv(csv_path, index=None)
    print('> tf_wider_val -  Successfully converted xml to csv.')

# Run both conversions. The paths inside train() and val() are relative, so
# this cell must run from the directory that contains train/ and test/.
train()
val()


In [None]:
# Create TFRECORD from csv
# Change train or test by hand
#!python carla_training/generate_tfrecord.py --csv_input='carla_training/train/annotations_dir/train.csv' --output_path='carla_training/train/annotations_dir/train.record' --image_dir='carla_training/train/images_dir/'
#!python carla_training/generate_tfrecord.py --csv_input='carla_training/test/annotations_dir/val.csv' --output_path='carla_training/test/annotations_dir/val.record' --image_dir='carla_training/test/images_dir/'

In [3]:
#train_data, validation_data, test_data = object_detector.DataLoader.from_csv('gs://cloud-ml-data/img/openimage/csv/salads_ml_use.csv')

#label_map={1:'Vehicle', 2: 'Bike', 3: 'Motobike', 4: 'Traffic light', 5: 'Traffic sign'},

#train_data, validation_data, test_data = object_detector.DataLoader.from_pascal_voc(
#    images_dir='carla_training/Carla-Object-Detection-Dataset/train_data/images_dir/',
#    annotations_dir='carla_training/Carla-Object-Detection-Dataset/train_data/annotations_dir/',
#    label_map=['Vehicle', 'Bike', 'Motobike', 'Traffic light', 'Traffic sign'],
#)

# Class ids shared by the training and validation TFRecords.
CARLA_LABEL_MAP = {
    1: 'vehicle',
    2: 'bike',
    3: 'motobike',
    4: 'traffic_light',
    5: 'traffic_sign',
}

# Each loader gets its own copy of the label map, as in the original cell.
train_data = object_detector.DataLoader(
    '/home/marco/carla_training/train/annotations_dir/train.record',
    size=820,
    label_map=dict(CARLA_LABEL_MAP),
    annotations_json_file=None,
)
validation_data = object_detector.DataLoader(
    '/home/marco/carla_training/test/annotations_dir/val.record',
    size=208,
    label_map=dict(CARLA_LABEL_MAP),
    annotations_json_file=None,
)
print("Done!")

Done!


In [4]:
# Optional: pin numpy to the version this notebook was run with.
#!pip uninstall numpy -y
#!pip install --upgrade numpy==1.17.4
# Display the numpy version currently in use.
import numpy
numpy.__version__

'1.17.4'

**Step 3. Train the TensorFlow model with the training data.**

* The EfficientDet-Lite0 model uses `epochs = 50` by default; the spec created above sets `epochs=10`, which means training goes through the training dataset 10 times. You can look at the validation accuracy during training and stop early to avoid overfitting.
* Set `batch_size = 8` here, so each epoch takes roughly 100 steps to go through the 820 images in the training dataset.
* Set `train_whole_model=True` to fine-tune the whole model instead of just training the head layer to improve accuracy. The trade-off is that it may take longer to train the model.

In [5]:
# Create the detector from the training data and the spec defined above.
# NOTE(review): do_train=False is passed here, which presumably skips training
# even though this step is described as training the model — confirm whether
# that is intended before relying on the evaluation numbers below.
model = object_detector.create(train_data, model_spec=spec2, batch_size=8, train_whole_model=True, validation_data=validation_data, do_train=False)

**Step 4. Evaluate the model with the test data.**

After training the object detection model using the images in the training dataset, evaluate how it performs against data it was not trained on. This notebook does not load a separate test split, so the 208 validation images are used for evaluation.

As the default batch size is 64, it will take only a few steps to go through the 208 images in the validation dataset.

In [7]:
# Evaluate on the validation split; no separate test split is loaded in this notebook.
model.evaluate(validation_data)



{'AP': 0.3293328,
 'AP50': 0.49222553,
 'AP75': 0.3835527,
 'APs': 0.16599874,
 'APm': 0.6696494,
 'APl': 0.8652824,
 'ARmax1': 0.021558926,
 'ARmax10': 0.14325427,
 'ARmax100': 0.37282896,
 'ARs': 0.22770302,
 'ARm': 0.70047987,
 'ARl': 0.87666667,
 'AP_/vehicle': 0.3275013,
 'AP_/bike': 0.52024144,
 'AP_/motobike': 0.6557059,
 'AP_/traffic_light': 0.0679929,
 'AP_/traffic_sign': 0.07522238}

**Step 5.  Export as a TensorFlow Lite model.**

Export the trained object detection model to the TensorFlow Lite format by specifying which folder you want to export the quantized model to. The default post-training quantization technique is full integer quantization.

In [8]:
# Export into the current directory; later cells read the result as 'model.tflite'.
model.export(export_dir='.')

**Step 6.  Evaluate the TensorFlow Lite model.**

Several factors can affect the model accuracy when exporting to TFLite:
* [Quantization](https://www.tensorflow.org/lite/performance/model_optimization) helps shrink the model size by about 4 times at the expense of some accuracy drop. 
* The original TensorFlow model uses per-class [non-max suppression (NMS)](https://www.coursera.org/lecture/convolutional-neural-networks/non-max-suppression-dvrjH) for post-processing, while the TFLite model uses global NMS that's much faster but less accurate.
* Keras outputs a maximum of 100 detections, while TFLite outputs a maximum of 25 detections.

Therefore you'll have to evaluate the exported TFLite model and compare its accuracy with the original TensorFlow model.

In [9]:
# Evaluate the exported 'model.tflite' on the same validation split used for
# the model.evaluate(...) call above, so the two result dicts can be compared.
model.evaluate_tflite('model.tflite', validation_data)



{'AP': 0.32025373,
 'AP50': 0.48261288,
 'AP75': 0.38005114,
 'APs': 0.16053818,
 'APm': 0.6556571,
 'APl': 0.8729645,
 'ARmax1': 0.023187462,
 'ARmax10': 0.14158987,
 'ARmax100': 0.35480383,
 'ARs': 0.22044155,
 'ARm': 0.67954314,
 'ARl': 0.8829167,
 'AP_/vehicle': 0.32600158,
 'AP_/bike': 0.51501876,
 'AP_/motobike': 0.616035,
 'AP_/traffic_light': 0.06708762,
 'AP_/traffic_sign': 0.077125706}

In the next step of the codelab, you'll use the [ObjectDetector API](https://www.tensorflow.org/lite/inference_with_metadata/task_library/object_detector) of the [TensorFlow Lite Task Library](https://www.tensorflow.org/lite/inference_with_metadata/task_library/overview) to integrate the model into the Android app.

## (Optional) Test the TFLite model on your image

You can test the trained TFLite model using images from the internet. 
* Replace the `INPUT_IMAGE_URL` below with your desired input image. 
* Adjust the `DETECTION_THRESHOLD` to change the sensitivity of the model. A lower threshold means the model will pick up more objects, but there will also be more false detections. Meanwhile, a higher threshold means the model will only pick up objects that it has confidently detected.

Although it requires some boilerplate code to run the model in Python at this moment, integrating the model into a mobile app only requires a few lines of code.

In [6]:
#@title Load the trained TFLite model and define some visualization functions

import cv2

from PIL import Image

# Build the index -> class-name list used to label detections. Slots with no
# entry in the label map keep the '???' placeholder.
label_map = model.model_spec.config.label_map
classes = ['???' for _ in range(model.model_spec.config.num_classes)]
for label_id, label_name in label_map.as_dict().items():
  # Label ids are shifted down by one to index the list.
  classes[label_id - 1] = label_name

# One random colour triple per class, used when drawing boxes and labels.
COLORS = np.random.randint(0, 255, size=(len(classes), 3), dtype=np.uint8)

def preprocess_image(image_path, input_size):
  """Read an image file and prepare it as input for the TFLite model.

  Args:
    image_path: Path of the image file to read.
    input_size: Target size passed to `tf.image.resize`.

  Returns:
    A `(resized_img, original_image)` tuple: the resized image with a leading
    axis added, and the decoded 3-channel image before resizing.
  """
  raw_bytes = tf.io.read_file(image_path)
  decoded = tf.io.decode_image(raw_bytes, channels=3)
  original_image = tf.image.convert_image_dtype(decoded, tf.uint8)
  # Resize, then add the leading axis the model input expects.
  resized_img = tf.image.resize(original_image, input_size)[tf.newaxis, :]
  return resized_img, original_image


def set_input_tensor(interpreter, image):
  """Copy `image` into the first element of the interpreter's first input.

  Args:
    interpreter: Object exposing `get_input_details()` and `tensor(index)`.
    image: Array-like assigned into the input buffer.
  """
  input_index = interpreter.get_input_details()[0]['index']
  # tensor(index) returns a callable; calling it yields the buffer to write to.
  input_buffer = interpreter.tensor(input_index)()
  input_buffer[0][:, :] = image


def get_output_tensor(interpreter, index):
  """Return the output tensor at the given index, with size-1 axes removed.

  Args:
    interpreter: Object exposing `get_output_details()` and `get_tensor(index)`.
    index: Position of the wanted entry in `get_output_details()`.

  Returns:
    The tensor as a numpy array after `np.squeeze`.
  """
  tensor_index = interpreter.get_output_details()[index]['index']
  return np.squeeze(interpreter.get_tensor(tensor_index))


def detect_objects(interpreter, image, threshold):
  """Run the model on `image` and return the detections that pass `threshold`.

  Args:
    interpreter: TFLite interpreter to feed and invoke.
    image: Preprocessed input passed to `set_input_tensor`.
    threshold: Minimum score (inclusive) for a detection to be kept.

  Returns:
    A list of dicts with the keys 'bounding_box', 'class_id' and 'score'.
  """
  set_input_tensor(interpreter, image)
  interpreter.invoke()

  # This code reads outputs 0..3 as boxes, class ids, scores and count.
  boxes = get_output_tensor(interpreter, 0)
  class_ids = get_output_tensor(interpreter, 1)
  scores = get_output_tensor(interpreter, 2)
  count = int(get_output_tensor(interpreter, 3))

  return [
      {
        'bounding_box': boxes[i],
        'class_id': class_ids[i],
        'score': scores[i]
      }
      for i in range(count)
      if scores[i] >= threshold
  ]


def run_odt_and_draw_results(image_path, interpreter, threshold=0.5):
  """Detect objects in an image file and draw boxes and labels onto it.

  Args:
    image_path: Path of the image to run detection on.
    interpreter: TFLite interpreter holding the detection model.
    threshold: Minimum score for a detection to be drawn.

  Returns:
    A uint8 numpy array: the original-resolution image with the detections
    drawn on it.
  """
  # The model input shape is unpacked as (_, height, width, _).
  input_shape = interpreter.get_input_details()[0]['shape']
  _, input_height, input_width, _ = input_shape

  model_input, source_image = preprocess_image(
      image_path,
      (input_height, input_width)
    )
  detections = detect_objects(interpreter, model_input, threshold=threshold)

  canvas = source_image.numpy().astype(np.uint8)
  img_height = canvas.shape[0]
  img_width = canvas.shape[1]

  for det in detections:
    # Box values are relative; scale them to pixels of the original image.
    ymin, xmin, ymax, xmax = det['bounding_box']
    left = int(xmin * img_width)
    right = int(xmax * img_width)
    top = int(ymin * img_height)
    bottom = int(ymax * img_height)

    class_id = int(det['class_id'])
    color = [int(c) for c in COLORS[class_id]]
    cv2.rectangle(canvas, (left, top), (right, bottom), color, 2)

    # Put the label above the box, or below its top edge when the box is
    # too close to the top of the image for the text to fit.
    if top - 15 > 15:
      label_y = top - 15
    else:
      label_y = top + 15
    label = "{}: {:.0f}%".format(classes[class_id], det['score'] * 100)
    cv2.putText(canvas, label, (left, label_y),
        cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)

  return canvas.astype(np.uint8)

In [12]:
#@title Run object detection and show the detection results

model_path = 'model.tflite'
#INPUT_IMAGE_URL = "https://techtime.news/wp-content/uploads/sites/2/2017/10/Cognata-Simulation-Engine.png" #@param {type:"string"}
INPUT_IMAGE_URL = "https://carla.readthedocs.io/en/0.9.7/img/low_quality_capture.png"

DETECTION_THRESHOLD = 0.3 #@param {type:"number"}

TEMP_FILE = '/tmp/result.png'

!wget -q -O $TEMP_FILE $INPUT_IMAGE_URL
im = Image.open(TEMP_FILE)
im.thumbnail((512, 512), Image.ANTIALIAS)
im.save(TEMP_FILE, 'PNG')

# Load the TFLite model
interpreter = tf.lite.Interpreter(model_path=model_path)
interpreter.allocate_tensors()

# Run inference and draw detection result on the local copy of the original file
detection_result_image = run_odt_and_draw_results(
    TEMP_FILE, 
    interpreter, 
    threshold=DETECTION_THRESHOLD
)

# Show the detection result
im = Image.fromarray(detection_result_image)
im.thumbnail((512, 512), Image.ANTIALIAS)
im.save("result2.png")

In [13]:
#!curl https://packages.cloud.google.com/apt/doc/apt-key.gpg | sudo apt-key add -

#!echo "deb https://packages.cloud.google.com/apt coral-edgetpu-stable main" | sudo tee /etc/apt/sources.list.d/coral-edgetpu.list

#!sudo apt-get update

#!sudo apt-get install edgetpu-compiler

In [9]:
# Compile the exported model.tflite with the Edge TPU compiler (shell command).
!edgetpu_compiler --min_runtime_version 13 model.tflite

Edge TPU Compiler version 15.0.340273435

Model compiled successfully in 6159 ms.

Input model: model.tflite
Input size: 7.04MiB
Output model: model_edgetpu.tflite
Output size: 9.42MiB
On-chip memory used for caching model parameters: 7.26MiB
On-chip memory remaining for caching model parameters: 768.00B
Off-chip memory used for streaming uncached model parameters: 159.12KiB
Number of Edge TPU subgraphs: 1
Total number of operations: 357
Operation log: model_edgetpu.log

Model successfully compiled but not all operations are supported by the Edge TPU. A percentage of the model will instead run on the CPU, which is slower. If possible, consider updating your model to use only operations supported by the Edge TPU. For details, visit g.co/coral/model-reqs.
Number of operations that will run on Edge TPU: 351
Number of operations that will run on CPU: 6
See the operation log file for individual operation details.


In [None]:
# Disabled cell: everything below is wrapped in a string literal, so none of
# it is executed. It sketches how to write train_data.label_map out as
# model_label_map.pbtxt.
'''# Create labelmap from train_data
def convert_classes(ids, label_map_dic, start=1):
    msg = ''
    for id_ in ids:
        msg = msg + "item {\n"
        msg = msg + " id: " + str(id_) + "\n"
        msg = msg + " name: '" + label_map_dic[id_] + "'\n}\n\n"
    return msg[:-1]

ids = list(train_data.label_map.keys())
label_map_dic = train_data.label_map

label_map = convert_classes(ids, label_map_dic)
with open("model_label_map.pbtxt", "w") as f:
    f.write(label_map)
    f.close()
    print("Done!")
'''

In [None]:
#print(train_data.label_map)
#!cp /tmp/tmpu59t79wt/train_d097cf62ad50863cf2d989dd68fa080b_annotations.json .

In [None]:
%%bash
# Stop at the first failing command, so a failed clone or cd can never lead
# to `pip install .` running in the wrong directory.
set -e

# Fetch the TensorFlow models repository; skipped when it is already cloned
# so the cell can be re-run.
[ -d models ] || git clone https://github.com/tensorflow/models.git

# Install the Object Detection API
cd models/research/
protoc object_detection/protos/*.proto --python_out=.
cp object_detection/packages/tf2/setup.py .
python -m pip install .

In [None]:
# This is an example of using 
# https://github.com/tensorflow/models/blob/master/research/object_detection/dataset_tools/create_pascal_tf_record.py
# The structure should be like PASCAL VOC format dataset
# +Dataset
#   +Annotations
#   +JPEGImages
# python create_tfrecords_from_xml.py --image_dir=dataset/JPEGImages 
#                                      --annotations_dir=dataset/Annotations 
#                                      --label_map_path=object-detection.pbtxt 
#                                      --output_path=data.record
%tb
import hashlib
import io
import logging
import os

from lxml import etree
import PIL.Image
import tensorflow as tf

from object_detection.utils import dataset_util
from object_detection.utils import label_map_util


def dict_to_tf_example(data, image_dir, label_map_dict):
    """Convert XML derived dict to tf.Example proto.

    Notice that this function normalizes the bounding
    box coordinates provided by the raw data.

    An object that lacks a required field, or whose name is not in
    ``label_map_dict``, is skipped as a whole and reported, so the per-object
    feature lists always have the same length.

    Arguments:
        data: dict holding XML fields for a single image (obtained by
          running dataset_util.recursive_parse_xml_to_dict)
        image_dir: Path to image directory.
        label_map_dict: A map from string label names to integers ids.

    Returns:
        example: The converted tf.Example.
    """
    filename = data['filename']
    # The images on disk use the .jpeg extension. Swap only a trailing ".png";
    # a plain str.replace("png", ...) would also rewrite "png" elsewhere in
    # the name.
    if filename.endswith('.png'):
        jpeg_filename = filename[:-len('.png')] + '.jpeg'
    else:
        jpeg_filename = filename

    full_path = os.path.join(image_dir, jpeg_filename)
    with tf.compat.v1.gfile.GFile(full_path, 'rb') as fid:
        encoded_jpg = fid.read()
    # Opened only as a sanity check that the bytes are a readable image.
    # NOTE: the actual format is not verified, while 'image/format' below is
    # always written as 'jpeg'.
    PIL.Image.open(io.BytesIO(encoded_jpg))
    key = hashlib.sha256(encoded_jpg).hexdigest()

    width = int(data['size']['width'])
    height = int(data['size']['height'])

    xmin = []
    ymin = []
    xmax = []
    ymax = []
    classes = []
    classes_text = []

    objects = data.get('object', [])
    if not objects:
        print(filename + ' without objects!')

    for obj in objects:
        # Resolve every field first so a bad object cannot leave the lists
        # partially appended.
        try:
            name = obj['name']
            label = label_map_dict[name]
            box = obj['bndbox']
            rel_xmin = float(box['xmin']) / width
            rel_ymin = float(box['ymin']) / height
            rel_xmax = float(box['xmax']) / width
            rel_ymax = float(box['ymax']) / height
        except KeyError as err:
            print('%s: skipping object (missing field or unknown label %s)'
                  % (filename, err))
            continue
        xmin.append(rel_xmin)
        ymin.append(rel_ymin)
        xmax.append(rel_xmax)
        ymax.append(rel_ymax)
        classes_text.append(name.encode('utf8'))
        classes.append(label)

    difficult_obj = [0]*len(classes)
    example = tf.train.Example(features=tf.train.Features(feature={
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(jpeg_filename.encode('utf8')),
        'image/source_id': dataset_util.bytes_feature(filename.encode('utf8')),
        'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')),
        'image/encoded': dataset_util.bytes_feature(encoded_jpg),
        'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmin),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmax),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymin),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymax),
        'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
        'image/object/class/label': dataset_util.int64_list_feature(classes),
        'image/object/difficult': dataset_util.int64_list_feature(difficult_obj)
    }))
    return example


def main(_):
    """Write every XML annotation of one dataset subset into a TFRecord file.

    Reads ``<dataset_dir>/<SUBSET>/annotations_dir/*.xml`` together with the
    matching images in ``<dataset_dir>/<SUBSET>/images_dir`` and writes
    ``<dataset_dir>/<SUBSET>.record``. Change ``SUBSET`` to convert another
    split; the output file name follows it.

    Arguments:
        _: Unused; present because the function is started through
          ``tf.compat.v1.app.run()``.
    """
    SUBSET = 'test'
    dataset_dir = 'carla_training'

    # Resolve all inputs before opening the writer, so a missing label map or
    # annotation folder does not leave an empty record file behind.
    label_map_dict = label_map_util.get_label_map_dict(os.path.join(dataset_dir, 'carla_label_map.pbtxt'))
    image_dir = os.path.join(dataset_dir, SUBSET, 'images_dir')
    annotations_dir = os.path.join(dataset_dir, SUBSET, 'annotations_dir')
    output_path = os.path.join(dataset_dir, SUBSET + '.record')

    logging.info('Reading from dataset: ' + annotations_dir)
    # Keep only the XML files (sorted for a reproducible record order) so the
    # progress counter reflects the files actually converted.
    examples_list = sorted(
        name for name in os.listdir(annotations_dir) if name.endswith('.xml'))

    # The context manager closes the writer even if a conversion fails.
    with tf.compat.v1.python_io.TFRecordWriter(output_path) as writer:
        for idx, example in enumerate(examples_list):
            if idx % 50 == 0:
                print('On image %d of %d' % (idx, len(examples_list)))

            path = os.path.join(annotations_dir, example)
            with tf.compat.v1.gfile.GFile(path, 'r') as fid:
                xml_str = fid.read()
            xml = etree.fromstring(xml_str)
            data = dataset_util.recursive_parse_xml_to_dict(xml)['annotation']

            tf_example = dict_to_tf_example(data, image_dir, label_map_dict)
            writer.write(tf_example.SerializeToString())


# Entry point: hand control to tf.compat.v1.app.run(), which is expected to
# call main() defined above.
# NOTE(review): inside a notebook kernel this may trip over the kernel's own
# command-line arguments or end in SystemExit — confirm, or call main(None)
# directly.
if __name__ == '__main__':
    tf.compat.v1.app.run()