# About
Convert ssdlite_mobiledet_edgetpu_320x320_coco_2020_05_19 to Core ML format


**Currently the converted model seems to have accuracy problems.**

Notes:
* TFLite_Detection_PostProcess is not supported by coremltools 

https://github.com/apple/coremltools/issues/642
https://github.com/tensorflow/tensorflow/blob/master/tensorflow/lite/kernels/detection_postprocess.cc
https://github.com/tensorflow/models/blob/master/research/object_detection/box_coders/keypoint_box_coder.py
https://github.com/tensorflow/models/blob/master/research/object_detection/export_tflite_ssd_graph_lib.py


* TFLite_Detection_PostProcess is similar to a box decoder followed by Non-Max Suppression (NMS), but it is optimized for TFLite and is not supported by the default TensorFlow runtime. So we need to add extra layers to decode the anchor boxes and perform NMS after converting from TF to Core ML.

* The tensors that feed TFLite_Detection_PostProcess are:
```
['raw_outputs/box_encodings', 'raw_outputs/class_predictions', 'anchors']
raw_outputs/box_encodings has shape: [1,2034,4]
raw_outputs/class_predictions has shape: [1,2034,91]
```

* The code below is based on this code:

https://github.com/hollance/coreml-survival-guide/blob/master/MobileNetV2%2BSSDLite/ssdlite.py

* WARNING: the original code uses much older versions:
Python 3.6.5, Tensorflow 1.7.0, coremltools 2.0, tfcoreml 0.3.0


* TF2 is required for the EdgeTPU version; otherwise the following error occurs:
```
InvalidArgumentError: NodeDef mentions attr 'exponential_avg_factor' not in Op<name=FusedBatchNormV3; signature=x:T, scale:U, offset:U, mean:U, variance:U -> y:T, batch_mean:U, batch_variance:U, reserve_space_1:U, reserve_space_2:U, reserve_space_3:U; attr=T:type,allowed=[DT_HALF, DT_BFLOAT16, DT_FLOAT]; attr=U:type,allowed=[DT_FLOAT]; attr=epsilon:float,default=0.0001; attr=data_format:string,default="NHWC",allowed=["NHWC", "NCHW"]; attr=is_training:bool,default=true>; NodeDef: {{node FeatureExtractor/MobileDetEdgeTPU/Conv/BatchNorm/FusedBatchNormV3}}. (Check whether your GraphDef-interpreting binary is up to date with your GraphDef-generating binary.).
```

# Download the model and set up the environment

In [None]:
!wget http://download.tensorflow.org/models/object_detection/ssdlite_mobiledet_edgetpu_320x320_coco_2020_05_19.tar.gz
!tar -zxvf ssdlite_mobiledet_edgetpu_320x320_coco_2020_05_19.tar.gz
!wget https://raw.githubusercontent.com/hollance/coreml-survival-guide/master/MobileNetV2%2BSSDLite/coco_labels.txt

--2021-08-07 13:34:28--  http://download.tensorflow.org/models/object_detection/ssdlite_mobiledet_edgetpu_320x320_coco_2020_05_19.tar.gz
Resolving download.tensorflow.org (download.tensorflow.org)... 172.217.193.128, 2607:f8b0:400c:c03::80
Connecting to download.tensorflow.org (download.tensorflow.org)|172.217.193.128|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: 156413934 (149M) [application/x-tar]
Saving to: ‘ssdlite_mobiledet_edgetpu_320x320_coco_2020_05_19.tar.gz.1’


2021-08-07 13:34:29 (157 MB/s) - ‘ssdlite_mobiledet_edgetpu_320x320_coco_2020_05_19.tar.gz.1’ saved [156413934/156413934]

ssdlite_mobiledet_edgetpu_320x320_coco_2020_05_19/
ssdlite_mobiledet_edgetpu_320x320_coco_2020_05_19/fp32/
ssdlite_mobiledet_edgetpu_320x320_coco_2020_05_19/fp32/model.ckpt-400000.data-00000-of-00001
ssdlite_mobiledet_edgetpu_320x320_coco_2020_05_19/fp32/model.ckpt-400000.index
ssdlite_mobiledet_edgetpu_320x320_coco_2020_05_19/fp32/model.ckpt-400000.meta
ssdlite_mobilede

In [None]:
!pip install coremltools==4.1



In [None]:
%tensorflow_version 2.x

In [None]:
# Print the interpreter and library versions used for this conversion run,
# so the results can be reproduced (see the version warnings in the notes).
import sys
print(sys.version)

# TensorFlow: loads the frozen graph and writes the intermediate SavedModel.
import tensorflow as tf
print(tf.__version__)

# coremltools: converts the SavedModel and builds the Core ML pipeline.
import coremltools as ct
print(ct.__version__)

3.7.11 (default, Jul  3 2021, 18:01:19) 
[GCC 7.5.0]
2.5.0
4.1


# Convert from TF to Core ML

In [None]:
# --- Source model ------------------------------------------------------------
model_name = 'ssdlite_mobiledet_edgetpu_320x320_coco_2020_05_19'
input_file = 'ssdlite_mobiledet_edgetpu_320x320_coco_2020_05_19/fp32/tflite_graph.pb'

# Both names point at the same intermediate SavedModel directory.
export_dir = saved_model_dir = 'saved_model'

# --- Tensor names inside the frozen graph --------------------------------------
input_name_image = 'normalized_input_image_tensor'
output_name_box = 'raw_outputs/box_encodings'
output_name_score = 'raw_outputs/class_predictions'

# --- Model geometry --------------------------------------------------------------
input_height = input_width = 320
# The class predictions tensor carries num_classes + 1 entries per anchor.
num_anchors, num_coordinates, num_classes = 2034, 4, 90

# Core ML specification version written into every generated model.
specificationVersion = 5

# --- Generated Core ML files -----------------------------------------------------
ssd_model_file, decoder_model_file, nms_model_file = (
    'SSD.mlmodel',
    'Decoder.mlmodel',
    'NMS.mlmodel',
)
final_model_file = 'SSDLiteMobileDetEdgeTPU.mlmodel'

In [None]:
# Remove the outputs of a previous run; -f keeps this quiet when the files do not exist yet.
!rm -f Decoder.mlmodel NMS.mlmodel SSD.mlmodel SSDLiteMobileDetEdgeTPU.mlmodel

In [None]:
!rm -rf saved_model

from tensorflow.python.tools import strip_unused_lib
from tensorflow.python.framework import dtypes
from tensorflow.python.platform import gfile

from tensorflow.python.saved_model import signature_constants
from tensorflow.python.saved_model import tag_constants

def load_frozen_graph(path):
  """Read a serialized GraphDef from `path` and return the parsed message."""
  with tf.io.gfile.GFile(path, "rb") as pb_file:
    serialized_graph = pb_file.read()

  frozen_graph_def = tf.compat.v1.GraphDef()
  frozen_graph_def.ParseFromString(serialized_graph)
  return frozen_graph_def


def load_saved_model(path):
  """Load the SERVING meta graph of the SavedModel at `path` and return its tf.Graph.

  The session used for loading is closed before returning; only the graph
  object is handed back to the caller.
  """
  loaded_graph = tf.Graph()
  serving_tags = [tf.compat.v1.saved_model.tag_constants.SERVING]
  with tf.compat.v1.Session(graph=loaded_graph) as session:
    tf.compat.v1.saved_model.loader.load(session, serving_tags, path)
  return loaded_graph


def print_node_name(graph_def):
  """Print the node count, all node names, and the first and last node name.

  Args:
    graph_def: object exposing a `node` sequence whose items have a `name`.

  For an empty graph only the count line is printed; the "first -> last"
  line is skipped instead of raising IndexError.
  """
  nodes = [n.name for n in graph_def.node]
  print('nodes', len(nodes), nodes)
  if nodes:
    print(nodes[0], '->', nodes[-1])


def export_ops_name(the_graph, filename):
  """Write a text description of every operation of `the_graph` to `filename`.

  For each operation the file receives its index and type, followed by the
  name and shape of each input tensor and of each output tensor.

  Args:
    the_graph: object whose `get_operations()` returns the operations; each
      operation exposes `type`, `inputs` and `outputs`, and each tensor
      exposes `name` and `get_shape()`.
    filename: path of the text file to (over)write.
  """
  with open(filename, "w") as text_file:
    for op_index, op in enumerate(the_graph.get_operations()):
      text_file.write('\n\nop id {} , op type: "{}"'.format(op_index, op.type))

      text_file.write('\ninput(s):')
      for tensor in op.inputs:
        text_file.write("name = {}, shape: {}, ".format(tensor.name, tensor.get_shape()))

      text_file.write('\noutput(s):')
      for tensor in op.outputs:
        text_file.write("name = {}, shape: {},".format(tensor.name, tensor.get_shape()))


def optimize_graph(graph_def,
                   input_node_names,
                   output_node_names,
                   export_dir):
  """Strip a GraphDef down to the requested nodes and save it as a SavedModel.

  The node names are printed before and after stripping. The stripped graph is
  imported into a fresh graph and written to `export_dir` with a predict
  signature (under the default serving key) that maps every input and output
  node name to the first output tensor (`<name>:0`) of that node.

  Args:
    graph_def: the GraphDef to reduce.
    input_node_names: node names that become float32 placeholders / inputs.
    output_node_names: node names that must stay reachable / outputs.
    export_dir: directory the SavedModel is written to.
  """
  print('nodes before strip_unused_lib')
  print_node_name(graph_def)

  stripped_graph_def = strip_unused_lib.strip_unused(
      input_graph_def=graph_def,
      input_node_names=input_node_names,
      output_node_names=output_node_names,
      placeholder_type_enum=dtypes.float32.as_datatype_enum)

  print('nodes after strip_unused_lib')
  print_node_name(stripped_graph_def)

  model_builder = tf.compat.v1.saved_model.builder.SavedModelBuilder(export_dir)

  with tf.compat.v1.Session(graph=tf.Graph()) as session:
    tf.import_graph_def(stripped_graph_def, name="")
    imported_graph = tf.compat.v1.get_default_graph()

    def tensors_by_node(node_names):
      # Map each node name to the first output tensor of that node.
      lookup = {}
      for node_name in node_names:
        lookup[node_name] = imported_graph.get_tensor_by_name(node_name + ':0')
      return lookup

    predict_signature = tf.compat.v1.saved_model.signature_def_utils.predict_signature_def(
        tensors_by_node(input_node_names),
        tensors_by_node(output_node_names))

    model_builder.add_meta_graph_and_variables(
        session,
        [tag_constants.SERVING],
        signature_def_map={
            signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY: predict_signature,
        })
    model_builder.save()


# Parse the frozen graph that ships with the downloaded model archive.
graph_def = load_frozen_graph(input_file)

# Keep only the part of the graph between the image input and the three
# tensors listed in the notes as feeding TFLite_Detection_PostProcess
# (box encodings, class predictions, anchors), and save it as a SavedModel.
input_nodes=[input_name_image]
output_nodes=[output_name_box, output_name_score, 'anchors']
optimize_graph(graph_def, input_nodes, output_nodes, saved_model_dir)

# Reload the SavedModel and dump its operations to a text file for inspection.
the_graph = load_saved_model(saved_model_dir)
export_ops_name(the_graph, f'{model_name}_ops.txt')


nodes before strip_unused_lib
nodes 1177 ['normalized_input_image_tensor', 'FeatureExtractor/MobileDetEdgeTPU/Conv/weights', 'FeatureExtractor/MobileDetEdgeTPU/Conv/weights/read', 'FeatureExtractor/MobileDetEdgeTPU/Conv/Conv2D', 'FeatureExtractor/MobileDetEdgeTPU/Conv/BatchNorm/beta', 'FeatureExtractor/MobileDetEdgeTPU/Conv/BatchNorm/beta/read', 'FeatureExtractor/MobileDetEdgeTPU/Conv/BatchNorm/gamma', 'FeatureExtractor/MobileDetEdgeTPU/Conv/BatchNorm/gamma/read', 'FeatureExtractor/MobileDetEdgeTPU/Conv/BatchNorm/moving_mean', 'FeatureExtractor/MobileDetEdgeTPU/Conv/BatchNorm/moving_mean/read', 'FeatureExtractor/MobileDetEdgeTPU/Conv/BatchNorm/moving_variance', 'FeatureExtractor/MobileDetEdgeTPU/Conv/BatchNorm/moving_variance/read', 'FeatureExtractor/MobileDetEdgeTPU/Conv/BatchNorm/FusedBatchNormV3', 'FeatureExtractor/MobileDetEdgeTPU/Conv/Relu6', 'FeatureExtractor/MobileDetEdgeTPU/TuckerConv/Conv/weights', 'FeatureExtractor/MobileDetEdgeTPU/TuckerConv/Conv/weights/read', 'FeatureExtra

In [None]:
# ================================
# PART 1: Convert the SSD to Core ML
# ================================

# Describe the image input of the TensorFlow graph. The graph uses the NHWC
# layout, so the shape is (batch, height, width, channels) -- height comes
# before width.
# scale and bias map 8-bit pixel values from [0, 255] to [-1, 1]:
#   normalized = pixel * (2 / 255) - 1
image_input = ct.ImageType(name=input_name_image,
                           shape=(1, input_height, input_width, 3),
                           bias=[-1,-1,-1], scale=2./255)

# Convert the stripped SavedModel. Later cells address the outputs by
# position (scores first, boxes second), so keep this order.
ssd_model = ct.convert(
    saved_model_dir,
    source='tensorflow',
    inputs=[image_input],
    outputs=[output_name_score, output_name_box]  # the order in list is important
)

print(ssd_model)

Running TensorFlow Graph Passes: 100%|██████████| 5/5 [00:00<00:00,  6.58 passes/s]
Converting Frontend ==> MIL Ops: 100%|██████████| 722/722 [00:03<00:00, 214.42 ops/s]
Running MIL optimization passes: 100%|██████████| 18/18 [00:02<00:00,  6.53 passes/s]
Translating MIL ==> MLModel Ops: 100%|██████████| 1292/1292 [00:00<00:00, 1726.75 ops/s]


input {
  name: "normalized_input_image_tensor"
  type {
    imageType {
      width: 320
      height: 320
      colorSpace: RGB
    }
  }
}
output {
  name: "raw_outputs/class_predictions"
  type {
    multiArrayType {
      dataType: FLOAT32
    }
  }
}
output {
  name: "raw_outputs/box_encodings"
  type {
    multiArrayType {
      dataType: FLOAT32
    }
  }
}
metadata {
  userDefined {
    key: "com.github.apple.coremltools.source"
    value: "tensorflow==2.5.0"
  }
  userDefined {
    key: "com.github.apple.coremltools.version"
    value: "4.1"
  }
}



In [None]:
# Optional weight quantization (disabled):
# from coremltools.models.neural_network import quantization_utils
# ssd_spec = quantization_utils.quantize_weights(ssd_model, nbits=nbits)
# ssd_model = ct.models.MLModel(ssd_spec)

ssd_model.input_description[input_name_image] = "Input image to be detected"
ssd_model.output_description[output_name_score] = "Confidence derived for each of the bounding boxes."
ssd_model.output_description[output_name_box] = "Bounding boxes coordinates"

# Work on the spec: pin the specification version and give the features the
# short names used by the decoder model and the final pipeline.
ssd_spec = ssd_model.get_spec()
ssd_spec.specificationVersion = specificationVersion
ct.utils.rename_feature(ssd_spec, input_name_image, "image")
ct.utils.rename_feature(ssd_spec, output_name_score, "scores")
ct.utils.rename_feature(ssd_spec, output_name_box, "boxes")

# Declare the static output shapes. Outputs are looked up by name rather than
# by position, so the result does not depend on the order the converter
# emitted them in, and any shape that is already present is replaced instead
# of being extended.
ssd_output_shapes = {
    "scores": (1, num_anchors, num_classes + 1),
    "boxes": (1, num_anchors, num_coordinates),
}
ssd_outputs_by_name = {feature.name: feature
                       for feature in ssd_spec.description.output}
for feature_name, feature_shape in ssd_output_shapes.items():
    # A KeyError here means the converted model lacks the expected output.
    array_type = ssd_outputs_by_name[feature_name].type.multiArrayType
    del array_type.shape[:]
    array_type.shape.extend(feature_shape)


ssd_model = ct.models.MLModel(ssd_spec)
ssd_model.save(ssd_model_file)

print(ssd_model)

input {
  name: "image"
  shortDescription: "Input image to be detected"
  type {
    imageType {
      width: 320
      height: 320
      colorSpace: RGB
    }
  }
}
output {
  name: "scores"
  shortDescription: "Confidence derived for each of the bounding boxes."
  type {
    multiArrayType {
      shape: 1
      shape: 2034
      shape: 91
      dataType: FLOAT32
    }
  }
}
output {
  name: "boxes"
  shortDescription: "Bounding boxes coordinates"
  type {
    multiArrayType {
      shape: 1
      shape: 2034
      shape: 4
      dataType: FLOAT32
    }
  }
}
metadata {
  userDefined {
    key: "com.github.apple.coremltools.source"
    value: "tensorflow==2.5.0"
  }
  userDefined {
    key: "com.github.apple.coremltools.version"
    value: "4.1"
  }
}



In [None]:
# ================================
# PART 2: Decoding the coordinates
# ================================

import numpy as np

def get_anchors(graph, tensor_name, sess=None, corner_encoded=False):
    """
    Reads the anchor boxes from a tensor of the graph.

    Outputs an array of size (4, num_anchors) where each column is an anchor box
    given as [ycenter, xcenter, height, width] in normalized coordinates.

    graph: graph that contains the anchors tensor.
    tensor_name: name of the anchors tensor, e.g. "anchors:0". The tensor must
        evaluate to an array of shape (num_anchors, 4).
    sess: optional session used to evaluate the tensor. When omitted, a
        temporary session on `graph` is created, so the function does not
        depend on a notebook-global `sess`.
    corner_encoded: set to True when the tensor stores each anchor as
        [ymin, xmin, ymax, xmax]. The default (False) matches the "anchors"
        constant of a tflite_graph.pb, which the TFLite SSD export
        (export_tflite_ssd_graph_lib.get_const_center_size_encoded_anchors)
        already stores as [ycenter, xcenter, height, width]; converting such
        values from corners a second time yields wrong anchors.

    Raises ValueError when the tensor does not have shape (num_anchors, 4).
    """
    anchors_tensor = graph.get_tensor_by_name(tensor_name)
    if sess is None:
        with tf.compat.v1.Session(graph=graph) as own_sess:
            raw_anchors = own_sess.run(anchors_tensor)
    else:
        raw_anchors = sess.run(anchors_tensor, feed_dict={})
    raw_anchors = np.asarray(raw_anchors)
    print('box_corners.shape', raw_anchors.shape)

    if raw_anchors.ndim != 2 or raw_anchors.shape[1] != 4:
        raise ValueError(
            "expected anchors of shape (num_anchors, 4), got {}".format(raw_anchors.shape))

    if corner_encoded:
        # Convert the min/max values to a center coordinate, width and height.
        ymin, xmin, ymax, xmax = np.transpose(raw_anchors)
        width = xmax - xmin
        height = ymax - ymin
        ycenter = ymin + height / 2.
        xcenter = xmin + width / 2.
        anchors = np.stack([ycenter, xcenter, height, width])
    else:
        # Already [ycenter, xcenter, height, width] per anchor: only switch
        # to the (4, num_anchors) layout.
        anchors = np.ascontiguousarray(np.transpose(raw_anchors))
    print('anchors.shape', anchors.shape)
    return anchors


# Read the anchors into a (4, num_anchors) tensor.
anchors_tensor_name = "anchors:0"
# Reload the stripped SavedModel; "anchors" is one of the nodes kept in it.
the_graph = load_saved_model(export_dir)

# Evaluate the anchors tensor inside a session bound to the reloaded graph.
with the_graph.as_default():
    with tf.compat.v1.Session(graph=the_graph) as sess:
        anchors = get_anchors(the_graph, anchors_tensor_name)
        # Sanity check: one anchor per predicted box.
        assert(anchors.shape[1] == num_anchors)


from coremltools.models import datatypes
from coremltools.models import neural_network

# The decoder inputs carry the same names and shapes that were declared for
# the outputs of the SSD model ("scores" and "boxes").
input_features = [ ("scores", datatypes.Array(1, num_anchors, num_classes + 1)),
                   ("boxes", datatypes.Array(1, num_anchors, num_coordinates)) ]

# The outputs of the decoder model should match the inputs of the next
# model in the pipeline, NonMaximumSuppression. This expects the number
# of bounding boxes in the first dimension.
output_features = [ ("raw_confidence", datatypes.Array(num_anchors, num_classes)),
                    ("raw_coordinates", datatypes.Array(num_anchors, num_coordinates)) ]

# Builder for the decoder network; all decoding layers are added to it.
builder = neural_network.NeuralNetworkBuilder(input_features, 
                                              output_features, 
                                              use_float_arraytype=True)
# Use the same specification version as the other models of the pipeline.
builder.spec.specificationVersion = specificationVersion


# Decoder network layers. Summary of the box decoding implemented below
# (taken from the per-layer comments):
#   yx = (boxes_yx / 10) * anchors_hw + anchors_yx
#   hw = exp(boxes_hw / 5) * anchors_hw
# The result is reordered to (x, y, width, height) for NonMaximumSuppression.

# decode scores #
# Strip off the "unknown" class (at index 0).
builder.add_slice(name="slice_scores",
                  input_name="scores",
                  output_name="raw_confidence",
                  axis="width",
                  start_index=1,
                  end_index=num_classes + 1)

# decode boxes #
# (1, num_anchors, 4) --> (4, num_anchors, 1)
builder.add_permute(name="permute_boxes",
                    dim=(0, 3, 2, 1),
                    input_name="boxes",
                    output_name="permute_boxes_output")

# Grab the y, x coordinates (channels 0-1).
builder.add_slice(name="slice_yx",
                  input_name="permute_boxes_output",
                  output_name="slice_yx_output",
                  axis="channel",
                  start_index=0,
                  end_index=2)

# boxes_yx / 10
# (a single-input MULTIPLY with alpha=0.1 is used as the constant scale)
builder.add_elementwise(name="scale_yx",
                        input_names="slice_yx_output",
                        output_name="scale_yx_output",
                        mode="MULTIPLY",
                        alpha=0.1)

# Split the anchors into two (2, num_anchors, 1) arrays.
# Rows 0-1 of `anchors` are used as the centers (y, x), rows 2-3 as the sizes (h, w).
anchors_yx = np.expand_dims(anchors[:2, :], axis=-1)
anchors_hw = np.expand_dims(anchors[2:, :], axis=-1)
print('split_anchors_shape', anchors_hw.shape, anchors_yx.shape)

# Embed both anchor halves in the network as constants.
split_anchors_shape = [2, num_anchors, 1]
builder.add_load_constant(name="anchors_yx",
                          output_name="anchors_yx",
                          constant_value=anchors_yx,
                          shape=split_anchors_shape)

builder.add_load_constant(name="anchors_hw",
                          output_name="anchors_hw",
                          constant_value=anchors_hw,
                          shape=split_anchors_shape)

# (boxes_yx / 10) * anchors_hw
builder.add_elementwise(name="yw_times_hw",
                        input_names=["scale_yx_output", "anchors_hw"],
                        output_name="yw_times_hw_output",
                        mode="MULTIPLY")

# (boxes_yx / 10) * anchors_hw + anchors_yx
builder.add_elementwise(name="decoded_yx",
                        input_names=["yw_times_hw_output", "anchors_yx"],
                        output_name="decoded_yx_output",
                        mode="ADD")

# Grab the height and width (channels 2-3).
builder.add_slice(name="slice_hw",
                  input_name="permute_boxes_output",
                  output_name="slice_hw_output",
                  axis="channel",
                  start_index=2,
                  end_index=4)

# (boxes_hw / 5)
builder.add_elementwise(name="scale_hw",
                        input_names="slice_hw_output",
                        output_name="scale_hw_output",
                        mode="MULTIPLY",
                        alpha=0.2)

# exp(boxes_hw / 5)
builder.add_unary(name="exp_hw",
                  input_name="scale_hw_output",
                  output_name="exp_hw_output",
                  mode="exp")

# exp(boxes_hw / 5) * anchors_hw
builder.add_elementwise(name="decoded_hw",
                        input_names=["exp_hw_output", "anchors_hw"],
                        output_name="decoded_hw_output",
                        mode="MULTIPLY")

# The coordinates are now (y, x) and (height, width) but NonMaximumSuppression
# wants them as (x, y, width, height). So create four slices and then concat
# them into the right order.
builder.add_slice(name="slice_y",
                  input_name="decoded_yx_output",
                  output_name="slice_y_output",
                  axis="channel",
                  start_index=0,
                  end_index=1)

builder.add_slice(name="slice_x",
                  input_name="decoded_yx_output",
                  output_name="slice_x_output",
                  axis="channel",
                  start_index=1,
                  end_index=2)

builder.add_slice(name="slice_h",
                  input_name="decoded_hw_output",
                  output_name="slice_h_output",
                  axis="channel",
                  start_index=0,
                  end_index=1)

builder.add_slice(name="slice_w",
                  input_name="decoded_hw_output",
                  output_name="slice_w_output",
                  axis="channel",
                  start_index=1,
                  end_index=2)

# Concatenate in the order x, y, width, height.
builder.add_elementwise(name="concat",
                        input_names=["slice_x_output", "slice_y_output", 
                                     "slice_w_output", "slice_h_output"],
                        output_name="concat_output",
                        mode="CONCAT")

# (4, num_anchors, 1) --> (1, num_anchors, 4)
builder.add_permute(name="permute_output",
                    dim=(0, 3, 2, 1),
                    input_name="concat_output",
                    output_name="raw_coordinates")


# Wrap the finished spec in an MLModel, save it, and print its description.
decoder_model = ct.models.MLModel(builder.spec)
decoder_model.save(decoder_model_file)
print(decoder_model)

box_corners.shape (2034, 4)
anchors.shape (4, 2034)
split_anchors_shape (2, 2034, 1) (2, 2034, 1)
input {
  name: "scores"
  type {
    multiArrayType {
      shape: 1
      shape: 2034
      shape: 91
      dataType: FLOAT32
    }
  }
}
input {
  name: "boxes"
  type {
    multiArrayType {
      shape: 1
      shape: 2034
      shape: 4
      dataType: FLOAT32
    }
  }
}
output {
  name: "raw_confidence"
  type {
    multiArrayType {
      shape: 2034
      shape: 90
      dataType: FLOAT32
    }
  }
}
output {
  name: "raw_coordinates"
  type {
    multiArrayType {
      shape: 2034
      shape: 4
      dataType: FLOAT32
    }
  }
}



In [None]:
# ===============================
# PART 3: Non-maximum suppression
# ===============================

nms_spec = ct.proto.Model_pb2.Model()
nms_spec.specificationVersion = specificationVersion

# Start from the two decoder outputs: each one becomes an NMS input and,
# as a starting point, an NMS output as well.
nms_description = nms_spec.description
for decoder_feature in decoder_model._spec.description.output[:2]:
    serialized_feature = decoder_feature.SerializeToString()
    nms_description.input.add().ParseFromString(serialized_feature)
    nms_description.output.add().ParseFromString(serialized_feature)

nms_description.output[0].name = "confidence"
nms_description.output[1].name = "coordinates"

# Replace the fixed output shapes by ranges: the first dimension is left open
# (lower bound 0, upper bound -1), the second is fixed per output.
output_sizes = [num_classes, 4]
for nms_output, fixed_size in zip(nms_description.output, output_sizes):
    array_type = nms_output.type.multiArrayType
    first_dim_range = array_type.shapeRange.sizeRanges.add()
    first_dim_range.lowerBound = 0
    first_dim_range.upperBound = -1
    second_dim_range = array_type.shapeRange.sizeRanges.add()
    second_dim_range.lowerBound = fixed_size
    second_dim_range.upperBound = fixed_size
    del array_type.shape[:]

default_iou_threshold = 0.6
default_confidence_threshold = 0.4

# Wire the NMS model to its feature names and set the default thresholds.
nms = nms_spec.nonMaximumSuppression
for field_name, feature_name in (
        ("confidenceInputFeatureName", "raw_confidence"),
        ("coordinatesInputFeatureName", "raw_coordinates"),
        ("confidenceOutputFeatureName", "confidence"),
        ("coordinatesOutputFeatureName", "coordinates"),
        ("iouThresholdInputFeatureName", "iouThreshold"),
        ("confidenceThresholdInputFeatureName", "confidenceThreshold")):
    setattr(nms, field_name, feature_name)

nms.iouThreshold = default_iou_threshold
nms.confidenceThreshold = default_confidence_threshold

nms.pickTop.perClass = True

# Read one class label per line. The file is parsed directly because
# np.loadtxt(..., delimiter="\n") is rejected by newer NumPy releases
# (a newline is no longer accepted as delimiter). Surrounding whitespace is
# stripped and blank lines are skipped; the result is a plain list of str.
with open("coco_labels.txt", encoding="utf-8") as labels_file:
    labels = [line.strip() for line in labels_file if line.strip()]
nms.stringClassLabels.vector.extend(labels)

# Wrap the finished spec in an MLModel, save it, and print its description.
nms_model = ct.models.MLModel(nms_spec)
nms_model.save(nms_model_file)
print(nms_model)

input {
  name: "raw_confidence"
  type {
    multiArrayType {
      shape: 2034
      shape: 90
      dataType: FLOAT32
    }
  }
}
input {
  name: "raw_coordinates"
  type {
    multiArrayType {
      shape: 2034
      shape: 4
      dataType: FLOAT32
    }
  }
}
output {
  name: "confidence"
  type {
    multiArrayType {
      dataType: FLOAT32
      shapeRange {
        sizeRanges {
          upperBound: -1
        }
        sizeRanges {
          lowerBound: 90
          upperBound: 90
        }
      }
    }
  }
}
output {
  name: "coordinates"
  type {
    multiArrayType {
      dataType: FLOAT32
      shapeRange {
        sizeRanges {
          upperBound: -1
        }
        sizeRanges {
          lowerBound: 4
          upperBound: 4
        }
      }
    }
  }
}



In [None]:
# ===============================================
# PART 4: Putting it all together into a pipeline
# ===============================================

from coremltools.models.pipeline import *

# Pipeline interface: the image plus the two NMS thresholds go in, the
# filtered confidences and coordinates come out.
output_features = ["confidence", "coordinates"]
input_features = [
    ("image", datatypes.Array(3, input_height, input_width)),
    ("iouThreshold", datatypes.Double()),
    ("confidenceThreshold", datatypes.Double()),
]

pipeline = Pipeline(input_features, output_features)

# Don't forget this or Core ML might attempt to run the model on an unsupported
# operating system version!
# set specificationVersion = 5 to use Float32
pipeline.spec.specificationVersion = specificationVersion

# Stages run in this order: SSD -> decoder -> non-maximum suppression.
for stage_model in (ssd_model, decoder_model, nms_model):
    pipeline.add_model(stage_model)

pipeline_description = pipeline.spec.description

# The "image" input should really be an image, not a multi-array.
pipeline_description.input[0].CopyFrom(ssd_model._spec.description.input[0])

# Copy the declarations of the "confidence" and "coordinates" outputs.
# The Pipeline makes these strings by default.
for output_index in (0, 1):
    pipeline_description.output[output_index].CopyFrom(
        nms_model._spec.description.output[output_index])

# Add descriptions to the inputs and outputs.
pipeline_description.input[1].shortDescription = f"IOU Threshold (default={default_iou_threshold})"
pipeline_description.input[2].shortDescription = f"Confidence Threshold (default={default_confidence_threshold})"
pipeline_description.output[0].shortDescription = u"Boxes \xd7 Class confidence"
pipeline_description.output[1].shortDescription = u"Boxes \xd7 [x, y, width, height] (relative to image size)"

# Add metadata to the model.
pipeline_metadata = pipeline_description.metadata
pipeline_metadata.versionString = "2021-08-06"
pipeline_metadata.shortDescription = "ssdlite_mobiledet_edgetpu_320x320_coco_2020_05_19"
pipeline_metadata.author = "Converted to Core ML by Anh"

# Add the list of class labels and the default threshold values too.
user_defined_metadata = {
    "iou_threshold": str(default_iou_threshold),
    "confidence_threshold": str(default_confidence_threshold),
    "classes": ",".join(labels)
}
pipeline_metadata.userDefined.update(user_defined_metadata)


final_model = ct.models.MLModel(pipeline.spec)
final_model.save(final_model_file)

print(final_model)
print("Done!", "Exported to:", final_model_file)

input {
  name: "image"
  shortDescription: "Input image to be detected"
  type {
    imageType {
      width: 320
      height: 320
      colorSpace: RGB
    }
  }
}
input {
  name: "iouThreshold"
  shortDescription: "IOU Threshold (default=0.6)"
  type {
    doubleType {
    }
  }
}
input {
  name: "confidenceThreshold"
  shortDescription: "Confidence Threshold (default=0.4)"
  type {
    doubleType {
    }
  }
}
output {
  name: "confidence"
  shortDescription: "Boxes \303\227 Class confidence"
  type {
    multiArrayType {
      dataType: FLOAT32
      shapeRange {
        sizeRanges {
          upperBound: -1
        }
        sizeRanges {
          lowerBound: 90
          upperBound: 90
        }
      }
    }
  }
}
output {
  name: "coordinates"
  shortDescription: "Boxes \303\227 [x, y, width, height] (relative to image size)"
  type {
    multiArrayType {
      dataType: FLOAT32
      shapeRange {
        sizeRanges {
          upperBound: -1
        }
        sizeRanges {
  