In [122]:
import fiftyone as fo
import fiftyone.zoo as foz
from fiftyone.utils.eval.detection import evaluate_detections, DetectionResults
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval
from numpyencoder import NumpyEncoder
import json

import tensorflow as tf
import tensorflow_hub as hub
import numpy as np
import time
from codecarbon import track_emissions

# Load the COCO-2017 validation split into a FiftyOne dataset
#
# This will download the dataset from the web, if necessary
#
dataset = foz.load_zoo_dataset("coco-2017", split="validation")
#test17 = foz.load_zoo_dataset("coco-2017", split="test")

# Give the dataset a new name, and make it persistent so that you can
# work with it in future sessions
dataset.name = "coco-2017-set"
dataset.persistent = True
#test17.name = "coco-2017-test-example"
#test17.persistent = True
# Visualize the in the App
#session = fo.launch_app(dataset)

Downloading split 'validation' to '/Users/kristian/fiftyone/coco-2017/validation' if necessary


INFO:fiftyone.zoo.datasets:Downloading split 'validation' to '/Users/kristian/fiftyone/coco-2017/validation' if necessary


Found annotations at '/Users/kristian/fiftyone/coco-2017/raw/instances_val2017.json'


INFO:fiftyone.utils.coco:Found annotations at '/Users/kristian/fiftyone/coco-2017/raw/instances_val2017.json'


Images already downloaded


INFO:fiftyone.utils.coco:Images already downloaded


Existing download of split 'validation' is sufficient


INFO:fiftyone.zoo.datasets:Existing download of split 'validation' is sufficient


Loading existing dataset 'coco-2017-validation'. To reload from disk, either delete the existing dataset or provide a custom `dataset_name` to use


INFO:fiftyone.zoo.datasets:Loading existing dataset 'coco-2017-validation'. To reload from disk, either delete the existing dataset or provide a custom `dataset_name` to use


In [143]:
model_url = "https://www.kaggle.com/models/tensorflow/ssd-mobilenet-v2/frameworks/TensorFlow2/variations/ssd-mobilenet-v2/versions/1"
model = hub.load(model_url)

#TO DO: Get size of model ssd mobilenet v2


In [139]:
num_images_to_infer = 3500
sampled_dataset = dataset.take(num_images_to_infer)

In [141]:
def preprocess_image(image):
    input_image = tf.convert_to_tensor(image)
    input_image = tf.expand_dims(input_image, axis=0)
    input_image = tf.cast(input_image, tf.uint8) 
    return input_image

In [146]:
coco_results = []

# Iterate through each sample in the dataset
print("Performing inference on the samples...")
start_time = time.time()
for sample in sampled_dataset:
    coco_sample_results = []

    # Load the image
    image_path = sample.filepath
    image = tf.image.decode_image(tf.io.read_file(image_path), channels=3).numpy()
    image_height, image_width, _ = image.shape
    image_id = int(image_path[-16:-4])

    # Perform inference
    input_image = preprocess_image(image)
    detections = model(input_image)

    # Extract relevant information from the detections
    boxes = detections["detection_boxes"][0].numpy()
    scores = detections["detection_scores"][0].numpy()
    labels = detections["detection_classes"][0].numpy().astype(int)

    # Append image id to results
    for box, score, label in zip(boxes, scores, labels):
        # Convert to COCO format
        coco_sample_results.append(
            {
                "image_id": image_id,
                "category_id": label,
                # Scale box to image size
                "bbox":  [
                    box[1] * image_width,
                    box[0] * image_height,
                    (box[3] - box[1]) * image_width,
                    (box[2] - box[0]) * image_height,
                ],
                "score": score,
            }
        )

    coco_results.extend(coco_sample_results)

end_time = time.time()
print("Inference time for (full) model: ", end_time - start_time)
print("Inference time per image: ", (end_time - start_time)/num_images_to_infer)

print("___________________________________________________________")

Performing inference on the samples...
Inference time for (full) model:  136.57784295082092
Inference time per image:  0.03902224084309169
___________________________________________________________


In [147]:
# Save the results to disk
coco_results_path = "coco_results.json"
with open(coco_results_path, "w") as f:
    json.dump(coco_results, f, cls=NumpyEncoder)

# Load the COCO annotations for the validation split
gt_path = 'data/coco/annotations/instances_val2017.json'
coco_gt = COCO(gt_path)

# Load the COCO results
coco_dt = coco_gt.loadRes(coco_results_path)

# Evaluate the results
coco_eval = COCOeval(coco_gt, coco_dt, "bbox")
#coco_eval.params.imgIds  = imgIds
coco_eval.evaluate()
coco_eval.accumulate()
coco_eval.summarize()


loading annotations into memory...
Done (t=0.26s)
creating index...
index created!
Loading and preparing results...
DONE (t=0.69s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=12.75s).
Accumulating evaluation results...
DONE (t=3.06s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.142
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.243
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.143
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.020
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.125
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.289
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.149
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.227
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDet

### TfLite


In [126]:
representative_dataset = dataset.take(250)

def representative_data_gen():
    for sample in representative_dataset:
        # Preprocess the image
        image_path = sample.filepath        
        image = tf.image.decode_image(tf.io.read_file(image_path), channels=3)
        image = preprocess_image(image) 
        image = tf.image.resize(image, (320, 320)) 
        image = tf.cast(image, tf.uint8)

        yield [image]


#### int8 quantization

In [127]:
from tensorflow import lite

# Convert the model to TensorFlow Lite format with int8 quantization
converter = lite.TFLiteConverter.from_keras_model(model)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8, tf.lite.OpsSet.TFLITE_BUILTINS]


converter.representative_dataset = representative_data_gen 

tflite_model_int8 = converter.convert()

# Save the TFLite model to a file
with open("model_int8.tflite", 'wb') as f:
    f.write(tflite_model_int8)

INFO:tensorflow:Assets written to: /var/folders/8v/5_c7kn_13jsgmgqz56r3yx440000gp/T/tmpa34e5qtl/assets


INFO:tensorflow:Assets written to: /var/folders/8v/5_c7kn_13jsgmgqz56r3yx440000gp/T/tmpa34e5qtl/assets
2023-11-18 23:59:19.609419: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:378] Ignored output_format.
2023-11-18 23:59:19.609431: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:381] Ignored drop_control_dependency.
Summary on the non-converted ops:
---------------------------------
 * Accepted dialects: tfl, builtin, func
 * Non-Converted Ops: 272, Total Ops 2004, % non-converted = 13.57 %
 * 272 ARITH ops

- arith.constant:  272 occurrences  (f32: 161, i32: 111)



  (f32: 15)
  (f32: 3, i1: 1, i32: 3)
  (f32: 98, i32: 90)
  (f32: 55)
  (f32: 17)
  (f32: 2)
  (f32: 1)
  (f32: 91, i32: 90)
  (f32: 295)
  (i1: 7)
  (i1: 1)
  (i1: 90)
  (f32: 1)
  (f32: 4, i32: 1)
  (f32: 14)
  (i32: 90)
  (f32: 6, i32: 9)
  (f32: 5)
  (i32: 2)
  (f32: 4)
  (i64: 1, f32: 106, i1: 1, i32: 98)
  (f32: 1)
  (f32: 91, i32: 6)
  (i32: 103)
  (f32: 96)
  (f32: 4)
  (

#### Float16 quantization

In [131]:
converter = lite.TFLiteConverter.from_keras_model(model)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
converter.target_spec.supported_types = [tf.float16]

converter.representative_dataset = representative_data_gen

tflite_model_fp16 = converter.convert()

with open("model_fp16.tflite", 'wb') as f:
    f.write(tflite_model_fp16)


INFO:tensorflow:Assets written to: /var/folders/8v/5_c7kn_13jsgmgqz56r3yx440000gp/T/tmpqbl4qrxj/assets


INFO:tensorflow:Assets written to: /var/folders/8v/5_c7kn_13jsgmgqz56r3yx440000gp/T/tmpqbl4qrxj/assets
2023-11-19 00:15:37.518849: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:378] Ignored output_format.
2023-11-19 00:15:37.518877: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:381] Ignored drop_control_dependency.
Summary on the non-converted ops:
---------------------------------
 * Accepted dialects: tfl, builtin, func
 * Non-Converted Ops: 271, Total Ops 2164, % non-converted = 12.52 %
 * 271 ARITH ops

- arith.constant:  271 occurrences  (f16: 160, i32: 111)



  (f32: 15)
  (f32: 3, i1: 1, i32: 3)
  (f32: 98, i32: 90)
  (f32: 55)
  (f32: 17)
  (f32: 161)
  (f32: 2)
  (f32: 1)
  (f32: 91, i32: 90)
  (f32: 295)
  (i1: 7)
  (i1: 1)
  (i1: 90)
  (f32: 1)
  (f32: 4, i32: 1)
  (f32: 14)
  (i32: 90)
  (f32: 6, i32: 9)
  (f32: 5)
  (i32: 2)
  (f32: 4)
  (i64: 1, f32: 106, i1: 1, i32: 98)
  (f32: 1)
  (f32: 91, i32: 6)
  (i32: 103)
  (f32: 96)
 

#### Float32 quantization

In [130]:
converter = lite.TFLiteConverter.from_keras_model(model)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
converter.target_spec.supported_types = [tf.float32]

converter.representative_dataset = representative_data_gen

tflite_model_fp32 = converter.convert()

with open("model_fp32.tflite", 'wb') as f:
    f.write(tflite_model_fp32)

INFO:tensorflow:Assets written to: /var/folders/8v/5_c7kn_13jsgmgqz56r3yx440000gp/T/tmp_rvykcb3/assets


INFO:tensorflow:Assets written to: /var/folders/8v/5_c7kn_13jsgmgqz56r3yx440000gp/T/tmp_rvykcb3/assets
2023-11-19 00:15:17.085662: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:378] Ignored output_format.
2023-11-19 00:15:17.085676: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:381] Ignored drop_control_dependency.
Summary on the non-converted ops:
---------------------------------
 * Accepted dialects: tfl, builtin, func
 * Non-Converted Ops: 272, Total Ops 2004, % non-converted = 13.57 %
 * 272 ARITH ops

- arith.constant:  272 occurrences  (f32: 161, i32: 111)



  (f32: 15)
  (f32: 3, i1: 1, i32: 3)
  (f32: 98, i32: 90)
  (f32: 55)
  (f32: 17)
  (f32: 2)
  (f32: 1)
  (f32: 91, i32: 90)
  (f32: 295)
  (i1: 7)
  (i1: 1)
  (i1: 90)
  (f32: 1)
  (f32: 4, i32: 1)
  (f32: 14)
  (i32: 90)
  (f32: 6, i32: 9)
  (f32: 5)
  (i32: 2)
  (f32: 4)
  (i64: 1, f32: 106, i1: 1, i32: 98)
  (f32: 1)
  (f32: 91, i32: 6)
  (i32: 103)
  (f32: 96)
  (f32: 4)
  (

In [113]:
import cv2
def setup_model(model_content):
    interpreter = tf.lite.Interpreter(model_path=model_content)
    input_details = interpreter.get_input_details()
    output_details = interpreter.get_output_details()

    desired_size = 320

    new_input_shape = (1, desired_size, desired_size, 3)  
    interpreter.resize_tensor_input(input_details[0]['index'], new_input_shape)
    interpreter.resize_tensor_input(output_details[0]['index'], new_input_shape)

    interpreter.allocate_tensors()

    return interpreter, input_details, output_details


In [115]:
#Prepare box sizes for evaluation
def load_image(image_path, desired_size=320):
    image = cv2.imread(image_path)

    height, width, channels = image.shape

    if height > width:
        new_height = desired_size
        scale = desired_size / height
        new_width = int(scale * width)
    else:
        new_width = desired_size
        scale = desired_size / width
        new_height = int(scale * height)

    resized_image = cv2.resize(image, (new_width, new_height))

    padded_image = np.zeros((desired_size, desired_size, channels), dtype=np.uint8)
    top = (desired_size - new_height) // 2
    left = (desired_size - new_width) // 2
    padded_image[top:top + new_height, left:left + new_width] = resized_image
    padded_image = padded_image[..., ::-1]  # BGR to RGB

    input_tensor = tf.convert_to_tensor(padded_image)
    input_tensor = input_tensor[tf.newaxis, ...]

    return padded_image, input_tensor, new_height, new_width, height, width

def prepare_results(image_id, detection_boxes, detection_classes, detection_scores, new_height, new_width, height, width, desired_size=320):
    prepared_results = []

    pbox = np.round(detection_boxes[0] * desired_size).astype(int)

    height_scale = height / new_height
    width_scale = width / new_width

    top_padding = (desired_size - new_height) // 2
    left_padding = (desired_size - new_width) // 2

    for i in range(len(detection_boxes[0])):
        # Umrechnen der bbox-Koordinaten von Prozent in Pixel unter Berücksichtigung der Skalierung und des Paddings
        ystart, xstart, yend, xend = pbox[i]

        ystart = (ystart - top_padding) * height_scale
        xstart = (xstart - left_padding) * width_scale
        yend = (yend - top_padding) * height_scale
        xend = (xend - left_padding) * width_scale

        box_width = xend - xstart
        box_height = yend - ystart

        result_entry = {
            "image_id": int(image_id),
            "category_id": int(detection_classes[0][i]),
            "bbox": [xstart, ystart, box_width, box_height],
            "score": float(detection_scores[0][i])
        }

        prepared_results.append(result_entry)

    return prepared_results


def inference(sampled_dataset, interpreter, input_details, output_details, num_images_to_infer=3500):
    sampled_dataset = dataset.take(num_images_to_infer)
    coco_results = []

    for sample in sampled_dataset:
        image_path = sample.filepath
        image_id = int(image_path[-16:-4])

        image, input_image, new_height, new_width, height, width = load_image(image_path)
        boxes, classes, scores = test_model(input_image, interpreter, input_details, output_details)
        predictions = prepare_results(image_id, boxes, classes, scores, new_height, new_width, height, width)

        coco_results.extend(predictions)

    return coco_results

def test_model(input_tensor, interpreter, input_details, output_details):
    interpreter.set_tensor(input_details[0]['index'], input_tensor)

    interpreter.invoke()

    output_details = interpreter.get_output_details()

    detection_boxes = interpreter.get_tensor(output_details[4]['index'])
    detection_classes = interpreter.get_tensor(output_details[5]['index'])
    detection_scores = interpreter.get_tensor(output_details[6]['index'])

    return detection_boxes, detection_classes, detection_scores


def evaluate(coco_results):
    coco_results_path = "coco_results.json"
    with open(coco_results_path, "w") as f:
        json.dump(coco_results, f, cls=NumpyEncoder)

    # Load the COCO annotations for the validation split
    gt_path = 'data/coco/annotations/instances_val2017.json'
    coco_gt = COCO(gt_path)

    # Load the COCO results
    coco_dt = coco_gt.loadRes(coco_results_path)

    # Evaluate the results
    coco_eval = COCOeval(coco_gt, coco_dt, "bbox")
    #coco_eval.params.imgIds  = imgIds
    coco_eval.evaluate()
    coco_eval.accumulate()
    coco_eval.summarize()

    return coco_eval.stats[0]

In [132]:
models = ["model_int8.tflite", "model_fp16.tflite", "model_fp32.tflite"]

In [135]:
for model in models:
    print("Running model: ", model)
    interpreter, input_details, output_details = setup_model(model)
    start_time = time.time()
    coco_results = inference(sampled_dataset, interpreter, input_details, output_details)
    end_time = time.time()
    print("Inference time: ", end_time - start_time)
    print("Evaluating model: ", model)
    evaluation = evaluate(coco_results)
    print("______________________________________________________")

Running model:  model_int8.tflite
Inference time:  144.18134713172913
Evaluating model:  model_int8.tflite
loading annotations into memory...
Done (t=0.21s)
creating index...
index created!
Loading and preparing results...
DONE (t=2.38s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=10.03s).
Accumulating evaluation results...
DONE (t=3.02s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.064
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.114
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.064
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.009
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.053
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.138
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.079
 Average Recall     (AR) @[ IoU=0.50: