In [70]:
import fiftyone as fo
import fiftyone.zoo as foz
from fiftyone.utils.eval.detection import evaluate_detections, DetectionResults
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval
from numpyencoder import NumpyEncoder
import json

import tensorflow as tf
import tensorflow_hub as hub
import numpy as np
import time
from codecarbon import track_emissions



# List available zoo datasets
print(foz.list_zoo_datasets())

#
# Load the COCO-2017 validation split into a FiftyOne dataset
#
# This will download the dataset from the web, if necessary
#
dataset = foz.load_zoo_dataset("coco-2017", split="validation")
#test17 = foz.load_zoo_dataset("coco-2017", split="test")

# Give the dataset a new name, and make it persistent so that you can
# work with it in future sessions
dataset.name = "coco-2017-valix"
dataset.persistent = True
#test17.name = "coco-2017-test-example"
#test17.persistent = True
# Visualize the in the App
#session = fo.launch_app(dataset)

['activitynet-100', 'activitynet-200', 'bdd100k', 'caltech101', 'caltech256', 'cifar10', 'cifar100', 'cityscapes', 'coco-2014', 'coco-2017', 'fashion-mnist', 'fiw', 'hmdb51', 'imagenet-2012', 'imagenet-sample', 'kinetics-400', 'kinetics-600', 'kinetics-700', 'kinetics-700-2020', 'kitti', 'kitti-multiview', 'lfw', 'mnist', 'open-images-v6', 'open-images-v7', 'quickstart', 'quickstart-geo', 'quickstart-groups', 'quickstart-video', 'sama-coco', 'ucf101', 'voc-2007', 'voc-2012']
Downloading split 'validation' to '/Users/kristian/fiftyone/coco-2017/validation' if necessary


INFO:fiftyone.zoo.datasets:Downloading split 'validation' to '/Users/kristian/fiftyone/coco-2017/validation' if necessary


Found annotations at '/Users/kristian/fiftyone/coco-2017/raw/instances_val2017.json'


INFO:fiftyone.utils.coco:Found annotations at '/Users/kristian/fiftyone/coco-2017/raw/instances_val2017.json'


Images already downloaded


INFO:fiftyone.utils.coco:Images already downloaded


Existing download of split 'validation' is sufficient


INFO:fiftyone.zoo.datasets:Existing download of split 'validation' is sufficient


Loading existing dataset 'coco-2017-validation'. To reload from disk, either delete the existing dataset or provide a custom `dataset_name` to use


INFO:fiftyone.zoo.datasets:Loading existing dataset 'coco-2017-validation'. To reload from disk, either delete the existing dataset or provide a custom `dataset_name` to use


In [71]:
model_url = "https://www.kaggle.com/models/tensorflow/ssd-mobilenet-v2/frameworks/TensorFlow2/variations/ssd-mobilenet-v2/versions/1"
model = hub.load(model_url)

#TO DO: Get size of model ssd mobilenet v2


In [72]:
def preprocess_image(image):
    input_image = tf.convert_to_tensor(image)
    input_image = tf.expand_dims(input_image, axis=0)
    input_image = tf.cast(input_image, tf.uint8) 
    return input_image

In [73]:
num_images_to_infer = 5000
sampled_dataset = dataset.take(num_images_to_infer)

In [74]:
coco_results = []

# Iterate through each sample in the dataset
for sample in sampled_dataset:
    coco_sample_results = []

    # Load the image
    image_path = sample.filepath
    image = tf.image.decode_image(tf.io.read_file(image_path), channels=3).numpy()
    image_height, image_width, _ = image.shape
    image_id = int(image_path[-16:-4])

    # Perform inference
    input_image = preprocess_image(image)
    detections = model(input_image)

    # Extract relevant information from the detections
    boxes = detections["detection_boxes"][0].numpy()
    scores = detections["detection_scores"][0].numpy()
    labels = detections["detection_classes"][0].numpy().astype(int)

    # Append image id to results
    for box, score, label in zip(boxes, scores, labels):
        # Convert to COCO format
        coco_sample_results.append(
            {
                "image_id": image_id,
                "category_id": label,
                # Scale box to image size
                "bbox":  [
                    box[1] * image_width,
                    box[0] * image_height,
                    (box[3] - box[1]) * image_width,
                    (box[2] - box[0]) * image_height,
                ],
                "score": score,
            }
        )

    coco_results.extend(coco_sample_results)

In [75]:
# Save the results to disk
coco_results_path = "coco_results.json"
with open(coco_results_path, "w") as f:
    json.dump(coco_results, f, cls=NumpyEncoder)

# Load the COCO annotations for the validation split
gt_path = 'data/coco/annotations/instances_val2017.json'
coco_gt = COCO(gt_path)

# Load the COCO results
coco_dt = coco_gt.loadRes(coco_results_path)

# Evaluate the results
coco_eval = COCOeval(coco_gt, coco_dt, "bbox")
#coco_eval.params.imgIds  = imgIds
coco_eval.evaluate()
coco_eval.accumulate()
coco_eval.summarize()


loading annotations into memory...
Done (t=1.27s)
creating index...
index created!
Loading and preparing results...
DONE (t=2.07s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=17.14s).
Accumulating evaluation results...
DONE (t=4.14s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.202
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.349
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.204
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.027
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.174
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.414
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.217
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.330
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDet

### TfLite
#### float32 quantization

In [78]:
representative_dataset = dataset.take(50)

In [79]:
from tensorflow import lite

# Convert the model to TensorFlow Lite format with float32 quantization
converter = lite.TFLiteConverter.from_keras_model(model)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8, tf.lite.OpsSet.TFLITE_BUILTINS]


def representative_data_gen():
    for sample in representative_dataset:
        # Preprocess the image
        image_path = sample.filepath
        image = tf.image.decode_image(tf.io.read_file(image_path), channels=3)
        image = preprocess_image(image)  

        yield [image]

converter.representative_dataset = representative_data_gen 

tflite_model_int8 = converter.convert()

# Save the TFLite model to a file
with open("model_int8.tflite", 'wb') as f:
    f.write(tflite_model_int8)

INFO:tensorflow:Assets written to: /var/folders/8v/5_c7kn_13jsgmgqz56r3yx440000gp/T/tmp8dz9mbxe/assets


INFO:tensorflow:Assets written to: /var/folders/8v/5_c7kn_13jsgmgqz56r3yx440000gp/T/tmp8dz9mbxe/assets
2023-11-18 16:46:19.301035: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:378] Ignored output_format.
2023-11-18 16:46:19.301054: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:381] Ignored drop_control_dependency.
Summary on the non-converted ops:
---------------------------------
 * Accepted dialects: tfl, builtin, func
 * Non-Converted Ops: 272, Total Ops 2004, % non-converted = 13.57 %
 * 272 ARITH ops

- arith.constant:  272 occurrences  (f32: 161, i32: 111)



  (f32: 15)
  (f32: 3, i1: 1, i32: 3)
  (f32: 98, i32: 90)
  (f32: 55)
  (f32: 17)
  (f32: 2)
  (f32: 1)
  (f32: 91, i32: 90)
  (f32: 295)
  (i1: 7)
  (i1: 1)
  (i1: 90)
  (f32: 1)
  (f32: 4, i32: 1)
  (f32: 14)
  (i32: 90)
  (f32: 6, i32: 9)
  (f32: 5)
  (i32: 2)
  (f32: 4)
  (i64: 1, f32: 106, i1: 1, i32: 98)
  (f32: 1)
  (f32: 91, i32: 6)
  (i32: 103)
  (f32: 96)
  (f32: 4)
  (

In [80]:
import cv2

interpreter = tf.lite.Interpreter(model_content=tflite_model_int8)


input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()

desired_size = 320

new_input_shape = (1, desired_size, desired_size, 3)  # Neue Eingabegröße: (Batch, Höhe, Breite, Kanäle)
interpreter.resize_tensor_input(input_details[0]['index'], new_input_shape)
interpreter.resize_tensor_input(output_details[0]['index'], new_input_shape)

interpreter.allocate_tensors()

def test_model(input_tensor):
    interpreter.set_tensor(input_details[0]['index'], input_tensor)

    interpreter.invoke()

    output_details = interpreter.get_output_details()

    detection_boxes = interpreter.get_tensor(output_details[4]['index'])
    detection_classes = interpreter.get_tensor(output_details[5]['index'])
    detection_scores = interpreter.get_tensor(output_details[6]['index'])

    return detection_boxes, detection_classes, detection_scores

In [81]:
#Prepare box sizes for evaluation
def load_image(image_path):
    image = cv2.imread(image_path)

    height, width, channels = image.shape

    if height > width:
        new_height = desired_size
        scale = desired_size / height
        new_width = int(scale * width)
    else:
        new_width = desired_size
        scale = desired_size / width
        new_height = int(scale * height)

    resized_image = cv2.resize(image, (new_width, new_height))

    padded_image = np.zeros((desired_size, desired_size, channels), dtype=np.uint8)
    top = (desired_size - new_height) // 2
    left = (desired_size - new_width) // 2
    padded_image[top:top + new_height, left:left + new_width] = resized_image
    padded_image = padded_image[..., ::-1]  # BGR to RGB

    input_tensor = tf.convert_to_tensor(padded_image)
    input_tensor = input_tensor[tf.newaxis, ...]

    return padded_image, input_tensor, new_height, new_width, height, width

def prepare_results(image_id, detection_boxes, detection_classes, detection_scores, new_height, new_width, height, width):
    prepared_results = []

    pbox = np.round(detection_boxes[0] * desired_size).astype(int)

    height_scale = height / new_height
    width_scale = width / new_width

    top_padding = (desired_size - new_height) // 2
    left_padding = (desired_size - new_width) // 2

    for i in range(len(detection_boxes[0])):
        # Umrechnen der bbox-Koordinaten von Prozent in Pixel unter Berücksichtigung der Skalierung und des Paddings
        ystart, xstart, yend, xend = pbox[i]

        ystart = (ystart - top_padding) * height_scale
        xstart = (xstart - left_padding) * width_scale
        yend = (yend - top_padding) * height_scale
        xend = (xend - left_padding) * width_scale

        box_width = xend - xstart
        box_height = yend - ystart

        result_entry = {
            "image_id": int(image_id),
            "category_id": int(detection_classes[0][i]),
            "bbox": [xstart, ystart, box_width, box_height],
            "score": float(detection_scores[0][i])
        }

        prepared_results.append(result_entry)

    return prepared_results

In [82]:
#Do inference with the new model for the sampled dataset
all_predictions = []

for sample in sampled_dataset:
    # Load and preprocess the image
    image_path = sample.filepath
    image_id = int(image_path[-16:-4])

    image, input_image, new_height, new_width, height, width = load_image(image_path)
    boxes, classes, scores = test_model(input_image)
    predictions = prepare_results(image_id, boxes, classes, scores, new_height, new_width, height, width)

    all_predictions.extend(predictions)



In [83]:
""" #Do inference with the new model for the sampled dataset
all_predictions = []

for sample in sampled_dataset:
    # Load and preprocess the image
    image_path = sample.filepath
    image = cv2.imread(image_path)
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image_id = int(image_path[-16:-4])
    image_height, image_width, _ = image.shape


    # Resize the image to match the expected dimensions
    height, width = 320, 320  # Adjust these values based on your model's input shape
    input_image = cv2.resize(image, (width, height))
    
    # Normalize and expand dimensions
    input_image = input_image / 255.0  # Normalize to [0, 1]
    input_image = np.expand_dims(input_image, axis=0).astype(np.uint8)

    # Run inference
    boxes, classes, scores = test_model(input_image)

    # Process the results as needed
    # (e.g., visualize the detected objects on the image, print results, etc.)
    predictions = []

    for box, score, label in zip(boxes[0], scores[0], classes[0]):
        predictions.append({
            "image_id": image_id,
            "category_id": int(label),
            "bbox":  [
                    box[1] * image_width,
                    box[0] * image_height,
                    (box[3] - box[1]) * image_width,
                    (box[2] - box[0]) * image_height,
                ],
            "score": float(score),
        })

    all_predictions.extend(predictions) """

' #Do inference with the new model for the sampled dataset\nall_predictions = []\n\nfor sample in sampled_dataset:\n    # Load and preprocess the image\n    image_path = sample.filepath\n    image = cv2.imread(image_path)\n    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)\n    image_id = int(image_path[-16:-4])\n    image_height, image_width, _ = image.shape\n\n\n    # Resize the image to match the expected dimensions\n    height, width = 320, 320  # Adjust these values based on your model\'s input shape\n    input_image = cv2.resize(image, (width, height))\n    \n    # Normalize and expand dimensions\n    input_image = input_image / 255.0  # Normalize to [0, 1]\n    input_image = np.expand_dims(input_image, axis=0).astype(np.uint8)\n\n    # Run inference\n    boxes, classes, scores = test_model(input_image)\n\n    # Process the results as needed\n    # (e.g., visualize the detected objects on the image, print results, etc.)\n    predictions = []\n\n    for box, score, label in zip(bo

In [84]:
#Dump json
coco_results_path = "coco_results_int8.json"
with open(coco_results_path, "w") as f:
    json.dump(all_predictions, f, cls=NumpyEncoder)

coco_results = coco_gt.loadRes(coco_results_path)
coco_eval = COCOeval(coco_gt, coco_results, 'bbox')  # 'bbox' indicates bounding box evaluation


# Run COCOeval
coco_eval.evaluate()
coco_eval.accumulate()
coco_eval.summarize()


Loading and preparing results...
DONE (t=2.63s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=18.04s).
Accumulating evaluation results...
DONE (t=4.11s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.090
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.161
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.089
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.011
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.076
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.192
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.113
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.178
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.189
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= small | maxDets=1