# Setup dependencies

**Before you begin, you will need your API Token from AI Hub.**  
To get this value, log into AI Hub and copy it from [here](https://app.aihub.qualcomm.com/account/).  
Then open the `aihub_api_token.txt` file and paste it there.  
Use CTRL-S to save the file and close it.

In [None]:
%%time
# Notebook setup cell: installs a locally patched Ultralytics checkout, the system
# libraries named in the error messages below, and the qai-hub client.
import os
import sys

# Warn when the token file is missing or empty; this only prints a message,
# the remaining commands in the cell still run
![ ! -s "aihub_api_token.txt" ] && echo "ERROR!! Place your AI Hub token in aihub_api_token.txt file and re-run this block!"

# Make sure we remove the PyPI Ultralytics so that only the patched copy below is installed
!{sys.executable} -m pip uninstall -y ultralytics

# AI Hub uses some patches for YOLO
# https://github.com/quic/ai-hub-models/blob/v0.18.0/qai_hub_models/models/yolov8_det/model.py#L92
# Clone the Ultralytics repo (tag v8.3.34), skipped when an "ultralytics" directory already exists
![ ! -d "ultralytics" ] && git clone https://github.com/ultralytics/ultralytics -b v8.3.34
# TFLite doesn't support quantized division, so convert to multiply
# (rewrites every "/ 2" in tal.py to "* 0.5")
!sed -i 's|/ 2|* 0.5|g' ultralytics/ultralytics/utils/tal.py
# Boxes and scores have different scales, so return separately
# NOTE(review): this only has an effect if head.py contains exactly the matched
# "y = torch.cat(...)" statement - check the git diff output below
!sed -i 's/y = torch.cat((dbox, cls.sigmoid()), 1)/return (dbox, cls.sigmoid())/g' ultralytics/ultralytics/nn/modules/head.py
# Print the resulting changes so the patches can be checked by eye
!cd ultralytics/ && git diff
# Install patched ultralytics from the local checkout
!{sys.executable} -m pip install {os.getcwd()}/ultralytics/

# Fix error:
# ImportError: libGL.so.1: cannot open shared object file: No such file or directory
!sudo apt-get install -q -y libgl1

# Fix error:
# TensorFlow SavedModel: export failure ❌ 167.2s: libusb-1.0.so.0: cannot open shared object file: No such file or directory
!sudo apt-get install -q -y libusb-1.0-0-dev

# Install and configure qai-hub for Quantize steps
!{sys.executable} -m pip install -q qai_hub
# Configure the client only when the token file exists and is non-empty;
# the token is read from the file and passed on the command line
![ -s "aihub_api_token.txt" ] && qai-hub configure --api_token $(cat aihub_api_token.txt)

# Configure settings

The following block sets the configuration for building the model.

NOTES:
- To set up the **FULL** training session, set `SAMPLE_ONLY = False`.  Training will take a long time once all of the classes are enabled.
- If you run tests with `SAMPLE_ONLY = True` and then change to `SAMPLE_ONLY = False`, **you will need to re-run "Download and prepare the dataset"**.
- **If you restart the kernel this block MUST always be re-run.**

In [None]:
# Quick-test switch: True limits the dataset to 5 classes, change to False for full training
SAMPLE_ONLY = True
# Class ids kept while sampling; the list is left empty for the full run
CLASS_FILTER = [17, 36, 47, 68, 73] if SAMPLE_ONLY else []

# Dataset settings
DATASET_NAME = "CUB_200_2011"
DATASET_FILENAME = f"{DATASET_NAME}.tgz"
LABELS_FILENAME = f"{DATASET_NAME}.labels"
LABELS_COLOR = "0x00FF00FF"
DATA_DIR = f"{DATASET_NAME}/"

# Model and training settings
MODEL_NAME = "yolov5m"
MODEL_INPUT_PIXEL_SIZE = 640
# The full-run value (250) probably needs to be higher, but for demo purposes it's ok.
TRAINING_EPOCHS = 100 if SAMPLE_ONLY else 250

# Download and prepare the dataset

The dataset used for this project is: Caltech-UCSD Birds-200-2011 (CUB-200-2011)  
More information on this dataset can be found [here](https://www.vision.caltech.edu/datasets/cub_200_2011/).

To prepare the dataset for training, this step does the following:
- Several text files are parsed for image data and combined into a DataFrame
- Using the `training` field, the images are split into different folders for training and validation
- A dataset configuration file under the `datasets/` folder is created to describe where the images are and the related class names
- A labels file containing class data is generated for use on the device

In [None]:
%%time
![ -z "$MODEL_NAME" ] && echo "ERROR!! No model settings re-run \"Configure settings\" step above!"

import os
import pandas as pd
import shutil

from PIL import Image

def convert_coco_to_yolo(img_size, bbox):
    """Convert a top-left anchored box into a normalised, centre-based YOLO box.

    Args:
        img_size: Image dimensions, indexed as (width, height).
        bbox: Box indexed as (x_min, y_min, width, height), in the same
            units as ``img_size``.

    Returns:
        Tuple ``(x_center, y_center, width, height)``; each value is divided
        by the matching image dimension and rounded to 6 decimal places.
    """
    img_w, img_h = img_size[0], img_size[1]
    left, top, box_w, box_h = bbox[0], bbox[1], bbox[2], bbox[3]
    scaled = (
        (2*left + box_w)/(2*img_w),
        (2*top + box_h)/(2*img_h),
        box_w/img_w,
        box_h/img_h,
    )
    return tuple(round(value, 6) for value in scaled)

def append_file(filename, line):
    """Append ``line`` to the end of ``filename``, creating the file if needed.

    Args:
        filename: Path of the file to append to.
        line: Text to write. It is written as-is, so it must carry its own
            trailing newline if one is wanted.
    """
    # The context manager closes the file on exit, so no explicit close() is needed
    with open(filename, "a") as file:
        file.write(line)

print("Downloading CUB_200_2011 files ...")
# Download the archive only when neither the archive file nor the extracted
# dataset directory is already present.
# NOTE(review): --no-check-certificate turns off TLS certificate validation for this download
![ ! -f "$DATASET_FILENAME" ] && [ ! -d "$DATA_DIR" ] && wget --no-check-certificate -q -O $DATASET_FILENAME https://data.caltech.edu/records/65de6-vp158/files/CUB_200_2011.tgz?download=1

print("Extracting CUB_200_2011 files ...")
# Unpack the archive, skipped when the dataset directory already exists
![ ! -d "$DATA_DIR" ] && tar -xf $DATASET_FILENAME

print("Clearing old configured dataset flles ...")
# Remove data archive (the message above mentions dataset files, but this
# command deletes the downloaded archive; the extracted directory is kept)
!rm -rf $DATASET_FILENAME

def _read_table(name, columns):
    """Read one space-separated dataset text file into a DataFrame with the given column names."""
    return pd.read_csv(DATA_DIR + name, sep=' ', names=columns)


# Start from the main list of images, then attach the per-image tables by image id,
# in this order: class label, training flag, bounding box
df = _read_table("images.txt", ["id", "filepath"])
for table_name, table_columns in (
    ("image_class_labels.txt", ["id", "class_id"]),
    ("train_test_split.txt", ["id", "training"]),
    ("bounding_boxes.txt", ["id", "x_min", "y_min", "width", "height"]),
):
    df = df.merge(_read_table(table_name, table_columns), on="id")

# The class-name table is kept on its own and also merged in by class id
classes_df = _read_table("classes.txt", ["class_id", "class_name"])
df = df.merge(classes_df, on="class_id")

print("Generating dataset and label files ...")

# Create dataset folders, starting from scratch: any existing datasets/ tree is deleted first
!rm -rf datasets/
!mkdir -p datasets/$DATASET_NAME/images/export
!mkdir -p datasets/$DATASET_NAME/images/test
!mkdir -p datasets/$DATASET_NAME/images/train
!mkdir -p datasets/$DATASET_NAME/images/val
!mkdir -p datasets/$DATASET_NAME/labels/export
!mkdir -p datasets/$DATASET_NAME/labels/train
!mkdir -p datasets/$DATASET_NAME/labels/val

# Remove old labels file so that it is rebuilt from empty
!rm -rf $LABELS_FILENAME

# copy the dataset template
# NOTE(review): the file names here are hard-coded to CUB_200_2011 instead of
# using $DATASET_NAME - they only line up while DATASET_NAME keeps that value
!cp CUB_200_2011.yaml.template datasets/CUB_200_2011.yaml

# Class ids present in the merged table, in ascending order
class_ids = sorted(df['class_id'].drop_duplicates())

# class_counter is the zero-based index written to the dataset config, the labels
# file and every YOLO label file; it only advances for classes that are kept
class_counter = -1
for c_id in class_ids:
    # An empty CLASS_FILTER keeps every class
    if CLASS_FILTER and c_id not in CLASS_FILTER:
        continue
    class_counter += 1

    # Parse the class name: keep the second "."-separated field of the stored name
    c_name = classes_df[classes_df['class_id'] == c_id]['class_name'].array[0].split(".")[1]
    print(f"Parsing: {c_name}")
    # append to the dataset config
    append_file(f"datasets/{DATASET_NAME}.yaml", f"  {class_counter}: {c_name}\n")
    # append to the labels file
    append_file(LABELS_FILENAME, f'(structure)"{c_name.replace(" ", "-").replace("_", "-").lower()},id=(guint)0x{class_counter:0>4X},color=(guint){LABELS_COLOR};"\n')

    # Walk this class's rows once. Every row already carries all merged columns,
    # so the whole table does not have to be filtered again for each image
    # (this assumes one row per image id, which is what the merges on "id" produce
    # when each input file lists an image once).
    for row in df[df['class_id'] == c_id].itertuples(index=False):
        filepath_orig = row.filepath
        filename_new = filepath_orig.split("/")[1]
        label_filename = filename_new.split(".")[0] + ".txt"
        source_path = f"{DATASET_NAME}/images/{filepath_orig}"

        # convert from bounding box data to YOLO style x_center,y_center,w,h box
        # (only the size is needed, so the image file is closed again straight away)
        with Image.open(source_path) as source_image:
            img_size = source_image.size
        bbox = (row.x_min, row.y_min, row.width, row.height)
        yolo_box = convert_coco_to_yolo(img_size, bbox)

        # training flag: 1 -> train split, 0 -> val split. Anything else is rejected
        # instead of silently reusing the folder chosen for the previous image.
        if row.training == 1:
            loc = "train"
        elif row.training == 0:
            loc = "val"
        else:
            raise ValueError(f"Unexpected training flag {row.training!r} for image id {row.id}")

        # TODO: copy -> rename
        shutil.copy(source_path, f"datasets/{DATASET_NAME}/images/{loc}/{filename_new}")
        # Create label file: <class_id> <x_center> <y_center> <width> <height>
        with open(f"datasets/{DATASET_NAME}/labels/{loc}/{label_filename}", "w") as label_file:
            label_file.write(f"{class_counter} {yolo_box[0]} {yolo_box[1]} {yolo_box[2]} {yolo_box[3]}\n")

# For SAMPLE_ONLY we use the limited val folder data as calibration for export/quantize operations
# For !SAMPLE_ONLY the val data is too large: create export dir with only 1 calibration image per class type to keep the upload package smaller
if not SAMPLE_ONLY:
    # Setup export directory for calibration data
    print("Generate calibration data ...")
    # Paths are built from DATASET_NAME; DATA_DIR already ends in "/" and would double the separator
    src_images_dir = f"datasets/{DATASET_NAME}/images/val/"
    dst_images_dir = f"datasets/{DATASET_NAME}/images/export/"
    src_labels_dir = f"datasets/{DATASET_NAME}/labels/val/"
    dst_labels_dir = f"datasets/{DATASET_NAME}/labels/export/"
    # A class key is the file name with "_" and digits removed, lower-cased
    translation_table = {ord(char): None for char in "_0123456789"}
    # os.listdir() returns entries in arbitrary order, so remember every class already
    # copied instead of comparing against the previous file only; sorting makes the
    # chosen image repeatable between runs
    seen_classes = set()
    for image_path in sorted(os.listdir(src_images_dir)):
        if not image_path.endswith(".jpg"):
            continue
        unique_name = image_path.translate(translation_table).lower()
        if unique_name in seen_classes:
            continue
        seen_classes.add(unique_name)
        shutil.copy(os.path.join(src_images_dir, image_path), dst_images_dir)
        shutil.copy(os.path.join(src_labels_dir, image_path.replace(".jpg", ".txt")), dst_labels_dir)

    # Copy the dataset config to make an "export" version which points "val" dir to "export"
    with open(f"datasets/{DATASET_NAME}.yaml", "r") as export_file:
        contents = export_file.read()
    contents = contents.replace("val: images/val", "val: images/export")
    with open(f"datasets/{DATASET_NAME}-export.yaml", "w") as export_file:
        export_file.write(contents)
else:
    # The val split is already small: the export config is a plain copy
    shutil.copy(f"datasets/{DATASET_NAME}.yaml", f"datasets/{DATASET_NAME}-export.yaml")

# Download test images into the dataset's images/test folder, each saved under a fixed local name
# NOTE(review): --no-check-certificate turns off TLS certificate validation, and -q hides
# download errors - a failed download is not reported here
!wget --no-check-certificate -q -O datasets/$DATASET_NAME/images/test/multi-goldfinch-1.jpg https://t3.ftcdn.net/jpg/01/44/64/36/500_F_144643697_GJRUBtGc55KYSMpyg1Kucb9yJzvMQooW.jpg
!wget --no-check-certificate -q -O datasets/$DATASET_NAME/images/test/northern-flicker-1.jpg https://upload.wikimedia.org/wikipedia/commons/5/5c/Northern_Flicker_%28Red-shafted%29.jpg
!wget --no-check-certificate -q -O datasets/$DATASET_NAME/images/test/northern-cardinal-1.jpg https://cdn.pixabay.com/photo/2013/03/19/04/42/bird-94957_960_720.jpg
!wget --no-check-certificate -q -O datasets/$DATASET_NAME/images/test/blue-jay-1.jpg https://cdn12.picryl.com/photo/2016/12/31/blue-jay-bird-feather-animals-b8ee04-1024.jpg
!wget --no-check-certificate -q -O datasets/$DATASET_NAME/images/test/hummingbird-1.jpg http://res.freestockphotos.biz/pictures/17/17875-hummingbird-close-up-pv.jpg

# Remove the original data now that we've created the dataset
# (the archive was deleted earlier in this cell, so re-running the cell downloads it again)
!rm -rf $DATA_DIR

print("Done!")

# Create our model

In [None]:
%%time
# Guard: prints a reminder when MODEL_NAME is not set; it does not stop the cell
![ -z "$MODEL_NAME" ] && echo "ERROR!! No model settings re-run \"Configure settings\" step above!"

import shutil
from ultralytics import YOLO

# Start from a YOLO wrapper built with the library defaults ...
create_model = YOLO()
# ... then rebuild it from the "<MODEL_NAME>.yaml" definition as a detection model.
# NOTE(review): _new() is underscore-prefixed, so presumably not public API -
# confirm it behaves the same across Ultralytics versions
create_model._new(f"{MODEL_NAME}.yaml", task="detect", verbose=True)
# Save under the file name that the training step loads
create_model.save(f"{MODEL_NAME}_train.pt")

# Train and validate our model

This step can be re-run several times and combined with the next step to test inference.

If the `runs/` directory is removed (for cleaning), be sure to:
- Restart the instance kernel
- Re-run the "Configure settings" section

In [None]:
%%time
![ -z "$MODEL_NAME" ] && echo "ERROR!! No model settings re-run \"Configure settings\" step above!"

import shutil
from ultralytics import YOLO

# Weights file for this step: loaded first, then overwritten with the best result
weights_path = f"{MODEL_NAME}_train.pt"

# Load our model
train_model = YOLO(weights_path)

# Train the model on the CUB-200-2011 dataset
training_args = {
    "data": f"datasets/{DATASET_NAME}.yaml",
    "epochs": TRAINING_EPOCHS,
    "imgsz": MODEL_INPUT_PIXEL_SIZE,
}
results = train_model.train(**training_args)

# Backup best.pt after training
best_weights = f"{results.save_dir}/weights/best.pt"
shutil.copy(best_weights, weights_path)
print(f"Saved best weights as: {MODEL_NAME}_train.pt")

# Run inference using test images and unquantized model

The results of the image tests are stored under the `tests/` folder.

In [None]:
%%time
![ -z "$MODEL_NAME" ] && echo "ERROR!! No model settings re-run \"Configure settings\" step above!"

# Clean up previous tests
!rm -rf tests
!mkdir -p tests

import os

from ultralytics import YOLO

# Load our model
test_model = YOLO(f"{MODEL_NAME}_train.pt", task="detect")

# Run inference with the model on the test images
directory = f"datasets/{DATASET_NAME}/images/test/"
for filename in os.listdir(directory):
    # Anything that is not a .jpg file is skipped
    if not filename.endswith(".jpg"):
        continue
    image_file = os.path.join(directory, filename)
    results = test_model(image_file)
    # Save every result to disk under tests/, reusing the source file name
    for detection in results:
        detection.save(filename=f"tests/{filename}")

# Compile and quantize on AI Hub

Using our newly trained model:
- Send the model to AI Hub to compile it into ONNX format
- Use the val images to calibrate the compiled ONNX model
- Send the model to AI Hub to quantize (int8)
- Send the quantized model to AI Hub to compile as a TFLite binary for use on the device

**Warning: You need to have saved your API Token in the `aihub_api_token.txt` file before proceeding.**

In [None]:
%%time
![ -z "$MODEL_NAME" ] && echo "ERROR!! No model settings re-run \"Configure settings\" step above!"
![ ! -s "aihub_api_token.txt" ] && echo "ERROR!! Place your AI Hub token in aihub_api_token.txt file and re-run \"Setup dependencies\"!"

# Based on AI Hub docs: https://app.aihub.qualcomm.com/docs/hub/quantize_examples.html

import os
import numpy as np
import qai_hub as hub
import shutil
import torch

from PIL import Image
from ultralytics import YOLO

print("Load and export our model with a traced graph representation ...")
# 1. Load our model and export it with the exporter's default settings; step 2
#    expects this to have written "<MODEL_NAME>_train.torchscript"
torch_model = YOLO(f"{MODEL_NAME}_train.pt")
torch_model.export()

print("Copy model to *.pth ...")
# 2. Copy to model.pth for torch compatibility
shutil.copy(f"{MODEL_NAME}_train.torchscript", f"{MODEL_NAME}_train.pth")

print("Start compile of model to ONNX format ...")
# 3. Compile the model on AI Hub to ONNX
# Can also watch progress here: https://app.aihub.qualcomm.com/jobs/?type=compile
# The copied file is a TorchScript archive, so it is loaded with torch.jit.load().
# torch.load() only handles such archives by handing them over to torch.jit.load()
# with a warning, and rejects them when weights_only=True.
pt_model = torch.jit.load(f"{MODEL_NAME}_train.pth")
input_shape = (1, 3, MODEL_INPUT_PIXEL_SIZE, MODEL_INPUT_PIXEL_SIZE)
device = hub.Device("RB3 Gen 2 (Proxy)")
compile_onnx_job = hub.submit_compile_job(
    name="DetectionModel_PyTorch_to_ONNX",
    model=pt_model,
    device=device,
    input_specs=dict(image_tensor=input_shape),
    options="--target_runtime onnx",
)
assert isinstance(compile_onnx_job, hub.CompileJob)

# Fetch the compiled model produced by the job
unquantized_onnx_model = compile_onnx_job.get_target_model()
assert isinstance(unquantized_onnx_model, hub.Model)
print("Done!")

print(f"Save model locally:{MODEL_NAME}_train_compiled.onnx")
# 3a. Save compiled ONNX model
unquantized_onnx_model.download(f"{MODEL_NAME}_train_compiled.onnx")

# 4. Load and pre-process downloaded calibration data
sample_inputs = []

print("Generate calibration data ...")
# SAMPLE_ONLY calibrates on the val images; the full dataset uses the export subset.
# The path is built from DATASET_NAME; DATA_DIR already ends in "/" and would double the separator.
calibration_split = "val" if SAMPLE_ONLY else "export"
images_dir = f"datasets/{DATASET_NAME}/images/{calibration_split}/"
# PIL's resize() takes (width, height), while input_shape is laid out as (N, C, H, W)
target_size = (input_shape[3], input_shape[2])
# Sorted so that the calibration set is assembled in the same order on every run
for image_path in sorted(os.listdir(images_dir)):
    if not image_path.endswith(".jpg"):
        continue
    # The context manager releases the file handle as soon as the pixels are converted
    with Image.open(os.path.join(images_dir, image_path)) as image:
        print(f"Converting {image_path} from to RGB ...")
        rgb_image = image.convert("RGB").resize(target_size)
    # Scale to [0, 1] floats, then reorder HWC -> CHW and add the batch axis
    sample_input = np.array(rgb_image).astype(np.float32) / 255.0
    sample_input = np.expand_dims(np.transpose(sample_input, (2, 0, 1)), 0)
    sample_inputs.append(sample_input)

# Fail here with a clear message rather than submitting a quantize job without samples
if not sample_inputs:
    raise RuntimeError(f"No .jpg calibration images found in {images_dir}")
calibration_data = dict(image_tensor=sample_inputs)

# 5. Quantize the model
# Can also watch progress here: https://app.aihub.qualcomm.com/jobs/?type=quantize
print("Start quantize job at AI Hub for ONNX model ...")
# Weights and activations are both quantized to INT8, calibrated on the samples gathered above
quantize_settings = dict(
    name="DetectionModel_Quantize_ONNX",
    model=unquantized_onnx_model,
    calibration_data=calibration_data,
    weights_dtype=hub.QuantizeDtype.INT8,
    activations_dtype=hub.QuantizeDtype.INT8,
)
quantize_job = hub.submit_quantize_job(**quantize_settings)

# Fetch the quantized model produced by the job
quantized_onnx_model = quantize_job.get_target_model()
assert isinstance(quantized_onnx_model, hub.Model)
print("Done!")

print(f"Save model locally:{MODEL_NAME}_train_int8.onnx")
# 5a. Save quantized ONNX model
int8_onnx_path = f"{MODEL_NAME}_train_int8.onnx"
quantized_onnx_model.download(int8_onnx_path)

print("Start compile of quantized model to TFLITE format ...")
# 6. Compile to target runtime (TFLite)
# Can also watch progress here: https://app.aihub.qualcomm.com/jobs/?type=compile
tflite_settings = dict(
    name="DetectionModel_ONNX_to_TFLite",
    model=quantized_onnx_model,
    device=device,
    options="--target_runtime tflite --quantize_io --quantize_io_type int8 --force_channel_last_input image_tensor --force_channel_last_output output_0",
)
compile_tflite_job = hub.submit_compile_job(**tflite_settings)
assert isinstance(compile_tflite_job, hub.CompileJob)

# Fetch the compiled model produced by the job
quantized_tflite_model = compile_tflite_job.get_target_model()
assert isinstance(quantized_tflite_model, hub.Model)
print("Done!")

print(f"Save model locally:{MODEL_NAME}_train_int8.tflite")
# 6a. Save tflite model
tflite_path = f"{MODEL_NAME}_train_int8.tflite"
quantized_tflite_model.download(tflite_path)

# Closing summary: the two files that have to be copied to the device
print("DONE!  Please be sure to download the following files for use on the device:")
print(f"- {MODEL_NAME}_train_int8.tflite")
print(f"- {LABELS_FILENAME}")

# Run inference using test images and quantized model

The results of the image tests are stored under the `tests_int8/` folder.

In [None]:
%%time
![ -z "$MODEL_NAME" ] && echo "ERROR!! No model settings re-run \"Configure settings\" step above!"

import os
import numpy as np
import torch
import tensorflow as tf

from PIL import Image
from ultralytics import YOLO

# Clean up previous tests
!rm -rf tests_int8
!mkdir -p tests_int8

# Export torch model as tflite
# 1. Load our model and export as torchvision
#torch_model = YOLO(f"{MODEL_NAME}_train.pt")
#torch_model.export(format="tflite", data=f"datasets/{DATASET_NAME}-export.yaml",
#                   imgsz=MODEL_INPUT_PIXEL_SIZE, int8=True)

# Load our model
interpreter = tf.lite.Interpreter(model_path=f"{MODEL_NAME}_train_int8.tflite")
interpreter.allocate_tensors()

# Named *_details so that the builtin input() is not shadowed in the kernel session
input_details = interpreter.get_input_details()[0]
print(f"input index:{input_details['index']}")
print(f"({input_details['shape'][1]},{input_details['shape'][2]})")
output_details = interpreter.get_output_details()[0]
print(output_details)


def _prepare_input(image, details):
    """Turn an RGB PIL image into a batch of one in the input tensor's own dtype.

    Pixels are first scaled to [0, 1], the same range the calibration data uses.
    When the tensor reports a non-zero quantization scale they are then mapped
    with ``value / scale + zero_point`` and clipped to the dtype's range; a plain
    ``astype(np.int8)`` would wrap every pixel value above 127 instead.

    Args:
        image: PIL image already converted to RGB and resized for the model.
        details: One entry of ``interpreter.get_input_details()``.

    Returns:
        Array with a leading batch axis, ready for ``interpreter.set_tensor``.
    """
    pixels = np.asarray(image, dtype=np.float32) / 255.0
    scale, zero_point = details['quantization']
    dtype = details['dtype']
    if scale:
        limits = np.iinfo(dtype)
        pixels = np.clip(np.round(pixels / scale) + zero_point, limits.min, limits.max)
    return np.expand_dims(pixels.astype(dtype), axis=0)


# Assumes a channel-last (batch, height, width, channels) input, which is what the
# TFLite compile step requests - TODO confirm against the printed shape above.
# PIL's resize() takes (width, height).
input_height = int(input_details['shape'][1])
input_width = int(input_details['shape'][2])

# Run inference with the model on the test images
directory = f"datasets/{DATASET_NAME}/images/test/"
for filename in os.listdir(directory):
    if not filename.endswith(".jpg"):
        continue
    f = os.path.join(directory, filename)
    print(f"Performing detection on: {filename}")

    # Reshape the input to match the model (640x640 by default); converting to RGB
    # keeps greyscale or RGBA files at three channels
    with Image.open(f) as image:
        resized = image.convert("RGB").resize((input_width, input_height))

    interpreter.set_tensor(input_details['index'], _prepare_input(resized, input_details))
    interpreter.invoke()
    # Print the raw (still quantized) output, one row per entry along the last axis.
    # The row width follows the tensor instead of a fixed 9, so it also fits other
    # class counts; with a 9-wide channel-last output the rows are the same as before.
    raw_output = interpreter.get_tensor(output_details['index'])
    print(raw_output.reshape(-1, raw_output.shape[-1]))