# License Plate Project
Real-time processing of a live data stream is required; speed and accuracy are the main factors.

# Incoming Truck
<img src="../samples/new_truck.jpg"  width="49%" style="float: left; margin: 5px;">
<img src="../samples/new_truck_id.png"  width="49%" style="float: left; margin: 5px;">

# Readable License Plate on Truck
<img src="../samples/truck.jpg"  width="100%" style="float: left; margin-bottom: 15px;">

# Process Pipeline
<div style="margin: 10px;">
    <img src="../samples/pipeline.jpg"  width="100%" style="float: left;">
</div>

# Layers
- Truck / Car recognition
- License Plate Detection
- Spatial Transform of License Plate for better Recognition
- License Plate Recognition

# Install libraries

In [1]:
pip install opencv-python torch torchvision ultralytics easyocr pytesseract

Note: you may need to restart the kernel to use updated packages.




# Reload modules

In [2]:
# Load IPython's autoreload extension so edited modules are picked up without restarting the kernel
%load_ext autoreload
# Reload the extension as well, which keeps this cell safe to run again
%reload_ext autoreload
# Mode 2: reload all modules every time before executing code
%autoreload 2

# ================ Run all cells below one by one ===================

# Imports

In [3]:
# Imports
import cv2
import math
import time
import traceback
import torch
import os

# Imports all pre-trained YOLOv5, YOLOv8, YOLOv9 and in the future YOLOv10 models (COCO dataset with 80 classes), they will be automatically downloaded if not available
from ultralytics import YOLO

# Imports all other pre-trained detection models needed (COCO dataset with 80 classes), they will be automatically downloaded if they are not available
from torchvision.models.detection import *
from torchvision.io.image import read_image
from torchvision.utils import draw_bounding_boxes
from torchvision.transforms.functional import to_pil_image
import torchvision.transforms as transforms

from PIL import Image

import IPython
from IPython.display import display, clear_output

# OCR
import pytesseract   # pip install pytesseract
import easyocr  # pip install easyocr

# Tesseract itself must be installed separately; Windows builds: "https://github.com/UB-Mannheim/tesseract/wiki"
# Only point pytesseract at the default Windows install location when that file actually exists.
# Otherwise pytesseract keeps its own default command and looks the binary up on PATH.
TESSERACT_WINDOWS_PATH = 'C:\\Program Files\\Tesseract-OCR\\tesseract.exe'
if os.path.isfile(TESSERACT_WINDOWS_PATH):
    pytesseract.pytesseract.tesseract_cmd = TESSERACT_WINDOWS_PATH

# Init on english and german characters
reader = easyocr.Reader(['en', 'de'])

print("If you are using CPU you may see a warning above caused by easyocr, you can ignore it!")

Neither CUDA nor MPS are available - defaulting to CPU. Note: This module is much faster with a GPU.




# Prefer the GPU for better performance when it is available; otherwise fall back to the CPU

In [4]:
# Use the first CUDA GPU when torch reports one, otherwise run everything on the CPU
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
print(f"You are using: {device} (Device)")

You are using: cpu (Device)


# Run this cell if you are using any model from PyTorch; it transforms the input image into a tensor

In [5]:
# Preprocessing pipeline: the only step converts the input image into a tensor.
# A resize step such as transforms.Resize((300, 300)) is deliberately left out, it is not needed here.
transform = transforms.Compose([transforms.ToTensor()])
print("Good, run the next!")

Good, run the next!


# Define custom classes

In [6]:
# Describes one detection model together with the results it produced. All results are kept
# in memory (so the workflow is bounded by RAM), which lets the same code run on video files
# and on a live camera feed. Models are downloaded, loaded and initialized at construction
# time, i.e. before any test starts.
class DetectionModel():
    """A model plus three result lists: vehicle detections, plate detections, plate recognitions.

    Most statistic methods take an optional ``child`` selector:
    ``None`` -> vehicle detections, ``"plateDetection"`` -> plate detections,
    ``"plateRecognition"`` -> plate text recognitions. Any other value selects nothing
    (except in ``detectionsCount``, see there).

    Stored results only need the attributes ``id``, ``frame``, ``precision`` and ``speed``
    (plus ``toString()`` for the print helpers).
    """

    def __init__(self, concept, backbone, model, weights, reference, color=None, custom=False):
        """Create the model wrapper and, unless ``custom`` is set, instantiate the network.

        Args:
            concept: Model family name. "YOLO" models are built as ``model(weights)``;
                "YOLO" and "OCR" models are not moved to the device or put in eval mode.
            backbone: Backbone name (stored only).
            model: Model factory/class, or a ready object when ``custom`` is true.
            weights: Weights file name for YOLO, otherwise an object exposing ``.DEFAULT``.
            reference: Custom display name of the model.
            color: Color tuple used when drawing this model's results.
            custom: When true, ``model`` is stored as given and not instantiated here.
        """
        self.concept = concept
        self.backbone = backbone
        if custom:
            self.model = model
        elif concept == "YOLO":
            self.model = model(weights)
        else:
            self.model = model(weights=weights.DEFAULT)
        if concept not in ["YOLO", "OCR"]:
            self.model.to(device)
            self.model.eval()
        self.weights = weights
        self.reference = reference
        self.color = color
        self.custom = custom
        self.detections = []
        self.plates_detections = []
        self.plates_recognitions = []
        # Index of the pinned model per stage; -1 is treated as "no model pinned" by the
        # prepare_* functions, which then iterate over every model of that stage
        self.vehicleModel = -1
        self.plateModel = -1
        self.textModel = -1
        # Read by detect_plates() and recognize_plates(); the default mirrors the default of
        # detect_vehicles(confidence_threshold_plate=0.3).
        # NOTE(review): presumably overwritten per run by detect_vehicles - confirm.
        self.confidence_threshold_plate = 0.3

    def _resultsFor(self, child=None):
        """Return the result list selected by ``child`` (empty list for an unknown selector)."""
        if child is None:
            return self.detections
        if child == "plateDetection":
            return self.plates_detections
        if child == "plateRecognition":
            return self.plates_recognitions
        return []

    @staticmethod
    def _toNumber(value):
        """Unwrap a torch tensor into a plain Python number; pass anything else through."""
        return value.item() if torch.is_tensor(value) else value

    def addDetection(self, detection):
        self.detections.append(detection)

    def addPlateDetection(self, plate_detection):
        self.plates_detections.append(plate_detection)

    def addPlateRecognition(self, plate_recognition):
        self.plates_recognitions.append(plate_recognition)

    def getDetections(self):
        return self.detections

    def clearDetections(self):
        self.detections = []

    def detectionsCount(self, child=None):
        """Return the number of stored results.

        Any ``child`` other than ``None`` or "plateDetection" counts the plate recognitions.
        """
        if child is None:
            return len(self.detections)
        if child == "plateDetection":
            return len(self.plates_detections)
        return len(self.plates_recognitions)

    def inferencesCount(self, child=None):
        """Return the number of distinct result ids in the selected list."""
        return len({result.id for result in self._resultsFor(child)})

    def getAveragePrecision(self, child=None):
        """Return the mean precision of the selected results, 0.0 when there are none."""
        results = self._resultsFor(child)
        if not results:
            return 0.0
        total = 0.0
        for result in results:
            total += result.precision
        return self._toNumber(total / len(results))

    def getMaxPrecision(self, child=None):
        """Return the highest precision of the selected results (never below 0.0)."""
        best = 0.0
        for result in self._resultsFor(child):
            if best < result.precision:
                best = result.precision
        return self._toNumber(best)

    def getMinPrecision(self, child=None):
        """Return the lowest precision of the selected results, 0.0 when there are none."""
        worst = None
        for result in self._resultsFor(child):
            if worst is None or worst > result.precision:
                worst = result.precision
        return 0.0 if worst is None else self._toNumber(worst)

    def getAverageInferenceSpeed(self, child=None):
        """Return the mean speed, counting only the first result seen for each frame."""
        seenFrames = set()
        total = 0.0
        for result in self._resultsFor(child):
            if result.frame not in seenFrames:
                seenFrames.add(result.frame)
                total += result.speed
        return total / len(seenFrames) if seenFrames else 0.0

    def getMaxInferenceSpeed(self, child=None):
        """Return the largest (slowest) speed value over all selected results (never below 0.0)."""
        # Every result is inspected: skipping results of an already seen frame could
        # hide the real maximum when results of one frame carry different speeds.
        slowest = 0.0
        for result in self._resultsFor(child):
            if slowest < result.speed:
                slowest = result.speed
        return slowest

    def getMinInferenceSpeed(self, child=None):
        """Return the smallest (fastest) speed value over all selected results, 0.0 when empty."""
        fastest = None
        for result in self._resultsFor(child):
            if fastest is None or fastest > result.speed:
                fastest = result.speed
        return 0.0 if fastest is None else fastest

    def filterPrecisionsBelow(self, minPrecision, child=None):
        """Drop every selected result whose precision is below ``minPrecision``.

        An unknown ``child`` selector leaves all lists untouched.
        """
        if child is None:
            self.detections = [r for r in self.detections if r.precision >= minPrecision]
        elif child == "plateDetection":
            self.plates_detections = [r for r in self.plates_detections if r.precision >= minPrecision]
        elif child == "plateRecognition":
            self.plates_recognitions = [r for r in self.plates_recognitions if r.precision >= minPrecision]

    def _statistics(self, prefix="", child=None):
        """Format the statistic lines of one result list; every label starts with ``prefix``."""
        return (
            f'{prefix}Best Precision: {self.getMaxPrecision(child):.2f} \n'
            f'{prefix}Average Precision: {self.getAveragePrecision(child):.2f} \n'
            f'{prefix}Worst Precision: {self.getMinPrecision(child):.2f} \n'
            # The best speed is the smallest duration, the worst speed the largest one
            f'{prefix}Best Inference Speed: {self.getMinInferenceSpeed(child):.5f}s \n'
            f'{prefix}Average Inference Speed: {self.getAverageInferenceSpeed(child):.5f}s \n'
            f'{prefix}Worst Inference Speed: {self.getMaxInferenceSpeed(child):.5f}s \n'
            f'{prefix}Total Detections: {self.detectionsCount(child)} \n'
            f'{prefix}Total Inferences: {self.inferencesCount(child)}'
        )

    def toString(self, withPlates=True, withTexts=True):
        """Return a multi-line summary: vehicle statistics, optionally plate and plate-text ones."""
        text = f'Model: {self.reference} \n' + self._statistics()

        if withPlates:
            text += "\n\n" + self._statistics("Plate ", "plateDetection")

        if withTexts:
            text += "\n\n" + self._statistics("Plate Text ", "plateRecognition")

        text += "\n\n"
        return text

    def printDetections(self):
        for detection in self.detections:
            print(detection.toString())
            print()

    def printPlateDetections(self):
        for plate_detection in self.plates_detections:
            print(plate_detection.toString())
            print()

    def printPlateRecognitions(self):
        for plate_recognition in self.plates_recognitions:
            print(plate_recognition.toString())
            print()

    def printResults(self):
        print(self.toString())

# A single detection outcome; instances are collected in the result lists of a DetectionModel object
class DetectionResult():
    """Plain record describing one detection (vehicle, plate or plate text)."""

    def __init__(self, model, label, precision, speed, device=None, source=None, frame=None, id=None, boundingBox=None, details=None, content=None):
        # Tensor scores/timings are unwrapped so the record only holds plain Python numbers
        if torch.is_tensor(precision):
            precision = precision.item()
        if torch.is_tensor(speed):
            speed = speed.item()

        self.id = id
        self.model = model
        self.label = label
        self.precision = precision
        self.speed = speed
        self.content = content
        self.device = device
        self.source = source
        self.frame = frame
        self.boundingBox = boundingBox
        self.details = details

    def toString(self):
        """Return all fields on one line; the bounding box part is left out when it is None."""
        pieces = [
            f'ID: {self.id}, ',
            f'Model: {self.model}, ',
            f'Label: {self.label}, ',
            f'Precision: {self.precision:.2f}%, ',
            f'Speed: {self.speed:.3f}s, ',
            f'Content: {self.content}, ',
            f'Device: {self.device}, ',
            f'Source: {self.source}, ',
            f'Frame: {self.frame}, ',
        ]
        box = self.boundingBox
        if box is not None:
            pieces.append(f'BoundingBox: {box[0]} {box[1]} {box[2]} {box[3]}, ')
        pieces.append(f'Details: {self.details}')
        return "".join(pieces)

# Confirmation that the class definitions in this cell were executed
print("Definitions initialized successfully!")

Definitions initialized successfully!


# Initialize (and download) detection models

In [7]:
# All models used for vehicle detection. Any model imported above can be added to this list.
# DetectionModel arguments, in order: concept name, backbone name, model factory/class,
# pre-trained weights (a weights enum for torchvision models, a weights file name for YOLO),
# a custom display name for the model and a color tuple used when drawing its results.
# Entries that are commented out are simply not loaded.
VEHICLE_DETECTION_MODELS = [
    DetectionModel("FCOS", "resnet50_fpn", fcos_resnet50_fpn, FCOS_ResNet50_FPN_Weights, "fcos_resnet", (0,234,255)),
    DetectionModel("RetinaNet", "resnet50_fpn", retinanet_resnet50_fpn, RetinaNet_ResNet50_FPN_Weights, "retinanet_resnet", (255,255,0)),
    DetectionModel("RetinaNet", "resnet50_fpn_v2", retinanet_resnet50_fpn_v2, RetinaNet_ResNet50_FPN_V2_Weights, "retinanet_resnet_v2", (0,234,255)),
    DetectionModel("FasterRCNN", "resnet50_fpn", fasterrcnn_resnet50_fpn, FasterRCNN_ResNet50_FPN_Weights, "faster_rcnn_resnet", (170,0,255)),
    DetectionModel("FasterRCNN", "resnet50_fpn_v2", fasterrcnn_resnet50_fpn_v2, FasterRCNN_ResNet50_FPN_V2_Weights, "faster_rcnn_resnet_v2", (255,127,0)),
    DetectionModel("FasterRCNN", "mobilenet_v3_large_fpn", fasterrcnn_mobilenet_v3_large_fpn, FasterRCNN_MobileNet_V3_Large_FPN_Weights, "faster_rcnn_mobilenet_v3", (191,255,0)),
    DetectionModel("FasterRCNN", "mobilenet_v3_large_320_fpn", fasterrcnn_mobilenet_v3_large_320_fpn, FasterRCNN_MobileNet_V3_Large_320_FPN_Weights, "faster_rcnn_mobilenet_v3_320", (0,149,255)),
    DetectionModel("MaskRCNN", "resnet50_fpn", maskrcnn_resnet50_fpn, MaskRCNN_ResNet50_FPN_Weights, "mask_rcnn_resnet", (255,0,170)),
    DetectionModel("MaskRCNN", "resnet50_fpn_v2", maskrcnn_resnet50_fpn_v2, MaskRCNN_ResNet50_FPN_V2_Weights, "mask_rcnn_resnet_v2", (255,212,0)),
    DetectionModel("SSD300", "vgg16", ssd300_vgg16, SSD300_VGG16_Weights, "ssd_vgg16", (106,255,0)),
    DetectionModel("SSDLite320", "mobilenet_v3_large", ssdlite320_mobilenet_v3_large, SSDLite320_MobileNet_V3_Large_Weights, "ssd_mobilenet_v3", (0,64,255)),
    DetectionModel("YOLO", "csp_darknet53", YOLO, 'yolov5nu.pt', "yolov5nu", (237,185,185)),
    DetectionModel("YOLO", "csp_darknet53", YOLO, 'yolov5su.pt', "yolov5su", (185,215,237)),
    DetectionModel("YOLO", "csp_darknet53", YOLO, 'yolov5mu.pt', "yolov5mu", (231,233,185)),
    DetectionModel("YOLO", "csp_darknet53", YOLO, 'yolov5lu.pt', "yolov5lu", (220,185,237)),
    DetectionModel("YOLO", "csp_darknet53", YOLO, 'yolov5xu.pt', "yolov5xu", (185,237,224)),
    DetectionModel("YOLO", "csp_darknet53", YOLO, 'yolov5n6u.pt', "yolov5n6u", (237,185,185)),
    DetectionModel("YOLO", "csp_darknet53", YOLO, 'yolov5s6u.pt', "yolov5s6u", (170,0,255)),
    DetectionModel("YOLO", "csp_darknet53", YOLO, 'yolov5m6u.pt', "yolov5m6u", (143,106,35)),
    DetectionModel("YOLO", "csp_darknet53", YOLO, 'yolov5l6u.pt', "yolov5l6u", (107,35,143)),
    DetectionModel("YOLO", "csp_darknet53", YOLO, 'yolov5x6u.pt', "yolov5x6u", (237,185,185)),
    DetectionModel("YOLO", "csp_darknet53", YOLO, 'yolov8n.pt', "yolov8n", (185,237,224)),
    DetectionModel("YOLO", "csp_darknet53", YOLO, 'yolov8s.pt', "yolov8s", (237,185,185)),
    DetectionModel("YOLO", "csp_darknet53", YOLO, 'yolov8m.pt', "yolov8m", (255,255,0)),
    DetectionModel("YOLO", "csp_darknet53", YOLO, 'yolov8l.pt', "yolov8l", (255,0,0)),
    DetectionModel("YOLO", "csp_darknet53", YOLO, 'yolov8x.pt', "yolov8x", (255,255,0)),
    # DetectionModel("YOLO", "csp_darknet53", YOLO, 'yolov9t.pt', "yolov9t", (220,185,237)),
    # DetectionModel("YOLO", "csp_darknet53", YOLO, 'yolov9s.pt', "yolov9s", (185,215,237)),
    # DetectionModel("YOLO", "csp_darknet53", YOLO, 'yolov9m.pt', "yolov9m", (237,185,185)),
    DetectionModel("YOLO", "csp_darknet53", YOLO, 'yolov9c.pt', "yolov9c", (0,234,255)),
    DetectionModel("YOLO", "csp_darknet53", YOLO, 'yolov9e.pt', "yolov9e", (170,0,255))#,
    # DetectionModel("YOLO", "csp_darknet53", YOLO, 'yolov10n.pt', "yolov10n", (185,237,224)),
    # DetectionModel("YOLO", "csp_darknet53", YOLO, 'yolov10s.pt', "yolov10s", (237,185,185)),
    # DetectionModel("YOLO", "csp_darknet53", YOLO, 'yolov10m.pt', "yolov10m", (255,255,0)),
    # DetectionModel("YOLO", "csp_darknet53", YOLO, 'yolov10b.pt', "yolov10b", (220,185,237)),
    # DetectionModel("YOLO", "csp_darknet53", YOLO, 'yolov10l.pt', "yolov10l", (255,0,0)),
    # DetectionModel("YOLO", "csp_darknet53", YOLO, 'yolov10x.pt', "yolov10x", (255,255,0))
]
print("Successfully initialized (and imported) vehicle detectors!")

Successfully initialized (and imported) vehicle detectors!


In [8]:
# Plate detection models. They are registered as "custom", so DetectionModel stores the YOLO
# class itself and the network is only built below, from a local weights file.
PLATE_DETECTION_MODELS = [
    DetectionModel("YOLO", "csp_darknet53", YOLO, "best_plate_model.pt", "best_plate_model", (220,185,237), True),
    DetectionModel("YOLO", "csp_darknet53", YOLO, "vehicle_license_best.pt", "vehicle_license_best", (170,0,255), True),
    DetectionModel("YOLO", "csp_darknet53", YOLO, "best_3.pt", "best_3", (185,237,224), True),
    DetectionModel("YOLO", "csp_darknet53", YOLO, "best_4.pt", "best_4", (220,185,237), True),
    DetectionModel("YOLO", "csp_darknet53", YOLO, "best_5.pt", "best_5", (185,215,237), True)
]
for plate_detector in PLATE_DETECTION_MODELS:
    if not plate_detector.custom:
        continue
    if os.path.isfile(plate_detector.weights):
        plate_detector.model = plate_detector.model(plate_detector.weights)
    else:
        # Without its weights the entry still holds the bare YOLO class and cannot run
        # inference, so report it instead of skipping it silently
        print(f"Warning: weights file '{plate_detector.weights}' not found, plate detector '{plate_detector.reference}' was not loaded!")
print("Successfully initialized plate detectors!")

Successfully initialized plate detectors!


In [9]:
# Plate text models: two custom character-detection YOLO networks plus two OCR engines.
# All are registered as "custom"; the OCR entries already carry a usable object
# (the pytesseract module / the easyocr reader), the YOLO entries are built below.
PLATE_RECOGNITION_MODELS = [
    DetectionModel("YOLO", "csp_darknet53", YOLO, "plate_char/yolov8x/weights/best.pt", "plate_char", (220,185,237), True),
    DetectionModel("YOLO", "csp_darknet53", YOLO, "charenyeni/yolov8x/weights/best.pt", "charenyeni", (185,237,224), True),
    DetectionModel("OCR", "tesseract", pytesseract, "", "pytesseract", (220,185,237), True),
    DetectionModel("OCR", "easyocr", reader, "", "easyocr", (220,185,237), True)
]
for plate_recognizer in PLATE_RECOGNITION_MODELS:
    if not plate_recognizer.custom:
        continue
    if os.path.isfile(plate_recognizer.weights):
        plate_recognizer.model = plate_recognizer.model(plate_recognizer.weights)
    elif plate_recognizer.concept == "YOLO":
        # OCR entries have no weights file by design; only a YOLO entry without its
        # weights is left unusable (it still holds the bare YOLO class)
        print(f"Warning: weights file '{plate_recognizer.weights}' not found, plate recognizer '{plate_recognizer.reference}' was not loaded!")
print("Successfully initialized plate recognizers!")

Successfully initialized plate recognizers!


# Recognize License Plates Text

In [10]:
# Grayscale conversion
def grayscale(image):
    """Convert an image from BGR channel order to a grayscale image."""
    gray_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    return gray_image

# Otsu binarization
def threshold(image):
    """Return the binary image produced by Otsu thresholding (the chosen threshold value is discarded)."""
    otsu_binary = cv2.THRESH_BINARY + cv2.THRESH_OTSU
    _, binary_image = cv2.threshold(image, 0, 255, otsu_binary)
    return binary_image

In [11]:
def recognize_plates(i, j, k, processed_frame, detection_plate, detection):
    """Read the text of one detected license plate, record the result and display the frame.

    Args:
        i: Index of the vehicle model in VEHICLE_DETECTION_MODELS; the recognition result
            is stored on this model.
        j: Index of the plate detection model (only used to build the result id).
        k: Index of the text model in PLATE_RECOGNITION_MODELS.
        processed_frame: Frame to annotate; it is copied, the input is not drawn on.
        detection_plate: Plate detection as built by detect_plates():
            [bounding box relative to the vehicle crop, name, precision, speed, device, source, frame].
        detection: Vehicle detection, indexed the same way
            ([0] bounding box, [2] precision, [3] speed, [6] frame).

    Returns nothing. When the plate crop is empty the function returns without recording
    or drawing anything.
    """
    currentModel = VEHICLE_DETECTION_MODELS[i]
    currentTextModel = PLATE_RECOGNITION_MODELS[k]

    x1, y1, x2, y2 = [int(x) for x in detection[0]]
    a1, b1, a2, b2 = [int(x) for x in detection_plate[0]]

    frame = processed_frame.copy()
    # Plate coordinates are relative to the vehicle crop, so offset them by the vehicle origin
    license_frame = frame[y1+b1:y1+b2,x1+a1:x1+a2]

    # A zero-area crop cannot be converted or fed to a model, so there is nothing to recognize
    if license_frame.size == 0:
        return

    final_text = ""
    final_precision = 0.0
    final_speed = 0.0

    if currentTextModel.concept=="OCR":
        preprocessed_frame = threshold(grayscale(license_frame))

        if currentTextModel.reference=="pytesseract":
            start_time = time.time()
            results = currentTextModel.model.image_to_data(preprocessed_frame, config='-c tessedit_char_whitelist=ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789')
            final_speed = time.time() - start_time

            # Keep the 12-field row with the highest confidence (field 10); field 11 is the text.
            # The header row also has 12 fields: its "conf" counts as 0.0 and its "text" becomes "-".
            for line in results.split('\n'):
                params = line.split()
                if len(params)!=12:
                    continue
                confidence = float(params[10].replace('conf','0.0'))
                if confidence>=final_precision:
                    final_precision = confidence
                    final_text = params[11] if params[11]!="text" else "-"

            cv2.putText(frame, '{} ({:.3f}s) {} {:.2f}%'.format(final_text, final_speed, currentTextModel.reference, final_precision),(x1+a1+10,y1+b2+25),0,0.8,currentTextModel.color,1)
        elif currentTextModel.reference=="easyocr":
            start_time = time.time()
            result = currentTextModel.model.readtext(preprocessed_frame)
            final_speed = time.time() - start_time
            # Each entry is indexed as [1] text, [2] confidence; keep the most confident one
            for res in result:
                if res[2]*100>final_precision:
                    final_precision = res[2]*100
                    final_text = res[1]

            cv2.putText(frame, '{} ({:.3f}s) {} {:.2f}%'.format(final_text, final_speed, currentTextModel.reference, final_precision),(x1+a1+10,y1+b2+25),0,0.8,currentTextModel.color,1)
    else:
        plate_parts=[]

        start_time = time.time()
        results = currentTextModel.model(license_frame, agnostic_nms=True, verbose=False, device=device)[0]
        final_speed = time.time() - start_time

        for result in results:
            # The loop variable must not be called i: that would overwrite the vehicle model
            # index that is still needed for the statistics below
            for box_index in range(result.boxes.shape[0]):
                cls = int(result.boxes.cls[box_index].item())
                name = result.names[cls]
                confidence = float(result.boxes.conf[box_index].item())
                bounding_box = result.boxes.xyxy[box_index].cpu().numpy()
                m1, n1, m2, n2 = [int(x) for x in bounding_box]
                # Ensure m1 < m2 and n1 < n2
                m1, m2 = min(m1, m2), max(m1, m2)
                n1, n2 = min(n1, n2), max(n1, n2)
                if confidence>currentModel.confidence_threshold_plate:
                    plate_parts.append([m1,n1,name,confidence])
                    cv2.rectangle(frame,(x1+a1+m1,y1+b1+n1),(x1+a1+m2,y1+b1+n2),currentTextModel.color,1)
                    cv2.putText(frame, '{}'.format(name),(x1+a1+m1-20,y1+b1+n2+25),0,0.9,currentTextModel.color,1)

        if plate_parts:
            # Order the detected parts left to right to assemble the plate text
            sorted_parts = sorted(plate_parts,key=lambda part:part[0])
            recognized_parts = [part for part in sorted_parts if part[2]!="undefined"]
            final_text = "".join(part[2] for part in recognized_parts)
            # "undefined" parts add no confidence but still count in the divisor
            final_precision = sum((part[3] for part in recognized_parts), 0.0) / len(sorted_parts) * 100

        cv2.putText(frame, '{} ({:.3f}s) {} {:.2f}%'.format(final_text, final_speed, currentTextModel.reference, final_precision),(x1+a1+10,y1+b2+35),0,0.8,currentTextModel.color,1)

    # Speed of the whole pipeline: text recognition + plate detection + vehicle detection
    pipeline_speed = final_speed+detection_plate[3]+detection[3]

    # Add result to the statistics
    currentModel.addPlateRecognition(DetectionResult(model=currentTextModel.reference, label=final_text, precision=final_precision,
                                                     speed=final_speed, device=detection_plate[4], source=detection_plate[5],
                                                     frame=detection_plate[6], id=f'{detection[6]}{i}{j}{k}', boundingBox=None,
                                                     details=f'Pipeline Speed: {pipeline_speed:.5f}s'))

    # Overall label under the vehicle: text, pipeline speed and the mean of the three precisions
    cv2.putText(frame, '{} ({:.3f}s) {:.2f}%'.format(final_text, pipeline_speed, (final_precision+detection_plate[2]+detection[2])/3),(x1+10,y2+25),0,0.9,currentModel.color,2)

    # Outputs frames as a smooth video and does not allow prints
    _, display_frame = cv2.imencode('.jpeg', frame)
    display_handle.update(IPython.display.Image(data=display_frame.tobytes()))
    IPython.display.clear_output(wait=True)

In [12]:
def prepare_plate_recognition(i, j, processed_frame, detection_plate, detection):
    """Run text recognition on a plate with the pinned text model, or with every text model.

    A ``textModel`` index above -1 on the vehicle model selects exactly that entry of
    PLATE_RECOGNITION_MODELS; otherwise all entries are tried in order.
    """
    pinned_text_model = VEHICLE_DETECTION_MODELS[i].textModel
    if pinned_text_model > -1:
        text_model_indices = [pinned_text_model]
    else:
        text_model_indices = range(len(PLATE_RECOGNITION_MODELS))

    for k in text_model_indices:
        recognize_plates(i, j, k, processed_frame, detection_plate, detection)

# Detect License Plates

In [13]:
def detect_plates(i, j, processed_frame, detection):
    """Detect license plates inside one detected vehicle and hand each plate to text recognition.

    Args:
        i: Index of the vehicle model in VEHICLE_DETECTION_MODELS; results are stored on it
            and its ``confidence_threshold_plate`` filters the plate detections.
        j: Index of the plate model in PLATE_DETECTION_MODELS.
        processed_frame: Frame to annotate; it is copied, the input is not drawn on.
        detection: Vehicle detection, indexed as [0] bounding box, [4] device, [5] source,
            [6] frame.

    Returns nothing. When the vehicle crop is empty the function returns without running
    the plate model.
    """
    currentModel = VEHICLE_DETECTION_MODELS[i]
    currentPlateModel = PLATE_DETECTION_MODELS[j]

    frame = processed_frame.copy()

    x1, y1, x2, y2 = [int(x) for x in detection[0]]
    vehicle_frame = frame[y1:y2,x1:x2]

    # Truncating the box to integers can leave a zero-area crop; there is nothing to search in it
    if vehicle_frame.size == 0:
        return

    start_time = time.time()
    results = currentPlateModel.model(vehicle_frame, agnostic_nms=True, verbose=False, device=device)[0]
    processing_time = time.time() - start_time
    for result in results:
        detection_count = result.boxes.shape[0]
        for box_index in range(detection_count):
            cls = int(result.boxes.cls[box_index].item())
            name = result.names[cls]
            confidence = float(result.boxes.conf[box_index].item())
            bounding_box = result.boxes.xyxy[box_index].cpu().numpy()

            if confidence <= currentModel.confidence_threshold_plate:
                continue

            a1, b1, a2, b2 = [int(x) for x in bounding_box]
            # Ensure a1 < a2 and b1 < b2
            a1, a2 = min(a1, a2), max(a1, a2)
            b1, b2 = min(b1, b2), max(b1, b2)

            label_text = f'{currentPlateModel.reference} ({processing_time:.3f}s) {confidence*100:.2f}% {name.upper()}'

            # Plate coordinates are relative to the vehicle crop, so offset them by the vehicle origin
            cv2.rectangle(frame, (x1+a1, y1+b1), (x1+a2, y1+b2),currentPlateModel.color,1)
            cv2.putText(frame, label_text,(x1+a1-10,y1+b1-15),0,0.8,currentPlateModel.color,1)

            # Add result to the statistics
            # NOTE(review): boundingBox stores the vehicle box (detection[0]), not the plate box - confirm this is intended
            detection_plate = [bounding_box, name, confidence*100, processing_time, detection[4], detection[5], detection[6]]
            currentModel.addPlateDetection(DetectionResult(model=currentPlateModel.reference, label=detection_plate[1], precision=detection_plate[2],
                                                           speed=detection_plate[3], device=detection_plate[4], source=detection_plate[5],
                                                           frame=detection[6], id=f'{detection[6]}{i}{j}', boundingBox=detection[0],
                                                           details=label_text))

            # Outputs frames as a smooth video and does not allow prints
            _, display_frame = cv2.imencode('.jpeg', frame)
            display_handle.update(IPython.display.Image(data=display_frame.tobytes()))
            IPython.display.clear_output(wait=True)

            prepare_plate_recognition(i, j, frame, detection_plate, detection)

In [14]:
def prepare_plate_detection(i, processed_frame, detection):
    """Hand a detected vehicle over to the license plate detection stage.

    When vehicle model ``i`` has a specific plate model selected (its
    ``plateModel`` attribute is an index greater than -1) only that plate
    model is run; otherwise every entry of ``PLATE_DETECTION_MODELS`` is run
    in list order.

    Args:
        i: Index of the vehicle model in ``VEHICLE_DETECTION_MODELS``.
        processed_frame: Frame forwarded unchanged to ``detect_plates``.
        detection: Vehicle detection record forwarded to ``detect_plates``.
    """
    selected = VEHICLE_DETECTION_MODELS[i].plateModel

    # Either the single selected plate model or all available ones
    plate_indices = [selected] if selected > -1 else range(len(PLATE_DETECTION_MODELS))

    for plate_index in plate_indices:
        detect_plates(i, plate_index, processed_frame, detection)

# Detect vehicles

In [15]:
def handle_vehicle_detection(i, processed_frame, detection):
    """Record, draw and forward one vehicle detection.

    The detection is stored in the statistics of vehicle model ``i``, its
    bounding box and label are drawn onto ``processed_frame`` (in place), the
    notebook display is refreshed and the detection is passed on to
    ``prepare_plate_detection``.

    Args:
        i: Index of the vehicle model in ``VEHICLE_DETECTION_MODELS``.
        processed_frame: Image that is drawn on and displayed.
        detection: Positional record built by ``detect_vehicles``:
            [0] bounding box (four corner values), [1] class name,
            [2] confidence in percent, [3] inference time in seconds,
            [4] device, [5] source, [6] frame index.
    """
    model_entry = VEHICLE_DETECTION_MODELS[i]

    box = detection[0]
    class_name = detection[1]
    precision = detection[2]
    speed = detection[3]
    frame_index = detection[6]

    # Integer pixel corners, ordered so that left < right and top < bottom
    xa, ya, xb, yb = [int(value) for value in box]
    left, right = sorted((xa, xb))
    top, bottom = sorted((ya, yb))

    # Label: model reference, inference time, confidence and class name
    label_text = f'{model_entry.reference} ({speed:.3f}s) {precision:.2f}% {class_name.upper()}'

    # Add result to the statistics
    model_entry.addDetection(
        DetectionResult(
            model=model_entry.reference,
            label=class_name,
            precision=precision,
            speed=speed,
            device=detection[4],
            source=detection[5],
            frame=frame_index,
            id=f'{frame_index}{i}',
            boundingBox=box,
            details=label_text,
        )
    )

    # Draw the box and put the label just inside its top-left corner
    cv2.rectangle(processed_frame, (left, top), (right, bottom), model_entry.color, 1)
    cv2.putText(processed_frame, label_text, (left + 5, top + 20), cv2.FONT_HERSHEY_SIMPLEX, 0.7, model_entry.color, 1)

    # Refresh the notebook output with the annotated frame
    encoded_frame = cv2.imencode('.jpeg', processed_frame)[1]
    display_handle.update(IPython.display.Image(data=encoded_frame.tobytes()))
    IPython.display.clear_output(wait=True)

    # Continue with license plate detection for this vehicle
    prepare_plate_detection(i, processed_frame, detection)

In [16]:
def detect_vehicles(i, plateModel, textModel, frame, frameIndex=-1, source=None, confidence_threshold=0.5, confidence_threshold_plate=0.3):
    """Run vehicle detection model ``i`` on one frame and process every hit.

    The run settings are stored on the model entry so that the later pipeline
    stages can read them. Every detection whose class name is in the global
    ``target_classes`` and whose confidence exceeds ``confidence_threshold``
    is passed to ``handle_vehicle_detection``.

    Nothing is done when the model is marked as custom and its weights file
    does not exist.

    Args:
        i: Index of the model in ``VEHICLE_DETECTION_MODELS``.
        plateModel: Plate detection model index stored on the model entry
            (-1 selects all plate models, see ``prepare_plate_detection``).
        textModel: Plate recognition model index stored on the model entry.
        frame: Input image (BGR, as delivered by OpenCV); it is copied, the
            original is never drawn on.
        frameIndex: Index of the frame within the source, stored per detection.
        source: Source identifier, stored per detection.
        confidence_threshold: Minimum vehicle confidence (0..1) to accept.
        confidence_threshold_plate: Threshold stored for the plate stage.
    """
    # Define current model and remember the run settings on it
    currentModel = VEHICLE_DETECTION_MODELS[i]
    currentModel.plateModel = plateModel
    currentModel.textModel = textModel
    currentModel.confidence_threshold = confidence_threshold
    currentModel.confidence_threshold_plate = confidence_threshold_plate

    # Custom models can only run when their weights file is available
    can_run = currentModel.custom==False or (currentModel.custom==True and os.path.isfile(currentModel.weights))
    if not can_run:
        return

    def _wait_for_device():
        # CUDA work is queued asynchronously. Without waiting for the GPU the
        # timer would only measure how long it takes to enqueue the work.
        if torch.device(device).type == "cuda":
            torch.cuda.synchronize(device)

    # Keep original input intact to avoid different models outputs being accumulately overwritten
    processed_frame = frame.copy()

    if currentModel.concept=="YOLO":
        # Handle a YOLO model object detection
        _wait_for_device()
        start_time = time.perf_counter()
        results = currentModel.model(processed_frame, agnostic_nms=True, verbose=False, device=device)[0]
        _wait_for_device()
        processing_time = time.perf_counter() - start_time

        # Iterate through results
        for result in results:
            detection_count = result.boxes.shape[0]

            # Iterate through all detections
            for j in range(detection_count):
                cls = int(result.boxes.cls[j].item())
                name = result.names[cls] # Extract class name
                confidence = float(result.boxes.conf[j].item()) # Extract confidence score
                bounding_box = result.boxes.xyxy[j].cpu().numpy() # Extract bounding boxes

                # Vehicle detected, proceed
                if name in target_classes and confidence>confidence_threshold:
                    detection = [bounding_box, name, confidence*100, processing_time, device, source, frameIndex]
                    handle_vehicle_detection(i, processed_frame, detection)
    else:
        # Handle a Pytorch model

        # Preprocessing: Convert frame to RGB and PIL Image, then apply transformation
        frame_rgb = cv2.cvtColor(processed_frame, cv2.COLOR_BGR2RGB)
        pil_image = Image.fromarray(frame_rgb)
        image = transform(pil_image).unsqueeze(0).to(device)

        # Object detection; the first wait keeps the host-to-device copy out of the measurement
        _wait_for_device()
        start_time = time.perf_counter()
        with torch.no_grad():
            predictions = currentModel.model(image)
        _wait_for_device()

        # Processing (inference) time
        processing_time = time.perf_counter() - start_time

        # Results of the single image in the batch
        scores = predictions[0]['scores']
        boxes = predictions[0]['boxes']
        labels = predictions[0]['labels']

        # For each detection
        for confidence, bounding_box, label in zip(scores, boxes, labels):
            name = currentModel.weights.DEFAULT.meta["categories"][label.item()]
            if name in target_classes and confidence>confidence_threshold:
                detection = [bounding_box, name, confidence*100, processing_time, device, source, frameIndex]
                handle_vehicle_detection(i, processed_frame, detection)

# Run The Pipeline

In [17]:
# Parameters for input
# Class names a detection must have to be treated as a vehicle (checked in detect_vehicles)
target_classes = ["truck", "bus", "car", "train", "bicycle"]

# Video files that can be selected with inputType="video" and inputIndex
videos = [
    "data/video/sample.MP4", 
    "data/video/vecteezy_car-and-truck-traffic-on-the-highway-in-europe-poland_7957364.MP4", 
    "data/video/2034115-hd_1920_1080_30fps.MP4"
]

# Image files that can be selected with inputType="image" and inputIndex
images = [
    "../samples/truck.jpg"
]

# "video" reads from `videos`, "image" reads from `images`, any other value opens capture source 0
inputType = "video"
inputIndex = 0 # Which index in the input array?

# -1 runs every model of the respective stage, any other value selects one model by its list index
vehicleModel = -1 # Use a specific model for vehicle detection (Use index of the array)
plateModel = -1 # Use a specific model for license plate detection
textModel = -1 # Use a specific model for license plate recognition

# Minimum confidence (0..1) for vehicle detections and for plate detections
confidence_threshold = 0.5
confidence_threshold_plate = 0.3
# Only every `every`-th frame is processed; a value <= 0 processes every frame
every = 60
# Maximum number of failed frame reads tolerated for file sources before the loop stops
safety_limit= 2000

# Use the first CUDA device when available, otherwise the CPU
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Transforms input image to tensor, the models usually automatically resize inputs based on the trained dataset resolution.
transform = transforms.Compose([transforms.ToTensor()])

def handleFrame(frame, frameIndex=-1, source=None, confidence_threshold=0.5, confidence_threshold_plate=0.3):
    """Run vehicle detection on one frame.

    Uses the single model selected by the global ``vehicleModel`` when it is
    greater than -1, otherwise every model in ``VEHICLE_DETECTION_MODELS`` in
    list order. All arguments are forwarded to ``detect_vehicles`` together
    with the global ``plateModel`` and ``textModel`` selections.
    """
    if vehicleModel > -1:
        model_indices = [vehicleModel]
    else:
        model_indices = range(len(VEHICLE_DETECTION_MODELS))

    for model_index in model_indices:
        detect_vehicles(
            model_index, plateModel, textModel, frame, frameIndex, source,
            confidence_threshold, confidence_threshold_plate,
        )

def handleRelease():
    """Release the global capture object ``cam`` and report it on stdout."""
    cam.release()
    print("Source released.")

# Resolve the input: a video path, an image path, or capture source 0 for any other input type
source = videos[inputIndex] if inputType=="video" else images[inputIndex] if inputType=="image" else 0
# For images `cam` holds the decoded image itself, otherwise an OpenCV capture object
cam = cv2.VideoCapture(source) if inputType!="image" else cv2.imread(source, cv2.IMREAD_COLOR)
display_handle=display(None, display_id=True)
start = 0 if inputType=="video" else -1
# Only video files have a known frame count; -1 means "run until stopped"
end = int(cam.get(cv2.CAP_PROP_FRAME_COUNT)) if inputType=="video" else -1
if inputType=="video": cam.set(cv2.CAP_PROP_POS_FRAMES, start)
frameCount=start
while_safety=0

if inputType=="image":
    # cv2.imread signals an unreadable file by returning None instead of raising
    if cam is None:
        print("Error: Could not open source.")
    else:
        handleFrame(cam, frameCount, source, confidence_threshold, confidence_threshold_plate)
elif not cam.isOpened():
    print("Error: Could not open source.")
else:
    # Process every `frame_step`-th frame; 0 or 1 means every frame.
    # Flooring once up front also avoids a modulo by zero for 0 < every < 1.
    frame_step = math.floor(every) if every>0 else 0
    try:
        while end<0 or frameCount<end:
            _, frame = cam.read()
            if frame is None:
                if source==0:
                    print("Error: Could not capture frame.")
                    break
                # File sources: tolerate failed reads until the safety limit is exceeded
                if while_safety > safety_limit: break
                while_safety += 1
                continue

            if frame_step<=1 or (frameCount+1)%frame_step == 0:
                while_safety = 0
                handleFrame(frame, frameCount, source, confidence_threshold, confidence_threshold_plate)
            frameCount += 1
    except KeyboardInterrupt:
        # Interrupting the kernel is the regular way to stop a running stream
        pass
    except Exception:
        print("Unknown error.")
        traceback.print_exc()
    finally:
        # Always give the capture source back, whatever ended the loop
        handleRelease()

Source released.


In [None]:
# Calls printResults() on the first vehicle detection model (index 0); the output format is defined by that method
VEHICLE_DETECTION_MODELS[0].printResults()

In [None]:
# Calls printDetections() on the first vehicle detection model (index 0); the output format is defined by that method
VEHICLE_DETECTION_MODELS[0].printDetections()

In [None]:
# Calls printPlateDetections() on the first vehicle detection model (index 0); the output format is defined by that method
VEHICLE_DETECTION_MODELS[0].printPlateDetections()

In [None]:
# Calls printPlateRecognitions() on the first vehicle detection model (index 0); the output format is defined by that method
VEHICLE_DETECTION_MODELS[0].printPlateRecognitions()

# Output handling

In [None]:
# The output of the experiment result is processed below:
#  - per-model metrics are collected into index-aligned lists (one entry per model)
#  - the best / worst model per metric is determined and its results are printed
print()
print(device)
print(source)
print()

BestPrecision=None
BestAvgPrecision=None
WorstPrecision=None
BestSpeed=None
BestAvgSpeed=None
WorstSpeed=None

Models=[]
BestPrecisions=[]
AvgPrecisions=[]
WorstPrecisions=[]
BestInferenceSpeeds=[]
AvgInferenceSpeeds=[]
WorstInferenceSpeeds=[]
TotalDetectionsArr=[]
TotalInferencesArr=[]

for currentModel in VEHICLE_DETECTION_MODELS:

    # You can uncomment this to see details of every model
    # currentModel.printResults()

    detections_total = currentModel.detectionsCount()
    inferences_total = currentModel.inferencesCount()

    Models.append(currentModel.reference)
    BestPrecisions.append(currentModel.getMaxPrecision())
    AvgPrecisions.append(currentModel.getAveragePrecision())
    WorstPrecisions.append(currentModel.getMinPrecision())
    BestInferenceSpeeds.append(currentModel.getMinInferenceSpeed())
    AvgInferenceSpeeds.append(currentModel.getAverageInferenceSpeed())
    WorstInferenceSpeeds.append(currentModel.getMaxInferenceSpeed())
    TotalDetectionsArr.append(detections_total)
    TotalInferencesArr.append(inferences_total)

    # A model without any detection reports 0.0 for every precision metric.
    # Ranking it would make it the "worst precision" although it never produced a result.
    if detections_total > 0:
        if BestPrecision is None or BestPrecision.getMaxPrecision()<=currentModel.getMaxPrecision():
            BestPrecision = currentModel
        if BestAvgPrecision is None or BestAvgPrecision.getAveragePrecision()<=currentModel.getAveragePrecision():
            BestAvgPrecision = currentModel
        if WorstPrecision is None or WorstPrecision.getMinPrecision()>=currentModel.getMinPrecision():
            WorstPrecision = currentModel

    # Likewise a model without any inference reports a speed of 0.0 seconds
    # and would otherwise always win "best speed".
    if inferences_total > 0:
        if BestSpeed is None or BestSpeed.getMinInferenceSpeed()>=currentModel.getMinInferenceSpeed():
            BestSpeed = currentModel
        if BestAvgSpeed is None or BestAvgSpeed.getAverageInferenceSpeed()>=currentModel.getAverageInferenceSpeed():
            BestAvgSpeed = currentModel
        if WorstSpeed is None or WorstSpeed.getMaxInferenceSpeed()<=currentModel.getMaxInferenceSpeed():
            WorstSpeed = currentModel

# Print the winner of every category; a category stays None when no model qualified
for title, winner in (
    ("Best Precision:", BestPrecision),
    ("Best Average Precision:", BestAvgPrecision),
    ("Worst Precision:", WorstPrecision),
    ("Best Speed:", BestSpeed),
    ("Best Average Speed:", BestAvgSpeed),
    ("Worst Speed:", WorstSpeed),
):
    if winner is not None:
        print(title)
        winner.printResults()

In [None]:
# Prints every collected list as "<name>= <list>" so the output can be copied
# into another cell and used to render the diagrams and tables.
for label, values in (
    ("models=", Models),
    ("best_precisions=", BestPrecisions),
    ("avg_precisions=", AvgPrecisions),
    ("worst_precisions=", WorstPrecisions),
    ("best_inference_speeds=", BestInferenceSpeeds),
    ("avg_inference_speeds=", AvgInferenceSpeeds),
    ("worst_inference_speeds=", WorstInferenceSpeeds),
    ("total_detections=", TotalDetectionsArr),
    ("total_inferences=", TotalInferencesArr),
):
    print(label, values)

# After copying the output of the Detection Experiment Analysis above, we can paste and run it in another cell and then use the code below to display (and optionally save) the data. I also wanted the primary run data to be part of that output: the device used, the source input path, and parameters such as the frame-skip interval, the confidence threshold and the target classes.

In [None]:
# You can paste the array definitions from the output above in this cell and run it.
# The lists are index-aligned: position k of every list belongs to models[k].
# Precisions are plotted below as percentages and speeds as seconds.
# In this data set a model with total_inferences == 0 has 0.0 in every metric, i.e. it produced no result.
models= ['fcos_resnet', 'retinanet_resnet', 'retinanet_resnet_v2', 'faster_rcnn_resnet', 'faster_rcnn_resnet_v2', 'faster_rcnn_mobilenet_v3', 'faster_rcnn_mobilenet_v3_320', 'mask_rcnn_resnet', 'mask_rcnn_resnet_v2', 'ssd_vgg16', 'ssd_mobilenet_v3', 'yolov5nu', 'yolov5su', 'yolov5mu', 'yolov5lu', 'yolov5xu', 'yolov5n6u', 'yolov5s6u', 'yolov5m6u', 'yolov5l6u', 'yolov5x6u', 'yolov8n', 'yolov8s', 'yolov8m', 'yolov8l', 'yolov8x', 'yolov9c', 'yolov9e']
best_precisions= [64.07835388183594, 0.0, 55.489200592041016, 95.16394805908203, 98.24671173095703, 89.18415069580078, 95.50997924804688, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
avg_precisions= [61.71642875671387, 0.0, 54.490304946899414, 74.99663543701172, 98.24671173095703, 89.18415069580078, 95.50997924804688, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
worst_precisions= [59.3545036315918, 0.0, 53.49140930175781, 54.829322814941406, 98.24671173095703, 89.18415069580078, 95.50997924804688, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
best_inference_speeds= [1.9180281162261963, 0.0, 1.7949974536895752, 1.5070006847381592, 2.358997344970703, 0.44583773612976074, 0.20199799537658691, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
avg_inference_speeds= [1.9180281162261963, 0.0, 1.7949974536895752, 1.5070006847381592, 2.358997344970703, 0.44583773612976074, 0.20199799537658691, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
worst_inference_speeds= [1.9180281162261963, 0.0, 1.7949974536895752, 1.5070006847381592, 2.358997344970703, 0.44583773612976074, 0.20199799537658691, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
total_detections= [2, 0, 2, 2, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
total_inferences= [1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import matplotlib

# One distinct colour per model, spread evenly over the 'nipy_spectral' colormap
num_models = len(models)
cmap = matplotlib.colormaps.get_cmap('nipy_spectral')
colors = [cmap(position / num_models) for position in range(num_models)]

# Scatter plot: one point per model (x = average inference speed, y = average precision)
plt.figure(figsize=(14, 10))
for i, model in enumerate(models):
    plt.scatter(
        avg_inference_speeds[i],
        avg_precisions[i],
        color=colors[i],
        label=model,
        edgecolor='black',
    )

# Print the run parameters next to the diagram
for parts in (
    ("DEVICE:", device),
    ("SOURCE:", source),
    ("SKIP EVERY", every, "FRAMES"),
    ("TOTALLY PROCESSED FRAMES:", max(total_inferences)),
    ("COCO Dataset, Classes:", target_classes),
    ("THRESHOLD:", confidence_threshold*100, "%"),
):
    print(*parts)

# Axis labels, title, legend placed right of the axes, and grid
plt.xlabel('Average Speed (seconds)')
plt.ylabel('Average Precision (%)')
plt.title('Model Performance: Average Inference Speed x Average Accuracy')
plt.legend(loc='upper left', bbox_to_anchor=(1.05, 1), fontsize='small')
plt.grid(True)
# plt.savefig("Experiment1_Diagram.png", bbox_inches='tight')
plt.show()

def _show_metric_table(title, columns, column_labels):
    """Render index-aligned metric lists as a matplotlib table and show it.

    Args:
        title: Title placed above the table.
        columns: List of equally long lists, one per table column.
        column_labels: Header text per column.

    Returns:
        The created ``(fig, ax, table_data, table)`` objects.
    """
    # Figure without visible axes that only hosts the table
    fig, ax = plt.subplots(figsize=(14, 12))  # Adjust size as needed
    ax.axis('tight')
    ax.axis('off')

    # Transpose so that every input list becomes one table column
    table_data = np.transpose(columns)

    table = ax.table(
        cellText=table_data,
        colLabels=column_labels,
        cellLoc='center',
        loc='center',
        colColours=["palegreen"] * 4,
    )
    table.auto_set_font_size(False)
    table.set_fontsize(10)
    table.scale(1.2, 1.2)  # Scale table size

    plt.title(title)
    # To keep the table as an image, call plt.savefig(<file name>, bbox_inches='tight') here, before plt.show()
    # (e.g. "Experiment1_Precision_Table.png" / "Experiment1_Time_Table.png")
    plt.show()
    return fig, ax, table_data, table


# Precision metrics per model
fig, ax, table_data, table = _show_metric_table(
    "Model Precision Metrics",
    [models, best_precisions, avg_precisions, worst_precisions],
    ["Model", "Best Precision (%)", "Average Precision (%)", "Worst Precision (%)"],
)

# Inference speed metrics per model
fig, ax, table_data, table = _show_metric_table(
    "Model Inference Speed Metrics",
    [models, best_inference_speeds, avg_inference_speeds, worst_inference_speeds],
    ["Model", "Best Inference Speed (s)", "Average Inference Speed (s)", "Worst Inference Speed (s)"],
)

# This code is used to remap the data as a latex table sorted by average precision in descending order.
# Every row is [model key, average precision (2 decimals), average inference speed (5 decimals)].
data = [
    [models[i], f'{avg_precisions[i]:.2f}', f'{avg_inference_speeds[i]:.5f}']
    for i in range(len(models))
]

# LaTeX model names mapping
latex_model_names = {
    'fcos_resnet': 'FCOS ResNet50 FPN',
    'retinanet_resnet': 'RetinaNet ResNet50 FPN',
    'retinanet_resnet_v2': 'RetinaNet ResNet50 FPN V2',
    'faster_rcnn_resnet': 'Faster R-CNN ResNet50 FPN',
    'faster_rcnn_resnet_v2': 'Faster R-CNN ResNet50 FPN V2',
    'faster_rcnn_mobilenet_v3': 'Faster R-CNN MobileNet V3 L',
    'faster_rcnn_mobilenet_v3_320': 'Faster R-CNN MobileNet V3 L 320',
    'mask_rcnn_resnet': 'Mask R-CNN ResNet50',
    'mask_rcnn_resnet_v2': 'Mask R-CNN ResNet50 FPN V2',
    'ssd_vgg16': 'SSD VGG16',
    'ssd_mobilenet_v3': 'SSDLite MobileNet V3 Large',
    'yolov5nu': 'YOLOv5nu',
    'yolov5su': 'YOLOv5su',
    'yolov5mu': 'YOLOv5mu',
    'yolov5lu': 'YOLOv5lu',
    'yolov5xu': 'YOLOv5xu',
    'yolov5n6u': 'YOLOv5n6u',
    'yolov5s6u': 'YOLOv5s6u',
    'yolov5m6u': 'YOLOv5m6u',
    'yolov5l6u': 'YOLOv5l6u',
    'yolov5x6u': 'YOLOv5x6u',
    'yolov8n': 'YOLOv8n',
    'yolov8s': 'YOLOv8s',
    'yolov8m': 'YOLOv8m',
    'yolov8l': 'YOLOv8l',
    'yolov8x': 'YOLOv8x',
    'yolov9c': 'YOLOv9c',
    'yolov9e': 'YOLOv9e'
}

# Sort data by average precision in descending order
data_sorted = sorted(data, key=lambda x: float(x[1]), reverse=True)

# The first row of the sorted data has the best average precision
best_prec_index = 0

# Find the fastest model as [speed, row index]. Models that never ran report a
# speed of 0.0 and must not be highlighted as the fastest, so only rows with at
# least one inference take part. The index stays None when no model ran at all.
inferences_by_model = dict(zip(models, total_inferences))
best_speed = [float('inf'), None]
for i, entry in enumerate(data_sorted):
    if inferences_by_model.get(entry[0], 0) > 0 and float(entry[2]) < best_speed[0]:
        best_speed = [float(entry[2]), i]


def _bold(text):
    """Wrap ``text`` in a LaTeX bold command."""
    return f"\\textbf{{{text}}}"


# Generate LaTeX table: header, one row per model, footer
latex_parts = [
    "\\begin{table}[H]\n\\centering\n\\begin{tabular}{lrr}\n\\toprule\n",
    "\\multicolumn{3}{c}{A100 Experiment 1a: Vehicle Detection Models Performance}\\\\ \\cmidrule{1-3}\n",
    "Model & Average Precision (\\%) & Average Inference Speed (s)\\\\\n\\midrule\n",
]

for count, entry in enumerate(data_sorted):
    # Unknown models fall back to their key, with underscores escaped for LaTeX
    model = latex_model_names.get(entry[0], entry[0].replace('_', '\\_'))
    avg_precision = float(entry[1])
    avg_speed = float(entry[2])

    is_best_precision = count == best_prec_index
    is_best_speed = count == best_speed[1]

    # The model name is highlighted when the row wins either category
    model_cell = _bold(model) if (is_best_precision or is_best_speed) else model
    precision_cell = _bold(f"{avg_precision:.2f}") if is_best_precision else f"{avg_precision:.2f}"
    speed_cell = _bold(f"{avg_speed:.5f}") if is_best_speed else f"{avg_speed:.5f}"

    latex_parts.append(f"{model_cell} & {precision_cell} & {speed_cell} \\\\ \\addlinespace\n")

latex_parts.append("\\bottomrule\n\\end{tabular}\n\\caption{(DEVICE) Experiment (X)A: Vehicle Detection Models Performance}\n\\label{table:Device_Experiment_1A_Vehicle_Detection_Performance}\n\\end{table}")

latex_table = "".join(latex_parts)

print(latex_table)