## YOLO Evaluation Notebook

This notebook allows to evaluate the finetuned model with a portion of the training dataset

The only metric for this evaluation is IOU (Intersection over union), because the DETR is only generating bounding boxes, not the label.


In [1]:

import os
import json
from ultralytics import YOLO
import cv2
from tqdm.auto import tqdm
import torch
from PIL import Image

In [2]:
DATASET_DIR = "../hand-cursive-detr"
ANNOTATION_FILE_NAME = "_val_annotations_coco.json"
#HF_CACHE = "/home/ralvarez22/Documentos/llm_data/llm_cache"
DEVICE = "cuda"
CHECKPOINT = "../finetuned/yolo/Zani_V1/weights/last.pt"
CONFIDENCE_TRESHOLD = 0.5

In [3]:
DATASET_FILE = os.path.join(DATASET_DIR, ANNOTATION_FILE_NAME)

In [4]:
dataset_json = json.load(open(DATASET_FILE, "r"))

In [5]:
del dataset_json["info"]
del dataset_json["licenses"]
del dataset_json["categories"]

In [6]:
dataset_json.keys()

dict_keys(['images', 'annotations'])

In [7]:
images_with_boxes = []
for e in dataset_json["images"]:
    image_annotations = [
        x for x in dataset_json["annotations"] if x["image_id"] == e["id"]
    ]
    images_with_boxes.append(
        {"id": e["id"], "image": e["file_name"], "boxes": image_annotations}
    )

In [8]:
yolo_model = YOLO(CHECKPOINT)

In [9]:
def intersection_over_union(gt, pred):
    # determine the (x, y)-coordinates of the intersection rectangle
    x_a = max(gt[0], pred[0])
    y_a = max(gt[1], pred[1])
    x_b = min(gt[2], pred[2])
    y_b = min(gt[3], pred[3])
    # if there is no overlap between predicted and ground-truth box
    if x_b < x_a or y_b < y_a:
        return 0.0
    # compute the area of intersection rectangle
    inter_area = max(0, x_b - x_a + 1) * max(0, y_b - y_a + 1)
    # compute the area of both the prediction and ground-truth
    # rectangles
    box_a_area = (gt[2] - gt[0] + 1) * (gt[3] - gt[1] + 1)
    box_b_area = (pred[2] - pred[0] + 1) * (pred[3] - pred[1] + 1)
    # compute the intersection over union by taking the intersection
    # area and dividing it by the sum of prediction + ground-truth
    # areas - the intersection area
    iou = inter_area / float(box_a_area + box_b_area - inter_area)
    # return the intersection over union value
    return iou

In [10]:
#yolo_results = yolo_model.predict(Image.open(os.path.join(DATASET_DIR, images_with_boxes[0]["image"])), conf=CONFIDENCE_TRESHOLD, imgsz=640)

In [11]:
#yolo_results[0].show()

In [12]:
def generate_bboxes(image):
    with torch.no_grad():
        yolo_results = yolo_model.predict(image, conf=CONFIDENCE_TRESHOLD, imgsz=640)
        print(len(yolo_results[0].boxes))
    return yolo_results[0].probs, yolo_results[0].boxes.xyxy

In [13]:
def get_image_iou(target_boxes, generated_boxes):
    image_iou = 0
    fmt_tgt_boxes = [ [ round(x[0], 4), round(x[1], 4), round(x[0] + x[2], 4), round(x[1] + x[3], 4) ]  for x in target_boxes.tolist()]
    pred_boxes = [ [ round(x[0], 4), round(x[1], 4), round(x[2], 4), round(x[3], 4) ] for x in generated_boxes.tolist() ]
    total_correct = 0
    total_false = 0
    false_negatives = 0
    for tgt_box in fmt_tgt_boxes:
        tgt_iou_items = [ intersection_over_union(tgt_box, x) for x in pred_boxes ]
        tgt_iou_items = [ x for x in tgt_iou_items if x > 0 ]
        # tgt_iou_items contiene todos los valores con los cuales hay un overlap entre boxes
        if len(tgt_iou_items) == 0:
            total_false += 1
            image_iou += 0
            continue
        
        max_iou_item = max(tgt_iou_items) # Valor maximo de iou para el item
        #print("IOU for item {} - {}".format(tgt_box, max_iou_item))
        if max_iou_item >= 0.5:
            # IOU valido del item
            image_iou += max_iou_item
            total_correct += 1
        elif max_iou_item > 0.3 and max_iou_item < 0.5:
            total_false += 1
        else:
            false_negatives += 1
        
    return image_iou, total_correct, total_false, false_negatives

In [14]:
acc_iou = 0
good_matches = 0
poor_matches = 0
false_neg = 0
total_ds_items = 0
for test_item in tqdm(images_with_boxes):
    # For every item, I test the expected or ground truth box with all the generated boxes from the model
    # The main reason is because the model ignores the 'num_queries' configuration and generates all the posible bounding boxes
    gt_boxes = torch.tensor([ x["bbox"] for x in test_item["boxes"] ], device=DEVICE)
    total_ds_items += len(gt_boxes)
    input_image = os.path.join(DATASET_DIR, test_item["image"])
    #image_pixels = cv2.imread(input_image)
    
    _, pred_boxes = generate_bboxes(Image.open(input_image).convert("RGB"))
    item_iou, tc, tf, fn = get_image_iou(gt_boxes, pred_boxes)
    
    acc_iou += item_iou
    good_matches += tc
    poor_matches += tf
    false_neg += fn

  0%|          | 0/479 [00:00<?, ?it/s]


0: 640x512 53 words, 50.8ms
Speed: 5.2ms preprocess, 50.8ms inference, 113.5ms postprocess per image at shape (1, 3, 640, 512)
53

0: 640x512 53 words, 10.9ms
Speed: 2.9ms preprocess, 10.9ms inference, 1.3ms postprocess per image at shape (1, 3, 640, 512)
53

0: 640x512 59 words, 11.1ms
Speed: 2.8ms preprocess, 11.1ms inference, 0.9ms postprocess per image at shape (1, 3, 640, 512)
59

0: 640x512 65 words, 8.9ms
Speed: 2.3ms preprocess, 8.9ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 512)
65

0: 640x512 56 words, 8.1ms
Speed: 2.1ms preprocess, 8.1ms inference, 0.9ms postprocess per image at shape (1, 3, 640, 512)
56

0: 640x512 55 words, 11.9ms
Speed: 4.0ms preprocess, 11.9ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 512)
55

0: 640x512 44 words, 10.6ms
Speed: 2.6ms preprocess, 10.6ms inference, 1.3ms postprocess per image at shape (1, 3, 640, 512)
44

0: 640x512 56 words, 10.0ms
Speed: 2.3ms preprocess, 10.0ms inference, 1.2ms postprocess per image

In [15]:
good_matches, poor_matches, false_neg,  acc_iou / (good_matches + poor_matches)

(27058, 241, 87, 0.8351137973963119)

In [16]:
iou_prom = acc_iou / (good_matches + poor_matches)
prec = good_matches / (poor_matches + good_matches)
recall = good_matches / (good_matches + false_neg)
f1_score = (2 * prec * recall) / (prec + recall)

In [17]:
print("IOU: {:.4f} - Precision: {:.4f} - Recall: {:.4f} - F1: {:.4f}".format(iou_prom, prec, recall, f1_score))

IOU: 0.8351 - Precision: 0.9912 - Recall: 0.9968 - F1: 0.9940


In [18]:
print("Total Evaluation items: {} - Total Correct Items: {} - Total Incorrect Items: {} - Total False Negatives: {}".format(total_ds_items, good_matches, poor_matches, false_neg))

Total Evaluation items: 27386 - Total Correct Items: 27058 - Total Incorrect Items: 241 - Total False Negatives: 87
