## DETR Test Lab Notebook

This notebook lets you test both the base model and the fine-tuned model of DETR.

First, the model is loaded onto the CUDA device. Then, given an image, the model tries to generate bounding boxes only for the matching text.

The main purpose is to crop the base image using the generated boxes and pass the cropped images to TrOCR to recognize the text.

In [None]:
from transformers import DetrForObjectDetection, DetrImageProcessor
import torch
import cv2
import supervision as sv

In [2]:
# Directory holding the fine-tuned DETR checkpoint (weights + processor config)
MODEL_CHECKPOINT = "../finetuned/detr/Nous/V_3"
# Prefer the GPU, but fall back to the CPU so the notebook still runs on
# machines without a usable CUDA device instead of failing at `.to(DEVICE)`.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

In [3]:
# Image processor that turns raw images into model inputs and converts raw
# model outputs back into boxes; loaded from the same checkpoint as the model.
img_proc = DetrImageProcessor.from_pretrained(MODEL_CHECKPOINT)

# Load the detection model from the checkpoint, overriding `num_queries` and
# tolerating checkpoint/model weight-shape mismatches, then move it to DEVICE.
detr_model = DetrForObjectDetection.from_pretrained(
    MODEL_CHECKPOINT,
    num_queries=100,
    ignore_mismatched_sizes=True,
)
detr_model = detr_model.to(DEVICE)


In [42]:
# Confidence threshold handed to the post-processing step to filter the generated boxes
CONFIDENCE_TRESHOLD = 0.8
# IoU threshold handed to non-maximum suppression (`with_nms`) on the detections
IOU_TRESHOLD = 0.1
# Path to the test image
TEST_IMAGE = "../hand-cursive-detr/test_samples/test_image_detr_1.jpeg"

In [43]:
# Get the pixel values of the image (matrix).
# cv2.imread does not raise on a missing or unreadable file; it returns None,
# which would only surface later as an opaque AttributeError. Fail loudly here.
image = cv2.imread(TEST_IMAGE)
if image is None:
    raise FileNotFoundError(f"Could not read test image: {TEST_IMAGE!r}")

In [None]:
# Show the first two dimensions of the loaded image array (presumably
# height and width); the same pair is used as the target size at inference.
image.shape[:2]

In [45]:
# Inference: run the detector on the test image with autograd disabled.
# NOTE(review): `image` comes from cv2.imread, which yields BGR channel order;
# confirm this matches the channel order the model was fine-tuned with.
with torch.inference_mode():
    # Preprocess the image into tensors on the target device and predict
    inputs = img_proc(images=image, return_tensors="pt").to(DEVICE)
    outputs = detr_model(**inputs)

    # Post-process: rescale the predicted box coordinates to the original
    # image size and keep only boxes that pass the confidence threshold.
    image_height, image_width = image.shape[:2]
    target_sizes = torch.tensor([[image_height, image_width]], device=DEVICE)
    results_per_image = img_proc.post_process_object_detection(
        outputs=outputs,
        threshold=CONFIDENCE_TRESHOLD,
        target_sizes=target_sizes,
    )
    # A single image was processed, so take its (only) result entry
    results = results_per_image[0]

In [None]:
# Number of boxes kept by post-processing (before NMS is applied)
len(results["boxes"])

In [None]:
# With supervision lib, use the generated coords to annotate the image and preview the boxes
box_annotator = sv.BoxAnnotator()
detections = sv.Detections.from_transformers(transformers_results=results).with_nms(threshold=IOU_TRESHOLD)
labels = [f"{confidence:.2f} - {class_id}" for _,_, confidence, class_id,_,_, in detections]
frame = box_annotator.annotate(scene=image.copy(), detections=detections)

#print('detections')
%matplotlib inline  
sv.plot_image(frame, (16, 16))

In [None]:
# Display the detections that remain after NMS
detections