# Ultralytics Oriented bounding box

Set up runtime to use GPU

In [None]:
%%capture
# Install the Ultralytics package (provides the YOLO class imported below); %%capture hides pip's output.
!pip install ultralytics

## Look for dataset

* [Roboflow Universe](https://universe.roboflow.com/)
* [Google dataset](https://datasetsearch.research.google.com/?hl=it)
* [Kaggle dataset](https://www.kaggle.com/datasets?fileType=csv)
* [Halcon MVTecIndustrial dataset](https://www.mvtec.com/company/research/datasets/mvtec-ad/downloads)

## Utils

In [None]:
import numpy as np
import cv2
import os
import shutil
from tqdm import tqdm
from PIL import Image
import random
import albumentations as A
from ultralytics import YOLO
import json
import yaml
import json
import pandas as pd

In [None]:
def resize(image, width=None, height=None, inter=cv2.INTER_AREA):
    """
    Resize an image to a target width OR height while keeping its aspect ratio.

    Args:
    - image: Array-like image exposing `.shape` as (height, width, ...).
    - width (int, optional): Desired output width. Takes precedence when both
      `width` and `height` are given (the height is then derived from it).
    - height (int, optional): Desired output height, used only if `width` is None.
    - inter: OpenCV interpolation flag forwarded to `cv2.resize`.

    Returns:
    - The resized image, or the input object itself when neither dimension is given.
    """
    src_h, src_w = image.shape[:2]

    # Nothing requested: hand back the input untouched.
    if width is None and height is None:
        return image

    if width is not None:
        # Width drives the scale factor; height follows proportionally.
        scale = width / float(src_w)
        target_size = (width, int(src_h * scale))
    else:
        # Only the height was supplied; width follows proportionally.
        scale = height / float(src_h)
        target_size = (int(src_w * scale), height)

    return cv2.resize(image, target_size, interpolation=inter)

def convert_yolo_to_albumentations(normalized_coords_yolo):
    """
    Convert normalized YOLO boxes to normalized corner-format boxes.

    Args:
    - normalized_coords_yolo: Iterable of [x_center, y_center, width, height] boxes.

    Returns:
    - list: One [x_min, y_min, x_max, y_max] list per input box, in input order.
    """
    return [
        [
            x_center - (width / 2),
            y_center - (height / 2),
            x_center + (width / 2),
            y_center + (height / 2),
        ]
        for x_center, y_center, width, height in normalized_coords_yolo
    ]

def convert_yolo_box_to_absolute(normalized_coords, image_width, image_height):
    """
    Turn one normalized YOLO box into integer pixel corner coordinates.

    Args:
    - normalized_coords: Sequence of (x_center, y_center, width, height), normalized.
    - image_width (int): Image width in pixels.
    - image_height (int): Image height in pixels.

    Returns:
    - tuple: (x_min, y_min, x_max, y_max) in pixels, each truncated with int().
    """
    cx_norm, cy_norm, w_norm, h_norm = normalized_coords

    # Scale the centre and the extents to pixel units.
    cx_px = cx_norm * image_width
    cy_px = cy_norm * image_height
    half_w_px = (w_norm * image_width) / 2
    half_h_px = (h_norm * image_height) / 2

    corners = (
        cx_px - half_w_px,
        cy_px - half_h_px,
        cx_px + half_w_px,
        cy_px + half_h_px,
    )
    return tuple(int(value) for value in corners)

def convert_yolo_poly_to_absolute(normalized_coords, image_width, image_height):
    """
    Convert a flat, normalized YOLO polygon into absolute pixel points.

    Args:
    - normalized_coords: Flat sequence [x1, y1, x2, y2, ...] of normalized coordinates.
    - image_width (int): The width of the image.
    - image_height (int): The height of the image.

    Returns:
    - numpy.ndarray: Integer array of shape (n_points, 2) holding (x, y) pixel
      coordinates (values are truncated by the integer cast). An empty input
      yields an array of shape (0, 2).

    Raises:
    - ValueError: If the number of coordinates is odd.
    """
    # reshape(-1, 2) also copes with an empty polygon, unlike reshape(0, -1).
    points = np.asarray(normalized_coords, dtype=float).reshape(-1, 2)
    # Broadcasting scales the x column by the width and the y column by the height.
    scaled = points * np.array([image_width, image_height], dtype=float)
    return scaled.astype(int)

def load_yolov8_det_labels(file_path):
    """
    Load YOLO detection labels from a text file.

    Each non-blank line is expected to be
    'class_id x_center y_center width height' (whitespace separated).

    Args:
    - file_path (str): Path to the label file.

    Returns:
    - tuple: (boxes, labels) where boxes is a list of
      [x_center, y_center, width, height] floats and labels is the list of
      integer class ids, both in file order.

    Raises:
    - ValueError: If a non-blank line does not contain exactly five numbers.
    """
    labels = []
    boxes = []
    with open(file_path, 'r') as file:
        for line in file:
            fields = line.split()
            # Blank or whitespace-only lines would otherwise break the unpacking below.
            if not fields:
                continue
            class_id, x_center, y_center, width, height = map(float, fields)
            boxes.append([x_center, y_center, width, height])
            labels.append(int(class_id))
    return boxes, labels

def load_yolov8_seg_labels(file_path):
    """
    Load YOLO segmentation labels (polygons) from a text file.

    Each non-blank line is expected to be 'class_id x1 y1 x2 y2 ...'
    (whitespace separated).

    Args:
    - file_path (str): Path to the label file.

    Returns:
    - tuple: (polygons, labels) where polygons is a list of flat
      [x1, y1, x2, y2, ...] float lists and labels is the list of integer
      class ids, both in file order.
    """
    labels = []
    polygons = []
    with open(file_path, 'r') as file:
        for line in file:
            values = [float(token) for token in line.split()]
            # Blank or whitespace-only lines carry no annotation; skip them
            # instead of failing on values[0].
            if not values:
                continue
            polygons.append(values[1:])
            labels.append(int(values[0]))
    return polygons, labels

def load_yolov8_labels_name(file_path):
    """
    Read the class names from a JSON notes file.

    The file must contain a top-level "categories" list whose items each
    have a "name" key.

    Args:
    - file_path (str): Path to the JSON file.

    Returns:
    - list: The category names, in the order they appear in the file.
    """
    # The context manager closes the handle, which the bare open() never did.
    with open(file_path) as f:
        info_data = json.load(f)
    return [category["name"] for category in info_data["categories"]]

def augment_seg(img, keypoints_yolo, class_labels, transform):
    """
    Augment one image together with its YOLO polygons.

    The polygon vertices are passed to `transform` as keypoints. Every vertex
    is tagged "<label>_<instance index>" so that vertices can be regrouped
    into their polygon after the transform.

    Args:
    - img: Image array of shape (height, width, ...).
    - keypoints_yolo: List of flat normalized polygons [x1, y1, x2, y2, ...].
    - class_labels: Class name of each polygon (same length as keypoints_yolo).
    - transform: Callable accepting image=, keypoints=, class_labels= and
      returning a dict with the same three keys.

    Returns:
    - tuple: (augmented image, list of flat normalized polygons,
      list of class names, debug image with the polygons drawn on it).
      Polygons left with fewer than three vertices are dropped.
    """
    img_height, img_width = img.shape[:2]

    # YOLO normalized polygons -> absolute pixel keypoints, tagged per instance.
    keypoints_alb = []
    instance_tags = []
    for instance_idx, (keypoint_raw, label) in enumerate(zip(keypoints_yolo, class_labels)):
        cnt = convert_yolo_poly_to_absolute(keypoint_raw, img_width, img_height)
        cnt[:, 0] = np.clip(cnt[:, 0], 0, img_width - 1)
        cnt[:, 1] = np.clip(cnt[:, 1], 0, img_height - 1)
        points = [tuple(row) for row in cnt]
        keypoints_alb.extend(points)
        instance_tags.extend([f"{label}_{instance_idx}"] * len(points))

    tr = transform(image=img, keypoints=keypoints_alb, class_labels=instance_tags)
    tr_img = tr['image']
    tr_keypoints = tr['keypoints']
    tr_tags = tr['class_labels']

    tr_img_debug = tr_img.copy()
    # Normalize against the transformed image: its size can differ from the input's.
    out_height, out_width = tr_img.shape[:2]

    # Reconstruction: regroup the surviving keypoints into one polygon per instance.
    aug_keypoints = []
    aug_class_labels = []
    tag_array = np.array(tr_tags)
    point_array = np.array(tr_keypoints)
    # dict.fromkeys de-duplicates while keeping first-seen order (set() order is not stable).
    for tag in dict.fromkeys(tr_tags):
        idx = np.where(tag_array == tag)[0]
        # Keep only x, y in case the transform returns extra per-keypoint columns.
        pts = point_array[idx][:, :2]

        # A polygon needs at least three vertices; fewer would also produce a
        # label line that no longer looks like a segmentation annotation.
        if len(pts) < 3:
            continue

        cnt_norm = pts.astype(float)
        cnt_norm[:, 0] = cnt_norm[:, 0] / out_width
        cnt_norm[:, 1] = cnt_norm[:, 1] / out_height
        cnt_norm = cnt_norm.reshape(-1)

        # Strip only the trailing "_<instance index>" so class names that
        # themselves contain underscores stay intact.
        label = tag.rsplit("_", 1)[0]

        cnt = pts.astype(int)
        cv2.drawContours(tr_img_debug, [cnt], -1, (255, 0, 0), 1)
        M = cv2.moments(cnt)
        if M["m00"] != 0:
            cx = int(M["m10"] / M["m00"])
            cy = int(M["m01"] / M["m00"])
            cv2.putText(tr_img_debug, f"{label} ", (cx, cy - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 1)

        aug_keypoints.append(cnt_norm)
        aug_class_labels.append(label)

    return tr_img, aug_keypoints, aug_class_labels, tr_img_debug

def augment_yolov8_seg(transform, folder_inp_images, folder_inp_labels, folder_out_images_aug, folder_out_labels_aug, labels_name, folder_out_images_aug_check, num_passes=2):
    """
    Build an augmented copy of a YOLO segmentation dataset.

    The three output folders are deleted and recreated. Every input image is
    then processed `num_passes` times; pass `i` writes `<i>_<basename>.png`
    to the image and check folders and `<i>_<basename>.txt` to the label folder.

    Args:
    - transform: Augmentation callable forwarded to `augment_seg`, or None to
      copy images and labels unchanged.
    - folder_inp_images (str): Folder with the source images.
    - folder_inp_labels (str): Folder with one `<basename>.txt` label file per image.
    - folder_out_images_aug (str): Output folder for the augmented images.
    - folder_out_labels_aug (str): Output folder for the augmented labels.
    - labels_name (list): Class names, indexed by class id.
    - folder_out_images_aug_check (str): Output folder for debug images with
      the polygons drawn on them.
    - num_passes (int): How many times the whole dataset is processed.
    """
    # Start from empty output folders.
    for folder in (folder_out_images_aug, folder_out_labels_aug, folder_out_images_aug_check):
        if os.path.exists(folder):
            shutil.rmtree(folder)
        os.makedirs(folder)

    for i in range(num_passes):
        for name in tqdm(os.listdir(folder_inp_images)):
            # splitext handles names with several dots and names that contain
            # the extension text elsewhere (e.g. "png_sample.png").
            basename = os.path.splitext(name)[0]
            img_pil = Image.open(os.path.join(folder_inp_images, name)).convert("RGB")
            img = np.array(img_pil)
            label_path = os.path.join(folder_inp_labels, f"{basename}.txt")
            keypoints_yolo, class_ids = load_yolov8_seg_labels(label_path)
            class_labels = [labels_name[class_id] for class_id in class_ids]

            if transform is None:
                # No augmentation requested: pass image and labels through as-is
                # (calling transform here would fail, since it is None).
                aug_img = img
                aug_keypoints = keypoints_yolo
                aug_class_labels = class_labels
                aug_img_debug = aug_img.copy()
                for keypoint_raw, label in zip(keypoints_yolo, class_labels):
                    cnt = convert_yolo_poly_to_absolute(keypoint_raw, img_pil.width, img_pil.height)
                    cv2.drawContours(aug_img_debug, [cnt], -1, (255, 0, 0), 1)
                    M = cv2.moments(cnt)
                    if M["m00"] != 0:
                        cx = int(M["m10"] / M["m00"])
                        cy = int(M["m01"] / M["m00"])
                        cv2.putText(aug_img_debug, f"{label} ", (cx, cy - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 1)
            else:
                aug_img, aug_keypoints, aug_class_labels, aug_img_debug = augment_seg(img, keypoints_yolo, class_labels, transform)

            # Always save the debug image so the augmentation can be checked visually.
            Image.fromarray(aug_img_debug).save(f"{folder_out_images_aug_check}/{i}_{basename}.png")

            # One line per polygon: class id followed by the flat normalized coordinates.
            filepath_label_save = f"{folder_out_labels_aug}/{i}_{basename}.txt"
            with open(filepath_label_save, 'w') as file:
                for polygon, class_label in zip(aug_keypoints, aug_class_labels):
                    class_id = labels_name.index(class_label)
                    file.write(f"{class_id} {' '.join(map(str, polygon))} \n")

            Image.fromarray(aug_img).save(f"{folder_out_images_aug}/{i}_{basename}.png")

def augment_yolov8_det(transform, folder_inp_images, folder_inp_labels, folder_out_images_aug, folder_out_labels_aug, labels_name, folder_out_images_aug_check, num_passes=2):
    """
    Build an augmented copy of a YOLO detection dataset.

    The three output folders are deleted and recreated. Every input image is
    then processed `num_passes` times; pass `i` writes `<i>_<basename>.png`
    to the image and check folders and `<i>_<basename>.txt` to the label folder.

    Args:
    - transform: Callable accepting image=, bboxes=, class_labels= and returning
      a dict with the same keys (boxes in normalized YOLO format), or None to
      copy images and labels unchanged.
    - folder_inp_images (str): Folder with the source images.
    - folder_inp_labels (str): Folder with one `<basename>.txt` label file per image.
    - folder_out_images_aug (str): Output folder for the augmented images.
    - folder_out_labels_aug (str): Output folder for the augmented labels.
    - labels_name (list): Class names, indexed by class id.
    - folder_out_images_aug_check (str): Output folder for debug images with
      the boxes drawn on them.
    - num_passes (int): How many times the whole dataset is processed.
    """
    # Start from empty output folders.
    for folder in (folder_out_images_aug, folder_out_labels_aug, folder_out_images_aug_check):
        if os.path.exists(folder):
            shutil.rmtree(folder)
        os.makedirs(folder)

    print(f"Labels: {labels_name}")

    for i in range(num_passes):
        for name in tqdm(os.listdir(folder_inp_images)):
            # splitext handles names with several dots and names that contain
            # the extension text elsewhere (e.g. "png_sample.png").
            basename = os.path.splitext(name)[0]
            img = np.array(Image.open(os.path.join(folder_inp_images, name)).convert("RGB"))
            label_path = os.path.join(folder_inp_labels, f"{basename}.txt")
            boxes_yolo, class_ids = load_yolov8_det_labels(label_path)
            class_labels = [labels_name[class_id] for class_id in class_ids]

            # DOCS: https://albumentations.ai/docs/getting_started/bounding_boxes_augmentation/
            if transform is None:
                tr_img = img
                tr_bboxes_yolo = boxes_yolo
                tr_class_labels = class_labels
            else:
                tr = transform(image=img, bboxes=boxes_yolo, class_labels=class_labels)
                tr_img = tr['image']
                tr_bboxes_yolo = tr['bboxes']
                tr_class_labels = tr['class_labels']

            # Debug image: draw the boxes using the size of the image they belong to.
            im_draw = np.array(tr_img.copy())
            draw_height, draw_width = im_draw.shape[:2]
            for box_yolo, class_label in zip(tr_bboxes_yolo, tr_class_labels):
                x_center, y_center, width, height = box_yolo
                x_min, y_min, x_max, y_max = convert_yolo_box_to_absolute(
                    [x_center, y_center, width, height], draw_width, draw_height
                )
                cv2.putText(im_draw, class_label, (x_min, y_min - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 1)
                cv2.rectangle(im_draw, (x_min, y_min), (x_max, y_max), (0, 0, 255), 1)
            # Saved once per image (not once per box) so box-free images get a check image too.
            Image.fromarray(im_draw).save(f"{folder_out_images_aug_check}/{i}_{basename}.png")

            # One line per box: class id followed by the normalized YOLO box.
            filepath_label_save = f"{folder_out_labels_aug}/{i}_{basename}.txt"
            with open(filepath_label_save, 'w') as file:
                for box_yolo, class_label in zip(tr_bboxes_yolo, tr_class_labels):
                    class_id = labels_name.index(class_label)
                    x_center, y_center, width, height = box_yolo
                    file.write(f"{class_id} {x_center} {y_center} {width} {height} \n")

            Image.fromarray(tr_img).save(f"{folder_out_images_aug}/{i}_{basename}.png")

def split_dataset(images_folder, labels_folder, output_folder, train_pct, val_pct, test_pct, random_seed=None):
    """
    Split a dataset into training, validation, and testing sets based on specified percentages,
    organizing images and labels into separate subfolders. Removes existing split folders if they exist.

    Images with a .png, .jpg or .jpeg extension (any case) are considered; each
    one must have a `<basename>.txt` label file in `labels_folder`.

    Args:
    - images_folder (str): Path to the folder containing images.
    - labels_folder (str): Path to the folder containing corresponding labels.
    - output_folder (str): Path to the folder where the split datasets will be saved
      (as train/, valid/ and test/, each with images/ and labels/).
    - train_pct (float): Fraction of data to be used for training (e.g., 0.7 for 70%).
    - val_pct (float): Fraction of data to be used for validation.
    - test_pct (float): Fraction of data to be used for testing (receives the remainder).
    - random_seed (int, optional): Seed for the random number generator for reproducibility.

    Raises:
    - AssertionError: If the three fractions do not sum to 1.0.
    - FileNotFoundError: If an image has no matching label file.
    """
    # Tolerant comparison: fractions such as 0.7 + 0.2 + 0.1 are not exactly 1.0 in floating point.
    assert abs(train_pct + val_pct + test_pct - 1.0) < 1e-9, "Percentages must sum up to 1.0"

    if random_seed is not None:
        random.seed(random_seed)

    # Recreate the split folders from scratch.
    subsets = ('train', 'valid', 'test')
    for subset in subsets:
        subset_dir = os.path.join(output_folder, subset)
        if os.path.exists(subset_dir):
            shutil.rmtree(subset_dir)
    for subset in subsets:
        os.makedirs(os.path.join(output_folder, subset, 'images'), exist_ok=True)
        os.makedirs(os.path.join(output_folder, subset, 'labels'), exist_ok=True)

    # Keep the full image file name so the real extension is copied later.
    # Sorting first makes the shuffle reproducible for a given seed, because
    # os.listdir returns entries in arbitrary order.
    image_files = sorted(
        file for file in os.listdir(images_folder)
        if file.lower().endswith(('.png', '.jpg', '.jpeg'))
    )
    random.shuffle(image_files)

    # Calculate split indices; the test set takes whatever is left over.
    total_files = len(image_files)
    split_train = int(train_pct * total_files)
    split_val = split_train + int(val_pct * total_files)

    partitions = {
        'train': image_files[:split_train],
        'valid': image_files[split_train:split_val],
        'test': image_files[split_val:],
    }

    for subset, files in partitions.items():
        dst_img_folder = os.path.join(output_folder, subset, 'images')
        dst_lbl_folder = os.path.join(output_folder, subset, 'labels')
        for image_file in files:
            stem = os.path.splitext(image_file)[0]
            shutil.copy2(os.path.join(images_folder, image_file), dst_img_folder)
            shutil.copy2(os.path.join(labels_folder, stem + '.txt'), dst_lbl_folder)

    print("Dataset split complete.")

def train(folder_export_labelstudio, model_name, pretrained_model="yolov8n.pt", transform=None, segment=False, epochs=100, imgsz=640, batch=4, device=0):
    """
    Augment a YOLO dataset, split it, train a YOLO model and export the weights.

    Expects `folder_export_labelstudio` to contain `data.yaml` (with a `names`
    entry), `images/` and `labels/`. Working data is written to
    `<folder>/tmp_yolov8_training` (deleted and recreated on every call) and the
    resulting weights are copied to `<folder>/models/<model_name>_best.pt` and
    `<folder>/models/<model_name>_last.pt`.

    Args:
    - folder_export_labelstudio (str): Dataset root folder.
    - model_name (str): Name of the training run and prefix of the exported weights.
    - pretrained_model (str): Weights passed to `YOLO(...)`.
    - transform: Augmentation pipeline forwarded to the augment function, or None.
    - segment (bool): True to train a segmentation model (polygon labels),
      False for detection (box labels).
    - epochs (int): Number of training epochs.
    - imgsz (int): Training image size.
    - batch (int): Batch size.
    - device: Device forwarded to `model.train` (0 selects the first GPU).
    """
    # Absolute paths: relative entries in the dataset YAML may be resolved
    # against the Ultralytics datasets directory instead of the working directory.
    folder_export_labelstudio = os.path.abspath(folder_export_labelstudio)

    folder_tmp_training = f"{folder_export_labelstudio}/tmp_yolov8_training"
    folder_out_models = f"{folder_export_labelstudio}/models"
    os.makedirs(folder_out_models, exist_ok=True)

    with open(f"{folder_export_labelstudio}/data.yaml", 'r') as file:
        data = yaml.safe_load(file)
    names = data['names']
    # `names` may be written as a list or as an {id: name} mapping; the augment
    # functions need a list (they call labels_name.index).
    if isinstance(names, dict):
        labels_name = [names[key] for key in sorted(names)]
    else:
        labels_name = list(names)

    if os.path.exists(folder_tmp_training):
        shutil.rmtree(folder_tmp_training)
    os.makedirs(folder_tmp_training)

    folder_inp_images = f"{folder_export_labelstudio}/images"
    folder_inp_labels = f"{folder_export_labelstudio}/labels"
    folder_out_images_aug = f"{folder_tmp_training}/images"
    folder_out_labels_aug = f"{folder_tmp_training}/labels"
    folder_out_images_aug_check = f"{folder_tmp_training}/aug"

    # Augment data with the routine matching the label type.
    if segment:
        augment_yolov8_seg(transform, folder_inp_images, folder_inp_labels, folder_out_images_aug, folder_out_labels_aug, labels_name, folder_out_images_aug_check)
    else:
        augment_yolov8_det(transform, folder_inp_images, folder_inp_labels, folder_out_images_aug, folder_out_labels_aug, labels_name, folder_out_images_aug_check)

    # Split data into train / valid / test
    split_dataset(folder_out_images_aug, folder_out_labels_aug, folder_tmp_training, 0.7, 0.15, 0.15, random_seed=42)

    # Dataset description consumed by the trainer
    data = {
        "train": f"{folder_tmp_training}/train/images",
        "val":   f"{folder_tmp_training}/valid/images",
        "test":  f"{folder_tmp_training}/test/images",
        "nc": len(labels_name),
        "names": labels_name,
    }
    yaml_file_path = f'{folder_tmp_training}/data.yaml'
    with open(yaml_file_path, 'w') as file:
        yaml.dump(data, file, sort_keys=False)

    project = f"{folder_tmp_training}/models"
    if segment:
        model = YOLO(pretrained_model, task="segment")
    else:
        model = YOLO(pretrained_model)

    model.train(data=yaml_file_path, epochs=epochs, imgsz=imgsz, device=device, batch=batch, project=project, name=model_name)

    # To follow training, open a terminal and run tensorboard with --logdir
    # pointing at the run folder under `project`.

    # Copy the best and last weights next to the dataset
    last_model_path = f"{project}/{model_name}/weights/last.pt"
    best_model_path = f"{project}/{model_name}/weights/best.pt"
    dst_last_model_path = f"{folder_export_labelstudio}/models/{model_name}_last.pt"
    dst_best_model_path = f"{folder_export_labelstudio}/models/{model_name}_best.pt"
    shutil.copy(best_model_path, dst_best_model_path)
    shutil.copy(last_model_path, dst_last_model_path)
    print(f"Model path best: {dst_best_model_path}")
    print(f"Model path last: {dst_last_model_path}")


## Download Dataset

> [Deep learning course datasets](https://drive.google.com/drive/folders/1tEypEI3lZjff0Z8GOyeANJ3qBg46Vhzi?usp=sharing)

In [None]:
# Extract the archive into a folder of the same name.
!unzip halcon_mvtect_wood_segmentation_yolov8.zip  -d halcon_mvtect_wood_segmentation_yolov8
# Move train/images and train/labels up to the dataset root, where train() looks for images/ and labels/.
!mv halcon_mvtect_wood_segmentation_yolov8/train/images halcon_mvtect_wood_segmentation_yolov8/images
!mv halcon_mvtect_wood_segmentation_yolov8/train/labels halcon_mvtect_wood_segmentation_yolov8/labels
# Remove the README files shipped inside the archive.
!rm -rf  halcon_mvtect_wood_segmentation_yolov8/README.dataset.txt
!rm -rf  halcon_mvtect_wood_segmentation_yolov8/README.roboflow.txt
# NOTE(review): train_detection()/train_segmentation() read folders suffixed "_det"/"_seg",
# which this cell does not create — confirm the intended folder name.

## Train Detection Model

In [None]:
def train_detection():
    """
    Train the detection model on the wood dataset located in the working directory.

    Builds a flip + colour-jitter augmentation pipeline with YOLO-format
    bounding boxes and delegates to `train` with segment=False.
    """
    dataset_dir = os.path.join(os.getcwd(), "halcon_mvtect_wood_segmentation_yolov8_det")

    # Previously disabled extras (left out): ShiftScaleRotate, CLAHE, GaussianBlur.
    augmentations = [
        A.HorizontalFlip(p=0.5),
        A.VerticalFlip(p=0.5),
        A.ColorJitter(p=0.5, contrast=0.3, saturation=0.3, hue=0.5, brightness=0.2),
    ]
    box_settings = A.BboxParams(format='yolo', label_fields=["class_labels"])
    pipeline = A.Compose(augmentations, bbox_params=box_settings)

    train(dataset_dir, "yolov8n_det", "yolov8n.pt", pipeline, segment=False)

In [None]:
def train_segmentation():
    """
    Train the segmentation model on the wood dataset located in the working directory.

    Builds a flip + rotate + colour-jitter augmentation pipeline that carries
    polygon vertices as 'xy' keypoints and delegates to `train` with segment=True.
    """
    dataset_dir = os.path.join(os.getcwd(), "halcon_mvtect_wood_segmentation_yolov8_seg")

    augmentations = [
        A.HorizontalFlip(p=0.5),
        A.VerticalFlip(p=0.5),
        A.Rotate(limit=(-90, 90), p=0.5, border_mode=cv2.BORDER_CONSTANT),
        A.ColorJitter(p=0.5, contrast=0.3, saturation=0.3, hue=0.5, brightness=(0.9, 1.2)),
    ]
    # remove_invisible=True asks the pipeline to drop keypoints that leave the image.
    keypoint_settings = A.KeypointParams(format='xy', remove_invisible=True, label_fields=['class_labels'])
    pipeline = A.Compose(augmentations, keypoint_params=keypoint_settings)

    train(dataset_dir, "yolov8n_seg", "yolov8n-seg.pt", pipeline, segment=True)


In [None]:
# Run the full segmentation pipeline (augment, split, train, export weights).
train_segmentation()

## Metrics and Results

In [None]:
from ultralytics import YOLO
from PIL import Image
import cv2
import numpy as np
import imutils
import torch
import pandas as pd
from PIL import Image


In [None]:
def count_rows(score_dict):
    """
    Count the detections stored in a score dict.

    Returns the length of score_dict["label"] when it is a list; any other
    value is treated as a single detection and counts as one row.
    """
    labels = score_dict["label"]
    return len(labels) if isinstance(labels, list) else 1

class Yolov8Inference:
    """Wrapper around an Ultralytics YOLO model that returns detections as plain dicts."""

    def __init__(self, model_path, gpu=False):
        """
        Load the model and choose the inference device.

        Args:
        - model_path (str): Path to the weights file passed to `YOLO(...)`.
        - gpu (bool): Use CUDA when requested and available, otherwise the CPU.
        """
        self.model = YOLO(model_path)

        self.device = torch.device("cpu")
        if gpu:
            if torch.cuda.is_available():
                self.device = torch.device("cuda")
                print(f"Model will run on GPU: {torch.cuda.get_device_name(0)}")
            else:
                print("Model will run on CPU, because GPU is not available")
        else:
            print("Model will run on CPU")

        self.labels = self.model.names

    def filter_score_dict(self, df, th_dict):
        """
        Keep only the detections whose score exceeds the threshold of their class.

        Args:
        - df (pandas.DataFrame): One row per detection, with at least the
          "label" and "score" columns.
        - th_dict (dict): Class name -> minimum score, e.g.
          {'bolla': 0.1, 'bordo_deformazione': 0.2, 'deformazione': 0.5}.

        Returns:
        - pandas.DataFrame: The surviving rows, same columns, index 0..n-1.

        Raises:
        - KeyError: If a detected label has no entry in th_dict.
        """
        keep = np.array(
            [score > th_dict[label] for label, score in zip(df["label"], df["score"])],
            dtype=bool,
        )
        # A boolean ndarray (rather than a list) selects rows even when it is empty.
        return df.loc[keep].reset_index(drop=True)

    def get_score_max_for_each_class(self, df):
        """Return, for every label, the row of df with the highest score."""
        max_df = df.loc[df.groupby('label')['score'].idxmax()]
        return max_df

    def predict(self, im, th_dict=None, is_seg=False, debug=False, imgsz=640, conf=0.01, iou=0.01):
        """
        Run the model on one image and collect the detections.

        Args:
        - im (numpy.ndarray): Image array; its first two dimensions are used as
          (height, width) when resizing masks.
        - th_dict (dict, optional): Per-class score thresholds applied with
          `filter_score_dict`; None disables the filtering.
        - is_seg (bool): True for a segmentation model; adds a "cnt" entry with
          the largest external contour of each mask ([] if none is found).
        - debug (bool): Currently unused.
        - imgsz (int): Inference image size forwarded to the model.
        - conf (float): Confidence threshold forwarded to the model.
        - iou (float): NMS IoU threshold forwarded to the model.

        Returns:
        - tuple: (score_dict, score_max, label_max). score_dict maps "label",
          "score", "xyxy" (and "cnt" when is_seg) to lists with one item per
          detection; score_max / label_max describe the highest-scoring
          detection, or 0 and "" when nothing was detected.
        """
        res = self.model.predict(im, imgsz=imgsz, device=self.device, conf=conf, iou=iou, verbose=False)
        result = res[0]

        names = result.names
        scores = result.boxes.conf.cpu().numpy()
        class_ids = result.boxes.cls.cpu().numpy()
        boxes = result.boxes.xyxy.cpu().numpy()
        has_masks = is_seg and len(scores) > 0
        masks = result.masks.data.cpu().numpy() if has_masks else []

        score_dict = {"label": [], "score": [], "xyxy": []}
        if is_seg:
            score_dict["cnt"] = []

        for box, score, class_id in zip(boxes, scores, class_ids):
            x1, y1, x2, y2 = box.astype(int)
            # Class ids arrive as floats; look the name up with a plain int key.
            score_dict["label"].append(names[int(class_id)])
            score_dict["score"].append(score)
            score_dict["xyxy"].append([x1, y1, x2, y2])

        if has_masks:
            h, w = im.shape[0], im.shape[1]
            for mask in masks:
                # Bring the mask to image resolution and binarize it before contour extraction.
                mask = cv2.resize((mask * 255).astype(np.uint8), (w, h))
                _, binary_mask = cv2.threshold(mask, 1, 255, cv2.THRESH_BINARY)
                contours, _ = cv2.findContours(binary_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
                if len(contours) > 0:
                    score_dict["cnt"].append(max(contours, key=cv2.contourArea))
                else:
                    score_dict["cnt"].append([])
        df = pd.DataFrame(score_dict)

        # Filter results according to the thresholds
        if th_dict is not None:
            df = self.filter_score_dict(df, th_dict)

        score_max = 0
        label_max = ""
        if len(df) > 0:
            score_vec = df["score"].values
            score_max_idx = int(np.argmax(score_vec))
            score_max = score_vec[score_max_idx]
            # argmax is positional, so use positional indexing regardless of the index labels.
            label_max = df["label"].iloc[score_max_idx]

        score_dict = df.to_dict(orient='list')
        return score_dict, score_max, label_max

In [None]:
# Load the trained weights; the second argument (gpu=True) requests CUDA when available.
# NOTE(review): absolute Colab path, and the file name differs from the
# "<model_name>_best.pt" that train() writes — confirm where this file comes from.
inf = Yolov8Inference("/content/halcon_mvtect_wood_segmentation_yolov8/models/yolov8n_seg_mat_v13_best.pt", True)

Model will run on GPU: Tesla T4


In [None]:
# Per-class minimum score passed to inf.predict(); filter_score_dict looks up
# every detected label here, so each class the model can output needs an entry.
th_dict = {
    "liquid": 0.1,
    "scratch": 0.1,
    "hole": 0.1
}
# True: the model is a segmentation model, so predict() also returns contours.
is_seg = True

In [None]:
# Run the model on every image of the folder and keep an annotated copy of
# each one in images_list (file paths go to filename_list, same order).
is_seg = True
folder_path = "halcon_mvtect_wood_segmentation_yolov8/images"
images_list = []
filename_list = []
for name in sorted(os.listdir(folder_path)):
    filename = os.path.join(folder_path, name)
    # Force 3-channel RGB, as the training code does, so grayscale/RGBA files also work.
    im = np.array(Image.open(filename).convert("RGB"))

    # Ultralytics interprets numpy inputs as BGR, so reverse the channel order
    # for the model only; drawing below keeps using the RGB image.
    im_bgr = np.ascontiguousarray(im[:, :, ::-1])
    score_dict, score_max, label_max = inf.predict(im_bgr, th_dict, is_seg)

    im_draw = im.copy()

    for i in range(count_rows(score_dict)):
        label = score_dict["label"][i]
        score = score_dict["score"][i]

        if not is_seg:
            x1, y1, x2, y2 = score_dict["xyxy"][i]
            cv2.rectangle(im_draw, (x1, y1), (x2, y2), (0, 0, 255), 1)
            cv2.putText(im_draw, f"{label}: {score:.2f}", (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5,
                        (255, 255, 255), 1)
        else:
            cnt = score_dict["cnt"][i]
            # predict() stores [] when a mask produced no contour: nothing to draw.
            if len(cnt) == 0:
                continue
            cv2.drawContours(im_draw, [cnt], -1, (0, 0, 255), 1)
            # Oriented (minimum-area) bounding box around the contour.
            rect = cv2.minAreaRect(cnt)
            box = np.intp(cv2.boxPoints(rect))  # convert to integer
            M = cv2.moments(cnt)
            if M["m00"] != 0:
                cx = int(M["m10"] / M["m00"])
                cy = int(M["m01"] / M["m00"])
            else:
                # Zero-area contour: fall back to the centre of the oriented box.
                cx, cy = int(rect[0][0]), int(rect[0][1])
            cv2.drawContours(im_draw, [box], -1, (0, 255, 0), 1)
            cv2.putText(im_draw, f"{label}: {score:.2f}", (cx, cy - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5,
                        (255, 255, 255), 1)

    images_list.append(im_draw)
    filename_list.append(filename)

In [None]:
# Save on colab