In [None]:
from logging import DEBUG, INFO, FileHandler, Formatter, Logger, StreamHandler, getLogger
from pathlib import Path

# Directory that receives the "<logger name>.log" files written by create_logger.
# `Path().parent` is the current working directory, so the location is relative
# to wherever the kernel was started.
_LOGGIN_DIR = Path().parent / "logs"

# A single mkdir call with exist_ok=True replaces the previous
# "check exists(), then mkdir()" pair, which could fail if another process
# created the directory between the two steps.
_LOGGIN_DIR.mkdir(parents=True, exist_ok=True)


def create_logger(name: str) -> Logger:
    """Return the logger called ``name``, configured for console and file output.

    The logger itself accepts DEBUG and above. A stream handler emits INFO and
    above, and a file handler writes DEBUG and above to
    ``_LOGGIN_DIR / "<name>.log"``. Both handlers share one timestamped format.

    The function is idempotent: if the logger already has handlers attached,
    no additional handlers are added.

    Args:
        name: Logger name; also used as the stem of the log file name.

    Returns:
        The configured ``Logger`` instance.
    """
    logger = getLogger(name)
    logger.setLevel(DEBUG)

    # getLogger() hands back the same object for the same name, so calling this
    # function again (for example when a notebook cell is re-run) would
    # otherwise stack extra handlers and emit every record several times.
    if logger.handlers:
        return logger

    formatter = Formatter(fmt="%(asctime)s.%(msecs)03d %(levelname)s: %(message)s", datefmt="%Y-%m-%d,%H:%M:%S")

    stream_handler = StreamHandler()
    stream_handler.setFormatter(formatter)
    stream_handler.setLevel(INFO)

    file_handler = FileHandler(_LOGGIN_DIR / f"{name}.log")
    file_handler.setFormatter(formatter)
    file_handler.setLevel(DEBUG)

    logger.addHandler(stream_handler)
    logger.addHandler(file_handler)
    return logger

In [None]:
from hashlib import sha256
from pathlib import Path
from shutil import rmtree
from typing import Optional
from urllib.request import urlretrieve
from zipfile import ZipFile

from tqdm import tqdm

# Directory holding downloaded dataset archives and their unpacked contents.
# `Path().parent` is the current working directory, so the location is relative
# to wherever the kernel was started.
_DATASETS_DIR = Path().parent / "datasets"

# A single mkdir call with exist_ok=True replaces the previous
# "check exists(), then mkdir()" pair, which could fail if another process
# created the directory between the two steps.
_DATASETS_DIR.mkdir(parents=True, exist_ok=True)

# Logger shared by the dataset helpers below; it is named after the running module.
logger = create_logger(name=__name__)


class DownloadProgressBar(tqdm):
    """tqdm progress bar whose ``update_to`` can be used as a ``urlretrieve`` report hook."""

    def update_to(self, b: int = 1, bsize: int = 1, tsize: Optional[int] = None) -> None:
        """Move the bar to the absolute position ``b * bsize``.

        Args:
            b: Number of blocks transferred so far.
            bsize: Size of one block, in the bar's unit.
            tsize: Total size, if known. ``None`` or a negative value leaves
                the bar's total untouched.
        """
        # urlretrieve passes -1 as the total when the server does not report a
        # size; storing that as the bar's total would be meaningless.
        if tsize is not None and tsize >= 0:
            self.total = tsize
        # update() takes an increment, while the hook receives the absolute
        # block count, so subtract what has already been displayed (self.n).
        self.update(b * bsize - self.n)


def _sha256_of_file(path: Path, chunk_size: int = 1024 * 1024) -> str:
    """Return the hexadecimal SHA256 digest of ``path``, reading it in chunks."""
    digest = sha256()
    with path.open("rb") as stream:
        # Chunked reads keep memory use flat regardless of the archive size.
        for chunk in iter(lambda: stream.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()


def load_dataset(name: str, url: str, hash: Optional[str] = None) -> None:
    """Download (if needed), optionally verify, and unpack a zipped dataset.

    The archive is stored as ``_DATASETS_DIR / "<name>.zip"`` and reused on
    later calls. Its contents are extracted into ``_DATASETS_DIR / name``,
    replacing any directory already present there.

    Args:
        name: Dataset name; used for the archive file and the target directory.
        url: Location of the zip archive to download.
        hash: Expected SHA256 hex digest of the archive. Surrounding whitespace
            and letter case are ignored. ``None`` skips verification.

    Raises:
        ValueError: If ``hash`` is given and does not match the archive.
    """
    directory_name = _DATASETS_DIR / name
    # Build the archive name by appending ".zip". Path.with_suffix() would
    # instead replace whatever follows the last dot in ``name``
    # (e.g. "data.v1" -> "data.zip").
    archive_name = _DATASETS_DIR / f"{name}.zip"
    if not archive_name.exists():
        logger.info(f"Downloading dataset '{url}' into {archive_name}...")
        # Download into a temporary sibling file and rename it only on success,
        # so an interrupted transfer is never mistaken for a cached archive.
        partial_name = _DATASETS_DIR / f"{name}.zip.part"
        try:
            with DownloadProgressBar(unit="B", unit_scale=True, miniters=1, desc=name) as t:
                urlretrieve(url, partial_name, t.update_to)
        except BaseException:
            if partial_name.exists():
                partial_name.unlink()
            raise
        partial_name.replace(archive_name)
    else:
        logger.debug(f"Dataset '{url}' is found in {archive_name}!")
    if hash is not None:
        logger.debug(f"Verifying dataset {name} archive {archive_name} SHA256 checksum...")
        # hexdigest() is lowercase; normalise the expected value to match.
        if _sha256_of_file(archive_name) != hash.strip().lower():
            raise ValueError(f"Error verifying dataset {name} archive {archive_name} SHA256 checksum!")
        logger.info(f"Dataset {name} SHA256 verification successful!")
    # Open the archive before deleting the old directory: if the file is not a
    # readable zip, the previously unpacked dataset is left untouched.
    with ZipFile(archive_name, "r") as zipfile:
        if directory_name.is_dir():
            logger.debug(f"Removing previous dataset {name} directory...")
            rmtree(directory_name)
        logger.debug(f"Unpacking dataset {name} into {directory_name}...")
        zipfile.extractall(directory_name)
    logger.info(f"Dataset {name} available in {directory_name}!")


def verify_dataset(name: str) -> None:
    """Check that the dataset ``name`` has a directory under ``_DATASETS_DIR``.

    Args:
        name: Dataset name, i.e. the directory name inside ``_DATASETS_DIR``.

    Raises:
        RuntimeError: If that directory does not exist.
    """
    dataset_path = _DATASETS_DIR / name
    # Guard clause: bail out first, report success afterwards.
    if not dataset_path.is_dir():
        raise RuntimeError(f"Dataset {name} does not exist!")
    logger.info(f"Dataset {name} found in {dataset_path}!")

In [None]:
# Fetch and unpack the "allergen30" dataset; the archive is checked against
# the SHA256 digest given here before it is extracted.
load_dataset(
    name="allergen30",
    url="https://prod-dcd-datasets-cache-zipfiles.s3.eu-west-1.amazonaws.com/9ygs9vhnpw-1.zip",
    hash="ab6e19d32f7490988ca77d600fc6f3df2e8648365c4c92ced8c1b462c01d9d9f",
)

In [None]:
from ultralytics import YOLO

# Value handed to the YOLO constructor.
# NOTE(review): presumably the name of a pretrained checkpoint — confirm.
YOLO_VERSION = "yolo11n.pt"

model = YOLO(YOLO_VERSION)

# List the name of every parameter of the network, one per line.
print("YOLO network parameters:")
for parameter_name, _ in model.named_parameters():
    print(parameter_name)

In [None]:
# Settings for the first training run on `model`.
# NOTE(review): `freeze` presumably keeps the first FREEZE_LAYERS layers fixed
# during this run — confirm against the ultralytics documentation.
LEARNING_RATE = 0.001
IMAGE_SIZE = 416
BATCH_SIZE = 16
EPOCHS_NUMBER = 15
FREEZE_LAYERS = 10

first_run_options = {
    "data": "datasets/allergen30.yaml",
    "epochs": EPOCHS_NUMBER,
    "batch": BATCH_SIZE,
    "freeze": FREEZE_LAYERS,
    "imgsz": IMAGE_SIZE,
    "lr0": LEARNING_RATE,
}

# Kept as the last expression so the cell still displays the call's return value.
model.train(**first_run_options)

In [None]:
# Settings for a further training run on the same `model` object. No `freeze`
# argument is passed this time, and both the epoch count and the initial
# learning rate are lower than in the previous training cell.
LEARNING_RATE = 0.0001
IMAGE_SIZE = 416
BATCH_SIZE = 16
EPOCHS_NUMBER = 7

second_run_options = {
    "data": "datasets/allergen30.yaml",
    "epochs": EPOCHS_NUMBER,
    "batch": BATCH_SIZE,
    "imgsz": IMAGE_SIZE,
    "lr0": LEARNING_RATE,
}

# Kept as the last expression so the cell still displays the call's return value.
model.train(**second_run_options)

In [None]:
# Run the model over the test image directory of the unpacked dataset,
# passing a confidence value of 0.25.
results = model.predict(
    source="datasets/allergen30/Allergen30/test/images",
    show_labels=True,
    conf=0.25,
)

# Print the raw box data of each returned result.
print("Test set image predictions:")
for prediction in results:
    print(prediction.boxes.data)

In [None]:
# Write the model to "find_allergens.pt"; the path is relative, so the file
# lands in the current working directory.
model.save("find_allergens.pt")