Install requirements

In [None]:
!python3 -m pip install -q --upgrade pip
!python3 -m pip install -q lancedb
!python3 -m pip install opencv-python

In [1]:
import lancedb

# Location of the LanceDB database, relative to the notebook's working directory.
uri = r'../data/test.lancedb'
# Connection handle for the database at `uri`.
# NOTE(review): presumably the database is created if it does not exist yet - confirm against the LanceDB docs.
db = lancedb.connect(uri)

In [2]:
from dataclasses import dataclass
import cv2

@dataclass
class BoundingBox:
    """Axis-aligned rectangle stored as an origin corner plus a size.

    In this notebook instances are built from annotation corner values as
    ``(xmin, ymin, xmax - xmin, ymax - ymin)``.
    """

    # Horizontal coordinate of the box origin (the annotation's xmin).
    x: int
    # Vertical coordinate of the box origin (the annotation's ymin).
    y: int
    # Horizontal extent (xmax - xmin); presumably in pixels - TODO confirm.
    width: int
    # Vertical extent (ymax - ymin); presumably in pixels - TODO confirm.
    height: int

@dataclass
class Annotation:
    """A single labelled object: a class name together with its bounding box."""

    # Label of the object (taken from the annotation's <name> element in this notebook).
    className: str
    # Region of the image that the label applies to.
    bbox: BoundingBox

@dataclass
class AnnotatedImage:
    """An image bundled with all object annotations that belong to it."""

    # Image data; in this notebook it is whatever cv2.imread returned.
    # NOTE(review): cv2.imread is documented to return a NumPy array (or None
    # when the file cannot be read) - confirm cv2.Mat is the intended annotation.
    image: cv2.Mat
    # One entry per annotated object; empty when the image has no objects.
    annotations: list[Annotation]

In [None]:
from typing import Generator
from xml.etree import ElementTree
from pathlib import Path

import numpy as np

def _parse_object_annotation(object_element: ElementTree.Element) -> Annotation:
    """Build an Annotation from one ``<object>`` element of an annotation file."""
    bndbox = object_element.find('bndbox')

    # Coordinates may be written as floats in the XML; round each corner to
    # the nearest integer before deriving the width and height.
    xmin, ymin, xmax, ymax = (
        round(float(bndbox.find(tag).text))
        for tag in ('xmin', 'ymin', 'xmax', 'ymax')
    )

    return Annotation(
        object_element.find('name').text,
        BoundingBox(xmin, ymin, xmax - xmin, ymax - ymin),
    )


def get_annotated_images_generator(dataset_path: Path) -> Generator[AnnotatedImage, None, None]:
    """Yield one AnnotatedImage per ``.xml`` annotation file in *dataset_path*.

    Each annotation file is parsed as XML. The image location is taken from
    its ``<path>`` element, falling back to ``<filename>`` when ``<path>`` is
    missing or empty, and is resolved relative to *dataset_path*. Every
    ``<object>`` element contributes one Annotation built from its ``<name>``
    and ``<bndbox>`` (``xmin``/``ymin``/``xmax``/``ymax``) children.

    Annotation files are visited in sorted order so that the output order is
    reproducible; directory listing order is otherwise arbitrary.

    Args:
        dataset_path: Directory containing the annotation files.

    Yields:
        AnnotatedImage with the loaded image and its annotations. If the image
        cannot be read, a RuntimeWarning is issued and ``image`` is ``None``.

    Raises:
        ValueError: If an annotation file names no image at all.
    """
    import warnings

    for file in sorted(dataset_path.iterdir()):
        if not file.name.endswith('.xml'):
            continue

        tree = ElementTree.parse(file)

        image_reference = tree.findtext('path') or tree.findtext('filename')
        if not image_reference:
            raise ValueError(f'{file}: annotation has no <path> or <filename> element')

        image_path = dataset_path.joinpath(image_reference)
        image = cv2.imread(str(image_path))
        if image is None:
            # cv2.imread signals failure by returning None instead of raising;
            # surface it so a missing file is not discovered far downstream.
            warnings.warn(
                f'{file}: could not read image {image_path}',
                RuntimeWarning,
                stacklevel=2,
            )

        annotations: list[Annotation] = [
            _parse_object_annotation(o) for o in tree.findall('object')
        ]

        yield AnnotatedImage(image, annotations)

def encode_image(path_to_image: Path) -> bytes:
    """Read an image file and return it re-encoded as raw bytes.

    The target encoding is chosen from the file's own suffix (e.g. ``.png``),
    so the result uses the same container format as the source file.

    Args:
        path_to_image: Path of the image file to read.

    Returns:
        The encoded image as a ``bytes`` object.

    Raises:
        FileNotFoundError: If the image cannot be read.
        ValueError: If OpenCV reports that encoding failed.
    """
    # Pass a plain string: not every OpenCV build accepts pathlib.Path here,
    # and this matches how images are loaded elsewhere in this notebook.
    image = cv2.imread(str(path_to_image))
    if image is None:
        # cv2.imread returns None rather than raising on failure.
        raise FileNotFoundError(f'Could not read image: {path_to_image}')

    encoding = path_to_image.suffix
    success, buffer = cv2.imencode(encoding, image)
    if not success:
        raise ValueError(f'Could not encode {path_to_image} as {encoding!r}')

    return buffer.tobytes()


def decode_image(encoded_image):
    """Decode an in-memory encoded image (e.g. PNG/JPEG bytes) with OpenCV.

    Args:
        encoded_image: Bytes-like object holding the encoded image, such as
            the value returned by ``encode_image``.

    Returns:
        The result of ``cv2.imdecode`` with ``cv2.IMREAD_ANYCOLOR``.
    """
    # Encoded image data is a stream of unsigned bytes; np.byte is *signed*
    # int8, which is not the uint8 buffer type cv2.imdecode expects.
    buffer = np.frombuffer(encoded_image, dtype=np.uint8)
    return cv2.imdecode(buffer, cv2.IMREAD_ANYCOLOR)

In [None]:
# Root directory of the sample dataset, relative to the notebook's working directory.
dataset_path = Path(r'../data/sample-dataset/')

# The 'test' split lives in a sub-directory of the dataset root.
test_dataset_path = dataset_path / 'test'

# Smoke check: load every annotated image of the test split and print it.
for item in get_annotated_images_generator(test_dataset_path):
    print(item)