Add YOLO Loose format (#856)

* Change YoloBase to YoloStrictFormat * Add YoloLooseFormat * Revisit notebooks/06_tiling.ipynb * Update YOLO documenetation Signed-off-by: Kim, Vinnam <vinnam.kim@intel.com>
openvinotoolkit · Mar 17, 2023 · 36c2d4b · 36c2d4b
1 parent 3f6765e
commit 36c2d4b
Show file tree

Hide file tree

Showing 30 changed files with 599 additions and 183 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -17,6 +17,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
   (<https://github.com/openvinotoolkit/datumaro/pull/842>)
 - Add Ava action data format support
   (<https://github.com/openvinotoolkit/datumaro/pull/847>)
+- Add YOLO Loose format
+  (<https://github.com/openvinotoolkit/datumaro/pull/856>)
 
 ### Changed
 - Refactor Datumaro format code and test code

diff --git a/datumaro/components/dataset_base.py b/datumaro/components/dataset_base.py
@@ -16,8 +16,7 @@
 from datumaro.components.importer import ImportContext, NullImportContext
 from datumaro.components.media import Image, MediaElement, PointCloud
 from datumaro.util.attrs_util import default_if_none, not_empty
-
-DEFAULT_SUBSET_NAME = "default"
+from datumaro.util.definitions import DEFAULT_SUBSET_NAME
 
 T = TypeVar("T", bound=MediaElement)
 

diff --git a/datumaro/components/format_detection.py b/datumaro/components/format_detection.py
@@ -298,7 +298,7 @@ def _require_files_iter(
     @contextlib.contextmanager
     def probe_text_file(
         self, path: str, requirement_desc: str, is_binary_file: bool = False
-    ) -> Iterator[Union[BufferedReader, TextIO]]:
+    ) -> Union[BufferedReader, TextIO]:
         """
         Returns a context manager that can be used to place a requirement on
         the contents of the file referred to by `path`. To do so, you must

diff --git a/datumaro/components/importer.py b/datumaro/components/importer.py
@@ -135,6 +135,7 @@ def _find_sources_recursive(
         dirname: str = "",
         file_filter: Optional[Callable[[str], bool]] = None,
         max_depth: int = 3,
+        recursive: bool = False,
     ):
         """
         Finds sources in the specified location, using the matching pattern
@@ -151,6 +152,8 @@ def _find_sources_recursive(
             dirname: a glob pattern for filename prefixes
             file_filter: a callable (abspath: str) -> bool, to filter paths found
             max_depth: the maximum depth for recursive search.
+            recursive: If recursive is true, the pattern '**' will match any files and
+                zero or more directories and subdirectories.
 
         Returns: a list of source configurations
             (i.e. Extractor type names and c-tor parameters)
@@ -174,7 +177,9 @@ def _find_sources_recursive(
             for d in range(max_depth + 1):
                 sources.extend(
                     {"url": p, "format": extractor_name}
-                    for p in iglob(osp.join(path, *("*" * d), dirname, filename + ext))
+                    for p in iglob(
+                        osp.join(path, *("*" * d), dirname, filename + ext), recursive=recursive
+                    )
                     if (callable(file_filter) and file_filter(p)) or (not callable(file_filter))
                 )
                 if sources:

diff --git a/datumaro/components/media.py b/datumaro/components/media.py
@@ -15,10 +15,9 @@
 import numpy as np
 
 from datumaro.components.errors import MediaShapeError
+from datumaro.util.definitions import BboxIntCoords
 from datumaro.util.image import _image_loading_errors, decode_image, lazy_image, save_image
 
-BboxIntCoords = Tuple[int, int, int, int]  # (x, y, w, h)
-
 
 class MediaType(IntEnum):
     NONE = 0

diff --git a/datumaro/plugins/data_formats/yolo/base.py b/datumaro/plugins/data_formats/yolo/base.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2019-2022 Intel Corporation
+# Copyright (C) 2019-2023 Intel Corporation
 #
 # SPDX-License-Identifier: MIT
 
@@ -7,7 +7,7 @@
 import os.path as osp
 import re
 from collections import OrderedDict
-from typing import Dict, List, Optional, Tuple, Type, TypeVar, Union
+from typing import Dict, List, Optional, Type, TypeVar, Union
 
 from datumaro.components.annotation import Annotation, AnnotationType, Bbox, LabelCategories
 from datumaro.components.dataset_base import DatasetBase, DatasetItem, SubsetBase
@@ -16,21 +16,24 @@
     InvalidAnnotationError,
     UndeclaredLabelError,
 )
-from datumaro.components.format_detection import FormatDetectionContext
-from datumaro.components.importer import Importer
 from datumaro.components.media import Image
-from datumaro.util.image import DEFAULT_IMAGE_META_FILE_NAME, ImageMeta, load_image_meta_file
+from datumaro.util.image import (
+    DEFAULT_IMAGE_META_FILE_NAME,
+    IMAGE_EXTENSIONS,
+    ImageMeta,
+    load_image_meta_file,
+)
 from datumaro.util.meta_file_util import has_meta_file, parse_meta_file
-from datumaro.util.os_util import split_path
+from datumaro.util.os_util import extract_subset_name_from_parent, find_files, split_path
 
-from .format import YoloPath
+from .format import YoloLoosePath, YoloPath
 
 T = TypeVar("T")
 
 
-class YoloBase(SubsetBase):
-    class Subset(DatasetBase):
-        def __init__(self, name: str, parent: YoloBase):
+class YoloStrictBase(SubsetBase):
+    class _Subset(DatasetBase):
+        def __init__(self, name: str, parent: YoloStrictBase):
             super().__init__()
             self._name = name
             self._parent = parent
@@ -54,23 +57,15 @@ def __init__(
         image_info: Union[None, str, ImageMeta] = None,
         **kwargs,
     ) -> None:
+        super().__init__(**kwargs)
+
         if not osp.isfile(config_path):
             raise DatasetImportError(f"Can't read dataset descriptor file '{config_path}'")
 
-        super().__init__(**kwargs)
-
         rootpath = osp.dirname(config_path)
         self._path = rootpath
 
-        assert image_info is None or isinstance(image_info, (str, dict))
-        if image_info is None:
-            image_info = osp.join(rootpath, DEFAULT_IMAGE_META_FILE_NAME)
-            if not osp.isfile(image_info):
-                image_info = {}
-        if isinstance(image_info, str):
-            image_info = load_image_meta_file(image_info)
-
-        self._image_info = image_info
+        self._image_info = self.parse_image_info(rootpath, image_info)
 
         config = self._parse_config(config_path)
 
@@ -95,21 +90,33 @@ def __init__(
             if not osp.isfile(list_path):
                 raise InvalidAnnotationError(f"Can't find '{subset_name}' subset list file")
 
-            subset = YoloBase.Subset(subset_name, self)
+            subset = self._Subset(subset_name, self)
             with open(list_path, "r", encoding="utf-8") as f:
                 subset.items = OrderedDict(
                     (self.name_from_path(p), self.localize_path(p)) for p in f if p.strip()
                 )
             subsets[subset_name] = subset
 
-        self._subsets: Dict[str, YoloBase.Subset] = subsets
+        self._subsets: Dict[str, self._Subset] = subsets
 
         self._categories = {
             AnnotationType.label: self._load_categories(
                 osp.join(self._path, self.localize_path(names_path))
             )
         }
 
+    @staticmethod
+    def parse_image_info(rootpath: str, image_info: Optional[Union[str, ImageMeta]] = None):
+        assert image_info is None or isinstance(image_info, (str, dict))
+        if image_info is None:
+            image_info = osp.join(rootpath, DEFAULT_IMAGE_META_FILE_NAME)
+            if not osp.isfile(image_info):
+                image_info = {}
+        if isinstance(image_info, str):
+            image_info = load_image_meta_file(image_info)
+
+        return image_info
+
     @staticmethod
     def _parse_config(path: str) -> Dict[str, str]:
         with open(path, "r", encoding="utf-8") as f:
@@ -168,13 +175,19 @@ def _get(self, item_id: str, subset_name: str) -> Optional[DatasetItem]:
 
                 anno_path = osp.splitext(image.path)[0] + ".txt"
                 annotations = self._parse_annotations(
-                    anno_path, image, item_id=(item_id, subset_name)
+                    anno_path,
+                    image,
+                    label_categories=self._categories[AnnotationType.label],
                 )
 
                 item = DatasetItem(
                     id=item_id, subset=subset_name, media=image, annotations=annotations
                 )
                 subset.items[item_id] = item
+            except (UndeclaredLabelError, InvalidAnnotationError) as e:
+                self._ctx.error_policy.report_annotation_error(e, item_id=(item_id, subset_name))
+                subset.items.pop(item_id)
+                item = None
             except Exception as e:
                 self._ctx.error_policy.report_item_error(e, item_id=(item_id, subset_name))
                 subset.items.pop(item_id)
@@ -191,8 +204,13 @@ def _parse_field(value: str, cls: Type[T], field_name: str) -> T:
                 f"Can't parse {field_name} from '{value}'. Expected {cls}"
             ) from e
 
+    @classmethod
     def _parse_annotations(
-        self, anno_path: str, image: Image, *, item_id: Tuple[str, str]
+        cls,
+        anno_path: str,
+        image: Image,
+        *,
+        label_categories: LabelCategories,
     ) -> List[Annotation]:
         lines = []
         with open(anno_path, "r", encoding="utf-8") as f:
@@ -207,39 +225,36 @@ def _parse_annotations(
             # Use image info as late as possible to avoid unnecessary image loading
             if image.size is None:
                 raise DatasetImportError(
-                    f"Can't find image info for '{self.localize_path(image.path)}'"
+                    f"Can't find image info for '{cls.localize_path(image.path)}'"
                 )
             image_height, image_width = image.size
 
         for line in lines:
-            try:
-                parts = line.split()
-                if len(parts) != 5:
-                    raise InvalidAnnotationError(
-                        f"Unexpected field count {len(parts)} in the bbox description. "
-                        "Expected 5 fields (label, xc, yc, w, h)."
-                    )
-                label_id, xc, yc, w, h = parts
-
-                label_id = self._parse_field(label_id, int, "bbox label id")
-                if label_id not in self._categories[AnnotationType.label]:
-                    raise UndeclaredLabelError(str(label_id))
-
-                w = self._parse_field(w, float, "bbox width")
-                h = self._parse_field(h, float, "bbox height")
-                x = self._parse_field(xc, float, "bbox center x") - w * 0.5
-                y = self._parse_field(yc, float, "bbox center y") - h * 0.5
-                annotations.append(
-                    Bbox(
-                        x * image_width,
-                        y * image_height,
-                        w * image_width,
-                        h * image_height,
-                        label=label_id,
-                    )
+            parts = line.split()
+            if len(parts) != 5:
+                raise InvalidAnnotationError(
+                    f"Unexpected field count {len(parts)} in the bbox description. "
+                    "Expected 5 fields (label, xc, yc, w, h)."
                 )
-            except Exception as e:
-                self._ctx.error_policy.report_annotation_error(e, item_id=item_id)
+            label_id, xc, yc, w, h = parts
+
+            label_id = cls._parse_field(label_id, int, "bbox label id")
+            if label_id not in label_categories:
+                raise UndeclaredLabelError(str(label_id))
+
+            w = cls._parse_field(w, float, "bbox width")
+            h = cls._parse_field(h, float, "bbox height")
+            x = cls._parse_field(xc, float, "bbox center x") - w * 0.5
+            y = cls._parse_field(yc, float, "bbox center y") - h * 0.5
+            annotations.append(
+                Bbox(
+                    x * image_width,
+                    y * image_height,
+                    w * image_width,
+                    h * image_height,
+                    label=label_id,
+                )
+            )
 
         return annotations
 
@@ -272,11 +287,51 @@ def get_subset(self, name):
         return self._subsets[name]
 
 
-class YoloImporter(Importer):
-    @classmethod
-    def detect(cls, context: FormatDetectionContext) -> None:
-        context.require_file("obj.data")
+class YoloLooseBase(SubsetBase):
+    def __init__(
+        self,
+        config_path: str,
+        image_info: Union[None, str, ImageMeta] = None,
+        urls: Optional[List[str]] = None,
+        **kwargs,
+    ) -> None:
+        super().__init__(**kwargs)
 
-    @classmethod
-    def find_sources(cls, path):
-        return cls._find_sources_recursive(path, ".data", "yolo")
+        if not osp.isdir(config_path):
+            raise DatasetImportError(f"{config_path} should be a directory.")
+
+        rootpath = config_path
+        self._path = rootpath
+
+        self._image_info = YoloStrictBase.parse_image_info(rootpath, image_info)
+
+        # Init label categories
+        label_categories = YoloStrictBase._load_categories(
+            osp.join(rootpath, YoloLoosePath.NAMES_FILE)
+        )
+        self._categories = {AnnotationType.label: label_categories}
+
+        # Parse dataset items
+        def _get_fname(fpath: str) -> str:
+            return osp.splitext(osp.basename(fpath))[0]
+
+        img_files = {
+            _get_fname(img_file): img_file
+            for img_file in find_files(rootpath, IMAGE_EXTENSIONS, recursive=True, max_depth=2)
+            if extract_subset_name_from_parent(img_file, rootpath) == self._subset
+        }
+
+        for url in urls:
+            try:
+                fname = _get_fname(url)
+                img = Image(path=img_files[fname])
+                anns = YoloStrictBase._parse_annotations(
+                    url,
+                    img,
+                    label_categories=label_categories,
+                )
+                self._items.append(
+                    DatasetItem(id=fname, subset=self._subset, media=img, annotations=anns)
+                )
+            except Exception as e:
+                self._ctx.error_policy.report_item_error(e, item_id=(fname, self._subset))
diff --git a/datumaro/plugins/data_formats/yolo/format.py b/datumaro/plugins/data_formats/yolo/format.py
@@ -2,8 +2,19 @@
 #
 # SPDX-License-Identifier: MIT
 
+from enum import IntEnum
+
 
 class YoloPath:
     DEFAULT_SUBSET_NAME = "train"
     SUBSET_NAMES = ["train", "valid"]
     RESERVED_CONFIG_KEYS = ["backup", "classes", "names"]
+
+
+class YoloLoosePath:
+    NAMES_FILE = "obj.names"
+
+
+class YoloFormatType(IntEnum):
+    yolo_strict = 0
+    yolo_loose = 1