diff --git a/src/labelformat/formats/yolov8.py b/src/labelformat/formats/yolov8.py index 2d5a60b..4a5110b 100644 --- a/src/labelformat/formats/yolov8.py +++ b/src/labelformat/formats/yolov8.py @@ -69,8 +69,19 @@ def _root_dir(self) -> Path: return self._config_file.parent def _images_dir(self) -> Path: + """Get images directory from YOLOv8 config file with fallback logic.""" root_dir = self._root_dir() - return root_dir / str(self._config_data[self._split]) + split_path = str(self._config_data[self._split]) + # Try original path first, then fallback to modified path for Roboflow-style configs + path = root_dir / split_path + if ( + not path.exists() + and "path" not in self._config_data + and split_path.startswith("../") + ): + split_path = split_path.replace("../", "./", 1) + path = root_dir / split_path + return path def _labels_dir(self) -> Path: """Get labels directory from YOLOv8 config file. diff --git a/tests/unit/formats/test_yolov8.py b/tests/unit/formats/test_yolov8.py index 4589de3..251a3f7 100644 --- a/tests/unit/formats/test_yolov8.py +++ b/tests/unit/formats/test_yolov8.py @@ -1,7 +1,7 @@ from __future__ import annotations from pathlib import Path -from typing import Dict, List, Union +from typing import Callable, Dict, List, TypedDict, Union import pytest import yaml @@ -10,6 +10,15 @@ from labelformat.model.category import Category +class YOLOv8Config(TypedDict, total=False): + path: str + train: str + valid: str + test: str + nc: int + names: Union[Dict[int, str], List[str]] + + @pytest.fixture def expected_categories() -> List[Category]: return [ @@ -19,159 +28,249 @@ def expected_categories() -> List[Category]: ] -def test_get_categories_dict_format( - tmp_path: Path, expected_categories: List[Category] -) -> None: - config = { - "path": ".", - "train": "images", - "names": {0: "person", 1: "dog", 2: "cat"}, - } - config_file = tmp_path / "config.yaml" - with config_file.open("w") as f: - yaml.safe_dump(config, f) - - input_obj = _YOLOv8BaseInput(input_file=config_file, input_split="train") - categories = list(input_obj.get_categories()) - assert categories == expected_categories - - -def test_get_categories_list_format( - tmp_path: Path, expected_categories: List[Category] -) -> None: - config = { - "path": ".", - "train": "images", - "names": ["person", "dog", "cat"], - } - config_file = tmp_path / "config.yaml" - with config_file.open("w") as f: - yaml.safe_dump(config, f) - - input_obj = _YOLOv8BaseInput(input_file=config_file, input_split="train") - categories = list(input_obj.get_categories()) - assert categories == expected_categories - - -def test_get_categories_yaml_block_format( - tmp_path: Path, expected_categories: List[Category] -) -> None: - config: str = """ - path: . - train: images - names: - - person - - dog - - cat - """ - config_file = tmp_path / "config.yaml" - with config_file.open("w") as f: - f.write(config) - - input_obj = _YOLOv8BaseInput(input_file=config_file, input_split="train") - categories = list(input_obj.get_categories()) - assert categories == expected_categories - - -def test_root_dir_with_explicit_path(tmp_path: Path) -> None: - dataset_dir = tmp_path / "dataset" - dataset_dir.mkdir() - - config = {"path": ".", "train": "images", "names": ["person"]} - config_file = dataset_dir / "config.yaml" - with config_file.open("w") as f: - yaml.safe_dump(config, f) - - input_obj = _YOLOv8BaseInput(input_file=config_file, input_split="train") - assert input_obj._root_dir() == dataset_dir - - -def test_root_dir_without_path(tmp_path: Path) -> None: - dataset_dir = tmp_path / "dataset" - dataset_dir.mkdir() - - config = {"train": "images", "names": ["person"]} - config_file = dataset_dir / "config.yaml" - with config_file.open("w") as f: - yaml.safe_dump(config, f) - - input_obj = _YOLOv8BaseInput(input_file=config_file, input_split="train") - assert input_obj._root_dir() == dataset_dir - - -def test_invalid_names_format(tmp_path: Path) -> None: - config = { - "path": ".", - "train": "images", - "names": 123, # Invalid format - } - config_file = tmp_path / "config.yaml" - with config_file.open("w") as f: - yaml.safe_dump(config, f) - - input_obj = _YOLOv8BaseInput(input_file=config_file, input_split="train") - with pytest.raises(TypeError): # Will fail when trying to use len() on int - list(input_obj.get_categories()) - - -def test_labels_dir_relative_to_path(tmp_path: Path) -> None: - """Test labels directory resolution for paths relative to dataset root.""" - config = { - "path": "../datasets/coco8", - "train": "images/train", - "names": ["person"], - } - config_file = tmp_path / "config.yaml" - with config_file.open("w") as f: - yaml.safe_dump(config, f) - - input_obj = _YOLOv8BaseInput(input_file=config_file, input_split="train") - expected = tmp_path / "../datasets/coco8/labels/train" - assert input_obj._labels_dir() == expected - - -def test_labels_dir_absolute_path(tmp_path: Path) -> None: - """Test labels directory resolution for absolute paths.""" - config = { - "path": ".", - "train": "../train/images", - "names": ["head", "helmet", "person"], - } - config_file = tmp_path / "config.yaml" - with config_file.open("w") as f: - yaml.safe_dump(config, f) - - input_obj = _YOLOv8BaseInput(input_file=config_file, input_split="train") - expected = tmp_path / "../train/labels" - assert input_obj._labels_dir() == expected - - -def test_labels_dir_with_images_in_path(tmp_path: Path) -> None: - """Test labels directory resolution when 'images' appears in the root path.""" - config = { - "path": "mydataset/images/dataset1", - "train": "images/train", - "names": ["person"], - } - config_file = tmp_path / "config.yaml" - with config_file.open("w") as f: - yaml.safe_dump(config, f) - - input_obj = _YOLOv8BaseInput(input_file=config_file, input_split="train") - expected = tmp_path / "mydataset/images/dataset1/labels/train" - assert input_obj._labels_dir() == expected - - -def test_labels_dir_without_path(tmp_path: Path) -> None: - """Test labels directory resolution when 'path' is not specified in config.""" - config = { - "train": "images/train", - "names": ["person"], - } - config_file = tmp_path / "config.yaml" - with config_file.open("w") as f: - yaml.safe_dump(config, f) - - input_obj = _YOLOv8BaseInput(input_file=config_file, input_split="train") - expected = tmp_path / "labels/train" - assert input_obj._labels_dir() == expected +@pytest.fixture +def config_file_factory(tmp_path: Path) -> Callable[[Union[YOLOv8Config, str]], Path]: + """Factory fixture to create config files with different formats.""" + + def _create_config(config_data: Union[YOLOv8Config, str]) -> Path: + config_file = tmp_path / "config.yaml" + + if isinstance(config_data, str): + with config_file.open("w") as f: + f.write(config_data) + else: + with config_file.open("w") as f: + yaml.safe_dump(config_data, f) + + return config_file + + return _create_config + + +class Test_YOLOv8BaseInput: + class Test_GetCategories: + def test_extracts_categories_from_dict_format( + self, + config_file_factory: Callable[[Union[YOLOv8Config, str]], Path], + expected_categories: List[Category], + ) -> None: + config: YOLOv8Config = { + "path": ".", + "train": "images", + "names": {0: "person", 1: "dog", 2: "cat"}, + } + config_file = config_file_factory(config) + + input_obj = _YOLOv8BaseInput(input_file=config_file, input_split="train") + categories = list(input_obj.get_categories()) + assert categories == expected_categories + + def test_extracts_categories_from_list_format( + self, + config_file_factory: Callable[[Union[YOLOv8Config, str]], Path], + expected_categories: List[Category], + ) -> None: + config: YOLOv8Config = { + "path": ".", + "train": "images", + "names": ["person", "dog", "cat"], + } + config_file = config_file_factory(config) + + input_obj = _YOLOv8BaseInput(input_file=config_file, input_split="train") + categories = list(input_obj.get_categories()) + assert categories == expected_categories + + def test_extracts_categories_from_yaml_block_format( + self, + config_file_factory: Callable[[Union[YOLOv8Config, str]], Path], + expected_categories: List[Category], + ) -> None: + config = """ + path: . + train: images + names: + - person + - dog + - cat + """ + config_file = config_file_factory(config) + + input_obj = _YOLOv8BaseInput(input_file=config_file, input_split="train") + categories = list(input_obj.get_categories()) + assert categories == expected_categories + + def test_raises_error_for_invalid_names_format( + self, config_file_factory: Callable[[Union[YOLOv8Config, str]], Path] + ) -> None: + config = { + "path": ".", + "train": "images", + "names": 123, # Invalid format will make mypy raise an error + } + config_file = config_file_factory(config) # type: ignore[arg-type] + + input_obj = _YOLOv8BaseInput(input_file=config_file, input_split="train") + with pytest.raises(TypeError): # Will fail when trying to use len() on int + list(input_obj.get_categories()) + + class Test_RootDir: + def test_resolves_root_dir_with_explicit_path(self, tmp_path: Path) -> None: + dataset_dir = tmp_path / "dataset" + dataset_dir.mkdir() + + config = {"path": ".", "train": "images", "names": ["person"]} + config_file = dataset_dir / "config.yaml" + with config_file.open("w") as f: + yaml.safe_dump(config, f) + + input_obj = _YOLOv8BaseInput(input_file=config_file, input_split="train") + assert input_obj._root_dir() == dataset_dir + + def test_resolves_root_dir_without_path(self, tmp_path: Path) -> None: + dataset_dir = tmp_path / "dataset" + dataset_dir.mkdir() + + config = {"train": "images", "names": ["person"]} + config_file = dataset_dir / "config.yaml" + with config_file.open("w") as f: + yaml.safe_dump(config, f) + + input_obj = _YOLOv8BaseInput(input_file=config_file, input_split="train") + assert input_obj._root_dir() == dataset_dir + + class Test_LabelsDir: + def test_resolves_labels_dir_relative_to_path( + self, config_file_factory: Callable[[Union[YOLOv8Config, str]], Path] + ) -> None: + config: YOLOv8Config = { + "path": "../datasets/coco8", + "train": "images/train", + "names": ["person"], + } + config_file = config_file_factory(config) + + input_obj = _YOLOv8BaseInput(input_file=config_file, input_split="train") + expected = config_file.parent / "../datasets/coco8/labels/train" + assert input_obj._labels_dir() == expected + + def test_resolves_labels_dir_for_absolute_path( + self, config_file_factory: Callable[[Union[YOLOv8Config, str]], Path] + ) -> None: + config: YOLOv8Config = { + "path": ".", + "train": "../train/images", + "names": ["head", "helmet", "person"], + } + config_file = config_file_factory(config) + + input_obj = _YOLOv8BaseInput(input_file=config_file, input_split="train") + expected = config_file.parent / "../train/labels" + assert input_obj._labels_dir() == expected + + def test_resolves_labels_dir_with_images_in_path( + self, config_file_factory: Callable[[Union[YOLOv8Config, str]], Path] + ) -> None: + config: YOLOv8Config = { + "path": "mydataset/images/dataset1", + "train": "images/train", + "names": ["person"], + } + config_file = config_file_factory(config) + + input_obj = _YOLOv8BaseInput(input_file=config_file, input_split="train") + expected = config_file.parent / "mydataset/images/dataset1/labels/train" + assert input_obj._labels_dir() == expected + + def test_resolves_labels_dir_without_path( + self, config_file_factory: Callable[[Union[YOLOv8Config, str]], Path] + ) -> None: + config: YOLOv8Config = { + "train": "images/train", + "names": ["person"], + } + config_file = config_file_factory(config) + + input_obj = _YOLOv8BaseInput(input_file=config_file, input_split="train") + expected = config_file.parent / "labels/train" + assert input_obj._labels_dir() == expected + + class Test_MultilevelPaths: + def test_handles_relative_paths_with_dot_notation(self, tmp_path: Path) -> None: + dataset_root = tmp_path / "dataset" + for split in ["train", "valid", "test"]: + (dataset_root / split / "images").mkdir(parents=True) + (dataset_root / split / "labels").mkdir(parents=True) + + config: YOLOv8Config = { + "train": "./train/images", + "valid": "./valid/images", + "test": "./test/images", + "names": ["person"], + } + config_file = dataset_root / "data.yaml" + with config_file.open("w") as f: + yaml.safe_dump(config, f) + + input_obj = _YOLOv8BaseInput(input_file=config_file, input_split="train") + + assert input_obj._root_dir() == dataset_root + assert input_obj._images_dir() == dataset_root / "train" / "images" + assert input_obj._labels_dir() == dataset_root / "train" / "labels" + + def test_handles_relative_paths(self, tmp_path: Path) -> None: + dataset_root = tmp_path / "dataset" + for split in ["train", "valid", "test"]: + (dataset_root / split / "images").mkdir(parents=True) + (dataset_root / split / "labels").mkdir(parents=True) + + config: YOLOv8Config = { + "train": "../train/images", + "valid": "../valid/images", + "test": "../test/images", + "names": ["person"], + } + config_file = dataset_root / "data.yaml" + with config_file.open("w") as f: + yaml.safe_dump(config, f) + + input_obj = _YOLOv8BaseInput(input_file=config_file, input_split="train") + + assert input_obj._root_dir() == dataset_root + assert input_obj._images_dir() == dataset_root / "train" / "images" + assert input_obj._labels_dir() == dataset_root / "train" / "labels" + + def test_preserves_parent_dir_references_when_directory_exists( + self, tmp_path: Path + ) -> None: + parent_dir = tmp_path / "parent" + parent_dir.mkdir() + + dataset_dir = parent_dir / "dataset" + dataset_dir.mkdir() + + target_images_dir = parent_dir / "images" + target_images_dir.mkdir() + + target_labels_dir = parent_dir / "labels" + target_labels_dir.mkdir() + + config: YOLOv8Config = { + "path": ".", + "train": "../images", # This is intentionally using ../ to go up to parent/images + "names": ["person"], + } + config_file = dataset_dir / "config.yaml" + with config_file.open("w") as f: + yaml.safe_dump(config, f) + + input_obj = _YOLOv8BaseInput(input_file=config_file, input_split="train") + + assert input_obj._root_dir() == dataset_dir + # Use .resolve() to normalize the path for comparison + assert ( + input_obj._images_dir().resolve() == (parent_dir / "images").resolve() + ) + assert ( + input_obj._labels_dir().resolve() == (parent_dir / "labels").resolve() + )