diff --git a/poetry.lock b/poetry.lock index f9a134d..53124ca 100644 --- a/poetry.lock +++ b/poetry.lock @@ -370,8 +370,11 @@ name = "docutils" version = "0.20.1" description = "Docutils -- Python Documentation Utilities" optional = false -python-versions = "*" -files = [] +python-versions = ">=3.7" +files = [ + {file = "docutils-0.20.1-py3-none-any.whl", hash = "sha256:96f387a2c5562db4476f09f13bbab2192e764cac08ebbf3a34a95d9b1e4a59d6"}, + {file = "docutils-0.20.1.tar.gz", hash = "sha256:f08a4e276c3a1583a86dce3e34aba3fe04d02bba2dd51ed16106244e8a923e3b"}, +] [[package]] name = "exceptiongroup" @@ -769,6 +772,43 @@ files = [ {file = "nh3-0.2.21.tar.gz", hash = "sha256:4990e7ee6a55490dbf00d61a6f476c9a3258e31e711e13713b2ea7d6616f670e"}, ] +[[package]] +name = "numpy" +version = "1.24.4" +description = "Fundamental package for array computing in Python" +optional = false +python-versions = ">=3.8" +files = [ + {file = "numpy-1.24.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:c0bfb52d2169d58c1cdb8cc1f16989101639b34c7d3ce60ed70b19c63eba0b64"}, + {file = "numpy-1.24.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:ed094d4f0c177b1b8e7aa9cba7d6ceed51c0e569a5318ac0ca9a090680a6a1b1"}, + {file = "numpy-1.24.4-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:79fc682a374c4a8ed08b331bef9c5f582585d1048fa6d80bc6c35bc384eee9b4"}, + {file = "numpy-1.24.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7ffe43c74893dbf38c2b0a1f5428760a1a9c98285553c89e12d70a96a7f3a4d6"}, + {file = "numpy-1.24.4-cp310-cp310-win32.whl", hash = "sha256:4c21decb6ea94057331e111a5bed9a79d335658c27ce2adb580fb4d54f2ad9bc"}, + {file = "numpy-1.24.4-cp310-cp310-win_amd64.whl", hash = "sha256:b4bea75e47d9586d31e892a7401f76e909712a0fd510f58f5337bea9572c571e"}, + {file = "numpy-1.24.4-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:f136bab9c2cfd8da131132c2cf6cc27331dd6fae65f95f69dcd4ae3c3639c810"}, + {file = 
"numpy-1.24.4-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:e2926dac25b313635e4d6cf4dc4e51c8c0ebfed60b801c799ffc4c32bf3d1254"}, + {file = "numpy-1.24.4-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:222e40d0e2548690405b0b3c7b21d1169117391c2e82c378467ef9ab4c8f0da7"}, + {file = "numpy-1.24.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7215847ce88a85ce39baf9e89070cb860c98fdddacbaa6c0da3ffb31b3350bd5"}, + {file = "numpy-1.24.4-cp311-cp311-win32.whl", hash = "sha256:4979217d7de511a8d57f4b4b5b2b965f707768440c17cb70fbf254c4b225238d"}, + {file = "numpy-1.24.4-cp311-cp311-win_amd64.whl", hash = "sha256:b7b1fc9864d7d39e28f41d089bfd6353cb5f27ecd9905348c24187a768c79694"}, + {file = "numpy-1.24.4-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:1452241c290f3e2a312c137a9999cdbf63f78864d63c79039bda65ee86943f61"}, + {file = "numpy-1.24.4-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:04640dab83f7c6c85abf9cd729c5b65f1ebd0ccf9de90b270cd61935eef0197f"}, + {file = "numpy-1.24.4-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a5425b114831d1e77e4b5d812b69d11d962e104095a5b9c3b641a218abcc050e"}, + {file = "numpy-1.24.4-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dd80e219fd4c71fc3699fc1dadac5dcf4fd882bfc6f7ec53d30fa197b8ee22dc"}, + {file = "numpy-1.24.4-cp38-cp38-win32.whl", hash = "sha256:4602244f345453db537be5314d3983dbf5834a9701b7723ec28923e2889e0bb2"}, + {file = "numpy-1.24.4-cp38-cp38-win_amd64.whl", hash = "sha256:692f2e0f55794943c5bfff12b3f56f99af76f902fc47487bdfe97856de51a706"}, + {file = "numpy-1.24.4-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:2541312fbf09977f3b3ad449c4e5f4bb55d0dbf79226d7724211acc905049400"}, + {file = "numpy-1.24.4-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:9667575fb6d13c95f1b36aca12c5ee3356bf001b714fc354eb5465ce1609e62f"}, + {file = "numpy-1.24.4-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:f3a86ed21e4f87050382c7bc96571755193c4c1392490744ac73d660e8f564a9"}, + {file = "numpy-1.24.4-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d11efb4dbecbdf22508d55e48d9c8384db795e1b7b51ea735289ff96613ff74d"}, + {file = "numpy-1.24.4-cp39-cp39-win32.whl", hash = "sha256:6620c0acd41dbcb368610bb2f4d83145674040025e5536954782467100aa8835"}, + {file = "numpy-1.24.4-cp39-cp39-win_amd64.whl", hash = "sha256:befe2bf740fd8373cf56149a5c23a0f601e82869598d41f8e188a0e9869926f8"}, + {file = "numpy-1.24.4-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:31f13e25b4e304632a4619d0e0777662c2ffea99fcae2029556b17d8ff958aef"}, + {file = "numpy-1.24.4-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:95f7ac6540e95bc440ad77f56e520da5bf877f87dca58bd095288dce8940532a"}, + {file = "numpy-1.24.4-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:e98f220aa76ca2a977fe435f5b04d7b3470c0a2e6312907b37ba6068f26787f2"}, + {file = "numpy-1.24.4.tar.gz", hash = "sha256:80f5e3a4e498641401868df4208b74581206afbee7cf7b8329daae82676d9463"}, +] + [[package]] name = "packaging" version = "24.2" @@ -1502,4 +1542,4 @@ type = ["pytest-mypy"] [metadata] lock-version = "2.0" python-versions = ">=3.8" -content-hash = "9c717c3cef42122fb097449fa23aa714e345666637ae525a16d4ab027b21156d" +content-hash = "99fb08d51d572fac0647e5aff6db40a9351d837b2a207151fc03988539af26da" diff --git a/pyproject.toml b/pyproject.toml index 8efbd79..1e13410 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -16,6 +16,7 @@ tqdm = "*" pyyaml = "*" pillow = "*" pydantic-xml = "*" +numpy = "*" [tool.poetry.group.dev.dependencies] mypy = "*" diff --git a/src/labelformat/formats/coco.py b/src/labelformat/formats/coco.py index 170683f..cdbfd1e 100644 --- a/src/labelformat/formats/coco.py +++ b/src/labelformat/formats/coco.py @@ -1,9 +1,15 @@ +from __future__ import annotations + import json from argparse import ArgumentParser from pathlib import Path -from typing import Dict, 
Iterable, List +from typing import Dict, Iterable, List, TypedDict from labelformat.cli.registry import Task, cli_register +from labelformat.model.binary_mask_segmentation import ( + BinaryMaskSegmentation, + RLEDecoderEncoder, +) from labelformat.model.bounding_box import BoundingBox, BoundingBoxFormat from labelformat.model.category import Category from labelformat.model.image import Image @@ -54,6 +60,14 @@ def get_images(self) -> Iterable[Image]: ) +class _COCOInstanceSegmentationRLE(TypedDict): + counts: list[int] + size: list[int] + + +_COCOInstanceSegmentationMultiPolygon = List[List[float]] + + @cli_register(format="coco", task=Task.OBJECT_DETECTION) class COCOObjectDetectionInput(_COCOBaseInput, ObjectDetectionInput): def get_labels(self) -> Iterable[ImageObjectDetection]: @@ -103,14 +117,15 @@ def get_labels(self) -> Iterable[ImageInstanceSegmentation]: for ann in annotations: if "segmentation" not in ann: raise ParseError(f"Segmentation missing for image id {image_id}") + segmentation: MultiPolygon | BinaryMaskSegmentation if ann["iscrowd"] == 1: - raise ParseError( - "Parsing segmentations with iscrowd=1 is not yet supported. 
" - f"(image id {image_id})" + segmentation = _coco_segmentation_to_binary_mask_rle( + segmentation=ann["segmentation"], bbox=ann["bbox"] + ) + else: + segmentation = _coco_segmentation_to_multipolygon( + coco_segmentation=ann["segmentation"] ) - segmentation = _coco_segmentation_to_multipolygon( - coco_segmentation=ann["segmentation"] - ) objects.append( SingleInstanceSegmentation( category=category_id_to_category[ann["category_id"]], @@ -173,19 +188,41 @@ def save(self, label_input: InstanceSegmentationInput) -> None: data["annotations"] = [] for label in label_input.get_labels(): for obj in label.objects: - annotation = { - "image_id": label.image.id, - "category_id": obj.category.id, - "bbox": [ + segmentation: ( + _COCOInstanceSegmentationMultiPolygon | _COCOInstanceSegmentationRLE + ) + if isinstance(obj.segmentation, BinaryMaskSegmentation): + segmentation = _binary_mask_rle_to_coco_segmentation( + binary_mask_rle=obj.segmentation + ) + bbox = [ float(v) - for v in obj.segmentation.bounding_box().to_format( + for v in obj.segmentation.bounding_box.to_format( BoundingBoxFormat.XYWH ) - ], - "iscrowd": 0, - "segmentation": _multipolygon_to_coco_segmentation( + ] + iscrowd = 1 + elif isinstance(obj.segmentation, MultiPolygon): + segmentation = _multipolygon_to_coco_segmentation( multipolygon=obj.segmentation - ), + ) + bbox = [ + float(v) + for v in obj.segmentation.bounding_box().to_format( + BoundingBoxFormat.XYWH + ) + ] + iscrowd = 0 + else: + raise ParseError( + f"Unsupported segmentation type: {type(obj.segmentation)}" + ) + annotation = { + "image_id": label.image.id, + "category_id": obj.category.id, + "bbox": bbox, + "iscrowd": iscrowd, + "segmentation": segmentation, } data["annotations"].append(annotation) @@ -195,7 +232,7 @@ def save(self, label_input: InstanceSegmentationInput) -> None: def _coco_segmentation_to_multipolygon( - coco_segmentation: List[List[float]], + coco_segmentation: _COCOInstanceSegmentationMultiPolygon, ) -> MultiPolygon: 
"""Convert COCO segmentation to MultiPolygon.""" polygons = [] @@ -213,7 +250,9 @@ def _coco_segmentation_to_multipolygon( return MultiPolygon(polygons=polygons) -def _multipolygon_to_coco_segmentation(multipolygon: MultiPolygon) -> List[List[float]]: +def _multipolygon_to_coco_segmentation( + multipolygon: MultiPolygon, +) -> _COCOInstanceSegmentationMultiPolygon: """Convert MultiPolygon to COCO segmentation.""" coco_segmentation = [] for polygon in multipolygon.polygons: @@ -221,6 +260,26 @@ def _multipolygon_to_coco_segmentation(multipolygon: MultiPolygon) -> List[List[ return coco_segmentation +def _coco_segmentation_to_binary_mask_rle( + segmentation: _COCOInstanceSegmentationRLE, bbox: list[float] +) -> BinaryMaskSegmentation: + counts = segmentation["counts"] + height, width = segmentation["size"] + binary_mask = RLEDecoderEncoder.decode_column_wise_rle(counts, height, width) + bounding_box = BoundingBox.from_format(bbox=bbox, format=BoundingBoxFormat.XYWH) + return BinaryMaskSegmentation.from_binary_mask( + binary_mask, bounding_box=bounding_box + ) + + +def _binary_mask_rle_to_coco_segmentation( + binary_mask_rle: BinaryMaskSegmentation, +) -> _COCOInstanceSegmentationRLE: + binary_mask = binary_mask_rle.get_binary_mask() + counts = RLEDecoderEncoder.encode_column_wise_rle(binary_mask) + return {"counts": counts, "size": [binary_mask_rle.height, binary_mask_rle.width]} + + def _get_output_images_dict( images: Iterable[Image], ) -> List[JsonDict]: diff --git a/src/labelformat/formats/yolov8.py b/src/labelformat/formats/yolov8.py index 4a5110b..a2a2494 100644 --- a/src/labelformat/formats/yolov8.py +++ b/src/labelformat/formats/yolov8.py @@ -290,6 +290,10 @@ def save(self, label_input: InstanceSegmentationInput) -> None: label_path.parent.mkdir(parents=True, exist_ok=True) with label_path.open("w") as file: for obj in label.objects: + if not isinstance(obj.segmentation, MultiPolygon): + raise ValueError( + f"YOLOv8 format only supports MultiPolygon 
@dataclass(frozen=True)
class BinaryMaskSegmentation:
    """A binary segmentation mask stored as a row-wise run-length encoding.

    The mask is not kept as an array. Only the run lengths of its row-major
    flattening are stored, together with the dimensions needed to rebuild it.
    Use :meth:`from_binary_mask` to build an instance from a 2D array and
    :meth:`get_binary_mask` to get the array back.
    """

    # Run lengths of the row-major flattened mask; the first run counts zeros.
    _rle_row_wise: list[int]
    # Number of mask columns.
    width: int
    # Number of mask rows.
    height: int
    # Box supplied by the caller. It is stored as given; nothing here derives
    # it from the mask or checks it against the mask.
    bounding_box: "BoundingBox"

    @classmethod
    def from_binary_mask(
        cls, binary_mask: NDArray[np.int_], bounding_box: "BoundingBox"
    ) -> "BinaryMaskSegmentation":
        """Create an instance from a 2D mask array.

        Args:
            binary_mask: 2D numpy array; non-zero entries are foreground.
            bounding_box: Box to attach to the segmentation (stored unchanged).

        Returns:
            A segmentation holding the row-wise RLE of ``binary_mask`` and its
            height and width.

        Raises:
            ValueError: If ``binary_mask`` is not a numpy array or is not 2D.
        """
        if not isinstance(binary_mask, np.ndarray):
            raise ValueError("Binary mask must be a numpy array.")
        if binary_mask.ndim != 2:
            raise ValueError("Binary mask must be a 2D array.")
        height, width = binary_mask.shape

        rle_row_wise = RLEDecoderEncoder.encode_row_wise_rle(binary_mask)
        return cls(
            _rle_row_wise=rle_row_wise,
            width=width,
            height=height,
            bounding_box=bounding_box,
        )

    def get_binary_mask(self) -> NDArray[np.int_]:
        """Rebuild the 2D mask (shape ``(height, width)``) from the stored RLE.

        Raises:
            ValueError: If the stored run lengths do not describe exactly
                ``height * width`` pixels.
        """
        return RLEDecoderEncoder.decode_row_wise_rle(
            self._rle_row_wise, self.height, self.width
        )


class RLEDecoderEncoder:
    """Encode and decode binary masks with run-length encoding (RLE).

    An RLE is a list of run lengths over the flattened mask. Runs alternate
    between background (0) and foreground (1) and the first run always counts
    zeros, so a mask that starts with a foreground pixel has a leading ``0``.

    The mask can be flattened row-wise (row-major, C order) or column-wise
    (column-major, Fortran order).

    Example:
        Consider a binary mask of shape 2x4:
            [[0, 1, 1, 0],
             [1, 1, 1, 1]]
        Row-wise RLE: [1, 2, 1, 4]
        Column-wise RLE: [1, 5, 1, 1]
    """

    @staticmethod
    def encode_row_wise_rle(binary_mask: NDArray[np.int_]) -> list[int]:
        """Encode ``binary_mask`` as run lengths over its row-major flattening."""
        return RLEDecoderEncoder._encode(binary_mask, order="C")

    @staticmethod
    def encode_column_wise_rle(binary_mask: NDArray[np.int_]) -> list[int]:
        """Encode ``binary_mask`` as run lengths over its column-major flattening."""
        return RLEDecoderEncoder._encode(binary_mask, order="F")

    @staticmethod
    def decode_row_wise_rle(
        rle: list[int], height: int, width: int
    ) -> NDArray[np.int_]:
        """Decode a row-major RLE into a ``(height, width)`` array of 0s and 1s.

        Raises:
            ValueError: If a run length is negative or the run lengths do not
                sum to ``height * width``.
        """
        return RLEDecoderEncoder._decode(rle, height, width, order="C")

    @staticmethod
    def decode_column_wise_rle(
        rle: list[int], height: int, width: int
    ) -> NDArray[np.int_]:
        """Decode a column-major RLE into a ``(height, width)`` array of 0s and 1s.

        Raises:
            ValueError: If a run length is negative or the run lengths do not
                sum to ``height * width``.
        """
        return RLEDecoderEncoder._decode(rle, height, width, order="F")

    @staticmethod
    def _encode(binary_mask: NDArray[np.int_], order: str) -> list[int]:
        """Return the run lengths of ``binary_mask`` flattened in ``order``."""
        # Treat every non-zero value as foreground so that boolean masks and
        # 0/255 masks encode the same way as 0/1 masks.
        flat = np.asarray(binary_mask).ravel(order=order) != 0
        if flat.size == 0:
            return []
        # Positions at which a new run starts (the value differs from the
        # previous pixel), framed by the start and the end of the mask.
        run_starts = np.flatnonzero(flat[1:] != flat[:-1]) + 1
        boundaries = np.concatenate(([0], run_starts, [flat.size]))
        runs = np.diff(boundaries)
        if flat[0]:
            # The first run must count zeros; there are none here.
            runs = np.concatenate(([0], runs))
        rle_list: list[int] = runs.tolist()
        return rle_list

    @staticmethod
    def _decode(
        rle: list[int], height: int, width: int, order: str
    ) -> NDArray[np.int_]:
        """Expand ``rle`` and reshape it to ``(height, width)`` using ``order``."""
        runs = np.asarray(rle, dtype=np.int64)
        if runs.ndim != 1:
            raise ValueError("RLE must be a flat sequence of run lengths.")
        if runs.size and int(runs.min()) < 0:
            raise ValueError("RLE run lengths must be non-negative.")
        total = int(runs.sum())
        if total != height * width:
            raise ValueError(
                f"RLE run lengths sum to {total}, but a mask of height {height} "
                f"and width {width} has {height * width} pixels."
            )
        # Runs alternate 0, 1, 0, 1, ... starting with background.
        run_values = np.arange(runs.size, dtype=np.int_) % 2
        return np.repeat(run_values, runs).reshape((height, width), order=order)
"Attribution-ShareAlike License" + } + ], + "categories": [ + { + "supercategory": "sports", + "id": 0, + "name": "kite" + }, + { + "supercategory": "person", + "id": 1, + "name": "person" + }, + { + "supercategory": "animal", + "id": 2, + "name": "elephant" + }, + { + "supercategory": "sports", + "id": 3, + "name": "sports ball" + } + ], + "images": [ + { + "license": 5, + "file_name": "000000109005.jpg", + "coco_url": "http://images.cocodataset.org/train2017/000000109005.jpg", + "height": 428, + "width": 640, + "date_captured": "2013-11-18 05:39:43", + "flickr_url": "http://farm8.staticflickr.com/7084/7189768737_e787a5ebb4_z.jpg", + "id": 109005 + }, + { + "license": 4, + "file_name": "000000036086.jpg", + "coco_url": "http://images.cocodataset.org/train2017/000000036086.jpg", + "height": 640, + "width": 482, + "date_captured": "2013-11-22 02:33:34", + "flickr_url": "http://farm7.staticflickr.com/6097/6255998533_699b3d2e9f_z.jpg", + "id": 36086 + }, + { + "license": 4, + "file_name": "000000374545.jpg", + "coco_url": "http://images.cocodataset.org/val2017/000000374545.jpg", + "height": 640, + "width": 549, + "date_captured": "2013-11-22 09:03:11", + "flickr_url": "http://farm1.staticflickr.com/184/451764136_e652115475_z.jpg", + "id": 374545 + } + ], + "annotations": [ + { + "segmentation": [ + [ + 334.01, + 161.33, + 349.06, + 159.96, + 365.47, + 165.89, + 375.96, + 178.66, + 380.98, + 197.81, + 381.43, + 202.37, + 385.54, + 205.56, + 381.43, + 206.01, + 380.98, + 222.43, + 381.43, + 233.37, + 376.87, + 246.14, + 370.95, + 243.86, + 375.05, + 233.83, + 371.86, + 216.5, + 367.75, + 203.28, + 364.11, + 198.72, + 361.83, + 203.28, + 354.07, + 195.53, + 352.25, + 201.0, + 348.6, + 197.81, + 344.04, + 205.56, + 343.59, + 220.61, + 343.13, + 232.01, + 342.67, + 240.67, + 338.12, + 246.14, + 329.91, + 222.43, + 326.26, + 239.76, + 318.96, + 247.97, + 311.21, + 247.97, + 316.23, + 236.57, + 316.68, + 224.25, + 316.68, + 218.33, + 306.65, + 221.52, + 301.64, + 228.36, + 
301.18, + 247.05, + 288.87, + 245.69, + 292.97, + 234.29, + 290.69, + 226.53, + 282.03, + 225.62, + 275.19, + 233.37, + 277.47, + 250.7, + 265.61, + 249.79, + 265.61, + 231.09, + 263.79, + 202.82, + 267.89, + 183.22, + 276.56, + 173.64, + 288.41, + 167.26, + 294.34, + 165.43, + 301.18, + 165.43, + 307.11, + 167.71, + 314.86, + 167.71, + 322.16, + 165.43, + 329.0, + 164.06, + 334.92, + 161.33 + ] + ], + "area": 6956.119049999998, + "iscrowd": 0, + "image_id": 109005, + "bbox": [ + 263.79, + 159.96, + 121.75, + 90.74 + ], + "category_id": 2, + "id": 581786 + }, + { + "segmentation": [ + [ + 119.92, + 488.63, + 127.07, + 483.76, + 135.45, + 481.63, + 143.06, + 482.55, + 152.95, + 485.74, + 159.65, + 493.51, + 163.76, + 502.18, + 164.52, + 512.08, + 162.69, + 519.84, + 158.89, + 525.17, + 155.69, + 529.43, + 145.34, + 534.15, + 135.6, + 534.3, + 123.72, + 531.41, + 116.11, + 525.32, + 111.24, + 515.58, + 110.18, + 510.86, + 110.63, + 503.25, + 112.46, + 498.38, + 117.33, + 491.68 + ] + ], + "area": 2239.0532499999986, + "iscrowd": 0, + "image_id": 36086, + "bbox": [ + 110.18, + 481.63, + 54.34, + 52.67 + ], + "category_id": 3, + "id": 303325 + }, + { + "segmentation": [ + [ + 225.8, + 504.81, + 243.06, + 476.04, + 218.61, + 353.8, + 302.02, + 263.19, + 297.71, + 238.74, + 253.12, + 230.11, + 281.89, + 211.42, + 302.02, + 222.92, + 306.34, + 166.83, + 281.89, + 145.26, + 291.96, + 92.04, + 300.58, + 81.98, + 340.85, + 87.73, + 342.29, + 116.49, + 342.29, + 151.01, + 373.93, + 184.09, + 422.83, + 284.76, + 414.2, + 302.02, + 375.37, + 240.18, + 363.87, + 287.64, + 358.11, + 333.66, + 340.85, + 408.45, + 355.24, + 454.47, + 349.48, + 537.89, + 283.33, + 540.76, + 277.57, + 519.19, + 323.6, + 510.56, + 306.34, + 343.73, + 284.76, + 355.24, + 254.56, + 366.74, + 268.94, + 451.6, + 286.2, + 464.54, + 277.57, + 487.55, + 232.99, + 519.19 + ] + ], + "area": 35681.934150000016, + "iscrowd": 0, + "image_id": 36086, + "bbox": [ + 218.61, + 81.98, + 204.22, + 458.78 + ], + 
"category_id": 1, + "id": 445520 + }, + { + "segmentation": { + "counts": [ + 8214, + 6, + 629, + 17, + 2, + 6, + 614, + 28, + 611, + 29, + 610, + 31, + 609, + 31, + 609, + 32, + 608, + 32, + 608, + 32, + 608, + 31, + 609, + 31, + 610, + 29, + 612, + 27, + 615, + 16, + 3, + 4, + 620, + 11, + 35186, + 6, + 633, + 9, + 630, + 11, + 628, + 14, + 626, + 14, + 626, + 15, + 625, + 15, + 625, + 16, + 624, + 16, + 624, + 16, + 625, + 14, + 627, + 13, + 628, + 11, + 631, + 8, + 634, + 4, + 40318, + 5, + 629, + 14, + 624, + 17, + 622, + 19, + 620, + 20, + 619, + 22, + 617, + 23, + 617, + 23, + 617, + 22, + 618, + 22, + 618, + 21, + 619, + 7, + 1, + 4, + 3, + 4, + 621, + 6, + 3, + 1, + 631, + 3, + 638, + 1, + 133135, + 5, + 633, + 8, + 631, + 10, + 630, + 10, + 630, + 11, + 629, + 11, + 629, + 11, + 629, + 11, + 629, + 11, + 629, + 11, + 629, + 10, + 631, + 9, + 632, + 7, + 634, + 4, + 99294 + ], + "size": [ + 640, + 549 + ] + }, + "area": 962, + "iscrowd": 1, + "image_id": 374545, + "bbox": [ + 12, + 524, + 381, + 33 + ], + "category_id": 1, + "id": 900100374545 + } + ] +} diff --git a/tests/integration/instance_segmentation/test_instance_segmentation.py b/tests/integration/instance_segmentation/test_instance_segmentation.py index 31989ef..ba674a1 100644 --- a/tests/integration/instance_segmentation/test_instance_segmentation.py +++ b/tests/integration/instance_segmentation/test_instance_segmentation.py @@ -13,16 +13,18 @@ def test_coco_to_coco(tmp_path: Path) -> None: - coco_file = INST_SEGMENTATION_FIXTURES_DIR / "COCO/instances.json" + coco_file = INST_SEGMENTATION_FIXTURES_DIR / "COCO/instances_with_binary_mask.json" label_input = COCOInstanceSegmentationInput(input_file=coco_file) - COCOInstanceSegmentationOutput(output_file=tmp_path / "instances.json").save( - label_input=label_input - ) + COCOInstanceSegmentationOutput( + output_file=tmp_path / "instances_with_binary_mask.json" + ).save(label_input=label_input) # Compare jsons. 
- output_json = json.loads((tmp_path / "instances.json").read_text()) + output_json = json.loads((tmp_path / "instances_with_binary_mask.json").read_text()) expected_json = json.loads( - (INST_SEGMENTATION_FIXTURES_DIR / "COCO/instances.json").read_text() + ( + INST_SEGMENTATION_FIXTURES_DIR / "COCO/instances_with_binary_mask.json" + ).read_text() ) # Some fields are not converted: # - info diff --git a/tests/integration/instance_segmentation/test_inverse.py b/tests/integration/instance_segmentation/test_inverse.py index 096fb08..b6ffd46 100644 --- a/tests/integration/instance_segmentation/test_inverse.py +++ b/tests/integration/instance_segmentation/test_inverse.py @@ -10,13 +10,17 @@ YOLOv8InstanceSegmentationInput, YOLOv8InstanceSegmentationOutput, ) +from labelformat.model.multipolygon import MultiPolygon -from ...simple_instance_segmentation_label_input import SimpleInstanceSegmentationInput +from ...simple_instance_segmentation_label_input import ( + SimpleInstanceSegmentationInput, + SimpleInstanceSegmentationInputWithBinaryMask, +) from .. 
import integration_utils def test_coco_inverse(tmp_path: Path) -> None: - start_label_input = SimpleInstanceSegmentationInput() + start_label_input = SimpleInstanceSegmentationInputWithBinaryMask() COCOInstanceSegmentationOutput(output_file=tmp_path / "train.json").save( label_input=start_label_input ) @@ -46,6 +50,8 @@ def test_yolov8_inverse(tmp_path: Path, mocker: MockerFixture) -> None: assert len(image_label_0.objects) == len(image_label_1.objects) for object_0, object_1 in zip(image_label_0.objects, image_label_1.objects): assert object_0.category == object_1.category + assert isinstance(object_0.segmentation, MultiPolygon) + assert isinstance(object_1.segmentation, MultiPolygon) integration_utils.assert_multipolygons_almost_equal( object_0.segmentation, object_1.segmentation ) diff --git a/tests/simple_instance_segmentation_label_input.py b/tests/simple_instance_segmentation_label_input.py index 2491a1a..76886e4 100644 --- a/tests/simple_instance_segmentation_label_input.py +++ b/tests/simple_instance_segmentation_label_input.py @@ -1,6 +1,8 @@ from argparse import ArgumentParser from typing import Iterable +from labelformat.model.binary_mask_segmentation import BinaryMaskSegmentation +from labelformat.model.bounding_box import BoundingBox from labelformat.model.category import Category from labelformat.model.image import Image from labelformat.model.instance_segmentation import ( @@ -11,7 +13,7 @@ from labelformat.model.multipolygon import MultiPolygon -class SimpleInstanceSegmentationInput(InstanceSegmentationInput): +class _BaseSimpleInstanceSegmentationInput(InstanceSegmentationInput): def get_categories(self) -> Iterable[Category]: return [ Category(id=0, name="cat"), @@ -24,6 +26,13 @@ def get_images(self) -> Iterable[Image]: Image(id=0, filename="image.jpg", width=100, height=200), ] + @staticmethod + def add_cli_arguments(parser: ArgumentParser) -> None: + # Default implementation (no arguments) + pass + + +class 
SimpleInstanceSegmentationInput(_BaseSimpleInstanceSegmentationInput): def get_labels(self) -> Iterable[ImageInstanceSegmentation]: return [ ImageInstanceSegmentation( @@ -63,6 +72,42 @@ def get_labels(self) -> Iterable[ImageInstanceSegmentation]: ) ] - @staticmethod - def add_cli_arguments(parser: ArgumentParser) -> None: - raise NotImplementedError() + +class SimpleInstanceSegmentationInputWithBinaryMask( + _BaseSimpleInstanceSegmentationInput +): + def get_labels(self) -> Iterable[ImageInstanceSegmentation]: + return [ + ImageInstanceSegmentation( + image=Image(id=0, filename="image.jpg", width=100, height=200), + objects=[ + SingleInstanceSegmentation( + category=Category(id=1, name="dog"), + segmentation=MultiPolygon( + polygons=[ + [ + (10.0, 10.0), + (10.0, 20.0), + (20.0, 20.0), + (20.0, 10.0), + ], + ], + ), + ), + SingleInstanceSegmentation( + category=Category(id=0, name="cat"), + segmentation=BinaryMaskSegmentation( + _rle_row_wise=[1, 2, 3], + width=3, + height=2, + bounding_box=BoundingBox( + 0.0, + 0.0, + 3.0, + 2.0, + ), + ), + ), + ], + ) + ] diff --git a/tests/unit/model/test_binary_mask_segmentation.py b/tests/unit/model/test_binary_mask_segmentation.py new file mode 100644 index 0000000..b435920 --- /dev/null +++ b/tests/unit/model/test_binary_mask_segmentation.py @@ -0,0 +1,70 @@ +import numpy as np + +from labelformat.model.binary_mask_segmentation import ( + BinaryMaskSegmentation, + RLEDecoderEncoder, +) +from labelformat.model.bounding_box import BoundingBox + + +class TestBinaryMaskSegmentation: + + def test_from_binary_mask(self) -> None: + # Create a binary mask + binary_mask = np.array([[0, 1], [1, 0]], dtype=np.int_) + bounding_box = BoundingBox(0, 0, 2, 2) + + binary_mask_segmentation = BinaryMaskSegmentation.from_binary_mask( + binary_mask=binary_mask, bounding_box=bounding_box + ) + assert binary_mask_segmentation.width == 2 + assert binary_mask_segmentation.height == 2 + assert binary_mask_segmentation.bounding_box == bounding_box 
+ assert np.array_equal(binary_mask_segmentation.get_binary_mask(), binary_mask) + + +class TestRLEDecoderEncoder: + + def test_encode_row_wise_rle(self) -> None: + binary_mask = np.array([[0, 1, 1, 0], [1, 1, 1, 1]], dtype=np.int_) + rle = RLEDecoderEncoder.encode_row_wise_rle(binary_mask) + assert rle == [1, 2, 1, 4] + + def test_decode_row_wise_rle(self) -> None: + rle = [1, 2, 1, 4] + height = 2 + width = 4 + binary_mask = RLEDecoderEncoder.decode_row_wise_rle(rle, height, width) + expected_binary_mask = np.array([[0, 1, 1, 0], [1, 1, 1, 1]], dtype=np.uint8) + assert np.array_equal(binary_mask, expected_binary_mask) + + def test_encode_column_wise_rle(self) -> None: + binary_mask = np.array([[0, 1, 1, 0], [1, 1, 1, 1]], dtype=np.int_) + rle = RLEDecoderEncoder.encode_column_wise_rle(binary_mask) + assert rle == [1, 5, 1, 1] + + def test_decode_column_wise_rle(self) -> None: + rle = [1, 5, 1, 1] + height = 2 + width = 4 + binary_mask = RLEDecoderEncoder.decode_column_wise_rle(rle, height, width) + expected_binary_mask = np.array([[0, 1, 1, 0], [1, 1, 1, 1]], dtype=np.uint8) + assert np.array_equal(binary_mask, expected_binary_mask) + + def test_inverse__row_wise(self) -> None: + mask = np.random.randint(0, 2, (42, 9), dtype=np.int_) + + rle = RLEDecoderEncoder.encode_row_wise_rle(mask) + mask_inverse_row_wise = RLEDecoderEncoder.decode_row_wise_rle( + rle, mask.shape[0], mask.shape[1] + ) + assert np.array_equal(mask, mask_inverse_row_wise) + + def test_inverse__column_wise(self) -> None: + mask = np.random.randint(0, 2, (42, 9), dtype=np.int_) + + rle = RLEDecoderEncoder.encode_column_wise_rle(mask) + mask_inverse_column_wise = RLEDecoderEncoder.decode_column_wise_rle( + rle, mask.shape[0], mask.shape[1] + ) + assert np.array_equal(mask, mask_inverse_column_wise)