diff --git a/docs/detection/utils/converters.md b/docs/detection/utils/converters.md index 48bec65fe..b6b1e2af6 100644 --- a/docs/detection/utils/converters.md +++ b/docs/detection/utils/converters.md @@ -58,3 +58,9 @@ status: new :::supervision.detection.utils.converters.polygon_to_xyxy + +
+

xyxy_to_mask

+
def xyxy_to_mask(boxes: np.ndarray, resolution_wh: tuple[int, int]) -> np.ndarray:
    """
    Converts a 2D `np.ndarray` of bounding boxes into a 3D `np.ndarray` of bool masks.

    Each box coordinate is floored to an integer pixel index and clipped to the
    image bounds. Both ends of a box are inclusive, so `(0, 0, 2, 2)` fills a
    3x3 pixel region. Boxes that are inverted (`max < min`) or that lie entirely
    outside the image produce an all-`False` mask.

    Parameters:
        boxes (np.ndarray): A 2D `np.ndarray` of shape `(N, 4)`
            containing bounding boxes `(x_min, y_min, x_max, y_max)`
        resolution_wh (tuple[int, int]): A tuple `(width, height)` specifying
            the resolution of the output masks

    Returns:
        np.ndarray: A 3D `np.ndarray` of shape `(N, height, width)`
            containing 2D bool masks for each bounding box

    Raises:
        ValueError: If a coordinate is NaN.
        OverflowError: If a coordinate is infinite.

    Examples:
        ```python
        import numpy as np
        import supervision as sv

        boxes = np.array([[0, 0, 2, 2]])

        sv.xyxy_to_mask(boxes, (5, 5))
        # array([
        #     [[ True,  True,  True, False, False],
        #      [ True,  True,  True, False, False],
        #      [ True,  True,  True, False, False],
        #      [False, False, False, False, False],
        #      [False, False, False, False, False]]
        # ])

        boxes = np.array([[0, 0, 1, 1], [3, 3, 4, 4]])

        sv.xyxy_to_mask(boxes, (5, 5))
        # array([
        #     [[ True,  True, False, False, False],
        #      [ True,  True, False, False, False],
        #      [False, False, False, False, False],
        #      [False, False, False, False, False],
        #      [False, False, False, False, False]],
        #
        #     [[False, False, False, False, False],
        #      [False, False, False, False, False],
        #      [False, False, False, False, False],
        #      [False, False, False,  True,  True],
        #      [False, False, False,  True,  True]]
        # ])
        ```
    """
    width, height = resolution_wh
    n = boxes.shape[0]
    masks = np.zeros((n, height, width), dtype=bool)

    # Floor before converting: `int()` alone truncates toward zero, which would
    # turn a coordinate in (-1, 0) into 0 and paint row/column 0 for a box that
    # lies entirely outside the image.
    for i, (x_min, y_min, x_max, y_max) in enumerate(np.floor(boxes)):
        x_min = max(0, int(x_min))
        y_min = max(0, int(y_min))
        x_max = min(width - 1, int(x_max))
        y_max = min(height - 1, int(y_max))

        # After clipping, an inverted or fully out-of-frame box has max < min.
        if x_max >= x_min and y_max >= y_min:
            # Bounds are inclusive, hence the `+ 1` on the slice stops.
            masks[i, y_min : y_max + 1, x_min : x_max + 1] = True

    return masks
def _expected_masks(*regions):
    """
    Build the expected bool mask stack for a 5x4 (width x height) frame.

    Each region is either `None` (mask stays empty) or an inclusive
    `(first_row, last_row, first_col, last_col)` tuple to fill with `True`.
    Calling with no regions yields an empty stack of shape `(0, 4, 5)`.
    """
    stack = np.zeros((len(regions), 4, 5), dtype=bool)
    for index, region in enumerate(regions):
        if region is None:
            continue
        first_row, last_row, first_col, last_col = region
        stack[index, first_row : last_row + 1, first_col : last_col + 1] = True
    return stack


@pytest.mark.parametrize(
    "boxes,resolution_wh,expected",
    [
        # 0) Empty input -> empty stack of masks
        (np.array([], dtype=float).reshape(0, 4), (5, 4), _expected_masks()),
        # 1) Single pixel box
        (
            np.array([[2, 1, 2, 1]], dtype=float),
            (5, 4),
            _expected_masks((1, 1, 2, 2)),
        ),
        # 2) Horizontal line, inclusive bounds
        (
            np.array([[1, 2, 3, 2]], dtype=float),
            (5, 4),
            _expected_masks((2, 2, 1, 3)),
        ),
        # 3) Vertical line, inclusive bounds
        (
            np.array([[3, 0, 3, 2]], dtype=float),
            (5, 4),
            _expected_masks((0, 2, 3, 3)),
        ),
        # 4) Proper rectangle fill
        (
            np.array([[1, 1, 3, 2]], dtype=float),
            (5, 4),
            _expected_masks((1, 2, 1, 3)),
        ),
        # 5) Negative coordinates clipped to [0, 0]
        (
            np.array([[-2, -1, 1, 1]], dtype=float),
            (5, 4),
            _expected_masks((0, 1, 0, 1)),
        ),
        # 6) Overflow coordinates clipped to width-1 and height-1
        (
            np.array([[3, 2, 10, 10]], dtype=float),
            (5, 4),
            _expected_masks((2, 3, 3, 4)),
        ),
        # 7) Invalid box where x_max < x_min, mask stays empty
        (
            np.array([[3, 2, 1, 4]], dtype=float),
            (5, 4),
            _expected_masks(None),
        ),
        # 8) Non-negative fractional coordinates lose their fractional part:
        #    (0.2, 0.2)-(2.8, 1.9) -> (0, 0)-(2, 1)
        (
            np.array([[0.2, 0.2, 2.8, 1.9]], dtype=float),
            (5, 4),
            _expected_masks((0, 1, 0, 2)),
        ),
        # 9) Multiple boxes, one mask each:
        #    box 0 -> row 0, cols 0..1; box 1 -> rows 1..3, cols 2..4
        (
            np.array([[0, 0, 1, 0], [2, 1, 4, 3]], dtype=float),
            (5, 4),
            _expected_masks((0, 0, 0, 1), (1, 3, 2, 4)),
        ),
    ],
)
def test_xyxy_to_mask(boxes: np.ndarray, resolution_wh, expected: np.ndarray) -> None:
    """Check dtype, shape and contents of the masks produced for each case."""
    masks = xyxy_to_mask(boxes, resolution_wh)

    assert masks.dtype == np.bool_
    assert masks.shape == expected.shape
    np.testing.assert_array_equal(masks, expected)