From 70e8b30a57f7fbf0286b67f03b8800fd27a11bd8 Mon Sep 17 00:00:00 2001
From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com>
Date: Thu, 20 Jun 2024 09:51:14 +0000
Subject: [PATCH] Deployed b2b251a4 to develop with MkDocs 1.6.0 and mike 2.1.1

---
 develop/detection/annotators/index.html | 5444 ++++++++++++-----------
 develop/search/search_index.json        |    2 +-
 develop/sitemap.xml                     |   88 +-
 develop/sitemap.xml.gz                  |  Bin 646 -> 646 bytes
 4 files changed, 2809 insertions(+), 2725 deletions(-)

diff --git a/develop/detection/annotators/index.html b/develop/detection/annotators/index.html
index c4d77993b..0080ea77f 100644
--- a/develop/detection/annotators/index.html
+++ b/develop/detection/annotators/index.html
@@ -4378,21 +4378,7 @@

RoundBoxAnnotator
Source code in supervision/annotators/core.py
@@ -4507,136 +4493,150 @@ 


class RoundBoxAnnotator(BaseAnnotator):
-    """
-    A class for drawing bounding boxes with round edges on an image
-    using provided detections.
-    """
-
-    def __init__(
-        self,
-        color: Union[Color, ColorPalette] = ColorPalette.DEFAULT,
-        thickness: int = 2,
-        color_lookup: ColorLookup = ColorLookup.CLASS,
-        roundness: float = 0.6,
-    ):
-        """
-        Args:
-            color (Union[Color, ColorPalette]): The color or color palette to use for
-                annotating detections.
-            thickness (int): Thickness of the bounding box lines.
-            color_lookup (str): Strategy for mapping colors to annotations.
-                Options are `INDEX`, `CLASS`, `TRACK`.
-            roundness (float): Percent of roundness for edges of bounding box.
-                Value must be float 0 < roundness <= 1.0
-                By default roundness percent is calculated based on smaller side
-                length (width or height).
-        """
-        self.color: Union[Color, ColorPalette] = color
-        self.thickness: int = thickness
-        self.color_lookup: ColorLookup = color_lookup
-        if not 0 < roundness <= 1.0:
-            raise ValueError("roundness attribute must be float between (0, 1.0]")
-        self.roundness: float = roundness
-
-    @convert_for_annotation_method
-    def annotate(
-        self,
-        scene: ImageType,
-        detections: Detections,
-        custom_color_lookup: Optional[np.ndarray] = None,
-    ) -> ImageType:
-        """
-        Annotates the given scene with bounding boxes with rounded edges
-        based on the provided detections.
-
-        Args:
-            scene (ImageType): The image where rounded bounding boxes will be drawn.
-                `ImageType` is a flexible type, accepting either `numpy.ndarray`
-                or `PIL.Image.Image`.
-            detections (Detections): Object detections to annotate.
-            custom_color_lookup (Optional[np.ndarray]): Custom color lookup array.
-                Allows to override the default color mapping strategy.
-
-        Returns:
-            The annotated image, matching the type of `scene` (`numpy.ndarray`
-                or `PIL.Image.Image`)
-
-        Example:
-            ```python
-            import supervision as sv
-
-            image = ...
-            detections = sv.Detections(...)
-
-            round_box_annotator = sv.RoundBoxAnnotator()
-            annotated_frame = round_box_annotator.annotate(
-                scene=image.copy(),
-                detections=detections
-            )
-            ```
+1826
+class RoundBoxAnnotator(BaseAnnotator):
+    """
+    A class for drawing bounding boxes with round edges on an image
+    using provided detections.
+    """
+
+    def __init__(
+        self,
+        color: Union[Color, ColorPalette] = ColorPalette.DEFAULT,
+        thickness: int = 2,
+        color_lookup: ColorLookup = ColorLookup.CLASS,
+        roundness: float = 0.6,
+    ):
+        """
+        Args:
+            color (Union[Color, ColorPalette]): The color or color palette to use for
+                annotating detections.
+            thickness (int): Thickness of the bounding box lines.
+            color_lookup (str): Strategy for mapping colors to annotations.
+                Options are `INDEX`, `CLASS`, `TRACK`.
+            roundness (float): Percent of roundness for edges of bounding box.
+                Value must be float 0 < roundness <= 1.0
+                By default roundness percent is calculated based on smaller side
+                length (width or height).
+        """
+        self.color: Union[Color, ColorPalette] = color
+        self.thickness: int = thickness
+        self.color_lookup: ColorLookup = color_lookup
+        if not 0 < roundness <= 1.0:
+            raise ValueError("roundness attribute must be float between (0, 1.0]")
+        self.roundness: float = roundness
+
+    @convert_for_annotation_method
+    def annotate(
+        self,
+        scene: ImageType,
+        detections: Detections,
+        custom_color_lookup: Optional[np.ndarray] = None,
+    ) -> ImageType:
+        """
+        Annotates the given scene with bounding boxes with rounded edges
+        based on the provided detections.
+
+        Args:
+            scene (ImageType): The image where rounded bounding boxes will be drawn.
+                `ImageType` is a flexible type, accepting either `numpy.ndarray`
+                or `PIL.Image.Image`.
+            detections (Detections): Object detections to annotate.
+            custom_color_lookup (Optional[np.ndarray]): Custom color lookup array.
+                Allows to override the default color mapping strategy.
+
+        Returns:
+            The annotated image, matching the type of `scene` (`numpy.ndarray`
+                or `PIL.Image.Image`)
 
-        ![round-box-annotator-example](https://media.roboflow.com/
-        supervision-annotator-examples/round-box-annotator-example-purple.png)
-        """
+        Example:
+            ```python
+            import supervision as sv
 
-        for detection_idx in range(len(detections)):
-            x1, y1, x2, y2 = detections.xyxy[detection_idx].astype(int)
-            color = resolve_color(
-                color=self.color,
-                detections=detections,
-                detection_idx=detection_idx,
-                color_lookup=self.color_lookup
-                if custom_color_lookup is None
-                else custom_color_lookup,
-            )
-
-            radius = (
-                int((x2 - x1) // 2 * self.roundness)
-                if abs(x1 - x2) < abs(y1 - y2)
-                else int((y2 - y1) // 2 * self.roundness)
-            )
-
-            circle_coordinates = [
-                ((x1 + radius), (y1 + radius)),
-                ((x2 - radius), (y1 + radius)),
-                ((x2 - radius), (y2 - radius)),
-                ((x1 + radius), (y2 - radius)),
-            ]
-
-            line_coordinates = [
-                ((x1 + radius, y1), (x2 - radius, y1)),
-                ((x2, y1 + radius), (x2, y2 - radius)),
-                ((x1 + radius, y2), (x2 - radius, y2)),
-                ((x1, y1 + radius), (x1, y2 - radius)),
-            ]
+            image = ...
+            detections = sv.Detections(...)
+
+            round_box_annotator = sv.RoundBoxAnnotator()
+            annotated_frame = round_box_annotator.annotate(
+                scene=image.copy(),
+                detections=detections
+            )
+            ```
+
+        ![round-box-annotator-example](https://media.roboflow.com/
+        supervision-annotator-examples/round-box-annotator-example-purple.png)
+        """
+
+        for detection_idx in range(len(detections)):
+            x1, y1, x2, y2 = detections.xyxy[detection_idx].astype(int)
+            color = resolve_color(
+                color=self.color,
+                detections=detections,
+                detection_idx=detection_idx,
+                color_lookup=self.color_lookup
+                if custom_color_lookup is None
+                else custom_color_lookup,
+            )
+
+            radius = (
+                int((x2 - x1) // 2 * self.roundness)
+                if abs(x1 - x2) < abs(y1 - y2)
+                else int((y2 - y1) // 2 * self.roundness)
+            )
 
-            start_angles = (180, 270, 0, 90)
-            end_angles = (270, 360, 90, 180)
-
-            for center_coordinates, line, start_angle, end_angle in zip(
-                circle_coordinates, line_coordinates, start_angles, end_angles
-            ):
-                cv2.ellipse(
-                    img=scene,
-                    center=center_coordinates,
-                    axes=(radius, radius),
-                    angle=0,
-                    startAngle=start_angle,
-                    endAngle=end_angle,
-                    color=color.as_bgr(),
-                    thickness=self.thickness,
-                )
+            circle_coordinates = [
+                ((x1 + radius), (y1 + radius)),
+                ((x2 - radius), (y1 + radius)),
+                ((x2 - radius), (y2 - radius)),
+                ((x1 + radius), (y2 - radius)),
+            ]
+
+            line_coordinates = [
+                ((x1 + radius, y1), (x2 - radius, y1)),
+                ((x2, y1 + radius), (x2, y2 - radius)),
+                ((x1 + radius, y2), (x2 - radius, y2)),
+                ((x1, y1 + radius), (x1, y2 - radius)),
+            ]
+
+            start_angles = (180, 270, 0, 90)
+            end_angles = (270, 360, 90, 180)
 
-                cv2.line(
-                    img=scene,
-                    pt1=line[0],
-                    pt2=line[1],
-                    color=color.as_bgr(),
-                    thickness=self.thickness,
-                )
-
-        return scene
+            for center_coordinates, line, start_angle, end_angle in zip(
+                circle_coordinates, line_coordinates, start_angles, end_angles
+            ):
+                cv2.ellipse(
+                    img=scene,
+                    center=center_coordinates,
+                    axes=(radius, radius),
+                    angle=0,
+                    startAngle=start_angle,
+                    endAngle=end_angle,
+                    color=color.as_bgr(),
+                    thickness=self.thickness,
+                )
+
+                cv2.line(
+                    img=scene,
+                    pt1=line[0],
+                    pt2=line[1],
+                    color=color.as_bgr(),
+                    thickness=self.thickness,
+                )
+
+        return scene
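The listing above derives the corner radius from the shorter side of each box: `min(width, height) // 2 * roundness`. A minimal sketch, assuming only `supervision` and `numpy` are installed; the image size and box coordinates below are invented for illustration:

```python
import numpy as np
import supervision as sv

# Synthetic scene and a single hand-made detection (class_id is needed for the
# default CLASS color lookup).
image = np.zeros((480, 640, 3), dtype=np.uint8)
detections = sv.Detections(
    xyxy=np.array([[100, 100, 300, 200]], dtype=float),
    class_id=np.array([0]),
)

# For this 200x100 box, roundness=0.3 gives a corner radius of
# int(100 // 2 * 0.3) = 15 pixels, per the radius computation in the listing.
round_box_annotator = sv.RoundBoxAnnotator(thickness=2, roundness=0.3)
annotated_frame = round_box_annotator.annotate(
    scene=image.copy(),
    detections=detections,
)
```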
 
@@ -4742,21 +4742,7 @@

Source code in supervision/annotators/core.py
@@ -4766,31 +4752,45 @@ 

-def __init__(
-    self,
-    color: Union[Color, ColorPalette] = ColorPalette.DEFAULT,
-    thickness: int = 2,
-    color_lookup: ColorLookup = ColorLookup.CLASS,
-    roundness: float = 0.6,
-):
-    """
-    Args:
-        color (Union[Color, ColorPalette]): The color or color palette to use for
-            annotating detections.
-        thickness (int): Thickness of the bounding box lines.
-        color_lookup (str): Strategy for mapping colors to annotations.
-            Options are `INDEX`, `CLASS`, `TRACK`.
-        roundness (float): Percent of roundness for edges of bounding box.
-            Value must be float 0 < roundness <= 1.0
-            By default roundness percent is calculated based on smaller side
-            length (width or height).
-    """
-    self.color: Union[Color, ColorPalette] = color
-    self.thickness: int = thickness
-    self.color_lookup: ColorLookup = color_lookup
-    if not 0 < roundness <= 1.0:
-        raise ValueError("roundness attribute must be float between (0, 1.0]")
-    self.roundness: float = roundness
+def __init__(
+    self,
+    color: Union[Color, ColorPalette] = ColorPalette.DEFAULT,
+    thickness: int = 2,
+    color_lookup: ColorLookup = ColorLookup.CLASS,
+    roundness: float = 0.6,
+):
+    """
+    Args:
+        color (Union[Color, ColorPalette]): The color or color palette to use for
+            annotating detections.
+        thickness (int): Thickness of the bounding box lines.
+        color_lookup (str): Strategy for mapping colors to annotations.
+            Options are `INDEX`, `CLASS`, `TRACK`.
+        roundness (float): Percent of roundness for edges of bounding box.
+            Value must be float 0 < roundness <= 1.0
+            By default roundness percent is calculated based on smaller side
+            length (width or height).
+    """
+    self.color: Union[Color, ColorPalette] = color
+    self.thickness: int = thickness
+    self.color_lookup: ColorLookup = color_lookup
+    if not 0 < roundness <= 1.0:
+        raise ValueError("roundness attribute must be float between (0, 1.0]")
+    self.roundness: float = roundness
 
@@ -4914,21 +4914,7 @@

Source code in supervision/annotators/core.py
@@ -5011,104 +4997,118 @@ 

-@convert_for_annotation_method
-def annotate(
-    self,
-    scene: ImageType,
-    detections: Detections,
-    custom_color_lookup: Optional[np.ndarray] = None,
-) -> ImageType:
-    """
-    Annotates the given scene with bounding boxes with rounded edges
-    based on the provided detections.
-
-    Args:
-        scene (ImageType): The image where rounded bounding boxes will be drawn.
-            `ImageType` is a flexible type, accepting either `numpy.ndarray`
-            or `PIL.Image.Image`.
-        detections (Detections): Object detections to annotate.
-        custom_color_lookup (Optional[np.ndarray]): Custom color lookup array.
-            Allows to override the default color mapping strategy.
-
-    Returns:
-        The annotated image, matching the type of `scene` (`numpy.ndarray`
-            or `PIL.Image.Image`)
-
-    Example:
-        ```python
-        import supervision as sv
-
-        image = ...
-        detections = sv.Detections(...)
-
-        round_box_annotator = sv.RoundBoxAnnotator()
-        annotated_frame = round_box_annotator.annotate(
-            scene=image.copy(),
-            detections=detections
-        )
-        ```
+@convert_for_annotation_method
+def annotate(
+    self,
+    scene: ImageType,
+    detections: Detections,
+    custom_color_lookup: Optional[np.ndarray] = None,
+) -> ImageType:
+    """
+    Annotates the given scene with bounding boxes with rounded edges
+    based on the provided detections.
+
+    Args:
+        scene (ImageType): The image where rounded bounding boxes will be drawn.
+            `ImageType` is a flexible type, accepting either `numpy.ndarray`
+            or `PIL.Image.Image`.
+        detections (Detections): Object detections to annotate.
+        custom_color_lookup (Optional[np.ndarray]): Custom color lookup array.
+            Allows to override the default color mapping strategy.
+
+    Returns:
+        The annotated image, matching the type of `scene` (`numpy.ndarray`
+            or `PIL.Image.Image`)
 
-    ![round-box-annotator-example](https://media.roboflow.com/
-    supervision-annotator-examples/round-box-annotator-example-purple.png)
-    """
+    Example:
+        ```python
+        import supervision as sv
 
-    for detection_idx in range(len(detections)):
-        x1, y1, x2, y2 = detections.xyxy[detection_idx].astype(int)
-        color = resolve_color(
-            color=self.color,
-            detections=detections,
-            detection_idx=detection_idx,
-            color_lookup=self.color_lookup
-            if custom_color_lookup is None
-            else custom_color_lookup,
-        )
-
-        radius = (
-            int((x2 - x1) // 2 * self.roundness)
-            if abs(x1 - x2) < abs(y1 - y2)
-            else int((y2 - y1) // 2 * self.roundness)
-        )
-
-        circle_coordinates = [
-            ((x1 + radius), (y1 + radius)),
-            ((x2 - radius), (y1 + radius)),
-            ((x2 - radius), (y2 - radius)),
-            ((x1 + radius), (y2 - radius)),
-        ]
-
-        line_coordinates = [
-            ((x1 + radius, y1), (x2 - radius, y1)),
-            ((x2, y1 + radius), (x2, y2 - radius)),
-            ((x1 + radius, y2), (x2 - radius, y2)),
-            ((x1, y1 + radius), (x1, y2 - radius)),
-        ]
+        image = ...
+        detections = sv.Detections(...)
+
+        round_box_annotator = sv.RoundBoxAnnotator()
+        annotated_frame = round_box_annotator.annotate(
+            scene=image.copy(),
+            detections=detections
+        )
+        ```
+
+    ![round-box-annotator-example](https://media.roboflow.com/
+    supervision-annotator-examples/round-box-annotator-example-purple.png)
+    """
+
+    for detection_idx in range(len(detections)):
+        x1, y1, x2, y2 = detections.xyxy[detection_idx].astype(int)
+        color = resolve_color(
+            color=self.color,
+            detections=detections,
+            detection_idx=detection_idx,
+            color_lookup=self.color_lookup
+            if custom_color_lookup is None
+            else custom_color_lookup,
+        )
+
+        radius = (
+            int((x2 - x1) // 2 * self.roundness)
+            if abs(x1 - x2) < abs(y1 - y2)
+            else int((y2 - y1) // 2 * self.roundness)
+        )
 
-        start_angles = (180, 270, 0, 90)
-        end_angles = (270, 360, 90, 180)
-
-        for center_coordinates, line, start_angle, end_angle in zip(
-            circle_coordinates, line_coordinates, start_angles, end_angles
-        ):
-            cv2.ellipse(
-                img=scene,
-                center=center_coordinates,
-                axes=(radius, radius),
-                angle=0,
-                startAngle=start_angle,
-                endAngle=end_angle,
-                color=color.as_bgr(),
-                thickness=self.thickness,
-            )
+        circle_coordinates = [
+            ((x1 + radius), (y1 + radius)),
+            ((x2 - radius), (y1 + radius)),
+            ((x2 - radius), (y2 - radius)),
+            ((x1 + radius), (y2 - radius)),
+        ]
+
+        line_coordinates = [
+            ((x1 + radius, y1), (x2 - radius, y1)),
+            ((x2, y1 + radius), (x2, y2 - radius)),
+            ((x1 + radius, y2), (x2 - radius, y2)),
+            ((x1, y1 + radius), (x1, y2 - radius)),
+        ]
+
+        start_angles = (180, 270, 0, 90)
+        end_angles = (270, 360, 90, 180)
 
-            cv2.line(
-                img=scene,
-                pt1=line[0],
-                pt2=line[1],
-                color=color.as_bgr(),
-                thickness=self.thickness,
-            )
-
-    return scene
+        for center_coordinates, line, start_angle, end_angle in zip(
+            circle_coordinates, line_coordinates, start_angles, end_angles
+        ):
+            cv2.ellipse(
+                img=scene,
+                center=center_coordinates,
+                axes=(radius, radius),
+                angle=0,
+                startAngle=start_angle,
+                endAngle=end_angle,
+                color=color.as_bgr(),
+                thickness=self.thickness,
+            )
+
+            cv2.line(
+                img=scene,
+                pt1=line[0],
+                pt2=line[1],
+                color=color.as_bgr(),
+                thickness=self.thickness,
+            )
+
+    return scene
 
@@ -7501,7 +7501,15 @@

DotAnnotator

-class DotAnnotator(BaseAnnotator):
+class DotAnnotator(BaseAnnotator):
     """
     A class for drawing dots on an image at specific coordinates based on provided
     detections.
@@ -7513,73 +7521,81 @@ 

DotAnnotator

     radius: int = 4,
     position: Position = Position.CENTER,
     color_lookup: ColorLookup = ColorLookup.CLASS,
-    ):
-        """
-        Args:
-            color (Union[Color, ColorPalette]): The color or color palette to use for
-                annotating detections.
-            radius (int): Radius of the drawn dots.
-            position (Position): The anchor position for placing the dot.
-            color_lookup (ColorLookup): Strategy for mapping colors to annotations.
-                Options are `INDEX`, `CLASS`, `TRACK`.
-        """
-        self.color: Union[Color, ColorPalette] = color
-        self.radius: int = radius
-        self.position: Position = position
-        self.color_lookup: ColorLookup = color_lookup
-
-    @convert_for_annotation_method
-    def annotate(
-        self,
-        scene: ImageType,
-        detections: Detections,
-        custom_color_lookup: Optional[np.ndarray] = None,
-    ) -> ImageType:
-        """
-        Annotates the given scene with dots based on the provided detections.
-
-        Args:
-            scene (ImageType): The image where dots will be drawn.
-                `ImageType` is a flexible type, accepting either `numpy.ndarray`
-                or `PIL.Image.Image`.
-            detections (Detections): Object detections to annotate.
-            custom_color_lookup (Optional[np.ndarray]): Custom color lookup array.
-                Allows to override the default color mapping strategy.
-
-        Returns:
-            The annotated image, matching the type of `scene` (`numpy.ndarray`
-                or `PIL.Image.Image`)
-
-        Example:
-            ```python
-            import supervision as sv
-
-            image = ...
-            detections = sv.Detections(...)
+        outline_thickness: int = 0,
+    ):
+        """
+        Args:
+            color (Union[Color, ColorPalette]): The color or color palette to use for
+                annotating detections.
+            radius (int): Radius of the drawn dots.
+            position (Position): The anchor position for placing the dot.
+            color_lookup (ColorLookup): Strategy for mapping colors to annotations.
+                Options are `INDEX`, `CLASS`, `TRACK`.
+            outline_thickness (int): Thickness of the outline of the dot.
+        """
+        self.color: Union[Color, ColorPalette] = color
+        self.radius: int = radius
+        self.position: Position = position
+        self.color_lookup: ColorLookup = color_lookup
+        self.outline_thickness = outline_thickness
+
+    @convert_for_annotation_method
+    def annotate(
+        self,
+        scene: ImageType,
+        detections: Detections,
+        custom_color_lookup: Optional[np.ndarray] = None,
+    ) -> ImageType:
+        """
+        Annotates the given scene with dots based on the provided detections.
+
+        Args:
+            scene (ImageType): The image where dots will be drawn.
+                `ImageType` is a flexible type, accepting either `numpy.ndarray`
+                or `PIL.Image.Image`.
+            detections (Detections): Object detections to annotate.
+            custom_color_lookup (Optional[np.ndarray]): Custom color lookup array.
+                Allows to override the default color mapping strategy.
+
+        Returns:
+            The annotated image, matching the type of `scene` (`numpy.ndarray`
+                or `PIL.Image.Image`)
+
+        Example:
+            ```python
+            import supervision as sv
 
-            dot_annotator = sv.DotAnnotator()
-            annotated_frame = dot_annotator.annotate(
-                scene=image.copy(),
-                detections=detections
-            )
-            ```
-
-        ![dot-annotator-example](https://media.roboflow.com/
-        supervision-annotator-examples/dot-annotator-example-purple.png)
-        """
-        xy = detections.get_anchors_coordinates(anchor=self.position)
-        for detection_idx in range(len(detections)):
-            color = resolve_color(
-                color=self.color,
-                detections=detections,
-                detection_idx=detection_idx,
-                color_lookup=self.color_lookup
-                if custom_color_lookup is None
-                else custom_color_lookup,
-            )
-            center = (int(xy[detection_idx, 0]), int(xy[detection_idx, 1]))
-            cv2.circle(scene, center, self.radius, color.as_bgr(), -1)
-        return scene
+            image = ...
+            detections = sv.Detections(...)
+
+            dot_annotator = sv.DotAnnotator()
+            annotated_frame = dot_annotator.annotate(
+                scene=image.copy(),
+                detections=detections
+            )
+            ```
+
+        ![dot-annotator-example](https://media.roboflow.com/
+        supervision-annotator-examples/dot-annotator-example-purple.png)
+        """
+        xy = detections.get_anchors_coordinates(anchor=self.position)
+        for detection_idx in range(len(detections)):
+            color = resolve_color(
+                color=self.color,
+                detections=detections,
+                detection_idx=detection_idx,
+                color_lookup=self.color_lookup
+                if custom_color_lookup is None
+                else custom_color_lookup,
+            )
+            center = (int(xy[detection_idx, 0]), int(xy[detection_idx, 1]))
+
+            cv2.circle(scene, center, self.radius, color.as_bgr(), -1)
+            if self.outline_thickness:
+                cv2.circle(
+                    scene, center, self.radius, (0, 0, 0), self.outline_thickness
+                )
+        return scene
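This hunk introduces the `outline_thickness` parameter: when it is non-zero, a black circle of that thickness is drawn over the filled dot. A minimal sketch, assuming a `supervision` build that already contains this change; the inputs are synthetic:

```python
import numpy as np
import supervision as sv

image = np.zeros((480, 640, 3), dtype=np.uint8)
detections = sv.Detections(
    xyxy=np.array([[100, 100, 300, 200]], dtype=float),
    class_id=np.array([0]),
)

# Filled dot plus a 2 px black outline, as drawn by the second cv2.circle call.
dot_annotator = sv.DotAnnotator(radius=6, outline_thickness=2)
annotated_frame = dot_annotator.annotate(scene=image.copy(), detections=detections)
```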
@@ -7599,7 +7615,7 @@

Functions
-__init__(color=ColorPalette.DEFAULT, radius=4, position=Position.CENTER, color_lookup=ColorLookup.CLASS)
+__init__(color=ColorPalette.DEFAULT, radius=4, position=Position.CENTER, color_lookup=ColorLookup.CLASS, outline_thickness=0)

@@ -7677,6 +7693,20 @@

+outline_thickness (int): Thickness of the outline of the dot. Default: 0
@@ -7701,26 +7731,32 @@

-def __init__(
+def __init__(
     self,
     color: Union[Color, ColorPalette] = ColorPalette.DEFAULT,
     radius: int = 4,
     position: Position = Position.CENTER,
     color_lookup: ColorLookup = ColorLookup.CLASS,
-):
-    """
-    Args:
-        color (Union[Color, ColorPalette]): The color or color palette to use for
-            annotating detections.
-        radius (int): Radius of the drawn dots.
-        position (Position): The anchor position for placing the dot.
-        color_lookup (ColorLookup): Strategy for mapping colors to annotations.
-            Options are `INDEX`, `CLASS`, `TRACK`.
-    """
-    self.color: Union[Color, ColorPalette] = color
-    self.radius: int = radius
-    self.position: Position = position
-    self.color_lookup: ColorLookup = color_lookup
+    outline_thickness: int = 0,
+):
+    """
+    Args:
+        color (Union[Color, ColorPalette]): The color or color palette to use for
+            annotating detections.
+        radius (int): Radius of the drawn dots.
+        position (Position): The anchor position for placing the dot.
+        color_lookup (ColorLookup): Strategy for mapping colors to annotations.
+            Options are `INDEX`, `CLASS`, `TRACK`.
+        outline_thickness (int): Thickness of the outline of the dot.
+    """
+    self.color: Union[Color, ColorPalette] = color
+    self.radius: int = radius
+    self.position: Position = position
+    self.color_lookup: ColorLookup = color_lookup
+    self.outline_thickness = outline_thickness
 
@@ -7843,10 +7879,7 @@

Source code in supervision/annotators/core.py
@@ -7894,58 +7927,71 @@ 

-@convert_for_annotation_method
-def annotate(
-    self,
-    scene: ImageType,
-    detections: Detections,
-    custom_color_lookup: Optional[np.ndarray] = None,
-) -> ImageType:
-    """
-    Annotates the given scene with dots based on the provided detections.
-
-    Args:
-        scene (ImageType): The image where dots will be drawn.
-            `ImageType` is a flexible type, accepting either `numpy.ndarray`
-            or `PIL.Image.Image`.
-        detections (Detections): Object detections to annotate.
-        custom_color_lookup (Optional[np.ndarray]): Custom color lookup array.
-            Allows to override the default color mapping strategy.
-
-    Returns:
-        The annotated image, matching the type of `scene` (`numpy.ndarray`
-            or `PIL.Image.Image`)
-
-    Example:
-        ```python
-        import supervision as sv
-
-        image = ...
-        detections = sv.Detections(...)
+902
+@convert_for_annotation_method
+def annotate(
+    self,
+    scene: ImageType,
+    detections: Detections,
+    custom_color_lookup: Optional[np.ndarray] = None,
+) -> ImageType:
+    """
+    Annotates the given scene with dots based on the provided detections.
+
+    Args:
+        scene (ImageType): The image where dots will be drawn.
+            `ImageType` is a flexible type, accepting either `numpy.ndarray`
+            or `PIL.Image.Image`.
+        detections (Detections): Object detections to annotate.
+        custom_color_lookup (Optional[np.ndarray]): Custom color lookup array.
+            Allows to override the default color mapping strategy.
+
+    Returns:
+        The annotated image, matching the type of `scene` (`numpy.ndarray`
+            or `PIL.Image.Image`)
+
+    Example:
+        ```python
+        import supervision as sv
 
-        dot_annotator = sv.DotAnnotator()
-        annotated_frame = dot_annotator.annotate(
-            scene=image.copy(),
-            detections=detections
-        )
-        ```
-
-    ![dot-annotator-example](https://media.roboflow.com/
-    supervision-annotator-examples/dot-annotator-example-purple.png)
-    """
-    xy = detections.get_anchors_coordinates(anchor=self.position)
-    for detection_idx in range(len(detections)):
-        color = resolve_color(
-            color=self.color,
-            detections=detections,
-            detection_idx=detection_idx,
-            color_lookup=self.color_lookup
-            if custom_color_lookup is None
-            else custom_color_lookup,
-        )
-        center = (int(xy[detection_idx, 0]), int(xy[detection_idx, 1]))
-        cv2.circle(scene, center, self.radius, color.as_bgr(), -1)
-    return scene
+        image = ...
+        detections = sv.Detections(...)
+
+        dot_annotator = sv.DotAnnotator()
+        annotated_frame = dot_annotator.annotate(
+            scene=image.copy(),
+            detections=detections
+        )
+        ```
+
+    ![dot-annotator-example](https://media.roboflow.com/
+    supervision-annotator-examples/dot-annotator-example-purple.png)
+    """
+    xy = detections.get_anchors_coordinates(anchor=self.position)
+    for detection_idx in range(len(detections)):
+        color = resolve_color(
+            color=self.color,
+            detections=detections,
+            detection_idx=detection_idx,
+            color_lookup=self.color_lookup
+            if custom_color_lookup is None
+            else custom_color_lookup,
+        )
+        center = (int(xy[detection_idx, 0]), int(xy[detection_idx, 1]))
+
+        cv2.circle(scene, center, self.radius, color.as_bgr(), -1)
+        if self.outline_thickness:
+            cv2.circle(
+                scene, center, self.radius, (0, 0, 0), self.outline_thickness
+            )
+    return scene
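`annotate` also accepts `custom_color_lookup`, which replaces the configured `INDEX`/`CLASS`/`TRACK` strategy for a single call. The sketch below assumes the array holds one palette index per detection; that reading follows from the built-in strategies but is not spelled out in the listing:

```python
import numpy as np
import supervision as sv

image = np.zeros((480, 640, 3), dtype=np.uint8)
detections = sv.Detections(
    xyxy=np.array([[50, 50, 120, 120], [200, 80, 280, 160]], dtype=float),
    class_id=np.array([0, 0]),
)

dot_annotator = sv.DotAnnotator(radius=5)
# Assumption: entry i picks the palette color for detection i.
annotated_frame = dot_annotator.annotate(
    scene=image.copy(),
    detections=detections,
    custom_color_lookup=np.array([0, 3]),
)
```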
 
@@ -7979,15 +8025,7 @@

TriangleAnnotator
Source code in supervision/annotators/core.py
@@ -8070,98 +8108,118 @@ 

-class TriangleAnnotator(BaseAnnotator):
-    """
-    A class for drawing triangle markers on an image at specific coordinates based on
-    provided detections.
-    """
-
-    def __init__(
-        self,
-        color: Union[Color, ColorPalette] = ColorPalette.DEFAULT,
-        base: int = 10,
-        height: int = 10,
-        position: Position = Position.TOP_CENTER,
-        color_lookup: ColorLookup = ColorLookup.CLASS,
-    ):
-        """
-        Args:
-            color (Union[Color, ColorPalette]): The color or color palette to use for
-                annotating detections.
-            base (int): The base width of the triangle.
-            height (int): The height of the triangle.
-            position (Position): The anchor position for placing the triangle.
-            color_lookup (ColorLookup): Strategy for mapping colors to annotations.
-                Options are `INDEX`, `CLASS`, `TRACK`.
-        """
-        self.color: Union[Color, ColorPalette] = color
-        self.base: int = base
-        self.height: int = height
-        self.position: Position = position
-        self.color_lookup: ColorLookup = color_lookup
-
-    @convert_for_annotation_method
-    def annotate(
-        self,
-        scene: ImageType,
-        detections: Detections,
-        custom_color_lookup: Optional[np.ndarray] = None,
-    ) -> ImageType:
-        """
-        Annotates the given scene with triangles based on the provided detections.
-
-        Args:
-            scene (ImageType): The image where triangles will be drawn.
-                `ImageType` is a flexible type, accepting either `numpy.ndarray`
-                or `PIL.Image.Image`.
-            detections (Detections): Object detections to annotate.
-            custom_color_lookup (Optional[np.ndarray]): Custom color lookup array.
-                Allows to override the default color mapping strategy.
-
-        Returns:
-            The annotated image, matching the type of `scene` (`numpy.ndarray`
-                or `PIL.Image.Image`)
-
-        Example:
-            ```python
-            import supervision as sv
-
-            image = ...
-            detections = sv.Detections(...)
+1694
+class TriangleAnnotator(BaseAnnotator):
+    """
+    A class for drawing triangle markers on an image at specific coordinates based on
+    provided detections.
+    """
+
+    def __init__(
+        self,
+        color: Union[Color, ColorPalette] = ColorPalette.DEFAULT,
+        base: int = 10,
+        height: int = 10,
+        position: Position = Position.TOP_CENTER,
+        color_lookup: ColorLookup = ColorLookup.CLASS,
+        outline_thickness: int = 0,
+    ):
+        """
+        Args:
+            color (Union[Color, ColorPalette]): The color or color palette to use for
+                annotating detections.
+            base (int): The base width of the triangle.
+            height (int): The height of the triangle.
+            position (Position): The anchor position for placing the triangle.
+            color_lookup (ColorLookup): Strategy for mapping colors to annotations.
+                Options are `INDEX`, `CLASS`, `TRACK`.
+            outline_thickness (int): Thickness of the outline of the triangle.
+        """
+        self.color: Union[Color, ColorPalette] = color
+        self.base: int = base
+        self.height: int = height
+        self.position: Position = position
+        self.color_lookup: ColorLookup = color_lookup
+        self.outline_thickness: int = outline_thickness
+
+    @convert_for_annotation_method
+    def annotate(
+        self,
+        scene: ImageType,
+        detections: Detections,
+        custom_color_lookup: Optional[np.ndarray] = None,
+    ) -> ImageType:
+        """
+        Annotates the given scene with triangles based on the provided detections.
+
+        Args:
+            scene (ImageType): The image where triangles will be drawn.
+                `ImageType` is a flexible type, accepting either `numpy.ndarray`
+                or `PIL.Image.Image`.
+            detections (Detections): Object detections to annotate.
+            custom_color_lookup (Optional[np.ndarray]): Custom color lookup array.
+                Allows to override the default color mapping strategy.
 
-            triangle_annotator = sv.TriangleAnnotator()
-            annotated_frame = triangle_annotator.annotate(
-                scene=image.copy(),
-                detections=detections
-            )
-            ```
-
-        ![triangle-annotator-example](https://media.roboflow.com/
-        supervision-annotator-examples/triangle-annotator-example.png)
-        """
-        xy = detections.get_anchors_coordinates(anchor=self.position)
-        for detection_idx in range(len(detections)):
-            color = resolve_color(
-                color=self.color,
-                detections=detections,
-                detection_idx=detection_idx,
-                color_lookup=self.color_lookup
-                if custom_color_lookup is None
-                else custom_color_lookup,
-            )
-            tip_x, tip_y = int(xy[detection_idx, 0]), int(xy[detection_idx, 1])
-            vertices = np.array(
-                [
-                    [tip_x - self.base // 2, tip_y - self.height],
-                    [tip_x + self.base // 2, tip_y - self.height],
-                    [tip_x, tip_y],
-                ],
-                np.int32,
-            )
-
-            cv2.fillPoly(scene, [vertices], color.as_bgr())
-
-        return scene
+        Returns:
+            The annotated image, matching the type of `scene` (`numpy.ndarray`
+                or `PIL.Image.Image`)
+
+        Example:
+            ```python
+            import supervision as sv
+
+            image = ...
+            detections = sv.Detections(...)
+
+            triangle_annotator = sv.TriangleAnnotator()
+            annotated_frame = triangle_annotator.annotate(
+                scene=image.copy(),
+                detections=detections
+            )
+            ```
+
+        ![triangle-annotator-example](https://media.roboflow.com/
+        supervision-annotator-examples/triangle-annotator-example.png)
+        """
+        xy = detections.get_anchors_coordinates(anchor=self.position)
+        for detection_idx in range(len(detections)):
+            color = resolve_color(
+                color=self.color,
+                detections=detections,
+                detection_idx=detection_idx,
+                color_lookup=self.color_lookup
+                if custom_color_lookup is None
+                else custom_color_lookup,
+            )
+            tip_x, tip_y = int(xy[detection_idx, 0]), int(xy[detection_idx, 1])
+            vertices = np.array(
+                [
+                    [tip_x - self.base // 2, tip_y - self.height],
+                    [tip_x + self.base // 2, tip_y - self.height],
+                    [tip_x, tip_y],
+                ],
+                np.int32,
+            )
+
+            cv2.fillPoly(scene, [vertices], color.as_bgr())
+            if self.outline_thickness:
+                cv2.polylines(
+                    scene, [vertices], True, (0, 0, 0), thickness=self.outline_thickness
+                )
+        return scene
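TriangleAnnotator gains the same `outline_thickness` option in this deploy; a non-zero value adds a closed black border around the filled marker via `cv2.polylines`. A minimal sketch with synthetic inputs:

```python
import numpy as np
import supervision as sv

image = np.zeros((480, 640, 3), dtype=np.uint8)
detections = sv.Detections(
    xyxy=np.array([[220, 150, 420, 330]], dtype=float),
    class_id=np.array([0]),
)

# 20x20 px marker at the default TOP_CENTER anchor, outlined in black.
triangle_annotator = sv.TriangleAnnotator(base=20, height=20, outline_thickness=2)
annotated_frame = triangle_annotator.annotate(scene=image.copy(), detections=detections)
```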
 
@@ -8181,7 +8239,7 @@

Functions
-__init__(color=ColorPalette.DEFAULT, base=10, height=10, position=Position.TOP_CENTER, color_lookup=ColorLookup.CLASS)
+__init__(color=ColorPalette.DEFAULT, base=10, height=10, position=Position.TOP_CENTER, color_lookup=ColorLookup.CLASS, outline_thickness=0)

@@ -8273,20 +8331,26 @@

+outline_thickness (int): Thickness of the outline of the triangle. Default: 0
Source code in supervision/annotators/core.py
@@ -8300,29 +8364,43 @@ 

-def __init__(
-    self,
-    color: Union[Color, ColorPalette] = ColorPalette.DEFAULT,
-    base: int = 10,
-    height: int = 10,
-    position: Position = Position.TOP_CENTER,
-    color_lookup: ColorLookup = ColorLookup.CLASS,
-):
-    """
-    Args:
-        color (Union[Color, ColorPalette]): The color or color palette to use for
-            annotating detections.
-        base (int): The base width of the triangle.
-        height (int): The height of the triangle.
-        position (Position): The anchor position for placing the triangle.
-        color_lookup (ColorLookup): Strategy for mapping colors to annotations.
-            Options are `INDEX`, `CLASS`, `TRACK`.
-    """
-    self.color: Union[Color, ColorPalette] = color
-    self.base: int = base
-    self.height: int = height
-    self.position: Position = position
-    self.color_lookup: ColorLookup = color_lookup
+1631
+def __init__(
+    self,
+    color: Union[Color, ColorPalette] = ColorPalette.DEFAULT,
+    base: int = 10,
+    height: int = 10,
+    position: Position = Position.TOP_CENTER,
+    color_lookup: ColorLookup = ColorLookup.CLASS,
+    outline_thickness: int = 0,
+):
+    """
+    Args:
+        color (Union[Color, ColorPalette]): The color or color palette to use for
+            annotating detections.
+        base (int): The base width of the triangle.
+        height (int): The height of the triangle.
+        position (Position): The anchor position for placing the triangle.
+        color_lookup (ColorLookup): Strategy for mapping colors to annotations.
+            Options are `INDEX`, `CLASS`, `TRACK`.
+        outline_thickness (int): Thickness of the outline of the triangle.
+    """
+    self.color: Union[Color, ColorPalette] = color
+    self.base: int = base
+    self.height: int = height
+    self.position: Position = position
+    self.color_lookup: ColorLookup = color_lookup
+    self.outline_thickness: int = outline_thickness
 
@@ -8445,18 +8523,7 @@

Source code in supervision/annotators/core.py
@@ -8506,68 +8573,85 @@ 

-@convert_for_annotation_method
-def annotate(
-    self,
-    scene: ImageType,
-    detections: Detections,
-    custom_color_lookup: Optional[np.ndarray] = None,
-) -> ImageType:
-    """
-    Annotates the given scene with triangles based on the provided detections.
-
-    Args:
-        scene (ImageType): The image where triangles will be drawn.
-            `ImageType` is a flexible type, accepting either `numpy.ndarray`
-            or `PIL.Image.Image`.
-        detections (Detections): Object detections to annotate.
-        custom_color_lookup (Optional[np.ndarray]): Custom color lookup array.
-            Allows to override the default color mapping strategy.
-
-    Returns:
-        The annotated image, matching the type of `scene` (`numpy.ndarray`
-            or `PIL.Image.Image`)
-
-    Example:
-        ```python
-        import supervision as sv
-
-        image = ...
-        detections = sv.Detections(...)
+1694
+@convert_for_annotation_method
+def annotate(
+    self,
+    scene: ImageType,
+    detections: Detections,
+    custom_color_lookup: Optional[np.ndarray] = None,
+) -> ImageType:
+    """
+    Annotates the given scene with triangles based on the provided detections.
+
+    Args:
+        scene (ImageType): The image where triangles will be drawn.
+            `ImageType` is a flexible type, accepting either `numpy.ndarray`
+            or `PIL.Image.Image`.
+        detections (Detections): Object detections to annotate.
+        custom_color_lookup (Optional[np.ndarray]): Custom color lookup array.
+            Allows to override the default color mapping strategy.
 
-        triangle_annotator = sv.TriangleAnnotator()
-        annotated_frame = triangle_annotator.annotate(
-            scene=image.copy(),
-            detections=detections
-        )
-        ```
-
-    ![triangle-annotator-example](https://media.roboflow.com/
-    supervision-annotator-examples/triangle-annotator-example.png)
-    """
-    xy = detections.get_anchors_coordinates(anchor=self.position)
-    for detection_idx in range(len(detections)):
-        color = resolve_color(
-            color=self.color,
-            detections=detections,
-            detection_idx=detection_idx,
-            color_lookup=self.color_lookup
-            if custom_color_lookup is None
-            else custom_color_lookup,
-        )
-        tip_x, tip_y = int(xy[detection_idx, 0]), int(xy[detection_idx, 1])
-        vertices = np.array(
-            [
-                [tip_x - self.base // 2, tip_y - self.height],
-                [tip_x + self.base // 2, tip_y - self.height],
-                [tip_x, tip_y],
-            ],
-            np.int32,
-        )
-
-        cv2.fillPoly(scene, [vertices], color.as_bgr())
-
-    return scene
+    Returns:
+        The annotated image, matching the type of `scene` (`numpy.ndarray`
+            or `PIL.Image.Image`)
+
+    Example:
+        ```python
+        import supervision as sv
+
+        image = ...
+        detections = sv.Detections(...)
+
+        triangle_annotator = sv.TriangleAnnotator()
+        annotated_frame = triangle_annotator.annotate(
+            scene=image.copy(),
+            detections=detections
+        )
+        ```
+
+    ![triangle-annotator-example](https://media.roboflow.com/
+    supervision-annotator-examples/triangle-annotator-example.png)
+    """
+    xy = detections.get_anchors_coordinates(anchor=self.position)
+    for detection_idx in range(len(detections)):
+        color = resolve_color(
+            color=self.color,
+            detections=detections,
+            detection_idx=detection_idx,
+            color_lookup=self.color_lookup
+            if custom_color_lookup is None
+            else custom_color_lookup,
+        )
+        tip_x, tip_y = int(xy[detection_idx, 0]), int(xy[detection_idx, 1])
+        vertices = np.array(
+            [
+                [tip_x - self.base // 2, tip_y - self.height],
+                [tip_x + self.base // 2, tip_y - self.height],
+                [tip_x, tip_y],
+            ],
+            np.int32,
+        )
+
+        cv2.fillPoly(scene, [vertices], color.as_bgr())
+        if self.outline_thickness:
+            cv2.polylines(
+                scene, [vertices], True, (0, 0, 0), thickness=self.outline_thickness
+            )
+    return scene
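Because every `annotate` call returns the scene it drew on, the annotators documented above compose by threading one result into the next. A hedged sketch; in practice `detections` would come from a model rather than being hand-written:

```python
import numpy as np
import supervision as sv

image = np.zeros((480, 640, 3), dtype=np.uint8)
detections = sv.Detections(
    xyxy=np.array([[100, 100, 300, 200], [350, 120, 520, 300]], dtype=float),
    class_id=np.array([0, 1]),
)

# Each annotator draws on the frame returned by the previous one.
annotated_frame = image.copy()
annotated_frame = sv.RoundBoxAnnotator(roundness=0.4).annotate(annotated_frame, detections)
annotated_frame = sv.DotAnnotator(outline_thickness=1).annotate(annotated_frame, detections)
annotated_frame = sv.TriangleAnnotator(outline_thickness=1).annotate(annotated_frame, detections)
```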
 
@@ -9853,21 +9937,7 @@

PercentageBarAnnotator
Source code in supervision/annotators/core.py
 1845
 1846
@@ -10028,182 +10098,196 @@ 

-class PercentageBarAnnotator(BaseAnnotator):
-    """
-    A class for drawing percentage bars on an image using provided detections.
-    """
-
-    def __init__(
-        self,
-        height: int = 16,
-        width: int = 80,
-        color: Union[Color, ColorPalette] = ColorPalette.DEFAULT,
-        border_color: Color = Color.BLACK,
-        position: Position = Position.TOP_CENTER,
-        color_lookup: ColorLookup = ColorLookup.CLASS,
-        border_thickness: int = None,
-    ):
-        """
-        Args:
-            height (int): The height in pixels of the percentage bar.
-            width (int): The width in pixels of the percentage bar.
-            color (Union[Color, ColorPalette]): The color or color palette to use for
-                annotating detections.
-            border_color (Color): The color of the border lines.
-            position (Position): The anchor position of drawing the percentage bar.
-            color_lookup (str): Strategy for mapping colors to annotations.
-                Options are `INDEX`, `CLASS`, `TRACK`.
-            border_thickness (int): The thickness of the border lines.
-        """
-        self.height: int = height
-        self.width: int = width
-        self.color: Union[Color, ColorPalette] = color
-        self.border_color: Color = border_color
-        self.position: Position = position
-        self.color_lookup: ColorLookup = color_lookup
-
-        if border_thickness is None:
-            self.border_thickness = int(0.15 * self.height)
-
-    @convert_for_annotation_method
-    def annotate(
-        self,
-        scene: ImageType,
-        detections: Detections,
-        custom_color_lookup: Optional[np.ndarray] = None,
-        custom_values: Optional[np.ndarray] = None,
-    ) -> ImageType:
-        """
-        Annotates the given scene with percentage bars based on the provided
-        detections. The percentage bars visually represent the confidence or custom
-        values associated with each detection.
-
-        Args:
-            scene (ImageType): The image where percentage bars will be drawn.
-                `ImageType` is a flexible type, accepting either `numpy.ndarray`
-                or `PIL.Image.Image`.
-            detections (Detections): Object detections to annotate.
-            custom_color_lookup (Optional[np.ndarray]): Custom color lookup array.
-                Allows to override the default color mapping strategy.
-            custom_values (Optional[np.ndarray]): Custom values array to use instead
-                of the default detection confidences. This array should have the
-                same length as the number of detections and contain a value between
-                0 and 1 (inclusive) for each detection, representing the percentage
-                to be displayed.
-
-        Returns:
-            The annotated image, matching the type of `scene` (`numpy.ndarray`
-                or `PIL.Image.Image`)
-
-        Example:
-            ```python
-            import supervision as sv
-
-            image = ...
-            detections = sv.Detections(...)
-
-            percentage_bar_annotator = sv.PercentageBarAnnotator()
-            annotated_frame = percentage_bar_annotator.annotate(
-                scene=image.copy(),
-                detections=detections
-            )
-            ```
+class PercentageBarAnnotator(BaseAnnotator):
+    """
+    A class for drawing percentage bars on an image using provided detections.
+    """
+
+    def __init__(
+        self,
+        height: int = 16,
+        width: int = 80,
+        color: Union[Color, ColorPalette] = ColorPalette.DEFAULT,
+        border_color: Color = Color.BLACK,
+        position: Position = Position.TOP_CENTER,
+        color_lookup: ColorLookup = ColorLookup.CLASS,
+        border_thickness: int = None,
+    ):
+        """
+        Args:
+            height (int): The height in pixels of the percentage bar.
+            width (int): The width in pixels of the percentage bar.
+            color (Union[Color, ColorPalette]): The color or color palette to use for
+                annotating detections.
+            border_color (Color): The color of the border lines.
+            position (Position): The anchor position of drawing the percentage bar.
+            color_lookup (str): Strategy for mapping colors to annotations.
+                Options are `INDEX`, `CLASS`, `TRACK`.
+            border_thickness (int): The thickness of the border lines.
+        """
+        self.height: int = height
+        self.width: int = width
+        self.color: Union[Color, ColorPalette] = color
+        self.border_color: Color = border_color
+        self.position: Position = position
+        self.color_lookup: ColorLookup = color_lookup
+
+        if border_thickness is None:
+            self.border_thickness = int(0.15 * self.height)
+
+    @convert_for_annotation_method
+    def annotate(
+        self,
+        scene: ImageType,
+        detections: Detections,
+        custom_color_lookup: Optional[np.ndarray] = None,
+        custom_values: Optional[np.ndarray] = None,
+    ) -> ImageType:
+        """
+        Annotates the given scene with percentage bars based on the provided
+        detections. The percentage bars visually represent the confidence or custom
+        values associated with each detection.
+
+        Args:
+            scene (ImageType): The image where percentage bars will be drawn.
+                `ImageType` is a flexible type, accepting either `numpy.ndarray`
+                or `PIL.Image.Image`.
+            detections (Detections): Object detections to annotate.
+            custom_color_lookup (Optional[np.ndarray]): Custom color lookup array.
+                Allows to override the default color mapping strategy.
+            custom_values (Optional[np.ndarray]): Custom values array to use instead
+                of the default detection confidences. This array should have the
+                same length as the number of detections and contain a value between
+                0 and 1 (inclusive) for each detection, representing the percentage
+                to be displayed.
+
+        Returns:
+            The annotated image, matching the type of `scene` (`numpy.ndarray`
+                or `PIL.Image.Image`)
 
-        ![percentage-bar-example](https://media.roboflow.com/
-        supervision-annotator-examples/percentage-bar-annotator-example-purple.png)
-        """
-        self.validate_custom_values(
-            custom_values=custom_values, detections_count=len(detections)
-        )
-        anchors = detections.get_anchors_coordinates(anchor=self.position)
-        for detection_idx in range(len(detections)):
-            anchor = anchors[detection_idx]
-            border_coordinates = self.calculate_border_coordinates(
-                anchor_xy=(int(anchor[0]), int(anchor[1])),
-                border_wh=(self.width, self.height),
-                position=self.position,
-            )
-            border_width = border_coordinates[1][0] - border_coordinates[0][0]
-
-            value = (
-                custom_values[detection_idx]
-                if custom_values is not None
-                else detections.confidence[detection_idx]
-            )
-
-            color = resolve_color(
-                color=self.color,
-                detections=detections,
-                detection_idx=detection_idx,
-                color_lookup=self.color_lookup
-                if custom_color_lookup is None
-                else custom_color_lookup,
-            )
-            cv2.rectangle(
-                img=scene,
-                pt1=border_coordinates[0],
-                pt2=(
-                    border_coordinates[0][0] + int(border_width * value),
-                    border_coordinates[1][1],
-                ),
-                color=color.as_bgr(),
-                thickness=-1,
-            )
-            cv2.rectangle(
-                img=scene,
-                pt1=border_coordinates[0],
-                pt2=border_coordinates[1],
-                color=self.border_color.as_bgr(),
-                thickness=self.border_thickness,
-            )
-        return scene
-
-    @staticmethod
-    def calculate_border_coordinates(
-        anchor_xy: Tuple[int, int], border_wh: Tuple[int, int], position: Position
-    ) -> Tuple[Tuple[int, int], Tuple[int, int]]:
-        cx, cy = anchor_xy
-        width, height = border_wh
-
-        if position == Position.TOP_LEFT:
-            return (cx - width, cy - height), (cx, cy)
-        elif position == Position.TOP_CENTER:
-            return (cx - width // 2, cy), (cx + width // 2, cy - height)
-        elif position == Position.TOP_RIGHT:
-            return (cx, cy), (cx + width, cy - height)
-        elif position == Position.CENTER_LEFT:
-            return (cx - width, cy - height // 2), (cx, cy + height // 2)
-        elif position == Position.CENTER or position == Position.CENTER_OF_MASS:
-            return (
-                (cx - width // 2, cy - height // 2),
-                (cx + width // 2, cy + height // 2),
-            )
-        elif position == Position.CENTER_RIGHT:
-            return (cx, cy - height // 2), (cx + width, cy + height // 2)
-        elif position == Position.BOTTOM_LEFT:
-            return (cx - width, cy), (cx, cy + height)
-        elif position == Position.BOTTOM_CENTER:
-            return (cx - width // 2, cy), (cx + width // 2, cy + height)
-        elif position == Position.BOTTOM_RIGHT:
-            return (cx, cy), (cx + width, cy + height)
-
-    @staticmethod
-    def validate_custom_values(
-        custom_values: Optional[Union[np.ndarray, List[float]]], detections_count: int
-    ) -> None:
-        if custom_values is not None:
-            if not isinstance(custom_values, (np.ndarray, list)):
-                raise TypeError(
-                    "custom_values must be either a numpy array or a list of floats."
-                )
-
-            if len(custom_values) != detections_count:
-                raise ValueError(
-                    "The length of custom_values must match the number of detections."
-                )
-
-            if not all(0 <= value <= 1 for value in custom_values):
-                raise ValueError("All values in custom_values must be between 0 and 1.")
+        Example:
+            ```python
+            import supervision as sv
+
+            image = ...
+            detections = sv.Detections(...)
+
+            percentage_bar_annotator = sv.PercentageBarAnnotator()
+            annotated_frame = percentage_bar_annotator.annotate(
+                scene=image.copy(),
+                detections=detections
+            )
+            ```
+
+        ![percentage-bar-example](https://media.roboflow.com/
+        supervision-annotator-examples/percentage-bar-annotator-example-purple.png)
+        """
+        self.validate_custom_values(
+            custom_values=custom_values, detections_count=len(detections)
+        )
+        anchors = detections.get_anchors_coordinates(anchor=self.position)
+        for detection_idx in range(len(detections)):
+            anchor = anchors[detection_idx]
+            border_coordinates = self.calculate_border_coordinates(
+                anchor_xy=(int(anchor[0]), int(anchor[1])),
+                border_wh=(self.width, self.height),
+                position=self.position,
+            )
+            border_width = border_coordinates[1][0] - border_coordinates[0][0]
+
+            value = (
+                custom_values[detection_idx]
+                if custom_values is not None
+                else detections.confidence[detection_idx]
+            )
+
+            color = resolve_color(
+                color=self.color,
+                detections=detections,
+                detection_idx=detection_idx,
+                color_lookup=self.color_lookup
+                if custom_color_lookup is None
+                else custom_color_lookup,
+            )
+            cv2.rectangle(
+                img=scene,
+                pt1=border_coordinates[0],
+                pt2=(
+                    border_coordinates[0][0] + int(border_width * value),
+                    border_coordinates[1][1],
+                ),
+                color=color.as_bgr(),
+                thickness=-1,
+            )
+            cv2.rectangle(
+                img=scene,
+                pt1=border_coordinates[0],
+                pt2=border_coordinates[1],
+                color=self.border_color.as_bgr(),
+                thickness=self.border_thickness,
+            )
+        return scene
+
+    @staticmethod
+    def calculate_border_coordinates(
+        anchor_xy: Tuple[int, int], border_wh: Tuple[int, int], position: Position
+    ) -> Tuple[Tuple[int, int], Tuple[int, int]]:
+        cx, cy = anchor_xy
+        width, height = border_wh
+
+        if position == Position.TOP_LEFT:
+            return (cx - width, cy - height), (cx, cy)
+        elif position == Position.TOP_CENTER:
+            return (cx - width // 2, cy), (cx + width // 2, cy - height)
+        elif position == Position.TOP_RIGHT:
+            return (cx, cy), (cx + width, cy - height)
+        elif position == Position.CENTER_LEFT:
+            return (cx - width, cy - height // 2), (cx, cy + height // 2)
+        elif position == Position.CENTER or position == Position.CENTER_OF_MASS:
+            return (
+                (cx - width // 2, cy - height // 2),
+                (cx + width // 2, cy + height // 2),
+            )
+        elif position == Position.CENTER_RIGHT:
+            return (cx, cy - height // 2), (cx + width, cy + height // 2)
+        elif position == Position.BOTTOM_LEFT:
+            return (cx - width, cy), (cx, cy + height)
+        elif position == Position.BOTTOM_CENTER:
+            return (cx - width // 2, cy), (cx + width // 2, cy + height)
+        elif position == Position.BOTTOM_RIGHT:
+            return (cx, cy), (cx + width, cy + height)
+
+    @staticmethod
+    def validate_custom_values(
+        custom_values: Optional[Union[np.ndarray, List[float]]], detections_count: int
+    ) -> None:
+        if custom_values is not None:
+            if not isinstance(custom_values, (np.ndarray, list)):
+                raise TypeError(
+                    "custom_values must be either a numpy array or a list of floats."
+                )
+
+            if len(custom_values) != detections_count:
+                raise ValueError(
+                    "The length of custom_values must match the number of detections."
+                )
+
+            if not all(0 <= value <= 1 for value in custom_values):
+                raise ValueError("All values in custom_values must be between 0 and 1.")
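A minimal usage sketch of the `custom_values` path validated above, in the same placeholder style as the docstring examples (`image` and `detections` are assumed to already exist); each value is one float in `[0, 1]` per detection:

```python
import numpy as np
import supervision as sv

image = ...                      # numpy.ndarray or PIL.Image.Image placeholder
detections = sv.Detections(...)  # placeholder, as in the docstring examples

# One float in [0, 1] per detection; validate_custom_values raises a
# TypeError/ValueError for any other type, length, or range.
custom_values = np.linspace(0.1, 1.0, num=len(detections))

percentage_bar_annotator = sv.PercentageBarAnnotator()
annotated_frame = percentage_bar_annotator.annotate(
    scene=image.copy(),
    detections=detections,
    custom_values=custom_values,
)
```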
 
@@ -10348,21 +10432,7 @@

Source code in supervision/annotators/core.py -
1834
-1835
-1836
-1837
-1838
-1839
-1840
-1841
-1842
-1843
-1844
-1845
-1846
-1847
-1848
+              
1848
 1849
 1850
 1851
@@ -10378,37 +10448,51 @@ 

1861 1862 1863 -1864

def __init__(
-    self,
-    height: int = 16,
-    width: int = 80,
-    color: Union[Color, ColorPalette] = ColorPalette.DEFAULT,
-    border_color: Color = Color.BLACK,
-    position: Position = Position.TOP_CENTER,
-    color_lookup: ColorLookup = ColorLookup.CLASS,
-    border_thickness: int = None,
-):
-    """
-    Args:
-        height (int): The height in pixels of the percentage bar.
-        width (int): The width in pixels of the percentage bar.
-        color (Union[Color, ColorPalette]): The color or color palette to use for
-            annotating detections.
-        border_color (Color): The color of the border lines.
-        position (Position): The anchor position at which the percentage bar
-            is drawn. Defaults to `TOP_CENTER`.
-        color_lookup (str): Strategy for mapping colors to annotations.
-            Options are `INDEX`, `CLASS`, `TRACK`.
-        border_thickness (int): The thickness of the border lines. If `None`,
-            it defaults to `int(0.15 * height)`.
-    """
-    self.height: int = height
-    self.width: int = width
-    self.color: Union[Color, ColorPalette] = color
-    self.border_color: Color = border_color
-    self.position: Position = position
-    self.color_lookup: ColorLookup = color_lookup
-
-    if border_thickness is None:
-        self.border_thickness = int(0.15 * self.height)
+1864
+1865
+1866
+1867
+1868
+1869
+1870
+1871
+1872
+1873
+1874
+1875
+1876
+1877
+1878
def __init__(
+    self,
+    height: int = 16,
+    width: int = 80,
+    color: Union[Color, ColorPalette] = ColorPalette.DEFAULT,
+    border_color: Color = Color.BLACK,
+    position: Position = Position.TOP_CENTER,
+    color_lookup: ColorLookup = ColorLookup.CLASS,
+    border_thickness: int = None,
+):
+    """
+    Args:
+        height (int): The height in pixels of the percentage bar.
+        width (int): The width in pixels of the percentage bar.
+        color (Union[Color, ColorPalette]): The color or color palette to use for
+            annotating detections.
+        border_color (Color): The color of the border lines.
+        position (Position): The anchor position at which the percentage bar
+            is drawn. Defaults to `TOP_CENTER`.
+        color_lookup (str): Strategy for mapping colors to annotations.
+            Options are `INDEX`, `CLASS`, `TRACK`.
+        border_thickness (int): The thickness of the border lines. If `None`,
+            it defaults to `int(0.15 * height)`.
+    """
+    self.height: int = height
+    self.width: int = width
+    self.color: Union[Color, ColorPalette] = color
+    self.border_color: Color = border_color
+    self.position: Position = position
+    self.color_lookup: ColorLookup = color_lookup
+
+    if border_thickness is None:
+        self.border_thickness = int(0.15 * self.height)
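A quick numeric check of the default above, assuming only the constructor shown here: with the default `height=16` and `border_thickness=None`, the border resolves to two pixels.

```python
import supervision as sv

annotator = sv.PercentageBarAnnotator()  # height=16, border_thickness=None
print(annotator.border_thickness)        # int(0.15 * 16) == 2
```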
 
@@ -10551,21 +10635,7 @@

Source code in supervision/annotators/core.py -
1866
-1867
-1868
-1869
-1870
-1871
-1872
-1873
-1874
-1875
-1876
-1877
-1878
-1879
-1880
+              
1880
 1881
 1882
 1883
@@ -10642,98 +10712,112 @@ 

1954 1955 1956 -1957

@convert_for_annotation_method
-def annotate(
-    self,
-    scene: ImageType,
-    detections: Detections,
-    custom_color_lookup: Optional[np.ndarray] = None,
-    custom_values: Optional[np.ndarray] = None,
-) -> ImageType:
-    """
-    Annotates the given scene with percentage bars based on the provided
-    detections. The percentage bars visually represent the confidence or custom
-    values associated with each detection.
-
-    Args:
-        scene (ImageType): The image where percentage bars will be drawn.
-            `ImageType` is a flexible type, accepting either `numpy.ndarray`
-            or `PIL.Image.Image`.
-        detections (Detections): Object detections to annotate.
-        custom_color_lookup (Optional[np.ndarray]): Custom color lookup array.
-            Allows overriding the default color mapping strategy.
-        custom_values (Optional[np.ndarray]): Custom values array to use instead
-            of the default detection confidences. This array should have the
-            same length as the number of detections and contain a value between
-            0 and 1 (inclusive) for each detection, representing the percentage
-            to be displayed.
-
-    Returns:
-        The annotated image, matching the type of `scene` (`numpy.ndarray`
-            or `PIL.Image.Image`)
-
-    Example:
-        ```python
-        import supervision as sv
-
-        image = ...
-        detections = sv.Detections(...)
-
-        percentage_bar_annotator = sv.PercentageBarAnnotator()
-        annotated_frame = percentage_bar_annotator.annotate(
-            scene=image.copy(),
-            detections=detections
-        )
-        ```
+1957
+1958
+1959
+1960
+1961
+1962
+1963
+1964
+1965
+1966
+1967
+1968
+1969
+1970
+1971
@convert_for_annotation_method
+def annotate(
+    self,
+    scene: ImageType,
+    detections: Detections,
+    custom_color_lookup: Optional[np.ndarray] = None,
+    custom_values: Optional[np.ndarray] = None,
+) -> ImageType:
+    """
+    Annotates the given scene with percentage bars based on the provided
+    detections. The percentage bars visually represent the confidence or custom
+    values associated with each detection.
+
+    Args:
+        scene (ImageType): The image where percentage bars will be drawn.
+            `ImageType` is a flexible type, accepting either `numpy.ndarray`
+            or `PIL.Image.Image`.
+        detections (Detections): Object detections to annotate.
+        custom_color_lookup (Optional[np.ndarray]): Custom color lookup array.
+            Allows overriding the default color mapping strategy.
+        custom_values (Optional[np.ndarray]): Custom values array to use instead
+            of the default detection confidences. This array should have the
+            same length as the number of detections and contain a value between
+            0 and 1 (inclusive) for each detection, representing the percentage
+            to be displayed.
+
+    Returns:
+        The annotated image, matching the type of `scene` (`numpy.ndarray`
+            or `PIL.Image.Image`)
 
-    ![percentage-bar-example](https://media.roboflow.com/
-    supervision-annotator-examples/percentage-bar-annotator-example-purple.png)
-    """
-    self.validate_custom_values(
-        custom_values=custom_values, detections_count=len(detections)
-    )
-    anchors = detections.get_anchors_coordinates(anchor=self.position)
-    for detection_idx in range(len(detections)):
-        anchor = anchors[detection_idx]
-        border_coordinates = self.calculate_border_coordinates(
-            anchor_xy=(int(anchor[0]), int(anchor[1])),
-            border_wh=(self.width, self.height),
-            position=self.position,
-        )
-        border_width = border_coordinates[1][0] - border_coordinates[0][0]
-
-        value = (
-            custom_values[detection_idx]
-            if custom_values is not None
-            else detections.confidence[detection_idx]
-        )
-
-        color = resolve_color(
-            color=self.color,
-            detections=detections,
-            detection_idx=detection_idx,
-            color_lookup=self.color_lookup
-            if custom_color_lookup is None
-            else custom_color_lookup,
-        )
-        cv2.rectangle(
-            img=scene,
-            pt1=border_coordinates[0],
-            pt2=(
-                border_coordinates[0][0] + int(border_width * value),
-                border_coordinates[1][1],
-            ),
-            color=color.as_bgr(),
-            thickness=-1,
-        )
-        cv2.rectangle(
-            img=scene,
-            pt1=border_coordinates[0],
-            pt2=border_coordinates[1],
-            color=self.border_color.as_bgr(),
-            thickness=self.border_thickness,
-        )
-    return scene
+    Example:
+        ```python
+        import supervision as sv
+
+        image = ...
+        detections = sv.Detections(...)
+
+        percentage_bar_annotator = sv.PercentageBarAnnotator()
+        annotated_frame = percentage_bar_annotator.annotate(
+            scene=image.copy(),
+            detections=detections
+        )
+        ```
+
+    ![percentage-bar-example](https://media.roboflow.com/
+    supervision-annotator-examples/percentage-bar-annotator-example-purple.png)
+    """
+    self.validate_custom_values(
+        custom_values=custom_values, detections_count=len(detections)
+    )
+    anchors = detections.get_anchors_coordinates(anchor=self.position)
+    for detection_idx in range(len(detections)):
+        anchor = anchors[detection_idx]
+        border_coordinates = self.calculate_border_coordinates(
+            anchor_xy=(int(anchor[0]), int(anchor[1])),
+            border_wh=(self.width, self.height),
+            position=self.position,
+        )
+        border_width = border_coordinates[1][0] - border_coordinates[0][0]
+
+        value = (
+            custom_values[detection_idx]
+            if custom_values is not None
+            else detections.confidence[detection_idx]
+        )
+
+        color = resolve_color(
+            color=self.color,
+            detections=detections,
+            detection_idx=detection_idx,
+            color_lookup=self.color_lookup
+            if custom_color_lookup is None
+            else custom_color_lookup,
+        )
+        cv2.rectangle(
+            img=scene,
+            pt1=border_coordinates[0],
+            pt2=(
+                border_coordinates[0][0] + int(border_width * value),
+                border_coordinates[1][1],
+            ),
+            color=color.as_bgr(),
+            thickness=-1,
+        )
+        cv2.rectangle(
+            img=scene,
+            pt1=border_coordinates[0],
+            pt2=border_coordinates[1],
+            color=self.border_color.as_bgr(),
+            thickness=self.border_thickness,
+        )
+    return scene
 
@@ -10766,15 +10850,7 @@

HeatMapAnnotator
Source code in supervision/annotators/core.py -
1435
-1436
-1437
-1438
-1439
-1440
-1441
-1442
-1443
+                
1443
 1444
 1445
 1446
@@ -10861,102 +10937,110 @@ 

HeatMapAnnotator 1527 1528 1529 -1530

class HeatMapAnnotator:
-    """
-    A class for drawing heatmaps on an image based on provided detections.
-    Heat accumulates over time and is drawn as a semi-transparent overlay
-    of blurred circles.
-    """
-
-    def __init__(
-        self,
-        position: Position = Position.BOTTOM_CENTER,
-        opacity: float = 0.2,
-        radius: int = 40,
-        kernel_size: int = 25,
-        top_hue: int = 0,
-        low_hue: int = 125,
-    ):
-        """
-        Args:
-            position (Position): The anchor point of each detection at which
-                heat is accumulated. Defaults to `BOTTOM_CENTER`.
-            opacity (float): Opacity of the overlay mask, between 0 and 1.
-            radius (int): Radius of the heat circle.
-            kernel_size (int): Kernel size for blurring the heatmap.
-            top_hue (int): Hue used for the highest heat values. Defaults to 0 (red).
-            low_hue (int): Hue used for the lowest heat values. Defaults to 125 (blue).
-        """
-        self.position = position
-        self.opacity = opacity
-        self.radius = radius
-        self.kernel_size = kernel_size
-        self.heat_mask = None
-        self.top_hue = top_hue
-        self.low_hue = low_hue
-
-    @convert_for_annotation_method
-    def annotate(self, scene: ImageType, detections: Detections) -> ImageType:
-        """
-        Annotates the scene with a heatmap based on the provided detections.
-
-        Args:
-            scene (ImageType): The image where the heatmap will be drawn.
-                `ImageType` is a flexible type, accepting either `numpy.ndarray`
-                or `PIL.Image.Image`.
-            detections (Detections): Object detections to annotate.
-
-        Returns:
-            The annotated image, matching the type of `scene` (`numpy.ndarray`
-                or `PIL.Image.Image`)
-
-        Example:
-            ```python
-            import supervision as sv
-            from ultralytics import YOLO
-
-            model = YOLO('yolov8x.pt')
-
-            heat_map_annotator = sv.HeatMapAnnotator()
-
-            video_info = sv.VideoInfo.from_video_path(video_path='...')
-            frames_generator = sv.get_video_frames_generator(source_path='...')
-
-            with sv.VideoSink(target_path='...', video_info=video_info) as sink:
-               for frame in frames_generator:
-                   result = model(frame)[0]
-                   detections = sv.Detections.from_ultralytics(result)
-                   annotated_frame = heat_map_annotator.annotate(
-                       scene=frame.copy(),
-                       detections=detections)
-                   sink.write_frame(frame=annotated_frame)
-            ```
-
-        ![heatmap-annotator-example](https://media.roboflow.com/
-        supervision-annotator-examples/heat-map-annotator-example-purple.png)
-        """
-
-        if self.heat_mask is None:
-            self.heat_mask = np.zeros(scene.shape[:2])
-        mask = np.zeros(scene.shape[:2])
-        for xy in detections.get_anchors_coordinates(self.position):
-            cv2.circle(mask, (int(xy[0]), int(xy[1])), self.radius, 1, -1)
-        self.heat_mask = mask + self.heat_mask
-        temp = self.heat_mask.copy()
-        temp = self.low_hue - temp / temp.max() * (self.low_hue - self.top_hue)
-        temp = temp.astype(np.uint8)
-        if self.kernel_size is not None:
-            temp = cv2.blur(temp, (self.kernel_size, self.kernel_size))
-        hsv = np.zeros(scene.shape)
-        hsv[..., 0] = temp
-        hsv[..., 1] = 255
-        hsv[..., 2] = 255
-        temp = cv2.cvtColor(hsv.astype(np.uint8), cv2.COLOR_HSV2BGR)
-        mask = cv2.cvtColor(self.heat_mask.astype(np.uint8), cv2.COLOR_GRAY2BGR) > 0
-        scene[mask] = cv2.addWeighted(temp, self.opacity, scene, 1 - self.opacity, 0)[
-            mask
-        ]
-        return scene
+1530
+1531
+1532
+1533
+1534
+1535
+1536
+1537
+1538
class HeatMapAnnotator:
+    """
+    A class for drawing heatmaps on an image based on provided detections.
+    Heat accumulates over time and is drawn as a semi-transparent overlay
+    of blurred circles.
+    """
+
+    def __init__(
+        self,
+        position: Position = Position.BOTTOM_CENTER,
+        opacity: float = 0.2,
+        radius: int = 40,
+        kernel_size: int = 25,
+        top_hue: int = 0,
+        low_hue: int = 125,
+    ):
+        """
+        Args:
+            position (Position): The anchor point of each detection at which
+                heat is accumulated. Defaults to `BOTTOM_CENTER`.
+            opacity (float): Opacity of the overlay mask, between 0 and 1.
+            radius (int): Radius of the heat circle.
+            kernel_size (int): Kernel size for blurring the heatmap.
+            top_hue (int): Hue used for the highest heat values. Defaults to 0 (red).
+            low_hue (int): Hue used for the lowest heat values. Defaults to 125 (blue).
+        """
+        self.position = position
+        self.opacity = opacity
+        self.radius = radius
+        self.kernel_size = kernel_size
+        self.heat_mask = None
+        self.top_hue = top_hue
+        self.low_hue = low_hue
+
+    @convert_for_annotation_method
+    def annotate(self, scene: ImageType, detections: Detections) -> ImageType:
+        """
+        Annotates the scene with a heatmap based on the provided detections.
+
+        Args:
+            scene (ImageType): The image where the heatmap will be drawn.
+                `ImageType` is a flexible type, accepting either `numpy.ndarray`
+                or `PIL.Image.Image`.
+            detections (Detections): Object detections to annotate.
+
+        Returns:
+            The annotated image, matching the type of `scene` (`numpy.ndarray`
+                or `PIL.Image.Image`)
+
+        Example:
+            ```python
+            import supervision as sv
+            from ultralytics import YOLO
+
+            model = YOLO('yolov8x.pt')
+
+            heat_map_annotator = sv.HeatMapAnnotator()
+
+            video_info = sv.VideoInfo.from_video_path(video_path='...')
+            frames_generator = sv.get_video_frames_generator(source_path='...')
+
+            with sv.VideoSink(target_path='...', video_info=video_info) as sink:
+               for frame in frames_generator:
+                   result = model(frame)[0]
+                   detections = sv.Detections.from_ultralytics(result)
+                   annotated_frame = heat_map_annotator.annotate(
+                       scene=frame.copy(),
+                       detections=detections)
+                   sink.write_frame(frame=annotated_frame)
+            ```
+
+        ![heatmap-annotator-example](https://media.roboflow.com/
+        supervision-annotator-examples/heat-map-annotator-example-purple.png)
+        """
+
+        if self.heat_mask is None:
+            self.heat_mask = np.zeros(scene.shape[:2])
+        mask = np.zeros(scene.shape[:2])
+        for xy in detections.get_anchors_coordinates(self.position):
+            cv2.circle(mask, (int(xy[0]), int(xy[1])), self.radius, 1, -1)
+        self.heat_mask = mask + self.heat_mask
+        temp = self.heat_mask.copy()
+        temp = self.low_hue - temp / temp.max() * (self.low_hue - self.top_hue)
+        temp = temp.astype(np.uint8)
+        if self.kernel_size is not None:
+            temp = cv2.blur(temp, (self.kernel_size, self.kernel_size))
+        hsv = np.zeros(scene.shape)
+        hsv[..., 0] = temp
+        hsv[..., 1] = 255
+        hsv[..., 2] = 255
+        temp = cv2.cvtColor(hsv.astype(np.uint8), cv2.COLOR_HSV2BGR)
+        mask = cv2.cvtColor(self.heat_mask.astype(np.uint8), cv2.COLOR_GRAY2BGR) > 0
+        scene[mask] = cv2.addWeighted(temp, self.opacity, scene, 1 - self.opacity, 0)[
+            mask
+        ]
+        return scene
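A small numeric sketch of the hue interpolation in `annotate` above, using the default `top_hue=0` and `low_hue=125`; the `heat` array stands in for a few cells of the accumulated `heat_mask`:

```python
import numpy as np

top_hue, low_hue = 0, 125
heat = np.array([1.0, 2.0, 4.0])  # accumulated hits for three pixels

# Same formula as in annotate: the hottest pixel maps to top_hue (red),
# cooler pixels drift toward low_hue (blue).
hue = low_hue - heat / heat.max() * (low_hue - top_hue)
print(hue)  # approximately [93.75, 62.5, 0.0]
```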
 
@@ -11086,15 +11170,7 @@

Source code in supervision/annotators/core.py -
1442
-1443
-1444
-1445
-1446
-1447
-1448
-1449
-1450
+              
1450
 1451
 1452
 1453
@@ -11111,32 +11187,40 @@ 

1464 1465 1466 -1467

def __init__(
-    self,
-    position: Position = Position.BOTTOM_CENTER,
-    opacity: float = 0.2,
-    radius: int = 40,
-    kernel_size: int = 25,
-    top_hue: int = 0,
-    low_hue: int = 125,
-):
-    """
-    Args:
-        position (Position): The anchor point of each detection at which
-            heat is accumulated. Defaults to `BOTTOM_CENTER`.
-        opacity (float): Opacity of the overlay mask, between 0 and 1.
-        radius (int): Radius of the heat circle.
-        kernel_size (int): Kernel size for blurring the heatmap.
-        top_hue (int): Hue used for the highest heat values. Defaults to 0 (red).
-        low_hue (int): Hue used for the lowest heat values. Defaults to 125 (blue).
-    """
-    self.position = position
-    self.opacity = opacity
-    self.radius = radius
-    self.kernel_size = kernel_size
-    self.heat_mask = None
-    self.top_hue = top_hue
-    self.low_hue = low_hue
+1467
+1468
+1469
+1470
+1471
+1472
+1473
+1474
+1475
def __init__(
+    self,
+    position: Position = Position.BOTTOM_CENTER,
+    opacity: float = 0.2,
+    radius: int = 40,
+    kernel_size: int = 25,
+    top_hue: int = 0,
+    low_hue: int = 125,
+):
+    """
+    Args:
+        position (Position): The anchor point of each detection at which
+            heat is accumulated. Defaults to `BOTTOM_CENTER`.
+        opacity (float): Opacity of the overlay mask, between 0 and 1.
+        radius (int): Radius of the heat circle.
+        kernel_size (int): Kernel size for blurring the heatmap.
+        top_hue (int): Hue used for the highest heat values. Defaults to 0 (red).
+        low_hue (int): Hue used for the lowest heat values. Defaults to 125 (blue).
+    """
+    self.position = position
+    self.opacity = opacity
+    self.radius = radius
+    self.kernel_size = kernel_size
+    self.heat_mask = None
+    self.top_hue = top_hue
+    self.low_hue = low_hue
 
@@ -11252,15 +11336,7 @@

Source code in supervision/annotators/core.py -
1469
-1470
-1471
-1472
-1473
-1474
-1475
-1476
-1477
+              
1477
 1478
 1479
 1480
@@ -11313,68 +11389,76 @@ 

1527 1528 1529 -1530

@convert_for_annotation_method
-def annotate(self, scene: ImageType, detections: Detections) -> ImageType:
-    """
-    Annotates the scene with a heatmap based on the provided detections.
-
-    Args:
-        scene (ImageType): The image where the heatmap will be drawn.
-            `ImageType` is a flexible type, accepting either `numpy.ndarray`
-            or `PIL.Image.Image`.
-        detections (Detections): Object detections to annotate.
-
-    Returns:
-        The annotated image, matching the type of `scene` (`numpy.ndarray`
-            or `PIL.Image.Image`)
-
-    Example:
-        ```python
-        import supervision as sv
-        from ultralytics import YOLO
-
-        model = YOLO('yolov8x.pt')
-
-        heat_map_annotator = sv.HeatMapAnnotator()
-
-        video_info = sv.VideoInfo.from_video_path(video_path='...')
-        frames_generator = sv.get_video_frames_generator(source_path='...')
-
-        with sv.VideoSink(target_path='...', video_info=video_info) as sink:
-           for frame in frames_generator:
-               result = model(frame)[0]
-               detections = sv.Detections.from_ultralytics(result)
-               annotated_frame = heat_map_annotator.annotate(
-                   scene=frame.copy(),
-                   detections=detections)
-               sink.write_frame(frame=annotated_frame)
-        ```
-
-    ![heatmap-annotator-example](https://media.roboflow.com/
-    supervision-annotator-examples/heat-map-annotator-example-purple.png)
-    """
-
-    if self.heat_mask is None:
-        self.heat_mask = np.zeros(scene.shape[:2])
-    mask = np.zeros(scene.shape[:2])
-    for xy in detections.get_anchors_coordinates(self.position):
-        cv2.circle(mask, (int(xy[0]), int(xy[1])), self.radius, 1, -1)
-    self.heat_mask = mask + self.heat_mask
-    temp = self.heat_mask.copy()
-    temp = self.low_hue - temp / temp.max() * (self.low_hue - self.top_hue)
-    temp = temp.astype(np.uint8)
-    if self.kernel_size is not None:
-        temp = cv2.blur(temp, (self.kernel_size, self.kernel_size))
-    hsv = np.zeros(scene.shape)
-    hsv[..., 0] = temp
-    hsv[..., 1] = 255
-    hsv[..., 2] = 255
-    temp = cv2.cvtColor(hsv.astype(np.uint8), cv2.COLOR_HSV2BGR)
-    mask = cv2.cvtColor(self.heat_mask.astype(np.uint8), cv2.COLOR_GRAY2BGR) > 0
-    scene[mask] = cv2.addWeighted(temp, self.opacity, scene, 1 - self.opacity, 0)[
-        mask
-    ]
-    return scene
+1530
+1531
+1532
+1533
+1534
+1535
+1536
+1537
+1538
@convert_for_annotation_method
+def annotate(self, scene: ImageType, detections: Detections) -> ImageType:
+    """
+    Annotates the scene with a heatmap based on the provided detections.
+
+    Args:
+        scene (ImageType): The image where the heatmap will be drawn.
+            `ImageType` is a flexible type, accepting either `numpy.ndarray`
+            or `PIL.Image.Image`.
+        detections (Detections): Object detections to annotate.
+
+    Returns:
+        The annotated image, matching the type of `scene` (`numpy.ndarray`
+            or `PIL.Image.Image`)
+
+    Example:
+        ```python
+        import supervision as sv
+        from ultralytics import YOLO
+
+        model = YOLO('yolov8x.pt')
+
+        heat_map_annotator = sv.HeatMapAnnotator()
+
+        video_info = sv.VideoInfo.from_video_path(video_path='...')
+        frames_generator = sv.get_video_frames_generator(source_path='...')
+
+        with sv.VideoSink(target_path='...', video_info=video_info) as sink:
+           for frame in frames_generator:
+               result = model(frame)[0]
+               detections = sv.Detections.from_ultralytics(result)
+               annotated_frame = heat_map_annotator.annotate(
+                   scene=frame.copy(),
+                   detections=detections)
+               sink.write_frame(frame=annotated_frame)
+        ```
+
+    ![heatmap-annotator-example](https://media.roboflow.com/
+    supervision-annotator-examples/heat-map-annotator-example-purple.png)
+    """
+
+    if self.heat_mask is None:
+        self.heat_mask = np.zeros(scene.shape[:2])
+    mask = np.zeros(scene.shape[:2])
+    for xy in detections.get_anchors_coordinates(self.position):
+        cv2.circle(mask, (int(xy[0]), int(xy[1])), self.radius, 1, -1)
+    self.heat_mask = mask + self.heat_mask
+    temp = self.heat_mask.copy()
+    temp = self.low_hue - temp / temp.max() * (self.low_hue - self.top_hue)
+    temp = temp.astype(np.uint8)
+    if self.kernel_size is not None:
+        temp = cv2.blur(temp, (self.kernel_size, self.kernel_size))
+    hsv = np.zeros(scene.shape)
+    hsv[..., 0] = temp
+    hsv[..., 1] = 255
+    hsv[..., 2] = 255
+    temp = cv2.cvtColor(hsv.astype(np.uint8), cv2.COLOR_HSV2BGR)
+    mask = cv2.cvtColor(self.heat_mask.astype(np.uint8), cv2.COLOR_GRAY2BGR) > 0
+    scene[mask] = cv2.addWeighted(temp, self.opacity, scene, 1 - self.opacity, 0)[
+        mask
+    ]
+    return scene
 
@@ -12543,15 +12627,7 @@

LabelAnnotator

Source code in supervision/annotators/core.py -
 905
- 906
- 907
- 908
- 909
- 910
- 911
- 912
- 913
+                
 913
  914
  915
  916
@@ -12741,205 +12817,213 @@ 

LabelAnnotator

1100 1101 1102 -1103
class LabelAnnotator:
-    """
-    A class for annotating labels on an image using provided detections.
-    """
-
-    def __init__(
-        self,
-        color: Union[Color, ColorPalette] = ColorPalette.DEFAULT,
-        text_color: Color = Color.WHITE,
-        text_scale: float = 0.5,
-        text_thickness: int = 1,
-        text_padding: int = 10,
-        text_position: Position = Position.TOP_LEFT,
-        color_lookup: ColorLookup = ColorLookup.CLASS,
-        border_radius: int = 0,
-    ):
-        """
-        Args:
-            color (Union[Color, ColorPalette]): The color or color palette to use for
-                annotating the text background.
-            text_color (Color): The color to use for the text.
-            text_scale (float): Font scale for the text.
-            text_thickness (int): Thickness of the text characters.
-            text_padding (int): Padding around the text within its background box.
-            text_position (Position): Position of the text relative to the detection.
-                Possible values are defined in the `Position` enum.
-            color_lookup (str): Strategy for mapping colors to annotations.
-                Options are `INDEX`, `CLASS`, `TRACK`.
-            border_radius (int): The radius used to round the corners of the label
-                background. If the value exceeds the smaller dimension (width or
-                height), it is clipped.
-        """
-        self.border_radius: int = border_radius
-        self.color: Union[Color, ColorPalette] = color
-        self.text_color: Color = text_color
-        self.text_scale: float = text_scale
-        self.text_thickness: int = text_thickness
-        self.text_padding: int = text_padding
-        self.text_anchor: Position = text_position
-        self.color_lookup: ColorLookup = color_lookup
-
-    @convert_for_annotation_method
-    def annotate(
-        self,
-        scene: ImageType,
-        detections: Detections,
-        labels: List[str] = None,
-        custom_color_lookup: Optional[np.ndarray] = None,
-    ) -> ImageType:
-        """
-        Annotates the given scene with labels based on the provided detections.
-
-        Args:
-            scene (ImageType): The image where labels will be drawn.
-                `ImageType` is a flexible type, accepting either `numpy.ndarray`
-                or `PIL.Image.Image`.
-            detections (Detections): Object detections to annotate.
-            labels (List[str]): Optional. Custom labels for each detection.
-            custom_color_lookup (Optional[np.ndarray]): Custom color lookup array.
-                Allows overriding the default color mapping strategy.
-
-        Returns:
-            The annotated image, matching the type of `scene` (`numpy.ndarray`
-                or `PIL.Image.Image`)
-
-        Example:
-            ```python
-            import supervision as sv
+1103
+1104
+1105
+1106
+1107
+1108
+1109
+1110
+1111
class LabelAnnotator:
+    """
+    A class for annotating labels on an image using provided detections.
+    """
+
+    def __init__(
+        self,
+        color: Union[Color, ColorPalette] = ColorPalette.DEFAULT,
+        text_color: Color = Color.WHITE,
+        text_scale: float = 0.5,
+        text_thickness: int = 1,
+        text_padding: int = 10,
+        text_position: Position = Position.TOP_LEFT,
+        color_lookup: ColorLookup = ColorLookup.CLASS,
+        border_radius: int = 0,
+    ):
+        """
+        Args:
+            color (Union[Color, ColorPalette]): The color or color palette to use for
+                annotating the text background.
+            text_color (Color): The color to use for the text.
+            text_scale (float): Font scale for the text.
+            text_thickness (int): Thickness of the text characters.
+            text_padding (int): Padding around the text within its background box.
+            text_position (Position): Position of the text relative to the detection.
+                Possible values are defined in the `Position` enum.
+            color_lookup (str): Strategy for mapping colors to annotations.
+                Options are `INDEX`, `CLASS`, `TRACK`.
+            border_radius (int): The radius used to round the corners of the label
+                background. If the value exceeds the smaller dimension (width or
+                height), it is clipped.
+        """
+        self.border_radius: int = border_radius
+        self.color: Union[Color, ColorPalette] = color
+        self.text_color: Color = text_color
+        self.text_scale: float = text_scale
+        self.text_thickness: int = text_thickness
+        self.text_padding: int = text_padding
+        self.text_anchor: Position = text_position
+        self.color_lookup: ColorLookup = color_lookup
+
+    @convert_for_annotation_method
+    def annotate(
+        self,
+        scene: ImageType,
+        detections: Detections,
+        labels: List[str] = None,
+        custom_color_lookup: Optional[np.ndarray] = None,
+    ) -> ImageType:
+        """
+        Annotates the given scene with labels based on the provided detections.
+
+        Args:
+            scene (ImageType): The image where labels will be drawn.
+                `ImageType` is a flexible type, accepting either `numpy.ndarray`
+                or `PIL.Image.Image`.
+            detections (Detections): Object detections to annotate.
+            labels (List[str]): Optional. Custom labels for each detection.
+            custom_color_lookup (Optional[np.ndarray]): Custom color lookup array.
+                Allows overriding the default color mapping strategy.
 
-            image = ...
-            detections = sv.Detections(...)
-
-            labels = [
-                f"{class_name} {confidence:.2f}"
-                for class_name, confidence
-                in zip(detections['class_name'], detections.confidence)
-            ]
-
-            label_annotator = sv.LabelAnnotator(text_position=sv.Position.CENTER)
-            annotated_frame = label_annotator.annotate(
-                scene=image.copy(),
-                detections=detections,
-                labels=labels
-            )
-            ```
+        Returns:
+            The annotated image, matching the type of `scene` (`numpy.ndarray`
+                or `PIL.Image.Image`)
+
+        Example:
+            ```python
+            import supervision as sv
+
+            image = ...
+            detections = sv.Detections(...)
+
+            labels = [
+                f"{class_name} {confidence:.2f}"
+                for class_name, confidence
+                in zip(detections['class_name'], detections.confidence)
+            ]
 
-        ![label-annotator-example](https://media.roboflow.com/
-        supervision-annotator-examples/label-annotator-example-purple.png)
-        """
-        font = cv2.FONT_HERSHEY_SIMPLEX
-        anchors_coordinates = detections.get_anchors_coordinates(
-            anchor=self.text_anchor
-        ).astype(int)
-        if labels is not None and len(labels) != len(detections):
-            raise ValueError(
-                f"The number of labels provided ({len(labels)}) does not match the "
-                f"number of detections ({len(detections)}). Each detection should have "
-                f"a corresponding label. This discrepancy can occur if the labels and "
-                f"detections are not aligned or if an incorrect number of labels has "
-                f"been provided. Please ensure that the labels array has the same "
-                f"length as the Detections object."
-            )
-
-        for detection_idx, center_coordinates in enumerate(anchors_coordinates):
-            color = resolve_color(
-                color=self.color,
-                detections=detections,
-                detection_idx=detection_idx,
-                color_lookup=(
-                    self.color_lookup
-                    if custom_color_lookup is None
-                    else custom_color_lookup
-                ),
-            )
-
-            if labels is not None:
-                text = labels[detection_idx]
-            elif detections[CLASS_NAME_DATA_FIELD] is not None:
-                text = detections[CLASS_NAME_DATA_FIELD][detection_idx]
-            elif detections.class_id is not None:
-                text = str(detections.class_id[detection_idx])
-            else:
-                text = str(detection_idx)
-
-            text_w, text_h = cv2.getTextSize(
-                text=text,
-                fontFace=font,
-                fontScale=self.text_scale,
-                thickness=self.text_thickness,
-            )[0]
-            text_w_padded = text_w + 2 * self.text_padding
-            text_h_padded = text_h + 2 * self.text_padding
-            text_background_xyxy = resolve_text_background_xyxy(
-                center_coordinates=tuple(center_coordinates),
-                text_wh=(text_w_padded, text_h_padded),
-                position=self.text_anchor,
-            )
-
-            text_x = text_background_xyxy[0] + self.text_padding
-            text_y = text_background_xyxy[1] + self.text_padding + text_h
-
-            self.draw_rounded_rectangle(
-                scene=scene,
-                xyxy=text_background_xyxy,
-                color=color.as_bgr(),
-                border_radius=self.border_radius,
-            )
-            cv2.putText(
-                img=scene,
-                text=text,
-                org=(text_x, text_y),
-                fontFace=font,
-                fontScale=self.text_scale,
-                color=self.text_color.as_rgb(),
-                thickness=self.text_thickness,
-                lineType=cv2.LINE_AA,
-            )
-        return scene
-
-    @staticmethod
-    def draw_rounded_rectangle(
-        scene: np.ndarray,
-        xyxy: Tuple[int, int, int, int],
-        color: Tuple[int, int, int],
-        border_radius: int,
-    ) -> np.ndarray:
-        x1, y1, x2, y2 = xyxy
-        width = x2 - x1
-        height = y2 - y1
-
-        border_radius = min(border_radius, min(width, height) // 2)
-
-        rectangle_coordinates = [
-            ((x1 + border_radius, y1), (x2 - border_radius, y2)),
-            ((x1, y1 + border_radius), (x2, y2 - border_radius)),
-        ]
-        circle_centers = [
-            (x1 + border_radius, y1 + border_radius),
-            (x2 - border_radius, y1 + border_radius),
-            (x1 + border_radius, y2 - border_radius),
-            (x2 - border_radius, y2 - border_radius),
-        ]
-
-        for coordinates in rectangle_coordinates:
-            cv2.rectangle(
-                img=scene,
-                pt1=coordinates[0],
-                pt2=coordinates[1],
-                color=color,
-                thickness=-1,
-            )
-        for center in circle_centers:
-            cv2.circle(
+            label_annotator = sv.LabelAnnotator(text_position=sv.Position.CENTER)
+            annotated_frame = label_annotator.annotate(
+                scene=image.copy(),
+                detections=detections,
+                labels=labels
+            )
+            ```
+
+        ![label-annotator-example](https://media.roboflow.com/
+        supervision-annotator-examples/label-annotator-example-purple.png)
+        """
+        font = cv2.FONT_HERSHEY_SIMPLEX
+        anchors_coordinates = detections.get_anchors_coordinates(
+            anchor=self.text_anchor
+        ).astype(int)
+        if labels is not None and len(labels) != len(detections):
+            raise ValueError(
+                f"The number of labels provided ({len(labels)}) does not match the "
+                f"number of detections ({len(detections)}). Each detection should have "
+                f"a corresponding label. This discrepancy can occur if the labels and "
+                f"detections are not aligned or if an incorrect number of labels has "
+                f"been provided. Please ensure that the labels array has the same "
+                f"length as the Detections object."
+            )
+
+        for detection_idx, center_coordinates in enumerate(anchors_coordinates):
+            color = resolve_color(
+                color=self.color,
+                detections=detections,
+                detection_idx=detection_idx,
+                color_lookup=(
+                    self.color_lookup
+                    if custom_color_lookup is None
+                    else custom_color_lookup
+                ),
+            )
+
+            if labels is not None:
+                text = labels[detection_idx]
+            elif detections[CLASS_NAME_DATA_FIELD] is not None:
+                text = detections[CLASS_NAME_DATA_FIELD][detection_idx]
+            elif detections.class_id is not None:
+                text = str(detections.class_id[detection_idx])
+            else:
+                text = str(detection_idx)
+
+            text_w, text_h = cv2.getTextSize(
+                text=text,
+                fontFace=font,
+                fontScale=self.text_scale,
+                thickness=self.text_thickness,
+            )[0]
+            text_w_padded = text_w + 2 * self.text_padding
+            text_h_padded = text_h + 2 * self.text_padding
+            text_background_xyxy = resolve_text_background_xyxy(
+                center_coordinates=tuple(center_coordinates),
+                text_wh=(text_w_padded, text_h_padded),
+                position=self.text_anchor,
+            )
+
+            text_x = text_background_xyxy[0] + self.text_padding
+            text_y = text_background_xyxy[1] + self.text_padding + text_h
+
+            self.draw_rounded_rectangle(
+                scene=scene,
+                xyxy=text_background_xyxy,
+                color=color.as_bgr(),
+                border_radius=self.border_radius,
+            )
+            cv2.putText(
+                img=scene,
+                text=text,
+                org=(text_x, text_y),
+                fontFace=font,
+                fontScale=self.text_scale,
+                color=self.text_color.as_rgb(),
+                thickness=self.text_thickness,
+                lineType=cv2.LINE_AA,
+            )
+        return scene
+
+    @staticmethod
+    def draw_rounded_rectangle(
+        scene: np.ndarray,
+        xyxy: Tuple[int, int, int, int],
+        color: Tuple[int, int, int],
+        border_radius: int,
+    ) -> np.ndarray:
+        x1, y1, x2, y2 = xyxy
+        width = x2 - x1
+        height = y2 - y1
+
+        border_radius = min(border_radius, min(width, height) // 2)
+
+        rectangle_coordinates = [
+            ((x1 + border_radius, y1), (x2 - border_radius, y2)),
+            ((x1, y1 + border_radius), (x2, y2 - border_radius)),
+        ]
+        circle_centers = [
+            (x1 + border_radius, y1 + border_radius),
+            (x2 - border_radius, y1 + border_radius),
+            (x1 + border_radius, y2 - border_radius),
+            (x2 - border_radius, y2 - border_radius),
+        ]
+
+        for coordinates in rectangle_coordinates:
+            cv2.rectangle(
                 img=scene,
-                center=center,
-                radius=border_radius,
+                pt1=coordinates[0],
+                pt2=coordinates[1],
                 color=color,
                 thickness=-1,
             )
-        return scene
+        for center in circle_centers:
+            cv2.circle(
+                img=scene,
+                center=center,
+                radius=border_radius,
+                color=color,
+                thickness=-1,
+            )
+        return scene
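A short sketch exercising the `draw_rounded_rectangle` helper shown above on a blank canvas; the canvas size, box coordinates, and BGR color are arbitrary illustration values:

```python
import numpy as np
import supervision as sv

canvas = np.zeros((120, 240, 3), dtype=np.uint8)

# Two filled rectangles plus four corner circles, as in the source above;
# a larger border_radius would be clipped to half of the smaller box side.
sv.LabelAnnotator.draw_rounded_rectangle(
    scene=canvas,
    xyxy=(20, 20, 220, 100),
    color=(203, 56, 255),
    border_radius=16,
)
```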
 
@@ -13100,15 +13184,7 @@

Source code in supervision/annotators/core.py -
910
-911
-912
-913
-914
-915
-916
-917
-918
+              
918
 919
 920
 921
@@ -13133,40 +13209,48 @@ 

940 941 942 -943

def __init__(
-    self,
-    color: Union[Color, ColorPalette] = ColorPalette.DEFAULT,
-    text_color: Color = Color.WHITE,
-    text_scale: float = 0.5,
-    text_thickness: int = 1,
-    text_padding: int = 10,
-    text_position: Position = Position.TOP_LEFT,
-    color_lookup: ColorLookup = ColorLookup.CLASS,
-    border_radius: int = 0,
-):
-    """
-    Args:
-        color (Union[Color, ColorPalette]): The color or color palette to use for
-            annotating the text background.
-        text_color (Color): The color to use for the text.
-        text_scale (float): Font scale for the text.
-        text_thickness (int): Thickness of the text characters.
-        text_padding (int): Padding around the text within its background box.
-        text_position (Position): Position of the text relative to the detection.
-            Possible values are defined in the `Position` enum.
-        color_lookup (str): Strategy for mapping colors to annotations.
-            Options are `INDEX`, `CLASS`, `TRACK`.
-        border_radius (int): The radius used to round the corners of the label
-            background. If the value exceeds the smaller dimension (width or
-            height), it is clipped.
-    """
-    self.border_radius: int = border_radius
-    self.color: Union[Color, ColorPalette] = color
-    self.text_color: Color = text_color
-    self.text_scale: float = text_scale
-    self.text_thickness: int = text_thickness
-    self.text_padding: int = text_padding
-    self.text_anchor: Position = text_position
-    self.color_lookup: ColorLookup = color_lookup
+943
+944
+945
+946
+947
+948
+949
+950
+951
def __init__(
+    self,
+    color: Union[Color, ColorPalette] = ColorPalette.DEFAULT,
+    text_color: Color = Color.WHITE,
+    text_scale: float = 0.5,
+    text_thickness: int = 1,
+    text_padding: int = 10,
+    text_position: Position = Position.TOP_LEFT,
+    color_lookup: ColorLookup = ColorLookup.CLASS,
+    border_radius: int = 0,
+):
+    """
+    Args:
+        color (Union[Color, ColorPalette]): The color or color palette to use for
+            annotating the text background.
+        text_color (Color): The color to use for the text.
+        text_scale (float): Font scale for the text.
+        text_thickness (int): Thickness of the text characters.
+        text_padding (int): Padding around the text within its background box.
+        text_position (Position): Position of the text relative to the detection.
+            Possible values are defined in the `Position` enum.
+        color_lookup (str): Strategy for mapping colors to annotations.
+            Options are `INDEX`, `CLASS`, `TRACK`.
+        border_radius (int): The radius used to round the corners of the label
+            background. If the value exceeds the smaller dimension (width or
+            height), it is clipped.
+    """
+    self.border_radius: int = border_radius
+    self.color: Union[Color, ColorPalette] = color
+    self.text_color: Color = text_color
+    self.text_scale: float = text_scale
+    self.text_thickness: int = text_thickness
+    self.text_padding: int = text_padding
+    self.text_anchor: Position = text_position
+    self.color_lookup: ColorLookup = color_lookup
 
@@ -13310,15 +13394,7 @@

Source code in supervision/annotators/core.py -
 945
- 946
- 947
- 948
- 949
- 950
- 951
- 952
- 953
+              
 953
  954
  955
  956
@@ -13426,123 +13502,131 @@ 

1058 1059 1060 -1061

@convert_for_annotation_method
-def annotate(
-    self,
-    scene: ImageType,
-    detections: Detections,
-    labels: List[str] = None,
-    custom_color_lookup: Optional[np.ndarray] = None,
-) -> ImageType:
-    """
-    Annotates the given scene with labels based on the provided detections.
-
-    Args:
-        scene (ImageType): The image where labels will be drawn.
-            `ImageType` is a flexible type, accepting either `numpy.ndarray`
-            or `PIL.Image.Image`.
-        detections (Detections): Object detections to annotate.
-        labels (List[str]): Optional. Custom labels for each detection.
-        custom_color_lookup (Optional[np.ndarray]): Custom color lookup array.
-            Allows overriding the default color mapping strategy.
-
-    Returns:
-        The annotated image, matching the type of `scene` (`numpy.ndarray`
-            or `PIL.Image.Image`)
-
-    Example:
-        ```python
-        import supervision as sv
+1061
+1062
+1063
+1064
+1065
+1066
+1067
+1068
+1069
@convert_for_annotation_method
+def annotate(
+    self,
+    scene: ImageType,
+    detections: Detections,
+    labels: List[str] = None,
+    custom_color_lookup: Optional[np.ndarray] = None,
+) -> ImageType:
+    """
+    Annotates the given scene with labels based on the provided detections.
+
+    Args:
+        scene (ImageType): The image where labels will be drawn.
+            `ImageType` is a flexible type, accepting either `numpy.ndarray`
+            or `PIL.Image.Image`.
+        detections (Detections): Object detections to annotate.
+        labels (List[str]): Optional. Custom labels for each detection.
+        custom_color_lookup (Optional[np.ndarray]): Custom color lookup array.
+            Allows overriding the default color mapping strategy.
 
-        image = ...
-        detections = sv.Detections(...)
-
-        labels = [
-            f"{class_name} {confidence:.2f}"
-            for class_name, confidence
-            in zip(detections['class_name'], detections.confidence)
-        ]
-
-        label_annotator = sv.LabelAnnotator(text_position=sv.Position.CENTER)
-        annotated_frame = label_annotator.annotate(
-            scene=image.copy(),
-            detections=detections,
-            labels=labels
-        )
-        ```
+    Returns:
+        The annotated image, matching the type of `scene` (`numpy.ndarray`
+            or `PIL.Image.Image`)
+
+    Example:
+        ```python
+        import supervision as sv
+
+        image = ...
+        detections = sv.Detections(...)
+
+        labels = [
+            f"{class_name} {confidence:.2f}"
+            for class_name, confidence
+            in zip(detections['class_name'], detections.confidence)
+        ]
 
-    ![label-annotator-example](https://media.roboflow.com/
-    supervision-annotator-examples/label-annotator-example-purple.png)
-    """
-    font = cv2.FONT_HERSHEY_SIMPLEX
-    anchors_coordinates = detections.get_anchors_coordinates(
-        anchor=self.text_anchor
-    ).astype(int)
-    if labels is not None and len(labels) != len(detections):
-        raise ValueError(
-            f"The number of labels provided ({len(labels)}) does not match the "
-            f"number of detections ({len(detections)}). Each detection should have "
-            f"a corresponding label. This discrepancy can occur if the labels and "
-            f"detections are not aligned or if an incorrect number of labels has "
-            f"been provided. Please ensure that the labels array has the same "
-            f"length as the Detections object."
-        )
-
-    for detection_idx, center_coordinates in enumerate(anchors_coordinates):
-        color = resolve_color(
-            color=self.color,
-            detections=detections,
-            detection_idx=detection_idx,
-            color_lookup=(
-                self.color_lookup
-                if custom_color_lookup is None
-                else custom_color_lookup
-            ),
-        )
-
-        if labels is not None:
-            text = labels[detection_idx]
-        elif detections[CLASS_NAME_DATA_FIELD] is not None:
-            text = detections[CLASS_NAME_DATA_FIELD][detection_idx]
-        elif detections.class_id is not None:
-            text = str(detections.class_id[detection_idx])
-        else:
-            text = str(detection_idx)
-
-        text_w, text_h = cv2.getTextSize(
-            text=text,
-            fontFace=font,
-            fontScale=self.text_scale,
-            thickness=self.text_thickness,
-        )[0]
-        text_w_padded = text_w + 2 * self.text_padding
-        text_h_padded = text_h + 2 * self.text_padding
-        text_background_xyxy = resolve_text_background_xyxy(
-            center_coordinates=tuple(center_coordinates),
-            text_wh=(text_w_padded, text_h_padded),
-            position=self.text_anchor,
-        )
-
-        text_x = text_background_xyxy[0] + self.text_padding
-        text_y = text_background_xyxy[1] + self.text_padding + text_h
-
-        self.draw_rounded_rectangle(
-            scene=scene,
-            xyxy=text_background_xyxy,
-            color=color.as_bgr(),
-            border_radius=self.border_radius,
-        )
-        cv2.putText(
-            img=scene,
-            text=text,
-            org=(text_x, text_y),
-            fontFace=font,
-            fontScale=self.text_scale,
-            color=self.text_color.as_rgb(),
-            thickness=self.text_thickness,
-            lineType=cv2.LINE_AA,
-        )
-    return scene
+        label_annotator = sv.LabelAnnotator(text_position=sv.Position.CENTER)
+        annotated_frame = label_annotator.annotate(
+            scene=image.copy(),
+            detections=detections,
+            labels=labels
+        )
+        ```
+
+    ![label-annotator-example](https://media.roboflow.com/
+    supervision-annotator-examples/label-annotator-example-purple.png)
+    """
+    font = cv2.FONT_HERSHEY_SIMPLEX
+    anchors_coordinates = detections.get_anchors_coordinates(
+        anchor=self.text_anchor
+    ).astype(int)
+    if labels is not None and len(labels) != len(detections):
+        raise ValueError(
+            f"The number of labels provided ({len(labels)}) does not match the "
+            f"number of detections ({len(detections)}). Each detection should have "
+            f"a corresponding label. This discrepancy can occur if the labels and "
+            f"detections are not aligned or if an incorrect number of labels has "
+            f"been provided. Please ensure that the labels array has the same "
+            f"length as the Detections object."
+        )
+
+    for detection_idx, center_coordinates in enumerate(anchors_coordinates):
+        color = resolve_color(
+            color=self.color,
+            detections=detections,
+            detection_idx=detection_idx,
+            color_lookup=(
+                self.color_lookup
+                if custom_color_lookup is None
+                else custom_color_lookup
+            ),
+        )
+
+        if labels is not None:
+            text = labels[detection_idx]
+        elif detections[CLASS_NAME_DATA_FIELD] is not None:
+            text = detections[CLASS_NAME_DATA_FIELD][detection_idx]
+        elif detections.class_id is not None:
+            text = str(detections.class_id[detection_idx])
+        else:
+            text = str(detection_idx)
+
+        text_w, text_h = cv2.getTextSize(
+            text=text,
+            fontFace=font,
+            fontScale=self.text_scale,
+            thickness=self.text_thickness,
+        )[0]
+        text_w_padded = text_w + 2 * self.text_padding
+        text_h_padded = text_h + 2 * self.text_padding
+        text_background_xyxy = resolve_text_background_xyxy(
+            center_coordinates=tuple(center_coordinates),
+            text_wh=(text_w_padded, text_h_padded),
+            position=self.text_anchor,
+        )
+
+        text_x = text_background_xyxy[0] + self.text_padding
+        text_y = text_background_xyxy[1] + self.text_padding + text_h
+
+        self.draw_rounded_rectangle(
+            scene=scene,
+            xyxy=text_background_xyxy,
+            color=color.as_bgr(),
+            border_radius=self.border_radius,
+        )
+        cv2.putText(
+            img=scene,
+            text=text,
+            org=(text_x, text_y),
+            fontFace=font,
+            fontScale=self.text_scale,
+            color=self.text_color.as_rgb(),
+            thickness=self.text_thickness,
+            lineType=cv2.LINE_AA,
+        )
+    return scene
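Both `labels` and `custom_color_lookup` are optional overrides of the behaviour shown above. A minimal sketch, assuming `custom_color_lookup` is a per-detection array of integer palette indices (the frame and detections below are synthetic placeholders):

```python
import numpy as np
import supervision as sv

# Placeholder detections: two boxes, two classes.
detections = sv.Detections(
    xyxy=np.array([[10.0, 10.0, 100.0, 100.0], [150.0, 40.0, 260.0, 200.0]]),
    class_id=np.array([0, 1]),
    confidence=np.array([0.92, 0.85]),
)

label_annotator = sv.LabelAnnotator(text_position=sv.Position.CENTER)

# One palette index per detection, overriding the CLASS-based lookup.
custom_lookup = np.array([3, 3])

frame = np.zeros((480, 640, 3), dtype=np.uint8)  # stand-in for a real image
annotated = label_annotator.annotate(
    scene=frame.copy(),
    detections=detections,
    labels=["first", "second"],
    custom_color_lookup=custom_lookup,
)
```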
 
@@ -13574,15 +13658,7 @@

RichLabelAnnotator
Source code in supervision/annotators/core.py
@@ -13730,163 +13806,171 @@ 

RichLabelAnnotator

class RichLabelAnnotator:
-    """
-    A class for annotating labels on an image using provided detections,
-    with support for Unicode characters by using a custom font.
-    """
-
-    def __init__(
-        self,
-        color: Union[Color, ColorPalette] = ColorPalette.DEFAULT,
-        text_color: Color = Color.WHITE,
-        font_path: str = None,
-        font_size: int = 10,
-        text_padding: int = 10,
-        text_position: Position = Position.TOP_LEFT,
-        color_lookup: ColorLookup = ColorLookup.CLASS,
-        border_radius: int = 0,
-    ):
-        """
-        Args:
-            color (Union[Color, ColorPalette]): The color or color palette to use for
-                annotating the text background.
-            text_color (Color): The color to use for the text.
-            font_path (str): Path to the font file (e.g., ".ttf" or ".otf") to use for
-                rendering text. If `None`, the default PIL font will be used.
-            font_size (int): Font size for the text.
-            text_padding (int): Padding around the text within its background box.
-            text_position (Position): Position of the text relative to the detection.
-                Possible values are defined in the `Position` enum.
-            color_lookup (ColorLookup): Strategy for mapping colors to annotations.
-                Options are `INDEX`, `CLASS`, `TRACK`.
-            border_radius (int): The radius to apply round edges. If the selected
-                value is higher than the lower dimension, width or height, is clipped.
-        """
-        self.color = color
-        self.text_color = text_color
-        self.text_padding = text_padding
-        self.text_anchor = text_position
-        self.color_lookup = color_lookup
-        self.border_radius = border_radius
-        if font_path is not None:
-            try:
-                self.font = ImageFont.truetype(font_path, font_size)
-            except OSError:
-                print(f"Font path '{font_path}' not found. Using PIL's default font.")
-                self.font = ImageFont.load_default(size=font_size)
-        else:
-            self.font = ImageFont.load_default(size=font_size)
-
-    def annotate(
-        self,
-        scene: ImageType,
-        detections: Detections,
-        labels: List[str] = None,
-        custom_color_lookup: Optional[np.ndarray] = None,
-    ) -> ImageType:
-        """
-        Annotates the given scene with labels based on the provided
-        detections, with support for Unicode characters.
-
-        Args:
-            scene (ImageType): The image where labels will be drawn.
-                `ImageType` is a flexible type, accepting either `numpy.ndarray`
-                or `PIL.Image.Image`.
-            detections (Detections): Object detections to annotate.
-            labels (List[str]): Optional. Custom labels for each detection.
-            custom_color_lookup (Optional[np.ndarray]): Custom color lookup array.
-                Allows to override the default color mapping strategy.
-
-        Returns:
-            The annotated image, matching the type of `scene` (`numpy.ndarray`
-                or `PIL.Image.Image`)
-
-        Example:
-            ```python
-            import supervision as sv
class RichLabelAnnotator:
+    """
+    A class for annotating labels on an image using provided detections,
+    with support for Unicode characters by using a custom font.
+    """
+
+    def __init__(
+        self,
+        color: Union[Color, ColorPalette] = ColorPalette.DEFAULT,
+        text_color: Color = Color.WHITE,
+        font_path: str = None,
+        font_size: int = 10,
+        text_padding: int = 10,
+        text_position: Position = Position.TOP_LEFT,
+        color_lookup: ColorLookup = ColorLookup.CLASS,
+        border_radius: int = 0,
+    ):
+        """
+        Args:
+            color (Union[Color, ColorPalette]): The color or color palette to use for
+                annotating the text background.
+            text_color (Color): The color to use for the text.
+            font_path (str): Path to the font file (e.g., ".ttf" or ".otf") to use for
+                rendering text. If `None`, the default PIL font will be used.
+            font_size (int): Font size for the text.
+            text_padding (int): Padding around the text within its background box.
+            text_position (Position): Position of the text relative to the detection.
+                Possible values are defined in the `Position` enum.
+            color_lookup (ColorLookup): Strategy for mapping colors to annotations.
+                Options are `INDEX`, `CLASS`, `TRACK`.
+            border_radius (int): The radius used to round the corners. If the selected
+                value exceeds the smaller dimension (width or height), it is clipped.
+        """
+        self.color = color
+        self.text_color = text_color
+        self.text_padding = text_padding
+        self.text_anchor = text_position
+        self.color_lookup = color_lookup
+        self.border_radius = border_radius
+        if font_path is not None:
+            try:
+                self.font = ImageFont.truetype(font_path, font_size)
+            except OSError:
+                print(f"Font path '{font_path}' not found. Using PIL's default font.")
+                self.font = ImageFont.load_default(size=font_size)
+        else:
+            self.font = ImageFont.load_default(size=font_size)
+
+    def annotate(
+        self,
+        scene: ImageType,
+        detections: Detections,
+        labels: List[str] = None,
+        custom_color_lookup: Optional[np.ndarray] = None,
+    ) -> ImageType:
+        """
+        Annotates the given scene with labels based on the provided
+        detections, with support for Unicode characters.
+
+        Args:
+            scene (ImageType): The image where labels will be drawn.
+                `ImageType` is a flexible type, accepting either `numpy.ndarray`
+                or `PIL.Image.Image`.
+            detections (Detections): Object detections to annotate.
+            labels (List[str]): Optional. Custom labels for each detection.
+            custom_color_lookup (Optional[np.ndarray]): Custom color lookup array.
+                Allows overriding the default color mapping strategy.
 
-            image = ...
-            detections = sv.Detections(...)
-
-            labels = [
-                f"{class_name} {confidence:.2f}"
-                for class_name, confidence
-                in zip(detections['class_name'], detections.confidence)
-            ]
-
-            rich_label_annotator = sv.RichLabelAnnotator(font_path="path/to/font.ttf")
-            annotated_frame = label_annotator.annotate(
-                scene=image.copy(),
-                detections=detections,
-                labels=labels
-            )
-            ```
+        Returns:
+            The annotated image, matching the type of `scene` (`numpy.ndarray`
+                or `PIL.Image.Image`)
+
+        Example:
+            ```python
+            import supervision as sv
+
+            image = ...
+            detections = sv.Detections(...)
+
+            labels = [
+                f"{class_name} {confidence:.2f}"
+                for class_name, confidence
+                in zip(detections['class_name'], detections.confidence)
+            ]
 
-        """
-        if isinstance(scene, np.ndarray):
-            scene = Image.fromarray(cv2.cvtColor(scene, cv2.COLOR_BGR2RGB))
-        draw = ImageDraw.Draw(scene)
-        anchors_coordinates = detections.get_anchors_coordinates(
-            anchor=self.text_anchor
-        ).astype(int)
-        if labels is not None and len(labels) != len(detections):
-            raise ValueError(
-                f"The number of labels provided ({len(labels)}) does not match the "
-                f"number of detections ({len(detections)}). Each detection should have "
-                f"a corresponding label. This discrepancy can occur if the labels and "
-                f"detections are not aligned or if an incorrect number of labels has "
-                f"been provided. Please ensure that the labels array has the same "
-                f"length as the Detections object."
-            )
-        for detection_idx, center_coordinates in enumerate(anchors_coordinates):
-            color = resolve_color(
-                color=self.color,
-                detections=detections,
-                detection_idx=detection_idx,
-                color_lookup=(
-                    self.color_lookup
-                    if custom_color_lookup is None
-                    else custom_color_lookup
-                ),
-            )
-            if labels is not None:
-                text = labels[detection_idx]
-            elif detections[CLASS_NAME_DATA_FIELD] is not None:
-                text = detections[CLASS_NAME_DATA_FIELD][detection_idx]
-            elif detections.class_id is not None:
-                text = str(detections.class_id[detection_idx])
-            else:
-                text = str(detection_idx)
-
-            left, top, right, bottom = draw.textbbox((0, 0), text, font=self.font)
-            text_width = right - left
-            text_height = bottom - top
-            text_w_padded = text_width + 2 * self.text_padding
-            text_h_padded = text_height + 2 * self.text_padding
-            text_background_xyxy = resolve_text_background_xyxy(
-                center_coordinates=tuple(center_coordinates),
-                text_wh=(text_w_padded, text_h_padded),
-                position=self.text_anchor,
-            )
-
-            text_x = text_background_xyxy[0] + self.text_padding - left
-            text_y = text_background_xyxy[1] + self.text_padding - top
-
-            draw.rounded_rectangle(
-                text_background_xyxy,
-                radius=self.border_radius,
-                fill=color.as_rgb(),
-                outline=None,
-            )
-            draw.text(
-                xy=(text_x, text_y),
-                text=text,
-                font=self.font,
-                fill=self.text_color.as_rgb(),
-            )
-
-        return scene
+            rich_label_annotator = sv.RichLabelAnnotator(font_path="path/to/font.ttf")
+            annotated_frame = rich_label_annotator.annotate(
+                scene=image.copy(),
+                detections=detections,
+                labels=labels
+            )
+            ```
+
+        """
+        if isinstance(scene, np.ndarray):
+            scene = Image.fromarray(cv2.cvtColor(scene, cv2.COLOR_BGR2RGB))
+        draw = ImageDraw.Draw(scene)
+        anchors_coordinates = detections.get_anchors_coordinates(
+            anchor=self.text_anchor
+        ).astype(int)
+        if labels is not None and len(labels) != len(detections):
+            raise ValueError(
+                f"The number of labels provided ({len(labels)}) does not match the "
+                f"number of detections ({len(detections)}). Each detection should have "
+                f"a corresponding label. This discrepancy can occur if the labels and "
+                f"detections are not aligned or if an incorrect number of labels has "
+                f"been provided. Please ensure that the labels array has the same "
+                f"length as the Detections object."
+            )
+        for detection_idx, center_coordinates in enumerate(anchors_coordinates):
+            color = resolve_color(
+                color=self.color,
+                detections=detections,
+                detection_idx=detection_idx,
+                color_lookup=(
+                    self.color_lookup
+                    if custom_color_lookup is None
+                    else custom_color_lookup
+                ),
+            )
+            if labels is not None:
+                text = labels[detection_idx]
+            elif detections[CLASS_NAME_DATA_FIELD] is not None:
+                text = detections[CLASS_NAME_DATA_FIELD][detection_idx]
+            elif detections.class_id is not None:
+                text = str(detections.class_id[detection_idx])
+            else:
+                text = str(detection_idx)
+
+            left, top, right, bottom = draw.textbbox((0, 0), text, font=self.font)
+            text_width = right - left
+            text_height = bottom - top
+            text_w_padded = text_width + 2 * self.text_padding
+            text_h_padded = text_height + 2 * self.text_padding
+            text_background_xyxy = resolve_text_background_xyxy(
+                center_coordinates=tuple(center_coordinates),
+                text_wh=(text_w_padded, text_h_padded),
+                position=self.text_anchor,
+            )
+
+            text_x = text_background_xyxy[0] + self.text_padding - left
+            text_y = text_background_xyxy[1] + self.text_padding - top
+
+            draw.rounded_rectangle(
+                text_background_xyxy,
+                radius=self.border_radius,
+                fill=color.as_rgb(),
+                outline=None,
+            )
+            draw.text(
+                xy=(text_x, text_y),
+                text=text,
+                font=self.font,
+                fill=self.text_color.as_rgb(),
+            )
+
+        return scene
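Note that the docstring example above builds `rich_label_annotator` but then calls `label_annotator.annotate(...)`, which looks like a copy-paste slip. A corrected, minimal sketch (the font path and detection values are placeholders; per the code above, a missing font falls back to PIL's default, and a `numpy.ndarray` input is converted to and returned as a `PIL.Image.Image`):

```python
import numpy as np
import supervision as sv

detections = sv.Detections(
    xyxy=np.array([[20.0, 60.0, 120.0, 140.0]]),
    class_id=np.array([0]),
)

# Placeholder font path; any .ttf/.otf containing the glyphs you need works.
rich_label_annotator = sv.RichLabelAnnotator(font_path="NotoSans-Regular.ttf", font_size=18)

frame = np.zeros((240, 320, 3), dtype=np.uint8)  # synthetic BGR frame
annotated = rich_label_annotator.annotate(
    scene=frame.copy(),
    detections=detections,
    labels=["Größe ≥ 0.9"],  # Unicode label
)
# In this version, `annotated` is a PIL.Image.Image when a numpy array is passed in.
```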
 
@@ -14048,15 +14132,7 @@

Source code in supervision/annotators/core.py
@@ -14088,47 +14164,55 @@ 


def __init__(
-    self,
-    color: Union[Color, ColorPalette] = ColorPalette.DEFAULT,
-    text_color: Color = Color.WHITE,
-    font_path: str = None,
-    font_size: int = 10,
-    text_padding: int = 10,
-    text_position: Position = Position.TOP_LEFT,
-    color_lookup: ColorLookup = ColorLookup.CLASS,
-    border_radius: int = 0,
-):
-    """
-    Args:
-        color (Union[Color, ColorPalette]): The color or color palette to use for
-            annotating the text background.
-        text_color (Color): The color to use for the text.
-        font_path (str): Path to the font file (e.g., ".ttf" or ".otf") to use for
-            rendering text. If `None`, the default PIL font will be used.
-        font_size (int): Font size for the text.
-        text_padding (int): Padding around the text within its background box.
-        text_position (Position): Position of the text relative to the detection.
-            Possible values are defined in the `Position` enum.
-        color_lookup (ColorLookup): Strategy for mapping colors to annotations.
-            Options are `INDEX`, `CLASS`, `TRACK`.
-        border_radius (int): The radius to apply round edges. If the selected
-            value is higher than the lower dimension, width or height, is clipped.
-    """
-    self.color = color
-    self.text_color = text_color
-    self.text_padding = text_padding
-    self.text_anchor = text_position
-    self.color_lookup = color_lookup
-    self.border_radius = border_radius
-    if font_path is not None:
-        try:
-            self.font = ImageFont.truetype(font_path, font_size)
-        except OSError:
-            print(f"Font path '{font_path}' not found. Using PIL's default font.")
-            self.font = ImageFont.load_default(size=font_size)
-    else:
-        self.font = ImageFont.load_default(size=font_size)
def __init__(
+    self,
+    color: Union[Color, ColorPalette] = ColorPalette.DEFAULT,
+    text_color: Color = Color.WHITE,
+    font_path: str = None,
+    font_size: int = 10,
+    text_padding: int = 10,
+    text_position: Position = Position.TOP_LEFT,
+    color_lookup: ColorLookup = ColorLookup.CLASS,
+    border_radius: int = 0,
+):
+    """
+    Args:
+        color (Union[Color, ColorPalette]): The color or color palette to use for
+            annotating the text background.
+        text_color (Color): The color to use for the text.
+        font_path (str): Path to the font file (e.g., ".ttf" or ".otf") to use for
+            rendering text. If `None`, the default PIL font will be used.
+        font_size (int): Font size for the text.
+        text_padding (int): Padding around the text within its background box.
+        text_position (Position): Position of the text relative to the detection.
+            Possible values are defined in the `Position` enum.
+        color_lookup (ColorLookup): Strategy for mapping colors to annotations.
+            Options are `INDEX`, `CLASS`, `TRACK`.
+        border_radius (int): The radius used to round the corners. If the selected
+            value exceeds the smaller dimension (width or height), it is clipped.
+    """
+    self.color = color
+    self.text_color = text_color
+    self.text_padding = text_padding
+    self.text_anchor = text_position
+    self.color_lookup = color_lookup
+    self.border_radius = border_radius
+    if font_path is not None:
+        try:
+            self.font = ImageFont.truetype(font_path, font_size)
+        except OSError:
+            print(f"Font path '{font_path}' not found. Using PIL's default font.")
+            self.font = ImageFont.load_default(size=font_size)
+    else:
+        self.font = ImageFont.load_default(size=font_size)
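Per the constructor above, a bad `font_path` does not raise: it prints a warning and falls back to PIL's built-in font, as does omitting the path entirely. A tiny sketch:

```python
import supervision as sv

# Wrong path: prints a warning and falls back to PIL's default font.
annotator = sv.RichLabelAnnotator(font_path="missing-font.ttf", font_size=18)

# No path: uses the default font directly, without a warning.
annotator_default = sv.RichLabelAnnotator(font_size=18)
```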
 
@@ -14271,15 +14355,7 @@

Source code in supervision/annotators/core.py
@@ -14379,115 +14455,123 @@ 


def annotate(
-    self,
-    scene: ImageType,
-    detections: Detections,
-    labels: List[str] = None,
-    custom_color_lookup: Optional[np.ndarray] = None,
-) -> ImageType:
-    """
-    Annotates the given scene with labels based on the provided
-    detections, with support for Unicode characters.
-
-    Args:
-        scene (ImageType): The image where labels will be drawn.
-            `ImageType` is a flexible type, accepting either `numpy.ndarray`
-            or `PIL.Image.Image`.
-        detections (Detections): Object detections to annotate.
-        labels (List[str]): Optional. Custom labels for each detection.
-        custom_color_lookup (Optional[np.ndarray]): Custom color lookup array.
-            Allows to override the default color mapping strategy.
-
-    Returns:
-        The annotated image, matching the type of `scene` (`numpy.ndarray`
-            or `PIL.Image.Image`)
-
-    Example:
-        ```python
-        import supervision as sv
def annotate(
+    self,
+    scene: ImageType,
+    detections: Detections,
+    labels: List[str] = None,
+    custom_color_lookup: Optional[np.ndarray] = None,
+) -> ImageType:
+    """
+    Annotates the given scene with labels based on the provided
+    detections, with support for Unicode characters.
+
+    Args:
+        scene (ImageType): The image where labels will be drawn.
+            `ImageType` is a flexible type, accepting either `numpy.ndarray`
+            or `PIL.Image.Image`.
+        detections (Detections): Object detections to annotate.
+        labels (List[str]): Optional. Custom labels for each detection.
+        custom_color_lookup (Optional[np.ndarray]): Custom color lookup array.
+            Allows overriding the default color mapping strategy.
 
-        image = ...
-        detections = sv.Detections(...)
-
-        labels = [
-            f"{class_name} {confidence:.2f}"
-            for class_name, confidence
-            in zip(detections['class_name'], detections.confidence)
-        ]
-
-        rich_label_annotator = sv.RichLabelAnnotator(font_path="path/to/font.ttf")
-        annotated_frame = label_annotator.annotate(
-            scene=image.copy(),
-            detections=detections,
-            labels=labels
-        )
-        ```
+    Returns:
+        The annotated image, matching the type of `scene` (`numpy.ndarray`
+            or `PIL.Image.Image`)
+
+    Example:
+        ```python
+        import supervision as sv
+
+        image = ...
+        detections = sv.Detections(...)
+
+        labels = [
+            f"{class_name} {confidence:.2f}"
+            for class_name, confidence
+            in zip(detections['class_name'], detections.confidence)
+        ]
 
-    """
-    if isinstance(scene, np.ndarray):
-        scene = Image.fromarray(cv2.cvtColor(scene, cv2.COLOR_BGR2RGB))
-    draw = ImageDraw.Draw(scene)
-    anchors_coordinates = detections.get_anchors_coordinates(
-        anchor=self.text_anchor
-    ).astype(int)
-    if labels is not None and len(labels) != len(detections):
-        raise ValueError(
-            f"The number of labels provided ({len(labels)}) does not match the "
-            f"number of detections ({len(detections)}). Each detection should have "
-            f"a corresponding label. This discrepancy can occur if the labels and "
-            f"detections are not aligned or if an incorrect number of labels has "
-            f"been provided. Please ensure that the labels array has the same "
-            f"length as the Detections object."
-        )
-    for detection_idx, center_coordinates in enumerate(anchors_coordinates):
-        color = resolve_color(
-            color=self.color,
-            detections=detections,
-            detection_idx=detection_idx,
-            color_lookup=(
-                self.color_lookup
-                if custom_color_lookup is None
-                else custom_color_lookup
-            ),
-        )
-        if labels is not None:
-            text = labels[detection_idx]
-        elif detections[CLASS_NAME_DATA_FIELD] is not None:
-            text = detections[CLASS_NAME_DATA_FIELD][detection_idx]
-        elif detections.class_id is not None:
-            text = str(detections.class_id[detection_idx])
-        else:
-            text = str(detection_idx)
-
-        left, top, right, bottom = draw.textbbox((0, 0), text, font=self.font)
-        text_width = right - left
-        text_height = bottom - top
-        text_w_padded = text_width + 2 * self.text_padding
-        text_h_padded = text_height + 2 * self.text_padding
-        text_background_xyxy = resolve_text_background_xyxy(
-            center_coordinates=tuple(center_coordinates),
-            text_wh=(text_w_padded, text_h_padded),
-            position=self.text_anchor,
-        )
-
-        text_x = text_background_xyxy[0] + self.text_padding - left
-        text_y = text_background_xyxy[1] + self.text_padding - top
-
-        draw.rounded_rectangle(
-            text_background_xyxy,
-            radius=self.border_radius,
-            fill=color.as_rgb(),
-            outline=None,
-        )
-        draw.text(
-            xy=(text_x, text_y),
-            text=text,
-            font=self.font,
-            fill=self.text_color.as_rgb(),
-        )
-
-    return scene
+        rich_label_annotator = sv.RichLabelAnnotator(font_path="path/to/font.ttf")
+        annotated_frame = rich_label_annotator.annotate(
+            scene=image.copy(),
+            detections=detections,
+            labels=labels
+        )
+        ```
+
+    """
+    if isinstance(scene, np.ndarray):
+        scene = Image.fromarray(cv2.cvtColor(scene, cv2.COLOR_BGR2RGB))
+    draw = ImageDraw.Draw(scene)
+    anchors_coordinates = detections.get_anchors_coordinates(
+        anchor=self.text_anchor
+    ).astype(int)
+    if labels is not None and len(labels) != len(detections):
+        raise ValueError(
+            f"The number of labels provided ({len(labels)}) does not match the "
+            f"number of detections ({len(detections)}). Each detection should have "
+            f"a corresponding label. This discrepancy can occur if the labels and "
+            f"detections are not aligned or if an incorrect number of labels has "
+            f"been provided. Please ensure that the labels array has the same "
+            f"length as the Detections object."
+        )
+    for detection_idx, center_coordinates in enumerate(anchors_coordinates):
+        color = resolve_color(
+            color=self.color,
+            detections=detections,
+            detection_idx=detection_idx,
+            color_lookup=(
+                self.color_lookup
+                if custom_color_lookup is None
+                else custom_color_lookup
+            ),
+        )
+        if labels is not None:
+            text = labels[detection_idx]
+        elif detections[CLASS_NAME_DATA_FIELD] is not None:
+            text = detections[CLASS_NAME_DATA_FIELD][detection_idx]
+        elif detections.class_id is not None:
+            text = str(detections.class_id[detection_idx])
+        else:
+            text = str(detection_idx)
+
+        left, top, right, bottom = draw.textbbox((0, 0), text, font=self.font)
+        text_width = right - left
+        text_height = bottom - top
+        text_w_padded = text_width + 2 * self.text_padding
+        text_h_padded = text_height + 2 * self.text_padding
+        text_background_xyxy = resolve_text_background_xyxy(
+            center_coordinates=tuple(center_coordinates),
+            text_wh=(text_w_padded, text_h_padded),
+            position=self.text_anchor,
+        )
+
+        text_x = text_background_xyxy[0] + self.text_padding - left
+        text_y = text_background_xyxy[1] + self.text_padding - top
+
+        draw.rounded_rectangle(
+            text_background_xyxy,
+            radius=self.border_radius,
+            fill=color.as_rgb(),
+            outline=None,
+        )
+        draw.text(
+            xy=(text_x, text_y),
+            text=text,
+            font=self.font,
+            fill=self.text_color.as_rgb(),
+        )
+
+    return scene
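The `text` fallback above resolves, in order: explicit `labels`, the `class_name` data field, `class_id`, and finally the detection index. A minimal sketch of that precedence, assuming a detections object carrying `class_name` data (all values are synthetic):

```python
import numpy as np
import supervision as sv

detections = sv.Detections(
    xyxy=np.array([[0.0, 0.0, 50.0, 50.0], [60.0, 0.0, 110.0, 50.0]]),
    class_id=np.array([2, 7]),
    data={"class_name": np.array(["cat", "dog"])},
)

annotator = sv.RichLabelAnnotator()
frame = np.zeros((120, 160, 3), dtype=np.uint8)

# labels=None, so the class_name field wins: "cat" and "dog" are drawn.
# Without class_name it would fall back to "2" / "7", and with no class_id
# at all it would use the detection indices "0" / "1".
annotated = annotator.annotate(scene=frame, detections=detections)
```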
 
@@ -14520,15 +14604,7 @@

BlurAnnotator

Source code in supervision/annotators/core.py
@@ -14578,65 +14654,73 @@ 

BlurAnnotator

class BlurAnnotator(BaseAnnotator):
-    """
-    A class for blurring regions in an image using provided detections.
-    """
-
-    def __init__(self, kernel_size: int = 15):
-        """
-        Args:
-            kernel_size (int): The size of the average pooling kernel used for blurring.
-        """
-        self.kernel_size: int = kernel_size
-
-    @convert_for_annotation_method
-    def annotate(
-        self,
-        scene: ImageType,
-        detections: Detections,
-    ) -> ImageType:
-        """
-        Annotates the given scene by blurring regions based on the provided detections.
-
-        Args:
-            scene (ImageType): The image where blurring will be applied.
-                `ImageType` is a flexible type, accepting either `numpy.ndarray`
-                or `PIL.Image.Image`.
-            detections (Detections): Object detections to annotate.
-
-        Returns:
-            The annotated image, matching the type of `scene` (`numpy.ndarray`
-                or `PIL.Image.Image`)
-
-        Example:
-            ```python
-            import supervision as sv
+1323
+1324
+1325
+1326
+1327
+1328
+1329
+1330
+1331
class BlurAnnotator(BaseAnnotator):
+    """
+    A class for blurring regions in an image using provided detections.
+    """
+
+    def __init__(self, kernel_size: int = 15):
+        """
+        Args:
+            kernel_size (int): The size of the average pooling kernel used for blurring.
+        """
+        self.kernel_size: int = kernel_size
+
+    @convert_for_annotation_method
+    def annotate(
+        self,
+        scene: ImageType,
+        detections: Detections,
+    ) -> ImageType:
+        """
+        Annotates the given scene by blurring regions based on the provided detections.
+
+        Args:
+            scene (ImageType): The image where blurring will be applied.
+                `ImageType` is a flexible type, accepting either `numpy.ndarray`
+                or `PIL.Image.Image`.
+            detections (Detections): Object detections to annotate.
 
-            image = ...
-            detections = sv.Detections(...)
-
-            blur_annotator = sv.BlurAnnotator()
-            annotated_frame = circle_annotator.annotate(
-                scene=image.copy(),
-                detections=detections
-            )
-            ```
-
-        ![blur-annotator-example](https://media.roboflow.com/
-        supervision-annotator-examples/blur-annotator-example-purple.png)
-        """
-        image_height, image_width = scene.shape[:2]
-        clipped_xyxy = clip_boxes(
-            xyxy=detections.xyxy, resolution_wh=(image_width, image_height)
-        ).astype(int)
+        Returns:
+            The annotated image, matching the type of `scene` (`numpy.ndarray`
+                or `PIL.Image.Image`)
+
+        Example:
+            ```python
+            import supervision as sv
+
+            image = ...
+            detections = sv.Detections(...)
+
+            blur_annotator = sv.BlurAnnotator()
+            annotated_frame = blur_annotator.annotate(
+                scene=image.copy(),
+                detections=detections
+            )
+            ```
 
-        for x1, y1, x2, y2 in clipped_xyxy:
-            roi = scene[y1:y2, x1:x2]
-            roi = cv2.blur(roi, (self.kernel_size, self.kernel_size))
-            scene[y1:y2, x1:x2] = roi
-
-        return scene
+        ![blur-annotator-example](https://media.roboflow.com/
+        supervision-annotator-examples/blur-annotator-example-purple.png)
+        """
+        image_height, image_width = scene.shape[:2]
+        clipped_xyxy = clip_boxes(
+            xyxy=detections.xyxy, resolution_wh=(image_width, image_height)
+        ).astype(int)
+
+        for x1, y1, x2, y2 in clipped_xyxy:
+            roi = scene[y1:y2, x1:x2]
+            roi = cv2.blur(roi, (self.kernel_size, self.kernel_size))
+            scene[y1:y2, x1:x2] = roi
+
+        return scene
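The example above instantiates `blur_annotator` but then calls `circle_annotator.annotate(...)`, which looks like a copy-paste slip from another annotator's docs. A corrected, minimal sketch on a synthetic frame:

```python
import numpy as np
import supervision as sv

detections = sv.Detections(xyxy=np.array([[30.0, 30.0, 200.0, 180.0]]))

blur_annotator = sv.BlurAnnotator(kernel_size=25)

frame = np.random.randint(0, 255, (240, 320, 3), dtype=np.uint8)
annotated_frame = blur_annotator.annotate(
    scene=frame.copy(),
    detections=detections,
)
```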
 
@@ -14695,17 +14779,17 @@

Source code in supervision/annotators/core.py
def __init__(self, kernel_size: int = 15):
-    """
-    Args:
-        kernel_size (int): The size of the average pooling kernel used for blurring.
-    """
-    self.kernel_size: int = kernel_size
+              
def __init__(self, kernel_size: int = 15):
+    """
+    Args:
+        kernel_size (int): The size of the average pooling kernel used for blurring.
+    """
+    self.kernel_size: int = kernel_size
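`kernel_size` is the side length of the averaging window, so larger values blur more aggressively; the values below are purely illustrative:

```python
import supervision as sv

light_blur = sv.BlurAnnotator(kernel_size=5)   # mild smoothing
heavy_blur = sv.BlurAnnotator(kernel_size=51)  # strong, anonymisation-style blur
```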
 
@@ -14813,15 +14897,7 @@

Source code in supervision/annotators/core.py
@@ -14859,53 +14935,61 @@ 


@convert_for_annotation_method
-def annotate(
-    self,
-    scene: ImageType,
-    detections: Detections,
-) -> ImageType:
-    """
-    Annotates the given scene by blurring regions based on the provided detections.
-
-    Args:
-        scene (ImageType): The image where blurring will be applied.
-            `ImageType` is a flexible type, accepting either `numpy.ndarray`
-            or `PIL.Image.Image`.
-        detections (Detections): Object detections to annotate.
-
-    Returns:
-        The annotated image, matching the type of `scene` (`numpy.ndarray`
-            or `PIL.Image.Image`)
-
-    Example:
-        ```python
-        import supervision as sv
@convert_for_annotation_method
+def annotate(
+    self,
+    scene: ImageType,
+    detections: Detections,
+) -> ImageType:
+    """
+    Annotates the given scene by blurring regions based on the provided detections.
+
+    Args:
+        scene (ImageType): The image where blurring will be applied.
+            `ImageType` is a flexible type, accepting either `numpy.ndarray`
+            or `PIL.Image.Image`.
+        detections (Detections): Object detections to annotate.
 
-        image = ...
-        detections = sv.Detections(...)
-
-        blur_annotator = sv.BlurAnnotator()
-        annotated_frame = circle_annotator.annotate(
-            scene=image.copy(),
-            detections=detections
-        )
-        ```
-
-    ![blur-annotator-example](https://media.roboflow.com/
-    supervision-annotator-examples/blur-annotator-example-purple.png)
-    """
-    image_height, image_width = scene.shape[:2]
-    clipped_xyxy = clip_boxes(
-        xyxy=detections.xyxy, resolution_wh=(image_width, image_height)
-    ).astype(int)
+    Returns:
+        The annotated image, matching the type of `scene` (`numpy.ndarray`
+            or `PIL.Image.Image`)
+
+    Example:
+        ```python
+        import supervision as sv
+
+        image = ...
+        detections = sv.Detections(...)
+
+        blur_annotator = sv.BlurAnnotator()
+        annotated_frame = blur_annotator.annotate(
+            scene=image.copy(),
+            detections=detections
+        )
+        ```
 
-    for x1, y1, x2, y2 in clipped_xyxy:
-        roi = scene[y1:y2, x1:x2]
-        roi = cv2.blur(roi, (self.kernel_size, self.kernel_size))
-        scene[y1:y2, x1:x2] = roi
-
-    return scene
+    ![blur-annotator-example](https://media.roboflow.com/
+    supervision-annotator-examples/blur-annotator-example-purple.png)
+    """
+    image_height, image_width = scene.shape[:2]
+    clipped_xyxy = clip_boxes(
+        xyxy=detections.xyxy, resolution_wh=(image_width, image_height)
+    ).astype(int)
+
+    for x1, y1, x2, y2 in clipped_xyxy:
+        roi = scene[y1:y2, x1:x2]
+        roi = cv2.blur(roi, (self.kernel_size, self.kernel_size))
+        scene[y1:y2, x1:x2] = roi
+
+    return scene
 
@@ -14938,15 +15022,7 @@

PixelateAnnotator
Source code in supervision/annotators/core.py
@@ -15005,74 +15081,82 @@ 

PixelateAnnotator

class PixelateAnnotator(BaseAnnotator):
-    """
-    A class for pixelating regions in an image using provided detections.
-    """
-
-    def __init__(self, pixel_size: int = 20):
-        """
-        Args:
-            pixel_size (int): The size of the pixelation.
-        """
-        self.pixel_size: int = pixel_size
-
-    @convert_for_annotation_method
-    def annotate(
-        self,
-        scene: ImageType,
-        detections: Detections,
-    ) -> ImageType:
-        """
-        Annotates the given scene by pixelating regions based on the provided
-            detections.
-
-        Args:
-            scene (ImageType): The image where pixelating will be applied.
-                `ImageType` is a flexible type, accepting either `numpy.ndarray`
-                or `PIL.Image.Image`.
-            detections (Detections): Object detections to annotate.
-
-        Returns:
-            The annotated image, matching the type of `scene` (`numpy.ndarray`
-                or `PIL.Image.Image`)
-
-        Example:
-            ```python
-            import supervision as sv
class PixelateAnnotator(BaseAnnotator):
+    """
+    A class for pixelating regions in an image using provided detections.
+    """
+
+    def __init__(self, pixel_size: int = 20):
+        """
+        Args:
+            pixel_size (int): The size of the pixelation.
+        """
+        self.pixel_size: int = pixel_size
+
+    @convert_for_annotation_method
+    def annotate(
+        self,
+        scene: ImageType,
+        detections: Detections,
+    ) -> ImageType:
+        """
+        Annotates the given scene by pixelating regions based on the provided
+            detections.
+
+        Args:
+            scene (ImageType): The image where pixelating will be applied.
+                `ImageType` is a flexible type, accepting either `numpy.ndarray`
+                or `PIL.Image.Image`.
+            detections (Detections): Object detections to annotate.
 
-            image = ...
-            detections = sv.Detections(...)
-
-            pixelate_annotator = sv.PixelateAnnotator()
-            annotated_frame = pixelate_annotator.annotate(
-                scene=image.copy(),
-                detections=detections
-            )
-            ```
-
-        ![pixelate-annotator-example](https://media.roboflow.com/
-        supervision-annotator-examples/pixelate-annotator-example-10.png)
-        """
-        image_height, image_width = scene.shape[:2]
-        clipped_xyxy = clip_boxes(
-            xyxy=detections.xyxy, resolution_wh=(image_width, image_height)
-        ).astype(int)
+        Returns:
+            The annotated image, matching the type of `scene` (`numpy.ndarray`
+                or `PIL.Image.Image`)
+
+        Example:
+            ```python
+            import supervision as sv
+
+            image = ...
+            detections = sv.Detections(...)
+
+            pixelate_annotator = sv.PixelateAnnotator()
+            annotated_frame = pixelate_annotator.annotate(
+                scene=image.copy(),
+                detections=detections
+            )
+            ```
 
-        for x1, y1, x2, y2 in clipped_xyxy:
-            roi = scene[y1:y2, x1:x2]
-            scaled_up_roi = cv2.resize(
-                src=roi, dsize=None, fx=1 / self.pixel_size, fy=1 / self.pixel_size
-            )
-            scaled_down_roi = cv2.resize(
-                src=scaled_up_roi,
-                dsize=(roi.shape[1], roi.shape[0]),
-                interpolation=cv2.INTER_NEAREST,
-            )
-
-            scene[y1:y2, x1:x2] = scaled_down_roi
-
-        return scene
+        ![pixelate-annotator-example](https://media.roboflow.com/
+        supervision-annotator-examples/pixelate-annotator-example-10.png)
+        """
+        image_height, image_width = scene.shape[:2]
+        clipped_xyxy = clip_boxes(
+            xyxy=detections.xyxy, resolution_wh=(image_width, image_height)
+        ).astype(int)
+
+        for x1, y1, x2, y2 in clipped_xyxy:
+            roi = scene[y1:y2, x1:x2]
+            scaled_up_roi = cv2.resize(
+                src=roi, dsize=None, fx=1 / self.pixel_size, fy=1 / self.pixel_size
+            )
+            scaled_down_roi = cv2.resize(
+                src=scaled_up_roi,
+                dsize=(roi.shape[1], roi.shape[0]),
+                interpolation=cv2.INTER_NEAREST,
+            )
+
+            scene[y1:y2, x1:x2] = scaled_down_roi
+
+        return scene
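The method above shrinks each ROI by a factor of `pixel_size` and scales it back with nearest-neighbour interpolation, so `pixel_size` roughly equals the side length of the resulting blocks. A minimal sketch on a synthetic frame:

```python
import numpy as np
import supervision as sv

detections = sv.Detections(xyxy=np.array([[40.0, 40.0, 220.0, 200.0]]))

pixelate_annotator = sv.PixelateAnnotator(pixel_size=16)  # ~16 px blocks

frame = np.random.randint(0, 255, (256, 256, 3), dtype=np.uint8)
annotated_frame = pixelate_annotator.annotate(
    scene=frame.copy(),
    detections=detections,
)
```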
 
@@ -15131,17 +15215,17 @@

Source code in supervision/annotators/core.py
def __init__(self, pixel_size: int = 20):
-    """
-    Args:
-        pixel_size (int): The size of the pixelation.
-    """
-    self.pixel_size: int = pixel_size
+              
def __init__(self, pixel_size: int = 20):
+    """
+    Args:
+        pixel_size (int): The size of the pixelation.
+    """
+    self.pixel_size: int = pixel_size
 
@@ -15250,15 +15334,7 @@

Source code in supervision/annotators/core.py
@@ -15305,62 +15381,70 @@ 


@convert_for_annotation_method
-def annotate(
-    self,
-    scene: ImageType,
-    detections: Detections,
-) -> ImageType:
-    """
-    Annotates the given scene by pixelating regions based on the provided
-        detections.
-
-    Args:
-        scene (ImageType): The image where pixelating will be applied.
-            `ImageType` is a flexible type, accepting either `numpy.ndarray`
-            or `PIL.Image.Image`.
-        detections (Detections): Object detections to annotate.
-
-    Returns:
-        The annotated image, matching the type of `scene` (`numpy.ndarray`
-            or `PIL.Image.Image`)
-
-    Example:
-        ```python
-        import supervision as sv
@convert_for_annotation_method
+def annotate(
+    self,
+    scene: ImageType,
+    detections: Detections,
+) -> ImageType:
+    """
+    Annotates the given scene by pixelating regions based on the provided
+        detections.
+
+    Args:
+        scene (ImageType): The image where pixelating will be applied.
+            `ImageType` is a flexible type, accepting either `numpy.ndarray`
+            or `PIL.Image.Image`.
+        detections (Detections): Object detections to annotate.
 
-        image = ...
-        detections = sv.Detections(...)
-
-        pixelate_annotator = sv.PixelateAnnotator()
-        annotated_frame = pixelate_annotator.annotate(
-            scene=image.copy(),
-            detections=detections
-        )
-        ```
-
-    ![pixelate-annotator-example](https://media.roboflow.com/
-    supervision-annotator-examples/pixelate-annotator-example-10.png)
-    """
-    image_height, image_width = scene.shape[:2]
-    clipped_xyxy = clip_boxes(
-        xyxy=detections.xyxy, resolution_wh=(image_width, image_height)
-    ).astype(int)
+    Returns:
+        The annotated image, matching the type of `scene` (`numpy.ndarray`
+            or `PIL.Image.Image`)
+
+    Example:
+        ```python
+        import supervision as sv
+
+        image = ...
+        detections = sv.Detections(...)
+
+        pixelate_annotator = sv.PixelateAnnotator()
+        annotated_frame = pixelate_annotator.annotate(
+            scene=image.copy(),
+            detections=detections
+        )
+        ```
 
-    for x1, y1, x2, y2 in clipped_xyxy:
-        roi = scene[y1:y2, x1:x2]
-        scaled_up_roi = cv2.resize(
-            src=roi, dsize=None, fx=1 / self.pixel_size, fy=1 / self.pixel_size
-        )
-        scaled_down_roi = cv2.resize(
-            src=scaled_up_roi,
-            dsize=(roi.shape[1], roi.shape[0]),
-            interpolation=cv2.INTER_NEAREST,
-        )
-
-        scene[y1:y2, x1:x2] = scaled_down_roi
-
-    return scene
+    ![pixelate-annotator-example](https://media.roboflow.com/
+    supervision-annotator-examples/pixelate-annotator-example-10.png)
+    """
+    image_height, image_width = scene.shape[:2]
+    clipped_xyxy = clip_boxes(
+        xyxy=detections.xyxy, resolution_wh=(image_width, image_height)
+    ).astype(int)
+
+    for x1, y1, x2, y2 in clipped_xyxy:
+        roi = scene[y1:y2, x1:x2]
+        scaled_up_roi = cv2.resize(
+            src=roi, dsize=None, fx=1 / self.pixel_size, fy=1 / self.pixel_size
+        )
+        scaled_down_roi = cv2.resize(
+            src=scaled_up_roi,
+            dsize=(roi.shape[1], roi.shape[0]),
+            interpolation=cv2.INTER_NEAREST,
+        )
+
+        scene[y1:y2, x1:x2] = scaled_down_roi
+
+    return scene
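For intuition, the same down/up-scaling trick can be reproduced on a standalone array; this is not the library API, just the two `cv2.resize` calls applied per ROI above:

```python
import cv2
import numpy as np

pixel_size = 20
roi = np.random.randint(0, 255, (120, 160, 3), dtype=np.uint8)

# Shrink by 1/pixel_size, then blow back up with nearest-neighbour interpolation.
small = cv2.resize(roi, dsize=None, fx=1 / pixel_size, fy=1 / pixel_size)
blocky = cv2.resize(small, dsize=(roi.shape[1], roi.shape[0]), interpolation=cv2.INTER_NEAREST)

assert blocky.shape == roi.shape  # same size, but built from ~pixel_size-wide blocks
```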
 
@@ -15397,15 +15481,7 @@

TraceAnnotator

Source code in supervision/annotators/core.py
@@ -15503,113 +15579,121 @@ 

TraceAnnotator

class TraceAnnotator:
-    """
-    A class for drawing trace paths on an image based on detection coordinates.
-
-    !!! warning
-
-        This annotator uses the `sv.Detections.tracker_id`. Read
-        [here](/latest/trackers/) to learn how to plug
-        tracking into your inference pipeline.
-    """
-
-    def __init__(
-        self,
-        color: Union[Color, ColorPalette] = ColorPalette.DEFAULT,
-        position: Position = Position.CENTER,
-        trace_length: int = 30,
-        thickness: int = 2,
-        color_lookup: ColorLookup = ColorLookup.CLASS,
-    ):
-        """
-        Args:
-            color (Union[Color, ColorPalette]): The color to draw the trace, can be
-                a single color or a color palette.
-            position (Position): The position of the trace.
-                Defaults to `CENTER`.
-            trace_length (int): The maximum length of the trace in terms of historical
-                points. Defaults to `30`.
-            thickness (int): The thickness of the trace lines. Defaults to `2`.
-            color_lookup (str): Strategy for mapping colors to annotations.
-                Options are `INDEX`, `CLASS`, `TRACK`.
-        """
-        self.color: Union[Color, ColorPalette] = color
-        self.trace = Trace(max_size=trace_length, anchor=position)
-        self.thickness = thickness
-        self.color_lookup: ColorLookup = color_lookup
-
-    @convert_for_annotation_method
-    def annotate(
-        self,
-        scene: ImageType,
-        detections: Detections,
-        custom_color_lookup: Optional[np.ndarray] = None,
-    ) -> ImageType:
-        """
-        Draws trace paths on the frame based on the detection coordinates provided.
-
-        Args:
-            scene (ImageType): The image on which the traces will be drawn.
-                `ImageType` is a flexible type, accepting either `numpy.ndarray`
-                or `PIL.Image.Image`.
-            detections (Detections): The detections which include coordinates for
-                which the traces will be drawn.
-            custom_color_lookup (Optional[np.ndarray]): Custom color lookup array.
-                Allows to override the default color mapping strategy.
-
-        Returns:
-            The annotated image, matching the type of `scene` (`numpy.ndarray`
-                or `PIL.Image.Image`)
-
-        Example:
-            ```python
-            import supervision as sv
-            from ultralytics import YOLO
-
-            model = YOLO('yolov8x.pt')
-            trace_annotator = sv.TraceAnnotator()
class TraceAnnotator:
+    """
+    A class for drawing trace paths on an image based on detection coordinates.
+
+    !!! warning
+
+        This annotator uses the `sv.Detections.tracker_id`. Read
+        [here](/latest/trackers/) to learn how to plug
+        tracking into your inference pipeline.
+    """
+
+    def __init__(
+        self,
+        color: Union[Color, ColorPalette] = ColorPalette.DEFAULT,
+        position: Position = Position.CENTER,
+        trace_length: int = 30,
+        thickness: int = 2,
+        color_lookup: ColorLookup = ColorLookup.CLASS,
+    ):
+        """
+        Args:
+            color (Union[Color, ColorPalette]): The color to draw the trace, can be
+                a single color or a color palette.
+            position (Position): The position of the trace.
+                Defaults to `CENTER`.
+            trace_length (int): The maximum length of the trace in terms of historical
+                points. Defaults to `30`.
+            thickness (int): The thickness of the trace lines. Defaults to `2`.
+            color_lookup (str): Strategy for mapping colors to annotations.
+                Options are `INDEX`, `CLASS`, `TRACK`.
+        """
+        self.color: Union[Color, ColorPalette] = color
+        self.trace = Trace(max_size=trace_length, anchor=position)
+        self.thickness = thickness
+        self.color_lookup: ColorLookup = color_lookup
+
+    @convert_for_annotation_method
+    def annotate(
+        self,
+        scene: ImageType,
+        detections: Detections,
+        custom_color_lookup: Optional[np.ndarray] = None,
+    ) -> ImageType:
+        """
+        Draws trace paths on the frame based on the detection coordinates provided.
+
+        Args:
+            scene (ImageType): The image on which the traces will be drawn.
+                `ImageType` is a flexible type, accepting either `numpy.ndarray`
+                or `PIL.Image.Image`.
+            detections (Detections): The detections which include coordinates for
+                which the traces will be drawn.
+            custom_color_lookup (Optional[np.ndarray]): Custom color lookup array.
+                Allows overriding the default color mapping strategy.
+
+        Returns:
+            The annotated image, matching the type of `scene` (`numpy.ndarray`
+                or `PIL.Image.Image`)
 
-            video_info = sv.VideoInfo.from_video_path(video_path='...')
-            frames_generator = sv.get_video_frames_generator(source_path='...')
-            tracker = sv.ByteTrack()
-
-            with sv.VideoSink(target_path='...', video_info=video_info) as sink:
-               for frame in frames_generator:
-                   result = model(frame)[0]
-                   detections = sv.Detections.from_ultralytics(result)
-                   detections = tracker.update_with_detections(detections)
-                   annotated_frame = trace_annotator.annotate(
-                       scene=frame.copy(),
-                       detections=detections)
-                   sink.write_frame(frame=annotated_frame)
-            ```
-
-        ![trace-annotator-example](https://media.roboflow.com/
-        supervision-annotator-examples/trace-annotator-example-purple.png)
-        """
-        self.trace.put(detections)
-
-        for detection_idx in range(len(detections)):
-            tracker_id = int(detections.tracker_id[detection_idx])
-            color = resolve_color(
-                color=self.color,
-                detections=detections,
-                detection_idx=detection_idx,
-                color_lookup=self.color_lookup
-                if custom_color_lookup is None
-                else custom_color_lookup,
-            )
-            xy = self.trace.get(tracker_id=tracker_id)
-            if len(xy) > 1:
-                scene = cv2.polylines(
-                    scene,
-                    [xy.astype(np.int32)],
-                    False,
-                    color=color.as_bgr(),
-                    thickness=self.thickness,
-                )
-        return scene
+        Example:
+            ```python
+            import supervision as sv
+            from ultralytics import YOLO
+
+            model = YOLO('yolov8x.pt')
+            trace_annotator = sv.TraceAnnotator()
+
+            video_info = sv.VideoInfo.from_video_path(video_path='...')
+            frames_generator = sv.get_video_frames_generator(source_path='...')
+            tracker = sv.ByteTrack()
+
+            with sv.VideoSink(target_path='...', video_info=video_info) as sink:
+               for frame in frames_generator:
+                   result = model(frame)[0]
+                   detections = sv.Detections.from_ultralytics(result)
+                   detections = tracker.update_with_detections(detections)
+                   annotated_frame = trace_annotator.annotate(
+                       scene=frame.copy(),
+                       detections=detections)
+                   sink.write_frame(frame=annotated_frame)
+            ```
+
+        ![trace-annotator-example](https://media.roboflow.com/
+        supervision-annotator-examples/trace-annotator-example-purple.png)
+        """
+        self.trace.put(detections)
+
+        for detection_idx in range(len(detections)):
+            tracker_id = int(detections.tracker_id[detection_idx])
+            color = resolve_color(
+                color=self.color,
+                detections=detections,
+                detection_idx=detection_idx,
+                color_lookup=self.color_lookup
+                if custom_color_lookup is None
+                else custom_color_lookup,
+            )
+            xy = self.trace.get(tracker_id=tracker_id)
+            if len(xy) > 1:
+                scene = cv2.polylines(
+                    scene,
+                    [xy.astype(np.int32)],
+                    False,
+                    color=color.as_bgr(),
+                    thickness=self.thickness,
+                )
+        return scene
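As the warning above notes, `annotate` indexes `detections.tracker_id`, so detections must be tracked first. A minimal sketch with hand-made tracker IDs standing in for a real tracker such as `sv.ByteTrack` (all values are synthetic):

```python
import numpy as np
import supervision as sv

trace_annotator = sv.TraceAnnotator(trace_length=50, thickness=2)
frame = np.zeros((480, 640, 3), dtype=np.uint8)

# Two "frames" of detections for the same object; tracker_id ties them together.
for x in (100, 140):
    detections = sv.Detections(
        xyxy=np.array([[float(x), 200.0, float(x) + 60.0, 260.0]]),
        class_id=np.array([0]),
        tracker_id=np.array([1]),
    )
    frame = trace_annotator.annotate(scene=frame, detections=detections)
```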
 
@@ -15728,15 +15812,7 @@

Source code in supervision/annotators/core.py
@@ -15751,30 +15827,38 @@ 


def __init__(
-    self,
-    color: Union[Color, ColorPalette] = ColorPalette.DEFAULT,
-    position: Position = Position.CENTER,
-    trace_length: int = 30,
-    thickness: int = 2,
-    color_lookup: ColorLookup = ColorLookup.CLASS,
-):
-    """
-    Args:
-        color (Union[Color, ColorPalette]): The color to draw the trace, can be
-            a single color or a color palette.
-        position (Position): The position of the trace.
-            Defaults to `CENTER`.
-        trace_length (int): The maximum length of the trace in terms of historical
-            points. Defaults to `30`.
-        thickness (int): The thickness of the trace lines. Defaults to `2`.
-        color_lookup (str): Strategy for mapping colors to annotations.
-            Options are `INDEX`, `CLASS`, `TRACK`.
-    """
-    self.color: Union[Color, ColorPalette] = color
-    self.trace = Trace(max_size=trace_length, anchor=position)
-    self.thickness = thickness
-    self.color_lookup: ColorLookup = color_lookup
def __init__(
+    self,
+    color: Union[Color, ColorPalette] = ColorPalette.DEFAULT,
+    position: Position = Position.CENTER,
+    trace_length: int = 30,
+    thickness: int = 2,
+    color_lookup: ColorLookup = ColorLookup.CLASS,
+):
+    """
+    Args:
+        color (Union[Color, ColorPalette]): The color to draw the trace, can be
+            a single color or a color palette.
+        position (Position): The position of the trace.
+            Defaults to `CENTER`.
+        trace_length (int): The maximum length of the trace in terms of historical
+            points. Defaults to `30`.
+        thickness (int): The thickness of the trace lines. Defaults to `2`.
+        color_lookup (str): Strategy for mapping colors to annotations.
+            Options are `INDEX`, `CLASS`, `TRACK`.
+    """
+    self.color: Union[Color, ColorPalette] = color
+    self.trace = Trace(max_size=trace_length, anchor=position)
+    self.thickness = thickness
+    self.color_lookup: ColorLookup = color_lookup
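A short configuration sketch of the options above; the specific values are illustrative only:

```python
import supervision as sv

trace_annotator = sv.TraceAnnotator(
    position=sv.Position.BOTTOM_CENTER,  # anchor the path at the bottom of each box
    trace_length=120,                    # keep up to ~120 historical points per track
    thickness=3,
    color_lookup=sv.ColorLookup.TRACK,   # one colour per tracker_id
)
```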
 
@@ -15907,15 +15991,7 @@

Source code in supervision/annotators/core.py
@@ -15977,77 +16053,85 @@ 


@convert_for_annotation_method
-def annotate(
-    self,
-    scene: ImageType,
-    detections: Detections,
-    custom_color_lookup: Optional[np.ndarray] = None,
-) -> ImageType:
-    """
-    Draws trace paths on the frame based on the detection coordinates provided.
-
-    Args:
-        scene (ImageType): The image on which the traces will be drawn.
-            `ImageType` is a flexible type, accepting either `numpy.ndarray`
-            or `PIL.Image.Image`.
-        detections (Detections): The detections which include coordinates for
-            which the traces will be drawn.
-        custom_color_lookup (Optional[np.ndarray]): Custom color lookup array.
-            Allows to override the default color mapping strategy.
-
-    Returns:
-        The annotated image, matching the type of `scene` (`numpy.ndarray`
-            or `PIL.Image.Image`)
-
-    Example:
-        ```python
-        import supervision as sv
-        from ultralytics import YOLO
-
-        model = YOLO('yolov8x.pt')
-        trace_annotator = sv.TraceAnnotator()
@convert_for_annotation_method
+def annotate(
+    self,
+    scene: ImageType,
+    detections: Detections,
+    custom_color_lookup: Optional[np.ndarray] = None,
+) -> ImageType:
+    """
+    Draws trace paths on the frame based on the detection coordinates provided.
+
+    Args:
+        scene (ImageType): The image on which the traces will be drawn.
+            `ImageType` is a flexible type, accepting either `numpy.ndarray`
+            or `PIL.Image.Image`.
+        detections (Detections): The detections which include coordinates for
+            which the traces will be drawn.
+        custom_color_lookup (Optional[np.ndarray]): Custom color lookup array.
+            Allows to override the default color mapping strategy.
+
+    Returns:
+        The annotated image, matching the type of `scene` (`numpy.ndarray`
+            or `PIL.Image.Image`)
 
-        video_info = sv.VideoInfo.from_video_path(video_path='...')
-        frames_generator = sv.get_video_frames_generator(source_path='...')
-        tracker = sv.ByteTrack()
-
-        with sv.VideoSink(target_path='...', video_info=video_info) as sink:
-           for frame in frames_generator:
-               result = model(frame)[0]
-               detections = sv.Detections.from_ultralytics(result)
-               detections = tracker.update_with_detections(detections)
-               annotated_frame = trace_annotator.annotate(
-                   scene=frame.copy(),
-                   detections=detections)
-               sink.write_frame(frame=annotated_frame)
-        ```
-
-    ![trace-annotator-example](https://media.roboflow.com/
-    supervision-annotator-examples/trace-annotator-example-purple.png)
-    """
-    self.trace.put(detections)
-
-    for detection_idx in range(len(detections)):
-        tracker_id = int(detections.tracker_id[detection_idx])
-        color = resolve_color(
-            color=self.color,
-            detections=detections,
-            detection_idx=detection_idx,
-            color_lookup=self.color_lookup
-            if custom_color_lookup is None
-            else custom_color_lookup,
-        )
-        xy = self.trace.get(tracker_id=tracker_id)
-        if len(xy) > 1:
-            scene = cv2.polylines(
-                scene,
-                [xy.astype(np.int32)],
-                False,
-                color=color.as_bgr(),
-                thickness=self.thickness,
-            )
-    return scene
+    Example:
+        ```python
+        import supervision as sv
+        from ultralytics import YOLO
+
+        model = YOLO('yolov8x.pt')
+        trace_annotator = sv.TraceAnnotator()
+
+        video_info = sv.VideoInfo.from_video_path(video_path='...')
+        frames_generator = sv.get_video_frames_generator(source_path='...')
+        tracker = sv.ByteTrack()
+
+        with sv.VideoSink(target_path='...', video_info=video_info) as sink:
+           for frame in frames_generator:
+               result = model(frame)[0]
+               detections = sv.Detections.from_ultralytics(result)
+               detections = tracker.update_with_detections(detections)
+               annotated_frame = trace_annotator.annotate(
+                   scene=frame.copy(),
+                   detections=detections)
+               sink.write_frame(frame=annotated_frame)
+        ```
+
+    ![trace-annotator-example](https://media.roboflow.com/
+    supervision-annotator-examples/trace-annotator-example-purple.png)
+    """
+    self.trace.put(detections)
+
+    for detection_idx in range(len(detections)):
+        tracker_id = int(detections.tracker_id[detection_idx])
+        color = resolve_color(
+            color=self.color,
+            detections=detections,
+            detection_idx=detection_idx,
+            color_lookup=self.color_lookup
+            if custom_color_lookup is None
+            else custom_color_lookup,
+        )
+        xy = self.trace.get(tracker_id=tracker_id)
+        if len(xy) > 1:
+            scene = cv2.polylines(
+                scene,
+                [xy.astype(np.int32)],
+                False,
+                color=color.as_bgr(),
+                thickness=self.thickness,
+            )
+    return scene
 
@@ -16080,21 +16164,7 @@

CropAnnotator

Source code in supervision/annotators/core.py -
2007
-2008
-2009
-2010
-2011
-2012
-2013
-2014
-2015
-2016
-2017
-2018
-2019
-2020
-2021
+                
2021
 2022
 2023
 2024
@@ -16224,151 +16294,165 @@ 

CropAnnotator

2148 2149 2150 -2151
class CropAnnotator(BaseAnnotator):
-    """
-    A class for drawing scaled up crops of detections on the scene.
-    """
-
-    def __init__(
-        self,
-        position: Position = Position.TOP_CENTER,
-        scale_factor: int = 2,
-        border_color: Union[Color, ColorPalette] = ColorPalette.DEFAULT,
-        border_thickness: int = 2,
-        border_color_lookup: ColorLookup = ColorLookup.CLASS,
-    ):
-        """
-        Args:
-            position (Position): The anchor position for placing the cropped and scaled
-                part of the detection in the scene.
-            scale_factor (int): The factor by which to scale the cropped image part. A
-                factor of 2, for example, would double the size of the cropped area,
-                allowing for a closer view of the detection.
-            border_color (Union[Color, ColorPalette]): The color or color palette to
-                use for annotating border around the cropped area.
-            border_thickness (int): The thickness of the border around the cropped area.
-            border_color_lookup (ColorLookup): Strategy for mapping colors to
-                annotations. Options are `INDEX`, `CLASS`, `TRACK`.
-        """
-        self.position: Position = position
-        self.scale_factor: int = scale_factor
-        self.border_color: Union[Color, ColorPalette] = border_color
-        self.border_thickness: int = border_thickness
-        self.border_color_lookup: ColorLookup = border_color_lookup
-
-    @convert_for_annotation_method
-    def annotate(
-        self,
-        scene: ImageType,
-        detections: Detections,
-        custom_color_lookup: Optional[np.ndarray] = None,
-    ) -> ImageType:
-        """
-        Annotates the provided scene with scaled and cropped parts of the image based
-        on the provided detections. Each detection is cropped from the original scene
-        and scaled according to the annotator's scale factor before being placed back
-        onto the scene at the specified position.
-
+2151
+2152
+2153
+2154
+2155
+2156
+2157
+2158
+2159
+2160
+2161
+2162
+2163
+2164
+2165
class CropAnnotator(BaseAnnotator):
+    """
+    A class for drawing scaled up crops of detections on the scene.
+    """
+
+    def __init__(
+        self,
+        position: Position = Position.TOP_CENTER,
+        scale_factor: int = 2,
+        border_color: Union[Color, ColorPalette] = ColorPalette.DEFAULT,
+        border_thickness: int = 2,
+        border_color_lookup: ColorLookup = ColorLookup.CLASS,
+    ):
+        """
+        Args:
+            position (Position): The anchor position for placing the cropped and scaled
+                part of the detection in the scene.
+            scale_factor (int): The factor by which to scale the cropped image part. A
+                factor of 2, for example, would double the size of the cropped area,
+                allowing for a closer view of the detection.
+            border_color (Union[Color, ColorPalette]): The color or color palette to
+                use for annotating border around the cropped area.
+            border_thickness (int): The thickness of the border around the cropped area.
+            border_color_lookup (ColorLookup): Strategy for mapping colors to
+                annotations. Options are `INDEX`, `CLASS`, `TRACK`.
+        """
+        self.position: Position = position
+        self.scale_factor: int = scale_factor
+        self.border_color: Union[Color, ColorPalette] = border_color
+        self.border_thickness: int = border_thickness
+        self.border_color_lookup: ColorLookup = border_color_lookup
 
-        Args:
-            scene (ImageType): The image where cropped detection will be placed.
-                `ImageType` is a flexible type, accepting either `numpy.ndarray`
-                or `PIL.Image.Image`.
-            detections (Detections): Object detections to annotate.
-            custom_color_lookup (Optional[np.ndarray]): Custom color lookup array.
-                Allows to override the default color mapping strategy.
-
-        Returns:
-            The annotated image.
-
-        Example:
-            ```python
-            import supervision as sv
-
-            image = ...
-            detections = sv.Detections(...)
-
-            crop_annotator = sv.CropAnnotator()
-            annotated_frame = crop_annotator.annotate(
-                scene=image.copy(),
-                detections=detections
-            )
-            ```
-        """
-        crops = [
-            crop_image(image=scene, xyxy=xyxy) for xyxy in detections.xyxy.astype(int)
-        ]
-        resized_crops = [
-            scale_image(image=crop, scale_factor=self.scale_factor) for crop in crops
-        ]
-        anchors = detections.get_anchors_coordinates(anchor=self.position).astype(int)
-
-        for idx, (resized_crop, anchor) in enumerate(zip(resized_crops, anchors)):
-            crop_wh = resized_crop.shape[1], resized_crop.shape[0]
-            (x1, y1), (x2, y2) = self.calculate_crop_coordinates(
-                anchor=anchor, crop_wh=crop_wh, position=self.position
-            )
-            scene = overlay_image(
-                scene=scene, inserted_image=resized_crop, anchor=(x1, y1)
-            )
-            color = resolve_color(
-                color=self.border_color,
-                detections=detections,
-                detection_idx=idx,
-                color_lookup=self.border_color_lookup
-                if custom_color_lookup is None
-                else custom_color_lookup,
-            )
-            cv2.rectangle(
-                img=scene,
-                pt1=(x1, y1),
-                pt2=(x2, y2),
-                color=color.as_bgr(),
-                thickness=self.border_thickness,
-            )
-
-        return scene
-
-    @staticmethod
-    def calculate_crop_coordinates(
-        anchor: Tuple[int, int], crop_wh: Tuple[int, int], position: Position
-    ) -> Tuple[Tuple[int, int], Tuple[int, int]]:
-        anchor_x, anchor_y = anchor
-        width, height = crop_wh
-
-        if position == Position.TOP_LEFT:
-            return (anchor_x - width, anchor_y - height), (anchor_x, anchor_y)
-        elif position == Position.TOP_CENTER:
-            return (
-                (anchor_x - width // 2, anchor_y - height),
-                (anchor_x + width // 2, anchor_y),
-            )
-        elif position == Position.TOP_RIGHT:
-            return (anchor_x, anchor_y - height), (anchor_x + width, anchor_y)
-        elif position == Position.CENTER_LEFT:
-            return (
-                (anchor_x - width, anchor_y - height // 2),
-                (anchor_x, anchor_y + height // 2),
-            )
-        elif position == Position.CENTER or position == Position.CENTER_OF_MASS:
-            return (
-                (anchor_x - width // 2, anchor_y - height // 2),
-                (anchor_x + width // 2, anchor_y + height // 2),
-            )
-        elif position == Position.CENTER_RIGHT:
-            return (
-                (anchor_x, anchor_y - height // 2),
-                (anchor_x + width, anchor_y + height // 2),
-            )
-        elif position == Position.BOTTOM_LEFT:
-            return (anchor_x - width, anchor_y), (anchor_x, anchor_y + height)
-        elif position == Position.BOTTOM_CENTER:
-            return (
-                (anchor_x - width // 2, anchor_y),
-                (anchor_x + width // 2, anchor_y + height),
-            )
-        elif position == Position.BOTTOM_RIGHT:
-            return (anchor_x, anchor_y), (anchor_x + width, anchor_y + height)
+    @convert_for_annotation_method
+    def annotate(
+        self,
+        scene: ImageType,
+        detections: Detections,
+        custom_color_lookup: Optional[np.ndarray] = None,
+    ) -> ImageType:
+        """
+        Annotates the provided scene with scaled and cropped parts of the image based
+        on the provided detections. Each detection is cropped from the original scene
+        and scaled according to the annotator's scale factor before being placed back
+        onto the scene at the specified position.
+
+
+        Args:
+            scene (ImageType): The image where cropped detection will be placed.
+                `ImageType` is a flexible type, accepting either `numpy.ndarray`
+                or `PIL.Image.Image`.
+            detections (Detections): Object detections to annotate.
+            custom_color_lookup (Optional[np.ndarray]): Custom color lookup array.
+                Allows to override the default color mapping strategy.
+
+        Returns:
+            The annotated image.
+
+        Example:
+            ```python
+            import supervision as sv
+
+            image = ...
+            detections = sv.Detections(...)
+
+            crop_annotator = sv.CropAnnotator()
+            annotated_frame = crop_annotator.annotate(
+                scene=image.copy(),
+                detections=detections
+            )
+            ```
+        """
+        crops = [
+            crop_image(image=scene, xyxy=xyxy) for xyxy in detections.xyxy.astype(int)
+        ]
+        resized_crops = [
+            scale_image(image=crop, scale_factor=self.scale_factor) for crop in crops
+        ]
+        anchors = detections.get_anchors_coordinates(anchor=self.position).astype(int)
+
+        for idx, (resized_crop, anchor) in enumerate(zip(resized_crops, anchors)):
+            crop_wh = resized_crop.shape[1], resized_crop.shape[0]
+            (x1, y1), (x2, y2) = self.calculate_crop_coordinates(
+                anchor=anchor, crop_wh=crop_wh, position=self.position
+            )
+            scene = overlay_image(
+                scene=scene, inserted_image=resized_crop, anchor=(x1, y1)
+            )
+            color = resolve_color(
+                color=self.border_color,
+                detections=detections,
+                detection_idx=idx,
+                color_lookup=self.border_color_lookup
+                if custom_color_lookup is None
+                else custom_color_lookup,
+            )
+            cv2.rectangle(
+                img=scene,
+                pt1=(x1, y1),
+                pt2=(x2, y2),
+                color=color.as_bgr(),
+                thickness=self.border_thickness,
+            )
+
+        return scene
+
+    @staticmethod
+    def calculate_crop_coordinates(
+        anchor: Tuple[int, int], crop_wh: Tuple[int, int], position: Position
+    ) -> Tuple[Tuple[int, int], Tuple[int, int]]:
+        anchor_x, anchor_y = anchor
+        width, height = crop_wh
+
+        if position == Position.TOP_LEFT:
+            return (anchor_x - width, anchor_y - height), (anchor_x, anchor_y)
+        elif position == Position.TOP_CENTER:
+            return (
+                (anchor_x - width // 2, anchor_y - height),
+                (anchor_x + width // 2, anchor_y),
+            )
+        elif position == Position.TOP_RIGHT:
+            return (anchor_x, anchor_y - height), (anchor_x + width, anchor_y)
+        elif position == Position.CENTER_LEFT:
+            return (
+                (anchor_x - width, anchor_y - height // 2),
+                (anchor_x, anchor_y + height // 2),
+            )
+        elif position == Position.CENTER or position == Position.CENTER_OF_MASS:
+            return (
+                (anchor_x - width // 2, anchor_y - height // 2),
+                (anchor_x + width // 2, anchor_y + height // 2),
+            )
+        elif position == Position.CENTER_RIGHT:
+            return (
+                (anchor_x, anchor_y - height // 2),
+                (anchor_x + width, anchor_y + height // 2),
+            )
+        elif position == Position.BOTTOM_LEFT:
+            return (anchor_x - width, anchor_y), (anchor_x, anchor_y + height)
+        elif position == Position.BOTTOM_CENTER:
+            return (
+                (anchor_x - width // 2, anchor_y),
+                (anchor_x + width // 2, anchor_y + height),
+            )
+        elif position == Position.BOTTOM_RIGHT:
+            return (anchor_x, anchor_y), (anchor_x + width, anchor_y + height)
 
@@ -16488,21 +16572,7 @@

Source code in supervision/annotators/core.py -
2012
-2013
-2014
-2015
-2016
-2017
-2018
-2019
-2020
-2021
-2022
-2023
-2024
-2025
-2026
+              
2026
 2027
 2028
 2029
@@ -16513,32 +16583,46 @@ 

2034 2035 2036 -2037

def __init__(
-    self,
-    position: Position = Position.TOP_CENTER,
-    scale_factor: int = 2,
-    border_color: Union[Color, ColorPalette] = ColorPalette.DEFAULT,
-    border_thickness: int = 2,
-    border_color_lookup: ColorLookup = ColorLookup.CLASS,
-):
-    """
-    Args:
-        position (Position): The anchor position for placing the cropped and scaled
-            part of the detection in the scene.
-        scale_factor (int): The factor by which to scale the cropped image part. A
-            factor of 2, for example, would double the size of the cropped area,
-            allowing for a closer view of the detection.
-        border_color (Union[Color, ColorPalette]): The color or color palette to
-            use for annotating border around the cropped area.
-        border_thickness (int): The thickness of the border around the cropped area.
-        border_color_lookup (ColorLookup): Strategy for mapping colors to
-            annotations. Options are `INDEX`, `CLASS`, `TRACK`.
-    """
-    self.position: Position = position
-    self.scale_factor: int = scale_factor
-    self.border_color: Union[Color, ColorPalette] = border_color
-    self.border_thickness: int = border_thickness
-    self.border_color_lookup: ColorLookup = border_color_lookup
+2037
+2038
+2039
+2040
+2041
+2042
+2043
+2044
+2045
+2046
+2047
+2048
+2049
+2050
+2051
def __init__(
+    self,
+    position: Position = Position.TOP_CENTER,
+    scale_factor: int = 2,
+    border_color: Union[Color, ColorPalette] = ColorPalette.DEFAULT,
+    border_thickness: int = 2,
+    border_color_lookup: ColorLookup = ColorLookup.CLASS,
+):
+    """
+    Args:
+        position (Position): The anchor position for placing the cropped and scaled
+            part of the detection in the scene.
+        scale_factor (int): The factor by which to scale the cropped image part. A
+            factor of 2, for example, would double the size of the cropped area,
+            allowing for a closer view of the detection.
+        border_color (Union[Color, ColorPalette]): The color or color palette to
+            use for annotating border around the cropped area.
+        border_thickness (int): The thickness of the border around the cropped area.
+        border_color_lookup (ColorLookup): Strategy for mapping colors to
+            annotations. Options are `INDEX`, `CLASS`, `TRACK`.
+    """
+    self.position: Position = position
+    self.scale_factor: int = scale_factor
+    self.border_color: Union[Color, ColorPalette] = border_color
+    self.border_thickness: int = border_thickness
+    self.border_color_lookup: ColorLookup = border_color_lookup
 
@@ -16661,21 +16745,7 @@

Source code in supervision/annotators/core.py -
2039
-2040
-2041
-2042
-2043
-2044
-2045
-2046
-2047
-2048
-2049
-2050
-2051
-2052
-2053
+              
2053
 2054
 2055
 2056
@@ -16732,78 +16802,92 @@ 

2107 2108 2109 -2110

@convert_for_annotation_method
-def annotate(
-    self,
-    scene: ImageType,
-    detections: Detections,
-    custom_color_lookup: Optional[np.ndarray] = None,
-) -> ImageType:
-    """
-    Annotates the provided scene with scaled and cropped parts of the image based
-    on the provided detections. Each detection is cropped from the original scene
-    and scaled according to the annotator's scale factor before being placed back
-    onto the scene at the specified position.
-
-
-    Args:
-        scene (ImageType): The image where cropped detection will be placed.
-            `ImageType` is a flexible type, accepting either `numpy.ndarray`
-            or `PIL.Image.Image`.
-        detections (Detections): Object detections to annotate.
-        custom_color_lookup (Optional[np.ndarray]): Custom color lookup array.
-            Allows to override the default color mapping strategy.
-
-    Returns:
-        The annotated image.
-
-    Example:
-        ```python
-        import supervision as sv
-
-        image = ...
-        detections = sv.Detections(...)
-
-        crop_annotator = sv.CropAnnotator()
-        annotated_frame = crop_annotator.annotate(
-            scene=image.copy(),
-            detections=detections
-        )
-        ```
-    """
-    crops = [
-        crop_image(image=scene, xyxy=xyxy) for xyxy in detections.xyxy.astype(int)
-    ]
-    resized_crops = [
-        scale_image(image=crop, scale_factor=self.scale_factor) for crop in crops
-    ]
-    anchors = detections.get_anchors_coordinates(anchor=self.position).astype(int)
-
-    for idx, (resized_crop, anchor) in enumerate(zip(resized_crops, anchors)):
-        crop_wh = resized_crop.shape[1], resized_crop.shape[0]
-        (x1, y1), (x2, y2) = self.calculate_crop_coordinates(
-            anchor=anchor, crop_wh=crop_wh, position=self.position
-        )
-        scene = overlay_image(
-            scene=scene, inserted_image=resized_crop, anchor=(x1, y1)
-        )
-        color = resolve_color(
-            color=self.border_color,
-            detections=detections,
-            detection_idx=idx,
-            color_lookup=self.border_color_lookup
-            if custom_color_lookup is None
-            else custom_color_lookup,
-        )
-        cv2.rectangle(
-            img=scene,
-            pt1=(x1, y1),
-            pt2=(x2, y2),
-            color=color.as_bgr(),
-            thickness=self.border_thickness,
-        )
-
-    return scene
+2110
+2111
+2112
+2113
+2114
+2115
+2116
+2117
+2118
+2119
+2120
+2121
+2122
+2123
+2124
@convert_for_annotation_method
+def annotate(
+    self,
+    scene: ImageType,
+    detections: Detections,
+    custom_color_lookup: Optional[np.ndarray] = None,
+) -> ImageType:
+    """
+    Annotates the provided scene with scaled and cropped parts of the image based
+    on the provided detections. Each detection is cropped from the original scene
+    and scaled according to the annotator's scale factor before being placed back
+    onto the scene at the specified position.
+
+
+    Args:
+        scene (ImageType): The image where cropped detection will be placed.
+            `ImageType` is a flexible type, accepting either `numpy.ndarray`
+            or `PIL.Image.Image`.
+        detections (Detections): Object detections to annotate.
+        custom_color_lookup (Optional[np.ndarray]): Custom color lookup array.
+            Allows to override the default color mapping strategy.
+
+    Returns:
+        The annotated image.
+
+    Example:
+        ```python
+        import supervision as sv
+
+        image = ...
+        detections = sv.Detections(...)
+
+        crop_annotator = sv.CropAnnotator()
+        annotated_frame = crop_annotator.annotate(
+            scene=image.copy(),
+            detections=detections
+        )
+        ```
+    """
+    crops = [
+        crop_image(image=scene, xyxy=xyxy) for xyxy in detections.xyxy.astype(int)
+    ]
+    resized_crops = [
+        scale_image(image=crop, scale_factor=self.scale_factor) for crop in crops
+    ]
+    anchors = detections.get_anchors_coordinates(anchor=self.position).astype(int)
+
+    for idx, (resized_crop, anchor) in enumerate(zip(resized_crops, anchors)):
+        crop_wh = resized_crop.shape[1], resized_crop.shape[0]
+        (x1, y1), (x2, y2) = self.calculate_crop_coordinates(
+            anchor=anchor, crop_wh=crop_wh, position=self.position
+        )
+        scene = overlay_image(
+            scene=scene, inserted_image=resized_crop, anchor=(x1, y1)
+        )
+        color = resolve_color(
+            color=self.border_color,
+            detections=detections,
+            detection_idx=idx,
+            color_lookup=self.border_color_lookup
+            if custom_color_lookup is None
+            else custom_color_lookup,
+        )
+        cv2.rectangle(
+            img=scene,
+            pt1=(x1, y1),
+            pt2=(x2, y2),
+            color=color.as_bgr(),
+            thickness=self.border_thickness,
+        )
+
+    return scene
 
diff --git a/develop/search/search_index.json b/develop/search/search_index.json index 1f71be712..b6f706e33 100644 --- a/develop/search/search_index.json +++ b/develop/search/search_index.json @@ -1 +1 @@ -{"config":{"lang":["en"],"separator":"[\\s\\-]+","pipeline":["stopWordFilter"]},"docs":[{"location":"","title":"Supervision","text":""},{"location":"#hello","title":"\ud83d\udc4b Hello","text":"

We write your reusable computer vision tools. Whether you need to load your dataset from your hard drive, draw detections on an image or video, or count how many detections are in a zone, you can count on us!

"},{"location":"#install","title":"\ud83d\udcbb Install","text":"

You can install supervision in a Python>=3.8 environment.

pip install (recommended)

headless / desktop

The headless installation of supervision is designed for environments where graphical user interfaces (GUI) are not needed, making it more lightweight and suitable for server-side applications.

pip install supervision\n

If you require the full version of supervision with GUI support, you can install the desktop version. This version includes the GUI components of OpenCV, allowing you to display images and videos on the screen.

pip install \"supervision[desktop]\"\n

conda/mamba install

conda / mamba

conda install -c conda-forge supervision\n

mamba install -c conda-forge supervision\n

git clone (for development)

virtualenv / poetry
# clone repository and navigate to root directory\ngit clone https://github.com/roboflow/supervision.git\ncd supervision\n\n# setup python environment and activate it\npython3 -m venv venv\nsource venv/bin/activate\npip install --upgrade pip\n\n# headless install\npip install -e \".\"\n\n# desktop install\npip install -e \".[desktop]\"\n
# clone repository and navigate to root directory\ngit clone https://github.com/roboflow/supervision.git\ncd supervision\n\n# setup python environment and activate it\npoetry env use python3.10\npoetry shell\n\n# headless install\npoetry install\n\n# desktop install\npoetry install --extras \"desktop\"\n
"},{"location":"#quickstart","title":"\ud83d\ude80 Quickstart","text":"
  • Detect and Annotate

    Annotate predictions from a range of object detection and segmentation models

    Tutorial

  • Track Objects

    Discover how to enhance video analysis by implementing seamless object tracking

    Tutorial

  • Detect Small Objects

    Learn how to detect small objects in images

    Tutorial

  • Count Objects Crossing Line

    Explore methods to accurately count and analyze objects crossing a predefined line

  • Filter Objects in Zone

    Master the techniques to selectively filter and focus on objects within a specific zone

"},{"location":"assets/","title":"Assets","text":"

Supervision offers an assets download utility that allows you to download video files that you can use in your demos.

"},{"location":"assets/#install-extra","title":"Install extradownload_assetsVideoAssets","text":"

To install the Supervision assets utility, you can use pip. This utility is available as an extra within the Supervision package.

pip install

pip install \"supervision[assets]\"\n

Download a specified asset if it doesn't already exist or is corrupted.

Parameters:

| Name | Type | Description | Default |
|---|---|---|---|
| asset_name | Union[VideoAssets, str] | The name or type of the asset to be downloaded. | required |

Returns:

| Name | Type | Description |
|---|---|---|
| str | str | The filename of the downloaded asset. |

Example
from supervision.assets import download_assets, VideoAssets\n\ndownload_assets(VideoAssets.VEHICLES)\n\"vehicles.mp4\"\n
Source code in supervision/assets/downloader.py
def download_assets(asset_name: Union[VideoAssets, str]) -> str:\n    \"\"\"\n    Download a specified asset if it doesn't already exist or is corrupted.\n\n    Parameters:\n        asset_name (Union[VideoAssets, str]): The name or type of the asset to be\n            downloaded.\n\n    Returns:\n        str: The filename of the downloaded asset.\n\n    Example:\n        ```python\n        from supervision.assets import download_assets, VideoAssets\n\n        download_assets(VideoAssets.VEHICLES)\n        \"vehicles.mp4\"\n        ```\n    \"\"\"\n\n    filename = asset_name.value if isinstance(asset_name, VideoAssets) else asset_name\n\n    if not Path(filename).exists() and filename in VIDEO_ASSETS:\n        print(f\"Downloading {filename} assets \\n\")\n        response = get(VIDEO_ASSETS[filename][0], stream=True, allow_redirects=True)\n        response.raise_for_status()\n\n        file_size = int(response.headers.get(\"Content-Length\", 0))\n        folder_path = Path(filename).expanduser().resolve()\n        folder_path.parent.mkdir(parents=True, exist_ok=True)\n\n        with tqdm.wrapattr(\n            response.raw, \"read\", total=file_size, desc=\"\", colour=\"#a351fb\"\n        ) as raw_resp:\n            with folder_path.open(\"wb\") as file:\n                copyfileobj(raw_resp, file)\n\n    elif Path(filename).exists():\n        if not is_md5_hash_matching(filename, VIDEO_ASSETS[filename][1]):\n            print(\"File corrupted. Re-downloading... \\n\")\n            os.remove(filename)\n            return download_assets(filename)\n\n        print(f\"{filename} asset download complete. \\n\")\n\n    else:\n        valid_assets = \", \".join(asset.value for asset in VideoAssets)\n        raise ValueError(\n            f\"Invalid asset. It should be one of the following: {valid_assets}.\"\n        )\n\n    return filename\n

Bases: Enum

Each member of this enum represents a video asset. The value associated with each member is the filename of the video.

| Enum Member | Video Filename | Video URL |
|---|---|---|
| VEHICLES | vehicles.mp4 | Link |
| MILK_BOTTLING_PLANT | milk-bottling-plant.mp4 | Link |
| VEHICLES_2 | vehicles-2.mp4 | Link |
| GROCERY_STORE | grocery-store.mp4 | Link |
| SUBWAY | subway.mp4 | Link |
| MARKET_SQUARE | market-square.mp4 | Link |
| PEOPLE_WALKING | people-walking.mp4 | Link |
| BEACH | beach-1.mp4 | Link |
| BASKETBALL | basketball-1.mp4 | Link |

Source code in supervision/assets/list.py
class VideoAssets(Enum):\n    \"\"\"\n    Each member of this enum represents a video asset. The value associated with each\n    member is the filename of the video.\n\n    | Enum Member            | Video Filename             | Video URL                                                                             |\n    |------------------------|----------------------------|---------------------------------------------------------------------------------------|\n    | `VEHICLES`             | `vehicles.mp4`             | [Link](https://media.roboflow.com/supervision/video-examples/vehicles.mp4)            |\n    | `MILK_BOTTLING_PLANT`  | `milk-bottling-plant.mp4`  | [Link](https://media.roboflow.com/supervision/video-examples/milk-bottling-plant.mp4) |\n    | `VEHICLES_2`           | `vehicles-2.mp4`           | [Link](https://media.roboflow.com/supervision/video-examples/vehicles-2.mp4)          |\n    | `GROCERY_STORE`        | `grocery-store.mp4`        | [Link](https://media.roboflow.com/supervision/video-examples/grocery-store.mp4)       |\n    | `SUBWAY`               | `subway.mp4`               | [Link](https://media.roboflow.com/supervision/video-examples/subway.mp4)              |\n    | `MARKET_SQUARE`        | `market-square.mp4`        | [Link](https://media.roboflow.com/supervision/video-examples/market-square.mp4)       |\n    | `PEOPLE_WALKING`       | `people-walking.mp4`       | [Link](https://media.roboflow.com/supervision/video-examples/people-walking.mp4)      |\n    | `BEACH`                | `beach-1.mp4`              | [Link](https://media.roboflow.com/supervision/video-examples/beach-1.mp4)             |\n    | `BASKETBALL`           | `basketball-1.mp4`         | [Link](https://media.roboflow.com/supervision/video-examples/basketball-1.mp4)        |\n    \"\"\"  # noqa: E501 // docs\n\n    VEHICLES = \"vehicles.mp4\"\n    MILK_BOTTLING_PLANT = \"milk-bottling-plant.mp4\"\n    VEHICLES_2 = \"vehicles-2.mp4\"\n    GROCERY_STORE = \"grocery-store.mp4\"\n    SUBWAY = \"subway.mp4\"\n    MARKET_SQUARE = \"market-square.mp4\"\n    PEOPLE_WALKING = \"people-walking.mp4\"\n    BEACH = \"beach-1.mp4\"\n    BASKETBALL = \"basketball-1.mp4\"\n\n    @classmethod\n    def list(cls):\n        return list(map(lambda c: c.value, cls))\n
"},{"location":"changelog/","title":"Changelog","text":""},{"location":"changelog/#0210-jun-5-2024","title":"0.21.0 Jun 5, 2024","text":"
  • Added #500: sv.Detections.with_nmm to perform non-maximum merging on the current set of object detections.
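A minimal sketch of calling with_nmm; the threshold keyword and the sample boxes are assumptions that mirror the existing with_nms API rather than confirmed defaults.

```python
import numpy as np
import supervision as sv

detections = sv.Detections(
    xyxy=np.array([[0, 0, 100, 100], [10, 10, 110, 110]], dtype=float),
    confidence=np.array([0.9, 0.8]),
    class_id=np.array([0, 0]),
)

# merge heavily overlapping detections instead of discarding them (NMM vs. NMS);
# `threshold` is assumed to be the IoU cutoff, mirroring `with_nms`
merged = detections.with_nmm(threshold=0.5)
```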

  • Added #1221: sv.Detections.from_lmm allowing to parse Large Multimodal Model (LMM) text result into sv.Detections object. For now from_lmm supports only PaliGemma result parsing.

import supervision as sv\n\npaligemma_result = \"<loc0256><loc0256><loc0768><loc0768> cat\"\ndetections = sv.Detections.from_lmm(\n    sv.LMM.PALIGEMMA,\n    paligemma_result,\n    resolution_wh=(1000, 1000),\n    classes=['cat', 'dog']\n)\ndetections.xyxy\n# array([[250., 250., 750., 750.]])\n\ndetections.class_id\n# array([0])\n
  • Added #1236: sv.VertexLabelAnnotator allowing to annotate every vertex of a keypoint skeleton with custom text and color.
import supervision as sv\n\nimage = ...\nkey_points = sv.KeyPoints(...)\n\nedge_annotator = sv.EdgeAnnotator(\n    color=sv.Color.GREEN,\n    thickness=5\n)\nannotated_frame = edge_annotator.annotate(\n    scene=image.copy(),\n    key_points=key_points\n)\n
  • Added #1147: sv.KeyPoints.from_inference allowing to create sv.KeyPoints from Inference result.

  • Added #1138: sv.KeyPoints.from_yolo_nas allowing to create sv.KeyPoints from YOLO-NAS result.

  • Added #1163: sv.mask_to_rle and sv.rle_to_mask allowing for easy conversion between mask and rle formats.
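A small round-trip sketch; the resolution_wh keyword for rle_to_mask is an assumption based on naming used elsewhere in the library.

```python
import numpy as np
import supervision as sv

mask = np.zeros((4, 4), dtype=bool)
mask[1:3, 1:3] = True

rle = sv.mask_to_rle(mask)                            # binary mask -> run-length encoding
restored = sv.rle_to_mask(rle, resolution_wh=(4, 4))  # run-length encoding -> binary mask
# the round trip is expected to reproduce the original mask
```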

  • Changed #1236: sv.InferenceSlicer allowing to select overlap filtering strategy (NONE, NON_MAX_SUPPRESSION and NON_MAX_MERGE).

  • Changed #1178: sv.InferenceSlicer adding instance segmentation model support.

import cv2\nimport numpy as np\nimport supervision as sv\nfrom inference import get_model\n\nmodel = get_model(model_id=\"yolov8x-seg-640\")\nimage = cv2.imread(<SOURCE_IMAGE_PATH>)\n\ndef callback(image_slice: np.ndarray) -> sv.Detections:\n    results = model.infer(image_slice)[0]\n    return sv.Detections.from_inference(results)\n\nslicer = sv.InferenceSlicer(callback = callback)\ndetections = slicer(image)\n\nmask_annotator = sv.MaskAnnotator()\nlabel_annotator = sv.LabelAnnotator()\n\nannotated_image = mask_annotator.annotate(\n    scene=image, detections=detections)\nannotated_image = label_annotator.annotate(\n    scene=annotated_image, detections=detections)\n
  • Changed #1228: sv.LineZone making it 10-20 times faster, depending on the use case.

  • Changed #1163: sv.DetectionDataset.from_coco and sv.DetectionDataset.as_coco adding support for run-length encoding (RLE) mask format.

"},{"location":"changelog/#0200-april-24-2024","title":"0.20.0 April 24, 2024","text":"
  • Added #1128: sv.KeyPoints to provide initial support for pose estimation and broader keypoint detection models.

  • Added #1128: sv.EdgeAnnotator and sv.VertexAnnotator to enable rendering of results from keypoint detection models.

import cv2\nimport supervision as sv\nfrom ultralytics import YOLO\n\nimage = cv2.imread(<SOURCE_IMAGE_PATH>)\nmodel = YOLO('yolov8l-pose')\n\nresult = model(image, verbose=False)[0]\nkeypoints = sv.KeyPoints.from_ultralytics(result)\n\nedge_annotators = sv.EdgeAnnotator(color=sv.Color.GREEN, thickness=5)\nannotated_image = edge_annotators.annotate(image.copy(), keypoints)\n
  • Changed #1037: sv.LabelAnnotator by adding an additional corner_radius argument that allows for rounding the corners of the bounding box.

  • Changed #1109: sv.PolygonZone such that the frame_resolution_wh argument is no longer required to initialize sv.PolygonZone.

Deprecated

The frame_resolution_wh parameter in sv.PolygonZone is deprecated and will be removed in supervision-0.24.0.
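A minimal sketch of the simplified constructor; the polygon coordinates are illustrative.

```python
import numpy as np
import supervision as sv

polygon = np.array([[100, 100], [540, 100], [540, 380], [100, 380]])

# frame_resolution_wh is no longer required
zone = sv.PolygonZone(polygon=polygon)
```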

  • Changed #1084: sv.get_polygon_center to calculate a more accurate polygon centroid.
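A tiny illustrative call, assuming the utility takes an (N, 2) polygon array and returns a point with x and y attributes.

```python
import numpy as np
import supervision as sv

polygon = np.array([[0, 0], [100, 0], [100, 100], [0, 100]])
center = sv.get_polygon_center(polygon=polygon)
print(center.x, center.y)  # roughly (50, 50) for this square
```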

  • Changed #1069: sv.Detections.from_transformers by adding support for Transformers segmentation models and extracting class name values.

import torch\nimport supervision as sv\nfrom PIL import Image\nfrom transformers import DetrImageProcessor, DetrForSegmentation\n\nprocessor = DetrImageProcessor.from_pretrained(\"facebook/detr-resnet-50-panoptic\")\nmodel = DetrForSegmentation.from_pretrained(\"facebook/detr-resnet-50-panoptic\")\n\nimage = Image.open(<SOURCE_IMAGE_PATH>)\ninputs = processor(images=image, return_tensors=\"pt\")\n\nwith torch.no_grad():\n    outputs = model(**inputs)\n\nwidth, height = image.size\ntarget_size = torch.tensor([[height, width]])\nresults = processor.post_process_segmentation(\n    outputs=outputs, target_sizes=target_size)[0]\ndetections = sv.Detections.from_transformers(results, id2label=model.config.id2label)\n\nmask_annotator = sv.MaskAnnotator()\nlabel_annotator = sv.LabelAnnotator(text_position=sv.Position.CENTER)\n\nannotated_image = mask_annotator.annotate(\n    scene=image, detections=detections)\nannotated_image = label_annotator.annotate(\n    scene=annotated_image, detections=detections)\n
  • Fixed #787: sv.ByteTrack.update_with_detections which was removing segmentation masks while tracking. Now, ByteTrack can be used alongside segmentation models.
"},{"location":"changelog/#0190-march-15-2024","title":"0.19.0 March 15, 2024","text":"
  • Added #818: sv.CSVSink allowing for the straightforward saving of image, video, or stream inference results in a .csv file.
import supervision as sv\nfrom ultralytics import YOLO\n\nmodel = YOLO(<SOURCE_MODEL_PATH>)\ncsv_sink = sv.CSVSink(<RESULT_CSV_FILE_PATH>)\nframes_generator = sv.get_video_frames_generator(<SOURCE_VIDEO_PATH>)\n\nwith csv_sink:\n    for frame in frames_generator:\n        result = model(frame)[0]\n        detections = sv.Detections.from_ultralytics(result)\n        csv_sink.append(detections, custom_data={<CUSTOM_LABEL>:<CUSTOM_DATA>})\n
  • Added #819: sv.JSONSink allowing for the straightforward saving of image, video, or stream inference results in a .json file.
```python\nimport supervision as sv\nfrom ultralytics import YOLO\n\nmodel = YOLO(<SOURCE_MODEL_PATH>)\njson_sink = sv.JSONSink(<RESULT_JSON_FILE_PATH>)\nframes_generator = sv.get_video_frames_generator(<SOURCE_VIDEO_PATH>)\n\nwith json_sink:\n    for frame in frames_generator:\n        result = model(frame)[0]\n        detections = sv.Detections.from_ultralytics(result)\n        json_sink.append(detections, custom_data={<CUSTOM_LABEL>:<CUSTOM_DATA>})\n
  • Added #847: sv.mask_iou_batch allowing to compute Intersection over Union (IoU) of two sets of masks.
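A minimal sketch using positional arguments, since the parameter names are not shown here; masks are stacked as (N, H, W) boolean arrays.

```python
import numpy as np
import supervision as sv

masks_a = np.zeros((1, 10, 10), dtype=bool)
masks_a[0, :6, :6] = True
masks_b = np.zeros((1, 10, 10), dtype=bool)
masks_b[0, 3:9, 3:9] = True

iou_matrix = sv.mask_iou_batch(masks_a, masks_b)  # pairwise IoU matrix, here shape (1, 1)
```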

  • Added #847: sv.mask_non_max_suppression allowing to perform Non-Maximum Suppression (NMS) on segmentation predictions.

  • Added #888: sv.CropAnnotator allowing users to annotate the scene with scaled-up crops of detections.

import cv2\nimport supervision as sv\nfrom inference import get_model\n\nimage = cv2.imread(<SOURCE_IMAGE_PATH>)\nmodel = get_model(model_id=\"yolov8n-640\")\n\nresult = model.infer(image)[0]\ndetections = sv.Detections.from_inference(result)\n\ncrop_annotator = sv.CropAnnotator()\nannotated_frame = crop_annotator.annotate(\n    scene=image.copy(),\n    detections=detections\n)\n
  • Changed #827: sv.ByteTrack.reset allowing users to clear trackers state, enabling the processing of multiple video files in sequence.

  • Changed #802: sv.LineZoneAnnotator allowing to hide in/out count using display_in_count and display_out_count properties.

  • Changed #787: sv.ByteTrack input arguments and docstrings updated to improve readability and ease of use.

Deprecated

The track_buffer, track_thresh, and match_thresh parameters in sv.ByteTrack are deprecated and will be removed in supervision-0.23.0. Use lost_track_buffer, track_activation_threshold, and minimum_matching_threshold instead.
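A sketch using the new parameter names listed above; the values shown are illustrative, not the library defaults.

```python
import supervision as sv

tracker = sv.ByteTrack(
    lost_track_buffer=30,             # replaces track_buffer
    track_activation_threshold=0.25,  # replaces track_thresh
    minimum_matching_threshold=0.8,   # replaces match_thresh
)
```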

  • Changed #910: sv.PolygonZone to now accept a list of specific box anchors that must be in zone for a detection to be counted.

Deprecated

The triggering_position parameter in sv.PolygonZone is deprecated and will be removed in supervision-0.23.0. Use triggering_anchors instead.

  • Changed #875: annotators adding support for Pillow images. All supervision Annotators can now accept an image as either a numpy array or a Pillow Image. They automatically detect its type, draw annotations, and return the output in the same format as the input.

  • Fixed #944: sv.DetectionsSmoother removing tracking_id from sv.Detections.

"},{"location":"changelog/#0180-january-25-2024","title":"0.18.0 January 25, 2024","text":"
  • Added #720: sv.PercentageBarAnnotator allowing to annotate images and videos with percentage values representing confidence or other custom property.
>>> import supervision as sv\n\n>>> image = ...\n>>> detections = sv.Detections(...)\n\n>>> percentage_bar_annotator = sv.PercentageBarAnnotator()\n>>> annotated_frame = percentage_bar_annotator.annotate(\n...     scene=image.copy(),\n...     detections=detections\n... )\n
  • Added #702: sv.RoundBoxAnnotator allowing to annotate images and videos with rounded corners bounding boxes.

  • Added #770: sv.OrientedBoxAnnotator allowing to annotate images and videos with OBB (Oriented Bounding Boxes).

import cv2\nimport supervision as sv\nfrom ultralytics import YOLO\n\nimage = cv2.imread(<SOURCE_IMAGE_PATH>)\nmodel = YOLO(\"yolov8n-obb.pt\")\n\nresult = model(image)[0]\ndetections = sv.Detections.from_ultralytics(result)\n\noriented_box_annotator = sv.OrientedBoxAnnotator()\nannotated_frame = oriented_box_annotator.annotate(\n    scene=image.copy(),\n    detections=detections\n)\n
  • Added #696: sv.DetectionsSmoother allowing for smoothing detections over multiple frames in video tracking.

  • Added #769: sv.ColorPalette.from_matplotlib allowing users to create a sv.ColorPalette instance from a Matplotlib color palette.

>>> import supervision as sv\n\n>>> sv.ColorPalette.from_matplotlib('viridis', 5)\nColorPalette(colors=[Color(r=68, g=1, b=84), Color(r=59, g=82, b=139), ...])\n
  • Changed #770: sv.Detections.from_ultralytics adding support for OBB (Oriented Bounding Boxes).

  • Changed #735: sv.LineZone to now accept a list of specific box anchors that must cross the line for a detection to be counted. This update marks a significant improvement from the previous requirement, where all four box corners were necessary. Users can now specify a single anchor, such as sv.Position.BOTTOM_CENTER, or any other combination of anchors defined as List[sv.Position].
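A minimal sketch; the line endpoints are illustrative and the start/end keywords are assumed to follow the usual LineZone constructor.

```python
import supervision as sv

line_zone = sv.LineZone(
    start=sv.Point(0, 300),
    end=sv.Point(640, 300),
    # only the bottom-center anchor has to cross the line to trigger a count
    triggering_anchors=[sv.Position.BOTTOM_CENTER],
)
```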

  • Changed #756: sv.Color's and sv.ColorPalette's method of accessing predefined colors, transitioning from a function-based approach (sv.Color.red()) to a more intuitive and conventional property-based method (sv.Color.RED).

Deprecated

sv.ColorPalette.default() is deprecated and will be removed in supervision-0.22.0. Use sv.ColorPalette.DEFAULT instead.
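A quick sketch of the accessor change described above.

```python
import supervision as sv

color = sv.Color.RED               # property-based access (replaces sv.Color.red())
palette = sv.ColorPalette.DEFAULT  # replaces the deprecated sv.ColorPalette.default()
```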

  • Changed #769: sv.ColorPalette.DEFAULT value, giving users a more extensive set of annotation colors.

  • Changed #677: sv.Detections.from_roboflow to sv.Detections.from_inference, streamlining its functionality to be compatible with both the inference pip package and the Roboflow hosted API.

Deprecated

Detections.from_roboflow() is deprecated and will be removed in supervision-0.22.0. Use Detections.from_inference instead.
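A minimal migration sketch reusing the inference pattern shown elsewhere in these notes; the model id and image path are placeholders.

```python
import cv2
import supervision as sv
from inference import get_model

image = cv2.imread(<SOURCE_IMAGE_PATH>)
model = get_model(model_id="yolov8n-640")

result = model.infer(image)[0]
detections = sv.Detections.from_inference(result)  # replaces Detections.from_roboflow(result)
```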

  • Fixed #735: sv.LineZone functionality to accurately update the counter when an object crosses a line from any direction, including from the side. This enhancement enables more precise tracking and analytics, such as calculating individual in/out counts for each lane on the road.
"},{"location":"changelog/#0170-december-06-2023","title":"0.17.0 December 06, 2023","text":"
  • Added #633: sv.PixelateAnnotator allowing to pixelate objects on images and videos.

  • Added #652: sv.TriangleAnnotator allowing to annotate images and videos with triangle markers.

  • Added #602: sv.PolygonAnnotator allowing to annotate images and videos with segmentation mask outline.

>>> import supervision as sv\n\n>>> image = ...\n>>> detections = sv.Detections(...)\n\n>>> polygon_annotator = sv.PolygonAnnotator()\n>>> annotated_frame = polygon_annotator.annotate(\n...     scene=image.copy(),\n...     detections=detections\n... )\n
  • Added #476: sv.assets allowing download of video files that you can use in your demos.
>>> from supervision.assets import download_assets, VideoAssets\n>>> download_assets(VideoAssets.VEHICLES)\n\"vehicles.mp4\"\n
  • Added #605: Position.CENTER_OF_MASS allowing to place labels in center of mass of segmentation masks.
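A short sketch; text_position is used the same way as in the LabelAnnotator examples elsewhere in these notes, with image and detections as placeholders.

```python
import supervision as sv

image = ...
detections = sv.Detections(...)

label_annotator = sv.LabelAnnotator(text_position=sv.Position.CENTER_OF_MASS)
annotated_image = label_annotator.annotate(scene=image.copy(), detections=detections)
```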

  • Added #651: sv.scale_boxes allowing to scale sv.Detections.xyxy values.

  • Added #637: sv.calculate_dynamic_text_scale and sv.calculate_dynamic_line_thickness allowing text scale and line thickness to match image resolution.

  • Added #620: sv.Color.as_hex allowing to extract color value in HEX format.
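A tiny illustrative call; the exact formatting of the returned string (case, leading '#') is assumed.

```python
import supervision as sv

sv.Color(r=255, g=64, b=0).as_hex()  # e.g. '#ff4000'
```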

  • Added #572: sv.Classifications.from_timm allowing to load classification result from timm models.

  • Added #478: sv.Classifications.from_clip allowing to load classification result from clip model.

  • Added #571: sv.Detections.from_azure_analyze_image allowing to load detection results from Azure Image Analysis.

  • Changed #646: sv.BoxMaskAnnotator renaming it to sv.ColorAnnotator.

  • Changed #606: sv.MaskAnnotator to make it 5x faster.

  • Fixed #584: sv.DetectionDataset.from_yolo to ignore empty lines in annotation files.

  • Fixed #555: sv.BlurAnnotator to trim negative coordinates before blurring detections.

  • Fixed #511: sv.TraceAnnotator to respect trace position.

"},{"location":"changelog/#0160-october-19-2023","title":"0.16.0 October 19, 2023","text":"
  • Added #422: sv.BoxMaskAnnotator allowing to annotate images and videos with box masks.

  • Added #433: sv.HaloAnnotator allowing to annotate images and videos with halo effect.

>>> import supervision as sv\n\n>>> image = ...\n>>> detections = sv.Detections(...)\n\n>>> halo_annotator = sv.HaloAnnotator()\n>>> annotated_frame = halo_annotator.annotate(\n...     scene=image.copy(),\n...     detections=detections\n... )\n
  • Added #466: sv.HeatMapAnnotator allowing to annotate videos with heat maps.

  • Added #492: sv.DotAnnotator allowing to annotate images and videos with dots.

  • Added #449: sv.draw_image allowing to draw an image onto a given scene with specified opacity and dimensions.

  • Added #280: sv.FPSMonitor for monitoring frames per second (FPS) to benchmark latency.

  • Added #454: 🤗 Hugging Face Annotators space.

  • Changed #482: sv.LineZone.trigger now returns Tuple[np.ndarray, np.ndarray]. The first array indicates which detections have crossed the line from outside to inside. The second array indicates which detections have crossed the line from inside to outside.
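A minimal sketch of unpacking the new return value; the line endpoints are illustrative and detections stands in for tracked detections of the current frame.

```python
import supervision as sv

line_zone = sv.LineZone(start=sv.Point(0, 300), end=sv.Point(640, 300))
detections = sv.Detections(...)  # tracked detections for the current frame

crossed_in, crossed_out = line_zone.trigger(detections)  # two boolean arrays, one entry per detection
```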

  • Changed #465: Annotator argument name from color_map: str to color_lookup: ColorLookup enum to increase type safety.

  • Changed #426: sv.MaskAnnotator allowing 2x faster annotation.

  • Fixed #477: Poetry env definition allowing proper local installation.

  • Fixed #430: sv.ByteTrack to return np.array([], dtype=int) when sv.Detections is empty.

Deprecated

sv.Detections.from_yolov8 and sv.Classifications.from_yolov8 as those are now replaced by sv.Detections.from_ultralytics and sv.Classifications.from_ultralytics.

"},{"location":"changelog/#0150-october-5-2023","title":"0.15.0 October 5, 2023","text":"
  • Added #170: sv.BoundingBoxAnnotator allowing to annotate images and videos with bounding boxes.

  • Added #170: sv.BoxCornerAnnotator allowing to annotate images and videos with just bounding box corners.

  • Added #170: sv.MaskAnnotator allowing to annotate images and videos with segmentation masks.

  • Added #170: sv.EllipseAnnotator allowing to annotate images and videos with ellipses (sports game style).

  • Added #386: sv.CircleAnnotator allowing to annotate images and videos with circles.

  • Added #354: sv.TraceAnnotator allowing to draw path of moving objects on videos.

  • Added #405: sv.BlurAnnotator allowing to blur objects on images and videos.

>>> import supervision as sv\n\n>>> image = ...\n>>> detections = sv.Detections(...)\n\n>>> bounding_box_annotator = sv.BoundingBoxAnnotator()\n>>> annotated_frame = bounding_box_annotator.annotate(\n...     scene=image.copy(),\n...     detections=detections\n... )\n
  • Added #354: Supervision usage example. You can now learn how to perform traffic flow analysis with Supervision.

  • Changed #399: sv.Detections.from_roboflow no longer requires class_list to be specified. The class_id value can be extracted directly from the inference response.

  • Changed #381: sv.VideoSink now allows to customize the output codec.

  • Changed #361: sv.InferenceSlicer can now operate in multithreading mode.

  • Fixed #348: sv.Detections.from_deepsparse to allow processing empty deepsparse result object.

"},{"location":"changelog/#0140-august-31-2023","title":"0.14.0 August 31, 2023","text":"
  • Added #282: support for SAHI inference technique with sv.InferenceSlicer.
>>> import cv2\n>>> import supervision as sv\n>>> from ultralytics import YOLO\n\n>>> image = cv2.imread(SOURCE_IMAGE_PATH)\n>>> model = YOLO(...)\n\n>>> def callback(image_slice: np.ndarray) -> sv.Detections:\n...     result = model(image_slice)[0]\n...     return sv.Detections.from_ultralytics(result)\n\n>>> slicer = sv.InferenceSlicer(callback = callback)\n\n>>> detections = slicer(image)\n
  • Added #297: Detections.from_deepsparse to enable seamless integration with DeepSparse framework.

  • Added #281: sv.Classifications.from_ultralytics to enable seamless integration with Ultralytics framework. This will enable you to use supervision with all models that Ultralytics supports.

Deprecated

sv.Detections.from_yolov8 and sv.Classifications.from_yolov8 are now deprecated and will be removed with supervision-0.16.0 release.

  • Added #341: First supervision usage example script showing how to detect and track objects on video using YOLOv8 + Supervision.

  • Changed #296: sv.ClassificationDataset and sv.DetectionDataset now use image path (not image name) as dataset keys.

  • Fixed #300: Detections.from_roboflow to filter out polygons with fewer than 3 points.

"},{"location":"changelog/#0130-august-8-2023","title":"0.13.0 August 8, 2023","text":"
  • Added #236: support for mean average precision (mAP) for object detection models with sv.MeanAveragePrecision.
>>> import supervision as sv\n>>> from ultralytics import YOLO\n\n>>> dataset = sv.DetectionDataset.from_yolo(...)\n\n>>> model = YOLO(...)\n>>> def callback(image: np.ndarray) -> sv.Detections:\n...     result = model(image)[0]\n...     return sv.Detections.from_yolov8(result)\n\n>>> mean_average_precision = sv.MeanAveragePrecision.benchmark(\n...     dataset = dataset,\n...     callback = callback\n... )\n\n>>> mean_average_precision.map50_95\n0.433\n
  • Added #256: support for ByteTrack for object tracking with sv.ByteTrack.

  • Added #222: sv.Detections.from_ultralytics to enable seamless integration with Ultralytics framework. This will enable you to use supervision with all models that Ultralytics supports.

Deprecated

sv.Detections.from_yolov8 is now deprecated and will be removed with supervision-0.15.0 release.

  • Added #191: sv.Detections.from_paddledet to enable seamless integration with PaddleDetection framework.

  • Added #245: support for loading PASCAL VOC segmentation datasets with sv.DetectionDataset.

"},{"location":"changelog/#0120-july-24-2023","title":"0.12.0 July 24, 2023","text":"

Python 3.7. Support Terminated

With the supervision-0.12.0 release, we are terminating official support for Python 3.7.

  • Added #177: initial support for object detection model benchmarking with sv.ConfusionMatrix.
>>> import supervision as sv\n>>> from ultralytics import YOLO\n\n>>> dataset = sv.DetectionDataset.from_yolo(...)\n\n>>> model = YOLO(...)\n>>> def callback(image: np.ndarray) -> sv.Detections:\n...     result = model(image)[0]\n...     return sv.Detections.from_yolov8(result)\n\n>>> confusion_matrix = sv.ConfusionMatrix.benchmark(\n...     dataset = dataset,\n...     callback = callback\n... )\n\n>>> confusion_matrix.matrix\narray([\n    [0., 0., 0., 0.],\n    [0., 1., 0., 1.],\n    [0., 1., 1., 0.],\n    [1., 1., 0., 0.]\n])\n
  • Added #173: Detections.from_mmdetection to enable seamless integration with MMDetection framework.

  • Added #130: ability to install package in headless or desktop mode.

  • Changed #180: packaging method from setup.py to pyproject.toml.

  • Fixed #188: sv.DetectionDataset.from_coco can't be loaded when there are images without annotations.

  • Fixed #226: sv.DetectionDataset.from_yolo can't load background instances.

"},{"location":"changelog/#0111-june-29-2023","title":"0.11.1 June 29, 2023","text":"
  • Fixed #165: as_folder_structure fails to save sv.ClassificationDataset when it is the result of inference.
"},{"location":"changelog/#0110-june-28-2023","title":"0.11.0 June 28, 2023","text":"
  • Added #150: ability to load and save sv.DetectionDataset in COCO format using as_coco and from_coco methods.
>>> import supervision as sv\n\n>>> ds = sv.DetectionDataset.from_coco(\n...     images_directory_path='...',\n...     annotations_path='...'\n... )\n\n>>> ds.as_coco(\n...     images_directory_path='...',\n...     annotations_path='...'\n... )\n
  • Added #158: ability to merge multiple sv.DetectionDataset together using merge method.
>>> import supervision as sv\n\n>>> ds_1 = sv.DetectionDataset(...)\n>>> len(ds_1)\n100\n>>> ds_1.classes\n['dog', 'person']\n\n>>> ds_2 = sv.DetectionDataset(...)\n>>> len(ds_2)\n200\n>>> ds_2.classes\n['cat']\n\n>>> ds_merged = sv.DetectionDataset.merge([ds_1, ds_2])\n>>> len(ds_merged)\n300\n>>> ds_merged.classes\n['cat', 'dog', 'person']\n
  • Added #162: additional start and end arguments to sv.get_video_frames_generator allowing to generate frames only for a selected part of the video.
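A minimal sketch combining the new start and end arguments with the generator call used elsewhere in these notes.

```python
import supervision as sv

# yields only the frames between `start` and `end` of the source video
frames_generator = sv.get_video_frames_generator(
    source_path='source_video.mp4', start=100, end=300
)
for frame in frames_generator:
    ...
```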

  • Fixed #157: incorrect loading of YOLO dataset class names from data.yaml.

"},{"location":"changelog/#0100-june-14-2023","title":"0.10.0 June 14, 2023","text":"
  • Added #125: ability to load and save sv.ClassificationDataset in a folder structure format.
>>> import supervision as sv\n\n>>> cs = sv.ClassificationDataset.from_folder_structure(\n...     root_directory_path='...'\n... )\n\n>>> cs.as_folder_structure(\n...     root_directory_path='...'\n... )\n
  • Added #125: support for sv.ClassificationDataset.split allowing to divide sv.ClassificationDataset into two parts.

  • Added #110: ability to extract masks from Roboflow API results using sv.Detections.from_roboflow.

  • Added commit hash: Supervision Quickstart notebook where you can learn more about Detection, Dataset and Video APIs.

  • Changed #135: sv.get_video_frames_generator documentation to better describe actual behavior.

"},{"location":"changelog/#090-june-7-2023","title":"0.9.0 June 7, 2023","text":"
  • Added #118: ability to select sv.Detections by index, list of indexes or slice. Here is an example illustrating the new selection methods.
>>> import supervision as sv\n\n>>> detections = sv.Detections(...)\n>>> len(detections[0])\n1\n>>> len(detections[[0, 1]])\n2\n>>> len(detections[0:2])\n2\n
  • Added #101: ability to extract masks from YOLOv8 result using sv.Detections.from_yolov8. Here is an example illustrating how to extract boolean masks from the result of the YOLOv8 model inference.

  • Added #122: ability to crop image using sv.crop. Here is an example showing how to get a separate crop for each detection in sv.Detections.
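The example referenced in this entry does not survive in this rendering; a minimal sketch of per-detection cropping with the 0.9.0-era sv.crop API might look like the following, with image and detections as placeholders and the keyword names assumed.

```python
import supervision as sv

image = ...
detections = sv.Detections(...)

with sv.ImageSink(target_dir_path='target/directory/path') as sink:
    for xyxy in detections.xyxy:
        cropped_image = sv.crop(image=image, xyxy=xyxy)
        sink.save_image(image=cropped_image)
```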

  • Added #120: ability to conveniently save multiple images into directory using sv.ImageSink. Here is an example showing how to save every tenth video frame as a separate image.

>>> import supervision as sv\n\n>>> with sv.ImageSink(target_dir_path='target/directory/path') as sink:\n...     for image in sv.get_video_frames_generator(source_path='source_video.mp4', stride=10):\n...         sink.save_image(image=image)\n
  • Fixed #106: inconvenient handling of sv.PolygonZone coordinates. Now sv.PolygonZone accepts coordinates in the form of [[x1, y1], [x2, y2], ...] that can be both integers and floats.
"},{"location":"changelog/#080-may-17-2023","title":"0.8.0 May 17, 2023","text":"
  • Added #100: support for dataset inheritance. The existing Dataset was renamed to DetectionDataset. Now DetectionDataset inherits from BaseDataset. This change was made to enforce the future consistency of APIs of different types of computer vision datasets.
  • Added #100: ability to save datasets in YOLO format using DetectionDataset.as_yolo.
>>> import roboflow\n>>> from roboflow import Roboflow\n>>> import supervision as sv\n\n>>> roboflow.login()\n\n>>> rf = Roboflow()\n\n>>> project = rf.workspace(WORKSPACE_ID).project(PROJECT_ID)\n>>> dataset = project.version(PROJECT_VERSION).download(\"yolov5\")\n\n>>> ds = sv.DetectionDataset.from_yolo(\n...     images_directory_path=f\"{dataset.location}/train/images\",\n...     annotations_directory_path=f\"{dataset.location}/train/labels\",\n...     data_yaml_path=f\"{dataset.location}/data.yaml\"\n... )\n\n>>> ds.classes\n['dog', 'person']\n
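The same dataset can then be written back out in YOLO format; a minimal sketch (the output paths are illustrative):
>>> ds.as_yolo(\n...     images_directory_path='yolo/train/images',\n...     annotations_directory_path='yolo/train/labels',\n...     data_yaml_path='yolo/data.yaml'\n... )\n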
  • Added #102: support for DetectionDataset.split, allowing you to divide DetectionDataset into two parts.
>>> import supervision as sv\n\n>>> ds = sv.DetectionDataset(...)\n>>> train_ds, test_ds = ds.split(split_ratio=0.7, random_state=42, shuffle=True)\n\n>>> len(train_ds), len(test_ds)\n(700, 300)\n
  • Changed #100: default value of approximation_percentage parameter from 0.75 to 0.0 in DetectionDataset.as_yolo and DetectionDataset.as_pascal_voc.
"},{"location":"changelog/#070-may-11-2023","title":"0.7.0 May 11, 2023","text":"
  • Added #91: Detections.from_yolo_nas to enable seamless integration with YOLO-NAS model.
  • Added #86: ability to load datasets in YOLO format using Dataset.from_yolo.
  • Added #84: Detections.merge to merge multiple Detections objects together.
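A minimal sketch of merging:
>>> import supervision as sv\n\n>>> detections_1 = sv.Detections(...)\n>>> detections_2 = sv.Detections(...)\n\n>>> merged = sv.Detections.merge([detections_1, detections_2])\n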
  • Fixed #81: LineZoneAnnotator.annotate did not return the annotated frame.
  • Changed #44: LineZoneAnnotator.annotate to allow for custom text for the in and out tags.
"},{"location":"changelog/#060-april-19-2023","title":"0.6.0 April 19, 2023","text":"
  • Added #71: initial Dataset support and ability to save Detections in Pascal VOC XML format.
  • Added #71: new mask_to_polygons, filter_polygons_by_area, polygon_to_xyxy and approximate_polygon utilities.
  • Added #72: ability to load Pascal VOC XML object detections dataset as Dataset.
  • Changed #70: order of Detections attributes to make it consistent with the order of objects in the __iter__ tuple.
  • Changed #71: generate_2d_mask to polygon_to_mask.
"},{"location":"changelog/#052-april-13-2023","title":"0.5.2 April 13, 2023","text":"
  • Fixed #63: LineZone.trigger function expects 4 values instead of 5.
"},{"location":"changelog/#051-april-12-2023","title":"0.5.1 April 12, 2023","text":"
  • Fixed: the Detections.__getitem__ method did not return the mask for the selected item.
  • Fixed: Detections.area crashed for mask detections.
"},{"location":"changelog/#050-april-10-2023","title":"0.5.0 April 10, 2023","text":"
  • Added #58: Detections.mask to enable segmentation support.
  • Added #58: MaskAnnotator to allow easy Detections.mask annotation.
  • Added #58: Detections.from_sam to enable native Segment Anything Model (SAM) support.
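A minimal sketch of SAM integration (the checkpoint and image paths are illustrative):
>>> import cv2\n>>> import supervision as sv\n>>> from segment_anything import sam_model_registry, SamAutomaticMaskGenerator\n\n>>> image = cv2.imread('image.png')\n>>> image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)\n\n>>> sam = sam_model_registry['vit_h'](checkpoint='sam_vit_h_4b8939.pth')\n>>> mask_generator = SamAutomaticMaskGenerator(sam)\n\n>>> sam_result = mask_generator.generate(image_rgb)\n>>> detections = sv.Detections.from_sam(sam_result)\n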
  • Changed #58: Detections.area behaviour to work not only with boxes but also with masks.
"},{"location":"changelog/#040-april-5-2023","title":"0.4.0 April 5, 2023","text":"
  • Added #46: Detections.empty to allow easy creation of empty Detections objects.
  • Added #56: Detections.from_roboflow to allow easy creation of Detections objects from Roboflow API inference results.
  • Added #56: plot_images_grid to allow easy plotting of multiple images on a single plot.
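A minimal sketch (image_1 ... image_4 stand in for previously loaded numpy images):
>>> import supervision as sv\n\n>>> sv.plot_images_grid(\n...     images=[image_1, image_2, image_3, image_4],\n...     grid_size=(2, 2)\n... )\n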
  • Added #56: initial support for Pascal VOC XML format with detections_to_voc_xml method.
  • Changed #56: show_frame_in_notebook refactored and renamed to plot_image.
"},{"location":"changelog/#032-march-23-2023","title":"0.3.2 March 23, 2023","text":"
  • Changed #50: Allow Detections.class_id to be None.
"},{"location":"changelog/#031-march-6-2023","title":"0.3.1 March 6, 2023","text":"
  • Fixed #41: PolygonZone throws an exception when the object touches the bottom edge of the image.
  • Fixed #42: Detections.with_nms method throws an exception when Detections is empty.
  • Changed #36: Detections.with_nms to support both the class-agnostic and non-class-agnostic cases.
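A minimal sketch of both modes (the threshold value is illustrative):
>>> import supervision as sv\n\n>>> detections = sv.Detections(...)\n>>> detections = detections.with_nms(threshold=0.5, class_agnostic=False)\n>>> detections = detections.with_nms(threshold=0.5, class_agnostic=True)\n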
"},{"location":"changelog/#030-march-6-2023","title":"0.3.0 March 6, 2023","text":"
  • Changed: Allow Detections.confidence to be None.
  • Added: Detections.from_transformers and Detections.from_detectron2 to enable seamless integration with Transformers and Detectron2 models.
  • Added: Detections.area to dynamically calculate bounding box area.
  • Added: Detections.with_nms to filter out duplicate detections with NMS. Initial, class-agnostic-only implementation.
"},{"location":"changelog/#020-february-2-2023","title":"0.2.0 February 2, 2023","text":"
  • Added: Advanced Detections filtering with pandas-like API.
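A minimal sketch of the filtering style (the conditions are illustrative):
>>> import supervision as sv\n\n>>> detections = sv.Detections(...)\n>>> detections = detections[detections.confidence > 0.5]\n>>> detections = detections[detections.class_id == 0]\n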
  • Added: Detections.from_yolov5 and Detections.from_yolov8 to enable seamless integration with YOLOv5 and YOLOv8 models.
"},{"location":"changelog/#010-january-19-2023","title":"0.1.0 January 19, 2023","text":"

Say hello to Supervision \ud83d\udc4b

"},{"location":"code_of_conduct/","title":"Code of conduct","text":"
# Contributor Covenant Code of Conduct\n\n## Our Pledge\n\nWe as members, contributors, and leaders pledge to make participation in our\ncommunity a harassment-free experience for everyone, regardless of age, body\nsize, visible or invisible disability, ethnicity, sex characteristics, gender\nidentity and expression, level of experience, education, socio-economic status,\nnationality, personal appearance, race, caste, color, religion, or sexual\nidentity and orientation.\n\nWe pledge to act and interact in ways that contribute to an open, welcoming,\ndiverse, inclusive, and healthy community.\n\n## Our Standards\n\nExamples of behavior that contributes to a positive environment for our\ncommunity include:\n\n* Demonstrating empathy and kindness toward other people\n* Being respectful of differing opinions, viewpoints, and experiences\n* Giving and gracefully accepting constructive feedback\n* Accepting responsibility and apologizing to those affected by our mistakes,\n  and learning from the experience\n* Focusing on what is best not just for us as individuals, but for the overall\n  community\n\nExamples of unacceptable behavior include:\n\n* The use of sexualized language or imagery, and sexual attention or advances of\n  any kind\n* Trolling, insulting or derogatory comments, and personal or political attacks\n* Public or private harassment\n* Publishing others' private information, such as a physical or email address,\n  without their explicit permission\n* Other conduct which could reasonably be considered inappropriate in a\n  professional setting\n\n## Enforcement Responsibilities\n\nCommunity leaders are responsible for clarifying and enforcing our standards of\nacceptable behavior and will take appropriate and fair corrective action in\nresponse to any behavior that they deem inappropriate, threatening, offensive,\nor harmful.\n\nCommunity leaders have the right and responsibility to remove, edit, or reject\ncomments, commits, code, wiki edits, issues, and other contributions that are\nnot aligned to this Code of Conduct, and will communicate reasons for moderation\ndecisions when appropriate.\n\n## Scope\n\nThis Code of Conduct applies within all community spaces, and also applies when\nan individual is officially representing the community in public spaces.\nExamples of representing our community include using an official e-mail address,\nposting via an official social media account, or acting as an appointed\nrepresentative at an online or offline event.\n\n## Enforcement\n\nInstances of abusive, harassing, or otherwise unacceptable behavior may be\nreported to the community leaders responsible for enforcement at\ncommunity-reports@roboflow.com.\n\nAll complaints will be reviewed and investigated promptly and fairly.\n\nAll community leaders are obligated to respect the privacy and security of the\nreporter of any incident.\n\n## Enforcement Guidelines\n\nCommunity leaders will follow these Community Impact Guidelines in determining\nthe consequences for any action they deem in violation of this Code of Conduct:\n\n### 1. Correction\n\n**Community Impact**: Use of inappropriate language or other behavior deemed\nunprofessional or unwelcome in the community.\n\n**Consequence**: A private, written warning from community leaders, providing\nclarity around the nature of the violation and an explanation of why the\nbehavior was inappropriate. A public apology may be requested.\n\n### 2. 
Warning\n\n**Community Impact**: A violation through a single incident or series of\nactions.\n\n**Consequence**: A warning with consequences for continued behavior. No\ninteraction with the people involved, including unsolicited interaction with\nthose enforcing the Code of Conduct, for a specified period of time. This\nincludes avoiding interactions in community spaces as well as external channels\nlike social media. Violating these terms may lead to a temporary or permanent\nban.\n\n### 3. Temporary Ban\n\n**Community Impact**: A serious violation of community standards, including\nsustained inappropriate behavior.\n\n**Consequence**: A temporary ban from any sort of interaction or public\ncommunication with the community for a specified period of time. No public or\nprivate interaction with the people involved, including unsolicited interaction\nwith those enforcing the Code of Conduct, is allowed during this period.\nViolating these terms may lead to a permanent ban.\n\n### 4. Permanent Ban\n\n**Community Impact**: Demonstrating a pattern of violation of community\nstandards, including sustained inappropriate behavior, harassment of an\nindividual, or aggression toward or disparagement of classes of individuals.\n\n**Consequence**: A permanent ban from any sort of public interaction within the\ncommunity.\n\n## Attribution\n\nThis Code of Conduct is adapted from the [Contributor Covenant][homepage],\nversion 2.1, available at\n[https://www.contributor-covenant.org/version/2/1/code_of_conduct.html][v2.1].\n\nCommunity Impact Guidelines were inspired by\n[Mozilla's code of conduct enforcement ladder][Mozilla CoC].\n\nFor answers to common questions about this code of conduct, see the FAQ at\n[https://www.contributor-covenant.org/faq][FAQ]. Translations are available at\n[https://www.contributor-covenant.org/translations][translations].\n\n[homepage]: https://www.contributor-covenant.org\n[v2.1]: https://www.contributor-covenant.org/version/2/1/code_of_conduct.html\n[Mozilla CoC]: https://github.com/mozilla/diversity\n[FAQ]: https://www.contributor-covenant.org/faq\n[translations]: https://www.contributor-covenant.org/translations\n
"},{"location":"contributing/","title":"Contributing to Supervision \ud83d\udee0\ufe0f","text":"

Thank you for your interest in contributing to Supervision!

We are actively improving this library to reduce the amount of work you need to do to solve common computer vision problems.

"},{"location":"contributing/#contribution-guidelines","title":"Contribution Guidelines","text":"

We welcome contributions to:

  1. Add a new feature to the library (guidance below).
  2. Improve our documentation and add examples to make it clear how to leverage the supervision library.
  3. Report bugs and issues in the project.
  4. Submit a request for a new feature.
  5. Improve our test coverage.
"},{"location":"contributing/#contributing-features","title":"Contributing Features \u2728","text":"

Supervision is designed to provide generic utilities to solve problems. Thus, we focus on contributions that can have an impact on a wide range of projects.

For example, counting objects that cross a line anywhere on an image is a common problem in computer vision, but counting objects that cross a line 75% of the way through is less useful.

Before you contribute a new feature, consider submitting an Issue to discuss the feature so the community can weigh in and assist.

"},{"location":"contributing/#how-to-contribute-changes","title":"How to Contribute Changes","text":"

First, fork this repository to your own GitHub account. Click \"fork\" in the top corner of the supervision repository to get started:

Then, run git clone to download the project code to your computer.

Move to a new branch using the git checkout command:

git checkout -b <your_branch_name>\n

The name you choose for your branch should describe the change you want to make (e.g. line-counter-docs).

Make any changes you want to the project code, then run the following commands to commit your changes:

git add .\ngit commit -m \"Your commit message\"\ngit push -u origin <your_branch_name>\n
"},{"location":"contributing/#code-quality","title":"\ud83c\udfa8 Code quality","text":""},{"location":"contributing/#pre-commit-tool","title":"Pre-commit tool","text":"

This project uses the pre-commit tool to maintain code quality and consistency. Before submitting a pull request or making any commits, it is important to run the pre-commit tool to ensure that your changes meet the project's guidelines.

Furthermore, we have integrated a pre-commit GitHub Action into our workflow. This means that with every pull request opened, the pre-commit checks will be automatically enforced, streamlining the code review process and ensuring that all contributions adhere to our quality standards.

To run the pre-commit tool, follow these steps:

  1. Install pre-commit by running the following command: poetry install. It will install not only pre-commit but also all of the project's dependencies and dev dependencies.

  2. Once pre-commit is installed, navigate to the project's root directory.

  3. Run the command pre-commit run --all-files. This will execute the pre-commit hooks configured for this project against the modified files. If any issues are found, the pre-commit tool will provide feedback on how to resolve them. Make the necessary changes and re-run the pre-commit command until all issues are resolved.

  4. You can also install pre-commit as a git hook by executing pre-commit install. Every time you run git commit, pre-commit will run automatically for you.

"},{"location":"contributing/#docstrings","title":"Docstrings","text":"

All new functions and classes in supervision should include docstrings. This is a prerequisite for any new functions and classes to be added to the library.

supervision adheres to the Google Python docstring style. Please refer to the style guide while writing docstrings for your contribution.
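A minimal sketch of the expected Google-style layout (the function itself is hypothetical, shown only to illustrate the Args and Returns sections):
import numpy as np\n\n\ndef polygon_area(polygon: np.ndarray) -> float:\n    \"\"\"\n    Compute the area of a polygon using the shoelace formula.\n\n    Args:\n        polygon (np.ndarray): Polygon vertices of shape (N, 2).\n\n    Returns:\n        float: The area enclosed by the polygon.\n    \"\"\"\n    x, y = polygon[:, 0], polygon[:, 1]\n    return 0.5 * float(np.abs(np.dot(x, np.roll(y, 1)) - np.dot(y, np.roll(x, 1))))\n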

"},{"location":"contributing/#type-checking","title":"Type checking","text":"

So far, there is no type checking with mypy. See issue.

Then, go back to your fork of the supervision repository, click \"Pull Requests\", and click \"New Pull Request\".

Make sure the base branch is develop before submitting your PR.

On the next page, review your changes then click \"Create pull request\":

Next, write a description for your pull request, and click \"Create pull request\" again to submit it for review:

When creating new functions, please ensure you have the following:

  1. Docstrings for the function and all parameters.
  2. Unit tests for the function.
  3. Examples in the documentation for the function.
  4. Created an entry in our docs to autogenerate the documentation for the function.
  5. Please share a Google Colab with minimal code to test the new feature or reproduce the PR whenever possible. Please ensure that the Google Colab can be accessed without any issues.

When you submit your Pull Request, you will be asked to sign a Contributor License Agreement (CLA) by the cla-assistant GitHub bot. We can only respond to PRs from contributors who have signed the project CLA.

All pull requests will be reviewed by the maintainers of the project. We will provide feedback and ask for changes if necessary.

PRs must pass all tests and linting requirements before they can be merged.

"},{"location":"contributing/#documentation","title":"\ud83d\udcdd documentation","text":"

The supervision documentation is stored in a folder called docs. The project documentation is built using mkdocs.

To run the documentation, install the project requirements with poetry install --with dev. Then, run mkdocs serve to start the documentation server.

You can learn more about mkdocs on the mkdocs website.

"},{"location":"contributing/#cookbooks","title":"\ud83e\uddd1\u200d\ud83c\udf73 cookbooks","text":"

We are always looking for new examples and cookbooks to add to the supervision documentation. If you have a use case that you think would be helpful to others, please submit a PR with your example. Here are some guidelines for submitting a new example:

  • Create a new notebook in the docs/notebooks folder.
  • Add a link to the new notebook in docs/theme/cookbooks.html. Make sure to add the path to the new notebook, as well as a title, labels, author and supervision version.
  • Use the Count Objects Crossing the Line example as a template for your new example.
  • Freeze the version of supervision you are using.
  • Place an appropriate Open in Colab button at the top of the notebook. You can find an example of such a button in the aforementioned Count Objects Crossing the Line cookbook.
  • The notebook should be self-contained. If you rely on external data (videos, images, etc.) or libraries, include download and installation commands in the notebook.
  • Annotate the code with appropriate comments, including links to the documentation describing each of the tools you have used.
"},{"location":"contributing/#tests","title":"\ud83e\uddea tests","text":"

pytest is used to run our tests.

"},{"location":"contributing/#license","title":"\ud83d\udcc4 license","text":"

By contributing, you agree that your contributions will be licensed under an MIT license.

"},{"location":"deprecated/","title":"Deprecated","text":"

These features are being phased out due to better alternatives or potential issues in future versions. Deprecated functionality remains supported for three subsequent releases, giving users time to transition to the updated methods.

  • Detections.from_roboflow is deprecated and will be removed in supervision-0.22.0. Use Detections.from_inference instead.
  • The method Color.white() is deprecated and will be removed in supervision-0.22.0. Use the constant Color.WHITE instead.
  • The method Color.black() is deprecated and will be removed in supervision-0.22.0. Use the constant Color.BLACK instead.
  • The method Color.red() is deprecated and will be removed in supervision-0.22.0. Use the constant Color.RED instead.
  • The method Color.green() is deprecated and will be removed in supervision-0.22.0. Use the constant Color.GREEN instead.
  • The method Color.blue() is deprecated and will be removed in supervision-0.22.0. Use the constant Color.BLUE instead.
  • The method ColorPalette.default() is deprecated and will be removed in supervision-0.22.0. Use the constant ColorPalette.DEFAULT instead.
  • BoxAnnotator is deprecated and will be removed in supervision-0.22.0. Use BoundingBoxAnnotator and LabelAnnotator instead.
  • The method FPSMonitor.__call__ is deprecated and will be removed in supervision-0.22.0. Use the attribute FPSMonitor.fps instead.
  • The track_buffer, track_thresh, and match_thresh parameters in ByteTrack are deprecated and will be removed in supervision-0.23.0. Use lost_track_buffer, track_activation_threshold, and minimum_matching_threshold instead.
  • The triggering_position parameter in sv.PolygonZone is deprecated and will be removed in supervision-0.23.0. Use triggering_anchors instead.
  • The frame_resolution_wh parameter in sv.PolygonZone is deprecated and will be removed in supervision-0.24.0.
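A minimal migration sketch for a few of the items above (the inference result variable is a placeholder):
import supervision as sv\n\n# new-style color constants and palette\ncolor = sv.Color.WHITE\npalette = sv.ColorPalette.DEFAULT\n\n# new-style ByteTrack parameter names\ntracker = sv.ByteTrack(\n    lost_track_buffer=30,\n    track_activation_threshold=0.25,\n    minimum_matching_threshold=0.8,\n)\n\n# Detections.from_inference replaces Detections.from_roboflow\nresult = ...  # placeholder for a Roboflow / inference result\ndetections = sv.Detections.from_inference(result)\n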
"},{"location":"license/","title":"License","text":"
MIT License\n\nCopyright (c) 2022 Roboflow\n\nPermission is hereby granted, free of charge, to any person obtaining a copy\nof this software and associated documentation files (the \"Software\"), to deal\nin the Software without restriction, including without limitation the rights\nto use, copy, modify, merge, publish, distribute, sublicense, and/or sell\ncopies of the Software, and to permit persons to whom the Software is\nfurnished to do so, subject to the following conditions:\n\nThe above copyright notice and this permission notice shall be included in all\ncopies or substantial portions of the Software.\n\nTHE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\nIMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\nFITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\nAUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\nLIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\nOUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\nSOFTWARE.\n
"},{"location":"trackers/","title":"ByteTrack","text":"

Initialize the ByteTrack object.

Parameters:

  • track_activation_threshold (float, default 0.25): Detection confidence threshold for track activation. Increasing track_activation_threshold improves accuracy and stability but might miss true detections. Decreasing it increases completeness but risks introducing noise and instability.
  • lost_track_buffer (int, default 30): Number of frames to buffer when a track is lost. Increasing lost_track_buffer enhances occlusion handling, significantly reducing the likelihood of track fragmentation or disappearance caused by brief detection gaps.
  • minimum_matching_threshold (float, default 0.8): Threshold for matching tracks with detections. Increasing minimum_matching_threshold improves accuracy but risks fragmentation. Decreasing it improves completeness but risks false positives and drift.
  • frame_rate (int, default 30): The frame rate of the video.
  • minimum_consecutive_frames (int, default 1): Number of consecutive frames that an object must be tracked before it is considered a 'valid' track. Increasing minimum_consecutive_frames prevents the creation of accidental tracks from false detection or double detection, but risks missing shorter tracks.

Source code in supervision/tracker/byte_tracker/core.py
class ByteTrack:\n    \"\"\"\n    Initialize the ByteTrack object.\n\n    <video controls>\n        <source src=\"https://media.roboflow.com/supervision/video-examples/how-to/track-objects/annotate-video-with-traces.mp4\" type=\"video/mp4\">\n    </video>\n\n    Parameters:\n        track_activation_threshold (float, optional): Detection confidence threshold\n            for track activation. Increasing track_activation_threshold improves accuracy\n            and stability but might miss true detections. Decreasing it increases\n            completeness but risks introducing noise and instability.\n        lost_track_buffer (int, optional): Number of frames to buffer when a track is lost.\n            Increasing lost_track_buffer enhances occlusion handling, significantly\n            reducing the likelihood of track fragmentation or disappearance caused\n            by brief detection gaps.\n        minimum_matching_threshold (float, optional): Threshold for matching tracks with detections.\n            Increasing minimum_matching_threshold improves accuracy but risks fragmentation.\n            Decreasing it improves completeness but risks false positives and drift.\n        frame_rate (int, optional): The frame rate of the video.\n        minimum_consecutive_frames (int, optional): Number of consecutive frames that an object must\n            be tracked before it is considered a 'valid' track.\n            Increasing minimum_consecutive_frames prevents the creation of accidental tracks from\n            false detection or double detection, but risks missing shorter tracks.\n    \"\"\"  # noqa: E501 // docs\n\n    @deprecated_parameter(\n        old_parameter=\"track_buffer\",\n        new_parameter=\"lost_track_buffer\",\n        map_function=lambda x: x,\n        warning_message=\"`{old_parameter}` in `{function_name}` is deprecated and will \"\n        \"be remove in `supervision-0.23.0`. Use '{new_parameter}' \"\n        \"instead.\",\n    )\n    @deprecated_parameter(\n        old_parameter=\"track_thresh\",\n        new_parameter=\"track_activation_threshold\",\n        map_function=lambda x: x,\n        warning_message=\"`{old_parameter}` in `{function_name}` is deprecated and will \"\n        \"be remove in `supervision-0.23.0`. Use '{new_parameter}' \"\n        \"instead.\",\n    )\n    @deprecated_parameter(\n        old_parameter=\"match_thresh\",\n        new_parameter=\"minimum_matching_threshold\",\n        map_function=lambda x: x,\n        warning_message=\"`{old_parameter}` in `{function_name}` is deprecated and will \"\n        \"be remove in `supervision-0.23.0`. 
Use '{new_parameter}' \"\n        \"instead.\",\n    )\n    def __init__(\n        self,\n        track_activation_threshold: float = 0.25,\n        lost_track_buffer: int = 30,\n        minimum_matching_threshold: float = 0.8,\n        frame_rate: int = 30,\n        minimum_consecutive_frames: int = 1,\n    ):\n        self.track_activation_threshold = track_activation_threshold\n        self.minimum_matching_threshold = minimum_matching_threshold\n\n        self.frame_id = 0\n        self.det_thresh = self.track_activation_threshold + 0.1\n        self.max_time_lost = int(frame_rate / 30.0 * lost_track_buffer)\n        self.minimum_consecutive_frames = minimum_consecutive_frames\n        self.kalman_filter = KalmanFilter()\n\n        self.tracked_tracks: List[STrack] = []\n        self.lost_tracks: List[STrack] = []\n        self.removed_tracks: List[STrack] = []\n\n    def update_with_detections(self, detections: Detections) -> Detections:\n        \"\"\"\n        Updates the tracker with the provided detections and returns the updated\n        detection results.\n\n        Args:\n            detections (Detections): The detections to pass through the tracker.\n\n        Example:\n            ```python\n            import supervision as sv\n            from ultralytics import YOLO\n\n            model = YOLO(<MODEL_PATH>)\n            tracker = sv.ByteTrack()\n\n            bounding_box_annotator = sv.BoundingBoxAnnotator()\n            label_annotator = sv.LabelAnnotator()\n\n            def callback(frame: np.ndarray, index: int) -> np.ndarray:\n                results = model(frame)[0]\n                detections = sv.Detections.from_ultralytics(results)\n                detections = tracker.update_with_detections(detections)\n\n                labels = [f\"#{tracker_id}\" for tracker_id in detections.tracker_id]\n\n                annotated_frame = bounding_box_annotator.annotate(\n                    scene=frame.copy(), detections=detections)\n                annotated_frame = label_annotator.annotate(\n                    scene=annotated_frame, detections=detections, labels=labels)\n                return annotated_frame\n\n            sv.process_video(\n                source_path=<SOURCE_VIDEO_PATH>,\n                target_path=<TARGET_VIDEO_PATH>,\n                callback=callback\n            )\n            ```\n        \"\"\"\n\n        tensors = detections2boxes(detections=detections)\n        tracks = self.update_with_tensors(tensors=tensors)\n\n        if len(tracks) > 0:\n            detection_bounding_boxes = np.asarray([det[:4] for det in tensors])\n            track_bounding_boxes = np.asarray([track.tlbr for track in tracks])\n\n            ious = box_iou_batch(detection_bounding_boxes, track_bounding_boxes)\n\n            iou_costs = 1 - ious\n\n            matches, _, _ = matching.linear_assignment(iou_costs, 0.5)\n            detections.tracker_id = np.full(len(detections), -1, dtype=int)\n            for i_detection, i_track in matches:\n                detections.tracker_id[i_detection] = int(\n                    tracks[i_track].external_track_id\n                )\n\n            return detections[detections.tracker_id != -1]\n\n        else:\n            detections = Detections.empty()\n            detections.tracker_id = np.array([], dtype=int)\n\n            return detections\n\n    def reset(self):\n        \"\"\"\n        Resets the internal state of the ByteTrack tracker.\n\n        This method clears the tracking data, including tracked, lost,\n        
and removed tracks, as well as resetting the frame counter. It's\n        particularly useful when processing multiple videos sequentially,\n        ensuring the tracker starts with a clean state for each new video.\n        \"\"\"\n        self.frame_id = 0\n        self.tracked_tracks: List[STrack] = []\n        self.lost_tracks: List[STrack] = []\n        self.removed_tracks: List[STrack] = []\n        BaseTrack.reset_counter()\n        STrack.reset_external_counter()\n\n    def update_with_tensors(self, tensors: np.ndarray) -> List[STrack]:\n        \"\"\"\n        Updates the tracker with the provided tensors and returns the updated tracks.\n\n        Parameters:\n            tensors: The new tensors to update with.\n\n        Returns:\n            List[STrack]: Updated tracks.\n        \"\"\"\n        self.frame_id += 1\n        activated_starcks = []\n        refind_stracks = []\n        lost_stracks = []\n        removed_stracks = []\n\n        class_ids = tensors[:, 5]\n        scores = tensors[:, 4]\n        bboxes = tensors[:, :4]\n\n        remain_inds = scores > self.track_activation_threshold\n        inds_low = scores > 0.1\n        inds_high = scores < self.track_activation_threshold\n\n        inds_second = np.logical_and(inds_low, inds_high)\n        dets_second = bboxes[inds_second]\n        dets = bboxes[remain_inds]\n        scores_keep = scores[remain_inds]\n        scores_second = scores[inds_second]\n\n        class_ids_keep = class_ids[remain_inds]\n        class_ids_second = class_ids[inds_second]\n\n        if len(dets) > 0:\n            \"\"\"Detections\"\"\"\n            detections = [\n                STrack(STrack.tlbr_to_tlwh(tlbr), s, c, self.minimum_consecutive_frames)\n                for (tlbr, s, c) in zip(dets, scores_keep, class_ids_keep)\n            ]\n        else:\n            detections = []\n\n        \"\"\" Add newly detected tracklets to tracked_stracks\"\"\"\n        unconfirmed = []\n        tracked_stracks = []  # type: list[STrack]\n\n        for track in self.tracked_tracks:\n            if not track.is_activated:\n                unconfirmed.append(track)\n            else:\n                tracked_stracks.append(track)\n\n        \"\"\" Step 2: First association, with high score detection boxes\"\"\"\n        strack_pool = joint_tracks(tracked_stracks, self.lost_tracks)\n        # Predict the current location with KF\n        STrack.multi_predict(strack_pool)\n        dists = matching.iou_distance(strack_pool, detections)\n\n        dists = matching.fuse_score(dists, detections)\n        matches, u_track, u_detection = matching.linear_assignment(\n            dists, thresh=self.minimum_matching_threshold\n        )\n\n        for itracked, idet in matches:\n            track = strack_pool[itracked]\n            det = detections[idet]\n            if track.state == TrackState.Tracked:\n                track.update(detections[idet], self.frame_id)\n                activated_starcks.append(track)\n            else:\n                track.re_activate(det, self.frame_id, new_id=False)\n                refind_stracks.append(track)\n\n        \"\"\" Step 3: Second association, with low score detection boxes\"\"\"\n        # association the untrack to the low score detections\n        if len(dets_second) > 0:\n            \"\"\"Detections\"\"\"\n            detections_second = [\n                STrack(STrack.tlbr_to_tlwh(tlbr), s, c, self.minimum_consecutive_frames)\n                for (tlbr, s, c) in zip(dets_second, scores_second, 
class_ids_second)\n            ]\n        else:\n            detections_second = []\n        r_tracked_stracks = [\n            strack_pool[i]\n            for i in u_track\n            if strack_pool[i].state == TrackState.Tracked\n        ]\n        dists = matching.iou_distance(r_tracked_stracks, detections_second)\n        matches, u_track, u_detection_second = matching.linear_assignment(\n            dists, thresh=0.5\n        )\n        for itracked, idet in matches:\n            track = r_tracked_stracks[itracked]\n            det = detections_second[idet]\n            if track.state == TrackState.Tracked:\n                track.update(det, self.frame_id)\n                activated_starcks.append(track)\n            else:\n                track.re_activate(det, self.frame_id, new_id=False)\n                refind_stracks.append(track)\n\n        for it in u_track:\n            track = r_tracked_stracks[it]\n            if not track.state == TrackState.Lost:\n                track.mark_lost()\n                lost_stracks.append(track)\n\n        \"\"\"Deal with unconfirmed tracks, usually tracks with only one beginning frame\"\"\"\n        detections = [detections[i] for i in u_detection]\n        dists = matching.iou_distance(unconfirmed, detections)\n\n        dists = matching.fuse_score(dists, detections)\n        matches, u_unconfirmed, u_detection = matching.linear_assignment(\n            dists, thresh=0.7\n        )\n        for itracked, idet in matches:\n            unconfirmed[itracked].update(detections[idet], self.frame_id)\n            activated_starcks.append(unconfirmed[itracked])\n        for it in u_unconfirmed:\n            track = unconfirmed[it]\n            track.mark_removed()\n            removed_stracks.append(track)\n\n        \"\"\" Step 4: Init new stracks\"\"\"\n        for inew in u_detection:\n            track = detections[inew]\n            if track.score < self.det_thresh:\n                continue\n            track.activate(self.kalman_filter, self.frame_id)\n            activated_starcks.append(track)\n        \"\"\" Step 5: Update state\"\"\"\n        for track in self.lost_tracks:\n            if self.frame_id - track.end_frame > self.max_time_lost:\n                track.mark_removed()\n                removed_stracks.append(track)\n\n        self.tracked_tracks = [\n            t for t in self.tracked_tracks if t.state == TrackState.Tracked\n        ]\n        self.tracked_tracks = joint_tracks(self.tracked_tracks, activated_starcks)\n        self.tracked_tracks = joint_tracks(self.tracked_tracks, refind_stracks)\n        self.lost_tracks = sub_tracks(self.lost_tracks, self.tracked_tracks)\n        self.lost_tracks.extend(lost_stracks)\n        self.lost_tracks = sub_tracks(self.lost_tracks, self.removed_tracks)\n        self.removed_tracks = removed_stracks\n        self.tracked_tracks, self.lost_tracks = remove_duplicate_tracks(\n            self.tracked_tracks, self.lost_tracks\n        )\n        output_stracks = [track for track in self.tracked_tracks if track.is_activated]\n\n        return output_stracks\n
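A minimal construction sketch using the parameters documented above (the values shown are the documented defaults):
import supervision as sv\n\ntracker = sv.ByteTrack(\n    track_activation_threshold=0.25,\n    lost_track_buffer=30,\n    minimum_matching_threshold=0.8,\n    frame_rate=30,\n    minimum_consecutive_frames=1,\n)\n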
"},{"location":"trackers/#supervision.tracker.byte_tracker.core.ByteTrack-functions","title":"Functions","text":""},{"location":"trackers/#supervision.tracker.byte_tracker.core.ByteTrack.reset","title":"reset()","text":"

Resets the internal state of the ByteTrack tracker.

This method clears the tracking data, including tracked, lost, and removed tracks, as well as resetting the frame counter. It's particularly useful when processing multiple videos sequentially, ensuring the tracker starts with a clean state for each new video.

Source code in supervision/tracker/byte_tracker/core.py
def reset(self):\n    \"\"\"\n    Resets the internal state of the ByteTrack tracker.\n\n    This method clears the tracking data, including tracked, lost,\n    and removed tracks, as well as resetting the frame counter. It's\n    particularly useful when processing multiple videos sequentially,\n    ensuring the tracker starts with a clean state for each new video.\n    \"\"\"\n    self.frame_id = 0\n    self.tracked_tracks: List[STrack] = []\n    self.lost_tracks: List[STrack] = []\n    self.removed_tracks: List[STrack] = []\n    BaseTrack.reset_counter()\n    STrack.reset_external_counter()\n
"},{"location":"trackers/#supervision.tracker.byte_tracker.core.ByteTrack.update_with_detections","title":"update_with_detections(detections)","text":"

Updates the tracker with the provided detections and returns the updated detection results.

Parameters:

  • detections (Detections, required): The detections to pass through the tracker.

Example
import supervision as sv\nfrom ultralytics import YOLO\n\nmodel = YOLO(<MODEL_PATH>)\ntracker = sv.ByteTrack()\n\nbounding_box_annotator = sv.BoundingBoxAnnotator()\nlabel_annotator = sv.LabelAnnotator()\n\ndef callback(frame: np.ndarray, index: int) -> np.ndarray:\n    results = model(frame)[0]\n    detections = sv.Detections.from_ultralytics(results)\n    detections = tracker.update_with_detections(detections)\n\n    labels = [f\"#{tracker_id}\" for tracker_id in detections.tracker_id]\n\n    annotated_frame = bounding_box_annotator.annotate(\n        scene=frame.copy(), detections=detections)\n    annotated_frame = label_annotator.annotate(\n        scene=annotated_frame, detections=detections, labels=labels)\n    return annotated_frame\n\nsv.process_video(\n    source_path=<SOURCE_VIDEO_PATH>,\n    target_path=<TARGET_VIDEO_PATH>,\n    callback=callback\n)\n
Source code in supervision/tracker/byte_tracker/core.py
def update_with_detections(self, detections: Detections) -> Detections:\n    \"\"\"\n    Updates the tracker with the provided detections and returns the updated\n    detection results.\n\n    Args:\n        detections (Detections): The detections to pass through the tracker.\n\n    Example:\n        ```python\n        import supervision as sv\n        from ultralytics import YOLO\n\n        model = YOLO(<MODEL_PATH>)\n        tracker = sv.ByteTrack()\n\n        bounding_box_annotator = sv.BoundingBoxAnnotator()\n        label_annotator = sv.LabelAnnotator()\n\n        def callback(frame: np.ndarray, index: int) -> np.ndarray:\n            results = model(frame)[0]\n            detections = sv.Detections.from_ultralytics(results)\n            detections = tracker.update_with_detections(detections)\n\n            labels = [f\"#{tracker_id}\" for tracker_id in detections.tracker_id]\n\n            annotated_frame = bounding_box_annotator.annotate(\n                scene=frame.copy(), detections=detections)\n            annotated_frame = label_annotator.annotate(\n                scene=annotated_frame, detections=detections, labels=labels)\n            return annotated_frame\n\n        sv.process_video(\n            source_path=<SOURCE_VIDEO_PATH>,\n            target_path=<TARGET_VIDEO_PATH>,\n            callback=callback\n        )\n        ```\n    \"\"\"\n\n    tensors = detections2boxes(detections=detections)\n    tracks = self.update_with_tensors(tensors=tensors)\n\n    if len(tracks) > 0:\n        detection_bounding_boxes = np.asarray([det[:4] for det in tensors])\n        track_bounding_boxes = np.asarray([track.tlbr for track in tracks])\n\n        ious = box_iou_batch(detection_bounding_boxes, track_bounding_boxes)\n\n        iou_costs = 1 - ious\n\n        matches, _, _ = matching.linear_assignment(iou_costs, 0.5)\n        detections.tracker_id = np.full(len(detections), -1, dtype=int)\n        for i_detection, i_track in matches:\n            detections.tracker_id[i_detection] = int(\n                tracks[i_track].external_track_id\n            )\n\n        return detections[detections.tracker_id != -1]\n\n    else:\n        detections = Detections.empty()\n        detections.tracker_id = np.array([], dtype=int)\n\n        return detections\n
"},{"location":"trackers/#supervision.tracker.byte_tracker.core.ByteTrack.update_with_tensors","title":"update_with_tensors(tensors)","text":"

Updates the tracker with the provided tensors and returns the updated tracks.

Parameters:

  • tensors (np.ndarray, required): The new tensors to update with.

Returns:

  • List[STrack]: Updated tracks.

Source code in supervision/tracker/byte_tracker/core.py
def update_with_tensors(self, tensors: np.ndarray) -> List[STrack]:\n    \"\"\"\n    Updates the tracker with the provided tensors and returns the updated tracks.\n\n    Parameters:\n        tensors: The new tensors to update with.\n\n    Returns:\n        List[STrack]: Updated tracks.\n    \"\"\"\n    self.frame_id += 1\n    activated_starcks = []\n    refind_stracks = []\n    lost_stracks = []\n    removed_stracks = []\n\n    class_ids = tensors[:, 5]\n    scores = tensors[:, 4]\n    bboxes = tensors[:, :4]\n\n    remain_inds = scores > self.track_activation_threshold\n    inds_low = scores > 0.1\n    inds_high = scores < self.track_activation_threshold\n\n    inds_second = np.logical_and(inds_low, inds_high)\n    dets_second = bboxes[inds_second]\n    dets = bboxes[remain_inds]\n    scores_keep = scores[remain_inds]\n    scores_second = scores[inds_second]\n\n    class_ids_keep = class_ids[remain_inds]\n    class_ids_second = class_ids[inds_second]\n\n    if len(dets) > 0:\n        \"\"\"Detections\"\"\"\n        detections = [\n            STrack(STrack.tlbr_to_tlwh(tlbr), s, c, self.minimum_consecutive_frames)\n            for (tlbr, s, c) in zip(dets, scores_keep, class_ids_keep)\n        ]\n    else:\n        detections = []\n\n    \"\"\" Add newly detected tracklets to tracked_stracks\"\"\"\n    unconfirmed = []\n    tracked_stracks = []  # type: list[STrack]\n\n    for track in self.tracked_tracks:\n        if not track.is_activated:\n            unconfirmed.append(track)\n        else:\n            tracked_stracks.append(track)\n\n    \"\"\" Step 2: First association, with high score detection boxes\"\"\"\n    strack_pool = joint_tracks(tracked_stracks, self.lost_tracks)\n    # Predict the current location with KF\n    STrack.multi_predict(strack_pool)\n    dists = matching.iou_distance(strack_pool, detections)\n\n    dists = matching.fuse_score(dists, detections)\n    matches, u_track, u_detection = matching.linear_assignment(\n        dists, thresh=self.minimum_matching_threshold\n    )\n\n    for itracked, idet in matches:\n        track = strack_pool[itracked]\n        det = detections[idet]\n        if track.state == TrackState.Tracked:\n            track.update(detections[idet], self.frame_id)\n            activated_starcks.append(track)\n        else:\n            track.re_activate(det, self.frame_id, new_id=False)\n            refind_stracks.append(track)\n\n    \"\"\" Step 3: Second association, with low score detection boxes\"\"\"\n    # association the untrack to the low score detections\n    if len(dets_second) > 0:\n        \"\"\"Detections\"\"\"\n        detections_second = [\n            STrack(STrack.tlbr_to_tlwh(tlbr), s, c, self.minimum_consecutive_frames)\n            for (tlbr, s, c) in zip(dets_second, scores_second, class_ids_second)\n        ]\n    else:\n        detections_second = []\n    r_tracked_stracks = [\n        strack_pool[i]\n        for i in u_track\n        if strack_pool[i].state == TrackState.Tracked\n    ]\n    dists = matching.iou_distance(r_tracked_stracks, detections_second)\n    matches, u_track, u_detection_second = matching.linear_assignment(\n        dists, thresh=0.5\n    )\n    for itracked, idet in matches:\n        track = r_tracked_stracks[itracked]\n        det = detections_second[idet]\n        if track.state == TrackState.Tracked:\n            track.update(det, self.frame_id)\n            activated_starcks.append(track)\n        else:\n            track.re_activate(det, self.frame_id, new_id=False)\n            
refind_stracks.append(track)\n\n    for it in u_track:\n        track = r_tracked_stracks[it]\n        if not track.state == TrackState.Lost:\n            track.mark_lost()\n            lost_stracks.append(track)\n\n    \"\"\"Deal with unconfirmed tracks, usually tracks with only one beginning frame\"\"\"\n    detections = [detections[i] for i in u_detection]\n    dists = matching.iou_distance(unconfirmed, detections)\n\n    dists = matching.fuse_score(dists, detections)\n    matches, u_unconfirmed, u_detection = matching.linear_assignment(\n        dists, thresh=0.7\n    )\n    for itracked, idet in matches:\n        unconfirmed[itracked].update(detections[idet], self.frame_id)\n        activated_starcks.append(unconfirmed[itracked])\n    for it in u_unconfirmed:\n        track = unconfirmed[it]\n        track.mark_removed()\n        removed_stracks.append(track)\n\n    \"\"\" Step 4: Init new stracks\"\"\"\n    for inew in u_detection:\n        track = detections[inew]\n        if track.score < self.det_thresh:\n            continue\n        track.activate(self.kalman_filter, self.frame_id)\n        activated_starcks.append(track)\n    \"\"\" Step 5: Update state\"\"\"\n    for track in self.lost_tracks:\n        if self.frame_id - track.end_frame > self.max_time_lost:\n            track.mark_removed()\n            removed_stracks.append(track)\n\n    self.tracked_tracks = [\n        t for t in self.tracked_tracks if t.state == TrackState.Tracked\n    ]\n    self.tracked_tracks = joint_tracks(self.tracked_tracks, activated_starcks)\n    self.tracked_tracks = joint_tracks(self.tracked_tracks, refind_stracks)\n    self.lost_tracks = sub_tracks(self.lost_tracks, self.tracked_tracks)\n    self.lost_tracks.extend(lost_stracks)\n    self.lost_tracks = sub_tracks(self.lost_tracks, self.removed_tracks)\n    self.removed_tracks = removed_stracks\n    self.tracked_tracks, self.lost_tracks = remove_duplicate_tracks(\n        self.tracked_tracks, self.lost_tracks\n    )\n    output_stracks = [track for track in self.tracked_tracks if track.is_activated]\n\n    return output_stracks\n
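Based on the indexing in the source above (tensors[:, :4] boxes, tensors[:, 4] scores, tensors[:, 5] class ids), each row is (x1, y1, x2, y2, score, class_id); a minimal sketch with illustrative values:
import numpy as np\nimport supervision as sv\n\ntracker = sv.ByteTrack()\n\n# one detection per row: x1, y1, x2, y2, score, class_id\ntensors = np.array([\n    [10.0, 10.0, 50.0, 80.0, 0.9, 0.0],\n    [60.0, 20.0, 120.0, 90.0, 0.4, 1.0],\n])\n\ntracks = tracker.update_with_tensors(tensors=tensors)\n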
"},{"location":"classification/core/","title":"Classifications","text":"Source code in supervision/classification/core.py
@dataclass\nclass Classifications:\n    class_id: np.ndarray\n    confidence: Optional[np.ndarray] = None\n\n    def __post_init__(self) -> None:\n        \"\"\"\n        Validate the classification inputs.\n        \"\"\"\n        n = len(self.class_id)\n\n        _validate_class_ids(self.class_id, n)\n        _validate_confidence(self.confidence, n)\n\n    def __len__(self) -> int:\n        \"\"\"\n        Returns the number of classifications.\n        \"\"\"\n        return len(self.class_id)\n\n    @classmethod\n    def from_clip(cls, clip_results) -> Classifications:\n        \"\"\"\n        Creates a Classifications instance from a\n        [clip](https://github.com/openai/clip) inference result.\n\n        Args:\n            clip_results (np.ndarray): The inference result from clip model.\n\n        Returns:\n            Classifications: A new Classifications object.\n\n        Example:\n            ```python\n            from PIL import Image\n            import clip\n            import supervision as sv\n\n            model, preprocess = clip.load('ViT-B/32')\n\n            image = cv2.imread(SOURCE_IMAGE_PATH)\n            image = preprocess(image).unsqueeze(0)\n\n            text = clip.tokenize([\"a diagram\", \"a dog\", \"a cat\"])\n            output, _ = model(image, text)\n            classifications = sv.Classifications.from_clip(output)\n            ```\n        \"\"\"\n\n        confidence = clip_results.softmax(dim=-1).cpu().detach().numpy()[0]\n\n        if len(confidence) == 0:\n            return cls(class_id=np.array([]), confidence=np.array([]))\n\n        class_ids = np.arange(len(confidence))\n        return cls(class_id=class_ids, confidence=confidence)\n\n    @classmethod\n    def from_ultralytics(cls, ultralytics_results) -> Classifications:\n        \"\"\"\n        Creates a Classifications instance from a\n        [ultralytics](https://github.com/ultralytics/ultralytics) inference result.\n\n        Args:\n            ultralytics_results (ultralytics.engine.results.Results):\n                The inference result from ultralytics model.\n\n        Returns:\n            Classifications: A new Classifications object.\n\n        Example:\n            ```python\n            import cv2\n            from ultralytics import YOLO\n            import supervision as sv\n\n            image = cv2.imread(SOURCE_IMAGE_PATH)\n            model = YOLO('yolov8n-cls.pt')\n\n            output = model(image)[0]\n            classifications = sv.Classifications.from_ultralytics(output)\n            ```\n        \"\"\"\n        confidence = ultralytics_results.probs.data.cpu().numpy()\n        return cls(class_id=np.arange(confidence.shape[0]), confidence=confidence)\n\n    @classmethod\n    def from_timm(cls, timm_results) -> Classifications:\n        \"\"\"\n        Creates a Classifications instance from a\n        [timm](https://huggingface.co/docs/hub/timm) inference result.\n\n        Args:\n            timm_results (torch.Tensor): The inference result from timm model.\n\n        Returns:\n            Classifications: A new Classifications object.\n\n        Example:\n            ```python\n            from PIL import Image\n            import timm\n            from timm.data import resolve_data_config, create_transform\n            import supervision as sv\n\n            model = timm.create_model(\n                model_name='hf-hub:nateraw/resnet50-oxford-iiit-pet',\n                pretrained=True\n            ).eval()\n\n            config = resolve_data_config({}, 
model=model)\n            transform = create_transform(**config)\n\n            image = Image.open(SOURCE_IMAGE_PATH).convert('RGB')\n            x = transform(image).unsqueeze(0)\n\n            output = model(x)\n\n            classifications = sv.Classifications.from_timm(output)\n            ```\n        \"\"\"\n        confidence = timm_results.cpu().detach().numpy()[0]\n\n        if len(confidence) == 0:\n            return cls(class_id=np.array([]), confidence=np.array([]))\n\n        class_id = np.arange(len(confidence))\n        return cls(class_id=class_id, confidence=confidence)\n\n    def get_top_k(self, k: int) -> Tuple[np.ndarray, np.ndarray]:\n        \"\"\"\n        Retrieve the top k class IDs and confidences,\n            ordered in descending order by confidence.\n\n        Args:\n            k (int): The number of top class IDs and confidences to retrieve.\n\n        Returns:\n            Tuple[np.ndarray, np.ndarray]: A tuple containing\n                the top k class IDs and confidences.\n\n        Example:\n            ```python\n            import supervision as sv\n\n            classifications = sv.Classifications(...)\n\n            classifications.get_top_k(1)\n\n            (array([1]), array([0.9]))\n            ```\n        \"\"\"\n        if self.confidence is None:\n            raise ValueError(\"top_k could not be calculated, confidence is None\")\n\n        order = np.argsort(self.confidence)[::-1]\n        top_k_order = order[:k]\n        top_k_class_id = self.class_id[top_k_order]\n        top_k_confidence = self.confidence[top_k_order]\n\n        return top_k_class_id, top_k_confidence\n
"},{"location":"classification/core/#supervision.classification.core.Classifications-functions","title":"Functions","text":""},{"location":"classification/core/#supervision.classification.core.Classifications.__len__","title":"__len__()","text":"

Returns the number of classifications.

Source code in supervision/classification/core.py
def __len__(self) -> int:\n    \"\"\"\n    Returns the number of classifications.\n    \"\"\"\n    return len(self.class_id)\n
"},{"location":"classification/core/#supervision.classification.core.Classifications.__post_init__","title":"__post_init__()","text":"

Validate the classification inputs.

Source code in supervision/classification/core.py
def __post_init__(self) -> None:\n    \"\"\"\n    Validate the classification inputs.\n    \"\"\"\n    n = len(self.class_id)\n\n    _validate_class_ids(self.class_id, n)\n    _validate_confidence(self.confidence, n)\n
"},{"location":"classification/core/#supervision.classification.core.Classifications.from_clip","title":"from_clip(clip_results) classmethod","text":"

Creates a Classifications instance from a clip inference result.

Parameters:

  • clip_results (np.ndarray, required): The inference result from the clip model.

Returns:

  • Classifications: A new Classifications object.

Example
from PIL import Image\nimport clip\nimport supervision as sv\n\nmodel, preprocess = clip.load('ViT-B/32')\n\nimage = cv2.imread(SOURCE_IMAGE_PATH)\nimage = preprocess(image).unsqueeze(0)\n\ntext = clip.tokenize([\"a diagram\", \"a dog\", \"a cat\"])\noutput, _ = model(image, text)\nclassifications = sv.Classifications.from_clip(output)\n
Source code in supervision/classification/core.py
@classmethod\ndef from_clip(cls, clip_results) -> Classifications:\n    \"\"\"\n    Creates a Classifications instance from a\n    [clip](https://github.com/openai/clip) inference result.\n\n    Args:\n        clip_results (np.ndarray): The inference result from clip model.\n\n    Returns:\n        Classifications: A new Classifications object.\n\n    Example:\n        ```python\n        from PIL import Image\n        import clip\n        import supervision as sv\n\n        model, preprocess = clip.load('ViT-B/32')\n\n        image = cv2.imread(SOURCE_IMAGE_PATH)\n        image = preprocess(image).unsqueeze(0)\n\n        text = clip.tokenize([\"a diagram\", \"a dog\", \"a cat\"])\n        output, _ = model(image, text)\n        classifications = sv.Classifications.from_clip(output)\n        ```\n    \"\"\"\n\n    confidence = clip_results.softmax(dim=-1).cpu().detach().numpy()[0]\n\n    if len(confidence) == 0:\n        return cls(class_id=np.array([]), confidence=np.array([]))\n\n    class_ids = np.arange(len(confidence))\n    return cls(class_id=class_ids, confidence=confidence)\n
"},{"location":"classification/core/#supervision.classification.core.Classifications.from_timm","title":"from_timm(timm_results) classmethod","text":"

Creates a Classifications instance from a timm inference result.

Parameters:

  • timm_results (torch.Tensor, required): The inference result from the timm model.

Returns:

  • Classifications: A new Classifications object.

Example
from PIL import Image\nimport timm\nfrom timm.data import resolve_data_config, create_transform\nimport supervision as sv\n\nmodel = timm.create_model(\n    model_name='hf-hub:nateraw/resnet50-oxford-iiit-pet',\n    pretrained=True\n).eval()\n\nconfig = resolve_data_config({}, model=model)\ntransform = create_transform(**config)\n\nimage = Image.open(SOURCE_IMAGE_PATH).convert('RGB')\nx = transform(image).unsqueeze(0)\n\noutput = model(x)\n\nclassifications = sv.Classifications.from_timm(output)\n
Source code in supervision/classification/core.py
@classmethod\ndef from_timm(cls, timm_results) -> Classifications:\n    \"\"\"\n    Creates a Classifications instance from a\n    [timm](https://huggingface.co/docs/hub/timm) inference result.\n\n    Args:\n        timm_results (torch.Tensor): The inference result from timm model.\n\n    Returns:\n        Classifications: A new Classifications object.\n\n    Example:\n        ```python\n        from PIL import Image\n        import timm\n        from timm.data import resolve_data_config, create_transform\n        import supervision as sv\n\n        model = timm.create_model(\n            model_name='hf-hub:nateraw/resnet50-oxford-iiit-pet',\n            pretrained=True\n        ).eval()\n\n        config = resolve_data_config({}, model=model)\n        transform = create_transform(**config)\n\n        image = Image.open(SOURCE_IMAGE_PATH).convert('RGB')\n        x = transform(image).unsqueeze(0)\n\n        output = model(x)\n\n        classifications = sv.Classifications.from_timm(output)\n        ```\n    \"\"\"\n    confidence = timm_results.cpu().detach().numpy()[0]\n\n    if len(confidence) == 0:\n        return cls(class_id=np.array([]), confidence=np.array([]))\n\n    class_id = np.arange(len(confidence))\n    return cls(class_id=class_id, confidence=confidence)\n
"},{"location":"classification/core/#supervision.classification.core.Classifications.from_ultralytics","title":"from_ultralytics(ultralytics_results) classmethod","text":"

Creates a Classifications instance from a ultralytics inference result.

Parameters:

  • ultralytics_results (ultralytics.engine.results.Results, required): The inference result from the ultralytics model.

Returns:

  • Classifications: A new Classifications object.

Example
import cv2\nfrom ultralytics import YOLO\nimport supervision as sv\n\nimage = cv2.imread(SOURCE_IMAGE_PATH)\nmodel = YOLO('yolov8n-cls.pt')\n\noutput = model(image)[0]\nclassifications = sv.Classifications.from_ultralytics(output)\n
Source code in supervision/classification/core.py
@classmethod\ndef from_ultralytics(cls, ultralytics_results) -> Classifications:\n    \"\"\"\n    Creates a Classifications instance from a\n    [ultralytics](https://github.com/ultralytics/ultralytics) inference result.\n\n    Args:\n        ultralytics_results (ultralytics.engine.results.Results):\n            The inference result from ultralytics model.\n\n    Returns:\n        Classifications: A new Classifications object.\n\n    Example:\n        ```python\n        import cv2\n        from ultralytics import YOLO\n        import supervision as sv\n\n        image = cv2.imread(SOURCE_IMAGE_PATH)\n        model = YOLO('yolov8n-cls.pt')\n\n        output = model(image)[0]\n        classifications = sv.Classifications.from_ultralytics(output)\n        ```\n    \"\"\"\n    confidence = ultralytics_results.probs.data.cpu().numpy()\n    return cls(class_id=np.arange(confidence.shape[0]), confidence=confidence)\n
"},{"location":"classification/core/#supervision.classification.core.Classifications.get_top_k","title":"get_top_k(k)","text":"

Retrieve the top k class IDs and confidences, ordered in descending order by confidence.

Parameters:

- k (int): The number of top class IDs and confidences to retrieve. Required.

Returns:

- Tuple[np.ndarray, np.ndarray]: A tuple containing the top k class IDs and confidences.

Example
import supervision as sv\n\nclassifications = sv.Classifications(...)\n\nclassifications.get_top_k(1)\n\n(array([1]), array([0.9]))\n
Source code in supervision/classification/core.py
def get_top_k(self, k: int) -> Tuple[np.ndarray, np.ndarray]:\n    \"\"\"\n    Retrieve the top k class IDs and confidences,\n        ordered in descending order by confidence.\n\n    Args:\n        k (int): The number of top class IDs and confidences to retrieve.\n\n    Returns:\n        Tuple[np.ndarray, np.ndarray]: A tuple containing\n            the top k class IDs and confidences.\n\n    Example:\n        ```python\n        import supervision as sv\n\n        classifications = sv.Classifications(...)\n\n        classifications.get_top_k(1)\n\n        (array([1]), array([0.9]))\n        ```\n    \"\"\"\n    if self.confidence is None:\n        raise ValueError(\"top_k could not be calculated, confidence is None\")\n\n    order = np.argsort(self.confidence)[::-1]\n    top_k_order = order[:k]\n    top_k_class_id = self.class_id[top_k_order]\n    top_k_confidence = self.confidence[top_k_order]\n\n    return top_k_class_id, top_k_confidence\n
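As a quick end-to-end sketch (not taken from the library docs; the image path and weights file are placeholders), the ultralytics connector above combines naturally with `get_top_k`. The `names` attribute on an ultralytics `Results` object maps class IDs to labels:

```python
import cv2
from ultralytics import YOLO
import supervision as sv

# Placeholder image path and classification weights, used purely for illustration.
image = cv2.imread("image.jpg")
model = YOLO("yolov8n-cls.pt")

result = model(image)[0]
classifications = sv.Classifications.from_ultralytics(result)

# Top-3 class IDs and confidences, ordered by confidence.
top_class_ids, top_confidences = classifications.get_top_k(3)

# `result.names` maps class IDs to human-readable labels in ultralytics.
for class_id, confidence in zip(top_class_ids, top_confidences):
    print(result.names[int(class_id)], f"{confidence:.2f}")
```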
"},{"location":"datasets/core/","title":"Datasets","text":"

Warning

The Dataset API is still fluid and may change. If you use the Dataset API in your project, freeze the supervision version in your requirements.txt or setup.py until further notice.
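For example, a pinned entry in requirements.txt might look like the following; the version number is only a placeholder, use whichever release you have validated:

```
supervision==0.21.0
```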

DetectionDataset

Bases: BaseDataset

Dataclass containing information about an object detection dataset.

Attributes:

- classes (List[str]): List containing dataset class names.
- images (Dict[str, np.ndarray]): Dictionary mapping image name to image.
- annotations (Dict[str, Detections]): Dictionary mapping image name to annotations.
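A minimal sketch of constructing a DetectionDataset directly from these three fields; the image name, box coordinates, and class list below are made up for illustration:

```python
import numpy as np
import supervision as sv

# One blank image with a single annotated box; values are illustrative only.
image = np.zeros((480, 640, 3), dtype=np.uint8)
detections = sv.Detections(
    xyxy=np.array([[50, 60, 200, 220]], dtype=np.float32),
    class_id=np.array([0]),
)

ds = sv.DetectionDataset(
    classes=["dog", "person"],
    images={"image_01.png": image},
    annotations={"image_01.png": detections},
)

len(ds)
# 1
```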

Source code in supervision/dataset/core.py
@dataclass\nclass DetectionDataset(BaseDataset):\n    \"\"\"\n    Dataclass containing information about object detection dataset.\n\n    Attributes:\n        classes (List[str]): List containing dataset class names.\n        images (Dict[str, np.ndarray]): Dictionary mapping image name to image.\n        annotations (Dict[str, Detections]): Dictionary mapping\n            image name to annotations.\n    \"\"\"\n\n    classes: List[str]\n    images: Dict[str, np.ndarray]\n    annotations: Dict[str, Detections]\n\n    def __len__(self) -> int:\n        \"\"\"\n        Return the number of images in the dataset.\n\n        Returns:\n            int: The number of images.\n        \"\"\"\n        return len(self.images)\n\n    def __iter__(self) -> Iterator[Tuple[str, np.ndarray, Detections]]:\n        \"\"\"\n        Iterate over the images and annotations in the dataset.\n\n        Yields:\n            Iterator[Tuple[str, np.ndarray, Detections]]:\n                An iterator that yields tuples containing the image name,\n                the image data, and its corresponding annotation.\n        \"\"\"\n        for image_name, image in self.images.items():\n            yield image_name, image, self.annotations.get(image_name, None)\n\n    def __eq__(self, other):\n        if not isinstance(other, DetectionDataset):\n            return False\n\n        if set(self.classes) != set(other.classes):\n            return False\n\n        for key in self.images:\n            if not np.array_equal(self.images[key], other.images[key]):\n                return False\n            if not self.annotations[key] == other.annotations[key]:\n                return False\n\n        return True\n\n    def split(\n        self, split_ratio=0.8, random_state=None, shuffle: bool = True\n    ) -> Tuple[DetectionDataset, DetectionDataset]:\n        \"\"\"\n        Splits the dataset into two parts (training and testing)\n            using the provided split_ratio.\n\n        Args:\n            split_ratio (float, optional): The ratio of the training\n                set to the entire dataset.\n            random_state (int, optional): The seed for the random number generator.\n                This is used for reproducibility.\n            shuffle (bool, optional): Whether to shuffle the data before splitting.\n\n        Returns:\n            Tuple[DetectionDataset, DetectionDataset]: A tuple containing\n                the training and testing datasets.\n\n        Examples:\n            ```python\n            import supervision as sv\n\n            ds = sv.DetectionDataset(...)\n            train_ds, test_ds = ds.split(split_ratio=0.7, random_state=42, shuffle=True)\n            len(train_ds), len(test_ds)\n            # (700, 300)\n            ```\n        \"\"\"\n\n        image_names = list(self.images.keys())\n        train_names, test_names = train_test_split(\n            data=image_names,\n            train_ratio=split_ratio,\n            random_state=random_state,\n            shuffle=shuffle,\n        )\n\n        train_dataset = DetectionDataset(\n            classes=self.classes,\n            images={name: self.images[name] for name in train_names},\n            annotations={name: self.annotations[name] for name in train_names},\n        )\n        test_dataset = DetectionDataset(\n            classes=self.classes,\n            images={name: self.images[name] for name in test_names},\n            annotations={name: self.annotations[name] for name in test_names},\n        )\n        return train_dataset, 
test_dataset\n\n    def as_pascal_voc(\n        self,\n        images_directory_path: Optional[str] = None,\n        annotations_directory_path: Optional[str] = None,\n        min_image_area_percentage: float = 0.0,\n        max_image_area_percentage: float = 1.0,\n        approximation_percentage: float = 0.0,\n    ) -> None:\n        \"\"\"\n        Exports the dataset to PASCAL VOC format. This method saves the images\n        and their corresponding annotations in PASCAL VOC format.\n\n        Args:\n            images_directory_path (Optional[str]): The path to the directory\n                where the images should be saved.\n                If not provided, images will not be saved.\n            annotations_directory_path (Optional[str]): The path to\n                the directory where the annotations in PASCAL VOC format should be\n                saved. If not provided, annotations will not be saved.\n            min_image_area_percentage (float): The minimum percentage of\n                detection area relative to\n                the image area for a detection to be included.\n                Argument is used only for segmentation datasets.\n            max_image_area_percentage (float): The maximum percentage\n                of detection area relative to\n                the image area for a detection to be included.\n                Argument is used only for segmentation datasets.\n            approximation_percentage (float): The percentage of\n                polygon points to be removed from the input polygon,\n                in the range [0, 1). Argument is used only for segmentation datasets.\n        \"\"\"\n        if images_directory_path:\n            save_dataset_images(\n                images_directory_path=images_directory_path, images=self.images\n            )\n        if annotations_directory_path:\n            Path(annotations_directory_path).mkdir(parents=True, exist_ok=True)\n\n        for image_path, image in self.images.items():\n            detections = self.annotations[image_path]\n\n            if annotations_directory_path:\n                annotation_name = Path(image_path).stem\n                annotations_path = os.path.join(\n                    annotations_directory_path, f\"{annotation_name}.xml\"\n                )\n                image_name = Path(image_path).name\n                pascal_voc_xml = detections_to_pascal_voc(\n                    detections=detections,\n                    classes=self.classes,\n                    filename=image_name,\n                    image_shape=image.shape,\n                    min_image_area_percentage=min_image_area_percentage,\n                    max_image_area_percentage=max_image_area_percentage,\n                    approximation_percentage=approximation_percentage,\n                )\n\n                with open(annotations_path, \"w\") as f:\n                    f.write(pascal_voc_xml)\n\n    @classmethod\n    def from_pascal_voc(\n        cls,\n        images_directory_path: str,\n        annotations_directory_path: str,\n        force_masks: bool = False,\n    ) -> DetectionDataset:\n        \"\"\"\n        Creates a Dataset instance from PASCAL VOC formatted data.\n\n        Args:\n            images_directory_path (str): Path to the directory containing the images.\n            annotations_directory_path (str): Path to the directory\n                containing the PASCAL VOC XML annotations.\n            force_masks (bool, optional): If True, forces masks to\n                be loaded for 
all annotations, regardless of whether they are present.\n\n        Returns:\n            DetectionDataset: A DetectionDataset instance containing\n                the loaded images and annotations.\n\n        Examples:\n            ```python\n            import roboflow\n            from roboflow import Roboflow\n            import supervision as sv\n\n            roboflow.login()\n\n            rf = Roboflow()\n\n            project = rf.workspace(WORKSPACE_ID).project(PROJECT_ID)\n            dataset = project.version(PROJECT_VERSION).download(\"voc\")\n\n            ds = sv.DetectionDataset.from_pascal_voc(\n                images_directory_path=f\"{dataset.location}/train/images\",\n                annotations_directory_path=f\"{dataset.location}/train/labels\"\n            )\n\n            ds.classes\n            # ['dog', 'person']\n            ```\n        \"\"\"\n\n        classes, images, annotations = load_pascal_voc_annotations(\n            images_directory_path=images_directory_path,\n            annotations_directory_path=annotations_directory_path,\n            force_masks=force_masks,\n        )\n\n        return DetectionDataset(classes=classes, images=images, annotations=annotations)\n\n    @classmethod\n    def from_yolo(\n        cls,\n        images_directory_path: str,\n        annotations_directory_path: str,\n        data_yaml_path: str,\n        force_masks: bool = False,\n        is_obb: bool = False,\n    ) -> DetectionDataset:\n        \"\"\"\n        Creates a Dataset instance from YOLO formatted data.\n\n        Args:\n            images_directory_path (str): The path to the\n                directory containing the images.\n            annotations_directory_path (str): The path to the directory\n                containing the YOLO annotation files.\n            data_yaml_path (str): The path to the data\n                YAML file containing class information.\n            force_masks (bool, optional): If True, forces\n                masks to be loaded for all annotations,\n                regardless of whether they are present.\n            is_obb (bool, optional): If True, loads the annotations in OBB format.\n                OBB annotations are defined as `[class_id, x, y, x, y, x, y, x, y]`,\n                where pairs of [x, y] are box corners.\n\n        Returns:\n            DetectionDataset: A DetectionDataset instance\n                containing the loaded images and annotations.\n\n        Examples:\n            ```python\n            import roboflow\n            from roboflow import Roboflow\n            import supervision as sv\n\n            roboflow.login()\n            rf = Roboflow()\n\n            project = rf.workspace(WORKSPACE_ID).project(PROJECT_ID)\n            dataset = project.version(PROJECT_VERSION).download(\"yolov5\")\n\n            ds = sv.DetectionDataset.from_yolo(\n                images_directory_path=f\"{dataset.location}/train/images\",\n                annotations_directory_path=f\"{dataset.location}/train/labels\",\n                data_yaml_path=f\"{dataset.location}/data.yaml\"\n            )\n\n            ds.classes\n            # ['dog', 'person']\n            ```\n        \"\"\"\n        classes, images, annotations = load_yolo_annotations(\n            images_directory_path=images_directory_path,\n            annotations_directory_path=annotations_directory_path,\n            data_yaml_path=data_yaml_path,\n            force_masks=force_masks,\n            is_obb=is_obb,\n        )\n        return 
DetectionDataset(classes=classes, images=images, annotations=annotations)\n\n    def as_yolo(\n        self,\n        images_directory_path: Optional[str] = None,\n        annotations_directory_path: Optional[str] = None,\n        data_yaml_path: Optional[str] = None,\n        min_image_area_percentage: float = 0.0,\n        max_image_area_percentage: float = 1.0,\n        approximation_percentage: float = 0.0,\n    ) -> None:\n        \"\"\"\n        Exports the dataset to YOLO format. This method saves the\n        images and their corresponding annotations in YOLO format.\n\n        Args:\n            images_directory_path (Optional[str]): The path to the\n                directory where the images should be saved.\n                If not provided, images will not be saved.\n            annotations_directory_path (Optional[str]): The path to the\n                directory where the annotations in\n                YOLO format should be saved. If not provided,\n                annotations will not be saved.\n            data_yaml_path (Optional[str]): The path where the data.yaml\n                file should be saved.\n                If not provided, the file will not be saved.\n            min_image_area_percentage (float): The minimum percentage of\n                detection area relative to\n                the image area for a detection to be included.\n                Argument is used only for segmentation datasets.\n            max_image_area_percentage (float): The maximum percentage\n                of detection area relative to\n                the image area for a detection to be included.\n                Argument is used only for segmentation datasets.\n            approximation_percentage (float): The percentage of polygon points to\n                be removed from the input polygon, in the range [0, 1).\n                This is useful for simplifying the annotations.\n                Argument is used only for segmentation datasets.\n        \"\"\"\n        if images_directory_path is not None:\n            save_dataset_images(\n                images_directory_path=images_directory_path, images=self.images\n            )\n        if annotations_directory_path is not None:\n            save_yolo_annotations(\n                annotations_directory_path=annotations_directory_path,\n                images=self.images,\n                annotations=self.annotations,\n                min_image_area_percentage=min_image_area_percentage,\n                max_image_area_percentage=max_image_area_percentage,\n                approximation_percentage=approximation_percentage,\n            )\n        if data_yaml_path is not None:\n            save_data_yaml(data_yaml_path=data_yaml_path, classes=self.classes)\n\n    @classmethod\n    def from_coco(\n        cls,\n        images_directory_path: str,\n        annotations_path: str,\n        force_masks: bool = False,\n    ) -> DetectionDataset:\n        \"\"\"\n        Creates a Dataset instance from COCO formatted data.\n\n        Args:\n            images_directory_path (str): The path to the\n                directory containing the images.\n            annotations_path (str): The path to the json annotation files.\n            force_masks (bool, optional): If True,\n                forces masks to be loaded for all annotations,\n                regardless of whether they are present.\n\n        Returns:\n            DetectionDataset: A DetectionDataset instance containing\n                the loaded images and annotations.\n\n        
Examples:\n            ```python\n            import roboflow\n            from roboflow import Roboflow\n            import supervision as sv\n\n            roboflow.login()\n            rf = Roboflow()\n\n            project = rf.workspace(WORKSPACE_ID).project(PROJECT_ID)\n            dataset = project.version(PROJECT_VERSION).download(\"coco\")\n\n            ds = sv.DetectionDataset.from_coco(\n                images_directory_path=f\"{dataset.location}/train\",\n                annotations_path=f\"{dataset.location}/train/_annotations.coco.json\",\n            )\n\n            ds.classes\n            # ['dog', 'person']\n            ```\n        \"\"\"\n        classes, images, annotations = load_coco_annotations(\n            images_directory_path=images_directory_path,\n            annotations_path=annotations_path,\n            force_masks=force_masks,\n        )\n        return DetectionDataset(classes=classes, images=images, annotations=annotations)\n\n    def as_coco(\n        self,\n        images_directory_path: Optional[str] = None,\n        annotations_path: Optional[str] = None,\n        min_image_area_percentage: float = 0.0,\n        max_image_area_percentage: float = 1.0,\n        approximation_percentage: float = 0.0,\n    ) -> None:\n        \"\"\"\n        Exports the dataset to COCO format. This method saves the\n        images and their corresponding annotations in COCO format.\n\n        !!! tip\n\n            The format of the mask is determined automatically based on its structure:\n\n            - If a mask contains multiple disconnected components or holes, it will be\n            saved using the Run-Length Encoding (RLE) format for efficient storage and\n            processing.\n            - If a mask consists of a single, contiguous region without any holes, it\n            will be encoded as a polygon, preserving the outline of the object.\n\n            This automatic selection ensures that the masks are stored in the most\n            appropriate and space-efficient format, complying with COCO dataset\n            standards.\n\n        Args:\n            images_directory_path (Optional[str]): The path to the directory\n                where the images should be saved.\n                If not provided, images will not be saved.\n            annotations_path (Optional[str]): The path to COCO annotation file.\n            min_image_area_percentage (float): The minimum percentage of\n                detection area relative to\n                the image area for a detection to be included.\n                Argument is used only for segmentation datasets.\n            max_image_area_percentage (float): The maximum percentage of\n                detection area relative to\n                the image area for a detection to be included.\n                Argument is used only for segmentation datasets.\n            approximation_percentage (float): The percentage of polygon points\n                to be removed from the input polygon,\n                in the range [0, 1). 
This is useful for simplifying the annotations.\n                Argument is used only for segmentation datasets.\n        \"\"\"\n        if images_directory_path is not None:\n            save_dataset_images(\n                images_directory_path=images_directory_path, images=self.images\n            )\n        if annotations_path is not None:\n            save_coco_annotations(\n                annotation_path=annotations_path,\n                images=self.images,\n                annotations=self.annotations,\n                classes=self.classes,\n                min_image_area_percentage=min_image_area_percentage,\n                max_image_area_percentage=max_image_area_percentage,\n                approximation_percentage=approximation_percentage,\n            )\n\n    @classmethod\n    def merge(cls, dataset_list: List[DetectionDataset]) -> DetectionDataset:\n        \"\"\"\n        Merge a list of `DetectionDataset` objects into a single\n            `DetectionDataset` object.\n\n        This method takes a list of `DetectionDataset` objects and combines\n        their respective fields (`classes`, `images`,\n        `annotations`) into a single `DetectionDataset` object.\n\n        Args:\n            dataset_list (List[DetectionDataset]): A list of `DetectionDataset`\n                objects to merge.\n\n        Returns:\n            (DetectionDataset): A single `DetectionDataset` object containing\n            the merged data from the input list.\n\n        Examples:\n            ```python\n            import supervision as sv\n\n            ds_1 = sv.DetectionDataset(...)\n            len(ds_1)\n            # 100\n            ds_1.classes\n            # ['dog', 'person']\n\n            ds_2 = sv.DetectionDataset(...)\n            len(ds_2)\n            # 200\n            ds_2.classes\n            # ['cat']\n\n            ds_merged = sv.DetectionDataset.merge([ds_1, ds_2])\n            len(ds_merged)\n            # 300\n            ds_merged.classes\n            # ['cat', 'dog', 'person']\n            ```\n        \"\"\"\n        merged_images, merged_annotations = {}, {}\n        class_lists = [dataset.classes for dataset in dataset_list]\n        merged_classes = merge_class_lists(class_lists=class_lists)\n\n        for dataset in dataset_list:\n            class_index_mapping = build_class_index_mapping(\n                source_classes=dataset.classes, target_classes=merged_classes\n            )\n            for image_name, image, detections in dataset:\n                if image_name in merged_annotations:\n                    raise ValueError(\n                        f\"Image name {image_name} is not unique across datasets.\"\n                    )\n\n                merged_images[image_name] = image\n                merged_annotations[image_name] = map_detections_class_id(\n                    source_to_target_mapping=class_index_mapping,\n                    detections=detections,\n                )\n\n        return cls(\n            classes=merged_classes, images=merged_images, annotations=merged_annotations\n        )\n
ClassificationDataset

Bases: BaseDataset

Dataclass containing information about a classification dataset.

Attributes:

- classes (List[str]): List containing dataset class names.
- images (Dict[str, np.ndarray]): Dictionary mapping image name to image.
- annotations (Dict[str, Classifications]): Dictionary mapping image name to annotations.
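Analogously, a minimal ClassificationDataset can be assembled by hand; image name and class list are illustrative:

```python
import numpy as np
import supervision as sv

# One blank image labeled with class index 1 ("dog"); values are illustrative only.
image = np.zeros((224, 224, 3), dtype=np.uint8)

cd = sv.ClassificationDataset(
    classes=["cat", "dog"],
    images={"image_01.png": image},
    annotations={"image_01.png": sv.Classifications(class_id=np.array([1]))},
)

len(cd)
# 1
```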

Source code in supervision/dataset/core.py
@dataclass\nclass ClassificationDataset(BaseDataset):\n    \"\"\"\n    Dataclass containing information about a classification dataset.\n\n    Attributes:\n        classes (List[str]): List containing dataset class names.\n        images (Dict[str, np.ndarray]): Dictionary mapping image name to image.\n        annotations (Dict[str, Detections]): Dictionary mapping\n            image name to annotations.\n    \"\"\"\n\n    classes: List[str]\n    images: Dict[str, np.ndarray]\n    annotations: Dict[str, Classifications]\n\n    def __len__(self) -> int:\n        return len(self.images)\n\n    def split(\n        self, split_ratio=0.8, random_state=None, shuffle: bool = True\n    ) -> Tuple[ClassificationDataset, ClassificationDataset]:\n        \"\"\"\n        Splits the dataset into two parts (training and testing)\n            using the provided split_ratio.\n\n        Args:\n            split_ratio (float, optional): The ratio of the training\n                set to the entire dataset.\n            random_state (int, optional): The seed for the\n                random number generator. This is used for reproducibility.\n            shuffle (bool, optional): Whether to shuffle the data before splitting.\n\n        Returns:\n            Tuple[ClassificationDataset, ClassificationDataset]: A tuple containing\n            the training and testing datasets.\n\n        Examples:\n            ```python\n            import supervision as sv\n\n            cd = sv.ClassificationDataset(...)\n            train_cd,test_cd = cd.split(split_ratio=0.7, random_state=42,shuffle=True)\n            len(train_cd), len(test_cd)\n            # (700, 300)\n            ```\n        \"\"\"\n        image_names = list(self.images.keys())\n        train_names, test_names = train_test_split(\n            data=image_names,\n            train_ratio=split_ratio,\n            random_state=random_state,\n            shuffle=shuffle,\n        )\n\n        train_dataset = ClassificationDataset(\n            classes=self.classes,\n            images={name: self.images[name] for name in train_names},\n            annotations={name: self.annotations[name] for name in train_names},\n        )\n        test_dataset = ClassificationDataset(\n            classes=self.classes,\n            images={name: self.images[name] for name in test_names},\n            annotations={name: self.annotations[name] for name in test_names},\n        )\n        return train_dataset, test_dataset\n\n    def as_folder_structure(self, root_directory_path: str) -> None:\n        \"\"\"\n        Saves the dataset as a multi-class folder structure.\n\n        Args:\n            root_directory_path (str): The path to the directory\n                where the dataset will be saved.\n        \"\"\"\n        os.makedirs(root_directory_path, exist_ok=True)\n\n        for class_name in self.classes:\n            os.makedirs(os.path.join(root_directory_path, class_name), exist_ok=True)\n\n        for image_path in self.images:\n            classification = self.annotations[image_path]\n            image = self.images[image_path]\n            image_name = Path(image_path).name\n            class_id = (\n                classification.class_id[0]\n                if classification.confidence is None\n                else classification.get_top_k(1)[0][0]\n            )\n            class_name = self.classes[class_id]\n            image_path = os.path.join(root_directory_path, class_name, image_name)\n            cv2.imwrite(image_path, image)\n\n    
@classmethod\n    def from_folder_structure(cls, root_directory_path: str) -> ClassificationDataset:\n        \"\"\"\n        Load data from a multiclass folder structure into a ClassificationDataset.\n\n        Args:\n            root_directory_path (str): The path to the dataset directory.\n\n        Returns:\n            ClassificationDataset: The dataset.\n\n        Examples:\n            ```python\n            import roboflow\n            from roboflow import Roboflow\n            import supervision as sv\n\n            roboflow.login()\n            rf = Roboflow()\n\n            project = rf.workspace(WORKSPACE_ID).project(PROJECT_ID)\n            dataset = project.version(PROJECT_VERSION).download(\"folder\")\n\n            cd = sv.ClassificationDataset.from_folder_structure(\n                root_directory_path=f\"{dataset.location}/train\"\n            )\n            ```\n        \"\"\"\n        classes = os.listdir(root_directory_path)\n        classes = sorted(set(classes))\n\n        images = {}\n        annotations = {}\n\n        for class_name in classes:\n            class_id = classes.index(class_name)\n\n            for image in os.listdir(os.path.join(root_directory_path, class_name)):\n                image_path = str(os.path.join(root_directory_path, class_name, image))\n                images[image_path] = cv2.imread(image_path)\n                annotations[image_path] = Classifications(\n                    class_id=np.array([class_id]),\n                )\n\n        return cls(\n            classes=classes,\n            images=images,\n            annotations=annotations,\n        )\n
"},{"location":"datasets/core/#supervision.dataset.core.DetectionDataset-functions","title":"Functions","text":""},{"location":"datasets/core/#supervision.dataset.core.DetectionDataset.__iter__","title":"__iter__()","text":"

Iterate over the images and annotations in the dataset.

Yields:

- Iterator[Tuple[str, np.ndarray, Detections]]: An iterator that yields tuples containing the image name, the image data, and its corresponding annotation.

Source code in supervision/dataset/core.py
def __iter__(self) -> Iterator[Tuple[str, np.ndarray, Detections]]:\n    \"\"\"\n    Iterate over the images and annotations in the dataset.\n\n    Yields:\n        Iterator[Tuple[str, np.ndarray, Detections]]:\n            An iterator that yields tuples containing the image name,\n            the image data, and its corresponding annotation.\n    \"\"\"\n    for image_name, image in self.images.items():\n        yield image_name, image, self.annotations.get(image_name, None)\n
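A short usage sketch of the iterator; the dataset itself is assumed to already exist:

```python
import supervision as sv

ds = sv.DetectionDataset(...)

# Each iteration yields the image name, the image array, and its Detections.
# The annotation may be None if no entry exists for that image name.
for image_name, image, detections in ds:
    count = 0 if detections is None else len(detections)
    print(image_name, image.shape, count)
```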
"},{"location":"datasets/core/#supervision.dataset.core.DetectionDataset.__len__","title":"__len__()","text":"

Return the number of images in the dataset.

Returns:

- int: The number of images.

Source code in supervision/dataset/core.py
def __len__(self) -> int:\n    \"\"\"\n    Return the number of images in the dataset.\n\n    Returns:\n        int: The number of images.\n    \"\"\"\n    return len(self.images)\n
"},{"location":"datasets/core/#supervision.dataset.core.DetectionDataset.as_coco","title":"as_coco(images_directory_path=None, annotations_path=None, min_image_area_percentage=0.0, max_image_area_percentage=1.0, approximation_percentage=0.0)","text":"

Exports the dataset to COCO format. This method saves the images and their corresponding annotations in COCO format.

Tip

The format of the mask is determined automatically based on its structure:

  • If a mask contains multiple disconnected components or holes, it will be saved using the Run-Length Encoding (RLE) format for efficient storage and processing.
  • If a mask consists of a single, contiguous region without any holes, it will be encoded as a polygon, preserving the outline of the object.

This automatic selection ensures that the masks are stored in the most appropriate and space-efficient format, complying with COCO dataset standards.
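A minimal export call might look like the following; the output paths are placeholders and the parameters are documented below:

```python
import supervision as sv

ds = sv.DetectionDataset(...)

# Write images and a single COCO JSON annotation file to placeholder paths.
ds.as_coco(
    images_directory_path="export/images",
    annotations_path="export/annotations.coco.json",
)
```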

Parameters:

- images_directory_path (Optional[str], default None): The path to the directory where the images should be saved. If not provided, images will not be saved.
- annotations_path (Optional[str], default None): The path to the COCO annotation file.
- min_image_area_percentage (float, default 0.0): The minimum percentage of detection area relative to the image area for a detection to be included. Argument is used only for segmentation datasets.
- max_image_area_percentage (float, default 1.0): The maximum percentage of detection area relative to the image area for a detection to be included. Argument is used only for segmentation datasets.
- approximation_percentage (float, default 0.0): The percentage of polygon points to be removed from the input polygon, in the range [0, 1). This is useful for simplifying the annotations. Argument is used only for segmentation datasets.

Source code in supervision/dataset/core.py
def as_coco(\n    self,\n    images_directory_path: Optional[str] = None,\n    annotations_path: Optional[str] = None,\n    min_image_area_percentage: float = 0.0,\n    max_image_area_percentage: float = 1.0,\n    approximation_percentage: float = 0.0,\n) -> None:\n    \"\"\"\n    Exports the dataset to COCO format. This method saves the\n    images and their corresponding annotations in COCO format.\n\n    !!! tip\n\n        The format of the mask is determined automatically based on its structure:\n\n        - If a mask contains multiple disconnected components or holes, it will be\n        saved using the Run-Length Encoding (RLE) format for efficient storage and\n        processing.\n        - If a mask consists of a single, contiguous region without any holes, it\n        will be encoded as a polygon, preserving the outline of the object.\n\n        This automatic selection ensures that the masks are stored in the most\n        appropriate and space-efficient format, complying with COCO dataset\n        standards.\n\n    Args:\n        images_directory_path (Optional[str]): The path to the directory\n            where the images should be saved.\n            If not provided, images will not be saved.\n        annotations_path (Optional[str]): The path to COCO annotation file.\n        min_image_area_percentage (float): The minimum percentage of\n            detection area relative to\n            the image area for a detection to be included.\n            Argument is used only for segmentation datasets.\n        max_image_area_percentage (float): The maximum percentage of\n            detection area relative to\n            the image area for a detection to be included.\n            Argument is used only for segmentation datasets.\n        approximation_percentage (float): The percentage of polygon points\n            to be removed from the input polygon,\n            in the range [0, 1). This is useful for simplifying the annotations.\n            Argument is used only for segmentation datasets.\n    \"\"\"\n    if images_directory_path is not None:\n        save_dataset_images(\n            images_directory_path=images_directory_path, images=self.images\n        )\n    if annotations_path is not None:\n        save_coco_annotations(\n            annotation_path=annotations_path,\n            images=self.images,\n            annotations=self.annotations,\n            classes=self.classes,\n            min_image_area_percentage=min_image_area_percentage,\n            max_image_area_percentage=max_image_area_percentage,\n            approximation_percentage=approximation_percentage,\n        )\n
"},{"location":"datasets/core/#supervision.dataset.core.DetectionDataset.as_pascal_voc","title":"as_pascal_voc(images_directory_path=None, annotations_directory_path=None, min_image_area_percentage=0.0, max_image_area_percentage=1.0, approximation_percentage=0.0)","text":"

Exports the dataset to PASCAL VOC format. This method saves the images and their corresponding annotations in PASCAL VOC format.
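For example, the export can be invoked as follows; the directories are placeholders and the parameters are documented below:

```python
import supervision as sv

ds = sv.DetectionDataset(...)

# Save images and one PASCAL VOC XML file per image to placeholder directories.
ds.as_pascal_voc(
    images_directory_path="export/images",
    annotations_directory_path="export/annotations",
)
```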

Parameters:

- images_directory_path (Optional[str], default None): The path to the directory where the images should be saved. If not provided, images will not be saved.
- annotations_directory_path (Optional[str], default None): The path to the directory where the annotations in PASCAL VOC format should be saved. If not provided, annotations will not be saved.
- min_image_area_percentage (float, default 0.0): The minimum percentage of detection area relative to the image area for a detection to be included. Argument is used only for segmentation datasets.
- max_image_area_percentage (float, default 1.0): The maximum percentage of detection area relative to the image area for a detection to be included. Argument is used only for segmentation datasets.
- approximation_percentage (float, default 0.0): The percentage of polygon points to be removed from the input polygon, in the range [0, 1). Argument is used only for segmentation datasets.

Source code in supervision/dataset/core.py
def as_pascal_voc(\n    self,\n    images_directory_path: Optional[str] = None,\n    annotations_directory_path: Optional[str] = None,\n    min_image_area_percentage: float = 0.0,\n    max_image_area_percentage: float = 1.0,\n    approximation_percentage: float = 0.0,\n) -> None:\n    \"\"\"\n    Exports the dataset to PASCAL VOC format. This method saves the images\n    and their corresponding annotations in PASCAL VOC format.\n\n    Args:\n        images_directory_path (Optional[str]): The path to the directory\n            where the images should be saved.\n            If not provided, images will not be saved.\n        annotations_directory_path (Optional[str]): The path to\n            the directory where the annotations in PASCAL VOC format should be\n            saved. If not provided, annotations will not be saved.\n        min_image_area_percentage (float): The minimum percentage of\n            detection area relative to\n            the image area for a detection to be included.\n            Argument is used only for segmentation datasets.\n        max_image_area_percentage (float): The maximum percentage\n            of detection area relative to\n            the image area for a detection to be included.\n            Argument is used only for segmentation datasets.\n        approximation_percentage (float): The percentage of\n            polygon points to be removed from the input polygon,\n            in the range [0, 1). Argument is used only for segmentation datasets.\n    \"\"\"\n    if images_directory_path:\n        save_dataset_images(\n            images_directory_path=images_directory_path, images=self.images\n        )\n    if annotations_directory_path:\n        Path(annotations_directory_path).mkdir(parents=True, exist_ok=True)\n\n    for image_path, image in self.images.items():\n        detections = self.annotations[image_path]\n\n        if annotations_directory_path:\n            annotation_name = Path(image_path).stem\n            annotations_path = os.path.join(\n                annotations_directory_path, f\"{annotation_name}.xml\"\n            )\n            image_name = Path(image_path).name\n            pascal_voc_xml = detections_to_pascal_voc(\n                detections=detections,\n                classes=self.classes,\n                filename=image_name,\n                image_shape=image.shape,\n                min_image_area_percentage=min_image_area_percentage,\n                max_image_area_percentage=max_image_area_percentage,\n                approximation_percentage=approximation_percentage,\n            )\n\n            with open(annotations_path, \"w\") as f:\n                f.write(pascal_voc_xml)\n
"},{"location":"datasets/core/#supervision.dataset.core.DetectionDataset.as_yolo","title":"as_yolo(images_directory_path=None, annotations_directory_path=None, data_yaml_path=None, min_image_area_percentage=0.0, max_image_area_percentage=1.0, approximation_percentage=0.0)","text":"

Exports the dataset to YOLO format. This method saves the images and their corresponding annotations in YOLO format.
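A minimal sketch of the call; the paths are placeholders and the parameters are documented below:

```python
import supervision as sv

ds = sv.DetectionDataset(...)

# Save images, YOLO label files, and a data.yaml to placeholder paths.
ds.as_yolo(
    images_directory_path="export/images",
    annotations_directory_path="export/labels",
    data_yaml_path="export/data.yaml",
)
```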

Parameters:

- images_directory_path (Optional[str], default None): The path to the directory where the images should be saved. If not provided, images will not be saved.
- annotations_directory_path (Optional[str], default None): The path to the directory where the annotations in YOLO format should be saved. If not provided, annotations will not be saved.
- data_yaml_path (Optional[str], default None): The path where the data.yaml file should be saved. If not provided, the file will not be saved.
- min_image_area_percentage (float, default 0.0): The minimum percentage of detection area relative to the image area for a detection to be included. Argument is used only for segmentation datasets.
- max_image_area_percentage (float, default 1.0): The maximum percentage of detection area relative to the image area for a detection to be included. Argument is used only for segmentation datasets.
- approximation_percentage (float, default 0.0): The percentage of polygon points to be removed from the input polygon, in the range [0, 1). This is useful for simplifying the annotations. Argument is used only for segmentation datasets.

Source code in supervision/dataset/core.py
def as_yolo(\n    self,\n    images_directory_path: Optional[str] = None,\n    annotations_directory_path: Optional[str] = None,\n    data_yaml_path: Optional[str] = None,\n    min_image_area_percentage: float = 0.0,\n    max_image_area_percentage: float = 1.0,\n    approximation_percentage: float = 0.0,\n) -> None:\n    \"\"\"\n    Exports the dataset to YOLO format. This method saves the\n    images and their corresponding annotations in YOLO format.\n\n    Args:\n        images_directory_path (Optional[str]): The path to the\n            directory where the images should be saved.\n            If not provided, images will not be saved.\n        annotations_directory_path (Optional[str]): The path to the\n            directory where the annotations in\n            YOLO format should be saved. If not provided,\n            annotations will not be saved.\n        data_yaml_path (Optional[str]): The path where the data.yaml\n            file should be saved.\n            If not provided, the file will not be saved.\n        min_image_area_percentage (float): The minimum percentage of\n            detection area relative to\n            the image area for a detection to be included.\n            Argument is used only for segmentation datasets.\n        max_image_area_percentage (float): The maximum percentage\n            of detection area relative to\n            the image area for a detection to be included.\n            Argument is used only for segmentation datasets.\n        approximation_percentage (float): The percentage of polygon points to\n            be removed from the input polygon, in the range [0, 1).\n            This is useful for simplifying the annotations.\n            Argument is used only for segmentation datasets.\n    \"\"\"\n    if images_directory_path is not None:\n        save_dataset_images(\n            images_directory_path=images_directory_path, images=self.images\n        )\n    if annotations_directory_path is not None:\n        save_yolo_annotations(\n            annotations_directory_path=annotations_directory_path,\n            images=self.images,\n            annotations=self.annotations,\n            min_image_area_percentage=min_image_area_percentage,\n            max_image_area_percentage=max_image_area_percentage,\n            approximation_percentage=approximation_percentage,\n        )\n    if data_yaml_path is not None:\n        save_data_yaml(data_yaml_path=data_yaml_path, classes=self.classes)\n
"},{"location":"datasets/core/#supervision.dataset.core.DetectionDataset.from_coco","title":"from_coco(images_directory_path, annotations_path, force_masks=False) classmethod","text":"

Creates a Dataset instance from COCO formatted data.

Parameters:

- images_directory_path (str): The path to the directory containing the images. Required.
- annotations_path (str): The path to the JSON annotation file. Required.
- force_masks (bool, default False): If True, forces masks to be loaded for all annotations, regardless of whether they are present.

Returns:

- DetectionDataset: A DetectionDataset instance containing the loaded images and annotations.

Examples:

import roboflow\nfrom roboflow import Roboflow\nimport supervision as sv\n\nroboflow.login()\nrf = Roboflow()\n\nproject = rf.workspace(WORKSPACE_ID).project(PROJECT_ID)\ndataset = project.version(PROJECT_VERSION).download(\"coco\")\n\nds = sv.DetectionDataset.from_coco(\n    images_directory_path=f\"{dataset.location}/train\",\n    annotations_path=f\"{dataset.location}/train/_annotations.coco.json\",\n)\n\nds.classes\n# ['dog', 'person']\n
Source code in supervision/dataset/core.py
@classmethod\ndef from_coco(\n    cls,\n    images_directory_path: str,\n    annotations_path: str,\n    force_masks: bool = False,\n) -> DetectionDataset:\n    \"\"\"\n    Creates a Dataset instance from COCO formatted data.\n\n    Args:\n        images_directory_path (str): The path to the\n            directory containing the images.\n        annotations_path (str): The path to the json annotation files.\n        force_masks (bool, optional): If True,\n            forces masks to be loaded for all annotations,\n            regardless of whether they are present.\n\n    Returns:\n        DetectionDataset: A DetectionDataset instance containing\n            the loaded images and annotations.\n\n    Examples:\n        ```python\n        import roboflow\n        from roboflow import Roboflow\n        import supervision as sv\n\n        roboflow.login()\n        rf = Roboflow()\n\n        project = rf.workspace(WORKSPACE_ID).project(PROJECT_ID)\n        dataset = project.version(PROJECT_VERSION).download(\"coco\")\n\n        ds = sv.DetectionDataset.from_coco(\n            images_directory_path=f\"{dataset.location}/train\",\n            annotations_path=f\"{dataset.location}/train/_annotations.coco.json\",\n        )\n\n        ds.classes\n        # ['dog', 'person']\n        ```\n    \"\"\"\n    classes, images, annotations = load_coco_annotations(\n        images_directory_path=images_directory_path,\n        annotations_path=annotations_path,\n        force_masks=force_masks,\n    )\n    return DetectionDataset(classes=classes, images=images, annotations=annotations)\n
"},{"location":"datasets/core/#supervision.dataset.core.DetectionDataset.from_pascal_voc","title":"from_pascal_voc(images_directory_path, annotations_directory_path, force_masks=False) classmethod","text":"

Creates a Dataset instance from PASCAL VOC formatted data.

Parameters:

- images_directory_path (str): Path to the directory containing the images. Required.
- annotations_directory_path (str): Path to the directory containing the PASCAL VOC XML annotations. Required.
- force_masks (bool, default False): If True, forces masks to be loaded for all annotations, regardless of whether they are present.

Returns:

- DetectionDataset: A DetectionDataset instance containing the loaded images and annotations.

Examples:

import roboflow\nfrom roboflow import Roboflow\nimport supervision as sv\n\nroboflow.login()\n\nrf = Roboflow()\n\nproject = rf.workspace(WORKSPACE_ID).project(PROJECT_ID)\ndataset = project.version(PROJECT_VERSION).download(\"voc\")\n\nds = sv.DetectionDataset.from_pascal_voc(\n    images_directory_path=f\"{dataset.location}/train/images\",\n    annotations_directory_path=f\"{dataset.location}/train/labels\"\n)\n\nds.classes\n# ['dog', 'person']\n
Source code in supervision/dataset/core.py
@classmethod\ndef from_pascal_voc(\n    cls,\n    images_directory_path: str,\n    annotations_directory_path: str,\n    force_masks: bool = False,\n) -> DetectionDataset:\n    \"\"\"\n    Creates a Dataset instance from PASCAL VOC formatted data.\n\n    Args:\n        images_directory_path (str): Path to the directory containing the images.\n        annotations_directory_path (str): Path to the directory\n            containing the PASCAL VOC XML annotations.\n        force_masks (bool, optional): If True, forces masks to\n            be loaded for all annotations, regardless of whether they are present.\n\n    Returns:\n        DetectionDataset: A DetectionDataset instance containing\n            the loaded images and annotations.\n\n    Examples:\n        ```python\n        import roboflow\n        from roboflow import Roboflow\n        import supervision as sv\n\n        roboflow.login()\n\n        rf = Roboflow()\n\n        project = rf.workspace(WORKSPACE_ID).project(PROJECT_ID)\n        dataset = project.version(PROJECT_VERSION).download(\"voc\")\n\n        ds = sv.DetectionDataset.from_pascal_voc(\n            images_directory_path=f\"{dataset.location}/train/images\",\n            annotations_directory_path=f\"{dataset.location}/train/labels\"\n        )\n\n        ds.classes\n        # ['dog', 'person']\n        ```\n    \"\"\"\n\n    classes, images, annotations = load_pascal_voc_annotations(\n        images_directory_path=images_directory_path,\n        annotations_directory_path=annotations_directory_path,\n        force_masks=force_masks,\n    )\n\n    return DetectionDataset(classes=classes, images=images, annotations=annotations)\n
"},{"location":"datasets/core/#supervision.dataset.core.DetectionDataset.from_yolo","title":"from_yolo(images_directory_path, annotations_directory_path, data_yaml_path, force_masks=False, is_obb=False) classmethod","text":"

Creates a Dataset instance from YOLO formatted data.

Parameters:

- images_directory_path (str): The path to the directory containing the images. Required.
- annotations_directory_path (str): The path to the directory containing the YOLO annotation files. Required.
- data_yaml_path (str): The path to the data YAML file containing class information. Required.
- force_masks (bool, default False): If True, forces masks to be loaded for all annotations, regardless of whether they are present.
- is_obb (bool, default False): If True, loads the annotations in OBB format. OBB annotations are defined as [class_id, x, y, x, y, x, y, x, y], where pairs of [x, y] are box corners.

Returns:

- DetectionDataset: A DetectionDataset instance containing the loaded images and annotations.

Examples:

import roboflow\nfrom roboflow import Roboflow\nimport supervision as sv\n\nroboflow.login()\nrf = Roboflow()\n\nproject = rf.workspace(WORKSPACE_ID).project(PROJECT_ID)\ndataset = project.version(PROJECT_VERSION).download(\"yolov5\")\n\nds = sv.DetectionDataset.from_yolo(\n    images_directory_path=f\"{dataset.location}/train/images\",\n    annotations_directory_path=f\"{dataset.location}/train/labels\",\n    data_yaml_path=f\"{dataset.location}/data.yaml\"\n)\n\nds.classes\n# ['dog', 'person']\n
Source code in supervision/dataset/core.py
@classmethod\ndef from_yolo(\n    cls,\n    images_directory_path: str,\n    annotations_directory_path: str,\n    data_yaml_path: str,\n    force_masks: bool = False,\n    is_obb: bool = False,\n) -> DetectionDataset:\n    \"\"\"\n    Creates a Dataset instance from YOLO formatted data.\n\n    Args:\n        images_directory_path (str): The path to the\n            directory containing the images.\n        annotations_directory_path (str): The path to the directory\n            containing the YOLO annotation files.\n        data_yaml_path (str): The path to the data\n            YAML file containing class information.\n        force_masks (bool, optional): If True, forces\n            masks to be loaded for all annotations,\n            regardless of whether they are present.\n        is_obb (bool, optional): If True, loads the annotations in OBB format.\n            OBB annotations are defined as `[class_id, x, y, x, y, x, y, x, y]`,\n            where pairs of [x, y] are box corners.\n\n    Returns:\n        DetectionDataset: A DetectionDataset instance\n            containing the loaded images and annotations.\n\n    Examples:\n        ```python\n        import roboflow\n        from roboflow import Roboflow\n        import supervision as sv\n\n        roboflow.login()\n        rf = Roboflow()\n\n        project = rf.workspace(WORKSPACE_ID).project(PROJECT_ID)\n        dataset = project.version(PROJECT_VERSION).download(\"yolov5\")\n\n        ds = sv.DetectionDataset.from_yolo(\n            images_directory_path=f\"{dataset.location}/train/images\",\n            annotations_directory_path=f\"{dataset.location}/train/labels\",\n            data_yaml_path=f\"{dataset.location}/data.yaml\"\n        )\n\n        ds.classes\n        # ['dog', 'person']\n        ```\n    \"\"\"\n    classes, images, annotations = load_yolo_annotations(\n        images_directory_path=images_directory_path,\n        annotations_directory_path=annotations_directory_path,\n        data_yaml_path=data_yaml_path,\n        force_masks=force_masks,\n        is_obb=is_obb,\n    )\n    return DetectionDataset(classes=classes, images=images, annotations=annotations)\n
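As a hedged illustration of the `is_obb` layout described above, each OBB label line stores a class ID followed by four (x, y) corner pairs, and loading such labels only requires flipping the flag. The dataset paths below are placeholders:

```python
import supervision as sv

# Example OBB label line, following the [class_id, x, y, x, y, x, y, x, y]
# layout documented for `is_obb` (coordinate values are made up):
# 0 0.10 0.10 0.90 0.10 0.90 0.40 0.10 0.40

ds = sv.DetectionDataset.from_yolo(
    images_directory_path="dataset/train/images",
    annotations_directory_path="dataset/train/labels",
    data_yaml_path="dataset/data.yaml",
    is_obb=True,
)
```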
"},{"location":"datasets/core/#supervision.dataset.core.DetectionDataset.merge","title":"merge(dataset_list) classmethod","text":"

Merge a list of DetectionDataset objects into a single DetectionDataset object.

This method takes a list of DetectionDataset objects and combines their respective fields (classes, images, annotations) into a single DetectionDataset object.

Parameters:

- dataset_list (List[DetectionDataset]): A list of DetectionDataset objects to merge. Required.

Returns:

- DetectionDataset: A single DetectionDataset object containing the merged data from the input list.

Examples:

import supervision as sv\n\nds_1 = sv.DetectionDataset(...)\nlen(ds_1)\n# 100\nds_1.classes\n# ['dog', 'person']\n\nds_2 = sv.DetectionDataset(...)\nlen(ds_2)\n# 200\nds_2.classes\n# ['cat']\n\nds_merged = sv.DetectionDataset.merge([ds_1, ds_2])\nlen(ds_merged)\n# 300\nds_merged.classes\n# ['cat', 'dog', 'person']\n
Source code in supervision/dataset/core.py
@classmethod\ndef merge(cls, dataset_list: List[DetectionDataset]) -> DetectionDataset:\n    \"\"\"\n    Merge a list of `DetectionDataset` objects into a single\n        `DetectionDataset` object.\n\n    This method takes a list of `DetectionDataset` objects and combines\n    their respective fields (`classes`, `images`,\n    `annotations`) into a single `DetectionDataset` object.\n\n    Args:\n        dataset_list (List[DetectionDataset]): A list of `DetectionDataset`\n            objects to merge.\n\n    Returns:\n        (DetectionDataset): A single `DetectionDataset` object containing\n        the merged data from the input list.\n\n    Examples:\n        ```python\n        import supervision as sv\n\n        ds_1 = sv.DetectionDataset(...)\n        len(ds_1)\n        # 100\n        ds_1.classes\n        # ['dog', 'person']\n\n        ds_2 = sv.DetectionDataset(...)\n        len(ds_2)\n        # 200\n        ds_2.classes\n        # ['cat']\n\n        ds_merged = sv.DetectionDataset.merge([ds_1, ds_2])\n        len(ds_merged)\n        # 300\n        ds_merged.classes\n        # ['cat', 'dog', 'person']\n        ```\n    \"\"\"\n    merged_images, merged_annotations = {}, {}\n    class_lists = [dataset.classes for dataset in dataset_list]\n    merged_classes = merge_class_lists(class_lists=class_lists)\n\n    for dataset in dataset_list:\n        class_index_mapping = build_class_index_mapping(\n            source_classes=dataset.classes, target_classes=merged_classes\n        )\n        for image_name, image, detections in dataset:\n            if image_name in merged_annotations:\n                raise ValueError(\n                    f\"Image name {image_name} is not unique across datasets.\"\n                )\n\n            merged_images[image_name] = image\n            merged_annotations[image_name] = map_detections_class_id(\n                source_to_target_mapping=class_index_mapping,\n                detections=detections,\n            )\n\n    return cls(\n        classes=merged_classes, images=merged_images, annotations=merged_annotations\n    )\n
"},{"location":"datasets/core/#supervision.dataset.core.DetectionDataset.split","title":"split(split_ratio=0.8, random_state=None, shuffle=True)","text":"

Splits the dataset into two parts (training and testing) using the provided split_ratio.

Parameters:

- split_ratio (float, default 0.8): The ratio of the training set to the entire dataset.
- random_state (int, default None): The seed for the random number generator. This is used for reproducibility.
- shuffle (bool, default True): Whether to shuffle the data before splitting.

Returns:

- Tuple[DetectionDataset, DetectionDataset]: A tuple containing the training and testing datasets.

Examples:

import supervision as sv\n\nds = sv.DetectionDataset(...)\ntrain_ds, test_ds = ds.split(split_ratio=0.7, random_state=42, shuffle=True)\nlen(train_ds), len(test_ds)\n# (700, 300)\n
Source code in supervision/dataset/core.py
def split(\n    self, split_ratio=0.8, random_state=None, shuffle: bool = True\n) -> Tuple[DetectionDataset, DetectionDataset]:\n    \"\"\"\n    Splits the dataset into two parts (training and testing)\n        using the provided split_ratio.\n\n    Args:\n        split_ratio (float, optional): The ratio of the training\n            set to the entire dataset.\n        random_state (int, optional): The seed for the random number generator.\n            This is used for reproducibility.\n        shuffle (bool, optional): Whether to shuffle the data before splitting.\n\n    Returns:\n        Tuple[DetectionDataset, DetectionDataset]: A tuple containing\n            the training and testing datasets.\n\n    Examples:\n        ```python\n        import supervision as sv\n\n        ds = sv.DetectionDataset(...)\n        train_ds, test_ds = ds.split(split_ratio=0.7, random_state=42, shuffle=True)\n        len(train_ds), len(test_ds)\n        # (700, 300)\n        ```\n    \"\"\"\n\n    image_names = list(self.images.keys())\n    train_names, test_names = train_test_split(\n        data=image_names,\n        train_ratio=split_ratio,\n        random_state=random_state,\n        shuffle=shuffle,\n    )\n\n    train_dataset = DetectionDataset(\n        classes=self.classes,\n        images={name: self.images[name] for name in train_names},\n        annotations={name: self.annotations[name] for name in train_names},\n    )\n    test_dataset = DetectionDataset(\n        classes=self.classes,\n        images={name: self.images[name] for name in test_names},\n        annotations={name: self.annotations[name] for name in test_names},\n    )\n    return train_dataset, test_dataset\n
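Because `split` partitions the dataset by image name, the two halves are disjoint and can be recombined with `DetectionDataset.merge`. A quick sanity check, assuming the dataset's image names are unique:

```python
import supervision as sv

ds = sv.DetectionDataset(...)
train_ds, test_ds = ds.split(split_ratio=0.8, random_state=42)

# merge() raises ValueError on duplicate image names, so this only succeeds
# because the two splits do not overlap.
recombined = sv.DetectionDataset.merge([train_ds, test_ds])
assert len(recombined) == len(ds)
```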
"},{"location":"datasets/core/#supervision.dataset.core.ClassificationDataset-functions","title":"Functions","text":""},{"location":"datasets/core/#supervision.dataset.core.ClassificationDataset.as_folder_structure","title":"as_folder_structure(root_directory_path)","text":"

Saves the dataset as a multi-class folder structure.

Parameters:

- root_directory_path (str, required): The path to the directory where the dataset will be saved.

Source code in supervision/dataset/core.py
def as_folder_structure(self, root_directory_path: str) -> None:\n    \"\"\"\n    Saves the dataset as a multi-class folder structure.\n\n    Args:\n        root_directory_path (str): The path to the directory\n            where the dataset will be saved.\n    \"\"\"\n    os.makedirs(root_directory_path, exist_ok=True)\n\n    for class_name in self.classes:\n        os.makedirs(os.path.join(root_directory_path, class_name), exist_ok=True)\n\n    for image_path in self.images:\n        classification = self.annotations[image_path]\n        image = self.images[image_path]\n        image_name = Path(image_path).name\n        class_id = (\n            classification.class_id[0]\n            if classification.confidence is None\n            else classification.get_top_k(1)[0][0]\n        )\n        class_name = self.classes[class_id]\n        image_path = os.path.join(root_directory_path, class_name, image_name)\n        cv2.imwrite(image_path, image)\n
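No example is shown above, so here is a minimal sketch of what calling `as_folder_structure` produces; the class and file names in the comment are illustrative:

```python
import supervision as sv

cd = sv.ClassificationDataset(...)
cd.as_folder_structure(root_directory_path="./my-dataset")

# Resulting layout: one sub-directory per class, each holding its images, e.g.
#
# my-dataset/
# ├── cat/
# │   ├── image_1.jpg
# │   └── image_2.jpg
# └── dog/
#     └── image_3.jpg
```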
"},{"location":"datasets/core/#supervision.dataset.core.ClassificationDataset.from_folder_structure","title":"from_folder_structure(root_directory_path) classmethod","text":"

Loads data from a multi-class folder structure into a ClassificationDataset.

Parameters:

- root_directory_path (str, required): The path to the dataset directory.

Returns:

ClassificationDataset: The dataset.

Examples:

```python
import roboflow
from roboflow import Roboflow
import supervision as sv

roboflow.login()
rf = Roboflow()

project = rf.workspace(WORKSPACE_ID).project(PROJECT_ID)
dataset = project.version(PROJECT_VERSION).download("folder")

cd = sv.ClassificationDataset.from_folder_structure(
    root_directory_path=f"{dataset.location}/train"
)
```
Source code in supervision/dataset/core.py
@classmethod\ndef from_folder_structure(cls, root_directory_path: str) -> ClassificationDataset:\n    \"\"\"\n    Load data from a multiclass folder structure into a ClassificationDataset.\n\n    Args:\n        root_directory_path (str): The path to the dataset directory.\n\n    Returns:\n        ClassificationDataset: The dataset.\n\n    Examples:\n        ```python\n        import roboflow\n        from roboflow import Roboflow\n        import supervision as sv\n\n        roboflow.login()\n        rf = Roboflow()\n\n        project = rf.workspace(WORKSPACE_ID).project(PROJECT_ID)\n        dataset = project.version(PROJECT_VERSION).download(\"folder\")\n\n        cd = sv.ClassificationDataset.from_folder_structure(\n            root_directory_path=f\"{dataset.location}/train\"\n        )\n        ```\n    \"\"\"\n    classes = os.listdir(root_directory_path)\n    classes = sorted(set(classes))\n\n    images = {}\n    annotations = {}\n\n    for class_name in classes:\n        class_id = classes.index(class_name)\n\n        for image in os.listdir(os.path.join(root_directory_path, class_name)):\n            image_path = str(os.path.join(root_directory_path, class_name, image))\n            images[image_path] = cv2.imread(image_path)\n            annotations[image_path] = Classifications(\n                class_id=np.array([class_id]),\n            )\n\n    return cls(\n        classes=classes,\n        images=images,\n        annotations=annotations,\n    )\n
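The expected on-disk layout is the inverse of `as_folder_structure`: each immediate sub-directory of `root_directory_path` is treated as a class, and every file inside it as an image of that class. A sketch with illustrative folder names:

```python
# dataset-root/          <- root_directory_path
# ├── cat/               <- becomes class "cat" (classes are sorted alphabetically)
# │   ├── 0001.jpg
# │   └── 0002.jpg
# └── dog/               <- becomes class "dog"
#     └── 0003.jpg

import supervision as sv

cd = sv.ClassificationDataset.from_folder_structure(root_directory_path="dataset-root")
cd.classes
# ['cat', 'dog']
```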
"},{"location":"datasets/core/#supervision.dataset.core.ClassificationDataset.split","title":"split(split_ratio=0.8, random_state=None, shuffle=True)","text":"

Splits the dataset into two parts (training and testing) using the provided split_ratio.

Parameters:

- split_ratio (float, default 0.8): The ratio of the training set to the entire dataset.
- random_state (int, default None): The seed for the random number generator. This is used for reproducibility.
- shuffle (bool, default True): Whether to shuffle the data before splitting.

Returns:

Tuple[ClassificationDataset, ClassificationDataset]: A tuple containing the training and testing datasets.

Examples:

```python
import supervision as sv

cd = sv.ClassificationDataset(...)
train_cd, test_cd = cd.split(split_ratio=0.7, random_state=42, shuffle=True)
len(train_cd), len(test_cd)
# (700, 300)
```
Source code in supervision/dataset/core.py
def split(\n    self, split_ratio=0.8, random_state=None, shuffle: bool = True\n) -> Tuple[ClassificationDataset, ClassificationDataset]:\n    \"\"\"\n    Splits the dataset into two parts (training and testing)\n        using the provided split_ratio.\n\n    Args:\n        split_ratio (float, optional): The ratio of the training\n            set to the entire dataset.\n        random_state (int, optional): The seed for the\n            random number generator. This is used for reproducibility.\n        shuffle (bool, optional): Whether to shuffle the data before splitting.\n\n    Returns:\n        Tuple[ClassificationDataset, ClassificationDataset]: A tuple containing\n        the training and testing datasets.\n\n    Examples:\n        ```python\n        import supervision as sv\n\n        cd = sv.ClassificationDataset(...)\n        train_cd,test_cd = cd.split(split_ratio=0.7, random_state=42,shuffle=True)\n        len(train_cd), len(test_cd)\n        # (700, 300)\n        ```\n    \"\"\"\n    image_names = list(self.images.keys())\n    train_names, test_names = train_test_split(\n        data=image_names,\n        train_ratio=split_ratio,\n        random_state=random_state,\n        shuffle=shuffle,\n    )\n\n    train_dataset = ClassificationDataset(\n        classes=self.classes,\n        images={name: self.images[name] for name in train_names},\n        annotations={name: self.annotations[name] for name in train_names},\n    )\n    test_dataset = ClassificationDataset(\n        classes=self.classes,\n        images={name: self.images[name] for name in test_names},\n        annotations={name: self.annotations[name] for name in test_names},\n    )\n    return train_dataset, test_dataset\n
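Since `random_state` seeds the shuffle, repeating a split with the same seed reproduces the same partition. A small check, assuming the same dataset instance is used for both calls:

```python
import supervision as sv

cd = sv.ClassificationDataset(...)

split_a = cd.split(split_ratio=0.7, random_state=42, shuffle=True)
split_b = cd.split(split_ratio=0.7, random_state=42, shuffle=True)

# Same seed, same shuffle -> the same image names land in each half.
assert set(split_a[0].images) == set(split_b[0].images)
assert set(split_a[1].images) == set(split_b[1].images)
```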
"},{"location":"datasets/utils/","title":"Datasets Utils","text":"rle_to_mask

Converts run-length encoding (RLE) to a binary mask.

Parameters:

- rle (Union[NDArray[int_], List[int]], required): The 1D RLE array, the format used in the COCO dataset (column-wise encoding; values of the array with even indices represent the number of pixels assigned as background, values with odd indices represent the number of pixels assigned as foreground object).
- resolution_wh (Tuple[int, int], required): The width (w) and height (h) of the desired binary mask.

Returns:

NDArray[bool_]: The generated 2D Boolean mask of shape (h, w), where the foreground object is marked with True's and the rest is filled with False's.

Raises:

AssertionError: If the sum of pixels encoded in the RLE differs from the number of pixels in the expected mask (computed based on resolution_wh).

Examples:

```python
import supervision as sv

sv.rle_to_mask([5, 2, 2, 2, 5], (4, 4))
# array([
#     [False, False, False, False],
#     [False, True,  True,  False],
#     [False, True,  True,  False],
#     [False, False, False, False],
# ])
```
Source code in supervision/dataset/utils.py
def rle_to_mask(\n    rle: Union[npt.NDArray[np.int_], List[int]], resolution_wh: Tuple[int, int]\n) -> npt.NDArray[np.bool_]:\n    \"\"\"\n    Converts run-length encoding (RLE) to a binary mask.\n\n    Args:\n        rle (Union[npt.NDArray[np.int_], List[int]]): The 1D RLE array, the format\n            used in the COCO dataset (column-wise encoding, values of an array with\n            even indices represent the number of pixels assigned as background,\n            values of an array with odd indices represent the number of pixels\n            assigned as foreground object).\n        resolution_wh (Tuple[int, int]): The width (w) and height (h)\n            of the desired binary mask.\n\n    Returns:\n        The generated 2D Boolean mask of shape `(h, w)`, where the foreground object is\n            marked with `True`'s and the rest is filled with `False`'s.\n\n    Raises:\n        AssertionError: If the sum of pixels encoded in RLE differs from the\n            number of pixels in the expected mask (computed based on resolution_wh).\n\n    Examples:\n        ```python\n        import supervision as sv\n\n        sv.rle_to_mask([5, 2, 2, 2, 5], (4, 4))\n        # array([\n        #     [False, False, False, False],\n        #     [False, True,  True,  False],\n        #     [False, True,  True,  False],\n        #     [False, False, False, False],\n        # ])\n        ```\n    \"\"\"\n    if isinstance(rle, list):\n        rle = np.array(rle, dtype=int)\n\n    width, height = resolution_wh\n\n    assert width * height == np.sum(rle), (\n        \"the sum of the number of pixels in the RLE must be the same \"\n        \"as the number of pixels in the expected mask\"\n    )\n\n    zero_one_values = np.zeros(shape=(rle.size, 1), dtype=np.uint8)\n    zero_one_values[1::2] = 1\n\n    decoded_rle = np.repeat(zero_one_values, rle, axis=0)\n    decoded_rle = np.append(\n        decoded_rle, np.zeros(width * height - len(decoded_rle), dtype=np.uint8)\n    )\n    return decoded_rle.reshape((height, width), order=\"F\")\n
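The column-wise decoding can be reproduced with a few lines of NumPy, which is roughly what the implementation above does. A minimal sketch for the documented `[5, 2, 2, 2, 5]` example:

```python
import numpy as np

rle = np.array([5, 2, 2, 2, 5])
width, height = 4, 4

values = np.zeros(rle.size, dtype=bool)
values[1::2] = True                              # odd-index runs are foreground
flat = np.repeat(values, rle)                    # expand run lengths into pixels
mask = flat.reshape((height, width), order="F")  # column-wise fill, COCO style
# mask matches the array shown in the example above
```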
mask_to_rle

Converts a binary mask into a run-length encoding (RLE).

Parameters:

- mask (NDArray[bool_], required): 2D binary mask where True indicates foreground object and False indicates background.

Returns:

List[int]: The run-length encoded mask. Values of the list with even indices represent the number of pixels assigned as background (False), values with odd indices represent the number of pixels assigned as foreground object (True).

Raises:

AssertionError: If the input mask is not 2D or is empty.

Examples:

```python
import numpy as np
import supervision as sv

mask = np.array([
    [True, True, True, True],
    [True, True, True, True],
    [True, True, True, True],
    [True, True, True, True],
])
sv.mask_to_rle(mask)
# [0, 16]

mask = np.array([
    [False, False, False, False],
    [False, True,  True,  False],
    [False, True,  True,  False],
    [False, False, False, False],
])
sv.mask_to_rle(mask)
# [5, 2, 2, 2, 5]
```

Source code in supervision/dataset/utils.py
def mask_to_rle(mask: npt.NDArray[np.bool_]) -> List[int]:\n    \"\"\"\n    Converts a binary mask into a run-length encoding (RLE).\n\n    Args:\n        mask (npt.NDArray[np.bool_]): 2D binary mask where `True` indicates foreground\n            object and `False` indicates background.\n\n    Returns:\n        The run-length encoded mask. Values of a list with even indices\n            represent the number of pixels assigned as background (`False`), values\n            of a list with odd indices represent the number of pixels assigned\n            as foreground object (`True`).\n\n    Raises:\n        AssertionError: If input mask is not 2D or is empty.\n\n    Examples:\n        ```python\n        import numpy as np\n        import supervision as sv\n\n        mask = np.array([\n            [True, True, True, True],\n            [True, True, True, True],\n            [True, True, True, True],\n            [True, True, True, True],\n        ])\n        sv.mask_to_rle(mask)\n        # [0, 16]\n\n        mask = np.array([\n            [False, False, False, False],\n            [False, True,  True,  False],\n            [False, True,  True,  False],\n            [False, False, False, False],\n        ])\n        sv.mask_to_rle(mask)\n        # [5, 2, 2, 2, 5]\n        ```\n\n    ![mask_to_rle](https://media.roboflow.com/supervision-docs/mask-to-rle.png){ align=center width=\"800\" }\n    \"\"\"  # noqa E501 // docs\n    assert mask.ndim == 2, \"Input mask must be 2D\"\n    assert mask.size != 0, \"Input mask cannot be empty\"\n\n    on_value_change_indices = np.where(\n        mask.ravel(order=\"F\") != np.roll(mask.ravel(order=\"F\"), 1)\n    )[0]\n\n    on_value_change_indices = np.append(on_value_change_indices, mask.size)\n    # need to add 0 at the beginning when the same value is in the first and\n    # last element of the flattened mask\n    if on_value_change_indices[0] != 0:\n        on_value_change_indices = np.insert(on_value_change_indices, 0, 0)\n\n    rle = np.diff(on_value_change_indices)\n\n    if mask[0][0] == 1:\n        rle = np.insert(rle, 0, 0)\n\n    return list(rle)\n
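`mask_to_rle` and `rle_to_mask` are inverses for the documented COCO column-wise format, so a quick round-trip check looks like this:

```python
import numpy as np
import supervision as sv

rle = [5, 2, 2, 2, 5]
mask = sv.rle_to_mask(rle, resolution_wh=(4, 4))
assert sv.mask_to_rle(mask) == rle

# and back again
np.testing.assert_array_equal(sv.rle_to_mask(sv.mask_to_rle(mask), (4, 4)), mask)
```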
"},{"location":"detection/annotators/","title":"Annotators","text":"BoundingBoxRoundBoxBoxCornerColorCircleDotTriangleEllipseHaloPercentageBarMaskPolygonLabelRichLabelCropBlurPixelateTraceHeatMap
import supervision as sv\n\nimage = ...\ndetections = sv.Detections(...)\n\nbounding_box_annotator = sv.BoundingBoxAnnotator()\nannotated_frame = bounding_box_annotator.annotate(\n    scene=image.copy(),\n    detections=detections\n)\n
import supervision as sv\n\nimage = ...\ndetections = sv.Detections(...)\n\nround_box_annotator = sv.RoundBoxAnnotator()\nannotated_frame = round_box_annotator.annotate(\n    scene=image.copy(),\n    detections=detections\n)\n
import supervision as sv\n\nimage = ...\ndetections = sv.Detections(...)\n\ncorner_annotator = sv.BoxCornerAnnotator()\nannotated_frame = corner_annotator.annotate(\n    scene=image.copy(),\n    detections=detections\n)\n
import supervision as sv\n\nimage = ...\ndetections = sv.Detections(...)\n\ncolor_annotator = sv.ColorAnnotator()\nannotated_frame = color_annotator.annotate(\n    scene=image.copy(),\n    detections=detections\n)\n
import supervision as sv\n\nimage = ...\ndetections = sv.Detections(...)\n\ncircle_annotator = sv.CircleAnnotator()\nannotated_frame = circle_annotator.annotate(\n    scene=image.copy(),\n    detections=detections\n)\n
import supervision as sv\n\nimage = ...\ndetections = sv.Detections(...)\n\ndot_annotator = sv.DotAnnotator()\nannotated_frame = dot_annotator.annotate(\n    scene=image.copy(),\n    detections=detections\n)\n
import supervision as sv\n\nimage = ...\ndetections = sv.Detections(...)\n\ntriangle_annotator = sv.TriangleAnnotator()\nannotated_frame = triangle_annotator.annotate(\n    scene=image.copy(),\n    detections=detections\n)\n
import supervision as sv\n\nimage = ...\ndetections = sv.Detections(...)\n\nellipse_annotator = sv.EllipseAnnotator()\nannotated_frame = ellipse_annotator.annotate(\n    scene=image.copy(),\n    detections=detections\n)\n
import supervision as sv\n\nimage = ...\ndetections = sv.Detections(...)\n\nhalo_annotator = sv.HaloAnnotator()\nannotated_frame = halo_annotator.annotate(\n    scene=image.copy(),\n    detections=detections\n)\n
import supervision as sv\n\nimage = ...\ndetections = sv.Detections(...)\n\npercentage_bar_annotator = sv.PercentageBarAnnotator()\nannotated_frame = percentage_bar_annotator.annotate(\n    scene=image.copy(),\n    detections=detections\n)\n
import supervision as sv\n\nimage = ...\ndetections = sv.Detections(...)\n\nmask_annotator = sv.MaskAnnotator()\nannotated_frame = mask_annotator.annotate(\n    scene=image.copy(),\n    detections=detections\n)\n
import supervision as sv\n\nimage = ...\ndetections = sv.Detections(...)\n\npolygon_annotator = sv.PolygonAnnotator()\nannotated_frame = polygon_annotator.annotate(\n    scene=image.copy(),\n    detections=detections\n)\n
import supervision as sv\n\nimage = ...\ndetections = sv.Detections(...)\n\nlabels = [\n    f\"{class_name} {confidence:.2f}\"\n    for class_name, confidence\n    in zip(detections['class_name'], detections.confidence)\n]\n\nlabel_annotator = sv.LabelAnnotator(text_position=sv.Position.CENTER)\nannotated_frame = label_annotator.annotate(\n    scene=image.copy(),\n    detections=detections,\n    labels=labels\n)\n
import supervision as sv\n\nimage = ...\ndetections = sv.Detections(...)\n\nlabels = [\n    f\"{class_name} {confidence:.2f}\"\n    for class_name, confidence\n    in zip(detections['class_name'], detections.confidence)\n]\n\nrich_label_annotator = sv.RichLabelAnnotator(\n    font_path=\".../font.ttf\",\n    text_position=sv.Position.CENTER\n)\nannotated_frame = rich_label_annotator.annotate(\n    scene=image.copy(),\n    detections=detections,\n    labels=labels\n)\n
import supervision as sv\n\nimage = ...\ndetections = sv.Detections(...)\n\ncrop_annotator = sv.CropAnnotator()\nannotated_frame = crop_annotator.annotate(\n    scene=image.copy(),\n    detections=detections\n)\n
import supervision as sv\n\nimage = ...\ndetections = sv.Detections(...)\n\nblur_annotator = sv.BlurAnnotator()\nannotated_frame = blur_annotator.annotate(\n    scene=image.copy(),\n    detections=detections\n)\n
import supervision as sv\n\nimage = ...\ndetections = sv.Detections(...)\n\npixelate_annotator = sv.PixelateAnnotator()\nannotated_frame = pixelate_annotator.annotate(\n    scene=image.copy(),\n    detections=detections\n)\n
import supervision as sv\nfrom ultralytics import YOLO\n\nmodel = YOLO('yolov8x.pt')\n\ntrace_annotator = sv.TraceAnnotator()\n\nvideo_info = sv.VideoInfo.from_video_path(video_path='...')\nframes_generator = sv.get_video_frames_generator(source_path='...')\ntracker = sv.ByteTrack()\n\nwith sv.VideoSink(target_path='...', video_info=video_info) as sink:\n    for frame in frames_generator:\n        result = model(frame)[0]\n        detections = sv.Detections.from_ultralytics(result)\n        detections = tracker.update_with_detections(detections)\n        annotated_frame = trace_annotator.annotate(\n            scene=frame.copy(),\n            detections=detections)\n        sink.write_frame(frame=annotated_frame)\n
import supervision as sv\nfrom ultralytics import YOLO\n\nmodel = YOLO('yolov8x.pt')\n\nheat_map_annotator = sv.HeatMapAnnotator()\n\nvideo_info = sv.VideoInfo.from_video_path(video_path='...')\nframes_generator = sv.get_video_frames_generator(source_path='...')\n\nwith sv.VideoSink(target_path='...', video_info=video_info) as sink:\n    for frame in frames_generator:\n        result = model(frame)[0]\n        detections = sv.Detections.from_ultralytics(result)\n        annotated_frame = heat_map_annotator.annotate(\n            scene=frame.copy(),\n            detections=detections)\n        sink.write_frame(frame=annotated_frame)\n
BoundingBoxAnnotator

Bases: BaseAnnotator

A class for drawing bounding boxes on an image using provided detections.

Source code in supervision/annotators/core.py
class BoundingBoxAnnotator(BaseAnnotator):\n    \"\"\"\n    A class for drawing bounding boxes on an image using provided detections.\n    \"\"\"\n\n    def __init__(\n        self,\n        color: Union[Color, ColorPalette] = ColorPalette.DEFAULT,\n        thickness: int = 2,\n        color_lookup: ColorLookup = ColorLookup.CLASS,\n    ):\n        \"\"\"\n        Args:\n            color (Union[Color, ColorPalette]): The color or color palette to use for\n                annotating detections.\n            thickness (int): Thickness of the bounding box lines.\n            color_lookup (str): Strategy for mapping colors to annotations.\n                Options are `INDEX`, `CLASS`, `TRACK`.\n        \"\"\"\n        self.color: Union[Color, ColorPalette] = color\n        self.thickness: int = thickness\n        self.color_lookup: ColorLookup = color_lookup\n\n    @convert_for_annotation_method\n    def annotate(\n        self,\n        scene: ImageType,\n        detections: Detections,\n        custom_color_lookup: Optional[np.ndarray] = None,\n    ) -> ImageType:\n        \"\"\"\n        Annotates the given scene with bounding boxes based on the provided detections.\n\n        Args:\n            scene (ImageType): The image where bounding boxes will be drawn. `ImageType`\n            is a flexible type, accepting either `numpy.ndarray` or `PIL.Image.Image`.\n            detections (Detections): Object detections to annotate.\n            custom_color_lookup (Optional[np.ndarray]): Custom color lookup array.\n                Allows to override the default color mapping strategy.\n\n        Returns:\n            The annotated image, matching the type of `scene` (`numpy.ndarray`\n                or `PIL.Image.Image`)\n\n        Example:\n            ```python\n            import supervision as sv\n\n            image = ...\n            detections = sv.Detections(...)\n\n            bounding_box_annotator = sv.BoundingBoxAnnotator()\n            annotated_frame = bounding_box_annotator.annotate(\n                scene=image.copy(),\n                detections=detections\n            )\n            ```\n\n        ![bounding-box-annotator-example](https://media.roboflow.com/\n        supervision-annotator-examples/bounding-box-annotator-example-purple.png)\n        \"\"\"\n        for detection_idx in range(len(detections)):\n            x1, y1, x2, y2 = detections.xyxy[detection_idx].astype(int)\n            color = resolve_color(\n                color=self.color,\n                detections=detections,\n                detection_idx=detection_idx,\n                color_lookup=self.color_lookup\n                if custom_color_lookup is None\n                else custom_color_lookup,\n            )\n            cv2.rectangle(\n                img=scene,\n                pt1=(x1, y1),\n                pt2=(x2, y2),\n                color=color.as_bgr(),\n                thickness=self.thickness,\n            )\n        return scene\n
RoundBoxAnnotator

Bases: BaseAnnotator

A class for drawing bounding boxes with round edges on an image using provided detections.

Source code in supervision/annotators/core.py
class RoundBoxAnnotator(BaseAnnotator):\n    \"\"\"\n    A class for drawing bounding boxes with round edges on an image\n    using provided detections.\n    \"\"\"\n\n    def __init__(\n        self,\n        color: Union[Color, ColorPalette] = ColorPalette.DEFAULT,\n        thickness: int = 2,\n        color_lookup: ColorLookup = ColorLookup.CLASS,\n        roundness: float = 0.6,\n    ):\n        \"\"\"\n        Args:\n            color (Union[Color, ColorPalette]): The color or color palette to use for\n                annotating detections.\n            thickness (int): Thickness of the bounding box lines.\n            color_lookup (str): Strategy for mapping colors to annotations.\n                Options are `INDEX`, `CLASS`, `TRACK`.\n            roundness (float): Percent of roundness for edges of bounding box.\n                Value must be float 0 < roundness <= 1.0\n                By default roundness percent is calculated based on smaller side\n                length (width or height).\n        \"\"\"\n        self.color: Union[Color, ColorPalette] = color\n        self.thickness: int = thickness\n        self.color_lookup: ColorLookup = color_lookup\n        if not 0 < roundness <= 1.0:\n            raise ValueError(\"roundness attribute must be float between (0, 1.0]\")\n        self.roundness: float = roundness\n\n    @convert_for_annotation_method\n    def annotate(\n        self,\n        scene: ImageType,\n        detections: Detections,\n        custom_color_lookup: Optional[np.ndarray] = None,\n    ) -> ImageType:\n        \"\"\"\n        Annotates the given scene with bounding boxes with rounded edges\n        based on the provided detections.\n\n        Args:\n            scene (ImageType): The image where rounded bounding boxes will be drawn.\n                `ImageType` is a flexible type, accepting either `numpy.ndarray`\n                or `PIL.Image.Image`.\n            detections (Detections): Object detections to annotate.\n            custom_color_lookup (Optional[np.ndarray]): Custom color lookup array.\n                Allows to override the default color mapping strategy.\n\n        Returns:\n            The annotated image, matching the type of `scene` (`numpy.ndarray`\n                or `PIL.Image.Image`)\n\n        Example:\n            ```python\n            import supervision as sv\n\n            image = ...\n            detections = sv.Detections(...)\n\n            round_box_annotator = sv.RoundBoxAnnotator()\n            annotated_frame = round_box_annotator.annotate(\n                scene=image.copy(),\n                detections=detections\n            )\n            ```\n\n        ![round-box-annotator-example](https://media.roboflow.com/\n        supervision-annotator-examples/round-box-annotator-example-purple.png)\n        \"\"\"\n\n        for detection_idx in range(len(detections)):\n            x1, y1, x2, y2 = detections.xyxy[detection_idx].astype(int)\n            color = resolve_color(\n                color=self.color,\n                detections=detections,\n                detection_idx=detection_idx,\n                color_lookup=self.color_lookup\n                if custom_color_lookup is None\n                else custom_color_lookup,\n            )\n\n            radius = (\n                int((x2 - x1) // 2 * self.roundness)\n                if abs(x1 - x2) < abs(y1 - y2)\n                else int((y2 - y1) // 2 * self.roundness)\n            )\n\n            circle_coordinates = [\n                ((x1 + radius), (y1 + 
radius)),\n                ((x2 - radius), (y1 + radius)),\n                ((x2 - radius), (y2 - radius)),\n                ((x1 + radius), (y2 - radius)),\n            ]\n\n            line_coordinates = [\n                ((x1 + radius, y1), (x2 - radius, y1)),\n                ((x2, y1 + radius), (x2, y2 - radius)),\n                ((x1 + radius, y2), (x2 - radius, y2)),\n                ((x1, y1 + radius), (x1, y2 - radius)),\n            ]\n\n            start_angles = (180, 270, 0, 90)\n            end_angles = (270, 360, 90, 180)\n\n            for center_coordinates, line, start_angle, end_angle in zip(\n                circle_coordinates, line_coordinates, start_angles, end_angles\n            ):\n                cv2.ellipse(\n                    img=scene,\n                    center=center_coordinates,\n                    axes=(radius, radius),\n                    angle=0,\n                    startAngle=start_angle,\n                    endAngle=end_angle,\n                    color=color.as_bgr(),\n                    thickness=self.thickness,\n                )\n\n                cv2.line(\n                    img=scene,\n                    pt1=line[0],\n                    pt2=line[1],\n                    color=color.as_bgr(),\n                    thickness=self.thickness,\n                )\n\n        return scene\n
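The only RoundBox-specific knob is `roundness`, a float in `(0, 1.0]` applied to the shorter box side. A small sketch assuming `image` and `detections` are already loaded:

```python
import supervision as sv

image = ...
detections = sv.Detections(...)

# Lower roundness -> tighter corners; values outside (0, 1.0] raise ValueError.
round_box_annotator = sv.RoundBoxAnnotator(roundness=0.3, thickness=2)
annotated_frame = round_box_annotator.annotate(
    scene=image.copy(),
    detections=detections
)
```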
BoxCornerAnnotator

Bases: BaseAnnotator

A class for drawing box corners on an image using provided detections.

Source code in supervision/annotators/core.py
class BoxCornerAnnotator(BaseAnnotator):\n    \"\"\"\n    A class for drawing box corners on an image using provided detections.\n    \"\"\"\n\n    def __init__(\n        self,\n        color: Union[Color, ColorPalette] = ColorPalette.DEFAULT,\n        thickness: int = 4,\n        corner_length: int = 15,\n        color_lookup: ColorLookup = ColorLookup.CLASS,\n    ):\n        \"\"\"\n        Args:\n            color (Union[Color, ColorPalette]): The color or color palette to use for\n                annotating detections.\n            thickness (int): Thickness of the corner lines.\n            corner_length (int): Length of each corner line.\n            color_lookup (str): Strategy for mapping colors to annotations.\n                Options are `INDEX`, `CLASS`, `TRACK`.\n        \"\"\"\n        self.color: Union[Color, ColorPalette] = color\n        self.thickness: int = thickness\n        self.corner_length: int = corner_length\n        self.color_lookup: ColorLookup = color_lookup\n\n    @convert_for_annotation_method\n    def annotate(\n        self,\n        scene: ImageType,\n        detections: Detections,\n        custom_color_lookup: Optional[np.ndarray] = None,\n    ) -> ImageType:\n        \"\"\"\n        Annotates the given scene with box corners based on the provided detections.\n\n        Args:\n            scene (ImageType): The image where box corners will be drawn.\n                `ImageType` is a flexible type, accepting either `numpy.ndarray`\n                or `PIL.Image.Image`.\n            detections (Detections): Object detections to annotate.\n            custom_color_lookup (Optional[np.ndarray]): Custom color lookup array.\n                Allows to override the default color mapping strategy.\n\n        Returns:\n            The annotated image, matching the type of `scene` (`numpy.ndarray`\n                or `PIL.Image.Image`)\n\n        Example:\n            ```python\n            import supervision as sv\n\n            image = ...\n            detections = sv.Detections(...)\n\n            corner_annotator = sv.BoxCornerAnnotator()\n            annotated_frame = corner_annotator.annotate(\n                scene=image.copy(),\n                detections=detections\n            )\n            ```\n\n        ![box-corner-annotator-example](https://media.roboflow.com/\n        supervision-annotator-examples/box-corner-annotator-example-purple.png)\n        \"\"\"\n        for detection_idx in range(len(detections)):\n            x1, y1, x2, y2 = detections.xyxy[detection_idx].astype(int)\n            color = resolve_color(\n                color=self.color,\n                detections=detections,\n                detection_idx=detection_idx,\n                color_lookup=self.color_lookup\n                if custom_color_lookup is None\n                else custom_color_lookup,\n            )\n            corners = [(x1, y1), (x2, y1), (x1, y2), (x2, y2)]\n\n            for x, y in corners:\n                x_end = x + self.corner_length if x == x1 else x - self.corner_length\n                cv2.line(\n                    scene, (x, y), (x_end, y), color.as_bgr(), thickness=self.thickness\n                )\n\n                y_end = y + self.corner_length if y == y1 else y - self.corner_length\n                cv2.line(\n                    scene, (x, y), (x, y_end), color.as_bgr(), thickness=self.thickness\n                )\n        return scene\n
OrientedBoxAnnotator

Bases: BaseAnnotator

A class for drawing oriented bounding boxes on an image using provided detections.

Source code in supervision/annotators/core.py
class OrientedBoxAnnotator(BaseAnnotator):\n    \"\"\"\n    A class for drawing oriented bounding boxes on an image using provided detections.\n    \"\"\"\n\n    def __init__(\n        self,\n        color: Union[Color, ColorPalette] = ColorPalette.DEFAULT,\n        thickness: int = 2,\n        color_lookup: ColorLookup = ColorLookup.CLASS,\n    ):\n        \"\"\"\n        Args:\n            color (Union[Color, ColorPalette]): The color or color palette to use for\n                annotating detections.\n            thickness (int): Thickness of the bounding box lines.\n            color_lookup (str): Strategy for mapping colors to annotations.\n                Options are `INDEX`, `CLASS`, `TRACK`.\n        \"\"\"\n        self.color: Union[Color, ColorPalette] = color\n        self.thickness: int = thickness\n        self.color_lookup: ColorLookup = color_lookup\n\n    @convert_for_annotation_method\n    def annotate(\n        self,\n        scene: ImageType,\n        detections: Detections,\n        custom_color_lookup: Optional[np.ndarray] = None,\n    ) -> ImageType:\n        \"\"\"\n        Annotates the given scene with oriented bounding boxes based on the provided detections.\n\n        Args:\n            scene (ImageType): The image where bounding boxes will be drawn.\n                `ImageType` is a flexible type, accepting either `numpy.ndarray`\n                or `PIL.Image.Image`.\n            detections (Detections): Object detections to annotate.\n            custom_color_lookup (Optional[np.ndarray]): Custom color lookup array.\n                Allows to override the default color mapping strategy.\n\n        Returns:\n            The annotated image, matching the type of `scene` (`numpy.ndarray`\n                or `PIL.Image.Image`)\n\n        Example:\n            ```python\n            import cv2\n            import supervision as sv\n            from ultralytics import YOLO\n\n            image = cv2.imread(<SOURCE_IMAGE_PATH>)\n            model = YOLO(\"yolov8n-obb.pt\")\n\n            result = model(image)[0]\n            detections = sv.Detections.from_ultralytics(result)\n\n            oriented_box_annotator = sv.OrientedBoxAnnotator()\n            annotated_frame = oriented_box_annotator.annotate(\n                scene=image.copy(),\n                detections=detections\n            )\n            ```\n        \"\"\"  # noqa E501 // docs\n\n        if detections.data is None or ORIENTED_BOX_COORDINATES not in detections.data:\n            return scene\n\n        for detection_idx in range(len(detections)):\n            bbox = np.intp(detections.data.get(ORIENTED_BOX_COORDINATES)[detection_idx])\n            color = resolve_color(\n                color=self.color,\n                detections=detections,\n                detection_idx=detection_idx,\n                color_lookup=self.color_lookup\n                if custom_color_lookup is None\n                else custom_color_lookup,\n            )\n\n            cv2.drawContours(scene, [bbox], 0, color.as_bgr(), self.thickness)\n\n        return scene\n
ColorAnnotator

Bases: BaseAnnotator

A class for drawing box masks on an image using provided detections.

Source code in supervision/annotators/core.py
class ColorAnnotator(BaseAnnotator):\n    \"\"\"\n    A class for drawing box masks on an image using provided detections.\n    \"\"\"\n\n    def __init__(\n        self,\n        color: Union[Color, ColorPalette] = ColorPalette.DEFAULT,\n        opacity: float = 0.5,\n        color_lookup: ColorLookup = ColorLookup.CLASS,\n    ):\n        \"\"\"\n        Args:\n            color (Union[Color, ColorPalette]): The color or color palette to use for\n                annotating detections.\n            opacity (float): Opacity of the overlay mask. Must be between `0` and `1`.\n            color_lookup (str): Strategy for mapping colors to annotations.\n                Options are `INDEX`, `CLASS`, `TRACK`.\n        \"\"\"\n        self.color: Union[Color, ColorPalette] = color\n        self.color_lookup: ColorLookup = color_lookup\n        self.opacity = opacity\n\n    @convert_for_annotation_method\n    def annotate(\n        self,\n        scene: ImageType,\n        detections: Detections,\n        custom_color_lookup: Optional[np.ndarray] = None,\n    ) -> ImageType:\n        \"\"\"\n        Annotates the given scene with box masks based on the provided detections.\n\n        Args:\n            scene (ImageType): The image where bounding boxes will be drawn.\n                `ImageType` is a flexible type, accepting either `numpy.ndarray`\n                or `PIL.Image.Image`.\n            detections (Detections): Object detections to annotate.\n            custom_color_lookup (Optional[np.ndarray]): Custom color lookup array.\n                Allows to override the default color mapping strategy.\n\n        Returns:\n            The annotated image, matching the type of `scene` (`numpy.ndarray`\n                or `PIL.Image.Image`)\n\n        Example:\n            ```python\n            import supervision as sv\n\n            image = ...\n            detections = sv.Detections(...)\n\n            color_annotator = sv.ColorAnnotator()\n            annotated_frame = color_annotator.annotate(\n                scene=image.copy(),\n                detections=detections\n            )\n            ```\n\n        ![box-mask-annotator-example](https://media.roboflow.com/\n        supervision-annotator-examples/box-mask-annotator-example-purple.png)\n        \"\"\"\n        mask_image = scene.copy()\n        for detection_idx in range(len(detections)):\n            x1, y1, x2, y2 = detections.xyxy[detection_idx].astype(int)\n            color = resolve_color(\n                color=self.color,\n                detections=detections,\n                detection_idx=detection_idx,\n                color_lookup=self.color_lookup\n                if custom_color_lookup is None\n                else custom_color_lookup,\n            )\n            cv2.rectangle(\n                img=scene,\n                pt1=(x1, y1),\n                pt2=(x2, y2),\n                color=color.as_bgr(),\n                thickness=-1,\n            )\n        scene = cv2.addWeighted(\n            scene, self.opacity, mask_image, 1 - self.opacity, gamma=0\n        )\n        return scene\n
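`ColorAnnotator` fills each box and blends it with the original frame, so `opacity` is the parameter that matters most. A sketch assuming `image` and `detections` already exist:

```python
import supervision as sv

image = ...
detections = sv.Detections(...)

# opacity must be between 0 and 1; lower values keep more of the original pixels.
color_annotator = sv.ColorAnnotator(opacity=0.3)
annotated_frame = color_annotator.annotate(
    scene=image.copy(),
    detections=detections
)
```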
CircleAnnotator

Bases: BaseAnnotator

A class for drawing circles on an image using provided detections.

Source code in supervision/annotators/core.py
class CircleAnnotator(BaseAnnotator):\n    \"\"\"\n    A class for drawing circle on an image using provided detections.\n    \"\"\"\n\n    def __init__(\n        self,\n        color: Union[Color, ColorPalette] = ColorPalette.DEFAULT,\n        thickness: int = 2,\n        color_lookup: ColorLookup = ColorLookup.CLASS,\n    ):\n        \"\"\"\n        Args:\n            color (Union[Color, ColorPalette]): The color or color palette to use for\n                annotating detections.\n            thickness (int): Thickness of the circle line.\n            color_lookup (str): Strategy for mapping colors to annotations.\n                Options are `INDEX`, `CLASS`, `TRACK`.\n        \"\"\"\n\n        self.color: Union[Color, ColorPalette] = color\n        self.thickness: int = thickness\n        self.color_lookup: ColorLookup = color_lookup\n\n    @convert_for_annotation_method\n    def annotate(\n        self,\n        scene: ImageType,\n        detections: Detections,\n        custom_color_lookup: Optional[np.ndarray] = None,\n    ) -> ImageType:\n        \"\"\"\n        Annotates the given scene with circles based on the provided detections.\n\n        Args:\n            scene (ImageType): The image where box corners will be drawn.\n                `ImageType` is a flexible type, accepting either `numpy.ndarray`\n                or `PIL.Image.Image`.\n            detections (Detections): Object detections to annotate.\n            custom_color_lookup (Optional[np.ndarray]): Custom color lookup array.\n                Allows to override the default color mapping strategy.\n\n        Returns:\n            The annotated image, matching the type of `scene` (`numpy.ndarray`\n                or `PIL.Image.Image`)\n\n        Example:\n            ```python\n            import supervision as sv\n\n            image = ...\n            detections = sv.Detections(...)\n\n            circle_annotator = sv.CircleAnnotator()\n            annotated_frame = circle_annotator.annotate(\n                scene=image.copy(),\n                detections=detections\n            )\n            ```\n\n\n        ![circle-annotator-example](https://media.roboflow.com/\n        supervision-annotator-examples/circle-annotator-example-purple.png)\n        \"\"\"\n        for detection_idx in range(len(detections)):\n            x1, y1, x2, y2 = detections.xyxy[detection_idx].astype(int)\n            center = ((x1 + x2) // 2, (y1 + y2) // 2)\n            distance = sqrt((x1 - center[0]) ** 2 + (y1 - center[1]) ** 2)\n            color = resolve_color(\n                color=self.color,\n                detections=detections,\n                detection_idx=detection_idx,\n                color_lookup=self.color_lookup\n                if custom_color_lookup is None\n                else custom_color_lookup,\n            )\n            cv2.circle(\n                img=scene,\n                center=center,\n                radius=int(distance),\n                color=color.as_bgr(),\n                thickness=self.thickness,\n            )\n\n        return scene\n
DotAnnotator

Bases: BaseAnnotator

A class for drawing dots on an image at specific coordinates based on provided detections.

Source code in supervision/annotators/core.py
class DotAnnotator(BaseAnnotator):\n    \"\"\"\n    A class for drawing dots on an image at specific coordinates based on provided\n    detections.\n    \"\"\"\n\n    def __init__(\n        self,\n        color: Union[Color, ColorPalette] = ColorPalette.DEFAULT,\n        radius: int = 4,\n        position: Position = Position.CENTER,\n        color_lookup: ColorLookup = ColorLookup.CLASS,\n    ):\n        \"\"\"\n        Args:\n            color (Union[Color, ColorPalette]): The color or color palette to use for\n                annotating detections.\n            radius (int): Radius of the drawn dots.\n            position (Position): The anchor position for placing the dot.\n            color_lookup (ColorLookup): Strategy for mapping colors to annotations.\n                Options are `INDEX`, `CLASS`, `TRACK`.\n        \"\"\"\n        self.color: Union[Color, ColorPalette] = color\n        self.radius: int = radius\n        self.position: Position = position\n        self.color_lookup: ColorLookup = color_lookup\n\n    @convert_for_annotation_method\n    def annotate(\n        self,\n        scene: ImageType,\n        detections: Detections,\n        custom_color_lookup: Optional[np.ndarray] = None,\n    ) -> ImageType:\n        \"\"\"\n        Annotates the given scene with dots based on the provided detections.\n\n        Args:\n            scene (ImageType): The image where dots will be drawn.\n                `ImageType` is a flexible type, accepting either `numpy.ndarray`\n                or `PIL.Image.Image`.\n            detections (Detections): Object detections to annotate.\n            custom_color_lookup (Optional[np.ndarray]): Custom color lookup array.\n                Allows to override the default color mapping strategy.\n\n        Returns:\n            The annotated image, matching the type of `scene` (`numpy.ndarray`\n                or `PIL.Image.Image`)\n\n        Example:\n            ```python\n            import supervision as sv\n\n            image = ...\n            detections = sv.Detections(...)\n\n            dot_annotator = sv.DotAnnotator()\n            annotated_frame = dot_annotator.annotate(\n                scene=image.copy(),\n                detections=detections\n            )\n            ```\n\n        ![dot-annotator-example](https://media.roboflow.com/\n        supervision-annotator-examples/dot-annotator-example-purple.png)\n        \"\"\"\n        xy = detections.get_anchors_coordinates(anchor=self.position)\n        for detection_idx in range(len(detections)):\n            color = resolve_color(\n                color=self.color,\n                detections=detections,\n                detection_idx=detection_idx,\n                color_lookup=self.color_lookup\n                if custom_color_lookup is None\n                else custom_color_lookup,\n            )\n            center = (int(xy[detection_idx, 0]), int(xy[detection_idx, 1]))\n            cv2.circle(scene, center, self.radius, color.as_bgr(), -1)\n        return scene\n
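`DotAnnotator` places its dot at the anchor selected by `position` (the default is the box center). A sketch that anchors the dot at the top-left corner instead, assuming `image` and `detections` already exist:

```python
import supervision as sv

image = ...
detections = sv.Detections(...)

# position controls which box anchor the dot is drawn at.
dot_annotator = sv.DotAnnotator(radius=6, position=sv.Position.TOP_LEFT)
annotated_frame = dot_annotator.annotate(
    scene=image.copy(),
    detections=detections
)
```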
TriangleAnnotator

Bases: BaseAnnotator

A class for drawing triangle markers on an image at specific coordinates based on provided detections.

Source code in supervision/annotators/core.py
class TriangleAnnotator(BaseAnnotator):\n    \"\"\"\n    A class for drawing triangle markers on an image at specific coordinates based on\n    provided detections.\n    \"\"\"\n\n    def __init__(\n        self,\n        color: Union[Color, ColorPalette] = ColorPalette.DEFAULT,\n        base: int = 10,\n        height: int = 10,\n        position: Position = Position.TOP_CENTER,\n        color_lookup: ColorLookup = ColorLookup.CLASS,\n    ):\n        \"\"\"\n        Args:\n            color (Union[Color, ColorPalette]): The color or color palette to use for\n                annotating detections.\n            base (int): The base width of the triangle.\n            height (int): The height of the triangle.\n            position (Position): The anchor position for placing the triangle.\n            color_lookup (ColorLookup): Strategy for mapping colors to annotations.\n                Options are `INDEX`, `CLASS`, `TRACK`.\n        \"\"\"\n        self.color: Union[Color, ColorPalette] = color\n        self.base: int = base\n        self.height: int = height\n        self.position: Position = position\n        self.color_lookup: ColorLookup = color_lookup\n\n    @convert_for_annotation_method\n    def annotate(\n        self,\n        scene: ImageType,\n        detections: Detections,\n        custom_color_lookup: Optional[np.ndarray] = None,\n    ) -> ImageType:\n        \"\"\"\n        Annotates the given scene with triangles based on the provided detections.\n\n        Args:\n            scene (ImageType): The image where triangles will be drawn.\n                `ImageType` is a flexible type, accepting either `numpy.ndarray`\n                or `PIL.Image.Image`.\n            detections (Detections): Object detections to annotate.\n            custom_color_lookup (Optional[np.ndarray]): Custom color lookup array.\n                Allows to override the default color mapping strategy.\n\n        Returns:\n            The annotated image, matching the type of `scene` (`numpy.ndarray`\n                or `PIL.Image.Image`)\n\n        Example:\n            ```python\n            import supervision as sv\n\n            image = ...\n            detections = sv.Detections(...)\n\n            triangle_annotator = sv.TriangleAnnotator()\n            annotated_frame = triangle_annotator.annotate(\n                scene=image.copy(),\n                detections=detections\n            )\n            ```\n\n        ![triangle-annotator-example](https://media.roboflow.com/\n        supervision-annotator-examples/triangle-annotator-example.png)\n        \"\"\"\n        xy = detections.get_anchors_coordinates(anchor=self.position)\n        for detection_idx in range(len(detections)):\n            color = resolve_color(\n                color=self.color,\n                detections=detections,\n                detection_idx=detection_idx,\n                color_lookup=self.color_lookup\n                if custom_color_lookup is None\n                else custom_color_lookup,\n            )\n            tip_x, tip_y = int(xy[detection_idx, 0]), int(xy[detection_idx, 1])\n            vertices = np.array(\n                [\n                    [tip_x - self.base // 2, tip_y - self.height],\n                    [tip_x + self.base // 2, tip_y - self.height],\n                    [tip_x, tip_y],\n                ],\n                np.int32,\n            )\n\n            cv2.fillPoly(scene, [vertices], color.as_bgr())\n\n        return scene\n
EllipseAnnotator

Bases: BaseAnnotator

A class for drawing ellipses on an image using provided detections.

Source code in supervision/annotators/core.py
class EllipseAnnotator(BaseAnnotator):\n    \"\"\"\n    A class for drawing ellipses on an image using provided detections.\n    \"\"\"\n\n    def __init__(\n        self,\n        color: Union[Color, ColorPalette] = ColorPalette.DEFAULT,\n        thickness: int = 2,\n        start_angle: int = -45,\n        end_angle: int = 235,\n        color_lookup: ColorLookup = ColorLookup.CLASS,\n    ):\n        \"\"\"\n        Args:\n            color (Union[Color, ColorPalette]): The color or color palette to use for\n                annotating detections.\n            thickness (int): Thickness of the ellipse lines.\n            start_angle (int): Starting angle of the ellipse.\n            end_angle (int): Ending angle of the ellipse.\n            color_lookup (str): Strategy for mapping colors to annotations.\n                Options are `INDEX`, `CLASS`, `TRACK`.\n        \"\"\"\n        self.color: Union[Color, ColorPalette] = color\n        self.thickness: int = thickness\n        self.start_angle: int = start_angle\n        self.end_angle: int = end_angle\n        self.color_lookup: ColorLookup = color_lookup\n\n    @convert_for_annotation_method\n    def annotate(\n        self,\n        scene: ImageType,\n        detections: Detections,\n        custom_color_lookup: Optional[np.ndarray] = None,\n    ) -> ImageType:\n        \"\"\"\n        Annotates the given scene with ellipses based on the provided detections.\n\n        Args:\n            scene (ImageType): The image where ellipses will be drawn.\n                `ImageType` is a flexible type, accepting either `numpy.ndarray`\n                or `PIL.Image.Image`.\n            detections (Detections): Object detections to annotate.\n            custom_color_lookup (Optional[np.ndarray]): Custom color lookup array.\n                Allows to override the default color mapping strategy.\n\n        Returns:\n            The annotated image, matching the type of `scene` (`numpy.ndarray`\n                or `PIL.Image.Image`)\n\n        Example:\n            ```python\n            import supervision as sv\n\n            image = ...\n            detections = sv.Detections(...)\n\n            ellipse_annotator = sv.EllipseAnnotator()\n            annotated_frame = ellipse_annotator.annotate(\n                scene=image.copy(),\n                detections=detections\n            )\n            ```\n\n        ![ellipse-annotator-example](https://media.roboflow.com/\n        supervision-annotator-examples/ellipse-annotator-example-purple.png)\n        \"\"\"\n        for detection_idx in range(len(detections)):\n            x1, y1, x2, y2 = detections.xyxy[detection_idx].astype(int)\n            color = resolve_color(\n                color=self.color,\n                detections=detections,\n                detection_idx=detection_idx,\n                color_lookup=self.color_lookup\n                if custom_color_lookup is None\n                else custom_color_lookup,\n            )\n            center = (int((x1 + x2) / 2), y2)\n            width = x2 - x1\n            cv2.ellipse(\n                scene,\n                center=center,\n                axes=(int(width), int(0.35 * width)),\n                angle=0.0,\n                startAngle=self.start_angle,\n                endAngle=self.end_angle,\n                color=color.as_bgr(),\n                thickness=self.thickness,\n                lineType=cv2.LINE_4,\n            )\n        return scene\n
HaloAnnotator

Bases: BaseAnnotator

A class for drawing halos on an image using provided detections.

Warning

This annotator uses sv.Detections.mask.

Source code in supervision/annotators/core.py
class HaloAnnotator(BaseAnnotator):\n    \"\"\"\n    A class for drawing Halos on an image using provided detections.\n\n    !!! warning\n\n        This annotator uses `sv.Detections.mask`.\n    \"\"\"\n\n    def __init__(\n        self,\n        color: Union[Color, ColorPalette] = ColorPalette.DEFAULT,\n        opacity: float = 0.8,\n        kernel_size: int = 40,\n        color_lookup: ColorLookup = ColorLookup.CLASS,\n    ):\n        \"\"\"\n        Args:\n            color (Union[Color, ColorPalette]): The color or color palette to use for\n                annotating detections.\n            opacity (float): Opacity of the overlay mask. Must be between `0` and `1`.\n            kernel_size (int): The size of the average pooling kernel used for creating\n                the halo.\n            color_lookup (str): Strategy for mapping colors to annotations.\n                Options are `INDEX`, `CLASS`, `TRACK`.\n        \"\"\"\n        self.color: Union[Color, ColorPalette] = color\n        self.opacity = opacity\n        self.color_lookup: ColorLookup = color_lookup\n        self.kernel_size: int = kernel_size\n\n    @convert_for_annotation_method\n    def annotate(\n        self,\n        scene: ImageType,\n        detections: Detections,\n        custom_color_lookup: Optional[np.ndarray] = None,\n    ) -> ImageType:\n        \"\"\"\n        Annotates the given scene with halos based on the provided detections.\n\n        Args:\n            scene (ImageType): The image where masks will be drawn.\n                `ImageType` is a flexible type, accepting either `numpy.ndarray`\n                or `PIL.Image.Image`.\n            detections (Detections): Object detections to annotate.\n            custom_color_lookup (Optional[np.ndarray]): Custom color lookup array.\n                Allows to override the default color mapping strategy.\n\n        Returns:\n            The annotated image, matching the type of `scene` (`numpy.ndarray`\n                or `PIL.Image.Image`)\n\n        Example:\n            ```python\n            import supervision as sv\n\n            image = ...\n            detections = sv.Detections(...)\n\n            halo_annotator = sv.HaloAnnotator()\n            annotated_frame = halo_annotator.annotate(\n                scene=image.copy(),\n                detections=detections\n            )\n            ```\n\n        ![halo-annotator-example](https://media.roboflow.com/\n        supervision-annotator-examples/halo-annotator-example-purple.png)\n        \"\"\"\n        if detections.mask is None:\n            return scene\n        colored_mask = np.zeros_like(scene, dtype=np.uint8)\n        fmask = np.array([False] * scene.shape[0] * scene.shape[1]).reshape(\n            scene.shape[0], scene.shape[1]\n        )\n\n        for detection_idx in np.flip(np.argsort(detections.area)):\n            color = resolve_color(\n                color=self.color,\n                detections=detections,\n                detection_idx=detection_idx,\n                color_lookup=self.color_lookup\n                if custom_color_lookup is None\n                else custom_color_lookup,\n            )\n            mask = detections.mask[detection_idx]\n            fmask = np.logical_or(fmask, mask)\n            color_bgr = color.as_bgr()\n            colored_mask[mask] = color_bgr\n\n        colored_mask = cv2.blur(colored_mask, (self.kernel_size, self.kernel_size))\n        colored_mask[fmask] = [0, 0, 0]\n        gray = cv2.cvtColor(colored_mask, cv2.COLOR_BGR2GRAY)\n        
alpha = self.opacity * gray / gray.max()\n        alpha_mask = alpha[:, :, np.newaxis]\n        scene = np.uint8(scene * (1 - alpha_mask) + colored_mask * self.opacity)\n        return scene\n
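Because `HaloAnnotator` reads `sv.Detections.mask` (and returns the scene unchanged when it is `None`), it is typically paired with a segmentation model. A sketch assuming an Ultralytics segmentation checkpoint and an illustrative image path:

```python
import cv2
import supervision as sv
from ultralytics import YOLO

image = cv2.imread("image.jpg")   # illustrative path
model = YOLO("yolov8n-seg.pt")    # segmentation model, so detections.mask is populated

result = model(image)[0]
detections = sv.Detections.from_ultralytics(result)

halo_annotator = sv.HaloAnnotator(opacity=0.8, kernel_size=40)
annotated_frame = halo_annotator.annotate(
    scene=image.copy(),
    detections=detections
)
```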
PercentageBarAnnotator

Bases: BaseAnnotator

A class for drawing percentage bars on an image using provided detections.

Source code in supervision/annotators/core.py
class PercentageBarAnnotator(BaseAnnotator):\n    \"\"\"\n    A class for drawing percentage bars on an image using provided detections.\n    \"\"\"\n\n    def __init__(\n        self,\n        height: int = 16,\n        width: int = 80,\n        color: Union[Color, ColorPalette] = ColorPalette.DEFAULT,\n        border_color: Color = Color.BLACK,\n        position: Position = Position.TOP_CENTER,\n        color_lookup: ColorLookup = ColorLookup.CLASS,\n        border_thickness: int = None,\n    ):\n        \"\"\"\n        Args:\n            height (int): The height in pixels of the percentage bar.\n            width (int): The width in pixels of the percentage bar.\n            color (Union[Color, ColorPalette]): The color or color palette to use for\n                annotating detections.\n            border_color (Color): The color of the border lines.\n            position (Position): The anchor position of drawing the percentage bar.\n            color_lookup (str): Strategy for mapping colors to annotations.\n                Options are `INDEX`, `CLASS`, `TRACK`.\n            border_thickness (int): The thickness of the border lines.\n        \"\"\"\n        self.height: int = height\n        self.width: int = width\n        self.color: Union[Color, ColorPalette] = color\n        self.border_color: Color = border_color\n        self.position: Position = position\n        self.color_lookup: ColorLookup = color_lookup\n\n        if border_thickness is None:\n            self.border_thickness = int(0.15 * self.height)\n\n    @convert_for_annotation_method\n    def annotate(\n        self,\n        scene: ImageType,\n        detections: Detections,\n        custom_color_lookup: Optional[np.ndarray] = None,\n        custom_values: Optional[np.ndarray] = None,\n    ) -> ImageType:\n        \"\"\"\n        Annotates the given scene with percentage bars based on the provided\n        detections. The percentage bars visually represent the confidence or custom\n        values associated with each detection.\n\n        Args:\n            scene (ImageType): The image where percentage bars will be drawn.\n                `ImageType` is a flexible type, accepting either `numpy.ndarray`\n                or `PIL.Image.Image`.\n            detections (Detections): Object detections to annotate.\n            custom_color_lookup (Optional[np.ndarray]): Custom color lookup array.\n                Allows to override the default color mapping strategy.\n            custom_values (Optional[np.ndarray]): Custom values array to use instead\n                of the default detection confidences. 
This array should have the\n                same length as the number of detections and contain a value between\n                0 and 1 (inclusive) for each detection, representing the percentage\n                to be displayed.\n\n        Returns:\n            The annotated image, matching the type of `scene` (`numpy.ndarray`\n                or `PIL.Image.Image`)\n\n        Example:\n            ```python\n            import supervision as sv\n\n            image = ...\n            detections = sv.Detections(...)\n\n            percentage_bar_annotator = sv.PercentageBarAnnotator()\n            annotated_frame = percentage_bar_annotator.annotate(\n                scene=image.copy(),\n                detections=detections\n            )\n            ```\n\n        ![percentage-bar-example](https://media.roboflow.com/\n        supervision-annotator-examples/percentage-bar-annotator-example-purple.png)\n        \"\"\"\n        self.validate_custom_values(\n            custom_values=custom_values, detections_count=len(detections)\n        )\n        anchors = detections.get_anchors_coordinates(anchor=self.position)\n        for detection_idx in range(len(detections)):\n            anchor = anchors[detection_idx]\n            border_coordinates = self.calculate_border_coordinates(\n                anchor_xy=(int(anchor[0]), int(anchor[1])),\n                border_wh=(self.width, self.height),\n                position=self.position,\n            )\n            border_width = border_coordinates[1][0] - border_coordinates[0][0]\n\n            value = (\n                custom_values[detection_idx]\n                if custom_values is not None\n                else detections.confidence[detection_idx]\n            )\n\n            color = resolve_color(\n                color=self.color,\n                detections=detections,\n                detection_idx=detection_idx,\n                color_lookup=self.color_lookup\n                if custom_color_lookup is None\n                else custom_color_lookup,\n            )\n            cv2.rectangle(\n                img=scene,\n                pt1=border_coordinates[0],\n                pt2=(\n                    border_coordinates[0][0] + int(border_width * value),\n                    border_coordinates[1][1],\n                ),\n                color=color.as_bgr(),\n                thickness=-1,\n            )\n            cv2.rectangle(\n                img=scene,\n                pt1=border_coordinates[0],\n                pt2=border_coordinates[1],\n                color=self.border_color.as_bgr(),\n                thickness=self.border_thickness,\n            )\n        return scene\n\n    @staticmethod\n    def calculate_border_coordinates(\n        anchor_xy: Tuple[int, int], border_wh: Tuple[int, int], position: Position\n    ) -> Tuple[Tuple[int, int], Tuple[int, int]]:\n        cx, cy = anchor_xy\n        width, height = border_wh\n\n        if position == Position.TOP_LEFT:\n            return (cx - width, cy - height), (cx, cy)\n        elif position == Position.TOP_CENTER:\n            return (cx - width // 2, cy), (cx + width // 2, cy - height)\n        elif position == Position.TOP_RIGHT:\n            return (cx, cy), (cx + width, cy - height)\n        elif position == Position.CENTER_LEFT:\n            return (cx - width, cy - height // 2), (cx, cy + height // 2)\n        elif position == Position.CENTER or position == Position.CENTER_OF_MASS:\n            return (\n                (cx - width // 2, cy - height // 2),\n   
             (cx + width // 2, cy + height // 2),\n            )\n        elif position == Position.CENTER_RIGHT:\n            return (cx, cy - height // 2), (cx + width, cy + height // 2)\n        elif position == Position.BOTTOM_LEFT:\n            return (cx - width, cy), (cx, cy + height)\n        elif position == Position.BOTTOM_CENTER:\n            return (cx - width // 2, cy), (cx + width // 2, cy + height)\n        elif position == Position.BOTTOM_RIGHT:\n            return (cx, cy), (cx + width, cy + height)\n\n    @staticmethod\n    def validate_custom_values(\n        custom_values: Optional[Union[np.ndarray, List[float]]], detections_count: int\n    ) -> None:\n        if custom_values is not None:\n            if not isinstance(custom_values, (np.ndarray, list)):\n                raise TypeError(\n                    \"custom_values must be either a numpy array or a list of floats.\"\n                )\n\n            if len(custom_values) != detections_count:\n                raise ValueError(\n                    \"The length of custom_values must match the number of detections.\"\n                )\n\n            if not all(0 <= value <= 1 for value in custom_values):\n                raise ValueError(\"All values in custom_values must be between 0 and 1.\")\n
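
By default the bar length reflects detections.confidence; the custom_values argument lets you display any per-detection quantity normalized to the range [0, 1]. A short sketch, assuming detections were obtained as in the examples above; the scaled scores here are purely illustrative.

```python
import numpy as np
import supervision as sv

image = ...                      # numpy.ndarray or PIL.Image.Image
detections = sv.Detections(...)

# Hypothetical per-detection scores in [0, 1]; must match len(detections).
custom_values = np.clip(detections.confidence * 0.9, 0, 1)

percentage_bar_annotator = sv.PercentageBarAnnotator(
    height=16,
    width=80,
    position=sv.Position.TOP_CENTER,
)
annotated_frame = percentage_bar_annotator.annotate(
    scene=image.copy(),
    detections=detections,
    custom_values=custom_values,
)
```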
HeatMapAnnotator

A class for drawing heatmaps on an image based on provided detections. Heat accumulates over time and is drawn as a semi-transparent overlay of blurred circles.

Source code in supervision/annotators/core.py
class HeatMapAnnotator:\n    \"\"\"\n    A class for drawing heatmaps on an image based on provided detections.\n    Heat accumulates over time and is drawn as a semi-transparent overlay\n    of blurred circles.\n    \"\"\"\n\n    def __init__(\n        self,\n        position: Position = Position.BOTTOM_CENTER,\n        opacity: float = 0.2,\n        radius: int = 40,\n        kernel_size: int = 25,\n        top_hue: int = 0,\n        low_hue: int = 125,\n    ):\n        \"\"\"\n        Args:\n            position (Position): The position of the heatmap. Defaults to\n                `BOTTOM_CENTER`.\n            opacity (float): Opacity of the overlay mask, between 0 and 1.\n            radius (int): Radius of the heat circle.\n            kernel_size (int): Kernel size for blurring the heatmap.\n            top_hue (int): Hue at the top of the heatmap. Defaults to 0 (red).\n            low_hue (int): Hue at the bottom of the heatmap. Defaults to 125 (blue).\n        \"\"\"\n        self.position = position\n        self.opacity = opacity\n        self.radius = radius\n        self.kernel_size = kernel_size\n        self.heat_mask = None\n        self.top_hue = top_hue\n        self.low_hue = low_hue\n\n    @convert_for_annotation_method\n    def annotate(self, scene: ImageType, detections: Detections) -> ImageType:\n        \"\"\"\n        Annotates the scene with a heatmap based on the provided detections.\n\n        Args:\n            scene (ImageType): The image where the heatmap will be drawn.\n                `ImageType` is a flexible type, accepting either `numpy.ndarray`\n                or `PIL.Image.Image`.\n            detections (Detections): Object detections to annotate.\n\n        Returns:\n            The annotated image, matching the type of `scene` (`numpy.ndarray`\n                or `PIL.Image.Image`)\n\n        Example:\n            ```python\n            import supervision as sv\n            from ultralytics import YOLO\n\n            model = YOLO('yolov8x.pt')\n\n            heat_map_annotator = sv.HeatMapAnnotator()\n\n            video_info = sv.VideoInfo.from_video_path(video_path='...')\n            frames_generator = get_video_frames_generator(source_path='...')\n\n            with sv.VideoSink(target_path='...', video_info=video_info) as sink:\n               for frame in frames_generator:\n                   result = model(frame)[0]\n                   detections = sv.Detections.from_ultralytics(result)\n                   annotated_frame = heat_map_annotator.annotate(\n                       scene=frame.copy(),\n                       detections=detections)\n                   sink.write_frame(frame=annotated_frame)\n            ```\n\n        ![heatmap-annotator-example](https://media.roboflow.com/\n        supervision-annotator-examples/heat-map-annotator-example-purple.png)\n        \"\"\"\n\n        if self.heat_mask is None:\n            self.heat_mask = np.zeros(scene.shape[:2])\n        mask = np.zeros(scene.shape[:2])\n        for xy in detections.get_anchors_coordinates(self.position):\n            cv2.circle(mask, (int(xy[0]), int(xy[1])), self.radius, 1, -1)\n        self.heat_mask = mask + self.heat_mask\n        temp = self.heat_mask.copy()\n        temp = self.low_hue - temp / temp.max() * (self.low_hue - self.top_hue)\n        temp = temp.astype(np.uint8)\n        if self.kernel_size is not None:\n            temp = cv2.blur(temp, (self.kernel_size, self.kernel_size))\n        hsv = np.zeros(scene.shape)\n        hsv[..., 0] = temp\n        
hsv[..., 1] = 255\n        hsv[..., 2] = 255\n        temp = cv2.cvtColor(hsv.astype(np.uint8), cv2.COLOR_HSV2BGR)\n        mask = cv2.cvtColor(self.heat_mask.astype(np.uint8), cv2.COLOR_GRAY2BGR) > 0\n        scene[mask] = cv2.addWeighted(temp, self.opacity, scene, 1 - self.opacity, 0)[\n            mask\n        ]\n        return scene\n
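
HeatMapAnnotator is stateful: heat_mask accumulates across calls, so a single instance should be reused for the whole video stream. A minimal sketch of the constructor options; the tuned values are only illustrative, and the frame/detections placeholders follow the style of the example above.

```python
import supervision as sv

# Reuse one instance per video stream; the heat mask accumulates across calls.
heat_map_annotator = sv.HeatMapAnnotator(
    position=sv.Position.BOTTOM_CENTER,
    opacity=0.3,      # stronger overlay than the 0.2 default
    radius=30,
    kernel_size=25,
    top_hue=0,        # red for the hottest regions
    low_hue=125,      # blue for the coldest regions
)

frame = ...                      # current video frame (numpy.ndarray)
detections = sv.Detections(...)  # detections for this frame
annotated_frame = heat_map_annotator.annotate(scene=frame.copy(), detections=detections)
```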
MaskAnnotator

Bases: BaseAnnotator

A class for drawing masks on an image using provided detections.

Warning

This annotator uses sv.Detections.mask.

Source code in supervision/annotators/core.py
class MaskAnnotator(BaseAnnotator):\n    \"\"\"\n    A class for drawing masks on an image using provided detections.\n\n    !!! warning\n\n        This annotator uses `sv.Detections.mask`.\n    \"\"\"\n\n    def __init__(\n        self,\n        color: Union[Color, ColorPalette] = ColorPalette.DEFAULT,\n        opacity: float = 0.5,\n        color_lookup: ColorLookup = ColorLookup.CLASS,\n    ):\n        \"\"\"\n        Args:\n            color (Union[Color, ColorPalette]): The color or color palette to use for\n                annotating detections.\n            opacity (float): Opacity of the overlay mask. Must be between `0` and `1`.\n            color_lookup (str): Strategy for mapping colors to annotations.\n                Options are `INDEX`, `CLASS`, `TRACK`.\n        \"\"\"\n        self.color: Union[Color, ColorPalette] = color\n        self.opacity = opacity\n        self.color_lookup: ColorLookup = color_lookup\n\n    @convert_for_annotation_method\n    def annotate(\n        self,\n        scene: ImageType,\n        detections: Detections,\n        custom_color_lookup: Optional[np.ndarray] = None,\n    ) -> ImageType:\n        \"\"\"\n        Annotates the given scene with masks based on the provided detections.\n\n        Args:\n            scene (ImageType): The image where masks will be drawn.\n                `ImageType` is a flexible type, accepting either `numpy.ndarray`\n                or `PIL.Image.Image`.\n            detections (Detections): Object detections to annotate.\n            custom_color_lookup (Optional[np.ndarray]): Custom color lookup array.\n                Allows to override the default color mapping strategy.\n\n        Returns:\n            The annotated image, matching the type of `scene` (`numpy.ndarray`\n                or `PIL.Image.Image`)\n\n        Example:\n            ```python\n            import supervision as sv\n\n            image = ...\n            detections = sv.Detections(...)\n\n            mask_annotator = sv.MaskAnnotator()\n            annotated_frame = mask_annotator.annotate(\n                scene=image.copy(),\n                detections=detections\n            )\n            ```\n\n        ![mask-annotator-example](https://media.roboflow.com/\n        supervision-annotator-examples/mask-annotator-example-purple.png)\n        \"\"\"\n        if detections.mask is None:\n            return scene\n\n        colored_mask = np.array(scene, copy=True, dtype=np.uint8)\n\n        for detection_idx in np.flip(np.argsort(detections.area)):\n            color = resolve_color(\n                color=self.color,\n                detections=detections,\n                detection_idx=detection_idx,\n                color_lookup=self.color_lookup\n                if custom_color_lookup is None\n                else custom_color_lookup,\n            )\n            mask = detections.mask[detection_idx]\n            colored_mask[mask] = color.as_bgr()\n\n        scene = cv2.addWeighted(colored_mask, self.opacity, scene, 1 - self.opacity, 0)\n        return scene.astype(np.uint8)\n
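
Each annotate call returns the scene, so MaskAnnotator is easy to stack with other annotators. A sketch assuming detections that include masks (for example from a segmentation model, as in the HaloAnnotator sketch above):

```python
import supervision as sv

image = ...                       # numpy.ndarray or PIL.Image.Image
detections = sv.Detections(...)   # must include .mask

mask_annotator = sv.MaskAnnotator(opacity=0.5)
label_annotator = sv.LabelAnnotator(text_position=sv.Position.CENTER)

# Chain annotators: masks first, labels on top of the blended result.
annotated_frame = mask_annotator.annotate(scene=image.copy(), detections=detections)
annotated_frame = label_annotator.annotate(scene=annotated_frame, detections=detections)
```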
PolygonAnnotator

Bases: BaseAnnotator

A class for drawing polygons on an image using provided detections.

Warning

This annotator uses sv.Detections.mask.

Source code in supervision/annotators/core.py
class PolygonAnnotator(BaseAnnotator):\n    \"\"\"\n    A class for drawing polygons on an image using provided detections.\n\n    !!! warning\n\n        This annotator uses `sv.Detections.mask`.\n    \"\"\"\n\n    def __init__(\n        self,\n        color: Union[Color, ColorPalette] = ColorPalette.DEFAULT,\n        thickness: int = 2,\n        color_lookup: ColorLookup = ColorLookup.CLASS,\n    ):\n        \"\"\"\n        Args:\n            color (Union[Color, ColorPalette]): The color or color palette to use for\n                annotating detections.\n            thickness (int): Thickness of the polygon lines.\n            color_lookup (str): Strategy for mapping colors to annotations.\n                Options are `INDEX`, `CLASS`, `TRACK`.\n        \"\"\"\n        self.color: Union[Color, ColorPalette] = color\n        self.thickness: int = thickness\n        self.color_lookup: ColorLookup = color_lookup\n\n    @convert_for_annotation_method\n    def annotate(\n        self,\n        scene: ImageType,\n        detections: Detections,\n        custom_color_lookup: Optional[np.ndarray] = None,\n    ) -> ImageType:\n        \"\"\"\n        Annotates the given scene with polygons based on the provided detections.\n\n        Args:\n            scene (ImageType): The image where polygons will be drawn.\n                `ImageType` is a flexible type, accepting either `numpy.ndarray`\n                or `PIL.Image.Image`.\n            detections (Detections): Object detections to annotate.\n            custom_color_lookup (Optional[np.ndarray]): Custom color lookup array.\n                Allows to override the default color mapping strategy.\n\n        Returns:\n            The annotated image, matching the type of `scene` (`numpy.ndarray`\n                or `PIL.Image.Image`)\n\n        Example:\n            ```python\n            import supervision as sv\n\n            image = ...\n            detections = sv.Detections(...)\n\n            polygon_annotator = sv.PolygonAnnotator()\n            annotated_frame = polygon_annotator.annotate(\n                scene=image.copy(),\n                detections=detections\n            )\n            ```\n\n        ![polygon-annotator-example](https://media.roboflow.com/\n        supervision-annotator-examples/polygon-annotator-example-purple.png)\n        \"\"\"\n        if detections.mask is None:\n            return scene\n\n        for detection_idx in range(len(detections)):\n            mask = detections.mask[detection_idx]\n            color = resolve_color(\n                color=self.color,\n                detections=detections,\n                detection_idx=detection_idx,\n                color_lookup=self.color_lookup\n                if custom_color_lookup is None\n                else custom_color_lookup,\n            )\n            for polygon in mask_to_polygons(mask=mask):\n                scene = draw_polygon(\n                    scene=scene,\n                    polygon=polygon,\n                    color=color,\n                    thickness=self.thickness,\n                )\n\n        return scene\n
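
PolygonAnnotator draws only the mask outlines, which is useful when the underlying pixels should stay fully visible. A short sketch under the same mask assumption; it assumes ColorLookup is exported at the package level as sv.ColorLookup.

```python
import supervision as sv

image = ...                       # numpy.ndarray or PIL.Image.Image
detections = sv.Detections(...)   # must include .mask

polygon_annotator = sv.PolygonAnnotator(
    thickness=2,
    color_lookup=sv.ColorLookup.CLASS,  # assumed package-level export of ColorLookup
)
annotated_frame = polygon_annotator.annotate(scene=image.copy(), detections=detections)
```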
LabelAnnotator

A class for annotating labels on an image using provided detections.

Source code in supervision/annotators/core.py
class LabelAnnotator:\n    \"\"\"\n    A class for annotating labels on an image using provided detections.\n    \"\"\"\n\n    def __init__(\n        self,\n        color: Union[Color, ColorPalette] = ColorPalette.DEFAULT,\n        text_color: Color = Color.WHITE,\n        text_scale: float = 0.5,\n        text_thickness: int = 1,\n        text_padding: int = 10,\n        text_position: Position = Position.TOP_LEFT,\n        color_lookup: ColorLookup = ColorLookup.CLASS,\n        border_radius: int = 0,\n    ):\n        \"\"\"\n        Args:\n            color (Union[Color, ColorPalette]): The color or color palette to use for\n                annotating the text background.\n            text_color (Color): The color to use for the text.\n            text_scale (float): Font scale for the text.\n            text_thickness (int): Thickness of the text characters.\n            text_padding (int): Padding around the text within its background box.\n            text_position (Position): Position of the text relative to the detection.\n                Possible values are defined in the `Position` enum.\n            color_lookup (str): Strategy for mapping colors to annotations.\n                Options are `INDEX`, `CLASS`, `TRACK`.\n            border_radius (int): The radius to apply round edges. If the selected\n                value is higher than the lower dimension, width or height, is clipped.\n        \"\"\"\n        self.border_radius: int = border_radius\n        self.color: Union[Color, ColorPalette] = color\n        self.text_color: Color = text_color\n        self.text_scale: float = text_scale\n        self.text_thickness: int = text_thickness\n        self.text_padding: int = text_padding\n        self.text_anchor: Position = text_position\n        self.color_lookup: ColorLookup = color_lookup\n\n    @convert_for_annotation_method\n    def annotate(\n        self,\n        scene: ImageType,\n        detections: Detections,\n        labels: List[str] = None,\n        custom_color_lookup: Optional[np.ndarray] = None,\n    ) -> ImageType:\n        \"\"\"\n        Annotates the given scene with labels based on the provided detections.\n\n        Args:\n            scene (ImageType): The image where labels will be drawn.\n                `ImageType` is a flexible type, accepting either `numpy.ndarray`\n                or `PIL.Image.Image`.\n            detections (Detections): Object detections to annotate.\n            labels (List[str]): Optional. 
Custom labels for each detection.\n            custom_color_lookup (Optional[np.ndarray]): Custom color lookup array.\n                Allows to override the default color mapping strategy.\n\n        Returns:\n            The annotated image, matching the type of `scene` (`numpy.ndarray`\n                or `PIL.Image.Image`)\n\n        Example:\n            ```python\n             import supervision as sv\n\n            image = ...\n            detections = sv.Detections(...)\n\n            labels = [\n                f\"{class_name} {confidence:.2f}\"\n                for class_name, confidence\n                in zip(detections['class_name'], detections.confidence)\n            ]\n\n            label_annotator = sv.LabelAnnotator(text_position=sv.Position.CENTER)\n            annotated_frame = label_annotator.annotate(\n                scene=image.copy(),\n                detections=detections,\n                labels=labels\n            )\n            ```\n\n        ![label-annotator-example](https://media.roboflow.com/\n        supervision-annotator-examples/label-annotator-example-purple.png)\n        \"\"\"\n        font = cv2.FONT_HERSHEY_SIMPLEX\n        anchors_coordinates = detections.get_anchors_coordinates(\n            anchor=self.text_anchor\n        ).astype(int)\n        if labels is not None and len(labels) != len(detections):\n            raise ValueError(\n                f\"The number of labels provided ({len(labels)}) does not match the \"\n                f\"number of detections ({len(detections)}). Each detection should have \"\n                f\"a corresponding label. This discrepancy can occur if the labels and \"\n                f\"detections are not aligned or if an incorrect number of labels has \"\n                f\"been provided. 
Please ensure that the labels array has the same \"\n                f\"length as the Detections object.\"\n            )\n\n        for detection_idx, center_coordinates in enumerate(anchors_coordinates):\n            color = resolve_color(\n                color=self.color,\n                detections=detections,\n                detection_idx=detection_idx,\n                color_lookup=(\n                    self.color_lookup\n                    if custom_color_lookup is None\n                    else custom_color_lookup\n                ),\n            )\n\n            if labels is not None:\n                text = labels[detection_idx]\n            elif detections[CLASS_NAME_DATA_FIELD] is not None:\n                text = detections[CLASS_NAME_DATA_FIELD][detection_idx]\n            elif detections.class_id is not None:\n                text = str(detections.class_id[detection_idx])\n            else:\n                text = str(detection_idx)\n\n            text_w, text_h = cv2.getTextSize(\n                text=text,\n                fontFace=font,\n                fontScale=self.text_scale,\n                thickness=self.text_thickness,\n            )[0]\n            text_w_padded = text_w + 2 * self.text_padding\n            text_h_padded = text_h + 2 * self.text_padding\n            text_background_xyxy = resolve_text_background_xyxy(\n                center_coordinates=tuple(center_coordinates),\n                text_wh=(text_w_padded, text_h_padded),\n                position=self.text_anchor,\n            )\n\n            text_x = text_background_xyxy[0] + self.text_padding\n            text_y = text_background_xyxy[1] + self.text_padding + text_h\n\n            self.draw_rounded_rectangle(\n                scene=scene,\n                xyxy=text_background_xyxy,\n                color=color.as_bgr(),\n                border_radius=self.border_radius,\n            )\n            cv2.putText(\n                img=scene,\n                text=text,\n                org=(text_x, text_y),\n                fontFace=font,\n                fontScale=self.text_scale,\n                color=self.text_color.as_rgb(),\n                thickness=self.text_thickness,\n                lineType=cv2.LINE_AA,\n            )\n        return scene\n\n    @staticmethod\n    def draw_rounded_rectangle(\n        scene: np.ndarray,\n        xyxy: Tuple[int, int, int, int],\n        color: Tuple[int, int, int],\n        border_radius: int,\n    ) -> np.ndarray:\n        x1, y1, x2, y2 = xyxy\n        width = x2 - x1\n        height = y2 - y1\n\n        border_radius = min(border_radius, min(width, height) // 2)\n\n        rectangle_coordinates = [\n            ((x1 + border_radius, y1), (x2 - border_radius, y2)),\n            ((x1, y1 + border_radius), (x2, y2 - border_radius)),\n        ]\n        circle_centers = [\n            (x1 + border_radius, y1 + border_radius),\n            (x2 - border_radius, y1 + border_radius),\n            (x1 + border_radius, y2 - border_radius),\n            (x2 - border_radius, y2 - border_radius),\n        ]\n\n        for coordinates in rectangle_coordinates:\n            cv2.rectangle(\n                img=scene,\n                pt1=coordinates[0],\n                pt2=coordinates[1],\n                color=color,\n                thickness=-1,\n            )\n        for center in circle_centers:\n            cv2.circle(\n                img=scene,\n                center=center,\n                radius=border_radius,\n                color=color,\n         
       thickness=-1,\n            )\n        return scene\n
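
When labels is omitted, the annotator falls back to the detection's class_name data field, then class_id, then the detection index. The sketch below shows both the fallback and explicit labels; the label-formatting pattern mirrors the docstring example.

```python
import supervision as sv

image = ...
detections = sv.Detections(...)

label_annotator = sv.LabelAnnotator(
    text_scale=0.5,
    text_padding=10,
    text_position=sv.Position.TOP_LEFT,
    border_radius=4,
)

# Fallback labels: class_name -> class_id -> detection index.
annotated_frame = label_annotator.annotate(scene=image.copy(), detections=detections)

# Explicit labels, e.g. class name plus confidence.
labels = [
    f"{class_name} {confidence:.2f}"
    for class_name, confidence
    in zip(detections["class_name"], detections.confidence)
]
annotated_frame = label_annotator.annotate(
    scene=image.copy(), detections=detections, labels=labels
)
```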
RichLabelAnnotator

A class for annotating labels on an image using provided detections, with support for Unicode characters by using a custom font.

Source code in supervision/annotators/core.py
class RichLabelAnnotator:\n    \"\"\"\n    A class for annotating labels on an image using provided detections,\n    with support for Unicode characters by using a custom font.\n    \"\"\"\n\n    def __init__(\n        self,\n        color: Union[Color, ColorPalette] = ColorPalette.DEFAULT,\n        text_color: Color = Color.WHITE,\n        font_path: str = None,\n        font_size: int = 10,\n        text_padding: int = 10,\n        text_position: Position = Position.TOP_LEFT,\n        color_lookup: ColorLookup = ColorLookup.CLASS,\n        border_radius: int = 0,\n    ):\n        \"\"\"\n        Args:\n            color (Union[Color, ColorPalette]): The color or color palette to use for\n                annotating the text background.\n            text_color (Color): The color to use for the text.\n            font_path (str): Path to the font file (e.g., \".ttf\" or \".otf\") to use for\n                rendering text. If `None`, the default PIL font will be used.\n            font_size (int): Font size for the text.\n            text_padding (int): Padding around the text within its background box.\n            text_position (Position): Position of the text relative to the detection.\n                Possible values are defined in the `Position` enum.\n            color_lookup (ColorLookup): Strategy for mapping colors to annotations.\n                Options are `INDEX`, `CLASS`, `TRACK`.\n            border_radius (int): The radius to apply round edges. If the selected\n                value is higher than the lower dimension, width or height, is clipped.\n        \"\"\"\n        self.color = color\n        self.text_color = text_color\n        self.text_padding = text_padding\n        self.text_anchor = text_position\n        self.color_lookup = color_lookup\n        self.border_radius = border_radius\n        if font_path is not None:\n            try:\n                self.font = ImageFont.truetype(font_path, font_size)\n            except OSError:\n                print(f\"Font path '{font_path}' not found. Using PIL's default font.\")\n                self.font = ImageFont.load_default(size=font_size)\n        else:\n            self.font = ImageFont.load_default(size=font_size)\n\n    def annotate(\n        self,\n        scene: ImageType,\n        detections: Detections,\n        labels: List[str] = None,\n        custom_color_lookup: Optional[np.ndarray] = None,\n    ) -> ImageType:\n        \"\"\"\n        Annotates the given scene with labels based on the provided\n        detections, with support for Unicode characters.\n\n        Args:\n            scene (ImageType): The image where labels will be drawn.\n                `ImageType` is a flexible type, accepting either `numpy.ndarray`\n                or `PIL.Image.Image`.\n            detections (Detections): Object detections to annotate.\n            labels (List[str]): Optional. 
Custom labels for each detection.\n            custom_color_lookup (Optional[np.ndarray]): Custom color lookup array.\n                Allows to override the default color mapping strategy.\n\n        Returns:\n            The annotated image, matching the type of `scene` (`numpy.ndarray`\n                or `PIL.Image.Image`)\n\n        Example:\n            ```python\n            import supervision as sv\n\n            image = ...\n            detections = sv.Detections(...)\n\n            labels = [\n                f\"{class_name} {confidence:.2f}\"\n                for class_name, confidence\n                in zip(detections['class_name'], detections.confidence)\n            ]\n\n            rich_label_annotator = sv.RichLabelAnnotator(font_path=\"path/to/font.ttf\")\n            annotated_frame = label_annotator.annotate(\n                scene=image.copy(),\n                detections=detections,\n                labels=labels\n            )\n            ```\n\n        \"\"\"\n        if isinstance(scene, np.ndarray):\n            scene = Image.fromarray(cv2.cvtColor(scene, cv2.COLOR_BGR2RGB))\n        draw = ImageDraw.Draw(scene)\n        anchors_coordinates = detections.get_anchors_coordinates(\n            anchor=self.text_anchor\n        ).astype(int)\n        if labels is not None and len(labels) != len(detections):\n            raise ValueError(\n                f\"The number of labels provided ({len(labels)}) does not match the \"\n                f\"number of detections ({len(detections)}). Each detection should have \"\n                f\"a corresponding label. This discrepancy can occur if the labels and \"\n                f\"detections are not aligned or if an incorrect number of labels has \"\n                f\"been provided. Please ensure that the labels array has the same \"\n                f\"length as the Detections object.\"\n            )\n        for detection_idx, center_coordinates in enumerate(anchors_coordinates):\n            color = resolve_color(\n                color=self.color,\n                detections=detections,\n                detection_idx=detection_idx,\n                color_lookup=(\n                    self.color_lookup\n                    if custom_color_lookup is None\n                    else custom_color_lookup\n                ),\n            )\n            if labels is not None:\n                text = labels[detection_idx]\n            elif detections[CLASS_NAME_DATA_FIELD] is not None:\n                text = detections[CLASS_NAME_DATA_FIELD][detection_idx]\n            elif detections.class_id is not None:\n                text = str(detections.class_id[detection_idx])\n            else:\n                text = str(detection_idx)\n\n            left, top, right, bottom = draw.textbbox((0, 0), text, font=self.font)\n            text_width = right - left\n            text_height = bottom - top\n            text_w_padded = text_width + 2 * self.text_padding\n            text_h_padded = text_height + 2 * self.text_padding\n            text_background_xyxy = resolve_text_background_xyxy(\n                center_coordinates=tuple(center_coordinates),\n                text_wh=(text_w_padded, text_h_padded),\n                position=self.text_anchor,\n            )\n\n            text_x = text_background_xyxy[0] + self.text_padding - left\n            text_y = text_background_xyxy[1] + self.text_padding - top\n\n            draw.rounded_rectangle(\n                text_background_xyxy,\n                radius=self.border_radius,\n      
          fill=color.as_rgb(),\n                outline=None,\n            )\n            draw.text(\n                xy=(text_x, text_y),\n                text=text,\n                font=self.font,\n                fill=self.text_color.as_rgb(),\n            )\n\n        return scene\n
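
RichLabelAnnotator renders text with PIL, so it can draw non-Latin labels that cv2.putText cannot; if font_path cannot be loaded it falls back to PIL's default font. A sketch with a hypothetical font path and illustrative Unicode labels:

```python
import supervision as sv

image = ...
detections = sv.Detections(...)

rich_label_annotator = sv.RichLabelAnnotator(
    font_path="NotoSansJP-Regular.ttf",  # hypothetical path to a Unicode-capable font
    font_size=18,
)

# Labels may contain non-Latin text, e.g. translated class names.
labels = [f"物体 {confidence:.2f}" for confidence in detections.confidence]
annotated_frame = rich_label_annotator.annotate(
    scene=image.copy(),
    detections=detections,
    labels=labels
)
```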
BlurAnnotator

Bases: BaseAnnotator

A class for blurring regions in an image using provided detections.

Source code in supervision/annotators/core.py
class BlurAnnotator(BaseAnnotator):\n    \"\"\"\n    A class for blurring regions in an image using provided detections.\n    \"\"\"\n\n    def __init__(self, kernel_size: int = 15):\n        \"\"\"\n        Args:\n            kernel_size (int): The size of the average pooling kernel used for blurring.\n        \"\"\"\n        self.kernel_size: int = kernel_size\n\n    @convert_for_annotation_method\n    def annotate(\n        self,\n        scene: ImageType,\n        detections: Detections,\n    ) -> ImageType:\n        \"\"\"\n        Annotates the given scene by blurring regions based on the provided detections.\n\n        Args:\n            scene (ImageType): The image where blurring will be applied.\n                `ImageType` is a flexible type, accepting either `numpy.ndarray`\n                or `PIL.Image.Image`.\n            detections (Detections): Object detections to annotate.\n\n        Returns:\n            The annotated image, matching the type of `scene` (`numpy.ndarray`\n                or `PIL.Image.Image`)\n\n        Example:\n            ```python\n            import supervision as sv\n\n            image = ...\n            detections = sv.Detections(...)\n\n            blur_annotator = sv.BlurAnnotator()\n            annotated_frame = blur_annotator.annotate(\n                scene=image.copy(),\n                detections=detections\n            )\n            ```\n\n        ![blur-annotator-example](https://media.roboflow.com/\n        supervision-annotator-examples/blur-annotator-example-purple.png)\n        \"\"\"\n        image_height, image_width = scene.shape[:2]\n        clipped_xyxy = clip_boxes(\n            xyxy=detections.xyxy, resolution_wh=(image_width, image_height)\n        ).astype(int)\n\n        for x1, y1, x2, y2 in clipped_xyxy:\n            roi = scene[y1:y2, x1:x2]\n            roi = cv2.blur(roi, (self.kernel_size, self.kernel_size))\n            scene[y1:y2, x1:x2] = roi\n\n        return scene\n
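
A common use is anonymization: blur only one class (for example, people) and leave everything else untouched. The filter below assumes Detections supports NumPy-style boolean indexing (not shown on this page) and uses a hypothetical person class id.

```python
import supervision as sv

image = ...
detections = sv.Detections(...)

# Hypothetical class id for "person"; depends on your model's class mapping.
PERSON_CLASS_ID = 0
# Assumes Detections supports NumPy-style boolean indexing (not shown on this page).
person_detections = detections[detections.class_id == PERSON_CLASS_ID]

blur_annotator = sv.BlurAnnotator(kernel_size=25)
annotated_frame = blur_annotator.annotate(
    scene=image.copy(),
    detections=person_detections
)
```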
PixelateAnnotator

Bases: BaseAnnotator

A class for pixelating regions in an image using provided detections.

Source code in supervision/annotators/core.py
class PixelateAnnotator(BaseAnnotator):\n    \"\"\"\n    A class for pixelating regions in an image using provided detections.\n    \"\"\"\n\n    def __init__(self, pixel_size: int = 20):\n        \"\"\"\n        Args:\n            pixel_size (int): The size of the pixelation.\n        \"\"\"\n        self.pixel_size: int = pixel_size\n\n    @convert_for_annotation_method\n    def annotate(\n        self,\n        scene: ImageType,\n        detections: Detections,\n    ) -> ImageType:\n        \"\"\"\n        Annotates the given scene by pixelating regions based on the provided\n            detections.\n\n        Args:\n            scene (ImageType): The image where pixelating will be applied.\n                `ImageType` is a flexible type, accepting either `numpy.ndarray`\n                or `PIL.Image.Image`.\n            detections (Detections): Object detections to annotate.\n\n        Returns:\n            The annotated image, matching the type of `scene` (`numpy.ndarray`\n                or `PIL.Image.Image`)\n\n        Example:\n            ```python\n            import supervision as sv\n\n            image = ...\n            detections = sv.Detections(...)\n\n            pixelate_annotator = sv.PixelateAnnotator()\n            annotated_frame = pixelate_annotator.annotate(\n                scene=image.copy(),\n                detections=detections\n            )\n            ```\n\n        ![pixelate-annotator-example](https://media.roboflow.com/\n        supervision-annotator-examples/pixelate-annotator-example-10.png)\n        \"\"\"\n        image_height, image_width = scene.shape[:2]\n        clipped_xyxy = clip_boxes(\n            xyxy=detections.xyxy, resolution_wh=(image_width, image_height)\n        ).astype(int)\n\n        for x1, y1, x2, y2 in clipped_xyxy:\n            roi = scene[y1:y2, x1:x2]\n            scaled_up_roi = cv2.resize(\n                src=roi, dsize=None, fx=1 / self.pixel_size, fy=1 / self.pixel_size\n            )\n            scaled_down_roi = cv2.resize(\n                src=scaled_up_roi,\n                dsize=(roi.shape[1], roi.shape[0]),\n                interpolation=cv2.INTER_NEAREST,\n            )\n\n            scene[y1:y2, x1:x2] = scaled_down_roi\n\n        return scene\n
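
pixel_size controls how coarse the mosaic is: larger values produce chunkier blocks. A short sketch comparing two illustrative settings:

```python
import supervision as sv

image = ...
detections = sv.Detections(...)

subtle = sv.PixelateAnnotator(pixel_size=10)   # fine mosaic
strong = sv.PixelateAnnotator(pixel_size=40)   # coarse mosaic

lightly_pixelated = subtle.annotate(scene=image.copy(), detections=detections)
heavily_pixelated = strong.annotate(scene=image.copy(), detections=detections)
```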
TraceAnnotator

A class for drawing trace paths on an image based on detection coordinates.

Warning

This annotator uses sv.Detections.tracker_id. Read the trackers documentation (/latest/trackers/) to learn how to plug tracking into your inference pipeline.

Source code in supervision/annotators/core.py
class TraceAnnotator:\n    \"\"\"\n    A class for drawing trace paths on an image based on detection coordinates.\n\n    !!! warning\n\n        This annotator uses the `sv.Detections.tracker_id`. Read\n        [here](/latest/trackers/) to learn how to plug\n        tracking into your inference pipeline.\n    \"\"\"\n\n    def __init__(\n        self,\n        color: Union[Color, ColorPalette] = ColorPalette.DEFAULT,\n        position: Position = Position.CENTER,\n        trace_length: int = 30,\n        thickness: int = 2,\n        color_lookup: ColorLookup = ColorLookup.CLASS,\n    ):\n        \"\"\"\n        Args:\n            color (Union[Color, ColorPalette]): The color to draw the trace, can be\n                a single color or a color palette.\n            position (Position): The position of the trace.\n                Defaults to `CENTER`.\n            trace_length (int): The maximum length of the trace in terms of historical\n                points. Defaults to `30`.\n            thickness (int): The thickness of the trace lines. Defaults to `2`.\n            color_lookup (str): Strategy for mapping colors to annotations.\n                Options are `INDEX`, `CLASS`, `TRACK`.\n        \"\"\"\n        self.color: Union[Color, ColorPalette] = color\n        self.trace = Trace(max_size=trace_length, anchor=position)\n        self.thickness = thickness\n        self.color_lookup: ColorLookup = color_lookup\n\n    @convert_for_annotation_method\n    def annotate(\n        self,\n        scene: ImageType,\n        detections: Detections,\n        custom_color_lookup: Optional[np.ndarray] = None,\n    ) -> ImageType:\n        \"\"\"\n        Draws trace paths on the frame based on the detection coordinates provided.\n\n        Args:\n            scene (ImageType): The image on which the traces will be drawn.\n                `ImageType` is a flexible type, accepting either `numpy.ndarray`\n                or `PIL.Image.Image`.\n            detections (Detections): The detections which include coordinates for\n                which the traces will be drawn.\n            custom_color_lookup (Optional[np.ndarray]): Custom color lookup array.\n                Allows to override the default color mapping strategy.\n\n        Returns:\n            The annotated image, matching the type of `scene` (`numpy.ndarray`\n                or `PIL.Image.Image`)\n\n        Example:\n            ```python\n            import supervision as sv\n            from ultralytics import YOLO\n\n            model = YOLO('yolov8x.pt')\n            trace_annotator = sv.TraceAnnotator()\n\n            video_info = sv.VideoInfo.from_video_path(video_path='...')\n            frames_generator = sv.get_video_frames_generator(source_path='...')\n            tracker = sv.ByteTrack()\n\n            with sv.VideoSink(target_path='...', video_info=video_info) as sink:\n               for frame in frames_generator:\n                   result = model(frame)[0]\n                   detections = sv.Detections.from_ultralytics(result)\n                   detections = tracker.update_with_detections(detections)\n                   annotated_frame = trace_annotator.annotate(\n                       scene=frame.copy(),\n                       detections=detections)\n                   sink.write_frame(frame=annotated_frame)\n            ```\n\n        ![trace-annotator-example](https://media.roboflow.com/\n        supervision-annotator-examples/trace-annotator-example-purple.png)\n        \"\"\"\n        
self.trace.put(detections)\n\n        for detection_idx in range(len(detections)):\n            tracker_id = int(detections.tracker_id[detection_idx])\n            color = resolve_color(\n                color=self.color,\n                detections=detections,\n                detection_idx=detection_idx,\n                color_lookup=self.color_lookup\n                if custom_color_lookup is None\n                else custom_color_lookup,\n            )\n            xy = self.trace.get(tracker_id=tracker_id)\n            if len(xy) > 1:\n                scene = cv2.polylines(\n                    scene,\n                    [xy.astype(np.int32)],\n                    False,\n                    color=color.as_bgr(),\n                    thickness=self.thickness,\n                )\n        return scene\n
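
Because traces are keyed by tracker_id, coloring by the TRACK strategy keeps each object's path a stable color across frames, and trace_length bounds how much history is drawn. A sketch of the constructor options, intended for use inside a tracking loop like the docstring example; the values are illustrative and sv.ColorLookup is assumed to be a package-level export.

```python
import supervision as sv

trace_annotator = sv.TraceAnnotator(
    position=sv.Position.CENTER,
    trace_length=60,                    # roughly two seconds of history at 30 FPS
    thickness=2,
    color_lookup=sv.ColorLookup.TRACK,  # one stable color per tracked object
)
```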
CropAnnotator

Bases: BaseAnnotator

A class for drawing scaled up crops of detections on the scene.

Source code in supervision/annotators/core.py
class CropAnnotator(BaseAnnotator):\n    \"\"\"\n    A class for drawing scaled up crops of detections on the scene.\n    \"\"\"\n\n    def __init__(\n        self,\n        position: Position = Position.TOP_CENTER,\n        scale_factor: int = 2,\n        border_color: Union[Color, ColorPalette] = ColorPalette.DEFAULT,\n        border_thickness: int = 2,\n        border_color_lookup: ColorLookup = ColorLookup.CLASS,\n    ):\n        \"\"\"\n        Args:\n            position (Position): The anchor position for placing the cropped and scaled\n                part of the detection in the scene.\n            scale_factor (int): The factor by which to scale the cropped image part. A\n                factor of 2, for example, would double the size of the cropped area,\n                allowing for a closer view of the detection.\n            border_color (Union[Color, ColorPalette]): The color or color palette to\n                use for annotating border around the cropped area.\n            border_thickness (int): The thickness of the border around the cropped area.\n            border_color_lookup (ColorLookup): Strategy for mapping colors to\n                annotations. Options are `INDEX`, `CLASS`, `TRACK`.\n        \"\"\"\n        self.position: Position = position\n        self.scale_factor: int = scale_factor\n        self.border_color: Union[Color, ColorPalette] = border_color\n        self.border_thickness: int = border_thickness\n        self.border_color_lookup: ColorLookup = border_color_lookup\n\n    @convert_for_annotation_method\n    def annotate(\n        self,\n        scene: ImageType,\n        detections: Detections,\n        custom_color_lookup: Optional[np.ndarray] = None,\n    ) -> ImageType:\n        \"\"\"\n        Annotates the provided scene with scaled and cropped parts of the image based\n        on the provided detections. 
Each detection is cropped from the original scene\n        and scaled according to the annotator's scale factor before being placed back\n        onto the scene at the specified position.\n\n\n        Args:\n            scene (ImageType): The image where cropped detection will be placed.\n                `ImageType` is a flexible type, accepting either `numpy.ndarray`\n                or `PIL.Image.Image`.\n            detections (Detections): Object detections to annotate.\n            custom_color_lookup (Optional[np.ndarray]): Custom color lookup array.\n                Allows to override the default color mapping strategy.\n\n        Returns:\n            The annotated image.\n\n        Example:\n            ```python\n            import supervision as sv\n\n            image = ...\n            detections = sv.Detections(...)\n\n            crop_annotator = sv.CropAnnotator()\n            annotated_frame = crop_annotator.annotate(\n                scene=image.copy(),\n                detections=detections\n            )\n            ```\n        \"\"\"\n        crops = [\n            crop_image(image=scene, xyxy=xyxy) for xyxy in detections.xyxy.astype(int)\n        ]\n        resized_crops = [\n            scale_image(image=crop, scale_factor=self.scale_factor) for crop in crops\n        ]\n        anchors = detections.get_anchors_coordinates(anchor=self.position).astype(int)\n\n        for idx, (resized_crop, anchor) in enumerate(zip(resized_crops, anchors)):\n            crop_wh = resized_crop.shape[1], resized_crop.shape[0]\n            (x1, y1), (x2, y2) = self.calculate_crop_coordinates(\n                anchor=anchor, crop_wh=crop_wh, position=self.position\n            )\n            scene = overlay_image(\n                scene=scene, inserted_image=resized_crop, anchor=(x1, y1)\n            )\n            color = resolve_color(\n                color=self.border_color,\n                detections=detections,\n                detection_idx=idx,\n                color_lookup=self.border_color_lookup\n                if custom_color_lookup is None\n                else custom_color_lookup,\n            )\n            cv2.rectangle(\n                img=scene,\n                pt1=(x1, y1),\n                pt2=(x2, y2),\n                color=color.as_bgr(),\n                thickness=self.border_thickness,\n            )\n\n        return scene\n\n    @staticmethod\n    def calculate_crop_coordinates(\n        anchor: Tuple[int, int], crop_wh: Tuple[int, int], position: Position\n    ) -> Tuple[Tuple[int, int], Tuple[int, int]]:\n        anchor_x, anchor_y = anchor\n        width, height = crop_wh\n\n        if position == Position.TOP_LEFT:\n            return (anchor_x - width, anchor_y - height), (anchor_x, anchor_y)\n        elif position == Position.TOP_CENTER:\n            return (\n                (anchor_x - width // 2, anchor_y - height),\n                (anchor_x + width // 2, anchor_y),\n            )\n        elif position == Position.TOP_RIGHT:\n            return (anchor_x, anchor_y - height), (anchor_x + width, anchor_y)\n        elif position == Position.CENTER_LEFT:\n            return (\n                (anchor_x - width, anchor_y - height // 2),\n                (anchor_x, anchor_y + height // 2),\n            )\n        elif position == Position.CENTER or position == Position.CENTER_OF_MASS:\n            return (\n                (anchor_x - width // 2, anchor_y - height // 2),\n                (anchor_x + width // 2, anchor_y + height // 2),\n            
)\n        elif position == Position.CENTER_RIGHT:\n            return (\n                (anchor_x, anchor_y - height // 2),\n                (anchor_x + width, anchor_y + height // 2),\n            )\n        elif position == Position.BOTTOM_LEFT:\n            return (anchor_x - width, anchor_y), (anchor_x, anchor_y + height)\n        elif position == Position.BOTTOM_CENTER:\n            return (\n                (anchor_x - width // 2, anchor_y),\n                (anchor_x + width // 2, anchor_y + height),\n            )\n        elif position == Position.BOTTOM_RIGHT:\n            return (anchor_x, anchor_y), (anchor_x + width, anchor_y + height)\n
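
CropAnnotator pastes an enlarged copy of each detection back onto the scene, so it pairs naturally with a box annotator that marks the original location. A sketch with illustrative settings:

```python
import supervision as sv

image = ...
detections = sv.Detections(...)

crop_annotator = sv.CropAnnotator(
    position=sv.Position.TOP_CENTER,  # where the enlarged crop is anchored
    scale_factor=2,                   # 2x zoom of each detection
    border_thickness=2,
)
box_annotator = sv.BoundingBoxAnnotator(thickness=2)

annotated_frame = box_annotator.annotate(scene=image.copy(), detections=detections)
annotated_frame = crop_annotator.annotate(scene=annotated_frame, detections=detections)
```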
ColorLookup

Bases: Enum

Enumeration class to define strategies for mapping colors to annotations.

This enum supports three different lookup strategies:
  • INDEX: Colors are determined by the index of the detection within the scene.
  • CLASS: Colors are determined by the class label of the detected object.
  • TRACK: Colors are determined by the tracking identifier of the object.
Source code in supervision/annotators/utils.py
class ColorLookup(Enum):\n    \"\"\"\n    Enumeration class to define strategies for mapping colors to annotations.\n\n    This enum supports three different lookup strategies:\n        - `INDEX`: Colors are determined by the index of the detection within the scene.\n        - `CLASS`: Colors are determined by the class label of the detected object.\n        - `TRACK`: Colors are determined by the tracking identifier of the object.\n    \"\"\"\n\n    INDEX = \"index\"\n    CLASS = \"class\"\n    TRACK = \"track\"\n\n    @classmethod\n    def list(cls):\n        return list(map(lambda c: c.value, cls))\n
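
The same detections can therefore be colored three different ways depending on the chosen strategy; TRACK only makes sense once a tracker has assigned tracker_id values. A sketch, assuming ColorLookup is exported at the package level as sv.ColorLookup:

```python
import supervision as sv

image = ...
detections = sv.Detections(...)  # tracker_id must be set for the TRACK strategy

by_index = sv.BoundingBoxAnnotator(color_lookup=sv.ColorLookup.INDEX)
by_class = sv.BoundingBoxAnnotator(color_lookup=sv.ColorLookup.CLASS)
by_track = sv.BoundingBoxAnnotator(color_lookup=sv.ColorLookup.TRACK)

print(sv.ColorLookup.list())  # ['index', 'class', 'track']
```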
"},{"location":"detection/annotators/#supervision.annotators.core.BoundingBoxAnnotator-functions","title":"Functions","text":""},{"location":"detection/annotators/#supervision.annotators.core.BoundingBoxAnnotator.__init__","title":"__init__(color=ColorPalette.DEFAULT, thickness=2, color_lookup=ColorLookup.CLASS)","text":"

Parameters:

  • color (Union[Color, ColorPalette]): The color or color palette to use for annotating detections. Default: ColorPalette.DEFAULT
  • thickness (int): Thickness of the bounding box lines. Default: 2
  • color_lookup (str): Strategy for mapping colors to annotations. Options are INDEX, CLASS, TRACK. Default: ColorLookup.CLASS

Source code in supervision/annotators/core.py
def __init__(\n    self,\n    color: Union[Color, ColorPalette] = ColorPalette.DEFAULT,\n    thickness: int = 2,\n    color_lookup: ColorLookup = ColorLookup.CLASS,\n):\n    \"\"\"\n    Args:\n        color (Union[Color, ColorPalette]): The color or color palette to use for\n            annotating detections.\n        thickness (int): Thickness of the bounding box lines.\n        color_lookup (str): Strategy for mapping colors to annotations.\n            Options are `INDEX`, `CLASS`, `TRACK`.\n    \"\"\"\n    self.color: Union[Color, ColorPalette] = color\n    self.thickness: int = thickness\n    self.color_lookup: ColorLookup = color_lookup\n
"},{"location":"detection/annotators/#supervision.annotators.core.BoundingBoxAnnotator.annotate","title":"annotate(scene, detections, custom_color_lookup=None)","text":"

Annotates the given scene with bounding boxes based on the provided detections.

Parameters:

  • scene (ImageType): The image where bounding boxes will be drawn. ImageType is a flexible type, accepting either numpy.ndarray or PIL.Image.Image. Required.
  • detections (Detections): Object detections to annotate. Required.
  • custom_color_lookup (Optional[ndarray]): Custom color lookup array. Allows overriding the default color mapping strategy. Default: None

Returns:

  • ImageType: The annotated image, matching the type of scene (numpy.ndarray or PIL.Image.Image).

Example
import supervision as sv\n\nimage = ...\ndetections = sv.Detections(...)\n\nbounding_box_annotator = sv.BoundingBoxAnnotator()\nannotated_frame = bounding_box_annotator.annotate(\n    scene=image.copy(),\n    detections=detections\n)\n

Source code in supervision/annotators/core.py
@convert_for_annotation_method\ndef annotate(\n    self,\n    scene: ImageType,\n    detections: Detections,\n    custom_color_lookup: Optional[np.ndarray] = None,\n) -> ImageType:\n    \"\"\"\n    Annotates the given scene with bounding boxes based on the provided detections.\n\n    Args:\n        scene (ImageType): The image where bounding boxes will be drawn. `ImageType`\n        is a flexible type, accepting either `numpy.ndarray` or `PIL.Image.Image`.\n        detections (Detections): Object detections to annotate.\n        custom_color_lookup (Optional[np.ndarray]): Custom color lookup array.\n            Allows to override the default color mapping strategy.\n\n    Returns:\n        The annotated image, matching the type of `scene` (`numpy.ndarray`\n            or `PIL.Image.Image`)\n\n    Example:\n        ```python\n        import supervision as sv\n\n        image = ...\n        detections = sv.Detections(...)\n\n        bounding_box_annotator = sv.BoundingBoxAnnotator()\n        annotated_frame = bounding_box_annotator.annotate(\n            scene=image.copy(),\n            detections=detections\n        )\n        ```\n\n    ![bounding-box-annotator-example](https://media.roboflow.com/\n    supervision-annotator-examples/bounding-box-annotator-example-purple.png)\n    \"\"\"\n    for detection_idx in range(len(detections)):\n        x1, y1, x2, y2 = detections.xyxy[detection_idx].astype(int)\n        color = resolve_color(\n            color=self.color,\n            detections=detections,\n            detection_idx=detection_idx,\n            color_lookup=self.color_lookup\n            if custom_color_lookup is None\n            else custom_color_lookup,\n        )\n        cv2.rectangle(\n            img=scene,\n            pt1=(x1, y1),\n            pt2=(x2, y2),\n            color=color.as_bgr(),\n            thickness=self.thickness,\n        )\n    return scene\n
"},{"location":"detection/annotators/#supervision.annotators.core.RoundBoxAnnotator-functions","title":"Functions","text":""},{"location":"detection/annotators/#supervision.annotators.core.RoundBoxAnnotator.__init__","title":"__init__(color=ColorPalette.DEFAULT, thickness=2, color_lookup=ColorLookup.CLASS, roundness=0.6)","text":"

Parameters:

  • color (Union[Color, ColorPalette]): The color or color palette to use for annotating detections. Default: ColorPalette.DEFAULT
  • thickness (int): Thickness of the bounding box lines. Default: 2
  • color_lookup (str): Strategy for mapping colors to annotations. Options are INDEX, CLASS, TRACK. Default: ColorLookup.CLASS
  • roundness (float): Percent of roundness for the bounding box edges. Must be a float in the range (0, 1.0]; the corner radius is calculated from the smaller side (width or height). Default: 0.6

Source code in supervision/annotators/core.py
def __init__(\n    self,\n    color: Union[Color, ColorPalette] = ColorPalette.DEFAULT,\n    thickness: int = 2,\n    color_lookup: ColorLookup = ColorLookup.CLASS,\n    roundness: float = 0.6,\n):\n    \"\"\"\n    Args:\n        color (Union[Color, ColorPalette]): The color or color palette to use for\n            annotating detections.\n        thickness (int): Thickness of the bounding box lines.\n        color_lookup (str): Strategy for mapping colors to annotations.\n            Options are `INDEX`, `CLASS`, `TRACK`.\n        roundness (float): Percent of roundness for edges of bounding box.\n            Value must be float 0 < roundness <= 1.0\n            By default roundness percent is calculated based on smaller side\n            length (width or height).\n    \"\"\"\n    self.color: Union[Color, ColorPalette] = color\n    self.thickness: int = thickness\n    self.color_lookup: ColorLookup = color_lookup\n    if not 0 < roundness <= 1.0:\n        raise ValueError(\"roundness attribute must be float between (0, 1.0]\")\n    self.roundness: float = roundness\n
"},{"location":"detection/annotators/#supervision.annotators.core.RoundBoxAnnotator.annotate","title":"annotate(scene, detections, custom_color_lookup=None)","text":"

Annotates the given scene with bounding boxes with rounded edges based on the provided detections.

Parameters:

  • scene (ImageType): The image where rounded bounding boxes will be drawn. ImageType is a flexible type, accepting either numpy.ndarray or PIL.Image.Image. Required.
  • detections (Detections): Object detections to annotate. Required.
  • custom_color_lookup (Optional[ndarray]): Custom color lookup array. Allows overriding the default color mapping strategy. Default: None

Returns:

  • ImageType: The annotated image, matching the type of scene (numpy.ndarray or PIL.Image.Image).

Example
import supervision as sv\n\nimage = ...\ndetections = sv.Detections(...)\n\nround_box_annotator = sv.RoundBoxAnnotator()\nannotated_frame = round_box_annotator.annotate(\n    scene=image.copy(),\n    detections=detections\n)\n

Source code in supervision/annotators/core.py
@convert_for_annotation_method\ndef annotate(\n    self,\n    scene: ImageType,\n    detections: Detections,\n    custom_color_lookup: Optional[np.ndarray] = None,\n) -> ImageType:\n    \"\"\"\n    Annotates the given scene with bounding boxes with rounded edges\n    based on the provided detections.\n\n    Args:\n        scene (ImageType): The image where rounded bounding boxes will be drawn.\n            `ImageType` is a flexible type, accepting either `numpy.ndarray`\n            or `PIL.Image.Image`.\n        detections (Detections): Object detections to annotate.\n        custom_color_lookup (Optional[np.ndarray]): Custom color lookup array.\n            Allows to override the default color mapping strategy.\n\n    Returns:\n        The annotated image, matching the type of `scene` (`numpy.ndarray`\n            or `PIL.Image.Image`)\n\n    Example:\n        ```python\n        import supervision as sv\n\n        image = ...\n        detections = sv.Detections(...)\n\n        round_box_annotator = sv.RoundBoxAnnotator()\n        annotated_frame = round_box_annotator.annotate(\n            scene=image.copy(),\n            detections=detections\n        )\n        ```\n\n    ![round-box-annotator-example](https://media.roboflow.com/\n    supervision-annotator-examples/round-box-annotator-example-purple.png)\n    \"\"\"\n\n    for detection_idx in range(len(detections)):\n        x1, y1, x2, y2 = detections.xyxy[detection_idx].astype(int)\n        color = resolve_color(\n            color=self.color,\n            detections=detections,\n            detection_idx=detection_idx,\n            color_lookup=self.color_lookup\n            if custom_color_lookup is None\n            else custom_color_lookup,\n        )\n\n        radius = (\n            int((x2 - x1) // 2 * self.roundness)\n            if abs(x1 - x2) < abs(y1 - y2)\n            else int((y2 - y1) // 2 * self.roundness)\n        )\n\n        circle_coordinates = [\n            ((x1 + radius), (y1 + radius)),\n            ((x2 - radius), (y1 + radius)),\n            ((x2 - radius), (y2 - radius)),\n            ((x1 + radius), (y2 - radius)),\n        ]\n\n        line_coordinates = [\n            ((x1 + radius, y1), (x2 - radius, y1)),\n            ((x2, y1 + radius), (x2, y2 - radius)),\n            ((x1 + radius, y2), (x2 - radius, y2)),\n            ((x1, y1 + radius), (x1, y2 - radius)),\n        ]\n\n        start_angles = (180, 270, 0, 90)\n        end_angles = (270, 360, 90, 180)\n\n        for center_coordinates, line, start_angle, end_angle in zip(\n            circle_coordinates, line_coordinates, start_angles, end_angles\n        ):\n            cv2.ellipse(\n                img=scene,\n                center=center_coordinates,\n                axes=(radius, radius),\n                angle=0,\n                startAngle=start_angle,\n                endAngle=end_angle,\n                color=color.as_bgr(),\n                thickness=self.thickness,\n            )\n\n            cv2.line(\n                img=scene,\n                pt1=line[0],\n                pt2=line[1],\n                color=color.as_bgr(),\n                thickness=self.thickness,\n            )\n\n    return scene\n
"},{"location":"detection/annotators/#supervision.annotators.core.BoxCornerAnnotator-functions","title":"Functions","text":""},{"location":"detection/annotators/#supervision.annotators.core.BoxCornerAnnotator.__init__","title":"__init__(color=ColorPalette.DEFAULT, thickness=4, corner_length=15, color_lookup=ColorLookup.CLASS)","text":"

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `color` | `Union[Color, ColorPalette]` | The color or color palette to use for annotating detections. | `DEFAULT` |
| `thickness` | `int` | Thickness of the corner lines. | `4` |
| `corner_length` | `int` | Length of each corner line. | `15` |
| `color_lookup` | `str` | Strategy for mapping colors to annotations. Options are `INDEX`, `CLASS`, `TRACK`. | `CLASS` |

Source code in supervision/annotators/core.py
def __init__(\n    self,\n    color: Union[Color, ColorPalette] = ColorPalette.DEFAULT,\n    thickness: int = 4,\n    corner_length: int = 15,\n    color_lookup: ColorLookup = ColorLookup.CLASS,\n):\n    \"\"\"\n    Args:\n        color (Union[Color, ColorPalette]): The color or color palette to use for\n            annotating detections.\n        thickness (int): Thickness of the corner lines.\n        corner_length (int): Length of each corner line.\n        color_lookup (str): Strategy for mapping colors to annotations.\n            Options are `INDEX`, `CLASS`, `TRACK`.\n    \"\"\"\n    self.color: Union[Color, ColorPalette] = color\n    self.thickness: int = thickness\n    self.corner_length: int = corner_length\n    self.color_lookup: ColorLookup = color_lookup\n
"},{"location":"detection/annotators/#supervision.annotators.core.BoxCornerAnnotator.annotate","title":"annotate(scene, detections, custom_color_lookup=None)","text":"

Annotates the given scene with box corners based on the provided detections.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `scene` | `ImageType` | The image where box corners will be drawn. `ImageType` is a flexible type, accepting either `numpy.ndarray` or `PIL.Image.Image`. | required |
| `detections` | `Detections` | Object detections to annotate. | required |
| `custom_color_lookup` | `Optional[ndarray]` | Custom color lookup array. Allows overriding the default color mapping strategy. | `None` |

Returns:

| Type | Description |
| --- | --- |
| `ImageType` | The annotated image, matching the type of `scene` (`numpy.ndarray` or `PIL.Image.Image`). |

Example

```python
import supervision as sv

image = ...
detections = sv.Detections(...)

corner_annotator = sv.BoxCornerAnnotator()
annotated_frame = corner_annotator.annotate(
    scene=image.copy(),
    detections=detections
)
```

Source code in supervision/annotators/core.py
@convert_for_annotation_method\ndef annotate(\n    self,\n    scene: ImageType,\n    detections: Detections,\n    custom_color_lookup: Optional[np.ndarray] = None,\n) -> ImageType:\n    \"\"\"\n    Annotates the given scene with box corners based on the provided detections.\n\n    Args:\n        scene (ImageType): The image where box corners will be drawn.\n            `ImageType` is a flexible type, accepting either `numpy.ndarray`\n            or `PIL.Image.Image`.\n        detections (Detections): Object detections to annotate.\n        custom_color_lookup (Optional[np.ndarray]): Custom color lookup array.\n            Allows to override the default color mapping strategy.\n\n    Returns:\n        The annotated image, matching the type of `scene` (`numpy.ndarray`\n            or `PIL.Image.Image`)\n\n    Example:\n        ```python\n        import supervision as sv\n\n        image = ...\n        detections = sv.Detections(...)\n\n        corner_annotator = sv.BoxCornerAnnotator()\n        annotated_frame = corner_annotator.annotate(\n            scene=image.copy(),\n            detections=detections\n        )\n        ```\n\n    ![box-corner-annotator-example](https://media.roboflow.com/\n    supervision-annotator-examples/box-corner-annotator-example-purple.png)\n    \"\"\"\n    for detection_idx in range(len(detections)):\n        x1, y1, x2, y2 = detections.xyxy[detection_idx].astype(int)\n        color = resolve_color(\n            color=self.color,\n            detections=detections,\n            detection_idx=detection_idx,\n            color_lookup=self.color_lookup\n            if custom_color_lookup is None\n            else custom_color_lookup,\n        )\n        corners = [(x1, y1), (x2, y1), (x1, y2), (x2, y2)]\n\n        for x, y in corners:\n            x_end = x + self.corner_length if x == x1 else x - self.corner_length\n            cv2.line(\n                scene, (x, y), (x_end, y), color.as_bgr(), thickness=self.thickness\n            )\n\n            y_end = y + self.corner_length if y == y1 else y - self.corner_length\n            cv2.line(\n                scene, (x, y), (x, y_end), color.as_bgr(), thickness=self.thickness\n            )\n    return scene\n
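The `custom_color_lookup` argument overrides the configured lookup strategy for a single call. The sketch below assumes each array entry acts as a palette index for the corresponding detection, and uses synthetic boxes:

```python
import numpy as np
import supervision as sv

image = np.zeros((480, 640, 3), dtype=np.uint8)
detections = sv.Detections(
    xyxy=np.array(
        [[40, 50, 200, 280], [260, 70, 430, 310], [470, 100, 620, 330]],
        dtype=float,
    ),
    class_id=np.array([0, 0, 0]),
)

# one palette index per detection; the middle box gets a different color
# even though all three detections share the same class id
custom_lookup = np.array([0, 1, 0])

corner_annotator = sv.BoxCornerAnnotator(corner_length=25, thickness=3)
annotated_frame = corner_annotator.annotate(
    scene=image.copy(),
    detections=detections,
    custom_color_lookup=custom_lookup,
)
```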
"},{"location":"detection/annotators/#supervision.annotators.core.OrientedBoxAnnotator-functions","title":"Functions","text":""},{"location":"detection/annotators/#supervision.annotators.core.OrientedBoxAnnotator.__init__","title":"__init__(color=ColorPalette.DEFAULT, thickness=2, color_lookup=ColorLookup.CLASS)","text":"

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `color` | `Union[Color, ColorPalette]` | The color or color palette to use for annotating detections. | `DEFAULT` |
| `thickness` | `int` | Thickness of the bounding box lines. | `2` |
| `color_lookup` | `str` | Strategy for mapping colors to annotations. Options are `INDEX`, `CLASS`, `TRACK`. | `CLASS` |

Source code in supervision/annotators/core.py
def __init__(\n    self,\n    color: Union[Color, ColorPalette] = ColorPalette.DEFAULT,\n    thickness: int = 2,\n    color_lookup: ColorLookup = ColorLookup.CLASS,\n):\n    \"\"\"\n    Args:\n        color (Union[Color, ColorPalette]): The color or color palette to use for\n            annotating detections.\n        thickness (int): Thickness of the bounding box lines.\n        color_lookup (str): Strategy for mapping colors to annotations.\n            Options are `INDEX`, `CLASS`, `TRACK`.\n    \"\"\"\n    self.color: Union[Color, ColorPalette] = color\n    self.thickness: int = thickness\n    self.color_lookup: ColorLookup = color_lookup\n
"},{"location":"detection/annotators/#supervision.annotators.core.OrientedBoxAnnotator.annotate","title":"annotate(scene, detections, custom_color_lookup=None)","text":"

Annotates the given scene with oriented bounding boxes based on the provided detections.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `scene` | `ImageType` | The image where bounding boxes will be drawn. `ImageType` is a flexible type, accepting either `numpy.ndarray` or `PIL.Image.Image`. | required |
| `detections` | `Detections` | Object detections to annotate. | required |
| `custom_color_lookup` | `Optional[ndarray]` | Custom color lookup array. Allows overriding the default color mapping strategy. | `None` |

Returns:

| Type | Description |
| --- | --- |
| `ImageType` | The annotated image, matching the type of `scene` (`numpy.ndarray` or `PIL.Image.Image`). |

Example

```python
import cv2
import supervision as sv
from ultralytics import YOLO

image = cv2.imread(<SOURCE_IMAGE_PATH>)
model = YOLO("yolov8n-obb.pt")

result = model(image)[0]
detections = sv.Detections.from_ultralytics(result)

oriented_box_annotator = sv.OrientedBoxAnnotator()
annotated_frame = oriented_box_annotator.annotate(
    scene=image.copy(),
    detections=detections
)
```
Source code in supervision/annotators/core.py
@convert_for_annotation_method\ndef annotate(\n    self,\n    scene: ImageType,\n    detections: Detections,\n    custom_color_lookup: Optional[np.ndarray] = None,\n) -> ImageType:\n    \"\"\"\n    Annotates the given scene with oriented bounding boxes based on the provided detections.\n\n    Args:\n        scene (ImageType): The image where bounding boxes will be drawn.\n            `ImageType` is a flexible type, accepting either `numpy.ndarray`\n            or `PIL.Image.Image`.\n        detections (Detections): Object detections to annotate.\n        custom_color_lookup (Optional[np.ndarray]): Custom color lookup array.\n            Allows to override the default color mapping strategy.\n\n    Returns:\n        The annotated image, matching the type of `scene` (`numpy.ndarray`\n            or `PIL.Image.Image`)\n\n    Example:\n        ```python\n        import cv2\n        import supervision as sv\n        from ultralytics import YOLO\n\n        image = cv2.imread(<SOURCE_IMAGE_PATH>)\n        model = YOLO(\"yolov8n-obb.pt\")\n\n        result = model(image)[0]\n        detections = sv.Detections.from_ultralytics(result)\n\n        oriented_box_annotator = sv.OrientedBoxAnnotator()\n        annotated_frame = oriented_box_annotator.annotate(\n            scene=image.copy(),\n            detections=detections\n        )\n        ```\n    \"\"\"  # noqa E501 // docs\n\n    if detections.data is None or ORIENTED_BOX_COORDINATES not in detections.data:\n        return scene\n\n    for detection_idx in range(len(detections)):\n        bbox = np.intp(detections.data.get(ORIENTED_BOX_COORDINATES)[detection_idx])\n        color = resolve_color(\n            color=self.color,\n            detections=detections,\n            detection_idx=detection_idx,\n            color_lookup=self.color_lookup\n            if custom_color_lookup is None\n            else custom_color_lookup,\n        )\n\n        cv2.drawContours(scene, [bbox], 0, color.as_bgr(), self.thickness)\n\n    return scene\n
"},{"location":"detection/annotators/#supervision.annotators.core.ColorAnnotator-functions","title":"Functions","text":""},{"location":"detection/annotators/#supervision.annotators.core.ColorAnnotator.__init__","title":"__init__(color=ColorPalette.DEFAULT, opacity=0.5, color_lookup=ColorLookup.CLASS)","text":"

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `color` | `Union[Color, ColorPalette]` | The color or color palette to use for annotating detections. | `DEFAULT` |
| `opacity` | `float` | Opacity of the overlay mask. Must be between `0` and `1`. | `0.5` |
| `color_lookup` | `str` | Strategy for mapping colors to annotations. Options are `INDEX`, `CLASS`, `TRACK`. | `CLASS` |

Source code in supervision/annotators/core.py
def __init__(\n    self,\n    color: Union[Color, ColorPalette] = ColorPalette.DEFAULT,\n    opacity: float = 0.5,\n    color_lookup: ColorLookup = ColorLookup.CLASS,\n):\n    \"\"\"\n    Args:\n        color (Union[Color, ColorPalette]): The color or color palette to use for\n            annotating detections.\n        opacity (float): Opacity of the overlay mask. Must be between `0` and `1`.\n        color_lookup (str): Strategy for mapping colors to annotations.\n            Options are `INDEX`, `CLASS`, `TRACK`.\n    \"\"\"\n    self.color: Union[Color, ColorPalette] = color\n    self.color_lookup: ColorLookup = color_lookup\n    self.opacity = opacity\n
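A configuration sketch with a lighter overlay and index-based coloring. It assumes the `ColorLookup` enum behind the `INDEX` / `CLASS` / `TRACK` options is exported as `sv.ColorLookup`; the image and detections are synthetic:

```python
import numpy as np
import supervision as sv

image = np.full((480, 640, 3), 255, dtype=np.uint8)
detections = sv.Detections(
    xyxy=np.array([[40, 50, 200, 280], [260, 70, 430, 310]], dtype=float),
    class_id=np.array([0, 0]),
)

# a fainter fill, with colors picked by detection index instead of class id
color_annotator = sv.ColorAnnotator(opacity=0.3, color_lookup=sv.ColorLookup.INDEX)
annotated_frame = color_annotator.annotate(scene=image.copy(), detections=detections)
```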
"},{"location":"detection/annotators/#supervision.annotators.core.ColorAnnotator.annotate","title":"annotate(scene, detections, custom_color_lookup=None)","text":"

Annotates the given scene with box masks based on the provided detections.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `scene` | `ImageType` | The image where bounding boxes will be drawn. `ImageType` is a flexible type, accepting either `numpy.ndarray` or `PIL.Image.Image`. | required |
| `detections` | `Detections` | Object detections to annotate. | required |
| `custom_color_lookup` | `Optional[ndarray]` | Custom color lookup array. Allows overriding the default color mapping strategy. | `None` |

Returns:

| Type | Description |
| --- | --- |
| `ImageType` | The annotated image, matching the type of `scene` (`numpy.ndarray` or `PIL.Image.Image`). |

Example

```python
import supervision as sv

image = ...
detections = sv.Detections(...)

color_annotator = sv.ColorAnnotator()
annotated_frame = color_annotator.annotate(
    scene=image.copy(),
    detections=detections
)
```

Source code in supervision/annotators/core.py
@convert_for_annotation_method\ndef annotate(\n    self,\n    scene: ImageType,\n    detections: Detections,\n    custom_color_lookup: Optional[np.ndarray] = None,\n) -> ImageType:\n    \"\"\"\n    Annotates the given scene with box masks based on the provided detections.\n\n    Args:\n        scene (ImageType): The image where bounding boxes will be drawn.\n            `ImageType` is a flexible type, accepting either `numpy.ndarray`\n            or `PIL.Image.Image`.\n        detections (Detections): Object detections to annotate.\n        custom_color_lookup (Optional[np.ndarray]): Custom color lookup array.\n            Allows to override the default color mapping strategy.\n\n    Returns:\n        The annotated image, matching the type of `scene` (`numpy.ndarray`\n            or `PIL.Image.Image`)\n\n    Example:\n        ```python\n        import supervision as sv\n\n        image = ...\n        detections = sv.Detections(...)\n\n        color_annotator = sv.ColorAnnotator()\n        annotated_frame = color_annotator.annotate(\n            scene=image.copy(),\n            detections=detections\n        )\n        ```\n\n    ![box-mask-annotator-example](https://media.roboflow.com/\n    supervision-annotator-examples/box-mask-annotator-example-purple.png)\n    \"\"\"\n    mask_image = scene.copy()\n    for detection_idx in range(len(detections)):\n        x1, y1, x2, y2 = detections.xyxy[detection_idx].astype(int)\n        color = resolve_color(\n            color=self.color,\n            detections=detections,\n            detection_idx=detection_idx,\n            color_lookup=self.color_lookup\n            if custom_color_lookup is None\n            else custom_color_lookup,\n        )\n        cv2.rectangle(\n            img=scene,\n            pt1=(x1, y1),\n            pt2=(x2, y2),\n            color=color.as_bgr(),\n            thickness=-1,\n        )\n    scene = cv2.addWeighted(\n        scene, self.opacity, mask_image, 1 - self.opacity, gamma=0\n    )\n    return scene\n
"},{"location":"detection/annotators/#supervision.annotators.core.CircleAnnotator-functions","title":"Functions","text":""},{"location":"detection/annotators/#supervision.annotators.core.CircleAnnotator.__init__","title":"__init__(color=ColorPalette.DEFAULT, thickness=2, color_lookup=ColorLookup.CLASS)","text":"

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `color` | `Union[Color, ColorPalette]` | The color or color palette to use for annotating detections. | `DEFAULT` |
| `thickness` | `int` | Thickness of the circle line. | `2` |
| `color_lookup` | `str` | Strategy for mapping colors to annotations. Options are `INDEX`, `CLASS`, `TRACK`. | `CLASS` |

Source code in supervision/annotators/core.py
def __init__(\n    self,\n    color: Union[Color, ColorPalette] = ColorPalette.DEFAULT,\n    thickness: int = 2,\n    color_lookup: ColorLookup = ColorLookup.CLASS,\n):\n    \"\"\"\n    Args:\n        color (Union[Color, ColorPalette]): The color or color palette to use for\n            annotating detections.\n        thickness (int): Thickness of the circle line.\n        color_lookup (str): Strategy for mapping colors to annotations.\n            Options are `INDEX`, `CLASS`, `TRACK`.\n    \"\"\"\n\n    self.color: Union[Color, ColorPalette] = color\n    self.thickness: int = thickness\n    self.color_lookup: ColorLookup = color_lookup\n
"},{"location":"detection/annotators/#supervision.annotators.core.CircleAnnotator.annotate","title":"annotate(scene, detections, custom_color_lookup=None)","text":"

Annotates the given scene with circles based on the provided detections.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `scene` | `ImageType` | The image where circles will be drawn. `ImageType` is a flexible type, accepting either `numpy.ndarray` or `PIL.Image.Image`. | required |
| `detections` | `Detections` | Object detections to annotate. | required |
| `custom_color_lookup` | `Optional[ndarray]` | Custom color lookup array. Allows overriding the default color mapping strategy. | `None` |

Returns:

| Type | Description |
| --- | --- |
| `ImageType` | The annotated image, matching the type of `scene` (`numpy.ndarray` or `PIL.Image.Image`). |

Example

```python
import supervision as sv

image = ...
detections = sv.Detections(...)

circle_annotator = sv.CircleAnnotator()
annotated_frame = circle_annotator.annotate(
    scene=image.copy(),
    detections=detections
)
```

Source code in supervision/annotators/core.py
@convert_for_annotation_method\ndef annotate(\n    self,\n    scene: ImageType,\n    detections: Detections,\n    custom_color_lookup: Optional[np.ndarray] = None,\n) -> ImageType:\n    \"\"\"\n    Annotates the given scene with circles based on the provided detections.\n\n    Args:\n        scene (ImageType): The image where box corners will be drawn.\n            `ImageType` is a flexible type, accepting either `numpy.ndarray`\n            or `PIL.Image.Image`.\n        detections (Detections): Object detections to annotate.\n        custom_color_lookup (Optional[np.ndarray]): Custom color lookup array.\n            Allows to override the default color mapping strategy.\n\n    Returns:\n        The annotated image, matching the type of `scene` (`numpy.ndarray`\n            or `PIL.Image.Image`)\n\n    Example:\n        ```python\n        import supervision as sv\n\n        image = ...\n        detections = sv.Detections(...)\n\n        circle_annotator = sv.CircleAnnotator()\n        annotated_frame = circle_annotator.annotate(\n            scene=image.copy(),\n            detections=detections\n        )\n        ```\n\n\n    ![circle-annotator-example](https://media.roboflow.com/\n    supervision-annotator-examples/circle-annotator-example-purple.png)\n    \"\"\"\n    for detection_idx in range(len(detections)):\n        x1, y1, x2, y2 = detections.xyxy[detection_idx].astype(int)\n        center = ((x1 + x2) // 2, (y1 + y2) // 2)\n        distance = sqrt((x1 - center[0]) ** 2 + (y1 - center[1]) ** 2)\n        color = resolve_color(\n            color=self.color,\n            detections=detections,\n            detection_idx=detection_idx,\n            color_lookup=self.color_lookup\n            if custom_color_lookup is None\n            else custom_color_lookup,\n        )\n        cv2.circle(\n            img=scene,\n            center=center,\n            radius=int(distance),\n            color=color.as_bgr(),\n            thickness=self.thickness,\n        )\n\n    return scene\n
"},{"location":"detection/annotators/#supervision.annotators.core.DotAnnotator-functions","title":"Functions","text":""},{"location":"detection/annotators/#supervision.annotators.core.DotAnnotator.__init__","title":"__init__(color=ColorPalette.DEFAULT, radius=4, position=Position.CENTER, color_lookup=ColorLookup.CLASS)","text":"

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `color` | `Union[Color, ColorPalette]` | The color or color palette to use for annotating detections. | `DEFAULT` |
| `radius` | `int` | Radius of the drawn dots. | `4` |
| `position` | `Position` | The anchor position for placing the dot. | `CENTER` |
| `color_lookup` | `ColorLookup` | Strategy for mapping colors to annotations. Options are `INDEX`, `CLASS`, `TRACK`. | `CLASS` |

Source code in supervision/annotators/core.py
def __init__(\n    self,\n    color: Union[Color, ColorPalette] = ColorPalette.DEFAULT,\n    radius: int = 4,\n    position: Position = Position.CENTER,\n    color_lookup: ColorLookup = ColorLookup.CLASS,\n):\n    \"\"\"\n    Args:\n        color (Union[Color, ColorPalette]): The color or color palette to use for\n            annotating detections.\n        radius (int): Radius of the drawn dots.\n        position (Position): The anchor position for placing the dot.\n        color_lookup (ColorLookup): Strategy for mapping colors to annotations.\n            Options are `INDEX`, `CLASS`, `TRACK`.\n    \"\"\"\n    self.color: Union[Color, ColorPalette] = color\n    self.radius: int = radius\n    self.position: Position = position\n    self.color_lookup: ColorLookup = color_lookup\n
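A configuration sketch that draws larger dots at the bottom-center anchor of each box instead of the default `CENTER`; the image and boxes are synthetic placeholders:

```python
import numpy as np
import supervision as sv

image = np.zeros((480, 640, 3), dtype=np.uint8)
detections = sv.Detections(
    xyxy=np.array([[60, 80, 220, 300], [300, 100, 460, 340]], dtype=float),
    class_id=np.array([0, 1]),
)

# bigger dots, anchored at the bottom edge of each bounding box
dot_annotator = sv.DotAnnotator(radius=6, position=sv.Position.BOTTOM_CENTER)
annotated_frame = dot_annotator.annotate(scene=image.copy(), detections=detections)
```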
"},{"location":"detection/annotators/#supervision.annotators.core.DotAnnotator.annotate","title":"annotate(scene, detections, custom_color_lookup=None)","text":"

Annotates the given scene with dots based on the provided detections.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `scene` | `ImageType` | The image where dots will be drawn. `ImageType` is a flexible type, accepting either `numpy.ndarray` or `PIL.Image.Image`. | required |
| `detections` | `Detections` | Object detections to annotate. | required |
| `custom_color_lookup` | `Optional[ndarray]` | Custom color lookup array. Allows overriding the default color mapping strategy. | `None` |

Returns:

| Type | Description |
| --- | --- |
| `ImageType` | The annotated image, matching the type of `scene` (`numpy.ndarray` or `PIL.Image.Image`). |

Example

```python
import supervision as sv

image = ...
detections = sv.Detections(...)

dot_annotator = sv.DotAnnotator()
annotated_frame = dot_annotator.annotate(
    scene=image.copy(),
    detections=detections
)
```

Source code in supervision/annotators/core.py
@convert_for_annotation_method\ndef annotate(\n    self,\n    scene: ImageType,\n    detections: Detections,\n    custom_color_lookup: Optional[np.ndarray] = None,\n) -> ImageType:\n    \"\"\"\n    Annotates the given scene with dots based on the provided detections.\n\n    Args:\n        scene (ImageType): The image where dots will be drawn.\n            `ImageType` is a flexible type, accepting either `numpy.ndarray`\n            or `PIL.Image.Image`.\n        detections (Detections): Object detections to annotate.\n        custom_color_lookup (Optional[np.ndarray]): Custom color lookup array.\n            Allows to override the default color mapping strategy.\n\n    Returns:\n        The annotated image, matching the type of `scene` (`numpy.ndarray`\n            or `PIL.Image.Image`)\n\n    Example:\n        ```python\n        import supervision as sv\n\n        image = ...\n        detections = sv.Detections(...)\n\n        dot_annotator = sv.DotAnnotator()\n        annotated_frame = dot_annotator.annotate(\n            scene=image.copy(),\n            detections=detections\n        )\n        ```\n\n    ![dot-annotator-example](https://media.roboflow.com/\n    supervision-annotator-examples/dot-annotator-example-purple.png)\n    \"\"\"\n    xy = detections.get_anchors_coordinates(anchor=self.position)\n    for detection_idx in range(len(detections)):\n        color = resolve_color(\n            color=self.color,\n            detections=detections,\n            detection_idx=detection_idx,\n            color_lookup=self.color_lookup\n            if custom_color_lookup is None\n            else custom_color_lookup,\n        )\n        center = (int(xy[detection_idx, 0]), int(xy[detection_idx, 1]))\n        cv2.circle(scene, center, self.radius, color.as_bgr(), -1)\n    return scene\n
"},{"location":"detection/annotators/#supervision.annotators.core.TriangleAnnotator-functions","title":"Functions","text":""},{"location":"detection/annotators/#supervision.annotators.core.TriangleAnnotator.__init__","title":"__init__(color=ColorPalette.DEFAULT, base=10, height=10, position=Position.TOP_CENTER, color_lookup=ColorLookup.CLASS)","text":"

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `color` | `Union[Color, ColorPalette]` | The color or color palette to use for annotating detections. | `DEFAULT` |
| `base` | `int` | The base width of the triangle. | `10` |
| `height` | `int` | The height of the triangle. | `10` |
| `position` | `Position` | The anchor position for placing the triangle. | `TOP_CENTER` |
| `color_lookup` | `ColorLookup` | Strategy for mapping colors to annotations. Options are `INDEX`, `CLASS`, `TRACK`. | `CLASS` |

Source code in supervision/annotators/core.py
def __init__(\n    self,\n    color: Union[Color, ColorPalette] = ColorPalette.DEFAULT,\n    base: int = 10,\n    height: int = 10,\n    position: Position = Position.TOP_CENTER,\n    color_lookup: ColorLookup = ColorLookup.CLASS,\n):\n    \"\"\"\n    Args:\n        color (Union[Color, ColorPalette]): The color or color palette to use for\n            annotating detections.\n        base (int): The base width of the triangle.\n        height (int): The height of the triangle.\n        position (Position): The anchor position for placing the triangle.\n        color_lookup (ColorLookup): Strategy for mapping colors to annotations.\n            Options are `INDEX`, `CLASS`, `TRACK`.\n    \"\"\"\n    self.color: Union[Color, ColorPalette] = color\n    self.base: int = base\n    self.height: int = height\n    self.position: Position = position\n    self.color_lookup: ColorLookup = color_lookup\n
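A configuration sketch with a wider and taller marker; `base` and `height` are in pixels and the marker stays at the default `TOP_CENTER` anchor (synthetic data):

```python
import numpy as np
import supervision as sv

image = np.zeros((480, 640, 3), dtype=np.uint8)
detections = sv.Detections(
    xyxy=np.array([[60, 80, 220, 300]], dtype=float),
    class_id=np.array([0]),
)

# a larger downward-pointing marker above the box
triangle_annotator = sv.TriangleAnnotator(base=24, height=18)
annotated_frame = triangle_annotator.annotate(scene=image.copy(), detections=detections)
```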
"},{"location":"detection/annotators/#supervision.annotators.core.TriangleAnnotator.annotate","title":"annotate(scene, detections, custom_color_lookup=None)","text":"

Annotates the given scene with triangles based on the provided detections.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `scene` | `ImageType` | The image where triangles will be drawn. `ImageType` is a flexible type, accepting either `numpy.ndarray` or `PIL.Image.Image`. | required |
| `detections` | `Detections` | Object detections to annotate. | required |
| `custom_color_lookup` | `Optional[ndarray]` | Custom color lookup array. Allows overriding the default color mapping strategy. | `None` |

Returns:

| Type | Description |
| --- | --- |
| `ImageType` | The annotated image, matching the type of `scene` (`numpy.ndarray` or `PIL.Image.Image`). |

Example

```python
import supervision as sv

image = ...
detections = sv.Detections(...)

triangle_annotator = sv.TriangleAnnotator()
annotated_frame = triangle_annotator.annotate(
    scene=image.copy(),
    detections=detections
)
```

Source code in supervision/annotators/core.py
@convert_for_annotation_method\ndef annotate(\n    self,\n    scene: ImageType,\n    detections: Detections,\n    custom_color_lookup: Optional[np.ndarray] = None,\n) -> ImageType:\n    \"\"\"\n    Annotates the given scene with triangles based on the provided detections.\n\n    Args:\n        scene (ImageType): The image where triangles will be drawn.\n            `ImageType` is a flexible type, accepting either `numpy.ndarray`\n            or `PIL.Image.Image`.\n        detections (Detections): Object detections to annotate.\n        custom_color_lookup (Optional[np.ndarray]): Custom color lookup array.\n            Allows to override the default color mapping strategy.\n\n    Returns:\n        The annotated image, matching the type of `scene` (`numpy.ndarray`\n            or `PIL.Image.Image`)\n\n    Example:\n        ```python\n        import supervision as sv\n\n        image = ...\n        detections = sv.Detections(...)\n\n        triangle_annotator = sv.TriangleAnnotator()\n        annotated_frame = triangle_annotator.annotate(\n            scene=image.copy(),\n            detections=detections\n        )\n        ```\n\n    ![triangle-annotator-example](https://media.roboflow.com/\n    supervision-annotator-examples/triangle-annotator-example.png)\n    \"\"\"\n    xy = detections.get_anchors_coordinates(anchor=self.position)\n    for detection_idx in range(len(detections)):\n        color = resolve_color(\n            color=self.color,\n            detections=detections,\n            detection_idx=detection_idx,\n            color_lookup=self.color_lookup\n            if custom_color_lookup is None\n            else custom_color_lookup,\n        )\n        tip_x, tip_y = int(xy[detection_idx, 0]), int(xy[detection_idx, 1])\n        vertices = np.array(\n            [\n                [tip_x - self.base // 2, tip_y - self.height],\n                [tip_x + self.base // 2, tip_y - self.height],\n                [tip_x, tip_y],\n            ],\n            np.int32,\n        )\n\n        cv2.fillPoly(scene, [vertices], color.as_bgr())\n\n    return scene\n
"},{"location":"detection/annotators/#supervision.annotators.core.EllipseAnnotator-functions","title":"Functions","text":""},{"location":"detection/annotators/#supervision.annotators.core.EllipseAnnotator.__init__","title":"__init__(color=ColorPalette.DEFAULT, thickness=2, start_angle=-45, end_angle=235, color_lookup=ColorLookup.CLASS)","text":"

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `color` | `Union[Color, ColorPalette]` | The color or color palette to use for annotating detections. | `DEFAULT` |
| `thickness` | `int` | Thickness of the ellipse lines. | `2` |
| `start_angle` | `int` | Starting angle of the ellipse. | `-45` |
| `end_angle` | `int` | Ending angle of the ellipse. | `235` |
| `color_lookup` | `str` | Strategy for mapping colors to annotations. Options are `INDEX`, `CLASS`, `TRACK`. | `CLASS` |

Source code in supervision/annotators/core.py
def __init__(\n    self,\n    color: Union[Color, ColorPalette] = ColorPalette.DEFAULT,\n    thickness: int = 2,\n    start_angle: int = -45,\n    end_angle: int = 235,\n    color_lookup: ColorLookup = ColorLookup.CLASS,\n):\n    \"\"\"\n    Args:\n        color (Union[Color, ColorPalette]): The color or color palette to use for\n            annotating detections.\n        thickness (int): Thickness of the ellipse lines.\n        start_angle (int): Starting angle of the ellipse.\n        end_angle (int): Ending angle of the ellipse.\n        color_lookup (str): Strategy for mapping colors to annotations.\n            Options are `INDEX`, `CLASS`, `TRACK`.\n    \"\"\"\n    self.color: Union[Color, ColorPalette] = color\n    self.thickness: int = thickness\n    self.start_angle: int = start_angle\n    self.end_angle: int = end_angle\n    self.color_lookup: ColorLookup = color_lookup\n
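By default the ellipse is drawn as an open arc from -45 to 235 degrees; passing a 0-360 range closes it. A minimal sketch with synthetic data:

```python
import numpy as np
import supervision as sv

image = np.zeros((480, 640, 3), dtype=np.uint8)
detections = sv.Detections(
    xyxy=np.array([[100, 120, 260, 380]], dtype=float),
    class_id=np.array([0]),
)

# closed ellipse at the bottom edge of the box instead of the default open arc
ellipse_annotator = sv.EllipseAnnotator(start_angle=0, end_angle=360, thickness=3)
annotated_frame = ellipse_annotator.annotate(scene=image.copy(), detections=detections)
```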
"},{"location":"detection/annotators/#supervision.annotators.core.EllipseAnnotator.annotate","title":"annotate(scene, detections, custom_color_lookup=None)","text":"

Annotates the given scene with ellipses based on the provided detections.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `scene` | `ImageType` | The image where ellipses will be drawn. `ImageType` is a flexible type, accepting either `numpy.ndarray` or `PIL.Image.Image`. | required |
| `detections` | `Detections` | Object detections to annotate. | required |
| `custom_color_lookup` | `Optional[ndarray]` | Custom color lookup array. Allows overriding the default color mapping strategy. | `None` |

Returns:

| Type | Description |
| --- | --- |
| `ImageType` | The annotated image, matching the type of `scene` (`numpy.ndarray` or `PIL.Image.Image`). |

Example

```python
import supervision as sv

image = ...
detections = sv.Detections(...)

ellipse_annotator = sv.EllipseAnnotator()
annotated_frame = ellipse_annotator.annotate(
    scene=image.copy(),
    detections=detections
)
```

Source code in supervision/annotators/core.py
@convert_for_annotation_method\ndef annotate(\n    self,\n    scene: ImageType,\n    detections: Detections,\n    custom_color_lookup: Optional[np.ndarray] = None,\n) -> ImageType:\n    \"\"\"\n    Annotates the given scene with ellipses based on the provided detections.\n\n    Args:\n        scene (ImageType): The image where ellipses will be drawn.\n            `ImageType` is a flexible type, accepting either `numpy.ndarray`\n            or `PIL.Image.Image`.\n        detections (Detections): Object detections to annotate.\n        custom_color_lookup (Optional[np.ndarray]): Custom color lookup array.\n            Allows to override the default color mapping strategy.\n\n    Returns:\n        The annotated image, matching the type of `scene` (`numpy.ndarray`\n            or `PIL.Image.Image`)\n\n    Example:\n        ```python\n        import supervision as sv\n\n        image = ...\n        detections = sv.Detections(...)\n\n        ellipse_annotator = sv.EllipseAnnotator()\n        annotated_frame = ellipse_annotator.annotate(\n            scene=image.copy(),\n            detections=detections\n        )\n        ```\n\n    ![ellipse-annotator-example](https://media.roboflow.com/\n    supervision-annotator-examples/ellipse-annotator-example-purple.png)\n    \"\"\"\n    for detection_idx in range(len(detections)):\n        x1, y1, x2, y2 = detections.xyxy[detection_idx].astype(int)\n        color = resolve_color(\n            color=self.color,\n            detections=detections,\n            detection_idx=detection_idx,\n            color_lookup=self.color_lookup\n            if custom_color_lookup is None\n            else custom_color_lookup,\n        )\n        center = (int((x1 + x2) / 2), y2)\n        width = x2 - x1\n        cv2.ellipse(\n            scene,\n            center=center,\n            axes=(int(width), int(0.35 * width)),\n            angle=0.0,\n            startAngle=self.start_angle,\n            endAngle=self.end_angle,\n            color=color.as_bgr(),\n            thickness=self.thickness,\n            lineType=cv2.LINE_4,\n        )\n    return scene\n
"},{"location":"detection/annotators/#supervision.annotators.core.HaloAnnotator-functions","title":"Functions","text":""},{"location":"detection/annotators/#supervision.annotators.core.HaloAnnotator.__init__","title":"__init__(color=ColorPalette.DEFAULT, opacity=0.8, kernel_size=40, color_lookup=ColorLookup.CLASS)","text":"

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `color` | `Union[Color, ColorPalette]` | The color or color palette to use for annotating detections. | `DEFAULT` |
| `opacity` | `float` | Opacity of the overlay mask. Must be between `0` and `1`. | `0.8` |
| `kernel_size` | `int` | The size of the average pooling kernel used for creating the halo. | `40` |
| `color_lookup` | `str` | Strategy for mapping colors to annotations. Options are `INDEX`, `CLASS`, `TRACK`. | `CLASS` |

Source code in supervision/annotators/core.py
def __init__(\n    self,\n    color: Union[Color, ColorPalette] = ColorPalette.DEFAULT,\n    opacity: float = 0.8,\n    kernel_size: int = 40,\n    color_lookup: ColorLookup = ColorLookup.CLASS,\n):\n    \"\"\"\n    Args:\n        color (Union[Color, ColorPalette]): The color or color palette to use for\n            annotating detections.\n        opacity (float): Opacity of the overlay mask. Must be between `0` and `1`.\n        kernel_size (int): The size of the average pooling kernel used for creating\n            the halo.\n        color_lookup (str): Strategy for mapping colors to annotations.\n            Options are `INDEX`, `CLASS`, `TRACK`.\n    \"\"\"\n    self.color: Union[Color, ColorPalette] = color\n    self.opacity = opacity\n    self.color_lookup: ColorLookup = color_lookup\n    self.kernel_size: int = kernel_size\n
"},{"location":"detection/annotators/#supervision.annotators.core.HaloAnnotator.annotate","title":"annotate(scene, detections, custom_color_lookup=None)","text":"

Annotates the given scene with halos based on the provided detections.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `scene` | `ImageType` | The image where masks will be drawn. `ImageType` is a flexible type, accepting either `numpy.ndarray` or `PIL.Image.Image`. | required |
| `detections` | `Detections` | Object detections to annotate. | required |
| `custom_color_lookup` | `Optional[ndarray]` | Custom color lookup array. Allows overriding the default color mapping strategy. | `None` |

Returns:

| Type | Description |
| --- | --- |
| `ImageType` | The annotated image, matching the type of `scene` (`numpy.ndarray` or `PIL.Image.Image`). |

Example

```python
import supervision as sv

image = ...
detections = sv.Detections(...)

halo_annotator = sv.HaloAnnotator()
annotated_frame = halo_annotator.annotate(
    scene=image.copy(),
    detections=detections
)
```

Source code in supervision/annotators/core.py
@convert_for_annotation_method\ndef annotate(\n    self,\n    scene: ImageType,\n    detections: Detections,\n    custom_color_lookup: Optional[np.ndarray] = None,\n) -> ImageType:\n    \"\"\"\n    Annotates the given scene with halos based on the provided detections.\n\n    Args:\n        scene (ImageType): The image where masks will be drawn.\n            `ImageType` is a flexible type, accepting either `numpy.ndarray`\n            or `PIL.Image.Image`.\n        detections (Detections): Object detections to annotate.\n        custom_color_lookup (Optional[np.ndarray]): Custom color lookup array.\n            Allows to override the default color mapping strategy.\n\n    Returns:\n        The annotated image, matching the type of `scene` (`numpy.ndarray`\n            or `PIL.Image.Image`)\n\n    Example:\n        ```python\n        import supervision as sv\n\n        image = ...\n        detections = sv.Detections(...)\n\n        halo_annotator = sv.HaloAnnotator()\n        annotated_frame = halo_annotator.annotate(\n            scene=image.copy(),\n            detections=detections\n        )\n        ```\n\n    ![halo-annotator-example](https://media.roboflow.com/\n    supervision-annotator-examples/halo-annotator-example-purple.png)\n    \"\"\"\n    if detections.mask is None:\n        return scene\n    colored_mask = np.zeros_like(scene, dtype=np.uint8)\n    fmask = np.array([False] * scene.shape[0] * scene.shape[1]).reshape(\n        scene.shape[0], scene.shape[1]\n    )\n\n    for detection_idx in np.flip(np.argsort(detections.area)):\n        color = resolve_color(\n            color=self.color,\n            detections=detections,\n            detection_idx=detection_idx,\n            color_lookup=self.color_lookup\n            if custom_color_lookup is None\n            else custom_color_lookup,\n        )\n        mask = detections.mask[detection_idx]\n        fmask = np.logical_or(fmask, mask)\n        color_bgr = color.as_bgr()\n        colored_mask[mask] = color_bgr\n\n    colored_mask = cv2.blur(colored_mask, (self.kernel_size, self.kernel_size))\n    colored_mask[fmask] = [0, 0, 0]\n    gray = cv2.cvtColor(colored_mask, cv2.COLOR_BGR2GRAY)\n    alpha = self.opacity * gray / gray.max()\n    alpha_mask = alpha[:, :, np.newaxis]\n    scene = np.uint8(scene * (1 - alpha_mask) + colored_mask * self.opacity)\n    return scene\n
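As the source above shows, the annotator returns the scene unchanged when `detections.mask` is `None`, so the detections must carry boolean masks at the image resolution. A sketch with a hand-built mask; all sizes and coordinates are made up:

```python
import numpy as np
import supervision as sv

image = np.zeros((240, 320, 3), dtype=np.uint8)

# one detection whose boolean mask matches the image resolution
mask = np.zeros((1, 240, 320), dtype=bool)
mask[0, 60:160, 80:200] = True

detections = sv.Detections(
    xyxy=np.array([[80, 60, 200, 160]], dtype=float),
    mask=mask,
    class_id=np.array([0]),
)

halo_annotator = sv.HaloAnnotator(opacity=0.7, kernel_size=25)
annotated_frame = halo_annotator.annotate(scene=image.copy(), detections=detections)
```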
"},{"location":"detection/annotators/#supervision.annotators.core.PercentageBarAnnotator-functions","title":"Functions","text":""},{"location":"detection/annotators/#supervision.annotators.core.PercentageBarAnnotator.__init__","title":"__init__(height=16, width=80, color=ColorPalette.DEFAULT, border_color=Color.BLACK, position=Position.TOP_CENTER, color_lookup=ColorLookup.CLASS, border_thickness=None)","text":"

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `height` | `int` | The height in pixels of the percentage bar. | `16` |
| `width` | `int` | The width in pixels of the percentage bar. | `80` |
| `color` | `Union[Color, ColorPalette]` | The color or color palette to use for annotating detections. | `DEFAULT` |
| `border_color` | `Color` | The color of the border lines. | `BLACK` |
| `position` | `Position` | The anchor position at which the percentage bar is drawn. | `TOP_CENTER` |
| `color_lookup` | `str` | Strategy for mapping colors to annotations. Options are `INDEX`, `CLASS`, `TRACK`. | `CLASS` |
| `border_thickness` | `int` | The thickness of the border lines. | `None` |

Source code in supervision/annotators/core.py
def __init__(\n    self,\n    height: int = 16,\n    width: int = 80,\n    color: Union[Color, ColorPalette] = ColorPalette.DEFAULT,\n    border_color: Color = Color.BLACK,\n    position: Position = Position.TOP_CENTER,\n    color_lookup: ColorLookup = ColorLookup.CLASS,\n    border_thickness: int = None,\n):\n    \"\"\"\n    Args:\n        height (int): The height in pixels of the percentage bar.\n        width (int): The width in pixels of the percentage bar.\n        color (Union[Color, ColorPalette]): The color or color palette to use for\n            annotating detections.\n        border_color (Color): The color of the border lines.\n        position (Position): The anchor position of drawing the percentage bar.\n        color_lookup (str): Strategy for mapping colors to annotations.\n            Options are `INDEX`, `CLASS`, `TRACK`.\n        border_thickness (int): The thickness of the border lines.\n    \"\"\"\n    self.height: int = height\n    self.width: int = width\n    self.color: Union[Color, ColorPalette] = color\n    self.border_color: Color = border_color\n    self.position: Position = position\n    self.color_lookup: ColorLookup = color_lookup\n\n    if border_thickness is None:\n        self.border_thickness = int(0.15 * self.height)\n
"},{"location":"detection/annotators/#supervision.annotators.core.PercentageBarAnnotator.annotate","title":"annotate(scene, detections, custom_color_lookup=None, custom_values=None)","text":"

Annotates the given scene with percentage bars based on the provided detections. The percentage bars visually represent the confidence or custom values associated with each detection.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `scene` | `ImageType` | The image where percentage bars will be drawn. `ImageType` is a flexible type, accepting either `numpy.ndarray` or `PIL.Image.Image`. | required |
| `detections` | `Detections` | Object detections to annotate. | required |
| `custom_color_lookup` | `Optional[ndarray]` | Custom color lookup array. Allows overriding the default color mapping strategy. | `None` |
| `custom_values` | `Optional[ndarray]` | Custom values array to use instead of the default detection confidences. This array should have the same length as the number of detections and contain a value between 0 and 1 (inclusive) for each detection, representing the percentage to be displayed. | `None` |

Returns:

| Type | Description |
| --- | --- |
| `ImageType` | The annotated image, matching the type of `scene` (`numpy.ndarray` or `PIL.Image.Image`). |

Example

```python
import supervision as sv

image = ...
detections = sv.Detections(...)

percentage_bar_annotator = sv.PercentageBarAnnotator()
annotated_frame = percentage_bar_annotator.annotate(
    scene=image.copy(),
    detections=detections
)
```

Source code in supervision/annotators/core.py
@convert_for_annotation_method\ndef annotate(\n    self,\n    scene: ImageType,\n    detections: Detections,\n    custom_color_lookup: Optional[np.ndarray] = None,\n    custom_values: Optional[np.ndarray] = None,\n) -> ImageType:\n    \"\"\"\n    Annotates the given scene with percentage bars based on the provided\n    detections. The percentage bars visually represent the confidence or custom\n    values associated with each detection.\n\n    Args:\n        scene (ImageType): The image where percentage bars will be drawn.\n            `ImageType` is a flexible type, accepting either `numpy.ndarray`\n            or `PIL.Image.Image`.\n        detections (Detections): Object detections to annotate.\n        custom_color_lookup (Optional[np.ndarray]): Custom color lookup array.\n            Allows to override the default color mapping strategy.\n        custom_values (Optional[np.ndarray]): Custom values array to use instead\n            of the default detection confidences. This array should have the\n            same length as the number of detections and contain a value between\n            0 and 1 (inclusive) for each detection, representing the percentage\n            to be displayed.\n\n    Returns:\n        The annotated image, matching the type of `scene` (`numpy.ndarray`\n            or `PIL.Image.Image`)\n\n    Example:\n        ```python\n        import supervision as sv\n\n        image = ...\n        detections = sv.Detections(...)\n\n        percentage_bar_annotator = sv.PercentageBarAnnotator()\n        annotated_frame = percentage_bar_annotator.annotate(\n            scene=image.copy(),\n            detections=detections\n        )\n        ```\n\n    ![percentage-bar-example](https://media.roboflow.com/\n    supervision-annotator-examples/percentage-bar-annotator-example-purple.png)\n    \"\"\"\n    self.validate_custom_values(\n        custom_values=custom_values, detections_count=len(detections)\n    )\n    anchors = detections.get_anchors_coordinates(anchor=self.position)\n    for detection_idx in range(len(detections)):\n        anchor = anchors[detection_idx]\n        border_coordinates = self.calculate_border_coordinates(\n            anchor_xy=(int(anchor[0]), int(anchor[1])),\n            border_wh=(self.width, self.height),\n            position=self.position,\n        )\n        border_width = border_coordinates[1][0] - border_coordinates[0][0]\n\n        value = (\n            custom_values[detection_idx]\n            if custom_values is not None\n            else detections.confidence[detection_idx]\n        )\n\n        color = resolve_color(\n            color=self.color,\n            detections=detections,\n            detection_idx=detection_idx,\n            color_lookup=self.color_lookup\n            if custom_color_lookup is None\n            else custom_color_lookup,\n        )\n        cv2.rectangle(\n            img=scene,\n            pt1=border_coordinates[0],\n            pt2=(\n                border_coordinates[0][0] + int(border_width * value),\n                border_coordinates[1][1],\n            ),\n            color=color.as_bgr(),\n            thickness=-1,\n        )\n        cv2.rectangle(\n            img=scene,\n            pt1=border_coordinates[0],\n            pt2=border_coordinates[1],\n            color=self.border_color.as_bgr(),\n            thickness=self.border_thickness,\n        )\n    return scene\n
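The `custom_values` argument described above swaps the detection confidences for caller-supplied numbers in `[0, 1]`, one per detection. A sketch with made-up values (for example, a visibility or coverage score):

```python
import numpy as np
import supervision as sv

image = np.zeros((480, 640, 3), dtype=np.uint8)
detections = sv.Detections(
    xyxy=np.array([[40, 60, 200, 300], [260, 80, 420, 330]], dtype=float),
    class_id=np.array([0, 1]),
)

# hypothetical per-detection values in [0, 1], used instead of confidences
visibility = np.array([0.35, 0.9])

percentage_bar_annotator = sv.PercentageBarAnnotator()
annotated_frame = percentage_bar_annotator.annotate(
    scene=image.copy(),
    detections=detections,
    custom_values=visibility,
)
```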
"},{"location":"detection/annotators/#supervision.annotators.core.HeatMapAnnotator-functions","title":"Functions","text":""},{"location":"detection/annotators/#supervision.annotators.core.HeatMapAnnotator.__init__","title":"__init__(position=Position.BOTTOM_CENTER, opacity=0.2, radius=40, kernel_size=25, top_hue=0, low_hue=125)","text":"

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `position` | `Position` | The position of the heatmap. Defaults to `BOTTOM_CENTER`. | `BOTTOM_CENTER` |
| `opacity` | `float` | Opacity of the overlay mask, between 0 and 1. | `0.2` |
| `radius` | `int` | Radius of the heat circle. | `40` |
| `kernel_size` | `int` | Kernel size for blurring the heatmap. | `25` |
| `top_hue` | `int` | Hue at the top of the heatmap. Defaults to 0 (red). | `0` |
| `low_hue` | `int` | Hue at the bottom of the heatmap. Defaults to 125 (blue). | `125` |

Source code in supervision/annotators/core.py
def __init__(\n    self,\n    position: Position = Position.BOTTOM_CENTER,\n    opacity: float = 0.2,\n    radius: int = 40,\n    kernel_size: int = 25,\n    top_hue: int = 0,\n    low_hue: int = 125,\n):\n    \"\"\"\n    Args:\n        position (Position): The position of the heatmap. Defaults to\n            `BOTTOM_CENTER`.\n        opacity (float): Opacity of the overlay mask, between 0 and 1.\n        radius (int): Radius of the heat circle.\n        kernel_size (int): Kernel size for blurring the heatmap.\n        top_hue (int): Hue at the top of the heatmap. Defaults to 0 (red).\n        low_hue (int): Hue at the bottom of the heatmap. Defaults to 125 (blue).\n    \"\"\"\n    self.position = position\n    self.opacity = opacity\n    self.radius = radius\n    self.kernel_size = kernel_size\n    self.heat_mask = None\n    self.top_hue = top_hue\n    self.low_hue = low_hue\n
"},{"location":"detection/annotators/#supervision.annotators.core.HeatMapAnnotator.annotate","title":"annotate(scene, detections)","text":"

Annotates the scene with a heatmap based on the provided detections.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `scene` | `ImageType` | The image where the heatmap will be drawn. `ImageType` is a flexible type, accepting either `numpy.ndarray` or `PIL.Image.Image`. | required |
| `detections` | `Detections` | Object detections to annotate. | required |

Returns:

| Type | Description |
| --- | --- |
| `ImageType` | The annotated image, matching the type of `scene` (`numpy.ndarray` or `PIL.Image.Image`). |

Example

```python
import supervision as sv
from ultralytics import YOLO

model = YOLO('yolov8x.pt')

heat_map_annotator = sv.HeatMapAnnotator()

video_info = sv.VideoInfo.from_video_path(video_path='...')
frames_generator = sv.get_video_frames_generator(source_path='...')

with sv.VideoSink(target_path='...', video_info=video_info) as sink:
    for frame in frames_generator:
        result = model(frame)[0]
        detections = sv.Detections.from_ultralytics(result)
        annotated_frame = heat_map_annotator.annotate(
            scene=frame.copy(),
            detections=detections)
        sink.write_frame(frame=annotated_frame)
```

Source code in supervision/annotators/core.py
@convert_for_annotation_method\ndef annotate(self, scene: ImageType, detections: Detections) -> ImageType:\n    \"\"\"\n    Annotates the scene with a heatmap based on the provided detections.\n\n    Args:\n        scene (ImageType): The image where the heatmap will be drawn.\n            `ImageType` is a flexible type, accepting either `numpy.ndarray`\n            or `PIL.Image.Image`.\n        detections (Detections): Object detections to annotate.\n\n    Returns:\n        The annotated image, matching the type of `scene` (`numpy.ndarray`\n            or `PIL.Image.Image`)\n\n    Example:\n        ```python\n        import supervision as sv\n        from ultralytics import YOLO\n\n        model = YOLO('yolov8x.pt')\n\n        heat_map_annotator = sv.HeatMapAnnotator()\n\n        video_info = sv.VideoInfo.from_video_path(video_path='...')\n        frames_generator = get_video_frames_generator(source_path='...')\n\n        with sv.VideoSink(target_path='...', video_info=video_info) as sink:\n           for frame in frames_generator:\n               result = model(frame)[0]\n               detections = sv.Detections.from_ultralytics(result)\n               annotated_frame = heat_map_annotator.annotate(\n                   scene=frame.copy(),\n                   detections=detections)\n               sink.write_frame(frame=annotated_frame)\n        ```\n\n    ![heatmap-annotator-example](https://media.roboflow.com/\n    supervision-annotator-examples/heat-map-annotator-example-purple.png)\n    \"\"\"\n\n    if self.heat_mask is None:\n        self.heat_mask = np.zeros(scene.shape[:2])\n    mask = np.zeros(scene.shape[:2])\n    for xy in detections.get_anchors_coordinates(self.position):\n        cv2.circle(mask, (int(xy[0]), int(xy[1])), self.radius, 1, -1)\n    self.heat_mask = mask + self.heat_mask\n    temp = self.heat_mask.copy()\n    temp = self.low_hue - temp / temp.max() * (self.low_hue - self.top_hue)\n    temp = temp.astype(np.uint8)\n    if self.kernel_size is not None:\n        temp = cv2.blur(temp, (self.kernel_size, self.kernel_size))\n    hsv = np.zeros(scene.shape)\n    hsv[..., 0] = temp\n    hsv[..., 1] = 255\n    hsv[..., 2] = 255\n    temp = cv2.cvtColor(hsv.astype(np.uint8), cv2.COLOR_HSV2BGR)\n    mask = cv2.cvtColor(self.heat_mask.astype(np.uint8), cv2.COLOR_GRAY2BGR) > 0\n    scene[mask] = cv2.addWeighted(temp, self.opacity, scene, 1 - self.opacity, 0)[\n        mask\n    ]\n    return scene\n
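Because the annotator accumulates detections into an internal `heat_mask`, a single instance should be reused across every frame of a sequence. A sketch with synthetic frames and a detection that drifts to the right; all coordinates are made up:

```python
import numpy as np
import supervision as sv

heat_map_annotator = sv.HeatMapAnnotator(radius=30, opacity=0.3)

frames = [np.full((480, 640, 3), 255, dtype=np.uint8) for _ in range(3)]
annotated_frames = []
for i, frame in enumerate(frames):
    detections = sv.Detections(
        xyxy=np.array([[100 + 40 * i, 200, 180 + 40 * i, 320]], dtype=float),
        class_id=np.array([0]),
    )
    # the same annotator instance keeps accumulating heat across frames
    annotated_frames.append(
        heat_map_annotator.annotate(scene=frame.copy(), detections=detections)
    )
```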
"},{"location":"detection/annotators/#supervision.annotators.core.MaskAnnotator-functions","title":"Functions","text":""},{"location":"detection/annotators/#supervision.annotators.core.MaskAnnotator.__init__","title":"__init__(color=ColorPalette.DEFAULT, opacity=0.5, color_lookup=ColorLookup.CLASS)","text":"

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `color` | `Union[Color, ColorPalette]` | The color or color palette to use for annotating detections. | `DEFAULT` |
| `opacity` | `float` | Opacity of the overlay mask. Must be between `0` and `1`. | `0.5` |
| `color_lookup` | `str` | Strategy for mapping colors to annotations. Options are `INDEX`, `CLASS`, `TRACK`. | `CLASS` |

Source code in supervision/annotators/core.py
def __init__(\n    self,\n    color: Union[Color, ColorPalette] = ColorPalette.DEFAULT,\n    opacity: float = 0.5,\n    color_lookup: ColorLookup = ColorLookup.CLASS,\n):\n    \"\"\"\n    Args:\n        color (Union[Color, ColorPalette]): The color or color palette to use for\n            annotating detections.\n        opacity (float): Opacity of the overlay mask. Must be between `0` and `1`.\n        color_lookup (str): Strategy for mapping colors to annotations.\n            Options are `INDEX`, `CLASS`, `TRACK`.\n    \"\"\"\n    self.color: Union[Color, ColorPalette] = color\n    self.opacity = opacity\n    self.color_lookup: ColorLookup = color_lookup\n
"},{"location":"detection/annotators/#supervision.annotators.core.MaskAnnotator.annotate","title":"annotate(scene, detections, custom_color_lookup=None)","text":"

Annotates the given scene with masks based on the provided detections.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `scene` | `ImageType` | The image where masks will be drawn. `ImageType` is a flexible type, accepting either `numpy.ndarray` or `PIL.Image.Image`. | required |
| `detections` | `Detections` | Object detections to annotate. | required |
| `custom_color_lookup` | `Optional[ndarray]` | Custom color lookup array. Allows overriding the default color mapping strategy. | `None` |

Returns:

| Type | Description |
| --- | --- |
| `ImageType` | The annotated image, matching the type of `scene` (`numpy.ndarray` or `PIL.Image.Image`). |

Example

```python
import supervision as sv

image = ...
detections = sv.Detections(...)

mask_annotator = sv.MaskAnnotator()
annotated_frame = mask_annotator.annotate(
    scene=image.copy(),
    detections=detections
)
```

Source code in supervision/annotators/core.py
@convert_for_annotation_method\ndef annotate(\n    self,\n    scene: ImageType,\n    detections: Detections,\n    custom_color_lookup: Optional[np.ndarray] = None,\n) -> ImageType:\n    \"\"\"\n    Annotates the given scene with masks based on the provided detections.\n\n    Args:\n        scene (ImageType): The image where masks will be drawn.\n            `ImageType` is a flexible type, accepting either `numpy.ndarray`\n            or `PIL.Image.Image`.\n        detections (Detections): Object detections to annotate.\n        custom_color_lookup (Optional[np.ndarray]): Custom color lookup array.\n            Allows to override the default color mapping strategy.\n\n    Returns:\n        The annotated image, matching the type of `scene` (`numpy.ndarray`\n            or `PIL.Image.Image`)\n\n    Example:\n        ```python\n        import supervision as sv\n\n        image = ...\n        detections = sv.Detections(...)\n\n        mask_annotator = sv.MaskAnnotator()\n        annotated_frame = mask_annotator.annotate(\n            scene=image.copy(),\n            detections=detections\n        )\n        ```\n\n    ![mask-annotator-example](https://media.roboflow.com/\n    supervision-annotator-examples/mask-annotator-example-purple.png)\n    \"\"\"\n    if detections.mask is None:\n        return scene\n\n    colored_mask = np.array(scene, copy=True, dtype=np.uint8)\n\n    for detection_idx in np.flip(np.argsort(detections.area)):\n        color = resolve_color(\n            color=self.color,\n            detections=detections,\n            detection_idx=detection_idx,\n            color_lookup=self.color_lookup\n            if custom_color_lookup is None\n            else custom_color_lookup,\n        )\n        mask = detections.mask[detection_idx]\n        colored_mask[mask] = color.as_bgr()\n\n    scene = cv2.addWeighted(colored_mask, self.opacity, scene, 1 - self.opacity, 0)\n    return scene.astype(np.uint8)\n
"},{"location":"detection/annotators/#supervision.annotators.core.PolygonAnnotator-functions","title":"Functions","text":""},{"location":"detection/annotators/#supervision.annotators.core.PolygonAnnotator.__init__","title":"__init__(color=ColorPalette.DEFAULT, thickness=2, color_lookup=ColorLookup.CLASS)","text":"

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `color` | `Union[Color, ColorPalette]` | The color or color palette to use for annotating detections. | `DEFAULT` |
| `thickness` | `int` | Thickness of the polygon lines. | `2` |
| `color_lookup` | `str` | Strategy for mapping colors to annotations. Options are `INDEX`, `CLASS`, `TRACK`. | `CLASS` |

Source code in supervision/annotators/core.py
def __init__(\n    self,\n    color: Union[Color, ColorPalette] = ColorPalette.DEFAULT,\n    thickness: int = 2,\n    color_lookup: ColorLookup = ColorLookup.CLASS,\n):\n    \"\"\"\n    Args:\n        color (Union[Color, ColorPalette]): The color or color palette to use for\n            annotating detections.\n        thickness (int): Thickness of the polygon lines.\n        color_lookup (str): Strategy for mapping colors to annotations.\n            Options are `INDEX`, `CLASS`, `TRACK`.\n    \"\"\"\n    self.color: Union[Color, ColorPalette] = color\n    self.thickness: int = thickness\n    self.color_lookup: ColorLookup = color_lookup\n
"},{"location":"detection/annotators/#supervision.annotators.core.PolygonAnnotator.annotate","title":"annotate(scene, detections, custom_color_lookup=None)","text":"

Annotates the given scene with polygons based on the provided detections.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `scene` | `ImageType` | The image where polygons will be drawn. `ImageType` is a flexible type, accepting either `numpy.ndarray` or `PIL.Image.Image`. | required |
| `detections` | `Detections` | Object detections to annotate. | required |
| `custom_color_lookup` | `Optional[ndarray]` | Custom color lookup array. Allows overriding the default color mapping strategy. | `None` |

Returns:

| Type | Description |
| --- | --- |
| `ImageType` | The annotated image, matching the type of `scene` (`numpy.ndarray` or `PIL.Image.Image`). |

Example

```python
import supervision as sv

image = ...
detections = sv.Detections(...)

polygon_annotator = sv.PolygonAnnotator()
annotated_frame = polygon_annotator.annotate(
    scene=image.copy(),
    detections=detections
)
```

Source code in supervision/annotators/core.py
@convert_for_annotation_method\ndef annotate(\n    self,\n    scene: ImageType,\n    detections: Detections,\n    custom_color_lookup: Optional[np.ndarray] = None,\n) -> ImageType:\n    \"\"\"\n    Annotates the given scene with polygons based on the provided detections.\n\n    Args:\n        scene (ImageType): The image where polygons will be drawn.\n            `ImageType` is a flexible type, accepting either `numpy.ndarray`\n            or `PIL.Image.Image`.\n        detections (Detections): Object detections to annotate.\n        custom_color_lookup (Optional[np.ndarray]): Custom color lookup array.\n            Allows to override the default color mapping strategy.\n\n    Returns:\n        The annotated image, matching the type of `scene` (`numpy.ndarray`\n            or `PIL.Image.Image`)\n\n    Example:\n        ```python\n        import supervision as sv\n\n        image = ...\n        detections = sv.Detections(...)\n\n        polygon_annotator = sv.PolygonAnnotator()\n        annotated_frame = polygon_annotator.annotate(\n            scene=image.copy(),\n            detections=detections\n        )\n        ```\n\n    ![polygon-annotator-example](https://media.roboflow.com/\n    supervision-annotator-examples/polygon-annotator-example-purple.png)\n    \"\"\"\n    if detections.mask is None:\n        return scene\n\n    for detection_idx in range(len(detections)):\n        mask = detections.mask[detection_idx]\n        color = resolve_color(\n            color=self.color,\n            detections=detections,\n            detection_idx=detection_idx,\n            color_lookup=self.color_lookup\n            if custom_color_lookup is None\n            else custom_color_lookup,\n        )\n        for polygon in mask_to_polygons(mask=mask):\n            scene = draw_polygon(\n                scene=scene,\n                polygon=polygon,\n                color=color,\n                thickness=self.thickness,\n            )\n\n    return scene\n
"},{"location":"detection/annotators/#supervision.annotators.core.LabelAnnotator-functions","title":"Functions","text":""},{"location":"detection/annotators/#supervision.annotators.core.LabelAnnotator.__init__","title":"__init__(color=ColorPalette.DEFAULT, text_color=Color.WHITE, text_scale=0.5, text_thickness=1, text_padding=10, text_position=Position.TOP_LEFT, color_lookup=ColorLookup.CLASS, border_radius=0)","text":"

Parameters:

Name Type Description Default color Union[Color, ColorPalette]

The color or color palette to use for annotating the text background.

DEFAULT text_color Color

The color to use for the text.

WHITE text_scale float

Font scale for the text.

0.5 text_thickness int

Thickness of the text characters.

1 text_padding int

Padding around the text within its background box.

10 text_position Position

Position of the text relative to the detection. Possible values are defined in the Position enum.

TOP_LEFT color_lookup str

Strategy for mapping colors to annotations. Options are INDEX, CLASS, TRACK.

CLASS border_radius int

The radius to apply to the rounded edges. If the value is higher than the smaller dimension (width or height), it is clipped.

0 Source code in supervision/annotators/core.py
def __init__(\n    self,\n    color: Union[Color, ColorPalette] = ColorPalette.DEFAULT,\n    text_color: Color = Color.WHITE,\n    text_scale: float = 0.5,\n    text_thickness: int = 1,\n    text_padding: int = 10,\n    text_position: Position = Position.TOP_LEFT,\n    color_lookup: ColorLookup = ColorLookup.CLASS,\n    border_radius: int = 0,\n):\n    \"\"\"\n    Args:\n        color (Union[Color, ColorPalette]): The color or color palette to use for\n            annotating the text background.\n        text_color (Color): The color to use for the text.\n        text_scale (float): Font scale for the text.\n        text_thickness (int): Thickness of the text characters.\n        text_padding (int): Padding around the text within its background box.\n        text_position (Position): Position of the text relative to the detection.\n            Possible values are defined in the `Position` enum.\n        color_lookup (str): Strategy for mapping colors to annotations.\n            Options are `INDEX`, `CLASS`, `TRACK`.\n        border_radius (int): The radius to apply round edges. If the selected\n            value is higher than the lower dimension, width or height, is clipped.\n    \"\"\"\n    self.border_radius: int = border_radius\n    self.color: Union[Color, ColorPalette] = color\n    self.text_color: Color = text_color\n    self.text_scale: float = text_scale\n    self.text_thickness: int = text_thickness\n    self.text_padding: int = text_padding\n    self.text_anchor: Position = text_position\n    self.color_lookup: ColorLookup = color_lookup\n
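A minimal construction sketch combining the parameters documented above; the values are illustrative rather than recommended defaults, and `sv.Color.BLACK` is assumed to be available from the same `Color` palette used elsewhere on this page.

```python
import supervision as sv

# illustrative values; border_radius is clipped when it exceeds the smaller
# dimension of the text background box
label_annotator = sv.LabelAnnotator(
    text_color=sv.Color.BLACK,
    text_scale=1.0,
    text_thickness=2,
    text_padding=16,
    text_position=sv.Position.CENTER,
    border_radius=8,
)
```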
"},{"location":"detection/annotators/#supervision.annotators.core.LabelAnnotator.annotate","title":"annotate(scene, detections, labels=None, custom_color_lookup=None)","text":"

Annotates the given scene with labels based on the provided detections.

Parameters:

Name Type Description Default scene ImageType

The image where labels will be drawn. ImageType is a flexible type, accepting either numpy.ndarray or PIL.Image.Image.

required detections Detections

Object detections to annotate.

required labels List[str]

Optional. Custom labels for each detection.

None custom_color_lookup Optional[ndarray]

Custom color lookup array. Allows overriding the default color mapping strategy.

None

Returns:

Type Description ImageType

The annotated image, matching the type of scene (numpy.ndarray or PIL.Image.Image)

Example
 import supervision as sv\n\nimage = ...\ndetections = sv.Detections(...)\n\nlabels = [\n    f\"{class_name} {confidence:.2f}\"\n    for class_name, confidence\n    in zip(detections['class_name'], detections.confidence)\n]\n\nlabel_annotator = sv.LabelAnnotator(text_position=sv.Position.CENTER)\nannotated_frame = label_annotator.annotate(\n    scene=image.copy(),\n    detections=detections,\n    labels=labels\n)\n

Source code in supervision/annotators/core.py
@convert_for_annotation_method\ndef annotate(\n    self,\n    scene: ImageType,\n    detections: Detections,\n    labels: List[str] = None,\n    custom_color_lookup: Optional[np.ndarray] = None,\n) -> ImageType:\n    \"\"\"\n    Annotates the given scene with labels based on the provided detections.\n\n    Args:\n        scene (ImageType): The image where labels will be drawn.\n            `ImageType` is a flexible type, accepting either `numpy.ndarray`\n            or `PIL.Image.Image`.\n        detections (Detections): Object detections to annotate.\n        labels (List[str]): Optional. Custom labels for each detection.\n        custom_color_lookup (Optional[np.ndarray]): Custom color lookup array.\n            Allows to override the default color mapping strategy.\n\n    Returns:\n        The annotated image, matching the type of `scene` (`numpy.ndarray`\n            or `PIL.Image.Image`)\n\n    Example:\n        ```python\n         import supervision as sv\n\n        image = ...\n        detections = sv.Detections(...)\n\n        labels = [\n            f\"{class_name} {confidence:.2f}\"\n            for class_name, confidence\n            in zip(detections['class_name'], detections.confidence)\n        ]\n\n        label_annotator = sv.LabelAnnotator(text_position=sv.Position.CENTER)\n        annotated_frame = label_annotator.annotate(\n            scene=image.copy(),\n            detections=detections,\n            labels=labels\n        )\n        ```\n\n    ![label-annotator-example](https://media.roboflow.com/\n    supervision-annotator-examples/label-annotator-example-purple.png)\n    \"\"\"\n    font = cv2.FONT_HERSHEY_SIMPLEX\n    anchors_coordinates = detections.get_anchors_coordinates(\n        anchor=self.text_anchor\n    ).astype(int)\n    if labels is not None and len(labels) != len(detections):\n        raise ValueError(\n            f\"The number of labels provided ({len(labels)}) does not match the \"\n            f\"number of detections ({len(detections)}). Each detection should have \"\n            f\"a corresponding label. This discrepancy can occur if the labels and \"\n            f\"detections are not aligned or if an incorrect number of labels has \"\n            f\"been provided. 
Please ensure that the labels array has the same \"\n            f\"length as the Detections object.\"\n        )\n\n    for detection_idx, center_coordinates in enumerate(anchors_coordinates):\n        color = resolve_color(\n            color=self.color,\n            detections=detections,\n            detection_idx=detection_idx,\n            color_lookup=(\n                self.color_lookup\n                if custom_color_lookup is None\n                else custom_color_lookup\n            ),\n        )\n\n        if labels is not None:\n            text = labels[detection_idx]\n        elif detections[CLASS_NAME_DATA_FIELD] is not None:\n            text = detections[CLASS_NAME_DATA_FIELD][detection_idx]\n        elif detections.class_id is not None:\n            text = str(detections.class_id[detection_idx])\n        else:\n            text = str(detection_idx)\n\n        text_w, text_h = cv2.getTextSize(\n            text=text,\n            fontFace=font,\n            fontScale=self.text_scale,\n            thickness=self.text_thickness,\n        )[0]\n        text_w_padded = text_w + 2 * self.text_padding\n        text_h_padded = text_h + 2 * self.text_padding\n        text_background_xyxy = resolve_text_background_xyxy(\n            center_coordinates=tuple(center_coordinates),\n            text_wh=(text_w_padded, text_h_padded),\n            position=self.text_anchor,\n        )\n\n        text_x = text_background_xyxy[0] + self.text_padding\n        text_y = text_background_xyxy[1] + self.text_padding + text_h\n\n        self.draw_rounded_rectangle(\n            scene=scene,\n            xyxy=text_background_xyxy,\n            color=color.as_bgr(),\n            border_radius=self.border_radius,\n        )\n        cv2.putText(\n            img=scene,\n            text=text,\n            org=(text_x, text_y),\n            fontFace=font,\n            fontScale=self.text_scale,\n            color=self.text_color.as_rgb(),\n            thickness=self.text_thickness,\n            lineType=cv2.LINE_AA,\n        )\n    return scene\n
"},{"location":"detection/annotators/#supervision.annotators.core.RichLabelAnnotator-functions","title":"Functions","text":""},{"location":"detection/annotators/#supervision.annotators.core.RichLabelAnnotator.__init__","title":"__init__(color=ColorPalette.DEFAULT, text_color=Color.WHITE, font_path=None, font_size=10, text_padding=10, text_position=Position.TOP_LEFT, color_lookup=ColorLookup.CLASS, border_radius=0)","text":"

Parameters:

Name Type Description Default color Union[Color, ColorPalette]

The color or color palette to use for annotating the text background.

DEFAULT text_color Color

The color to use for the text.

WHITE font_path str

Path to the font file (e.g., \".ttf\" or \".otf\") to use for rendering text. If None, the default PIL font will be used.

None font_size int

Font size for the text.

10 text_padding int

Padding around the text within its background box.

10 text_position Position

Position of the text relative to the detection. Possible values are defined in the Position enum.

TOP_LEFT color_lookup ColorLookup

Strategy for mapping colors to annotations. Options are INDEX, CLASS, TRACK.

CLASS border_radius int

The radius to apply to the rounded edges. If the value is higher than the smaller dimension (width or height), it is clipped.

0 Source code in supervision/annotators/core.py
def __init__(\n    self,\n    color: Union[Color, ColorPalette] = ColorPalette.DEFAULT,\n    text_color: Color = Color.WHITE,\n    font_path: str = None,\n    font_size: int = 10,\n    text_padding: int = 10,\n    text_position: Position = Position.TOP_LEFT,\n    color_lookup: ColorLookup = ColorLookup.CLASS,\n    border_radius: int = 0,\n):\n    \"\"\"\n    Args:\n        color (Union[Color, ColorPalette]): The color or color palette to use for\n            annotating the text background.\n        text_color (Color): The color to use for the text.\n        font_path (str): Path to the font file (e.g., \".ttf\" or \".otf\") to use for\n            rendering text. If `None`, the default PIL font will be used.\n        font_size (int): Font size for the text.\n        text_padding (int): Padding around the text within its background box.\n        text_position (Position): Position of the text relative to the detection.\n            Possible values are defined in the `Position` enum.\n        color_lookup (ColorLookup): Strategy for mapping colors to annotations.\n            Options are `INDEX`, `CLASS`, `TRACK`.\n        border_radius (int): The radius to apply round edges. If the selected\n            value is higher than the lower dimension, width or height, is clipped.\n    \"\"\"\n    self.color = color\n    self.text_color = text_color\n    self.text_padding = text_padding\n    self.text_anchor = text_position\n    self.color_lookup = color_lookup\n    self.border_radius = border_radius\n    if font_path is not None:\n        try:\n            self.font = ImageFont.truetype(font_path, font_size)\n        except OSError:\n            print(f\"Font path '{font_path}' not found. Using PIL's default font.\")\n            self.font = ImageFont.load_default(size=font_size)\n    else:\n        self.font = ImageFont.load_default(size=font_size)\n
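A minimal construction sketch; `"path/to/font.ttf"` is a placeholder path, and as the source above shows, the annotator falls back to PIL's default font when the file cannot be loaded.

```python
import supervision as sv

# "path/to/font.ttf" is a placeholder; a missing font falls back to PIL's default
rich_label_annotator = sv.RichLabelAnnotator(
    font_path="path/to/font.ttf",
    font_size=24,
    text_padding=12,
    border_radius=6,
)
```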
"},{"location":"detection/annotators/#supervision.annotators.core.RichLabelAnnotator.annotate","title":"annotate(scene, detections, labels=None, custom_color_lookup=None)","text":"

Annotates the given scene with labels based on the provided detections, with support for Unicode characters.

Parameters:

Name Type Description Default scene ImageType

The image where labels will be drawn. ImageType is a flexible type, accepting either numpy.ndarray or PIL.Image.Image.

required detections Detections

Object detections to annotate.

required labels List[str]

Optional. Custom labels for each detection.

None custom_color_lookup Optional[ndarray]

Custom color lookup array. Allows overriding the default color mapping strategy.

None

Returns:

Type Description ImageType

The annotated image, matching the type of scene (numpy.ndarray or PIL.Image.Image)

Example
import supervision as sv\n\nimage = ...\ndetections = sv.Detections(...)\n\nlabels = [\n    f\"{class_name} {confidence:.2f}\"\n    for class_name, confidence\n    in zip(detections['class_name'], detections.confidence)\n]\n\nrich_label_annotator = sv.RichLabelAnnotator(font_path=\"path/to/font.ttf\")\nannotated_frame = rich_label_annotator.annotate(\n    scene=image.copy(),\n    detections=detections,\n    labels=labels\n)\n
Source code in supervision/annotators/core.py
def annotate(\n    self,\n    scene: ImageType,\n    detections: Detections,\n    labels: List[str] = None,\n    custom_color_lookup: Optional[np.ndarray] = None,\n) -> ImageType:\n    \"\"\"\n    Annotates the given scene with labels based on the provided\n    detections, with support for Unicode characters.\n\n    Args:\n        scene (ImageType): The image where labels will be drawn.\n            `ImageType` is a flexible type, accepting either `numpy.ndarray`\n            or `PIL.Image.Image`.\n        detections (Detections): Object detections to annotate.\n        labels (List[str]): Optional. Custom labels for each detection.\n        custom_color_lookup (Optional[np.ndarray]): Custom color lookup array.\n            Allows to override the default color mapping strategy.\n\n    Returns:\n        The annotated image, matching the type of `scene` (`numpy.ndarray`\n            or `PIL.Image.Image`)\n\n    Example:\n        ```python\n        import supervision as sv\n\n        image = ...\n        detections = sv.Detections(...)\n\n        labels = [\n            f\"{class_name} {confidence:.2f}\"\n            for class_name, confidence\n            in zip(detections['class_name'], detections.confidence)\n        ]\n\n        rich_label_annotator = sv.RichLabelAnnotator(font_path=\"path/to/font.ttf\")\n        annotated_frame = label_annotator.annotate(\n            scene=image.copy(),\n            detections=detections,\n            labels=labels\n        )\n        ```\n\n    \"\"\"\n    if isinstance(scene, np.ndarray):\n        scene = Image.fromarray(cv2.cvtColor(scene, cv2.COLOR_BGR2RGB))\n    draw = ImageDraw.Draw(scene)\n    anchors_coordinates = detections.get_anchors_coordinates(\n        anchor=self.text_anchor\n    ).astype(int)\n    if labels is not None and len(labels) != len(detections):\n        raise ValueError(\n            f\"The number of labels provided ({len(labels)}) does not match the \"\n            f\"number of detections ({len(detections)}). Each detection should have \"\n            f\"a corresponding label. This discrepancy can occur if the labels and \"\n            f\"detections are not aligned or if an incorrect number of labels has \"\n            f\"been provided. 
Please ensure that the labels array has the same \"\n            f\"length as the Detections object.\"\n        )\n    for detection_idx, center_coordinates in enumerate(anchors_coordinates):\n        color = resolve_color(\n            color=self.color,\n            detections=detections,\n            detection_idx=detection_idx,\n            color_lookup=(\n                self.color_lookup\n                if custom_color_lookup is None\n                else custom_color_lookup\n            ),\n        )\n        if labels is not None:\n            text = labels[detection_idx]\n        elif detections[CLASS_NAME_DATA_FIELD] is not None:\n            text = detections[CLASS_NAME_DATA_FIELD][detection_idx]\n        elif detections.class_id is not None:\n            text = str(detections.class_id[detection_idx])\n        else:\n            text = str(detection_idx)\n\n        left, top, right, bottom = draw.textbbox((0, 0), text, font=self.font)\n        text_width = right - left\n        text_height = bottom - top\n        text_w_padded = text_width + 2 * self.text_padding\n        text_h_padded = text_height + 2 * self.text_padding\n        text_background_xyxy = resolve_text_background_xyxy(\n            center_coordinates=tuple(center_coordinates),\n            text_wh=(text_w_padded, text_h_padded),\n            position=self.text_anchor,\n        )\n\n        text_x = text_background_xyxy[0] + self.text_padding - left\n        text_y = text_background_xyxy[1] + self.text_padding - top\n\n        draw.rounded_rectangle(\n            text_background_xyxy,\n            radius=self.border_radius,\n            fill=color.as_rgb(),\n            outline=None,\n        )\n        draw.text(\n            xy=(text_x, text_y),\n            text=text,\n            font=self.font,\n            fill=self.text_color.as_rgb(),\n        )\n\n    return scene\n
"},{"location":"detection/annotators/#supervision.annotators.core.BlurAnnotator-functions","title":"Functions","text":""},{"location":"detection/annotators/#supervision.annotators.core.BlurAnnotator.__init__","title":"__init__(kernel_size=15)","text":"

Parameters:

Name Type Description Default kernel_size int

The size of the average pooling kernel used for blurring.

15 Source code in supervision/annotators/core.py
def __init__(self, kernel_size: int = 15):\n    \"\"\"\n    Args:\n        kernel_size (int): The size of the average pooling kernel used for blurring.\n    \"\"\"\n    self.kernel_size: int = kernel_size\n
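A minimal construction sketch; the kernel size is illustrative, with larger values producing a stronger blur than the documented default of 15.

```python
import supervision as sv

# larger kernels blur more aggressively than the default of 15
blur_annotator = sv.BlurAnnotator(kernel_size=25)
```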
"},{"location":"detection/annotators/#supervision.annotators.core.BlurAnnotator.annotate","title":"annotate(scene, detections)","text":"

Annotates the given scene by blurring regions based on the provided detections.

Parameters:

Name Type Description Default scene ImageType

The image where blurring will be applied. ImageType is a flexible type, accepting either numpy.ndarray or PIL.Image.Image.

required detections Detections

Object detections to annotate.

required

Returns:

Type Description ImageType

The annotated image, matching the type of scene (numpy.ndarray or PIL.Image.Image)

Example
import supervision as sv\n\nimage = ...\ndetections = sv.Detections(...)\n\nblur_annotator = sv.BlurAnnotator()\nannotated_frame = blur_annotator.annotate(\n    scene=image.copy(),\n    detections=detections\n)\n

Source code in supervision/annotators/core.py
@convert_for_annotation_method\ndef annotate(\n    self,\n    scene: ImageType,\n    detections: Detections,\n) -> ImageType:\n    \"\"\"\n    Annotates the given scene by blurring regions based on the provided detections.\n\n    Args:\n        scene (ImageType): The image where blurring will be applied.\n            `ImageType` is a flexible type, accepting either `numpy.ndarray`\n            or `PIL.Image.Image`.\n        detections (Detections): Object detections to annotate.\n\n    Returns:\n        The annotated image, matching the type of `scene` (`numpy.ndarray`\n            or `PIL.Image.Image`)\n\n    Example:\n        ```python\n        import supervision as sv\n\n        image = ...\n        detections = sv.Detections(...)\n\n        blur_annotator = sv.BlurAnnotator()\n        annotated_frame = circle_annotator.annotate(\n            scene=image.copy(),\n            detections=detections\n        )\n        ```\n\n    ![blur-annotator-example](https://media.roboflow.com/\n    supervision-annotator-examples/blur-annotator-example-purple.png)\n    \"\"\"\n    image_height, image_width = scene.shape[:2]\n    clipped_xyxy = clip_boxes(\n        xyxy=detections.xyxy, resolution_wh=(image_width, image_height)\n    ).astype(int)\n\n    for x1, y1, x2, y2 in clipped_xyxy:\n        roi = scene[y1:y2, x1:x2]\n        roi = cv2.blur(roi, (self.kernel_size, self.kernel_size))\n        scene[y1:y2, x1:x2] = roi\n\n    return scene\n
"},{"location":"detection/annotators/#supervision.annotators.core.PixelateAnnotator-functions","title":"Functions","text":""},{"location":"detection/annotators/#supervision.annotators.core.PixelateAnnotator.__init__","title":"__init__(pixel_size=20)","text":"

Parameters:

Name Type Description Default pixel_size int

The size of the pixelation blocks; larger values produce a coarser effect.

20 Source code in supervision/annotators/core.py
def __init__(self, pixel_size: int = 20):\n    \"\"\"\n    Args:\n        pixel_size (int): The size of the pixelation.\n    \"\"\"\n    self.pixel_size: int = pixel_size\n
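A minimal construction sketch; the pixel size is illustrative, with larger values producing coarser pixelation than the documented default of 20.

```python
import supervision as sv

# larger pixel_size values produce coarser blocks than the default of 20
pixelate_annotator = sv.PixelateAnnotator(pixel_size=40)
```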
"},{"location":"detection/annotators/#supervision.annotators.core.PixelateAnnotator.annotate","title":"annotate(scene, detections)","text":"

Annotates the given scene by pixelating regions based on the provided detections.

Parameters:

Name Type Description Default scene ImageType

The image where pixelating will be applied. ImageType is a flexible type, accepting either numpy.ndarray or PIL.Image.Image.

required detections Detections

Object detections to annotate.

required

Returns:

Type Description ImageType

The annotated image, matching the type of scene (numpy.ndarray or PIL.Image.Image)

Example
import supervision as sv\n\nimage = ...\ndetections = sv.Detections(...)\n\npixelate_annotator = sv.PixelateAnnotator()\nannotated_frame = pixelate_annotator.annotate(\n    scene=image.copy(),\n    detections=detections\n)\n

Source code in supervision/annotators/core.py
@convert_for_annotation_method\ndef annotate(\n    self,\n    scene: ImageType,\n    detections: Detections,\n) -> ImageType:\n    \"\"\"\n    Annotates the given scene by pixelating regions based on the provided\n        detections.\n\n    Args:\n        scene (ImageType): The image where pixelating will be applied.\n            `ImageType` is a flexible type, accepting either `numpy.ndarray`\n            or `PIL.Image.Image`.\n        detections (Detections): Object detections to annotate.\n\n    Returns:\n        The annotated image, matching the type of `scene` (`numpy.ndarray`\n            or `PIL.Image.Image`)\n\n    Example:\n        ```python\n        import supervision as sv\n\n        image = ...\n        detections = sv.Detections(...)\n\n        pixelate_annotator = sv.PixelateAnnotator()\n        annotated_frame = pixelate_annotator.annotate(\n            scene=image.copy(),\n            detections=detections\n        )\n        ```\n\n    ![pixelate-annotator-example](https://media.roboflow.com/\n    supervision-annotator-examples/pixelate-annotator-example-10.png)\n    \"\"\"\n    image_height, image_width = scene.shape[:2]\n    clipped_xyxy = clip_boxes(\n        xyxy=detections.xyxy, resolution_wh=(image_width, image_height)\n    ).astype(int)\n\n    for x1, y1, x2, y2 in clipped_xyxy:\n        roi = scene[y1:y2, x1:x2]\n        scaled_up_roi = cv2.resize(\n            src=roi, dsize=None, fx=1 / self.pixel_size, fy=1 / self.pixel_size\n        )\n        scaled_down_roi = cv2.resize(\n            src=scaled_up_roi,\n            dsize=(roi.shape[1], roi.shape[0]),\n            interpolation=cv2.INTER_NEAREST,\n        )\n\n        scene[y1:y2, x1:x2] = scaled_down_roi\n\n    return scene\n
"},{"location":"detection/annotators/#supervision.annotators.core.TraceAnnotator-functions","title":"Functions","text":""},{"location":"detection/annotators/#supervision.annotators.core.TraceAnnotator.__init__","title":"__init__(color=ColorPalette.DEFAULT, position=Position.CENTER, trace_length=30, thickness=2, color_lookup=ColorLookup.CLASS)","text":"

Parameters:

Name Type Description Default color Union[Color, ColorPalette]

The color to draw the trace; it can be a single color or a color palette.

DEFAULT position Position

The position of the trace. Defaults to CENTER.

CENTER trace_length int

The maximum length of the trace in terms of historical points. Defaults to 30.

30 thickness int

The thickness of the trace lines. Defaults to 2.

2 color_lookup str

Strategy for mapping colors to annotations. Options are INDEX, CLASS, TRACK.

CLASS Source code in supervision/annotators/core.py
def __init__(\n    self,\n    color: Union[Color, ColorPalette] = ColorPalette.DEFAULT,\n    position: Position = Position.CENTER,\n    trace_length: int = 30,\n    thickness: int = 2,\n    color_lookup: ColorLookup = ColorLookup.CLASS,\n):\n    \"\"\"\n    Args:\n        color (Union[Color, ColorPalette]): The color to draw the trace, can be\n            a single color or a color palette.\n        position (Position): The position of the trace.\n            Defaults to `CENTER`.\n        trace_length (int): The maximum length of the trace in terms of historical\n            points. Defaults to `30`.\n        thickness (int): The thickness of the trace lines. Defaults to `2`.\n        color_lookup (str): Strategy for mapping colors to annotations.\n            Options are `INDEX`, `CLASS`, `TRACK`.\n    \"\"\"\n    self.color: Union[Color, ColorPalette] = color\n    self.trace = Trace(max_size=trace_length, anchor=position)\n    self.thickness = thickness\n    self.color_lookup: ColorLookup = color_lookup\n
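A minimal construction sketch using the parameters documented above; the values are illustrative, and `ColorLookup.TRACK` assumes detections carry `tracker_id` values, e.g. produced by `sv.ByteTrack` as in the `annotate` example below.

```python
import supervision as sv

# illustrative values; TRACK color lookup requires tracker_id on detections
trace_annotator = sv.TraceAnnotator(
    position=sv.Position.CENTER,
    trace_length=60,
    thickness=4,
    color_lookup=sv.ColorLookup.TRACK,
)
```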
"},{"location":"detection/annotators/#supervision.annotators.core.TraceAnnotator.annotate","title":"annotate(scene, detections, custom_color_lookup=None)","text":"

Draws trace paths on the frame based on the detection coordinates provided.

Parameters:

Name Type Description Default scene ImageType

The image on which the traces will be drawn. ImageType is a flexible type, accepting either numpy.ndarray or PIL.Image.Image.

required detections Detections

The detections which include coordinates for which the traces will be drawn.

required custom_color_lookup Optional[ndarray]

Custom color lookup array. Allows overriding the default color mapping strategy.

None

Returns:

Type Description ImageType

The annotated image, matching the type of scene (numpy.ndarray or PIL.Image.Image)

Example
import supervision as sv\nfrom ultralytics import YOLO\n\nmodel = YOLO('yolov8x.pt')\ntrace_annotator = sv.TraceAnnotator()\n\nvideo_info = sv.VideoInfo.from_video_path(video_path='...')\nframes_generator = sv.get_video_frames_generator(source_path='...')\ntracker = sv.ByteTrack()\n\nwith sv.VideoSink(target_path='...', video_info=video_info) as sink:\n   for frame in frames_generator:\n       result = model(frame)[0]\n       detections = sv.Detections.from_ultralytics(result)\n       detections = tracker.update_with_detections(detections)\n       annotated_frame = trace_annotator.annotate(\n           scene=frame.copy(),\n           detections=detections)\n       sink.write_frame(frame=annotated_frame)\n

Source code in supervision/annotators/core.py
@convert_for_annotation_method\ndef annotate(\n    self,\n    scene: ImageType,\n    detections: Detections,\n    custom_color_lookup: Optional[np.ndarray] = None,\n) -> ImageType:\n    \"\"\"\n    Draws trace paths on the frame based on the detection coordinates provided.\n\n    Args:\n        scene (ImageType): The image on which the traces will be drawn.\n            `ImageType` is a flexible type, accepting either `numpy.ndarray`\n            or `PIL.Image.Image`.\n        detections (Detections): The detections which include coordinates for\n            which the traces will be drawn.\n        custom_color_lookup (Optional[np.ndarray]): Custom color lookup array.\n            Allows to override the default color mapping strategy.\n\n    Returns:\n        The annotated image, matching the type of `scene` (`numpy.ndarray`\n            or `PIL.Image.Image`)\n\n    Example:\n        ```python\n        import supervision as sv\n        from ultralytics import YOLO\n\n        model = YOLO('yolov8x.pt')\n        trace_annotator = sv.TraceAnnotator()\n\n        video_info = sv.VideoInfo.from_video_path(video_path='...')\n        frames_generator = sv.get_video_frames_generator(source_path='...')\n        tracker = sv.ByteTrack()\n\n        with sv.VideoSink(target_path='...', video_info=video_info) as sink:\n           for frame in frames_generator:\n               result = model(frame)[0]\n               detections = sv.Detections.from_ultralytics(result)\n               detections = tracker.update_with_detections(detections)\n               annotated_frame = trace_annotator.annotate(\n                   scene=frame.copy(),\n                   detections=detections)\n               sink.write_frame(frame=annotated_frame)\n        ```\n\n    ![trace-annotator-example](https://media.roboflow.com/\n    supervision-annotator-examples/trace-annotator-example-purple.png)\n    \"\"\"\n    self.trace.put(detections)\n\n    for detection_idx in range(len(detections)):\n        tracker_id = int(detections.tracker_id[detection_idx])\n        color = resolve_color(\n            color=self.color,\n            detections=detections,\n            detection_idx=detection_idx,\n            color_lookup=self.color_lookup\n            if custom_color_lookup is None\n            else custom_color_lookup,\n        )\n        xy = self.trace.get(tracker_id=tracker_id)\n        if len(xy) > 1:\n            scene = cv2.polylines(\n                scene,\n                [xy.astype(np.int32)],\n                False,\n                color=color.as_bgr(),\n                thickness=self.thickness,\n            )\n    return scene\n
"},{"location":"detection/annotators/#supervision.annotators.core.CropAnnotator-functions","title":"Functions","text":""},{"location":"detection/annotators/#supervision.annotators.core.CropAnnotator.__init__","title":"__init__(position=Position.TOP_CENTER, scale_factor=2, border_color=ColorPalette.DEFAULT, border_thickness=2, border_color_lookup=ColorLookup.CLASS)","text":"

Parameters:

Name Type Description Default position Position

The anchor position for placing the cropped and scaled part of the detection in the scene.

TOP_CENTER scale_factor int

The factor by which to scale the cropped image part. A factor of 2, for example, would double the size of the cropped area, allowing for a closer view of the detection.

2 border_color Union[Color, ColorPalette]

The color or color palette to use for annotating the border around the cropped area.

DEFAULT border_thickness int

The thickness of the border around the cropped area.

2 border_color_lookup ColorLookup

Strategy for mapping colors to annotations. Options are INDEX, CLASS, TRACK.

CLASS Source code in supervision/annotators/core.py
def __init__(\n    self,\n    position: Position = Position.TOP_CENTER,\n    scale_factor: int = 2,\n    border_color: Union[Color, ColorPalette] = ColorPalette.DEFAULT,\n    border_thickness: int = 2,\n    border_color_lookup: ColorLookup = ColorLookup.CLASS,\n):\n    \"\"\"\n    Args:\n        position (Position): The anchor position for placing the cropped and scaled\n            part of the detection in the scene.\n        scale_factor (int): The factor by which to scale the cropped image part. A\n            factor of 2, for example, would double the size of the cropped area,\n            allowing for a closer view of the detection.\n        border_color (Union[Color, ColorPalette]): The color or color palette to\n            use for annotating border around the cropped area.\n        border_thickness (int): The thickness of the border around the cropped area.\n        border_color_lookup (ColorLookup): Strategy for mapping colors to\n            annotations. Options are `INDEX`, `CLASS`, `TRACK`.\n    \"\"\"\n    self.position: Position = position\n    self.scale_factor: int = scale_factor\n    self.border_color: Union[Color, ColorPalette] = border_color\n    self.border_thickness: int = border_thickness\n    self.border_color_lookup: ColorLookup = border_color_lookup\n
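A minimal construction sketch; the values are illustrative. A `scale_factor` of 3 pastes each crop back at three times its original size, anchored at `TOP_CENTER`.

```python
import supervision as sv

# illustrative values; scale_factor=3 triples the size of each pasted crop
crop_annotator = sv.CropAnnotator(
    position=sv.Position.TOP_CENTER,
    scale_factor=3,
    border_thickness=2,
)
```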
"},{"location":"detection/annotators/#supervision.annotators.core.CropAnnotator.annotate","title":"annotate(scene, detections, custom_color_lookup=None)","text":"

Annotates the provided scene with scaled and cropped parts of the image based on the provided detections. Each detection is cropped from the original scene and scaled according to the annotator's scale factor before being placed back onto the scene at the specified position.

Parameters:

Name Type Description Default scene ImageType

The image where cropped detection will be placed. ImageType is a flexible type, accepting either numpy.ndarray or PIL.Image.Image.

required detections Detections

Object detections to annotate.

required custom_color_lookup Optional[ndarray]

Custom color lookup array. Allows overriding the default color mapping strategy.

None

Returns:

Type Description ImageType

The annotated image.

Example
import supervision as sv\n\nimage = ...\ndetections = sv.Detections(...)\n\ncrop_annotator = sv.CropAnnotator()\nannotated_frame = crop_annotator.annotate(\n    scene=image.copy(),\n    detections=detections\n)\n
Source code in supervision/annotators/core.py
@convert_for_annotation_method\ndef annotate(\n    self,\n    scene: ImageType,\n    detections: Detections,\n    custom_color_lookup: Optional[np.ndarray] = None,\n) -> ImageType:\n    \"\"\"\n    Annotates the provided scene with scaled and cropped parts of the image based\n    on the provided detections. Each detection is cropped from the original scene\n    and scaled according to the annotator's scale factor before being placed back\n    onto the scene at the specified position.\n\n\n    Args:\n        scene (ImageType): The image where cropped detection will be placed.\n            `ImageType` is a flexible type, accepting either `numpy.ndarray`\n            or `PIL.Image.Image`.\n        detections (Detections): Object detections to annotate.\n        custom_color_lookup (Optional[np.ndarray]): Custom color lookup array.\n            Allows to override the default color mapping strategy.\n\n    Returns:\n        The annotated image.\n\n    Example:\n        ```python\n        import supervision as sv\n\n        image = ...\n        detections = sv.Detections(...)\n\n        crop_annotator = sv.CropAnnotator()\n        annotated_frame = crop_annotator.annotate(\n            scene=image.copy(),\n            detections=detections\n        )\n        ```\n    \"\"\"\n    crops = [\n        crop_image(image=scene, xyxy=xyxy) for xyxy in detections.xyxy.astype(int)\n    ]\n    resized_crops = [\n        scale_image(image=crop, scale_factor=self.scale_factor) for crop in crops\n    ]\n    anchors = detections.get_anchors_coordinates(anchor=self.position).astype(int)\n\n    for idx, (resized_crop, anchor) in enumerate(zip(resized_crops, anchors)):\n        crop_wh = resized_crop.shape[1], resized_crop.shape[0]\n        (x1, y1), (x2, y2) = self.calculate_crop_coordinates(\n            anchor=anchor, crop_wh=crop_wh, position=self.position\n        )\n        scene = overlay_image(\n            scene=scene, inserted_image=resized_crop, anchor=(x1, y1)\n        )\n        color = resolve_color(\n            color=self.border_color,\n            detections=detections,\n            detection_idx=idx,\n            color_lookup=self.border_color_lookup\n            if custom_color_lookup is None\n            else custom_color_lookup,\n        )\n        cv2.rectangle(\n            img=scene,\n            pt1=(x1, y1),\n            pt2=(x2, y2),\n            color=color.as_bgr(),\n            thickness=self.border_thickness,\n        )\n\n    return scene\n
"},{"location":"detection/core/","title":"Detections","text":"

The sv.Detections class in the Supervision library standardizes results from various object detection and segmentation models into a consistent format. This class simplifies data manipulation and filtering, providing a uniform API for integration with Supervision trackers, annotators, and tools.

InferenceUltralyticsTransformers

Use sv.Detections.from_inference method, which accepts model results from both detection and segmentation models.

import cv2\nimport supervision as sv\nfrom inference import get_model\n\nmodel = get_model(model_id=\"yolov8n-640\")\nimage = cv2.imread(<SOURCE_IMAGE_PATH>)\nresults = model.infer(image)[0]\ndetections = sv.Detections.from_inference(results)\n

Use sv.Detections.from_ultralytics method, which accepts model results from both detection and segmentation models.

import cv2\nimport supervision as sv\nfrom ultralytics import YOLO\n\nmodel = YOLO(\"yolov8n.pt\")\nimage = cv2.imread(<SOURCE_IMAGE_PATH>)\nresults = model(image)[0]\ndetections = sv.Detections.from_ultralytics(results)\n

Use sv.Detections.from_transformers method, which accepts model results from both detection and segmentation models.

import torch\nimport supervision as sv\nfrom PIL import Image\nfrom transformers import DetrImageProcessor, DetrForObjectDetection\n\nprocessor = DetrImageProcessor.from_pretrained(\"facebook/detr-resnet-50\")\nmodel = DetrForObjectDetection.from_pretrained(\"facebook/detr-resnet-50\")\n\nimage = Image.open(<SOURCE_IMAGE_PATH>)\ninputs = processor(images=image, return_tensors=\"pt\")\n\nwith torch.no_grad():\n    outputs = model(**inputs)\n\nwidth, height = image.size\ntarget_size = torch.tensor([[height, width]])\nresults = processor.post_process_object_detection(\n    outputs=outputs, target_sizes=target_size)[0]\ndetections = sv.Detections.from_transformers(\n    transformers_results=results,\n    id2label=model.config.id2label)\n

Attributes:

Name Type Description xyxy ndarray

An array of shape (n, 4) containing the bounding box coordinates in the format [x1, y1, x2, y2]

mask Optional[ndarray]

An array of shape (n, H, W) containing the segmentation masks.

confidence Optional[ndarray]

An array of shape (n,) containing the confidence scores of the detections.

class_id Optional[ndarray]

An array of shape (n,) containing the class ids of the detections.

tracker_id Optional[ndarray]

An array of shape (n,) containing the tracker ids of the detections.

data Dict[str, Union[ndarray, List]]

A dictionary containing additional data where each key is a string representing the data type, and the value is either a NumPy array or a list of corresponding data.
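A short filtering sketch built on the attributes above; it relies on the boolean-mask indexing that `sv.Detections` supports, which filters `xyxy`, `mask`, `confidence`, `class_id`, `tracker_id`, and `data` together. The boxes, scores, and class ids below are made-up values for illustration only.

```python
import numpy as np
import supervision as sv

# construct a Detections object directly from the documented attributes
detections = sv.Detections(
    xyxy=np.array([[10, 10, 60, 60], [30, 30, 120, 120]], dtype=float),
    confidence=np.array([0.9, 0.3]),
    class_id=np.array([0, 1]),
)

# boolean-mask indexing returns a new, smaller Detections object with all
# fields (xyxy, confidence, class_id, and mask/tracker_id/data when present)
# filtered consistently
confident = detections[detections.confidence > 0.5]
class_zero_only = detections[detections.class_id == 0]
```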

Source code in supervision/detection/core.py
@dataclass\nclass Detections:\n    \"\"\"\n    The `sv.Detections` class in the Supervision library standardizes results from\n    various object detection and segmentation models into a consistent format. This\n    class simplifies data manipulation and filtering, providing a uniform API for\n    integration with Supervision [trackers](/trackers/), [annotators](/detection/annotators/), and [tools](/detection/tools/line_zone/).\n\n    === \"Inference\"\n\n        Use [`sv.Detections.from_inference`](/detection/core/#supervision.detection.core.Detections.from_inference)\n        method, which accepts model results from both detection and segmentation models.\n\n        ```python\n        import cv2\n        import supervision as sv\n        from inference import get_model\n\n        model = get_model(model_id=\"yolov8n-640\")\n        image = cv2.imread(<SOURCE_IMAGE_PATH>)\n        results = model.infer(image)[0]\n        detections = sv.Detections.from_inference(results)\n        ```\n\n    === \"Ultralytics\"\n\n        Use [`sv.Detections.from_ultralytics`](/detection/core/#supervision.detection.core.Detections.from_ultralytics)\n        method, which accepts model results from both detection and segmentation models.\n\n        ```python\n        import cv2\n        import supervision as sv\n        from ultralytics import YOLO\n\n        model = YOLO(\"yolov8n.pt\")\n        image = cv2.imread(<SOURCE_IMAGE_PATH>)\n        results = model(image)[0]\n        detections = sv.Detections.from_ultralytics(results)\n        ```\n\n    === \"Transformers\"\n\n        Use [`sv.Detections.from_transformers`](/detection/core/#supervision.detection.core.Detections.from_transformers)\n        method, which accepts model results from both detection and segmentation models.\n\n        ```python\n        import torch\n        import supervision as sv\n        from PIL import Image\n        from transformers import DetrImageProcessor, DetrForObjectDetection\n\n        processor = DetrImageProcessor.from_pretrained(\"facebook/detr-resnet-50\")\n        model = DetrForObjectDetection.from_pretrained(\"facebook/detr-resnet-50\")\n\n        image = Image.open(<SOURCE_IMAGE_PATH>)\n        inputs = processor(images=image, return_tensors=\"pt\")\n\n        with torch.no_grad():\n            outputs = model(**inputs)\n\n        width, height = image.size\n        target_size = torch.tensor([[height, width]])\n        results = processor.post_process_object_detection(\n            outputs=outputs, target_sizes=target_size)[0]\n        detections = sv.Detections.from_transformers(\n            transformers_results=results,\n            id2label=model.config.id2label)\n        ```\n\n    Attributes:\n        xyxy (np.ndarray): An array of shape `(n, 4)` containing\n            the bounding boxes coordinates in format `[x1, y1, x2, y2]`\n        mask: (Optional[np.ndarray]): An array of shape\n            `(n, H, W)` containing the segmentation masks.\n        confidence (Optional[np.ndarray]): An array of shape\n            `(n,)` containing the confidence scores of the detections.\n        class_id (Optional[np.ndarray]): An array of shape\n            `(n,)` containing the class ids of the detections.\n        tracker_id (Optional[np.ndarray]): An array of shape\n            `(n,)` containing the tracker ids of the detections.\n        data (Dict[str, Union[np.ndarray, List]]): A dictionary containing additional\n            data where each key is a string representing the data type, and the value\n           
 is either a NumPy array or a list of corresponding data.\n    \"\"\"  # noqa: E501 // docs\n\n    xyxy: np.ndarray\n    mask: Optional[np.ndarray] = None\n    confidence: Optional[np.ndarray] = None\n    class_id: Optional[np.ndarray] = None\n    tracker_id: Optional[np.ndarray] = None\n    data: Dict[str, Union[np.ndarray, List]] = field(default_factory=dict)\n\n    def __post_init__(self):\n        validate_detections_fields(\n            xyxy=self.xyxy,\n            mask=self.mask,\n            confidence=self.confidence,\n            class_id=self.class_id,\n            tracker_id=self.tracker_id,\n            data=self.data,\n        )\n\n    def __len__(self):\n        \"\"\"\n        Returns the number of detections in the Detections object.\n        \"\"\"\n        return len(self.xyxy)\n\n    def __iter__(\n        self,\n    ) -> Iterator[\n        Tuple[\n            np.ndarray,\n            Optional[np.ndarray],\n            Optional[float],\n            Optional[int],\n            Optional[int],\n            Dict[str, Union[np.ndarray, List]],\n        ]\n    ]:\n        \"\"\"\n        Iterates over the Detections object and yield a tuple of\n        `(xyxy, mask, confidence, class_id, tracker_id, data)` for each detection.\n        \"\"\"\n        for i in range(len(self.xyxy)):\n            yield (\n                self.xyxy[i],\n                self.mask[i] if self.mask is not None else None,\n                self.confidence[i] if self.confidence is not None else None,\n                self.class_id[i] if self.class_id is not None else None,\n                self.tracker_id[i] if self.tracker_id is not None else None,\n                get_data_item(self.data, i),\n            )\n\n    def __eq__(self, other: Detections):\n        return all(\n            [\n                np.array_equal(self.xyxy, other.xyxy),\n                np.array_equal(self.mask, other.mask),\n                np.array_equal(self.class_id, other.class_id),\n                np.array_equal(self.confidence, other.confidence),\n                np.array_equal(self.tracker_id, other.tracker_id),\n                is_data_equal(self.data, other.data),\n            ]\n        )\n\n    @classmethod\n    def from_yolov5(cls, yolov5_results) -> Detections:\n        \"\"\"\n        Creates a Detections instance from a\n        [YOLOv5](https://github.com/ultralytics/yolov5) inference result.\n\n        Args:\n            yolov5_results (yolov5.models.common.Detections):\n                The output Detections instance from YOLOv5\n\n        Returns:\n            Detections: A new Detections object.\n\n        Example:\n            ```python\n            import cv2\n            import torch\n            import supervision as sv\n\n            image = cv2.imread(<SOURCE_IMAGE_PATH>)\n            model = torch.hub.load('ultralytics/yolov5', 'yolov5s')\n            result = model(image)\n            detections = sv.Detections.from_yolov5(result)\n            ```\n        \"\"\"\n        yolov5_detections_predictions = yolov5_results.pred[0].cpu().cpu().numpy()\n\n        return cls(\n            xyxy=yolov5_detections_predictions[:, :4],\n            confidence=yolov5_detections_predictions[:, 4],\n            class_id=yolov5_detections_predictions[:, 5].astype(int),\n        )\n\n    @classmethod\n    def from_ultralytics(cls, ultralytics_results) -> Detections:\n        \"\"\"\n        Creates a `sv.Detections` instance from a\n        [YOLOv8](https://github.com/ultralytics/ultralytics) inference result.\n\n       
 !!! Note\n\n            `from_ultralytics` is compatible with\n            [detection](https://docs.ultralytics.com/tasks/detect/),\n            [segmentation](https://docs.ultralytics.com/tasks/segment/), and\n            [OBB](https://docs.ultralytics.com/tasks/obb/) models.\n\n        Args:\n            ultralytics_results (ultralytics.yolo.engine.results.Results):\n                The output Results instance from Ultralytics\n\n        Returns:\n            Detections: A new Detections object.\n\n        Example:\n            ```python\n            import cv2\n            import supervision as sv\n            from ultralytics import YOLO\n\n            image = cv2.imread(<SOURCE_IMAGE_PATH>)\n            model = YOLO('yolov8s.pt')\n            results = model(image)[0]\n            detections = sv.Detections.from_ultralytics(results)\n            ```\n\n        !!! tip\n\n            Class names values can be accessed using `detections[\"class_name\"]`.\n        \"\"\"  # noqa: E501 // docs\n\n        if hasattr(ultralytics_results, \"obb\") and ultralytics_results.obb is not None:\n            class_id = ultralytics_results.obb.cls.cpu().numpy().astype(int)\n            class_names = np.array([ultralytics_results.names[i] for i in class_id])\n            oriented_box_coordinates = ultralytics_results.obb.xyxyxyxy.cpu().numpy()\n            return cls(\n                xyxy=ultralytics_results.obb.xyxy.cpu().numpy(),\n                confidence=ultralytics_results.obb.conf.cpu().numpy(),\n                class_id=class_id,\n                tracker_id=ultralytics_results.obb.id.int().cpu().numpy()\n                if ultralytics_results.obb.id is not None\n                else None,\n                data={\n                    ORIENTED_BOX_COORDINATES: oriented_box_coordinates,\n                    CLASS_NAME_DATA_FIELD: class_names,\n                },\n            )\n\n        class_id = ultralytics_results.boxes.cls.cpu().numpy().astype(int)\n        class_names = np.array([ultralytics_results.names[i] for i in class_id])\n        return cls(\n            xyxy=ultralytics_results.boxes.xyxy.cpu().numpy(),\n            confidence=ultralytics_results.boxes.conf.cpu().numpy(),\n            class_id=class_id,\n            mask=extract_ultralytics_masks(ultralytics_results),\n            tracker_id=ultralytics_results.boxes.id.int().cpu().numpy()\n            if ultralytics_results.boxes.id is not None\n            else None,\n            data={CLASS_NAME_DATA_FIELD: class_names},\n        )\n\n    @classmethod\n    def from_yolo_nas(cls, yolo_nas_results) -> Detections:\n        \"\"\"\n        Creates a Detections instance from a\n        [YOLO-NAS](https://github.com/Deci-AI/super-gradients/blob/master/YOLONAS.md)\n        inference result.\n\n        Args:\n            yolo_nas_results (ImageDetectionPrediction):\n                The output Results instance from YOLO-NAS\n                ImageDetectionPrediction is coming from\n                'super_gradients.training.models.prediction_results'\n\n        Returns:\n            Detections: A new Detections object.\n\n        Example:\n            ```python\n            import cv2\n            from super_gradients.training import models\n            import supervision as sv\n\n            image = cv2.imread(<SOURCE_IMAGE_PATH>)\n            model = models.get('yolo_nas_l', pretrained_weights=\"coco\")\n\n            result = list(model.predict(image, conf=0.35))[0]\n            detections = sv.Detections.from_yolo_nas(result)\n        
    ```\n        \"\"\"\n        if np.asarray(yolo_nas_results.prediction.bboxes_xyxy).shape[0] == 0:\n            return cls.empty()\n\n        return cls(\n            xyxy=yolo_nas_results.prediction.bboxes_xyxy,\n            confidence=yolo_nas_results.prediction.confidence,\n            class_id=yolo_nas_results.prediction.labels.astype(int),\n        )\n\n    @classmethod\n    def from_tensorflow(\n        cls, tensorflow_results: dict, resolution_wh: tuple\n    ) -> Detections:\n        \"\"\"\n        Creates a Detections instance from a\n        [Tensorflow Hub](https://www.tensorflow.org/hub/tutorials/tf2_object_detection)\n        inference result.\n\n        Args:\n            tensorflow_results (dict):\n                The output results from Tensorflow Hub.\n\n        Returns:\n            Detections: A new Detections object.\n\n        Example:\n            ```python\n            import tensorflow as tf\n            import tensorflow_hub as hub\n            import numpy as np\n            import cv2\n\n            module_handle = \"https://tfhub.dev/tensorflow/centernet/hourglass_512x512_kpts/1\"\n            model = hub.load(module_handle)\n            img = np.array(cv2.imread(SOURCE_IMAGE_PATH))\n            result = model(img)\n            detections = sv.Detections.from_tensorflow(result)\n            ```\n        \"\"\"  # noqa: E501 // docs\n\n        boxes = tensorflow_results[\"detection_boxes\"][0].numpy()\n        boxes[:, [0, 2]] *= resolution_wh[0]\n        boxes[:, [1, 3]] *= resolution_wh[1]\n        boxes = boxes[:, [1, 0, 3, 2]]\n        return cls(\n            xyxy=boxes,\n            confidence=tensorflow_results[\"detection_scores\"][0].numpy(),\n            class_id=tensorflow_results[\"detection_classes\"][0].numpy().astype(int),\n        )\n\n    @classmethod\n    def from_deepsparse(cls, deepsparse_results) -> Detections:\n        \"\"\"\n        Creates a Detections instance from a\n        [DeepSparse](https://github.com/neuralmagic/deepsparse)\n        inference result.\n\n        Args:\n            deepsparse_results (deepsparse.yolo.schemas.YOLOOutput):\n                The output Results instance from DeepSparse.\n\n        Returns:\n            Detections: A new Detections object.\n\n        Example:\n            ```python\n            import supervision as sv\n            from deepsparse import Pipeline\n\n            yolo_pipeline = Pipeline.create(\n                task=\"yolo\",\n                model_path = \"zoo:cv/detection/yolov5-l/pytorch/ultralytics/coco/pruned80_quant-none\"\n             )\n            result = yolo_pipeline(<SOURCE IMAGE PATH>)\n            detections = sv.Detections.from_deepsparse(result)\n            ```\n        \"\"\"  # noqa: E501 // docs\n\n        if np.asarray(deepsparse_results.boxes[0]).shape[0] == 0:\n            return cls.empty()\n\n        return cls(\n            xyxy=np.array(deepsparse_results.boxes[0]),\n            confidence=np.array(deepsparse_results.scores[0]),\n            class_id=np.array(deepsparse_results.labels[0]).astype(float).astype(int),\n        )\n\n    @classmethod\n    def from_mmdetection(cls, mmdet_results) -> Detections:\n        \"\"\"\n        Creates a Detections instance from a\n        [mmdetection](https://github.com/open-mmlab/mmdetection) and\n        [mmyolo](https://github.com/open-mmlab/mmyolo) inference result.\n\n        Args:\n            mmdet_results (mmdet.structures.DetDataSample):\n                The output Results instance from MMDetection.\n\n        
Returns:\n            Detections: A new Detections object.\n\n        Example:\n            ```python\n            import cv2\n            import supervision as sv\n            from mmdet.apis import init_detector, inference_detector\n\n            image = cv2.imread(<SOURCE_IMAGE_PATH>)\n            model = init_detector(<CONFIG_PATH>, <WEIGHTS_PATH>, device=<DEVICE>)\n\n            result = inference_detector(model, image)\n            detections = sv.Detections.from_mmdetection(result)\n            ```\n        \"\"\"  # noqa: E501 // docs\n\n        return cls(\n            xyxy=mmdet_results.pred_instances.bboxes.cpu().numpy(),\n            confidence=mmdet_results.pred_instances.scores.cpu().numpy(),\n            class_id=mmdet_results.pred_instances.labels.cpu().numpy().astype(int),\n            mask=mmdet_results.pred_instances.masks.cpu().numpy()\n            if \"masks\" in mmdet_results.pred_instances\n            else None,\n        )\n\n    @classmethod\n    def from_transformers(\n        cls, transformers_results: dict, id2label: Optional[Dict[int, str]] = None\n    ) -> Detections:\n        \"\"\"\n        Creates a Detections instance from object detection or segmentation\n        [Transformer](https://github.com/huggingface/transformers) inference result.\n\n        Args:\n            transformers_results (dict): The output of Transformers model inference. A\n                dictionary containing the `scores`, `labels`, `boxes` and `masks` keys.\n            id2label (Optional[Dict[int, str]]): A dictionary mapping class IDs to\n                class names. If provided, the resulting Detections object will contain\n                `class_name` data field with the class names.\n\n        Returns:\n            Detections: A new Detections object.\n\n        Example:\n            ```python\n            import torch\n            import supervision as sv\n            from PIL import Image\n            from transformers import DetrImageProcessor, DetrForObjectDetection\n\n            processor = DetrImageProcessor.from_pretrained(\"facebook/detr-resnet-50\")\n            model = DetrForObjectDetection.from_pretrained(\"facebook/detr-resnet-50\")\n\n            image = Image.open(<SOURCE_IMAGE_PATH>)\n            inputs = processor(images=image, return_tensors=\"pt\")\n\n            with torch.no_grad():\n                outputs = model(**inputs)\n\n            width, height = image.size\n            target_size = torch.tensor([[height, width]])\n            results = processor.post_process_object_detection(\n                outputs=outputs, target_sizes=target_size)[0]\n\n            detections = sv.Detections.from_transformers(\n                transformers_results=results,\n                id2label=model.config.id2label\n            )\n            ```\n\n        !!! 
tip\n\n            Class names values can be accessed using `detections[\"class_name\"]`.\n        \"\"\"  # noqa: E501 // docs\n\n        class_ids = transformers_results[\"labels\"].cpu().detach().numpy().astype(int)\n        data = {}\n        if id2label is not None:\n            class_names = np.array([id2label[class_id] for class_id in class_ids])\n            data[CLASS_NAME_DATA_FIELD] = class_names\n        if \"boxes\" in transformers_results:\n            return cls(\n                xyxy=transformers_results[\"boxes\"].cpu().detach().numpy(),\n                confidence=transformers_results[\"scores\"].cpu().detach().numpy(),\n                class_id=class_ids,\n                data=data,\n            )\n        elif \"masks\" in transformers_results:\n            masks = transformers_results[\"masks\"].cpu().detach().numpy().astype(bool)\n            return cls(\n                xyxy=mask_to_xyxy(masks),\n                mask=masks,\n                confidence=transformers_results[\"scores\"].cpu().detach().numpy(),\n                class_id=class_ids,\n                data=data,\n            )\n        else:\n            raise NotImplementedError(\n                \"Only object detection and semantic segmentation results are supported.\"\n            )\n\n    @classmethod\n    def from_detectron2(cls, detectron2_results) -> Detections:\n        \"\"\"\n        Create a Detections object from the\n        [Detectron2](https://github.com/facebookresearch/detectron2) inference result.\n\n        Args:\n            detectron2_results: The output of a\n                Detectron2 model containing instances with prediction data.\n\n        Returns:\n            (Detections): A Detections object containing the bounding boxes,\n                class IDs, and confidences of the predictions.\n\n        Example:\n            ```python\n            import cv2\n            import supervision as sv\n            from detectron2.engine import DefaultPredictor\n            from detectron2.config import get_cfg\n\n\n            image = cv2.imread(<SOURCE_IMAGE_PATH>)\n            cfg = get_cfg()\n            cfg.merge_from_file(<CONFIG_PATH>)\n            cfg.MODEL.WEIGHTS = <WEIGHTS_PATH>\n            predictor = DefaultPredictor(cfg)\n\n            result = predictor(image)\n            detections = sv.Detections.from_detectron2(result)\n            ```\n        \"\"\"\n\n        return cls(\n            xyxy=detectron2_results[\"instances\"].pred_boxes.tensor.cpu().numpy(),\n            confidence=detectron2_results[\"instances\"].scores.cpu().numpy(),\n            class_id=detectron2_results[\"instances\"]\n            .pred_classes.cpu()\n            .numpy()\n            .astype(int),\n        )\n\n    @classmethod\n    def from_inference(cls, roboflow_result: Union[dict, Any]) -> Detections:\n        \"\"\"\n        Create a `sv.Detections` object from the [Roboflow](https://roboflow.com/)\n        API inference result or the [Inference](https://inference.roboflow.com/)\n        package results. 
This method extracts bounding boxes, class IDs,\n        confidences, and class names from the Roboflow API result and encapsulates\n        them into a Detections object.\n\n        Args:\n            roboflow_result (dict, any): The result from the\n                Roboflow API or Inference package containing predictions.\n\n        Returns:\n            (Detections): A Detections object containing the bounding boxes, class IDs,\n                and confidences of the predictions.\n\n        Example:\n            ```python\n            import cv2\n            import supervision as sv\n            from inference import get_model\n\n            image = cv2.imread(<SOURCE_IMAGE_PATH>)\n            model = get_model(model_id=\"yolov8s-640\")\n\n            result = model.infer(image)[0]\n            detections = sv.Detections.from_inference(result)\n            ```\n\n        !!! tip\n\n            Class names values can be accessed using `detections[\"class_name\"]`.\n        \"\"\"\n        with suppress(AttributeError):\n            roboflow_result = roboflow_result.dict(exclude_none=True, by_alias=True)\n        xyxy, confidence, class_id, masks, trackers, data = process_roboflow_result(\n            roboflow_result=roboflow_result\n        )\n\n        if np.asarray(xyxy).shape[0] == 0:\n            empty_detection = cls.empty()\n            empty_detection.data = {CLASS_NAME_DATA_FIELD: np.empty(0)}\n            return empty_detection\n\n        return cls(\n            xyxy=xyxy,\n            confidence=confidence,\n            class_id=class_id,\n            mask=masks,\n            tracker_id=trackers,\n            data=data,\n        )\n\n    @classmethod\n    @deprecated(\n        \"`Detections.from_roboflow` is deprecated and will be removed in \"\n        \"`supervision-0.22.0`. Use `Detections.from_inference` instead.\"\n    )\n    def from_roboflow(cls, roboflow_result: Union[dict, Any]) -> Detections:\n        \"\"\"\n        !!! failure \"Deprecated\"\n\n            `Detections.from_roboflow` is deprecated and will be removed in\n            `supervision-0.22.0`. 
Use `Detections.from_inference` instead.\n\n        Create a Detections object from the [Roboflow](https://roboflow.com/)\n            API inference result or the [Inference](https://inference.roboflow.com/)\n            package results.\n\n        Args:\n            roboflow_result (dict): The result from the\n                Roboflow API containing predictions.\n\n        Returns:\n            (Detections): A Detections object containing the bounding boxes, class IDs,\n                and confidences of the predictions.\n\n        Example:\n            ```python\n            import cv2\n            import supervision as sv\n            from inference import get_model\n\n            image = cv2.imread(<SOURCE_IMAGE_PATH>)\n            model = get_model(model_id=\"yolov8s-640\")\n\n            result = model.infer(image)[0]\n            detections = sv.Detections.from_roboflow(result)\n            ```\n        \"\"\"\n        return cls.from_inference(roboflow_result)\n\n    @classmethod\n    def from_sam(cls, sam_result: List[dict]) -> Detections:\n        \"\"\"\n        Creates a Detections instance from\n        [Segment Anything Model](https://github.com/facebookresearch/segment-anything)\n        inference result.\n\n        Args:\n            sam_result (List[dict]): The output Results instance from SAM\n\n        Returns:\n            Detections: A new Detections object.\n\n        Example:\n            ```python\n            import supervision as sv\n            from segment_anything import (\n                sam_model_registry,\n                SamAutomaticMaskGenerator\n             )\n\n            sam_model_reg = sam_model_registry[MODEL_TYPE]\n            sam = sam_model_reg(checkpoint=CHECKPOINT_PATH).to(device=DEVICE)\n            mask_generator = SamAutomaticMaskGenerator(sam)\n            sam_result = mask_generator.generate(IMAGE)\n            detections = sv.Detections.from_sam(sam_result=sam_result)\n            ```\n        \"\"\"\n\n        sorted_generated_masks = sorted(\n            sam_result, key=lambda x: x[\"area\"], reverse=True\n        )\n\n        xywh = np.array([mask[\"bbox\"] for mask in sorted_generated_masks])\n        mask = np.array([mask[\"segmentation\"] for mask in sorted_generated_masks])\n\n        if np.asarray(xywh).shape[0] == 0:\n            return cls.empty()\n\n        xyxy = xywh_to_xyxy(boxes_xywh=xywh)\n        return cls(xyxy=xyxy, mask=mask)\n\n    @classmethod\n    def from_azure_analyze_image(\n        cls, azure_result: dict, class_map: Optional[Dict[int, str]] = None\n    ) -> Detections:\n        \"\"\"\n        Creates a Detections instance from [Azure Image Analysis 4.0](\n        https://learn.microsoft.com/en-us/azure/ai-services/computer-vision/\n        concept-object-detection-40).\n\n        Args:\n            azure_result (dict): The result from Azure Image Analysis. It should\n                contain detected objects and their bounding box coordinates.\n            class_map (Optional[Dict[int, str]]): A mapping ofclass IDs (int) to class\n                names (str). 
If None, a new mapping is created dynamically.\n\n        Returns:\n            Detections: A new Detections object.\n\n        Example:\n            ```python\n            import requests\n            import supervision as sv\n\n            image = open(input, \"rb\").read()\n\n            endpoint = \"https://.cognitiveservices.azure.com/\"\n            subscription_key = \"\"\n\n            headers = {\n                \"Content-Type\": \"application/octet-stream\",\n                \"Ocp-Apim-Subscription-Key\": subscription_key\n             }\n\n            response = requests.post(endpoint,\n                headers=self.headers,\n                data=image\n             ).json()\n\n            detections = sv.Detections.from_azure_analyze_image(response)\n            ```\n        \"\"\"\n        if \"error\" in azure_result:\n            raise ValueError(\n                f'Azure API returned an error {azure_result[\"error\"][\"message\"]}'\n            )\n\n        xyxy, confidences, class_ids = [], [], []\n\n        is_dynamic_mapping = class_map is None\n        if is_dynamic_mapping:\n            class_map = {}\n\n        class_map = {value: key for key, value in class_map.items()}\n\n        for detection in azure_result[\"objectsResult\"][\"values\"]:\n            bbox = detection[\"boundingBox\"]\n\n            tags = detection[\"tags\"]\n\n            x0 = bbox[\"x\"]\n            y0 = bbox[\"y\"]\n            x1 = x0 + bbox[\"w\"]\n            y1 = y0 + bbox[\"h\"]\n\n            for tag in tags:\n                confidence = tag[\"confidence\"]\n                class_name = tag[\"name\"]\n                class_id = class_map.get(class_name, None)\n\n                if is_dynamic_mapping and class_id is None:\n                    class_id = len(class_map)\n                    class_map[class_name] = class_id\n\n                if class_id is not None:\n                    xyxy.append([x0, y0, x1, y1])\n                    confidences.append(confidence)\n                    class_ids.append(class_id)\n\n        if len(xyxy) == 0:\n            return Detections.empty()\n\n        return cls(\n            xyxy=np.array(xyxy),\n            class_id=np.array(class_ids),\n            confidence=np.array(confidences),\n        )\n\n    @classmethod\n    def from_paddledet(cls, paddledet_result) -> Detections:\n        \"\"\"\n        Creates a Detections instance from\n            [PaddleDetection](https://github.com/PaddlePaddle/PaddleDetection)\n            inference result.\n\n        Args:\n            paddledet_result (List[dict]): The output Results instance from PaddleDet\n\n        Returns:\n            Detections: A new Detections object.\n\n        Example:\n            ```python\n            import supervision as sv\n            import paddle\n            from ppdet.engine import Trainer\n            from ppdet.core.workspace import load_config\n\n            weights = ()\n            config = ()\n\n            cfg = load_config(config)\n            trainer = Trainer(cfg, mode='test')\n            trainer.load_weights(weights)\n\n            paddledet_result = trainer.predict([images])[0]\n\n            detections = sv.Detections.from_paddledet(paddledet_result)\n            ```\n        \"\"\"\n\n        if np.asarray(paddledet_result[\"bbox\"][:, 2:6]).shape[0] == 0:\n            return cls.empty()\n\n        return cls(\n            xyxy=paddledet_result[\"bbox\"][:, 2:6],\n            confidence=paddledet_result[\"bbox\"][:, 1],\n            
class_id=paddledet_result[\"bbox\"][:, 0].astype(int),\n        )\n\n    @classmethod\n    def from_lmm(cls, lmm: Union[LMM, str], result: str, **kwargs) -> Detections:\n        \"\"\"\n        Creates a Detections object from the given result string based on the specified\n        Large Multimodal Model (LMM).\n\n        Args:\n            lmm (Union[LMM, str]): The type of LMM (Large Multimodal Model) to use.\n            result (str): The result string containing the detection data.\n            **kwargs: Additional keyword arguments required by the specified LMM.\n\n        Returns:\n            Detections: A new Detections object.\n\n        Raises:\n            ValueError: If the LMM is invalid, required arguments are missing, or\n                disallowed arguments are provided.\n            ValueError: If the specified LMM is not supported.\n\n        Examples:\n            ```python\n            import supervision as sv\n\n            paligemma_result = \"<loc0256><loc0256><loc0768><loc0768> cat\"\n            detections = sv.Detections.from_lmm(\n                sv.LMM.PALIGEMMA,\n                paligemma_result,\n                resolution_wh=(1000, 1000),\n                classes=['cat', 'dog']\n            )\n            detections.xyxy\n            # array([[250., 250., 750., 750.]])\n\n            detections.class_id\n            # array([0])\n            ```\n        \"\"\"\n        lmm = validate_lmm_and_kwargs(lmm, kwargs)\n\n        if lmm == LMM.PALIGEMMA:\n            xyxy, class_id, class_name = from_paligemma(result, **kwargs)\n            data = {CLASS_NAME_DATA_FIELD: class_name}\n            return cls(xyxy=xyxy, class_id=class_id, data=data)\n\n        raise ValueError(f\"Unsupported LMM: {lmm}\")\n\n    @classmethod\n    def empty(cls) -> Detections:\n        \"\"\"\n        Create an empty Detections object with no bounding boxes,\n            confidences, or class IDs.\n\n        Returns:\n            (Detections): An empty Detections object.\n\n        Example:\n            ```python\n            from supervision import Detections\n\n            empty_detections = Detections.empty()\n            ```\n        \"\"\"\n        return cls(\n            xyxy=np.empty((0, 4), dtype=np.float32),\n            confidence=np.array([], dtype=np.float32),\n            class_id=np.array([], dtype=int),\n        )\n\n    def is_empty(self) -> bool:\n        \"\"\"\n        Returns `True` if the `Detections` object is considered empty.\n        \"\"\"\n        empty_detections = Detections.empty()\n        empty_detections.data = self.data\n        return self == empty_detections\n\n    @classmethod\n    def merge(cls, detections_list: List[Detections]) -> Detections:\n        \"\"\"\n        Merge a list of Detections objects into a single Detections object.\n\n        This method takes a list of Detections objects and combines their\n        respective fields (`xyxy`, `mask`, `confidence`, `class_id`, and `tracker_id`)\n        into a single Detections object.\n\n        For example, if merging Detections with 3 and 4 detected objects, this method\n        will return a Detections with 7 objects (7 entries in `xyxy`, `mask`, etc).\n\n        !!! 
Note\n\n            When merging, empty `Detections` objects are ignored.\n\n        Args:\n            detections_list (List[Detections]): A list of Detections objects to merge.\n\n        Returns:\n            (Detections): A single Detections object containing\n                the merged data from the input list.\n\n        Example:\n            ```python\n            import numpy as np\n            import supervision as sv\n\n            detections_1 = sv.Detections(\n                xyxy=np.array([[15, 15, 100, 100], [200, 200, 300, 300]]),\n                class_id=np.array([1, 2]),\n                data={'feature_vector': np.array([0.1, 0.2)])}\n             )\n\n            detections_2 = sv.Detections(\n                xyxy=np.array([[30, 30, 120, 120]]),\n                class_id=np.array([1]),\n                data={'feature_vector': [np.array([0.3])]}\n             )\n\n            merged_detections = Detections.merge([detections_1, detections_2])\n\n            merged_detections.xyxy\n            array([[ 15,  15, 100, 100],\n                   [200, 200, 300, 300],\n                   [ 30,  30, 120, 120]])\n\n            merged_detections.class_id\n            array([1, 2, 1])\n\n            merged_detections.data['feature_vector']\n            array([0.1, 0.2, 0.3])\n            ```\n        \"\"\"\n        detections_list = [\n            detections for detections in detections_list if not detections.is_empty()\n        ]\n\n        if len(detections_list) == 0:\n            return Detections.empty()\n\n        for detections in detections_list:\n            validate_detections_fields(\n                xyxy=detections.xyxy,\n                mask=detections.mask,\n                confidence=detections.confidence,\n                class_id=detections.class_id,\n                tracker_id=detections.tracker_id,\n                data=detections.data,\n            )\n\n        xyxy = np.vstack([d.xyxy for d in detections_list])\n\n        def stack_or_none(name: str):\n            if all(d.__getattribute__(name) is None for d in detections_list):\n                return None\n            if any(d.__getattribute__(name) is None for d in detections_list):\n                raise ValueError(f\"All or none of the '{name}' fields must be None\")\n            return (\n                np.vstack([d.__getattribute__(name) for d in detections_list])\n                if name == \"mask\"\n                else np.hstack([d.__getattribute__(name) for d in detections_list])\n            )\n\n        mask = stack_or_none(\"mask\")\n        confidence = stack_or_none(\"confidence\")\n        class_id = stack_or_none(\"class_id\")\n        tracker_id = stack_or_none(\"tracker_id\")\n\n        data = merge_data([d.data for d in detections_list])\n\n        return cls(\n            xyxy=xyxy,\n            mask=mask,\n            confidence=confidence,\n            class_id=class_id,\n            tracker_id=tracker_id,\n            data=data,\n        )\n\n    def get_anchors_coordinates(self, anchor: Position) -> np.ndarray:\n        \"\"\"\n        Calculates and returns the coordinates of a specific anchor point\n        within the bounding boxes defined by the `xyxy` attribute. The anchor\n        point can be any of the predefined positions in the `Position` enum,\n        such as `CENTER`, `CENTER_LEFT`, `BOTTOM_RIGHT`, etc.\n\n        Args:\n            anchor (Position): An enum specifying the position of the anchor point\n                within the bounding box. 
Supported positions are defined in the\n                `Position` enum.\n\n        Returns:\n            np.ndarray: An array of shape `(n, 2)`, where `n` is the number of bounding\n                boxes. Each row contains the `[x, y]` coordinates of the specified\n                anchor point for the corresponding bounding box.\n\n        Raises:\n            ValueError: If the provided `anchor` is not supported.\n        \"\"\"\n        if anchor == Position.CENTER:\n            return np.array(\n                [\n                    (self.xyxy[:, 0] + self.xyxy[:, 2]) / 2,\n                    (self.xyxy[:, 1] + self.xyxy[:, 3]) / 2,\n                ]\n            ).transpose()\n        elif anchor == Position.CENTER_OF_MASS:\n            if self.mask is None:\n                raise ValueError(\n                    \"Cannot use `Position.CENTER_OF_MASS` without a detection mask.\"\n                )\n            return calculate_masks_centroids(masks=self.mask)\n        elif anchor == Position.CENTER_LEFT:\n            return np.array(\n                [\n                    self.xyxy[:, 0],\n                    (self.xyxy[:, 1] + self.xyxy[:, 3]) / 2,\n                ]\n            ).transpose()\n        elif anchor == Position.CENTER_RIGHT:\n            return np.array(\n                [\n                    self.xyxy[:, 2],\n                    (self.xyxy[:, 1] + self.xyxy[:, 3]) / 2,\n                ]\n            ).transpose()\n        elif anchor == Position.BOTTOM_CENTER:\n            return np.array(\n                [(self.xyxy[:, 0] + self.xyxy[:, 2]) / 2, self.xyxy[:, 3]]\n            ).transpose()\n        elif anchor == Position.BOTTOM_LEFT:\n            return np.array([self.xyxy[:, 0], self.xyxy[:, 3]]).transpose()\n        elif anchor == Position.BOTTOM_RIGHT:\n            return np.array([self.xyxy[:, 2], self.xyxy[:, 3]]).transpose()\n        elif anchor == Position.TOP_CENTER:\n            return np.array(\n                [(self.xyxy[:, 0] + self.xyxy[:, 2]) / 2, self.xyxy[:, 1]]\n            ).transpose()\n        elif anchor == Position.TOP_LEFT:\n            return np.array([self.xyxy[:, 0], self.xyxy[:, 1]]).transpose()\n        elif anchor == Position.TOP_RIGHT:\n            return np.array([self.xyxy[:, 2], self.xyxy[:, 1]]).transpose()\n\n        raise ValueError(f\"{anchor} is not supported.\")\n\n    def __getitem__(\n        self, index: Union[int, slice, List[int], np.ndarray, str]\n    ) -> Union[Detections, List, np.ndarray, None]:\n        \"\"\"\n        Get a subset of the Detections object or access an item from its data field.\n\n        When provided with an integer, slice, list of integers, or a numpy array, this\n        method returns a new Detections object that represents a subset of the original\n        detections. 
When provided with a string, it accesses the corresponding item in\n        the data dictionary.\n\n        Args:\n            index (Union[int, slice, List[int], np.ndarray, str]): The index, indices,\n                or key to access a subset of the Detections or an item from the data.\n\n        Returns:\n            Union[Detections, Any]: A subset of the Detections object or an item from\n                the data field.\n\n        Example:\n            ```python\n            import supervision as sv\n\n            detections = sv.Detections()\n\n            first_detection = detections[0]\n            first_10_detections = detections[0:10]\n            some_detections = detections[[0, 2, 4]]\n            class_0_detections = detections[detections.class_id == 0]\n            high_confidence_detections = detections[detections.confidence > 0.5]\n\n            feature_vector = detections['feature_vector']\n            ```\n        \"\"\"\n        if isinstance(index, str):\n            return self.data.get(index)\n        if isinstance(index, int):\n            index = [index]\n        return Detections(\n            xyxy=self.xyxy[index],\n            mask=self.mask[index] if self.mask is not None else None,\n            confidence=self.confidence[index] if self.confidence is not None else None,\n            class_id=self.class_id[index] if self.class_id is not None else None,\n            tracker_id=self.tracker_id[index] if self.tracker_id is not None else None,\n            data=get_data_item(self.data, index),\n        )\n\n    def __setitem__(self, key: str, value: Union[np.ndarray, List]):\n        \"\"\"\n        Set a value in the data dictionary of the Detections object.\n\n        Args:\n            key (str): The key in the data dictionary to set.\n            value (Union[np.ndarray, List]): The value to set for the key.\n\n        Example:\n            ```python\n            import cv2\n            import supervision as sv\n            from ultralytics import YOLO\n\n            image = cv2.imread(<SOURCE_IMAGE_PATH>)\n            model = YOLO('yolov8s.pt')\n\n            result = model(image)[0]\n            detections = sv.Detections.from_ultralytics(result)\n\n            detections['names'] = [\n                 model.model.names[class_id]\n                 for class_id\n                 in detections.class_id\n             ]\n            ```\n        \"\"\"\n        if not isinstance(value, (np.ndarray, list)):\n            raise TypeError(\"Value must be a np.ndarray or a list\")\n\n        if isinstance(value, list):\n            value = np.array(value)\n\n        self.data[key] = value\n\n    @property\n    def area(self) -> np.ndarray:\n        \"\"\"\n        Calculate the area of each detection in the set of object detections.\n        If masks field is defined property returns are of each mask.\n        If only box is given property return area of each box.\n\n        Returns:\n          np.ndarray: An array of floats containing the area of each detection\n            in the format of `(area_1, area_2, , area_n)`,\n            where n is the number of detections.\n        \"\"\"\n        if self.mask is not None:\n            return np.array([np.sum(mask) for mask in self.mask])\n        else:\n            return self.box_area\n\n    @property\n    def box_area(self) -> np.ndarray:\n        \"\"\"\n        Calculate the area of each bounding box in the set of object detections.\n\n        Returns:\n            np.ndarray: An array of floats containing the area of 
each bounding\n                box in the format of `(area_1, area_2, , area_n)`,\n                where n is the number of detections.\n        \"\"\"\n        return (self.xyxy[:, 3] - self.xyxy[:, 1]) * (self.xyxy[:, 2] - self.xyxy[:, 0])\n\n    def with_nms(\n        self, threshold: float = 0.5, class_agnostic: bool = False\n    ) -> Detections:\n        \"\"\"\n        Performs non-max suppression on detection set. If the detections result\n        from a segmentation model, the IoU mask is applied. Otherwise, box IoU is used.\n\n        Args:\n            threshold (float, optional): The intersection-over-union threshold\n                to use for non-maximum suppression. I'm the lower the value the more\n                restrictive the NMS becomes. Defaults to 0.5.\n            class_agnostic (bool, optional): Whether to perform class-agnostic\n                non-maximum suppression. If True, the class_id of each detection\n                will be ignored. Defaults to False.\n\n        Returns:\n            Detections: A new Detections object containing the subset of detections\n                after non-maximum suppression.\n\n        Raises:\n            AssertionError: If `confidence` is None and class_agnostic is False.\n                If `class_id` is None and class_agnostic is False.\n        \"\"\"\n        if len(self) == 0:\n            return self\n\n        assert (\n            self.confidence is not None\n        ), \"Detections confidence must be given for NMS to be executed.\"\n\n        if class_agnostic:\n            predictions = np.hstack((self.xyxy, self.confidence.reshape(-1, 1)))\n        else:\n            assert self.class_id is not None, (\n                \"Detections class_id must be given for NMS to be executed. If you\"\n                \" intended to perform class agnostic NMS set class_agnostic=True.\"\n            )\n            predictions = np.hstack(\n                (\n                    self.xyxy,\n                    self.confidence.reshape(-1, 1),\n                    self.class_id.reshape(-1, 1),\n                )\n            )\n\n        if self.mask is not None:\n            indices = mask_non_max_suppression(\n                predictions=predictions, masks=self.mask, iou_threshold=threshold\n            )\n        else:\n            indices = box_non_max_suppression(\n                predictions=predictions, iou_threshold=threshold\n            )\n\n        return self[indices]\n\n    def with_nmm(\n        self, threshold: float = 0.5, class_agnostic: bool = False\n    ) -> Detections:\n        \"\"\"\n        Perform non-maximum merging on the current set of object detections.\n\n        Args:\n            threshold (float, optional): The intersection-over-union threshold\n                to use for non-maximum merging. Defaults to 0.5.\n            class_agnostic (bool, optional): Whether to perform class-agnostic\n                non-maximum merging. If True, the class_id of each detection\n                will be ignored. 
Defaults to False.\n\n        Returns:\n            Detections: A new Detections object containing the subset of detections\n                after non-maximum merging.\n\n        Raises:\n            AssertionError: If `confidence` is None or `class_id` is None and\n                class_agnostic is False.\n\n        ![non-max-merging](https://media.roboflow.com/supervision-docs/non-max-merging.png){ align=center width=\"800\" }\n        \"\"\"  # noqa: E501 // docs\n        if len(self) == 0:\n            return self\n\n        assert (\n            self.confidence is not None\n        ), \"Detections confidence must be given for NMM to be executed.\"\n\n        if class_agnostic:\n            predictions = np.hstack((self.xyxy, self.confidence.reshape(-1, 1)))\n        else:\n            assert self.class_id is not None, (\n                \"Detections class_id must be given for NMM to be executed. If you\"\n                \" intended to perform class agnostic NMM set class_agnostic=True.\"\n            )\n            predictions = np.hstack(\n                (\n                    self.xyxy,\n                    self.confidence.reshape(-1, 1),\n                    self.class_id.reshape(-1, 1),\n                )\n            )\n\n        merge_groups = box_non_max_merge(\n            predictions=predictions, iou_threshold=threshold\n        )\n\n        result = []\n        for merge_group in merge_groups:\n            unmerged_detections = [self[i] for i in merge_group]\n            merged_detections = merge_inner_detections_objects(\n                unmerged_detections, threshold\n            )\n            result.append(merged_detections)\n\n        return Detections.merge(result)\n
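The `with_nms`, `with_nmm`, and `get_anchors_coordinates` docstrings above ship without usage examples, so here is a minimal sketch; the boxes, confidences, and class IDs below are made up for illustration only.

```python
import numpy as np
import supervision as sv

detections = sv.Detections(
    xyxy=np.array([[10, 10, 50, 50], [12, 12, 52, 52], [100, 100, 150, 150]]),
    confidence=np.array([0.9, 0.8, 0.7]),
    class_id=np.array([0, 0, 1]),
)

# Non-max suppression: keep the higher-confidence box of each overlapping pair.
filtered = detections.with_nms(threshold=0.5)

# Non-max merging: combine overlapping boxes instead of discarding them.
merged = detections.with_nmm(threshold=0.5, class_agnostic=True)

# Anchor coordinates: an (n, 2) array of [x, y] points, here the box centers.
centers = detections.get_anchors_coordinates(anchor=sv.Position.CENTER)
```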
"},{"location":"detection/core/#supervision.detection.core.Detections-attributes","title":"Attributes","text":""},{"location":"detection/core/#supervision.detection.core.Detections.area","title":"area: np.ndarray property","text":"

Calculate the area of each detection in the set of object detections. If the masks field is defined, the property returns the area of each mask. If only boxes are given, the property returns the area of each box.

Returns:

Type Description ndarray

np.ndarray: An array of floats containing the area of each detection in the format of (area_1, area_2, ..., area_n), where n is the number of detections.
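As a small illustration (the box and mask below are assumed, not taken from a real model), `area` counts mask pixels when masks are set and falls back to `box_area` otherwise.

```python
import numpy as np
import supervision as sv

mask = np.zeros((20, 20), dtype=bool)
mask[0:10, 0:5] = True  # 50 pixels

detections = sv.Detections(
    xyxy=np.array([[0.0, 0.0, 10.0, 10.0]]),
    mask=np.array([mask]),
)

detections.area      # array([50])   - mask pixel count, because masks are defined
detections.box_area  # array([100.]) - (x2 - x1) * (y2 - y1) of the box
```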

"},{"location":"detection/core/#supervision.detection.core.Detections.box_area","title":"box_area: np.ndarray property","text":"

Calculate the area of each bounding box in the set of object detections.

Returns:

Type Description ndarray

np.ndarray: An array of floats containing the area of each bounding box in the format of (area_1, area_2, ..., area_n), where n is the number of detections.
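A quick worked check of that formula, with assumed coordinates:

```python
import numpy as np

xyxy = np.array([[15, 15, 100, 100], [200, 200, 300, 300]])
box_area = (xyxy[:, 3] - xyxy[:, 1]) * (xyxy[:, 2] - xyxy[:, 0])
# array([ 7225, 10000])
```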

"},{"location":"detection/core/#supervision.detection.core.Detections-functions","title":"Functions","text":""},{"location":"detection/core/#supervision.detection.core.Detections.__getitem__","title":"__getitem__(index)","text":"

Get a subset of the Detections object or access an item from its data field.

When provided with an integer, slice, list of integers, or a numpy array, this method returns a new Detections object that represents a subset of the original detections. When provided with a string, it accesses the corresponding item in the data dictionary.

Parameters:

Name Type Description Default index Union[int, slice, List[int], ndarray, str]

The index, indices, or key to access a subset of the Detections or an item from the data.

required

Returns:

Type Description Union[Detections, List, ndarray, None]

Union[Detections, Any]: A subset of the Detections object or an item from the data field.

Example
import supervision as sv\n\ndetections = sv.Detections()\n\nfirst_detection = detections[0]\nfirst_10_detections = detections[0:10]\nsome_detections = detections[[0, 2, 4]]\nclass_0_detections = detections[detections.class_id == 0]\nhigh_confidence_detections = detections[detections.confidence > 0.5]\n\nfeature_vector = detections['feature_vector']\n
Source code in supervision/detection/core.py
def __getitem__(\n    self, index: Union[int, slice, List[int], np.ndarray, str]\n) -> Union[Detections, List, np.ndarray, None]:\n    \"\"\"\n    Get a subset of the Detections object or access an item from its data field.\n\n    When provided with an integer, slice, list of integers, or a numpy array, this\n    method returns a new Detections object that represents a subset of the original\n    detections. When provided with a string, it accesses the corresponding item in\n    the data dictionary.\n\n    Args:\n        index (Union[int, slice, List[int], np.ndarray, str]): The index, indices,\n            or key to access a subset of the Detections or an item from the data.\n\n    Returns:\n        Union[Detections, Any]: A subset of the Detections object or an item from\n            the data field.\n\n    Example:\n        ```python\n        import supervision as sv\n\n        detections = sv.Detections()\n\n        first_detection = detections[0]\n        first_10_detections = detections[0:10]\n        some_detections = detections[[0, 2, 4]]\n        class_0_detections = detections[detections.class_id == 0]\n        high_confidence_detections = detections[detections.confidence > 0.5]\n\n        feature_vector = detections['feature_vector']\n        ```\n    \"\"\"\n    if isinstance(index, str):\n        return self.data.get(index)\n    if isinstance(index, int):\n        index = [index]\n    return Detections(\n        xyxy=self.xyxy[index],\n        mask=self.mask[index] if self.mask is not None else None,\n        confidence=self.confidence[index] if self.confidence is not None else None,\n        class_id=self.class_id[index] if self.class_id is not None else None,\n        tracker_id=self.tracker_id[index] if self.tracker_id is not None else None,\n        data=get_data_item(self.data, index),\n    )\n
"},{"location":"detection/core/#supervision.detection.core.Detections.__iter__","title":"__iter__()","text":"

Iterates over the Detections object and yields a tuple of (xyxy, mask, confidence, class_id, tracker_id, data) for each detection.

Source code in supervision/detection/core.py
def __iter__(\n    self,\n) -> Iterator[\n    Tuple[\n        np.ndarray,\n        Optional[np.ndarray],\n        Optional[float],\n        Optional[int],\n        Optional[int],\n        Dict[str, Union[np.ndarray, List]],\n    ]\n]:\n    \"\"\"\n    Iterates over the Detections object and yield a tuple of\n    `(xyxy, mask, confidence, class_id, tracker_id, data)` for each detection.\n    \"\"\"\n    for i in range(len(self.xyxy)):\n        yield (\n            self.xyxy[i],\n            self.mask[i] if self.mask is not None else None,\n            self.confidence[i] if self.confidence is not None else None,\n            self.class_id[i] if self.class_id is not None else None,\n            self.tracker_id[i] if self.tracker_id is not None else None,\n            get_data_item(self.data, i),\n        )\n
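A usage sketch for the iterator described above, assuming `detections` came from any of the `from_*` constructors:

```python
for xyxy, mask, confidence, class_id, tracker_id, data in detections:
    # xyxy is a (4,) array; the other fields are None when not populated.
    x1, y1, x2, y2 = xyxy
    print(class_id, confidence, (x1, y1, x2, y2))
```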
"},{"location":"detection/core/#supervision.detection.core.Detections.__len__","title":"__len__()","text":"

Returns the number of detections in the Detections object.

Source code in supervision/detection/core.py
def __len__(self):\n    \"\"\"\n    Returns the number of detections in the Detections object.\n    \"\"\"\n    return len(self.xyxy)\n
"},{"location":"detection/core/#supervision.detection.core.Detections.__setitem__","title":"__setitem__(key, value)","text":"

Set a value in the data dictionary of the Detections object.

Parameters:

Name Type Description Default key str

The key in the data dictionary to set.

required value Union[ndarray, List]

The value to set for the key.

required Example
import cv2\nimport supervision as sv\nfrom ultralytics import YOLO\n\nimage = cv2.imread(<SOURCE_IMAGE_PATH>)\nmodel = YOLO('yolov8s.pt')\n\nresult = model(image)[0]\ndetections = sv.Detections.from_ultralytics(result)\n\ndetections['names'] = [\n     model.model.names[class_id]\n     for class_id\n     in detections.class_id\n ]\n
Source code in supervision/detection/core.py
def __setitem__(self, key: str, value: Union[np.ndarray, List]):\n    \"\"\"\n    Set a value in the data dictionary of the Detections object.\n\n    Args:\n        key (str): The key in the data dictionary to set.\n        value (Union[np.ndarray, List]): The value to set for the key.\n\n    Example:\n        ```python\n        import cv2\n        import supervision as sv\n        from ultralytics import YOLO\n\n        image = cv2.imread(<SOURCE_IMAGE_PATH>)\n        model = YOLO('yolov8s.pt')\n\n        result = model(image)[0]\n        detections = sv.Detections.from_ultralytics(result)\n\n        detections['names'] = [\n             model.model.names[class_id]\n             for class_id\n             in detections.class_id\n         ]\n        ```\n    \"\"\"\n    if not isinstance(value, (np.ndarray, list)):\n        raise TypeError(\"Value must be a np.ndarray or a list\")\n\n    if isinstance(value, list):\n        value = np.array(value)\n\n    self.data[key] = value\n
"},{"location":"detection/core/#supervision.detection.core.Detections.empty","title":"empty() classmethod","text":"

Create an empty Detections object with no bounding boxes, confidences, or class IDs.

Returns:

Type Description Detections

An empty Detections object.

Example
from supervision import Detections\n\nempty_detections = Detections.empty()\n
Source code in supervision/detection/core.py
@classmethod\ndef empty(cls) -> Detections:\n    \"\"\"\n    Create an empty Detections object with no bounding boxes,\n        confidences, or class IDs.\n\n    Returns:\n        (Detections): An empty Detections object.\n\n    Example:\n        ```python\n        from supervision import Detections\n\n        empty_detections = Detections.empty()\n        ```\n    \"\"\"\n    return cls(\n        xyxy=np.empty((0, 4), dtype=np.float32),\n        confidence=np.array([], dtype=np.float32),\n        class_id=np.array([], dtype=int),\n    )\n
"},{"location":"detection/core/#supervision.detection.core.Detections.from_azure_analyze_image","title":"from_azure_analyze_image(azure_result, class_map=None) classmethod","text":"

Creates a Detections instance from Azure Image Analysis 4.0.

Parameters:

Name Type Description Default azure_result dict

The result from Azure Image Analysis. It should contain detected objects and their bounding box coordinates.

required class_map Optional[Dict[int, str]]

A mapping of class IDs (int) to class names (str). If None, a new mapping is created dynamically.

None

Returns:

Name Type Description Detections Detections

A new Detections object.

Example
import requests\nimport supervision as sv\n\nimage = open(input, \"rb\").read()\n\nendpoint = \"https://.cognitiveservices.azure.com/\"\nsubscription_key = \"\"\n\nheaders = {\n    \"Content-Type\": \"application/octet-stream\",\n    \"Ocp-Apim-Subscription-Key\": subscription_key\n }\n\nresponse = requests.post(endpoint,\n    headers=headers,\n    data=image\n ).json()\n\ndetections = sv.Detections.from_azure_analyze_image(response)\n
Source code in supervision/detection/core.py
@classmethod\ndef from_azure_analyze_image(\n    cls, azure_result: dict, class_map: Optional[Dict[int, str]] = None\n) -> Detections:\n    \"\"\"\n    Creates a Detections instance from [Azure Image Analysis 4.0](\n    https://learn.microsoft.com/en-us/azure/ai-services/computer-vision/\n    concept-object-detection-40).\n\n    Args:\n        azure_result (dict): The result from Azure Image Analysis. It should\n            contain detected objects and their bounding box coordinates.\n        class_map (Optional[Dict[int, str]]): A mapping ofclass IDs (int) to class\n            names (str). If None, a new mapping is created dynamically.\n\n    Returns:\n        Detections: A new Detections object.\n\n    Example:\n        ```python\n        import requests\n        import supervision as sv\n\n        image = open(input, \"rb\").read()\n\n        endpoint = \"https://.cognitiveservices.azure.com/\"\n        subscription_key = \"\"\n\n        headers = {\n            \"Content-Type\": \"application/octet-stream\",\n            \"Ocp-Apim-Subscription-Key\": subscription_key\n         }\n\n        response = requests.post(endpoint,\n            headers=self.headers,\n            data=image\n         ).json()\n\n        detections = sv.Detections.from_azure_analyze_image(response)\n        ```\n    \"\"\"\n    if \"error\" in azure_result:\n        raise ValueError(\n            f'Azure API returned an error {azure_result[\"error\"][\"message\"]}'\n        )\n\n    xyxy, confidences, class_ids = [], [], []\n\n    is_dynamic_mapping = class_map is None\n    if is_dynamic_mapping:\n        class_map = {}\n\n    class_map = {value: key for key, value in class_map.items()}\n\n    for detection in azure_result[\"objectsResult\"][\"values\"]:\n        bbox = detection[\"boundingBox\"]\n\n        tags = detection[\"tags\"]\n\n        x0 = bbox[\"x\"]\n        y0 = bbox[\"y\"]\n        x1 = x0 + bbox[\"w\"]\n        y1 = y0 + bbox[\"h\"]\n\n        for tag in tags:\n            confidence = tag[\"confidence\"]\n            class_name = tag[\"name\"]\n            class_id = class_map.get(class_name, None)\n\n            if is_dynamic_mapping and class_id is None:\n                class_id = len(class_map)\n                class_map[class_name] = class_id\n\n            if class_id is not None:\n                xyxy.append([x0, y0, x1, y1])\n                confidences.append(confidence)\n                class_ids.append(class_id)\n\n    if len(xyxy) == 0:\n        return Detections.empty()\n\n    return cls(\n        xyxy=np.array(xyxy),\n        class_id=np.array(class_ids),\n        confidence=np.array(confidences),\n    )\n
"},{"location":"detection/core/#supervision.detection.core.Detections.from_deepsparse","title":"from_deepsparse(deepsparse_results) classmethod","text":"

Creates a Detections instance from a DeepSparse inference result.

Parameters:

Name Type Description Default deepsparse_results YOLOOutput

The output Results instance from DeepSparse.

required

Returns:

Name Type Description Detections Detections

A new Detections object.

Example
import supervision as sv\nfrom deepsparse import Pipeline\n\nyolo_pipeline = Pipeline.create(\n    task=\"yolo\",\n    model_path = \"zoo:cv/detection/yolov5-l/pytorch/ultralytics/coco/pruned80_quant-none\"\n )\nresult = yolo_pipeline(<SOURCE IMAGE PATH>)\ndetections = sv.Detections.from_deepsparse(result)\n
Source code in supervision/detection/core.py
@classmethod\ndef from_deepsparse(cls, deepsparse_results) -> Detections:\n    \"\"\"\n    Creates a Detections instance from a\n    [DeepSparse](https://github.com/neuralmagic/deepsparse)\n    inference result.\n\n    Args:\n        deepsparse_results (deepsparse.yolo.schemas.YOLOOutput):\n            The output Results instance from DeepSparse.\n\n    Returns:\n        Detections: A new Detections object.\n\n    Example:\n        ```python\n        import supervision as sv\n        from deepsparse import Pipeline\n\n        yolo_pipeline = Pipeline.create(\n            task=\"yolo\",\n            model_path = \"zoo:cv/detection/yolov5-l/pytorch/ultralytics/coco/pruned80_quant-none\"\n         )\n        result = yolo_pipeline(<SOURCE IMAGE PATH>)\n        detections = sv.Detections.from_deepsparse(result)\n        ```\n    \"\"\"  # noqa: E501 // docs\n\n    if np.asarray(deepsparse_results.boxes[0]).shape[0] == 0:\n        return cls.empty()\n\n    return cls(\n        xyxy=np.array(deepsparse_results.boxes[0]),\n        confidence=np.array(deepsparse_results.scores[0]),\n        class_id=np.array(deepsparse_results.labels[0]).astype(float).astype(int),\n    )\n
"},{"location":"detection/core/#supervision.detection.core.Detections.from_detectron2","title":"from_detectron2(detectron2_results) classmethod","text":"

Create a Detections object from the Detectron2 inference result.

Parameters:

Name Type Description Default detectron2_results

The output of a Detectron2 model containing instances with prediction data.

required

Returns:

Type Description Detections

A Detections object containing the bounding boxes, class IDs, and confidences of the predictions.

Example
import cv2\nimport supervision as sv\nfrom detectron2.engine import DefaultPredictor\nfrom detectron2.config import get_cfg\n\n\nimage = cv2.imread(<SOURCE_IMAGE_PATH>)\ncfg = get_cfg()\ncfg.merge_from_file(<CONFIG_PATH>)\ncfg.MODEL.WEIGHTS = <WEIGHTS_PATH>\npredictor = DefaultPredictor(cfg)\n\nresult = predictor(image)\ndetections = sv.Detections.from_detectron2(result)\n
Source code in supervision/detection/core.py
@classmethod\ndef from_detectron2(cls, detectron2_results) -> Detections:\n    \"\"\"\n    Create a Detections object from the\n    [Detectron2](https://github.com/facebookresearch/detectron2) inference result.\n\n    Args:\n        detectron2_results: The output of a\n            Detectron2 model containing instances with prediction data.\n\n    Returns:\n        (Detections): A Detections object containing the bounding boxes,\n            class IDs, and confidences of the predictions.\n\n    Example:\n        ```python\n        import cv2\n        import supervision as sv\n        from detectron2.engine import DefaultPredictor\n        from detectron2.config import get_cfg\n\n\n        image = cv2.imread(<SOURCE_IMAGE_PATH>)\n        cfg = get_cfg()\n        cfg.merge_from_file(<CONFIG_PATH>)\n        cfg.MODEL.WEIGHTS = <WEIGHTS_PATH>\n        predictor = DefaultPredictor(cfg)\n\n        result = predictor(image)\n        detections = sv.Detections.from_detectron2(result)\n        ```\n    \"\"\"\n\n    return cls(\n        xyxy=detectron2_results[\"instances\"].pred_boxes.tensor.cpu().numpy(),\n        confidence=detectron2_results[\"instances\"].scores.cpu().numpy(),\n        class_id=detectron2_results[\"instances\"]\n        .pred_classes.cpu()\n        .numpy()\n        .astype(int),\n    )\n
"},{"location":"detection/core/#supervision.detection.core.Detections.from_inference","title":"from_inference(roboflow_result) classmethod","text":"

Create a sv.Detections object from the Roboflow API inference result or the Inference package results. This method extracts bounding boxes, class IDs, confidences, and class names from the Roboflow API result and encapsulates them into a Detections object.

Parameters:

Name Type Description Default roboflow_result (dict, any)

The result from the Roboflow API or Inference package containing predictions.

required

Returns:

Type Description Detections

A Detections object containing the bounding boxes, class IDs, and confidences of the predictions.

Example
import cv2\nimport supervision as sv\nfrom inference import get_model\n\nimage = cv2.imread(<SOURCE_IMAGE_PATH>)\nmodel = get_model(model_id=\"yolov8s-640\")\n\nresult = model.infer(image)[0]\ndetections = sv.Detections.from_inference(result)\n

Tip

Class name values can be accessed using detections[\"class_name\"].
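For example (a short sketch building on the snippet above; the label format is just one possible choice):

```python
class_names = detections["class_name"]  # per-detection class names
labels = [
    f"{name} {confidence:.2f}"
    for name, confidence in zip(class_names, detections.confidence)
]
```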

Source code in supervision/detection/core.py
@classmethod\ndef from_inference(cls, roboflow_result: Union[dict, Any]) -> Detections:\n    \"\"\"\n    Create a `sv.Detections` object from the [Roboflow](https://roboflow.com/)\n    API inference result or the [Inference](https://inference.roboflow.com/)\n    package results. This method extracts bounding boxes, class IDs,\n    confidences, and class names from the Roboflow API result and encapsulates\n    them into a Detections object.\n\n    Args:\n        roboflow_result (dict, any): The result from the\n            Roboflow API or Inference package containing predictions.\n\n    Returns:\n        (Detections): A Detections object containing the bounding boxes, class IDs,\n            and confidences of the predictions.\n\n    Example:\n        ```python\n        import cv2\n        import supervision as sv\n        from inference import get_model\n\n        image = cv2.imread(<SOURCE_IMAGE_PATH>)\n        model = get_model(model_id=\"yolov8s-640\")\n\n        result = model.infer(image)[0]\n        detections = sv.Detections.from_inference(result)\n        ```\n\n    !!! tip\n\n        Class names values can be accessed using `detections[\"class_name\"]`.\n    \"\"\"\n    with suppress(AttributeError):\n        roboflow_result = roboflow_result.dict(exclude_none=True, by_alias=True)\n    xyxy, confidence, class_id, masks, trackers, data = process_roboflow_result(\n        roboflow_result=roboflow_result\n    )\n\n    if np.asarray(xyxy).shape[0] == 0:\n        empty_detection = cls.empty()\n        empty_detection.data = {CLASS_NAME_DATA_FIELD: np.empty(0)}\n        return empty_detection\n\n    return cls(\n        xyxy=xyxy,\n        confidence=confidence,\n        class_id=class_id,\n        mask=masks,\n        tracker_id=trackers,\n        data=data,\n    )\n
"},{"location":"detection/core/#supervision.detection.core.Detections.from_lmm","title":"from_lmm(lmm, result, **kwargs) classmethod","text":"

Creates a Detections object from the given result string based on the specified Large Multimodal Model (LMM).

Parameters:

Name Type Description Default lmm Union[LMM, str]

The type of LMM (Large Multimodal Model) to use.

required result str

The result string containing the detection data.

required **kwargs

Additional keyword arguments required by the specified LMM.

{}

Returns:

Name Type Description Detections Detections

A new Detections object.

Raises:

Type Description ValueError

If the LMM is invalid, required arguments are missing, or disallowed arguments are provided.

ValueError

If the specified LMM is not supported.

Examples:

import supervision as sv\n\npaligemma_result = \"<loc0256><loc0256><loc0768><loc0768> cat\"\ndetections = sv.Detections.from_lmm(\n    sv.LMM.PALIGEMMA,\n    paligemma_result,\n    resolution_wh=(1000, 1000),\n    classes=['cat', 'dog']\n)\ndetections.xyxy\n# array([[250., 250., 750., 750.]])\n\ndetections.class_id\n# array([0])\n
Source code in supervision/detection/core.py
@classmethod\ndef from_lmm(cls, lmm: Union[LMM, str], result: str, **kwargs) -> Detections:\n    \"\"\"\n    Creates a Detections object from the given result string based on the specified\n    Large Multimodal Model (LMM).\n\n    Args:\n        lmm (Union[LMM, str]): The type of LMM (Large Multimodal Model) to use.\n        result (str): The result string containing the detection data.\n        **kwargs: Additional keyword arguments required by the specified LMM.\n\n    Returns:\n        Detections: A new Detections object.\n\n    Raises:\n        ValueError: If the LMM is invalid, required arguments are missing, or\n            disallowed arguments are provided.\n        ValueError: If the specified LMM is not supported.\n\n    Examples:\n        ```python\n        import supervision as sv\n\n        paligemma_result = \"<loc0256><loc0256><loc0768><loc0768> cat\"\n        detections = sv.Detections.from_lmm(\n            sv.LMM.PALIGEMMA,\n            paligemma_result,\n            resolution_wh=(1000, 1000),\n            classes=['cat', 'dog']\n        )\n        detections.xyxy\n        # array([[250., 250., 750., 750.]])\n\n        detections.class_id\n        # array([0])\n        ```\n    \"\"\"\n    lmm = validate_lmm_and_kwargs(lmm, kwargs)\n\n    if lmm == LMM.PALIGEMMA:\n        xyxy, class_id, class_name = from_paligemma(result, **kwargs)\n        data = {CLASS_NAME_DATA_FIELD: class_name}\n        return cls(xyxy=xyxy, class_id=class_id, data=data)\n\n    raise ValueError(f\"Unsupported LMM: {lmm}\")\n
"},{"location":"detection/core/#supervision.detection.core.Detections.from_mmdetection","title":"from_mmdetection(mmdet_results) classmethod","text":"

Creates a Detections instance from an mmdetection or mmyolo inference result.

Parameters:

Name Type Description Default mmdet_results DetDataSample

The output Results instance from MMDetection.

required

Returns:

Name Type Description Detections Detections

A new Detections object.

Example
import cv2\nimport supervision as sv\nfrom mmdet.apis import init_detector, inference_detector\n\nimage = cv2.imread(<SOURCE_IMAGE_PATH>)\nmodel = init_detector(<CONFIG_PATH>, <WEIGHTS_PATH>, device=<DEVICE>)\n\nresult = inference_detector(model, image)\ndetections = sv.Detections.from_mmdetection(result)\n
Source code in supervision/detection/core.py
@classmethod\ndef from_mmdetection(cls, mmdet_results) -> Detections:\n    \"\"\"\n    Creates a Detections instance from a\n    [mmdetection](https://github.com/open-mmlab/mmdetection) and\n    [mmyolo](https://github.com/open-mmlab/mmyolo) inference result.\n\n    Args:\n        mmdet_results (mmdet.structures.DetDataSample):\n            The output Results instance from MMDetection.\n\n    Returns:\n        Detections: A new Detections object.\n\n    Example:\n        ```python\n        import cv2\n        import supervision as sv\n        from mmdet.apis import init_detector, inference_detector\n\n        image = cv2.imread(<SOURCE_IMAGE_PATH>)\n        model = init_detector(<CONFIG_PATH>, <WEIGHTS_PATH>, device=<DEVICE>)\n\n        result = inference_detector(model, image)\n        detections = sv.Detections.from_mmdetection(result)\n        ```\n    \"\"\"  # noqa: E501 // docs\n\n    return cls(\n        xyxy=mmdet_results.pred_instances.bboxes.cpu().numpy(),\n        confidence=mmdet_results.pred_instances.scores.cpu().numpy(),\n        class_id=mmdet_results.pred_instances.labels.cpu().numpy().astype(int),\n        mask=mmdet_results.pred_instances.masks.cpu().numpy()\n        if \"masks\" in mmdet_results.pred_instances\n        else None,\n    )\n
"},{"location":"detection/core/#supervision.detection.core.Detections.from_paddledet","title":"from_paddledet(paddledet_result) classmethod","text":"

Creates a Detections instance from a PaddleDetection inference result.

Parameters:

Name Type Description Default paddledet_result List[dict]

The output Results instance from PaddleDetection.

required

Returns:

Name Type Description Detections Detections

A new Detections object.

Example
import supervision as sv\nimport paddle\nfrom ppdet.engine import Trainer\nfrom ppdet.core.workspace import load_config\n\nweights = ()\nconfig = ()\n\ncfg = load_config(config)\ntrainer = Trainer(cfg, mode='test')\ntrainer.load_weights(weights)\n\npaddledet_result = trainer.predict([images])[0]\n\ndetections = sv.Detections.from_paddledet(paddledet_result)\n
Source code in supervision/detection/core.py
@classmethod\ndef from_paddledet(cls, paddledet_result) -> Detections:\n    \"\"\"\n    Creates a Detections instance from\n        [PaddleDetection](https://github.com/PaddlePaddle/PaddleDetection)\n        inference result.\n\n    Args:\n        paddledet_result (List[dict]): The output Results instance from PaddleDet\n\n    Returns:\n        Detections: A new Detections object.\n\n    Example:\n        ```python\n        import supervision as sv\n        import paddle\n        from ppdet.engine import Trainer\n        from ppdet.core.workspace import load_config\n\n        weights = ()\n        config = ()\n\n        cfg = load_config(config)\n        trainer = Trainer(cfg, mode='test')\n        trainer.load_weights(weights)\n\n        paddledet_result = trainer.predict([images])[0]\n\n        detections = sv.Detections.from_paddledet(paddledet_result)\n        ```\n    \"\"\"\n\n    if np.asarray(paddledet_result[\"bbox\"][:, 2:6]).shape[0] == 0:\n        return cls.empty()\n\n    return cls(\n        xyxy=paddledet_result[\"bbox\"][:, 2:6],\n        confidence=paddledet_result[\"bbox\"][:, 1],\n        class_id=paddledet_result[\"bbox\"][:, 0].astype(int),\n    )\n
"},{"location":"detection/core/#supervision.detection.core.Detections.from_roboflow","title":"from_roboflow(roboflow_result) classmethod","text":"

Deprecated

Detections.from_roboflow is deprecated and will be removed in supervision-0.22.0. Use Detections.from_inference instead.

Create a Detections object from the Roboflow API inference result or the Inference package results.

Parameters:

Name Type Description Default roboflow_result dict

The result from the Roboflow API containing predictions.

required

Returns:

Type Description Detections

A Detections object containing the bounding boxes, class IDs, and confidences of the predictions.

Example
import cv2\nimport supervision as sv\nfrom inference import get_model\n\nimage = cv2.imread(<SOURCE_IMAGE_PATH>)\nmodel = get_model(model_id=\"yolov8s-640\")\n\nresult = model.infer(image)[0]\ndetections = sv.Detections.from_roboflow(result)\n
Source code in supervision/detection/core.py
@classmethod\n@deprecated(\n    \"`Detections.from_roboflow` is deprecated and will be removed in \"\n    \"`supervision-0.22.0`. Use `Detections.from_inference` instead.\"\n)\ndef from_roboflow(cls, roboflow_result: Union[dict, Any]) -> Detections:\n    \"\"\"\n    !!! failure \"Deprecated\"\n\n        `Detections.from_roboflow` is deprecated and will be removed in\n        `supervision-0.22.0`. Use `Detections.from_inference` instead.\n\n    Create a Detections object from the [Roboflow](https://roboflow.com/)\n        API inference result or the [Inference](https://inference.roboflow.com/)\n        package results.\n\n    Args:\n        roboflow_result (dict): The result from the\n            Roboflow API containing predictions.\n\n    Returns:\n        (Detections): A Detections object containing the bounding boxes, class IDs,\n            and confidences of the predictions.\n\n    Example:\n        ```python\n        import cv2\n        import supervision as sv\n        from inference import get_model\n\n        image = cv2.imread(<SOURCE_IMAGE_PATH>)\n        model = get_model(model_id=\"yolov8s-640\")\n\n        result = model.infer(image)[0]\n        detections = sv.Detections.from_roboflow(result)\n        ```\n    \"\"\"\n    return cls.from_inference(roboflow_result)\n
"},{"location":"detection/core/#supervision.detection.core.Detections.from_sam","title":"from_sam(sam_result) classmethod","text":"

Creates a Detections instance from a Segment Anything Model (SAM) inference result.

Parameters:

Name Type Description Default sam_result List[dict]

The output Results instance from SAM

required

Returns:

Name Type Description Detections Detections

A new Detections object.

Example
import supervision as sv\nfrom segment_anything import (\n    sam_model_registry,\n    SamAutomaticMaskGenerator\n )\n\nsam_model_reg = sam_model_registry[MODEL_TYPE]\nsam = sam_model_reg(checkpoint=CHECKPOINT_PATH).to(device=DEVICE)\nmask_generator = SamAutomaticMaskGenerator(sam)\nsam_result = mask_generator.generate(IMAGE)\ndetections = sv.Detections.from_sam(sam_result=sam_result)\n
Source code in supervision/detection/core.py
@classmethod\ndef from_sam(cls, sam_result: List[dict]) -> Detections:\n    \"\"\"\n    Creates a Detections instance from\n    [Segment Anything Model](https://github.com/facebookresearch/segment-anything)\n    inference result.\n\n    Args:\n        sam_result (List[dict]): The output Results instance from SAM\n\n    Returns:\n        Detections: A new Detections object.\n\n    Example:\n        ```python\n        import supervision as sv\n        from segment_anything import (\n            sam_model_registry,\n            SamAutomaticMaskGenerator\n         )\n\n        sam_model_reg = sam_model_registry[MODEL_TYPE]\n        sam = sam_model_reg(checkpoint=CHECKPOINT_PATH).to(device=DEVICE)\n        mask_generator = SamAutomaticMaskGenerator(sam)\n        sam_result = mask_generator.generate(IMAGE)\n        detections = sv.Detections.from_sam(sam_result=sam_result)\n        ```\n    \"\"\"\n\n    sorted_generated_masks = sorted(\n        sam_result, key=lambda x: x[\"area\"], reverse=True\n    )\n\n    xywh = np.array([mask[\"bbox\"] for mask in sorted_generated_masks])\n    mask = np.array([mask[\"segmentation\"] for mask in sorted_generated_masks])\n\n    if np.asarray(xywh).shape[0] == 0:\n        return cls.empty()\n\n    xyxy = xywh_to_xyxy(boxes_xywh=xywh)\n    return cls(xyxy=xyxy, mask=mask)\n
"},{"location":"detection/core/#supervision.detection.core.Detections.from_tensorflow","title":"from_tensorflow(tensorflow_results, resolution_wh) classmethod","text":"

Creates a Detections instance from a Tensorflow Hub inference result.

Parameters:

tensorflow_results (dict): The output results from Tensorflow Hub. Required.

resolution_wh (tuple): The (width, height) of the source image, used to scale the normalized bounding box coordinates. Required.

Returns:

Detections: A new Detections object.

Example
import tensorflow as tf\nimport tensorflow_hub as hub\nimport numpy as np\nimport cv2\nimport supervision as sv\n\nmodule_handle = \"https://tfhub.dev/tensorflow/centernet/hourglass_512x512_kpts/1\"\nmodel = hub.load(module_handle)\nimg = np.array(cv2.imread(SOURCE_IMAGE_PATH))\nresult = model(img[np.newaxis, ...])\ndetections = sv.Detections.from_tensorflow(\n    result, resolution_wh=(img.shape[1], img.shape[0]))\n
Source code in supervision/detection/core.py
@classmethod\ndef from_tensorflow(\n    cls, tensorflow_results: dict, resolution_wh: tuple\n) -> Detections:\n    \"\"\"\n    Creates a Detections instance from a\n    [Tensorflow Hub](https://www.tensorflow.org/hub/tutorials/tf2_object_detection)\n    inference result.\n\n    Args:\n        tensorflow_results (dict):\n            The output results from Tensorflow Hub.\n\n    Returns:\n        Detections: A new Detections object.\n\n    Example:\n        ```python\n        import tensorflow as tf\n        import tensorflow_hub as hub\n        import numpy as np\n        import cv2\n\n        module_handle = \"https://tfhub.dev/tensorflow/centernet/hourglass_512x512_kpts/1\"\n        model = hub.load(module_handle)\n        img = np.array(cv2.imread(SOURCE_IMAGE_PATH))\n        result = model(img)\n        detections = sv.Detections.from_tensorflow(result)\n        ```\n    \"\"\"  # noqa: E501 // docs\n\n    boxes = tensorflow_results[\"detection_boxes\"][0].numpy()\n    boxes[:, [0, 2]] *= resolution_wh[0]\n    boxes[:, [1, 3]] *= resolution_wh[1]\n    boxes = boxes[:, [1, 0, 3, 2]]\n    return cls(\n        xyxy=boxes,\n        confidence=tensorflow_results[\"detection_scores\"][0].numpy(),\n        class_id=tensorflow_results[\"detection_classes\"][0].numpy().astype(int),\n    )\n
"},{"location":"detection/core/#supervision.detection.core.Detections.from_transformers","title":"from_transformers(transformers_results, id2label=None) classmethod","text":"

Creates a Detections instance from object detection or segmentation Transformer inference result.

Parameters:

transformers_results (dict): The output of Transformers model inference. A dictionary containing the scores, labels, boxes and masks keys. Required.

id2label (Optional[Dict[int, str]]): A dictionary mapping class IDs to class names. If provided, the resulting Detections object will contain a class_name data field with the class names. Defaults to None.

Returns:

Detections: A new Detections object.

Example
import torch\nimport supervision as sv\nfrom PIL import Image\nfrom transformers import DetrImageProcessor, DetrForObjectDetection\n\nprocessor = DetrImageProcessor.from_pretrained(\"facebook/detr-resnet-50\")\nmodel = DetrForObjectDetection.from_pretrained(\"facebook/detr-resnet-50\")\n\nimage = Image.open(<SOURCE_IMAGE_PATH>)\ninputs = processor(images=image, return_tensors=\"pt\")\n\nwith torch.no_grad():\n    outputs = model(**inputs)\n\nwidth, height = image.size\ntarget_size = torch.tensor([[height, width]])\nresults = processor.post_process_object_detection(\n    outputs=outputs, target_sizes=target_size)[0]\n\ndetections = sv.Detections.from_transformers(\n    transformers_results=results,\n    id2label=model.config.id2label\n)\n

Tip

Class names can be accessed using detections[\"class_name\"].

Source code in supervision/detection/core.py
@classmethod\ndef from_transformers(\n    cls, transformers_results: dict, id2label: Optional[Dict[int, str]] = None\n) -> Detections:\n    \"\"\"\n    Creates a Detections instance from object detection or segmentation\n    [Transformer](https://github.com/huggingface/transformers) inference result.\n\n    Args:\n        transformers_results (dict): The output of Transformers model inference. A\n            dictionary containing the `scores`, `labels`, `boxes` and `masks` keys.\n        id2label (Optional[Dict[int, str]]): A dictionary mapping class IDs to\n            class names. If provided, the resulting Detections object will contain\n            `class_name` data field with the class names.\n\n    Returns:\n        Detections: A new Detections object.\n\n    Example:\n        ```python\n        import torch\n        import supervision as sv\n        from PIL import Image\n        from transformers import DetrImageProcessor, DetrForObjectDetection\n\n        processor = DetrImageProcessor.from_pretrained(\"facebook/detr-resnet-50\")\n        model = DetrForObjectDetection.from_pretrained(\"facebook/detr-resnet-50\")\n\n        image = Image.open(<SOURCE_IMAGE_PATH>)\n        inputs = processor(images=image, return_tensors=\"pt\")\n\n        with torch.no_grad():\n            outputs = model(**inputs)\n\n        width, height = image.size\n        target_size = torch.tensor([[height, width]])\n        results = processor.post_process_object_detection(\n            outputs=outputs, target_sizes=target_size)[0]\n\n        detections = sv.Detections.from_transformers(\n            transformers_results=results,\n            id2label=model.config.id2label\n        )\n        ```\n\n    !!! tip\n\n        Class names values can be accessed using `detections[\"class_name\"]`.\n    \"\"\"  # noqa: E501 // docs\n\n    class_ids = transformers_results[\"labels\"].cpu().detach().numpy().astype(int)\n    data = {}\n    if id2label is not None:\n        class_names = np.array([id2label[class_id] for class_id in class_ids])\n        data[CLASS_NAME_DATA_FIELD] = class_names\n    if \"boxes\" in transformers_results:\n        return cls(\n            xyxy=transformers_results[\"boxes\"].cpu().detach().numpy(),\n            confidence=transformers_results[\"scores\"].cpu().detach().numpy(),\n            class_id=class_ids,\n            data=data,\n        )\n    elif \"masks\" in transformers_results:\n        masks = transformers_results[\"masks\"].cpu().detach().numpy().astype(bool)\n        return cls(\n            xyxy=mask_to_xyxy(masks),\n            mask=masks,\n            confidence=transformers_results[\"scores\"].cpu().detach().numpy(),\n            class_id=class_ids,\n            data=data,\n        )\n    else:\n        raise NotImplementedError(\n            \"Only object detection and semantic segmentation results are supported.\"\n        )\n
"},{"location":"detection/core/#supervision.detection.core.Detections.from_ultralytics","title":"from_ultralytics(ultralytics_results) classmethod","text":"

Creates a sv.Detections instance from a YOLOv8 inference result.

Note

from_ultralytics is compatible with detection, segmentation, and OBB models.

Parameters:

ultralytics_results (Results): The output Results instance from Ultralytics. Required.

Returns:

Detections: A new Detections object.

Example
import cv2\nimport supervision as sv\nfrom ultralytics import YOLO\n\nimage = cv2.imread(<SOURCE_IMAGE_PATH>)\nmodel = YOLO('yolov8s.pt')\nresults = model(image)[0]\ndetections = sv.Detections.from_ultralytics(results)\n

Tip

Class names can be accessed using detections[\"class_name\"].

Source code in supervision/detection/core.py
@classmethod\ndef from_ultralytics(cls, ultralytics_results) -> Detections:\n    \"\"\"\n    Creates a `sv.Detections` instance from a\n    [YOLOv8](https://github.com/ultralytics/ultralytics) inference result.\n\n    !!! Note\n\n        `from_ultralytics` is compatible with\n        [detection](https://docs.ultralytics.com/tasks/detect/),\n        [segmentation](https://docs.ultralytics.com/tasks/segment/), and\n        [OBB](https://docs.ultralytics.com/tasks/obb/) models.\n\n    Args:\n        ultralytics_results (ultralytics.yolo.engine.results.Results):\n            The output Results instance from Ultralytics\n\n    Returns:\n        Detections: A new Detections object.\n\n    Example:\n        ```python\n        import cv2\n        import supervision as sv\n        from ultralytics import YOLO\n\n        image = cv2.imread(<SOURCE_IMAGE_PATH>)\n        model = YOLO('yolov8s.pt')\n        results = model(image)[0]\n        detections = sv.Detections.from_ultralytics(results)\n        ```\n\n    !!! tip\n\n        Class names values can be accessed using `detections[\"class_name\"]`.\n    \"\"\"  # noqa: E501 // docs\n\n    if hasattr(ultralytics_results, \"obb\") and ultralytics_results.obb is not None:\n        class_id = ultralytics_results.obb.cls.cpu().numpy().astype(int)\n        class_names = np.array([ultralytics_results.names[i] for i in class_id])\n        oriented_box_coordinates = ultralytics_results.obb.xyxyxyxy.cpu().numpy()\n        return cls(\n            xyxy=ultralytics_results.obb.xyxy.cpu().numpy(),\n            confidence=ultralytics_results.obb.conf.cpu().numpy(),\n            class_id=class_id,\n            tracker_id=ultralytics_results.obb.id.int().cpu().numpy()\n            if ultralytics_results.obb.id is not None\n            else None,\n            data={\n                ORIENTED_BOX_COORDINATES: oriented_box_coordinates,\n                CLASS_NAME_DATA_FIELD: class_names,\n            },\n        )\n\n    class_id = ultralytics_results.boxes.cls.cpu().numpy().astype(int)\n    class_names = np.array([ultralytics_results.names[i] for i in class_id])\n    return cls(\n        xyxy=ultralytics_results.boxes.xyxy.cpu().numpy(),\n        confidence=ultralytics_results.boxes.conf.cpu().numpy(),\n        class_id=class_id,\n        mask=extract_ultralytics_masks(ultralytics_results),\n        tracker_id=ultralytics_results.boxes.id.int().cpu().numpy()\n        if ultralytics_results.boxes.id is not None\n        else None,\n        data={CLASS_NAME_DATA_FIELD: class_names},\n    )\n
"},{"location":"detection/core/#supervision.detection.core.Detections.from_yolo_nas","title":"from_yolo_nas(yolo_nas_results) classmethod","text":"

Creates a Detections instance from a YOLO-NAS inference result.

Parameters:

yolo_nas_results (ImageDetectionPrediction): The output ImageDetectionPrediction instance from YOLO-NAS, defined in 'super_gradients.training.models.prediction_results'. Required.

Returns:

Detections: A new Detections object.

Example
import cv2\nfrom super_gradients.training import models\nimport supervision as sv\n\nimage = cv2.imread(<SOURCE_IMAGE_PATH>)\nmodel = models.get('yolo_nas_l', pretrained_weights=\"coco\")\n\nresult = list(model.predict(image, conf=0.35))[0]\ndetections = sv.Detections.from_yolo_nas(result)\n
Source code in supervision/detection/core.py
@classmethod\ndef from_yolo_nas(cls, yolo_nas_results) -> Detections:\n    \"\"\"\n    Creates a Detections instance from a\n    [YOLO-NAS](https://github.com/Deci-AI/super-gradients/blob/master/YOLONAS.md)\n    inference result.\n\n    Args:\n        yolo_nas_results (ImageDetectionPrediction):\n            The output Results instance from YOLO-NAS\n            ImageDetectionPrediction is coming from\n            'super_gradients.training.models.prediction_results'\n\n    Returns:\n        Detections: A new Detections object.\n\n    Example:\n        ```python\n        import cv2\n        from super_gradients.training import models\n        import supervision as sv\n\n        image = cv2.imread(<SOURCE_IMAGE_PATH>)\n        model = models.get('yolo_nas_l', pretrained_weights=\"coco\")\n\n        result = list(model.predict(image, conf=0.35))[0]\n        detections = sv.Detections.from_yolo_nas(result)\n        ```\n    \"\"\"\n    if np.asarray(yolo_nas_results.prediction.bboxes_xyxy).shape[0] == 0:\n        return cls.empty()\n\n    return cls(\n        xyxy=yolo_nas_results.prediction.bboxes_xyxy,\n        confidence=yolo_nas_results.prediction.confidence,\n        class_id=yolo_nas_results.prediction.labels.astype(int),\n    )\n
"},{"location":"detection/core/#supervision.detection.core.Detections.from_yolov5","title":"from_yolov5(yolov5_results) classmethod","text":"

Creates a Detections instance from a YOLOv5 inference result.

Parameters:

yolov5_results (Detections): The output Detections instance from YOLOv5. Required.

Returns:

Detections: A new Detections object.

Example
import cv2\nimport torch\nimport supervision as sv\n\nimage = cv2.imread(<SOURCE_IMAGE_PATH>)\nmodel = torch.hub.load('ultralytics/yolov5', 'yolov5s')\nresult = model(image)\ndetections = sv.Detections.from_yolov5(result)\n
Source code in supervision/detection/core.py
@classmethod\ndef from_yolov5(cls, yolov5_results) -> Detections:\n    \"\"\"\n    Creates a Detections instance from a\n    [YOLOv5](https://github.com/ultralytics/yolov5) inference result.\n\n    Args:\n        yolov5_results (yolov5.models.common.Detections):\n            The output Detections instance from YOLOv5\n\n    Returns:\n        Detections: A new Detections object.\n\n    Example:\n        ```python\n        import cv2\n        import torch\n        import supervision as sv\n\n        image = cv2.imread(<SOURCE_IMAGE_PATH>)\n        model = torch.hub.load('ultralytics/yolov5', 'yolov5s')\n        result = model(image)\n        detections = sv.Detections.from_yolov5(result)\n        ```\n    \"\"\"\n    yolov5_detections_predictions = yolov5_results.pred[0].cpu().cpu().numpy()\n\n    return cls(\n        xyxy=yolov5_detections_predictions[:, :4],\n        confidence=yolov5_detections_predictions[:, 4],\n        class_id=yolov5_detections_predictions[:, 5].astype(int),\n    )\n
"},{"location":"detection/core/#supervision.detection.core.Detections.get_anchors_coordinates","title":"get_anchors_coordinates(anchor)","text":"

Calculates and returns the coordinates of a specific anchor point within the bounding boxes defined by the xyxy attribute. The anchor point can be any of the predefined positions in the Position enum, such as CENTER, CENTER_LEFT, BOTTOM_RIGHT, etc.

Parameters:

anchor (Position): An enum specifying the position of the anchor point within the bounding box. Supported positions are defined in the Position enum. Required.

Returns:

np.ndarray: An array of shape (n, 2), where n is the number of bounding boxes. Each row contains the [x, y] coordinates of the specified anchor point for the corresponding bounding box.

Raises:

ValueError: If the provided anchor is not supported.

Source code in supervision/detection/core.py
def get_anchors_coordinates(self, anchor: Position) -> np.ndarray:\n    \"\"\"\n    Calculates and returns the coordinates of a specific anchor point\n    within the bounding boxes defined by the `xyxy` attribute. The anchor\n    point can be any of the predefined positions in the `Position` enum,\n    such as `CENTER`, `CENTER_LEFT`, `BOTTOM_RIGHT`, etc.\n\n    Args:\n        anchor (Position): An enum specifying the position of the anchor point\n            within the bounding box. Supported positions are defined in the\n            `Position` enum.\n\n    Returns:\n        np.ndarray: An array of shape `(n, 2)`, where `n` is the number of bounding\n            boxes. Each row contains the `[x, y]` coordinates of the specified\n            anchor point for the corresponding bounding box.\n\n    Raises:\n        ValueError: If the provided `anchor` is not supported.\n    \"\"\"\n    if anchor == Position.CENTER:\n        return np.array(\n            [\n                (self.xyxy[:, 0] + self.xyxy[:, 2]) / 2,\n                (self.xyxy[:, 1] + self.xyxy[:, 3]) / 2,\n            ]\n        ).transpose()\n    elif anchor == Position.CENTER_OF_MASS:\n        if self.mask is None:\n            raise ValueError(\n                \"Cannot use `Position.CENTER_OF_MASS` without a detection mask.\"\n            )\n        return calculate_masks_centroids(masks=self.mask)\n    elif anchor == Position.CENTER_LEFT:\n        return np.array(\n            [\n                self.xyxy[:, 0],\n                (self.xyxy[:, 1] + self.xyxy[:, 3]) / 2,\n            ]\n        ).transpose()\n    elif anchor == Position.CENTER_RIGHT:\n        return np.array(\n            [\n                self.xyxy[:, 2],\n                (self.xyxy[:, 1] + self.xyxy[:, 3]) / 2,\n            ]\n        ).transpose()\n    elif anchor == Position.BOTTOM_CENTER:\n        return np.array(\n            [(self.xyxy[:, 0] + self.xyxy[:, 2]) / 2, self.xyxy[:, 3]]\n        ).transpose()\n    elif anchor == Position.BOTTOM_LEFT:\n        return np.array([self.xyxy[:, 0], self.xyxy[:, 3]]).transpose()\n    elif anchor == Position.BOTTOM_RIGHT:\n        return np.array([self.xyxy[:, 2], self.xyxy[:, 3]]).transpose()\n    elif anchor == Position.TOP_CENTER:\n        return np.array(\n            [(self.xyxy[:, 0] + self.xyxy[:, 2]) / 2, self.xyxy[:, 1]]\n        ).transpose()\n    elif anchor == Position.TOP_LEFT:\n        return np.array([self.xyxy[:, 0], self.xyxy[:, 1]]).transpose()\n    elif anchor == Position.TOP_RIGHT:\n        return np.array([self.xyxy[:, 2], self.xyxy[:, 1]]).transpose()\n\n    raise ValueError(f\"{anchor} is not supported.\")\n
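A minimal usage sketch (not part of the library documentation; the box coordinates are illustrative):

import numpy as np\nimport supervision as sv\n\ndetections = sv.Detections(xyxy=np.array([[10, 20, 50, 80]]))\n\n# CENTER resolves to the midpoint of each box\ndetections.get_anchors_coordinates(anchor=sv.Position.CENTER)\n# array([[30., 50.]])\n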
"},{"location":"detection/core/#supervision.detection.core.Detections.is_empty","title":"is_empty()","text":"

Returns True if the Detections object is considered empty.

Source code in supervision/detection/core.py
def is_empty(self) -> bool:\n    \"\"\"\n    Returns `True` if the `Detections` object is considered empty.\n    \"\"\"\n    empty_detections = Detections.empty()\n    empty_detections.data = self.data\n    return self == empty_detections\n
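A short illustration (not part of the library documentation):

import supervision as sv\n\n# an empty Detections object reports itself as empty\nsv.Detections.empty().is_empty()\n# True\n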
"},{"location":"detection/core/#supervision.detection.core.Detections.merge","title":"merge(detections_list) classmethod","text":"

Merge a list of Detections objects into a single Detections object.

This method takes a list of Detections objects and combines their respective fields (xyxy, mask, confidence, class_id, and tracker_id) into a single Detections object.

For example, if merging Detections with 3 and 4 detected objects, this method will return a Detections with 7 objects (7 entries in xyxy, mask, etc).

Note

When merging, empty Detections objects are ignored.

Parameters:

detections_list (List[Detections]): A list of Detections objects to merge. Required.

Returns:

Detections: A single Detections object containing the merged data from the input list.

Example
import numpy as np\nimport supervision as sv\n\ndetections_1 = sv.Detections(\n    xyxy=np.array([[15, 15, 100, 100], [200, 200, 300, 300]]),\n    class_id=np.array([1, 2]),\n    data={'feature_vector': np.array([0.1, 0.2])}\n )\n\ndetections_2 = sv.Detections(\n    xyxy=np.array([[30, 30, 120, 120]]),\n    class_id=np.array([1]),\n    data={'feature_vector': [np.array([0.3])]}\n )\n\nmerged_detections = sv.Detections.merge([detections_1, detections_2])\n\nmerged_detections.xyxy\narray([[ 15,  15, 100, 100],\n       [200, 200, 300, 300],\n       [ 30,  30, 120, 120]])\n\nmerged_detections.class_id\narray([1, 2, 1])\n\nmerged_detections.data['feature_vector']\narray([0.1, 0.2, 0.3])\n
Source code in supervision/detection/core.py
@classmethod\ndef merge(cls, detections_list: List[Detections]) -> Detections:\n    \"\"\"\n    Merge a list of Detections objects into a single Detections object.\n\n    This method takes a list of Detections objects and combines their\n    respective fields (`xyxy`, `mask`, `confidence`, `class_id`, and `tracker_id`)\n    into a single Detections object.\n\n    For example, if merging Detections with 3 and 4 detected objects, this method\n    will return a Detections with 7 objects (7 entries in `xyxy`, `mask`, etc).\n\n    !!! Note\n\n        When merging, empty `Detections` objects are ignored.\n\n    Args:\n        detections_list (List[Detections]): A list of Detections objects to merge.\n\n    Returns:\n        (Detections): A single Detections object containing\n            the merged data from the input list.\n\n    Example:\n        ```python\n        import numpy as np\n        import supervision as sv\n\n        detections_1 = sv.Detections(\n            xyxy=np.array([[15, 15, 100, 100], [200, 200, 300, 300]]),\n            class_id=np.array([1, 2]),\n            data={'feature_vector': np.array([0.1, 0.2)])}\n         )\n\n        detections_2 = sv.Detections(\n            xyxy=np.array([[30, 30, 120, 120]]),\n            class_id=np.array([1]),\n            data={'feature_vector': [np.array([0.3])]}\n         )\n\n        merged_detections = Detections.merge([detections_1, detections_2])\n\n        merged_detections.xyxy\n        array([[ 15,  15, 100, 100],\n               [200, 200, 300, 300],\n               [ 30,  30, 120, 120]])\n\n        merged_detections.class_id\n        array([1, 2, 1])\n\n        merged_detections.data['feature_vector']\n        array([0.1, 0.2, 0.3])\n        ```\n    \"\"\"\n    detections_list = [\n        detections for detections in detections_list if not detections.is_empty()\n    ]\n\n    if len(detections_list) == 0:\n        return Detections.empty()\n\n    for detections in detections_list:\n        validate_detections_fields(\n            xyxy=detections.xyxy,\n            mask=detections.mask,\n            confidence=detections.confidence,\n            class_id=detections.class_id,\n            tracker_id=detections.tracker_id,\n            data=detections.data,\n        )\n\n    xyxy = np.vstack([d.xyxy for d in detections_list])\n\n    def stack_or_none(name: str):\n        if all(d.__getattribute__(name) is None for d in detections_list):\n            return None\n        if any(d.__getattribute__(name) is None for d in detections_list):\n            raise ValueError(f\"All or none of the '{name}' fields must be None\")\n        return (\n            np.vstack([d.__getattribute__(name) for d in detections_list])\n            if name == \"mask\"\n            else np.hstack([d.__getattribute__(name) for d in detections_list])\n        )\n\n    mask = stack_or_none(\"mask\")\n    confidence = stack_or_none(\"confidence\")\n    class_id = stack_or_none(\"class_id\")\n    tracker_id = stack_or_none(\"tracker_id\")\n\n    data = merge_data([d.data for d in detections_list])\n\n    return cls(\n        xyxy=xyxy,\n        mask=mask,\n        confidence=confidence,\n        class_id=class_id,\n        tracker_id=tracker_id,\n        data=data,\n    )\n
"},{"location":"detection/core/#supervision.detection.core.Detections.with_nmm","title":"with_nmm(threshold=0.5, class_agnostic=False)","text":"

Perform non-maximum merging on the current set of object detections.

Parameters:

threshold (float): The intersection-over-union threshold to use for non-maximum merging. Defaults to 0.5.

class_agnostic (bool): Whether to perform class-agnostic non-maximum merging. If True, the class_id of each detection will be ignored. Defaults to False.

Returns:

Detections: A new Detections object containing the subset of detections after non-maximum merging.

Raises:

AssertionError: If confidence is None, or if class_id is None while class_agnostic is False.

Source code in supervision/detection/core.py
def with_nmm(\n    self, threshold: float = 0.5, class_agnostic: bool = False\n) -> Detections:\n    \"\"\"\n    Perform non-maximum merging on the current set of object detections.\n\n    Args:\n        threshold (float, optional): The intersection-over-union threshold\n            to use for non-maximum merging. Defaults to 0.5.\n        class_agnostic (bool, optional): Whether to perform class-agnostic\n            non-maximum merging. If True, the class_id of each detection\n            will be ignored. Defaults to False.\n\n    Returns:\n        Detections: A new Detections object containing the subset of detections\n            after non-maximum merging.\n\n    Raises:\n        AssertionError: If `confidence` is None or `class_id` is None and\n            class_agnostic is False.\n\n    ![non-max-merging](https://media.roboflow.com/supervision-docs/non-max-merging.png){ align=center width=\"800\" }\n    \"\"\"  # noqa: E501 // docs\n    if len(self) == 0:\n        return self\n\n    assert (\n        self.confidence is not None\n    ), \"Detections confidence must be given for NMM to be executed.\"\n\n    if class_agnostic:\n        predictions = np.hstack((self.xyxy, self.confidence.reshape(-1, 1)))\n    else:\n        assert self.class_id is not None, (\n            \"Detections class_id must be given for NMM to be executed. If you\"\n            \" intended to perform class agnostic NMM set class_agnostic=True.\"\n        )\n        predictions = np.hstack(\n            (\n                self.xyxy,\n                self.confidence.reshape(-1, 1),\n                self.class_id.reshape(-1, 1),\n            )\n        )\n\n    merge_groups = box_non_max_merge(\n        predictions=predictions, iou_threshold=threshold\n    )\n\n    result = []\n    for merge_group in merge_groups:\n        unmerged_detections = [self[i] for i in merge_group]\n        merged_detections = merge_inner_detections_objects(\n            unmerged_detections, threshold\n        )\n        result.append(merged_detections)\n\n    return Detections.merge(result)\n
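A minimal sketch of non-maximum merging with illustrative values (not part of the library documentation):

import numpy as np\nimport supervision as sv\n\ndetections = sv.Detections(\n    xyxy=np.array([[0, 0, 100, 100], [5, 5, 105, 105]]),\n    confidence=np.array([0.9, 0.8]),\n    class_id=np.array([0, 0]),\n)\n\n# the two boxes overlap with IoU ~0.82 > 0.5, so they are merged into one detection\nmerged = detections.with_nmm(threshold=0.5)\nlen(merged)\n# 1\n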
"},{"location":"detection/core/#supervision.detection.core.Detections.with_nms","title":"with_nms(threshold=0.5, class_agnostic=False)","text":"

Performs non-max suppression on the detection set. If the detections come from a segmentation model, mask IoU is used; otherwise, box IoU is used.

Parameters:

threshold (float): The intersection-over-union threshold to use for non-maximum suppression. The lower the value, the more restrictive the NMS becomes. Defaults to 0.5.

class_agnostic (bool): Whether to perform class-agnostic non-maximum suppression. If True, the class_id of each detection will be ignored. Defaults to False.

Returns:

Detections: A new Detections object containing the subset of detections after non-maximum suppression.

Raises:

AssertionError: If confidence is None, or if class_id is None while class_agnostic is False.

Source code in supervision/detection/core.py
def with_nms(\n    self, threshold: float = 0.5, class_agnostic: bool = False\n) -> Detections:\n    \"\"\"\n    Performs non-max suppression on detection set. If the detections result\n    from a segmentation model, the IoU mask is applied. Otherwise, box IoU is used.\n\n    Args:\n        threshold (float, optional): The intersection-over-union threshold\n            to use for non-maximum suppression. I'm the lower the value the more\n            restrictive the NMS becomes. Defaults to 0.5.\n        class_agnostic (bool, optional): Whether to perform class-agnostic\n            non-maximum suppression. If True, the class_id of each detection\n            will be ignored. Defaults to False.\n\n    Returns:\n        Detections: A new Detections object containing the subset of detections\n            after non-maximum suppression.\n\n    Raises:\n        AssertionError: If `confidence` is None and class_agnostic is False.\n            If `class_id` is None and class_agnostic is False.\n    \"\"\"\n    if len(self) == 0:\n        return self\n\n    assert (\n        self.confidence is not None\n    ), \"Detections confidence must be given for NMS to be executed.\"\n\n    if class_agnostic:\n        predictions = np.hstack((self.xyxy, self.confidence.reshape(-1, 1)))\n    else:\n        assert self.class_id is not None, (\n            \"Detections class_id must be given for NMS to be executed. If you\"\n            \" intended to perform class agnostic NMS set class_agnostic=True.\"\n        )\n        predictions = np.hstack(\n            (\n                self.xyxy,\n                self.confidence.reshape(-1, 1),\n                self.class_id.reshape(-1, 1),\n            )\n        )\n\n    if self.mask is not None:\n        indices = mask_non_max_suppression(\n            predictions=predictions, masks=self.mask, iou_threshold=threshold\n        )\n    else:\n        indices = box_non_max_suppression(\n            predictions=predictions, iou_threshold=threshold\n        )\n\n    return self[indices]\n
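A minimal sketch of non-max suppression with illustrative values (not part of the library documentation):

import numpy as np\nimport supervision as sv\n\ndetections = sv.Detections(\n    xyxy=np.array([[0, 0, 100, 100], [5, 5, 105, 105], [200, 200, 250, 250]]),\n    confidence=np.array([0.9, 0.8, 0.7]),\n    class_id=np.array([0, 0, 1]),\n)\n\n# the first two boxes overlap heavily and share a class,\n# so the lower-confidence one is suppressed\ndetections.with_nms(threshold=0.5).confidence\n# array([0.9, 0.7])\n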
"},{"location":"detection/double_detection_filter/","title":"Double Detection Filter","text":"OverlapFilter

Bases: Enum

Enum specifying the strategy for filtering overlapping detections.

Attributes:

NONE: Do not filter detections based on overlap.

NON_MAX_SUPPRESSION: Filter detections using non-max suppression. Detections that overlap by more than a set threshold will be discarded, except for the one with the highest confidence.

NON_MAX_MERGE: Merge detections with non-max merging. Detections that overlap by more than a set threshold will be merged into a single detection.

Source code in supervision/detection/overlap_filter.py
class OverlapFilter(Enum):\n    \"\"\"\n    Enum specifying the strategy for filtering overlapping detections.\n\n    Attributes:\n        NONE: Do not filter detections based on overlap.\n        NON_MAX_SUPPRESSION: Filter detections using non-max suppression. This means,\n            detections that overlap by more than a set threshold will be discarded,\n            except for the one with the highest confidence.\n        NON_MAX_MERGE: Merge detections with non-max merging. This means,\n            detections that overlap by more than a set threshold will be merged\n            into a single detection.\n    \"\"\"\n\n    NONE = \"none\"\n    NON_MAX_SUPPRESSION = \"non_max_suppression\"\n    NON_MAX_MERGE = \"non_max_merge\"\n
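A minimal sketch of selecting a strategy; the import path follows the source location listed above:

from supervision.detection.overlap_filter import OverlapFilter\n\nstrategy = OverlapFilter.NON_MAX_MERGE\nstrategy.value\n# 'non_max_merge'\n\n# strategies can also be resolved from their string values\nOverlapFilter('non_max_suppression')\n# <OverlapFilter.NON_MAX_SUPPRESSION: 'non_max_suppression'>\n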
box_non_max_suppression

Perform Non-Maximum Suppression (NMS) on object detection predictions.

Parameters:

predictions (np.ndarray): An array of object detection predictions in the format of (x_min, y_min, x_max, y_max, score) or (x_min, y_min, x_max, y_max, score, class). Required.

iou_threshold (float): The intersection-over-union threshold to use for non-maximum suppression. Defaults to 0.5.

Returns:

np.ndarray: A boolean array indicating which predictions to keep after non-maximum suppression.

Raises:

AssertionError: If iou_threshold is not within the closed range from 0 to 1.

Source code in supervision/detection/overlap_filter.py
def box_non_max_suppression(\n    predictions: np.ndarray, iou_threshold: float = 0.5\n) -> np.ndarray:\n    \"\"\"\n    Perform Non-Maximum Suppression (NMS) on object detection predictions.\n\n    Args:\n        predictions (np.ndarray): An array of object detection predictions in\n            the format of `(x_min, y_min, x_max, y_max, score)`\n            or `(x_min, y_min, x_max, y_max, score, class)`.\n        iou_threshold (float, optional): The intersection-over-union threshold\n            to use for non-maximum suppression.\n\n    Returns:\n        np.ndarray: A boolean array indicating which predictions to keep after n\n            on-maximum suppression.\n\n    Raises:\n        AssertionError: If `iou_threshold` is not within the\n            closed range from `0` to `1`.\n    \"\"\"\n    assert 0 <= iou_threshold <= 1, (\n        \"Value of `iou_threshold` must be in the closed range from 0 to 1, \"\n        f\"{iou_threshold} given.\"\n    )\n    rows, columns = predictions.shape\n\n    # add column #5 - category filled with zeros for agnostic nms\n    if columns == 5:\n        predictions = np.c_[predictions, np.zeros(rows)]\n\n    # sort predictions column #4 - score\n    sort_index = np.flip(predictions[:, 4].argsort())\n    predictions = predictions[sort_index]\n\n    boxes = predictions[:, :4]\n    categories = predictions[:, 5]\n    ious = box_iou_batch(boxes, boxes)\n    ious = ious - np.eye(rows)\n\n    keep = np.ones(rows, dtype=bool)\n\n    for index, (iou, category) in enumerate(zip(ious, categories)):\n        if not keep[index]:\n            continue\n\n        # drop detections with iou > iou_threshold and\n        # same category as current detections\n        condition = (iou > iou_threshold) & (categories == category)\n        keep = keep & ~condition\n\n    return keep[sort_index.argsort()]\n
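A small numeric sketch with illustrative box values; the import path follows the source location listed above:

import numpy as np\nfrom supervision.detection.overlap_filter import box_non_max_suppression\n\npredictions = np.array([\n    [0, 0, 100, 100, 0.9],\n    [5, 5, 105, 105, 0.8],\n    [200, 200, 300, 300, 0.7],\n])\n\n# the second box overlaps the first (IoU ~0.82) and is suppressed\nbox_non_max_suppression(predictions, iou_threshold=0.5)\n# array([ True, False,  True])\n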
mask_non_max_suppression

Perform Non-Maximum Suppression (NMS) on segmentation predictions.

Parameters:

predictions (np.ndarray): A 2D array of object detection predictions in the format of (x_min, y_min, x_max, y_max, score) or (x_min, y_min, x_max, y_max, score, class). Shape: (N, 5) or (N, 6), where N is the number of predictions. Required.

masks (np.ndarray): A 3D array of binary masks corresponding to the predictions. Shape: (N, H, W), where N is the number of predictions, and H, W are the dimensions of each mask. Required.

iou_threshold (float): The intersection-over-union threshold to use for non-maximum suppression. Defaults to 0.5.

mask_dimension (int): The dimension to which the masks should be resized before computing IOU values. Defaults to 640.

Returns:

np.ndarray: A boolean array indicating which predictions to keep after non-maximum suppression.

Raises:

AssertionError: If iou_threshold is not within the closed range from 0 to 1.

Source code in supervision/detection/overlap_filter.py
def mask_non_max_suppression(\n    predictions: np.ndarray,\n    masks: np.ndarray,\n    iou_threshold: float = 0.5,\n    mask_dimension: int = 640,\n) -> np.ndarray:\n    \"\"\"\n    Perform Non-Maximum Suppression (NMS) on segmentation predictions.\n\n    Args:\n        predictions (np.ndarray): A 2D array of object detection predictions in\n            the format of `(x_min, y_min, x_max, y_max, score)`\n            or `(x_min, y_min, x_max, y_max, score, class)`. Shape: `(N, 5)` or\n            `(N, 6)`, where N is the number of predictions.\n        masks (np.ndarray): A 3D array of binary masks corresponding to the predictions.\n            Shape: `(N, H, W)`, where N is the number of predictions, and H, W are the\n            dimensions of each mask.\n        iou_threshold (float, optional): The intersection-over-union threshold\n            to use for non-maximum suppression.\n        mask_dimension (int, optional): The dimension to which the masks should be\n            resized before computing IOU values. Defaults to 640.\n\n    Returns:\n        np.ndarray: A boolean array indicating which predictions to keep after\n            non-maximum suppression.\n\n    Raises:\n        AssertionError: If `iou_threshold` is not within the closed\n        range from `0` to `1`.\n    \"\"\"\n    assert 0 <= iou_threshold <= 1, (\n        \"Value of `iou_threshold` must be in the closed range from 0 to 1, \"\n        f\"{iou_threshold} given.\"\n    )\n    rows, columns = predictions.shape\n\n    if columns == 5:\n        predictions = np.c_[predictions, np.zeros(rows)]\n\n    sort_index = predictions[:, 4].argsort()[::-1]\n    predictions = predictions[sort_index]\n    masks = masks[sort_index]\n    masks_resized = resize_masks(masks, mask_dimension)\n    ious = mask_iou_batch(masks_resized, masks_resized)\n    categories = predictions[:, 5]\n\n    keep = np.ones(rows, dtype=bool)\n    for i in range(rows):\n        if keep[i]:\n            condition = (ious[i] > iou_threshold) & (categories[i] == categories)\n            keep[i + 1 :] = np.where(condition[i + 1 :], False, keep[i + 1 :])\n\n    return keep[sort_index.argsort()]\n
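A small sketch with two overlapping toy masks (illustrative values only); the import path follows the source location listed above:

import numpy as np\nfrom supervision.detection.overlap_filter import mask_non_max_suppression\n\nmasks = np.zeros((2, 8, 8), dtype=bool)\nmasks[0, 0:6, 0:6] = True\nmasks[1, 0:6, 1:7] = True\n\npredictions = np.array([\n    [0, 0, 6, 6, 0.9],\n    [1, 0, 7, 6, 0.8],\n])\n\n# the masks overlap with IoU ~0.71, so the lower-scoring prediction is suppressed\nmask_non_max_suppression(predictions, masks, iou_threshold=0.5)\n# array([ True, False])\n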
box_non_max_merge

Apply a greedy version of non-maximum merging per category to avoid detecting too many overlapping bounding boxes for a given object.

Parameters:

predictions (npt.NDArray[float64]): An array of shape (n, 5) or (n, 6) containing the bounding box coordinates in format [x1, y1, x2, y2], the confidence scores and class_ids. Omit the class_id column to allow detections of different classes to be merged. Required.

iou_threshold (float): The intersection-over-union threshold to use for non-maximum merging. Defaults to 0.5.

Returns:

List[List[int]]: Groups of prediction indices to be merged. Each group may have 1 or more elements.

Source code in supervision/detection/overlap_filter.py
def box_non_max_merge(\n    predictions: npt.NDArray[np.float64],\n    iou_threshold: float = 0.5,\n) -> List[List[int]]:\n    \"\"\"\n    Apply greedy version of non-maximum merging per category to avoid detecting\n    too many overlapping bounding boxes for a given object.\n\n    Args:\n        predictions (npt.NDArray[np.float64]): An array of shape `(n, 5)` or `(n, 6)`\n            containing the bounding boxes coordinates in format `[x1, y1, x2, y2]`,\n            the confidence scores and class_ids. Omit class_id column to allow\n            detections of different classes to be merged.\n        iou_threshold (float, optional): The intersection-over-union threshold\n            to use for non-maximum suppression. Defaults to 0.5.\n\n    Returns:\n        List[List[int]]: Groups of prediction indices be merged.\n            Each group may have 1 or more elements.\n    \"\"\"\n    if predictions.shape[1] == 5:\n        return group_overlapping_boxes(predictions, iou_threshold)\n\n    category_ids = predictions[:, 5]\n    merge_groups = []\n    for category_id in np.unique(category_ids):\n        curr_indices = np.where(category_ids == category_id)[0]\n        merge_class_groups = group_overlapping_boxes(\n            predictions[curr_indices], iou_threshold\n        )\n\n        for merge_class_group in merge_class_groups:\n            merge_groups.append(curr_indices[merge_class_group].tolist())\n\n    for merge_group in merge_groups:\n        if len(merge_group) == 0:\n            raise ValueError(\n                f\"Empty group detected when non-max-merging \"\n                f\"detections: {merge_groups}\"\n            )\n    return merge_groups\n
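A small sketch of the grouping behavior with illustrative box values; the import path follows the source location listed above:

import numpy as np\nfrom supervision.detection.overlap_filter import box_non_max_merge\n\npredictions = np.array([\n    [0, 0, 100, 100, 0.9],\n    [5, 5, 105, 105, 0.8],\n    [200, 200, 300, 300, 0.7],\n])\n\ngroups = box_non_max_merge(predictions, iou_threshold=0.5)\n# the first two boxes fall into one merge group and the third stays alone,\n# e.g. [[0, 1], [2]]\n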
"},{"location":"detection/metrics/","title":"Metrics","text":"ConfusionMatrix

Confusion matrix for object detection tasks.

Attributes:

matrix (np.ndarray): A 2D np.ndarray of shape (len(classes) + 1, len(classes) + 1) containing the number of TP, FP, FN and TN for each class.

classes (List[str]): Model class names.

conf_threshold (float): Detection confidence threshold between 0 and 1. Detections with lower confidence will be excluded from the matrix.

iou_threshold (float): Detection IoU threshold between 0 and 1. Detections with lower IoU will be classified as FP.

Source code in supervision/metrics/detection.py
@dataclass\nclass ConfusionMatrix:\n    \"\"\"\n    Confusion matrix for object detection tasks.\n\n    Attributes:\n        matrix (np.ndarray): An 2D `np.ndarray` of shape\n            `(len(classes) + 1, len(classes) + 1)`\n            containing the number of `TP`, `FP`, `FN` and `TN` for each class.\n        classes (List[str]): Model class names.\n        conf_threshold (float): Detection confidence threshold between `0` and `1`.\n            Detections with lower confidence will be excluded from the matrix.\n        iou_threshold (float): Detection IoU threshold between `0` and `1`.\n            Detections with lower IoU will be classified as `FP`.\n    \"\"\"\n\n    matrix: np.ndarray\n    classes: List[str]\n    conf_threshold: float\n    iou_threshold: float\n\n    @classmethod\n    def from_detections(\n        cls,\n        predictions: List[Detections],\n        targets: List[Detections],\n        classes: List[str],\n        conf_threshold: float = 0.3,\n        iou_threshold: float = 0.5,\n    ) -> ConfusionMatrix:\n        \"\"\"\n        Calculate confusion matrix based on predicted and ground-truth detections.\n\n        Args:\n            targets (List[Detections]): Detections objects from ground-truth.\n            predictions (List[Detections]): Detections objects predicted by the model.\n            classes (List[str]): Model class names.\n            conf_threshold (float): Detection confidence threshold between `0` and `1`.\n                Detections with lower confidence will be excluded.\n            iou_threshold (float): Detection IoU threshold between `0` and `1`.\n                Detections with lower IoU will be classified as `FP`.\n\n        Returns:\n            ConfusionMatrix: New instance of ConfusionMatrix.\n\n        Example:\n            ```python\n            import supervision as sv\n\n            targets = [\n                sv.Detections(...),\n                sv.Detections(...)\n            ]\n\n            predictions = [\n                sv.Detections(...),\n                sv.Detections(...)\n            ]\n\n            confusion_matrix = sv.ConfusionMatrix.from_detections(\n                predictions=predictions,\n                targets=target,\n                classes=['person', ...]\n            )\n\n            print(confusion_matrix.matrix)\n            # np.array([\n            #    [0., 0., 0., 0.],\n            #    [0., 1., 0., 1.],\n            #    [0., 1., 1., 0.],\n            #    [1., 1., 0., 0.]\n            # ])\n            ```\n        \"\"\"\n\n        prediction_tensors = []\n        target_tensors = []\n        for prediction, target in zip(predictions, targets):\n            prediction_tensors.append(\n                detections_to_tensor(prediction, with_confidence=True)\n            )\n            target_tensors.append(detections_to_tensor(target, with_confidence=False))\n        return cls.from_tensors(\n            predictions=prediction_tensors,\n            targets=target_tensors,\n            classes=classes,\n            conf_threshold=conf_threshold,\n            iou_threshold=iou_threshold,\n        )\n\n    @classmethod\n    def from_tensors(\n        cls,\n        predictions: List[np.ndarray],\n        targets: List[np.ndarray],\n        classes: List[str],\n        conf_threshold: float = 0.3,\n        iou_threshold: float = 0.5,\n    ) -> ConfusionMatrix:\n        \"\"\"\n        Calculate confusion matrix based on predicted and ground-truth detections.\n\n        Args:\n            predictions 
(List[np.ndarray]): Each element of the list describes a single\n                image and has `shape = (M, 6)` where `M` is the number of detected\n                objects. Each row is expected to be in\n                `(x_min, y_min, x_max, y_max, class, conf)` format.\n            targets (List[np.ndarray]): Each element of the list describes a single\n                image and has `shape = (N, 5)` where `N` is the number of\n                ground-truth objects. Each row is expected to be in\n                `(x_min, y_min, x_max, y_max, class)` format.\n            classes (List[str]): Model class names.\n            conf_threshold (float): Detection confidence threshold between `0` and `1`.\n                Detections with lower confidence will be excluded.\n            iou_threshold (float): Detection iou  threshold between `0` and `1`.\n                Detections with lower iou will be classified as `FP`.\n\n        Returns:\n            ConfusionMatrix: New instance of ConfusionMatrix.\n\n        Example:\n            ```python\n            import supervision as sv\n            import numpy as np\n\n            targets = (\n                [\n                    np.array(\n                        [\n                            [0.0, 0.0, 3.0, 3.0, 1],\n                            [2.0, 2.0, 5.0, 5.0, 1],\n                            [6.0, 1.0, 8.0, 3.0, 2],\n                        ]\n                    ),\n                    np.array([1.0, 1.0, 2.0, 2.0, 2]),\n                ]\n            )\n\n            predictions = [\n                np.array(\n                    [\n                        [0.0, 0.0, 3.0, 3.0, 1, 0.9],\n                        [0.1, 0.1, 3.0, 3.0, 0, 0.9],\n                        [6.0, 1.0, 8.0, 3.0, 1, 0.8],\n                        [1.0, 6.0, 2.0, 7.0, 1, 0.8],\n                    ]\n                ),\n                np.array([[1.0, 1.0, 2.0, 2.0, 2, 0.8]])\n            ]\n\n            confusion_matrix = sv.ConfusionMatrix.from_tensors(\n                predictions=predictions,\n                targets=targets,\n                classes=['person', ...]\n            )\n\n            print(confusion_matrix.matrix)\n            # np.array([\n            #     [0., 0., 0., 0.],\n            #     [0., 1., 0., 1.],\n            #     [0., 1., 1., 0.],\n            #     [1., 1., 0., 0.]\n            # ])\n            ```\n        \"\"\"\n        validate_input_tensors(predictions, targets)\n\n        num_classes = len(classes)\n        matrix = np.zeros((num_classes + 1, num_classes + 1))\n        for true_batch, detection_batch in zip(targets, predictions):\n            matrix += cls.evaluate_detection_batch(\n                predictions=detection_batch,\n                targets=true_batch,\n                num_classes=num_classes,\n                conf_threshold=conf_threshold,\n                iou_threshold=iou_threshold,\n            )\n        return cls(\n            matrix=matrix,\n            classes=classes,\n            conf_threshold=conf_threshold,\n            iou_threshold=iou_threshold,\n        )\n\n    @staticmethod\n    def evaluate_detection_batch(\n        predictions: np.ndarray,\n        targets: np.ndarray,\n        num_classes: int,\n        conf_threshold: float,\n        iou_threshold: float,\n    ) -> np.ndarray:\n        \"\"\"\n        Calculate confusion matrix for a batch of detections for a single image.\n\n        Args:\n            predictions (np.ndarray): Batch prediction. 
Describes a single image and\n                has `shape = (M, 6)` where `M` is the number of detected objects.\n                Each row is expected to be in\n                `(x_min, y_min, x_max, y_max, class, conf)` format.\n            targets (np.ndarray): Batch target labels. Describes a single image and\n                has `shape = (N, 5)` where `N` is the number of ground-truth objects.\n                Each row is expected to be in\n                `(x_min, y_min, x_max, y_max, class)` format.\n            num_classes (int): Number of classes.\n            conf_threshold (float): Detection confidence threshold between `0` and `1`.\n                Detections with lower confidence will be excluded.\n            iou_threshold (float): Detection iou  threshold between `0` and `1`.\n                Detections with lower iou will be classified as `FP`.\n\n        Returns:\n            np.ndarray: Confusion matrix based on a single image.\n        \"\"\"\n        result_matrix = np.zeros((num_classes + 1, num_classes + 1))\n\n        conf_idx = 5\n        confidence = predictions[:, conf_idx]\n        detection_batch_filtered = predictions[confidence > conf_threshold]\n\n        class_id_idx = 4\n        true_classes = np.array(targets[:, class_id_idx], dtype=np.int16)\n        detection_classes = np.array(\n            detection_batch_filtered[:, class_id_idx], dtype=np.int16\n        )\n        true_boxes = targets[:, :class_id_idx]\n        detection_boxes = detection_batch_filtered[:, :class_id_idx]\n\n        iou_batch = box_iou_batch(\n            boxes_true=true_boxes, boxes_detection=detection_boxes\n        )\n        matched_idx = np.asarray(iou_batch > iou_threshold).nonzero()\n\n        if matched_idx[0].shape[0]:\n            matches = np.stack(\n                (matched_idx[0], matched_idx[1], iou_batch[matched_idx]), axis=1\n            )\n            matches = ConfusionMatrix._drop_extra_matches(matches=matches)\n        else:\n            matches = np.zeros((0, 3))\n\n        matched_true_idx, matched_detection_idx, _ = matches.transpose().astype(\n            np.int16\n        )\n\n        for i, true_class_value in enumerate(true_classes):\n            j = matched_true_idx == i\n            if matches.shape[0] > 0 and sum(j) == 1:\n                result_matrix[\n                    true_class_value, detection_classes[matched_detection_idx[j]]\n                ] += 1  # TP\n            else:\n                result_matrix[true_class_value, num_classes] += 1  # FN\n\n        for i, detection_class_value in enumerate(detection_classes):\n            if not any(matched_detection_idx == i):\n                result_matrix[num_classes, detection_class_value] += 1  # FP\n\n        return result_matrix\n\n    @staticmethod\n    def _drop_extra_matches(matches: np.ndarray) -> np.ndarray:\n        \"\"\"\n        Deduplicate matches. 
If there are multiple matches for the same true or\n        predicted box, only the one with the highest IoU is kept.\n        \"\"\"\n        if matches.shape[0] > 0:\n            matches = matches[matches[:, 2].argsort()[::-1]]\n            matches = matches[np.unique(matches[:, 1], return_index=True)[1]]\n            matches = matches[matches[:, 2].argsort()[::-1]]\n            matches = matches[np.unique(matches[:, 0], return_index=True)[1]]\n        return matches\n\n    @classmethod\n    def benchmark(\n        cls,\n        dataset: DetectionDataset,\n        callback: Callable[[np.ndarray], Detections],\n        conf_threshold: float = 0.3,\n        iou_threshold: float = 0.5,\n    ) -> ConfusionMatrix:\n        \"\"\"\n        Calculate confusion matrix from dataset and callback function.\n\n        Args:\n            dataset (DetectionDataset): Object detection dataset used for evaluation.\n            callback (Callable[[np.ndarray], Detections]): Function that takes an image\n                as input and returns Detections object.\n            conf_threshold (float): Detection confidence threshold between `0` and `1`.\n                Detections with lower confidence will be excluded.\n            iou_threshold (float): Detection IoU threshold between `0` and `1`.\n                Detections with lower IoU will be classified as `FP`.\n\n        Returns:\n            ConfusionMatrix: New instance of ConfusionMatrix.\n\n        Example:\n            ```python\n            import supervision as sv\n            from ultralytics import YOLO\n\n            dataset = sv.DetectionDataset.from_yolo(...)\n\n            model = YOLO(...)\n            def callback(image: np.ndarray) -> sv.Detections:\n                result = model(image)[0]\n                return sv.Detections.from_ultralytics(result)\n\n            confusion_matrix = sv.ConfusionMatrix.benchmark(\n                dataset = dataset,\n                callback = callback\n            )\n\n            print(confusion_matrix.matrix)\n            # np.array([\n            #     [0., 0., 0., 0.],\n            #     [0., 1., 0., 1.],\n            #     [0., 1., 1., 0.],\n            #     [1., 1., 0., 0.]\n            # ])\n            ```\n        \"\"\"\n        predictions, targets = [], []\n        for img_name, img in dataset.images.items():\n            predictions_batch = callback(img)\n            predictions.append(predictions_batch)\n            targets_batch = dataset.annotations[img_name]\n            targets.append(targets_batch)\n        return cls.from_detections(\n            predictions=predictions,\n            targets=targets,\n            classes=dataset.classes,\n            conf_threshold=conf_threshold,\n            iou_threshold=iou_threshold,\n        )\n\n    def plot(\n        self,\n        save_path: Optional[str] = None,\n        title: Optional[str] = None,\n        classes: Optional[List[str]] = None,\n        normalize: bool = False,\n        fig_size: Tuple[int, int] = (12, 10),\n    ) -> matplotlib.figure.Figure:\n        \"\"\"\n        Create confusion matrix plot and save it at selected location.\n\n        Args:\n            save_path (Optional[str]): Path to save the plot. 
If not provided,\n                plot will be displayed.\n            title (Optional[str]): Title of the plot.\n            classes (Optional[List[str]]): List of classes to be displayed on the plot.\n                If not provided, all classes will be displayed.\n            normalize (bool): If True, normalize the confusion matrix.\n            fig_size (Tuple[int, int]): Size of the plot.\n\n        Returns:\n            matplotlib.figure.Figure: Confusion matrix plot.\n        \"\"\"\n\n        array = self.matrix.copy()\n\n        if normalize:\n            eps = 1e-8\n            array = array / (array.sum(0).reshape(1, -1) + eps)\n\n        array[array < 0.005] = np.nan\n\n        fig, ax = plt.subplots(figsize=fig_size, tight_layout=True, facecolor=\"white\")\n\n        class_names = classes if classes is not None else self.classes\n        use_labels_for_ticks = class_names is not None and (0 < len(class_names) < 99)\n        if use_labels_for_ticks:\n            x_tick_labels = class_names + [\"FN\"]\n            y_tick_labels = class_names + [\"FP\"]\n            num_ticks = len(x_tick_labels)\n        else:\n            x_tick_labels = None\n            y_tick_labels = None\n            num_ticks = len(array)\n        im = ax.imshow(array, cmap=\"Blues\")\n\n        cbar = ax.figure.colorbar(im, ax=ax)\n        cbar.mappable.set_clim(vmin=0, vmax=np.nanmax(array))\n\n        if x_tick_labels is None:\n            tick_interval = 2\n        else:\n            tick_interval = 1\n        ax.set_xticks(np.arange(0, num_ticks, tick_interval), labels=x_tick_labels)\n        ax.set_yticks(np.arange(0, num_ticks, tick_interval), labels=y_tick_labels)\n\n        plt.setp(ax.get_xticklabels(), rotation=90, ha=\"right\", rotation_mode=\"default\")\n\n        labelsize = 10 if num_ticks < 50 else 8\n        ax.tick_params(axis=\"both\", which=\"both\", labelsize=labelsize)\n\n        if num_ticks < 30:\n            for i in range(array.shape[0]):\n                for j in range(array.shape[1]):\n                    n_preds = array[i, j]\n                    if not np.isnan(n_preds):\n                        ax.text(\n                            j,\n                            i,\n                            f\"{n_preds:.2f}\" if normalize else f\"{n_preds:.0f}\",\n                            ha=\"center\",\n                            va=\"center\",\n                            color=\"black\"\n                            if n_preds < 0.5 * np.nanmax(array)\n                            else \"white\",\n                        )\n\n        if title:\n            ax.set_title(title, fontsize=20)\n\n        ax.set_xlabel(\"Predicted\")\n        ax.set_ylabel(\"True\")\n        ax.set_facecolor(\"white\")\n        if save_path:\n            fig.savefig(\n                save_path, dpi=250, facecolor=fig.get_facecolor(), transparent=True\n            )\n        return fig\n
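A minimal end-to-end sketch of building a confusion matrix from raw tensors (values are illustrative, not part of the library documentation):

import numpy as np\nimport supervision as sv\n\ntargets = [np.array([[0.0, 0.0, 3.0, 3.0, 0]])]\npredictions = [np.array([[0.0, 0.0, 3.0, 3.0, 0, 0.9]])]\n\nconfusion_matrix = sv.ConfusionMatrix.from_tensors(\n    predictions=predictions,\n    targets=targets,\n    classes=['person'],\n)\n\n# the single prediction matches the single target, so it lands on the diagonal\nprint(confusion_matrix.matrix)\n# [[1. 0.]\n#  [0. 0.]]\n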
MeanAveragePrecision

Mean Average Precision for object detection tasks.

Attributes:

map50_95 (float): Mean Average Precision (mAP) calculated over IoU thresholds ranging from 0.50 to 0.95 with a step size of 0.05.

map50 (float): Mean Average Precision (mAP) calculated specifically at an IoU threshold of 0.50.

map75 (float): Mean Average Precision (mAP) calculated specifically at an IoU threshold of 0.75.

per_class_ap50_95 (np.ndarray): Average Precision (AP) values calculated over IoU thresholds ranging from 0.50 to 0.95 with a step size of 0.05, provided for each individual class.

Source code in supervision/metrics/detection.py
@dataclass(frozen=True)\nclass MeanAveragePrecision:\n    \"\"\"\n    Mean Average Precision for object detection tasks.\n\n    Attributes:\n        map50_95 (float): Mean Average Precision (mAP) calculated over IoU thresholds\n            ranging from `0.50` to `0.95` with a step size of `0.05`.\n        map50 (float): Mean Average Precision (mAP) calculated specifically at\n            an IoU threshold of `0.50`.\n        map75 (float): Mean Average Precision (mAP) calculated specifically at\n            an IoU threshold of `0.75`.\n        per_class_ap50_95 (np.ndarray): Average Precision (AP) values calculated over\n            IoU thresholds ranging from `0.50` to `0.95` with a step size of `0.05`,\n            provided for each individual class.\n    \"\"\"\n\n    map50_95: float\n    map50: float\n    map75: float\n    per_class_ap50_95: np.ndarray\n\n    @classmethod\n    def from_detections(\n        cls,\n        predictions: List[Detections],\n        targets: List[Detections],\n    ) -> MeanAveragePrecision:\n        \"\"\"\n        Calculate mean average precision based on predicted and ground-truth detections.\n\n        Args:\n            targets (List[Detections]): Detections objects from ground-truth.\n            predictions (List[Detections]): Detections objects predicted by the model.\n        Returns:\n            MeanAveragePrecision: New instance of ConfusionMatrix.\n\n        Example:\n            ```python\n            import supervision as sv\n\n            targets = [\n                sv.Detections(...),\n                sv.Detections(...)\n            ]\n\n            predictions = [\n                sv.Detections(...),\n                sv.Detections(...)\n            ]\n\n            mean_average_precision = sv.MeanAveragePrecision.from_detections(\n                predictions=predictions,\n                targets=target,\n            )\n\n            print(mean_average_precison.map50_95)\n            # 0.2899\n            ```\n        \"\"\"\n        prediction_tensors = []\n        target_tensors = []\n        for prediction, target in zip(predictions, targets):\n            prediction_tensors.append(\n                detections_to_tensor(prediction, with_confidence=True)\n            )\n            target_tensors.append(detections_to_tensor(target, with_confidence=False))\n        return cls.from_tensors(\n            predictions=prediction_tensors,\n            targets=target_tensors,\n        )\n\n    @classmethod\n    def benchmark(\n        cls,\n        dataset: DetectionDataset,\n        callback: Callable[[np.ndarray], Detections],\n    ) -> MeanAveragePrecision:\n        \"\"\"\n        Calculate mean average precision from dataset and callback function.\n\n        Args:\n            dataset (DetectionDataset): Object detection dataset used for evaluation.\n            callback (Callable[[np.ndarray], Detections]): Function that takes\n                an image as input and returns Detections object.\n        Returns:\n            MeanAveragePrecision: New instance of MeanAveragePrecision.\n\n        Example:\n            ```python\n            import supervision as sv\n            from ultralytics import YOLO\n\n            dataset = sv.DetectionDataset.from_yolo(...)\n\n            model = YOLO(...)\n            def callback(image: np.ndarray) -> sv.Detections:\n                result = model(image)[0]\n                return sv.Detections.from_ultralytics(result)\n\n            mean_average_precision = sv.MeanAveragePrecision.benchmark(\n           
     dataset = dataset,\n                callback = callback\n            )\n\n            print(mean_average_precision.map50_95)\n            # 0.433\n            ```\n        \"\"\"\n        predictions, targets = [], []\n        for img_name, img in dataset.images.items():\n            predictions_batch = callback(img)\n            predictions.append(predictions_batch)\n            targets_batch = dataset.annotations[img_name]\n            targets.append(targets_batch)\n        return cls.from_detections(\n            predictions=predictions,\n            targets=targets,\n        )\n\n    @classmethod\n    def from_tensors(\n        cls,\n        predictions: List[np.ndarray],\n        targets: List[np.ndarray],\n    ) -> MeanAveragePrecision:\n        \"\"\"\n        Calculate Mean Average Precision based on predicted and ground-truth\n            detections at different threshold.\n\n        Args:\n            predictions (List[np.ndarray]): Each element of the list describes\n                a single image and has `shape = (M, 6)` where `M` is\n                the number of detected objects. Each row is expected to be\n                in `(x_min, y_min, x_max, y_max, class, conf)` format.\n            targets (List[np.ndarray]): Each element of the list describes a single\n                image and has `shape = (N, 5)` where `N` is the\n                number of ground-truth objects. Each row is expected to be in\n                `(x_min, y_min, x_max, y_max, class)` format.\n        Returns:\n            MeanAveragePrecision: New instance of MeanAveragePrecision.\n\n        Example:\n            ```python\n            import supervision as sv\n            import numpy as np\n\n            targets = (\n                [\n                    np.array(\n                        [\n                            [0.0, 0.0, 3.0, 3.0, 1],\n                            [2.0, 2.0, 5.0, 5.0, 1],\n                            [6.0, 1.0, 8.0, 3.0, 2],\n                        ]\n                    ),\n                    np.array([[1.0, 1.0, 2.0, 2.0, 2]]),\n                ]\n            )\n\n            predictions = [\n                np.array(\n                    [\n                        [0.0, 0.0, 3.0, 3.0, 1, 0.9],\n                        [0.1, 0.1, 3.0, 3.0, 0, 0.9],\n                        [6.0, 1.0, 8.0, 3.0, 1, 0.8],\n                        [1.0, 6.0, 2.0, 7.0, 1, 0.8],\n                    ]\n                ),\n                np.array([[1.0, 1.0, 2.0, 2.0, 2, 0.8]])\n            ]\n\n            mean_average_precison = sv.MeanAveragePrecision.from_tensors(\n                predictions=predictions,\n                targets=targets,\n            )\n\n            print(mean_average_precison.map50_95)\n            # 0.6649\n            ```\n        \"\"\"\n        validate_input_tensors(predictions, targets)\n        iou_thresholds = np.linspace(0.5, 0.95, 10)\n        stats = []\n\n        # Gather matching stats for predictions and targets\n        for true_objs, predicted_objs in zip(targets, predictions):\n            if predicted_objs.shape[0] == 0:\n                if true_objs.shape[0]:\n                    stats.append(\n                        (\n                            np.zeros((0, iou_thresholds.size), dtype=bool),\n                            *np.zeros((2, 0)),\n                            true_objs[:, 4],\n                        )\n                    )\n                continue\n\n            if true_objs.shape[0]:\n                matches = 
cls._match_detection_batch(\n                    predicted_objs, true_objs, iou_thresholds\n                )\n                stats.append(\n                    (\n                        matches,\n                        predicted_objs[:, 5],\n                        predicted_objs[:, 4],\n                        true_objs[:, 4],\n                    )\n                )\n\n        # Compute average precisions if any matches exist\n        if stats:\n            concatenated_stats = [np.concatenate(items, 0) for items in zip(*stats)]\n            average_precisions = cls._average_precisions_per_class(*concatenated_stats)\n            map50 = average_precisions[:, 0].mean()\n            map75 = average_precisions[:, 5].mean()\n            map50_95 = average_precisions.mean()\n        else:\n            map50, map75, map50_95 = 0, 0, 0\n            average_precisions = []\n\n        return cls(\n            map50_95=map50_95,\n            map50=map50,\n            map75=map75,\n            per_class_ap50_95=average_precisions,\n        )\n\n    @staticmethod\n    def compute_average_precision(recall: np.ndarray, precision: np.ndarray) -> float:\n        \"\"\"\n        Compute the average precision using 101-point interpolation (COCO), given\n            the recall and precision curves.\n\n        Args:\n            recall (np.ndarray): The recall curve.\n            precision (np.ndarray): The precision curve.\n\n        Returns:\n            float: Average precision.\n        \"\"\"\n        extended_recall = np.concatenate(([0.0], recall, [1.0]))\n        extended_precision = np.concatenate(([1.0], precision, [0.0]))\n        max_accumulated_precision = np.flip(\n            np.maximum.accumulate(np.flip(extended_precision))\n        )\n        interpolated_recall_levels = np.linspace(0, 1, 101)\n        interpolated_precision = np.interp(\n            interpolated_recall_levels, extended_recall, max_accumulated_precision\n        )\n        average_precision = np.trapz(interpolated_precision, interpolated_recall_levels)\n        return average_precision\n\n    @staticmethod\n    def _match_detection_batch(\n        predictions: np.ndarray, targets: np.ndarray, iou_thresholds: np.ndarray\n    ) -> np.ndarray:\n        \"\"\"\n        Match predictions with target labels based on IoU levels.\n\n        Args:\n            predictions (np.ndarray): Batch prediction. Describes a single image and\n                has `shape = (M, 6)` where `M` is the number of detected objects.\n                Each row is expected to be in\n                `(x_min, y_min, x_max, y_max, class, conf)` format.\n            targets (np.ndarray): Batch target labels. 
Describes a single image and\n                has `shape = (N, 5)` where `N` is the number of ground-truth objects.\n                Each row is expected to be in\n                `(x_min, y_min, x_max, y_max, class)` format.\n            iou_thresholds (np.ndarray): Array contains different IoU thresholds.\n\n        Returns:\n            np.ndarray: Matched prediction with target labels result.\n        \"\"\"\n        num_predictions, num_iou_levels = predictions.shape[0], iou_thresholds.shape[0]\n        correct = np.zeros((num_predictions, num_iou_levels), dtype=bool)\n        iou = box_iou_batch(targets[:, :4], predictions[:, :4])\n        correct_class = targets[:, 4:5] == predictions[:, 4]\n\n        for i, iou_level in enumerate(iou_thresholds):\n            matched_indices = np.where((iou >= iou_level) & correct_class)\n\n            if matched_indices[0].shape[0]:\n                combined_indices = np.stack(matched_indices, axis=1)\n                iou_values = iou[matched_indices][:, None]\n                matches = np.hstack([combined_indices, iou_values])\n\n                if matched_indices[0].shape[0] > 1:\n                    matches = matches[matches[:, 2].argsort()[::-1]]\n                    matches = matches[np.unique(matches[:, 1], return_index=True)[1]]\n                    matches = matches[np.unique(matches[:, 0], return_index=True)[1]]\n\n                correct[matches[:, 1].astype(int), i] = True\n\n        return correct\n\n    @staticmethod\n    def _average_precisions_per_class(\n        matches: np.ndarray,\n        prediction_confidence: np.ndarray,\n        prediction_class_ids: np.ndarray,\n        true_class_ids: np.ndarray,\n        eps: float = 1e-16,\n    ) -> np.ndarray:\n        \"\"\"\n        Compute the average precision, given the recall and precision curves.\n        Source: https://github.com/rafaelpadilla/Object-Detection-Metrics.\n\n        Args:\n            matches (np.ndarray): True positives.\n            prediction_confidence (np.ndarray): Objectness value from 0-1.\n            prediction_class_ids (np.ndarray): Predicted object classes.\n            true_class_ids (np.ndarray): True object classes.\n            eps (float, optional): Small value to prevent division by zero.\n\n        Returns:\n            np.ndarray: Average precision for different IoU levels.\n        \"\"\"\n        sorted_indices = np.argsort(-prediction_confidence)\n        matches = matches[sorted_indices]\n        prediction_class_ids = prediction_class_ids[sorted_indices]\n\n        unique_classes, class_counts = np.unique(true_class_ids, return_counts=True)\n        num_classes = unique_classes.shape[0]\n\n        average_precisions = np.zeros((num_classes, matches.shape[1]))\n\n        for class_idx, class_id in enumerate(unique_classes):\n            is_class = prediction_class_ids == class_id\n            total_true = class_counts[class_idx]\n            total_prediction = is_class.sum()\n\n            if total_prediction == 0 or total_true == 0:\n                continue\n\n            false_positives = (1 - matches[is_class]).cumsum(0)\n            true_positives = matches[is_class].cumsum(0)\n            recall = true_positives / (total_true + eps)\n            precision = true_positives / (true_positives + false_positives)\n\n            for iou_level_idx in range(matches.shape[1]):\n                average_precisions[class_idx, iou_level_idx] = (\n                    MeanAveragePrecision.compute_average_precision(\n                        recall[:, 
iou_level_idx], precision[:, iou_level_idx]\n                    )\n                )\n\n        return average_precisions\n
"},{"location":"detection/metrics/#supervision.metrics.detection.ConfusionMatrix-functions","title":"Functions","text":""},{"location":"detection/metrics/#supervision.metrics.detection.ConfusionMatrix.benchmark","title":"benchmark(dataset, callback, conf_threshold=0.3, iou_threshold=0.5) classmethod","text":"

Calculate confusion matrix from dataset and callback function.

Parameters:

Name Type Description Default dataset DetectionDataset

Object detection dataset used for evaluation.

required callback Callable[[ndarray], Detections]

Function that takes an image as input and returns Detections object.

required conf_threshold float

Detection confidence threshold between 0 and 1. Detections with lower confidence will be excluded.

0.3 iou_threshold float

Detection IoU threshold between 0 and 1. Detections with lower IoU will be classified as FP.

0.5

Returns:

Name Type Description ConfusionMatrix ConfusionMatrix

New instance of ConfusionMatrix.

Example
import supervision as sv\nfrom ultralytics import YOLO\n\ndataset = sv.DetectionDataset.from_yolo(...)\n\nmodel = YOLO(...)\ndef callback(image: np.ndarray) -> sv.Detections:\n    result = model(image)[0]\n    return sv.Detections.from_ultralytics(result)\n\nconfusion_matrix = sv.ConfusionMatrix.benchmark(\n    dataset = dataset,\n    callback = callback\n)\n\nprint(confusion_matrix.matrix)\n# np.array([\n#     [0., 0., 0., 0.],\n#     [0., 1., 0., 1.],\n#     [0., 1., 1., 0.],\n#     [1., 1., 0., 0.]\n# ])\n
Source code in supervision/metrics/detection.py
@classmethod\ndef benchmark(\n    cls,\n    dataset: DetectionDataset,\n    callback: Callable[[np.ndarray], Detections],\n    conf_threshold: float = 0.3,\n    iou_threshold: float = 0.5,\n) -> ConfusionMatrix:\n    \"\"\"\n    Calculate confusion matrix from dataset and callback function.\n\n    Args:\n        dataset (DetectionDataset): Object detection dataset used for evaluation.\n        callback (Callable[[np.ndarray], Detections]): Function that takes an image\n            as input and returns Detections object.\n        conf_threshold (float): Detection confidence threshold between `0` and `1`.\n            Detections with lower confidence will be excluded.\n        iou_threshold (float): Detection IoU threshold between `0` and `1`.\n            Detections with lower IoU will be classified as `FP`.\n\n    Returns:\n        ConfusionMatrix: New instance of ConfusionMatrix.\n\n    Example:\n        ```python\n        import supervision as sv\n        from ultralytics import YOLO\n\n        dataset = sv.DetectionDataset.from_yolo(...)\n\n        model = YOLO(...)\n        def callback(image: np.ndarray) -> sv.Detections:\n            result = model(image)[0]\n            return sv.Detections.from_ultralytics(result)\n\n        confusion_matrix = sv.ConfusionMatrix.benchmark(\n            dataset = dataset,\n            callback = callback\n        )\n\n        print(confusion_matrix.matrix)\n        # np.array([\n        #     [0., 0., 0., 0.],\n        #     [0., 1., 0., 1.],\n        #     [0., 1., 1., 0.],\n        #     [1., 1., 0., 0.]\n        # ])\n        ```\n    \"\"\"\n    predictions, targets = [], []\n    for img_name, img in dataset.images.items():\n        predictions_batch = callback(img)\n        predictions.append(predictions_batch)\n        targets_batch = dataset.annotations[img_name]\n        targets.append(targets_batch)\n    return cls.from_detections(\n        predictions=predictions,\n        targets=targets,\n        classes=dataset.classes,\n        conf_threshold=conf_threshold,\n        iou_threshold=iou_threshold,\n    )\n
"},{"location":"detection/metrics/#supervision.metrics.detection.ConfusionMatrix.evaluate_detection_batch","title":"evaluate_detection_batch(predictions, targets, num_classes, conf_threshold, iou_threshold) staticmethod","text":"

Calculate confusion matrix for a batch of detections for a single image.

Parameters:

Name Type Description Default predictions ndarray

Batch prediction. Describes a single image and has shape = (M, 6) where M is the number of detected objects. Each row is expected to be in (x_min, y_min, x_max, y_max, class, conf) format.

required targets ndarray

Batch target labels. Describes a single image and has shape = (N, 5) where N is the number of ground-truth objects. Each row is expected to be in (x_min, y_min, x_max, y_max, class) format.

required num_classes int

Number of classes.

required conf_threshold float

Detection confidence threshold between 0 and 1. Detections with lower confidence will be excluded.

required iou_threshold float

Detection IoU threshold between 0 and 1. Detections with lower IoU will be classified as FP.

required

Returns:

Type Description ndarray

np.ndarray: Confusion matrix based on a single image.
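A minimal illustrative sketch (toy arrays, not taken from the library docs) showing how this static method can be called directly:

import numpy as np
import supervision as sv

# one image: two predictions, one ground-truth box (toy values)
predictions = np.array([
    [0.0, 0.0, 10.0, 10.0, 0, 0.9],    # overlaps the target, same class -> TP
    [20.0, 20.0, 30.0, 30.0, 1, 0.8],  # no matching target -> FP
])
targets = np.array([
    [0.0, 0.0, 10.0, 10.0, 0],
])

matrix = sv.ConfusionMatrix.evaluate_detection_batch(
    predictions=predictions,
    targets=targets,
    num_classes=2,
    conf_threshold=0.3,
    iou_threshold=0.5,
)
print(matrix.shape)  # (3, 3) -- (num_classes + 1, num_classes + 1)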

Source code in supervision/metrics/detection.py
@staticmethod\ndef evaluate_detection_batch(\n    predictions: np.ndarray,\n    targets: np.ndarray,\n    num_classes: int,\n    conf_threshold: float,\n    iou_threshold: float,\n) -> np.ndarray:\n    \"\"\"\n    Calculate confusion matrix for a batch of detections for a single image.\n\n    Args:\n        predictions (np.ndarray): Batch prediction. Describes a single image and\n            has `shape = (M, 6)` where `M` is the number of detected objects.\n            Each row is expected to be in\n            `(x_min, y_min, x_max, y_max, class, conf)` format.\n        targets (np.ndarray): Batch target labels. Describes a single image and\n            has `shape = (N, 5)` where `N` is the number of ground-truth objects.\n            Each row is expected to be in\n            `(x_min, y_min, x_max, y_max, class)` format.\n        num_classes (int): Number of classes.\n        conf_threshold (float): Detection confidence threshold between `0` and `1`.\n            Detections with lower confidence will be excluded.\n        iou_threshold (float): Detection iou  threshold between `0` and `1`.\n            Detections with lower iou will be classified as `FP`.\n\n    Returns:\n        np.ndarray: Confusion matrix based on a single image.\n    \"\"\"\n    result_matrix = np.zeros((num_classes + 1, num_classes + 1))\n\n    conf_idx = 5\n    confidence = predictions[:, conf_idx]\n    detection_batch_filtered = predictions[confidence > conf_threshold]\n\n    class_id_idx = 4\n    true_classes = np.array(targets[:, class_id_idx], dtype=np.int16)\n    detection_classes = np.array(\n        detection_batch_filtered[:, class_id_idx], dtype=np.int16\n    )\n    true_boxes = targets[:, :class_id_idx]\n    detection_boxes = detection_batch_filtered[:, :class_id_idx]\n\n    iou_batch = box_iou_batch(\n        boxes_true=true_boxes, boxes_detection=detection_boxes\n    )\n    matched_idx = np.asarray(iou_batch > iou_threshold).nonzero()\n\n    if matched_idx[0].shape[0]:\n        matches = np.stack(\n            (matched_idx[0], matched_idx[1], iou_batch[matched_idx]), axis=1\n        )\n        matches = ConfusionMatrix._drop_extra_matches(matches=matches)\n    else:\n        matches = np.zeros((0, 3))\n\n    matched_true_idx, matched_detection_idx, _ = matches.transpose().astype(\n        np.int16\n    )\n\n    for i, true_class_value in enumerate(true_classes):\n        j = matched_true_idx == i\n        if matches.shape[0] > 0 and sum(j) == 1:\n            result_matrix[\n                true_class_value, detection_classes[matched_detection_idx[j]]\n            ] += 1  # TP\n        else:\n            result_matrix[true_class_value, num_classes] += 1  # FN\n\n    for i, detection_class_value in enumerate(detection_classes):\n        if not any(matched_detection_idx == i):\n            result_matrix[num_classes, detection_class_value] += 1  # FP\n\n    return result_matrix\n
"},{"location":"detection/metrics/#supervision.metrics.detection.ConfusionMatrix.from_detections","title":"from_detections(predictions, targets, classes, conf_threshold=0.3, iou_threshold=0.5) classmethod","text":"

Calculate confusion matrix based on predicted and ground-truth detections.

Parameters:

Name Type Description Default targets List[Detections]

Detections objects from ground-truth.

required predictions List[Detections]

Detections objects predicted by the model.

required classes List[str]

Model class names.

required conf_threshold float

Detection confidence threshold between 0 and 1. Detections with lower confidence will be excluded.

0.3 iou_threshold float

Detection IoU threshold between 0 and 1. Detections with lower IoU will be classified as FP.

0.5

Returns:

Name Type Description ConfusionMatrix ConfusionMatrix

New instance of ConfusionMatrix.

Example
import supervision as sv\n\ntargets = [\n    sv.Detections(...),\n    sv.Detections(...)\n]\n\npredictions = [\n    sv.Detections(...),\n    sv.Detections(...)\n]\n\nconfusion_matrix = sv.ConfusionMatrix.from_detections(\n    predictions=predictions,\n    targets=targets,\n    classes=['person', ...]\n)\n\nprint(confusion_matrix.matrix)\n# np.array([\n#    [0., 0., 0., 0.],\n#    [0., 1., 0., 1.],\n#    [0., 1., 1., 0.],\n#    [1., 1., 0., 0.]\n# ])\n
Source code in supervision/metrics/detection.py
@classmethod\ndef from_detections(\n    cls,\n    predictions: List[Detections],\n    targets: List[Detections],\n    classes: List[str],\n    conf_threshold: float = 0.3,\n    iou_threshold: float = 0.5,\n) -> ConfusionMatrix:\n    \"\"\"\n    Calculate confusion matrix based on predicted and ground-truth detections.\n\n    Args:\n        targets (List[Detections]): Detections objects from ground-truth.\n        predictions (List[Detections]): Detections objects predicted by the model.\n        classes (List[str]): Model class names.\n        conf_threshold (float): Detection confidence threshold between `0` and `1`.\n            Detections with lower confidence will be excluded.\n        iou_threshold (float): Detection IoU threshold between `0` and `1`.\n            Detections with lower IoU will be classified as `FP`.\n\n    Returns:\n        ConfusionMatrix: New instance of ConfusionMatrix.\n\n    Example:\n        ```python\n        import supervision as sv\n\n        targets = [\n            sv.Detections(...),\n            sv.Detections(...)\n        ]\n\n        predictions = [\n            sv.Detections(...),\n            sv.Detections(...)\n        ]\n\n        confusion_matrix = sv.ConfusionMatrix.from_detections(\n            predictions=predictions,\n            targets=target,\n            classes=['person', ...]\n        )\n\n        print(confusion_matrix.matrix)\n        # np.array([\n        #    [0., 0., 0., 0.],\n        #    [0., 1., 0., 1.],\n        #    [0., 1., 1., 0.],\n        #    [1., 1., 0., 0.]\n        # ])\n        ```\n    \"\"\"\n\n    prediction_tensors = []\n    target_tensors = []\n    for prediction, target in zip(predictions, targets):\n        prediction_tensors.append(\n            detections_to_tensor(prediction, with_confidence=True)\n        )\n        target_tensors.append(detections_to_tensor(target, with_confidence=False))\n    return cls.from_tensors(\n        predictions=prediction_tensors,\n        targets=target_tensors,\n        classes=classes,\n        conf_threshold=conf_threshold,\n        iou_threshold=iou_threshold,\n    )\n
"},{"location":"detection/metrics/#supervision.metrics.detection.ConfusionMatrix.from_tensors","title":"from_tensors(predictions, targets, classes, conf_threshold=0.3, iou_threshold=0.5) classmethod","text":"

Calculate confusion matrix based on predicted and ground-truth detections.

Parameters:

Name Type Description Default predictions List[ndarray]

Each element of the list describes a single image and has shape = (M, 6) where M is the number of detected objects. Each row is expected to be in (x_min, y_min, x_max, y_max, class, conf) format.

required targets List[ndarray]

Each element of the list describes a single image and has shape = (N, 5) where N is the number of ground-truth objects. Each row is expected to be in (x_min, y_min, x_max, y_max, class) format.

required classes List[str]

Model class names.

required conf_threshold float

Detection confidence threshold between 0 and 1. Detections with lower confidence will be excluded.

0.3 iou_threshold float

Detection IoU threshold between 0 and 1. Detections with lower IoU will be classified as FP.

0.5

Returns:

Name Type Description ConfusionMatrix ConfusionMatrix

New instance of ConfusionMatrix.

Example
import supervision as sv\nimport numpy as np\n\ntargets = (\n    [\n        np.array(\n            [\n                [0.0, 0.0, 3.0, 3.0, 1],\n                [2.0, 2.0, 5.0, 5.0, 1],\n                [6.0, 1.0, 8.0, 3.0, 2],\n            ]\n        ),\n        np.array([[1.0, 1.0, 2.0, 2.0, 2]]),\n    ]\n)\n\npredictions = [\n    np.array(\n        [\n            [0.0, 0.0, 3.0, 3.0, 1, 0.9],\n            [0.1, 0.1, 3.0, 3.0, 0, 0.9],\n            [6.0, 1.0, 8.0, 3.0, 1, 0.8],\n            [1.0, 6.0, 2.0, 7.0, 1, 0.8],\n        ]\n    ),\n    np.array([[1.0, 1.0, 2.0, 2.0, 2, 0.8]])\n]\n\nconfusion_matrix = sv.ConfusionMatrix.from_tensors(\n    predictions=predictions,\n    targets=targets,\n    classes=['person', ...]\n)\n\nprint(confusion_matrix.matrix)\n# np.array([\n#     [0., 0., 0., 0.],\n#     [0., 1., 0., 1.],\n#     [0., 1., 1., 0.],\n#     [1., 1., 0., 0.]\n# ])\n
Source code in supervision/metrics/detection.py
@classmethod\ndef from_tensors(\n    cls,\n    predictions: List[np.ndarray],\n    targets: List[np.ndarray],\n    classes: List[str],\n    conf_threshold: float = 0.3,\n    iou_threshold: float = 0.5,\n) -> ConfusionMatrix:\n    \"\"\"\n    Calculate confusion matrix based on predicted and ground-truth detections.\n\n    Args:\n        predictions (List[np.ndarray]): Each element of the list describes a single\n            image and has `shape = (M, 6)` where `M` is the number of detected\n            objects. Each row is expected to be in\n            `(x_min, y_min, x_max, y_max, class, conf)` format.\n        targets (List[np.ndarray]): Each element of the list describes a single\n            image and has `shape = (N, 5)` where `N` is the number of\n            ground-truth objects. Each row is expected to be in\n            `(x_min, y_min, x_max, y_max, class)` format.\n        classes (List[str]): Model class names.\n        conf_threshold (float): Detection confidence threshold between `0` and `1`.\n            Detections with lower confidence will be excluded.\n        iou_threshold (float): Detection iou  threshold between `0` and `1`.\n            Detections with lower iou will be classified as `FP`.\n\n    Returns:\n        ConfusionMatrix: New instance of ConfusionMatrix.\n\n    Example:\n        ```python\n        import supervision as sv\n        import numpy as np\n\n        targets = (\n            [\n                np.array(\n                    [\n                        [0.0, 0.0, 3.0, 3.0, 1],\n                        [2.0, 2.0, 5.0, 5.0, 1],\n                        [6.0, 1.0, 8.0, 3.0, 2],\n                    ]\n                ),\n                np.array([1.0, 1.0, 2.0, 2.0, 2]),\n            ]\n        )\n\n        predictions = [\n            np.array(\n                [\n                    [0.0, 0.0, 3.0, 3.0, 1, 0.9],\n                    [0.1, 0.1, 3.0, 3.0, 0, 0.9],\n                    [6.0, 1.0, 8.0, 3.0, 1, 0.8],\n                    [1.0, 6.0, 2.0, 7.0, 1, 0.8],\n                ]\n            ),\n            np.array([[1.0, 1.0, 2.0, 2.0, 2, 0.8]])\n        ]\n\n        confusion_matrix = sv.ConfusionMatrix.from_tensors(\n            predictions=predictions,\n            targets=targets,\n            classes=['person', ...]\n        )\n\n        print(confusion_matrix.matrix)\n        # np.array([\n        #     [0., 0., 0., 0.],\n        #     [0., 1., 0., 1.],\n        #     [0., 1., 1., 0.],\n        #     [1., 1., 0., 0.]\n        # ])\n        ```\n    \"\"\"\n    validate_input_tensors(predictions, targets)\n\n    num_classes = len(classes)\n    matrix = np.zeros((num_classes + 1, num_classes + 1))\n    for true_batch, detection_batch in zip(targets, predictions):\n        matrix += cls.evaluate_detection_batch(\n            predictions=detection_batch,\n            targets=true_batch,\n            num_classes=num_classes,\n            conf_threshold=conf_threshold,\n            iou_threshold=iou_threshold,\n        )\n    return cls(\n        matrix=matrix,\n        classes=classes,\n        conf_threshold=conf_threshold,\n        iou_threshold=iou_threshold,\n    )\n
"},{"location":"detection/metrics/#supervision.metrics.detection.ConfusionMatrix.plot","title":"plot(save_path=None, title=None, classes=None, normalize=False, fig_size=(12, 10))","text":"

Create confusion matrix plot and save it at selected location.

Parameters:

Name Type Description Default save_path Optional[str]

Path to save the plot. If not provided, plot will be displayed.

None title Optional[str]

Title of the plot.

None classes Optional[List[str]]

List of classes to be displayed on the plot. If not provided, all classes will be displayed.

None normalize bool

If True, normalize the confusion matrix.

False fig_size Tuple[int, int]

Size of the plot.

(12, 10)

Returns:

Type Description Figure

matplotlib.figure.Figure: Confusion matrix plot.
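A short usage sketch, assuming a ConfusionMatrix instance has already been built with one of the constructors documented above:

import supervision as sv

confusion_matrix = sv.ConfusionMatrix.from_tensors(...)  # or benchmark(...) / from_detections(...)

fig = confusion_matrix.plot(normalize=True, title="Confusion Matrix")
confusion_matrix.plot(save_path="confusion_matrix.png")  # write the figure to disk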

Source code in supervision/metrics/detection.py
def plot(\n    self,\n    save_path: Optional[str] = None,\n    title: Optional[str] = None,\n    classes: Optional[List[str]] = None,\n    normalize: bool = False,\n    fig_size: Tuple[int, int] = (12, 10),\n) -> matplotlib.figure.Figure:\n    \"\"\"\n    Create confusion matrix plot and save it at selected location.\n\n    Args:\n        save_path (Optional[str]): Path to save the plot. If not provided,\n            plot will be displayed.\n        title (Optional[str]): Title of the plot.\n        classes (Optional[List[str]]): List of classes to be displayed on the plot.\n            If not provided, all classes will be displayed.\n        normalize (bool): If True, normalize the confusion matrix.\n        fig_size (Tuple[int, int]): Size of the plot.\n\n    Returns:\n        matplotlib.figure.Figure: Confusion matrix plot.\n    \"\"\"\n\n    array = self.matrix.copy()\n\n    if normalize:\n        eps = 1e-8\n        array = array / (array.sum(0).reshape(1, -1) + eps)\n\n    array[array < 0.005] = np.nan\n\n    fig, ax = plt.subplots(figsize=fig_size, tight_layout=True, facecolor=\"white\")\n\n    class_names = classes if classes is not None else self.classes\n    use_labels_for_ticks = class_names is not None and (0 < len(class_names) < 99)\n    if use_labels_for_ticks:\n        x_tick_labels = class_names + [\"FN\"]\n        y_tick_labels = class_names + [\"FP\"]\n        num_ticks = len(x_tick_labels)\n    else:\n        x_tick_labels = None\n        y_tick_labels = None\n        num_ticks = len(array)\n    im = ax.imshow(array, cmap=\"Blues\")\n\n    cbar = ax.figure.colorbar(im, ax=ax)\n    cbar.mappable.set_clim(vmin=0, vmax=np.nanmax(array))\n\n    if x_tick_labels is None:\n        tick_interval = 2\n    else:\n        tick_interval = 1\n    ax.set_xticks(np.arange(0, num_ticks, tick_interval), labels=x_tick_labels)\n    ax.set_yticks(np.arange(0, num_ticks, tick_interval), labels=y_tick_labels)\n\n    plt.setp(ax.get_xticklabels(), rotation=90, ha=\"right\", rotation_mode=\"default\")\n\n    labelsize = 10 if num_ticks < 50 else 8\n    ax.tick_params(axis=\"both\", which=\"both\", labelsize=labelsize)\n\n    if num_ticks < 30:\n        for i in range(array.shape[0]):\n            for j in range(array.shape[1]):\n                n_preds = array[i, j]\n                if not np.isnan(n_preds):\n                    ax.text(\n                        j,\n                        i,\n                        f\"{n_preds:.2f}\" if normalize else f\"{n_preds:.0f}\",\n                        ha=\"center\",\n                        va=\"center\",\n                        color=\"black\"\n                        if n_preds < 0.5 * np.nanmax(array)\n                        else \"white\",\n                    )\n\n    if title:\n        ax.set_title(title, fontsize=20)\n\n    ax.set_xlabel(\"Predicted\")\n    ax.set_ylabel(\"True\")\n    ax.set_facecolor(\"white\")\n    if save_path:\n        fig.savefig(\n            save_path, dpi=250, facecolor=fig.get_facecolor(), transparent=True\n        )\n    return fig\n
"},{"location":"detection/metrics/#supervision.metrics.detection.MeanAveragePrecision-functions","title":"Functions","text":""},{"location":"detection/metrics/#supervision.metrics.detection.MeanAveragePrecision.benchmark","title":"benchmark(dataset, callback) classmethod","text":"

Calculate mean average precision from dataset and callback function.

Parameters:

Name Type Description Default dataset DetectionDataset

Object detection dataset used for evaluation.

required callback Callable[[ndarray], Detections]

Function that takes an image as input and returns Detections object.

required

Returns: MeanAveragePrecision: New instance of MeanAveragePrecision.

Example
import supervision as sv\nfrom ultralytics import YOLO\n\ndataset = sv.DetectionDataset.from_yolo(...)\n\nmodel = YOLO(...)\ndef callback(image: np.ndarray) -> sv.Detections:\n    result = model(image)[0]\n    return sv.Detections.from_ultralytics(result)\n\nmean_average_precision = sv.MeanAveragePrecision.benchmark(\n    dataset = dataset,\n    callback = callback\n)\n\nprint(mean_average_precision.map50_95)\n# 0.433\n
Source code in supervision/metrics/detection.py
@classmethod\ndef benchmark(\n    cls,\n    dataset: DetectionDataset,\n    callback: Callable[[np.ndarray], Detections],\n) -> MeanAveragePrecision:\n    \"\"\"\n    Calculate mean average precision from dataset and callback function.\n\n    Args:\n        dataset (DetectionDataset): Object detection dataset used for evaluation.\n        callback (Callable[[np.ndarray], Detections]): Function that takes\n            an image as input and returns Detections object.\n    Returns:\n        MeanAveragePrecision: New instance of MeanAveragePrecision.\n\n    Example:\n        ```python\n        import supervision as sv\n        from ultralytics import YOLO\n\n        dataset = sv.DetectionDataset.from_yolo(...)\n\n        model = YOLO(...)\n        def callback(image: np.ndarray) -> sv.Detections:\n            result = model(image)[0]\n            return sv.Detections.from_ultralytics(result)\n\n        mean_average_precision = sv.MeanAveragePrecision.benchmark(\n            dataset = dataset,\n            callback = callback\n        )\n\n        print(mean_average_precision.map50_95)\n        # 0.433\n        ```\n    \"\"\"\n    predictions, targets = [], []\n    for img_name, img in dataset.images.items():\n        predictions_batch = callback(img)\n        predictions.append(predictions_batch)\n        targets_batch = dataset.annotations[img_name]\n        targets.append(targets_batch)\n    return cls.from_detections(\n        predictions=predictions,\n        targets=targets,\n    )\n
"},{"location":"detection/metrics/#supervision.metrics.detection.MeanAveragePrecision.compute_average_precision","title":"compute_average_precision(recall, precision) staticmethod","text":"

Compute the average precision using 101-point interpolation (COCO), given the recall and precision curves.

Parameters:

Name Type Description Default recall ndarray

The recall curve.

required precision ndarray

The precision curve.

required

Returns:

Name Type Description float float

Average precision.
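A quick illustrative sketch with a toy precision-recall curve (values chosen for readability, not from the library docs):

import numpy as np
import supervision as sv

recall = np.array([0.5, 1.0])
precision = np.array([1.0, 0.5])

ap = sv.MeanAveragePrecision.compute_average_precision(recall, precision)
print(round(ap, 3))  # ~0.875 with 101-point interpolation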

Source code in supervision/metrics/detection.py
@staticmethod\ndef compute_average_precision(recall: np.ndarray, precision: np.ndarray) -> float:\n    \"\"\"\n    Compute the average precision using 101-point interpolation (COCO), given\n        the recall and precision curves.\n\n    Args:\n        recall (np.ndarray): The recall curve.\n        precision (np.ndarray): The precision curve.\n\n    Returns:\n        float: Average precision.\n    \"\"\"\n    extended_recall = np.concatenate(([0.0], recall, [1.0]))\n    extended_precision = np.concatenate(([1.0], precision, [0.0]))\n    max_accumulated_precision = np.flip(\n        np.maximum.accumulate(np.flip(extended_precision))\n    )\n    interpolated_recall_levels = np.linspace(0, 1, 101)\n    interpolated_precision = np.interp(\n        interpolated_recall_levels, extended_recall, max_accumulated_precision\n    )\n    average_precision = np.trapz(interpolated_precision, interpolated_recall_levels)\n    return average_precision\n
"},{"location":"detection/metrics/#supervision.metrics.detection.MeanAveragePrecision.from_detections","title":"from_detections(predictions, targets) classmethod","text":"

Calculate mean average precision based on predicted and ground-truth detections.

Parameters:

Name Type Description Default targets List[Detections]

Detections objects from ground-truth.

required predictions List[Detections]

Detections objects predicted by the model.

required

Returns: MeanAveragePrecision: New instance of MeanAveragePrecision.

Example
import supervision as sv\n\ntargets = [\n    sv.Detections(...),\n    sv.Detections(...)\n]\n\npredictions = [\n    sv.Detections(...),\n    sv.Detections(...)\n]\n\nmean_average_precision = sv.MeanAveragePrecision.from_detections(\n    predictions=predictions,\n    targets=targets,\n)\n\nprint(mean_average_precision.map50_95)\n# 0.2899\n
Source code in supervision/metrics/detection.py
@classmethod\ndef from_detections(\n    cls,\n    predictions: List[Detections],\n    targets: List[Detections],\n) -> MeanAveragePrecision:\n    \"\"\"\n    Calculate mean average precision based on predicted and ground-truth detections.\n\n    Args:\n        targets (List[Detections]): Detections objects from ground-truth.\n        predictions (List[Detections]): Detections objects predicted by the model.\n    Returns:\n        MeanAveragePrecision: New instance of MeanAveragePrecision.\n\n    Example:\n        ```python\n        import supervision as sv\n\n        targets = [\n            sv.Detections(...),\n            sv.Detections(...)\n        ]\n\n        predictions = [\n            sv.Detections(...),\n            sv.Detections(...)\n        ]\n\n        mean_average_precision = sv.MeanAveragePrecision.from_detections(\n            predictions=predictions,\n            targets=targets,\n        )\n\n        print(mean_average_precision.map50_95)\n        # 0.2899\n        ```\n    \"\"\"\n    prediction_tensors = []\n    target_tensors = []\n    for prediction, target in zip(predictions, targets):\n        prediction_tensors.append(\n            detections_to_tensor(prediction, with_confidence=True)\n        )\n        target_tensors.append(detections_to_tensor(target, with_confidence=False))\n    return cls.from_tensors(\n        predictions=prediction_tensors,\n        targets=target_tensors,\n    )\n
"},{"location":"detection/metrics/#supervision.metrics.detection.MeanAveragePrecision.from_tensors","title":"from_tensors(predictions, targets) classmethod","text":"

Calculate Mean Average Precision based on predicted and ground-truth detections at different IoU thresholds.

Parameters:

Name Type Description Default predictions List[ndarray]

Each element of the list describes a single image and has shape = (M, 6) where M is the number of detected objects. Each row is expected to be in (x_min, y_min, x_max, y_max, class, conf) format.

required targets List[ndarray]

Each element of the list describes a single image and has shape = (N, 5) where N is the number of ground-truth objects. Each row is expected to be in (x_min, y_min, x_max, y_max, class) format.

required

Returns: MeanAveragePrecision: New instance of MeanAveragePrecision.

Example
import supervision as sv\nimport numpy as np\n\ntargets = (\n    [\n        np.array(\n            [\n                [0.0, 0.0, 3.0, 3.0, 1],\n                [2.0, 2.0, 5.0, 5.0, 1],\n                [6.0, 1.0, 8.0, 3.0, 2],\n            ]\n        ),\n        np.array([[1.0, 1.0, 2.0, 2.0, 2]]),\n    ]\n)\n\npredictions = [\n    np.array(\n        [\n            [0.0, 0.0, 3.0, 3.0, 1, 0.9],\n            [0.1, 0.1, 3.0, 3.0, 0, 0.9],\n            [6.0, 1.0, 8.0, 3.0, 1, 0.8],\n            [1.0, 6.0, 2.0, 7.0, 1, 0.8],\n        ]\n    ),\n    np.array([[1.0, 1.0, 2.0, 2.0, 2, 0.8]])\n]\n\nmean_average_precision = sv.MeanAveragePrecision.from_tensors(\n    predictions=predictions,\n    targets=targets,\n)\n\nprint(mean_average_precision.map50_95)\n# 0.6649\n
Source code in supervision/metrics/detection.py
@classmethod\ndef from_tensors(\n    cls,\n    predictions: List[np.ndarray],\n    targets: List[np.ndarray],\n) -> MeanAveragePrecision:\n    \"\"\"\n    Calculate Mean Average Precision based on predicted and ground-truth\n        detections at different threshold.\n\n    Args:\n        predictions (List[np.ndarray]): Each element of the list describes\n            a single image and has `shape = (M, 6)` where `M` is\n            the number of detected objects. Each row is expected to be\n            in `(x_min, y_min, x_max, y_max, class, conf)` format.\n        targets (List[np.ndarray]): Each element of the list describes a single\n            image and has `shape = (N, 5)` where `N` is the\n            number of ground-truth objects. Each row is expected to be in\n            `(x_min, y_min, x_max, y_max, class)` format.\n    Returns:\n        MeanAveragePrecision: New instance of MeanAveragePrecision.\n\n    Example:\n        ```python\n        import supervision as sv\n        import numpy as np\n\n        targets = (\n            [\n                np.array(\n                    [\n                        [0.0, 0.0, 3.0, 3.0, 1],\n                        [2.0, 2.0, 5.0, 5.0, 1],\n                        [6.0, 1.0, 8.0, 3.0, 2],\n                    ]\n                ),\n                np.array([[1.0, 1.0, 2.0, 2.0, 2]]),\n            ]\n        )\n\n        predictions = [\n            np.array(\n                [\n                    [0.0, 0.0, 3.0, 3.0, 1, 0.9],\n                    [0.1, 0.1, 3.0, 3.0, 0, 0.9],\n                    [6.0, 1.0, 8.0, 3.0, 1, 0.8],\n                    [1.0, 6.0, 2.0, 7.0, 1, 0.8],\n                ]\n            ),\n            np.array([[1.0, 1.0, 2.0, 2.0, 2, 0.8]])\n        ]\n\n        mean_average_precison = sv.MeanAveragePrecision.from_tensors(\n            predictions=predictions,\n            targets=targets,\n        )\n\n        print(mean_average_precison.map50_95)\n        # 0.6649\n        ```\n    \"\"\"\n    validate_input_tensors(predictions, targets)\n    iou_thresholds = np.linspace(0.5, 0.95, 10)\n    stats = []\n\n    # Gather matching stats for predictions and targets\n    for true_objs, predicted_objs in zip(targets, predictions):\n        if predicted_objs.shape[0] == 0:\n            if true_objs.shape[0]:\n                stats.append(\n                    (\n                        np.zeros((0, iou_thresholds.size), dtype=bool),\n                        *np.zeros((2, 0)),\n                        true_objs[:, 4],\n                    )\n                )\n            continue\n\n        if true_objs.shape[0]:\n            matches = cls._match_detection_batch(\n                predicted_objs, true_objs, iou_thresholds\n            )\n            stats.append(\n                (\n                    matches,\n                    predicted_objs[:, 5],\n                    predicted_objs[:, 4],\n                    true_objs[:, 4],\n                )\n            )\n\n    # Compute average precisions if any matches exist\n    if stats:\n        concatenated_stats = [np.concatenate(items, 0) for items in zip(*stats)]\n        average_precisions = cls._average_precisions_per_class(*concatenated_stats)\n        map50 = average_precisions[:, 0].mean()\n        map75 = average_precisions[:, 5].mean()\n        map50_95 = average_precisions.mean()\n    else:\n        map50, map75, map50_95 = 0, 0, 0\n        average_precisions = []\n\n    return cls(\n        map50_95=map50_95,\n        map50=map50,\n        map75=map75,\n        
per_class_ap50_95=average_precisions,\n    )\n
"},{"location":"detection/utils/","title":"Detection Utils","text":"box_iou_batch

Compute Intersection over Union (IoU) of two sets of bounding boxes - boxes_true and boxes_detection. Both sets of boxes are expected to be in (x_min, y_min, x_max, y_max) format.

Parameters:

Name Type Description Default boxes_true ndarray

2D np.ndarray representing ground-truth boxes. shape = (N, 4) where N is number of true objects.

required boxes_detection ndarray

2D np.ndarray representing detection boxes. shape = (M, 4) where M is number of detected objects.

required

Returns:

Type Description ndarray

np.ndarray: Pairwise IoU of boxes from boxes_true and boxes_detection. shape = (N, M) where N is number of true objects and M is number of detected objects.
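A small illustrative sketch with toy boxes (assumes the function is importable from the top-level package, as in the other examples on this page):

import numpy as np
import supervision as sv

boxes_true = np.array([[0, 0, 10, 10]])
boxes_detection = np.array([
    [0, 0, 10, 10],   # perfect overlap
    [5, 5, 15, 15],   # partial overlap
])

print(sv.box_iou_batch(boxes_true, boxes_detection))
# approximately [[1.0, 0.143]]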

Source code in supervision/detection/utils.py
def box_iou_batch(boxes_true: np.ndarray, boxes_detection: np.ndarray) -> np.ndarray:\n    \"\"\"\n    Compute Intersection over Union (IoU) of two sets of bounding boxes -\n        `boxes_true` and `boxes_detection`. Both sets\n        of boxes are expected to be in `(x_min, y_min, x_max, y_max)` format.\n\n    Args:\n        boxes_true (np.ndarray): 2D `np.ndarray` representing ground-truth boxes.\n            `shape = (N, 4)` where `N` is number of true objects.\n        boxes_detection (np.ndarray): 2D `np.ndarray` representing detection boxes.\n            `shape = (M, 4)` where `M` is number of detected objects.\n\n    Returns:\n        np.ndarray: Pairwise IoU of boxes from `boxes_true` and `boxes_detection`.\n            `shape = (N, M)` where `N` is number of true objects and\n            `M` is number of detected objects.\n    \"\"\"\n\n    def box_area(box):\n        return (box[2] - box[0]) * (box[3] - box[1])\n\n    area_true = box_area(boxes_true.T)\n    area_detection = box_area(boxes_detection.T)\n\n    top_left = np.maximum(boxes_true[:, None, :2], boxes_detection[:, :2])\n    bottom_right = np.minimum(boxes_true[:, None, 2:], boxes_detection[:, 2:])\n\n    area_inter = np.prod(np.clip(bottom_right - top_left, a_min=0, a_max=None), 2)\n    ious = area_inter / (area_true[:, None] + area_detection - area_inter)\n    ious = np.nan_to_num(ious)\n    return ious\n
mask_iou_batch

Compute Intersection over Union (IoU) of two sets of masks - masks_true and masks_detection.

Parameters:

Name Type Description Default masks_true ndarray

3D np.ndarray representing ground-truth masks.

required masks_detection ndarray

3D np.ndarray representing detection masks.

required memory_limit int

memory limit in MB, default is 1024 * 5 MB (5GB).

1024 * 5

Returns:

Type Description ndarray

np.ndarray: Pairwise IoU of masks from masks_true and masks_detection.
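A small illustrative sketch with toy boolean masks (not taken from the library docs):

import numpy as np
import supervision as sv

masks_true = np.zeros((1, 4, 4), dtype=bool)
masks_true[0, :2, :2] = True           # 2x2 region in the top-left corner

masks_detection = np.zeros((2, 4, 4), dtype=bool)
masks_detection[0, :2, :2] = True      # identical to the true mask
masks_detection[1, :, :] = True        # covers the whole image

print(sv.mask_iou_batch(masks_true, masks_detection))
# approximately [[1.0, 0.25]]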

Source code in supervision/detection/utils.py
def mask_iou_batch(\n    masks_true: np.ndarray,\n    masks_detection: np.ndarray,\n    memory_limit: int = 1024 * 5,\n) -> np.ndarray:\n    \"\"\"\n    Compute Intersection over Union (IoU) of two sets of masks -\n        `masks_true` and `masks_detection`.\n\n    Args:\n        masks_true (np.ndarray): 3D `np.ndarray` representing ground-truth masks.\n        masks_detection (np.ndarray): 3D `np.ndarray` representing detection masks.\n        memory_limit (int, optional): memory limit in MB, default is 1024 * 5 MB (5GB).\n\n    Returns:\n        np.ndarray: Pairwise IoU of masks from `masks_true` and `masks_detection`.\n    \"\"\"\n    memory = (\n        masks_true.shape[0]\n        * masks_true.shape[1]\n        * masks_true.shape[2]\n        * masks_detection.shape[0]\n        / 1024\n        / 1024\n    )\n    if memory <= memory_limit:\n        return _mask_iou_batch_split(masks_true, masks_detection)\n\n    ious = []\n    step = max(\n        memory_limit\n        * 1024\n        * 1024\n        // (\n            masks_detection.shape[0]\n            * masks_detection.shape[1]\n            * masks_detection.shape[2]\n        ),\n        1,\n    )\n    for i in range(0, masks_true.shape[0], step):\n        ious.append(_mask_iou_batch_split(masks_true[i : i + step], masks_detection))\n\n    return np.vstack(ious)\n
polygon_to_mask

Generate a mask from a polygon.

Parameters:

Name Type Description Default polygon ndarray

The polygon for which the mask should be generated, given as a list of vertices.

required resolution_wh Tuple[int, int]

The width and height of the desired resolution.

required

Returns:

Type Description ndarray

np.ndarray: The generated 2D mask, where the polygon is marked with 1's and the rest is filled with 0's.
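A quick illustrative sketch (toy polygon; note that OpenCV expects integer vertex coordinates):

import numpy as np
import supervision as sv

polygon = np.array([[1, 1], [4, 1], [4, 4], [1, 4]], dtype=np.int32)

mask = sv.polygon_to_mask(polygon=polygon, resolution_wh=(6, 6))
print(mask.shape)       # (6, 6)
print(int(mask.sum()))  # number of pixels marked with 1 inside the filled square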

Source code in supervision/detection/utils.py
def polygon_to_mask(polygon: np.ndarray, resolution_wh: Tuple[int, int]) -> np.ndarray:\n    \"\"\"Generate a mask from a polygon.\n\n    Args:\n        polygon (np.ndarray): The polygon for which the mask should be generated,\n            given as a list of vertices.\n        resolution_wh (Tuple[int, int]): The width and height of the desired resolution.\n\n    Returns:\n        np.ndarray: The generated 2D mask, where the polygon is marked with\n            `1`'s and the rest is filled with `0`'s.\n    \"\"\"\n    width, height = resolution_wh\n    mask = np.zeros((height, width))\n\n    cv2.fillPoly(mask, [polygon], color=1)\n    return mask\n
mask_to_xyxy

Converts a 3D np.array of 2D bool masks into a 2D np.array of bounding boxes.

Parameters:

Name Type Description Default masks ndarray

A 3D np.array of shape (N, W, H) containing 2D bool masks

required

Returns:

Type Description ndarray

np.ndarray: A 2D np.array of shape (N, 4) containing the bounding boxes (x_min, y_min, x_max, y_max) for each mask
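A quick illustrative sketch with a single toy mask:

import numpy as np
import supervision as sv

masks = np.zeros((1, 10, 10), dtype=bool)
masks[0, 2:5, 3:7] = True  # rows 2-4, columns 3-6

print(sv.mask_to_xyxy(masks=masks))
# [[3 2 6 4]] -- (x_min, y_min, x_max, y_max)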

Source code in supervision/detection/utils.py
def mask_to_xyxy(masks: np.ndarray) -> np.ndarray:\n    \"\"\"\n    Converts a 3D `np.array` of 2D bool masks into a 2D `np.array` of bounding boxes.\n\n    Parameters:\n        masks (np.ndarray): A 3D `np.array` of shape `(N, W, H)`\n            containing 2D bool masks\n\n    Returns:\n        np.ndarray: A 2D `np.array` of shape `(N, 4)` containing the bounding boxes\n            `(x_min, y_min, x_max, y_max)` for each mask\n    \"\"\"\n    n = masks.shape[0]\n    xyxy = np.zeros((n, 4), dtype=int)\n\n    for i, mask in enumerate(masks):\n        rows, cols = np.where(mask)\n\n        if len(rows) > 0 and len(cols) > 0:\n            x_min, x_max = np.min(cols), np.max(cols)\n            y_min, y_max = np.min(rows), np.max(rows)\n            xyxy[i, :] = [x_min, y_min, x_max, y_max]\n\n    return xyxy\n
mask_to_polygons

Converts a binary mask to a list of polygons.

Parameters:

Name Type Description Default mask ndarray

A binary mask represented as a 2D NumPy array of shape (H, W), where H and W are the height and width of the mask, respectively.

required

Returns:

Type Description List[ndarray]

List[np.ndarray]: A list of polygons, where each polygon is represented by a NumPy array of shape (N, 2), containing the x, y coordinates of the points. Polygons with fewer points than MIN_POLYGON_POINT_COUNT = 3 are excluded from the output.
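A quick illustrative sketch with a single rectangular blob (the exact number of returned points can vary with the OpenCV contour approximation):

import numpy as np
import supervision as sv

mask = np.zeros((10, 10), dtype=bool)
mask[2:5, 3:7] = True  # one filled rectangle

polygons = sv.mask_to_polygons(mask=mask)
print(len(polygons))      # 1
print(polygons[0].shape)  # (4, 2) for an axis-aligned rectangle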

Source code in supervision/detection/utils.py
def mask_to_polygons(mask: np.ndarray) -> List[np.ndarray]:\n    \"\"\"\n    Converts a binary mask to a list of polygons.\n\n    Parameters:\n        mask (np.ndarray): A binary mask represented as a 2D NumPy array of\n            shape `(H, W)`, where H and W are the height and width of\n            the mask, respectively.\n\n    Returns:\n        List[np.ndarray]: A list of polygons, where each polygon is represented by a\n            NumPy array of shape `(N, 2)`, containing the `x`, `y` coordinates\n            of the points. Polygons with fewer points than `MIN_POLYGON_POINT_COUNT = 3`\n            are excluded from the output.\n    \"\"\"\n\n    contours, _ = cv2.findContours(\n        mask.astype(np.uint8), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE\n    )\n    return [\n        np.squeeze(contour, axis=1)\n        for contour in contours\n        if contour.shape[0] >= MIN_POLYGON_POINT_COUNT\n    ]\n
polygon_to_xyxy

Converts a polygon represented by a NumPy array into a bounding box.

Parameters:

Name Type Description Default polygon ndarray

A polygon represented by a NumPy array of shape (N, 2), containing the x, y coordinates of the points.

required

Returns:

Type Description ndarray

np.ndarray: A 1D NumPy array containing the bounding box (x_min, y_min, x_max, y_max) of the input polygon.
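A quick illustrative sketch:

import numpy as np
import supervision as sv

polygon = np.array([[1, 1], [4, 1], [4, 4], [1, 4]])

print(sv.polygon_to_xyxy(polygon=polygon))
# [1 1 4 4] -- (x_min, y_min, x_max, y_max)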

Source code in supervision/detection/utils.py
def polygon_to_xyxy(polygon: np.ndarray) -> np.ndarray:\n    \"\"\"\n    Converts a polygon represented by a NumPy array into a bounding box.\n\n    Parameters:\n        polygon (np.ndarray): A polygon represented by a NumPy array of shape `(N, 2)`,\n            containing the `x`, `y` coordinates of the points.\n\n    Returns:\n        np.ndarray: A 1D NumPy array containing the bounding box\n            `(x_min, y_min, x_max, y_max)` of the input polygon.\n    \"\"\"\n    x_min, y_min = np.min(polygon, axis=0)\n    x_max, y_max = np.max(polygon, axis=0)\n    return np.array([x_min, y_min, x_max, y_max])\n
filter_polygons_by_area

Filters a list of polygons based on their area.

Parameters:

Name Type Description Default polygons List[ndarray]

A list of polygons, where each polygon is represented by a NumPy array of shape (N, 2), containing the x, y coordinates of the points.

required min_area Optional[float]

The minimum area threshold. Only polygons with an area greater than or equal to this value will be included in the output. If set to None, no minimum area constraint will be applied.

None max_area Optional[float]

The maximum area threshold. Only polygons with an area less than or equal to this value will be included in the output. If set to None, no maximum area constraint will be applied.

None

Returns:

Type Description List[ndarray]

List[np.ndarray]: A new list of polygons containing only those with areas within the specified thresholds.
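A quick illustrative sketch with two toy polygons (areas computed with cv2.contourArea, as in the source below):

import numpy as np
import supervision as sv

small = np.array([[0, 0], [2, 0], [2, 2], [0, 2]], dtype=np.int32)      # area 4
large = np.array([[0, 0], [10, 0], [10, 10], [0, 10]], dtype=np.int32)  # area 100

filtered = sv.filter_polygons_by_area(polygons=[small, large], min_area=10)
print(len(filtered))  # 1 -- only the large polygon passes the threshold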

Source code in supervision/detection/utils.py
def filter_polygons_by_area(\n    polygons: List[np.ndarray],\n    min_area: Optional[float] = None,\n    max_area: Optional[float] = None,\n) -> List[np.ndarray]:\n    \"\"\"\n    Filters a list of polygons based on their area.\n\n    Parameters:\n        polygons (List[np.ndarray]): A list of polygons, where each polygon is\n            represented by a NumPy array of shape `(N, 2)`,\n            containing the `x`, `y` coordinates of the points.\n        min_area (Optional[float]): The minimum area threshold.\n            Only polygons with an area greater than or equal to this value\n            will be included in the output. If set to None,\n            no minimum area constraint will be applied.\n        max_area (Optional[float]): The maximum area threshold.\n            Only polygons with an area less than or equal to this value\n            will be included in the output. If set to None,\n            no maximum area constraint will be applied.\n\n    Returns:\n        List[np.ndarray]: A new list of polygons containing only those with\n            areas within the specified thresholds.\n    \"\"\"\n    if min_area is None and max_area is None:\n        return polygons\n    ares = [cv2.contourArea(polygon) for polygon in polygons]\n    return [\n        polygon\n        for polygon, area in zip(polygons, ares)\n        if (min_area is None or area >= min_area)\n        and (max_area is None or area <= max_area)\n    ]\n
move_boxes

Parameters:

Name Type Description Default xyxy NDArray[float64]

An array of shape (n, 4) containing the bounding box coordinates in the format [x1, y1, x2, y2].

required offset array

An array of shape (2,) containing offset values in the format [dx, dy].

required

Returns:

Type Description NDArray[float64]

npt.NDArray[np.float64]: Repositioned bounding boxes.

Examples:

import numpy as np\nimport supervision as sv\n\nxyxy = np.array([\n    [10, 10, 20, 20],\n    [30, 30, 40, 40]\n])\noffset = np.array([5, 5])\n\nsv.move_boxes(xyxy=xyxy, offset=offset)\n# array([\n#    [15, 15, 25, 25],\n#    [35, 35, 45, 45]\n# ])\n
Source code in supervision/detection/utils.py
def move_boxes(\n    xyxy: npt.NDArray[np.float64], offset: npt.NDArray[np.int32]\n) -> npt.NDArray[np.float64]:\n    \"\"\"\n    Parameters:\n        xyxy (npt.NDArray[np.float64]): An array of shape `(n, 4)` containing the\n            bounding boxes coordinates in format `[x1, y1, x2, y2]`\n        offset (np.array): An array of shape `(2,)` containing offset values in format\n            is `[dx, dy]`.\n\n    Returns:\n        npt.NDArray[np.float64]: Repositioned bounding boxes.\n\n    Examples:\n        ```python\n        import numpy as np\n        import supervision as sv\n\n        xyxy = np.array([\n            [10, 10, 20, 20],\n            [30, 30, 40, 40]\n        ])\n        offset = np.array([5, 5])\n\n        sv.move_boxes(xyxy=xyxy, offset=offset)\n        # array([\n        #    [15, 15, 25, 25],\n        #    [35, 35, 45, 45]\n        # ])\n        ```\n    \"\"\"\n    return xyxy + np.hstack([offset, offset])\n
move_masks

Offset the masks in an array by the specified (x, y) amount.

Parameters:

Name Type Description Default masks NDArray[bool_]

A 3D array of binary masks corresponding to the predictions. Shape: (N, H, W), where N is the number of predictions, and H, W are the dimensions of each mask.

required offset NDArray[int32]

An array of shape (2,) containing non-negative int values [dx, dy].

required resolution_wh Tuple[int, int]

The width and height of the desired mask resolution.

required

Returns:

Type Description NDArray[bool_]

npt.NDArray[np.bool_]: Repositioned masks, placed on a blank canvas of the specified resolution_wh.
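A quick illustrative sketch (toy mask; assumes the function is importable from the top-level package like the other utilities on this page):

import numpy as np
import supervision as sv

masks = np.ones((1, 2, 2), dtype=bool)  # a single 2x2 mask
offset = np.array([3, 1])               # dx=3, dy=1

moved = sv.move_masks(masks=masks, offset=offset, resolution_wh=(6, 4))
print(moved.shape)               # (1, 4, 6) -- (N, height, width)
print(moved[0, 1:3, 3:5].all())  # True: the mask now sits at rows 1-2, columns 3-4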

Source code in supervision/detection/utils.py
def move_masks(\n    masks: npt.NDArray[np.bool_],\n    offset: npt.NDArray[np.int32],\n    resolution_wh: Tuple[int, int],\n) -> npt.NDArray[np.bool_]:\n    \"\"\"\n    Offset the masks in an array by the specified (x, y) amount.\n\n    Args:\n        masks (npt.NDArray[np.bool_]): A 3D array of binary masks corresponding to the\n            predictions. Shape: `(N, H, W)`, where N is the number of predictions, and\n            H, W are the dimensions of each mask.\n        offset (npt.NDArray[np.int32]): An array of shape `(2,)` containing non-negative\n            int values `[dx, dy]`.\n        resolution_wh (Tuple[int, int]): The width and height of the desired mask\n            resolution.\n\n    Returns:\n        (npt.NDArray[np.bool_]) repositioned masks, optionally padded to the specified\n            shape.\n    \"\"\"\n\n    if offset[0] < 0 or offset[1] < 0:\n        raise ValueError(f\"Offset values must be non-negative integers. Got: {offset}\")\n\n    mask_array = np.full((masks.shape[0], resolution_wh[1], resolution_wh[0]), False)\n    mask_array[\n        :,\n        offset[1] : masks.shape[1] + offset[1],\n        offset[0] : masks.shape[2] + offset[0],\n    ] = masks\n\n    return mask_array\n
scale_boxes

Scale the dimensions of bounding boxes.

Parameters:

    xyxy (npt.NDArray[np.float64]): An array of shape (n, 4) containing the bounding boxes coordinates in format [x1, y1, x2, y2]. Required.
    factor (float): A float value representing the factor by which the box dimensions are scaled. A factor greater than 1 enlarges the boxes, while a factor less than 1 shrinks them. Required.

Returns:

    npt.NDArray[np.float64]: Scaled bounding boxes.

Examples:

import numpy as np\nimport supervision as sv\n\nxyxy = np.array([\n    [10, 10, 20, 20],\n    [30, 30, 40, 40]\n])\n\nsv.scale_boxes(xyxy=xyxy, factor=1.5)\n# array([\n#    [ 7.5,  7.5, 22.5, 22.5],\n#    [27.5, 27.5, 42.5, 42.5]\n# ])\n
Source code in supervision/detection/utils.py
def scale_boxes(\n    xyxy: npt.NDArray[np.float64], factor: float\n) -> npt.NDArray[np.float64]:\n    \"\"\"\n    Scale the dimensions of bounding boxes.\n\n    Parameters:\n        xyxy (npt.NDArray[np.float64]): An array of shape `(n, 4)` containing the\n            bounding boxes coordinates in format `[x1, y1, x2, y2]`\n        factor (float): A float value representing the factor by which the box\n            dimensions are scaled. A factor greater than 1 enlarges the boxes, while a\n            factor less than 1 shrinks them.\n\n    Returns:\n        npt.NDArray[np.float64]: Scaled bounding boxes.\n\n    Examples:\n        ```python\n        import numpy as np\n        import supervision as sv\n\n        xyxy = np.array([\n            [10, 10, 20, 20],\n            [30, 30, 40, 40]\n        ])\n\n        sv.scale_boxes(xyxy=xyxy, factor=1.5)\n        # array([\n        #    [ 7.5,  7.5, 22.5, 22.5],\n        #    [27.5, 27.5, 42.5, 42.5]\n        # ])\n        ```\n    \"\"\"\n    centers = (xyxy[:, :2] + xyxy[:, 2:]) / 2\n    new_sizes = (xyxy[:, 2:] - xyxy[:, :2]) * factor\n    return np.concatenate((centers - new_sizes / 2, centers + new_sizes / 2), axis=1)\n
clip_boxes

Clips bounding boxes coordinates to fit within the frame resolution.

Parameters:

    xyxy (np.ndarray): A numpy array of shape (N, 4) where each row corresponds to a bounding box in the format (x_min, y_min, x_max, y_max). Required.
    resolution_wh (Tuple[int, int]): A tuple of the form (width, height) representing the resolution of the frame. Required.

Returns:

    np.ndarray: A numpy array of shape (N, 4) where each row corresponds to a bounding box with coordinates clipped to fit within the frame resolution.

Examples:

import numpy as np\nimport supervision as sv\n\nxyxy = np.array([\n    [10, 20, 300, 200],\n    [15, 25, 350, 450],\n    [-10, -20, 30, 40]\n])\n\nsv.clip_boxes(xyxy=xyxy, resolution_wh=(320, 240))\n# array([\n#     [ 10,  20, 300, 200],\n#     [ 15,  25, 320, 240],\n#     [  0,   0,  30,  40]\n# ])\n
Source code in supervision/detection/utils.py
def clip_boxes(xyxy: np.ndarray, resolution_wh: Tuple[int, int]) -> np.ndarray:\n    \"\"\"\n    Clips bounding boxes coordinates to fit within the frame resolution.\n\n    Args:\n        xyxy (np.ndarray): A numpy array of shape `(N, 4)` where each\n            row corresponds to a bounding box in\n        the format `(x_min, y_min, x_max, y_max)`.\n        resolution_wh (Tuple[int, int]): A tuple of the form `(width, height)`\n            representing the resolution of the frame.\n\n    Returns:\n        np.ndarray: A numpy array of shape `(N, 4)` where each row\n            corresponds to a bounding box with coordinates clipped to fit\n            within the frame resolution.\n\n    Examples:\n        ```python\n        import numpy as np\n        import supervision as sv\n\n        xyxy = np.array([\n            [10, 20, 300, 200],\n            [15, 25, 350, 450],\n            [-10, -20, 30, 40]\n        ])\n\n        sv.clip_boxes(xyxy=xyxy, resolution_wh=(320, 240))\n        # array([\n        #     [ 10,  20, 300, 200],\n        #     [ 15,  25, 320, 240],\n        #     [  0,   0,  30,  40]\n        # ])\n        ```\n    \"\"\"\n    result = np.copy(xyxy)\n    width, height = resolution_wh\n    result[:, [0, 2]] = result[:, [0, 2]].clip(0, width)\n    result[:, [1, 3]] = result[:, [1, 3]].clip(0, height)\n    return result\n
pad_boxes

Pads bounding boxes coordinates with a constant padding.

Parameters:

    xyxy (np.ndarray): A numpy array of shape (N, 4) where each row corresponds to a bounding box in the format (x_min, y_min, x_max, y_max). Required.
    px (int): The padding value to be added to both the left and right sides of each bounding box. Required.
    py (Optional[int]): The padding value to be added to both the top and bottom sides of each bounding box. If not provided, px will be used for both dimensions. Default: None.

Returns:

    np.ndarray: A numpy array of shape (N, 4) where each row corresponds to a bounding box with coordinates padded according to the provided padding values.

Examples:

import numpy as np\nimport supervision as sv\n\nxyxy = np.array([\n    [10, 20, 30, 40],\n    [15, 25, 35, 45]\n])\n\nsv.pad_boxes(xyxy=xyxy, px=5, py=10)\n# array([\n#     [ 5, 10, 35, 50],\n#     [10, 15, 40, 55]\n# ])\n
Source code in supervision/detection/utils.py
def pad_boxes(xyxy: np.ndarray, px: int, py: Optional[int] = None) -> np.ndarray:\n    \"\"\"\n    Pads bounding boxes coordinates with a constant padding.\n\n    Args:\n        xyxy (np.ndarray): A numpy array of shape `(N, 4)` where each\n            row corresponds to a bounding box in the format\n            `(x_min, y_min, x_max, y_max)`.\n        px (int): The padding value to be added to both the left and right sides of\n            each bounding box.\n        py (Optional[int]): The padding value to be added to both the top and bottom\n            sides of each bounding box. If not provided, `px` will be used for both\n            dimensions.\n\n    Returns:\n        np.ndarray: A numpy array of shape `(N, 4)` where each row corresponds to a\n            bounding box with coordinates padded according to the provided padding\n            values.\n\n    Examples:\n        ```python\n        import numpy as np\n        import supervision as sv\n\n        xyxy = np.array([\n            [10, 20, 30, 40],\n            [15, 25, 35, 45]\n        ])\n\n        sv.pad_boxes(xyxy=xyxy, px=5, py=10)\n        # array([\n        #     [ 5, 10, 35, 50],\n        #     [10, 15, 40, 55]\n        # ])\n        ```\n    \"\"\"\n    if py is None:\n        py = px\n\n    result = xyxy.copy()\n    result[:, [0, 1]] -= [px, py]\n    result[:, [2, 3]] += [px, py]\n\n    return result\n
contains_holes

Checks if the binary mask contains holes (background pixels fully enclosed by foreground pixels).

Parameters:

    mask (npt.NDArray[np.bool_]): 2D binary mask where True indicates foreground object and False indicates background. Required.

Returns:

    bool: True if holes are detected, False otherwise.

Examples:

import numpy as np\nimport supervision as sv\n\nmask = np.array([\n    [0, 0, 0, 0, 0],\n    [0, 1, 1, 1, 0],\n    [0, 1, 0, 1, 0],\n    [0, 1, 1, 1, 0],\n    [0, 0, 0, 0, 0]\n]).astype(bool)\n\nsv.contains_holes(mask=mask)\n# True\n\nmask = np.array([\n    [0, 0, 0, 0, 0],\n    [0, 1, 1, 1, 0],\n    [0, 1, 1, 1, 0],\n    [0, 1, 1, 1, 0],\n    [0, 0, 0, 0, 0]\n]).astype(bool)\n\nsv.contains_holes(mask=mask)\n# False\n

Source code in supervision/detection/utils.py
def contains_holes(mask: npt.NDArray[np.bool_]) -> bool:\n    \"\"\"\n    Checks if the binary mask contains holes (background pixels fully enclosed by\n    foreground pixels).\n\n    Args:\n        mask (npt.NDArray[np.bool_]): 2D binary mask where `True` indicates foreground\n            object and `False` indicates background.\n\n    Returns:\n        True if holes are detected, False otherwise.\n\n    Examples:\n        ```python\n        import numpy as np\n        import supervision as sv\n\n        mask = np.array([\n            [0, 0, 0, 0, 0],\n            [0, 1, 1, 1, 0],\n            [0, 1, 0, 1, 0],\n            [0, 1, 1, 1, 0],\n            [0, 0, 0, 0, 0]\n        ]).astype(bool)\n\n        sv.contains_holes(mask=mask)\n        # True\n\n        mask = np.array([\n            [0, 0, 0, 0, 0],\n            [0, 1, 1, 1, 0],\n            [0, 1, 1, 1, 0],\n            [0, 1, 1, 1, 0],\n            [0, 0, 0, 0, 0]\n        ]).astype(bool)\n\n        sv.contains_holes(mask=mask)\n        # False\n        ```\n\n    ![contains_holes](https://media.roboflow.com/supervision-docs/contains-holes.png){ align=center width=\"800\" }\n    \"\"\"  # noqa E501 // docs\n    mask_uint8 = mask.astype(np.uint8)\n    _, hierarchy = cv2.findContours(mask_uint8, cv2.RETR_CCOMP, cv2.CHAIN_APPROX_SIMPLE)\n\n    if hierarchy is not None:\n        parent_contour_index = 3\n        for h in hierarchy[0]:\n            if h[parent_contour_index] != -1:\n                return True\n    return False\n
contains_multiple_segments

Checks if the binary mask contains multiple unconnected foreground segments.

Parameters:

    mask (npt.NDArray[np.bool_]): 2D binary mask where True indicates foreground object and False indicates background. Required.
    connectivity (int): Default: 4, meaning 4-way connectivity, where foreground pixels belong to the same segment/component if their edges touch. Alternatively, 8 for 8-way connectivity, where foreground pixels are connected if their edges or corners touch.

Returns:

    bool: True when the mask contains multiple unconnected components, False otherwise.

Raises:

    ValueError: If the connectivity parameter value is not 4 or 8.

Examples:

import numpy as np\nimport supervision as sv\n\nmask = np.array([\n    [0, 0, 0, 0, 0, 0],\n    [0, 1, 1, 0, 1, 1],\n    [0, 1, 1, 0, 1, 1],\n    [0, 0, 0, 0, 0, 0],\n    [0, 1, 1, 1, 0, 0],\n    [0, 1, 1, 1, 0, 0]\n]).astype(bool)\n\nsv.contains_multiple_segments(mask=mask, connectivity=4)\n# True\n\nmask = np.array([\n    [0, 0, 0, 0, 0, 0],\n    [0, 1, 1, 1, 1, 1],\n    [0, 1, 1, 1, 1, 1],\n    [0, 1, 1, 1, 1, 1],\n    [0, 1, 1, 1, 1, 1],\n    [0, 0, 0, 0, 0, 0]\n]).astype(bool)\n\nsv.contains_multiple_segments(mask=mask, connectivity=4)\n# False\n

Source code in supervision/detection/utils.py
def contains_multiple_segments(\n    mask: npt.NDArray[np.bool_], connectivity: int = 4\n) -> bool:\n    \"\"\"\n    Checks if the binary mask contains multiple unconnected foreground segments.\n\n    Args:\n        mask (npt.NDArray[np.bool_]): 2D binary mask where `True` indicates foreground\n            object and `False` indicates background.\n        connectivity (int) : Default: 4 is 4-way connectivity, which means that\n            foreground pixels are the part of the same segment/component\n            if their edges touch.\n            Alternatively: 8 for 8-way connectivity, when foreground pixels are\n            connected by their edges or corners touch.\n\n    Returns:\n        True when the mask contains multiple not connected components, False otherwise.\n\n    Raises:\n        ValueError: If connectivity(int) parameter value is not 4 or 8.\n\n    Examples:\n        ```python\n        import numpy as np\n        import supervision as sv\n\n        mask = np.array([\n            [0, 0, 0, 0, 0, 0],\n            [0, 1, 1, 0, 1, 1],\n            [0, 1, 1, 0, 1, 1],\n            [0, 0, 0, 0, 0, 0],\n            [0, 1, 1, 1, 0, 0],\n            [0, 1, 1, 1, 0, 0]\n        ]).astype(bool)\n\n        sv.contains_multiple_segments(mask=mask, connectivity=4)\n        # True\n\n        mask = np.array([\n            [0, 0, 0, 0, 0, 0],\n            [0, 1, 1, 1, 1, 1],\n            [0, 1, 1, 1, 1, 1],\n            [0, 1, 1, 1, 1, 1],\n            [0, 1, 1, 1, 1, 1],\n            [0, 0, 0, 0, 0, 0]\n        ]).astype(bool)\n\n        sv.contains_multiple_segments(mask=mask, connectivity=4)\n        # False\n        ```\n\n    ![contains_multiple_segments](https://media.roboflow.com/supervision-docs/contains-multiple-segments.png){ align=center width=\"800\" }\n    \"\"\"  # noqa E501 // docs\n    if connectivity != 4 and connectivity != 8:\n        raise ValueError(\n            \"Incorrect connectivity value. Possible connectivity values: 4 or 8.\"\n        )\n    mask_uint8 = mask.astype(np.uint8)\n    labels = np.zeros_like(mask_uint8, dtype=np.int32)\n    number_of_labels, _ = cv2.connectedComponents(\n        mask_uint8, labels, connectivity=connectivity\n    )\n    return number_of_labels > 2\n
"},{"location":"detection/tools/inference_slicer/","title":"InferenceSlicer","text":"

InferenceSlicer performs slicing-based inference for small target detection. This method, often referred to as Slicing Aided Hyper Inference (SAHI), involves dividing a larger image into smaller slices, performing inference on each slice, and then merging the detections.

Parameters:

    slice_wh (Tuple[int, int]): Dimensions of each slice in the format (width, height). Default: (320, 320).
    overlap_ratio_wh (Tuple[float, float]): Overlap ratio between consecutive slices in the format (width_ratio, height_ratio). Default: (0.2, 0.2).
    overlap_filter_strategy (Union[OverlapFilter, str]): Strategy for filtering or merging overlapping detections in slices. Default: NON_MAX_SUPPRESSION.
    iou_threshold (float): Intersection over Union (IoU) threshold used when filtering by overlap. Default: 0.5.
    callback (Callable): A function that performs inference on a given image slice and returns detections. Required.
    thread_workers (int): Number of threads for parallel execution. Default: 1.

Note

The class ensures that slices do not exceed the boundaries of the original image. As a result, the final slices in the row and column dimensions might be smaller than the specified slice dimensions if the image's width or height is not a multiple of the slice's width or height minus the overlap.
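To make the note concrete, the sketch below reproduces the offset-generation arithmetic used internally (see _generate_offset in the source code below); it is shown only for illustration and is not part of the public API.

```python
import numpy as np

slice_w, slice_h = 320, 320
image_w, image_h = 500, 400
overlap_w, overlap_h = 0.2, 0.2

# stride between slice origins, reduced by the requested overlap
stride_w = slice_w - int(overlap_w * slice_w)  # 256
stride_h = slice_h - int(overlap_h * slice_h)  # 256

xs = np.arange(0, image_w, stride_w)  # [0, 256]
ys = np.arange(0, image_h, stride_h)  # [0, 256]

xmin, ymin = np.meshgrid(xs, ys)
xmax = np.clip(xmin + slice_w, 0, image_w)
ymax = np.clip(ymin + slice_h, 0, image_h)

print(np.stack([xmin, ymin, xmax, ymax], axis=-1).reshape(-1, 4))
# [[  0   0 320 320]
#  [256   0 500 320]   <- clipped to the image width
#  [  0 256 320 400]   <- clipped to the image height
#  [256 256 500 400]]
```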

Source code in supervision/detection/tools/inference_slicer.py
class InferenceSlicer:\n    \"\"\"\n    InferenceSlicer performs slicing-based inference for small target detection. This\n    method, often referred to as\n    [Slicing Adaptive Inference (SAHI)](https://ieeexplore.ieee.org/document/9897990),\n    involves dividing a larger image into smaller slices, performing inference on each\n    slice, and then merging the detections.\n\n    Args:\n        slice_wh (Tuple[int, int]): Dimensions of each slice in the format\n            `(width, height)`.\n        overlap_ratio_wh (Tuple[float, float]): Overlap ratio between consecutive\n            slices in the format `(width_ratio, height_ratio)`.\n        overlap_filter_strategy (Union[OverlapFilter, str]): Strategy for\n            filtering or merging overlapping detections in slices.\n        iou_threshold (float): Intersection over Union (IoU) threshold\n            used when filtering by overlap.\n        callback (Callable): A function that performs inference on a given image\n            slice and returns detections.\n        thread_workers (int): Number of threads for parallel execution.\n\n    Note:\n        The class ensures that slices do not exceed the boundaries of the original\n        image. As a result, the final slices in the row and column dimensions might be\n        smaller than the specified slice dimensions if the image's width or height is\n        not a multiple of the slice's width or height minus the overlap.\n    \"\"\"\n\n    def __init__(\n        self,\n        callback: Callable[[np.ndarray], Detections],\n        slice_wh: Tuple[int, int] = (320, 320),\n        overlap_ratio_wh: Tuple[float, float] = (0.2, 0.2),\n        overlap_filter_strategy: Union[\n            OverlapFilter, str\n        ] = OverlapFilter.NON_MAX_SUPPRESSION,\n        iou_threshold: float = 0.5,\n        thread_workers: int = 1,\n    ):\n        overlap_filter_strategy = validate_overlap_filter(overlap_filter_strategy)\n\n        self.slice_wh = slice_wh\n        self.overlap_ratio_wh = overlap_ratio_wh\n        self.iou_threshold = iou_threshold\n        self.overlap_filter_strategy = overlap_filter_strategy\n        self.callback = callback\n        self.thread_workers = thread_workers\n\n    def __call__(self, image: np.ndarray) -> Detections:\n        \"\"\"\n        Performs slicing-based inference on the provided image using the specified\n            callback.\n\n        Args:\n            image (np.ndarray): The input image on which inference needs to be\n                performed. 
The image should be in the format\n                `(height, width, channels)`.\n\n        Returns:\n            Detections: A collection of detections for the entire image after merging\n                results from all slices and applying NMS.\n\n        Example:\n            ```python\n            import cv2\n            import supervision as sv\n            from ultralytics import YOLO\n\n            image = cv2.imread(SOURCE_IMAGE_PATH)\n            model = YOLO(...)\n\n            def callback(image_slice: np.ndarray) -> sv.Detections:\n                result = model(image_slice)[0]\n                return sv.Detections.from_ultralytics(result)\n\n            slicer = sv.InferenceSlicer(\n                callback=callback,\n                overlap_filter_strategy=sv.OverlapFilter.NON_MAX_SUPPRESSION,\n            )\n\n            detections = slicer(image)\n            ```\n        \"\"\"\n        detections_list = []\n        resolution_wh = (image.shape[1], image.shape[0])\n        offsets = self._generate_offset(\n            resolution_wh=resolution_wh,\n            slice_wh=self.slice_wh,\n            overlap_ratio_wh=self.overlap_ratio_wh,\n        )\n\n        with ThreadPoolExecutor(max_workers=self.thread_workers) as executor:\n            futures = [\n                executor.submit(self._run_callback, image, offset) for offset in offsets\n            ]\n            for future in as_completed(futures):\n                detections_list.append(future.result())\n\n        merged = Detections.merge(detections_list=detections_list)\n        if self.overlap_filter_strategy == OverlapFilter.NONE:\n            return merged\n        elif self.overlap_filter_strategy == OverlapFilter.NON_MAX_SUPPRESSION:\n            return merged.with_nms(threshold=self.iou_threshold)\n        elif self.overlap_filter_strategy == OverlapFilter.NON_MAX_MERGE:\n            return merged.with_nmm(threshold=self.iou_threshold)\n        else:\n            warnings.warn(\n                f\"Invalid overlap filter strategy: {self.overlap_filter_strategy}\",\n                category=SupervisionWarnings,\n            )\n            return merged\n\n    def _run_callback(self, image, offset) -> Detections:\n        \"\"\"\n        Run the provided callback on a slice of an image.\n\n        Args:\n            image (np.ndarray): The input image on which inference needs to run\n            offset (np.ndarray): An array of shape `(4,)` containing coordinates\n                for the slice.\n\n        Returns:\n            Detections: A collection of detections for the slice.\n        \"\"\"\n        image_slice = crop_image(image=image, xyxy=offset)\n        detections = self.callback(image_slice)\n        resolution_wh = (image.shape[1], image.shape[0])\n        detections = move_detections(\n            detections=detections, offset=offset[:2], resolution_wh=resolution_wh\n        )\n\n        return detections\n\n    @staticmethod\n    def _generate_offset(\n        resolution_wh: Tuple[int, int],\n        slice_wh: Tuple[int, int],\n        overlap_ratio_wh: Tuple[float, float],\n    ) -> np.ndarray:\n        \"\"\"\n        Generate offset coordinates for slicing an image based on the given resolution,\n        slice dimensions, and overlap ratios.\n\n        Args:\n            resolution_wh (Tuple[int, int]): A tuple representing the width and height\n                of the image to be sliced.\n            slice_wh (Tuple[int, int]): A tuple representing the desired width and\n                height of 
each slice.\n            overlap_ratio_wh (Tuple[float, float]): A tuple representing the desired\n                overlap ratio for width and height between consecutive slices. Each\n                value should be in the range [0, 1), where 0 means no overlap and a\n                value close to 1 means high overlap.\n\n        Returns:\n            np.ndarray: An array of shape `(n, 4)` containing coordinates for each\n                slice in the format `[xmin, ymin, xmax, ymax]`.\n\n        Note:\n            The function ensures that slices do not exceed the boundaries of the\n                original image. As a result, the final slices in the row and column\n                dimensions might be smaller than the specified slice dimensions if the\n                image's width or height is not a multiple of the slice's width or\n                height minus the overlap.\n        \"\"\"\n        slice_width, slice_height = slice_wh\n        image_width, image_height = resolution_wh\n        overlap_ratio_width, overlap_ratio_height = overlap_ratio_wh\n\n        width_stride = slice_width - int(overlap_ratio_width * slice_width)\n        height_stride = slice_height - int(overlap_ratio_height * slice_height)\n\n        ws = np.arange(0, image_width, width_stride)\n        hs = np.arange(0, image_height, height_stride)\n\n        xmin, ymin = np.meshgrid(ws, hs)\n        xmax = np.clip(xmin + slice_width, 0, image_width)\n        ymax = np.clip(ymin + slice_height, 0, image_height)\n\n        offsets = np.stack([xmin, ymin, xmax, ymax], axis=-1).reshape(-1, 4)\n\n        return offsets\n
"},{"location":"detection/tools/inference_slicer/#supervision.detection.tools.inference_slicer.InferenceSlicer-functions","title":"Functions","text":""},{"location":"detection/tools/inference_slicer/#supervision.detection.tools.inference_slicer.InferenceSlicer.__call__","title":"__call__(image)","text":"

Performs slicing-based inference on the provided image using the specified callback.

Parameters:

    image (np.ndarray): The input image on which inference needs to be performed. The image should be in the format (height, width, channels). Required.

Returns:

    Detections: A collection of detections for the entire image after merging results from all slices and applying NMS.

Example
import cv2\nimport supervision as sv\nfrom ultralytics import YOLO\n\nimage = cv2.imread(SOURCE_IMAGE_PATH)\nmodel = YOLO(...)\n\ndef callback(image_slice: np.ndarray) -> sv.Detections:\n    result = model(image_slice)[0]\n    return sv.Detections.from_ultralytics(result)\n\nslicer = sv.InferenceSlicer(\n    callback=callback,\n    overlap_filter_strategy=sv.OverlapFilter.NON_MAX_SUPPRESSION,\n)\n\ndetections = slicer(image)\n
Source code in supervision/detection/tools/inference_slicer.py
def __call__(self, image: np.ndarray) -> Detections:\n    \"\"\"\n    Performs slicing-based inference on the provided image using the specified\n        callback.\n\n    Args:\n        image (np.ndarray): The input image on which inference needs to be\n            performed. The image should be in the format\n            `(height, width, channels)`.\n\n    Returns:\n        Detections: A collection of detections for the entire image after merging\n            results from all slices and applying NMS.\n\n    Example:\n        ```python\n        import cv2\n        import supervision as sv\n        from ultralytics import YOLO\n\n        image = cv2.imread(SOURCE_IMAGE_PATH)\n        model = YOLO(...)\n\n        def callback(image_slice: np.ndarray) -> sv.Detections:\n            result = model(image_slice)[0]\n            return sv.Detections.from_ultralytics(result)\n\n        slicer = sv.InferenceSlicer(\n            callback=callback,\n            overlap_filter_strategy=sv.OverlapFilter.NON_MAX_SUPPRESSION,\n        )\n\n        detections = slicer(image)\n        ```\n    \"\"\"\n    detections_list = []\n    resolution_wh = (image.shape[1], image.shape[0])\n    offsets = self._generate_offset(\n        resolution_wh=resolution_wh,\n        slice_wh=self.slice_wh,\n        overlap_ratio_wh=self.overlap_ratio_wh,\n    )\n\n    with ThreadPoolExecutor(max_workers=self.thread_workers) as executor:\n        futures = [\n            executor.submit(self._run_callback, image, offset) for offset in offsets\n        ]\n        for future in as_completed(futures):\n            detections_list.append(future.result())\n\n    merged = Detections.merge(detections_list=detections_list)\n    if self.overlap_filter_strategy == OverlapFilter.NONE:\n        return merged\n    elif self.overlap_filter_strategy == OverlapFilter.NON_MAX_SUPPRESSION:\n        return merged.with_nms(threshold=self.iou_threshold)\n    elif self.overlap_filter_strategy == OverlapFilter.NON_MAX_MERGE:\n        return merged.with_nmm(threshold=self.iou_threshold)\n    else:\n        warnings.warn(\n            f\"Invalid overlap filter strategy: {self.overlap_filter_strategy}\",\n            category=SupervisionWarnings,\n        )\n        return merged\n
"},{"location":"detection/tools/line_zone/","title":"Line Zone","text":"LineZone

This class is responsible for counting the number of objects that cross a predefined line.

Warning

LineZone uses the tracker_id. Read here to learn how to plug tracking into your inference pipeline.

Attributes:

    in_count (int): The number of objects that have crossed the line from outside to inside.
    out_count (int): The number of objects that have crossed the line from inside to outside.

Example
import supervision as sv\nfrom ultralytics import YOLO\n\nmodel = YOLO(<SOURCE_MODEL_PATH>)\ntracker = sv.ByteTrack()\nframes_generator = sv.get_video_frames_generator(<SOURCE_VIDEO_PATH>)\nstart, end = sv.Point(x=0, y=1080), sv.Point(x=3840, y=1080)\nline_zone = sv.LineZone(start=start, end=end)\n\nfor frame in frames_generator:\n    result = model(frame)[0]\n    detections = sv.Detections.from_ultralytics(result)\n    detections = tracker.update_with_detections(detections)\n    crossed_in, crossed_out = line_zone.trigger(detections)\n\nline_zone.in_count, line_zone.out_count\n# 7, 2\n
Source code in supervision/detection/line_zone.py
class LineZone:\n    \"\"\"\n    This class is responsible for counting the number of objects that cross a\n    predefined line.\n\n    <video controls>\n        <source\n            src=\"https://media.roboflow.com/supervision/cookbooks/count-objects-crossing-the-line-result-1280x720.mp4\"\n            type=\"video/mp4\">\n    </video>\n\n    !!! warning\n\n        LineZone uses the `tracker_id`. Read\n        [here](/latest/trackers/) to learn how to plug\n        tracking into your inference pipeline.\n\n    Attributes:\n        in_count (int): The number of objects that have crossed the line from outside\n            to inside.\n        out_count (int): The number of objects that have crossed the line from inside\n            to outside.\n\n    Example:\n        ```python\n        import supervision as sv\n        from ultralytics import YOLO\n\n        model = YOLO(<SOURCE_MODEL_PATH>)\n        tracker = sv.ByteTrack()\n        frames_generator = sv.get_video_frames_generator(<SOURCE_VIDEO_PATH>)\n        start, end = sv.Point(x=0, y=1080), sv.Point(x=3840, y=1080)\n        line_zone = sv.LineZone(start=start, end=end)\n\n        for frame in frames_generator:\n            result = model(frame)[0]\n            detections = sv.Detections.from_ultralytics(result)\n            detections = tracker.update_with_detections(detections)\n            crossed_in, crossed_out = line_zone.trigger(detections)\n\n        line_zone.in_count, line_zone.out_count\n        # 7, 2\n        ```\n    \"\"\"  # noqa: E501 // docs\n\n    def __init__(\n        self,\n        start: Point,\n        end: Point,\n        triggering_anchors: Iterable[Position] = (\n            Position.TOP_LEFT,\n            Position.TOP_RIGHT,\n            Position.BOTTOM_LEFT,\n            Position.BOTTOM_RIGHT,\n        ),\n    ):\n        \"\"\"\n        Args:\n            start (Point): The starting point of the line.\n            end (Point): The ending point of the line.\n            triggering_anchors (List[sv.Position]): A list of positions\n                specifying which anchors of the detections bounding box\n                to consider when deciding on whether the detection\n                has passed the line counter or not. 
By default, this\n                contains the four corners of the detection's bounding box\n        \"\"\"\n        self.vector = Vector(start=start, end=end)\n        self.limits = self.calculate_region_of_interest_limits(vector=self.vector)\n        self.tracker_state: Dict[str, bool] = {}\n        self.in_count: int = 0\n        self.out_count: int = 0\n        self.triggering_anchors = triggering_anchors\n        if not list(self.triggering_anchors):\n            raise ValueError(\"Triggering anchors cannot be empty.\")\n\n    @staticmethod\n    def calculate_region_of_interest_limits(vector: Vector) -> Tuple[Vector, Vector]:\n        magnitude = vector.magnitude\n\n        if magnitude == 0:\n            raise ValueError(\"The magnitude of the vector cannot be zero.\")\n\n        delta_x = vector.end.x - vector.start.x\n        delta_y = vector.end.y - vector.start.y\n\n        unit_vector_x = delta_x / magnitude\n        unit_vector_y = delta_y / magnitude\n\n        perpendicular_vector_x = -unit_vector_y\n        perpendicular_vector_y = unit_vector_x\n\n        start_region_limit = Vector(\n            start=vector.start,\n            end=Point(\n                x=vector.start.x + perpendicular_vector_x,\n                y=vector.start.y + perpendicular_vector_y,\n            ),\n        )\n        end_region_limit = Vector(\n            start=vector.end,\n            end=Point(\n                x=vector.end.x - perpendicular_vector_x,\n                y=vector.end.y - perpendicular_vector_y,\n            ),\n        )\n        return start_region_limit, end_region_limit\n\n    @staticmethod\n    def is_point_in_limits(point: Point, limits: Tuple[Vector, Vector]) -> bool:\n        cross_product_1 = limits[0].cross_product(point)\n        cross_product_2 = limits[1].cross_product(point)\n        return (cross_product_1 > 0) == (cross_product_2 > 0)\n\n    def trigger(self, detections: Detections) -> Tuple[np.ndarray, np.ndarray]:\n        \"\"\"\n        Update the `in_count` and `out_count` based on the objects that cross the line.\n\n        Args:\n            detections (Detections): A list of detections for which to update the\n                counts.\n\n        Returns:\n            A tuple of two boolean NumPy arrays. The first array indicates which\n                detections have crossed the line from outside to inside. The second\n                array indicates which detections have crossed the line from inside to\n                outside.\n        \"\"\"\n        crossed_in = np.full(len(detections), False)\n        crossed_out = np.full(len(detections), False)\n\n        if len(detections) == 0:\n            return crossed_in, crossed_out\n\n        if detections.tracker_id is None:\n            warnings.warn(\n                \"Line zone counting skipped. LineZone requires tracker_id. 
Refer to \"\n                \"https://supervision.roboflow.com/latest/trackers for more \"\n                \"information.\",\n                category=SupervisionWarnings,\n            )\n            return crossed_in, crossed_out\n\n        all_anchors = np.array(\n            [\n                detections.get_anchors_coordinates(anchor)\n                for anchor in self.triggering_anchors\n            ]\n        )\n\n        cross_products_1 = cross_product(all_anchors, self.limits[0])\n        cross_products_2 = cross_product(all_anchors, self.limits[1])\n        in_limits = (cross_products_1 > 0) == (cross_products_2 > 0)\n        in_limits = np.all(in_limits, axis=0)\n\n        triggers = cross_product(all_anchors, self.vector) < 0\n        has_any_left_trigger = np.any(triggers, axis=0)\n        has_any_right_trigger = np.any(~triggers, axis=0)\n        is_uniformly_triggered = ~(has_any_left_trigger & has_any_right_trigger)\n        for i, tracker_id in enumerate(detections.tracker_id):\n            if not in_limits[i]:\n                continue\n\n            if not is_uniformly_triggered[i]:\n                continue\n\n            tracker_state = has_any_left_trigger[i]\n            if tracker_id not in self.tracker_state:\n                self.tracker_state[tracker_id] = tracker_state\n                continue\n\n            if self.tracker_state.get(tracker_id) == tracker_state:\n                continue\n\n            self.tracker_state[tracker_id] = tracker_state\n            if tracker_state:\n                self.in_count += 1\n                crossed_in[i] = True\n            else:\n                self.out_count += 1\n                crossed_out[i] = True\n\n        return crossed_in, crossed_out\n
LineZoneAnnotator Source code in supervision/detection/line_zone.py
class LineZoneAnnotator:\n    def __init__(\n        self,\n        thickness: float = 2,\n        color: Color = Color.WHITE,\n        text_thickness: float = 2,\n        text_color: Color = Color.BLACK,\n        text_scale: float = 0.5,\n        text_offset: float = 1.5,\n        text_padding: int = 10,\n        custom_in_text: Optional[str] = None,\n        custom_out_text: Optional[str] = None,\n        display_in_count: bool = True,\n        display_out_count: bool = True,\n    ):\n        \"\"\"\n        Initialize the LineCounterAnnotator object with default values.\n\n        Attributes:\n            thickness (float): The thickness of the line that will be drawn.\n            color (Color): The color of the line that will be drawn.\n            text_thickness (float): The thickness of the text that will be drawn.\n            text_color (Color): The color of the text that will be drawn.\n            text_scale (float): The scale of the text that will be drawn.\n            text_offset (float): The offset of the text that will be drawn.\n            text_padding (int): The padding of the text that will be drawn.\n            display_in_count (bool): Whether to display the in count or not.\n            display_out_count (bool): Whether to display the out count or not.\n\n        \"\"\"\n        self.thickness: float = thickness\n        self.color: Color = color\n        self.text_thickness: float = text_thickness\n        self.text_color: Color = text_color\n        self.text_scale: float = text_scale\n        self.text_offset: float = text_offset\n        self.text_padding: int = text_padding\n        self.custom_in_text: str = custom_in_text\n        self.custom_out_text: str = custom_out_text\n        self.display_in_count: bool = display_in_count\n        self.display_out_count: bool = display_out_count\n\n    def _annotate_count(\n        self,\n        frame: np.ndarray,\n        center_text_anchor: Point,\n        text: str,\n        is_in_count: bool,\n    ) -> None:\n        \"\"\"This method is drawing the text on the frame.\n\n        Args:\n            frame (np.ndarray): The image on which the text will be drawn.\n            center_text_anchor: The center point that the text will be drawn.\n            text (str): The text that will be drawn.\n            is_in_count (bool): Whether to display the in count or out count.\n        \"\"\"\n        _, text_height = cv2.getTextSize(\n            text, cv2.FONT_HERSHEY_SIMPLEX, self.text_scale, self.text_thickness\n        )[0]\n\n        if is_in_count:\n            center_text_anchor.y -= int(self.text_offset * text_height)\n        else:\n            center_text_anchor.y += int(self.text_offset * text_height)\n\n        draw_text(\n            scene=frame,\n            text=text,\n            text_anchor=center_text_anchor,\n            text_color=self.text_color,\n            text_scale=self.text_scale,\n            text_thickness=self.text_thickness,\n            text_padding=self.text_padding,\n            background_color=self.color,\n        )\n\n    def annotate(self, frame: np.ndarray, line_counter: LineZone) -> np.ndarray:\n        \"\"\"\n        Draws the line on the frame using the line_counter provided.\n\n        Attributes:\n            frame (np.ndarray): The image on which the line will be drawn.\n            line_counter (LineCounter): The line counter\n                that will be used to draw the line.\n\n        Returns:\n            np.ndarray: The image with the line drawn on it.\n\n        \"\"\"\n 
       cv2.line(\n            frame,\n            line_counter.vector.start.as_xy_int_tuple(),\n            line_counter.vector.end.as_xy_int_tuple(),\n            self.color.as_bgr(),\n            self.thickness,\n            lineType=cv2.LINE_AA,\n            shift=0,\n        )\n        cv2.circle(\n            frame,\n            line_counter.vector.start.as_xy_int_tuple(),\n            radius=5,\n            color=self.text_color.as_bgr(),\n            thickness=-1,\n            lineType=cv2.LINE_AA,\n        )\n        cv2.circle(\n            frame,\n            line_counter.vector.end.as_xy_int_tuple(),\n            radius=5,\n            color=self.text_color.as_bgr(),\n            thickness=-1,\n            lineType=cv2.LINE_AA,\n        )\n\n        text_anchor = Vector(\n            start=line_counter.vector.start, end=line_counter.vector.end\n        )\n\n        if self.display_in_count:\n            in_text = (\n                f\"{self.custom_in_text}: {line_counter.in_count}\"\n                if self.custom_in_text is not None\n                else f\"in: {line_counter.in_count}\"\n            )\n            self._annotate_count(\n                frame=frame,\n                center_text_anchor=text_anchor.center,\n                text=in_text,\n                is_in_count=True,\n            )\n\n        if self.display_out_count:\n            out_text = (\n                f\"{self.custom_out_text}: {line_counter.out_count}\"\n                if self.custom_out_text is not None\n                else f\"out: {line_counter.out_count}\"\n            )\n            self._annotate_count(\n                frame=frame,\n                center_text_anchor=text_anchor.center,\n                text=out_text,\n                is_in_count=False,\n            )\n        return frame\n
"},{"location":"detection/tools/line_zone/#supervision.detection.line_zone.LineZone-functions","title":"Functions","text":""},{"location":"detection/tools/line_zone/#supervision.detection.line_zone.LineZone.__init__","title":"__init__(start, end, triggering_anchors=(Position.TOP_LEFT, Position.TOP_RIGHT, Position.BOTTOM_LEFT, Position.BOTTOM_RIGHT))","text":"

Parameters:

    start (Point): The starting point of the line. Required.
    end (Point): The ending point of the line. Required.
    triggering_anchors (List[sv.Position]): A list of positions specifying which anchors of the detections bounding box to consider when deciding on whether the detection has passed the line counter or not. Default: (TOP_LEFT, TOP_RIGHT, BOTTOM_LEFT, BOTTOM_RIGHT), the four corners of the detection's bounding box.

Source code in supervision/detection/line_zone.py
def __init__(\n    self,\n    start: Point,\n    end: Point,\n    triggering_anchors: Iterable[Position] = (\n        Position.TOP_LEFT,\n        Position.TOP_RIGHT,\n        Position.BOTTOM_LEFT,\n        Position.BOTTOM_RIGHT,\n    ),\n):\n    \"\"\"\n    Args:\n        start (Point): The starting point of the line.\n        end (Point): The ending point of the line.\n        triggering_anchors (List[sv.Position]): A list of positions\n            specifying which anchors of the detections bounding box\n            to consider when deciding on whether the detection\n            has passed the line counter or not. By default, this\n            contains the four corners of the detection's bounding box\n    \"\"\"\n    self.vector = Vector(start=start, end=end)\n    self.limits = self.calculate_region_of_interest_limits(vector=self.vector)\n    self.tracker_state: Dict[str, bool] = {}\n    self.in_count: int = 0\n    self.out_count: int = 0\n    self.triggering_anchors = triggering_anchors\n    if not list(self.triggering_anchors):\n        raise ValueError(\"Triggering anchors cannot be empty.\")\n
"},{"location":"detection/tools/line_zone/#supervision.detection.line_zone.LineZone.trigger","title":"trigger(detections)","text":"

Update the in_count and out_count based on the objects that cross the line.

Parameters:

    detections (Detections): A list of detections for which to update the counts. Required.

Returns:

    Tuple[np.ndarray, np.ndarray]: A tuple of two boolean NumPy arrays. The first array indicates which detections have crossed the line from outside to inside. The second array indicates which detections have crossed the line from inside to outside.

Source code in supervision/detection/line_zone.py
def trigger(self, detections: Detections) -> Tuple[np.ndarray, np.ndarray]:\n    \"\"\"\n    Update the `in_count` and `out_count` based on the objects that cross the line.\n\n    Args:\n        detections (Detections): A list of detections for which to update the\n            counts.\n\n    Returns:\n        A tuple of two boolean NumPy arrays. The first array indicates which\n            detections have crossed the line from outside to inside. The second\n            array indicates which detections have crossed the line from inside to\n            outside.\n    \"\"\"\n    crossed_in = np.full(len(detections), False)\n    crossed_out = np.full(len(detections), False)\n\n    if len(detections) == 0:\n        return crossed_in, crossed_out\n\n    if detections.tracker_id is None:\n        warnings.warn(\n            \"Line zone counting skipped. LineZone requires tracker_id. Refer to \"\n            \"https://supervision.roboflow.com/latest/trackers for more \"\n            \"information.\",\n            category=SupervisionWarnings,\n        )\n        return crossed_in, crossed_out\n\n    all_anchors = np.array(\n        [\n            detections.get_anchors_coordinates(anchor)\n            for anchor in self.triggering_anchors\n        ]\n    )\n\n    cross_products_1 = cross_product(all_anchors, self.limits[0])\n    cross_products_2 = cross_product(all_anchors, self.limits[1])\n    in_limits = (cross_products_1 > 0) == (cross_products_2 > 0)\n    in_limits = np.all(in_limits, axis=0)\n\n    triggers = cross_product(all_anchors, self.vector) < 0\n    has_any_left_trigger = np.any(triggers, axis=0)\n    has_any_right_trigger = np.any(~triggers, axis=0)\n    is_uniformly_triggered = ~(has_any_left_trigger & has_any_right_trigger)\n    for i, tracker_id in enumerate(detections.tracker_id):\n        if not in_limits[i]:\n            continue\n\n        if not is_uniformly_triggered[i]:\n            continue\n\n        tracker_state = has_any_left_trigger[i]\n        if tracker_id not in self.tracker_state:\n            self.tracker_state[tracker_id] = tracker_state\n            continue\n\n        if self.tracker_state.get(tracker_id) == tracker_state:\n            continue\n\n        self.tracker_state[tracker_id] = tracker_state\n        if tracker_state:\n            self.in_count += 1\n            crossed_in[i] = True\n        else:\n            self.out_count += 1\n            crossed_out[i] = True\n\n    return crossed_in, crossed_out\n
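A short sketch of consuming the two boolean arrays returned by trigger, assuming line_zone, detections, and the tracking setup come from the class-level example above. Detections supports boolean-mask indexing, so the arrays can be used to select the objects that crossed on the current frame.

```python
crossed_in, crossed_out = line_zone.trigger(detections)

# detections that crossed the line on this frame, split by direction
just_entered = detections[crossed_in]   # outside -> inside
just_left = detections[crossed_out]     # inside -> outside
```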
"},{"location":"detection/tools/line_zone/#supervision.detection.line_zone.LineZoneAnnotator-functions","title":"Functions","text":""},{"location":"detection/tools/line_zone/#supervision.detection.line_zone.LineZoneAnnotator.__init__","title":"__init__(thickness=2, color=Color.WHITE, text_thickness=2, text_color=Color.BLACK, text_scale=0.5, text_offset=1.5, text_padding=10, custom_in_text=None, custom_out_text=None, display_in_count=True, display_out_count=True)","text":"

Initialize the LineCounterAnnotator object with default values.

Attributes:

    thickness (float): The thickness of the line that will be drawn.
    color (Color): The color of the line that will be drawn.
    text_thickness (float): The thickness of the text that will be drawn.
    text_color (Color): The color of the text that will be drawn.
    text_scale (float): The scale of the text that will be drawn.
    text_offset (float): The offset of the text that will be drawn.
    text_padding (int): The padding of the text that will be drawn.
    display_in_count (bool): Whether to display the in count or not.
    display_out_count (bool): Whether to display the out count or not.

Source code in supervision/detection/line_zone.py
def __init__(\n    self,\n    thickness: float = 2,\n    color: Color = Color.WHITE,\n    text_thickness: float = 2,\n    text_color: Color = Color.BLACK,\n    text_scale: float = 0.5,\n    text_offset: float = 1.5,\n    text_padding: int = 10,\n    custom_in_text: Optional[str] = None,\n    custom_out_text: Optional[str] = None,\n    display_in_count: bool = True,\n    display_out_count: bool = True,\n):\n    \"\"\"\n    Initialize the LineCounterAnnotator object with default values.\n\n    Attributes:\n        thickness (float): The thickness of the line that will be drawn.\n        color (Color): The color of the line that will be drawn.\n        text_thickness (float): The thickness of the text that will be drawn.\n        text_color (Color): The color of the text that will be drawn.\n        text_scale (float): The scale of the text that will be drawn.\n        text_offset (float): The offset of the text that will be drawn.\n        text_padding (int): The padding of the text that will be drawn.\n        display_in_count (bool): Whether to display the in count or not.\n        display_out_count (bool): Whether to display the out count or not.\n\n    \"\"\"\n    self.thickness: float = thickness\n    self.color: Color = color\n    self.text_thickness: float = text_thickness\n    self.text_color: Color = text_color\n    self.text_scale: float = text_scale\n    self.text_offset: float = text_offset\n    self.text_padding: int = text_padding\n    self.custom_in_text: str = custom_in_text\n    self.custom_out_text: str = custom_out_text\n    self.display_in_count: bool = display_in_count\n    self.display_out_count: bool = display_out_count\n
"},{"location":"detection/tools/line_zone/#supervision.detection.line_zone.LineZoneAnnotator.annotate","title":"annotate(frame, line_counter)","text":"

Draws the line on the frame using the line_counter provided.

Parameters:

    frame (np.ndarray): The image on which the line will be drawn.
    line_counter (LineZone): The line counter that will be used to draw the line.

Returns:

    np.ndarray: The image with the line drawn on it.

Source code in supervision/detection/line_zone.py
def annotate(self, frame: np.ndarray, line_counter: LineZone) -> np.ndarray:\n    \"\"\"\n    Draws the line on the frame using the line_counter provided.\n\n    Attributes:\n        frame (np.ndarray): The image on which the line will be drawn.\n        line_counter (LineCounter): The line counter\n            that will be used to draw the line.\n\n    Returns:\n        np.ndarray: The image with the line drawn on it.\n\n    \"\"\"\n    cv2.line(\n        frame,\n        line_counter.vector.start.as_xy_int_tuple(),\n        line_counter.vector.end.as_xy_int_tuple(),\n        self.color.as_bgr(),\n        self.thickness,\n        lineType=cv2.LINE_AA,\n        shift=0,\n    )\n    cv2.circle(\n        frame,\n        line_counter.vector.start.as_xy_int_tuple(),\n        radius=5,\n        color=self.text_color.as_bgr(),\n        thickness=-1,\n        lineType=cv2.LINE_AA,\n    )\n    cv2.circle(\n        frame,\n        line_counter.vector.end.as_xy_int_tuple(),\n        radius=5,\n        color=self.text_color.as_bgr(),\n        thickness=-1,\n        lineType=cv2.LINE_AA,\n    )\n\n    text_anchor = Vector(\n        start=line_counter.vector.start, end=line_counter.vector.end\n    )\n\n    if self.display_in_count:\n        in_text = (\n            f\"{self.custom_in_text}: {line_counter.in_count}\"\n            if self.custom_in_text is not None\n            else f\"in: {line_counter.in_count}\"\n        )\n        self._annotate_count(\n            frame=frame,\n            center_text_anchor=text_anchor.center,\n            text=in_text,\n            is_in_count=True,\n        )\n\n    if self.display_out_count:\n        out_text = (\n            f\"{self.custom_out_text}: {line_counter.out_count}\"\n            if self.custom_out_text is not None\n            else f\"out: {line_counter.out_count}\"\n        )\n        self._annotate_count(\n            frame=frame,\n            center_text_anchor=text_anchor.center,\n            text=out_text,\n            is_in_count=False,\n        )\n    return frame\n
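A minimal end-to-end sketch combining LineZone and LineZoneAnnotator, following the conventions of the LineZone example above; the model and video paths are placeholders.

```python
import supervision as sv
from ultralytics import YOLO

model = YOLO(<SOURCE_MODEL_PATH>)
tracker = sv.ByteTrack()
frames_generator = sv.get_video_frames_generator(<SOURCE_VIDEO_PATH>)

start, end = sv.Point(x=0, y=1080), sv.Point(x=3840, y=1080)
line_zone = sv.LineZone(start=start, end=end)
line_zone_annotator = sv.LineZoneAnnotator()

for frame in frames_generator:
    result = model(frame)[0]
    detections = sv.Detections.from_ultralytics(result)
    detections = tracker.update_with_detections(detections)
    line_zone.trigger(detections)
    annotated_frame = line_zone_annotator.annotate(
        frame=frame.copy(), line_counter=line_zone
    )
```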
"},{"location":"detection/tools/polygon_zone/","title":"Polygon Zone","text":"PolygonZone

A class for defining a polygon-shaped zone within a frame for detecting objects.

Attributes:

    polygon (np.ndarray): A polygon represented by a numpy array of shape (N, 2), containing the x, y coordinates of the points.
    triggering_anchors (Iterable[sv.Position]): A list of positions specifying which anchors of the detections bounding box to consider when deciding on whether the detection fits within the PolygonZone (default: (sv.Position.BOTTOM_CENTER,)).
    current_count (int): The current count of detected objects within the zone.
    mask (np.ndarray): The 2D bool mask for the polygon zone.

Source code in supervision/detection/tools/polygon_zone.py
class PolygonZone:\n    \"\"\"\n    A class for defining a polygon-shaped zone within a frame for detecting objects.\n\n    Attributes:\n        polygon (np.ndarray): A polygon represented by a numpy array of shape\n            `(N, 2)`, containing the `x`, `y` coordinates of the points.\n        triggering_anchors (Iterable[sv.Position]): A list of positions specifying\n            which anchors of the detections bounding box to consider when deciding on\n            whether the detection fits within the PolygonZone\n            (default: (sv.Position.BOTTOM_CENTER,)).\n        current_count (int): The current count of detected objects within the zone\n        mask (np.ndarray): The 2D bool mask for the polygon zone\n    \"\"\"\n\n    @deprecated_parameter(\n        old_parameter=\"triggering_position\",\n        new_parameter=\"triggering_anchors\",\n        map_function=lambda x: [x],\n        warning_message=\"`{old_parameter}` in `{function_name}` is deprecated and will \"\n        \"be remove in `supervision-0.23.0`. Use '{new_parameter}' \"\n        \"instead.\",\n    )\n    def __init__(\n        self,\n        polygon: npt.NDArray[np.int64],\n        frame_resolution_wh: Optional[Tuple[int, int]] = None,\n        triggering_anchors: Iterable[Position] = (Position.BOTTOM_CENTER,),\n    ):\n        if frame_resolution_wh is not None:\n            warnings.warn(\n                \"The `frame_resolution_wh` parameter is no longer required and will be \"\n                \"dropped in version supervision-0.24.0. The mask resolution is now \"\n                \"calculated automatically based on the polygon coordinates.\",\n                category=SupervisionWarnings,\n            )\n\n        self.polygon = polygon.astype(int)\n        self.triggering_anchors = triggering_anchors\n        if not list(self.triggering_anchors):\n            raise ValueError(\"Triggering anchors cannot be empty.\")\n\n        self.current_count = 0\n\n        x_max, y_max = np.max(polygon, axis=0)\n        self.frame_resolution_wh = (x_max + 1, y_max + 1)\n        self.mask = polygon_to_mask(\n            polygon=polygon, resolution_wh=(x_max + 2, y_max + 2)\n        )\n\n    def trigger(self, detections: Detections) -> npt.NDArray[np.bool_]:\n        \"\"\"\n        Determines if the detections are within the polygon zone.\n\n        Parameters:\n            detections (Detections): The detections\n                to be checked against the polygon zone\n\n        Returns:\n            np.ndarray: A boolean numpy array indicating\n                if each detection is within the polygon zone\n        \"\"\"\n\n        clipped_xyxy = clip_boxes(\n            xyxy=detections.xyxy, resolution_wh=self.frame_resolution_wh\n        )\n        clipped_detections = replace(detections, xyxy=clipped_xyxy)\n        all_clipped_anchors = np.array(\n            [\n                np.ceil(clipped_detections.get_anchors_coordinates(anchor)).astype(int)\n                for anchor in self.triggering_anchors\n            ]\n        )\n\n        is_in_zone: npt.NDArray[np.bool_] = (\n            self.mask[all_clipped_anchors[:, :, 1], all_clipped_anchors[:, :, 0]]\n            .transpose()\n            .astype(bool)\n        )\n\n        is_in_zone: npt.NDArray[np.bool_] = np.all(is_in_zone, axis=1)\n        self.current_count = int(np.sum(is_in_zone))\n        return is_in_zone.astype(bool)\n
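A minimal usage sketch for PolygonZone; the polygon vertices, model, and frame are placeholders chosen for illustration.

```python
import numpy as np
import supervision as sv
from ultralytics import YOLO

model = YOLO(<SOURCE_MODEL_PATH>)

# placeholder polygon covering a rectangular region of a 1920x1080 frame
polygon = np.array([[100, 100], [1820, 100], [1820, 980], [100, 980]])
polygon_zone = sv.PolygonZone(polygon=polygon)

frame = ...
result = model(frame)[0]
detections = sv.Detections.from_ultralytics(result)

is_in_zone = polygon_zone.trigger(detections)
polygon_zone.current_count
# number of detections whose BOTTOM_CENTER anchor falls inside the polygon
```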
PolygonZoneAnnotator

A class for annotating a polygon-shaped zone within a frame with a count of detected objects.

Attributes:

    zone (PolygonZone): The polygon zone to be annotated.
    color (Color): The color to draw the polygon lines.
    thickness (int): The thickness of the polygon lines, default is 2.
    text_color (Color): The color of the text on the polygon, default is black.
    text_scale (float): The scale of the text on the polygon, default is 0.5.
    text_thickness (int): The thickness of the text on the polygon, default is 1.
    text_padding (int): The padding around the text on the polygon, default is 10.
    font (int): The font type for the text on the polygon, default is cv2.FONT_HERSHEY_SIMPLEX.
    center (Tuple[int, int]): The center of the polygon for text placement.
    display_in_zone_count (bool): Show the label of the zone or not. Default is True.

Source code in supervision/detection/tools/polygon_zone.py
class PolygonZoneAnnotator:\n    \"\"\"\n    A class for annotating a polygon-shaped zone within a\n        frame with a count of detected objects.\n\n    Attributes:\n        zone (PolygonZone): The polygon zone to be annotated\n        color (Color): The color to draw the polygon lines\n        thickness (int): The thickness of the polygon lines, default is 2\n        text_color (Color): The color of the text on the polygon, default is black\n        text_scale (float): The scale of the text on the polygon, default is 0.5\n        text_thickness (int): The thickness of the text on the polygon, default is 1\n        text_padding (int): The padding around the text on the polygon, default is 10\n        font (int): The font type for the text on the polygon,\n            default is cv2.FONT_HERSHEY_SIMPLEX\n        center (Tuple[int, int]): The center of the polygon for text placement\n        display_in_zone_count (bool): Show the label of the zone or not. Default is True\n    \"\"\"\n\n    def __init__(\n        self,\n        zone: PolygonZone,\n        color: Color,\n        thickness: int = 2,\n        text_color: Color = Color.BLACK,\n        text_scale: float = 0.5,\n        text_thickness: int = 1,\n        text_padding: int = 10,\n        display_in_zone_count: bool = True,\n    ):\n        self.zone = zone\n        self.color = color\n        self.thickness = thickness\n        self.text_color = text_color\n        self.text_scale = text_scale\n        self.text_thickness = text_thickness\n        self.text_padding = text_padding\n        self.font = cv2.FONT_HERSHEY_SIMPLEX\n        self.center = get_polygon_center(polygon=zone.polygon)\n        self.display_in_zone_count = display_in_zone_count\n\n    def annotate(self, scene: np.ndarray, label: Optional[str] = None) -> np.ndarray:\n        \"\"\"\n        Annotates the polygon zone within a frame with a count of detected objects.\n\n        Parameters:\n            scene (np.ndarray): The image on which the polygon zone will be annotated\n            label (Optional[str]): An optional label for the count of detected objects\n                within the polygon zone (default: None)\n\n        Returns:\n            np.ndarray: The image with the polygon zone and count of detected objects\n        \"\"\"\n        annotated_frame = draw_polygon(\n            scene=scene,\n            polygon=self.zone.polygon,\n            color=self.color,\n            thickness=self.thickness,\n        )\n\n        if self.display_in_zone_count:\n            annotated_frame = draw_text(\n                scene=annotated_frame,\n                text=str(self.zone.current_count) if label is None else label,\n                text_anchor=self.center,\n                background_color=self.color,\n                text_color=self.text_color,\n                text_scale=self.text_scale,\n                text_thickness=self.text_thickness,\n                text_padding=self.text_padding,\n                text_font=self.font,\n            )\n\n        return annotated_frame\n
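A short sketch continuing the PolygonZone example above; sv.Color.RED is only an illustrative choice.

```python
import supervision as sv

zone_annotator = sv.PolygonZoneAnnotator(
    zone=polygon_zone,
    color=sv.Color.RED,
    thickness=2,
)

annotated_frame = zone_annotator.annotate(scene=frame.copy())
# draws the polygon outline and the current in-zone count at the polygon center
```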
"},{"location":"detection/tools/polygon_zone/#supervision.detection.tools.polygon_zone.PolygonZone-functions","title":"Functions","text":""},{"location":"detection/tools/polygon_zone/#supervision.detection.tools.polygon_zone.PolygonZone.trigger","title":"trigger(detections)","text":"

Determines if the detections are within the polygon zone.

Parameters:

Name Type Description Default detections Detections

The detections to be checked against the polygon zone

required

Returns:

Type Description NDArray[bool_]

np.ndarray: A boolean numpy array indicating if each detection is within the polygon zone

Source code in supervision/detection/tools/polygon_zone.py
def trigger(self, detections: Detections) -> npt.NDArray[np.bool_]:\n    \"\"\"\n    Determines if the detections are within the polygon zone.\n\n    Parameters:\n        detections (Detections): The detections\n            to be checked against the polygon zone\n\n    Returns:\n        np.ndarray: A boolean numpy array indicating\n            if each detection is within the polygon zone\n    \"\"\"\n\n    clipped_xyxy = clip_boxes(\n        xyxy=detections.xyxy, resolution_wh=self.frame_resolution_wh\n    )\n    clipped_detections = replace(detections, xyxy=clipped_xyxy)\n    all_clipped_anchors = np.array(\n        [\n            np.ceil(clipped_detections.get_anchors_coordinates(anchor)).astype(int)\n            for anchor in self.triggering_anchors\n        ]\n    )\n\n    is_in_zone: npt.NDArray[np.bool_] = (\n        self.mask[all_clipped_anchors[:, :, 1], all_clipped_anchors[:, :, 0]]\n        .transpose()\n        .astype(bool)\n    )\n\n    is_in_zone: npt.NDArray[np.bool_] = np.all(is_in_zone, axis=1)\n    self.current_count = int(np.sum(is_in_zone))\n    return is_in_zone.astype(bool)\n
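As a quick, hedged illustration of how the returned mask is typically consumed, the sketch below keeps only the detections that fall inside the zone (the polygon coordinates and detections are placeholders):

```python
import numpy as np
import supervision as sv

polygon = np.array([[0, 300], [640, 300], [640, 480], [0, 480]])  # placeholder zone
zone = sv.PolygonZone(polygon=polygon)

detections = sv.Detections(...)  # e.g. sv.Detections.from_ultralytics(result)

mask = zone.trigger(detections=detections)  # one boolean per detection
detections_in_zone = detections[mask]       # keep only detections inside the zone
print(zone.current_count)                   # number of detections currently in the zone
```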
"},{"location":"detection/tools/polygon_zone/#supervision.detection.tools.polygon_zone.PolygonZoneAnnotator-functions","title":"Functions","text":""},{"location":"detection/tools/polygon_zone/#supervision.detection.tools.polygon_zone.PolygonZoneAnnotator.annotate","title":"annotate(scene, label=None)","text":"

Annotates the polygon zone within a frame with a count of detected objects.

Parameters:

Name Type Description Default scene ndarray

The image on which the polygon zone will be annotated

required label Optional[str]

An optional label for the count of detected objects within the polygon zone (default: None)

None

Returns:

Type Description ndarray

np.ndarray: The image with the polygon zone and count of detected objects

Source code in supervision/detection/tools/polygon_zone.py
def annotate(self, scene: np.ndarray, label: Optional[str] = None) -> np.ndarray:\n    \"\"\"\n    Annotates the polygon zone within a frame with a count of detected objects.\n\n    Parameters:\n        scene (np.ndarray): The image on which the polygon zone will be annotated\n        label (Optional[str]): An optional label for the count of detected objects\n            within the polygon zone (default: None)\n\n    Returns:\n        np.ndarray: The image with the polygon zone and count of detected objects\n    \"\"\"\n    annotated_frame = draw_polygon(\n        scene=scene,\n        polygon=self.zone.polygon,\n        color=self.color,\n        thickness=self.thickness,\n    )\n\n    if self.display_in_zone_count:\n        annotated_frame = draw_text(\n            scene=annotated_frame,\n            text=str(self.zone.current_count) if label is None else label,\n            text_anchor=self.center,\n            background_color=self.color,\n            text_color=self.text_color,\n            text_scale=self.text_scale,\n            text_thickness=self.text_thickness,\n            text_padding=self.text_padding,\n            text_font=self.font,\n        )\n\n    return annotated_frame\n
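The label argument replaces the default count text drawn at the polygon center. A minimal sketch, assuming the zone and annotator are set up as in the class-level example above (all inputs are placeholders):

```python
import numpy as np
import supervision as sv

polygon = np.array([[100, 100], [500, 100], [500, 400], [100, 400]])  # placeholder
zone = sv.PolygonZone(polygon=polygon)
zone_annotator = sv.PolygonZoneAnnotator(zone=zone, color=sv.Color.RED)

frame = ...                      # np.ndarray image
detections = sv.Detections(...)  # detections for this frame

zone.trigger(detections=detections)  # refreshes zone.current_count
annotated_frame = zone_annotator.annotate(
    scene=frame,
    label=f"in zone: {zone.current_count}",  # custom text instead of the bare count
)
```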
"},{"location":"detection/tools/save_detections/","title":"Save Detections","text":"CSV Sink

A utility class for saving detection data to a CSV file. This class is designed to efficiently serialize detection objects into a CSV format, allowing for the inclusion of bounding box coordinates and additional attributes like confidence, class_id, and tracker_id.

Tip

CSVSink allows passing custom data alongside the detection fields, providing flexibility for logging various types of information.

Parameters:

Name Type Description Default file_name str

The name of the CSV file where the detections will be stored. Defaults to 'output.csv'.

'output.csv' Example
import supervision as sv\nfrom ultralytics import YOLO\n\nmodel = YOLO(<SOURCE_MODEL_PATH>)\ncsv_sink = sv.CSVSink(<RESULT_CSV_FILE_PATH>)\nframes_generator = sv.get_video_frames_generator(<SOURCE_VIDEO_PATH>)\n\nwith csv_sink as sink:\n    for frame in frames_generator:\n        result = model(frame)[0]\n        detections = sv.Detections.from_ultralytics(result)\n        sink.append(detections, custom_data={'<CUSTOM_LABEL>':'<CUSTOM_DATA>'})\n
Source code in supervision/detection/tools/csv_sink.py
class CSVSink:\n    \"\"\"\n    A utility class for saving detection data to a CSV file. This class is designed to\n    efficiently serialize detection objects into a CSV format, allowing for the\n    inclusion of bounding box coordinates and additional attributes like `confidence`,\n    `class_id`, and `tracker_id`.\n\n    !!! tip\n\n        CSVSink allow to pass custom data alongside the detection fields, providing\n        flexibility for logging various types of information.\n\n    Args:\n        file_name (str): The name of the CSV file where the detections will be stored.\n            Defaults to 'output.csv'.\n\n    Example:\n        ```python\n        import supervision as sv\n        from ultralytics import YOLO\n\n        model = YOLO(<SOURCE_MODEL_PATH>)\n        csv_sink = sv.CSVSink(<RESULT_CSV_FILE_PATH>)\n        frames_generator = sv.get_video_frames_generator(<SOURCE_VIDEO_PATH>)\n\n        with csv_sink as sink:\n            for frame in frames_generator:\n                result = model(frame)[0]\n                detections = sv.Detections.from_ultralytics(result)\n                sink.append(detections, custom_data={'<CUSTOM_LABEL>':'<CUSTOM_DATA>'})\n        ```\n    \"\"\"  # noqa: E501 // docs\n\n    def __init__(self, file_name: str = \"output.csv\") -> None:\n        \"\"\"\n        Initialize the CSVSink instance.\n\n        Args:\n            file_name (str): The name of the CSV file.\n\n        Returns:\n            None\n        \"\"\"\n        self.file_name = file_name\n        self.file: Optional[open] = None\n        self.writer: Optional[csv.writer] = None\n        self.header_written = False\n        self.field_names = []\n\n    def __enter__(self) -> CSVSink:\n        self.open()\n        return self\n\n    def __exit__(\n        self,\n        exc_type: Optional[type],\n        exc_val: Optional[Exception],\n        exc_tb: Optional[Any],\n    ) -> None:\n        self.close()\n\n    def open(self) -> None:\n        \"\"\"\n        Open the CSV file for writing.\n\n        Returns:\n            None\n        \"\"\"\n        parent_directory = os.path.dirname(self.file_name)\n        if parent_directory and not os.path.exists(parent_directory):\n            os.makedirs(parent_directory)\n\n        self.file = open(self.file_name, \"w\", newline=\"\")\n        self.writer = csv.writer(self.file)\n\n    def close(self) -> None:\n        \"\"\"\n        Close the CSV file.\n\n        Returns:\n            None\n        \"\"\"\n        if self.file:\n            self.file.close()\n\n    @staticmethod\n    def parse_detection_data(\n        detections: Detections, custom_data: Dict[str, Any] = None\n    ) -> List[Dict[str, Any]]:\n        parsed_rows = []\n        for i in range(len(detections.xyxy)):\n            row = {\n                \"x_min\": detections.xyxy[i][0],\n                \"y_min\": detections.xyxy[i][1],\n                \"x_max\": detections.xyxy[i][2],\n                \"y_max\": detections.xyxy[i][3],\n                \"class_id\": \"\"\n                if detections.class_id is None\n                else str(detections.class_id[i]),\n                \"confidence\": \"\"\n                if detections.confidence is None\n                else str(detections.confidence[i]),\n                \"tracker_id\": \"\"\n                if detections.tracker_id is None\n                else str(detections.tracker_id[i]),\n            }\n\n            if hasattr(detections, \"data\"):\n                for key, value in detections.data.items():\n        
            if value.ndim == 0:\n                        row[key] = value\n                    else:\n                        row[key] = value[i]\n\n            if custom_data:\n                row.update(custom_data)\n            parsed_rows.append(row)\n        return parsed_rows\n\n    def append(\n        self, detections: Detections, custom_data: Dict[str, Any] = None\n    ) -> None:\n        \"\"\"\n        Append detection data to the CSV file.\n\n        Args:\n            detections (Detections): The detection data.\n            custom_data (Dict[str, Any]): Custom data to include.\n\n        Returns:\n            None\n        \"\"\"\n        if not self.writer:\n            raise Exception(\n                f\"Cannot append to CSV: The file '{self.file_name}' is not open.\"\n            )\n        field_names = CSVSink.parse_field_names(detections, custom_data)\n        if not self.header_written:\n            self.field_names = field_names\n            self.writer.writerow(field_names)\n            self.header_written = True\n\n        if field_names != self.field_names:\n            print(\n                f\"Field names do not match the header. \"\n                f\"Expected: {self.field_names}, given: {field_names}\"\n            )\n\n        parsed_rows = CSVSink.parse_detection_data(detections, custom_data)\n        for row in parsed_rows:\n            self.writer.writerow(\n                [row.get(field_name, \"\") for field_name in self.field_names]\n            )\n\n    @staticmethod\n    def parse_field_names(\n        detections: Detections, custom_data: Dict[str, Any]\n    ) -> List[str]:\n        dynamic_header = sorted(\n            set(custom_data.keys()) | set(getattr(detections, \"data\", {}).keys())\n        )\n        return BASE_HEADER + dynamic_header\n
JSON Sink

A utility class for saving detection data to a JSON file. This class is designed to efficiently serialize detection objects into a JSON format, allowing for the inclusion of bounding box coordinates and additional attributes like confidence, class_id, and tracker_id.

Tip

JSONSink allows passing custom data alongside the detection fields, providing flexibility for logging various types of information.

Parameters:

Name Type Description Default file_name str

The name of the JSON file where the detections will be stored. Defaults to 'output.json'.

'output.json' Example
import supervision as sv\nfrom ultralytics import YOLO\n\nmodel = YOLO(<SOURCE_MODEL_PATH>)\njson_sink = sv.JSONSink(<RESULT_JSON_FILE_PATH>)\nframes_generator = sv.get_video_frames_generator(<SOURCE_VIDEO_PATH>)\n\nwith json_sink as sink:\n    for frame in frames_generator:\n        result = model(frame)[0]\n        detections = sv.Detections.from_ultralytics(result)\n        sink.append(detections, custom_data={'<CUSTOM_LABEL>':'<CUSTOM_DATA>'})\n
Source code in supervision/detection/tools/json_sink.py
class JSONSink:\n    \"\"\"\n    A utility class for saving detection data to a JSON file. This class is designed to\n    efficiently serialize detection objects into a JSON format, allowing for the\n    inclusion of bounding box coordinates and additional attributes like `confidence`,\n    `class_id`, and `tracker_id`.\n\n    !!! tip\n\n        JSONsink allow to pass custom data alongside the detection fields, providing\n        flexibility for logging various types of information.\n\n    Args:\n        file_name (str): The name of the JSON file where the detections will be stored.\n            Defaults to 'output.json'.\n\n    Example:\n        ```python\n        import supervision as sv\n        from ultralytics import YOLO\n\n        model = YOLO(<SOURCE_MODEL_PATH>)\n        json_sink = sv.JSONSink(<RESULT_JSON_FILE_PATH>)\n        frames_generator = sv.get_video_frames_generator(<SOURCE_VIDEO_PATH>)\n\n        with json_sink as sink:\n            for frame in frames_generator:\n                result = model(frame)[0]\n                detections = sv.Detections.from_ultralytics(result)\n                sink.append(detections, custom_data={'<CUSTOM_LABEL>':'<CUSTOM_DATA>'})\n        ```\n    \"\"\"  # noqa: E501 // docs\n\n    def __init__(self, file_name: str = \"output.json\") -> None:\n        \"\"\"\n        Initialize the JSONSink instance.\n\n        Args:\n            file_name (str): The name of the JSON file.\n\n        Returns:\n            None\n        \"\"\"\n        self.file_name = file_name\n        self.file: Optional[open] = None\n        self.data: List[Dict[str, Any]] = []\n\n    def __enter__(self) -> JSONSink:\n        self.open()\n        return self\n\n    def __exit__(\n        self,\n        exc_type: Optional[type],\n        exc_val: Optional[Exception],\n        exc_tb: Optional[Any],\n    ) -> None:\n        self.write_and_close()\n\n    def open(self) -> None:\n        \"\"\"\n        Open the JSON file for writing.\n\n        Returns:\n            None\n        \"\"\"\n        parent_directory = os.path.dirname(self.file_name)\n        if parent_directory and not os.path.exists(parent_directory):\n            os.makedirs(parent_directory)\n\n        self.file = open(self.file_name, \"w\")\n\n    def write_and_close(self) -> None:\n        \"\"\"\n        Write and close the JSON file.\n\n        Returns:\n            None\n        \"\"\"\n        if self.file:\n            json.dump(self.data, self.file, indent=4)\n            self.file.close()\n\n    @staticmethod\n    def parse_detection_data(\n        detections: Detections, custom_data: Dict[str, Any] = None\n    ) -> List[Dict[str, Any]]:\n        parsed_rows = []\n        for i in range(len(detections.xyxy)):\n            row = {\n                \"x_min\": float(detections.xyxy[i][0]),\n                \"y_min\": float(detections.xyxy[i][1]),\n                \"x_max\": float(detections.xyxy[i][2]),\n                \"y_max\": float(detections.xyxy[i][3]),\n                \"class_id\": \"\"\n                if detections.class_id is None\n                else int(detections.class_id[i]),\n                \"confidence\": \"\"\n                if detections.confidence is None\n                else float(detections.confidence[i]),\n                \"tracker_id\": \"\"\n                if detections.tracker_id is None\n                else int(detections.tracker_id[i]),\n            }\n\n            if hasattr(detections, \"data\"):\n                for key, value in detections.data.items():\n        
            row[key] = (\n                        str(value[i])\n                        if hasattr(value, \"__getitem__\") and value.ndim != 0\n                        else str(value)\n                    )\n\n            if custom_data:\n                row.update(custom_data)\n            parsed_rows.append(row)\n        return parsed_rows\n\n    def append(\n        self, detections: Detections, custom_data: Dict[str, Any] = None\n    ) -> None:\n        \"\"\"\n        Append detection data to the JSON file.\n\n        Args:\n            detections (Detections): The detection data.\n            custom_data (Dict[str, Any]): Custom data to include.\n\n        Returns:\n            None\n        \"\"\"\n        parsed_rows = JSONSink.parse_detection_data(detections, custom_data)\n        self.data.extend(parsed_rows)\n
"},{"location":"detection/tools/save_detections/#supervision.detection.tools.csv_sink.CSVSink-functions","title":"Functions","text":""},{"location":"detection/tools/save_detections/#supervision.detection.tools.csv_sink.CSVSink.__init__","title":"__init__(file_name='output.csv')","text":"

Initialize the CSVSink instance.

Parameters:

Name Type Description Default file_name str

The name of the CSV file.

'output.csv'

Returns:

Type Description None

None

Source code in supervision/detection/tools/csv_sink.py
def __init__(self, file_name: str = \"output.csv\") -> None:\n    \"\"\"\n    Initialize the CSVSink instance.\n\n    Args:\n        file_name (str): The name of the CSV file.\n\n    Returns:\n        None\n    \"\"\"\n    self.file_name = file_name\n    self.file: Optional[open] = None\n    self.writer: Optional[csv.writer] = None\n    self.header_written = False\n    self.field_names = []\n
"},{"location":"detection/tools/save_detections/#supervision.detection.tools.csv_sink.CSVSink.append","title":"append(detections, custom_data=None)","text":"

Append detection data to the CSV file.

Parameters:

Name Type Description Default detections Detections

The detection data.

required custom_data Dict[str, Any]

Custom data to include.

None

Returns:

Type Description None

None

Source code in supervision/detection/tools/csv_sink.py
def append(\n    self, detections: Detections, custom_data: Dict[str, Any] = None\n) -> None:\n    \"\"\"\n    Append detection data to the CSV file.\n\n    Args:\n        detections (Detections): The detection data.\n        custom_data (Dict[str, Any]): Custom data to include.\n\n    Returns:\n        None\n    \"\"\"\n    if not self.writer:\n        raise Exception(\n            f\"Cannot append to CSV: The file '{self.file_name}' is not open.\"\n        )\n    field_names = CSVSink.parse_field_names(detections, custom_data)\n    if not self.header_written:\n        self.field_names = field_names\n        self.writer.writerow(field_names)\n        self.header_written = True\n\n    if field_names != self.field_names:\n        print(\n            f\"Field names do not match the header. \"\n            f\"Expected: {self.field_names}, given: {field_names}\"\n        )\n\n    parsed_rows = CSVSink.parse_detection_data(detections, custom_data)\n    for row in parsed_rows:\n        self.writer.writerow(\n            [row.get(field_name, \"\") for field_name in self.field_names]\n        )\n
"},{"location":"detection/tools/save_detections/#supervision.detection.tools.csv_sink.CSVSink.close","title":"close()","text":"

Close the CSV file.

Returns:

Type Description None

None

Source code in supervision/detection/tools/csv_sink.py
def close(self) -> None:\n    \"\"\"\n    Close the CSV file.\n\n    Returns:\n        None\n    \"\"\"\n    if self.file:\n        self.file.close()\n
"},{"location":"detection/tools/save_detections/#supervision.detection.tools.csv_sink.CSVSink.open","title":"open()","text":"

Open the CSV file for writing.

Returns:

Type Description None

None

Source code in supervision/detection/tools/csv_sink.py
def open(self) -> None:\n    \"\"\"\n    Open the CSV file for writing.\n\n    Returns:\n        None\n    \"\"\"\n    parent_directory = os.path.dirname(self.file_name)\n    if parent_directory and not os.path.exists(parent_directory):\n        os.makedirs(parent_directory)\n\n    self.file = open(self.file_name, \"w\", newline=\"\")\n    self.writer = csv.writer(self.file)\n
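If you prefer not to use the context manager, the open(), append(), and close() methods can be called directly. A minimal sketch, assuming an Ultralytics detection model and the same path placeholders as the example above:

```python
import supervision as sv
from ultralytics import YOLO

model = YOLO(<SOURCE_MODEL_PATH>)
frames_generator = sv.get_video_frames_generator(<SOURCE_VIDEO_PATH>)

csv_sink = sv.CSVSink(<RESULT_CSV_FILE_PATH>)
csv_sink.open()  # creates the parent directory if needed and opens the file

for frame in frames_generator:
    result = model(frame)[0]
    detections = sv.Detections.from_ultralytics(result)
    csv_sink.append(detections, custom_data={})

csv_sink.close()  # releases the file handle
```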
"},{"location":"detection/tools/save_detections/#supervision.detection.tools.json_sink.JSONSink-functions","title":"Functions","text":""},{"location":"detection/tools/save_detections/#supervision.detection.tools.json_sink.JSONSink.__init__","title":"__init__(file_name='output.json')","text":"

Initialize the JSONSink instance.

Parameters:

Name Type Description Default file_name str

The name of the JSON file.

'output.json'

Returns:

Type Description None

None

Source code in supervision/detection/tools/json_sink.py
def __init__(self, file_name: str = \"output.json\") -> None:\n    \"\"\"\n    Initialize the JSONSink instance.\n\n    Args:\n        file_name (str): The name of the JSON file.\n\n    Returns:\n        None\n    \"\"\"\n    self.file_name = file_name\n    self.file: Optional[open] = None\n    self.data: List[Dict[str, Any]] = []\n
"},{"location":"detection/tools/save_detections/#supervision.detection.tools.json_sink.JSONSink.append","title":"append(detections, custom_data=None)","text":"

Append detection data to the JSON file.

Parameters:

Name Type Description Default detections Detections

The detection data.

required custom_data Dict[str, Any]

Custom data to include.

None

Returns:

Type Description None

None

Source code in supervision/detection/tools/json_sink.py
def append(\n    self, detections: Detections, custom_data: Dict[str, Any] = None\n) -> None:\n    \"\"\"\n    Append detection data to the JSON file.\n\n    Args:\n        detections (Detections): The detection data.\n        custom_data (Dict[str, Any]): Custom data to include.\n\n    Returns:\n        None\n    \"\"\"\n    parsed_rows = JSONSink.parse_detection_data(detections, custom_data)\n    self.data.extend(parsed_rows)\n
"},{"location":"detection/tools/save_detections/#supervision.detection.tools.json_sink.JSONSink.open","title":"open()","text":"

Open the JSON file for writing.

Returns:

Type Description None

None

Source code in supervision/detection/tools/json_sink.py
def open(self) -> None:\n    \"\"\"\n    Open the JSON file for writing.\n\n    Returns:\n        None\n    \"\"\"\n    parent_directory = os.path.dirname(self.file_name)\n    if parent_directory and not os.path.exists(parent_directory):\n        os.makedirs(parent_directory)\n\n    self.file = open(self.file_name, \"w\")\n
"},{"location":"detection/tools/save_detections/#supervision.detection.tools.json_sink.JSONSink.write_and_close","title":"write_and_close()","text":"

Write and close the JSON file.

Returns:

Type Description None

None

Source code in supervision/detection/tools/json_sink.py
def write_and_close(self) -> None:\n    \"\"\"\n    Write and close the JSON file.\n\n    Returns:\n        None\n    \"\"\"\n    if self.file:\n        json.dump(self.data, self.file, indent=4)\n        self.file.close()\n
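The JSON sink follows the same pattern, with the difference that rows are buffered in memory and only written when write_and_close() is called. A minimal sketch, with the same assumptions and placeholders as above:

```python
import supervision as sv
from ultralytics import YOLO

model = YOLO(<SOURCE_MODEL_PATH>)
json_sink = sv.JSONSink(<RESULT_JSON_FILE_PATH>)
frames_generator = sv.get_video_frames_generator(<SOURCE_VIDEO_PATH>)

json_sink.open()  # equivalent to entering the `with` block
for frame in frames_generator:
    result = model(frame)[0]
    detections = sv.Detections.from_ultralytics(result)
    json_sink.append(detections, custom_data={})
json_sink.write_and_close()  # rows are only dumped to disk at this point
```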
"},{"location":"detection/tools/smoother/","title":"Detection Smoother","text":"

A utility class for smoothing detections over multiple frames in video tracking. It maintains a history of detections for each track and provides smoothed predictions based on these histories.

Warning

  • DetectionsSmoother requires the tracker_id for each detection. Refer to Roboflow Trackers for information on integrating tracking into your inference pipeline.
  • This class is not compatible with segmentation models.
Example
import supervision as sv\n\nfrom ultralytics import YOLO\n\nvideo_info = sv.VideoInfo.from_video_path(video_path=<SOURCE_FILE_PATH>)\nframe_generator = sv.get_video_frames_generator(source_path=<SOURCE_FILE_PATH>)\n\nmodel = YOLO(<MODEL_PATH>)\ntracker = sv.ByteTrack(frame_rate=video_info.fps)\nsmoother = sv.DetectionsSmoother()\n\nbounding_box_annotator = sv.BoundingBoxAnnotator()\n\nwith sv.VideoSink(<TARGET_FILE_PATH>, video_info=video_info) as sink:\n    for frame in frame_generator:\n        result = model(frame)[0]\n        detections = sv.Detections.from_ultralytics(result)\n        detections = tracker.update_with_detections(detections)\n        detections = smoother.update_with_detections(detections)\n\n        annotated_frame = bounding_box_annotator.annotate(frame.copy(), detections)\n        sink.write_frame(annotated_frame)\n
Source code in supervision/detection/tools/smoother.py
class DetectionsSmoother:\n    \"\"\"\n    A utility class for smoothing detections over multiple frames in video tracking.\n    It maintains a history of detections for each track and provides smoothed\n    predictions based on these histories.\n\n    <video controls>\n        <source\n            src=\"https://media.roboflow.com/supervision-detection-smoothing.mp4\"\n            type=\"video/mp4\">\n    </video>\n\n    !!! warning\n\n        - `DetectionsSmoother` requires the `tracker_id` for each detection. Refer to\n          [Roboflow Trackers](/latest/trackers/) for\n          information on integrating tracking into your inference pipeline.\n        - This class is not compatible with segmentation models.\n\n    Example:\n        ```python\n        import supervision as sv\n\n        from ultralytics import YOLO\n\n        video_info = sv.VideoInfo.from_video_path(video_path=<SOURCE_FILE_PATH>)\n        frame_generator = sv.get_video_frames_generator(source_path=<SOURCE_FILE_PATH>)\n\n        model = YOLO(<MODEL_PATH>)\n        tracker = sv.ByteTrack(frame_rate=video_info.fps)\n        smoother = sv.DetectionsSmoother()\n\n        annotator = sv.BoundingBoxAnnotator()\n\n        with sv.VideoSink(<TARGET_FILE_PATH>, video_info=video_info) as sink:\n            for frame in frame_generator:\n                result = model(frame)[0]\n                detections = sv.Detections.from_ultralytics(result)\n                detections = tracker.update_with_detections(detections)\n                detections = smoother.update_with_detections(detections)\n\n                annotated_frame = bounding_box_annotator.annotate(frame.copy(), detections)\n                sink.write_frame(annotated_frame)\n        ```\n    \"\"\"  # noqa: E501 // docs\n\n    def __init__(self, length: int = 5) -> None:\n        \"\"\"\n        Args:\n            length (int): The maximum number of frames to consider for smoothing\n                detections. Defaults to 5.\n        \"\"\"\n        self.tracks = defaultdict(lambda: deque(maxlen=length))\n\n    def update_with_detections(self, detections: Detections) -> Detections:\n        \"\"\"\n        Updates the smoother with a new set of detections from a frame.\n\n        Args:\n            detections (Detections): The detections to add to the smoother.\n        \"\"\"\n\n        if detections.tracker_id is None:\n            warnings.warn(\n                \"Smoothing skipped. DetectionsSmoother requires tracker_id. 
Refer to \"\n                \"https://supervision.roboflow.com/latest/trackers for more \"\n                \"information.\",\n                category=SupervisionWarnings,\n            )\n            return detections\n\n        for detection_idx in range(len(detections)):\n            tracker_id = detections.tracker_id[detection_idx]\n\n            self.tracks[tracker_id].append(detections[detection_idx])\n\n        for track_id in self.tracks.keys():\n            if track_id not in detections.tracker_id:\n                self.tracks[track_id].append(None)\n\n        for track_id in list(self.tracks.keys()):\n            if all([d is None for d in self.tracks[track_id]]):\n                del self.tracks[track_id]\n\n        return self.get_smoothed_detections()\n\n    def get_track(self, track_id: int) -> Optional[Detections]:\n        track = self.tracks.get(track_id, None)\n        if track is None:\n            return None\n\n        track = [d for d in track if d is not None]\n        if len(track) == 0:\n            return None\n\n        ret = deepcopy(track[0])\n        ret.xyxy = np.mean([d.xyxy for d in track], axis=0)\n        ret.confidence = np.mean([d.confidence for d in track], axis=0)\n\n        return ret\n\n    def get_smoothed_detections(self) -> Detections:\n        tracked_detections = []\n        for track_id in self.tracks:\n            track = self.get_track(track_id)\n            if track is not None:\n                tracked_detections.append(track)\n\n        detections = Detections.merge(tracked_detections)\n        if len(detections) == 0:\n            detections.tracker_id = np.array([], dtype=int)\n\n        return detections\n
"},{"location":"detection/tools/smoother/#supervision.detection.tools.smoother.DetectionsSmoother-functions","title":"Functions","text":""},{"location":"detection/tools/smoother/#supervision.detection.tools.smoother.DetectionsSmoother.__init__","title":"__init__(length=5)","text":"

Parameters:

Name Type Description Default length int

The maximum number of frames to consider for smoothing detections. Defaults to 5.

5 Source code in supervision/detection/tools/smoother.py
def __init__(self, length: int = 5) -> None:\n    \"\"\"\n    Args:\n        length (int): The maximum number of frames to consider for smoothing\n            detections. Defaults to 5.\n    \"\"\"\n    self.tracks = defaultdict(lambda: deque(maxlen=length))\n
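For illustration, a hedged sketch of constructing the smoother with a longer window; the assumption that a larger length trades responsiveness for smoother boxes follows from the averaging behavior shown in the source above:

```python
import supervision as sv

tracker = sv.ByteTrack()
smoother = sv.DetectionsSmoother(length=10)  # average over up to 10 frames per track

detections = sv.Detections(...)  # detections for the current frame
detections = tracker.update_with_detections(detections)   # adds tracker_id
detections = smoother.update_with_detections(detections)  # returns smoothed boxes
```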
"},{"location":"detection/tools/smoother/#supervision.detection.tools.smoother.DetectionsSmoother.update_with_detections","title":"update_with_detections(detections)","text":"

Updates the smoother with a new set of detections from a frame.

Parameters:

Name Type Description Default detections Detections

The detections to add to the smoother.

required Source code in supervision/detection/tools/smoother.py
def update_with_detections(self, detections: Detections) -> Detections:\n    \"\"\"\n    Updates the smoother with a new set of detections from a frame.\n\n    Args:\n        detections (Detections): The detections to add to the smoother.\n    \"\"\"\n\n    if detections.tracker_id is None:\n        warnings.warn(\n            \"Smoothing skipped. DetectionsSmoother requires tracker_id. Refer to \"\n            \"https://supervision.roboflow.com/latest/trackers for more \"\n            \"information.\",\n            category=SupervisionWarnings,\n        )\n        return detections\n\n    for detection_idx in range(len(detections)):\n        tracker_id = detections.tracker_id[detection_idx]\n\n        self.tracks[tracker_id].append(detections[detection_idx])\n\n    for track_id in self.tracks.keys():\n        if track_id not in detections.tracker_id:\n            self.tracks[track_id].append(None)\n\n    for track_id in list(self.tracks.keys()):\n        if all([d is None for d in self.tracks[track_id]]):\n            del self.tracks[track_id]\n\n    return self.get_smoothed_detections()\n
"},{"location":"how_to/detect_and_annotate/","title":"Detect and Annotate","text":"

Supervision provides a seamless process for annotating predictions generated by various object detection and segmentation models. This guide shows how to perform inference with the Inference, Ultralytics, or Transformers packages. Following this, you'll learn how to import these predictions into Supervision and use them to annotate the source image.

"},{"location":"how_to/detect_and_annotate/#run-detection","title":"Run Detection","text":"

First, you'll need to obtain predictions from your object detection or segmentation model.

Inference | Ultralytics | Transformers
import cv2\nfrom inference import get_model\n\nmodel = get_model(model_id=\"yolov8n-640\")\nimage = cv2.imread(<SOURCE_IMAGE_PATH>)\nresults = model.infer(image)[0]\n
import cv2\nfrom ultralytics import YOLO\n\nmodel = YOLO(\"yolov8n.pt\")\nimage = cv2.imread(<SOURCE_IMAGE_PATH>)\nresults = model(image)[0]\n
import torch\nfrom PIL import Image\nfrom transformers import DetrImageProcessor, DetrForObjectDetection\n\nprocessor = DetrImageProcessor.from_pretrained(\"facebook/detr-resnet-50\")\nmodel = DetrForObjectDetection.from_pretrained(\"facebook/detr-resnet-50\")\n\nimage = Image.open(<SOURCE_IMAGE_PATH>)\ninputs = processor(images=image, return_tensors=\"pt\")\n\nwith torch.no_grad():\n    outputs = model(**inputs)\n\nwidth, height = image.size\ntarget_size = torch.tensor([[height, width]])\nresults = processor.post_process_object_detection(\n    outputs=outputs, target_sizes=target_size)[0]\n
"},{"location":"how_to/detect_and_annotate/#load-predictions-into-supervision","title":"Load Predictions into Supervision","text":"

Now that we have predictions from a model, we can load them into Supervision.

Inference | Ultralytics | Transformers

We can do so using the sv.Detections.from_inference method, which accepts model results from both detection and segmentation models.

import cv2\nimport supervision as sv\nfrom inference import get_model\n\nmodel = get_model(model_id=\"yolov8n-640\")\nimage = cv2.imread(<SOURCE_IMAGE_PATH>)\nresults = model.infer(image)[0]\ndetections = sv.Detections.from_inference(results)\n

We can do so using the sv.Detections.from_ultralytics method, which accepts model results from both detection and segmentation models.

import cv2\nimport supervision as sv\nfrom ultralytics import YOLO\n\nmodel = YOLO(\"yolov8n.pt\")\nimage = cv2.imread(<SOURCE_IMAGE_PATH>)\nresults = model(image)[0]\ndetections = sv.Detections.from_ultralytics(results)\n

We can do so using the sv.Detections.from_transformers method, which accepts model results from both detection and segmentation models.

import torch\nimport supervision as sv\nfrom PIL import Image\nfrom transformers import DetrImageProcessor, DetrForObjectDetection\n\nprocessor = DetrImageProcessor.from_pretrained(\"facebook/detr-resnet-50\")\nmodel = DetrForObjectDetection.from_pretrained(\"facebook/detr-resnet-50\")\n\nimage = Image.open(<SOURCE_IMAGE_PATH>)\ninputs = processor(images=image, return_tensors=\"pt\")\n\nwith torch.no_grad():\n    outputs = model(**inputs)\n\nwidth, height = image.size\ntarget_size = torch.tensor([[height, width]])\nresults = processor.post_process_object_detection(\n    outputs=outputs, target_sizes=target_size)[0]\ndetections = sv.Detections.from_transformers(\n    transformers_results=results,\n    id2label=model.config.id2label)\n

You can load predictions from other computer vision frameworks and libraries using the methods below; a SAM example sketch follows the list:

  • from_deepsparse (Deepsparse)
  • from_detectron2 (Detectron2)
  • from_mmdetection (MMDetection)
  • from_sam (Segment Anything Model)
  • from_yolo_nas (YOLO-NAS)
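For instance, loading Segment Anything results might look roughly like the sketch below; the segment-anything setup (model registry key and checkpoint placeholder) is an assumption and may differ for your installation:

```python
import cv2
import supervision as sv
from segment_anything import sam_model_registry, SamAutomaticMaskGenerator

# assumption: a ViT-H SAM checkpoint downloaded locally
sam = sam_model_registry["vit_h"](checkpoint=<SAM_CHECKPOINT_PATH>)
mask_generator = SamAutomaticMaskGenerator(sam)

image_bgr = cv2.imread(<SOURCE_IMAGE_PATH>)
image_rgb = cv2.cvtColor(image_bgr, cv2.COLOR_BGR2RGB)

sam_result = mask_generator.generate(image_rgb)
detections = sv.Detections.from_sam(sam_result=sam_result)
```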
"},{"location":"how_to/detect_and_annotate/#annotate-image-with-detections","title":"Annotate Image with Detections","text":"

Finally, we can annotate the image with the predictions. Since we are working with an object detection model, we will use the sv.BoundingBoxAnnotator and sv.LabelAnnotator classes.

Inference | Ultralytics | Transformers
import cv2\nimport supervision as sv\nfrom inference import get_model\n\nmodel = get_model(model_id=\"yolov8n-640\")\nimage = cv2.imread(<SOURCE_IMAGE_PATH>)\nresults = model.infer(image)[0]\ndetections = sv.Detections.from_inference(results)\n\nbounding_box_annotator = sv.BoundingBoxAnnotator()\nlabel_annotator = sv.LabelAnnotator()\n\nannotated_image = bounding_box_annotator.annotate(\n    scene=image, detections=detections)\nannotated_image = label_annotator.annotate(\n    scene=annotated_image, detections=detections)\n
import cv2\nimport supervision as sv\nfrom ultralytics import YOLO\n\nmodel = YOLO(\"yolov8n.pt\")\nimage = cv2.imread(<SOURCE_IMAGE_PATH>)\nresults = model(image)[0]\ndetections = sv.Detections.from_ultralytics(results)\n\nbounding_box_annotator = sv.BoundingBoxAnnotator()\nlabel_annotator = sv.LabelAnnotator()\n\nannotated_image = bounding_box_annotator.annotate(\n    scene=image, detections=detections)\nannotated_image = label_annotator.annotate(\n    scene=annotated_image, detections=detections)\n
import torch\nimport supervision as sv\nfrom PIL import Image\nfrom transformers import DetrImageProcessor, DetrForObjectDetection\n\nprocessor = DetrImageProcessor.from_pretrained(\"facebook/detr-resnet-50\")\nmodel = DetrForObjectDetection.from_pretrained(\"facebook/detr-resnet-50\")\n\nimage = Image.open(<SOURCE_IMAGE_PATH>)\ninputs = processor(images=image, return_tensors=\"pt\")\n\nwith torch.no_grad():\n    outputs = model(**inputs)\n\nwidth, height = image.size\ntarget_size = torch.tensor([[height, width]])\nresults = processor.post_process_object_detection(\n    outputs=outputs, target_sizes=target_size)[0]\ndetections = sv.Detections.from_transformers(\n    transformers_results=results,\n    id2label=model.config.id2label)\n\nbounding_box_annotator = sv.BoundingBoxAnnotator()\nlabel_annotator = sv.LabelAnnotator()\n\nannotated_image = bounding_box_annotator.annotate(\n    scene=image, detections=detections)\nannotated_image = label_annotator.annotate(\n    scene=annotated_image, detections=detections)\n

"},{"location":"how_to/detect_and_annotate/#display-custom-labels","title":"Display Custom Labels","text":"

By default, sv.LabelAnnotator will label each detection with its class_name (if possible) or class_id. You can override this behavior by passing a list of custom labels to the annotate method.

Inference | Ultralytics | Transformers
import cv2\nimport supervision as sv\nfrom inference import get_model\n\nmodel = get_model(model_id=\"yolov8n-640\")\nimage = cv2.imread(<SOURCE_IMAGE_PATH>)\nresults = model.infer(image)[0]\ndetections = sv.Detections.from_inference(results)\n\nbounding_box_annotator = sv.BoundingBoxAnnotator()\nlabel_annotator = sv.LabelAnnotator()\n\nlabels = [\n    f\"{class_name} {confidence:.2f}\"\n    for class_name, confidence\n    in zip(detections['class_name'], detections.confidence)\n]\n\nannotated_image = bounding_box_annotator.annotate(\n    scene=image, detections=detections)\nannotated_image = label_annotator.annotate(\n    scene=annotated_image, detections=detections, labels=labels)\n
import cv2\nimport supervision as sv\nfrom ultralytics import YOLO\n\nmodel = YOLO(\"yolov8n.pt\")\nimage = cv2.imread(<SOURCE_IMAGE_PATH>)\nresults = model(image)[0]\ndetections = sv.Detections.from_ultralytics(results)\n\nbounding_box_annotator = sv.BoundingBoxAnnotator()\nlabel_annotator = sv.LabelAnnotator()\n\nlabels = [\n    f\"{class_name} {confidence:.2f}\"\n    for class_name, confidence\n    in zip(detections['class_name'], detections.confidence)\n]\n\nannotated_image = bounding_box_annotator.annotate(\n    scene=image, detections=detections)\nannotated_image = label_annotator.annotate(\n    scene=annotated_image, detections=detections, labels=labels)\n
import torch\nimport supervision as sv\nfrom PIL import Image\nfrom transformers import DetrImageProcessor, DetrForObjectDetection\n\nprocessor = DetrImageProcessor.from_pretrained(\"facebook/detr-resnet-50\")\nmodel = DetrForObjectDetection.from_pretrained(\"facebook/detr-resnet-50\")\n\nimage = Image.open(<SOURCE_IMAGE_PATH>)\ninputs = processor(images=image, return_tensors=\"pt\")\n\nwith torch.no_grad():\n    outputs = model(**inputs)\n\nwidth, height = image.size\ntarget_size = torch.tensor([[height, width]])\nresults = processor.post_process_object_detection(\n    outputs=outputs, target_sizes=target_size)[0]\ndetections = sv.Detections.from_transformers(\n    transformers_results=results,\n    id2label=model.config.id2label)\n\nbounding_box_annotator = sv.BoundingBoxAnnotator()\nlabel_annotator = sv.LabelAnnotator()\n\nlabels = [\n    f\"{class_name} {confidence:.2f}\"\n    for class_name, confidence\n    in zip(detections['class_name'], detections.confidence)\n]\n\nannotated_image = bounding_box_annotator.annotate(\n    scene=image, detections=detections)\nannotated_image = label_annotator.annotate(\n    scene=annotated_image, detections=detections, labels=labels)\n

"},{"location":"how_to/detect_and_annotate/#annotate-image-with-segmentations","title":"Annotate Image with Segmentations","text":"

If you are running a segmentation model, sv.MaskAnnotator is a drop-in replacement for sv.BoundingBoxAnnotator that lets you draw masks instead of boxes.

Inference | Ultralytics | Transformers
import cv2\nimport supervision as sv\nfrom inference import get_model\n\nmodel = get_model(model_id=\"yolov8n-seg-640\")\nimage = cv2.imread(<SOURCE_IMAGE_PATH>)\nresults = model.infer(image)[0]\ndetections = sv.Detections.from_inference(results)\n\nmask_annotator = sv.MaskAnnotator()\nlabel_annotator = sv.LabelAnnotator(text_position=sv.Position.CENTER_OF_MASS)\n\nannotated_image = mask_annotator.annotate(\n    scene=image, detections=detections)\nannotated_image = label_annotator.annotate(\n    scene=annotated_image, detections=detections)\n
import cv2\nimport supervision as sv\nfrom ultralytics import YOLO\n\nmodel = YOLO(\"yolov8n-seg.pt\")\nimage = cv2.imread(<SOURCE_IMAGE_PATH>)\nresults = model(image)[0]\ndetections = sv.Detections.from_ultralytics(results)\n\nmask_annotator = sv.MaskAnnotator()\nlabel_annotator = sv.LabelAnnotator(text_position=sv.Position.CENTER_OF_MASS)\n\nannotated_image = mask_annotator.annotate(\n    scene=image, detections=detections)\nannotated_image = label_annotator.annotate(\n    scene=annotated_image, detections=detections)\n
import torch\nimport supervision as sv\nfrom PIL import Image\nfrom transformers import DetrImageProcessor, DetrForSegmentation\n\nprocessor = DetrImageProcessor.from_pretrained(\"facebook/detr-resnet-50-panoptic\")\nmodel = DetrForSegmentation.from_pretrained(\"facebook/detr-resnet-50-panoptic\")\n\nimage = Image.open(<SOURCE_IMAGE_PATH>)\ninputs = processor(images=image, return_tensors=\"pt\")\n\nwith torch.no_grad():\n    outputs = model(**inputs)\n\nwidth, height = image.size\ntarget_size = torch.tensor([[height, width]])\nresults = processor.post_process_segmentation(\n    outputs=outputs, target_sizes=target_size)[0]\ndetections = sv.Detections.from_transformers(\n    transformers_results=results,\n    id2label=model.config.id2label)\n\nmask_annotator = sv.MaskAnnotator()\nlabel_annotator = sv.LabelAnnotator(text_position=sv.Position.CENTER_OF_MASS)\n\nlabels = [\n    f\"{class_name} {confidence:.2f}\"\n    for class_name, confidence\n    in zip(detections['class_name'], detections.confidence)\n]\n\nannotated_image = mask_annotator.annotate(\n    scene=image, detections=detections)\nannotated_image = label_annotator.annotate(\n    scene=annotated_image, detections=detections, labels=labels)\n

"},{"location":"how_to/detect_small_objects/","title":"Detect Small Objects","text":"

This guide shows how to detect small objects with the Inference, Ultralytics or Transformers packages using InferenceSlicer.

"},{"location":"how_to/detect_small_objects/#baseline-detection","title":"Baseline Detection","text":"

Small object detection in high-resolution images presents challenges due to the objects' size relative to the image resolution.

Inference | Ultralytics | Transformers
import cv2\nimport supervision as sv\nfrom inference import get_model\n\nmodel = get_model(model_id=\"yolov8x-640\")\nimage = cv2.imread(<SOURCE_IMAGE_PATH>)\nresults = model.infer(image)[0]\ndetections = sv.Detections.from_inference(results)\n\nbounding_box_annotator = sv.BoundingBoxAnnotator()\nlabel_annotator = sv.LabelAnnotator()\n\nannotated_image = bounding_box_annotator.annotate(\n    scene=image, detections=detections)\nannotated_image = label_annotator.annotate(\n    scene=annotated_image, detections=detections)\n
import cv2\nimport supervision as sv\nfrom ultralytics import YOLO\n\nmodel = YOLO(\"yolov8x.pt\")\nimage = cv2.imread(<SOURCE_IMAGE_PATH>)\nresults = model(image)[0]\ndetections = sv.Detections.from_ultralytics(results)\n\nbounding_box_annotator = sv.BoundingBoxAnnotator()\nlabel_annotator = sv.LabelAnnotator()\n\nannotated_image = bounding_box_annotator.annotate(\n    scene=image, detections=detections)\nannotated_image = label_annotator.annotate(\n    scene=annotated_image, detections=detections)\n
import torch\nimport supervision as sv\nfrom PIL import Image\nfrom transformers import DetrImageProcessor, DetrForObjectDetection\n\nprocessor = DetrImageProcessor.from_pretrained(\"facebook/detr-resnet-50\")\nmodel = DetrForObjectDetection.from_pretrained(\"facebook/detr-resnet-50\")\n\nimage = Image.open(<SOURCE_IMAGE_PATH>)\ninputs = processor(images=image, return_tensors=\"pt\")\n\nwith torch.no_grad():\n    outputs = model(**inputs)\n\nwidth, height = image.size\ntarget_size = torch.tensor([[height, width]])\nresults = processor.post_process_object_detection(\n    outputs=outputs, target_sizes=target_size)[0]\ndetections = sv.Detections.from_transformers(results)\n\nbounding_box_annotator = sv.BoundingBoxAnnotator()\nlabel_annotator = sv.LabelAnnotator()\n\nlabels = [\n    model.config.id2label[class_id]\n    for class_id\n    in detections.class_id\n]\n\nannotated_image = bounding_box_annotator.annotate(\n    scene=image, detections=detections)\nannotated_image = label_annotator.annotate(\n    scene=annotated_image, detections=detections, labels=labels)\n

"},{"location":"how_to/detect_small_objects/#input-resolution","title":"Input Resolution","text":"

Modifying the input resolution of images before detection can enhance small object identification at the cost of processing speed and increased memory usage. This method is less effective for ultra-high-resolution images (4K and above).

Inference | Ultralytics
import cv2\nimport supervision as sv\nfrom inference import get_model\n\nmodel = get_model(model_id=\"yolov8x-1280\")\nimage = cv2.imread(<SOURCE_IMAGE_PATH>)\nresults = model.infer(image)[0]\ndetections = sv.Detections.from_inference(results)\n\nbounding_box_annotator = sv.BoundingBoxAnnotator()\nlabel_annotator = sv.LabelAnnotator()\n\nannotated_image = bounding_box_annotator.annotate(\n    scene=image, detections=detections)\nannotated_image = label_annotator.annotate(\n    scene=annotated_image, detections=detections)\n
import cv2\nimport supervision as sv\nfrom ultralytics import YOLO\n\nmodel = YOLO(\"yolov8x.pt\")\nimage = cv2.imread(<SOURCE_IMAGE_PATH>)\nresults = model(image, imgsz=1280)[0]\ndetections = sv.Detections.from_ultralytics(results)\n\nbounding_box_annotator = sv.BoundingBoxAnnotator()\nlabel_annotator = sv.LabelAnnotator()\n\nannotated_image = bounding_box_annotator.annotate(\n    scene=image, detections=detections)\nannotated_image = label_annotator.annotate(\n    scene=annotated_image, detections=detections)\n

"},{"location":"how_to/detect_small_objects/#inference-slicer","title":"Inference Slicer","text":"

InferenceSlicer processes high-resolution images by dividing them into smaller segments, detecting objects within each, and aggregating the results.

Inference | Ultralytics | Transformers
import cv2\nimport numpy as np\nimport supervision as sv\nfrom inference import get_model\n\nmodel = get_model(model_id=\"yolov8x-640\")\nimage = cv2.imread(<SOURCE_IMAGE_PATH>)\n\ndef callback(image_slice: np.ndarray) -> sv.Detections:\n    results = model.infer(image_slice)[0]\n    return sv.Detections.from_inference(results)\n\nslicer = sv.InferenceSlicer(callback = callback)\ndetections = slicer(image)\n\nbounding_box_annotator = sv.BoundingBoxAnnotator()\nlabel_annotator = sv.LabelAnnotator()\n\nannotated_image = bounding_box_annotator.annotate(\n    scene=image, detections=detections)\nannotated_image = label_annotator.annotate(\n    scene=annotated_image, detections=detections)\n
import cv2\nimport numpy as np\nimport supervision as sv\nfrom ultralytics import YOLO\n\nmodel = YOLO(\"yolov8x.pt\")\nimage = cv2.imread(<SOURCE_IMAGE_PATH>)\n\ndef callback(image_slice: np.ndarray) -> sv.Detections:\n    result = model(image_slice)[0]\n    return sv.Detections.from_ultralytics(result)\n\nslicer = sv.InferenceSlicer(callback = callback)\ndetections = slicer(image)\n\nbounding_box_annotator = sv.BoundingBoxAnnotator()\nlabel_annotator = sv.LabelAnnotator()\n\nannotated_image = bounding_box_annotator.annotate(\n    scene=image, detections=detections)\nannotated_image = label_annotator.annotate(\n    scene=annotated_image, detections=detections)\n
import cv2\nimport torch\nimport numpy as np\nimport supervision as sv\nfrom PIL import Image\nfrom transformers import DetrImageProcessor, DetrForObjectDetection\n\nprocessor = DetrImageProcessor.from_pretrained(\"facebook/detr-resnet-50\")\nmodel = DetrForObjectDetection.from_pretrained(\"facebook/detr-resnet-50\")\n\nimage = cv2.imread(<SOURCE_IMAGE_PATH>)\n\ndef callback(image_slice: np.ndarray) -> sv.Detections:\n    image_slice = cv2.cvtColor(image_slice, cv2.COLOR_BGR2RGB)\n    image_slice = Image.fromarray(image_slice)\n    inputs = processor(images=image_slice, return_tensors=\"pt\")\n\n    with torch.no_grad():\n        outputs = model(**inputs)\n\n    width, height = image_slice.size\n    target_size = torch.tensor([[width, height]])\n    results = processor.post_process_object_detection(\n        outputs=outputs, target_sizes=target_size)[0]\n    return sv.Detections.from_transformers(results)\n\nslicer = sv.InferenceSlicer(callback = callback)\ndetections = slicer(image)\n\nbounding_box_annotator = sv.BoundingBoxAnnotator()\nlabel_annotator = sv.LabelAnnotator()\n\nlabels = [\n    model.config.id2label[class_id]\n    for class_id\n    in detections.class_id\n]\n\nannotated_image = bounding_box_annotator.annotate(\n    scene=image, detections=detections)\nannotated_image = label_annotator.annotate(\n    scene=annotated_image, detections=detections, labels=labels)\n

"},{"location":"how_to/detect_small_objects/#small-object-segmentation","title":"Small Object Segmentation","text":"

InferenceSlicer can perform segmentation tasks too.

Inference | Ultralytics
import cv2\nimport numpy as np\nimport supervision as sv\nfrom inference import get_model\n\nmodel = get_model(model_id=\"yolov8x-seg-640\")\nimage = cv2.imread(<SOURCE_IMAGE_PATH>)\n\ndef callback(image_slice: np.ndarray) -> sv.Detections:\n    results = model.infer(image_slice)[0]\n    return sv.Detections.from_inference(results)\n\nslicer = sv.InferenceSlicer(callback = callback)\ndetections = slicer(image)\n\nmask_annotator = sv.MaskAnnotator()\nlabel_annotator = sv.LabelAnnotator()\n\nannotated_image = mask_annotator.annotate(\n    scene=image, detections=detections)\nannotated_image = label_annotator.annotate(\n    scene=annotated_image, detections=detections)\n
import cv2\nimport numpy as np\nimport supervision as sv\nfrom ultralytics import YOLO\n\nmodel = YOLO(\"yolov8x-seg.pt\")\nimage = cv2.imread(<SOURCE_IMAGE_PATH>)\n\ndef callback(image_slice: np.ndarray) -> sv.Detections:\n    result = model(image_slice)[0]\n    return sv.Detections.from_ultralytics(result)\n\nslicer = sv.InferenceSlicer(callback = callback)\ndetections = slicer(image)\n\nmask_annotator = sv.MaskAnnotator()\nlabel_annotator = sv.LabelAnnotator()\n\nannotated_image = mask_annotator.annotate(\n    scene=image, detections=detections)\nannotated_image = label_annotator.annotate(\n    scene=annotated_image, detections=detections)\n

"},{"location":"how_to/filter_detections/","title":"Filter Detections","text":"

The advanced filtering capabilities of the Detections class offer users a versatile and efficient way to narrow down and refine object detections. This section outlines various filtering methods, including filtering by specific class or a set of classes, confidence, object area, bounding box area, relative area, box dimensions, and designated zones. Each method is demonstrated with concise code examples to provide users with a clear understanding of how to implement the filters in their applications.

"},{"location":"how_to/filter_detections/#by-specific-class","title":"by specific class","text":"

Allows you to select detections that belong only to one selected class.

After | Before
import supervision as sv\n\ndetections = sv.Detections(...)\ndetections = detections[detections.class_id == 0]\n

import supervision as sv\n\ndetections = sv.Detections(...)\ndetections = detections[detections.class_id == 0]\n

"},{"location":"how_to/filter_detections/#by-set-of-classes","title":"by set of classes","text":"

Allows you to select detections that belong to a selected set of classes.

After | Before
import numpy as np\nimport supervision as sv\n\nselected_classes = [0, 2, 3]\ndetections = sv.Detections(...)\ndetections = detections[np.isin(detections.class_id, selected_classes)]\n

import numpy as np\nimport supervision as sv\n\nclass_id = [0, 2, 3]\ndetections = sv.Detections(...)\ndetections = detections[np.isin(detections.class_id, class_id)]\n

"},{"location":"how_to/filter_detections/#by-confidence","title":"by confidence","text":"

Allows you to select detections with a specific confidence value, for example, higher than a selected threshold.

After | Before
import supervision as sv\n\ndetections = sv.Detections(...)\ndetections = detections[detections.confidence > 0.5]\n

import supervision as sv\n\ndetections = sv.Detections(...)\ndetections = detections[detections.confidence > 0.5]\n

"},{"location":"how_to/filter_detections/#by-area","title":"by area","text":"

Allows you to select detections based on their size. We define the area as the number of pixels occupied by the detection in the image. In the example below, we filter out detections that are too small.

After | Before
import supervision as sv\n\ndetections = sv.Detections(...)\ndetections = detections[detections.area > 1000]\n

import supervision as sv\n\ndetections = sv.Detections(...)\ndetections = detections[detections.area > 1000]\n

"},{"location":"how_to/filter_detections/#by-relative-area","title":"by relative area","text":"

Allows you to select detections based on their size relative to the size of the whole image. The notion of detection size can change depending on the image: a detection occupying 10000 square px can be large on a 1280x720 image but small on a 3840x2160 image. In such cases, we can filter out detections based on the percentage of the image area they occupy. In the example below, we remove detections that are too large.

After | Before
import supervision as sv\n\nimage = ...\nheight, width, channels = image.shape\nimage_area = height * width\n\ndetections = sv.Detections(...)\ndetections = detections[(detections.area / image_area) < 0.8]\n

import supervision as sv\n\nimage = ...\nheight, width, channels = image.shape\nimage_area = height * width\n\ndetections = sv.Detections(...)\ndetections = detections[(detections.area / image_area) < 0.8]\n

"},{"location":"how_to/filter_detections/#by-box-dimensions","title":"by box dimensions","text":"

Allows you to select detections based on their dimensions. The size of the bounding box, as well as its coordinates, can be criteria for rejecting a detection. Implementing such filtering requires a bit of custom code but is relatively simple and fast.

After | Before
import supervision as sv\n\ndetections = sv.Detections(...)\nw = detections.xyxy[:, 2] - detections.xyxy[:, 0]\nh = detections.xyxy[:, 3] - detections.xyxy[:, 1]\ndetections = detections[(w > 200) & (h > 200)]\n

import supervision as sv\n\ndetections = sv.Detections(...)\nw = detections.xyxy[:, 2] - detections.xyxy[:, 0]\nh = detections.xyxy[:, 3] - detections.xyxy[:, 1]\ndetections = detections[(w > 200) & (h > 200)]\n

"},{"location":"how_to/filter_detections/#by-polygonzone","title":"by PolygonZone","text":"

Allows you to use Detections in combination with PolygonZone to filter bounding boxes based on whether they fall inside or outside the zone. In the example below, you can see how to filter out all detections located in the lower part of the image.

After | Before
import supervision as sv\n\nzone = sv.PolygonZone(...)\ndetections = sv.Detections(...)\nmask = zone.trigger(detections=detections)\ndetections = detections[mask]\n

import supervision as sv\n\nzone = sv.PolygonZone(...)\ndetections = sv.Detections(...)\nmask = zone.trigger(detections=detections)\ndetections = detections[mask]\n

"},{"location":"how_to/filter_detections/#by-mixed-conditions","title":"by mixed conditions","text":"

Detections' greatest strength, however, is that you can build arbitrarily complex logical conditions by simply combining separate conditions using & or |.

After | Before
import supervision as sv\n\nzone = sv.PolygonZone(...)\ndetections = sv.Detections(...)\nmask = zone.trigger(detections=detections)\ndetections = detections[(detections.confidence > 0.7) & mask]\n

import supervision as sv\n\nzone = sv.PolygonZone(...)\ndetections = sv.Detections(...)\nmask = zone.trigger(detections=detections)\ndetections = detections[mask]\n

"},{"location":"how_to/save_detections/","title":"Save Detections","text":"

Supervision provides an easy way to save detections to .CSV and .JSON files for offline processing. This guide demonstrates how to perform video inference using the Inference, Ultralytics, or Transformers packages and save the results with sv.CSVSink and sv.JSONSink.

"},{"location":"how_to/save_detections/#run-detection","title":"Run Detection","text":"

First, you'll need to obtain predictions from your object detection or segmentation model. You can learn more on this topic in our How to Detect and Annotate guide.

Inference | Ultralytics | Transformers
import supervision as sv\nfrom inference import get_model\n\nmodel = get_model(model_id=\"yolov8n-640\")\nframes_generator = sv.get_video_frames_generator(<SOURCE_VIDEO_PATH>)\n\nfor frame in frames_generator:\n\n    results = model.infer(frame)[0]\n    detections = sv.Detections.from_inference(results)\n
import supervision as sv\nfrom ultralytics import YOLO\n\nmodel = YOLO(\"yolov8n.pt\")\nframes_generator = sv.get_video_frames_generator(<SOURCE_VIDEO_PATH>)\n\nfor frame in frames_generator:\n\n    results = model(frame)[0]\n    detections = sv.Detections.from_ultralytics(results)\n
import torch\nimport supervision as sv\nfrom transformers import DetrImageProcessor, DetrForObjectDetection\n\nprocessor = DetrImageProcessor.from_pretrained(\"facebook/detr-resnet-50\")\nmodel = DetrForObjectDetection.from_pretrained(\"facebook/detr-resnet-50\")\nframes_generator = sv.get_video_frames_generator(<SOURCE_VIDEO_PATH>)\n\nfor frame in frames_generator:\n\n    frame = sv.cv2_to_pillow(frame)\n    inputs = processor(images=frame, return_tensors=\"pt\")\n\n    with torch.no_grad():\n        outputs = model(**inputs)\n\n    width, height = frame.size\n    target_size = torch.tensor([[height, width]])\n    results = processor.post_process_object_detection(\n        outputs=outputs, target_sizes=target_size)[0]\n    detections = sv.Detections.from_transformers(results)\n
"},{"location":"how_to/save_detections/#save-detections-as-csv","title":"Save Detections as CSV","text":"

To save detections to a .CSV file, open an sv.CSVSink and pass the sv.Detections object resulting from the inference to it. Its fields are parsed and saved to disk.

Inference | Ultralytics | Transformers
import supervision as sv\nfrom inference import get_model\n\nmodel = get_model(model_id=\"yolov8n-640\")\nframes_generator = sv.get_video_frames_generator(<SOURCE_VIDEO_PATH>)\n\nwith sv.CSVSink(<TARGET_CSV_PATH>) as sink:\n    for frame in frames_generator:\n\n        results = model.infer(frame)[0]\n        detections = sv.Detections.from_inference(results)\n        sink.append(detections, {})\n
import supervision as sv\nfrom ultralytics import YOLO\n\nmodel = YOLO(\"yolov8n.pt\")\nframes_generator = sv.get_video_frames_generator(<SOURCE_VIDEO_PATH>)\n\nwith sv.CSVSink(<TARGET_CSV_PATH>) as sink:\n    for frame in frames_generator:\n\n        results = model(frame)[0]\n        detections = sv.Detections.from_ultralytics(results)\n        sink.append(detections, {})\n
import torch\nimport supervision as sv\nfrom transformers import DetrImageProcessor, DetrForObjectDetection\n\nprocessor = DetrImageProcessor.from_pretrained(\"facebook/detr-resnet-50\")\nmodel = DetrForObjectDetection.from_pretrained(\"facebook/detr-resnet-50\")\nframes_generator = sv.get_video_frames_generator(<SOURCE_VIDEO_PATH>)\n\nwith sv.CSVSink(<TARGET_CSV_PATH>) as sink:\n    for frame in frames_generator:\n\n        frame = sv.cv2_to_pillow(frame)\n        inputs = processor(images=frame, return_tensors=\"pt\")\n\n        with torch.no_grad():\n            outputs = model(**inputs)\n\n        width, height = frame.size\n        target_size = torch.tensor([[height, width]])\n        results = processor.post_process_object_detection(\n            outputs=outputs, target_sizes=target_size)[0]\n        detections = sv.Detections.from_transformers(results)\n        sink.append(detections, {})\n
x_min | y_min | x_max | y_max | class_id | confidence | tracker_id | class_name
2941.14 | 1269.31 | 3220.77 | 1500.67 | 2 | 0.8517 | | car
944.889 | 899.641 | 1235.42 | 1308.80 | 7 | 0.6752 | | truck
1439.78 | 1077.79 | 1621.27 | 1231.40 | 2 | 0.6450 | | car

"},{"location":"how_to/save_detections/#custom-fields","title":"Custom Fields","text":"

Besides the regular fields in sv.Detections, sv.CSVSink also allows you to add custom information to each row via the custom_data dictionary. Let's use this feature to save the index of the frame from which the detections originate (a variant that also stores a timestamp is sketched after the tabs below).

Inference | Ultralytics | Transformers
import supervision as sv\nfrom inference import get_model\n\nmodel = get_model(model_id=\"yolov8n-640\")\nframes_generator = sv.get_video_frames_generator(<SOURCE_VIDEO_PATH>)\n\nwith sv.CSVSink(<TARGET_CSV_PATH>) as sink:\n    for frame_index, frame in enumerate(frames_generator):\n\n        results = model.infer(frame)[0]\n        detections = sv.Detections.from_inference(results)\n        sink.append(detections, {\"frame_index\": frame_index})\n
import supervision as sv\nfrom ultralytics import YOLO\n\nmodel = YOLO(\"yolov8n.pt\")\nframes_generator = sv.get_video_frames_generator(<SOURCE_VIDEO_PATH>)\n\nwith sv.CSVSink(<TARGET_CSV_PATH>) as sink:\n    for frame_index, frame in enumerate(frames_generator):\n\n        results = model(frame)[0]\n        detections = sv.Detections.from_ultralytics(results)\n        sink.append(detections, {\"frame_index\": frame_index})\n
import torch\nimport supervision as sv\nfrom transformers import DetrImageProcessor, DetrForObjectDetection\n\nprocessor = DetrImageProcessor.from_pretrained(\"facebook/detr-resnet-50\")\nmodel = DetrForObjectDetection.from_pretrained(\"facebook/detr-resnet-50\")\nframes_generator = sv.get_video_frames_generator(<SOURCE_VIDEO_PATH>)\n\nwith sv.CSVSink(<TARGET_CSV_PATH>) as sink:\n    for frame_index, frame in enumerate(frames_generator):\n\n        frame = sv.cv2_to_pillow(frame)\n        inputs = processor(images=frame, return_tensors=\"pt\")\n\n        with torch.no_grad():\n            outputs = model(**inputs)\n\n        width, height = frame.size\n        target_size = torch.tensor([[height, width]])\n        results = processor.post_process_object_detection(\n            outputs=outputs, target_sizes=target_size)[0]\n        detections = sv.Detections.from_transformers(results)\n        sink.append(detections, {\"frame_index\": frame_index})\n
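
The custom_data dictionary is not limited to the frame index. As a further sketch, the Ultralytics variant below additionally uses sv.VideoInfo.from_video_path to read the FPS of the source video and stores a timestamp in seconds (the time_sec field name is illustrative):

import supervision as sv\nfrom ultralytics import YOLO\n\nmodel = YOLO(\"yolov8n.pt\")\nvideo_info = sv.VideoInfo.from_video_path(<SOURCE_VIDEO_PATH>)\nframes_generator = sv.get_video_frames_generator(<SOURCE_VIDEO_PATH>)\n\nwith sv.CSVSink(<TARGET_CSV_PATH>) as sink:\n    for frame_index, frame in enumerate(frames_generator):\n\n        results = model(frame)[0]\n        detections = sv.Detections.from_ultralytics(results)\n        # frame_index / fps converts the frame counter into a timestamp in seconds\n        sink.append(detections, {\n            \"frame_index\": frame_index,\n            \"time_sec\": frame_index / video_info.fps,\n        })\n
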
x_min | y_min | x_max | y_max | class_id | confidence | tracker_id | class_name | frame_index
2941.14 | 1269.31 | 3220.77 | 1500.67 | 2 | 0.8517 | | car | 0
944.889 | 899.641 | 1235.42 | 1308.80 | 7 | 0.6752 | | truck | 0
1439.78 | 1077.79 | 1621.27 | 1231.40 | 2 | 0.6450 | | car | 0

"},{"location":"how_to/save_detections/#save-detections-as-json","title":"Save Detections as JSON","text":"

If you prefer to save the result in a .JSON file instead of a .CSV file, all you need to do is replace sv.CSVSink with sv.JSONSink.

Inference | Ultralytics | Transformers
import supervision as sv\nfrom inference import get_model\n\nmodel = get_model(model_id=\"yolov8n-640\")\nframes_generator = sv.get_video_frames_generator(<SOURCE_VIDEO_PATH>)\n\nwith sv.JSONSink(<TARGET_CSV_PATH>) as sink:\n    for frame_index, frame in enumerate(frames_generator):\n\n        results = model.infer(frame)[0]\n        detections = sv.Detections.from_inference(results)\n        sink.append(detections, {\"frame_index\": frame_index})\n
import supervision as sv\nfrom ultralytics import YOLO\n\nmodel = YOLO(\"yolov8n.pt\")\nframes_generator = sv.get_video_frames_generator(<SOURCE_VIDEO_PATH>)\n\nwith sv.JSONSink(<TARGET_CSV_PATH>) as sink:\n    for frame_index, frame in enumerate(frames_generator):\n\n        results = model(frame)[0]\n        detections = sv.Detections.from_ultralytics(results)\n        sink.append(detections, {\"frame_index\": frame_index})\n
import torch\nimport supervision as sv\nfrom transformers import DetrImageProcessor, DetrForObjectDetection\n\nprocessor = DetrImageProcessor.from_pretrained(\"facebook/detr-resnet-50\")\nmodel = DetrForObjectDetection.from_pretrained(\"facebook/detr-resnet-50\")\nframes_generator = sv.get_video_frames_generator(<SOURCE_VIDEO_PATH>)\n\nwith sv.JSONSink(<TARGET_CSV_PATH>) as sink:\n    for frame_index, frame in enumerate(frames_generator):\n\n        frame = sv.cv2_to_pillow(frame)\n        inputs = processor(images=frame, return_tensors=\"pt\")\n\n        with torch.no_grad():\n            outputs = model(**inputs)\n\n        width, height = frame.size\n        target_size = torch.tensor([[height, width]])\n        results = processor.post_process_object_detection(\n            outputs=outputs, target_sizes=target_size)[0]\n        detections = sv.Detections.from_transformers(results)\n        sink.append(detections, {\"frame_index\": frame_index})\n
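
Both sinks write plain files, so the saved detections can be loaded back for the offline processing mentioned at the start of this guide. A minimal sketch using pandas on the CSV output (pandas is an assumption here, not a Supervision dependency):

import pandas as pd\n\n# each row is one detection; frame_index groups detections per frame\ndf = pd.read_csv(<TARGET_CSV_PATH>)\nprint(df.groupby(\"frame_index\").size())\n
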
"},{"location":"how_to/track_objects/","title":"Track Objects","text":"

Enhance your video analysis by tracking objects recognized by a wide range of object detection and segmentation models. This guide walks you through performing inference with the YOLOv8 model via either the Inference or Ultralytics packages, and then shows how to track the detected objects and annotate your video content for deeper analysis.

To make it easier to follow this tutorial, download the video we will use as an example. You can do this using the supervision[assets] extension.

from supervision.assets import download_assets, VideoAssets\n\ndownload_assets(VideoAssets.PEOPLE_WALKING)\n
"},{"location":"how_to/track_objects/#run-inference","title":"Run Inference","text":"

First, you'll need to obtain predictions from your object detection or segmentation model. In this tutorial, we are using the YOLOv8 model as an example. However, Supervision is versatile and compatible with various models. Check this link for guidance on how to plug in other models.

We will define a callback function, which will process each frame of the video by obtaining model predictions and then annotating the frame based on these predictions. This callback function will be essential in the subsequent steps of the tutorial, as it will be modified to include tracking, labeling, and trace annotations.

Ultralytics | Inference
import numpy as np\nimport supervision as sv\nfrom ultralytics import YOLO\n\nmodel = YOLO(\"yolov8n.pt\")\nbox_annotator = sv.BoundingBoxAnnotator()\n\ndef callback(frame: np.ndarray, _: int) -> np.ndarray:\n    results = model(frame)[0]\n    detections = sv.Detections.from_ultralytics(results)\n    return box_annotator.annotate(frame.copy(), detections=detections)\n\nsv.process_video(\n    source_path=\"people-walking.mp4\",\n    target_path=\"result.mp4\",\n    callback=callback\n)\n
import numpy as np\nimport supervision as sv\nfrom inference.models.utils import get_roboflow_model\n\nmodel = get_roboflow_model(model_id=\"yolov8n-640\", api_key=<ROBOFLOW API KEY>)\nbox_annotator = sv.BoundingBoxAnnotator()\n\ndef callback(frame: np.ndarray, _: int) -> np.ndarray:\n    results = model.infer(frame)[0]\n    detections = sv.Detections.from_inference(results)\n    return box_annotator.annotate(frame.copy(), detections=detections)\n\nsv.process_video(\n    source_path=\"people-walking.mp4\",\n    target_path=\"result.mp4\",\n    callback=callback\n)\n
"},{"location":"how_to/track_objects/#tracking","title":"Tracking","text":"

After running inference and obtaining predictions, the next step is to track the detected objects throughout the video. Using Supervision's sv.ByteTrack functionality, each detected object is assigned a unique tracker ID, making it possible to follow each object's motion path across frames.

Ultralytics | Inference
import numpy as np\nimport supervision as sv\nfrom ultralytics import YOLO\n\nmodel = YOLO(\"yolov8n.pt\")\ntracker = sv.ByteTrack()\nbox_annotator = sv.BoundingBoxAnnotator()\n\ndef callback(frame: np.ndarray, _: int) -> np.ndarray:\n    results = model(frame)[0]\n    detections = sv.Detections.from_ultralytics(results)\n    detections = tracker.update_with_detections(detections)\n    return box_annotator.annotate(frame.copy(), detections=detections)\n\nsv.process_video(\n    source_path=\"people-walking.mp4\",\n    target_path=\"result.mp4\",\n    callback=callback\n)\n
import numpy as np\nimport supervision as sv\nfrom inference.models.utils import get_roboflow_model\n\nmodel = get_roboflow_model(model_id=\"yolov8n-640\", api_key=<ROBOFLOW API KEY>)\ntracker = sv.ByteTrack()\nbox_annotator = sv.BoundingBoxAnnotator()\n\ndef callback(frame: np.ndarray, _: int) -> np.ndarray:\n    results = model.infer(frame)[0]\n    detections = sv.Detections.from_inference(results)\n    detections = tracker.update_with_detections(detections)\n    return box_annotator.annotate(frame.copy(), detections=detections)\n\nsv.process_video(\n    source_path=\"people-walking.mp4\",\n    target_path=\"result.mp4\",\n    callback=callback\n)\n
"},{"location":"how_to/track_objects/#annotate-video-with-tracking-ids","title":"Annotate Video with Tracking IDs","text":"

Annotating the video with tracking IDs makes it easier to distinguish and follow each object. With sv.LabelAnnotator, we can overlay the tracker IDs and class labels on the detected objects, offering a clear visual representation of each object's class and unique identifier.

Ultralytics | Inference
import numpy as np\nimport supervision as sv\nfrom ultralytics import YOLO\n\nmodel = YOLO(\"yolov8n.pt\")\ntracker = sv.ByteTrack()\nbox_annotator = sv.BoundingBoxAnnotator()\nlabel_annotator = sv.LabelAnnotator()\n\ndef callback(frame: np.ndarray, _: int) -> np.ndarray:\n    results = model(frame)[0]\n    detections = sv.Detections.from_ultralytics(results)\n    detections = tracker.update_with_detections(detections)\n\n    labels = [\n        f\"#{tracker_id} {results.names[class_id]}\"\n        for class_id, tracker_id\n        in zip(detections.class_id, detections.tracker_id)\n    ]\n\n    annotated_frame = box_annotator.annotate(\n        frame.copy(), detections=detections)\n    return label_annotator.annotate(\n        annotated_frame, detections=detections, labels=labels)\n\nsv.process_video(\n    source_path=\"people-walking.mp4\",\n    target_path=\"result.mp4\",\n    callback=callback\n)\n
import numpy as np\nimport supervision as sv\nfrom inference.models.utils import get_roboflow_model\n\nmodel = get_roboflow_model(model_id=\"yolov8n-640\", api_key=<ROBOFLOW API KEY>)\ntracker = sv.ByteTrack()\nbox_annotator = sv.BoundingBoxAnnotator()\nlabel_annotator = sv.LabelAnnotator()\n\ndef callback(frame: np.ndarray, _: int) -> np.ndarray:\n    results = model.infer(frame)[0]\n    detections = sv.Detections.from_inference(results)\n    detections = tracker.update_with_detections(detections)\n\n    labels = [\n        f\"#{tracker_id} {results.names[class_id]}\"\n        for class_id, tracker_id\n        in zip(detections.class_id, detections.tracker_id)\n    ]\n\n    annotated_frame = box_annotator.annotate(\n        frame.copy(), detections=detections)\n    return label_annotator.annotate(\n        annotated_frame, detections=detections, labels=labels)\n\nsv.process_video(\n    source_path=\"people-walking.mp4\",\n    target_path=\"result.mp4\",\n    callback=callback\n)\n
"},{"location":"how_to/track_objects/#annotate-video-with-traces","title":"Annotate Video with Traces","text":"

Adding traces to the video overlays the historical paths of the detected objects. This feature, powered by sv.TraceAnnotator, visualizes object trajectories and helps in understanding the movement patterns and interactions between objects in the video.

Ultralytics | Inference
import numpy as np\nimport supervision as sv\nfrom ultralytics import YOLO\n\nmodel = YOLO(\"yolov8n.pt\")\ntracker = sv.ByteTrack()\nbox_annotator = sv.BoundingBoxAnnotator()\nlabel_annotator = sv.LabelAnnotator()\ntrace_annotator = sv.TraceAnnotator()\n\ndef callback(frame: np.ndarray, _: int) -> np.ndarray:\n    results = model(frame)[0]\n    detections = sv.Detections.from_ultralytics(results)\n    detections = tracker.update_with_detections(detections)\n\n    labels = [\n        f\"#{tracker_id} {results.names[class_id]}\"\n        for class_id, tracker_id\n        in zip(detections.class_id, detections.tracker_id)\n    ]\n\n    annotated_frame = box_annotator.annotate(\n        frame.copy(), detections=detections)\n    annotated_frame = label_annotator.annotate(\n        annotated_frame, detections=detections, labels=labels)\n    return trace_annotator.annotate(\n        annotated_frame, detections=detections)\n\nsv.process_video(\n    source_path=\"people-walking.mp4\",\n    target_path=\"result.mp4\",\n    callback=callback\n)\n
import numpy as np\nimport supervision as sv\nfrom inference.models.utils import get_roboflow_model\n\nmodel = get_roboflow_model(model_id=\"yolov8n-640\", api_key=<ROBOFLOW API KEY>)\ntracker = sv.ByteTrack()\nbox_annotator = sv.BoundingBoxAnnotator()\nlabel_annotator = sv.LabelAnnotator()\ntrace_annotator = sv.TraceAnnotator()\n\ndef callback(frame: np.ndarray, _: int) -> np.ndarray:\n    results = model.infer(frame)[0]\n    detections = sv.Detections.from_inference(results)\n    detections = tracker.update_with_detections(detections)\n\n    labels = [\n        f\"#{tracker_id} {results.names[class_id]}\"\n        for class_id, tracker_id\n        in zip(detections.class_id, detections.tracker_id)\n    ]\n\n    annotated_frame = box_annotator.annotate(\n        frame.copy(), detections=detections)\n    annotated_frame = label_annotator.annotate(\n        annotated_frame, detections=detections, labels=labels)\n    return trace_annotator.annotate(\n        annotated_frame, detections=detections)\n\nsv.process_video(\n    source_path=\"people-walking.mp4\",\n    target_path=\"result.mp4\",\n    callback=callback\n)\n

This walkthrough shows how to annotate videos effectively using Supervision's functionalities, including object tracking and trace annotation. A sketch that combines tracking with the detection-saving workflow follows below.

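As a final sketch, referenced above, the tracking callback can be combined with the Save Detections workflow so that tracked detections, including their tracker_id, are written to a CSV while the annotated video is rendered (the output file names are illustrative):

import numpy as np\nimport supervision as sv\nfrom ultralytics import YOLO\n\nmodel = YOLO(\"yolov8n.pt\")\ntracker = sv.ByteTrack()\nbox_annotator = sv.BoundingBoxAnnotator()\ncsv_sink = sv.CSVSink(\"tracked-detections.csv\")\n\ndef callback(frame: np.ndarray, frame_index: int) -> np.ndarray:\n    results = model(frame)[0]\n    detections = sv.Detections.from_ultralytics(results)\n    detections = tracker.update_with_detections(detections)\n    # tracker_id is populated here, so it is written to the CSV as well\n    csv_sink.append(detections, {\"frame_index\": frame_index})\n    return box_annotator.annotate(frame.copy(), detections=detections)\n\nwith csv_sink:\n    sv.process_video(\n        source_path=\"people-walking.mp4\",\n        target_path=\"result.mp4\",\n        callback=callback\n    )\n
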
"},{"location":"keypoint/annotators/","title":"Annotators","text":"VertexAnnotatorEdgeAnnotatorVertexLabelAnnotator
import supervision as sv\n\nimage = ...\nkey_points = sv.KeyPoints(...)\n\nvertex_annotator = sv.VertexAnnotator(\n    color=sv.Color.GREEN,\n    radius=10\n)\nannotated_frame = vertex_annotator.annotate(\n    scene=image.copy(),\n    key_points=key_points\n)\n
import supervision as sv\n\nimage = ...\nkey_points = sv.KeyPoints(...)\n\nedge_annotator = sv.EdgeAnnotator(\n    color=sv.Color.GREEN,\n    thickness=5\n)\nannotated_frame = edge_annotator.annotate(\n    scene=image.copy(),\n    key_points=key_points\n)\n
import supervision as sv\n\nimage = ...\nkey_points = sv.KeyPoints(...)\n\nvertex_label_annotator = sv.VertexLabelAnnotator(\n    color=sv.Color.GREEN,\n    text_color=sv.Color.BLACK,\n    border_radius=5\n)\nannotated_frame = vertex_label_annotator.annotate(\n    scene=image.copy(),\n    key_points=key_points\n)\n
VertexAnnotator

Bases: BaseKeyPointAnnotator

A class that specializes in drawing skeleton vertices on images. It uses specified key points to determine the locations where the vertices should be drawn.

Source code in supervision/keypoint/annotators.py
class VertexAnnotator(BaseKeyPointAnnotator):\n    \"\"\"\n    A class that specializes in drawing skeleton vertices on images. It uses\n    specified key points to determine the locations where the vertices should be\n    drawn.\n    \"\"\"\n\n    def __init__(\n        self,\n        color: Color = Color.ROBOFLOW,\n        radius: int = 4,\n    ) -> None:\n        \"\"\"\n        Args:\n            color (Color, optional): The color to use for annotating key points.\n            radius (int, optional): The radius of the circles used to represent the key\n                points.\n        \"\"\"\n        self.color = color\n        self.radius = radius\n\n    @convert_for_annotation_method\n    def annotate(self, scene: ImageType, key_points: KeyPoints) -> ImageType:\n        \"\"\"\n        Annotates the given scene with skeleton vertices based on the provided key\n        points. It draws circles at each key point location.\n\n        Args:\n            scene (ImageType): The image where skeleton vertices will be drawn.\n                `ImageType` is a flexible type, accepting either `numpy.ndarray` or\n                `PIL.Image.Image`.\n            key_points (KeyPoints): A collection of key points where each key point\n                consists of x and y coordinates.\n\n        Returns:\n            The annotated image, matching the type of `scene` (`numpy.ndarray`\n                or `PIL.Image.Image`)\n\n        Example:\n            ```python\n            import supervision as sv\n\n            image = ...\n            key_points = sv.KeyPoints(...)\n\n            vertex_annotator = sv.VertexAnnotator(\n                color=sv.Color.GREEN,\n                radius=10\n            )\n            annotated_frame = vertex_annotator.annotate(\n                scene=image.copy(),\n                key_points=key_points\n            )\n            ```\n\n        ![vertex-annotator-example](https://media.roboflow.com/\n        supervision-annotator-examples/vertex-annotator-example.png)\n        \"\"\"\n        if len(key_points) == 0:\n            return scene\n\n        for xy in key_points.xy:\n            for x, y in xy:\n                cv2.circle(\n                    img=scene,\n                    center=(int(x), int(y)),\n                    radius=self.radius,\n                    color=self.color.as_bgr(),\n                    thickness=-1,\n                )\n\n        return scene\n
EdgeAnnotator

Bases: BaseKeyPointAnnotator

A class that specializes in drawing skeleton edges on images using specified key points. It connects key points with lines to form the skeleton structure.

Source code in supervision/keypoint/annotators.py
class EdgeAnnotator(BaseKeyPointAnnotator):\n    \"\"\"\n    A class that specializes in drawing skeleton edges on images using specified key\n    points. It connects key points with lines to form the skeleton structure.\n    \"\"\"\n\n    def __init__(\n        self,\n        color: Color = Color.ROBOFLOW,\n        thickness: int = 2,\n        edges: Optional[List[Tuple[int, int]]] = None,\n    ) -> None:\n        \"\"\"\n        Args:\n            color (Color, optional): The color to use for the edges.\n            thickness (int, optional): The thickness of the edges.\n            edges (Optional[List[Tuple[int, int]]]): The edges to draw.\n                If set to `None`, will attempt to select automatically.\n        \"\"\"\n        self.color = color\n        self.thickness = thickness\n        self.edges = edges\n\n    @convert_for_annotation_method\n    def annotate(self, scene: ImageType, key_points: KeyPoints) -> ImageType:\n        \"\"\"\n        Annotates the given scene by drawing lines between specified key points to form\n        edges.\n\n        Args:\n            scene (ImageType): The image where skeleton edges will be drawn. `ImageType`\n                is a flexible type, accepting either `numpy.ndarray` or\n                `PIL.Image.Image`.\n            key_points (KeyPoints): A collection of key points where each key point\n                consists of x and y coordinates.\n\n        Returns:\n            Returns:\n                The annotated image, matching the type of `scene` (`numpy.ndarray`\n                    or `PIL.Image.Image`)\n\n        Example:\n            ```python\n            import supervision as sv\n\n            image = ...\n            key_points = sv.KeyPoints(...)\n\n            edge_annotator = sv.EdgeAnnotator(\n                color=sv.Color.GREEN,\n                thickness=5\n            )\n            annotated_frame = edge_annotator.annotate(\n                scene=image.copy(),\n                key_points=key_points\n            )\n            ```\n\n        ![edge-annotator-example](https://media.roboflow.com/\n        supervision-annotator-examples/edge-annotator-example.png)\n        \"\"\"\n        if len(key_points) == 0:\n            return scene\n\n        for xy in key_points.xy:\n            edges = self.edges\n            if not edges:\n                edges = SKELETONS_BY_VERTEX_COUNT.get(len(xy))\n            if not edges:\n                warn(f\"No skeleton found with {len(xy)} vertices\")\n                return scene\n\n            for class_a, class_b in edges:\n                xy_a = xy[class_a - 1]\n                xy_b = xy[class_b - 1]\n                missing_a = np.allclose(xy_a, 0)\n                missing_b = np.allclose(xy_b, 0)\n                if missing_a or missing_b:\n                    continue\n\n                cv2.line(\n                    img=scene,\n                    pt1=(int(xy_a[0]), int(xy_a[1])),\n                    pt2=(int(xy_b[0]), int(xy_b[1])),\n                    color=self.color.as_bgr(),\n                    thickness=self.thickness,\n                )\n\n        return scene\n
VertexLabelAnnotator

A class that draws labels of skeleton vertices on images. It uses specified key points to determine the locations where the labels should be drawn.

Source code in supervision/keypoint/annotators.py
class VertexLabelAnnotator:\n    \"\"\"\n    A class that draws labels of skeleton vertices on images. It uses specified key\n    points to determine the locations where the vertices should be drawn.\n    \"\"\"\n\n    def __init__(\n        self,\n        color: Union[Color, List[Color]] = Color.ROBOFLOW,\n        text_color: Color = Color.WHITE,\n        text_scale: float = 0.5,\n        text_thickness: int = 1,\n        text_padding: int = 10,\n        border_radius: int = 0,\n    ):\n        \"\"\"\n        Args:\n            color (Union[Color, List[Color]], optional): The color to use for each\n                keypoint label. If a list is provided, the colors will be used in order\n                for each keypoint.\n            text_color (Color, optional): The color to use for the labels.\n            text_scale (float, optional): The scale of the text.\n            text_thickness (int, optional): The thickness of the text.\n            text_padding (int, optional): The padding around the text.\n            border_radius (int, optional): The radius of the rounded corners of the\n                boxes. Set to a high value to produce circles.\n        \"\"\"\n        self.border_radius: int = border_radius\n        self.color: Union[Color, List[Color]] = color\n        self.text_color: Color = text_color\n        self.text_scale: float = text_scale\n        self.text_thickness: int = text_thickness\n        self.text_padding: int = text_padding\n\n    def annotate(\n        self, scene: ImageType, key_points: KeyPoints, labels: List[str] = None\n    ) -> ImageType:\n        \"\"\"\n        A class that draws labels of skeleton vertices on images. It uses specified key\n            points to determine the locations where the vertices should be drawn.\n\n        Args:\n            scene (ImageType): The image where vertex labels will be drawn. `ImageType`\n                is a flexible type, accepting either `numpy.ndarray` or\n                `PIL.Image.Image`.\n            key_points (KeyPoints): A collection of key points where each key point\n                consists of x and y coordinates.\n            labels (List[str], optional): A list of labels to be displayed on the\n                annotated image. If not provided, keypoint indices will be used.\n\n        Returns:\n            The annotated image, matching the type of `scene` (`numpy.ndarray`\n                or `PIL.Image.Image`)\n\n        Example:\n            ```python\n            import supervision as sv\n\n            image = ...\n            key_points = sv.KeyPoints(...)\n\n            vertex_label_annotator = sv.VertexLabelAnnotator(\n                color=sv.Color.GREEN,\n                text_color=sv.Color.BLACK,\n                border_radius=5\n            )\n            annotated_frame = vertex_label_annotator.annotate(\n                scene=image.copy(),\n                key_points=key_points\n            )\n            ```\n\n        ![vertex-label-annotator-example](https://media.roboflow.com/\n        supervision-annotator-examples/vertex-label-annotator-example.png)\n\n        !!! 
tip\n\n            `VertexLabelAnnotator` allows to customize the color of each keypoint label\n            values.\n\n        Example:\n            ```python\n            import supervision as sv\n\n            image = ...\n            key_points = sv.KeyPoints(...)\n\n            LABELS = [\n                \"nose\", \"left eye\", \"right eye\", \"left ear\",\n                \"right ear\", \"left shoulder\", \"right shoulder\", \"left elbow\",\n                \"right elbow\", \"left wrist\", \"right wrist\", \"left hip\",\n                \"right hip\", \"left knee\", \"right knee\", \"left ankle\",\n                \"right ankle\"\n            ]\n\n            COLORS = [\n                \"#FF6347\", \"#FF6347\", \"#FF6347\", \"#FF6347\",\n                \"#FF6347\", \"#FF1493\", \"#00FF00\", \"#FF1493\",\n                \"#00FF00\", \"#FF1493\", \"#00FF00\", \"#FFD700\",\n                \"#00BFFF\", \"#FFD700\", \"#00BFFF\", \"#FFD700\",\n                \"#00BFFF\"\n            ]\n            COLORS = [sv.Color.from_hex(color_hex=c) for c in COLORS]\n\n            vertex_label_annotator = sv.VertexLabelAnnotator(\n                color=COLORS,\n                text_color=sv.Color.BLACK,\n                border_radius=5\n            )\n            annotated_frame = vertex_label_annotator.annotate(\n                scene=image.copy(),\n                key_points=key_points,\n                labels=labels\n            )\n            ```\n        ![vertex-label-annotator-custom-example](https://media.roboflow.com/\n        supervision-annotator-examples/vertex-label-annotator-custom-example.png)\n        \"\"\"\n        font = cv2.FONT_HERSHEY_SIMPLEX\n\n        skeletons_count, points_count, _ = key_points.xy.shape\n        if skeletons_count == 0:\n            return scene\n\n        anchors = key_points.xy.reshape(points_count * skeletons_count, 2).astype(int)\n        mask = np.all(anchors != 0, axis=1)\n\n        if not np.any(mask):\n            return scene\n\n        colors = self.preprocess_and_validate_colors(\n            colors=self.color,\n            points_count=points_count,\n            skeletons_count=skeletons_count,\n        )\n\n        labels = self.preprocess_and_validate_labels(\n            labels=labels, points_count=points_count, skeletons_count=skeletons_count\n        )\n\n        anchors = anchors[mask]\n        colors = colors[mask]\n        labels = labels[mask]\n\n        xyxy = np.array(\n            [\n                self.get_text_bounding_box(\n                    text=label,\n                    font=font,\n                    text_scale=self.text_scale,\n                    text_thickness=self.text_thickness,\n                    center_coordinates=tuple(anchor),\n                )\n                for anchor, label in zip(anchors, labels)\n            ]\n        )\n\n        xyxy_padded = pad_boxes(xyxy=xyxy, px=self.text_padding)\n\n        for text, color, box, box_padded in zip(labels, colors, xyxy, xyxy_padded):\n            draw_rounded_rectangle(\n                scene=scene,\n                rect=Rect.from_xyxy(box_padded),\n                color=color,\n                border_radius=self.border_radius,\n            )\n            cv2.putText(\n                img=scene,\n                text=text,\n                org=(box[0], box[1] + self.text_padding),\n                fontFace=font,\n                fontScale=self.text_scale,\n                color=self.text_color.as_rgb(),\n                thickness=self.text_thickness,\n           
     lineType=cv2.LINE_AA,\n            )\n\n        return scene\n\n    @staticmethod\n    def get_text_bounding_box(\n        text: str,\n        font: int,\n        text_scale: float,\n        text_thickness: int,\n        center_coordinates: Tuple[int, int],\n    ) -> Tuple[int, int, int, int]:\n        text_w, text_h = cv2.getTextSize(\n            text=text,\n            fontFace=font,\n            fontScale=text_scale,\n            thickness=text_thickness,\n        )[0]\n        center_x, center_y = center_coordinates\n        return (\n            center_x - text_w // 2,\n            center_y - text_h // 2,\n            center_x + text_w // 2,\n            center_y + text_h // 2,\n        )\n\n    @staticmethod\n    def preprocess_and_validate_labels(\n        labels: Optional[List[str]], points_count: int, skeletons_count: int\n    ) -> np.array:\n        if labels and len(labels) != points_count:\n            raise ValueError(\n                f\"Number of labels ({len(labels)}) must match number of key points \"\n                f\"({points_count}).\"\n            )\n        if labels is None:\n            labels = [str(i) for i in range(points_count)]\n\n        return np.array(labels * skeletons_count)\n\n    @staticmethod\n    def preprocess_and_validate_colors(\n        colors: Optional[Union[Color, List[Color]]],\n        points_count: int,\n        skeletons_count: int,\n    ) -> np.array:\n        if isinstance(colors, list) and len(colors) != points_count:\n            raise ValueError(\n                f\"Number of colors ({len(colors)}) must match number of key points \"\n                f\"({points_count}).\"\n            )\n        return (\n            np.array(colors * skeletons_count)\n            if isinstance(colors, list)\n            else np.array([colors] * points_count * skeletons_count)\n        )\n
"},{"location":"keypoint/annotators/#supervision.keypoint.annotators.VertexAnnotator-functions","title":"Functions","text":""},{"location":"keypoint/annotators/#supervision.keypoint.annotators.VertexAnnotator.__init__","title":"__init__(color=Color.ROBOFLOW, radius=4)","text":"

Parameters:

color (Color): The color to use for annotating key points. Default: ROBOFLOW
radius (int): The radius of the circles used to represent the key points. Default: 4

Source code in supervision/keypoint/annotators.py
def __init__(\n    self,\n    color: Color = Color.ROBOFLOW,\n    radius: int = 4,\n) -> None:\n    \"\"\"\n    Args:\n        color (Color, optional): The color to use for annotating key points.\n        radius (int, optional): The radius of the circles used to represent the key\n            points.\n    \"\"\"\n    self.color = color\n    self.radius = radius\n
"},{"location":"keypoint/annotators/#supervision.keypoint.annotators.VertexAnnotator.annotate","title":"annotate(scene, key_points)","text":"

Annotates the given scene with skeleton vertices based on the provided key points. It draws circles at each key point location.

Parameters:

scene (ImageType): The image where skeleton vertices will be drawn. ImageType is a flexible type, accepting either numpy.ndarray or PIL.Image.Image. Required.
key_points (KeyPoints): A collection of key points where each key point consists of x and y coordinates. Required.

Returns:

ImageType: The annotated image, matching the type of scene (numpy.ndarray or PIL.Image.Image)

Example
import supervision as sv\n\nimage = ...\nkey_points = sv.KeyPoints(...)\n\nvertex_annotator = sv.VertexAnnotator(\n    color=sv.Color.GREEN,\n    radius=10\n)\nannotated_frame = vertex_annotator.annotate(\n    scene=image.copy(),\n    key_points=key_points\n)\n

Source code in supervision/keypoint/annotators.py
@convert_for_annotation_method\ndef annotate(self, scene: ImageType, key_points: KeyPoints) -> ImageType:\n    \"\"\"\n    Annotates the given scene with skeleton vertices based on the provided key\n    points. It draws circles at each key point location.\n\n    Args:\n        scene (ImageType): The image where skeleton vertices will be drawn.\n            `ImageType` is a flexible type, accepting either `numpy.ndarray` or\n            `PIL.Image.Image`.\n        key_points (KeyPoints): A collection of key points where each key point\n            consists of x and y coordinates.\n\n    Returns:\n        The annotated image, matching the type of `scene` (`numpy.ndarray`\n            or `PIL.Image.Image`)\n\n    Example:\n        ```python\n        import supervision as sv\n\n        image = ...\n        key_points = sv.KeyPoints(...)\n\n        vertex_annotator = sv.VertexAnnotator(\n            color=sv.Color.GREEN,\n            radius=10\n        )\n        annotated_frame = vertex_annotator.annotate(\n            scene=image.copy(),\n            key_points=key_points\n        )\n        ```\n\n    ![vertex-annotator-example](https://media.roboflow.com/\n    supervision-annotator-examples/vertex-annotator-example.png)\n    \"\"\"\n    if len(key_points) == 0:\n        return scene\n\n    for xy in key_points.xy:\n        for x, y in xy:\n            cv2.circle(\n                img=scene,\n                center=(int(x), int(y)),\n                radius=self.radius,\n                color=self.color.as_bgr(),\n                thickness=-1,\n            )\n\n    return scene\n
"},{"location":"keypoint/annotators/#supervision.keypoint.annotators.EdgeAnnotator-functions","title":"Functions","text":""},{"location":"keypoint/annotators/#supervision.keypoint.annotators.EdgeAnnotator.__init__","title":"__init__(color=Color.ROBOFLOW, thickness=2, edges=None)","text":"

Parameters:

color (Color): The color to use for the edges. Default: ROBOFLOW
thickness (int): The thickness of the edges. Default: 2
edges (Optional[List[Tuple[int, int]]]): The edges to draw. If set to None, will attempt to select automatically. Default: None

Source code in supervision/keypoint/annotators.py
def __init__(\n    self,\n    color: Color = Color.ROBOFLOW,\n    thickness: int = 2,\n    edges: Optional[List[Tuple[int, int]]] = None,\n) -> None:\n    \"\"\"\n    Args:\n        color (Color, optional): The color to use for the edges.\n        thickness (int, optional): The thickness of the edges.\n        edges (Optional[List[Tuple[int, int]]]): The edges to draw.\n            If set to `None`, will attempt to select automatically.\n    \"\"\"\n    self.color = color\n    self.thickness = thickness\n    self.edges = edges\n
"},{"location":"keypoint/annotators/#supervision.keypoint.annotators.EdgeAnnotator.annotate","title":"annotate(scene, key_points)","text":"

Annotates the given scene by drawing lines between specified key points to form edges.

Parameters:

scene (ImageType): The image where skeleton edges will be drawn. ImageType is a flexible type, accepting either numpy.ndarray or PIL.Image.Image. Required.
key_points (KeyPoints): A collection of key points where each key point consists of x and y coordinates. Required.

Returns:

ImageType: The annotated image, matching the type of scene (numpy.ndarray or PIL.Image.Image)

Example
import supervision as sv\n\nimage = ...\nkey_points = sv.KeyPoints(...)\n\nedge_annotator = sv.EdgeAnnotator(\n    color=sv.Color.GREEN,\n    thickness=5\n)\nannotated_frame = edge_annotator.annotate(\n    scene=image.copy(),\n    key_points=key_points\n)\n

Source code in supervision/keypoint/annotators.py
@convert_for_annotation_method\ndef annotate(self, scene: ImageType, key_points: KeyPoints) -> ImageType:\n    \"\"\"\n    Annotates the given scene by drawing lines between specified key points to form\n    edges.\n\n    Args:\n        scene (ImageType): The image where skeleton edges will be drawn. `ImageType`\n            is a flexible type, accepting either `numpy.ndarray` or\n            `PIL.Image.Image`.\n        key_points (KeyPoints): A collection of key points where each key point\n            consists of x and y coordinates.\n\n    Returns:\n        Returns:\n            The annotated image, matching the type of `scene` (`numpy.ndarray`\n                or `PIL.Image.Image`)\n\n    Example:\n        ```python\n        import supervision as sv\n\n        image = ...\n        key_points = sv.KeyPoints(...)\n\n        edge_annotator = sv.EdgeAnnotator(\n            color=sv.Color.GREEN,\n            thickness=5\n        )\n        annotated_frame = edge_annotator.annotate(\n            scene=image.copy(),\n            key_points=key_points\n        )\n        ```\n\n    ![edge-annotator-example](https://media.roboflow.com/\n    supervision-annotator-examples/edge-annotator-example.png)\n    \"\"\"\n    if len(key_points) == 0:\n        return scene\n\n    for xy in key_points.xy:\n        edges = self.edges\n        if not edges:\n            edges = SKELETONS_BY_VERTEX_COUNT.get(len(xy))\n        if not edges:\n            warn(f\"No skeleton found with {len(xy)} vertices\")\n            return scene\n\n        for class_a, class_b in edges:\n            xy_a = xy[class_a - 1]\n            xy_b = xy[class_b - 1]\n            missing_a = np.allclose(xy_a, 0)\n            missing_b = np.allclose(xy_b, 0)\n            if missing_a or missing_b:\n                continue\n\n            cv2.line(\n                img=scene,\n                pt1=(int(xy_a[0]), int(xy_a[1])),\n                pt2=(int(xy_b[0]), int(xy_b[1])),\n                color=self.color.as_bgr(),\n                thickness=self.thickness,\n            )\n\n    return scene\n
"},{"location":"keypoint/annotators/#supervision.keypoint.annotators.VertexLabelAnnotator-functions","title":"Functions","text":""},{"location":"keypoint/annotators/#supervision.keypoint.annotators.VertexLabelAnnotator.__init__","title":"__init__(color=Color.ROBOFLOW, text_color=Color.WHITE, text_scale=0.5, text_thickness=1, text_padding=10, border_radius=0)","text":"

Parameters:

color (Union[Color, List[Color]]): The color to use for each keypoint label. If a list is provided, the colors will be used in order for each keypoint. Default: ROBOFLOW
text_color (Color): The color to use for the labels. Default: WHITE
text_scale (float): The scale of the text. Default: 0.5
text_thickness (int): The thickness of the text. Default: 1
text_padding (int): The padding around the text. Default: 10
border_radius (int): The radius of the rounded corners of the boxes. Set to a high value to produce circles. Default: 0

Source code in supervision/keypoint/annotators.py
def __init__(\n    self,\n    color: Union[Color, List[Color]] = Color.ROBOFLOW,\n    text_color: Color = Color.WHITE,\n    text_scale: float = 0.5,\n    text_thickness: int = 1,\n    text_padding: int = 10,\n    border_radius: int = 0,\n):\n    \"\"\"\n    Args:\n        color (Union[Color, List[Color]], optional): The color to use for each\n            keypoint label. If a list is provided, the colors will be used in order\n            for each keypoint.\n        text_color (Color, optional): The color to use for the labels.\n        text_scale (float, optional): The scale of the text.\n        text_thickness (int, optional): The thickness of the text.\n        text_padding (int, optional): The padding around the text.\n        border_radius (int, optional): The radius of the rounded corners of the\n            boxes. Set to a high value to produce circles.\n    \"\"\"\n    self.border_radius: int = border_radius\n    self.color: Union[Color, List[Color]] = color\n    self.text_color: Color = text_color\n    self.text_scale: float = text_scale\n    self.text_thickness: int = text_thickness\n    self.text_padding: int = text_padding\n
"},{"location":"keypoint/annotators/#supervision.keypoint.annotators.VertexLabelAnnotator.annotate","title":"annotate(scene, key_points, labels=None)","text":"

Draws labels of skeleton vertices on the given image. It uses the specified key points to determine the locations where the labels should be drawn.

Parameters:

scene (ImageType): The image where vertex labels will be drawn. ImageType is a flexible type, accepting either numpy.ndarray or PIL.Image.Image. Required.
key_points (KeyPoints): A collection of key points where each key point consists of x and y coordinates. Required.
labels (List[str]): A list of labels to be displayed on the annotated image. If not provided, keypoint indices will be used. Default: None

Returns:

ImageType: The annotated image, matching the type of scene (numpy.ndarray or PIL.Image.Image)

Example
import supervision as sv\n\nimage = ...\nkey_points = sv.KeyPoints(...)\n\nvertex_label_annotator = sv.VertexLabelAnnotator(\n    color=sv.Color.GREEN,\n    text_color=sv.Color.BLACK,\n    border_radius=5\n)\nannotated_frame = vertex_label_annotator.annotate(\n    scene=image.copy(),\n    key_points=key_points\n)\n

Tip

VertexLabelAnnotator allows you to customize the color of each keypoint label.

Example
import supervision as sv\n\nimage = ...\nkey_points = sv.KeyPoints(...)\n\nLABELS = [\n    \"nose\", \"left eye\", \"right eye\", \"left ear\",\n    \"right ear\", \"left shoulder\", \"right shoulder\", \"left elbow\",\n    \"right elbow\", \"left wrist\", \"right wrist\", \"left hip\",\n    \"right hip\", \"left knee\", \"right knee\", \"left ankle\",\n    \"right ankle\"\n]\n\nCOLORS = [\n    \"#FF6347\", \"#FF6347\", \"#FF6347\", \"#FF6347\",\n    \"#FF6347\", \"#FF1493\", \"#00FF00\", \"#FF1493\",\n    \"#00FF00\", \"#FF1493\", \"#00FF00\", \"#FFD700\",\n    \"#00BFFF\", \"#FFD700\", \"#00BFFF\", \"#FFD700\",\n    \"#00BFFF\"\n]\nCOLORS = [sv.Color.from_hex(color_hex=c) for c in COLORS]\n\nvertex_label_annotator = sv.VertexLabelAnnotator(\n    color=COLORS,\n    text_color=sv.Color.BLACK,\n    border_radius=5\n)\nannotated_frame = vertex_label_annotator.annotate(\n    scene=image.copy(),\n    key_points=key_points,\n    labels=labels\n)\n

Source code in supervision/keypoint/annotators.py
def annotate(\n    self, scene: ImageType, key_points: KeyPoints, labels: List[str] = None\n) -> ImageType:\n    \"\"\"\n    A class that draws labels of skeleton vertices on images. It uses specified key\n        points to determine the locations where the vertices should be drawn.\n\n    Args:\n        scene (ImageType): The image where vertex labels will be drawn. `ImageType`\n            is a flexible type, accepting either `numpy.ndarray` or\n            `PIL.Image.Image`.\n        key_points (KeyPoints): A collection of key points where each key point\n            consists of x and y coordinates.\n        labels (List[str], optional): A list of labels to be displayed on the\n            annotated image. If not provided, keypoint indices will be used.\n\n    Returns:\n        The annotated image, matching the type of `scene` (`numpy.ndarray`\n            or `PIL.Image.Image`)\n\n    Example:\n        ```python\n        import supervision as sv\n\n        image = ...\n        key_points = sv.KeyPoints(...)\n\n        vertex_label_annotator = sv.VertexLabelAnnotator(\n            color=sv.Color.GREEN,\n            text_color=sv.Color.BLACK,\n            border_radius=5\n        )\n        annotated_frame = vertex_label_annotator.annotate(\n            scene=image.copy(),\n            key_points=key_points\n        )\n        ```\n\n    ![vertex-label-annotator-example](https://media.roboflow.com/\n    supervision-annotator-examples/vertex-label-annotator-example.png)\n\n    !!! tip\n\n        `VertexLabelAnnotator` allows to customize the color of each keypoint label\n        values.\n\n    Example:\n        ```python\n        import supervision as sv\n\n        image = ...\n        key_points = sv.KeyPoints(...)\n\n        LABELS = [\n            \"nose\", \"left eye\", \"right eye\", \"left ear\",\n            \"right ear\", \"left shoulder\", \"right shoulder\", \"left elbow\",\n            \"right elbow\", \"left wrist\", \"right wrist\", \"left hip\",\n            \"right hip\", \"left knee\", \"right knee\", \"left ankle\",\n            \"right ankle\"\n        ]\n\n        COLORS = [\n            \"#FF6347\", \"#FF6347\", \"#FF6347\", \"#FF6347\",\n            \"#FF6347\", \"#FF1493\", \"#00FF00\", \"#FF1493\",\n            \"#00FF00\", \"#FF1493\", \"#00FF00\", \"#FFD700\",\n            \"#00BFFF\", \"#FFD700\", \"#00BFFF\", \"#FFD700\",\n            \"#00BFFF\"\n        ]\n        COLORS = [sv.Color.from_hex(color_hex=c) for c in COLORS]\n\n        vertex_label_annotator = sv.VertexLabelAnnotator(\n            color=COLORS,\n            text_color=sv.Color.BLACK,\n            border_radius=5\n        )\n        annotated_frame = vertex_label_annotator.annotate(\n            scene=image.copy(),\n            key_points=key_points,\n            labels=labels\n        )\n        ```\n    ![vertex-label-annotator-custom-example](https://media.roboflow.com/\n    supervision-annotator-examples/vertex-label-annotator-custom-example.png)\n    \"\"\"\n    font = cv2.FONT_HERSHEY_SIMPLEX\n\n    skeletons_count, points_count, _ = key_points.xy.shape\n    if skeletons_count == 0:\n        return scene\n\n    anchors = key_points.xy.reshape(points_count * skeletons_count, 2).astype(int)\n    mask = np.all(anchors != 0, axis=1)\n\n    if not np.any(mask):\n        return scene\n\n    colors = self.preprocess_and_validate_colors(\n        colors=self.color,\n        points_count=points_count,\n        skeletons_count=skeletons_count,\n    )\n\n    labels = 
self.preprocess_and_validate_labels(\n        labels=labels, points_count=points_count, skeletons_count=skeletons_count\n    )\n\n    anchors = anchors[mask]\n    colors = colors[mask]\n    labels = labels[mask]\n\n    xyxy = np.array(\n        [\n            self.get_text_bounding_box(\n                text=label,\n                font=font,\n                text_scale=self.text_scale,\n                text_thickness=self.text_thickness,\n                center_coordinates=tuple(anchor),\n            )\n            for anchor, label in zip(anchors, labels)\n        ]\n    )\n\n    xyxy_padded = pad_boxes(xyxy=xyxy, px=self.text_padding)\n\n    for text, color, box, box_padded in zip(labels, colors, xyxy, xyxy_padded):\n        draw_rounded_rectangle(\n            scene=scene,\n            rect=Rect.from_xyxy(box_padded),\n            color=color,\n            border_radius=self.border_radius,\n        )\n        cv2.putText(\n            img=scene,\n            text=text,\n            org=(box[0], box[1] + self.text_padding),\n            fontFace=font,\n            fontScale=self.text_scale,\n            color=self.text_color.as_rgb(),\n            thickness=self.text_thickness,\n            lineType=cv2.LINE_AA,\n        )\n\n    return scene\n
"},{"location":"keypoint/core/","title":"Keypoint Detection","text":"

The sv.KeyPoints class in the Supervision library standardizes results from various keypoint detection and pose estimation models into a consistent format. This class simplifies data manipulation and filtering, providing a uniform API for integration with Supervision keypoints annotators.

Ultralytics | Inference | MediaPipe

Use the sv.KeyPoints.from_ultralytics method, which accepts a YOLOv8 pose result.

import cv2\nimport supervision as sv\nfrom ultralytics import YOLO\n\nimage = cv2.imread(<SOURCE_IMAGE_PATH>)\nmodel = YOLO('yolov8s-pose.pt')\n\nresult = model(image)[0]\nkey_points = sv.KeyPoints.from_ultralytics(result)\n

Use the sv.KeyPoints.from_inference method, which accepts an Inference pose result.

import cv2\nimport supervision as sv\nfrom inference import get_model\n\nimage = cv2.imread(<SOURCE_IMAGE_PATH>)\nmodel = get_model(model_id=<POSE_MODEL_ID>, api_key=<ROBOFLOW_API_KEY>)\n\nresult = model.infer(image)[0]\nkey_points = sv.KeyPoints.from_inference(result)\n

Use the sv.KeyPoints.from_mediapipe method, which accepts a MediaPipe pose result.

import cv2\nimport mediapipe as mp\nimport supervision as sv\n\nimage = cv2.imread(<SOURCE_IMAGE_PATH>)\nimage_height, image_width, _ = image.shape\nmediapipe_image = mp.Image(\n    image_format=mp.ImageFormat.SRGB,\n    data=cv2.cvtColor(image, cv2.COLOR_BGR2RGB))\n\noptions = mp.tasks.vision.PoseLandmarkerOptions(\n    base_options=mp.tasks.BaseOptions(\n        model_asset_path=\"pose_landmarker_heavy.task\"\n    ),\n    running_mode=mp.tasks.vision.RunningMode.IMAGE,\n    num_poses=2)\n\nPoseLandmarker = mp.tasks.vision.PoseLandmarker\nwith PoseLandmarker.create_from_options(options) as landmarker:\n    pose_landmarker_result = landmarker.detect(mediapipe_image)\n\nkey_points = sv.KeyPoints.from_mediapipe(\n    pose_landmarker_result, (image_width, image_height))\n
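
Whichever model produced it, the resulting sv.KeyPoints object plugs directly into the keypoint annotators described earlier; a minimal sketch continuing the Ultralytics example:

import cv2\nimport supervision as sv\nfrom ultralytics import YOLO\n\nimage = cv2.imread(<SOURCE_IMAGE_PATH>)\nmodel = YOLO('yolov8s-pose.pt')\n\nresult = model(image)[0]\nkey_points = sv.KeyPoints.from_ultralytics(result)\n\n# draw the detected skeleton edges on a copy of the input image\nedge_annotator = sv.EdgeAnnotator(color=sv.Color.GREEN, thickness=5)\nannotated_image = edge_annotator.annotate(scene=image.copy(), key_points=key_points)\n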

Attributes:

xy (ndarray): An array of shape (n, 2) containing the keypoint coordinates in format [x1, y1].
confidence (Optional[ndarray]): An array of shape (n,) containing the confidence scores of the keypoints.
class_id (Optional[ndarray]): An array of shape (n,) containing the class ids of the keypoints.
data (Dict[str, Union[ndarray, List]]): A dictionary containing additional data where each key is a string representing the data type, and the value is either a NumPy array or a list of corresponding data.

Source code in supervision/keypoint/core.py
@dataclass\nclass KeyPoints:\n    \"\"\"\n    The `sv.KeyPoints` class in the Supervision library standardizes results from\n    various keypoint detection and pose estimation models into a consistent format. This\n    class simplifies data manipulation and filtering, providing a uniform API for\n    integration with Supervision [keypoints annotators](/keypoint/annotators).\n\n    === \"Ultralytics\"\n\n        Use [`sv.KeyPoints.from_ultralytics`](/keypoint/core/#supervision.keypoint.core.KeyPoints.from_ultralytics)\n        method, which accepts [YOLOv8](https://github.com/ultralytics/ultralytics)\n        pose result.\n\n        ```python\n        import cv2\n        import supervision as sv\n        from ultralytics import YOLO\n\n        image = cv2.imread(<SOURCE_IMAGE_PATH>)\n        model = YOLO('yolov8s-pose.pt')\n\n        result = model(image)[0]\n        key_points = sv.KeyPoints.from_ultralytics(result)\n        ```\n\n    === \"Inference\"\n\n        Use [`sv.KeyPoints.from_inference`](/keypoint/core/#supervision.keypoint.core.KeyPoints.from_inference)\n        method, which accepts [Inference](https://inference.roboflow.com/) pose result.\n\n        ```python\n        import cv2\n        import supervision as sv\n        from inference import get_model\n\n        image = cv2.imread(<SOURCE_IMAGE_PATH>)\n        model = get_model(model_id=<POSE_MODEL_ID>, api_key=<ROBOFLOW_API_KEY>)\n\n        result = model.infer(image)[0]\n        key_points = sv.KeyPoints.from_inference(result)\n        ```\n\n    === \"MediaPipe\"\n\n        Use [`sv.KeyPoints.from_mediapipe`](/keypoint/core/#supervision.keypoint.core.KeyPoints.from_mediapipe)\n        method, which accepts [MediaPipe](https://github.com/google-ai-edge/mediapipe)\n        pose result.\n\n        ```python\n        import cv2\n        import mediapipe as mp\n        import supervision as sv\n\n        image = cv2.imread(<SOURCE_IMAGE_PATH>)\n        image_height, image_width, _ = image.shape\n        mediapipe_image = mp.Image(\n            image_format=mp.ImageFormat.SRGB,\n            data=cv2.cvtColor(image, cv2.COLOR_BGR2RGB))\n\n        options = mp.tasks.vision.PoseLandmarkerOptions(\n            base_options=mp.tasks.BaseOptions(\n                model_asset_path=\"pose_landmarker_heavy.task\"\n            ),\n            running_mode=mp.tasks.vision.RunningMode.IMAGE,\n            num_poses=2)\n\n        PoseLandmarker = mp.tasks.vision.PoseLandmarker\n        with PoseLandmarker.create_from_options(options) as landmarker:\n            pose_landmarker_result = landmarker.detect(mediapipe_image)\n\n        key_points = sv.KeyPoints.from_mediapipe(\n            pose_landmarker_result, (image_width, image_height))\n        ```\n\n    Attributes:\n        xy (np.ndarray): An array of shape `(n, 2)` containing\n            the bounding boxes coordinates in format `[x1, y1]`\n        confidence (Optional[np.ndarray]): An array of shape\n            `(n,)` containing the confidence scores of the keypoint keypoints.\n        class_id (Optional[np.ndarray]): An array of shape\n            `(n,)` containing the class ids of the keypoint keypoints.\n        data (Dict[str, Union[np.ndarray, List]]): A dictionary containing additional\n            data where each key is a string representing the data type, and the value\n            is either a NumPy array or a list of corresponding data.\n    \"\"\"  # noqa: E501 // docs\n\n    xy: npt.NDArray[np.float32]\n    class_id: Optional[npt.NDArray[np.int_]] = None\n    confidence: 
Optional[npt.NDArray[np.float32]] = None\n    data: Dict[str, Union[npt.NDArray[Any], List]] = field(default_factory=dict)\n\n    def __post_init__(self):\n        validate_keypoints_fields(\n            xy=self.xy,\n            confidence=self.confidence,\n            class_id=self.class_id,\n            data=self.data,\n        )\n\n    def __len__(self) -> int:\n        \"\"\"\n        Returns the number of keypoints in the `sv.KeyPoints` object.\n        \"\"\"\n        return len(self.xy)\n\n    def __iter__(\n        self,\n    ) -> Iterator[\n        Tuple[\n            np.ndarray,\n            Optional[np.ndarray],\n            Optional[float],\n            Optional[int],\n            Optional[int],\n            Dict[str, Union[np.ndarray, List]],\n        ]\n    ]:\n        \"\"\"\n        Iterates over the Keypoint object and yield a tuple of\n        `(xy, confidence, class_id, data)` for each keypoint detection.\n        \"\"\"\n        for i in range(len(self.xy)):\n            yield (\n                self.xy[i],\n                self.confidence[i] if self.confidence is not None else None,\n                self.class_id[i] if self.class_id is not None else None,\n                get_data_item(self.data, i),\n            )\n\n    def __eq__(self, other: KeyPoints) -> bool:\n        return all(\n            [\n                np.array_equal(self.xy, other.xy),\n                np.array_equal(self.class_id, other.class_id),\n                np.array_equal(self.confidence, other.confidence),\n                is_data_equal(self.data, other.data),\n            ]\n        )\n\n    @classmethod\n    def from_inference(cls, inference_result: Union[dict, Any]) -> KeyPoints:\n        \"\"\"\n        Create a `sv.KeyPoints` object from the [Roboflow](https://roboflow.com/)\n        API inference result or the [Inference](https://inference.roboflow.com/)\n        package results.\n\n        Args:\n            inference_result (dict, any): The result from the\n                Roboflow API or Inference package containing predictions with keypoints.\n\n        Returns:\n            A `sv.KeyPoints` object containing the keypoint coordinates, class IDs,\n                and class names, and confidences of each keypoint.\n\n        Examples:\n            ```python\n            import cv2\n            import supervision as sv\n            from inference import get_model\n\n            image = cv2.imread(<SOURCE_IMAGE_PATH>)\n            model = get_model(model_id=<POSE_MODEL_ID>, api_key=<ROBOFLOW_API_KEY>)\n\n            result = model.infer(image)[0]\n            key_points = sv.KeyPoints.from_inference(result)\n            ```\n\n            ```python\n            import cv2\n            import supervision as sv\n            from inference_sdk import InferenceHTTPClient\n\n            image = cv2.imread(<SOURCE_IMAGE_PATH>)\n            client = InferenceHTTPClient(\n                api_url=\"https://detect.roboflow.com\",\n                api_key=<ROBOFLOW_API_KEY>\n            )\n\n            result = client.infer(image, model_id=<POSE_MODEL_ID>)\n            key_points = sv.KeyPoints.from_inference(result)\n            ```\n        \"\"\"\n        if isinstance(inference_result, list):\n            raise ValueError(\n                \"from_inference() operates on a single result at a time.\"\n                \"You can retrieve it like so:  inference_result = model.infer(image)[0]\"\n            )\n\n        with suppress(AttributeError):\n            inference_result = 
inference_result.dict(exclude_none=True, by_alias=True)\n\n        if not inference_result.get(\"predictions\"):\n            return cls.empty()\n\n        xy = []\n        confidence = []\n        class_id = []\n        class_names = []\n\n        for prediction in inference_result[\"predictions\"]:\n            prediction_xy = []\n            prediction_confidence = []\n            for keypoint in prediction[\"keypoints\"]:\n                prediction_xy.append([keypoint[\"x\"], keypoint[\"y\"]])\n                prediction_confidence.append(keypoint[\"confidence\"])\n            xy.append(prediction_xy)\n            confidence.append(prediction_confidence)\n\n            class_id.append(prediction[\"class_id\"])\n            class_names.append(prediction[\"class\"])\n\n        data = {CLASS_NAME_DATA_FIELD: np.array(class_names)}\n\n        return cls(\n            xy=np.array(xy, dtype=np.float32),\n            confidence=np.array(confidence, dtype=np.float32),\n            class_id=np.array(class_id, dtype=int),\n            data=data,\n        )\n\n    @classmethod\n    def from_mediapipe(\n        cls, mediapipe_results, resolution_wh: Tuple[int, int]\n    ) -> KeyPoints:\n        \"\"\"\n        Creates a `sv.KeyPoints` instance from a\n        [MediaPipe](https://github.com/google-ai-edge/mediapipe)\n        pose landmark detection inference result.\n\n        Args:\n            mediapipe_results (Union[PoseLandmarkerResult, SolutionOutputs]):\n                The output results from Mediapipe. It supports both: the inference\n                result `PoseLandmarker` and the legacy one from `Pose`.\n            resolution_wh (Tuple[int, int]): A tuple of the form `(width, height)`\n                representing the resolution of the frame.\n\n        Returns:\n            A `sv.KeyPoints` object containing the keypoint coordinates and\n                confidences of each keypoint.\n\n        !!! 
tip\n            Before you start, download model bundles from the\n            [MediaPipe website](https://ai.google.dev/edge/mediapipe/solutions/vision/pose_landmarker/index#models).\n\n        Examples:\n            ```python\n            import cv2\n            import mediapipe as mp\n            import supervision as sv\n\n            image = cv2.imread(<SOURCE_IMAGE_PATH>)\n            image_height, image_width, _ = image.shape\n            mediapipe_image = mp.Image(\n                image_format=mp.ImageFormat.SRGB,\n                data=cv2.cvtColor(image, cv2.COLOR_BGR2RGB))\n\n            options = mp.tasks.vision.PoseLandmarkerOptions(\n                base_options=mp.tasks.BaseOptions(\n                    model_asset_path=\"pose_landmarker_heavy.task\"\n                ),\n                running_mode=mp.tasks.vision.RunningMode.IMAGE,\n                num_poses=2)\n\n            PoseLandmarker = mp.tasks.vision.PoseLandmarker\n            with PoseLandmarker.create_from_options(options) as landmarker:\n                pose_landmarker_result = landmarker.detect(mediapipe_image)\n\n            key_points = sv.KeyPoints.from_mediapipe(\n                pose_landmarker_result, (image_width, image_height))\n            ```\n        \"\"\"  # noqa: E501 // docs\n        results = mediapipe_results.pose_landmarks\n        if not isinstance(mediapipe_results.pose_landmarks, list):\n            if mediapipe_results.pose_landmarks is None:\n                results = []\n            else:\n                results = [\n                    [landmark for landmark in mediapipe_results.pose_landmarks.landmark]\n                ]\n\n        if len(results) == 0:\n            return cls.empty()\n\n        xy = []\n        confidence = []\n        for pose in results:\n            prediction_xy = []\n            prediction_confidence = []\n            for landmark in pose:\n                keypoint_xy = [\n                    landmark.x * resolution_wh[0],\n                    landmark.y * resolution_wh[1],\n                ]\n                prediction_xy.append(keypoint_xy)\n                prediction_confidence.append(landmark.visibility)\n\n            xy.append(prediction_xy)\n            confidence.append(prediction_confidence)\n\n        return cls(\n            xy=np.array(xy, dtype=np.float32),\n            confidence=np.array(confidence, dtype=np.float32),\n        )\n\n    @classmethod\n    def from_ultralytics(cls, ultralytics_results) -> KeyPoints:\n        \"\"\"\n        Creates a `sv.KeyPoints` instance from a\n        [YOLOv8](https://github.com/ultralytics/ultralytics) pose inference result.\n\n        Args:\n            ultralytics_results (ultralytics.engine.results.Keypoints):\n                The output Results instance from YOLOv8\n\n        Returns:\n            A `sv.KeyPoints` object containing the keypoint coordinates, class IDs,\n                and class names, and confidences of each keypoint.\n\n        Examples:\n            ```python\n            import cv2\n            import supervision as sv\n            from ultralytics import YOLO\n\n            image = cv2.imread(<SOURCE_IMAGE_PATH>)\n            model = YOLO('yolov8s-pose.pt')\n\n            result = model(image)[0]\n            key_points = sv.KeyPoints.from_ultralytics(result)\n            ```\n        \"\"\"\n        if ultralytics_results.keypoints.xy.numel() == 0:\n            return cls.empty()\n\n        xy = ultralytics_results.keypoints.xy.cpu().numpy()\n        class_id = 
ultralytics_results.boxes.cls.cpu().numpy().astype(int)\n        class_names = np.array([ultralytics_results.names[i] for i in class_id])\n\n        confidence = ultralytics_results.keypoints.conf.cpu().numpy()\n        data = {CLASS_NAME_DATA_FIELD: class_names}\n        return cls(xy, class_id, confidence, data)\n\n    @classmethod\n    def from_yolo_nas(cls, yolo_nas_results) -> KeyPoints:\n        \"\"\"\n        Create a `sv.KeyPoints` instance from a [YOLO-NAS](https://github.com/Deci-AI/super-gradients/blob/master/YOLONAS-POSE.md)\n        pose inference results.\n\n        Args:\n            yolo_nas_results (ImagePoseEstimationPrediction): The output object from\n                YOLO NAS.\n\n        Returns:\n            A `sv.KeyPoints` object containing the keypoint coordinates, class IDs,\n                and class names, and confidences of each keypoint.\n\n        Examples:\n            ```python\n            import cv2\n            import torch\n            import supervision as sv\n            import super_gradients\n\n            image = cv2.imread(<SOURCE_IMAGE_PATH>)\n\n            device = \"cuda\" if torch.cuda.is_available() else \"cpu\"\n            model = super_gradients.training.models.get(\n                \"yolo_nas_pose_s\", pretrained_weights=\"coco_pose\").to(device)\n\n            results = model.predict(image, conf=0.1)\n            key_points = sv.KeyPoints.from_yolo_nas(results)\n            ```\n        \"\"\"  # noqa: E501 // docs\n        if len(yolo_nas_results.prediction.poses) == 0:\n            return cls.empty()\n\n        xy = yolo_nas_results.prediction.poses[:, :, :2]\n        confidence = yolo_nas_results.prediction.poses[:, :, 2]\n\n        # yolo_nas_results treats params differently.\n        # prediction.labels may not exist, whereas class_names might be None\n        if hasattr(yolo_nas_results.prediction, \"labels\"):\n            class_id = yolo_nas_results.prediction.labels  # np.array[int]\n        else:\n            class_id = None\n\n        data = {}\n        if class_id is not None and yolo_nas_results.class_names is not None:\n            class_names = []\n            for c_id in class_id:\n                name = yolo_nas_results.class_names[c_id]  # tuple[str]\n                class_names.append(name)\n            data[CLASS_NAME_DATA_FIELD] = class_names\n\n        return cls(\n            xy=xy,\n            confidence=confidence,\n            class_id=class_id,\n            data=data,\n        )\n\n    def __getitem__(\n        self, index: Union[int, slice, List[int], np.ndarray, str]\n    ) -> Union[KeyPoints, List, np.ndarray, None]:\n        \"\"\"\n        Get a subset of the `sv.KeyPoints` object or access an item from its data field.\n\n        When provided with an integer, slice, list of integers, or a numpy array, this\n        method returns a new `sv.KeyPoints` object that represents a subset of the\n        original `sv.KeyPoints`. 
When provided with a string, it accesses the\n        corresponding item in the data dictionary.\n\n        Args:\n            index (Union[int, slice, List[int], np.ndarray, str]): The index, indices,\n                or key to access a subset of the `sv.KeyPoints` or an item from the\n                data.\n\n        Returns:\n            A subset of the `sv.KeyPoints` object or an item from the data field.\n\n        Examples:\n            ```python\n            import supervision as sv\n\n            key_points = sv.KeyPoints()\n\n            # access the first keypoint using an integer index\n            key_points[0]\n\n            # access the first 10 keypoints using index slice\n            key_points[0:10]\n\n            # access selected keypoints using a list of indices\n            key_points[[0, 2, 4]]\n\n            # access keypoints with selected class_id\n            key_points[key_points.class_id == 0]\n\n            # access keypoints with confidence greater than 0.5\n            key_points[key_points.confidence > 0.5]\n            ```\n        \"\"\"\n        if isinstance(index, str):\n            return self.data.get(index)\n        if isinstance(index, int):\n            index = [index]\n        return KeyPoints(\n            xy=self.xy[index],\n            confidence=self.confidence[index] if self.confidence is not None else None,\n            class_id=self.class_id[index] if self.class_id is not None else None,\n            data=get_data_item(self.data, index),\n        )\n\n    def __setitem__(self, key: str, value: Union[np.ndarray, List]):\n        \"\"\"\n        Set a value in the data dictionary of the `sv.KeyPoints` object.\n\n        Args:\n            key (str): The key in the data dictionary to set.\n            value (Union[np.ndarray, List]): The value to set for the key.\n\n        Examples:\n            ```python\n            import cv2\n            import supervision as sv\n            from ultralytics import YOLO\n\n            image = cv2.imread(<SOURCE_IMAGE_PATH>)\n            model = YOLO('yolov8s.pt')\n\n            result = model(image)[0]\n            keypoints = sv.KeyPoints.from_ultralytics(result)\n\n            keypoints['class_name'] = [\n                 model.model.names[class_id]\n                 for class_id\n                 in keypoints.class_id\n             ]\n            ```\n        \"\"\"\n        if not isinstance(value, (np.ndarray, list)):\n            raise TypeError(\"Value must be a np.ndarray or a list\")\n\n        if isinstance(value, list):\n            value = np.array(value)\n\n        self.data[key] = value\n\n    @classmethod\n    def empty(cls) -> KeyPoints:\n        \"\"\"\n        Create an empty Keypoints object with no keypoints.\n\n        Returns:\n            An empty `sv.KeyPoints` object.\n\n        Examples:\n            ```python\n            import supervision as sv\n\n            key_points = sv.KeyPoints.empty()\n            ```\n        \"\"\"\n        return cls(xy=np.empty((0, 0, 2), dtype=np.float32))\n
"},{"location":"keypoint/core/#supervision.keypoint.core.KeyPoints-functions","title":"Functions","text":""},{"location":"keypoint/core/#supervision.keypoint.core.KeyPoints.__getitem__","title":"__getitem__(index)","text":"

Get a subset of the sv.KeyPoints object or access an item from its data field.

When provided with an integer, slice, list of integers, or a numpy array, this method returns a new sv.KeyPoints object that represents a subset of the original sv.KeyPoints. When provided with a string, it accesses the corresponding item in the data dictionary.

Parameters:

Name Type Description Default index Union[int, slice, List[int], ndarray, str]

The index, indices, or key to access a subset of the sv.KeyPoints or an item from the data.

required

Returns:

Type Description Union[KeyPoints, List, ndarray, None]

A subset of the sv.KeyPoints object or an item from the data field.

Examples:

import supervision as sv\n\nkey_points = sv.KeyPoints()\n\n# access the first keypoint using an integer index\nkey_points[0]\n\n# access the first 10 keypoints using index slice\nkey_points[0:10]\n\n# access selected keypoints using a list of indices\nkey_points[[0, 2, 4]]\n\n# access keypoints with selected class_id\nkey_points[key_points.class_id == 0]\n\n# access keypoints with confidence greater than 0.5\nkey_points[key_points.confidence > 0.5]\n
Source code in supervision/keypoint/core.py
def __getitem__(\n    self, index: Union[int, slice, List[int], np.ndarray, str]\n) -> Union[KeyPoints, List, np.ndarray, None]:\n    \"\"\"\n    Get a subset of the `sv.KeyPoints` object or access an item from its data field.\n\n    When provided with an integer, slice, list of integers, or a numpy array, this\n    method returns a new `sv.KeyPoints` object that represents a subset of the\n    original `sv.KeyPoints`. When provided with a string, it accesses the\n    corresponding item in the data dictionary.\n\n    Args:\n        index (Union[int, slice, List[int], np.ndarray, str]): The index, indices,\n            or key to access a subset of the `sv.KeyPoints` or an item from the\n            data.\n\n    Returns:\n        A subset of the `sv.KeyPoints` object or an item from the data field.\n\n    Examples:\n        ```python\n        import supervision as sv\n\n        key_points = sv.KeyPoints()\n\n        # access the first keypoint using an integer index\n        key_points[0]\n\n        # access the first 10 keypoints using index slice\n        key_points[0:10]\n\n        # access selected keypoints using a list of indices\n        key_points[[0, 2, 4]]\n\n        # access keypoints with selected class_id\n        key_points[key_points.class_id == 0]\n\n        # access keypoints with confidence greater than 0.5\n        key_points[key_points.confidence > 0.5]\n        ```\n    \"\"\"\n    if isinstance(index, str):\n        return self.data.get(index)\n    if isinstance(index, int):\n        index = [index]\n    return KeyPoints(\n        xy=self.xy[index],\n        confidence=self.confidence[index] if self.confidence is not None else None,\n        class_id=self.class_id[index] if self.class_id is not None else None,\n        data=get_data_item(self.data, index),\n    )\n
"},{"location":"keypoint/core/#supervision.keypoint.core.KeyPoints.__iter__","title":"__iter__()","text":"

Iterates over the Keypoint object and yield a tuple of (xy, confidence, class_id, data) for each keypoint detection.
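For illustration, a minimal usage sketch (it reuses the Ultralytics example shown earlier on this page; the unpacking order follows the tuple described here):

import cv2\nimport supervision as sv\nfrom ultralytics import YOLO\n\nimage = cv2.imread(<SOURCE_IMAGE_PATH>)\nmodel = YOLO('yolov8s-pose.pt')\n\nresult = model(image)[0]\nkey_points = sv.KeyPoints.from_ultralytics(result)\n\n# each iteration yields (xy, confidence, class_id, data) for one detected pose\nfor xy, confidence, class_id, data in key_points:\n    print(xy, confidence, class_id)\n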

Source code in supervision/keypoint/core.py
def __iter__(\n    self,\n) -> Iterator[\n    Tuple[\n        np.ndarray,\n        Optional[np.ndarray],\n        Optional[float],\n        Optional[int],\n        Optional[int],\n        Dict[str, Union[np.ndarray, List]],\n    ]\n]:\n    \"\"\"\n    Iterates over the Keypoint object and yield a tuple of\n    `(xy, confidence, class_id, data)` for each keypoint detection.\n    \"\"\"\n    for i in range(len(self.xy)):\n        yield (\n            self.xy[i],\n            self.confidence[i] if self.confidence is not None else None,\n            self.class_id[i] if self.class_id is not None else None,\n            get_data_item(self.data, i),\n        )\n
"},{"location":"keypoint/core/#supervision.keypoint.core.KeyPoints.__len__","title":"__len__()","text":"

Returns the number of keypoints in the sv.KeyPoints object.

Source code in supervision/keypoint/core.py
def __len__(self) -> int:\n    \"\"\"\n    Returns the number of keypoints in the `sv.KeyPoints` object.\n    \"\"\"\n    return len(self.xy)\n
"},{"location":"keypoint/core/#supervision.keypoint.core.KeyPoints.__setitem__","title":"__setitem__(key, value)","text":"

Set a value in the data dictionary of the sv.KeyPoints object.

Parameters:

Name Type Description Default key str

The key in the data dictionary to set.

required value Union[ndarray, List]

The value to set for the key.

required

Examples:

import cv2\nimport supervision as sv\nfrom ultralytics import YOLO\n\nimage = cv2.imread(<SOURCE_IMAGE_PATH>)\nmodel = YOLO('yolov8s.pt')\n\nresult = model(image)[0]\nkeypoints = sv.KeyPoints.from_ultralytics(result)\n\nkeypoints['class_name'] = [\n     model.model.names[class_id]\n     for class_id\n     in keypoints.class_id\n ]\n
Source code in supervision/keypoint/core.py
def __setitem__(self, key: str, value: Union[np.ndarray, List]):\n    \"\"\"\n    Set a value in the data dictionary of the `sv.KeyPoints` object.\n\n    Args:\n        key (str): The key in the data dictionary to set.\n        value (Union[np.ndarray, List]): The value to set for the key.\n\n    Examples:\n        ```python\n        import cv2\n        import supervision as sv\n        from ultralytics import YOLO\n\n        image = cv2.imread(<SOURCE_IMAGE_PATH>)\n        model = YOLO('yolov8s.pt')\n\n        result = model(image)[0]\n        keypoints = sv.KeyPoints.from_ultralytics(result)\n\n        keypoints['class_name'] = [\n             model.model.names[class_id]\n             for class_id\n             in keypoints.class_id\n         ]\n        ```\n    \"\"\"\n    if not isinstance(value, (np.ndarray, list)):\n        raise TypeError(\"Value must be a np.ndarray or a list\")\n\n    if isinstance(value, list):\n        value = np.array(value)\n\n    self.data[key] = value\n
"},{"location":"keypoint/core/#supervision.keypoint.core.KeyPoints.empty","title":"empty() classmethod","text":"

Create an empty Keypoints object with no keypoints.

Returns:

Type Description KeyPoints

An empty sv.KeyPoints object.

Examples:

import supervision as sv\n\nkey_points = sv.KeyPoints.empty()\n
Source code in supervision/keypoint/core.py
@classmethod\ndef empty(cls) -> KeyPoints:\n    \"\"\"\n    Create an empty Keypoints object with no keypoints.\n\n    Returns:\n        An empty `sv.KeyPoints` object.\n\n    Examples:\n        ```python\n        import supervision as sv\n\n        key_points = sv.KeyPoints.empty()\n        ```\n    \"\"\"\n    return cls(xy=np.empty((0, 0, 2), dtype=np.float32))\n
"},{"location":"keypoint/core/#supervision.keypoint.core.KeyPoints.from_inference","title":"from_inference(inference_result) classmethod","text":"

Create a sv.KeyPoints object from the Roboflow API inference result or the Inference package results.

Parameters:

Name Type Description Default inference_result (dict, any)

The result from the Roboflow API or Inference package containing predictions with keypoints.

required

Returns:

Type Description KeyPoints

A sv.KeyPoints object containing the keypoint coordinates, class IDs, and class names, and confidences of each keypoint.

Examples:

import cv2\nimport supervision as sv\nfrom inference import get_model\n\nimage = cv2.imread(<SOURCE_IMAGE_PATH>)\nmodel = get_model(model_id=<POSE_MODEL_ID>, api_key=<ROBOFLOW_API_KEY>)\n\nresult = model.infer(image)[0]\nkey_points = sv.KeyPoints.from_inference(result)\n
import cv2\nimport supervision as sv\nfrom inference_sdk import InferenceHTTPClient\n\nimage = cv2.imread(<SOURCE_IMAGE_PATH>)\nclient = InferenceHTTPClient(\n    api_url=\"https://detect.roboflow.com\",\n    api_key=<ROBOFLOW_API_KEY>\n)\n\nresult = client.infer(image, model_id=<POSE_MODEL_ID>)\nkey_points = sv.KeyPoints.from_inference(result)\n
Source code in supervision/keypoint/core.py
@classmethod\ndef from_inference(cls, inference_result: Union[dict, Any]) -> KeyPoints:\n    \"\"\"\n    Create a `sv.KeyPoints` object from the [Roboflow](https://roboflow.com/)\n    API inference result or the [Inference](https://inference.roboflow.com/)\n    package results.\n\n    Args:\n        inference_result (dict, any): The result from the\n            Roboflow API or Inference package containing predictions with keypoints.\n\n    Returns:\n        A `sv.KeyPoints` object containing the keypoint coordinates, class IDs,\n            and class names, and confidences of each keypoint.\n\n    Examples:\n        ```python\n        import cv2\n        import supervision as sv\n        from inference import get_model\n\n        image = cv2.imread(<SOURCE_IMAGE_PATH>)\n        model = get_model(model_id=<POSE_MODEL_ID>, api_key=<ROBOFLOW_API_KEY>)\n\n        result = model.infer(image)[0]\n        key_points = sv.KeyPoints.from_inference(result)\n        ```\n\n        ```python\n        import cv2\n        import supervision as sv\n        from inference_sdk import InferenceHTTPClient\n\n        image = cv2.imread(<SOURCE_IMAGE_PATH>)\n        client = InferenceHTTPClient(\n            api_url=\"https://detect.roboflow.com\",\n            api_key=<ROBOFLOW_API_KEY>\n        )\n\n        result = client.infer(image, model_id=<POSE_MODEL_ID>)\n        key_points = sv.KeyPoints.from_inference(result)\n        ```\n    \"\"\"\n    if isinstance(inference_result, list):\n        raise ValueError(\n            \"from_inference() operates on a single result at a time.\"\n            \"You can retrieve it like so:  inference_result = model.infer(image)[0]\"\n        )\n\n    with suppress(AttributeError):\n        inference_result = inference_result.dict(exclude_none=True, by_alias=True)\n\n    if not inference_result.get(\"predictions\"):\n        return cls.empty()\n\n    xy = []\n    confidence = []\n    class_id = []\n    class_names = []\n\n    for prediction in inference_result[\"predictions\"]:\n        prediction_xy = []\n        prediction_confidence = []\n        for keypoint in prediction[\"keypoints\"]:\n            prediction_xy.append([keypoint[\"x\"], keypoint[\"y\"]])\n            prediction_confidence.append(keypoint[\"confidence\"])\n        xy.append(prediction_xy)\n        confidence.append(prediction_confidence)\n\n        class_id.append(prediction[\"class_id\"])\n        class_names.append(prediction[\"class\"])\n\n    data = {CLASS_NAME_DATA_FIELD: np.array(class_names)}\n\n    return cls(\n        xy=np.array(xy, dtype=np.float32),\n        confidence=np.array(confidence, dtype=np.float32),\n        class_id=np.array(class_id, dtype=int),\n        data=data,\n    )\n
"},{"location":"keypoint/core/#supervision.keypoint.core.KeyPoints.from_mediapipe","title":"from_mediapipe(mediapipe_results, resolution_wh) classmethod","text":"

Creates a sv.KeyPoints instance from a MediaPipe pose landmark detection inference result.

Parameters:

Name Type Description Default mediapipe_results Union[PoseLandmarkerResult, SolutionOutputs]

The output results from Mediapipe. It supports both: the inference result PoseLandmarker and the legacy one from Pose.

required resolution_wh Tuple[int, int]

A tuple of the form (width, height) representing the resolution of the frame.

required

Returns:

Type Description KeyPoints

A sv.KeyPoints object containing the keypoint coordinates and confidences of each keypoint.

Tip

Before you start, download model bundles from the MediaPipe website.

Examples:

import cv2\nimport mediapipe as mp\nimport supervision as sv\n\nimage = cv2.imread(<SOURCE_IMAGE_PATH>)\nimage_height, image_width, _ = image.shape\nmediapipe_image = mp.Image(\n    image_format=mp.ImageFormat.SRGB,\n    data=cv2.cvtColor(image, cv2.COLOR_BGR2RGB))\n\noptions = mp.tasks.vision.PoseLandmarkerOptions(\n    base_options=mp.tasks.BaseOptions(\n        model_asset_path=\"pose_landmarker_heavy.task\"\n    ),\n    running_mode=mp.tasks.vision.RunningMode.IMAGE,\n    num_poses=2)\n\nPoseLandmarker = mp.tasks.vision.PoseLandmarker\nwith PoseLandmarker.create_from_options(options) as landmarker:\n    pose_landmarker_result = landmarker.detect(mediapipe_image)\n\nkey_points = sv.KeyPoints.from_mediapipe(\n    pose_landmarker_result, (image_width, image_height))\n
Source code in supervision/keypoint/core.py
@classmethod\ndef from_mediapipe(\n    cls, mediapipe_results, resolution_wh: Tuple[int, int]\n) -> KeyPoints:\n    \"\"\"\n    Creates a `sv.KeyPoints` instance from a\n    [MediaPipe](https://github.com/google-ai-edge/mediapipe)\n    pose landmark detection inference result.\n\n    Args:\n        mediapipe_results (Union[PoseLandmarkerResult, SolutionOutputs]):\n            The output results from Mediapipe. It supports both: the inference\n            result `PoseLandmarker` and the legacy one from `Pose`.\n        resolution_wh (Tuple[int, int]): A tuple of the form `(width, height)`\n            representing the resolution of the frame.\n\n    Returns:\n        A `sv.KeyPoints` object containing the keypoint coordinates and\n            confidences of each keypoint.\n\n    !!! tip\n        Before you start, download model bundles from the\n        [MediaPipe website](https://ai.google.dev/edge/mediapipe/solutions/vision/pose_landmarker/index#models).\n\n    Examples:\n        ```python\n        import cv2\n        import mediapipe as mp\n        import supervision as sv\n\n        image = cv2.imread(<SOURCE_IMAGE_PATH>)\n        image_height, image_width, _ = image.shape\n        mediapipe_image = mp.Image(\n            image_format=mp.ImageFormat.SRGB,\n            data=cv2.cvtColor(image, cv2.COLOR_BGR2RGB))\n\n        options = mp.tasks.vision.PoseLandmarkerOptions(\n            base_options=mp.tasks.BaseOptions(\n                model_asset_path=\"pose_landmarker_heavy.task\"\n            ),\n            running_mode=mp.tasks.vision.RunningMode.IMAGE,\n            num_poses=2)\n\n        PoseLandmarker = mp.tasks.vision.PoseLandmarker\n        with PoseLandmarker.create_from_options(options) as landmarker:\n            pose_landmarker_result = landmarker.detect(mediapipe_image)\n\n        key_points = sv.KeyPoints.from_mediapipe(\n            pose_landmarker_result, (image_width, image_height))\n        ```\n    \"\"\"  # noqa: E501 // docs\n    results = mediapipe_results.pose_landmarks\n    if not isinstance(mediapipe_results.pose_landmarks, list):\n        if mediapipe_results.pose_landmarks is None:\n            results = []\n        else:\n            results = [\n                [landmark for landmark in mediapipe_results.pose_landmarks.landmark]\n            ]\n\n    if len(results) == 0:\n        return cls.empty()\n\n    xy = []\n    confidence = []\n    for pose in results:\n        prediction_xy = []\n        prediction_confidence = []\n        for landmark in pose:\n            keypoint_xy = [\n                landmark.x * resolution_wh[0],\n                landmark.y * resolution_wh[1],\n            ]\n            prediction_xy.append(keypoint_xy)\n            prediction_confidence.append(landmark.visibility)\n\n        xy.append(prediction_xy)\n        confidence.append(prediction_confidence)\n\n    return cls(\n        xy=np.array(xy, dtype=np.float32),\n        confidence=np.array(confidence, dtype=np.float32),\n    )\n
"},{"location":"keypoint/core/#supervision.keypoint.core.KeyPoints.from_ultralytics","title":"from_ultralytics(ultralytics_results) classmethod","text":"

Creates a sv.KeyPoints instance from a YOLOv8 pose inference result.

Parameters:

Name Type Description Default ultralytics_results Keypoints

The output Results instance from YOLOv8

required

Returns:

Type Description KeyPoints

A sv.KeyPoints object containing the keypoint coordinates, class IDs, and class names, and confidences of each keypoint.

Examples:

import cv2\nimport supervision as sv\nfrom ultralytics import YOLO\n\nimage = cv2.imread(<SOURCE_IMAGE_PATH>)\nmodel = YOLO('yolov8s-pose.pt')\n\nresult = model(image)[0]\nkey_points = sv.KeyPoints.from_ultralytics(result)\n
Source code in supervision/keypoint/core.py
@classmethod\ndef from_ultralytics(cls, ultralytics_results) -> KeyPoints:\n    \"\"\"\n    Creates a `sv.KeyPoints` instance from a\n    [YOLOv8](https://github.com/ultralytics/ultralytics) pose inference result.\n\n    Args:\n        ultralytics_results (ultralytics.engine.results.Keypoints):\n            The output Results instance from YOLOv8\n\n    Returns:\n        A `sv.KeyPoints` object containing the keypoint coordinates, class IDs,\n            and class names, and confidences of each keypoint.\n\n    Examples:\n        ```python\n        import cv2\n        import supervision as sv\n        from ultralytics import YOLO\n\n        image = cv2.imread(<SOURCE_IMAGE_PATH>)\n        model = YOLO('yolov8s-pose.pt')\n\n        result = model(image)[0]\n        key_points = sv.KeyPoints.from_ultralytics(result)\n        ```\n    \"\"\"\n    if ultralytics_results.keypoints.xy.numel() == 0:\n        return cls.empty()\n\n    xy = ultralytics_results.keypoints.xy.cpu().numpy()\n    class_id = ultralytics_results.boxes.cls.cpu().numpy().astype(int)\n    class_names = np.array([ultralytics_results.names[i] for i in class_id])\n\n    confidence = ultralytics_results.keypoints.conf.cpu().numpy()\n    data = {CLASS_NAME_DATA_FIELD: class_names}\n    return cls(xy, class_id, confidence, data)\n
"},{"location":"keypoint/core/#supervision.keypoint.core.KeyPoints.from_yolo_nas","title":"from_yolo_nas(yolo_nas_results) classmethod","text":"

Create a sv.KeyPoints instance from a YOLO-NAS pose inference results.

Parameters:

Name Type Description Default yolo_nas_results ImagePoseEstimationPrediction

The output object from YOLO NAS.

required

Returns:

Type Description KeyPoints

A sv.KeyPoints object containing the keypoint coordinates, class IDs, and class names, and confidences of each keypoint.

Examples:

import cv2\nimport torch\nimport supervision as sv\nimport super_gradients\n\nimage = cv2.imread(<SOURCE_IMAGE_PATH>)\n\ndevice = \"cuda\" if torch.cuda.is_available() else \"cpu\"\nmodel = super_gradients.training.models.get(\n    \"yolo_nas_pose_s\", pretrained_weights=\"coco_pose\").to(device)\n\nresults = model.predict(image, conf=0.1)\nkey_points = sv.KeyPoints.from_yolo_nas(results)\n
Source code in supervision/keypoint/core.py
@classmethod\ndef from_yolo_nas(cls, yolo_nas_results) -> KeyPoints:\n    \"\"\"\n    Create a `sv.KeyPoints` instance from a [YOLO-NAS](https://github.com/Deci-AI/super-gradients/blob/master/YOLONAS-POSE.md)\n    pose inference results.\n\n    Args:\n        yolo_nas_results (ImagePoseEstimationPrediction): The output object from\n            YOLO NAS.\n\n    Returns:\n        A `sv.KeyPoints` object containing the keypoint coordinates, class IDs,\n            and class names, and confidences of each keypoint.\n\n    Examples:\n        ```python\n        import cv2\n        import torch\n        import supervision as sv\n        import super_gradients\n\n        image = cv2.imread(<SOURCE_IMAGE_PATH>)\n\n        device = \"cuda\" if torch.cuda.is_available() else \"cpu\"\n        model = super_gradients.training.models.get(\n            \"yolo_nas_pose_s\", pretrained_weights=\"coco_pose\").to(device)\n\n        results = model.predict(image, conf=0.1)\n        key_points = sv.KeyPoints.from_yolo_nas(results)\n        ```\n    \"\"\"  # noqa: E501 // docs\n    if len(yolo_nas_results.prediction.poses) == 0:\n        return cls.empty()\n\n    xy = yolo_nas_results.prediction.poses[:, :, :2]\n    confidence = yolo_nas_results.prediction.poses[:, :, 2]\n\n    # yolo_nas_results treats params differently.\n    # prediction.labels may not exist, whereas class_names might be None\n    if hasattr(yolo_nas_results.prediction, \"labels\"):\n        class_id = yolo_nas_results.prediction.labels  # np.array[int]\n    else:\n        class_id = None\n\n    data = {}\n    if class_id is not None and yolo_nas_results.class_names is not None:\n        class_names = []\n        for c_id in class_id:\n            name = yolo_nas_results.class_names[c_id]  # tuple[str]\n            class_names.append(name)\n        data[CLASS_NAME_DATA_FIELD] = class_names\n\n    return cls(\n        xy=xy,\n        confidence=confidence,\n        class_id=class_id,\n        data=data,\n    )\n
"},{"location":"notebooks/annotate-video-with-detections/","title":"Annotate Video with Detections","text":"In\u00a0[1]: Copied!
!nvidia-smi\n
!nvidia-smi
Fri Feb 23 03:15:00 2024       \n+---------------------------------------------------------------------------------------+\n| NVIDIA-SMI 535.104.05             Driver Version: 535.104.05   CUDA Version: 12.2     |\n|-----------------------------------------+----------------------+----------------------+\n| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |\n| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |\n|                                         |                      |               MIG M. |\n|=========================================+======================+======================|\n|   0  Tesla V100-SXM2-16GB           Off | 00000000:00:04.0 Off |                    0 |\n| N/A   33C    P0              24W / 300W |      0MiB / 16384MiB |      0%      Default |\n|                                         |                      |                  N/A |\n+-----------------------------------------+----------------------+----------------------+\n                                                                                         \n+---------------------------------------------------------------------------------------+\n| Processes:                                                                            |\n|  GPU   GI   CI        PID   Type   Process name                            GPU Memory |\n|        ID   ID                                                             Usage      |\n|=======================================================================================|\n|  No running processes found                                                           |\n+---------------------------------------------------------------------------------------+\n
In\u00a0[\u00a0]: Copied!
!pip install -q inference-gpu \"supervision[assets]\"\n
!pip install -q inference-gpu \"supervision[assets]\" In\u00a0[\u00a0]: Copied!
from supervision.assets import download_assets, VideoAssets\n\n# Download a supervision video asset\npath_to_video = download_assets(VideoAssets.PEOPLE_WALKING)\n
from supervision.assets import download_assets, VideoAssets # Download a supervision video asset path_to_video = download_assets(VideoAssets.PEOPLE_WALKING)

As a result, we've downloaded a video. Let's take a look at the video below. Keep in mind that the video preview below works only in the web version of the cookbooks and not in Google Colab.

In\u00a0[4]: Copied!
import supervision as sv\nfrom supervision.assets import download_assets, VideoAssets\nfrom inference.models.utils import get_roboflow_model\n\n\n# Load a yolov8 model from roboflow.\nmodel = get_roboflow_model(\"yolov8s-640\")\n\n# Create a frame generator and video info object from supervision utilities.\nframe_generator = sv.get_video_frames_generator(path_to_video)\n\n# Yield a single frame from the generator.\nframe = next(frame_generator)\n\n# Run inference on our frame\nresult = model.infer(frame)[0]\n\n# Parse result into detections data model.\ndetections = sv.Detections.from_inference(result)\n\n# Pretty Print the resulting detections.\nfrom pprint import pprint\npprint(detections)\n
import supervision as sv from supervision.assets import download_assets, VideoAssets from inference.models.utils import get_roboflow_model # Load a yolov8 model from roboflow. model = get_roboflow_model(\"yolov8s-640\") # Create a frame generator and video info object from supervision utilities. frame_generator = sv.get_video_frames_generator(path_to_video) # Yield a single frame from the generator. frame = next(frame_generator) # Run inference on our frame result = model.infer(frame)[0] # Parse result into detections data model. detections = sv.Detections.from_inference(result) # Pretty Print the resulting detections. from pprint import pprint pprint(detections)
Detections(xyxy=array([[1140.,  951., 1245., 1079.],\n       [ 666.,  648.,  745.,  854.],\n       [  34.,  794.,  142.,  990.],\n       [1140.,  505., 1211.,  657.],\n       [ 260.,  438.,  332.,  612.],\n       [1413.,  702., 1523.,  887.],\n       [1462.,  472., 1543.,  643.],\n       [1446.,  318., 1516.,  483.],\n       [ 753.,  451.,  821.,  623.],\n       [ 924.,  172.,  983.,  307.],\n       [1791.,  144., 1852.,  275.],\n       [  93.,  132.,  146.,  251.],\n       [ 708.,  240.,  765.,  388.],\n       [ 200.,   44.,  267.,  161.],\n       [1204.,  131., 1255.,  266.],\n       [ 569.,  267.,  628.,  408.],\n       [1163.,  150., 1210.,  280.],\n       [ 799.,   78.,  847.,  204.],\n       [1690.,  152., 1751.,  283.],\n       [ 344.,  495.,  396.,  641.],\n       [1722.,   77., 1782.,  178.]]),\n           mask=None,\n           confidence=array([0.83215541, 0.80572134, 0.7919845 , 0.7912274 , 0.77121079,\n       0.7599591 , 0.75711554, 0.75494027, 0.73076195, 0.71452248,\n       0.69572842, 0.65269446, 0.63952065, 0.62914598, 0.61361706,\n       0.5968492 , 0.55311316, 0.5470854 , 0.54070991, 0.52209878,\n       0.41217673]),\n           class_id=array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]),\n           tracker_id=None,\n           data={'class_name': array(['person', 'person', 'person', 'person', 'person', 'person',\n       'person', 'person', 'person', 'person', 'person', 'person',\n       'person', 'person', 'person', 'person', 'person', 'person',\n       'person', 'person', 'person'], dtype='<U6')})\n

First, we load our model using the get_roboflow_model() method. Notice how we pass in a model_id? We're using an alias here. This is also where we can pass in other models from Roboflow Universe, like this rock, paper, scissors model, by supplying our Roboflow API key.

model = get_roboflow_model(\n    model_id=\"rock-paper-scissors-sxsw/11\",\n    api_key=\"roboflow_private_api_key\"\n)\n

If you don't have an API key, you can create a free Roboflow account. This model wouldn't be much help with detecting people, but it's a nice exercise to see how our code becomes model agnostic!

We then create a frame_generator object and yield a single frame for inference using next(). We pass our frame to model.infer() to run inference. After that, we pass the result into a little helper function called sv.Detections.from_inference() to parse it. Lastly, we print our detections to show we are in fact detecting a few people in the frame!

In\u00a0[5]: Copied!
# Create a bounding box annotator object.\nbounding_box = sv.BoundingBoxAnnotator()\n\n# Annotate our frame with detections.\nannotated_frame = bounding_box.annotate(scene=frame.copy(), detections=detections)\n\n# Display the frame.\nsv.plot_image(annotated_frame)\n
# Create a bounding box annotator object. bounding_box = sv.BoundingBoxAnnotator() # Annotate our frame with detections. annotated_frame = bounding_box.annotate(scene=frame.copy(), detections=detections) # Display the frame. sv.plot_image(annotated_frame)

Notice that we create a bounding_box variable by initializing a BoundingBoxAnnotator. We can change the color and thickness, but for simplicity we keep the defaults. Beyond the bounding box, there are a ton of easy-to-use annotators available in the Supervision package that are fun to play with.
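As a small, optional sketch of combining annotators (it reuses the frame, detections, and bounding_box objects from the cells above; the 'class_name' entry is the one visible in the printed detections):

label_annotator = sv.LabelAnnotator()\n\nlabels = [\n    f\"{class_name} {confidence:0.2f}\"\n    for class_name, confidence\n    in zip(detections['class_name'], detections.confidence)\n]\n\n# layer labels on top of the boxes drawn earlier\nannotated_frame = bounding_box.annotate(scene=frame.copy(), detections=detections)\nannotated_frame = label_annotator.annotate(\n    scene=annotated_frame, detections=detections, labels=labels)\nsv.plot_image(annotated_frame)\n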

In\u00a0[\u00a0]: Copied!
from tqdm import tqdm\n\n# Create a video_info object for use in the VideoSink.\nvideo_info = sv.VideoInfo.from_video_path(video_path=path_to_video)\n\n# Create a VideoSink context manager to save our frames.\nwith sv.VideoSink(target_path=\"output.mp4\", video_info=video_info) as sink:\n\n    # Iterate through frames yielded from the frame_generator.\n    for frame in tqdm(frame_generator, total=video_info.total_frames):\n\n        # Run inference on our frame.\n        result = model.infer(frame)[0]\n\n        # Parse the result into the detections data model.\n        detections = sv.Detections.from_inference(result)\n\n        # Apply bounding box to detections on a copy of the frame.\n        annotated_frame = bounding_box.annotate(\n            scene=frame.copy(),\n            detections=detections\n        )\n\n        # Write the annotated frame to the video sink.\n        sink.write_frame(frame=annotated_frame)\n
from tqdm import tqdm # Create a video_info object for use in the VideoSink. video_info = sv.VideoInfo.from_video_path(video_path=path_to_video) # Create a VideoSink context manager to save our frames. with sv.VideoSink(target_path=\"output.mp4\", video_info=video_info) as sink: # Iterate through frames yielded from the frame_generator. for frame in tqdm(frame_generator, total=video_info.total_frames): # Run inference on our frame. result = model.infer(frame)[0] # Parse the result into the detections data model. detections = sv.Detections.from_inference(result) # Apply bounding box to detections on a copy of the frame. annotated_frame = bounding_box.annotate( scene=frame.copy(), detections=detections ) # Write the annotated frame to the video sink. sink.write_frame(frame=annotated_frame)

In the code above, we've created a video_info variable to pass information about the video to our VideoSink. The VideoSink is a handy little context manager that lets us call write_frame() to write frames to a video output file. We're also optionally using tqdm to display a progress bar with the percentage complete. We've only scratched the surface of the customizable annotators and additional features that Supervision and Inference have to offer. Stay tuned for more cookbooks on how to take advantage of them in your computer vision applications. Happy building! \ud83d\ude80

"},{"location":"notebooks/annotate-video-with-detections/#annotate-video-with-detections","title":"Annotate Video with Detections\u00b6","text":"

One of the most common requirements of computer vision applications is detecting objects in images and displaying bounding boxes around those objects. In this cookbook we'll walk through the steps on how to utilize the open source Roboflow ecosystem to accomplish this task on a video. Let's dive in!

"},{"location":"notebooks/annotate-video-with-detections/#before-you-start","title":"Before you start\u00b6","text":"

Let's make sure that we have access to a GPU. We can use the nvidia-smi command to do that. In case of any problems, navigate to Edit -> Notebook settings -> Hardware accelerator, set it to GPU, and then click Save.

"},{"location":"notebooks/annotate-video-with-detections/#installing-dependencies","title":"Installing Dependencies\u00b6","text":"

In this cookbook we'll be utilizing the open source packages Inference and Supervision to accomplish our goals. Let's get those installed in our notebook with pip.

"},{"location":"notebooks/annotate-video-with-detections/#download-a-video-asset","title":"Download a Video Asset\u00b6","text":"

First, let's download a video that we can detect objects in. Supervision comes with a great utility called Assets to help us hit the ground running. When we run this script, the video is saved in our local directory and can be accessed with the variable path_to_video.

"},{"location":"notebooks/annotate-video-with-detections/#detecting-objects","title":"Detecting Objects\u00b6","text":"

For this example, the objects in the video that we'd like to detect are people. In order to display bounding boxes around the people in the video, we first need a way to detect them. We'll be using the open source Inference package for this task. Inference allows us to quickly use thousands of models, including fine tuned models from Roboflow Universe, with a few lines of code. We'll also utilize a few utilities for working with our video data from the Supervision package.

"},{"location":"notebooks/annotate-video-with-detections/#annotaing-the-frame-with-bounding-boxes","title":"Annotaing the Frame with Bounding Boxes\u00b6","text":"

Now that we're detecting objects, let's get to the fun part. Let's annotate the frame and display the bounding boxes on it.

"},{"location":"notebooks/annotate-video-with-detections/#saving-bounding-boxes-to-a-video","title":"Saving Bounding Boxes to a Video\u00b6","text":"

Let's wrap up our code by utilizing a VideoSink to draw bounding boxes and save the resulting video. Take a peek at the final code example below. This can take a couple of minutes depending on your runtime, since we're processing a full video. Feel free to skip ahead to see the resulting video.

"},{"location":"notebooks/count-objects-crossing-the-line/","title":"Count Objects Crossing the Line","text":"

Click the Open in Colab button to run the cookbook on Google Colab.

In\u00a0[1]: Copied!
!nvidia-smi\n
!nvidia-smi
Mon Feb 12 13:03:38 2024       \n+---------------------------------------------------------------------------------------+\n| NVIDIA-SMI 535.104.05             Driver Version: 535.104.05   CUDA Version: 12.2     |\n|-----------------------------------------+----------------------+----------------------+\n| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |\n| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |\n|                                         |                      |               MIG M. |\n|=========================================+======================+======================|\n|   0  Tesla T4                       Off | 00000000:00:04.0 Off |                    0 |\n| N/A   48C    P8              10W /  70W |      0MiB / 15360MiB |      0%      Default |\n|                                         |                      |                  N/A |\n+-----------------------------------------+----------------------+----------------------+\n                                                                                         \n+---------------------------------------------------------------------------------------+\n| Processes:                                                                            |\n|  GPU   GI   CI        PID   Type   Process name                            GPU Memory |\n|        ID   ID                                                             Usage      |\n|=======================================================================================|\n|  No running processes found                                                           |\n+---------------------------------------------------------------------------------------+\n

NOTE: To make it easier for us to manage datasets, images, and models, we create a HOME constant.

In\u00a0[2]: Copied!
import os\nHOME = os.getcwd()\nprint(HOME)\n
import os HOME = os.getcwd() print(HOME)
/content\n
In\u00a0[\u00a0]: Copied!
!pip install -q ultralytics supervision==0.18.0\n
!pip install -q ultralytics supervision==0.18.0 In\u00a0[4]: Copied!
import numpy as np\nimport supervision as sv\n\nfrom ultralytics import YOLO\nfrom supervision.assets import download_assets, VideoAssets\n
import numpy as np import supervision as sv from ultralytics import YOLO from supervision.assets import download_assets, VideoAssets

As an example input video, we will use one of the videos available in supervision.assets. Supervision offers an assets download utility that allows you to download video files that you can use in your demos.

In\u00a0[\u00a0]: Copied!
download_assets(VideoAssets.VEHICLES)\n
download_assets(VideoAssets.VEHICLES)

NOTE: If you want to run the cookbook using your own file as input, simply upload video to Google Colab and replace SOURCE_VIDEO_PATH with the path to your file.

In\u00a0[6]: Copied!
SOURCE_VIDEO_PATH = f\"{HOME}/vehicles.mp4\"\n
SOURCE_VIDEO_PATH = f\"{HOME}/vehicles.mp4\"

As a result of executing the above commands, you will download a video file and save it at the SOURCE_VIDEO_PATH. Keep in mind that the video preview below works only in the web version of the cookbooks and not in Google Colab.

The get_video_frames_generator enables us to easily iterate over video frames. Let's create a video generator for our sample input file and display its first frame on the screen.

In\u00a0[7]: Copied!
generator = sv.get_video_frames_generator(SOURCE_VIDEO_PATH)\nframe = next(generator)\n\nsv.plot_image(frame, (12, 12))\n
generator = sv.get_video_frames_generator(SOURCE_VIDEO_PATH) frame = next(generator) sv.plot_image(frame, (12, 12))

We can also use VideoInfo.from_video_path to learn basic information about our video, such as duration, resolution, or FPS.

In\u00a0[8]: Copied!
sv.VideoInfo.from_video_path(SOURCE_VIDEO_PATH)\n
sv.VideoInfo.from_video_path(SOURCE_VIDEO_PATH) Out[8]:
VideoInfo(width=3840, height=2160, fps=25, total_frames=538)

We initiate the model and perform detection on the first frame of the video. Then, we convert the result into a sv.Detections object, which will be useful in the later parts of the cookbook.

In\u00a0[9]: Copied!
model = YOLO(\"yolov8x.pt\")\n\nresults = model(frame, verbose=False)[0]\ndetections = sv.Detections.from_ultralytics(results)\n
model = YOLO(\"yolov8x.pt\") results = model(frame, verbose=False)[0] detections = sv.Detections.from_ultralytics(results)
Downloading https://github.com/ultralytics/assets/releases/download/v8.1.0/yolov8x.pt to 'yolov8x.pt'...\n
100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 131M/131M [00:00<00:00, 241MB/s]\n

The results we've obtained can be easily visualized with sv.BoundingBoxAnnotator. By default, this annotator uses the same color to highlight objects of the same category. However, with the integration of a tracker, it becomes possible to assign unique colors to each tracked object. We can easily define our own color palettes and adjust parameters such as line thickness, allowing for a highly tailored visualization experience.

In\u00a0[10]: Copied!
bounding_box_annotator = sv.BoundingBoxAnnotator(thickness=4)\nannotated_frame = bounding_box_annotator.annotate(frame.copy(), detections)\nsv.plot_image(annotated_frame, (12, 12))\n
bounding_box_annotator = sv.BoundingBoxAnnotator(thickness=4) annotated_frame = bounding_box_annotator.annotate(frame.copy(), detections) sv.plot_image(annotated_frame, (12, 12))
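As a hedged sketch of the customization mentioned above (the hex values are arbitrary examples, and it assumes sv.ColorPalette.from_hex is available in your supervision version):

# define a custom color palette and reuse it in the annotator\ncustom_color_palette = sv.ColorPalette.from_hex(['#ff0066', '#00b4d8', '#ffd166'])\nbounding_box_annotator = sv.BoundingBoxAnnotator(color=custom_color_palette, thickness=4)\nannotated_frame = bounding_box_annotator.annotate(frame.copy(), detections)\nsv.plot_image(annotated_frame, (12, 12))\n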

Supervision annotators can be easily combined with one another. Let's enhance our visualization by adding sv.LabelAnnotator, which we will use to mark detections with a label indicating their category and confidence level.

In\u00a0[12]: Copied!
labels = [\n    f\"{results.names[class_id]} {confidence:0.2f}\"\n    for class_id, confidence\n    in zip(detections.class_id, detections.confidence)\n]\n\nbounding_box_annotator = sv.BoundingBoxAnnotator(thickness=4)\nlabel_annotator = sv.LabelAnnotator(text_thickness=4, text_scale=2)\n\nannotated_frame = frame.copy()\nannotated_frame = bounding_box_annotator.annotate(annotated_frame, detections)\nannotated_frame = label_annotator.annotate(annotated_frame, detections, labels)\nsv.plot_image(annotated_frame, (12, 12))\n
labels = [ f\"{results.names[class_id]} {confidence:0.2f}\" for class_id, confidence in zip(detections.class_id, detections.confidence) ] bounding_box_annotator = sv.BoundingBoxAnnotator(thickness=4) label_annotator = sv.LabelAnnotator(text_thickness=4, text_scale=2) annotated_frame = frame.copy() annotated_frame = bounding_box_annotator.annotate(annotated_frame, detections) annotated_frame = label_annotator.annotate(annotated_frame, detections, labels) sv.plot_image(annotated_frame, (12, 12))

To set the position of sv.LineZone, we need to define the start and end points. The position of each point is defined as a pair of coordinates (x, y). The origin of the coordinate system is located in the top-left corner of the frame. The x axis runs from left to right, and the y axis runs from top to bottom.

I decided to place my line horizontally, roughly two-thirds of the way down the frame (y=1500 on a 2160-pixel-tall frame). I obtained the full dimensions of the frame using sv.VideoInfo.

In\u00a0[14]: Copied!
sv.VideoInfo.from_video_path(SOURCE_VIDEO_PATH)\n
sv.VideoInfo.from_video_path(SOURCE_VIDEO_PATH) Out[14]:
VideoInfo(width=3840, height=2160, fps=25, total_frames=538)
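As a small sketch, the line endpoints used below can also be derived from the video dimensions instead of being hard-coded (width and height are the attributes visible in the VideoInfo output above):

video_info = sv.VideoInfo.from_video_path(SOURCE_VIDEO_PATH)\n\n# y coordinate used in this cookbook; video_info.height // 2 would be the exact midpoint\nLINE_Y = 1500\nSTART = sv.Point(0, LINE_Y)\nEND = sv.Point(video_info.width, LINE_Y)\n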

The line we've created, together with the in_count and out_count, can be elegantly visualized using sv.LineZoneAnnotator. This tool also allows for extensive customization options; we can alter the color of both the line and the text, opt to hide the in/out counts, and adjust the labels. By default, the labels are set to in and out, but they can be tailored to fit the context of our project, providing a clear and intuitive display of object movement across the designated line.

In\u00a0[22]: Copied!
START = sv.Point(0, 1500)\nEND = sv.Point(3840, 1500)\n\nline_zone = sv.LineZone(start=START, end=END)\n\nline_zone_annotator = sv.LineZoneAnnotator(\n    thickness=4,\n    text_thickness=4,\n    text_scale=2)\n\nannotated_frame = frame.copy()\nannotated_frame = line_zone_annotator.annotate(annotated_frame, line_counter=line_zone)\nsv.plot_image(annotated_frame, (12, 12))\n
START = sv.Point(0, 1500) END = sv.Point(3840, 1500) line_zone = sv.LineZone(start=START, end=END) line_zone_annotator = sv.LineZoneAnnotator( thickness=4, text_thickness=4, text_scale=2) annotated_frame = frame.copy() annotated_frame = line_zone_annotator.annotate(annotated_frame, line_counter=line_zone) sv.plot_image(annotated_frame, (12, 12)) In\u00a0[18]: Copied!
byte_tracker = sv.ByteTrack()\n
byte_tracker = sv.ByteTrack()

For an even better visualization, we will add another annotator - sv.TraceAnnotator, which allows for drawing the path traversed by each object over the last few frames. We will use it in combination with sv.BoundingBoxAnnotator and sv.LabelAnnotator, which we became familiar with earlier.

In\u00a0[17]: Copied!
bounding_box_annotator = sv.BoundingBoxAnnotator(thickness=4)\nlabel_annotator = sv.LabelAnnotator(text_thickness=4, text_scale=2)\ntrace_annotator = sv.TraceAnnotator(thickness=4)\n
bounding_box_annotator = sv.BoundingBoxAnnotator(thickness=4) label_annotator = sv.LabelAnnotator(text_thickness=4, text_scale=2) trace_annotator = sv.TraceAnnotator(thickness=4)

All the operations we plan to perform for each frame of our video - detection, tracking, annotation, and counting - are encapsulated in a function named callback.

In\u00a0[19]: Copied!
def callback(frame: np.ndarray, index:int) -> np.ndarray:\n    results = model(frame, verbose=False)[0]\n    detections = sv.Detections.from_ultralytics(results)\n    detections = byte_tracker.update_with_detections(detections)\n\n    labels = [\n        f\"#{tracker_id} {model.model.names[class_id]} {confidence:0.2f}\"\n        for confidence, class_id, tracker_id\n        in zip(detections.confidence, detections.class_id, detections.tracker_id)\n    ]\n\n    annotated_frame = frame.copy()\n    annotated_frame = trace_annotator.annotate(\n        scene=annotated_frame,\n        detections=detections)\n    annotated_frame = bounding_box_annotator.annotate(\n        scene=annotated_frame,\n        detections=detections)\n    annotated_frame = label_annotator.annotate(\n        scene=annotated_frame,\n        detections=detections,\n        labels=labels)\n\n    line_zone.trigger(detections)\n\n    return  line_zone_annotator.annotate(annotated_frame, line_counter=line_zone)\n
def callback(frame: np.ndarray, index:int) -> np.ndarray: results = model(frame, verbose=False)[0] detections = sv.Detections.from_ultralytics(results) detections = byte_tracker.update_with_detections(detections) labels = [ f\"#{tracker_id} {model.model.names[class_id]} {confidence:0.2f}\" for confidence, class_id, tracker_id in zip(detections.confidence, detections.class_id, detections.tracker_id) ] annotated_frame = frame.copy() annotated_frame = trace_annotator.annotate( scene=annotated_frame, detections=detections) annotated_frame = bounding_box_annotator.annotate( scene=annotated_frame, detections=detections) annotated_frame = label_annotator.annotate( scene=annotated_frame, detections=detections, labels=labels) line_zone.trigger(detections) return line_zone_annotator.annotate(annotated_frame, line_counter=line_zone)

Finally, we are ready to process our entire video. We will use sv.process_video and pass to it the previously defined SOURCE_VIDEO_PATH, TARGET_VIDEO_PATH, and callback.

In\u00a0[21]: Copied!
TARGET_VIDEO_PATH = f\"{HOME}/count-objects-crossing-the-line-result.mp4\"\n
TARGET_VIDEO_PATH = f\"{HOME}/count-objects-crossing-the-line-result.mp4\" In\u00a0[20]: Copied!
sv.process_video(\n    source_path = SOURCE_VIDEO_PATH,\n    target_path = TARGET_VIDEO_PATH,\n    callback=callback\n)\n
sv.process_video( source_path = SOURCE_VIDEO_PATH, target_path = TARGET_VIDEO_PATH, callback=callback )
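If you also want the final totals as plain numbers, a minimal sketch (in_count and out_count are the counters mentioned earlier; they are updated by line_zone.trigger inside the callback):

# inspect the totals after the whole video has been processed\nprint(f\"in: {line_zone.in_count}, out: {line_zone.out_count}\")\n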

Keep in mind that the video preview below works only in the web version of the cookbooks and not in Google Colab.

"},{"location":"notebooks/count-objects-crossing-the-line/#count-objects-crossing-the-line","title":"Count Objects Crossing the Line\u00b6","text":""},{"location":"notebooks/count-objects-crossing-the-line/#before-you-start","title":"Before you start\u00b6","text":"

Let's make sure that we have access to a GPU. We can use the nvidia-smi command to do that. In case of any problems, navigate to Edit -> Notebook settings -> Hardware accelerator, set it to GPU, and then click Save.

"},{"location":"notebooks/count-objects-crossing-the-line/#install-required-packages","title":"Install required packages\u00b6","text":"

In this cookbook, we'll leverage two Python packages - ultralytics for running object detection, and supervision for tracking, visualizing detections, and crucially, counting objects that cross a line.
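As a rough sketch (the notebook's actual install cell may pin specific versions), installing both packages in Colab can be as simple as:

!pip install -q ultralytics supervision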

"},{"location":"notebooks/count-objects-crossing-the-line/#imports","title":"Imports\u00b6","text":""},{"location":"notebooks/count-objects-crossing-the-line/#download-video","title":"Download video\u00b6","text":""},{"location":"notebooks/count-objects-crossing-the-line/#read-single-frame-from-video","title":"Read single frame from video\u00b6","text":""},{"location":"notebooks/count-objects-crossing-the-line/#run-object-detection","title":"Run Object Detection\u00b6","text":"

Let's start by running the detection model on the first frame and annotating the results. In this cookbook, we use Ultralytics YOLOv8, but it can be successfully replaced with other models.
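As a minimal sketch of that first-frame step, assuming SOURCE_VIDEO_PATH is already defined and using the small yolov8n.pt checkpoint purely for illustration (the notebook itself may load a larger model):

import supervision as sv
from ultralytics import YOLO

model = YOLO("yolov8n.pt")  # illustrative checkpoint; any YOLOv8 weights work here

# Grab the first frame of the video and run the detector on it.
frame = next(sv.get_video_frames_generator(SOURCE_VIDEO_PATH))
results = model(frame, verbose=False)[0]
detections = sv.Detections.from_ultralytics(results)

# Draw the raw boxes to sanity-check the detections.
annotated_frame = sv.BoundingBoxAnnotator(thickness=4).annotate(frame.copy(), detections)
sv.plot_image(annotated_frame, (12, 12))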

"},{"location":"notebooks/count-objects-crossing-the-line/#improve-vizualizations","title":"Improve Vizualizations\u00b6","text":""},{"location":"notebooks/count-objects-crossing-the-line/#define-line-position","title":"Define Line Position\u00b6","text":""},{"location":"notebooks/count-objects-crossing-the-line/#process-video","title":"Process Video\u00b6","text":""},{"location":"notebooks/download-supervision-assets/","title":"Download Supervision Assets","text":"In\u00a0[10]: Copied!
!pip install -q \"supervision[assets]\"\n
!pip install -q \"supervision[assets]\" In\u00a0[\u00a0]: Copied!
from supervision.assets import download_assets, VideoAssets\n\n# Download a video of the subway.\npath_to_video = download_assets(VideoAssets.SUBWAY)\n
from supervision.assets import download_assets, VideoAssets # Download a video of the subway. path_to_video = download_assets(VideoAssets.SUBWAY)

With this, we now have a high-quality video asset for use in demos. Let's take a look at what we downloaded. Keep in mind that the video preview below works only in the web version of the cookbooks and not in Google Colab.

We're now equipped with a video asset from Supervision to run some experiments on! For more information on available assets, visit the Supervision API Reference. Happy building!

"},{"location":"notebooks/download-supervision-assets/#download-supervision-assets","title":"Download Supervision Assets\u00b6","text":"

When experimenting with interesting and useful features of the Supervision package, it's important to have some sort of image or video data to experiment with. Luckily for us, Supervision ships with Assets! Assets is a collection of videos that you can utilize to start experimenting with the various features Supervision has to offer. Let's take a look at how to use this resource.

"},{"location":"notebooks/download-supervision-assets/#install-dependencies","title":"Install Dependencies\u00b6","text":""},{"location":"notebooks/download-supervision-assets/#download-a-video","title":"Download a Video\u00b6","text":"

From here, we can download and utilize a video asset directly from a Python script! Note below that we're utilizing the method download_assets to download the VideoAssets.SUBWAY video to our local directory. This method returns the file path, so we can then utilize this path for additional experimentation. Once the download completes, you will see a video asset to experiment with in your local directory.

"},{"location":"notebooks/evaluating-alignment-of-text-to-image-diffusion-models/","title":"Evaluating Alignment of Text-to-image Diffusion Models","text":"In\u00a0[1]: Copied!
!nvidia-smi\n
!nvidia-smi
Thu Feb 29 18:16:26 2024       \n+---------------------------------------------------------------------------------------+\n| NVIDIA-SMI 535.104.05             Driver Version: 535.104.05   CUDA Version: 12.2     |\n|-----------------------------------------+----------------------+----------------------+\n| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |\n| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |\n|                                         |                      |               MIG M. |\n|=========================================+======================+======================|\n|   0  Tesla T4                       Off | 00000000:00:04.0 Off |                    0 |\n| N/A   46C    P8               9W /  70W |      0MiB / 15360MiB |      0%      Default |\n|                                         |                      |                  N/A |\n+-----------------------------------------+----------------------+----------------------+\n                                                                                         \n+---------------------------------------------------------------------------------------+\n| Processes:                                                                            |\n|  GPU   GI   CI        PID   Type   Process name                            GPU Memory |\n|        ID   ID                                                             Usage      |\n|=======================================================================================|\n|  No running processes found                                                           |\n+---------------------------------------------------------------------------------------+\n
In\u00a0[\u00a0]: Copied!
!pip install -q torch diffusers accelerate inference-gpu[yolo-world] dill git+https://github.com/openai/CLIP.git supervision==0.19.0rc5\n
!pip install -q torch diffusers accelerate inference-gpu[yolo-world] dill git+https://github.com/openai/CLIP.git supervision==0.19.0rc5 In\u00a0[\u00a0]: Copied!
import itertools\nimport cv2\nfrom diffusers import StableDiffusionXLPipeline\nimport numpy as np\nfrom PIL import Image\nimport supervision as sv\nimport torch\nfrom inference.models import YOLOWorld\n
import itertools import cv2 from diffusers import StableDiffusionXLPipeline import numpy as np from PIL import Image import supervision as sv import torch from inference.models import YOLOWorld In\u00a0[\u00a0]: Copied!
pipeline = StableDiffusionXLPipeline.from_pretrained(\n    \"stabilityai/stable-diffusion-xl-base-1.0\",\n    torch_dtype=torch.float16,\n    variant=\"fp16\",\n    use_safetensors=True,\n).to(\"cuda\")\n
pipeline = StableDiffusionXLPipeline.from_pretrained( \"stabilityai/stable-diffusion-xl-base-1.0\", torch_dtype=torch.float16, variant=\"fp16\", use_safetensors=True, ).to(\"cuda\")

In this example, we'll focus on generating an image of a black cat playing with a blue ball next to a parked white car. We don't care about the aesthetic aspect of the image.

In\u00a0[41]: Copied!
PROMPT = \"a black cat playing with a blue ball next to a parked white car, wide angle, photorealistic\"\nNEGATIVE_PROMPT = \"low quality, blurred, text, illustration\"\nWIDTH, HEIGHT = 1024, 768\nSEED = 9213799\n\nimage = pipeline(\n    prompt=PROMPT,\n    negative_prompt=NEGATIVE_PROMPT,\n    generator=torch.manual_seed(SEED),\n    width=WIDTH,\n    height=HEIGHT,\n).images[0]\nimage\n
PROMPT = \"a black cat playing with a blue ball next to a parked white car, wide angle, photorealistic\" NEGATIVE_PROMPT = \"low quality, blurred, text, illustration\" WIDTH, HEIGHT = 1024, 768 SEED = 9213799 image = pipeline( prompt=PROMPT, negative_prompt=NEGATIVE_PROMPT, generator=torch.manual_seed(SEED), width=WIDTH, height=HEIGHT, ).images[0] image
  0%|          | 0/50 [00:00<?, ?it/s]
Out[41]:

Not bad! The results seem to be well-aligned with the prompt.

In\u00a0[42]: Copied!
model = YOLOWorld(model_id=\"yolo_world/l\")\n
model = YOLOWorld(model_id=\"yolo_world/l\")

The YOLO-World model allows us to define our own set of labels. Let's create one by combining lists of pre-defined colors and objects.

In\u00a0[43]: Copied!
COLORS = [\"green\", \"yellow\", \"black\", \"blue\", \"red\", \"white\", \"orange\"]\nOBJECTS = [\"car\", \"cat\", \"ball\", \"dog\", \"tree\", \"house\", \"person\"]\nCLASSES = [f\"{color} {obj}\" for color, obj in itertools.product(COLORS, OBJECTS)]\nprint(\"Number of labels:\", len(CLASSES))\n
COLORS = [\"green\", \"yellow\", \"black\", \"blue\", \"red\", \"white\", \"orange\"] OBJECTS = [\"car\", \"cat\", \"ball\", \"dog\", \"tree\", \"house\", \"person\"] CLASSES = [f\"{color} {obj}\" for color, obj in itertools.product(COLORS, OBJECTS)] print(\"Number of labels:\", len(CLASSES))
Number of labels: 49\n

Let's feed these labels into our model:

In\u00a0[44]: Copied!
model.set_classes(CLASSES)\n
model.set_classes(CLASSES)

Time to detect some objects!

In\u00a0[45]: Copied!
results = model.infer(image)\n
results = model.infer(image)

We'll convert the results to the sv.Detections format to enable features like filtering or annotations.

In\u00a0[46]: Copied!
detections = sv.Detections.from_inference(results)\n
detections = sv.Detections.from_inference(results)

Speaking of which: we only care about strong detections, so we filter out those with confidence below 0.6.

In\u00a0[47]: Copied!
valid_detections = detections[detections.confidence >= 0.6]\n
valid_detections = detections[detections.confidence >= 0.6]

A quick peek at the detected labels and their scores:

In\u00a0[48]: Copied!
labels = [\n    f\"{CLASSES[class_id]} {confidence:0.2f}\"\n    for class_id, confidence\n    in zip(valid_detections.class_id, valid_detections.confidence)\n]\nlabels\n
labels = [ f\"{CLASSES[class_id]} {confidence:0.2f}\" for class_id, confidence in zip(valid_detections.class_id, valid_detections.confidence) ] labels Out[48]:
['blue ball 0.95', 'black cat 0.72', 'white car 0.68']
In\u00a0[49]: Copied!
bounding_box_annotator = sv.BoundingBoxAnnotator(thickness=2)\nlabel_annotator = sv.LabelAnnotator(text_thickness=1, text_scale=0.5, text_color=sv.Color.BLACK)\n
bounding_box_annotator = sv.BoundingBoxAnnotator(thickness=2) label_annotator = sv.LabelAnnotator(text_thickness=1, text_scale=0.5, text_color=sv.Color.BLACK)

Finally, annotating our image is as simple as calling annotate methods from our annotators:

In\u00a0[50]: Copied!
annotated_image = bounding_box_annotator.annotate(image, valid_detections)\nannotated_image = label_annotator.annotate(annotated_image, valid_detections, labels)\n\nsv.plot_image(annotated_image, (12, 12))\n
annotated_image = bounding_box_annotator.annotate(image, valid_detections) annotated_image = label_annotator.annotate(annotated_image, valid_detections, labels) sv.plot_image(annotated_image, (12, 12)) In\u00a0[51]: Copied!
GROUND_TRUTH = {\"black cat\", \"blue ball\", \"white car\"}\nprediction = {CLASSES[class_id] for class_id in valid_detections.class_id}\n\nprediction.issubset(GROUND_TRUTH)\n
GROUND_TRUTH = {\"black cat\", \"blue ball\", \"white car\"} prediction = {CLASSES[class_id] for class_id in valid_detections.class_id} prediction.issubset(GROUND_TRUTH) Out[51]:
True

Using sv.Detections makes it super easy to do.

"},{"location":"notebooks/evaluating-alignment-of-text-to-image-diffusion-models/#evaluating-alignment-of-text-to-image-diffusion-models","title":"Evaluating Alignment of Text-to-image Diffusion Models\u00b6","text":"

Click the Open in Colab button to run the cookbook on Google Colab.

"},{"location":"notebooks/evaluating-alignment-of-text-to-image-diffusion-models/#introduction","title":"Introduction\u00b6","text":"

It is a common scenario to evaluate text-to-image models for their alignment to the prompt. One way to test this is to use a set of prompts, each describing a number of objects and their basic physical properties (e.g. color), to generate images and manually evaluate the results. This process can be greatly improved using object detection models.

"},{"location":"notebooks/evaluating-alignment-of-text-to-image-diffusion-models/#before-you-start","title":"Before you start\u00b6","text":"

Let's make sure that we have access to a GPU. We can use the nvidia-smi command to do that. In case of any problems, navigate to Edit -> Notebook settings -> Hardware accelerator, set it to GPU, and then click Save.

"},{"location":"notebooks/evaluating-alignment-of-text-to-image-diffusion-models/#install-required-packages","title":"Install required packages\u00b6","text":"

In this cookbook, we'll leverage the following Python packages:

  • diffusers for image generation pipelines,
  • inference for running object detection,
  • supervision for visualizing detections.
"},{"location":"notebooks/evaluating-alignment-of-text-to-image-diffusion-models/#imports","title":"Imports\u00b6","text":""},{"location":"notebooks/evaluating-alignment-of-text-to-image-diffusion-models/#generating-an-image","title":"Generating an image\u00b6","text":"

We'll use the SDXL model to generate our image. Let's initialize our pipeline first:

"},{"location":"notebooks/evaluating-alignment-of-text-to-image-diffusion-models/#detecting-objects","title":"Detecting objects\u00b6","text":"

Now, let's see how we can detect the objects automatically. For this, we'll use the YOLO-World model from the inference library.

"},{"location":"notebooks/evaluating-alignment-of-text-to-image-diffusion-models/#visualizing-results","title":"Visualizing results\u00b6","text":"

Now, let's use the power of supervision to visualize them. Our output image is in Pillow format, but annotators accept images as either a BGR np.ndarray or Pillow's PIL.Image.Image.

Time to define how we want our detections to be visualized. A combination of sv.BoundingBoxAnnotator and sv.LabelAnnotator should be perfect.

"},{"location":"notebooks/evaluating-alignment-of-text-to-image-diffusion-models/#testing-it-automatically","title":"Testing it automatically\u00b6","text":"

We can also test if all requested objects are in the generated image by comparing a set of ground-truth labels with predicted ones:

"},{"location":"notebooks/evaluating-alignment-of-text-to-image-diffusion-models/#next-steps","title":"Next steps\u00b6","text":"

In this tutorial you learned how to detect and visualize objects for a simple image generation evaluation study.

With a pipeline capable of evaluating a single image in place, the natural next step is to run it on a set of pre-defined scenarios and calculate metrics.
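A minimal sketch of such a batch evaluation, reusing the pipeline, model, and CLASSES defined above; the SCENARIOS list and the alignment-rate metric below are illustrative assumptions rather than part of the original notebook:

# Hypothetical scenarios: each pairs a prompt with the set of objects it requests.
SCENARIOS = [
    {"prompt": "a black cat playing with a blue ball next to a parked white car",
     "expected": {"black cat", "blue ball", "white car"}},
    {"prompt": "a red dog sitting in front of a green house",
     "expected": {"red dog", "green house"}},
]

aligned = 0
for scenario in SCENARIOS:
    # Generate an image for the scenario (negative prompt, seed, etc. omitted for brevity).
    image = pipeline(prompt=scenario["prompt"]).images[0]

    # Detect objects and keep only confident predictions.
    detections = sv.Detections.from_inference(model.infer(image))
    detections = detections[detections.confidence >= 0.6]
    predicted = {CLASSES[class_id] for class_id in detections.class_id}

    # Count the scenario as aligned if every requested object was detected.
    aligned += scenario["expected"].issubset(predicted)

print(f"Alignment rate: {aligned / len(SCENARIOS):.0%}")

From there, the per-scenario results can be aggregated into whatever metric suits your study, for example an alignment rate per object category.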

"},{"location":"notebooks/object-tracking/","title":"Object Tracking","text":"In\u00a0[1]: Copied!
!nvidia-smi\n
!nvidia-smi
Fri Feb 23 03:18:02 2024       \n+---------------------------------------------------------------------------------------+\n| NVIDIA-SMI 535.104.05             Driver Version: 535.104.05   CUDA Version: 12.2     |\n|-----------------------------------------+----------------------+----------------------+\n| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |\n| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |\n|                                         |                      |               MIG M. |\n|=========================================+======================+======================|\n|   0  Tesla V100-SXM2-16GB           Off | 00000000:00:04.0 Off |                    0 |\n| N/A   33C    P0              24W / 300W |      0MiB / 16384MiB |      0%      Default |\n|                                         |                      |                  N/A |\n+-----------------------------------------+----------------------+----------------------+\n                                                                                         \n+---------------------------------------------------------------------------------------+\n| Processes:                                                                            |\n|  GPU   GI   CI        PID   Type   Process name                            GPU Memory |\n|        ID   ID                                                             Usage      |\n|=======================================================================================|\n|  No running processes found                                                           |\n+---------------------------------------------------------------------------------------+\n
In\u00a0[\u00a0]: Copied!
!pip install -q inference-gpu \"supervision[assets]\"\n
!pip install -q inference-gpu \"supervision[assets]\" In\u00a0[\u00a0]: Copied!
from supervision.assets import download_assets, VideoAssets\n\n# Download a supervision video asset\npath_to_video = download_assets(VideoAssets.PEOPLE_WALKING)\n
from supervision.assets import download_assets, VideoAssets # Download a supervision video asset path_to_video = download_assets(VideoAssets.PEOPLE_WALKING) In\u00a0[4]: Copied!
import supervision as sv\nfrom inference.models.utils import get_roboflow_model\n\n# Load a pre-trained yolov8 nano model from Roboflow Inference.\nmodel = get_roboflow_model('yolov8n-640')\n\n# Create a video info object from the video path.\nvideo_info = sv.VideoInfo.from_video_path(path_to_video)\n\n# Create a label annotator for labeling detections with our tracker_id.\nlabel = sv.LabelAnnotator()\n\n# Create a ByteTrack object to track detections.\nbyte_tracker = sv.ByteTrack(frame_rate=video_info.fps)\n\n# Create a frame generator from video path for iteration of frames.\nframe_generator = sv.get_video_frames_generator(path_to_video)\n\n# Grab a frame from the frame_generator.\nframe = next(frame_generator)\n\n# Run inference on the frame by passing it to our model.\nresult = model.infer(frame)[0]\n\n# Convert model results to a supervision detection object.\ndetections = sv.Detections.from_inference(result)\n\n# Update detections with tracker ids from byte_tracker.\ntracked_detections = byte_tracker.update_with_detections(detections)\n\n# Create labels with tracker_id for label annotator.\nlabels = [ f\"{tracker_id}\" for tracker_id in tracked_detections.tracker_id ]\n\n# Apply label annotator to frame.\nannotated_frame = label.annotate(scene=frame.copy(), detections=tracked_detections, labels=labels)\n\n# Display the frame.\nsv.plot_image(annotated_frame)\n
import supervision as sv from inference.models.utils import get_roboflow_model # Load a pre-trained yolov8 nano model from Roboflow Inference. model = get_roboflow_model('yolov8n-640') # Create a video info object from the video path. video_info = sv.VideoInfo.from_video_path(path_to_video) # Create a label annotator for labeling detections with our tracker_id. label = sv.LabelAnnotator() # Create a ByteTrack object to track detections. byte_tracker = sv.ByteTrack(frame_rate=video_info.fps) # Create a frame generator from video path for iteration of frames. frame_generator = sv.get_video_frames_generator(path_to_video) # Grab a frame from the frame_generator. frame = next(frame_generator) # Run inference on the frame by passing it to our model. result = model.infer(frame)[0] # Convert model results to a supervision detection object. detections = sv.Detections.from_inference(result) # Update detections with tracker ids from byte_tracker. tracked_detections = byte_tracker.update_with_detections(detections) # Create labels with tracker_id for label annotator. labels = [ f\"{tracker_id}\" for tracker_id in tracked_detections.tracker_id ] # Apply label annotator to frame. annotated_frame = label.annotate(scene=frame.copy(), detections=tracked_detections, labels=labels) # Display the frame. sv.plot_image(annotated_frame) In\u00a0[\u00a0]: Copied!
from tqdm import tqdm\n\n# Load a pre-trained yolov8 nano model from Roboflow Inference.\nmodel = get_roboflow_model('yolov8n-640')\n\n# Create a video info object from the video path.\nvideo_info = sv.VideoInfo.from_video_path(path_to_video)\n\n# Create a label annotator for labeling detections with our tracker_id.\nlabel = sv.LabelAnnotator()\n\n# Create a ByteTrack object to track detections.\nbyte_tracker = sv.ByteTrack(frame_rate=video_info.fps)\n\n# Create a frame generator from video path for iteration of frames.\nframe_generator = sv.get_video_frames_generator(path_to_video)\n\n# Create a video sink context manager to save resulting video.\nwith sv.VideoSink(target_path=\"output.mp4\", video_info=video_info) as sink:\n\n    # Iterate through frames yielded from the frame_generator.\n    for frame in tqdm(frame_generator, total=video_info.total_frames):\n\n        # Run inference on the frame by passing it to our model.\n        result = model.infer(frame)[0]\n\n        # Convert model results to a supervision detection object.\n        detections = sv.Detections.from_inference(result)\n\n        # Update detections with tracker ids from byte_tracker.\n        tracked_detections = byte_tracker.update_with_detections(detections)\n\n        # Create labels with tracker_id for label annotator.\n        labels = [ f\"{tracker_id}\" for tracker_id in tracked_detections.tracker_id ]\n\n        # Apply label annotator to frame.\n        annotated_frame = label.annotate(scene=frame.copy(), detections=tracked_detections, labels=labels)\n\n        # Save the annotated frame to an output video.\n        sink.write_frame(frame=annotated_frame)\n
from tqdm import tqdm # Load a pre-trained yolov8 nano model from Roboflow Inference. model = get_roboflow_model('yolov8n-640') # Create a video info object from the video path. video_info = sv.VideoInfo.from_video_path(path_to_video) # Create a label annotator for labeling detections with our tracker_id. label = sv.LabelAnnotator() # Create a ByteTrack object to track detections. byte_tracker = sv.ByteTrack(frame_rate=video_info.fps) # Create a frame generator from video path for iteration of frames. frame_generator = sv.get_video_frames_generator(path_to_video) # Create a video sink context manager to save resulting video. with sv.VideoSink(target_path=\"output.mp4\", video_info=video_info) as sink: # Iterate through frames yielded from the frame_generator. for frame in tqdm(frame_generator, total=video_info.total_frames): # Run inference on the frame by passing it to our model. result = model.infer(frame)[0] # Convert model results to a supervision detection object. detections = sv.Detections.from_inference(result) # Update detections with tracker ids from byte_tracker. tracked_detections = byte_tracker.update_with_detections(detections) # Create labels with tracker_id for label annotator. labels = [ f\"{tracker_id}\" for tracker_id in tracked_detections.tracker_id ] # Apply label annotator to frame. annotated_frame = label.annotate(scene=frame.copy(), detections=tracked_detections, labels=labels) # Save the annotated frame to an output video. sink.write_frame(frame=annotated_frame)

Let's take a look at our resulting video. It will also be created in your current directory with the name output.mp4. Notice how, even with a little flicker, we can see the tracker_id on the people walking in the video. With trackers under your belt, there is now a wide variety of use cases you can solve for! Happy building!

"},{"location":"notebooks/object-tracking/#object-tracking","title":"Object Tracking\u00b6","text":"

In some cases, it's important for us to track objects across multiple frames of a video. For example, we may need to figure out the direction a vehicle is moving, or count objects in a frame. Some Supervision Annotators and Tools, like LineZone, require tracking to be set up. In this cookbook, we'll cover how to get a tracker up and running for use in your computer vision applications.

"},{"location":"notebooks/object-tracking/#what-is-a-tracker","title":"What is a Tracker?\u00b6","text":"

A tracker is a piece of code that identifies objects across frames and assigns each a unique tracker_id. There are a few popular trackers at the time of writing, including ByteTrack and Bot-SORT. Supervision makes using trackers a breeze and comes with ByteTrack built-in.
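Stripped down to its core, using the built-in tracker is a single extra call per frame. Here is a minimal sketch, assuming detections is an sv.Detections object produced by any detection model:

import supervision as sv

tracker = sv.ByteTrack()

# Call this once per frame: the tracker matches detections across frames
# and returns them with a tracker_id assigned to each object.
tracked_detections = tracker.update_with_detections(detections)
print(tracked_detections.tracker_id)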

"},{"location":"notebooks/object-tracking/#before-you-start","title":"Before you start\u00b6","text":"

Let's make sure that we have access to a GPU. We can use the nvidia-smi command to do that. In case of any problems, navigate to Edit -> Notebook settings -> Hardware accelerator, set it to GPU, and then click Save.

"},{"location":"notebooks/object-tracking/#install-dependencies","title":"Install Dependencies\u00b6","text":""},{"location":"notebooks/object-tracking/#download-a-video-asset","title":"Download a Video Asset\u00b6","text":"

Now that we have our environment set up, let's download a video that we can detect objects in. Supervision comes with a great utility to help us hit the ground running. We can use the snippet below to save a video asset to our local directory. It can also be accessed with the variable path_to_video for additional application logic.

"},{"location":"notebooks/object-tracking/#tracking-objects-in-a-frame","title":"Tracking Objects in a Frame\u00b6","text":"

Now that we have our video downloaded, let's get to work on tracking objects. We'll first pull in a model from Roboflow Inference to detect people in our video. Then we'll create a byte_tracker object that we'll pass our detections to. This will give us a tracker_id for each detection, which we'll then use with a label_annotator to display the tracker id on the frame.

"},{"location":"notebooks/object-tracking/#tracking-objects-in-a-video","title":"Tracking Objects in a Video\u00b6","text":"

Finally, we'll use a utility called VideoSink to save the annotated frames to a video. Let's dive into the code.

"},{"location":"notebooks/occupancy_analytics/","title":"How To Analyze Occupancy with Supervision","text":"In\u00a0[\u00a0]: Copied!
!nvidia-smi\n
!nvidia-smi In\u00a0[\u00a0]: Copied!
!pip install roboflow supervision==0.19.0 -q\n
!pip install roboflow supervision==0.19.0 -q In\u00a0[\u00a0]: Copied!
VIDEO_PATH = \"/content/parkinglot1080.mov\"\n
VIDEO_PATH = \"/content/parkinglot1080.mov\"

First, let's create a directory to save the video frames.

In\u00a0[\u00a0]: Copied!
import os\n\nFRAMES_DIR = \"/content/frames\"\nos.mkdir(FRAMES_DIR)\n
import os FRAMES_DIR = \"/content/frames\" os.mkdir(FRAMES_DIR)

Then, we can use Supervision's get_video_frames_generator function to extract our video frames and save them.

In\u00a0[\u00a0]: Copied!
import supervision as sv\nfrom PIL import Image\n\nframes_generator = sv.get_video_frames_generator(VIDEO_PATH)\n\nfor i, frame in enumerate(frames_generator):\n  img = Image.fromarray(frame)\n  img.save(f\"{FRAMES_DIR}/{i}.jpg\")\n\nprint(f\"Saved frames to {FRAMES_DIR}\")\n
import supervision as sv from PIL import Image frames_generator = sv.get_video_frames_generator(VIDEO_PATH) for i, frame in enumerate(frames_generator): img = Image.fromarray(frame) img.save(f\"{FRAMES_DIR}/{i}.jpg\") print(f\"Saved frames to {FRAMES_DIR}\")
Saved frames to /content/frames\n
In\u00a0[\u00a0]: Copied!
# Note: This code block was written by ChatGPT\n\nimport os\nimport random\nfrom PIL import Image\nimport numpy as np\n\n# import shutil\n# shutil.rmtree(\"augmented\")\n\ndef random_crop(img):\n    width, height = img.size\n\n    crop_width = random.randint(int(width * 0.1), int(width * 0.4))\n    crop_height = random.randint(int(height * 0.1), int(height * 0.4))\n\n    left = random.randint(0, width - crop_width)\n    top = random.randint(0, height - crop_height)\n\n    return img.crop((left, top, left + crop_width, top + crop_height))\n\ndef augment_images(source_folder, target_folder, num_images=100):\n    if not os.path.exists(target_folder):\n        os.makedirs(target_folder)\n\n    all_images = [file for file in os.listdir(source_folder) if file.endswith('.jpg')]\n\n    selected_images = np.random.choice(all_images, size=min(num_images, len(all_images)), replace=False)\n\n    for i, filename in enumerate(selected_images):\n        with Image.open(os.path.join(source_folder, filename)) as img:\n            cropped_img = random_crop(img)\n            cropped_img.save(os.path.join(target_folder, f'augmented_{i}.jpg'))\n\n# Paths to the source and target folders\nsource_folder = '/content/frames'\ntarget_folder = '/content/augmented'\n\n# Augment images\naugment_images(source_folder, target_folder)\n
# Note: This code block was written by ChatGPT import os import random from PIL import Image import numpy as np # import shutil # shutil.rmtree(\"augmented\") def random_crop(img): width, height = img.size crop_width = random.randint(int(width * 0.1), int(width * 0.4)) crop_height = random.randint(int(height * 0.1), int(height * 0.4)) left = random.randint(0, width - crop_width) top = random.randint(0, height - crop_height) return img.crop((left, top, left + crop_width, top + crop_height)) def augment_images(source_folder, target_folder, num_images=100): if not os.path.exists(target_folder): os.makedirs(target_folder) all_images = [file for file in os.listdir(source_folder) if file.endswith('.jpg')] selected_images = np.random.choice(all_images, size=min(num_images, len(all_images)), replace=False) for i, filename in enumerate(selected_images): with Image.open(os.path.join(source_folder, filename)) as img: cropped_img = random_crop(img) cropped_img.save(os.path.join(target_folder, f'augmented_{i}.jpg')) # Paths to the source and target folders source_folder = '/content/frames' target_folder = '/content/augmented' # Augment images augment_images(source_folder, target_folder) In\u00a0[\u00a0]: Copied!
# Upload the extracted frames to Roboflow\nimport os\nimport roboflow\n\nrf = roboflow.Roboflow(api_key=\"YOUR_ROBOFLOW_API_KEY\")\nproject = rf.workspace().project(\"parking-lot-occupancy-detection-eoaek\")\n\nfor filename in os.listdir(FRAMES_DIR):\n  img_path = os.path.join(FRAMES_DIR, filename)\n  if os.path.isfile(img_path):\n      project.upload(image_path=img_path)\n
# Upload the extracted frames to Roboflow import os import roboflow rf = roboflow.Roboflow(api_key=\"YOUR_ROBOFLOW_API_KEY\") project = rf.workspace().project(\"parking-lot-occupancy-detection-eoaek\") for filename in os.listdir(FRAMES_DIR): img_path = os.path.join(FRAMES_DIR, filename) if os.path.isfile(img_path): project.upload(image_path=img_path)
loading Roboflow workspace...\nloading Roboflow project...\n
In\u00a0[\u00a0]: Copied!
# PASTE CODE FROM ROBOFLOW HERE\n
# PASTE CODE FROM ROBOFLOW HERE In\u00a0[\u00a0]: Copied!
from roboflow import Roboflow\nimport supervision as sv\nimport numpy as np\nimport cv2\n\nrf = Roboflow(api_key=\"YOUR_ROBOFLOW_API_KEY\") # Get your own API key - This one won't work\nproject = rf.workspace().project(\"parking-lot-occupancy-detection-eoaek\")\nmodel = project.version(\"5\").model\n\ndef callback(x: np.ndarray) -> sv.Detections:\n    result = model.predict(x, confidence=25, overlap=30).json()\n    return sv.Detections.from_inference(result)\n
from roboflow import Roboflow import supervision as sv import numpy as np import cv2 rf = Roboflow(api_key=\"YOUR_ROBOFLOW_API_KEY\") # Get your own API key - This one won't work project = rf.workspace().project(\"parking-lot-occupancy-detection-eoaek\") model = project.version(\"5\").model def callback(x: np.ndarray) -> sv.Detections: result = model.predict(x, confidence=25, overlap=30).json() return sv.Detections.from_inference(result)
loading Roboflow workspace...\nloading Roboflow project...\n
In\u00a0[\u00a0]: Copied!
# Polygons From PolygonZone\n\nzones = [\n    {\n        'name': \"Zone 1\",\n        'polygon': np.array([[229, 50],[-3, 306],[1, 614],[369, 50]]),\n        'max': 32\n    },\n    {\n        'name': 'Zone 2',\n        'polygon': np.array([[465, 46],[177, 574],[401, 578],[609, 46]]),\n        'max': 38\n    },\n    {\n        'name': 'Zone 3',\n        'polygon': np.array([[697, 58],[461, 858],[737, 858],[849, 58]]),\n        'max': 46\n    },\n    {\n        'name': 'Zone 4',\n        'polygon': np.array([[941, 58],[909, 862],[1273, 858],[1137, 58]]),\n        'max': 48\n    },\n    {\n        'name': 'Zone 5',\n        'polygon': np.array([[1229, 46],[1501, 1078],[1889, 1078],[1405, 46]]),\n        'max': 52\n    }\n]\n
# Polygons From PolygonZone zones = [ { 'name': \"Zone 1\", 'polygon': np.array([[229, 50],[-3, 306],[1, 614],[369, 50]]), 'max': 32 }, { 'name': 'Zone 2', 'polygon': np.array([[465, 46],[177, 574],[401, 578],[609, 46]]), 'max': 38 }, { 'name': 'Zone 3', 'polygon': np.array([[697, 58],[461, 858],[737, 858],[849, 58]]), 'max': 46 }, { 'name': 'Zone 4', 'polygon': np.array([[941, 58],[909, 862],[1273, 858],[1137, 58]]), 'max': 48 }, { 'name': 'Zone 5', 'polygon': np.array([[1229, 46],[1501, 1078],[1889, 1078],[1405, 46]]), 'max': 52 } ] In\u00a0[\u00a0]: Copied!
tracker = sv.ByteTrack()\nslicer = sv.InferenceSlicer(\n    callback=callback,\n    slice_wh=(800, 800),\n    overlap_ratio_wh=(0.2, 0.2),\n    thread_workers=10,\n    iou_threshold=0.2\n)\ntriangle_annotator = sv.TriangleAnnotator(\n    base=20,\n    height=20\n)\nheat_map_annotator = sv.HeatMapAnnotator()\n\ndef setup_zones(frame_wh):\n  if zones:\n    for zone in zones:\n      zone['history'] = []\n      zone['PolygonZone'] = sv.PolygonZone(\n          polygon=zone['polygon'],\n          frame_resolution_wh=frame_wh\n      )\n      zone['PolygonZoneAnnotator'] = sv.PolygonZoneAnnotator(\n        zone=zone['PolygonZone'],\n        color=sv.Color.WHITE,\n        thickness=4,\n    )\n\ndef process_frame(frame,heatmap=None):\n    detections = slicer(image=frame)\n    detections = tracker.update_with_detections(detections)\n\n    annotated_frame = frame.copy()\n\n    annotated_frame = triangle_annotator.annotate(\n        scene=annotated_frame,\n        detections=detections\n    )\n\n    if heatmap is None:\n      heatmap = np.full(frame.shape, 255, dtype=np.uint8)\n\n    heat_map_annotator.annotate(\n      scene=heatmap,\n      detections=detections\n    )\n\n    if zones:\n      for zone in zones:\n        zone_presence = zone['PolygonZone'].trigger(detections)\n        zone_present_idxs = [idx for idx, present in enumerate(zone_presence) if present]\n        zone_present = detections[zone_present_idxs]\n\n        zone_count = len(zone_present)\n        zone['history'].append(zone_count)\n\n\n        annotated_frame = zone['PolygonZoneAnnotator'].annotate(\n            scene=annotated_frame,\n            label=f\"{zone['name']}: {zone_count}\"\n        )\n\n        # Heatmap\n        heatmap = zone['PolygonZoneAnnotator'].annotate(\n            scene=heatmap,\n            label=\" \"\n        )\n\n    return annotated_frame, heatmap\n
tracker = sv.ByteTrack() slicer = sv.InferenceSlicer( callback=callback, slice_wh=(800, 800), overlap_ratio_wh=(0.2, 0.2), thread_workers=10, iou_threshold=0.2 ) triangle_annotator = sv.TriangleAnnotator( base=20, height=20 ) heat_map_annotator = sv.HeatMapAnnotator() def setup_zones(frame_wh): if zones: for zone in zones: zone['history'] = [] zone['PolygonZone'] = sv.PolygonZone( polygon=zone['polygon'], frame_resolution_wh=frame_wh ) zone['PolygonZoneAnnotator'] = sv.PolygonZoneAnnotator( zone=zone['PolygonZone'], color=sv.Color.WHITE, thickness=4, ) def process_frame(frame,heatmap=None): detections = slicer(image=frame) detections = tracker.update_with_detections(detections) annotated_frame = frame.copy() annotated_frame = triangle_annotator.annotate( scene=annotated_frame, detections=detections ) if heatmap is None: heatmap = np.full(frame.shape, 255, dtype=np.uint8) heat_map_annotator.annotate( scene=heatmap, detections=detections ) if zones: for zone in zones: zone_presence = zone['PolygonZone'].trigger(detections) zone_present_idxs = [idx for idx, present in enumerate(zone_presence) if present] zone_present = detections[zone_present_idxs] zone_count = len(zone_present) zone['history'].append(zone_count) annotated_frame = zone['PolygonZoneAnnotator'].annotate( scene=annotated_frame, label=f\"{zone['name']}: {zone_count}\" ) # Heatmap heatmap = zone['PolygonZoneAnnotator'].annotate( scene=heatmap, label=\" \" ) return annotated_frame, heatmap In\u00a0[\u00a0]: Copied!
image = cv2.imread(\"./frames/5.jpg\")\nimage_wh = (image.shape[1],image.shape[0])\nsetup_zones(image_wh)\n\nannotated_image, heatmap = process_frame(image)\n\nsv.plot_image(annotated_image)\nsv.plot_image(heatmap)\n
image = cv2.imread(\"./frames/5.jpg\") image_wh = (image.shape[1],image.shape[0]) setup_zones(image_wh) annotated_image, heatmap = process_frame(image) sv.plot_image(annotated_image) sv.plot_image(heatmap)
(1920, 1080)\n
In\u00a0[\u00a0]: Copied!
# Credit to https://matplotlib.org/matplotblog/posts/matplotlib-cyberpunk-style/ for graph styles\n%matplotlib agg\nimport pandas as pd\nimport matplotlib.pyplot as plt\nfrom PIL import Image\nfrom io import BytesIO\n\ndef generate_graphs(max_frames):\n  plt.ioff()\n  # Plot Styles\n  plt.style.use(\"seaborn-dark\")\n  for param in ['figure.facecolor', 'axes.facecolor', 'savefig.facecolor']:\n      plt.rcParams[param] = '#212946'\n\n  for param in ['text.color', 'axes.labelcolor', 'xtick.color', 'ytick.color']:\n      plt.rcParams[param] = '0.9'\n\n\n  dataframe = pd.DataFrame()\n  graphs = {}\n\n\n  for zone in zones:\n    percentage_history = [(count/zone['max'])*100 for count in zone['history']]\n    dataframe[zone['name']] = percentage_history\n    plt.title(f'{zone[\"name\"]} Usage')\n\n    # Extra Styles\n    fig, ax1 = plt.subplots()\n    ax1.grid(color='#2A3459')\n\n    # Data\n    ax1.plot(zone[\"history\"])\n\n    # Axis Labeling\n    plt.ylabel('Vehicles')\n    plt.ylim(top=zone[\"max\"])\n    plt.xlim(right=max_frames)\n    ax2 = ax1.twinx()\n    ax2.set_ylabel('Occupied Percentage (%)')\n\n    # Export Graph Image\n    buf = BytesIO()\n    fig.savefig(buf, format='png', bbox_inches='tight', pad_inches=0)\n    buf.seek(0)\n    graphs[zone['name']] = Image.open(buf)\n    plt.close(fig)\n\n\n  plt.ioff()\n  dataframe.plot()\n\n  # Axis\n  plt.ylabel('Occupied (%)', fontsize=15)\n  plt.ylim(top=100)\n  plt.xlim(right=max_frames)\n\n  # Export combined\n  buf = BytesIO()\n  plt.savefig(buf, format='png', bbox_inches='tight')\n  buf.seek(0)\n\n  plt.close()\n\n  graphs['combined_percentage'] = Image.open(buf)\n\n  return graphs\n
# Credit to https://matplotlib.org/matplotblog/posts/matplotlib-cyberpunk-style/ for graph styles %matplotlib agg import pandas as pd import matplotlib.pyplot as plt from PIL import Image from io import BytesIO def generate_graphs(max_frames): plt.ioff() # Plot Styles plt.style.use(\"seaborn-dark\") for param in ['figure.facecolor', 'axes.facecolor', 'savefig.facecolor']: plt.rcParams[param] = '#212946' for param in ['text.color', 'axes.labelcolor', 'xtick.color', 'ytick.color']: plt.rcParams[param] = '0.9' dataframe = pd.DataFrame() graphs = {} for zone in zones: percentage_history = [(count/zone['max'])*100 for count in zone['history']] dataframe[zone['name']] = percentage_history plt.title(f'{zone[\"name\"]} Usage') # Extra Styles fig, ax1 = plt.subplots() ax1.grid(color='#2A3459') # Data ax1.plot(zone[\"history\"]) # Axis Labeling plt.ylabel('Vehicles') plt.ylim(top=zone[\"max\"]) plt.xlim(right=max_frames) ax2 = ax1.twinx() ax2.set_ylabel('Occupied Percentage (%)') # Export Graph Image buf = BytesIO() fig.savefig(buf, format='png', bbox_inches='tight', pad_inches=0) buf.seek(0) graphs[zone['name']] = Image.open(buf) plt.close(fig) plt.ioff() dataframe.plot() # Axis plt.ylabel('Occupied (%)', fontsize=15) plt.ylim(top=100) plt.xlim(right=max_frames) # Export combined buf = BytesIO() plt.savefig(buf, format='png', bbox_inches='tight') buf.seek(0) plt.close() graphs['combined_percentage'] = Image.open(buf) return graphs In\u00a0[\u00a0]: Copied!
generate_graphs(400)['combined_percentage']\n
generate_graphs(400)['combined_percentage'] Out[\u00a0]: In\u00a0[\u00a0]: Copied!
VIDEO_PATH = \"/content/parkinglot1080.mov\"\nMAIN_OUTPUT_PATH = \"/content/parkinglot_annotated.mp4\"\nframes_generator = sv.get_video_frames_generator(source_path=VIDEO_PATH)\nvideo_info = sv.VideoInfo.from_video_path(video_path=VIDEO_PATH)\n\nsetup_zones(video_info.resolution_wh)\n\n\nwith sv.VideoSink(target_path=MAIN_OUTPUT_PATH, video_info=video_info) as sink:\n  heatmap = None\n  for i, frame in enumerate(frames_generator):\n    print(f\"Processing frame {i}\")\n\n    # Infer\n    annotated_frame, heatmap = process_frame(frame, heatmap)\n\n    # Save the latest heatmap\n    Image.fromarray(heatmap).save(f\"/content/heatmap/{i}.jpg\")\n\n    # Create Graphs\n    graphs = generate_graphs(video_info.total_frames)\n    graph = graphs[\"combined_percentage\"].convert(\"RGB\")\n    graph.save(f\"/content/graphs/{i}.jpg\")\n\n    # sv.plot_image(annotated_frame)\n\n    # Send as frame to video\n    sink.write_frame(frame=annotated_frame)\n
VIDEO_PATH = \"/content/parkinglot1080.mov\" MAIN_OUTPUT_PATH = \"/content/parkinglot_annotated.mp4\" frames_generator = sv.get_video_frames_generator(source_path=VIDEO_PATH) video_info = sv.VideoInfo.from_video_path(video_path=VIDEO_PATH) setup_zones(video_info.resolution_wh) with sv.VideoSink(target_path=MAIN_OUTPUT_PATH, video_info=video_info) as sink: heatmap = None for i, frame in enumerate(frames_generator): print(f\"Processing frame {i}\") # Infer annotated_frame, heatmap = process_frame(frame, heatmap) # Save the latest heatmap Image.fromarray(heatmap).save(f\"/content/heatmap/{i}.jpg\") # Create Graphs graphs = generate_graphs(video_info.total_frames) graph = graphs[\"combined_percentage\"].convert(\"RGB\") graph.save(f\"/content/graphs/{i}.jpg\") # sv.plot_image(annotated_frame) # Send as frame to video sink.write_frame(frame=annotated_frame)
Processing frame 0\n
Processing frame 1\n
Processing frame 2\n
Processing frame 3\n
Processing frame 4\n
Processing frame 5\n
Processing frame 6\n
Processing frame 7\n
Processing frame 8\n
Processing frame 9\n
Processing frame 10\n
...
In\u00a0[\u00a0]: Copied!
import cv2\ndef create_videos_from_dir(dir,output):\n  images = len(os.listdir(dir))-1\n\n  sample_img_path = os.path.join(dir,f\"1.jpg\")\n  sample_img = cv2.imread(sample_img_path)\n  height, width, channels = sample_img.shape\n  video_info = sv.VideoInfo(width=width,height=height,fps=24,total_frames=images)\n\n  with sv.VideoSink(target_path=output, video_info=video_info) as sink:\n    for i in range(images):\n      path = os.path.join(dir,f\"{i}.jpg\")\n      img = cv2.imread(path)\n      sink.write_frame(frame=img)\n\n# Graphs\ncreate_videos_from_dir(\"/content/graphs\",\"/content/parkinglot_graph.mp4\")\n\n# Heatmap\ncreate_videos_from_dir(\"/content/heatmap\",\"/content/parkinglot_heatmap.mp4\")\n
import cv2 def create_videos_from_dir(dir,output): images = len(os.listdir(dir))-1 sample_img_path = os.path.join(dir,f\"1.jpg\") sample_img = cv2.imread(sample_img_path) height, width, channels = sample_img.shape video_info = sv.VideoInfo(width=width,height=height,fps=24,total_frames=images) with sv.VideoSink(target_path=output, video_info=video_info) as sink: for i in range(images): path = os.path.join(dir,f\"{i}.jpg\") img = cv2.imread(path) sink.write_frame(frame=img) # Graphs create_videos_from_dir(\"/content/graphs\",\"/content/parkinglot_graph.mp4\") # Heatmap create_videos_from_dir(\"/content/heatmap\",\"/content/parkinglot_heatmap.mp4\") In\u00a0[\u00a0]: Copied!
import pickle\n\nwith open('parkinglot_zonedata.pkl', 'wb') as outp:\n  pickle.dump(zones, outp, pickle.HIGHEST_PROTOCOL)\n
import pickle with open('parkinglot_zonedata.pkl', 'wb') as outp: pickle.dump(zones, outp, pickle.HIGHEST_PROTOCOL) In\u00a0[\u00a0]: Copied!
with open('parkinglot_zonedata.pkl', 'rb') as inp:\n    zones_imported = pickle.load(inp)\n    zones = zones_imported\n
with open('parkinglot_zonedata.pkl', 'rb') as inp: zones_imported = pickle.load(inp) zones = zones_imported In\u00a0[\u00a0]: Copied!
import statistics\nfor zone in zones:\n    occupancy_percent_history = [(count/zone['max'])*100 for count in zone['history']]\n    average_occupancy = round(statistics.mean(occupancy_percent_history))\n    median_occupancy = round(statistics.median(occupancy_percent_history))\n    highest_occupancy = round(max(occupancy_percent_history))\n    lowest_occupancy = round(min(occupancy_percent_history))\n    print(f\"{zone['name']} had an average occupancy of {average_occupancy}% with a median occupancy of {median_occupancy}%.\")\n
import statistics for zone in zones: occupancy_percent_history = [(count/zone['max'])*100 for count in zone['history']] average_occupancy = round(statistics.mean(occupancy_percent_history)) median_occupancy = round(statistics.median(occupancy_percent_history)) highest_occupancy = round(max(occupancy_percent_history)) lowest_occupancy = round(min(occupancy_percent_history)) print(f\"{zone['name']} had an average occupancy of {average_occupancy}% with a median occupancy of {median_occupancy}%.\")
Zone 1 had an average occupancy of 60% with a median occupancy of 59%.\nZone 2 had an average occupancy of 69% with a median occupancy of 68%.\nZone 3 had an average occupancy of 85% with a median occupancy of 85%.\nZone 4 had an average occupancy of 85% with a median occupancy of 85%.\nZone 5 had an average occupancy of 91% with a median occupancy of 92%.\n
In\u00a0[\u00a0]: Copied!
lot_history = []\nfor zone in zones:\n    for idx, entry in enumerate(zone['history']):\n      if(idx >= len(lot_history) or len(lot_history)==0): lot_history.append([])\n      lot_history[idx].append(zone['history'][idx]/zone['max'])\n\nlot_occupancy_history = [sum(entry)/len(entry)*100 for entry in lot_history]\n\naverage_occupancy = round(statistics.mean(lot_occupancy_history))\nmedian_occupancy = round(statistics.median(lot_occupancy_history))\nhighest_occupancy = round(max(lot_occupancy_history))\nlowest_occupancy = round(min(lot_occupancy_history))\n\nprint(f\"The entire lot had an average occupancy of {average_occupancy}% with a median occupancy of {median_occupancy}%.\")\n
lot_history = [] for zone in zones: for idx, entry in enumerate(zone['history']): if(idx >= len(lot_history) or len(lot_history)==0): lot_history.append([]) lot_history[idx].append(zone['history'][idx]/zone['max']) lot_occupancy_history = [sum(entry)/len(entry)*100 for entry in lot_history] average_occupancy = round(statistics.mean(lot_occupancy_history)) median_occupancy = round(statistics.median(lot_occupancy_history)) highest_occupancy = round(max(lot_occupancy_history)) lowest_occupancy = round(min(lot_occupancy_history)) print(f\"The entire lot had an average occupancy of {average_occupancy}% with a median occupancy of {median_occupancy}%.\")
The entire lot had an average occupancy of 78% with a median occupancy of 78%.\n
In\u00a0[\u00a0]: Copied!
print(lot_occupancy_history)\n\n# [\n#    ...\n#    73.51691310215338,\n#    73.34063105087132,\n#    73.86694684034501,\n#    ...\n# ]\n
print(lot_occupancy_history) # [ # ... # 73.51691310215338, # 73.34063105087132, # 73.86694684034501, # ... # ]
[0.0, 73.6265622249604, 73.51691310215338, 73.34063105087132, 73.86694684034501, 73.81677961626474, 74.2515622249604, 74.55142873907177, 74.34309540573842, 76.10547585518981, 75.33624508595904, 75.19454468110075, 74.56954468110075, 74.2334462829314, 74.07528017367835, 74.29457841929236, 74.08624508595905, 74.95162970134366, 75.19619491873496, 75.78914363668368, 76.15564307927008, 75.53779410901838, 75.6293272897964, 75.43910989849205, 76.54025846388546, 76.54025846388546, 77.77632312386316, 76.71654051516751, 77.70969019538813, 78.09430558000352, 77.13320718183418, 78.25247168925658, 79.48853634923428, 78.42875374053864, 78.28452297130787, 77.51782256644957, 78.28452297130787, 77.65952297130787, 78.60250542744822, 79.33715455025524, 79.33715455025524, 78.81083876078155, 78.66913835592325, 78.57045414539694, 78.04413835592325, 77.70803995775391, 77.60935574722761, 77.08303995775391, 77.70803995775391, 77.20662442058324, 78.11755559467231, 77.20662442058324, 77.20662442058324, 78.02602241389427, 77.20662442058324, 77.20662442058324, 78.21623980519861, 77.20662442058324, 76.53860822625124, 76.68030863110955, 76.92069324649417, 77.81768908056092, 76.68030863110955, 79.78422226133897, 79.01752185648066, 78.7315906823916, 79.25790647186528, 78.7315906823916, 78.1065906823916, 77.79957313853194, 77.08303995775391, 76.90675790647185, 77.64140702927888, 77.94842457313852, 77.2707269846858, 78.92345831133017, 78.5067916446635, 76.77811271489762, 78.07365927360208, 78.42622337616618, 79.27767265152849, 79.61212081206361, 81.54146863815058, 81.01515284867688, 80.1472378689198, 80.7541219268908, 79.0098574194684, 80.16117320894209, 79.74450654227543, 78.78340814410608, 79.88873731150619, 80.01947280408379, 80.35392096461891, 79.69851992020185, 79.31390453558646, 79.69851992020185, 80.08313530481722, 80.32351992020186, 79.69851992020185, 79.66646863815056, 80.49980197148389, 79.8706213694772, 78.86100598486182, 78.68725429795222, 80.46357008742592, 78.49703690664789, 78.35280613741712, 80.70813530481722, 80.27335269612156, 79.23168602945492, 79.5041219268908, 79.55681951534353, 79.50665229126326, 79.6163014140703, 81.9855659214927, 81.41073314557296, 82.03573314557296, 81.45671976764653, 81.3651865868685, 80.03043771636449, 81.374501261515, 81.40908290793874, 81.64946752332337, 82.17578331279704, 83.04534853018835, 82.17578331279704, 81.84386551663441, 80.88276711846505, 82.12979669072347, 80.46775068943262, 81.17034853018833, 80.75368186352169, 79.51761720354398, 81.18428387021063, 80.55928387021063, 79.7259505368773, 79.91616792818166, 80.35095053687732, 79.77446752332335, 79.79258346535234, 79.82463474740362, 80.01485213870797, 79.77446752332335, 79.3578008566567, 78.55651880537464, 78.58857008742592, 79.38985213870797, 79.38985213870797, 78.97318547204131, 78.41481840051634, 78.721835944376, 79.14946752332335, 79.3578008566567, 80.15908290793874, 79.59818547204131, 80.63985213870797, 81.47318547204131, 80.01485213870797, 81.61741624127208, 81.20074957460541, 80.99241624127208, 81.02446752332335, 81.40908290793874, 82.03408290793874, 81.64946752332337, 81.02446752332335, 81.02446752332335, 81.40908290793874, 80.36741624127208, 80.77014756791645, 80.86883177844277, 80.24383177844277, 81.01053218330107, 79.474601009212, 80.48421639382738, 80.85071583641377, 79.7103649592208, 80.52558235052514, 79.99926656105146, 80.85071583641377, 79.26461743824443, 79.16593322771813, 78.36465117643607, 78.78131784310274, 78.78131784310274, 79.19798450976941, 79.74241624127207, 81.20074957460541, 79.93263363257641, 
79.38820190107376, 79.88961743824443, 79.67413307516283, 79.56448395235581, 79.14781728568914, 78.63246640849616, 78.2157997418295, 78.74926656105147, 78.36465117643607, 78.78131784310274, 79.16593322771813, 78.9575998943848, 79.5825998943848, 78.22295077157777, 78.73115061902247, 78.62150149621544, 78.1867188875198, 77.77005222085313, 78.20483482954879, 78.1727835474975, 77.78816816288212, 78.09518570674176, 78.20483482954879, 77.71057032212639, 79.0562841049111, 78.62150149621544, 80.52558235052514, 79.47295077157777, 78.11330164877074, 78.28958370005282, 78.00365252596374, 77.20237047468169, 78.20483482954879, 77.58698585929707, 77.58698585929707, 77.6785190400751, 77.6785190400751, 78.58945021416417, 77.61188611160007, 78.12723698879304, 75.51758786598603, 75.08280525729039, 78.24403714134836, 76.3234905826439, 77.37612216159127, 77.23442175673296, 77.76073754620666, 78.30516927770933, 76.52467288622894, 76.03040837880654, 76.04852432083553, 76.86792231414657, 75.85665669189696, 75.90517367834302, 77.24373643137945, 77.00797248137064, 78.44521944493339, 77.77467288622894, 76.7148902775333, 77.53428827084431, 77.72450566214869, 76.1885744880596, 75.53152320600833, 76.01229243677756, 74.26802792935516, 74.34309540573842, 76.700954937511, 75.29575925599954, 74.87194155958458, 75.94565950830253, 75.32065950830254, 75.16249339904945, 75.91360822625126, 75.88155694419997, 77.14967288622894, 77.29137329108724, 76.56603884292672, 75.07609429091121, 76.80642345831133, 76.38975679164466, 77.09950566214869, 77.90078771343074, 77.0674543800974, 76.10635598192805, 77.74515196855015, 78.58945021416417, 78.74761632341725, 77.83668514932818, 77.66205333568033, 78.97153523440709, 76.8426553423693, 77.11762160417767, 76.73300621956228, 76.90928827084433, 78.01758786598603, 77.28972305345303, 76.36485653934166, 77.6924543800974, 78.63543683623774, 77.41748811828903, 76.78152320600834, 75.35524115472627, 77.16360822625126, 77.07207504547323, 75.98024115472629, 76.49559203191927, 75.64414275655695, 76.35389162706097, 75.19542480783899, 76.36485653934166, 76.46354074986797, 77.97838555418647, 77.71057032212639, 78.07706976471279, 77.11597136654345, 78.60338555418647, 78.50470134366016, 79.03816816288212, 80.36576600363784, 76.60777151909876, 77.54360294549082, 76.63267177140176, 79.22838555418647, 77.97838555418647, 78.27146775802383, 80.78243267030452, 76.76593762835181, 78.57430469987679, 78.1757539752391, 77.35635598192806, 77.35635598192806, 78.74508595904477, 77.8341547849557, 78.86188611160007, 76.89117232881536, 77.55918852314733, 78.36762160417766, 77.19818987267499, 77.63297248137064, 77.63297248137064, 76.43992401572494, 76.92322361086663, 77.43142345831131, 76.48844100217099, 75.82207504547321, 79.310604060318, 79.310604060318, 78.78428827084433, 79.2119198497917, 77.63297248137064, 78.25082145162237, 79.07021944493339, 79.07021944493339, 78.21877016957109, 76.34674059731267, 77.84130581470399, 77.25767177140176, 78.02437217626006, 77.49805638678637, 76.48844100217099, 77.06327377809072, 77.06327377809072, 78.9534192923781, 78.3284192923781, 78.12008595904476, 77.49508595904476, 77.33691984979171, 77.67136801032682, 77.26863668368246, 76.71907087953998, 77.22727072698468, 77.12693627882416, 74.78972305345305, 76.03972305345303, 76.71026961215748, 75.5589538226838, 75.65510766883764, 76.08273924778501, 77.04933843806842, 75.62305638678637, 77.04933843806842, 77.5756542275421, 76.56603884292672, 77.2091547849557, 75.52437217626006, 76.98270550959337, 77.74940591445169, 77.99232089420876, 
77.60770550959337, 78.42710350290443, 78.04248811828904, 78.3174543800974, 77.17710350290442, 76.68283899548202, 76.5841547849557, 76.5951196972364, 75.27049228422226, 74.43297834888224, 74.86776095757789, 74.51057618963797, 77.22727072698468, 76.21600510473507, 77.48412104676406, 76.84815613448336, 77.04933843806842, 76.9506542275421, 77.99232089420876, 76.98270550959337, 75.52734260400165, 75.8132737780907, 77.93962330575603, 76.74529132195038]\n
In\u00a0[\u00a0]: Copied!
%matplotlib inline\n\nimport matplotlib.pyplot as plt\n\nfig, ax1 = plt.subplots()\nplt.title('Total Lot Usage')\nax1.grid(color='#2A3459')\n\nax1.plot(lot_occupancy_history)\nax1.set_ylabel('Occupied Percentage (%)')\n\nplt.ylim(top=100)\nplt.xlim(right=len(lot_occupancy_history))\n\nplt.show()\n
%matplotlib inline import matplotlib.pyplot as plt fig, ax1 = plt.subplots() plt.title('Total Lot Usage') ax1.grid(color='#2A3459') ax1.plot(lot_occupancy_history) ax1.set_ylabel('Occupied Percentage (%)') plt.ylim(top=100) plt.xlim(right=len(lot_occupancy_history)) plt.show() In\u00a0[\u00a0]: Copied!
import cv2\nimport numpy as np\n\ndef transform_image(image, points):\n    width = max(np.linalg.norm(points[0] - points[1]), np.linalg.norm(points[2] - points[3]))\n    height = max(np.linalg.norm(points[0] - points[3]), np.linalg.norm(points[1] - points[2]))\n    dest_points = np.array([[0, 0], [width - 1, 0], [width - 1, height - 1], [0, height - 1]], dtype=\"float32\")\n    matrix = cv2.getPerspectiveTransform(points.astype(\"float32\"), dest_points)\n    transformed_image = cv2.warpPerspective(image, matrix, (int(width), int(height)))\n\n    return transformed_image\n\ndef generate_top_down_views(frame,show=True):\n  heatmap = cv2.imread(f\"heatmap/{frame}.jpg\")\n  image = cv2.imread(f\"frames/{frame}.jpg\")\n\n  images = []\n\n  for zone in zones:\n    if show: print(f\"Occupancy Visualization of {zone['name']}\")\n    top_down_image = transform_image(image, zone['polygon'])\n    top_down_heatmap = transform_image(heatmap, zone['polygon'])\n\n    combined_image = cv2.addWeighted(top_down_image, 0.7, top_down_heatmap, 0.3, 0)\n\n    if show: sv.plot_image(combined_image, size=(5,5))\n\n    images.append(combined_image)\n\n  return images\n
import cv2 import numpy as np def transform_image(image, points): width = max(np.linalg.norm(points[0] - points[1]), np.linalg.norm(points[2] - points[3])) height = max(np.linalg.norm(points[0] - points[3]), np.linalg.norm(points[1] - points[2])) dest_points = np.array([[0, 0], [width - 1, 0], [width - 1, height - 1], [0, height - 1]], dtype=\"float32\") matrix = cv2.getPerspectiveTransform(points.astype(\"float32\"), dest_points) transformed_image = cv2.warpPerspective(image, matrix, (int(width), int(height))) return transformed_image def generate_top_down_views(frame,show=True): heatmap = cv2.imread(f\"heatmap/{frame}.jpg\") image = cv2.imread(f\"frames/{frame}.jpg\") images = [] for zone in zones: if show: print(f\"Occupancy Visualization of {zone['name']}\") top_down_image = transform_image(image, zone['polygon']) top_down_heatmap = transform_image(heatmap, zone['polygon']) combined_image = cv2.addWeighted(top_down_image, 0.7, top_down_heatmap, 0.3, 0) if show: sv.plot_image(combined_image, size=(5,5)) images.append(combined_image) return images In\u00a0[\u00a0]: Copied!
generate_top_down_views(400)\n
generate_top_down_views(400) In\u00a0[\u00a0]: Copied!
import os\nimport numpy as np\nfrom PIL import Image\nimport supervision as sv\n\nfor filename in os.listdir(\"frames\"):\n  img_path = os.path.join(\"frames\", filename)\n  heatmap_path = os.path.join(\"heatmap\", filename)\n  if os.path.isfile(img_path) and os.path.isfile(heatmap_path):\n    frame = int(filename.replace(\".jpg\",\"\"))\n    images = generate_top_down_views(frame,False)\n    gap = 10\n\n    pil_images = [Image.fromarray(image) for image in images]\n\n    # Resize images to have the same width\n    widths, heights = zip(*(i.size for i in pil_images))\n    max_width = max(widths)\n    total_height = sum(heights) + gap * (len(images) - 1)\n    resized_images = [i.resize((max_width, int(i.height * max_width / i.width))) for i in pil_images]\n\n    # Create a new image with the correct combined size\n    combined_image = Image.new('RGB', (max_width, total_height))\n\n    # Paste each image into the combined image with the specified gap\n    y_offset = 0\n    for img in resized_images:\n        combined_image.paste(img, (0, y_offset))\n        y_offset += img.height + gap\n\n    combined_image = combined_image.rotate(90, expand=True)\n\n    combined_image.save(f\"sectionheatmaps/{frame}.jpg\")\n\n    sv.plot_image(np.array(combined_image))\n
import os import numpy as np from PIL import Image import supervision as sv for filename in os.listdir(\"frames\"): img_path = os.path.join(\"frames\", filename) heatmap_path = os.path.join(\"heatmap\", filename) if os.path.isfile(img_path) and os.path.isfile(heatmap_path): frame = int(filename.replace(\".jpg\",\"\")) images = generate_top_down_views(frame,False) gap = 10 pil_images = [Image.fromarray(image) for image in images] # Resize images to have the same width widths, heights = zip(*(i.size for i in pil_images)) max_width = max(widths) total_height = sum(heights) + gap * (len(images) - 1) resized_images = [i.resize((max_width, int(i.height * max_width / i.width))) for i in pil_images] # Create a new image with the correct combined size combined_image = Image.new('RGB', (max_width, total_height)) # Paste each image into the combined image with the specified gap y_offset = 0 for img in resized_images: combined_image.paste(img, (0, y_offset)) y_offset += img.height + gap combined_image = combined_image.rotate(90, expand=True) combined_image.save(f\"sectionheatmaps/{frame}.jpg\") sv.plot_image(np.array(combined_image))"},{"location":"notebooks/occupancy_analytics/#how-to-analyze-occupancy-with-supervision","title":"How To Analyze Occupancy with Supervision\u00b6","text":"

In this notebook, we'll use a parking lot to demonstrate how we can extract numerous informative metrics and detailed graphics, all from one video, using Supervision.

This notebook accompanies the Occupancy Analytics with Computer Vision tutorial on the Roboflow Blog. Check it out for deeper explanations and context!

In this notebook, we will cover the following:

  1. Getting training data
  2. Training an object detection model
  3. Detecting vehicles
  4. Analyzing data and generating statistics
"},{"location":"notebooks/occupancy_analytics/#before-you-start","title":"Before You Start\u00b6","text":"

Let's make sure that we have access to a GPU. We can use the nvidia-smi command to do that. In case of any problems, navigate to Edit -> Notebook settings -> Hardware accelerator, set it to GPU, and then click Save.

"},{"location":"notebooks/occupancy_analytics/#install-relevant-packages","title":"Install Relevant Packages\u00b6","text":"

Here, we will install the Roboflow package, for uploading data and training our model, and Supervision, for visualizing and extracting metrics from our model's predictions.

"},{"location":"notebooks/occupancy_analytics/#getting-video-data","title":"Getting Video Data\u00b6","text":"

We will start by turning a single video into a folder of frame images for training our model. Upload your video and set its file path here.
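A minimal sketch of this step (assuming a local video file named parking.mp4 and an output frames directory; both names are placeholders):

```python
# Extract every 30th frame from the source video and save it as a JPEG for labeling.
import os

import cv2
import supervision as sv

VIDEO_PATH = "parking.mp4"  # replace with your video's file path
os.makedirs("frames", exist_ok=True)

for index, frame in enumerate(
    sv.get_video_frames_generator(source_path=VIDEO_PATH, stride=30)
):
    cv2.imwrite(f"frames/{index}.jpg", frame)
```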

"},{"location":"notebooks/occupancy_analytics/#random-crop-sampling-if-using-sahi","title":"Random Crop Sampling (If Using SAHI)\u00b6","text":"

If we are using SAHI (which we are in our example), randomly sampling cropped portions of our image can help mimic the effect of SAHI detection during training, improving performance.
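A rough sketch of random crop sampling (the crop size and number of crops per image are arbitrary assumptions, not values from the tutorial):

```python
# Sample fixed-size random crops from each extracted frame to mimic SAHI-style tiles.
import os
import random

import cv2

CROP_SIZE = 640
CROPS_PER_IMAGE = 4
os.makedirs("crops", exist_ok=True)

for filename in os.listdir("frames"):
    image = cv2.imread(os.path.join("frames", filename))
    height, width = image.shape[:2]
    for i in range(CROPS_PER_IMAGE):
        # Pick a random top-left corner that keeps the crop inside the image.
        x = random.randint(0, max(width - CROP_SIZE, 0))
        y = random.randint(0, max(height - CROP_SIZE, 0))
        crop = image[y : y + CROP_SIZE, x : x + CROP_SIZE]
        cv2.imwrite(f"crops/{os.path.splitext(filename)[0]}_{i}.jpg", crop)
```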

"},{"location":"notebooks/occupancy_analytics/#training-a-model","title":"Training a Model\u00b6","text":"

Now that we have our images, we can upload our extracted frames as training data to Roboflow.
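A hedged sketch of the upload step (the workspace and project identifiers below are placeholders; copy the exact snippet for your project from the Roboflow app):

```python
# Upload every extracted frame to a Roboflow project for annotation.
import os

from roboflow import Roboflow

rf = Roboflow(api_key="YOUR_API_KEY")  # placeholder key
project = rf.workspace("your-workspace").project("parking-lot-occupancy")

for filename in os.listdir("frames"):
    project.upload(os.path.join("frames", filename))
```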

"},{"location":"notebooks/occupancy_analytics/#upload-training-data","title":"Upload Training Data\u00b6","text":""},{"location":"notebooks/occupancy_analytics/#training-model-using-autodistill-optional","title":"Training Model Using Autodistill (Optional)\u00b6","text":"

We can train our model using Automated Labeling, powered by Autodistill, to automatically label our data. Copy the code required for this section from the Roboflow app.

Note: Using Autodistill is not required.

"},{"location":"notebooks/occupancy_analytics/#vehicle-detection","title":"Vehicle Detection\u00b6","text":"

Now, we can run our model to get inference results for our video.

"},{"location":"notebooks/occupancy_analytics/#setup-model","title":"Setup Model\u00b6","text":"

First, set the model up as a callback function so that we can call it later on while using Supervision.
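A minimal sketch of such a callback, assuming an Ultralytics YOLO checkpoint (yolov8s.pt) stands in for the model trained above; the callback accepts an image slice and returns sv.Detections, which is the signature sv.InferenceSlicer expects:

```python
import numpy as np
import supervision as sv
from ultralytics import YOLO

model = YOLO("yolov8s.pt")  # placeholder; swap in your trained weights

def callback(image_slice: np.ndarray) -> sv.Detections:
    # Run the detector on a single image (or slice) and convert the result.
    result = model(image_slice, verbose=False)[0]
    return sv.Detections.from_ultralytics(result)
```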

"},{"location":"notebooks/occupancy_analytics/#configure-zones","title":"Configure Zones\u00b6","text":"

Next, we will set up a list of the zones to be used with PolygonZone. You can get these polygon coordinates using this web utility.

For our example, we have defined several zones, but you can add as many or as few zones as you would like.
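A sketch of the zone configuration (the names, polygon coordinates, and max capacities below are placeholders; use the web utility linked above to get real coordinates):

```python
import numpy as np

zones = [
    {
        "name": "Section A",
        "polygon": np.array([[100, 200], [600, 200], [620, 500], [80, 520]]),
        "max": 40,  # hardcoded maximum number of parking spots in this zone
    },
    {
        "name": "Section B",
        "polygon": np.array([[650, 200], [1150, 210], [1180, 520], [640, 510]]),
        "max": 35,
    },
]
```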

"},{"location":"notebooks/occupancy_analytics/#setup-supervision","title":"Setup Supervision\u00b6","text":"

For our use case, we will use the following features of Supervision (a minimal setup sketch follows this list). Refer to the linked documentation for more details:

  • ByteTrack: To track the location of our vehicles, so we can assess how long they are parked
  • InferenceSlicer: A helper utility to run SAHI on our model
  • TriangleAnnotator: To help visualize the locations of the vehicles
  • HeatMapAnnotator: To generate heatmaps so we can identify our busiest areas
  • PolygonZone, PolygonZoneAnnotator: To help count and identify vehicles in our respective zones and the annotator to help visualize those zones.
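A minimal setup sketch tying these pieces together (it assumes the callback and zones defined above; exact constructor arguments may differ between supervision versions, for example older releases also require frame_resolution_wh for PolygonZone):

```python
import supervision as sv

tracker = sv.ByteTrack()
slicer = sv.InferenceSlicer(callback=callback)

triangle_annotator = sv.TriangleAnnotator()
heat_map_annotator = sv.HeatMapAnnotator()

polygon_zones = [sv.PolygonZone(polygon=zone["polygon"]) for zone in zones]
zone_annotators = [
    sv.PolygonZoneAnnotator(zone=polygon_zone, color=sv.Color.WHITE)
    for polygon_zone in polygon_zones
]
```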
"},{"location":"notebooks/occupancy_analytics/#try-with-a-single-image","title":"Try With a Single Image\u00b6","text":""},{"location":"notebooks/occupancy_analytics/#setup-graphs","title":"Setup Graphs\u00b6","text":"

Before we run the model on the entire video, we will set up the logic to generate our graphs using matplotlib.

"},{"location":"notebooks/occupancy_analytics/#process-video","title":"Process Video\u00b6","text":"

Now, we can process the entire video and collect detections for every frame.
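A condensed sketch of the processing loop (it reuses the hypothetical objects defined above; zone_counts is a placeholder structure for storing per-frame occupancy per zone):

```python
import supervision as sv

zone_counts = {zone["name"]: [] for zone in zones}

for frame in sv.get_video_frames_generator(source_path=VIDEO_PATH, stride=30):
    detections = slicer(frame)                        # SAHI-style sliced inference
    detections = tracker.update_with_detections(detections)
    for zone, polygon_zone in zip(zones, polygon_zones):
        polygon_zone.trigger(detections=detections)   # updates current_count
        zone_counts[zone["name"]].append(polygon_zone.current_count)
```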

"},{"location":"notebooks/occupancy_analytics/#generate-graphsheatmap-video-optional","title":"Generate Graphs/Heatmap Video (optional)\u00b6","text":""},{"location":"notebooks/occupancy_analytics/#analyze-data","title":"Analyze Data\u00b6","text":"

Lastly, we can analyze the data we collected to extract quantitative metrics from our video.

"},{"location":"notebooks/occupancy_analytics/#save-your-data-for-later","title":"Save your data for later\u00b6","text":"

Using Pickle, we can save our zone detection data so that we can load it in for later analysis. Remember to download your file from the Colab file manager.
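A small sketch of the save step, assuming the per-zone counts live in a dictionary called zone_counts (a hypothetical name):

```python
import pickle

with open("zone_counts.pkl", "wb") as f:
    pickle.dump(zone_counts, f)
```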

"},{"location":"notebooks/occupancy_analytics/#import-your-data","title":"Import your data\u00b6","text":"

To load your data back in, upload the saved file to the Colab environment and run the code cell.
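The matching load step (it assumes the zone_counts.pkl file from the sketch above has been uploaded to the Colab file browser):

```python
import pickle

with open("zone_counts.pkl", "rb") as f:
    zone_counts = pickle.load(f)
```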

"},{"location":"notebooks/occupancy_analytics/#occupancy-per-section","title":"Occupancy Per Section\u00b6","text":"

Since we recorded the number of objects (vehicles) in each zone, we can compare it against the hardcoded maximum capacity we set while configuring our zones. Using this data, we can calculate the average and median occupancy, as well as other metrics such as the maximum or minimum occupancy over that time period.
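A sketch of those per-section statistics, assuming zone_counts maps a zone name to a list of per-frame vehicle counts and each zone dictionary carries a hardcoded max capacity (both are hypothetical names from the sketches above):

```python
import numpy as np

for zone in zones:
    # Convert raw vehicle counts into a percentage of the zone's capacity.
    occupancy = np.array(zone_counts[zone["name"]]) / zone["max"] * 100
    print(
        f"{zone['name']}: mean {occupancy.mean():.1f}%, "
        f"median {np.median(occupancy):.1f}%, "
        f"min {occupancy.min():.1f}%, max {occupancy.max():.1f}%"
    )
```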

"},{"location":"notebooks/occupancy_analytics/#total-occupancy","title":"Total Occupancy\u00b6","text":"

Using the per-zone occupancy, we can also aggregate the counts across all zones to calculate metrics for the whole parking lot.
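A sketch of the lot-wide calculation, summing counts across all zones frame by frame (same hypothetical zone_counts and zones structures as above):

```python
import numpy as np

total_capacity = sum(zone["max"] for zone in zones)
counts_per_zone = np.array([zone_counts[zone["name"]] for zone in zones])

# Per-frame occupancy of the whole lot, as a percentage of total capacity.
lot_occupancy_history = counts_per_zone.sum(axis=0) / total_capacity * 100
print(f"Average lot occupancy: {lot_occupancy_history.mean():.1f}%")
```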

"},{"location":"notebooks/occupancy_analytics/#busy-areas","title":"Busy Areas\u00b6","text":"

Using Supervision's heat map annotator, we can overlay heatmaps onto perspective-transformed images to create top-down occupancy views of each zone.

"},{"location":"notebooks/quickstart/","title":"Supervision Quickstart","text":"In\u00a0[\u00a0]: Copied!
!nvidia-smi\n
!nvidia-smi
Tue Jun 13 13:06:22 2023       \n+-----------------------------------------------------------------------------+\n| NVIDIA-SMI 525.85.12    Driver Version: 525.85.12    CUDA Version: 12.0     |\n|-------------------------------+----------------------+----------------------+\n| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |\n| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |\n|                               |                      |               MIG M. |\n|===============================+======================+======================|\n|   0  Tesla T4            Off  | 00000000:00:04.0 Off |                    0 |\n| N/A   52C    P8    10W /  70W |      0MiB / 15360MiB |      0%      Default |\n|                               |                      |                  N/A |\n+-------------------------------+----------------------+----------------------+\n                                                                               \n+-----------------------------------------------------------------------------+\n| Processes:                                                                  |\n|  GPU   GI   CI        PID   Type   Process name                  GPU Memory |\n|        ID   ID                                                   Usage      |\n|=============================================================================|\n|  No running processes found                                                 |\n+-----------------------------------------------------------------------------+\n

NOTE: To make it easier for us to manage datasets, images and models we create a HOME constant.

In\u00a0[\u00a0]: Copied!
import os\n\nHOME = os.getcwd()\nprint(HOME)\n
import os HOME = os.getcwd() print(HOME)
/content\n

NOTE: During our demo, we will need some example images.

In\u00a0[\u00a0]: Copied!
!mkdir {HOME}/images\n
!mkdir {HOME}/images

NOTE: Feel free to use your own images. Just make sure to put them into the images directory that we just created. \u261d\ufe0f

In\u00a0[\u00a0]: Copied!
%cd {HOME}/images\n\n!wget -q https://media.roboflow.com/notebooks/examples/dog.jpeg\n!wget -q https://media.roboflow.com/notebooks/examples/dog-2.jpeg\n!wget -q https://media.roboflow.com/notebooks/examples/dog-3.jpeg\n!wget -q https://media.roboflow.com/notebooks/examples/dog-4.jpeg\n!wget -q https://media.roboflow.com/notebooks/examples/dog-5.jpeg\n!wget -q https://media.roboflow.com/notebooks/examples/dog-6.jpeg\n!wget -q https://media.roboflow.com/notebooks/examples/dog-7.jpeg\n!wget -q https://media.roboflow.com/notebooks/examples/dog-8.jpeg\n
%cd {HOME}/images !wget -q https://media.roboflow.com/notebooks/examples/dog.jpeg !wget -q https://media.roboflow.com/notebooks/examples/dog-2.jpeg !wget -q https://media.roboflow.com/notebooks/examples/dog-3.jpeg !wget -q https://media.roboflow.com/notebooks/examples/dog-4.jpeg !wget -q https://media.roboflow.com/notebooks/examples/dog-5.jpeg !wget -q https://media.roboflow.com/notebooks/examples/dog-6.jpeg !wget -q https://media.roboflow.com/notebooks/examples/dog-7.jpeg !wget -q https://media.roboflow.com/notebooks/examples/dog-8.jpeg
/content/images\n
In\u00a0[\u00a0]: Copied!
!pip install -q supervision\n\nimport supervision as sv\n\nprint(sv.__version__)\n
!pip install -q supervision import supervision as sv print(sv.__version__)
     \u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501 0.0/45.4 kB ? eta -:--:--\r     \u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501 45.4/45.4 kB 3.4 MB/s eta 0:00:00\n0.17.0\n
In\u00a0[\u00a0]: Copied!
import cv2\n\nIMAGE_PATH = f\"{HOME}/images/dog.jpeg\"\n\nimage = cv2.imread(IMAGE_PATH)\n
import cv2 IMAGE_PATH = f\"{HOME}/images/dog.jpeg\" image = cv2.imread(IMAGE_PATH) In\u00a0[\u00a0]: Copied!
!pip install -q super-gradients\n
!pip install -q super-gradients In\u00a0[\u00a0]: Copied!
from super_gradients.training import models\n\nmodel = models.get(\"yolo_nas_l\", pretrained_weights=\"coco\")\nresult = model.predict(image)\ndetections = sv.Detections.from_yolo_nas(result)\n
from super_gradients.training import models model = models.get(\"yolo_nas_l\", pretrained_weights=\"coco\") result = model.predict(image) detections = sv.Detections.from_yolo_nas(result) In\u00a0[\u00a0]: Copied!
\"detections\", len(detections)\n
\"detections\", len(detections) Out[\u00a0]:
('detections', 7)
In\u00a0[\u00a0]: Copied!
!pip install -q ultralytics\n
!pip install -q ultralytics
     \u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501 0.0/599.6 kB ? eta -:--:--\r     \u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2578 593.9/599.6 kB 19.5 MB/s eta 0:00:01\r     \u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501 599.6/599.6 kB 14.3 MB/s eta 0:00:00\n
In\u00a0[\u00a0]: Copied!
from ultralytics import YOLO\n\nmodel = YOLO(\"yolov8s.pt\")\nresult = model(image, verbose=False)[0]\ndetections = sv.Detections.from_ultralytics(result)\n
from ultralytics import YOLO model = YOLO(\"yolov8s.pt\") result = model(image, verbose=False)[0] detections = sv.Detections.from_ultralytics(result) In\u00a0[\u00a0]: Copied!
\"detections\", len(detections)\n
\"detections\", len(detections) Out[\u00a0]:
('detections', 4)
In\u00a0[\u00a0]: Copied!
box_annotator = sv.BoxAnnotator()\n\nannotated_image = box_annotator.annotate(image.copy(), detections=detections)\n\nsv.plot_image(image=annotated_image, size=(8, 8))\n
box_annotator = sv.BoxAnnotator() annotated_image = box_annotator.annotate(image.copy(), detections=detections) sv.plot_image(image=annotated_image, size=(8, 8))

NOTE: By default, sv.BoxAnnotator uses the corresponding class_id as the label; however, the labels can have an arbitrary format.

In\u00a0[\u00a0]: Copied!
box_annotator = sv.BoxAnnotator()\n\nlabels = [\n    f\"{model.model.names[class_id]} {confidence:.2f}\"\n    for class_id, confidence in zip(detections.class_id, detections.confidence)\n]\nannotated_image = box_annotator.annotate(\n    image.copy(), detections=detections, labels=labels\n)\n\nsv.plot_image(image=annotated_image, size=(8, 8))\n
box_annotator = sv.BoxAnnotator() labels = [ f\"{model.model.names[class_id]} {confidence:.2f}\" for class_id, confidence in zip(detections.class_id, detections.confidence) ] annotated_image = box_annotator.annotate( image.copy(), detections=detections, labels=labels ) sv.plot_image(image=annotated_image, size=(8, 8)) In\u00a0[\u00a0]: Copied!
mask_annotator = sv.MaskAnnotator(color_lookup=sv.ColorLookup.INDEX)\n\nannotated_image = mask_annotator.annotate(image.copy(), detections=detections)\n\nsv.plot_image(image=annotated_image, size=(8, 8))\n
mask_annotator = sv.MaskAnnotator(color_lookup=sv.ColorLookup.INDEX) annotated_image = mask_annotator.annotate(image.copy(), detections=detections) sv.plot_image(image=annotated_image, size=(8, 8)) In\u00a0[\u00a0]: Copied!
detections_index = detections[0]\ndetections_index_list = detections[[0, 1, 3]]\ndetections_index_slice = detections[:2]\n
detections_index = detections[0] detections_index_list = detections[[0, 1, 3]] detections_index_slice = detections[:2] In\u00a0[\u00a0]: Copied!
box_annotator = sv.BoxAnnotator()\n\nimages = [\n    box_annotator.annotate(image.copy(), detections=d)\n    for d in [detections_index, detections_index_list, detections_index_slice]\n]\ntitles = [\n    \"by index - detections[0]\",\n    \"by index list - detections[[0, 1, 3]]\",\n    \"by index slice - detections[:2]\",\n]\n\nsv.plot_images_grid(images=images, titles=titles, grid_size=(1, 3))\n
box_annotator = sv.BoxAnnotator() images = [ box_annotator.annotate(image.copy(), detections=d) for d in [detections_index, detections_index_list, detections_index_slice] ] titles = [ \"by index - detections[0]\", \"by index list - detections[[0, 1, 3]]\", \"by index slice - detections[:2]\", ] sv.plot_images_grid(images=images, titles=titles, grid_size=(1, 3)) In\u00a0[\u00a0]: Copied!
detections_filtered = detections[detections.class_id == 0]\n
detections_filtered = detections[detections.class_id == 0] In\u00a0[\u00a0]: Copied!
box_annotator = sv.BoxAnnotator()\nannotated_image = box_annotator.annotate(image.copy(), detections=detections_filtered)\nsv.plot_image(image=annotated_image, size=(8, 8))\n
box_annotator = sv.BoxAnnotator() annotated_image = box_annotator.annotate(image.copy(), detections=detections_filtered) sv.plot_image(image=annotated_image, size=(8, 8)) In\u00a0[\u00a0]: Copied!
detections_filtered = detections[detections.confidence > 0.75]\n
detections_filtered = detections[detections.confidence > 0.75] In\u00a0[\u00a0]: Copied!
box_annotator = sv.BoxAnnotator()\nlabels = [\n    f\"{model.model.names[class_id]} {confidence:.2f}\"\n    for class_id, confidence in zip(\n        detections_filtered.class_id, detections_filtered.confidence\n    )\n]\nannotated_image = box_annotator.annotate(\n    image.copy(), detections=detections_filtered, labels=labels\n)\nsv.plot_image(image=annotated_image, size=(8, 8))\n
box_annotator = sv.BoxAnnotator() labels = [ f\"{model.model.names[class_id]} {confidence:.2f}\" for class_id, confidence in zip( detections_filtered.class_id, detections_filtered.confidence ) ] annotated_image = box_annotator.annotate( image.copy(), detections=detections_filtered, labels=labels ) sv.plot_image(image=annotated_image, size=(8, 8)) In\u00a0[\u00a0]: Copied!
detections_filtered = detections[\n    (detections.class_id != 0) & (detections.confidence > 0.75)\n]\n
detections_filtered = detections[ (detections.class_id != 0) & (detections.confidence > 0.75) ] In\u00a0[\u00a0]: Copied!
box_annotator = sv.BoxAnnotator()\nlabels = [\n    f\"{class_id} {confidence:.2f}\"\n    for class_id, confidence in zip(\n        detections_filtered.class_id, detections_filtered.confidence\n    )\n]\nannotated_image = box_annotator.annotate(\n    image.copy(), detections=detections_filtered, labels=labels\n)\nsv.plot_image(image=annotated_image, size=(8, 8))\n
box_annotator = sv.BoxAnnotator() labels = [ f\"{class_id} {confidence:.2f}\" for class_id, confidence in zip( detections_filtered.class_id, detections_filtered.confidence ) ] annotated_image = box_annotator.annotate( image.copy(), detections=detections_filtered, labels=labels ) sv.plot_image(image=annotated_image, size=(8, 8))

NOTE: During our demo, we will need some example videos.

In\u00a0[\u00a0]: Copied!
!pip install -q supervision[assets]\n
!pip install -q supervision[assets] In\u00a0[\u00a0]: Copied!
!mkdir {HOME}/videos\n
!mkdir {HOME}/videos

NOTE: Feel free to use your own videos. Just make sure to put them into the videos directory that we just created. \u261d\ufe0f

In\u00a0[\u00a0]: Copied!
%cd {HOME}/videos\n
%cd {HOME}/videos In\u00a0[\u00a0]: Copied!
from supervision.assets import download_assets, VideoAssets\n\ndownload_assets(VideoAssets.VEHICLES)\nVIDEO_PATH = f\"{HOME}/videos/vehicle.mp4\"\n
from supervision.assets import download_assets, VideoAssets download_assets(VideoAssets.VEHICLES) VIDEO_PATH = f\"{HOME}/videos/vehicle.mp4\" In\u00a0[\u00a0]: Copied!
sv.VideoInfo.from_video_path(video_path=VIDEO_PATH)\n
sv.VideoInfo.from_video_path(video_path=VIDEO_PATH) Out[\u00a0]:
VideoInfo(width=3840, height=2160, fps=25, total_frames=538)
In\u00a0[\u00a0]: Copied!
frame_generator = sv.get_video_frames_generator(source_path=VIDEO_PATH)\n
frame_generator = sv.get_video_frames_generator(source_path=VIDEO_PATH) In\u00a0[\u00a0]: Copied!
frame = next(iter(frame_generator))\nsv.plot_image(image=frame, size=(8, 8))\n
frame = next(iter(frame_generator)) sv.plot_image(image=frame, size=(8, 8)) In\u00a0[\u00a0]: Copied!
RESULT_VIDEO_PATH = f\"{HOME}/videos/vehicle-counting-result.mp4\"\n
RESULT_VIDEO_PATH = f\"{HOME}/videos/vehicle-counting-result.mp4\"

NOTE: This time we set the stride parameter to 2. As a result, get_video_frames_generator will return every second video frame.

In\u00a0[\u00a0]: Copied!
video_info = sv.VideoInfo.from_video_path(video_path=VIDEO_PATH)\n\nwith sv.VideoSink(target_path=RESULT_VIDEO_PATH, video_info=video_info) as sink:\n    for frame in sv.get_video_frames_generator(source_path=VIDEO_PATH, stride=2):\n        sink.write_frame(frame=frame)\n
video_info = sv.VideoInfo.from_video_path(video_path=VIDEO_PATH) with sv.VideoSink(target_path=RESULT_VIDEO_PATH, video_info=video_info) as sink: for frame in sv.get_video_frames_generator(source_path=VIDEO_PATH, stride=2): sink.write_frame(frame=frame)

NOTE: If we once again use VideoInfo, we will notice that the final video has half as many frames.

In\u00a0[\u00a0]: Copied!
sv.VideoInfo.from_video_path(video_path=RESULT_VIDEO_PATH)\n
sv.VideoInfo.from_video_path(video_path=RESULT_VIDEO_PATH) Out[\u00a0]:
VideoInfo(width=3840, height=2160, fps=25, total_frames=269)
In\u00a0[\u00a0]: Copied!
!pip install -q roboflow\n
!pip install -q roboflow In\u00a0[\u00a0]: Copied!
!mkdir {HOME}/datasets\n%cd {HOME}/datasets\n\nimport roboflow\nfrom roboflow import Roboflow\n\nroboflow.login()\n\nrf = Roboflow()\n\nproject = rf.workspace(\"roboflow-jvuqo\").project(\"fashion-assistant-segmentation\")\ndataset = project.version(5).download(\"yolov8\")\n
!mkdir {HOME}/datasets %cd {HOME}/datasets import roboflow from roboflow import Roboflow roboflow.login() rf = Roboflow() project = rf.workspace(\"roboflow-jvuqo\").project(\"fashion-assistant-segmentation\") dataset = project.version(5).download(\"yolov8\")
/content/datasets\n\rvisit https://app.roboflow.com/auth-cli to get your authentication token.\nPaste the authentication token here: \u00b7\u00b7\u00b7\u00b7\u00b7\u00b7\u00b7\u00b7\u00b7\u00b7\nloading Roboflow workspace...\nloading Roboflow project...\nDependency ultralytics<=8.0.20 is required but found version=8.0.117, to fix: `pip install ultralytics<=8.0.20`\nDownloading Dataset Version Zip in fashion-assistant-segmentation-5 to yolov8: 100% [125448709 / 125448709] bytes\n
Extracting Dataset Version Zip to fashion-assistant-segmentation-5 in yolov8:: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 1254/1254 [00:00<00:00, 3283.33it/s]\n
In\u00a0[\u00a0]: Copied!
ds = sv.DetectionDataset.from_yolo(\n    images_directory_path=f\"{dataset.location}/train/images\",\n    annotations_directory_path=f\"{dataset.location}/train/labels\",\n    data_yaml_path=f\"{dataset.location}/data.yaml\",\n)\n
ds = sv.DetectionDataset.from_yolo( images_directory_path=f\"{dataset.location}/train/images\", annotations_directory_path=f\"{dataset.location}/train/labels\", data_yaml_path=f\"{dataset.location}/data.yaml\", ) In\u00a0[\u00a0]: Copied!
len(ds)\n
len(ds) Out[\u00a0]:
573
In\u00a0[\u00a0]: Copied!
ds.classes\n
ds.classes Out[\u00a0]:
['baseball cap',\n 'hoodie',\n 'jacket',\n 'pants',\n 'shirt',\n 'shorts',\n 'sneaker',\n 'sunglasses',\n 'sweatshirt',\n 'tshirt']
In\u00a0[\u00a0]: Copied!
IMAGE_NAME = list(ds.images.keys())[0]\n\nimage = ds.images[IMAGE_NAME]\nannotations = ds.annotations[IMAGE_NAME]\n\nbox_annotator = sv.BoxAnnotator()\nmask_annotator = sv.MaskAnnotator()\n\nlabels = [f\"{ds.classes[class_id]}\" for class_id in annotations.class_id]\n\nannotated_image = mask_annotator.annotate(image.copy(), detections=annotations)\nannotated_image = box_annotator.annotate(\n    annotated_image, detections=annotations, labels=labels\n)\n\nsv.plot_image(image=annotated_image, size=(8, 8))\n
IMAGE_NAME = list(ds.images.keys())[0] image = ds.images[IMAGE_NAME] annotations = ds.annotations[IMAGE_NAME] box_annotator = sv.BoxAnnotator() mask_annotator = sv.MaskAnnotator() labels = [f\"{ds.classes[class_id]}\" for class_id in annotations.class_id] annotated_image = mask_annotator.annotate(image.copy(), detections=annotations) annotated_image = box_annotator.annotate( annotated_image, detections=annotations, labels=labels ) sv.plot_image(image=annotated_image, size=(8, 8)) In\u00a0[\u00a0]: Copied!
ds_train, ds_test = ds.split(split_ratio=0.8)\n
ds_train, ds_test = ds.split(split_ratio=0.8) In\u00a0[\u00a0]: Copied!
\"ds_train\", len(ds_train), \"ds_test\", len(ds_test)\n
\"ds_train\", len(ds_train), \"ds_test\", len(ds_test) Out[\u00a0]:
('ds_train', 458, 'ds_test', 115)
In\u00a0[\u00a0]: Copied!
ds_train.as_pascal_voc(\n    images_directory_path=f\"{HOME}/datasets/result/images\",\n    annotations_directory_path=f\"{HOME}/datasets/result/labels\",\n)\n
ds_train.as_pascal_voc( images_directory_path=f\"{HOME}/datasets/result/images\", annotations_directory_path=f\"{HOME}/datasets/result/labels\", )"},{"location":"notebooks/quickstart/#supervision-quickstart","title":"Supervision Quickstart\u00b6","text":"

We write your reusable computer vision tools. Whether you need to load your dataset from your hard drive, draw detections on an image or video, or count how many detections are in a zone, you can count on us! \ud83e\udd1d

We hope that the resources in this notebook will help you get the most out of Supervision. Please browse the Supervision Docs for details, raise an issue on GitHub for support, and join our discussions section for questions!

"},{"location":"notebooks/quickstart/#table-of-contents","title":"Table of contents\u00b6","text":"
  • Before you start
  • Install
  • Detection API
    • Plug in your model
      • YOLO-NAS
      • YOLOv8
    • Annotate
      • BoxAnnotator
      • MaskAnnotator
    • Filter
      • By index, index list and index slice
      • By class_id
      • By confidence
      • By advanced logical condition
  • Video API
    • VideoInfo
    • get_video_frames_generator
    • VideoSink
  • Dataset API
    • DetectionDataset.from_yolo
    • Visualize annotations
    • split
    • DetectionDataset.as_pascal_voc
"},{"location":"notebooks/quickstart/#before-you-start","title":"\u26a1 Before you start\u00b6","text":"

NOTE: In this notebook, we aim to show - among other things - how simple it is to integrate supervision with popular object detection and instance segmentation libraries and frameworks. GPU access is optional but will certainly make the ride smoother.

Let's make sure that we have access to a GPU. We can use the nvidia-smi command to do that. In case of any problems, navigate to Edit -> Notebook settings -> Hardware accelerator, set it to GPU, and then click Save.

"},{"location":"notebooks/quickstart/#install","title":"\u200d\ud83d\udcbb Install\u00b6","text":""},{"location":"notebooks/quickstart/#detection-api","title":"\ud83d\udc41\ufe0f Detection API\u00b6","text":"
  • xyxy (np.ndarray): An array of shape (n, 4) containing the bounding boxes coordinates in format [x1, y1, x2, y2]
  • mask: (Optional[np.ndarray]): An array of shape (n, W, H) containing the segmentation masks.
  • confidence (Optional[np.ndarray]): An array of shape (n,) containing the confidence scores of the detections.
  • class_id (Optional[np.ndarray]): An array of shape (n,) containing the class ids of the detections.
  • tracker_id (Optional[np.ndarray]): An array of shape (n,) containing the tracker ids of the detections.
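A minimal illustration of these fields, constructing an sv.Detections object by hand (the coordinates and scores are made up):

```python
import numpy as np
import supervision as sv

detections = sv.Detections(
    xyxy=np.array([[10, 10, 100, 120], [200, 50, 320, 240]], dtype=float),
    confidence=np.array([0.92, 0.78]),
    class_id=np.array([0, 2]),
)
print(len(detections), detections.class_id)
```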
"},{"location":"notebooks/quickstart/#plug-in-your-model","title":"\ud83d\udd0c Plug in your model\u00b6","text":"

NOTE: In our example, we will focus only on integration with YOLO-NAS and YOLOv8. However, keep in mind that supervision allows seamless integration with many other models like SAM, Transformers, and YOLOv5. You can learn more from our documentation.

"},{"location":"notebooks/quickstart/#yolo-nas","title":"YOLO-NAS \ud83d\udcda\u00b6","text":""},{"location":"notebooks/quickstart/#ultralytics","title":"Ultralytics \ud83d\udcda\u00b6","text":""},{"location":"notebooks/quickstart/#annotate","title":"\ud83d\udc69\u200d\ud83c\udfa8 Annotate\u00b6","text":""},{"location":"notebooks/quickstart/#boxannotator","title":"BoxAnnotator \ud83d\udcda\u00b6","text":""},{"location":"notebooks/quickstart/#maskannotator","title":"MaskAnnotator \ud83d\udcda\u00b6","text":""},{"location":"notebooks/quickstart/#filter","title":"\ud83d\uddd1 Filter \ud83d\udcda\u00b6","text":""},{"location":"notebooks/quickstart/#by-index-index-list-and-index-slice","title":"By index, index list and index slice\u00b6","text":"

NOTE: The sv.Detections filter API allows you to access detections by index, index list, or index slice.

"},{"location":"notebooks/quickstart/#by-class_id","title":"By class_id\u00b6","text":"

NOTE: Let's use the sv.Detections filter API to display only objects with class_id == 0.

"},{"location":"notebooks/quickstart/#by-confidence","title":"By confidence\u00b6","text":"

NOTE: Let's use the sv.Detections filter API to display only objects with confidence > 0.75.

"},{"location":"notebooks/quickstart/#by-advanced-logical-condition","title":"By advanced logical condition\u00b6","text":"

NOTE: The sv.Detections filter API also allows you to build advanced logical conditions. Let's select only detections with class_id != 0 and confidence > 0.75.

"},{"location":"notebooks/quickstart/#video-api","title":"\ud83c\udfac Video API\u00b6","text":"

NOTE: supervision offers a lot of utils to make working with videos easier. Let's take a look at some of them.

"},{"location":"notebooks/quickstart/#videoinfo","title":"VideoInfo \ud83d\udcda\u00b6","text":"

NOTE: VideoInfo allows us to easily retrieve information about video files, such as resolution, FPS and total number of frames.

"},{"location":"notebooks/quickstart/#get_video_frames_generator","title":"get_video_frames_generator \ud83d\udcda\u00b6","text":""},{"location":"notebooks/quickstart/#videosink","title":"VideoSink \ud83d\udcda\u00b6","text":""},{"location":"notebooks/quickstart/#dataset-api","title":"\ud83d\uddbc\ufe0f Dataset API\u00b6","text":"

NOTE: In order to demonstrate the capabilities of the Dataset API, we need a dataset. Let's download one from Roboflow Universe. To do this we first need to install the roboflow pip package.

"},{"location":"notebooks/quickstart/#detectiondatasetfrom_yolo","title":"DetectionDataset.from_yolo \ud83d\udcda\u00b6","text":"

NOTE: Currently, the Dataset API always loads images from the hard drive. In the future, we plan to add lazy loading.

"},{"location":"notebooks/quickstart/#visualize-annotations","title":"\ud83c\udff7\ufe0f Visualize annotations\u00b6","text":""},{"location":"notebooks/quickstart/#split","title":"split \ud83d\udcda\u00b6","text":""},{"location":"notebooks/quickstart/#detectiondatasetas_pascal_voc","title":"DetectionDataset.as_pascal_voc \ud83d\udcda\u00b6","text":""},{"location":"notebooks/quickstart/#congratulations","title":"\ud83c\udfc6 Congratulations\u00b6","text":""},{"location":"notebooks/quickstart/#learning-resources","title":"Learning Resources\u00b6","text":"
  • Documentation
  • GitHub
  • YouTube Supervision Playlist
"},{"location":"notebooks/zero-shot-object-detection-with-yolo-world/","title":"Zero-Shot Object Detection with YOLO-World","text":"In\u00a0[1]: Copied!
!nvidia-smi\n
!nvidia-smi
Fri Feb 16 12:46:14 2024       \n+---------------------------------------------------------------------------------------+\n| NVIDIA-SMI 535.104.05             Driver Version: 535.104.05   CUDA Version: 12.2     |\n|-----------------------------------------+----------------------+----------------------+\n| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |\n| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |\n|                                         |                      |               MIG M. |\n|=========================================+======================+======================|\n|   0  Tesla T4                       Off | 00000000:00:04.0 Off |                    0 |\n| N/A   65C    P8              13W /  70W |      0MiB / 15360MiB |      0%      Default |\n|                                         |                      |                  N/A |\n+-----------------------------------------+----------------------+----------------------+\n                                                                                         \n+---------------------------------------------------------------------------------------+\n| Processes:                                                                            |\n|  GPU   GI   CI        PID   Type   Process name                            GPU Memory |\n|        ID   ID                                                             Usage      |\n|=======================================================================================|\n|  No running processes found                                                           |\n+---------------------------------------------------------------------------------------+\n

NOTE: To make it easier for us to manage datasets, images and models we create a HOME constant.

In\u00a0[2]: Copied!
import os\nHOME = os.getcwd()\nprint(HOME)\n
import os HOME = os.getcwd() print(HOME)
/content\n
In\u00a0[\u00a0]: Copied!
!pip install -q inference-gpu[yolo-world]==0.9.12rc1\n
!pip install -q inference-gpu[yolo-world]==0.9.12rc1 In\u00a0[\u00a0]: Copied!
!pip install -q supervision==0.19.0rc3\n
!pip install -q supervision==0.19.0rc3 In\u00a0[\u00a0]: Copied!
import cv2\nimport supervision as sv\n\nfrom tqdm import tqdm\nfrom inference.models.yolo_world.yolo_world import YOLOWorld\n
import cv2 import supervision as sv from tqdm import tqdm from inference.models.yolo_world.yolo_world import YOLOWorld In\u00a0[6]: Copied!
!wget -P {HOME} -q https://media.roboflow.com/notebooks/examples/dog.jpeg\n!wget -P {HOME} -q https://media.roboflow.com/supervision/cookbooks/yellow-filling.mp4\n
!wget -P {HOME} -q https://media.roboflow.com/notebooks/examples/dog.jpeg !wget -P {HOME} -q https://media.roboflow.com/supervision/cookbooks/yellow-filling.mp4 In\u00a0[7]: Copied!
SOURCE_IMAGE_PATH = f\"{HOME}/dog.jpeg\"\nSOURCE_VIDEO_PATH = f\"{HOME}/yellow-filling.mp4\"\n
SOURCE_IMAGE_PATH = f\"{HOME}/dog.jpeg\" SOURCE_VIDEO_PATH = f\"{HOME}/yellow-filling.mp4\"

NOTE: If you want to run the cookbook using your own file as input, simply upload your video to Google Colab and replace SOURCE_IMAGE_PATH and SOURCE_VIDEO_PATH with the path to your file.

In\u00a0[8]: Copied!
model = YOLOWorld(model_id=\"yolo_world/l\")\n
model = YOLOWorld(model_id=\"yolo_world/l\")

YOLO-World is a zero-shot model, enabling object detection without any training. You only need to define a prompt as a list of classes (things) you are searching for.

In\u00a0[9]: Copied!
classes = [\"person\", \"backpack\", \"dog\", \"eye\", \"nose\", \"ear\", \"tongue\"]\nmodel.set_classes(classes)\n
classes = [\"person\", \"backpack\", \"dog\", \"eye\", \"nose\", \"ear\", \"tongue\"] model.set_classes(classes)
100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 338M/338M [00:03<00:00, 106MiB/s]\n

We perform detection on our sample image. Then, we convert the result into a sv.Detections object, which will be useful in the later parts of the cookbook.

In\u00a0[10]: Copied!
image = cv2.imread(SOURCE_IMAGE_PATH)\nresults = model.infer(image)\ndetections = sv.Detections.from_inference(results)\n
image = cv2.imread(SOURCE_IMAGE_PATH) results = model.infer(image) detections = sv.Detections.from_inference(results)

The results we've obtained can be easily visualized with sv.BoundingBoxAnnotator and sv.LabelAnnotator. We can adjust parameters such as line thickness, text scale, and line and text color, allowing for a highly tailored visualization experience.

In\u00a0[11]: Copied!
BOUNDING_BOX_ANNOTATOR = sv.BoundingBoxAnnotator(thickness=2)\nLABEL_ANNOTATOR = sv.LabelAnnotator(text_thickness=2, text_scale=1, text_color=sv.Color.BLACK)\n
BOUNDING_BOX_ANNOTATOR = sv.BoundingBoxAnnotator(thickness=2) LABEL_ANNOTATOR = sv.LabelAnnotator(text_thickness=2, text_scale=1, text_color=sv.Color.BLACK) In\u00a0[12]: Copied!
annotated_image = image.copy()\nannotated_image = BOUNDING_BOX_ANNOTATOR.annotate(annotated_image, detections)\nannotated_image = LABEL_ANNOTATOR.annotate(annotated_image, detections)\nsv.plot_image(annotated_image, (10, 10))\n
annotated_image = image.copy() annotated_image = BOUNDING_BOX_ANNOTATOR.annotate(annotated_image, detections) annotated_image = LABEL_ANNOTATOR.annotate(annotated_image, detections) sv.plot_image(annotated_image, (10, 10))

Note that many classes from our prompt were not detected. This is because the default confidence threshold in Inference is set to 0.5. Let's try lowering this value significantly; we've observed that the confidence returned by YOLO-World tends to be much lower when querying for classes outside the COCO dataset.

In\u00a0[13]: Copied!
image = cv2.imread(SOURCE_IMAGE_PATH)\nresults = model.infer(image, confidence=0.003)\ndetections = sv.Detections.from_inference(results)\n
image = cv2.imread(SOURCE_IMAGE_PATH) results = model.infer(image, confidence=0.003) detections = sv.Detections.from_inference(results)

By default, sv.LabelAnnotator displays only the names of objects. To also view the confidence levels associated with each detection, we must define custom labels and pass them to sv.LabelAnnotator.

In\u00a0[14]: Copied!
labels = [\n    f\"{classes[class_id]} {confidence:0.3f}\"\n    for class_id, confidence\n    in zip(detections.class_id, detections.confidence)\n]\n\nannotated_image = image.copy()\nannotated_image = BOUNDING_BOX_ANNOTATOR.annotate(annotated_image, detections)\nannotated_image = LABEL_ANNOTATOR.annotate(annotated_image, detections, labels=labels)\nsv.plot_image(annotated_image, (10, 10))\n
labels = [ f\"{classes[class_id]} {confidence:0.3f}\" for class_id, confidence in zip(detections.class_id, detections.confidence) ] annotated_image = image.copy() annotated_image = BOUNDING_BOX_ANNOTATOR.annotate(annotated_image, detections) annotated_image = LABEL_ANNOTATOR.annotate(annotated_image, detections, labels=labels) sv.plot_image(annotated_image, (10, 10)) In\u00a0[15]: Copied!
image = cv2.imread(SOURCE_IMAGE_PATH)\nresults = model.infer(image, confidence=0.003)\ndetections = sv.Detections.from_inference(results).with_nms(threshold=0.1)\n
image = cv2.imread(SOURCE_IMAGE_PATH) results = model.infer(image, confidence=0.003) detections = sv.Detections.from_inference(results).with_nms(threshold=0.1) In\u00a0[16]: Copied!
labels = [\n    f\"{classes[class_id]} {confidence:0.3f}\"\n    for class_id, confidence\n    in zip(detections.class_id, detections.confidence)\n]\n\nannotated_image = image.copy()\nannotated_image = BOUNDING_BOX_ANNOTATOR.annotate(annotated_image, detections)\nannotated_image = LABEL_ANNOTATOR.annotate(annotated_image, detections, labels=labels)\nsv.plot_image(annotated_image, (10, 10))\n
labels = [ f\"{classes[class_id]} {confidence:0.3f}\" for class_id, confidence in zip(detections.class_id, detections.confidence) ] annotated_image = image.copy() annotated_image = BOUNDING_BOX_ANNOTATOR.annotate(annotated_image, detections) annotated_image = LABEL_ANNOTATOR.annotate(annotated_image, detections, labels=labels) sv.plot_image(annotated_image, (10, 10))

The get_video_frames_generator enables us to easily iterate over video frames. Let's create a video generator for our sample input file and display its first frame on the screen.

In\u00a0[17]: Copied!
generator = sv.get_video_frames_generator(SOURCE_VIDEO_PATH)\nframe = next(generator)\n\nsv.plot_image(frame, (10, 10))\n
generator = sv.get_video_frames_generator(SOURCE_VIDEO_PATH) frame = next(generator) sv.plot_image(frame, (10, 10))

Let's update our list of classes. This time we are looking for yellow filling. The rest of the code performing detection, filtering and visualization remains unchanged.

In\u00a0[23]: Copied!
classes = [\"yellow filling\"]\nmodel.set_classes(classes)\n
classes = [\"yellow filling\"] model.set_classes(classes) In\u00a0[38]: Copied!
results = model.infer(frame, confidence=0.002)\ndetections = sv.Detections.from_inference(results).with_nms(threshold=0.1)\n
results = model.infer(frame, confidence=0.002) detections = sv.Detections.from_inference(results).with_nms(threshold=0.1) In\u00a0[39]: Copied!
annotated_image = frame.copy()\nannotated_image = BOUNDING_BOX_ANNOTATOR.annotate(annotated_image, detections)\nannotated_image = LABEL_ANNOTATOR.annotate(annotated_image, detections)\nsv.plot_image(annotated_image, (10, 10))\n
annotated_image = frame.copy() annotated_image = BOUNDING_BOX_ANNOTATOR.annotate(annotated_image, detections) annotated_image = LABEL_ANNOTATOR.annotate(annotated_image, detections) sv.plot_image(annotated_image, (10, 10))

Our prompt allowed us to locate all filled holes, but we also accidentally marked the entire high-level element. To address this issue, we'll filter detections based on their relative area in relation to the entire video frame. If a detection occupies more than 10% of the frame's total area, it will be discarded.

We can use VideoInfo.from_video_path to learn basic information about our video, such as duration, resolution, or FPS.

In\u00a0[40]: Copied!
video_info = sv.VideoInfo.from_video_path(SOURCE_VIDEO_PATH)\nvideo_info\n
video_info = sv.VideoInfo.from_video_path(SOURCE_VIDEO_PATH) video_info Out[40]:
VideoInfo(width=1280, height=720, fps=25, total_frames=442)

Knowing the frame's resolution allows us to easily calculate its total area, expressed in pixels.

In\u00a0[41]: Copied!
width, height = video_info.resolution_wh\nframe_area = width * height\nframe_area\n
width, height = video_info.resolution_wh frame_area = width * height frame_area Out[41]:
921600

On the other hand, by using sv.Detections.area property, we can learn the area of each individual bounding box.

In\u00a0[45]: Copied!
results = model.infer(frame, confidence=0.002)\ndetections = sv.Detections.from_inference(results).with_nms(threshold=0.1)\ndetections.area\n
results = model.infer(frame, confidence=0.002) detections = sv.Detections.from_inference(results).with_nms(threshold=0.1) detections.area Out[45]:
array([ 7.5408e+05,       92844,       11255,       12969,      9875.9,      8007.7,      5433.5])

Now, we can combine these two pieces of information to construct a filtering condition for detections with an area greater than 10% of the entire frame.

In\u00a0[46]: Copied!
(detections.area / frame_area) < 0.10\n
(detections.area / frame_area) < 0.10 Out[46]:
array([False, False,  True,  True,  True,  True,  True])
In\u00a0[47]: Copied!
detections = detections[(detections.area / frame_area) < 0.10]\n\nannotated_image = frame.copy()\nannotated_image = BOUNDING_BOX_ANNOTATOR.annotate(annotated_image, detections)\nannotated_image = LABEL_ANNOTATOR.annotate(annotated_image, detections)\nsv.plot_image(annotated_image, (10, 10))\n
detections = detections[(detections.area / frame_area) < 0.10] annotated_image = frame.copy() annotated_image = BOUNDING_BOX_ANNOTATOR.annotate(annotated_image, detections) annotated_image = LABEL_ANNOTATOR.annotate(annotated_image, detections) sv.plot_image(annotated_image, (10, 10))

Finally, we are ready to process our entire video. Now we can truly appreciate the speed of YOLO-World.

In\u00a0[49]: Copied!
TARGET_VIDEO_PATH = f\"{HOME}/yellow-filling-output.mp4\"\n
TARGET_VIDEO_PATH = f\"{HOME}/yellow-filling-output.mp4\" In\u00a0[50]: Copied!
frame_generator = sv.get_video_frames_generator(SOURCE_VIDEO_PATH)\nvideo_info = sv.VideoInfo.from_video_path(SOURCE_VIDEO_PATH)\n\nwidth, height = video_info.resolution_wh\nframe_area = width * height\nframe_area\n\nwith sv.VideoSink(target_path=TARGET_VIDEO_PATH, video_info=video_info) as sink:\n    for frame in tqdm(frame_generator, total=video_info.total_frames):\n        results = model.infer(frame, confidence=0.002)\n        detections = sv.Detections.from_inference(results).with_nms(threshold=0.1)\n        detections = detections[(detections.area / frame_area) < 0.10]\n\n        annotated_frame = frame.copy()\n        annotated_frame = BOUNDING_BOX_ANNOTATOR.annotate(annotated_frame, detections)\n        annotated_frame = LABEL_ANNOTATOR.annotate(annotated_frame, detections)\n        sink.write_frame(annotated_frame)\n
frame_generator = sv.get_video_frames_generator(SOURCE_VIDEO_PATH) video_info = sv.VideoInfo.from_video_path(SOURCE_VIDEO_PATH) width, height = video_info.resolution_wh frame_area = width * height frame_area with sv.VideoSink(target_path=TARGET_VIDEO_PATH, video_info=video_info) as sink: for frame in tqdm(frame_generator, total=video_info.total_frames): results = model.infer(frame, confidence=0.002) detections = sv.Detections.from_inference(results).with_nms(threshold=0.1) detections = detections[(detections.area / frame_area) < 0.10] annotated_frame = frame.copy() annotated_frame = BOUNDING_BOX_ANNOTATOR.annotate(annotated_frame, detections) annotated_frame = LABEL_ANNOTATOR.annotate(annotated_frame, detections) sink.write_frame(annotated_frame)
100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 442/442 [00:31<00:00, 13.90it/s]\n

Keep in mind that the video preview below works only in the web version of the cookbooks and not in Google Colab.

"},{"location":"notebooks/zero-shot-object-detection-with-yolo-world/#zero-shot-object-detection-with-yolo-world","title":"Zero-Shot Object Detection with YOLO-World\u00b6","text":"

Click the Open in Colab button to run the cookbook on Google Colab.

YOLO-World was designed to solve a limitation of existing zero-shot object detection models: speed. Whereas other state-of-the-art models use Transformers, a powerful but typically slower architecture, YOLO-World uses the faster CNN-based YOLO architecture.

According to the paper, YOLO-World reaches 35.4 AP at 52.0 FPS for the large version and 26.2 AP at 74.1 FPS for the small version. While the V100 is a powerful GPU, achieving such high FPS on any device is impressive.

"},{"location":"notebooks/zero-shot-object-detection-with-yolo-world/#before-you-start","title":"Before you start\u00b6","text":"

Let's make sure that we have access to a GPU. We can use the nvidia-smi command to do that. In case of any problems, navigate to Edit -> Notebook settings -> Hardware accelerator, set it to GPU, and then click Save.

"},{"location":"notebooks/zero-shot-object-detection-with-yolo-world/#install-required-packages","title":"Install required packages\u00b6","text":"

In this guide, we utilize two Python packages: inference, for executing zero-shot object detection using YOLO-World, and supervision, for post-processing and visualizing the detected objects.

"},{"location":"notebooks/zero-shot-object-detection-with-yolo-world/#imports","title":"Imports\u00b6","text":""},{"location":"notebooks/zero-shot-object-detection-with-yolo-world/#download-example-data","title":"Download example data\u00b6","text":""},{"location":"notebooks/zero-shot-object-detection-with-yolo-world/#run-object-detection","title":"Run Object Detection\u00b6","text":"

The Inference package provides the YOLO-World model in three versions: S, M, and L. You can load them by defining model_id as yolo_world/s, yolo_world/m, and yolo_world/l, respectively. The ROBOFLOW_API_KEY is not required to utilize this model.

"},{"location":"notebooks/zero-shot-object-detection-with-yolo-world/#adjusting-confidence-level","title":"Adjusting Confidence Level\u00b6","text":""},{"location":"notebooks/zero-shot-object-detection-with-yolo-world/#using-non-max-suppression-nms-to-eliminate-double-detections","title":"Using Non-Max Suppression (NMS) to Eliminate Double Detections\u00b6","text":"

To eliminate duplicates, we will use Non-Max Suppression (NMS). NMS evaluates the extent to which detections overlap using the Intersection over Union metric and, upon exceeding a defined threshold, treats them as duplicates. Duplicates are then discarded, starting with those of the lowest confidence. The value should be within the range [0, 1]. The smaller the value, the more restrictive the NMS.

"},{"location":"notebooks/zero-shot-object-detection-with-yolo-world/#video-processing","title":"Video Processing\u00b6","text":""},{"location":"notebooks/zero-shot-object-detection-with-yolo-world/#filtering-detectuions-by-area","title":"Filtering Detectuions by Area\u00b6","text":""},{"location":"notebooks/zero-shot-object-detection-with-yolo-world/#final-result","title":"Final Result\u00b6","text":""},{"location":"utils/draw/","title":"Draw Utils","text":"draw_line

Draws a line on a given scene.

Parameters:

Name Type Description Default scene ndarray

The scene on which the line will be drawn

required start Point

The starting point of the line

required end Point

The end point of the line

required color Color

The color of the line

required thickness int

The thickness of the line

2

Returns:

Type Description ndarray

np.ndarray: The scene with the line drawn on it

Source code in supervision/draw/utils.py
def draw_line(\n    scene: np.ndarray, start: Point, end: Point, color: Color, thickness: int = 2\n) -> np.ndarray:\n    \"\"\"\n    Draws a line on a given scene.\n\n    Parameters:\n        scene (np.ndarray): The scene on which the line will be drawn\n        start (Point): The starting point of the line\n        end (Point): The end point of the line\n        color (Color): The color of the line\n        thickness (int): The thickness of the line\n\n    Returns:\n        np.ndarray: The scene with the line drawn on it\n    \"\"\"\n    cv2.line(\n        scene,\n        start.as_xy_int_tuple(),\n        end.as_xy_int_tuple(),\n        color.as_bgr(),\n        thickness=thickness,\n    )\n    return scene\n
draw_rectangle

Draws a rectangle on an image.

Parameters:

Name Type Description Default scene ndarray

The scene on which the rectangle will be drawn

required rect Rect

The rectangle to be drawn

required color Color

The color of the rectangle

required thickness int

The thickness of the rectangle border

2

Returns:

Type Description ndarray

np.ndarray: The scene with the rectangle drawn on it

Source code in supervision/draw/utils.py
def draw_rectangle(\n    scene: np.ndarray, rect: Rect, color: Color, thickness: int = 2\n) -> np.ndarray:\n    \"\"\"\n    Draws a rectangle on an image.\n\n    Parameters:\n        scene (np.ndarray): The scene on which the rectangle will be drawn\n        rect (Rect): The rectangle to be drawn\n        color (Color): The color of the rectangle\n        thickness (int): The thickness of the rectangle border\n\n    Returns:\n        np.ndarray: The scene with the rectangle drawn on it\n    \"\"\"\n    cv2.rectangle(\n        scene,\n        rect.top_left.as_xy_int_tuple(),\n        rect.bottom_right.as_xy_int_tuple(),\n        color.as_bgr(),\n        thickness=thickness,\n    )\n    return scene\n
draw_filled_rectangle

Draws a filled rectangle on an image.

Parameters:

Name Type Description Default scene ndarray

The scene on which the rectangle will be drawn

required rect Rect

The rectangle to be drawn

required color Color

The color of the rectangle

required

Returns:

Type Description ndarray

np.ndarray: The scene with the rectangle drawn on it

Source code in supervision/draw/utils.py
def draw_filled_rectangle(scene: np.ndarray, rect: Rect, color: Color) -> np.ndarray:\n    \"\"\"\n    Draws a filled rectangle on an image.\n\n    Parameters:\n        scene (np.ndarray): The scene on which the rectangle will be drawn\n        rect (Rect): The rectangle to be drawn\n        color (Color): The color of the rectangle\n\n    Returns:\n        np.ndarray: The scene with the rectangle drawn on it\n    \"\"\"\n    cv2.rectangle(\n        scene,\n        rect.top_left.as_xy_int_tuple(),\n        rect.bottom_right.as_xy_int_tuple(),\n        color.as_bgr(),\n        -1,\n    )\n    return scene\n
draw_polygon

Draw a polygon on a scene.

Parameters:

Name Type Description Default scene ndarray

The scene to draw the polygon on.

required polygon ndarray

The polygon to be drawn, given as a list of vertices.

required color Color

The color of the polygon.

required thickness int

The thickness of the polygon lines, by default 2.

2

Returns:

Type Description ndarray

np.ndarray: The scene with the polygon drawn on it.

Source code in supervision/draw/utils.py
def draw_polygon(\n    scene: np.ndarray, polygon: np.ndarray, color: Color, thickness: int = 2\n) -> np.ndarray:\n    \"\"\"Draw a polygon on a scene.\n\n    Parameters:\n        scene (np.ndarray): The scene to draw the polygon on.\n        polygon (np.ndarray): The polygon to be drawn, given as a list of vertices.\n        color (Color): The color of the polygon.\n        thickness (int, optional): The thickness of the polygon lines, by default 2.\n\n    Returns:\n        np.ndarray: The scene with the polygon drawn on it.\n    \"\"\"\n    cv2.polylines(\n        scene, [polygon], isClosed=True, color=color.as_bgr(), thickness=thickness\n    )\n    return scene\n
draw_text

Draw text with background on a scene.

Parameters:

Name Type Description Default scene ndarray

A 2-dimensional numpy ndarray representing an image or scene

required text str

The text to be drawn.

required text_anchor Point

The anchor point for the text, represented as a Point object with x and y attributes.

required text_color Color

The color of the text. Defaults to black.

BLACK text_scale float

The scale of the text. Defaults to 0.5.

0.5 text_thickness int

The thickness of the text. Defaults to 1.

1 text_padding int

The amount of padding to add around the text when drawing a rectangle in the background. Defaults to 10.

10 text_font int

The font to use for the text. Defaults to cv2.FONT_HERSHEY_SIMPLEX.

FONT_HERSHEY_SIMPLEX background_color Color

The color of the background rectangle, if one is to be drawn. Defaults to None.

None

Returns:

Type Description ndarray

np.ndarray: The input scene with the text drawn on it.

Examples:

import numpy as np\n\nscene = np.zeros((100, 100, 3), dtype=np.uint8)\ntext_anchor = Point(x=50, y=50)\nscene = draw_text(scene=scene, text=\"Hello, world!\",text_anchor=text_anchor)\n
Source code in supervision/draw/utils.py
def draw_text(\n    scene: np.ndarray,\n    text: str,\n    text_anchor: Point,\n    text_color: Color = Color.BLACK,\n    text_scale: float = 0.5,\n    text_thickness: int = 1,\n    text_padding: int = 10,\n    text_font: int = cv2.FONT_HERSHEY_SIMPLEX,\n    background_color: Optional[Color] = None,\n) -> np.ndarray:\n    \"\"\"\n    Draw text with background on a scene.\n\n    Parameters:\n        scene (np.ndarray): A 2-dimensional numpy ndarray representing an image or scene\n        text (str): The text to be drawn.\n        text_anchor (Point): The anchor point for the text, represented as a\n            Point object with x and y attributes.\n        text_color (Color, optional): The color of the text. Defaults to black.\n        text_scale (float, optional): The scale of the text. Defaults to 0.5.\n        text_thickness (int, optional): The thickness of the text. Defaults to 1.\n        text_padding (int, optional): The amount of padding to add around the text\n            when drawing a rectangle in the background. Defaults to 10.\n        text_font (int, optional): The font to use for the text.\n            Defaults to cv2.FONT_HERSHEY_SIMPLEX.\n        background_color (Color, optional): The color of the background rectangle,\n            if one is to be drawn. Defaults to None.\n\n    Returns:\n        np.ndarray: The input scene with the text drawn on it.\n\n    Examples:\n        ```python\n        import numpy as np\n\n        scene = np.zeros((100, 100, 3), dtype=np.uint8)\n        text_anchor = Point(x=50, y=50)\n        scene = draw_text(scene=scene, text=\"Hello, world!\",text_anchor=text_anchor)\n        ```\n    \"\"\"\n    text_width, text_height = cv2.getTextSize(\n        text=text,\n        fontFace=text_font,\n        fontScale=text_scale,\n        thickness=text_thickness,\n    )[0]\n\n    text_anchor_x, text_anchor_y = text_anchor.as_xy_int_tuple()\n\n    text_rect = Rect(\n        x=text_anchor_x - text_width // 2,\n        y=text_anchor_y - text_height // 2,\n        width=text_width,\n        height=text_height,\n    ).pad(text_padding)\n\n    if background_color is not None:\n        scene = draw_filled_rectangle(\n            scene=scene, rect=text_rect, color=background_color\n        )\n\n    cv2.putText(\n        img=scene,\n        text=text,\n        org=(text_anchor_x - text_width // 2, text_anchor_y + text_height // 2),\n        fontFace=text_font,\n        fontScale=text_scale,\n        color=text_color.as_bgr(),\n        thickness=text_thickness,\n        lineType=cv2.LINE_AA,\n    )\n    return scene\n
draw_image

Draws an image onto a given scene with specified opacity and dimensions.

Parameters:

- `scene` (`np.ndarray`): Background image where the new image will be drawn. Required.
- `image` (`Union[str, np.ndarray]`): Image to draw. Required.
- `opacity` (`float`): Opacity of the image to be drawn. Required.
- `rect` (`Rect`): Rectangle specifying where to draw the image. Required.

Returns:

- `np.ndarray`: The updated scene.

Raises:

- `FileNotFoundError`: If the image path does not exist.
- `ValueError`: For invalid opacity or rectangle dimensions.

Source code in supervision/draw/utils.py
def draw_image(\n    scene: np.ndarray, image: Union[str, np.ndarray], opacity: float, rect: Rect\n) -> np.ndarray:\n    \"\"\"\n    Draws an image onto a given scene with specified opacity and dimensions.\n\n    Args:\n        scene (np.ndarray): Background image where the new image will be drawn.\n        image (Union[str, np.ndarray]): Image to draw.\n        opacity (float): Opacity of the image to be drawn.\n        rect (Rect): Rectangle specifying where to draw the image.\n\n    Returns:\n        np.ndarray: The updated scene.\n\n    Raises:\n        FileNotFoundError: If the image path does not exist.\n        ValueError: For invalid opacity or rectangle dimensions.\n    \"\"\"\n\n    # Validate and load image\n    if isinstance(image, str):\n        if not os.path.exists(image):\n            raise FileNotFoundError(f\"Image path ('{image}') does not exist.\")\n        image = cv2.imread(image, cv2.IMREAD_UNCHANGED)\n\n    # Validate opacity\n    if not 0.0 <= opacity <= 1.0:\n        raise ValueError(\"Opacity must be between 0.0 and 1.0.\")\n\n    # Validate rectangle dimensions\n    if (\n        rect.x < 0\n        or rect.y < 0\n        or rect.x + rect.width > scene.shape[1]\n        or rect.y + rect.height > scene.shape[0]\n    ):\n        raise ValueError(\"Invalid rectangle dimensions.\")\n\n    # Resize and isolate alpha channel\n    image = cv2.resize(image, (rect.width, rect.height))\n    alpha_channel = (\n        image[:, :, 3]\n        if image.shape[2] == 4\n        else np.ones((rect.height, rect.width), dtype=image.dtype) * 255\n    )\n    alpha_scaled = cv2.convertScaleAbs(alpha_channel * opacity)\n\n    # Perform blending\n    scene_roi = scene[rect.y : rect.y + rect.height, rect.x : rect.x + rect.width]\n    alpha_float = alpha_scaled.astype(np.float32) / 255.0\n    blended_roi = cv2.convertScaleAbs(\n        (1 - alpha_float[..., np.newaxis]) * scene_roi\n        + alpha_float[..., np.newaxis] * image[:, :, :3]\n    )\n\n    # Update the scene\n    scene[rect.y : rect.y + rect.height, rect.x : rect.x + rect.width] = blended_roi\n\n    return scene\n
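`draw_image` has no example of its own above, so here is a minimal sketch. It assumes `draw_image` and `Rect` are exported at the package level (otherwise import them from `supervision.draw.utils` and `supervision.geometry.core`), and the arrays are stand-ins for a real scene and overlay.

```python
import numpy as np
import supervision as sv

# 480x640 black scene and a white 100x100 patch drawn at 50% opacity
scene = np.zeros((480, 640, 3), dtype=np.uint8)
patch = np.full((100, 100, 3), 255, dtype=np.uint8)

scene = sv.draw_image(
    scene=scene,
    image=patch,
    opacity=0.5,
    rect=sv.Rect(x=20, y=20, width=100, height=100),
)
```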
calculate_optimal_text_scale

Calculate font scale based on the resolution of an image.

Parameters:

- `resolution_wh` (`Tuple[int, int]`): A tuple representing the width and height of the image. Required.

Returns:

- `float`: The calculated font scale factor.

Source code in supervision/draw/utils.py
```python
def calculate_optimal_text_scale(resolution_wh: Tuple[int, int]) -> float:
    """
    Calculate font scale based on the resolution of an image.

    Parameters:
         resolution_wh (Tuple[int, int]): A tuple representing the width and height
             of the image.

    Returns:
         float: The calculated font scale factor.
    """
    return min(resolution_wh) * 1e-3
```
calculate_optimal_line_thickness

Calculate line thickness based on the resolution of an image.

Parameters:

- `resolution_wh` (`Tuple[int, int]`): A tuple representing the width and height of the image. Required.

Returns:

- `int`: The calculated line thickness in pixels.

Source code in supervision/draw/utils.py
```python
def calculate_optimal_line_thickness(resolution_wh: Tuple[int, int]) -> int:
    """
    Calculate line thickness based on the resolution of an image.

    Parameters:
        resolution_wh (Tuple[int, int]): A tuple representing the width and height
            of the image.

    Returns:
        int: The calculated line thickness in pixels.
    """
    if min(resolution_wh) < 1080:
        return 2
    return 4
```
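A short sketch showing the intended use of the two helpers above together with `draw_text`; it assumes both calculators, `draw_text`, and `Point` are exported at the package level, and the frame and label text are assumptions for illustration.

```python
import numpy as np
import supervision as sv

resolution_wh = (1920, 1080)
scene = np.zeros((resolution_wh[1], resolution_wh[0], 3), dtype=np.uint8)

# scale text size and line thickness to the frame resolution
text_scale = sv.calculate_optimal_text_scale(resolution_wh=resolution_wh)
thickness = sv.calculate_optimal_line_thickness(resolution_wh=resolution_wh)

scene = sv.draw_text(
    scene=scene,
    text="frame 001",
    text_anchor=sv.Point(x=960, y=40),
    text_scale=text_scale,
    text_thickness=thickness,
)
```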
Color

Represents a color in RGB format.

This class provides methods to work with colors, including creating colors from hex codes, converting colors to hex strings, RGB tuples, and BGR tuples.

Attributes:

- `r` (`int`): Red channel value (0-255).
- `g` (`int`): Green channel value (0-255).
- `b` (`int`): Blue channel value (0-255).

Example
```python
import supervision as sv

sv.Color.WHITE
# Color(r=255, g=255, b=255)
```
| Constant | Hex Code | RGB |
|----------|----------|-----|
| `WHITE` | `#FFFFFF` | `(255, 255, 255)` |
| `BLACK` | `#000000` | `(0, 0, 0)` |
| `RED` | `#FF0000` | `(255, 0, 0)` |
| `GREEN` | `#00FF00` | `(0, 255, 0)` |
| `BLUE` | `#0000FF` | `(0, 0, 255)` |
| `YELLOW` | `#FFFF00` | `(255, 255, 0)` |
| `ROBOFLOW` | `#A351FB` | `(163, 81, 251)` |

Source code in supervision/draw/color.py
@dataclass\nclass Color:\n    \"\"\"\n    Represents a color in RGB format.\n\n    This class provides methods to work with colors, including creating colors from hex\n    codes, converting colors to hex strings, RGB tuples, and BGR tuples.\n\n    Attributes:\n        r (int): Red channel value (0-255).\n        g (int): Green channel value (0-255).\n        b (int): Blue channel value (0-255).\n\n    Example:\n        ```python\n        import supervision as sv\n\n        sv.Color.WHITE\n        # Color(r=255, g=255, b=255)\n        ```\n\n    | Constant   | Hex Code   | RGB              |\n    |------------|------------|------------------|\n    | `WHITE`    | `#FFFFFF`  | `(255, 255, 255)`|\n    | `BLACK`    | `#000000`  | `(0, 0, 0)`      |\n    | `RED`      | `#FF0000`  | `(255, 0, 0)`    |\n    | `GREEN`    | `#00FF00`  | `(0, 255, 0)`    |\n    | `BLUE`     | `#0000FF`  | `(0, 0, 255)`    |\n    | `YELLOW`   | `#FFFF00`  | `(255, 255, 0)`  |\n    | `ROBOFLOW` | `#A351FB`  | `(163, 81, 251)` |\n    \"\"\"\n\n    r: int\n    g: int\n    b: int\n\n    @classmethod\n    def from_hex(cls, color_hex: str) -> Color:\n        \"\"\"\n        Create a Color instance from a hex string.\n\n        Args:\n            color_hex (str): The hex string representing the color. This string can\n                start with '#' followed by either 3 or 6 hexadecimal characters. In\n                case of 3 characters, each character is repeated to form the full\n                6-character hex code.\n\n        Returns:\n            Color: An instance representing the color.\n\n        Example:\n            ```python\n            import supervision as sv\n\n            sv.Color.from_hex('#ff00ff')\n            # Color(r=255, g=0, b=255)\n\n            sv.Color.from_hex('#f0f')\n            # Color(r=255, g=0, b=255)\n            ```\n        \"\"\"\n        _validate_color_hex(color_hex)\n        color_hex = color_hex.lstrip(\"#\")\n        if len(color_hex) == 3:\n            color_hex = \"\".join(c * 2 for c in color_hex)\n        r, g, b = (int(color_hex[i : i + 2], 16) for i in range(0, 6, 2))\n        return cls(r, g, b)\n\n    @classmethod\n    def from_rgb_tuple(cls, color_tuple: Tuple[int, int, int]) -> Color:\n        \"\"\"\n        Create a Color instance from an RGB tuple.\n\n        Args:\n            color_tuple (Tuple[int, int, int]): A tuple representing the color in RGB\n                format, where each element is an integer in the range 0-255.\n\n        Returns:\n            Color: An instance representing the color.\n\n        Example:\n            ```python\n            import supervision as sv\n\n            sv.Color.from_rgb_tuple((255, 255, 0))\n            # Color(r=255, g=255, b=0)\n            ```\n        \"\"\"\n        r, g, b = color_tuple\n        return cls(r=r, g=g, b=b)\n\n    @classmethod\n    def from_bgr_tuple(cls, color_tuple: Tuple[int, int, int]) -> Color:\n        \"\"\"\n        Create a Color instance from a BGR tuple.\n\n        Args:\n            color_tuple (Tuple[int, int, int]): A tuple representing the color in BGR\n                format, where each element is an integer in the range 0-255.\n\n        Returns:\n            Color: An instance representing the color.\n\n        Example:\n            ```python\n            import supervision as sv\n\n            sv.Color.from_bgr_tuple((0, 255, 255))\n            # Color(r=255, g=255, b=0)\n            ```\n        \"\"\"\n        b, g, r = color_tuple\n        return cls(r=r, g=g, b=b)\n\n    def 
as_hex(self) -> str:\n        \"\"\"\n        Converts the Color instance to a hex string.\n\n        Returns:\n            str: The hexadecimal color string.\n\n        Example:\n            ```python\n            import supervision as sv\n\n            sv.Color(r=255, g=255, b=0).as_hex()\n            # '#ffff00'\n            ```\n        \"\"\"\n        return f\"#{self.r:02x}{self.g:02x}{self.b:02x}\"\n\n    def as_rgb(self) -> Tuple[int, int, int]:\n        \"\"\"\n        Returns the color as an RGB tuple.\n\n        Returns:\n            Tuple[int, int, int]: RGB tuple.\n\n        Example:\n            ```python\n            import supervision as sv\n\n            sv.Color(r=255, g=255, b=0).as_rgb()\n            # (255, 255, 0)\n            ```\n        \"\"\"\n        return self.r, self.g, self.b\n\n    def as_bgr(self) -> Tuple[int, int, int]:\n        \"\"\"\n        Returns the color as a BGR tuple.\n\n        Returns:\n            Tuple[int, int, int]: BGR tuple.\n\n        Example:\n            ```python\n            import supervision as sv\n\n            sv.Color(r=255, g=255, b=0).as_bgr()\n            # (0, 255, 255)\n            ```\n        \"\"\"\n        return self.b, self.g, self.r\n\n    @classproperty\n    def WHITE(cls) -> Color:\n        return Color.from_hex(\"#FFFFFF\")\n\n    @classproperty\n    def BLACK(cls) -> Color:\n        return Color.from_hex(\"#000000\")\n\n    @classproperty\n    def RED(cls) -> Color:\n        return Color.from_hex(\"#FF0000\")\n\n    @classproperty\n    def GREEN(cls) -> Color:\n        return Color.from_hex(\"#00FF00\")\n\n    @classproperty\n    def BLUE(cls) -> Color:\n        return Color.from_hex(\"#0000FF\")\n\n    @classproperty\n    def YELLOW(cls) -> Color:\n        return Color.from_hex(\"#FFFF00\")\n\n    @classproperty\n    def ROBOFLOW(cls) -> Color:\n        return Color.from_hex(\"#A351FB\")\n\n    @classmethod\n    @deprecated(\n        \"`Color.white()` is deprecated and will be removed in \"\n        \"`supervision-0.22.0`. Use `Color.WHITE` instead.\"\n    )\n    def white(cls) -> Color:\n        return Color.from_hex(color_hex=\"#ffffff\")\n\n    @classmethod\n    @deprecated(\n        \"`Color.black()` is deprecated and will be removed in \"\n        \"`supervision-0.22.0`. Use `Color.BLACK` instead.\"\n    )\n    def black(cls) -> Color:\n        return Color.from_hex(color_hex=\"#000000\")\n\n    @classmethod\n    @deprecated(\n        \"`Color.red()` is deprecated and will be removed in \"\n        \"`supervision-0.22.0`. Use `Color.RED` instead.\"\n    )\n    def red(cls) -> Color:\n        return Color.from_hex(color_hex=\"#ff0000\")\n\n    @classmethod\n    @deprecated(\n        \"`Color.green()` is deprecated and will be removed in \"\n        \"`supervision-0.22.0`. Use `Color.GREEN` instead.\"\n    )\n    def green(cls) -> Color:\n        return Color.from_hex(color_hex=\"#00ff00\")\n\n    @classmethod\n    @deprecated(\n        \"`Color.blue()` is deprecated and will be removed in \"\n        \"`supervision-0.22.0`. Use `Color.BLUE` instead.\"\n    )\n    def blue(cls) -> Color:\n        return Color.from_hex(color_hex=\"#0000ff\")\n
ColorPalette

Source code in supervision/draw/color.py
@dataclass\nclass ColorPalette:\n    colors: List[Color]\n\n    @classproperty\n    def DEFAULT(cls) -> ColorPalette:\n        \"\"\"\n        Returns a default color palette.\n\n        Returns:\n            ColorPalette: A ColorPalette instance with default colors.\n\n        Example:\n            ```python\n            import supervision as sv\n\n            sv.ColorPalette.DEFAULT\n            # ColorPalette(colors=[Color(r=255, g=64, b=64), Color(r=255, g=161, b=160), ...])\n            ```\n\n        ![default-color-palette](https://media.roboflow.com/\n        supervision-annotator-examples/default-color-palette.png)\n        \"\"\"  # noqa: E501 // docs\n        return ColorPalette.from_hex(color_hex_list=DEFAULT_COLOR_PALETTE)\n\n    @classproperty\n    def ROBOFLOW(cls) -> ColorPalette:\n        \"\"\"\n        Returns a Roboflow color palette.\n\n        Returns:\n            ColorPalette: A ColorPalette instance with Roboflow colors.\n\n        Example:\n            ```python\n            import supervision as sv\n\n            sv.ColorPalette.ROBOFLOW\n            # ColorPalette(colors=[Color(r=194, g=141, b=252), Color(r=163, g=81, b=251), ...])\n            ```\n\n        ![roboflow-color-palette](https://media.roboflow.com/\n        supervision-annotator-examples/roboflow-color-palette.png)\n        \"\"\"  # noqa: E501 // docs\n        return ColorPalette.from_hex(color_hex_list=ROBOFLOW_COLOR_PALETTE)\n\n    @classproperty\n    def LEGACY(cls) -> ColorPalette:\n        return ColorPalette.from_hex(color_hex_list=LEGACY_COLOR_PALETTE)\n\n    @classmethod\n    @deprecated(\n        \"`ColorPalette.default()` is deprecated and will be removed in \"\n        \"`supervision-0.22.0`. Use `Color.DEFAULT` instead.\"\n    )\n    def default(cls) -> ColorPalette:\n        \"\"\"\n        !!! failure \"Deprecated\"\n\n            `ColorPalette.default()` is deprecated and will be removed in\n            `supervision-0.22.0`. 
Use `Color.DEFAULT` instead.\n\n        Returns a default color palette.\n\n        Returns:\n            ColorPalette: A ColorPalette instance with default colors.\n\n        Example:\n            ```python\n            import supervision as sv\n\n            sv.ColorPalette.default()\n            # ColorPalette(colors=[Color(r=255, g=64, b=64), Color(r=255, g=161, b=160), ...])\n            ```\n        \"\"\"  # noqa: E501 // docs\n        return ColorPalette.from_hex(color_hex_list=DEFAULT_COLOR_PALETTE)\n\n    @classmethod\n    def from_hex(cls, color_hex_list: List[str]) -> ColorPalette:\n        \"\"\"\n        Create a ColorPalette instance from a list of hex strings.\n\n        Args:\n            color_hex_list (List[str]): List of color hex strings.\n\n        Returns:\n            ColorPalette: A ColorPalette instance.\n\n        Example:\n            ```python\n            import supervision as sv\n\n            sv.ColorPalette.from_hex(['#ff0000', '#00ff00', '#0000ff'])\n            # ColorPalette(colors=[Color(r=255, g=0, b=0), Color(r=0, g=255, b=0), ...])\n            ```\n        \"\"\"\n        colors = [Color.from_hex(color_hex) for color_hex in color_hex_list]\n        return cls(colors)\n\n    @classmethod\n    def from_matplotlib(cls, palette_name: str, color_count: int) -> ColorPalette:\n        \"\"\"\n        Create a ColorPalette instance from a Matplotlib color palette.\n\n        Args:\n            palette_name (str): Name of the Matplotlib palette.\n            color_count (int): Number of colors to sample from the palette.\n\n        Returns:\n            ColorPalette: A ColorPalette instance.\n\n        Example:\n            ```python\n            import supervision as sv\n\n            sv.ColorPalette.from_matplotlib('viridis', 5)\n            # ColorPalette(colors=[Color(r=68, g=1, b=84), Color(r=59, g=82, b=139), ...])\n            ```\n\n        ![visualized_color_palette](https://media.roboflow.com/\n        supervision-annotator-examples/visualized_color_palette.png)\n        \"\"\"  # noqa: E501 // docs\n        mpl_palette = plt.get_cmap(palette_name, color_count)\n        colors = [\n            Color(int(r * 255), int(g * 255), int(b * 255))\n            for r, g, b, _ in mpl_palette.colors\n        ]\n        return cls(colors)\n\n    def by_idx(self, idx: int) -> Color:\n        \"\"\"\n        Return the color at a given index in the palette.\n\n        Args:\n            idx (int): Index of the color in the palette.\n\n        Returns:\n            Color: Color at the given index.\n\n        Example:\n            ```python\n            import supervision as sv\n\n            color_palette = sv.ColorPalette.from_hex(['#ff0000', '#00ff00', '#0000ff'])\n            color_palette.by_idx(1)\n            # Color(r=0, g=255, b=0)\n            ```\n        \"\"\"\n        if idx < 0:\n            raise ValueError(\"idx argument should not be negative\")\n        idx = idx % len(self.colors)\n        return self.colors[idx]\n
"},{"location":"utils/draw/#supervision.draw.color.Color-functions","title":"Functions","text":""},{"location":"utils/draw/#supervision.draw.color.Color.as_bgr","title":"as_bgr()","text":"

Returns the color as a BGR tuple.

Returns:

- `Tuple[int, int, int]`: BGR tuple.

Example
```python
import supervision as sv

sv.Color(r=255, g=255, b=0).as_bgr()
# (0, 255, 255)
```
Source code in supervision/draw/color.py
def as_bgr(self) -> Tuple[int, int, int]:\n    \"\"\"\n    Returns the color as a BGR tuple.\n\n    Returns:\n        Tuple[int, int, int]: BGR tuple.\n\n    Example:\n        ```python\n        import supervision as sv\n\n        sv.Color(r=255, g=255, b=0).as_bgr()\n        # (0, 255, 255)\n        ```\n    \"\"\"\n    return self.b, self.g, self.r\n
"},{"location":"utils/draw/#supervision.draw.color.Color.as_hex","title":"as_hex()","text":"

Converts the Color instance to a hex string.

Returns:

- `str`: The hexadecimal color string.

Example
```python
import supervision as sv

sv.Color(r=255, g=255, b=0).as_hex()
# '#ffff00'
```
Source code in supervision/draw/color.py
def as_hex(self) -> str:\n    \"\"\"\n    Converts the Color instance to a hex string.\n\n    Returns:\n        str: The hexadecimal color string.\n\n    Example:\n        ```python\n        import supervision as sv\n\n        sv.Color(r=255, g=255, b=0).as_hex()\n        # '#ffff00'\n        ```\n    \"\"\"\n    return f\"#{self.r:02x}{self.g:02x}{self.b:02x}\"\n
"},{"location":"utils/draw/#supervision.draw.color.Color.as_rgb","title":"as_rgb()","text":"

Returns the color as an RGB tuple.

Returns:

- `Tuple[int, int, int]`: RGB tuple.

Example
```python
import supervision as sv

sv.Color(r=255, g=255, b=0).as_rgb()
# (255, 255, 0)
```
Source code in supervision/draw/color.py
def as_rgb(self) -> Tuple[int, int, int]:\n    \"\"\"\n    Returns the color as an RGB tuple.\n\n    Returns:\n        Tuple[int, int, int]: RGB tuple.\n\n    Example:\n        ```python\n        import supervision as sv\n\n        sv.Color(r=255, g=255, b=0).as_rgb()\n        # (255, 255, 0)\n        ```\n    \"\"\"\n    return self.r, self.g, self.b\n
"},{"location":"utils/draw/#supervision.draw.color.Color.from_bgr_tuple","title":"from_bgr_tuple(color_tuple) classmethod","text":"

Create a Color instance from a BGR tuple.

Parameters:

- `color_tuple` (`Tuple[int, int, int]`): A tuple representing the color in BGR format, where each element is an integer in the range 0-255. Required.

Returns:

- `Color`: An instance representing the color.

Example
```python
import supervision as sv

sv.Color.from_bgr_tuple((0, 255, 255))
# Color(r=255, g=255, b=0)
```
Source code in supervision/draw/color.py
@classmethod\ndef from_bgr_tuple(cls, color_tuple: Tuple[int, int, int]) -> Color:\n    \"\"\"\n    Create a Color instance from a BGR tuple.\n\n    Args:\n        color_tuple (Tuple[int, int, int]): A tuple representing the color in BGR\n            format, where each element is an integer in the range 0-255.\n\n    Returns:\n        Color: An instance representing the color.\n\n    Example:\n        ```python\n        import supervision as sv\n\n        sv.Color.from_bgr_tuple((0, 255, 255))\n        # Color(r=255, g=255, b=0)\n        ```\n    \"\"\"\n    b, g, r = color_tuple\n    return cls(r=r, g=g, b=b)\n
"},{"location":"utils/draw/#supervision.draw.color.Color.from_hex","title":"from_hex(color_hex) classmethod","text":"

Create a Color instance from a hex string.

Parameters:

- `color_hex` (`str`): The hex string representing the color. This string can start with '#' followed by either 3 or 6 hexadecimal characters. In case of 3 characters, each character is repeated to form the full 6-character hex code. Required.

Returns:

- `Color`: An instance representing the color.

Example
```python
import supervision as sv

sv.Color.from_hex('#ff00ff')
# Color(r=255, g=0, b=255)

sv.Color.from_hex('#f0f')
# Color(r=255, g=0, b=255)
```
Source code in supervision/draw/color.py
@classmethod\ndef from_hex(cls, color_hex: str) -> Color:\n    \"\"\"\n    Create a Color instance from a hex string.\n\n    Args:\n        color_hex (str): The hex string representing the color. This string can\n            start with '#' followed by either 3 or 6 hexadecimal characters. In\n            case of 3 characters, each character is repeated to form the full\n            6-character hex code.\n\n    Returns:\n        Color: An instance representing the color.\n\n    Example:\n        ```python\n        import supervision as sv\n\n        sv.Color.from_hex('#ff00ff')\n        # Color(r=255, g=0, b=255)\n\n        sv.Color.from_hex('#f0f')\n        # Color(r=255, g=0, b=255)\n        ```\n    \"\"\"\n    _validate_color_hex(color_hex)\n    color_hex = color_hex.lstrip(\"#\")\n    if len(color_hex) == 3:\n        color_hex = \"\".join(c * 2 for c in color_hex)\n    r, g, b = (int(color_hex[i : i + 2], 16) for i in range(0, 6, 2))\n    return cls(r, g, b)\n
"},{"location":"utils/draw/#supervision.draw.color.Color.from_rgb_tuple","title":"from_rgb_tuple(color_tuple) classmethod","text":"

Create a Color instance from an RGB tuple.

Parameters:

- `color_tuple` (`Tuple[int, int, int]`): A tuple representing the color in RGB format, where each element is an integer in the range 0-255. Required.

Returns:

- `Color`: An instance representing the color.

Example
```python
import supervision as sv

sv.Color.from_rgb_tuple((255, 255, 0))
# Color(r=255, g=255, b=0)
```
Source code in supervision/draw/color.py
@classmethod\ndef from_rgb_tuple(cls, color_tuple: Tuple[int, int, int]) -> Color:\n    \"\"\"\n    Create a Color instance from an RGB tuple.\n\n    Args:\n        color_tuple (Tuple[int, int, int]): A tuple representing the color in RGB\n            format, where each element is an integer in the range 0-255.\n\n    Returns:\n        Color: An instance representing the color.\n\n    Example:\n        ```python\n        import supervision as sv\n\n        sv.Color.from_rgb_tuple((255, 255, 0))\n        # Color(r=255, g=255, b=0)\n        ```\n    \"\"\"\n    r, g, b = color_tuple\n    return cls(r=r, g=g, b=b)\n
"},{"location":"utils/draw/#supervision.draw.color.ColorPalette-functions","title":"Functions","text":""},{"location":"utils/draw/#supervision.draw.color.ColorPalette.DEFAULT","title":"DEFAULT()","text":"

Returns a default color palette.

Returns:

- `ColorPalette`: A ColorPalette instance with default colors.

Example
```python
import supervision as sv

sv.ColorPalette.DEFAULT
# ColorPalette(colors=[Color(r=255, g=64, b=64), Color(r=255, g=161, b=160), ...])
```

Source code in supervision/draw/color.py
@classproperty\ndef DEFAULT(cls) -> ColorPalette:\n    \"\"\"\n    Returns a default color palette.\n\n    Returns:\n        ColorPalette: A ColorPalette instance with default colors.\n\n    Example:\n        ```python\n        import supervision as sv\n\n        sv.ColorPalette.DEFAULT\n        # ColorPalette(colors=[Color(r=255, g=64, b=64), Color(r=255, g=161, b=160), ...])\n        ```\n\n    ![default-color-palette](https://media.roboflow.com/\n    supervision-annotator-examples/default-color-palette.png)\n    \"\"\"  # noqa: E501 // docs\n    return ColorPalette.from_hex(color_hex_list=DEFAULT_COLOR_PALETTE)\n
"},{"location":"utils/draw/#supervision.draw.color.ColorPalette.ROBOFLOW","title":"ROBOFLOW()","text":"

Returns a Roboflow color palette.

Returns:

- `ColorPalette`: A ColorPalette instance with Roboflow colors.

Example
```python
import supervision as sv

sv.ColorPalette.ROBOFLOW
# ColorPalette(colors=[Color(r=194, g=141, b=252), Color(r=163, g=81, b=251), ...])
```

Source code in supervision/draw/color.py
@classproperty\ndef ROBOFLOW(cls) -> ColorPalette:\n    \"\"\"\n    Returns a Roboflow color palette.\n\n    Returns:\n        ColorPalette: A ColorPalette instance with Roboflow colors.\n\n    Example:\n        ```python\n        import supervision as sv\n\n        sv.ColorPalette.ROBOFLOW\n        # ColorPalette(colors=[Color(r=194, g=141, b=252), Color(r=163, g=81, b=251), ...])\n        ```\n\n    ![roboflow-color-palette](https://media.roboflow.com/\n    supervision-annotator-examples/roboflow-color-palette.png)\n    \"\"\"  # noqa: E501 // docs\n    return ColorPalette.from_hex(color_hex_list=ROBOFLOW_COLOR_PALETTE)\n
"},{"location":"utils/draw/#supervision.draw.color.ColorPalette.by_idx","title":"by_idx(idx)","text":"

Return the color at a given index in the palette.

Parameters:

- `idx` (`int`): Index of the color in the palette. Required.

Returns:

- `Color`: Color at the given index.

Example
```python
import supervision as sv

color_palette = sv.ColorPalette.from_hex(['#ff0000', '#00ff00', '#0000ff'])
color_palette.by_idx(1)
# Color(r=0, g=255, b=0)
```
Source code in supervision/draw/color.py
def by_idx(self, idx: int) -> Color:\n    \"\"\"\n    Return the color at a given index in the palette.\n\n    Args:\n        idx (int): Index of the color in the palette.\n\n    Returns:\n        Color: Color at the given index.\n\n    Example:\n        ```python\n        import supervision as sv\n\n        color_palette = sv.ColorPalette.from_hex(['#ff0000', '#00ff00', '#0000ff'])\n        color_palette.by_idx(1)\n        # Color(r=0, g=255, b=0)\n        ```\n    \"\"\"\n    if idx < 0:\n        raise ValueError(\"idx argument should not be negative\")\n    idx = idx % len(self.colors)\n    return self.colors[idx]\n
"},{"location":"utils/draw/#supervision.draw.color.ColorPalette.default","title":"default() classmethod","text":"

Deprecated

ColorPalette.default() is deprecated and will be removed in supervision-0.22.0. Use Color.DEFAULT instead.

Returns a default color palette.

Returns:

- `ColorPalette`: A ColorPalette instance with default colors.

Example
```python
import supervision as sv

sv.ColorPalette.default()
# ColorPalette(colors=[Color(r=255, g=64, b=64), Color(r=255, g=161, b=160), ...])
```
Source code in supervision/draw/color.py
@classmethod\n@deprecated(\n    \"`ColorPalette.default()` is deprecated and will be removed in \"\n    \"`supervision-0.22.0`. Use `Color.DEFAULT` instead.\"\n)\ndef default(cls) -> ColorPalette:\n    \"\"\"\n    !!! failure \"Deprecated\"\n\n        `ColorPalette.default()` is deprecated and will be removed in\n        `supervision-0.22.0`. Use `Color.DEFAULT` instead.\n\n    Returns a default color palette.\n\n    Returns:\n        ColorPalette: A ColorPalette instance with default colors.\n\n    Example:\n        ```python\n        import supervision as sv\n\n        sv.ColorPalette.default()\n        # ColorPalette(colors=[Color(r=255, g=64, b=64), Color(r=255, g=161, b=160), ...])\n        ```\n    \"\"\"  # noqa: E501 // docs\n    return ColorPalette.from_hex(color_hex_list=DEFAULT_COLOR_PALETTE)\n
"},{"location":"utils/draw/#supervision.draw.color.ColorPalette.from_hex","title":"from_hex(color_hex_list) classmethod","text":"

Create a ColorPalette instance from a list of hex strings.

Parameters:

- `color_hex_list` (`List[str]`): List of color hex strings. Required.

Returns:

- `ColorPalette`: A ColorPalette instance.

Example
```python
import supervision as sv

sv.ColorPalette.from_hex(['#ff0000', '#00ff00', '#0000ff'])
# ColorPalette(colors=[Color(r=255, g=0, b=0), Color(r=0, g=255, b=0), ...])
```
Source code in supervision/draw/color.py
@classmethod\ndef from_hex(cls, color_hex_list: List[str]) -> ColorPalette:\n    \"\"\"\n    Create a ColorPalette instance from a list of hex strings.\n\n    Args:\n        color_hex_list (List[str]): List of color hex strings.\n\n    Returns:\n        ColorPalette: A ColorPalette instance.\n\n    Example:\n        ```python\n        import supervision as sv\n\n        sv.ColorPalette.from_hex(['#ff0000', '#00ff00', '#0000ff'])\n        # ColorPalette(colors=[Color(r=255, g=0, b=0), Color(r=0, g=255, b=0), ...])\n        ```\n    \"\"\"\n    colors = [Color.from_hex(color_hex) for color_hex in color_hex_list]\n    return cls(colors)\n
"},{"location":"utils/draw/#supervision.draw.color.ColorPalette.from_matplotlib","title":"from_matplotlib(palette_name, color_count) classmethod","text":"

Create a ColorPalette instance from a Matplotlib color palette.

Parameters:

- `palette_name` (`str`): Name of the Matplotlib palette. Required.
- `color_count` (`int`): Number of colors to sample from the palette. Required.

Returns:

- `ColorPalette`: A ColorPalette instance.

Example
```python
import supervision as sv

sv.ColorPalette.from_matplotlib('viridis', 5)
# ColorPalette(colors=[Color(r=68, g=1, b=84), Color(r=59, g=82, b=139), ...])
```

Source code in supervision/draw/color.py
@classmethod\ndef from_matplotlib(cls, palette_name: str, color_count: int) -> ColorPalette:\n    \"\"\"\n    Create a ColorPalette instance from a Matplotlib color palette.\n\n    Args:\n        palette_name (str): Name of the Matplotlib palette.\n        color_count (int): Number of colors to sample from the palette.\n\n    Returns:\n        ColorPalette: A ColorPalette instance.\n\n    Example:\n        ```python\n        import supervision as sv\n\n        sv.ColorPalette.from_matplotlib('viridis', 5)\n        # ColorPalette(colors=[Color(r=68, g=1, b=84), Color(r=59, g=82, b=139), ...])\n        ```\n\n    ![visualized_color_palette](https://media.roboflow.com/\n    supervision-annotator-examples/visualized_color_palette.png)\n    \"\"\"  # noqa: E501 // docs\n    mpl_palette = plt.get_cmap(palette_name, color_count)\n    colors = [\n        Color(int(r * 255), int(g * 255), int(b * 255))\n        for r, g, b, _ in mpl_palette.colors\n    ]\n    return cls(colors)\n
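A brief sketch, built only from the methods documented above, showing how a Matplotlib-derived palette can be cycled with `by_idx` (indices wrap around the palette length, so any class or tracker id maps onto one of the sampled colors).

```python
import supervision as sv

palette = sv.ColorPalette.from_matplotlib('viridis', 5)

# by_idx wraps indices modulo the palette size, so index 5 reuses the first color
hex_colors = [palette.by_idx(i).as_hex() for i in range(7)]
```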
"},{"location":"utils/file/","title":"File Utils","text":"list_files_with_extensions

List files in a directory with specified extensions or all files if no extensions are provided.

Parameters:

- `directory` (`Union[str, Path]`): The directory path as a string or Path object. Required.
- `extensions` (`Optional[List[str]]`): A list of file extensions to filter. Default is `None`, which lists all files.

Returns:

- `List[Path]`: A list of Path objects for the matching files.

Examples:

```python
import supervision as sv

# List all files in the directory
files = sv.list_files_with_extensions(directory='my_directory')

# List only files with '.txt' and '.md' extensions
files = sv.list_files_with_extensions(
    directory='my_directory', extensions=['txt', 'md'])
```
Source code in supervision/utils/file.py
def list_files_with_extensions(\n    directory: Union[str, Path], extensions: Optional[List[str]] = None\n) -> List[Path]:\n    \"\"\"\n    List files in a directory with specified extensions or\n        all files if no extensions are provided.\n\n    Args:\n        directory (Union[str, Path]): The directory path as a string or Path object.\n        extensions (Optional[List[str]]): A list of file extensions to filter.\n            Default is None, which lists all files.\n\n    Returns:\n        (List[Path]): A list of Path objects for the matching files.\n\n    Examples:\n        ```python\n        import supervision as sv\n\n        # List all files in the directory\n        files = sv.list_files_with_extensions(directory='my_directory')\n\n        # List only files with '.txt' and '.md' extensions\n        files = sv.list_files_with_extensions(\n            directory='my_directory', extensions=['txt', 'md'])\n        ```\n    \"\"\"\n\n    directory = Path(directory)\n    files_with_extensions = []\n\n    if extensions is not None:\n        for ext in extensions:\n            files_with_extensions.extend(directory.glob(f\"*.{ext}\"))\n    else:\n        files_with_extensions.extend(directory.glob(\"*\"))\n\n    return files_with_extensions\n
"},{"location":"utils/geometry/","title":"Geometry","text":"get_polygon_center

Calculate the center of a polygon. The center is calculated as the center of the solid figure formed by the points of the polygon.

Parameters:

- `polygon` (`np.ndarray`): A 2-dimensional numpy ndarray representing the vertices of the polygon. Required.

Returns:

- `Point`: The center of the polygon, represented as a Point object with x and y attributes.

Examples:

```python
import numpy as np
import supervision as sv

polygon = np.array([[0, 0], [0, 2], [2, 2], [2, 0]])
sv.get_polygon_center(polygon=polygon)
# Point(x=1, y=1)
```
Source code in supervision/geometry/utils.py
def get_polygon_center(polygon: np.ndarray) -> Point:\n    \"\"\"\n    Calculate the center of a polygon. The center is calculated as the center\n    of the solid figure formed by the points of the polygon\n\n    Parameters:\n        polygon (np.ndarray): A 2-dimensional numpy ndarray representing the\n            vertices of the polygon.\n\n    Returns:\n        Point: The center of the polygon, represented as a\n            Point object with x and y attributes.\n\n    Examples:\n        ```python\n        import numpy as np\n        import supervision as sv\n\n        polygon = np.array([[0, 0], [0, 2], [2, 2], [2, 0]])\n        sv.get_polygon_center(polygon=polygon)\n        # Point(x=1, y=1)\n        ```\n    \"\"\"\n\n    # This is one of the 3 candidate algorithms considered for centroid calculation.\n    # For a more detailed discussion, see PR #1084 and commit eb33176\n\n    shift_polygon = np.roll(polygon, -1, axis=0)\n    signed_areas = np.cross(polygon, shift_polygon) / 2\n    if signed_areas.sum() == 0:\n        center = np.mean(polygon, axis=0).round()\n        return Point(x=center[0], y=center[1])\n    centroids = (polygon + shift_polygon) / 3.0\n    center = np.average(centroids, axis=0, weights=signed_areas).round()\n\n    return Point(x=center[0], y=center[1])\n
Position

Bases: Enum

Enum representing the position of an anchor point.

Source code in supervision/geometry/core.py
```python
class Position(Enum):
    """
    Enum representing the position of an anchor point.
    """

    CENTER = "CENTER"
    CENTER_LEFT = "CENTER_LEFT"
    CENTER_RIGHT = "CENTER_RIGHT"
    TOP_CENTER = "TOP_CENTER"
    TOP_LEFT = "TOP_LEFT"
    TOP_RIGHT = "TOP_RIGHT"
    BOTTOM_LEFT = "BOTTOM_LEFT"
    BOTTOM_CENTER = "BOTTOM_CENTER"
    BOTTOM_RIGHT = "BOTTOM_RIGHT"
    CENTER_OF_MASS = "CENTER_OF_MASS"

    @classmethod
    def list(cls):
        return list(map(lambda c: c.value, cls))
```
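A minimal sketch of how `Position` is typically inspected; it assumes `Position` is exported at the package level as `sv.Position`.

```python
import supervision as sv

# list all supported anchor names
sv.Position.list()
# ['CENTER', 'CENTER_LEFT', 'CENTER_RIGHT', 'TOP_CENTER', 'TOP_LEFT',
#  'TOP_RIGHT', 'BOTTOM_LEFT', 'BOTTOM_CENTER', 'BOTTOM_RIGHT', 'CENTER_OF_MASS']

# each member carries its name as the string value
sv.Position.TOP_LEFT.value
# 'TOP_LEFT'
```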
"},{"location":"utils/image/","title":"Image Utils","text":"crop_image

Crops the given image based on the given bounding box.

Parameters:

- `image` (`ImageType`): The image to be cropped. `ImageType` is a flexible type, accepting either `numpy.ndarray` or `PIL.Image.Image`. Required.
- `xyxy` (`Union[np.ndarray, List[int], Tuple[int, int, int, int]]`): A bounding box coordinates in the format `(x_min, y_min, x_max, y_max)`, accepted as either a `numpy.ndarray`, a `list`, or a `tuple`. Required.

Returns:

- `ImageType`: The cropped image. The type is determined by the input type and may be either a `numpy.ndarray` or `PIL.Image.Image`.

OpenCV:

```python
import cv2
import supervision as sv

image = cv2.imread(<SOURCE_IMAGE_PATH>)
image.shape
# (1080, 1920, 3)

xyxy = [200, 400, 600, 800]
cropped_image = sv.crop_image(image=image, xyxy=xyxy)
cropped_image.shape
# (400, 400, 3)
```

Pillow:

```python
from PIL import Image
import supervision as sv

image = Image.open(<SOURCE_IMAGE_PATH>)
image.size
# (1920, 1080)

xyxy = [200, 400, 600, 800]
cropped_image = sv.crop_image(image=image, xyxy=xyxy)
cropped_image.size
# (400, 400)
```

Source code in supervision/utils/image.py
@convert_for_image_processing\ndef crop_image(\n    image: ImageType,\n    xyxy: Union[npt.NDArray[int], List[int], Tuple[int, int, int, int]],\n) -> ImageType:\n    \"\"\"\n    Crops the given image based on the given bounding box.\n\n    Args:\n        image (ImageType): The image to be cropped. `ImageType` is a flexible type,\n            accepting either `numpy.ndarray` or `PIL.Image.Image`.\n        xyxy (Union[np.ndarray, List[int], Tuple[int, int, int, int]]): A bounding box\n            coordinates in the format `(x_min, y_min, x_max, y_max)`, accepted as either\n            a `numpy.ndarray`, a `list`, or a `tuple`.\n\n    Returns:\n        (ImageType): The cropped image. The type is determined by the input type and\n            may be either a `numpy.ndarray` or `PIL.Image.Image`.\n\n    === \"OpenCV\"\n\n        ```python\n        import cv2\n        import supervision as sv\n\n        image = cv2.imread(<SOURCE_IMAGE_PATH>)\n        image.shape\n        # (1080, 1920, 3)\n\n        xyxy = [200, 400, 600, 800]\n        cropped_image = sv.crop_image(image=image, xyxy=xyxy)\n        cropped_image.shape\n        # (400, 400, 3)\n        ```\n\n    === \"Pillow\"\n\n        ```python\n        from PIL import Image\n        import supervision as sv\n\n        image = Image.open(<SOURCE_IMAGE_PATH>)\n        image.size\n        # (1920, 1080)\n\n        xyxy = [200, 400, 600, 800]\n        cropped_image = sv.crop_image(image=image, xyxy=xyxy)\n        cropped_image.size\n        # (400, 400)\n        ```\n\n    ![crop_image](https://media.roboflow.com/supervision-docs/crop-image.png){ align=center width=\"800\" }\n    \"\"\"  # noqa E501 // docs\n\n    if isinstance(xyxy, (list, tuple)):\n        xyxy = np.array(xyxy)\n    xyxy = np.round(xyxy).astype(int)\n    x_min, y_min, x_max, y_max = xyxy.flatten()\n    return image[y_min:y_max, x_min:x_max]\n
scale_image

Scales the given image based on the given scale factor.

Parameters:

- `image` (`ImageType`): The image to be scaled. `ImageType` is a flexible type, accepting either `numpy.ndarray` or `PIL.Image.Image`. Required.
- `scale_factor` (`float`): The factor by which the image will be scaled. Scale factor > 1.0 zooms in, < 1.0 zooms out. Required.

Returns:

- `ImageType`: The scaled image. The type is determined by the input type and may be either a `numpy.ndarray` or `PIL.Image.Image`.

Raises:

- `ValueError`: If the scale factor is non-positive.

OpenCV:

```python
import cv2
import supervision as sv

image = cv2.imread(<SOURCE_IMAGE_PATH>)
image.shape
# (1080, 1920, 3)

scaled_image = sv.scale_image(image=image, scale_factor=0.5)
scaled_image.shape
# (540, 960, 3)
```

Pillow:

```python
from PIL import Image
import supervision as sv

image = Image.open(<SOURCE_IMAGE_PATH>)
image.size
# (1920, 1080)

scaled_image = sv.scale_image(image=image, scale_factor=0.5)
scaled_image.size
# (960, 540)
```
Source code in supervision/utils/image.py
@convert_for_image_processing\ndef scale_image(image: ImageType, scale_factor: float) -> ImageType:\n    \"\"\"\n    Scales the given image based on the given scale factor.\n\n    Args:\n        image (ImageType): The image to be scaled. `ImageType` is a flexible type,\n            accepting either `numpy.ndarray` or `PIL.Image.Image`.\n        scale_factor (float): The factor by which the image will be scaled. Scale\n            factor > `1.0` zooms in, < `1.0` zooms out.\n\n    Returns:\n        (ImageType): The scaled image. The type is determined by the input type and\n            may be either a `numpy.ndarray` or `PIL.Image.Image`.\n\n    Raises:\n        ValueError: If the scale factor is non-positive.\n\n    === \"OpenCV\"\n\n        ```python\n        import cv2\n        import supervision as sv\n\n        image = cv2.imread(<SOURCE_IMAGE_PATH>)\n        image.shape\n        # (1080, 1920, 3)\n\n        scaled_image = sv.scale_image(image=image, scale_factor=0.5)\n        scaled_image.shape\n        # (540, 960, 3)\n        ```\n\n    === \"Pillow\"\n\n        ```python\n        from PIL import Image\n        import supervision as sv\n\n        image = Image.open(<SOURCE_IMAGE_PATH>)\n        image.size\n        # (1920, 1080)\n\n        scaled_image = sv.scale_image(image=image, scale_factor=0.5)\n        scaled_image.size\n        # (960, 540)\n        ```\n    \"\"\"\n    if scale_factor <= 0:\n        raise ValueError(\"Scale factor must be positive.\")\n\n    width_old, height_old = image.shape[1], image.shape[0]\n    width_new = int(width_old * scale_factor)\n    height_new = int(height_old * scale_factor)\n    return cv2.resize(image, (width_new, height_new), interpolation=cv2.INTER_LINEAR)\n
resize_image

Resizes the given image to a specified resolution. Can maintain the original aspect ratio or resize directly to the desired dimensions.

Parameters:

- `image` (`ImageType`): The image to be resized. `ImageType` is a flexible type, accepting either `numpy.ndarray` or `PIL.Image.Image`. Required.
- `resolution_wh` (`Tuple[int, int]`): The target resolution as `(width, height)`. Required.
- `keep_aspect_ratio` (`bool`): Flag to maintain the image's original aspect ratio. Defaults to `False`.

Returns:

- `ImageType`: The resized image. The type is determined by the input type and may be either a `numpy.ndarray` or `PIL.Image.Image`.

OpenCV:

```python
import cv2
import supervision as sv

image = cv2.imread(<SOURCE_IMAGE_PATH>)
image.shape
# (1080, 1920, 3)

resized_image = sv.resize_image(
    image=image, resolution_wh=(1000, 1000), keep_aspect_ratio=True
)
resized_image.shape
# (562, 1000, 3)
```

Pillow:

```python
from PIL import Image
import supervision as sv

image = Image.open(<SOURCE_IMAGE_PATH>)
image.size
# (1920, 1080)

resized_image = sv.resize_image(
    image=image, resolution_wh=(1000, 1000), keep_aspect_ratio=True
)
resized_image.size
# (1000, 562)
```

Source code in supervision/utils/image.py
@convert_for_image_processing\ndef resize_image(\n    image: ImageType,\n    resolution_wh: Tuple[int, int],\n    keep_aspect_ratio: bool = False,\n) -> ImageType:\n    \"\"\"\n    Resizes the given image to a specified resolution. Can maintain the original aspect\n    ratio or resize directly to the desired dimensions.\n\n    Args:\n        image (ImageType): The image to be resized. `ImageType` is a flexible type,\n            accepting either `numpy.ndarray` or `PIL.Image.Image`.\n        resolution_wh (Tuple[int, int]): The target resolution as\n            `(width, height)`.\n        keep_aspect_ratio (bool, optional): Flag to maintain the image's original\n            aspect ratio. Defaults to `False`.\n\n    Returns:\n        (ImageType): The resized image. The type is determined by the input type and\n            may be either a `numpy.ndarray` or `PIL.Image.Image`.\n\n    === \"OpenCV\"\n\n        ```python\n        import cv2\n        import supervision as sv\n\n        image = cv2.imread(<SOURCE_IMAGE_PATH>)\n        image.shape\n        # (1080, 1920, 3)\n\n        resized_image = sv.resize_image(\n            image=image, resolution_wh=(1000, 1000), keep_aspect_ratio=True\n        )\n        resized_image.shape\n        # (562, 1000, 3)\n        ```\n\n    === \"Pillow\"\n\n        ```python\n        from PIL import Image\n        import supervision as sv\n\n        image = Image.open(<SOURCE_IMAGE_PATH>)\n        image.size\n        # (1920, 1080)\n\n        resized_image = sv.resize_image(\n            image=image, resolution_wh=(1000, 1000), keep_aspect_ratio=True\n        )\n        resized_image.size\n        # (1000, 562)\n        ```\n\n    ![resize_image](https://media.roboflow.com/supervision-docs/resize-image.png){ align=center width=\"800\" }\n    \"\"\"  # noqa E501 // docs\n    if keep_aspect_ratio:\n        image_ratio = image.shape[1] / image.shape[0]\n        target_ratio = resolution_wh[0] / resolution_wh[1]\n        if image_ratio >= target_ratio:\n            width_new = resolution_wh[0]\n            height_new = int(resolution_wh[0] / image_ratio)\n        else:\n            height_new = resolution_wh[1]\n            width_new = int(resolution_wh[1] * image_ratio)\n    else:\n        width_new, height_new = resolution_wh\n\n    return cv2.resize(image, (width_new, height_new), interpolation=cv2.INTER_LINEAR)\n
letterbox_image

Resizes and pads an image to a specified resolution with a given color, maintaining the original aspect ratio.

Parameters:

- `image` (`ImageType`): The image to be resized. `ImageType` is a flexible type, accepting either `numpy.ndarray` or `PIL.Image.Image`. Required.
- `resolution_wh` (`Tuple[int, int]`): The target resolution as `(width, height)`. Required.
- `color` (`Union[Tuple[int, int, int], Color]`): The color to pad with. If a tuple is provided, it should be in BGR format. Defaults to `Color.BLACK`.

Returns:

- `ImageType`: The resized image. The type is determined by the input type and may be either a `numpy.ndarray` or `PIL.Image.Image`.

OpenCV:

```python
import cv2
import supervision as sv

image = cv2.imread(<SOURCE_IMAGE_PATH>)
image.shape
# (1080, 1920, 3)

letterboxed_image = sv.letterbox_image(image=image, resolution_wh=(1000, 1000))
letterboxed_image.shape
# (1000, 1000, 3)
```

Pillow:

```python
from PIL import Image
import supervision as sv

image = Image.open(<SOURCE_IMAGE_PATH>)
image.size
# (1920, 1080)

letterboxed_image = sv.letterbox_image(image=image, resolution_wh=(1000, 1000))
letterboxed_image.size
# (1000, 1000)
```

Source code in supervision/utils/image.py
@convert_for_image_processing\ndef letterbox_image(\n    image: ImageType,\n    resolution_wh: Tuple[int, int],\n    color: Union[Tuple[int, int, int], Color] = Color.BLACK,\n) -> ImageType:\n    \"\"\"\n    Resizes and pads an image to a specified resolution with a given color, maintaining\n    the original aspect ratio.\n\n    Args:\n        image (ImageType): The image to be resized. `ImageType` is a flexible type,\n            accepting either `numpy.ndarray` or `PIL.Image.Image`.\n        resolution_wh (Tuple[int, int]): The target resolution as\n            `(width, height)`.\n        color (Union[Tuple[int, int, int], Color]): The color to pad with. If tuple\n            provided it should be in BGR format.\n\n    Returns:\n        (ImageType): The resized image. The type is determined by the input type and\n            may be either a `numpy.ndarray` or `PIL.Image.Image`.\n\n    === \"OpenCV\"\n\n        ```python\n        import cv2\n        import supervision as sv\n\n        image = cv2.imread(<SOURCE_IMAGE_PATH>)\n        image.shape\n        # (1080, 1920, 3)\n\n        letterboxed_image = sv.letterbox_image(image=image, resolution_wh=(1000, 1000))\n        letterboxed_image.shape\n        # (1000, 1000, 3)\n        ```\n\n    === \"Pillow\"\n\n        ```python\n        from PIL import Image\n        import supervision as sv\n\n        image = Image.open(<SOURCE_IMAGE_PATH>)\n        image.size\n        # (1920, 1080)\n\n        letterboxed_image = sv.letterbox_image(image=image, resolution_wh=(1000, 1000))\n        letterboxed_image.size\n        # (1000, 1000)\n        ```\n\n    ![letterbox_image](https://media.roboflow.com/supervision-docs/letterbox-image.png){ align=center width=\"800\" }\n    \"\"\"  # noqa E501 // docs\n    color = unify_to_bgr(color=color)\n    resized_image = resize_image(\n        image=image, resolution_wh=resolution_wh, keep_aspect_ratio=True\n    )\n    height_new, width_new = resized_image.shape[:2]\n    padding_top = (resolution_wh[1] - height_new) // 2\n    padding_bottom = resolution_wh[1] - height_new - padding_top\n    padding_left = (resolution_wh[0] - width_new) // 2\n    padding_right = resolution_wh[0] - width_new - padding_left\n    return cv2.copyMakeBorder(\n        resized_image,\n        padding_top,\n        padding_bottom,\n        padding_left,\n        padding_right,\n        cv2.BORDER_CONSTANT,\n        value=color,\n    )\n
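One more sketch, not from the docs above, showing the `color` parameter with a `Color` constant instead of the default black padding; the input array is a stand-in for a real frame.

```python
import numpy as np
import supervision as sv

image = np.zeros((1080, 1920, 3), dtype=np.uint8)

# pad with a solid brand color rather than black
letterboxed = sv.letterbox_image(
    image=image,
    resolution_wh=(1000, 1000),
    color=sv.Color.ROBOFLOW,
)
letterboxed.shape
# (1000, 1000, 3)
```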
overlay_image

Places an image onto a scene at a given anchor point, handling cases where the image's position is partially or completely outside the scene's bounds.

Parameters:

- `image` (`np.ndarray`): The background scene onto which the image is placed. Required.
- `overlay` (`np.ndarray`): The image to be placed onto the scene. Required.
- `anchor` (`Tuple[int, int]`): The `(x, y)` coordinates in the scene where the top-left corner of the image will be placed. Required.

Returns:

- `np.ndarray`: The result image with overlay.

Examples:

```python
import cv2
import numpy as np
import supervision as sv

image = cv2.imread(<SOURCE_IMAGE_PATH>)
overlay = np.zeros((400, 400, 3), dtype=np.uint8)
result_image = sv.overlay_image(image=image, overlay=overlay, anchor=(200, 400))
```

Source code in supervision/utils/image.py
def overlay_image(\n    image: npt.NDArray[np.uint8],\n    overlay: npt.NDArray[np.uint8],\n    anchor: Tuple[int, int],\n) -> npt.NDArray[np.uint8]:\n    \"\"\"\n    Places an image onto a scene at a given anchor point, handling cases where\n    the image's position is partially or completely outside the scene's bounds.\n\n    Args:\n        image (np.ndarray): The background scene onto which the image is placed.\n        overlay (np.ndarray): The image to be placed onto the scene.\n        anchor (Tuple[int, int]): The `(x, y)` coordinates in the scene where the\n            top-left corner of the image will be placed.\n\n    Returns:\n        (np.ndarray): The result image with overlay.\n\n    Examples:\n        ```python\n        import cv2\n        import numpy as np\n        import supervision as sv\n\n        image = cv2.imread(<SOURCE_IMAGE_PATH>)\n        overlay = np.zeros((400, 400, 3), dtype=np.uint8)\n        result_image = sv.overlay_image(image=image, overlay=overlay, anchor=(200, 400))\n        ```\n\n    ![overlay_image](https://media.roboflow.com/supervision-docs/overlay-image.png){ align=center width=\"800\" }\n    \"\"\"  # noqa E501 // docs\n    scene_height, scene_width = image.shape[:2]\n    image_height, image_width = overlay.shape[:2]\n    anchor_x, anchor_y = anchor\n\n    is_out_horizontally = anchor_x + image_width <= 0 or anchor_x >= scene_width\n    is_out_vertically = anchor_y + image_height <= 0 or anchor_y >= scene_height\n\n    if is_out_horizontally or is_out_vertically:\n        return image\n\n    x_min = max(anchor_x, 0)\n    y_min = max(anchor_y, 0)\n    x_max = min(scene_width, anchor_x + image_width)\n    y_max = min(scene_height, anchor_y + image_height)\n\n    crop_x_min = max(-anchor_x, 0)\n    crop_y_min = max(-anchor_y, 0)\n    crop_x_max = image_width - max((anchor_x + image_width) - scene_width, 0)\n    crop_y_max = image_height - max((anchor_y + image_height) - scene_height, 0)\n\n    image[y_min:y_max, x_min:x_max] = overlay[\n        crop_y_min:crop_y_max, crop_x_min:crop_x_max\n    ]\n\n    return image\n
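A complementary sketch (with assumed arrays, not from the docs) illustrating the out-of-bounds handling described above: with a negative anchor only the visible part of the overlay is copied into the scene.

```python
import numpy as np
import supervision as sv

image = np.zeros((480, 640, 3), dtype=np.uint8)
overlay = np.full((100, 100, 3), 255, dtype=np.uint8)

# a negative anchor: only the bottom-right quarter of the overlay lands in view
result = sv.overlay_image(image=image, overlay=overlay, anchor=(-50, -50))
result[0:50, 0:50].mean()
# 255.0
```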
ImageSink

Source code in supervision/utils/image.py
class ImageSink:\n    def __init__(\n        self,\n        target_dir_path: str,\n        overwrite: bool = False,\n        image_name_pattern: str = \"image_{:05d}.png\",\n    ):\n        \"\"\"\n        Initialize a context manager for saving images.\n\n        Args:\n            target_dir_path (str): The target directory where images will be saved.\n            overwrite (bool, optional): Whether to overwrite the existing directory.\n                Defaults to False.\n            image_name_pattern (str, optional): The image file name pattern.\n                Defaults to \"image_{:05d}.png\".\n\n        Examples:\n            ```python\n            import supervision as sv\n\n            frames_generator = sv.get_video_frames_generator(<SOURCE_VIDEO_PATH>, stride=2)\n\n            with sv.ImageSink(target_dir_path=<TARGET_CROPS_DIRECTORY>) as sink:\n                for image in frames_generator:\n                    sink.save_image(image=image)\n            ```\n        \"\"\"  # noqa E501 // docs\n\n        self.target_dir_path = target_dir_path\n        self.overwrite = overwrite\n        self.image_name_pattern = image_name_pattern\n        self.image_count = 0\n\n    def __enter__(self):\n        if os.path.exists(self.target_dir_path):\n            if self.overwrite:\n                shutil.rmtree(self.target_dir_path)\n                os.makedirs(self.target_dir_path)\n        else:\n            os.makedirs(self.target_dir_path)\n\n        return self\n\n    def save_image(self, image: np.ndarray, image_name: Optional[str] = None):\n        \"\"\"\n        Save a given image in the target directory.\n\n        Args:\n            image (np.ndarray): The image to be saved. The image must be in BGR color\n                format.\n            image_name (str, optional): The name to use for the saved image.\n                If not provided, a name will be\n                generated using the `image_name_pattern`.\n        \"\"\"\n        if image_name is None:\n            image_name = self.image_name_pattern.format(self.image_count)\n\n        image_path = os.path.join(self.target_dir_path, image_name)\n        cv2.imwrite(image_path, image)\n        self.image_count += 1\n\n    def __exit__(self, exc_type, exc_value, exc_traceback):\n        pass\n
"},{"location":"utils/image/#supervision.utils.image.ImageSink-functions","title":"Functions","text":""},{"location":"utils/image/#supervision.utils.image.ImageSink.__init__","title":"__init__(target_dir_path, overwrite=False, image_name_pattern='image_{:05d}.png')","text":"

Initialize a context manager for saving images.

Parameters:

- `target_dir_path` (`str`): The target directory where images will be saved. Required.
- `overwrite` (`bool`): Whether to overwrite the existing directory. Defaults to `False`.
- `image_name_pattern` (`str`): The image file name pattern. Defaults to `"image_{:05d}.png"`.

Examples:

```python
import supervision as sv

frames_generator = sv.get_video_frames_generator(<SOURCE_VIDEO_PATH>, stride=2)

with sv.ImageSink(target_dir_path=<TARGET_CROPS_DIRECTORY>) as sink:
    for image in frames_generator:
        sink.save_image(image=image)
```
Source code in supervision/utils/image.py
def __init__(\n    self,\n    target_dir_path: str,\n    overwrite: bool = False,\n    image_name_pattern: str = \"image_{:05d}.png\",\n):\n    \"\"\"\n    Initialize a context manager for saving images.\n\n    Args:\n        target_dir_path (str): The target directory where images will be saved.\n        overwrite (bool, optional): Whether to overwrite the existing directory.\n            Defaults to False.\n        image_name_pattern (str, optional): The image file name pattern.\n            Defaults to \"image_{:05d}.png\".\n\n    Examples:\n        ```python\n        import supervision as sv\n\n        frames_generator = sv.get_video_frames_generator(<SOURCE_VIDEO_PATH>, stride=2)\n\n        with sv.ImageSink(target_dir_path=<TARGET_CROPS_DIRECTORY>) as sink:\n            for image in frames_generator:\n                sink.save_image(image=image)\n        ```\n    \"\"\"  # noqa E501 // docs\n\n    self.target_dir_path = target_dir_path\n    self.overwrite = overwrite\n    self.image_name_pattern = image_name_pattern\n    self.image_count = 0\n
"},{"location":"utils/image/#supervision.utils.image.ImageSink.save_image","title":"save_image(image, image_name=None)","text":"

Save a given image in the target directory.

Parameters:

- `image` (`np.ndarray`): The image to be saved. The image must be in BGR color format. Required.
- `image_name` (`str`, optional): The name to use for the saved image. If not provided, a name will be generated using the `image_name_pattern`. Defaults to `None`.

Source code in supervision/utils/image.py
def save_image(self, image: np.ndarray, image_name: Optional[str] = None):\n    \"\"\"\n    Save a given image in the target directory.\n\n    Args:\n        image (np.ndarray): The image to be saved. The image must be in BGR color\n            format.\n        image_name (str, optional): The name to use for the saved image.\n            If not provided, a name will be\n            generated using the `image_name_pattern`.\n    \"\"\"\n    if image_name is None:\n        image_name = self.image_name_pattern.format(self.image_count)\n\n    image_path = os.path.join(self.target_dir_path, image_name)\n    cv2.imwrite(image_path, image)\n    self.image_count += 1\n
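A small sketch of `save_image` with and without an explicit name; the `crops` directory and the dummy frame are assumptions for illustration.

```python
import numpy as np
import supervision as sv

frame = np.zeros((480, 640, 3), dtype=np.uint8)

with sv.ImageSink(target_dir_path="crops", overwrite=True) as sink:
    # auto-generated name from the pattern: image_00000.png
    sink.save_image(image=frame)
    # an explicit name overrides the pattern
    sink.save_image(image=frame, image_name="first_frame.png")
```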
"},{"location":"utils/iterables/","title":"Iterables Utils","text":"create_batches

Provides a generator that yields chunks of the input sequence of the size specified by the batch_size parameter. The last chunk may be a smaller batch.

Parameters:

Name Type Description Default sequence Iterable[V]

The sequence to be split into batches.

required batch_size int

The expected size of a batch.

required

Returns:

Type Description Generator[List[V], None, None]

A generator that yields chunks of sequence of size batch_size, up to the length of the input sequence.

Examples:

list(create_batches([1, 2, 3, 4, 5], 2))\n# [[1, 2], [3, 4], [5]]\n\nlist(create_batches(\"abcde\", 3))\n# [['a', 'b', 'c'], ['d', 'e']]\n
Source code in supervision/utils/iterables.py
def create_batches(\n    sequence: Iterable[V], batch_size: int\n) -> Generator[List[V], None, None]:\n    \"\"\"\n    Provides a generator that yields chunks of the input sequence\n    of the size specified by the `batch_size` parameter. The last\n    chunk may be a smaller batch.\n\n    Args:\n        sequence (Iterable[V]): The sequence to be split into batches.\n        batch_size (int): The expected size of a batch.\n\n    Returns:\n        (Generator[List[V], None, None]): A generator that yields chunks\n            of `sequence` of size `batch_size`, up to the length of\n            the input `sequence`.\n\n    Examples:\n        ```python\n        list(create_batches([1, 2, 3, 4, 5], 2))\n        # [[1, 2], [3, 4], [5]]\n\n        list(create_batches(\"abcde\", 3))\n        # [['a', 'b', 'c'], ['d', 'e']]\n        ```\n    \"\"\"\n    batch_size = max(batch_size, 1)\n    current_batch = []\n    for element in sequence:\n        if len(current_batch) == batch_size:\n            yield current_batch\n            current_batch = []\n        current_batch.append(element)\n    if current_batch:\n        yield current_batch\n
fill

Fill the sequence with padding elements until the sequence reaches the desired size.

Parameters:

Name Type Description Default sequence List[V]

The input sequence.

required desired_size int

The expected size of the output list. The difference between this value and the actual length of sequence (if positive) dictates how many elements will be added as padding.

required content V

The element to be placed at the end of the input sequence as padding.

required

Returns:

Type Description List[V]

A padded version of the input sequence (if needed).

Examples:

fill([1, 2], 4, 0)\n# [1, 2, 0, 0]\n\nfill(['a', 'b'], 3, 'c')\n# ['a', 'b', 'c']\n
Source code in supervision/utils/iterables.py
def fill(sequence: List[V], desired_size: int, content: V) -> List[V]:\n    \"\"\"\n    Fill the sequence with padding elements until the sequence reaches\n    the desired size.\n\n    Args:\n        sequence (List[V]): The input sequence.\n        desired_size (int): The expected size of the output list. The\n            difference between this value and the actual length of `sequence`\n            (if positive) dictates how many elements will be added as padding.\n        content (V): The element to be placed at the end of the input\n            `sequence` as padding.\n\n    Returns:\n        (List[V]): A padded version of the input `sequence` (if needed).\n\n    Examples:\n        ```python\n        fill([1, 2], 4, 0)\n        # [1, 2, 0, 0]\n\n        fill(['a', 'b'], 3, 'c')\n        # ['a', 'b', 'c']\n        ```\n    \"\"\"\n    missing_size = max(0, desired_size - len(sequence))\n    sequence.extend([content] * missing_size)\n    return sequence\n
"},{"location":"utils/notebook/","title":"Notebooks Utils","text":"plot_image

Plots image using matplotlib.

Parameters:

Name Type Description Default image ImageType

The frame to be displayed. ImageType is a flexible type, accepting either numpy.ndarray or PIL.Image.Image.

required size Tuple[int, int]

The size of the plot.

(12, 12) cmap str

The colormap to use for single-channel images.

'gray'

Examples:

import cv2\nimport supervision as sv\n\nimage = cv2.imread(\"path/to/image.jpg\")\n\n%matplotlib inline\nsv.plot_image(image=image, size=(16, 16))\n
Source code in supervision/utils/notebook.py
def plot_image(\n    image: ImageType, size: Tuple[int, int] = (12, 12), cmap: Optional[str] = \"gray\"\n) -> None:\n    \"\"\"\n    Plots image using matplotlib.\n\n    Args:\n        image (ImageType): The frame to be displayed ImageType\n             is a flexible type, accepting either `numpy.ndarray` or `PIL.Image.Image`.\n        size (Tuple[int, int]): The size of the plot.\n        cmap (str): the colormap to use for single channel images.\n\n    Examples:\n        ```python\n        import cv2\n        import supervision as sv\n\n        image = cv2.imread(\"path/to/image.jpg\")\n\n        %matplotlib inline\n        sv.plot_image(image=image, size=(16, 16))\n        ```\n    \"\"\"\n    if isinstance(image, Image.Image):\n        image = pillow_to_cv2(image)\n\n    plt.figure(figsize=size)\n\n    if image.ndim == 2:\n        plt.imshow(image, cmap=cmap)\n    else:\n        plt.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))\n\n    plt.axis(\"off\")\n    plt.show()\n
plot_images_grid

Plots images in a grid using matplotlib.

Parameters:

Name Type Description Default images List[ImageType]

A list of images to display. ImageType is a flexible type, accepting either numpy.ndarray or PIL.Image.Image.

required grid_size Tuple[int, int]

A tuple specifying the number of rows and columns for the grid.

required titles Optional[List[str]]

A list of titles for each image. Defaults to None.

None size Tuple[int, int]

A tuple specifying the width and height of the entire plot in inches.

(12, 12) cmap str

The colormap to use for single-channel images.

'gray'

Raises:

Type Description ValueError

If the number of images exceeds the grid size.

Examples:

import cv2\nimport supervision as sv\nfrom PIL import Image\n\nimage1 = cv2.imread(\"path/to/image1.jpg\")\nimage2 = Image.open(\"path/to/image2.jpg\")\nimage3 = cv2.imread(\"path/to/image3.jpg\")\n\nimages = [image1, image2, image3]\ntitles = [\"Image 1\", \"Image 2\", \"Image 3\"]\n\n%matplotlib inline\nsv.plot_images_grid(images, grid_size=(2, 2), titles=titles, size=(16, 16))\n
Source code in supervision/utils/notebook.py
def plot_images_grid(\n    images: List[ImageType],\n    grid_size: Tuple[int, int],\n    titles: Optional[List[str]] = None,\n    size: Tuple[int, int] = (12, 12),\n    cmap: Optional[str] = \"gray\",\n) -> None:\n    \"\"\"\n    Plots images in a grid using matplotlib.\n\n    Args:\n       images (List[ImageType]): A list of images as ImageType\n             is a flexible type, accepting either `numpy.ndarray` or `PIL.Image.Image`.\n       grid_size (Tuple[int, int]): A tuple specifying the number\n            of rows and columns for the grid.\n       titles (Optional[List[str]]): A list of titles for each image.\n            Defaults to None.\n       size (Tuple[int, int]): A tuple specifying the width and\n            height of the entire plot in inches.\n       cmap (str): the colormap to use for single channel images.\n\n    Raises:\n       ValueError: If the number of images exceeds the grid size.\n\n    Examples:\n        ```python\n        import cv2\n        import supervision as sv\n        from PIL import Image\n\n        image1 = cv2.imread(\"path/to/image1.jpg\")\n        image2 = Image.open(\"path/to/image2.jpg\")\n        image3 = cv2.imread(\"path/to/image3.jpg\")\n\n        images = [image1, image2, image3]\n        titles = [\"Image 1\", \"Image 2\", \"Image 3\"]\n\n        %matplotlib inline\n        plot_images_grid(images, grid_size=(2, 2), titles=titles, size=(16, 16))\n        ```\n    \"\"\"\n    nrows, ncols = grid_size\n\n    for idx, img in enumerate(images):\n        if isinstance(img, Image.Image):\n            images[idx] = pillow_to_cv2(img)\n\n    if len(images) > nrows * ncols:\n        raise ValueError(\n            \"The number of images exceeds the grid size. Please increase the grid size\"\n            \" or reduce the number of images.\"\n        )\n\n    fig, axes = plt.subplots(nrows=nrows, ncols=ncols, figsize=size)\n\n    for idx, ax in enumerate(axes.flat):\n        if idx < len(images):\n            if images[idx].ndim == 2:\n                ax.imshow(images[idx], cmap=cmap)\n            else:\n                ax.imshow(cv2.cvtColor(images[idx], cv2.COLOR_BGR2RGB))\n\n            if titles is not None and idx < len(titles):\n                ax.set_title(titles[idx])\n\n        ax.axis(\"off\")\n    plt.show()\n
"},{"location":"utils/video/","title":"Video Utils","text":"VideoInfo

A class to store video information, including width, height, fps and total number of frames.

Attributes:

Name Type Description width int

width of the video in pixels

height int

height of the video in pixels

fps int

frames per second of the video

total_frames int

total number of frames in the video, default is None

Examples:

import supervision as sv\n\nvideo_info = sv.VideoInfo.from_video_path(video_path=<SOURCE_VIDEO_FILE>)\n\nvideo_info\n# VideoInfo(width=3840, height=2160, fps=25, total_frames=538)\n\nvideo_info.resolution_wh\n# (3840, 2160)\n
Source code in supervision/utils/video.py
@dataclass\nclass VideoInfo:\n    \"\"\"\n    A class to store video information, including width, height, fps and\n        total number of frames.\n\n    Attributes:\n        width (int): width of the video in pixels\n        height (int): height of the video in pixels\n        fps (int): frames per second of the video\n        total_frames (int, optional): total number of frames in the video,\n            default is None\n\n    Examples:\n        ```python\n        import supervision as sv\n\n        video_info = sv.VideoInfo.from_video_path(video_path=<SOURCE_VIDEO_FILE>)\n\n        video_info\n        # VideoInfo(width=3840, height=2160, fps=25, total_frames=538)\n\n        video_info.resolution_wh\n        # (3840, 2160)\n        ```\n    \"\"\"\n\n    width: int\n    height: int\n    fps: int\n    total_frames: Optional[int] = None\n\n    @classmethod\n    def from_video_path(cls, video_path: str) -> VideoInfo:\n        video = cv2.VideoCapture(video_path)\n        if not video.isOpened():\n            raise Exception(f\"Could not open video at {video_path}\")\n\n        width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))\n        height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))\n        fps = int(video.get(cv2.CAP_PROP_FPS))\n        total_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT))\n        video.release()\n        return VideoInfo(width, height, fps, total_frames)\n\n    @property\n    def resolution_wh(self) -> Tuple[int, int]:\n        return self.width, self.height\n
VideoSink

Context manager that saves video frames to a file using OpenCV.

Attributes:

Name Type Description target_path str

The path to the output file where the video will be saved.

video_info VideoInfo

Information about the video resolution, fps, and total frame count.

codec str

FOURCC code for video format

Example
import supervision as sv\n\nvideo_info = sv.VideoInfo.from_video_path(<SOURCE_VIDEO_PATH>)\nframes_generator = sv.get_video_frames_generator(<SOURCE_VIDEO_PATH>)\n\nwith sv.VideoSink(target_path=<TARGET_VIDEO_PATH>, video_info=video_info) as sink:\n    for frame in frames_generator:\n        sink.write_frame(frame=frame)\n
Source code in supervision/utils/video.py
class VideoSink:\n    \"\"\"\n    Context manager that saves video frames to a file using OpenCV.\n\n    Attributes:\n        target_path (str): The path to the output file where the video will be saved.\n        video_info (VideoInfo): Information about the video resolution, fps,\n            and total frame count.\n        codec (str): FOURCC code for video format\n\n    Example:\n        ```python\n        import supervision as sv\n\n        video_info = sv.VideoInfo.from_video_path(<SOURCE_VIDEO_PATH>)\n        frames_generator = sv.get_video_frames_generator(<SOURCE_VIDEO_PATH>)\n\n        with sv.VideoSink(target_path=<TARGET_VIDEO_PATH>, video_info=video_info) as sink:\n            for frame in frames_generator:\n                sink.write_frame(frame=frame)\n        ```\n    \"\"\"  # noqa: E501 // docs\n\n    def __init__(self, target_path: str, video_info: VideoInfo, codec: str = \"mp4v\"):\n        self.target_path = target_path\n        self.video_info = video_info\n        self.__codec = codec\n        self.__writer = None\n\n    def __enter__(self):\n        try:\n            self.__fourcc = cv2.VideoWriter_fourcc(*self.__codec)\n        except TypeError as e:\n            print(str(e) + \". Defaulting to mp4v...\")\n            self.__fourcc = cv2.VideoWriter_fourcc(*\"mp4v\")\n        self.__writer = cv2.VideoWriter(\n            self.target_path,\n            self.__fourcc,\n            self.video_info.fps,\n            self.video_info.resolution_wh,\n        )\n        return self\n\n    def write_frame(self, frame: np.ndarray):\n        \"\"\"\n        Writes a single video frame to the target video file.\n\n        Args:\n            frame (np.ndarray): The video frame to be written to the file. The frame\n                must be in BGR color format.\n        \"\"\"\n        self.__writer.write(frame)\n\n    def __exit__(self, exc_type, exc_value, exc_traceback):\n        self.__writer.release()\n
FPSMonitor

A class for monitoring frames per second (FPS) to benchmark latency.

Source code in supervision/utils/video.py
class FPSMonitor:\n    \"\"\"\n    A class for monitoring frames per second (FPS) to benchmark latency.\n    \"\"\"\n\n    def __init__(self, sample_size: int = 30):\n        \"\"\"\n        Args:\n            sample_size (int): The maximum number of observations for latency\n                benchmarking.\n\n        Examples:\n            ```python\n            import supervision as sv\n\n            frames_generator = sv.get_video_frames_generator(source_path=<SOURCE_FILE_PATH>)\n            fps_monitor = sv.FPSMonitor()\n\n            for frame in frames_generator:\n                # your processing code here\n                fps_monitor.tick()\n                fps = fps_monitor.fps\n            ```\n        \"\"\"  # noqa: E501 // docs\n        self.all_timestamps = deque(maxlen=sample_size)\n\n    @deprecated(\n        \"`FPSMonitor.__call__` is deprecated and will be removed in \"\n        \"`supervision-0.22.0`. Use `FPSMonitor.fps` instead.\"\n    )\n    def __call__(self) -> float:\n        \"\"\"\n        !!! failure \"Deprecated\"\n\n            `FPSMonitor.__call__` is deprecated and will be removed in\n            `supervision-0.22.0`. Use `FPSMonitor.fps` instead.\n\n        Computes and returns the average FPS based on the stored time stamps.\n\n        Returns:\n            float: The average FPS. Returns 0.0 if no time stamps are stored.\n        \"\"\"\n        return self.fps\n\n    @property\n    def fps(self) -> float:\n        \"\"\"\n        Computes and returns the average FPS based on the stored time stamps.\n\n        Returns:\n            float: The average FPS. Returns 0.0 if no time stamps are stored.\n        \"\"\"\n        if not self.all_timestamps:\n            return 0.0\n        taken_time = self.all_timestamps[-1] - self.all_timestamps[0]\n        return (len(self.all_timestamps)) / taken_time if taken_time != 0 else 0.0\n\n    def tick(self) -> None:\n        \"\"\"\n        Adds a new time stamp to the deque for FPS calculation.\n        \"\"\"\n        self.all_timestamps.append(time.monotonic())\n\n    def reset(self) -> None:\n        \"\"\"\n        Clears all the time stamps from the deque.\n        \"\"\"\n        self.all_timestamps.clear()\n
get_video_frames_generator

Get a generator that yields the frames of the video.

Parameters:

Name Type Description Default source_path str

The path of the video file.

required stride int

Indicates the interval at which frames are returned, skipping stride - 1 frames between each.

1 start int

Indicates the starting position from which the video should generate frames.

0 end Optional[int]

Indicates the ending position at which video should stop generating frames. If None, video will be read to the end.

None

Returns:

Type Description Generator[ndarray, None, None]

A generator that yields the frames of the video.

Examples:

import supervision as sv\n\nfor frame in sv.get_video_frames_generator(source_path=<SOURCE_VIDEO_PATH>):\n    ...\n
Source code in supervision/utils/video.py
def get_video_frames_generator(\n    source_path: str, stride: int = 1, start: int = 0, end: Optional[int] = None\n) -> Generator[np.ndarray, None, None]:\n    \"\"\"\n    Get a generator that yields the frames of the video.\n\n    Args:\n        source_path (str): The path of the video file.\n        stride (int): Indicates the interval at which frames are returned,\n            skipping stride - 1 frames between each.\n        start (int): Indicates the starting position from which\n            video should generate frames\n        end (Optional[int]): Indicates the ending position at which video\n            should stop generating frames. If None, video will be read to the end.\n\n    Returns:\n        (Generator[np.ndarray, None, None]): A generator that yields the\n            frames of the video.\n\n    Examples:\n        ```python\n        import supervision as sv\n\n        for frame in sv.get_video_frames_generator(source_path=<SOURCE_VIDEO_PATH>):\n            ...\n        ```\n    \"\"\"\n    video, start, end = _validate_and_setup_video(source_path, start, end)\n    frame_position = start\n    while True:\n        success, frame = video.read()\n        if not success or frame_position >= end:\n            break\n        yield frame\n        for _ in range(stride - 1):\n            success = video.grab()\n            if not success:\n                break\n        frame_position += stride\n    video.release()\n
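
A minimal sketch of the stride, start and end arguments (the source path is hypothetical), reading every 5th frame between positions 100 and 200:

import supervision as sv\n\n# hypothetical source path; yields frames 100, 105, ..., 195 and stops before frame 200\nfor frame in sv.get_video_frames_generator(\n    source_path=\"video.mp4\", stride=5, start=100, end=200\n):\n    ...\n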
process_video

Process a video file by applying a callback function on each frame and saving the result to a target video file.

Parameters:

Name Type Description Default source_path str

The path to the source video file.

required target_path str

The path to the target video file.

required callback Callable[[ndarray, int], ndarray]

A function that takes in a numpy ndarray representation of a video frame and an int index of the frame and returns a processed numpy ndarray representation of the frame.

required

Examples:

import numpy as np\nimport supervision as sv\n\ndef callback(scene: np.ndarray, index: int) -> np.ndarray:\n    ...\n\nsv.process_video(\n    source_path=<SOURCE_VIDEO_PATH>,\n    target_path=<TARGET_VIDEO_PATH>,\n    callback=callback\n)\n
Source code in supervision/utils/video.py
def process_video(\n    source_path: str,\n    target_path: str,\n    callback: Callable[[np.ndarray, int], np.ndarray],\n) -> None:\n    \"\"\"\n    Process a video file by applying a callback function on each frame\n        and saving the result to a target video file.\n\n    Args:\n        source_path (str): The path to the source video file.\n        target_path (str): The path to the target video file.\n        callback (Callable[[np.ndarray, int], np.ndarray]): A function that takes in\n            a numpy ndarray representation of a video frame and an\n            int index of the frame and returns a processed numpy ndarray\n            representation of the frame.\n\n    Examples:\n        ```python\n        import supervision as sv\n\n        def callback(scene: np.ndarray, index: int) -> np.ndarray:\n            ...\n\n        process_video(\n            source_path=<SOURCE_VIDEO_PATH>,\n            target_path=<TARGET_VIDEO_PATH>,\n            callback=callback\n        )\n        ```\n    \"\"\"\n    source_video_info = VideoInfo.from_video_path(video_path=source_path)\n    with VideoSink(target_path=target_path, video_info=source_video_info) as sink:\n        for index, frame in enumerate(\n            get_video_frames_generator(source_path=source_path)\n        ):\n            result_frame = callback(frame, index)\n            sink.write_frame(frame=result_frame)\n
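
An illustrative sketch (paths are hypothetical): the callback below simply stamps the frame index onto each frame with OpenCV before it is written to the target file:

import cv2\nimport numpy as np\nimport supervision as sv\n\ndef callback(scene: np.ndarray, index: int) -> np.ndarray:\n    # stamp the frame index in the top-left corner and return the modified frame\n    return cv2.putText(\n        scene, f\"frame {index}\", (10, 30),\n        cv2.FONT_HERSHEY_SIMPLEX, 1.0, (0, 255, 0), 2\n    )\n\nsv.process_video(\n    source_path=\"input.mp4\",\n    target_path=\"output.mp4\",\n    callback=callback,\n)\n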
"},{"location":"utils/video/#supervision.utils.video.VideoSink-functions","title":"Functions","text":""},{"location":"utils/video/#supervision.utils.video.VideoSink.write_frame","title":"write_frame(frame)","text":"

Writes a single video frame to the target video file.

Parameters:

Name Type Description Default frame ndarray

The video frame to be written to the file. The frame must be in BGR color format.

required Source code in supervision/utils/video.py
def write_frame(self, frame: np.ndarray):\n    \"\"\"\n    Writes a single video frame to the target video file.\n\n    Args:\n        frame (np.ndarray): The video frame to be written to the file. The frame\n            must be in BGR color format.\n    \"\"\"\n    self.__writer.write(frame)\n
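
A minimal sketch, assuming frames arrive in RGB from an upstream source (the output path and dummy frames are hypothetical); each frame is converted to BGR before being written:

import cv2\nimport numpy as np\nimport supervision as sv\n\nvideo_info = sv.VideoInfo(width=1280, height=720, fps=30)\nrgb_frames = [np.zeros((720, 1280, 3), dtype=np.uint8) for _ in range(30)]\n\nwith sv.VideoSink(target_path=\"output.mp4\", video_info=video_info) as sink:\n    for rgb_frame in rgb_frames:\n        # write_frame expects BGR, so convert RGB frames before writing\n        sink.write_frame(frame=cv2.cvtColor(rgb_frame, cv2.COLOR_RGB2BGR))\n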
"},{"location":"utils/video/#supervision.utils.video.FPSMonitor-attributes","title":"Attributes","text":""},{"location":"utils/video/#supervision.utils.video.FPSMonitor.fps","title":"fps: float property","text":"

Computes and returns the average FPS based on the stored time stamps.

Returns:

Name Type Description float float

The average FPS. Returns 0.0 if no time stamps are stored.

"},{"location":"utils/video/#supervision.utils.video.FPSMonitor-functions","title":"Functions","text":""},{"location":"utils/video/#supervision.utils.video.FPSMonitor.__call__","title":"__call__()","text":"

Deprecated

FPSMonitor.__call__ is deprecated and will be removed in supervision-0.22.0. Use FPSMonitor.fps instead.

Computes and returns the average FPS based on the stored time stamps.

Returns:

Name Type Description float float

The average FPS. Returns 0.0 if no time stamps are stored.

Source code in supervision/utils/video.py
@deprecated(\n    \"`FPSMonitor.__call__` is deprecated and will be removed in \"\n    \"`supervision-0.22.0`. Use `FPSMonitor.fps` instead.\"\n)\ndef __call__(self) -> float:\n    \"\"\"\n    !!! failure \"Deprecated\"\n\n        `FPSMonitor.__call__` is deprecated and will be removed in\n        `supervision-0.22.0`. Use `FPSMonitor.fps` instead.\n\n    Computes and returns the average FPS based on the stored time stamps.\n\n    Returns:\n        float: The average FPS. Returns 0.0 if no time stamps are stored.\n    \"\"\"\n    return self.fps\n
"},{"location":"utils/video/#supervision.utils.video.FPSMonitor.__init__","title":"__init__(sample_size=30)","text":"

Parameters:

Name Type Description Default sample_size int

The maximum number of observations for latency benchmarking.

30

Examples:

import supervision as sv\n\nframes_generator = sv.get_video_frames_generator(source_path=<SOURCE_FILE_PATH>)\nfps_monitor = sv.FPSMonitor()\n\nfor frame in frames_generator:\n    # your processing code here\n    fps_monitor.tick()\n    fps = fps_monitor.fps\n
Source code in supervision/utils/video.py
def __init__(self, sample_size: int = 30):\n    \"\"\"\n    Args:\n        sample_size (int): The maximum number of observations for latency\n            benchmarking.\n\n    Examples:\n        ```python\n        import supervision as sv\n\n        frames_generator = sv.get_video_frames_generator(source_path=<SOURCE_FILE_PATH>)\n        fps_monitor = sv.FPSMonitor()\n\n        for frame in frames_generator:\n            # your processing code here\n            fps_monitor.tick()\n            fps = fps_monitor.fps\n        ```\n    \"\"\"  # noqa: E501 // docs\n    self.all_timestamps = deque(maxlen=sample_size)\n
"},{"location":"utils/video/#supervision.utils.video.FPSMonitor.reset","title":"reset()","text":"

Clears all the time stamps from the deque.

Source code in supervision/utils/video.py
def reset(self) -> None:\n    \"\"\"\n    Clears all the time stamps from the deque.\n    \"\"\"\n    self.all_timestamps.clear()\n
"},{"location":"utils/video/#supervision.utils.video.FPSMonitor.tick","title":"tick()","text":"

Adds a new time stamp to the deque for FPS calculation.

Source code in supervision/utils/video.py
def tick(self) -> None:\n    \"\"\"\n    Adds a new time stamp to the deque for FPS calculation.\n    \"\"\"\n    self.all_timestamps.append(time.monotonic())\n
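
A small illustrative sketch (video paths are hypothetical) that measures throughput for two videos in sequence, calling reset between them so the samples do not mix:

import supervision as sv\n\nfps_monitor = sv.FPSMonitor(sample_size=60)\n\nfor source_path in [\"first.mp4\", \"second.mp4\"]:\n    fps_monitor.reset()\n    for frame in sv.get_video_frames_generator(source_path=source_path):\n        # your processing code here\n        fps_monitor.tick()\n    print(source_path, fps_monitor.fps)\n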
"}]} \ No newline at end of file +{"config":{"lang":["en"],"separator":"[\\s\\-]+","pipeline":["stopWordFilter"]},"docs":[{"location":"","title":"Supervision","text":""},{"location":"#hello","title":"\ud83d\udc4b Hello","text":"

We write your reusable computer vision tools. Whether you need to load your dataset from your hard drive, draw detections on an image or video, or count how many detections are in a zone, you can count on us!

"},{"location":"#install","title":"\ud83d\udcbb Install","text":"

You can install supervision in a Python>=3.8 environment.

pip install (recommended)

headless / desktop

The headless installation of supervision is designed for environments where graphical user interfaces (GUI) are not needed, making it more lightweight and suitable for server-side applications.

pip install supervision\n

If you require the full version of supervision with GUI support you can install the desktop version. This version includes the GUI components of OpenCV, allowing you to display images and videos on the screen.

pip install \"supervision[desktop]\"\n

conda/mamba install

conda / mamba

conda install -c conda-forge supervision\n

mamba install -c conda-forge supervision\n

git clone (for development)

virtualenv / poetry
# clone repository and navigate to root directory\ngit clone https://github.com/roboflow/supervision.git\ncd supervision\n\n# setup python environment and activate it\npython3 -m venv venv\nsource venv/bin/activate\npip install --upgrade pip\n\n# headless install\npip install -e \".\"\n\n# desktop install\npip install -e \".[desktop]\"\n
# clone repository and navigate to root directory\ngit clone https://github.com/roboflow/supervision.git\ncd supervision\n\n# setup python environment and activate it\npoetry env use python3.10\npoetry shell\n\n# headless install\npoetry install\n\n# desktop install\npoetry install --extras \"desktop\"\n
"},{"location":"#quickstart","title":"\ud83d\ude80 Quickstart","text":"
  • Detect and Annotate

    Annotate predictions from a range of object detection and segmentation models

    Tutorial

  • Track Objects

    Discover how to enhance video analysis by implementing seamless object tracking

    Tutorial

  • Detect Small Objects

    Learn how to detect small objects in images

    Tutorial

  • Count Objects Crossing Line

    Explore methods to accurately count and analyze objects crossing a predefined line

  • Filter Objects in Zone

    Master the techniques to selectively filter and focus on objects within a specific zone

"},{"location":"assets/","title":"Assets","text":"

Supervision offers an assets download utility that allows you to download video files that you can use in your demos.

"},{"location":"assets/#install-extra","title":"Install extradownload_assetsVideoAssets","text":"

To install the Supervision assets utility, you can use pip. This utility is available as an extra within the Supervision package.

pip install

pip install \"supervision[assets]\"\n

Download a specified asset if it doesn't already exist or is corrupted.

Parameters:

Name Type Description Default asset_name Union[VideoAssets, str]

The name or type of the asset to be downloaded.

required

Returns:

Name Type Description str str

The filename of the downloaded asset.

Example
from supervision.assets import download_assets, VideoAssets\n\ndownload_assets(VideoAssets.VEHICLES)\n\"vehicles.mp4\"\n
Source code in supervision/assets/downloader.py
def download_assets(asset_name: Union[VideoAssets, str]) -> str:\n    \"\"\"\n    Download a specified asset if it doesn't already exist or is corrupted.\n\n    Parameters:\n        asset_name (Union[VideoAssets, str]): The name or type of the asset to be\n            downloaded.\n\n    Returns:\n        str: The filename of the downloaded asset.\n\n    Example:\n        ```python\n        from supervision.assets import download_assets, VideoAssets\n\n        download_assets(VideoAssets.VEHICLES)\n        \"vehicles.mp4\"\n        ```\n    \"\"\"\n\n    filename = asset_name.value if isinstance(asset_name, VideoAssets) else asset_name\n\n    if not Path(filename).exists() and filename in VIDEO_ASSETS:\n        print(f\"Downloading {filename} assets \\n\")\n        response = get(VIDEO_ASSETS[filename][0], stream=True, allow_redirects=True)\n        response.raise_for_status()\n\n        file_size = int(response.headers.get(\"Content-Length\", 0))\n        folder_path = Path(filename).expanduser().resolve()\n        folder_path.parent.mkdir(parents=True, exist_ok=True)\n\n        with tqdm.wrapattr(\n            response.raw, \"read\", total=file_size, desc=\"\", colour=\"#a351fb\"\n        ) as raw_resp:\n            with folder_path.open(\"wb\") as file:\n                copyfileobj(raw_resp, file)\n\n    elif Path(filename).exists():\n        if not is_md5_hash_matching(filename, VIDEO_ASSETS[filename][1]):\n            print(\"File corrupted. Re-downloading... \\n\")\n            os.remove(filename)\n            return download_assets(filename)\n\n        print(f\"{filename} asset download complete. \\n\")\n\n    else:\n        valid_assets = \", \".join(asset.value for asset in VideoAssets)\n        raise ValueError(\n            f\"Invalid asset. It should be one of the following: {valid_assets}.\"\n        )\n\n    return filename\n

Bases: Enum

Each member of this enum represents a video asset. The value associated with each member is the filename of the video.

Enum Member Video Filename Video URL VEHICLES vehicles.mp4 Link MILK_BOTTLING_PLANT milk-bottling-plant.mp4 Link VEHICLES_2 vehicles-2.mp4 Link GROCERY_STORE grocery-store.mp4 Link SUBWAY subway.mp4 Link MARKET_SQUARE market-square.mp4 Link PEOPLE_WALKING people-walking.mp4 Link BEACH beach-1.mp4 Link BASKETBALL basketball-1.mp4 Link Source code in supervision/assets/list.py
class VideoAssets(Enum):\n    \"\"\"\n    Each member of this enum represents a video asset. The value associated with each\n    member is the filename of the video.\n\n    | Enum Member            | Video Filename             | Video URL                                                                             |\n    |------------------------|----------------------------|---------------------------------------------------------------------------------------|\n    | `VEHICLES`             | `vehicles.mp4`             | [Link](https://media.roboflow.com/supervision/video-examples/vehicles.mp4)            |\n    | `MILK_BOTTLING_PLANT`  | `milk-bottling-plant.mp4`  | [Link](https://media.roboflow.com/supervision/video-examples/milk-bottling-plant.mp4) |\n    | `VEHICLES_2`           | `vehicles-2.mp4`           | [Link](https://media.roboflow.com/supervision/video-examples/vehicles-2.mp4)          |\n    | `GROCERY_STORE`        | `grocery-store.mp4`        | [Link](https://media.roboflow.com/supervision/video-examples/grocery-store.mp4)       |\n    | `SUBWAY`               | `subway.mp4`               | [Link](https://media.roboflow.com/supervision/video-examples/subway.mp4)              |\n    | `MARKET_SQUARE`        | `market-square.mp4`        | [Link](https://media.roboflow.com/supervision/video-examples/market-square.mp4)       |\n    | `PEOPLE_WALKING`       | `people-walking.mp4`       | [Link](https://media.roboflow.com/supervision/video-examples/people-walking.mp4)      |\n    | `BEACH`                | `beach-1.mp4`              | [Link](https://media.roboflow.com/supervision/video-examples/beach-1.mp4)             |\n    | `BASKETBALL`           | `basketball-1.mp4`         | [Link](https://media.roboflow.com/supervision/video-examples/basketball-1.mp4)        |\n    \"\"\"  # noqa: E501 // docs\n\n    VEHICLES = \"vehicles.mp4\"\n    MILK_BOTTLING_PLANT = \"milk-bottling-plant.mp4\"\n    VEHICLES_2 = \"vehicles-2.mp4\"\n    GROCERY_STORE = \"grocery-store.mp4\"\n    SUBWAY = \"subway.mp4\"\n    MARKET_SQUARE = \"market-square.mp4\"\n    PEOPLE_WALKING = \"people-walking.mp4\"\n    BEACH = \"beach-1.mp4\"\n    BASKETBALL = \"basketball-1.mp4\"\n\n    @classmethod\n    def list(cls):\n        return list(map(lambda c: c.value, cls))\n
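
A short sketch combining the enum with download_assets; VideoAssets.list(), shown in the source above, returns all available filenames:

from supervision.assets import VideoAssets, download_assets\n\n# all bundled asset filenames, e.g. ['vehicles.mp4', 'milk-bottling-plant.mp4', ...]\nprint(VideoAssets.list())\n\n# downloads the file if it is missing and returns its filename\npath = download_assets(VideoAssets.PEOPLE_WALKING)\nprint(path)  # people-walking.mp4\n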
"},{"location":"changelog/","title":"Changelog","text":""},{"location":"changelog/#0210-jun-5-2024","title":"0.21.0 Jun 5, 2024","text":"
  • Added #500: sv.Detections.with_nmm to perform non-maximum merging on the current set of object detections.

  • Added #1221: sv.Detections.from_lmm allowing to parse Large Multimodal Model (LMM) text result into sv.Detections object. For now from_lmm supports only PaliGemma result parsing.

import supervision as sv\n\npaligemma_result = \"<loc0256><loc0256><loc0768><loc0768> cat\"\ndetections = sv.Detections.from_lmm(\n    sv.LMM.PALIGEMMA,\n    paligemma_result,\n    resolution_wh=(1000, 1000),\n    classes=['cat', 'dog']\n)\ndetections.xyxy\n# array([[250., 250., 750., 750.]])\n\ndetections.class_id\n# array([0])\n
  • Added #1236: sv.VertexLabelAnnotator allowing to annotate every vertex of a keypoint skeleton with custom text and color.
import supervision as sv\n\nimage = ...\nkey_points = sv.KeyPoints(...)\n\nedge_annotator = sv.EdgeAnnotator(\n    color=sv.Color.GREEN,\n    thickness=5\n)\nannotated_frame = edge_annotator.annotate(\n    scene=image.copy(),\n    key_points=key_points\n)\n
  • Added #1147: sv.KeyPoints.from_inference allowing to create sv.KeyPoints from Inference result.

  • Added #1138: sv.KeyPoints.from_yolo_nas allowing to create sv.KeyPoints from YOLO-NAS result.

  • Added #1163: sv.mask_to_rle and sv.rle_to_mask allowing for easy conversion between mask and rle formats.

  • Changed #1236: sv.InferenceSlicer allowing to select overlap filtering strategy (NONE, NON_MAX_SUPPRESSION and NON_MAX_MERGE).

  • Changed #1178: sv.InferenceSlicer adding instance segmentation model support.

import cv2\nimport numpy as np\nimport supervision as sv\nfrom inference import get_model\n\nmodel = get_model(model_id=\"yolov8x-seg-640\")\nimage = cv2.imread(<SOURCE_IMAGE_PATH>)\n\ndef callback(image_slice: np.ndarray) -> sv.Detections:\n    results = model.infer(image_slice)[0]\n    return sv.Detections.from_inference(results)\n\nslicer = sv.InferenceSlicer(callback = callback)\ndetections = slicer(image)\n\nmask_annotator = sv.MaskAnnotator()\nlabel_annotator = sv.LabelAnnotator()\n\nannotated_image = mask_annotator.annotate(\n    scene=image, detections=detections)\nannotated_image = label_annotator.annotate(\n    scene=annotated_image, detections=detections)\n
  • Changed #1228: sv.LineZone making it 10-20 times faster, depending on the use case.

  • Changed #1163: sv.DetectionDataset.from_coco and sv.DetectionDataset.as_coco adding support for run-length encoding (RLE) mask format.

"},{"location":"changelog/#0200-april-24-2024","title":"0.20.0 April 24, 2024","text":"
  • Added #1128: sv.KeyPoints to provide initial support for pose estimation and broader keypoint detection models.

  • Added #1128: sv.EdgeAnnotator and sv.VertexAnnotator to enable rendering of results from keypoint detection models.

import cv2\nimport supervision as sv\nfrom ultralytics import YOLO\n\nimage = cv2.imread(<SOURCE_IMAGE_PATH>)\nmodel = YOLO('yolov8l-pose')\n\nresult = model(image, verbose=False)[0]\nkeypoints = sv.KeyPoints.from_ultralytics(result)\n\nedge_annotators = sv.EdgeAnnotator(color=sv.Color.GREEN, thickness=5)\nannotated_image = edge_annotators.annotate(image.copy(), keypoints)\n
  • Changed #1037: sv.LabelAnnotator by adding an additional corner_radius argument that allows for rounding the corners of the bounding box.

  • Changed #1109: sv.PolygonZone such that the frame_resolution_wh argument is no longer required to initialize sv.PolygonZone.

Deprecated

The frame_resolution_wh parameter in sv.PolygonZone is deprecated and will be removed in supervision-0.24.0.

  • Changed #1084: sv.get_polygon_center to calculate a more accurate polygon centroid.

  • Changed #1069: sv.Detections.from_transformers by adding support for Transformers segmentation models and extracting class name values.

import torch\nimport supervision as sv\nfrom PIL import Image\nfrom transformers import DetrImageProcessor, DetrForSegmentation\n\nprocessor = DetrImageProcessor.from_pretrained(\"facebook/detr-resnet-50-panoptic\")\nmodel = DetrForSegmentation.from_pretrained(\"facebook/detr-resnet-50-panoptic\")\n\nimage = Image.open(<SOURCE_IMAGE_PATH>)\ninputs = processor(images=image, return_tensors=\"pt\")\n\nwith torch.no_grad():\n    outputs = model(**inputs)\n\nwidth, height = image.size\ntarget_size = torch.tensor([[height, width]])\nresults = processor.post_process_segmentation(\n    outputs=outputs, target_sizes=target_size)[0]\ndetections = sv.Detections.from_transformers(results, id2label=model.config.id2label)\n\nmask_annotator = sv.MaskAnnotator()\nlabel_annotator = sv.LabelAnnotator(text_position=sv.Position.CENTER)\n\nannotated_image = mask_annotator.annotate(\n    scene=image, detections=detections)\nannotated_image = label_annotator.annotate(\n    scene=annotated_image, detections=detections)\n
  • Fixed #787: sv.ByteTrack.update_with_detections which was removing segmentation masks while tracking. Now, ByteTrack can be used alongside segmentation models.
"},{"location":"changelog/#0190-march-15-2024","title":"0.19.0 March 15, 2024","text":"
  • Added #818: sv.CSVSink allowing for the straightforward saving of image, video, or stream inference results in a .csv file.
import supervision as sv\nfrom ultralytics import YOLO\n\nmodel = YOLO(<SOURCE_MODEL_PATH>)\ncsv_sink = sv.CSVSink(<RESULT_CSV_FILE_PATH>)\nframes_generator = sv.get_video_frames_generator(<SOURCE_VIDEO_PATH>)\n\nwith csv_sink:\n    for frame in frames_generator:\n        result = model(frame)[0]\n        detections = sv.Detections.from_ultralytics(result)\n        csv_sink.append(detections, custom_data={<CUSTOM_LABEL>:<CUSTOM_DATA>})\n
  • Added #819: sv.JSONSink allowing for the straightforward saving of image, video, or stream inference results in a .json file.
import supervision as sv\nfrom ultralytics import YOLO\n\nmodel = YOLO(<SOURCE_MODEL_PATH>)\njson_sink = sv.JSONSink(<RESULT_JSON_FILE_PATH>)\nframes_generator = sv.get_video_frames_generator(<SOURCE_VIDEO_PATH>)\n\nwith json_sink:\n    for frame in frames_generator:\n        result = model(frame)[0]\n        detections = sv.Detections.from_ultralytics(result)\n        json_sink.append(detections, custom_data={<CUSTOM_LABEL>:<CUSTOM_DATA>})\n
  • Added #847: sv.mask_iou_batch allowing to compute Intersection over Union (IoU) of two sets of masks.

  • Added #847: sv.mask_non_max_suppression allowing to perform Non-Maximum Suppression (NMS) on segmentation predictions.

  • Added #888: sv.CropAnnotator allowing users to annotate the scene with scaled-up crops of detections.

import cv2\nimport supervision as sv\nfrom inference import get_model\n\nimage = cv2.imread(<SOURCE_IMAGE_PATH>)\nmodel = get_model(model_id=\"yolov8n-640\")\n\nresult = model.infer(image)[0]\ndetections = sv.Detections.from_inference(result)\n\ncrop_annotator = sv.CropAnnotator()\nannotated_frame = crop_annotator.annotate(\n    scene=image.copy(),\n    detections=detections\n)\n
  • Changed #827: sv.ByteTrack.reset allowing users to clear trackers state, enabling the processing of multiple video files in sequence.

  • Changed #802: sv.LineZoneAnnotator allowing to hide in/out count using display_in_count and display_out_count properties.

  • Changed #787: sv.ByteTrack input arguments and docstrings updated to improve readability and ease of use.

Deprecated

The track_buffer, track_thresh, and match_thresh parameters in sv.ByteTrack are deprecated and will be removed in supervision-0.23.0. Use lost_track_buffer, track_activation_threshold, and minimum_matching_threshold instead.

  • Changed #910: sv.PolygonZone to now accept a list of specific box anchors that must be in zone for a detection to be counted.

Deprecated

The triggering_position parameter in sv.PolygonZone is deprecated and will be removed in supervision-0.23.0. Use triggering_anchors instead.

  • Changed #875: annotators adding support for Pillow images. All supervision Annotators can now accept an image as either a numpy array or a Pillow Image. They automatically detect its type, draw annotations, and return the output in the same format as the input.

  • Fixed #944: sv.DetectionsSmoother removing tracking_id from sv.Detections.

"},{"location":"changelog/#0180-january-25-2024","title":"0.18.0 January 25, 2024","text":"
  • Added #720: sv.PercentageBarAnnotator allowing to annotate images and videos with percentage values representing confidence or other custom property.
>>> import supervision as sv\n\n>>> image = ...\n>>> detections = sv.Detections(...)\n\n>>> percentage_bar_annotator = sv.PercentageBarAnnotator()\n>>> annotated_frame = percentage_bar_annotator.annotate(\n...     scene=image.copy(),\n...     detections=detections\n... )\n
  • Added #702: sv.RoundBoxAnnotator allowing to annotate images and videos with rounded corners bounding boxes.

  • Added #770: sv.OrientedBoxAnnotator allowing to annotate images and videos with OBB (Oriented Bounding Boxes).

import cv2\nimport supervision as sv\nfrom ultralytics import YOLO\n\nimage = cv2.imread(<SOURCE_IMAGE_PATH>)\nmodel = YOLO(\"yolov8n-obb.pt\")\n\nresult = model(image)[0]\ndetections = sv.Detections.from_ultralytics(result)\n\noriented_box_annotator = sv.OrientedBoxAnnotator()\nannotated_frame = oriented_box_annotator.annotate(\n    scene=image.copy(),\n    detections=detections\n)\n
  • Added #696: sv.DetectionsSmoother allowing for smoothing detections over multiple frames in video tracking.

  • Added #769: sv.ColorPalette.from_matplotlib allowing users to create a sv.ColorPalette instance from a Matplotlib color palette.

>>> import supervision as sv\n\n>>> sv.ColorPalette.from_matplotlib('viridis', 5)\nColorPalette(colors=[Color(r=68, g=1, b=84), Color(r=59, g=82, b=139), ...])\n
  • Changed #770: sv.Detections.from_ultralytics adding support for OBB (Oriented Bounding Boxes).

  • Changed #735: sv.LineZone to now accept a list of specific box anchors that must cross the line for a detection to be counted. This update marks a significant improvement from the previous requirement, where all four box corners were necessary. Users can now specify a single anchor, such as sv.Position.BOTTOM_CENTER, or any other combination of anchors defined as List[sv.Position].

  • Changed #756: sv.Color's and sv.ColorPalette's method of accessing predefined colors, transitioning from a function-based approach (sv.Color.red()) to a more intuitive and conventional property-based method (sv.Color.RED).

Deprecated

sv.ColorPalette.default() is deprecated and will be removed in supervision-0.22.0. Use sv.ColorPalette.DEFAULT instead.

  • Changed #769: sv.ColorPalette.DEFAULT value, giving users a more extensive set of annotation colors.

  • Changed #677: sv.Detections.from_roboflow to sv.Detections.from_inference streamlining its functionality to be compatible with both the inference pip package and the Roboflow hosted API.

Deprecated

Detections.from_roboflow() is deprecated and will be removed in supervision-0.22.0. Use Detections.from_inference instead.

  • Fixed #735: sv.LineZone functionality to accurately update the counter when an object crosses a line from any direction, including from the side. This enhancement enables more precise tracking and analytics, such as calculating individual in/out counts for each lane on the road.
"},{"location":"changelog/#0170-december-06-2023","title":"0.17.0 December 06, 2023","text":"
  • Added #633: sv.PixelateAnnotator allowing to pixelate objects on images and videos.

  • Added #652: sv.TriangleAnnotator allowing to annotate images and videos with triangle markers.

  • Added #602: sv.PolygonAnnotator allowing to annotate images and videos with segmentation mask outline.

>>> import supervision as sv\n\n>>> image = ...\n>>> detections = sv.Detections(...)\n\n>>> polygon_annotator = sv.PolygonAnnotator()\n>>> annotated_frame = polygon_annotator.annotate(\n...     scene=image.copy(),\n...     detections=detections\n... )\n
  • Added #476: sv.assets allowing download of video files that you can use in your demos.
>>> from supervision.assets import download_assets, VideoAssets\n>>> download_assets(VideoAssets.VEHICLES)\n\"vehicles.mp4\"\n
  • Added #605: Position.CENTER_OF_MASS allowing to place labels in center of mass of segmentation masks.

  • Added #651: sv.scale_boxes allowing to scale sv.Detections.xyxy values.

  • Added #637: sv.calculate_dynamic_text_scale and sv.calculate_dynamic_line_thickness allowing text scale and line thickness to match image resolution.

  • Added #620: sv.Color.as_hex allowing to extract color value in HEX format.

  • Added #572: sv.Classifications.from_timm allowing to load classification result from timm models.

  • Added #478: sv.Classifications.from_clip allowing to load classification result from clip model.

  • Added #571: sv.Detections.from_azure_analyze_image allowing to load detection results from Azure Image Analysis.

  • Changed #646: sv.BoxMaskAnnotator renaming it to sv.ColorAnnotator.

  • Changed #606: sv.MaskAnnotator to make it 5x faster.

  • Fixed #584: sv.DetectionDataset.from_yolo to ignore empty lines in annotation files.

  • Fixed #555: sv.BlurAnnotator to trim negative coordinates before blurring detections.

  • Fixed #511: sv.TraceAnnotator to respect trace position.

"},{"location":"changelog/#0160-october-19-2023","title":"0.16.0 October 19, 2023","text":"
  • Added #422: sv.BoxMaskAnnotator allowing to annotate images and videos with box masks.

  • Added #433: sv.HaloAnnotator allowing to annotate images and videos with halo effect.

>>> import supervision as sv\n\n>>> image = ...\n>>> detections = sv.Detections(...)\n\n>>> halo_annotator = sv.HaloAnnotator()\n>>> annotated_frame = halo_annotator.annotate(\n...     scene=image.copy(),\n...     detections=detections\n... )\n
  • Added #466: sv.HeatMapAnnotator allowing to annotate videos with heat maps.

  • Added #492: sv.DotAnnotator allowing to annotate images and videos with dots.

  • Added #449: sv.draw_image allowing to draw an image onto a given scene with specified opacity and dimensions.

  • Added #280: sv.FPSMonitor for monitoring frames per second (FPS) to benchmark latency.

  • Added #454: \ud83e\udd17 Hugging Face Annotators space.

  • Changed #482: sv.LineZone.trigger now return Tuple[np.ndarray, np.ndarray]. The first array indicates which detections have crossed the line from outside to inside. The second array indicates which detections have crossed the line from inside to outside.

  • Changed #465: Annotator argument name from color_map: str to color_lookup: ColorLookup enum to increase type safety.

  • Changed #426: sv.MaskAnnotator allowing 2x faster annotation.

  • Fixed #477: Poetry env definition allowing proper local installation.

  • Fixed #430: sv.ByteTrack to return np.array([], dtype=int) when sv.Detections is empty.

Deprecated

sv.Detections.from_yolov8 and sv.Classifications.from_yolov8 as those are now replaced by sv.Detections.from_ultralytics and sv.Classifications.from_ultralytics.

"},{"location":"changelog/#0150-october-5-2023","title":"0.15.0 October 5, 2023","text":"
  • Added #170: sv.BoundingBoxAnnotator allowing to annotate images and videos with bounding boxes.

  • Added #170: sv.BoxCornerAnnotator allowing to annotate images and videos with just bounding box corners.

  • Added #170: sv.MaskAnnotator allowing to annotate images and videos with segmentation masks.

  • Added #170: sv.EllipseAnnotator allowing to annotate images and videos with ellipses (sports game style).

  • Added #386: sv.CircleAnnotator allowing to annotate images and videos with circles.

  • Added #354: sv.TraceAnnotator allowing to draw path of moving objects on videos.

  • Added #405: sv.BlurAnnotator allowing to blur objects on images and videos.

>>> import supervision as sv\n\n>>> image = ...\n>>> detections = sv.Detections(...)\n\n>>> bounding_box_annotator = sv.BoundingBoxAnnotator()\n>>> annotated_frame = bounding_box_annotator.annotate(\n...     scene=image.copy(),\n...     detections=detections\n... )\n
  • Added #354: Supervision usage example. You can now learn how to perform traffic flow analysis with Supervision.

  • Changed #399: sv.Detections.from_roboflow now does not require class_list to be specified. The class_id value can be extracted directly from the inference response.

  • Changed #381: sv.VideoSink now allows to customize the output codec.

  • Changed #361: sv.InferenceSlicer can now operate in multithreading mode.

  • Fixed #348: sv.Detections.from_deepsparse to allow processing empty deepsparse result object.

"},{"location":"changelog/#0140-august-31-2023","title":"0.14.0 August 31, 2023","text":"
  • Added #282: support for SAHI inference technique with sv.InferenceSlicer.
>>> import cv2\n>>> import supervision as sv\n>>> from ultralytics import YOLO\n\n>>> image = cv2.imread(SOURCE_IMAGE_PATH)\n>>> model = YOLO(...)\n\n>>> def callback(image_slice: np.ndarray) -> sv.Detections:\n...     result = model(image_slice)[0]\n...     return sv.Detections.from_ultralytics(result)\n\n>>> slicer = sv.InferenceSlicer(callback = callback)\n\n>>> detections = slicer(image)\n
  • Added #297: Detections.from_deepsparse to enable seamless integration with DeepSparse framework.

  • Added #281: sv.Classifications.from_ultralytics to enable seamless integration with Ultralytics framework. This will enable you to use supervision with all models that Ultralytics supports.

Deprecated

sv.Detections.from_yolov8 and sv.Classifications.from_yolov8 are now deprecated and will be removed with supervision-0.16.0 release.

  • Added #341: First supervision usage example script showing how to detect and track objects on video using YOLOv8 + Supervision.

  • Changed #296: sv.ClassificationDataset and sv.DetectionDataset now use image path (not image name) as dataset keys.

  • Fixed #300: Detections.from_roboflow to filter out polygons with less than 3 points.

"},{"location":"changelog/#0130-august-8-2023","title":"0.13.0 August 8, 2023","text":"
  • Added #236: support for mean average precision (mAP) for object detection models with sv.MeanAveragePrecision.
>>> import supervision as sv\n>>> from ultralytics import YOLO\n\n>>> dataset = sv.DetectionDataset.from_yolo(...)\n\n>>> model = YOLO(...)\n>>> def callback(image: np.ndarray) -> sv.Detections:\n...     result = model(image)[0]\n...     return sv.Detections.from_yolov8(result)\n\n>>> mean_average_precision = sv.MeanAveragePrecision.benchmark(\n...     dataset = dataset,\n...     callback = callback\n... )\n\n>>> mean_average_precision.map50_95\n0.433\n
  • Added #256: support for ByteTrack for object tracking with sv.ByteTrack.

  • Added #222: sv.Detections.from_ultralytics to enable seamless integration with Ultralytics framework. This will enable you to use supervision with all models that Ultralytics supports.

Deprecated

sv.Detections.from_yolov8 is now deprecated and will be removed with supervision-0.15.0 release.

  • Added #191: sv.Detections.from_paddledet to enable seamless integration with PaddleDetection framework.

  • Added #245: support for loading PASCAL VOC segmentation datasets with sv.DetectionDataset.

"},{"location":"changelog/#0120-july-24-2023","title":"0.12.0 July 24, 2023","text":"

Python 3.7. Support Terminated

With the supervision-0.12.0 release, we are terminating official support for Python 3.7.

  • Added #177: initial support for object detection model benchmarking with sv.ConfusionMatrix.
>>> import supervision as sv\n>>> from ultralytics import YOLO\n\n>>> dataset = sv.DetectionDataset.from_yolo(...)\n\n>>> model = YOLO(...)\n>>> def callback(image: np.ndarray) -> sv.Detections:\n...     result = model(image)[0]\n...     return sv.Detections.from_yolov8(result)\n\n>>> confusion_matrix = sv.ConfusionMatrix.benchmark(\n...     dataset = dataset,\n...     callback = callback\n... )\n\n>>> confusion_matrix.matrix\narray([\n    [0., 0., 0., 0.],\n    [0., 1., 0., 1.],\n    [0., 1., 1., 0.],\n    [1., 1., 0., 0.]\n])\n
  • Added #173: Detections.from_mmdetection to enable seamless integration with MMDetection framework.

  • Added #130: ability to install package in headless or desktop mode.

  • Changed #180: packaging method from setup.py to pyproject.toml.

  • Fixed #188: sv.DetectionDataset.from_coco can't be loaded when there are images without annotations.

  • Fixed #226: sv.DetectionDataset.from_yolo can't load background instances.

"},{"location":"changelog/#0111-june-29-2023","title":"0.11.1 June 29, 2023","text":"
  • Fixed #165: as_folder_structure fails to save sv.ClassificationDataset when it is the result of inference.
"},{"location":"changelog/#0110-june-28-2023","title":"0.11.0 June 28, 2023","text":"
  • Added #150: ability to load and save sv.DetectionDataset in COCO format using as_coco and from_coco methods.
>>> import supervision as sv\n\n>>> ds = sv.DetectionDataset.from_coco(\n...     images_directory_path='...',\n...     annotations_path='...'\n... )\n\n>>> ds.as_coco(\n...     images_directory_path='...',\n...     annotations_path='...'\n... )\n
  • Added #158: ability to merge multiple sv.DetectionDataset together using merge method.
>>> import supervision as sv\n\n>>> ds_1 = sv.DetectionDataset(...)\n>>> len(ds_1)\n100\n>>> ds_1.classes\n['dog', 'person']\n\n>>> ds_2 = sv.DetectionDataset(...)\n>>> len(ds_2)\n200\n>>> ds_2.classes\n['cat']\n\n>>> ds_merged = sv.DetectionDataset.merge([ds_1, ds_2])\n>>> len(ds_merged)\n300\n>>> ds_merged.classes\n['cat', 'dog', 'person']\n
  • Added #162: additional start and end arguments to sv.get_video_frames_generator allowing to generate frames only for a selected part of the video.

  • Fixed #157: incorrect loading of YOLO dataset class names from data.yaml.

"},{"location":"changelog/#0100-june-14-2023","title":"0.10.0 June 14, 2023","text":"
  • Added #125: ability to load and save sv.ClassificationDataset in a folder structure format.
>>> import supervision as sv\n\n>>> cs = sv.ClassificationDataset.from_folder_structure(\n...     root_directory_path='...'\n... )\n\n>>> cs.as_folder_structure(\n...     root_directory_path='...'\n... )\n
  • Added #125: support for sv.ClassificationDataset.split allowing to divide sv.ClassificationDataset into two parts.

  • Added #110: ability to extract masks from Roboflow API results using sv.Detections.from_roboflow.

  • Added commit hash: Supervision Quickstart notebook where you can learn more about Detection, Dataset and Video APIs.

  • Changed #135: sv.get_video_frames_generator documentation to better describe actual behavior.

"},{"location":"changelog/#090-june-7-2023","title":"0.9.0 June 7, 2023","text":"
  • Added #118: ability to select sv.Detections by index, list of indexes or slice. Here is an example illustrating the new selection methods.
>>> import supervision as sv\n\n>>> detections = sv.Detections(...)\n>>> len(detections[0])\n1\n>>> len(detections[[0, 1]])\n2\n>>> len(detections[0:2])\n2\n
  • Added #101: ability to extract masks from YOLOv8 result using sv.Detections.from_yolov8. Here is an example illustrating how to extract boolean masks from the result of the YOLOv8 model inference.

  • Added #122: ability to crop image using sv.crop. Here is an example showing how to get a separate crop for each detection in sv.Detections.

  • Added #120: ability to conveniently save multiple images into directory using sv.ImageSink. Here is an example showing how to save every tenth video frame as a separate image.

>>> import supervision as sv\n\n>>> with sv.ImageSink(target_dir_path='target/directory/path') as sink:\n...     for image in sv.get_video_frames_generator(source_path='source_video.mp4', stride=10):\n...         sink.save_image(image=image)\n
  • Fixed #106: inconvenient handling of sv.PolygonZone coordinates. Now sv.PolygonZone accepts coordinates in the form of [[x1, y1], [x2, y2], ...] that can be both integers and floats.
"},{"location":"changelog/#080-may-17-2023","title":"0.8.0 May 17, 2023","text":"
  • Added #100: support for dataset inheritance. The current Dataset got renamed to DetectionDataset. Now DetectionDataset inherits from BaseDataset. This change was made to enforce the future consistency of APIs of different types of computer vision datasets.
  • Added #100: ability to save datasets in YOLO format using DetectionDataset.as_yolo.
>>> import roboflow\n>>> from roboflow import Roboflow\n>>> import supervision as sv\n\n>>> roboflow.login()\n\n>>> rf = Roboflow()\n\n>>> project = rf.workspace(WORKSPACE_ID).project(PROJECT_ID)\n>>> dataset = project.version(PROJECT_VERSION).download(\"yolov5\")\n\n>>> ds = sv.DetectionDataset.from_yolo(\n...     images_directory_path=f\"{dataset.location}/train/images\",\n...     annotations_directory_path=f\"{dataset.location}/train/labels\",\n...     data_yaml_path=f\"{dataset.location}/data.yaml\"\n... )\n\n>>> ds.classes\n['dog', 'person']\n
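Saving back to YOLO format presumably mirrors the loading call above; a hedged sketch:
>>> ds.as_yolo(\n...     images_directory_path='...',\n...     annotations_directory_path='...',\n...     data_yaml_path='...'\n... )\n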
  • Added #102: support for DetectionDataset.split, allowing you to divide a DetectionDataset into two parts.
>>> import supervision as sv\n\n>>> ds = sv.DetectionDataset(...)\n>>> train_ds, test_ds = ds.split(split_ratio=0.7, random_state=42, shuffle=True)\n\n>>> len(train_ds), len(test_ds)\n(700, 300)\n
  • Changed #100: default value of approximation_percentage parameter from 0.75 to 0.0 in DetectionDataset.as_yolo and DetectionDataset.as_pascal_voc.
"},{"location":"changelog/#070-may-11-2023","title":"0.7.0 May 11, 2023","text":"
  • Added #91: Detections.from_yolo_nas to enable seamless integration with YOLO-NAS model.
  • Added #86: ability to load datasets in YOLO format using Dataset.from_yolo.
  • Added #84: Detections.merge to merge multiple Detections objects together.
  • Fixed #81: LineZoneAnnotator.annotate does not return annotated frame.
  • Changed #44: LineZoneAnnotator.annotate to allow for custom text for the in and out tags.
"},{"location":"changelog/#060-april-19-2023","title":"0.6.0 April 19, 2023","text":"
  • Added #71: initial Dataset support and ability to save Detections in Pascal VOC XML format.
  • Added #71: new mask_to_polygons, filter_polygons_by_area, polygon_to_xyxy and approximate_polygon utilities.
  • Added #72: ability to load Pascal VOC XML object detections dataset as Dataset.
  • Changed #70: order of Detections attributes to make it consistent with order of objects in __iter__ tuple.
  • Changed #71: generate_2d_mask to polygon_to_mask.
"},{"location":"changelog/#052-april-13-2023","title":"0.5.2 April 13, 2023","text":"
  • Fixed #63: LineZone.trigger function expects 4 values instead of 5.
"},{"location":"changelog/#051-april-12-2023","title":"0.5.1 April 12, 2023","text":"
  • Fixed Detections.__getitem__ method not returning the mask for the selected item.
  • Fixed Detections.area crashing for mask detections.
"},{"location":"changelog/#050-april-10-2023","title":"0.5.0 April 10, 2023","text":"
  • Added #58: Detections.mask to enable segmentation support.
  • Added #58: MaskAnnotator to allow easy Detections.mask annotation.
  • Added #58: Detections.from_sam to enable native Segment Anything Model (SAM) support.
  • Changed #58: Detections.area behaviour to work not only with boxes but also with masks.
"},{"location":"changelog/#040-april-5-2023","title":"0.4.0 April 5, 2023","text":"
  • Added #46: Detections.empty to allow easy creation of empty Detections objects.
  • Added #56: Detections.from_roboflow to allow easy creation of Detections objects from Roboflow API inference results.
  • Added #56: plot_images_grid to allow easy plotting of multiple images on single plot.
  • Added #56: initial support for Pascal VOC XML format with detections_to_voc_xml method.
  • Changed #56: show_frame_in_notebook refactored and renamed to plot_image.
"},{"location":"changelog/#032-march-23-2023","title":"0.3.2 March 23, 2023","text":"
  • Changed #50: Allow Detections.class_id to be None.
"},{"location":"changelog/#031-march-6-2023","title":"0.3.1 March 6, 2023","text":"
  • Fixed #41: PolygonZone throws an exception when the object touches the bottom edge of the image.
  • Fixed #42: Detections.with_nms method throws an exception when Detections is empty.
  • Changed #36: Detections.with_nms to support both class-agnostic and non-class-agnostic cases.
"},{"location":"changelog/#030-march-6-2023","title":"0.3.0 March 6, 2023","text":"
  • Changed: Allow Detections.confidence to be None.
  • Added: Detections.from_transformers and Detections.from_detectron2 to enable seamless integration with Transformers and Detectron2 models.
  • Added: Detections.area to dynamically calculate bounding box area.
  • Added: Detections.with_nms to filter out double detections with NMS. Initial, class-agnostic-only implementation.
"},{"location":"changelog/#020-february-2-2023","title":"0.2.0 February 2, 2023","text":"
  • Added: Advanced Detections filtering with pandas-like API.
  • Added: Detections.from_yolov5 and Detections.from_yolov8 to enable seamless integration with YOLOv5 and YOLOv8 models.
"},{"location":"changelog/#010-january-19-2023","title":"0.1.0 January 19, 2023","text":"

Say hello to Supervision \ud83d\udc4b

"},{"location":"code_of_conduct/","title":"Code of conduct","text":"
# Contributor Covenant Code of Conduct\n\n## Our Pledge\n\nWe as members, contributors, and leaders pledge to make participation in our\ncommunity a harassment-free experience for everyone, regardless of age, body\nsize, visible or invisible disability, ethnicity, sex characteristics, gender\nidentity and expression, level of experience, education, socio-economic status,\nnationality, personal appearance, race, caste, color, religion, or sexual\nidentity and orientation.\n\nWe pledge to act and interact in ways that contribute to an open, welcoming,\ndiverse, inclusive, and healthy community.\n\n## Our Standards\n\nExamples of behavior that contributes to a positive environment for our\ncommunity include:\n\n* Demonstrating empathy and kindness toward other people\n* Being respectful of differing opinions, viewpoints, and experiences\n* Giving and gracefully accepting constructive feedback\n* Accepting responsibility and apologizing to those affected by our mistakes,\n  and learning from the experience\n* Focusing on what is best not just for us as individuals, but for the overall\n  community\n\nExamples of unacceptable behavior include:\n\n* The use of sexualized language or imagery, and sexual attention or advances of\n  any kind\n* Trolling, insulting or derogatory comments, and personal or political attacks\n* Public or private harassment\n* Publishing others' private information, such as a physical or email address,\n  without their explicit permission\n* Other conduct which could reasonably be considered inappropriate in a\n  professional setting\n\n## Enforcement Responsibilities\n\nCommunity leaders are responsible for clarifying and enforcing our standards of\nacceptable behavior and will take appropriate and fair corrective action in\nresponse to any behavior that they deem inappropriate, threatening, offensive,\nor harmful.\n\nCommunity leaders have the right and responsibility to remove, edit, or reject\ncomments, commits, code, wiki edits, issues, and other contributions that are\nnot aligned to this Code of Conduct, and will communicate reasons for moderation\ndecisions when appropriate.\n\n## Scope\n\nThis Code of Conduct applies within all community spaces, and also applies when\nan individual is officially representing the community in public spaces.\nExamples of representing our community include using an official e-mail address,\nposting via an official social media account, or acting as an appointed\nrepresentative at an online or offline event.\n\n## Enforcement\n\nInstances of abusive, harassing, or otherwise unacceptable behavior may be\nreported to the community leaders responsible for enforcement at\ncommunity-reports@roboflow.com.\n\nAll complaints will be reviewed and investigated promptly and fairly.\n\nAll community leaders are obligated to respect the privacy and security of the\nreporter of any incident.\n\n## Enforcement Guidelines\n\nCommunity leaders will follow these Community Impact Guidelines in determining\nthe consequences for any action they deem in violation of this Code of Conduct:\n\n### 1. Correction\n\n**Community Impact**: Use of inappropriate language or other behavior deemed\nunprofessional or unwelcome in the community.\n\n**Consequence**: A private, written warning from community leaders, providing\nclarity around the nature of the violation and an explanation of why the\nbehavior was inappropriate. A public apology may be requested.\n\n### 2. 
Warning\n\n**Community Impact**: A violation through a single incident or series of\nactions.\n\n**Consequence**: A warning with consequences for continued behavior. No\ninteraction with the people involved, including unsolicited interaction with\nthose enforcing the Code of Conduct, for a specified period of time. This\nincludes avoiding interactions in community spaces as well as external channels\nlike social media. Violating these terms may lead to a temporary or permanent\nban.\n\n### 3. Temporary Ban\n\n**Community Impact**: A serious violation of community standards, including\nsustained inappropriate behavior.\n\n**Consequence**: A temporary ban from any sort of interaction or public\ncommunication with the community for a specified period of time. No public or\nprivate interaction with the people involved, including unsolicited interaction\nwith those enforcing the Code of Conduct, is allowed during this period.\nViolating these terms may lead to a permanent ban.\n\n### 4. Permanent Ban\n\n**Community Impact**: Demonstrating a pattern of violation of community\nstandards, including sustained inappropriate behavior, harassment of an\nindividual, or aggression toward or disparagement of classes of individuals.\n\n**Consequence**: A permanent ban from any sort of public interaction within the\ncommunity.\n\n## Attribution\n\nThis Code of Conduct is adapted from the [Contributor Covenant][homepage],\nversion 2.1, available at\n[https://www.contributor-covenant.org/version/2/1/code_of_conduct.html][v2.1].\n\nCommunity Impact Guidelines were inspired by\n[Mozilla's code of conduct enforcement ladder][Mozilla CoC].\n\nFor answers to common questions about this code of conduct, see the FAQ at\n[https://www.contributor-covenant.org/faq][FAQ]. Translations are available at\n[https://www.contributor-covenant.org/translations][translations].\n\n[homepage]: https://www.contributor-covenant.org\n[v2.1]: https://www.contributor-covenant.org/version/2/1/code_of_conduct.html\n[Mozilla CoC]: https://github.com/mozilla/diversity\n[FAQ]: https://www.contributor-covenant.org/faq\n[translations]: https://www.contributor-covenant.org/translations\n
"},{"location":"contributing/","title":"Contributing to Supervision \ud83d\udee0\ufe0f","text":"

Thank you for your interest in contributing to Supervision!

We are actively improving this library to reduce the amount of work you need to do to solve common computer vision problems.

"},{"location":"contributing/#contribution-guidelines","title":"Contribution Guidelines","text":"

We welcome contributions to:

  1. Add a new feature to the library (guidance below).
  2. Improve our documentation and add examples to make it clear how to leverage the supervision library.
  3. Report bugs and issues in the project.
  4. Submit a request for a new feature.
  5. Improve our test coverage.
"},{"location":"contributing/#contributing-features","title":"Contributing Features \u2728","text":"

Supervision is designed to provide generic utilities to solve problems. Thus, we focus on contributions that can have an impact on a wide range of projects.

For example, counting objects that cross a line anywhere on an image is a common problem in computer vision, but counting objects that cross a line 75% of the way through is less useful.

Before you contribute a new feature, consider submitting an Issue to discuss the feature so the community can weigh in and assist.

"},{"location":"contributing/#how-to-contribute-changes","title":"How to Contribute Changes","text":"

First, fork this repository to your own GitHub account. Click \"fork\" in the top corner of the supervision repository to get started:

Then, run git clone to download the project code to your computer.

Move to a new branch using the git checkout command:

git checkout -b <your_branch_name>\n

The name you choose for your branch should describe the change you want to make (e.g. line-counter-docs).

Make any changes you want to the project code, then run the following commands to commit your changes:

git add .\ngit commit -m \"Your commit message\"\ngit push -u origin <your_branch_name>\n
"},{"location":"contributing/#code-quality","title":"\ud83c\udfa8 Code quality","text":""},{"location":"contributing/#pre-commit-tool","title":"Pre-commit tool","text":"

This project uses the pre-commit tool to maintain code quality and consistency. Before submitting a pull request or making any commits, it is important to run the pre-commit tool to ensure that your changes meet the project's guidelines.

Furthermore, we have integrated a pre-commit GitHub Action into our workflow. This means that with every pull request opened, the pre-commit checks will be automatically enforced, streamlining the code review process and ensuring that all contributions adhere to our quality standards.

To run the pre-commit tool, follow these steps:

  1. Install pre-commit by running the following command: poetry install. This installs not only pre-commit but also all of the project's dependencies and dev dependencies.

  2. Once pre-commit is installed, navigate to the project's root directory.

  3. Run the command pre-commit run --all-files. This will execute the pre-commit hooks configured for this project against the modified files. If any issues are found, the pre-commit tool will provide feedback on how to resolve them. Make the necessary changes and re-run the pre-commit command until all issues are resolved.

  4. You can also install pre-commit as a git hook by executing pre-commit install. Every time you run git commit, pre-commit will run automatically for you.

"},{"location":"contributing/#docstrings","title":"Docstrings","text":"

All new functions and classes in supervision should include docstrings. This is a prerequisite for any new functions and classes to be added to the library.

supervision adheres to the Google Python docstring style. Please refer to the style guide while writing docstrings for your contribution.
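For reference, a minimal illustrative function written in the Google docstring style (the function itself is hypothetical and not part of supervision):
import numpy as np\n\n\ndef box_area(xyxy: np.ndarray) -> float:\n    \"\"\"\n    Compute the area of a single bounding box.\n\n    Args:\n        xyxy (np.ndarray): Box in `(x_min, y_min, x_max, y_max)` format.\n\n    Returns:\n        float: The box area in square pixels.\n\n    Example:\n        ```python\n        import numpy as np\n\n        box_area(np.array([0, 0, 10, 20]))\n        # 200.0\n        ```\n    \"\"\"\n    return float((xyxy[2] - xyxy[0]) * (xyxy[3] - xyxy[1]))\n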

"},{"location":"contributing/#type-checking","title":"Type checking","text":"

So far, there is no type checking with mypy. See issue.

Then, go back to your fork of the supervision repository, click \"Pull Requests\", and click \"New Pull Request\".

Make sure the base branch is develop before submitting your PR.

On the next page, review your changes then click \"Create pull request\":

Next, write a description for your pull request, and click \"Create pull request\" again to submit it for review:

When creating new functions, please ensure you have the following:

  1. Docstrings for the function and all parameters.
  2. Unit tests for the function.
  3. Examples in the documentation for the function.
  4. Created an entry in our docs to autogenerate the documentation for the function.
  5. Please share a Google Colab notebook with minimal code to test the new feature or reproduce the PR whenever possible. Please ensure that the notebook can be accessed without any issues.

When you submit your Pull Request, you will be asked to sign a Contributor License Agreement (CLA) by the cla-assistant GitHub bot. We can only respond to PRs from contributors who have signed the project CLA.

All pull requests will be reviewed by the maintainers of the project. We will provide feedback and ask for changes if necessary.

PRs must pass all tests and linting requirements before they can be merged.

"},{"location":"contributing/#documentation","title":"\ud83d\udcdd documentation","text":"

The supervision documentation is stored in a folder called docs. The project documentation is built using mkdocs.

To run the documentation, install the project requirements with poetry install --with dev. Then, run mkdocs serve to start the documentation server.

You can learn more about mkdocs on the mkdocs website.

"},{"location":"contributing/#cookbooks","title":"\ud83e\uddd1\u200d\ud83c\udf73 cookbooks","text":"

We are always looking for new examples and cookbooks to add to the supervision documentation. If you have a use case that you think would be helpful to others, please submit a PR with your example. Here are some guidelines for submitting a new example:

  • Create a new notebook in the docs/notebooks folder.
  • Add a link to the new notebook in docs/theme/cookbooks.html. Make sure to add the path to the new notebook, as well as a title, labels, author and supervision version.
  • Use the Count Objects Crossing the Line example as a template for your new example.
  • Freeze the version of supervision you are using.
  • Place an appropriate Open in Colab button at the top of the notebook. You can find an example of such a button in the aforementioned Count Objects Crossing the Line cookbook.
  • The notebook should be self-contained. If you rely on external data (videos, images, etc.) or libraries, include download and installation commands in the notebook.
  • Annotate the code with appropriate comments, including links to the documentation describing each of the tools you have used.
"},{"location":"contributing/#tests","title":"\ud83e\uddea tests","text":"

pytest is used to run our tests.

"},{"location":"contributing/#license","title":"\ud83d\udcc4 license","text":"

By contributing, you agree that your contributions will be licensed under an MIT license.

"},{"location":"deprecated/","title":"Deprecated","text":"

These features are being phased out due to better alternatives or potential issues in future versions. Deprecated functionality remains supported for three subsequent releases, giving users time to transition to the updated methods. A short migration sketch follows the list below.

  • Detections.from_roboflow is deprecated and will be removed in supervision-0.22.0. Use Detections.from_inference instead.
  • The method Color.white() is deprecated and will be removed in supervision-0.22.0. Use the constant Color.WHITE instead.
  • The method Color.black() is deprecated and will be removed in supervision-0.22.0. Use the constant Color.BLACK instead.
  • The method Color.red() is deprecated and will be removed in supervision-0.22.0. Use the constant Color.RED instead.
  • The method Color.green() is deprecated and will be removed in supervision-0.22.0. Use the constant Color.GREEN instead.
  • The method Color.blue() is deprecated and will be removed in supervision-0.22.0. Use the constant Color.BLUE instead.
  • The method ColorPalette.default() is deprecated and will be removed in supervision-0.22.0. Use the constant ColorPalette.DEFAULT instead.
  • BoxAnnotator is deprecated and will be removed in supervision-0.22.0. Use BoundingBoxAnnotator and LabelAnnotator instead.
  • The method FPSMonitor.__call__ is deprecated and will be removed in supervision-0.22.0. Use the attribute FPSMonitor.fps instead.
  • The track_buffer, track_thresh, and match_thresh parameters in ByteTrack are deprecated and will be removed in supervision-0.23.0. Use lost_track_buffer, track_activation_threshold, and minimum_matching_threshold instead.
  • The triggering_position parameter in sv.PolygonZone is deprecated and will be removed in supervision-0.23.0. Use triggering_anchors instead.
  • The frame_resolution_wh parameter in sv.PolygonZone is deprecated and will be removed in supervision-0.24.0.
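A short migration sketch based on the entries above (the threshold values shown are the documented defaults):
import supervision as sv\n\n# prefer the constants over the deprecated factory methods\ncolor = sv.Color.WHITE             # instead of sv.Color.white()\npalette = sv.ColorPalette.DEFAULT  # instead of sv.ColorPalette.default()\n\n# prefer the renamed ByteTrack parameters\ntracker = sv.ByteTrack(\n    track_activation_threshold=0.25,   # was track_thresh\n    lost_track_buffer=30,              # was track_buffer\n    minimum_matching_threshold=0.8,    # was match_thresh\n)\n\n# prefer BoundingBoxAnnotator + LabelAnnotator over the deprecated BoxAnnotator\nbounding_box_annotator = sv.BoundingBoxAnnotator()\nlabel_annotator = sv.LabelAnnotator()\n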
"},{"location":"license/","title":"License","text":"
MIT License\n\nCopyright (c) 2022 Roboflow\n\nPermission is hereby granted, free of charge, to any person obtaining a copy\nof this software and associated documentation files (the \"Software\"), to deal\nin the Software without restriction, including without limitation the rights\nto use, copy, modify, merge, publish, distribute, sublicense, and/or sell\ncopies of the Software, and to permit persons to whom the Software is\nfurnished to do so, subject to the following conditions:\n\nThe above copyright notice and this permission notice shall be included in all\ncopies or substantial portions of the Software.\n\nTHE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\nIMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\nFITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\nAUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\nLIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\nOUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\nSOFTWARE.\n
"},{"location":"trackers/","title":"ByteTrack","text":"

Initialize the ByteTrack object.

Parameters:

  • track_activation_threshold (float, default 0.25): Detection confidence threshold for track activation. Increasing track_activation_threshold improves accuracy and stability but might miss true detections. Decreasing it increases completeness but risks introducing noise and instability.
  • lost_track_buffer (int, default 30): Number of frames to buffer when a track is lost. Increasing lost_track_buffer enhances occlusion handling, significantly reducing the likelihood of track fragmentation or disappearance caused by brief detection gaps.
  • minimum_matching_threshold (float, default 0.8): Threshold for matching tracks with detections. Increasing minimum_matching_threshold improves accuracy but risks fragmentation. Decreasing it improves completeness but risks false positives and drift.
  • frame_rate (int, default 30): The frame rate of the video.
  • minimum_consecutive_frames (int, default 1): Number of consecutive frames that an object must be tracked before it is considered a 'valid' track. Increasing minimum_consecutive_frames prevents the creation of accidental tracks from false detection or double detection, but risks missing shorter tracks.

Source code in supervision/tracker/byte_tracker/core.py
class ByteTrack:\n    \"\"\"\n    Initialize the ByteTrack object.\n\n    <video controls>\n        <source src=\"https://media.roboflow.com/supervision/video-examples/how-to/track-objects/annotate-video-with-traces.mp4\" type=\"video/mp4\">\n    </video>\n\n    Parameters:\n        track_activation_threshold (float, optional): Detection confidence threshold\n            for track activation. Increasing track_activation_threshold improves accuracy\n            and stability but might miss true detections. Decreasing it increases\n            completeness but risks introducing noise and instability.\n        lost_track_buffer (int, optional): Number of frames to buffer when a track is lost.\n            Increasing lost_track_buffer enhances occlusion handling, significantly\n            reducing the likelihood of track fragmentation or disappearance caused\n            by brief detection gaps.\n        minimum_matching_threshold (float, optional): Threshold for matching tracks with detections.\n            Increasing minimum_matching_threshold improves accuracy but risks fragmentation.\n            Decreasing it improves completeness but risks false positives and drift.\n        frame_rate (int, optional): The frame rate of the video.\n        minimum_consecutive_frames (int, optional): Number of consecutive frames that an object must\n            be tracked before it is considered a 'valid' track.\n            Increasing minimum_consecutive_frames prevents the creation of accidental tracks from\n            false detection or double detection, but risks missing shorter tracks.\n    \"\"\"  # noqa: E501 // docs\n\n    @deprecated_parameter(\n        old_parameter=\"track_buffer\",\n        new_parameter=\"lost_track_buffer\",\n        map_function=lambda x: x,\n        warning_message=\"`{old_parameter}` in `{function_name}` is deprecated and will \"\n        \"be remove in `supervision-0.23.0`. Use '{new_parameter}' \"\n        \"instead.\",\n    )\n    @deprecated_parameter(\n        old_parameter=\"track_thresh\",\n        new_parameter=\"track_activation_threshold\",\n        map_function=lambda x: x,\n        warning_message=\"`{old_parameter}` in `{function_name}` is deprecated and will \"\n        \"be remove in `supervision-0.23.0`. Use '{new_parameter}' \"\n        \"instead.\",\n    )\n    @deprecated_parameter(\n        old_parameter=\"match_thresh\",\n        new_parameter=\"minimum_matching_threshold\",\n        map_function=lambda x: x,\n        warning_message=\"`{old_parameter}` in `{function_name}` is deprecated and will \"\n        \"be remove in `supervision-0.23.0`. 
Use '{new_parameter}' \"\n        \"instead.\",\n    )\n    def __init__(\n        self,\n        track_activation_threshold: float = 0.25,\n        lost_track_buffer: int = 30,\n        minimum_matching_threshold: float = 0.8,\n        frame_rate: int = 30,\n        minimum_consecutive_frames: int = 1,\n    ):\n        self.track_activation_threshold = track_activation_threshold\n        self.minimum_matching_threshold = minimum_matching_threshold\n\n        self.frame_id = 0\n        self.det_thresh = self.track_activation_threshold + 0.1\n        self.max_time_lost = int(frame_rate / 30.0 * lost_track_buffer)\n        self.minimum_consecutive_frames = minimum_consecutive_frames\n        self.kalman_filter = KalmanFilter()\n\n        self.tracked_tracks: List[STrack] = []\n        self.lost_tracks: List[STrack] = []\n        self.removed_tracks: List[STrack] = []\n\n    def update_with_detections(self, detections: Detections) -> Detections:\n        \"\"\"\n        Updates the tracker with the provided detections and returns the updated\n        detection results.\n\n        Args:\n            detections (Detections): The detections to pass through the tracker.\n\n        Example:\n            ```python\n            import supervision as sv\n            from ultralytics import YOLO\n\n            model = YOLO(<MODEL_PATH>)\n            tracker = sv.ByteTrack()\n\n            bounding_box_annotator = sv.BoundingBoxAnnotator()\n            label_annotator = sv.LabelAnnotator()\n\n            def callback(frame: np.ndarray, index: int) -> np.ndarray:\n                results = model(frame)[0]\n                detections = sv.Detections.from_ultralytics(results)\n                detections = tracker.update_with_detections(detections)\n\n                labels = [f\"#{tracker_id}\" for tracker_id in detections.tracker_id]\n\n                annotated_frame = bounding_box_annotator.annotate(\n                    scene=frame.copy(), detections=detections)\n                annotated_frame = label_annotator.annotate(\n                    scene=annotated_frame, detections=detections, labels=labels)\n                return annotated_frame\n\n            sv.process_video(\n                source_path=<SOURCE_VIDEO_PATH>,\n                target_path=<TARGET_VIDEO_PATH>,\n                callback=callback\n            )\n            ```\n        \"\"\"\n\n        tensors = detections2boxes(detections=detections)\n        tracks = self.update_with_tensors(tensors=tensors)\n\n        if len(tracks) > 0:\n            detection_bounding_boxes = np.asarray([det[:4] for det in tensors])\n            track_bounding_boxes = np.asarray([track.tlbr for track in tracks])\n\n            ious = box_iou_batch(detection_bounding_boxes, track_bounding_boxes)\n\n            iou_costs = 1 - ious\n\n            matches, _, _ = matching.linear_assignment(iou_costs, 0.5)\n            detections.tracker_id = np.full(len(detections), -1, dtype=int)\n            for i_detection, i_track in matches:\n                detections.tracker_id[i_detection] = int(\n                    tracks[i_track].external_track_id\n                )\n\n            return detections[detections.tracker_id != -1]\n\n        else:\n            detections = Detections.empty()\n            detections.tracker_id = np.array([], dtype=int)\n\n            return detections\n\n    def reset(self):\n        \"\"\"\n        Resets the internal state of the ByteTrack tracker.\n\n        This method clears the tracking data, including tracked, lost,\n        
and removed tracks, as well as resetting the frame counter. It's\n        particularly useful when processing multiple videos sequentially,\n        ensuring the tracker starts with a clean state for each new video.\n        \"\"\"\n        self.frame_id = 0\n        self.tracked_tracks: List[STrack] = []\n        self.lost_tracks: List[STrack] = []\n        self.removed_tracks: List[STrack] = []\n        BaseTrack.reset_counter()\n        STrack.reset_external_counter()\n\n    def update_with_tensors(self, tensors: np.ndarray) -> List[STrack]:\n        \"\"\"\n        Updates the tracker with the provided tensors and returns the updated tracks.\n\n        Parameters:\n            tensors: The new tensors to update with.\n\n        Returns:\n            List[STrack]: Updated tracks.\n        \"\"\"\n        self.frame_id += 1\n        activated_starcks = []\n        refind_stracks = []\n        lost_stracks = []\n        removed_stracks = []\n\n        class_ids = tensors[:, 5]\n        scores = tensors[:, 4]\n        bboxes = tensors[:, :4]\n\n        remain_inds = scores > self.track_activation_threshold\n        inds_low = scores > 0.1\n        inds_high = scores < self.track_activation_threshold\n\n        inds_second = np.logical_and(inds_low, inds_high)\n        dets_second = bboxes[inds_second]\n        dets = bboxes[remain_inds]\n        scores_keep = scores[remain_inds]\n        scores_second = scores[inds_second]\n\n        class_ids_keep = class_ids[remain_inds]\n        class_ids_second = class_ids[inds_second]\n\n        if len(dets) > 0:\n            \"\"\"Detections\"\"\"\n            detections = [\n                STrack(STrack.tlbr_to_tlwh(tlbr), s, c, self.minimum_consecutive_frames)\n                for (tlbr, s, c) in zip(dets, scores_keep, class_ids_keep)\n            ]\n        else:\n            detections = []\n\n        \"\"\" Add newly detected tracklets to tracked_stracks\"\"\"\n        unconfirmed = []\n        tracked_stracks = []  # type: list[STrack]\n\n        for track in self.tracked_tracks:\n            if not track.is_activated:\n                unconfirmed.append(track)\n            else:\n                tracked_stracks.append(track)\n\n        \"\"\" Step 2: First association, with high score detection boxes\"\"\"\n        strack_pool = joint_tracks(tracked_stracks, self.lost_tracks)\n        # Predict the current location with KF\n        STrack.multi_predict(strack_pool)\n        dists = matching.iou_distance(strack_pool, detections)\n\n        dists = matching.fuse_score(dists, detections)\n        matches, u_track, u_detection = matching.linear_assignment(\n            dists, thresh=self.minimum_matching_threshold\n        )\n\n        for itracked, idet in matches:\n            track = strack_pool[itracked]\n            det = detections[idet]\n            if track.state == TrackState.Tracked:\n                track.update(detections[idet], self.frame_id)\n                activated_starcks.append(track)\n            else:\n                track.re_activate(det, self.frame_id, new_id=False)\n                refind_stracks.append(track)\n\n        \"\"\" Step 3: Second association, with low score detection boxes\"\"\"\n        # association the untrack to the low score detections\n        if len(dets_second) > 0:\n            \"\"\"Detections\"\"\"\n            detections_second = [\n                STrack(STrack.tlbr_to_tlwh(tlbr), s, c, self.minimum_consecutive_frames)\n                for (tlbr, s, c) in zip(dets_second, scores_second, 
class_ids_second)\n            ]\n        else:\n            detections_second = []\n        r_tracked_stracks = [\n            strack_pool[i]\n            for i in u_track\n            if strack_pool[i].state == TrackState.Tracked\n        ]\n        dists = matching.iou_distance(r_tracked_stracks, detections_second)\n        matches, u_track, u_detection_second = matching.linear_assignment(\n            dists, thresh=0.5\n        )\n        for itracked, idet in matches:\n            track = r_tracked_stracks[itracked]\n            det = detections_second[idet]\n            if track.state == TrackState.Tracked:\n                track.update(det, self.frame_id)\n                activated_starcks.append(track)\n            else:\n                track.re_activate(det, self.frame_id, new_id=False)\n                refind_stracks.append(track)\n\n        for it in u_track:\n            track = r_tracked_stracks[it]\n            if not track.state == TrackState.Lost:\n                track.mark_lost()\n                lost_stracks.append(track)\n\n        \"\"\"Deal with unconfirmed tracks, usually tracks with only one beginning frame\"\"\"\n        detections = [detections[i] for i in u_detection]\n        dists = matching.iou_distance(unconfirmed, detections)\n\n        dists = matching.fuse_score(dists, detections)\n        matches, u_unconfirmed, u_detection = matching.linear_assignment(\n            dists, thresh=0.7\n        )\n        for itracked, idet in matches:\n            unconfirmed[itracked].update(detections[idet], self.frame_id)\n            activated_starcks.append(unconfirmed[itracked])\n        for it in u_unconfirmed:\n            track = unconfirmed[it]\n            track.mark_removed()\n            removed_stracks.append(track)\n\n        \"\"\" Step 4: Init new stracks\"\"\"\n        for inew in u_detection:\n            track = detections[inew]\n            if track.score < self.det_thresh:\n                continue\n            track.activate(self.kalman_filter, self.frame_id)\n            activated_starcks.append(track)\n        \"\"\" Step 5: Update state\"\"\"\n        for track in self.lost_tracks:\n            if self.frame_id - track.end_frame > self.max_time_lost:\n                track.mark_removed()\n                removed_stracks.append(track)\n\n        self.tracked_tracks = [\n            t for t in self.tracked_tracks if t.state == TrackState.Tracked\n        ]\n        self.tracked_tracks = joint_tracks(self.tracked_tracks, activated_starcks)\n        self.tracked_tracks = joint_tracks(self.tracked_tracks, refind_stracks)\n        self.lost_tracks = sub_tracks(self.lost_tracks, self.tracked_tracks)\n        self.lost_tracks.extend(lost_stracks)\n        self.lost_tracks = sub_tracks(self.lost_tracks, self.removed_tracks)\n        self.removed_tracks = removed_stracks\n        self.tracked_tracks, self.lost_tracks = remove_duplicate_tracks(\n            self.tracked_tracks, self.lost_tracks\n        )\n        output_stracks = [track for track in self.tracked_tracks if track.is_activated]\n\n        return output_stracks\n
"},{"location":"trackers/#supervision.tracker.byte_tracker.core.ByteTrack-functions","title":"Functions","text":""},{"location":"trackers/#supervision.tracker.byte_tracker.core.ByteTrack.reset","title":"reset()","text":"

Resets the internal state of the ByteTrack tracker.

This method clears the tracking data, including tracked, lost, and removed tracks, as well as resetting the frame counter. It's particularly useful when processing multiple videos sequentially, ensuring the tracker starts with a clean state for each new video.

Source code in supervision/tracker/byte_tracker/core.py
def reset(self):\n    \"\"\"\n    Resets the internal state of the ByteTrack tracker.\n\n    This method clears the tracking data, including tracked, lost,\n    and removed tracks, as well as resetting the frame counter. It's\n    particularly useful when processing multiple videos sequentially,\n    ensuring the tracker starts with a clean state for each new video.\n    \"\"\"\n    self.frame_id = 0\n    self.tracked_tracks: List[STrack] = []\n    self.lost_tracks: List[STrack] = []\n    self.removed_tracks: List[STrack] = []\n    BaseTrack.reset_counter()\n    STrack.reset_external_counter()\n
"},{"location":"trackers/#supervision.tracker.byte_tracker.core.ByteTrack.update_with_detections","title":"update_with_detections(detections)","text":"

Updates the tracker with the provided detections and returns the updated detection results.

Parameters:

  • detections (Detections, required): The detections to pass through the tracker.

Example
import supervision as sv\nfrom ultralytics import YOLO\n\nmodel = YOLO(<MODEL_PATH>)\ntracker = sv.ByteTrack()\n\nbounding_box_annotator = sv.BoundingBoxAnnotator()\nlabel_annotator = sv.LabelAnnotator()\n\ndef callback(frame: np.ndarray, index: int) -> np.ndarray:\n    results = model(frame)[0]\n    detections = sv.Detections.from_ultralytics(results)\n    detections = tracker.update_with_detections(detections)\n\n    labels = [f\"#{tracker_id}\" for tracker_id in detections.tracker_id]\n\n    annotated_frame = bounding_box_annotator.annotate(\n        scene=frame.copy(), detections=detections)\n    annotated_frame = label_annotator.annotate(\n        scene=annotated_frame, detections=detections, labels=labels)\n    return annotated_frame\n\nsv.process_video(\n    source_path=<SOURCE_VIDEO_PATH>,\n    target_path=<TARGET_VIDEO_PATH>,\n    callback=callback\n)\n
Source code in supervision/tracker/byte_tracker/core.py
def update_with_detections(self, detections: Detections) -> Detections:\n    \"\"\"\n    Updates the tracker with the provided detections and returns the updated\n    detection results.\n\n    Args:\n        detections (Detections): The detections to pass through the tracker.\n\n    Example:\n        ```python\n        import supervision as sv\n        from ultralytics import YOLO\n\n        model = YOLO(<MODEL_PATH>)\n        tracker = sv.ByteTrack()\n\n        bounding_box_annotator = sv.BoundingBoxAnnotator()\n        label_annotator = sv.LabelAnnotator()\n\n        def callback(frame: np.ndarray, index: int) -> np.ndarray:\n            results = model(frame)[0]\n            detections = sv.Detections.from_ultralytics(results)\n            detections = tracker.update_with_detections(detections)\n\n            labels = [f\"#{tracker_id}\" for tracker_id in detections.tracker_id]\n\n            annotated_frame = bounding_box_annotator.annotate(\n                scene=frame.copy(), detections=detections)\n            annotated_frame = label_annotator.annotate(\n                scene=annotated_frame, detections=detections, labels=labels)\n            return annotated_frame\n\n        sv.process_video(\n            source_path=<SOURCE_VIDEO_PATH>,\n            target_path=<TARGET_VIDEO_PATH>,\n            callback=callback\n        )\n        ```\n    \"\"\"\n\n    tensors = detections2boxes(detections=detections)\n    tracks = self.update_with_tensors(tensors=tensors)\n\n    if len(tracks) > 0:\n        detection_bounding_boxes = np.asarray([det[:4] for det in tensors])\n        track_bounding_boxes = np.asarray([track.tlbr for track in tracks])\n\n        ious = box_iou_batch(detection_bounding_boxes, track_bounding_boxes)\n\n        iou_costs = 1 - ious\n\n        matches, _, _ = matching.linear_assignment(iou_costs, 0.5)\n        detections.tracker_id = np.full(len(detections), -1, dtype=int)\n        for i_detection, i_track in matches:\n            detections.tracker_id[i_detection] = int(\n                tracks[i_track].external_track_id\n            )\n\n        return detections[detections.tracker_id != -1]\n\n    else:\n        detections = Detections.empty()\n        detections.tracker_id = np.array([], dtype=int)\n\n        return detections\n
"},{"location":"trackers/#supervision.tracker.byte_tracker.core.ByteTrack.update_with_tensors","title":"update_with_tensors(tensors)","text":"

Updates the tracker with the provided tensors and returns the updated tracks.

Parameters:

  • tensors (np.ndarray, required): The new tensors to update with.

Returns:

  • List[STrack]: Updated tracks.

Source code in supervision/tracker/byte_tracker/core.py
def update_with_tensors(self, tensors: np.ndarray) -> List[STrack]:\n    \"\"\"\n    Updates the tracker with the provided tensors and returns the updated tracks.\n\n    Parameters:\n        tensors: The new tensors to update with.\n\n    Returns:\n        List[STrack]: Updated tracks.\n    \"\"\"\n    self.frame_id += 1\n    activated_starcks = []\n    refind_stracks = []\n    lost_stracks = []\n    removed_stracks = []\n\n    class_ids = tensors[:, 5]\n    scores = tensors[:, 4]\n    bboxes = tensors[:, :4]\n\n    remain_inds = scores > self.track_activation_threshold\n    inds_low = scores > 0.1\n    inds_high = scores < self.track_activation_threshold\n\n    inds_second = np.logical_and(inds_low, inds_high)\n    dets_second = bboxes[inds_second]\n    dets = bboxes[remain_inds]\n    scores_keep = scores[remain_inds]\n    scores_second = scores[inds_second]\n\n    class_ids_keep = class_ids[remain_inds]\n    class_ids_second = class_ids[inds_second]\n\n    if len(dets) > 0:\n        \"\"\"Detections\"\"\"\n        detections = [\n            STrack(STrack.tlbr_to_tlwh(tlbr), s, c, self.minimum_consecutive_frames)\n            for (tlbr, s, c) in zip(dets, scores_keep, class_ids_keep)\n        ]\n    else:\n        detections = []\n\n    \"\"\" Add newly detected tracklets to tracked_stracks\"\"\"\n    unconfirmed = []\n    tracked_stracks = []  # type: list[STrack]\n\n    for track in self.tracked_tracks:\n        if not track.is_activated:\n            unconfirmed.append(track)\n        else:\n            tracked_stracks.append(track)\n\n    \"\"\" Step 2: First association, with high score detection boxes\"\"\"\n    strack_pool = joint_tracks(tracked_stracks, self.lost_tracks)\n    # Predict the current location with KF\n    STrack.multi_predict(strack_pool)\n    dists = matching.iou_distance(strack_pool, detections)\n\n    dists = matching.fuse_score(dists, detections)\n    matches, u_track, u_detection = matching.linear_assignment(\n        dists, thresh=self.minimum_matching_threshold\n    )\n\n    for itracked, idet in matches:\n        track = strack_pool[itracked]\n        det = detections[idet]\n        if track.state == TrackState.Tracked:\n            track.update(detections[idet], self.frame_id)\n            activated_starcks.append(track)\n        else:\n            track.re_activate(det, self.frame_id, new_id=False)\n            refind_stracks.append(track)\n\n    \"\"\" Step 3: Second association, with low score detection boxes\"\"\"\n    # association the untrack to the low score detections\n    if len(dets_second) > 0:\n        \"\"\"Detections\"\"\"\n        detections_second = [\n            STrack(STrack.tlbr_to_tlwh(tlbr), s, c, self.minimum_consecutive_frames)\n            for (tlbr, s, c) in zip(dets_second, scores_second, class_ids_second)\n        ]\n    else:\n        detections_second = []\n    r_tracked_stracks = [\n        strack_pool[i]\n        for i in u_track\n        if strack_pool[i].state == TrackState.Tracked\n    ]\n    dists = matching.iou_distance(r_tracked_stracks, detections_second)\n    matches, u_track, u_detection_second = matching.linear_assignment(\n        dists, thresh=0.5\n    )\n    for itracked, idet in matches:\n        track = r_tracked_stracks[itracked]\n        det = detections_second[idet]\n        if track.state == TrackState.Tracked:\n            track.update(det, self.frame_id)\n            activated_starcks.append(track)\n        else:\n            track.re_activate(det, self.frame_id, new_id=False)\n            
refind_stracks.append(track)\n\n    for it in u_track:\n        track = r_tracked_stracks[it]\n        if not track.state == TrackState.Lost:\n            track.mark_lost()\n            lost_stracks.append(track)\n\n    \"\"\"Deal with unconfirmed tracks, usually tracks with only one beginning frame\"\"\"\n    detections = [detections[i] for i in u_detection]\n    dists = matching.iou_distance(unconfirmed, detections)\n\n    dists = matching.fuse_score(dists, detections)\n    matches, u_unconfirmed, u_detection = matching.linear_assignment(\n        dists, thresh=0.7\n    )\n    for itracked, idet in matches:\n        unconfirmed[itracked].update(detections[idet], self.frame_id)\n        activated_starcks.append(unconfirmed[itracked])\n    for it in u_unconfirmed:\n        track = unconfirmed[it]\n        track.mark_removed()\n        removed_stracks.append(track)\n\n    \"\"\" Step 4: Init new stracks\"\"\"\n    for inew in u_detection:\n        track = detections[inew]\n        if track.score < self.det_thresh:\n            continue\n        track.activate(self.kalman_filter, self.frame_id)\n        activated_starcks.append(track)\n    \"\"\" Step 5: Update state\"\"\"\n    for track in self.lost_tracks:\n        if self.frame_id - track.end_frame > self.max_time_lost:\n            track.mark_removed()\n            removed_stracks.append(track)\n\n    self.tracked_tracks = [\n        t for t in self.tracked_tracks if t.state == TrackState.Tracked\n    ]\n    self.tracked_tracks = joint_tracks(self.tracked_tracks, activated_starcks)\n    self.tracked_tracks = joint_tracks(self.tracked_tracks, refind_stracks)\n    self.lost_tracks = sub_tracks(self.lost_tracks, self.tracked_tracks)\n    self.lost_tracks.extend(lost_stracks)\n    self.lost_tracks = sub_tracks(self.lost_tracks, self.removed_tracks)\n    self.removed_tracks = removed_stracks\n    self.tracked_tracks, self.lost_tracks = remove_duplicate_tracks(\n        self.tracked_tracks, self.lost_tracks\n    )\n    output_stracks = [track for track in self.tracked_tracks if track.is_activated]\n\n    return output_stracks\n
"},{"location":"classification/core/","title":"Classifications","text":"Source code in supervision/classification/core.py
@dataclass\nclass Classifications:\n    class_id: np.ndarray\n    confidence: Optional[np.ndarray] = None\n\n    def __post_init__(self) -> None:\n        \"\"\"\n        Validate the classification inputs.\n        \"\"\"\n        n = len(self.class_id)\n\n        _validate_class_ids(self.class_id, n)\n        _validate_confidence(self.confidence, n)\n\n    def __len__(self) -> int:\n        \"\"\"\n        Returns the number of classifications.\n        \"\"\"\n        return len(self.class_id)\n\n    @classmethod\n    def from_clip(cls, clip_results) -> Classifications:\n        \"\"\"\n        Creates a Classifications instance from a\n        [clip](https://github.com/openai/clip) inference result.\n\n        Args:\n            clip_results (np.ndarray): The inference result from clip model.\n\n        Returns:\n            Classifications: A new Classifications object.\n\n        Example:\n            ```python\n            from PIL import Image\n            import clip\n            import supervision as sv\n\n            model, preprocess = clip.load('ViT-B/32')\n\n            image = cv2.imread(SOURCE_IMAGE_PATH)\n            image = preprocess(image).unsqueeze(0)\n\n            text = clip.tokenize([\"a diagram\", \"a dog\", \"a cat\"])\n            output, _ = model(image, text)\n            classifications = sv.Classifications.from_clip(output)\n            ```\n        \"\"\"\n\n        confidence = clip_results.softmax(dim=-1).cpu().detach().numpy()[0]\n\n        if len(confidence) == 0:\n            return cls(class_id=np.array([]), confidence=np.array([]))\n\n        class_ids = np.arange(len(confidence))\n        return cls(class_id=class_ids, confidence=confidence)\n\n    @classmethod\n    def from_ultralytics(cls, ultralytics_results) -> Classifications:\n        \"\"\"\n        Creates a Classifications instance from a\n        [ultralytics](https://github.com/ultralytics/ultralytics) inference result.\n\n        Args:\n            ultralytics_results (ultralytics.engine.results.Results):\n                The inference result from ultralytics model.\n\n        Returns:\n            Classifications: A new Classifications object.\n\n        Example:\n            ```python\n            import cv2\n            from ultralytics import YOLO\n            import supervision as sv\n\n            image = cv2.imread(SOURCE_IMAGE_PATH)\n            model = YOLO('yolov8n-cls.pt')\n\n            output = model(image)[0]\n            classifications = sv.Classifications.from_ultralytics(output)\n            ```\n        \"\"\"\n        confidence = ultralytics_results.probs.data.cpu().numpy()\n        return cls(class_id=np.arange(confidence.shape[0]), confidence=confidence)\n\n    @classmethod\n    def from_timm(cls, timm_results) -> Classifications:\n        \"\"\"\n        Creates a Classifications instance from a\n        [timm](https://huggingface.co/docs/hub/timm) inference result.\n\n        Args:\n            timm_results (torch.Tensor): The inference result from timm model.\n\n        Returns:\n            Classifications: A new Classifications object.\n\n        Example:\n            ```python\n            from PIL import Image\n            import timm\n            from timm.data import resolve_data_config, create_transform\n            import supervision as sv\n\n            model = timm.create_model(\n                model_name='hf-hub:nateraw/resnet50-oxford-iiit-pet',\n                pretrained=True\n            ).eval()\n\n            config = resolve_data_config({}, 
model=model)\n            transform = create_transform(**config)\n\n            image = Image.open(SOURCE_IMAGE_PATH).convert('RGB')\n            x = transform(image).unsqueeze(0)\n\n            output = model(x)\n\n            classifications = sv.Classifications.from_timm(output)\n            ```\n        \"\"\"\n        confidence = timm_results.cpu().detach().numpy()[0]\n\n        if len(confidence) == 0:\n            return cls(class_id=np.array([]), confidence=np.array([]))\n\n        class_id = np.arange(len(confidence))\n        return cls(class_id=class_id, confidence=confidence)\n\n    def get_top_k(self, k: int) -> Tuple[np.ndarray, np.ndarray]:\n        \"\"\"\n        Retrieve the top k class IDs and confidences,\n            ordered in descending order by confidence.\n\n        Args:\n            k (int): The number of top class IDs and confidences to retrieve.\n\n        Returns:\n            Tuple[np.ndarray, np.ndarray]: A tuple containing\n                the top k class IDs and confidences.\n\n        Example:\n            ```python\n            import supervision as sv\n\n            classifications = sv.Classifications(...)\n\n            classifications.get_top_k(1)\n\n            (array([1]), array([0.9]))\n            ```\n        \"\"\"\n        if self.confidence is None:\n            raise ValueError(\"top_k could not be calculated, confidence is None\")\n\n        order = np.argsort(self.confidence)[::-1]\n        top_k_order = order[:k]\n        top_k_class_id = self.class_id[top_k_order]\n        top_k_confidence = self.confidence[top_k_order]\n\n        return top_k_class_id, top_k_confidence\n
"},{"location":"classification/core/#supervision.classification.core.Classifications-functions","title":"Functions","text":""},{"location":"classification/core/#supervision.classification.core.Classifications.__len__","title":"__len__()","text":"

Returns the number of classifications.

Source code in supervision/classification/core.py
def __len__(self) -> int:\n    \"\"\"\n    Returns the number of classifications.\n    \"\"\"\n    return len(self.class_id)\n
"},{"location":"classification/core/#supervision.classification.core.Classifications.__post_init__","title":"__post_init__()","text":"

Validate the classification inputs.

Source code in supervision/classification/core.py
def __post_init__(self) -> None:\n    \"\"\"\n    Validate the classification inputs.\n    \"\"\"\n    n = len(self.class_id)\n\n    _validate_class_ids(self.class_id, n)\n    _validate_confidence(self.confidence, n)\n
"},{"location":"classification/core/#supervision.classification.core.Classifications.from_clip","title":"from_clip(clip_results) classmethod","text":"

Creates a Classifications instance from a clip inference result.

Parameters:

  • clip_results (np.ndarray, required): The inference result from clip model.

Returns:

  • Classifications: A new Classifications object.

Example
from PIL import Image\nimport clip\nimport supervision as sv\n\nmodel, preprocess = clip.load('ViT-B/32')\n\nimage = cv2.imread(SOURCE_IMAGE_PATH)\nimage = preprocess(image).unsqueeze(0)\n\ntext = clip.tokenize([\"a diagram\", \"a dog\", \"a cat\"])\noutput, _ = model(image, text)\nclassifications = sv.Classifications.from_clip(output)\n
Source code in supervision/classification/core.py
@classmethod\ndef from_clip(cls, clip_results) -> Classifications:\n    \"\"\"\n    Creates a Classifications instance from a\n    [clip](https://github.com/openai/clip) inference result.\n\n    Args:\n        clip_results (np.ndarray): The inference result from clip model.\n\n    Returns:\n        Classifications: A new Classifications object.\n\n    Example:\n        ```python\n        from PIL import Image\n        import clip\n        import supervision as sv\n\n        model, preprocess = clip.load('ViT-B/32')\n\n        image = cv2.imread(SOURCE_IMAGE_PATH)\n        image = preprocess(image).unsqueeze(0)\n\n        text = clip.tokenize([\"a diagram\", \"a dog\", \"a cat\"])\n        output, _ = model(image, text)\n        classifications = sv.Classifications.from_clip(output)\n        ```\n    \"\"\"\n\n    confidence = clip_results.softmax(dim=-1).cpu().detach().numpy()[0]\n\n    if len(confidence) == 0:\n        return cls(class_id=np.array([]), confidence=np.array([]))\n\n    class_ids = np.arange(len(confidence))\n    return cls(class_id=class_ids, confidence=confidence)\n
"},{"location":"classification/core/#supervision.classification.core.Classifications.from_timm","title":"from_timm(timm_results) classmethod","text":"

Creates a Classifications instance from a timm inference result.

Parameters:

  • timm_results (torch.Tensor, required): The inference result from timm model.

Returns:

  • Classifications: A new Classifications object.

Example
from PIL import Image\nimport timm\nfrom timm.data import resolve_data_config, create_transform\nimport supervision as sv\n\nmodel = timm.create_model(\n    model_name='hf-hub:nateraw/resnet50-oxford-iiit-pet',\n    pretrained=True\n).eval()\n\nconfig = resolve_data_config({}, model=model)\ntransform = create_transform(**config)\n\nimage = Image.open(SOURCE_IMAGE_PATH).convert('RGB')\nx = transform(image).unsqueeze(0)\n\noutput = model(x)\n\nclassifications = sv.Classifications.from_timm(output)\n
Source code in supervision/classification/core.py
@classmethod\ndef from_timm(cls, timm_results) -> Classifications:\n    \"\"\"\n    Creates a Classifications instance from a\n    [timm](https://huggingface.co/docs/hub/timm) inference result.\n\n    Args:\n        timm_results (torch.Tensor): The inference result from timm model.\n\n    Returns:\n        Classifications: A new Classifications object.\n\n    Example:\n        ```python\n        from PIL import Image\n        import timm\n        from timm.data import resolve_data_config, create_transform\n        import supervision as sv\n\n        model = timm.create_model(\n            model_name='hf-hub:nateraw/resnet50-oxford-iiit-pet',\n            pretrained=True\n        ).eval()\n\n        config = resolve_data_config({}, model=model)\n        transform = create_transform(**config)\n\n        image = Image.open(SOURCE_IMAGE_PATH).convert('RGB')\n        x = transform(image).unsqueeze(0)\n\n        output = model(x)\n\n        classifications = sv.Classifications.from_timm(output)\n        ```\n    \"\"\"\n    confidence = timm_results.cpu().detach().numpy()[0]\n\n    if len(confidence) == 0:\n        return cls(class_id=np.array([]), confidence=np.array([]))\n\n    class_id = np.arange(len(confidence))\n    return cls(class_id=class_id, confidence=confidence)\n
"},{"location":"classification/core/#supervision.classification.core.Classifications.from_ultralytics","title":"from_ultralytics(ultralytics_results) classmethod","text":"

Creates a Classifications instance from an ultralytics inference result.

Parameters:

  • ultralytics_results (Results, required): The inference result from ultralytics model.

Returns:

  • Classifications: A new Classifications object.

Example
import cv2\nfrom ultralytics import YOLO\nimport supervision as sv\n\nimage = cv2.imread(SOURCE_IMAGE_PATH)\nmodel = YOLO('yolov8n-cls.pt')\n\noutput = model(image)[0]\nclassifications = sv.Classifications.from_ultralytics(output)\n
Source code in supervision/classification/core.py
@classmethod\ndef from_ultralytics(cls, ultralytics_results) -> Classifications:\n    \"\"\"\n    Creates a Classifications instance from a\n    [ultralytics](https://github.com/ultralytics/ultralytics) inference result.\n\n    Args:\n        ultralytics_results (ultralytics.engine.results.Results):\n            The inference result from ultralytics model.\n\n    Returns:\n        Classifications: A new Classifications object.\n\n    Example:\n        ```python\n        import cv2\n        from ultralytics import YOLO\n        import supervision as sv\n\n        image = cv2.imread(SOURCE_IMAGE_PATH)\n        model = YOLO('yolov8n-cls.pt')\n\n        output = model(image)[0]\n        classifications = sv.Classifications.from_ultralytics(output)\n        ```\n    \"\"\"\n    confidence = ultralytics_results.probs.data.cpu().numpy()\n    return cls(class_id=np.arange(confidence.shape[0]), confidence=confidence)\n
"},{"location":"classification/core/#supervision.classification.core.Classifications.get_top_k","title":"get_top_k(k)","text":"

Retrieve the top k class IDs and confidences, ordered in descending order by confidence.

Parameters:

k (int, required): The number of top class IDs and confidences to retrieve.

Returns:

Tuple[np.ndarray, np.ndarray]: A tuple containing the top k class IDs and confidences.

Example
import supervision as sv\n\nclassifications = sv.Classifications(...)\n\nclassifications.get_top_k(1)\n\n(array([1]), array([0.9]))\n
Source code in supervision/classification/core.py
def get_top_k(self, k: int) -> Tuple[np.ndarray, np.ndarray]:\n    \"\"\"\n    Retrieve the top k class IDs and confidences,\n        ordered in descending order by confidence.\n\n    Args:\n        k (int): The number of top class IDs and confidences to retrieve.\n\n    Returns:\n        Tuple[np.ndarray, np.ndarray]: A tuple containing\n            the top k class IDs and confidences.\n\n    Example:\n        ```python\n        import supervision as sv\n\n        classifications = sv.Classifications(...)\n\n        classifications.get_top_k(1)\n\n        (array([1]), array([0.9]))\n        ```\n    \"\"\"\n    if self.confidence is None:\n        raise ValueError(\"top_k could not be calculated, confidence is None\")\n\n    order = np.argsort(self.confidence)[::-1]\n    top_k_order = order[:k]\n    top_k_class_id = self.class_id[top_k_order]\n    top_k_confidence = self.confidence[top_k_order]\n\n    return top_k_class_id, top_k_confidence\n
"},{"location":"datasets/core/","title":"Datasets","text":"

Warning

The Dataset API is still fluid and may change. If you use the Dataset API in your project, pin the supervision version in your requirements.txt or setup.py until further notice.

DetectionDataset

Bases: BaseDataset

Dataclass containing information about an object detection dataset.

Attributes:

classes (List[str]): List containing dataset class names.
images (Dict[str, np.ndarray]): Dictionary mapping image name to image.
annotations (Dict[str, Detections]): Dictionary mapping image name to annotations.

Source code in supervision/dataset/core.py
@dataclass\nclass DetectionDataset(BaseDataset):\n    \"\"\"\n    Dataclass containing information about object detection dataset.\n\n    Attributes:\n        classes (List[str]): List containing dataset class names.\n        images (Dict[str, np.ndarray]): Dictionary mapping image name to image.\n        annotations (Dict[str, Detections]): Dictionary mapping\n            image name to annotations.\n    \"\"\"\n\n    classes: List[str]\n    images: Dict[str, np.ndarray]\n    annotations: Dict[str, Detections]\n\n    def __len__(self) -> int:\n        \"\"\"\n        Return the number of images in the dataset.\n\n        Returns:\n            int: The number of images.\n        \"\"\"\n        return len(self.images)\n\n    def __iter__(self) -> Iterator[Tuple[str, np.ndarray, Detections]]:\n        \"\"\"\n        Iterate over the images and annotations in the dataset.\n\n        Yields:\n            Iterator[Tuple[str, np.ndarray, Detections]]:\n                An iterator that yields tuples containing the image name,\n                the image data, and its corresponding annotation.\n        \"\"\"\n        for image_name, image in self.images.items():\n            yield image_name, image, self.annotations.get(image_name, None)\n\n    def __eq__(self, other):\n        if not isinstance(other, DetectionDataset):\n            return False\n\n        if set(self.classes) != set(other.classes):\n            return False\n\n        for key in self.images:\n            if not np.array_equal(self.images[key], other.images[key]):\n                return False\n            if not self.annotations[key] == other.annotations[key]:\n                return False\n\n        return True\n\n    def split(\n        self, split_ratio=0.8, random_state=None, shuffle: bool = True\n    ) -> Tuple[DetectionDataset, DetectionDataset]:\n        \"\"\"\n        Splits the dataset into two parts (training and testing)\n            using the provided split_ratio.\n\n        Args:\n            split_ratio (float, optional): The ratio of the training\n                set to the entire dataset.\n            random_state (int, optional): The seed for the random number generator.\n                This is used for reproducibility.\n            shuffle (bool, optional): Whether to shuffle the data before splitting.\n\n        Returns:\n            Tuple[DetectionDataset, DetectionDataset]: A tuple containing\n                the training and testing datasets.\n\n        Examples:\n            ```python\n            import supervision as sv\n\n            ds = sv.DetectionDataset(...)\n            train_ds, test_ds = ds.split(split_ratio=0.7, random_state=42, shuffle=True)\n            len(train_ds), len(test_ds)\n            # (700, 300)\n            ```\n        \"\"\"\n\n        image_names = list(self.images.keys())\n        train_names, test_names = train_test_split(\n            data=image_names,\n            train_ratio=split_ratio,\n            random_state=random_state,\n            shuffle=shuffle,\n        )\n\n        train_dataset = DetectionDataset(\n            classes=self.classes,\n            images={name: self.images[name] for name in train_names},\n            annotations={name: self.annotations[name] for name in train_names},\n        )\n        test_dataset = DetectionDataset(\n            classes=self.classes,\n            images={name: self.images[name] for name in test_names},\n            annotations={name: self.annotations[name] for name in test_names},\n        )\n        return train_dataset, 
test_dataset\n\n    def as_pascal_voc(\n        self,\n        images_directory_path: Optional[str] = None,\n        annotations_directory_path: Optional[str] = None,\n        min_image_area_percentage: float = 0.0,\n        max_image_area_percentage: float = 1.0,\n        approximation_percentage: float = 0.0,\n    ) -> None:\n        \"\"\"\n        Exports the dataset to PASCAL VOC format. This method saves the images\n        and their corresponding annotations in PASCAL VOC format.\n\n        Args:\n            images_directory_path (Optional[str]): The path to the directory\n                where the images should be saved.\n                If not provided, images will not be saved.\n            annotations_directory_path (Optional[str]): The path to\n                the directory where the annotations in PASCAL VOC format should be\n                saved. If not provided, annotations will not be saved.\n            min_image_area_percentage (float): The minimum percentage of\n                detection area relative to\n                the image area for a detection to be included.\n                Argument is used only for segmentation datasets.\n            max_image_area_percentage (float): The maximum percentage\n                of detection area relative to\n                the image area for a detection to be included.\n                Argument is used only for segmentation datasets.\n            approximation_percentage (float): The percentage of\n                polygon points to be removed from the input polygon,\n                in the range [0, 1). Argument is used only for segmentation datasets.\n        \"\"\"\n        if images_directory_path:\n            save_dataset_images(\n                images_directory_path=images_directory_path, images=self.images\n            )\n        if annotations_directory_path:\n            Path(annotations_directory_path).mkdir(parents=True, exist_ok=True)\n\n        for image_path, image in self.images.items():\n            detections = self.annotations[image_path]\n\n            if annotations_directory_path:\n                annotation_name = Path(image_path).stem\n                annotations_path = os.path.join(\n                    annotations_directory_path, f\"{annotation_name}.xml\"\n                )\n                image_name = Path(image_path).name\n                pascal_voc_xml = detections_to_pascal_voc(\n                    detections=detections,\n                    classes=self.classes,\n                    filename=image_name,\n                    image_shape=image.shape,\n                    min_image_area_percentage=min_image_area_percentage,\n                    max_image_area_percentage=max_image_area_percentage,\n                    approximation_percentage=approximation_percentage,\n                )\n\n                with open(annotations_path, \"w\") as f:\n                    f.write(pascal_voc_xml)\n\n    @classmethod\n    def from_pascal_voc(\n        cls,\n        images_directory_path: str,\n        annotations_directory_path: str,\n        force_masks: bool = False,\n    ) -> DetectionDataset:\n        \"\"\"\n        Creates a Dataset instance from PASCAL VOC formatted data.\n\n        Args:\n            images_directory_path (str): Path to the directory containing the images.\n            annotations_directory_path (str): Path to the directory\n                containing the PASCAL VOC XML annotations.\n            force_masks (bool, optional): If True, forces masks to\n                be loaded for 
all annotations, regardless of whether they are present.\n\n        Returns:\n            DetectionDataset: A DetectionDataset instance containing\n                the loaded images and annotations.\n\n        Examples:\n            ```python\n            import roboflow\n            from roboflow import Roboflow\n            import supervision as sv\n\n            roboflow.login()\n\n            rf = Roboflow()\n\n            project = rf.workspace(WORKSPACE_ID).project(PROJECT_ID)\n            dataset = project.version(PROJECT_VERSION).download(\"voc\")\n\n            ds = sv.DetectionDataset.from_pascal_voc(\n                images_directory_path=f\"{dataset.location}/train/images\",\n                annotations_directory_path=f\"{dataset.location}/train/labels\"\n            )\n\n            ds.classes\n            # ['dog', 'person']\n            ```\n        \"\"\"\n\n        classes, images, annotations = load_pascal_voc_annotations(\n            images_directory_path=images_directory_path,\n            annotations_directory_path=annotations_directory_path,\n            force_masks=force_masks,\n        )\n\n        return DetectionDataset(classes=classes, images=images, annotations=annotations)\n\n    @classmethod\n    def from_yolo(\n        cls,\n        images_directory_path: str,\n        annotations_directory_path: str,\n        data_yaml_path: str,\n        force_masks: bool = False,\n        is_obb: bool = False,\n    ) -> DetectionDataset:\n        \"\"\"\n        Creates a Dataset instance from YOLO formatted data.\n\n        Args:\n            images_directory_path (str): The path to the\n                directory containing the images.\n            annotations_directory_path (str): The path to the directory\n                containing the YOLO annotation files.\n            data_yaml_path (str): The path to the data\n                YAML file containing class information.\n            force_masks (bool, optional): If True, forces\n                masks to be loaded for all annotations,\n                regardless of whether they are present.\n            is_obb (bool, optional): If True, loads the annotations in OBB format.\n                OBB annotations are defined as `[class_id, x, y, x, y, x, y, x, y]`,\n                where pairs of [x, y] are box corners.\n\n        Returns:\n            DetectionDataset: A DetectionDataset instance\n                containing the loaded images and annotations.\n\n        Examples:\n            ```python\n            import roboflow\n            from roboflow import Roboflow\n            import supervision as sv\n\n            roboflow.login()\n            rf = Roboflow()\n\n            project = rf.workspace(WORKSPACE_ID).project(PROJECT_ID)\n            dataset = project.version(PROJECT_VERSION).download(\"yolov5\")\n\n            ds = sv.DetectionDataset.from_yolo(\n                images_directory_path=f\"{dataset.location}/train/images\",\n                annotations_directory_path=f\"{dataset.location}/train/labels\",\n                data_yaml_path=f\"{dataset.location}/data.yaml\"\n            )\n\n            ds.classes\n            # ['dog', 'person']\n            ```\n        \"\"\"\n        classes, images, annotations = load_yolo_annotations(\n            images_directory_path=images_directory_path,\n            annotations_directory_path=annotations_directory_path,\n            data_yaml_path=data_yaml_path,\n            force_masks=force_masks,\n            is_obb=is_obb,\n        )\n        return 
DetectionDataset(classes=classes, images=images, annotations=annotations)\n\n    def as_yolo(\n        self,\n        images_directory_path: Optional[str] = None,\n        annotations_directory_path: Optional[str] = None,\n        data_yaml_path: Optional[str] = None,\n        min_image_area_percentage: float = 0.0,\n        max_image_area_percentage: float = 1.0,\n        approximation_percentage: float = 0.0,\n    ) -> None:\n        \"\"\"\n        Exports the dataset to YOLO format. This method saves the\n        images and their corresponding annotations in YOLO format.\n\n        Args:\n            images_directory_path (Optional[str]): The path to the\n                directory where the images should be saved.\n                If not provided, images will not be saved.\n            annotations_directory_path (Optional[str]): The path to the\n                directory where the annotations in\n                YOLO format should be saved. If not provided,\n                annotations will not be saved.\n            data_yaml_path (Optional[str]): The path where the data.yaml\n                file should be saved.\n                If not provided, the file will not be saved.\n            min_image_area_percentage (float): The minimum percentage of\n                detection area relative to\n                the image area for a detection to be included.\n                Argument is used only for segmentation datasets.\n            max_image_area_percentage (float): The maximum percentage\n                of detection area relative to\n                the image area for a detection to be included.\n                Argument is used only for segmentation datasets.\n            approximation_percentage (float): The percentage of polygon points to\n                be removed from the input polygon, in the range [0, 1).\n                This is useful for simplifying the annotations.\n                Argument is used only for segmentation datasets.\n        \"\"\"\n        if images_directory_path is not None:\n            save_dataset_images(\n                images_directory_path=images_directory_path, images=self.images\n            )\n        if annotations_directory_path is not None:\n            save_yolo_annotations(\n                annotations_directory_path=annotations_directory_path,\n                images=self.images,\n                annotations=self.annotations,\n                min_image_area_percentage=min_image_area_percentage,\n                max_image_area_percentage=max_image_area_percentage,\n                approximation_percentage=approximation_percentage,\n            )\n        if data_yaml_path is not None:\n            save_data_yaml(data_yaml_path=data_yaml_path, classes=self.classes)\n\n    @classmethod\n    def from_coco(\n        cls,\n        images_directory_path: str,\n        annotations_path: str,\n        force_masks: bool = False,\n    ) -> DetectionDataset:\n        \"\"\"\n        Creates a Dataset instance from COCO formatted data.\n\n        Args:\n            images_directory_path (str): The path to the\n                directory containing the images.\n            annotations_path (str): The path to the json annotation files.\n            force_masks (bool, optional): If True,\n                forces masks to be loaded for all annotations,\n                regardless of whether they are present.\n\n        Returns:\n            DetectionDataset: A DetectionDataset instance containing\n                the loaded images and annotations.\n\n        
Examples:\n            ```python\n            import roboflow\n            from roboflow import Roboflow\n            import supervision as sv\n\n            roboflow.login()\n            rf = Roboflow()\n\n            project = rf.workspace(WORKSPACE_ID).project(PROJECT_ID)\n            dataset = project.version(PROJECT_VERSION).download(\"coco\")\n\n            ds = sv.DetectionDataset.from_coco(\n                images_directory_path=f\"{dataset.location}/train\",\n                annotations_path=f\"{dataset.location}/train/_annotations.coco.json\",\n            )\n\n            ds.classes\n            # ['dog', 'person']\n            ```\n        \"\"\"\n        classes, images, annotations = load_coco_annotations(\n            images_directory_path=images_directory_path,\n            annotations_path=annotations_path,\n            force_masks=force_masks,\n        )\n        return DetectionDataset(classes=classes, images=images, annotations=annotations)\n\n    def as_coco(\n        self,\n        images_directory_path: Optional[str] = None,\n        annotations_path: Optional[str] = None,\n        min_image_area_percentage: float = 0.0,\n        max_image_area_percentage: float = 1.0,\n        approximation_percentage: float = 0.0,\n    ) -> None:\n        \"\"\"\n        Exports the dataset to COCO format. This method saves the\n        images and their corresponding annotations in COCO format.\n\n        !!! tip\n\n            The format of the mask is determined automatically based on its structure:\n\n            - If a mask contains multiple disconnected components or holes, it will be\n            saved using the Run-Length Encoding (RLE) format for efficient storage and\n            processing.\n            - If a mask consists of a single, contiguous region without any holes, it\n            will be encoded as a polygon, preserving the outline of the object.\n\n            This automatic selection ensures that the masks are stored in the most\n            appropriate and space-efficient format, complying with COCO dataset\n            standards.\n\n        Args:\n            images_directory_path (Optional[str]): The path to the directory\n                where the images should be saved.\n                If not provided, images will not be saved.\n            annotations_path (Optional[str]): The path to COCO annotation file.\n            min_image_area_percentage (float): The minimum percentage of\n                detection area relative to\n                the image area for a detection to be included.\n                Argument is used only for segmentation datasets.\n            max_image_area_percentage (float): The maximum percentage of\n                detection area relative to\n                the image area for a detection to be included.\n                Argument is used only for segmentation datasets.\n            approximation_percentage (float): The percentage of polygon points\n                to be removed from the input polygon,\n                in the range [0, 1). 
This is useful for simplifying the annotations.\n                Argument is used only for segmentation datasets.\n        \"\"\"\n        if images_directory_path is not None:\n            save_dataset_images(\n                images_directory_path=images_directory_path, images=self.images\n            )\n        if annotations_path is not None:\n            save_coco_annotations(\n                annotation_path=annotations_path,\n                images=self.images,\n                annotations=self.annotations,\n                classes=self.classes,\n                min_image_area_percentage=min_image_area_percentage,\n                max_image_area_percentage=max_image_area_percentage,\n                approximation_percentage=approximation_percentage,\n            )\n\n    @classmethod\n    def merge(cls, dataset_list: List[DetectionDataset]) -> DetectionDataset:\n        \"\"\"\n        Merge a list of `DetectionDataset` objects into a single\n            `DetectionDataset` object.\n\n        This method takes a list of `DetectionDataset` objects and combines\n        their respective fields (`classes`, `images`,\n        `annotations`) into a single `DetectionDataset` object.\n\n        Args:\n            dataset_list (List[DetectionDataset]): A list of `DetectionDataset`\n                objects to merge.\n\n        Returns:\n            (DetectionDataset): A single `DetectionDataset` object containing\n            the merged data from the input list.\n\n        Examples:\n            ```python\n            import supervision as sv\n\n            ds_1 = sv.DetectionDataset(...)\n            len(ds_1)\n            # 100\n            ds_1.classes\n            # ['dog', 'person']\n\n            ds_2 = sv.DetectionDataset(...)\n            len(ds_2)\n            # 200\n            ds_2.classes\n            # ['cat']\n\n            ds_merged = sv.DetectionDataset.merge([ds_1, ds_2])\n            len(ds_merged)\n            # 300\n            ds_merged.classes\n            # ['cat', 'dog', 'person']\n            ```\n        \"\"\"\n        merged_images, merged_annotations = {}, {}\n        class_lists = [dataset.classes for dataset in dataset_list]\n        merged_classes = merge_class_lists(class_lists=class_lists)\n\n        for dataset in dataset_list:\n            class_index_mapping = build_class_index_mapping(\n                source_classes=dataset.classes, target_classes=merged_classes\n            )\n            for image_name, image, detections in dataset:\n                if image_name in merged_annotations:\n                    raise ValueError(\n                        f\"Image name {image_name} is not unique across datasets.\"\n                    )\n\n                merged_images[image_name] = image\n                merged_annotations[image_name] = map_detections_class_id(\n                    source_to_target_mapping=class_index_mapping,\n                    detections=detections,\n                )\n\n        return cls(\n            classes=merged_classes, images=merged_images, annotations=merged_annotations\n        )\n
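A minimal construction sketch (the file name and blank image are placeholders, and it assumes sv.Detections.empty() provides an empty annotation):
import numpy as np\nimport supervision as sv\n\n# classes, images and annotations mirror the dataclass fields above\nds = sv.DetectionDataset(\n    classes=['dog', 'person'],\n    images={'image_1.png': np.zeros((480, 640, 3), dtype=np.uint8)},\n    annotations={'image_1.png': sv.Detections.empty()},\n)\n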
ClassificationDataset

Bases: BaseDataset

Dataclass containing information about a classification dataset.

Attributes:

classes (List[str]): List containing dataset class names.
images (Dict[str, np.ndarray]): Dictionary mapping image name to image.
annotations (Dict[str, Classifications]): Dictionary mapping image name to annotations.

Source code in supervision/dataset/core.py
@dataclass\nclass ClassificationDataset(BaseDataset):\n    \"\"\"\n    Dataclass containing information about a classification dataset.\n\n    Attributes:\n        classes (List[str]): List containing dataset class names.\n        images (Dict[str, np.ndarray]): Dictionary mapping image name to image.\n        annotations (Dict[str, Detections]): Dictionary mapping\n            image name to annotations.\n    \"\"\"\n\n    classes: List[str]\n    images: Dict[str, np.ndarray]\n    annotations: Dict[str, Classifications]\n\n    def __len__(self) -> int:\n        return len(self.images)\n\n    def split(\n        self, split_ratio=0.8, random_state=None, shuffle: bool = True\n    ) -> Tuple[ClassificationDataset, ClassificationDataset]:\n        \"\"\"\n        Splits the dataset into two parts (training and testing)\n            using the provided split_ratio.\n\n        Args:\n            split_ratio (float, optional): The ratio of the training\n                set to the entire dataset.\n            random_state (int, optional): The seed for the\n                random number generator. This is used for reproducibility.\n            shuffle (bool, optional): Whether to shuffle the data before splitting.\n\n        Returns:\n            Tuple[ClassificationDataset, ClassificationDataset]: A tuple containing\n            the training and testing datasets.\n\n        Examples:\n            ```python\n            import supervision as sv\n\n            cd = sv.ClassificationDataset(...)\n            train_cd,test_cd = cd.split(split_ratio=0.7, random_state=42,shuffle=True)\n            len(train_cd), len(test_cd)\n            # (700, 300)\n            ```\n        \"\"\"\n        image_names = list(self.images.keys())\n        train_names, test_names = train_test_split(\n            data=image_names,\n            train_ratio=split_ratio,\n            random_state=random_state,\n            shuffle=shuffle,\n        )\n\n        train_dataset = ClassificationDataset(\n            classes=self.classes,\n            images={name: self.images[name] for name in train_names},\n            annotations={name: self.annotations[name] for name in train_names},\n        )\n        test_dataset = ClassificationDataset(\n            classes=self.classes,\n            images={name: self.images[name] for name in test_names},\n            annotations={name: self.annotations[name] for name in test_names},\n        )\n        return train_dataset, test_dataset\n\n    def as_folder_structure(self, root_directory_path: str) -> None:\n        \"\"\"\n        Saves the dataset as a multi-class folder structure.\n\n        Args:\n            root_directory_path (str): The path to the directory\n                where the dataset will be saved.\n        \"\"\"\n        os.makedirs(root_directory_path, exist_ok=True)\n\n        for class_name in self.classes:\n            os.makedirs(os.path.join(root_directory_path, class_name), exist_ok=True)\n\n        for image_path in self.images:\n            classification = self.annotations[image_path]\n            image = self.images[image_path]\n            image_name = Path(image_path).name\n            class_id = (\n                classification.class_id[0]\n                if classification.confidence is None\n                else classification.get_top_k(1)[0][0]\n            )\n            class_name = self.classes[class_id]\n            image_path = os.path.join(root_directory_path, class_name, image_name)\n            cv2.imwrite(image_path, image)\n\n    
@classmethod\n    def from_folder_structure(cls, root_directory_path: str) -> ClassificationDataset:\n        \"\"\"\n        Load data from a multiclass folder structure into a ClassificationDataset.\n\n        Args:\n            root_directory_path (str): The path to the dataset directory.\n\n        Returns:\n            ClassificationDataset: The dataset.\n\n        Examples:\n            ```python\n            import roboflow\n            from roboflow import Roboflow\n            import supervision as sv\n\n            roboflow.login()\n            rf = Roboflow()\n\n            project = rf.workspace(WORKSPACE_ID).project(PROJECT_ID)\n            dataset = project.version(PROJECT_VERSION).download(\"folder\")\n\n            cd = sv.ClassificationDataset.from_folder_structure(\n                root_directory_path=f\"{dataset.location}/train\"\n            )\n            ```\n        \"\"\"\n        classes = os.listdir(root_directory_path)\n        classes = sorted(set(classes))\n\n        images = {}\n        annotations = {}\n\n        for class_name in classes:\n            class_id = classes.index(class_name)\n\n            for image in os.listdir(os.path.join(root_directory_path, class_name)):\n                image_path = str(os.path.join(root_directory_path, class_name, image))\n                images[image_path] = cv2.imread(image_path)\n                annotations[image_path] = Classifications(\n                    class_id=np.array([class_id]),\n                )\n\n        return cls(\n            classes=classes,\n            images=images,\n            annotations=annotations,\n        )\n
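A minimal construction sketch (the file name and blank image are placeholders):
import numpy as np\nimport supervision as sv\n\n# classes, images and annotations mirror the dataclass fields above\ncd = sv.ClassificationDataset(\n    classes=['cat', 'dog'],\n    images={'image_1.png': np.zeros((224, 224, 3), dtype=np.uint8)},\n    annotations={'image_1.png': sv.Classifications(class_id=np.array([0]))},\n)\n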
"},{"location":"datasets/core/#supervision.dataset.core.DetectionDataset-functions","title":"Functions","text":""},{"location":"datasets/core/#supervision.dataset.core.DetectionDataset.__iter__","title":"__iter__()","text":"

Iterate over the images and annotations in the dataset.

Yields:

Iterator[Tuple[str, np.ndarray, Detections]]: An iterator that yields tuples containing the image name, the image data, and its corresponding annotation.

Source code in supervision/dataset/core.py
def __iter__(self) -> Iterator[Tuple[str, np.ndarray, Detections]]:\n    \"\"\"\n    Iterate over the images and annotations in the dataset.\n\n    Yields:\n        Iterator[Tuple[str, np.ndarray, Detections]]:\n            An iterator that yields tuples containing the image name,\n            the image data, and its corresponding annotation.\n    \"\"\"\n    for image_name, image in self.images.items():\n        yield image_name, image, self.annotations.get(image_name, None)\n
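A short iteration sketch (assumes an already loaded DetectionDataset):
import supervision as sv\n\nds = sv.DetectionDataset(...)\n\n# each iteration yields (image_name, image, detections)\nfor image_name, image, detections in ds:\n    print(image_name, image.shape, detections)\n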
"},{"location":"datasets/core/#supervision.dataset.core.DetectionDataset.__len__","title":"__len__()","text":"

Return the number of images in the dataset.

Returns:

int: The number of images.

Source code in supervision/dataset/core.py
def __len__(self) -> int:\n    \"\"\"\n    Return the number of images in the dataset.\n\n    Returns:\n        int: The number of images.\n    \"\"\"\n    return len(self.images)\n
"},{"location":"datasets/core/#supervision.dataset.core.DetectionDataset.as_coco","title":"as_coco(images_directory_path=None, annotations_path=None, min_image_area_percentage=0.0, max_image_area_percentage=1.0, approximation_percentage=0.0)","text":"

Exports the dataset to COCO format. This method saves the images and their corresponding annotations in COCO format.

Tip

The format of the mask is determined automatically based on its structure:

  • If a mask contains multiple disconnected components or holes, it will be saved using the Run-Length Encoding (RLE) format for efficient storage and processing.
  • If a mask consists of a single, contiguous region without any holes, it will be encoded as a polygon, preserving the outline of the object.

This automatic selection ensures that the masks are stored in the most appropriate and space-efficient format, complying with COCO dataset standards.

Parameters:

images_directory_path (Optional[str], default None): The path to the directory where the images should be saved. If not provided, images will not be saved.
annotations_path (Optional[str], default None): The path to COCO annotation file.
min_image_area_percentage (float, default 0.0): The minimum percentage of detection area relative to the image area for a detection to be included. Argument is used only for segmentation datasets.
max_image_area_percentage (float, default 1.0): The maximum percentage of detection area relative to the image area for a detection to be included. Argument is used only for segmentation datasets.
approximation_percentage (float, default 0.0): The percentage of polygon points to be removed from the input polygon, in the range [0, 1). This is useful for simplifying the annotations. Argument is used only for segmentation datasets.

Source code in supervision/dataset/core.py
def as_coco(\n    self,\n    images_directory_path: Optional[str] = None,\n    annotations_path: Optional[str] = None,\n    min_image_area_percentage: float = 0.0,\n    max_image_area_percentage: float = 1.0,\n    approximation_percentage: float = 0.0,\n) -> None:\n    \"\"\"\n    Exports the dataset to COCO format. This method saves the\n    images and their corresponding annotations in COCO format.\n\n    !!! tip\n\n        The format of the mask is determined automatically based on its structure:\n\n        - If a mask contains multiple disconnected components or holes, it will be\n        saved using the Run-Length Encoding (RLE) format for efficient storage and\n        processing.\n        - If a mask consists of a single, contiguous region without any holes, it\n        will be encoded as a polygon, preserving the outline of the object.\n\n        This automatic selection ensures that the masks are stored in the most\n        appropriate and space-efficient format, complying with COCO dataset\n        standards.\n\n    Args:\n        images_directory_path (Optional[str]): The path to the directory\n            where the images should be saved.\n            If not provided, images will not be saved.\n        annotations_path (Optional[str]): The path to COCO annotation file.\n        min_image_area_percentage (float): The minimum percentage of\n            detection area relative to\n            the image area for a detection to be included.\n            Argument is used only for segmentation datasets.\n        max_image_area_percentage (float): The maximum percentage of\n            detection area relative to\n            the image area for a detection to be included.\n            Argument is used only for segmentation datasets.\n        approximation_percentage (float): The percentage of polygon points\n            to be removed from the input polygon,\n            in the range [0, 1). This is useful for simplifying the annotations.\n            Argument is used only for segmentation datasets.\n    \"\"\"\n    if images_directory_path is not None:\n        save_dataset_images(\n            images_directory_path=images_directory_path, images=self.images\n        )\n    if annotations_path is not None:\n        save_coco_annotations(\n            annotation_path=annotations_path,\n            images=self.images,\n            annotations=self.annotations,\n            classes=self.classes,\n            min_image_area_percentage=min_image_area_percentage,\n            max_image_area_percentage=max_image_area_percentage,\n            approximation_percentage=approximation_percentage,\n        )\n
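A short export sketch (output paths are placeholders):
import supervision as sv\n\nds = sv.DetectionDataset(...)\n\n# writes images and a COCO-style JSON annotation file\nds.as_coco(\n    images_directory_path='./coco/images',\n    annotations_path='./coco/annotations.json'\n)\n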
"},{"location":"datasets/core/#supervision.dataset.core.DetectionDataset.as_pascal_voc","title":"as_pascal_voc(images_directory_path=None, annotations_directory_path=None, min_image_area_percentage=0.0, max_image_area_percentage=1.0, approximation_percentage=0.0)","text":"

Exports the dataset to PASCAL VOC format. This method saves the images and their corresponding annotations in PASCAL VOC format.

Parameters:

images_directory_path (Optional[str], default None): The path to the directory where the images should be saved. If not provided, images will not be saved.
annotations_directory_path (Optional[str], default None): The path to the directory where the annotations in PASCAL VOC format should be saved. If not provided, annotations will not be saved.
min_image_area_percentage (float, default 0.0): The minimum percentage of detection area relative to the image area for a detection to be included. Argument is used only for segmentation datasets.
max_image_area_percentage (float, default 1.0): The maximum percentage of detection area relative to the image area for a detection to be included. Argument is used only for segmentation datasets.
approximation_percentage (float, default 0.0): The percentage of polygon points to be removed from the input polygon, in the range [0, 1). Argument is used only for segmentation datasets.

Source code in supervision/dataset/core.py
def as_pascal_voc(\n    self,\n    images_directory_path: Optional[str] = None,\n    annotations_directory_path: Optional[str] = None,\n    min_image_area_percentage: float = 0.0,\n    max_image_area_percentage: float = 1.0,\n    approximation_percentage: float = 0.0,\n) -> None:\n    \"\"\"\n    Exports the dataset to PASCAL VOC format. This method saves the images\n    and their corresponding annotations in PASCAL VOC format.\n\n    Args:\n        images_directory_path (Optional[str]): The path to the directory\n            where the images should be saved.\n            If not provided, images will not be saved.\n        annotations_directory_path (Optional[str]): The path to\n            the directory where the annotations in PASCAL VOC format should be\n            saved. If not provided, annotations will not be saved.\n        min_image_area_percentage (float): The minimum percentage of\n            detection area relative to\n            the image area for a detection to be included.\n            Argument is used only for segmentation datasets.\n        max_image_area_percentage (float): The maximum percentage\n            of detection area relative to\n            the image area for a detection to be included.\n            Argument is used only for segmentation datasets.\n        approximation_percentage (float): The percentage of\n            polygon points to be removed from the input polygon,\n            in the range [0, 1). Argument is used only for segmentation datasets.\n    \"\"\"\n    if images_directory_path:\n        save_dataset_images(\n            images_directory_path=images_directory_path, images=self.images\n        )\n    if annotations_directory_path:\n        Path(annotations_directory_path).mkdir(parents=True, exist_ok=True)\n\n    for image_path, image in self.images.items():\n        detections = self.annotations[image_path]\n\n        if annotations_directory_path:\n            annotation_name = Path(image_path).stem\n            annotations_path = os.path.join(\n                annotations_directory_path, f\"{annotation_name}.xml\"\n            )\n            image_name = Path(image_path).name\n            pascal_voc_xml = detections_to_pascal_voc(\n                detections=detections,\n                classes=self.classes,\n                filename=image_name,\n                image_shape=image.shape,\n                min_image_area_percentage=min_image_area_percentage,\n                max_image_area_percentage=max_image_area_percentage,\n                approximation_percentage=approximation_percentage,\n            )\n\n            with open(annotations_path, \"w\") as f:\n                f.write(pascal_voc_xml)\n
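A short export sketch (output paths are placeholders):
import supervision as sv\n\nds = sv.DetectionDataset(...)\n\n# writes images and one PASCAL VOC XML file per image\nds.as_pascal_voc(\n    images_directory_path='./voc/images',\n    annotations_directory_path='./voc/annotations'\n)\n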
"},{"location":"datasets/core/#supervision.dataset.core.DetectionDataset.as_yolo","title":"as_yolo(images_directory_path=None, annotations_directory_path=None, data_yaml_path=None, min_image_area_percentage=0.0, max_image_area_percentage=1.0, approximation_percentage=0.0)","text":"

Exports the dataset to YOLO format. This method saves the images and their corresponding annotations in YOLO format.

Parameters:

images_directory_path (Optional[str], default None): The path to the directory where the images should be saved. If not provided, images will not be saved.
annotations_directory_path (Optional[str], default None): The path to the directory where the annotations in YOLO format should be saved. If not provided, annotations will not be saved.
data_yaml_path (Optional[str], default None): The path where the data.yaml file should be saved. If not provided, the file will not be saved.
min_image_area_percentage (float, default 0.0): The minimum percentage of detection area relative to the image area for a detection to be included. Argument is used only for segmentation datasets.
max_image_area_percentage (float, default 1.0): The maximum percentage of detection area relative to the image area for a detection to be included. Argument is used only for segmentation datasets.
approximation_percentage (float, default 0.0): The percentage of polygon points to be removed from the input polygon, in the range [0, 1). This is useful for simplifying the annotations. Argument is used only for segmentation datasets.

Source code in supervision/dataset/core.py
def as_yolo(\n    self,\n    images_directory_path: Optional[str] = None,\n    annotations_directory_path: Optional[str] = None,\n    data_yaml_path: Optional[str] = None,\n    min_image_area_percentage: float = 0.0,\n    max_image_area_percentage: float = 1.0,\n    approximation_percentage: float = 0.0,\n) -> None:\n    \"\"\"\n    Exports the dataset to YOLO format. This method saves the\n    images and their corresponding annotations in YOLO format.\n\n    Args:\n        images_directory_path (Optional[str]): The path to the\n            directory where the images should be saved.\n            If not provided, images will not be saved.\n        annotations_directory_path (Optional[str]): The path to the\n            directory where the annotations in\n            YOLO format should be saved. If not provided,\n            annotations will not be saved.\n        data_yaml_path (Optional[str]): The path where the data.yaml\n            file should be saved.\n            If not provided, the file will not be saved.\n        min_image_area_percentage (float): The minimum percentage of\n            detection area relative to\n            the image area for a detection to be included.\n            Argument is used only for segmentation datasets.\n        max_image_area_percentage (float): The maximum percentage\n            of detection area relative to\n            the image area for a detection to be included.\n            Argument is used only for segmentation datasets.\n        approximation_percentage (float): The percentage of polygon points to\n            be removed from the input polygon, in the range [0, 1).\n            This is useful for simplifying the annotations.\n            Argument is used only for segmentation datasets.\n    \"\"\"\n    if images_directory_path is not None:\n        save_dataset_images(\n            images_directory_path=images_directory_path, images=self.images\n        )\n    if annotations_directory_path is not None:\n        save_yolo_annotations(\n            annotations_directory_path=annotations_directory_path,\n            images=self.images,\n            annotations=self.annotations,\n            min_image_area_percentage=min_image_area_percentage,\n            max_image_area_percentage=max_image_area_percentage,\n            approximation_percentage=approximation_percentage,\n        )\n    if data_yaml_path is not None:\n        save_data_yaml(data_yaml_path=data_yaml_path, classes=self.classes)\n
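A short export sketch (output paths are placeholders):
import supervision as sv\n\nds = sv.DetectionDataset(...)\n\n# writes images, YOLO label files and a data.yaml with class names\nds.as_yolo(\n    images_directory_path='./yolo/images',\n    annotations_directory_path='./yolo/labels',\n    data_yaml_path='./yolo/data.yaml'\n)\n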
"},{"location":"datasets/core/#supervision.dataset.core.DetectionDataset.from_coco","title":"from_coco(images_directory_path, annotations_path, force_masks=False) classmethod","text":"

Creates a Dataset instance from COCO formatted data.

Parameters:

images_directory_path (str, required): The path to the directory containing the images.
annotations_path (str, required): The path to the JSON annotation file.
force_masks (bool, default False): If True, forces masks to be loaded for all annotations, regardless of whether they are present.

Returns:

DetectionDataset: A DetectionDataset instance containing the loaded images and annotations.

Examples:

import roboflow\nfrom roboflow import Roboflow\nimport supervision as sv\n\nroboflow.login()\nrf = Roboflow()\n\nproject = rf.workspace(WORKSPACE_ID).project(PROJECT_ID)\ndataset = project.version(PROJECT_VERSION).download(\"coco\")\n\nds = sv.DetectionDataset.from_coco(\n    images_directory_path=f\"{dataset.location}/train\",\n    annotations_path=f\"{dataset.location}/train/_annotations.coco.json\",\n)\n\nds.classes\n# ['dog', 'person']\n
Source code in supervision/dataset/core.py
@classmethod\ndef from_coco(\n    cls,\n    images_directory_path: str,\n    annotations_path: str,\n    force_masks: bool = False,\n) -> DetectionDataset:\n    \"\"\"\n    Creates a Dataset instance from COCO formatted data.\n\n    Args:\n        images_directory_path (str): The path to the\n            directory containing the images.\n        annotations_path (str): The path to the json annotation files.\n        force_masks (bool, optional): If True,\n            forces masks to be loaded for all annotations,\n            regardless of whether they are present.\n\n    Returns:\n        DetectionDataset: A DetectionDataset instance containing\n            the loaded images and annotations.\n\n    Examples:\n        ```python\n        import roboflow\n        from roboflow import Roboflow\n        import supervision as sv\n\n        roboflow.login()\n        rf = Roboflow()\n\n        project = rf.workspace(WORKSPACE_ID).project(PROJECT_ID)\n        dataset = project.version(PROJECT_VERSION).download(\"coco\")\n\n        ds = sv.DetectionDataset.from_coco(\n            images_directory_path=f\"{dataset.location}/train\",\n            annotations_path=f\"{dataset.location}/train/_annotations.coco.json\",\n        )\n\n        ds.classes\n        # ['dog', 'person']\n        ```\n    \"\"\"\n    classes, images, annotations = load_coco_annotations(\n        images_directory_path=images_directory_path,\n        annotations_path=annotations_path,\n        force_masks=force_masks,\n    )\n    return DetectionDataset(classes=classes, images=images, annotations=annotations)\n
"},{"location":"datasets/core/#supervision.dataset.core.DetectionDataset.from_pascal_voc","title":"from_pascal_voc(images_directory_path, annotations_directory_path, force_masks=False) classmethod","text":"

Creates a Dataset instance from PASCAL VOC formatted data.

Parameters:

images_directory_path (str, required): Path to the directory containing the images.
annotations_directory_path (str, required): Path to the directory containing the PASCAL VOC XML annotations.
force_masks (bool, default False): If True, forces masks to be loaded for all annotations, regardless of whether they are present.

Returns:

DetectionDataset: A DetectionDataset instance containing the loaded images and annotations.

Examples:

import roboflow\nfrom roboflow import Roboflow\nimport supervision as sv\n\nroboflow.login()\n\nrf = Roboflow()\n\nproject = rf.workspace(WORKSPACE_ID).project(PROJECT_ID)\ndataset = project.version(PROJECT_VERSION).download(\"voc\")\n\nds = sv.DetectionDataset.from_pascal_voc(\n    images_directory_path=f\"{dataset.location}/train/images\",\n    annotations_directory_path=f\"{dataset.location}/train/labels\"\n)\n\nds.classes\n# ['dog', 'person']\n
Source code in supervision/dataset/core.py
@classmethod\ndef from_pascal_voc(\n    cls,\n    images_directory_path: str,\n    annotations_directory_path: str,\n    force_masks: bool = False,\n) -> DetectionDataset:\n    \"\"\"\n    Creates a Dataset instance from PASCAL VOC formatted data.\n\n    Args:\n        images_directory_path (str): Path to the directory containing the images.\n        annotations_directory_path (str): Path to the directory\n            containing the PASCAL VOC XML annotations.\n        force_masks (bool, optional): If True, forces masks to\n            be loaded for all annotations, regardless of whether they are present.\n\n    Returns:\n        DetectionDataset: A DetectionDataset instance containing\n            the loaded images and annotations.\n\n    Examples:\n        ```python\n        import roboflow\n        from roboflow import Roboflow\n        import supervision as sv\n\n        roboflow.login()\n\n        rf = Roboflow()\n\n        project = rf.workspace(WORKSPACE_ID).project(PROJECT_ID)\n        dataset = project.version(PROJECT_VERSION).download(\"voc\")\n\n        ds = sv.DetectionDataset.from_pascal_voc(\n            images_directory_path=f\"{dataset.location}/train/images\",\n            annotations_directory_path=f\"{dataset.location}/train/labels\"\n        )\n\n        ds.classes\n        # ['dog', 'person']\n        ```\n    \"\"\"\n\n    classes, images, annotations = load_pascal_voc_annotations(\n        images_directory_path=images_directory_path,\n        annotations_directory_path=annotations_directory_path,\n        force_masks=force_masks,\n    )\n\n    return DetectionDataset(classes=classes, images=images, annotations=annotations)\n
"},{"location":"datasets/core/#supervision.dataset.core.DetectionDataset.from_yolo","title":"from_yolo(images_directory_path, annotations_directory_path, data_yaml_path, force_masks=False, is_obb=False) classmethod","text":"

Creates a Dataset instance from YOLO formatted data.

Parameters:

images_directory_path (str, required): The path to the directory containing the images.
annotations_directory_path (str, required): The path to the directory containing the YOLO annotation files.
data_yaml_path (str, required): The path to the data YAML file containing class information.
force_masks (bool, default False): If True, forces masks to be loaded for all annotations, regardless of whether they are present.
is_obb (bool, default False): If True, loads the annotations in OBB format. OBB annotations are defined as [class_id, x, y, x, y, x, y, x, y], where pairs of [x, y] are box corners.

Returns:

DetectionDataset: A DetectionDataset instance containing the loaded images and annotations.

Examples:

import roboflow\nfrom roboflow import Roboflow\nimport supervision as sv\n\nroboflow.login()\nrf = Roboflow()\n\nproject = rf.workspace(WORKSPACE_ID).project(PROJECT_ID)\ndataset = project.version(PROJECT_VERSION).download(\"yolov5\")\n\nds = sv.DetectionDataset.from_yolo(\n    images_directory_path=f\"{dataset.location}/train/images\",\n    annotations_directory_path=f\"{dataset.location}/train/labels\",\n    data_yaml_path=f\"{dataset.location}/data.yaml\"\n)\n\nds.classes\n# ['dog', 'person']\n
Source code in supervision/dataset/core.py
@classmethod\ndef from_yolo(\n    cls,\n    images_directory_path: str,\n    annotations_directory_path: str,\n    data_yaml_path: str,\n    force_masks: bool = False,\n    is_obb: bool = False,\n) -> DetectionDataset:\n    \"\"\"\n    Creates a Dataset instance from YOLO formatted data.\n\n    Args:\n        images_directory_path (str): The path to the\n            directory containing the images.\n        annotations_directory_path (str): The path to the directory\n            containing the YOLO annotation files.\n        data_yaml_path (str): The path to the data\n            YAML file containing class information.\n        force_masks (bool, optional): If True, forces\n            masks to be loaded for all annotations,\n            regardless of whether they are present.\n        is_obb (bool, optional): If True, loads the annotations in OBB format.\n            OBB annotations are defined as `[class_id, x, y, x, y, x, y, x, y]`,\n            where pairs of [x, y] are box corners.\n\n    Returns:\n        DetectionDataset: A DetectionDataset instance\n            containing the loaded images and annotations.\n\n    Examples:\n        ```python\n        import roboflow\n        from roboflow import Roboflow\n        import supervision as sv\n\n        roboflow.login()\n        rf = Roboflow()\n\n        project = rf.workspace(WORKSPACE_ID).project(PROJECT_ID)\n        dataset = project.version(PROJECT_VERSION).download(\"yolov5\")\n\n        ds = sv.DetectionDataset.from_yolo(\n            images_directory_path=f\"{dataset.location}/train/images\",\n            annotations_directory_path=f\"{dataset.location}/train/labels\",\n            data_yaml_path=f\"{dataset.location}/data.yaml\"\n        )\n\n        ds.classes\n        # ['dog', 'person']\n        ```\n    \"\"\"\n    classes, images, annotations = load_yolo_annotations(\n        images_directory_path=images_directory_path,\n        annotations_directory_path=annotations_directory_path,\n        data_yaml_path=data_yaml_path,\n        force_masks=force_masks,\n        is_obb=is_obb,\n    )\n    return DetectionDataset(classes=classes, images=images, annotations=annotations)\n
"},{"location":"datasets/core/#supervision.dataset.core.DetectionDataset.merge","title":"merge(dataset_list) classmethod","text":"

Merge a list of DetectionDataset objects into a single DetectionDataset object.

This method takes a list of DetectionDataset objects and combines their respective fields (classes, images, annotations) into a single DetectionDataset object.

Parameters:

dataset_list (List[DetectionDataset], required): A list of DetectionDataset objects to merge.

Returns:

DetectionDataset: A single DetectionDataset object containing the merged data from the input list.

Examples:

import supervision as sv\n\nds_1 = sv.DetectionDataset(...)\nlen(ds_1)\n# 100\nds_1.classes\n# ['dog', 'person']\n\nds_2 = sv.DetectionDataset(...)\nlen(ds_2)\n# 200\nds_2.classes\n# ['cat']\n\nds_merged = sv.DetectionDataset.merge([ds_1, ds_2])\nlen(ds_merged)\n# 300\nds_merged.classes\n# ['cat', 'dog', 'person']\n
Source code in supervision/dataset/core.py
@classmethod\ndef merge(cls, dataset_list: List[DetectionDataset]) -> DetectionDataset:\n    \"\"\"\n    Merge a list of `DetectionDataset` objects into a single\n        `DetectionDataset` object.\n\n    This method takes a list of `DetectionDataset` objects and combines\n    their respective fields (`classes`, `images`,\n    `annotations`) into a single `DetectionDataset` object.\n\n    Args:\n        dataset_list (List[DetectionDataset]): A list of `DetectionDataset`\n            objects to merge.\n\n    Returns:\n        (DetectionDataset): A single `DetectionDataset` object containing\n        the merged data from the input list.\n\n    Examples:\n        ```python\n        import supervision as sv\n\n        ds_1 = sv.DetectionDataset(...)\n        len(ds_1)\n        # 100\n        ds_1.classes\n        # ['dog', 'person']\n\n        ds_2 = sv.DetectionDataset(...)\n        len(ds_2)\n        # 200\n        ds_2.classes\n        # ['cat']\n\n        ds_merged = sv.DetectionDataset.merge([ds_1, ds_2])\n        len(ds_merged)\n        # 300\n        ds_merged.classes\n        # ['cat', 'dog', 'person']\n        ```\n    \"\"\"\n    merged_images, merged_annotations = {}, {}\n    class_lists = [dataset.classes for dataset in dataset_list]\n    merged_classes = merge_class_lists(class_lists=class_lists)\n\n    for dataset in dataset_list:\n        class_index_mapping = build_class_index_mapping(\n            source_classes=dataset.classes, target_classes=merged_classes\n        )\n        for image_name, image, detections in dataset:\n            if image_name in merged_annotations:\n                raise ValueError(\n                    f\"Image name {image_name} is not unique across datasets.\"\n                )\n\n            merged_images[image_name] = image\n            merged_annotations[image_name] = map_detections_class_id(\n                source_to_target_mapping=class_index_mapping,\n                detections=detections,\n            )\n\n    return cls(\n        classes=merged_classes, images=merged_images, annotations=merged_annotations\n    )\n
"},{"location":"datasets/core/#supervision.dataset.core.DetectionDataset.split","title":"split(split_ratio=0.8, random_state=None, shuffle=True)","text":"

Splits the dataset into two parts (training and testing) using the provided split_ratio.

Parameters:

split_ratio (float, default 0.8): The ratio of the training set to the entire dataset.
random_state (int, default None): The seed for the random number generator. This is used for reproducibility.
shuffle (bool, default True): Whether to shuffle the data before splitting.

Returns:

Tuple[DetectionDataset, DetectionDataset]: A tuple containing the training and testing datasets.

Examples:

import supervision as sv\n\nds = sv.DetectionDataset(...)\ntrain_ds, test_ds = ds.split(split_ratio=0.7, random_state=42, shuffle=True)\nlen(train_ds), len(test_ds)\n# (700, 300)\n
Source code in supervision/dataset/core.py
def split(\n    self, split_ratio=0.8, random_state=None, shuffle: bool = True\n) -> Tuple[DetectionDataset, DetectionDataset]:\n    \"\"\"\n    Splits the dataset into two parts (training and testing)\n        using the provided split_ratio.\n\n    Args:\n        split_ratio (float, optional): The ratio of the training\n            set to the entire dataset.\n        random_state (int, optional): The seed for the random number generator.\n            This is used for reproducibility.\n        shuffle (bool, optional): Whether to shuffle the data before splitting.\n\n    Returns:\n        Tuple[DetectionDataset, DetectionDataset]: A tuple containing\n            the training and testing datasets.\n\n    Examples:\n        ```python\n        import supervision as sv\n\n        ds = sv.DetectionDataset(...)\n        train_ds, test_ds = ds.split(split_ratio=0.7, random_state=42, shuffle=True)\n        len(train_ds), len(test_ds)\n        # (700, 300)\n        ```\n    \"\"\"\n\n    image_names = list(self.images.keys())\n    train_names, test_names = train_test_split(\n        data=image_names,\n        train_ratio=split_ratio,\n        random_state=random_state,\n        shuffle=shuffle,\n    )\n\n    train_dataset = DetectionDataset(\n        classes=self.classes,\n        images={name: self.images[name] for name in train_names},\n        annotations={name: self.annotations[name] for name in train_names},\n    )\n    test_dataset = DetectionDataset(\n        classes=self.classes,\n        images={name: self.images[name] for name in test_names},\n        annotations={name: self.annotations[name] for name in test_names},\n    )\n    return train_dataset, test_dataset\n
"},{"location":"datasets/core/#supervision.dataset.core.ClassificationDataset-functions","title":"Functions","text":""},{"location":"datasets/core/#supervision.dataset.core.ClassificationDataset.as_folder_structure","title":"as_folder_structure(root_directory_path)","text":"

Saves the dataset as a multi-class folder structure.

Parameters:

| Name | Type | Description | Default |
|------|------|-------------|---------|
| `root_directory_path` | `str` | The path to the directory where the dataset will be saved. | required |

Source code in supervision/dataset/core.py
def as_folder_structure(self, root_directory_path: str) -> None:\n    \"\"\"\n    Saves the dataset as a multi-class folder structure.\n\n    Args:\n        root_directory_path (str): The path to the directory\n            where the dataset will be saved.\n    \"\"\"\n    os.makedirs(root_directory_path, exist_ok=True)\n\n    for class_name in self.classes:\n        os.makedirs(os.path.join(root_directory_path, class_name), exist_ok=True)\n\n    for image_path in self.images:\n        classification = self.annotations[image_path]\n        image = self.images[image_path]\n        image_name = Path(image_path).name\n        class_id = (\n            classification.class_id[0]\n            if classification.confidence is None\n            else classification.get_top_k(1)[0][0]\n        )\n        class_name = self.classes[class_id]\n        image_path = os.path.join(root_directory_path, class_name, image_name)\n        cv2.imwrite(image_path, image)\n
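A minimal usage sketch for `as_folder_structure` (the output path below is a placeholder): each class gets its own sub-directory under the root, and every image is written there with `cv2.imwrite`.

```python
import supervision as sv

# Hypothetical dataset; see `from_folder_structure` below for one way to build it.
cd = sv.ClassificationDataset(...)

# Produces <root>/<class_name>/<image_name> for every annotated image.
cd.as_folder_structure(root_directory_path="./classification-export/train")
```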
"},{"location":"datasets/core/#supervision.dataset.core.ClassificationDataset.from_folder_structure","title":"from_folder_structure(root_directory_path) classmethod","text":"

Load data from a multiclass folder structure into a ClassificationDataset.

Parameters:

| Name | Type | Description | Default |
|------|------|-------------|---------|
| `root_directory_path` | `str` | The path to the dataset directory. | required |

Returns:

| Name | Type | Description |
|------|------|-------------|
| `ClassificationDataset` | `ClassificationDataset` | The dataset. |

Examples:

import roboflow\nfrom roboflow import Roboflow\nimport supervision as sv\n\nroboflow.login()\nrf = Roboflow()\n\nproject = rf.workspace(WORKSPACE_ID).project(PROJECT_ID)\ndataset = project.version(PROJECT_VERSION).download(\"folder\")\n\ncd = sv.ClassificationDataset.from_folder_structure(\n    root_directory_path=f\"{dataset.location}/train\"\n)\n
Source code in supervision/dataset/core.py
@classmethod\ndef from_folder_structure(cls, root_directory_path: str) -> ClassificationDataset:\n    \"\"\"\n    Load data from a multiclass folder structure into a ClassificationDataset.\n\n    Args:\n        root_directory_path (str): The path to the dataset directory.\n\n    Returns:\n        ClassificationDataset: The dataset.\n\n    Examples:\n        ```python\n        import roboflow\n        from roboflow import Roboflow\n        import supervision as sv\n\n        roboflow.login()\n        rf = Roboflow()\n\n        project = rf.workspace(WORKSPACE_ID).project(PROJECT_ID)\n        dataset = project.version(PROJECT_VERSION).download(\"folder\")\n\n        cd = sv.ClassificationDataset.from_folder_structure(\n            root_directory_path=f\"{dataset.location}/train\"\n        )\n        ```\n    \"\"\"\n    classes = os.listdir(root_directory_path)\n    classes = sorted(set(classes))\n\n    images = {}\n    annotations = {}\n\n    for class_name in classes:\n        class_id = classes.index(class_name)\n\n        for image in os.listdir(os.path.join(root_directory_path, class_name)):\n            image_path = str(os.path.join(root_directory_path, class_name, image))\n            images[image_path] = cv2.imread(image_path)\n            annotations[image_path] = Classifications(\n                class_id=np.array([class_id]),\n            )\n\n    return cls(\n        classes=classes,\n        images=images,\n        annotations=annotations,\n    )\n
"},{"location":"datasets/core/#supervision.dataset.core.ClassificationDataset.split","title":"split(split_ratio=0.8, random_state=None, shuffle=True)","text":"

Splits the dataset into two parts (training and testing) using the provided split_ratio.

Parameters:

| Name | Type | Description | Default |
|------|------|-------------|---------|
| `split_ratio` | `float` | The ratio of the training set to the entire dataset. | `0.8` |
| `random_state` | `int` | The seed for the random number generator. This is used for reproducibility. | `None` |
| `shuffle` | `bool` | Whether to shuffle the data before splitting. | `True` |

Returns:

| Type | Description |
|------|-------------|
| `Tuple[ClassificationDataset, ClassificationDataset]` | A tuple containing the training and testing datasets. |

Examples:

import supervision as sv\n\ncd = sv.ClassificationDataset(...)\ntrain_cd,test_cd = cd.split(split_ratio=0.7, random_state=42,shuffle=True)\nlen(train_cd), len(test_cd)\n# (700, 300)\n
Source code in supervision/dataset/core.py
def split(\n    self, split_ratio=0.8, random_state=None, shuffle: bool = True\n) -> Tuple[ClassificationDataset, ClassificationDataset]:\n    \"\"\"\n    Splits the dataset into two parts (training and testing)\n        using the provided split_ratio.\n\n    Args:\n        split_ratio (float, optional): The ratio of the training\n            set to the entire dataset.\n        random_state (int, optional): The seed for the\n            random number generator. This is used for reproducibility.\n        shuffle (bool, optional): Whether to shuffle the data before splitting.\n\n    Returns:\n        Tuple[ClassificationDataset, ClassificationDataset]: A tuple containing\n        the training and testing datasets.\n\n    Examples:\n        ```python\n        import supervision as sv\n\n        cd = sv.ClassificationDataset(...)\n        train_cd,test_cd = cd.split(split_ratio=0.7, random_state=42,shuffle=True)\n        len(train_cd), len(test_cd)\n        # (700, 300)\n        ```\n    \"\"\"\n    image_names = list(self.images.keys())\n    train_names, test_names = train_test_split(\n        data=image_names,\n        train_ratio=split_ratio,\n        random_state=random_state,\n        shuffle=shuffle,\n    )\n\n    train_dataset = ClassificationDataset(\n        classes=self.classes,\n        images={name: self.images[name] for name in train_names},\n        annotations={name: self.annotations[name] for name in train_names},\n    )\n    test_dataset = ClassificationDataset(\n        classes=self.classes,\n        images={name: self.images[name] for name in test_names},\n        annotations={name: self.annotations[name] for name in test_names},\n    )\n    return train_dataset, test_dataset\n
"},{"location":"datasets/utils/","title":"Datasets Utils","text":"rle_to_mask

Converts run-length encoding (RLE) to a binary mask.

Parameters:

| Name | Type | Description | Default |
|------|------|-------------|---------|
| `rle` | `Union[NDArray[int_], List[int]]` | The 1D RLE array, the format used in the COCO dataset (column-wise encoding; values of the array with even indices represent the number of pixels assigned as background, values with odd indices represent the number of pixels assigned as foreground object). | required |
| `resolution_wh` | `Tuple[int, int]` | The width (w) and height (h) of the desired binary mask. | required |

Returns:

| Type | Description |
|------|-------------|
| `NDArray[bool_]` | The generated 2D Boolean mask of shape `(h, w)`, where the foreground object is marked with `True`'s and the rest is filled with `False`'s. |

Raises:

| Type | Description |
|------|-------------|
| `AssertionError` | If the sum of pixels encoded in RLE differs from the number of pixels in the expected mask (computed based on `resolution_wh`). |

Examples:

import supervision as sv\n\nsv.rle_to_mask([5, 2, 2, 2, 5], (4, 4))\n# array([\n#     [False, False, False, False],\n#     [False, True,  True,  False],\n#     [False, True,  True,  False],\n#     [False, False, False, False],\n# ])\n
Source code in supervision/dataset/utils.py
def rle_to_mask(\n    rle: Union[npt.NDArray[np.int_], List[int]], resolution_wh: Tuple[int, int]\n) -> npt.NDArray[np.bool_]:\n    \"\"\"\n    Converts run-length encoding (RLE) to a binary mask.\n\n    Args:\n        rle (Union[npt.NDArray[np.int_], List[int]]): The 1D RLE array, the format\n            used in the COCO dataset (column-wise encoding, values of an array with\n            even indices represent the number of pixels assigned as background,\n            values of an array with odd indices represent the number of pixels\n            assigned as foreground object).\n        resolution_wh (Tuple[int, int]): The width (w) and height (h)\n            of the desired binary mask.\n\n    Returns:\n        The generated 2D Boolean mask of shape `(h, w)`, where the foreground object is\n            marked with `True`'s and the rest is filled with `False`'s.\n\n    Raises:\n        AssertionError: If the sum of pixels encoded in RLE differs from the\n            number of pixels in the expected mask (computed based on resolution_wh).\n\n    Examples:\n        ```python\n        import supervision as sv\n\n        sv.rle_to_mask([5, 2, 2, 2, 5], (4, 4))\n        # array([\n        #     [False, False, False, False],\n        #     [False, True,  True,  False],\n        #     [False, True,  True,  False],\n        #     [False, False, False, False],\n        # ])\n        ```\n    \"\"\"\n    if isinstance(rle, list):\n        rle = np.array(rle, dtype=int)\n\n    width, height = resolution_wh\n\n    assert width * height == np.sum(rle), (\n        \"the sum of the number of pixels in the RLE must be the same \"\n        \"as the number of pixels in the expected mask\"\n    )\n\n    zero_one_values = np.zeros(shape=(rle.size, 1), dtype=np.uint8)\n    zero_one_values[1::2] = 1\n\n    decoded_rle = np.repeat(zero_one_values, rle, axis=0)\n    decoded_rle = np.append(\n        decoded_rle, np.zeros(width * height - len(decoded_rle), dtype=np.uint8)\n    )\n    return decoded_rle.reshape((height, width), order=\"F\")\n
mask_to_rle

Converts a binary mask into a run-length encoding (RLE).

Parameters:

| Name | Type | Description | Default |
|------|------|-------------|---------|
| `mask` | `NDArray[bool_]` | 2D binary mask where `True` indicates foreground object and `False` indicates background. | required |

Returns:

| Type | Description |
|------|-------------|
| `List[int]` | The run-length encoded mask. Values of the list with even indices represent the number of pixels assigned as background (`False`), values with odd indices represent the number of pixels assigned as foreground object (`True`). |

Raises:

| Type | Description |
|------|-------------|
| `AssertionError` | If the input mask is not 2D or is empty. |

Examples:

import numpy as np\nimport supervision as sv\n\nmask = np.array([\n    [True, True, True, True],\n    [True, True, True, True],\n    [True, True, True, True],\n    [True, True, True, True],\n])\nsv.mask_to_rle(mask)\n# [0, 16]\n\nmask = np.array([\n    [False, False, False, False],\n    [False, True,  True,  False],\n    [False, True,  True,  False],\n    [False, False, False, False],\n])\nsv.mask_to_rle(mask)\n# [5, 2, 2, 2, 5]\n

Source code in supervision/dataset/utils.py
def mask_to_rle(mask: npt.NDArray[np.bool_]) -> List[int]:\n    \"\"\"\n    Converts a binary mask into a run-length encoding (RLE).\n\n    Args:\n        mask (npt.NDArray[np.bool_]): 2D binary mask where `True` indicates foreground\n            object and `False` indicates background.\n\n    Returns:\n        The run-length encoded mask. Values of a list with even indices\n            represent the number of pixels assigned as background (`False`), values\n            of a list with odd indices represent the number of pixels assigned\n            as foreground object (`True`).\n\n    Raises:\n        AssertionError: If input mask is not 2D or is empty.\n\n    Examples:\n        ```python\n        import numpy as np\n        import supervision as sv\n\n        mask = np.array([\n            [True, True, True, True],\n            [True, True, True, True],\n            [True, True, True, True],\n            [True, True, True, True],\n        ])\n        sv.mask_to_rle(mask)\n        # [0, 16]\n\n        mask = np.array([\n            [False, False, False, False],\n            [False, True,  True,  False],\n            [False, True,  True,  False],\n            [False, False, False, False],\n        ])\n        sv.mask_to_rle(mask)\n        # [5, 2, 2, 2, 5]\n        ```\n\n    ![mask_to_rle](https://media.roboflow.com/supervision-docs/mask-to-rle.png){ align=center width=\"800\" }\n    \"\"\"  # noqa E501 // docs\n    assert mask.ndim == 2, \"Input mask must be 2D\"\n    assert mask.size != 0, \"Input mask cannot be empty\"\n\n    on_value_change_indices = np.where(\n        mask.ravel(order=\"F\") != np.roll(mask.ravel(order=\"F\"), 1)\n    )[0]\n\n    on_value_change_indices = np.append(on_value_change_indices, mask.size)\n    # need to add 0 at the beginning when the same value is in the first and\n    # last element of the flattened mask\n    if on_value_change_indices[0] != 0:\n        on_value_change_indices = np.insert(on_value_change_indices, 0, 0)\n\n    rle = np.diff(on_value_change_indices)\n\n    if mask[0][0] == 1:\n        rle = np.insert(rle, 0, 0)\n\n    return list(rle)\n
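Because `mask_to_rle` and `rle_to_mask` share the same column-wise COCO convention, the two functions round-trip: encoding a mask and decoding the result with the matching `(w, h)` reproduces the original. A short sketch reusing the 4x4 mask from the example above:

```python
import numpy as np
import supervision as sv

mask = np.array([
    [False, False, False, False],
    [False, True,  True,  False],
    [False, True,  True,  False],
    [False, False, False, False],
])

rle = sv.mask_to_rle(mask)                # [5, 2, 2, 2, 5]

h, w = mask.shape
restored = sv.rle_to_mask(rle, (w, h))    # note: resolution_wh is (width, height)

assert np.array_equal(restored, mask)
```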
"},{"location":"detection/annotators/","title":"Annotators","text":"BoundingBoxRoundBoxBoxCornerColorCircleDotTriangleEllipseHaloPercentageBarMaskPolygonLabelRichLabelCropBlurPixelateTraceHeatMap
import supervision as sv\n\nimage = ...\ndetections = sv.Detections(...)\n\nbounding_box_annotator = sv.BoundingBoxAnnotator()\nannotated_frame = bounding_box_annotator.annotate(\n    scene=image.copy(),\n    detections=detections\n)\n
import supervision as sv\n\nimage = ...\ndetections = sv.Detections(...)\n\nround_box_annotator = sv.RoundBoxAnnotator()\nannotated_frame = round_box_annotator.annotate(\n    scene=image.copy(),\n    detections=detections\n)\n
import supervision as sv\n\nimage = ...\ndetections = sv.Detections(...)\n\ncorner_annotator = sv.BoxCornerAnnotator()\nannotated_frame = corner_annotator.annotate(\n    scene=image.copy(),\n    detections=detections\n)\n
import supervision as sv\n\nimage = ...\ndetections = sv.Detections(...)\n\ncolor_annotator = sv.ColorAnnotator()\nannotated_frame = color_annotator.annotate(\n    scene=image.copy(),\n    detections=detections\n)\n
import supervision as sv\n\nimage = ...\ndetections = sv.Detections(...)\n\ncircle_annotator = sv.CircleAnnotator()\nannotated_frame = circle_annotator.annotate(\n    scene=image.copy(),\n    detections=detections\n)\n
import supervision as sv\n\nimage = ...\ndetections = sv.Detections(...)\n\ndot_annotator = sv.DotAnnotator()\nannotated_frame = dot_annotator.annotate(\n    scene=image.copy(),\n    detections=detections\n)\n
import supervision as sv\n\nimage = ...\ndetections = sv.Detections(...)\n\ntriangle_annotator = sv.TriangleAnnotator()\nannotated_frame = triangle_annotator.annotate(\n    scene=image.copy(),\n    detections=detections\n)\n
import supervision as sv\n\nimage = ...\ndetections = sv.Detections(...)\n\nellipse_annotator = sv.EllipseAnnotator()\nannotated_frame = ellipse_annotator.annotate(\n    scene=image.copy(),\n    detections=detections\n)\n
import supervision as sv\n\nimage = ...\ndetections = sv.Detections(...)\n\nhalo_annotator = sv.HaloAnnotator()\nannotated_frame = halo_annotator.annotate(\n    scene=image.copy(),\n    detections=detections\n)\n
import supervision as sv\n\nimage = ...\ndetections = sv.Detections(...)\n\npercentage_bar_annotator = sv.PercentageBarAnnotator()\nannotated_frame = percentage_bar_annotator.annotate(\n    scene=image.copy(),\n    detections=detections\n)\n
import supervision as sv\n\nimage = ...\ndetections = sv.Detections(...)\n\nmask_annotator = sv.MaskAnnotator()\nannotated_frame = mask_annotator.annotate(\n    scene=image.copy(),\n    detections=detections\n)\n
import supervision as sv\n\nimage = ...\ndetections = sv.Detections(...)\n\npolygon_annotator = sv.PolygonAnnotator()\nannotated_frame = polygon_annotator.annotate(\n    scene=image.copy(),\n    detections=detections\n)\n
import supervision as sv\n\nimage = ...\ndetections = sv.Detections(...)\n\nlabels = [\n    f\"{class_name} {confidence:.2f}\"\n    for class_name, confidence\n    in zip(detections['class_name'], detections.confidence)\n]\n\nlabel_annotator = sv.LabelAnnotator(text_position=sv.Position.CENTER)\nannotated_frame = label_annotator.annotate(\n    scene=image.copy(),\n    detections=detections,\n    labels=labels\n)\n
import supervision as sv\n\nimage = ...\ndetections = sv.Detections(...)\n\nlabels = [\n    f\"{class_name} {confidence:.2f}\"\n    for class_name, confidence\n    in zip(detections['class_name'], detections.confidence)\n]\n\nrich_label_annotator = sv.RichLabelAnnotator(\n    font_path=\".../font.ttf\",\n    text_position=sv.Position.CENTER\n)\nannotated_frame = rich_label_annotator.annotate(\n    scene=image.copy(),\n    detections=detections,\n    labels=labels\n)\n
import supervision as sv\n\nimage = ...\ndetections = sv.Detections(...)\n\ncrop_annotator = sv.CropAnnotator()\nannotated_frame = crop_annotator.annotate(\n    scene=image.copy(),\n    detections=detections\n)\n
import supervision as sv\n\nimage = ...\ndetections = sv.Detections(...)\n\nblur_annotator = sv.BlurAnnotator()\nannotated_frame = blur_annotator.annotate(\n    scene=image.copy(),\n    detections=detections\n)\n
import supervision as sv\n\nimage = ...\ndetections = sv.Detections(...)\n\npixelate_annotator = sv.PixelateAnnotator()\nannotated_frame = pixelate_annotator.annotate(\n    scene=image.copy(),\n    detections=detections\n)\n
import supervision as sv\nfrom ultralytics import YOLO\n\nmodel = YOLO('yolov8x.pt')\n\ntrace_annotator = sv.TraceAnnotator()\n\nvideo_info = sv.VideoInfo.from_video_path(video_path='...')\nframes_generator = sv.get_video_frames_generator(source_path='...')\ntracker = sv.ByteTrack()\n\nwith sv.VideoSink(target_path='...', video_info=video_info) as sink:\n    for frame in frames_generator:\n        result = model(frame)[0]\n        detections = sv.Detections.from_ultralytics(result)\n        detections = tracker.update_with_detections(detections)\n        annotated_frame = trace_annotator.annotate(\n            scene=frame.copy(),\n            detections=detections)\n        sink.write_frame(frame=annotated_frame)\n
import supervision as sv\nfrom ultralytics import YOLO\n\nmodel = YOLO('yolov8x.pt')\n\nheat_map_annotator = sv.HeatMapAnnotator()\n\nvideo_info = sv.VideoInfo.from_video_path(video_path='...')\nframes_generator = sv.get_video_frames_generator(source_path='...')\n\nwith sv.VideoSink(target_path='...', video_info=video_info) as sink:\n    for frame in frames_generator:\n        result = model(frame)[0]\n        detections = sv.Detections.from_ultralytics(result)\n        annotated_frame = heat_map_annotator.annotate(\n            scene=frame.copy(),\n            detections=detections)\n        sink.write_frame(frame=annotated_frame)\n
BoundingBoxAnnotator

Bases: BaseAnnotator

A class for drawing bounding boxes on an image using provided detections.

Source code in supervision/annotators/core.py
class BoundingBoxAnnotator(BaseAnnotator):\n    \"\"\"\n    A class for drawing bounding boxes on an image using provided detections.\n    \"\"\"\n\n    def __init__(\n        self,\n        color: Union[Color, ColorPalette] = ColorPalette.DEFAULT,\n        thickness: int = 2,\n        color_lookup: ColorLookup = ColorLookup.CLASS,\n    ):\n        \"\"\"\n        Args:\n            color (Union[Color, ColorPalette]): The color or color palette to use for\n                annotating detections.\n            thickness (int): Thickness of the bounding box lines.\n            color_lookup (str): Strategy for mapping colors to annotations.\n                Options are `INDEX`, `CLASS`, `TRACK`.\n        \"\"\"\n        self.color: Union[Color, ColorPalette] = color\n        self.thickness: int = thickness\n        self.color_lookup: ColorLookup = color_lookup\n\n    @convert_for_annotation_method\n    def annotate(\n        self,\n        scene: ImageType,\n        detections: Detections,\n        custom_color_lookup: Optional[np.ndarray] = None,\n    ) -> ImageType:\n        \"\"\"\n        Annotates the given scene with bounding boxes based on the provided detections.\n\n        Args:\n            scene (ImageType): The image where bounding boxes will be drawn. `ImageType`\n            is a flexible type, accepting either `numpy.ndarray` or `PIL.Image.Image`.\n            detections (Detections): Object detections to annotate.\n            custom_color_lookup (Optional[np.ndarray]): Custom color lookup array.\n                Allows to override the default color mapping strategy.\n\n        Returns:\n            The annotated image, matching the type of `scene` (`numpy.ndarray`\n                or `PIL.Image.Image`)\n\n        Example:\n            ```python\n            import supervision as sv\n\n            image = ...\n            detections = sv.Detections(...)\n\n            bounding_box_annotator = sv.BoundingBoxAnnotator()\n            annotated_frame = bounding_box_annotator.annotate(\n                scene=image.copy(),\n                detections=detections\n            )\n            ```\n\n        ![bounding-box-annotator-example](https://media.roboflow.com/\n        supervision-annotator-examples/bounding-box-annotator-example-purple.png)\n        \"\"\"\n        for detection_idx in range(len(detections)):\n            x1, y1, x2, y2 = detections.xyxy[detection_idx].astype(int)\n            color = resolve_color(\n                color=self.color,\n                detections=detections,\n                detection_idx=detection_idx,\n                color_lookup=self.color_lookup\n                if custom_color_lookup is None\n                else custom_color_lookup,\n            )\n            cv2.rectangle(\n                img=scene,\n                pt1=(x1, y1),\n                pt2=(x2, y2),\n                color=color.as_bgr(),\n                thickness=self.thickness,\n            )\n        return scene\n
RoundBoxAnnotator

Bases: BaseAnnotator

A class for drawing bounding boxes with round edges on an image using provided detections.

Source code in supervision/annotators/core.py
class RoundBoxAnnotator(BaseAnnotator):\n    \"\"\"\n    A class for drawing bounding boxes with round edges on an image\n    using provided detections.\n    \"\"\"\n\n    def __init__(\n        self,\n        color: Union[Color, ColorPalette] = ColorPalette.DEFAULT,\n        thickness: int = 2,\n        color_lookup: ColorLookup = ColorLookup.CLASS,\n        roundness: float = 0.6,\n    ):\n        \"\"\"\n        Args:\n            color (Union[Color, ColorPalette]): The color or color palette to use for\n                annotating detections.\n            thickness (int): Thickness of the bounding box lines.\n            color_lookup (str): Strategy for mapping colors to annotations.\n                Options are `INDEX`, `CLASS`, `TRACK`.\n            roundness (float): Percent of roundness for edges of bounding box.\n                Value must be float 0 < roundness <= 1.0\n                By default roundness percent is calculated based on smaller side\n                length (width or height).\n        \"\"\"\n        self.color: Union[Color, ColorPalette] = color\n        self.thickness: int = thickness\n        self.color_lookup: ColorLookup = color_lookup\n        if not 0 < roundness <= 1.0:\n            raise ValueError(\"roundness attribute must be float between (0, 1.0]\")\n        self.roundness: float = roundness\n\n    @convert_for_annotation_method\n    def annotate(\n        self,\n        scene: ImageType,\n        detections: Detections,\n        custom_color_lookup: Optional[np.ndarray] = None,\n    ) -> ImageType:\n        \"\"\"\n        Annotates the given scene with bounding boxes with rounded edges\n        based on the provided detections.\n\n        Args:\n            scene (ImageType): The image where rounded bounding boxes will be drawn.\n                `ImageType` is a flexible type, accepting either `numpy.ndarray`\n                or `PIL.Image.Image`.\n            detections (Detections): Object detections to annotate.\n            custom_color_lookup (Optional[np.ndarray]): Custom color lookup array.\n                Allows to override the default color mapping strategy.\n\n        Returns:\n            The annotated image, matching the type of `scene` (`numpy.ndarray`\n                or `PIL.Image.Image`)\n\n        Example:\n            ```python\n            import supervision as sv\n\n            image = ...\n            detections = sv.Detections(...)\n\n            round_box_annotator = sv.RoundBoxAnnotator()\n            annotated_frame = round_box_annotator.annotate(\n                scene=image.copy(),\n                detections=detections\n            )\n            ```\n\n        ![round-box-annotator-example](https://media.roboflow.com/\n        supervision-annotator-examples/round-box-annotator-example-purple.png)\n        \"\"\"\n\n        for detection_idx in range(len(detections)):\n            x1, y1, x2, y2 = detections.xyxy[detection_idx].astype(int)\n            color = resolve_color(\n                color=self.color,\n                detections=detections,\n                detection_idx=detection_idx,\n                color_lookup=self.color_lookup\n                if custom_color_lookup is None\n                else custom_color_lookup,\n            )\n\n            radius = (\n                int((x2 - x1) // 2 * self.roundness)\n                if abs(x1 - x2) < abs(y1 - y2)\n                else int((y2 - y1) // 2 * self.roundness)\n            )\n\n            circle_coordinates = [\n                ((x1 + radius), (y1 + 
radius)),\n                ((x2 - radius), (y1 + radius)),\n                ((x2 - radius), (y2 - radius)),\n                ((x1 + radius), (y2 - radius)),\n            ]\n\n            line_coordinates = [\n                ((x1 + radius, y1), (x2 - radius, y1)),\n                ((x2, y1 + radius), (x2, y2 - radius)),\n                ((x1 + radius, y2), (x2 - radius, y2)),\n                ((x1, y1 + radius), (x1, y2 - radius)),\n            ]\n\n            start_angles = (180, 270, 0, 90)\n            end_angles = (270, 360, 90, 180)\n\n            for center_coordinates, line, start_angle, end_angle in zip(\n                circle_coordinates, line_coordinates, start_angles, end_angles\n            ):\n                cv2.ellipse(\n                    img=scene,\n                    center=center_coordinates,\n                    axes=(radius, radius),\n                    angle=0,\n                    startAngle=start_angle,\n                    endAngle=end_angle,\n                    color=color.as_bgr(),\n                    thickness=self.thickness,\n                )\n\n                cv2.line(\n                    img=scene,\n                    pt1=line[0],\n                    pt2=line[1],\n                    color=color.as_bgr(),\n                    thickness=self.thickness,\n                )\n\n        return scene\n
BoxCornerAnnotator

Bases: BaseAnnotator

A class for drawing box corners on an image using provided detections.

Source code in supervision/annotators/core.py
class BoxCornerAnnotator(BaseAnnotator):\n    \"\"\"\n    A class for drawing box corners on an image using provided detections.\n    \"\"\"\n\n    def __init__(\n        self,\n        color: Union[Color, ColorPalette] = ColorPalette.DEFAULT,\n        thickness: int = 4,\n        corner_length: int = 15,\n        color_lookup: ColorLookup = ColorLookup.CLASS,\n    ):\n        \"\"\"\n        Args:\n            color (Union[Color, ColorPalette]): The color or color palette to use for\n                annotating detections.\n            thickness (int): Thickness of the corner lines.\n            corner_length (int): Length of each corner line.\n            color_lookup (str): Strategy for mapping colors to annotations.\n                Options are `INDEX`, `CLASS`, `TRACK`.\n        \"\"\"\n        self.color: Union[Color, ColorPalette] = color\n        self.thickness: int = thickness\n        self.corner_length: int = corner_length\n        self.color_lookup: ColorLookup = color_lookup\n\n    @convert_for_annotation_method\n    def annotate(\n        self,\n        scene: ImageType,\n        detections: Detections,\n        custom_color_lookup: Optional[np.ndarray] = None,\n    ) -> ImageType:\n        \"\"\"\n        Annotates the given scene with box corners based on the provided detections.\n\n        Args:\n            scene (ImageType): The image where box corners will be drawn.\n                `ImageType` is a flexible type, accepting either `numpy.ndarray`\n                or `PIL.Image.Image`.\n            detections (Detections): Object detections to annotate.\n            custom_color_lookup (Optional[np.ndarray]): Custom color lookup array.\n                Allows to override the default color mapping strategy.\n\n        Returns:\n            The annotated image, matching the type of `scene` (`numpy.ndarray`\n                or `PIL.Image.Image`)\n\n        Example:\n            ```python\n            import supervision as sv\n\n            image = ...\n            detections = sv.Detections(...)\n\n            corner_annotator = sv.BoxCornerAnnotator()\n            annotated_frame = corner_annotator.annotate(\n                scene=image.copy(),\n                detections=detections\n            )\n            ```\n\n        ![box-corner-annotator-example](https://media.roboflow.com/\n        supervision-annotator-examples/box-corner-annotator-example-purple.png)\n        \"\"\"\n        for detection_idx in range(len(detections)):\n            x1, y1, x2, y2 = detections.xyxy[detection_idx].astype(int)\n            color = resolve_color(\n                color=self.color,\n                detections=detections,\n                detection_idx=detection_idx,\n                color_lookup=self.color_lookup\n                if custom_color_lookup is None\n                else custom_color_lookup,\n            )\n            corners = [(x1, y1), (x2, y1), (x1, y2), (x2, y2)]\n\n            for x, y in corners:\n                x_end = x + self.corner_length if x == x1 else x - self.corner_length\n                cv2.line(\n                    scene, (x, y), (x_end, y), color.as_bgr(), thickness=self.thickness\n                )\n\n                y_end = y + self.corner_length if y == y1 else y - self.corner_length\n                cv2.line(\n                    scene, (x, y), (x, y_end), color.as_bgr(), thickness=self.thickness\n                )\n        return scene\n
OrientedBoxAnnotator

Bases: BaseAnnotator

A class for drawing oriented bounding boxes on an image using provided detections.

Source code in supervision/annotators/core.py
class OrientedBoxAnnotator(BaseAnnotator):\n    \"\"\"\n    A class for drawing oriented bounding boxes on an image using provided detections.\n    \"\"\"\n\n    def __init__(\n        self,\n        color: Union[Color, ColorPalette] = ColorPalette.DEFAULT,\n        thickness: int = 2,\n        color_lookup: ColorLookup = ColorLookup.CLASS,\n    ):\n        \"\"\"\n        Args:\n            color (Union[Color, ColorPalette]): The color or color palette to use for\n                annotating detections.\n            thickness (int): Thickness of the bounding box lines.\n            color_lookup (str): Strategy for mapping colors to annotations.\n                Options are `INDEX`, `CLASS`, `TRACK`.\n        \"\"\"\n        self.color: Union[Color, ColorPalette] = color\n        self.thickness: int = thickness\n        self.color_lookup: ColorLookup = color_lookup\n\n    @convert_for_annotation_method\n    def annotate(\n        self,\n        scene: ImageType,\n        detections: Detections,\n        custom_color_lookup: Optional[np.ndarray] = None,\n    ) -> ImageType:\n        \"\"\"\n        Annotates the given scene with oriented bounding boxes based on the provided detections.\n\n        Args:\n            scene (ImageType): The image where bounding boxes will be drawn.\n                `ImageType` is a flexible type, accepting either `numpy.ndarray`\n                or `PIL.Image.Image`.\n            detections (Detections): Object detections to annotate.\n            custom_color_lookup (Optional[np.ndarray]): Custom color lookup array.\n                Allows to override the default color mapping strategy.\n\n        Returns:\n            The annotated image, matching the type of `scene` (`numpy.ndarray`\n                or `PIL.Image.Image`)\n\n        Example:\n            ```python\n            import cv2\n            import supervision as sv\n            from ultralytics import YOLO\n\n            image = cv2.imread(<SOURCE_IMAGE_PATH>)\n            model = YOLO(\"yolov8n-obb.pt\")\n\n            result = model(image)[0]\n            detections = sv.Detections.from_ultralytics(result)\n\n            oriented_box_annotator = sv.OrientedBoxAnnotator()\n            annotated_frame = oriented_box_annotator.annotate(\n                scene=image.copy(),\n                detections=detections\n            )\n            ```\n        \"\"\"  # noqa E501 // docs\n\n        if detections.data is None or ORIENTED_BOX_COORDINATES not in detections.data:\n            return scene\n\n        for detection_idx in range(len(detections)):\n            bbox = np.intp(detections.data.get(ORIENTED_BOX_COORDINATES)[detection_idx])\n            color = resolve_color(\n                color=self.color,\n                detections=detections,\n                detection_idx=detection_idx,\n                color_lookup=self.color_lookup\n                if custom_color_lookup is None\n                else custom_color_lookup,\n            )\n\n            cv2.drawContours(scene, [bbox], 0, color.as_bgr(), self.thickness)\n\n        return scene\n
ColorAnnotator

Bases: BaseAnnotator

A class for drawing box masks on an image using provided detections.

Source code in supervision/annotators/core.py
class ColorAnnotator(BaseAnnotator):\n    \"\"\"\n    A class for drawing box masks on an image using provided detections.\n    \"\"\"\n\n    def __init__(\n        self,\n        color: Union[Color, ColorPalette] = ColorPalette.DEFAULT,\n        opacity: float = 0.5,\n        color_lookup: ColorLookup = ColorLookup.CLASS,\n    ):\n        \"\"\"\n        Args:\n            color (Union[Color, ColorPalette]): The color or color palette to use for\n                annotating detections.\n            opacity (float): Opacity of the overlay mask. Must be between `0` and `1`.\n            color_lookup (str): Strategy for mapping colors to annotations.\n                Options are `INDEX`, `CLASS`, `TRACK`.\n        \"\"\"\n        self.color: Union[Color, ColorPalette] = color\n        self.color_lookup: ColorLookup = color_lookup\n        self.opacity = opacity\n\n    @convert_for_annotation_method\n    def annotate(\n        self,\n        scene: ImageType,\n        detections: Detections,\n        custom_color_lookup: Optional[np.ndarray] = None,\n    ) -> ImageType:\n        \"\"\"\n        Annotates the given scene with box masks based on the provided detections.\n\n        Args:\n            scene (ImageType): The image where bounding boxes will be drawn.\n                `ImageType` is a flexible type, accepting either `numpy.ndarray`\n                or `PIL.Image.Image`.\n            detections (Detections): Object detections to annotate.\n            custom_color_lookup (Optional[np.ndarray]): Custom color lookup array.\n                Allows to override the default color mapping strategy.\n\n        Returns:\n            The annotated image, matching the type of `scene` (`numpy.ndarray`\n                or `PIL.Image.Image`)\n\n        Example:\n            ```python\n            import supervision as sv\n\n            image = ...\n            detections = sv.Detections(...)\n\n            color_annotator = sv.ColorAnnotator()\n            annotated_frame = color_annotator.annotate(\n                scene=image.copy(),\n                detections=detections\n            )\n            ```\n\n        ![box-mask-annotator-example](https://media.roboflow.com/\n        supervision-annotator-examples/box-mask-annotator-example-purple.png)\n        \"\"\"\n        mask_image = scene.copy()\n        for detection_idx in range(len(detections)):\n            x1, y1, x2, y2 = detections.xyxy[detection_idx].astype(int)\n            color = resolve_color(\n                color=self.color,\n                detections=detections,\n                detection_idx=detection_idx,\n                color_lookup=self.color_lookup\n                if custom_color_lookup is None\n                else custom_color_lookup,\n            )\n            cv2.rectangle(\n                img=scene,\n                pt1=(x1, y1),\n                pt2=(x2, y2),\n                color=color.as_bgr(),\n                thickness=-1,\n            )\n        scene = cv2.addWeighted(\n            scene, self.opacity, mask_image, 1 - self.opacity, gamma=0\n        )\n        return scene\n
CircleAnnotator

Bases: BaseAnnotator

A class for drawing circles on an image using provided detections.

Source code in supervision/annotators/core.py
class CircleAnnotator(BaseAnnotator):\n    \"\"\"\n    A class for drawing circle on an image using provided detections.\n    \"\"\"\n\n    def __init__(\n        self,\n        color: Union[Color, ColorPalette] = ColorPalette.DEFAULT,\n        thickness: int = 2,\n        color_lookup: ColorLookup = ColorLookup.CLASS,\n    ):\n        \"\"\"\n        Args:\n            color (Union[Color, ColorPalette]): The color or color palette to use for\n                annotating detections.\n            thickness (int): Thickness of the circle line.\n            color_lookup (str): Strategy for mapping colors to annotations.\n                Options are `INDEX`, `CLASS`, `TRACK`.\n        \"\"\"\n\n        self.color: Union[Color, ColorPalette] = color\n        self.thickness: int = thickness\n        self.color_lookup: ColorLookup = color_lookup\n\n    @convert_for_annotation_method\n    def annotate(\n        self,\n        scene: ImageType,\n        detections: Detections,\n        custom_color_lookup: Optional[np.ndarray] = None,\n    ) -> ImageType:\n        \"\"\"\n        Annotates the given scene with circles based on the provided detections.\n\n        Args:\n            scene (ImageType): The image where box corners will be drawn.\n                `ImageType` is a flexible type, accepting either `numpy.ndarray`\n                or `PIL.Image.Image`.\n            detections (Detections): Object detections to annotate.\n            custom_color_lookup (Optional[np.ndarray]): Custom color lookup array.\n                Allows to override the default color mapping strategy.\n\n        Returns:\n            The annotated image, matching the type of `scene` (`numpy.ndarray`\n                or `PIL.Image.Image`)\n\n        Example:\n            ```python\n            import supervision as sv\n\n            image = ...\n            detections = sv.Detections(...)\n\n            circle_annotator = sv.CircleAnnotator()\n            annotated_frame = circle_annotator.annotate(\n                scene=image.copy(),\n                detections=detections\n            )\n            ```\n\n\n        ![circle-annotator-example](https://media.roboflow.com/\n        supervision-annotator-examples/circle-annotator-example-purple.png)\n        \"\"\"\n        for detection_idx in range(len(detections)):\n            x1, y1, x2, y2 = detections.xyxy[detection_idx].astype(int)\n            center = ((x1 + x2) // 2, (y1 + y2) // 2)\n            distance = sqrt((x1 - center[0]) ** 2 + (y1 - center[1]) ** 2)\n            color = resolve_color(\n                color=self.color,\n                detections=detections,\n                detection_idx=detection_idx,\n                color_lookup=self.color_lookup\n                if custom_color_lookup is None\n                else custom_color_lookup,\n            )\n            cv2.circle(\n                img=scene,\n                center=center,\n                radius=int(distance),\n                color=color.as_bgr(),\n                thickness=self.thickness,\n            )\n\n        return scene\n
DotAnnotator

Bases: BaseAnnotator

A class for drawing dots on an image at specific coordinates based on provided detections.

Source code in supervision/annotators/core.py
class DotAnnotator(BaseAnnotator):\n    \"\"\"\n    A class for drawing dots on an image at specific coordinates based on provided\n    detections.\n    \"\"\"\n\n    def __init__(\n        self,\n        color: Union[Color, ColorPalette] = ColorPalette.DEFAULT,\n        radius: int = 4,\n        position: Position = Position.CENTER,\n        color_lookup: ColorLookup = ColorLookup.CLASS,\n        outline_thickness: int = 0,\n    ):\n        \"\"\"\n        Args:\n            color (Union[Color, ColorPalette]): The color or color palette to use for\n                annotating detections.\n            radius (int): Radius of the drawn dots.\n            position (Position): The anchor position for placing the dot.\n            color_lookup (ColorLookup): Strategy for mapping colors to annotations.\n                Options are `INDEX`, `CLASS`, `TRACK`.\n            outline_thickness (int): Thickness of the outline of the dot.\n        \"\"\"\n        self.color: Union[Color, ColorPalette] = color\n        self.radius: int = radius\n        self.position: Position = position\n        self.color_lookup: ColorLookup = color_lookup\n        self.outline_thickness = outline_thickness\n\n    @convert_for_annotation_method\n    def annotate(\n        self,\n        scene: ImageType,\n        detections: Detections,\n        custom_color_lookup: Optional[np.ndarray] = None,\n    ) -> ImageType:\n        \"\"\"\n        Annotates the given scene with dots based on the provided detections.\n\n        Args:\n            scene (ImageType): The image where dots will be drawn.\n                `ImageType` is a flexible type, accepting either `numpy.ndarray`\n                or `PIL.Image.Image`.\n            detections (Detections): Object detections to annotate.\n            custom_color_lookup (Optional[np.ndarray]): Custom color lookup array.\n                Allows to override the default color mapping strategy.\n\n        Returns:\n            The annotated image, matching the type of `scene` (`numpy.ndarray`\n                or `PIL.Image.Image`)\n\n        Example:\n            ```python\n            import supervision as sv\n\n            image = ...\n            detections = sv.Detections(...)\n\n            dot_annotator = sv.DotAnnotator()\n            annotated_frame = dot_annotator.annotate(\n                scene=image.copy(),\n                detections=detections\n            )\n            ```\n\n        ![dot-annotator-example](https://media.roboflow.com/\n        supervision-annotator-examples/dot-annotator-example-purple.png)\n        \"\"\"\n        xy = detections.get_anchors_coordinates(anchor=self.position)\n        for detection_idx in range(len(detections)):\n            color = resolve_color(\n                color=self.color,\n                detections=detections,\n                detection_idx=detection_idx,\n                color_lookup=self.color_lookup\n                if custom_color_lookup is None\n                else custom_color_lookup,\n            )\n            center = (int(xy[detection_idx, 0]), int(xy[detection_idx, 1]))\n\n            cv2.circle(scene, center, self.radius, color.as_bgr(), -1)\n            if self.outline_thickness:\n                cv2.circle(\n                    scene, center, self.radius, (0, 0, 0), self.outline_thickness\n                )\n        return scene\n
TriangleAnnotator

Bases: BaseAnnotator

A class for drawing triangle markers on an image at specific coordinates based on provided detections.

Source code in supervision/annotators/core.py
class TriangleAnnotator(BaseAnnotator):\n    \"\"\"\n    A class for drawing triangle markers on an image at specific coordinates based on\n    provided detections.\n    \"\"\"\n\n    def __init__(\n        self,\n        color: Union[Color, ColorPalette] = ColorPalette.DEFAULT,\n        base: int = 10,\n        height: int = 10,\n        position: Position = Position.TOP_CENTER,\n        color_lookup: ColorLookup = ColorLookup.CLASS,\n        outline_thickness: int = 0,\n    ):\n        \"\"\"\n        Args:\n            color (Union[Color, ColorPalette]): The color or color palette to use for\n                annotating detections.\n            base (int): The base width of the triangle.\n            height (int): The height of the triangle.\n            position (Position): The anchor position for placing the triangle.\n            color_lookup (ColorLookup): Strategy for mapping colors to annotations.\n                Options are `INDEX`, `CLASS`, `TRACK`.\n            outline_thickness (int): Thickness of the outline of the triangle.\n        \"\"\"\n        self.color: Union[Color, ColorPalette] = color\n        self.base: int = base\n        self.height: int = height\n        self.position: Position = position\n        self.color_lookup: ColorLookup = color_lookup\n        self.outline_thickness: int = outline_thickness\n\n    @convert_for_annotation_method\n    def annotate(\n        self,\n        scene: ImageType,\n        detections: Detections,\n        custom_color_lookup: Optional[np.ndarray] = None,\n    ) -> ImageType:\n        \"\"\"\n        Annotates the given scene with triangles based on the provided detections.\n\n        Args:\n            scene (ImageType): The image where triangles will be drawn.\n                `ImageType` is a flexible type, accepting either `numpy.ndarray`\n                or `PIL.Image.Image`.\n            detections (Detections): Object detections to annotate.\n            custom_color_lookup (Optional[np.ndarray]): Custom color lookup array.\n                Allows to override the default color mapping strategy.\n\n        Returns:\n            The annotated image, matching the type of `scene` (`numpy.ndarray`\n                or `PIL.Image.Image`)\n\n        Example:\n            ```python\n            import supervision as sv\n\n            image = ...\n            detections = sv.Detections(...)\n\n            triangle_annotator = sv.TriangleAnnotator()\n            annotated_frame = triangle_annotator.annotate(\n                scene=image.copy(),\n                detections=detections\n            )\n            ```\n\n        ![triangle-annotator-example](https://media.roboflow.com/\n        supervision-annotator-examples/triangle-annotator-example.png)\n        \"\"\"\n        xy = detections.get_anchors_coordinates(anchor=self.position)\n        for detection_idx in range(len(detections)):\n            color = resolve_color(\n                color=self.color,\n                detections=detections,\n                detection_idx=detection_idx,\n                color_lookup=self.color_lookup\n                if custom_color_lookup is None\n                else custom_color_lookup,\n            )\n            tip_x, tip_y = int(xy[detection_idx, 0]), int(xy[detection_idx, 1])\n            vertices = np.array(\n                [\n                    [tip_x - self.base // 2, tip_y - self.height],\n                    [tip_x + self.base // 2, tip_y - self.height],\n                    [tip_x, tip_y],\n                ],\n                
np.int32,\n            )\n\n            cv2.fillPoly(scene, [vertices], color.as_bgr())\n            if self.outline_thickness:\n                cv2.polylines(\n                    scene, [vertices], True, (0, 0, 0), thickness=self.outline_thickness\n                )\n        return scene\n
EllipseAnnotator

Bases: BaseAnnotator

A class for drawing ellipses on an image using provided detections.

Source code in supervision/annotators/core.py
class EllipseAnnotator(BaseAnnotator):\n    \"\"\"\n    A class for drawing ellipses on an image using provided detections.\n    \"\"\"\n\n    def __init__(\n        self,\n        color: Union[Color, ColorPalette] = ColorPalette.DEFAULT,\n        thickness: int = 2,\n        start_angle: int = -45,\n        end_angle: int = 235,\n        color_lookup: ColorLookup = ColorLookup.CLASS,\n    ):\n        \"\"\"\n        Args:\n            color (Union[Color, ColorPalette]): The color or color palette to use for\n                annotating detections.\n            thickness (int): Thickness of the ellipse lines.\n            start_angle (int): Starting angle of the ellipse.\n            end_angle (int): Ending angle of the ellipse.\n            color_lookup (str): Strategy for mapping colors to annotations.\n                Options are `INDEX`, `CLASS`, `TRACK`.\n        \"\"\"\n        self.color: Union[Color, ColorPalette] = color\n        self.thickness: int = thickness\n        self.start_angle: int = start_angle\n        self.end_angle: int = end_angle\n        self.color_lookup: ColorLookup = color_lookup\n\n    @convert_for_annotation_method\n    def annotate(\n        self,\n        scene: ImageType,\n        detections: Detections,\n        custom_color_lookup: Optional[np.ndarray] = None,\n    ) -> ImageType:\n        \"\"\"\n        Annotates the given scene with ellipses based on the provided detections.\n\n        Args:\n            scene (ImageType): The image where ellipses will be drawn.\n                `ImageType` is a flexible type, accepting either `numpy.ndarray`\n                or `PIL.Image.Image`.\n            detections (Detections): Object detections to annotate.\n            custom_color_lookup (Optional[np.ndarray]): Custom color lookup array.\n                Allows to override the default color mapping strategy.\n\n        Returns:\n            The annotated image, matching the type of `scene` (`numpy.ndarray`\n                or `PIL.Image.Image`)\n\n        Example:\n            ```python\n            import supervision as sv\n\n            image = ...\n            detections = sv.Detections(...)\n\n            ellipse_annotator = sv.EllipseAnnotator()\n            annotated_frame = ellipse_annotator.annotate(\n                scene=image.copy(),\n                detections=detections\n            )\n            ```\n\n        ![ellipse-annotator-example](https://media.roboflow.com/\n        supervision-annotator-examples/ellipse-annotator-example-purple.png)\n        \"\"\"\n        for detection_idx in range(len(detections)):\n            x1, y1, x2, y2 = detections.xyxy[detection_idx].astype(int)\n            color = resolve_color(\n                color=self.color,\n                detections=detections,\n                detection_idx=detection_idx,\n                color_lookup=self.color_lookup\n                if custom_color_lookup is None\n                else custom_color_lookup,\n            )\n            center = (int((x1 + x2) / 2), y2)\n            width = x2 - x1\n            cv2.ellipse(\n                scene,\n                center=center,\n                axes=(int(width), int(0.35 * width)),\n                angle=0.0,\n                startAngle=self.start_angle,\n                endAngle=self.end_angle,\n                color=color.as_bgr(),\n                thickness=self.thickness,\n                lineType=cv2.LINE_4,\n            )\n        return scene\n
HaloAnnotator

Bases: BaseAnnotator

A class for drawing halos on an image using provided detections.

Warning

This annotator uses sv.Detections.mask.

Source code in supervision/annotators/core.py
class HaloAnnotator(BaseAnnotator):\n    \"\"\"\n    A class for drawing Halos on an image using provided detections.\n\n    !!! warning\n\n        This annotator uses `sv.Detections.mask`.\n    \"\"\"\n\n    def __init__(\n        self,\n        color: Union[Color, ColorPalette] = ColorPalette.DEFAULT,\n        opacity: float = 0.8,\n        kernel_size: int = 40,\n        color_lookup: ColorLookup = ColorLookup.CLASS,\n    ):\n        \"\"\"\n        Args:\n            color (Union[Color, ColorPalette]): The color or color palette to use for\n                annotating detections.\n            opacity (float): Opacity of the overlay mask. Must be between `0` and `1`.\n            kernel_size (int): The size of the average pooling kernel used for creating\n                the halo.\n            color_lookup (str): Strategy for mapping colors to annotations.\n                Options are `INDEX`, `CLASS`, `TRACK`.\n        \"\"\"\n        self.color: Union[Color, ColorPalette] = color\n        self.opacity = opacity\n        self.color_lookup: ColorLookup = color_lookup\n        self.kernel_size: int = kernel_size\n\n    @convert_for_annotation_method\n    def annotate(\n        self,\n        scene: ImageType,\n        detections: Detections,\n        custom_color_lookup: Optional[np.ndarray] = None,\n    ) -> ImageType:\n        \"\"\"\n        Annotates the given scene with halos based on the provided detections.\n\n        Args:\n            scene (ImageType): The image where masks will be drawn.\n                `ImageType` is a flexible type, accepting either `numpy.ndarray`\n                or `PIL.Image.Image`.\n            detections (Detections): Object detections to annotate.\n            custom_color_lookup (Optional[np.ndarray]): Custom color lookup array.\n                Allows to override the default color mapping strategy.\n\n        Returns:\n            The annotated image, matching the type of `scene` (`numpy.ndarray`\n                or `PIL.Image.Image`)\n\n        Example:\n            ```python\n            import supervision as sv\n\n            image = ...\n            detections = sv.Detections(...)\n\n            halo_annotator = sv.HaloAnnotator()\n            annotated_frame = halo_annotator.annotate(\n                scene=image.copy(),\n                detections=detections\n            )\n            ```\n\n        ![halo-annotator-example](https://media.roboflow.com/\n        supervision-annotator-examples/halo-annotator-example-purple.png)\n        \"\"\"\n        if detections.mask is None:\n            return scene\n        colored_mask = np.zeros_like(scene, dtype=np.uint8)\n        fmask = np.array([False] * scene.shape[0] * scene.shape[1]).reshape(\n            scene.shape[0], scene.shape[1]\n        )\n\n        for detection_idx in np.flip(np.argsort(detections.area)):\n            color = resolve_color(\n                color=self.color,\n                detections=detections,\n                detection_idx=detection_idx,\n                color_lookup=self.color_lookup\n                if custom_color_lookup is None\n                else custom_color_lookup,\n            )\n            mask = detections.mask[detection_idx]\n            fmask = np.logical_or(fmask, mask)\n            color_bgr = color.as_bgr()\n            colored_mask[mask] = color_bgr\n\n        colored_mask = cv2.blur(colored_mask, (self.kernel_size, self.kernel_size))\n        colored_mask[fmask] = [0, 0, 0]\n        gray = cv2.cvtColor(colored_mask, cv2.COLOR_BGR2GRAY)\n        
alpha = self.opacity * gray / gray.max()\n        alpha_mask = alpha[:, :, np.newaxis]\n        scene = np.uint8(scene * (1 - alpha_mask) + colored_mask * self.opacity)\n        return scene\n
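As the warning above notes, `HaloAnnotator` requires `sv.Detections.mask` to be populated, so detections should come from a segmentation model. A minimal sketch assuming an Ultralytics segmentation checkpoint (`yolov8n-seg.pt` here is an assumption) and a placeholder image path:

```python
import cv2
import supervision as sv
from ultralytics import YOLO

image = cv2.imread("...")          # placeholder path
model = YOLO("yolov8n-seg.pt")     # assumed segmentation checkpoint; provides masks

result = model(image)[0]
detections = sv.Detections.from_ultralytics(result)  # fills detections.mask

halo_annotator = sv.HaloAnnotator()
annotated_frame = halo_annotator.annotate(
    scene=image.copy(),
    detections=detections
)
```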
PercentageBarAnnotator

Bases: BaseAnnotator

A class for drawing percentage bars on an image using provided detections.

Source code in supervision/annotators/core.py
class PercentageBarAnnotator(BaseAnnotator):\n    \"\"\"\n    A class for drawing percentage bars on an image using provided detections.\n    \"\"\"\n\n    def __init__(\n        self,\n        height: int = 16,\n        width: int = 80,\n        color: Union[Color, ColorPalette] = ColorPalette.DEFAULT,\n        border_color: Color = Color.BLACK,\n        position: Position = Position.TOP_CENTER,\n        color_lookup: ColorLookup = ColorLookup.CLASS,\n        border_thickness: int = None,\n    ):\n        \"\"\"\n        Args:\n            height (int): The height in pixels of the percentage bar.\n            width (int): The width in pixels of the percentage bar.\n            color (Union[Color, ColorPalette]): The color or color palette to use for\n                annotating detections.\n            border_color (Color): The color of the border lines.\n            position (Position): The anchor position of drawing the percentage bar.\n            color_lookup (str): Strategy for mapping colors to annotations.\n                Options are `INDEX`, `CLASS`, `TRACK`.\n            border_thickness (int): The thickness of the border lines.\n        \"\"\"\n        self.height: int = height\n        self.width: int = width\n        self.color: Union[Color, ColorPalette] = color\n        self.border_color: Color = border_color\n        self.position: Position = position\n        self.color_lookup: ColorLookup = color_lookup\n\n        if border_thickness is None:\n            self.border_thickness = int(0.15 * self.height)\n\n    @convert_for_annotation_method\n    def annotate(\n        self,\n        scene: ImageType,\n        detections: Detections,\n        custom_color_lookup: Optional[np.ndarray] = None,\n        custom_values: Optional[np.ndarray] = None,\n    ) -> ImageType:\n        \"\"\"\n        Annotates the given scene with percentage bars based on the provided\n        detections. The percentage bars visually represent the confidence or custom\n        values associated with each detection.\n\n        Args:\n            scene (ImageType): The image where percentage bars will be drawn.\n                `ImageType` is a flexible type, accepting either `numpy.ndarray`\n                or `PIL.Image.Image`.\n            detections (Detections): Object detections to annotate.\n            custom_color_lookup (Optional[np.ndarray]): Custom color lookup array.\n                Allows to override the default color mapping strategy.\n            custom_values (Optional[np.ndarray]): Custom values array to use instead\n                of the default detection confidences. 
This array should have the\n                same length as the number of detections and contain a value between\n                0 and 1 (inclusive) for each detection, representing the percentage\n                to be displayed.\n\n        Returns:\n            The annotated image, matching the type of `scene` (`numpy.ndarray`\n                or `PIL.Image.Image`)\n\n        Example:\n            ```python\n            import supervision as sv\n\n            image = ...\n            detections = sv.Detections(...)\n\n            percentage_bar_annotator = sv.PercentageBarAnnotator()\n            annotated_frame = percentage_bar_annotator.annotate(\n                scene=image.copy(),\n                detections=detections\n            )\n            ```\n\n        ![percentage-bar-example](https://media.roboflow.com/\n        supervision-annotator-examples/percentage-bar-annotator-example-purple.png)\n        \"\"\"\n        self.validate_custom_values(\n            custom_values=custom_values, detections_count=len(detections)\n        )\n        anchors = detections.get_anchors_coordinates(anchor=self.position)\n        for detection_idx in range(len(detections)):\n            anchor = anchors[detection_idx]\n            border_coordinates = self.calculate_border_coordinates(\n                anchor_xy=(int(anchor[0]), int(anchor[1])),\n                border_wh=(self.width, self.height),\n                position=self.position,\n            )\n            border_width = border_coordinates[1][0] - border_coordinates[0][0]\n\n            value = (\n                custom_values[detection_idx]\n                if custom_values is not None\n                else detections.confidence[detection_idx]\n            )\n\n            color = resolve_color(\n                color=self.color,\n                detections=detections,\n                detection_idx=detection_idx,\n                color_lookup=self.color_lookup\n                if custom_color_lookup is None\n                else custom_color_lookup,\n            )\n            cv2.rectangle(\n                img=scene,\n                pt1=border_coordinates[0],\n                pt2=(\n                    border_coordinates[0][0] + int(border_width * value),\n                    border_coordinates[1][1],\n                ),\n                color=color.as_bgr(),\n                thickness=-1,\n            )\n            cv2.rectangle(\n                img=scene,\n                pt1=border_coordinates[0],\n                pt2=border_coordinates[1],\n                color=self.border_color.as_bgr(),\n                thickness=self.border_thickness,\n            )\n        return scene\n\n    @staticmethod\n    def calculate_border_coordinates(\n        anchor_xy: Tuple[int, int], border_wh: Tuple[int, int], position: Position\n    ) -> Tuple[Tuple[int, int], Tuple[int, int]]:\n        cx, cy = anchor_xy\n        width, height = border_wh\n\n        if position == Position.TOP_LEFT:\n            return (cx - width, cy - height), (cx, cy)\n        elif position == Position.TOP_CENTER:\n            return (cx - width // 2, cy), (cx + width // 2, cy - height)\n        elif position == Position.TOP_RIGHT:\n            return (cx, cy), (cx + width, cy - height)\n        elif position == Position.CENTER_LEFT:\n            return (cx - width, cy - height // 2), (cx, cy + height // 2)\n        elif position == Position.CENTER or position == Position.CENTER_OF_MASS:\n            return (\n                (cx - width // 2, cy - height // 2),\n   
             (cx + width // 2, cy + height // 2),\n            )\n        elif position == Position.CENTER_RIGHT:\n            return (cx, cy - height // 2), (cx + width, cy + height // 2)\n        elif position == Position.BOTTOM_LEFT:\n            return (cx - width, cy), (cx, cy + height)\n        elif position == Position.BOTTOM_CENTER:\n            return (cx - width // 2, cy), (cx + width // 2, cy + height)\n        elif position == Position.BOTTOM_RIGHT:\n            return (cx, cy), (cx + width, cy + height)\n\n    @staticmethod\n    def validate_custom_values(\n        custom_values: Optional[Union[np.ndarray, List[float]]], detections_count: int\n    ) -> None:\n        if custom_values is not None:\n            if not isinstance(custom_values, (np.ndarray, list)):\n                raise TypeError(\n                    \"custom_values must be either a numpy array or a list of floats.\"\n                )\n\n            if len(custom_values) != detections_count:\n                raise ValueError(\n                    \"The length of custom_values must match the number of detections.\"\n                )\n\n            if not all(0 <= value <= 1 for value in custom_values):\n                raise ValueError(\"All values in custom_values must be between 0 and 1.\")\n
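A small sketch, complementing the docstring example above, that feeds `custom_values` instead of relying on detection confidences; the values and sizes below are made-up placeholders.

```python
import numpy as np
import supervision as sv

image = ...                      # image to annotate
detections = sv.Detections(...)  # detections to visualize

percentage_bar_annotator = sv.PercentageBarAnnotator(
    width=60, height=12, position=sv.Position.TOP_CENTER
)

# one value in [0, 1] per detection, e.g. a custom quality score
custom_values = np.linspace(0.2, 1.0, len(detections))

annotated_frame = percentage_bar_annotator.annotate(
    scene=image.copy(),
    detections=detections,
    custom_values=custom_values,
)
```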
HeatMapAnnotator

A class for drawing heatmaps on an image based on provided detections. Heat accumulates over time and is drawn as a semi-transparent overlay of blurred circles.

Source code in supervision/annotators/core.py
class HeatMapAnnotator:\n    \"\"\"\n    A class for drawing heatmaps on an image based on provided detections.\n    Heat accumulates over time and is drawn as a semi-transparent overlay\n    of blurred circles.\n    \"\"\"\n\n    def __init__(\n        self,\n        position: Position = Position.BOTTOM_CENTER,\n        opacity: float = 0.2,\n        radius: int = 40,\n        kernel_size: int = 25,\n        top_hue: int = 0,\n        low_hue: int = 125,\n    ):\n        \"\"\"\n        Args:\n            position (Position): The position of the heatmap. Defaults to\n                `BOTTOM_CENTER`.\n            opacity (float): Opacity of the overlay mask, between 0 and 1.\n            radius (int): Radius of the heat circle.\n            kernel_size (int): Kernel size for blurring the heatmap.\n            top_hue (int): Hue at the top of the heatmap. Defaults to 0 (red).\n            low_hue (int): Hue at the bottom of the heatmap. Defaults to 125 (blue).\n        \"\"\"\n        self.position = position\n        self.opacity = opacity\n        self.radius = radius\n        self.kernel_size = kernel_size\n        self.heat_mask = None\n        self.top_hue = top_hue\n        self.low_hue = low_hue\n\n    @convert_for_annotation_method\n    def annotate(self, scene: ImageType, detections: Detections) -> ImageType:\n        \"\"\"\n        Annotates the scene with a heatmap based on the provided detections.\n\n        Args:\n            scene (ImageType): The image where the heatmap will be drawn.\n                `ImageType` is a flexible type, accepting either `numpy.ndarray`\n                or `PIL.Image.Image`.\n            detections (Detections): Object detections to annotate.\n\n        Returns:\n            The annotated image, matching the type of `scene` (`numpy.ndarray`\n                or `PIL.Image.Image`)\n\n        Example:\n            ```python\n            import supervision as sv\n            from ultralytics import YOLO\n\n            model = YOLO('yolov8x.pt')\n\n            heat_map_annotator = sv.HeatMapAnnotator()\n\n            video_info = sv.VideoInfo.from_video_path(video_path='...')\n            frames_generator = get_video_frames_generator(source_path='...')\n\n            with sv.VideoSink(target_path='...', video_info=video_info) as sink:\n               for frame in frames_generator:\n                   result = model(frame)[0]\n                   detections = sv.Detections.from_ultralytics(result)\n                   annotated_frame = heat_map_annotator.annotate(\n                       scene=frame.copy(),\n                       detections=detections)\n                   sink.write_frame(frame=annotated_frame)\n            ```\n\n        ![heatmap-annotator-example](https://media.roboflow.com/\n        supervision-annotator-examples/heat-map-annotator-example-purple.png)\n        \"\"\"\n\n        if self.heat_mask is None:\n            self.heat_mask = np.zeros(scene.shape[:2])\n        mask = np.zeros(scene.shape[:2])\n        for xy in detections.get_anchors_coordinates(self.position):\n            cv2.circle(mask, (int(xy[0]), int(xy[1])), self.radius, 1, -1)\n        self.heat_mask = mask + self.heat_mask\n        temp = self.heat_mask.copy()\n        temp = self.low_hue - temp / temp.max() * (self.low_hue - self.top_hue)\n        temp = temp.astype(np.uint8)\n        if self.kernel_size is not None:\n            temp = cv2.blur(temp, (self.kernel_size, self.kernel_size))\n        hsv = np.zeros(scene.shape)\n        hsv[..., 0] = temp\n        
hsv[..., 1] = 255\n        hsv[..., 2] = 255\n        temp = cv2.cvtColor(hsv.astype(np.uint8), cv2.COLOR_HSV2BGR)\n        mask = cv2.cvtColor(self.heat_mask.astype(np.uint8), cv2.COLOR_GRAY2BGR) > 0\n        scene[mask] = cv2.addWeighted(temp, self.opacity, scene, 1 - self.opacity, 0)[\n            mask\n        ]\n        return scene\n
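Because the annotator keeps an internal `heat_mask`, the same instance has to be reused across frames for heat to accumulate. A rough sketch of that loop (model and video path are placeholders):

```python
import supervision as sv
from ultralytics import YOLO

model = YOLO('yolov8x.pt')
heat_map_annotator = sv.HeatMapAnnotator(opacity=0.3, radius=30)

# reuse the single annotator instance so heat accumulates frame after frame
for frame in sv.get_video_frames_generator(source_path='...'):
    detections = sv.Detections.from_ultralytics(model(frame)[0])
    annotated_frame = heat_map_annotator.annotate(
        scene=frame.copy(), detections=detections
    )
```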
MaskAnnotator

Bases: BaseAnnotator

A class for drawing masks on an image using provided detections.

Warning

This annotator uses sv.Detections.mask.

Source code in supervision/annotators/core.py
class MaskAnnotator(BaseAnnotator):\n    \"\"\"\n    A class for drawing masks on an image using provided detections.\n\n    !!! warning\n\n        This annotator uses `sv.Detections.mask`.\n    \"\"\"\n\n    def __init__(\n        self,\n        color: Union[Color, ColorPalette] = ColorPalette.DEFAULT,\n        opacity: float = 0.5,\n        color_lookup: ColorLookup = ColorLookup.CLASS,\n    ):\n        \"\"\"\n        Args:\n            color (Union[Color, ColorPalette]): The color or color palette to use for\n                annotating detections.\n            opacity (float): Opacity of the overlay mask. Must be between `0` and `1`.\n            color_lookup (str): Strategy for mapping colors to annotations.\n                Options are `INDEX`, `CLASS`, `TRACK`.\n        \"\"\"\n        self.color: Union[Color, ColorPalette] = color\n        self.opacity = opacity\n        self.color_lookup: ColorLookup = color_lookup\n\n    @convert_for_annotation_method\n    def annotate(\n        self,\n        scene: ImageType,\n        detections: Detections,\n        custom_color_lookup: Optional[np.ndarray] = None,\n    ) -> ImageType:\n        \"\"\"\n        Annotates the given scene with masks based on the provided detections.\n\n        Args:\n            scene (ImageType): The image where masks will be drawn.\n                `ImageType` is a flexible type, accepting either `numpy.ndarray`\n                or `PIL.Image.Image`.\n            detections (Detections): Object detections to annotate.\n            custom_color_lookup (Optional[np.ndarray]): Custom color lookup array.\n                Allows to override the default color mapping strategy.\n\n        Returns:\n            The annotated image, matching the type of `scene` (`numpy.ndarray`\n                or `PIL.Image.Image`)\n\n        Example:\n            ```python\n            import supervision as sv\n\n            image = ...\n            detections = sv.Detections(...)\n\n            mask_annotator = sv.MaskAnnotator()\n            annotated_frame = mask_annotator.annotate(\n                scene=image.copy(),\n                detections=detections\n            )\n            ```\n\n        ![mask-annotator-example](https://media.roboflow.com/\n        supervision-annotator-examples/mask-annotator-example-purple.png)\n        \"\"\"\n        if detections.mask is None:\n            return scene\n\n        colored_mask = np.array(scene, copy=True, dtype=np.uint8)\n\n        for detection_idx in np.flip(np.argsort(detections.area)):\n            color = resolve_color(\n                color=self.color,\n                detections=detections,\n                detection_idx=detection_idx,\n                color_lookup=self.color_lookup\n                if custom_color_lookup is None\n                else custom_color_lookup,\n            )\n            mask = detections.mask[detection_idx]\n            colored_mask[mask] = color.as_bgr()\n\n        scene = cv2.addWeighted(colored_mask, self.opacity, scene, 1 - self.opacity, 0)\n        return scene.astype(np.uint8)\n
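A sketch showing how MaskAnnotator can be chained with another annotator on the same scene (assuming `detections` already carries segmentation masks):

```python
import supervision as sv

image = ...                      # image to annotate
detections = sv.Detections(...)  # must include segmentation masks

mask_annotator = sv.MaskAnnotator(opacity=0.4)
box_annotator = sv.BoundingBoxAnnotator(thickness=2)

# annotators can be chained; each call returns the updated scene
annotated_frame = mask_annotator.annotate(scene=image.copy(), detections=detections)
annotated_frame = box_annotator.annotate(scene=annotated_frame, detections=detections)
```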
PolygonAnnotator

Bases: BaseAnnotator

A class for drawing polygons on an image using provided detections.

Warning

This annotator uses sv.Detections.mask.

Source code in supervision/annotators/core.py
class PolygonAnnotator(BaseAnnotator):\n    \"\"\"\n    A class for drawing polygons on an image using provided detections.\n\n    !!! warning\n\n        This annotator uses `sv.Detections.mask`.\n    \"\"\"\n\n    def __init__(\n        self,\n        color: Union[Color, ColorPalette] = ColorPalette.DEFAULT,\n        thickness: int = 2,\n        color_lookup: ColorLookup = ColorLookup.CLASS,\n    ):\n        \"\"\"\n        Args:\n            color (Union[Color, ColorPalette]): The color or color palette to use for\n                annotating detections.\n            thickness (int): Thickness of the polygon lines.\n            color_lookup (str): Strategy for mapping colors to annotations.\n                Options are `INDEX`, `CLASS`, `TRACK`.\n        \"\"\"\n        self.color: Union[Color, ColorPalette] = color\n        self.thickness: int = thickness\n        self.color_lookup: ColorLookup = color_lookup\n\n    @convert_for_annotation_method\n    def annotate(\n        self,\n        scene: ImageType,\n        detections: Detections,\n        custom_color_lookup: Optional[np.ndarray] = None,\n    ) -> ImageType:\n        \"\"\"\n        Annotates the given scene with polygons based on the provided detections.\n\n        Args:\n            scene (ImageType): The image where polygons will be drawn.\n                `ImageType` is a flexible type, accepting either `numpy.ndarray`\n                or `PIL.Image.Image`.\n            detections (Detections): Object detections to annotate.\n            custom_color_lookup (Optional[np.ndarray]): Custom color lookup array.\n                Allows to override the default color mapping strategy.\n\n        Returns:\n            The annotated image, matching the type of `scene` (`numpy.ndarray`\n                or `PIL.Image.Image`)\n\n        Example:\n            ```python\n            import supervision as sv\n\n            image = ...\n            detections = sv.Detections(...)\n\n            polygon_annotator = sv.PolygonAnnotator()\n            annotated_frame = polygon_annotator.annotate(\n                scene=image.copy(),\n                detections=detections\n            )\n            ```\n\n        ![polygon-annotator-example](https://media.roboflow.com/\n        supervision-annotator-examples/polygon-annotator-example-purple.png)\n        \"\"\"\n        if detections.mask is None:\n            return scene\n\n        for detection_idx in range(len(detections)):\n            mask = detections.mask[detection_idx]\n            color = resolve_color(\n                color=self.color,\n                detections=detections,\n                detection_idx=detection_idx,\n                color_lookup=self.color_lookup\n                if custom_color_lookup is None\n                else custom_color_lookup,\n            )\n            for polygon in mask_to_polygons(mask=mask):\n                scene = draw_polygon(\n                    scene=scene,\n                    polygon=polygon,\n                    color=color,\n                    thickness=self.thickness,\n                )\n\n        return scene\n
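A sketch pairing polygon outlines with labels anchored at each mask's center of mass (assuming `detections.mask` is populated):

```python
import supervision as sv

image = ...                      # image to annotate
detections = sv.Detections(...)  # must include segmentation masks

polygon_annotator = sv.PolygonAnnotator(thickness=2)
label_annotator = sv.LabelAnnotator(text_position=sv.Position.CENTER_OF_MASS)

annotated_frame = polygon_annotator.annotate(scene=image.copy(), detections=detections)
annotated_frame = label_annotator.annotate(scene=annotated_frame, detections=detections)
```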
LabelAnnotator

A class for annotating labels on an image using provided detections.

Source code in supervision/annotators/core.py
class LabelAnnotator:\n    \"\"\"\n    A class for annotating labels on an image using provided detections.\n    \"\"\"\n\n    def __init__(\n        self,\n        color: Union[Color, ColorPalette] = ColorPalette.DEFAULT,\n        text_color: Color = Color.WHITE,\n        text_scale: float = 0.5,\n        text_thickness: int = 1,\n        text_padding: int = 10,\n        text_position: Position = Position.TOP_LEFT,\n        color_lookup: ColorLookup = ColorLookup.CLASS,\n        border_radius: int = 0,\n    ):\n        \"\"\"\n        Args:\n            color (Union[Color, ColorPalette]): The color or color palette to use for\n                annotating the text background.\n            text_color (Color): The color to use for the text.\n            text_scale (float): Font scale for the text.\n            text_thickness (int): Thickness of the text characters.\n            text_padding (int): Padding around the text within its background box.\n            text_position (Position): Position of the text relative to the detection.\n                Possible values are defined in the `Position` enum.\n            color_lookup (str): Strategy for mapping colors to annotations.\n                Options are `INDEX`, `CLASS`, `TRACK`.\n            border_radius (int): The radius to apply round edges. If the selected\n                value is higher than the lower dimension, width or height, is clipped.\n        \"\"\"\n        self.border_radius: int = border_radius\n        self.color: Union[Color, ColorPalette] = color\n        self.text_color: Color = text_color\n        self.text_scale: float = text_scale\n        self.text_thickness: int = text_thickness\n        self.text_padding: int = text_padding\n        self.text_anchor: Position = text_position\n        self.color_lookup: ColorLookup = color_lookup\n\n    @convert_for_annotation_method\n    def annotate(\n        self,\n        scene: ImageType,\n        detections: Detections,\n        labels: List[str] = None,\n        custom_color_lookup: Optional[np.ndarray] = None,\n    ) -> ImageType:\n        \"\"\"\n        Annotates the given scene with labels based on the provided detections.\n\n        Args:\n            scene (ImageType): The image where labels will be drawn.\n                `ImageType` is a flexible type, accepting either `numpy.ndarray`\n                or `PIL.Image.Image`.\n            detections (Detections): Object detections to annotate.\n            labels (List[str]): Optional. 
Custom labels for each detection.\n            custom_color_lookup (Optional[np.ndarray]): Custom color lookup array.\n                Allows to override the default color mapping strategy.\n\n        Returns:\n            The annotated image, matching the type of `scene` (`numpy.ndarray`\n                or `PIL.Image.Image`)\n\n        Example:\n            ```python\n             import supervision as sv\n\n            image = ...\n            detections = sv.Detections(...)\n\n            labels = [\n                f\"{class_name} {confidence:.2f}\"\n                for class_name, confidence\n                in zip(detections['class_name'], detections.confidence)\n            ]\n\n            label_annotator = sv.LabelAnnotator(text_position=sv.Position.CENTER)\n            annotated_frame = label_annotator.annotate(\n                scene=image.copy(),\n                detections=detections,\n                labels=labels\n            )\n            ```\n\n        ![label-annotator-example](https://media.roboflow.com/\n        supervision-annotator-examples/label-annotator-example-purple.png)\n        \"\"\"\n        font = cv2.FONT_HERSHEY_SIMPLEX\n        anchors_coordinates = detections.get_anchors_coordinates(\n            anchor=self.text_anchor\n        ).astype(int)\n        if labels is not None and len(labels) != len(detections):\n            raise ValueError(\n                f\"The number of labels provided ({len(labels)}) does not match the \"\n                f\"number of detections ({len(detections)}). Each detection should have \"\n                f\"a corresponding label. This discrepancy can occur if the labels and \"\n                f\"detections are not aligned or if an incorrect number of labels has \"\n                f\"been provided. 
Please ensure that the labels array has the same \"\n                f\"length as the Detections object.\"\n            )\n\n        for detection_idx, center_coordinates in enumerate(anchors_coordinates):\n            color = resolve_color(\n                color=self.color,\n                detections=detections,\n                detection_idx=detection_idx,\n                color_lookup=(\n                    self.color_lookup\n                    if custom_color_lookup is None\n                    else custom_color_lookup\n                ),\n            )\n\n            if labels is not None:\n                text = labels[detection_idx]\n            elif detections[CLASS_NAME_DATA_FIELD] is not None:\n                text = detections[CLASS_NAME_DATA_FIELD][detection_idx]\n            elif detections.class_id is not None:\n                text = str(detections.class_id[detection_idx])\n            else:\n                text = str(detection_idx)\n\n            text_w, text_h = cv2.getTextSize(\n                text=text,\n                fontFace=font,\n                fontScale=self.text_scale,\n                thickness=self.text_thickness,\n            )[0]\n            text_w_padded = text_w + 2 * self.text_padding\n            text_h_padded = text_h + 2 * self.text_padding\n            text_background_xyxy = resolve_text_background_xyxy(\n                center_coordinates=tuple(center_coordinates),\n                text_wh=(text_w_padded, text_h_padded),\n                position=self.text_anchor,\n            )\n\n            text_x = text_background_xyxy[0] + self.text_padding\n            text_y = text_background_xyxy[1] + self.text_padding + text_h\n\n            self.draw_rounded_rectangle(\n                scene=scene,\n                xyxy=text_background_xyxy,\n                color=color.as_bgr(),\n                border_radius=self.border_radius,\n            )\n            cv2.putText(\n                img=scene,\n                text=text,\n                org=(text_x, text_y),\n                fontFace=font,\n                fontScale=self.text_scale,\n                color=self.text_color.as_rgb(),\n                thickness=self.text_thickness,\n                lineType=cv2.LINE_AA,\n            )\n        return scene\n\n    @staticmethod\n    def draw_rounded_rectangle(\n        scene: np.ndarray,\n        xyxy: Tuple[int, int, int, int],\n        color: Tuple[int, int, int],\n        border_radius: int,\n    ) -> np.ndarray:\n        x1, y1, x2, y2 = xyxy\n        width = x2 - x1\n        height = y2 - y1\n\n        border_radius = min(border_radius, min(width, height) // 2)\n\n        rectangle_coordinates = [\n            ((x1 + border_radius, y1), (x2 - border_radius, y2)),\n            ((x1, y1 + border_radius), (x2, y2 - border_radius)),\n        ]\n        circle_centers = [\n            (x1 + border_radius, y1 + border_radius),\n            (x2 - border_radius, y1 + border_radius),\n            (x1 + border_radius, y2 - border_radius),\n            (x2 - border_radius, y2 - border_radius),\n        ]\n\n        for coordinates in rectangle_coordinates:\n            cv2.rectangle(\n                img=scene,\n                pt1=coordinates[0],\n                pt2=coordinates[1],\n                color=color,\n                thickness=-1,\n            )\n        for center in circle_centers:\n            cv2.circle(\n                img=scene,\n                center=center,\n                radius=border_radius,\n                color=color,\n         
       thickness=-1,\n            )\n        return scene\n
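When `labels` is omitted, the annotator falls back to `detections['class_name']`, then `detections.class_id`, then the detection index. A minimal sketch of that default path (styling values are illustrative):

```python
import supervision as sv

image = ...
detections = sv.Detections(...)

# no labels argument: text comes from class_name / class_id / index fallback
label_annotator = sv.LabelAnnotator(
    text_scale=0.6,
    text_padding=6,
    border_radius=4,
)
annotated_frame = label_annotator.annotate(scene=image.copy(), detections=detections)
```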
RichLabelAnnotator

A class for annotating labels on an image using provided detections, with support for Unicode characters by using a custom font.

Source code in supervision/annotators/core.py
class RichLabelAnnotator:\n    \"\"\"\n    A class for annotating labels on an image using provided detections,\n    with support for Unicode characters by using a custom font.\n    \"\"\"\n\n    def __init__(\n        self,\n        color: Union[Color, ColorPalette] = ColorPalette.DEFAULT,\n        text_color: Color = Color.WHITE,\n        font_path: str = None,\n        font_size: int = 10,\n        text_padding: int = 10,\n        text_position: Position = Position.TOP_LEFT,\n        color_lookup: ColorLookup = ColorLookup.CLASS,\n        border_radius: int = 0,\n    ):\n        \"\"\"\n        Args:\n            color (Union[Color, ColorPalette]): The color or color palette to use for\n                annotating the text background.\n            text_color (Color): The color to use for the text.\n            font_path (str): Path to the font file (e.g., \".ttf\" or \".otf\") to use for\n                rendering text. If `None`, the default PIL font will be used.\n            font_size (int): Font size for the text.\n            text_padding (int): Padding around the text within its background box.\n            text_position (Position): Position of the text relative to the detection.\n                Possible values are defined in the `Position` enum.\n            color_lookup (ColorLookup): Strategy for mapping colors to annotations.\n                Options are `INDEX`, `CLASS`, `TRACK`.\n            border_radius (int): The radius to apply round edges. If the selected\n                value is higher than the lower dimension, width or height, is clipped.\n        \"\"\"\n        self.color = color\n        self.text_color = text_color\n        self.text_padding = text_padding\n        self.text_anchor = text_position\n        self.color_lookup = color_lookup\n        self.border_radius = border_radius\n        if font_path is not None:\n            try:\n                self.font = ImageFont.truetype(font_path, font_size)\n            except OSError:\n                print(f\"Font path '{font_path}' not found. Using PIL's default font.\")\n                self.font = ImageFont.load_default(size=font_size)\n        else:\n            self.font = ImageFont.load_default(size=font_size)\n\n    def annotate(\n        self,\n        scene: ImageType,\n        detections: Detections,\n        labels: List[str] = None,\n        custom_color_lookup: Optional[np.ndarray] = None,\n    ) -> ImageType:\n        \"\"\"\n        Annotates the given scene with labels based on the provided\n        detections, with support for Unicode characters.\n\n        Args:\n            scene (ImageType): The image where labels will be drawn.\n                `ImageType` is a flexible type, accepting either `numpy.ndarray`\n                or `PIL.Image.Image`.\n            detections (Detections): Object detections to annotate.\n            labels (List[str]): Optional. 
Custom labels for each detection.\n            custom_color_lookup (Optional[np.ndarray]): Custom color lookup array.\n                Allows to override the default color mapping strategy.\n\n        Returns:\n            The annotated image, matching the type of `scene` (`numpy.ndarray`\n                or `PIL.Image.Image`)\n\n        Example:\n            ```python\n            import supervision as sv\n\n            image = ...\n            detections = sv.Detections(...)\n\n            labels = [\n                f\"{class_name} {confidence:.2f}\"\n                for class_name, confidence\n                in zip(detections['class_name'], detections.confidence)\n            ]\n\n            rich_label_annotator = sv.RichLabelAnnotator(font_path=\"path/to/font.ttf\")\n            annotated_frame = label_annotator.annotate(\n                scene=image.copy(),\n                detections=detections,\n                labels=labels\n            )\n            ```\n\n        \"\"\"\n        if isinstance(scene, np.ndarray):\n            scene = Image.fromarray(cv2.cvtColor(scene, cv2.COLOR_BGR2RGB))\n        draw = ImageDraw.Draw(scene)\n        anchors_coordinates = detections.get_anchors_coordinates(\n            anchor=self.text_anchor\n        ).astype(int)\n        if labels is not None and len(labels) != len(detections):\n            raise ValueError(\n                f\"The number of labels provided ({len(labels)}) does not match the \"\n                f\"number of detections ({len(detections)}). Each detection should have \"\n                f\"a corresponding label. This discrepancy can occur if the labels and \"\n                f\"detections are not aligned or if an incorrect number of labels has \"\n                f\"been provided. Please ensure that the labels array has the same \"\n                f\"length as the Detections object.\"\n            )\n        for detection_idx, center_coordinates in enumerate(anchors_coordinates):\n            color = resolve_color(\n                color=self.color,\n                detections=detections,\n                detection_idx=detection_idx,\n                color_lookup=(\n                    self.color_lookup\n                    if custom_color_lookup is None\n                    else custom_color_lookup\n                ),\n            )\n            if labels is not None:\n                text = labels[detection_idx]\n            elif detections[CLASS_NAME_DATA_FIELD] is not None:\n                text = detections[CLASS_NAME_DATA_FIELD][detection_idx]\n            elif detections.class_id is not None:\n                text = str(detections.class_id[detection_idx])\n            else:\n                text = str(detection_idx)\n\n            left, top, right, bottom = draw.textbbox((0, 0), text, font=self.font)\n            text_width = right - left\n            text_height = bottom - top\n            text_w_padded = text_width + 2 * self.text_padding\n            text_h_padded = text_height + 2 * self.text_padding\n            text_background_xyxy = resolve_text_background_xyxy(\n                center_coordinates=tuple(center_coordinates),\n                text_wh=(text_w_padded, text_h_padded),\n                position=self.text_anchor,\n            )\n\n            text_x = text_background_xyxy[0] + self.text_padding - left\n            text_y = text_background_xyxy[1] + self.text_padding - top\n\n            draw.rounded_rectangle(\n                text_background_xyxy,\n                radius=self.border_radius,\n      
          fill=color.as_rgb(),\n                outline=None,\n            )\n            draw.text(\n                xy=(text_x, text_y),\n                text=text,\n                font=self.font,\n                fill=self.text_color.as_rgb(),\n            )\n\n        return scene\n
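A sketch with non-Latin labels; the font path is a placeholder for any `.ttf`/`.otf` file with the required glyphs, and the number of labels must match the number of detections:

```python
import supervision as sv

image = ...
detections = sv.Detections(...)  # exactly two detections in this sketch

rich_label_annotator = sv.RichLabelAnnotator(
    font_path="path/to/NotoSansSC-Regular.ttf",  # placeholder font path
    font_size=18,
)
labels = ["人 0.92", "自行车 0.81"]  # one label per detection
annotated_frame = rich_label_annotator.annotate(
    scene=image.copy(), detections=detections, labels=labels
)
```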
BlurAnnotator

Bases: BaseAnnotator

A class for blurring regions in an image using provided detections.

Source code in supervision/annotators/core.py
class BlurAnnotator(BaseAnnotator):\n    \"\"\"\n    A class for blurring regions in an image using provided detections.\n    \"\"\"\n\n    def __init__(self, kernel_size: int = 15):\n        \"\"\"\n        Args:\n            kernel_size (int): The size of the average pooling kernel used for blurring.\n        \"\"\"\n        self.kernel_size: int = kernel_size\n\n    @convert_for_annotation_method\n    def annotate(\n        self,\n        scene: ImageType,\n        detections: Detections,\n    ) -> ImageType:\n        \"\"\"\n        Annotates the given scene by blurring regions based on the provided detections.\n\n        Args:\n            scene (ImageType): The image where blurring will be applied.\n                `ImageType` is a flexible type, accepting either `numpy.ndarray`\n                or `PIL.Image.Image`.\n            detections (Detections): Object detections to annotate.\n\n        Returns:\n            The annotated image, matching the type of `scene` (`numpy.ndarray`\n                or `PIL.Image.Image`)\n\n        Example:\n            ```python\n            import supervision as sv\n\n            image = ...\n            detections = sv.Detections(...)\n\n            blur_annotator = sv.BlurAnnotator()\n            annotated_frame = circle_annotator.annotate(\n                scene=image.copy(),\n                detections=detections\n            )\n            ```\n\n        ![blur-annotator-example](https://media.roboflow.com/\n        supervision-annotator-examples/blur-annotator-example-purple.png)\n        \"\"\"\n        image_height, image_width = scene.shape[:2]\n        clipped_xyxy = clip_boxes(\n            xyxy=detections.xyxy, resolution_wh=(image_width, image_height)\n        ).astype(int)\n\n        for x1, y1, x2, y2 in clipped_xyxy:\n            roi = scene[y1:y2, x1:x2]\n            roi = cv2.blur(roi, (self.kernel_size, self.kernel_size))\n            scene[y1:y2, x1:x2] = roi\n\n        return scene\n
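For reference, a minimal call; a larger `kernel_size` gives a stronger blur (`image` and `detections` are placeholders):

```python
import supervision as sv

image = ...
detections = sv.Detections(...)

blur_annotator = sv.BlurAnnotator(kernel_size=25)
annotated_frame = blur_annotator.annotate(scene=image.copy(), detections=detections)
```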
PixelateAnnotator

Bases: BaseAnnotator

A class for pixelating regions in an image using provided detections.

Source code in supervision/annotators/core.py
class PixelateAnnotator(BaseAnnotator):\n    \"\"\"\n    A class for pixelating regions in an image using provided detections.\n    \"\"\"\n\n    def __init__(self, pixel_size: int = 20):\n        \"\"\"\n        Args:\n            pixel_size (int): The size of the pixelation.\n        \"\"\"\n        self.pixel_size: int = pixel_size\n\n    @convert_for_annotation_method\n    def annotate(\n        self,\n        scene: ImageType,\n        detections: Detections,\n    ) -> ImageType:\n        \"\"\"\n        Annotates the given scene by pixelating regions based on the provided\n            detections.\n\n        Args:\n            scene (ImageType): The image where pixelating will be applied.\n                `ImageType` is a flexible type, accepting either `numpy.ndarray`\n                or `PIL.Image.Image`.\n            detections (Detections): Object detections to annotate.\n\n        Returns:\n            The annotated image, matching the type of `scene` (`numpy.ndarray`\n                or `PIL.Image.Image`)\n\n        Example:\n            ```python\n            import supervision as sv\n\n            image = ...\n            detections = sv.Detections(...)\n\n            pixelate_annotator = sv.PixelateAnnotator()\n            annotated_frame = pixelate_annotator.annotate(\n                scene=image.copy(),\n                detections=detections\n            )\n            ```\n\n        ![pixelate-annotator-example](https://media.roboflow.com/\n        supervision-annotator-examples/pixelate-annotator-example-10.png)\n        \"\"\"\n        image_height, image_width = scene.shape[:2]\n        clipped_xyxy = clip_boxes(\n            xyxy=detections.xyxy, resolution_wh=(image_width, image_height)\n        ).astype(int)\n\n        for x1, y1, x2, y2 in clipped_xyxy:\n            roi = scene[y1:y2, x1:x2]\n            scaled_up_roi = cv2.resize(\n                src=roi, dsize=None, fx=1 / self.pixel_size, fy=1 / self.pixel_size\n            )\n            scaled_down_roi = cv2.resize(\n                src=scaled_up_roi,\n                dsize=(roi.shape[1], roi.shape[0]),\n                interpolation=cv2.INTER_NEAREST,\n            )\n\n            scene[y1:y2, x1:x2] = scaled_down_roi\n\n        return scene\n
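A common anonymization sketch: pixelate only one class by filtering detections first. Treating class id 0 as 'person' is an assumption that depends on the model's class mapping:

```python
import supervision as sv

image = ...
detections = sv.Detections(...)

# keep only detections of a single class, e.g. class_id 0 ('person' in COCO)
person_detections = detections[detections.class_id == 0]

pixelate_annotator = sv.PixelateAnnotator(pixel_size=12)
annotated_frame = pixelate_annotator.annotate(
    scene=image.copy(), detections=person_detections
)
```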
TraceAnnotator

A class for drawing trace paths on an image based on detection coordinates.

Warning

This annotator uses sv.Detections.tracker_id. See the trackers documentation at /latest/trackers/ to learn how to plug tracking into your inference pipeline.

Source code in supervision/annotators/core.py
class TraceAnnotator:\n    \"\"\"\n    A class for drawing trace paths on an image based on detection coordinates.\n\n    !!! warning\n\n        This annotator uses the `sv.Detections.tracker_id`. Read\n        [here](/latest/trackers/) to learn how to plug\n        tracking into your inference pipeline.\n    \"\"\"\n\n    def __init__(\n        self,\n        color: Union[Color, ColorPalette] = ColorPalette.DEFAULT,\n        position: Position = Position.CENTER,\n        trace_length: int = 30,\n        thickness: int = 2,\n        color_lookup: ColorLookup = ColorLookup.CLASS,\n    ):\n        \"\"\"\n        Args:\n            color (Union[Color, ColorPalette]): The color to draw the trace, can be\n                a single color or a color palette.\n            position (Position): The position of the trace.\n                Defaults to `CENTER`.\n            trace_length (int): The maximum length of the trace in terms of historical\n                points. Defaults to `30`.\n            thickness (int): The thickness of the trace lines. Defaults to `2`.\n            color_lookup (str): Strategy for mapping colors to annotations.\n                Options are `INDEX`, `CLASS`, `TRACK`.\n        \"\"\"\n        self.color: Union[Color, ColorPalette] = color\n        self.trace = Trace(max_size=trace_length, anchor=position)\n        self.thickness = thickness\n        self.color_lookup: ColorLookup = color_lookup\n\n    @convert_for_annotation_method\n    def annotate(\n        self,\n        scene: ImageType,\n        detections: Detections,\n        custom_color_lookup: Optional[np.ndarray] = None,\n    ) -> ImageType:\n        \"\"\"\n        Draws trace paths on the frame based on the detection coordinates provided.\n\n        Args:\n            scene (ImageType): The image on which the traces will be drawn.\n                `ImageType` is a flexible type, accepting either `numpy.ndarray`\n                or `PIL.Image.Image`.\n            detections (Detections): The detections which include coordinates for\n                which the traces will be drawn.\n            custom_color_lookup (Optional[np.ndarray]): Custom color lookup array.\n                Allows to override the default color mapping strategy.\n\n        Returns:\n            The annotated image, matching the type of `scene` (`numpy.ndarray`\n                or `PIL.Image.Image`)\n\n        Example:\n            ```python\n            import supervision as sv\n            from ultralytics import YOLO\n\n            model = YOLO('yolov8x.pt')\n            trace_annotator = sv.TraceAnnotator()\n\n            video_info = sv.VideoInfo.from_video_path(video_path='...')\n            frames_generator = sv.get_video_frames_generator(source_path='...')\n            tracker = sv.ByteTrack()\n\n            with sv.VideoSink(target_path='...', video_info=video_info) as sink:\n               for frame in frames_generator:\n                   result = model(frame)[0]\n                   detections = sv.Detections.from_ultralytics(result)\n                   detections = tracker.update_with_detections(detections)\n                   annotated_frame = trace_annotator.annotate(\n                       scene=frame.copy(),\n                       detections=detections)\n                   sink.write_frame(frame=annotated_frame)\n            ```\n\n        ![trace-annotator-example](https://media.roboflow.com/\n        supervision-annotator-examples/trace-annotator-example-purple.png)\n        \"\"\"\n        
self.trace.put(detections)\n\n        for detection_idx in range(len(detections)):\n            tracker_id = int(detections.tracker_id[detection_idx])\n            color = resolve_color(\n                color=self.color,\n                detections=detections,\n                detection_idx=detection_idx,\n                color_lookup=self.color_lookup\n                if custom_color_lookup is None\n                else custom_color_lookup,\n            )\n            xy = self.trace.get(tracker_id=tracker_id)\n            if len(xy) > 1:\n                scene = cv2.polylines(\n                    scene,\n                    [xy.astype(np.int32)],\n                    False,\n                    color=color.as_bgr(),\n                    thickness=self.thickness,\n                )\n        return scene\n
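A short constructor sketch; the detections passed to `annotate` must carry `tracker_id`, for example from `sv.ByteTrack` as in the docstring example above:

```python
import supervision as sv

# longer trail, anchored at the bottom center of each tracked box
trace_annotator = sv.TraceAnnotator(
    trace_length=60,
    position=sv.Position.BOTTOM_CENTER,
    thickness=2,
)
```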
CropAnnotator

Bases: BaseAnnotator

A class for drawing scaled up crops of detections on the scene.

Source code in supervision/annotators/core.py
class CropAnnotator(BaseAnnotator):\n    \"\"\"\n    A class for drawing scaled up crops of detections on the scene.\n    \"\"\"\n\n    def __init__(\n        self,\n        position: Position = Position.TOP_CENTER,\n        scale_factor: int = 2,\n        border_color: Union[Color, ColorPalette] = ColorPalette.DEFAULT,\n        border_thickness: int = 2,\n        border_color_lookup: ColorLookup = ColorLookup.CLASS,\n    ):\n        \"\"\"\n        Args:\n            position (Position): The anchor position for placing the cropped and scaled\n                part of the detection in the scene.\n            scale_factor (int): The factor by which to scale the cropped image part. A\n                factor of 2, for example, would double the size of the cropped area,\n                allowing for a closer view of the detection.\n            border_color (Union[Color, ColorPalette]): The color or color palette to\n                use for annotating border around the cropped area.\n            border_thickness (int): The thickness of the border around the cropped area.\n            border_color_lookup (ColorLookup): Strategy for mapping colors to\n                annotations. Options are `INDEX`, `CLASS`, `TRACK`.\n        \"\"\"\n        self.position: Position = position\n        self.scale_factor: int = scale_factor\n        self.border_color: Union[Color, ColorPalette] = border_color\n        self.border_thickness: int = border_thickness\n        self.border_color_lookup: ColorLookup = border_color_lookup\n\n    @convert_for_annotation_method\n    def annotate(\n        self,\n        scene: ImageType,\n        detections: Detections,\n        custom_color_lookup: Optional[np.ndarray] = None,\n    ) -> ImageType:\n        \"\"\"\n        Annotates the provided scene with scaled and cropped parts of the image based\n        on the provided detections. 
Each detection is cropped from the original scene\n        and scaled according to the annotator's scale factor before being placed back\n        onto the scene at the specified position.\n\n\n        Args:\n            scene (ImageType): The image where cropped detection will be placed.\n                `ImageType` is a flexible type, accepting either `numpy.ndarray`\n                or `PIL.Image.Image`.\n            detections (Detections): Object detections to annotate.\n            custom_color_lookup (Optional[np.ndarray]): Custom color lookup array.\n                Allows to override the default color mapping strategy.\n\n        Returns:\n            The annotated image.\n\n        Example:\n            ```python\n            import supervision as sv\n\n            image = ...\n            detections = sv.Detections(...)\n\n            crop_annotator = sv.CropAnnotator()\n            annotated_frame = crop_annotator.annotate(\n                scene=image.copy(),\n                detections=detections\n            )\n            ```\n        \"\"\"\n        crops = [\n            crop_image(image=scene, xyxy=xyxy) for xyxy in detections.xyxy.astype(int)\n        ]\n        resized_crops = [\n            scale_image(image=crop, scale_factor=self.scale_factor) for crop in crops\n        ]\n        anchors = detections.get_anchors_coordinates(anchor=self.position).astype(int)\n\n        for idx, (resized_crop, anchor) in enumerate(zip(resized_crops, anchors)):\n            crop_wh = resized_crop.shape[1], resized_crop.shape[0]\n            (x1, y1), (x2, y2) = self.calculate_crop_coordinates(\n                anchor=anchor, crop_wh=crop_wh, position=self.position\n            )\n            scene = overlay_image(\n                scene=scene, inserted_image=resized_crop, anchor=(x1, y1)\n            )\n            color = resolve_color(\n                color=self.border_color,\n                detections=detections,\n                detection_idx=idx,\n                color_lookup=self.border_color_lookup\n                if custom_color_lookup is None\n                else custom_color_lookup,\n            )\n            cv2.rectangle(\n                img=scene,\n                pt1=(x1, y1),\n                pt2=(x2, y2),\n                color=color.as_bgr(),\n                thickness=self.border_thickness,\n            )\n\n        return scene\n\n    @staticmethod\n    def calculate_crop_coordinates(\n        anchor: Tuple[int, int], crop_wh: Tuple[int, int], position: Position\n    ) -> Tuple[Tuple[int, int], Tuple[int, int]]:\n        anchor_x, anchor_y = anchor\n        width, height = crop_wh\n\n        if position == Position.TOP_LEFT:\n            return (anchor_x - width, anchor_y - height), (anchor_x, anchor_y)\n        elif position == Position.TOP_CENTER:\n            return (\n                (anchor_x - width // 2, anchor_y - height),\n                (anchor_x + width // 2, anchor_y),\n            )\n        elif position == Position.TOP_RIGHT:\n            return (anchor_x, anchor_y - height), (anchor_x + width, anchor_y)\n        elif position == Position.CENTER_LEFT:\n            return (\n                (anchor_x - width, anchor_y - height // 2),\n                (anchor_x, anchor_y + height // 2),\n            )\n        elif position == Position.CENTER or position == Position.CENTER_OF_MASS:\n            return (\n                (anchor_x - width // 2, anchor_y - height // 2),\n                (anchor_x + width // 2, anchor_y + height // 2),\n            
)\n        elif position == Position.CENTER_RIGHT:\n            return (\n                (anchor_x, anchor_y - height // 2),\n                (anchor_x + width, anchor_y + height // 2),\n            )\n        elif position == Position.BOTTOM_LEFT:\n            return (anchor_x - width, anchor_y), (anchor_x, anchor_y + height)\n        elif position == Position.BOTTOM_CENTER:\n            return (\n                (anchor_x - width // 2, anchor_y),\n                (anchor_x + width // 2, anchor_y + height),\n            )\n        elif position == Position.BOTTOM_RIGHT:\n            return (anchor_x, anchor_y), (anchor_x + width, anchor_y + height)\n
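A sketch showing the main knobs: where the enlarged crop is pasted and how much it is scaled (values chosen for illustration):

```python
import supervision as sv

image = ...
detections = sv.Detections(...)

crop_annotator = sv.CropAnnotator(
    position=sv.Position.TOP_CENTER,  # where the enlarged crop is pasted
    scale_factor=2,                   # 2x zoom of the cropped region
    border_thickness=2,
)
annotated_frame = crop_annotator.annotate(scene=image.copy(), detections=detections)
```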
ColorLookup

Bases: Enum

Enumeration class to define strategies for mapping colors to annotations.

This enum supports three different lookup strategies:
  • INDEX: Colors are determined by the index of the detection within the scene.
  • CLASS: Colors are determined by the class label of the detected object.
  • TRACK: Colors are determined by the tracking identifier of the object.
Source code in supervision/annotators/utils.py
class ColorLookup(Enum):\n    \"\"\"\n    Enumeration class to define strategies for mapping colors to annotations.\n\n    This enum supports three different lookup strategies:\n        - `INDEX`: Colors are determined by the index of the detection within the scene.\n        - `CLASS`: Colors are determined by the class label of the detected object.\n        - `TRACK`: Colors are determined by the tracking identifier of the object.\n    \"\"\"\n\n    INDEX = \"index\"\n    CLASS = \"class\"\n    TRACK = \"track\"\n\n    @classmethod\n    def list(cls):\n        return list(map(lambda c: c.value, cls))\n
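A small sketch of switching the lookup strategy; coloring by `TRACK` assumes the detections carry `tracker_id`:

```python
import supervision as sv

# color boxes by tracker id instead of class id
box_annotator = sv.BoundingBoxAnnotator(color_lookup=sv.ColorLookup.TRACK)

print(sv.ColorLookup.list())  # ['index', 'class', 'track']
```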
"},{"location":"detection/annotators/#supervision.annotators.core.BoundingBoxAnnotator-functions","title":"Functions","text":""},{"location":"detection/annotators/#supervision.annotators.core.BoundingBoxAnnotator.__init__","title":"__init__(color=ColorPalette.DEFAULT, thickness=2, color_lookup=ColorLookup.CLASS)","text":"

Parameters:

  • color (Union[Color, ColorPalette]): The color or color palette to use for annotating detections. Default: ColorPalette.DEFAULT
  • thickness (int): Thickness of the bounding box lines. Default: 2
  • color_lookup (str): Strategy for mapping colors to annotations. Options are INDEX, CLASS, TRACK. Default: ColorLookup.CLASS

Source code in supervision/annotators/core.py
def __init__(\n    self,\n    color: Union[Color, ColorPalette] = ColorPalette.DEFAULT,\n    thickness: int = 2,\n    color_lookup: ColorLookup = ColorLookup.CLASS,\n):\n    \"\"\"\n    Args:\n        color (Union[Color, ColorPalette]): The color or color palette to use for\n            annotating detections.\n        thickness (int): Thickness of the bounding box lines.\n        color_lookup (str): Strategy for mapping colors to annotations.\n            Options are `INDEX`, `CLASS`, `TRACK`.\n    \"\"\"\n    self.color: Union[Color, ColorPalette] = color\n    self.thickness: int = thickness\n    self.color_lookup: ColorLookup = color_lookup\n
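A constructor sketch with a custom palette; the hex colors are arbitrary examples, not library defaults:

```python
import supervision as sv

bounding_box_annotator = sv.BoundingBoxAnnotator(
    color=sv.ColorPalette.from_hex(['#ff1493', '#00bfff', '#32cd32']),
    thickness=3,
    color_lookup=sv.ColorLookup.CLASS,
)
```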
"},{"location":"detection/annotators/#supervision.annotators.core.BoundingBoxAnnotator.annotate","title":"annotate(scene, detections, custom_color_lookup=None)","text":"

Annotates the given scene with bounding boxes based on the provided detections.

Parameters:

  • scene (ImageType): The image where bounding boxes will be drawn. ImageType is a flexible type, accepting either numpy.ndarray or PIL.Image.Image. Required.
  • detections (Detections): Object detections to annotate. Required.
  • custom_color_lookup (Optional[ndarray]): Custom color lookup array. Allows to override the default color mapping strategy. Default: None

Returns:

  • ImageType: The annotated image, matching the type of scene (numpy.ndarray or PIL.Image.Image)

Example
import supervision as sv\n\nimage = ...\ndetections = sv.Detections(...)\n\nbounding_box_annotator = sv.BoundingBoxAnnotator()\nannotated_frame = bounding_box_annotator.annotate(\n    scene=image.copy(),\n    detections=detections\n)\n

Source code in supervision/annotators/core.py
@convert_for_annotation_method\ndef annotate(\n    self,\n    scene: ImageType,\n    detections: Detections,\n    custom_color_lookup: Optional[np.ndarray] = None,\n) -> ImageType:\n    \"\"\"\n    Annotates the given scene with bounding boxes based on the provided detections.\n\n    Args:\n        scene (ImageType): The image where bounding boxes will be drawn. `ImageType`\n        is a flexible type, accepting either `numpy.ndarray` or `PIL.Image.Image`.\n        detections (Detections): Object detections to annotate.\n        custom_color_lookup (Optional[np.ndarray]): Custom color lookup array.\n            Allows to override the default color mapping strategy.\n\n    Returns:\n        The annotated image, matching the type of `scene` (`numpy.ndarray`\n            or `PIL.Image.Image`)\n\n    Example:\n        ```python\n        import supervision as sv\n\n        image = ...\n        detections = sv.Detections(...)\n\n        bounding_box_annotator = sv.BoundingBoxAnnotator()\n        annotated_frame = bounding_box_annotator.annotate(\n            scene=image.copy(),\n            detections=detections\n        )\n        ```\n\n    ![bounding-box-annotator-example](https://media.roboflow.com/\n    supervision-annotator-examples/bounding-box-annotator-example-purple.png)\n    \"\"\"\n    for detection_idx in range(len(detections)):\n        x1, y1, x2, y2 = detections.xyxy[detection_idx].astype(int)\n        color = resolve_color(\n            color=self.color,\n            detections=detections,\n            detection_idx=detection_idx,\n            color_lookup=self.color_lookup\n            if custom_color_lookup is None\n            else custom_color_lookup,\n        )\n        cv2.rectangle(\n            img=scene,\n            pt1=(x1, y1),\n            pt2=(x2, y2),\n            color=color.as_bgr(),\n            thickness=self.thickness,\n        )\n    return scene\n
"},{"location":"detection/annotators/#supervision.annotators.core.RoundBoxAnnotator-functions","title":"Functions","text":""},{"location":"detection/annotators/#supervision.annotators.core.RoundBoxAnnotator.__init__","title":"__init__(color=ColorPalette.DEFAULT, thickness=2, color_lookup=ColorLookup.CLASS, roundness=0.6)","text":"

Parameters:

  • color (Union[Color, ColorPalette]): The color or color palette to use for annotating detections. Default: ColorPalette.DEFAULT
  • thickness (int): Thickness of the bounding box lines. Default: 2
  • color_lookup (str): Strategy for mapping colors to annotations. Options are INDEX, CLASS, TRACK. Default: ColorLookup.CLASS
  • roundness (float): Percent of roundness for the edges of the bounding box. Must be a float in the range (0, 1.0]. The roundness is calculated relative to the smaller side length (width or height). Default: 0.6

Source code in supervision/annotators/core.py
def __init__(\n    self,\n    color: Union[Color, ColorPalette] = ColorPalette.DEFAULT,\n    thickness: int = 2,\n    color_lookup: ColorLookup = ColorLookup.CLASS,\n    roundness: float = 0.6,\n):\n    \"\"\"\n    Args:\n        color (Union[Color, ColorPalette]): The color or color palette to use for\n            annotating detections.\n        thickness (int): Thickness of the bounding box lines.\n        color_lookup (str): Strategy for mapping colors to annotations.\n            Options are `INDEX`, `CLASS`, `TRACK`.\n        roundness (float): Percent of roundness for edges of bounding box.\n            Value must be float 0 < roundness <= 1.0\n            By default roundness percent is calculated based on smaller side\n            length (width or height).\n    \"\"\"\n    self.color: Union[Color, ColorPalette] = color\n    self.thickness: int = thickness\n    self.color_lookup: ColorLookup = color_lookup\n    if not 0 < roundness <= 1.0:\n        raise ValueError(\"roundness attribute must be float between (0, 1.0]\")\n    self.roundness: float = roundness\n
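A constructor sketch; `roundness` must stay within (0, 1.0] or a ValueError is raised:

```python
import supervision as sv

# higher roundness produces more strongly rounded corners
round_box_annotator = sv.RoundBoxAnnotator(roundness=0.8, thickness=2)
```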
"},{"location":"detection/annotators/#supervision.annotators.core.RoundBoxAnnotator.annotate","title":"annotate(scene, detections, custom_color_lookup=None)","text":"

Annotates the given scene with bounding boxes with rounded edges based on the provided detections.

Parameters:

  • scene (ImageType): The image where rounded bounding boxes will be drawn. ImageType is a flexible type, accepting either numpy.ndarray or PIL.Image.Image. Required.
  • detections (Detections): Object detections to annotate. Required.
  • custom_color_lookup (Optional[ndarray]): Custom color lookup array. Allows to override the default color mapping strategy. Default: None

Returns:

  • ImageType: The annotated image, matching the type of scene (numpy.ndarray or PIL.Image.Image)

Example
import supervision as sv\n\nimage = ...\ndetections = sv.Detections(...)\n\nround_box_annotator = sv.RoundBoxAnnotator()\nannotated_frame = round_box_annotator.annotate(\n    scene=image.copy(),\n    detections=detections\n)\n

Source code in supervision/annotators/core.py
@convert_for_annotation_method\ndef annotate(\n    self,\n    scene: ImageType,\n    detections: Detections,\n    custom_color_lookup: Optional[np.ndarray] = None,\n) -> ImageType:\n    \"\"\"\n    Annotates the given scene with bounding boxes with rounded edges\n    based on the provided detections.\n\n    Args:\n        scene (ImageType): The image where rounded bounding boxes will be drawn.\n            `ImageType` is a flexible type, accepting either `numpy.ndarray`\n            or `PIL.Image.Image`.\n        detections (Detections): Object detections to annotate.\n        custom_color_lookup (Optional[np.ndarray]): Custom color lookup array.\n            Allows to override the default color mapping strategy.\n\n    Returns:\n        The annotated image, matching the type of `scene` (`numpy.ndarray`\n            or `PIL.Image.Image`)\n\n    Example:\n        ```python\n        import supervision as sv\n\n        image = ...\n        detections = sv.Detections(...)\n\n        round_box_annotator = sv.RoundBoxAnnotator()\n        annotated_frame = round_box_annotator.annotate(\n            scene=image.copy(),\n            detections=detections\n        )\n        ```\n\n    ![round-box-annotator-example](https://media.roboflow.com/\n    supervision-annotator-examples/round-box-annotator-example-purple.png)\n    \"\"\"\n\n    for detection_idx in range(len(detections)):\n        x1, y1, x2, y2 = detections.xyxy[detection_idx].astype(int)\n        color = resolve_color(\n            color=self.color,\n            detections=detections,\n            detection_idx=detection_idx,\n            color_lookup=self.color_lookup\n            if custom_color_lookup is None\n            else custom_color_lookup,\n        )\n\n        radius = (\n            int((x2 - x1) // 2 * self.roundness)\n            if abs(x1 - x2) < abs(y1 - y2)\n            else int((y2 - y1) // 2 * self.roundness)\n        )\n\n        circle_coordinates = [\n            ((x1 + radius), (y1 + radius)),\n            ((x2 - radius), (y1 + radius)),\n            ((x2 - radius), (y2 - radius)),\n            ((x1 + radius), (y2 - radius)),\n        ]\n\n        line_coordinates = [\n            ((x1 + radius, y1), (x2 - radius, y1)),\n            ((x2, y1 + radius), (x2, y2 - radius)),\n            ((x1 + radius, y2), (x2 - radius, y2)),\n            ((x1, y1 + radius), (x1, y2 - radius)),\n        ]\n\n        start_angles = (180, 270, 0, 90)\n        end_angles = (270, 360, 90, 180)\n\n        for center_coordinates, line, start_angle, end_angle in zip(\n            circle_coordinates, line_coordinates, start_angles, end_angles\n        ):\n            cv2.ellipse(\n                img=scene,\n                center=center_coordinates,\n                axes=(radius, radius),\n                angle=0,\n                startAngle=start_angle,\n                endAngle=end_angle,\n                color=color.as_bgr(),\n                thickness=self.thickness,\n            )\n\n            cv2.line(\n                img=scene,\n                pt1=line[0],\n                pt2=line[1],\n                color=color.as_bgr(),\n                thickness=self.thickness,\n            )\n\n    return scene\n
"},{"location":"detection/annotators/#supervision.annotators.core.BoxCornerAnnotator-functions","title":"Functions","text":""},{"location":"detection/annotators/#supervision.annotators.core.BoxCornerAnnotator.__init__","title":"__init__(color=ColorPalette.DEFAULT, thickness=4, corner_length=15, color_lookup=ColorLookup.CLASS)","text":"

Parameters:

| Name | Type | Description | Default |
|---|---|---|---|
| color | Union[Color, ColorPalette] | The color or color palette to use for annotating detections. | DEFAULT |
| thickness | int | Thickness of the corner lines. | 4 |
| corner_length | int | Length of each corner line. | 15 |
| color_lookup | str | Strategy for mapping colors to annotations. Options are INDEX, CLASS, TRACK. | CLASS |

Source code in supervision/annotators/core.py
def __init__(\n    self,\n    color: Union[Color, ColorPalette] = ColorPalette.DEFAULT,\n    thickness: int = 4,\n    corner_length: int = 15,\n    color_lookup: ColorLookup = ColorLookup.CLASS,\n):\n    \"\"\"\n    Args:\n        color (Union[Color, ColorPalette]): The color or color palette to use for\n            annotating detections.\n        thickness (int): Thickness of the corner lines.\n        corner_length (int): Length of each corner line.\n        color_lookup (str): Strategy for mapping colors to annotations.\n            Options are `INDEX`, `CLASS`, `TRACK`.\n    \"\"\"\n    self.color: Union[Color, ColorPalette] = color\n    self.thickness: int = thickness\n    self.corner_length: int = corner_length\n    self.color_lookup: ColorLookup = color_lookup\n
"},{"location":"detection/annotators/#supervision.annotators.core.BoxCornerAnnotator.annotate","title":"annotate(scene, detections, custom_color_lookup=None)","text":"

Annotates the given scene with box corners based on the provided detections.

Parameters:

| Name | Type | Description | Default |
|---|---|---|---|
| scene | ImageType | The image where box corners will be drawn. ImageType is a flexible type, accepting either numpy.ndarray or PIL.Image.Image. | required |
| detections | Detections | Object detections to annotate. | required |
| custom_color_lookup | Optional[ndarray] | Custom color lookup array. Allows overriding the default color mapping strategy. | None |

Returns:

| Type | Description |
|---|---|
| ImageType | The annotated image, matching the type of scene (numpy.ndarray or PIL.Image.Image) |

Example
import supervision as sv\n\nimage = ...\ndetections = sv.Detections(...)\n\ncorner_annotator = sv.BoxCornerAnnotator()\nannotated_frame = corner_annotator.annotate(\n    scene=image.copy(),\n    detections=detections\n)\n
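The custom_color_lookup argument can assign a color per detection. The sketch below assumes the array is interpreted as one palette index per detection, which is how it is threaded into resolve_color in the source code below; the indices and boxes are illustrative:

```python
import numpy as np
import supervision as sv

image = np.zeros((480, 640, 3), dtype=np.uint8)
detections = sv.Detections(
    xyxy=np.array(
        [[40, 40, 160, 200], [260, 80, 420, 300], [450, 200, 620, 460]], dtype=float
    ),
    class_id=np.array([0, 0, 0]),
)

corner_annotator = sv.BoxCornerAnnotator(thickness=4, corner_length=20)

# One palette index per detection; overrides the default CLASS-based lookup.
custom_lookup = np.array([0, 1, 2])
annotated_frame = corner_annotator.annotate(
    scene=image.copy(),
    detections=detections,
    custom_color_lookup=custom_lookup,
)
```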

Source code in supervision/annotators/core.py
@convert_for_annotation_method\ndef annotate(\n    self,\n    scene: ImageType,\n    detections: Detections,\n    custom_color_lookup: Optional[np.ndarray] = None,\n) -> ImageType:\n    \"\"\"\n    Annotates the given scene with box corners based on the provided detections.\n\n    Args:\n        scene (ImageType): The image where box corners will be drawn.\n            `ImageType` is a flexible type, accepting either `numpy.ndarray`\n            or `PIL.Image.Image`.\n        detections (Detections): Object detections to annotate.\n        custom_color_lookup (Optional[np.ndarray]): Custom color lookup array.\n            Allows to override the default color mapping strategy.\n\n    Returns:\n        The annotated image, matching the type of `scene` (`numpy.ndarray`\n            or `PIL.Image.Image`)\n\n    Example:\n        ```python\n        import supervision as sv\n\n        image = ...\n        detections = sv.Detections(...)\n\n        corner_annotator = sv.BoxCornerAnnotator()\n        annotated_frame = corner_annotator.annotate(\n            scene=image.copy(),\n            detections=detections\n        )\n        ```\n\n    ![box-corner-annotator-example](https://media.roboflow.com/\n    supervision-annotator-examples/box-corner-annotator-example-purple.png)\n    \"\"\"\n    for detection_idx in range(len(detections)):\n        x1, y1, x2, y2 = detections.xyxy[detection_idx].astype(int)\n        color = resolve_color(\n            color=self.color,\n            detections=detections,\n            detection_idx=detection_idx,\n            color_lookup=self.color_lookup\n            if custom_color_lookup is None\n            else custom_color_lookup,\n        )\n        corners = [(x1, y1), (x2, y1), (x1, y2), (x2, y2)]\n\n        for x, y in corners:\n            x_end = x + self.corner_length if x == x1 else x - self.corner_length\n            cv2.line(\n                scene, (x, y), (x_end, y), color.as_bgr(), thickness=self.thickness\n            )\n\n            y_end = y + self.corner_length if y == y1 else y - self.corner_length\n            cv2.line(\n                scene, (x, y), (x, y_end), color.as_bgr(), thickness=self.thickness\n            )\n    return scene\n
"},{"location":"detection/annotators/#supervision.annotators.core.OrientedBoxAnnotator-functions","title":"Functions","text":""},{"location":"detection/annotators/#supervision.annotators.core.OrientedBoxAnnotator.__init__","title":"__init__(color=ColorPalette.DEFAULT, thickness=2, color_lookup=ColorLookup.CLASS)","text":"

Parameters:

| Name | Type | Description | Default |
|---|---|---|---|
| color | Union[Color, ColorPalette] | The color or color palette to use for annotating detections. | DEFAULT |
| thickness | int | Thickness of the bounding box lines. | 2 |
| color_lookup | str | Strategy for mapping colors to annotations. Options are INDEX, CLASS, TRACK. | CLASS |

Source code in supervision/annotators/core.py
def __init__(\n    self,\n    color: Union[Color, ColorPalette] = ColorPalette.DEFAULT,\n    thickness: int = 2,\n    color_lookup: ColorLookup = ColorLookup.CLASS,\n):\n    \"\"\"\n    Args:\n        color (Union[Color, ColorPalette]): The color or color palette to use for\n            annotating detections.\n        thickness (int): Thickness of the bounding box lines.\n        color_lookup (str): Strategy for mapping colors to annotations.\n            Options are `INDEX`, `CLASS`, `TRACK`.\n    \"\"\"\n    self.color: Union[Color, ColorPalette] = color\n    self.thickness: int = thickness\n    self.color_lookup: ColorLookup = color_lookup\n
"},{"location":"detection/annotators/#supervision.annotators.core.OrientedBoxAnnotator.annotate","title":"annotate(scene, detections, custom_color_lookup=None)","text":"

Annotates the given scene with oriented bounding boxes based on the provided detections.

Parameters:

| Name | Type | Description | Default |
|---|---|---|---|
| scene | ImageType | The image where bounding boxes will be drawn. ImageType is a flexible type, accepting either numpy.ndarray or PIL.Image.Image. | required |
| detections | Detections | Object detections to annotate. | required |
| custom_color_lookup | Optional[ndarray] | Custom color lookup array. Allows overriding the default color mapping strategy. | None |

Returns:

| Type | Description |
|---|---|
| ImageType | The annotated image, matching the type of scene (numpy.ndarray or PIL.Image.Image) |

Example
import cv2\nimport supervision as sv\nfrom ultralytics import YOLO\n\nimage = cv2.imread(<SOURCE_IMAGE_PATH>)\nmodel = YOLO(\"yolov8n-obb.pt\")\n\nresult = model(image)[0]\ndetections = sv.Detections.from_ultralytics(result)\n\noriented_box_annotator = sv.OrientedBoxAnnotator()\nannotated_frame = oriented_box_annotator.annotate(\n    scene=image.copy(),\n    detections=detections\n)\n
Source code in supervision/annotators/core.py
@convert_for_annotation_method\ndef annotate(\n    self,\n    scene: ImageType,\n    detections: Detections,\n    custom_color_lookup: Optional[np.ndarray] = None,\n) -> ImageType:\n    \"\"\"\n    Annotates the given scene with oriented bounding boxes based on the provided detections.\n\n    Args:\n        scene (ImageType): The image where bounding boxes will be drawn.\n            `ImageType` is a flexible type, accepting either `numpy.ndarray`\n            or `PIL.Image.Image`.\n        detections (Detections): Object detections to annotate.\n        custom_color_lookup (Optional[np.ndarray]): Custom color lookup array.\n            Allows to override the default color mapping strategy.\n\n    Returns:\n        The annotated image, matching the type of `scene` (`numpy.ndarray`\n            or `PIL.Image.Image`)\n\n    Example:\n        ```python\n        import cv2\n        import supervision as sv\n        from ultralytics import YOLO\n\n        image = cv2.imread(<SOURCE_IMAGE_PATH>)\n        model = YOLO(\"yolov8n-obb.pt\")\n\n        result = model(image)[0]\n        detections = sv.Detections.from_ultralytics(result)\n\n        oriented_box_annotator = sv.OrientedBoxAnnotator()\n        annotated_frame = oriented_box_annotator.annotate(\n            scene=image.copy(),\n            detections=detections\n        )\n        ```\n    \"\"\"  # noqa E501 // docs\n\n    if detections.data is None or ORIENTED_BOX_COORDINATES not in detections.data:\n        return scene\n\n    for detection_idx in range(len(detections)):\n        bbox = np.intp(detections.data.get(ORIENTED_BOX_COORDINATES)[detection_idx])\n        color = resolve_color(\n            color=self.color,\n            detections=detections,\n            detection_idx=detection_idx,\n            color_lookup=self.color_lookup\n            if custom_color_lookup is None\n            else custom_color_lookup,\n        )\n\n        cv2.drawContours(scene, [bbox], 0, color.as_bgr(), self.thickness)\n\n    return scene\n
"},{"location":"detection/annotators/#supervision.annotators.core.ColorAnnotator-functions","title":"Functions","text":""},{"location":"detection/annotators/#supervision.annotators.core.ColorAnnotator.__init__","title":"__init__(color=ColorPalette.DEFAULT, opacity=0.5, color_lookup=ColorLookup.CLASS)","text":"

Parameters:

| Name | Type | Description | Default |
|---|---|---|---|
| color | Union[Color, ColorPalette] | The color or color palette to use for annotating detections. | DEFAULT |
| opacity | float | Opacity of the overlay mask. Must be between 0 and 1. | 0.5 |
| color_lookup | str | Strategy for mapping colors to annotations. Options are INDEX, CLASS, TRACK. | CLASS |

Source code in supervision/annotators/core.py
def __init__(\n    self,\n    color: Union[Color, ColorPalette] = ColorPalette.DEFAULT,\n    opacity: float = 0.5,\n    color_lookup: ColorLookup = ColorLookup.CLASS,\n):\n    \"\"\"\n    Args:\n        color (Union[Color, ColorPalette]): The color or color palette to use for\n            annotating detections.\n        opacity (float): Opacity of the overlay mask. Must be between `0` and `1`.\n        color_lookup (str): Strategy for mapping colors to annotations.\n            Options are `INDEX`, `CLASS`, `TRACK`.\n    \"\"\"\n    self.color: Union[Color, ColorPalette] = color\n    self.color_lookup: ColorLookup = color_lookup\n    self.opacity = opacity\n
"},{"location":"detection/annotators/#supervision.annotators.core.ColorAnnotator.annotate","title":"annotate(scene, detections, custom_color_lookup=None)","text":"

Annotates the given scene with box masks based on the provided detections.

Parameters:

| Name | Type | Description | Default |
|---|---|---|---|
| scene | ImageType | The image where bounding boxes will be drawn. ImageType is a flexible type, accepting either numpy.ndarray or PIL.Image.Image. | required |
| detections | Detections | Object detections to annotate. | required |
| custom_color_lookup | Optional[ndarray] | Custom color lookup array. Allows overriding the default color mapping strategy. | None |

Returns:

| Type | Description |
|---|---|
| ImageType | The annotated image, matching the type of scene (numpy.ndarray or PIL.Image.Image) |

Example
import supervision as sv\n\nimage = ...\ndetections = sv.Detections(...)\n\ncolor_annotator = sv.ColorAnnotator()\nannotated_frame = color_annotator.annotate(\n    scene=image.copy(),\n    detections=detections\n)\n
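A minimal sketch of tuning the fill opacity set in the constructor; the blend itself is done with cv2.addWeighted in the source code below. The white canvas and box coordinates are illustrative:

```python
import numpy as np
import supervision as sv

image = np.full((480, 640, 3), 255, dtype=np.uint8)  # white canvas
detections = sv.Detections(
    xyxy=np.array([[60, 60, 240, 240], [320, 120, 560, 400]], dtype=float),
    class_id=np.array([0, 1]),
)

# Lower opacity keeps more of the underlying image visible under the fill.
color_annotator = sv.ColorAnnotator(opacity=0.3)
annotated_frame = color_annotator.annotate(
    scene=image.copy(),
    detections=detections,
)
```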

Source code in supervision/annotators/core.py
@convert_for_annotation_method\ndef annotate(\n    self,\n    scene: ImageType,\n    detections: Detections,\n    custom_color_lookup: Optional[np.ndarray] = None,\n) -> ImageType:\n    \"\"\"\n    Annotates the given scene with box masks based on the provided detections.\n\n    Args:\n        scene (ImageType): The image where bounding boxes will be drawn.\n            `ImageType` is a flexible type, accepting either `numpy.ndarray`\n            or `PIL.Image.Image`.\n        detections (Detections): Object detections to annotate.\n        custom_color_lookup (Optional[np.ndarray]): Custom color lookup array.\n            Allows to override the default color mapping strategy.\n\n    Returns:\n        The annotated image, matching the type of `scene` (`numpy.ndarray`\n            or `PIL.Image.Image`)\n\n    Example:\n        ```python\n        import supervision as sv\n\n        image = ...\n        detections = sv.Detections(...)\n\n        color_annotator = sv.ColorAnnotator()\n        annotated_frame = color_annotator.annotate(\n            scene=image.copy(),\n            detections=detections\n        )\n        ```\n\n    ![box-mask-annotator-example](https://media.roboflow.com/\n    supervision-annotator-examples/box-mask-annotator-example-purple.png)\n    \"\"\"\n    mask_image = scene.copy()\n    for detection_idx in range(len(detections)):\n        x1, y1, x2, y2 = detections.xyxy[detection_idx].astype(int)\n        color = resolve_color(\n            color=self.color,\n            detections=detections,\n            detection_idx=detection_idx,\n            color_lookup=self.color_lookup\n            if custom_color_lookup is None\n            else custom_color_lookup,\n        )\n        cv2.rectangle(\n            img=scene,\n            pt1=(x1, y1),\n            pt2=(x2, y2),\n            color=color.as_bgr(),\n            thickness=-1,\n        )\n    scene = cv2.addWeighted(\n        scene, self.opacity, mask_image, 1 - self.opacity, gamma=0\n    )\n    return scene\n
"},{"location":"detection/annotators/#supervision.annotators.core.CircleAnnotator-functions","title":"Functions","text":""},{"location":"detection/annotators/#supervision.annotators.core.CircleAnnotator.__init__","title":"__init__(color=ColorPalette.DEFAULT, thickness=2, color_lookup=ColorLookup.CLASS)","text":"

Parameters:

| Name | Type | Description | Default |
|---|---|---|---|
| color | Union[Color, ColorPalette] | The color or color palette to use for annotating detections. | DEFAULT |
| thickness | int | Thickness of the circle line. | 2 |
| color_lookup | str | Strategy for mapping colors to annotations. Options are INDEX, CLASS, TRACK. | CLASS |

Source code in supervision/annotators/core.py
def __init__(\n    self,\n    color: Union[Color, ColorPalette] = ColorPalette.DEFAULT,\n    thickness: int = 2,\n    color_lookup: ColorLookup = ColorLookup.CLASS,\n):\n    \"\"\"\n    Args:\n        color (Union[Color, ColorPalette]): The color or color palette to use for\n            annotating detections.\n        thickness (int): Thickness of the circle line.\n        color_lookup (str): Strategy for mapping colors to annotations.\n            Options are `INDEX`, `CLASS`, `TRACK`.\n    \"\"\"\n\n    self.color: Union[Color, ColorPalette] = color\n    self.thickness: int = thickness\n    self.color_lookup: ColorLookup = color_lookup\n
"},{"location":"detection/annotators/#supervision.annotators.core.CircleAnnotator.annotate","title":"annotate(scene, detections, custom_color_lookup=None)","text":"

Annotates the given scene with circles based on the provided detections.

Parameters:

| Name | Type | Description | Default |
|---|---|---|---|
| scene | ImageType | The image where circles will be drawn. ImageType is a flexible type, accepting either numpy.ndarray or PIL.Image.Image. | required |
| detections | Detections | Object detections to annotate. | required |
| custom_color_lookup | Optional[ndarray] | Custom color lookup array. Allows overriding the default color mapping strategy. | None |

Returns:

| Type | Description |
|---|---|
| ImageType | The annotated image, matching the type of scene (numpy.ndarray or PIL.Image.Image) |

Example
import supervision as sv\n\nimage = ...\ndetections = sv.Detections(...)\n\ncircle_annotator = sv.CircleAnnotator()\nannotated_frame = circle_annotator.annotate(\n    scene=image.copy(),\n    detections=detections\n)\n
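As the source code below shows, the radius is the distance from the box centre to one of its corners, so each circle circumscribes its bounding box. A small sketch of that arithmetic with illustrative coordinates:

```python
from math import sqrt

# Illustrative box: the circle drawn by CircleAnnotator circumscribes it.
x1, y1, x2, y2 = 100, 100, 300, 250
center = ((x1 + x2) // 2, (y1 + y2) // 2)           # (200, 175)
radius = sqrt((x1 - center[0]) ** 2 + (y1 - center[1]) ** 2)
print(center, round(radius, 1))                      # (200, 175) 125.0
```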

Source code in supervision/annotators/core.py
@convert_for_annotation_method\ndef annotate(\n    self,\n    scene: ImageType,\n    detections: Detections,\n    custom_color_lookup: Optional[np.ndarray] = None,\n) -> ImageType:\n    \"\"\"\n    Annotates the given scene with circles based on the provided detections.\n\n    Args:\n        scene (ImageType): The image where box corners will be drawn.\n            `ImageType` is a flexible type, accepting either `numpy.ndarray`\n            or `PIL.Image.Image`.\n        detections (Detections): Object detections to annotate.\n        custom_color_lookup (Optional[np.ndarray]): Custom color lookup array.\n            Allows to override the default color mapping strategy.\n\n    Returns:\n        The annotated image, matching the type of `scene` (`numpy.ndarray`\n            or `PIL.Image.Image`)\n\n    Example:\n        ```python\n        import supervision as sv\n\n        image = ...\n        detections = sv.Detections(...)\n\n        circle_annotator = sv.CircleAnnotator()\n        annotated_frame = circle_annotator.annotate(\n            scene=image.copy(),\n            detections=detections\n        )\n        ```\n\n\n    ![circle-annotator-example](https://media.roboflow.com/\n    supervision-annotator-examples/circle-annotator-example-purple.png)\n    \"\"\"\n    for detection_idx in range(len(detections)):\n        x1, y1, x2, y2 = detections.xyxy[detection_idx].astype(int)\n        center = ((x1 + x2) // 2, (y1 + y2) // 2)\n        distance = sqrt((x1 - center[0]) ** 2 + (y1 - center[1]) ** 2)\n        color = resolve_color(\n            color=self.color,\n            detections=detections,\n            detection_idx=detection_idx,\n            color_lookup=self.color_lookup\n            if custom_color_lookup is None\n            else custom_color_lookup,\n        )\n        cv2.circle(\n            img=scene,\n            center=center,\n            radius=int(distance),\n            color=color.as_bgr(),\n            thickness=self.thickness,\n        )\n\n    return scene\n
"},{"location":"detection/annotators/#supervision.annotators.core.DotAnnotator-functions","title":"Functions","text":""},{"location":"detection/annotators/#supervision.annotators.core.DotAnnotator.__init__","title":"__init__(color=ColorPalette.DEFAULT, radius=4, position=Position.CENTER, color_lookup=ColorLookup.CLASS, outline_thickness=0)","text":"

Parameters:

| Name | Type | Description | Default |
|---|---|---|---|
| color | Union[Color, ColorPalette] | The color or color palette to use for annotating detections. | DEFAULT |
| radius | int | Radius of the drawn dots. | 4 |
| position | Position | The anchor position for placing the dot. | CENTER |
| color_lookup | ColorLookup | Strategy for mapping colors to annotations. Options are INDEX, CLASS, TRACK. | CLASS |
| outline_thickness | int | Thickness of the outline of the dot. | 0 |

Source code in supervision/annotators/core.py
def __init__(\n    self,\n    color: Union[Color, ColorPalette] = ColorPalette.DEFAULT,\n    radius: int = 4,\n    position: Position = Position.CENTER,\n    color_lookup: ColorLookup = ColorLookup.CLASS,\n    outline_thickness: int = 0,\n):\n    \"\"\"\n    Args:\n        color (Union[Color, ColorPalette]): The color or color palette to use for\n            annotating detections.\n        radius (int): Radius of the drawn dots.\n        position (Position): The anchor position for placing the dot.\n        color_lookup (ColorLookup): Strategy for mapping colors to annotations.\n            Options are `INDEX`, `CLASS`, `TRACK`.\n        outline_thickness (int): Thickness of the outline of the dot.\n    \"\"\"\n    self.color: Union[Color, ColorPalette] = color\n    self.radius: int = radius\n    self.position: Position = position\n    self.color_lookup: ColorLookup = color_lookup\n    self.outline_thickness = outline_thickness\n
"},{"location":"detection/annotators/#supervision.annotators.core.DotAnnotator.annotate","title":"annotate(scene, detections, custom_color_lookup=None)","text":"

Annotates the given scene with dots based on the provided detections.

Parameters:

| Name | Type | Description | Default |
|---|---|---|---|
| scene | ImageType | The image where dots will be drawn. ImageType is a flexible type, accepting either numpy.ndarray or PIL.Image.Image. | required |
| detections | Detections | Object detections to annotate. | required |
| custom_color_lookup | Optional[ndarray] | Custom color lookup array. Allows overriding the default color mapping strategy. | None |

Returns:

| Type | Description |
|---|---|
| ImageType | The annotated image, matching the type of scene (numpy.ndarray or PIL.Image.Image) |

Example
import supervision as sv\n\nimage = ...\ndetections = sv.Detections(...)\n\ndot_annotator = sv.DotAnnotator()\nannotated_frame = dot_annotator.annotate(\n    scene=image.copy(),\n    detections=detections\n)\n
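A minimal sketch combining the position and outline_thickness options documented above; the synthetic image and detections are illustrative:

```python
import numpy as np
import supervision as sv

image = np.zeros((480, 640, 3), dtype=np.uint8)
detections = sv.Detections(
    xyxy=np.array([[50, 50, 150, 200], [300, 120, 500, 420]], dtype=float),
    class_id=np.array([0, 1]),
)

# Place the dot on the bottom-centre anchor and give it a thin black outline.
dot_annotator = sv.DotAnnotator(
    radius=6,
    position=sv.Position.BOTTOM_CENTER,
    outline_thickness=1,
)
annotated_frame = dot_annotator.annotate(scene=image.copy(), detections=detections)
```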

Source code in supervision/annotators/core.py
@convert_for_annotation_method\ndef annotate(\n    self,\n    scene: ImageType,\n    detections: Detections,\n    custom_color_lookup: Optional[np.ndarray] = None,\n) -> ImageType:\n    \"\"\"\n    Annotates the given scene with dots based on the provided detections.\n\n    Args:\n        scene (ImageType): The image where dots will be drawn.\n            `ImageType` is a flexible type, accepting either `numpy.ndarray`\n            or `PIL.Image.Image`.\n        detections (Detections): Object detections to annotate.\n        custom_color_lookup (Optional[np.ndarray]): Custom color lookup array.\n            Allows to override the default color mapping strategy.\n\n    Returns:\n        The annotated image, matching the type of `scene` (`numpy.ndarray`\n            or `PIL.Image.Image`)\n\n    Example:\n        ```python\n        import supervision as sv\n\n        image = ...\n        detections = sv.Detections(...)\n\n        dot_annotator = sv.DotAnnotator()\n        annotated_frame = dot_annotator.annotate(\n            scene=image.copy(),\n            detections=detections\n        )\n        ```\n\n    ![dot-annotator-example](https://media.roboflow.com/\n    supervision-annotator-examples/dot-annotator-example-purple.png)\n    \"\"\"\n    xy = detections.get_anchors_coordinates(anchor=self.position)\n    for detection_idx in range(len(detections)):\n        color = resolve_color(\n            color=self.color,\n            detections=detections,\n            detection_idx=detection_idx,\n            color_lookup=self.color_lookup\n            if custom_color_lookup is None\n            else custom_color_lookup,\n        )\n        center = (int(xy[detection_idx, 0]), int(xy[detection_idx, 1]))\n\n        cv2.circle(scene, center, self.radius, color.as_bgr(), -1)\n        if self.outline_thickness:\n            cv2.circle(\n                scene, center, self.radius, (0, 0, 0), self.outline_thickness\n            )\n    return scene\n
"},{"location":"detection/annotators/#supervision.annotators.core.TriangleAnnotator-functions","title":"Functions","text":""},{"location":"detection/annotators/#supervision.annotators.core.TriangleAnnotator.__init__","title":"__init__(color=ColorPalette.DEFAULT, base=10, height=10, position=Position.TOP_CENTER, color_lookup=ColorLookup.CLASS, outline_thickness=0)","text":"

Parameters:

| Name | Type | Description | Default |
|---|---|---|---|
| color | Union[Color, ColorPalette] | The color or color palette to use for annotating detections. | DEFAULT |
| base | int | The base width of the triangle. | 10 |
| height | int | The height of the triangle. | 10 |
| position | Position | The anchor position for placing the triangle. | TOP_CENTER |
| color_lookup | ColorLookup | Strategy for mapping colors to annotations. Options are INDEX, CLASS, TRACK. | CLASS |
| outline_thickness | int | Thickness of the outline of the triangle. | 0 |

Source code in supervision/annotators/core.py
def __init__(\n    self,\n    color: Union[Color, ColorPalette] = ColorPalette.DEFAULT,\n    base: int = 10,\n    height: int = 10,\n    position: Position = Position.TOP_CENTER,\n    color_lookup: ColorLookup = ColorLookup.CLASS,\n    outline_thickness: int = 0,\n):\n    \"\"\"\n    Args:\n        color (Union[Color, ColorPalette]): The color or color palette to use for\n            annotating detections.\n        base (int): The base width of the triangle.\n        height (int): The height of the triangle.\n        position (Position): The anchor position for placing the triangle.\n        color_lookup (ColorLookup): Strategy for mapping colors to annotations.\n            Options are `INDEX`, `CLASS`, `TRACK`.\n        outline_thickness (int): Thickness of the outline of the triangle.\n    \"\"\"\n    self.color: Union[Color, ColorPalette] = color\n    self.base: int = base\n    self.height: int = height\n    self.position: Position = position\n    self.color_lookup: ColorLookup = color_lookup\n    self.outline_thickness: int = outline_thickness\n
"},{"location":"detection/annotators/#supervision.annotators.core.TriangleAnnotator.annotate","title":"annotate(scene, detections, custom_color_lookup=None)","text":"

Annotates the given scene with triangles based on the provided detections.

Parameters:

| Name | Type | Description | Default |
|---|---|---|---|
| scene | ImageType | The image where triangles will be drawn. ImageType is a flexible type, accepting either numpy.ndarray or PIL.Image.Image. | required |
| detections | Detections | Object detections to annotate. | required |
| custom_color_lookup | Optional[ndarray] | Custom color lookup array. Allows overriding the default color mapping strategy. | None |

Returns:

| Type | Description |
|---|---|
| ImageType | The annotated image, matching the type of scene (numpy.ndarray or PIL.Image.Image) |

Example
import supervision as sv\n\nimage = ...\ndetections = sv.Detections(...)\n\ntriangle_annotator = sv.TriangleAnnotator()\nannotated_frame = triangle_annotator.annotate(\n    scene=image.copy(),\n    detections=detections\n)\n
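A minimal sketch of a customised marker using the base, height and position options documented above; all inputs are illustrative:

```python
import numpy as np
import supervision as sv

image = np.zeros((480, 640, 3), dtype=np.uint8)
detections = sv.Detections(
    xyxy=np.array([[80, 100, 220, 300]], dtype=float),
    class_id=np.array([0]),
)

# A wider, taller marker anchored above each box (TOP_CENTER is the default anchor).
triangle_annotator = sv.TriangleAnnotator(
    base=20,
    height=24,
    position=sv.Position.TOP_CENTER,
    outline_thickness=1,
)
annotated_frame = triangle_annotator.annotate(scene=image.copy(), detections=detections)
```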

Source code in supervision/annotators/core.py
@convert_for_annotation_method\ndef annotate(\n    self,\n    scene: ImageType,\n    detections: Detections,\n    custom_color_lookup: Optional[np.ndarray] = None,\n) -> ImageType:\n    \"\"\"\n    Annotates the given scene with triangles based on the provided detections.\n\n    Args:\n        scene (ImageType): The image where triangles will be drawn.\n            `ImageType` is a flexible type, accepting either `numpy.ndarray`\n            or `PIL.Image.Image`.\n        detections (Detections): Object detections to annotate.\n        custom_color_lookup (Optional[np.ndarray]): Custom color lookup array.\n            Allows to override the default color mapping strategy.\n\n    Returns:\n        The annotated image, matching the type of `scene` (`numpy.ndarray`\n            or `PIL.Image.Image`)\n\n    Example:\n        ```python\n        import supervision as sv\n\n        image = ...\n        detections = sv.Detections(...)\n\n        triangle_annotator = sv.TriangleAnnotator()\n        annotated_frame = triangle_annotator.annotate(\n            scene=image.copy(),\n            detections=detections\n        )\n        ```\n\n    ![triangle-annotator-example](https://media.roboflow.com/\n    supervision-annotator-examples/triangle-annotator-example.png)\n    \"\"\"\n    xy = detections.get_anchors_coordinates(anchor=self.position)\n    for detection_idx in range(len(detections)):\n        color = resolve_color(\n            color=self.color,\n            detections=detections,\n            detection_idx=detection_idx,\n            color_lookup=self.color_lookup\n            if custom_color_lookup is None\n            else custom_color_lookup,\n        )\n        tip_x, tip_y = int(xy[detection_idx, 0]), int(xy[detection_idx, 1])\n        vertices = np.array(\n            [\n                [tip_x - self.base // 2, tip_y - self.height],\n                [tip_x + self.base // 2, tip_y - self.height],\n                [tip_x, tip_y],\n            ],\n            np.int32,\n        )\n\n        cv2.fillPoly(scene, [vertices], color.as_bgr())\n        if self.outline_thickness:\n            cv2.polylines(\n                scene, [vertices], True, (0, 0, 0), thickness=self.outline_thickness\n            )\n    return scene\n
"},{"location":"detection/annotators/#supervision.annotators.core.EllipseAnnotator-functions","title":"Functions","text":""},{"location":"detection/annotators/#supervision.annotators.core.EllipseAnnotator.__init__","title":"__init__(color=ColorPalette.DEFAULT, thickness=2, start_angle=-45, end_angle=235, color_lookup=ColorLookup.CLASS)","text":"

Parameters:

| Name | Type | Description | Default |
|---|---|---|---|
| color | Union[Color, ColorPalette] | The color or color palette to use for annotating detections. | DEFAULT |
| thickness | int | Thickness of the ellipse lines. | 2 |
| start_angle | int | Starting angle of the ellipse. | -45 |
| end_angle | int | Ending angle of the ellipse. | 235 |
| color_lookup | str | Strategy for mapping colors to annotations. Options are INDEX, CLASS, TRACK. | CLASS |

Source code in supervision/annotators/core.py
def __init__(\n    self,\n    color: Union[Color, ColorPalette] = ColorPalette.DEFAULT,\n    thickness: int = 2,\n    start_angle: int = -45,\n    end_angle: int = 235,\n    color_lookup: ColorLookup = ColorLookup.CLASS,\n):\n    \"\"\"\n    Args:\n        color (Union[Color, ColorPalette]): The color or color palette to use for\n            annotating detections.\n        thickness (int): Thickness of the ellipse lines.\n        start_angle (int): Starting angle of the ellipse.\n        end_angle (int): Ending angle of the ellipse.\n        color_lookup (str): Strategy for mapping colors to annotations.\n            Options are `INDEX`, `CLASS`, `TRACK`.\n    \"\"\"\n    self.color: Union[Color, ColorPalette] = color\n    self.thickness: int = thickness\n    self.start_angle: int = start_angle\n    self.end_angle: int = end_angle\n    self.color_lookup: ColorLookup = color_lookup\n
"},{"location":"detection/annotators/#supervision.annotators.core.EllipseAnnotator.annotate","title":"annotate(scene, detections, custom_color_lookup=None)","text":"

Annotates the given scene with ellipses based on the provided detections.

Parameters:

| Name | Type | Description | Default |
|---|---|---|---|
| scene | ImageType | The image where ellipses will be drawn. ImageType is a flexible type, accepting either numpy.ndarray or PIL.Image.Image. | required |
| detections | Detections | Object detections to annotate. | required |
| custom_color_lookup | Optional[ndarray] | Custom color lookup array. Allows overriding the default color mapping strategy. | None |

Returns:

| Type | Description |
|---|---|
| ImageType | The annotated image, matching the type of scene (numpy.ndarray or PIL.Image.Image) |

Example
import supervision as sv\n\nimage = ...\ndetections = sv.Detections(...)\n\nellipse_annotator = sv.EllipseAnnotator()\nannotated_frame = ellipse_annotator.annotate(\n    scene=image.copy(),\n    detections=detections\n)\n
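A minimal sketch that replaces the default open arc (start_angle -45, end_angle 235) with a full ellipse; the inputs are illustrative:

```python
import numpy as np
import supervision as sv

image = np.zeros((480, 640, 3), dtype=np.uint8)
detections = sv.Detections(
    xyxy=np.array([[120, 150, 320, 420]], dtype=float),
    class_id=np.array([0]),
)

# Drawing the full 0-360 degree arc instead of the default open arc.
ellipse_annotator = sv.EllipseAnnotator(thickness=3, start_angle=0, end_angle=360)
annotated_frame = ellipse_annotator.annotate(scene=image.copy(), detections=detections)
```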

Source code in supervision/annotators/core.py
@convert_for_annotation_method\ndef annotate(\n    self,\n    scene: ImageType,\n    detections: Detections,\n    custom_color_lookup: Optional[np.ndarray] = None,\n) -> ImageType:\n    \"\"\"\n    Annotates the given scene with ellipses based on the provided detections.\n\n    Args:\n        scene (ImageType): The image where ellipses will be drawn.\n            `ImageType` is a flexible type, accepting either `numpy.ndarray`\n            or `PIL.Image.Image`.\n        detections (Detections): Object detections to annotate.\n        custom_color_lookup (Optional[np.ndarray]): Custom color lookup array.\n            Allows to override the default color mapping strategy.\n\n    Returns:\n        The annotated image, matching the type of `scene` (`numpy.ndarray`\n            or `PIL.Image.Image`)\n\n    Example:\n        ```python\n        import supervision as sv\n\n        image = ...\n        detections = sv.Detections(...)\n\n        ellipse_annotator = sv.EllipseAnnotator()\n        annotated_frame = ellipse_annotator.annotate(\n            scene=image.copy(),\n            detections=detections\n        )\n        ```\n\n    ![ellipse-annotator-example](https://media.roboflow.com/\n    supervision-annotator-examples/ellipse-annotator-example-purple.png)\n    \"\"\"\n    for detection_idx in range(len(detections)):\n        x1, y1, x2, y2 = detections.xyxy[detection_idx].astype(int)\n        color = resolve_color(\n            color=self.color,\n            detections=detections,\n            detection_idx=detection_idx,\n            color_lookup=self.color_lookup\n            if custom_color_lookup is None\n            else custom_color_lookup,\n        )\n        center = (int((x1 + x2) / 2), y2)\n        width = x2 - x1\n        cv2.ellipse(\n            scene,\n            center=center,\n            axes=(int(width), int(0.35 * width)),\n            angle=0.0,\n            startAngle=self.start_angle,\n            endAngle=self.end_angle,\n            color=color.as_bgr(),\n            thickness=self.thickness,\n            lineType=cv2.LINE_4,\n        )\n    return scene\n
"},{"location":"detection/annotators/#supervision.annotators.core.HaloAnnotator-functions","title":"Functions","text":""},{"location":"detection/annotators/#supervision.annotators.core.HaloAnnotator.__init__","title":"__init__(color=ColorPalette.DEFAULT, opacity=0.8, kernel_size=40, color_lookup=ColorLookup.CLASS)","text":"

Parameters:

| Name | Type | Description | Default |
|---|---|---|---|
| color | Union[Color, ColorPalette] | The color or color palette to use for annotating detections. | DEFAULT |
| opacity | float | Opacity of the overlay mask. Must be between 0 and 1. | 0.8 |
| kernel_size | int | The size of the average pooling kernel used for creating the halo. | 40 |
| color_lookup | str | Strategy for mapping colors to annotations. Options are INDEX, CLASS, TRACK. | CLASS |

Source code in supervision/annotators/core.py
def __init__(\n    self,\n    color: Union[Color, ColorPalette] = ColorPalette.DEFAULT,\n    opacity: float = 0.8,\n    kernel_size: int = 40,\n    color_lookup: ColorLookup = ColorLookup.CLASS,\n):\n    \"\"\"\n    Args:\n        color (Union[Color, ColorPalette]): The color or color palette to use for\n            annotating detections.\n        opacity (float): Opacity of the overlay mask. Must be between `0` and `1`.\n        kernel_size (int): The size of the average pooling kernel used for creating\n            the halo.\n        color_lookup (str): Strategy for mapping colors to annotations.\n            Options are `INDEX`, `CLASS`, `TRACK`.\n    \"\"\"\n    self.color: Union[Color, ColorPalette] = color\n    self.opacity = opacity\n    self.color_lookup: ColorLookup = color_lookup\n    self.kernel_size: int = kernel_size\n
"},{"location":"detection/annotators/#supervision.annotators.core.HaloAnnotator.annotate","title":"annotate(scene, detections, custom_color_lookup=None)","text":"

Annotates the given scene with halos based on the provided detections.

Parameters:

| Name | Type | Description | Default |
|---|---|---|---|
| scene | ImageType | The image where masks will be drawn. ImageType is a flexible type, accepting either numpy.ndarray or PIL.Image.Image. | required |
| detections | Detections | Object detections to annotate. | required |
| custom_color_lookup | Optional[ndarray] | Custom color lookup array. Allows overriding the default color mapping strategy. | None |

Returns:

| Type | Description |
|---|---|
| ImageType | The annotated image, matching the type of scene (numpy.ndarray or PIL.Image.Image) |

Example
import supervision as sv\n\nimage = ...\ndetections = sv.Detections(...)\n\nhalo_annotator = sv.HaloAnnotator()\nannotated_frame = halo_annotator.annotate(\n    scene=image.copy(),\n    detections=detections\n)\n
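As the source code below shows, the annotator returns the scene unchanged when detections.mask is None, so segmentation masks are required. A minimal sketch with a synthetic boolean mask of shape (num_detections, H, W); all values are illustrative:

```python
import numpy as np
import supervision as sv

h, w = 240, 320
image = np.zeros((h, w, 3), dtype=np.uint8)

# HaloAnnotator needs segmentation masks; without them the scene is returned unchanged.
mask = np.zeros((1, h, w), dtype=bool)
mask[0, 60:180, 80:240] = True

detections = sv.Detections(
    xyxy=np.array([[80, 60, 240, 180]], dtype=float),
    mask=mask,
    class_id=np.array([0]),
)

halo_annotator = sv.HaloAnnotator(opacity=0.8, kernel_size=40)
annotated_frame = halo_annotator.annotate(scene=image.copy(), detections=detections)
```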

Source code in supervision/annotators/core.py
@convert_for_annotation_method\ndef annotate(\n    self,\n    scene: ImageType,\n    detections: Detections,\n    custom_color_lookup: Optional[np.ndarray] = None,\n) -> ImageType:\n    \"\"\"\n    Annotates the given scene with halos based on the provided detections.\n\n    Args:\n        scene (ImageType): The image where masks will be drawn.\n            `ImageType` is a flexible type, accepting either `numpy.ndarray`\n            or `PIL.Image.Image`.\n        detections (Detections): Object detections to annotate.\n        custom_color_lookup (Optional[np.ndarray]): Custom color lookup array.\n            Allows to override the default color mapping strategy.\n\n    Returns:\n        The annotated image, matching the type of `scene` (`numpy.ndarray`\n            or `PIL.Image.Image`)\n\n    Example:\n        ```python\n        import supervision as sv\n\n        image = ...\n        detections = sv.Detections(...)\n\n        halo_annotator = sv.HaloAnnotator()\n        annotated_frame = halo_annotator.annotate(\n            scene=image.copy(),\n            detections=detections\n        )\n        ```\n\n    ![halo-annotator-example](https://media.roboflow.com/\n    supervision-annotator-examples/halo-annotator-example-purple.png)\n    \"\"\"\n    if detections.mask is None:\n        return scene\n    colored_mask = np.zeros_like(scene, dtype=np.uint8)\n    fmask = np.array([False] * scene.shape[0] * scene.shape[1]).reshape(\n        scene.shape[0], scene.shape[1]\n    )\n\n    for detection_idx in np.flip(np.argsort(detections.area)):\n        color = resolve_color(\n            color=self.color,\n            detections=detections,\n            detection_idx=detection_idx,\n            color_lookup=self.color_lookup\n            if custom_color_lookup is None\n            else custom_color_lookup,\n        )\n        mask = detections.mask[detection_idx]\n        fmask = np.logical_or(fmask, mask)\n        color_bgr = color.as_bgr()\n        colored_mask[mask] = color_bgr\n\n    colored_mask = cv2.blur(colored_mask, (self.kernel_size, self.kernel_size))\n    colored_mask[fmask] = [0, 0, 0]\n    gray = cv2.cvtColor(colored_mask, cv2.COLOR_BGR2GRAY)\n    alpha = self.opacity * gray / gray.max()\n    alpha_mask = alpha[:, :, np.newaxis]\n    scene = np.uint8(scene * (1 - alpha_mask) + colored_mask * self.opacity)\n    return scene\n
"},{"location":"detection/annotators/#supervision.annotators.core.PercentageBarAnnotator-functions","title":"Functions","text":""},{"location":"detection/annotators/#supervision.annotators.core.PercentageBarAnnotator.__init__","title":"__init__(height=16, width=80, color=ColorPalette.DEFAULT, border_color=Color.BLACK, position=Position.TOP_CENTER, color_lookup=ColorLookup.CLASS, border_thickness=None)","text":"

Parameters:

| Name | Type | Description | Default |
|---|---|---|---|
| height | int | The height in pixels of the percentage bar. | 16 |
| width | int | The width in pixels of the percentage bar. | 80 |
| color | Union[Color, ColorPalette] | The color or color palette to use for annotating detections. | DEFAULT |
| border_color | Color | The color of the border lines. | BLACK |
| position | Position | The anchor position of drawing the percentage bar. | TOP_CENTER |
| color_lookup | str | Strategy for mapping colors to annotations. Options are INDEX, CLASS, TRACK. | CLASS |
| border_thickness | int | The thickness of the border lines. | None |

Source code in supervision/annotators/core.py
def __init__(\n    self,\n    height: int = 16,\n    width: int = 80,\n    color: Union[Color, ColorPalette] = ColorPalette.DEFAULT,\n    border_color: Color = Color.BLACK,\n    position: Position = Position.TOP_CENTER,\n    color_lookup: ColorLookup = ColorLookup.CLASS,\n    border_thickness: int = None,\n):\n    \"\"\"\n    Args:\n        height (int): The height in pixels of the percentage bar.\n        width (int): The width in pixels of the percentage bar.\n        color (Union[Color, ColorPalette]): The color or color palette to use for\n            annotating detections.\n        border_color (Color): The color of the border lines.\n        position (Position): The anchor position of drawing the percentage bar.\n        color_lookup (str): Strategy for mapping colors to annotations.\n            Options are `INDEX`, `CLASS`, `TRACK`.\n        border_thickness (int): The thickness of the border lines.\n    \"\"\"\n    self.height: int = height\n    self.width: int = width\n    self.color: Union[Color, ColorPalette] = color\n    self.border_color: Color = border_color\n    self.position: Position = position\n    self.color_lookup: ColorLookup = color_lookup\n\n    if border_thickness is None:\n        self.border_thickness = int(0.15 * self.height)\n
"},{"location":"detection/annotators/#supervision.annotators.core.PercentageBarAnnotator.annotate","title":"annotate(scene, detections, custom_color_lookup=None, custom_values=None)","text":"

Annotates the given scene with percentage bars based on the provided detections. The percentage bars visually represent the confidence or custom values associated with each detection.

Parameters:

| Name | Type | Description | Default |
|---|---|---|---|
| scene | ImageType | The image where percentage bars will be drawn. ImageType is a flexible type, accepting either numpy.ndarray or PIL.Image.Image. | required |
| detections | Detections | Object detections to annotate. | required |
| custom_color_lookup | Optional[ndarray] | Custom color lookup array. Allows overriding the default color mapping strategy. | None |
| custom_values | Optional[ndarray] | Custom values array to use instead of the default detection confidences. This array should have the same length as the number of detections and contain a value between 0 and 1 (inclusive) for each detection, representing the percentage to be displayed (see the sketch after the example below). | None |

Returns:

| Type | Description |
|---|---|
| ImageType | The annotated image, matching the type of scene (numpy.ndarray or PIL.Image.Image) |

Example
import supervision as sv\n\nimage = ...\ndetections = sv.Detections(...)\n\npercentage_bar_annotator = sv.PercentageBarAnnotator()\nannotated_frame = percentage_bar_annotator.annotate(\n    scene=image.copy(),\n    detections=detections\n)\n
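A minimal sketch of the custom_values option described above: by default the bars are filled from detections.confidence, but any per-detection value in [0, 1] can be displayed instead. The numbers are illustrative:

```python
import numpy as np
import supervision as sv

image = np.zeros((480, 640, 3), dtype=np.uint8)
detections = sv.Detections(
    xyxy=np.array([[60, 80, 220, 300], [320, 120, 560, 400]], dtype=float),
    class_id=np.array([0, 1]),
    confidence=np.array([0.85, 0.40]),
)

percentage_bar_annotator = sv.PercentageBarAnnotator()

# Default behaviour fills the bars from detection confidences ...
annotated_frame = percentage_bar_annotator.annotate(
    scene=image.copy(), detections=detections
)

# ... but any per-detection value in [0, 1] can be shown instead.
custom_values = np.array([0.25, 0.90])
annotated_frame = percentage_bar_annotator.annotate(
    scene=image.copy(), detections=detections, custom_values=custom_values
)
```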

Source code in supervision/annotators/core.py
@convert_for_annotation_method\ndef annotate(\n    self,\n    scene: ImageType,\n    detections: Detections,\n    custom_color_lookup: Optional[np.ndarray] = None,\n    custom_values: Optional[np.ndarray] = None,\n) -> ImageType:\n    \"\"\"\n    Annotates the given scene with percentage bars based on the provided\n    detections. The percentage bars visually represent the confidence or custom\n    values associated with each detection.\n\n    Args:\n        scene (ImageType): The image where percentage bars will be drawn.\n            `ImageType` is a flexible type, accepting either `numpy.ndarray`\n            or `PIL.Image.Image`.\n        detections (Detections): Object detections to annotate.\n        custom_color_lookup (Optional[np.ndarray]): Custom color lookup array.\n            Allows to override the default color mapping strategy.\n        custom_values (Optional[np.ndarray]): Custom values array to use instead\n            of the default detection confidences. This array should have the\n            same length as the number of detections and contain a value between\n            0 and 1 (inclusive) for each detection, representing the percentage\n            to be displayed.\n\n    Returns:\n        The annotated image, matching the type of `scene` (`numpy.ndarray`\n            or `PIL.Image.Image`)\n\n    Example:\n        ```python\n        import supervision as sv\n\n        image = ...\n        detections = sv.Detections(...)\n\n        percentage_bar_annotator = sv.PercentageBarAnnotator()\n        annotated_frame = percentage_bar_annotator.annotate(\n            scene=image.copy(),\n            detections=detections\n        )\n        ```\n\n    ![percentage-bar-example](https://media.roboflow.com/\n    supervision-annotator-examples/percentage-bar-annotator-example-purple.png)\n    \"\"\"\n    self.validate_custom_values(\n        custom_values=custom_values, detections_count=len(detections)\n    )\n    anchors = detections.get_anchors_coordinates(anchor=self.position)\n    for detection_idx in range(len(detections)):\n        anchor = anchors[detection_idx]\n        border_coordinates = self.calculate_border_coordinates(\n            anchor_xy=(int(anchor[0]), int(anchor[1])),\n            border_wh=(self.width, self.height),\n            position=self.position,\n        )\n        border_width = border_coordinates[1][0] - border_coordinates[0][0]\n\n        value = (\n            custom_values[detection_idx]\n            if custom_values is not None\n            else detections.confidence[detection_idx]\n        )\n\n        color = resolve_color(\n            color=self.color,\n            detections=detections,\n            detection_idx=detection_idx,\n            color_lookup=self.color_lookup\n            if custom_color_lookup is None\n            else custom_color_lookup,\n        )\n        cv2.rectangle(\n            img=scene,\n            pt1=border_coordinates[0],\n            pt2=(\n                border_coordinates[0][0] + int(border_width * value),\n                border_coordinates[1][1],\n            ),\n            color=color.as_bgr(),\n            thickness=-1,\n        )\n        cv2.rectangle(\n            img=scene,\n            pt1=border_coordinates[0],\n            pt2=border_coordinates[1],\n            color=self.border_color.as_bgr(),\n            thickness=self.border_thickness,\n        )\n    return scene\n
"},{"location":"detection/annotators/#supervision.annotators.core.HeatMapAnnotator-functions","title":"Functions","text":""},{"location":"detection/annotators/#supervision.annotators.core.HeatMapAnnotator.__init__","title":"__init__(position=Position.BOTTOM_CENTER, opacity=0.2, radius=40, kernel_size=25, top_hue=0, low_hue=125)","text":"

Parameters:

| Name | Type | Description | Default |
|---|---|---|---|
| position | Position | The position of the heatmap. Defaults to BOTTOM_CENTER. | BOTTOM_CENTER |
| opacity | float | Opacity of the overlay mask, between 0 and 1. | 0.2 |
| radius | int | Radius of the heat circle. | 40 |
| kernel_size | int | Kernel size for blurring the heatmap. | 25 |
| top_hue | int | Hue at the top of the heatmap. Defaults to 0 (red). | 0 |
| low_hue | int | Hue at the bottom of the heatmap. Defaults to 125 (blue). | 125 |

Source code in supervision/annotators/core.py
def __init__(\n    self,\n    position: Position = Position.BOTTOM_CENTER,\n    opacity: float = 0.2,\n    radius: int = 40,\n    kernel_size: int = 25,\n    top_hue: int = 0,\n    low_hue: int = 125,\n):\n    \"\"\"\n    Args:\n        position (Position): The position of the heatmap. Defaults to\n            `BOTTOM_CENTER`.\n        opacity (float): Opacity of the overlay mask, between 0 and 1.\n        radius (int): Radius of the heat circle.\n        kernel_size (int): Kernel size for blurring the heatmap.\n        top_hue (int): Hue at the top of the heatmap. Defaults to 0 (red).\n        low_hue (int): Hue at the bottom of the heatmap. Defaults to 125 (blue).\n    \"\"\"\n    self.position = position\n    self.opacity = opacity\n    self.radius = radius\n    self.kernel_size = kernel_size\n    self.heat_mask = None\n    self.top_hue = top_hue\n    self.low_hue = low_hue\n
"},{"location":"detection/annotators/#supervision.annotators.core.HeatMapAnnotator.annotate","title":"annotate(scene, detections)","text":"

Annotates the scene with a heatmap based on the provided detections.

Parameters:

| Name | Type | Description | Default |
|---|---|---|---|
| scene | ImageType | The image where the heatmap will be drawn. ImageType is a flexible type, accepting either numpy.ndarray or PIL.Image.Image. | required |
| detections | Detections | Object detections to annotate. | required |

Returns:

| Type | Description |
|---|---|
| ImageType | The annotated image, matching the type of scene (numpy.ndarray or PIL.Image.Image) |

Example
import supervision as sv\nfrom ultralytics import YOLO\n\nmodel = YOLO('yolov8x.pt')\n\nheat_map_annotator = sv.HeatMapAnnotator()\n\nvideo_info = sv.VideoInfo.from_video_path(video_path='...')\nframes_generator = get_video_frames_generator(source_path='...')\n\nwith sv.VideoSink(target_path='...', video_info=video_info) as sink:\n   for frame in frames_generator:\n       result = model(frame)[0]\n       detections = sv.Detections.from_ultralytics(result)\n       annotated_frame = heat_map_annotator.annotate(\n           scene=frame.copy(),\n           detections=detections)\n       sink.write_frame(frame=annotated_frame)\n
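As the source code below shows, the annotator keeps an internal heat_mask that accumulates across annotate calls, so a single instance should be reused for all frames of one video. A minimal, model-free sketch with synthetic detections (purely illustrative):

```python
import numpy as np
import supervision as sv

heat_map_annotator = sv.HeatMapAnnotator(opacity=0.3, radius=30)

frame = np.zeros((240, 320, 3), dtype=np.uint8)
for step in range(5):
    # A synthetic detection drifting to the right; the internal heat mask accumulates.
    x = 40 + step * 30
    detections = sv.Detections(
        xyxy=np.array([[x, 100, x + 60, 180]], dtype=float),
        class_id=np.array([0]),
    )
    annotated_frame = heat_map_annotator.annotate(
        scene=frame.copy(), detections=detections
    )
```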

Source code in supervision/annotators/core.py
@convert_for_annotation_method\ndef annotate(self, scene: ImageType, detections: Detections) -> ImageType:\n    \"\"\"\n    Annotates the scene with a heatmap based on the provided detections.\n\n    Args:\n        scene (ImageType): The image where the heatmap will be drawn.\n            `ImageType` is a flexible type, accepting either `numpy.ndarray`\n            or `PIL.Image.Image`.\n        detections (Detections): Object detections to annotate.\n\n    Returns:\n        The annotated image, matching the type of `scene` (`numpy.ndarray`\n            or `PIL.Image.Image`)\n\n    Example:\n        ```python\n        import supervision as sv\n        from ultralytics import YOLO\n\n        model = YOLO('yolov8x.pt')\n\n        heat_map_annotator = sv.HeatMapAnnotator()\n\n        video_info = sv.VideoInfo.from_video_path(video_path='...')\n        frames_generator = get_video_frames_generator(source_path='...')\n\n        with sv.VideoSink(target_path='...', video_info=video_info) as sink:\n           for frame in frames_generator:\n               result = model(frame)[0]\n               detections = sv.Detections.from_ultralytics(result)\n               annotated_frame = heat_map_annotator.annotate(\n                   scene=frame.copy(),\n                   detections=detections)\n               sink.write_frame(frame=annotated_frame)\n        ```\n\n    ![heatmap-annotator-example](https://media.roboflow.com/\n    supervision-annotator-examples/heat-map-annotator-example-purple.png)\n    \"\"\"\n\n    if self.heat_mask is None:\n        self.heat_mask = np.zeros(scene.shape[:2])\n    mask = np.zeros(scene.shape[:2])\n    for xy in detections.get_anchors_coordinates(self.position):\n        cv2.circle(mask, (int(xy[0]), int(xy[1])), self.radius, 1, -1)\n    self.heat_mask = mask + self.heat_mask\n    temp = self.heat_mask.copy()\n    temp = self.low_hue - temp / temp.max() * (self.low_hue - self.top_hue)\n    temp = temp.astype(np.uint8)\n    if self.kernel_size is not None:\n        temp = cv2.blur(temp, (self.kernel_size, self.kernel_size))\n    hsv = np.zeros(scene.shape)\n    hsv[..., 0] = temp\n    hsv[..., 1] = 255\n    hsv[..., 2] = 255\n    temp = cv2.cvtColor(hsv.astype(np.uint8), cv2.COLOR_HSV2BGR)\n    mask = cv2.cvtColor(self.heat_mask.astype(np.uint8), cv2.COLOR_GRAY2BGR) > 0\n    scene[mask] = cv2.addWeighted(temp, self.opacity, scene, 1 - self.opacity, 0)[\n        mask\n    ]\n    return scene\n
"},{"location":"detection/annotators/#supervision.annotators.core.MaskAnnotator-functions","title":"Functions","text":""},{"location":"detection/annotators/#supervision.annotators.core.MaskAnnotator.__init__","title":"__init__(color=ColorPalette.DEFAULT, opacity=0.5, color_lookup=ColorLookup.CLASS)","text":"

Parameters:

| Name | Type | Description | Default |
|---|---|---|---|
| color | Union[Color, ColorPalette] | The color or color palette to use for annotating detections. | DEFAULT |
| opacity | float | Opacity of the overlay mask. Must be between 0 and 1. | 0.5 |
| color_lookup | str | Strategy for mapping colors to annotations. Options are INDEX, CLASS, TRACK. | CLASS |

Source code in supervision/annotators/core.py
def __init__(\n    self,\n    color: Union[Color, ColorPalette] = ColorPalette.DEFAULT,\n    opacity: float = 0.5,\n    color_lookup: ColorLookup = ColorLookup.CLASS,\n):\n    \"\"\"\n    Args:\n        color (Union[Color, ColorPalette]): The color or color palette to use for\n            annotating detections.\n        opacity (float): Opacity of the overlay mask. Must be between `0` and `1`.\n        color_lookup (str): Strategy for mapping colors to annotations.\n            Options are `INDEX`, `CLASS`, `TRACK`.\n    \"\"\"\n    self.color: Union[Color, ColorPalette] = color\n    self.opacity = opacity\n    self.color_lookup: ColorLookup = color_lookup\n
"},{"location":"detection/annotators/#supervision.annotators.core.MaskAnnotator.annotate","title":"annotate(scene, detections, custom_color_lookup=None)","text":"

Annotates the given scene with masks based on the provided detections.

Parameters:

| Name | Type | Description | Default |
|---|---|---|---|
| scene | ImageType | The image where masks will be drawn. ImageType is a flexible type, accepting either numpy.ndarray or PIL.Image.Image. | required |
| detections | Detections | Object detections to annotate. | required |
| custom_color_lookup | Optional[ndarray] | Custom color lookup array. Allows overriding the default color mapping strategy. | None |

Returns:

| Type | Description |
|---|---|
| ImageType | The annotated image, matching the type of scene (numpy.ndarray or PIL.Image.Image) |

Example
import supervision as sv\n\nimage = ...\ndetections = sv.Detections(...)\n\nmask_annotator = sv.MaskAnnotator()\nannotated_frame = mask_annotator.annotate(\n    scene=image.copy(),\n    detections=detections\n)\n
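As the source code below shows, the annotator returns the scene unchanged when detections.mask is None, and masks are painted in descending area order so smaller objects stay visible on top. A minimal sketch with synthetic overlapping masks (the shapes are illustrative, not from the docs):

```python
import numpy as np
import supervision as sv

h, w = 240, 320
image = np.zeros((h, w, 3), dtype=np.uint8)

# Two overlapping boolean masks of shape (num_detections, H, W).
masks = np.zeros((2, h, w), dtype=bool)
masks[0, 40:200, 40:280] = True     # large region, painted first
masks[1, 100:160, 120:220] = True   # small region, painted on top

detections = sv.Detections(
    xyxy=np.array([[40, 40, 280, 200], [120, 100, 220, 160]], dtype=float),
    mask=masks,
    class_id=np.array([0, 1]),
)

mask_annotator = sv.MaskAnnotator(opacity=0.5)
annotated_frame = mask_annotator.annotate(scene=image.copy(), detections=detections)
```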

Source code in supervision/annotators/core.py
@convert_for_annotation_method\ndef annotate(\n    self,\n    scene: ImageType,\n    detections: Detections,\n    custom_color_lookup: Optional[np.ndarray] = None,\n) -> ImageType:\n    \"\"\"\n    Annotates the given scene with masks based on the provided detections.\n\n    Args:\n        scene (ImageType): The image where masks will be drawn.\n            `ImageType` is a flexible type, accepting either `numpy.ndarray`\n            or `PIL.Image.Image`.\n        detections (Detections): Object detections to annotate.\n        custom_color_lookup (Optional[np.ndarray]): Custom color lookup array.\n            Allows to override the default color mapping strategy.\n\n    Returns:\n        The annotated image, matching the type of `scene` (`numpy.ndarray`\n            or `PIL.Image.Image`)\n\n    Example:\n        ```python\n        import supervision as sv\n\n        image = ...\n        detections = sv.Detections(...)\n\n        mask_annotator = sv.MaskAnnotator()\n        annotated_frame = mask_annotator.annotate(\n            scene=image.copy(),\n            detections=detections\n        )\n        ```\n\n    ![mask-annotator-example](https://media.roboflow.com/\n    supervision-annotator-examples/mask-annotator-example-purple.png)\n    \"\"\"\n    if detections.mask is None:\n        return scene\n\n    colored_mask = np.array(scene, copy=True, dtype=np.uint8)\n\n    for detection_idx in np.flip(np.argsort(detections.area)):\n        color = resolve_color(\n            color=self.color,\n            detections=detections,\n            detection_idx=detection_idx,\n            color_lookup=self.color_lookup\n            if custom_color_lookup is None\n            else custom_color_lookup,\n        )\n        mask = detections.mask[detection_idx]\n        colored_mask[mask] = color.as_bgr()\n\n    scene = cv2.addWeighted(colored_mask, self.opacity, scene, 1 - self.opacity, 0)\n    return scene.astype(np.uint8)\n
"},{"location":"detection/annotators/#supervision.annotators.core.PolygonAnnotator-functions","title":"Functions","text":""},{"location":"detection/annotators/#supervision.annotators.core.PolygonAnnotator.__init__","title":"__init__(color=ColorPalette.DEFAULT, thickness=2, color_lookup=ColorLookup.CLASS)","text":"

Parameters:

| Name | Type | Description | Default |
|---|---|---|---|
| color | Union[Color, ColorPalette] | The color or color palette to use for annotating detections. | DEFAULT |
| thickness | int | Thickness of the polygon lines. | 2 |
| color_lookup | str | Strategy for mapping colors to annotations. Options are INDEX, CLASS, TRACK. | CLASS |

Source code in supervision/annotators/core.py
def __init__(\n    self,\n    color: Union[Color, ColorPalette] = ColorPalette.DEFAULT,\n    thickness: int = 2,\n    color_lookup: ColorLookup = ColorLookup.CLASS,\n):\n    \"\"\"\n    Args:\n        color (Union[Color, ColorPalette]): The color or color palette to use for\n            annotating detections.\n        thickness (int): Thickness of the polygon lines.\n        color_lookup (str): Strategy for mapping colors to annotations.\n            Options are `INDEX`, `CLASS`, `TRACK`.\n    \"\"\"\n    self.color: Union[Color, ColorPalette] = color\n    self.thickness: int = thickness\n    self.color_lookup: ColorLookup = color_lookup\n
"},{"location":"detection/annotators/#supervision.annotators.core.PolygonAnnotator.annotate","title":"annotate(scene, detections, custom_color_lookup=None)","text":"

Annotates the given scene with polygons based on the provided detections.

Parameters:

| Name | Type | Description | Default |
|---|---|---|---|
| scene | ImageType | The image where polygons will be drawn. ImageType is a flexible type, accepting either numpy.ndarray or PIL.Image.Image. | required |
| detections | Detections | Object detections to annotate. | required |
| custom_color_lookup | Optional[ndarray] | Custom color lookup array. Allows overriding the default color mapping strategy. | None |

Returns:

| Type | Description |
|---|---|
| ImageType | The annotated image, matching the type of scene (numpy.ndarray or PIL.Image.Image) |

Example
import supervision as sv\n\nimage = ...\ndetections = sv.Detections(...)\n\npolygon_annotator = sv.PolygonAnnotator()\nannotated_frame = polygon_annotator.annotate(\n    scene=image.copy(),\n    detections=detections\n)\n

Source code in supervision/annotators/core.py
@convert_for_annotation_method\ndef annotate(\n    self,\n    scene: ImageType,\n    detections: Detections,\n    custom_color_lookup: Optional[np.ndarray] = None,\n) -> ImageType:\n    \"\"\"\n    Annotates the given scene with polygons based on the provided detections.\n\n    Args:\n        scene (ImageType): The image where polygons will be drawn.\n            `ImageType` is a flexible type, accepting either `numpy.ndarray`\n            or `PIL.Image.Image`.\n        detections (Detections): Object detections to annotate.\n        custom_color_lookup (Optional[np.ndarray]): Custom color lookup array.\n            Allows to override the default color mapping strategy.\n\n    Returns:\n        The annotated image, matching the type of `scene` (`numpy.ndarray`\n            or `PIL.Image.Image`)\n\n    Example:\n        ```python\n        import supervision as sv\n\n        image = ...\n        detections = sv.Detections(...)\n\n        polygon_annotator = sv.PolygonAnnotator()\n        annotated_frame = polygon_annotator.annotate(\n            scene=image.copy(),\n            detections=detections\n        )\n        ```\n\n    ![polygon-annotator-example](https://media.roboflow.com/\n    supervision-annotator-examples/polygon-annotator-example-purple.png)\n    \"\"\"\n    if detections.mask is None:\n        return scene\n\n    for detection_idx in range(len(detections)):\n        mask = detections.mask[detection_idx]\n        color = resolve_color(\n            color=self.color,\n            detections=detections,\n            detection_idx=detection_idx,\n            color_lookup=self.color_lookup\n            if custom_color_lookup is None\n            else custom_color_lookup,\n        )\n        for polygon in mask_to_polygons(mask=mask):\n            scene = draw_polygon(\n                scene=scene,\n                polygon=polygon,\n                color=color,\n                thickness=self.thickness,\n            )\n\n    return scene\n
"},{"location":"detection/annotators/#supervision.annotators.core.LabelAnnotator-functions","title":"Functions","text":""},{"location":"detection/annotators/#supervision.annotators.core.LabelAnnotator.__init__","title":"__init__(color=ColorPalette.DEFAULT, text_color=Color.WHITE, text_scale=0.5, text_thickness=1, text_padding=10, text_position=Position.TOP_LEFT, color_lookup=ColorLookup.CLASS, border_radius=0)","text":"

Parameters:

Name Type Description Default color Union[Color, ColorPalette]

The color or color palette to use for annotating the text background.

DEFAULT text_color Color

The color to use for the text.

WHITE text_scale float

Font scale for the text.

0.5 text_thickness int

Thickness of the text characters.

1 text_padding int

Padding around the text within its background box.

10 text_position Position

Position of the text relative to the detection. Possible values are defined in the Position enum.

TOP_LEFT color_lookup str

Strategy for mapping colors to annotations. Options are INDEX, CLASS, TRACK.

CLASS border_radius int

The radius to apply to round edges. If the selected value is higher than the lower dimension (width or height), it is clipped.

0 Source code in supervision/annotators/core.py
def __init__(\n    self,\n    color: Union[Color, ColorPalette] = ColorPalette.DEFAULT,\n    text_color: Color = Color.WHITE,\n    text_scale: float = 0.5,\n    text_thickness: int = 1,\n    text_padding: int = 10,\n    text_position: Position = Position.TOP_LEFT,\n    color_lookup: ColorLookup = ColorLookup.CLASS,\n    border_radius: int = 0,\n):\n    \"\"\"\n    Args:\n        color (Union[Color, ColorPalette]): The color or color palette to use for\n            annotating the text background.\n        text_color (Color): The color to use for the text.\n        text_scale (float): Font scale for the text.\n        text_thickness (int): Thickness of the text characters.\n        text_padding (int): Padding around the text within its background box.\n        text_position (Position): Position of the text relative to the detection.\n            Possible values are defined in the `Position` enum.\n        color_lookup (str): Strategy for mapping colors to annotations.\n            Options are `INDEX`, `CLASS`, `TRACK`.\n        border_radius (int): The radius to apply round edges. If the selected\n            value is higher than the lower dimension, width or height, is clipped.\n    \"\"\"\n    self.border_radius: int = border_radius\n    self.color: Union[Color, ColorPalette] = color\n    self.text_color: Color = text_color\n    self.text_scale: float = text_scale\n    self.text_thickness: int = text_thickness\n    self.text_padding: int = text_padding\n    self.text_anchor: Position = text_position\n    self.color_lookup: ColorLookup = color_lookup\n
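
A hedged configuration sketch (the values are illustrative): each keyword mirrors one of the parameters documented above.

import supervision as sv\n\n# illustrative values; the keywords correspond to the parameters listed above\nlabel_annotator = sv.LabelAnnotator(\n    color=sv.ColorPalette.DEFAULT,\n    text_color=sv.Color.WHITE,\n    text_scale=0.5,\n    text_thickness=1,\n    text_padding=10,\n    text_position=sv.Position.TOP_LEFT,\n    border_radius=4\n)\n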
"},{"location":"detection/annotators/#supervision.annotators.core.LabelAnnotator.annotate","title":"annotate(scene, detections, labels=None, custom_color_lookup=None)","text":"

Annotates the given scene with labels based on the provided detections.

Parameters:

Name Type Description Default scene ImageType

The image where labels will be drawn. ImageType is a flexible type, accepting either numpy.ndarray or PIL.Image.Image.

required detections Detections

Object detections to annotate.

required labels List[str]

Optional. Custom labels for each detection.

None custom_color_lookup Optional[ndarray]

Custom color lookup array. Allows overriding the default color mapping strategy.

None

Returns:

Type Description ImageType

The annotated image, matching the type of scene (numpy.ndarray or PIL.Image.Image)

Example
import supervision as sv\n\nimage = ...\ndetections = sv.Detections(...)\n\nlabels = [\n    f\"{class_name} {confidence:.2f}\"\n    for class_name, confidence\n    in zip(detections['class_name'], detections.confidence)\n]\n\nlabel_annotator = sv.LabelAnnotator(text_position=sv.Position.CENTER)\nannotated_frame = label_annotator.annotate(\n    scene=image.copy(),\n    detections=detections,\n    labels=labels\n)\n

Source code in supervision/annotators/core.py
@convert_for_annotation_method\ndef annotate(\n    self,\n    scene: ImageType,\n    detections: Detections,\n    labels: List[str] = None,\n    custom_color_lookup: Optional[np.ndarray] = None,\n) -> ImageType:\n    \"\"\"\n    Annotates the given scene with labels based on the provided detections.\n\n    Args:\n        scene (ImageType): The image where labels will be drawn.\n            `ImageType` is a flexible type, accepting either `numpy.ndarray`\n            or `PIL.Image.Image`.\n        detections (Detections): Object detections to annotate.\n        labels (List[str]): Optional. Custom labels for each detection.\n        custom_color_lookup (Optional[np.ndarray]): Custom color lookup array.\n            Allows to override the default color mapping strategy.\n\n    Returns:\n        The annotated image, matching the type of `scene` (`numpy.ndarray`\n            or `PIL.Image.Image`)\n\n    Example:\n        ```python\n         import supervision as sv\n\n        image = ...\n        detections = sv.Detections(...)\n\n        labels = [\n            f\"{class_name} {confidence:.2f}\"\n            for class_name, confidence\n            in zip(detections['class_name'], detections.confidence)\n        ]\n\n        label_annotator = sv.LabelAnnotator(text_position=sv.Position.CENTER)\n        annotated_frame = label_annotator.annotate(\n            scene=image.copy(),\n            detections=detections,\n            labels=labels\n        )\n        ```\n\n    ![label-annotator-example](https://media.roboflow.com/\n    supervision-annotator-examples/label-annotator-example-purple.png)\n    \"\"\"\n    font = cv2.FONT_HERSHEY_SIMPLEX\n    anchors_coordinates = detections.get_anchors_coordinates(\n        anchor=self.text_anchor\n    ).astype(int)\n    if labels is not None and len(labels) != len(detections):\n        raise ValueError(\n            f\"The number of labels provided ({len(labels)}) does not match the \"\n            f\"number of detections ({len(detections)}). Each detection should have \"\n            f\"a corresponding label. This discrepancy can occur if the labels and \"\n            f\"detections are not aligned or if an incorrect number of labels has \"\n            f\"been provided. 
Please ensure that the labels array has the same \"\n            f\"length as the Detections object.\"\n        )\n\n    for detection_idx, center_coordinates in enumerate(anchors_coordinates):\n        color = resolve_color(\n            color=self.color,\n            detections=detections,\n            detection_idx=detection_idx,\n            color_lookup=(\n                self.color_lookup\n                if custom_color_lookup is None\n                else custom_color_lookup\n            ),\n        )\n\n        if labels is not None:\n            text = labels[detection_idx]\n        elif detections[CLASS_NAME_DATA_FIELD] is not None:\n            text = detections[CLASS_NAME_DATA_FIELD][detection_idx]\n        elif detections.class_id is not None:\n            text = str(detections.class_id[detection_idx])\n        else:\n            text = str(detection_idx)\n\n        text_w, text_h = cv2.getTextSize(\n            text=text,\n            fontFace=font,\n            fontScale=self.text_scale,\n            thickness=self.text_thickness,\n        )[0]\n        text_w_padded = text_w + 2 * self.text_padding\n        text_h_padded = text_h + 2 * self.text_padding\n        text_background_xyxy = resolve_text_background_xyxy(\n            center_coordinates=tuple(center_coordinates),\n            text_wh=(text_w_padded, text_h_padded),\n            position=self.text_anchor,\n        )\n\n        text_x = text_background_xyxy[0] + self.text_padding\n        text_y = text_background_xyxy[1] + self.text_padding + text_h\n\n        self.draw_rounded_rectangle(\n            scene=scene,\n            xyxy=text_background_xyxy,\n            color=color.as_bgr(),\n            border_radius=self.border_radius,\n        )\n        cv2.putText(\n            img=scene,\n            text=text,\n            org=(text_x, text_y),\n            fontFace=font,\n            fontScale=self.text_scale,\n            color=self.text_color.as_rgb(),\n            thickness=self.text_thickness,\n            lineType=cv2.LINE_AA,\n        )\n    return scene\n
"},{"location":"detection/annotators/#supervision.annotators.core.RichLabelAnnotator-functions","title":"Functions","text":""},{"location":"detection/annotators/#supervision.annotators.core.RichLabelAnnotator.__init__","title":"__init__(color=ColorPalette.DEFAULT, text_color=Color.WHITE, font_path=None, font_size=10, text_padding=10, text_position=Position.TOP_LEFT, color_lookup=ColorLookup.CLASS, border_radius=0)","text":"

Parameters:

Name Type Description Default color Union[Color, ColorPalette]

The color or color palette to use for annotating the text background.

DEFAULT text_color Color

The color to use for the text.

WHITE font_path str

Path to the font file (e.g., \".ttf\" or \".otf\") to use for rendering text. If None, the default PIL font will be used.

None font_size int

Font size for the text.

10 text_padding int

Padding around the text within its background box.

10 text_position Position

Position of the text relative to the detection. Possible values are defined in the Position enum.

TOP_LEFT color_lookup ColorLookup

Strategy for mapping colors to annotations. Options are INDEX, CLASS, TRACK.

CLASS border_radius int

The radius to apply to round edges. If the selected value is higher than the lower dimension (width or height), it is clipped.

0 Source code in supervision/annotators/core.py
def __init__(\n    self,\n    color: Union[Color, ColorPalette] = ColorPalette.DEFAULT,\n    text_color: Color = Color.WHITE,\n    font_path: str = None,\n    font_size: int = 10,\n    text_padding: int = 10,\n    text_position: Position = Position.TOP_LEFT,\n    color_lookup: ColorLookup = ColorLookup.CLASS,\n    border_radius: int = 0,\n):\n    \"\"\"\n    Args:\n        color (Union[Color, ColorPalette]): The color or color palette to use for\n            annotating the text background.\n        text_color (Color): The color to use for the text.\n        font_path (str): Path to the font file (e.g., \".ttf\" or \".otf\") to use for\n            rendering text. If `None`, the default PIL font will be used.\n        font_size (int): Font size for the text.\n        text_padding (int): Padding around the text within its background box.\n        text_position (Position): Position of the text relative to the detection.\n            Possible values are defined in the `Position` enum.\n        color_lookup (ColorLookup): Strategy for mapping colors to annotations.\n            Options are `INDEX`, `CLASS`, `TRACK`.\n        border_radius (int): The radius to apply round edges. If the selected\n            value is higher than the lower dimension, width or height, is clipped.\n    \"\"\"\n    self.color = color\n    self.text_color = text_color\n    self.text_padding = text_padding\n    self.text_anchor = text_position\n    self.color_lookup = color_lookup\n    self.border_radius = border_radius\n    if font_path is not None:\n        try:\n            self.font = ImageFont.truetype(font_path, font_size)\n        except OSError:\n            print(f\"Font path '{font_path}' not found. Using PIL's default font.\")\n            self.font = ImageFont.load_default(size=font_size)\n    else:\n        self.font = ImageFont.load_default(size=font_size)\n
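
A short constructor sketch (font size and radius are illustrative; the font path reuses the placeholder from the example above): if the file cannot be loaded, the annotator falls back to PIL's default font.

import supervision as sv\n\n# path/to/font.ttf is a placeholder; a missing file triggers the PIL default-font fallback\nrich_label_annotator = sv.RichLabelAnnotator(\n    font_path=\"path/to/font.ttf\",\n    font_size=16,\n    text_color=sv.Color.WHITE,\n    border_radius=4\n)\n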
"},{"location":"detection/annotators/#supervision.annotators.core.RichLabelAnnotator.annotate","title":"annotate(scene, detections, labels=None, custom_color_lookup=None)","text":"

Annotates the given scene with labels based on the provided detections, with support for Unicode characters.

Parameters:

Name Type Description Default scene ImageType

The image where labels will be drawn. ImageType is a flexible type, accepting either numpy.ndarray or PIL.Image.Image.

required detections Detections

Object detections to annotate.

required labels List[str]

Optional. Custom labels for each detection.

None custom_color_lookup Optional[ndarray]

Custom color lookup array. Allows overriding the default color mapping strategy.

None

Returns:

Type Description ImageType

The annotated image, matching the type of scene (numpy.ndarray or PIL.Image.Image)

Example
import supervision as sv\n\nimage = ...\ndetections = sv.Detections(...)\n\nlabels = [\n    f\"{class_name} {confidence:.2f}\"\n    for class_name, confidence\n    in zip(detections['class_name'], detections.confidence)\n]\n\nrich_label_annotator = sv.RichLabelAnnotator(font_path=\"path/to/font.ttf\")\nannotated_frame = rich_label_annotator.annotate(\n    scene=image.copy(),\n    detections=detections,\n    labels=labels\n)\n
Source code in supervision/annotators/core.py
def annotate(\n    self,\n    scene: ImageType,\n    detections: Detections,\n    labels: List[str] = None,\n    custom_color_lookup: Optional[np.ndarray] = None,\n) -> ImageType:\n    \"\"\"\n    Annotates the given scene with labels based on the provided\n    detections, with support for Unicode characters.\n\n    Args:\n        scene (ImageType): The image where labels will be drawn.\n            `ImageType` is a flexible type, accepting either `numpy.ndarray`\n            or `PIL.Image.Image`.\n        detections (Detections): Object detections to annotate.\n        labels (List[str]): Optional. Custom labels for each detection.\n        custom_color_lookup (Optional[np.ndarray]): Custom color lookup array.\n            Allows to override the default color mapping strategy.\n\n    Returns:\n        The annotated image, matching the type of `scene` (`numpy.ndarray`\n            or `PIL.Image.Image`)\n\n    Example:\n        ```python\n        import supervision as sv\n\n        image = ...\n        detections = sv.Detections(...)\n\n        labels = [\n            f\"{class_name} {confidence:.2f}\"\n            for class_name, confidence\n            in zip(detections['class_name'], detections.confidence)\n        ]\n\n        rich_label_annotator = sv.RichLabelAnnotator(font_path=\"path/to/font.ttf\")\n        annotated_frame = label_annotator.annotate(\n            scene=image.copy(),\n            detections=detections,\n            labels=labels\n        )\n        ```\n\n    \"\"\"\n    if isinstance(scene, np.ndarray):\n        scene = Image.fromarray(cv2.cvtColor(scene, cv2.COLOR_BGR2RGB))\n    draw = ImageDraw.Draw(scene)\n    anchors_coordinates = detections.get_anchors_coordinates(\n        anchor=self.text_anchor\n    ).astype(int)\n    if labels is not None and len(labels) != len(detections):\n        raise ValueError(\n            f\"The number of labels provided ({len(labels)}) does not match the \"\n            f\"number of detections ({len(detections)}). Each detection should have \"\n            f\"a corresponding label. This discrepancy can occur if the labels and \"\n            f\"detections are not aligned or if an incorrect number of labels has \"\n            f\"been provided. 
Please ensure that the labels array has the same \"\n            f\"length as the Detections object.\"\n        )\n    for detection_idx, center_coordinates in enumerate(anchors_coordinates):\n        color = resolve_color(\n            color=self.color,\n            detections=detections,\n            detection_idx=detection_idx,\n            color_lookup=(\n                self.color_lookup\n                if custom_color_lookup is None\n                else custom_color_lookup\n            ),\n        )\n        if labels is not None:\n            text = labels[detection_idx]\n        elif detections[CLASS_NAME_DATA_FIELD] is not None:\n            text = detections[CLASS_NAME_DATA_FIELD][detection_idx]\n        elif detections.class_id is not None:\n            text = str(detections.class_id[detection_idx])\n        else:\n            text = str(detection_idx)\n\n        left, top, right, bottom = draw.textbbox((0, 0), text, font=self.font)\n        text_width = right - left\n        text_height = bottom - top\n        text_w_padded = text_width + 2 * self.text_padding\n        text_h_padded = text_height + 2 * self.text_padding\n        text_background_xyxy = resolve_text_background_xyxy(\n            center_coordinates=tuple(center_coordinates),\n            text_wh=(text_w_padded, text_h_padded),\n            position=self.text_anchor,\n        )\n\n        text_x = text_background_xyxy[0] + self.text_padding - left\n        text_y = text_background_xyxy[1] + self.text_padding - top\n\n        draw.rounded_rectangle(\n            text_background_xyxy,\n            radius=self.border_radius,\n            fill=color.as_rgb(),\n            outline=None,\n        )\n        draw.text(\n            xy=(text_x, text_y),\n            text=text,\n            font=self.font,\n            fill=self.text_color.as_rgb(),\n        )\n\n    return scene\n
"},{"location":"detection/annotators/#supervision.annotators.core.BlurAnnotator-functions","title":"Functions","text":""},{"location":"detection/annotators/#supervision.annotators.core.BlurAnnotator.__init__","title":"__init__(kernel_size=15)","text":"

Parameters:

Name Type Description Default kernel_size int

The size of the average pooling kernel used for blurring.

15 Source code in supervision/annotators/core.py
def __init__(self, kernel_size: int = 15):\n    \"\"\"\n    Args:\n        kernel_size (int): The size of the average pooling kernel used for blurring.\n    \"\"\"\n    self.kernel_size: int = kernel_size\n
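
A one-line usage sketch (the kernel size is illustrative): larger kernels produce a stronger blur.

import supervision as sv\n\n# kernel_size=25 is an illustrative value; larger kernels blur more aggressively\nblur_annotator = sv.BlurAnnotator(kernel_size=25)\n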
"},{"location":"detection/annotators/#supervision.annotators.core.BlurAnnotator.annotate","title":"annotate(scene, detections)","text":"

Annotates the given scene by blurring regions based on the provided detections.

Parameters:

Name Type Description Default scene ImageType

The image where blurring will be applied. ImageType is a flexible type, accepting either numpy.ndarray or PIL.Image.Image.

required detections Detections

Object detections to annotate.

required

Returns:

Type Description ImageType

The annotated image, matching the type of scene (numpy.ndarray or PIL.Image.Image)

Example
import supervision as sv\n\nimage = ...\ndetections = sv.Detections(...)\n\nblur_annotator = sv.BlurAnnotator()\nannotated_frame = blur_annotator.annotate(\n    scene=image.copy(),\n    detections=detections\n)\n

Source code in supervision/annotators/core.py
@convert_for_annotation_method\ndef annotate(\n    self,\n    scene: ImageType,\n    detections: Detections,\n) -> ImageType:\n    \"\"\"\n    Annotates the given scene by blurring regions based on the provided detections.\n\n    Args:\n        scene (ImageType): The image where blurring will be applied.\n            `ImageType` is a flexible type, accepting either `numpy.ndarray`\n            or `PIL.Image.Image`.\n        detections (Detections): Object detections to annotate.\n\n    Returns:\n        The annotated image, matching the type of `scene` (`numpy.ndarray`\n            or `PIL.Image.Image`)\n\n    Example:\n        ```python\n        import supervision as sv\n\n        image = ...\n        detections = sv.Detections(...)\n\n        blur_annotator = sv.BlurAnnotator()\n        annotated_frame = circle_annotator.annotate(\n            scene=image.copy(),\n            detections=detections\n        )\n        ```\n\n    ![blur-annotator-example](https://media.roboflow.com/\n    supervision-annotator-examples/blur-annotator-example-purple.png)\n    \"\"\"\n    image_height, image_width = scene.shape[:2]\n    clipped_xyxy = clip_boxes(\n        xyxy=detections.xyxy, resolution_wh=(image_width, image_height)\n    ).astype(int)\n\n    for x1, y1, x2, y2 in clipped_xyxy:\n        roi = scene[y1:y2, x1:x2]\n        roi = cv2.blur(roi, (self.kernel_size, self.kernel_size))\n        scene[y1:y2, x1:x2] = roi\n\n    return scene\n
"},{"location":"detection/annotators/#supervision.annotators.core.PixelateAnnotator-functions","title":"Functions","text":""},{"location":"detection/annotators/#supervision.annotators.core.PixelateAnnotator.__init__","title":"__init__(pixel_size=20)","text":"

Parameters:

Name Type Description Default pixel_size int

The size of the pixelation.

20 Source code in supervision/annotators/core.py
def __init__(self, pixel_size: int = 20):\n    \"\"\"\n    Args:\n        pixel_size (int): The size of the pixelation.\n    \"\"\"\n    self.pixel_size: int = pixel_size\n
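
A one-line usage sketch (the pixel size is illustrative): larger values produce coarser blocks.

import supervision as sv\n\n# pixel_size=30 is an illustrative value; larger values yield coarser pixelation\npixelate_annotator = sv.PixelateAnnotator(pixel_size=30)\n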
"},{"location":"detection/annotators/#supervision.annotators.core.PixelateAnnotator.annotate","title":"annotate(scene, detections)","text":"

Annotates the given scene by pixelating regions based on the provided detections.

Parameters:

Name Type Description Default scene ImageType

The image where pixelating will be applied. ImageType is a flexible type, accepting either numpy.ndarray or PIL.Image.Image.

required detections Detections

Object detections to annotate.

required

Returns:

Type Description ImageType

The annotated image, matching the type of scene (numpy.ndarray or PIL.Image.Image)

Example
import supervision as sv\n\nimage = ...\ndetections = sv.Detections(...)\n\npixelate_annotator = sv.PixelateAnnotator()\nannotated_frame = pixelate_annotator.annotate(\n    scene=image.copy(),\n    detections=detections\n)\n

Source code in supervision/annotators/core.py
@convert_for_annotation_method\ndef annotate(\n    self,\n    scene: ImageType,\n    detections: Detections,\n) -> ImageType:\n    \"\"\"\n    Annotates the given scene by pixelating regions based on the provided\n        detections.\n\n    Args:\n        scene (ImageType): The image where pixelating will be applied.\n            `ImageType` is a flexible type, accepting either `numpy.ndarray`\n            or `PIL.Image.Image`.\n        detections (Detections): Object detections to annotate.\n\n    Returns:\n        The annotated image, matching the type of `scene` (`numpy.ndarray`\n            or `PIL.Image.Image`)\n\n    Example:\n        ```python\n        import supervision as sv\n\n        image = ...\n        detections = sv.Detections(...)\n\n        pixelate_annotator = sv.PixelateAnnotator()\n        annotated_frame = pixelate_annotator.annotate(\n            scene=image.copy(),\n            detections=detections\n        )\n        ```\n\n    ![pixelate-annotator-example](https://media.roboflow.com/\n    supervision-annotator-examples/pixelate-annotator-example-10.png)\n    \"\"\"\n    image_height, image_width = scene.shape[:2]\n    clipped_xyxy = clip_boxes(\n        xyxy=detections.xyxy, resolution_wh=(image_width, image_height)\n    ).astype(int)\n\n    for x1, y1, x2, y2 in clipped_xyxy:\n        roi = scene[y1:y2, x1:x2]\n        scaled_up_roi = cv2.resize(\n            src=roi, dsize=None, fx=1 / self.pixel_size, fy=1 / self.pixel_size\n        )\n        scaled_down_roi = cv2.resize(\n            src=scaled_up_roi,\n            dsize=(roi.shape[1], roi.shape[0]),\n            interpolation=cv2.INTER_NEAREST,\n        )\n\n        scene[y1:y2, x1:x2] = scaled_down_roi\n\n    return scene\n
"},{"location":"detection/annotators/#supervision.annotators.core.TraceAnnotator-functions","title":"Functions","text":""},{"location":"detection/annotators/#supervision.annotators.core.TraceAnnotator.__init__","title":"__init__(color=ColorPalette.DEFAULT, position=Position.CENTER, trace_length=30, thickness=2, color_lookup=ColorLookup.CLASS)","text":"

Parameters:

Name Type Description Default color Union[Color, ColorPalette]

The color to draw the trace; it can be a single color or a color palette.

DEFAULT position Position

The position of the trace. Defaults to CENTER.

CENTER trace_length int

The maximum length of the trace in terms of historical points. Defaults to 30.

30 thickness int

The thickness of the trace lines. Defaults to 2.

2 color_lookup str

Strategy for mapping colors to annotations. Options are INDEX, CLASS, TRACK.

CLASS Source code in supervision/annotators/core.py
def __init__(\n    self,\n    color: Union[Color, ColorPalette] = ColorPalette.DEFAULT,\n    position: Position = Position.CENTER,\n    trace_length: int = 30,\n    thickness: int = 2,\n    color_lookup: ColorLookup = ColorLookup.CLASS,\n):\n    \"\"\"\n    Args:\n        color (Union[Color, ColorPalette]): The color to draw the trace, can be\n            a single color or a color palette.\n        position (Position): The position of the trace.\n            Defaults to `CENTER`.\n        trace_length (int): The maximum length of the trace in terms of historical\n            points. Defaults to `30`.\n        thickness (int): The thickness of the trace lines. Defaults to `2`.\n        color_lookup (str): Strategy for mapping colors to annotations.\n            Options are `INDEX`, `CLASS`, `TRACK`.\n    \"\"\"\n    self.color: Union[Color, ColorPalette] = color\n    self.trace = Trace(max_size=trace_length, anchor=position)\n    self.thickness = thickness\n    self.color_lookup: ColorLookup = color_lookup\n
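
A constructor sketch (the values are illustrative): a longer trace colored per tracker id, which assumes the detections carry tracker_id values from a tracker such as sv.ByteTrack.

import supervision as sv\n\n# illustrative values; TRACK lookup assumes detections carry tracker_id values\ntrace_annotator = sv.TraceAnnotator(\n    position=sv.Position.CENTER,\n    trace_length=60,\n    thickness=2,\n    color_lookup=sv.ColorLookup.TRACK\n)\n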
"},{"location":"detection/annotators/#supervision.annotators.core.TraceAnnotator.annotate","title":"annotate(scene, detections, custom_color_lookup=None)","text":"

Draws trace paths on the frame based on the detection coordinates provided.

Parameters:

Name Type Description Default scene ImageType

The image on which the traces will be drawn. ImageType is a flexible type, accepting either numpy.ndarray or PIL.Image.Image.

required detections Detections

The detections which include coordinates for which the traces will be drawn.

required custom_color_lookup Optional[ndarray]

Custom color lookup array. Allows overriding the default color mapping strategy.

None

Returns:

Type Description ImageType

The annotated image, matching the type of scene (numpy.ndarray or PIL.Image.Image)

Example
import supervision as sv\nfrom ultralytics import YOLO\n\nmodel = YOLO('yolov8x.pt')\ntrace_annotator = sv.TraceAnnotator()\n\nvideo_info = sv.VideoInfo.from_video_path(video_path='...')\nframes_generator = sv.get_video_frames_generator(source_path='...')\ntracker = sv.ByteTrack()\n\nwith sv.VideoSink(target_path='...', video_info=video_info) as sink:\n   for frame in frames_generator:\n       result = model(frame)[0]\n       detections = sv.Detections.from_ultralytics(result)\n       detections = tracker.update_with_detections(detections)\n       annotated_frame = trace_annotator.annotate(\n           scene=frame.copy(),\n           detections=detections)\n       sink.write_frame(frame=annotated_frame)\n

Source code in supervision/annotators/core.py
@convert_for_annotation_method\ndef annotate(\n    self,\n    scene: ImageType,\n    detections: Detections,\n    custom_color_lookup: Optional[np.ndarray] = None,\n) -> ImageType:\n    \"\"\"\n    Draws trace paths on the frame based on the detection coordinates provided.\n\n    Args:\n        scene (ImageType): The image on which the traces will be drawn.\n            `ImageType` is a flexible type, accepting either `numpy.ndarray`\n            or `PIL.Image.Image`.\n        detections (Detections): The detections which include coordinates for\n            which the traces will be drawn.\n        custom_color_lookup (Optional[np.ndarray]): Custom color lookup array.\n            Allows to override the default color mapping strategy.\n\n    Returns:\n        The annotated image, matching the type of `scene` (`numpy.ndarray`\n            or `PIL.Image.Image`)\n\n    Example:\n        ```python\n        import supervision as sv\n        from ultralytics import YOLO\n\n        model = YOLO('yolov8x.pt')\n        trace_annotator = sv.TraceAnnotator()\n\n        video_info = sv.VideoInfo.from_video_path(video_path='...')\n        frames_generator = sv.get_video_frames_generator(source_path='...')\n        tracker = sv.ByteTrack()\n\n        with sv.VideoSink(target_path='...', video_info=video_info) as sink:\n           for frame in frames_generator:\n               result = model(frame)[0]\n               detections = sv.Detections.from_ultralytics(result)\n               detections = tracker.update_with_detections(detections)\n               annotated_frame = trace_annotator.annotate(\n                   scene=frame.copy(),\n                   detections=detections)\n               sink.write_frame(frame=annotated_frame)\n        ```\n\n    ![trace-annotator-example](https://media.roboflow.com/\n    supervision-annotator-examples/trace-annotator-example-purple.png)\n    \"\"\"\n    self.trace.put(detections)\n\n    for detection_idx in range(len(detections)):\n        tracker_id = int(detections.tracker_id[detection_idx])\n        color = resolve_color(\n            color=self.color,\n            detections=detections,\n            detection_idx=detection_idx,\n            color_lookup=self.color_lookup\n            if custom_color_lookup is None\n            else custom_color_lookup,\n        )\n        xy = self.trace.get(tracker_id=tracker_id)\n        if len(xy) > 1:\n            scene = cv2.polylines(\n                scene,\n                [xy.astype(np.int32)],\n                False,\n                color=color.as_bgr(),\n                thickness=self.thickness,\n            )\n    return scene\n
"},{"location":"detection/annotators/#supervision.annotators.core.CropAnnotator-functions","title":"Functions","text":""},{"location":"detection/annotators/#supervision.annotators.core.CropAnnotator.__init__","title":"__init__(position=Position.TOP_CENTER, scale_factor=2, border_color=ColorPalette.DEFAULT, border_thickness=2, border_color_lookup=ColorLookup.CLASS)","text":"

Parameters:

Name Type Description Default position Position

The anchor position for placing the cropped and scaled part of the detection in the scene.

TOP_CENTER scale_factor int

The factor by which to scale the cropped image part. A factor of 2, for example, would double the size of the cropped area, allowing for a closer view of the detection.

2 border_color Union[Color, ColorPalette]

The color or color palette to use for annotating the border around the cropped area.

DEFAULT border_thickness int

The thickness of the border around the cropped area.

2 border_color_lookup ColorLookup

Strategy for mapping colors to annotations. Options are INDEX, CLASS, TRACK.

CLASS Source code in supervision/annotators/core.py
def __init__(\n    self,\n    position: Position = Position.TOP_CENTER,\n    scale_factor: int = 2,\n    border_color: Union[Color, ColorPalette] = ColorPalette.DEFAULT,\n    border_thickness: int = 2,\n    border_color_lookup: ColorLookup = ColorLookup.CLASS,\n):\n    \"\"\"\n    Args:\n        position (Position): The anchor position for placing the cropped and scaled\n            part of the detection in the scene.\n        scale_factor (int): The factor by which to scale the cropped image part. A\n            factor of 2, for example, would double the size of the cropped area,\n            allowing for a closer view of the detection.\n        border_color (Union[Color, ColorPalette]): The color or color palette to\n            use for annotating border around the cropped area.\n        border_thickness (int): The thickness of the border around the cropped area.\n        border_color_lookup (ColorLookup): Strategy for mapping colors to\n            annotations. Options are `INDEX`, `CLASS`, `TRACK`.\n    \"\"\"\n    self.position: Position = position\n    self.scale_factor: int = scale_factor\n    self.border_color: Union[Color, ColorPalette] = border_color\n    self.border_thickness: int = border_thickness\n    self.border_color_lookup: ColorLookup = border_color_lookup\n
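
A constructor sketch (the values are illustrative): crops placed above each detection and doubled in size.

import supervision as sv\n\n# illustrative values; scale_factor=2 doubles the size of each cropped region\ncrop_annotator = sv.CropAnnotator(\n    position=sv.Position.TOP_CENTER,\n    scale_factor=2,\n    border_thickness=2\n)\n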
"},{"location":"detection/annotators/#supervision.annotators.core.CropAnnotator.annotate","title":"annotate(scene, detections, custom_color_lookup=None)","text":"

Annotates the provided scene with scaled and cropped parts of the image based on the provided detections. Each detection is cropped from the original scene and scaled according to the annotator's scale factor before being placed back onto the scene at the specified position.

Parameters:

Name Type Description Default scene ImageType

The image where cropped detection will be placed. ImageType is a flexible type, accepting either numpy.ndarray or PIL.Image.Image.

required detections Detections

Object detections to annotate.

required custom_color_lookup Optional[ndarray]

Custom color lookup array. Allows overriding the default color mapping strategy.

None

Returns:

Type Description ImageType

The annotated image.

Example
import supervision as sv\n\nimage = ...\ndetections = sv.Detections(...)\n\ncrop_annotator = sv.CropAnnotator()\nannotated_frame = crop_annotator.annotate(\n    scene=image.copy(),\n    detections=detections\n)\n
Source code in supervision/annotators/core.py
@convert_for_annotation_method\ndef annotate(\n    self,\n    scene: ImageType,\n    detections: Detections,\n    custom_color_lookup: Optional[np.ndarray] = None,\n) -> ImageType:\n    \"\"\"\n    Annotates the provided scene with scaled and cropped parts of the image based\n    on the provided detections. Each detection is cropped from the original scene\n    and scaled according to the annotator's scale factor before being placed back\n    onto the scene at the specified position.\n\n\n    Args:\n        scene (ImageType): The image where cropped detection will be placed.\n            `ImageType` is a flexible type, accepting either `numpy.ndarray`\n            or `PIL.Image.Image`.\n        detections (Detections): Object detections to annotate.\n        custom_color_lookup (Optional[np.ndarray]): Custom color lookup array.\n            Allows to override the default color mapping strategy.\n\n    Returns:\n        The annotated image.\n\n    Example:\n        ```python\n        import supervision as sv\n\n        image = ...\n        detections = sv.Detections(...)\n\n        crop_annotator = sv.CropAnnotator()\n        annotated_frame = crop_annotator.annotate(\n            scene=image.copy(),\n            detections=detections\n        )\n        ```\n    \"\"\"\n    crops = [\n        crop_image(image=scene, xyxy=xyxy) for xyxy in detections.xyxy.astype(int)\n    ]\n    resized_crops = [\n        scale_image(image=crop, scale_factor=self.scale_factor) for crop in crops\n    ]\n    anchors = detections.get_anchors_coordinates(anchor=self.position).astype(int)\n\n    for idx, (resized_crop, anchor) in enumerate(zip(resized_crops, anchors)):\n        crop_wh = resized_crop.shape[1], resized_crop.shape[0]\n        (x1, y1), (x2, y2) = self.calculate_crop_coordinates(\n            anchor=anchor, crop_wh=crop_wh, position=self.position\n        )\n        scene = overlay_image(\n            scene=scene, inserted_image=resized_crop, anchor=(x1, y1)\n        )\n        color = resolve_color(\n            color=self.border_color,\n            detections=detections,\n            detection_idx=idx,\n            color_lookup=self.border_color_lookup\n            if custom_color_lookup is None\n            else custom_color_lookup,\n        )\n        cv2.rectangle(\n            img=scene,\n            pt1=(x1, y1),\n            pt2=(x2, y2),\n            color=color.as_bgr(),\n            thickness=self.border_thickness,\n        )\n\n    return scene\n
"},{"location":"detection/core/","title":"Detections","text":"

The sv.Detections class in the Supervision library standardizes results from various object detection and segmentation models into a consistent format. This class simplifies data manipulation and filtering, providing a uniform API for integration with Supervision trackers, annotators, and tools.


Use the sv.Detections.from_inference method, which accepts model results from both detection and segmentation models.

import cv2\nimport supervision as sv\nfrom inference import get_model\n\nmodel = get_model(model_id=\"yolov8n-640\")\nimage = cv2.imread(<SOURCE_IMAGE_PATH>)\nresults = model.infer(image)[0]\ndetections = sv.Detections.from_inference(results)\n

Use the sv.Detections.from_ultralytics method, which accepts model results from both detection and segmentation models.

import cv2\nimport supervision as sv\nfrom ultralytics import YOLO\n\nmodel = YOLO(\"yolov8n.pt\")\nimage = cv2.imread(<SOURCE_IMAGE_PATH>)\nresults = model(image)[0]\ndetections = sv.Detections.from_ultralytics(results)\n

Use the sv.Detections.from_transformers method, which accepts model results from both detection and segmentation models.

import torch\nimport supervision as sv\nfrom PIL import Image\nfrom transformers import DetrImageProcessor, DetrForObjectDetection\n\nprocessor = DetrImageProcessor.from_pretrained(\"facebook/detr-resnet-50\")\nmodel = DetrForObjectDetection.from_pretrained(\"facebook/detr-resnet-50\")\n\nimage = Image.open(<SOURCE_IMAGE_PATH>)\ninputs = processor(images=image, return_tensors=\"pt\")\n\nwith torch.no_grad():\n    outputs = model(**inputs)\n\nwidth, height = image.size\ntarget_size = torch.tensor([[height, width]])\nresults = processor.post_process_object_detection(\n    outputs=outputs, target_sizes=target_size)[0]\ndetections = sv.Detections.from_transformers(\n    transformers_results=results,\n    id2label=model.config.id2label)\n

Attributes:

Name Type Description xyxy ndarray

An array of shape (n, 4) containing the bounding box coordinates in the format [x1, y1, x2, y2].

mask Optional[ndarray]

An array of shape (n, H, W) containing the segmentation masks.

confidence Optional[ndarray]

An array of shape (n,) containing the confidence scores of the detections.

class_id Optional[ndarray]

An array of shape (n,) containing the class ids of the detections.

tracker_id Optional[ndarray]

An array of shape (n,) containing the tracker ids of the detections.

data Dict[str, Union[ndarray, List]]

A dictionary containing additional data where each key is a string representing the data type, and the value is either a NumPy array or a list of corresponding data.

Source code in supervision/detection/core.py
@dataclass\nclass Detections:\n    \"\"\"\n    The `sv.Detections` class in the Supervision library standardizes results from\n    various object detection and segmentation models into a consistent format. This\n    class simplifies data manipulation and filtering, providing a uniform API for\n    integration with Supervision [trackers](/trackers/), [annotators](/detection/annotators/), and [tools](/detection/tools/line_zone/).\n\n    === \"Inference\"\n\n        Use [`sv.Detections.from_inference`](/detection/core/#supervision.detection.core.Detections.from_inference)\n        method, which accepts model results from both detection and segmentation models.\n\n        ```python\n        import cv2\n        import supervision as sv\n        from inference import get_model\n\n        model = get_model(model_id=\"yolov8n-640\")\n        image = cv2.imread(<SOURCE_IMAGE_PATH>)\n        results = model.infer(image)[0]\n        detections = sv.Detections.from_inference(results)\n        ```\n\n    === \"Ultralytics\"\n\n        Use [`sv.Detections.from_ultralytics`](/detection/core/#supervision.detection.core.Detections.from_ultralytics)\n        method, which accepts model results from both detection and segmentation models.\n\n        ```python\n        import cv2\n        import supervision as sv\n        from ultralytics import YOLO\n\n        model = YOLO(\"yolov8n.pt\")\n        image = cv2.imread(<SOURCE_IMAGE_PATH>)\n        results = model(image)[0]\n        detections = sv.Detections.from_ultralytics(results)\n        ```\n\n    === \"Transformers\"\n\n        Use [`sv.Detections.from_transformers`](/detection/core/#supervision.detection.core.Detections.from_transformers)\n        method, which accepts model results from both detection and segmentation models.\n\n        ```python\n        import torch\n        import supervision as sv\n        from PIL import Image\n        from transformers import DetrImageProcessor, DetrForObjectDetection\n\n        processor = DetrImageProcessor.from_pretrained(\"facebook/detr-resnet-50\")\n        model = DetrForObjectDetection.from_pretrained(\"facebook/detr-resnet-50\")\n\n        image = Image.open(<SOURCE_IMAGE_PATH>)\n        inputs = processor(images=image, return_tensors=\"pt\")\n\n        with torch.no_grad():\n            outputs = model(**inputs)\n\n        width, height = image.size\n        target_size = torch.tensor([[height, width]])\n        results = processor.post_process_object_detection(\n            outputs=outputs, target_sizes=target_size)[0]\n        detections = sv.Detections.from_transformers(\n            transformers_results=results,\n            id2label=model.config.id2label)\n        ```\n\n    Attributes:\n        xyxy (np.ndarray): An array of shape `(n, 4)` containing\n            the bounding boxes coordinates in format `[x1, y1, x2, y2]`\n        mask: (Optional[np.ndarray]): An array of shape\n            `(n, H, W)` containing the segmentation masks.\n        confidence (Optional[np.ndarray]): An array of shape\n            `(n,)` containing the confidence scores of the detections.\n        class_id (Optional[np.ndarray]): An array of shape\n            `(n,)` containing the class ids of the detections.\n        tracker_id (Optional[np.ndarray]): An array of shape\n            `(n,)` containing the tracker ids of the detections.\n        data (Dict[str, Union[np.ndarray, List]]): A dictionary containing additional\n            data where each key is a string representing the data type, and the value\n           
 is either a NumPy array or a list of corresponding data.\n    \"\"\"  # noqa: E501 // docs\n\n    xyxy: np.ndarray\n    mask: Optional[np.ndarray] = None\n    confidence: Optional[np.ndarray] = None\n    class_id: Optional[np.ndarray] = None\n    tracker_id: Optional[np.ndarray] = None\n    data: Dict[str, Union[np.ndarray, List]] = field(default_factory=dict)\n\n    def __post_init__(self):\n        validate_detections_fields(\n            xyxy=self.xyxy,\n            mask=self.mask,\n            confidence=self.confidence,\n            class_id=self.class_id,\n            tracker_id=self.tracker_id,\n            data=self.data,\n        )\n\n    def __len__(self):\n        \"\"\"\n        Returns the number of detections in the Detections object.\n        \"\"\"\n        return len(self.xyxy)\n\n    def __iter__(\n        self,\n    ) -> Iterator[\n        Tuple[\n            np.ndarray,\n            Optional[np.ndarray],\n            Optional[float],\n            Optional[int],\n            Optional[int],\n            Dict[str, Union[np.ndarray, List]],\n        ]\n    ]:\n        \"\"\"\n        Iterates over the Detections object and yield a tuple of\n        `(xyxy, mask, confidence, class_id, tracker_id, data)` for each detection.\n        \"\"\"\n        for i in range(len(self.xyxy)):\n            yield (\n                self.xyxy[i],\n                self.mask[i] if self.mask is not None else None,\n                self.confidence[i] if self.confidence is not None else None,\n                self.class_id[i] if self.class_id is not None else None,\n                self.tracker_id[i] if self.tracker_id is not None else None,\n                get_data_item(self.data, i),\n            )\n\n    def __eq__(self, other: Detections):\n        return all(\n            [\n                np.array_equal(self.xyxy, other.xyxy),\n                np.array_equal(self.mask, other.mask),\n                np.array_equal(self.class_id, other.class_id),\n                np.array_equal(self.confidence, other.confidence),\n                np.array_equal(self.tracker_id, other.tracker_id),\n                is_data_equal(self.data, other.data),\n            ]\n        )\n\n    @classmethod\n    def from_yolov5(cls, yolov5_results) -> Detections:\n        \"\"\"\n        Creates a Detections instance from a\n        [YOLOv5](https://github.com/ultralytics/yolov5) inference result.\n\n        Args:\n            yolov5_results (yolov5.models.common.Detections):\n                The output Detections instance from YOLOv5\n\n        Returns:\n            Detections: A new Detections object.\n\n        Example:\n            ```python\n            import cv2\n            import torch\n            import supervision as sv\n\n            image = cv2.imread(<SOURCE_IMAGE_PATH>)\n            model = torch.hub.load('ultralytics/yolov5', 'yolov5s')\n            result = model(image)\n            detections = sv.Detections.from_yolov5(result)\n            ```\n        \"\"\"\n        yolov5_detections_predictions = yolov5_results.pred[0].cpu().cpu().numpy()\n\n        return cls(\n            xyxy=yolov5_detections_predictions[:, :4],\n            confidence=yolov5_detections_predictions[:, 4],\n            class_id=yolov5_detections_predictions[:, 5].astype(int),\n        )\n\n    @classmethod\n    def from_ultralytics(cls, ultralytics_results) -> Detections:\n        \"\"\"\n        Creates a `sv.Detections` instance from a\n        [YOLOv8](https://github.com/ultralytics/ultralytics) inference result.\n\n       
 !!! Note\n\n            `from_ultralytics` is compatible with\n            [detection](https://docs.ultralytics.com/tasks/detect/),\n            [segmentation](https://docs.ultralytics.com/tasks/segment/), and\n            [OBB](https://docs.ultralytics.com/tasks/obb/) models.\n\n        Args:\n            ultralytics_results (ultralytics.yolo.engine.results.Results):\n                The output Results instance from Ultralytics\n\n        Returns:\n            Detections: A new Detections object.\n\n        Example:\n            ```python\n            import cv2\n            import supervision as sv\n            from ultralytics import YOLO\n\n            image = cv2.imread(<SOURCE_IMAGE_PATH>)\n            model = YOLO('yolov8s.pt')\n            results = model(image)[0]\n            detections = sv.Detections.from_ultralytics(results)\n            ```\n\n        !!! tip\n\n            Class names values can be accessed using `detections[\"class_name\"]`.\n        \"\"\"  # noqa: E501 // docs\n\n        if hasattr(ultralytics_results, \"obb\") and ultralytics_results.obb is not None:\n            class_id = ultralytics_results.obb.cls.cpu().numpy().astype(int)\n            class_names = np.array([ultralytics_results.names[i] for i in class_id])\n            oriented_box_coordinates = ultralytics_results.obb.xyxyxyxy.cpu().numpy()\n            return cls(\n                xyxy=ultralytics_results.obb.xyxy.cpu().numpy(),\n                confidence=ultralytics_results.obb.conf.cpu().numpy(),\n                class_id=class_id,\n                tracker_id=ultralytics_results.obb.id.int().cpu().numpy()\n                if ultralytics_results.obb.id is not None\n                else None,\n                data={\n                    ORIENTED_BOX_COORDINATES: oriented_box_coordinates,\n                    CLASS_NAME_DATA_FIELD: class_names,\n                },\n            )\n\n        class_id = ultralytics_results.boxes.cls.cpu().numpy().astype(int)\n        class_names = np.array([ultralytics_results.names[i] for i in class_id])\n        return cls(\n            xyxy=ultralytics_results.boxes.xyxy.cpu().numpy(),\n            confidence=ultralytics_results.boxes.conf.cpu().numpy(),\n            class_id=class_id,\n            mask=extract_ultralytics_masks(ultralytics_results),\n            tracker_id=ultralytics_results.boxes.id.int().cpu().numpy()\n            if ultralytics_results.boxes.id is not None\n            else None,\n            data={CLASS_NAME_DATA_FIELD: class_names},\n        )\n\n    @classmethod\n    def from_yolo_nas(cls, yolo_nas_results) -> Detections:\n        \"\"\"\n        Creates a Detections instance from a\n        [YOLO-NAS](https://github.com/Deci-AI/super-gradients/blob/master/YOLONAS.md)\n        inference result.\n\n        Args:\n            yolo_nas_results (ImageDetectionPrediction):\n                The output Results instance from YOLO-NAS\n                ImageDetectionPrediction is coming from\n                'super_gradients.training.models.prediction_results'\n\n        Returns:\n            Detections: A new Detections object.\n\n        Example:\n            ```python\n            import cv2\n            from super_gradients.training import models\n            import supervision as sv\n\n            image = cv2.imread(<SOURCE_IMAGE_PATH>)\n            model = models.get('yolo_nas_l', pretrained_weights=\"coco\")\n\n            result = list(model.predict(image, conf=0.35))[0]\n            detections = sv.Detections.from_yolo_nas(result)\n        
    ```\n        \"\"\"\n        if np.asarray(yolo_nas_results.prediction.bboxes_xyxy).shape[0] == 0:\n            return cls.empty()\n\n        return cls(\n            xyxy=yolo_nas_results.prediction.bboxes_xyxy,\n            confidence=yolo_nas_results.prediction.confidence,\n            class_id=yolo_nas_results.prediction.labels.astype(int),\n        )\n\n    @classmethod\n    def from_tensorflow(\n        cls, tensorflow_results: dict, resolution_wh: tuple\n    ) -> Detections:\n        \"\"\"\n        Creates a Detections instance from a\n        [Tensorflow Hub](https://www.tensorflow.org/hub/tutorials/tf2_object_detection)\n        inference result.\n\n        Args:\n            tensorflow_results (dict):\n                The output results from Tensorflow Hub.\n\n        Returns:\n            Detections: A new Detections object.\n\n        Example:\n            ```python\n            import tensorflow as tf\n            import tensorflow_hub as hub\n            import numpy as np\n            import cv2\n\n            module_handle = \"https://tfhub.dev/tensorflow/centernet/hourglass_512x512_kpts/1\"\n            model = hub.load(module_handle)\n            img = np.array(cv2.imread(SOURCE_IMAGE_PATH))\n            result = model(img)\n            detections = sv.Detections.from_tensorflow(result)\n            ```\n        \"\"\"  # noqa: E501 // docs\n\n        boxes = tensorflow_results[\"detection_boxes\"][0].numpy()\n        boxes[:, [0, 2]] *= resolution_wh[0]\n        boxes[:, [1, 3]] *= resolution_wh[1]\n        boxes = boxes[:, [1, 0, 3, 2]]\n        return cls(\n            xyxy=boxes,\n            confidence=tensorflow_results[\"detection_scores\"][0].numpy(),\n            class_id=tensorflow_results[\"detection_classes\"][0].numpy().astype(int),\n        )\n\n    @classmethod\n    def from_deepsparse(cls, deepsparse_results) -> Detections:\n        \"\"\"\n        Creates a Detections instance from a\n        [DeepSparse](https://github.com/neuralmagic/deepsparse)\n        inference result.\n\n        Args:\n            deepsparse_results (deepsparse.yolo.schemas.YOLOOutput):\n                The output Results instance from DeepSparse.\n\n        Returns:\n            Detections: A new Detections object.\n\n        Example:\n            ```python\n            import supervision as sv\n            from deepsparse import Pipeline\n\n            yolo_pipeline = Pipeline.create(\n                task=\"yolo\",\n                model_path = \"zoo:cv/detection/yolov5-l/pytorch/ultralytics/coco/pruned80_quant-none\"\n             )\n            result = yolo_pipeline(<SOURCE IMAGE PATH>)\n            detections = sv.Detections.from_deepsparse(result)\n            ```\n        \"\"\"  # noqa: E501 // docs\n\n        if np.asarray(deepsparse_results.boxes[0]).shape[0] == 0:\n            return cls.empty()\n\n        return cls(\n            xyxy=np.array(deepsparse_results.boxes[0]),\n            confidence=np.array(deepsparse_results.scores[0]),\n            class_id=np.array(deepsparse_results.labels[0]).astype(float).astype(int),\n        )\n\n    @classmethod\n    def from_mmdetection(cls, mmdet_results) -> Detections:\n        \"\"\"\n        Creates a Detections instance from a\n        [mmdetection](https://github.com/open-mmlab/mmdetection) and\n        [mmyolo](https://github.com/open-mmlab/mmyolo) inference result.\n\n        Args:\n            mmdet_results (mmdet.structures.DetDataSample):\n                The output Results instance from MMDetection.\n\n        
Returns:\n            Detections: A new Detections object.\n\n        Example:\n            ```python\n            import cv2\n            import supervision as sv\n            from mmdet.apis import init_detector, inference_detector\n\n            image = cv2.imread(<SOURCE_IMAGE_PATH>)\n            model = init_detector(<CONFIG_PATH>, <WEIGHTS_PATH>, device=<DEVICE>)\n\n            result = inference_detector(model, image)\n            detections = sv.Detections.from_mmdetection(result)\n            ```\n        \"\"\"  # noqa: E501 // docs\n\n        return cls(\n            xyxy=mmdet_results.pred_instances.bboxes.cpu().numpy(),\n            confidence=mmdet_results.pred_instances.scores.cpu().numpy(),\n            class_id=mmdet_results.pred_instances.labels.cpu().numpy().astype(int),\n            mask=mmdet_results.pred_instances.masks.cpu().numpy()\n            if \"masks\" in mmdet_results.pred_instances\n            else None,\n        )\n\n    @classmethod\n    def from_transformers(\n        cls, transformers_results: dict, id2label: Optional[Dict[int, str]] = None\n    ) -> Detections:\n        \"\"\"\n        Creates a Detections instance from object detection or segmentation\n        [Transformer](https://github.com/huggingface/transformers) inference result.\n\n        Args:\n            transformers_results (dict): The output of Transformers model inference. A\n                dictionary containing the `scores`, `labels`, `boxes` and `masks` keys.\n            id2label (Optional[Dict[int, str]]): A dictionary mapping class IDs to\n                class names. If provided, the resulting Detections object will contain\n                `class_name` data field with the class names.\n\n        Returns:\n            Detections: A new Detections object.\n\n        Example:\n            ```python\n            import torch\n            import supervision as sv\n            from PIL import Image\n            from transformers import DetrImageProcessor, DetrForObjectDetection\n\n            processor = DetrImageProcessor.from_pretrained(\"facebook/detr-resnet-50\")\n            model = DetrForObjectDetection.from_pretrained(\"facebook/detr-resnet-50\")\n\n            image = Image.open(<SOURCE_IMAGE_PATH>)\n            inputs = processor(images=image, return_tensors=\"pt\")\n\n            with torch.no_grad():\n                outputs = model(**inputs)\n\n            width, height = image.size\n            target_size = torch.tensor([[height, width]])\n            results = processor.post_process_object_detection(\n                outputs=outputs, target_sizes=target_size)[0]\n\n            detections = sv.Detections.from_transformers(\n                transformers_results=results,\n                id2label=model.config.id2label\n            )\n            ```\n\n        !!! 
tip\n\n            Class names values can be accessed using `detections[\"class_name\"]`.\n        \"\"\"  # noqa: E501 // docs\n\n        class_ids = transformers_results[\"labels\"].cpu().detach().numpy().astype(int)\n        data = {}\n        if id2label is not None:\n            class_names = np.array([id2label[class_id] for class_id in class_ids])\n            data[CLASS_NAME_DATA_FIELD] = class_names\n        if \"boxes\" in transformers_results:\n            return cls(\n                xyxy=transformers_results[\"boxes\"].cpu().detach().numpy(),\n                confidence=transformers_results[\"scores\"].cpu().detach().numpy(),\n                class_id=class_ids,\n                data=data,\n            )\n        elif \"masks\" in transformers_results:\n            masks = transformers_results[\"masks\"].cpu().detach().numpy().astype(bool)\n            return cls(\n                xyxy=mask_to_xyxy(masks),\n                mask=masks,\n                confidence=transformers_results[\"scores\"].cpu().detach().numpy(),\n                class_id=class_ids,\n                data=data,\n            )\n        else:\n            raise NotImplementedError(\n                \"Only object detection and semantic segmentation results are supported.\"\n            )\n\n    @classmethod\n    def from_detectron2(cls, detectron2_results) -> Detections:\n        \"\"\"\n        Create a Detections object from the\n        [Detectron2](https://github.com/facebookresearch/detectron2) inference result.\n\n        Args:\n            detectron2_results: The output of a\n                Detectron2 model containing instances with prediction data.\n\n        Returns:\n            (Detections): A Detections object containing the bounding boxes,\n                class IDs, and confidences of the predictions.\n\n        Example:\n            ```python\n            import cv2\n            import supervision as sv\n            from detectron2.engine import DefaultPredictor\n            from detectron2.config import get_cfg\n\n\n            image = cv2.imread(<SOURCE_IMAGE_PATH>)\n            cfg = get_cfg()\n            cfg.merge_from_file(<CONFIG_PATH>)\n            cfg.MODEL.WEIGHTS = <WEIGHTS_PATH>\n            predictor = DefaultPredictor(cfg)\n\n            result = predictor(image)\n            detections = sv.Detections.from_detectron2(result)\n            ```\n        \"\"\"\n\n        return cls(\n            xyxy=detectron2_results[\"instances\"].pred_boxes.tensor.cpu().numpy(),\n            confidence=detectron2_results[\"instances\"].scores.cpu().numpy(),\n            class_id=detectron2_results[\"instances\"]\n            .pred_classes.cpu()\n            .numpy()\n            .astype(int),\n        )\n\n    @classmethod\n    def from_inference(cls, roboflow_result: Union[dict, Any]) -> Detections:\n        \"\"\"\n        Create a `sv.Detections` object from the [Roboflow](https://roboflow.com/)\n        API inference result or the [Inference](https://inference.roboflow.com/)\n        package results. 
This method extracts bounding boxes, class IDs,\n        confidences, and class names from the Roboflow API result and encapsulates\n        them into a Detections object.\n\n        Args:\n            roboflow_result (dict, any): The result from the\n                Roboflow API or Inference package containing predictions.\n\n        Returns:\n            (Detections): A Detections object containing the bounding boxes, class IDs,\n                and confidences of the predictions.\n\n        Example:\n            ```python\n            import cv2\n            import supervision as sv\n            from inference import get_model\n\n            image = cv2.imread(<SOURCE_IMAGE_PATH>)\n            model = get_model(model_id=\"yolov8s-640\")\n\n            result = model.infer(image)[0]\n            detections = sv.Detections.from_inference(result)\n            ```\n\n        !!! tip\n\n            Class names values can be accessed using `detections[\"class_name\"]`.\n        \"\"\"\n        with suppress(AttributeError):\n            roboflow_result = roboflow_result.dict(exclude_none=True, by_alias=True)\n        xyxy, confidence, class_id, masks, trackers, data = process_roboflow_result(\n            roboflow_result=roboflow_result\n        )\n\n        if np.asarray(xyxy).shape[0] == 0:\n            empty_detection = cls.empty()\n            empty_detection.data = {CLASS_NAME_DATA_FIELD: np.empty(0)}\n            return empty_detection\n\n        return cls(\n            xyxy=xyxy,\n            confidence=confidence,\n            class_id=class_id,\n            mask=masks,\n            tracker_id=trackers,\n            data=data,\n        )\n\n    @classmethod\n    @deprecated(\n        \"`Detections.from_roboflow` is deprecated and will be removed in \"\n        \"`supervision-0.22.0`. Use `Detections.from_inference` instead.\"\n    )\n    def from_roboflow(cls, roboflow_result: Union[dict, Any]) -> Detections:\n        \"\"\"\n        !!! failure \"Deprecated\"\n\n            `Detections.from_roboflow` is deprecated and will be removed in\n            `supervision-0.22.0`. 
Use `Detections.from_inference` instead.\n\n        Create a Detections object from the [Roboflow](https://roboflow.com/)\n            API inference result or the [Inference](https://inference.roboflow.com/)\n            package results.\n\n        Args:\n            roboflow_result (dict): The result from the\n                Roboflow API containing predictions.\n\n        Returns:\n            (Detections): A Detections object containing the bounding boxes, class IDs,\n                and confidences of the predictions.\n\n        Example:\n            ```python\n            import cv2\n            import supervision as sv\n            from inference import get_model\n\n            image = cv2.imread(<SOURCE_IMAGE_PATH>)\n            model = get_model(model_id=\"yolov8s-640\")\n\n            result = model.infer(image)[0]\n            detections = sv.Detections.from_roboflow(result)\n            ```\n        \"\"\"\n        return cls.from_inference(roboflow_result)\n\n    @classmethod\n    def from_sam(cls, sam_result: List[dict]) -> Detections:\n        \"\"\"\n        Creates a Detections instance from\n        [Segment Anything Model](https://github.com/facebookresearch/segment-anything)\n        inference result.\n\n        Args:\n            sam_result (List[dict]): The output Results instance from SAM\n\n        Returns:\n            Detections: A new Detections object.\n\n        Example:\n            ```python\n            import supervision as sv\n            from segment_anything import (\n                sam_model_registry,\n                SamAutomaticMaskGenerator\n             )\n\n            sam_model_reg = sam_model_registry[MODEL_TYPE]\n            sam = sam_model_reg(checkpoint=CHECKPOINT_PATH).to(device=DEVICE)\n            mask_generator = SamAutomaticMaskGenerator(sam)\n            sam_result = mask_generator.generate(IMAGE)\n            detections = sv.Detections.from_sam(sam_result=sam_result)\n            ```\n        \"\"\"\n\n        sorted_generated_masks = sorted(\n            sam_result, key=lambda x: x[\"area\"], reverse=True\n        )\n\n        xywh = np.array([mask[\"bbox\"] for mask in sorted_generated_masks])\n        mask = np.array([mask[\"segmentation\"] for mask in sorted_generated_masks])\n\n        if np.asarray(xywh).shape[0] == 0:\n            return cls.empty()\n\n        xyxy = xywh_to_xyxy(boxes_xywh=xywh)\n        return cls(xyxy=xyxy, mask=mask)\n\n    @classmethod\n    def from_azure_analyze_image(\n        cls, azure_result: dict, class_map: Optional[Dict[int, str]] = None\n    ) -> Detections:\n        \"\"\"\n        Creates a Detections instance from [Azure Image Analysis 4.0](\n        https://learn.microsoft.com/en-us/azure/ai-services/computer-vision/\n        concept-object-detection-40).\n\n        Args:\n            azure_result (dict): The result from Azure Image Analysis. It should\n                contain detected objects and their bounding box coordinates.\n            class_map (Optional[Dict[int, str]]): A mapping ofclass IDs (int) to class\n                names (str). 
If None, a new mapping is created dynamically.\n\n        Returns:\n            Detections: A new Detections object.\n\n        Example:\n            ```python\n            import requests\n            import supervision as sv\n\n            image = open(input, \"rb\").read()\n\n            endpoint = \"https://.cognitiveservices.azure.com/\"\n            subscription_key = \"\"\n\n            headers = {\n                \"Content-Type\": \"application/octet-stream\",\n                \"Ocp-Apim-Subscription-Key\": subscription_key\n             }\n\n            response = requests.post(endpoint,\n                headers=self.headers,\n                data=image\n             ).json()\n\n            detections = sv.Detections.from_azure_analyze_image(response)\n            ```\n        \"\"\"\n        if \"error\" in azure_result:\n            raise ValueError(\n                f'Azure API returned an error {azure_result[\"error\"][\"message\"]}'\n            )\n\n        xyxy, confidences, class_ids = [], [], []\n\n        is_dynamic_mapping = class_map is None\n        if is_dynamic_mapping:\n            class_map = {}\n\n        class_map = {value: key for key, value in class_map.items()}\n\n        for detection in azure_result[\"objectsResult\"][\"values\"]:\n            bbox = detection[\"boundingBox\"]\n\n            tags = detection[\"tags\"]\n\n            x0 = bbox[\"x\"]\n            y0 = bbox[\"y\"]\n            x1 = x0 + bbox[\"w\"]\n            y1 = y0 + bbox[\"h\"]\n\n            for tag in tags:\n                confidence = tag[\"confidence\"]\n                class_name = tag[\"name\"]\n                class_id = class_map.get(class_name, None)\n\n                if is_dynamic_mapping and class_id is None:\n                    class_id = len(class_map)\n                    class_map[class_name] = class_id\n\n                if class_id is not None:\n                    xyxy.append([x0, y0, x1, y1])\n                    confidences.append(confidence)\n                    class_ids.append(class_id)\n\n        if len(xyxy) == 0:\n            return Detections.empty()\n\n        return cls(\n            xyxy=np.array(xyxy),\n            class_id=np.array(class_ids),\n            confidence=np.array(confidences),\n        )\n\n    @classmethod\n    def from_paddledet(cls, paddledet_result) -> Detections:\n        \"\"\"\n        Creates a Detections instance from\n            [PaddleDetection](https://github.com/PaddlePaddle/PaddleDetection)\n            inference result.\n\n        Args:\n            paddledet_result (List[dict]): The output Results instance from PaddleDet\n\n        Returns:\n            Detections: A new Detections object.\n\n        Example:\n            ```python\n            import supervision as sv\n            import paddle\n            from ppdet.engine import Trainer\n            from ppdet.core.workspace import load_config\n\n            weights = ()\n            config = ()\n\n            cfg = load_config(config)\n            trainer = Trainer(cfg, mode='test')\n            trainer.load_weights(weights)\n\n            paddledet_result = trainer.predict([images])[0]\n\n            detections = sv.Detections.from_paddledet(paddledet_result)\n            ```\n        \"\"\"\n\n        if np.asarray(paddledet_result[\"bbox\"][:, 2:6]).shape[0] == 0:\n            return cls.empty()\n\n        return cls(\n            xyxy=paddledet_result[\"bbox\"][:, 2:6],\n            confidence=paddledet_result[\"bbox\"][:, 1],\n            
class_id=paddledet_result[\"bbox\"][:, 0].astype(int),\n        )\n\n    @classmethod\n    def from_lmm(cls, lmm: Union[LMM, str], result: str, **kwargs) -> Detections:\n        \"\"\"\n        Creates a Detections object from the given result string based on the specified\n        Large Multimodal Model (LMM).\n\n        Args:\n            lmm (Union[LMM, str]): The type of LMM (Large Multimodal Model) to use.\n            result (str): The result string containing the detection data.\n            **kwargs: Additional keyword arguments required by the specified LMM.\n\n        Returns:\n            Detections: A new Detections object.\n\n        Raises:\n            ValueError: If the LMM is invalid, required arguments are missing, or\n                disallowed arguments are provided.\n            ValueError: If the specified LMM is not supported.\n\n        Examples:\n            ```python\n            import supervision as sv\n\n            paligemma_result = \"<loc0256><loc0256><loc0768><loc0768> cat\"\n            detections = sv.Detections.from_lmm(\n                sv.LMM.PALIGEMMA,\n                paligemma_result,\n                resolution_wh=(1000, 1000),\n                classes=['cat', 'dog']\n            )\n            detections.xyxy\n            # array([[250., 250., 750., 750.]])\n\n            detections.class_id\n            # array([0])\n            ```\n        \"\"\"\n        lmm = validate_lmm_and_kwargs(lmm, kwargs)\n\n        if lmm == LMM.PALIGEMMA:\n            xyxy, class_id, class_name = from_paligemma(result, **kwargs)\n            data = {CLASS_NAME_DATA_FIELD: class_name}\n            return cls(xyxy=xyxy, class_id=class_id, data=data)\n\n        raise ValueError(f\"Unsupported LMM: {lmm}\")\n\n    @classmethod\n    def empty(cls) -> Detections:\n        \"\"\"\n        Create an empty Detections object with no bounding boxes,\n            confidences, or class IDs.\n\n        Returns:\n            (Detections): An empty Detections object.\n\n        Example:\n            ```python\n            from supervision import Detections\n\n            empty_detections = Detections.empty()\n            ```\n        \"\"\"\n        return cls(\n            xyxy=np.empty((0, 4), dtype=np.float32),\n            confidence=np.array([], dtype=np.float32),\n            class_id=np.array([], dtype=int),\n        )\n\n    def is_empty(self) -> bool:\n        \"\"\"\n        Returns `True` if the `Detections` object is considered empty.\n        \"\"\"\n        empty_detections = Detections.empty()\n        empty_detections.data = self.data\n        return self == empty_detections\n\n    @classmethod\n    def merge(cls, detections_list: List[Detections]) -> Detections:\n        \"\"\"\n        Merge a list of Detections objects into a single Detections object.\n\n        This method takes a list of Detections objects and combines their\n        respective fields (`xyxy`, `mask`, `confidence`, `class_id`, and `tracker_id`)\n        into a single Detections object.\n\n        For example, if merging Detections with 3 and 4 detected objects, this method\n        will return a Detections with 7 objects (7 entries in `xyxy`, `mask`, etc).\n\n        !!! 
Note\n\n            When merging, empty `Detections` objects are ignored.\n\n        Args:\n            detections_list (List[Detections]): A list of Detections objects to merge.\n\n        Returns:\n            (Detections): A single Detections object containing\n                the merged data from the input list.\n\n        Example:\n            ```python\n            import numpy as np\n            import supervision as sv\n\n            detections_1 = sv.Detections(\n                xyxy=np.array([[15, 15, 100, 100], [200, 200, 300, 300]]),\n                class_id=np.array([1, 2]),\n                data={'feature_vector': np.array([0.1, 0.2)])}\n             )\n\n            detections_2 = sv.Detections(\n                xyxy=np.array([[30, 30, 120, 120]]),\n                class_id=np.array([1]),\n                data={'feature_vector': [np.array([0.3])]}\n             )\n\n            merged_detections = Detections.merge([detections_1, detections_2])\n\n            merged_detections.xyxy\n            array([[ 15,  15, 100, 100],\n                   [200, 200, 300, 300],\n                   [ 30,  30, 120, 120]])\n\n            merged_detections.class_id\n            array([1, 2, 1])\n\n            merged_detections.data['feature_vector']\n            array([0.1, 0.2, 0.3])\n            ```\n        \"\"\"\n        detections_list = [\n            detections for detections in detections_list if not detections.is_empty()\n        ]\n\n        if len(detections_list) == 0:\n            return Detections.empty()\n\n        for detections in detections_list:\n            validate_detections_fields(\n                xyxy=detections.xyxy,\n                mask=detections.mask,\n                confidence=detections.confidence,\n                class_id=detections.class_id,\n                tracker_id=detections.tracker_id,\n                data=detections.data,\n            )\n\n        xyxy = np.vstack([d.xyxy for d in detections_list])\n\n        def stack_or_none(name: str):\n            if all(d.__getattribute__(name) is None for d in detections_list):\n                return None\n            if any(d.__getattribute__(name) is None for d in detections_list):\n                raise ValueError(f\"All or none of the '{name}' fields must be None\")\n            return (\n                np.vstack([d.__getattribute__(name) for d in detections_list])\n                if name == \"mask\"\n                else np.hstack([d.__getattribute__(name) for d in detections_list])\n            )\n\n        mask = stack_or_none(\"mask\")\n        confidence = stack_or_none(\"confidence\")\n        class_id = stack_or_none(\"class_id\")\n        tracker_id = stack_or_none(\"tracker_id\")\n\n        data = merge_data([d.data for d in detections_list])\n\n        return cls(\n            xyxy=xyxy,\n            mask=mask,\n            confidence=confidence,\n            class_id=class_id,\n            tracker_id=tracker_id,\n            data=data,\n        )\n\n    def get_anchors_coordinates(self, anchor: Position) -> np.ndarray:\n        \"\"\"\n        Calculates and returns the coordinates of a specific anchor point\n        within the bounding boxes defined by the `xyxy` attribute. The anchor\n        point can be any of the predefined positions in the `Position` enum,\n        such as `CENTER`, `CENTER_LEFT`, `BOTTOM_RIGHT`, etc.\n\n        Args:\n            anchor (Position): An enum specifying the position of the anchor point\n                within the bounding box. 
Supported positions are defined in the\n                `Position` enum.\n\n        Returns:\n            np.ndarray: An array of shape `(n, 2)`, where `n` is the number of bounding\n                boxes. Each row contains the `[x, y]` coordinates of the specified\n                anchor point for the corresponding bounding box.\n\n        Raises:\n            ValueError: If the provided `anchor` is not supported.\n        \"\"\"\n        if anchor == Position.CENTER:\n            return np.array(\n                [\n                    (self.xyxy[:, 0] + self.xyxy[:, 2]) / 2,\n                    (self.xyxy[:, 1] + self.xyxy[:, 3]) / 2,\n                ]\n            ).transpose()\n        elif anchor == Position.CENTER_OF_MASS:\n            if self.mask is None:\n                raise ValueError(\n                    \"Cannot use `Position.CENTER_OF_MASS` without a detection mask.\"\n                )\n            return calculate_masks_centroids(masks=self.mask)\n        elif anchor == Position.CENTER_LEFT:\n            return np.array(\n                [\n                    self.xyxy[:, 0],\n                    (self.xyxy[:, 1] + self.xyxy[:, 3]) / 2,\n                ]\n            ).transpose()\n        elif anchor == Position.CENTER_RIGHT:\n            return np.array(\n                [\n                    self.xyxy[:, 2],\n                    (self.xyxy[:, 1] + self.xyxy[:, 3]) / 2,\n                ]\n            ).transpose()\n        elif anchor == Position.BOTTOM_CENTER:\n            return np.array(\n                [(self.xyxy[:, 0] + self.xyxy[:, 2]) / 2, self.xyxy[:, 3]]\n            ).transpose()\n        elif anchor == Position.BOTTOM_LEFT:\n            return np.array([self.xyxy[:, 0], self.xyxy[:, 3]]).transpose()\n        elif anchor == Position.BOTTOM_RIGHT:\n            return np.array([self.xyxy[:, 2], self.xyxy[:, 3]]).transpose()\n        elif anchor == Position.TOP_CENTER:\n            return np.array(\n                [(self.xyxy[:, 0] + self.xyxy[:, 2]) / 2, self.xyxy[:, 1]]\n            ).transpose()\n        elif anchor == Position.TOP_LEFT:\n            return np.array([self.xyxy[:, 0], self.xyxy[:, 1]]).transpose()\n        elif anchor == Position.TOP_RIGHT:\n            return np.array([self.xyxy[:, 2], self.xyxy[:, 1]]).transpose()\n\n        raise ValueError(f\"{anchor} is not supported.\")\n\n    def __getitem__(\n        self, index: Union[int, slice, List[int], np.ndarray, str]\n    ) -> Union[Detections, List, np.ndarray, None]:\n        \"\"\"\n        Get a subset of the Detections object or access an item from its data field.\n\n        When provided with an integer, slice, list of integers, or a numpy array, this\n        method returns a new Detections object that represents a subset of the original\n        detections. 
When provided with a string, it accesses the corresponding item in\n        the data dictionary.\n\n        Args:\n            index (Union[int, slice, List[int], np.ndarray, str]): The index, indices,\n                or key to access a subset of the Detections or an item from the data.\n\n        Returns:\n            Union[Detections, Any]: A subset of the Detections object or an item from\n                the data field.\n\n        Example:\n            ```python\n            import supervision as sv\n\n            detections = sv.Detections()\n\n            first_detection = detections[0]\n            first_10_detections = detections[0:10]\n            some_detections = detections[[0, 2, 4]]\n            class_0_detections = detections[detections.class_id == 0]\n            high_confidence_detections = detections[detections.confidence > 0.5]\n\n            feature_vector = detections['feature_vector']\n            ```\n        \"\"\"\n        if isinstance(index, str):\n            return self.data.get(index)\n        if isinstance(index, int):\n            index = [index]\n        return Detections(\n            xyxy=self.xyxy[index],\n            mask=self.mask[index] if self.mask is not None else None,\n            confidence=self.confidence[index] if self.confidence is not None else None,\n            class_id=self.class_id[index] if self.class_id is not None else None,\n            tracker_id=self.tracker_id[index] if self.tracker_id is not None else None,\n            data=get_data_item(self.data, index),\n        )\n\n    def __setitem__(self, key: str, value: Union[np.ndarray, List]):\n        \"\"\"\n        Set a value in the data dictionary of the Detections object.\n\n        Args:\n            key (str): The key in the data dictionary to set.\n            value (Union[np.ndarray, List]): The value to set for the key.\n\n        Example:\n            ```python\n            import cv2\n            import supervision as sv\n            from ultralytics import YOLO\n\n            image = cv2.imread(<SOURCE_IMAGE_PATH>)\n            model = YOLO('yolov8s.pt')\n\n            result = model(image)[0]\n            detections = sv.Detections.from_ultralytics(result)\n\n            detections['names'] = [\n                 model.model.names[class_id]\n                 for class_id\n                 in detections.class_id\n             ]\n            ```\n        \"\"\"\n        if not isinstance(value, (np.ndarray, list)):\n            raise TypeError(\"Value must be a np.ndarray or a list\")\n\n        if isinstance(value, list):\n            value = np.array(value)\n\n        self.data[key] = value\n\n    @property\n    def area(self) -> np.ndarray:\n        \"\"\"\n        Calculate the area of each detection in the set of object detections.\n        If masks field is defined property returns are of each mask.\n        If only box is given property return area of each box.\n\n        Returns:\n          np.ndarray: An array of floats containing the area of each detection\n            in the format of `(area_1, area_2, , area_n)`,\n            where n is the number of detections.\n        \"\"\"\n        if self.mask is not None:\n            return np.array([np.sum(mask) for mask in self.mask])\n        else:\n            return self.box_area\n\n    @property\n    def box_area(self) -> np.ndarray:\n        \"\"\"\n        Calculate the area of each bounding box in the set of object detections.\n\n        Returns:\n            np.ndarray: An array of floats containing the area of 
each bounding\n                box in the format of `(area_1, area_2, , area_n)`,\n                where n is the number of detections.\n        \"\"\"\n        return (self.xyxy[:, 3] - self.xyxy[:, 1]) * (self.xyxy[:, 2] - self.xyxy[:, 0])\n\n    def with_nms(\n        self, threshold: float = 0.5, class_agnostic: bool = False\n    ) -> Detections:\n        \"\"\"\n        Performs non-max suppression on detection set. If the detections result\n        from a segmentation model, the IoU mask is applied. Otherwise, box IoU is used.\n\n        Args:\n            threshold (float, optional): The intersection-over-union threshold\n                to use for non-maximum suppression. I'm the lower the value the more\n                restrictive the NMS becomes. Defaults to 0.5.\n            class_agnostic (bool, optional): Whether to perform class-agnostic\n                non-maximum suppression. If True, the class_id of each detection\n                will be ignored. Defaults to False.\n\n        Returns:\n            Detections: A new Detections object containing the subset of detections\n                after non-maximum suppression.\n\n        Raises:\n            AssertionError: If `confidence` is None and class_agnostic is False.\n                If `class_id` is None and class_agnostic is False.\n        \"\"\"\n        if len(self) == 0:\n            return self\n\n        assert (\n            self.confidence is not None\n        ), \"Detections confidence must be given for NMS to be executed.\"\n\n        if class_agnostic:\n            predictions = np.hstack((self.xyxy, self.confidence.reshape(-1, 1)))\n        else:\n            assert self.class_id is not None, (\n                \"Detections class_id must be given for NMS to be executed. If you\"\n                \" intended to perform class agnostic NMS set class_agnostic=True.\"\n            )\n            predictions = np.hstack(\n                (\n                    self.xyxy,\n                    self.confidence.reshape(-1, 1),\n                    self.class_id.reshape(-1, 1),\n                )\n            )\n\n        if self.mask is not None:\n            indices = mask_non_max_suppression(\n                predictions=predictions, masks=self.mask, iou_threshold=threshold\n            )\n        else:\n            indices = box_non_max_suppression(\n                predictions=predictions, iou_threshold=threshold\n            )\n\n        return self[indices]\n\n    def with_nmm(\n        self, threshold: float = 0.5, class_agnostic: bool = False\n    ) -> Detections:\n        \"\"\"\n        Perform non-maximum merging on the current set of object detections.\n\n        Args:\n            threshold (float, optional): The intersection-over-union threshold\n                to use for non-maximum merging. Defaults to 0.5.\n            class_agnostic (bool, optional): Whether to perform class-agnostic\n                non-maximum merging. If True, the class_id of each detection\n                will be ignored. 
Defaults to False.\n\n        Returns:\n            Detections: A new Detections object containing the subset of detections\n                after non-maximum merging.\n\n        Raises:\n            AssertionError: If `confidence` is None or `class_id` is None and\n                class_agnostic is False.\n\n        ![non-max-merging](https://media.roboflow.com/supervision-docs/non-max-merging.png){ align=center width=\"800\" }\n        \"\"\"  # noqa: E501 // docs\n        if len(self) == 0:\n            return self\n\n        assert (\n            self.confidence is not None\n        ), \"Detections confidence must be given for NMM to be executed.\"\n\n        if class_agnostic:\n            predictions = np.hstack((self.xyxy, self.confidence.reshape(-1, 1)))\n        else:\n            assert self.class_id is not None, (\n                \"Detections class_id must be given for NMM to be executed. If you\"\n                \" intended to perform class agnostic NMM set class_agnostic=True.\"\n            )\n            predictions = np.hstack(\n                (\n                    self.xyxy,\n                    self.confidence.reshape(-1, 1),\n                    self.class_id.reshape(-1, 1),\n                )\n            )\n\n        merge_groups = box_non_max_merge(\n            predictions=predictions, iou_threshold=threshold\n        )\n\n        result = []\n        for merge_group in merge_groups:\n            unmerged_detections = [self[i] for i in merge_group]\n            merged_detections = merge_inner_detections_objects(\n                unmerged_detections, threshold\n            )\n            result.append(merged_detections)\n\n        return Detections.merge(result)\n
"},{"location":"detection/core/#supervision.detection.core.Detections-attributes","title":"Attributes","text":""},{"location":"detection/core/#supervision.detection.core.Detections.area","title":"area: np.ndarray property","text":"

Calculate the area of each detection in the set of object detections. If the masks field is defined, the property returns the area of each mask. If only boxes are given, the property returns the area of each box.

Returns:

Type Description ndarray

np.ndarray: An array of floats containing the area of each detection in the format of (area_1, area_2, ..., area_n), where n is the number of detections.

"},{"location":"detection/core/#supervision.detection.core.Detections.box_area","title":"box_area: np.ndarray property","text":"

Calculate the area of each bounding box in the set of object detections.

Returns:

Type Description ndarray

np.ndarray: An array of floats containing the area of each bounding box in the format of (area_1, area_2, ..., area_n), where n is the number of detections.
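A minimal, hand-constructed sketch (not part of the generated reference; the box coordinates below are illustrative assumptions) showing how area relates to box_area when no masks are present:
import numpy as np\nimport supervision as sv\n\n# illustrative boxes only; with mask=None, area falls back to box_area\ndetections = sv.Detections(\n    xyxy=np.array([[0, 0, 10, 20], [5, 5, 9, 9]], dtype=float),\n    class_id=np.array([0, 1]),\n)\nprint(detections.box_area)  # [200.  16.]\nprint(detections.area)      # equals box_area because mask is None\n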

"},{"location":"detection/core/#supervision.detection.core.Detections-functions","title":"Functions","text":""},{"location":"detection/core/#supervision.detection.core.Detections.__getitem__","title":"__getitem__(index)","text":"

Get a subset of the Detections object or access an item from its data field.

When provided with an integer, slice, list of integers, or a numpy array, this method returns a new Detections object that represents a subset of the original detections. When provided with a string, it accesses the corresponding item in the data dictionary.

Parameters:

Name Type Description Default index Union[int, slice, List[int], ndarray, str]

The index, indices, or key to access a subset of the Detections or an item from the data.

required

Returns:

Type Description Union[Detections, List, ndarray, None]

Union[Detections, Any]: A subset of the Detections object or an item from the data field.

Example
import supervision as sv\n\ndetections = sv.Detections(...)\n\nfirst_detection = detections[0]\nfirst_10_detections = detections[0:10]\nsome_detections = detections[[0, 2, 4]]\nclass_0_detections = detections[detections.class_id == 0]\nhigh_confidence_detections = detections[detections.confidence > 0.5]\n\nfeature_vector = detections['feature_vector']\n
Source code in supervision/detection/core.py
def __getitem__(\n    self, index: Union[int, slice, List[int], np.ndarray, str]\n) -> Union[Detections, List, np.ndarray, None]:\n    \"\"\"\n    Get a subset of the Detections object or access an item from its data field.\n\n    When provided with an integer, slice, list of integers, or a numpy array, this\n    method returns a new Detections object that represents a subset of the original\n    detections. When provided with a string, it accesses the corresponding item in\n    the data dictionary.\n\n    Args:\n        index (Union[int, slice, List[int], np.ndarray, str]): The index, indices,\n            or key to access a subset of the Detections or an item from the data.\n\n    Returns:\n        Union[Detections, Any]: A subset of the Detections object or an item from\n            the data field.\n\n    Example:\n        ```python\n        import supervision as sv\n\n        detections = sv.Detections()\n\n        first_detection = detections[0]\n        first_10_detections = detections[0:10]\n        some_detections = detections[[0, 2, 4]]\n        class_0_detections = detections[detections.class_id == 0]\n        high_confidence_detections = detections[detections.confidence > 0.5]\n\n        feature_vector = detections['feature_vector']\n        ```\n    \"\"\"\n    if isinstance(index, str):\n        return self.data.get(index)\n    if isinstance(index, int):\n        index = [index]\n    return Detections(\n        xyxy=self.xyxy[index],\n        mask=self.mask[index] if self.mask is not None else None,\n        confidence=self.confidence[index] if self.confidence is not None else None,\n        class_id=self.class_id[index] if self.class_id is not None else None,\n        tracker_id=self.tracker_id[index] if self.tracker_id is not None else None,\n        data=get_data_item(self.data, index),\n    )\n
"},{"location":"detection/core/#supervision.detection.core.Detections.__iter__","title":"__iter__()","text":"

Iterates over the Detections object and yields a tuple of (xyxy, mask, confidence, class_id, tracker_id, data) for each detection.

Source code in supervision/detection/core.py
def __iter__(\n    self,\n) -> Iterator[\n    Tuple[\n        np.ndarray,\n        Optional[np.ndarray],\n        Optional[float],\n        Optional[int],\n        Optional[int],\n        Dict[str, Union[np.ndarray, List]],\n    ]\n]:\n    \"\"\"\n    Iterates over the Detections object and yield a tuple of\n    `(xyxy, mask, confidence, class_id, tracker_id, data)` for each detection.\n    \"\"\"\n    for i in range(len(self.xyxy)):\n        yield (\n            self.xyxy[i],\n            self.mask[i] if self.mask is not None else None,\n            self.confidence[i] if self.confidence is not None else None,\n            self.class_id[i] if self.class_id is not None else None,\n            self.tracker_id[i] if self.tracker_id is not None else None,\n            get_data_item(self.data, i),\n        )\n
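A hedged usage sketch (not part of the generated reference), assuming detections is an existing sv.Detections instance: iteration yields one tuple per detection, with None for fields that were never populated.
for xyxy, mask, confidence, class_id, tracker_id, data in detections:\n    # mask, confidence, class_id and tracker_id may be None depending on the source model\n    print(xyxy, class_id, confidence)\n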
"},{"location":"detection/core/#supervision.detection.core.Detections.__len__","title":"__len__()","text":"

Returns the number of detections in the Detections object.

Source code in supervision/detection/core.py
def __len__(self):\n    \"\"\"\n    Returns the number of detections in the Detections object.\n    \"\"\"\n    return len(self.xyxy)\n
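A one-line hedged sketch (not part of the generated reference), again assuming detections already exists:
if len(detections) == 0:\n    print('no objects detected')\nelse:\n    print(f'{len(detections)} objects detected')\n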
"},{"location":"detection/core/#supervision.detection.core.Detections.__setitem__","title":"__setitem__(key, value)","text":"

Set a value in the data dictionary of the Detections object.

Parameters:

Name Type Description Default key str

The key in the data dictionary to set.

required value Union[ndarray, List]

The value to set for the key.

required Example
import cv2\nimport supervision as sv\nfrom ultralytics import YOLO\n\nimage = cv2.imread(<SOURCE_IMAGE_PATH>)\nmodel = YOLO('yolov8s.pt')\n\nresult = model(image)[0]\ndetections = sv.Detections.from_ultralytics(result)\n\ndetections['names'] = [\n     model.model.names[class_id]\n     for class_id\n     in detections.class_id\n ]\n
Source code in supervision/detection/core.py
def __setitem__(self, key: str, value: Union[np.ndarray, List]):\n    \"\"\"\n    Set a value in the data dictionary of the Detections object.\n\n    Args:\n        key (str): The key in the data dictionary to set.\n        value (Union[np.ndarray, List]): The value to set for the key.\n\n    Example:\n        ```python\n        import cv2\n        import supervision as sv\n        from ultralytics import YOLO\n\n        image = cv2.imread(<SOURCE_IMAGE_PATH>)\n        model = YOLO('yolov8s.pt')\n\n        result = model(image)[0]\n        detections = sv.Detections.from_ultralytics(result)\n\n        detections['names'] = [\n             model.model.names[class_id]\n             for class_id\n             in detections.class_id\n         ]\n        ```\n    \"\"\"\n    if not isinstance(value, (np.ndarray, list)):\n        raise TypeError(\"Value must be a np.ndarray or a list\")\n\n    if isinstance(value, list):\n        value = np.array(value)\n\n    self.data[key] = value\n
"},{"location":"detection/core/#supervision.detection.core.Detections.empty","title":"empty() classmethod","text":"

Create an empty Detections object with no bounding boxes, confidences, or class IDs.

Returns:

Type Description Detections

An empty Detections object.

Example
from supervision import Detections\n\nempty_detections = Detections.empty()\n
Source code in supervision/detection/core.py
@classmethod\ndef empty(cls) -> Detections:\n    \"\"\"\n    Create an empty Detections object with no bounding boxes,\n        confidences, or class IDs.\n\n    Returns:\n        (Detections): An empty Detections object.\n\n    Example:\n        ```python\n        from supervision import Detections\n\n        empty_detections = Detections.empty()\n        ```\n    \"\"\"\n    return cls(\n        xyxy=np.empty((0, 4), dtype=np.float32),\n        confidence=np.array([], dtype=np.float32),\n        class_id=np.array([], dtype=int),\n    )\n
"},{"location":"detection/core/#supervision.detection.core.Detections.from_azure_analyze_image","title":"from_azure_analyze_image(azure_result, class_map=None) classmethod","text":"

Creates a Detections instance from Azure Image Analysis 4.0.

Parameters:

Name Type Description Default azure_result dict

The result from Azure Image Analysis. It should contain detected objects and their bounding box coordinates.

required class_map Optional[Dict[int, str]]

A mapping of class IDs (int) to class names (str). If None, a new mapping is created dynamically.

None

Returns:

Name Type Description Detections Detections

A new Detections object.

Example
import requests\nimport supervision as sv\n\nimage = open(input, \"rb\").read()\n\nendpoint = \"https://.cognitiveservices.azure.com/\"\nsubscription_key = \"\"\n\nheaders = {\n    \"Content-Type\": \"application/octet-stream\",\n    \"Ocp-Apim-Subscription-Key\": subscription_key\n }\n\nresponse = requests.post(endpoint,\n    headers=headers,\n    data=image\n ).json()\n\ndetections = sv.Detections.from_azure_analyze_image(response)\n
Source code in supervision/detection/core.py
@classmethod\ndef from_azure_analyze_image(\n    cls, azure_result: dict, class_map: Optional[Dict[int, str]] = None\n) -> Detections:\n    \"\"\"\n    Creates a Detections instance from [Azure Image Analysis 4.0](\n    https://learn.microsoft.com/en-us/azure/ai-services/computer-vision/\n    concept-object-detection-40).\n\n    Args:\n        azure_result (dict): The result from Azure Image Analysis. It should\n            contain detected objects and their bounding box coordinates.\n        class_map (Optional[Dict[int, str]]): A mapping ofclass IDs (int) to class\n            names (str). If None, a new mapping is created dynamically.\n\n    Returns:\n        Detections: A new Detections object.\n\n    Example:\n        ```python\n        import requests\n        import supervision as sv\n\n        image = open(input, \"rb\").read()\n\n        endpoint = \"https://.cognitiveservices.azure.com/\"\n        subscription_key = \"\"\n\n        headers = {\n            \"Content-Type\": \"application/octet-stream\",\n            \"Ocp-Apim-Subscription-Key\": subscription_key\n         }\n\n        response = requests.post(endpoint,\n            headers=self.headers,\n            data=image\n         ).json()\n\n        detections = sv.Detections.from_azure_analyze_image(response)\n        ```\n    \"\"\"\n    if \"error\" in azure_result:\n        raise ValueError(\n            f'Azure API returned an error {azure_result[\"error\"][\"message\"]}'\n        )\n\n    xyxy, confidences, class_ids = [], [], []\n\n    is_dynamic_mapping = class_map is None\n    if is_dynamic_mapping:\n        class_map = {}\n\n    class_map = {value: key for key, value in class_map.items()}\n\n    for detection in azure_result[\"objectsResult\"][\"values\"]:\n        bbox = detection[\"boundingBox\"]\n\n        tags = detection[\"tags\"]\n\n        x0 = bbox[\"x\"]\n        y0 = bbox[\"y\"]\n        x1 = x0 + bbox[\"w\"]\n        y1 = y0 + bbox[\"h\"]\n\n        for tag in tags:\n            confidence = tag[\"confidence\"]\n            class_name = tag[\"name\"]\n            class_id = class_map.get(class_name, None)\n\n            if is_dynamic_mapping and class_id is None:\n                class_id = len(class_map)\n                class_map[class_name] = class_id\n\n            if class_id is not None:\n                xyxy.append([x0, y0, x1, y1])\n                confidences.append(confidence)\n                class_ids.append(class_id)\n\n    if len(xyxy) == 0:\n        return Detections.empty()\n\n    return cls(\n        xyxy=np.array(xyxy),\n        class_id=np.array(class_ids),\n        confidence=np.array(confidences),\n    )\n
"},{"location":"detection/core/#supervision.detection.core.Detections.from_deepsparse","title":"from_deepsparse(deepsparse_results) classmethod","text":"

Creates a Detections instance from a DeepSparse inference result.

Parameters:

Name Type Description Default deepsparse_results YOLOOutput

The output Results instance from DeepSparse.

required

Returns:

Name Type Description Detections Detections

A new Detections object.

Example
import supervision as sv\nfrom deepsparse import Pipeline\n\nyolo_pipeline = Pipeline.create(\n    task=\"yolo\",\n    model_path = \"zoo:cv/detection/yolov5-l/pytorch/ultralytics/coco/pruned80_quant-none\"\n )\nresult = yolo_pipeline(<SOURCE IMAGE PATH>)\ndetections = sv.Detections.from_deepsparse(result)\n
Source code in supervision/detection/core.py
@classmethod\ndef from_deepsparse(cls, deepsparse_results) -> Detections:\n    \"\"\"\n    Creates a Detections instance from a\n    [DeepSparse](https://github.com/neuralmagic/deepsparse)\n    inference result.\n\n    Args:\n        deepsparse_results (deepsparse.yolo.schemas.YOLOOutput):\n            The output Results instance from DeepSparse.\n\n    Returns:\n        Detections: A new Detections object.\n\n    Example:\n        ```python\n        import supervision as sv\n        from deepsparse import Pipeline\n\n        yolo_pipeline = Pipeline.create(\n            task=\"yolo\",\n            model_path = \"zoo:cv/detection/yolov5-l/pytorch/ultralytics/coco/pruned80_quant-none\"\n         )\n        result = yolo_pipeline(<SOURCE IMAGE PATH>)\n        detections = sv.Detections.from_deepsparse(result)\n        ```\n    \"\"\"  # noqa: E501 // docs\n\n    if np.asarray(deepsparse_results.boxes[0]).shape[0] == 0:\n        return cls.empty()\n\n    return cls(\n        xyxy=np.array(deepsparse_results.boxes[0]),\n        confidence=np.array(deepsparse_results.scores[0]),\n        class_id=np.array(deepsparse_results.labels[0]).astype(float).astype(int),\n    )\n
"},{"location":"detection/core/#supervision.detection.core.Detections.from_detectron2","title":"from_detectron2(detectron2_results) classmethod","text":"

Create a Detections object from the Detectron2 inference result.

Parameters:

Name Type Description Default detectron2_results

The output of a Detectron2 model containing instances with prediction data.

required

Returns:

Type Description Detections

A Detections object containing the bounding boxes, class IDs, and confidences of the predictions.

Example
import cv2\nimport supervision as sv\nfrom detectron2.engine import DefaultPredictor\nfrom detectron2.config import get_cfg\n\n\nimage = cv2.imread(<SOURCE_IMAGE_PATH>)\ncfg = get_cfg()\ncfg.merge_from_file(<CONFIG_PATH>)\ncfg.MODEL.WEIGHTS = <WEIGHTS_PATH>\npredictor = DefaultPredictor(cfg)\n\nresult = predictor(image)\ndetections = sv.Detections.from_detectron2(result)\n
Source code in supervision/detection/core.py
@classmethod\ndef from_detectron2(cls, detectron2_results) -> Detections:\n    \"\"\"\n    Create a Detections object from the\n    [Detectron2](https://github.com/facebookresearch/detectron2) inference result.\n\n    Args:\n        detectron2_results: The output of a\n            Detectron2 model containing instances with prediction data.\n\n    Returns:\n        (Detections): A Detections object containing the bounding boxes,\n            class IDs, and confidences of the predictions.\n\n    Example:\n        ```python\n        import cv2\n        import supervision as sv\n        from detectron2.engine import DefaultPredictor\n        from detectron2.config import get_cfg\n\n\n        image = cv2.imread(<SOURCE_IMAGE_PATH>)\n        cfg = get_cfg()\n        cfg.merge_from_file(<CONFIG_PATH>)\n        cfg.MODEL.WEIGHTS = <WEIGHTS_PATH>\n        predictor = DefaultPredictor(cfg)\n\n        result = predictor(image)\n        detections = sv.Detections.from_detectron2(result)\n        ```\n    \"\"\"\n\n    return cls(\n        xyxy=detectron2_results[\"instances\"].pred_boxes.tensor.cpu().numpy(),\n        confidence=detectron2_results[\"instances\"].scores.cpu().numpy(),\n        class_id=detectron2_results[\"instances\"]\n        .pred_classes.cpu()\n        .numpy()\n        .astype(int),\n    )\n
"},{"location":"detection/core/#supervision.detection.core.Detections.from_inference","title":"from_inference(roboflow_result) classmethod","text":"

Create a sv.Detections object from the Roboflow API inference result or the Inference package results. This method extracts bounding boxes, class IDs, confidences, and class names from the Roboflow API result and encapsulates them into a Detections object.

Parameters:

Name Type Description Default roboflow_result (dict, any)

The result from the Roboflow API or Inference package containing predictions.

required

Returns:

Type Description Detections

A Detections object containing the bounding boxes, class IDs, and confidences of the predictions.

Example
import cv2\nimport supervision as sv\nfrom inference import get_model\n\nimage = cv2.imread(<SOURCE_IMAGE_PATH>)\nmodel = get_model(model_id=\"yolov8s-640\")\n\nresult = model.infer(image)[0]\ndetections = sv.Detections.from_inference(result)\n

Tip

Class names can be accessed using detections[\"class_name\"].

Source code in supervision/detection/core.py
@classmethod\ndef from_inference(cls, roboflow_result: Union[dict, Any]) -> Detections:\n    \"\"\"\n    Create a `sv.Detections` object from the [Roboflow](https://roboflow.com/)\n    API inference result or the [Inference](https://inference.roboflow.com/)\n    package results. This method extracts bounding boxes, class IDs,\n    confidences, and class names from the Roboflow API result and encapsulates\n    them into a Detections object.\n\n    Args:\n        roboflow_result (dict, any): The result from the\n            Roboflow API or Inference package containing predictions.\n\n    Returns:\n        (Detections): A Detections object containing the bounding boxes, class IDs,\n            and confidences of the predictions.\n\n    Example:\n        ```python\n        import cv2\n        import supervision as sv\n        from inference import get_model\n\n        image = cv2.imread(<SOURCE_IMAGE_PATH>)\n        model = get_model(model_id=\"yolov8s-640\")\n\n        result = model.infer(image)[0]\n        detections = sv.Detections.from_inference(result)\n        ```\n\n    !!! tip\n\n        Class names values can be accessed using `detections[\"class_name\"]`.\n    \"\"\"\n    with suppress(AttributeError):\n        roboflow_result = roboflow_result.dict(exclude_none=True, by_alias=True)\n    xyxy, confidence, class_id, masks, trackers, data = process_roboflow_result(\n        roboflow_result=roboflow_result\n    )\n\n    if np.asarray(xyxy).shape[0] == 0:\n        empty_detection = cls.empty()\n        empty_detection.data = {CLASS_NAME_DATA_FIELD: np.empty(0)}\n        return empty_detection\n\n    return cls(\n        xyxy=xyxy,\n        confidence=confidence,\n        class_id=class_id,\n        mask=masks,\n        tracker_id=trackers,\n        data=data,\n    )\n
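A hedged follow-up sketch (not part of the generated reference), assuming detections was produced by from_inference above and that 'person' is one of the model's class names: the class_name data field supports the same boolean filtering as class_id.
person_detections = detections[detections['class_name'] == 'person']\nprint(person_detections['class_name'])\n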
"},{"location":"detection/core/#supervision.detection.core.Detections.from_lmm","title":"from_lmm(lmm, result, **kwargs) classmethod","text":"

Creates a Detections object from the given result string based on the specified Large Multimodal Model (LMM).

Parameters:

Name Type Description Default lmm Union[LMM, str]

The type of LMM (Large Multimodal Model) to use.

required result str

The result string containing the detection data.

required **kwargs

Additional keyword arguments required by the specified LMM.

{}

Returns:

Name Type Description Detections Detections

A new Detections object.

Raises:

Type Description ValueError

If the LMM is invalid, required arguments are missing, or disallowed arguments are provided.

ValueError

If the specified LMM is not supported.

Examples:

import supervision as sv\n\npaligemma_result = \"<loc0256><loc0256><loc0768><loc0768> cat\"\ndetections = sv.Detections.from_lmm(\n    sv.LMM.PALIGEMMA,\n    paligemma_result,\n    resolution_wh=(1000, 1000),\n    classes=['cat', 'dog']\n)\ndetections.xyxy\n# array([[250., 250., 750., 750.]])\n\ndetections.class_id\n# array([0])\n
Source code in supervision/detection/core.py
@classmethod\ndef from_lmm(cls, lmm: Union[LMM, str], result: str, **kwargs) -> Detections:\n    \"\"\"\n    Creates a Detections object from the given result string based on the specified\n    Large Multimodal Model (LMM).\n\n    Args:\n        lmm (Union[LMM, str]): The type of LMM (Large Multimodal Model) to use.\n        result (str): The result string containing the detection data.\n        **kwargs: Additional keyword arguments required by the specified LMM.\n\n    Returns:\n        Detections: A new Detections object.\n\n    Raises:\n        ValueError: If the LMM is invalid, required arguments are missing, or\n            disallowed arguments are provided.\n        ValueError: If the specified LMM is not supported.\n\n    Examples:\n        ```python\n        import supervision as sv\n\n        paligemma_result = \"<loc0256><loc0256><loc0768><loc0768> cat\"\n        detections = sv.Detections.from_lmm(\n            sv.LMM.PALIGEMMA,\n            paligemma_result,\n            resolution_wh=(1000, 1000),\n            classes=['cat', 'dog']\n        )\n        detections.xyxy\n        # array([[250., 250., 750., 750.]])\n\n        detections.class_id\n        # array([0])\n        ```\n    \"\"\"\n    lmm = validate_lmm_and_kwargs(lmm, kwargs)\n\n    if lmm == LMM.PALIGEMMA:\n        xyxy, class_id, class_name = from_paligemma(result, **kwargs)\n        data = {CLASS_NAME_DATA_FIELD: class_name}\n        return cls(xyxy=xyxy, class_id=class_id, data=data)\n\n    raise ValueError(f\"Unsupported LMM: {lmm}\")\n
"},{"location":"detection/core/#supervision.detection.core.Detections.from_mmdetection","title":"from_mmdetection(mmdet_results) classmethod","text":"

Creates a Detections instance from an mmdetection or mmyolo inference result.

Parameters:

Name Type Description Default mmdet_results DetDataSample

The output Results instance from MMDetection.

required

Returns:

Name Type Description Detections Detections

A new Detections object.

Example
import cv2\nimport supervision as sv\nfrom mmdet.apis import init_detector, inference_detector\n\nimage = cv2.imread(<SOURCE_IMAGE_PATH>)\nmodel = init_detector(<CONFIG_PATH>, <WEIGHTS_PATH>, device=<DEVICE>)\n\nresult = inference_detector(model, image)\ndetections = sv.Detections.from_mmdetection(result)\n
Source code in supervision/detection/core.py
@classmethod\ndef from_mmdetection(cls, mmdet_results) -> Detections:\n    \"\"\"\n    Creates a Detections instance from a\n    [mmdetection](https://github.com/open-mmlab/mmdetection) and\n    [mmyolo](https://github.com/open-mmlab/mmyolo) inference result.\n\n    Args:\n        mmdet_results (mmdet.structures.DetDataSample):\n            The output Results instance from MMDetection.\n\n    Returns:\n        Detections: A new Detections object.\n\n    Example:\n        ```python\n        import cv2\n        import supervision as sv\n        from mmdet.apis import init_detector, inference_detector\n\n        image = cv2.imread(<SOURCE_IMAGE_PATH>)\n        model = init_detector(<CONFIG_PATH>, <WEIGHTS_PATH>, device=<DEVICE>)\n\n        result = inference_detector(model, image)\n        detections = sv.Detections.from_mmdetection(result)\n        ```\n    \"\"\"  # noqa: E501 // docs\n\n    return cls(\n        xyxy=mmdet_results.pred_instances.bboxes.cpu().numpy(),\n        confidence=mmdet_results.pred_instances.scores.cpu().numpy(),\n        class_id=mmdet_results.pred_instances.labels.cpu().numpy().astype(int),\n        mask=mmdet_results.pred_instances.masks.cpu().numpy()\n        if \"masks\" in mmdet_results.pred_instances\n        else None,\n    )\n
"},{"location":"detection/core/#supervision.detection.core.Detections.from_paddledet","title":"from_paddledet(paddledet_result) classmethod","text":"

Creates a Detections instance from a PaddleDetection inference result.

Parameters:

Name Type Description Default paddledet_result List[dict]

The output Results instance from PaddleDetection.

required

Returns:

Name Type Description Detections Detections

A new Detections object.

Example
import supervision as sv\nimport paddle\nfrom ppdet.engine import Trainer\nfrom ppdet.core.workspace import load_config\n\nweights = ()\nconfig = ()\n\ncfg = load_config(config)\ntrainer = Trainer(cfg, mode='test')\ntrainer.load_weights(weights)\n\npaddledet_result = trainer.predict([images])[0]\n\ndetections = sv.Detections.from_paddledet(paddledet_result)\n
Source code in supervision/detection/core.py
@classmethod\ndef from_paddledet(cls, paddledet_result) -> Detections:\n    \"\"\"\n    Creates a Detections instance from\n        [PaddleDetection](https://github.com/PaddlePaddle/PaddleDetection)\n        inference result.\n\n    Args:\n        paddledet_result (List[dict]): The output Results instance from PaddleDet\n\n    Returns:\n        Detections: A new Detections object.\n\n    Example:\n        ```python\n        import supervision as sv\n        import paddle\n        from ppdet.engine import Trainer\n        from ppdet.core.workspace import load_config\n\n        weights = ()\n        config = ()\n\n        cfg = load_config(config)\n        trainer = Trainer(cfg, mode='test')\n        trainer.load_weights(weights)\n\n        paddledet_result = trainer.predict([images])[0]\n\n        detections = sv.Detections.from_paddledet(paddledet_result)\n        ```\n    \"\"\"\n\n    if np.asarray(paddledet_result[\"bbox\"][:, 2:6]).shape[0] == 0:\n        return cls.empty()\n\n    return cls(\n        xyxy=paddledet_result[\"bbox\"][:, 2:6],\n        confidence=paddledet_result[\"bbox\"][:, 1],\n        class_id=paddledet_result[\"bbox\"][:, 0].astype(int),\n    )\n
"},{"location":"detection/core/#supervision.detection.core.Detections.from_roboflow","title":"from_roboflow(roboflow_result) classmethod","text":"

Deprecated

Detections.from_roboflow is deprecated and will be removed in supervision-0.22.0. Use Detections.from_inference instead.

Create a Detections object from the Roboflow API inference result or the Inference package results.

Parameters:

Name Type Description Default roboflow_result dict

The result from the Roboflow API containing predictions.

required

Returns:

Type Description Detections

A Detections object containing the bounding boxes, class IDs, and confidences of the predictions.

Example
import cv2\nimport supervision as sv\nfrom inference import get_model\n\nimage = cv2.imread(<SOURCE_IMAGE_PATH>)\nmodel = get_model(model_id=\"yolov8s-640\")\n\nresult = model.infer(image)[0]\ndetections = sv.Detections.from_roboflow(result)\n
Source code in supervision/detection/core.py
@classmethod\n@deprecated(\n    \"`Detections.from_roboflow` is deprecated and will be removed in \"\n    \"`supervision-0.22.0`. Use `Detections.from_inference` instead.\"\n)\ndef from_roboflow(cls, roboflow_result: Union[dict, Any]) -> Detections:\n    \"\"\"\n    !!! failure \"Deprecated\"\n\n        `Detections.from_roboflow` is deprecated and will be removed in\n        `supervision-0.22.0`. Use `Detections.from_inference` instead.\n\n    Create a Detections object from the [Roboflow](https://roboflow.com/)\n        API inference result or the [Inference](https://inference.roboflow.com/)\n        package results.\n\n    Args:\n        roboflow_result (dict): The result from the\n            Roboflow API containing predictions.\n\n    Returns:\n        (Detections): A Detections object containing the bounding boxes, class IDs,\n            and confidences of the predictions.\n\n    Example:\n        ```python\n        import cv2\n        import supervision as sv\n        from inference import get_model\n\n        image = cv2.imread(<SOURCE_IMAGE_PATH>)\n        model = get_model(model_id=\"yolov8s-640\")\n\n        result = model.infer(image)[0]\n        detections = sv.Detections.from_roboflow(result)\n        ```\n    \"\"\"\n    return cls.from_inference(roboflow_result)\n
"},{"location":"detection/core/#supervision.detection.core.Detections.from_sam","title":"from_sam(sam_result) classmethod","text":"

Creates a Detections instance from Segment Anything Model inference result.

Parameters:

Name Type Description Default sam_result List[dict]

The output Results instance from SAM

required

Returns:

Name Type Description Detections Detections

A new Detections object.

Example
import supervision as sv\nfrom segment_anything import (\n    sam_model_registry,\n    SamAutomaticMaskGenerator\n )\n\nsam_model_reg = sam_model_registry[MODEL_TYPE]\nsam = sam_model_reg(checkpoint=CHECKPOINT_PATH).to(device=DEVICE)\nmask_generator = SamAutomaticMaskGenerator(sam)\nsam_result = mask_generator.generate(IMAGE)\ndetections = sv.Detections.from_sam(sam_result=sam_result)\n
Source code in supervision/detection/core.py
@classmethod\ndef from_sam(cls, sam_result: List[dict]) -> Detections:\n    \"\"\"\n    Creates a Detections instance from\n    [Segment Anything Model](https://github.com/facebookresearch/segment-anything)\n    inference result.\n\n    Args:\n        sam_result (List[dict]): The output Results instance from SAM\n\n    Returns:\n        Detections: A new Detections object.\n\n    Example:\n        ```python\n        import supervision as sv\n        from segment_anything import (\n            sam_model_registry,\n            SamAutomaticMaskGenerator\n         )\n\n        sam_model_reg = sam_model_registry[MODEL_TYPE]\n        sam = sam_model_reg(checkpoint=CHECKPOINT_PATH).to(device=DEVICE)\n        mask_generator = SamAutomaticMaskGenerator(sam)\n        sam_result = mask_generator.generate(IMAGE)\n        detections = sv.Detections.from_sam(sam_result=sam_result)\n        ```\n    \"\"\"\n\n    sorted_generated_masks = sorted(\n        sam_result, key=lambda x: x[\"area\"], reverse=True\n    )\n\n    xywh = np.array([mask[\"bbox\"] for mask in sorted_generated_masks])\n    mask = np.array([mask[\"segmentation\"] for mask in sorted_generated_masks])\n\n    if np.asarray(xywh).shape[0] == 0:\n        return cls.empty()\n\n    xyxy = xywh_to_xyxy(boxes_xywh=xywh)\n    return cls(xyxy=xyxy, mask=mask)\n
"},{"location":"detection/core/#supervision.detection.core.Detections.from_tensorflow","title":"from_tensorflow(tensorflow_results, resolution_wh) classmethod","text":"

Creates a Detections instance from a Tensorflow Hub inference result.

Parameters:

Name Type Description Default tensorflow_results dict

The output results from Tensorflow Hub.

required

Returns:

Name Type Description Detections Detections

A new Detections object.

Example
import tensorflow as tf\nimport tensorflow_hub as hub\nimport numpy as np\nimport cv2\nimport supervision as sv\n\nmodule_handle = \"https://tfhub.dev/tensorflow/centernet/hourglass_512x512_kpts/1\"\nmodel = hub.load(module_handle)\nimg = np.array(cv2.imread(SOURCE_IMAGE_PATH))\nresult = model(img)\n\nh, w, _ = img.shape\ndetections = sv.Detections.from_tensorflow(result, resolution_wh=(w, h))\n
Source code in supervision/detection/core.py
@classmethod\ndef from_tensorflow(\n    cls, tensorflow_results: dict, resolution_wh: tuple\n) -> Detections:\n    \"\"\"\n    Creates a Detections instance from a\n    [Tensorflow Hub](https://www.tensorflow.org/hub/tutorials/tf2_object_detection)\n    inference result.\n\n    Args:\n        tensorflow_results (dict):\n            The output results from Tensorflow Hub.\n\n    Returns:\n        Detections: A new Detections object.\n\n    Example:\n        ```python\n        import tensorflow as tf\n        import tensorflow_hub as hub\n        import numpy as np\n        import cv2\n\n        module_handle = \"https://tfhub.dev/tensorflow/centernet/hourglass_512x512_kpts/1\"\n        model = hub.load(module_handle)\n        img = np.array(cv2.imread(SOURCE_IMAGE_PATH))\n        result = model(img)\n        detections = sv.Detections.from_tensorflow(result)\n        ```\n    \"\"\"  # noqa: E501 // docs\n\n    boxes = tensorflow_results[\"detection_boxes\"][0].numpy()\n    boxes[:, [0, 2]] *= resolution_wh[0]\n    boxes[:, [1, 3]] *= resolution_wh[1]\n    boxes = boxes[:, [1, 0, 3, 2]]\n    return cls(\n        xyxy=boxes,\n        confidence=tensorflow_results[\"detection_scores\"][0].numpy(),\n        class_id=tensorflow_results[\"detection_classes\"][0].numpy().astype(int),\n    )\n
"},{"location":"detection/core/#supervision.detection.core.Detections.from_transformers","title":"from_transformers(transformers_results, id2label=None) classmethod","text":"

Creates a Detections instance from object detection or segmentation Transformer inference result.

Parameters:

Name Type Description Default transformers_results dict

The output of Transformers model inference. A dictionary containing the scores, labels, boxes and masks keys.

required id2label Optional[Dict[int, str]]

A dictionary mapping class IDs to class names. If provided, the resulting Detections object will contain class_name data field with the class names.

None

Returns:

Name Type Description Detections Detections

A new Detections object.

Example
import torch\nimport supervision as sv\nfrom PIL import Image\nfrom transformers import DetrImageProcessor, DetrForObjectDetection\n\nprocessor = DetrImageProcessor.from_pretrained(\"facebook/detr-resnet-50\")\nmodel = DetrForObjectDetection.from_pretrained(\"facebook/detr-resnet-50\")\n\nimage = Image.open(<SOURCE_IMAGE_PATH>)\ninputs = processor(images=image, return_tensors=\"pt\")\n\nwith torch.no_grad():\n    outputs = model(**inputs)\n\nwidth, height = image.size\ntarget_size = torch.tensor([[height, width]])\nresults = processor.post_process_object_detection(\n    outputs=outputs, target_sizes=target_size)[0]\n\ndetections = sv.Detections.from_transformers(\n    transformers_results=results,\n    id2label=model.config.id2label\n)\n

Tip

Class name values can be accessed using detections[\"class_name\"].

Source code in supervision/detection/core.py
@classmethod\ndef from_transformers(\n    cls, transformers_results: dict, id2label: Optional[Dict[int, str]] = None\n) -> Detections:\n    \"\"\"\n    Creates a Detections instance from object detection or segmentation\n    [Transformer](https://github.com/huggingface/transformers) inference result.\n\n    Args:\n        transformers_results (dict): The output of Transformers model inference. A\n            dictionary containing the `scores`, `labels`, `boxes` and `masks` keys.\n        id2label (Optional[Dict[int, str]]): A dictionary mapping class IDs to\n            class names. If provided, the resulting Detections object will contain\n            `class_name` data field with the class names.\n\n    Returns:\n        Detections: A new Detections object.\n\n    Example:\n        ```python\n        import torch\n        import supervision as sv\n        from PIL import Image\n        from transformers import DetrImageProcessor, DetrForObjectDetection\n\n        processor = DetrImageProcessor.from_pretrained(\"facebook/detr-resnet-50\")\n        model = DetrForObjectDetection.from_pretrained(\"facebook/detr-resnet-50\")\n\n        image = Image.open(<SOURCE_IMAGE_PATH>)\n        inputs = processor(images=image, return_tensors=\"pt\")\n\n        with torch.no_grad():\n            outputs = model(**inputs)\n\n        width, height = image.size\n        target_size = torch.tensor([[height, width]])\n        results = processor.post_process_object_detection(\n            outputs=outputs, target_sizes=target_size)[0]\n\n        detections = sv.Detections.from_transformers(\n            transformers_results=results,\n            id2label=model.config.id2label\n        )\n        ```\n\n    !!! tip\n\n        Class names values can be accessed using `detections[\"class_name\"]`.\n    \"\"\"  # noqa: E501 // docs\n\n    class_ids = transformers_results[\"labels\"].cpu().detach().numpy().astype(int)\n    data = {}\n    if id2label is not None:\n        class_names = np.array([id2label[class_id] for class_id in class_ids])\n        data[CLASS_NAME_DATA_FIELD] = class_names\n    if \"boxes\" in transformers_results:\n        return cls(\n            xyxy=transformers_results[\"boxes\"].cpu().detach().numpy(),\n            confidence=transformers_results[\"scores\"].cpu().detach().numpy(),\n            class_id=class_ids,\n            data=data,\n        )\n    elif \"masks\" in transformers_results:\n        masks = transformers_results[\"masks\"].cpu().detach().numpy().astype(bool)\n        return cls(\n            xyxy=mask_to_xyxy(masks),\n            mask=masks,\n            confidence=transformers_results[\"scores\"].cpu().detach().numpy(),\n            class_id=class_ids,\n            data=data,\n        )\n    else:\n        raise NotImplementedError(\n            \"Only object detection and semantic segmentation results are supported.\"\n        )\n
"},{"location":"detection/core/#supervision.detection.core.Detections.from_ultralytics","title":"from_ultralytics(ultralytics_results) classmethod","text":"

Creates a sv.Detections instance from a YOLOv8 inference result.

Note

from_ultralytics is compatible with detection, segmentation, and OBB models.

Parameters:

Name Type Description Default ultralytics_results Results

The output Results instance from Ultralytics

required

Returns:

Name Type Description Detections Detections

A new Detections object.

Example
import cv2\nimport supervision as sv\nfrom ultralytics import YOLO\n\nimage = cv2.imread(<SOURCE_IMAGE_PATH>)\nmodel = YOLO('yolov8s.pt')\nresults = model(image)[0]\ndetections = sv.Detections.from_ultralytics(results)\n

Tip

Class name values can be accessed using detections[\"class_name\"].

Source code in supervision/detection/core.py
@classmethod\ndef from_ultralytics(cls, ultralytics_results) -> Detections:\n    \"\"\"\n    Creates a `sv.Detections` instance from a\n    [YOLOv8](https://github.com/ultralytics/ultralytics) inference result.\n\n    !!! Note\n\n        `from_ultralytics` is compatible with\n        [detection](https://docs.ultralytics.com/tasks/detect/),\n        [segmentation](https://docs.ultralytics.com/tasks/segment/), and\n        [OBB](https://docs.ultralytics.com/tasks/obb/) models.\n\n    Args:\n        ultralytics_results (ultralytics.yolo.engine.results.Results):\n            The output Results instance from Ultralytics\n\n    Returns:\n        Detections: A new Detections object.\n\n    Example:\n        ```python\n        import cv2\n        import supervision as sv\n        from ultralytics import YOLO\n\n        image = cv2.imread(<SOURCE_IMAGE_PATH>)\n        model = YOLO('yolov8s.pt')\n        results = model(image)[0]\n        detections = sv.Detections.from_ultralytics(results)\n        ```\n\n    !!! tip\n\n        Class names values can be accessed using `detections[\"class_name\"]`.\n    \"\"\"  # noqa: E501 // docs\n\n    if hasattr(ultralytics_results, \"obb\") and ultralytics_results.obb is not None:\n        class_id = ultralytics_results.obb.cls.cpu().numpy().astype(int)\n        class_names = np.array([ultralytics_results.names[i] for i in class_id])\n        oriented_box_coordinates = ultralytics_results.obb.xyxyxyxy.cpu().numpy()\n        return cls(\n            xyxy=ultralytics_results.obb.xyxy.cpu().numpy(),\n            confidence=ultralytics_results.obb.conf.cpu().numpy(),\n            class_id=class_id,\n            tracker_id=ultralytics_results.obb.id.int().cpu().numpy()\n            if ultralytics_results.obb.id is not None\n            else None,\n            data={\n                ORIENTED_BOX_COORDINATES: oriented_box_coordinates,\n                CLASS_NAME_DATA_FIELD: class_names,\n            },\n        )\n\n    class_id = ultralytics_results.boxes.cls.cpu().numpy().astype(int)\n    class_names = np.array([ultralytics_results.names[i] for i in class_id])\n    return cls(\n        xyxy=ultralytics_results.boxes.xyxy.cpu().numpy(),\n        confidence=ultralytics_results.boxes.conf.cpu().numpy(),\n        class_id=class_id,\n        mask=extract_ultralytics_masks(ultralytics_results),\n        tracker_id=ultralytics_results.boxes.id.int().cpu().numpy()\n        if ultralytics_results.boxes.id is not None\n        else None,\n        data={CLASS_NAME_DATA_FIELD: class_names},\n    )\n
"},{"location":"detection/core/#supervision.detection.core.Detections.from_yolo_nas","title":"from_yolo_nas(yolo_nas_results) classmethod","text":"

Creates a Detections instance from a YOLO-NAS inference result.

Parameters:

Name Type Description Default yolo_nas_results ImageDetectionPrediction

The output Results instance from YOLO-NAS. ImageDetectionPrediction comes from 'super_gradients.training.models.prediction_results'.

required

Returns:

Name Type Description Detections Detections

A new Detections object.

Example
import cv2\nfrom super_gradients.training import models\nimport supervision as sv\n\nimage = cv2.imread(<SOURCE_IMAGE_PATH>)\nmodel = models.get('yolo_nas_l', pretrained_weights=\"coco\")\n\nresult = list(model.predict(image, conf=0.35))[0]\ndetections = sv.Detections.from_yolo_nas(result)\n
Source code in supervision/detection/core.py
@classmethod\ndef from_yolo_nas(cls, yolo_nas_results) -> Detections:\n    \"\"\"\n    Creates a Detections instance from a\n    [YOLO-NAS](https://github.com/Deci-AI/super-gradients/blob/master/YOLONAS.md)\n    inference result.\n\n    Args:\n        yolo_nas_results (ImageDetectionPrediction):\n            The output Results instance from YOLO-NAS\n            ImageDetectionPrediction is coming from\n            'super_gradients.training.models.prediction_results'\n\n    Returns:\n        Detections: A new Detections object.\n\n    Example:\n        ```python\n        import cv2\n        from super_gradients.training import models\n        import supervision as sv\n\n        image = cv2.imread(<SOURCE_IMAGE_PATH>)\n        model = models.get('yolo_nas_l', pretrained_weights=\"coco\")\n\n        result = list(model.predict(image, conf=0.35))[0]\n        detections = sv.Detections.from_yolo_nas(result)\n        ```\n    \"\"\"\n    if np.asarray(yolo_nas_results.prediction.bboxes_xyxy).shape[0] == 0:\n        return cls.empty()\n\n    return cls(\n        xyxy=yolo_nas_results.prediction.bboxes_xyxy,\n        confidence=yolo_nas_results.prediction.confidence,\n        class_id=yolo_nas_results.prediction.labels.astype(int),\n    )\n
"},{"location":"detection/core/#supervision.detection.core.Detections.from_yolov5","title":"from_yolov5(yolov5_results) classmethod","text":"

Creates a Detections instance from a YOLOv5 inference result.

Parameters:

Name Type Description Default yolov5_results Detections

The output Detections instance from YOLOv5

required

Returns:

Name Type Description Detections Detections

A new Detections object.

Example
import cv2\nimport torch\nimport supervision as sv\n\nimage = cv2.imread(<SOURCE_IMAGE_PATH>)\nmodel = torch.hub.load('ultralytics/yolov5', 'yolov5s')\nresult = model(image)\ndetections = sv.Detections.from_yolov5(result)\n
Source code in supervision/detection/core.py
@classmethod\ndef from_yolov5(cls, yolov5_results) -> Detections:\n    \"\"\"\n    Creates a Detections instance from a\n    [YOLOv5](https://github.com/ultralytics/yolov5) inference result.\n\n    Args:\n        yolov5_results (yolov5.models.common.Detections):\n            The output Detections instance from YOLOv5\n\n    Returns:\n        Detections: A new Detections object.\n\n    Example:\n        ```python\n        import cv2\n        import torch\n        import supervision as sv\n\n        image = cv2.imread(<SOURCE_IMAGE_PATH>)\n        model = torch.hub.load('ultralytics/yolov5', 'yolov5s')\n        result = model(image)\n        detections = sv.Detections.from_yolov5(result)\n        ```\n    \"\"\"\n    yolov5_detections_predictions = yolov5_results.pred[0].cpu().cpu().numpy()\n\n    return cls(\n        xyxy=yolov5_detections_predictions[:, :4],\n        confidence=yolov5_detections_predictions[:, 4],\n        class_id=yolov5_detections_predictions[:, 5].astype(int),\n    )\n
"},{"location":"detection/core/#supervision.detection.core.Detections.get_anchors_coordinates","title":"get_anchors_coordinates(anchor)","text":"

Calculates and returns the coordinates of a specific anchor point within the bounding boxes defined by the xyxy attribute. The anchor point can be any of the predefined positions in the Position enum, such as CENTER, CENTER_LEFT, BOTTOM_RIGHT, etc.

Parameters:

Name Type Description Default anchor Position

An enum specifying the position of the anchor point within the bounding box. Supported positions are defined in the Position enum.

required

Returns:

Type Description ndarray

np.ndarray: An array of shape (n, 2), where n is the number of bounding boxes. Each row contains the [x, y] coordinates of the specified anchor point for the corresponding bounding box.

Raises:

Type Description ValueError

If the provided anchor is not supported.
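
Example
A minimal sketch (the box coordinates below are illustrative, not taken from the library docs) showing how to read the bottom-center anchor of each bounding box:
import numpy as np\nimport supervision as sv\n\ndetections = sv.Detections(xyxy=np.array([[10, 20, 50, 80], [100, 100, 200, 220]]))\n\n# bottom-center anchor of each box: ((x_min + x_max) / 2, y_max)\nanchors = detections.get_anchors_coordinates(anchor=sv.Position.BOTTOM_CENTER)\n# array([[ 30.,  80.],\n#        [150., 220.]])\n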

Source code in supervision/detection/core.py
def get_anchors_coordinates(self, anchor: Position) -> np.ndarray:\n    \"\"\"\n    Calculates and returns the coordinates of a specific anchor point\n    within the bounding boxes defined by the `xyxy` attribute. The anchor\n    point can be any of the predefined positions in the `Position` enum,\n    such as `CENTER`, `CENTER_LEFT`, `BOTTOM_RIGHT`, etc.\n\n    Args:\n        anchor (Position): An enum specifying the position of the anchor point\n            within the bounding box. Supported positions are defined in the\n            `Position` enum.\n\n    Returns:\n        np.ndarray: An array of shape `(n, 2)`, where `n` is the number of bounding\n            boxes. Each row contains the `[x, y]` coordinates of the specified\n            anchor point for the corresponding bounding box.\n\n    Raises:\n        ValueError: If the provided `anchor` is not supported.\n    \"\"\"\n    if anchor == Position.CENTER:\n        return np.array(\n            [\n                (self.xyxy[:, 0] + self.xyxy[:, 2]) / 2,\n                (self.xyxy[:, 1] + self.xyxy[:, 3]) / 2,\n            ]\n        ).transpose()\n    elif anchor == Position.CENTER_OF_MASS:\n        if self.mask is None:\n            raise ValueError(\n                \"Cannot use `Position.CENTER_OF_MASS` without a detection mask.\"\n            )\n        return calculate_masks_centroids(masks=self.mask)\n    elif anchor == Position.CENTER_LEFT:\n        return np.array(\n            [\n                self.xyxy[:, 0],\n                (self.xyxy[:, 1] + self.xyxy[:, 3]) / 2,\n            ]\n        ).transpose()\n    elif anchor == Position.CENTER_RIGHT:\n        return np.array(\n            [\n                self.xyxy[:, 2],\n                (self.xyxy[:, 1] + self.xyxy[:, 3]) / 2,\n            ]\n        ).transpose()\n    elif anchor == Position.BOTTOM_CENTER:\n        return np.array(\n            [(self.xyxy[:, 0] + self.xyxy[:, 2]) / 2, self.xyxy[:, 3]]\n        ).transpose()\n    elif anchor == Position.BOTTOM_LEFT:\n        return np.array([self.xyxy[:, 0], self.xyxy[:, 3]]).transpose()\n    elif anchor == Position.BOTTOM_RIGHT:\n        return np.array([self.xyxy[:, 2], self.xyxy[:, 3]]).transpose()\n    elif anchor == Position.TOP_CENTER:\n        return np.array(\n            [(self.xyxy[:, 0] + self.xyxy[:, 2]) / 2, self.xyxy[:, 1]]\n        ).transpose()\n    elif anchor == Position.TOP_LEFT:\n        return np.array([self.xyxy[:, 0], self.xyxy[:, 1]]).transpose()\n    elif anchor == Position.TOP_RIGHT:\n        return np.array([self.xyxy[:, 2], self.xyxy[:, 1]]).transpose()\n\n    raise ValueError(f\"{anchor} is not supported.\")\n
"},{"location":"detection/core/#supervision.detection.core.Detections.is_empty","title":"is_empty()","text":"

Returns True if the Detections object is considered empty.
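
Example
A minimal sketch (illustrative values) contrasting an empty and a non-empty Detections object:
import numpy as np\nimport supervision as sv\n\nsv.Detections.empty().is_empty()\n# True\n\nsv.Detections(xyxy=np.array([[10, 10, 50, 50]])).is_empty()\n# False\n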

Source code in supervision/detection/core.py
def is_empty(self) -> bool:\n    \"\"\"\n    Returns `True` if the `Detections` object is considered empty.\n    \"\"\"\n    empty_detections = Detections.empty()\n    empty_detections.data = self.data\n    return self == empty_detections\n
"},{"location":"detection/core/#supervision.detection.core.Detections.merge","title":"merge(detections_list) classmethod","text":"

Merge a list of Detections objects into a single Detections object.

This method takes a list of Detections objects and combines their respective fields (xyxy, mask, confidence, class_id, and tracker_id) into a single Detections object.

For example, if merging Detections with 3 and 4 detected objects, this method will return a Detections with 7 objects (7 entries in xyxy, mask, etc).

Note

When merging, empty Detections objects are ignored.

Parameters:

Name Type Description Default detections_list List[Detections]

A list of Detections objects to merge.

required

Returns:

Type Description Detections

A single Detections object containing the merged data from the input list.

Example
import numpy as np\nimport supervision as sv\n\ndetections_1 = sv.Detections(\n    xyxy=np.array([[15, 15, 100, 100], [200, 200, 300, 300]]),\n    class_id=np.array([1, 2]),\n    data={'feature_vector': np.array([0.1, 0.2])}\n )\n\ndetections_2 = sv.Detections(\n    xyxy=np.array([[30, 30, 120, 120]]),\n    class_id=np.array([1]),\n    data={'feature_vector': [np.array([0.3])]}\n )\n\nmerged_detections = sv.Detections.merge([detections_1, detections_2])\n\nmerged_detections.xyxy\narray([[ 15,  15, 100, 100],\n       [200, 200, 300, 300],\n       [ 30,  30, 120, 120]])\n\nmerged_detections.class_id\narray([1, 2, 1])\n\nmerged_detections.data['feature_vector']\narray([0.1, 0.2, 0.3])\n
Source code in supervision/detection/core.py
@classmethod\ndef merge(cls, detections_list: List[Detections]) -> Detections:\n    \"\"\"\n    Merge a list of Detections objects into a single Detections object.\n\n    This method takes a list of Detections objects and combines their\n    respective fields (`xyxy`, `mask`, `confidence`, `class_id`, and `tracker_id`)\n    into a single Detections object.\n\n    For example, if merging Detections with 3 and 4 detected objects, this method\n    will return a Detections with 7 objects (7 entries in `xyxy`, `mask`, etc).\n\n    !!! Note\n\n        When merging, empty `Detections` objects are ignored.\n\n    Args:\n        detections_list (List[Detections]): A list of Detections objects to merge.\n\n    Returns:\n        (Detections): A single Detections object containing\n            the merged data from the input list.\n\n    Example:\n        ```python\n        import numpy as np\n        import supervision as sv\n\n        detections_1 = sv.Detections(\n            xyxy=np.array([[15, 15, 100, 100], [200, 200, 300, 300]]),\n            class_id=np.array([1, 2]),\n            data={'feature_vector': np.array([0.1, 0.2)])}\n         )\n\n        detections_2 = sv.Detections(\n            xyxy=np.array([[30, 30, 120, 120]]),\n            class_id=np.array([1]),\n            data={'feature_vector': [np.array([0.3])]}\n         )\n\n        merged_detections = Detections.merge([detections_1, detections_2])\n\n        merged_detections.xyxy\n        array([[ 15,  15, 100, 100],\n               [200, 200, 300, 300],\n               [ 30,  30, 120, 120]])\n\n        merged_detections.class_id\n        array([1, 2, 1])\n\n        merged_detections.data['feature_vector']\n        array([0.1, 0.2, 0.3])\n        ```\n    \"\"\"\n    detections_list = [\n        detections for detections in detections_list if not detections.is_empty()\n    ]\n\n    if len(detections_list) == 0:\n        return Detections.empty()\n\n    for detections in detections_list:\n        validate_detections_fields(\n            xyxy=detections.xyxy,\n            mask=detections.mask,\n            confidence=detections.confidence,\n            class_id=detections.class_id,\n            tracker_id=detections.tracker_id,\n            data=detections.data,\n        )\n\n    xyxy = np.vstack([d.xyxy for d in detections_list])\n\n    def stack_or_none(name: str):\n        if all(d.__getattribute__(name) is None for d in detections_list):\n            return None\n        if any(d.__getattribute__(name) is None for d in detections_list):\n            raise ValueError(f\"All or none of the '{name}' fields must be None\")\n        return (\n            np.vstack([d.__getattribute__(name) for d in detections_list])\n            if name == \"mask\"\n            else np.hstack([d.__getattribute__(name) for d in detections_list])\n        )\n\n    mask = stack_or_none(\"mask\")\n    confidence = stack_or_none(\"confidence\")\n    class_id = stack_or_none(\"class_id\")\n    tracker_id = stack_or_none(\"tracker_id\")\n\n    data = merge_data([d.data for d in detections_list])\n\n    return cls(\n        xyxy=xyxy,\n        mask=mask,\n        confidence=confidence,\n        class_id=class_id,\n        tracker_id=tracker_id,\n        data=data,\n    )\n
"},{"location":"detection/core/#supervision.detection.core.Detections.with_nmm","title":"with_nmm(threshold=0.5, class_agnostic=False)","text":"

Perform non-maximum merging on the current set of object detections.

Parameters:

Name Type Description Default threshold float

The intersection-over-union threshold to use for non-maximum merging. Defaults to 0.5.

0.5 class_agnostic bool

Whether to perform class-agnostic non-maximum merging. If True, the class_id of each detection will be ignored. Defaults to False.

False

Returns:

Name Type Description Detections Detections

A new Detections object containing the subset of detections after non-maximum merging.

Raises:

Type Description AssertionError

If confidence is None, or if class_id is None while class_agnostic is False.
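
Example
A minimal sketch (illustrative boxes and scores); two heavily overlapping same-class detections are expected to be merged into one:
import numpy as np\nimport supervision as sv\n\ndetections = sv.Detections(\n    xyxy=np.array([[0, 0, 100, 100], [5, 5, 105, 105]]),\n    confidence=np.array([0.9, 0.8]),\n    class_id=np.array([0, 0]),\n)\n\n# overlapping same-class boxes are merged into a single detection\nmerged = detections.with_nmm(threshold=0.5)\nlen(merged)\n# 1\n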

Source code in supervision/detection/core.py
def with_nmm(\n    self, threshold: float = 0.5, class_agnostic: bool = False\n) -> Detections:\n    \"\"\"\n    Perform non-maximum merging on the current set of object detections.\n\n    Args:\n        threshold (float, optional): The intersection-over-union threshold\n            to use for non-maximum merging. Defaults to 0.5.\n        class_agnostic (bool, optional): Whether to perform class-agnostic\n            non-maximum merging. If True, the class_id of each detection\n            will be ignored. Defaults to False.\n\n    Returns:\n        Detections: A new Detections object containing the subset of detections\n            after non-maximum merging.\n\n    Raises:\n        AssertionError: If `confidence` is None or `class_id` is None and\n            class_agnostic is False.\n\n    ![non-max-merging](https://media.roboflow.com/supervision-docs/non-max-merging.png){ align=center width=\"800\" }\n    \"\"\"  # noqa: E501 // docs\n    if len(self) == 0:\n        return self\n\n    assert (\n        self.confidence is not None\n    ), \"Detections confidence must be given for NMM to be executed.\"\n\n    if class_agnostic:\n        predictions = np.hstack((self.xyxy, self.confidence.reshape(-1, 1)))\n    else:\n        assert self.class_id is not None, (\n            \"Detections class_id must be given for NMM to be executed. If you\"\n            \" intended to perform class agnostic NMM set class_agnostic=True.\"\n        )\n        predictions = np.hstack(\n            (\n                self.xyxy,\n                self.confidence.reshape(-1, 1),\n                self.class_id.reshape(-1, 1),\n            )\n        )\n\n    merge_groups = box_non_max_merge(\n        predictions=predictions, iou_threshold=threshold\n    )\n\n    result = []\n    for merge_group in merge_groups:\n        unmerged_detections = [self[i] for i in merge_group]\n        merged_detections = merge_inner_detections_objects(\n            unmerged_detections, threshold\n        )\n        result.append(merged_detections)\n\n    return Detections.merge(result)\n
"},{"location":"detection/core/#supervision.detection.core.Detections.with_nms","title":"with_nms(threshold=0.5, class_agnostic=False)","text":"

Performs non-max suppression on the detection set. If the detections come from a segmentation model, mask IoU is used; otherwise, box IoU is used.

Parameters:

Name Type Description Default threshold float

The intersection-over-union threshold to use for non-maximum suppression. The lower the value, the more restrictive the NMS becomes. Defaults to 0.5.

0.5 class_agnostic bool

Whether to perform class-agnostic non-maximum suppression. If True, the class_id of each detection will be ignored. Defaults to False.

False

Returns:

Name Type Description Detections Detections

A new Detections object containing the subset of detections after non-maximum suppression.

Raises:

Type Description AssertionError

If confidence is None, or if class_id is None while class_agnostic is False.
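
Example
A minimal sketch (illustrative boxes and scores); the lower-confidence box that overlaps the first one is expected to be suppressed:
import numpy as np\nimport supervision as sv\n\ndetections = sv.Detections(\n    xyxy=np.array([[0, 0, 100, 100], [5, 5, 105, 105], [200, 200, 300, 300]]),\n    confidence=np.array([0.9, 0.8, 0.7]),\n    class_id=np.array([0, 0, 1]),\n)\n\n# keeps the highest-confidence box of each overlapping same-class pair\nfiltered = detections.with_nms(threshold=0.5)\nlen(filtered)\n# 2\n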

Source code in supervision/detection/core.py
def with_nms(\n    self, threshold: float = 0.5, class_agnostic: bool = False\n) -> Detections:\n    \"\"\"\n    Performs non-max suppression on detection set. If the detections result\n    from a segmentation model, the IoU mask is applied. Otherwise, box IoU is used.\n\n    Args:\n        threshold (float, optional): The intersection-over-union threshold\n            to use for non-maximum suppression. I'm the lower the value the more\n            restrictive the NMS becomes. Defaults to 0.5.\n        class_agnostic (bool, optional): Whether to perform class-agnostic\n            non-maximum suppression. If True, the class_id of each detection\n            will be ignored. Defaults to False.\n\n    Returns:\n        Detections: A new Detections object containing the subset of detections\n            after non-maximum suppression.\n\n    Raises:\n        AssertionError: If `confidence` is None and class_agnostic is False.\n            If `class_id` is None and class_agnostic is False.\n    \"\"\"\n    if len(self) == 0:\n        return self\n\n    assert (\n        self.confidence is not None\n    ), \"Detections confidence must be given for NMS to be executed.\"\n\n    if class_agnostic:\n        predictions = np.hstack((self.xyxy, self.confidence.reshape(-1, 1)))\n    else:\n        assert self.class_id is not None, (\n            \"Detections class_id must be given for NMS to be executed. If you\"\n            \" intended to perform class agnostic NMS set class_agnostic=True.\"\n        )\n        predictions = np.hstack(\n            (\n                self.xyxy,\n                self.confidence.reshape(-1, 1),\n                self.class_id.reshape(-1, 1),\n            )\n        )\n\n    if self.mask is not None:\n        indices = mask_non_max_suppression(\n            predictions=predictions, masks=self.mask, iou_threshold=threshold\n        )\n    else:\n        indices = box_non_max_suppression(\n            predictions=predictions, iou_threshold=threshold\n        )\n\n    return self[indices]\n
"},{"location":"detection/double_detection_filter/","title":"Double Detection Filter","text":"OverlapFilter

Bases: Enum

Enum specifying the strategy for filtering overlapping detections.

Attributes:

Name Type Description NONE

Do not filter detections based on overlap.

NON_MAX_SUPPRESSION

Filter detections using non-max suppression. This means that detections which overlap by more than a set threshold will be discarded, except for the one with the highest confidence.

NON_MAX_MERGE

Merge detections with non-max merging. This means that detections which overlap by more than a set threshold will be merged into a single detection.
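
Example
A minimal sketch showing how a strategy might be selected; the import path follows the module shown below (supervision/detection/overlap_filter.py):
from supervision.detection.overlap_filter import OverlapFilter\n\noverlap_filter = OverlapFilter.NON_MAX_MERGE\noverlap_filter.value\n# 'non_max_merge'\n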

Source code in supervision/detection/overlap_filter.py
class OverlapFilter(Enum):\n    \"\"\"\n    Enum specifying the strategy for filtering overlapping detections.\n\n    Attributes:\n        NONE: Do not filter detections based on overlap.\n        NON_MAX_SUPPRESSION: Filter detections using non-max suppression. This means,\n            detections that overlap by more than a set threshold will be discarded,\n            except for the one with the highest confidence.\n        NON_MAX_MERGE: Merge detections with non-max merging. This means,\n            detections that overlap by more than a set threshold will be merged\n            into a single detection.\n    \"\"\"\n\n    NONE = \"none\"\n    NON_MAX_SUPPRESSION = \"non_max_suppression\"\n    NON_MAX_MERGE = \"non_max_merge\"\n
box_non_max_suppression

Perform Non-Maximum Suppression (NMS) on object detection predictions.

Parameters:

Name Type Description Default predictions ndarray

An array of object detection predictions in the format of (x_min, y_min, x_max, y_max, score) or (x_min, y_min, x_max, y_max, score, class).

required iou_threshold float

The intersection-over-union threshold to use for non-maximum suppression.

0.5

Returns:

Type Description ndarray

np.ndarray: A boolean array indicating which predictions to keep after non-maximum suppression.

Raises:

Type Description AssertionError

If iou_threshold is not within the closed range from 0 to 1.
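
Example
A minimal sketch (illustrative predictions in (x_min, y_min, x_max, y_max, score, class) format); the import path follows the module shown below:
import numpy as np\nfrom supervision.detection.overlap_filter import box_non_max_suppression\n\npredictions = np.array([\n    [0, 0, 100, 100, 0.90, 0],\n    [5, 5, 105, 105, 0.80, 0],\n    [200, 200, 300, 300, 0.70, 1],\n])\n\n# keeps the highest-scoring box of each overlapping same-class group\nbox_non_max_suppression(predictions, iou_threshold=0.5)\n# array([ True, False,  True])\n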

Source code in supervision/detection/overlap_filter.py
def box_non_max_suppression(\n    predictions: np.ndarray, iou_threshold: float = 0.5\n) -> np.ndarray:\n    \"\"\"\n    Perform Non-Maximum Suppression (NMS) on object detection predictions.\n\n    Args:\n        predictions (np.ndarray): An array of object detection predictions in\n            the format of `(x_min, y_min, x_max, y_max, score)`\n            or `(x_min, y_min, x_max, y_max, score, class)`.\n        iou_threshold (float, optional): The intersection-over-union threshold\n            to use for non-maximum suppression.\n\n    Returns:\n        np.ndarray: A boolean array indicating which predictions to keep after n\n            on-maximum suppression.\n\n    Raises:\n        AssertionError: If `iou_threshold` is not within the\n            closed range from `0` to `1`.\n    \"\"\"\n    assert 0 <= iou_threshold <= 1, (\n        \"Value of `iou_threshold` must be in the closed range from 0 to 1, \"\n        f\"{iou_threshold} given.\"\n    )\n    rows, columns = predictions.shape\n\n    # add column #5 - category filled with zeros for agnostic nms\n    if columns == 5:\n        predictions = np.c_[predictions, np.zeros(rows)]\n\n    # sort predictions column #4 - score\n    sort_index = np.flip(predictions[:, 4].argsort())\n    predictions = predictions[sort_index]\n\n    boxes = predictions[:, :4]\n    categories = predictions[:, 5]\n    ious = box_iou_batch(boxes, boxes)\n    ious = ious - np.eye(rows)\n\n    keep = np.ones(rows, dtype=bool)\n\n    for index, (iou, category) in enumerate(zip(ious, categories)):\n        if not keep[index]:\n            continue\n\n        # drop detections with iou > iou_threshold and\n        # same category as current detections\n        condition = (iou > iou_threshold) & (categories == category)\n        keep = keep & ~condition\n\n    return keep[sort_index.argsort()]\n
mask_non_max_suppression

Perform Non-Maximum Suppression (NMS) on segmentation predictions.

Parameters:

Name Type Description Default predictions ndarray

A 2D array of object detection predictions in the format of (x_min, y_min, x_max, y_max, score) or (x_min, y_min, x_max, y_max, score, class). Shape: (N, 5) or (N, 6), where N is the number of predictions.

required masks ndarray

A 3D array of binary masks corresponding to the predictions. Shape: (N, H, W), where N is the number of predictions, and H, W are the dimensions of each mask.

required iou_threshold float

The intersection-over-union threshold to use for non-maximum suppression.

0.5 mask_dimension int

The dimension to which the masks should be resized before computing IOU values. Defaults to 640.

640

Returns:

Type Description ndarray

np.ndarray: A boolean array indicating which predictions to keep after non-maximum suppression.

Raises:

Type Description AssertionError

If iou_threshold is not within the closed range from 0 to 1.
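
Example
A minimal sketch (illustrative boxes and masks); two identical masks have a mask IoU of 1.0, so the lower-scoring prediction is expected to be dropped:
import numpy as np\nfrom supervision.detection.overlap_filter import mask_non_max_suppression\n\npredictions = np.array([\n    [0, 0, 20, 20, 0.90],\n    [0, 0, 20, 20, 0.80],\n])\n\nmasks = np.zeros((2, 40, 40), dtype=bool)\nmasks[0, 0:20, 0:20] = True\nmasks[1, 0:20, 0:20] = True\n\nmask_non_max_suppression(predictions, masks, iou_threshold=0.5)\n# array([ True, False])\n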

Source code in supervision/detection/overlap_filter.py
def mask_non_max_suppression(\n    predictions: np.ndarray,\n    masks: np.ndarray,\n    iou_threshold: float = 0.5,\n    mask_dimension: int = 640,\n) -> np.ndarray:\n    \"\"\"\n    Perform Non-Maximum Suppression (NMS) on segmentation predictions.\n\n    Args:\n        predictions (np.ndarray): A 2D array of object detection predictions in\n            the format of `(x_min, y_min, x_max, y_max, score)`\n            or `(x_min, y_min, x_max, y_max, score, class)`. Shape: `(N, 5)` or\n            `(N, 6)`, where N is the number of predictions.\n        masks (np.ndarray): A 3D array of binary masks corresponding to the predictions.\n            Shape: `(N, H, W)`, where N is the number of predictions, and H, W are the\n            dimensions of each mask.\n        iou_threshold (float, optional): The intersection-over-union threshold\n            to use for non-maximum suppression.\n        mask_dimension (int, optional): The dimension to which the masks should be\n            resized before computing IOU values. Defaults to 640.\n\n    Returns:\n        np.ndarray: A boolean array indicating which predictions to keep after\n            non-maximum suppression.\n\n    Raises:\n        AssertionError: If `iou_threshold` is not within the closed\n        range from `0` to `1`.\n    \"\"\"\n    assert 0 <= iou_threshold <= 1, (\n        \"Value of `iou_threshold` must be in the closed range from 0 to 1, \"\n        f\"{iou_threshold} given.\"\n    )\n    rows, columns = predictions.shape\n\n    if columns == 5:\n        predictions = np.c_[predictions, np.zeros(rows)]\n\n    sort_index = predictions[:, 4].argsort()[::-1]\n    predictions = predictions[sort_index]\n    masks = masks[sort_index]\n    masks_resized = resize_masks(masks, mask_dimension)\n    ious = mask_iou_batch(masks_resized, masks_resized)\n    categories = predictions[:, 5]\n\n    keep = np.ones(rows, dtype=bool)\n    for i in range(rows):\n        if keep[i]:\n            condition = (ious[i] > iou_threshold) & (categories[i] == categories)\n            keep[i + 1 :] = np.where(condition[i + 1 :], False, keep[i + 1 :])\n\n    return keep[sort_index.argsort()]\n
box_non_max_merge

Apply a greedy version of non-maximum merging per category to avoid detecting too many overlapping bounding boxes for a given object.

Parameters:

Name Type Description Default predictions NDArray[float64]

An array of shape (n, 5) or (n, 6) containing the bounding boxes coordinates in format [x1, y1, x2, y2], the confidence scores and class_ids. Omit class_id column to allow detections of different classes to be merged.

required iou_threshold float

The intersection-over-union threshold to use for non-maximum suppression. Defaults to 0.5.

0.5

Returns:

Type Description List[List[int]]

List[List[int]]: Groups of prediction indices to be merged. Each group may have 1 or more elements.
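
Example
A minimal sketch (illustrative predictions without a class column, so detections of different classes may be merged); overlapping box indices land in the same group:
import numpy as np\nfrom supervision.detection.overlap_filter import box_non_max_merge\n\npredictions = np.array([\n    [0, 0, 100, 100, 0.90],\n    [5, 5, 105, 105, 0.80],\n    [200, 200, 300, 300, 0.70],\n])\n\nbox_non_max_merge(predictions, iou_threshold=0.5)\n# e.g. [[0, 1], [2]] - the two overlapping boxes form one group\n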

Source code in supervision/detection/overlap_filter.py
def box_non_max_merge(\n    predictions: npt.NDArray[np.float64],\n    iou_threshold: float = 0.5,\n) -> List[List[int]]:\n    \"\"\"\n    Apply greedy version of non-maximum merging per category to avoid detecting\n    too many overlapping bounding boxes for a given object.\n\n    Args:\n        predictions (npt.NDArray[np.float64]): An array of shape `(n, 5)` or `(n, 6)`\n            containing the bounding boxes coordinates in format `[x1, y1, x2, y2]`,\n            the confidence scores and class_ids. Omit class_id column to allow\n            detections of different classes to be merged.\n        iou_threshold (float, optional): The intersection-over-union threshold\n            to use for non-maximum suppression. Defaults to 0.5.\n\n    Returns:\n        List[List[int]]: Groups of prediction indices be merged.\n            Each group may have 1 or more elements.\n    \"\"\"\n    if predictions.shape[1] == 5:\n        return group_overlapping_boxes(predictions, iou_threshold)\n\n    category_ids = predictions[:, 5]\n    merge_groups = []\n    for category_id in np.unique(category_ids):\n        curr_indices = np.where(category_ids == category_id)[0]\n        merge_class_groups = group_overlapping_boxes(\n            predictions[curr_indices], iou_threshold\n        )\n\n        for merge_class_group in merge_class_groups:\n            merge_groups.append(curr_indices[merge_class_group].tolist())\n\n    for merge_group in merge_groups:\n        if len(merge_group) == 0:\n            raise ValueError(\n                f\"Empty group detected when non-max-merging \"\n                f\"detections: {merge_groups}\"\n            )\n    return merge_groups\n
"},{"location":"detection/metrics/","title":"Metrics","text":"ConfusionMatrix

Confusion matrix for object detection tasks.

Attributes:

Name Type Description matrix ndarray

A 2D np.ndarray of shape (len(classes) + 1, len(classes) + 1) containing the number of TP, FP, FN and TN for each class.

classes List[str]

Model class names.

conf_threshold float

Detection confidence threshold between 0 and 1. Detections with lower confidence will be excluded from the matrix.

iou_threshold float

Detection IoU threshold between 0 and 1. Detections with lower IoU will be classified as FP.
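
Example
A minimal sketch (illustrative single-box tensors, using the from_tensors constructor shown in the source below) illustrating the matrix shape:
import numpy as np\nimport supervision as sv\n\n# one class, one ground-truth box, one perfectly matching prediction\ntargets = [np.array([[0.0, 0.0, 3.0, 3.0, 0]])]\npredictions = [np.array([[0.0, 0.0, 3.0, 3.0, 0, 0.9]])]\n\nconfusion_matrix = sv.ConfusionMatrix.from_tensors(\n    predictions=predictions,\n    targets=targets,\n    classes=['person']\n)\n\nconfusion_matrix.matrix.shape\n# (2, 2)  -> (len(classes) + 1, len(classes) + 1)\n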

Source code in supervision/metrics/detection.py
@dataclass\nclass ConfusionMatrix:\n    \"\"\"\n    Confusion matrix for object detection tasks.\n\n    Attributes:\n        matrix (np.ndarray): An 2D `np.ndarray` of shape\n            `(len(classes) + 1, len(classes) + 1)`\n            containing the number of `TP`, `FP`, `FN` and `TN` for each class.\n        classes (List[str]): Model class names.\n        conf_threshold (float): Detection confidence threshold between `0` and `1`.\n            Detections with lower confidence will be excluded from the matrix.\n        iou_threshold (float): Detection IoU threshold between `0` and `1`.\n            Detections with lower IoU will be classified as `FP`.\n    \"\"\"\n\n    matrix: np.ndarray\n    classes: List[str]\n    conf_threshold: float\n    iou_threshold: float\n\n    @classmethod\n    def from_detections(\n        cls,\n        predictions: List[Detections],\n        targets: List[Detections],\n        classes: List[str],\n        conf_threshold: float = 0.3,\n        iou_threshold: float = 0.5,\n    ) -> ConfusionMatrix:\n        \"\"\"\n        Calculate confusion matrix based on predicted and ground-truth detections.\n\n        Args:\n            targets (List[Detections]): Detections objects from ground-truth.\n            predictions (List[Detections]): Detections objects predicted by the model.\n            classes (List[str]): Model class names.\n            conf_threshold (float): Detection confidence threshold between `0` and `1`.\n                Detections with lower confidence will be excluded.\n            iou_threshold (float): Detection IoU threshold between `0` and `1`.\n                Detections with lower IoU will be classified as `FP`.\n\n        Returns:\n            ConfusionMatrix: New instance of ConfusionMatrix.\n\n        Example:\n            ```python\n            import supervision as sv\n\n            targets = [\n                sv.Detections(...),\n                sv.Detections(...)\n            ]\n\n            predictions = [\n                sv.Detections(...),\n                sv.Detections(...)\n            ]\n\n            confusion_matrix = sv.ConfusionMatrix.from_detections(\n                predictions=predictions,\n                targets=target,\n                classes=['person', ...]\n            )\n\n            print(confusion_matrix.matrix)\n            # np.array([\n            #    [0., 0., 0., 0.],\n            #    [0., 1., 0., 1.],\n            #    [0., 1., 1., 0.],\n            #    [1., 1., 0., 0.]\n            # ])\n            ```\n        \"\"\"\n\n        prediction_tensors = []\n        target_tensors = []\n        for prediction, target in zip(predictions, targets):\n            prediction_tensors.append(\n                detections_to_tensor(prediction, with_confidence=True)\n            )\n            target_tensors.append(detections_to_tensor(target, with_confidence=False))\n        return cls.from_tensors(\n            predictions=prediction_tensors,\n            targets=target_tensors,\n            classes=classes,\n            conf_threshold=conf_threshold,\n            iou_threshold=iou_threshold,\n        )\n\n    @classmethod\n    def from_tensors(\n        cls,\n        predictions: List[np.ndarray],\n        targets: List[np.ndarray],\n        classes: List[str],\n        conf_threshold: float = 0.3,\n        iou_threshold: float = 0.5,\n    ) -> ConfusionMatrix:\n        \"\"\"\n        Calculate confusion matrix based on predicted and ground-truth detections.\n\n        Args:\n            predictions 
(List[np.ndarray]): Each element of the list describes a single\n                image and has `shape = (M, 6)` where `M` is the number of detected\n                objects. Each row is expected to be in\n                `(x_min, y_min, x_max, y_max, class, conf)` format.\n            targets (List[np.ndarray]): Each element of the list describes a single\n                image and has `shape = (N, 5)` where `N` is the number of\n                ground-truth objects. Each row is expected to be in\n                `(x_min, y_min, x_max, y_max, class)` format.\n            classes (List[str]): Model class names.\n            conf_threshold (float): Detection confidence threshold between `0` and `1`.\n                Detections with lower confidence will be excluded.\n            iou_threshold (float): Detection iou  threshold between `0` and `1`.\n                Detections with lower iou will be classified as `FP`.\n\n        Returns:\n            ConfusionMatrix: New instance of ConfusionMatrix.\n\n        Example:\n            ```python\n            import supervision as sv\n            import numpy as np\n\n            targets = (\n                [\n                    np.array(\n                        [\n                            [0.0, 0.0, 3.0, 3.0, 1],\n                            [2.0, 2.0, 5.0, 5.0, 1],\n                            [6.0, 1.0, 8.0, 3.0, 2],\n                        ]\n                    ),\n                    np.array([1.0, 1.0, 2.0, 2.0, 2]),\n                ]\n            )\n\n            predictions = [\n                np.array(\n                    [\n                        [0.0, 0.0, 3.0, 3.0, 1, 0.9],\n                        [0.1, 0.1, 3.0, 3.0, 0, 0.9],\n                        [6.0, 1.0, 8.0, 3.0, 1, 0.8],\n                        [1.0, 6.0, 2.0, 7.0, 1, 0.8],\n                    ]\n                ),\n                np.array([[1.0, 1.0, 2.0, 2.0, 2, 0.8]])\n            ]\n\n            confusion_matrix = sv.ConfusionMatrix.from_tensors(\n                predictions=predictions,\n                targets=targets,\n                classes=['person', ...]\n            )\n\n            print(confusion_matrix.matrix)\n            # np.array([\n            #     [0., 0., 0., 0.],\n            #     [0., 1., 0., 1.],\n            #     [0., 1., 1., 0.],\n            #     [1., 1., 0., 0.]\n            # ])\n            ```\n        \"\"\"\n        validate_input_tensors(predictions, targets)\n\n        num_classes = len(classes)\n        matrix = np.zeros((num_classes + 1, num_classes + 1))\n        for true_batch, detection_batch in zip(targets, predictions):\n            matrix += cls.evaluate_detection_batch(\n                predictions=detection_batch,\n                targets=true_batch,\n                num_classes=num_classes,\n                conf_threshold=conf_threshold,\n                iou_threshold=iou_threshold,\n            )\n        return cls(\n            matrix=matrix,\n            classes=classes,\n            conf_threshold=conf_threshold,\n            iou_threshold=iou_threshold,\n        )\n\n    @staticmethod\n    def evaluate_detection_batch(\n        predictions: np.ndarray,\n        targets: np.ndarray,\n        num_classes: int,\n        conf_threshold: float,\n        iou_threshold: float,\n    ) -> np.ndarray:\n        \"\"\"\n        Calculate confusion matrix for a batch of detections for a single image.\n\n        Args:\n            predictions (np.ndarray): Batch prediction. 
Describes a single image and\n                has `shape = (M, 6)` where `M` is the number of detected objects.\n                Each row is expected to be in\n                `(x_min, y_min, x_max, y_max, class, conf)` format.\n            targets (np.ndarray): Batch target labels. Describes a single image and\n                has `shape = (N, 5)` where `N` is the number of ground-truth objects.\n                Each row is expected to be in\n                `(x_min, y_min, x_max, y_max, class)` format.\n            num_classes (int): Number of classes.\n            conf_threshold (float): Detection confidence threshold between `0` and `1`.\n                Detections with lower confidence will be excluded.\n            iou_threshold (float): Detection iou  threshold between `0` and `1`.\n                Detections with lower iou will be classified as `FP`.\n\n        Returns:\n            np.ndarray: Confusion matrix based on a single image.\n        \"\"\"\n        result_matrix = np.zeros((num_classes + 1, num_classes + 1))\n\n        conf_idx = 5\n        confidence = predictions[:, conf_idx]\n        detection_batch_filtered = predictions[confidence > conf_threshold]\n\n        class_id_idx = 4\n        true_classes = np.array(targets[:, class_id_idx], dtype=np.int16)\n        detection_classes = np.array(\n            detection_batch_filtered[:, class_id_idx], dtype=np.int16\n        )\n        true_boxes = targets[:, :class_id_idx]\n        detection_boxes = detection_batch_filtered[:, :class_id_idx]\n\n        iou_batch = box_iou_batch(\n            boxes_true=true_boxes, boxes_detection=detection_boxes\n        )\n        matched_idx = np.asarray(iou_batch > iou_threshold).nonzero()\n\n        if matched_idx[0].shape[0]:\n            matches = np.stack(\n                (matched_idx[0], matched_idx[1], iou_batch[matched_idx]), axis=1\n            )\n            matches = ConfusionMatrix._drop_extra_matches(matches=matches)\n        else:\n            matches = np.zeros((0, 3))\n\n        matched_true_idx, matched_detection_idx, _ = matches.transpose().astype(\n            np.int16\n        )\n\n        for i, true_class_value in enumerate(true_classes):\n            j = matched_true_idx == i\n            if matches.shape[0] > 0 and sum(j) == 1:\n                result_matrix[\n                    true_class_value, detection_classes[matched_detection_idx[j]]\n                ] += 1  # TP\n            else:\n                result_matrix[true_class_value, num_classes] += 1  # FN\n\n        for i, detection_class_value in enumerate(detection_classes):\n            if not any(matched_detection_idx == i):\n                result_matrix[num_classes, detection_class_value] += 1  # FP\n\n        return result_matrix\n\n    @staticmethod\n    def _drop_extra_matches(matches: np.ndarray) -> np.ndarray:\n        \"\"\"\n        Deduplicate matches. 
If there are multiple matches for the same true or\n        predicted box, only the one with the highest IoU is kept.\n        \"\"\"\n        if matches.shape[0] > 0:\n            matches = matches[matches[:, 2].argsort()[::-1]]\n            matches = matches[np.unique(matches[:, 1], return_index=True)[1]]\n            matches = matches[matches[:, 2].argsort()[::-1]]\n            matches = matches[np.unique(matches[:, 0], return_index=True)[1]]\n        return matches\n\n    @classmethod\n    def benchmark(\n        cls,\n        dataset: DetectionDataset,\n        callback: Callable[[np.ndarray], Detections],\n        conf_threshold: float = 0.3,\n        iou_threshold: float = 0.5,\n    ) -> ConfusionMatrix:\n        \"\"\"\n        Calculate confusion matrix from dataset and callback function.\n\n        Args:\n            dataset (DetectionDataset): Object detection dataset used for evaluation.\n            callback (Callable[[np.ndarray], Detections]): Function that takes an image\n                as input and returns Detections object.\n            conf_threshold (float): Detection confidence threshold between `0` and `1`.\n                Detections with lower confidence will be excluded.\n            iou_threshold (float): Detection IoU threshold between `0` and `1`.\n                Detections with lower IoU will be classified as `FP`.\n\n        Returns:\n            ConfusionMatrix: New instance of ConfusionMatrix.\n\n        Example:\n            ```python\n            import supervision as sv\n            from ultralytics import YOLO\n\n            dataset = sv.DetectionDataset.from_yolo(...)\n\n            model = YOLO(...)\n            def callback(image: np.ndarray) -> sv.Detections:\n                result = model(image)[0]\n                return sv.Detections.from_ultralytics(result)\n\n            confusion_matrix = sv.ConfusionMatrix.benchmark(\n                dataset = dataset,\n                callback = callback\n            )\n\n            print(confusion_matrix.matrix)\n            # np.array([\n            #     [0., 0., 0., 0.],\n            #     [0., 1., 0., 1.],\n            #     [0., 1., 1., 0.],\n            #     [1., 1., 0., 0.]\n            # ])\n            ```\n        \"\"\"\n        predictions, targets = [], []\n        for img_name, img in dataset.images.items():\n            predictions_batch = callback(img)\n            predictions.append(predictions_batch)\n            targets_batch = dataset.annotations[img_name]\n            targets.append(targets_batch)\n        return cls.from_detections(\n            predictions=predictions,\n            targets=targets,\n            classes=dataset.classes,\n            conf_threshold=conf_threshold,\n            iou_threshold=iou_threshold,\n        )\n\n    def plot(\n        self,\n        save_path: Optional[str] = None,\n        title: Optional[str] = None,\n        classes: Optional[List[str]] = None,\n        normalize: bool = False,\n        fig_size: Tuple[int, int] = (12, 10),\n    ) -> matplotlib.figure.Figure:\n        \"\"\"\n        Create confusion matrix plot and save it at selected location.\n\n        Args:\n            save_path (Optional[str]): Path to save the plot. 
If not provided,\n                plot will be displayed.\n            title (Optional[str]): Title of the plot.\n            classes (Optional[List[str]]): List of classes to be displayed on the plot.\n                If not provided, all classes will be displayed.\n            normalize (bool): If True, normalize the confusion matrix.\n            fig_size (Tuple[int, int]): Size of the plot.\n\n        Returns:\n            matplotlib.figure.Figure: Confusion matrix plot.\n        \"\"\"\n\n        array = self.matrix.copy()\n\n        if normalize:\n            eps = 1e-8\n            array = array / (array.sum(0).reshape(1, -1) + eps)\n\n        array[array < 0.005] = np.nan\n\n        fig, ax = plt.subplots(figsize=fig_size, tight_layout=True, facecolor=\"white\")\n\n        class_names = classes if classes is not None else self.classes\n        use_labels_for_ticks = class_names is not None and (0 < len(class_names) < 99)\n        if use_labels_for_ticks:\n            x_tick_labels = class_names + [\"FN\"]\n            y_tick_labels = class_names + [\"FP\"]\n            num_ticks = len(x_tick_labels)\n        else:\n            x_tick_labels = None\n            y_tick_labels = None\n            num_ticks = len(array)\n        im = ax.imshow(array, cmap=\"Blues\")\n\n        cbar = ax.figure.colorbar(im, ax=ax)\n        cbar.mappable.set_clim(vmin=0, vmax=np.nanmax(array))\n\n        if x_tick_labels is None:\n            tick_interval = 2\n        else:\n            tick_interval = 1\n        ax.set_xticks(np.arange(0, num_ticks, tick_interval), labels=x_tick_labels)\n        ax.set_yticks(np.arange(0, num_ticks, tick_interval), labels=y_tick_labels)\n\n        plt.setp(ax.get_xticklabels(), rotation=90, ha=\"right\", rotation_mode=\"default\")\n\n        labelsize = 10 if num_ticks < 50 else 8\n        ax.tick_params(axis=\"both\", which=\"both\", labelsize=labelsize)\n\n        if num_ticks < 30:\n            for i in range(array.shape[0]):\n                for j in range(array.shape[1]):\n                    n_preds = array[i, j]\n                    if not np.isnan(n_preds):\n                        ax.text(\n                            j,\n                            i,\n                            f\"{n_preds:.2f}\" if normalize else f\"{n_preds:.0f}\",\n                            ha=\"center\",\n                            va=\"center\",\n                            color=\"black\"\n                            if n_preds < 0.5 * np.nanmax(array)\n                            else \"white\",\n                        )\n\n        if title:\n            ax.set_title(title, fontsize=20)\n\n        ax.set_xlabel(\"Predicted\")\n        ax.set_ylabel(\"True\")\n        ax.set_facecolor(\"white\")\n        if save_path:\n            fig.savefig(\n                save_path, dpi=250, facecolor=fig.get_facecolor(), transparent=True\n            )\n        return fig\n
MeanAveragePrecision

Mean Average Precision for object detection tasks.

Attributes:

    map50_95 (float): Mean Average Precision (mAP) calculated over IoU thresholds ranging from 0.50 to 0.95 with a step size of 0.05.

    map50 (float): Mean Average Precision (mAP) calculated specifically at an IoU threshold of 0.50.

    map75 (float): Mean Average Precision (mAP) calculated specifically at an IoU threshold of 0.75.

    per_class_ap50_95 (np.ndarray): Average Precision (AP) values calculated over IoU thresholds ranging from 0.50 to 0.95 with a step size of 0.05, provided for each individual class.

Source code in supervision/metrics/detection.py
@dataclass(frozen=True)\nclass MeanAveragePrecision:\n    \"\"\"\n    Mean Average Precision for object detection tasks.\n\n    Attributes:\n        map50_95 (float): Mean Average Precision (mAP) calculated over IoU thresholds\n            ranging from `0.50` to `0.95` with a step size of `0.05`.\n        map50 (float): Mean Average Precision (mAP) calculated specifically at\n            an IoU threshold of `0.50`.\n        map75 (float): Mean Average Precision (mAP) calculated specifically at\n            an IoU threshold of `0.75`.\n        per_class_ap50_95 (np.ndarray): Average Precision (AP) values calculated over\n            IoU thresholds ranging from `0.50` to `0.95` with a step size of `0.05`,\n            provided for each individual class.\n    \"\"\"\n\n    map50_95: float\n    map50: float\n    map75: float\n    per_class_ap50_95: np.ndarray\n\n    @classmethod\n    def from_detections(\n        cls,\n        predictions: List[Detections],\n        targets: List[Detections],\n    ) -> MeanAveragePrecision:\n        \"\"\"\n        Calculate mean average precision based on predicted and ground-truth detections.\n\n        Args:\n            targets (List[Detections]): Detections objects from ground-truth.\n            predictions (List[Detections]): Detections objects predicted by the model.\n        Returns:\n            MeanAveragePrecision: New instance of ConfusionMatrix.\n\n        Example:\n            ```python\n            import supervision as sv\n\n            targets = [\n                sv.Detections(...),\n                sv.Detections(...)\n            ]\n\n            predictions = [\n                sv.Detections(...),\n                sv.Detections(...)\n            ]\n\n            mean_average_precision = sv.MeanAveragePrecision.from_detections(\n                predictions=predictions,\n                targets=target,\n            )\n\n            print(mean_average_precison.map50_95)\n            # 0.2899\n            ```\n        \"\"\"\n        prediction_tensors = []\n        target_tensors = []\n        for prediction, target in zip(predictions, targets):\n            prediction_tensors.append(\n                detections_to_tensor(prediction, with_confidence=True)\n            )\n            target_tensors.append(detections_to_tensor(target, with_confidence=False))\n        return cls.from_tensors(\n            predictions=prediction_tensors,\n            targets=target_tensors,\n        )\n\n    @classmethod\n    def benchmark(\n        cls,\n        dataset: DetectionDataset,\n        callback: Callable[[np.ndarray], Detections],\n    ) -> MeanAveragePrecision:\n        \"\"\"\n        Calculate mean average precision from dataset and callback function.\n\n        Args:\n            dataset (DetectionDataset): Object detection dataset used for evaluation.\n            callback (Callable[[np.ndarray], Detections]): Function that takes\n                an image as input and returns Detections object.\n        Returns:\n            MeanAveragePrecision: New instance of MeanAveragePrecision.\n\n        Example:\n            ```python\n            import supervision as sv\n            from ultralytics import YOLO\n\n            dataset = sv.DetectionDataset.from_yolo(...)\n\n            model = YOLO(...)\n            def callback(image: np.ndarray) -> sv.Detections:\n                result = model(image)[0]\n                return sv.Detections.from_ultralytics(result)\n\n            mean_average_precision = sv.MeanAveragePrecision.benchmark(\n           
     dataset = dataset,\n                callback = callback\n            )\n\n            print(mean_average_precision.map50_95)\n            # 0.433\n            ```\n        \"\"\"\n        predictions, targets = [], []\n        for img_name, img in dataset.images.items():\n            predictions_batch = callback(img)\n            predictions.append(predictions_batch)\n            targets_batch = dataset.annotations[img_name]\n            targets.append(targets_batch)\n        return cls.from_detections(\n            predictions=predictions,\n            targets=targets,\n        )\n\n    @classmethod\n    def from_tensors(\n        cls,\n        predictions: List[np.ndarray],\n        targets: List[np.ndarray],\n    ) -> MeanAveragePrecision:\n        \"\"\"\n        Calculate Mean Average Precision based on predicted and ground-truth\n            detections at different threshold.\n\n        Args:\n            predictions (List[np.ndarray]): Each element of the list describes\n                a single image and has `shape = (M, 6)` where `M` is\n                the number of detected objects. Each row is expected to be\n                in `(x_min, y_min, x_max, y_max, class, conf)` format.\n            targets (List[np.ndarray]): Each element of the list describes a single\n                image and has `shape = (N, 5)` where `N` is the\n                number of ground-truth objects. Each row is expected to be in\n                `(x_min, y_min, x_max, y_max, class)` format.\n        Returns:\n            MeanAveragePrecision: New instance of MeanAveragePrecision.\n\n        Example:\n            ```python\n            import supervision as sv\n            import numpy as np\n\n            targets = (\n                [\n                    np.array(\n                        [\n                            [0.0, 0.0, 3.0, 3.0, 1],\n                            [2.0, 2.0, 5.0, 5.0, 1],\n                            [6.0, 1.0, 8.0, 3.0, 2],\n                        ]\n                    ),\n                    np.array([[1.0, 1.0, 2.0, 2.0, 2]]),\n                ]\n            )\n\n            predictions = [\n                np.array(\n                    [\n                        [0.0, 0.0, 3.0, 3.0, 1, 0.9],\n                        [0.1, 0.1, 3.0, 3.0, 0, 0.9],\n                        [6.0, 1.0, 8.0, 3.0, 1, 0.8],\n                        [1.0, 6.0, 2.0, 7.0, 1, 0.8],\n                    ]\n                ),\n                np.array([[1.0, 1.0, 2.0, 2.0, 2, 0.8]])\n            ]\n\n            mean_average_precison = sv.MeanAveragePrecision.from_tensors(\n                predictions=predictions,\n                targets=targets,\n            )\n\n            print(mean_average_precison.map50_95)\n            # 0.6649\n            ```\n        \"\"\"\n        validate_input_tensors(predictions, targets)\n        iou_thresholds = np.linspace(0.5, 0.95, 10)\n        stats = []\n\n        # Gather matching stats for predictions and targets\n        for true_objs, predicted_objs in zip(targets, predictions):\n            if predicted_objs.shape[0] == 0:\n                if true_objs.shape[0]:\n                    stats.append(\n                        (\n                            np.zeros((0, iou_thresholds.size), dtype=bool),\n                            *np.zeros((2, 0)),\n                            true_objs[:, 4],\n                        )\n                    )\n                continue\n\n            if true_objs.shape[0]:\n                matches = 
cls._match_detection_batch(\n                    predicted_objs, true_objs, iou_thresholds\n                )\n                stats.append(\n                    (\n                        matches,\n                        predicted_objs[:, 5],\n                        predicted_objs[:, 4],\n                        true_objs[:, 4],\n                    )\n                )\n\n        # Compute average precisions if any matches exist\n        if stats:\n            concatenated_stats = [np.concatenate(items, 0) for items in zip(*stats)]\n            average_precisions = cls._average_precisions_per_class(*concatenated_stats)\n            map50 = average_precisions[:, 0].mean()\n            map75 = average_precisions[:, 5].mean()\n            map50_95 = average_precisions.mean()\n        else:\n            map50, map75, map50_95 = 0, 0, 0\n            average_precisions = []\n\n        return cls(\n            map50_95=map50_95,\n            map50=map50,\n            map75=map75,\n            per_class_ap50_95=average_precisions,\n        )\n\n    @staticmethod\n    def compute_average_precision(recall: np.ndarray, precision: np.ndarray) -> float:\n        \"\"\"\n        Compute the average precision using 101-point interpolation (COCO), given\n            the recall and precision curves.\n\n        Args:\n            recall (np.ndarray): The recall curve.\n            precision (np.ndarray): The precision curve.\n\n        Returns:\n            float: Average precision.\n        \"\"\"\n        extended_recall = np.concatenate(([0.0], recall, [1.0]))\n        extended_precision = np.concatenate(([1.0], precision, [0.0]))\n        max_accumulated_precision = np.flip(\n            np.maximum.accumulate(np.flip(extended_precision))\n        )\n        interpolated_recall_levels = np.linspace(0, 1, 101)\n        interpolated_precision = np.interp(\n            interpolated_recall_levels, extended_recall, max_accumulated_precision\n        )\n        average_precision = np.trapz(interpolated_precision, interpolated_recall_levels)\n        return average_precision\n\n    @staticmethod\n    def _match_detection_batch(\n        predictions: np.ndarray, targets: np.ndarray, iou_thresholds: np.ndarray\n    ) -> np.ndarray:\n        \"\"\"\n        Match predictions with target labels based on IoU levels.\n\n        Args:\n            predictions (np.ndarray): Batch prediction. Describes a single image and\n                has `shape = (M, 6)` where `M` is the number of detected objects.\n                Each row is expected to be in\n                `(x_min, y_min, x_max, y_max, class, conf)` format.\n            targets (np.ndarray): Batch target labels. 
Describes a single image and\n                has `shape = (N, 5)` where `N` is the number of ground-truth objects.\n                Each row is expected to be in\n                `(x_min, y_min, x_max, y_max, class)` format.\n            iou_thresholds (np.ndarray): Array contains different IoU thresholds.\n\n        Returns:\n            np.ndarray: Matched prediction with target labels result.\n        \"\"\"\n        num_predictions, num_iou_levels = predictions.shape[0], iou_thresholds.shape[0]\n        correct = np.zeros((num_predictions, num_iou_levels), dtype=bool)\n        iou = box_iou_batch(targets[:, :4], predictions[:, :4])\n        correct_class = targets[:, 4:5] == predictions[:, 4]\n\n        for i, iou_level in enumerate(iou_thresholds):\n            matched_indices = np.where((iou >= iou_level) & correct_class)\n\n            if matched_indices[0].shape[0]:\n                combined_indices = np.stack(matched_indices, axis=1)\n                iou_values = iou[matched_indices][:, None]\n                matches = np.hstack([combined_indices, iou_values])\n\n                if matched_indices[0].shape[0] > 1:\n                    matches = matches[matches[:, 2].argsort()[::-1]]\n                    matches = matches[np.unique(matches[:, 1], return_index=True)[1]]\n                    matches = matches[np.unique(matches[:, 0], return_index=True)[1]]\n\n                correct[matches[:, 1].astype(int), i] = True\n\n        return correct\n\n    @staticmethod\n    def _average_precisions_per_class(\n        matches: np.ndarray,\n        prediction_confidence: np.ndarray,\n        prediction_class_ids: np.ndarray,\n        true_class_ids: np.ndarray,\n        eps: float = 1e-16,\n    ) -> np.ndarray:\n        \"\"\"\n        Compute the average precision, given the recall and precision curves.\n        Source: https://github.com/rafaelpadilla/Object-Detection-Metrics.\n\n        Args:\n            matches (np.ndarray): True positives.\n            prediction_confidence (np.ndarray): Objectness value from 0-1.\n            prediction_class_ids (np.ndarray): Predicted object classes.\n            true_class_ids (np.ndarray): True object classes.\n            eps (float, optional): Small value to prevent division by zero.\n\n        Returns:\n            np.ndarray: Average precision for different IoU levels.\n        \"\"\"\n        sorted_indices = np.argsort(-prediction_confidence)\n        matches = matches[sorted_indices]\n        prediction_class_ids = prediction_class_ids[sorted_indices]\n\n        unique_classes, class_counts = np.unique(true_class_ids, return_counts=True)\n        num_classes = unique_classes.shape[0]\n\n        average_precisions = np.zeros((num_classes, matches.shape[1]))\n\n        for class_idx, class_id in enumerate(unique_classes):\n            is_class = prediction_class_ids == class_id\n            total_true = class_counts[class_idx]\n            total_prediction = is_class.sum()\n\n            if total_prediction == 0 or total_true == 0:\n                continue\n\n            false_positives = (1 - matches[is_class]).cumsum(0)\n            true_positives = matches[is_class].cumsum(0)\n            recall = true_positives / (total_true + eps)\n            precision = true_positives / (true_positives + false_positives)\n\n            for iou_level_idx in range(matches.shape[1]):\n                average_precisions[class_idx, iou_level_idx] = (\n                    MeanAveragePrecision.compute_average_precision(\n                        recall[:, 
iou_level_idx], precision[:, iou_level_idx]\n                    )\n                )\n\n        return average_precisions\n
"},{"location":"detection/metrics/#supervision.metrics.detection.ConfusionMatrix-functions","title":"Functions","text":""},{"location":"detection/metrics/#supervision.metrics.detection.ConfusionMatrix.benchmark","title":"benchmark(dataset, callback, conf_threshold=0.3, iou_threshold=0.5) classmethod","text":"

Calculate confusion matrix from dataset and callback function.

Parameters:

    dataset (DetectionDataset): Object detection dataset used for evaluation. Required.

    callback (Callable[[ndarray], Detections]): Function that takes an image as input and returns Detections object. Required.

    conf_threshold (float): Detection confidence threshold between 0 and 1. Detections with lower confidence will be excluded. Default: 0.3.

    iou_threshold (float): Detection IoU threshold between 0 and 1. Detections with lower IoU will be classified as FP. Default: 0.5.

Returns:

    ConfusionMatrix: New instance of ConfusionMatrix.

Example
import supervision as sv\nfrom ultralytics import YOLO\n\ndataset = sv.DetectionDataset.from_yolo(...)\n\nmodel = YOLO(...)\ndef callback(image: np.ndarray) -> sv.Detections:\n    result = model(image)[0]\n    return sv.Detections.from_ultralytics(result)\n\nconfusion_matrix = sv.ConfusionMatrix.benchmark(\n    dataset = dataset,\n    callback = callback\n)\n\nprint(confusion_matrix.matrix)\n# np.array([\n#     [0., 0., 0., 0.],\n#     [0., 1., 0., 1.],\n#     [0., 1., 1., 0.],\n#     [1., 1., 0., 0.]\n# ])\n
Source code in supervision/metrics/detection.py
@classmethod\ndef benchmark(\n    cls,\n    dataset: DetectionDataset,\n    callback: Callable[[np.ndarray], Detections],\n    conf_threshold: float = 0.3,\n    iou_threshold: float = 0.5,\n) -> ConfusionMatrix:\n    \"\"\"\n    Calculate confusion matrix from dataset and callback function.\n\n    Args:\n        dataset (DetectionDataset): Object detection dataset used for evaluation.\n        callback (Callable[[np.ndarray], Detections]): Function that takes an image\n            as input and returns Detections object.\n        conf_threshold (float): Detection confidence threshold between `0` and `1`.\n            Detections with lower confidence will be excluded.\n        iou_threshold (float): Detection IoU threshold between `0` and `1`.\n            Detections with lower IoU will be classified as `FP`.\n\n    Returns:\n        ConfusionMatrix: New instance of ConfusionMatrix.\n\n    Example:\n        ```python\n        import supervision as sv\n        from ultralytics import YOLO\n\n        dataset = sv.DetectionDataset.from_yolo(...)\n\n        model = YOLO(...)\n        def callback(image: np.ndarray) -> sv.Detections:\n            result = model(image)[0]\n            return sv.Detections.from_ultralytics(result)\n\n        confusion_matrix = sv.ConfusionMatrix.benchmark(\n            dataset = dataset,\n            callback = callback\n        )\n\n        print(confusion_matrix.matrix)\n        # np.array([\n        #     [0., 0., 0., 0.],\n        #     [0., 1., 0., 1.],\n        #     [0., 1., 1., 0.],\n        #     [1., 1., 0., 0.]\n        # ])\n        ```\n    \"\"\"\n    predictions, targets = [], []\n    for img_name, img in dataset.images.items():\n        predictions_batch = callback(img)\n        predictions.append(predictions_batch)\n        targets_batch = dataset.annotations[img_name]\n        targets.append(targets_batch)\n    return cls.from_detections(\n        predictions=predictions,\n        targets=targets,\n        classes=dataset.classes,\n        conf_threshold=conf_threshold,\n        iou_threshold=iou_threshold,\n    )\n
"},{"location":"detection/metrics/#supervision.metrics.detection.ConfusionMatrix.evaluate_detection_batch","title":"evaluate_detection_batch(predictions, targets, num_classes, conf_threshold, iou_threshold) staticmethod","text":"

Calculate confusion matrix for a batch of detections for a single image.

Parameters:

    predictions (np.ndarray): Batch prediction. Describes a single image and has shape = (M, 6) where M is the number of detected objects. Each row is expected to be in (x_min, y_min, x_max, y_max, class, conf) format. Required.

    targets (np.ndarray): Batch target labels. Describes a single image and has shape = (N, 5) where N is the number of ground-truth objects. Each row is expected to be in (x_min, y_min, x_max, y_max, class) format. Required.

    num_classes (int): Number of classes. Required.

    conf_threshold (float): Detection confidence threshold between 0 and 1. Detections with lower confidence will be excluded. Required.

    iou_threshold (float): Detection IoU threshold between 0 and 1. Detections with lower IoU will be classified as FP. Required.

Returns:

    np.ndarray: Confusion matrix based on a single image.

Source code in supervision/metrics/detection.py
@staticmethod\ndef evaluate_detection_batch(\n    predictions: np.ndarray,\n    targets: np.ndarray,\n    num_classes: int,\n    conf_threshold: float,\n    iou_threshold: float,\n) -> np.ndarray:\n    \"\"\"\n    Calculate confusion matrix for a batch of detections for a single image.\n\n    Args:\n        predictions (np.ndarray): Batch prediction. Describes a single image and\n            has `shape = (M, 6)` where `M` is the number of detected objects.\n            Each row is expected to be in\n            `(x_min, y_min, x_max, y_max, class, conf)` format.\n        targets (np.ndarray): Batch target labels. Describes a single image and\n            has `shape = (N, 5)` where `N` is the number of ground-truth objects.\n            Each row is expected to be in\n            `(x_min, y_min, x_max, y_max, class)` format.\n        num_classes (int): Number of classes.\n        conf_threshold (float): Detection confidence threshold between `0` and `1`.\n            Detections with lower confidence will be excluded.\n        iou_threshold (float): Detection iou  threshold between `0` and `1`.\n            Detections with lower iou will be classified as `FP`.\n\n    Returns:\n        np.ndarray: Confusion matrix based on a single image.\n    \"\"\"\n    result_matrix = np.zeros((num_classes + 1, num_classes + 1))\n\n    conf_idx = 5\n    confidence = predictions[:, conf_idx]\n    detection_batch_filtered = predictions[confidence > conf_threshold]\n\n    class_id_idx = 4\n    true_classes = np.array(targets[:, class_id_idx], dtype=np.int16)\n    detection_classes = np.array(\n        detection_batch_filtered[:, class_id_idx], dtype=np.int16\n    )\n    true_boxes = targets[:, :class_id_idx]\n    detection_boxes = detection_batch_filtered[:, :class_id_idx]\n\n    iou_batch = box_iou_batch(\n        boxes_true=true_boxes, boxes_detection=detection_boxes\n    )\n    matched_idx = np.asarray(iou_batch > iou_threshold).nonzero()\n\n    if matched_idx[0].shape[0]:\n        matches = np.stack(\n            (matched_idx[0], matched_idx[1], iou_batch[matched_idx]), axis=1\n        )\n        matches = ConfusionMatrix._drop_extra_matches(matches=matches)\n    else:\n        matches = np.zeros((0, 3))\n\n    matched_true_idx, matched_detection_idx, _ = matches.transpose().astype(\n        np.int16\n    )\n\n    for i, true_class_value in enumerate(true_classes):\n        j = matched_true_idx == i\n        if matches.shape[0] > 0 and sum(j) == 1:\n            result_matrix[\n                true_class_value, detection_classes[matched_detection_idx[j]]\n            ] += 1  # TP\n        else:\n            result_matrix[true_class_value, num_classes] += 1  # FN\n\n    for i, detection_class_value in enumerate(detection_classes):\n        if not any(matched_detection_idx == i):\n            result_matrix[num_classes, detection_class_value] += 1  # FP\n\n    return result_matrix\n
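Example (a minimal sketch of calling this static method directly on toy arrays; it assumes the method is reachable as sv.ConfusionMatrix.evaluate_detection_batch, as in the source above):

```python
import numpy as np
import supervision as sv

# one ground-truth box of class 0 and one matching prediction
targets = np.array([[0.0, 0.0, 10.0, 10.0, 0]])
predictions = np.array([[0.0, 0.0, 10.0, 10.0, 0, 0.9]])

matrix = sv.ConfusionMatrix.evaluate_detection_batch(
    predictions=predictions,
    targets=targets,
    num_classes=2,
    conf_threshold=0.3,
    iou_threshold=0.5,
)
# (num_classes + 1) x (num_classes + 1) matrix with a single TP counted at matrix[0, 0]
```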
"},{"location":"detection/metrics/#supervision.metrics.detection.ConfusionMatrix.from_detections","title":"from_detections(predictions, targets, classes, conf_threshold=0.3, iou_threshold=0.5) classmethod","text":"

Calculate confusion matrix based on predicted and ground-truth detections.

Parameters:

    targets (List[Detections]): Detections objects from ground-truth. Required.

    predictions (List[Detections]): Detections objects predicted by the model. Required.

    classes (List[str]): Model class names. Required.

    conf_threshold (float): Detection confidence threshold between 0 and 1. Detections with lower confidence will be excluded. Default: 0.3.

    iou_threshold (float): Detection IoU threshold between 0 and 1. Detections with lower IoU will be classified as FP. Default: 0.5.

Returns:

    ConfusionMatrix: New instance of ConfusionMatrix.

Example
import supervision as sv\n\ntargets = [\n    sv.Detections(...),\n    sv.Detections(...)\n]\n\npredictions = [\n    sv.Detections(...),\n    sv.Detections(...)\n]\n\nconfusion_matrix = sv.ConfusionMatrix.from_detections(\n    predictions=predictions,\n    targets=targets,\n    classes=['person', ...]\n)\n\nprint(confusion_matrix.matrix)\n# np.array([\n#    [0., 0., 0., 0.],\n#    [0., 1., 0., 1.],\n#    [0., 1., 1., 0.],\n#    [1., 1., 0., 0.]\n# ])\n
Source code in supervision/metrics/detection.py
@classmethod\ndef from_detections(\n    cls,\n    predictions: List[Detections],\n    targets: List[Detections],\n    classes: List[str],\n    conf_threshold: float = 0.3,\n    iou_threshold: float = 0.5,\n) -> ConfusionMatrix:\n    \"\"\"\n    Calculate confusion matrix based on predicted and ground-truth detections.\n\n    Args:\n        targets (List[Detections]): Detections objects from ground-truth.\n        predictions (List[Detections]): Detections objects predicted by the model.\n        classes (List[str]): Model class names.\n        conf_threshold (float): Detection confidence threshold between `0` and `1`.\n            Detections with lower confidence will be excluded.\n        iou_threshold (float): Detection IoU threshold between `0` and `1`.\n            Detections with lower IoU will be classified as `FP`.\n\n    Returns:\n        ConfusionMatrix: New instance of ConfusionMatrix.\n\n    Example:\n        ```python\n        import supervision as sv\n\n        targets = [\n            sv.Detections(...),\n            sv.Detections(...)\n        ]\n\n        predictions = [\n            sv.Detections(...),\n            sv.Detections(...)\n        ]\n\n        confusion_matrix = sv.ConfusionMatrix.from_detections(\n            predictions=predictions,\n            targets=target,\n            classes=['person', ...]\n        )\n\n        print(confusion_matrix.matrix)\n        # np.array([\n        #    [0., 0., 0., 0.],\n        #    [0., 1., 0., 1.],\n        #    [0., 1., 1., 0.],\n        #    [1., 1., 0., 0.]\n        # ])\n        ```\n    \"\"\"\n\n    prediction_tensors = []\n    target_tensors = []\n    for prediction, target in zip(predictions, targets):\n        prediction_tensors.append(\n            detections_to_tensor(prediction, with_confidence=True)\n        )\n        target_tensors.append(detections_to_tensor(target, with_confidence=False))\n    return cls.from_tensors(\n        predictions=prediction_tensors,\n        targets=target_tensors,\n        classes=classes,\n        conf_threshold=conf_threshold,\n        iou_threshold=iou_threshold,\n    )\n
"},{"location":"detection/metrics/#supervision.metrics.detection.ConfusionMatrix.from_tensors","title":"from_tensors(predictions, targets, classes, conf_threshold=0.3, iou_threshold=0.5) classmethod","text":"

Calculate confusion matrix based on predicted and ground-truth detections.

Parameters:

    predictions (List[np.ndarray]): Each element of the list describes a single image and has shape = (M, 6) where M is the number of detected objects. Each row is expected to be in (x_min, y_min, x_max, y_max, class, conf) format. Required.

    targets (List[np.ndarray]): Each element of the list describes a single image and has shape = (N, 5) where N is the number of ground-truth objects. Each row is expected to be in (x_min, y_min, x_max, y_max, class) format. Required.

    classes (List[str]): Model class names. Required.

    conf_threshold (float): Detection confidence threshold between 0 and 1. Detections with lower confidence will be excluded. Default: 0.3.

    iou_threshold (float): Detection IoU threshold between 0 and 1. Detections with lower IoU will be classified as FP. Default: 0.5.

Returns:

    ConfusionMatrix: New instance of ConfusionMatrix.

Example
import supervision as sv\nimport numpy as np\n\ntargets = (\n    [\n        np.array(\n            [\n                [0.0, 0.0, 3.0, 3.0, 1],\n                [2.0, 2.0, 5.0, 5.0, 1],\n                [6.0, 1.0, 8.0, 3.0, 2],\n            ]\n        ),\n        np.array([1.0, 1.0, 2.0, 2.0, 2]),\n    ]\n)\n\npredictions = [\n    np.array(\n        [\n            [0.0, 0.0, 3.0, 3.0, 1, 0.9],\n            [0.1, 0.1, 3.0, 3.0, 0, 0.9],\n            [6.0, 1.0, 8.0, 3.0, 1, 0.8],\n            [1.0, 6.0, 2.0, 7.0, 1, 0.8],\n        ]\n    ),\n    np.array([[1.0, 1.0, 2.0, 2.0, 2, 0.8]])\n]\n\nconfusion_matrix = sv.ConfusionMatrix.from_tensors(\n    predictions=predictions,\n    targets=targets,\n    classes=['person', ...]\n)\n\nprint(confusion_matrix.matrix)\n# np.array([\n#     [0., 0., 0., 0.],\n#     [0., 1., 0., 1.],\n#     [0., 1., 1., 0.],\n#     [1., 1., 0., 0.]\n# ])\n
Source code in supervision/metrics/detection.py
@classmethod\ndef from_tensors(\n    cls,\n    predictions: List[np.ndarray],\n    targets: List[np.ndarray],\n    classes: List[str],\n    conf_threshold: float = 0.3,\n    iou_threshold: float = 0.5,\n) -> ConfusionMatrix:\n    \"\"\"\n    Calculate confusion matrix based on predicted and ground-truth detections.\n\n    Args:\n        predictions (List[np.ndarray]): Each element of the list describes a single\n            image and has `shape = (M, 6)` where `M` is the number of detected\n            objects. Each row is expected to be in\n            `(x_min, y_min, x_max, y_max, class, conf)` format.\n        targets (List[np.ndarray]): Each element of the list describes a single\n            image and has `shape = (N, 5)` where `N` is the number of\n            ground-truth objects. Each row is expected to be in\n            `(x_min, y_min, x_max, y_max, class)` format.\n        classes (List[str]): Model class names.\n        conf_threshold (float): Detection confidence threshold between `0` and `1`.\n            Detections with lower confidence will be excluded.\n        iou_threshold (float): Detection iou  threshold between `0` and `1`.\n            Detections with lower iou will be classified as `FP`.\n\n    Returns:\n        ConfusionMatrix: New instance of ConfusionMatrix.\n\n    Example:\n        ```python\n        import supervision as sv\n        import numpy as np\n\n        targets = (\n            [\n                np.array(\n                    [\n                        [0.0, 0.0, 3.0, 3.0, 1],\n                        [2.0, 2.0, 5.0, 5.0, 1],\n                        [6.0, 1.0, 8.0, 3.0, 2],\n                    ]\n                ),\n                np.array([1.0, 1.0, 2.0, 2.0, 2]),\n            ]\n        )\n\n        predictions = [\n            np.array(\n                [\n                    [0.0, 0.0, 3.0, 3.0, 1, 0.9],\n                    [0.1, 0.1, 3.0, 3.0, 0, 0.9],\n                    [6.0, 1.0, 8.0, 3.0, 1, 0.8],\n                    [1.0, 6.0, 2.0, 7.0, 1, 0.8],\n                ]\n            ),\n            np.array([[1.0, 1.0, 2.0, 2.0, 2, 0.8]])\n        ]\n\n        confusion_matrix = sv.ConfusionMatrix.from_tensors(\n            predictions=predictions,\n            targets=targets,\n            classes=['person', ...]\n        )\n\n        print(confusion_matrix.matrix)\n        # np.array([\n        #     [0., 0., 0., 0.],\n        #     [0., 1., 0., 1.],\n        #     [0., 1., 1., 0.],\n        #     [1., 1., 0., 0.]\n        # ])\n        ```\n    \"\"\"\n    validate_input_tensors(predictions, targets)\n\n    num_classes = len(classes)\n    matrix = np.zeros((num_classes + 1, num_classes + 1))\n    for true_batch, detection_batch in zip(targets, predictions):\n        matrix += cls.evaluate_detection_batch(\n            predictions=detection_batch,\n            targets=true_batch,\n            num_classes=num_classes,\n            conf_threshold=conf_threshold,\n            iou_threshold=iou_threshold,\n        )\n    return cls(\n        matrix=matrix,\n        classes=classes,\n        conf_threshold=conf_threshold,\n        iou_threshold=iou_threshold,\n    )\n
"},{"location":"detection/metrics/#supervision.metrics.detection.ConfusionMatrix.plot","title":"plot(save_path=None, title=None, classes=None, normalize=False, fig_size=(12, 10))","text":"

Create confusion matrix plot and save it at selected location.

Parameters:

    save_path (Optional[str]): Path to save the plot. If not provided, plot will be displayed. Default: None.

    title (Optional[str]): Title of the plot. Default: None.

    classes (Optional[List[str]]): List of classes to be displayed on the plot. If not provided, all classes will be displayed. Default: None.

    normalize (bool): If True, normalize the confusion matrix. Default: False.

    fig_size (Tuple[int, int]): Size of the plot. Default: (12, 10).

Returns:

    matplotlib.figure.Figure: Confusion matrix plot.

Source code in supervision/metrics/detection.py
def plot(\n    self,\n    save_path: Optional[str] = None,\n    title: Optional[str] = None,\n    classes: Optional[List[str]] = None,\n    normalize: bool = False,\n    fig_size: Tuple[int, int] = (12, 10),\n) -> matplotlib.figure.Figure:\n    \"\"\"\n    Create confusion matrix plot and save it at selected location.\n\n    Args:\n        save_path (Optional[str]): Path to save the plot. If not provided,\n            plot will be displayed.\n        title (Optional[str]): Title of the plot.\n        classes (Optional[List[str]]): List of classes to be displayed on the plot.\n            If not provided, all classes will be displayed.\n        normalize (bool): If True, normalize the confusion matrix.\n        fig_size (Tuple[int, int]): Size of the plot.\n\n    Returns:\n        matplotlib.figure.Figure: Confusion matrix plot.\n    \"\"\"\n\n    array = self.matrix.copy()\n\n    if normalize:\n        eps = 1e-8\n        array = array / (array.sum(0).reshape(1, -1) + eps)\n\n    array[array < 0.005] = np.nan\n\n    fig, ax = plt.subplots(figsize=fig_size, tight_layout=True, facecolor=\"white\")\n\n    class_names = classes if classes is not None else self.classes\n    use_labels_for_ticks = class_names is not None and (0 < len(class_names) < 99)\n    if use_labels_for_ticks:\n        x_tick_labels = class_names + [\"FN\"]\n        y_tick_labels = class_names + [\"FP\"]\n        num_ticks = len(x_tick_labels)\n    else:\n        x_tick_labels = None\n        y_tick_labels = None\n        num_ticks = len(array)\n    im = ax.imshow(array, cmap=\"Blues\")\n\n    cbar = ax.figure.colorbar(im, ax=ax)\n    cbar.mappable.set_clim(vmin=0, vmax=np.nanmax(array))\n\n    if x_tick_labels is None:\n        tick_interval = 2\n    else:\n        tick_interval = 1\n    ax.set_xticks(np.arange(0, num_ticks, tick_interval), labels=x_tick_labels)\n    ax.set_yticks(np.arange(0, num_ticks, tick_interval), labels=y_tick_labels)\n\n    plt.setp(ax.get_xticklabels(), rotation=90, ha=\"right\", rotation_mode=\"default\")\n\n    labelsize = 10 if num_ticks < 50 else 8\n    ax.tick_params(axis=\"both\", which=\"both\", labelsize=labelsize)\n\n    if num_ticks < 30:\n        for i in range(array.shape[0]):\n            for j in range(array.shape[1]):\n                n_preds = array[i, j]\n                if not np.isnan(n_preds):\n                    ax.text(\n                        j,\n                        i,\n                        f\"{n_preds:.2f}\" if normalize else f\"{n_preds:.0f}\",\n                        ha=\"center\",\n                        va=\"center\",\n                        color=\"black\"\n                        if n_preds < 0.5 * np.nanmax(array)\n                        else \"white\",\n                    )\n\n    if title:\n        ax.set_title(title, fontsize=20)\n\n    ax.set_xlabel(\"Predicted\")\n    ax.set_ylabel(\"True\")\n    ax.set_facecolor(\"white\")\n    if save_path:\n        fig.savefig(\n            save_path, dpi=250, facecolor=fig.get_facecolor(), transparent=True\n        )\n    return fig\n
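Example (a minimal usage sketch; it assumes a dataset and callback prepared as in the benchmark example above):

```python
import supervision as sv

confusion_matrix = sv.ConfusionMatrix.benchmark(
    dataset=dataset,      # assumes an sv.DetectionDataset prepared earlier
    callback=callback,    # assumes a model callback as in the benchmark example
)

fig = confusion_matrix.plot(
    save_path="confusion_matrix.png",
    title="Confusion Matrix",
    normalize=True,
)
```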
"},{"location":"detection/metrics/#supervision.metrics.detection.MeanAveragePrecision-functions","title":"Functions","text":""},{"location":"detection/metrics/#supervision.metrics.detection.MeanAveragePrecision.benchmark","title":"benchmark(dataset, callback) classmethod","text":"

Calculate mean average precision from dataset and callback function.

Parameters:

    dataset (DetectionDataset): Object detection dataset used for evaluation. Required.

    callback (Callable[[ndarray], Detections]): Function that takes an image as input and returns Detections object. Required.

Returns:

    MeanAveragePrecision: New instance of MeanAveragePrecision.

Example
import supervision as sv\nfrom ultralytics import YOLO\n\ndataset = sv.DetectionDataset.from_yolo(...)\n\nmodel = YOLO(...)\ndef callback(image: np.ndarray) -> sv.Detections:\n    result = model(image)[0]\n    return sv.Detections.from_ultralytics(result)\n\nmean_average_precision = sv.MeanAveragePrecision.benchmark(\n    dataset = dataset,\n    callback = callback\n)\n\nprint(mean_average_precision.map50_95)\n# 0.433\n
Source code in supervision/metrics/detection.py
@classmethod\ndef benchmark(\n    cls,\n    dataset: DetectionDataset,\n    callback: Callable[[np.ndarray], Detections],\n) -> MeanAveragePrecision:\n    \"\"\"\n    Calculate mean average precision from dataset and callback function.\n\n    Args:\n        dataset (DetectionDataset): Object detection dataset used for evaluation.\n        callback (Callable[[np.ndarray], Detections]): Function that takes\n            an image as input and returns Detections object.\n    Returns:\n        MeanAveragePrecision: New instance of MeanAveragePrecision.\n\n    Example:\n        ```python\n        import supervision as sv\n        from ultralytics import YOLO\n\n        dataset = sv.DetectionDataset.from_yolo(...)\n\n        model = YOLO(...)\n        def callback(image: np.ndarray) -> sv.Detections:\n            result = model(image)[0]\n            return sv.Detections.from_ultralytics(result)\n\n        mean_average_precision = sv.MeanAveragePrecision.benchmark(\n            dataset = dataset,\n            callback = callback\n        )\n\n        print(mean_average_precision.map50_95)\n        # 0.433\n        ```\n    \"\"\"\n    predictions, targets = [], []\n    for img_name, img in dataset.images.items():\n        predictions_batch = callback(img)\n        predictions.append(predictions_batch)\n        targets_batch = dataset.annotations[img_name]\n        targets.append(targets_batch)\n    return cls.from_detections(\n        predictions=predictions,\n        targets=targets,\n    )\n
"},{"location":"detection/metrics/#supervision.metrics.detection.MeanAveragePrecision.compute_average_precision","title":"compute_average_precision(recall, precision) staticmethod","text":"

Compute the average precision using 101-point interpolation (COCO), given the recall and precision curves.

Parameters:

    recall (np.ndarray): The recall curve. Required.

    precision (np.ndarray): The precision curve. Required.

Returns:

    float: Average precision.

Source code in supervision/metrics/detection.py
@staticmethod\ndef compute_average_precision(recall: np.ndarray, precision: np.ndarray) -> float:\n    \"\"\"\n    Compute the average precision using 101-point interpolation (COCO), given\n        the recall and precision curves.\n\n    Args:\n        recall (np.ndarray): The recall curve.\n        precision (np.ndarray): The precision curve.\n\n    Returns:\n        float: Average precision.\n    \"\"\"\n    extended_recall = np.concatenate(([0.0], recall, [1.0]))\n    extended_precision = np.concatenate(([1.0], precision, [0.0]))\n    max_accumulated_precision = np.flip(\n        np.maximum.accumulate(np.flip(extended_precision))\n    )\n    interpolated_recall_levels = np.linspace(0, 1, 101)\n    interpolated_precision = np.interp(\n        interpolated_recall_levels, extended_recall, max_accumulated_precision\n    )\n    average_precision = np.trapz(interpolated_precision, interpolated_recall_levels)\n    return average_precision\n
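Example (a toy sketch with a two-point precision/recall curve, not a full evaluation; the printed value is approximate):

```python
import numpy as np
import supervision as sv

recall = np.array([0.5, 1.0])
precision = np.array([1.0, 0.5])

ap = sv.MeanAveragePrecision.compute_average_precision(
    recall=recall, precision=precision
)
# 101-point interpolated AP for this toy curve, roughly 0.87
```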
"},{"location":"detection/metrics/#supervision.metrics.detection.MeanAveragePrecision.from_detections","title":"from_detections(predictions, targets) classmethod","text":"

Calculate mean average precision based on predicted and ground-truth detections.

Parameters:

    targets (List[Detections]): Detections objects from ground-truth. Required.

    predictions (List[Detections]): Detections objects predicted by the model. Required.

Returns:

    MeanAveragePrecision: New instance of MeanAveragePrecision.

Example
import supervision as sv\n\ntargets = [\n    sv.Detections(...),\n    sv.Detections(...)\n]\n\npredictions = [\n    sv.Detections(...),\n    sv.Detections(...)\n]\n\nmean_average_precision = sv.MeanAveragePrecision.from_detections(\n    predictions=predictions,\n    targets=targets,\n)\n\nprint(mean_average_precision.map50_95)\n# 0.2899\n
Source code in supervision/metrics/detection.py
@classmethod\ndef from_detections(\n    cls,\n    predictions: List[Detections],\n    targets: List[Detections],\n) -> MeanAveragePrecision:\n    \"\"\"\n    Calculate mean average precision based on predicted and ground-truth detections.\n\n    Args:\n        targets (List[Detections]): Detections objects from ground-truth.\n        predictions (List[Detections]): Detections objects predicted by the model.\n    Returns:\n        MeanAveragePrecision: New instance of ConfusionMatrix.\n\n    Example:\n        ```python\n        import supervision as sv\n\n        targets = [\n            sv.Detections(...),\n            sv.Detections(...)\n        ]\n\n        predictions = [\n            sv.Detections(...),\n            sv.Detections(...)\n        ]\n\n        mean_average_precision = sv.MeanAveragePrecision.from_detections(\n            predictions=predictions,\n            targets=target,\n        )\n\n        print(mean_average_precison.map50_95)\n        # 0.2899\n        ```\n    \"\"\"\n    prediction_tensors = []\n    target_tensors = []\n    for prediction, target in zip(predictions, targets):\n        prediction_tensors.append(\n            detections_to_tensor(prediction, with_confidence=True)\n        )\n        target_tensors.append(detections_to_tensor(target, with_confidence=False))\n    return cls.from_tensors(\n        predictions=prediction_tensors,\n        targets=target_tensors,\n    )\n
"},{"location":"detection/metrics/#supervision.metrics.detection.MeanAveragePrecision.from_tensors","title":"from_tensors(predictions, targets) classmethod","text":"

Calculate Mean Average Precision based on predicted and ground-truth detections at different IoU thresholds.

Parameters:

    predictions (List[np.ndarray]): Each element of the list describes a single image and has shape = (M, 6) where M is the number of detected objects. Each row is expected to be in (x_min, y_min, x_max, y_max, class, conf) format. Required.

    targets (List[np.ndarray]): Each element of the list describes a single image and has shape = (N, 5) where N is the number of ground-truth objects. Each row is expected to be in (x_min, y_min, x_max, y_max, class) format. Required.

Returns:

    MeanAveragePrecision: New instance of MeanAveragePrecision.

Example
import supervision as sv\nimport numpy as np\n\ntargets = (\n    [\n        np.array(\n            [\n                [0.0, 0.0, 3.0, 3.0, 1],\n                [2.0, 2.0, 5.0, 5.0, 1],\n                [6.0, 1.0, 8.0, 3.0, 2],\n            ]\n        ),\n        np.array([[1.0, 1.0, 2.0, 2.0, 2]]),\n    ]\n)\n\npredictions = [\n    np.array(\n        [\n            [0.0, 0.0, 3.0, 3.0, 1, 0.9],\n            [0.1, 0.1, 3.0, 3.0, 0, 0.9],\n            [6.0, 1.0, 8.0, 3.0, 1, 0.8],\n            [1.0, 6.0, 2.0, 7.0, 1, 0.8],\n        ]\n    ),\n    np.array([[1.0, 1.0, 2.0, 2.0, 2, 0.8]])\n]\n\nmean_average_precision = sv.MeanAveragePrecision.from_tensors(\n    predictions=predictions,\n    targets=targets,\n)\n\nprint(mean_average_precision.map50_95)\n# 0.6649\n
Source code in supervision/metrics/detection.py
@classmethod\ndef from_tensors(\n    cls,\n    predictions: List[np.ndarray],\n    targets: List[np.ndarray],\n) -> MeanAveragePrecision:\n    \"\"\"\n    Calculate Mean Average Precision based on predicted and ground-truth\n        detections at different threshold.\n\n    Args:\n        predictions (List[np.ndarray]): Each element of the list describes\n            a single image and has `shape = (M, 6)` where `M` is\n            the number of detected objects. Each row is expected to be\n            in `(x_min, y_min, x_max, y_max, class, conf)` format.\n        targets (List[np.ndarray]): Each element of the list describes a single\n            image and has `shape = (N, 5)` where `N` is the\n            number of ground-truth objects. Each row is expected to be in\n            `(x_min, y_min, x_max, y_max, class)` format.\n    Returns:\n        MeanAveragePrecision: New instance of MeanAveragePrecision.\n\n    Example:\n        ```python\n        import supervision as sv\n        import numpy as np\n\n        targets = (\n            [\n                np.array(\n                    [\n                        [0.0, 0.0, 3.0, 3.0, 1],\n                        [2.0, 2.0, 5.0, 5.0, 1],\n                        [6.0, 1.0, 8.0, 3.0, 2],\n                    ]\n                ),\n                np.array([[1.0, 1.0, 2.0, 2.0, 2]]),\n            ]\n        )\n\n        predictions = [\n            np.array(\n                [\n                    [0.0, 0.0, 3.0, 3.0, 1, 0.9],\n                    [0.1, 0.1, 3.0, 3.0, 0, 0.9],\n                    [6.0, 1.0, 8.0, 3.0, 1, 0.8],\n                    [1.0, 6.0, 2.0, 7.0, 1, 0.8],\n                ]\n            ),\n            np.array([[1.0, 1.0, 2.0, 2.0, 2, 0.8]])\n        ]\n\n        mean_average_precison = sv.MeanAveragePrecision.from_tensors(\n            predictions=predictions,\n            targets=targets,\n        )\n\n        print(mean_average_precison.map50_95)\n        # 0.6649\n        ```\n    \"\"\"\n    validate_input_tensors(predictions, targets)\n    iou_thresholds = np.linspace(0.5, 0.95, 10)\n    stats = []\n\n    # Gather matching stats for predictions and targets\n    for true_objs, predicted_objs in zip(targets, predictions):\n        if predicted_objs.shape[0] == 0:\n            if true_objs.shape[0]:\n                stats.append(\n                    (\n                        np.zeros((0, iou_thresholds.size), dtype=bool),\n                        *np.zeros((2, 0)),\n                        true_objs[:, 4],\n                    )\n                )\n            continue\n\n        if true_objs.shape[0]:\n            matches = cls._match_detection_batch(\n                predicted_objs, true_objs, iou_thresholds\n            )\n            stats.append(\n                (\n                    matches,\n                    predicted_objs[:, 5],\n                    predicted_objs[:, 4],\n                    true_objs[:, 4],\n                )\n            )\n\n    # Compute average precisions if any matches exist\n    if stats:\n        concatenated_stats = [np.concatenate(items, 0) for items in zip(*stats)]\n        average_precisions = cls._average_precisions_per_class(*concatenated_stats)\n        map50 = average_precisions[:, 0].mean()\n        map75 = average_precisions[:, 5].mean()\n        map50_95 = average_precisions.mean()\n    else:\n        map50, map75, map50_95 = 0, 0, 0\n        average_precisions = []\n\n    return cls(\n        map50_95=map50_95,\n        map50=map50,\n        map75=map75,\n        
per_class_ap50_95=average_precisions,\n    )\n
"},{"location":"detection/utils/","title":"Detection Utils","text":"box_iou_batch

Compute Intersection over Union (IoU) of two sets of bounding boxes - boxes_true and boxes_detection. Both sets of boxes are expected to be in (x_min, y_min, x_max, y_max) format.

Parameters:

    boxes_true (np.ndarray): 2D np.ndarray representing ground-truth boxes. shape = (N, 4) where N is number of true objects. Required.

    boxes_detection (np.ndarray): 2D np.ndarray representing detection boxes. shape = (M, 4) where M is number of detected objects. Required.

Returns:

    np.ndarray: Pairwise IoU of boxes from boxes_true and boxes_detection. shape = (N, M) where N is number of true objects and M is number of detected objects.

Source code in supervision/detection/utils.py
def box_iou_batch(boxes_true: np.ndarray, boxes_detection: np.ndarray) -> np.ndarray:\n    \"\"\"\n    Compute Intersection over Union (IoU) of two sets of bounding boxes -\n        `boxes_true` and `boxes_detection`. Both sets\n        of boxes are expected to be in `(x_min, y_min, x_max, y_max)` format.\n\n    Args:\n        boxes_true (np.ndarray): 2D `np.ndarray` representing ground-truth boxes.\n            `shape = (N, 4)` where `N` is number of true objects.\n        boxes_detection (np.ndarray): 2D `np.ndarray` representing detection boxes.\n            `shape = (M, 4)` where `M` is number of detected objects.\n\n    Returns:\n        np.ndarray: Pairwise IoU of boxes from `boxes_true` and `boxes_detection`.\n            `shape = (N, M)` where `N` is number of true objects and\n            `M` is number of detected objects.\n    \"\"\"\n\n    def box_area(box):\n        return (box[2] - box[0]) * (box[3] - box[1])\n\n    area_true = box_area(boxes_true.T)\n    area_detection = box_area(boxes_detection.T)\n\n    top_left = np.maximum(boxes_true[:, None, :2], boxes_detection[:, :2])\n    bottom_right = np.minimum(boxes_true[:, None, 2:], boxes_detection[:, 2:])\n\n    area_inter = np.prod(np.clip(bottom_right - top_left, a_min=0, a_max=None), 2)\n    ious = area_inter / (area_true[:, None] + area_detection - area_inter)\n    ious = np.nan_to_num(ious)\n    return ious\n
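Example (a small sketch of the pairwise behaviour, assuming the function is exposed at the top level as sv.box_iou_batch):

```python
import numpy as np
import supervision as sv

boxes_true = np.array([
    [0, 0, 10, 10],
])
boxes_detection = np.array([
    [0, 0, 10, 10],    # exact match
    [5, 5, 15, 15],    # partial overlap
])

iou = sv.box_iou_batch(boxes_true, boxes_detection)
# shape (1, 2): IoU of 1.0 for the exact match and about 0.14 for the partial overlap
```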
mask_iou_batch

Compute Intersection over Union (IoU) of two sets of masks - masks_true and masks_detection.

Parameters:

    masks_true (np.ndarray): 3D np.ndarray representing ground-truth masks. Required.

    masks_detection (np.ndarray): 3D np.ndarray representing detection masks. Required.

    memory_limit (int): Memory limit in MB. Default: 1024 * 5 (5 GB).

Returns:

    np.ndarray: Pairwise IoU of masks from masks_true and masks_detection.

Source code in supervision/detection/utils.py
def mask_iou_batch(\n    masks_true: np.ndarray,\n    masks_detection: np.ndarray,\n    memory_limit: int = 1024 * 5,\n) -> np.ndarray:\n    \"\"\"\n    Compute Intersection over Union (IoU) of two sets of masks -\n        `masks_true` and `masks_detection`.\n\n    Args:\n        masks_true (np.ndarray): 3D `np.ndarray` representing ground-truth masks.\n        masks_detection (np.ndarray): 3D `np.ndarray` representing detection masks.\n        memory_limit (int, optional): memory limit in MB, default is 1024 * 5 MB (5GB).\n\n    Returns:\n        np.ndarray: Pairwise IoU of masks from `masks_true` and `masks_detection`.\n    \"\"\"\n    memory = (\n        masks_true.shape[0]\n        * masks_true.shape[1]\n        * masks_true.shape[2]\n        * masks_detection.shape[0]\n        / 1024\n        / 1024\n    )\n    if memory <= memory_limit:\n        return _mask_iou_batch_split(masks_true, masks_detection)\n\n    ious = []\n    step = max(\n        memory_limit\n        * 1024\n        * 1024\n        // (\n            masks_detection.shape[0]\n            * masks_detection.shape[1]\n            * masks_detection.shape[2]\n        ),\n        1,\n    )\n    for i in range(0, masks_true.shape[0], step):\n        ious.append(_mask_iou_batch_split(masks_true[i : i + step], masks_detection))\n\n    return np.vstack(ious)\n
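Example (a minimal sketch with boolean masks, assuming the function is exposed as sv.mask_iou_batch):

```python
import numpy as np
import supervision as sv

masks_true = np.ones((1, 4, 4), dtype=bool)          # one full 4x4 mask

masks_detection = np.zeros((1, 4, 4), dtype=bool)
masks_detection[0, :2, :] = True                     # top half of the image

iou = sv.mask_iou_batch(masks_true, masks_detection)
# shape (1, 1): 8 overlapping pixels over a union of 16 -> 0.5
```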
polygon_to_mask

Generate a mask from a polygon.

Parameters:

    polygon (np.ndarray): The polygon for which the mask should be generated, given as a list of vertices. Required.

    resolution_wh (Tuple[int, int]): The width and height of the desired resolution. Required.

Returns:

    np.ndarray: The generated 2D mask, where the polygon is marked with 1's and the rest is filled with 0's.

Source code in supervision/detection/utils.py
def polygon_to_mask(polygon: np.ndarray, resolution_wh: Tuple[int, int]) -> np.ndarray:\n    \"\"\"Generate a mask from a polygon.\n\n    Args:\n        polygon (np.ndarray): The polygon for which the mask should be generated,\n            given as a list of vertices.\n        resolution_wh (Tuple[int, int]): The width and height of the desired resolution.\n\n    Returns:\n        np.ndarray: The generated 2D mask, where the polygon is marked with\n            `1`'s and the rest is filled with `0`'s.\n    \"\"\"\n    width, height = resolution_wh\n    mask = np.zeros((height, width))\n\n    cv2.fillPoly(mask, [polygon], color=1)\n    return mask\n
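Example (a minimal sketch, assuming the function is exposed as sv.polygon_to_mask; int32 vertices are used because OpenCV's fillPoly expects integer points):

```python
import numpy as np
import supervision as sv

# a small square polygon with its top-left corner at (1, 1)
polygon = np.array([[1, 1], [3, 1], [3, 3], [1, 3]], dtype=np.int32)

mask = sv.polygon_to_mask(polygon=polygon, resolution_wh=(5, 5))
# 5x5 array with 1.0 inside (and on) the square and 0.0 elsewhere
```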
mask_to_xyxy

Converts a 3D np.array of 2D bool masks into a 2D np.array of bounding boxes.

Parameters:

    masks (np.ndarray): A 3D np.array of shape (N, W, H) containing 2D bool masks. Required.

Returns:

    np.ndarray: A 2D np.array of shape (N, 4) containing the bounding boxes (x_min, y_min, x_max, y_max) for each mask.

Source code in supervision/detection/utils.py
def mask_to_xyxy(masks: np.ndarray) -> np.ndarray:\n    \"\"\"\n    Converts a 3D `np.array` of 2D bool masks into a 2D `np.array` of bounding boxes.\n\n    Parameters:\n        masks (np.ndarray): A 3D `np.array` of shape `(N, W, H)`\n            containing 2D bool masks\n\n    Returns:\n        np.ndarray: A 2D `np.array` of shape `(N, 4)` containing the bounding boxes\n            `(x_min, y_min, x_max, y_max)` for each mask\n    \"\"\"\n    n = masks.shape[0]\n    xyxy = np.zeros((n, 4), dtype=int)\n\n    for i, mask in enumerate(masks):\n        rows, cols = np.where(mask)\n\n        if len(rows) > 0 and len(cols) > 0:\n            x_min, x_max = np.min(cols), np.max(cols)\n            y_min, y_max = np.min(rows), np.max(rows)\n            xyxy[i, :] = [x_min, y_min, x_max, y_max]\n\n    return xyxy\n
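Example (a minimal sketch, assuming the function is exposed as sv.mask_to_xyxy):

```python
import numpy as np
import supervision as sv

masks = np.zeros((1, 8, 8), dtype=bool)
masks[0, 2:5, 3:7] = True        # rows 2-4, columns 3-6

sv.mask_to_xyxy(masks=masks)
# array([[3, 2, 6, 4]])
```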
mask_to_polygons

Converts a binary mask to a list of polygons.

Parameters:

    mask (np.ndarray): A binary mask represented as a 2D NumPy array of shape (H, W), where H and W are the height and width of the mask, respectively. Required.

Returns:

    List[np.ndarray]: A list of polygons, where each polygon is represented by a NumPy array of shape (N, 2), containing the x, y coordinates of the points. Polygons with fewer points than MIN_POLYGON_POINT_COUNT = 3 are excluded from the output.

Source code in supervision/detection/utils.py
def mask_to_polygons(mask: np.ndarray) -> List[np.ndarray]:\n    \"\"\"\n    Converts a binary mask to a list of polygons.\n\n    Parameters:\n        mask (np.ndarray): A binary mask represented as a 2D NumPy array of\n            shape `(H, W)`, where H and W are the height and width of\n            the mask, respectively.\n\n    Returns:\n        List[np.ndarray]: A list of polygons, where each polygon is represented by a\n            NumPy array of shape `(N, 2)`, containing the `x`, `y` coordinates\n            of the points. Polygons with fewer points than `MIN_POLYGON_POINT_COUNT = 3`\n            are excluded from the output.\n    \"\"\"\n\n    contours, _ = cv2.findContours(\n        mask.astype(np.uint8), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE\n    )\n    return [\n        np.squeeze(contour, axis=1)\n        for contour in contours\n        if contour.shape[0] >= MIN_POLYGON_POINT_COUNT\n    ]\n
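Example (a minimal sketch, assuming the function is exposed as sv.mask_to_polygons; the exact vertices returned depend on OpenCV's contour approximation):

```python
import numpy as np
import supervision as sv

mask = np.zeros((8, 8), dtype=bool)
mask[2:6, 2:6] = True            # a filled 4x4 square

polygons = sv.mask_to_polygons(mask)
# a list with a single (N, 2) polygon tracing the square's outline
```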
polygon_to_xyxy

Converts a polygon represented by a NumPy array into a bounding box.

Parameters:

    polygon (np.ndarray): A polygon represented by a NumPy array of shape (N, 2), containing the x, y coordinates of the points. Required.

Returns:

    np.ndarray: A 1D NumPy array containing the bounding box (x_min, y_min, x_max, y_max) of the input polygon.

Source code in supervision/detection/utils.py
def polygon_to_xyxy(polygon: np.ndarray) -> np.ndarray:\n    \"\"\"\n    Converts a polygon represented by a NumPy array into a bounding box.\n\n    Parameters:\n        polygon (np.ndarray): A polygon represented by a NumPy array of shape `(N, 2)`,\n            containing the `x`, `y` coordinates of the points.\n\n    Returns:\n        np.ndarray: A 1D NumPy array containing the bounding box\n            `(x_min, y_min, x_max, y_max)` of the input polygon.\n    \"\"\"\n    x_min, y_min = np.min(polygon, axis=0)\n    x_max, y_max = np.max(polygon, axis=0)\n    return np.array([x_min, y_min, x_max, y_max])\n
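Example (a minimal sketch, assuming the function is exposed as sv.polygon_to_xyxy):

```python
import numpy as np
import supervision as sv

polygon = np.array([[2, 5], [6, 1], [4, 7]])

sv.polygon_to_xyxy(polygon=polygon)
# array([2, 1, 6, 7])
```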
filter_polygons_by_area

Filters a list of polygons based on their area.

Parameters:

    polygons (List[np.ndarray]): A list of polygons, where each polygon is represented by a NumPy array of shape (N, 2), containing the x, y coordinates of the points. Required.

    min_area (Optional[float]): The minimum area threshold. Only polygons with an area greater than or equal to this value will be included in the output. If set to None, no minimum area constraint will be applied. Default: None.

    max_area (Optional[float]): The maximum area threshold. Only polygons with an area less than or equal to this value will be included in the output. If set to None, no maximum area constraint will be applied. Default: None.

Returns:

    List[np.ndarray]: A new list of polygons containing only those with areas within the specified thresholds.

Source code in supervision/detection/utils.py
def filter_polygons_by_area(\n    polygons: List[np.ndarray],\n    min_area: Optional[float] = None,\n    max_area: Optional[float] = None,\n) -> List[np.ndarray]:\n    \"\"\"\n    Filters a list of polygons based on their area.\n\n    Parameters:\n        polygons (List[np.ndarray]): A list of polygons, where each polygon is\n            represented by a NumPy array of shape `(N, 2)`,\n            containing the `x`, `y` coordinates of the points.\n        min_area (Optional[float]): The minimum area threshold.\n            Only polygons with an area greater than or equal to this value\n            will be included in the output. If set to None,\n            no minimum area constraint will be applied.\n        max_area (Optional[float]): The maximum area threshold.\n            Only polygons with an area less than or equal to this value\n            will be included in the output. If set to None,\n            no maximum area constraint will be applied.\n\n    Returns:\n        List[np.ndarray]: A new list of polygons containing only those with\n            areas within the specified thresholds.\n    \"\"\"\n    if min_area is None and max_area is None:\n        return polygons\n    ares = [cv2.contourArea(polygon) for polygon in polygons]\n    return [\n        polygon\n        for polygon, area in zip(polygons, ares)\n        if (min_area is None or area >= min_area)\n        and (max_area is None or area <= max_area)\n    ]\n
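Example (a minimal sketch, assuming the function is exposed as sv.filter_polygons_by_area; int32 points are used because OpenCV's contourArea expects int32 or float32):

```python
import numpy as np
import supervision as sv

polygons = [
    np.array([[0, 0], [10, 0], [10, 10], [0, 10]], dtype=np.int32),  # area 100
    np.array([[0, 0], [2, 0], [2, 2], [0, 2]], dtype=np.int32),      # area 4
]

filtered = sv.filter_polygons_by_area(polygons=polygons, min_area=10)
# only the larger square survives the filter
```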
move_boxes

Moves bounding boxes by the given (dx, dy) offset.

Parameters:

xyxy (npt.NDArray[np.float64]): An array of shape (n, 4) containing the bounding boxes coordinates in format [x1, y1, x2, y2]. Required.

offset (np.array): An array of shape (2,) containing offset values in the format [dx, dy]. Required.

Returns:

npt.NDArray[np.float64]: Repositioned bounding boxes.

Examples:

import numpy as np\nimport supervision as sv\n\nxyxy = np.array([\n    [10, 10, 20, 20],\n    [30, 30, 40, 40]\n])\noffset = np.array([5, 5])\n\nsv.move_boxes(xyxy=xyxy, offset=offset)\n# array([\n#    [15, 15, 25, 25],\n#    [35, 35, 45, 45]\n# ])\n
Source code in supervision/detection/utils.py
def move_boxes(\n    xyxy: npt.NDArray[np.float64], offset: npt.NDArray[np.int32]\n) -> npt.NDArray[np.float64]:\n    \"\"\"\n    Parameters:\n        xyxy (npt.NDArray[np.float64]): An array of shape `(n, 4)` containing the\n            bounding boxes coordinates in format `[x1, y1, x2, y2]`\n        offset (np.array): An array of shape `(2,)` containing offset values in format\n            is `[dx, dy]`.\n\n    Returns:\n        npt.NDArray[np.float64]: Repositioned bounding boxes.\n\n    Examples:\n        ```python\n        import numpy as np\n        import supervision as sv\n\n        xyxy = np.array([\n            [10, 10, 20, 20],\n            [30, 30, 40, 40]\n        ])\n        offset = np.array([5, 5])\n\n        sv.move_boxes(xyxy=xyxy, offset=offset)\n        # array([\n        #    [15, 15, 25, 25],\n        #    [35, 35, 45, 45]\n        # ])\n        ```\n    \"\"\"\n    return xyxy + np.hstack([offset, offset])\n
move_masks

Offset the masks in an array by the specified (x, y) amount.

Parameters:

masks (npt.NDArray[np.bool_]): A 3D array of binary masks corresponding to the predictions. Shape: (N, H, W), where N is the number of predictions, and H, W are the dimensions of each mask. Required.

offset (npt.NDArray[np.int32]): An array of shape (2,) containing non-negative int values [dx, dy]. Required.

resolution_wh (Tuple[int, int]): The width and height of the desired mask resolution. Required.

Returns:

npt.NDArray[np.bool_]: Repositioned masks, padded to the specified resolution.

Source code in supervision/detection/utils.py
def move_masks(\n    masks: npt.NDArray[np.bool_],\n    offset: npt.NDArray[np.int32],\n    resolution_wh: Tuple[int, int],\n) -> npt.NDArray[np.bool_]:\n    \"\"\"\n    Offset the masks in an array by the specified (x, y) amount.\n\n    Args:\n        masks (npt.NDArray[np.bool_]): A 3D array of binary masks corresponding to the\n            predictions. Shape: `(N, H, W)`, where N is the number of predictions, and\n            H, W are the dimensions of each mask.\n        offset (npt.NDArray[np.int32]): An array of shape `(2,)` containing non-negative\n            int values `[dx, dy]`.\n        resolution_wh (Tuple[int, int]): The width and height of the desired mask\n            resolution.\n\n    Returns:\n        (npt.NDArray[np.bool_]) repositioned masks, optionally padded to the specified\n            shape.\n    \"\"\"\n\n    if offset[0] < 0 or offset[1] < 0:\n        raise ValueError(f\"Offset values must be non-negative integers. Got: {offset}\")\n\n    mask_array = np.full((masks.shape[0], resolution_wh[1], resolution_wh[0]), False)\n    mask_array[\n        :,\n        offset[1] : masks.shape[1] + offset[1],\n        offset[0] : masks.shape[2] + offset[0],\n    ] = masks\n\n    return mask_array\n
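A minimal sketch, assuming `move_masks` is exported at the package level as `sv.move_masks`:

```python
import numpy as np
import supervision as sv

# two identical 2x2 masks, shifted by dx=1, dy=2 onto a 6x6 canvas
masks = np.ones((2, 2, 2), dtype=bool)
offset = np.array([1, 2])

moved = sv.move_masks(masks=masks, offset=offset, resolution_wh=(6, 6))
moved.shape
# (2, 6, 6); the foreground now occupies rows 2-3 and columns 1-2
```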
scale_boxes

Scale the dimensions of bounding boxes.

Parameters:

xyxy (npt.NDArray[np.float64]): An array of shape (n, 4) containing the bounding boxes coordinates in format [x1, y1, x2, y2]. Required.

factor (float): A float value representing the factor by which the box dimensions are scaled. A factor greater than 1 enlarges the boxes, while a factor less than 1 shrinks them. Required.

Returns:

npt.NDArray[np.float64]: Scaled bounding boxes.

Examples:

import numpy as np\nimport supervision as sv\n\nxyxy = np.array([\n    [10, 10, 20, 20],\n    [30, 30, 40, 40]\n])\n\nsv.scale_boxes(xyxy=xyxy, factor=1.5)\n# array([\n#    [ 7.5,  7.5, 22.5, 22.5],\n#    [27.5, 27.5, 42.5, 42.5]\n# ])\n
Source code in supervision/detection/utils.py
def scale_boxes(\n    xyxy: npt.NDArray[np.float64], factor: float\n) -> npt.NDArray[np.float64]:\n    \"\"\"\n    Scale the dimensions of bounding boxes.\n\n    Parameters:\n        xyxy (npt.NDArray[np.float64]): An array of shape `(n, 4)` containing the\n            bounding boxes coordinates in format `[x1, y1, x2, y2]`\n        factor (float): A float value representing the factor by which the box\n            dimensions are scaled. A factor greater than 1 enlarges the boxes, while a\n            factor less than 1 shrinks them.\n\n    Returns:\n        npt.NDArray[np.float64]: Scaled bounding boxes.\n\n    Examples:\n        ```python\n        import numpy as np\n        import supervision as sv\n\n        xyxy = np.array([\n            [10, 10, 20, 20],\n            [30, 30, 40, 40]\n        ])\n\n        sv.scale_boxes(xyxy=xyxy, factor=1.5)\n        # array([\n        #    [ 7.5,  7.5, 22.5, 22.5],\n        #    [27.5, 27.5, 42.5, 42.5]\n        # ])\n        ```\n    \"\"\"\n    centers = (xyxy[:, :2] + xyxy[:, 2:]) / 2\n    new_sizes = (xyxy[:, 2:] - xyxy[:, :2]) * factor\n    return np.concatenate((centers - new_sizes / 2, centers + new_sizes / 2), axis=1)\n
clip_boxes

Clips bounding boxes coordinates to fit within the frame resolution.

Parameters:

xyxy (np.ndarray): A numpy array of shape (N, 4) where each row corresponds to a bounding box in the format (x_min, y_min, x_max, y_max). Required.

resolution_wh (Tuple[int, int]): A tuple of the form (width, height) representing the resolution of the frame. Required.

Returns:

np.ndarray: A numpy array of shape (N, 4) where each row corresponds to a bounding box with coordinates clipped to fit within the frame resolution.

Examples:

import numpy as np\nimport supervision as sv\n\nxyxy = np.array([\n    [10, 20, 300, 200],\n    [15, 25, 350, 450],\n    [-10, -20, 30, 40]\n])\n\nsv.clip_boxes(xyxy=xyxy, resolution_wh=(320, 240))\n# array([\n#     [ 10,  20, 300, 200],\n#     [ 15,  25, 320, 240],\n#     [  0,   0,  30,  40]\n# ])\n
Source code in supervision/detection/utils.py
def clip_boxes(xyxy: np.ndarray, resolution_wh: Tuple[int, int]) -> np.ndarray:\n    \"\"\"\n    Clips bounding boxes coordinates to fit within the frame resolution.\n\n    Args:\n        xyxy (np.ndarray): A numpy array of shape `(N, 4)` where each\n            row corresponds to a bounding box in\n        the format `(x_min, y_min, x_max, y_max)`.\n        resolution_wh (Tuple[int, int]): A tuple of the form `(width, height)`\n            representing the resolution of the frame.\n\n    Returns:\n        np.ndarray: A numpy array of shape `(N, 4)` where each row\n            corresponds to a bounding box with coordinates clipped to fit\n            within the frame resolution.\n\n    Examples:\n        ```python\n        import numpy as np\n        import supervision as sv\n\n        xyxy = np.array([\n            [10, 20, 300, 200],\n            [15, 25, 350, 450],\n            [-10, -20, 30, 40]\n        ])\n\n        sv.clip_boxes(xyxy=xyxy, resolution_wh=(320, 240))\n        # array([\n        #     [ 10,  20, 300, 200],\n        #     [ 15,  25, 320, 240],\n        #     [  0,   0,  30,  40]\n        # ])\n        ```\n    \"\"\"\n    result = np.copy(xyxy)\n    width, height = resolution_wh\n    result[:, [0, 2]] = result[:, [0, 2]].clip(0, width)\n    result[:, [1, 3]] = result[:, [1, 3]].clip(0, height)\n    return result\n
pad_boxes

Pads bounding boxes coordinates with a constant padding.

Parameters:

xyxy (np.ndarray): A numpy array of shape (N, 4) where each row corresponds to a bounding box in the format (x_min, y_min, x_max, y_max). Required.

px (int): The padding value to be added to both the left and right sides of each bounding box. Required.

py (Optional[int]): The padding value to be added to both the top and bottom sides of each bounding box. If not provided, px will be used for both dimensions. Default: None.

Returns:

np.ndarray: A numpy array of shape (N, 4) where each row corresponds to a bounding box with coordinates padded according to the provided padding values.

Examples:

import numpy as np\nimport supervision as sv\n\nxyxy = np.array([\n    [10, 20, 30, 40],\n    [15, 25, 35, 45]\n])\n\nsv.pad_boxes(xyxy=xyxy, px=5, py=10)\n# array([\n#     [ 5, 10, 35, 50],\n#     [10, 15, 40, 55]\n# ])\n
Source code in supervision/detection/utils.py
def pad_boxes(xyxy: np.ndarray, px: int, py: Optional[int] = None) -> np.ndarray:\n    \"\"\"\n    Pads bounding boxes coordinates with a constant padding.\n\n    Args:\n        xyxy (np.ndarray): A numpy array of shape `(N, 4)` where each\n            row corresponds to a bounding box in the format\n            `(x_min, y_min, x_max, y_max)`.\n        px (int): The padding value to be added to both the left and right sides of\n            each bounding box.\n        py (Optional[int]): The padding value to be added to both the top and bottom\n            sides of each bounding box. If not provided, `px` will be used for both\n            dimensions.\n\n    Returns:\n        np.ndarray: A numpy array of shape `(N, 4)` where each row corresponds to a\n            bounding box with coordinates padded according to the provided padding\n            values.\n\n    Examples:\n        ```python\n        import numpy as np\n        import supervision as sv\n\n        xyxy = np.array([\n            [10, 20, 30, 40],\n            [15, 25, 35, 45]\n        ])\n\n        sv.pad_boxes(xyxy=xyxy, px=5, py=10)\n        # array([\n        #     [ 5, 10, 35, 50],\n        #     [10, 15, 40, 55]\n        # ])\n        ```\n    \"\"\"\n    if py is None:\n        py = px\n\n    result = xyxy.copy()\n    result[:, [0, 1]] -= [px, py]\n    result[:, [2, 3]] += [px, py]\n\n    return result\n
contains_holes

Checks if the binary mask contains holes (background pixels fully enclosed by foreground pixels).

Parameters:

mask (npt.NDArray[np.bool_]): 2D binary mask where True indicates foreground object and False indicates background. Required.

Returns:

bool: True if holes are detected, False otherwise.

Examples:

import numpy as np\nimport supervision as sv\n\nmask = np.array([\n    [0, 0, 0, 0, 0],\n    [0, 1, 1, 1, 0],\n    [0, 1, 0, 1, 0],\n    [0, 1, 1, 1, 0],\n    [0, 0, 0, 0, 0]\n]).astype(bool)\n\nsv.contains_holes(mask=mask)\n# True\n\nmask = np.array([\n    [0, 0, 0, 0, 0],\n    [0, 1, 1, 1, 0],\n    [0, 1, 1, 1, 0],\n    [0, 1, 1, 1, 0],\n    [0, 0, 0, 0, 0]\n]).astype(bool)\n\nsv.contains_holes(mask=mask)\n# False\n

Source code in supervision/detection/utils.py
def contains_holes(mask: npt.NDArray[np.bool_]) -> bool:\n    \"\"\"\n    Checks if the binary mask contains holes (background pixels fully enclosed by\n    foreground pixels).\n\n    Args:\n        mask (npt.NDArray[np.bool_]): 2D binary mask where `True` indicates foreground\n            object and `False` indicates background.\n\n    Returns:\n        True if holes are detected, False otherwise.\n\n    Examples:\n        ```python\n        import numpy as np\n        import supervision as sv\n\n        mask = np.array([\n            [0, 0, 0, 0, 0],\n            [0, 1, 1, 1, 0],\n            [0, 1, 0, 1, 0],\n            [0, 1, 1, 1, 0],\n            [0, 0, 0, 0, 0]\n        ]).astype(bool)\n\n        sv.contains_holes(mask=mask)\n        # True\n\n        mask = np.array([\n            [0, 0, 0, 0, 0],\n            [0, 1, 1, 1, 0],\n            [0, 1, 1, 1, 0],\n            [0, 1, 1, 1, 0],\n            [0, 0, 0, 0, 0]\n        ]).astype(bool)\n\n        sv.contains_holes(mask=mask)\n        # False\n        ```\n\n    ![contains_holes](https://media.roboflow.com/supervision-docs/contains-holes.png){ align=center width=\"800\" }\n    \"\"\"  # noqa E501 // docs\n    mask_uint8 = mask.astype(np.uint8)\n    _, hierarchy = cv2.findContours(mask_uint8, cv2.RETR_CCOMP, cv2.CHAIN_APPROX_SIMPLE)\n\n    if hierarchy is not None:\n        parent_contour_index = 3\n        for h in hierarchy[0]:\n            if h[parent_contour_index] != -1:\n                return True\n    return False\n
contains_multiple_segments

Checks if the binary mask contains multiple unconnected foreground segments.

Parameters:

mask (npt.NDArray[np.bool_]): 2D binary mask where True indicates foreground object and False indicates background. Required.

connectivity (int): 4 for 4-way connectivity, where foreground pixels belong to the same segment only if their edges touch; 8 for 8-way connectivity, where foreground pixels whose edges or corners touch belong to the same segment. Default: 4.

Returns:

bool: True when the mask contains multiple unconnected foreground segments, False otherwise.

Raises:

ValueError: If the connectivity parameter value is not 4 or 8.

Examples:

import numpy as np\nimport supervision as sv\n\nmask = np.array([\n    [0, 0, 0, 0, 0, 0],\n    [0, 1, 1, 0, 1, 1],\n    [0, 1, 1, 0, 1, 1],\n    [0, 0, 0, 0, 0, 0],\n    [0, 1, 1, 1, 0, 0],\n    [0, 1, 1, 1, 0, 0]\n]).astype(bool)\n\nsv.contains_multiple_segments(mask=mask, connectivity=4)\n# True\n\nmask = np.array([\n    [0, 0, 0, 0, 0, 0],\n    [0, 1, 1, 1, 1, 1],\n    [0, 1, 1, 1, 1, 1],\n    [0, 1, 1, 1, 1, 1],\n    [0, 1, 1, 1, 1, 1],\n    [0, 0, 0, 0, 0, 0]\n]).astype(bool)\n\nsv.contains_multiple_segments(mask=mask, connectivity=4)\n# False\n

Source code in supervision/detection/utils.py
def contains_multiple_segments(\n    mask: npt.NDArray[np.bool_], connectivity: int = 4\n) -> bool:\n    \"\"\"\n    Checks if the binary mask contains multiple unconnected foreground segments.\n\n    Args:\n        mask (npt.NDArray[np.bool_]): 2D binary mask where `True` indicates foreground\n            object and `False` indicates background.\n        connectivity (int) : Default: 4 is 4-way connectivity, which means that\n            foreground pixels are the part of the same segment/component\n            if their edges touch.\n            Alternatively: 8 for 8-way connectivity, when foreground pixels are\n            connected by their edges or corners touch.\n\n    Returns:\n        True when the mask contains multiple not connected components, False otherwise.\n\n    Raises:\n        ValueError: If connectivity(int) parameter value is not 4 or 8.\n\n    Examples:\n        ```python\n        import numpy as np\n        import supervision as sv\n\n        mask = np.array([\n            [0, 0, 0, 0, 0, 0],\n            [0, 1, 1, 0, 1, 1],\n            [0, 1, 1, 0, 1, 1],\n            [0, 0, 0, 0, 0, 0],\n            [0, 1, 1, 1, 0, 0],\n            [0, 1, 1, 1, 0, 0]\n        ]).astype(bool)\n\n        sv.contains_multiple_segments(mask=mask, connectivity=4)\n        # True\n\n        mask = np.array([\n            [0, 0, 0, 0, 0, 0],\n            [0, 1, 1, 1, 1, 1],\n            [0, 1, 1, 1, 1, 1],\n            [0, 1, 1, 1, 1, 1],\n            [0, 1, 1, 1, 1, 1],\n            [0, 0, 0, 0, 0, 0]\n        ]).astype(bool)\n\n        sv.contains_multiple_segments(mask=mask, connectivity=4)\n        # False\n        ```\n\n    ![contains_multiple_segments](https://media.roboflow.com/supervision-docs/contains-multiple-segments.png){ align=center width=\"800\" }\n    \"\"\"  # noqa E501 // docs\n    if connectivity != 4 and connectivity != 8:\n        raise ValueError(\n            \"Incorrect connectivity value. Possible connectivity values: 4 or 8.\"\n        )\n    mask_uint8 = mask.astype(np.uint8)\n    labels = np.zeros_like(mask_uint8, dtype=np.int32)\n    number_of_labels, _ = cv2.connectedComponents(\n        mask_uint8, labels, connectivity=connectivity\n    )\n    return number_of_labels > 2\n
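The connectivity argument matters when foreground regions touch only at corners. A short sketch, assuming the function is exported as `sv.contains_multiple_segments`:

```python
import numpy as np
import supervision as sv

# two blocks that touch only at a single corner
mask = np.array([
    [1, 1, 0, 0],
    [1, 1, 0, 0],
    [0, 0, 1, 1],
    [0, 0, 1, 1]
]).astype(bool)

sv.contains_multiple_segments(mask=mask, connectivity=4)
# True - corner contact does not join components under 4-way connectivity

sv.contains_multiple_segments(mask=mask, connectivity=8)
# False - under 8-way connectivity the two blocks form a single component
```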
"},{"location":"detection/tools/inference_slicer/","title":"InferenceSlicer","text":"

InferenceSlicer performs slicing-based inference for small target detection. This method, often referred to as Slicing Adaptive Inference (SAHI), involves dividing a larger image into smaller slices, performing inference on each slice, and then merging the detections.

Parameters:

slice_wh (Tuple[int, int]): Dimensions of each slice in the format (width, height). Default: (320, 320).

overlap_ratio_wh (Tuple[float, float]): Overlap ratio between consecutive slices in the format (width_ratio, height_ratio). Default: (0.2, 0.2).

overlap_filter_strategy (Union[OverlapFilter, str]): Strategy for filtering or merging overlapping detections in slices. Default: NON_MAX_SUPPRESSION.

iou_threshold (float): Intersection over Union (IoU) threshold used when filtering by overlap. Default: 0.5.

callback (Callable): A function that performs inference on a given image slice and returns detections. Required.

thread_workers (int): Number of threads for parallel execution. Default: 1.

Note:

The class ensures that slices do not exceed the boundaries of the original image. As a result, the final slices in the row and column dimensions might be smaller than the specified slice dimensions if the image's width or height is not a multiple of the slice's width or height minus the overlap.

Source code in supervision/detection/tools/inference_slicer.py
class InferenceSlicer:\n    \"\"\"\n    InferenceSlicer performs slicing-based inference for small target detection. This\n    method, often referred to as\n    [Slicing Adaptive Inference (SAHI)](https://ieeexplore.ieee.org/document/9897990),\n    involves dividing a larger image into smaller slices, performing inference on each\n    slice, and then merging the detections.\n\n    Args:\n        slice_wh (Tuple[int, int]): Dimensions of each slice in the format\n            `(width, height)`.\n        overlap_ratio_wh (Tuple[float, float]): Overlap ratio between consecutive\n            slices in the format `(width_ratio, height_ratio)`.\n        overlap_filter_strategy (Union[OverlapFilter, str]): Strategy for\n            filtering or merging overlapping detections in slices.\n        iou_threshold (float): Intersection over Union (IoU) threshold\n            used when filtering by overlap.\n        callback (Callable): A function that performs inference on a given image\n            slice and returns detections.\n        thread_workers (int): Number of threads for parallel execution.\n\n    Note:\n        The class ensures that slices do not exceed the boundaries of the original\n        image. As a result, the final slices in the row and column dimensions might be\n        smaller than the specified slice dimensions if the image's width or height is\n        not a multiple of the slice's width or height minus the overlap.\n    \"\"\"\n\n    def __init__(\n        self,\n        callback: Callable[[np.ndarray], Detections],\n        slice_wh: Tuple[int, int] = (320, 320),\n        overlap_ratio_wh: Tuple[float, float] = (0.2, 0.2),\n        overlap_filter_strategy: Union[\n            OverlapFilter, str\n        ] = OverlapFilter.NON_MAX_SUPPRESSION,\n        iou_threshold: float = 0.5,\n        thread_workers: int = 1,\n    ):\n        overlap_filter_strategy = validate_overlap_filter(overlap_filter_strategy)\n\n        self.slice_wh = slice_wh\n        self.overlap_ratio_wh = overlap_ratio_wh\n        self.iou_threshold = iou_threshold\n        self.overlap_filter_strategy = overlap_filter_strategy\n        self.callback = callback\n        self.thread_workers = thread_workers\n\n    def __call__(self, image: np.ndarray) -> Detections:\n        \"\"\"\n        Performs slicing-based inference on the provided image using the specified\n            callback.\n\n        Args:\n            image (np.ndarray): The input image on which inference needs to be\n                performed. 
The image should be in the format\n                `(height, width, channels)`.\n\n        Returns:\n            Detections: A collection of detections for the entire image after merging\n                results from all slices and applying NMS.\n\n        Example:\n            ```python\n            import cv2\n            import supervision as sv\n            from ultralytics import YOLO\n\n            image = cv2.imread(SOURCE_IMAGE_PATH)\n            model = YOLO(...)\n\n            def callback(image_slice: np.ndarray) -> sv.Detections:\n                result = model(image_slice)[0]\n                return sv.Detections.from_ultralytics(result)\n\n            slicer = sv.InferenceSlicer(\n                callback=callback,\n                overlap_filter_strategy=sv.OverlapFilter.NON_MAX_SUPPRESSION,\n            )\n\n            detections = slicer(image)\n            ```\n        \"\"\"\n        detections_list = []\n        resolution_wh = (image.shape[1], image.shape[0])\n        offsets = self._generate_offset(\n            resolution_wh=resolution_wh,\n            slice_wh=self.slice_wh,\n            overlap_ratio_wh=self.overlap_ratio_wh,\n        )\n\n        with ThreadPoolExecutor(max_workers=self.thread_workers) as executor:\n            futures = [\n                executor.submit(self._run_callback, image, offset) for offset in offsets\n            ]\n            for future in as_completed(futures):\n                detections_list.append(future.result())\n\n        merged = Detections.merge(detections_list=detections_list)\n        if self.overlap_filter_strategy == OverlapFilter.NONE:\n            return merged\n        elif self.overlap_filter_strategy == OverlapFilter.NON_MAX_SUPPRESSION:\n            return merged.with_nms(threshold=self.iou_threshold)\n        elif self.overlap_filter_strategy == OverlapFilter.NON_MAX_MERGE:\n            return merged.with_nmm(threshold=self.iou_threshold)\n        else:\n            warnings.warn(\n                f\"Invalid overlap filter strategy: {self.overlap_filter_strategy}\",\n                category=SupervisionWarnings,\n            )\n            return merged\n\n    def _run_callback(self, image, offset) -> Detections:\n        \"\"\"\n        Run the provided callback on a slice of an image.\n\n        Args:\n            image (np.ndarray): The input image on which inference needs to run\n            offset (np.ndarray): An array of shape `(4,)` containing coordinates\n                for the slice.\n\n        Returns:\n            Detections: A collection of detections for the slice.\n        \"\"\"\n        image_slice = crop_image(image=image, xyxy=offset)\n        detections = self.callback(image_slice)\n        resolution_wh = (image.shape[1], image.shape[0])\n        detections = move_detections(\n            detections=detections, offset=offset[:2], resolution_wh=resolution_wh\n        )\n\n        return detections\n\n    @staticmethod\n    def _generate_offset(\n        resolution_wh: Tuple[int, int],\n        slice_wh: Tuple[int, int],\n        overlap_ratio_wh: Tuple[float, float],\n    ) -> np.ndarray:\n        \"\"\"\n        Generate offset coordinates for slicing an image based on the given resolution,\n        slice dimensions, and overlap ratios.\n\n        Args:\n            resolution_wh (Tuple[int, int]): A tuple representing the width and height\n                of the image to be sliced.\n            slice_wh (Tuple[int, int]): A tuple representing the desired width and\n                height of 
each slice.\n            overlap_ratio_wh (Tuple[float, float]): A tuple representing the desired\n                overlap ratio for width and height between consecutive slices. Each\n                value should be in the range [0, 1), where 0 means no overlap and a\n                value close to 1 means high overlap.\n\n        Returns:\n            np.ndarray: An array of shape `(n, 4)` containing coordinates for each\n                slice in the format `[xmin, ymin, xmax, ymax]`.\n\n        Note:\n            The function ensures that slices do not exceed the boundaries of the\n                original image. As a result, the final slices in the row and column\n                dimensions might be smaller than the specified slice dimensions if the\n                image's width or height is not a multiple of the slice's width or\n                height minus the overlap.\n        \"\"\"\n        slice_width, slice_height = slice_wh\n        image_width, image_height = resolution_wh\n        overlap_ratio_width, overlap_ratio_height = overlap_ratio_wh\n\n        width_stride = slice_width - int(overlap_ratio_width * slice_width)\n        height_stride = slice_height - int(overlap_ratio_height * slice_height)\n\n        ws = np.arange(0, image_width, width_stride)\n        hs = np.arange(0, image_height, height_stride)\n\n        xmin, ymin = np.meshgrid(ws, hs)\n        xmax = np.clip(xmin + slice_width, 0, image_width)\n        ymax = np.clip(ymin + slice_height, 0, image_height)\n\n        offsets = np.stack([xmin, ymin, xmax, ymax], axis=-1).reshape(-1, 4)\n\n        return offsets\n
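As a configuration sketch, slice size and overlap can be tuned alongside the callback; the model checkpoint, image path, and parameter values below are illustrative assumptions, not recommendations.

```python
import cv2
import numpy as np
import supervision as sv
from ultralytics import YOLO

model = YOLO("yolov8n.pt")  # any detection model wrapped by the callback

def callback(image_slice: np.ndarray) -> sv.Detections:
    result = model(image_slice)[0]
    return sv.Detections.from_ultralytics(result)

slicer = sv.InferenceSlicer(
    callback=callback,
    slice_wh=(640, 640),          # size of each slice
    overlap_ratio_wh=(0.2, 0.2),  # 20% overlap between neighbouring slices
    iou_threshold=0.5,            # used by the overlap filter strategy
    thread_workers=4,             # run slice inference in parallel threads
)

image = cv2.imread("image.jpg")  # placeholder path
detections = slicer(image)
```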
"},{"location":"detection/tools/inference_slicer/#supervision.detection.tools.inference_slicer.InferenceSlicer-functions","title":"Functions","text":""},{"location":"detection/tools/inference_slicer/#supervision.detection.tools.inference_slicer.InferenceSlicer.__call__","title":"__call__(image)","text":"

Performs slicing-based inference on the provided image using the specified callback.

Parameters:

image (np.ndarray): The input image on which inference needs to be performed. The image should be in the format (height, width, channels). Required.

Returns:

Detections: A collection of detections for the entire image after merging results from all slices and applying NMS.

Example
import cv2\nimport supervision as sv\nfrom ultralytics import YOLO\n\nimage = cv2.imread(SOURCE_IMAGE_PATH)\nmodel = YOLO(...)\n\ndef callback(image_slice: np.ndarray) -> sv.Detections:\n    result = model(image_slice)[0]\n    return sv.Detections.from_ultralytics(result)\n\nslicer = sv.InferenceSlicer(\n    callback=callback,\n    overlap_filter_strategy=sv.OverlapFilter.NON_MAX_SUPPRESSION,\n)\n\ndetections = slicer(image)\n
Source code in supervision/detection/tools/inference_slicer.py
def __call__(self, image: np.ndarray) -> Detections:\n    \"\"\"\n    Performs slicing-based inference on the provided image using the specified\n        callback.\n\n    Args:\n        image (np.ndarray): The input image on which inference needs to be\n            performed. The image should be in the format\n            `(height, width, channels)`.\n\n    Returns:\n        Detections: A collection of detections for the entire image after merging\n            results from all slices and applying NMS.\n\n    Example:\n        ```python\n        import cv2\n        import supervision as sv\n        from ultralytics import YOLO\n\n        image = cv2.imread(SOURCE_IMAGE_PATH)\n        model = YOLO(...)\n\n        def callback(image_slice: np.ndarray) -> sv.Detections:\n            result = model(image_slice)[0]\n            return sv.Detections.from_ultralytics(result)\n\n        slicer = sv.InferenceSlicer(\n            callback=callback,\n            overlap_filter_strategy=sv.OverlapFilter.NON_MAX_SUPPRESSION,\n        )\n\n        detections = slicer(image)\n        ```\n    \"\"\"\n    detections_list = []\n    resolution_wh = (image.shape[1], image.shape[0])\n    offsets = self._generate_offset(\n        resolution_wh=resolution_wh,\n        slice_wh=self.slice_wh,\n        overlap_ratio_wh=self.overlap_ratio_wh,\n    )\n\n    with ThreadPoolExecutor(max_workers=self.thread_workers) as executor:\n        futures = [\n            executor.submit(self._run_callback, image, offset) for offset in offsets\n        ]\n        for future in as_completed(futures):\n            detections_list.append(future.result())\n\n    merged = Detections.merge(detections_list=detections_list)\n    if self.overlap_filter_strategy == OverlapFilter.NONE:\n        return merged\n    elif self.overlap_filter_strategy == OverlapFilter.NON_MAX_SUPPRESSION:\n        return merged.with_nms(threshold=self.iou_threshold)\n    elif self.overlap_filter_strategy == OverlapFilter.NON_MAX_MERGE:\n        return merged.with_nmm(threshold=self.iou_threshold)\n    else:\n        warnings.warn(\n            f\"Invalid overlap filter strategy: {self.overlap_filter_strategy}\",\n            category=SupervisionWarnings,\n        )\n        return merged\n
"},{"location":"detection/tools/line_zone/","title":"Line Zone","text":"LineZone

This class is responsible for counting the number of objects that cross a predefined line.

Warning

LineZone uses the tracker_id. See the trackers documentation (https://supervision.roboflow.com/latest/trackers/) to learn how to plug tracking into your inference pipeline.

Attributes:

in_count (int): The number of objects that have crossed the line from outside to inside.

out_count (int): The number of objects that have crossed the line from inside to outside.

Example
import supervision as sv\nfrom ultralytics import YOLO\n\nmodel = YOLO(<SOURCE_MODEL_PATH>)\ntracker = sv.ByteTrack()\nframes_generator = sv.get_video_frames_generator(<SOURCE_VIDEO_PATH>)\nstart, end = sv.Point(x=0, y=1080), sv.Point(x=3840, y=1080)\nline_zone = sv.LineZone(start=start, end=end)\n\nfor frame in frames_generator:\n    result = model(frame)[0]\n    detections = sv.Detections.from_ultralytics(result)\n    detections = tracker.update_with_detections(detections)\n    crossed_in, crossed_out = line_zone.trigger(detections)\n\nline_zone.in_count, line_zone.out_count\n# 7, 2\n
Source code in supervision/detection/line_zone.py
class LineZone:\n    \"\"\"\n    This class is responsible for counting the number of objects that cross a\n    predefined line.\n\n    <video controls>\n        <source\n            src=\"https://media.roboflow.com/supervision/cookbooks/count-objects-crossing-the-line-result-1280x720.mp4\"\n            type=\"video/mp4\">\n    </video>\n\n    !!! warning\n\n        LineZone uses the `tracker_id`. Read\n        [here](/latest/trackers/) to learn how to plug\n        tracking into your inference pipeline.\n\n    Attributes:\n        in_count (int): The number of objects that have crossed the line from outside\n            to inside.\n        out_count (int): The number of objects that have crossed the line from inside\n            to outside.\n\n    Example:\n        ```python\n        import supervision as sv\n        from ultralytics import YOLO\n\n        model = YOLO(<SOURCE_MODEL_PATH>)\n        tracker = sv.ByteTrack()\n        frames_generator = sv.get_video_frames_generator(<SOURCE_VIDEO_PATH>)\n        start, end = sv.Point(x=0, y=1080), sv.Point(x=3840, y=1080)\n        line_zone = sv.LineZone(start=start, end=end)\n\n        for frame in frames_generator:\n            result = model(frame)[0]\n            detections = sv.Detections.from_ultralytics(result)\n            detections = tracker.update_with_detections(detections)\n            crossed_in, crossed_out = line_zone.trigger(detections)\n\n        line_zone.in_count, line_zone.out_count\n        # 7, 2\n        ```\n    \"\"\"  # noqa: E501 // docs\n\n    def __init__(\n        self,\n        start: Point,\n        end: Point,\n        triggering_anchors: Iterable[Position] = (\n            Position.TOP_LEFT,\n            Position.TOP_RIGHT,\n            Position.BOTTOM_LEFT,\n            Position.BOTTOM_RIGHT,\n        ),\n    ):\n        \"\"\"\n        Args:\n            start (Point): The starting point of the line.\n            end (Point): The ending point of the line.\n            triggering_anchors (List[sv.Position]): A list of positions\n                specifying which anchors of the detections bounding box\n                to consider when deciding on whether the detection\n                has passed the line counter or not. 
By default, this\n                contains the four corners of the detection's bounding box\n        \"\"\"\n        self.vector = Vector(start=start, end=end)\n        self.limits = self.calculate_region_of_interest_limits(vector=self.vector)\n        self.tracker_state: Dict[str, bool] = {}\n        self.in_count: int = 0\n        self.out_count: int = 0\n        self.triggering_anchors = triggering_anchors\n        if not list(self.triggering_anchors):\n            raise ValueError(\"Triggering anchors cannot be empty.\")\n\n    @staticmethod\n    def calculate_region_of_interest_limits(vector: Vector) -> Tuple[Vector, Vector]:\n        magnitude = vector.magnitude\n\n        if magnitude == 0:\n            raise ValueError(\"The magnitude of the vector cannot be zero.\")\n\n        delta_x = vector.end.x - vector.start.x\n        delta_y = vector.end.y - vector.start.y\n\n        unit_vector_x = delta_x / magnitude\n        unit_vector_y = delta_y / magnitude\n\n        perpendicular_vector_x = -unit_vector_y\n        perpendicular_vector_y = unit_vector_x\n\n        start_region_limit = Vector(\n            start=vector.start,\n            end=Point(\n                x=vector.start.x + perpendicular_vector_x,\n                y=vector.start.y + perpendicular_vector_y,\n            ),\n        )\n        end_region_limit = Vector(\n            start=vector.end,\n            end=Point(\n                x=vector.end.x - perpendicular_vector_x,\n                y=vector.end.y - perpendicular_vector_y,\n            ),\n        )\n        return start_region_limit, end_region_limit\n\n    @staticmethod\n    def is_point_in_limits(point: Point, limits: Tuple[Vector, Vector]) -> bool:\n        cross_product_1 = limits[0].cross_product(point)\n        cross_product_2 = limits[1].cross_product(point)\n        return (cross_product_1 > 0) == (cross_product_2 > 0)\n\n    def trigger(self, detections: Detections) -> Tuple[np.ndarray, np.ndarray]:\n        \"\"\"\n        Update the `in_count` and `out_count` based on the objects that cross the line.\n\n        Args:\n            detections (Detections): A list of detections for which to update the\n                counts.\n\n        Returns:\n            A tuple of two boolean NumPy arrays. The first array indicates which\n                detections have crossed the line from outside to inside. The second\n                array indicates which detections have crossed the line from inside to\n                outside.\n        \"\"\"\n        crossed_in = np.full(len(detections), False)\n        crossed_out = np.full(len(detections), False)\n\n        if len(detections) == 0:\n            return crossed_in, crossed_out\n\n        if detections.tracker_id is None:\n            warnings.warn(\n                \"Line zone counting skipped. LineZone requires tracker_id. 
Refer to \"\n                \"https://supervision.roboflow.com/latest/trackers for more \"\n                \"information.\",\n                category=SupervisionWarnings,\n            )\n            return crossed_in, crossed_out\n\n        all_anchors = np.array(\n            [\n                detections.get_anchors_coordinates(anchor)\n                for anchor in self.triggering_anchors\n            ]\n        )\n\n        cross_products_1 = cross_product(all_anchors, self.limits[0])\n        cross_products_2 = cross_product(all_anchors, self.limits[1])\n        in_limits = (cross_products_1 > 0) == (cross_products_2 > 0)\n        in_limits = np.all(in_limits, axis=0)\n\n        triggers = cross_product(all_anchors, self.vector) < 0\n        has_any_left_trigger = np.any(triggers, axis=0)\n        has_any_right_trigger = np.any(~triggers, axis=0)\n        is_uniformly_triggered = ~(has_any_left_trigger & has_any_right_trigger)\n        for i, tracker_id in enumerate(detections.tracker_id):\n            if not in_limits[i]:\n                continue\n\n            if not is_uniformly_triggered[i]:\n                continue\n\n            tracker_state = has_any_left_trigger[i]\n            if tracker_id not in self.tracker_state:\n                self.tracker_state[tracker_id] = tracker_state\n                continue\n\n            if self.tracker_state.get(tracker_id) == tracker_state:\n                continue\n\n            self.tracker_state[tracker_id] = tracker_state\n            if tracker_state:\n                self.in_count += 1\n                crossed_in[i] = True\n            else:\n                self.out_count += 1\n                crossed_out[i] = True\n\n        return crossed_in, crossed_out\n
LineZoneAnnotator

Source code in supervision/detection/line_zone.py
class LineZoneAnnotator:\n    def __init__(\n        self,\n        thickness: float = 2,\n        color: Color = Color.WHITE,\n        text_thickness: float = 2,\n        text_color: Color = Color.BLACK,\n        text_scale: float = 0.5,\n        text_offset: float = 1.5,\n        text_padding: int = 10,\n        custom_in_text: Optional[str] = None,\n        custom_out_text: Optional[str] = None,\n        display_in_count: bool = True,\n        display_out_count: bool = True,\n    ):\n        \"\"\"\n        Initialize the LineCounterAnnotator object with default values.\n\n        Attributes:\n            thickness (float): The thickness of the line that will be drawn.\n            color (Color): The color of the line that will be drawn.\n            text_thickness (float): The thickness of the text that will be drawn.\n            text_color (Color): The color of the text that will be drawn.\n            text_scale (float): The scale of the text that will be drawn.\n            text_offset (float): The offset of the text that will be drawn.\n            text_padding (int): The padding of the text that will be drawn.\n            display_in_count (bool): Whether to display the in count or not.\n            display_out_count (bool): Whether to display the out count or not.\n\n        \"\"\"\n        self.thickness: float = thickness\n        self.color: Color = color\n        self.text_thickness: float = text_thickness\n        self.text_color: Color = text_color\n        self.text_scale: float = text_scale\n        self.text_offset: float = text_offset\n        self.text_padding: int = text_padding\n        self.custom_in_text: str = custom_in_text\n        self.custom_out_text: str = custom_out_text\n        self.display_in_count: bool = display_in_count\n        self.display_out_count: bool = display_out_count\n\n    def _annotate_count(\n        self,\n        frame: np.ndarray,\n        center_text_anchor: Point,\n        text: str,\n        is_in_count: bool,\n    ) -> None:\n        \"\"\"This method is drawing the text on the frame.\n\n        Args:\n            frame (np.ndarray): The image on which the text will be drawn.\n            center_text_anchor: The center point that the text will be drawn.\n            text (str): The text that will be drawn.\n            is_in_count (bool): Whether to display the in count or out count.\n        \"\"\"\n        _, text_height = cv2.getTextSize(\n            text, cv2.FONT_HERSHEY_SIMPLEX, self.text_scale, self.text_thickness\n        )[0]\n\n        if is_in_count:\n            center_text_anchor.y -= int(self.text_offset * text_height)\n        else:\n            center_text_anchor.y += int(self.text_offset * text_height)\n\n        draw_text(\n            scene=frame,\n            text=text,\n            text_anchor=center_text_anchor,\n            text_color=self.text_color,\n            text_scale=self.text_scale,\n            text_thickness=self.text_thickness,\n            text_padding=self.text_padding,\n            background_color=self.color,\n        )\n\n    def annotate(self, frame: np.ndarray, line_counter: LineZone) -> np.ndarray:\n        \"\"\"\n        Draws the line on the frame using the line_counter provided.\n\n        Attributes:\n            frame (np.ndarray): The image on which the line will be drawn.\n            line_counter (LineCounter): The line counter\n                that will be used to draw the line.\n\n        Returns:\n            np.ndarray: The image with the line drawn on it.\n\n        \"\"\"\n 
       cv2.line(\n            frame,\n            line_counter.vector.start.as_xy_int_tuple(),\n            line_counter.vector.end.as_xy_int_tuple(),\n            self.color.as_bgr(),\n            self.thickness,\n            lineType=cv2.LINE_AA,\n            shift=0,\n        )\n        cv2.circle(\n            frame,\n            line_counter.vector.start.as_xy_int_tuple(),\n            radius=5,\n            color=self.text_color.as_bgr(),\n            thickness=-1,\n            lineType=cv2.LINE_AA,\n        )\n        cv2.circle(\n            frame,\n            line_counter.vector.end.as_xy_int_tuple(),\n            radius=5,\n            color=self.text_color.as_bgr(),\n            thickness=-1,\n            lineType=cv2.LINE_AA,\n        )\n\n        text_anchor = Vector(\n            start=line_counter.vector.start, end=line_counter.vector.end\n        )\n\n        if self.display_in_count:\n            in_text = (\n                f\"{self.custom_in_text}: {line_counter.in_count}\"\n                if self.custom_in_text is not None\n                else f\"in: {line_counter.in_count}\"\n            )\n            self._annotate_count(\n                frame=frame,\n                center_text_anchor=text_anchor.center,\n                text=in_text,\n                is_in_count=True,\n            )\n\n        if self.display_out_count:\n            out_text = (\n                f\"{self.custom_out_text}: {line_counter.out_count}\"\n                if self.custom_out_text is not None\n                else f\"out: {line_counter.out_count}\"\n            )\n            self._annotate_count(\n                frame=frame,\n                center_text_anchor=text_anchor.center,\n                text=out_text,\n                is_in_count=False,\n            )\n        return frame\n
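A short usage sketch for the annotator; the line coordinates, text settings, and the blank frame below are illustrative stand-ins.

```python
import numpy as np
import supervision as sv

line_zone = sv.LineZone(start=sv.Point(x=0, y=540), end=sv.Point(x=1920, y=540))
line_zone_annotator = sv.LineZoneAnnotator(thickness=2, text_scale=1.0)

frame = np.zeros((1080, 1920, 3), dtype=np.uint8)  # stand-in for a video frame

# typically called after line_zone.trigger(detections) for the current frame
annotated_frame = line_zone_annotator.annotate(frame=frame, line_counter=line_zone)
```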
"},{"location":"detection/tools/line_zone/#supervision.detection.line_zone.LineZone-functions","title":"Functions","text":""},{"location":"detection/tools/line_zone/#supervision.detection.line_zone.LineZone.__init__","title":"__init__(start, end, triggering_anchors=(Position.TOP_LEFT, Position.TOP_RIGHT, Position.BOTTOM_LEFT, Position.BOTTOM_RIGHT))","text":"

Parameters:

start (Point): The starting point of the line. Required.

end (Point): The ending point of the line. Required.

triggering_anchors (List[sv.Position]): A list of positions specifying which anchors of the detections bounding box to consider when deciding on whether the detection has passed the line counter or not. By default, this contains the four corners of the detection's bounding box: (Position.TOP_LEFT, Position.TOP_RIGHT, Position.BOTTOM_LEFT, Position.BOTTOM_RIGHT).

Source code in supervision/detection/line_zone.py
def __init__(\n    self,\n    start: Point,\n    end: Point,\n    triggering_anchors: Iterable[Position] = (\n        Position.TOP_LEFT,\n        Position.TOP_RIGHT,\n        Position.BOTTOM_LEFT,\n        Position.BOTTOM_RIGHT,\n    ),\n):\n    \"\"\"\n    Args:\n        start (Point): The starting point of the line.\n        end (Point): The ending point of the line.\n        triggering_anchors (List[sv.Position]): A list of positions\n            specifying which anchors of the detections bounding box\n            to consider when deciding on whether the detection\n            has passed the line counter or not. By default, this\n            contains the four corners of the detection's bounding box\n    \"\"\"\n    self.vector = Vector(start=start, end=end)\n    self.limits = self.calculate_region_of_interest_limits(vector=self.vector)\n    self.tracker_state: Dict[str, bool] = {}\n    self.in_count: int = 0\n    self.out_count: int = 0\n    self.triggering_anchors = triggering_anchors\n    if not list(self.triggering_anchors):\n        raise ValueError(\"Triggering anchors cannot be empty.\")\n
"},{"location":"detection/tools/line_zone/#supervision.detection.line_zone.LineZone.trigger","title":"trigger(detections)","text":"

Update the in_count and out_count based on the objects that cross the line.

Parameters:

detections (Detections): A list of detections for which to update the counts. Required.

Returns:

Tuple[np.ndarray, np.ndarray]: A tuple of two boolean NumPy arrays. The first array indicates which detections have crossed the line from outside to inside. The second array indicates which detections have crossed the line from inside to outside.

Source code in supervision/detection/line_zone.py
def trigger(self, detections: Detections) -> Tuple[np.ndarray, np.ndarray]:\n    \"\"\"\n    Update the `in_count` and `out_count` based on the objects that cross the line.\n\n    Args:\n        detections (Detections): A list of detections for which to update the\n            counts.\n\n    Returns:\n        A tuple of two boolean NumPy arrays. The first array indicates which\n            detections have crossed the line from outside to inside. The second\n            array indicates which detections have crossed the line from inside to\n            outside.\n    \"\"\"\n    crossed_in = np.full(len(detections), False)\n    crossed_out = np.full(len(detections), False)\n\n    if len(detections) == 0:\n        return crossed_in, crossed_out\n\n    if detections.tracker_id is None:\n        warnings.warn(\n            \"Line zone counting skipped. LineZone requires tracker_id. Refer to \"\n            \"https://supervision.roboflow.com/latest/trackers for more \"\n            \"information.\",\n            category=SupervisionWarnings,\n        )\n        return crossed_in, crossed_out\n\n    all_anchors = np.array(\n        [\n            detections.get_anchors_coordinates(anchor)\n            for anchor in self.triggering_anchors\n        ]\n    )\n\n    cross_products_1 = cross_product(all_anchors, self.limits[0])\n    cross_products_2 = cross_product(all_anchors, self.limits[1])\n    in_limits = (cross_products_1 > 0) == (cross_products_2 > 0)\n    in_limits = np.all(in_limits, axis=0)\n\n    triggers = cross_product(all_anchors, self.vector) < 0\n    has_any_left_trigger = np.any(triggers, axis=0)\n    has_any_right_trigger = np.any(~triggers, axis=0)\n    is_uniformly_triggered = ~(has_any_left_trigger & has_any_right_trigger)\n    for i, tracker_id in enumerate(detections.tracker_id):\n        if not in_limits[i]:\n            continue\n\n        if not is_uniformly_triggered[i]:\n            continue\n\n        tracker_state = has_any_left_trigger[i]\n        if tracker_id not in self.tracker_state:\n            self.tracker_state[tracker_id] = tracker_state\n            continue\n\n        if self.tracker_state.get(tracker_id) == tracker_state:\n            continue\n\n        self.tracker_state[tracker_id] = tracker_state\n        if tracker_state:\n            self.in_count += 1\n            crossed_in[i] = True\n        else:\n            self.out_count += 1\n            crossed_out[i] = True\n\n    return crossed_in, crossed_out\n
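For a self-contained illustration of what trigger returns per call; the line placement and boxes below are arbitrary, and whether a crossing counts as "in" or "out" depends on the line direction.

```python
import numpy as np
import supervision as sv

line_zone = sv.LineZone(start=sv.Point(x=0, y=100), end=sv.Point(x=200, y=100))

# frame 1: a tracked box fully above the line; frame 2: the same box fully below it
above = sv.Detections(xyxy=np.array([[50.0, 20.0, 80.0, 60.0]]), tracker_id=np.array([1]))
below = sv.Detections(xyxy=np.array([[50.0, 140.0, 80.0, 180.0]]), tracker_id=np.array([1]))

line_zone.trigger(above)  # first sighting only records which side the object is on
crossed_in, crossed_out = line_zone.trigger(below)
# exactly one of crossed_in[0] / crossed_out[0] is True,
# and the matching counter is incremented
line_zone.in_count, line_zone.out_count
```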
"},{"location":"detection/tools/line_zone/#supervision.detection.line_zone.LineZoneAnnotator-functions","title":"Functions","text":""},{"location":"detection/tools/line_zone/#supervision.detection.line_zone.LineZoneAnnotator.__init__","title":"__init__(thickness=2, color=Color.WHITE, text_thickness=2, text_color=Color.BLACK, text_scale=0.5, text_offset=1.5, text_padding=10, custom_in_text=None, custom_out_text=None, display_in_count=True, display_out_count=True)","text":"

Initialize the LineZoneAnnotator object with default values.

Attributes:

thickness (float): The thickness of the line that will be drawn.

color (Color): The color of the line that will be drawn.

text_thickness (float): The thickness of the text that will be drawn.

text_color (Color): The color of the text that will be drawn.

text_scale (float): The scale of the text that will be drawn.

text_offset (float): The offset of the text that will be drawn.

text_padding (int): The padding of the text that will be drawn.

display_in_count (bool): Whether to display the in count or not.

display_out_count (bool): Whether to display the out count or not.

Source code in supervision/detection/line_zone.py
def __init__(\n    self,\n    thickness: float = 2,\n    color: Color = Color.WHITE,\n    text_thickness: float = 2,\n    text_color: Color = Color.BLACK,\n    text_scale: float = 0.5,\n    text_offset: float = 1.5,\n    text_padding: int = 10,\n    custom_in_text: Optional[str] = None,\n    custom_out_text: Optional[str] = None,\n    display_in_count: bool = True,\n    display_out_count: bool = True,\n):\n    \"\"\"\n    Initialize the LineCounterAnnotator object with default values.\n\n    Attributes:\n        thickness (float): The thickness of the line that will be drawn.\n        color (Color): The color of the line that will be drawn.\n        text_thickness (float): The thickness of the text that will be drawn.\n        text_color (Color): The color of the text that will be drawn.\n        text_scale (float): The scale of the text that will be drawn.\n        text_offset (float): The offset of the text that will be drawn.\n        text_padding (int): The padding of the text that will be drawn.\n        display_in_count (bool): Whether to display the in count or not.\n        display_out_count (bool): Whether to display the out count or not.\n\n    \"\"\"\n    self.thickness: float = thickness\n    self.color: Color = color\n    self.text_thickness: float = text_thickness\n    self.text_color: Color = text_color\n    self.text_scale: float = text_scale\n    self.text_offset: float = text_offset\n    self.text_padding: int = text_padding\n    self.custom_in_text: str = custom_in_text\n    self.custom_out_text: str = custom_out_text\n    self.display_in_count: bool = display_in_count\n    self.display_out_count: bool = display_out_count\n
"},{"location":"detection/tools/line_zone/#supervision.detection.line_zone.LineZoneAnnotator.annotate","title":"annotate(frame, line_counter)","text":"

Draws the line on the frame using the line_counter provided.

Parameters:

frame (np.ndarray): The image on which the line will be drawn.

line_counter (LineZone): The line counter that will be used to draw the line.

Returns:

np.ndarray: The image with the line drawn on it.

Source code in supervision/detection/line_zone.py
def annotate(self, frame: np.ndarray, line_counter: LineZone) -> np.ndarray:\n    \"\"\"\n    Draws the line on the frame using the line_counter provided.\n\n    Attributes:\n        frame (np.ndarray): The image on which the line will be drawn.\n        line_counter (LineCounter): The line counter\n            that will be used to draw the line.\n\n    Returns:\n        np.ndarray: The image with the line drawn on it.\n\n    \"\"\"\n    cv2.line(\n        frame,\n        line_counter.vector.start.as_xy_int_tuple(),\n        line_counter.vector.end.as_xy_int_tuple(),\n        self.color.as_bgr(),\n        self.thickness,\n        lineType=cv2.LINE_AA,\n        shift=0,\n    )\n    cv2.circle(\n        frame,\n        line_counter.vector.start.as_xy_int_tuple(),\n        radius=5,\n        color=self.text_color.as_bgr(),\n        thickness=-1,\n        lineType=cv2.LINE_AA,\n    )\n    cv2.circle(\n        frame,\n        line_counter.vector.end.as_xy_int_tuple(),\n        radius=5,\n        color=self.text_color.as_bgr(),\n        thickness=-1,\n        lineType=cv2.LINE_AA,\n    )\n\n    text_anchor = Vector(\n        start=line_counter.vector.start, end=line_counter.vector.end\n    )\n\n    if self.display_in_count:\n        in_text = (\n            f\"{self.custom_in_text}: {line_counter.in_count}\"\n            if self.custom_in_text is not None\n            else f\"in: {line_counter.in_count}\"\n        )\n        self._annotate_count(\n            frame=frame,\n            center_text_anchor=text_anchor.center,\n            text=in_text,\n            is_in_count=True,\n        )\n\n    if self.display_out_count:\n        out_text = (\n            f\"{self.custom_out_text}: {line_counter.out_count}\"\n            if self.custom_out_text is not None\n            else f\"out: {line_counter.out_count}\"\n        )\n        self._annotate_count(\n            frame=frame,\n            center_text_anchor=text_anchor.center,\n            text=out_text,\n            is_in_count=False,\n        )\n    return frame\n
"},{"location":"detection/tools/polygon_zone/","title":"Polygon Zone","text":"PolygonZone

A class for defining a polygon-shaped zone within a frame for detecting objects.

Attributes:

polygon (np.ndarray): A polygon represented by a numpy array of shape (N, 2), containing the x, y coordinates of the points.

triggering_anchors (Iterable[sv.Position]): A list of positions specifying which anchors of the detections bounding box to consider when deciding on whether the detection fits within the PolygonZone (default: (sv.Position.BOTTOM_CENTER,)).

current_count (int): The current count of detected objects within the zone.

mask (np.ndarray): The 2D bool mask for the polygon zone.

Source code in supervision/detection/tools/polygon_zone.py
class PolygonZone:\n    \"\"\"\n    A class for defining a polygon-shaped zone within a frame for detecting objects.\n\n    Attributes:\n        polygon (np.ndarray): A polygon represented by a numpy array of shape\n            `(N, 2)`, containing the `x`, `y` coordinates of the points.\n        triggering_anchors (Iterable[sv.Position]): A list of positions specifying\n            which anchors of the detections bounding box to consider when deciding on\n            whether the detection fits within the PolygonZone\n            (default: (sv.Position.BOTTOM_CENTER,)).\n        current_count (int): The current count of detected objects within the zone\n        mask (np.ndarray): The 2D bool mask for the polygon zone\n    \"\"\"\n\n    @deprecated_parameter(\n        old_parameter=\"triggering_position\",\n        new_parameter=\"triggering_anchors\",\n        map_function=lambda x: [x],\n        warning_message=\"`{old_parameter}` in `{function_name}` is deprecated and will \"\n        \"be remove in `supervision-0.23.0`. Use '{new_parameter}' \"\n        \"instead.\",\n    )\n    def __init__(\n        self,\n        polygon: npt.NDArray[np.int64],\n        frame_resolution_wh: Optional[Tuple[int, int]] = None,\n        triggering_anchors: Iterable[Position] = (Position.BOTTOM_CENTER,),\n    ):\n        if frame_resolution_wh is not None:\n            warnings.warn(\n                \"The `frame_resolution_wh` parameter is no longer required and will be \"\n                \"dropped in version supervision-0.24.0. The mask resolution is now \"\n                \"calculated automatically based on the polygon coordinates.\",\n                category=SupervisionWarnings,\n            )\n\n        self.polygon = polygon.astype(int)\n        self.triggering_anchors = triggering_anchors\n        if not list(self.triggering_anchors):\n            raise ValueError(\"Triggering anchors cannot be empty.\")\n\n        self.current_count = 0\n\n        x_max, y_max = np.max(polygon, axis=0)\n        self.frame_resolution_wh = (x_max + 1, y_max + 1)\n        self.mask = polygon_to_mask(\n            polygon=polygon, resolution_wh=(x_max + 2, y_max + 2)\n        )\n\n    def trigger(self, detections: Detections) -> npt.NDArray[np.bool_]:\n        \"\"\"\n        Determines if the detections are within the polygon zone.\n\n        Parameters:\n            detections (Detections): The detections\n                to be checked against the polygon zone\n\n        Returns:\n            np.ndarray: A boolean numpy array indicating\n                if each detection is within the polygon zone\n        \"\"\"\n\n        clipped_xyxy = clip_boxes(\n            xyxy=detections.xyxy, resolution_wh=self.frame_resolution_wh\n        )\n        clipped_detections = replace(detections, xyxy=clipped_xyxy)\n        all_clipped_anchors = np.array(\n            [\n                np.ceil(clipped_detections.get_anchors_coordinates(anchor)).astype(int)\n                for anchor in self.triggering_anchors\n            ]\n        )\n\n        is_in_zone: npt.NDArray[np.bool_] = (\n            self.mask[all_clipped_anchors[:, :, 1], all_clipped_anchors[:, :, 0]]\n            .transpose()\n            .astype(bool)\n        )\n\n        is_in_zone: npt.NDArray[np.bool_] = np.all(is_in_zone, axis=1)\n        self.current_count = int(np.sum(is_in_zone))\n        return is_in_zone.astype(bool)\n
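A minimal counting sketch; the polygon and the hand-built detections below are illustrative (by default the BOTTOM_CENTER anchor decides membership).

```python
import numpy as np
import supervision as sv

polygon = np.array([[100, 100], [500, 100], [500, 400], [100, 400]])
polygon_zone = sv.PolygonZone(polygon=polygon)

# the first box has its bottom center inside the zone, the second does not
detections = sv.Detections(
    xyxy=np.array([[120.0, 120.0, 200.0, 200.0], [600.0, 600.0, 700.0, 700.0]])
)

is_in_zone = polygon_zone.trigger(detections=detections)
# array([ True, False])
polygon_zone.current_count
# 1
```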
PolygonZoneAnnotator

A class for annotating a polygon-shaped zone within a frame with a count of detected objects.

Attributes:

Name Type Description zone PolygonZone

The polygon zone to be annotated

color Color

The color to draw the polygon lines

thickness int

The thickness of the polygon lines, default is 2

text_color Color

The color of the text on the polygon, default is black

text_scale float

The scale of the text on the polygon, default is 0.5

text_thickness int

The thickness of the text on the polygon, default is 1

text_padding int

The padding around the text on the polygon, default is 10

font int

The font type for the text on the polygon, default is cv2.FONT_HERSHEY_SIMPLEX

center Tuple[int, int]

The center of the polygon for text placement

display_in_zone_count bool

Whether to display the count of detected objects inside the zone. Default is True

Source code in supervision/detection/tools/polygon_zone.py
class PolygonZoneAnnotator:\n    \"\"\"\n    A class for annotating a polygon-shaped zone within a\n        frame with a count of detected objects.\n\n    Attributes:\n        zone (PolygonZone): The polygon zone to be annotated\n        color (Color): The color to draw the polygon lines\n        thickness (int): The thickness of the polygon lines, default is 2\n        text_color (Color): The color of the text on the polygon, default is black\n        text_scale (float): The scale of the text on the polygon, default is 0.5\n        text_thickness (int): The thickness of the text on the polygon, default is 1\n        text_padding (int): The padding around the text on the polygon, default is 10\n        font (int): The font type for the text on the polygon,\n            default is cv2.FONT_HERSHEY_SIMPLEX\n        center (Tuple[int, int]): The center of the polygon for text placement\n        display_in_zone_count (bool): Show the label of the zone or not. Default is True\n    \"\"\"\n\n    def __init__(\n        self,\n        zone: PolygonZone,\n        color: Color,\n        thickness: int = 2,\n        text_color: Color = Color.BLACK,\n        text_scale: float = 0.5,\n        text_thickness: int = 1,\n        text_padding: int = 10,\n        display_in_zone_count: bool = True,\n    ):\n        self.zone = zone\n        self.color = color\n        self.thickness = thickness\n        self.text_color = text_color\n        self.text_scale = text_scale\n        self.text_thickness = text_thickness\n        self.text_padding = text_padding\n        self.font = cv2.FONT_HERSHEY_SIMPLEX\n        self.center = get_polygon_center(polygon=zone.polygon)\n        self.display_in_zone_count = display_in_zone_count\n\n    def annotate(self, scene: np.ndarray, label: Optional[str] = None) -> np.ndarray:\n        \"\"\"\n        Annotates the polygon zone within a frame with a count of detected objects.\n\n        Parameters:\n            scene (np.ndarray): The image on which the polygon zone will be annotated\n            label (Optional[str]): An optional label for the count of detected objects\n                within the polygon zone (default: None)\n\n        Returns:\n            np.ndarray: The image with the polygon zone and count of detected objects\n        \"\"\"\n        annotated_frame = draw_polygon(\n            scene=scene,\n            polygon=self.zone.polygon,\n            color=self.color,\n            thickness=self.thickness,\n        )\n\n        if self.display_in_zone_count:\n            annotated_frame = draw_text(\n                scene=annotated_frame,\n                text=str(self.zone.current_count) if label is None else label,\n                text_anchor=self.center,\n                background_color=self.color,\n                text_color=self.text_color,\n                text_scale=self.text_scale,\n                text_thickness=self.text_thickness,\n                text_padding=self.text_padding,\n                text_font=self.font,\n            )\n\n        return annotated_frame\n
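Example (a minimal sketch; the frame and detections are placeholders, and sv.Color.RED is assumed to be available as a preset color): pair the zone with the annotator to draw the polygon and the live count on each frame.
import supervision as sv\n\nzone = sv.PolygonZone(...)\nzone_annotator = sv.PolygonZoneAnnotator(zone=zone, color=sv.Color.RED, thickness=2)\n\nframe = ...                      # np.ndarray frame from your video source\ndetections = sv.Detections(...)  # detections for the current frame\n\nzone.trigger(detections=detections)  # updates zone.current_count\nannotated_frame = zone_annotator.annotate(scene=frame)  # draws the polygon and the count\n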
"},{"location":"detection/tools/polygon_zone/#supervision.detection.tools.polygon_zone.PolygonZone-functions","title":"Functions","text":""},{"location":"detection/tools/polygon_zone/#supervision.detection.tools.polygon_zone.PolygonZone.trigger","title":"trigger(detections)","text":"

Determines if the detections are within the polygon zone.

Parameters:

Name Type Description Default detections Detections

The detections to be checked against the polygon zone

required

Returns:

Type Description NDArray[bool_]

np.ndarray: A boolean numpy array indicating if each detection is within the polygon zone

Source code in supervision/detection/tools/polygon_zone.py
def trigger(self, detections: Detections) -> npt.NDArray[np.bool_]:\n    \"\"\"\n    Determines if the detections are within the polygon zone.\n\n    Parameters:\n        detections (Detections): The detections\n            to be checked against the polygon zone\n\n    Returns:\n        np.ndarray: A boolean numpy array indicating\n            if each detection is within the polygon zone\n    \"\"\"\n\n    clipped_xyxy = clip_boxes(\n        xyxy=detections.xyxy, resolution_wh=self.frame_resolution_wh\n    )\n    clipped_detections = replace(detections, xyxy=clipped_xyxy)\n    all_clipped_anchors = np.array(\n        [\n            np.ceil(clipped_detections.get_anchors_coordinates(anchor)).astype(int)\n            for anchor in self.triggering_anchors\n        ]\n    )\n\n    is_in_zone: npt.NDArray[np.bool_] = (\n        self.mask[all_clipped_anchors[:, :, 1], all_clipped_anchors[:, :, 0]]\n        .transpose()\n        .astype(bool)\n    )\n\n    is_in_zone: npt.NDArray[np.bool_] = np.all(is_in_zone, axis=1)\n    self.current_count = int(np.sum(is_in_zone))\n    return is_in_zone.astype(bool)\n
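Example (a short sketch; the zone and detections are placeholders): the returned boolean mask can be used directly to keep only the detections that fall inside the zone.
import supervision as sv\n\nzone = sv.PolygonZone(...)\ndetections = sv.Detections(...)\n\nmask = zone.trigger(detections=detections)\ndetections_in_zone = detections[mask]\n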
"},{"location":"detection/tools/polygon_zone/#supervision.detection.tools.polygon_zone.PolygonZoneAnnotator-functions","title":"Functions","text":""},{"location":"detection/tools/polygon_zone/#supervision.detection.tools.polygon_zone.PolygonZoneAnnotator.annotate","title":"annotate(scene, label=None)","text":"

Annotates the polygon zone within a frame with a count of detected objects.

Parameters:

Name Type Description Default scene ndarray

The image on which the polygon zone will be annotated

required label Optional[str]

An optional label for the count of detected objects within the polygon zone (default: None)

None

Returns:

Type Description ndarray

np.ndarray: The image with the polygon zone and count of detected objects

Source code in supervision/detection/tools/polygon_zone.py
def annotate(self, scene: np.ndarray, label: Optional[str] = None) -> np.ndarray:\n    \"\"\"\n    Annotates the polygon zone within a frame with a count of detected objects.\n\n    Parameters:\n        scene (np.ndarray): The image on which the polygon zone will be annotated\n        label (Optional[str]): An optional label for the count of detected objects\n            within the polygon zone (default: None)\n\n    Returns:\n        np.ndarray: The image with the polygon zone and count of detected objects\n    \"\"\"\n    annotated_frame = draw_polygon(\n        scene=scene,\n        polygon=self.zone.polygon,\n        color=self.color,\n        thickness=self.thickness,\n    )\n\n    if self.display_in_zone_count:\n        annotated_frame = draw_text(\n            scene=annotated_frame,\n            text=str(self.zone.current_count) if label is None else label,\n            text_anchor=self.center,\n            background_color=self.color,\n            text_color=self.text_color,\n            text_scale=self.text_scale,\n            text_thickness=self.text_thickness,\n            text_padding=self.text_padding,\n            text_font=self.font,\n        )\n\n    return annotated_frame\n
"},{"location":"detection/tools/save_detections/","title":"Save Detections","text":"CSV Sink

A utility class for saving detection data to a CSV file. This class is designed to efficiently serialize detection objects into a CSV format, allowing for the inclusion of bounding box coordinates and additional attributes like confidence, class_id, and tracker_id.

Tip

CSVSink allows you to pass custom data alongside the detection fields, providing flexibility for logging various types of information.

Parameters:

Name Type Description Default file_name str

The name of the CSV file where the detections will be stored. Defaults to 'output.csv'.

'output.csv' Example
import supervision as sv\nfrom ultralytics import YOLO\n\nmodel = YOLO(<SOURCE_MODEL_PATH>)\ncsv_sink = sv.CSVSink(<RESULT_CSV_FILE_PATH>)\nframes_generator = sv.get_video_frames_generator(<SOURCE_VIDEO_PATH>)\n\nwith csv_sink as sink:\n    for frame in frames_generator:\n        result = model(frame)[0]\n        detections = sv.Detections.from_ultralytics(result)\n        sink.append(detections, custom_data={'<CUSTOM_LABEL>':'<CUSTOM_DATA>'})\n
Source code in supervision/detection/tools/csv_sink.py
class CSVSink:\n    \"\"\"\n    A utility class for saving detection data to a CSV file. This class is designed to\n    efficiently serialize detection objects into a CSV format, allowing for the\n    inclusion of bounding box coordinates and additional attributes like `confidence`,\n    `class_id`, and `tracker_id`.\n\n    !!! tip\n\n        CSVSink allow to pass custom data alongside the detection fields, providing\n        flexibility for logging various types of information.\n\n    Args:\n        file_name (str): The name of the CSV file where the detections will be stored.\n            Defaults to 'output.csv'.\n\n    Example:\n        ```python\n        import supervision as sv\n        from ultralytics import YOLO\n\n        model = YOLO(<SOURCE_MODEL_PATH>)\n        csv_sink = sv.CSVSink(<RESULT_CSV_FILE_PATH>)\n        frames_generator = sv.get_video_frames_generator(<SOURCE_VIDEO_PATH>)\n\n        with csv_sink as sink:\n            for frame in frames_generator:\n                result = model(frame)[0]\n                detections = sv.Detections.from_ultralytics(result)\n                sink.append(detections, custom_data={'<CUSTOM_LABEL>':'<CUSTOM_DATA>'})\n        ```\n    \"\"\"  # noqa: E501 // docs\n\n    def __init__(self, file_name: str = \"output.csv\") -> None:\n        \"\"\"\n        Initialize the CSVSink instance.\n\n        Args:\n            file_name (str): The name of the CSV file.\n\n        Returns:\n            None\n        \"\"\"\n        self.file_name = file_name\n        self.file: Optional[open] = None\n        self.writer: Optional[csv.writer] = None\n        self.header_written = False\n        self.field_names = []\n\n    def __enter__(self) -> CSVSink:\n        self.open()\n        return self\n\n    def __exit__(\n        self,\n        exc_type: Optional[type],\n        exc_val: Optional[Exception],\n        exc_tb: Optional[Any],\n    ) -> None:\n        self.close()\n\n    def open(self) -> None:\n        \"\"\"\n        Open the CSV file for writing.\n\n        Returns:\n            None\n        \"\"\"\n        parent_directory = os.path.dirname(self.file_name)\n        if parent_directory and not os.path.exists(parent_directory):\n            os.makedirs(parent_directory)\n\n        self.file = open(self.file_name, \"w\", newline=\"\")\n        self.writer = csv.writer(self.file)\n\n    def close(self) -> None:\n        \"\"\"\n        Close the CSV file.\n\n        Returns:\n            None\n        \"\"\"\n        if self.file:\n            self.file.close()\n\n    @staticmethod\n    def parse_detection_data(\n        detections: Detections, custom_data: Dict[str, Any] = None\n    ) -> List[Dict[str, Any]]:\n        parsed_rows = []\n        for i in range(len(detections.xyxy)):\n            row = {\n                \"x_min\": detections.xyxy[i][0],\n                \"y_min\": detections.xyxy[i][1],\n                \"x_max\": detections.xyxy[i][2],\n                \"y_max\": detections.xyxy[i][3],\n                \"class_id\": \"\"\n                if detections.class_id is None\n                else str(detections.class_id[i]),\n                \"confidence\": \"\"\n                if detections.confidence is None\n                else str(detections.confidence[i]),\n                \"tracker_id\": \"\"\n                if detections.tracker_id is None\n                else str(detections.tracker_id[i]),\n            }\n\n            if hasattr(detections, \"data\"):\n                for key, value in detections.data.items():\n        
            if value.ndim == 0:\n                        row[key] = value\n                    else:\n                        row[key] = value[i]\n\n            if custom_data:\n                row.update(custom_data)\n            parsed_rows.append(row)\n        return parsed_rows\n\n    def append(\n        self, detections: Detections, custom_data: Dict[str, Any] = None\n    ) -> None:\n        \"\"\"\n        Append detection data to the CSV file.\n\n        Args:\n            detections (Detections): The detection data.\n            custom_data (Dict[str, Any]): Custom data to include.\n\n        Returns:\n            None\n        \"\"\"\n        if not self.writer:\n            raise Exception(\n                f\"Cannot append to CSV: The file '{self.file_name}' is not open.\"\n            )\n        field_names = CSVSink.parse_field_names(detections, custom_data)\n        if not self.header_written:\n            self.field_names = field_names\n            self.writer.writerow(field_names)\n            self.header_written = True\n\n        if field_names != self.field_names:\n            print(\n                f\"Field names do not match the header. \"\n                f\"Expected: {self.field_names}, given: {field_names}\"\n            )\n\n        parsed_rows = CSVSink.parse_detection_data(detections, custom_data)\n        for row in parsed_rows:\n            self.writer.writerow(\n                [row.get(field_name, \"\") for field_name in self.field_names]\n            )\n\n    @staticmethod\n    def parse_field_names(\n        detections: Detections, custom_data: Dict[str, Any]\n    ) -> List[str]:\n        dynamic_header = sorted(\n            set(custom_data.keys()) | set(getattr(detections, \"data\", {}).keys())\n        )\n        return BASE_HEADER + dynamic_header\n
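The context manager shown above is the recommended pattern, but the sink can also be driven manually with open, append and close (a minimal sketch; the detections object and the frame_index custom field are illustrative placeholders):
import supervision as sv\n\ncsv_sink = sv.CSVSink(\"output.csv\")\ncsv_sink.open()\n\ndetections = sv.Detections(...)  # detections for a single frame\ncsv_sink.append(detections, custom_data={\"frame_index\": 0})  # writes one row per detection\n\ncsv_sink.close()\n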
JSON Sink

A utility class for saving detection data to a JSON file. This class is designed to efficiently serialize detection objects into a JSON format, allowing for the inclusion of bounding box coordinates and additional attributes like confidence, class_id, and tracker_id.

Tip

JSONSink allows you to pass custom data alongside the detection fields, providing flexibility for logging various types of information.

Parameters:

Name Type Description Default file_name str

The name of the JSON file where the detections will be stored. Defaults to 'output.json'.

'output.json' Example
import supervision as sv\nfrom ultralytics import YOLO\n\nmodel = YOLO(<SOURCE_MODEL_PATH>)\njson_sink = sv.JSONSink(<RESULT_JSON_FILE_PATH>)\nframes_generator = sv.get_video_frames_generator(<SOURCE_VIDEO_PATH>)\n\nwith json_sink as sink:\n    for frame in frames_generator:\n        result = model(frame)[0]\n        detections = sv.Detections.from_ultralytics(result)\n        sink.append(detections, custom_data={'<CUSTOM_LABEL>':'<CUSTOM_DATA>'})\n
Source code in supervision/detection/tools/json_sink.py
class JSONSink:\n    \"\"\"\n    A utility class for saving detection data to a JSON file. This class is designed to\n    efficiently serialize detection objects into a JSON format, allowing for the\n    inclusion of bounding box coordinates and additional attributes like `confidence`,\n    `class_id`, and `tracker_id`.\n\n    !!! tip\n\n        JSONsink allow to pass custom data alongside the detection fields, providing\n        flexibility for logging various types of information.\n\n    Args:\n        file_name (str): The name of the JSON file where the detections will be stored.\n            Defaults to 'output.json'.\n\n    Example:\n        ```python\n        import supervision as sv\n        from ultralytics import YOLO\n\n        model = YOLO(<SOURCE_MODEL_PATH>)\n        json_sink = sv.JSONSink(<RESULT_JSON_FILE_PATH>)\n        frames_generator = sv.get_video_frames_generator(<SOURCE_VIDEO_PATH>)\n\n        with json_sink as sink:\n            for frame in frames_generator:\n                result = model(frame)[0]\n                detections = sv.Detections.from_ultralytics(result)\n                sink.append(detections, custom_data={'<CUSTOM_LABEL>':'<CUSTOM_DATA>'})\n        ```\n    \"\"\"  # noqa: E501 // docs\n\n    def __init__(self, file_name: str = \"output.json\") -> None:\n        \"\"\"\n        Initialize the JSONSink instance.\n\n        Args:\n            file_name (str): The name of the JSON file.\n\n        Returns:\n            None\n        \"\"\"\n        self.file_name = file_name\n        self.file: Optional[open] = None\n        self.data: List[Dict[str, Any]] = []\n\n    def __enter__(self) -> JSONSink:\n        self.open()\n        return self\n\n    def __exit__(\n        self,\n        exc_type: Optional[type],\n        exc_val: Optional[Exception],\n        exc_tb: Optional[Any],\n    ) -> None:\n        self.write_and_close()\n\n    def open(self) -> None:\n        \"\"\"\n        Open the JSON file for writing.\n\n        Returns:\n            None\n        \"\"\"\n        parent_directory = os.path.dirname(self.file_name)\n        if parent_directory and not os.path.exists(parent_directory):\n            os.makedirs(parent_directory)\n\n        self.file = open(self.file_name, \"w\")\n\n    def write_and_close(self) -> None:\n        \"\"\"\n        Write and close the JSON file.\n\n        Returns:\n            None\n        \"\"\"\n        if self.file:\n            json.dump(self.data, self.file, indent=4)\n            self.file.close()\n\n    @staticmethod\n    def parse_detection_data(\n        detections: Detections, custom_data: Dict[str, Any] = None\n    ) -> List[Dict[str, Any]]:\n        parsed_rows = []\n        for i in range(len(detections.xyxy)):\n            row = {\n                \"x_min\": float(detections.xyxy[i][0]),\n                \"y_min\": float(detections.xyxy[i][1]),\n                \"x_max\": float(detections.xyxy[i][2]),\n                \"y_max\": float(detections.xyxy[i][3]),\n                \"class_id\": \"\"\n                if detections.class_id is None\n                else int(detections.class_id[i]),\n                \"confidence\": \"\"\n                if detections.confidence is None\n                else float(detections.confidence[i]),\n                \"tracker_id\": \"\"\n                if detections.tracker_id is None\n                else int(detections.tracker_id[i]),\n            }\n\n            if hasattr(detections, \"data\"):\n                for key, value in detections.data.items():\n        
            row[key] = (\n                        str(value[i])\n                        if hasattr(value, \"__getitem__\") and value.ndim != 0\n                        else str(value)\n                    )\n\n            if custom_data:\n                row.update(custom_data)\n            parsed_rows.append(row)\n        return parsed_rows\n\n    def append(\n        self, detections: Detections, custom_data: Dict[str, Any] = None\n    ) -> None:\n        \"\"\"\n        Append detection data to the JSON file.\n\n        Args:\n            detections (Detections): The detection data.\n            custom_data (Dict[str, Any]): Custom data to include.\n\n        Returns:\n            None\n        \"\"\"\n        parsed_rows = JSONSink.parse_detection_data(detections, custom_data)\n        self.data.extend(parsed_rows)\n
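As with CSVSink, the context manager is the recommended pattern, but the sink can also be driven manually with open, append and write_and_close (a minimal sketch; the detections object and the frame_index custom field are illustrative placeholders):
import supervision as sv\n\njson_sink = sv.JSONSink(\"output.json\")\njson_sink.open()\n\ndetections = sv.Detections(...)  # detections for a single frame\njson_sink.append(detections, custom_data={\"frame_index\": 0})\n\njson_sink.write_and_close()  # the data is only written to disk here\n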
"},{"location":"detection/tools/save_detections/#supervision.detection.tools.csv_sink.CSVSink-functions","title":"Functions","text":""},{"location":"detection/tools/save_detections/#supervision.detection.tools.csv_sink.CSVSink.__init__","title":"__init__(file_name='output.csv')","text":"

Initialize the CSVSink instance.

Parameters:

Name Type Description Default file_name str

The name of the CSV file.

'output.csv'

Returns:

Type Description None

None

Source code in supervision/detection/tools/csv_sink.py
def __init__(self, file_name: str = \"output.csv\") -> None:\n    \"\"\"\n    Initialize the CSVSink instance.\n\n    Args:\n        file_name (str): The name of the CSV file.\n\n    Returns:\n        None\n    \"\"\"\n    self.file_name = file_name\n    self.file: Optional[open] = None\n    self.writer: Optional[csv.writer] = None\n    self.header_written = False\n    self.field_names = []\n
"},{"location":"detection/tools/save_detections/#supervision.detection.tools.csv_sink.CSVSink.append","title":"append(detections, custom_data=None)","text":"

Append detection data to the CSV file.

Parameters:

Name Type Description Default detections Detections

The detection data.

required custom_data Dict[str, Any]

Custom data to include.

None

Returns:

Type Description None

None

Source code in supervision/detection/tools/csv_sink.py
def append(\n    self, detections: Detections, custom_data: Dict[str, Any] = None\n) -> None:\n    \"\"\"\n    Append detection data to the CSV file.\n\n    Args:\n        detections (Detections): The detection data.\n        custom_data (Dict[str, Any]): Custom data to include.\n\n    Returns:\n        None\n    \"\"\"\n    if not self.writer:\n        raise Exception(\n            f\"Cannot append to CSV: The file '{self.file_name}' is not open.\"\n        )\n    field_names = CSVSink.parse_field_names(detections, custom_data)\n    if not self.header_written:\n        self.field_names = field_names\n        self.writer.writerow(field_names)\n        self.header_written = True\n\n    if field_names != self.field_names:\n        print(\n            f\"Field names do not match the header. \"\n            f\"Expected: {self.field_names}, given: {field_names}\"\n        )\n\n    parsed_rows = CSVSink.parse_detection_data(detections, custom_data)\n    for row in parsed_rows:\n        self.writer.writerow(\n            [row.get(field_name, \"\") for field_name in self.field_names]\n        )\n
"},{"location":"detection/tools/save_detections/#supervision.detection.tools.csv_sink.CSVSink.close","title":"close()","text":"

Close the CSV file.

Returns:

Type Description None

None

Source code in supervision/detection/tools/csv_sink.py
def close(self) -> None:\n    \"\"\"\n    Close the CSV file.\n\n    Returns:\n        None\n    \"\"\"\n    if self.file:\n        self.file.close()\n
"},{"location":"detection/tools/save_detections/#supervision.detection.tools.csv_sink.CSVSink.open","title":"open()","text":"

Open the CSV file for writing.

Returns:

Type Description None

None

Source code in supervision/detection/tools/csv_sink.py
def open(self) -> None:\n    \"\"\"\n    Open the CSV file for writing.\n\n    Returns:\n        None\n    \"\"\"\n    parent_directory = os.path.dirname(self.file_name)\n    if parent_directory and not os.path.exists(parent_directory):\n        os.makedirs(parent_directory)\n\n    self.file = open(self.file_name, \"w\", newline=\"\")\n    self.writer = csv.writer(self.file)\n
"},{"location":"detection/tools/save_detections/#supervision.detection.tools.json_sink.JSONSink-functions","title":"Functions","text":""},{"location":"detection/tools/save_detections/#supervision.detection.tools.json_sink.JSONSink.__init__","title":"__init__(file_name='output.json')","text":"

Initialize the JSONSink instance.

Parameters:

Name Type Description Default file_name str

The name of the JSON file.

'output.json'

Returns:

Type Description None

None

Source code in supervision/detection/tools/json_sink.py
def __init__(self, file_name: str = \"output.json\") -> None:\n    \"\"\"\n    Initialize the JSONSink instance.\n\n    Args:\n        file_name (str): The name of the JSON file.\n\n    Returns:\n        None\n    \"\"\"\n    self.file_name = file_name\n    self.file: Optional[open] = None\n    self.data: List[Dict[str, Any]] = []\n
"},{"location":"detection/tools/save_detections/#supervision.detection.tools.json_sink.JSONSink.append","title":"append(detections, custom_data=None)","text":"

Append detection data to the JSON file.

Parameters:

Name Type Description Default detections Detections

The detection data.

required custom_data Dict[str, Any]

Custom data to include.

None

Returns:

Type Description None

None

Source code in supervision/detection/tools/json_sink.py
def append(\n    self, detections: Detections, custom_data: Dict[str, Any] = None\n) -> None:\n    \"\"\"\n    Append detection data to the JSON file.\n\n    Args:\n        detections (Detections): The detection data.\n        custom_data (Dict[str, Any]): Custom data to include.\n\n    Returns:\n        None\n    \"\"\"\n    parsed_rows = JSONSink.parse_detection_data(detections, custom_data)\n    self.data.extend(parsed_rows)\n
"},{"location":"detection/tools/save_detections/#supervision.detection.tools.json_sink.JSONSink.open","title":"open()","text":"

Open the JSON file for writing.

Returns:

Type Description None

None

Source code in supervision/detection/tools/json_sink.py
def open(self) -> None:\n    \"\"\"\n    Open the JSON file for writing.\n\n    Returns:\n        None\n    \"\"\"\n    parent_directory = os.path.dirname(self.file_name)\n    if parent_directory and not os.path.exists(parent_directory):\n        os.makedirs(parent_directory)\n\n    self.file = open(self.file_name, \"w\")\n
"},{"location":"detection/tools/save_detections/#supervision.detection.tools.json_sink.JSONSink.write_and_close","title":"write_and_close()","text":"

Write and close the JSON file.

Returns:

Type Description None

None

Source code in supervision/detection/tools/json_sink.py
def write_and_close(self) -> None:\n    \"\"\"\n    Write and close the JSON file.\n\n    Returns:\n        None\n    \"\"\"\n    if self.file:\n        json.dump(self.data, self.file, indent=4)\n        self.file.close()\n
"},{"location":"detection/tools/smoother/","title":"Detection Smoother","text":"

A utility class for smoothing detections over multiple frames in video tracking. It maintains a history of detections for each track and provides smoothed predictions based on these histories.

Warning

  • DetectionsSmoother requires the tracker_id for each detection. Refer to Roboflow Trackers for information on integrating tracking into your inference pipeline.
  • This class is not compatible with segmentation models.
Example
import supervision as sv\n\nfrom ultralytics import YOLO\n\nvideo_info = sv.VideoInfo.from_video_path(video_path=<SOURCE_FILE_PATH>)\nframe_generator = sv.get_video_frames_generator(source_path=<SOURCE_FILE_PATH>)\n\nmodel = YOLO(<MODEL_PATH>)\ntracker = sv.ByteTrack(frame_rate=video_info.fps)\nsmoother = sv.DetectionsSmoother()\n\nbounding_box_annotator = sv.BoundingBoxAnnotator()\n\nwith sv.VideoSink(<TARGET_FILE_PATH>, video_info=video_info) as sink:\n    for frame in frame_generator:\n        result = model(frame)[0]\n        detections = sv.Detections.from_ultralytics(result)\n        detections = tracker.update_with_detections(detections)\n        detections = smoother.update_with_detections(detections)\n\n        annotated_frame = bounding_box_annotator.annotate(frame.copy(), detections)\n        sink.write_frame(annotated_frame)\n
Source code in supervision/detection/tools/smoother.py
class DetectionsSmoother:\n    \"\"\"\n    A utility class for smoothing detections over multiple frames in video tracking.\n    It maintains a history of detections for each track and provides smoothed\n    predictions based on these histories.\n\n    <video controls>\n        <source\n            src=\"https://media.roboflow.com/supervision-detection-smoothing.mp4\"\n            type=\"video/mp4\">\n    </video>\n\n    !!! warning\n\n        - `DetectionsSmoother` requires the `tracker_id` for each detection. Refer to\n          [Roboflow Trackers](/latest/trackers/) for\n          information on integrating tracking into your inference pipeline.\n        - This class is not compatible with segmentation models.\n\n    Example:\n        ```python\n        import supervision as sv\n\n        from ultralytics import YOLO\n\n        video_info = sv.VideoInfo.from_video_path(video_path=<SOURCE_FILE_PATH>)\n        frame_generator = sv.get_video_frames_generator(source_path=<SOURCE_FILE_PATH>)\n\n        model = YOLO(<MODEL_PATH>)\n        tracker = sv.ByteTrack(frame_rate=video_info.fps)\n        smoother = sv.DetectionsSmoother()\n\n        annotator = sv.BoundingBoxAnnotator()\n\n        with sv.VideoSink(<TARGET_FILE_PATH>, video_info=video_info) as sink:\n            for frame in frame_generator:\n                result = model(frame)[0]\n                detections = sv.Detections.from_ultralytics(result)\n                detections = tracker.update_with_detections(detections)\n                detections = smoother.update_with_detections(detections)\n\n                annotated_frame = bounding_box_annotator.annotate(frame.copy(), detections)\n                sink.write_frame(annotated_frame)\n        ```\n    \"\"\"  # noqa: E501 // docs\n\n    def __init__(self, length: int = 5) -> None:\n        \"\"\"\n        Args:\n            length (int): The maximum number of frames to consider for smoothing\n                detections. Defaults to 5.\n        \"\"\"\n        self.tracks = defaultdict(lambda: deque(maxlen=length))\n\n    def update_with_detections(self, detections: Detections) -> Detections:\n        \"\"\"\n        Updates the smoother with a new set of detections from a frame.\n\n        Args:\n            detections (Detections): The detections to add to the smoother.\n        \"\"\"\n\n        if detections.tracker_id is None:\n            warnings.warn(\n                \"Smoothing skipped. DetectionsSmoother requires tracker_id. 
Refer to \"\n                \"https://supervision.roboflow.com/latest/trackers for more \"\n                \"information.\",\n                category=SupervisionWarnings,\n            )\n            return detections\n\n        for detection_idx in range(len(detections)):\n            tracker_id = detections.tracker_id[detection_idx]\n\n            self.tracks[tracker_id].append(detections[detection_idx])\n\n        for track_id in self.tracks.keys():\n            if track_id not in detections.tracker_id:\n                self.tracks[track_id].append(None)\n\n        for track_id in list(self.tracks.keys()):\n            if all([d is None for d in self.tracks[track_id]]):\n                del self.tracks[track_id]\n\n        return self.get_smoothed_detections()\n\n    def get_track(self, track_id: int) -> Optional[Detections]:\n        track = self.tracks.get(track_id, None)\n        if track is None:\n            return None\n\n        track = [d for d in track if d is not None]\n        if len(track) == 0:\n            return None\n\n        ret = deepcopy(track[0])\n        ret.xyxy = np.mean([d.xyxy for d in track], axis=0)\n        ret.confidence = np.mean([d.confidence for d in track], axis=0)\n\n        return ret\n\n    def get_smoothed_detections(self) -> Detections:\n        tracked_detections = []\n        for track_id in self.tracks:\n            track = self.get_track(track_id)\n            if track is not None:\n                tracked_detections.append(track)\n\n        detections = Detections.merge(tracked_detections)\n        if len(detections) == 0:\n            detections.tracker_id = np.array([], dtype=int)\n\n        return detections\n
"},{"location":"detection/tools/smoother/#supervision.detection.tools.smoother.DetectionsSmoother-functions","title":"Functions","text":""},{"location":"detection/tools/smoother/#supervision.detection.tools.smoother.DetectionsSmoother.__init__","title":"__init__(length=5)","text":"

Parameters:

Name Type Description Default length int

The maximum number of frames to consider for smoothing detections. Defaults to 5.

5 Source code in supervision/detection/tools/smoother.py
def __init__(self, length: int = 5) -> None:\n    \"\"\"\n    Args:\n        length (int): The maximum number of frames to consider for smoothing\n            detections. Defaults to 5.\n    \"\"\"\n    self.tracks = defaultdict(lambda: deque(maxlen=length))\n
"},{"location":"detection/tools/smoother/#supervision.detection.tools.smoother.DetectionsSmoother.update_with_detections","title":"update_with_detections(detections)","text":"

Updates the smoother with a new set of detections from a frame.

Parameters:

Name Type Description Default detections Detections

The detections to add to the smoother.

required Source code in supervision/detection/tools/smoother.py
def update_with_detections(self, detections: Detections) -> Detections:\n    \"\"\"\n    Updates the smoother with a new set of detections from a frame.\n\n    Args:\n        detections (Detections): The detections to add to the smoother.\n    \"\"\"\n\n    if detections.tracker_id is None:\n        warnings.warn(\n            \"Smoothing skipped. DetectionsSmoother requires tracker_id. Refer to \"\n            \"https://supervision.roboflow.com/latest/trackers for more \"\n            \"information.\",\n            category=SupervisionWarnings,\n        )\n        return detections\n\n    for detection_idx in range(len(detections)):\n        tracker_id = detections.tracker_id[detection_idx]\n\n        self.tracks[tracker_id].append(detections[detection_idx])\n\n    for track_id in self.tracks.keys():\n        if track_id not in detections.tracker_id:\n            self.tracks[track_id].append(None)\n\n    for track_id in list(self.tracks.keys()):\n        if all([d is None for d in self.tracks[track_id]]):\n            del self.tracks[track_id]\n\n    return self.get_smoothed_detections()\n
"},{"location":"how_to/detect_and_annotate/","title":"Detect and Annotate","text":"

Supervision provides a seamless process for annotating predictions generated by various object detection and segmentation models. This guide shows how to perform inference with the Inference, Ultralytics or Transformers packages. Following this, you'll learn how to import these predictions into Supervision and use them to annotate the source image.

"},{"location":"how_to/detect_and_annotate/#run-detection","title":"Run Detection","text":"

First, you'll need to obtain predictions from your object detection or segmentation model.

InferenceUltralyticsTransformers
import cv2\nfrom inference import get_model\n\nmodel = get_model(model_id=\"yolov8n-640\")\nimage = cv2.imread(<SOURCE_IMAGE_PATH>)\nresults = model.infer(image)[0]\n
import cv2\nfrom ultralytics import YOLO\n\nmodel = YOLO(\"yolov8n.pt\")\nimage = cv2.imread(<SOURCE_IMAGE_PATH>)\nresults = model(image)[0]\n
import torch\nfrom PIL import Image\nfrom transformers import DetrImageProcessor, DetrForObjectDetection\n\nprocessor = DetrImageProcessor.from_pretrained(\"facebook/detr-resnet-50\")\nmodel = DetrForObjectDetection.from_pretrained(\"facebook/detr-resnet-50\")\n\nimage = Image.open(<SOURCE_IMAGE_PATH>)\ninputs = processor(images=image, return_tensors=\"pt\")\n\nwith torch.no_grad():\n    outputs = model(**inputs)\n\nwidth, height = image.size\ntarget_size = torch.tensor([[height, width]])\nresults = processor.post_process_object_detection(\n    outputs=outputs, target_sizes=target_size)[0]\n
"},{"location":"how_to/detect_and_annotate/#load-predictions-into-supervision","title":"Load Predictions into Supervision","text":"

Now that we have predictions from a model, we can load them into Supervision.

InferenceUltralyticsTransformers

We can do so using the sv.Detections.from_inference method, which accepts model results from both detection and segmentation models.

import cv2\nimport supervision as sv\nfrom inference import get_model\n\nmodel = get_model(model_id=\"yolov8n-640\")\nimage = cv2.imread(<SOURCE_IMAGE_PATH>)\nresults = model.infer(image)[0]\ndetections = sv.Detections.from_inference(results)\n

We can do so using the sv.Detections.from_ultralytics method, which accepts model results from both detection and segmentation models.

import cv2\nimport supervision as sv\nfrom ultralytics import YOLO\n\nmodel = YOLO(\"yolov8n.pt\")\nimage = cv2.imread(<SOURCE_IMAGE_PATH>)\nresults = model(image)[0]\ndetections = sv.Detections.from_ultralytics(results)\n

We can do so using the sv.Detections.from_transformers method, which accepts model results from both detection and segmentation models.

import torch\nimport supervision as sv\nfrom PIL import Image\nfrom transformers import DetrImageProcessor, DetrForObjectDetection\n\nprocessor = DetrImageProcessor.from_pretrained(\"facebook/detr-resnet-50\")\nmodel = DetrForObjectDetection.from_pretrained(\"facebook/detr-resnet-50\")\n\nimage = Image.open(<SOURCE_IMAGE_PATH>)\ninputs = processor(images=image, return_tensors=\"pt\")\n\nwith torch.no_grad():\n    outputs = model(**inputs)\n\nwidth, height = image.size\ntarget_size = torch.tensor([[height, width]])\nresults = processor.post_process_object_detection(\n    outputs=outputs, target_sizes=target_size)[0]\ndetections = sv.Detections.from_transformers(\n    transformers_results=results,\n    id2label=model.config.id2label)\n

You can load predictions from other computer vision frameworks and libraries using the following converters (a short Detectron2 sketch follows the list):

  • from_deepsparse (Deepsparse)
  • from_detectron2 (Detectron2)
  • from_mmdetection (MMDetection)
  • from_sam (Segment Anything Model)
  • from_yolo_nas (YOLO-NAS)
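A hedged Detectron2 sketch (the model zoo config shown is an assumption used for illustration; the DefaultPredictor output is passed to from_detectron2 positionally):
import cv2\nimport supervision as sv\nfrom detectron2 import model_zoo\nfrom detectron2.config import get_cfg\nfrom detectron2.engine import DefaultPredictor\n\n# illustrative model zoo entry; swap in any detection config you use\ncfg = get_cfg()\ncfg.merge_from_file(model_zoo.get_config_file(\"COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml\"))\ncfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(\"COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml\")\npredictor = DefaultPredictor(cfg)\n\nimage = cv2.imread(<SOURCE_IMAGE_PATH>)\nresult = predictor(image)\ndetections = sv.Detections.from_detectron2(result)\n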
"},{"location":"how_to/detect_and_annotate/#annotate-image-with-detections","title":"Annotate Image with Detections","text":"

Finally, we can annotate the image with the predictions. Since we are working with an object detection model, we will use the sv.BoundingBoxAnnotator and sv.LabelAnnotator classes.

InferenceUltralyticsTransformers
import cv2\nimport supervision as sv\nfrom inference import get_model\n\nmodel = get_model(model_id=\"yolov8n-640\")\nimage = cv2.imread(<SOURCE_IMAGE_PATH>)\nresults = model.infer(image)[0]\ndetections = sv.Detections.from_inference(results)\n\nbounding_box_annotator = sv.BoundingBoxAnnotator()\nlabel_annotator = sv.LabelAnnotator()\n\nannotated_image = bounding_box_annotator.annotate(\n    scene=image, detections=detections)\nannotated_image = label_annotator.annotate(\n    scene=annotated_image, detections=detections)\n
import cv2\nimport supervision as sv\nfrom ultralytics import YOLO\n\nmodel = YOLO(\"yolov8n.pt\")\nimage = cv2.imread(<SOURCE_IMAGE_PATH>)\nresults = model(image)[0]\ndetections = sv.Detections.from_ultralytics(results)\n\nbounding_box_annotator = sv.BoundingBoxAnnotator()\nlabel_annotator = sv.LabelAnnotator()\n\nannotated_image = bounding_box_annotator.annotate(\n    scene=image, detections=detections)\nannotated_image = label_annotator.annotate(\n    scene=annotated_image, detections=detections)\n
import torch\nimport supervision as sv\nfrom PIL import Image\nfrom transformers import DetrImageProcessor, DetrForObjectDetection\n\nprocessor = DetrImageProcessor.from_pretrained(\"facebook/detr-resnet-50\")\nmodel = DetrForObjectDetection.from_pretrained(\"facebook/detr-resnet-50\")\n\nimage = Image.open(<SOURCE_IMAGE_PATH>)\ninputs = processor(images=image, return_tensors=\"pt\")\n\nwith torch.no_grad():\n    outputs = model(**inputs)\n\nwidth, height = image.size\ntarget_size = torch.tensor([[height, width]])\nresults = processor.post_process_object_detection(\n    outputs=outputs, target_sizes=target_size)[0]\ndetections = sv.Detections.from_transformers(\n    transformers_results=results,\n    id2label=model.config.id2label)\n\nbounding_box_annotator = sv.BoundingBoxAnnotator()\nlabel_annotator = sv.LabelAnnotator()\n\nannotated_image = bounding_box_annotator.annotate(\n    scene=image, detections=detections)\nannotated_image = label_annotator.annotate(\n    scene=annotated_image, detections=detections)\n

"},{"location":"how_to/detect_and_annotate/#display-custom-labels","title":"Display Custom Labels","text":"

By default, sv.LabelAnnotator will label each detection with its class_name (if possible) or class_id. You can override this behavior by passing a list of custom labels to the annotate method.

InferenceUltralyticsTransformers
import cv2\nimport supervision as sv\nfrom inference import get_model\n\nmodel = get_model(model_id=\"yolov8n-640\")\nimage = cv2.imread(<SOURCE_IMAGE_PATH>)\nresults = model.infer(image)[0]\ndetections = sv.Detections.from_inference(results)\n\nbounding_box_annotator = sv.BoundingBoxAnnotator()\nlabel_annotator = sv.LabelAnnotator()\n\nlabels = [\n    f\"{class_name} {confidence:.2f}\"\n    for class_name, confidence\n    in zip(detections['class_name'], detections.confidence)\n]\n\nannotated_image = bounding_box_annotator.annotate(\n    scene=image, detections=detections)\nannotated_image = label_annotator.annotate(\n    scene=annotated_image, detections=detections, labels=labels)\n
import cv2\nimport supervision as sv\nfrom ultralytics import YOLO\n\nmodel = YOLO(\"yolov8n.pt\")\nimage = cv2.imread(<SOURCE_IMAGE_PATH>)\nresults = model(image)[0]\ndetections = sv.Detections.from_ultralytics(results)\n\nbounding_box_annotator = sv.BoundingBoxAnnotator()\nlabel_annotator = sv.LabelAnnotator()\n\nlabels = [\n    f\"{class_name} {confidence:.2f}\"\n    for class_name, confidence\n    in zip(detections['class_name'], detections.confidence)\n]\n\nannotated_image = bounding_box_annotator.annotate(\n    scene=image, detections=detections)\nannotated_image = label_annotator.annotate(\n    scene=annotated_image, detections=detections, labels=labels)\n
import torch\nimport supervision as sv\nfrom PIL import Image\nfrom transformers import DetrImageProcessor, DetrForObjectDetection\n\nprocessor = DetrImageProcessor.from_pretrained(\"facebook/detr-resnet-50\")\nmodel = DetrForObjectDetection.from_pretrained(\"facebook/detr-resnet-50\")\n\nimage = Image.open(<SOURCE_IMAGE_PATH>)\ninputs = processor(images=image, return_tensors=\"pt\")\n\nwith torch.no_grad():\n    outputs = model(**inputs)\n\nwidth, height = image.size\ntarget_size = torch.tensor([[height, width]])\nresults = processor.post_process_object_detection(\n    outputs=outputs, target_sizes=target_size)[0]\ndetections = sv.Detections.from_transformers(\n    transformers_results=results,\n    id2label=model.config.id2label)\n\nbounding_box_annotator = sv.BoundingBoxAnnotator()\nlabel_annotator = sv.LabelAnnotator()\n\nlabels = [\n    f\"{class_name} {confidence:.2f}\"\n    for class_name, confidence\n    in zip(detections['class_name'], detections.confidence)\n]\n\nannotated_image = bounding_box_annotator.annotate(\n    scene=image, detections=detections)\nannotated_image = label_annotator.annotate(\n    scene=annotated_image, detections=detections, labels=labels)\n

"},{"location":"how_to/detect_and_annotate/#annotate-image-with-segmentations","title":"Annotate Image with Segmentations","text":"

If you are running a segmentation model, sv.MaskAnnotator is a drop-in replacement for sv.BoundingBoxAnnotator that allows you to draw masks instead of boxes.

InferenceUltralyticsTransformers
import cv2\nimport supervision as sv\nfrom inference import get_model\n\nmodel = get_model(model_id=\"yolov8n-seg-640\")\nimage = cv2.imread(<SOURCE_IMAGE_PATH>)\nresults = model.infer(image)[0]\ndetections = sv.Detections.from_inference(results)\n\nmask_annotator = sv.MaskAnnotator()\nlabel_annotator = sv.LabelAnnotator(text_position=sv.Position.CENTER_OF_MASS)\n\nannotated_image = mask_annotator.annotate(\n    scene=image, detections=detections)\nannotated_image = label_annotator.annotate(\n    scene=annotated_image, detections=detections)\n
import cv2\nimport supervision as sv\nfrom ultralytics import YOLO\n\nmodel = YOLO(\"yolov8n-seg.pt\")\nimage = cv2.imread(<SOURCE_IMAGE_PATH>)\nresults = model(image)[0]\ndetections = sv.Detections.from_ultralytics(results)\n\nmask_annotator = sv.MaskAnnotator()\nlabel_annotator = sv.LabelAnnotator(text_position=sv.Position.CENTER_OF_MASS)\n\nannotated_image = mask_annotator.annotate(\n    scene=image, detections=detections)\nannotated_image = label_annotator.annotate(\n    scene=annotated_image, detections=detections)\n
import torch\nimport supervision as sv\nfrom PIL import Image\nfrom transformers import DetrImageProcessor, DetrForSegmentation\n\nprocessor = DetrImageProcessor.from_pretrained(\"facebook/detr-resnet-50-panoptic\")\nmodel = DetrForSegmentation.from_pretrained(\"facebook/detr-resnet-50-panoptic\")\n\nimage = Image.open(<SOURCE_IMAGE_PATH>)\ninputs = processor(images=image, return_tensors=\"pt\")\n\nwith torch.no_grad():\n    outputs = model(**inputs)\n\nwidth, height = image.size\ntarget_size = torch.tensor([[height, width]])\nresults = processor.post_process_segmentation(\n    outputs=outputs, target_sizes=target_size)[0]\ndetections = sv.Detections.from_transformers(\n    transformers_results=results,\n    id2label=model.config.id2label)\n\nmask_annotator = sv.MaskAnnotator()\nlabel_annotator = sv.LabelAnnotator(text_position=sv.Position.CENTER_OF_MASS)\n\nlabels = [\n    f\"{class_name} {confidence:.2f}\"\n    for class_name, confidence\n    in zip(detections['class_name'], detections.confidence)\n]\n\nannotated_image = mask_annotator.annotate(\n    scene=image, detections=detections)\nannotated_image = label_annotator.annotate(\n    scene=annotated_image, detections=detections, labels=labels)\n

"},{"location":"how_to/detect_small_objects/","title":"Detect Small Objects","text":"

This guide shows how to detect small objects with the Inference, Ultralytics or Transformers packages using InferenceSlicer.

"},{"location":"how_to/detect_small_objects/#baseline-detection","title":"Baseline Detection","text":"

Small object detection in high-resolution images presents challenges due to the objects' size relative to the image resolution.

InferenceUltralyticsTransformers
import cv2\nimport supervision as sv\nfrom inference import get_model\n\nmodel = get_model(model_id=\"yolov8x-640\")\nimage = cv2.imread(<SOURCE_IMAGE_PATH>)\nresults = model.infer(image)[0]\ndetections = sv.Detections.from_inference(results)\n\nbounding_box_annotator = sv.BoundingBoxAnnotator()\nlabel_annotator = sv.LabelAnnotator()\n\nannotated_image = bounding_box_annotator.annotate(\n    scene=image, detections=detections)\nannotated_image = label_annotator.annotate(\n    scene=annotated_image, detections=detections)\n
import cv2\nimport supervision as sv\nfrom ultralytics import YOLO\n\nmodel = YOLO(\"yolov8x.pt\")\nimage = cv2.imread(<SOURCE_IMAGE_PATH>)\nresults = model(image)[0]\ndetections = sv.Detections.from_ultralytics(results)\n\nbounding_box_annotator = sv.BoundingBoxAnnotator()\nlabel_annotator = sv.LabelAnnotator()\n\nannotated_image = bounding_box_annotator.annotate(\n    scene=image, detections=detections)\nannotated_image = label_annotator.annotate(\n    scene=annotated_image, detections=detections)\n
import torch\nimport supervision as sv\nfrom PIL import Image\nfrom transformers import DetrImageProcessor, DetrForObjectDetection\n\nprocessor = DetrImageProcessor.from_pretrained(\"facebook/detr-resnet-50\")\nmodel = DetrForObjectDetection.from_pretrained(\"facebook/detr-resnet-50\")\n\nimage = Image.open(<SOURCE_IMAGE_PATH>)\ninputs = processor(images=image, return_tensors=\"pt\")\n\nwith torch.no_grad():\n    outputs = model(**inputs)\n\nwidth, height = image.size\ntarget_size = torch.tensor([[height, width]])\nresults = processor.post_process_object_detection(\n    outputs=outputs, target_sizes=target_size)[0]\ndetections = sv.Detections.from_transformers(results)\n\nbounding_box_annotator = sv.BoundingBoxAnnotator()\nlabel_annotator = sv.LabelAnnotator()\n\nlabels = [\n    model.config.id2label[class_id]\n    for class_id\n    in detections.class_id\n]\n\nannotated_image = bounding_box_annotator.annotate(\n    scene=image, detections=detections)\nannotated_image = label_annotator.annotate(\n    scene=annotated_image, detections=detections, labels=labels)\n

"},{"location":"how_to/detect_small_objects/#input-resolution","title":"Input Resolution","text":"

Modifying the input resolution of images before detection can enhance small object identification, at the cost of slower processing and increased memory usage. This method is less effective for ultra-high-resolution images (4K and above).

InferenceUltralytics
import cv2\nimport supervision as sv\nfrom inference import get_model\n\nmodel = get_model(model_id=\"yolov8x-1280\")\nimage = cv2.imread(<SOURCE_IMAGE_PATH>)\nresults = model.infer(image)[0]\ndetections = sv.Detections.from_inference(results)\n\nbounding_box_annotator = sv.BoundingBoxAnnotator()\nlabel_annotator = sv.LabelAnnotator()\n\nannotated_image = bounding_box_annotator.annotate(\n    scene=image, detections=detections)\nannotated_image = label_annotator.annotate(\n    scene=annotated_image, detections=detections)\n
import cv2\nimport supervision as sv\nfrom ultralytics import YOLO\n\nmodel = YOLO(\"yolov8x.pt\")\nimage = cv2.imread(<SOURCE_IMAGE_PATH>)\nresults = model(image, imgsz=1280)[0]\ndetections = sv.Detections.from_ultralytics(results)\n\nbounding_box_annotator = sv.BoundingBoxAnnotator()\nlabel_annotator = sv.LabelAnnotator()\n\nannotated_image = bounding_box_annotator.annotate(\n    scene=image, detections=detections)\nannotated_image = label_annotator.annotate(\n    scene=annotated_image, detections=detections)\n

"},{"location":"how_to/detect_small_objects/#inference-slicer","title":"Inference Slicer","text":"

InferenceSlicer processes high-resolution images by dividing them into smaller segments, detecting objects within each, and aggregating the results.

InferenceUltralyticsTransformers
import cv2\nimport numpy as np\nimport supervision as sv\nfrom inference import get_model\n\nmodel = get_model(model_id=\"yolov8x-640\")\nimage = cv2.imread(<SOURCE_IMAGE_PATH>)\n\ndef callback(image_slice: np.ndarray) -> sv.Detections:\n    results = model.infer(image_slice)[0]\n    return sv.Detections.from_inference(results)\n\nslicer = sv.InferenceSlicer(callback = callback)\ndetections = slicer(image)\n\nbounding_box_annotator = sv.BoundingBoxAnnotator()\nlabel_annotator = sv.LabelAnnotator()\n\nannotated_image = bounding_box_annotator.annotate(\n    scene=image, detections=detections)\nannotated_image = label_annotator.annotate(\n    scene=annotated_image, detections=detections)\n
import cv2\nimport numpy as np\nimport supervision as sv\nfrom ultralytics import YOLO\n\nmodel = YOLO(\"yolov8x.pt\")\nimage = cv2.imread(<SOURCE_IMAGE_PATH>)\n\ndef callback(image_slice: np.ndarray) -> sv.Detections:\n    result = model(image_slice)[0]\n    return sv.Detections.from_ultralytics(result)\n\nslicer = sv.InferenceSlicer(callback = callback)\ndetections = slicer(image)\n\nbounding_box_annotator = sv.BoundingBoxAnnotator()\nlabel_annotator = sv.LabelAnnotator()\n\nannotated_image = bounding_box_annotator.annotate(\n    scene=image, detections=detections)\nannotated_image = label_annotator.annotate(\n    scene=annotated_image, detections=detections)\n
import cv2\nimport torch\nimport numpy as np\nimport supervision as sv\nfrom PIL import Image\nfrom transformers import DetrImageProcessor, DetrForObjectDetection\n\nprocessor = DetrImageProcessor.from_pretrained(\"facebook/detr-resnet-50\")\nmodel = DetrForObjectDetection.from_pretrained(\"facebook/detr-resnet-50\")\n\nimage = cv2.imread(<SOURCE_IMAGE_PATH>)\n\ndef callback(image_slice: np.ndarray) -> sv.Detections:\n    image_slice = cv2.cvtColor(image_slice, cv2.COLOR_BGR2RGB)\n    image_slice = Image.fromarray(image_slice)\n    inputs = processor(images=image_slice, return_tensors=\"pt\")\n\n    with torch.no_grad():\n        outputs = model(**inputs)\n\n    width, height = image_slice.size\n    target_size = torch.tensor([[width, height]])\n    results = processor.post_process_object_detection(\n        outputs=outputs, target_sizes=target_size)[0]\n    return sv.Detections.from_transformers(results)\n\nslicer = sv.InferenceSlicer(callback = callback)\ndetections = slicer(image)\n\nbounding_box_annotator = sv.BoundingBoxAnnotator()\nlabel_annotator = sv.LabelAnnotator()\n\nlabels = [\n    model.config.id2label[class_id]\n    for class_id\n    in detections.class_id\n]\n\nannotated_image = bounding_box_annotator.annotate(\n    scene=image, detections=detections)\nannotated_image = label_annotator.annotate(\n    scene=annotated_image, detections=detections, labels=labels)\n
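Slicing granularity can usually be tuned. The sketch below assumes the slice_wh and overlap_ratio_wh parameters available in recent supervision releases; smaller slices with some overlap tend to help with very small objects, at the cost of running the model on more crops.
import cv2\nimport numpy as np\nimport supervision as sv\nfrom ultralytics import YOLO\n\nmodel = YOLO(\"yolov8x.pt\")\nimage = cv2.imread(<SOURCE_IMAGE_PATH>)\n\ndef callback(image_slice: np.ndarray) -> sv.Detections:\n    result = model(image_slice)[0]\n    return sv.Detections.from_ultralytics(result)\n\nslicer = sv.InferenceSlicer(\n    callback=callback,\n    slice_wh=(320, 320),\n    overlap_ratio_wh=(0.2, 0.2),\n)\ndetections = slicer(image)\n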

"},{"location":"how_to/detect_small_objects/#small-object-segmentation","title":"Small Object Segmentation","text":"

InferenceSlicer can perform segmentation tasks too.

InferenceUltralytics
import cv2\nimport numpy as np\nimport supervision as sv\nfrom inference import get_model\n\nmodel = get_model(model_id=\"yolov8x-seg-640\")\nimage = cv2.imread(<SOURCE_IMAGE_PATH>)\n\ndef callback(image_slice: np.ndarray) -> sv.Detections:\n    results = model.infer(image_slice)[0]\n    return sv.Detections.from_inference(results)\n\nslicer = sv.InferenceSlicer(callback = callback)\ndetections = slicer(image)\n\nmask_annotator = sv.MaskAnnotator()\nlabel_annotator = sv.LabelAnnotator()\n\nannotated_image = mask_annotator.annotate(\n    scene=image, detections=detections)\nannotated_image = label_annotator.annotate(\n    scene=annotated_image, detections=detections)\n
import cv2\nimport numpy as np\nimport supervision as sv\nfrom ultralytics import YOLO\n\nmodel = YOLO(\"yolov8x-seg.pt\")\nimage = cv2.imread(<SOURCE_IMAGE_PATH>)\n\ndef callback(image_slice: np.ndarray) -> sv.Detections:\n    result = model(image_slice)[0]\n    return sv.Detections.from_ultralytics(result)\n\nslicer = sv.InferenceSlicer(callback = callback)\ndetections = slicer(image)\n\nmask_annotator = sv.MaskAnnotator()\nlabel_annotator = sv.LabelAnnotator()\n\nannotated_image = mask_annotator.annotate(\n    scene=image, detections=detections)\nannotated_image = label_annotator.annotate(\n    scene=annotated_image, detections=detections)\n

"},{"location":"how_to/filter_detections/","title":"Filter Detections","text":"

The advanced filtering capabilities of the Detections class offer users a versatile and efficient way to narrow down and refine object detections. This section outlines various filtering methods, including filtering by specific class or a set of classes, confidence, object area, bounding box area, relative area, box dimensions, and designated zones. Each method is demonstrated with concise code examples to provide users with a clear understanding of how to implement the filters in their applications.

"},{"location":"how_to/filter_detections/#by-specific-class","title":"by specific class","text":"

Allows you to select detections that belong only to one selected class.

AfterBefore
import supervision as sv\n\ndetections = sv.Detections(...)\ndetections = detections[detections.class_id == 0]\n

import supervision as sv\n\ndetections = sv.Detections(...)\ndetections = detections[detections.class_id == 0]\n

"},{"location":"how_to/filter_detections/#by-set-of-classes","title":"by set of classes","text":"

Allows you to select detections that belong only to a selected set of classes.

AfterBefore
import numpy as np\nimport supervision as sv\n\nselected_classes = [0, 2, 3]\ndetections = sv.Detections(...)\ndetections = detections[np.isin(detections.class_id, selected_classes)]\n

import numpy as np\nimport supervision as sv\n\nclass_id = [0, 2, 3]\ndetections = sv.Detections(...)\ndetections = detections[np.isin(detections.class_id, class_id)]\n

"},{"location":"how_to/filter_detections/#by-confidence","title":"by confidence","text":"

Allows you to select detections with a specific confidence value, for example, higher than a selected threshold.

AfterBefore
import supervision as sv\n\ndetections = sv.Detections(...)\ndetections = detections[detections.confidence > 0.5]\n

import supervision as sv\n\ndetections = sv.Detections(...)\ndetections = detections[detections.confidence > 0.5]\n

"},{"location":"how_to/filter_detections/#by-area","title":"by area","text":"

Allows you to select detections based on their size. We define the area as the number of pixels occupied by the detection in the image. In the example below, we have filtered out the detections that are too small.

AfterBefore
import supervision as sv\n\ndetections = sv.Detections(...)\ndetections = detections[detections.area > 1000]\n

import supervision as sv\n\ndetections = sv.Detections(...)\ndetections = detections[detections.area > 1000]\n

"},{"location":"how_to/filter_detections/#by-relative-area","title":"by relative area","text":"

Allows you to select detections based on their size relative to the size of the whole image. The notion of detection size can change depending on the image: a detection occupying 10000 square px can be large on a 1280x720 image but small on a 3840x2160 image. In such cases, we can filter out detections based on the percentage of the image area they occupy. In the example below, we remove detections that are too large.

AfterBefore
import supervision as sv\n\nimage = ...\nheight, width, channels = image.shape\nimage_area = height * width\n\ndetections = sv.Detections(...)\ndetections = detections[(detections.area / image_area) < 0.8]\n

import supervision as sv\n\nimage = ...\nheight, width, channels = image.shape\nimage_area = height * width\n\ndetections = sv.Detections(...)\ndetections = detections[(detections.area / image_area) < 0.8]\n

"},{"location":"how_to/filter_detections/#by-box-dimensions","title":"by box dimensions","text":"

Allows you to select detections based on their dimensions. The size of the bounding box, as well as its coordinates, can be criteria for rejecting detections. Implementing such filtering requires a bit of custom code but is relatively simple and fast; a coordinate-based variant is sketched after the examples below.

AfterBefore
import supervision as sv\n\ndetections = sv.Detections(...)\nw = detections.xyxy[:, 2] - detections.xyxy[:, 0]\nh = detections.xyxy[:, 3] - detections.xyxy[:, 1]\ndetections = detections[(w > 200) & (h > 200)]\n

import supervision as sv\n\ndetections = sv.Detections(...)\nw = detections.xyxy[:, 2] - detections.xyxy[:, 0]\nh = detections.xyxy[:, 3] - detections.xyxy[:, 1]\ndetections = detections[(w > 200) & (h > 200)]\n
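
The same pattern extends to coordinate-based criteria. A minimal sketch, using an illustrative threshold that keeps only boxes whose top edge starts at least 300 pixels from the top of the image:

import supervision as sv\n\ndetections = sv.Detections(...)\n# keep only boxes whose top edge (y1) is at least 300 pixels from the top (illustrative threshold)\ndetections = detections[detections.xyxy[:, 1] > 300]\n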

"},{"location":"how_to/filter_detections/#by-polygonzone","title":"by PolygonZone","text":"

Allows you to use Detections in combination with PolygonZone to separate bounding boxes that fall inside the zone from those outside it. In the example below, you can see how to keep only the detections located within a zone covering the lower part of the image.

AfterBefore
import supervision as sv\n\nzone = sv.PolygonZone(...)\ndetections = sv.Detections(...)\nmask = zone.trigger(detections=detections)\ndetections = detections[mask]\n

import supervision as sv\n\nzone = sv.PolygonZone(...)\ndetections = sv.Detections(...)\nmask = zone.trigger(detections=detections)\ndetections = detections[mask]\n

"},{"location":"how_to/filter_detections/#by-mixed-conditions","title":"by mixed conditions","text":"

Detections' greatest strength, however, is that you can build arbitrarily complex logical conditions by simply combining separate conditions using & or |.

AfterBefore
import supervision as sv\n\nzone = sv.PolygonZone(...)\ndetections = sv.Detections(...)\nmask = zone.trigger(detections=detections)\ndetections = detections[(detections.confidence > 0.7) & mask]\n

import supervision as sv\n\nzone = sv.PolygonZone(...)\ndetections = sv.Detections(...)\nmask = zone.trigger(detections=detections)\ndetections = detections[mask]\n
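
The separate conditions can also be combined with |, keeping detections that satisfy either one. A minimal sketch reusing the zone mask from above:

import supervision as sv\n\nzone = sv.PolygonZone(...)\ndetections = sv.Detections(...)\nmask = zone.trigger(detections=detections)\n# keep detections that are either confident enough or inside the zone\ndetections = detections[(detections.confidence > 0.7) | mask]\n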

"},{"location":"how_to/save_detections/","title":"Save Detections","text":"

Supervision provides an easy way to save detections to .CSV and .JSON files for offline processing. This guide demonstrates how to perform video inference using the Inference, Ultralytics or Transformers packages and save the results with sv.CSVSink and sv.JSONSink.

"},{"location":"how_to/save_detections/#run-detection","title":"Run Detection","text":"

First, you'll need to obtain predictions from your object detection or segmentation model. You can learn more on this topic in our How to Detect and Annotate guide.

InferenceUltralyticsTransformers
import supervision as sv\nfrom inference import get_model\n\nmodel = get_model(model_id=\"yolov8n-640\")\nframes_generator = sv.get_video_frames_generator(<SOURCE_VIDEO_PATH>)\n\nfor frame in frames_generator:\n\n    results = model.infer(frame)[0]\n    detections = sv.Detections.from_inference(results)\n
import supervision as sv\nfrom ultralytics import YOLO\n\nmodel = YOLO(\"yolov8n.pt\")\nframes_generator = sv.get_video_frames_generator(<SOURCE_VIDEO_PATH>)\n\nfor frame in frames_generator:\n\n    results = model(frame)[0]\n    detections = sv.Detections.from_ultralytics(results)\n
import torch\nimport supervision as sv\nfrom transformers import DetrImageProcessor, DetrForObjectDetection\n\nprocessor = DetrImageProcessor.from_pretrained(\"facebook/detr-resnet-50\")\nmodel = DetrForObjectDetection.from_pretrained(\"facebook/detr-resnet-50\")\nframes_generator = sv.get_video_frames_generator(<SOURCE_VIDEO_PATH>)\n\nfor frame in frames_generator:\n\n    frame = sv.cv2_to_pillow(frame)\n    inputs = processor(images=frame, return_tensors=\"pt\")\n\n    with torch.no_grad():\n        outputs = model(**inputs)\n\n    width, height = frame.size\n    target_size = torch.tensor([[height, width]])\n    results = processor.post_process_object_detection(\n        outputs=outputs, target_sizes=target_size)[0]\n    detections = sv.Detections.from_transformers(results)\n
"},{"location":"how_to/save_detections/#save-detections-as-csv","title":"Save Detections as CSV","text":"

To save detections to a .CSV file, open sv.CSVSink as a context manager and append each sv.Detections object produced by inference to it. Its fields are parsed and saved to disk.

InferenceUltralyticsTransformers
import supervision as sv\nfrom inference import get_model\n\nmodel = get_model(model_id=\"yolov8n-640\")\nframes_generator = sv.get_video_frames_generator(<SOURCE_VIDEO_PATH>)\n\nwith sv.CSVSink(<TARGET_CSV_PATH>) as sink:\n    for frame in frames_generator:\n\n        results = model.infer(frame)[0]\n        detections = sv.Detections.from_inference(results)\n        sink.append(detections, {})\n
import supervision as sv\nfrom ultralytics import YOLO\n\nmodel = YOLO(\"yolov8n.pt\")\nframes_generator = sv.get_video_frames_generator(<SOURCE_VIDEO_PATH>)\n\nwith sv.CSVSink(<TARGET_CSV_PATH>) as sink:\n    for frame in frames_generator:\n\n        results = model(frame)[0]\n        detections = sv.Detections.from_ultralytics(results)\n        sink.append(detections, {})\n
import torch\nimport supervision as sv\nfrom transformers import DetrImageProcessor, DetrForObjectDetection\n\nprocessor = DetrImageProcessor.from_pretrained(\"facebook/detr-resnet-50\")\nmodel = DetrForObjectDetection.from_pretrained(\"facebook/detr-resnet-50\")\nframes_generator = sv.get_video_frames_generator(<SOURCE_VIDEO_PATH>)\n\nwith sv.CSVSink(<TARGET_CSV_PATH>) as sink:\n    for frame in frames_generator:\n\n        frame = sv.cv2_to_pillow(frame)\n        inputs = processor(images=frame, return_tensors=\"pt\")\n\n        with torch.no_grad():\n            outputs = model(**inputs)\n\n        width, height = frame.size\n        target_size = torch.tensor([[height, width]])\n        results = processor.post_process_object_detection(\n            outputs=outputs, target_sizes=target_size)[0]\n        detections = sv.Detections.from_transformers(results)\n        sink.append(detections, {})\n
x_min y_min x_max y_max class_id confidence tracker_id class_name 2941.14 1269.31 3220.77 1500.67 2 0.8517 car 944.889 899.641 1235.42 1308.80 7 0.6752 truck 1439.78 1077.79 1621.27 1231.40 2 0.6450 car"},{"location":"how_to/save_detections/#custom-fields","title":"Custom Fields","text":"

Besides the regular sv.Detections fields, sv.CSVSink also lets you add custom information to each row via the custom_data dictionary passed to append. Let's use this feature to record the index of the frame from which the detections originate.

InferenceUltralyticsTransformers
import supervision as sv\nfrom inference import get_model\n\nmodel = get_model(model_id=\"yolov8n-640\")\nframes_generator = sv.get_video_frames_generator(<SOURCE_VIDEO_PATH>)\n\nwith sv.CSVSink(<TARGET_CSV_PATH>) as sink:\n    for frame_index, frame in enumerate(frames_generator):\n\n        results = model.infer(frame)[0]\n        detections = sv.Detections.from_inference(results)\n        sink.append(detections, {\"frame_index\": frame_index})\n
import supervision as sv\nfrom ultralytics import YOLO\n\nmodel = YOLO(\"yolov8n.pt\")\nframes_generator = sv.get_video_frames_generator(<SOURCE_VIDEO_PATH>)\n\nwith sv.CSVSink(<TARGET_CSV_PATH>) as sink:\n    for frame_index, frame in enumerate(frames_generator):\n\n        results = model(frame)[0]\n        detections = sv.Detections.from_ultralytics(results)\n        sink.append(detections, {\"frame_index\": frame_index})\n
import torch\nimport supervision as sv\nfrom transformers import DetrImageProcessor, DetrForObjectDetection\n\nprocessor = DetrImageProcessor.from_pretrained(\"facebook/detr-resnet-50\")\nmodel = DetrForObjectDetection.from_pretrained(\"facebook/detr-resnet-50\")\nframes_generator = sv.get_video_frames_generator(<SOURCE_VIDEO_PATH>)\n\nwith sv.CSVSink(<TARGET_CSV_PATH>) as sink:\n    for frame_index, frame in enumerate(frames_generator):\n\n        frame = sv.cv2_to_pillow(frame)\n        inputs = processor(images=frame, return_tensors=\"pt\")\n\n        with torch.no_grad():\n            outputs = model(**inputs)\n\n        width, height = frame.size\n        target_size = torch.tensor([[height, width]])\n        results = processor.post_process_object_detection(\n            outputs=outputs, target_sizes=target_size)[0]\n        detections = sv.Detections.from_transformers(results)\n        sink.append(detections, {\"frame_index\": frame_index})\n
x_min y_min x_max y_max class_id confidence tracker_id class_name frame_index 2941.14 1269.31 3220.77 1500.67 2 0.8517 car 0 944.889 899.641 1235.42 1308.80 7 0.6752 truck 0 1439.78 1077.79 1621.27 1231.40 2 0.6450 car 0"},{"location":"how_to/save_detections/#save-detections-as-json","title":"Save Detections as JSON","text":"

If you prefer to save the result in a .JSON file instead of a .CSV file, all you need to do is replace sv.CSVSink with sv.JSONSink.

InferenceUltralyticsTransformers
import supervision as sv\nfrom inference import get_model\n\nmodel = get_model(model_id=\"yolov8n-640\")\nframes_generator = sv.get_video_frames_generator(<SOURCE_VIDEO_PATH>)\n\nwith sv.JSONSink(<TARGET_CSV_PATH>) as sink:\n    for frame_index, frame in enumerate(frames_generator):\n\n        results = model.infer(frame)[0]\n        detections = sv.Detections.from_inference(results)\n        sink.append(detections, {\"frame_index\": frame_index})\n
import supervision as sv\nfrom ultralytics import YOLO\n\nmodel = YOLO(\"yolov8n.pt\")\nframes_generator = sv.get_video_frames_generator(<SOURCE_VIDEO_PATH>)\n\nwith sv.JSONSink(<TARGET_CSV_PATH>) as sink:\n    for frame_index, frame in enumerate(frames_generator):\n\n        results = model(frame)[0]\n        detections = sv.Detections.from_ultralytics(results)\n        sink.append(detections, {\"frame_index\": frame_index})\n
import torch\nimport supervision as sv\nfrom transformers import DetrImageProcessor, DetrForObjectDetection\n\nprocessor = DetrImageProcessor.from_pretrained(\"facebook/detr-resnet-50\")\nmodel = DetrForObjectDetection.from_pretrained(\"facebook/detr-resnet-50\")\nframes_generator = sv.get_video_frames_generator(<SOURCE_VIDEO_PATH>)\n\nwith sv.JSONSink(<TARGET_CSV_PATH>) as sink:\n    for frame_index, frame in enumerate(frames_generator):\n\n        frame = sv.cv2_to_pillow(frame)\n        inputs = processor(images=frame, return_tensors=\"pt\")\n\n        with torch.no_grad():\n            outputs = model(**inputs)\n\n        width, height = frame.size\n        target_size = torch.tensor([[height, width]])\n        results = processor.post_process_object_detection(\n            outputs=outputs, target_sizes=target_size)[0]\n        detections = sv.Detections.from_transformers(results)\n        sink.append(detections, {\"frame_index\": frame_index})\n
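
The saved files can then be loaded for offline processing. A minimal sketch, assuming pandas and the .CSV file written above; the .JSON output can be loaded analogously:

import pandas as pd\n\ndf = pd.read_csv(<TARGET_CSV_PATH>)\n# keep only confident rows and inspect the detected classes\nhigh_confidence = df[df[\"confidence\"] > 0.5]\nprint(high_confidence[\"class_name\"].value_counts())\n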
"},{"location":"how_to/track_objects/","title":"Track Objects","text":"

Supervision makes it easy to track objects recognized by a wide range of object detection and segmentation models. This guide walks you through running inference with the YOLOv8 model via either the Inference or Ultralytics packages, then tracking the detected objects and annotating the video for deeper analysis.

To make it easier to follow this tutorial, download the video we will use as an example. You can do this using the supervision[assets] extension.

from supervision.assets import download_assets, VideoAssets\n\ndownload_assets(VideoAssets.PEOPLE_WALKING)\n
"},{"location":"how_to/track_objects/#run-inference","title":"Run Inference","text":"

First, you'll need to obtain predictions from your object detection or segmentation model. In this tutorial, we are using the YOLOv8 model as an example. However, Supervision is versatile and compatible with various models. Check this link for guidance on how to plug in other models.

We will define a callback function, which will process each frame of the video by obtaining model predictions and then annotating the frame based on these predictions. This callback function will be essential in the subsequent steps of the tutorial, as it will be modified to include tracking, labeling, and trace annotations.

UltralyticsInference
import numpy as np\nimport supervision as sv\nfrom ultralytics import YOLO\n\nmodel = YOLO(\"yolov8n.pt\")\nbox_annotator = sv.BoundingBoxAnnotator()\n\ndef callback(frame: np.ndarray, _: int) -> np.ndarray:\n    results = model(frame)[0]\n    detections = sv.Detections.from_ultralytics(results)\n    return box_annotator.annotate(frame.copy(), detections=detections)\n\nsv.process_video(\n    source_path=\"people-walking.mp4\",\n    target_path=\"result.mp4\",\n    callback=callback\n)\n
import numpy as np\nimport supervision as sv\nfrom inference.models.utils import get_roboflow_model\n\nmodel = get_roboflow_model(model_id=\"yolov8n-640\", api_key=<ROBOFLOW API KEY>)\nbox_annotator = sv.BoundingBoxAnnotator()\n\ndef callback(frame: np.ndarray, _: int) -> np.ndarray:\n    results = model.infer(frame)[0]\n    detections = sv.Detections.from_inference(results)\n    return box_annotator.annotate(frame.copy(), detections=detections)\n\nsv.process_video(\n    source_path=\"people-walking.mp4\",\n    target_path=\"result.mp4\",\n    callback=callback\n)\n
"},{"location":"how_to/track_objects/#tracking","title":"Tracking","text":"

After running inference and obtaining predictions, the next step is to track the detected objects throughout the video. With Supervision's sv.ByteTrack, each detected object is assigned a unique tracker ID, making it possible to follow the object's motion path across frames.

UltralyticsInference
import numpy as np\nimport supervision as sv\nfrom ultralytics import YOLO\n\nmodel = YOLO(\"yolov8n.pt\")\ntracker = sv.ByteTrack()\nbox_annotator = sv.BoundingBoxAnnotator()\n\ndef callback(frame: np.ndarray, _: int) -> np.ndarray:\n    results = model(frame)[0]\n    detections = sv.Detections.from_ultralytics(results)\n    detections = tracker.update_with_detections(detections)\n    return box_annotator.annotate(frame.copy(), detections=detections)\n\nsv.process_video(\n    source_path=\"people-walking.mp4\",\n    target_path=\"result.mp4\",\n    callback=callback\n)\n
import numpy as np\nimport supervision as sv\nfrom inference.models.utils import get_roboflow_model\n\nmodel = get_roboflow_model(model_id=\"yolov8n-640\", api_key=<ROBOFLOW API KEY>)\ntracker = sv.ByteTrack()\nbox_annotator = sv.BoundingBoxAnnotator()\n\ndef callback(frame: np.ndarray, _: int) -> np.ndarray:\n    results = model.infer(frame)[0]\n    detections = sv.Detections.from_inference(results)\n    detections = tracker.update_with_detections(detections)\n    return box_annotator.annotate(frame.copy(), detections=detections)\n\nsv.process_video(\n    source_path=\"people-walking.mp4\",\n    target_path=\"result.mp4\",\n    callback=callback\n)\n
"},{"location":"how_to/track_objects/#annotate-video-with-tracking-ids","title":"Annotate Video with Tracking IDs","text":"

Annotating the video with tracking IDs makes it easier to distinguish and follow each object. With Supervision's sv.LabelAnnotator, we can overlay the tracker ID and class label on each detected object, giving a clear visual representation of its class and unique identifier.

UltralyticsInference
import numpy as np\nimport supervision as sv\nfrom ultralytics import YOLO\n\nmodel = YOLO(\"yolov8n.pt\")\ntracker = sv.ByteTrack()\nbox_annotator = sv.BoundingBoxAnnotator()\nlabel_annotator = sv.LabelAnnotator()\n\ndef callback(frame: np.ndarray, _: int) -> np.ndarray:\n    results = model(frame)[0]\n    detections = sv.Detections.from_ultralytics(results)\n    detections = tracker.update_with_detections(detections)\n\n    labels = [\n        f\"#{tracker_id} {results.names[class_id]}\"\n        for class_id, tracker_id\n        in zip(detections.class_id, detections.tracker_id)\n    ]\n\n    annotated_frame = box_annotator.annotate(\n        frame.copy(), detections=detections)\n    return label_annotator.annotate(\n        annotated_frame, detections=detections, labels=labels)\n\nsv.process_video(\n    source_path=\"people-walking.mp4\",\n    target_path=\"result.mp4\",\n    callback=callback\n)\n
import numpy as np\nimport supervision as sv\nfrom inference.models.utils import get_roboflow_model\n\nmodel = get_roboflow_model(model_id=\"yolov8n-640\", api_key=<ROBOFLOW API KEY>)\ntracker = sv.ByteTrack()\nbox_annotator = sv.BoundingBoxAnnotator()\nlabel_annotator = sv.LabelAnnotator()\n\ndef callback(frame: np.ndarray, _: int) -> np.ndarray:\n    results = model.infer(frame)[0]\n    detections = sv.Detections.from_inference(results)\n    detections = tracker.update_with_detections(detections)\n\n    labels = [\n        f\"#{tracker_id} {results.names[class_id]}\"\n        for class_id, tracker_id\n        in zip(detections.class_id, detections.tracker_id)\n    ]\n\n    annotated_frame = box_annotator.annotate(\n        frame.copy(), detections=detections)\n    return label_annotator.annotate(\n        annotated_frame, detections=detections, labels=labels)\n\nsv.process_video(\n    source_path=\"people-walking.mp4\",\n    target_path=\"result.mp4\",\n    callback=callback\n)\n
"},{"location":"how_to/track_objects/#annotate-video-with-traces","title":"Annotate Video with Traces","text":"

Adding traces to the video overlays the historical paths of the detected objects. Powered by sv.TraceAnnotator, this makes it easy to visualize object trajectories and understand movement patterns and interactions in the video.

UltralyticsInference
import numpy as np\nimport supervision as sv\nfrom ultralytics import YOLO\n\nmodel = YOLO(\"yolov8n.pt\")\ntracker = sv.ByteTrack()\nbox_annotator = sv.BoundingBoxAnnotator()\nlabel_annotator = sv.LabelAnnotator()\ntrace_annotator = sv.TraceAnnotator()\n\ndef callback(frame: np.ndarray, _: int) -> np.ndarray:\n    results = model(frame)[0]\n    detections = sv.Detections.from_ultralytics(results)\n    detections = tracker.update_with_detections(detections)\n\n    labels = [\n        f\"#{tracker_id} {results.names[class_id]}\"\n        for class_id, tracker_id\n        in zip(detections.class_id, detections.tracker_id)\n    ]\n\n    annotated_frame = box_annotator.annotate(\n        frame.copy(), detections=detections)\n    annotated_frame = label_annotator.annotate(\n        annotated_frame, detections=detections, labels=labels)\n    return trace_annotator.annotate(\n        annotated_frame, detections=detections)\n\nsv.process_video(\n    source_path=\"people-walking.mp4\",\n    target_path=\"result.mp4\",\n    callback=callback\n)\n
import numpy as np\nimport supervision as sv\nfrom inference.models.utils import get_roboflow_model\n\nmodel = get_roboflow_model(model_id=\"yolov8n-640\", api_key=<ROBOFLOW API KEY>)\ntracker = sv.ByteTrack()\nbox_annotator = sv.BoundingBoxAnnotator()\nlabel_annotator = sv.LabelAnnotator()\ntrace_annotator = sv.TraceAnnotator()\n\ndef callback(frame: np.ndarray, _: int) -> np.ndarray:\n    results = model.infer(frame)[0]\n    detections = sv.Detections.from_inference(results)\n    detections = tracker.update_with_detections(detections)\n\n    labels = [\n        f\"#{tracker_id} {results.names[class_id]}\"\n        for class_id, tracker_id\n        in zip(detections.class_id, detections.tracker_id)\n    ]\n\n    annotated_frame = box_annotator.annotate(\n        frame.copy(), detections=detections)\n    annotated_frame = label_annotator.annotate(\n        annotated_frame, detections=detections, labels=labels)\n    return trace_annotator.annotate(\n        annotated_frame, detections=detections)\n\nsv.process_video(\n    source_path=\"people-walking.mp4\",\n    target_path=\"result.mp4\",\n    callback=callback\n)\n

This walkthrough covers the key steps for annotating videos with Supervision, including object tracking and trace annotations.

"},{"location":"keypoint/annotators/","title":"Annotators","text":"VertexAnnotatorEdgeAnnotatorVertexLabelAnnotator
import supervision as sv\n\nimage = ...\nkey_points = sv.KeyPoints(...)\n\nvertex_annotator = sv.VertexAnnotator(\n    color=sv.Color.GREEN,\n    radius=10\n)\nannotated_frame = vertex_annotator.annotate(\n    scene=image.copy(),\n    key_points=key_points\n)\n
import supervision as sv\n\nimage = ...\nkey_points = sv.KeyPoints(...)\n\nedge_annotator = sv.EdgeAnnotator(\n    color=sv.Color.GREEN,\n    thickness=5\n)\nannotated_frame = edge_annotator.annotate(\n    scene=image.copy(),\n    key_points=key_points\n)\n
import supervision as sv\n\nimage = ...\nkey_points = sv.KeyPoints(...)\n\nvertex_label_annotator = sv.VertexLabelAnnotator(\n    color=sv.Color.GREEN,\n    text_color=sv.Color.BLACK,\n    border_radius=5\n)\nannotated_frame = vertex_label_annotator.annotate(\n    scene=image.copy(),\n    key_points=key_points\n)\n
VertexAnnotator

Bases: BaseKeyPointAnnotator

A class that specializes in drawing skeleton vertices on images. It uses specified key points to determine the locations where the vertices should be drawn.

Source code in supervision/keypoint/annotators.py
class VertexAnnotator(BaseKeyPointAnnotator):\n    \"\"\"\n    A class that specializes in drawing skeleton vertices on images. It uses\n    specified key points to determine the locations where the vertices should be\n    drawn.\n    \"\"\"\n\n    def __init__(\n        self,\n        color: Color = Color.ROBOFLOW,\n        radius: int = 4,\n    ) -> None:\n        \"\"\"\n        Args:\n            color (Color, optional): The color to use for annotating key points.\n            radius (int, optional): The radius of the circles used to represent the key\n                points.\n        \"\"\"\n        self.color = color\n        self.radius = radius\n\n    @convert_for_annotation_method\n    def annotate(self, scene: ImageType, key_points: KeyPoints) -> ImageType:\n        \"\"\"\n        Annotates the given scene with skeleton vertices based on the provided key\n        points. It draws circles at each key point location.\n\n        Args:\n            scene (ImageType): The image where skeleton vertices will be drawn.\n                `ImageType` is a flexible type, accepting either `numpy.ndarray` or\n                `PIL.Image.Image`.\n            key_points (KeyPoints): A collection of key points where each key point\n                consists of x and y coordinates.\n\n        Returns:\n            The annotated image, matching the type of `scene` (`numpy.ndarray`\n                or `PIL.Image.Image`)\n\n        Example:\n            ```python\n            import supervision as sv\n\n            image = ...\n            key_points = sv.KeyPoints(...)\n\n            vertex_annotator = sv.VertexAnnotator(\n                color=sv.Color.GREEN,\n                radius=10\n            )\n            annotated_frame = vertex_annotator.annotate(\n                scene=image.copy(),\n                key_points=key_points\n            )\n            ```\n\n        ![vertex-annotator-example](https://media.roboflow.com/\n        supervision-annotator-examples/vertex-annotator-example.png)\n        \"\"\"\n        if len(key_points) == 0:\n            return scene\n\n        for xy in key_points.xy:\n            for x, y in xy:\n                cv2.circle(\n                    img=scene,\n                    center=(int(x), int(y)),\n                    radius=self.radius,\n                    color=self.color.as_bgr(),\n                    thickness=-1,\n                )\n\n        return scene\n
EdgeAnnotator

Bases: BaseKeyPointAnnotator

A class that specializes in drawing skeleton edges on images using specified key points. It connects key points with lines to form the skeleton structure.

Source code in supervision/keypoint/annotators.py
class EdgeAnnotator(BaseKeyPointAnnotator):\n    \"\"\"\n    A class that specializes in drawing skeleton edges on images using specified key\n    points. It connects key points with lines to form the skeleton structure.\n    \"\"\"\n\n    def __init__(\n        self,\n        color: Color = Color.ROBOFLOW,\n        thickness: int = 2,\n        edges: Optional[List[Tuple[int, int]]] = None,\n    ) -> None:\n        \"\"\"\n        Args:\n            color (Color, optional): The color to use for the edges.\n            thickness (int, optional): The thickness of the edges.\n            edges (Optional[List[Tuple[int, int]]]): The edges to draw.\n                If set to `None`, will attempt to select automatically.\n        \"\"\"\n        self.color = color\n        self.thickness = thickness\n        self.edges = edges\n\n    @convert_for_annotation_method\n    def annotate(self, scene: ImageType, key_points: KeyPoints) -> ImageType:\n        \"\"\"\n        Annotates the given scene by drawing lines between specified key points to form\n        edges.\n\n        Args:\n            scene (ImageType): The image where skeleton edges will be drawn. `ImageType`\n                is a flexible type, accepting either `numpy.ndarray` or\n                `PIL.Image.Image`.\n            key_points (KeyPoints): A collection of key points where each key point\n                consists of x and y coordinates.\n\n        Returns:\n            Returns:\n                The annotated image, matching the type of `scene` (`numpy.ndarray`\n                    or `PIL.Image.Image`)\n\n        Example:\n            ```python\n            import supervision as sv\n\n            image = ...\n            key_points = sv.KeyPoints(...)\n\n            edge_annotator = sv.EdgeAnnotator(\n                color=sv.Color.GREEN,\n                thickness=5\n            )\n            annotated_frame = edge_annotator.annotate(\n                scene=image.copy(),\n                key_points=key_points\n            )\n            ```\n\n        ![edge-annotator-example](https://media.roboflow.com/\n        supervision-annotator-examples/edge-annotator-example.png)\n        \"\"\"\n        if len(key_points) == 0:\n            return scene\n\n        for xy in key_points.xy:\n            edges = self.edges\n            if not edges:\n                edges = SKELETONS_BY_VERTEX_COUNT.get(len(xy))\n            if not edges:\n                warn(f\"No skeleton found with {len(xy)} vertices\")\n                return scene\n\n            for class_a, class_b in edges:\n                xy_a = xy[class_a - 1]\n                xy_b = xy[class_b - 1]\n                missing_a = np.allclose(xy_a, 0)\n                missing_b = np.allclose(xy_b, 0)\n                if missing_a or missing_b:\n                    continue\n\n                cv2.line(\n                    img=scene,\n                    pt1=(int(xy_a[0]), int(xy_a[1])),\n                    pt2=(int(xy_b[0]), int(xy_b[1])),\n                    color=self.color.as_bgr(),\n                    thickness=self.thickness,\n                )\n\n        return scene\n
VertexLabelAnnotator

A class that draws labels of skeleton vertices on images. It uses specified key points to determine the locations where the vertices should be drawn.

Source code in supervision/keypoint/annotators.py
class VertexLabelAnnotator:\n    \"\"\"\n    A class that draws labels of skeleton vertices on images. It uses specified key\n    points to determine the locations where the vertices should be drawn.\n    \"\"\"\n\n    def __init__(\n        self,\n        color: Union[Color, List[Color]] = Color.ROBOFLOW,\n        text_color: Color = Color.WHITE,\n        text_scale: float = 0.5,\n        text_thickness: int = 1,\n        text_padding: int = 10,\n        border_radius: int = 0,\n    ):\n        \"\"\"\n        Args:\n            color (Union[Color, List[Color]], optional): The color to use for each\n                keypoint label. If a list is provided, the colors will be used in order\n                for each keypoint.\n            text_color (Color, optional): The color to use for the labels.\n            text_scale (float, optional): The scale of the text.\n            text_thickness (int, optional): The thickness of the text.\n            text_padding (int, optional): The padding around the text.\n            border_radius (int, optional): The radius of the rounded corners of the\n                boxes. Set to a high value to produce circles.\n        \"\"\"\n        self.border_radius: int = border_radius\n        self.color: Union[Color, List[Color]] = color\n        self.text_color: Color = text_color\n        self.text_scale: float = text_scale\n        self.text_thickness: int = text_thickness\n        self.text_padding: int = text_padding\n\n    def annotate(\n        self, scene: ImageType, key_points: KeyPoints, labels: List[str] = None\n    ) -> ImageType:\n        \"\"\"\n        A class that draws labels of skeleton vertices on images. It uses specified key\n            points to determine the locations where the vertices should be drawn.\n\n        Args:\n            scene (ImageType): The image where vertex labels will be drawn. `ImageType`\n                is a flexible type, accepting either `numpy.ndarray` or\n                `PIL.Image.Image`.\n            key_points (KeyPoints): A collection of key points where each key point\n                consists of x and y coordinates.\n            labels (List[str], optional): A list of labels to be displayed on the\n                annotated image. If not provided, keypoint indices will be used.\n\n        Returns:\n            The annotated image, matching the type of `scene` (`numpy.ndarray`\n                or `PIL.Image.Image`)\n\n        Example:\n            ```python\n            import supervision as sv\n\n            image = ...\n            key_points = sv.KeyPoints(...)\n\n            vertex_label_annotator = sv.VertexLabelAnnotator(\n                color=sv.Color.GREEN,\n                text_color=sv.Color.BLACK,\n                border_radius=5\n            )\n            annotated_frame = vertex_label_annotator.annotate(\n                scene=image.copy(),\n                key_points=key_points\n            )\n            ```\n\n        ![vertex-label-annotator-example](https://media.roboflow.com/\n        supervision-annotator-examples/vertex-label-annotator-example.png)\n\n        !!! 
tip\n\n            `VertexLabelAnnotator` allows to customize the color of each keypoint label\n            values.\n\n        Example:\n            ```python\n            import supervision as sv\n\n            image = ...\n            key_points = sv.KeyPoints(...)\n\n            LABELS = [\n                \"nose\", \"left eye\", \"right eye\", \"left ear\",\n                \"right ear\", \"left shoulder\", \"right shoulder\", \"left elbow\",\n                \"right elbow\", \"left wrist\", \"right wrist\", \"left hip\",\n                \"right hip\", \"left knee\", \"right knee\", \"left ankle\",\n                \"right ankle\"\n            ]\n\n            COLORS = [\n                \"#FF6347\", \"#FF6347\", \"#FF6347\", \"#FF6347\",\n                \"#FF6347\", \"#FF1493\", \"#00FF00\", \"#FF1493\",\n                \"#00FF00\", \"#FF1493\", \"#00FF00\", \"#FFD700\",\n                \"#00BFFF\", \"#FFD700\", \"#00BFFF\", \"#FFD700\",\n                \"#00BFFF\"\n            ]\n            COLORS = [sv.Color.from_hex(color_hex=c) for c in COLORS]\n\n            vertex_label_annotator = sv.VertexLabelAnnotator(\n                color=COLORS,\n                text_color=sv.Color.BLACK,\n                border_radius=5\n            )\n            annotated_frame = vertex_label_annotator.annotate(\n                scene=image.copy(),\n                key_points=key_points,\n                labels=labels\n            )\n            ```\n        ![vertex-label-annotator-custom-example](https://media.roboflow.com/\n        supervision-annotator-examples/vertex-label-annotator-custom-example.png)\n        \"\"\"\n        font = cv2.FONT_HERSHEY_SIMPLEX\n\n        skeletons_count, points_count, _ = key_points.xy.shape\n        if skeletons_count == 0:\n            return scene\n\n        anchors = key_points.xy.reshape(points_count * skeletons_count, 2).astype(int)\n        mask = np.all(anchors != 0, axis=1)\n\n        if not np.any(mask):\n            return scene\n\n        colors = self.preprocess_and_validate_colors(\n            colors=self.color,\n            points_count=points_count,\n            skeletons_count=skeletons_count,\n        )\n\n        labels = self.preprocess_and_validate_labels(\n            labels=labels, points_count=points_count, skeletons_count=skeletons_count\n        )\n\n        anchors = anchors[mask]\n        colors = colors[mask]\n        labels = labels[mask]\n\n        xyxy = np.array(\n            [\n                self.get_text_bounding_box(\n                    text=label,\n                    font=font,\n                    text_scale=self.text_scale,\n                    text_thickness=self.text_thickness,\n                    center_coordinates=tuple(anchor),\n                )\n                for anchor, label in zip(anchors, labels)\n            ]\n        )\n\n        xyxy_padded = pad_boxes(xyxy=xyxy, px=self.text_padding)\n\n        for text, color, box, box_padded in zip(labels, colors, xyxy, xyxy_padded):\n            draw_rounded_rectangle(\n                scene=scene,\n                rect=Rect.from_xyxy(box_padded),\n                color=color,\n                border_radius=self.border_radius,\n            )\n            cv2.putText(\n                img=scene,\n                text=text,\n                org=(box[0], box[1] + self.text_padding),\n                fontFace=font,\n                fontScale=self.text_scale,\n                color=self.text_color.as_rgb(),\n                thickness=self.text_thickness,\n           
     lineType=cv2.LINE_AA,\n            )\n\n        return scene\n\n    @staticmethod\n    def get_text_bounding_box(\n        text: str,\n        font: int,\n        text_scale: float,\n        text_thickness: int,\n        center_coordinates: Tuple[int, int],\n    ) -> Tuple[int, int, int, int]:\n        text_w, text_h = cv2.getTextSize(\n            text=text,\n            fontFace=font,\n            fontScale=text_scale,\n            thickness=text_thickness,\n        )[0]\n        center_x, center_y = center_coordinates\n        return (\n            center_x - text_w // 2,\n            center_y - text_h // 2,\n            center_x + text_w // 2,\n            center_y + text_h // 2,\n        )\n\n    @staticmethod\n    def preprocess_and_validate_labels(\n        labels: Optional[List[str]], points_count: int, skeletons_count: int\n    ) -> np.array:\n        if labels and len(labels) != points_count:\n            raise ValueError(\n                f\"Number of labels ({len(labels)}) must match number of key points \"\n                f\"({points_count}).\"\n            )\n        if labels is None:\n            labels = [str(i) for i in range(points_count)]\n\n        return np.array(labels * skeletons_count)\n\n    @staticmethod\n    def preprocess_and_validate_colors(\n        colors: Optional[Union[Color, List[Color]]],\n        points_count: int,\n        skeletons_count: int,\n    ) -> np.array:\n        if isinstance(colors, list) and len(colors) != points_count:\n            raise ValueError(\n                f\"Number of colors ({len(colors)}) must match number of key points \"\n                f\"({points_count}).\"\n            )\n        return (\n            np.array(colors * skeletons_count)\n            if isinstance(colors, list)\n            else np.array([colors] * points_count * skeletons_count)\n        )\n
"},{"location":"keypoint/annotators/#supervision.keypoint.annotators.VertexAnnotator-functions","title":"Functions","text":""},{"location":"keypoint/annotators/#supervision.keypoint.annotators.VertexAnnotator.__init__","title":"__init__(color=Color.ROBOFLOW, radius=4)","text":"

Parameters:

Name Type Description Default color Color

The color to use for annotating key points.

ROBOFLOW radius int

The radius of the circles used to represent the key points.

4 Source code in supervision/keypoint/annotators.py
def __init__(\n    self,\n    color: Color = Color.ROBOFLOW,\n    radius: int = 4,\n) -> None:\n    \"\"\"\n    Args:\n        color (Color, optional): The color to use for annotating key points.\n        radius (int, optional): The radius of the circles used to represent the key\n            points.\n    \"\"\"\n    self.color = color\n    self.radius = radius\n
"},{"location":"keypoint/annotators/#supervision.keypoint.annotators.VertexAnnotator.annotate","title":"annotate(scene, key_points)","text":"

Annotates the given scene with skeleton vertices based on the provided key points. It draws circles at each key point location.

Parameters:

Name Type Description Default scene ImageType

The image where skeleton vertices will be drawn. ImageType is a flexible type, accepting either numpy.ndarray or PIL.Image.Image.

required key_points KeyPoints

A collection of key points where each key point consists of x and y coordinates.

required

Returns:

Type Description ImageType

The annotated image, matching the type of scene (numpy.ndarray or PIL.Image.Image)

Example
import supervision as sv\n\nimage = ...\nkey_points = sv.KeyPoints(...)\n\nvertex_annotator = sv.VertexAnnotator(\n    color=sv.Color.GREEN,\n    radius=10\n)\nannotated_frame = vertex_annotator.annotate(\n    scene=image.copy(),\n    key_points=key_points\n)\n

Source code in supervision/keypoint/annotators.py
@convert_for_annotation_method\ndef annotate(self, scene: ImageType, key_points: KeyPoints) -> ImageType:\n    \"\"\"\n    Annotates the given scene with skeleton vertices based on the provided key\n    points. It draws circles at each key point location.\n\n    Args:\n        scene (ImageType): The image where skeleton vertices will be drawn.\n            `ImageType` is a flexible type, accepting either `numpy.ndarray` or\n            `PIL.Image.Image`.\n        key_points (KeyPoints): A collection of key points where each key point\n            consists of x and y coordinates.\n\n    Returns:\n        The annotated image, matching the type of `scene` (`numpy.ndarray`\n            or `PIL.Image.Image`)\n\n    Example:\n        ```python\n        import supervision as sv\n\n        image = ...\n        key_points = sv.KeyPoints(...)\n\n        vertex_annotator = sv.VertexAnnotator(\n            color=sv.Color.GREEN,\n            radius=10\n        )\n        annotated_frame = vertex_annotator.annotate(\n            scene=image.copy(),\n            key_points=key_points\n        )\n        ```\n\n    ![vertex-annotator-example](https://media.roboflow.com/\n    supervision-annotator-examples/vertex-annotator-example.png)\n    \"\"\"\n    if len(key_points) == 0:\n        return scene\n\n    for xy in key_points.xy:\n        for x, y in xy:\n            cv2.circle(\n                img=scene,\n                center=(int(x), int(y)),\n                radius=self.radius,\n                color=self.color.as_bgr(),\n                thickness=-1,\n            )\n\n    return scene\n
"},{"location":"keypoint/annotators/#supervision.keypoint.annotators.EdgeAnnotator-functions","title":"Functions","text":""},{"location":"keypoint/annotators/#supervision.keypoint.annotators.EdgeAnnotator.__init__","title":"__init__(color=Color.ROBOFLOW, thickness=2, edges=None)","text":"

Parameters:

Name Type Description Default color Color

The color to use for the edges.

ROBOFLOW thickness int

The thickness of the edges.

2 edges Optional[List[Tuple[int, int]]]

The edges to draw. If set to None, will attempt to select automatically.

None Source code in supervision/keypoint/annotators.py
def __init__(\n    self,\n    color: Color = Color.ROBOFLOW,\n    thickness: int = 2,\n    edges: Optional[List[Tuple[int, int]]] = None,\n) -> None:\n    \"\"\"\n    Args:\n        color (Color, optional): The color to use for the edges.\n        thickness (int, optional): The thickness of the edges.\n        edges (Optional[List[Tuple[int, int]]]): The edges to draw.\n            If set to `None`, will attempt to select automatically.\n    \"\"\"\n    self.color = color\n    self.thickness = thickness\n    self.edges = edges\n
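
A minimal sketch of passing a custom edge list; as in the source above, vertex indices are 1-based, and the pairs shown here are purely illustrative:

import supervision as sv\n\n# connect vertex 1 to 2 and vertex 2 to 3 (1-based indices, illustrative skeleton)\nedge_annotator = sv.EdgeAnnotator(\n    color=sv.Color.GREEN,\n    thickness=5,\n    edges=[(1, 2), (2, 3)]\n)\n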
"},{"location":"keypoint/annotators/#supervision.keypoint.annotators.EdgeAnnotator.annotate","title":"annotate(scene, key_points)","text":"

Annotates the given scene by drawing lines between specified key points to form edges.

Parameters:

Name Type Description Default scene ImageType

The image where skeleton edges will be drawn. ImageType is a flexible type, accepting either numpy.ndarray or PIL.Image.Image.

required key_points KeyPoints

A collection of key points where each key point consists of x and y coordinates.

required

Returns:

Name Type Description Returns ImageType

The annotated image, matching the type of scene (numpy.ndarray or PIL.Image.Image)

Example
import supervision as sv\n\nimage = ...\nkey_points = sv.KeyPoints(...)\n\nedge_annotator = sv.EdgeAnnotator(\n    color=sv.Color.GREEN,\n    thickness=5\n)\nannotated_frame = edge_annotator.annotate(\n    scene=image.copy(),\n    key_points=key_points\n)\n

Source code in supervision/keypoint/annotators.py
@convert_for_annotation_method\ndef annotate(self, scene: ImageType, key_points: KeyPoints) -> ImageType:\n    \"\"\"\n    Annotates the given scene by drawing lines between specified key points to form\n    edges.\n\n    Args:\n        scene (ImageType): The image where skeleton edges will be drawn. `ImageType`\n            is a flexible type, accepting either `numpy.ndarray` or\n            `PIL.Image.Image`.\n        key_points (KeyPoints): A collection of key points where each key point\n            consists of x and y coordinates.\n\n    Returns:\n        Returns:\n            The annotated image, matching the type of `scene` (`numpy.ndarray`\n                or `PIL.Image.Image`)\n\n    Example:\n        ```python\n        import supervision as sv\n\n        image = ...\n        key_points = sv.KeyPoints(...)\n\n        edge_annotator = sv.EdgeAnnotator(\n            color=sv.Color.GREEN,\n            thickness=5\n        )\n        annotated_frame = edge_annotator.annotate(\n            scene=image.copy(),\n            key_points=key_points\n        )\n        ```\n\n    ![edge-annotator-example](https://media.roboflow.com/\n    supervision-annotator-examples/edge-annotator-example.png)\n    \"\"\"\n    if len(key_points) == 0:\n        return scene\n\n    for xy in key_points.xy:\n        edges = self.edges\n        if not edges:\n            edges = SKELETONS_BY_VERTEX_COUNT.get(len(xy))\n        if not edges:\n            warn(f\"No skeleton found with {len(xy)} vertices\")\n            return scene\n\n        for class_a, class_b in edges:\n            xy_a = xy[class_a - 1]\n            xy_b = xy[class_b - 1]\n            missing_a = np.allclose(xy_a, 0)\n            missing_b = np.allclose(xy_b, 0)\n            if missing_a or missing_b:\n                continue\n\n            cv2.line(\n                img=scene,\n                pt1=(int(xy_a[0]), int(xy_a[1])),\n                pt2=(int(xy_b[0]), int(xy_b[1])),\n                color=self.color.as_bgr(),\n                thickness=self.thickness,\n            )\n\n    return scene\n
"},{"location":"keypoint/annotators/#supervision.keypoint.annotators.VertexLabelAnnotator-functions","title":"Functions","text":""},{"location":"keypoint/annotators/#supervision.keypoint.annotators.VertexLabelAnnotator.__init__","title":"__init__(color=Color.ROBOFLOW, text_color=Color.WHITE, text_scale=0.5, text_thickness=1, text_padding=10, border_radius=0)","text":"

Parameters:

Name Type Description Default color Union[Color, List[Color]]

The color to use for each keypoint label. If a list is provided, the colors will be used in order for each keypoint.

ROBOFLOW text_color Color

The color to use for the labels.

WHITE text_scale float

The scale of the text.

0.5 text_thickness int

The thickness of the text.

1 text_padding int

The padding around the text.

10 border_radius int

The radius of the rounded corners of the boxes. Set to a high value to produce circles.

0 Source code in supervision/keypoint/annotators.py
def __init__(\n    self,\n    color: Union[Color, List[Color]] = Color.ROBOFLOW,\n    text_color: Color = Color.WHITE,\n    text_scale: float = 0.5,\n    text_thickness: int = 1,\n    text_padding: int = 10,\n    border_radius: int = 0,\n):\n    \"\"\"\n    Args:\n        color (Union[Color, List[Color]], optional): The color to use for each\n            keypoint label. If a list is provided, the colors will be used in order\n            for each keypoint.\n        text_color (Color, optional): The color to use for the labels.\n        text_scale (float, optional): The scale of the text.\n        text_thickness (int, optional): The thickness of the text.\n        text_padding (int, optional): The padding around the text.\n        border_radius (int, optional): The radius of the rounded corners of the\n            boxes. Set to a high value to produce circles.\n    \"\"\"\n    self.border_radius: int = border_radius\n    self.color: Union[Color, List[Color]] = color\n    self.text_color: Color = text_color\n    self.text_scale: float = text_scale\n    self.text_thickness: int = text_thickness\n    self.text_padding: int = text_padding\n
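
A minimal sketch of the border_radius parameter; the value 40 is illustrative and large enough to render near-circular label backgrounds:

import supervision as sv\n\n# a large border_radius rounds the label boxes into near-circles\nvertex_label_annotator = sv.VertexLabelAnnotator(\n    color=sv.Color.GREEN,\n    text_color=sv.Color.BLACK,\n    border_radius=40\n)\n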
"},{"location":"keypoint/annotators/#supervision.keypoint.annotators.VertexLabelAnnotator.annotate","title":"annotate(scene, key_points, labels=None)","text":"

Annotates the given scene with labels at the skeleton vertices, using the specified key points to determine where each label should be placed.

Parameters:

Name Type Description Default scene ImageType

The image where vertex labels will be drawn. ImageType is a flexible type, accepting either numpy.ndarray or PIL.Image.Image.

required key_points KeyPoints

A collection of key points where each key point consists of x and y coordinates.

required labels List[str]

A list of labels to be displayed on the annotated image. If not provided, keypoint indices will be used.

None

Returns:

Type Description ImageType

The annotated image, matching the type of scene (numpy.ndarray or PIL.Image.Image)

Example
import supervision as sv\n\nimage = ...\nkey_points = sv.KeyPoints(...)\n\nvertex_label_annotator = sv.VertexLabelAnnotator(\n    color=sv.Color.GREEN,\n    text_color=sv.Color.BLACK,\n    border_radius=5\n)\nannotated_frame = vertex_label_annotator.annotate(\n    scene=image.copy(),\n    key_points=key_points\n)\n

Tip

VertexLabelAnnotator allows you to customize the color of each keypoint label.

Example
import supervision as sv\n\nimage = ...\nkey_points = sv.KeyPoints(...)\n\nLABELS = [\n    \"nose\", \"left eye\", \"right eye\", \"left ear\",\n    \"right ear\", \"left shoulder\", \"right shoulder\", \"left elbow\",\n    \"right elbow\", \"left wrist\", \"right wrist\", \"left hip\",\n    \"right hip\", \"left knee\", \"right knee\", \"left ankle\",\n    \"right ankle\"\n]\n\nCOLORS = [\n    \"#FF6347\", \"#FF6347\", \"#FF6347\", \"#FF6347\",\n    \"#FF6347\", \"#FF1493\", \"#00FF00\", \"#FF1493\",\n    \"#00FF00\", \"#FF1493\", \"#00FF00\", \"#FFD700\",\n    \"#00BFFF\", \"#FFD700\", \"#00BFFF\", \"#FFD700\",\n    \"#00BFFF\"\n]\nCOLORS = [sv.Color.from_hex(color_hex=c) for c in COLORS]\n\nvertex_label_annotator = sv.VertexLabelAnnotator(\n    color=COLORS,\n    text_color=sv.Color.BLACK,\n    border_radius=5\n)\nannotated_frame = vertex_label_annotator.annotate(\n    scene=image.copy(),\n    key_points=key_points,\n    labels=LABELS\n)\n

Source code in supervision/keypoint/annotators.py
def annotate(\n    self, scene: ImageType, key_points: KeyPoints, labels: List[str] = None\n) -> ImageType:\n    \"\"\"\n    A class that draws labels of skeleton vertices on images. It uses specified key\n        points to determine the locations where the vertices should be drawn.\n\n    Args:\n        scene (ImageType): The image where vertex labels will be drawn. `ImageType`\n            is a flexible type, accepting either `numpy.ndarray` or\n            `PIL.Image.Image`.\n        key_points (KeyPoints): A collection of key points where each key point\n            consists of x and y coordinates.\n        labels (List[str], optional): A list of labels to be displayed on the\n            annotated image. If not provided, keypoint indices will be used.\n\n    Returns:\n        The annotated image, matching the type of `scene` (`numpy.ndarray`\n            or `PIL.Image.Image`)\n\n    Example:\n        ```python\n        import supervision as sv\n\n        image = ...\n        key_points = sv.KeyPoints(...)\n\n        vertex_label_annotator = sv.VertexLabelAnnotator(\n            color=sv.Color.GREEN,\n            text_color=sv.Color.BLACK,\n            border_radius=5\n        )\n        annotated_frame = vertex_label_annotator.annotate(\n            scene=image.copy(),\n            key_points=key_points\n        )\n        ```\n\n    ![vertex-label-annotator-example](https://media.roboflow.com/\n    supervision-annotator-examples/vertex-label-annotator-example.png)\n\n    !!! tip\n\n        `VertexLabelAnnotator` allows to customize the color of each keypoint label\n        values.\n\n    Example:\n        ```python\n        import supervision as sv\n\n        image = ...\n        key_points = sv.KeyPoints(...)\n\n        LABELS = [\n            \"nose\", \"left eye\", \"right eye\", \"left ear\",\n            \"right ear\", \"left shoulder\", \"right shoulder\", \"left elbow\",\n            \"right elbow\", \"left wrist\", \"right wrist\", \"left hip\",\n            \"right hip\", \"left knee\", \"right knee\", \"left ankle\",\n            \"right ankle\"\n        ]\n\n        COLORS = [\n            \"#FF6347\", \"#FF6347\", \"#FF6347\", \"#FF6347\",\n            \"#FF6347\", \"#FF1493\", \"#00FF00\", \"#FF1493\",\n            \"#00FF00\", \"#FF1493\", \"#00FF00\", \"#FFD700\",\n            \"#00BFFF\", \"#FFD700\", \"#00BFFF\", \"#FFD700\",\n            \"#00BFFF\"\n        ]\n        COLORS = [sv.Color.from_hex(color_hex=c) for c in COLORS]\n\n        vertex_label_annotator = sv.VertexLabelAnnotator(\n            color=COLORS,\n            text_color=sv.Color.BLACK,\n            border_radius=5\n        )\n        annotated_frame = vertex_label_annotator.annotate(\n            scene=image.copy(),\n            key_points=key_points,\n            labels=labels\n        )\n        ```\n    ![vertex-label-annotator-custom-example](https://media.roboflow.com/\n    supervision-annotator-examples/vertex-label-annotator-custom-example.png)\n    \"\"\"\n    font = cv2.FONT_HERSHEY_SIMPLEX\n\n    skeletons_count, points_count, _ = key_points.xy.shape\n    if skeletons_count == 0:\n        return scene\n\n    anchors = key_points.xy.reshape(points_count * skeletons_count, 2).astype(int)\n    mask = np.all(anchors != 0, axis=1)\n\n    if not np.any(mask):\n        return scene\n\n    colors = self.preprocess_and_validate_colors(\n        colors=self.color,\n        points_count=points_count,\n        skeletons_count=skeletons_count,\n    )\n\n    labels = 
self.preprocess_and_validate_labels(\n        labels=labels, points_count=points_count, skeletons_count=skeletons_count\n    )\n\n    anchors = anchors[mask]\n    colors = colors[mask]\n    labels = labels[mask]\n\n    xyxy = np.array(\n        [\n            self.get_text_bounding_box(\n                text=label,\n                font=font,\n                text_scale=self.text_scale,\n                text_thickness=self.text_thickness,\n                center_coordinates=tuple(anchor),\n            )\n            for anchor, label in zip(anchors, labels)\n        ]\n    )\n\n    xyxy_padded = pad_boxes(xyxy=xyxy, px=self.text_padding)\n\n    for text, color, box, box_padded in zip(labels, colors, xyxy, xyxy_padded):\n        draw_rounded_rectangle(\n            scene=scene,\n            rect=Rect.from_xyxy(box_padded),\n            color=color,\n            border_radius=self.border_radius,\n        )\n        cv2.putText(\n            img=scene,\n            text=text,\n            org=(box[0], box[1] + self.text_padding),\n            fontFace=font,\n            fontScale=self.text_scale,\n            color=self.text_color.as_rgb(),\n            thickness=self.text_thickness,\n            lineType=cv2.LINE_AA,\n        )\n\n    return scene\n
"},{"location":"keypoint/core/","title":"Keypoint Detection","text":"

The sv.KeyPoints class in the Supervision library standardizes results from various keypoint detection and pose estimation models into a consistent format. This class simplifies data manipulation and filtering, providing a uniform API for integration with Supervision keypoints annotators.

UltralyticsInferenceMediaPipe

Use sv.KeyPoints.from_ultralytics method, which accepts YOLOv8 pose result.

import cv2\nimport supervision as sv\nfrom ultralytics import YOLO\n\nimage = cv2.imread(<SOURCE_IMAGE_PATH>)\nmodel = YOLO('yolov8s-pose.pt')\n\nresult = model(image)[0]\nkey_points = sv.KeyPoints.from_ultralytics(result)\n

Use sv.KeyPoints.from_inference method, which accepts Inference pose result.

import cv2\nimport supervision as sv\nfrom inference import get_model\n\nimage = cv2.imread(<SOURCE_IMAGE_PATH>)\nmodel = get_model(model_id=<POSE_MODEL_ID>, api_key=<ROBOFLOW_API_KEY>)\n\nresult = model.infer(image)[0]\nkey_points = sv.KeyPoints.from_inference(result)\n

Use sv.KeyPoints.from_mediapipe method, which accepts MediaPipe pose result.

import cv2\nimport mediapipe as mp\nimport supervision as sv\n\nimage = cv2.imread(<SOURCE_IMAGE_PATH>)\nimage_height, image_width, _ = image.shape\nmediapipe_image = mp.Image(\n    image_format=mp.ImageFormat.SRGB,\n    data=cv2.cvtColor(image, cv2.COLOR_BGR2RGB))\n\noptions = mp.tasks.vision.PoseLandmarkerOptions(\n    base_options=mp.tasks.BaseOptions(\n        model_asset_path=\"pose_landmarker_heavy.task\"\n    ),\n    running_mode=mp.tasks.vision.RunningMode.IMAGE,\n    num_poses=2)\n\nPoseLandmarker = mp.tasks.vision.PoseLandmarker\nwith PoseLandmarker.create_from_options(options) as landmarker:\n    pose_landmarker_result = landmarker.detect(mediapipe_image)\n\nkey_points = sv.KeyPoints.from_mediapipe(\n    pose_landmarker_result, (image_width, image_height))\n

Attributes:

Name Type Description xy ndarray

An array of shape (n, m, 2) containing the keypoint coordinates for n detected objects, each with m keypoints, in [x, y] format

confidence Optional[ndarray]

An array of shape (n, m) containing the confidence scores of the individual keypoints.

class_id Optional[ndarray]

An array of shape (n,) containing the class ids of the detected objects.

data Dict[str, Union[ndarray, List]]

A dictionary containing additional data where each key is a string representing the data type, and the value is either a NumPy array or a list of corresponding data.
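
A minimal sketch of reading these attributes directly; sv.KeyPoints(...) stands in for the result of any of the constructors shown above:

import supervision as sv\n\nkey_points = sv.KeyPoints(...)\n\n# keypoint coordinates for every detected object\nprint(key_points.xy.shape)\n\n# optional per-keypoint confidences and per-object class ids\nif key_points.confidence is not None:\n    print(key_points.confidence.mean())\nif key_points.class_id is not None:\n    print(key_points.class_id)\n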

Source code in supervision/keypoint/core.py
@dataclass\nclass KeyPoints:\n    \"\"\"\n    The `sv.KeyPoints` class in the Supervision library standardizes results from\n    various keypoint detection and pose estimation models into a consistent format. This\n    class simplifies data manipulation and filtering, providing a uniform API for\n    integration with Supervision [keypoints annotators](/keypoint/annotators).\n\n    === \"Ultralytics\"\n\n        Use [`sv.KeyPoints.from_ultralytics`](/keypoint/core/#supervision.keypoint.core.KeyPoints.from_ultralytics)\n        method, which accepts [YOLOv8](https://github.com/ultralytics/ultralytics)\n        pose result.\n\n        ```python\n        import cv2\n        import supervision as sv\n        from ultralytics import YOLO\n\n        image = cv2.imread(<SOURCE_IMAGE_PATH>)\n        model = YOLO('yolov8s-pose.pt')\n\n        result = model(image)[0]\n        key_points = sv.KeyPoints.from_ultralytics(result)\n        ```\n\n    === \"Inference\"\n\n        Use [`sv.KeyPoints.from_inference`](/keypoint/core/#supervision.keypoint.core.KeyPoints.from_inference)\n        method, which accepts [Inference](https://inference.roboflow.com/) pose result.\n\n        ```python\n        import cv2\n        import supervision as sv\n        from inference import get_model\n\n        image = cv2.imread(<SOURCE_IMAGE_PATH>)\n        model = get_model(model_id=<POSE_MODEL_ID>, api_key=<ROBOFLOW_API_KEY>)\n\n        result = model.infer(image)[0]\n        key_points = sv.KeyPoints.from_inference(result)\n        ```\n\n    === \"MediaPipe\"\n\n        Use [`sv.KeyPoints.from_mediapipe`](/keypoint/core/#supervision.keypoint.core.KeyPoints.from_mediapipe)\n        method, which accepts [MediaPipe](https://github.com/google-ai-edge/mediapipe)\n        pose result.\n\n        ```python\n        import cv2\n        import mediapipe as mp\n        import supervision as sv\n\n        image = cv2.imread(<SOURCE_IMAGE_PATH>)\n        image_height, image_width, _ = image.shape\n        mediapipe_image = mp.Image(\n            image_format=mp.ImageFormat.SRGB,\n            data=cv2.cvtColor(image, cv2.COLOR_BGR2RGB))\n\n        options = mp.tasks.vision.PoseLandmarkerOptions(\n            base_options=mp.tasks.BaseOptions(\n                model_asset_path=\"pose_landmarker_heavy.task\"\n            ),\n            running_mode=mp.tasks.vision.RunningMode.IMAGE,\n            num_poses=2)\n\n        PoseLandmarker = mp.tasks.vision.PoseLandmarker\n        with PoseLandmarker.create_from_options(options) as landmarker:\n            pose_landmarker_result = landmarker.detect(mediapipe_image)\n\n        key_points = sv.KeyPoints.from_mediapipe(\n            pose_landmarker_result, (image_width, image_height))\n        ```\n\n    Attributes:\n        xy (np.ndarray): An array of shape `(n, 2)` containing\n            the bounding boxes coordinates in format `[x1, y1]`\n        confidence (Optional[np.ndarray]): An array of shape\n            `(n,)` containing the confidence scores of the keypoint keypoints.\n        class_id (Optional[np.ndarray]): An array of shape\n            `(n,)` containing the class ids of the keypoint keypoints.\n        data (Dict[str, Union[np.ndarray, List]]): A dictionary containing additional\n            data where each key is a string representing the data type, and the value\n            is either a NumPy array or a list of corresponding data.\n    \"\"\"  # noqa: E501 // docs\n\n    xy: npt.NDArray[np.float32]\n    class_id: Optional[npt.NDArray[np.int_]] = None\n    confidence: 
Optional[npt.NDArray[np.float32]] = None\n    data: Dict[str, Union[npt.NDArray[Any], List]] = field(default_factory=dict)\n\n    def __post_init__(self):\n        validate_keypoints_fields(\n            xy=self.xy,\n            confidence=self.confidence,\n            class_id=self.class_id,\n            data=self.data,\n        )\n\n    def __len__(self) -> int:\n        \"\"\"\n        Returns the number of keypoints in the `sv.KeyPoints` object.\n        \"\"\"\n        return len(self.xy)\n\n    def __iter__(\n        self,\n    ) -> Iterator[\n        Tuple[\n            np.ndarray,\n            Optional[np.ndarray],\n            Optional[float],\n            Optional[int],\n            Optional[int],\n            Dict[str, Union[np.ndarray, List]],\n        ]\n    ]:\n        \"\"\"\n        Iterates over the Keypoint object and yield a tuple of\n        `(xy, confidence, class_id, data)` for each keypoint detection.\n        \"\"\"\n        for i in range(len(self.xy)):\n            yield (\n                self.xy[i],\n                self.confidence[i] if self.confidence is not None else None,\n                self.class_id[i] if self.class_id is not None else None,\n                get_data_item(self.data, i),\n            )\n\n    def __eq__(self, other: KeyPoints) -> bool:\n        return all(\n            [\n                np.array_equal(self.xy, other.xy),\n                np.array_equal(self.class_id, other.class_id),\n                np.array_equal(self.confidence, other.confidence),\n                is_data_equal(self.data, other.data),\n            ]\n        )\n\n    @classmethod\n    def from_inference(cls, inference_result: Union[dict, Any]) -> KeyPoints:\n        \"\"\"\n        Create a `sv.KeyPoints` object from the [Roboflow](https://roboflow.com/)\n        API inference result or the [Inference](https://inference.roboflow.com/)\n        package results.\n\n        Args:\n            inference_result (dict, any): The result from the\n                Roboflow API or Inference package containing predictions with keypoints.\n\n        Returns:\n            A `sv.KeyPoints` object containing the keypoint coordinates, class IDs,\n                and class names, and confidences of each keypoint.\n\n        Examples:\n            ```python\n            import cv2\n            import supervision as sv\n            from inference import get_model\n\n            image = cv2.imread(<SOURCE_IMAGE_PATH>)\n            model = get_model(model_id=<POSE_MODEL_ID>, api_key=<ROBOFLOW_API_KEY>)\n\n            result = model.infer(image)[0]\n            key_points = sv.KeyPoints.from_inference(result)\n            ```\n\n            ```python\n            import cv2\n            import supervision as sv\n            from inference_sdk import InferenceHTTPClient\n\n            image = cv2.imread(<SOURCE_IMAGE_PATH>)\n            client = InferenceHTTPClient(\n                api_url=\"https://detect.roboflow.com\",\n                api_key=<ROBOFLOW_API_KEY>\n            )\n\n            result = client.infer(image, model_id=<POSE_MODEL_ID>)\n            key_points = sv.KeyPoints.from_inference(result)\n            ```\n        \"\"\"\n        if isinstance(inference_result, list):\n            raise ValueError(\n                \"from_inference() operates on a single result at a time.\"\n                \"You can retrieve it like so:  inference_result = model.infer(image)[0]\"\n            )\n\n        with suppress(AttributeError):\n            inference_result = 
inference_result.dict(exclude_none=True, by_alias=True)\n\n        if not inference_result.get(\"predictions\"):\n            return cls.empty()\n\n        xy = []\n        confidence = []\n        class_id = []\n        class_names = []\n\n        for prediction in inference_result[\"predictions\"]:\n            prediction_xy = []\n            prediction_confidence = []\n            for keypoint in prediction[\"keypoints\"]:\n                prediction_xy.append([keypoint[\"x\"], keypoint[\"y\"]])\n                prediction_confidence.append(keypoint[\"confidence\"])\n            xy.append(prediction_xy)\n            confidence.append(prediction_confidence)\n\n            class_id.append(prediction[\"class_id\"])\n            class_names.append(prediction[\"class\"])\n\n        data = {CLASS_NAME_DATA_FIELD: np.array(class_names)}\n\n        return cls(\n            xy=np.array(xy, dtype=np.float32),\n            confidence=np.array(confidence, dtype=np.float32),\n            class_id=np.array(class_id, dtype=int),\n            data=data,\n        )\n\n    @classmethod\n    def from_mediapipe(\n        cls, mediapipe_results, resolution_wh: Tuple[int, int]\n    ) -> KeyPoints:\n        \"\"\"\n        Creates a `sv.KeyPoints` instance from a\n        [MediaPipe](https://github.com/google-ai-edge/mediapipe)\n        pose landmark detection inference result.\n\n        Args:\n            mediapipe_results (Union[PoseLandmarkerResult, SolutionOutputs]):\n                The output results from Mediapipe. It supports both: the inference\n                result `PoseLandmarker` and the legacy one from `Pose`.\n            resolution_wh (Tuple[int, int]): A tuple of the form `(width, height)`\n                representing the resolution of the frame.\n\n        Returns:\n            A `sv.KeyPoints` object containing the keypoint coordinates and\n                confidences of each keypoint.\n\n        !!! 
tip\n            Before you start, download model bundles from the\n            [MediaPipe website](https://ai.google.dev/edge/mediapipe/solutions/vision/pose_landmarker/index#models).\n\n        Examples:\n            ```python\n            import cv2\n            import mediapipe as mp\n            import supervision as sv\n\n            image = cv2.imread(<SOURCE_IMAGE_PATH>)\n            image_height, image_width, _ = image.shape\n            mediapipe_image = mp.Image(\n                image_format=mp.ImageFormat.SRGB,\n                data=cv2.cvtColor(image, cv2.COLOR_BGR2RGB))\n\n            options = mp.tasks.vision.PoseLandmarkerOptions(\n                base_options=mp.tasks.BaseOptions(\n                    model_asset_path=\"pose_landmarker_heavy.task\"\n                ),\n                running_mode=mp.tasks.vision.RunningMode.IMAGE,\n                num_poses=2)\n\n            PoseLandmarker = mp.tasks.vision.PoseLandmarker\n            with PoseLandmarker.create_from_options(options) as landmarker:\n                pose_landmarker_result = landmarker.detect(mediapipe_image)\n\n            key_points = sv.KeyPoints.from_mediapipe(\n                pose_landmarker_result, (image_width, image_height))\n            ```\n        \"\"\"  # noqa: E501 // docs\n        results = mediapipe_results.pose_landmarks\n        if not isinstance(mediapipe_results.pose_landmarks, list):\n            if mediapipe_results.pose_landmarks is None:\n                results = []\n            else:\n                results = [\n                    [landmark for landmark in mediapipe_results.pose_landmarks.landmark]\n                ]\n\n        if len(results) == 0:\n            return cls.empty()\n\n        xy = []\n        confidence = []\n        for pose in results:\n            prediction_xy = []\n            prediction_confidence = []\n            for landmark in pose:\n                keypoint_xy = [\n                    landmark.x * resolution_wh[0],\n                    landmark.y * resolution_wh[1],\n                ]\n                prediction_xy.append(keypoint_xy)\n                prediction_confidence.append(landmark.visibility)\n\n            xy.append(prediction_xy)\n            confidence.append(prediction_confidence)\n\n        return cls(\n            xy=np.array(xy, dtype=np.float32),\n            confidence=np.array(confidence, dtype=np.float32),\n        )\n\n    @classmethod\n    def from_ultralytics(cls, ultralytics_results) -> KeyPoints:\n        \"\"\"\n        Creates a `sv.KeyPoints` instance from a\n        [YOLOv8](https://github.com/ultralytics/ultralytics) pose inference result.\n\n        Args:\n            ultralytics_results (ultralytics.engine.results.Keypoints):\n                The output Results instance from YOLOv8\n\n        Returns:\n            A `sv.KeyPoints` object containing the keypoint coordinates, class IDs,\n                and class names, and confidences of each keypoint.\n\n        Examples:\n            ```python\n            import cv2\n            import supervision as sv\n            from ultralytics import YOLO\n\n            image = cv2.imread(<SOURCE_IMAGE_PATH>)\n            model = YOLO('yolov8s-pose.pt')\n\n            result = model(image)[0]\n            key_points = sv.KeyPoints.from_ultralytics(result)\n            ```\n        \"\"\"\n        if ultralytics_results.keypoints.xy.numel() == 0:\n            return cls.empty()\n\n        xy = ultralytics_results.keypoints.xy.cpu().numpy()\n        class_id = 
ultralytics_results.boxes.cls.cpu().numpy().astype(int)\n        class_names = np.array([ultralytics_results.names[i] for i in class_id])\n\n        confidence = ultralytics_results.keypoints.conf.cpu().numpy()\n        data = {CLASS_NAME_DATA_FIELD: class_names}\n        return cls(xy, class_id, confidence, data)\n\n    @classmethod\n    def from_yolo_nas(cls, yolo_nas_results) -> KeyPoints:\n        \"\"\"\n        Create a `sv.KeyPoints` instance from a [YOLO-NAS](https://github.com/Deci-AI/super-gradients/blob/master/YOLONAS-POSE.md)\n        pose inference results.\n\n        Args:\n            yolo_nas_results (ImagePoseEstimationPrediction): The output object from\n                YOLO NAS.\n\n        Returns:\n            A `sv.KeyPoints` object containing the keypoint coordinates, class IDs,\n                and class names, and confidences of each keypoint.\n\n        Examples:\n            ```python\n            import cv2\n            import torch\n            import supervision as sv\n            import super_gradients\n\n            image = cv2.imread(<SOURCE_IMAGE_PATH>)\n\n            device = \"cuda\" if torch.cuda.is_available() else \"cpu\"\n            model = super_gradients.training.models.get(\n                \"yolo_nas_pose_s\", pretrained_weights=\"coco_pose\").to(device)\n\n            results = model.predict(image, conf=0.1)\n            key_points = sv.KeyPoints.from_yolo_nas(results)\n            ```\n        \"\"\"  # noqa: E501 // docs\n        if len(yolo_nas_results.prediction.poses) == 0:\n            return cls.empty()\n\n        xy = yolo_nas_results.prediction.poses[:, :, :2]\n        confidence = yolo_nas_results.prediction.poses[:, :, 2]\n\n        # yolo_nas_results treats params differently.\n        # prediction.labels may not exist, whereas class_names might be None\n        if hasattr(yolo_nas_results.prediction, \"labels\"):\n            class_id = yolo_nas_results.prediction.labels  # np.array[int]\n        else:\n            class_id = None\n\n        data = {}\n        if class_id is not None and yolo_nas_results.class_names is not None:\n            class_names = []\n            for c_id in class_id:\n                name = yolo_nas_results.class_names[c_id]  # tuple[str]\n                class_names.append(name)\n            data[CLASS_NAME_DATA_FIELD] = class_names\n\n        return cls(\n            xy=xy,\n            confidence=confidence,\n            class_id=class_id,\n            data=data,\n        )\n\n    def __getitem__(\n        self, index: Union[int, slice, List[int], np.ndarray, str]\n    ) -> Union[KeyPoints, List, np.ndarray, None]:\n        \"\"\"\n        Get a subset of the `sv.KeyPoints` object or access an item from its data field.\n\n        When provided with an integer, slice, list of integers, or a numpy array, this\n        method returns a new `sv.KeyPoints` object that represents a subset of the\n        original `sv.KeyPoints`. 
When provided with a string, it accesses the\n        corresponding item in the data dictionary.\n\n        Args:\n            index (Union[int, slice, List[int], np.ndarray, str]): The index, indices,\n                or key to access a subset of the `sv.KeyPoints` or an item from the\n                data.\n\n        Returns:\n            A subset of the `sv.KeyPoints` object or an item from the data field.\n\n        Examples:\n            ```python\n            import supervision as sv\n\n            key_points = sv.KeyPoints()\n\n            # access the first keypoint using an integer index\n            key_points[0]\n\n            # access the first 10 keypoints using index slice\n            key_points[0:10]\n\n            # access selected keypoints using a list of indices\n            key_points[[0, 2, 4]]\n\n            # access keypoints with selected class_id\n            key_points[key_points.class_id == 0]\n\n            # access keypoints with confidence greater than 0.5\n            key_points[key_points.confidence > 0.5]\n            ```\n        \"\"\"\n        if isinstance(index, str):\n            return self.data.get(index)\n        if isinstance(index, int):\n            index = [index]\n        return KeyPoints(\n            xy=self.xy[index],\n            confidence=self.confidence[index] if self.confidence is not None else None,\n            class_id=self.class_id[index] if self.class_id is not None else None,\n            data=get_data_item(self.data, index),\n        )\n\n    def __setitem__(self, key: str, value: Union[np.ndarray, List]):\n        \"\"\"\n        Set a value in the data dictionary of the `sv.KeyPoints` object.\n\n        Args:\n            key (str): The key in the data dictionary to set.\n            value (Union[np.ndarray, List]): The value to set for the key.\n\n        Examples:\n            ```python\n            import cv2\n            import supervision as sv\n            from ultralytics import YOLO\n\n            image = cv2.imread(<SOURCE_IMAGE_PATH>)\n            model = YOLO('yolov8s.pt')\n\n            result = model(image)[0]\n            keypoints = sv.KeyPoints.from_ultralytics(result)\n\n            keypoints['class_name'] = [\n                 model.model.names[class_id]\n                 for class_id\n                 in keypoints.class_id\n             ]\n            ```\n        \"\"\"\n        if not isinstance(value, (np.ndarray, list)):\n            raise TypeError(\"Value must be a np.ndarray or a list\")\n\n        if isinstance(value, list):\n            value = np.array(value)\n\n        self.data[key] = value\n\n    @classmethod\n    def empty(cls) -> KeyPoints:\n        \"\"\"\n        Create an empty Keypoints object with no keypoints.\n\n        Returns:\n            An empty `sv.KeyPoints` object.\n\n        Examples:\n            ```python\n            import supervision as sv\n\n            key_points = sv.KeyPoints.empty()\n            ```\n        \"\"\"\n        return cls(xy=np.empty((0, 0, 2), dtype=np.float32))\n
"},{"location":"keypoint/core/#supervision.keypoint.core.KeyPoints-functions","title":"Functions","text":""},{"location":"keypoint/core/#supervision.keypoint.core.KeyPoints.__getitem__","title":"__getitem__(index)","text":"

Get a subset of the sv.KeyPoints object or access an item from its data field.

When provided with an integer, slice, list of integers, or a numpy array, this method returns a new sv.KeyPoints object that represents a subset of the original sv.KeyPoints. When provided with a string, it accesses the corresponding item in the data dictionary.

Parameters:

Name Type Description Default index Union[int, slice, List[int], ndarray, str]

The index, indices, or key to access a subset of the sv.KeyPoints or an item from the data.

required

Returns:

Type Description Union[KeyPoints, List, ndarray, None]

A subset of the sv.KeyPoints object or an item from the data field.

Examples:

import supervision as sv\n\nkey_points = sv.KeyPoints()\n\n# access the first keypoint using an integer index\nkey_points[0]\n\n# access the first 10 keypoints using index slice\nkey_points[0:10]\n\n# access selected keypoints using a list of indices\nkey_points[[0, 2, 4]]\n\n# access keypoints with selected class_id\nkey_points[key_points.class_id == 0]\n\n# access keypoints with confidence greater than 0.5\nkey_points[key_points.confidence > 0.5]\n
Source code in supervision/keypoint/core.py
def __getitem__(\n    self, index: Union[int, slice, List[int], np.ndarray, str]\n) -> Union[KeyPoints, List, np.ndarray, None]:\n    \"\"\"\n    Get a subset of the `sv.KeyPoints` object or access an item from its data field.\n\n    When provided with an integer, slice, list of integers, or a numpy array, this\n    method returns a new `sv.KeyPoints` object that represents a subset of the\n    original `sv.KeyPoints`. When provided with a string, it accesses the\n    corresponding item in the data dictionary.\n\n    Args:\n        index (Union[int, slice, List[int], np.ndarray, str]): The index, indices,\n            or key to access a subset of the `sv.KeyPoints` or an item from the\n            data.\n\n    Returns:\n        A subset of the `sv.KeyPoints` object or an item from the data field.\n\n    Examples:\n        ```python\n        import supervision as sv\n\n        key_points = sv.KeyPoints()\n\n        # access the first keypoint using an integer index\n        key_points[0]\n\n        # access the first 10 keypoints using index slice\n        key_points[0:10]\n\n        # access selected keypoints using a list of indices\n        key_points[[0, 2, 4]]\n\n        # access keypoints with selected class_id\n        key_points[key_points.class_id == 0]\n\n        # access keypoints with confidence greater than 0.5\n        key_points[key_points.confidence > 0.5]\n        ```\n    \"\"\"\n    if isinstance(index, str):\n        return self.data.get(index)\n    if isinstance(index, int):\n        index = [index]\n    return KeyPoints(\n        xy=self.xy[index],\n        confidence=self.confidence[index] if self.confidence is not None else None,\n        class_id=self.class_id[index] if self.class_id is not None else None,\n        data=get_data_item(self.data, index),\n    )\n
"},{"location":"keypoint/core/#supervision.keypoint.core.KeyPoints.__iter__","title":"__iter__()","text":"

Iterates over the Keypoint object and yield a tuple of (xy, confidence, class_id, data) for each keypoint detection.

Source code in supervision/keypoint/core.py
def __iter__(\n    self,\n) -> Iterator[\n    Tuple[\n        np.ndarray,\n        Optional[np.ndarray],\n        Optional[float],\n        Optional[int],\n        Optional[int],\n        Dict[str, Union[np.ndarray, List]],\n    ]\n]:\n    \"\"\"\n    Iterates over the Keypoint object and yield a tuple of\n    `(xy, confidence, class_id, data)` for each keypoint detection.\n    \"\"\"\n    for i in range(len(self.xy)):\n        yield (\n            self.xy[i],\n            self.confidence[i] if self.confidence is not None else None,\n            self.class_id[i] if self.class_id is not None else None,\n            get_data_item(self.data, i),\n        )\n
"},{"location":"keypoint/core/#supervision.keypoint.core.KeyPoints.__len__","title":"__len__()","text":"

Returns the number of keypoints in the sv.KeyPoints object.

Source code in supervision/keypoint/core.py
def __len__(self) -> int:\n    \"\"\"\n    Returns the number of keypoints in the `sv.KeyPoints` object.\n    \"\"\"\n    return len(self.xy)\n
"},{"location":"keypoint/core/#supervision.keypoint.core.KeyPoints.__setitem__","title":"__setitem__(key, value)","text":"

Set a value in the data dictionary of the sv.KeyPoints object.

Parameters:

Name Type Description Default key str

The key in the data dictionary to set.

required value Union[ndarray, List]

The value to set for the key.

required

Examples:

import cv2\nimport supervision as sv\nfrom ultralytics import YOLO\n\nimage = cv2.imread(<SOURCE_IMAGE_PATH>)\nmodel = YOLO('yolov8s.pt')\n\nresult = model(image)[0]\nkeypoints = sv.KeyPoints.from_ultralytics(result)\n\nkeypoints['class_name'] = [\n     model.model.names[class_id]\n     for class_id\n     in keypoints.class_id\n ]\n
Source code in supervision/keypoint/core.py
def __setitem__(self, key: str, value: Union[np.ndarray, List]):\n    \"\"\"\n    Set a value in the data dictionary of the `sv.KeyPoints` object.\n\n    Args:\n        key (str): The key in the data dictionary to set.\n        value (Union[np.ndarray, List]): The value to set for the key.\n\n    Examples:\n        ```python\n        import cv2\n        import supervision as sv\n        from ultralytics import YOLO\n\n        image = cv2.imread(<SOURCE_IMAGE_PATH>)\n        model = YOLO('yolov8s.pt')\n\n        result = model(image)[0]\n        keypoints = sv.KeyPoints.from_ultralytics(result)\n\n        keypoints['class_name'] = [\n             model.model.names[class_id]\n             for class_id\n             in keypoints.class_id\n         ]\n        ```\n    \"\"\"\n    if not isinstance(value, (np.ndarray, list)):\n        raise TypeError(\"Value must be a np.ndarray or a list\")\n\n    if isinstance(value, list):\n        value = np.array(value)\n\n    self.data[key] = value\n
"},{"location":"keypoint/core/#supervision.keypoint.core.KeyPoints.empty","title":"empty() classmethod","text":"

Create an empty Keypoints object with no keypoints.

Returns:

Type Description KeyPoints

An empty sv.KeyPoints object.

Examples:

import supervision as sv\n\nkey_points = sv.KeyPoints.empty()\n
Source code in supervision/keypoint/core.py
@classmethod\ndef empty(cls) -> KeyPoints:\n    \"\"\"\n    Create an empty Keypoints object with no keypoints.\n\n    Returns:\n        An empty `sv.KeyPoints` object.\n\n    Examples:\n        ```python\n        import supervision as sv\n\n        key_points = sv.KeyPoints.empty()\n        ```\n    \"\"\"\n    return cls(xy=np.empty((0, 0, 2), dtype=np.float32))\n
"},{"location":"keypoint/core/#supervision.keypoint.core.KeyPoints.from_inference","title":"from_inference(inference_result) classmethod","text":"

Create a sv.KeyPoints object from the Roboflow API inference result or the Inference package results.

Parameters:

Name Type Description Default inference_result (dict, any)

The result from the Roboflow API or Inference package containing predictions with keypoints.

required

Returns:

Type Description KeyPoints

A sv.KeyPoints object containing the keypoint coordinates, class IDs, and class names, and confidences of each keypoint.

Examples:

import cv2\nimport supervision as sv\nfrom inference import get_model\n\nimage = cv2.imread(<SOURCE_IMAGE_PATH>)\nmodel = get_model(model_id=<POSE_MODEL_ID>, api_key=<ROBOFLOW_API_KEY>)\n\nresult = model.infer(image)[0]\nkey_points = sv.KeyPoints.from_inference(result)\n
import cv2\nimport supervision as sv\nfrom inference_sdk import InferenceHTTPClient\n\nimage = cv2.imread(<SOURCE_IMAGE_PATH>)\nclient = InferenceHTTPClient(\n    api_url=\"https://detect.roboflow.com\",\n    api_key=<ROBOFLOW_API_KEY>\n)\n\nresult = client.infer(image, model_id=<POSE_MODEL_ID>)\nkey_points = sv.KeyPoints.from_inference(result)\n
Source code in supervision/keypoint/core.py
@classmethod\ndef from_inference(cls, inference_result: Union[dict, Any]) -> KeyPoints:\n    \"\"\"\n    Create a `sv.KeyPoints` object from the [Roboflow](https://roboflow.com/)\n    API inference result or the [Inference](https://inference.roboflow.com/)\n    package results.\n\n    Args:\n        inference_result (dict, any): The result from the\n            Roboflow API or Inference package containing predictions with keypoints.\n\n    Returns:\n        A `sv.KeyPoints` object containing the keypoint coordinates, class IDs,\n            and class names, and confidences of each keypoint.\n\n    Examples:\n        ```python\n        import cv2\n        import supervision as sv\n        from inference import get_model\n\n        image = cv2.imread(<SOURCE_IMAGE_PATH>)\n        model = get_model(model_id=<POSE_MODEL_ID>, api_key=<ROBOFLOW_API_KEY>)\n\n        result = model.infer(image)[0]\n        key_points = sv.KeyPoints.from_inference(result)\n        ```\n\n        ```python\n        import cv2\n        import supervision as sv\n        from inference_sdk import InferenceHTTPClient\n\n        image = cv2.imread(<SOURCE_IMAGE_PATH>)\n        client = InferenceHTTPClient(\n            api_url=\"https://detect.roboflow.com\",\n            api_key=<ROBOFLOW_API_KEY>\n        )\n\n        result = client.infer(image, model_id=<POSE_MODEL_ID>)\n        key_points = sv.KeyPoints.from_inference(result)\n        ```\n    \"\"\"\n    if isinstance(inference_result, list):\n        raise ValueError(\n            \"from_inference() operates on a single result at a time.\"\n            \"You can retrieve it like so:  inference_result = model.infer(image)[0]\"\n        )\n\n    with suppress(AttributeError):\n        inference_result = inference_result.dict(exclude_none=True, by_alias=True)\n\n    if not inference_result.get(\"predictions\"):\n        return cls.empty()\n\n    xy = []\n    confidence = []\n    class_id = []\n    class_names = []\n\n    for prediction in inference_result[\"predictions\"]:\n        prediction_xy = []\n        prediction_confidence = []\n        for keypoint in prediction[\"keypoints\"]:\n            prediction_xy.append([keypoint[\"x\"], keypoint[\"y\"]])\n            prediction_confidence.append(keypoint[\"confidence\"])\n        xy.append(prediction_xy)\n        confidence.append(prediction_confidence)\n\n        class_id.append(prediction[\"class_id\"])\n        class_names.append(prediction[\"class\"])\n\n    data = {CLASS_NAME_DATA_FIELD: np.array(class_names)}\n\n    return cls(\n        xy=np.array(xy, dtype=np.float32),\n        confidence=np.array(confidence, dtype=np.float32),\n        class_id=np.array(class_id, dtype=int),\n        data=data,\n    )\n
"},{"location":"keypoint/core/#supervision.keypoint.core.KeyPoints.from_mediapipe","title":"from_mediapipe(mediapipe_results, resolution_wh) classmethod","text":"

Creates a sv.KeyPoints instance from a MediaPipe pose landmark detection inference result.

Parameters:

Name Type Description Default mediapipe_results Union[PoseLandmarkerResult, SolutionOutputs]

The output results from Mediapipe. It supports both: the inference result PoseLandmarker and the legacy one from Pose.

required resolution_wh Tuple[int, int]

A tuple of the form (width, height) representing the resolution of the frame.

required

Returns:

Type Description KeyPoints

A sv.KeyPoints object containing the keypoint coordinates and confidences of each keypoint.

Tip

Before you start, download model bundles from the MediaPipe website.

Examples:

import cv2\nimport mediapipe as mp\nimport supervision as sv\n\nimage = cv2.imread(<SOURCE_IMAGE_PATH>)\nimage_height, image_width, _ = image.shape\nmediapipe_image = mp.Image(\n    image_format=mp.ImageFormat.SRGB,\n    data=cv2.cvtColor(image, cv2.COLOR_BGR2RGB))\n\noptions = mp.tasks.vision.PoseLandmarkerOptions(\n    base_options=mp.tasks.BaseOptions(\n        model_asset_path=\"pose_landmarker_heavy.task\"\n    ),\n    running_mode=mp.tasks.vision.RunningMode.IMAGE,\n    num_poses=2)\n\nPoseLandmarker = mp.tasks.vision.PoseLandmarker\nwith PoseLandmarker.create_from_options(options) as landmarker:\n    pose_landmarker_result = landmarker.detect(mediapipe_image)\n\nkey_points = sv.KeyPoints.from_mediapipe(\n    pose_landmarker_result, (image_width, image_height))\n
Source code in supervision/keypoint/core.py
@classmethod\ndef from_mediapipe(\n    cls, mediapipe_results, resolution_wh: Tuple[int, int]\n) -> KeyPoints:\n    \"\"\"\n    Creates a `sv.KeyPoints` instance from a\n    [MediaPipe](https://github.com/google-ai-edge/mediapipe)\n    pose landmark detection inference result.\n\n    Args:\n        mediapipe_results (Union[PoseLandmarkerResult, SolutionOutputs]):\n            The output results from Mediapipe. It supports both: the inference\n            result `PoseLandmarker` and the legacy one from `Pose`.\n        resolution_wh (Tuple[int, int]): A tuple of the form `(width, height)`\n            representing the resolution of the frame.\n\n    Returns:\n        A `sv.KeyPoints` object containing the keypoint coordinates and\n            confidences of each keypoint.\n\n    !!! tip\n        Before you start, download model bundles from the\n        [MediaPipe website](https://ai.google.dev/edge/mediapipe/solutions/vision/pose_landmarker/index#models).\n\n    Examples:\n        ```python\n        import cv2\n        import mediapipe as mp\n        import supervision as sv\n\n        image = cv2.imread(<SOURCE_IMAGE_PATH>)\n        image_height, image_width, _ = image.shape\n        mediapipe_image = mp.Image(\n            image_format=mp.ImageFormat.SRGB,\n            data=cv2.cvtColor(image, cv2.COLOR_BGR2RGB))\n\n        options = mp.tasks.vision.PoseLandmarkerOptions(\n            base_options=mp.tasks.BaseOptions(\n                model_asset_path=\"pose_landmarker_heavy.task\"\n            ),\n            running_mode=mp.tasks.vision.RunningMode.IMAGE,\n            num_poses=2)\n\n        PoseLandmarker = mp.tasks.vision.PoseLandmarker\n        with PoseLandmarker.create_from_options(options) as landmarker:\n            pose_landmarker_result = landmarker.detect(mediapipe_image)\n\n        key_points = sv.KeyPoints.from_mediapipe(\n            pose_landmarker_result, (image_width, image_height))\n        ```\n    \"\"\"  # noqa: E501 // docs\n    results = mediapipe_results.pose_landmarks\n    if not isinstance(mediapipe_results.pose_landmarks, list):\n        if mediapipe_results.pose_landmarks is None:\n            results = []\n        else:\n            results = [\n                [landmark for landmark in mediapipe_results.pose_landmarks.landmark]\n            ]\n\n    if len(results) == 0:\n        return cls.empty()\n\n    xy = []\n    confidence = []\n    for pose in results:\n        prediction_xy = []\n        prediction_confidence = []\n        for landmark in pose:\n            keypoint_xy = [\n                landmark.x * resolution_wh[0],\n                landmark.y * resolution_wh[1],\n            ]\n            prediction_xy.append(keypoint_xy)\n            prediction_confidence.append(landmark.visibility)\n\n        xy.append(prediction_xy)\n        confidence.append(prediction_confidence)\n\n    return cls(\n        xy=np.array(xy, dtype=np.float32),\n        confidence=np.array(confidence, dtype=np.float32),\n    )\n
"},{"location":"keypoint/core/#supervision.keypoint.core.KeyPoints.from_ultralytics","title":"from_ultralytics(ultralytics_results) classmethod","text":"

Creates a sv.KeyPoints instance from a YOLOv8 pose inference result.

Parameters:

Name Type Description Default ultralytics_results Keypoints

The output Results instance from YOLOv8

required

Returns:

Type Description KeyPoints

A sv.KeyPoints object containing the keypoint coordinates, class IDs, and class names, and confidences of each keypoint.

Examples:

import cv2\nimport supervision as sv\nfrom ultralytics import YOLO\n\nimage = cv2.imread(<SOURCE_IMAGE_PATH>)\nmodel = YOLO('yolov8s-pose.pt')\n\nresult = model(image)[0]\nkey_points = sv.KeyPoints.from_ultralytics(result)\n
Source code in supervision/keypoint/core.py
@classmethod\ndef from_ultralytics(cls, ultralytics_results) -> KeyPoints:\n    \"\"\"\n    Creates a `sv.KeyPoints` instance from a\n    [YOLOv8](https://github.com/ultralytics/ultralytics) pose inference result.\n\n    Args:\n        ultralytics_results (ultralytics.engine.results.Keypoints):\n            The output Results instance from YOLOv8\n\n    Returns:\n        A `sv.KeyPoints` object containing the keypoint coordinates, class IDs,\n            and class names, and confidences of each keypoint.\n\n    Examples:\n        ```python\n        import cv2\n        import supervision as sv\n        from ultralytics import YOLO\n\n        image = cv2.imread(<SOURCE_IMAGE_PATH>)\n        model = YOLO('yolov8s-pose.pt')\n\n        result = model(image)[0]\n        key_points = sv.KeyPoints.from_ultralytics(result)\n        ```\n    \"\"\"\n    if ultralytics_results.keypoints.xy.numel() == 0:\n        return cls.empty()\n\n    xy = ultralytics_results.keypoints.xy.cpu().numpy()\n    class_id = ultralytics_results.boxes.cls.cpu().numpy().astype(int)\n    class_names = np.array([ultralytics_results.names[i] for i in class_id])\n\n    confidence = ultralytics_results.keypoints.conf.cpu().numpy()\n    data = {CLASS_NAME_DATA_FIELD: class_names}\n    return cls(xy, class_id, confidence, data)\n
"},{"location":"keypoint/core/#supervision.keypoint.core.KeyPoints.from_yolo_nas","title":"from_yolo_nas(yolo_nas_results) classmethod","text":"

Create a sv.KeyPoints instance from a YOLO-NAS pose inference results.

Parameters:

Name Type Description Default yolo_nas_results ImagePoseEstimationPrediction

The output object from YOLO NAS.

required

Returns:

Type Description KeyPoints

A sv.KeyPoints object containing the keypoint coordinates, class IDs, and class names, and confidences of each keypoint.

Examples:

import cv2\nimport torch\nimport supervision as sv\nimport super_gradients\n\nimage = cv2.imread(<SOURCE_IMAGE_PATH>)\n\ndevice = \"cuda\" if torch.cuda.is_available() else \"cpu\"\nmodel = super_gradients.training.models.get(\n    \"yolo_nas_pose_s\", pretrained_weights=\"coco_pose\").to(device)\n\nresults = model.predict(image, conf=0.1)\nkey_points = sv.KeyPoints.from_yolo_nas(results)\n
Source code in supervision/keypoint/core.py
@classmethod\ndef from_yolo_nas(cls, yolo_nas_results) -> KeyPoints:\n    \"\"\"\n    Create a `sv.KeyPoints` instance from a [YOLO-NAS](https://github.com/Deci-AI/super-gradients/blob/master/YOLONAS-POSE.md)\n    pose inference results.\n\n    Args:\n        yolo_nas_results (ImagePoseEstimationPrediction): The output object from\n            YOLO NAS.\n\n    Returns:\n        A `sv.KeyPoints` object containing the keypoint coordinates, class IDs,\n            and class names, and confidences of each keypoint.\n\n    Examples:\n        ```python\n        import cv2\n        import torch\n        import supervision as sv\n        import super_gradients\n\n        image = cv2.imread(<SOURCE_IMAGE_PATH>)\n\n        device = \"cuda\" if torch.cuda.is_available() else \"cpu\"\n        model = super_gradients.training.models.get(\n            \"yolo_nas_pose_s\", pretrained_weights=\"coco_pose\").to(device)\n\n        results = model.predict(image, conf=0.1)\n        key_points = sv.KeyPoints.from_yolo_nas(results)\n        ```\n    \"\"\"  # noqa: E501 // docs\n    if len(yolo_nas_results.prediction.poses) == 0:\n        return cls.empty()\n\n    xy = yolo_nas_results.prediction.poses[:, :, :2]\n    confidence = yolo_nas_results.prediction.poses[:, :, 2]\n\n    # yolo_nas_results treats params differently.\n    # prediction.labels may not exist, whereas class_names might be None\n    if hasattr(yolo_nas_results.prediction, \"labels\"):\n        class_id = yolo_nas_results.prediction.labels  # np.array[int]\n    else:\n        class_id = None\n\n    data = {}\n    if class_id is not None and yolo_nas_results.class_names is not None:\n        class_names = []\n        for c_id in class_id:\n            name = yolo_nas_results.class_names[c_id]  # tuple[str]\n            class_names.append(name)\n        data[CLASS_NAME_DATA_FIELD] = class_names\n\n    return cls(\n        xy=xy,\n        confidence=confidence,\n        class_id=class_id,\n        data=data,\n    )\n
"},{"location":"notebooks/annotate-video-with-detections/","title":"Annotate Video with Detections","text":"In\u00a0[1]: Copied!
!nvidia-smi\n
!nvidia-smi
Fri Feb 23 03:15:00 2024       \n+---------------------------------------------------------------------------------------+\n| NVIDIA-SMI 535.104.05             Driver Version: 535.104.05   CUDA Version: 12.2     |\n|-----------------------------------------+----------------------+----------------------+\n| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |\n| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |\n|                                         |                      |               MIG M. |\n|=========================================+======================+======================|\n|   0  Tesla V100-SXM2-16GB           Off | 00000000:00:04.0 Off |                    0 |\n| N/A   33C    P0              24W / 300W |      0MiB / 16384MiB |      0%      Default |\n|                                         |                      |                  N/A |\n+-----------------------------------------+----------------------+----------------------+\n                                                                                         \n+---------------------------------------------------------------------------------------+\n| Processes:                                                                            |\n|  GPU   GI   CI        PID   Type   Process name                            GPU Memory |\n|        ID   ID                                                             Usage      |\n|=======================================================================================|\n|  No running processes found                                                           |\n+---------------------------------------------------------------------------------------+\n
In\u00a0[\u00a0]: Copied!
!pip install -q inference-gpu \"supervision[assets]\"\n
!pip install -q inference-gpu \"supervision[assets]\" In\u00a0[\u00a0]: Copied!
from supervision.assets import download_assets, VideoAssets\n\n# Download a supervision video asset\npath_to_video = download_assets(VideoAssets.PEOPLE_WALKING)\n
from supervision.assets import download_assets, VideoAssets # Download a supervision video asset path_to_video = download_assets(VideoAssets.PEOPLE_WALKING)

As a result, we've downloaded a video. Let's take a look at the video below. Keep in mind that the video preview below works only in the web version of the cookbooks and not in Google Colab.

In\u00a0[4]: Copied!
import supervision as sv\nfrom supervision.assets import download_assets, VideoAssets\nfrom inference.models.utils import get_roboflow_model\n\n\n# Load a yolov8 model from roboflow.\nmodel = get_roboflow_model(\"yolov8s-640\")\n\n# Create a frame generator and video info object from supervision utilities.\nframe_generator = sv.get_video_frames_generator(path_to_video)\n\n# Yield a single frame from the generator.\nframe = next(frame_generator)\n\n# Run inference on our frame\nresult = model.infer(frame)[0]\n\n# Parse result into detections data model.\ndetections = sv.Detections.from_inference(result)\n\n# Pretty Print the resulting detections.\nfrom pprint import pprint\npprint(detections)\n
import supervision as sv from supervision.assets import download_assets, VideoAssets from inference.models.utils import get_roboflow_model # Load a yolov8 model from roboflow. model = get_roboflow_model(\"yolov8s-640\") # Create a frame generator and video info object from supervision utilities. frame_generator = sv.get_video_frames_generator(path_to_video) # Yield a single frame from the generator. frame = next(frame_generator) # Run inference on our frame result = model.infer(frame)[0] # Parse result into detections data model. detections = sv.Detections.from_inference(result) # Pretty Print the resulting detections. from pprint import pprint pprint(detections)
Detections(xyxy=array([[1140.,  951., 1245., 1079.],\n       [ 666.,  648.,  745.,  854.],\n       [  34.,  794.,  142.,  990.],\n       [1140.,  505., 1211.,  657.],\n       [ 260.,  438.,  332.,  612.],\n       [1413.,  702., 1523.,  887.],\n       [1462.,  472., 1543.,  643.],\n       [1446.,  318., 1516.,  483.],\n       [ 753.,  451.,  821.,  623.],\n       [ 924.,  172.,  983.,  307.],\n       [1791.,  144., 1852.,  275.],\n       [  93.,  132.,  146.,  251.],\n       [ 708.,  240.,  765.,  388.],\n       [ 200.,   44.,  267.,  161.],\n       [1204.,  131., 1255.,  266.],\n       [ 569.,  267.,  628.,  408.],\n       [1163.,  150., 1210.,  280.],\n       [ 799.,   78.,  847.,  204.],\n       [1690.,  152., 1751.,  283.],\n       [ 344.,  495.,  396.,  641.],\n       [1722.,   77., 1782.,  178.]]),\n           mask=None,\n           confidence=array([0.83215541, 0.80572134, 0.7919845 , 0.7912274 , 0.77121079,\n       0.7599591 , 0.75711554, 0.75494027, 0.73076195, 0.71452248,\n       0.69572842, 0.65269446, 0.63952065, 0.62914598, 0.61361706,\n       0.5968492 , 0.55311316, 0.5470854 , 0.54070991, 0.52209878,\n       0.41217673]),\n           class_id=array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]),\n           tracker_id=None,\n           data={'class_name': array(['person', 'person', 'person', 'person', 'person', 'person',\n       'person', 'person', 'person', 'person', 'person', 'person',\n       'person', 'person', 'person', 'person', 'person', 'person',\n       'person', 'person', 'person'], dtype='<U6')})\n

First, we load our model using the get_roboflow_model() method. Notice how we pass in a model_id? We're using an alias here. This is also where we can load other models from Roboflow Universe, like the rock, paper, scissors model below, by supplying our Roboflow API key.

model = get_roboflow_model(\n    model_id=\"rock-paper-scissors-sxsw/11\",\n    api_key=\"roboflow_private_api_key\"\n)\n

If you don't have an API key, you can create a free Roboflow account. This model wouldn't be much help with detecting people, but it's a nice exercise to see how our code becomes model agnostic!

We then create a frame_generator object and yield a single frame for inference using next(). We pass our frame to model.infer() to run inference. After that, we pass the result into a handy helper function, sv.Detections.from_inference(), to parse it. Lastly, we print our detections to show that we are in fact detecting a few people in the frame!

In\u00a0[5]: Copied!
# Create a bounding box annotator object.\nbounding_box = sv.BoundingBoxAnnotator()\n\n# Annotate our frame with detections.\nannotated_frame = bounding_box.annotate(scene=frame.copy(), detections=detections)\n\n# Display the frame.\nsv.plot_image(annotated_frame)\n
# Create a bounding box annotator object. bounding_box = sv.BoundingBoxAnnotator() # Annotate our frame with detections. annotated_frame = bounding_box.annotate(scene=frame.copy(), detections=detections) # Display the frame. sv.plot_image(annotated_frame)

Notice that we create a bounding_box variable by initializing a BoundingBoxAnnotator. We can change the color and thickness, but for simplicity we keep the defaults. The Supervision package offers a ton of other easy-to-use annotators beyond the bounding box that are fun to play with.
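If you'd like to tweak the look, here's a minimal sketch of what that customization could look like; it reuses the frame and detections from the cells above, and the hex color and thickness are illustrative choices, not values from this cookbook.
bounding_box = sv.BoundingBoxAnnotator(\n    color=sv.Color.from_hex(\"#ff0066\"),  # illustrative color, pick your own\n    thickness=4                           # thicker lines than the default\n)\n\n# Re-annotate the same frame with the customized annotator.\nannotated_frame = bounding_box.annotate(scene=frame.copy(), detections=detections)\nsv.plot_image(annotated_frame)\n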

In\u00a0[\u00a0]: Copied!
from tqdm import tqdm\n\n# Create a video_info object for use in the VideoSink.\nvideo_info = sv.VideoInfo.from_video_path(video_path=path_to_video)\n\n# Create a VideoSink context manager to save our frames.\nwith sv.VideoSink(target_path=\"output.mp4\", video_info=video_info) as sink:\n\n    # Iterate through frames yielded from the frame_generator.\n    for frame in tqdm(frame_generator, total=video_info.total_frames):\n\n        # Run inference on our frame.\n        result = model.infer(frame)[0]\n\n        # Parse the result into the detections data model.\n        detections = sv.Detections.from_inference(result)\n\n        # Apply bounding box to detections on a copy of the frame.\n        annotated_frame = bounding_box.annotate(\n            scene=frame.copy(),\n            detections=detections\n        )\n\n        # Write the annotated frame to the video sink.\n        sink.write_frame(frame=annotated_frame)\n
from tqdm import tqdm # Create a video_info object for use in the VideoSink. video_info = sv.VideoInfo.from_video_path(video_path=path_to_video) # Create a VideoSink context manager to save our frames. with sv.VideoSink(target_path=\"output.mp4\", video_info=video_info) as sink: # Iterate through frames yielded from the frame_generator. for frame in tqdm(frame_generator, total=video_info.total_frames): # Run inference on our frame. result = model.infer(frame)[0] # Parse the result into the detections data model. detections = sv.Detections.from_inference(result) # Apply bounding box to detections on a copy of the frame. annotated_frame = bounding_box.annotate( scene=frame.copy(), detections=detections ) # Write the annotated frame to the video sink. sink.write_frame(frame=annotated_frame)

In the code above, we've created a video_info variable to pass information about the video to our VideoSink. The VideoSink is a handy context manager that lets us write_frame() annotated frames to a video output file. We're also optionally using tqdm to display a progress bar with the percentage complete. We've only scratched the surface of the customizable annotators and additional features that Supervision and Inference have to offer. Stay tuned for more cookbooks on how to take advantage of them in your computer vision applications. Happy building! \ud83d\ude80

"},{"location":"notebooks/annotate-video-with-detections/#annotate-video-with-detections","title":"Annotate Video with Detections\u00b6","text":"

One of the most common requirements of computer vision applications is detecting objects in images and displaying bounding boxes around those objects. In this cookbook, we'll walk through how to use the open source Roboflow ecosystem to accomplish this task on a video. Let's dive in!

"},{"location":"notebooks/annotate-video-with-detections/#before-you-start","title":"Before you start\u00b6","text":"

Let's make sure that we have access to a GPU. We can use the nvidia-smi command to do that. In case of any problems, navigate to Edit -> Notebook settings -> Hardware accelerator, set it to GPU, and then click Save.

"},{"location":"notebooks/annotate-video-with-detections/#installing-dependencies","title":"Installing Dependencies\u00b6","text":"

In this cookbook we'll be utilizing the open source packages Inference and Supervision to accomplish our goals. Let's get those installed in our notebook with pip.

"},{"location":"notebooks/annotate-video-with-detections/#download-a-video-asset","title":"Download a Video Asset\u00b6","text":"

First, let's download a video that we can detect objects in. Supervision comes with a great utility called Assets to help us hit the ground running. When we run this script, the video is saved in our local directory and can be accessed with the variable path_to_video.

"},{"location":"notebooks/annotate-video-with-detections/#detecting-objects","title":"Detecting Objects\u00b6","text":"

For this example, the objects in the video that we'd like to detect are people. In order to display bounding boxes around the people in the video, we first need a way to detect them. We'll be using the open source Inference package for this task. Inference allows us to quickly use thousands of models, including fine-tuned models from Roboflow Universe, with a few lines of code. We'll also use a few utilities from the Supervision package for working with our video data.

"},{"location":"notebooks/annotate-video-with-detections/#annotaing-the-frame-with-bounding-boxes","title":"Annotaing the Frame with Bounding Boxes\u00b6","text":"

Now that we're detecting objects in the frame, let's get to the fun part: annotating the frame and displaying the bounding boxes.

"},{"location":"notebooks/annotate-video-with-detections/#saving-bounding-boxes-to-a-video","title":"Saving Bounding Boxes to a Video\u00b6","text":"

Let's wrap up our code by utilizing a VideoSink to draw bounding boxes and save the resulting video. Take a peek at the final code example below. This can take a couple of minutes depending on your runtime, since we're processing a full video. Feel free to skip ahead to see the resulting video.

"},{"location":"notebooks/count-objects-crossing-the-line/","title":"Count Objects Crossing the Line","text":"

Click the Open in Colab button to run the cookbook on Google Colab.

In\u00a0[1]: Copied!
!nvidia-smi\n
!nvidia-smi
Mon Feb 12 13:03:38 2024       \n+---------------------------------------------------------------------------------------+\n| NVIDIA-SMI 535.104.05             Driver Version: 535.104.05   CUDA Version: 12.2     |\n|-----------------------------------------+----------------------+----------------------+\n| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |\n| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |\n|                                         |                      |               MIG M. |\n|=========================================+======================+======================|\n|   0  Tesla T4                       Off | 00000000:00:04.0 Off |                    0 |\n| N/A   48C    P8              10W /  70W |      0MiB / 15360MiB |      0%      Default |\n|                                         |                      |                  N/A |\n+-----------------------------------------+----------------------+----------------------+\n                                                                                         \n+---------------------------------------------------------------------------------------+\n| Processes:                                                                            |\n|  GPU   GI   CI        PID   Type   Process name                            GPU Memory |\n|        ID   ID                                                             Usage      |\n|=======================================================================================|\n|  No running processes found                                                           |\n+---------------------------------------------------------------------------------------+\n

NOTE: To make it easier for us to manage datasets, images, and models, we create a HOME constant.

In\u00a0[2]: Copied!
import os\nHOME = os.getcwd()\nprint(HOME)\n
import os HOME = os.getcwd() print(HOME)
/content\n
In\u00a0[\u00a0]: Copied!
!pip install -q ultralytics supervision==0.18.0\n
!pip install -q ultralytics supervision==0.18.0 In\u00a0[4]: Copied!
import numpy as np\nimport supervision as sv\n\nfrom ultralytics import YOLO\nfrom supervision.assets import download_assets, VideoAssets\n
import numpy as np import supervision as sv from ultralytics import YOLO from supervision.assets import download_assets, VideoAssets

As an example input video, we will use one of the videos available in supervision.assets. Supervision offers an assets download utility that allows you to download video files that you can use in your demos.

In\u00a0[\u00a0]: Copied!
download_assets(VideoAssets.VEHICLES)\n
download_assets(VideoAssets.VEHICLES)

NOTE: If you want to run the cookbook using your own file as input, simply upload the video to Google Colab and replace SOURCE_VIDEO_PATH with the path to your file.
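For example, after uploading your own file, the assignment could look like this (the filename below is purely hypothetical):
SOURCE_VIDEO_PATH = \"/content/my-uploaded-video.mp4\"  # hypothetical path to your uploaded file\n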

In\u00a0[6]: Copied!
SOURCE_VIDEO_PATH = f\"{HOME}/vehicles.mp4\"\n
SOURCE_VIDEO_PATH = f\"{HOME}/vehicles.mp4\"

As a result of executing the above commands, you will download a video file and save it at the SOURCE_VIDEO_PATH. Keep in mind that the video preview below works only in the web version of the cookbooks and not in Google Colab.

The get_video_frames_generator function enables us to easily iterate over video frames. Let's create a video generator for our sample input file and display its first frame on the screen.

In\u00a0[7]: Copied!
generator = sv.get_video_frames_generator(SOURCE_VIDEO_PATH)\nframe = next(generator)\n\nsv.plot_image(frame, (12, 12))\n
generator = sv.get_video_frames_generator(SOURCE_VIDEO_PATH) frame = next(generator) sv.plot_image(frame, (12, 12))

We can also use VideoInfo.from_video_path to learn basic information about our video, such as duration, resolution, or FPS.

In\u00a0[8]: Copied!
sv.VideoInfo.from_video_path(SOURCE_VIDEO_PATH)\n
sv.VideoInfo.from_video_path(SOURCE_VIDEO_PATH) Out[8]:
VideoInfo(width=3840, height=2160, fps=25, total_frames=538)

We initiate the model and perform detection on the first frame of the video. Then, we convert the result into a sv.Detections object, which will be useful in the later parts of the cookbook.

In\u00a0[9]: Copied!
model = YOLO(\"yolov8x.pt\")\n\nresults = model(frame, verbose=False)[0]\ndetections = sv.Detections.from_ultralytics(results)\n
model = YOLO(\"yolov8x.pt\") results = model(frame, verbose=False)[0] detections = sv.Detections.from_ultralytics(results)
Downloading https://github.com/ultralytics/assets/releases/download/v8.1.0/yolov8x.pt to 'yolov8x.pt'...\n
100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 131M/131M [00:00<00:00, 241MB/s]\n

The results we've obtained can be easily visualized with sv.BoundingBoxAnnotator. By default, this annotator uses the same color to highlight objects of the same category. However, with the integration of a tracker, it becomes possible to assign unique colors to each tracked object. We can easily define our own color palettes and adjust parameters such as line thickness, allowing for a highly tailored visualization experience.
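As a rough sketch of that customization (the hex values below are illustrative, and ColorLookup.TRACK only takes effect once detections carry tracker_id, i.e. after a tracker has been applied):
custom_palette = sv.ColorPalette.from_hex([\"#ff595e\", \"#ffca3a\", \"#8ac926\", \"#1982c4\"])  # illustrative colors\n\nbounding_box_annotator = sv.BoundingBoxAnnotator(\n    color=custom_palette,\n    thickness=4,\n    color_lookup=sv.ColorLookup.TRACK  # color by tracker_id instead of class_id\n)\n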

In\u00a0[10]: Copied!
bounding_box_annotator = sv.BoundingBoxAnnotator(thickness=4)\nannotated_frame = bounding_box_annotator.annotate(frame.copy(), detections)\nsv.plot_image(annotated_frame, (12, 12))\n
bounding_box_annotator = sv.BoundingBoxAnnotator(thickness=4) annotated_frame = bounding_box_annotator.annotate(frame.copy(), detections) sv.plot_image(annotated_frame, (12, 12))

Supervision annotators can be easily combined with one another. Let's enhance our visualization by adding sv.LabelAnnotator, which we will use to mark detections with a label indicating their category and confidence level.

In\u00a0[12]: Copied!
labels = [\n    f\"{results.names[class_id]} {confidence:0.2f}\"\n    for class_id, confidence\n    in zip(detections.class_id, detections.confidence)\n]\n\nbounding_box_annotator = sv.BoundingBoxAnnotator(thickness=4)\nlabel_annotator = sv.LabelAnnotator(text_thickness=4, text_scale=2)\n\nannotated_frame = frame.copy()\nannotated_frame = bounding_box_annotator.annotate(annotated_frame, detections)\nannotated_frame = label_annotator.annotate(annotated_frame, detections, labels)\nsv.plot_image(annotated_frame, (12, 12))\n
labels = [ f\"{results.names[class_id]} {confidence:0.2f}\" for class_id, confidence in zip(detections.class_id, detections.confidence) ] bounding_box_annotator = sv.BoundingBoxAnnotator(thickness=4) label_annotator = sv.LabelAnnotator(text_thickness=4, text_scale=2) annotated_frame = frame.copy() annotated_frame = bounding_box_annotator.annotate(annotated_frame, detections) annotated_frame = label_annotator.annotate(annotated_frame, detections, labels) sv.plot_image(annotated_frame, (12, 12))

To set the position of sv.LineZone, we need to define the start and end points. The position of each point is defined as a pair of coordinates (x, y). The origin of the coordinate system is located in the top-left corner of the frame. The x axis runs from left to right, and the y axis runs from top to bottom.

I decided to place my line horizontally, roughly 70% of the way down the frame (at y = 1500). I obtained the full dimensions of the frame using sv.VideoInfo.

In\u00a0[14]: Copied!
sv.VideoInfo.from_video_path(SOURCE_VIDEO_PATH)\n
sv.VideoInfo.from_video_path(SOURCE_VIDEO_PATH) Out[14]:
VideoInfo(width=3840, height=2160, fps=25, total_frames=538)
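As a small sketch, the line endpoints could also be derived from those dimensions instead of hard-coding them in the next cell; the 0.7 fraction below is an illustrative choice, not a value from this cookbook.
video_info = sv.VideoInfo.from_video_path(SOURCE_VIDEO_PATH)\n\n# Place the line a fixed fraction of the way down the frame and span its full width.\nLINE_Y = int(video_info.height * 0.7)  # illustrative fraction\nSTART = sv.Point(0, LINE_Y)\nEND = sv.Point(video_info.width, LINE_Y)\n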

The line we've created, together with the in_count and out_count, can be elegantly visualized using sv.LineZoneAnnotator. This tool also allows for extensive customization options; we can alter the color of both the line and the text, opt to hide the in/out counts, and adjust the labels. By default, the labels are set to in and out, but they can be tailored to fit the context of our project, providing a clear and intuitive display of object movement across the designated line.
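For example, renaming the labels or toggling the counters could look roughly like this; it's a sketch assuming the custom_in_text, custom_out_text, display_in_count, and display_out_count parameters exposed by recent supervision releases, and the label strings are illustrative.
line_zone_annotator = sv.LineZoneAnnotator(\n    thickness=4,\n    text_thickness=4,\n    text_scale=2,\n    custom_in_text=\"into the city\",    # illustrative label\n    custom_out_text=\"out of the city\", # illustrative label\n    display_in_count=True,\n    display_out_count=True\n)\n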

In\u00a0[22]: Copied!
START = sv.Point(0, 1500)\nEND = sv.Point(3840, 1500)\n\nline_zone = sv.LineZone(start=START, end=END)\n\nline_zone_annotator = sv.LineZoneAnnotator(\n    thickness=4,\n    text_thickness=4,\n    text_scale=2)\n\nannotated_frame = frame.copy()\nannotated_frame = line_zone_annotator.annotate(annotated_frame, line_counter=line_zone)\nsv.plot_image(annotated_frame, (12, 12))\n
START = sv.Point(0, 1500) END = sv.Point(3840, 1500) line_zone = sv.LineZone(start=START, end=END) line_zone_annotator = sv.LineZoneAnnotator( thickness=4, text_thickness=4, text_scale=2) annotated_frame = frame.copy() annotated_frame = line_zone_annotator.annotate(annotated_frame, line_counter=line_zone) sv.plot_image(annotated_frame, (12, 12)) In\u00a0[18]: Copied!
byte_tracker = sv.ByteTrack()\n

For an even better visualization, we will add another annotator - sv.TraceAnnotator, which allows for drawing the path traversed by each object over the last few frames. We will use it in combination with sv.BoundingBoxAnnotator and sv.LabelAnnotator, which we became familiar with earlier.
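How far back that drawn path reaches is configurable too; a minimal sketch (the trace_length parameter name is an assumption to check against the sv.TraceAnnotator docs):

# Keep roughly the last 50 tracked positions of each object in the drawn trace
# (trace_length is assumed - confirm against your supervision version).
trace_annotator = sv.TraceAnnotator(thickness=4, trace_length=50)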

bounding_box_annotator = sv.BoundingBoxAnnotator(thickness=4)\nlabel_annotator = sv.LabelAnnotator(text_thickness=4, text_scale=2)\ntrace_annotator = sv.TraceAnnotator(thickness=4)\n

All the operations we plan to perform for each frame of our video - detection, tracking, annotation, and counting - are encapsulated in a function named callback.

def callback(frame: np.ndarray, index:int) -> np.ndarray:\n    results = model(frame, verbose=False)[0]\n    detections = sv.Detections.from_ultralytics(results)\n    detections = byte_tracker.update_with_detections(detections)\n\n    labels = [\n        f\"#{tracker_id} {model.model.names[class_id]} {confidence:0.2f}\"\n        for confidence, class_id, tracker_id\n        in zip(detections.confidence, detections.class_id, detections.tracker_id)\n    ]\n\n    annotated_frame = frame.copy()\n    annotated_frame = trace_annotator.annotate(\n        scene=annotated_frame,\n        detections=detections)\n    annotated_frame = bounding_box_annotator.annotate(\n        scene=annotated_frame,\n        detections=detections)\n    annotated_frame = label_annotator.annotate(\n        scene=annotated_frame,\n        detections=detections,\n        labels=labels)\n\n    line_zone.trigger(detections)\n\n    return  line_zone_annotator.annotate(annotated_frame, line_counter=line_zone)\n

Finally, we are ready to process our entire video. We will use sv.process_video and pass to it the previously defined SOURCE_VIDEO_PATH, TARGET_VIDEO_PATH, and callback.

TARGET_VIDEO_PATH = f\"{HOME}/count-objects-crossing-the-line-result.mp4\"\n
TARGET_VIDEO_PATH = f\"{HOME}/count-objects-crossing-the-line-result.mp4\" In\u00a0[20]: Copied!
sv.process_video(\n    source_path = SOURCE_VIDEO_PATH,\n    target_path = TARGET_VIDEO_PATH,\n    callback=callback\n)\n

Keep in mind that the video preview below works only in the web version of the cookbooks and not in Google Colab.

"},{"location":"notebooks/count-objects-crossing-the-line/#count-objects-crossing-the-line","title":"Count Objects Crossing the Line\u00b6","text":""},{"location":"notebooks/count-objects-crossing-the-line/#before-you-start","title":"Before you start\u00b6","text":"

Let's make sure that we have access to GPU. We can use nvidia-smi command to do that. In case of any problems navigate to Edit -> Notebook settings -> Hardware accelerator, set it to GPU, and then click Save.

"},{"location":"notebooks/count-objects-crossing-the-line/#install-required-packages","title":"Install required packages\u00b6","text":"

In this cookbook, we'll leverage two Python packages - ultralytics for running object detection, and supervision for tracking, visualizing detections, and crucially, counting objects that cross a line.
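A minimal install cell for these two dependencies might look like the one below; pin versions as needed for your environment.

!pip install -q ultralytics supervision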

"},{"location":"notebooks/count-objects-crossing-the-line/#imports","title":"Imports\u00b6","text":""},{"location":"notebooks/count-objects-crossing-the-line/#download-video","title":"Download video\u00b6","text":""},{"location":"notebooks/count-objects-crossing-the-line/#read-single-frame-from-video","title":"Read single frame from video\u00b6","text":""},{"location":"notebooks/count-objects-crossing-the-line/#run-object-detection","title":"Run Object Detection\u00b6","text":"

Let's start by running the detection model on the first frame and annotating the results. In this cookbook, we use Ultralytics YOLOv8, but it can be successfully replaced with other models.
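As a rough sketch of that first step (the yolov8x.pt checkpoint name is illustrative; any Ultralytics detection model works the same way):

import supervision as sv
from ultralytics import YOLO

model = YOLO("yolov8x.pt")               # illustrative checkpoint choice
results = model(frame, verbose=False)[0]
detections = sv.Detections.from_ultralytics(results)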

"},{"location":"notebooks/count-objects-crossing-the-line/#improve-vizualizations","title":"Improve Vizualizations\u00b6","text":""},{"location":"notebooks/count-objects-crossing-the-line/#define-line-position","title":"Define Line Position\u00b6","text":""},{"location":"notebooks/count-objects-crossing-the-line/#process-video","title":"Process Video\u00b6","text":""},{"location":"notebooks/download-supervision-assets/","title":"Download Supervision Assets","text":"In\u00a0[10]: Copied!
!pip install -q \"supervision[assets]\"\n
!pip install -q \"supervision[assets]\" In\u00a0[\u00a0]: Copied!
from supervision.assets import download_assets, VideoAssets\n\n# Download a video of the subway.\npath_to_video = download_assets(VideoAssets.SUBWAY)\n

With this, we now have a high quality video asset for use in demos. Let's take a look at what we downloaded. Keep in mind that the video preview below works only in the web version of the cookbooks and not in Google Colab.

We're now equipped with a video asset from Supervision to run some experiments on! For more information on available assets, visit the Supervision API Reference. Happy building!

"},{"location":"notebooks/download-supervision-assets/#download-supervision-assets","title":"Download Supervision Assets\u00b6","text":"

When experimenting with interesting and useful features of the Supervision package, it's important to have some sort of image or video data to experiment with. Luckily for us, Supervision ships with Assets! Assets is a collection of videos that you can utilize to start experimenting with the various features Supervision has to offer. Let's take a look at how to use this resource.
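Since VideoAssets behaves like a standard Python Enum (an assumption worth confirming in the Assets API reference), you can list everything that ships with the package:

from supervision.assets import VideoAssets

# List the names of all bundled video assets.
for asset in VideoAssets:
    print(asset.name)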

"},{"location":"notebooks/download-supervision-assets/#install-dependencies","title":"Install Dependencies\u00b6","text":""},{"location":"notebooks/download-supervision-assets/#download-a-video","title":"Download a Video\u00b6","text":"

From here, we can download and use a video asset directly from a Python script! Note below that we call the download_assets method to download the VideoAssets.SUBWAY video to our local directory. The method returns the file path, so we can reuse that path for additional experimentation. Once it completes, you will see the video asset in your local directory, ready to experiment with.
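For instance, the returned path plugs straight into other Supervision utilities; a small sketch that grabs and displays the first frame of the downloaded clip:

import supervision as sv
from supervision.assets import download_assets, VideoAssets

path_to_video = download_assets(VideoAssets.SUBWAY)

# Read the first frame from the downloaded video and display it.
frame_generator = sv.get_video_frames_generator(path_to_video)
frame = next(frame_generator)
sv.plot_image(frame)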

"},{"location":"notebooks/evaluating-alignment-of-text-to-image-diffusion-models/","title":"Evaluating Alignment of Text-to-image Diffusion Models","text":"In\u00a0[1]: Copied!
!nvidia-smi\n
Thu Feb 29 18:16:26 2024       \n+---------------------------------------------------------------------------------------+\n| NVIDIA-SMI 535.104.05             Driver Version: 535.104.05   CUDA Version: 12.2     |\n|-----------------------------------------+----------------------+----------------------+\n| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |\n| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |\n|                                         |                      |               MIG M. |\n|=========================================+======================+======================|\n|   0  Tesla T4                       Off | 00000000:00:04.0 Off |                    0 |\n| N/A   46C    P8               9W /  70W |      0MiB / 15360MiB |      0%      Default |\n|                                         |                      |                  N/A |\n+-----------------------------------------+----------------------+----------------------+\n                                                                                         \n+---------------------------------------------------------------------------------------+\n| Processes:                                                                            |\n|  GPU   GI   CI        PID   Type   Process name                            GPU Memory |\n|        ID   ID                                                             Usage      |\n|=======================================================================================|\n|  No running processes found                                                           |\n+---------------------------------------------------------------------------------------+\n
!pip install -q torch diffusers accelerate inference-gpu[yolo-world] dill git+https://github.com/openai/CLIP.git supervision==0.19.0rc5\n
import itertools\nimport cv2\nfrom diffusers import StableDiffusionXLPipeline\nimport numpy as np\nfrom PIL import Image\nimport supervision as sv\nimport torch\nfrom inference.models import YOLOWorld\n
pipeline = StableDiffusionXLPipeline.from_pretrained(\n    \"stabilityai/stable-diffusion-xl-base-1.0\",\n    torch_dtype=torch.float16,\n    variant=\"fp16\",\n    use_safetensors=True,\n).to(\"cuda\")\n
pipeline = StableDiffusionXLPipeline.from_pretrained( \"stabilityai/stable-diffusion-xl-base-1.0\", torch_dtype=torch.float16, variant=\"fp16\", use_safetensors=True, ).to(\"cuda\")

In this example, we'll focus on generating an image of a black cat playing with a blue ball next to a parked white car. We don't care about the aesthetic aspect of the image.

PROMPT = \"a black cat playing with a blue ball next to a parked white car, wide angle, photorealistic\"\nNEGATIVE_PROMPT = \"low quality, blurred, text, illustration\"\nWIDTH, HEIGHT = 1024, 768\nSEED = 9213799\n\nimage = pipeline(\n    prompt=PROMPT,\n    negative_prompt=NEGATIVE_PROMPT,\n    generator=torch.manual_seed(SEED),\n    width=WIDTH,\n    height=HEIGHT,\n).images[0]\nimage\n
PROMPT = \"a black cat playing with a blue ball next to a parked white car, wide angle, photorealistic\" NEGATIVE_PROMPT = \"low quality, blurred, text, illustration\" WIDTH, HEIGHT = 1024, 768 SEED = 9213799 image = pipeline( prompt=PROMPT, negative_prompt=NEGATIVE_PROMPT, generator=torch.manual_seed(SEED), width=WIDTH, height=HEIGHT, ).images[0] image
  0%|          | 0/50 [00:00<?, ?it/s]
Out[41]:

Not bad! The results seem to be well-aligned with the prompt.

model = YOLOWorld(model_id=\"yolo_world/l\")\n

The YOLO-World model allows us to define our own set of labels. Let's create one by combining lists of pre-defined colors and objects.

COLORS = [\"green\", \"yellow\", \"black\", \"blue\", \"red\", \"white\", \"orange\"]\nOBJECTS = [\"car\", \"cat\", \"ball\", \"dog\", \"tree\", \"house\", \"person\"]\nCLASSES = [f\"{color} {obj}\" for color, obj in itertools.product(COLORS, OBJECTS)]\nprint(\"Number of labels:\", len(CLASSES))\n
COLORS = [\"green\", \"yellow\", \"black\", \"blue\", \"red\", \"white\", \"orange\"] OBJECTS = [\"car\", \"cat\", \"ball\", \"dog\", \"tree\", \"house\", \"person\"] CLASSES = [f\"{color} {obj}\" for color, obj in itertools.product(COLORS, OBJECTS)] print(\"Number of labels:\", len(CLASSES))
Number of labels: 49\n

Let's feed these labels into our model:

model.set_classes(CLASSES)\n

Time to detect some objects!

results = model.infer(image)\n

We'll convert the results to the sv.Detections format to enable features like filtering or annotations.

detections = sv.Detections.from_inference(results)\n

Speaking of which: we only care about strong detections, so we filter out ones that are below 0.6 confidence.

valid_detections = detections[detections.confidence >= 0.6]\n

A quick peek at the detected labels and their scores:

labels = [\n    f\"{CLASSES[class_id]} {confidence:0.2f}\"\n    for class_id, confidence\n    in zip(valid_detections.class_id, valid_detections.confidence)\n]\nlabels\n
labels = [ f\"{CLASSES[class_id]} {confidence:0.2f}\" for class_id, confidence in zip(valid_detections.class_id, valid_detections.confidence) ] labels Out[48]:
['blue ball 0.95', 'black cat 0.72', 'white car 0.68']
bounding_box_annotator = sv.BoundingBoxAnnotator(thickness=2)\nlabel_annotator = sv.LabelAnnotator(text_thickness=1, text_scale=0.5,text_color=sv.Color.BLACK)\n

Finally, annotating our image is as simple as calling annotate methods from our annotators:

annotated_image = bounding_box_annotator.annotate(image, valid_detections)\nannotated_image = label_annotator.annotate(annotated_image, valid_detections, labels)\n\nsv.plot_image(annotated_image, (12, 12))\n
GROUND_TRUTH = {\"black cat\", \"blue ball\", \"white car\"}\nprediction = {CLASSES[class_id] for class_id in valid_detections.class_id}\n\nprediction.issubset(GROUND_TRUTH)\n
GROUND_TRUTH = {\"black cat\", \"blue ball\", \"white car\"} prediction = {CLASSES[class_id] for class_id in valid_detections.class_id} prediction.issubset(GROUND_TRUTH) Out[51]:
True

Using sv.Detections makes it super easy to do.

"},{"location":"notebooks/evaluating-alignment-of-text-to-image-diffusion-models/#evaluating-alignment-of-text-to-image-diffusion-models","title":"Evaluating Alignment of Text-to-image Diffusion Models\u00b6","text":"

Click the Open in Colab button to run the cookbook on Google Colab.

"},{"location":"notebooks/evaluating-alignment-of-text-to-image-diffusion-models/#introduction","title":"Introduction\u00b6","text":"

It is a common scenario to evaluate text-to-image models for their alignment with the prompt. One way to test this is to use a set of prompts, each describing a number of objects and their basic physical properties (e.g. color), generate images, and manually evaluate the results. This process can be greatly improved using object detection models.

"},{"location":"notebooks/evaluating-alignment-of-text-to-image-diffusion-models/#before-you-start","title":"Before you start\u00b6","text":"

Let's make sure that we have access to GPU. We can use nvidia-smi command to do that. In case of any problems navigate to Edit -> Notebook settings -> Hardware accelerator, set it to GPU, and then click Save.

"},{"location":"notebooks/evaluating-alignment-of-text-to-image-diffusion-models/#install-required-packages","title":"Install required packages\u00b6","text":"

In this cookbook, we'll leverage the following Python packages:

  • diffusers for image generation pipelines,
  • inference for running object detection,
  • supervision for visualizing detections.
"},{"location":"notebooks/evaluating-alignment-of-text-to-image-diffusion-models/#imports","title":"Imports\u00b6","text":""},{"location":"notebooks/evaluating-alignment-of-text-to-image-diffusion-models/#generating-an-image","title":"Generating an image\u00b6","text":"

We'll use the SDXL model to generate our image. Let's initialize our pipeline first:

"},{"location":"notebooks/evaluating-alignment-of-text-to-image-diffusion-models/#detecting-objects","title":"Detecting objects\u00b6","text":"

Now, let's see how we can detect the objects automatically. For this, we'll use the YOLO-World model from the inference library.

"},{"location":"notebooks/evaluating-alignment-of-text-to-image-diffusion-models/#visualizing-results","title":"Visualizing results\u00b6","text":"

Now, let's use the power of supervision to visualize them. Our output image is in Pillow format, but annotators accept either a BGR np.ndarray or a Pillow PIL.Image.Image.
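If you ever need the NumPy form explicitly, the conversion is a one-liner; this is just a sketch, since in this cookbook the annotators are given the PIL image directly:

import cv2
import numpy as np

# Convert the PIL (RGB) image into the BGR np.ndarray layout used by OpenCV-style code.
image_bgr = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)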

Time to define how we want our detections to be visualized. A combination of sv.BoundingBoxAnnotator and sv.LabelAnnotator should be perfect.

"},{"location":"notebooks/evaluating-alignment-of-text-to-image-diffusion-models/#testing-it-automatically","title":"Testing it automatically\u00b6","text":"

We can also test if all requested objects are in the generated image by comparing a set of ground-truth labels with predicted ones:

"},{"location":"notebooks/evaluating-alignment-of-text-to-image-diffusion-models/#next-steps","title":"Next steps\u00b6","text":"

In this tutorial you learned how to detect and visualize objects for a simple image generation evaluation study.

With a pipeline capable of evaluating a single image in place, the natural next step is to run it on a set of pre-defined scenarios and calculate metrics.
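A minimal sketch of what that batch evaluation could look like is shown below; the SCENARIOS list and the success criterion are hypothetical and not part of this cookbook.

# Hypothetical batch evaluation - SCENARIOS is illustrative only.
SCENARIOS = [
    {"prompt": "a black cat playing with a blue ball", "expected": {"black cat", "blue ball"}},
    {"prompt": "a red car next to a white house", "expected": {"red car", "white house"}},
]

successes = 0
for scenario in SCENARIOS:
    image = pipeline(prompt=scenario["prompt"]).images[0]
    results = model.infer(image)
    detections = sv.Detections.from_inference(results)
    detections = detections[detections.confidence >= 0.6]
    predicted = {CLASSES[class_id] for class_id in detections.class_id}
    # Same set comparison as above: no unexpected objects among the confident detections.
    successes += int(predicted.issubset(scenario["expected"]))

print(f"Alignment success rate: {successes / len(SCENARIOS):.0%}")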

"},{"location":"notebooks/object-tracking/","title":"Object Tracking","text":"In\u00a0[1]: Copied!
!nvidia-smi\n
Fri Feb 23 03:18:02 2024       \n+---------------------------------------------------------------------------------------+\n| NVIDIA-SMI 535.104.05             Driver Version: 535.104.05   CUDA Version: 12.2     |\n|-----------------------------------------+----------------------+----------------------+\n| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |\n| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |\n|                                         |                      |               MIG M. |\n|=========================================+======================+======================|\n|   0  Tesla V100-SXM2-16GB           Off | 00000000:00:04.0 Off |                    0 |\n| N/A   33C    P0              24W / 300W |      0MiB / 16384MiB |      0%      Default |\n|                                         |                      |                  N/A |\n+-----------------------------------------+----------------------+----------------------+\n                                                                                         \n+---------------------------------------------------------------------------------------+\n| Processes:                                                                            |\n|  GPU   GI   CI        PID   Type   Process name                            GPU Memory |\n|        ID   ID                                                             Usage      |\n|=======================================================================================|\n|  No running processes found                                                           |\n+---------------------------------------------------------------------------------------+\n
!pip install -q inference-gpu \"supervision[assets]\"\n
from supervision.assets import download_assets, VideoAssets\n\n# Download a supervision video asset\npath_to_video = download_assets(VideoAssets.PEOPLE_WALKING)\n
import supervision as sv\nfrom inference.models.utils import get_roboflow_model\n\n# Load a pre trained yolov8 nano model from Roboflow Inference.\nmodel = get_roboflow_model('yolov8n-640')\n\n# Create a video info object from the video path.\nvideo_info = sv.VideoInfo.from_video_path(path_to_video)\n\n# Create a label annotator for labeling detections with our tracker_id.\nlabel = sv.LabelAnnotator()\n\n# Create a ByteTrack object to track detections.\nbyte_tracker = sv.ByteTrack(frame_rate=video_info.fps)\n\n# Create a frame generator from video path for iteration of frames.\nframe_generator = sv.get_video_frames_generator(path_to_video)\n\n# Grab a frame from the frame_generator.\nframe = next(frame_generator)\n\n# Run inference on the frame by passing it to our model.\nresult = model.infer(frame)[0]\n\n# Convert model results to a supervision detection object.\ndetections = sv.Detections.from_inference(result)\n\n# Update detections with tracker ids fro byte_tracker.\ntracked_detections = byte_tracker.update_with_detections(detections)\n\n# Create labels with tracker_id for label annotator.\nlabels = [ f\"{tracker_id}\" for tracker_id in tracked_detections.tracker_id ]\n\n# Apply label annotator to frame.\nannotated_frame = label.annotate(scene=frame.copy(), detections=tracked_detections, labels=labels)\n\n# Display the frame.\nsv.plot_image(annotated_frame)\n
from tqdm import tqdm\n\n# Load a pre trained yolov8 nano model from Roboflow Inference.\nmodel = get_roboflow_model('yolov8n-640')\n\n# Create a video info object from the video path.\nvideo_info = sv.VideoInfo.from_video_path(path_to_video)\n\n# Create a label annotator for labeling detections with our tracker_id.\nlabel = sv.LabelAnnotator()\n\n# Create a ByteTrack object to track detections.\nbyte_tracker = sv.ByteTrack(frame_rate=video_info.fps)\n\n# Create a frame generator from video path for iteration of frames.\nframe_generator = sv.get_video_frames_generator(path_to_video)\n\n# Create a video sink context manager to save resulting video.\nwith sv.VideoSink(target_path=\"output.mp4\", video_info=video_info) as sink:\n\n    # Iterate through frames yielded from the frame_generator.\n    for frame in tqdm(frame_generator, total=video_info.total_frames):\n\n        # Run inference on the frame by passing it to our model.\n        result = model.infer(frame)[0]\n\n        # Convert model results to a supervision detection object.\n        detections = sv.Detections.from_inference(result)\n\n        # Update detections with tracker ids fro byte_tracker.\n        tracked_detections = byte_tracker.update_with_detections(detections)\n\n        # Create labels with tracker_id for label annotator.\n        labels = [ f\"{tracker_id}\" for tracker_id in tracked_detections.tracker_id ]\n\n        # Apply label annotator to frame.\n        annotated_frame = label.annotate(scene=frame.copy(), detections=tracked_detections, labels=labels)\n\n        # Save the annotated frame to an output video.\n        sink.write_frame(frame=annotated_frame)\n

Let's take a look at our resulting video. It will also be created in your current directory with the name output.mp4. Notice how, even with a little flicker, we can see the tracker_id on the people walking in the video. With trackers under your belt, there is now a wide variety of use cases you can solve! Happy building!

"},{"location":"notebooks/object-tracking/#object-tracking","title":"Object Tracking\u00b6","text":"

In some cases, it's important for us to track objects across multiple frames of a video. For example, we may need to figure out the direction a vehicle is moving, or count objects in a frame. Some Supervision annotators and tools, like LineZone, require tracking to be set up. In this cookbook, we'll cover how to get a tracker up and running for use in your computer vision applications.

"},{"location":"notebooks/object-tracking/#what-is-a-tracker","title":"What is a Tracker?\u00b6","text":"

A tracker is a piece of code that identifies objects across frames and assigns each one a unique tracker_id. There are a few popular trackers at the time of writing, including ByteTrack and Bot-SORT. Supervision makes using trackers a breeze and comes with ByteTrack built in.
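In its simplest form, tracking with Supervision is one extra call per frame; a minimal sketch assuming you already have per-frame detections from a model:

import supervision as sv

tracker = sv.ByteTrack()

# `result` stands in for the output of whatever detection model you are running.
detections = sv.Detections.from_inference(result)
detections = tracker.update_with_detections(detections)

# Each detection now carries a tracker_id that is stable across frames.
print(detections.tracker_id)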

"},{"location":"notebooks/object-tracking/#before-you-start","title":"Before you start\u00b6","text":"

Let's make sure that we have access to GPU. We can use nvidia-smi command to do that. In case of any problems navigate to Edit -> Notebook settings -> Hardware accelerator, set it to GPU, and then click Save.

"},{"location":"notebooks/object-tracking/#install-dependencies","title":"Install Dependencies\u00b6","text":""},{"location":"notebooks/object-tracking/#download-a-video-asset","title":"Download a Video Asset\u00b6","text":"

Now that we have our environment set up, let's download a video that we can detect objects in. Supervision comes with a great utility to help us hit the ground running. We can use the snippet below to save a video asset to our local directory. It can also be accessed via the variable path_to_video for additional application logic.

"},{"location":"notebooks/object-tracking/#tracking-objects-in-a-frame","title":"Tracking Objects in a Frame\u00b6","text":"

Now that we have our video downloaded, let's get to work on tracking objects. We'll first pull in a model from Roboflow Inference to detect people in our video. Then we'll create a byte_tracker object and pass our detections to it, which gives each detection a tracker_id. Finally, we'll use that tracker_id to label our detections with a label_annotator.

"},{"location":"notebooks/object-tracking/#tracking-objects-in-a-video","title":"Tracking Objects in a Video\u00b6","text":"

Finally, we'll use a utility called VideoSink to save the annotated frames to a video. Let's dive into the code.

"},{"location":"notebooks/occupancy_analytics/","title":"How To Analyze Occupancy with Supervision","text":"In\u00a0[\u00a0]: Copied!
!nvidia-smi\n
!pip install roboflow supervision==0.19.0 -q\n
VIDEO_PATH = \"/content/parkinglot1080.mov\"\n
VIDEO_PATH = \"/content/parkinglot1080.mov\"

First, let's create a directory to save the video frames

import os\n\nFRAMES_DIR = \"/content/frames\"\nos.mkdir(FRAMES_DIR)\n

Then, we can use Supervision's get_video_frames_generator function to get, then save, our video frames

import supervision as sv\nfrom PIL import Image\n\nframes_generator = sv.get_video_frames_generator(VIDEO_PATH)\n\nfor i, frame in enumerate(frames_generator):\n  img = Image.fromarray(frame)\n  img.save(f\"{FRAMES_DIR}/{i}.jpg\")\n\nprint(f\"Saved frames to {FRAMES_DIR}\")\n
Saved frames to /content/frames\n
# Note: This code block was written by ChatGPT\n\nimport os\nimport random\nfrom PIL import Image\nimport numpy as np\n\n# import shutil\n# shutil.rmtree(\"augmented\")\n\ndef random_crop(img):\n    width, height = img.size\n\n    crop_width = random.randint(int(width * 0.1), int(width * 0.4))\n    crop_height = random.randint(int(height * 0.1), int(height * 0.4))\n\n    left = random.randint(0, width - crop_width)\n    top = random.randint(0, height - crop_height)\n\n    return img.crop((left, top, left + crop_width, top + crop_height))\n\ndef augment_images(source_folder, target_folder, num_images=100):\n    if not os.path.exists(target_folder):\n        os.makedirs(target_folder)\n\n    all_images = [file for file in os.listdir(source_folder) if file.endswith('.jpg')]\n\n    selected_images = np.random.choice(all_images, size=min(num_images, len(all_images)), replace=False)\n\n    for i, filename in enumerate(selected_images):\n        with Image.open(os.path.join(source_folder, filename)) as img:\n            cropped_img = random_crop(img)\n            cropped_img.save(os.path.join(target_folder, f'augmented_{i}.jpg'))\n\n# Paths to the source and target folders\nsource_folder = '/content/frames'\ntarget_folder = '/content/augmented'\n\n# Augment images\naugment_images(source_folder, target_folder)\n
# Upload the extracted frames to Roboflow\nimport os\nimport roboflow\n\nrf = roboflow.Roboflow(api_key=\"YOUR_ROBOFLOW_API_KEY\")\nproject = rf.workspace().project(\"parking-lot-occupancy-detection-eoaek\")\n\nfor filename in os.listdir(FRAMES_DIR):\n  img_path = os.path.join(FRAMES_DIR, filename)\n  if os.path.isfile(img_path):\n      project.upload(image_path=img_path)\n
loading Roboflow workspace...\nloading Roboflow project...\n
# PASTE CODE FROM ROBOFLOW HERE\n
from roboflow import Roboflow\nimport supervision as sv\nimport numpy as np\nimport cv2\n\nrf = Roboflow(api_key=\"YOUR_ROBOFLOW_API_KEY\") # Get your own API key - This one won't work\nproject = rf.workspace().project(\"parking-lot-occupancy-detection-eoaek\")\nmodel = project.version(\"5\").model\n\ndef callback(x: np.ndarray) -> sv.Detections:\n    result = model.predict(x, confidence=25, overlap=30).json()\n    return sv.Detections.from_inference(result)\n
loading Roboflow workspace...\nloading Roboflow project...\n
# Polygons From PolygonZone\n\nzones = [\n    {\n        'name': \"Zone 1\",\n        'polygon': np.array([[229, 50],[-3, 306],[1, 614],[369, 50]]),\n        'max': 32\n    },\n    {\n        'name': 'Zone 2',\n        'polygon': np.array([[465, 46],[177, 574],[401, 578],[609, 46]]),\n        'max': 38\n    },\n    {\n        'name': 'Zone 3',\n        'polygon': np.array([[697, 58],[461, 858],[737, 858],[849, 58]]),\n        'max': 46\n    },\n    {\n        'name': 'Zone 4',\n        'polygon': np.array([[941, 58],[909, 862],[1273, 858],[1137, 58]]),\n        'max': 48\n    },\n    {\n        'name': 'Zone 5',\n        'polygon': np.array([[1229, 46],[1501, 1078],[1889, 1078],[1405, 46]]),\n        'max': 52\n    }\n]\n
tracker = sv.ByteTrack()\nslicer = sv.InferenceSlicer(\n    callback=callback,\n    slice_wh=(800, 800),\n    overlap_ratio_wh=(0.2, 0.2),\n    thread_workers=10,\n    iou_threshold=0.2\n)\ntriangle_annotator = sv.TriangleAnnotator(\n    base=20,\n    height=20\n)\nheat_map_annotator = sv.HeatMapAnnotator()\n\ndef setup_zones(frame_wh):\n  if zones:\n    for zone in zones:\n      zone['history'] = []\n      zone['PolygonZone'] = sv.PolygonZone(\n          polygon=zone['polygon'],\n          frame_resolution_wh=frame_wh\n      )\n      zone['PolygonZoneAnnotator'] = sv.PolygonZoneAnnotator(\n        zone=zone['PolygonZone'],\n        color=sv.Color.WHITE,\n        thickness=4,\n    )\n\ndef process_frame(frame,heatmap=None):\n    detections = slicer(image=frame)\n    detections = tracker.update_with_detections(detections)\n\n    annotated_frame = frame.copy()\n\n    annotated_frame = triangle_annotator.annotate(\n        scene=annotated_frame,\n        detections=detections\n    )\n\n    if heatmap is None:\n      heatmap = np.full(frame.shape, 255, dtype=np.uint8)\n\n    heat_map_annotator.annotate(\n      scene=heatmap,\n      detections=detections\n    )\n\n    if zones:\n      for zone in zones:\n        zone_presence = zone['PolygonZone'].trigger(detections)\n        zone_present_idxs = [idx for idx, present in enumerate(zone_presence) if present]\n        zone_present = detections[zone_present_idxs]\n\n        zone_count = len(zone_present)\n        zone['history'].append(zone_count)\n\n\n        annotated_frame = zone['PolygonZoneAnnotator'].annotate(\n            scene=annotated_frame,\n            label=f\"{zone['name']}: {zone_count}\"\n        )\n\n        # Heatmap\n        heatmap = zone['PolygonZoneAnnotator'].annotate(\n            scene=heatmap,\n            label=\" \"\n        )\n\n    return annotated_frame, heatmap\n
image = cv2.imread(\"./frames/5.jpg\")\nimage_wh = (image.shape[1],image.shape[0])\nsetup_zones(image_wh)\n\nannotated_image, heatmap = process_frame(image)\n\nsv.plot_image(annotated_image)\nsv.plot_image(heatmap)\n
image = cv2.imread(\"./frames/5.jpg\") image_wh = (image.shape[1],image.shape[0]) setup_zones(image_wh) annotated_image, heatmap = process_frame(image) sv.plot_image(annotated_image) sv.plot_image(heatmap)
(1920, 1080)\n
# Credit to https://matplotlib.org/matplotblog/posts/matplotlib-cyberpunk-style/ for graph styles\n%matplotlib agg\nimport pandas as pd\nimport matplotlib.pyplot as plt\nfrom PIL import Image\nfrom io import BytesIO\n\ndef generate_graphs(max_frames):\n  plt.ioff()\n  # Plot Styles\n  plt.style.use(\"seaborn-dark\")\n  for param in ['figure.facecolor', 'axes.facecolor', 'savefig.facecolor']:\n      plt.rcParams[param] = '#212946'\n\n  for param in ['text.color', 'axes.labelcolor', 'xtick.color', 'ytick.color']:\n      plt.rcParams[param] = '0.9'\n\n\n  dataframe = pd.DataFrame()\n  graphs = {}\n\n\n  for zone in zones:\n    percentage_history = [(count/zone['max'])*100 for count in zone['history']]\n    dataframe[zone['name']] = percentage_history\n    plt.title(f'{zone[\"name\"]} Usage')\n\n    # Extra Styles\n    fig, ax1 = plt.subplots()\n    ax1.grid(color='#2A3459')\n\n    # Data\n    ax1.plot(zone[\"history\"])\n\n    # Axis Labeling\n    plt.ylabel('Vehicles')\n    plt.ylim(top=zone[\"max\"])\n    plt.xlim(right=max_frames)\n    ax2 = ax1.twinx()\n    ax2.set_ylabel('Occupied Percentage (%)')\n\n    # Export Graph Image\n    buf = BytesIO()\n    fig.savefig(buf, format='png', bbox_inches='tight', pad_inches=0)\n    buf.seek(0)\n    graphs[zone['name']] = Image.open(buf)\n    plt.close(fig)\n\n\n  plt.ioff()\n  dataframe.plot()\n\n  # Axis\n  plt.ylabel('Occupied (%)', fontsize=15)\n  plt.ylim(top=100)\n  plt.xlim(right=max_frames)\n\n  # Export combined\n  buf = BytesIO()\n  plt.savefig(buf, format='png', bbox_inches='tight')\n  buf.seek(0)\n\n  plt.close()\n\n  graphs['combined_percentage'] = Image.open(buf)\n\n  return graphs\n
generate_graphs(400)['combined_percentage']\n
VIDEO_PATH = \"/content/parkinglot1080.mov\"\nMAIN_OUTPUT_PATH = \"/content/parkinglot_annotated.mp4\"\nframes_generator = sv.get_video_frames_generator(source_path=VIDEO_PATH)\nvideo_info = sv.VideoInfo.from_video_path(video_path=VIDEO_PATH)\n\nsetup_zones(video_info.resolution_wh)\n\n\nwith sv.VideoSink(target_path=MAIN_OUTPUT_PATH, video_info=video_info) as sink:\n  heatmap = None\n  for i, frame in enumerate(frames_generator):\n    print(f\"Processing frame {i}\")\n\n    # Infer\n    annotated_frame, heatmap = process_frame(frame, heatmap)\n\n    # Save the latest heatmap\n    Image.fromarray(heatmap).save(f\"/content/heatmap/{i}.jpg\")\n\n    # Create Graphs\n    graphs = generate_graphs(video_info.total_frames)\n    graph = graphs[\"combined_percentage\"].convert(\"RGB\")\n    graph.save(f\"/content/graphs/{i}.jpg\")\n\n    # sv.plot_image(annotated_frame)\n\n    # Send as frame to video\n    sink.write_frame(frame=annotated_frame)\n
VIDEO_PATH = \"/content/parkinglot1080.mov\" MAIN_OUTPUT_PATH = \"/content/parkinglot_annotated.mp4\" frames_generator = sv.get_video_frames_generator(source_path=VIDEO_PATH) video_info = sv.VideoInfo.from_video_path(video_path=VIDEO_PATH) setup_zones(video_info.resolution_wh) with sv.VideoSink(target_path=MAIN_OUTPUT_PATH, video_info=video_info) as sink: heatmap = None for i, frame in enumerate(frames_generator): print(f\"Processing frame {i}\") # Infer annotated_frame, heatmap = process_frame(frame, heatmap) # Save the latest heatmap Image.fromarray(heatmap).save(f\"/content/heatmap/{i}.jpg\") # Create Graphs graphs = generate_graphs(video_info.total_frames) graph = graphs[\"combined_percentage\"].convert(\"RGB\") graph.save(f\"/content/graphs/{i}.jpg\") # sv.plot_image(annotated_frame) # Send as frame to video sink.write_frame(frame=annotated_frame)
Processing frame 0\n
Processing frame 1\n
Processing frame 2\n
Processing frame 3\n
Processing frame 4\n
Processing frame 5\n
Processing frame 6\n
Processing frame 7\n
Processing frame 8\n
Processing frame 9\n
Processing frame 10\n
...
import cv2\ndef create_videos_from_dir(dir,output):\n  images = len(os.listdir(dir))-1\n\n  sample_img_path = os.path.join(dir,f\"1.jpg\")\n  sample_img = cv2.imread(sample_img_path)\n  height, width, channels = sample_img.shape\n  video_info = sv.VideoInfo(width=width,height=height,fps=24,total_frames=images)\n\n  with sv.VideoSink(target_path=output, video_info=video_info) as sink:\n    for i in range(images):\n      path = os.path.join(dir,f\"{i}.jpg\")\n      img = cv2.imread(path)\n      sink.write_frame(frame=img)\n\n# Graphs\ncreate_videos_from_dir(\"/content/graphs\",\"/content/parkinglot_graph.mp4\")\n\n# Heatmap\ncreate_videos_from_dir(\"/content/heatmap\",\"/content/parkinglot_heatmap.mp4\")\n
import pickle\n\nwith open('parkinglot_zonedata.pkl', 'wb') as outp:\n  pickle.dump(zones, outp, pickle.HIGHEST_PROTOCOL)\n
with open('parkinglot_zonedata.pkl', 'rb') as inp:\n    zones_imported = pickle.load(inp)\n    zones = zones_imported\n
import statistics\nfor zone in zones:\n    occupancy_percent_history = [(count/zone['max'])*100 for count in zone['history']]\n    average_occupancy = round(statistics.mean(occupancy_percent_history))\n    median_occupancy = round(statistics.median(occupancy_percent_history))\n    highest_occupancy = round(max(occupancy_percent_history))\n    lowest_occupancy = round(min(occupancy_percent_history))\n    print(f\"{zone['name']} had an average occupancy of {average_occupancy}% with a median occupancy of {median_occupancy}%.\")\n
Zone 1 had an average occupancy of 60% with a median occupancy of 59%.\nZone 2 had an average occupancy of 69% with a median occupancy of 68%.\nZone 3 had an average occupancy of 85% with a median occupancy of 85%.\nZone 4 had an average occupancy of 85% with a median occupancy of 85%.\nZone 5 had an average occupancy of 91% with a median occupancy of 92%.\n
lot_history = []\nfor zone in zones:\n    for idx, entry in enumerate(zone['history']):\n      if(idx >= len(lot_history) or len(lot_history)==0): lot_history.append([])\n      lot_history[idx].append(zone['history'][idx]/zone['max'])\n\nlot_occupancy_history = [sum(entry)/len(entry)*100 for entry in lot_history]\n\naverage_occupancy = round(statistics.mean(lot_occupancy_history))\nmedian_occupancy = round(statistics.median(lot_occupancy_history))\nhighest_occupancy = round(max(lot_occupancy_history))\nlowest_occupancy = round(min(lot_occupancy_history))\n\nprint(f\"The entire lot had an average occupancy of {average_occupancy}% with a median occupancy of {median_occupancy}%.\")\n
The entire lot had an average occupancy of 78% with a median occupancy of 78%.\n
print(lot_occupancy_history)\n\n# [\n#    ...\n#    73.51691310215338,\n#    73.34063105087132,\n#    73.86694684034501,\n#    ...\n# ]\n
[0.0, 73.6265622249604, 73.51691310215338, 73.34063105087132, 73.86694684034501, ..., 77.93962330575603, 76.74529132195038]\n
In\u00a0[\u00a0]: Copied!
%matplotlib inline\n\nimport matplotlib.pyplot as plt\n\nfig, ax1 = plt.subplots()\nplt.title('Total Lot Usage')\nax1.grid(color='#2A3459')\n\nax1.plot(lot_occupancy_history)\nax1.set_ylabel('Occupied Percentage (%)')\n\nplt.ylim(top=100)\nplt.xlim(right=len(lot_occupancy_history))\n\nplt.show()\n
%matplotlib inline import matplotlib.pyplot as plt fig, ax1 = plt.subplots() plt.title('Total Lot Usage') ax1.grid(color='#2A3459') ax1.plot(lot_occupancy_history) ax1.set_ylabel('Occupied Percentage (%)') plt.ylim(top=100) plt.xlim(right=len(lot_occupancy_history)) plt.show() In\u00a0[\u00a0]: Copied!
import cv2\nimport numpy as np\n\ndef transform_image(image, points):\n    width = max(np.linalg.norm(points[0] - points[1]), np.linalg.norm(points[2] - points[3]))\n    height = max(np.linalg.norm(points[0] - points[3]), np.linalg.norm(points[1] - points[2]))\n    dest_points = np.array([[0, 0], [width - 1, 0], [width - 1, height - 1], [0, height - 1]], dtype=\"float32\")\n    matrix = cv2.getPerspectiveTransform(points.astype(\"float32\"), dest_points)\n    transformed_image = cv2.warpPerspective(image, matrix, (int(width), int(height)))\n\n    return transformed_image\n\ndef generate_top_down_views(frame,show=True):\n  heatmap = cv2.imread(f\"heatmap/{frame}.jpg\")\n  image = cv2.imread(f\"frames/{frame}.jpg\")\n\n  images = []\n\n  for zone in zones:\n    if show: print(f\"Occupancy Visualization of {zone['name']}\")\n    top_down_image = transform_image(image, zone['polygon'])\n    top_down_heatmap = transform_image(heatmap, zone['polygon'])\n\n    combined_image = cv2.addWeighted(top_down_image, 0.7, top_down_heatmap, 0.3, 0)\n\n    if show: sv.plot_image(combined_image, size=(5,5))\n\n    images.append(combined_image)\n\n  return images\n
import cv2 import numpy as np def transform_image(image, points): width = max(np.linalg.norm(points[0] - points[1]), np.linalg.norm(points[2] - points[3])) height = max(np.linalg.norm(points[0] - points[3]), np.linalg.norm(points[1] - points[2])) dest_points = np.array([[0, 0], [width - 1, 0], [width - 1, height - 1], [0, height - 1]], dtype=\"float32\") matrix = cv2.getPerspectiveTransform(points.astype(\"float32\"), dest_points) transformed_image = cv2.warpPerspective(image, matrix, (int(width), int(height))) return transformed_image def generate_top_down_views(frame,show=True): heatmap = cv2.imread(f\"heatmap/{frame}.jpg\") image = cv2.imread(f\"frames/{frame}.jpg\") images = [] for zone in zones: if show: print(f\"Occupancy Visualization of {zone['name']}\") top_down_image = transform_image(image, zone['polygon']) top_down_heatmap = transform_image(heatmap, zone['polygon']) combined_image = cv2.addWeighted(top_down_image, 0.7, top_down_heatmap, 0.3, 0) if show: sv.plot_image(combined_image, size=(5,5)) images.append(combined_image) return images In\u00a0[\u00a0]: Copied!
generate_top_down_views(400)\n
generate_top_down_views(400) In\u00a0[\u00a0]: Copied!
import os\nimport numpy as np\nfrom PIL import Image\nimport supervision as sv\n\nfor filename in os.listdir(\"frames\"):\n  img_path = os.path.join(\"frames\", filename)\n  heatmap_path = os.path.join(\"heatmap\", filename)\n  if os.path.isfile(img_path) and os.path.isfile(heatmap_path):\n    frame = int(filename.replace(\".jpg\",\"\"))\n    images = generate_top_down_views(frame,False)\n    gap = 10\n\n    pil_images = [Image.fromarray(image) for image in images]\n\n    # Resize images to have the same width\n    widths, heights = zip(*(i.size for i in pil_images))\n    max_width = max(widths)\n    total_height = sum(heights) + gap * (len(images) - 1)\n    resized_images = [i.resize((max_width, int(i.height * max_width / i.width))) for i in pil_images]\n\n    # Create a new image with the correct combined size\n    combined_image = Image.new('RGB', (max_width, total_height))\n\n    # Paste each image into the combined image with the specified gap\n    y_offset = 0\n    for img in resized_images:\n        combined_image.paste(img, (0, y_offset))\n        y_offset += img.height + gap\n\n    combined_image = combined_image.rotate(90, expand=True)\n\n    combined_image.save(f\"sectionheatmaps/{frame}.jpg\")\n\n    sv.plot_image(np.array(combined_image))\n
import os import numpy as np from PIL import Image import supervision as sv for filename in os.listdir(\"frames\"): img_path = os.path.join(\"frames\", filename) heatmap_path = os.path.join(\"heatmap\", filename) if os.path.isfile(img_path) and os.path.isfile(heatmap_path): frame = int(filename.replace(\".jpg\",\"\")) images = generate_top_down_views(frame,False) gap = 10 pil_images = [Image.fromarray(image) for image in images] # Resize images to have the same width widths, heights = zip(*(i.size for i in pil_images)) max_width = max(widths) total_height = sum(heights) + gap * (len(images) - 1) resized_images = [i.resize((max_width, int(i.height * max_width / i.width))) for i in pil_images] # Create a new image with the correct combined size combined_image = Image.new('RGB', (max_width, total_height)) # Paste each image into the combined image with the specified gap y_offset = 0 for img in resized_images: combined_image.paste(img, (0, y_offset)) y_offset += img.height + gap combined_image = combined_image.rotate(90, expand=True) combined_image.save(f\"sectionheatmaps/{frame}.jpg\") sv.plot_image(np.array(combined_image))"},{"location":"notebooks/occupancy_analytics/#how-to-analyze-occupancy-with-supervision","title":"How To Analyze Occupancy with Supervision\u00b6","text":"

In this notebook, we'll use a parking lot to demonstrate how we can extract numerous informative metrics and detailed graphics, all from one video, using Supervision.

This notebook accompanies the Occupancy Analytics with Computer Vision tutorial on the Roboflow Blog. Check it out for deeper explanations and context!

In this notebook, we will cover the following:

  1. Getting training data
  2. Training an object detection model
  3. Detecting vehicles
  4. Analyzing data and generating statistics
"},{"location":"notebooks/occupancy_analytics/#before-you-start","title":"Before You Start\u00b6","text":"

Let's make sure that we have access to a GPU. We can use the nvidia-smi command to do that. In case of any problems, navigate to Edit -> Notebook settings -> Hardware accelerator, set it to GPU, and then click Save.

"},{"location":"notebooks/occupancy_analytics/#install-relevant-packages","title":"Install Relevant Packages\u00b6","text":"

Here, we will install the Roboflow package, for uploading data and training our model, and Supervision, for visualizing results and extracting metrics from our model predictions.
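As a reference, a minimal install cell could look like the sketch below (both package names appear elsewhere in this notebook; pin versions if you need reproducibility):

```python
# Roboflow SDK for dataset upload/training, Supervision for visualization and metrics
!pip install -q roboflow supervision
```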

"},{"location":"notebooks/occupancy_analytics/#getting-video-data","title":"Getting Video Data\u00b6","text":"

We will start by turning a single video into a folder of frame images for training our model. Upload your video and set its file path here.
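A minimal sketch of this step, assuming a hypothetical local file named parking.mp4 and using Supervision's frame generator; the stride value is an arbitrary choice:

```python
import os

import cv2
import supervision as sv

VIDEO_PATH = "parking.mp4"  # hypothetical path - replace with your uploaded video
os.makedirs("frames", exist_ok=True)

# Save every 10th frame as a JPEG for labeling and training
for index, frame in enumerate(
    sv.get_video_frames_generator(source_path=VIDEO_PATH, stride=10)
):
    cv2.imwrite(f"frames/{index}.jpg", frame)
```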

"},{"location":"notebooks/occupancy_analytics/#random-crop-sampling-if-using-sahi","title":"Random Crop Sampling (If Using SAHI)\u00b6","text":"

If we are using SAHI (which we are in our example), randomly sampling cropped portions of our images can help mimic the effect of SAHI detection during training, improving performance.
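A minimal sketch of random crop sampling, assuming the frames/ directory created above; the crop size and number of crops per frame are arbitrary illustration values:

```python
import os
import random

import cv2

CROP_SIZE = 640          # arbitrary crop size, roughly matching a SAHI slice
CROPS_PER_FRAME = 2      # arbitrary number of random crops per frame
os.makedirs("crops", exist_ok=True)

for filename in os.listdir("frames"):
    image = cv2.imread(os.path.join("frames", filename))
    if image is None:
        continue
    height, width = image.shape[:2]
    for i in range(CROPS_PER_FRAME):
        # Pick a random top-left corner and cut out a square crop
        x = random.randint(0, max(0, width - CROP_SIZE))
        y = random.randint(0, max(0, height - CROP_SIZE))
        crop = image[y:y + CROP_SIZE, x:x + CROP_SIZE]
        cv2.imwrite(f"crops/{os.path.splitext(filename)[0]}_{i}.jpg", crop)
```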

"},{"location":"notebooks/occupancy_analytics/#training-a-model","title":"Training a Model\u00b6","text":"

Now that we have our images, we can upload our extracted frames as training data to Roboflow.
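An upload sketch using the Roboflow Python SDK; the workspace and project names below are placeholders for your own identifiers from the Roboflow app:

```python
import os

import roboflow

roboflow.login()
rf = roboflow.Roboflow()

# Placeholder workspace/project - replace with your own
project = rf.workspace("your-workspace").project("parking-lot-occupancy")

# Upload every extracted frame as training data
for filename in os.listdir("frames"):
    project.upload(os.path.join("frames", filename))
```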

"},{"location":"notebooks/occupancy_analytics/#upload-training-data","title":"Upload Training Data\u00b6","text":""},{"location":"notebooks/occupancy_analytics/#training-model-using-autodistill-optional","title":"Training Model Using Autodistill (Optional)\u00b6","text":"

We can train our model using Automated Labeling, powered by Autodistill, to automatically label our data. Copy the code required for this section from the Roboflow app.

Note: It's not required to use Autodistill

"},{"location":"notebooks/occupancy_analytics/#vehicle-detection","title":"Vehicle Detection\u00b6","text":"

Now, we can run our model to get inference data for our video data.

"},{"location":"notebooks/occupancy_analytics/#setup-model","title":"Setup Model\u00b6","text":"

First, set the model up as a callback function so that we can call it later on while using Supervision.
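A sketch of what such a callback could look like, assuming the model is hosted on Roboflow and loaded through the inference package (the model ID is a placeholder, and the exact loader and return shape can vary between inference versions). Returning sv.Detections lets the callback plug directly into InferenceSlicer later:

```python
import numpy as np
import supervision as sv
from inference import get_model

# Placeholder model ID - replace with your trained model, e.g. "your-project/1"
model = get_model(model_id="parking-lot-occupancy/1")

def callback(image_slice: np.ndarray) -> sv.Detections:
    # Run inference on a full frame or an image slice and convert to sv.Detections
    results = model.infer(image_slice)[0]
    return sv.Detections.from_inference(results)
```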

"},{"location":"notebooks/occupancy_analytics/#configure-zones","title":"Configure Zones\u00b6","text":"

Next, we will set up a list of the zones to be used with PolygonZone. You can get these polygon coordinates using this web utility.

For our example, we have several zones, but you can add as many or as few zones as you would like.
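A sketch of one possible zone configuration; the polygon coordinates, names, and the max_spots capacity field are made up for illustration:

```python
import numpy as np

zones = [
    {
        "name": "Section A",
        "polygon": np.array([[100, 200], [600, 200], [600, 500], [100, 500]]),
        "max_spots": 40,  # hypothetical capacity, used later for occupancy percentages
    },
    {
        "name": "Section B",
        "polygon": np.array([[650, 200], [1150, 200], [1150, 500], [650, 500]]),
        "max_spots": 35,
    },
]
```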

"},{"location":"notebooks/occupancy_analytics/#setup-supervision","title":"Setup Supervision\u00b6","text":"

For our use case, we will use the following features of Supervision; a short setup sketch follows this list. Refer to the linked documentation for more details:

  • ByteTrack: To track the location of our vehicles, so we can assess how long they are parked
  • InferenceSlicer: A helper utility to run SAHI on our model
  • TriangleAnnotator: To help visualize the locations of the vehicles
  • HeatMapAnnotator: To generate heatmaps so we can identify our busiest areas
  • PolygonZone, PolygonZoneAnnotator: To help count and identify vehicles in our respective zones and the annotator to help visualize those zones.
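A minimal setup sketch for these components, reusing the callback and zones from the earlier cells. Note that some Supervision versions also expect a frame_resolution_wh argument for PolygonZone, so adjust to the version you have installed:

```python
import supervision as sv

tracker = sv.ByteTrack()
slicer = sv.InferenceSlicer(callback=callback)

triangle_annotator = sv.TriangleAnnotator()
heat_map_annotator = sv.HeatMapAnnotator()

# One PolygonZone (and matching annotator) per configured zone
polygon_zones = [sv.PolygonZone(polygon=zone["polygon"]) for zone in zones]
zone_annotators = [
    sv.PolygonZoneAnnotator(zone=polygon_zone, color=sv.Color.WHITE)
    for polygon_zone in polygon_zones
]
```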
"},{"location":"notebooks/occupancy_analytics/#try-with-a-single-image","title":"Try With a Single Image\u00b6","text":""},{"location":"notebooks/occupancy_analytics/#setup-graphs","title":"Setup Graphs\u00b6","text":"

Before we run the model on the entire video, we will set up the logic to generate our graphs using matplotlib.

"},{"location":"notebooks/occupancy_analytics/#process-video","title":"Process Video\u00b6","text":"

Now, we can process the video to get detections from the entire video.
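One way this loop could be structured, as a sketch that reuses VIDEO_PATH, slicer, tracker, polygon_zones, and zones from the earlier sketches; zone_counts is a hypothetical structure for storing the per-zone vehicle counts over time:

```python
import supervision as sv

zone_counts = [[] for _ in zones]  # one count history per zone

for frame in sv.get_video_frames_generator(source_path=VIDEO_PATH):
    detections = slicer(frame)                            # SAHI-style sliced inference
    detections = tracker.update_with_detections(detections)

    for index, polygon_zone in enumerate(polygon_zones):
        # Boolean mask of detections inside this zone; its sum is the vehicle count
        inside_zone = polygon_zone.trigger(detections=detections)
        zone_counts[index].append(int(inside_zone.sum()))
```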

"},{"location":"notebooks/occupancy_analytics/#generate-graphsheatmap-video-optional","title":"Generate Graphs/Heatmap Video (optional)\u00b6","text":""},{"location":"notebooks/occupancy_analytics/#analyze-data","title":"Analyze Data\u00b6","text":"

Lastly, we can analyze the data we collected to extract quantitative metrics from our video.

"},{"location":"notebooks/occupancy_analytics/#save-your-data-for-later","title":"Save your data for later\u00b6","text":"

Using Pickle, we can save our zone detection data so that we can load it in for later analysis. Remember to download your file from the Colab file manager.
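A minimal sketch, assuming the per-zone counts live in a list such as the hypothetical zone_counts from the processing step:

```python
import pickle

# Serialize the per-zone occupancy history to disk
with open("zone_counts.pkl", "wb") as f:
    pickle.dump(zone_counts, f)
```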

"},{"location":"notebooks/occupancy_analytics/#import-your-data","title":"Import your data\u00b6","text":"

To load your data back in, upload the saved file to the Colab environment and run the code cell.
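The matching load step, assuming the same file name as in the save sketch above:

```python
import pickle

with open("zone_counts.pkl", "rb") as f:
    zone_counts = pickle.load(f)
```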

"},{"location":"notebooks/occupancy_analytics/#occupancy-per-section","title":"Occupancy Per Section\u00b6","text":"

Since we recorded the number of objects (vehicles) in each zone, we can compare that against the hardcoded maximum capacity we set while configuring our zones. Using this data, we can calculate the average and median occupancy, as well as other metrics such as the maximum or minimum occupancy over that time period.
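A sketch of these per-zone statistics, assuming zone_counts and the zones list (with its hypothetical max_spots field) from the earlier sketches:

```python
import numpy as np

for zone, counts in zip(zones, zone_counts):
    occupancy = np.array(counts) / zone["max_spots"] * 100  # percent occupied over time
    print(
        f"{zone['name']}: "
        f"mean {occupancy.mean():.1f}%, median {np.median(occupancy):.1f}%, "
        f"min {occupancy.min():.1f}%, max {occupancy.max():.1f}%"
    )
```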

"},{"location":"notebooks/occupancy_analytics/#total-occupancy","title":"Total Occupancy\u00b6","text":"

Using the per-zone occupancy, we can also aggregate the metrics across all zones to calculate occupancy figures for the whole parking lot.
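Aggregating across zones could look like the following sketch (same assumed variables); the resulting lot_occupancy_history is the kind of series plotted earlier in the notebook:

```python
import numpy as np

total_counts = np.sum([np.array(counts) for counts in zone_counts], axis=0)
total_capacity = sum(zone["max_spots"] for zone in zones)
lot_occupancy_history = total_counts / total_capacity * 100

print(f"Average lot occupancy: {lot_occupancy_history.mean():.1f}%")
print(f"Median lot occupancy: {np.median(lot_occupancy_history):.1f}%")
```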

"},{"location":"notebooks/occupancy_analytics/#busy-areas","title":"Busy Areas\u00b6","text":"

Using Supervision's heat map annotator, we can overlay heatmaps on perspective-transformed frames to create top-down occupancy views of each zone.

"},{"location":"notebooks/quickstart/","title":"Supervision Quickstart","text":"In\u00a0[\u00a0]: Copied!
!nvidia-smi\n
!nvidia-smi
Tue Jun 13 13:06:22 2023       \n+-----------------------------------------------------------------------------+\n| NVIDIA-SMI 525.85.12    Driver Version: 525.85.12    CUDA Version: 12.0     |\n|-------------------------------+----------------------+----------------------+\n| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |\n| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |\n|                               |                      |               MIG M. |\n|===============================+======================+======================|\n|   0  Tesla T4            Off  | 00000000:00:04.0 Off |                    0 |\n| N/A   52C    P8    10W /  70W |      0MiB / 15360MiB |      0%      Default |\n|                               |                      |                  N/A |\n+-------------------------------+----------------------+----------------------+\n                                                                               \n+-----------------------------------------------------------------------------+\n| Processes:                                                                  |\n|  GPU   GI   CI        PID   Type   Process name                  GPU Memory |\n|        ID   ID                                                   Usage      |\n|=============================================================================|\n|  No running processes found                                                 |\n+-----------------------------------------------------------------------------+\n

NOTE: To make it easier for us to manage datasets, images, and models, we create a HOME constant.

In\u00a0[\u00a0]: Copied!
import os\n\nHOME = os.getcwd()\nprint(HOME)\n
import os HOME = os.getcwd() print(HOME)
/content\n

NOTE: During our demo, we will need some example images.

In\u00a0[\u00a0]: Copied!
!mkdir {HOME}/images\n
!mkdir {HOME}/images

NOTE: Feel free to use your own images. Just make sure to put them into the images directory that we just created. \u261d\ufe0f

In\u00a0[\u00a0]: Copied!
%cd {HOME}/images\n\n!wget -q https://media.roboflow.com/notebooks/examples/dog.jpeg\n!wget -q https://media.roboflow.com/notebooks/examples/dog-2.jpeg\n!wget -q https://media.roboflow.com/notebooks/examples/dog-3.jpeg\n!wget -q https://media.roboflow.com/notebooks/examples/dog-4.jpeg\n!wget -q https://media.roboflow.com/notebooks/examples/dog-5.jpeg\n!wget -q https://media.roboflow.com/notebooks/examples/dog-6.jpeg\n!wget -q https://media.roboflow.com/notebooks/examples/dog-7.jpeg\n!wget -q https://media.roboflow.com/notebooks/examples/dog-8.jpeg\n
%cd {HOME}/images !wget -q https://media.roboflow.com/notebooks/examples/dog.jpeg !wget -q https://media.roboflow.com/notebooks/examples/dog-2.jpeg !wget -q https://media.roboflow.com/notebooks/examples/dog-3.jpeg !wget -q https://media.roboflow.com/notebooks/examples/dog-4.jpeg !wget -q https://media.roboflow.com/notebooks/examples/dog-5.jpeg !wget -q https://media.roboflow.com/notebooks/examples/dog-6.jpeg !wget -q https://media.roboflow.com/notebooks/examples/dog-7.jpeg !wget -q https://media.roboflow.com/notebooks/examples/dog-8.jpeg
/content/images\n
In\u00a0[\u00a0]: Copied!
!pip install -q supervision\n\nimport supervision as sv\n\nprint(sv.__version__)\n
!pip install -q supervision import supervision as sv print(sv.__version__)
     \u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501 0.0/45.4 kB ? eta -:--:--\r     \u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501 45.4/45.4 kB 3.4 MB/s eta 0:00:00\n0.17.0\n
In\u00a0[\u00a0]: Copied!
import cv2\n\nIMAGE_PATH = f\"{HOME}/images/dog.jpeg\"\n\nimage = cv2.imread(IMAGE_PATH)\n
import cv2 IMAGE_PATH = f\"{HOME}/images/dog.jpeg\" image = cv2.imread(IMAGE_PATH) In\u00a0[\u00a0]: Copied!
!pip install -q super-gradients\n
!pip install -q super-gradients In\u00a0[\u00a0]: Copied!
from super_gradients.training import models\n\nmodel = models.get(\"yolo_nas_l\", pretrained_weights=\"coco\")\nresult = model.predict(image)\ndetections = sv.Detections.from_yolo_nas(result)\n
from super_gradients.training import models model = models.get(\"yolo_nas_l\", pretrained_weights=\"coco\") result = model.predict(image) detections = sv.Detections.from_yolo_nas(result) In\u00a0[\u00a0]: Copied!
\"detections\", len(detections)\n
\"detections\", len(detections) Out[\u00a0]:
('detections', 7)
In\u00a0[\u00a0]: Copied!
!pip install -q ultralytics\n
!pip install -q ultralytics
     \u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501 0.0/599.6 kB ? eta -:--:--\r     \u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2578 593.9/599.6 kB 19.5 MB/s eta 0:00:01\r     \u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501 599.6/599.6 kB 14.3 MB/s eta 0:00:00\n
In\u00a0[\u00a0]: Copied!
from ultralytics import YOLO\n\nmodel = YOLO(\"yolov8s.pt\")\nresult = model(image, verbose=False)[0]\ndetections = sv.Detections.from_ultralytics(result)\n
from ultralytics import YOLO model = YOLO(\"yolov8s.pt\") result = model(image, verbose=False)[0] detections = sv.Detections.from_ultralytics(result) In\u00a0[\u00a0]: Copied!
\"detections\", len(detections)\n
\"detections\", len(detections) Out[\u00a0]:
('detections', 4)
In\u00a0[\u00a0]: Copied!
box_annotator = sv.BoxAnnotator()\n\nannotated_image = box_annotator.annotate(image.copy(), detections=detections)\n\nsv.plot_image(image=annotated_image, size=(8, 8))\n
box_annotator = sv.BoxAnnotator() annotated_image = box_annotator.annotate(image.copy(), detections=detections) sv.plot_image(image=annotated_image, size=(8, 8))

NOTE: By default, sv.BoxAnnotator uses the corresponding class_id as the label; however, the labels can have an arbitrary format.

In\u00a0[\u00a0]: Copied!
box_annotator = sv.BoxAnnotator()\n\nlabels = [\n    f\"{model.model.names[class_id]} {confidence:.2f}\"\n    for class_id, confidence in zip(detections.class_id, detections.confidence)\n]\nannotated_image = box_annotator.annotate(\n    image.copy(), detections=detections, labels=labels\n)\n\nsv.plot_image(image=annotated_image, size=(8, 8))\n
box_annotator = sv.BoxAnnotator() labels = [ f\"{model.model.names[class_id]} {confidence:.2f}\" for class_id, confidence in zip(detections.class_id, detections.confidence) ] annotated_image = box_annotator.annotate( image.copy(), detections=detections, labels=labels ) sv.plot_image(image=annotated_image, size=(8, 8)) In\u00a0[\u00a0]: Copied!
mask_annotator = sv.MaskAnnotator(color_lookup=sv.ColorLookup.INDEX)\n\nannotated_image = mask_annotator.annotate(image.copy(), detections=detections)\n\nsv.plot_image(image=annotated_image, size=(8, 8))\n
mask_annotator = sv.MaskAnnotator(color_lookup=sv.ColorLookup.INDEX) annotated_image = mask_annotator.annotate(image.copy(), detections=detections) sv.plot_image(image=annotated_image, size=(8, 8)) In\u00a0[\u00a0]: Copied!
detections_index = detections[0]\ndetections_index_list = detections[[0, 1, 3]]\ndetections_index_slice = detections[:2]\n
detections_index = detections[0] detections_index_list = detections[[0, 1, 3]] detections_index_slice = detections[:2] In\u00a0[\u00a0]: Copied!
box_annotator = sv.BoxAnnotator()\n\nimages = [\n    box_annotator.annotate(image.copy(), detections=d)\n    for d in [detections_index, detections_index_list, detections_index_slice]\n]\ntitles = [\n    \"by index - detections[0]\",\n    \"by index list - detections[[0, 1, 3]]\",\n    \"by index slice - detections[:2]\",\n]\n\nsv.plot_images_grid(images=images, titles=titles, grid_size=(1, 3))\n
box_annotator = sv.BoxAnnotator() images = [ box_annotator.annotate(image.copy(), detections=d) for d in [detections_index, detections_index_list, detections_index_slice] ] titles = [ \"by index - detections[0]\", \"by index list - detections[[0, 1, 3]]\", \"by index slice - detections[:2]\", ] sv.plot_images_grid(images=images, titles=titles, grid_size=(1, 3)) In\u00a0[\u00a0]: Copied!
detections_filtered = detections[detections.class_id == 0]\n
detections_filtered = detections[detections.class_id == 0] In\u00a0[\u00a0]: Copied!
box_annotator = sv.BoxAnnotator()\nannotated_image = box_annotator.annotate(image.copy(), detections=detections_filtered)\nsv.plot_image(image=annotated_image, size=(8, 8))\n
box_annotator = sv.BoxAnnotator() annotated_image = box_annotator.annotate(image.copy(), detections=detections_filtered) sv.plot_image(image=annotated_image, size=(8, 8)) In\u00a0[\u00a0]: Copied!
detections_filtered = detections[detections.confidence > 0.75]\n
detections_filtered = detections[detections.confidence > 0.75] In\u00a0[\u00a0]: Copied!
box_annotator = sv.BoxAnnotator()\nlabels = [\n    f\"{model.model.names[class_id]} {confidence:.2f}\"\n    for class_id, confidence in zip(\n        detections_filtered.class_id, detections_filtered.confidence\n    )\n]\nannotated_image = box_annotator.annotate(\n    image.copy(), detections=detections_filtered, labels=labels\n)\nsv.plot_image(image=annotated_image, size=(8, 8))\n
box_annotator = sv.BoxAnnotator() labels = [ f\"{model.model.names[class_id]} {confidence:.2f}\" for class_id, confidence in zip( detections_filtered.class_id, detections_filtered.confidence ) ] annotated_image = box_annotator.annotate( image.copy(), detections=detections_filtered, labels=labels ) sv.plot_image(image=annotated_image, size=(8, 8)) In\u00a0[\u00a0]: Copied!
detections_filtered = detections[\n    (detections.class_id != 0) & (detections.confidence > 0.75)\n]\n
detections_filtered = detections[ (detections.class_id != 0) & (detections.confidence > 0.75) ] In\u00a0[\u00a0]: Copied!
box_annotator = sv.BoxAnnotator()\nlabels = [\n    f\"{class_id} {confidence:.2f}\"\n    for class_id, confidence in zip(\n        detections_filtered.class_id, detections_filtered.confidence\n    )\n]\nannotated_image = box_annotator.annotate(\n    image.copy(), detections=detections_filtered, labels=labels\n)\nsv.plot_image(image=annotated_image, size=(8, 8))\n
box_annotator = sv.BoxAnnotator() labels = [ f\"{class_id} {confidence:.2f}\" for class_id, confidence in zip( detections_filtered.class_id, detections_filtered.confidence ) ] annotated_image = box_annotator.annotate( image.copy(), detections=detections_filtered, labels=labels ) sv.plot_image(image=annotated_image, size=(8, 8))

NOTE: During our demo, we will need some example videos.

In\u00a0[\u00a0]: Copied!
!pip install -q supervision[assets]\n
!pip install -q supervision[assets] In\u00a0[\u00a0]: Copied!
!mkdir {HOME}/videos\n
!mkdir {HOME}/videos

NOTE: Feel free to use your own videos. Just make sure to put them into the videos directory that we just created. \u261d\ufe0f

In\u00a0[\u00a0]: Copied!
%cd {HOME}/videos\n
%cd {HOME}/videos In\u00a0[\u00a0]: Copied!
from supervision.assets import download_assets, VideoAssets\n\ndownload_assets(VideoAssets.VEHICLES)\nVIDEO_PATH = f\"{HOME}/videos/vehicle.mp4\"\n
from supervision.assets import download_assets, VideoAssets download_assets(VideoAssets.VEHICLES) VIDEO_PATH = f\"{HOME}/videos/vehicle.mp4\" In\u00a0[\u00a0]: Copied!
sv.VideoInfo.from_video_path(video_path=VIDEO_PATH)\n
sv.VideoInfo.from_video_path(video_path=VIDEO_PATH) Out[\u00a0]:
VideoInfo(width=3840, height=2160, fps=25, total_frames=538)
In\u00a0[\u00a0]: Copied!
frame_generator = sv.get_video_frames_generator(source_path=VIDEO_PATH)\n
frame_generator = sv.get_video_frames_generator(source_path=VIDEO_PATH) In\u00a0[\u00a0]: Copied!
frame = next(iter(frame_generator))\nsv.plot_image(image=frame, size=(8, 8))\n
frame = next(iter(frame_generator)) sv.plot_image(image=frame, size=(8, 8)) In\u00a0[\u00a0]: Copied!
RESULT_VIDEO_PATH = f\"{HOME}/videos/vehicle-counting-result.mp4\"\n
RESULT_VIDEO_PATH = f\"{HOME}/videos/vehicle-counting-result.mp4\"

NOTE: This time we have given the stride parameter a custom value of 2. As a result, get_video_frames_generator will return every second video frame.

In\u00a0[\u00a0]: Copied!
video_info = sv.VideoInfo.from_video_path(video_path=VIDEO_PATH)\n\nwith sv.VideoSink(target_path=RESULT_VIDEO_PATH, video_info=video_info) as sink:\n    for frame in sv.get_video_frames_generator(source_path=VIDEO_PATH, stride=2):\n        sink.write_frame(frame=frame)\n
video_info = sv.VideoInfo.from_video_path(video_path=VIDEO_PATH) with sv.VideoSink(target_path=RESULT_VIDEO_PATH, video_info=video_info) as sink: for frame in sv.get_video_frames_generator(source_path=VIDEO_PATH, stride=2): sink.write_frame(frame=frame)

NOTE: If we once again use VideoInfo, we will notice that the final video has half as many frames.

In\u00a0[\u00a0]: Copied!
sv.VideoInfo.from_video_path(video_path=RESULT_VIDEO_PATH)\n
sv.VideoInfo.from_video_path(video_path=RESULT_VIDEO_PATH) Out[\u00a0]:
VideoInfo(width=3840, height=2160, fps=25, total_frames=269)
In\u00a0[\u00a0]: Copied!
!pip install -q roboflow\n
!pip install -q roboflow In\u00a0[\u00a0]: Copied!
!mkdir {HOME}/datasets\n%cd {HOME}/datasets\n\nimport roboflow\nfrom roboflow import Roboflow\n\nroboflow.login()\n\nrf = Roboflow()\n\nproject = rf.workspace(\"roboflow-jvuqo\").project(\"fashion-assistant-segmentation\")\ndataset = project.version(5).download(\"yolov8\")\n
!mkdir {HOME}/datasets %cd {HOME}/datasets import roboflow from roboflow import Roboflow roboflow.login() rf = Roboflow() project = rf.workspace(\"roboflow-jvuqo\").project(\"fashion-assistant-segmentation\") dataset = project.version(5).download(\"yolov8\")
/content/datasets\n\rvisit https://app.roboflow.com/auth-cli to get your authentication token.\nPaste the authentication token here: \u00b7\u00b7\u00b7\u00b7\u00b7\u00b7\u00b7\u00b7\u00b7\u00b7\nloading Roboflow workspace...\nloading Roboflow project...\nDependency ultralytics<=8.0.20 is required but found version=8.0.117, to fix: `pip install ultralytics<=8.0.20`\nDownloading Dataset Version Zip in fashion-assistant-segmentation-5 to yolov8: 100% [125448709 / 125448709] bytes\n
Extracting Dataset Version Zip to fashion-assistant-segmentation-5 in yolov8:: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 1254/1254 [00:00<00:00, 3283.33it/s]\n
In\u00a0[\u00a0]: Copied!
ds = sv.DetectionDataset.from_yolo(\n    images_directory_path=f\"{dataset.location}/train/images\",\n    annotations_directory_path=f\"{dataset.location}/train/labels\",\n    data_yaml_path=f\"{dataset.location}/data.yaml\",\n)\n
ds = sv.DetectionDataset.from_yolo( images_directory_path=f\"{dataset.location}/train/images\", annotations_directory_path=f\"{dataset.location}/train/labels\", data_yaml_path=f\"{dataset.location}/data.yaml\", ) In\u00a0[\u00a0]: Copied!
len(ds)\n
len(ds) Out[\u00a0]:
573
In\u00a0[\u00a0]: Copied!
ds.classes\n
ds.classes Out[\u00a0]:
['baseball cap',\n 'hoodie',\n 'jacket',\n 'pants',\n 'shirt',\n 'shorts',\n 'sneaker',\n 'sunglasses',\n 'sweatshirt',\n 'tshirt']
In\u00a0[\u00a0]: Copied!
IMAGE_NAME = list(ds.images.keys())[0]\n\nimage = ds.images[IMAGE_NAME]\nannotations = ds.annotations[IMAGE_NAME]\n\nbox_annotator = sv.BoxAnnotator()\nmask_annotator = sv.MaskAnnotator()\n\nlabels = [f\"{ds.classes[class_id]}\" for class_id in annotations.class_id]\n\nannotated_image = mask_annotator.annotate(image.copy(), detections=annotations)\nannotated_image = box_annotator.annotate(\n    annotated_image, detections=annotations, labels=labels\n)\n\nsv.plot_image(image=annotated_image, size=(8, 8))\n
IMAGE_NAME = list(ds.images.keys())[0] image = ds.images[IMAGE_NAME] annotations = ds.annotations[IMAGE_NAME] box_annotator = sv.BoxAnnotator() mask_annotator = sv.MaskAnnotator() labels = [f\"{ds.classes[class_id]}\" for class_id in annotations.class_id] annotated_image = mask_annotator.annotate(image.copy(), detections=annotations) annotated_image = box_annotator.annotate( annotated_image, detections=annotations, labels=labels ) sv.plot_image(image=annotated_image, size=(8, 8)) In\u00a0[\u00a0]: Copied!
ds_train, ds_test = ds.split(split_ratio=0.8)\n
ds_train, ds_test = ds.split(split_ratio=0.8) In\u00a0[\u00a0]: Copied!
\"ds_train\", len(ds_train), \"ds_test\", len(ds_test)\n
\"ds_train\", len(ds_train), \"ds_test\", len(ds_test) Out[\u00a0]:
('ds_train', 458, 'ds_test', 115)
In\u00a0[\u00a0]: Copied!
ds_train.as_pascal_voc(\n    images_directory_path=f\"{HOME}/datasets/result/images\",\n    annotations_directory_path=f\"{HOME}/datasets/result/labels\",\n)\n
ds_train.as_pascal_voc( images_directory_path=f\"{HOME}/datasets/result/images\", annotations_directory_path=f\"{HOME}/datasets/result/labels\", )"},{"location":"notebooks/quickstart/#supervision-quickstart","title":"Supervision Quickstart\u00b6","text":"

We write reusable computer vision tools. Whether you need to load your dataset from your hard drive, draw detections on an image or video, or count how many detections are in a zone, you can count on us! \ud83e\udd1d

We hope that the resources in this notebook will help you get the most out of Supervision. Please browse the Supervision Docs for details, raise an issue on GitHub for support, and join our discussions section for questions!

"},{"location":"notebooks/quickstart/#table-of-contents","title":"Table of contents\u00b6","text":"
  • Before you start
  • Install
  • Detection API
    • Plug in your model
      • YOLO-NAS
      • YOLOv8
    • Annotate
      • BoxAnnotator
      • MaskAnnotator
    • Filter
      • By index, index list and index slice
      • By class_id
      • By confidence
      • By advanced logical condition
  • Video API
    • VideoInfo
    • get_video_frames_generator
    • VideoSink
  • Dataset API
    • DetectionDataset.from_yolo
    • Visualize annotations
    • split
    • DetectionDataset.as_pascal_voc
"},{"location":"notebooks/quickstart/#before-you-start","title":"\u26a1 Before you start\u00b6","text":"

NOTE: In this notebook, we aim to show - among other things - how simple it is to integrate supervision with popular object detection and instance segmentation libraries and frameworks. GPU access is optional but will certainly make the ride smoother.

Let's make sure that we have access to a GPU. We can use the nvidia-smi command to do that. In case of any problems, navigate to Edit -> Notebook settings -> Hardware accelerator, set it to GPU, and then click Save.

"},{"location":"notebooks/quickstart/#install","title":"\u200d\ud83d\udcbb Install\u00b6","text":""},{"location":"notebooks/quickstart/#detection-api","title":"\ud83d\udc41\ufe0f Detection API\u00b6","text":"
  • xyxy (np.ndarray): An array of shape (n, 4) containing the bounding boxes coordinates in format [x1, y1, x2, y2]
  • mask (Optional[np.ndarray]): An array of shape (n, H, W) containing the segmentation masks.
  • confidence (Optional[np.ndarray]): An array of shape (n,) containing the confidence scores of the detections.
  • class_id (Optional[np.ndarray]): An array of shape (n,) containing the class ids of the detections.
  • tracker_id (Optional[np.ndarray]): An array of shape (n,) containing the tracker ids of the detections. (A small construction sketch follows this list.)
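To make the fields above concrete, here is a small hand-built sv.Detections for two hypothetical boxes:

```python
import numpy as np
import supervision as sv

detections = sv.Detections(
    xyxy=np.array([[10, 20, 110, 220], [50, 60, 150, 260]], dtype=float),
    confidence=np.array([0.92, 0.81]),
    class_id=np.array([0, 2]),
)

print(len(detections))      # 2
print(detections.class_id)  # [0 2]
```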
"},{"location":"notebooks/quickstart/#plug-in-your-model","title":"\ud83d\udd0c Plug in your model\u00b6","text":"

NOTE: In our example, we will focus only on integration with YOLO-NAS and YOLOv8. However, keep in mind that supervision allows seamless integration with many other models like SAM, Transformers, and YOLOv5. You can learn more from our documentation.

"},{"location":"notebooks/quickstart/#yolo-nas","title":"YOLO-NAS \ud83d\udcda\u00b6","text":""},{"location":"notebooks/quickstart/#ultralytics","title":"Ultralytics \ud83d\udcda\u00b6","text":""},{"location":"notebooks/quickstart/#annotate","title":"\ud83d\udc69\u200d\ud83c\udfa8 Annotate\u00b6","text":""},{"location":"notebooks/quickstart/#boxannotator","title":"BoxAnnotator \ud83d\udcda\u00b6","text":""},{"location":"notebooks/quickstart/#maskannotator","title":"MaskAnnotator \ud83d\udcda\u00b6","text":""},{"location":"notebooks/quickstart/#filter","title":"\ud83d\uddd1 Filter \ud83d\udcda\u00b6","text":""},{"location":"notebooks/quickstart/#by-index-index-list-and-index-slice","title":"By index, index list and index slice\u00b6","text":"

NOTE: The sv.Detections filter API allows you to access detections by index, index list, or index slice

"},{"location":"notebooks/quickstart/#by-class_id","title":"By class_id\u00b6","text":"

NOTE: Let's use the sv.Detections filter API to display only objects with class_id == 0

"},{"location":"notebooks/quickstart/#by-confidence","title":"By confidence\u00b6","text":"

NOTE: Let's use the sv.Detections filter API to display only objects with confidence > 0.75

"},{"location":"notebooks/quickstart/#by-advanced-logical-condition","title":"By advanced logical condition\u00b6","text":"

NOTE: The sv.Detections filter API also allows you to build advanced logical conditions. Let's select only detections with class_id != 0 and confidence > 0.75.

"},{"location":"notebooks/quickstart/#video-api","title":"\ud83c\udfac Video API\u00b6","text":"

NOTE: supervision offers a lot of utils to make working with videos easier. Let's take a look at some of them.

"},{"location":"notebooks/quickstart/#videoinfo","title":"VideoInfo \ud83d\udcda\u00b6","text":"

NOTE: VideoInfo allows us to easily retrieve information about video files, such as resolution, FPS and total number of frames.

"},{"location":"notebooks/quickstart/#get_video_frames_generator","title":"get_video_frames_generator \ud83d\udcda\u00b6","text":""},{"location":"notebooks/quickstart/#videosink","title":"VideoSink \ud83d\udcda\u00b6","text":""},{"location":"notebooks/quickstart/#dataset-api","title":"\ud83d\uddbc\ufe0f Dataset API\u00b6","text":"

NOTE: In order to demonstrate the capabilities of the Dataset API, we need a dataset. Let's download one from Roboflow Universe. To do this we first need to install the roboflow pip package.

"},{"location":"notebooks/quickstart/#detectiondatasetfrom_yolo","title":"DetectionDataset.from_yolo \ud83d\udcda\u00b6","text":"

NOTE: Currently, the Dataset API always loads images from the hard drive. In the future, we plan to add lazy loading.

"},{"location":"notebooks/quickstart/#visualize-annotations","title":"\ud83c\udff7\ufe0f Visualize annotations\u00b6","text":""},{"location":"notebooks/quickstart/#split","title":"split \ud83d\udcda\u00b6","text":""},{"location":"notebooks/quickstart/#detectiondatasetas_pascal_voc","title":"DetectionDataset.as_pascal_voc \ud83d\udcda\u00b6","text":""},{"location":"notebooks/quickstart/#congratulations","title":"\ud83c\udfc6 Congratulations\u00b6","text":""},{"location":"notebooks/quickstart/#learning-resources","title":"Learning Resources\u00b6","text":"
  • Documentation
  • GitHub
  • YouTube Supervision Playlist
"},{"location":"notebooks/zero-shot-object-detection-with-yolo-world/","title":"Zero-Shot Object Detection with YOLO-World","text":"In\u00a0[1]: Copied!
!nvidia-smi\n
!nvidia-smi
Fri Feb 16 12:46:14 2024       \n+---------------------------------------------------------------------------------------+\n| NVIDIA-SMI 535.104.05             Driver Version: 535.104.05   CUDA Version: 12.2     |\n|-----------------------------------------+----------------------+----------------------+\n| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |\n| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |\n|                                         |                      |               MIG M. |\n|=========================================+======================+======================|\n|   0  Tesla T4                       Off | 00000000:00:04.0 Off |                    0 |\n| N/A   65C    P8              13W /  70W |      0MiB / 15360MiB |      0%      Default |\n|                                         |                      |                  N/A |\n+-----------------------------------------+----------------------+----------------------+\n                                                                                         \n+---------------------------------------------------------------------------------------+\n| Processes:                                                                            |\n|  GPU   GI   CI        PID   Type   Process name                            GPU Memory |\n|        ID   ID                                                             Usage      |\n|=======================================================================================|\n|  No running processes found                                                           |\n+---------------------------------------------------------------------------------------+\n

NOTE: To make it easier for us to manage datasets, images, and models, we create a HOME constant.

In\u00a0[2]: Copied!
import os\nHOME = os.getcwd()\nprint(HOME)\n
import os HOME = os.getcwd() print(HOME)
/content\n
In\u00a0[\u00a0]: Copied!
!pip install -q inference-gpu[yolo-world]==0.9.12rc1\n
!pip install -q inference-gpu[yolo-world]==0.9.12rc1 In\u00a0[\u00a0]: Copied!
!pip install -q supervision==0.19.0rc3\n
!pip install -q supervision==0.19.0rc3 In\u00a0[\u00a0]: Copied!
import cv2\nimport supervision as sv\n\nfrom tqdm import tqdm\nfrom inference.models.yolo_world.yolo_world import YOLOWorld\n
import cv2 import supervision as sv from tqdm import tqdm from inference.models.yolo_world.yolo_world import YOLOWorld In\u00a0[6]: Copied!
!wget -P {HOME} -q https://media.roboflow.com/notebooks/examples/dog.jpeg\n!wget -P {HOME} -q https://media.roboflow.com/supervision/cookbooks/yellow-filling.mp4\n
!wget -P {HOME} -q https://media.roboflow.com/notebooks/examples/dog.jpeg !wget -P {HOME} -q https://media.roboflow.com/supervision/cookbooks/yellow-filling.mp4 In\u00a0[7]: Copied!
SOURCE_IMAGE_PATH = f\"{HOME}/dog.jpeg\"\nSOURCE_VIDEO_PATH = f\"{HOME}/yellow-filling.mp4\"\n
SOURCE_IMAGE_PATH = f\"{HOME}/dog.jpeg\" SOURCE_VIDEO_PATH = f\"{HOME}/yellow-filling.mp4\"

NOTE: If you want to run the cookbook using your own files as input, simply upload them to Google Colab and replace SOURCE_IMAGE_PATH and SOURCE_VIDEO_PATH with the paths to your files.

In\u00a0[8]: Copied!
model = YOLOWorld(model_id=\"yolo_world/l\")\n
model = YOLOWorld(model_id=\"yolo_world/l\")

YOLO-World is a zero-shot model, enabling object detection without any training. You only need to define a prompt as a list of classes (things) you are searching for.

In\u00a0[9]: Copied!
classes = [\"person\", \"backpack\", \"dog\", \"eye\", \"nose\", \"ear\", \"tongue\"]\nmodel.set_classes(classes)\n
classes = [\"person\", \"backpack\", \"dog\", \"eye\", \"nose\", \"ear\", \"tongue\"] model.set_classes(classes)
100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 338M/338M [00:03<00:00, 106MiB/s]\n

We perform detection on our sample image. Then, we convert the result into a sv.Detections object, which will be useful in the later parts of the cookbook.

In\u00a0[10]: Copied!
image = cv2.imread(SOURCE_IMAGE_PATH)\nresults = model.infer(image)\ndetections = sv.Detections.from_inference(results)\n
image = cv2.imread(SOURCE_IMAGE_PATH) results = model.infer(image) detections = sv.Detections.from_inference(results)

The results we've obtained can be easily visualized with sv.BoundingBoxAnnotator and sv.LabelAnnotator. We can adjust parameters such as line thickness, text scale, and line and text color, allowing for a highly tailored visualization experience.

In\u00a0[11]: Copied!
BOUNDING_BOX_ANNOTATOR = sv.BoundingBoxAnnotator(thickness=2)\nLABEL_ANNOTATOR = sv.LabelAnnotator(text_thickness=2, text_scale=1, text_color=sv.Color.BLACK)\n
BOUNDING_BOX_ANNOTATOR = sv.BoundingBoxAnnotator(thickness=2) LABEL_ANNOTATOR = sv.LabelAnnotator(text_thickness=2, text_scale=1, text_color=sv.Color.BLACK) In\u00a0[12]: Copied!
annotated_image = image.copy()\nannotated_image = BOUNDING_BOX_ANNOTATOR.annotate(annotated_image, detections)\nannotated_image = LABEL_ANNOTATOR.annotate(annotated_image, detections)\nsv.plot_image(annotated_image, (10, 10))\n
annotated_image = image.copy() annotated_image = BOUNDING_BOX_ANNOTATOR.annotate(annotated_image, detections) annotated_image = LABEL_ANNOTATOR.annotate(annotated_image, detections) sv.plot_image(annotated_image, (10, 10))

Note that many classes from our prompt were not detected. This is because the default confidence threshold in Inference is set to 0.5. Let's try significantly lowering this value. We've observed that the confidence returned by YOLO-World is significantly lower when querying for classes outside the COCO dataset.

In\u00a0[13]: Copied!
image = cv2.imread(SOURCE_IMAGE_PATH)\nresults = model.infer(image, confidence=0.003)\ndetections = sv.Detections.from_inference(results)\n
image = cv2.imread(SOURCE_IMAGE_PATH) results = model.infer(image, confidence=0.003) detections = sv.Detections.from_inference(results)

By default, sv.LabelAnnotator displays only the names of objects. To also view the confidence levels associated with each detection, we must define custom labels and pass them to sv.LabelAnnotator.

In\u00a0[14]: Copied!
labels = [\n    f\"{classes[class_id]} {confidence:0.3f}\"\n    for class_id, confidence\n    in zip(detections.class_id, detections.confidence)\n]\n\nannotated_image = image.copy()\nannotated_image = BOUNDING_BOX_ANNOTATOR.annotate(annotated_image, detections)\nannotated_image = LABEL_ANNOTATOR.annotate(annotated_image, detections, labels=labels)\nsv.plot_image(annotated_image, (10, 10))\n
labels = [ f\"{classes[class_id]} {confidence:0.3f}\" for class_id, confidence in zip(detections.class_id, detections.confidence) ] annotated_image = image.copy() annotated_image = BOUNDING_BOX_ANNOTATOR.annotate(annotated_image, detections) annotated_image = LABEL_ANNOTATOR.annotate(annotated_image, detections, labels=labels) sv.plot_image(annotated_image, (10, 10)) In\u00a0[15]: Copied!
image = cv2.imread(SOURCE_IMAGE_PATH)\nresults = model.infer(image, confidence=0.003)\ndetections = sv.Detections.from_inference(results).with_nms(threshold=0.1)\n
image = cv2.imread(SOURCE_IMAGE_PATH) results = model.infer(image, confidence=0.003) detections = sv.Detections.from_inference(results).with_nms(threshold=0.1) In\u00a0[16]: Copied!
labels = [\n    f\"{classes[class_id]} {confidence:0.3f}\"\n    for class_id, confidence\n    in zip(detections.class_id, detections.confidence)\n]\n\nannotated_image = image.copy()\nannotated_image = BOUNDING_BOX_ANNOTATOR.annotate(annotated_image, detections)\nannotated_image = LABEL_ANNOTATOR.annotate(annotated_image, detections, labels=labels)\nsv.plot_image(annotated_image, (10, 10))\n
labels = [ f\"{classes[class_id]} {confidence:0.3f}\" for class_id, confidence in zip(detections.class_id, detections.confidence) ] annotated_image = image.copy() annotated_image = BOUNDING_BOX_ANNOTATOR.annotate(annotated_image, detections) annotated_image = LABEL_ANNOTATOR.annotate(annotated_image, detections, labels=labels) sv.plot_image(annotated_image, (10, 10))

The get_video_frames_generator enables us to easily iterate over video frames. Let's create a video generator for our sample input file and display its first frame on the screen.

In\u00a0[17]: Copied!
generator = sv.get_video_frames_generator(SOURCE_VIDEO_PATH)\nframe = next(generator)\n\nsv.plot_image(frame, (10, 10))\n
generator = sv.get_video_frames_generator(SOURCE_VIDEO_PATH) frame = next(generator) sv.plot_image(frame, (10, 10))

Let's update our list of classes. This time we are looking for yellow filling. The rest of the code performing detection, filtering and visualization remains unchanged.

In\u00a0[23]: Copied!
classes = [\"yellow filling\"]\nmodel.set_classes(classes)\n
classes = [\"yellow filling\"] model.set_classes(classes) In\u00a0[38]: Copied!
results = model.infer(frame, confidence=0.002)\ndetections = sv.Detections.from_inference(results).with_nms(threshold=0.1)\n
results = model.infer(frame, confidence=0.002) detections = sv.Detections.from_inference(results).with_nms(threshold=0.1) In\u00a0[39]: Copied!
annotated_image = frame.copy()\nannotated_image = BOUNDING_BOX_ANNOTATOR.annotate(annotated_image, detections)\nannotated_image = LABEL_ANNOTATOR.annotate(annotated_image, detections)\nsv.plot_image(annotated_image, (10, 10))\n
annotated_image = frame.copy() annotated_image = BOUNDING_BOX_ANNOTATOR.annotate(annotated_image, detections) annotated_image = LABEL_ANNOTATOR.annotate(annotated_image, detections) sv.plot_image(annotated_image, (10, 10))

Our prompt allowed us to locate all filled holes, but we also accidentally marked the entire high-level element. To address this issue, we'll filter detections based on their relative area in relation to the entire video frame. If a detection occupies more than 10% of the frame's total area, it will be discarded.

We can use VideoInfo.from_video_path to learn basic information about our video, such as duration, resolution, or FPS.

In\u00a0[40]: Copied!
video_info = sv.VideoInfo.from_video_path(SOURCE_VIDEO_PATH)\nvideo_info\n
video_info = sv.VideoInfo.from_video_path(SOURCE_VIDEO_PATH) video_info Out[40]:
VideoInfo(width=1280, height=720, fps=25, total_frames=442)

Knowing the frame's resolution allows us to easily calculate its total area, expressed in pixels.

In\u00a0[41]: Copied!
width, height = video_info.resolution_wh\nframe_area = width * height\nframe_area\n
width, height = video_info.resolution_wh frame_area = width * height frame_area Out[41]:
921600

On the other hand, by using sv.Detections.area property, we can learn the area of each individual bounding box.

In\u00a0[45]: Copied!
results = model.infer(frame, confidence=0.002)\ndetections = sv.Detections.from_inference(results).with_nms(threshold=0.1)\ndetections.area\n
results = model.infer(frame, confidence=0.002) detections = sv.Detections.from_inference(results).with_nms(threshold=0.1) detections.area Out[45]:
array([ 7.5408e+05,       92844,       11255,       12969,      9875.9,      8007.7,      5433.5])

Now, we can combine these two pieces of information to construct a filtering condition for detections with an area greater than 10% of the entire frame.

In\u00a0[46]: Copied!
(detections.area / frame_area) < 0.10\n
(detections.area / frame_area) < 0.10 Out[46]:
array([False, False,  True,  True,  True,  True,  True])
In\u00a0[47]: Copied!
detections = detections[(detections.area / frame_area) < 0.10]\n\nannotated_image = frame.copy()\nannotated_image = BOUNDING_BOX_ANNOTATOR.annotate(annotated_image, detections)\nannotated_image = LABEL_ANNOTATOR.annotate(annotated_image, detections)\nsv.plot_image(annotated_image, (10, 10))\n
detections = detections[(detections.area / frame_area) < 0.10] annotated_image = frame.copy() annotated_image = BOUNDING_BOX_ANNOTATOR.annotate(annotated_image, detections) annotated_image = LABEL_ANNOTATOR.annotate(annotated_image, detections) sv.plot_image(annotated_image, (10, 10))

Finally, we are ready to process our entire video. Now we can truly appreciate the speed of YOLO-World.

In\u00a0[49]: Copied!
TARGET_VIDEO_PATH = f\"{HOME}/yellow-filling-output.mp4\"\n
TARGET_VIDEO_PATH = f\"{HOME}/yellow-filling-output.mp4\" In\u00a0[50]: Copied!
frame_generator = sv.get_video_frames_generator(SOURCE_VIDEO_PATH)\nvideo_info = sv.VideoInfo.from_video_path(SOURCE_VIDEO_PATH)\n\nwidth, height = video_info.resolution_wh\nframe_area = width * height\nframe_area\n\nwith sv.VideoSink(target_path=TARGET_VIDEO_PATH, video_info=video_info) as sink:\n    for frame in tqdm(frame_generator, total=video_info.total_frames):\n        results = model.infer(frame, confidence=0.002)\n        detections = sv.Detections.from_inference(results).with_nms(threshold=0.1)\n        detections = detections[(detections.area / frame_area) < 0.10]\n\n        annotated_frame = frame.copy()\n        annotated_frame = BOUNDING_BOX_ANNOTATOR.annotate(annotated_frame, detections)\n        annotated_frame = LABEL_ANNOTATOR.annotate(annotated_frame, detections)\n        sink.write_frame(annotated_frame)\n
frame_generator = sv.get_video_frames_generator(SOURCE_VIDEO_PATH) video_info = sv.VideoInfo.from_video_path(SOURCE_VIDEO_PATH) width, height = video_info.resolution_wh frame_area = width * height frame_area with sv.VideoSink(target_path=TARGET_VIDEO_PATH, video_info=video_info) as sink: for frame in tqdm(frame_generator, total=video_info.total_frames): results = model.infer(frame, confidence=0.002) detections = sv.Detections.from_inference(results).with_nms(threshold=0.1) detections = detections[(detections.area / frame_area) < 0.10] annotated_frame = frame.copy() annotated_frame = BOUNDING_BOX_ANNOTATOR.annotate(annotated_frame, detections) annotated_frame = LABEL_ANNOTATOR.annotate(annotated_frame, detections) sink.write_frame(annotated_frame)
100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 442/442 [00:31<00:00, 13.90it/s]\n

Keep in mind that the video preview below works only in the web version of the cookbooks and not in Google Colab.

"},{"location":"notebooks/zero-shot-object-detection-with-yolo-world/#zero-shot-object-detection-with-yolo-world","title":"Zero-Shot Object Detection with YOLO-World\u00b6","text":"

Click the Open in Colab button to run the cookbook on Google Colab.

YOLO-World was designed to solve a limitation of existing zero-shot object detection models: speed. Whereas other state-of-the-art models use Transformers, a powerful but typically slower architecture, YOLO-World uses the faster CNN-based YOLO architecture.

According to the paper, YOLO-World reaches 35.4 AP at 52.0 FPS for the large version and 26.2 AP at 74.1 FPS for the small version. While the V100 is a powerful GPU, achieving such high FPS on any device is impressive.

"},{"location":"notebooks/zero-shot-object-detection-with-yolo-world/#before-you-start","title":"Before you start\u00b6","text":"

Let's make sure that we have access to a GPU. We can use the nvidia-smi command to do that. In case of any problems, navigate to Edit -> Notebook settings -> Hardware accelerator, set it to GPU, and then click Save.

"},{"location":"notebooks/zero-shot-object-detection-with-yolo-world/#install-required-packages","title":"Install required packages\u00b6","text":"

In this guide, we utilize two Python packages: inference, for executing zero-shot object detection using YOLO-World, and supervision, for post-processing and visualizing the detected objects.

"},{"location":"notebooks/zero-shot-object-detection-with-yolo-world/#imports","title":"Imports\u00b6","text":""},{"location":"notebooks/zero-shot-object-detection-with-yolo-world/#download-example-data","title":"Download example data\u00b6","text":""},{"location":"notebooks/zero-shot-object-detection-with-yolo-world/#run-object-detection","title":"Run Object Detection\u00b6","text":"

The Inference package provides the YOLO-World model in three versions: S, M, and L. You can load them by defining model_id as yolo_world/s, yolo_world/m, and yolo_world/l, respectively. The ROBOFLOW_API_KEY is not required to utilize this model.

"},{"location":"notebooks/zero-shot-object-detection-with-yolo-world/#adjusting-confidence-level","title":"Adjusting Confidence Level\u00b6","text":""},{"location":"notebooks/zero-shot-object-detection-with-yolo-world/#using-non-max-suppression-nms-to-eliminate-double-detections","title":"Using Non-Max Suppression (NMS) to Eliminate Double Detections\u00b6","text":"

To eliminate duplicates, we will use Non-Max Suppression (NMS). NMS evaluates the extent to which detections overlap using the Intersection over Union metric and, upon exceeding a defined threshold, treats them as duplicates. Duplicates are then discarded, starting with those of the lowest confidence. The value should be within the range [0, 1]. The smaller the value, the more restrictive the NMS.

"},{"location":"notebooks/zero-shot-object-detection-with-yolo-world/#video-processing","title":"Video Processing\u00b6","text":""},{"location":"notebooks/zero-shot-object-detection-with-yolo-world/#filtering-detectuions-by-area","title":"Filtering Detectuions by Area\u00b6","text":""},{"location":"notebooks/zero-shot-object-detection-with-yolo-world/#final-result","title":"Final Result\u00b6","text":""},{"location":"utils/draw/","title":"Draw Utils","text":"draw_line

Draws a line on a given scene.

Parameters:

  • scene (np.ndarray): The scene on which the line will be drawn (required)
  • start (Point): The starting point of the line (required)
  • end (Point): The end point of the line (required)
  • color (Color): The color of the line (required)
  • thickness (int): The thickness of the line (default: 2)

Returns:

np.ndarray: The scene with the line drawn on it

Source code in supervision/draw/utils.py
def draw_line(\n    scene: np.ndarray, start: Point, end: Point, color: Color, thickness: int = 2\n) -> np.ndarray:\n    \"\"\"\n    Draws a line on a given scene.\n\n    Parameters:\n        scene (np.ndarray): The scene on which the line will be drawn\n        start (Point): The starting point of the line\n        end (Point): The end point of the line\n        color (Color): The color of the line\n        thickness (int): The thickness of the line\n\n    Returns:\n        np.ndarray: The scene with the line drawn on it\n    \"\"\"\n    cv2.line(\n        scene,\n        start.as_xy_int_tuple(),\n        end.as_xy_int_tuple(),\n        color.as_bgr(),\n        thickness=thickness,\n    )\n    return scene\n
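A quick usage sketch for draw_line; the import paths for Point and Color are assumptions based on the source location shown above, and Color.WHITE is assumed to be available alongside Color.BLACK:

```python
import numpy as np
from supervision.draw.color import Color
from supervision.draw.utils import draw_line
from supervision.geometry.core import Point

# Draw a white diagonal line on a blank 100x100 image
scene = np.zeros((100, 100, 3), dtype=np.uint8)
scene = draw_line(
    scene=scene,
    start=Point(x=10, y=10),
    end=Point(x=90, y=90),
    color=Color.WHITE,
    thickness=2,
)
```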
draw_rectangle

Draws a rectangle on an image.

Parameters:

  • scene (np.ndarray): The scene on which the rectangle will be drawn (required)
  • rect (Rect): The rectangle to be drawn (required)
  • color (Color): The color of the rectangle (required)
  • thickness (int): The thickness of the rectangle border (default: 2)

Returns:

np.ndarray: The scene with the rectangle drawn on it

Source code in supervision/draw/utils.py
def draw_rectangle(\n    scene: np.ndarray, rect: Rect, color: Color, thickness: int = 2\n) -> np.ndarray:\n    \"\"\"\n    Draws a rectangle on an image.\n\n    Parameters:\n        scene (np.ndarray): The scene on which the rectangle will be drawn\n        rect (Rect): The rectangle to be drawn\n        color (Color): The color of the rectangle\n        thickness (int): The thickness of the rectangle border\n\n    Returns:\n        np.ndarray: The scene with the rectangle drawn on it\n    \"\"\"\n    cv2.rectangle(\n        scene,\n        rect.top_left.as_xy_int_tuple(),\n        rect.bottom_right.as_xy_int_tuple(),\n        color.as_bgr(),\n        thickness=thickness,\n    )\n    return scene\n
draw_filled_rectangle

Draws a filled rectangle on an image.

Parameters:

  • scene (np.ndarray): The scene on which the rectangle will be drawn (required)
  • rect (Rect): The rectangle to be drawn (required)
  • color (Color): The color of the rectangle (required)

Returns:

np.ndarray: The scene with the rectangle drawn on it

Source code in supervision/draw/utils.py
def draw_filled_rectangle(scene: np.ndarray, rect: Rect, color: Color) -> np.ndarray:\n    \"\"\"\n    Draws a filled rectangle on an image.\n\n    Parameters:\n        scene (np.ndarray): The scene on which the rectangle will be drawn\n        rect (Rect): The rectangle to be drawn\n        color (Color): The color of the rectangle\n\n    Returns:\n        np.ndarray: The scene with the rectangle drawn on it\n    \"\"\"\n    cv2.rectangle(\n        scene,\n        rect.top_left.as_xy_int_tuple(),\n        rect.bottom_right.as_xy_int_tuple(),\n        color.as_bgr(),\n        -1,\n    )\n    return scene\n
draw_polygon

Draw a polygon on a scene.

Parameters:

- scene (np.ndarray): The scene to draw the polygon on. Required.
- polygon (np.ndarray): The polygon to be drawn, given as a list of vertices. Required.
- color (Color): The color of the polygon. Required.
- thickness (int): The thickness of the polygon lines. Default: 2.

Returns:

- np.ndarray: The scene with the polygon drawn on it.

Source code in supervision/draw/utils.py
def draw_polygon(\n    scene: np.ndarray, polygon: np.ndarray, color: Color, thickness: int = 2\n) -> np.ndarray:\n    \"\"\"Draw a polygon on a scene.\n\n    Parameters:\n        scene (np.ndarray): The scene to draw the polygon on.\n        polygon (np.ndarray): The polygon to be drawn, given as a list of vertices.\n        color (Color): The color of the polygon.\n        thickness (int, optional): The thickness of the polygon lines, by default 2.\n\n    Returns:\n        np.ndarray: The scene with the polygon drawn on it.\n    \"\"\"\n    cv2.polylines(\n        scene, [polygon], isClosed=True, color=color.as_bgr(), thickness=thickness\n    )\n    return scene\n
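A minimal usage sketch, assuming the function is exported as `sv.draw_polygon`. Since the vertices are handed straight to `cv2.polylines`, the example builds them as an `int32` array of `(x, y)` pixel coordinates:

```python
import numpy as np
import supervision as sv

scene = np.zeros((480, 640, 3), dtype=np.uint8)

# (N, 2) array of integer vertices; the polygon is closed automatically.
polygon = np.array([[100, 100], [300, 80], [350, 300], [120, 320]], dtype=np.int32)

scene = sv.draw_polygon(
    scene=scene,
    polygon=polygon,
    color=sv.Color.RED,
    thickness=2,
)
```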
draw_text

Draw text with background on a scene.

Parameters:

- scene (np.ndarray): A 2-dimensional numpy ndarray representing an image or scene. Required.
- text (str): The text to be drawn. Required.
- text_anchor (Point): The anchor point for the text, represented as a Point object with x and y attributes. Required.
- text_color (Color): The color of the text. Default: Color.BLACK.
- text_scale (float): The scale of the text. Default: 0.5.
- text_thickness (int): The thickness of the text. Default: 1.
- text_padding (int): The amount of padding to add around the text when drawing a rectangle in the background. Default: 10.
- text_font (int): The font to use for the text. Default: cv2.FONT_HERSHEY_SIMPLEX.
- background_color (Optional[Color]): The color of the background rectangle, if one is to be drawn. Default: None.

Returns:

- np.ndarray: The input scene with the text drawn on it.

Examples:

import numpy as np\n\nscene = np.zeros((100, 100, 3), dtype=np.uint8)\ntext_anchor = Point(x=50, y=50)\nscene = draw_text(scene=scene, text=\"Hello, world!\",text_anchor=text_anchor)\n
Source code in supervision/draw/utils.py
def draw_text(\n    scene: np.ndarray,\n    text: str,\n    text_anchor: Point,\n    text_color: Color = Color.BLACK,\n    text_scale: float = 0.5,\n    text_thickness: int = 1,\n    text_padding: int = 10,\n    text_font: int = cv2.FONT_HERSHEY_SIMPLEX,\n    background_color: Optional[Color] = None,\n) -> np.ndarray:\n    \"\"\"\n    Draw text with background on a scene.\n\n    Parameters:\n        scene (np.ndarray): A 2-dimensional numpy ndarray representing an image or scene\n        text (str): The text to be drawn.\n        text_anchor (Point): The anchor point for the text, represented as a\n            Point object with x and y attributes.\n        text_color (Color, optional): The color of the text. Defaults to black.\n        text_scale (float, optional): The scale of the text. Defaults to 0.5.\n        text_thickness (int, optional): The thickness of the text. Defaults to 1.\n        text_padding (int, optional): The amount of padding to add around the text\n            when drawing a rectangle in the background. Defaults to 10.\n        text_font (int, optional): The font to use for the text.\n            Defaults to cv2.FONT_HERSHEY_SIMPLEX.\n        background_color (Color, optional): The color of the background rectangle,\n            if one is to be drawn. Defaults to None.\n\n    Returns:\n        np.ndarray: The input scene with the text drawn on it.\n\n    Examples:\n        ```python\n        import numpy as np\n\n        scene = np.zeros((100, 100, 3), dtype=np.uint8)\n        text_anchor = Point(x=50, y=50)\n        scene = draw_text(scene=scene, text=\"Hello, world!\",text_anchor=text_anchor)\n        ```\n    \"\"\"\n    text_width, text_height = cv2.getTextSize(\n        text=text,\n        fontFace=text_font,\n        fontScale=text_scale,\n        thickness=text_thickness,\n    )[0]\n\n    text_anchor_x, text_anchor_y = text_anchor.as_xy_int_tuple()\n\n    text_rect = Rect(\n        x=text_anchor_x - text_width // 2,\n        y=text_anchor_y - text_height // 2,\n        width=text_width,\n        height=text_height,\n    ).pad(text_padding)\n\n    if background_color is not None:\n        scene = draw_filled_rectangle(\n            scene=scene, rect=text_rect, color=background_color\n        )\n\n    cv2.putText(\n        img=scene,\n        text=text,\n        org=(text_anchor_x - text_width // 2, text_anchor_y + text_height // 2),\n        fontFace=text_font,\n        fontScale=text_scale,\n        color=text_color.as_bgr(),\n        thickness=text_thickness,\n        lineType=cv2.LINE_AA,\n    )\n    return scene\n
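To complement the docstring example above, here is a hedged sketch that also shows the optional background rectangle. It assumes top-level exports (`sv.draw_text`, `sv.Point`, `sv.Color`); the text string and coordinates are illustrative only.

```python
import numpy as np
import supervision as sv

scene = np.zeros((200, 400, 3), dtype=np.uint8)

# The anchor marks the center of the text; the padded background rectangle
# is drawn first, then the text is rendered on top of it.
scene = sv.draw_text(
    scene=scene,
    text="speed: 42 km/h",
    text_anchor=sv.Point(x=200, y=100),
    text_color=sv.Color.BLACK,
    background_color=sv.Color.WHITE,
    text_scale=0.6,
    text_padding=8,
)
```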
draw_image

Draws an image onto a given scene with specified opacity and dimensions.

Parameters:

- scene (np.ndarray): Background image where the new image will be drawn. Required.
- image (Union[str, np.ndarray]): Image to draw. Required.
- opacity (float): Opacity of the image to be drawn. Required.
- rect (Rect): Rectangle specifying where to draw the image. Required.

Returns:

- np.ndarray: The updated scene.

Raises:

- FileNotFoundError: If the image path does not exist.
- ValueError: For invalid opacity or rectangle dimensions.

Source code in supervision/draw/utils.py
def draw_image(\n    scene: np.ndarray, image: Union[str, np.ndarray], opacity: float, rect: Rect\n) -> np.ndarray:\n    \"\"\"\n    Draws an image onto a given scene with specified opacity and dimensions.\n\n    Args:\n        scene (np.ndarray): Background image where the new image will be drawn.\n        image (Union[str, np.ndarray]): Image to draw.\n        opacity (float): Opacity of the image to be drawn.\n        rect (Rect): Rectangle specifying where to draw the image.\n\n    Returns:\n        np.ndarray: The updated scene.\n\n    Raises:\n        FileNotFoundError: If the image path does not exist.\n        ValueError: For invalid opacity or rectangle dimensions.\n    \"\"\"\n\n    # Validate and load image\n    if isinstance(image, str):\n        if not os.path.exists(image):\n            raise FileNotFoundError(f\"Image path ('{image}') does not exist.\")\n        image = cv2.imread(image, cv2.IMREAD_UNCHANGED)\n\n    # Validate opacity\n    if not 0.0 <= opacity <= 1.0:\n        raise ValueError(\"Opacity must be between 0.0 and 1.0.\")\n\n    # Validate rectangle dimensions\n    if (\n        rect.x < 0\n        or rect.y < 0\n        or rect.x + rect.width > scene.shape[1]\n        or rect.y + rect.height > scene.shape[0]\n    ):\n        raise ValueError(\"Invalid rectangle dimensions.\")\n\n    # Resize and isolate alpha channel\n    image = cv2.resize(image, (rect.width, rect.height))\n    alpha_channel = (\n        image[:, :, 3]\n        if image.shape[2] == 4\n        else np.ones((rect.height, rect.width), dtype=image.dtype) * 255\n    )\n    alpha_scaled = cv2.convertScaleAbs(alpha_channel * opacity)\n\n    # Perform blending\n    scene_roi = scene[rect.y : rect.y + rect.height, rect.x : rect.x + rect.width]\n    alpha_float = alpha_scaled.astype(np.float32) / 255.0\n    blended_roi = cv2.convertScaleAbs(\n        (1 - alpha_float[..., np.newaxis]) * scene_roi\n        + alpha_float[..., np.newaxis] * image[:, :, :3]\n    )\n\n    # Update the scene\n    scene[rect.y : rect.y + rect.height, rect.x : rect.x + rect.width] = blended_roi\n\n    return scene\n
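A minimal sketch of blending an overlay into a scene. It assumes the function is exported as `sv.draw_image` (otherwise import it from `supervision.draw.utils`); the white square stands in for an image loaded from disk.

```python
import numpy as np
import supervision as sv

scene = np.zeros((480, 640, 3), dtype=np.uint8)
logo = np.full((120, 120, 3), 255, dtype=np.uint8)  # stand-in for a loaded image

# Blend the overlay into the top-left corner at 50% opacity; the rectangle
# must lie fully inside the scene or a ValueError is raised.
scene = sv.draw_image(
    scene=scene,
    image=logo,
    opacity=0.5,
    rect=sv.Rect(x=10, y=10, width=120, height=120),
)
```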
calculate_optimal_text_scale

Calculate font scale based on the resolution of an image.

Parameters:

- resolution_wh (Tuple[int, int]): A tuple representing the width and height of the image. Required.

Returns:

- float: The calculated font scale factor.

Source code in supervision/draw/utils.py
def calculate_optimal_text_scale(resolution_wh: Tuple[int, int]) -> float:\n    \"\"\"\n    Calculate font scale based on the resolution of an image.\n\n    Parameters:\n         resolution_wh (Tuple[int, int]): A tuple representing the width and height\n             of the image.\n\n    Returns:\n         float: The calculated font scale factor.\n    \"\"\"\n    return min(resolution_wh) * 1e-3\n
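As the source shows, the scale grows linearly with the smaller image side (`min(resolution_wh) * 1e-3`). A quick sketch, assuming the helper is exported as `sv.calculate_optimal_text_scale`:

```python
import supervision as sv

# 1080 is the smaller side of a Full HD frame, so the scale is ~1.08.
text_scale = sv.calculate_optimal_text_scale(resolution_wh=(1920, 1080))
print(text_scale)  # ~1.08
```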
calculate_optimal_line_thickness

Calculate line thickness based on the resolution of an image.

Parameters:

- resolution_wh (Tuple[int, int]): A tuple representing the width and height of the image. Required.

Returns:

- int: The calculated line thickness in pixels.

Source code in supervision/draw/utils.py
def calculate_optimal_line_thickness(resolution_wh: Tuple[int, int]) -> int:\n    \"\"\"\n    Calculate line thickness based on the resolution of an image.\n\n    Parameters:\n        resolution_wh (Tuple[int, int]): A tuple representing the width and height\n            of the image.\n\n    Returns:\n        int: The calculated line thickness in pixels.\n    \"\"\"\n    if min(resolution_wh) < 1080:\n        return 2\n    return 4\n
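The rule is a simple threshold on the smaller side of the frame: 2 px below 1080, otherwise 4 px. A quick sketch, assuming the helper is exported as `sv.calculate_optimal_line_thickness`:

```python
import supervision as sv

print(sv.calculate_optimal_line_thickness(resolution_wh=(1280, 720)))   # 2 (min side 720 < 1080)
print(sv.calculate_optimal_line_thickness(resolution_wh=(3840, 2160)))  # 4 (min side 2160 >= 1080)
```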
Color

Represents a color in RGB format.

This class provides methods to work with colors, including creating colors from hex codes, converting colors to hex strings, RGB tuples, and BGR tuples.

Attributes:

- r (int): Red channel value (0-255).
- g (int): Green channel value (0-255).
- b (int): Blue channel value (0-255).

Example
import supervision as sv\n\nsv.Color.WHITE\n# Color(r=255, g=255, b=255)\n
| Constant   | Hex Code  | RGB               |
|------------|-----------|-------------------|
| WHITE      | #FFFFFF   | (255, 255, 255)   |
| BLACK      | #000000   | (0, 0, 0)         |
| RED        | #FF0000   | (255, 0, 0)       |
| GREEN      | #00FF00   | (0, 255, 0)       |
| BLUE       | #0000FF   | (0, 0, 255)       |
| YELLOW     | #FFFF00   | (255, 255, 0)     |
| ROBOFLOW   | #A351FB   | (163, 81, 251)    |

Source code in supervision/draw/color.py
@dataclass\nclass Color:\n    \"\"\"\n    Represents a color in RGB format.\n\n    This class provides methods to work with colors, including creating colors from hex\n    codes, converting colors to hex strings, RGB tuples, and BGR tuples.\n\n    Attributes:\n        r (int): Red channel value (0-255).\n        g (int): Green channel value (0-255).\n        b (int): Blue channel value (0-255).\n\n    Example:\n        ```python\n        import supervision as sv\n\n        sv.Color.WHITE\n        # Color(r=255, g=255, b=255)\n        ```\n\n    | Constant   | Hex Code   | RGB              |\n    |------------|------------|------------------|\n    | `WHITE`    | `#FFFFFF`  | `(255, 255, 255)`|\n    | `BLACK`    | `#000000`  | `(0, 0, 0)`      |\n    | `RED`      | `#FF0000`  | `(255, 0, 0)`    |\n    | `GREEN`    | `#00FF00`  | `(0, 255, 0)`    |\n    | `BLUE`     | `#0000FF`  | `(0, 0, 255)`    |\n    | `YELLOW`   | `#FFFF00`  | `(255, 255, 0)`  |\n    | `ROBOFLOW` | `#A351FB`  | `(163, 81, 251)` |\n    \"\"\"\n\n    r: int\n    g: int\n    b: int\n\n    @classmethod\n    def from_hex(cls, color_hex: str) -> Color:\n        \"\"\"\n        Create a Color instance from a hex string.\n\n        Args:\n            color_hex (str): The hex string representing the color. This string can\n                start with '#' followed by either 3 or 6 hexadecimal characters. In\n                case of 3 characters, each character is repeated to form the full\n                6-character hex code.\n\n        Returns:\n            Color: An instance representing the color.\n\n        Example:\n            ```python\n            import supervision as sv\n\n            sv.Color.from_hex('#ff00ff')\n            # Color(r=255, g=0, b=255)\n\n            sv.Color.from_hex('#f0f')\n            # Color(r=255, g=0, b=255)\n            ```\n        \"\"\"\n        _validate_color_hex(color_hex)\n        color_hex = color_hex.lstrip(\"#\")\n        if len(color_hex) == 3:\n            color_hex = \"\".join(c * 2 for c in color_hex)\n        r, g, b = (int(color_hex[i : i + 2], 16) for i in range(0, 6, 2))\n        return cls(r, g, b)\n\n    @classmethod\n    def from_rgb_tuple(cls, color_tuple: Tuple[int, int, int]) -> Color:\n        \"\"\"\n        Create a Color instance from an RGB tuple.\n\n        Args:\n            color_tuple (Tuple[int, int, int]): A tuple representing the color in RGB\n                format, where each element is an integer in the range 0-255.\n\n        Returns:\n            Color: An instance representing the color.\n\n        Example:\n            ```python\n            import supervision as sv\n\n            sv.Color.from_rgb_tuple((255, 255, 0))\n            # Color(r=255, g=255, b=0)\n            ```\n        \"\"\"\n        r, g, b = color_tuple\n        return cls(r=r, g=g, b=b)\n\n    @classmethod\n    def from_bgr_tuple(cls, color_tuple: Tuple[int, int, int]) -> Color:\n        \"\"\"\n        Create a Color instance from a BGR tuple.\n\n        Args:\n            color_tuple (Tuple[int, int, int]): A tuple representing the color in BGR\n                format, where each element is an integer in the range 0-255.\n\n        Returns:\n            Color: An instance representing the color.\n\n        Example:\n            ```python\n            import supervision as sv\n\n            sv.Color.from_bgr_tuple((0, 255, 255))\n            # Color(r=255, g=255, b=0)\n            ```\n        \"\"\"\n        b, g, r = color_tuple\n        return cls(r=r, g=g, b=b)\n\n    def 
as_hex(self) -> str:\n        \"\"\"\n        Converts the Color instance to a hex string.\n\n        Returns:\n            str: The hexadecimal color string.\n\n        Example:\n            ```python\n            import supervision as sv\n\n            sv.Color(r=255, g=255, b=0).as_hex()\n            # '#ffff00'\n            ```\n        \"\"\"\n        return f\"#{self.r:02x}{self.g:02x}{self.b:02x}\"\n\n    def as_rgb(self) -> Tuple[int, int, int]:\n        \"\"\"\n        Returns the color as an RGB tuple.\n\n        Returns:\n            Tuple[int, int, int]: RGB tuple.\n\n        Example:\n            ```python\n            import supervision as sv\n\n            sv.Color(r=255, g=255, b=0).as_rgb()\n            # (255, 255, 0)\n            ```\n        \"\"\"\n        return self.r, self.g, self.b\n\n    def as_bgr(self) -> Tuple[int, int, int]:\n        \"\"\"\n        Returns the color as a BGR tuple.\n\n        Returns:\n            Tuple[int, int, int]: BGR tuple.\n\n        Example:\n            ```python\n            import supervision as sv\n\n            sv.Color(r=255, g=255, b=0).as_bgr()\n            # (0, 255, 255)\n            ```\n        \"\"\"\n        return self.b, self.g, self.r\n\n    @classproperty\n    def WHITE(cls) -> Color:\n        return Color.from_hex(\"#FFFFFF\")\n\n    @classproperty\n    def BLACK(cls) -> Color:\n        return Color.from_hex(\"#000000\")\n\n    @classproperty\n    def RED(cls) -> Color:\n        return Color.from_hex(\"#FF0000\")\n\n    @classproperty\n    def GREEN(cls) -> Color:\n        return Color.from_hex(\"#00FF00\")\n\n    @classproperty\n    def BLUE(cls) -> Color:\n        return Color.from_hex(\"#0000FF\")\n\n    @classproperty\n    def YELLOW(cls) -> Color:\n        return Color.from_hex(\"#FFFF00\")\n\n    @classproperty\n    def ROBOFLOW(cls) -> Color:\n        return Color.from_hex(\"#A351FB\")\n\n    @classmethod\n    @deprecated(\n        \"`Color.white()` is deprecated and will be removed in \"\n        \"`supervision-0.22.0`. Use `Color.WHITE` instead.\"\n    )\n    def white(cls) -> Color:\n        return Color.from_hex(color_hex=\"#ffffff\")\n\n    @classmethod\n    @deprecated(\n        \"`Color.black()` is deprecated and will be removed in \"\n        \"`supervision-0.22.0`. Use `Color.BLACK` instead.\"\n    )\n    def black(cls) -> Color:\n        return Color.from_hex(color_hex=\"#000000\")\n\n    @classmethod\n    @deprecated(\n        \"`Color.red()` is deprecated and will be removed in \"\n        \"`supervision-0.22.0`. Use `Color.RED` instead.\"\n    )\n    def red(cls) -> Color:\n        return Color.from_hex(color_hex=\"#ff0000\")\n\n    @classmethod\n    @deprecated(\n        \"`Color.green()` is deprecated and will be removed in \"\n        \"`supervision-0.22.0`. Use `Color.GREEN` instead.\"\n    )\n    def green(cls) -> Color:\n        return Color.from_hex(color_hex=\"#00ff00\")\n\n    @classmethod\n    @deprecated(\n        \"`Color.blue()` is deprecated and will be removed in \"\n        \"`supervision-0.22.0`. Use `Color.BLUE` instead.\"\n    )\n    def blue(cls) -> Color:\n        return Color.from_hex(color_hex=\"#0000ff\")\n
ColorPalette Source code in supervision/draw/color.py
@dataclass\nclass ColorPalette:\n    colors: List[Color]\n\n    @classproperty\n    def DEFAULT(cls) -> ColorPalette:\n        \"\"\"\n        Returns a default color palette.\n\n        Returns:\n            ColorPalette: A ColorPalette instance with default colors.\n\n        Example:\n            ```python\n            import supervision as sv\n\n            sv.ColorPalette.DEFAULT\n            # ColorPalette(colors=[Color(r=255, g=64, b=64), Color(r=255, g=161, b=160), ...])\n            ```\n\n        ![default-color-palette](https://media.roboflow.com/\n        supervision-annotator-examples/default-color-palette.png)\n        \"\"\"  # noqa: E501 // docs\n        return ColorPalette.from_hex(color_hex_list=DEFAULT_COLOR_PALETTE)\n\n    @classproperty\n    def ROBOFLOW(cls) -> ColorPalette:\n        \"\"\"\n        Returns a Roboflow color palette.\n\n        Returns:\n            ColorPalette: A ColorPalette instance with Roboflow colors.\n\n        Example:\n            ```python\n            import supervision as sv\n\n            sv.ColorPalette.ROBOFLOW\n            # ColorPalette(colors=[Color(r=194, g=141, b=252), Color(r=163, g=81, b=251), ...])\n            ```\n\n        ![roboflow-color-palette](https://media.roboflow.com/\n        supervision-annotator-examples/roboflow-color-palette.png)\n        \"\"\"  # noqa: E501 // docs\n        return ColorPalette.from_hex(color_hex_list=ROBOFLOW_COLOR_PALETTE)\n\n    @classproperty\n    def LEGACY(cls) -> ColorPalette:\n        return ColorPalette.from_hex(color_hex_list=LEGACY_COLOR_PALETTE)\n\n    @classmethod\n    @deprecated(\n        \"`ColorPalette.default()` is deprecated and will be removed in \"\n        \"`supervision-0.22.0`. Use `Color.DEFAULT` instead.\"\n    )\n    def default(cls) -> ColorPalette:\n        \"\"\"\n        !!! failure \"Deprecated\"\n\n            `ColorPalette.default()` is deprecated and will be removed in\n            `supervision-0.22.0`. 
Use `Color.DEFAULT` instead.\n\n        Returns a default color palette.\n\n        Returns:\n            ColorPalette: A ColorPalette instance with default colors.\n\n        Example:\n            ```python\n            import supervision as sv\n\n            sv.ColorPalette.default()\n            # ColorPalette(colors=[Color(r=255, g=64, b=64), Color(r=255, g=161, b=160), ...])\n            ```\n        \"\"\"  # noqa: E501 // docs\n        return ColorPalette.from_hex(color_hex_list=DEFAULT_COLOR_PALETTE)\n\n    @classmethod\n    def from_hex(cls, color_hex_list: List[str]) -> ColorPalette:\n        \"\"\"\n        Create a ColorPalette instance from a list of hex strings.\n\n        Args:\n            color_hex_list (List[str]): List of color hex strings.\n\n        Returns:\n            ColorPalette: A ColorPalette instance.\n\n        Example:\n            ```python\n            import supervision as sv\n\n            sv.ColorPalette.from_hex(['#ff0000', '#00ff00', '#0000ff'])\n            # ColorPalette(colors=[Color(r=255, g=0, b=0), Color(r=0, g=255, b=0), ...])\n            ```\n        \"\"\"\n        colors = [Color.from_hex(color_hex) for color_hex in color_hex_list]\n        return cls(colors)\n\n    @classmethod\n    def from_matplotlib(cls, palette_name: str, color_count: int) -> ColorPalette:\n        \"\"\"\n        Create a ColorPalette instance from a Matplotlib color palette.\n\n        Args:\n            palette_name (str): Name of the Matplotlib palette.\n            color_count (int): Number of colors to sample from the palette.\n\n        Returns:\n            ColorPalette: A ColorPalette instance.\n\n        Example:\n            ```python\n            import supervision as sv\n\n            sv.ColorPalette.from_matplotlib('viridis', 5)\n            # ColorPalette(colors=[Color(r=68, g=1, b=84), Color(r=59, g=82, b=139), ...])\n            ```\n\n        ![visualized_color_palette](https://media.roboflow.com/\n        supervision-annotator-examples/visualized_color_palette.png)\n        \"\"\"  # noqa: E501 // docs\n        mpl_palette = plt.get_cmap(palette_name, color_count)\n        colors = [\n            Color(int(r * 255), int(g * 255), int(b * 255))\n            for r, g, b, _ in mpl_palette.colors\n        ]\n        return cls(colors)\n\n    def by_idx(self, idx: int) -> Color:\n        \"\"\"\n        Return the color at a given index in the palette.\n\n        Args:\n            idx (int): Index of the color in the palette.\n\n        Returns:\n            Color: Color at the given index.\n\n        Example:\n            ```python\n            import supervision as sv\n\n            color_palette = sv.ColorPalette.from_hex(['#ff0000', '#00ff00', '#0000ff'])\n            color_palette.by_idx(1)\n            # Color(r=0, g=255, b=0)\n            ```\n        \"\"\"\n        if idx < 0:\n            raise ValueError(\"idx argument should not be negative\")\n        idx = idx % len(self.colors)\n        return self.colors[idx]\n
"},{"location":"utils/draw/#supervision.draw.color.Color-functions","title":"Functions","text":""},{"location":"utils/draw/#supervision.draw.color.Color.as_bgr","title":"as_bgr()","text":"

Returns the color as a BGR tuple.

Returns:

- Tuple[int, int, int]: BGR tuple.

Example
import supervision as sv\n\nsv.Color(r=255, g=255, b=0).as_bgr()\n# (0, 255, 255)\n
Source code in supervision/draw/color.py
def as_bgr(self) -> Tuple[int, int, int]:\n    \"\"\"\n    Returns the color as a BGR tuple.\n\n    Returns:\n        Tuple[int, int, int]: BGR tuple.\n\n    Example:\n        ```python\n        import supervision as sv\n\n        sv.Color(r=255, g=255, b=0).as_bgr()\n        # (0, 255, 255)\n        ```\n    \"\"\"\n    return self.b, self.g, self.r\n
"},{"location":"utils/draw/#supervision.draw.color.Color.as_hex","title":"as_hex()","text":"

Converts the Color instance to a hex string.

Returns:

- str: The hexadecimal color string.

Example
import supervision as sv\n\nsv.Color(r=255, g=255, b=0).as_hex()\n# '#ffff00'\n
Source code in supervision/draw/color.py
def as_hex(self) -> str:\n    \"\"\"\n    Converts the Color instance to a hex string.\n\n    Returns:\n        str: The hexadecimal color string.\n\n    Example:\n        ```python\n        import supervision as sv\n\n        sv.Color(r=255, g=255, b=0).as_hex()\n        # '#ffff00'\n        ```\n    \"\"\"\n    return f\"#{self.r:02x}{self.g:02x}{self.b:02x}\"\n
"},{"location":"utils/draw/#supervision.draw.color.Color.as_rgb","title":"as_rgb()","text":"

Returns the color as an RGB tuple.

Returns:

- Tuple[int, int, int]: RGB tuple.

Example
import supervision as sv\n\nsv.Color(r=255, g=255, b=0).as_rgb()\n# (255, 255, 0)\n
Source code in supervision/draw/color.py
def as_rgb(self) -> Tuple[int, int, int]:\n    \"\"\"\n    Returns the color as an RGB tuple.\n\n    Returns:\n        Tuple[int, int, int]: RGB tuple.\n\n    Example:\n        ```python\n        import supervision as sv\n\n        sv.Color(r=255, g=255, b=0).as_rgb()\n        # (255, 255, 0)\n        ```\n    \"\"\"\n    return self.r, self.g, self.b\n
"},{"location":"utils/draw/#supervision.draw.color.Color.from_bgr_tuple","title":"from_bgr_tuple(color_tuple) classmethod","text":"

Create a Color instance from a BGR tuple.

Parameters:

- color_tuple (Tuple[int, int, int]): A tuple representing the color in BGR format, where each element is an integer in the range 0-255. Required.

Returns:

- Color: An instance representing the color.

Example
import supervision as sv\n\nsv.Color.from_bgr_tuple((0, 255, 255))\n# Color(r=255, g=255, b=0)\n
Source code in supervision/draw/color.py
@classmethod\ndef from_bgr_tuple(cls, color_tuple: Tuple[int, int, int]) -> Color:\n    \"\"\"\n    Create a Color instance from a BGR tuple.\n\n    Args:\n        color_tuple (Tuple[int, int, int]): A tuple representing the color in BGR\n            format, where each element is an integer in the range 0-255.\n\n    Returns:\n        Color: An instance representing the color.\n\n    Example:\n        ```python\n        import supervision as sv\n\n        sv.Color.from_bgr_tuple((0, 255, 255))\n        # Color(r=255, g=255, b=0)\n        ```\n    \"\"\"\n    b, g, r = color_tuple\n    return cls(r=r, g=g, b=b)\n
"},{"location":"utils/draw/#supervision.draw.color.Color.from_hex","title":"from_hex(color_hex) classmethod","text":"

Create a Color instance from a hex string.

Parameters:

- color_hex (str): The hex string representing the color. This string can start with '#' followed by either 3 or 6 hexadecimal characters. In case of 3 characters, each character is repeated to form the full 6-character hex code. Required.

Returns:

- Color: An instance representing the color.

Example
import supervision as sv\n\nsv.Color.from_hex('#ff00ff')\n# Color(r=255, g=0, b=255)\n\nsv.Color.from_hex('#f0f')\n# Color(r=255, g=0, b=255)\n
Source code in supervision/draw/color.py
@classmethod\ndef from_hex(cls, color_hex: str) -> Color:\n    \"\"\"\n    Create a Color instance from a hex string.\n\n    Args:\n        color_hex (str): The hex string representing the color. This string can\n            start with '#' followed by either 3 or 6 hexadecimal characters. In\n            case of 3 characters, each character is repeated to form the full\n            6-character hex code.\n\n    Returns:\n        Color: An instance representing the color.\n\n    Example:\n        ```python\n        import supervision as sv\n\n        sv.Color.from_hex('#ff00ff')\n        # Color(r=255, g=0, b=255)\n\n        sv.Color.from_hex('#f0f')\n        # Color(r=255, g=0, b=255)\n        ```\n    \"\"\"\n    _validate_color_hex(color_hex)\n    color_hex = color_hex.lstrip(\"#\")\n    if len(color_hex) == 3:\n        color_hex = \"\".join(c * 2 for c in color_hex)\n    r, g, b = (int(color_hex[i : i + 2], 16) for i in range(0, 6, 2))\n    return cls(r, g, b)\n
"},{"location":"utils/draw/#supervision.draw.color.Color.from_rgb_tuple","title":"from_rgb_tuple(color_tuple) classmethod","text":"

Create a Color instance from an RGB tuple.

Parameters:

- color_tuple (Tuple[int, int, int]): A tuple representing the color in RGB format, where each element is an integer in the range 0-255. Required.

Returns:

- Color: An instance representing the color.

Example
import supervision as sv\n\nsv.Color.from_rgb_tuple((255, 255, 0))\n# Color(r=255, g=255, b=0)\n
Source code in supervision/draw/color.py
@classmethod\ndef from_rgb_tuple(cls, color_tuple: Tuple[int, int, int]) -> Color:\n    \"\"\"\n    Create a Color instance from an RGB tuple.\n\n    Args:\n        color_tuple (Tuple[int, int, int]): A tuple representing the color in RGB\n            format, where each element is an integer in the range 0-255.\n\n    Returns:\n        Color: An instance representing the color.\n\n    Example:\n        ```python\n        import supervision as sv\n\n        sv.Color.from_rgb_tuple((255, 255, 0))\n        # Color(r=255, g=255, b=0)\n        ```\n    \"\"\"\n    r, g, b = color_tuple\n    return cls(r=r, g=g, b=b)\n
"},{"location":"utils/draw/#supervision.draw.color.ColorPalette-functions","title":"Functions","text":""},{"location":"utils/draw/#supervision.draw.color.ColorPalette.DEFAULT","title":"DEFAULT()","text":"

Returns a default color palette.

Returns:

- ColorPalette: A ColorPalette instance with default colors.

Example
import supervision as sv\n\nsv.ColorPalette.DEFAULT\n# ColorPalette(colors=[Color(r=255, g=64, b=64), Color(r=255, g=161, b=160), ...])\n

Source code in supervision/draw/color.py
@classproperty\ndef DEFAULT(cls) -> ColorPalette:\n    \"\"\"\n    Returns a default color palette.\n\n    Returns:\n        ColorPalette: A ColorPalette instance with default colors.\n\n    Example:\n        ```python\n        import supervision as sv\n\n        sv.ColorPalette.DEFAULT\n        # ColorPalette(colors=[Color(r=255, g=64, b=64), Color(r=255, g=161, b=160), ...])\n        ```\n\n    ![default-color-palette](https://media.roboflow.com/\n    supervision-annotator-examples/default-color-palette.png)\n    \"\"\"  # noqa: E501 // docs\n    return ColorPalette.from_hex(color_hex_list=DEFAULT_COLOR_PALETTE)\n
"},{"location":"utils/draw/#supervision.draw.color.ColorPalette.ROBOFLOW","title":"ROBOFLOW()","text":"

Returns a Roboflow color palette.

Returns:

- ColorPalette: A ColorPalette instance with Roboflow colors.

Example
import supervision as sv\n\nsv.ColorPalette.ROBOFLOW\n# ColorPalette(colors=[Color(r=194, g=141, b=252), Color(r=163, g=81, b=251), ...])\n

Source code in supervision/draw/color.py
@classproperty\ndef ROBOFLOW(cls) -> ColorPalette:\n    \"\"\"\n    Returns a Roboflow color palette.\n\n    Returns:\n        ColorPalette: A ColorPalette instance with Roboflow colors.\n\n    Example:\n        ```python\n        import supervision as sv\n\n        sv.ColorPalette.ROBOFLOW\n        # ColorPalette(colors=[Color(r=194, g=141, b=252), Color(r=163, g=81, b=251), ...])\n        ```\n\n    ![roboflow-color-palette](https://media.roboflow.com/\n    supervision-annotator-examples/roboflow-color-palette.png)\n    \"\"\"  # noqa: E501 // docs\n    return ColorPalette.from_hex(color_hex_list=ROBOFLOW_COLOR_PALETTE)\n
"},{"location":"utils/draw/#supervision.draw.color.ColorPalette.by_idx","title":"by_idx(idx)","text":"

Return the color at a given index in the palette.

Parameters:

- idx (int): Index of the color in the palette. Required.

Returns:

- Color: Color at the given index.

Example
import supervision as sv\n\ncolor_palette = sv.ColorPalette.from_hex(['#ff0000', '#00ff00', '#0000ff'])\ncolor_palette.by_idx(1)\n# Color(r=0, g=255, b=0)\n
Source code in supervision/draw/color.py
def by_idx(self, idx: int) -> Color:\n    \"\"\"\n    Return the color at a given index in the palette.\n\n    Args:\n        idx (int): Index of the color in the palette.\n\n    Returns:\n        Color: Color at the given index.\n\n    Example:\n        ```python\n        import supervision as sv\n\n        color_palette = sv.ColorPalette.from_hex(['#ff0000', '#00ff00', '#0000ff'])\n        color_palette.by_idx(1)\n        # Color(r=0, g=255, b=0)\n        ```\n    \"\"\"\n    if idx < 0:\n        raise ValueError(\"idx argument should not be negative\")\n    idx = idx % len(self.colors)\n    return self.colors[idx]\n
"},{"location":"utils/draw/#supervision.draw.color.ColorPalette.default","title":"default() classmethod","text":"

Deprecated

ColorPalette.default() is deprecated and will be removed in supervision-0.22.0. Use Color.DEFAULT instead.

Returns a default color palette.

Returns:

- ColorPalette: A ColorPalette instance with default colors.

Example
import supervision as sv\n\nsv.ColorPalette.default()\n# ColorPalette(colors=[Color(r=255, g=64, b=64), Color(r=255, g=161, b=160), ...])\n
Source code in supervision/draw/color.py
@classmethod\n@deprecated(\n    \"`ColorPalette.default()` is deprecated and will be removed in \"\n    \"`supervision-0.22.0`. Use `Color.DEFAULT` instead.\"\n)\ndef default(cls) -> ColorPalette:\n    \"\"\"\n    !!! failure \"Deprecated\"\n\n        `ColorPalette.default()` is deprecated and will be removed in\n        `supervision-0.22.0`. Use `Color.DEFAULT` instead.\n\n    Returns a default color palette.\n\n    Returns:\n        ColorPalette: A ColorPalette instance with default colors.\n\n    Example:\n        ```python\n        import supervision as sv\n\n        sv.ColorPalette.default()\n        # ColorPalette(colors=[Color(r=255, g=64, b=64), Color(r=255, g=161, b=160), ...])\n        ```\n    \"\"\"  # noqa: E501 // docs\n    return ColorPalette.from_hex(color_hex_list=DEFAULT_COLOR_PALETTE)\n
"},{"location":"utils/draw/#supervision.draw.color.ColorPalette.from_hex","title":"from_hex(color_hex_list) classmethod","text":"

Create a ColorPalette instance from a list of hex strings.

Parameters:

- color_hex_list (List[str]): List of color hex strings. Required.

Returns:

- ColorPalette: A ColorPalette instance.

Example
import supervision as sv\n\nsv.ColorPalette.from_hex(['#ff0000', '#00ff00', '#0000ff'])\n# ColorPalette(colors=[Color(r=255, g=0, b=0), Color(r=0, g=255, b=0), ...])\n
Source code in supervision/draw/color.py
@classmethod\ndef from_hex(cls, color_hex_list: List[str]) -> ColorPalette:\n    \"\"\"\n    Create a ColorPalette instance from a list of hex strings.\n\n    Args:\n        color_hex_list (List[str]): List of color hex strings.\n\n    Returns:\n        ColorPalette: A ColorPalette instance.\n\n    Example:\n        ```python\n        import supervision as sv\n\n        sv.ColorPalette.from_hex(['#ff0000', '#00ff00', '#0000ff'])\n        # ColorPalette(colors=[Color(r=255, g=0, b=0), Color(r=0, g=255, b=0), ...])\n        ```\n    \"\"\"\n    colors = [Color.from_hex(color_hex) for color_hex in color_hex_list]\n    return cls(colors)\n
"},{"location":"utils/draw/#supervision.draw.color.ColorPalette.from_matplotlib","title":"from_matplotlib(palette_name, color_count) classmethod","text":"

Create a ColorPalette instance from a Matplotlib color palette.

Parameters:

- palette_name (str): Name of the Matplotlib palette. Required.
- color_count (int): Number of colors to sample from the palette. Required.

Returns:

- ColorPalette: A ColorPalette instance.

Example
import supervision as sv\n\nsv.ColorPalette.from_matplotlib('viridis', 5)\n# ColorPalette(colors=[Color(r=68, g=1, b=84), Color(r=59, g=82, b=139), ...])\n

Source code in supervision/draw/color.py
@classmethod\ndef from_matplotlib(cls, palette_name: str, color_count: int) -> ColorPalette:\n    \"\"\"\n    Create a ColorPalette instance from a Matplotlib color palette.\n\n    Args:\n        palette_name (str): Name of the Matplotlib palette.\n        color_count (int): Number of colors to sample from the palette.\n\n    Returns:\n        ColorPalette: A ColorPalette instance.\n\n    Example:\n        ```python\n        import supervision as sv\n\n        sv.ColorPalette.from_matplotlib('viridis', 5)\n        # ColorPalette(colors=[Color(r=68, g=1, b=84), Color(r=59, g=82, b=139), ...])\n        ```\n\n    ![visualized_color_palette](https://media.roboflow.com/\n    supervision-annotator-examples/visualized_color_palette.png)\n    \"\"\"  # noqa: E501 // docs\n    mpl_palette = plt.get_cmap(palette_name, color_count)\n    colors = [\n        Color(int(r * 255), int(g * 255), int(b * 255))\n        for r, g, b, _ in mpl_palette.colors\n    ]\n    return cls(colors)\n
"},{"location":"utils/file/","title":"File Utils","text":"list_files_with_extensions

List files in a directory with specified extensions or all files if no extensions are provided.

Parameters:

- directory (Union[str, Path]): The directory path as a string or Path object. Required.
- extensions (Optional[List[str]]): A list of file extensions to filter. Default is None, which lists all files.

Returns:

- List[Path]: A list of Path objects for the matching files.

Examples:

import supervision as sv\n\n# List all files in the directory\nfiles = sv.list_files_with_extensions(directory='my_directory')\n\n# List only files with '.txt' and '.md' extensions\nfiles = sv.list_files_with_extensions(\n    directory='my_directory', extensions=['txt', 'md'])\n
Source code in supervision/utils/file.py
def list_files_with_extensions(\n    directory: Union[str, Path], extensions: Optional[List[str]] = None\n) -> List[Path]:\n    \"\"\"\n    List files in a directory with specified extensions or\n        all files if no extensions are provided.\n\n    Args:\n        directory (Union[str, Path]): The directory path as a string or Path object.\n        extensions (Optional[List[str]]): A list of file extensions to filter.\n            Default is None, which lists all files.\n\n    Returns:\n        (List[Path]): A list of Path objects for the matching files.\n\n    Examples:\n        ```python\n        import supervision as sv\n\n        # List all files in the directory\n        files = sv.list_files_with_extensions(directory='my_directory')\n\n        # List only files with '.txt' and '.md' extensions\n        files = sv.list_files_with_extensions(\n            directory='my_directory', extensions=['txt', 'md'])\n        ```\n    \"\"\"\n\n    directory = Path(directory)\n    files_with_extensions = []\n\n    if extensions is not None:\n        for ext in extensions:\n            files_with_extensions.extend(directory.glob(f\"*.{ext}\"))\n    else:\n        files_with_extensions.extend(directory.glob(\"*\"))\n\n    return files_with_extensions\n
"},{"location":"utils/geometry/","title":"Geometry","text":"get_polygon_center

Calculate the center of a polygon. The center is calculated as the center of the solid figure formed by the points of the polygon

Parameters:

- polygon (np.ndarray): A 2-dimensional numpy ndarray representing the vertices of the polygon. Required.

Returns:

- Point: The center of the polygon, represented as a Point object with x and y attributes.

Examples:

import numpy as np\nimport supervision as sv\n\npolygon = np.array([[0, 0], [0, 2], [2, 2], [2, 0]])\nsv.get_polygon_center(polygon=polygon)\n# Point(x=1, y=1)\n
Source code in supervision/geometry/utils.py
def get_polygon_center(polygon: np.ndarray) -> Point:\n    \"\"\"\n    Calculate the center of a polygon. The center is calculated as the center\n    of the solid figure formed by the points of the polygon\n\n    Parameters:\n        polygon (np.ndarray): A 2-dimensional numpy ndarray representing the\n            vertices of the polygon.\n\n    Returns:\n        Point: The center of the polygon, represented as a\n            Point object with x and y attributes.\n\n    Examples:\n        ```python\n        import numpy as np\n        import supervision as sv\n\n        polygon = np.array([[0, 0], [0, 2], [2, 2], [2, 0]])\n        sv.get_polygon_center(polygon=polygon)\n        # Point(x=1, y=1)\n        ```\n    \"\"\"\n\n    # This is one of the 3 candidate algorithms considered for centroid calculation.\n    # For a more detailed discussion, see PR #1084 and commit eb33176\n\n    shift_polygon = np.roll(polygon, -1, axis=0)\n    signed_areas = np.cross(polygon, shift_polygon) / 2\n    if signed_areas.sum() == 0:\n        center = np.mean(polygon, axis=0).round()\n        return Point(x=center[0], y=center[1])\n    centroids = (polygon + shift_polygon) / 3.0\n    center = np.average(centroids, axis=0, weights=signed_areas).round()\n\n    return Point(x=center[0], y=center[1])\n
Position

Bases: Enum

Enum representing the position of an anchor point.

Source code in supervision/geometry/core.py
class Position(Enum):\n    \"\"\"\n    Enum representing the position of an anchor point.\n    \"\"\"\n\n    CENTER = \"CENTER\"\n    CENTER_LEFT = \"CENTER_LEFT\"\n    CENTER_RIGHT = \"CENTER_RIGHT\"\n    TOP_CENTER = \"TOP_CENTER\"\n    TOP_LEFT = \"TOP_LEFT\"\n    TOP_RIGHT = \"TOP_RIGHT\"\n    BOTTOM_LEFT = \"BOTTOM_LEFT\"\n    BOTTOM_CENTER = \"BOTTOM_CENTER\"\n    BOTTOM_RIGHT = \"BOTTOM_RIGHT\"\n    CENTER_OF_MASS = \"CENTER_OF_MASS\"\n\n    @classmethod\n    def list(cls):\n        return list(map(lambda c: c.value, cls))\n
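A short sketch of how the enum is typically used; `sv.Position` is referenced throughout the annotator and detection APIs wherever an anchor point is expected.

```python
import supervision as sv

# Members carry their name as the value and can be listed for validation or UIs.
print(sv.Position.CENTER.value)  # 'CENTER'
print(sv.Position.list())        # ['CENTER', 'CENTER_LEFT', ..., 'CENTER_OF_MASS']
```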
"},{"location":"utils/image/","title":"Image Utils","text":"crop_image

Crops the given image based on the given bounding box.

Parameters:

- image (ImageType): The image to be cropped. ImageType is a flexible type, accepting either numpy.ndarray or PIL.Image.Image. Required.
- xyxy (Union[np.ndarray, List[int], Tuple[int, int, int, int]]): A bounding box coordinates in the format (x_min, y_min, x_max, y_max), accepted as either a numpy.ndarray, a list, or a tuple. Required.

Returns:

- ImageType: The cropped image. The type is determined by the input type and may be either a numpy.ndarray or PIL.Image.Image.

Examples (OpenCV, then Pillow):
import cv2\nimport supervision as sv\n\nimage = cv2.imread(<SOURCE_IMAGE_PATH>)\nimage.shape\n# (1080, 1920, 3)\n\nxyxy = [200, 400, 600, 800]\ncropped_image = sv.crop_image(image=image, xyxy=xyxy)\ncropped_image.shape\n# (400, 400, 3)\n
from PIL import Image\nimport supervision as sv\n\nimage = Image.open(<SOURCE_IMAGE_PATH>)\nimage.size\n# (1920, 1080)\n\nxyxy = [200, 400, 600, 800]\ncropped_image = sv.crop_image(image=image, xyxy=xyxy)\ncropped_image.size\n# (400, 400)\n

Source code in supervision/utils/image.py
@convert_for_image_processing\ndef crop_image(\n    image: ImageType,\n    xyxy: Union[npt.NDArray[int], List[int], Tuple[int, int, int, int]],\n) -> ImageType:\n    \"\"\"\n    Crops the given image based on the given bounding box.\n\n    Args:\n        image (ImageType): The image to be cropped. `ImageType` is a flexible type,\n            accepting either `numpy.ndarray` or `PIL.Image.Image`.\n        xyxy (Union[np.ndarray, List[int], Tuple[int, int, int, int]]): A bounding box\n            coordinates in the format `(x_min, y_min, x_max, y_max)`, accepted as either\n            a `numpy.ndarray`, a `list`, or a `tuple`.\n\n    Returns:\n        (ImageType): The cropped image. The type is determined by the input type and\n            may be either a `numpy.ndarray` or `PIL.Image.Image`.\n\n    === \"OpenCV\"\n\n        ```python\n        import cv2\n        import supervision as sv\n\n        image = cv2.imread(<SOURCE_IMAGE_PATH>)\n        image.shape\n        # (1080, 1920, 3)\n\n        xyxy = [200, 400, 600, 800]\n        cropped_image = sv.crop_image(image=image, xyxy=xyxy)\n        cropped_image.shape\n        # (400, 400, 3)\n        ```\n\n    === \"Pillow\"\n\n        ```python\n        from PIL import Image\n        import supervision as sv\n\n        image = Image.open(<SOURCE_IMAGE_PATH>)\n        image.size\n        # (1920, 1080)\n\n        xyxy = [200, 400, 600, 800]\n        cropped_image = sv.crop_image(image=image, xyxy=xyxy)\n        cropped_image.size\n        # (400, 400)\n        ```\n\n    ![crop_image](https://media.roboflow.com/supervision-docs/crop-image.png){ align=center width=\"800\" }\n    \"\"\"  # noqa E501 // docs\n\n    if isinstance(xyxy, (list, tuple)):\n        xyxy = np.array(xyxy)\n    xyxy = np.round(xyxy).astype(int)\n    x_min, y_min, x_max, y_max = xyxy.flatten()\n    return image[y_min:y_max, x_min:x_max]\n
scale_image

Scales the given image based on the given scale factor.

Parameters:

- image (ImageType): The image to be scaled. ImageType is a flexible type, accepting either numpy.ndarray or PIL.Image.Image. Required.
- scale_factor (float): The factor by which the image will be scaled. Scale factor > 1.0 zooms in, < 1.0 zooms out. Required.

Returns:

- ImageType: The scaled image. The type is determined by the input type and may be either a numpy.ndarray or PIL.Image.Image.

Raises:

- ValueError: If the scale factor is non-positive.

Examples (OpenCV, then Pillow):
import cv2\nimport supervision as sv\n\nimage = cv2.imread(<SOURCE_IMAGE_PATH>)\nimage.shape\n# (1080, 1920, 3)\n\nscaled_image = sv.scale_image(image=image, scale_factor=0.5)\nscaled_image.shape\n# (540, 960, 3)\n
from PIL import Image\nimport supervision as sv\n\nimage = Image.open(<SOURCE_IMAGE_PATH>)\nimage.size\n# (1920, 1080)\n\nscaled_image = sv.scale_image(image=image, scale_factor=0.5)\nscaled_image.size\n# (960, 540)\n
Source code in supervision/utils/image.py
@convert_for_image_processing\ndef scale_image(image: ImageType, scale_factor: float) -> ImageType:\n    \"\"\"\n    Scales the given image based on the given scale factor.\n\n    Args:\n        image (ImageType): The image to be scaled. `ImageType` is a flexible type,\n            accepting either `numpy.ndarray` or `PIL.Image.Image`.\n        scale_factor (float): The factor by which the image will be scaled. Scale\n            factor > `1.0` zooms in, < `1.0` zooms out.\n\n    Returns:\n        (ImageType): The scaled image. The type is determined by the input type and\n            may be either a `numpy.ndarray` or `PIL.Image.Image`.\n\n    Raises:\n        ValueError: If the scale factor is non-positive.\n\n    === \"OpenCV\"\n\n        ```python\n        import cv2\n        import supervision as sv\n\n        image = cv2.imread(<SOURCE_IMAGE_PATH>)\n        image.shape\n        # (1080, 1920, 3)\n\n        scaled_image = sv.scale_image(image=image, scale_factor=0.5)\n        scaled_image.shape\n        # (540, 960, 3)\n        ```\n\n    === \"Pillow\"\n\n        ```python\n        from PIL import Image\n        import supervision as sv\n\n        image = Image.open(<SOURCE_IMAGE_PATH>)\n        image.size\n        # (1920, 1080)\n\n        scaled_image = sv.scale_image(image=image, scale_factor=0.5)\n        scaled_image.size\n        # (960, 540)\n        ```\n    \"\"\"\n    if scale_factor <= 0:\n        raise ValueError(\"Scale factor must be positive.\")\n\n    width_old, height_old = image.shape[1], image.shape[0]\n    width_new = int(width_old * scale_factor)\n    height_new = int(height_old * scale_factor)\n    return cv2.resize(image, (width_new, height_new), interpolation=cv2.INTER_LINEAR)\n
resize_image

Resizes the given image to a specified resolution. Can maintain the original aspect ratio or resize directly to the desired dimensions.

Parameters:

- image (ImageType): The image to be resized. ImageType is a flexible type, accepting either numpy.ndarray or PIL.Image.Image. Required.
- resolution_wh (Tuple[int, int]): The target resolution as (width, height). Required.
- keep_aspect_ratio (bool): Flag to maintain the image's original aspect ratio. Defaults to False.

Returns:

- ImageType: The resized image. The type is determined by the input type and may be either a numpy.ndarray or PIL.Image.Image.

Examples (OpenCV, then Pillow):
import cv2\nimport supervision as sv\n\nimage = cv2.imread(<SOURCE_IMAGE_PATH>)\nimage.shape\n# (1080, 1920, 3)\n\nresized_image = sv.resize_image(\n    image=image, resolution_wh=(1000, 1000), keep_aspect_ratio=True\n)\nresized_image.shape\n# (562, 1000, 3)\n
from PIL import Image\nimport supervision as sv\n\nimage = Image.open(<SOURCE_IMAGE_PATH>)\nimage.size\n# (1920, 1080)\n\nresized_image = sv.resize_image(\n    image=image, resolution_wh=(1000, 1000), keep_aspect_ratio=True\n)\nresized_image.size\n# (1000, 562)\n

Source code in supervision/utils/image.py
@convert_for_image_processing\ndef resize_image(\n    image: ImageType,\n    resolution_wh: Tuple[int, int],\n    keep_aspect_ratio: bool = False,\n) -> ImageType:\n    \"\"\"\n    Resizes the given image to a specified resolution. Can maintain the original aspect\n    ratio or resize directly to the desired dimensions.\n\n    Args:\n        image (ImageType): The image to be resized. `ImageType` is a flexible type,\n            accepting either `numpy.ndarray` or `PIL.Image.Image`.\n        resolution_wh (Tuple[int, int]): The target resolution as\n            `(width, height)`.\n        keep_aspect_ratio (bool, optional): Flag to maintain the image's original\n            aspect ratio. Defaults to `False`.\n\n    Returns:\n        (ImageType): The resized image. The type is determined by the input type and\n            may be either a `numpy.ndarray` or `PIL.Image.Image`.\n\n    === \"OpenCV\"\n\n        ```python\n        import cv2\n        import supervision as sv\n\n        image = cv2.imread(<SOURCE_IMAGE_PATH>)\n        image.shape\n        # (1080, 1920, 3)\n\n        resized_image = sv.resize_image(\n            image=image, resolution_wh=(1000, 1000), keep_aspect_ratio=True\n        )\n        resized_image.shape\n        # (562, 1000, 3)\n        ```\n\n    === \"Pillow\"\n\n        ```python\n        from PIL import Image\n        import supervision as sv\n\n        image = Image.open(<SOURCE_IMAGE_PATH>)\n        image.size\n        # (1920, 1080)\n\n        resized_image = sv.resize_image(\n            image=image, resolution_wh=(1000, 1000), keep_aspect_ratio=True\n        )\n        resized_image.size\n        # (1000, 562)\n        ```\n\n    ![resize_image](https://media.roboflow.com/supervision-docs/resize-image.png){ align=center width=\"800\" }\n    \"\"\"  # noqa E501 // docs\n    if keep_aspect_ratio:\n        image_ratio = image.shape[1] / image.shape[0]\n        target_ratio = resolution_wh[0] / resolution_wh[1]\n        if image_ratio >= target_ratio:\n            width_new = resolution_wh[0]\n            height_new = int(resolution_wh[0] / image_ratio)\n        else:\n            height_new = resolution_wh[1]\n            width_new = int(resolution_wh[1] * image_ratio)\n    else:\n        width_new, height_new = resolution_wh\n\n    return cv2.resize(image, (width_new, height_new), interpolation=cv2.INTER_LINEAR)\n
letterbox_image

Resizes and pads an image to a specified resolution with a given color, maintaining the original aspect ratio.

Parameters:

- image (ImageType): The image to be resized. ImageType is a flexible type, accepting either numpy.ndarray or PIL.Image.Image. Required.
- resolution_wh (Tuple[int, int]): The target resolution as (width, height). Required.
- color (Union[Tuple[int, int, int], Color]): The color to pad with. If a tuple is provided, it should be in BGR format. Default: Color.BLACK.

Returns:

- ImageType: The resized image. The type is determined by the input type and may be either a numpy.ndarray or PIL.Image.Image.

Examples (OpenCV, then Pillow):
import cv2\nimport supervision as sv\n\nimage = cv2.imread(<SOURCE_IMAGE_PATH>)\nimage.shape\n# (1080, 1920, 3)\n\nletterboxed_image = sv.letterbox_image(image=image, resolution_wh=(1000, 1000))\nletterboxed_image.shape\n# (1000, 1000, 3)\n
from PIL import Image\nimport supervision as sv\n\nimage = Image.open(<SOURCE_IMAGE_PATH>)\nimage.size\n# (1920, 1080)\n\nletterboxed_image = sv.letterbox_image(image=image, resolution_wh=(1000, 1000))\nletterboxed_image.size\n# (1000, 1000)\n

Source code in supervision/utils/image.py
@convert_for_image_processing\ndef letterbox_image(\n    image: ImageType,\n    resolution_wh: Tuple[int, int],\n    color: Union[Tuple[int, int, int], Color] = Color.BLACK,\n) -> ImageType:\n    \"\"\"\n    Resizes and pads an image to a specified resolution with a given color, maintaining\n    the original aspect ratio.\n\n    Args:\n        image (ImageType): The image to be resized. `ImageType` is a flexible type,\n            accepting either `numpy.ndarray` or `PIL.Image.Image`.\n        resolution_wh (Tuple[int, int]): The target resolution as\n            `(width, height)`.\n        color (Union[Tuple[int, int, int], Color]): The color to pad with. If tuple\n            provided it should be in BGR format.\n\n    Returns:\n        (ImageType): The resized image. The type is determined by the input type and\n            may be either a `numpy.ndarray` or `PIL.Image.Image`.\n\n    === \"OpenCV\"\n\n        ```python\n        import cv2\n        import supervision as sv\n\n        image = cv2.imread(<SOURCE_IMAGE_PATH>)\n        image.shape\n        # (1080, 1920, 3)\n\n        letterboxed_image = sv.letterbox_image(image=image, resolution_wh=(1000, 1000))\n        letterboxed_image.shape\n        # (1000, 1000, 3)\n        ```\n\n    === \"Pillow\"\n\n        ```python\n        from PIL import Image\n        import supervision as sv\n\n        image = Image.open(<SOURCE_IMAGE_PATH>)\n        image.size\n        # (1920, 1080)\n\n        letterboxed_image = sv.letterbox_image(image=image, resolution_wh=(1000, 1000))\n        letterboxed_image.size\n        # (1000, 1000)\n        ```\n\n    ![letterbox_image](https://media.roboflow.com/supervision-docs/letterbox-image.png){ align=center width=\"800\" }\n    \"\"\"  # noqa E501 // docs\n    color = unify_to_bgr(color=color)\n    resized_image = resize_image(\n        image=image, resolution_wh=resolution_wh, keep_aspect_ratio=True\n    )\n    height_new, width_new = resized_image.shape[:2]\n    padding_top = (resolution_wh[1] - height_new) // 2\n    padding_bottom = resolution_wh[1] - height_new - padding_top\n    padding_left = (resolution_wh[0] - width_new) // 2\n    padding_right = resolution_wh[0] - width_new - padding_left\n    return cv2.copyMakeBorder(\n        resized_image,\n        padding_top,\n        padding_bottom,\n        padding_left,\n        padding_right,\n        cv2.BORDER_CONSTANT,\n        value=color,\n    )\n
overlay_image

Places an image onto a scene at a given anchor point, handling cases where the image's position is partially or completely outside the scene's bounds.

Parameters:

- image (np.ndarray): The background scene onto which the image is placed. Required.
- overlay (np.ndarray): The image to be placed onto the scene. Required.
- anchor (Tuple[int, int]): The (x, y) coordinates in the scene where the top-left corner of the image will be placed. Required.

Returns:

- np.ndarray: The result image with overlay.

Examples:

import cv2\nimport numpy as np\nimport supervision as sv\n\nimage = cv2.imread(<SOURCE_IMAGE_PATH>)\noverlay = np.zeros((400, 400, 3), dtype=np.uint8)\nresult_image = sv.overlay_image(image=image, overlay=overlay, anchor=(200, 400))\n

Source code in supervision/utils/image.py
def overlay_image(\n    image: npt.NDArray[np.uint8],\n    overlay: npt.NDArray[np.uint8],\n    anchor: Tuple[int, int],\n) -> npt.NDArray[np.uint8]:\n    \"\"\"\n    Places an image onto a scene at a given anchor point, handling cases where\n    the image's position is partially or completely outside the scene's bounds.\n\n    Args:\n        image (np.ndarray): The background scene onto which the image is placed.\n        overlay (np.ndarray): The image to be placed onto the scene.\n        anchor (Tuple[int, int]): The `(x, y)` coordinates in the scene where the\n            top-left corner of the image will be placed.\n\n    Returns:\n        (np.ndarray): The result image with overlay.\n\n    Examples:\n        ```python\n        import cv2\n        import numpy as np\n        import supervision as sv\n\n        image = cv2.imread(<SOURCE_IMAGE_PATH>)\n        overlay = np.zeros((400, 400, 3), dtype=np.uint8)\n        result_image = sv.overlay_image(image=image, overlay=overlay, anchor=(200, 400))\n        ```\n\n    ![overlay_image](https://media.roboflow.com/supervision-docs/overlay-image.png){ align=center width=\"800\" }\n    \"\"\"  # noqa E501 // docs\n    scene_height, scene_width = image.shape[:2]\n    image_height, image_width = overlay.shape[:2]\n    anchor_x, anchor_y = anchor\n\n    is_out_horizontally = anchor_x + image_width <= 0 or anchor_x >= scene_width\n    is_out_vertically = anchor_y + image_height <= 0 or anchor_y >= scene_height\n\n    if is_out_horizontally or is_out_vertically:\n        return image\n\n    x_min = max(anchor_x, 0)\n    y_min = max(anchor_y, 0)\n    x_max = min(scene_width, anchor_x + image_width)\n    y_max = min(scene_height, anchor_y + image_height)\n\n    crop_x_min = max(-anchor_x, 0)\n    crop_y_min = max(-anchor_y, 0)\n    crop_x_max = image_width - max((anchor_x + image_width) - scene_width, 0)\n    crop_y_max = image_height - max((anchor_y + image_height) - scene_height, 0)\n\n    image[y_min:y_max, x_min:x_max] = overlay[\n        crop_y_min:crop_y_max, crop_x_min:crop_x_max\n    ]\n\n    return image\n
ImageSink Source code in supervision/utils/image.py
class ImageSink:\n    def __init__(\n        self,\n        target_dir_path: str,\n        overwrite: bool = False,\n        image_name_pattern: str = \"image_{:05d}.png\",\n    ):\n        \"\"\"\n        Initialize a context manager for saving images.\n\n        Args:\n            target_dir_path (str): The target directory where images will be saved.\n            overwrite (bool, optional): Whether to overwrite the existing directory.\n                Defaults to False.\n            image_name_pattern (str, optional): The image file name pattern.\n                Defaults to \"image_{:05d}.png\".\n\n        Examples:\n            ```python\n            import supervision as sv\n\n            frames_generator = sv.get_video_frames_generator(<SOURCE_VIDEO_PATH>, stride=2)\n\n            with sv.ImageSink(target_dir_path=<TARGET_CROPS_DIRECTORY>) as sink:\n                for image in frames_generator:\n                    sink.save_image(image=image)\n            ```\n        \"\"\"  # noqa E501 // docs\n\n        self.target_dir_path = target_dir_path\n        self.overwrite = overwrite\n        self.image_name_pattern = image_name_pattern\n        self.image_count = 0\n\n    def __enter__(self):\n        if os.path.exists(self.target_dir_path):\n            if self.overwrite:\n                shutil.rmtree(self.target_dir_path)\n                os.makedirs(self.target_dir_path)\n        else:\n            os.makedirs(self.target_dir_path)\n\n        return self\n\n    def save_image(self, image: np.ndarray, image_name: Optional[str] = None):\n        \"\"\"\n        Save a given image in the target directory.\n\n        Args:\n            image (np.ndarray): The image to be saved. The image must be in BGR color\n                format.\n            image_name (str, optional): The name to use for the saved image.\n                If not provided, a name will be\n                generated using the `image_name_pattern`.\n        \"\"\"\n        if image_name is None:\n            image_name = self.image_name_pattern.format(self.image_count)\n\n        image_path = os.path.join(self.target_dir_path, image_name)\n        cv2.imwrite(image_path, image)\n        self.image_count += 1\n\n    def __exit__(self, exc_type, exc_value, exc_traceback):\n        pass\n
"},{"location":"utils/image/#supervision.utils.image.ImageSink-functions","title":"Functions","text":""},{"location":"utils/image/#supervision.utils.image.ImageSink.__init__","title":"__init__(target_dir_path, overwrite=False, image_name_pattern='image_{:05d}.png')","text":"

Initialize a context manager for saving images.

Parameters:

- `target_dir_path` (`str`, required): The target directory where images will be saved.
- `overwrite` (`bool`, default `False`): Whether to overwrite the existing directory.
- `image_name_pattern` (`str`, default `'image_{:05d}.png'`): The image file name pattern.

Examples:

import supervision as sv\n\nframes_generator = sv.get_video_frames_generator(<SOURCE_VIDEO_PATH>, stride=2)\n\nwith sv.ImageSink(target_dir_path=<TARGET_CROPS_DIRECTORY>) as sink:\n    for image in frames_generator:\n        sink.save_image(image=image)\n
Source code in supervision/utils/image.py
def __init__(\n    self,\n    target_dir_path: str,\n    overwrite: bool = False,\n    image_name_pattern: str = \"image_{:05d}.png\",\n):\n    \"\"\"\n    Initialize a context manager for saving images.\n\n    Args:\n        target_dir_path (str): The target directory where images will be saved.\n        overwrite (bool, optional): Whether to overwrite the existing directory.\n            Defaults to False.\n        image_name_pattern (str, optional): The image file name pattern.\n            Defaults to \"image_{:05d}.png\".\n\n    Examples:\n        ```python\n        import supervision as sv\n\n        frames_generator = sv.get_video_frames_generator(<SOURCE_VIDEO_PATH>, stride=2)\n\n        with sv.ImageSink(target_dir_path=<TARGET_CROPS_DIRECTORY>) as sink:\n            for image in frames_generator:\n                sink.save_image(image=image)\n        ```\n    \"\"\"  # noqa E501 // docs\n\n    self.target_dir_path = target_dir_path\n    self.overwrite = overwrite\n    self.image_name_pattern = image_name_pattern\n    self.image_count = 0\n
"},{"location":"utils/image/#supervision.utils.image.ImageSink.save_image","title":"save_image(image, image_name=None)","text":"

Save a given image in the target directory.

Parameters:

- `image` (`ndarray`, required): The image to be saved. The image must be in BGR color format.
- `image_name` (`str`, default `None`): The name to use for the saved image. If not provided, a name will be generated using the `image_name_pattern`.

Source code in supervision/utils/image.py
def save_image(self, image: np.ndarray, image_name: Optional[str] = None):\n    \"\"\"\n    Save a given image in the target directory.\n\n    Args:\n        image (np.ndarray): The image to be saved. The image must be in BGR color\n            format.\n        image_name (str, optional): The name to use for the saved image.\n            If not provided, a name will be\n            generated using the `image_name_pattern`.\n    \"\"\"\n    if image_name is None:\n        image_name = self.image_name_pattern.format(self.image_count)\n\n    image_path = os.path.join(self.target_dir_path, image_name)\n    cv2.imwrite(image_path, image)\n    self.image_count += 1\n
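As a further sketch of the sink in use, the context manager can be combined with `overwrite` and a custom `image_name_pattern`. The paths (`<SOURCE_VIDEO_PATH>`, `<TARGET_DIRECTORY>`) are placeholders, and the stride value and name pattern are arbitrary assumptions.

```python
import supervision as sv

# Keep every 10th frame of the source video (placeholder path).
frames_generator = sv.get_video_frames_generator(<SOURCE_VIDEO_PATH>, stride=10)

with sv.ImageSink(
    target_dir_path=<TARGET_DIRECTORY>,      # placeholder output directory
    overwrite=True,                          # remove and recreate the directory first
    image_name_pattern="frame_{:04d}.jpg",   # frame_0000.jpg, frame_0001.jpg, ...
) as sink:
    for frame in frames_generator:
        sink.save_image(image=frame)         # names follow image_name_pattern
```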
"},{"location":"utils/iterables/","title":"Iterables Utils","text":"create_batches

Provides a generator that yields chunks of the input sequence of the size specified by the batch_size parameter. The last chunk may be a smaller batch.

Parameters:

- `sequence` (`Iterable[V]`, required): The sequence to be split into batches.
- `batch_size` (`int`, required): The expected size of a batch.

Returns:

- `Generator[List[V], None, None]`: A generator that yields chunks of `sequence` of size `batch_size`, up to the length of the input `sequence`.

Examples:

list(create_batches([1, 2, 3, 4, 5], 2))\n# [[1, 2], [3, 4], [5]]\n\nlist(create_batches(\"abcde\", 3))\n# [['a', 'b', 'c'], ['d', 'e']]\n
Source code in supervision/utils/iterables.py
def create_batches(\n    sequence: Iterable[V], batch_size: int\n) -> Generator[List[V], None, None]:\n    \"\"\"\n    Provides a generator that yields chunks of the input sequence\n    of the size specified by the `batch_size` parameter. The last\n    chunk may be a smaller batch.\n\n    Args:\n        sequence (Iterable[V]): The sequence to be split into batches.\n        batch_size (int): The expected size of a batch.\n\n    Returns:\n        (Generator[List[V], None, None]): A generator that yields chunks\n            of `sequence` of size `batch_size`, up to the length of\n            the input `sequence`.\n\n    Examples:\n        ```python\n        list(create_batches([1, 2, 3, 4, 5], 2))\n        # [[1, 2], [3, 4], [5]]\n\n        list(create_batches(\"abcde\", 3))\n        # [['a', 'b', 'c'], ['d', 'e']]\n        ```\n    \"\"\"\n    batch_size = max(batch_size, 1)\n    current_batch = []\n    for element in sequence:\n        if len(current_batch) == batch_size:\n            yield current_batch\n            current_batch = []\n        current_batch.append(element)\n    if current_batch:\n        yield current_batch\n
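A small, hedged sketch of batching per-frame results; importing from `supervision.utils.iterables` mirrors the source path shown above, and the sample values are arbitrary.

```python
from supervision.utils.iterables import create_batches

# Illustrative data: number of detections recorded for each frame.
detections_per_frame = [0, 2, 5, 1, 3, 4, 2]

# Process results three frames at a time; the final batch may be shorter.
for batch in create_batches(detections_per_frame, batch_size=3):
    print(batch)
# [0, 2, 5]
# [1, 3, 4]
# [2]
```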
fill

Fill the sequence with padding elements until the sequence reaches the desired size.

Parameters:

- `sequence` (`List[V]`, required): The input sequence.
- `desired_size` (`int`, required): The expected size of the output list. The difference between this value and the actual length of `sequence` (if positive) dictates how many elements will be added as padding.
- `content` (`V`, required): The element to be placed at the end of the input `sequence` as padding.

Returns:

- `List[V]`: A padded version of the input `sequence` (if needed).

Examples:

fill([1, 2], 4, 0)\n# [1, 2, 0, 0]\n\nfill(['a', 'b'], 3, 'c')\n# ['a', 'b', 'c']\n
Source code in supervision/utils/iterables.py
def fill(sequence: List[V], desired_size: int, content: V) -> List[V]:\n    \"\"\"\n    Fill the sequence with padding elements until the sequence reaches\n    the desired size.\n\n    Args:\n        sequence (List[V]): The input sequence.\n        desired_size (int): The expected size of the output list. The\n            difference between this value and the actual length of `sequence`\n            (if positive) dictates how many elements will be added as padding.\n        content (V): The element to be placed at the end of the input\n            `sequence` as padding.\n\n    Returns:\n        (List[V]): A padded version of the input `sequence` (if needed).\n\n    Examples:\n        ```python\n        fill([1, 2], 4, 0)\n        # [1, 2, 0, 0]\n\n        fill(['a', 'b'], 3, 'c')\n        # ['a', 'b', 'c']\n        ```\n    \"\"\"\n    missing_size = max(0, desired_size - len(sequence))\n    sequence.extend([content] * missing_size)\n    return sequence\n
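A hedged sketch that pairs `fill` with `create_batches` to pad the final, shorter batch to a uniform length. Combining the two helpers this way is an illustration rather than a documented pattern, and the values are arbitrary.

```python
from supervision.utils.iterables import create_batches, fill

batches = list(create_batches([1, 2, 3, 4, 5], 2))
# [[1, 2], [3, 4], [5]]

# Pad every batch to a uniform length of 2, using 0 as the padding element.
# Note that fill() extends the input list in place and returns it.
padded = [fill(batch, desired_size=2, content=0) for batch in batches]
# [[1, 2], [3, 4], [5, 0]]
```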
"},{"location":"utils/notebook/","title":"Notebooks Utils","text":"plot_image

Plots an image using matplotlib.

Parameters:

- `image` (`ImageType`, required): The frame to be displayed. `ImageType` is a flexible type, accepting either `numpy.ndarray` or `PIL.Image.Image`.
- `size` (`Tuple[int, int]`, default `(12, 12)`): The size of the plot.
- `cmap` (`str`, default `'gray'`): The colormap to use for single-channel images.

Examples:

import cv2\nimport supervision as sv\n\nimage = cv2.imread(\"path/to/image.jpg\")\n\n%matplotlib inline\nsv.plot_image(image=image, size=(16, 16))\n
Source code in supervision/utils/notebook.py
def plot_image(\n    image: ImageType, size: Tuple[int, int] = (12, 12), cmap: Optional[str] = \"gray\"\n) -> None:\n    \"\"\"\n    Plots image using matplotlib.\n\n    Args:\n        image (ImageType): The frame to be displayed ImageType\n             is a flexible type, accepting either `numpy.ndarray` or `PIL.Image.Image`.\n        size (Tuple[int, int]): The size of the plot.\n        cmap (str): the colormap to use for single channel images.\n\n    Examples:\n        ```python\n        import cv2\n        import supervision as sv\n\n        image = cv2.imread(\"path/to/image.jpg\")\n\n        %matplotlib inline\n        sv.plot_image(image=image, size=(16, 16))\n        ```\n    \"\"\"\n    if isinstance(image, Image.Image):\n        image = pillow_to_cv2(image)\n\n    plt.figure(figsize=size)\n\n    if image.ndim == 2:\n        plt.imshow(image, cmap=cmap)\n    else:\n        plt.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))\n\n    plt.axis(\"off\")\n    plt.show()\n
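A hedged sketch using a synthetic image so the call is runnable without a file on disk; the drawn rectangle and the chosen colormap are arbitrary assumptions.

```python
import cv2
import numpy as np
import supervision as sv

# A synthetic BGR image stands in for a real photo; any HxWx3 uint8 array works.
image = np.zeros((240, 320, 3), dtype=np.uint8)
cv2.rectangle(image, (40, 40), (280, 200), (0, 255, 0), thickness=2)

# In a notebook, the figure is rendered inline; `size` is in inches.
sv.plot_image(image=image, size=(6, 6))

# Single-channel images are rendered with the `cmap` colormap (default "gray").
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
sv.plot_image(image=gray, size=(6, 6), cmap="viridis")
```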
plot_images_grid

Plots images in a grid using matplotlib.

Parameters:

- `images` (`List[ImageType]`, required): A list of images to display. `ImageType` is a flexible type, accepting either `numpy.ndarray` or `PIL.Image.Image`.
- `grid_size` (`Tuple[int, int]`, required): A tuple specifying the number of rows and columns for the grid.
- `titles` (`Optional[List[str]]`, default `None`): A list of titles for each image.
- `size` (`Tuple[int, int]`, default `(12, 12)`): A tuple specifying the width and height of the entire plot in inches.
- `cmap` (`str`, default `'gray'`): The colormap to use for single-channel images.

Raises:

- `ValueError`: If the number of images exceeds the grid size.

Examples:

import cv2\nimport supervision as sv\nfrom PIL import Image\n\nimage1 = cv2.imread(\"path/to/image1.jpg\")\nimage2 = Image.open(\"path/to/image2.jpg\")\nimage3 = cv2.imread(\"path/to/image3.jpg\")\n\nimages = [image1, image2, image3]\ntitles = [\"Image 1\", \"Image 2\", \"Image 3\"]\n\n%matplotlib inline\nplot_images_grid(images, grid_size=(2, 2), titles=titles, size=(16, 16))\n
Source code in supervision/utils/notebook.py
def plot_images_grid(\n    images: List[ImageType],\n    grid_size: Tuple[int, int],\n    titles: Optional[List[str]] = None,\n    size: Tuple[int, int] = (12, 12),\n    cmap: Optional[str] = \"gray\",\n) -> None:\n    \"\"\"\n    Plots images in a grid using matplotlib.\n\n    Args:\n       images (List[ImageType]): A list of images as ImageType\n             is a flexible type, accepting either `numpy.ndarray` or `PIL.Image.Image`.\n       grid_size (Tuple[int, int]): A tuple specifying the number\n            of rows and columns for the grid.\n       titles (Optional[List[str]]): A list of titles for each image.\n            Defaults to None.\n       size (Tuple[int, int]): A tuple specifying the width and\n            height of the entire plot in inches.\n       cmap (str): the colormap to use for single channel images.\n\n    Raises:\n       ValueError: If the number of images exceeds the grid size.\n\n    Examples:\n        ```python\n        import cv2\n        import supervision as sv\n        from PIL import Image\n\n        image1 = cv2.imread(\"path/to/image1.jpg\")\n        image2 = Image.open(\"path/to/image2.jpg\")\n        image3 = cv2.imread(\"path/to/image3.jpg\")\n\n        images = [image1, image2, image3]\n        titles = [\"Image 1\", \"Image 2\", \"Image 3\"]\n\n        %matplotlib inline\n        plot_images_grid(images, grid_size=(2, 2), titles=titles, size=(16, 16))\n        ```\n    \"\"\"\n    nrows, ncols = grid_size\n\n    for idx, img in enumerate(images):\n        if isinstance(img, Image.Image):\n            images[idx] = pillow_to_cv2(img)\n\n    if len(images) > nrows * ncols:\n        raise ValueError(\n            \"The number of images exceeds the grid size. Please increase the grid size\"\n            \" or reduce the number of images.\"\n        )\n\n    fig, axes = plt.subplots(nrows=nrows, ncols=ncols, figsize=size)\n\n    for idx, ax in enumerate(axes.flat):\n        if idx < len(images):\n            if images[idx].ndim == 2:\n                ax.imshow(images[idx], cmap=cmap)\n            else:\n                ax.imshow(cv2.cvtColor(images[idx], cv2.COLOR_BGR2RGB))\n\n            if titles is not None and idx < len(titles):\n                ax.set_title(titles[idx])\n\n        ax.axis(\"off\")\n    plt.show()\n
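A hedged sketch with synthetic frames; importing from `supervision.utils.notebook` follows the source path shown above, and the grid size, titles, and figure size are arbitrary assumptions.

```python
import numpy as np
from supervision.utils.notebook import plot_images_grid

# Three synthetic BGR frames of increasing brightness stand in for real images.
images = [np.full((240, 320, 3), value, dtype=np.uint8) for value in (64, 128, 192)]
titles = ["dark", "medium", "light"]

# A 1x3 grid; a ValueError is raised if len(images) exceeds rows * cols.
plot_images_grid(images=images, grid_size=(1, 3), titles=titles, size=(12, 4))
```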
"},{"location":"utils/video/","title":"Video Utils","text":"VideoInfo

A class to store video information, including width, height, fps and total number of frames.

Attributes:

- `width` (`int`): Width of the video in pixels.
- `height` (`int`): Height of the video in pixels.
- `fps` (`int`): Frames per second of the video.
- `total_frames` (`int`, optional): Total number of frames in the video; defaults to `None`.

Examples:

import supervision as sv\n\nvideo_info = sv.VideoInfo.from_video_path(video_path=<SOURCE_VIDEO_FILE>)\n\nvideo_info\n# VideoInfo(width=3840, height=2160, fps=25, total_frames=538)\n\nvideo_info.resolution_wh\n# (3840, 2160)\n
Source code in supervision/utils/video.py
@dataclass\nclass VideoInfo:\n    \"\"\"\n    A class to store video information, including width, height, fps and\n        total number of frames.\n\n    Attributes:\n        width (int): width of the video in pixels\n        height (int): height of the video in pixels\n        fps (int): frames per second of the video\n        total_frames (int, optional): total number of frames in the video,\n            default is None\n\n    Examples:\n        ```python\n        import supervision as sv\n\n        video_info = sv.VideoInfo.from_video_path(video_path=<SOURCE_VIDEO_FILE>)\n\n        video_info\n        # VideoInfo(width=3840, height=2160, fps=25, total_frames=538)\n\n        video_info.resolution_wh\n        # (3840, 2160)\n        ```\n    \"\"\"\n\n    width: int\n    height: int\n    fps: int\n    total_frames: Optional[int] = None\n\n    @classmethod\n    def from_video_path(cls, video_path: str) -> VideoInfo:\n        video = cv2.VideoCapture(video_path)\n        if not video.isOpened():\n            raise Exception(f\"Could not open video at {video_path}\")\n\n        width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))\n        height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))\n        fps = int(video.get(cv2.CAP_PROP_FPS))\n        total_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT))\n        video.release()\n        return VideoInfo(width, height, fps, total_frames)\n\n    @property\n    def resolution_wh(self) -> Tuple[int, int]:\n        return self.width, self.height\n
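A hedged sketch of reading the metadata and deriving an approximate duration from it; `<SOURCE_VIDEO_PATH>` is a placeholder, and the duration calculation is an illustration rather than part of the API.

```python
import supervision as sv

# <SOURCE_VIDEO_PATH> is a placeholder for a real video file.
video_info = sv.VideoInfo.from_video_path(video_path=<SOURCE_VIDEO_PATH>)

width, height = video_info.resolution_wh
print(f"resolution: {width}x{height}, fps: {video_info.fps}")

# total_frames may be None for some sources, so guard before computing duration.
if video_info.total_frames is not None and video_info.fps:
    duration_seconds = video_info.total_frames / video_info.fps
    print(f"approximate duration: {duration_seconds:.1f} s")
```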
VideoSink

Context manager that saves video frames to a file using OpenCV.

Attributes:

- `target_path` (`str`): The path to the output file where the video will be saved.
- `video_info` (`VideoInfo`): Information about the video resolution, fps, and total frame count.
- `codec` (`str`): FOURCC code for the video format.

Example
import supervision as sv\n\nvideo_info = sv.VideoInfo.from_video_path(<SOURCE_VIDEO_PATH>)\nframes_generator = sv.get_video_frames_generator(<SOURCE_VIDEO_PATH>)\n\nwith sv.VideoSink(target_path=<TARGET_VIDEO_PATH>, video_info=video_info) as sink:\n    for frame in frames_generator:\n        sink.write_frame(frame=frame)\n
Source code in supervision/utils/video.py
class VideoSink:\n    \"\"\"\n    Context manager that saves video frames to a file using OpenCV.\n\n    Attributes:\n        target_path (str): The path to the output file where the video will be saved.\n        video_info (VideoInfo): Information about the video resolution, fps,\n            and total frame count.\n        codec (str): FOURCC code for video format\n\n    Example:\n        ```python\n        import supervision as sv\n\n        video_info = sv.VideoInfo.from_video_path(<SOURCE_VIDEO_PATH>)\n        frames_generator = sv.get_video_frames_generator(<SOURCE_VIDEO_PATH>)\n\n        with sv.VideoSink(target_path=<TARGET_VIDEO_PATH>, video_info=video_info) as sink:\n            for frame in frames_generator:\n                sink.write_frame(frame=frame)\n        ```\n    \"\"\"  # noqa: E501 // docs\n\n    def __init__(self, target_path: str, video_info: VideoInfo, codec: str = \"mp4v\"):\n        self.target_path = target_path\n        self.video_info = video_info\n        self.__codec = codec\n        self.__writer = None\n\n    def __enter__(self):\n        try:\n            self.__fourcc = cv2.VideoWriter_fourcc(*self.__codec)\n        except TypeError as e:\n            print(str(e) + \". Defaulting to mp4v...\")\n            self.__fourcc = cv2.VideoWriter_fourcc(*\"mp4v\")\n        self.__writer = cv2.VideoWriter(\n            self.target_path,\n            self.__fourcc,\n            self.video_info.fps,\n            self.video_info.resolution_wh,\n        )\n        return self\n\n    def write_frame(self, frame: np.ndarray):\n        \"\"\"\n        Writes a single video frame to the target video file.\n\n        Args:\n            frame (np.ndarray): The video frame to be written to the file. The frame\n                must be in BGR color format.\n        \"\"\"\n        self.__writer.write(frame)\n\n    def __exit__(self, exc_type, exc_value, exc_traceback):\n        self.__writer.release()\n
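A hedged sketch that adds a per-frame transform before writing. The blur is an arbitrary stand-in for real processing, the paths are placeholders, and `codec="mp4v"` simply restates the default.

```python
import cv2
import supervision as sv

video_info = sv.VideoInfo.from_video_path(<SOURCE_VIDEO_PATH>)
frames_generator = sv.get_video_frames_generator(<SOURCE_VIDEO_PATH>)

with sv.VideoSink(
    target_path=<TARGET_VIDEO_PATH>, video_info=video_info, codec="mp4v"
) as sink:
    for frame in frames_generator:
        # Any per-frame processing goes here; frames must stay in BGR format.
        frame = cv2.GaussianBlur(frame, (5, 5), 0)
        sink.write_frame(frame=frame)
```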
FPSMonitor

A class for monitoring frames per second (FPS) to benchmark latency.

Source code in supervision/utils/video.py
class FPSMonitor:\n    \"\"\"\n    A class for monitoring frames per second (FPS) to benchmark latency.\n    \"\"\"\n\n    def __init__(self, sample_size: int = 30):\n        \"\"\"\n        Args:\n            sample_size (int): The maximum number of observations for latency\n                benchmarking.\n\n        Examples:\n            ```python\n            import supervision as sv\n\n            frames_generator = sv.get_video_frames_generator(source_path=<SOURCE_FILE_PATH>)\n            fps_monitor = sv.FPSMonitor()\n\n            for frame in frames_generator:\n                # your processing code here\n                fps_monitor.tick()\n                fps = fps_monitor.fps\n            ```\n        \"\"\"  # noqa: E501 // docs\n        self.all_timestamps = deque(maxlen=sample_size)\n\n    @deprecated(\n        \"`FPSMonitor.__call__` is deprecated and will be removed in \"\n        \"`supervision-0.22.0`. Use `FPSMonitor.fps` instead.\"\n    )\n    def __call__(self) -> float:\n        \"\"\"\n        !!! failure \"Deprecated\"\n\n            `FPSMonitor.__call__` is deprecated and will be removed in\n            `supervision-0.22.0`. Use `FPSMonitor.fps` instead.\n\n        Computes and returns the average FPS based on the stored time stamps.\n\n        Returns:\n            float: The average FPS. Returns 0.0 if no time stamps are stored.\n        \"\"\"\n        return self.fps\n\n    @property\n    def fps(self) -> float:\n        \"\"\"\n        Computes and returns the average FPS based on the stored time stamps.\n\n        Returns:\n            float: The average FPS. Returns 0.0 if no time stamps are stored.\n        \"\"\"\n        if not self.all_timestamps:\n            return 0.0\n        taken_time = self.all_timestamps[-1] - self.all_timestamps[0]\n        return (len(self.all_timestamps)) / taken_time if taken_time != 0 else 0.0\n\n    def tick(self) -> None:\n        \"\"\"\n        Adds a new time stamp to the deque for FPS calculation.\n        \"\"\"\n        self.all_timestamps.append(time.monotonic())\n\n    def reset(self) -> None:\n        \"\"\"\n        Clears all the time stamps from the deque.\n        \"\"\"\n        self.all_timestamps.clear()\n
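A hedged sketch that measures a simulated processing loop; `time.sleep` stands in for real per-frame work, and the loop length and sample size are arbitrary assumptions.

```python
import time
import supervision as sv

fps_monitor = sv.FPSMonitor(sample_size=30)

for _ in range(100):
    time.sleep(0.01)      # stand-in for real per-frame processing
    fps_monitor.tick()    # record a timestamp after each processed frame

# fps averages over the most recent sample_size timestamps.
print(f"average FPS over the last 30 ticks: {fps_monitor.fps:.1f}")

fps_monitor.reset()       # clear stored timestamps before timing a new stage
```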
get_video_frames_generator

Get a generator that yields the frames of the video.

Parameters:

- `source_path` (`str`, required): The path of the video file.
- `stride` (`int`, default `1`): Indicates the interval at which frames are returned, skipping `stride - 1` frames between each.
- `start` (`int`, default `0`): Indicates the starting position from which the video should generate frames.
- `end` (`Optional[int]`, default `None`): Indicates the ending position at which the video should stop generating frames. If `None`, the video will be read to the end.

Returns:

- `Generator[ndarray, None, None]`: A generator that yields the frames of the video.

Examples:

import supervision as sv\n\nfor frame in sv.get_video_frames_generator(source_path=<SOURCE_VIDEO_PATH>):\n    ...\n
Source code in supervision/utils/video.py
def get_video_frames_generator(\n    source_path: str, stride: int = 1, start: int = 0, end: Optional[int] = None\n) -> Generator[np.ndarray, None, None]:\n    \"\"\"\n    Get a generator that yields the frames of the video.\n\n    Args:\n        source_path (str): The path of the video file.\n        stride (int): Indicates the interval at which frames are returned,\n            skipping stride - 1 frames between each.\n        start (int): Indicates the starting position from which\n            video should generate frames\n        end (Optional[int]): Indicates the ending position at which video\n            should stop generating frames. If None, video will be read to the end.\n\n    Returns:\n        (Generator[np.ndarray, None, None]): A generator that yields the\n            frames of the video.\n\n    Examples:\n        ```python\n        import supervision as sv\n\n        for frame in sv.get_video_frames_generator(source_path=<SOURCE_VIDEO_PATH>):\n            ...\n        ```\n    \"\"\"\n    video, start, end = _validate_and_setup_video(source_path, start, end)\n    frame_position = start\n    while True:\n        success, frame = video.read()\n        if not success or frame_position >= end:\n            break\n        yield frame\n        for _ in range(stride - 1):\n            success = video.grab()\n            if not success:\n                break\n        frame_position += stride\n    video.release()\n
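A hedged sketch exercising the `stride`, `start`, and `end` parameters together; the path is a placeholder and the numeric values are arbitrary assumptions.

```python
import supervision as sv

frames_generator = sv.get_video_frames_generator(
    source_path=<SOURCE_VIDEO_PATH>,
    stride=5,     # keep every 5th frame
    start=100,    # skip the first 100 frames
    end=600,      # stop before frame index 600
)

for frame_index, frame in enumerate(frames_generator):
    print(frame_index, frame.shape)
```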
process_video

Process a video file by applying a callback function on each frame and saving the result to a target video file.

Parameters:

- `source_path` (`str`, required): The path to the source video file.
- `target_path` (`str`, required): The path to the target video file.
- `callback` (`Callable[[ndarray, int], ndarray]`, required): A function that takes in a numpy ndarray representation of a video frame and an int index of the frame and returns a processed numpy ndarray representation of the frame.

Examples:

import supervision as sv\n\ndef callback(scene: np.ndarray, index: int) -> np.ndarray:\n    ...\n\nprocess_video(\n    source_path=<SOURCE_VIDEO_PATH>,\n    target_path=<TARGET_VIDEO_PATH>,\n    callback=callback\n)\n
Source code in supervision/utils/video.py
def process_video(\n    source_path: str,\n    target_path: str,\n    callback: Callable[[np.ndarray, int], np.ndarray],\n) -> None:\n    \"\"\"\n    Process a video file by applying a callback function on each frame\n        and saving the result to a target video file.\n\n    Args:\n        source_path (str): The path to the source video file.\n        target_path (str): The path to the target video file.\n        callback (Callable[[np.ndarray, int], np.ndarray]): A function that takes in\n            a numpy ndarray representation of a video frame and an\n            int index of the frame and returns a processed numpy ndarray\n            representation of the frame.\n\n    Examples:\n        ```python\n        import supervision as sv\n\n        def callback(scene: np.ndarray, index: int) -> np.ndarray:\n            ...\n\n        process_video(\n            source_path=<SOURCE_VIDEO_PATH>,\n            target_path=<TARGET_VIDEO_PATH>,\n            callback=callback\n        )\n        ```\n    \"\"\"\n    source_video_info = VideoInfo.from_video_path(video_path=source_path)\n    with VideoSink(target_path=target_path, video_info=source_video_info) as sink:\n        for index, frame in enumerate(\n            get_video_frames_generator(source_path=source_path)\n        ):\n            result_frame = callback(frame, index)\n            sink.write_frame(frame=result_frame)\n
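A hedged sketch of a complete callback. Overlaying the frame index with `cv2.putText` is an arbitrary stand-in for real per-frame logic, the paths are placeholders, and the import from `supervision.utils.video` mirrors the source path shown above.

```python
import cv2
import numpy as np
from supervision.utils.video import process_video

def callback(scene: np.ndarray, index: int) -> np.ndarray:
    # Draw the frame index in the top-left corner; any per-frame logic works here.
    cv2.putText(
        scene,
        f"frame {index}",
        (20, 40),
        cv2.FONT_HERSHEY_SIMPLEX,
        1.0,
        (255, 255, 255),
        2,
    )
    return scene

process_video(
    source_path=<SOURCE_VIDEO_PATH>,
    target_path=<TARGET_VIDEO_PATH>,
    callback=callback,
)
```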
"},{"location":"utils/video/#supervision.utils.video.VideoSink-functions","title":"Functions","text":""},{"location":"utils/video/#supervision.utils.video.VideoSink.write_frame","title":"write_frame(frame)","text":"

Writes a single video frame to the target video file.

Parameters:

- `frame` (`ndarray`, required): The video frame to be written to the file. The frame must be in BGR color format.

Source code in supervision/utils/video.py
def write_frame(self, frame: np.ndarray):\n    \"\"\"\n    Writes a single video frame to the target video file.\n\n    Args:\n        frame (np.ndarray): The video frame to be written to the file. The frame\n            must be in BGR color format.\n    \"\"\"\n    self.__writer.write(frame)\n
"},{"location":"utils/video/#supervision.utils.video.FPSMonitor-attributes","title":"Attributes","text":""},{"location":"utils/video/#supervision.utils.video.FPSMonitor.fps","title":"fps: float property","text":"

Computes and returns the average FPS based on the stored time stamps.

Returns:

- `float`: The average FPS. Returns 0.0 if no time stamps are stored.

"},{"location":"utils/video/#supervision.utils.video.FPSMonitor-functions","title":"Functions","text":""},{"location":"utils/video/#supervision.utils.video.FPSMonitor.__call__","title":"__call__()","text":"

Deprecated

FPSMonitor.__call__ is deprecated and will be removed in supervision-0.22.0. Use FPSMonitor.fps instead.

Computes and returns the average FPS based on the stored time stamps.

Returns:

- `float`: The average FPS. Returns 0.0 if no time stamps are stored.

Source code in supervision/utils/video.py
@deprecated(\n    \"`FPSMonitor.__call__` is deprecated and will be removed in \"\n    \"`supervision-0.22.0`. Use `FPSMonitor.fps` instead.\"\n)\ndef __call__(self) -> float:\n    \"\"\"\n    !!! failure \"Deprecated\"\n\n        `FPSMonitor.__call__` is deprecated and will be removed in\n        `supervision-0.22.0`. Use `FPSMonitor.fps` instead.\n\n    Computes and returns the average FPS based on the stored time stamps.\n\n    Returns:\n        float: The average FPS. Returns 0.0 if no time stamps are stored.\n    \"\"\"\n    return self.fps\n
"},{"location":"utils/video/#supervision.utils.video.FPSMonitor.__init__","title":"__init__(sample_size=30)","text":"

Parameters:

- `sample_size` (`int`, default `30`): The maximum number of observations for latency benchmarking.

Examples:

import supervision as sv\n\nframes_generator = sv.get_video_frames_generator(source_path=<SOURCE_FILE_PATH>)\nfps_monitor = sv.FPSMonitor()\n\nfor frame in frames_generator:\n    # your processing code here\n    fps_monitor.tick()\n    fps = fps_monitor.fps\n
Source code in supervision/utils/video.py
def __init__(self, sample_size: int = 30):\n    \"\"\"\n    Args:\n        sample_size (int): The maximum number of observations for latency\n            benchmarking.\n\n    Examples:\n        ```python\n        import supervision as sv\n\n        frames_generator = sv.get_video_frames_generator(source_path=<SOURCE_FILE_PATH>)\n        fps_monitor = sv.FPSMonitor()\n\n        for frame in frames_generator:\n            # your processing code here\n            fps_monitor.tick()\n            fps = fps_monitor.fps\n        ```\n    \"\"\"  # noqa: E501 // docs\n    self.all_timestamps = deque(maxlen=sample_size)\n
"},{"location":"utils/video/#supervision.utils.video.FPSMonitor.reset","title":"reset()","text":"

Clears all the time stamps from the deque.

Source code in supervision/utils/video.py
def reset(self) -> None:\n    \"\"\"\n    Clears all the time stamps from the deque.\n    \"\"\"\n    self.all_timestamps.clear()\n
"},{"location":"utils/video/#supervision.utils.video.FPSMonitor.tick","title":"tick()","text":"

Adds a new time stamp to the deque for FPS calculation.

Source code in supervision/utils/video.py
def tick(self) -> None:\n    \"\"\"\n    Adds a new time stamp to the deque for FPS calculation.\n    \"\"\"\n    self.all_timestamps.append(time.monotonic())\n
"}]} \ No newline at end of file diff --git a/develop/sitemap.xml b/develop/sitemap.xml index 17804785b..f0bf882b4 100644 --- a/develop/sitemap.xml +++ b/develop/sitemap.xml @@ -2,222 +2,222 @@ https://supervision.roboflow.com/develop/ - 2024-06-19 + 2024-06-20 daily https://supervision.roboflow.com/develop/assets/ - 2024-06-19 + 2024-06-20 daily https://supervision.roboflow.com/develop/changelog/ - 2024-06-19 + 2024-06-20 daily https://supervision.roboflow.com/develop/code_of_conduct/ - 2024-06-19 + 2024-06-20 daily https://supervision.roboflow.com/develop/contributing/ - 2024-06-19 + 2024-06-20 daily https://supervision.roboflow.com/develop/cookbooks/ - 2024-06-19 + 2024-06-20 daily https://supervision.roboflow.com/develop/deprecated/ - 2024-06-19 + 2024-06-20 daily https://supervision.roboflow.com/develop/license/ - 2024-06-19 + 2024-06-20 daily https://supervision.roboflow.com/develop/trackers/ - 2024-06-19 + 2024-06-20 daily https://supervision.roboflow.com/develop/classification/core/ - 2024-06-19 + 2024-06-20 daily https://supervision.roboflow.com/develop/datasets/core/ - 2024-06-19 + 2024-06-20 daily https://supervision.roboflow.com/develop/datasets/utils/ - 2024-06-19 + 2024-06-20 daily https://supervision.roboflow.com/develop/detection/annotators/ - 2024-06-19 + 2024-06-20 daily https://supervision.roboflow.com/develop/detection/core/ - 2024-06-19 + 2024-06-20 daily https://supervision.roboflow.com/develop/detection/double_detection_filter/ - 2024-06-19 + 2024-06-20 daily https://supervision.roboflow.com/develop/detection/metrics/ - 2024-06-19 + 2024-06-20 daily https://supervision.roboflow.com/develop/detection/utils/ - 2024-06-19 + 2024-06-20 daily https://supervision.roboflow.com/develop/detection/tools/inference_slicer/ - 2024-06-19 + 2024-06-20 daily https://supervision.roboflow.com/develop/detection/tools/line_zone/ - 2024-06-19 + 2024-06-20 daily https://supervision.roboflow.com/develop/detection/tools/polygon_zone/ - 2024-06-19 + 2024-06-20 daily https://supervision.roboflow.com/develop/detection/tools/save_detections/ - 2024-06-19 + 2024-06-20 daily https://supervision.roboflow.com/develop/detection/tools/smoother/ - 2024-06-19 + 2024-06-20 daily https://supervision.roboflow.com/develop/how_to/detect_and_annotate/ - 2024-06-19 + 2024-06-20 daily https://supervision.roboflow.com/develop/how_to/detect_small_objects/ - 2024-06-19 + 2024-06-20 daily https://supervision.roboflow.com/develop/how_to/filter_detections/ - 2024-06-19 + 2024-06-20 daily https://supervision.roboflow.com/develop/how_to/save_detections/ - 2024-06-19 + 2024-06-20 daily https://supervision.roboflow.com/develop/how_to/track_objects/ - 2024-06-19 + 2024-06-20 daily https://supervision.roboflow.com/develop/keypoint/annotators/ - 2024-06-19 + 2024-06-20 daily https://supervision.roboflow.com/develop/keypoint/core/ - 2024-06-19 + 2024-06-20 daily https://supervision.roboflow.com/develop/notebooks/annotate-video-with-detections/ - 2024-06-19 + 2024-06-20 daily https://supervision.roboflow.com/develop/notebooks/count-objects-crossing-the-line/ - 2024-06-19 + 2024-06-20 daily https://supervision.roboflow.com/develop/notebooks/download-supervision-assets/ - 2024-06-19 + 2024-06-20 daily https://supervision.roboflow.com/develop/notebooks/evaluating-alignment-of-text-to-image-diffusion-models/ - 2024-06-19 + 2024-06-20 daily https://supervision.roboflow.com/develop/notebooks/object-tracking/ - 2024-06-19 + 2024-06-20 daily https://supervision.roboflow.com/develop/notebooks/occupancy_analytics/ - 2024-06-19 + 
2024-06-20 daily https://supervision.roboflow.com/develop/notebooks/quickstart/ - 2024-06-19 + 2024-06-20 daily https://supervision.roboflow.com/develop/notebooks/zero-shot-object-detection-with-yolo-world/ - 2024-06-19 + 2024-06-20 daily https://supervision.roboflow.com/develop/utils/draw/ - 2024-06-19 + 2024-06-20 daily https://supervision.roboflow.com/develop/utils/file/ - 2024-06-19 + 2024-06-20 daily https://supervision.roboflow.com/develop/utils/geometry/ - 2024-06-19 + 2024-06-20 daily https://supervision.roboflow.com/develop/utils/image/ - 2024-06-19 + 2024-06-20 daily https://supervision.roboflow.com/develop/utils/iterables/ - 2024-06-19 + 2024-06-20 daily https://supervision.roboflow.com/develop/utils/notebook/ - 2024-06-19 + 2024-06-20 daily https://supervision.roboflow.com/develop/utils/video/ - 2024-06-19 + 2024-06-20 daily \ No newline at end of file diff --git a/develop/sitemap.xml.gz b/develop/sitemap.xml.gz index ed042b606389d2cb1ae369aea90ecc0f850ac4ae..6d8c2d13ac32641558c69d5425fdd4b979023740 100644 GIT binary patch delta 630 zcmV-+0*U>G1%?F&ABzYG0C97X2OWQDR8?)2u(y7K_Kb`@fH%g&+Cy0O(|35}aj$w9 zq$pq+=QqyZ^E$nLbbfb*m>h=l{!4b)?;seoE;Q%;pWnaK_x)?}avGvfKs%ls(s|z! z^*{4`9LFp4-!`S4>wEU3rZ7@G%Kla7Tj<0!v^=`RN9!)W?l6aXk^bmiqZYIbg zR#{aQvzc-_-HIQ?{|EX{ObofjYu_VYTzqDSsMS zvGP0DKN0H0yORWfuIqYiB(S1|66?}nu!A8H#DSu#2IA8r0v9h8eas76Pv5~cHpDF%l?YLmJ$5xJxe zjpFd6<{U?qEf&Y%QCs!&juO*}eb@n4?FT*g43+k-2_4LTQLP9*ND(U6=?0W_bv-O5 zGBS8sCa!{0i=&m%8+~SCH)iN{FcarO+D{~-Y<%#)56)Z?=}4lmKOv$@E#ADG1%?F&ABzYGfFE*^2OWReRaJGXguV3>v}fel19)RRtUbW8pT5H*k9*a_ zAVmSoIKOfJp4aj1v-P_HVsaQx`{(Si-$5{FU8qm{KfiyeAN#lB>DWh~fOb4Nq|?43 z>VD?=IF4Cz1g-8;hOy3*Y4{=K+hBgkUiZb*j<0!v^=`S29!)W?l6aZacMyLEHxpzL z&scdJGlOkzVE~U^e$40Tk8kTlE!yJc@bXO^zN_a~p0bTSOf!uRHB=Ek3ag!;j`_pL zik08E{)td0-kl@>bY0hDErAs+lvtGpgY6B8APyAWxhFn7B5?6?=8pv7TIeDeO<)DU zdS@Uc5I{;%n+tn?32NnEPp*G*ECQax;*AnE*lN;CY>>vwPRz@23w%DINl(&Jha-JZHvY`bI)JB$2gcyH7~5?d?ac7gdVjlN6zHt*${?S5^ID zA|r#BW#TG0wK!TCz0qa|c4PWZ2QzUlr2RxP%EmkYNAJufk&Yw^`wJqf)Zo=y-97H= zfG70qiZOZ#4dJzPw^5H`U^x*5F|ak@+|ilD053jW4lS1?`s^WzA-?(%F~Cb|`TuE` QFV2vPzZ?;d-}xT^0D>wmLI3~&