From 2edcbdc4872f83abcb7992f350afe338c9325958 Mon Sep 17 00:00:00 2001
From: SkalskiP <piotr.skalski92@gmail.com>
Date: Sat, 15 Nov 2025 21:27:55 +0100
Subject: [PATCH 01/12] initial commit with `from_qwen_3_vl` added,
 `from_qwen_2_5_vl` improved

---
 supervision/detection/core.py |   1 +
 supervision/detection/vlm.py  | 137 ++++++++++++++++++++++++++++------
 test/detection/test_vlm.py    |  45 ++++++++---
 3 files changed, 151 insertions(+), 32 deletions(-)

diff --git a/supervision/detection/core.py b/supervision/detection/core.py
index bda2e7de3..66610b998 100644
--- a/supervision/detection/core.py
+++ b/supervision/detection/core.py
@@ -1211,6 +1211,7 @@ def from_vlm(cls, vlm: VLM | str, result: str | dict, **kwargs: Any) -> Detectio
         | PaliGemma           | `PALIGEMMA`          | detection               | `resolution_wh`             | `classes`           |
         | PaliGemma 2         | `PALIGEMMA`          | detection               | `resolution_wh`             | `classes`           |
         | Qwen2.5-VL          | `QWEN_2_5_VL`        | detection               | `resolution_wh`, `input_wh` | `classes`           |
+        | Qwen3-VL            | `QWEN_3_VL`          | detection               | `resolution_wh`             | `classes`           |
         | Google Gemini 2.0   | `GOOGLE_GEMINI_2_0`  | detection               | `resolution_wh`             | `classes`           |
         | Google Gemini 2.5   | `GOOGLE_GEMINI_2_5`  | detection, segmentation | `resolution_wh`             | `classes`           |
         | Moondream           | `MOONDREAM`          | detection               | `resolution_wh`             |                     |
diff --git a/supervision/detection/vlm.py b/supervision/detection/vlm.py
index 2f9b60ddb..9892e40de 100644
--- a/supervision/detection/vlm.py
+++ b/supervision/detection/vlm.py
@@ -1,5 +1,6 @@
 from __future__ import annotations
 
+import ast
 import base64
 import io
 import json
@@ -27,7 +28,8 @@ class LMM(Enum):
     Attributes:
         PALIGEMMA: Google's PaliGemma vision-language model.
         FLORENCE_2: Microsoft's Florence-2 vision-language model.
-        QWEN_2_5_VL: Qwen2.5-VL open vision-language model from Alibaba.
+        QWEN_2_5_VL: Qwen2.5-VL open vision-language model from Alibaba.
+        QWEN_3_VL: Qwen3-VL open vision-language model from Alibaba.
         GOOGLE_GEMINI_2_0: Google Gemini 2.0 vision-language model.
         GOOGLE_GEMINI_2_5: Google Gemini 2.5 vision-language model.
         MOONDREAM: The Moondream vision-language model.
@@ -36,6 +38,7 @@ class LMM(Enum):
     PALIGEMMA = "paligemma"
     FLORENCE_2 = "florence_2"
     QWEN_2_5_VL = "qwen_2_5_vl"
+    QWEN_3_VL = "qwen_3_vl"
     DEEPSEEK_VL_2 = "deepseek_vl_2"
     GOOGLE_GEMINI_2_0 = "gemini_2_0"
     GOOGLE_GEMINI_2_5 = "gemini_2_5"
@@ -69,6 +72,7 @@ class VLM(Enum):
         PALIGEMMA: Google's PaliGemma vision-language model.
         FLORENCE_2: Microsoft's Florence-2 vision-language model.
         QWEN_2_5_VL: Qwen2.5-VL open vision-language model from Alibaba.
+        QWEN_3_VL: Qwen3-VL open vision-language model from Alibaba.
         GOOGLE_GEMINI_2_0: Google Gemini 2.0 vision-language model.
         GOOGLE_GEMINI_2_5: Google Gemini 2.5 vision-language model.
         MOONDREAM: The Moondream vision-language model.
@@ -77,6 +81,7 @@ class VLM(Enum):
     PALIGEMMA = "paligemma"
     FLORENCE_2 = "florence_2"
     QWEN_2_5_VL = "qwen_2_5_vl"
+    QWEN_3_VL = "qwen_3_vl"
     DEEPSEEK_VL_2 = "deepseek_vl_2"
     GOOGLE_GEMINI_2_0 = "gemini_2_0"
     GOOGLE_GEMINI_2_5 = "gemini_2_5"
@@ -106,6 +111,7 @@ def from_value(cls, value: VLM | str) -> VLM:
     VLM.PALIGEMMA: str,
     VLM.FLORENCE_2: dict,
     VLM.QWEN_2_5_VL: str,
+    VLM.QWEN_3_VL: str,
     VLM.DEEPSEEK_VL_2: str,
     VLM.GOOGLE_GEMINI_2_0: str,
     VLM.GOOGLE_GEMINI_2_5: str,
@@ -116,6 +122,7 @@ def from_value(cls, value: VLM | str) -> VLM:
     VLM.PALIGEMMA: ["resolution_wh"],
     VLM.FLORENCE_2: ["resolution_wh"],
     VLM.QWEN_2_5_VL: ["input_wh", "resolution_wh"],
+    VLM.QWEN_3_VL: ["resolution_wh"],
     VLM.DEEPSEEK_VL_2: ["resolution_wh"],
     VLM.GOOGLE_GEMINI_2_0: ["resolution_wh"],
     VLM.GOOGLE_GEMINI_2_5: ["resolution_wh"],
@@ -126,6 +133,7 @@ def from_value(cls, value: VLM | str) -> VLM:
     VLM.PALIGEMMA: ["resolution_wh", "classes"],
     VLM.FLORENCE_2: ["resolution_wh"],
     VLM.QWEN_2_5_VL: ["input_wh", "resolution_wh", "classes"],
+    VLM.QWEN_3_VL: ["resolution_wh", "classes"],
     VLM.DEEPSEEK_VL_2: ["resolution_wh", "classes"],
     VLM.GOOGLE_GEMINI_2_0: ["resolution_wh", "classes"],
     VLM.GOOGLE_GEMINI_2_5: ["resolution_wh", "classes"],
@@ -235,6 +243,51 @@ def from_paligemma(
     return xyxy, class_id, class_name
 
 
+def recover_truncated_qwen_2_5_vl_response(text: str) -> Any | None:
+    """
+    Attempt to recover and parse a truncated or malformed JSON snippet from Qwen-2.5-VL
+    output.
+
+    This utility extracts a JSON-like portion from a string that may be truncated or
+    malformed, cleans trailing commas, and attempts to parse it into a Python object.
+
+    Args:
+        text (str): Raw text containing the JSON snippet possibly truncated or
+            incomplete.
+
+    Returns:
+        Parsed Python object (usually list) if recovery and parsing succeed;
+            otherwise `None`.
+    """
+    try:
+        first_bracket = text.find("[")
+        if first_bracket == -1:
+            return None
+        snippet = text[first_bracket:]
+
+        last_brace = snippet.rfind("}")
+        if last_brace == -1:
+            return None
+
+        snippet = snippet[: last_brace + 1]
+
+        prefix_end = snippet.find("[")
+        if prefix_end == -1:
+            return None
+
+        prefix = snippet[: prefix_end + 1]
+        body = snippet[prefix_end + 1 :].rstrip()
+
+        if body.endswith(","):
+            body = body[:-1].rstrip()
+
+        repaired = prefix + body + "]"
+
+        return json.loads(repaired)
+    except Exception:
+        return None
+
+
 def from_qwen_2_5_vl(
     result: str,
     input_wh: tuple[int, int],
@@ -242,7 +295,7 @@ def from_qwen_2_5_vl(
     classes: list[str] | None = None,
 ) -> tuple[np.ndarray, np.ndarray | None, np.ndarray]:
     """
-    Parse and scale bounding boxes from Qwen-2.5-VL style JSON output.
+    Parse and rescale bounding boxes and class labels from Qwen-2.5-VL JSON output.
 
     The JSON is expected to be enclosed in triple backticks with the format:
       ```json
@@ -253,37 +306,47 @@ def from_qwen_2_5_vl(
       ```
 
     Args:
-        result: String containing the JSON snippet enclosed by triple backticks.
-        input_wh: (input_width, input_height) describing the original bounding box
-            scale.
-        resolution_wh: (output_width, output_height) to which we rescale the boxes.
-        classes: Optional list of valid class names. If provided, returned boxes/labels
-            are filtered to only those classes found here.
+        result (str): String containing Qwen-2.5-VL JSON bounding box and label data.
+        input_wh (tuple[int, int]): Width and height of the coordinate space where boxes
+            are normalized.
+        resolution_wh (tuple[int, int]): Target width and height to scale bounding
+            boxes.
+        classes (list[str] or None): Optional list of valid class names to filter
+            results. If provided, only boxes with labels in this list are returned.
 
     Returns:
-        xyxy (np.ndarray): An array of shape `(n, 4)` containing
-            the bounding boxes coordinates in format `[x1, y1, x2, y2]`
-        class_id (Optional[np.ndarray]): An array of shape `(n,)` containing
-            the class indices for each bounding box (or None if `classes` is not
-            provided)
-        class_name (np.ndarray): An array of shape `(n,)` containing
-            the class labels for each bounding box
+        xyxy (np.ndarray): Array of shape `(N, 4)` with rescaled bounding boxes in
+            `(x_min, y_min, x_max, y_max)` format.
+        class_id (np.ndarray or None): Array of shape `(N,)` with indices of classes,
+            or `None` if no filtering applied.
+        class_name (np.ndarray): Array of shape `(N,)` with class names as strings.
     """
 
     in_w, in_h = validate_resolution(input_wh)
     out_w, out_h = validate_resolution(resolution_wh)
 
-    pattern = re.compile(r"```json\s*(.*?)\s*```", re.DOTALL)
-
-    match = pattern.search(result)
-    if not match:
-        return np.empty((0, 4)), None, np.empty((0,), dtype=str)
+    text = result.strip()
+    text = re.sub(r"^```(json)?", "", text, flags=re.IGNORECASE).strip()
+    text = re.sub(r"```$", "", text).strip()
 
-    json_snippet = match.group(1)
+    start = text.find("[")
+    end = text.rfind("]")
+    if start != -1 and end != -1 and end > start:
+        text = text[start: end + 1].strip()
 
     try:
-        data = json.loads(json_snippet)
+        data = json.loads(text)
     except json.JSONDecodeError:
+        repaired = recover_truncated_qwen_2_5_vl_response(text)
+        if repaired is not None:
+            data = repaired
+        else:
+            try:
+                data = ast.literal_eval(text)
+            except (ValueError, SyntaxError, TypeError):
+                return np.empty((0, 4)), None, np.empty((0,), dtype=str)
+
+    if not isinstance(data, list):
         return np.empty((0, 4)), None, np.empty((0,), dtype=str)
 
     boxes_list = []
@@ -315,6 +378,36 @@ def from_qwen_2_5_vl(
     return xyxy, class_id, class_name
 
 
+def from_qwen_3_vl(
+    result: str,
+    resolution_wh: tuple[int, int],
+    classes: list[str] | None = None,
+) -> tuple[np.ndarray, np.ndarray | None, np.ndarray]:
+    """
+    Parse Qwen3-VL JSON output and rescale boxes from a 1000x1000 coordinate space.
+
+    Args:
+        result (str): String containing the Qwen-3-VL JSON output.
+        resolution_wh (tuple[int, int]): Target resolution `(width, height)` to
+            scale bounding boxes.
+        classes (list[str] or None): Optional list of valid classes to filter
+            results.
+
+    Returns:
+        xyxy (np.ndarray): Array of bounding boxes with shape `(N, 4)` in
+            `(x_min, y_min, x_max, y_max)` format scaled to `resolution_wh`.
+        class_id (np.ndarray or None): Array of class indices for each box, or
+            None if no filtering by classes.
+        class_name (np.ndarray): Array of class names as strings.
+    """
+    return from_qwen_2_5_vl(
+        result=result,
+        input_wh=(1000, 1000),
+        resolution_wh=resolution_wh,
+        classes=classes
+    )
+
+
 def from_deepseek_vl_2(
     result: str, resolution_wh: tuple[int, int], classes: list[str] | None = None
 ) -> tuple[np.ndarray, np.ndarray | None, np.ndarray]:
diff --git a/test/detection/test_vlm.py b/test/detection/test_vlm.py
index 8a8240e98..e93b66f3d 100644
--- a/test/detection/test_vlm.py
+++ b/test/detection/test_vlm.py
@@ -320,18 +320,43 @@ def test_from_paligemma(
                 np.array(["dog"], dtype=str),
             ),
         ),  # out-of-bounds box
+(
+            does_not_raise(),
+            """[
+                {'bbox_2d': [10, 20, 110, 120], 'label': 'cat'}
+            ]""",
+            (640, 640),
+            (1280, 720),
+            None,
+            (
+                np.array([[20.0, 22.5, 220.0, 135.0]]),
+                None,
+                np.array(["cat"], dtype=str),
+            ),
+        ),  # python-style list, single quotes, no fences
         (
-            pytest.raises(ValueError),
+            does_not_raise(),
             """```json
             [
-                {"bbox_2d": [10, 20, 110, 120], "label": "cat"}
-            ]
-            ```""",
-            (0, 640),
-            (1280, 720),
+                {"bbox_2d": [0, 0, 64, 64], "label": "dog"},
+                {"bbox_2d": [10, 20, 110, 120], "label": "cat"},
+                {"bbox_2d": [30, 40, 130, 140], "label":
+            """,
+            (640, 640),
+            (640, 640),
             None,
-            None,  # won't be compared because we expect an exception
-        ),  # zero input width -> ValueError
+            (
+                    np.array(
+                        [
+                            [0.0, 0.0, 64.0, 64.0],
+                            [10.0, 20.0, 110.0, 120.0],
+                        ],
+                        dtype=float,
+                    ),
+                    None,
+                    np.array(["dog", "cat"], dtype=str),
+            ),
+        ),  # truncated response, last object unfinished, previous ones recovered
         (
             pytest.raises(ValueError),
             """```json
@@ -342,8 +367,8 @@ def test_from_paligemma(
             (640, 640),
             (1280, -100),
             None,
-            None,
-        ),  # negative resolution height -> ValueError
+            None,  # invalid resolution_wh
+        ),
     ],
 )
 def test_from_qwen_2_5_vl(

From e68c908077bb8bf901478bc087458fd218abb69f Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Sat, 15 Nov 2025 20:29:32 +0000
Subject: [PATCH 02/12] =?UTF-8?q?fix(pre=5Fcommit):=20=F0=9F=8E=A8=20auto?=
 =?UTF-8?q?=20format=20pre-commit=20hooks?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 supervision/detection/vlm.py |  4 ++--
 test/detection/test_vlm.py   | 20 ++++++++++----------
 2 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/supervision/detection/vlm.py b/supervision/detection/vlm.py
index 9892e40de..371827668 100644
--- a/supervision/detection/vlm.py
+++ b/supervision/detection/vlm.py
@@ -332,7 +332,7 @@ def from_qwen_2_5_vl(
     start = text.find("[")
     end = text.rfind("]")
     if start != -1 and end != -1 and end > start:
-        text = text[start: end + 1].strip()
+        text = text[start : end + 1].strip()
 
     try:
         data = json.loads(text)
@@ -404,7 +404,7 @@ def from_qwen_3_vl(
         result=result,
         input_wh=(1000, 1000),
         resolution_wh=resolution_wh,
-        classes=classes
+        classes=classes,
     )
 
 
diff --git a/test/detection/test_vlm.py b/test/detection/test_vlm.py
index e93b66f3d..7bfc23131 100644
--- a/test/detection/test_vlm.py
+++ b/test/detection/test_vlm.py
@@ -320,7 +320,7 @@ def test_from_paligemma(
                 np.array(["dog"], dtype=str),
             ),
         ),  # out-of-bounds box
-(
+        (
             does_not_raise(),
             """[
                 {'bbox_2d': [10, 20, 110, 120], 'label': 'cat'}
@@ -346,15 +346,15 @@ def test_from_paligemma(
             (640, 640),
             None,
             (
-                    np.array(
-                        [
-                            [0.0, 0.0, 64.0, 64.0],
-                            [10.0, 20.0, 110.0, 120.0],
-                        ],
-                        dtype=float,
-                    ),
-                    None,
-                    np.array(["dog", "cat"], dtype=str),
+                np.array(
+                    [
+                        [0.0, 0.0, 64.0, 64.0],
+                        [10.0, 20.0, 110.0, 120.0],
+                    ],
+                    dtype=float,
+                ),
+                None,
+                np.array(["dog", "cat"], dtype=str),
             ),
         ),  # truncated response, last object unfinished, previous ones recovered
         (

From 7301156e4613039415e99a5415dbffe842a20441 Mon Sep 17 00:00:00 2001
From: SkalskiP <piotr.skalski92@gmail.com>
Date: Sat, 15 Nov 2025 21:36:54 +0100
Subject: [PATCH 03/12] plug Qwen3-VL into `sv.Detections.from_vlm`

---
 supervision/detection/core.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/supervision/detection/core.py b/supervision/detection/core.py
index 66610b998..903134f82 100644
--- a/supervision/detection/core.py
+++ b/supervision/detection/core.py
@@ -1559,6 +1559,11 @@ def from_vlm(cls, vlm: VLM | str, result: str | dict, **kwargs: Any) -> Detectio
             data = {CLASS_NAME_DATA_FIELD: class_name}
             return cls(xyxy=xyxy, class_id=class_id, data=data)
 
+        if vlm == VLM.QWEN_3_VL:
+            xyxy, class_id, class_name = from_qwen_2_5_vl(result, **kwargs)
+            data = {CLASS_NAME_DATA_FIELD: class_name}
+            return cls(xyxy=xyxy, class_id=class_id, data=data)
+
         if vlm == VLM.DEEPSEEK_VL_2:
             xyxy, class_id, class_name = from_deepseek_vl_2(result, **kwargs)
             data = {CLASS_NAME_DATA_FIELD: class_name}

From 9ab9650763205bac575bee3ecdfe3d4ff5231a11 Mon Sep 17 00:00:00 2001
From: SkalskiP <piotr.skalski92@gmail.com>
Date: Sat, 15 Nov 2025 21:39:23 +0100
Subject: [PATCH 04/12] route Qwen3-VL through `from_qwen_3_vl` in `from_vlm`

---
 supervision/detection/core.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/supervision/detection/core.py b/supervision/detection/core.py
index 903134f82..464855a3d 100644
--- a/supervision/detection/core.py
+++ b/supervision/detection/core.py
@@ -47,7 +47,7 @@
     from_moondream,
     from_paligemma,
     from_qwen_2_5_vl,
-    validate_vlm_parameters,
+    validate_vlm_parameters, from_qwen_3_vl,
 )
 from supervision.geometry.core import Position
 from supervision.utils.internal import deprecated, get_instance_variables
@@ -1560,7 +1560,7 @@ def from_vlm(cls, vlm: VLM | str, result: str | dict, **kwargs: Any) -> Detectio
             return cls(xyxy=xyxy, class_id=class_id, data=data)
 
         if vlm == VLM.QWEN_3_VL:
-            xyxy, class_id, class_name = from_qwen_2_5_vl(result, **kwargs)
+            xyxy, class_id, class_name = from_qwen_3_vl(result, **kwargs)
             data = {CLASS_NAME_DATA_FIELD: class_name}
             return cls(xyxy=xyxy, class_id=class_id, data=data)
 

From 31db2ded112634d4661099f8da707a06f064c4c8 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Sat, 15 Nov 2025 20:40:01 +0000
Subject: [PATCH 05/12] =?UTF-8?q?fix(pre=5Fcommit):=20=F0=9F=8E=A8=20auto?=
 =?UTF-8?q?=20format=20pre-commit=20hooks?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 supervision/detection/core.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/supervision/detection/core.py b/supervision/detection/core.py
index 464855a3d..1fe602ea1 100644
--- a/supervision/detection/core.py
+++ b/supervision/detection/core.py
@@ -47,7 +47,8 @@
     from_moondream,
     from_paligemma,
     from_qwen_2_5_vl,
-    validate_vlm_parameters, from_qwen_3_vl,
+    from_qwen_3_vl,
+    validate_vlm_parameters,
 )
 from supervision.geometry.core import Position
 from supervision.utils.internal import deprecated, get_instance_variables

From 0422a66eaa0c81c73eb766058b0230041ae42fc1 Mon Sep 17 00:00:00 2001
From: SkalskiP <piotr.skalski92@gmail.com>
Date: Sat, 15 Nov 2025 22:05:46 +0100
Subject: [PATCH 06/12] default Qwen2.5-VL and Qwen3-VL confidence to 1.0

---
 supervision/detection/core.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/supervision/detection/core.py b/supervision/detection/core.py
index 464855a3d..2f350be1f 100644
--- a/supervision/detection/core.py
+++ b/supervision/detection/core.py
@@ -1557,12 +1557,14 @@ def from_vlm(cls, vlm: VLM | str, result: str | dict, **kwargs: Any) -> Detectio
         if vlm == VLM.QWEN_2_5_VL:
             xyxy, class_id, class_name = from_qwen_2_5_vl(result, **kwargs)
             data = {CLASS_NAME_DATA_FIELD: class_name}
-            return cls(xyxy=xyxy, class_id=class_id, data=data)
+            confidence = np.ones(len(class_id), dtype=float)
+            return cls(xyxy=xyxy, class_id=class_id, confidence=confidence, data=data)
 
         if vlm == VLM.QWEN_3_VL:
             xyxy, class_id, class_name = from_qwen_3_vl(result, **kwargs)
             data = {CLASS_NAME_DATA_FIELD: class_name}
-            return cls(xyxy=xyxy, class_id=class_id, data=data)
+            confidence = np.ones(len(class_id), dtype=float)
+            return cls(xyxy=xyxy, class_id=class_id, confidence=confidence, data=data)
 
         if vlm == VLM.DEEPSEEK_VL_2:
             xyxy, class_id, class_name = from_deepseek_vl_2(result, **kwargs)

From e29e3832fb3b18ce38190bc3a34466a4a66afb59 Mon Sep 17 00:00:00 2001
From: SkalskiP <piotr.skalski92@gmail.com>
Date: Sat, 15 Nov 2025 22:19:28 +0100
Subject: [PATCH 07/12] return empty `class_id` array for empty Qwen results

---
 supervision/detection/vlm.py | 20 +++++++++++++++++---
 1 file changed, 17 insertions(+), 3 deletions(-)

diff --git a/supervision/detection/vlm.py b/supervision/detection/vlm.py
index 371827668..388820f5b 100644
--- a/supervision/detection/vlm.py
+++ b/supervision/detection/vlm.py
@@ -344,10 +344,19 @@ def from_qwen_2_5_vl(
             try:
                 data = ast.literal_eval(text)
             except (ValueError, SyntaxError, TypeError):
-                return np.empty((0, 4)), None, np.empty((0,), dtype=str)
+                return (
+                    np.empty((0, 4)),
+                    np.empty((0,), dtype=int),
+                    np.empty((0,), dtype=str)
+                )
 
     if not isinstance(data, list):
-        return np.empty((0, 4)), None, np.empty((0,), dtype=str)
+        return (
+            np.empty((0, 4)),
+            np.empty((0,), dtype=int),
+            np.empty((0,), dtype=str)
+        )
+
 
     boxes_list = []
     labels_list = []
@@ -359,7 +368,12 @@ def from_qwen_2_5_vl(
         labels_list.append(item["label"])
 
     if not boxes_list:
-        return np.empty((0, 4)), None, np.empty((0,), dtype=str)
+        return (
+            np.empty((0, 4)),
+            np.empty((0,), dtype=int),
+            np.empty((0,), dtype=str)
+        )
+
 
     xyxy = np.array(boxes_list, dtype=float)
     class_name = np.array(labels_list, dtype=str)

From 9bd8f71f26dfba13f80f32b10b4ae40da728aef5 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Sat, 15 Nov 2025 21:19:51 +0000
Subject: [PATCH 08/12] =?UTF-8?q?fix(pre=5Fcommit):=20=F0=9F=8E=A8=20auto?=
 =?UTF-8?q?=20format=20pre-commit=20hooks?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 supervision/detection/vlm.py | 16 +++-------------
 1 file changed, 3 insertions(+), 13 deletions(-)

diff --git a/supervision/detection/vlm.py b/supervision/detection/vlm.py
index 388820f5b..97988c9f0 100644
--- a/supervision/detection/vlm.py
+++ b/supervision/detection/vlm.py
@@ -347,16 +347,11 @@ def from_qwen_2_5_vl(
                 return (
                     np.empty((0, 4)),
                     np.empty((0,), dtype=int),
-                    np.empty((0,), dtype=str)
+                    np.empty((0,), dtype=str),
                 )
 
     if not isinstance(data, list):
-        return (
-            np.empty((0, 4)),
-            np.empty((0,), dtype=int),
-            np.empty((0,), dtype=str)
-        )
-
+        return (np.empty((0, 4)), np.empty((0,), dtype=int), np.empty((0,), dtype=str))
 
     boxes_list = []
     labels_list = []
@@ -368,12 +363,7 @@ def from_qwen_2_5_vl(
         labels_list.append(item["label"])
 
     if not boxes_list:
-        return (
-            np.empty((0, 4)),
-            np.empty((0,), dtype=int),
-            np.empty((0,), dtype=str)
-        )
-
+        return (np.empty((0, 4)), np.empty((0,), dtype=int), np.empty((0,), dtype=str))
 
     xyxy = np.array(boxes_list, dtype=float)
     class_name = np.array(labels_list, dtype=str)

From c35a35cefdf8b586bbe325d16d63403ebbb0d6f2 Mon Sep 17 00:00:00 2001
From: SkalskiP <piotr.skalski92@gmail.com>
Date: Sat, 15 Nov 2025 22:27:11 +0100
Subject: [PATCH 09/12] size Qwen confidence array by `xyxy`, not `class_id`

---
 supervision/detection/core.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/supervision/detection/core.py b/supervision/detection/core.py
index e67f906f5..ef88fbfec 100644
--- a/supervision/detection/core.py
+++ b/supervision/detection/core.py
@@ -1558,13 +1558,13 @@ def from_vlm(cls, vlm: VLM | str, result: str | dict, **kwargs: Any) -> Detectio
         if vlm == VLM.QWEN_2_5_VL:
             xyxy, class_id, class_name = from_qwen_2_5_vl(result, **kwargs)
             data = {CLASS_NAME_DATA_FIELD: class_name}
-            confidence = np.ones(len(class_id), dtype=float)
+            confidence = np.ones(len(xyxy), dtype=float)
             return cls(xyxy=xyxy, class_id=class_id, confidence=confidence, data=data)
 
         if vlm == VLM.QWEN_3_VL:
             xyxy, class_id, class_name = from_qwen_3_vl(result, **kwargs)
             data = {CLASS_NAME_DATA_FIELD: class_name}
-            confidence = np.ones(len(class_id), dtype=float)
+            confidence = np.ones(len(xyxy), dtype=float)
             return cls(xyxy=xyxy, class_id=class_id, confidence=confidence, data=data)
 
         if vlm == VLM.DEEPSEEK_VL_2:

From 5af13c0f8c71442acb90a053b65249810ca1de4c Mon Sep 17 00:00:00 2001
From: SkalskiP <piotr.skalski92@gmail.com>
Date: Sat, 15 Nov 2025 22:48:20 +0100
Subject: [PATCH 10/12] add Qwen3-VL prompting example

---
 supervision/detection/core.py | 60 +++++++++++++++++++++++++++++++++++
 1 file changed, 60 insertions(+)

diff --git a/supervision/detection/core.py b/supervision/detection/core.py
index ef88fbfec..153dbe47a 100644
--- a/supervision/detection/core.py
+++ b/supervision/detection/core.py
@@ -951,6 +951,36 @@ def from_lmm(cls, lmm: LMM | str, result: str | dict, **kwargs: Any) -> Detectio
             detections.class_id
             # array([0, 1])
             ```
+            
+        !!! example "Qwen3-VL"
+
+            ```python
+            import supervision as sv
+
+            qwen_3_vl_result = \"\"\"```json
+            [
+                {"bbox_2d": [139, 768, 315, 954], "label": "cat"},
+                {"bbox_2d": [366, 679, 536, 849], "label": "dog"}
+            ]
+            ```\"\"\"
+            detections = sv.Detections.from_lmm(
+                sv.LMM.QWEN_3_VL,
+                qwen_3_vl_result,
+                resolution_wh=(1000, 1000),
+                classes=['cat', 'dog'],
+            )
+            detections.xyxy
+            # array([[139., 768., 315., 954.], [366., 679., 536., 849.]])
+
+            detections.class_id
+            # array([0, 1])
+
+            detections.data
+            # {'class_name': array(['cat', 'dog'], dtype='<U3')}
+
+            detections.confidence
+            # array([1., 1.])
+            ```
 
         !!! example "Gemini 2.0"
             ```python
@@ -1329,6 +1359,36 @@ def from_vlm(cls, vlm: VLM | str, result: str | dict, **kwargs: Any) -> Detectio
             detections.class_id
             # array([0, 1])
             ```
+            
+        !!! example "Qwen3-VL"
+
+            ```python
+            import supervision as sv
+
+            qwen_3_vl_result = \"\"\"```json
+            [
+                {"bbox_2d": [139, 768, 315, 954], "label": "cat"},
+                {"bbox_2d": [366, 679, 536, 849], "label": "dog"}
+            ]
+            ```\"\"\"
+            detections = sv.Detections.from_vlm(
+                sv.VLM.QWEN_3_VL,
+                qwen_3_vl_result,
+                resolution_wh=(1000, 1000),
+                classes=['cat', 'dog'],
+            )
+            detections.xyxy
+            # array([[139., 768., 315., 954.], [366., 679., 536., 849.]])
+
+            detections.class_id
+            # array([0, 1])
+
+            detections.data
+            # {'class_name': array(['cat', 'dog'], dtype='<U3')}
+
+            detections.confidence
+            # array([1., 1.])
+            ```
 
         !!! example "Gemini 2.0"
             ```python

From 52c772d77891410e332a7157c986009b11a20bc7 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Sat, 15 Nov 2025 21:48:42 +0000
Subject: [PATCH 11/12] =?UTF-8?q?fix(pre=5Fcommit):=20=F0=9F=8E=A8=20auto?=
 =?UTF-8?q?=20format=20pre-commit=20hooks?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 supervision/detection/core.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/supervision/detection/core.py b/supervision/detection/core.py
index 153dbe47a..e5e298bbb 100644
--- a/supervision/detection/core.py
+++ b/supervision/detection/core.py
@@ -951,7 +951,7 @@ def from_lmm(cls, lmm: LMM | str, result: str | dict, **kwargs: Any) -> Detectio
             detections.class_id
             # array([0, 1])
             ```
-            
+
         !!! example "Qwen3-VL"
 
             ```python
@@ -1359,7 +1359,7 @@ def from_vlm(cls, vlm: VLM | str, result: str | dict, **kwargs: Any) -> Detectio
             detections.class_id
             # array([0, 1])
             ```
-            
+
         !!! example "Qwen3-VL"
 
             ```python

From a99015ac2da717477346fd825ba11cbed934b11f Mon Sep 17 00:00:00 2001
From: SkalskiP <piotr.skalski92@gmail.com>
Date: Sat, 15 Nov 2025 22:51:22 +0100
Subject: [PATCH 12/12] more Qwen2.5-VL tests

---
 test/detection/test_vlm.py | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/test/detection/test_vlm.py b/test/detection/test_vlm.py
index 7bfc23131..9a0195f78 100644
--- a/test/detection/test_vlm.py
+++ b/test/detection/test_vlm.py
@@ -357,6 +357,18 @@ def test_from_paligemma(
                 np.array(["dog", "cat"], dtype=str),
             ),
         ),  # truncated response, last object unfinished, previous ones recovered
+        (
+            pytest.raises(ValueError),
+            """```json
+            [
+                {"bbox_2d": [10, 20, 110, 120], "label": "cat"}
+            ]
+            ```""",
+            (0, 640),
+            (1280, 720),
+            None,
+            None,  # invalid input_wh
+        ),
         (
             pytest.raises(ValueError),
             """```json