From 299d352ba7bf946564ab820e08e1639d0a421bcb Mon Sep 17 00:00:00 2001
From: Qiong Wu <qiowu@microsoft.com>
Date: Tue, 26 May 2026 13:49:56 +0800
Subject: [PATCH 1/5] fix(inspect): suppress banner and spinner in JSON mode
 (#734)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Click 8.4 removed the mix_stderr parameter and always mixes stderr
into CliRunner.result.output. The banner and spinner added in #718
write to stderr via _stderr_console, which contaminates the JSON
payload in result.output — causing all 20 JSON-parsing e2e tests
to fail with JSONDecodeError.

Suppress the banner and spinner when --format json is requested,
matching the expectation that JSON consumers get clean stdout.
---
 src/winml/modelkit/commands/inspect.py | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/src/winml/modelkit/commands/inspect.py b/src/winml/modelkit/commands/inspect.py
index bbc5fb3c8..7406273ba 100644
--- a/src/winml/modelkit/commands/inspect.py
+++ b/src/winml/modelkit/commands/inspect.py
@@ -186,10 +186,13 @@ def inspect(
     # Print a banner BEFORE the heavy import chain / network calls so users
     # see immediate feedback instead of ~14 s of silence and assume the
     # command hung (see #543). Banner + spinner go to stderr so `--format
-    # json` consumers still get clean stdout. Suppressed in --quiet mode.
+    # json` consumers still get clean stdout. Suppressed in --quiet mode
+    # and in JSON mode (Click 8.4 mixes stderr into CliRunner.result.output,
+    # and JSON consumers expect clean stdout regardless).
     quiet = bool(ctx.obj and ctx.obj.get("quiet"))
+    json_mode = output_format.lower() == "json"
     target = model_id or model_type or model_class
-    if not quiet:
+    if not quiet and not json_mode:
         _stderr_console.print(f"[dim]Inspecting [bold]{target}[/bold] …[/dim]")
 
     from ..inspect import InspectError, ModelNotFoundError, NetworkError
@@ -204,7 +207,7 @@ def inspect(
         logging.getLogger("winml.modelkit").setLevel(logging.DEBUG)
 
     try:
-        if quiet:
+        if quiet or json_mode:
             result = _inspect_model_v2(
                 model_id=model_id,
                 task_override=task,

From 35671dbcb8e63ca10f4ba99af8d78fc33f3dcb50 Mon Sep 17 00:00:00 2001
From: Qiong Wu <qiowu@microsoft.com>
Date: Tue, 26 May 2026 14:03:54 +0800
Subject: [PATCH 2/5] fix(inspect): use parent model_type for multimodal models
 in result

For multimodal models like CLIP, resolve_loader_config narrows the
hf_config to a sub-config (e.g. CLIPTextConfig with model_type
"clip_text_model"). The narrowed type is correct for internal
registry lookups (TasksManager, I/O specs), but the user-facing
InspectResult should show the parent model_type ("clip").

Use parent_hf_config.model_type for the display field while keeping
the narrowed model_type for all internal resolution paths.
---
 src/winml/modelkit/commands/inspect.py | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/src/winml/modelkit/commands/inspect.py b/src/winml/modelkit/commands/inspect.py
index 7406273ba..15eab5e94 100644
--- a/src/winml/modelkit/commands/inspect.py
+++ b/src/winml/modelkit/commands/inspect.py
@@ -482,9 +482,14 @@ def _inspect_model_v2(
         task=task,
     )
 
+    # Use the parent model_type for the user-facing result.  For multimodal
+    # models (CLIP, etc.) `loader_config.model_type` is the narrowed sub-config
+    # type (e.g. "clip_text_model"), but users expect the top-level type ("clip").
+    display_model_type = getattr(parent_hf_config, "model_type", model_type)
+
     return InspectResult(
-        model_id=model_id or model_type or model_class_override or "unknown",
-        model_type=model_type,
+        model_id=model_id or display_model_type or model_class_override or "unknown",
+        model_type=display_model_type,
         architectures=architectures,
         task=task,
         task_source=task_source,

From 2115554a144358c8934b35f5146afd9c5946bb87 Mon Sep 17 00:00:00 2001
From: Qiong Wu <qiowu@microsoft.com>
Date: Tue, 26 May 2026 14:11:13 +0800
Subject: [PATCH 3/5] fix(inspect): handle --model-type override in
 display_model_type

The previous fix used only parent_hf_config.model_type, which works
when model_id is provided (step 1 loads the parent config). But for
--model-type clip (no model_id), parent_hf_config falls back to the
narrowed hf_config at line 340, so display_model_type was still
"clip_text_model".

Add model_type_override as the first precedence level: when the user
explicitly passes --model-type, use that value directly.

Precedence chain:
  1. model_type_override (user's --model-type flag)
  2. parent_hf_config.model_type (pre-narrowing, when model_id given)
  3. loader_config.model_type (narrowed fallback)
---
 src/winml/modelkit/commands/inspect.py | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/src/winml/modelkit/commands/inspect.py b/src/winml/modelkit/commands/inspect.py
index 15eab5e94..d52b9d33c 100644
--- a/src/winml/modelkit/commands/inspect.py
+++ b/src/winml/modelkit/commands/inspect.py
@@ -482,10 +482,16 @@ def _inspect_model_v2(
         task=task,
     )
 
-    # Use the parent model_type for the user-facing result.  For multimodal
+    # Use the top-level model_type for the user-facing result.  For multimodal
     # models (CLIP, etc.) `loader_config.model_type` is the narrowed sub-config
     # type (e.g. "clip_text_model"), but users expect the top-level type ("clip").
-    display_model_type = getattr(parent_hf_config, "model_type", model_type)
+    #
+    # Precedence:
+    #   1. model_type_override  — user explicitly passed --model-type
+    #   2. parent_hf_config     — pre-narrowing config (only when model_id was
+    #                             provided and AutoConfig succeeded in step 1)
+    #   3. model_type           — narrowed loader_config.model_type (fallback)
+    display_model_type = model_type_override or getattr(parent_hf_config, "model_type", model_type)
 
     return InspectResult(
         model_id=model_id or display_model_type or model_class_override or "unknown",

From 84644167a725bf24caf051d49d11ad84c8734de6 Mon Sep 17 00:00:00 2001
From: Qiong Wu <qiowu@microsoft.com>
Date: Tue, 26 May 2026 15:32:58 +0800
Subject: [PATCH 4/5] fix(inspect): use test-side result.stdout instead of
 production-side suppression
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Revert the banner/spinner suppression in JSON mode — the original design
(PR #718) deliberately puts them on stderr so pipe-to-jq users still see
progress feedback.  The real issue is Click 8.4's result.output mixing
both streams; fix the tests to read result.stdout (which preserves the
split), matching the pattern already used in test_sys_e2e.py.
---
 src/winml/modelkit/commands/inspect.py |  9 +++------
 tests/e2e/test_inspect_e2e.py          | 13 ++++++++-----
 2 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/src/winml/modelkit/commands/inspect.py b/src/winml/modelkit/commands/inspect.py
index d52b9d33c..19868a5a4 100644
--- a/src/winml/modelkit/commands/inspect.py
+++ b/src/winml/modelkit/commands/inspect.py
@@ -186,13 +186,10 @@ def inspect(
     # Print a banner BEFORE the heavy import chain / network calls so users
     # see immediate feedback instead of ~14 s of silence and assume the
     # command hung (see #543). Banner + spinner go to stderr so `--format
-    # json` consumers still get clean stdout. Suppressed in --quiet mode
-    # and in JSON mode (Click 8.4 mixes stderr into CliRunner.result.output,
-    # and JSON consumers expect clean stdout regardless).
+    # json` consumers still get clean stdout. Suppressed in --quiet mode.
     quiet = bool(ctx.obj and ctx.obj.get("quiet"))
-    json_mode = output_format.lower() == "json"
     target = model_id or model_type or model_class
-    if not quiet and not json_mode:
+    if not quiet:
         _stderr_console.print(f"[dim]Inspecting [bold]{target}[/bold] …[/dim]")
 
     from ..inspect import InspectError, ModelNotFoundError, NetworkError
@@ -207,7 +204,7 @@ def inspect(
         logging.getLogger("winml.modelkit").setLevel(logging.DEBUG)
 
     try:
-        if quiet or json_mode:
+        if quiet:
             result = _inspect_model_v2(
                 model_id=model_id,
                 task_override=task,
diff --git a/tests/e2e/test_inspect_e2e.py b/tests/e2e/test_inspect_e2e.py
index 1531440a3..239e1d2af 100644
--- a/tests/e2e/test_inspect_e2e.py
+++ b/tests/e2e/test_inspect_e2e.py
@@ -59,25 +59,28 @@
 def _run_json(*args: str) -> dict:
     """Invoke inspect with *args + '-f json' and return parsed JSON.
 
-    Asserts exit_code == 0 and that the output is valid JSON.
+    Reads ``result.stdout`` (not ``result.output``) so the banner and
+    spinner emitted on stderr do not corrupt the JSON payload — Click 8.4's
+    ``result.output`` aggregates both streams.
     """
     result = _run(*args, "-f", "json")
     assert result.exit_code == 0, f"inspect exited {result.exit_code}:\n{result.output}"
-    return json.loads(result.output)
+    return json.loads(result.stdout)
 
 
 def _run_network(model: str, task: str | None = None) -> dict:
     """Invoke inspect with a real model ID and return parsed JSON output.
 
-    Raises AssertionError when the command exits non-zero or the
-    output is not valid JSON.
+    Reads ``result.stdout`` (not ``result.output``) so the banner and
+    spinner emitted on stderr do not corrupt the JSON payload — Click 8.4's
+    ``result.output`` aggregates both streams.
     """
     args: list[str] = ["-m", model, "-f", "json"]
     if task:
         args.extend(["-t", task])
     result = CliRunner().invoke(inspect, args, obj={}, catch_exceptions=False)
     assert result.exit_code == 0, f"inspect failed (exit {result.exit_code}):\n{result.output}"
-    return json.loads(result.output)
+    return json.loads(result.stdout)
 
 
 def _assert_common_structure(data: dict, model_id: str, expected_task: str) -> None:

From 2c22577516a6dba7dd6980ff172bfee75f5c708d Mon Sep 17 00:00:00 2001
From: Qiong Wu <qiowu@microsoft.com>
Date: Tue, 26 May 2026 15:42:48 +0800
Subject: [PATCH 5/5] Revert "fix(inspect): use test-side result.stdout instead
 of production-side suppression"

This reverts commit 84644167a725bf24caf051d49d11ad84c8734de6.
---
 src/winml/modelkit/commands/inspect.py |  9 ++++++---
 tests/e2e/test_inspect_e2e.py          | 13 +++++--------
 2 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/src/winml/modelkit/commands/inspect.py b/src/winml/modelkit/commands/inspect.py
index 19868a5a4..d52b9d33c 100644
--- a/src/winml/modelkit/commands/inspect.py
+++ b/src/winml/modelkit/commands/inspect.py
@@ -186,10 +186,13 @@ def inspect(
     # Print a banner BEFORE the heavy import chain / network calls so users
     # see immediate feedback instead of ~14 s of silence and assume the
     # command hung (see #543). Banner + spinner go to stderr so `--format
-    # json` consumers still get clean stdout. Suppressed in --quiet mode.
+    # json` consumers still get clean stdout. Suppressed in --quiet mode
+    # and in JSON mode (Click 8.4 mixes stderr into CliRunner.result.output,
+    # and JSON consumers expect clean stdout regardless).
     quiet = bool(ctx.obj and ctx.obj.get("quiet"))
+    json_mode = output_format.lower() == "json"
     target = model_id or model_type or model_class
-    if not quiet:
+    if not quiet and not json_mode:
         _stderr_console.print(f"[dim]Inspecting [bold]{target}[/bold] …[/dim]")
 
     from ..inspect import InspectError, ModelNotFoundError, NetworkError
@@ -204,7 +207,7 @@ def inspect(
         logging.getLogger("winml.modelkit").setLevel(logging.DEBUG)
 
     try:
-        if quiet:
+        if quiet or json_mode:
             result = _inspect_model_v2(
                 model_id=model_id,
                 task_override=task,
diff --git a/tests/e2e/test_inspect_e2e.py b/tests/e2e/test_inspect_e2e.py
index 239e1d2af..1531440a3 100644
--- a/tests/e2e/test_inspect_e2e.py
+++ b/tests/e2e/test_inspect_e2e.py
@@ -59,28 +59,25 @@
 def _run_json(*args: str) -> dict:
     """Invoke inspect with *args + '-f json' and return parsed JSON.
 
-    Reads ``result.stdout`` (not ``result.output``) so the banner and
-    spinner emitted on stderr do not corrupt the JSON payload — Click 8.4's
-    ``result.output`` aggregates both streams.
+    Asserts exit_code == 0 and that the output is valid JSON.
     """
     result = _run(*args, "-f", "json")
     assert result.exit_code == 0, f"inspect exited {result.exit_code}:\n{result.output}"
-    return json.loads(result.stdout)
+    return json.loads(result.output)
 
 
 def _run_network(model: str, task: str | None = None) -> dict:
     """Invoke inspect with a real model ID and return parsed JSON output.
 
-    Reads ``result.stdout`` (not ``result.output``) so the banner and
-    spinner emitted on stderr do not corrupt the JSON payload — Click 8.4's
-    ``result.output`` aggregates both streams.
+    Raises AssertionError when the command exits non-zero or the
+    output is not valid JSON.
     """
     args: list[str] = ["-m", model, "-f", "json"]
     if task:
         args.extend(["-t", task])
     result = CliRunner().invoke(inspect, args, obj={}, catch_exceptions=False)
     assert result.exit_code == 0, f"inspect failed (exit {result.exit_code}):\n{result.output}"
-    return json.loads(result.stdout)
+    return json.loads(result.output)
 
 
 def _assert_common_structure(data: dict, model_id: str, expected_task: str) -> None: