From 0de8b2b8d1251ca28d0e74fc5ad14d8b36eeabd5 Mon Sep 17 00:00:00 2001 From: Zhipeng Wang Date: Tue, 12 May 2026 10:18:25 +0800 Subject: [PATCH 1/3] refactor: rename ModelKit -> WinML CLI across user-visible surface Product name "ModelKit" / "WinML ModelKit" -> "WinML CLI" everywhere users see the name: - CLI help text, module docstrings (cli.py, __init__.py, __main__.py) - All subcommand --help (build, catalog, config, inspect, serve, sys) - Serve API titles, console banners, server_info["name"] response - Rich UI panel titles (catalog, sys) - Runtime rule information messages ("Use the WinML CLI rewrite flag...") - README, CONTRIBUTING, SUPPORT, docs/Privacy, docs/naming-convention - serve/static/index.html (browser title, logo, UI strings, code examples, MCP server identifiers, Claude Code skill filename) - scripts/mcp_server.py (FastMCP name, descriptions, log messages) - Stale wmk CLI command examples in model docstrings -> winml Internal rename for consistency: - Telemetry event names ModelKit{Heartbeat,Action,Error} -> WinMLCLI{...} (safe: not yet in production use) - Cache filename modelkit.cache -> winmlcli.cache - Internal attribute flags _modelkit_* -> _winmlcli_* in telemetry/ - Env vars MODELKIT_* -> WINMLCLI_*: _RULES_DIR, _SHOW_ALL_WARNINGS, _TIMING_LOG, _TELEMETRY_CACHE_DIR - Internal module docstrings (cache/, core/, onnx/, utils/, etc) - ModelKitPlugin class identifier in Semantic Kernel example - producer_name strings in test fixtures + pattern/base.py Adjacent fixes pulled in: - pyproject.toml URLs: github.com/microsoft/ModelKit -> WinML-ModelKit (stale 404 URLs; real repo is microsoft/WinML-ModelKit) - Same URL fix in README.md, SUPPORT.md, CONTRIBUTING.md - WML abbreviation -> WinML expansion in __author__ and export subsystem docstring (team identity unchanged) - Stale duplicate copyright block (Apache-2.0 SPDX) removed from graphpipe/builders test assets (verified original code, no Apache use) Out of scope (deferred): - Python module path winml.modelkit (high churn, low value) - PyPI package name winml-modelkit (wave 3, needs repo coordination) - GitHub Actions / Azure Pipelines yaml filenames (wave 2) - gim-home/ModelKitArtifacts cross-repo asset references - runtime_checker_query.py workspace marker substring match (behavior code) - Azure DevOps modelkit-selfhost-pool / Modelkit feed (infra-managed) - docs/superpowers/* historical design docs (preserve as-is) Refs #583 --- CONTRIBUTING.md | 6 +- README.md | 40 +++++------ SUPPORT.md | 2 +- docs/Privacy.md | 22 +++---- docs/naming-convention.md | 4 +- pyproject.toml | 8 +-- scripts/e2e_eval/README.md | 2 +- scripts/e2e_eval/run_eval.py | 14 ++-- scripts/e2e_eval/run_pytorch_baseline.py | 4 +- scripts/e2e_eval/run_sa_eval.py | 10 +-- scripts/e2e_eval/sa_comparison.py | 2 +- scripts/e2e_eval/sa_report.py | 2 - scripts/mcp_server.py | 12 ++-- src/winml/modelkit/__init__.py | 4 +- src/winml/modelkit/__main__.py | 2 +- src/winml/modelkit/_warnings.py | 6 +- src/winml/modelkit/analyze/__init__.py | 3 - .../default_information.json | 2 +- .../information_rules/qc_information.json | 2 +- .../rules/runtime_check_rules/README.md | 10 +-- .../modelkit/analyze/utils/rule_loader.py | 9 +-- .../modelkit/analyze/utils/timing_utils.py | 2 +- src/winml/modelkit/cache/__init__.py | 2 +- src/winml/modelkit/cache/model.py | 22 ++++--- src/winml/modelkit/cache/path.py | 2 +- src/winml/modelkit/cli.py | 8 +-- src/winml/modelkit/commands/__init__.py | 2 +- src/winml/modelkit/commands/build.py | 4 +- src/winml/modelkit/commands/catalog.py | 8 +-- src/winml/modelkit/commands/config.py | 2 +- src/winml/modelkit/commands/inspect.py | 4 +- src/winml/modelkit/commands/serve.py | 4 +- src/winml/modelkit/commands/sys.py | 6 +- src/winml/modelkit/config/precision.py | 2 +- src/winml/modelkit/core/__init__.py | 2 +- src/winml/modelkit/core/node_metadata.py | 2 +- src/winml/modelkit/data/__init__.py | 2 +- src/winml/modelkit/export/__init__.py | 2 +- src/winml/modelkit/inference/engine.py | 2 +- src/winml/modelkit/inspect/__init__.py | 2 +- src/winml/modelkit/inspect/types.py | 2 +- src/winml/modelkit/models/hf/bart.py | 4 +- src/winml/modelkit/models/hf/marian.py | 4 +- src/winml/modelkit/models/hf/mu2.py | 6 +- src/winml/modelkit/models/hf/t5.py | 4 +- .../modelkit/models/winml/composite_model.py | 12 ++-- src/winml/modelkit/onnx/__init__.py | 2 +- src/winml/modelkit/onnx/domains.py | 2 +- src/winml/modelkit/onnx/dtypes.py | 2 +- src/winml/modelkit/onnx/persistence.py | 2 +- src/winml/modelkit/onnx/shape.py | 2 +- src/winml/modelkit/onnx/utils.py | 2 +- src/winml/modelkit/optracing/__init__.py | 2 +- src/winml/modelkit/pattern/__init__.py | 2 +- src/winml/modelkit/pattern/base.py | 2 +- src/winml/modelkit/serve/__init__.py | 2 +- src/winml/modelkit/serve/app.py | 6 +- src/winml/modelkit/serve/cli_api.py | 4 +- src/winml/modelkit/serve/static/index.html | 66 +++++++++---------- src/winml/modelkit/sysinfo/device.py | 2 +- src/winml/modelkit/telemetry/__init__.py | 2 +- src/winml/modelkit/telemetry/_cache.py | 10 +-- src/winml/modelkit/telemetry/click_group.py | 20 +++--- .../telemetry/library/serialization.py | 2 +- src/winml/modelkit/telemetry/telemetry.py | 14 ++-- src/winml/modelkit/utils/cli.py | 2 +- src/winml/modelkit/utils/constants.py | 2 +- src/winml/modelkit/utils/logging.py | 2 +- tests/README_PIPE_TESTS.md | 2 +- tests/cli/test_import_time.py | 2 +- tests/cli/test_main.py | 2 +- tests/e2e/test_serve_e2e.py | 2 +- tests/fixtures/create_test_models.py | 4 +- .../test_runtime_checker_query_parquet.py | 22 ++++--- tests/unit/analyze/models/test_rule_loader.py | 20 +++--- tests/unit/commands/test_catalog.py | 2 +- .../unit/export/test_onnx_config_overrides.py | 6 +- .../assets/graphpipe/builders/__init__.py | 2 - .../assets/graphpipe/builders/attention.py | 2 +- .../optim/assets/graphpipe/builders/conv.py | 2 - .../unit/optim/pipes/test_constant_folding.py | 2 +- .../optim/pipes/test_pipe_graph_isolated.py | 2 +- tests/unit/telemetry/library/test_exporter.py | 44 ++++++------- .../telemetry/library/test_serialization.py | 14 ++-- tests/unit/telemetry/test_cache.py | 16 ++--- .../unit/telemetry/test_cache_integration.py | 30 ++++----- tests/unit/telemetry/test_click_group.py | 14 ++-- tests/unit/telemetry/test_consent.py | 2 +- tests/unit/telemetry/test_telemetry_emit.py | 6 +- tests/unit/telemetry/test_utils_cache.py | 2 +- 90 files changed, 314 insertions(+), 316 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 215cc0f31..96efd0706 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -4,8 +4,8 @@ We're always looking for your help to improve the product (bug fixes, new featur ## Contribute a code change -* Start by reading the project [README](./README.md) to understand the scope and goals of ModelKit. -* If your change is non-trivial or introduces new public facing APIs, please use the [feature request issue template](https://github.com/microsoft/ModelKit/issues/new) to discuss it with the team first. +* Start by reading the project [README](./README.md) to understand the scope and goals of WinML CLI. +* If your change is non-trivial or introduces new public facing APIs, please use the [feature request issue template](https://github.com/microsoft/WinML-ModelKit/issues/new) to discuss it with the team first. * For all other changes, you can directly create a pull request (PR) and we'll be happy to take a look. * Make sure your PR adheres to the coding conventions and standards below. @@ -22,7 +22,7 @@ This installs all dependencies and enables [pre-commit hooks](https://pre-commit ### Runtime check rules -When running ModelKit from a source tree (`uv run winml ...`), you need to populate the runtime check rule zips locally. See [`src/winml/modelkit/analyze/rules/runtime_check_rules/README.md`](./src/winml/modelkit/analyze/rules/runtime_check_rules/README.md) for setup options (GitHub release for external contributors, `gim-home` script for Microsoft internal, `MODELKIT_RULES_DIR` override). +When running WinML CLI from a source tree (`uv run winml ...`), you need to populate the runtime check rule zips locally. See [`src/winml/modelkit/analyze/rules/runtime_check_rules/README.md`](./src/winml/modelkit/analyze/rules/runtime_check_rules/README.md) for setup options (GitHub release for external contributors, `gim-home` script for Microsoft internal, `WINMLCLI_RULES_DIR` override). ## Coding conventions and standards diff --git a/README.md b/README.md index 7e70b6aac..355a325b7 100644 --- a/README.md +++ b/README.md @@ -1,15 +1,15 @@ -# ModelKit +# WinML CLI [![ModelKit CI](https://github.com/microsoft/WinML-ModelKit/actions/workflows/modelkit-ci.yml/badge.svg)](https://github.com/microsoft/WinML-ModelKit/actions/workflows/modelkit-ci.yml) ![Status](https://img.shields.io/badge/status-early%20access-blue) ![Python](https://img.shields.io/badge/python-3.10%2B-blue?logo=python&logoColor=white) ![License](https://img.shields.io/badge/license-MIT-green) -**ModelKit** is a CLI toolkit to build **portable, performant, and high-quality** models for Windows ML. It covers the entire journey from pretrained model to on-device inference — export, optimization, quantization, compilation, and benchmarking — across **all execution providers**, regardless of silicon. +**WinML CLI** is a CLI toolkit to build **portable, performant, and high-quality** models for Windows ML. It covers the entire journey from pretrained model to on-device inference — export, optimization, quantization, compilation, and benchmarking — across **all execution providers**, regardless of silicon. --- -## :dart: ModelKit Is Right for You If +## :dart: WinML CLI Is Right for You If - [x] You want to build models that run on **any Windows device** — Qualcomm, Intel, AMD, NVIDIA, or CPU - [x] You want to benchmark a model with **one command** — latency, throughput, and live hardware utilization @@ -32,7 +32,7 @@ | **Dml** | Hardware-agnostic GPU backend | 🔶 Planned | `--ep dml` | `--device gpu` | | **CPU** | Cross-platform fallback | ⚪ Always available | `--ep cpu` | `--device cpu` | -> **Tip:** Use `--device auto` and ModelKit picks the best available device — NPU first, then GPU, then CPU. +> **Tip:** Use `--device auto` and WinML CLI picks the best available device — NPU first, then GPU, then CPU. --- @@ -45,11 +45,11 @@ | **Windows 11** (x64 or ARM64) | Windows 11 24H2+ required for NPU support | | **UV** | Install [UV](https://github.com/astral-sh/uv) | | **Windows App SDK Runtime 1.8** | [Latest Windows App SDK downloads](https://learn.microsoft.com/en-us/windows/apps/windows-app-sdk/downloads) | -| **ModelKit** (Python wheel) | See release instructions | +| **WinML CLI** (Python wheel) | See release instructions | ### Required Hardware -**ModelKit targets NPU.** We recommend testing on one of the following NPU devices: +**WinML CLI targets NPU.** We recommend testing on one of the following NPU devices: | Device | EP | Flag | |--------|-----|------| @@ -57,7 +57,7 @@ | Intel AI Boost (Meteor Lake / Lunar Lake) | OpenVINO | `--ep openvino --device npu` | | AMD Ryzen AI (Phoenix / Hawk Point / Strix) | VitisAI | `--ep vitisai --device npu` | -**No NPU?** Use `--device auto` — ModelKit will fall back to the best available device (GPU → CPU). Note that `winml compile` requires NPU and cannot run without one. +**No NPU?** Use `--device auto` — WinML CLI will fall back to the best available device (GPU → CPU). Note that `winml compile` requires NPU and cannot run without one. ### Accepted Inputs @@ -78,7 +78,7 @@ If `inspect` prints an error or shows `Unsupported`, **skip that model**. Only m ## :package: Installation -ModelKit requires **Python 3.10** and is distributed as a Python wheel. We recommend [uv](https://docs.astral.sh/uv/) for fast, reproducible environment setup. +WinML CLI requires **Python 3.10** and is distributed as a Python wheel. We recommend [uv](https://docs.astral.sh/uv/) for fast, reproducible environment setup. **1. Create a Python 3.10 environment** @@ -114,7 +114,7 @@ Confirm that your target device and EP appear in the output: - **Intel AI Boost** — look for `OpenVINOExecutionProvider` - **AMD Ryzen AI** — look for `VitisAIExecutionProvider` -If no NPU is detected, you can still use ModelKit with `--device auto` for most commands. The only exception is `winml compile`, which requires an NPU device. +If no NPU is detected, you can still use WinML CLI with `--device auto` for most commands. The only exception is `winml compile`, which requires an NPU device. --- @@ -177,7 +177,7 @@ If no NPU is detected, you can still use ModelKit with `--device auto` for most **`winml doctor`** — Diagnose environment issues. Checks runtimes, execution providers, and dependencies to identify configuration problems. -**`winml setting`** — Configure ModelKit preferences. Set default EPs, output directories, and other global options. +**`winml setting`** — Configure WinML CLI preferences. Set default EPs, output directories, and other global options. **`winml sys`** — System information and capability reporting. Prints detected hardware, available EPs, Python version, and installed package versions. @@ -300,7 +300,7 @@ The simplest way to evaluate a model — one command, zero setup: winml perf -m facebook/convnext-base-224 --device npu --monitor ``` -ModelKit handles everything behind the scenes: download the model from Hugging Face, export to ONNX, optimize the graph, and run the benchmark on your NPU. The `--monitor` flag enables live hardware monitoring — real-time CPU utilization, RAM usage, and NPU activity alongside the latency results. +WinML CLI handles everything behind the scenes: download the model from Hugging Face, export to ONNX, optimize the graph, and run the benchmark on your NPU. The `--monitor` flag enables live hardware monitoring — real-time CPU utilization, RAM usage, and NPU activity alongside the latency results. This is ideal for quick smoke tests: does the model run on this device, and how fast is it? @@ -308,7 +308,7 @@ This is ideal for quick smoke tests: does the model run on this device, and how ## :arrows_counterclockwise: The BYOM Workflow -The **Build Your Own Model** (BYOM) workflow is the philosophy behind ModelKit. It defines how a source model becomes a production-ready, device-optimized artifact. +The **Build Your Own Model** (BYOM) workflow is the philosophy behind WinML CLI. It defines how a source model becomes a production-ready, device-optimized artifact. ### The Pipeline @@ -318,7 +318,7 @@ Source Model --> Export --> Analyze --> Optimize --> Quantize --> Compile --> Be ![BYOM Workflow](docs/assets/workflow-only.svg) -Each arrow is a ModelKit command. You can enter the pipeline at any stage (for example, start with a local ONNX file and skip export), exit early (stop after optimization if you do not need quantization), or loop back to repeat a stage with different settings. +Each arrow is a WinML CLI command. You can enter the pipeline at any stage (for example, start with a local ONNX file and skip export), exit early (stop after optimization if you do not need quantization), or loop back to repeat a stage with different settings. ### Primitive Commands vs. Config-Driven Pipeline @@ -361,7 +361,7 @@ Run `winml catalog` to browse the full catalog interactively. -These models are verified against ModelKit's full pipeline and serve as reliable starting points. You are not limited to this list — any Hugging Face model that passes `winml inspect` is a valid input. +These models are verified against WinML CLI's full pipeline and serve as reliable starting points. You are not limited to this list — any Hugging Face model that passes `winml inspect` is a valid input. For models not in this table, run `winml inspect -m ` to verify support before proceeding. @@ -369,9 +369,9 @@ For models not in this table, run `winml inspect -m ` to verify suppor ## :warning: Scope & Limitations -### What ModelKit supports +### What WinML CLI supports -ModelKit targets **classic deep learning models** — CNNs, encoders, vision transformers, NLP classifiers, token classifiers, object detection models, and segmentation models. +WinML CLI targets **classic deep learning models** — CNNs, encoders, vision transformers, NLP classifiers, token classifiers, object detection models, and segmentation models. Supported tasks include: - Image classification (ResNet, ViT, Swin, ConvNeXT) @@ -380,9 +380,9 @@ Supported tasks include: - Object detection (Table Transformer) - Image segmentation (SegFormer) -### What ModelKit does not support +### What WinML CLI does not support -**LLMs and generative models are not in scope.** Do not use ModelKit with GPT, LLaMA, Phi, Mistral, Stable Diffusion, or any model with a decoder-only or sequence-to-sequence generative architecture. LLM support (with LoRA) is planned for Q3-Q4 2026. +**LLMs and generative models are not in scope.** Do not use WinML CLI with GPT, LLaMA, Phi, Mistral, Stable Diffusion, or any model with a decoder-only or sequence-to-sequence generative architecture. LLM support (with LoRA) is planned for Q3-Q4 2026. ### Known constraints @@ -432,7 +432,7 @@ Supported tasks include: ## :lock: Data / Telemetry -Official ModelKit releases can collect anonymous usage telemetry to +Official WinML CLI releases can collect anonymous usage telemetry to help improve the product. Telemetry is classified as **Optional**. A one-time prompt on your first run asks for consent (default: accept — press Enter to enable, type `n` to decline). @@ -459,7 +459,7 @@ locations. We welcome contributions! Please see the [contribution guidelines](CONTRIBUTING.md). -For feature requests or bug reports, please file a [GitHub Issue](https://github.com/microsoft/ModelKit/issues). +For feature requests or bug reports, please file a [GitHub Issue](https://github.com/microsoft/WinML-ModelKit/issues). --- diff --git a/SUPPORT.md b/SUPPORT.md index 3f48a7f00..f4b026105 100644 --- a/SUPPORT.md +++ b/SUPPORT.md @@ -2,7 +2,7 @@ ## How to file issues and get help -This project uses [GitHub Issues](https://github.com/microsoft/ModelKit/issues) to track bugs and feature requests. Please search the existing +This project uses [GitHub Issues](https://github.com/microsoft/WinML-ModelKit/issues) to track bugs and feature requests. Please search the existing issues before filing new issues to avoid duplicates. For new issues, file your bug or feature request as a new Issue. diff --git a/docs/Privacy.md b/docs/Privacy.md index c6527b156..97add4ba1 100644 --- a/docs/Privacy.md +++ b/docs/Privacy.md @@ -1,12 +1,12 @@ -# ModelKit Privacy Statement +# WinML CLI Privacy Statement -ModelKit collects limited, anonymous telemetry to help improve the +WinML CLI collects limited, anonymous telemetry to help improve the product. This page describes exactly what is collected, what is not, and how to control it. ## Data category -All ModelKit telemetry is classified as **Optional** under Microsoft's +All WinML CLI telemetry is classified as **Optional** under Microsoft's data categorization model. None of it is required to run any feature; it exists solely to support product improvement. @@ -20,15 +20,15 @@ see the prompt and default to off. ## Events collected -When telemetry is enabled, ModelKit emits three event types: +When telemetry is enabled, WinML CLI emits three event types: -### ModelKitHeartbeat +### WinMLCLIHeartbeat Sent once per CLI invocation, just before the requested command runs. Carries only context attributes (OS, architecture, app version, device ID) — no per-event payload. -### ModelKitAction +### WinMLCLIAction Sent once per command completion. @@ -41,7 +41,7 @@ Sent once per command completion. | `duration_ms` | Wall-clock execution time in milliseconds. | | `success` | Whether the command completed without raising. | -### ModelKitError +### WinMLCLIError Sent only when a command raises an unhandled exception. @@ -61,7 +61,7 @@ not by the command code): | `device_id` | SHA256 hash of a randomly generated UUID, persisted per machine. Enables counting distinct users without identifying them. | | `id_status` | `EXISTING`, `NEW`, or `FAILED` — how the device ID was obtained on this run. | | `os.name`, `os.version`, `os.release`, `os.arch` | Operating system and architecture (e.g., `Windows`, `10.0.26200`, `11`, `AMD64`). | -| `app_version` | ModelKit package version. | +| `app_version` | WinML CLI package version. | | `app_instance_id` | A random UUID generated for this process only; not persisted. | | `initTs` | Epoch timestamp when telemetry was initialized. | @@ -80,7 +80,7 @@ not by the command code): ### Consent -On the first run of any command, ModelKit prompts: +On the first run of any command, WinML CLI prompts: ``` Enable telemetry? [Y/n] @@ -125,7 +125,7 @@ variables are set, and no prompt is shown: Events that fail to send (e.g., transient network errors) are cached locally and retried on the next run. The cache file lives at: -`%USERPROFILE%\.winml\telemetry\modelkit.cache` +`%USERPROFILE%\.winml\telemetry\winmlcli.cache` The cache is append-only on failure and drain-then-resend on recovery. When telemetry is disabled, the cache is cleared so a disabled session @@ -133,7 +133,7 @@ never resends events the user has since opted out of. ## Dev installs -ModelKit installed from source (`pip install -e .`) or run directly +WinML CLI installed from source (`pip install -e .`) or run directly from a checkout never sends telemetry. The InstrumentationKey is blank in source and is only populated by the official build pipeline. Only official binary releases are capable of sending telemetry, and only diff --git a/docs/naming-convention.md b/docs/naming-convention.md index 7a93fabe1..f1cd3a9a5 100644 --- a/docs/naming-convention.md +++ b/docs/naming-convention.md @@ -1,6 +1,6 @@ -# ModelKit Naming Convention +# WinML CLI Naming Convention -This document defines the naming rules for the ModelKit codebase. All new code and refactored code must follow these conventions. +This document defines the naming rules for the WinML CLI codebase. All new code and refactored code must follow these conventions. ## 1. Acronyms in Class Names diff --git a/pyproject.toml b/pyproject.toml index 9b514c439..3df01496e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -90,10 +90,10 @@ optional-dependencies.openvino = [ "openvino>=2023" ] optional-dependencies.qnn = [ "onnxruntime-qnn>=1.24.1; python_version>='3.11'", ] -urls."Bug Tracker" = "https://github.com/microsoft/ModelKit/issues" -urls.Documentation = "https://github.com/microsoft/ModelKit/blob/main/README.md" -urls.Homepage = "https://github.com/microsoft/ModelKit" -urls.Repository = "https://github.com/microsoft/ModelKit.git" +urls."Bug Tracker" = "https://github.com/microsoft/WinML-ModelKit/issues" +urls.Documentation = "https://github.com/microsoft/WinML-ModelKit/blob/main/README.md" +urls.Homepage = "https://github.com/microsoft/WinML-ModelKit" +urls.Repository = "https://github.com/microsoft/WinML-ModelKit.git" # ============================================================================= # SETUPTOOLS - Package Configuration (Flat Layout with Namespace Prefix) # ============================================================================= diff --git a/scripts/e2e_eval/README.md b/scripts/e2e_eval/README.md index 7f32a3920..8d018d3b6 100644 --- a/scripts/e2e_eval/README.md +++ b/scripts/e2e_eval/README.md @@ -1,6 +1,6 @@ # E2E Evaluation Scripts -Batch-evaluate ModelKit's `winml perf` pipeline against a curated set of HuggingFace models. +Batch-evaluate WinML CLI's `winml perf` pipeline against a curated set of HuggingFace models. Captures pass/fail, failure classification, and generates interactive reports. ## Quick Start diff --git a/scripts/e2e_eval/run_eval.py b/scripts/e2e_eval/run_eval.py index 0df325fc2..7afa17ef5 100644 --- a/scripts/e2e_eval/run_eval.py +++ b/scripts/e2e_eval/run_eval.py @@ -117,9 +117,9 @@ def _get_timeout_skip_reason(hf_id: str, task: str) -> str: ) _HF_CACHE = Path.home() / ".cache" / "huggingface" -_WML_CACHE = Path.home() / ".cache" / "winml" +_WINML_CACHE = Path.home() / ".cache" / "winml" _TEMP_DIR = Path(os.environ.get("TEMP", os.environ.get("TMP", tempfile.gettempdir()))) -_TEMP_PREFIXES = ("wmk_", "modelkit_compat_") +_TEMP_PREFIXES = ("winmlcli_", "winmlcli_compat_") def _is_no_space_error(proc: dict) -> bool: @@ -129,8 +129,8 @@ def _is_no_space_error(proc: dict) -> bool: def _clear_disk_caches() -> None: - """Delete HuggingFace, WML cache directories and leaked temp files.""" - for cache_dir in (_HF_CACHE, _WML_CACHE): + """Delete HuggingFace, WinML cache directories and leaked temp files.""" + for cache_dir in (_HF_CACHE, _WINML_CACHE): if cache_dir.exists(): safe_print(f" [cleanup] Removing cache: {cache_dir}") try: @@ -139,7 +139,7 @@ def _clear_disk_caches() -> None: except OSError as exc: safe_print(f" [cleanup] Warning: could not remove {cache_dir}: {exc}") - # Clean leaked temp directories/files (wmk_*, modelkit_compat_*, tmp*.onnx*) + # Clean leaked temp directories/files (winmlcli_*, winmlcli_compat_*, tmp*.onnx*) if _TEMP_DIR.is_dir(): cleaned = 0 for entry in _TEMP_DIR.iterdir(): @@ -363,8 +363,8 @@ def _run_build( config_path = model_dir / "build_config.json" model_dir.mkdir(parents=True, exist_ok=True) - # Remove any stale suffixed sub-configs BEFORE `wmk config` runs. - # For composite models `wmk config` writes files matching {stem}_*.json + # Remove any stale suffixed sub-configs BEFORE `winml config` runs. + # For composite models `winml config` writes files matching {stem}_*.json # (e.g., build_config_encoder.json); cleaning those AFTER the command would # delete the freshly-written configs and silently degrade composite builds # to single-model. Running cleanup first removes prior-run artifacts without diff --git a/scripts/e2e_eval/run_pytorch_baseline.py b/scripts/e2e_eval/run_pytorch_baseline.py index 85de2279c..2bed0f242 100644 --- a/scripts/e2e_eval/run_pytorch_baseline.py +++ b/scripts/e2e_eval/run_pytorch_baseline.py @@ -11,7 +11,7 @@ Dataset config is read from ``utils/dataset_config.py`` — the authoritative source shared with run_eval.py. When ``winml eval`` is implemented inside -ModelKit, it should import from the same location. +WinML CLI, it should import from the same location. Output: prints a single JSON object as the last line on stdout: {"metric": "", "value": , "num_samples": } @@ -59,8 +59,8 @@ def _emit_result(metric: str, value: float, num_samples: int) -> None: def _load_pytorch_model(model_id: str, task: str, device_str: str): """Load a native PyTorch model with the task-appropriate AutoModel class.""" import torch - from transformers import AutoConfig + from winml.modelkit.loader.task import resolve_task_and_model_class config = AutoConfig.from_pretrained(model_id) diff --git a/scripts/e2e_eval/run_sa_eval.py b/scripts/e2e_eval/run_sa_eval.py index daa730654..144584e5b 100644 --- a/scripts/e2e_eval/run_sa_eval.py +++ b/scripts/e2e_eval/run_sa_eval.py @@ -10,7 +10,7 @@ EPContext diff against cached compiled ONNX. Pipeline per model: - Stage 1: wmk export + Python optimize_onnx (default) + Stage 1: winml export + Python optimize_onnx (default) → graph_optimized.onnx Stage 2: ONNXStaticAnalyzer (enable_information=True) → sa_pre.json + optim_config @@ -82,8 +82,8 @@ def is_cached(path: Path) -> bool: return path.exists() and path.stat().st_size > 0 -def run_wmk_export(hf_id: str, task: str, output: Path) -> tuple[int, str]: - """Run wmk export via subprocess. Returns (rc, stderr_tail).""" +def run_winmlcli_export(hf_id: str, task: str, output: Path) -> tuple[int, str]: + """Run winml export via subprocess. Returns (rc, stderr_tail).""" args = [ sys.executable, "-m", @@ -134,7 +134,7 @@ def stage1_export_optimize( safe_print(" [Stage 1a] Export (cached)") else: safe_print(f" [Stage 1a] Exporting {hf_id}...") - rc, stderr = run_wmk_export(hf_id, task, exported_path) + rc, stderr = run_winmlcli_export(hf_id, task, exported_path) if rc != 0 or not is_cached(exported_path): safe_print(f" [ERROR] Export failed (rc={rc}): {stderr}") return None, "SKIP_EXPORT" @@ -286,7 +286,7 @@ def _run_compile( device: str = "npu", ep: str | None = None, ) -> tuple[int, str]: - """Run wmk compile --device --no-quantize. Returns (rc, stderr_tail).""" + """Run winml compile --device --no-quantize. Returns (rc, stderr_tail).""" cmd = [ sys.executable, "-m", diff --git a/scripts/e2e_eval/sa_comparison.py b/scripts/e2e_eval/sa_comparison.py index a4803de31..34630d6e8 100644 --- a/scripts/e2e_eval/sa_comparison.py +++ b/scripts/e2e_eval/sa_comparison.py @@ -90,7 +90,7 @@ def run_sa_with_info( def parse_sa_json(json_path: Path, ep: str = "QNNExecutionProvider") -> dict[str, str]: - """Parse wmk analyze JSON output into {pattern_id: level}. + """Parse winml analyze JSON output into {pattern_id: level}. Works for both subprocess-written JSON (lowercase keys in classification dict) and Python API-written JSON (SupportLevel enum serialized as diff --git a/scripts/e2e_eval/sa_report.py b/scripts/e2e_eval/sa_report.py index 0ad748145..8cb34a933 100644 --- a/scripts/e2e_eval/sa_report.py +++ b/scripts/e2e_eval/sa_report.py @@ -9,8 +9,6 @@ EPContext ground-truth accuracy, and per-model drill-down. """ -# ruff: noqa: E501 - from __future__ import annotations import json diff --git a/scripts/mcp_server.py b/scripts/mcp_server.py index a43f10e71..4e459159c 100644 --- a/scripts/mcp_server.py +++ b/scripts/mcp_server.py @@ -3,7 +3,7 @@ # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. # -------------------------------------------------------------------------- -"""Standalone MCP server for ModelKit inference. +"""Standalone MCP server for WinML CLI inference. This script is intentionally self-contained and does NOT import from winml.modelkit, avoiding heavy ML dependency imports (PyTorch etc.) @@ -150,7 +150,7 @@ def _guess_media_type(path: Path) -> str: def create_server(model_url: str) -> FastMCP: """Create MCP server with schema-driven per-model tools.""" model_url = model_url.rstrip("/") - mcp = FastMCP("modelkit-inference") + mcp = FastMCP("winmlcli-inference") # -- Static tool: list all loaded models -------------------------------- @@ -364,7 +364,7 @@ async def predict( inputs_json: str = "{}", params_json: str = "{}", ) -> str: - """Run inference on the ModelKit server. + """Run inference on the WinML CLI server. Use list_models first to discover loaded models and their schemas. @@ -404,15 +404,15 @@ async def predict( def main() -> None: """Parse arguments and run the MCP server.""" - parser = ArgumentParser(description="ModelKit MCP Server (standalone)") + parser = ArgumentParser(description="WinML CLI MCP Server (standalone)") parser.add_argument( "--server-url", default="http://localhost:8000", - help="Base URL of the ModelKit service (default: http://localhost:8000)", + help="Base URL of the WinML CLI service (default: http://localhost:8000)", ) args = parser.parse_args() server = create_server(args.server_url) - logger.info("Starting MCP server (ModelKit: %s)", args.server_url) + logger.info("Starting MCP server (WinML CLI: %s)", args.server_url) server.run(transport="stdio") diff --git a/src/winml/modelkit/__init__.py b/src/winml/modelkit/__init__.py index 59a45c313..87ec1ddad 100644 --- a/src/winml/modelkit/__init__.py +++ b/src/winml/modelkit/__init__.py @@ -2,9 +2,9 @@ # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. # -------------------------------------------------------------------------- -"""WML ModelKit - Accelerate Model Deployment on WinML. +"""WinML CLI - Accelerate Model Deployment on WinML. -ModelKit provides tools for converting PyTorch models to optimized ONNX format +WinML CLI provides tools for converting PyTorch models to optimized ONNX format with support for QNN (Qualcomm Neural Processing SDK) and OpenVINO backends. Key Features: diff --git a/src/winml/modelkit/__main__.py b/src/winml/modelkit/__main__.py index 82938228b..bce08f1be 100644 --- a/src/winml/modelkit/__main__.py +++ b/src/winml/modelkit/__main__.py @@ -4,7 +4,7 @@ # -------------------------------------------------------------------------- """Module execution entry point: python -m winml.modelkit. -This module enables running ModelKit CLI via Python module execution: +This module enables running WinML CLI via Python module execution: python -m winml.modelkit --version python -m winml.modelkit export --model MODEL --output PATH """ diff --git a/src/winml/modelkit/_warnings.py b/src/winml/modelkit/_warnings.py index 37381245e..6d8d42095 100644 --- a/src/winml/modelkit/_warnings.py +++ b/src/winml/modelkit/_warnings.py @@ -2,7 +2,7 @@ # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. # -------------------------------------------------------------------------- -"""Early warning filter configuration for ModelKit. +"""Early warning filter configuration for WinML CLI. This module configures warning filters ON IMPORT. It MUST have no dependencies on modelkit subpackages to avoid triggering the import chain that loads @@ -12,7 +12,7 @@ from . import _warnings # Filters are configured automatically Environment Variables: - MODELKIT_SHOW_ALL_WARNINGS: Set to "1" or "true" to disable warning suppression + WINMLCLI_SHOW_ALL_WARNINGS: Set to "1" or "true" to disable warning suppression """ from __future__ import annotations @@ -28,7 +28,7 @@ def _configure() -> None: os.environ.setdefault("TOKENIZERS_PARALLELISM", "false") # Allow users to see all warnings if they want - if os.environ.get("MODELKIT_SHOW_ALL_WARNINGS", "").lower() in ("1", "true", "yes"): + if os.environ.get("WINMLCLI_SHOW_ALL_WARNINGS", "").lower() in ("1", "true", "yes"): return # ========================================================================= diff --git a/src/winml/modelkit/analyze/__init__.py b/src/winml/modelkit/analyze/__init__.py index b0a9cddd7..4a7cb42e6 100644 --- a/src/winml/modelkit/analyze/__init__.py +++ b/src/winml/modelkit/analyze/__init__.py @@ -8,9 +8,6 @@ runtime support across NPU execution providers (QNN, Intel OpenVINO, AMD Quark). """ -__version__ = "0.1.0" -__author__ = "WML Team" - from .analyzer import ( AnalysisResult, AnalyzerConfig, diff --git a/src/winml/modelkit/analyze/rules/information_rules/default_information.json b/src/winml/modelkit/analyze/rules/information_rules/default_information.json index b186254fc..1fb10ae06 100644 --- a/src/winml/modelkit/analyze/rules/information_rules/default_information.json +++ b/src/winml/modelkit/analyze/rules/information_rules/default_information.json @@ -61,7 +61,7 @@ } ], "enabled": true, - "details": "The ReshapeTransposeReshapeOverlyHighDimPattern (commonly found in attention mechanisms) can be merged into a single ReshapeTransposeReshapeLowDimPattern. Use the ModelKit rewrite flag to apply this optimization before exporting." + "details": "The ReshapeTransposeReshapeOverlyHighDimPattern (commonly found in attention mechanisms) can be merged into a single ReshapeTransposeReshapeLowDimPattern. Use the WinML CLI rewrite flag to apply this optimization before exporting." } ], "enabled": true, diff --git a/src/winml/modelkit/analyze/rules/information_rules/qc_information.json b/src/winml/modelkit/analyze/rules/information_rules/qc_information.json index a834c88de..13448589e 100644 --- a/src/winml/modelkit/analyze/rules/information_rules/qc_information.json +++ b/src/winml/modelkit/analyze/rules/information_rules/qc_information.json @@ -15,7 +15,7 @@ } ], "enabled": true, - "details": "The ai.onnx Attention operator (opset 23+) may not be supported on this hardware. Use the ModelKit rewrite flag to expand it to the individual scaled dot-product attention subgraph (Transpose, Mul, MatMul, Add, Softmax, MatMul) which has broader hardware support." + "details": "The ai.onnx Attention operator (opset 23+) may not be supported on this hardware. Use the WinML CLI rewrite flag to expand it to the individual scaled dot-product attention subgraph (Transpose, Mul, MatMul, Add, Softmax, MatMul) which has broader hardware support." } ], "enabled": true, diff --git a/src/winml/modelkit/analyze/rules/runtime_check_rules/README.md b/src/winml/modelkit/analyze/rules/runtime_check_rules/README.md index d433e7f18..56904a219 100644 --- a/src/winml/modelkit/analyze/rules/runtime_check_rules/README.md +++ b/src/winml/modelkit/analyze/rules/runtime_check_rules/README.md @@ -40,13 +40,13 @@ Copy all runtime rule parquet files from: ### Option 4: Use external rules directories via environment variable -Set `MODELKIT_RULES_DIR` to one or more directories containing parquet rule artifacts. +Set `WINMLCLI_RULES_DIR` to one or more directories containing parquet rule artifacts. Important: relative paths are resolved from `src/winml/modelkit/analyze/utils/` (the directory of `rule_loader.py`), not from the current terminal working directory. -- Windows (PowerShell, user-level absolute path): `[Environment]::SetEnvironmentVariable("MODELKIT_RULES_DIR", "C:\*path*\rules", "User")` -- Windows (PowerShell, user-level repo-relative path): `[Environment]::SetEnvironmentVariable("MODELKIT_RULES_DIR", "..\..\..\..\..\..\ModelKitArtifacts\rules", "User")` +- Windows (PowerShell, user-level absolute path): `[Environment]::SetEnvironmentVariable("WINMLCLI_RULES_DIR", "C:\*path*\rules", "User")` +- Windows (PowerShell, user-level repo-relative path): `[Environment]::SetEnvironmentVariable("WINMLCLI_RULES_DIR", "..\..\..\..\..\..\ModelKitArtifacts\rules", "User")` Multiple directories are supported using `os.pathsep` (`;` on Windows, `:` on Unix-like systems). @@ -54,10 +54,10 @@ Multiple directories are supported using `os.pathsep` (`;` on Windows, `:` on Un The analyzer searches directories in this order: -1. Directories listed in `MODELKIT_RULES_DIR` (left to right) +1. Directories listed in `WINMLCLI_RULES_DIR` (left to right) 2. Embedded default directory: `src/winml/modelkit/analyze/rules/runtime_check_rules/` -`MODELKIT_RULES_DIR` takes precedence over the embedded default when the same parquet file +`WINMLCLI_RULES_DIR` takes precedence over the embedded default when the same parquet file exists in multiple locations. ## What happens if parquet rules are missing diff --git a/src/winml/modelkit/analyze/utils/rule_loader.py b/src/winml/modelkit/analyze/utils/rule_loader.py index 7c2b3200d..14b703735 100644 --- a/src/winml/modelkit/analyze/utils/rule_loader.py +++ b/src/winml/modelkit/analyze/utils/rule_loader.py @@ -19,7 +19,7 @@ #: Environment variable for additional runtime check rules directories. #: Use ``os.pathsep`` (`;` on Windows, `:` on Unix) to separate multiple paths. -MODELKIT_RULES_DIR_ENV = "MODELKIT_RULES_DIR" +WINMLCLI_RULES_DIR_ENV = "WINMLCLI_RULES_DIR" # Directory containing this module file. Relative env-var entries are resolved from here. _RULE_LOADER_DIR: Path = Path(__file__).resolve().parent @@ -29,8 +29,9 @@ Path(__file__).resolve().parent.parent / "rules" / "runtime_check_rules" ) + def _resolve_env_rules_dir_entry(entry: str) -> Path: - """Resolve a MODELKIT_RULES_DIR entry into an absolute directory path. + """Resolve a WINMLCLI_RULES_DIR entry into an absolute directory path. Absolute paths are used directly. Relative paths are interpreted relative to this module file's directory. @@ -45,7 +46,7 @@ def get_runtime_rules_search_dirs() -> list[Path]: """Return ordered list of directories to search for runtime rule artifacts. The search order is: - 1. Any extra directories listed in the :data:`MODELKIT_RULES_DIR` env var + 1. Any extra directories listed in the :data:`WINMLCLI_RULES_DIR` env var (separated by ``os.pathsep``). Absolute paths are used directly; relative paths are resolved relative to this module file directory. 2. Default embedded directory (``src/winml/modelkit/analyze/rules/runtime_check_rules/``) @@ -54,7 +55,7 @@ def get_runtime_rules_search_dirs() -> list[Path]: List of directory Paths (may include non-existent ones; callers filter). """ dirs: list[Path] = [] - env_val = os.environ.get(MODELKIT_RULES_DIR_ENV, "").strip() + env_val = os.environ.get(WINMLCLI_RULES_DIR_ENV, "").strip() if env_val: for entry in env_val.split(os.pathsep): entry = entry.strip() diff --git a/src/winml/modelkit/analyze/utils/timing_utils.py b/src/winml/modelkit/analyze/utils/timing_utils.py index 77eccb789..9b2cce187 100644 --- a/src/winml/modelkit/analyze/utils/timing_utils.py +++ b/src/winml/modelkit/analyze/utils/timing_utils.py @@ -15,7 +15,7 @@ from collections.abc import Callable -_TIMING_LOG_ENABLED = os.environ.get("MODELKIT_TIMING_LOG", "").strip().lower() in { +_TIMING_LOG_ENABLED = os.environ.get("WINMLCLI_TIMING_LOG", "").strip().lower() in { "1", "true", "yes", diff --git a/src/winml/modelkit/cache/__init__.py b/src/winml/modelkit/cache/__init__.py index 74648397e..031506075 100644 --- a/src/winml/modelkit/cache/__init__.py +++ b/src/winml/modelkit/cache/__init__.py @@ -2,7 +2,7 @@ # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. # -------------------------------------------------------------------------- -"""Cache management for ModelKit. +"""Cache management for WinML CLI. Provides deterministic path computation for cached build artifacts. Both ``from_pretrained()`` and ``winml build --use-cache`` use these diff --git a/src/winml/modelkit/cache/model.py b/src/winml/modelkit/cache/model.py index a8bd8f2fb..26c99dc27 100644 --- a/src/winml/modelkit/cache/model.py +++ b/src/winml/modelkit/cache/model.py @@ -2,7 +2,7 @@ # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. # -------------------------------------------------------------------------- -"""Model-aware cache operations for ModelKit. +"""Model-aware cache operations for WinML CLI. Adds model_id to slug mapping, model directory resolution, and directory scanning for cached artifact enumeration. @@ -94,14 +94,16 @@ def list_cached_models(cache_dir: Path | None = None) -> list[dict[str, str]]: if parsed is None: continue task_abbrev, config_hash, stage = parsed - results.append({ - "model_slug": model_slug, - "task_abbrev": task_abbrev, - "config_hash": config_hash, - "stage": stage, - "filename": artifact.name, - "path": str(artifact), - }) + results.append( + { + "model_slug": model_slug, + "task_abbrev": task_abbrev, + "config_hash": config_hash, + "stage": stage, + "filename": artifact.name, + "path": str(artifact), + } + ) return results @@ -120,7 +122,7 @@ def _parse_artifact_filename( last_sep = stem.rfind("_") if last_sep < 0: return None - stage = stem[last_sep + 1:] + stage = stem[last_sep + 1 :] prefix = stem[:last_sep] # prefix = "{task_abbrev}_{config_hash}" diff --git a/src/winml/modelkit/cache/path.py b/src/winml/modelkit/cache/path.py index 5ac7ca76d..80c43008b 100644 --- a/src/winml/modelkit/cache/path.py +++ b/src/winml/modelkit/cache/path.py @@ -2,7 +2,7 @@ # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. # -------------------------------------------------------------------------- -"""Cache path primitives for ModelKit. +"""Cache path primitives for WinML CLI. Pure functions that compute deterministic cache paths. This module knows about cache directories and key assembly — nothing about models. diff --git a/src/winml/modelkit/cli.py b/src/winml/modelkit/cli.py index 72b5743ff..b00c7da11 100644 --- a/src/winml/modelkit/cli.py +++ b/src/winml/modelkit/cli.py @@ -2,9 +2,9 @@ # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. # -------------------------------------------------------------------------- -"""WinML ModelKit CLI - Universal ONNX export from command line. +"""WinML CLI - Universal ONNX export from command line. -This module provides the main CLI entry point for ModelKit with lazy +This module provides the main entry point for WinML CLI with lazy command discovery from the commands/ directory. Usage: @@ -161,7 +161,7 @@ class LazyGroup(ActionGroup): parsing (no module execution). Extends :class:`ActionGroup` so every resolved subcommand is also - auto-instrumented with ModelKit telemetry. + auto-instrumented with WinML CLI telemetry. """ def list_commands(self, ctx: click.Context) -> list[str]: @@ -251,7 +251,7 @@ def format_commands(self, ctx: click.Context, formatter: click.HelpFormatter) -> ) @click.pass_context def main(ctx: click.Context, verbose: int, quiet: bool, debug: bool) -> None: - """WML ModelKit - Accelerate Model Deployment on WinML. + """WinML CLI - Accelerate Model Deployment on WinML. Universal ONNX export with QNN and OpenVINO backend support. """ diff --git a/src/winml/modelkit/commands/__init__.py b/src/winml/modelkit/commands/__init__.py index 1775593a9..46e39213f 100644 --- a/src/winml/modelkit/commands/__init__.py +++ b/src/winml/modelkit/commands/__init__.py @@ -2,7 +2,7 @@ # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. # -------------------------------------------------------------------------- -"""ModelKit CLI commands package. +"""WinML CLI commands package. Commands in this package are auto-discovered by cli.py. Each module should export a Click command as the primary object. diff --git a/src/winml/modelkit/commands/build.py b/src/winml/modelkit/commands/build.py index 5c620330a..2e707097f 100644 --- a/src/winml/modelkit/commands/build.py +++ b/src/winml/modelkit/commands/build.py @@ -2,7 +2,7 @@ # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. # -------------------------------------------------------------------------- -"""Build command for ModelKit CLI. +"""Build command for WinML CLI. Thin CLI wrapper around build_hf_model() and build_onnx_model() APIs. The build module owns the pipeline. This command parses flags, loads config, @@ -256,7 +256,7 @@ def _build_modules( "--use-cache", is_flag=True, default=False, - help="Use ModelKit global cache (~/.cache/winml/). Mutually exclusive with -o.", + help="Use WinML CLI global cache (~/.cache/winml/). Mutually exclusive with -o.", ) @click.option( "--rebuild", diff --git a/src/winml/modelkit/commands/catalog.py b/src/winml/modelkit/commands/catalog.py index caac36498..892a07525 100644 --- a/src/winml/modelkit/commands/catalog.py +++ b/src/winml/modelkit/commands/catalog.py @@ -2,9 +2,9 @@ # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. # -------------------------------------------------------------------------- -r"""Catalog command for ModelKit CLI. +r"""Catalog command for WinML CLI. -Lets users discover ModelKit's curated built-in model catalog. The catalog +Lets users discover WinML CLI's curated built-in model catalog. The catalog is stored in ``modelkit/data/hub_models.json`` and lists specific, validated HuggingFace model IDs with their task, architecture, and supported EPs. @@ -295,7 +295,7 @@ def _build_list_renderable( panel = Panel( table, - title=f"[bold]ModelKit Catalog[/bold] [dim]|[/dim] " + title=f"[bold]WinML CLI Catalog[/bold] [dim]|[/dim] " f"[bold cyan]{len(models)}[/bold cyan] validated model(s)", border_style="blue", padding=(0, 1), @@ -394,7 +394,7 @@ def catalog( device: str | None, output: Path | None, ) -> None: - r"""Browse ModelKit's curated built-in model catalog. + r"""Browse WinML CLI's curated built-in model catalog. Lists HuggingFace models that have been validated end-to-end (export -> quantise -> run on device) with confirmed accuracy results. diff --git a/src/winml/modelkit/commands/config.py b/src/winml/modelkit/commands/config.py index 260e4878a..5a762d167 100644 --- a/src/winml/modelkit/commands/config.py +++ b/src/winml/modelkit/commands/config.py @@ -2,7 +2,7 @@ # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. # -------------------------------------------------------------------------- -"""Config generation command (v2, Rich UI) for ModelKit CLI. +"""Config generation command (v2, Rich UI) for WinML CLI. Generates WinMLBuildConfig for a HuggingFace model or a pre-exported ONNX file by auto-detecting task, model class, and I/O specifications. diff --git a/src/winml/modelkit/commands/inspect.py b/src/winml/modelkit/commands/inspect.py index 10fca4ca6..893363ebe 100644 --- a/src/winml/modelkit/commands/inspect.py +++ b/src/winml/modelkit/commands/inspect.py @@ -2,7 +2,7 @@ # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. # -------------------------------------------------------------------------- -"""Inspect input model's ModelKit configuration. +"""Inspect input model's WinML CLI configuration. Resolves loader, exporter, and WinML inference class for a given model, showing what the build pipeline will use. @@ -99,7 +99,7 @@ def inspect( model_type: str | None, model_class: str | None, ) -> None: - r"""Inspect input model's ModelKit configuration. + r"""Inspect input model's WinML CLI configuration. Shows the loader, exporter, WinML inference class, I/O specs, and build resolution that the pipeline will use for the given model. diff --git a/src/winml/modelkit/commands/serve.py b/src/winml/modelkit/commands/serve.py index 9f7697f78..8b71451fb 100644 --- a/src/winml/modelkit/commands/serve.py +++ b/src/winml/modelkit/commands/serve.py @@ -3,7 +3,7 @@ # Licensed under the MIT License. # -------------------------------------------------------------------------- -"""Model serving command for ModelKit CLI. +"""Model serving command for WinML CLI. Usage: winml serve # Phase 0: CLI wrapper @@ -82,7 +82,7 @@ def serve( memory_budget: float, auto_reload: bool, ) -> None: - r"""Start ModelKit as a local REST API server. + r"""Start WinML CLI as a local REST API server. Without --model starts in Phase 0 (CLI wrapper mode). With --model starts in Phase 1/3 (inference server mode). diff --git a/src/winml/modelkit/commands/sys.py b/src/winml/modelkit/commands/sys.py index 161e7b1b0..244b5c41d 100644 --- a/src/winml/modelkit/commands/sys.py +++ b/src/winml/modelkit/commands/sys.py @@ -2,7 +2,7 @@ # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. # -------------------------------------------------------------------------- -"""System information command for ModelKit CLI. +"""System information command for WinML CLI. Displays detailed information about the system environment, including: - Python and OS information @@ -230,7 +230,7 @@ def _output_text(info: dict[str, Any], verbose: bool = False) -> None: # Title console.print( Panel.fit( - "[bold]ModelKit System Information[/bold]", + "[bold]WinML CLI System Information[/bold]", border_style="blue", ) ) @@ -550,7 +550,7 @@ def sysinfo( list_device: bool, list_ep: bool, ) -> None: - r"""Display system information for ModelKit export. + r"""Display system information for WinML CLI export. This command gathers and displays information relevant to ONNX model export, including Python version, library versions, hardware diff --git a/src/winml/modelkit/config/precision.py b/src/winml/modelkit/config/precision.py index c2b558910..81454d003 100644 --- a/src/winml/modelkit/config/precision.py +++ b/src/winml/modelkit/config/precision.py @@ -2,7 +2,7 @@ # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. # -------------------------------------------------------------------------- -"""Precision resolution for ModelKit. +"""Precision resolution for WinML CLI. Pure decision logic: given a device, precision, and available devices, produce a PrecisionPolicy. No I/O, no config mutation, no sysinfo dependency. diff --git a/src/winml/modelkit/core/__init__.py b/src/winml/modelkit/core/__init__.py index 8f4cc9d47..6590152a4 100644 --- a/src/winml/modelkit/core/__init__.py +++ b/src/winml/modelkit/core/__init__.py @@ -2,7 +2,7 @@ # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. # -------------------------------------------------------------------------- -"""Core utilities for ModelKit.""" +"""Core utilities for WinML CLI.""" from .model_input_generator import generate_dummy_inputs_from_specs from .node_metadata import ( diff --git a/src/winml/modelkit/core/node_metadata.py b/src/winml/modelkit/core/node_metadata.py index 901e96df9..662c2bd78 100644 --- a/src/winml/modelkit/core/node_metadata.py +++ b/src/winml/modelkit/core/node_metadata.py @@ -5,7 +5,7 @@ """Node-level metadata system for ONNX models. This module provides a comprehensive metadata system for tracking ONNX node origins, -transformations, optimizations, and semantic information through the ModelKit pipeline. +transformations, optimizations, and semantic information through the WinML CLI pipeline. Metadata is stored as custom ONNX node attributes with the 'winml.' prefix. """ diff --git a/src/winml/modelkit/data/__init__.py b/src/winml/modelkit/data/__init__.py index 68ba86923..353823069 100644 --- a/src/winml/modelkit/data/__init__.py +++ b/src/winml/modelkit/data/__init__.py @@ -2,7 +2,7 @@ # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. # -------------------------------------------------------------------------- -"""Data loading and preprocessing components for ModelKit.""" +"""Data loading and preprocessing components for WinML CLI.""" from . import ( dummy_dataset, diff --git a/src/winml/modelkit/export/__init__.py b/src/winml/modelkit/export/__init__.py index 017f28911..868c8a925 100644 --- a/src/winml/modelkit/export/__init__.py +++ b/src/winml/modelkit/export/__init__.py @@ -2,7 +2,7 @@ # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. # -------------------------------------------------------------------------- -"""WML Export - ONNX Export with Hierarchy Preservation. +"""WinML Export - ONNX Export with Hierarchy Preservation. This package provides: - WinMLExportConfig with input/output tensor specifications diff --git a/src/winml/modelkit/inference/engine.py b/src/winml/modelkit/inference/engine.py index 234287ccb..1c52932f5 100644 --- a/src/winml/modelkit/inference/engine.py +++ b/src/winml/modelkit/inference/engine.py @@ -3,7 +3,7 @@ # Licensed under the MIT License. # -------------------------------------------------------------------------- -"""InferenceEngine — core inference component for ModelKit. +"""InferenceEngine — core inference component for WinML CLI. Uses HF ``transformers.pipeline`` for preprocessing and postprocessing, sharing the same code path as ``winml eval``. The WinMLPreTrainedModel diff --git a/src/winml/modelkit/inspect/__init__.py b/src/winml/modelkit/inspect/__init__.py index 0b9e5cd4d..90371adb9 100644 --- a/src/winml/modelkit/inspect/__init__.py +++ b/src/winml/modelkit/inspect/__init__.py @@ -5,7 +5,7 @@ """Inspect module for analyzing HuggingFace models. Provides the inspect_model() function to analyze model compatibility -with ModelKit and display loader/exporter/WinML configurations. +with WinML CLI and display loader/exporter/WinML configurations. Usage: from winml.modelkit.inspect import inspect_model diff --git a/src/winml/modelkit/inspect/types.py b/src/winml/modelkit/inspect/types.py index 60d1c156e..bad08005e 100644 --- a/src/winml/modelkit/inspect/types.py +++ b/src/winml/modelkit/inspect/types.py @@ -12,7 +12,7 @@ class SupportLevel(Enum): """Support level for each component.""" - SUPPORTED = "supported" # Explicit config exists in ModelKit + SUPPORTED = "supported" # Explicit config exists in WinML CLI DEFAULT = "default" # Using framework defaults (TasksManager/Optimum) GENERIC = "generic" # Using generic fallback class UNSUPPORTED = "unsupported" # No viable path found diff --git a/src/winml/modelkit/models/hf/bart.py b/src/winml/modelkit/models/hf/bart.py index 87dde249e..16d938da8 100644 --- a/src/winml/modelkit/models/hf/bart.py +++ b/src/winml/modelkit/models/hf/bart.py @@ -64,8 +64,8 @@ facebook/bart-base, etc. Usage: - wmk config -m facebook/bart-large-cnn --task feature-extraction → encoder - wmk config -m facebook/bart-large-cnn --task text2text-generation → decoder + winml config -m facebook/bart-large-cnn --task feature-extraction → encoder + winml config -m facebook/bart-large-cnn --task text2text-generation → decoder """ from __future__ import annotations diff --git a/src/winml/modelkit/models/hf/marian.py b/src/winml/modelkit/models/hf/marian.py index f8492f3d5..6251ff4ce 100644 --- a/src/winml/modelkit/models/hf/marian.py +++ b/src/winml/modelkit/models/hf/marian.py @@ -78,8 +78,8 @@ Models: Helsinki-NLP/opus-mt-fr-en, opus-mt-en-ru, opus-mt-es-en, etc. Usage: - wmk config -m Helsinki-NLP/opus-mt-fr-en --task feature-extraction → encoder - wmk config -m Helsinki-NLP/opus-mt-fr-en --task text2text-generation → decoder + winml config -m Helsinki-NLP/opus-mt-fr-en --task feature-extraction → encoder + winml config -m Helsinki-NLP/opus-mt-fr-en --task text2text-generation → decoder """ from __future__ import annotations diff --git a/src/winml/modelkit/models/hf/mu2.py b/src/winml/modelkit/models/hf/mu2.py index 54f09a5d1..3efcabc5d 100644 --- a/src/winml/modelkit/models/hf/mu2.py +++ b/src/winml/modelkit/models/hf/mu2.py @@ -52,9 +52,9 @@ class for Mu2 (custom ``trust_remote_code`` model). Usage:: - wmk config -m path/to/mu2 --task translation --trust-remote-code -o mu2.json - wmk build -c mu2_encoder.json -m path/to/mu2 --trust-remote-code -o output/encoder - wmk build -c mu2_decoder.json -m path/to/mu2 --trust-remote-code -o output/decoder + winml config -m path/to/mu2 --task translation --trust-remote-code -o mu2.json + winml build -c mu2_encoder.json -m path/to/mu2 --trust-remote-code -o output/encoder + winml build -c mu2_decoder.json -m path/to/mu2 --trust-remote-code -o output/decoder """ from __future__ import annotations diff --git a/src/winml/modelkit/models/hf/t5.py b/src/winml/modelkit/models/hf/t5.py index e93cb1534..686f43562 100644 --- a/src/winml/modelkit/models/hf/t5.py +++ b/src/winml/modelkit/models/hf/t5.py @@ -18,8 +18,8 @@ Model: google-t5/t5-small, google-t5/t5-base, etc. Usage: - wmk config -m google-t5/t5-small --task feature-extraction → encoder - wmk config -m google-t5/t5-small --task text2text-generation → decoder + winml config -m google-t5/t5-small --task feature-extraction → encoder + winml config -m google-t5/t5-small --task text2text-generation → decoder """ from __future__ import annotations diff --git a/src/winml/modelkit/models/winml/composite_model.py b/src/winml/modelkit/models/winml/composite_model.py index 8a887cb2f..ed2ed20a6 100644 --- a/src/winml/modelkit/models/winml/composite_model.py +++ b/src/winml/modelkit/models/winml/composite_model.py @@ -12,13 +12,13 @@ Registry -------- ``@register_composite_model(model_type, task)`` registers a pipeline class. -``wmk config`` checks the registry to generate one config file per component:: +``winml config`` checks the registry to generate one config file per component:: - wmk config -m google-t5/t5-small --task translation -o t5.json + winml config -m google-t5/t5-small --task translation -o t5.json # → t5_encoder.json (feature-extraction) + t5_decoder.json (text2text-generation) - wmk build -c t5_encoder.json -m google-t5/t5-small -o output/encoder - wmk build -c t5_decoder.json -m google-t5/t5-small -o output/decoder + winml build -c t5_encoder.json -m google-t5/t5-small -o output/encoder + winml build -c t5_decoder.json -m google-t5/t5-small -o output/decoder Per-component kwargs -------------------- @@ -61,12 +61,12 @@ # ========================================================================= # Maps (model_type, task) → pipeline class with _SUB_MODEL_CONFIG. -# Used by `wmk config` to generate one config file per sub-component. +# Used by `winml config` to generate one config file per sub-component. COMPOSITE_MODEL_REGISTRY: dict[tuple[str, str], type] = {} def register_composite_model(model_type: str, task: str): - """Class decorator that registers a composite model for `wmk config`.""" + """Class decorator that registers a composite model for `winml config`.""" def decorator(cls: type) -> type: key = (model_type, task) diff --git a/src/winml/modelkit/onnx/__init__.py b/src/winml/modelkit/onnx/__init__.py index 4d83605e9..a3bc49d51 100644 --- a/src/winml/modelkit/onnx/__init__.py +++ b/src/winml/modelkit/onnx/__init__.py @@ -2,7 +2,7 @@ # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. # -------------------------------------------------------------------------- -"""ONNX model utilities for ModelKit. +"""ONNX model utilities for WinML CLI. Read and write winml.* metadata, extract I/O config from ONNX models. Canonical home for InputTensorSpec / OutputTensorSpec tensor spec dataclasses. diff --git a/src/winml/modelkit/onnx/domains.py b/src/winml/modelkit/onnx/domains.py index 53b421dd3..df75d16ef 100644 --- a/src/winml/modelkit/onnx/domains.py +++ b/src/winml/modelkit/onnx/domains.py @@ -2,7 +2,7 @@ # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. # -------------------------------------------------------------------------- -"""ONNX domain identifiers shared across ModelKit modules. +"""ONNX domain identifiers shared across WinML CLI modules. This module provides the ONNXDomain enum, extracted from the analyze onnx_opset package so that other packages (pattern, optim) can use it without diff --git a/src/winml/modelkit/onnx/dtypes.py b/src/winml/modelkit/onnx/dtypes.py index 8a35c8ee1..0351b5443 100644 --- a/src/winml/modelkit/onnx/dtypes.py +++ b/src/winml/modelkit/onnx/dtypes.py @@ -14,7 +14,7 @@ # FLOAT8E8M0, # STRING, -"""ONNX type conversion utilities shared across ModelKit modules. +"""ONNX type conversion utilities shared across WinML CLI modules. Canonical home for ONNX ↔ numpy ↔ TensorProto type mappings. """ diff --git a/src/winml/modelkit/onnx/persistence.py b/src/winml/modelkit/onnx/persistence.py index 78fb32236..4e3ec41b3 100644 --- a/src/winml/modelkit/onnx/persistence.py +++ b/src/winml/modelkit/onnx/persistence.py @@ -5,7 +5,7 @@ """ONNX model persistence utilities. Load, save, and clean up ONNX models with external data support. -Designed as the canonical persistence API for ModelKit ONNX workflows. +Designed as the canonical persistence API for WinML CLI ONNX workflows. See also: docs/design/onnx/persistence.md (if available) """ diff --git a/src/winml/modelkit/onnx/shape.py b/src/winml/modelkit/onnx/shape.py index 52d8c5fdb..26ea3cf39 100644 --- a/src/winml/modelkit/onnx/shape.py +++ b/src/winml/modelkit/onnx/shape.py @@ -110,7 +110,7 @@ def infer_onnx_shapes( so the caller never receives a model with dangling external-data refs. """ if get_model_size(model) >= EXTERNAL_DATA_THRESHOLD: - with tempfile.TemporaryDirectory(prefix="modelkit_compat_") as tmp_dir: + with tempfile.TemporaryDirectory(prefix="winmlcli_compat_") as tmp_dir: tmp_path = str(Path(tmp_dir) / "model.onnx") # onnx.save mutates model in-place; restore immediately onnx.save(model, tmp_path, save_as_external_data=True) diff --git a/src/winml/modelkit/onnx/utils.py b/src/winml/modelkit/onnx/utils.py index 87426a545..e42d3407e 100644 --- a/src/winml/modelkit/onnx/utils.py +++ b/src/winml/modelkit/onnx/utils.py @@ -44,7 +44,7 @@ def check_onnx_model( if get_model_size(model) >= EXTERNAL_DATA_THRESHOLD: try: - with tempfile.TemporaryDirectory(prefix="modelkit_compat_") as tmp_dir: + with tempfile.TemporaryDirectory(prefix="winmlcli_compat_") as tmp_dir: tmp_path = str(Path(tmp_dir) / "model.onnx") # onnx.save mutates model in-place; restore immediately onnx.save(model, tmp_path, save_as_external_data=True) diff --git a/src/winml/modelkit/optracing/__init__.py b/src/winml/modelkit/optracing/__init__.py index e0c02a3d5..7e463d177 100644 --- a/src/winml/modelkit/optracing/__init__.py +++ b/src/winml/modelkit/optracing/__init__.py @@ -2,7 +2,7 @@ # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. # -------------------------------------------------------------------------- -"""Operator-level profiling for ModelKit.""" +"""Operator-level profiling for WinML CLI.""" from __future__ import annotations diff --git a/src/winml/modelkit/pattern/__init__.py b/src/winml/modelkit/pattern/__init__.py index d31b1732f..38f0112c4 100644 --- a/src/winml/modelkit/pattern/__init__.py +++ b/src/winml/modelkit/pattern/__init__.py @@ -3,7 +3,7 @@ # Licensed under the MIT License. # -------------------------------------------------------------------------- -"""Shared pattern matching infrastructure for ModelKit. +"""Shared pattern matching infrastructure for WinML CLI. This package provides pattern matching, input generation, and graph rewriting infrastructure used by both the static analyzer and the optimizer. diff --git a/src/winml/modelkit/pattern/base.py b/src/winml/modelkit/pattern/base.py index 7fdd37daf..5d93da5de 100644 --- a/src/winml/modelkit/pattern/base.py +++ b/src/winml/modelkit/pattern/base.py @@ -942,7 +942,7 @@ def get_onnx_model( # Create model model = helper.make_model( graph, - producer_name="modelkit-pattern-generator", + producer_name="winmlcli-pattern-generator", opset_imports=opset_imports, ) # Set IR version to 11 for compatibility with older onnxruntime versions diff --git a/src/winml/modelkit/serve/__init__.py b/src/winml/modelkit/serve/__init__.py index 5217dd82a..51e210b5d 100644 --- a/src/winml/modelkit/serve/__init__.py +++ b/src/winml/modelkit/serve/__init__.py @@ -2,7 +2,7 @@ # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. # -------------------------------------------------------------------------- -"""WinML ModelKit serving package. +"""WinML CLI serving package. Public API: InferenceEngine — core inference component (re-exported from inference/) diff --git a/src/winml/modelkit/serve/app.py b/src/winml/modelkit/serve/app.py index 96a2dfb46..fb69e962d 100644 --- a/src/winml/modelkit/serve/app.py +++ b/src/winml/modelkit/serve/app.py @@ -151,7 +151,7 @@ async def lifespan(app: FastAPI): app.state.manager.shutdown() app = FastAPI( - title="ModelKit Inference Server", + title="WinML CLI Inference Server", version=__version__, description=( "Local REST API for WinML model inference.\n\n" @@ -417,7 +417,7 @@ async def get_mcp_schema() -> dict[str, Any]: return { "tools": mcp_tools, "server_info": { - "name": "ModelKit Inference", + "name": "WinML CLI Inference", "version": __version__, "models": [ {"model_id": mid, "task": t} @@ -840,7 +840,7 @@ def print_startup_banner( console = Console() console.print() - console.print("[bold]ModelKit Inference Server[/bold]") + console.print("[bold]WinML CLI Inference Server[/bold]") console.print(f"Model: {model_path or '(none — load via POST /v1/models)'}") if task: console.print(f"Task: {task}") diff --git a/src/winml/modelkit/serve/cli_api.py b/src/winml/modelkit/serve/cli_api.py index d6ae401fa..c47a5cf56 100644 --- a/src/winml/modelkit/serve/cli_api.py +++ b/src/winml/modelkit/serve/cli_api.py @@ -134,7 +134,7 @@ class HealthResponse(BaseModel): # --------------------------------------------------------------------------- app = FastAPI( - title="ModelKit API", + title="WinML CLI API", description=( "Phase 0 — CLI Wrapper. Each endpoint invokes a `winml` CLI command and " "returns structured JSON in `result` (where supported) plus raw " @@ -378,7 +378,7 @@ def print_startup_banner(host: str, port: int) -> None: # noqa: D103 console.print() console.print( Panel.fit( - f"[bold]ModelKit API Server[/bold]\n" + f"[bold]WinML CLI API Server[/bold]\n" f"[dim]Mode:[/dim] CLI Wrapper (Phase 0)\n" f"[dim]Version:[/dim] {__version__}\n\n" f"[dim]API:[/dim] [link]http://{host}:{port}[/link]\n" diff --git a/src/winml/modelkit/serve/static/index.html b/src/winml/modelkit/serve/static/index.html index 92ee107a0..1b34d9c90 100644 --- a/src/winml/modelkit/serve/static/index.html +++ b/src/winml/modelkit/serve/static/index.html @@ -3,7 +3,7 @@ -ModelKit Inference Demo +WinML CLI Inference Demo