Merged · Changes from all commits · 25 commits
1 change: 1 addition & 0 deletions Makefile
@@ -43,6 +43,7 @@ typecheck-pyright:
 .PHONY: typecheck-mypy
 typecheck-mypy:
     uv run mypy
+    uv run mypy typings/ --strict

 .PHONY: typecheck
 typecheck: typecheck-pyright ## Run static type checking
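The added line runs mypy in strict mode over the stub directory, so the stubs themselves are validated, not just the code that uses them. A minimal sketch of what `--strict` enforces in a stub (hypothetical file, not part of this PR):

```python
# example.pyi — checked with `uv run mypy example.pyi --strict`
from typing import Any

def load(path: str, **kwargs: Any) -> bytes: ...  # fully annotated: passes

def load_untyped(path): ...  # error under --strict: function is missing a type annotation
```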
4 changes: 2 additions & 2 deletions pydantic_ai_slim/pydantic_ai/models/outlines.py
@@ -60,7 +60,7 @@
 )
 from outlines.models.vllm_offline import (
     VLLMOffline,
-    from_vllm_offline,  # pyright: ignore[reportUnknownVariableType]
+    from_vllm_offline,
 )
 from outlines.types.dsl import JsonSchema
 from PIL import Image as PILImage
@@ -393,7 +393,7 @@ def _format_vllm_offline_inference_kwargs(  # pragma: no cover
     self, model_settings: dict[str, Any]
 ) -> dict[str, Any]:
     """Select the model settings supported by the vLLMOffline model."""
-    from vllm.sampling_params import SamplingParams  # pyright: ignore
+    from vllm.sampling_params import SamplingParams

     supported_args = [
         'max_tokens',
2 changes: 1 addition & 1 deletion pydantic_ai_slim/pyproject.toml
@@ -79,7 +79,7 @@ bedrock = ["boto3>=1.40.14"]
 huggingface = ["huggingface-hub[inference]>=0.33.5"]
 outlines-transformers = ["outlines[transformers]>=1.0.0, <1.3.0; (sys_platform != 'darwin' or platform_machine != 'x86_64')", "transformers>=4.0.0", "pillow", "torch; (sys_platform != 'darwin' or platform_machine != 'x86_64')"]
 outlines-llamacpp = ["outlines[llamacpp]>=1.0.0, <1.3.0"]
-outlines-mlxlm = ["outlines[mlxlm]>=1.0.0, <1.3.0; (sys_platform != 'darwin' or platform_machine != 'x86_64')"]
+outlines-mlxlm = ["outlines[mlxlm]>=1.0.0, <1.3.0; platform_system == 'Darwin' and platform_machine == 'arm64'"]
 outlines-sglang = ["outlines[sglang]>=1.0.0, <1.3.0", "pillow"]
 outlines-vllm-offline = ["vllm; python_version < '3.12' and (sys_platform != 'darwin' or platform_machine != 'x86_64')", "torch; (sys_platform != 'darwin' or platform_machine != 'x86_64')", "outlines>=1.0.0, <1.3.0"]
 # Tools
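The mlxlm extra now installs only on Apple Silicon macOS. Environment markers like this can be checked against the current interpreter with the `packaging` library; a quick sketch (assuming `packaging` is installed):

```python
from packaging.markers import Marker

# The updated marker from the outlines-mlxlm extra above.
marker = Marker("platform_system == 'Darwin' and platform_machine == 'arm64'")

# evaluate() tests the marker against the running interpreter's environment:
# True on an M-series Mac, False on Intel macOS, Linux, and Windows.
print(marker.evaluate())
```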
6 changes: 5 additions & 1 deletion pyproject.toml
@@ -56,7 +56,7 @@ dbos = ["pydantic-ai-slim[dbos]=={{ version }}"]
 prefect = ["pydantic-ai-slim[prefect]=={{ version }}"]
 outlines-transformers = ["pydantic-ai-slim[outlines-transformers]=={{ version }}"]
 outlines-llamacpp = ["pydantic-ai-slim[outlines-llamacpp]=={{ version }}"]
-outlines-mlxlm = ["pydantic-ai-slim[outlines-mlxlm]=={{ version }}"]
+outlines-mlxlm = ["pydantic-ai-slim[outlines-mlxlm]=={{ version }}; platform_system == 'Darwin' and platform_machine == 'arm64'"]
 outlines-sglang = ["pydantic-ai-slim[outlines-sglang]=={{ version }}"]
 outlines-vllm-offline = ["pydantic-ai-slim[outlines-vllm-offline]=={{ version }}"]

@@ -142,6 +142,7 @@ include = [
     "clai/**/*.py",
     "tests/**/*.py",
     "docs/**/*.py",
+    "typings/**/*.pyi",
 ]

 [tool.ruff.lint]
@@ -186,8 +187,10 @@ quote-style = "single"
 "examples/**/*.py" = ["D101", "D103"]
 "tests/**/*.py" = ["D"]
 "docs/**/*.py" = ["D"]
+"typings/**/*.pyi" = ["F401", "PYI044", "PYI035", "ANN401"]

 [tool.pyright]
+stubPath = "typings"
 pythonVersion = "3.12"
 typeCheckingMode = "strict"
 reportMissingTypeStubs = false
@@ -217,6 +220,7 @@ exclude = [
 [tool.mypy]
 files = "tests/typed_agent.py,tests/typed_graph.py"
 strict = true
+mypy_path = "typings"

 [tool.pytest.ini_options]
 testpaths = ["tests", "docs/.hooks"]
71 changes: 33 additions & 38 deletions tests/models/test_outlines.py
@@ -44,7 +44,7 @@
 with try_import() as imports_successful:
     import outlines

-    from pydantic_ai.models.outlines import OutlinesModel
+    from pydantic_ai.models.outlines import OutlinesAsyncBaseModel, OutlinesModel
     from pydantic_ai.providers.outlines import OutlinesProvider

 with try_import() as transformer_imports_successful:
@@ -54,11 +54,11 @@
     import llama_cpp

 with try_import() as vllm_imports_successful:
-    import vllm  # type: ignore[reportMissingImports]
+    import vllm

     # We try to load the vllm model to ensure it is available
     try:  # pragma: no lax cover
-        vllm.LLM('microsoft/Phi-3-mini-4k-instruct')  # type: ignore
+        vllm.LLM('microsoft/Phi-3-mini-4k-instruct')
     except RuntimeError as e:  # pragma: lax no cover
         if 'Found no NVIDIA driver' in str(e) or 'Device string must not be empty' in str(e):
             # Treat as import failure
@@ -97,68 +97,67 @@

 @pytest.fixture
 def mock_async_model() -> OutlinesModel:
-    class MockOutlinesAsyncModel(outlines.models.base.AsyncModel):
+    class MockOutlinesAsyncModel(OutlinesAsyncBaseModel):
         """Mock an OutlinesAsyncModel because no Outlines local models have an async version.

         The `__call__` and `stream` methods will be called by the Pydantic AI model while the other methods are
         only implemented because they are abstract methods in the OutlinesAsyncModel class.
         """

-        async def __call__(self, model_input, output_type, backend, **inference_kwargs):  # type: ignore[reportMissingParameterType]
+        async def __call__(self, model_input: Any, output_type: Any, backend: Any, **inference_kwargs: Any) -> str:
             return 'test'

-        async def stream(self, model_input, output_type, backend, **inference_kwargs):  # type: ignore[reportMissingParameterType]
+        async def stream(self, model_input: Any, output_type: Any, backend: Any, **inference_kwargs: Any):
             for _ in range(2):
                 yield 'test'

-        async def generate(self, model_input, output_type, **inference_kwargs):  # type: ignore[reportMissingParameterType]
-            ...  # pragma: no cover
+        async def generate(self, model_input: Any, output_type: Any, **inference_kwargs: Any): ...  # pragma: no cover

-        async def generate_batch(self, model_input, output_type, **inference_kwargs):  # type: ignore[reportMissingParameterType]
-            ...  # pragma: no cover
+        async def generate_batch(
+            self, model_input: Any, output_type: Any, **inference_kwargs: Any
+        ): ...  # pragma: no cover

-        async def generate_stream(self, model_input, output_type, **inference_kwargs):  # type: ignore[reportMissingParameterType]
-            ...  # pragma: no cover
+        async def generate_stream(
+            self, model_input: Any, output_type: Any, **inference_kwargs: Any
+        ): ...  # pragma: no cover

     return OutlinesModel(MockOutlinesAsyncModel(), provider=OutlinesProvider())


 @pytest.fixture
 def transformers_model() -> OutlinesModel:
-    hf_model = transformers.AutoModelForCausalLM.from_pretrained(  # type: ignore
+    hf_model = transformers.AutoModelForCausalLM.from_pretrained(
         'erwanf/gpt2-mini',
         device_map='cpu',
     )
-    hf_tokenizer = transformers.AutoTokenizer.from_pretrained('erwanf/gpt2-mini')  # type: ignore
+    hf_tokenizer = transformers.AutoTokenizer.from_pretrained('erwanf/gpt2-mini')
     chat_template = '{% for message in messages %}{{ message.role }}: {{ message.content }}{% endfor %}'
     hf_tokenizer.chat_template = chat_template
     outlines_model = outlines.models.transformers.from_transformers(
-        hf_model,  # type: ignore[reportUnknownArgumentType]
-        hf_tokenizer,  # type: ignore
+        hf_model,
+        hf_tokenizer,
     )
     return OutlinesModel(outlines_model, provider=OutlinesProvider())


 @pytest.fixture
 def transformers_multimodal_model() -> OutlinesModel:
-    hf_model = transformers.LlavaForConditionalGeneration.from_pretrained(  # type: ignore
+    hf_model = transformers.LlavaForConditionalGeneration.from_pretrained(
         'trl-internal-testing/tiny-LlavaForConditionalGeneration',
         device_map='cpu',
     )
-    hf_processor = transformers.AutoProcessor.from_pretrained(  # type: ignore
-        'trl-internal-testing/tiny-LlavaForConditionalGeneration'
-    )
+    hf_processor = transformers.AutoProcessor.from_pretrained('trl-internal-testing/tiny-LlavaForConditionalGeneration')
     outlines_model = outlines.models.transformers.from_transformers(
         hf_model,
-        hf_processor,  # type: ignore
+        hf_processor,
     )
     return OutlinesModel(outlines_model, provider=OutlinesProvider())


 @pytest.fixture
 def llamacpp_model() -> OutlinesModel:
     outlines_model_llamacpp = outlines.models.llamacpp.from_llamacpp(
-        llama_cpp.Llama.from_pretrained(  # type: ignore
+        llama_cpp.Llama.from_pretrained(
             repo_id='M4-ai/TinyMistral-248M-v2-Instruct-GGUF',
             filename='TinyMistral-248M-v2-Instruct.Q4_K_M.gguf',
         )
@@ -168,9 +167,7 @@ def llamacpp_model() -> OutlinesModel:

 @pytest.fixture
 def mlxlm_model() -> OutlinesModel:  # pragma: no cover
-    outlines_model = outlines.models.mlxlm.from_mlxlm(
-        *mlx_lm.load('mlx-community/SmolLM-135M-Instruct-4bit')  # type: ignore
-    )
+    outlines_model = outlines.models.mlxlm.from_mlxlm(*mlx_lm.load('mlx-community/SmolLM-135M-Instruct-4bit'))
     return OutlinesModel(outlines_model, provider=OutlinesProvider())


@@ -184,9 +181,7 @@ def sglang_model() -> OutlinesModel:

 @pytest.fixture
 def vllm_model_offline() -> OutlinesModel:  # pragma: no cover
-    outlines_model = outlines.models.vllm_offline.from_vllm_offline(  # type: ignore
-        vllm.LLM('microsoft/Phi-3-mini-4k-instruct')  # type: ignore
-    )
+    outlines_model = outlines.models.vllm_offline.from_vllm_offline(vllm.LLM('microsoft/Phi-3-mini-4k-instruct'))
     return OutlinesModel(outlines_model, provider=OutlinesProvider())


@@ -201,18 +196,18 @@ def binary_image() -> BinaryImage:
     pytest.param(
         'from_transformers',
         lambda: (
-            transformers.AutoModelForCausalLM.from_pretrained(  # type: ignore
+            transformers.AutoModelForCausalLM.from_pretrained(
                 'erwanf/gpt2-mini',
                 device_map='cpu',
             ),
-            transformers.AutoTokenizer.from_pretrained('erwanf/gpt2-mini'),  # type: ignore
+            transformers.AutoTokenizer.from_pretrained('erwanf/gpt2-mini'),
         ),
         marks=skip_if_transformers_imports_unsuccessful,
     ),
     pytest.param(
         'from_llamacpp',
         lambda: (
-            llama_cpp.Llama.from_pretrained(  # type: ignore
+            llama_cpp.Llama.from_pretrained(
                 repo_id='M4-ai/TinyMistral-248M-v2-Instruct-GGUF',
                 filename='TinyMistral-248M-v2-Instruct.Q4_K_M.gguf',
             ),
@@ -221,7 +216,7 @@ def binary_image() -> BinaryImage:
     ),
     pytest.param(
         'from_mlxlm',
-        lambda: mlx_lm.load('mlx-community/SmolLM-135M-Instruct-4bit'),  # type: ignore
+        lambda: mlx_lm.load('mlx-community/SmolLM-135M-Instruct-4bit'),
         marks=skip_if_mlxlm_imports_unsuccessful,
     ),
     pytest.param(
@@ -231,7 +226,7 @@ def binary_image() -> BinaryImage:
     ),
     pytest.param(
         'from_vllm_offline',
-        lambda: (vllm.LLM('microsoft/Phi-3-mini-4k-instruct'),),  # type: ignore
+        lambda: (vllm.LLM('microsoft/Phi-3-mini-4k-instruct'),),
         marks=skip_if_vllm_imports_unsuccessful,
     ),
 ]
@@ -260,18 +255,18 @@ def test_init(model_loading_function_name: str, args: Callable[[], tuple[Any]])
     pytest.param(
         'from_transformers',
         lambda: (
-            transformers.AutoModelForCausalLM.from_pretrained(  # type: ignore
+            transformers.AutoModelForCausalLM.from_pretrained(
                 'erwanf/gpt2-mini',
                 device_map='cpu',
             ),
-            transformers.AutoTokenizer.from_pretrained('erwanf/gpt2-mini'),  # type: ignore
+            transformers.AutoTokenizer.from_pretrained('erwanf/gpt2-mini'),
         ),
         marks=skip_if_transformers_imports_unsuccessful,
     ),
     pytest.param(
         'from_llamacpp',
         lambda: (
-            llama_cpp.Llama.from_pretrained(  # type: ignore
+            llama_cpp.Llama.from_pretrained(
                 repo_id='M4-ai/TinyMistral-248M-v2-Instruct-GGUF',
                 filename='TinyMistral-248M-v2-Instruct.Q4_K_M.gguf',
             ),
@@ -280,7 +275,7 @@ def test_init(model_loading_function_name: str, args: Callable[[], tuple[Any]])
     ),
     pytest.param(
         'from_mlxlm',
-        lambda: mlx_lm.load('mlx-community/SmolLM-135M-Instruct-4bit'),  # type: ignore
+        lambda: mlx_lm.load('mlx-community/SmolLM-135M-Instruct-4bit'),
         marks=skip_if_mlxlm_imports_unsuccessful,
     ),
     pytest.param(
@@ -290,7 +285,7 @@ def test_init(model_loading_function_name: str, args: Callable[[], tuple[Any]])
     ),
     pytest.param(
         'from_vllm_offline',
-        lambda: (vllm.LLM('microsoft/Phi-3-mini-4k-instruct'),),  # type: ignore
+        lambda: (vllm.LLM('microsoft/Phi-3-mini-4k-instruct'),),
         marks=skip_if_vllm_imports_unsuccessful,
     ),
 ]
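For context, the fixtures above feed tests of roughly the following shape (an assumed sketch based on the mock's behavior; the actual test bodies fall outside this diff):

```python
from pydantic_ai import Agent

async def test_mock_async_model_request(mock_async_model: OutlinesModel) -> None:
    # The mock's __call__ always returns the string 'test', so the agent's
    # final output should be exactly that.
    agent = Agent(mock_async_model)
    result = await agent.run('hello')
    assert result.output == 'test'
```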
27 changes: 27 additions & 0 deletions typings/README.md
@@ -0,0 +1,27 @@
Stub files (`*.pyi`) contain type hints used only by type checkers, not at
runtime. They were introduced in
[PEP 484](https://peps.python.org/pep-0484/#stub-files). For example, the
[`typeshed`](https://github.com/python/typeshed) repository maintains a
collection of such stubs for the Python standard library and some third-party
libraries.

The `./typings` folder contains type information only for the parts of
third-party dependencies used in the `pydantic-ai` codebase. These stubs must be
manually maintained. When a dependency's API changes, both the codebase and the
stubs need to be updated. There are two ways to update the stubs:

1. **Manual update:** Check the dependency's source code and copy the type
information to `./typings`. For example, take the `from_pretrained()` method
of the `Llama` class in `llama-cpp-python`. The
[source code](https://github.com/abetlen/llama-cpp-python/blob/main/llama_cpp/llama.py#L2240)
contains the type information that is copied to `./typings/llama_cpp.pyi`.
This eliminates the need for `# type: ignore` comments in the codebase.

2. **Update with AI coding assistants:** Most dependencies maintain `llms.txt`
and `llms-full.txt` files with their documentation. This information is
compiled by [Context7](https://context7.com). For example, the
`llama-cpp-python` library is documented
[here](https://github.com/abetlen/llama-cpp-python). MCP servers such as
[this one by Upstash](https://github.com/upstash/context7) provide AI coding
assistants access to Context7. AI coding assistants such as VS Code Copilot
or Cursor can reliably generate and update the stubs.
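As a concrete illustration of the manual workflow in point 1: given an untyped function in a dependency, the hand-written stub mirrors its signature with full annotations (hypothetical module and signature):

```python
# The dependency ships untyped source, e.g. third_party/loader.py:
#
#     def load(path, device=None): ...
#
# The matching hand-maintained stub, typings/third_party/loader.pyi:
from typing import Any

def load(path: str, device: str | None = None) -> Any: ...
```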
19 changes: 19 additions & 0 deletions typings/llama_cpp.pyi
@@ -0,0 +1,19 @@
from collections.abc import Sequence
from os import PathLike
from typing import Any, Literal

from typing_extensions import Self

class Llama:
    def __init__(self, *args: Any, **kwargs: Any) -> None: ...
    @classmethod
    def from_pretrained(
        cls,
        repo_id: str,
        filename: str | None = None,
        additional_files: Sequence[str] | None = None,
        local_dir: str | PathLike[str] | None = None,
        local_dir_use_symlinks: bool | Literal['auto'] = 'auto',
        cache_dir: str | PathLike[str] | None = None,
        **kwargs: Any,
    ) -> Self: ...
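With this stub on the configured stub path, call sites like the `llamacpp_model` fixture above type-check without suppressions:

```python
from llama_cpp import Llama

# Resolved against typings/llama_cpp.pyi: repo_id and filename are typed,
# and the classmethod returns Self, so `llama` is inferred as Llama.
llama = Llama.from_pretrained(
    repo_id='M4-ai/TinyMistral-248M-v2-Instruct-GGUF',
    filename='TinyMistral-248M-v2-Instruct.Q4_K_M.gguf',
)
```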
6 changes: 6 additions & 0 deletions typings/mlx/__init__.pyi
@@ -0,0 +1,6 @@
from typing import Any

from . import nn

# mlx is imported as a package, primarily for mlx.nn
__all__: list[str] = []
3 changes: 3 additions & 0 deletions typings/mlx/nn.pyi
@@ -0,0 +1,3 @@
from typing import Any

class Module: ...
7 changes: 7 additions & 0 deletions typings/mlx_lm.pyi
@@ -0,0 +1,7 @@
from typing import Any

from mlx.nn import Module
from transformers.tokenization_utils import PreTrainedTokenizer

def load(model_path: str | None = None, *args: Any, **kwargs: Any) -> tuple[Module, PreTrainedTokenizer]: ...
def generate_step(*args: Any, **kwargs: Any) -> Any: ...
3 changes: 3 additions & 0 deletions typings/outlines/__init__.pyi
@@ -0,0 +1,3 @@
from . import models

__all__: list[str] = []
18 changes: 18 additions & 0 deletions typings/outlines/inputs.pyi
@@ -0,0 +1,18 @@
from __future__ import annotations

from collections.abc import Sequence
from typing import Any

from PIL.Image import Image as PILImage

class Chat:
    def __init__(self, messages: list[dict[str, Any]] | None = None) -> None: ...
    def add_system_message(self, content: str) -> None: ...
    def add_user_message(self, content: str | Sequence[str | Image]) -> None: ...
    def add_assistant_message(self, content: str | list[str | Image]) -> None: ...
    def extend(self, messages: list[dict[str, Any]]) -> None: ...
    def append(self, message: dict[str, Any]) -> None: ...
    def pop(self) -> dict[str, Any] | None: ...

class Image:
    def __init__(self, image: PILImage) -> None: ...
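A short usage sketch implied by these signatures (assumed usage, not code from this PR):

```python
from PIL import Image as PILImage
from outlines.inputs import Chat, Image

chat = Chat()
chat.add_system_message('You are terse.')
# User content may mix text and images, per the add_user_message signature.
chat.add_user_message(['Describe this image:', Image(PILImage.new('RGB', (8, 8)))])
```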
3 changes: 3 additions & 0 deletions typings/outlines/models/__init__.pyi
@@ -0,0 +1,3 @@
from . import base, llamacpp, mlxlm, sglang, transformers, vllm_offline

__all__: list[str] = []
10 changes: 10 additions & 0 deletions typings/outlines/models/base.pyi
@@ -0,0 +1,10 @@
from collections.abc import AsyncIterable, Iterable
from typing import Any

class Model:
    def __call__(self, *args: Any, **kwargs: Any) -> Any: ...
    def stream(self, *args: Any, **kwargs: Any) -> Iterable[Any]: ...

class AsyncModel:
    async def __call__(self, *args: Any, **kwargs: Any) -> Any: ...
    def stream(self, *args: Any, **kwargs: Any) -> AsyncIterable[Any]: ...
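Note that `AsyncModel.stream` is declared as a plain `def` returning `AsyncIterable[Any]`, not an `async def`: calling an async generator function returns an async iterable immediately, without awaiting, which matches how the mock's `stream` in the test suite behaves. A self-contained sketch of that semantics:

```python
import asyncio
from collections.abc import AsyncIterator

async def stream() -> AsyncIterator[str]:
    # `async def` with `yield` makes this an async generator function;
    # calling stream() returns an async iterable without awaiting it.
    for _ in range(2):
        yield 'test'

async def main() -> None:
    async for chunk in stream():
        print(chunk)  # prints 'test' twice

asyncio.run(main())
```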
10 changes: 10 additions & 0 deletions typings/outlines/models/llamacpp.pyi
@@ -0,0 +1,10 @@
from typing import TYPE_CHECKING

from outlines.models.base import Model

if TYPE_CHECKING:
    from llama_cpp import Llama

class LlamaCpp(Model): ...

def from_llamacpp(model: Llama) -> LlamaCpp: ...