Merged · Changes from all commits · 25 commits
1 change: 1 addition & 0 deletions Makefile
@@ -43,6 +43,7 @@ typecheck-pyright:
 .PHONY: typecheck-mypy
 typecheck-mypy:
     uv run mypy
+    uv run mypy typings/ --strict

 .PHONY: typecheck
 typecheck: typecheck-pyright ## Run static type checking
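The added line runs mypy in strict mode over the stub directory, so the stubs themselves are validated, not just the code that uses them. A minimal sketch of what `--strict` enforces in a stub (hypothetical file, not part of this PR):

```python
# example.pyi — checked with `uv run mypy example.pyi --strict`
from typing import Any

def load(path: str, **kwargs: Any) -> bytes: ...  # fully annotated: passes

def load_untyped(path): ...  # error under --strict: function is missing a type annotation
```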
4 changes: 2 additions & 2 deletions pydantic_ai_slim/pydantic_ai/models/outlines.py
@@ -60,7 +60,7 @@
 )
 from outlines.models.vllm_offline import (
     VLLMOffline,
-    from_vllm_offline,  # pyright: ignore[reportUnknownVariableType]
+    from_vllm_offline,
 )
 from outlines.types.dsl import JsonSchema
 from PIL import Image as PILImage
@@ -393,7 +393,7 @@ def _format_vllm_offline_inference_kwargs(  # pragma: no cover
     self, model_settings: dict[str, Any]
 ) -> dict[str, Any]:
     """Select the model settings supported by the vLLMOffline model."""
-    from vllm.sampling_params import SamplingParams  # pyright: ignore
+    from vllm.sampling_params import SamplingParams

     supported_args = [
         'max_tokens',
2 changes: 1 addition & 1 deletion pydantic_ai_slim/pyproject.toml
@@ -79,7 +79,7 @@ bedrock = ["boto3>=1.40.14"]
 huggingface = ["huggingface-hub[inference]>=0.33.5"]
 outlines-transformers = ["outlines[transformers]>=1.0.0, <1.3.0; (sys_platform != 'darwin' or platform_machine != 'x86_64')", "transformers>=4.0.0", "pillow", "torch; (sys_platform != 'darwin' or platform_machine != 'x86_64')"]
 outlines-llamacpp = ["outlines[llamacpp]>=1.0.0, <1.3.0"]
-outlines-mlxlm = ["outlines[mlxlm]>=1.0.0, <1.3.0; (sys_platform != 'darwin' or platform_machine != 'x86_64')"]
+outlines-mlxlm = ["outlines[mlxlm]>=1.0.0, <1.3.0; platform_system == 'Darwin' and platform_machine == 'arm64'"]
 outlines-sglang = ["outlines[sglang]>=1.0.0, <1.3.0", "pillow"]
 outlines-vllm-offline = ["vllm; python_version < '3.12' and (sys_platform != 'darwin' or platform_machine != 'x86_64')", "torch; (sys_platform != 'darwin' or platform_machine != 'x86_64')", "outlines>=1.0.0, <1.3.0"]
 # Tools
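The mlxlm extra now installs only on Apple Silicon macOS. Environment markers like this can be checked against the current interpreter with the `packaging` library; a quick sketch (assuming `packaging` is installed):

```python
from packaging.markers import Marker

# The updated marker from the outlines-mlxlm extra above.
marker = Marker("platform_system == 'Darwin' and platform_machine == 'arm64'")

# evaluate() tests the marker against the running interpreter's environment:
# True on an M-series Mac, False on Intel macOS, Linux, and Windows.
print(marker.evaluate())
```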
6 changes: 5 additions & 1 deletion pyproject.toml
@@ -56,7 +56,7 @@ dbos = ["pydantic-ai-slim[dbos]=={{ version }}"]
 prefect = ["pydantic-ai-slim[prefect]=={{ version }}"]
 outlines-transformers = ["pydantic-ai-slim[outlines-transformers]=={{ version }}"]
 outlines-llamacpp = ["pydantic-ai-slim[outlines-llamacpp]=={{ version }}"]
-outlines-mlxlm = ["pydantic-ai-slim[outlines-mlxlm]=={{ version }}"]
+outlines-mlxlm = ["pydantic-ai-slim[outlines-mlxlm]=={{ version }}; platform_system == 'Darwin' and platform_machine == 'arm64'"]
 outlines-sglang = ["pydantic-ai-slim[outlines-sglang]=={{ version }}"]
 outlines-vllm-offline = ["pydantic-ai-slim[outlines-vllm-offline]=={{ version }}"]

@@ -142,6 +142,7 @@ include = [
     "clai/**/*.py",
     "tests/**/*.py",
     "docs/**/*.py",
+    "typings/**/*.pyi",
 ]

 [tool.ruff.lint]
@@ -186,8 +187,10 @@ quote-style = "single"
 "examples/**/*.py" = ["D101", "D103"]
 "tests/**/*.py" = ["D"]
 "docs/**/*.py" = ["D"]
+"typings/**/*.pyi" = ["F401", "PYI044", "PYI035", "ANN401"]

 [tool.pyright]
+stubPath = "typings"
 pythonVersion = "3.12"
 typeCheckingMode = "strict"
 reportMissingTypeStubs = false
@@ -217,6 +220,7 @@ exclude = [
 [tool.mypy]
 files = "tests/typed_agent.py,tests/typed_graph.py"
 strict = true
+mypy_path = "typings"

 [tool.pytest.ini_options]
 testpaths = ["tests", "docs/.hooks"]
71 changes: 33 additions & 38 deletions tests/models/test_outlines.py
@@ -44,7 +44,7 @@
 with try_import() as imports_successful:
     import outlines

-    from pydantic_ai.models.outlines import OutlinesModel
+    from pydantic_ai.models.outlines import OutlinesAsyncBaseModel, OutlinesModel
     from pydantic_ai.providers.outlines import OutlinesProvider

 with try_import() as transformer_imports_successful:
@@ -54,11 +54,11 @@
     import llama_cpp

 with try_import() as vllm_imports_successful:
-    import vllm  # type: ignore[reportMissingImports]
+    import vllm

     # We try to load the vllm model to ensure it is available
     try:  # pragma: no lax cover
-        vllm.LLM('microsoft/Phi-3-mini-4k-instruct')  # type: ignore
+        vllm.LLM('microsoft/Phi-3-mini-4k-instruct')
     except RuntimeError as e:  # pragma: lax no cover
         if 'Found no NVIDIA driver' in str(e) or 'Device string must not be empty' in str(e):
             # Treat as import failure
@@ -97,68 +97,67 @@

 @pytest.fixture
 def mock_async_model() -> OutlinesModel:
-    class MockOutlinesAsyncModel(outlines.models.base.AsyncModel):
+    class MockOutlinesAsyncModel(OutlinesAsyncBaseModel):
         """Mock an OutlinesAsyncModel because no Outlines local models have an async version.

         The `__call__` and `stream` methods will be called by the Pydantic AI model while the other methods are
         only implemented because they are abstract methods in the OutlinesAsyncModel class.
         """

-        async def __call__(self, model_input, output_type, backend, **inference_kwargs):  # type: ignore[reportMissingParameterType]
+        async def __call__(self, model_input: Any, output_type: Any, backend: Any, **inference_kwargs: Any) -> str:
             return 'test'

-        async def stream(self, model_input, output_type, backend, **inference_kwargs):  # type: ignore[reportMissingParameterType]
+        async def stream(self, model_input: Any, output_type: Any, backend: Any, **inference_kwargs: Any):
             for _ in range(2):
                 yield 'test'

-        async def generate(self, model_input, output_type, **inference_kwargs):  # type: ignore[reportMissingParameterType]
-            ...  # pragma: no cover
+        async def generate(self, model_input: Any, output_type: Any, **inference_kwargs: Any): ...  # pragma: no cover

-        async def generate_batch(self, model_input, output_type, **inference_kwargs):  # type: ignore[reportMissingParameterType]
-            ...  # pragma: no cover
+        async def generate_batch(
+            self, model_input: Any, output_type: Any, **inference_kwargs: Any
+        ): ...  # pragma: no cover

-        async def generate_stream(self, model_input, output_type, **inference_kwargs):  # type: ignore[reportMissingParameterType]
-            ...  # pragma: no cover
+        async def generate_stream(
+            self, model_input: Any, output_type: Any, **inference_kwargs: Any
+        ): ...  # pragma: no cover

     return OutlinesModel(MockOutlinesAsyncModel(), provider=OutlinesProvider())


 @pytest.fixture
 def transformers_model() -> OutlinesModel:
-    hf_model = transformers.AutoModelForCausalLM.from_pretrained(  # type: ignore
+    hf_model = transformers.AutoModelForCausalLM.from_pretrained(
         'erwanf/gpt2-mini',
         device_map='cpu',
     )
-    hf_tokenizer = transformers.AutoTokenizer.from_pretrained('erwanf/gpt2-mini')  # type: ignore
+    hf_tokenizer = transformers.AutoTokenizer.from_pretrained('erwanf/gpt2-mini')
     chat_template = '{% for message in messages %}{{ message.role }}: {{ message.content }}{% endfor %}'
     hf_tokenizer.chat_template = chat_template
     outlines_model = outlines.models.transformers.from_transformers(
-        hf_model,  # type: ignore[reportUnknownArgumentType]
-        hf_tokenizer,  # type: ignore
+        hf_model,
+        hf_tokenizer,
     )
     return OutlinesModel(outlines_model, provider=OutlinesProvider())


 @pytest.fixture
 def transformers_multimodal_model() -> OutlinesModel:
-    hf_model = transformers.LlavaForConditionalGeneration.from_pretrained(  # type: ignore
+    hf_model = transformers.LlavaForConditionalGeneration.from_pretrained(
         'trl-internal-testing/tiny-LlavaForConditionalGeneration',
         device_map='cpu',
     )
-    hf_processor = transformers.AutoProcessor.from_pretrained(  # type: ignore
-        'trl-internal-testing/tiny-LlavaForConditionalGeneration'
-    )
+    hf_processor = transformers.AutoProcessor.from_pretrained('trl-internal-testing/tiny-LlavaForConditionalGeneration')
     outlines_model = outlines.models.transformers.from_transformers(
         hf_model,
-        hf_processor,  # type: ignore
+        hf_processor,
     )
     return OutlinesModel(outlines_model, provider=OutlinesProvider())


 @pytest.fixture
 def llamacpp_model() -> OutlinesModel:
     outlines_model_llamacpp = outlines.models.llamacpp.from_llamacpp(
-        llama_cpp.Llama.from_pretrained(  # type: ignore
+        llama_cpp.Llama.from_pretrained(
             repo_id='M4-ai/TinyMistral-248M-v2-Instruct-GGUF',
             filename='TinyMistral-248M-v2-Instruct.Q4_K_M.gguf',
         )
@@ -168,9 +167,7 @@ def llamacpp_model() -> OutlinesModel:

 @pytest.fixture
 def mlxlm_model() -> OutlinesModel:  # pragma: no cover
-    outlines_model = outlines.models.mlxlm.from_mlxlm(
-        *mlx_lm.load('mlx-community/SmolLM-135M-Instruct-4bit')  # type: ignore
-    )
+    outlines_model = outlines.models.mlxlm.from_mlxlm(*mlx_lm.load('mlx-community/SmolLM-135M-Instruct-4bit'))
     return OutlinesModel(outlines_model, provider=OutlinesProvider())


@@ -184,9 +181,7 @@ def sglang_model() -> OutlinesModel:

 @pytest.fixture
 def vllm_model_offline() -> OutlinesModel:  # pragma: no cover
-    outlines_model = outlines.models.vllm_offline.from_vllm_offline(  # type: ignore
-        vllm.LLM('microsoft/Phi-3-mini-4k-instruct')  # type: ignore
-    )
+    outlines_model = outlines.models.vllm_offline.from_vllm_offline(vllm.LLM('microsoft/Phi-3-mini-4k-instruct'))
     return OutlinesModel(outlines_model, provider=OutlinesProvider())


@@ -201,18 +196,18 @@ def binary_image() -> BinaryImage:
     pytest.param(
         'from_transformers',
         lambda: (
-            transformers.AutoModelForCausalLM.from_pretrained(  # type: ignore
+            transformers.AutoModelForCausalLM.from_pretrained(
                 'erwanf/gpt2-mini',
                 device_map='cpu',
             ),
-            transformers.AutoTokenizer.from_pretrained('erwanf/gpt2-mini'),  # type: ignore
+            transformers.AutoTokenizer.from_pretrained('erwanf/gpt2-mini'),
         ),
         marks=skip_if_transformers_imports_unsuccessful,
     ),
     pytest.param(
         'from_llamacpp',
         lambda: (
-            llama_cpp.Llama.from_pretrained(  # type: ignore
+            llama_cpp.Llama.from_pretrained(
                 repo_id='M4-ai/TinyMistral-248M-v2-Instruct-GGUF',
                 filename='TinyMistral-248M-v2-Instruct.Q4_K_M.gguf',
             ),
@@ -221,7 +216,7 @@ def binary_image() -> BinaryImage:
     ),
     pytest.param(
         'from_mlxlm',
-        lambda: mlx_lm.load('mlx-community/SmolLM-135M-Instruct-4bit'),  # type: ignore
+        lambda: mlx_lm.load('mlx-community/SmolLM-135M-Instruct-4bit'),
         marks=skip_if_mlxlm_imports_unsuccessful,
     ),
     pytest.param(
@@ -231,7 +226,7 @@ def binary_image() -> BinaryImage:
     ),
     pytest.param(
         'from_vllm_offline',
-        lambda: (vllm.LLM('microsoft/Phi-3-mini-4k-instruct'),),  # type: ignore
+        lambda: (vllm.LLM('microsoft/Phi-3-mini-4k-instruct'),),
         marks=skip_if_vllm_imports_unsuccessful,
     ),
 ]
@@ -260,18 +255,18 @@ def test_init(model_loading_function_name: str, args: Callable[[], tuple[Any]])
     pytest.param(
         'from_transformers',
         lambda: (
-            transformers.AutoModelForCausalLM.from_pretrained(  # type: ignore
+            transformers.AutoModelForCausalLM.from_pretrained(
                 'erwanf/gpt2-mini',
                 device_map='cpu',
             ),
-            transformers.AutoTokenizer.from_pretrained('erwanf/gpt2-mini'),  # type: ignore
+            transformers.AutoTokenizer.from_pretrained('erwanf/gpt2-mini'),
         ),
         marks=skip_if_transformers_imports_unsuccessful,
     ),
     pytest.param(
         'from_llamacpp',
         lambda: (
-            llama_cpp.Llama.from_pretrained(  # type: ignore
+            llama_cpp.Llama.from_pretrained(
                 repo_id='M4-ai/TinyMistral-248M-v2-Instruct-GGUF',
                 filename='TinyMistral-248M-v2-Instruct.Q4_K_M.gguf',
             ),
@@ -280,7 +275,7 @@ def test_init(model_loading_function_name: str, args: Callable[[], tuple[Any]])
     ),
     pytest.param(
         'from_mlxlm',
-        lambda: mlx_lm.load('mlx-community/SmolLM-135M-Instruct-4bit'),  # type: ignore
+        lambda: mlx_lm.load('mlx-community/SmolLM-135M-Instruct-4bit'),
         marks=skip_if_mlxlm_imports_unsuccessful,
     ),
     pytest.param(
@@ -290,7 +285,7 @@ def test_init(model_loading_function_name: str, args: Callable[[], tuple[Any]])
     ),
     pytest.param(
         'from_vllm_offline',
-        lambda: (vllm.LLM('microsoft/Phi-3-mini-4k-instruct'),),  # type: ignore
+        lambda: (vllm.LLM('microsoft/Phi-3-mini-4k-instruct'),),
         marks=skip_if_vllm_imports_unsuccessful,
     ),
 ]
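For context, the fixtures above feed tests of roughly the following shape (an assumed sketch based on the mock's behavior; the actual test bodies fall outside this diff):

```python
from pydantic_ai import Agent

async def test_mock_async_model_request(mock_async_model: OutlinesModel) -> None:
    # The mock's __call__ always returns the string 'test', so the agent's
    # final output should be exactly that.
    agent = Agent(mock_async_model)
    result = await agent.run('hello')
    assert result.output == 'test'
```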
27 changes: 27 additions & 0 deletions typings/README.md
@@ -0,0 +1,27 @@
Stub files (`*.pyi`) contain type hints used only by type checkers, not at
runtime. They were introduced in
[PEP 484](https://peps.python.org/pep-0484/#stub-files). For example, the
[`typeshed`](https://github.com/python/typeshed) repository maintains a
collection of such stubs for the Python standard library and some third-party
libraries.

The `./typings` folder contains type information only for the parts of
third-party dependencies used in the `pydantic-ai` codebase. These stubs must be
manually maintained. When a dependency's API changes, both the codebase and the
stubs need to be updated. There are two ways to update the stubs:

1. **Manual update:** Check the dependency's source code and copy the type
information to `./typings`. For example, take the `from_pretrained()` method
of the `Llama` class in `llama-cpp-python`. The
[source code](https://github.com/abetlen/llama-cpp-python/blob/main/llama_cpp/llama.py#L2240)
contains the type information that is copied to `./typings/llama_cpp.pyi`.
This eliminates the need for `# type: ignore` comments in the codebase.

2. **Update with AI coding assistants:** Most dependencies maintain `llms.txt`
and `llms-full.txt` files with their documentation. This information is
compiled by [Context7](https://context7.com). For example, the
`llama-cpp-python` library is documented
[here](https://github.com/abetlen/llama-cpp-python). MCP servers such as
[this one by Upstash](https://github.com/upstash/context7) provide AI coding
assistants access to Context7. AI coding assistants such as VS Code Copilot
or Cursor can reliably generate and update the stubs.
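As a concrete illustration of the manual workflow in point 1: given an untyped function in a dependency, the hand-written stub mirrors its signature with full annotations (hypothetical module and signature):

```python
# The dependency ships untyped source, e.g. third_party/loader.py:
#
#     def load(path, device=None): ...
#
# The matching hand-maintained stub, typings/third_party/loader.pyi:
from typing import Any

def load(path: str, device: str | None = None) -> Any: ...
```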
19 changes: 19 additions & 0 deletions typings/llama_cpp.pyi
@@ -0,0 +1,19 @@
from collections.abc import Sequence
from os import PathLike
from typing import Any, Literal

from typing_extensions import Self

class Llama:
    def __init__(self, *args: Any, **kwargs: Any) -> None: ...
    @classmethod
    def from_pretrained(
        cls,
        repo_id: str,
        filename: str | None = None,
        additional_files: Sequence[str] | None = None,
        local_dir: str | PathLike[str] | None = None,
        local_dir_use_symlinks: bool | Literal['auto'] = 'auto',
        cache_dir: str | PathLike[str] | None = None,
        **kwargs: Any,
    ) -> Self: ...
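With this stub on the configured stub path, call sites like the `llamacpp_model` fixture above type-check without suppressions:

```python
from llama_cpp import Llama

# Resolved against typings/llama_cpp.pyi: repo_id and filename are typed,
# and the classmethod returns Self, so `llama` is inferred as Llama.
llama = Llama.from_pretrained(
    repo_id='M4-ai/TinyMistral-248M-v2-Instruct-GGUF',
    filename='TinyMistral-248M-v2-Instruct.Q4_K_M.gguf',
)
```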
6 changes: 6 additions & 0 deletions typings/mlx/__init__.pyi
@@ -0,0 +1,6 @@
from typing import Any

from . import nn

# mlx is imported as a package, primarily for mlx.nn
__all__: list[str] = []
3 changes: 3 additions & 0 deletions typings/mlx/nn.pyi
@@ -0,0 +1,3 @@
from typing import Any

class Module: ...
7 changes: 7 additions & 0 deletions typings/mlx_lm.pyi
@@ -0,0 +1,7 @@
from typing import Any

from mlx.nn import Module
from transformers.tokenization_utils import PreTrainedTokenizer

def load(model_path: str | None = None, *args: Any, **kwargs: Any) -> tuple[Module, PreTrainedTokenizer]: ...
def generate_step(*args: Any, **kwargs: Any) -> Any: ...
3 changes: 3 additions & 0 deletions typings/outlines/__init__.pyi
@@ -0,0 +1,3 @@
from . import models

__all__: list[str] = []
18 changes: 18 additions & 0 deletions typings/outlines/inputs.pyi
@@ -0,0 +1,18 @@
from __future__ import annotations

from collections.abc import Sequence
from typing import Any

from PIL.Image import Image as PILImage

class Chat:
    def __init__(self, messages: list[dict[str, Any]] | None = None) -> None: ...
    def add_system_message(self, content: str) -> None: ...
    def add_user_message(self, content: str | Sequence[str | Image]) -> None: ...
    def add_assistant_message(self, content: str | list[str | Image]) -> None: ...
    def extend(self, messages: list[dict[str, Any]]) -> None: ...
    def append(self, message: dict[str, Any]) -> None: ...
    def pop(self) -> dict[str, Any] | None: ...

class Image:
    def __init__(self, image: PILImage) -> None: ...
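A short usage sketch implied by these signatures (assumed usage, not code from this PR):

```python
from PIL import Image as PILImage
from outlines.inputs import Chat, Image

chat = Chat()
chat.add_system_message('You are terse.')
# User content may mix text and images, per the add_user_message signature.
chat.add_user_message(['Describe this image:', Image(PILImage.new('RGB', (8, 8)))])
```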
3 changes: 3 additions & 0 deletions typings/outlines/models/__init__.pyi
@@ -0,0 +1,3 @@
from . import base, llamacpp, mlxlm, sglang, transformers, vllm_offline

__all__: list[str] = []
10 changes: 10 additions & 0 deletions typings/outlines/models/base.pyi
@@ -0,0 +1,10 @@
from collections.abc import AsyncIterable, Iterable
from typing import Any

class Model:
    def __call__(self, *args: Any, **kwargs: Any) -> Any: ...
    def stream(self, *args: Any, **kwargs: Any) -> Iterable[Any]: ...

class AsyncModel:
    async def __call__(self, *args: Any, **kwargs: Any) -> Any: ...
    def stream(self, *args: Any, **kwargs: Any) -> AsyncIterable[Any]: ...
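Note that `AsyncModel.stream` is declared as a plain `def` returning `AsyncIterable[Any]`, not an `async def`: calling an async generator function returns an async iterable immediately, without awaiting, which matches how the mock's `stream` in the test suite behaves. A self-contained sketch of that semantics:

```python
import asyncio
from collections.abc import AsyncIterator

async def stream() -> AsyncIterator[str]:
    # `async def` with `yield` makes this an async generator function;
    # calling stream() returns an async iterable without awaiting it.
    for _ in range(2):
        yield 'test'

async def main() -> None:
    async for chunk in stream():
        print(chunk)  # prints 'test' twice

asyncio.run(main())
```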
10 changes: 10 additions & 0 deletions typings/outlines/models/llamacpp.pyi
@@ -0,0 +1,10 @@
from typing import TYPE_CHECKING

from outlines.models.base import Model

if TYPE_CHECKING:
    from llama_cpp import Llama

class LlamaCpp(Model): ...

def from_llamacpp(model: Llama) -> LlamaCpp: ...