5 changes: 3 additions & 2 deletions docs/.hooks/main.py
@@ -57,18 +57,19 @@ def sub_example(m: re.Match[str]) -> str:


def render_video(markdown: str) -> str:
return re.sub(r'\{\{ *video\((["\'])(.+?)\1(?:, (\d+))?\) *\}\}', sub_cf_video, markdown)
return re.sub(r'\{\{ *video\((["\'])(.+?)\1(?:, (\d+))?(?:, (\d+))?\) *\}\}', sub_cf_video, markdown)


def sub_cf_video(m: re.Match[str]) -> str:
video_id = m.group(2)
time = m.group(3)
time = f'{time}s' if time else ''
padding_top = m.group(4) or '67'

domain = 'https://customer-nmegqx24430okhaq.cloudflarestream.com'
poster = f'{domain}/{video_id}/thumbnails/thumbnail.jpg?time={time}&height=600'
return f"""
<div style="position: relative; padding-top: 67%;">
<div style="position: relative; padding-top: {padding_top}%;">
<iframe
src="{domain}/{video_id}/iframe?poster={urllib.parse.quote_plus(poster)}"
loading="lazy"
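
As a quick aside (a sketch, not part of the PR), here is what the widened `video()` regex now captures — an optional second number after the start time that becomes `padding_top`, falling back to `67` as in `sub_cf_video`. The video id and numbers below are made up:

```py
import re

# the updated pattern from render_video above
VIDEO_RE = re.compile(r'\{\{ *video\((["\'])(.+?)\1(?:, (\d+))?(?:, (\d+))?\) *\}\}')

m = VIDEO_RE.search("{{ video('abc123', 25, 55) }}")  # hypothetical macro call
assert m is not None
print(m.group(2))          # video id -> 'abc123'
print(m.group(3))          # start time in seconds -> '25'
print(m.group(4) or '67')  # padding-top percent, default 67 -> '55'
```

This is what lets a call like the `{{ video(..., 25, 55) }}` used in `docs/index.md` below control the wrapper's aspect ratio.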
122 changes: 61 additions & 61 deletions docs/agents.md
@@ -119,6 +119,67 @@ print(result2.data)

_(This example is complete, it can be run "as is")_

## Type safe by design {#static-type-checking}

PydanticAI is designed to work well with static type checkers, like mypy and pyright.

!!! tip "Typing is (somewhat) optional"
PydanticAI is designed to make type checking as useful as possible for you if you choose to use it, but you don't have to use types everywhere all the time.

That said, because PydanticAI uses Pydantic, and Pydantic uses type hints as the definition for schema and validation, some types (specifically type hints on parameters to tools, and the `result_type` arguments to [`Agent`][pydantic_ai.Agent]) are used at runtime.

We (the library developers) have messed up if type hints are confusing you more than they're helping you; if you find that's the case, please create an [issue](https://github.com/pydantic/pydantic-ai/issues) explaining what's annoying you!

In particular, agents are generic in both the type of their dependencies and the type of results they return, so you can use the type hints to ensure you're using the right types.

Consider the following script with type mistakes:

```py title="type_mistakes.py" hl_lines="18 28"
from dataclasses import dataclass

from pydantic_ai import Agent, RunContext


@dataclass
class User:
name: str


agent = Agent(
'test',
deps_type=User, # (1)!
result_type=bool,
)


@agent.system_prompt
def add_user_name(ctx: RunContext[str]) -> str: # (2)!
return f"The user's name is {ctx.deps}."


def foobar(x: bytes) -> None:
pass


result = agent.run_sync('Does their name start with "A"?', deps=User('Adam'))
foobar(result.data) # (3)!
```

1. The agent is defined as expecting an instance of `User` as `deps`.
2. But here `add_user_name` is defined as taking a `str` as the dependency, not a `User`.
3. Since the agent is defined as returning a `bool`, this will raise a type error since `foobar` expects `bytes`.

Running `mypy` on this will give the following output:

```bash
➤ uv run mypy type_mistakes.py
type_mistakes.py:18: error: Argument 1 to "system_prompt" of "Agent" has incompatible type "Callable[[RunContext[str]], str]"; expected "Callable[[RunContext[User]], str]" [arg-type]
type_mistakes.py:28: error: Argument 1 to "foobar" has incompatible type "bool"; expected "bytes" [arg-type]
Found 2 errors in 1 file (checked 1 source file)
```

Running `pyright` would identify the same issues.
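
For reference, here is a sketch (not part of the documented example) with the hints aligned — `add_user_name` takes `RunContext[User]` to match `deps_type`, and `foobar` accepts the `bool` the agent returns — so both checkers pass:

```py
from dataclasses import dataclass

from pydantic_ai import Agent, RunContext


@dataclass
class User:
    name: str


agent = Agent(
    'test',
    deps_type=User,
    result_type=bool,
)


@agent.system_prompt
def add_user_name(ctx: RunContext[User]) -> str:  # matches deps_type=User
    return f"The user's name is {ctx.deps.name}."


def foobar(x: bool) -> None:  # accepts the agent's bool result
    pass


result = agent.run_sync('Does their name start with "A"?', deps=User('Adam'))
foobar(result.data)
```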

## System Prompts

System prompts might seem simple at first glance since they're just strings (or sequences of strings that are concatenated), but crafting the right system prompt is key to getting the model to behave as you want.
@@ -514,64 +575,3 @@
1. Define a tool that will raise `ModelRetry` repeatedly in this case.

_(This example is complete, it can be run "as is")_

## Static Type Checking

PydanticAI is designed to work well with static type checkers, like mypy and pyright.

!!! tip "mypy vs. pyright"
[mypy](https://github.com/python/mypy) and [pyright](https://github.com/microsoft/pyright) are both static type checkers for Python.

Mypy was the first and is still generally considered the default, in part because it was developed partly by Guido van Rossum, the creator of Python.

Pyright is generally faster and more sophisticated. It is developed by Eric Traut for use in VSCode; since that's its primary use case, its terminal output is more verbose and harder to read than that of mypy.

In particular, agents are generic in both the type of their dependencies and the type of results they return, so you can use the type hints to ensure you're using the right types.

Consider the following script with type mistakes:

```py title="type_mistakes.py" hl_lines="18 28"
from dataclasses import dataclass

from pydantic_ai import Agent, RunContext


@dataclass
class User:
name: str


agent = Agent(
'test',
deps_type=User, # (1)!
result_type=bool,
)


@agent.system_prompt
def add_user_name(ctx: RunContext[str]) -> str: # (2)!
return f"The user's name is {ctx.deps}."


def foobar(x: bytes) -> None:
pass


result = agent.run_sync('Does their name start with "A"?', deps=User('Adam'))
foobar(result.data) # (3)!
```

1. The agent is defined as expecting an instance of `User` as `deps`.
2. But here `add_user_name` is defined as taking a `str` as the dependency, not a `User`.
3. Since the agent is defined as returning a `bool`, this will raise a type error since `foobar` expects `bytes`.

Running `mypy` on this will give the following output:

```bash
➤ uv run mypy type_mistakes.py
type_mistakes.py:18: error: Argument 1 to "system_prompt" of "Agent" has incompatible type "Callable[[RunContext[str]], str]"; expected "Callable[[RunContext[User]], str]" [arg-type]
type_mistakes.py:28: error: Argument 1 to "foobar" has incompatible type "bool"; expected "bytes" [arg-type]
Found 2 errors in 1 file (checked 1 source file)
```

Running `pyright` would identify the same issues.
81 changes: 31 additions & 50 deletions docs/index.md
@@ -10,10 +10,10 @@ PydanticAI is a Python Agent Framework designed to make it less painful to build

## Why use PydanticAI

* Built by the team behind Pydantic (the validation layer of the OpenAI SDK, the Anthropic SDK, LangChain, LlamaIndex, AutoGPT, Transformers, Instructor and many more)
* Model-agnostic — currently both OpenAI, Gemini, and Groq are supported, Anthropic [is coming soon](https://github.com/pydantic/pydantic-ai/issues/63). And there is a simple interface to implement support for other models.
* Type-safe
* Control flow and composing agents is done with vanilla python, allowing you to make use of the same Python development best practices you'd use in any other (non-AI) project
* Built by the team behind Pydantic (the validation layer of the OpenAI SDK, the Anthropic SDK, LangChain, LlamaIndex, AutoGPT, Transformers, CrewAI, Instructor and many more)
* Model-agnostic — currently OpenAI, Gemini, and Groq are supported, Anthropic [is coming soon](https://github.com/pydantic/pydantic-ai/issues/63). And there is a simple interface to implement support for other models.
* [Type-safe](agents.md#static-type-checking)
* Control flow and agent composition is done with vanilla Python, allowing you to make use of the same Python development best practices you'd use in any other (non-AI) project
* [Structured response](results.md#structured-result-validation) validation with Pydantic
* [Streamed responses](results.md#streamed-results), including validation of streamed _structured_ responses with Pydantic
* Novel, type-safe [dependency injection system](dependencies.md), useful for testing and eval-driven iterative development
@@ -124,66 +124,47 @@ async def main():

1. This [agent](agents.md) will act as first-tier support in a bank. Agents are generic in the type of dependencies they accept and the type of result they return. In this case, the support agent has type `#!python Agent[SupportDependencies, SupportResult]`.
2. Here we configure the agent to use [OpenAI's GPT-4o model](api/models/openai.md), you can also set the model when running the agent.
3. The `SupportDependencies` dataclass is used to pass data, connections, and logic into the model that will be needed when running [system prompt](agents.md#system-prompts) and [tool](agents.md#function-tools) functions. PydanticAI's system of dependency injection provides a type-safe way to customise the behavior of your agents, and can be especially useful when running unit tests and evals.
3. The `SupportDependencies` dataclass is used to pass data, connections, and logic into the model that will be needed when running [system prompt](agents.md#system-prompts) and [tool](agents.md#function-tools) functions. PydanticAI's system of dependency injection provides a [type-safe](agents.md#static-type-checking) way to customise the behavior of your agents, and can be especially useful when running [unit tests](testing-evals.md) and evals.
4. Static [system prompts](agents.md#system-prompts) can be registered with the [`system_prompt` keyword argument][pydantic_ai.Agent.__init__] to the agent.
5. Dynamic [system prompts](agents.md#system-prompts) can be registered with the [`@agent.system_prompt`][pydantic_ai.Agent.system_prompt] decorator, and can make use of dependency injection. Dependencies are carried via the [`RunContext`][pydantic_ai.dependencies.RunContext] argument, which is parameterized with the `deps_type` from above. If the type annotation here is wrong, static type checkers will catch it.
6. [Tools](agents.md#function-tools) let you register "tools" which the LLM may call while responding to a user. Again, dependencies are carried via [`RunContext`][pydantic_ai.dependencies.RunContext], and any other arguments become the tool schema passed to the LLM. Pydantic is used to validate these arguments, and errors are passed back to the LLM so it can retry.
6. [`tool`](agents.md#function-tools) lets you register functions which the LLM may call while responding to a user. Again, dependencies are carried via [`RunContext`][pydantic_ai.dependencies.RunContext], and any other arguments become the tool schema passed to the LLM. Pydantic is used to validate these arguments, and errors are passed back to the LLM so it can retry.
7. The docstring of a tool is also passed to the LLM as the description of the tool. Parameter descriptions are [extracted](agents.md#function-tools-and-schema) from the docstring and added to the tool schema sent to the LLM.
8. [Run the agent](agents.md#running-agents) asynchronously, conducting a conversation with the LLM until a final response is reached. Even in this fairly simple case, the agent will exchange multiple messages with the LLM as tools are called to retrieve a result.
9. The response from the agent will be guaranteed to be a `SupportResult`; if validation fails, [reflection](agents.md#reflection-and-self-correction) means the agent is prompted to try again.
10. The result will be validated with Pydantic to guarantee it is a `SupportResult`; since the agent is generic, it'll also be typed as a `SupportResult` to aid with static type checking.
11. In a real use case, you'd add many more tools and a longer system prompt to the agent to extend the context it's equipped with and support it can provide.
11. In a real use case, you'd add more tools and a longer system prompt to the agent to extend the context it's equipped with and support it can provide.
12. This is a simple sketch of a database connection, used to keep the example short and readable. In reality, you'd be connecting to an external database (e.g. PostgreSQL) to get information about customers.
13. This [Pydantic](https://docs.pydantic.dev) model is used to constrain the structured data returned by the agent. From this simple definition, Pydantic builds the JSON Schema that tells the LLM how to return the data, and performs validation to guarantee the data is correct at the end of the conversation.

To make things clearer, here is a diagram of what happens in the `#!python await support_agent.run('What is my balance?', deps=deps)` call within `main`:
```mermaid
sequenceDiagram
participant DatabaseConn
participant Agent
participant LLM

Note over Agent: Dynamic system prompt<br>add_customer_name()
Agent ->> DatabaseConn: Retrieve customer name
activate DatabaseConn
DatabaseConn -->> Agent: "John"
deactivate DatabaseConn

Note over Agent: User query

Agent ->> LLM: Request<br>System: "You are a support agent..."<br>System: "The customer's name is John"<br>User: "What is my balance?"
activate LLM
Note over LLM: LLM decides to use a tool
LLM ->> Agent: Call tool<br>customer_balance()
deactivate LLM
activate Agent
Note over Agent: Retrieve account balance

Agent ->> DatabaseConn: Retrieve balance<br>Include pending
activate DatabaseConn
DatabaseConn -->> Agent: "$123.45"
deactivate DatabaseConn

Agent -->> LLM: ToolReturn<br>"$123.45"
deactivate Agent
activate LLM
Note over LLM: LLM processes response

LLM ->> Agent: StructuredResponse<br>SupportResult
deactivate LLM
activate Agent
Note over Agent: Support session complete
deactivate Agent
```

13. This [Pydantic](https://docs.pydantic.dev) model is used to constrain the structured data returned by the agent. From this simple definition, Pydantic builds the JSON Schema that tells the LLM how to return the data, and performs validation to guarantee the data is correct at the end of the run.

!!! tip "Complete `bank_support.py` example"
The code included here is incomplete for the sake of brevity (the definition of `DatabaseConn` is missing); you can find the complete `bank_support.py` example [here](examples/bank-support.md).
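
Condensed into a rough, hypothetical sketch (built only from the notes and diagram above — the import and method names such as `customer_name` are assumptions, and the real example has more fields and tools), the pieces those notes refer to fit together roughly like this:

```py
from dataclasses import dataclass

from pydantic import BaseModel, Field
from pydantic_ai import Agent, RunContext

from bank_database import DatabaseConn  # hypothetical import; see the complete example


@dataclass
class SupportDependencies:  # note 3: data and connections for prompts and tools
    customer_id: int
    db: DatabaseConn


class SupportResult(BaseModel):  # note 13: constrains the structured result
    support_advice: str = Field(description='Advice returned to the customer')
    block_card: bool = Field(description="Whether to block the customer's card")


support_agent = Agent(  # notes 1 & 2: Agent[SupportDependencies, SupportResult] on GPT-4o
    'openai:gpt-4o',
    deps_type=SupportDependencies,
    result_type=SupportResult,
    system_prompt='You are a support agent in our bank...',  # note 4: static system prompt
)


@support_agent.system_prompt  # note 5: dynamic system prompt via dependency injection
async def add_customer_name(ctx: RunContext[SupportDependencies]) -> str:
    name = await ctx.deps.db.customer_name(id=ctx.deps.customer_id)
    return f"The customer's name is {name!r}"


@support_agent.tool  # notes 6 & 7: a tool the LLM may call; its docstring is the description
async def customer_balance(ctx: RunContext[SupportDependencies], include_pending: bool) -> str:
    """Returns the customer's current account balance."""
    return await ctx.deps.db.customer_balance(
        id=ctx.deps.customer_id, include_pending=include_pending
    )
```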

## Instrumentation with Pydantic Logfire

To understand the flow of the above runs, we can watch the agent in action using Pydantic Logfire.

To do this, we need to set up logfire, and add the following to our code:

```py title="bank_support_with_logfire.py"
import logfire

logfire.configure() # (1)!
logfire.instrument_asyncpg() # (2)!
```

1. Configure logfire; this will fail if no project is set up.
2. In our demo, `DatabaseConn` uses [`asyncpg`](https://magicstack.github.io/asyncpg/current/) to connect to a PostgreSQL database, so `logfire.instrument_asyncpg()` is used to log the database queries.

That's enough to get the following view of your agent in action:

{{ video('9078b98c4f75d01f912a0368bbbdb97a', 25, 55) }}

See [Monitoring and Performance](logfire.md) to learn more.

## Next Steps

To try PydanticAI yourself, follow the instructions [in the examples](examples/index.md).

Read the [conceptual documentation](agents.md) to learn more about building applications with PydanticAI.
Read the [docs](agents.md) to learn more about building applications with PydanticAI.

Read the [API Reference](api/agent.md) to understand PydanticAI's interface.
2 changes: 1 addition & 1 deletion docs/testing-evals.md
@@ -3,7 +3,7 @@
With PydanticAI and LLM integrations in general, there are two distinct kinds of test:

1. **Unit tests** — tests of your application code, and whether it's behaving correctly
2. **"Evals"** — tests of the LLM, and how good or bad its responses are
2. **Evals** — tests of the LLM, and how good or bad its responses are

For the most part, these two kinds of tests have pretty separate goals and considerations.

34 changes: 17 additions & 17 deletions pydantic_ai_slim/pydantic_ai/agent.py
@@ -75,8 +75,8 @@ class Agent(Generic[AgentDeps, ResultData]):
def __init__(
self,
model: models.Model | models.KnownModelName | None = None,
result_type: type[ResultData] = str,
*,
result_type: type[ResultData] = str,
system_prompt: str | Sequence[str] = (),
deps_type: type[AgentDeps] = NoneType,
retries: int = 1,
@@ -150,21 +150,21 @@ async def run(

deps = self._get_deps(deps)

new_message_index, messages = await self._prepare_messages(deps, user_prompt, message_history)
self.last_run_messages = messages

for tool in self._function_tools.values():
tool.reset()

cost = result.Cost()

with _logfire.span(
'agent run {prompt=}',
prompt=user_prompt,
agent=self,
custom_model=custom_model,
model_name=model_used.name(),
) as run_span:
new_message_index, messages = await self._prepare_messages(deps, user_prompt, message_history)
self.last_run_messages = messages

for tool in self._function_tools.values():
tool.reset()

cost = result.Cost()

run_step = 0
while True:
run_step += 1
@@ -243,21 +243,21 @@ async def run_stream(

deps = self._get_deps(deps)

new_message_index, messages = await self._prepare_messages(deps, user_prompt, message_history)
self.last_run_messages = messages

for tool in self._function_tools.values():
tool.reset()

cost = result.Cost()

with _logfire.span(
'agent run stream {prompt=}',
prompt=user_prompt,
agent=self,
custom_model=custom_model,
model_name=model_used.name(),
) as run_span:
new_message_index, messages = await self._prepare_messages(deps, user_prompt, message_history)
self.last_run_messages = messages

for tool in self._function_tools.values():
tool.reset()

cost = result.Cost()

run_step = 0
while True:
run_step += 1
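
For context on the `agent.py` change above — moving message preparation, tool resets, and cost initialization inside the `with _logfire.span(...)` block — work executed while a Logfire span is open is recorded under that span, so these setup steps now show up in the 'agent run' trace. A minimal sketch using Logfire's public API, not pydantic-ai code:

```py
import logfire

logfire.configure()  # assumes a Logfire project is already set up

with logfire.span('agent run {prompt=}', prompt='What is my balance?'):
    # anything logged here is attributed to the 'agent run' span
    logfire.info('preparing messages')
    logfire.info('resetting function tools')
```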
3 changes: 3 additions & 0 deletions tests/example_modules/README.md
@@ -0,0 +1,3 @@
# docs examples imports

This directory is added to `sys.path` in `tests/test_examples.py::test_docs_examples` to augment some of the examples.
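
A hypothetical sketch of that mechanism (the actual code lives in `tests/test_examples.py` and is not shown here):

```py
import sys
from pathlib import Path

# make the helper modules in tests/example_modules importable from the doc examples
examples_dir = Path(__file__).parent / 'example_modules'
sys.path.append(str(examples_dir))
```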