pydantic · Kludex · Apr 11, 2025 · Apr 9, 2025 · Apr 10, 2025 · Apr 10, 2025
diff --git a/docs/input.md b/docs/input.md
@@ -1,6 +1,6 @@
-# Image, Audio & Document Input
+# Image, Audio, Video & Document Input
 
-Some LLMs are now capable of understanding both audio, image and document content.
+Some LLMs are now capable of understanding audio, video, image and document content.
 
 ## Image Input
 
@@ -52,6 +52,13 @@ print(result.data)
 
 You can provide audio input using either [`AudioUrl`][pydantic_ai.AudioUrl] or [`BinaryContent`][pydantic_ai.BinaryContent]. The process is analogous to the examples above.
 
+## Video Input
+
+!!! info
+    Some models do not support video input. Please check the model's documentation to confirm whether it supports video input.
+
+You can provide video input using either [`VideoUrl`][pydantic_ai.VideoUrl] or [`BinaryContent`][pydantic_ai.BinaryContent]. The process is analogous to the examples above.
+
 ## Document Input
 
 !!! info

diff --git a/pydantic_ai_slim/pydantic_ai/__init__.py b/pydantic_ai_slim/pydantic_ai/__init__.py
@@ -10,7 +10,7 @@
     UsageLimitExceeded,
     UserError,
 )
-from .messages import AudioUrl, BinaryContent, DocumentUrl, ImageUrl
+from .messages import AudioUrl, BinaryContent, DocumentUrl, ImageUrl, VideoUrl
 from .tools import RunContext, Tool
 
 __all__ = (
@@ -33,6 +33,7 @@
     # messages
     'ImageUrl',
     'AudioUrl',
+    'VideoUrl',
     'DocumentUrl',
     'BinaryContent',
     # tools

diff --git a/pydantic_ai_slim/pydantic_ai/messages.py b/pydantic_ai_slim/pydantic_ai/messages.py
@@ -15,6 +15,34 @@
 from ._utils import generate_tool_call_id as _generate_tool_call_id, now_utc as _now_utc
 from .exceptions import UnexpectedModelBehavior
 
+AudioMediaType: TypeAlias = Literal['audio/wav', 'audio/mpeg']
+ImageMediaType: TypeAlias = Literal['image/jpeg', 'image/png', 'image/gif', 'image/webp']
+DocumentMediaType: TypeAlias = Literal[
+    'application/pdf',
+    'text/plain',
+    'text/csv',
+    'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
+    'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
+    'text/html',
+    'text/markdown',
+    'application/vnd.ms-excel',
+]
+VideoMediaType: TypeAlias = Literal[
+    'video/x-matroska',  # mapped to format 'mkv' by `_video_format`
+    'video/quicktime',  # mapped to format 'mov'
+    'video/mp4',  # mapped to format 'mp4'
+    'video/webm',  # mapped to format 'webm'
+    'video/x-flv',  # mapped to format 'flv'
+    'video/mpeg',  # mapped to format 'mpeg'
+    'video/x-ms-wmv',  # mapped to format 'wmv'
+    'video/3gpp',  # mapped to format 'three_gp'
+]
+
+AudioFormat: TypeAlias = Literal['wav', 'mp3']
+ImageFormat: TypeAlias = Literal['jpeg', 'png', 'gif', 'webp']
+DocumentFormat: TypeAlias = Literal['csv', 'doc', 'docx', 'html', 'md', 'pdf', 'txt', 'xls', 'xlsx']
+VideoFormat: TypeAlias = Literal['mkv', 'mov', 'mp4', 'webm', 'flv', 'mpeg', 'mpg', 'wmv', 'three_gp']
+
 
 @dataclass
 class SystemPromptPart:
@@ -42,6 +70,47 @@ def otel_event(self) -> Event:
         return Event('gen_ai.system.message', body={'content': self.content, 'role': 'system'})
 
 
+@dataclass
+class VideoUrl:
+    """A URL to a video."""
+
+    url: str
+    """The URL of the video."""
+
+    kind: Literal['video-url'] = 'video-url'
+    """Type identifier, this is available on all parts as a discriminator."""
+
+    @property
+    def media_type(self) -> VideoMediaType:  # pragma: no cover
+        """Return the video's media type, inferred from the URL's file extension (`ValueError` if unknown)."""
+        if self.url.endswith('.mkv'):
+            return 'video/x-matroska'
+        elif self.url.endswith('.mov'):
+            return 'video/quicktime'
+        elif self.url.endswith('.mp4'):
+            return 'video/mp4'
+        elif self.url.endswith('.webm'):
+            return 'video/webm'
+        elif self.url.endswith('.flv'):
+            return 'video/x-flv'
+        elif self.url.endswith(('.mpeg', '.mpg')):
+            return 'video/mpeg'
+        elif self.url.endswith('.wmv'):
+            return 'video/x-ms-wmv'
+        elif self.url.endswith(('.3gp', '.three_gp')):  # '.three_gp' kept for backward compatibility
+            return 'video/3gpp'
+        else:
+            raise ValueError(f'Unknown video file extension: {self.url}')
+
+    @property
+    def format(self) -> VideoFormat:
+        """The file format of the video.
+
+        The choice of supported formats was based on the Bedrock Converse API. Other APIs don't require a format.
+        """
+        return _video_format(self.media_type)
+
+
 @dataclass
 class AudioUrl:
     """A URL to an audio file."""
@@ -123,23 +192,6 @@ def format(self) -> DocumentFormat:
         return _document_format(self.media_type)
 
 
-AudioMediaType: TypeAlias = Literal['audio/wav', 'audio/mpeg']
-ImageMediaType: TypeAlias = Literal['image/jpeg', 'image/png', 'image/gif', 'image/webp']
-DocumentMediaType: TypeAlias = Literal[
-    'application/pdf',
-    'text/plain',
-    'text/csv',
-    'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
-    'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
-    'text/html',
-    'text/markdown',
-    'application/vnd.ms-excel',
-]
-AudioFormat: TypeAlias = Literal['wav', 'mp3']
-ImageFormat: TypeAlias = Literal['jpeg', 'png', 'gif', 'webp']
-DocumentFormat: TypeAlias = Literal['csv', 'doc', 'docx', 'html', 'md', 'pdf', 'txt', 'xls', 'xlsx']
-
-
 @dataclass
 class BinaryContent:
     """Binary content, e.g. an audio or image file."""
@@ -163,6 +215,11 @@ def is_image(self) -> bool:
         """Return `True` if the media type is an image type."""
         return self.media_type.startswith('image/')
 
+    @property
+    def is_video(self) -> bool:
+        """Return `True` if the media type is a video type, i.e. it starts with `video/`."""
+        return self.media_type.startswith('video/')
+
     @property
     def is_document(self) -> bool:
         """Return `True` if the media type is a document type."""
@@ -189,10 +246,12 @@ def format(self) -> str:
             return _image_format(self.media_type)
         elif self.is_document:
             return _document_format(self.media_type)
+        elif self.is_video:
+            return _video_format(self.media_type)
         raise ValueError(f'Unknown media type: {self.media_type}')
 
 
-UserContent: TypeAlias = 'str | ImageUrl | AudioUrl | DocumentUrl | BinaryContent'
+UserContent: TypeAlias = 'str | ImageUrl | AudioUrl | DocumentUrl | VideoUrl | BinaryContent'
 
 
 def _document_format(media_type: str) -> DocumentFormat:
@@ -229,6 +288,27 @@ def _image_format(media_type: str) -> ImageFormat:
         raise ValueError(f'Unknown image media type: {media_type}')
 
 
+def _video_format(media_type: str) -> VideoFormat:
+    """Translate a video media type into its short format name.
+
+    Raises `ValueError` for any media type that is not a key of the table below.
+    """
+    formats: dict[str, VideoFormat] = {
+        'video/x-matroska': 'mkv',
+        'video/quicktime': 'mov',
+        'video/mp4': 'mp4',
+        'video/webm': 'webm',
+        'video/x-flv': 'flv',
+        'video/mpeg': 'mpeg',
+        'video/x-ms-wmv': 'wmv',
+        'video/3gpp': 'three_gp',
+    }
+    video_format = formats.get(media_type)
+    if video_format is None:  # pragma: no cover
+        raise ValueError(f'Unknown video media type: {media_type}')
+    return video_format
+
+
 @dataclass
 class UserPromptPart:
     """A user prompt, generally written by the end user.

diff --git a/pydantic_ai_slim/pydantic_ai/models/bedrock.py b/pydantic_ai_slim/pydantic_ai/models/bedrock.py
@@ -29,6 +29,7 @@
     ToolCallPart,
     ToolReturnPart,
     UserPromptPart,
+    VideoUrl,
 )
 from pydantic_ai.models import Model, ModelRequestParameters, StreamedResponse, cached_async_http_client
 from pydantic_ai.providers import Provider, infer_provider
@@ -52,6 +53,7 @@
         SystemContentBlockTypeDef,
         ToolChoiceTypeDef,
         ToolTypeDef,
+        VideoBlockTypeDef,
     )
 
 
@@ -381,21 +383,33 @@ async def _map_user_prompt(part: UserPromptPart) -> list[MessageUnionTypeDef]:
                     elif item.is_image:
                         assert format in ('jpeg', 'png', 'gif', 'webp')
                         content.append({'image': {'format': format, 'source': {'bytes': item.data}}})
+                    elif item.is_video:
+                        assert format in ('mkv', 'mov', 'mp4', 'webm', 'flv', 'mpeg', 'mpg', 'wmv', 'three_gp')
+                        content.append({'video': {'format': format, 'source': {'bytes': item.data}}})
                     else:
                         raise NotImplementedError('Binary content is not supported yet.')
-                elif isinstance(item, (ImageUrl, DocumentUrl)):
+                elif isinstance(item, (ImageUrl, DocumentUrl, VideoUrl)):
                     response = await cached_async_http_client().get(item.url)
                     response.raise_for_status()
                     if item.kind == 'image-url':
                         format = item.media_type.split('/')[1]
                         assert format in ('jpeg', 'png', 'gif', 'webp'), f'Unsupported image format: {format}'
                         image: ImageBlockTypeDef = {'format': format, 'source': {'bytes': response.content}}
                         content.append({'image': image})
+
                     elif item.kind == 'document-url':
                         document_count += 1
                         name = f'Document {document_count}'
                         data = response.content
                         content.append({'document': {'name': name, 'format': item.format, 'source': {'bytes': data}}})
+
+                    elif item.kind == 'video-url':
+                        format = item.format  # Bedrock name (e.g. 'mkv'), not the MIME subtype ('x-matroska')
+                        assert format in ('mkv', 'mov', 'mp4', 'webm', 'flv', 'mpeg', 'mpg', 'wmv', 'three_gp'), (
+                            f'Unsupported video format: {format}'
+                        )
+                        video: VideoBlockTypeDef = {'format': format, 'source': {'bytes': response.content}}
+                        content.append({'video': video})
                 elif isinstance(item, AudioUrl):  # pragma: no cover
                     raise NotImplementedError('Audio is not supported yet.')
                 else:

diff --git a/pydantic_ai_slim/pydantic_ai/models/gemini.py b/pydantic_ai_slim/pydantic_ai/models/gemini.py
@@ -34,6 +34,7 @@
     ToolCallPart,
     ToolReturnPart,
     UserPromptPart,
+    VideoUrl,
 )
 from ..settings import ModelSettings
 from ..tools import ToolDefinition
@@ -335,6 +336,8 @@ async def _map_user_prompt(part: UserPromptPart) -> list[_GeminiPartUnion]:
                         inline_data={'data': base64.b64encode(response.content).decode('utf-8'), 'mime_type': mime_type}
                     )
                     content.append(inline_data)
+                elif isinstance(item, VideoUrl):  # pragma: no cover
+                    raise NotImplementedError('VideoUrl is not supported for Gemini.')
                 else:
                     assert_never(item)
         return content

diff --git a/pydantic_ai_slim/pydantic_ai/models/mistral.py b/pydantic_ai_slim/pydantic_ai/models/mistral.py
@@ -29,6 +29,7 @@
     ToolCallPart,
     ToolReturnPart,
     UserPromptPart,
+    VideoUrl,
 )
 from ..providers import Provider, infer_provider
 from ..result import Usage
@@ -503,6 +504,8 @@ def _map_user_prompt(part: UserPromptPart) -> MistralUserMessage:
                         raise RuntimeError('Only image binary content is supported for Mistral.')
                 elif isinstance(item, DocumentUrl):
                     raise RuntimeError('DocumentUrl is not supported in Mistral.')
+                elif isinstance(item, VideoUrl):
+                    raise RuntimeError('VideoUrl is not supported in Mistral.')
                 else:  # pragma: no cover
                     raise RuntimeError(f'Unsupported content type: {type(item)}')
         return MistralUserMessage(content=content)

diff --git a/pydantic_ai_slim/pydantic_ai/models/openai.py b/pydantic_ai_slim/pydantic_ai/models/openai.py
@@ -30,6 +30,7 @@
     ToolCallPart,
     ToolReturnPart,
     UserPromptPart,
+    VideoUrl,
 )
 from ..settings import ModelSettings
 from ..tools import ToolDefinition
@@ -448,6 +449,8 @@ async def _map_user_prompt(part: UserPromptPart) -> chat.ChatCompletionUserMessa
                     # file_data = f'data:{media_type};base64,{base64_encoded}'
                     # file = File(file={'file_data': file_data, 'file_name': item.url, 'file_id': item.url}, type='file')
                     # content.append(file)
+                elif isinstance(item, VideoUrl):  # pragma: no cover
+                    raise NotImplementedError('VideoUrl is not supported for OpenAI')
                 else:
                     assert_never(item)
         return chat.ChatCompletionUserMessageParam(role='user', content=content)
@@ -765,6 +768,8 @@ async def _map_user_prompt(part: UserPromptPart) -> responses.EasyInputMessagePa
                             filename=f'filename.{item.format}',
                         )
                     )
+                elif isinstance(item, VideoUrl):  # pragma: no cover
+                    raise NotImplementedError('VideoUrl is not supported for OpenAI.')
                 else:
                     assert_never(item)
         return responses.EasyInputMessageParam(role='user', content=content)

diff --git a/pydantic_ai_slim/pyproject.toml b/pydantic_ai_slim/pyproject.toml
@@ -62,7 +62,7 @@ vertexai = ["google-auth>=2.36.0", "requests>=2.32.3"]
 anthropic = ["anthropic>=0.49.0"]
 groq = ["groq>=0.15.0"]
 mistral = ["mistralai>=1.2.5"]
-bedrock = ["boto3>=1.34.116"]
+bedrock = ["boto3>=1.35.74"]
 # Tools
 duckduckgo = ["duckduckgo-search>=7.0.0"]
 tavily = ["tavily-python>=0.5.0"]

diff --git a/tests/assets/small_video.mp4 b/tests/assets/small_video.mp4
diff --git a/tests/conftest.py b/tests/conftest.py
@@ -232,6 +232,12 @@ def image_content(assets_path: Path) -> BinaryContent:
     return BinaryContent(data=image_bytes, media_type='image/png')
 
 
+@pytest.fixture(scope='session')
+def video_content(assets_path: Path) -> BinaryContent:
+    """Session-scoped `BinaryContent` holding the bytes of the `small_video.mp4` asset, typed as `video/mp4`."""
+    return BinaryContent(data=(assets_path / 'small_video.mp4').read_bytes(), media_type='video/mp4')
+
+
 @pytest.fixture(scope='session')
 def document_content(assets_path: Path) -> BinaryContent:
     pdf_bytes = assets_path.joinpath('dummy.pdf').read_bytes()

diff --git a/tests/models/cassettes/test_bedrock/test_video_as_binary_content_input.yaml b/tests/models/cassettes/test_bedrock/test_video_as_binary_content_input.yaml