33 changes: 31 additions & 2 deletions pydantic_ai_slim/pydantic_ai/messages.py
@@ -458,6 +458,27 @@ def format(self) -> DocumentFormat:
except KeyError as e:
raise ValueError(f'Unknown document media type: {media_type}') from e

@staticmethod
def is_text_like_media_type(media_type: str) -> bool:
return (
media_type.startswith('text/')
or media_type == 'application/json'
or media_type.endswith('+json')
or media_type == 'application/xml'
or media_type.endswith('+xml')
or media_type in ('application/x-yaml', 'application/yaml')
)

@staticmethod
def inline_text_file_part(text: str, *, media_type: str, identifier: str):
text = '\n'.join(
[
f'-----BEGIN FILE id="{identifier}" type="{media_type}"-----',
text,
f'-----END FILE id="{identifier}"-----',
]
)
return {'text': text}
Collaborator:
Let's just return the text
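
A minimal sketch of what the reviewer seems to be suggesting here (my reading, not confirmed in the PR): return the wrapped string itself and let each model mapping build its own part dict around it. The helper name is an assumption; the marker format matches the diff above.

    def _inline_text_file(text: str, *, media_type: str, identifier: str) -> str:
        # Wrap text-like file content in BEGIN/END markers; the caller decides how
        # to package the string (e.g. {'text': ...} for a Gemini part).
        return '\n'.join([
            f'-----BEGIN FILE id="{identifier}" type="{media_type}"-----',
            text,
            f'-----END FILE id="{identifier}"-----',
        ])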


@dataclass(init=False, repr=False)
class BinaryContent:
@@ -513,8 +534,16 @@ def narrow_type(bc: BinaryContent) -> BinaryContent | BinaryImage:
identifier=bc.identifier,
vendor_metadata=bc.vendor_metadata,
)
else:
return bc
else:
return bc # pragma: no cover

@staticmethod
def is_text_like_media_type(mediatype: str) -> bool:
Collaborator:
Can these be instance methods on both classes that use a private function defined at the top level? The instance methods shouldn't need arguments as they can get everything from self.

Contributor (author):
@DouweM I'm trying the suggestion above. Have a look at the modified section of messages.py:
https://gist.github.com/Kamal-Moha/d35fda49a688efe0608dda571ebb65cd

However, when I actually use this in google.py, I get the error below:

/content/pydantic-ai/pydantic_ai_slim/pydantic_ai/messages.py in is_text_like_media_type(self)
    480     def is_text_like_media_type(self) -> bool:
    481         print(f"-----Media Type------")
--> 482         print(self.media_type)
    483         return _is_text_like_media_type(self.media_type)
    484

As you can see, the issue occurs when I try to access self.media_type in messages.py. Please suggest any modifications.

However, when I define the function like below:

 def is_text_like_media_type(mediatype: str) -> bool:
      return DocumentUrl.is_text_like_media_type(mediatype)

It's able to get the mediatype without any issue.

Collaborator:
> However, when I actually use this in google.py, I get the error below:

@Kamal-Moha Sorry, I don't understand. What error are you referring to?

return DocumentUrl.is_text_like_media_type(mediatype)

@staticmethod
def inline_text_file_part(text: str, *, media_type: str, identifier: str):
return DocumentUrl.inline_text_file_part(text, media_type=media_type, identifier=identifier)

@classmethod
def from_data_uri(cls, data_uri: str) -> BinaryContent:
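For reference, a self-contained sketch of the instance-method refactor discussed in the thread above, using a private module-level predicate shared by both classes. The class shapes are simplified assumptions, not the real pydantic_ai definitions.

    from dataclasses import dataclass


    def _is_text_like_media_type(media_type: str) -> bool:
        # True for media types whose payload can safely be inlined as plain text.
        return (
            media_type.startswith('text/')
            or media_type in ('application/json', 'application/xml', 'application/x-yaml', 'application/yaml')
            or media_type.endswith(('+json', '+xml'))
        )


    @dataclass
    class DocumentUrl:
        url: str
        media_type: str  # derived from the URL in the real class

        def is_text_like_media_type(self) -> bool:
            return _is_text_like_media_type(self.media_type)


    @dataclass
    class BinaryContent:
        data: bytes
        media_type: str

        def is_text_like_media_type(self) -> bool:
            return _is_text_like_media_type(self.media_type)

With this shape, the call sites in google.py become item.is_text_like_media_type() with no arguments, which is what the reviewer asked for.
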
43 changes: 37 additions & 6 deletions pydantic_ai_slim/pydantic_ai/models/google.py
@@ -19,6 +19,7 @@
BinaryContent,
BuiltinToolCallPart,
BuiltinToolReturnPart,
DocumentUrl,
FilePart,
FileUrl,
FinishReason,
@@ -565,17 +566,46 @@ async def _map_user_prompt(self, part: UserPromptPart) -> list[PartDict]:
if isinstance(item, str):
content.append({'text': item})
elif isinstance(item, BinaryContent):
inline_data_dict: BlobDict = {'data': item.data, 'mime_type': item.media_type}
part_dict: PartDict = {'inline_data': inline_data_dict}
if item.vendor_metadata:
part_dict['video_metadata'] = cast(VideoMetadataDict, item.vendor_metadata)
content.append(part_dict)
if BinaryContent.is_text_like_media_type(item.media_type):
Collaborator:
We should also update the OpenAI implementation to use the new methods

content.append(
BinaryContent.inline_text_file_part(
item.data.decode('utf-8'),
media_type=item.media_type,
identifier=item.identifier,
)
)
else:
inline_data_dict: BlobDict = {'data': item.data, 'mime_type': item.media_type}
part_dict: PartDict = {'inline_data': inline_data_dict}
if item.vendor_metadata:
part_dict['video_metadata'] = cast(VideoMetadataDict, item.vendor_metadata)
content.append(part_dict)

elif isinstance(item, DocumentUrl):
if DocumentUrl.is_text_like_media_type(item.media_type):
downloaded_text = await download_item(item, data_format='text')
content.append(
DocumentUrl.inline_text_file_part(
downloaded_text['data'],
media_type=item.media_type,
identifier=item.identifier,
)
)
else:
downloaded_item = await download_item(item, data_format='bytes')
inline_data_dict: BlobDict = {
'data': downloaded_item['data'],
'mime_type': downloaded_item['data_type'],
}
content.append({'inline_data': inline_data_dict})

elif isinstance(item, VideoUrl) and item.is_youtube:
file_data_dict: FileDataDict = {'file_uri': item.url, 'mime_type': item.media_type}
part_dict: PartDict = {'file_data': file_data_dict}
if item.vendor_metadata: # pragma: no branch
part_dict['video_metadata'] = cast(VideoMetadataDict, item.vendor_metadata)
content.append(part_dict)

elif isinstance(item, FileUrl):
if item.force_download or (
# google-gla does not support passing file urls directly, except for youtube videos
@@ -594,7 +624,8 @@ async def _map_user_prompt(self, part: UserPromptPart) -> list[PartDict]:
content.append({'file_data': file_data_dict}) # pragma: lax no cover
else:
assert_never(item)
return content

return content

def _map_response_schema(self, o: OutputObjectDefinition) -> dict[str, Any]:
response_schema = o.json_schema.copy()
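A simplified sketch of the BinaryContent branching the google.py hunk above introduces (assumed shape only, relying on the helpers sketched earlier): text-like payloads become a wrapped text part, everything else stays an inline_data blob.

    def _map_binary_content(item) -> dict:
        # `item` is assumed to expose .data, .media_type and .identifier, like BinaryContent.
        # Uses the _is_text_like_media_type and _inline_text_file helpers sketched above.
        if _is_text_like_media_type(item.media_type):
            return {'text': _inline_text_file(
                item.data.decode('utf-8'),
                media_type=item.media_type,
                identifier=item.identifier,
            )}
        return {'inline_data': {'data': item.data, 'mime_type': item.media_type}}
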
176 changes: 176 additions & 0 deletions (new VCR cassette; file path not shown)
@@ -0,0 +1,176 @@
interactions:
- request:
body: ''
headers:
accept:
- '*/*'
accept-encoding:
- gzip, deflate
connection:
- keep-alive
host:
- kamalscraping-collab.github.io
method: GET
uri: https://kamalscraping-collab.github.io/sample-data/sample_transcript.json
response:
body:
string: |-
{
"items": [
{
"id": "GR_ad8d2a461fc5",
"type": "message",
"role": "assistant",
"content": [
"Hello, how can I help you today?"
],
"interrupted": false
},
{
"id": "item_13ecd51e0dcc",
"type": "function_call",
"call_id": "function-call-18124021183837676163",
"arguments": "{\"location\": \"Kampala, Uganda\"}",
"name": "lookup_weather"
},
{
"id": "GI_14a70e7c2d20",
"type": "message",
"role": "user",
"content": [
"Haide, can you please tell me the weather in compiler Uganda"
],
"interrupted": false
},
{
"id": "item_000f739d4414",
"type": "function_call_output",
"name": "lookup_weather",
"call_id": "function-call-18124021183837676163",
"output": "{'weather': 'sunny', 'temperature_f': 70}",
"is_error": false
},
{
"id": "GR_95c91db6b975",
"type": "message",
"role": "assistant",
"content": [
"The weather in Kampala, Uganda is sunny with a temperature of 70 degrees Fahrenheit."
],
"interrupted": false
},
{
"id": "GI_c8cc9177073f",
"type": "message",
"role": "user",
"content": [
"what can you please tell me what are the best things to do in compiler you're"
],
"interrupted": false
},
{
"id": "GR_792c5f6fbc89",
"type": "message",
"role": "assistant",
"content": [
"While I can tell you the weather, I'm not able to provide information on the best things to do in a specific location. Is there anything else I can help you with?"
],
"interrupted": false
}
]
}
headers:
cache-control:
- max-age=604800
- public
connection:
- keep-alive
content-length:
- '2574'
content-type:
- text/plain; charset=UTF-8
etag:
- W/"61efea10-a0e"
expires:
- Fri, 26 Dec 2025 16:42:28 GMT
last-modified:
- Tue, 25 Jan 2022 12:16:16 GMT
strict-transport-security:
- max-age=15552000; includeSubDomains
transfer-encoding:
- chunked
vary:
- Accept-Encoding
status:
code: 200
message: OK
- request:
headers:
accept:
- '*/*'
accept-encoding:
- gzip, deflate
connection:
- keep-alive
content-length:
- '3701'
content-type:
- application/json
host:
- generativelanguage.googleapis.com
method: POST
parsed_body:
contents:
- parts:
- text: What is the main content on this document?
- inlineData:
data: VFhUIHRlc3QgZmlsZQpQdXJwb3NlOiBQcm92aWRlIGV4YW1wbGUgb2YgdGhpcyBmaWxlIHR5cGUKRG9jdW1lbnQgZmlsZSB0eXBlOiBUWFQKVmVyc2lvbjogMS4wClJlbWFyazoKCkV4YW1wbGUgY29udGVudDoKVGhlIG5hbWVzICJKb2huIERvZSIgZm9yIG1hbGVzLCAiSmFuZSBEb2UiIG9yICJKYW5lIFJvZSIgZm9yIGZlbWFsZXMsIG9yICJKb25uaWUgRG9lIiBhbmQgIkphbmllIERvZSIgZm9yIGNoaWxkcmVuLCBvciBqdXN0ICJEb2UiIG5vbi1nZW5kZXItc3BlY2lmaWNhbGx5IGFyZSB1c2VkIGFzIHBsYWNlaG9sZGVyIG5hbWVzIGZvciBhIHBhcnR5IHdob3NlIHRydWUgaWRlbnRpdHkgaXMgdW5rbm93biBvciBtdXN0IGJlIHdpdGhoZWxkIGluIGEgbGVnYWwgYWN0aW9uLCBjYXNlLCBvciBkaXNjdXNzaW9uLiBUaGUgbmFtZXMgYXJlIGFsc28gdXNlZCB0byByZWZlciB0byBhY29ycHNlIG9yIGhvc3BpdGFsIHBhdGllbnQgd2hvc2UgaWRlbnRpdHkgaXMgdW5rbm93bi4gVGhpcyBwcmFjdGljZSBpcyB3aWRlbHkgdXNlZCBpbiB0aGUgVW5pdGVkIFN0YXRlcyBhbmQgQ2FuYWRhLCBidXQgaXMgcmFyZWx5IHVzZWQgaW4gb3RoZXIgRW5nbGlzaC1zcGVha2luZyBjb3VudHJpZXMgaW5jbHVkaW5nIHRoZSBVbml0ZWQgS2luZ2RvbSBpdHNlbGYsIGZyb20gd2hlcmUgdGhlIHVzZSBvZiAiSm9obiBEb2UiIGluIGEgbGVnYWwgY29udGV4dCBvcmlnaW5hdGVzLiBUaGUgbmFtZXMgSm9lIEJsb2dncyBvciBKb2huIFNtaXRoIGFyZSB1c2VkIGluIHRoZSBVSyBpbnN0ZWFkLCBhcyB3ZWxsIGFzIGluIEF1c3RyYWxpYSBhbmQgTmV3IFplYWxhbmQuCgpKb2huIERvZSBpcyBzb21ldGltZXMgdXNlZCB0byByZWZlciB0byBhIHR5cGljYWwgbWFsZSBpbiBvdGhlciBjb250ZXh0cyBhcyB3ZWxsLCBpbiBhIHNpbWlsYXIgbWFubmVyIHRvIEpvaG4gUS4gUHVibGljLCBrbm93biBpbiBHcmVhdCBCcml0YWluIGFzIEpvZSBQdWJsaWMsIEpvaG4gU21pdGggb3IgSm9lIEJsb2dncy4gRm9yIGV4YW1wbGUsIHRoZSBmaXJzdCBuYW1lIGxpc3RlZCBvbiBhIGZvcm0gaXMgb2Z0ZW4gSm9obiBEb2UsIGFsb25nIHdpdGggYSBmaWN0aW9uYWwgYWRkcmVzcyBvciBvdGhlciBmaWN0aW9uYWwgaW5mb3JtYXRpb24gdG8gcHJvdmlkZSBhbiBleGFtcGxlIG9mIGhvdyB0byBmaWxsIGluIHRoZSBmb3JtLiBUaGUgbmFtZSBpcyBhbHNvIHVzZWQgZnJlcXVlbnRseSBpbiBwb3B1bGFyIGN1bHR1cmUsIGZvciBleGFtcGxlIGluIHRoZSBGcmFuayBDYXByYSBmaWxtIE1lZXQgSm9obiBEb2UuIEpvaG4gRG9lIHdhcyBhbHNvIHRoZSBuYW1lIG9mIGEgMjAwMiBBbWVyaWNhbiB0ZWxldmlzaW9uIHNlcmllcy4KClNpbWlsYXJseSwgYSBjaGlsZCBvciBiYWJ5IHdob3NlIGlkZW50aXR5IGlzIHVua25vd24gbWF5IGJlIHJlZmVycmVkIHRvIGFzIEJhYnkgRG9lLiBBIG5vdG9yaW91cyBtdXJkZXIgY2FzZSBpbiBLYW5zYXMgQ2l0eSwgTWlzc291cmksIHJlZmVycmVkIHRvIHRoZSBiYWJ5IHZpY3RpbSBhcyBQcmVjaW91cyBEb2UuIE90aGVyIHVuaWRlbnRpZmllZCBmZW1hbGUgbXVyZGVyIHZpY3RpbXMgYXJlIENhbGkgRG9lIGFuZCBQcmluY2VzcyBEb2UuIEFkZGl0aW9uYWwgcGVyc29ucyBtYXkgYmUgY2FsbGVkIEphbWVzIERvZSwgSnVkeSBEb2UsIGV0Yy4gSG93ZXZlciwgdG8gYXZvaWQgcG9zc2libGUgY29uZnVzaW9uLCBpZiB0d28gYW5vbnltb3VzIG9yIHVua25vd24gcGFydGllcyBhcmUgY2l0ZWQgaW4gYSBzcGVjaWZpYyBjYXNlIG9yIGFjdGlvbiwgdGhlIHN1cm5hbWVzIERvZSBhbmQgUm9lIG1heSBiZSB1c2VkIHNpbXVsdGFuZW91c2x5OyBmb3IgZXhhbXBsZSwgIkpvaG4gRG9lIHYuIEphbmUgUm9lIi4gSWYgc2V2ZXJhbCBhbm9ueW1vdXMgcGFydGllcyBhcmUgcmVmZXJlbmNlZCwgdGhleSBtYXkgc2ltcGx5IGJlIGxhYmVsbGVkIEpvaG4gRG9lICMxLCBKb2huIERvZSAjMiwgZXRjLiAodGhlIFUuUy4gT3BlcmF0aW9uIERlbGVnbyBjaXRlZCAyMSAobnVtYmVyZWQpICJKb2huIERvZSJzKSBvciBsYWJlbGxlZCB3aXRoIG90aGVyIHZhcmlhbnRzIG9mIERvZSAvIFJvZSAvIFBvZSAvIGV0Yy4gT3RoZXIgZWFybHkgYWx0ZXJuYXRpdmVzIHN1Y2ggYXMgSm9obiBTdGlsZXMgYW5kIFJpY2hhcmQgTWlsZXMgYXJlIG5vdyByYXJlbHkgdXNlZCwgYW5kIE1hcnkgTWFqb3IgaGFzIGJlZW4gdXNlZCBpbiBzb21lIEFtZXJpY2FuIGZlZGVyYWwgY2FzZXMuCgoKCkZpbGUgY3JlYXRlZCBieSBodHRwczovL3d3dy5vbmxpbmUtY29udmVydC5jb20KTW9yZSBleGFtcGxlIGZpbGVzOiBodHRwczovL3d3dy5vbmxpbmUtY29udmVydC5jb20vZmlsZS10eXBlClRleHQgb2YgRXhhbXBsZSBjb250ZW50OiBXaWtpcGVkaWEgKGh0dHBzOi8vZW4ud2lraXBlZGlhLm9yZy93aWtpL0pvaG5fRG9lKQpMaWNlbnNlOiBBdHRyaWJ1dGlvbi1TaGFyZUFsaWtlIDQuMCAoaHR0cHM6Ly9jcmVhdGl2ZWNvbW1vbnMub3JnL2xpY2Vuc2VzL2J5LXNhLzQuMC8pCgpGZWVsIGZyZWUgdG8gdXNlIGFuZCBzaGFyZSB0aGUgZmlsZSBhY2NvcmRpbmcgdG8gdGhlIGxpY2Vuc2UgYWJvdmUu
mimeType: application/json
role: user
generationConfig: {}
systemInstruction:
parts:
- text: You are a helpful chatbot.
role: user
uri: https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-pro:generateContent
response:
headers:
alt-svc:
- h3=":443"; ma=2592000,h3-29=":443"; ma=2592000
content-length:
- '985'
content-type:
- application/json; charset=UTF-8
server-timing:
- gfet4t7; dur=888
transfer-encoding:
- chunked
vary:
- Origin
- X-Origin
- Referer
parsed_body:
candidates:
- avgLogprobs: -0.5004191543116714
content:
parts:
- text: |
Based on the JSON data provided, the document contains the log of a conversation between a user and an AI assistant.
role: model
finishReason: STOP
modelVersion: gemini-2.5-pro
responseId: 9YfNaLGGDuOmqtsPoLXu4AQ
usageMetadata:
candidatesTokenCount: 66
candidatesTokensDetails:
- modality: TEXT
tokenCount: 66
promptTokenCount: 614
promptTokensDetails:
- modality: TEXT
tokenCount: 614
totalTokenCount: 680
status:
code: 200
message: OK
version: 1
12 changes: 12 additions & 0 deletions tests/models/test_google.py
@@ -835,6 +835,18 @@ async def test_google_model_text_document_url_input(allow_model_requests: None,
)


async def test_google_model_json_document_url_input(allow_model_requests: None, google_provider: GoogleProvider):
Collaborator:
The VCR cassette will be generated automatically when you call uv run pytest <path> --record-mode=rewrite. Is that what you did?

m = GoogleModel('gemini-2.5-pro', provider=google_provider)
agent = Agent(m, system_prompt='You are a helpful chatbot.')

json_document_url = DocumentUrl(url='https://kamalscraping-collab.github.io/sample-data/sample_transcript.json')
Collaborator:
Can we please use a different public JSON file that's not dependent on your repo?


result = await agent.run(['What is the main content of this document?', json_document_url])
assert result.output == snapshot(
'Based on the JSON data provided, the document contains the log of a conversation between a user and an AI assistant.\n'
)


async def test_google_model_text_as_binary_content_input(allow_model_requests: None, google_provider: GoogleProvider):
m = GoogleModel('gemini-2.0-flash', provider=google_provider)
agent = Agent(m, system_prompt='You are a helpful chatbot.')