8 changes: 4 additions & 4 deletions deploy/test/index.spec.ts.snap
@@ -14,8 +14,8 @@ exports[`deploy > should call openai via gateway > llm 1`] = `
},
},
],
"created": 1761828474,
"id": "chatcmpl-CWMMElxV7Z5jV4zs2g2cRQjZTsY8M",
"created": 1762272055,
"id": "chatcmpl-CYDklwaN7x9okuWTnABMCrZykoiRj",
"model": "gpt-5-2025-08-07",
"object": "chat.completion",
"service_tier": "default",
@@ -118,7 +118,7 @@ exports[`deploy > should call openai via gateway > span 1`] = `
{
"key": "gen_ai.response.id",
"value": {
"stringValue": "chatcmpl-CWMMElxV7Z5jV4zs2g2cRQjZTsY8M",
"stringValue": "chatcmpl-CYDklwaN7x9okuWTnABMCrZykoiRj",
},
},
{
@@ -293,7 +293,7 @@ exports[`deploy > should call openai via gateway > span 1`] = `
{
"key": "http.response.body.text",
"value": {
"stringValue": "{"id":"chatcmpl-CWMMElxV7Z5jV4zs2g2cRQjZTsY8M","object":"chat.completion","created":1761828474,"model":"gpt-5-2025-08-07","choices":[{"index":0,"message":{"role":"assistant","content":"Paris.","refusal":null,"annotations":[]},"finish_reason":"stop"}],"usage":{"prompt_tokens":23,"completion_tokens":75,"total_tokens":98,"prompt_tokens_details":{"cached_tokens":0,"audio_tokens":0},"completion_tokens_details":{"reasoning_tokens":64,"audio_tokens":0,"accepted_prediction_tokens":0,"rejected_prediction_tokens":0},"pydantic_ai_gateway":{"cost_estimate":0.00077875}},"service_tier":"default","system_fingerprint":null}",
"stringValue": "{"id":"chatcmpl-CYDklwaN7x9okuWTnABMCrZykoiRj","object":"chat.completion","created":1762272055,"model":"gpt-5-2025-08-07","choices":[{"index":0,"message":{"role":"assistant","content":"Paris.","refusal":null,"annotations":[]},"finish_reason":"stop"}],"usage":{"prompt_tokens":23,"completion_tokens":75,"total_tokens":98,"prompt_tokens_details":{"cached_tokens":0,"audio_tokens":0},"completion_tokens_details":{"reasoning_tokens":64,"audio_tokens":0,"accepted_prediction_tokens":0,"rejected_prediction_tokens":0},"pydantic_ai_gateway":{"cost_estimate":0.00077875}},"service_tier":"default","system_fingerprint":null}",
},
},
{
1 change: 1 addition & 0 deletions gateway/package.json
@@ -13,6 +13,7 @@
"@opentelemetry/resources": "^2.0.1",
"@pydantic/genai-prices": "^0.0.35",
"@pydantic/logfire-api": "^0.9.0",
"@streamparser/json-whatwg": "^0.0.22",
"eventsource-parser": "^3.0.6",
"mime-types": "^3.0.1",
"ts-pattern": "^5.8.0"
26 changes: 24 additions & 2 deletions gateway/src/api/google.ts
@@ -19,11 +19,11 @@ import type {
TextPart,
} from '../otel/genai'
import { isMapping, type JsonData } from '../providers/default'
import { BaseAPI } from './base'
import { BaseAPI, type ExtractedRequest, type ExtractedResponse, type ExtractorConfig } from './base'

export { GenerateContentResponse } from '@google/genai'

export class GoogleAPI extends BaseAPI<GoogleRequest, GenerateContentResponse> {
export class GoogleAPI extends BaseAPI<GoogleRequest, GenerateContentResponse, GenerateContentResponse> {
requestStopSequences = (_request: GoogleRequest): string[] | undefined => {
return _request.generationConfig?.stopSequences ?? undefined
}
@@ -67,6 +67,28 @@ export class GoogleAPI extends BaseAPI<GoogleRequest, GenerateContentResponse> {
systemInstructions = (_request: GoogleRequest): TextPart[] | undefined => {
return systemInstructions(_request.systemInstruction)
}

// SafeExtractor implementation

requestExtractors: ExtractorConfig<GoogleRequest, ExtractedRequest> = {
requestModel: (_request: GoogleRequest) => {
this.extractedRequest.requestModel = this.requestModel
},
}

chunkExtractors: ExtractorConfig<GenerateContentResponse, ExtractedResponse> = {
usage: (chunk: GenerateContentResponse) => {
if (chunk.usageMetadata) {
// TODO(Marcelo): This is likely to be wrong, since we are not summing the usage.
this.extractedResponse.usage = this.extractUsage(chunk)
Comment on lines +82 to +83
Copilot AI Nov 5, 2025

The TODO comment correctly identifies a bug: for streaming responses, usage from multiple chunks should be accumulated rather than overwritten. Each chunk may contain partial usage data that needs to be summed. Consider implementing accumulation logic similar to how token counts are typically aggregated across streaming chunks.

Suggested change
// TODO(Marcelo): This is likely to be wrong, since we are not summing the usage.
this.extractedResponse.usage = this.extractUsage(chunk)
// Accumulate usage across streaming chunks.
const newUsage = this.extractUsage(chunk);
if (this.extractedResponse.usage) {
  // Sum numeric fields in usage objects.
  for (const key of Object.keys(newUsage)) {
    if (
      typeof newUsage[key] === 'number' &&
      typeof this.extractedResponse.usage[key] === 'number'
    ) {
      this.extractedResponse.usage[key] += newUsage[key];
    } else {
      // For non-numeric fields, prefer the latest chunk's value.
      this.extractedResponse.usage[key] = newUsage[key];
    }
  }
} else {
  this.extractedResponse.usage = { ...newUsage };
}
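
For reference, a typed variant of the same accumulation idea, sketched under the assumption that each streaming chunk reports incremental token counts (if Gemini instead reports cumulative totals per chunk, keeping the last chunk's value, as the current code does, is already enough). The UsageCounts shape and addUsage helper below are illustrative, not the gateway's actual ExtractedResponse['usage'] type:

// Sketch only: assumed usage shape, adjust to the real ExtractedResponse type.
interface UsageCounts {
  promptTokenCount?: number
  candidatesTokenCount?: number
  thoughtsTokenCount?: number
  totalTokenCount?: number
}

function addUsage(total: UsageCounts | undefined, chunk: UsageCounts): UsageCounts {
  if (!total) return { ...chunk }
  return {
    // Prompt tokens repeat on every chunk, so keep the latest value.
    promptTokenCount: chunk.promptTokenCount ?? total.promptTokenCount,
    // Output and reasoning tokens are summed as per-chunk deltas (see assumption above).
    candidatesTokenCount: (total.candidatesTokenCount ?? 0) + (chunk.candidatesTokenCount ?? 0),
    thoughtsTokenCount: (total.thoughtsTokenCount ?? 0) + (chunk.thoughtsTokenCount ?? 0),
    totalTokenCount: chunk.totalTokenCount ?? total.totalTokenCount,
  }
}

With a helper like this, the extractor body becomes a single call along the lines of this.extractedResponse.usage = addUsage(this.extractedResponse.usage, chunk.usageMetadata ?? {}), with no string indexing, assuming the usage field uses a compatible shape.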

}
},
responseModel: (chunk: GenerateContentResponse) => {
if (chunk.modelVersion) {
this.extractedResponse.responseModel = chunk.modelVersion
}
},
}
}

function mapContent(content: Content): ChatMessage {
4 changes: 2 additions & 2 deletions gateway/src/index.ts
@@ -45,15 +45,15 @@ export async function gatewayFetch(
ctx: ExecutionContext,
options: GatewayOptions,
): Promise<Response> {
let { pathname: proxyPath } = url
let { pathname: proxyPath, search: queryString } = url
if (options.proxyPrefixLength) {
proxyPath = proxyPath.slice(options.proxyPrefixLength)
}
try {
if (proxyPath === '/') {
return index(request, options)
} else {
return await gateway(request, proxyPath, ctx, options)
return await gateway(request, `${proxyPath}${queryString}`, ctx, options)
}
} catch (error) {
if (error instanceof ResponseError) {
11 changes: 8 additions & 3 deletions gateway/src/providers/default.ts
@@ -321,9 +321,7 @@ export class DefaultProviderProxy {
}
}

const isStreaming =
responseHeaders.get('content-type')?.startsWith('text/event-stream') ||
('stream' in requestBodyData && requestBodyData.stream === true)
const isStreaming = this.isStreaming(responseHeaders, requestBodyData)
if (isStreaming) {
return this.dispatchStreaming(prepResult, response, responseHeaders)
}
@@ -465,6 +463,13 @@
}
}

protected isStreaming(responseHeaders: Headers, requestBodyData: JsonData): boolean {
return (
responseHeaders.get('content-type')?.toLowerCase().startsWith('text/event-stream') ||
('stream' in requestBodyData && requestBodyData.stream === true)
)
}

protected isWhitelistedEndpoint(): boolean {
return false
}
8 changes: 4 additions & 4 deletions gateway/src/providers/google/auth.ts
@@ -1,6 +1,6 @@
import { ResponseError } from '../../utils'

export async function authToken(credentials: string, kv: KVNamespace): Promise<string> {
export async function authToken(credentials: string, kv: KVNamespace, subFetch: typeof fetch): Promise<string> {
const serviceAccountHash = await hash(credentials)
const cacheKey = `gcp-auth:${serviceAccountHash}`
const cachedToken = await kv.get(cacheKey, { cacheTtl: 300 })
@@ -9,7 +9,7 @@
}
const serviceAccount = getServiceAccount(credentials)
const jwt = await jwtSign(serviceAccount)
const token = await getAccessToken(jwt)
const token = await getAccessToken(jwt, subFetch)
await kv.put(cacheKey, token, { expirationTtl: 3000 })
return token
}
@@ -80,10 +80,10 @@ async function jwtSign(serviceAccount: ServiceAccount): Promise<string> {
return `${signingInput}.${b64UrlEncodeArray(signature)}`
}

async function getAccessToken(jwt: string): Promise<string> {
async function getAccessToken(jwt: string, subFetch: typeof fetch): Promise<string> {
const body = new URLSearchParams({ grant_type: 'urn:ietf:params:oauth:grant-type:jwt-bearer', assertion: jwt })

const response = await fetch(tokenUrl, {
const response = await subFetch(tokenUrl, {
method: 'POST',
headers: { 'Content-Type': 'application/x-www-form-urlencoded' },
signal: AbortSignal.timeout(10000),
18 changes: 9 additions & 9 deletions gateway/src/providers/google/index.ts
@@ -19,7 +19,7 @@ export class GoogleVertexProvider extends DefaultProviderProxy {
if (!path) {
return { error: 'Unable to parse path' }
}
return `${this.providerProxy.baseUrl}${path}`
return `${stripTrailingSlash(this.providerProxy.baseUrl)}/${stripLeadingSlash(path)}`
} else {
return { error: 'baseUrl is required for the Google Provider' }
}
@@ -72,7 +72,8 @@ export class GoogleVertexProvider extends DefaultProviderProxy {
this.flavor = 'anthropic'
}

return `/${version}/projects/${projectId}/locations/${region}/publishers/${publisher}/models/${modelAndApi}`
const path = `/${version}/projects/${projectId}/locations/${region}/publishers/${publisher}/models/${modelAndApi}`
return path
}

async prepRequest() {
@@ -92,7 +93,7 @@
}

async requestHeaders(headers: Headers): Promise<void> {
const token = await authToken(this.providerProxy.credentials, this.options.kv)
const token = await authToken(this.providerProxy.credentials, this.options.kv, this.options.subFetch)
headers.set('Authorization', `Bearer ${token}`)
}
}
@@ -104,10 +105,9 @@ export class GoogleVertexProvider extends DefaultProviderProxy {
* @param url - The URL to extract the region from e.g. https://europe-west4-aiplatform.googleapis.com or https://aiplatform.googleapis.com.
*/
function regionFromUrl(url: string): null | string {
if (url.includes('https://aiplatform.googleapis.com')) {
return 'global'
}
// The group includes regions with hyphen like "europe-west4"
const match = url.match(/^https:\/\/(.+?)-aiplatform\.googleapis\.com$/)
return match?.[1] ?? null
const match = url.match(/^https:\/\/([^-]+)-aiplatform\.googleapis\.com$/)
Copilot AI Nov 5, 2025

The regex pattern ([^-]+) will not match regions with hyphens like 'europe-west4' or 'us-east1'. The pattern stops at the first hyphen, so 'https://europe-west4-aiplatform.googleapis.com' would only capture 'europe' instead of 'europe-west4'. The regex should be changed to ([^.]+) or a more specific pattern that matches valid GCP regions.

Suggested change
const match = url.match(/^https:\/\/([^-]+)-aiplatform\.googleapis\.com$/)
const match = url.match(/^https:\/\/([a-z0-9-]+)-aiplatform\.googleapis\.com$/)
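
As a quick standalone illustration of the difference between the two patterns (the narrow/wide names are labels for this sketch only, not identifiers from the codebase):

// Illustrative comparison of the two region-extraction patterns.
const narrow = /^https:\/\/([^-]+)-aiplatform\.googleapis\.com$/
const wide = /^https:\/\/([a-z0-9-]+)-aiplatform\.googleapis\.com$/
for (const url of ['https://europe-west4-aiplatform.googleapis.com', 'https://aiplatform.googleapis.com']) {
  // narrow can never match a hyphenated region, so regional URLs fall back to 'global'.
  console.log(url, narrow.exec(url)?.[1] ?? 'global', wide.exec(url)?.[1] ?? 'global')
}
// europe-west4-aiplatform.googleapis.com -> 'global' with narrow, 'europe-west4' with wide
// aiplatform.googleapis.com              -> 'global' with both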

return match?.[1] ?? 'global'
}

const stripTrailingSlash = (url: string): string => (url.endsWith('/') ? url.slice(0, -1) : url)
const stripLeadingSlash = (url: string): string => (url.startsWith('/') ? url.slice(1) : url)
1 change: 1 addition & 0 deletions gateway/test/env.d.ts
@@ -6,6 +6,7 @@ interface Env {
GROQ_API_KEY: string
ANTHROPIC_API_KEY: string
AWS_BEARER_TOKEN_BEDROCK: string
GOOGLE_SERVICE_ACCOUNT_KEY: string
}

declare module 'cloudflare:test' {
4 changes: 2 additions & 2 deletions gateway/test/gateway.spec.ts.snap
@@ -14,8 +14,8 @@ exports[`custom proxyPrefixLength > inference > proxyPrefixLength 1`] = `
},
},
],
"created": 1761823178,
"id": "chatcmpl-CWKyoLFrrxfDdUZO6hAaDA7rYn3Fo",
"created": 1762271642,
"id": "chatcmpl-CYDe6BCWOKGGGTlQLofyQ2DP3QTRV",
"model": "gpt-5-2025-08-07",
"object": "chat.completion",
"service_tier": "default",
8 changes: 7 additions & 1 deletion gateway/test/providers/anthropic.spec.ts.snap
@@ -1130,7 +1130,7 @@ exports[`anthropic > should call anthropic via gateway with stream > span 1`] =
{
"key": "logfire.json_schema",
"value": {
"stringValue": "{"type":"object","properties":{"gen_ai.system":{"type":"string"},"gen_ai.operation.name":{"type":"string"},"gen_ai.request.model":{"type":"string"},"gen_ai.request.max_tokens":{"type":"number"},"gen_ai.response.model":{"type":"string"},"gen_ai.response.id":{"type":"string"},"gen_ai.usage.input_tokens":{"type":"number"},"gen_ai.usage.cache_read_tokens":{"type":"number"},"gen_ai.usage.cache_write_tokens":{"type":"number"},"gen_ai.usage.output_tokens":{"type":"number"},"http.request.method":{"type":"string"},"url.full":{"type":"string"},"http.request.header.accept":{"type":"string"},"http.request.header.anthropic-version":{"type":"string"},"http.request.header.authorization":{"type":"string"},"http.request.header.content-type":{"type":"string"},"http.request.header.user-agent":{"type":"string"},"http.request.header.x-stainless-arch":{"type":"string"},"http.request.header.x-stainless-lang":{"type":"string"},"http.request.header.x-stainless-os":{"type":"string"},"http.request.header.x-stainless-package-version":{"type":"string"},"http.request.header.x-stainless-retry-count":{"type":"string"},"http.request.header.x-stainless-runtime":{"type":"string"},"http.request.header.x-stainless-runtime-version":{"type":"string"},"http.request.header.x-stainless-timeout":{"type":"string"},"http.response.status_code":{"type":"number"},"http.response.header.server":{"type":"string"},"http.response.header.transfer-encoding":{"type":"string"}}}",
"stringValue": "{"type":"object","properties":{"gen_ai.system":{"type":"string"},"gen_ai.operation.name":{"type":"string"},"gen_ai.request.model":{"type":"string"},"gen_ai.request.max_tokens":{"type":"number"},"gen_ai.response.model":{"type":"string"},"gen_ai.response.id":{"type":"string"},"gen_ai.usage.input_tokens":{"type":"number"},"gen_ai.usage.cache_read_tokens":{"type":"number"},"gen_ai.usage.cache_write_tokens":{"type":"number"},"gen_ai.usage.output_tokens":{"type":"number"},"http.request.method":{"type":"string"},"url.full":{"type":"string"},"http.request.header.accept":{"type":"string"},"http.request.header.anthropic-version":{"type":"string"},"http.request.header.authorization":{"type":"string"},"http.request.header.content-type":{"type":"string"},"http.request.header.user-agent":{"type":"string"},"http.request.header.x-stainless-arch":{"type":"string"},"http.request.header.x-stainless-lang":{"type":"string"},"http.request.header.x-stainless-os":{"type":"string"},"http.request.header.x-stainless-package-version":{"type":"string"},"http.request.header.x-stainless-retry-count":{"type":"string"},"http.request.header.x-stainless-runtime":{"type":"string"},"http.request.header.x-stainless-runtime-version":{"type":"string"},"http.request.header.x-stainless-timeout":{"type":"string"},"http.response.status_code":{"type":"number"},"http.response.header.content-type":{"type":"string"},"http.response.header.server":{"type":"string"},"http.response.header.transfer-encoding":{"type":"string"}}}",
},
},
{
@@ -1295,6 +1295,12 @@ exports[`anthropic > should call anthropic via gateway with stream > span 1`] =
"intValue": 200,
},
},
{
"key": "http.response.header.content-type",
"value": {
"stringValue": "text/event-stream; charset=utf-8",
},
},
{
"key": "http.response.header.server",
"value": {
87 changes: 70 additions & 17 deletions gateway/test/providers/google.spec.ts
@@ -1,25 +1,78 @@
import { GoogleGenAI } from '@google/genai'
import { describe, expect } from 'vitest'
import { test } from '../setup'

const body = JSON.stringify({
contents: [{ parts: [{ text: "Samuel lived in London and was born on Jan 28th '87" }], role: 'user' }],
systemInstruction: { parts: [{ text: 'Extract information about the person' }], role: 'user' },
tools: [
{
functionDeclarations: [
{
description: 'The final response which ends this conversation',
name: 'final_result',
parameters: {
properties: {
name: { description: 'The name of the person.', type: 'STRING' },
dob: {
description: 'The date of birth of the person. MUST BE A VALID ISO 8601 date. (format: date)',
type: 'STRING',
},
city: { description: 'The city where the person lives.', type: 'STRING' },
},
required: ['name', 'dob', 'city'],
type: 'OBJECT',
},
},
],
},
],
toolConfig: { functionCallingConfig: { mode: 'ANY', allowedFunctionNames: ['final_result'] } },
generationConfig: { temperature: 0.5, topP: 0.9, stopSequences: ['potato'] },
})
const headers = {
Authorization: 'healthy',
'x-goog-api-client': 'google-genai-sdk/1.36.0 gl-python/3.13.0',
'x-goog-api-key': 'unset',
accept: '*/*',
'accept-encoding': 'deflate',
'content-type': 'application/json',
'content-length': body.length.toString(),
'user-agent':
'pydantic-ai/1.0.19.dev5+b3b34f9, google-genai-sdk/1.36.0 gl-python/3.13.0 via Pydantic AI Gateway unknown, contact engineering@pydantic.dev',
traceparent: '00-019a4effa21047ac31372f093cb8e712-8b60768281864a49-01',
}

describe('google', () => {
// TODO(Marcelo): When Google supports `fetch` parameter, we can fix this: https://github.com/googleapis/js-genai/issues/999
test.fails('google-vertex/default', async ({ gateway }) => {
const { otelBatch } = gateway

// The `authToken` is passed as `Authorization` header with the anthropic client.
const client = new GoogleGenAI({
apiKey: 'healthy',
httpOptions: { baseUrl: 'https://example.com/google-vertex' },
})

const response = await client.models.generateContent({
model: 'gemini-2.5-flash',
contents: 'What is the capital of france?',
config: { maxOutputTokens: 1024, topP: 0.95, topK: 1, temperature: 0.5, stopSequences: ['potato'] },
})

expect(response).toMatchSnapshot('llm')
test('google-vertex/default', async ({ gateway }) => {
const { fetch, otelBatch } = gateway

const response = await fetch(
'https://example.com/gemini/v1beta1/projects/pydantic-ai/locations/global/publishers/google/models/gemini-2.5-flash:generateContent?alt=sse',
{ method: 'POST', headers, body },
)

const content = await response.text()

expect(content).toMatchSnapshot('llm')
expect(otelBatch, 'otelBatch length not 1').toHaveLength(1)
expect(JSON.parse(otelBatch[0]!).resourceSpans?.[0].scopeSpans?.[0].spans?.[0]?.attributes).toMatchSnapshot('span')
})

test('google-vertex/stream', async ({ gateway }) => {
const { fetch, otelBatch } = gateway

const response = await fetch(
'https://example.com/gemini/v1beta1/projects/pydantic-ai/locations/global/publishers/google/models/gemini-2.5-flash:streamGenerateContent?alt=sse',
{ method: 'POST', headers: { ...headers, 'x-vcr-filename': 'stream' }, body },
)

const chunks: object[] = []
for await (const chunk of response.body!) {
chunks.push(chunk)
}

expect(chunks).toMatchSnapshot('chunks')
expect(otelBatch, 'otelBatch length not 1').toHaveLength(1)
expect(JSON.parse(otelBatch[0]!).resourceSpans?.[0].scopeSpans?.[0].spans?.[0]?.attributes).toMatchSnapshot('span')
})