8 changes: 4 additions & 4 deletions deploy/test/index.spec.ts.snap
@@ -14,8 +14,8 @@ exports[`deploy > should call openai via gateway > llm 1`] = `
},
},
],
"created": 1761828474,
"id": "chatcmpl-CWMMElxV7Z5jV4zs2g2cRQjZTsY8M",
"created": 1762272055,
"id": "chatcmpl-CYDklwaN7x9okuWTnABMCrZykoiRj",
"model": "gpt-5-2025-08-07",
"object": "chat.completion",
"service_tier": "default",
@@ -118,7 +118,7 @@ exports[`deploy > should call openai via gateway > span 1`] = `
{
"key": "gen_ai.response.id",
"value": {
"stringValue": "chatcmpl-CWMMElxV7Z5jV4zs2g2cRQjZTsY8M",
"stringValue": "chatcmpl-CYDklwaN7x9okuWTnABMCrZykoiRj",
},
},
{
@@ -293,7 +293,7 @@ exports[`deploy > should call openai via gateway > span 1`] = `
{
"key": "http.response.body.text",
"value": {
"stringValue": "{"id":"chatcmpl-CWMMElxV7Z5jV4zs2g2cRQjZTsY8M","object":"chat.completion","created":1761828474,"model":"gpt-5-2025-08-07","choices":[{"index":0,"message":{"role":"assistant","content":"Paris.","refusal":null,"annotations":[]},"finish_reason":"stop"}],"usage":{"prompt_tokens":23,"completion_tokens":75,"total_tokens":98,"prompt_tokens_details":{"cached_tokens":0,"audio_tokens":0},"completion_tokens_details":{"reasoning_tokens":64,"audio_tokens":0,"accepted_prediction_tokens":0,"rejected_prediction_tokens":0},"pydantic_ai_gateway":{"cost_estimate":0.00077875}},"service_tier":"default","system_fingerprint":null}",
"stringValue": "{"id":"chatcmpl-CYDklwaN7x9okuWTnABMCrZykoiRj","object":"chat.completion","created":1762272055,"model":"gpt-5-2025-08-07","choices":[{"index":0,"message":{"role":"assistant","content":"Paris.","refusal":null,"annotations":[]},"finish_reason":"stop"}],"usage":{"prompt_tokens":23,"completion_tokens":75,"total_tokens":98,"prompt_tokens_details":{"cached_tokens":0,"audio_tokens":0},"completion_tokens_details":{"reasoning_tokens":64,"audio_tokens":0,"accepted_prediction_tokens":0,"rejected_prediction_tokens":0},"pydantic_ai_gateway":{"cost_estimate":0.00077875}},"service_tier":"default","system_fingerprint":null}",
},
},
{
1 change: 1 addition & 0 deletions gateway/package.json
@@ -13,6 +13,7 @@
"@opentelemetry/resources": "^2.0.1",
"@pydantic/genai-prices": "^0.0.35",
"@pydantic/logfire-api": "^0.9.0",
"@streamparser/json-whatwg": "^0.0.22",
"eventsource-parser": "^3.0.6",
"mime-types": "^3.0.1",
"ts-pattern": "^5.8.0"
26 changes: 24 additions & 2 deletions gateway/src/api/google.ts
@@ -19,11 +19,11 @@ import type {
TextPart,
} from '../otel/genai'
import { isMapping, type JsonData } from '../providers/default'
import { BaseAPI } from './base'
import { BaseAPI, type ExtractedRequest, type ExtractedResponse, type ExtractorConfig } from './base'

export { GenerateContentResponse } from '@google/genai'

export class GoogleAPI extends BaseAPI<GoogleRequest, GenerateContentResponse> {
export class GoogleAPI extends BaseAPI<GoogleRequest, GenerateContentResponse, GenerateContentResponse> {
requestStopSequences = (_request: GoogleRequest): string[] | undefined => {
return _request.generationConfig?.stopSequences ?? undefined
}
@@ -67,6 +67,28 @@ export class GoogleAPI extends BaseAPI<GoogleRequest, GenerateContentResponse> {
systemInstructions = (_request: GoogleRequest): TextPart[] | undefined => {
return systemInstructions(_request.systemInstruction)
}

// SafeExtractor implementation

requestExtractors: ExtractorConfig<GoogleRequest, ExtractedRequest> = {
requestModel: (_request: GoogleRequest) => {
this.extractedRequest.requestModel = this.requestModel
},
}

chunkExtractors: ExtractorConfig<GenerateContentResponse, ExtractedResponse> = {
usage: (chunk: GenerateContentResponse) => {
if (chunk.usageMetadata) {
// TODO(Marcelo): This is likely to be wrong, since we are not summing the usage.
this.extractedResponse.usage = this.extractUsage(chunk)
Comment on lines +82 to +83
Copilot AI Nov 5, 2025

The TODO comment correctly identifies a bug: for streaming responses, usage from multiple chunks should be accumulated rather than overwritten. Each chunk may contain partial usage data that needs to be summed. Consider implementing accumulation logic similar to how token counts are typically aggregated across streaming chunks.

Suggested change
// TODO(Marcelo): This is likely to be wrong, since we are not summing the usage.
this.extractedResponse.usage = this.extractUsage(chunk)
// Accumulate usage across streaming chunks.
const newUsage = this.extractUsage(chunk);
if (this.extractedResponse.usage) {
  // Sum numeric fields in usage objects.
  for (const key of Object.keys(newUsage)) {
    if (
      typeof newUsage[key] === 'number' &&
      typeof this.extractedResponse.usage[key] === 'number'
    ) {
      this.extractedResponse.usage[key] += newUsage[key];
    } else {
      // For non-numeric fields, prefer the latest chunk's value.
      this.extractedResponse.usage[key] = newUsage[key];
    }
  }
} else {
  this.extractedResponse.usage = { ...newUsage };
}
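
For reference, a typed variant of the same accumulation idea, sketched under the assumption that each streaming chunk reports incremental token counts (if Gemini instead reports cumulative totals per chunk, keeping the last chunk's value, as the current code does, is already enough). The UsageCounts shape and addUsage helper below are illustrative, not the gateway's actual ExtractedResponse['usage'] type:

// Sketch only: assumed usage shape, adjust to the real ExtractedResponse type.
interface UsageCounts {
  promptTokenCount?: number
  candidatesTokenCount?: number
  thoughtsTokenCount?: number
  totalTokenCount?: number
}

function addUsage(total: UsageCounts | undefined, chunk: UsageCounts): UsageCounts {
  if (!total) return { ...chunk }
  return {
    // Prompt tokens repeat on every chunk, so keep the latest value.
    promptTokenCount: chunk.promptTokenCount ?? total.promptTokenCount,
    // Output and reasoning tokens are summed as per-chunk deltas (see assumption above).
    candidatesTokenCount: (total.candidatesTokenCount ?? 0) + (chunk.candidatesTokenCount ?? 0),
    thoughtsTokenCount: (total.thoughtsTokenCount ?? 0) + (chunk.thoughtsTokenCount ?? 0),
    totalTokenCount: chunk.totalTokenCount ?? total.totalTokenCount,
  }
}

With a helper like this, the extractor body becomes a single call along the lines of this.extractedResponse.usage = addUsage(this.extractedResponse.usage, chunk.usageMetadata ?? {}), with no string indexing, assuming the usage field uses a compatible shape.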

}
},
responseModel: (chunk: GenerateContentResponse) => {
if (chunk.modelVersion) {
this.extractedResponse.responseModel = chunk.modelVersion
}
},
}
}

function mapContent(content: Content): ChatMessage {
4 changes: 2 additions & 2 deletions gateway/src/index.ts
@@ -45,15 +45,15 @@ export async function gatewayFetch(
ctx: ExecutionContext,
options: GatewayOptions,
): Promise<Response> {
let { pathname: proxyPath } = url
let { pathname: proxyPath, search: queryString } = url
if (options.proxyPrefixLength) {
proxyPath = proxyPath.slice(options.proxyPrefixLength)
}
try {
if (proxyPath === '/') {
return index(request, options)
} else {
return await gateway(request, proxyPath, ctx, options)
return await gateway(request, `${proxyPath}${queryString}`, ctx, options)
}
} catch (error) {
if (error instanceof ResponseError) {
11 changes: 8 additions & 3 deletions gateway/src/providers/default.ts
@@ -321,9 +321,7 @@ export class DefaultProviderProxy {
}
}

const isStreaming =
responseHeaders.get('content-type')?.startsWith('text/event-stream') ||
('stream' in requestBodyData && requestBodyData.stream === true)
const isStreaming = this.isStreaming(responseHeaders, requestBodyData)
if (isStreaming) {
return this.dispatchStreaming(prepResult, response, responseHeaders)
}
@@ -465,6 +463,13 @@
}
}

protected isStreaming(responseHeaders: Headers, requestBodyData: JsonData): boolean {
return (
responseHeaders.get('content-type')?.toLowerCase().startsWith('text/event-stream') ||
('stream' in requestBodyData && requestBodyData.stream === true)
)
}

protected isWhitelistedEndpoint(): boolean {
return false
}
8 changes: 4 additions & 4 deletions gateway/src/providers/google/auth.ts
@@ -1,6 +1,6 @@
import { ResponseError } from '../../utils'

export async function authToken(credentials: string, kv: KVNamespace): Promise<string> {
export async function authToken(credentials: string, kv: KVNamespace, subFetch: typeof fetch): Promise<string> {
const serviceAccountHash = await hash(credentials)
const cacheKey = `gcp-auth:${serviceAccountHash}`
const cachedToken = await kv.get(cacheKey, { cacheTtl: 300 })
@@ -9,7 +9,7 @@
}
const serviceAccount = getServiceAccount(credentials)
const jwt = await jwtSign(serviceAccount)
const token = await getAccessToken(jwt)
const token = await getAccessToken(jwt, subFetch)
await kv.put(cacheKey, token, { expirationTtl: 3000 })
return token
}
@@ -80,10 +80,10 @@ async function jwtSign(serviceAccount: ServiceAccount): Promise<string> {
return `${signingInput}.${b64UrlEncodeArray(signature)}`
}

async function getAccessToken(jwt: string): Promise<string> {
async function getAccessToken(jwt: string, subFetch: typeof fetch): Promise<string> {
const body = new URLSearchParams({ grant_type: 'urn:ietf:params:oauth:grant-type:jwt-bearer', assertion: jwt })

const response = await fetch(tokenUrl, {
const response = await subFetch(tokenUrl, {
method: 'POST',
headers: { 'Content-Type': 'application/x-www-form-urlencoded' },
signal: AbortSignal.timeout(10000),
18 changes: 9 additions & 9 deletions gateway/src/providers/google/index.ts
@@ -19,7 +19,7 @@ export class GoogleVertexProvider extends DefaultProviderProxy {
if (!path) {
return { error: 'Unable to parse path' }
}
return `${this.providerProxy.baseUrl}${path}`
return `${stripTrailingSlash(this.providerProxy.baseUrl)}/${stripLeadingSlash(path)}`
} else {
return { error: 'baseUrl is required for the Google Provider' }
}
@@ -72,7 +72,8 @@ export class GoogleVertexProvider extends DefaultProviderProxy {
this.flavor = 'anthropic'
}

return `/${version}/projects/${projectId}/locations/${region}/publishers/${publisher}/models/${modelAndApi}`
const path = `/${version}/projects/${projectId}/locations/${region}/publishers/${publisher}/models/${modelAndApi}`
return path
}

async prepRequest() {
@@ -92,7 +93,7 @@
}

async requestHeaders(headers: Headers): Promise<void> {
const token = await authToken(this.providerProxy.credentials, this.options.kv)
const token = await authToken(this.providerProxy.credentials, this.options.kv, this.options.subFetch)
headers.set('Authorization', `Bearer ${token}`)
}
}
@@ -104,10 +105,9 @@ export class GoogleVertexProvider extends DefaultProviderProxy {
* @param url - The URL to extract the region from e.g. https://europe-west4-aiplatform.googleapis.com or https://aiplatform.googleapis.com.
*/
function regionFromUrl(url: string): null | string {
if (url.includes('https://aiplatform.googleapis.com')) {
return 'global'
}
// The group includes regions with hyphen like "europe-west4"
const match = url.match(/^https:\/\/(.+?)-aiplatform\.googleapis\.com$/)
return match?.[1] ?? null
const match = url.match(/^https:\/\/([^-]+)-aiplatform\.googleapis\.com$/)
Copilot AI Nov 5, 2025

The regex pattern ([^-]+) will not match regions with hyphens like 'europe-west4' or 'us-east1'. The pattern stops at the first hyphen, so 'https://europe-west4-aiplatform.googleapis.com' would only capture 'europe' instead of 'europe-west4'. The regex should be changed to ([^.]+) or a more specific pattern that matches valid GCP regions.

Suggested change
const match = url.match(/^https:\/\/([^-]+)-aiplatform\.googleapis\.com$/)
const match = url.match(/^https:\/\/([a-z0-9-]+)-aiplatform\.googleapis\.com$/)
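
As a quick standalone illustration of the difference between the two patterns (the narrow/wide names are labels for this sketch only, not identifiers from the codebase):

// Illustrative comparison of the two region-extraction patterns.
const narrow = /^https:\/\/([^-]+)-aiplatform\.googleapis\.com$/
const wide = /^https:\/\/([a-z0-9-]+)-aiplatform\.googleapis\.com$/
for (const url of ['https://europe-west4-aiplatform.googleapis.com', 'https://aiplatform.googleapis.com']) {
  // narrow can never match a hyphenated region, so regional URLs fall back to 'global'.
  console.log(url, narrow.exec(url)?.[1] ?? 'global', wide.exec(url)?.[1] ?? 'global')
}
// europe-west4-aiplatform.googleapis.com -> 'global' with narrow, 'europe-west4' with wide
// aiplatform.googleapis.com              -> 'global' with both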

return match?.[1] ?? 'global'
}

const stripTrailingSlash = (url: string): string => (url.endsWith('/') ? url.slice(0, -1) : url)
const stripLeadingSlash = (url: string): string => (url.startsWith('/') ? url.slice(1) : url)
1 change: 1 addition & 0 deletions gateway/test/env.d.ts
@@ -6,6 +6,7 @@ interface Env {
GROQ_API_KEY: string
ANTHROPIC_API_KEY: string
AWS_BEARER_TOKEN_BEDROCK: string
GOOGLE_SERVICE_ACCOUNT_KEY: string
}

declare module 'cloudflare:test' {
4 changes: 2 additions & 2 deletions gateway/test/gateway.spec.ts.snap
@@ -14,8 +14,8 @@ exports[`custom proxyPrefixLength > inference > proxyPrefixLength 1`] = `
},
},
],
"created": 1761823178,
"id": "chatcmpl-CWKyoLFrrxfDdUZO6hAaDA7rYn3Fo",
"created": 1762271642,
"id": "chatcmpl-CYDe6BCWOKGGGTlQLofyQ2DP3QTRV",
"model": "gpt-5-2025-08-07",
"object": "chat.completion",
"service_tier": "default",
8 changes: 7 additions & 1 deletion gateway/test/providers/anthropic.spec.ts.snap
@@ -1130,7 +1130,7 @@ exports[`anthropic > should call anthropic via gateway with stream > span 1`] =
{
"key": "logfire.json_schema",
"value": {
"stringValue": "{"type":"object","properties":{"gen_ai.system":{"type":"string"},"gen_ai.operation.name":{"type":"string"},"gen_ai.request.model":{"type":"string"},"gen_ai.request.max_tokens":{"type":"number"},"gen_ai.response.model":{"type":"string"},"gen_ai.response.id":{"type":"string"},"gen_ai.usage.input_tokens":{"type":"number"},"gen_ai.usage.cache_read_tokens":{"type":"number"},"gen_ai.usage.cache_write_tokens":{"type":"number"},"gen_ai.usage.output_tokens":{"type":"number"},"http.request.method":{"type":"string"},"url.full":{"type":"string"},"http.request.header.accept":{"type":"string"},"http.request.header.anthropic-version":{"type":"string"},"http.request.header.authorization":{"type":"string"},"http.request.header.content-type":{"type":"string"},"http.request.header.user-agent":{"type":"string"},"http.request.header.x-stainless-arch":{"type":"string"},"http.request.header.x-stainless-lang":{"type":"string"},"http.request.header.x-stainless-os":{"type":"string"},"http.request.header.x-stainless-package-version":{"type":"string"},"http.request.header.x-stainless-retry-count":{"type":"string"},"http.request.header.x-stainless-runtime":{"type":"string"},"http.request.header.x-stainless-runtime-version":{"type":"string"},"http.request.header.x-stainless-timeout":{"type":"string"},"http.response.status_code":{"type":"number"},"http.response.header.server":{"type":"string"},"http.response.header.transfer-encoding":{"type":"string"}}}",
"stringValue": "{"type":"object","properties":{"gen_ai.system":{"type":"string"},"gen_ai.operation.name":{"type":"string"},"gen_ai.request.model":{"type":"string"},"gen_ai.request.max_tokens":{"type":"number"},"gen_ai.response.model":{"type":"string"},"gen_ai.response.id":{"type":"string"},"gen_ai.usage.input_tokens":{"type":"number"},"gen_ai.usage.cache_read_tokens":{"type":"number"},"gen_ai.usage.cache_write_tokens":{"type":"number"},"gen_ai.usage.output_tokens":{"type":"number"},"http.request.method":{"type":"string"},"url.full":{"type":"string"},"http.request.header.accept":{"type":"string"},"http.request.header.anthropic-version":{"type":"string"},"http.request.header.authorization":{"type":"string"},"http.request.header.content-type":{"type":"string"},"http.request.header.user-agent":{"type":"string"},"http.request.header.x-stainless-arch":{"type":"string"},"http.request.header.x-stainless-lang":{"type":"string"},"http.request.header.x-stainless-os":{"type":"string"},"http.request.header.x-stainless-package-version":{"type":"string"},"http.request.header.x-stainless-retry-count":{"type":"string"},"http.request.header.x-stainless-runtime":{"type":"string"},"http.request.header.x-stainless-runtime-version":{"type":"string"},"http.request.header.x-stainless-timeout":{"type":"string"},"http.response.status_code":{"type":"number"},"http.response.header.content-type":{"type":"string"},"http.response.header.server":{"type":"string"},"http.response.header.transfer-encoding":{"type":"string"}}}",
},
},
{
@@ -1295,6 +1295,12 @@ exports[`anthropic > should call anthropic via gateway with stream > span 1`] =
"intValue": 200,
},
},
{
"key": "http.response.header.content-type",
"value": {
"stringValue": "text/event-stream; charset=utf-8",
},
},
{
"key": "http.response.header.server",
"value": {
87 changes: 70 additions & 17 deletions gateway/test/providers/google.spec.ts
@@ -1,25 +1,78 @@
import { GoogleGenAI } from '@google/genai'
import { describe, expect } from 'vitest'
import { test } from '../setup'

const body = JSON.stringify({
contents: [{ parts: [{ text: "Samuel lived in London and was born on Jan 28th '87" }], role: 'user' }],
systemInstruction: { parts: [{ text: 'Extract information about the person' }], role: 'user' },
tools: [
{
functionDeclarations: [
{
description: 'The final response which ends this conversation',
name: 'final_result',
parameters: {
properties: {
name: { description: 'The name of the person.', type: 'STRING' },
dob: {
description: 'The date of birth of the person. MUST BE A VALID ISO 8601 date. (format: date)',
type: 'STRING',
},
city: { description: 'The city where the person lives.', type: 'STRING' },
},
required: ['name', 'dob', 'city'],
type: 'OBJECT',
},
},
],
},
],
toolConfig: { functionCallingConfig: { mode: 'ANY', allowedFunctionNames: ['final_result'] } },
generationConfig: { temperature: 0.5, topP: 0.9, stopSequences: ['potato'] },
})
const headers = {
Authorization: 'healthy',
'x-goog-api-client': 'google-genai-sdk/1.36.0 gl-python/3.13.0',
'x-goog-api-key': 'unset',
accept: '*/*',
'accept-encoding': 'deflate',
'content-type': 'application/json',
'content-length': body.length.toString(),
'user-agent':
'pydantic-ai/1.0.19.dev5+b3b34f9, google-genai-sdk/1.36.0 gl-python/3.13.0 via Pydantic AI Gateway unknown, contact engineering@pydantic.dev',
traceparent: '00-019a4effa21047ac31372f093cb8e712-8b60768281864a49-01',
}

describe('google', () => {
// TODO(Marcelo): When Google supports `fetch` parameter, we can fix this: https://github.com/googleapis/js-genai/issues/999
test.fails('google-vertex/default', async ({ gateway }) => {
const { otelBatch } = gateway

// The `authToken` is passed as `Authorization` header with the anthropic client.
const client = new GoogleGenAI({
apiKey: 'healthy',
httpOptions: { baseUrl: 'https://example.com/google-vertex' },
})

const response = await client.models.generateContent({
model: 'gemini-2.5-flash',
contents: 'What is the capital of france?',
config: { maxOutputTokens: 1024, topP: 0.95, topK: 1, temperature: 0.5, stopSequences: ['potato'] },
})

expect(response).toMatchSnapshot('llm')
test('google-vertex/default', async ({ gateway }) => {
const { fetch, otelBatch } = gateway

const response = await fetch(
'https://example.com/gemini/v1beta1/projects/pydantic-ai/locations/global/publishers/google/models/gemini-2.5-flash:generateContent?alt=sse',
{ method: 'POST', headers, body },
)

const content = await response.text()

expect(content).toMatchSnapshot('llm')
expect(otelBatch, 'otelBatch length not 1').toHaveLength(1)
expect(JSON.parse(otelBatch[0]!).resourceSpans?.[0].scopeSpans?.[0].spans?.[0]?.attributes).toMatchSnapshot('span')
})

test('google-vertex/stream', async ({ gateway }) => {
const { fetch, otelBatch } = gateway

const response = await fetch(
'https://example.com/gemini/v1beta1/projects/pydantic-ai/locations/global/publishers/google/models/gemini-2.5-flash:streamGenerateContent?alt=sse',
{ method: 'POST', headers: { ...headers, 'x-vcr-filename': 'stream' }, body },
)

const chunks: object[] = []
for await (const chunk of response.body!) {
chunks.push(chunk)
}

expect(chunks).toMatchSnapshot('chunks')
expect(otelBatch, 'otelBatch length not 1').toHaveLength(1)
expect(JSON.parse(otelBatch[0]!).resourceSpans?.[0].scopeSpans?.[0].spans?.[0]?.attributes).toMatchSnapshot('span')
})