From c3e7a538aaa44f3c448ec21142b8e3f168db367d Mon Sep 17 00:00:00 2001 From: Yashwant Bezawada Date: Wed, 5 Nov 2025 09:58:26 -0600 Subject: [PATCH 1/2] Fix: Strip unsupported JSON Schema keywords for structured outputs Resolves #2718, where Decimal fields caused 500 errors with responses.parse(). Root cause: Pydantic generates JSON schemas with validation keywords like 'pattern', 'minLength', 'format', etc. that are not supported by OpenAI's structured outputs in strict mode. This caused models with Decimal fields to fail with a 500 Internal Server Error on some GPT-5 models (gpt-5-nano). Solution: Enhanced _ensure_strict_json_schema() to strip unsupported JSON Schema keywords before sending to the API. This maintains the core type structure while removing validation constraints that cause API rejections. Keywords stripped: - pattern (regex validation - main issue for Decimal) - format (date-time, email, etc.) - minLength/maxLength (string length) - minimum/maximum (numeric bounds) - minItems/maxItems (array size) - minProperties/maxProperties (object size) - uniqueItems, multipleOf, patternProperties - exclusiveMinimum/exclusiveMaximum Impact: - Decimal fields now work with all GPT-5 models - Other constrained types (datetime, length-limited strings) are also fixed - Maintains backward compatibility - Validation still occurs in Pydantic after parsing Changes: - src/openai/lib/_pydantic.py: Added keyword stripping logic - tests/lib/test_pydantic.py: Added test for Decimal field handling Test results: - Decimal schemas no longer contain the 'pattern' keyword - Schema structure is preserved (anyOf with number/string) - All model types (String, Float, Decimal) generate valid schemas --- src/openai/lib/_pydantic.py | 24 +++++++++++++ tests/lib/test_pydantic.py | 71 +++++++++++++++++++++++++++++++++++++ 2 files changed, 95 insertions(+) diff --git a/src/openai/lib/_pydantic.py b/src/openai/lib/_pydantic.py index 3cfe224cb1..012abd94dd 100644 --- a/src/openai/lib/_pydantic.py +++ 
b/src/openai/lib/_pydantic.py @@ -112,6 +112,30 @@ def _ensure_strict_json_schema( # we call `_ensure_strict_json_schema` again to fix the inlined schema and ensure it's valid. return _ensure_strict_json_schema(json_schema, path=path, root=root) + # Remove JSON Schema keywords that are not supported by OpenAI's structured outputs + # These keywords are used for validation but cause errors with strict mode + # See: https://platform.openai.com/docs/guides/structured-outputs/supported-schemas + unsupported_keywords = [ + "pattern", # Regex patterns (e.g., from Decimal fields) + "format", # String formats like "date-time" + "minLength", # String length constraints + "maxLength", # String length constraints + "minimum", # Numeric minimum values + "maximum", # Numeric maximum values + "exclusiveMinimum", # Exclusive numeric bounds + "exclusiveMaximum", # Exclusive numeric bounds + "multipleOf", # Numeric multiple constraints + "patternProperties", # Pattern-based object properties + "minItems", # Array size constraints + "maxItems", # Array size constraints + "minProperties", # Object property count constraints + "maxProperties", # Object property count constraints + "uniqueItems", # Array uniqueness constraints + ] + + for keyword in unsupported_keywords: + json_schema.pop(keyword, None) + return json_schema diff --git a/tests/lib/test_pydantic.py b/tests/lib/test_pydantic.py index 754a15151c..016424b4cd 100644 --- a/tests/lib/test_pydantic.py +++ b/tests/lib/test_pydantic.py @@ -1,5 +1,6 @@ from __future__ import annotations +from decimal import Decimal from enum import Enum from pydantic import Field, BaseModel @@ -409,3 +410,73 @@ def test_nested_inline_ref_expansion() -> None: "additionalProperties": False, } ) + + +class InsuranceQuote(BaseModel): + """Test model with Decimal field to verify pattern keyword is stripped""" + premium: Decimal = Field(description="The insurance premium amount") + coverage_amount: float = Field(description="The coverage amount") + 
customer_name: str = Field(description="The customer's name") + + +def test_decimal_field_strips_pattern() -> None: + """ + Test that Decimal fields do not include unsupported 'pattern' keyword. + + Pydantic generates a regex pattern for Decimal fields by default, but this + is not supported by OpenAI's structured outputs in strict mode. This test + verifies that the pattern keyword is properly stripped from the schema. + + Fixes issue #2718 + """ + if not PYDANTIC_V1: + schema = to_strict_json_schema(InsuranceQuote) + + # Verify the schema structure exists + assert "properties" in schema + assert "premium" in schema["properties"] + + # Get the premium field schema + premium_schema = schema["properties"]["premium"] + + # Verify it's an anyOf with number/string/null options + assert "anyOf" in premium_schema + + # Check all variants in the anyOf for 'pattern' keyword + # Pattern should NOT be present after our fix + for variant in premium_schema["anyOf"]: + assert "pattern" not in variant, ( + "Pattern keyword should be stripped from Decimal field schema. 
" + "Found pattern in variant: " + str(variant) + ) + + # Verify the schema matches expected structure (without pattern) + assert schema == snapshot( + { + "title": "InsuranceQuote", + "type": "object", + "properties": { + "premium": { + "anyOf": [ + {"type": "number"}, + {"type": "string"}, + {"type": "null"} + ], + "description": "The insurance premium amount", + "title": "Premium", + }, + "coverage_amount": { + "description": "The coverage amount", + "title": "Coverage Amount", + "type": "number", + }, + "customer_name": { + "description": "The customer's name", + "title": "Customer Name", + "type": "string", + }, + }, + "required": ["premium", "coverage_amount", "customer_name"], + "additionalProperties": False, + } + ) From 3b29879202e7f289a1b939e6a62023a1ab8da8cd Mon Sep 17 00:00:00 2001 From: Yashwant Bezawada Date: Wed, 5 Nov 2025 10:15:22 -0600 Subject: [PATCH 2/2] Address Codex review: Recursively process additionalProperties Fixes the issue identified in Codex review where Dict[str, Decimal] would still fail because additionalProperties schemas were not being recursively processed. The previous fix stripped unsupported keywords from the top-level schema and recursively processed properties, items, anyOf, and allOf. However, it missed additionalProperties which Pydantic uses for typed dictionaries like Dict[str, Decimal]. 
Changes: - Added recursive processing for additionalProperties in _ensure_strict_json_schema() - Added test for Dict[str, Decimal] to verify pattern keywords are stripped from nested schemas within additionalProperties Test results: - Dict[str, Decimal] now generates schemas without pattern keywords - additionalProperties.anyOf properly sanitized - All constrained types work in dictionary values --- src/openai/lib/_pydantic.py | 9 +++++ tests/lib/test_pydantic.py | 73 +++++++++++++++++++++++++++++++++++++ 2 files changed, 82 insertions(+) diff --git a/src/openai/lib/_pydantic.py b/src/openai/lib/_pydantic.py index 012abd94dd..8956802af1 100644 --- a/src/openai/lib/_pydantic.py +++ b/src/openai/lib/_pydantic.py @@ -66,6 +66,15 @@ def _ensure_strict_json_schema( if is_dict(items): json_schema["items"] = _ensure_strict_json_schema(items, path=(*path, "items"), root=root) + # typed dictionaries + # { 'type': 'object', 'additionalProperties': {...} } + # Note: additionalProperties can be boolean (true/false) or a schema dict + additional_properties = json_schema.get("additionalProperties") + if is_dict(additional_properties): + json_schema["additionalProperties"] = _ensure_strict_json_schema( + additional_properties, path=(*path, "additionalProperties"), root=root + ) + # unions any_of = json_schema.get("anyOf") if is_list(any_of): diff --git a/tests/lib/test_pydantic.py b/tests/lib/test_pydantic.py index 016424b4cd..7255714743 100644 --- a/tests/lib/test_pydantic.py +++ b/tests/lib/test_pydantic.py @@ -480,3 +480,76 @@ def test_decimal_field_strips_pattern() -> None: "additionalProperties": False, } ) + + +class ProductPricing(BaseModel): + """Test model with Dict[str, Decimal] to verify pattern is stripped from additionalProperties""" + prices: dict[str, Decimal] = Field(description="Product prices by region") + product_name: str = Field(description="The product name") + + +def test_dict_decimal_strips_pattern_in_additional_properties() -> None: + """ + Test that 
Dict[str, Decimal] fields strip pattern from additionalProperties. + + When Pydantic generates schemas for typed dictionaries (Dict[str, Decimal]), + it uses additionalProperties with a Decimal schema that includes a regex + pattern. This test verifies that pattern keywords are stripped from nested + schemas within additionalProperties. + + Addresses Codex review feedback on PR #2733 + """ + if not PYDANTIC_V1: + schema = to_strict_json_schema(ProductPricing) + + # Verify the schema structure exists + assert "properties" in schema + assert "prices" in schema["properties"] + + # Get the prices field schema + prices_schema = schema["properties"]["prices"] + + # Should be an object with additionalProperties + assert prices_schema.get("type") == "object" + assert "additionalProperties" in prices_schema + + # Get the additionalProperties schema (Decimal schema) + add_props = prices_schema["additionalProperties"] + assert "anyOf" in add_props + + # Check all variants in anyOf for 'pattern' keyword + # Pattern should NOT be present after our fix + for variant in add_props["anyOf"]: + assert "pattern" not in variant, ( + "Pattern keyword should be stripped from additionalProperties Decimal schema. " + "Found pattern in variant: " + str(variant) + ) + + # Verify the full schema matches expected structure + assert schema == snapshot( + { + "description": "Test model with Dict[str, Decimal] to verify pattern is stripped from additionalProperties", + "properties": { + "prices": { + "additionalProperties": { + "anyOf": [ + {"type": "number"}, + {"type": "string"}, + ] + }, + "description": "Product prices by region", + "title": "Prices", + "type": "object", + }, + "product_name": { + "description": "The product name", + "title": "Product Name", + "type": "string", + }, + }, + "required": ["prices", "product_name"], + "title": "ProductPricing", + "type": "object", + "additionalProperties": False, + } + )