From e86c06abe3f45de966c9b39306eaeec3a7478264 Mon Sep 17 00:00:00 2001
From: Federico Kamelhar
Date: Thu, 30 Oct 2025 21:10:58 -0400
Subject: [PATCH] Add response_format parameter support for structured output

Implements the response_format parameter to enable JSON mode and JSON
schema output formatting for both Meta Llama and Cohere models. This
lets users leverage structured output capabilities via
with_structured_output() and direct response_format configuration.

Changes:
- Add a response_format field to OCIGenAIBase with documentation
- Implement response_format handling in ChatOCIGenAI._prepare_request
- Support response_format via class initialization, bind(), and model_kwargs
- Add unit tests covering all configuration methods
- Add integration tests validating end-to-end functionality with real API calls

The implementation ensures response_format is passed to both
GenericChatRequest (Meta Llama) and CohereChatRequest (Cohere) models.

Fixes #33
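Example usage (an illustrative sketch; the endpoint and compartment OCID
below are placeholders, and the model ID is one of the models exercised
in the tests):

```python
from langchain_core.messages import HumanMessage

from langchain_oci.chat_models import ChatOCIGenAI

# Placeholder endpoint and compartment OCID; substitute your own values.
llm = ChatOCIGenAI(
    model_id="meta.llama-3.3-70b-instruct",
    service_endpoint="https://inference.generativeai.us-chicago-1.oci.oraclecloud.com",
    compartment_id="ocid1.compartment.oc1..example",
)

# JSON mode via bind(); passing response_format=... at construction time
# or inside model_kwargs is equivalent.
json_llm = llm.bind(response_format={"type": "JSON_OBJECT"})
reply = json_llm.invoke([HumanMessage(content="List three colors as JSON.")])
```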
---
 .../chat_models/oci_generative_ai.py               |   4 +
 .../langchain_oci/llms/oci_generative_ai.py        |   7 +
 .../chat_models/test_response_format.py            | 302 ++++++++++++++++++
 .../chat_models/test_response_format.py            | 200 ++++++++++++
 4 files changed, 513 insertions(+)
 create mode 100644 libs/oci/tests/integration_tests/chat_models/test_response_format.py
 create mode 100644 libs/oci/tests/unit_tests/chat_models/test_response_format.py

diff --git a/libs/oci/langchain_oci/chat_models/oci_generative_ai.py b/libs/oci/langchain_oci/chat_models/oci_generative_ai.py
index 4eacf98..e14f25f 100644
--- a/libs/oci/langchain_oci/chat_models/oci_generative_ai.py
+++ b/libs/oci/langchain_oci/chat_models/oci_generative_ai.py
@@ -1153,6 +1153,10 @@ def _prepare_request(
         if stop is not None:
             _model_kwargs[self._provider.stop_sequence_key] = stop
 
+        # Handle response_format from class level or kwargs
+        if self.response_format is not None:
+            _model_kwargs['response_format'] = self.response_format
+
         # Warn if using max_tokens with OpenAI models
         if self.model_id and self.model_id.startswith("openai.") and "max_tokens" in _model_kwargs:
             import warnings
diff --git a/libs/oci/langchain_oci/llms/oci_generative_ai.py b/libs/oci/langchain_oci/llms/oci_generative_ai.py
index 3649e87..c9d5bb0 100644
--- a/libs/oci/langchain_oci/llms/oci_generative_ai.py
+++ b/libs/oci/langchain_oci/llms/oci_generative_ai.py
@@ -120,6 +120,13 @@ class OCIGenAIBase(BaseModel, ABC):
     """Maximum tool calls before forcing final answer.
     Prevents infinite loops while allowing multi-step orchestration."""
 
+    response_format: Optional[Any] = None
+    """Format for the model's output.
+    Can be a dict with a 'type' key (e.g., {"type": "JSON_OBJECT"})
+    or an OCI ResponseFormat object (TextResponseFormat, JsonObjectResponseFormat, JsonSchemaResponseFormat).
+    Supported for both GenericChatRequest and CohereChatRequest models.
+    Default: None (no specific format enforced)."""
+
     model_config = ConfigDict(
         extra="forbid", arbitrary_types_allowed=True, protected_namespaces=()
     )
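The field's docstring names two accepted shapes. A minimal sketch of both,
using the same `oci` SDK model classes the unit tests below exercise (the
schema contents here are illustrative only):

```python
from oci.generative_ai_inference import models

# Shape 1: a plain dict with a 'type' key.
json_mode = {"type": "JSON_OBJECT"}

# Shape 2: an OCI SDK response-format object carrying a JSON schema.
json_schema_mode = models.JsonSchemaResponseFormat(
    json_schema=models.ResponseJsonSchema(
        name="person",
        description="A person with a name and an age",
        schema={
            "type": "object",
            "properties": {"name": {"type": "string"}, "age": {"type": "integer"}},
        },
        is_strict=True,
    )
)
```

Whichever shape is used, _prepare_request copies the value into the model
kwargs unchanged, so it reaches the outgoing chat request as-is.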
diff --git a/libs/oci/tests/integration_tests/chat_models/test_response_format.py b/libs/oci/tests/integration_tests/chat_models/test_response_format.py
new file mode 100644
index 0000000..13eb87e
--- /dev/null
+++ b/libs/oci/tests/integration_tests/chat_models/test_response_format.py
@@ -0,0 +1,302 @@
+# Copyright (c) 2025 Oracle and/or its affiliates.
+# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
+
+"""Integration tests for the response_format feature with OCI Generative AI chat models.
+
+These tests verify that the response_format parameter works correctly with real
+OCI Generative AI API calls for both JSON mode and JSON schema mode.
+
+## Prerequisites
+
+1. **OCI Authentication**: Set up OCI authentication with a security token:
+   ```bash
+   oci session authenticate
+   ```
+
+2. **Environment Variables**: Export the following:
+   ```bash
+   export OCI_REGION="us-chicago-1"  # or your region
+   export OCI_COMP="ocid1.compartment.oc1..your-compartment-id"
+   ```
+
+3. **OCI Config**: Ensure `~/.oci/config` exists with a DEFAULT profile.
+
+## Running the Tests
+
+Run all integration tests:
+```bash
+cd libs/oci
+python -m pytest tests/integration_tests/chat_models/test_response_format.py -v -o addopts=""
+```
+
+Run a specific test:
+```bash
+pytest tests/integration_tests/chat_models/test_response_format.py::test_json_mode_meta_llama -v -o addopts=""
+```
+
+## What These Tests Verify
+
+1. **JSON Mode**: Models return valid JSON when using {"type": "JSON_OBJECT"}
+2. **JSON Schema Mode**: Models follow specific JSON schemas when provided
+3. **Multi-Vendor**: Works for both Meta Llama and Cohere models
+4. **Structured Output**: with_structured_output integration works end-to-end
+"""
+
+import json
+import os
+
+import pytest
+from langchain_core.messages import HumanMessage
+from pydantic import BaseModel, Field
+
+from langchain_oci.chat_models import ChatOCIGenAI
+
+
+def create_chat_model(model_id: str, **kwargs):
+    """Create a ChatOCIGenAI instance for testing."""
+    region = os.getenv("OCI_REGION", "us-chicago-1")
+    endpoint = f"https://inference.generativeai.{region}.oci.oraclecloud.com"
+
+    return ChatOCIGenAI(
+        model_id=model_id,
+        service_endpoint=endpoint,
+        compartment_id=os.getenv("OCI_COMP"),
+        model_kwargs={"temperature": 0.1, "max_tokens": 512},
+        auth_type="SECURITY_TOKEN",
+        auth_profile="DEFAULT",
+        auth_file_location=os.path.expanduser("~/.oci/config"),
+        **kwargs
+    )
+
+
+@pytest.mark.requires("oci")
+@pytest.mark.parametrize(
+    "model_id",
+    [
+        "meta.llama-3.3-70b-instruct",
+        "cohere.command-r-plus-08-2024",
+    ],
+)
+def test_json_mode_basic(model_id: str):
+    """Test basic JSON mode with the response_format parameter.
+
+    This test verifies that when response_format={"type": "JSON_OBJECT"} is set,
+    the model returns valid JSON output.
+    """
+    llm = create_chat_model(model_id)
+    llm_with_json = llm.bind(response_format={"type": "JSON_OBJECT"})
+
+    response = llm_with_json.invoke([
+        HumanMessage(content="List three colors in JSON format with a 'colors' array.")
+    ])
+
+    # Verify the response is valid JSON
+    try:
+        parsed = json.loads(response.content)
+        assert isinstance(parsed, dict), "Response should be a JSON object"
+        assert "colors" in parsed or "colour" in parsed, "Should contain a colors array"
+    except json.JSONDecodeError as e:
+        pytest.fail(f"Response is not valid JSON: {e}\nContent: {response.content}")
+
+
+@pytest.mark.requires("oci")
+def test_json_mode_meta_llama():
+    """Test JSON mode specifically with Meta Llama models."""
+    model_id = "meta.llama-3.3-70b-instruct"
+    llm = create_chat_model(model_id, response_format={"type": "JSON_OBJECT"})
+
+    response = llm.invoke([
+        HumanMessage(content="Create a JSON object with a person's name and age. Name: Alice, Age: 30")
+    ])
+
+    # Verify valid JSON
+    try:
+        parsed = json.loads(response.content)
+        assert isinstance(parsed, dict)
+        # Check for common variations in key names
+        has_name = any(key in str(parsed).lower() for key in ["name", "person", "alice"])
+        has_age = "30" in str(parsed) or "age" in str(parsed).lower()
+        assert has_name or has_age, f"Should contain person info: {parsed}"
+    except json.JSONDecodeError as e:
+        pytest.fail(f"Meta Llama JSON mode failed: {e}\nContent: {response.content}")
+
+
+@pytest.mark.requires("oci")
+def test_json_mode_cohere():
+    """Test JSON mode specifically with Cohere models."""
+    model_id = "cohere.command-r-plus-08-2024"
+    llm = create_chat_model(model_id, response_format={"type": "JSON_OBJECT"})
+
+    response = llm.invoke([
+        HumanMessage(content="Generate a JSON object with a book title and author. Use 'title' and 'author' as keys.")
+    ])
+
+    # Verify valid JSON
+    try:
+        parsed = json.loads(response.content)
+        assert isinstance(parsed, dict)
+        # Cohere should follow instructions closely
+        assert len(parsed) >= 1, f"Should have at least one key: {parsed}"
+    except json.JSONDecodeError as e:
+        pytest.fail(f"Cohere JSON mode failed: {e}\nContent: {response.content}")
+
+
+@pytest.mark.requires("oci")
+@pytest.mark.parametrize(
+    "model_id",
+    [
+        "meta.llama-3.3-70b-instruct",
+        "cohere.command-r-plus-08-2024",
+    ],
+)
+def test_with_structured_output_json_mode(model_id: str):
+    """Test with_structured_output using the json_mode method.
+
+    This verifies the integration between response_format and LangChain's
+    structured output feature using JSON mode.
+    """
+    class Person(BaseModel):
+        """A person with name and age."""
+        name: str = Field(description="The person's name")
+        age: int = Field(description="The person's age")
+
+    llm = create_chat_model(model_id)
+    structured_llm = llm.with_structured_output(Person, method="json_mode")
+
+    result = structured_llm.invoke("Tell me about a person named Bob who is 25 years old.")
+
+    # Verify we got a Person object
+    assert isinstance(result, Person), f"Should return Person object, got {type(result)}"
+    assert hasattr(result, "name"), "Should have name attribute"
+    assert hasattr(result, "age"), "Should have age attribute"
+
+    # Verify the content is reasonable (some models might not follow exactly);
+    # just check that we got some data
+    assert result.name, "Name should not be empty"
+    assert result.age > 0, "Age should be positive"
+
+
+@pytest.mark.requires("oci")
+@pytest.mark.parametrize(
+    "model_id",
+    [
+        "meta.llama-3.3-70b-instruct",
+        # Note: Cohere models use CohereResponseFormat, not JsonSchemaResponseFormat,
+        # so the json_schema method is not supported for Cohere models.
+    ],
+)
+def test_with_structured_output_json_schema(model_id: str):
+    """Test with_structured_output using the json_schema method.
+
+    This verifies that JSON schema mode works with the OCI API and properly
+    constrains the output to match the provided schema.
+
+    Note: This test only runs with Meta Llama models, as Cohere models require
+    a different response format type (CohereResponseFormat vs JsonSchemaResponseFormat).
+    """
+    class Product(BaseModel):
+        """A product with details."""
+        product_name: str = Field(description="Name of the product")
+        price: float = Field(description="Price in USD")
+        in_stock: bool = Field(description="Whether the product is in stock")
+
+    llm = create_chat_model(model_id)
+    structured_llm = llm.with_structured_output(Product, method="json_schema")
+
+    result = structured_llm.invoke(
+        "Create a product: Laptop, $999.99, available in stock"
+    )
+
+    # Verify we got a Product object with correct types
+    assert isinstance(result, Product), f"Should return Product object, got {type(result)}"
+    assert isinstance(result.product_name, str), "product_name should be string"
+    assert isinstance(result.price, (int, float)), "price should be numeric"
+    assert isinstance(result.in_stock, bool), "in_stock should be boolean"
+
+    # Verify reasonable values
+    assert result.product_name, "product_name should not be empty"
+    assert result.price > 0, "price should be positive"
+
+
+@pytest.mark.requires("oci")
+def test_response_format_via_model_kwargs():
+    """Test that response_format works when passed via model_kwargs.
+
+    This tests an alternative way to set response_format at initialization time.
+    """
+    model_id = "meta.llama-3.3-70b-instruct"
+    region = os.getenv("OCI_REGION", "us-chicago-1")
+    endpoint = f"https://inference.generativeai.{region}.oci.oraclecloud.com"
+
+    llm = ChatOCIGenAI(
+        model_id=model_id,
+        service_endpoint=endpoint,
+        compartment_id=os.getenv("OCI_COMP"),
+        model_kwargs={
+            "temperature": 0.1,
+            "max_tokens": 512,
+            "response_format": {"type": "JSON_OBJECT"}
+        },
+        auth_type="SECURITY_TOKEN",
+        auth_profile="DEFAULT",
+        auth_file_location=os.path.expanduser("~/.oci/config"),
+    )
+
+    response = llm.invoke([
+        HumanMessage(content="Create a JSON with a list of two fruits.")
+    ])
+
+    # Verify valid JSON
+    try:
+        parsed = json.loads(response.content)
+        assert isinstance(parsed, dict), "Response should be a JSON object"
+    except json.JSONDecodeError as e:
+        pytest.fail(f"model_kwargs response_format failed: {e}\nContent: {response.content}")
+
+
+@pytest.mark.requires("oci")
+def test_json_mode_complex_nested_structure():
+    """Test JSON mode with a more complex nested structure request."""
+    model_id = "cohere.command-r-plus-08-2024"
+    llm = create_chat_model(model_id, response_format={"type": "JSON_OBJECT"})
+
+    response = llm.invoke([
+        HumanMessage(content="""Create a JSON object representing a company with:
+        - name: "TechCorp"
+        - employees: array of 2 employees, each with name and role
+        - founded: 2020""")
+    ])
+
+    # Verify valid JSON with nested structure
+    try:
+        parsed = json.loads(response.content)
+        assert isinstance(parsed, dict), "Response should be a JSON object"
+
+        # Check for reasonable structure (flexible since models vary)
+        assert len(parsed) >= 1, "Should have at least one top-level key"
+
+        # Try to verify it has some nested structure
+        has_nested = any(isinstance(v, (dict, list)) for v in parsed.values())
+        assert has_nested or len(str(parsed)) > 50, "Should have some nested structure or substantial content"
+
+    except json.JSONDecodeError as e:
+        pytest.fail(f"Complex JSON failed: {e}\nContent: {response.content}")
+
+
+@pytest.mark.requires("oci")
+def test_response_format_class_level():
+    """Test response_format set at class initialization level."""
+    model_id = "meta.llama-3.3-70b-instruct"
+    llm = create_chat_model(model_id, response_format={"type": "JSON_OBJECT"})
+
+    # Should work without bind()
+    response = llm.invoke([
HumanMessage(content="Return JSON with a single key 'status' set to 'ok'") + ]) + + # Verify valid JSON + try: + parsed = json.loads(response.content) + assert isinstance(parsed, dict), "Response should be a JSON object" + except json.JSONDecodeError as e: + pytest.fail(f"Class-level response_format failed: {e}\nContent: {response.content}") diff --git a/libs/oci/tests/unit_tests/chat_models/test_response_format.py b/libs/oci/tests/unit_tests/chat_models/test_response_format.py new file mode 100644 index 0000000..4627e39 --- /dev/null +++ b/libs/oci/tests/unit_tests/chat_models/test_response_format.py @@ -0,0 +1,200 @@ +"""Unit tests for response_format feature.""" +import pytest +from unittest.mock import MagicMock + +from langchain_core.messages import HumanMessage +from langchain_oci.chat_models import ChatOCIGenAI + + +@pytest.mark.requires("oci") +def test_response_format_class_level(): + """Test class-level response_format parameter.""" + oci_gen_ai_client = MagicMock() + llm = ChatOCIGenAI( + model_id="meta.llama-3.3-70b-instruct", + response_format={"type": "JSON_OBJECT"}, + client=oci_gen_ai_client + ) + assert llm.response_format == {"type": "JSON_OBJECT"} + + +@pytest.mark.requires("oci") +def test_response_format_default_none(): + """Test that response_format defaults to None.""" + oci_gen_ai_client = MagicMock() + llm = ChatOCIGenAI( + model_id="meta.llama-3.3-70b-instruct", + client=oci_gen_ai_client + ) + assert llm.response_format is None + + +@pytest.mark.requires("oci") +def test_response_format_via_bind(): + """Test response_format set via bind().""" + oci_gen_ai_client = MagicMock() + llm = ChatOCIGenAI( + model_id="meta.llama-3.3-70b-instruct", + client=oci_gen_ai_client + ) + + # Should not raise TypeError anymore + llm_with_format = llm.bind(response_format={"type": "JSON_OBJECT"}) + + assert "response_format" in llm_with_format.kwargs + assert llm_with_format.kwargs["response_format"] == {"type": "JSON_OBJECT"} + + +@pytest.mark.requires("oci") +def test_response_format_passed_to_api_generic(): + """Test that response_format is passed to OCI API for Generic models.""" + from oci.generative_ai_inference import models + + oci_gen_ai_client = MagicMock() + llm = ChatOCIGenAI( + model_id="meta.llama-3.3-70b-instruct", + client=oci_gen_ai_client + ) + + llm_with_format = llm.bind(response_format={"type": "JSON_OBJECT"}) + + # Prepare a request + request = llm_with_format._prepare_request( + [HumanMessage(content="Hello")], + stop=None, + stream=False, + **llm_with_format.kwargs + ) + + # Verify response_format is in the request + assert hasattr(request.chat_request, 'response_format') + assert request.chat_request.response_format == {"type": "JSON_OBJECT"} + + +@pytest.mark.requires("oci") +def test_response_format_passed_to_api_cohere(): + """Test that response_format is passed to OCI API for Cohere models.""" + from oci.generative_ai_inference import models + + oci_gen_ai_client = MagicMock() + llm = ChatOCIGenAI( + model_id="cohere.command-r-plus", + client=oci_gen_ai_client + ) + + llm_with_format = llm.bind(response_format={"type": "JSON_OBJECT"}) + + # Prepare a request + request = llm_with_format._prepare_request( + [HumanMessage(content="Hello")], + stop=None, + stream=False, + **llm_with_format.kwargs + ) + + # Verify response_format is in the request + assert hasattr(request.chat_request, 'response_format') + assert request.chat_request.response_format == {"type": "JSON_OBJECT"} + + +@pytest.mark.requires("oci") +def test_with_structured_output_json_mode(): + 
"""Test with_structured_output with json_mode method.""" + oci_gen_ai_client = MagicMock() + llm = ChatOCIGenAI( + model_id="cohere.command-r-plus", + client=oci_gen_ai_client + ) + + # This should not raise TypeError anymore + from pydantic import BaseModel + + class TestSchema(BaseModel): + name: str + age: int + + structured_llm = llm.with_structured_output( + schema=TestSchema, + method="json_mode" + ) + + # The structured LLM should have response_format in kwargs + # It's wrapped in a Runnable, so we need to check the first step + assert structured_llm is not None + + +@pytest.mark.requires("oci") +def test_with_structured_output_json_schema(): + """Test with_structured_output with json_schema method.""" + oci_gen_ai_client = MagicMock() + llm = ChatOCIGenAI( + model_id="meta.llama-3.3-70b-instruct", + client=oci_gen_ai_client + ) + + # This should not raise TypeError anymore + from pydantic import BaseModel + + class TestSchema(BaseModel): + """Test schema""" + name: str + age: int + + structured_llm = llm.with_structured_output( + schema=TestSchema, + method="json_schema" + ) + + # The structured LLM should be created without errors + assert structured_llm is not None + + +@pytest.mark.requires("oci") +def test_response_format_json_schema_object(): + """Test response_format with JsonSchemaResponseFormat object.""" + from oci.generative_ai_inference import models + + oci_gen_ai_client = MagicMock() + llm = ChatOCIGenAI( + model_id="meta.llama-3.3-70b-instruct", + client=oci_gen_ai_client + ) + + # Create a proper JsonSchemaResponseFormat object + response_json_schema = models.ResponseJsonSchema( + name="test_response", + description="Test schema", + schema={"type": "object", "properties": {"key": {"type": "string"}}}, + is_strict=True + ) + + response_format_obj = models.JsonSchemaResponseFormat( + json_schema=response_json_schema + ) + + llm_with_format = llm.bind(response_format=response_format_obj) + + # Verify it's stored in kwargs + assert "response_format" in llm_with_format.kwargs + assert llm_with_format.kwargs["response_format"] == response_format_obj + + +@pytest.mark.requires("oci") +def test_response_format_model_kwargs(): + """Test response_format via model_kwargs.""" + oci_gen_ai_client = MagicMock() + llm = ChatOCIGenAI( + model_id="meta.llama-3.3-70b-instruct", + model_kwargs={"response_format": {"type": "JSON_OBJECT"}}, + client=oci_gen_ai_client + ) + + request = llm._prepare_request( + [HumanMessage(content="Hello")], + stop=None, + stream=False + ) + + # Verify response_format is in the request + assert hasattr(request.chat_request, 'response_format') + assert request.chat_request.response_format == {"type": "JSON_OBJECT"}