From e86c06abe3f45de966c9b39306eaeec3a7478264 Mon Sep 17 00:00:00 2001
From: Federico Kamelhar
Date: Thu, 30 Oct 2025 21:10:58 -0400
Subject: [PATCH] Add response_format parameter support for structured output

Implements the response_format parameter to enable JSON mode and JSON
schema output formatting for both Meta Llama and Cohere models. This
lets users leverage structured output capabilities via
with_structured_output() and direct response_format configuration.

Changes:
- Add a response_format field to OCIGenAIBase with documentation
- Implement response_format handling in ChatOCIGenAI._prepare_request
- Support response_format via class initialization, bind(), and model_kwargs
- Add unit tests covering all configuration methods
- Add integration tests validating end-to-end functionality with real API calls

The implementation ensures response_format is passed to both
GenericChatRequest (Meta Llama) and CohereChatRequest (Cohere) models.

Fixes #33
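Example usage (an illustrative sketch; the endpoint and compartment OCID
below are placeholders, and the model ID is one of the models exercised
in the tests):

```python
from langchain_core.messages import HumanMessage

from langchain_oci.chat_models import ChatOCIGenAI

# Placeholder endpoint and compartment OCID; substitute your own values.
llm = ChatOCIGenAI(
    model_id="meta.llama-3.3-70b-instruct",
    service_endpoint="https://inference.generativeai.us-chicago-1.oci.oraclecloud.com",
    compartment_id="ocid1.compartment.oc1..example",
)

# JSON mode via bind(); passing response_format=... at construction time
# or inside model_kwargs is equivalent.
json_llm = llm.bind(response_format={"type": "JSON_OBJECT"})
reply = json_llm.invoke([HumanMessage(content="List three colors as JSON.")])
```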
---
 .../chat_models/oci_generative_ai.py               |   4 +
 .../langchain_oci/llms/oci_generative_ai.py        |   7 +
 .../chat_models/test_response_format.py            | 302 ++++++++++++++++++
 .../chat_models/test_response_format.py            | 200 ++++++++++++
 4 files changed, 513 insertions(+)
 create mode 100644 libs/oci/tests/integration_tests/chat_models/test_response_format.py
 create mode 100644 libs/oci/tests/unit_tests/chat_models/test_response_format.py

diff --git a/libs/oci/langchain_oci/chat_models/oci_generative_ai.py b/libs/oci/langchain_oci/chat_models/oci_generative_ai.py
index 4eacf98..e14f25f 100644
--- a/libs/oci/langchain_oci/chat_models/oci_generative_ai.py
+++ b/libs/oci/langchain_oci/chat_models/oci_generative_ai.py
@@ -1153,6 +1153,10 @@ def _prepare_request(
         if stop is not None:
             _model_kwargs[self._provider.stop_sequence_key] = stop
 
+        # Handle response_format from class level or kwargs
+        if self.response_format is not None:
+            _model_kwargs['response_format'] = self.response_format
+
         # Warn if using max_tokens with OpenAI models
         if self.model_id and self.model_id.startswith("openai.") and "max_tokens" in _model_kwargs:
             import warnings
diff --git a/libs/oci/langchain_oci/llms/oci_generative_ai.py b/libs/oci/langchain_oci/llms/oci_generative_ai.py
index 3649e87..c9d5bb0 100644
--- a/libs/oci/langchain_oci/llms/oci_generative_ai.py
+++ b/libs/oci/langchain_oci/llms/oci_generative_ai.py
@@ -120,6 +120,13 @@ class OCIGenAIBase(BaseModel, ABC):
     """Maximum tool calls before forcing final answer.
     Prevents infinite loops while allowing multi-step orchestration."""
 
+    response_format: Optional[Any] = None
+    """Format for the model's output.
+    Can be a dict with a 'type' key (e.g., {"type": "JSON_OBJECT"})
+    or an OCI ResponseFormat object (TextResponseFormat, JsonObjectResponseFormat, JsonSchemaResponseFormat).
+    Supported for both GenericChatRequest and CohereChatRequest models.
+    Default: None (no specific format enforced)."""
+
     model_config = ConfigDict(
         extra="forbid", arbitrary_types_allowed=True, protected_namespaces=()
     )
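The field's docstring names two accepted shapes. A minimal sketch of both,
using the same `oci` SDK model classes the unit tests below exercise (the
schema contents here are illustrative only):

```python
from oci.generative_ai_inference import models

# Shape 1: a plain dict with a 'type' key.
json_mode = {"type": "JSON_OBJECT"}

# Shape 2: an OCI SDK response-format object carrying a JSON schema.
json_schema_mode = models.JsonSchemaResponseFormat(
    json_schema=models.ResponseJsonSchema(
        name="person",
        description="A person with a name and an age",
        schema={
            "type": "object",
            "properties": {"name": {"type": "string"}, "age": {"type": "integer"}},
        },
        is_strict=True,
    )
)
```

Whichever shape is used, _prepare_request copies the value into the model
kwargs unchanged, so it reaches the outgoing chat request as-is.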
diff --git a/libs/oci/tests/integration_tests/chat_models/test_response_format.py b/libs/oci/tests/integration_tests/chat_models/test_response_format.py
new file mode 100644
index 0000000..13eb87e
--- /dev/null
+++ b/libs/oci/tests/integration_tests/chat_models/test_response_format.py
@@ -0,0 +1,302 @@
+# Copyright (c) 2025 Oracle and/or its affiliates.
+# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
+
+"""Integration tests for the response_format feature with OCI Generative AI chat models.
+
+These tests verify that the response_format parameter works correctly with real
+OCI Generative AI API calls for both JSON mode and JSON schema mode.
+
+## Prerequisites
+
+1. **OCI Authentication**: Set up OCI authentication with a security token:
+   ```bash
+   oci session authenticate
+   ```
+
+2. **Environment Variables**: Export the following:
+   ```bash
+   export OCI_REGION="us-chicago-1"  # or your region
+   export OCI_COMP="ocid1.compartment.oc1..your-compartment-id"
+   ```
+
+3. **OCI Config**: Ensure `~/.oci/config` exists with a DEFAULT profile.
+
+## Running the Tests
+
+Run all integration tests:
+```bash
+cd libs/oci
+python -m pytest tests/integration_tests/chat_models/test_response_format.py -v -o addopts=""
+```
+
+Run a specific test:
+```bash
+pytest tests/integration_tests/chat_models/test_response_format.py::test_json_mode_meta_llama -v -o addopts=""
+```
+
+## What These Tests Verify
+
+1. **JSON Mode**: Models return valid JSON when using {"type": "JSON_OBJECT"}
+2. **JSON Schema Mode**: Models follow specific JSON schemas when provided
+3. **Multi-Vendor**: Works for both Meta Llama and Cohere models
+4. **Structured Output**: with_structured_output integration works end-to-end
+"""
+
+import json
+import os
+
+import pytest
+from langchain_core.messages import HumanMessage
+from pydantic import BaseModel, Field
+
+from langchain_oci.chat_models import ChatOCIGenAI
+
+
+def create_chat_model(model_id: str, **kwargs):
+    """Create a ChatOCIGenAI instance for testing."""
+    region = os.getenv("OCI_REGION", "us-chicago-1")
+    endpoint = f"https://inference.generativeai.{region}.oci.oraclecloud.com"
+
+    return ChatOCIGenAI(
+        model_id=model_id,
+        service_endpoint=endpoint,
+        compartment_id=os.getenv("OCI_COMP"),
+        model_kwargs={"temperature": 0.1, "max_tokens": 512},
+        auth_type="SECURITY_TOKEN",
+        auth_profile="DEFAULT",
+        auth_file_location=os.path.expanduser("~/.oci/config"),
+        **kwargs
+    )
+
+
+@pytest.mark.requires("oci")
+@pytest.mark.parametrize(
+    "model_id",
+    [
+        "meta.llama-3.3-70b-instruct",
+        "cohere.command-r-plus-08-2024",
+    ],
+)
+def test_json_mode_basic(model_id: str):
+    """Test basic JSON mode with the response_format parameter.
+
+    This test verifies that when response_format={"type": "JSON_OBJECT"} is set,
+    the model returns valid JSON output.
+    """
+    llm = create_chat_model(model_id)
+    llm_with_json = llm.bind(response_format={"type": "JSON_OBJECT"})
+
+    response = llm_with_json.invoke([
+        HumanMessage(content="List three colors in JSON format with a 'colors' array.")
+    ])
+
+    # Verify the response is valid JSON
+    try:
+        parsed = json.loads(response.content)
+        assert isinstance(parsed, dict), "Response should be a JSON object"
+        assert "colors" in parsed or "colour" in parsed, "Should contain a colors array"
+    except json.JSONDecodeError as e:
+        pytest.fail(f"Response is not valid JSON: {e}\nContent: {response.content}")
+
+
+@pytest.mark.requires("oci")
+def test_json_mode_meta_llama():
+    """Test JSON mode specifically with Meta Llama models."""
+    model_id = "meta.llama-3.3-70b-instruct"
+    llm = create_chat_model(model_id, response_format={"type": "JSON_OBJECT"})
+
+    response = llm.invoke([
+        HumanMessage(content="Create a JSON object with a person's name and age. Name: Alice, Age: 30")
+    ])
+
+    # Verify valid JSON
+    try:
+        parsed = json.loads(response.content)
+        assert isinstance(parsed, dict)
+        # Check for common variations in key names
+        has_name = any(key in str(parsed).lower() for key in ["name", "person", "alice"])
+        has_age = "30" in str(parsed) or "age" in str(parsed).lower()
+        assert has_name or has_age, f"Should contain person info: {parsed}"
+    except json.JSONDecodeError as e:
+        pytest.fail(f"Meta Llama JSON mode failed: {e}\nContent: {response.content}")
+
+
+@pytest.mark.requires("oci")
+def test_json_mode_cohere():
+    """Test JSON mode specifically with Cohere models."""
+    model_id = "cohere.command-r-plus-08-2024"
+    llm = create_chat_model(model_id, response_format={"type": "JSON_OBJECT"})
+
+    response = llm.invoke([
+        HumanMessage(content="Generate a JSON object with a book title and author. Use 'title' and 'author' as keys.")
+    ])
+
+    # Verify valid JSON
+    try:
+        parsed = json.loads(response.content)
+        assert isinstance(parsed, dict)
+        # Cohere should follow instructions closely
+        assert len(parsed) >= 1, f"Should have at least one key: {parsed}"
+    except json.JSONDecodeError as e:
+        pytest.fail(f"Cohere JSON mode failed: {e}\nContent: {response.content}")
+
+
+@pytest.mark.requires("oci")
+@pytest.mark.parametrize(
+    "model_id",
+    [
+        "meta.llama-3.3-70b-instruct",
+        "cohere.command-r-plus-08-2024",
+    ],
+)
+def test_with_structured_output_json_mode(model_id: str):
+    """Test with_structured_output using the json_mode method.
+
+    This verifies the integration between response_format and LangChain's
+    structured output feature using JSON mode.
+    """
+    class Person(BaseModel):
+        """A person with name and age."""
+        name: str = Field(description="The person's name")
+        age: int = Field(description="The person's age")
+
+    llm = create_chat_model(model_id)
+    structured_llm = llm.with_structured_output(Person, method="json_mode")
+
+    result = structured_llm.invoke("Tell me about a person named Bob who is 25 years old.")
+
+    # Verify we got a Person object
+    assert isinstance(result, Person), f"Should return Person object, got {type(result)}"
+    assert hasattr(result, "name"), "Should have name attribute"
+    assert hasattr(result, "age"), "Should have age attribute"
+
+    # Verify the content is reasonable (some models might not follow exactly);
+    # just check that we got some data
+    assert result.name, "Name should not be empty"
+    assert result.age > 0, "Age should be positive"
+
+
+@pytest.mark.requires("oci")
+@pytest.mark.parametrize(
+    "model_id",
+    [
+        "meta.llama-3.3-70b-instruct",
+        # Note: Cohere models use CohereResponseFormat, not JsonSchemaResponseFormat,
+        # so the json_schema method is not supported for Cohere models.
+    ],
+)
+def test_with_structured_output_json_schema(model_id: str):
+    """Test with_structured_output using the json_schema method.
+
+    This verifies that JSON schema mode works with the OCI API and properly
+    constrains the output to match the provided schema.
+
+    Note: This test only runs with Meta Llama models, as Cohere models require
+    a different response format type (CohereResponseFormat vs JsonSchemaResponseFormat).
+    """
+    class Product(BaseModel):
+        """A product with details."""
+        product_name: str = Field(description="Name of the product")
+        price: float = Field(description="Price in USD")
+        in_stock: bool = Field(description="Whether the product is in stock")
+
+    llm = create_chat_model(model_id)
+    structured_llm = llm.with_structured_output(Product, method="json_schema")
+
+    result = structured_llm.invoke(
+        "Create a product: Laptop, $999.99, available in stock"
+    )
+
+    # Verify we got a Product object with correct types
+    assert isinstance(result, Product), f"Should return Product object, got {type(result)}"
+    assert isinstance(result.product_name, str), "product_name should be string"
+    assert isinstance(result.price, (int, float)), "price should be numeric"
+    assert isinstance(result.in_stock, bool), "in_stock should be boolean"
+
+    # Verify reasonable values
+    assert result.product_name, "product_name should not be empty"
+    assert result.price > 0, "price should be positive"
+
+
+@pytest.mark.requires("oci")
+def test_response_format_via_model_kwargs():
+    """Test that response_format works when passed via model_kwargs.
+
+    This tests an alternative way to set response_format at initialization time.
+    """
+    model_id = "meta.llama-3.3-70b-instruct"
+    region = os.getenv("OCI_REGION", "us-chicago-1")
+    endpoint = f"https://inference.generativeai.{region}.oci.oraclecloud.com"
+
+    llm = ChatOCIGenAI(
+        model_id=model_id,
+        service_endpoint=endpoint,
+        compartment_id=os.getenv("OCI_COMP"),
+        model_kwargs={
+            "temperature": 0.1,
+            "max_tokens": 512,
+            "response_format": {"type": "JSON_OBJECT"}
+        },
+        auth_type="SECURITY_TOKEN",
+        auth_profile="DEFAULT",
+        auth_file_location=os.path.expanduser("~/.oci/config"),
+    )
+
+    response = llm.invoke([
+        HumanMessage(content="Create a JSON with a list of two fruits.")
+    ])
+
+    # Verify valid JSON
+    try:
+        parsed = json.loads(response.content)
+        assert isinstance(parsed, dict), "Response should be a JSON object"
+    except json.JSONDecodeError as e:
+        pytest.fail(f"model_kwargs response_format failed: {e}\nContent: {response.content}")
+
+
+@pytest.mark.requires("oci")
+def test_json_mode_complex_nested_structure():
+    """Test JSON mode with a more complex nested structure request."""
+    model_id = "cohere.command-r-plus-08-2024"
+    llm = create_chat_model(model_id, response_format={"type": "JSON_OBJECT"})
+
+    response = llm.invoke([
+        HumanMessage(content="""Create a JSON object representing a company with:
+        - name: "TechCorp"
+        - employees: array of 2 employees, each with name and role
+        - founded: 2020""")
+    ])
+
+    # Verify valid JSON with nested structure
+    try:
+        parsed = json.loads(response.content)
+        assert isinstance(parsed, dict), "Response should be a JSON object"
+
+        # Check for reasonable structure (flexible since models vary)
+        assert len(parsed) >= 1, "Should have at least one top-level key"
+
+        # Try to verify it has some nested structure
+        has_nested = any(isinstance(v, (dict, list)) for v in parsed.values())
+        assert has_nested or len(str(parsed)) > 50, "Should have some nested structure or substantial content"
+
+    except json.JSONDecodeError as e:
+        pytest.fail(f"Complex JSON failed: {e}\nContent: {response.content}")
+
+
+@pytest.mark.requires("oci")
+def test_response_format_class_level():
+    """Test response_format set at class initialization level."""
+    model_id = "meta.llama-3.3-70b-instruct"
+    llm = create_chat_model(model_id, response_format={"type": "JSON_OBJECT"})
+
+    # Should work without bind()
+    response = llm.invoke([
HumanMessage(content="Return JSON with a single key 'status' set to 'ok'") + ]) + + # Verify valid JSON + try: + parsed = json.loads(response.content) + assert isinstance(parsed, dict), "Response should be a JSON object" + except json.JSONDecodeError as e: + pytest.fail(f"Class-level response_format failed: {e}\nContent: {response.content}") diff --git a/libs/oci/tests/unit_tests/chat_models/test_response_format.py b/libs/oci/tests/unit_tests/chat_models/test_response_format.py new file mode 100644 index 0000000..4627e39 --- /dev/null +++ b/libs/oci/tests/unit_tests/chat_models/test_response_format.py @@ -0,0 +1,200 @@ +"""Unit tests for response_format feature.""" +import pytest +from unittest.mock import MagicMock + +from langchain_core.messages import HumanMessage +from langchain_oci.chat_models import ChatOCIGenAI + + +@pytest.mark.requires("oci") +def test_response_format_class_level(): + """Test class-level response_format parameter.""" + oci_gen_ai_client = MagicMock() + llm = ChatOCIGenAI( + model_id="meta.llama-3.3-70b-instruct", + response_format={"type": "JSON_OBJECT"}, + client=oci_gen_ai_client + ) + assert llm.response_format == {"type": "JSON_OBJECT"} + + +@pytest.mark.requires("oci") +def test_response_format_default_none(): + """Test that response_format defaults to None.""" + oci_gen_ai_client = MagicMock() + llm = ChatOCIGenAI( + model_id="meta.llama-3.3-70b-instruct", + client=oci_gen_ai_client + ) + assert llm.response_format is None + + +@pytest.mark.requires("oci") +def test_response_format_via_bind(): + """Test response_format set via bind().""" + oci_gen_ai_client = MagicMock() + llm = ChatOCIGenAI( + model_id="meta.llama-3.3-70b-instruct", + client=oci_gen_ai_client + ) + + # Should not raise TypeError anymore + llm_with_format = llm.bind(response_format={"type": "JSON_OBJECT"}) + + assert "response_format" in llm_with_format.kwargs + assert llm_with_format.kwargs["response_format"] == {"type": "JSON_OBJECT"} + + +@pytest.mark.requires("oci") +def test_response_format_passed_to_api_generic(): + """Test that response_format is passed to OCI API for Generic models.""" + from oci.generative_ai_inference import models + + oci_gen_ai_client = MagicMock() + llm = ChatOCIGenAI( + model_id="meta.llama-3.3-70b-instruct", + client=oci_gen_ai_client + ) + + llm_with_format = llm.bind(response_format={"type": "JSON_OBJECT"}) + + # Prepare a request + request = llm_with_format._prepare_request( + [HumanMessage(content="Hello")], + stop=None, + stream=False, + **llm_with_format.kwargs + ) + + # Verify response_format is in the request + assert hasattr(request.chat_request, 'response_format') + assert request.chat_request.response_format == {"type": "JSON_OBJECT"} + + +@pytest.mark.requires("oci") +def test_response_format_passed_to_api_cohere(): + """Test that response_format is passed to OCI API for Cohere models.""" + from oci.generative_ai_inference import models + + oci_gen_ai_client = MagicMock() + llm = ChatOCIGenAI( + model_id="cohere.command-r-plus", + client=oci_gen_ai_client + ) + + llm_with_format = llm.bind(response_format={"type": "JSON_OBJECT"}) + + # Prepare a request + request = llm_with_format._prepare_request( + [HumanMessage(content="Hello")], + stop=None, + stream=False, + **llm_with_format.kwargs + ) + + # Verify response_format is in the request + assert hasattr(request.chat_request, 'response_format') + assert request.chat_request.response_format == {"type": "JSON_OBJECT"} + + +@pytest.mark.requires("oci") +def test_with_structured_output_json_mode(): + 
"""Test with_structured_output with json_mode method.""" + oci_gen_ai_client = MagicMock() + llm = ChatOCIGenAI( + model_id="cohere.command-r-plus", + client=oci_gen_ai_client + ) + + # This should not raise TypeError anymore + from pydantic import BaseModel + + class TestSchema(BaseModel): + name: str + age: int + + structured_llm = llm.with_structured_output( + schema=TestSchema, + method="json_mode" + ) + + # The structured LLM should have response_format in kwargs + # It's wrapped in a Runnable, so we need to check the first step + assert structured_llm is not None + + +@pytest.mark.requires("oci") +def test_with_structured_output_json_schema(): + """Test with_structured_output with json_schema method.""" + oci_gen_ai_client = MagicMock() + llm = ChatOCIGenAI( + model_id="meta.llama-3.3-70b-instruct", + client=oci_gen_ai_client + ) + + # This should not raise TypeError anymore + from pydantic import BaseModel + + class TestSchema(BaseModel): + """Test schema""" + name: str + age: int + + structured_llm = llm.with_structured_output( + schema=TestSchema, + method="json_schema" + ) + + # The structured LLM should be created without errors + assert structured_llm is not None + + +@pytest.mark.requires("oci") +def test_response_format_json_schema_object(): + """Test response_format with JsonSchemaResponseFormat object.""" + from oci.generative_ai_inference import models + + oci_gen_ai_client = MagicMock() + llm = ChatOCIGenAI( + model_id="meta.llama-3.3-70b-instruct", + client=oci_gen_ai_client + ) + + # Create a proper JsonSchemaResponseFormat object + response_json_schema = models.ResponseJsonSchema( + name="test_response", + description="Test schema", + schema={"type": "object", "properties": {"key": {"type": "string"}}}, + is_strict=True + ) + + response_format_obj = models.JsonSchemaResponseFormat( + json_schema=response_json_schema + ) + + llm_with_format = llm.bind(response_format=response_format_obj) + + # Verify it's stored in kwargs + assert "response_format" in llm_with_format.kwargs + assert llm_with_format.kwargs["response_format"] == response_format_obj + + +@pytest.mark.requires("oci") +def test_response_format_model_kwargs(): + """Test response_format via model_kwargs.""" + oci_gen_ai_client = MagicMock() + llm = ChatOCIGenAI( + model_id="meta.llama-3.3-70b-instruct", + model_kwargs={"response_format": {"type": "JSON_OBJECT"}}, + client=oci_gen_ai_client + ) + + request = llm._prepare_request( + [HumanMessage(content="Hello")], + stop=None, + stream=False + ) + + # Verify response_format is in the request + assert hasattr(request.chat_request, 'response_format') + assert request.chat_request.response_format == {"type": "JSON_OBJECT"}