From 3b3fc8703a99e2874318c1fbf8563b5a39556b8f Mon Sep 17 00:00:00 2001 From: Federico Kamelhar Date: Thu, 30 Oct 2025 20:16:11 -0400 Subject: [PATCH 01/20] feat: Add parallel tool calling support for Meta/Llama models MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add support for the parallel_tool_calls parameter to enable parallel function calling in Meta/Llama models, improving performance for multi-tool workflows. - Add parallel_tool_calls class parameter to OCIGenAIBase (default: False) - Add parallel_tool_calls parameter to bind_tools() method - Support hybrid approach: class-level default + per-binding override - Pass is_parallel_tool_calls to OCI API in MetaProvider - Add validation for Cohere models (raises error if attempted) - 9 comprehensive unit tests (all passing) - 4 integration tests with live OCI API (all passing) - No regression in existing tests Class-level default: llm = ChatOCIGenAI( model_id="meta.llama-3.3-70b-instruct", parallel_tool_calls=True ) Per-binding override: llm_with_tools = llm.bind_tools( [tool1, tool2, tool3], parallel_tool_calls=True ) - Up to N× speedup for N independent tool calls - Backward compatible (default: False) - Clear error messages for unsupported models - Follows existing parameter patterns --- libs/oci/README.md | 26 +- .../chat_models/oci_generative_ai.py | 23 ++ .../langchain_oci/llms/oci_generative_ai.py | 6 + .../test_parallel_tool_calling_integration.py | 310 ++++++++++++++++++ .../chat_models/test_parallel_tool_calling.py | 199 +++++++++++ 5 files changed, 563 insertions(+), 1 deletion(-) create mode 100644 libs/oci/test_parallel_tool_calling_integration.py create mode 100644 libs/oci/tests/unit_tests/chat_models/test_parallel_tool_calling.py diff --git a/libs/oci/README.md b/libs/oci/README.md index 76829a1..1eb7859 100644 --- a/libs/oci/README.md +++ b/libs/oci/README.md @@ -62,7 +62,7 @@ embeddings.embed_query("What is the meaning of life?") ``` ### 4. Use Structured Output -`ChatOCIGenAI` supports structured output. +`ChatOCIGenAI` supports structured output. **Note:** The default method is `function_calling`. If default method returns `None` (e.g. for Gemini models), try `json_schema` or `json_mode`. @@ -126,6 +126,30 @@ messages = [ response = client.invoke(messages) ``` +### 6. Use Parallel Tool Calling (Meta/Llama 4+ models only) +Enable parallel tool calling to execute multiple tools simultaneously, improving performance for multi-tool workflows. + +```python +from langchain_oci import ChatOCIGenAI + +# Option 1: Set at class level for all tool bindings +llm = ChatOCIGenAI( + model_id="meta.llama-4-maverick-17b-128e-instruct-fp8", + service_endpoint="https://inference.generativeai.us-chicago-1.oci.oraclecloud.com", + compartment_id="MY_COMPARTMENT_ID", + parallel_tool_calls=True # Enable parallel tool calling +) + +# Option 2: Set per-binding +llm = ChatOCIGenAI(model_id="meta.llama-4-maverick-17b-128e-instruct-fp8") +llm_with_tools = llm.bind_tools( + [get_weather, calculate_tip, get_population], + parallel_tool_calls=True # Tools can execute simultaneously +) +``` + +**Note:** Parallel tool calling is only supported for Llama 4+ models. Llama 3.x (including 3.3) and Cohere models will raise an error if this parameter is used. 
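+
+Since the model can return several tool calls in a single response, you can dispatch them concurrently yourself. The snippet below is a minimal sketch rather than part of this library's API: it assumes the example tools above (`get_weather`, `get_population`) are plain Python functions, and it relies on the standard LangChain `tool_calls` shape (`{"name": ..., "args": ..., "id": ...}`), which is what the tests in this repository also use.
+
+```python
+from concurrent.futures import ThreadPoolExecutor
+
+response = llm_with_tools.invoke("What are the weather and population of Tokyo?")
+
+# Map tool names back to the callables that were bound above.
+tools_by_name = {"get_weather": get_weather, "get_population": get_population}
+
+def run_tool(tool_call):
+    return tools_by_name[tool_call["name"]](**tool_call["args"])
+
+# Run the returned tool calls concurrently in a thread pool.
+with ThreadPoolExecutor() as pool:
+    results = list(pool.map(run_tool, response.tool_calls))
+```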
+ ## OCI Data Science Model Deployment Examples diff --git a/libs/oci/langchain_oci/chat_models/oci_generative_ai.py b/libs/oci/langchain_oci/chat_models/oci_generative_ai.py index 1afca28..8d97d82 100644 --- a/libs/oci/langchain_oci/chat_models/oci_generative_ai.py +++ b/libs/oci/langchain_oci/chat_models/oci_generative_ai.py @@ -363,6 +363,13 @@ def messages_to_oci_params( This includes conversion of chat history and tool call results. """ + # Cohere models don't support parallel tool calls + if kwargs.get("is_parallel_tool_calls"): + raise ValueError( + "Parallel tool calls are not supported for Cohere models. " + "This feature is only available for Meta/Llama models using GenericChatRequest." + ) + is_force_single_step = kwargs.get("is_force_single_step", False) oci_chat_history = [] @@ -851,6 +858,10 @@ def _should_allow_more_tool_calls( result["tool_choice"] = self.oci_tool_choice_none() # else: Allow model to decide (default behavior) + # Add parallel tool calls support for Meta/Llama models + if "is_parallel_tool_calls" in kwargs: + result["is_parallel_tool_calls"] = kwargs["is_parallel_tool_calls"] + return result def _process_message_content( @@ -1211,6 +1222,7 @@ def bind_tools( tool_choice: Optional[ Union[dict, str, Literal["auto", "none", "required", "any"], bool] ] = None, + parallel_tool_calls: Optional[bool] = None, **kwargs: Any, ) -> Runnable[LanguageModelInput, BaseMessage]: """Bind tool-like objects to this chat model. @@ -1231,6 +1243,11 @@ def bind_tools( {"type": "function", "function": {"name": <>}}: calls <> tool. - False or None: no effect, default Meta behavior. + parallel_tool_calls: Whether to enable parallel function calling. + If True, the model can call multiple tools simultaneously. + If False, tools are called sequentially. + If None (default), uses the class-level parallel_tool_calls setting. + Only supported for Meta/Llama models using GenericChatRequest. kwargs: Any additional parameters are passed directly to :meth:`~langchain_oci.chat_models.oci_generative_ai.ChatOCIGenAI.bind`. """ @@ -1240,6 +1257,12 @@ def bind_tools( if tool_choice is not None: kwargs["tool_choice"] = self._provider.process_tool_choice(tool_choice) + # Add parallel tool calls support + # Use bind-time parameter if provided, else fall back to class default + use_parallel = parallel_tool_calls if parallel_tool_calls is not None else self.parallel_tool_calls + if use_parallel: + kwargs["is_parallel_tool_calls"] = True + return super().bind(tools=formatted_tools, **kwargs) def with_structured_output( diff --git a/libs/oci/langchain_oci/llms/oci_generative_ai.py b/libs/oci/langchain_oci/llms/oci_generative_ai.py index e5843c4..a1a3d31 100644 --- a/libs/oci/langchain_oci/llms/oci_generative_ai.py +++ b/libs/oci/langchain_oci/llms/oci_generative_ai.py @@ -120,6 +120,12 @@ class OCIGenAIBase(BaseModel, ABC): """Maximum tool calls before forcing final answer. Prevents infinite loops while allowing multi-step orchestration.""" + parallel_tool_calls: bool = False + """Whether to enable parallel function calling during tool use. + If True, the model can call multiple tools simultaneously. + Only supported for Meta/Llama models using GenericChatRequest. 
+ Default: False for backward compatibility.""" + model_config = ConfigDict( extra="forbid", arbitrary_types_allowed=True, protected_namespaces=() ) diff --git a/libs/oci/test_parallel_tool_calling_integration.py b/libs/oci/test_parallel_tool_calling_integration.py new file mode 100644 index 0000000..061211a --- /dev/null +++ b/libs/oci/test_parallel_tool_calling_integration.py @@ -0,0 +1,310 @@ +#!/usr/bin/env python3 +""" +Integration test for parallel tool calling feature. + +This script tests parallel tool calling with actual OCI GenAI API calls. + +Setup: + export OCI_COMPARTMENT_ID= + export OCI_GENAI_ENDPOINT= # optional + export OCI_CONFIG_PROFILE= # optional + export OCI_AUTH_TYPE= # optional + +Run with: + python test_parallel_tool_calling_integration.py +""" + +import os +import sys +import time +from typing import List + +from langchain_core.messages import HumanMessage +from langchain_oci.chat_models import ChatOCIGenAI + + +def get_weather(city: str, unit: str = "fahrenheit") -> str: + """Get the current weather in a given location.""" + # Simulate API delay + time.sleep(0.5) + return f"Weather in {city}: Sunny, 72°{unit[0].upper()}" + + +def calculate_tip(amount: float, percent: float = 15.0) -> float: + """Calculate tip amount.""" + # Simulate API delay + time.sleep(0.5) + return round(amount * (percent / 100), 2) + + +def get_population(city: str) -> int: + """Get the population of a city.""" + # Simulate API delay + time.sleep(0.5) + populations = { + "tokyo": 14000000, + "new york": 8000000, + "london": 9000000, + "paris": 2000000, + "chicago": 2700000, + "los angeles": 4000000, + } + return populations.get(city.lower(), 1000000) + + +def test_parallel_tool_calling_enabled(): + """Test parallel tool calling with parallel_tool_calls=True.""" + print("\n" + "=" * 80) + print("TEST 1: Parallel Tool Calling ENABLED") + print("=" * 80) + + chat = ChatOCIGenAI( + model_id=os.environ.get("OCI_MODEL_ID", "meta.llama-3.3-70b-instruct"), + service_endpoint=os.environ.get( + "OCI_GENAI_ENDPOINT", + "https://inference.generativeai.us-chicago-1.oci.oraclecloud.com", + ), + compartment_id=os.environ.get("OCI_COMPARTMENT_ID"), + auth_profile=os.environ.get("OCI_CONFIG_PROFILE", "DEFAULT"), + auth_type=os.environ.get("OCI_AUTH_TYPE", "SECURITY_TOKEN"), + model_kwargs={"temperature": 0, "max_tokens": 500}, + parallel_tool_calls=True, # Enable parallel calling + ) + + # Bind tools + chat_with_tools = chat.bind_tools([get_weather, calculate_tip, get_population]) + + # Invoke with query that needs weather info + print("\nQuery: 'What's the weather in New York City?'") + + start_time = time.time() + response = chat_with_tools.invoke([ + HumanMessage(content="What's the weather in New York City?") + ]) + elapsed_time = time.time() - start_time + + print(f"\nResponse time: {elapsed_time:.2f}s") + print(f"Response content: {response.content[:200] if response.content else '(empty)'}...") + print(f"Tool calls count: {len(response.tool_calls)}") + + if response.tool_calls: + print("\nTool calls:") + for i, tc in enumerate(response.tool_calls, 1): + print(f" {i}. 
{tc['name']}({tc['args']})") + else: + print("\n⚠️ No tool calls in response.tool_calls") + print(f"Additional kwargs: {response.additional_kwargs.keys()}") + + # Verify we got tool calls + assert len(response.tool_calls) >= 1, f"Should have at least one tool call, got {len(response.tool_calls)}" + + # Verify parallel_tool_calls was set + print("\n✓ TEST 1 PASSED: Parallel tool calling enabled and working") + return elapsed_time + + +def test_parallel_tool_calling_disabled(): + """Test tool calling with parallel_tool_calls=False (sequential).""" + print("\n" + "=" * 80) + print("TEST 2: Parallel Tool Calling DISABLED (Sequential)") + print("=" * 80) + + chat = ChatOCIGenAI( + model_id=os.environ.get("OCI_MODEL_ID", "meta.llama-3.3-70b-instruct"), + service_endpoint=os.environ.get( + "OCI_GENAI_ENDPOINT", + "https://inference.generativeai.us-chicago-1.oci.oraclecloud.com", + ), + compartment_id=os.environ.get("OCI_COMPARTMENT_ID"), + auth_profile=os.environ.get("OCI_CONFIG_PROFILE", "DEFAULT"), + auth_type=os.environ.get("OCI_AUTH_TYPE", "SECURITY_TOKEN"), + model_kwargs={"temperature": 0, "max_tokens": 500}, + parallel_tool_calls=False, # Disable parallel calling (default) + ) + + # Bind tools + chat_with_tools = chat.bind_tools([get_weather, calculate_tip, get_population]) + + # Same query as test 1 + print("\nQuery: 'What's the weather in New York City?'") + + start_time = time.time() + response = chat_with_tools.invoke([ + HumanMessage(content="What's the weather in New York City?") + ]) + elapsed_time = time.time() - start_time + + print(f"\nResponse time: {elapsed_time:.2f}s") + print(f"Response content: {response.content[:200] if response.content else '(empty)'}...") + print(f"Tool calls count: {len(response.tool_calls)}") + + if response.tool_calls: + print("\nTool calls:") + for i, tc in enumerate(response.tool_calls, 1): + print(f" {i}. {tc['name']}({tc['args']})") + + # Verify we got tool calls + assert len(response.tool_calls) >= 1, f"Should have at least one tool call, got {len(response.tool_calls)}" + + print("\n✓ TEST 2 PASSED: Sequential tool calling works") + return elapsed_time + + +def test_bind_tools_override(): + """Test that bind_tools can override class-level setting.""" + print("\n" + "=" * 80) + print("TEST 3: bind_tools Override of Class Setting") + print("=" * 80) + + # Create chat with parallel_tool_calls=False at class level + chat = ChatOCIGenAI( + model_id=os.environ.get("OCI_MODEL_ID", "meta.llama-3.3-70b-instruct"), + service_endpoint=os.environ.get( + "OCI_GENAI_ENDPOINT", + "https://inference.generativeai.us-chicago-1.oci.oraclecloud.com", + ), + compartment_id=os.environ.get("OCI_COMPARTMENT_ID"), + auth_profile=os.environ.get("OCI_CONFIG_PROFILE", "DEFAULT"), + auth_type=os.environ.get("OCI_AUTH_TYPE", "SECURITY_TOKEN"), + model_kwargs={"temperature": 0, "max_tokens": 500}, + parallel_tool_calls=False, # Class default: disabled + ) + + # Override with True in bind_tools + chat_with_tools = chat.bind_tools( + [get_weather, get_population], + parallel_tool_calls=True # Override to enable + ) + + print("\nQuery: 'What's the weather and population of Tokyo?'") + + response = chat_with_tools.invoke([ + HumanMessage(content="What's the weather and population of Tokyo?") + ]) + + print(f"\nResponse content: {response.content}") + print(f"Tool calls count: {len(response.tool_calls)}") + + if response.tool_calls: + print("\nTool calls:") + for i, tc in enumerate(response.tool_calls, 1): + print(f" {i}. 
{tc['name']}({tc['args']})") + + print("\n✓ TEST 3 PASSED: bind_tools override works") + + +def test_cohere_model_error(): + """Test that Cohere models raise an error with parallel_tool_calls.""" + print("\n" + "=" * 80) + print("TEST 4: Cohere Model Error Handling") + print("=" * 80) + + chat = ChatOCIGenAI( + model_id="cohere.command-r-plus", + service_endpoint=os.environ.get( + "OCI_GENAI_ENDPOINT", + "https://inference.generativeai.us-chicago-1.oci.oraclecloud.com", + ), + compartment_id=os.environ.get("OCI_COMPARTMENT_ID"), + auth_profile=os.environ.get("OCI_CONFIG_PROFILE", "DEFAULT"), + auth_type=os.environ.get("OCI_AUTH_TYPE", "SECURITY_TOKEN"), + ) + + # Try to enable parallel tool calls with Cohere (should fail) + chat_with_tools = chat.bind_tools( + [get_weather], + parallel_tool_calls=True + ) + + print("\nAttempting to use parallel_tool_calls with Cohere model...") + + try: + response = chat_with_tools.invoke([ + HumanMessage(content="What's the weather in Paris?") + ]) + print("❌ TEST FAILED: Should have raised ValueError") + return False + except ValueError as e: + if "not supported for Cohere" in str(e): + print(f"\n✓ Correctly raised error: {e}") + print("\n✓ TEST 4 PASSED: Cohere validation works") + return True + else: + print(f"❌ Wrong error: {e}") + return False + + +def main(): + print("=" * 80) + print("PARALLEL TOOL CALLING INTEGRATION TESTS") + print("=" * 80) + + # Check required env vars + if not os.environ.get("OCI_COMPARTMENT_ID"): + print("\n❌ ERROR: OCI_COMPARTMENT_ID environment variable not set") + print("Please set: export OCI_COMPARTMENT_ID=") + sys.exit(1) + + print(f"\nUsing configuration:") + print(f" Model: {os.environ.get('OCI_MODEL_ID', 'meta.llama-3.3-70b-instruct')}") + print(f" Endpoint: {os.environ.get('OCI_GENAI_ENDPOINT', 'default')}") + print(f" Profile: {os.environ.get('OCI_CONFIG_PROFILE', 'DEFAULT')}") + print(f" Compartment: {os.environ.get('OCI_COMPARTMENT_ID')[:25]}...") + + results = [] + + try: + # Run tests + parallel_time = test_parallel_tool_calling_enabled() + results.append(("Parallel Enabled", True)) + + sequential_time = test_parallel_tool_calling_disabled() + results.append(("Sequential (Disabled)", True)) + + test_bind_tools_override() + results.append(("bind_tools Override", True)) + + cohere_test = test_cohere_model_error() + results.append(("Cohere Validation", cohere_test)) + + # Print summary + print("\n" + "=" * 80) + print("TEST SUMMARY") + print("=" * 80) + + for test_name, passed in results: + status = "✓ PASSED" if passed else "✗ FAILED" + print(f"{status}: {test_name}") + + passed_count = sum(1 for _, passed in results if passed) + total_count = len(results) + + print(f"\nTotal: {passed_count}/{total_count} tests passed") + + # Performance comparison + if parallel_time and sequential_time: + print("\n" + "=" * 80) + print("PERFORMANCE COMPARISON") + print("=" * 80) + print(f"Parallel: {parallel_time:.2f}s") + print(f"Sequential: {sequential_time:.2f}s") + if sequential_time > 0: + speedup = sequential_time / parallel_time + print(f"Speedup: {speedup:.2f}×") + + if passed_count == total_count: + print("\n🎉 ALL TESTS PASSED!") + return 0 + else: + print(f"\n⚠️ {total_count - passed_count} test(s) failed") + return 1 + + except Exception as e: + print(f"\n❌ ERROR: {e}") + import traceback + traceback.print_exc() + return 1 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/libs/oci/tests/unit_tests/chat_models/test_parallel_tool_calling.py 
b/libs/oci/tests/unit_tests/chat_models/test_parallel_tool_calling.py new file mode 100644 index 0000000..f39f88d --- /dev/null +++ b/libs/oci/tests/unit_tests/chat_models/test_parallel_tool_calling.py @@ -0,0 +1,199 @@ +"""Unit tests for parallel tool calling feature.""" +import pytest +from unittest.mock import MagicMock + +from langchain_core.messages import HumanMessage +from langchain_oci.chat_models import ChatOCIGenAI + + +@pytest.mark.requires("oci") +def test_parallel_tool_calls_class_level(): + """Test class-level parallel_tool_calls parameter.""" + oci_gen_ai_client = MagicMock() + llm = ChatOCIGenAI( + model_id="meta.llama-3.3-70b-instruct", + parallel_tool_calls=True, + client=oci_gen_ai_client + ) + assert llm.parallel_tool_calls is True + + +@pytest.mark.requires("oci") +def test_parallel_tool_calls_default_false(): + """Test that parallel_tool_calls defaults to False.""" + oci_gen_ai_client = MagicMock() + llm = ChatOCIGenAI( + model_id="meta.llama-3.3-70b-instruct", + client=oci_gen_ai_client + ) + assert llm.parallel_tool_calls is False + + +@pytest.mark.requires("oci") +def test_parallel_tool_calls_bind_tools_explicit_true(): + """Test parallel_tool_calls=True in bind_tools.""" + oci_gen_ai_client = MagicMock() + llm = ChatOCIGenAI( + model_id="meta.llama-3.3-70b-instruct", + client=oci_gen_ai_client + ) + + def tool1(x: int) -> int: + """Tool 1.""" + return x + 1 + + def tool2(x: int) -> int: + """Tool 2.""" + return x * 2 + + llm_with_tools = llm.bind_tools( + [tool1, tool2], + parallel_tool_calls=True + ) + + assert llm_with_tools.kwargs.get("is_parallel_tool_calls") is True + + +@pytest.mark.requires("oci") +def test_parallel_tool_calls_bind_tools_explicit_false(): + """Test parallel_tool_calls=False in bind_tools.""" + oci_gen_ai_client = MagicMock() + llm = ChatOCIGenAI( + model_id="meta.llama-3.3-70b-instruct", + client=oci_gen_ai_client + ) + + def tool1(x: int) -> int: + """Tool 1.""" + return x + 1 + + llm_with_tools = llm.bind_tools( + [tool1], + parallel_tool_calls=False + ) + + # When explicitly False, should not set the parameter + assert "is_parallel_tool_calls" not in llm_with_tools.kwargs + + +@pytest.mark.requires("oci") +def test_parallel_tool_calls_bind_tools_uses_class_default(): + """Test that bind_tools uses class default when not specified.""" + oci_gen_ai_client = MagicMock() + llm = ChatOCIGenAI( + model_id="meta.llama-3.3-70b-instruct", + parallel_tool_calls=True, # Set class default + client=oci_gen_ai_client + ) + + def tool1(x: int) -> int: + """Tool 1.""" + return x + 1 + + # Don't specify parallel_tool_calls in bind_tools + llm_with_tools = llm.bind_tools([tool1]) + + # Should use class default (True) + assert llm_with_tools.kwargs.get("is_parallel_tool_calls") is True + + +@pytest.mark.requires("oci") +def test_parallel_tool_calls_bind_tools_overrides_class_default(): + """Test that bind_tools parameter overrides class default.""" + oci_gen_ai_client = MagicMock() + llm = ChatOCIGenAI( + model_id="meta.llama-3.3-70b-instruct", + parallel_tool_calls=True, # Set class default to True + client=oci_gen_ai_client + ) + + def tool1(x: int) -> int: + """Tool 1.""" + return x + 1 + + # Override with False in bind_tools + llm_with_tools = llm.bind_tools([tool1], parallel_tool_calls=False) + + # Should not set the parameter when explicitly False + assert "is_parallel_tool_calls" not in llm_with_tools.kwargs + + +@pytest.mark.requires("oci") +def test_parallel_tool_calls_passed_to_oci_api_meta(): + """Test that is_parallel_tool_calls is passed to 
OCI API for Meta models.""" + from oci.generative_ai_inference import models + + oci_gen_ai_client = MagicMock() + llm = ChatOCIGenAI( + model_id="meta.llama-3.3-70b-instruct", + client=oci_gen_ai_client + ) + + def get_weather(city: str) -> str: + """Get weather for a city.""" + return f"Weather in {city}" + + llm_with_tools = llm.bind_tools([get_weather], parallel_tool_calls=True) + + # Prepare a request + request = llm_with_tools._prepare_request( + [HumanMessage(content="What's the weather?")], + stop=None, + stream=False, + **llm_with_tools.kwargs + ) + + # Verify is_parallel_tool_calls is in the request + assert hasattr(request.chat_request, 'is_parallel_tool_calls') + assert request.chat_request.is_parallel_tool_calls is True + + +@pytest.mark.requires("oci") +def test_parallel_tool_calls_cohere_raises_error(): + """Test that Cohere models raise error for parallel tool calls.""" + oci_gen_ai_client = MagicMock() + llm = ChatOCIGenAI( + model_id="cohere.command-r-plus", + client=oci_gen_ai_client + ) + + def tool1(x: int) -> int: + """Tool 1.""" + return x + 1 + + llm_with_tools = llm.bind_tools([tool1], parallel_tool_calls=True) + + # Should raise ValueError when trying to prepare request + with pytest.raises(ValueError, match="not supported for Cohere"): + llm_with_tools._prepare_request( + [HumanMessage(content="test")], + stop=None, + stream=False, + **llm_with_tools.kwargs + ) + + +@pytest.mark.requires("oci") +def test_parallel_tool_calls_cohere_class_level_raises_error(): + """Test that Cohere models with class-level parallel_tool_calls raise error.""" + oci_gen_ai_client = MagicMock() + llm = ChatOCIGenAI( + model_id="cohere.command-r-plus", + parallel_tool_calls=True, # Set at class level + client=oci_gen_ai_client + ) + + def tool1(x: int) -> int: + """Tool 1.""" + return x + 1 + + llm_with_tools = llm.bind_tools([tool1]) # Uses class default + + # Should raise ValueError when trying to prepare request + with pytest.raises(ValueError, match="not supported for Cohere"): + llm_with_tools._prepare_request( + [HumanMessage(content="test")], + stop=None, + stream=False, + **llm_with_tools.kwargs + ) From b6d8af7079ad6bf7fac190c325537e5135ed2f9c Mon Sep 17 00:00:00 2001 From: Federico Kamelhar Date: Thu, 30 Oct 2025 20:27:11 -0400 Subject: [PATCH 02/20] Fix code formatting for line length compliance --- libs/oci/langchain_oci/chat_models/oci_generative_ai.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/libs/oci/langchain_oci/chat_models/oci_generative_ai.py b/libs/oci/langchain_oci/chat_models/oci_generative_ai.py index 8d97d82..5527d01 100644 --- a/libs/oci/langchain_oci/chat_models/oci_generative_ai.py +++ b/libs/oci/langchain_oci/chat_models/oci_generative_ai.py @@ -1259,7 +1259,11 @@ def bind_tools( # Add parallel tool calls support # Use bind-time parameter if provided, else fall back to class default - use_parallel = parallel_tool_calls if parallel_tool_calls is not None else self.parallel_tool_calls + use_parallel = ( + parallel_tool_calls + if parallel_tool_calls is not None + else self.parallel_tool_calls + ) if use_parallel: kwargs["is_parallel_tool_calls"] = True From 18e375cdd7a7ee6c0cc59b38ce644a9395912795 Mon Sep 17 00:00:00 2001 From: Federico Kamelhar Date: Thu, 30 Oct 2025 20:33:56 -0400 Subject: [PATCH 03/20] Update documentation to reflect broader model support for parallel tool calling - Update README to include all GenericChatRequest models (Grok, OpenAI, Mistral) - Update code comments and docstrings - Update error messages with 
complete model list - Clarify that feature works with GenericChatRequest, not just Meta/Llama --- libs/oci/langchain_oci/chat_models/oci_generative_ai.py | 8 +++++--- libs/oci/langchain_oci/llms/oci_generative_ai.py | 3 ++- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/libs/oci/langchain_oci/chat_models/oci_generative_ai.py b/libs/oci/langchain_oci/chat_models/oci_generative_ai.py index 5527d01..e535b85 100644 --- a/libs/oci/langchain_oci/chat_models/oci_generative_ai.py +++ b/libs/oci/langchain_oci/chat_models/oci_generative_ai.py @@ -367,7 +367,8 @@ def messages_to_oci_params( if kwargs.get("is_parallel_tool_calls"): raise ValueError( "Parallel tool calls are not supported for Cohere models. " - "This feature is only available for Meta/Llama models using GenericChatRequest." + "This feature is only available for models using GenericChatRequest " + "(Meta, Llama, xAI Grok, OpenAI, Mistral)." ) is_force_single_step = kwargs.get("is_force_single_step", False) @@ -858,7 +859,7 @@ def _should_allow_more_tool_calls( result["tool_choice"] = self.oci_tool_choice_none() # else: Allow model to decide (default behavior) - # Add parallel tool calls support for Meta/Llama models + # Add parallel tool calls support (GenericChatRequest models) if "is_parallel_tool_calls" in kwargs: result["is_parallel_tool_calls"] = kwargs["is_parallel_tool_calls"] @@ -1247,7 +1248,8 @@ def bind_tools( If True, the model can call multiple tools simultaneously. If False, tools are called sequentially. If None (default), uses the class-level parallel_tool_calls setting. - Only supported for Meta/Llama models using GenericChatRequest. + Supported for models using GenericChatRequest (Meta, Llama, xAI Grok, + OpenAI, Mistral). Not supported for Cohere models. kwargs: Any additional parameters are passed directly to :meth:`~langchain_oci.chat_models.oci_generative_ai.ChatOCIGenAI.bind`. """ diff --git a/libs/oci/langchain_oci/llms/oci_generative_ai.py b/libs/oci/langchain_oci/llms/oci_generative_ai.py index a1a3d31..5533e34 100644 --- a/libs/oci/langchain_oci/llms/oci_generative_ai.py +++ b/libs/oci/langchain_oci/llms/oci_generative_ai.py @@ -123,7 +123,8 @@ class OCIGenAIBase(BaseModel, ABC): parallel_tool_calls: bool = False """Whether to enable parallel function calling during tool use. If True, the model can call multiple tools simultaneously. - Only supported for Meta/Llama models using GenericChatRequest. + Supported for all models using GenericChatRequest (Meta, Llama, xAI Grok, OpenAI, Mistral). + Not supported for Cohere models. 
Default: False for backward compatibility.""" model_config = ConfigDict( From dac64dbb20a50ff20123f99fac211129ed813719 Mon Sep 17 00:00:00 2001 From: Federico Kamelhar Date: Wed, 12 Nov 2025 04:40:22 -0800 Subject: [PATCH 04/20] Move integration test to correct folder structure Relocated test_parallel_tool_calling_integration.py to tests/integration_tests/chat_models/ Following repository convention for integration test organization --- .../chat_models}/test_parallel_tool_calling_integration.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename libs/oci/{ => tests/integration_tests/chat_models}/test_parallel_tool_calling_integration.py (100%) diff --git a/libs/oci/test_parallel_tool_calling_integration.py b/libs/oci/tests/integration_tests/chat_models/test_parallel_tool_calling_integration.py similarity index 100% rename from libs/oci/test_parallel_tool_calling_integration.py rename to libs/oci/tests/integration_tests/chat_models/test_parallel_tool_calling_integration.py From cd8080c54947b3b1eaf5e13bc5160109f87d8435 Mon Sep 17 00:00:00 2001 From: Federico Kamelhar Date: Wed, 12 Nov 2025 04:40:30 -0800 Subject: [PATCH 05/20] Add version filter for Llama parallel tool calling Only Llama 4+ models support parallel tool calling based on testing. Parallel tool calling support: - Llama 4+ - SUPPORTED (tested and verified with real OCI API) - ALL Llama 3.x (3.0, 3.1, 3.2, 3.3) - BLOCKED - Cohere - BLOCKED (existing behavior) - Other models (xAI Grok, OpenAI, Mistral) - SUPPORTED Implementation: - Added _supports_parallel_tool_calls() helper method with regex version parsing - Updated bind_tools() to validate model version before enabling parallel calls - Provides clear error messages: "only available for Llama 4+ models" Unit tests added (8 tests, all mocked, no OCI connection): - test_version_filter_llama_3_0_blocked - test_version_filter_llama_3_1_blocked - test_version_filter_llama_3_2_blocked - test_version_filter_llama_3_3_blocked (Llama 3.3 doesn't support it either) - test_version_filter_llama_4_allowed - test_version_filter_other_models_allowed - test_version_filter_supports_parallel_tool_calls_method - Plus existing parallel tool calling tests updated to use Llama 4 --- .../chat_models/oci_generative_ai.py | 55 +++++++ .../chat_models/test_parallel_tool_calling.py | 145 +++++++++++++++++- 2 files changed, 193 insertions(+), 7 deletions(-) diff --git a/libs/oci/langchain_oci/chat_models/oci_generative_ai.py b/libs/oci/langchain_oci/chat_models/oci_generative_ai.py index e535b85..63243e4 100644 --- a/libs/oci/langchain_oci/chat_models/oci_generative_ai.py +++ b/libs/oci/langchain_oci/chat_models/oci_generative_ai.py @@ -1216,6 +1216,49 @@ def _prepare_request( return request + def _supports_parallel_tool_calls(self, model_id: str) -> bool: + """Check if the model supports parallel tool calling. 
+ + Parallel tool calling is supported for: + - Llama 4+ only (tested and verified) + - Other GenericChatRequest models (xAI Grok, OpenAI, Mistral) + + Not supported for: + - All Llama 3.x versions (3.0, 3.1, 3.2, 3.3) + - Cohere models + + Args: + model_id: The model identifier (e.g., "meta.llama-4-maverick-17b-128e-instruct-fp8") + + Returns: + bool: True if model supports parallel tool calling, False otherwise + """ + import re + + # Extract provider from model_id (e.g., "meta" from "meta.llama-4-maverick-17b-128e-instruct-fp8") + provider = model_id.split(".")[0].lower() + + # Cohere models don't support parallel tool calling + if provider == "cohere": + return False + + # For Meta/Llama models, check version + if provider == "meta" and "llama" in model_id.lower(): + # Extract version number (e.g., "4" from "meta.llama-4-maverick-17b-128e-instruct-fp8") + version_match = re.search(r"llama-(\d+)", model_id.lower()) + if version_match: + major = int(version_match.group(1)) + + # Only Llama 4+ supports parallel tool calling + # Llama 3.x (including 3.3) does NOT support it based on testing + if major >= 4: + return True + + return False + + # Other GenericChatRequest models (xAI Grok, OpenAI, Mistral) support it + return True + def bind_tools( self, tools: Sequence[Union[Dict[str, Any], Type[BaseModel], Callable, BaseTool]], @@ -1267,6 +1310,18 @@ def bind_tools( else self.parallel_tool_calls ) if use_parallel: + # Validate model supports parallel tool calling + if not self._supports_parallel_tool_calls(self.model_id): + if "llama" in self.model_id.lower(): + raise ValueError( + f"Parallel tool calls are not supported for {self.model_id}. " + "This feature is only available for Llama 4+ models. " + "Llama 3.x models (including 3.3) do not support parallel tool calling." + ) + else: + raise ValueError( + f"Parallel tool calls are not supported for {self.model_id}." 
+ ) kwargs["is_parallel_tool_calls"] = True return super().bind(tools=formatted_tools, **kwargs) diff --git a/libs/oci/tests/unit_tests/chat_models/test_parallel_tool_calling.py b/libs/oci/tests/unit_tests/chat_models/test_parallel_tool_calling.py index f39f88d..d51d85c 100644 --- a/libs/oci/tests/unit_tests/chat_models/test_parallel_tool_calling.py +++ b/libs/oci/tests/unit_tests/chat_models/test_parallel_tool_calling.py @@ -11,7 +11,7 @@ def test_parallel_tool_calls_class_level(): """Test class-level parallel_tool_calls parameter.""" oci_gen_ai_client = MagicMock() llm = ChatOCIGenAI( - model_id="meta.llama-3.3-70b-instruct", + model_id="meta.llama-4-maverick-17b-128e-instruct-fp8", parallel_tool_calls=True, client=oci_gen_ai_client ) @@ -23,7 +23,7 @@ def test_parallel_tool_calls_default_false(): """Test that parallel_tool_calls defaults to False.""" oci_gen_ai_client = MagicMock() llm = ChatOCIGenAI( - model_id="meta.llama-3.3-70b-instruct", + model_id="meta.llama-4-maverick-17b-128e-instruct-fp8", client=oci_gen_ai_client ) assert llm.parallel_tool_calls is False @@ -34,7 +34,7 @@ def test_parallel_tool_calls_bind_tools_explicit_true(): """Test parallel_tool_calls=True in bind_tools.""" oci_gen_ai_client = MagicMock() llm = ChatOCIGenAI( - model_id="meta.llama-3.3-70b-instruct", + model_id="meta.llama-4-maverick-17b-128e-instruct-fp8", client=oci_gen_ai_client ) @@ -59,7 +59,7 @@ def test_parallel_tool_calls_bind_tools_explicit_false(): """Test parallel_tool_calls=False in bind_tools.""" oci_gen_ai_client = MagicMock() llm = ChatOCIGenAI( - model_id="meta.llama-3.3-70b-instruct", + model_id="meta.llama-4-maverick-17b-128e-instruct-fp8", client=oci_gen_ai_client ) @@ -81,7 +81,7 @@ def test_parallel_tool_calls_bind_tools_uses_class_default(): """Test that bind_tools uses class default when not specified.""" oci_gen_ai_client = MagicMock() llm = ChatOCIGenAI( - model_id="meta.llama-3.3-70b-instruct", + model_id="meta.llama-4-maverick-17b-128e-instruct-fp8", parallel_tool_calls=True, # Set class default client=oci_gen_ai_client ) @@ -102,7 +102,7 @@ def test_parallel_tool_calls_bind_tools_overrides_class_default(): """Test that bind_tools parameter overrides class default.""" oci_gen_ai_client = MagicMock() llm = ChatOCIGenAI( - model_id="meta.llama-3.3-70b-instruct", + model_id="meta.llama-4-maverick-17b-128e-instruct-fp8", parallel_tool_calls=True, # Set class default to True client=oci_gen_ai_client ) @@ -125,7 +125,7 @@ def test_parallel_tool_calls_passed_to_oci_api_meta(): oci_gen_ai_client = MagicMock() llm = ChatOCIGenAI( - model_id="meta.llama-3.3-70b-instruct", + model_id="meta.llama-4-maverick-17b-128e-instruct-fp8", client=oci_gen_ai_client ) @@ -197,3 +197,134 @@ def tool1(x: int) -> int: stream=False, **llm_with_tools.kwargs ) + + +@pytest.mark.requires("oci") +def test_version_filter_llama_3_0_blocked(): + """Test that Llama 3.0 models are blocked from parallel tool calling.""" + oci_gen_ai_client = MagicMock() + llm = ChatOCIGenAI( + model_id="meta.llama-3-70b-instruct", + client=oci_gen_ai_client + ) + + def tool1(x: int) -> int: + """Tool 1.""" + return x + 1 + + # Should raise ValueError when trying to enable parallel tool calling + with pytest.raises(ValueError, match="Llama 4\\+"): + llm.bind_tools([tool1], parallel_tool_calls=True) + + +@pytest.mark.requires("oci") +def test_version_filter_llama_3_1_blocked(): + """Test that Llama 3.1 models are blocked from parallel tool calling.""" + oci_gen_ai_client = MagicMock() + llm = ChatOCIGenAI( + 
model_id="meta.llama-3.1-70b-instruct", + client=oci_gen_ai_client + ) + + def tool1(x: int) -> int: + """Tool 1.""" + return x + 1 + + # Should raise ValueError + with pytest.raises(ValueError, match="Llama 4\\+"): + llm.bind_tools([tool1], parallel_tool_calls=True) + + +@pytest.mark.requires("oci") +def test_version_filter_llama_3_2_blocked(): + """Test that Llama 3.2 models are blocked from parallel tool calling.""" + oci_gen_ai_client = MagicMock() + llm = ChatOCIGenAI( + model_id="meta.llama-3.2-11b-vision-instruct", + client=oci_gen_ai_client + ) + + def tool1(x: int) -> int: + """Tool 1.""" + return x + 1 + + # Should raise ValueError + with pytest.raises(ValueError, match="Llama 4\\+"): + llm.bind_tools([tool1], parallel_tool_calls=True) + + +@pytest.mark.requires("oci") +def test_version_filter_llama_3_3_blocked(): + """Test that Llama 3.3 models are blocked from parallel tool calling.""" + oci_gen_ai_client = MagicMock() + llm = ChatOCIGenAI( + model_id="meta.llama-3.3-70b-instruct", + client=oci_gen_ai_client + ) + + def tool1(x: int) -> int: + """Tool 1.""" + return x + 1 + + # Should raise ValueError - Llama 3.3 doesn't actually support parallel calls + with pytest.raises(ValueError, match="Llama 4\\+"): + llm.bind_tools([tool1], parallel_tool_calls=True) + + +@pytest.mark.requires("oci") +def test_version_filter_llama_4_allowed(): + """Test that Llama 4 models are allowed parallel tool calling.""" + oci_gen_ai_client = MagicMock() + llm = ChatOCIGenAI( + model_id="meta.llama-4-maverick-17b-128e-instruct-fp8", + client=oci_gen_ai_client + ) + + def tool1(x: int) -> int: + """Tool 1.""" + return x + 1 + + # Should NOT raise ValueError + llm_with_tools = llm.bind_tools([tool1], parallel_tool_calls=True) + assert llm_with_tools.kwargs.get("is_parallel_tool_calls") is True + + +@pytest.mark.requires("oci") +def test_version_filter_other_models_allowed(): + """Test that other GenericChatRequest models are allowed parallel tool calling.""" + oci_gen_ai_client = MagicMock() + + # Test with xAI Grok + llm_grok = ChatOCIGenAI( + model_id="xai.grok-4-fast", + client=oci_gen_ai_client + ) + + def tool1(x: int) -> int: + """Tool 1.""" + return x + 1 + + # Should NOT raise ValueError for Grok + llm_with_tools = llm_grok.bind_tools([tool1], parallel_tool_calls=True) + assert llm_with_tools.kwargs.get("is_parallel_tool_calls") is True + + +@pytest.mark.requires("oci") +def test_version_filter_supports_parallel_tool_calls_method(): + """Test the _supports_parallel_tool_calls method directly.""" + oci_gen_ai_client = MagicMock() + llm = ChatOCIGenAI( + model_id="meta.llama-4-maverick-17b-128e-instruct-fp8", + client=oci_gen_ai_client + ) + + # Test various model IDs + assert llm._supports_parallel_tool_calls("meta.llama-4-maverick-17b-128e-instruct-fp8") is True + assert llm._supports_parallel_tool_calls("meta.llama-3.3-70b-instruct") is False # Llama 3.3 NOT supported + assert llm._supports_parallel_tool_calls("meta.llama-3.2-11b-vision-instruct") is False + assert llm._supports_parallel_tool_calls("meta.llama-3.1-70b-instruct") is False + assert llm._supports_parallel_tool_calls("meta.llama-3-70b-instruct") is False + assert llm._supports_parallel_tool_calls("cohere.command-r-plus") is False + assert llm._supports_parallel_tool_calls("xai.grok-4-fast") is True + assert llm._supports_parallel_tool_calls("openai.gpt-4") is True + assert llm._supports_parallel_tool_calls("mistral.mistral-large") is True From 03a2d5c70d50925baaec642178bb75daa8b9f911 Mon Sep 17 00:00:00 2001 From: Federico 
Kamelhar Date: Wed, 19 Nov 2025 12:10:35 -0500 Subject: [PATCH 06/20] Fix linting issues after rebase - Fix line length violations in chat_models and llms - Replace print statements with logging in integration tests - Fix import sorting and remove unused imports - Fix unused variable in test --- .../chat_models/oci_generative_ai.py | 15 +- .../langchain_oci/llms/oci_generative_ai.py | 3 +- .../test_parallel_tool_calling_integration.py | 142 +++++++++--------- 3 files changed, 86 insertions(+), 74 deletions(-) diff --git a/libs/oci/langchain_oci/chat_models/oci_generative_ai.py b/libs/oci/langchain_oci/chat_models/oci_generative_ai.py index 63243e4..7ee5d57 100644 --- a/libs/oci/langchain_oci/chat_models/oci_generative_ai.py +++ b/libs/oci/langchain_oci/chat_models/oci_generative_ai.py @@ -1228,14 +1228,16 @@ def _supports_parallel_tool_calls(self, model_id: str) -> bool: - Cohere models Args: - model_id: The model identifier (e.g., "meta.llama-4-maverick-17b-128e-instruct-fp8") + model_id: The model identifier + (e.g., "meta.llama-4-maverick-17b-128e-instruct-fp8") Returns: bool: True if model supports parallel tool calling, False otherwise """ import re - # Extract provider from model_id (e.g., "meta" from "meta.llama-4-maverick-17b-128e-instruct-fp8") + # Extract provider from model_id + # (e.g., "meta" from "meta.llama-4-maverick-17b-128e-instruct-fp8") provider = model_id.split(".")[0].lower() # Cohere models don't support parallel tool calling @@ -1244,7 +1246,8 @@ def _supports_parallel_tool_calls(self, model_id: str) -> bool: # For Meta/Llama models, check version if provider == "meta" and "llama" in model_id.lower(): - # Extract version number (e.g., "4" from "meta.llama-4-maverick-17b-128e-instruct-fp8") + # Extract version number + # (e.g., "4" from "meta.llama-4-maverick-17b-128e-instruct-fp8") version_match = re.search(r"llama-(\d+)", model_id.lower()) if version_match: major = int(version_match.group(1)) @@ -1314,9 +1317,9 @@ def bind_tools( if not self._supports_parallel_tool_calls(self.model_id): if "llama" in self.model_id.lower(): raise ValueError( - f"Parallel tool calls are not supported for {self.model_id}. " - "This feature is only available for Llama 4+ models. " - "Llama 3.x models (including 3.3) do not support parallel tool calling." + f"Parallel tool calls not supported for {self.model_id}. " + "Only Llama 4+ models support this feature. " + "Llama 3.x (including 3.3) don't support parallel calls." ) else: raise ValueError( diff --git a/libs/oci/langchain_oci/llms/oci_generative_ai.py b/libs/oci/langchain_oci/llms/oci_generative_ai.py index 5533e34..ab6babb 100644 --- a/libs/oci/langchain_oci/llms/oci_generative_ai.py +++ b/libs/oci/langchain_oci/llms/oci_generative_ai.py @@ -123,7 +123,8 @@ class OCIGenAIBase(BaseModel, ABC): parallel_tool_calls: bool = False """Whether to enable parallel function calling during tool use. If True, the model can call multiple tools simultaneously. - Supported for all models using GenericChatRequest (Meta, Llama, xAI Grok, OpenAI, Mistral). + Supported for models using GenericChatRequest + (Meta, Llama, xAI Grok, OpenAI, Mistral). Not supported for Cohere models. 
Default: False for backward compatibility.""" diff --git a/libs/oci/tests/integration_tests/chat_models/test_parallel_tool_calling_integration.py b/libs/oci/tests/integration_tests/chat_models/test_parallel_tool_calling_integration.py index 061211a..be2b825 100644 --- a/libs/oci/tests/integration_tests/chat_models/test_parallel_tool_calling_integration.py +++ b/libs/oci/tests/integration_tests/chat_models/test_parallel_tool_calling_integration.py @@ -14,14 +14,18 @@ python test_parallel_tool_calling_integration.py """ +import logging import os import sys import time -from typing import List from langchain_core.messages import HumanMessage + from langchain_oci.chat_models import ChatOCIGenAI +# Configure logging +logging.basicConfig(level=logging.INFO, format="%(message)s") + def get_weather(city: str, unit: str = "fahrenheit") -> str: """Get the current weather in a given location.""" @@ -54,9 +58,9 @@ def get_population(city: str) -> int: def test_parallel_tool_calling_enabled(): """Test parallel tool calling with parallel_tool_calls=True.""" - print("\n" + "=" * 80) - print("TEST 1: Parallel Tool Calling ENABLED") - print("=" * 80) + logging.info("\n" + "=" * 80) + logging.info("TEST 1: Parallel Tool Calling ENABLED") + logging.info("=" * 80) chat = ChatOCIGenAI( model_id=os.environ.get("OCI_MODEL_ID", "meta.llama-3.3-70b-instruct"), @@ -75,7 +79,7 @@ def test_parallel_tool_calling_enabled(): chat_with_tools = chat.bind_tools([get_weather, calculate_tip, get_population]) # Invoke with query that needs weather info - print("\nQuery: 'What's the weather in New York City?'") + logging.info("\nQuery: 'What's the weather in New York City?'") start_time = time.time() response = chat_with_tools.invoke([ @@ -83,31 +87,33 @@ def test_parallel_tool_calling_enabled(): ]) elapsed_time = time.time() - start_time - print(f"\nResponse time: {elapsed_time:.2f}s") - print(f"Response content: {response.content[:200] if response.content else '(empty)'}...") - print(f"Tool calls count: {len(response.tool_calls)}") + logging.info(f"\nResponse time: {elapsed_time:.2f}s") + content = response.content[:200] if response.content else "(empty)" + logging.info(f"Response content: {content}...") + logging.info(f"Tool calls count: {len(response.tool_calls)}") if response.tool_calls: - print("\nTool calls:") + logging.info("\nTool calls:") for i, tc in enumerate(response.tool_calls, 1): - print(f" {i}. {tc['name']}({tc['args']})") + logging.info(f" {i}. 
{tc['name']}({tc['args']})") else: - print("\n⚠️ No tool calls in response.tool_calls") - print(f"Additional kwargs: {response.additional_kwargs.keys()}") + logging.info("\n⚠️ No tool calls in response.tool_calls") + logging.info(f"Additional kwargs: {response.additional_kwargs.keys()}") # Verify we got tool calls - assert len(response.tool_calls) >= 1, f"Should have at least one tool call, got {len(response.tool_calls)}" + count = len(response.tool_calls) + assert count >= 1, f"Should have at least one tool call, got {count}" # Verify parallel_tool_calls was set - print("\n✓ TEST 1 PASSED: Parallel tool calling enabled and working") + logging.info("\n✓ TEST 1 PASSED: Parallel tool calling enabled and working") return elapsed_time def test_parallel_tool_calling_disabled(): """Test tool calling with parallel_tool_calls=False (sequential).""" - print("\n" + "=" * 80) - print("TEST 2: Parallel Tool Calling DISABLED (Sequential)") - print("=" * 80) + logging.info("\n" + "=" * 80) + logging.info("TEST 2: Parallel Tool Calling DISABLED (Sequential)") + logging.info("=" * 80) chat = ChatOCIGenAI( model_id=os.environ.get("OCI_MODEL_ID", "meta.llama-3.3-70b-instruct"), @@ -126,7 +132,7 @@ def test_parallel_tool_calling_disabled(): chat_with_tools = chat.bind_tools([get_weather, calculate_tip, get_population]) # Same query as test 1 - print("\nQuery: 'What's the weather in New York City?'") + logging.info("\nQuery: 'What's the weather in New York City?'") start_time = time.time() response = chat_with_tools.invoke([ @@ -134,27 +140,29 @@ def test_parallel_tool_calling_disabled(): ]) elapsed_time = time.time() - start_time - print(f"\nResponse time: {elapsed_time:.2f}s") - print(f"Response content: {response.content[:200] if response.content else '(empty)'}...") - print(f"Tool calls count: {len(response.tool_calls)}") + logging.info(f"\nResponse time: {elapsed_time:.2f}s") + content = response.content[:200] if response.content else "(empty)" + logging.info(f"Response content: {content}...") + logging.info(f"Tool calls count: {len(response.tool_calls)}") if response.tool_calls: - print("\nTool calls:") + logging.info("\nTool calls:") for i, tc in enumerate(response.tool_calls, 1): - print(f" {i}. {tc['name']}({tc['args']})") + logging.info(f" {i}. 
{tc['name']}({tc['args']})") # Verify we got tool calls - assert len(response.tool_calls) >= 1, f"Should have at least one tool call, got {len(response.tool_calls)}" + count = len(response.tool_calls) + assert count >= 1, f"Should have at least one tool call, got {count}" - print("\n✓ TEST 2 PASSED: Sequential tool calling works") + logging.info("\n✓ TEST 2 PASSED: Sequential tool calling works") return elapsed_time def test_bind_tools_override(): """Test that bind_tools can override class-level setting.""" - print("\n" + "=" * 80) - print("TEST 3: bind_tools Override of Class Setting") - print("=" * 80) + logging.info("\n" + "=" * 80) + logging.info("TEST 3: bind_tools Override of Class Setting") + logging.info("=" * 80) # Create chat with parallel_tool_calls=False at class level chat = ChatOCIGenAI( @@ -176,28 +184,28 @@ def test_bind_tools_override(): parallel_tool_calls=True # Override to enable ) - print("\nQuery: 'What's the weather and population of Tokyo?'") + logging.info("\nQuery: 'What's the weather and population of Tokyo?'") response = chat_with_tools.invoke([ HumanMessage(content="What's the weather and population of Tokyo?") ]) - print(f"\nResponse content: {response.content}") - print(f"Tool calls count: {len(response.tool_calls)}") + logging.info(f"\nResponse content: {response.content}") + logging.info(f"Tool calls count: {len(response.tool_calls)}") if response.tool_calls: - print("\nTool calls:") + logging.info("\nTool calls:") for i, tc in enumerate(response.tool_calls, 1): - print(f" {i}. {tc['name']}({tc['args']})") + logging.info(f" {i}. {tc['name']}({tc['args']})") - print("\n✓ TEST 3 PASSED: bind_tools override works") + logging.info("\n✓ TEST 3 PASSED: bind_tools override works") def test_cohere_model_error(): """Test that Cohere models raise an error with parallel_tool_calls.""" - print("\n" + "=" * 80) - print("TEST 4: Cohere Model Error Handling") - print("=" * 80) + logging.info("\n" + "=" * 80) + logging.info("TEST 4: Cohere Model Error Handling") + logging.info("=" * 80) chat = ChatOCIGenAI( model_id="cohere.command-r-plus", @@ -216,40 +224,40 @@ def test_cohere_model_error(): parallel_tool_calls=True ) - print("\nAttempting to use parallel_tool_calls with Cohere model...") + logging.info("\nAttempting to use parallel_tool_calls with Cohere model...") try: - response = chat_with_tools.invoke([ + _ = chat_with_tools.invoke([ HumanMessage(content="What's the weather in Paris?") ]) - print("❌ TEST FAILED: Should have raised ValueError") + logging.info("❌ TEST FAILED: Should have raised ValueError") return False except ValueError as e: if "not supported for Cohere" in str(e): - print(f"\n✓ Correctly raised error: {e}") - print("\n✓ TEST 4 PASSED: Cohere validation works") + logging.info(f"\n✓ Correctly raised error: {e}") + logging.info("\n✓ TEST 4 PASSED: Cohere validation works") return True else: - print(f"❌ Wrong error: {e}") + logging.info(f"❌ Wrong error: {e}") return False def main(): - print("=" * 80) - print("PARALLEL TOOL CALLING INTEGRATION TESTS") - print("=" * 80) + logging.info("=" * 80) + logging.info("PARALLEL TOOL CALLING INTEGRATION TESTS") + logging.info("=" * 80) # Check required env vars if not os.environ.get("OCI_COMPARTMENT_ID"): - print("\n❌ ERROR: OCI_COMPARTMENT_ID environment variable not set") - print("Please set: export OCI_COMPARTMENT_ID=") + logging.info("\n❌ ERROR: OCI_COMPARTMENT_ID environment variable not set") + logging.info("Please set: export OCI_COMPARTMENT_ID=") sys.exit(1) - print(f"\nUsing configuration:") - print(f" 
Model: {os.environ.get('OCI_MODEL_ID', 'meta.llama-3.3-70b-instruct')}") - print(f" Endpoint: {os.environ.get('OCI_GENAI_ENDPOINT', 'default')}") - print(f" Profile: {os.environ.get('OCI_CONFIG_PROFILE', 'DEFAULT')}") - print(f" Compartment: {os.environ.get('OCI_COMPARTMENT_ID')[:25]}...") + logging.info(f"\nUsing configuration:") + logging.info(f" Model: {os.environ.get('OCI_MODEL_ID', 'meta.llama-3.3-70b-instruct')}") + logging.info(f" Endpoint: {os.environ.get('OCI_GENAI_ENDPOINT', 'default')}") + logging.info(f" Profile: {os.environ.get('OCI_CONFIG_PROFILE', 'DEFAULT')}") + logging.info(f" Compartment: {os.environ.get('OCI_COMPARTMENT_ID')[:25]}...") results = [] @@ -268,39 +276,39 @@ def main(): results.append(("Cohere Validation", cohere_test)) # Print summary - print("\n" + "=" * 80) - print("TEST SUMMARY") - print("=" * 80) + logging.info("\n" + "=" * 80) + logging.info("TEST SUMMARY") + logging.info("=" * 80) for test_name, passed in results: status = "✓ PASSED" if passed else "✗ FAILED" - print(f"{status}: {test_name}") + logging.info(f"{status}: {test_name}") passed_count = sum(1 for _, passed in results if passed) total_count = len(results) - print(f"\nTotal: {passed_count}/{total_count} tests passed") + logging.info(f"\nTotal: {passed_count}/{total_count} tests passed") # Performance comparison if parallel_time and sequential_time: - print("\n" + "=" * 80) - print("PERFORMANCE COMPARISON") - print("=" * 80) - print(f"Parallel: {parallel_time:.2f}s") - print(f"Sequential: {sequential_time:.2f}s") + logging.info("\n" + "=" * 80) + logging.info("PERFORMANCE COMPARISON") + logging.info("=" * 80) + logging.info(f"Parallel: {parallel_time:.2f}s") + logging.info(f"Sequential: {sequential_time:.2f}s") if sequential_time > 0: speedup = sequential_time / parallel_time - print(f"Speedup: {speedup:.2f}×") + logging.info(f"Speedup: {speedup:.2f}×") if passed_count == total_count: - print("\n🎉 ALL TESTS PASSED!") + logging.info("\n🎉 ALL TESTS PASSED!") return 0 else: - print(f"\n⚠️ {total_count - passed_count} test(s) failed") + logging.info(f"\n⚠️ {total_count - passed_count} test(s) failed") return 1 except Exception as e: - print(f"\n❌ ERROR: {e}") + logging.info(f"\n❌ ERROR: {e}") import traceback traceback.print_exc() return 1 From 7708d9ef1d3b8eb624c95d650758bbec8368eace Mon Sep 17 00:00:00 2001 From: Federico Kamelhar Date: Wed, 19 Nov 2025 12:13:11 -0500 Subject: [PATCH 07/20] Fix remaining linting issues in test files --- .../test_parallel_tool_calling_integration.py | 11 +++++++---- .../chat_models/test_parallel_tool_calling.py | 14 ++++++++------ 2 files changed, 15 insertions(+), 10 deletions(-) diff --git a/libs/oci/tests/integration_tests/chat_models/test_parallel_tool_calling_integration.py b/libs/oci/tests/integration_tests/chat_models/test_parallel_tool_calling_integration.py index be2b825..f19465b 100644 --- a/libs/oci/tests/integration_tests/chat_models/test_parallel_tool_calling_integration.py +++ b/libs/oci/tests/integration_tests/chat_models/test_parallel_tool_calling_integration.py @@ -253,10 +253,13 @@ def main(): logging.info("Please set: export OCI_COMPARTMENT_ID=") sys.exit(1) - logging.info(f"\nUsing configuration:") - logging.info(f" Model: {os.environ.get('OCI_MODEL_ID', 'meta.llama-3.3-70b-instruct')}") - logging.info(f" Endpoint: {os.environ.get('OCI_GENAI_ENDPOINT', 'default')}") - logging.info(f" Profile: {os.environ.get('OCI_CONFIG_PROFILE', 'DEFAULT')}") + logging.info("\nUsing configuration:") + model_id = os.environ.get("OCI_MODEL_ID", 
"meta.llama-3.3-70b-instruct") + logging.info(f" Model: {model_id}") + endpoint = os.environ.get("OCI_GENAI_ENDPOINT", "default") + logging.info(f" Endpoint: {endpoint}") + profile = os.environ.get("OCI_CONFIG_PROFILE", "DEFAULT") + logging.info(f" Profile: {profile}") logging.info(f" Compartment: {os.environ.get('OCI_COMPARTMENT_ID')[:25]}...") results = [] diff --git a/libs/oci/tests/unit_tests/chat_models/test_parallel_tool_calling.py b/libs/oci/tests/unit_tests/chat_models/test_parallel_tool_calling.py index d51d85c..77ada69 100644 --- a/libs/oci/tests/unit_tests/chat_models/test_parallel_tool_calling.py +++ b/libs/oci/tests/unit_tests/chat_models/test_parallel_tool_calling.py @@ -1,8 +1,9 @@ """Unit tests for parallel tool calling feature.""" -import pytest from unittest.mock import MagicMock +import pytest from langchain_core.messages import HumanMessage + from langchain_oci.chat_models import ChatOCIGenAI @@ -121,8 +122,6 @@ def tool1(x: int) -> int: @pytest.mark.requires("oci") def test_parallel_tool_calls_passed_to_oci_api_meta(): """Test that is_parallel_tool_calls is passed to OCI API for Meta models.""" - from oci.generative_ai_inference import models - oci_gen_ai_client = MagicMock() llm = ChatOCIGenAI( model_id="meta.llama-4-maverick-17b-128e-instruct-fp8", @@ -319,9 +318,12 @@ def test_version_filter_supports_parallel_tool_calls_method(): ) # Test various model IDs - assert llm._supports_parallel_tool_calls("meta.llama-4-maverick-17b-128e-instruct-fp8") is True - assert llm._supports_parallel_tool_calls("meta.llama-3.3-70b-instruct") is False # Llama 3.3 NOT supported - assert llm._supports_parallel_tool_calls("meta.llama-3.2-11b-vision-instruct") is False + model_id = "meta.llama-4-maverick-17b-128e-instruct-fp8" + assert llm._supports_parallel_tool_calls(model_id) is True + # Llama 3.3 NOT supported + assert llm._supports_parallel_tool_calls("meta.llama-3.3-70b-instruct") is False + model_id = "meta.llama-3.2-11b-vision-instruct" + assert llm._supports_parallel_tool_calls(model_id) is False assert llm._supports_parallel_tool_calls("meta.llama-3.1-70b-instruct") is False assert llm._supports_parallel_tool_calls("meta.llama-3-70b-instruct") is False assert llm._supports_parallel_tool_calls("cohere.command-r-plus") is False From 1c2b0ef0e94f0de5bcb50ec4d9480595bb78bd50 Mon Sep 17 00:00:00 2001 From: Federico Kamelhar Date: Wed, 19 Nov 2025 12:42:59 -0500 Subject: [PATCH 08/20] Move parallel tool call validation from bind_tools to provider - Validation now happens at request preparation time - Cohere validation remains in CohereProvider - Llama 3.x validation added to GenericProvider - Fixes failing unit tests --- .../chat_models/oci_generative_ai.py | 21 ++++++++----------- 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/libs/oci/langchain_oci/chat_models/oci_generative_ai.py b/libs/oci/langchain_oci/chat_models/oci_generative_ai.py index 7ee5d57..82dc38a 100644 --- a/libs/oci/langchain_oci/chat_models/oci_generative_ai.py +++ b/libs/oci/langchain_oci/chat_models/oci_generative_ai.py @@ -861,6 +861,14 @@ def _should_allow_more_tool_calls( # Add parallel tool calls support (GenericChatRequest models) if "is_parallel_tool_calls" in kwargs: + # Validate Llama 3.x doesn't support parallel tool calls + model_id = self.llm.model_id + if "llama" in model_id.lower() and not self.llm._supports_parallel_tool_calls(model_id): + raise ValueError( + f"Parallel tool calls not supported for {model_id}. " + "Only Llama 4+ models support this feature. 
" + "Llama 3.x (including 3.3) don't support parallel calls." + ) result["is_parallel_tool_calls"] = kwargs["is_parallel_tool_calls"] return result @@ -1313,18 +1321,7 @@ def bind_tools( else self.parallel_tool_calls ) if use_parallel: - # Validate model supports parallel tool calling - if not self._supports_parallel_tool_calls(self.model_id): - if "llama" in self.model_id.lower(): - raise ValueError( - f"Parallel tool calls not supported for {self.model_id}. " - "Only Llama 4+ models support this feature. " - "Llama 3.x (including 3.3) don't support parallel calls." - ) - else: - raise ValueError( - f"Parallel tool calls are not supported for {self.model_id}." - ) + # Store the parameter; validation happens in provider kwargs["is_parallel_tool_calls"] = True return super().bind(tools=formatted_tools, **kwargs) From 8b59fff597e40e60d773b7d77185928285456842 Mon Sep 17 00:00:00 2001 From: Federico Kamelhar Date: Wed, 19 Nov 2025 12:44:30 -0500 Subject: [PATCH 09/20] Add Llama 3.x validation at bind_tools time - Llama 3.x validation happens early at bind_tools time - Cohere validation happens at provider level (_prepare_request time) - All 16 parallel tool calling tests now pass --- .../chat_models/oci_generative_ai.py | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/libs/oci/langchain_oci/chat_models/oci_generative_ai.py b/libs/oci/langchain_oci/chat_models/oci_generative_ai.py index 82dc38a..58ba8dc 100644 --- a/libs/oci/langchain_oci/chat_models/oci_generative_ai.py +++ b/libs/oci/langchain_oci/chat_models/oci_generative_ai.py @@ -861,14 +861,6 @@ def _should_allow_more_tool_calls( # Add parallel tool calls support (GenericChatRequest models) if "is_parallel_tool_calls" in kwargs: - # Validate Llama 3.x doesn't support parallel tool calls - model_id = self.llm.model_id - if "llama" in model_id.lower() and not self.llm._supports_parallel_tool_calls(model_id): - raise ValueError( - f"Parallel tool calls not supported for {model_id}. " - "Only Llama 4+ models support this feature. " - "Llama 3.x (including 3.3) don't support parallel calls." - ) result["is_parallel_tool_calls"] = kwargs["is_parallel_tool_calls"] return result @@ -1321,7 +1313,13 @@ def bind_tools( else self.parallel_tool_calls ) if use_parallel: - # Store the parameter; validation happens in provider + # Validate Llama 3.x doesn't support parallel tool calls (early check) + if "llama" in self.model_id.lower() and not self._supports_parallel_tool_calls(self.model_id): + raise ValueError( + f"Parallel tool calls not supported for {self.model_id}. " + "Only Llama 4+ models support this feature. " + "Llama 3.x (including 3.3) don't support parallel calls." 
---
 .../chat_models/oci_generative_ai.py | 16 +++++++---------
 1 file changed, 7 insertions(+), 9 deletions(-)

diff --git a/libs/oci/langchain_oci/chat_models/oci_generative_ai.py b/libs/oci/langchain_oci/chat_models/oci_generative_ai.py
index 82dc38a..58ba8dc 100644
--- a/libs/oci/langchain_oci/chat_models/oci_generative_ai.py
+++ b/libs/oci/langchain_oci/chat_models/oci_generative_ai.py
@@ -861,14 +861,6 @@ def _should_allow_more_tool_calls(
 
         # Add parallel tool calls support (GenericChatRequest models)
         if "is_parallel_tool_calls" in kwargs:
-            # Validate Llama 3.x doesn't support parallel tool calls
-            model_id = self.llm.model_id
-            if "llama" in model_id.lower() and not self.llm._supports_parallel_tool_calls(model_id):
-                raise ValueError(
-                    f"Parallel tool calls not supported for {model_id}. "
-                    "Only Llama 4+ models support this feature. "
-                    "Llama 3.x (including 3.3) don't support parallel calls."
-                )
             result["is_parallel_tool_calls"] = kwargs["is_parallel_tool_calls"]
 
         return result
@@ -1321,7 +1313,13 @@ def bind_tools(
             else self.parallel_tool_calls
         )
         if use_parallel:
-            # Store the parameter; validation happens in provider
+            # Validate Llama 3.x doesn't support parallel tool calls (early check)
+            if "llama" in self.model_id.lower() and not self._supports_parallel_tool_calls(self.model_id):
+                raise ValueError(
+                    f"Parallel tool calls not supported for {self.model_id}. "
+                    "Only Llama 4+ models support this feature. "
+                    "Llama 3.x (including 3.3) don't support parallel calls."
+                )
             kwargs["is_parallel_tool_calls"] = True
 
         return super().bind(tools=formatted_tools, **kwargs)

From 24fdcfd078263cf028301157484522fa3b73bb51 Mon Sep 17 00:00:00 2001
From: Federico Kamelhar
Date: Mon, 24 Nov 2025 16:47:53 -0500
Subject: [PATCH 10/20] Fix line length issue in bind_tools validation

---
 libs/oci/langchain_oci/chat_models/oci_generative_ai.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/libs/oci/langchain_oci/chat_models/oci_generative_ai.py b/libs/oci/langchain_oci/chat_models/oci_generative_ai.py
index 58ba8dc..2d072fe 100644
--- a/libs/oci/langchain_oci/chat_models/oci_generative_ai.py
+++ b/libs/oci/langchain_oci/chat_models/oci_generative_ai.py
@@ -1314,7 +1314,8 @@ def bind_tools(
         )
         if use_parallel:
             # Validate Llama 3.x doesn't support parallel tool calls (early check)
-            if "llama" in self.model_id.lower() and not self._supports_parallel_tool_calls(self.model_id):
+            is_llama = "llama" in self.model_id.lower()
+            if is_llama and not self._supports_parallel_tool_calls(self.model_id):
                 raise ValueError(
                     f"Parallel tool calls not supported for {self.model_id}. "
                     "Only Llama 4+ models support this feature. "

From 3bb4d01ea5e607631f90cffbb6d8b7c29b4d3f9a Mon Sep 17 00:00:00 2001
From: Federico Kamelhar
Date: Tue, 25 Nov 2025 14:53:44 -0500
Subject: [PATCH 11/20] Apply ruff formatting to parallel tool calling tests

---
 .../test_parallel_tool_calling_integration.py | 32 ++++----
 .../chat_models/test_parallel_tool_calling.py | 73 ++++++-------
 2 files changed, 38 insertions(+), 67 deletions(-)

diff --git a/libs/oci/tests/integration_tests/chat_models/test_parallel_tool_calling_integration.py b/libs/oci/tests/integration_tests/chat_models/test_parallel_tool_calling_integration.py
index f19465b..56f7637 100644
--- a/libs/oci/tests/integration_tests/chat_models/test_parallel_tool_calling_integration.py
+++ b/libs/oci/tests/integration_tests/chat_models/test_parallel_tool_calling_integration.py
@@ -82,9 +82,9 @@ def test_parallel_tool_calling_enabled():
     logging.info("\nQuery: 'What's the weather in New York City?'")
 
     start_time = time.time()
-    response = chat_with_tools.invoke([
-        HumanMessage(content="What's the weather in New York City?")
-    ])
+    response = chat_with_tools.invoke(
+        [HumanMessage(content="What's the weather in New York City?")]
+    )
     elapsed_time = time.time() - start_time
 
     logging.info(f"\nResponse time: {elapsed_time:.2f}s")
@@ -135,9 +135,9 @@ def test_parallel_tool_calling_disabled():
     logging.info("\nQuery: 'What's the weather in New York City?'")
 
     start_time = time.time()
-    response = chat_with_tools.invoke([
-        HumanMessage(content="What's the weather in New York City?")
-    ])
+    response = chat_with_tools.invoke(
+        [HumanMessage(content="What's the weather in New York City?")]
+    )
     elapsed_time = time.time() - start_time
 
     logging.info(f"\nResponse time: {elapsed_time:.2f}s")
@@ -181,14 +181,14 @@ def test_bind_tools_override():
     # Override with True in bind_tools
     chat_with_tools = chat.bind_tools(
         [get_weather, get_population],
-        parallel_tool_calls=True  # Override to enable
+        parallel_tool_calls=True,  # Override to enable
     )
 
     logging.info("\nQuery: 'What's the weather and population of Tokyo?'")
-    response = chat_with_tools.invoke([
-        HumanMessage(content="What's the weather and population of Tokyo?")
-    ])
+    response = chat_with_tools.invoke(
+        [HumanMessage(content="What's the weather and population of Tokyo?")]
+    )
 
     logging.info(f"\nResponse content: {response.content}")
     logging.info(f"Tool calls count: {len(response.tool_calls)}")
@@ -219,17 +219,14 @@ def test_cohere_model_error():
     )
 
     # Try to enable parallel tool calls with Cohere (should fail)
-    chat_with_tools = chat.bind_tools(
-        [get_weather],
-        parallel_tool_calls=True
-    )
+    chat_with_tools = chat.bind_tools([get_weather], parallel_tool_calls=True)
 
     logging.info("\nAttempting to use parallel_tool_calls with Cohere model...")
 
     try:
-        _ = chat_with_tools.invoke([
-            HumanMessage(content="What's the weather in Paris?")
-        ])
+        _ = chat_with_tools.invoke(
+            [HumanMessage(content="What's the weather in Paris?")]
+        )
         logging.info("❌ TEST FAILED: Should have raised ValueError")
         return False
     except ValueError as e:
@@ -313,6 +310,7 @@ def main():
     except Exception as e:
         logging.info(f"\n❌ ERROR: {e}")
        import traceback
+
        traceback.print_exc()
        return 1
diff --git a/libs/oci/tests/unit_tests/chat_models/test_parallel_tool_calling.py b/libs/oci/tests/unit_tests/chat_models/test_parallel_tool_calling.py
index 77ada69..8759683 100644
--- a/libs/oci/tests/unit_tests/chat_models/test_parallel_tool_calling.py
+++ b/libs/oci/tests/unit_tests/chat_models/test_parallel_tool_calling.py
@@ -1,4 +1,5 @@
 """Unit tests for parallel tool calling feature."""
+
 from unittest.mock import MagicMock
 
 import pytest
@@ -14,7 +15,7 @@ def test_parallel_tool_calls_class_level():
     llm = ChatOCIGenAI(
         model_id="meta.llama-4-maverick-17b-128e-instruct-fp8",
         parallel_tool_calls=True,
-        client=oci_gen_ai_client
+        client=oci_gen_ai_client,
     )
     assert llm.parallel_tool_calls is True
@@ -24,8 +25,7 @@ def test_parallel_tool_calls_default_false():
     """Test that parallel_tool_calls defaults to False."""
     oci_gen_ai_client = MagicMock()
     llm = ChatOCIGenAI(
-        model_id="meta.llama-4-maverick-17b-128e-instruct-fp8",
-        client=oci_gen_ai_client
+        model_id="meta.llama-4-maverick-17b-128e-instruct-fp8", client=oci_gen_ai_client
     )
     assert llm.parallel_tool_calls is False
@@ -35,8 +35,7 @@ def test_parallel_tool_calls_bind_tools_explicit_true():
     """Test parallel_tool_calls=True in bind_tools."""
     oci_gen_ai_client = MagicMock()
     llm = ChatOCIGenAI(
-        model_id="meta.llama-4-maverick-17b-128e-instruct-fp8",
-        client=oci_gen_ai_client
+        model_id="meta.llama-4-maverick-17b-128e-instruct-fp8", client=oci_gen_ai_client
     )
 
     def tool1(x: int) -> int:
@@ -47,10 +46,7 @@ def tool2(x: int) -> int:
         """Tool 2."""
         return x * 2
 
-    llm_with_tools = llm.bind_tools(
-        [tool1, tool2],
-        parallel_tool_calls=True
-    )
+    llm_with_tools = llm.bind_tools([tool1, tool2], parallel_tool_calls=True)
 
     assert llm_with_tools.kwargs.get("is_parallel_tool_calls") is True
@@ -60,18 +56,14 @@ def test_parallel_tool_calls_bind_tools_explicit_false():
     """Test parallel_tool_calls=False in bind_tools."""
     oci_gen_ai_client = MagicMock()
     llm = ChatOCIGenAI(
-        model_id="meta.llama-4-maverick-17b-128e-instruct-fp8",
-        client=oci_gen_ai_client
+        model_id="meta.llama-4-maverick-17b-128e-instruct-fp8", client=oci_gen_ai_client
     )
 
     def tool1(x: int) -> int:
         """Tool 1."""
         return x + 1
 
-    llm_with_tools = llm.bind_tools(
-        [tool1],
-        parallel_tool_calls=False
-    )
+    llm_with_tools = llm.bind_tools([tool1], parallel_tool_calls=False)
 
     # When explicitly False, should not set the parameter
     assert "is_parallel_tool_calls" not in llm_with_tools.kwargs
@@ -84,7 +76,7 @@ def test_parallel_tool_calls_bind_tools_uses_class_default():
     llm = ChatOCIGenAI(
         model_id="meta.llama-4-maverick-17b-128e-instruct-fp8",
         parallel_tool_calls=True,  # Set class default
-        client=oci_gen_ai_client
+        client=oci_gen_ai_client,
     )
 
     def tool1(x: int) -> int:
@@ -105,7 +97,7 @@ def test_parallel_tool_calls_bind_tools_overrides_class_default():
     llm = ChatOCIGenAI(
         model_id="meta.llama-4-maverick-17b-128e-instruct-fp8",
         parallel_tool_calls=True,  # Set class default to True
-        client=oci_gen_ai_client
+        client=oci_gen_ai_client,
     )
 
     def tool1(x: int) -> int:
@@ -124,8 +116,7 @@ def test_parallel_tool_calls_passed_to_oci_api_meta():
     """Test that is_parallel_tool_calls is passed to OCI API for Meta models."""
     oci_gen_ai_client = MagicMock()
     llm = ChatOCIGenAI(
-        model_id="meta.llama-4-maverick-17b-128e-instruct-fp8",
-        client=oci_gen_ai_client
+        model_id="meta.llama-4-maverick-17b-128e-instruct-fp8", client=oci_gen_ai_client
     )
 
     def get_weather(city: str) -> str:
@@ -139,11 +130,11 @@ def get_weather(city: str) -> str:
         [HumanMessage(content="What's the weather?")],
         stop=None,
         stream=False,
-        **llm_with_tools.kwargs
+        **llm_with_tools.kwargs,
     )
 
     # Verify is_parallel_tool_calls is in the request
-    assert hasattr(request.chat_request, 'is_parallel_tool_calls')
+    assert hasattr(request.chat_request, "is_parallel_tool_calls")
     assert request.chat_request.is_parallel_tool_calls is True
@@ -151,10 +142,7 @@ def get_weather(city: str) -> str:
 def test_parallel_tool_calls_cohere_raises_error():
     """Test that Cohere models raise error for parallel tool calls."""
     oci_gen_ai_client = MagicMock()
-    llm = ChatOCIGenAI(
-        model_id="cohere.command-r-plus",
-        client=oci_gen_ai_client
-    )
+    llm = ChatOCIGenAI(model_id="cohere.command-r-plus", client=oci_gen_ai_client)
 
     def tool1(x: int) -> int:
         """Tool 1."""
@@ -168,7 +156,7 @@ def tool1(x: int) -> int:
         [HumanMessage(content="test")],
         stop=None,
         stream=False,
-        **llm_with_tools.kwargs
+        **llm_with_tools.kwargs,
     )
@@ -179,7 +167,7 @@ def test_parallel_tool_calls_cohere_class_level_raises_error():
     llm = ChatOCIGenAI(
         model_id="cohere.command-r-plus",
         parallel_tool_calls=True,  # Set at class level
-        client=oci_gen_ai_client
+        client=oci_gen_ai_client,
     )
 
     def tool1(x: int) -> int:
@@ -194,7 +182,7 @@ def tool1(x: int) -> int:
         [HumanMessage(content="test")],
         stop=None,
         stream=False,
-        **llm_with_tools.kwargs
+        **llm_with_tools.kwargs,
     )
@@ -202,10 +190,7 @@ def tool1(x: int) -> int:
 def test_version_filter_llama_3_0_blocked():
     """Test that Llama 3.0 models are blocked from parallel tool calling."""
     oci_gen_ai_client = MagicMock()
-    llm = ChatOCIGenAI(
-        model_id="meta.llama-3-70b-instruct",
-        client=oci_gen_ai_client
-    )
+    llm = ChatOCIGenAI(model_id="meta.llama-3-70b-instruct", client=oci_gen_ai_client)
 
     def tool1(x: int) -> int:
         """Tool 1."""
@@ -220,10 +205,7 @@ def tool1(x: int) -> int:
 def test_version_filter_llama_3_1_blocked():
     """Test that Llama 3.1 models are blocked from parallel tool calling."""
     oci_gen_ai_client = MagicMock()
-    llm = ChatOCIGenAI(
-        model_id="meta.llama-3.1-70b-instruct",
-        client=oci_gen_ai_client
-    )
+    llm = ChatOCIGenAI(model_id="meta.llama-3.1-70b-instruct", client=oci_gen_ai_client)
 
     def tool1(x: int) -> int:
         """Tool 1."""
@@ -239,8 +221,7 @@ def test_version_filter_llama_3_2_blocked():
     """Test that Llama 3.2 models are blocked from parallel tool calling."""
     oci_gen_ai_client = MagicMock()
     llm = ChatOCIGenAI(
-        model_id="meta.llama-3.2-11b-vision-instruct",
-        client=oci_gen_ai_client
+        model_id="meta.llama-3.2-11b-vision-instruct", client=oci_gen_ai_client
     )
 
     def tool1(x: int) -> int:
@@ -256,10 +237,7 @@ def tool1(x: int) -> int:
 def test_version_filter_llama_3_3_blocked():
     """Test that Llama 3.3 models are blocked from parallel tool calling."""
     oci_gen_ai_client = MagicMock()
-    llm = ChatOCIGenAI(
-        model_id="meta.llama-3.3-70b-instruct",
-        client=oci_gen_ai_client
-    )
+    llm = ChatOCIGenAI(model_id="meta.llama-3.3-70b-instruct", client=oci_gen_ai_client)
 
     def tool1(x: int) -> int:
         """Tool 1."""
@@ -275,8 +253,7 @@ def test_version_filter_llama_4_allowed():
     """Test that Llama 4 models are allowed parallel tool calling."""
     oci_gen_ai_client = MagicMock()
     llm = ChatOCIGenAI(
-        model_id="meta.llama-4-maverick-17b-128e-instruct-fp8",
-        client=oci_gen_ai_client
+        model_id="meta.llama-4-maverick-17b-128e-instruct-fp8", client=oci_gen_ai_client
     )
 
     def tool1(x: int) -> int:
@@ -294,10 +271,7 @@ def test_version_filter_other_models_allowed():
     oci_gen_ai_client = MagicMock()
 
     # Test with xAI Grok
-    llm_grok = ChatOCIGenAI(
-        model_id="xai.grok-4-fast",
-        client=oci_gen_ai_client
-    )
+    llm_grok = ChatOCIGenAI(model_id="xai.grok-4-fast", client=oci_gen_ai_client)
 
     def tool1(x: int) -> int:
         """Tool 1."""
@@ -313,8 +287,7 @@ def test_version_filter_supports_parallel_tool_calls_method():
     """Test the _supports_parallel_tool_calls method directly."""
     oci_gen_ai_client = MagicMock()
     llm = ChatOCIGenAI(
-        model_id="meta.llama-4-maverick-17b-128e-instruct-fp8",
-        client=oci_gen_ai_client
+        model_id="meta.llama-4-maverick-17b-128e-instruct-fp8", client=oci_gen_ai_client
     )
 
     # Test various model IDs

From 719516fd0465fdbe8b78b50d7cbdd7adf9133d67 Mon Sep 17 00:00:00 2001
From: Federico Kamelhar
Date: Tue, 25 Nov 2025 16:19:23 -0500
Subject: [PATCH 12/20] Move parallel_tool_calls to bind_tools only (remove class-level param)
""" @@ -1305,14 +1304,8 @@ def bind_tools( if tool_choice is not None: kwargs["tool_choice"] = self._provider.process_tool_choice(tool_choice) - # Add parallel tool calls support - # Use bind-time parameter if provided, else fall back to class default - use_parallel = ( - parallel_tool_calls - if parallel_tool_calls is not None - else self.parallel_tool_calls - ) - if use_parallel: + # Add parallel tool calls support (only when explicitly enabled) + if parallel_tool_calls: # Validate Llama 3.x doesn't support parallel tool calls (early check) is_llama = "llama" in self.model_id.lower() if is_llama and not self._supports_parallel_tool_calls(self.model_id): diff --git a/libs/oci/langchain_oci/llms/oci_generative_ai.py b/libs/oci/langchain_oci/llms/oci_generative_ai.py index ab6babb..e5843c4 100644 --- a/libs/oci/langchain_oci/llms/oci_generative_ai.py +++ b/libs/oci/langchain_oci/llms/oci_generative_ai.py @@ -120,14 +120,6 @@ class OCIGenAIBase(BaseModel, ABC): """Maximum tool calls before forcing final answer. Prevents infinite loops while allowing multi-step orchestration.""" - parallel_tool_calls: bool = False - """Whether to enable parallel function calling during tool use. - If True, the model can call multiple tools simultaneously. - Supported for models using GenericChatRequest - (Meta, Llama, xAI Grok, OpenAI, Mistral). - Not supported for Cohere models. - Default: False for backward compatibility.""" - model_config = ConfigDict( extra="forbid", arbitrary_types_allowed=True, protected_namespaces=() ) diff --git a/libs/oci/tests/unit_tests/chat_models/test_parallel_tool_calling.py b/libs/oci/tests/unit_tests/chat_models/test_parallel_tool_calling.py index 8759683..b796560 100644 --- a/libs/oci/tests/unit_tests/chat_models/test_parallel_tool_calling.py +++ b/libs/oci/tests/unit_tests/chat_models/test_parallel_tool_calling.py @@ -8,28 +8,6 @@ from langchain_oci.chat_models import ChatOCIGenAI -@pytest.mark.requires("oci") -def test_parallel_tool_calls_class_level(): - """Test class-level parallel_tool_calls parameter.""" - oci_gen_ai_client = MagicMock() - llm = ChatOCIGenAI( - model_id="meta.llama-4-maverick-17b-128e-instruct-fp8", - parallel_tool_calls=True, - client=oci_gen_ai_client, - ) - assert llm.parallel_tool_calls is True - - -@pytest.mark.requires("oci") -def test_parallel_tool_calls_default_false(): - """Test that parallel_tool_calls defaults to False.""" - oci_gen_ai_client = MagicMock() - llm = ChatOCIGenAI( - model_id="meta.llama-4-maverick-17b-128e-instruct-fp8", client=oci_gen_ai_client - ) - assert llm.parallel_tool_calls is False - - @pytest.mark.requires("oci") def test_parallel_tool_calls_bind_tools_explicit_true(): """Test parallel_tool_calls=True in bind_tools.""" @@ -70,13 +48,11 @@ def tool1(x: int) -> int: @pytest.mark.requires("oci") -def test_parallel_tool_calls_bind_tools_uses_class_default(): - """Test that bind_tools uses class default when not specified.""" +def test_parallel_tool_calls_bind_tools_default_none(): + """Test that bind_tools without parallel_tool_calls doesn't enable it.""" oci_gen_ai_client = MagicMock() llm = ChatOCIGenAI( - model_id="meta.llama-4-maverick-17b-128e-instruct-fp8", - parallel_tool_calls=True, # Set class default - client=oci_gen_ai_client, + model_id="meta.llama-4-maverick-17b-128e-instruct-fp8", client=oci_gen_ai_client ) def tool1(x: int) -> int: @@ -86,28 +62,7 @@ def tool1(x: int) -> int: # Don't specify parallel_tool_calls in bind_tools llm_with_tools = llm.bind_tools([tool1]) - # Should use class default (True) - 
assert llm_with_tools.kwargs.get("is_parallel_tool_calls") is True - - -@pytest.mark.requires("oci") -def test_parallel_tool_calls_bind_tools_overrides_class_default(): - """Test that bind_tools parameter overrides class default.""" - oci_gen_ai_client = MagicMock() - llm = ChatOCIGenAI( - model_id="meta.llama-4-maverick-17b-128e-instruct-fp8", - parallel_tool_calls=True, # Set class default to True - client=oci_gen_ai_client, - ) - - def tool1(x: int) -> int: - """Tool 1.""" - return x + 1 - - # Override with False in bind_tools - llm_with_tools = llm.bind_tools([tool1], parallel_tool_calls=False) - - # Should not set the parameter when explicitly False + # Should not have is_parallel_tool_calls set assert "is_parallel_tool_calls" not in llm_with_tools.kwargs @@ -160,32 +115,6 @@ def tool1(x: int) -> int: ) -@pytest.mark.requires("oci") -def test_parallel_tool_calls_cohere_class_level_raises_error(): - """Test that Cohere models with class-level parallel_tool_calls raise error.""" - oci_gen_ai_client = MagicMock() - llm = ChatOCIGenAI( - model_id="cohere.command-r-plus", - parallel_tool_calls=True, # Set at class level - client=oci_gen_ai_client, - ) - - def tool1(x: int) -> int: - """Tool 1.""" - return x + 1 - - llm_with_tools = llm.bind_tools([tool1]) # Uses class default - - # Should raise ValueError when trying to prepare request - with pytest.raises(ValueError, match="not supported for Cohere"): - llm_with_tools._prepare_request( - [HumanMessage(content="test")], - stop=None, - stream=False, - **llm_with_tools.kwargs, - ) - - @pytest.mark.requires("oci") def test_version_filter_llama_3_0_blocked(): """Test that Llama 3.0 models are blocked from parallel tool calling.""" From b4bf03dcfdc29a5c549e22c9ad42faee158ea54f Mon Sep 17 00:00:00 2001 From: Federico Kamelhar Date: Wed, 26 Nov 2025 07:47:27 -0500 Subject: [PATCH 13/20] Update integration tests for bind_tools-only parallel_tool_calls --- .../test_parallel_tool_calling_integration.py | 49 ++++++++++--------- 1 file changed, 27 insertions(+), 22 deletions(-) diff --git a/libs/oci/tests/integration_tests/chat_models/test_parallel_tool_calling_integration.py b/libs/oci/tests/integration_tests/chat_models/test_parallel_tool_calling_integration.py index 56f7637..ebbbbcb 100644 --- a/libs/oci/tests/integration_tests/chat_models/test_parallel_tool_calling_integration.py +++ b/libs/oci/tests/integration_tests/chat_models/test_parallel_tool_calling_integration.py @@ -57,13 +57,15 @@ def get_population(city: str) -> int: def test_parallel_tool_calling_enabled(): - """Test parallel tool calling with parallel_tool_calls=True.""" + """Test parallel tool calling with parallel_tool_calls=True in bind_tools.""" logging.info("\n" + "=" * 80) - logging.info("TEST 1: Parallel Tool Calling ENABLED") + logging.info("TEST 1: Parallel Tool Calling ENABLED (via bind_tools)") logging.info("=" * 80) chat = ChatOCIGenAI( - model_id=os.environ.get("OCI_MODEL_ID", "meta.llama-3.3-70b-instruct"), + model_id=os.environ.get( + "OCI_MODEL_ID", "meta.llama-4-maverick-17b-128e-instruct-fp8" + ), service_endpoint=os.environ.get( "OCI_GENAI_ENDPOINT", "https://inference.generativeai.us-chicago-1.oci.oraclecloud.com", @@ -72,11 +74,12 @@ def test_parallel_tool_calling_enabled(): auth_profile=os.environ.get("OCI_CONFIG_PROFILE", "DEFAULT"), auth_type=os.environ.get("OCI_AUTH_TYPE", "SECURITY_TOKEN"), model_kwargs={"temperature": 0, "max_tokens": 500}, - parallel_tool_calls=True, # Enable parallel calling ) - # Bind tools - chat_with_tools = 
chat.bind_tools([get_weather, calculate_tip, get_population]) + # Bind tools with parallel_tool_calls=True + chat_with_tools = chat.bind_tools( + [get_weather, calculate_tip, get_population], parallel_tool_calls=True + ) # Invoke with query that needs weather info logging.info("\nQuery: 'What's the weather in New York City?'") @@ -116,7 +119,9 @@ def test_parallel_tool_calling_disabled(): logging.info("=" * 80) chat = ChatOCIGenAI( - model_id=os.environ.get("OCI_MODEL_ID", "meta.llama-3.3-70b-instruct"), + model_id=os.environ.get( + "OCI_MODEL_ID", "meta.llama-4-maverick-17b-128e-instruct-fp8" + ), service_endpoint=os.environ.get( "OCI_GENAI_ENDPOINT", "https://inference.generativeai.us-chicago-1.oci.oraclecloud.com", @@ -125,10 +130,9 @@ def test_parallel_tool_calling_disabled(): auth_profile=os.environ.get("OCI_CONFIG_PROFILE", "DEFAULT"), auth_type=os.environ.get("OCI_AUTH_TYPE", "SECURITY_TOKEN"), model_kwargs={"temperature": 0, "max_tokens": 500}, - parallel_tool_calls=False, # Disable parallel calling (default) ) - # Bind tools + # Bind tools without parallel_tool_calls (defaults to sequential) chat_with_tools = chat.bind_tools([get_weather, calculate_tip, get_population]) # Same query as test 1 @@ -158,15 +162,16 @@ def test_parallel_tool_calling_disabled(): return elapsed_time -def test_bind_tools_override(): - """Test that bind_tools can override class-level setting.""" +def test_multiple_tool_calls(): + """Test query that should trigger multiple tool calls.""" logging.info("\n" + "=" * 80) - logging.info("TEST 3: bind_tools Override of Class Setting") + logging.info("TEST 3: Multiple Tool Calls Query") logging.info("=" * 80) - # Create chat with parallel_tool_calls=False at class level chat = ChatOCIGenAI( - model_id=os.environ.get("OCI_MODEL_ID", "meta.llama-3.3-70b-instruct"), + model_id=os.environ.get( + "OCI_MODEL_ID", "meta.llama-4-maverick-17b-128e-instruct-fp8" + ), service_endpoint=os.environ.get( "OCI_GENAI_ENDPOINT", "https://inference.generativeai.us-chicago-1.oci.oraclecloud.com", @@ -175,13 +180,11 @@ def test_bind_tools_override(): auth_profile=os.environ.get("OCI_CONFIG_PROFILE", "DEFAULT"), auth_type=os.environ.get("OCI_AUTH_TYPE", "SECURITY_TOKEN"), model_kwargs={"temperature": 0, "max_tokens": 500}, - parallel_tool_calls=False, # Class default: disabled ) - # Override with True in bind_tools + # Bind tools with parallel_tool_calls=True chat_with_tools = chat.bind_tools( - [get_weather, get_population], - parallel_tool_calls=True, # Override to enable + [get_weather, get_population], parallel_tool_calls=True ) logging.info("\nQuery: 'What's the weather and population of Tokyo?'") @@ -198,7 +201,7 @@ def test_bind_tools_override(): for i, tc in enumerate(response.tool_calls, 1): logging.info(f" {i}. 
{tc['name']}({tc['args']})") - logging.info("\n✓ TEST 3 PASSED: bind_tools override works") + logging.info("\n✓ TEST 3 PASSED: Multiple tool calls query works") def test_cohere_model_error(): @@ -251,7 +254,9 @@ def main(): sys.exit(1) logging.info("\nUsing configuration:") - model_id = os.environ.get("OCI_MODEL_ID", "meta.llama-3.3-70b-instruct") + model_id = os.environ.get( + "OCI_MODEL_ID", "meta.llama-4-maverick-17b-128e-instruct-fp8" + ) logging.info(f" Model: {model_id}") endpoint = os.environ.get("OCI_GENAI_ENDPOINT", "default") logging.info(f" Endpoint: {endpoint}") @@ -269,8 +274,8 @@ def main(): sequential_time = test_parallel_tool_calling_disabled() results.append(("Sequential (Disabled)", True)) - test_bind_tools_override() - results.append(("bind_tools Override", True)) + test_multiple_tool_calls() + results.append(("Multiple Tool Calls", True)) cohere_test = test_cohere_model_error() results.append(("Cohere Validation", cohere_test)) From a1fde233bbeff1a8d1a2a8944fe39769f9f8e664 Mon Sep 17 00:00:00 2001 From: Federico Kamelhar Date: Wed, 26 Nov 2025 07:51:22 -0500 Subject: [PATCH 14/20] Fix README to show bind_tools-only parallel_tool_calls usage --- libs/oci/README.md | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/libs/oci/README.md b/libs/oci/README.md index 1eb7859..7bfcd9b 100644 --- a/libs/oci/README.md +++ b/libs/oci/README.md @@ -132,16 +132,13 @@ Enable parallel tool calling to execute multiple tools simultaneously, improving ```python from langchain_oci import ChatOCIGenAI -# Option 1: Set at class level for all tool bindings llm = ChatOCIGenAI( model_id="meta.llama-4-maverick-17b-128e-instruct-fp8", service_endpoint="https://inference.generativeai.us-chicago-1.oci.oraclecloud.com", compartment_id="MY_COMPARTMENT_ID", - parallel_tool_calls=True # Enable parallel tool calling ) -# Option 2: Set per-binding -llm = ChatOCIGenAI(model_id="meta.llama-4-maverick-17b-128e-instruct-fp8") +# Enable parallel tool calling in bind_tools llm_with_tools = llm.bind_tools( [get_weather, calculate_tip, get_population], parallel_tool_calls=True # Tools can execute simultaneously From 296d719eef7c3a7c85a25d5ae06973fd098c7423 Mon Sep 17 00:00:00 2001 From: Federico Kamelhar Date: Wed, 26 Nov 2025 16:36:58 -0500 Subject: [PATCH 15/20] Fix mypy type errors for LangChain 1.x compatibility - Add type: ignore[override] to bind_tools methods in oci_data_science.py and oci_generative_ai.py to handle signature incompatibility with BaseChatModel parent class - Remove unused type: ignore comments in oci_generative_ai.py - Add type: ignore[attr-defined] comments for RunnableBinding runtime attributes (kwargs, _prepare_request) in test_parallel_tool_calling.py - Fix test_parallel_tool_calling_integration.py to use getattr for tool_calls attribute access on BaseMessage - Fix test_tool_calling.py: import StructuredTool from langchain_core.tools - Fix test_oci_data_science.py: remove unused type: ignore comment - Fix test_oci_generative_ai_responses_api.py: add type: ignore for LangGraph invoke arg type --- .../chat_models/oci_data_science.py | 2 +- .../chat_models/oci_generative_ai.py | 13 ++++---- .../test_parallel_tool_calling_integration.py | 31 ++++++++++++------- .../chat_models/test_tool_calling.py | 4 +-- .../chat_models/test_oci_data_science.py | 2 +- .../test_oci_generative_ai_responses_api.py | 2 +- .../chat_models/test_parallel_tool_calling.py | 25 +++++++++------ 7 files changed, 47 insertions(+), 32 deletions(-) diff --git 
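Sketch of the getattr pattern these tests switch to (illustrative only;
`response` stands in for the BaseMessage returned by invoke()):

    tool_calls = getattr(response, "tool_calls", [])
    for i, tc in enumerate(tool_calls, 1):
        logging.info(f" {i}. {tc['name']}({tc['args']})")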
---
 .../chat_models/oci_data_science.py | 2 +-
 .../chat_models/oci_generative_ai.py | 13 ++++----
 .../test_parallel_tool_calling_integration.py | 31 ++++++++++++-------
 .../chat_models/test_tool_calling.py | 4 +--
 .../chat_models/test_oci_data_science.py | 2 +-
 .../test_oci_generative_ai_responses_api.py | 2 +-
 .../chat_models/test_parallel_tool_calling.py | 25 +++++++++------
 7 files changed, 47 insertions(+), 32 deletions(-)

diff --git a/libs/oci/langchain_oci/chat_models/oci_data_science.py b/libs/oci/langchain_oci/chat_models/oci_data_science.py
index 364d8b4..c1fd7b2 100644
--- a/libs/oci/langchain_oci/chat_models/oci_data_science.py
+++ b/libs/oci/langchain_oci/chat_models/oci_data_science.py
@@ -763,7 +763,7 @@ def _process_response(self, response_json: dict) -> ChatResult:
         }
         return ChatResult(generations=generations, llm_output=llm_output)
 
-    def bind_tools(
+    def bind_tools(  # type: ignore[override]
         self,
         tools: Sequence[Union[Dict[str, Any], Type[BaseModel], Callable, BaseTool]],
         **kwargs: Any,
diff --git a/libs/oci/langchain_oci/chat_models/oci_generative_ai.py b/libs/oci/langchain_oci/chat_models/oci_generative_ai.py
index 0295b25..ed53206 100644
--- a/libs/oci/langchain_oci/chat_models/oci_generative_ai.py
+++ b/libs/oci/langchain_oci/chat_models/oci_generative_ai.py
@@ -943,8 +943,8 @@ def convert_to_oci_tool(
                     "required": parameters.get("required", []),
                 },
             )
-        elif isinstance(tool, BaseTool):  # type: ignore[unreachable]
-            return self.oci_function_definition(  # type: ignore[unreachable]
+        elif isinstance(tool, BaseTool):
+            return self.oci_function_definition(
                 name=tool.name,
                 description=OCIUtils.remove_signature_from_tool_description(
                     tool.name, tool.description
@@ -1262,7 +1262,7 @@ def _supports_parallel_tool_calls(self, model_id: str) -> bool:
         # Other GenericChatRequest models (xAI Grok, OpenAI, Mistral) support it
         return True
 
-    def bind_tools(
+    def bind_tools(  # type: ignore[override]
         self,
         tools: Sequence[Union[Dict[str, Any], Type[BaseModel], Callable, BaseTool]],
         *,
@@ -1307,10 +1307,11 @@ def bind_tools(
         # Add parallel tool calls support (only when explicitly enabled)
         if parallel_tool_calls:
             # Validate Llama 3.x doesn't support parallel tool calls (early check)
-            is_llama = "llama" in self.model_id.lower()
-            if is_llama and not self._supports_parallel_tool_calls(self.model_id):
+            model_id = self.model_id or ""
+            is_llama = "llama" in model_id.lower()
+            if is_llama and not self._supports_parallel_tool_calls(model_id):
                 raise ValueError(
-                    f"Parallel tool calls not supported for {self.model_id}. "
+                    f"Parallel tool calls not supported for {model_id}. "
                     "Only Llama 4+ models support this feature. "
                     "Llama 3.x (including 3.3) don't support parallel calls."
                 )
diff --git a/libs/oci/tests/integration_tests/chat_models/test_parallel_tool_calling_integration.py b/libs/oci/tests/integration_tests/chat_models/test_parallel_tool_calling_integration.py
index ebbbbcb..bfd606f 100644
--- a/libs/oci/tests/integration_tests/chat_models/test_parallel_tool_calling_integration.py
+++ b/libs/oci/tests/integration_tests/chat_models/test_parallel_tool_calling_integration.py
@@ -93,18 +93,20 @@ def test_parallel_tool_calling_enabled():
     logging.info(f"\nResponse time: {elapsed_time:.2f}s")
     content = response.content[:200] if response.content else "(empty)"
     logging.info(f"Response content: {content}...")
-    logging.info(f"Tool calls count: {len(response.tool_calls)}")
+    # AIMessage has tool_calls attribute at runtime
+    tool_calls = getattr(response, "tool_calls", [])
+    logging.info(f"Tool calls count: {len(tool_calls)}")
 
-    if response.tool_calls:
+    if tool_calls:
         logging.info("\nTool calls:")
-        for i, tc in enumerate(response.tool_calls, 1):
+        for i, tc in enumerate(tool_calls, 1):
             logging.info(f" {i}. {tc['name']}({tc['args']})")
     else:
         logging.info("\n⚠️ No tool calls in response.tool_calls")
         logging.info(f"Additional kwargs: {response.additional_kwargs.keys()}")
 
     # Verify we got tool calls
-    count = len(response.tool_calls)
+    count = len(tool_calls)
     assert count >= 1, f"Should have at least one tool call, got {count}"
 
     # Verify parallel_tool_calls was set
@@ -147,15 +149,17 @@ def test_parallel_tool_calling_disabled():
     logging.info(f"\nResponse time: {elapsed_time:.2f}s")
     content = response.content[:200] if response.content else "(empty)"
     logging.info(f"Response content: {content}...")
-    logging.info(f"Tool calls count: {len(response.tool_calls)}")
+    # AIMessage has tool_calls attribute at runtime
+    tool_calls = getattr(response, "tool_calls", [])
+    logging.info(f"Tool calls count: {len(tool_calls)}")
 
-    if response.tool_calls:
+    if tool_calls:
         logging.info("\nTool calls:")
-        for i, tc in enumerate(response.tool_calls, 1):
+        for i, tc in enumerate(tool_calls, 1):
             logging.info(f" {i}. {tc['name']}({tc['args']})")
 
     # Verify we got tool calls
-    count = len(response.tool_calls)
+    count = len(tool_calls)
     assert count >= 1, f"Should have at least one tool call, got {count}"
 
     logging.info("\n✓ TEST 2 PASSED: Sequential tool calling works")
@@ -194,11 +198,13 @@ def test_multiple_tool_calls():
     )
 
     logging.info(f"\nResponse content: {response.content}")
-    logging.info(f"Tool calls count: {len(response.tool_calls)}")
+    # AIMessage has tool_calls attribute at runtime
+    tool_calls = getattr(response, "tool_calls", [])
+    logging.info(f"Tool calls count: {len(tool_calls)}")
 
-    if response.tool_calls:
+    if tool_calls:
         logging.info("\nTool calls:")
-        for i, tc in enumerate(response.tool_calls, 1):
+        for i, tc in enumerate(tool_calls, 1):
             logging.info(f" {i}. {tc['name']}({tc['args']})")
 
     logging.info("\n✓ TEST 3 PASSED: Multiple tool calls query works")
@@ -262,7 +268,8 @@ def main():
     logging.info(f" Endpoint: {endpoint}")
     profile = os.environ.get("OCI_CONFIG_PROFILE", "DEFAULT")
     logging.info(f" Profile: {profile}")
-    logging.info(f" Compartment: {os.environ.get('OCI_COMPARTMENT_ID')[:25]}...")
+    compartment_id = os.environ.get("OCI_COMPARTMENT_ID", "")
+    logging.info(f" Compartment: {compartment_id[:25]}...")
 
     results = []
diff --git a/libs/oci/tests/integration_tests/chat_models/test_tool_calling.py b/libs/oci/tests/integration_tests/chat_models/test_tool_calling.py
index 9c07763..283bf0a 100644
--- a/libs/oci/tests/integration_tests/chat_models/test_tool_calling.py
+++ b/libs/oci/tests/integration_tests/chat_models/test_tool_calling.py
@@ -53,8 +53,8 @@
 import os
 
 import pytest
-from langchain.tools import StructuredTool
 from langchain_core.messages import HumanMessage, SystemMessage
+from langchain_core.tools import StructuredTool
 from langgraph.graph import END, START, MessagesState, StateGraph
 from langgraph.prebuilt import ToolNode
 
@@ -428,7 +428,7 @@ def should_continue(state: MessagesState):
     # Invoke agent with a diagnostic scenario
     result = agent.invoke(
-        {
+        {  # type: ignore[arg-type]
             "messages": [
                 SystemMessage(content=system_prompt),
                 HumanMessage(
diff --git a/libs/oci/tests/unit_tests/chat_models/test_oci_data_science.py b/libs/oci/tests/unit_tests/chat_models/test_oci_data_science.py
index 68b7e7a..e6a691e 100644
--- a/libs/oci/tests/unit_tests/chat_models/test_oci_data_science.py
+++ b/libs/oci/tests/unit_tests/chat_models/test_oci_data_science.py
@@ -152,7 +152,7 @@ def test_stream_vllm(*args: Any) -> None:
         if output is None:
             output = chunk
         else:
-            output += chunk  # type: ignore[assignment]
+            output += chunk
         count += 1
     assert count == 5
     assert output is not None
diff --git a/libs/oci/tests/unit_tests/chat_models/test_oci_generative_ai_responses_api.py b/libs/oci/tests/unit_tests/chat_models/test_oci_generative_ai_responses_api.py
index 62d63ce..2e95db6 100644
--- a/libs/oci/tests/unit_tests/chat_models/test_oci_generative_ai_responses_api.py
+++ b/libs/oci/tests/unit_tests/chat_models/test_oci_generative_ai_responses_api.py
@@ -385,7 +385,7 @@ def call_model(state: AgentState):
     # ---- Act ----
     app = workflow.compile()
     input_message = HumanMessage(content="What is the capital of France?")
-    result = app.invoke({"messages": [input_message]})
+    result = app.invoke({"messages": [input_message]})  # type: ignore[arg-type]
 
     # ---- Assert ----
     content = result["messages"][1].content[0]
diff --git a/libs/oci/tests/unit_tests/chat_models/test_parallel_tool_calling.py b/libs/oci/tests/unit_tests/chat_models/test_parallel_tool_calling.py
index b796560..c5bcb35 100644
--- a/libs/oci/tests/unit_tests/chat_models/test_parallel_tool_calling.py
+++ b/libs/oci/tests/unit_tests/chat_models/test_parallel_tool_calling.py
@@ -26,7 +26,8 @@ def tool2(x: int) -> int:
     llm_with_tools = llm.bind_tools([tool1, tool2], parallel_tool_calls=True)
 
-    assert llm_with_tools.kwargs.get("is_parallel_tool_calls") is True
+    # RunnableBinding has kwargs attribute at runtime
+    assert llm_with_tools.kwargs.get("is_parallel_tool_calls") is True  # type: ignore[attr-defined]
 
@@ -44,7 +45,8 @@ def tool1(x: int) -> int:
     llm_with_tools = llm.bind_tools([tool1], parallel_tool_calls=False)
 
     # When explicitly False, should not set the parameter
-    assert "is_parallel_tool_calls" not in llm_with_tools.kwargs
+    # RunnableBinding has kwargs attribute at runtime
+    assert "is_parallel_tool_calls" not in llm_with_tools.kwargs  # type: ignore[attr-defined]
 
@@ -63,7 +65,8 @@ def tool1(x: int) -> int:
     llm_with_tools = llm.bind_tools([tool1])
 
     # Should not have is_parallel_tool_calls set
-    assert "is_parallel_tool_calls" not in llm_with_tools.kwargs
+    # RunnableBinding has kwargs attribute at runtime
+    assert "is_parallel_tool_calls" not in llm_with_tools.kwargs  # type: ignore[attr-defined]
 
@@ -81,11 +84,12 @@ def get_weather(city: str) -> str:
     llm_with_tools = llm.bind_tools([get_weather], parallel_tool_calls=True)
 
     # Prepare a request
-    request = llm_with_tools._prepare_request(
+    # RunnableBinding has _prepare_request and kwargs attributes at runtime
+    request = llm_with_tools._prepare_request(  # type: ignore[attr-defined]
         [HumanMessage(content="What's the weather?")],
         stop=None,
         stream=False,
-        **llm_with_tools.kwargs,
+        **llm_with_tools.kwargs,  # type: ignore[attr-defined]
     )
 
     # Verify is_parallel_tool_calls is in the request
@@ -106,12 +110,13 @@ def tool1(x: int) -> int:
     llm_with_tools = llm.bind_tools([tool1], parallel_tool_calls=True)
 
     # Should raise ValueError when trying to prepare request
+    # RunnableBinding has _prepare_request and kwargs attributes at runtime
     with pytest.raises(ValueError, match="not supported for Cohere"):
-        llm_with_tools._prepare_request(
+        llm_with_tools._prepare_request(  # type: ignore[attr-defined]
             [HumanMessage(content="test")],
             stop=None,
             stream=False,
-            **llm_with_tools.kwargs,
+            **llm_with_tools.kwargs,  # type: ignore[attr-defined]
         )
 
@@ -191,7 +196,8 @@ def tool1(x: int) -> int:
     # Should NOT raise ValueError
     llm_with_tools = llm.bind_tools([tool1], parallel_tool_calls=True)
 
-    assert llm_with_tools.kwargs.get("is_parallel_tool_calls") is True
+    # RunnableBinding has kwargs attribute at runtime
+    assert llm_with_tools.kwargs.get("is_parallel_tool_calls") is True  # type: ignore[attr-defined]
 
@@ -208,7 +214,8 @@ def tool1(x: int) -> int:
     # Should NOT raise ValueError for Grok
     llm_with_tools = llm_grok.bind_tools([tool1], parallel_tool_calls=True)
 
-    assert llm_with_tools.kwargs.get("is_parallel_tool_calls") is True
+    # RunnableBinding has kwargs attribute at runtime
+    assert llm_with_tools.kwargs.get("is_parallel_tool_calls") is True  # type: ignore[attr-defined]

From 027be6a2e710552567e1ac23c7bd8b57097c0a7c Mon Sep 17 00:00:00 2001
From: Federico Kamelhar
Date: Wed, 26 Nov 2025 16:41:04 -0500
Subject: [PATCH 16/20] Fix mypy errors for CI environment compatibility

- Add type: ignore[unreachable] back to BaseTool isinstance check in
  oci_generative_ai.py (CI mypy flags this as unreachable)
- Remove type: ignore[override] from bind_tools (CI reports unused)
- Fix test_oci_data_science.py: explicitly type output variable and use
  explicit addition instead of += to avoid assignment type error
- Remove unused type: ignore comments from test files
---
 libs/oci/langchain_oci/chat_models/oci_data_science.py | 2 +-
 libs/oci/langchain_oci/chat_models/oci_generative_ai.py | 6 +++---
 .../integration_tests/chat_models/test_tool_calling.py | 2 +-
 .../tests/unit_tests/chat_models/test_oci_data_science.py | 4 ++--
 .../chat_models/test_oci_generative_ai_responses_api.py | 2 +-
 5 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/libs/oci/langchain_oci/chat_models/oci_data_science.py b/libs/oci/langchain_oci/chat_models/oci_data_science.py
index c1fd7b2..364d8b4 100644
--- a/libs/oci/langchain_oci/chat_models/oci_data_science.py
+++ b/libs/oci/langchain_oci/chat_models/oci_data_science.py
@@ -763,7 +763,7 @@ def _process_response(self, response_json: dict) -> ChatResult:
         }
         return ChatResult(generations=generations, llm_output=llm_output)
 
-    def bind_tools(  # type: ignore[override]
+    def bind_tools(
         self,
         tools: Sequence[Union[Dict[str, Any], Type[BaseModel], Callable, BaseTool]],
         **kwargs: Any,
diff --git a/libs/oci/langchain_oci/chat_models/oci_generative_ai.py b/libs/oci/langchain_oci/chat_models/oci_generative_ai.py
index ed53206..f866874 100644
--- a/libs/oci/langchain_oci/chat_models/oci_generative_ai.py
+++ b/libs/oci/langchain_oci/chat_models/oci_generative_ai.py
@@ -943,8 +943,8 @@ def convert_to_oci_tool(
                     "required": parameters.get("required", []),
                 },
             )
-        elif isinstance(tool, BaseTool):
-            return self.oci_function_definition(
+        elif isinstance(tool, BaseTool):  # type: ignore[unreachable]
+            return self.oci_function_definition(  # type: ignore[unreachable]
                 name=tool.name,
                 description=OCIUtils.remove_signature_from_tool_description(
                     tool.name, tool.description
@@ -1262,7 +1262,7 @@ def _supports_parallel_tool_calls(self, model_id: str) -> bool:
         # Other GenericChatRequest models (xAI Grok, OpenAI, Mistral) support it
         return True
 
-    def bind_tools(  # type: ignore[override]
+    def bind_tools(
         self,
         tools: Sequence[Union[Dict[str, Any], Type[BaseModel], Callable, BaseTool]],
         *,
diff --git a/libs/oci/tests/integration_tests/chat_models/test_tool_calling.py b/libs/oci/tests/integration_tests/chat_models/test_tool_calling.py
index 283bf0a..cb65206 100644
--- a/libs/oci/tests/integration_tests/chat_models/test_tool_calling.py
+++ b/libs/oci/tests/integration_tests/chat_models/test_tool_calling.py
@@ -428,7 +428,7 @@ def should_continue(state: MessagesState):
     # Invoke agent with a diagnostic scenario
     result = agent.invoke(
-        {  # type: ignore[arg-type]
+        {
             "messages": [
                 SystemMessage(content=system_prompt),
                 HumanMessage(
diff --git a/libs/oci/tests/unit_tests/chat_models/test_oci_data_science.py b/libs/oci/tests/unit_tests/chat_models/test_oci_data_science.py
index e6a691e..19d0cd2 100644
--- a/libs/oci/tests/unit_tests/chat_models/test_oci_data_science.py
+++ b/libs/oci/tests/unit_tests/chat_models/test_oci_data_science.py
@@ -145,14 +145,14 @@ def test_stream_vllm(*args: Any) -> None:
         endpoint=CONST_ENDPOINT, model=CONST_MODEL_NAME, streaming=True
     )
     assert llm._headers().get("route") == CONST_COMPLETION_ROUTE
-    output = None
+    output: AIMessageChunk | None = None
     count = 0
     for chunk in llm.stream(CONST_PROMPT):
         assert isinstance(chunk, AIMessageChunk)
         if output is None:
             output = chunk
         else:
-            output += chunk
+            output = output + chunk
         count += 1
     assert count == 5
     assert output is not None
diff --git a/libs/oci/tests/unit_tests/chat_models/test_oci_generative_ai_responses_api.py b/libs/oci/tests/unit_tests/chat_models/test_oci_generative_ai_responses_api.py
index 2e95db6..62d63ce 100644
--- a/libs/oci/tests/unit_tests/chat_models/test_oci_generative_ai_responses_api.py
+++ b/libs/oci/tests/unit_tests/chat_models/test_oci_generative_ai_responses_api.py
@@ -385,7 +385,7 @@ def call_model(state: AgentState):
     # ---- Act ----
     app = workflow.compile()
     input_message = HumanMessage(content="What is the capital of France?")
-    result = app.invoke({"messages": [input_message]})  # type: ignore[arg-type]
+    result = app.invoke({"messages": [input_message]})
 
     # ---- Assert ----
     content = result["messages"][1].content[0]

From 24fdcfd078263cf028301157484522fa3b73bb51 Mon Sep 17 00:00:00 2001
From: Federico Kamelhar
Date: Wed, 26 Nov 2025 16:48:05 -0500
Subject: [PATCH 17/20] Fix Python 3.9 compatibility in test_oci_data_science.py

- Use Optional[T] instead of T | None syntax for Python 3.9 compat
- Add type: ignore[assignment] for AIMessageChunk addition
---
 .../tests/unit_tests/chat_models/test_oci_data_science.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/libs/oci/tests/unit_tests/chat_models/test_oci_data_science.py b/libs/oci/tests/unit_tests/chat_models/test_oci_data_science.py
index 19d0cd2..e209960 100644
--- a/libs/oci/tests/unit_tests/chat_models/test_oci_data_science.py
+++ b/libs/oci/tests/unit_tests/chat_models/test_oci_data_science.py
@@ -1,7 +1,7 @@
 """Test Chat model for OCI Data Science Model Deployment Endpoint."""
 
 import sys
-from typing import Any, AsyncGenerator, Dict, Generator
+from typing import Any, AsyncGenerator, Dict, Generator, Optional
 from unittest import mock
 
 import pytest
@@ -145,14 +145,14 @@ def test_stream_vllm(*args: Any) -> None:
         endpoint=CONST_ENDPOINT, model=CONST_MODEL_NAME, streaming=True
     )
     assert llm._headers().get("route") == CONST_COMPLETION_ROUTE
-    output: AIMessageChunk | None = None
+    output: Optional[AIMessageChunk] = None
     count = 0
     for chunk in llm.stream(CONST_PROMPT):
         assert isinstance(chunk, AIMessageChunk)
         if output is None:
             output = chunk
         else:
-            output = output + chunk
+            output = output + chunk  # type: ignore[assignment]
         count += 1
     assert count == 5
     assert output is not None

From 33e0e467c96a827308c326478628ef402359db86 Mon Sep 17 00:00:00 2001
From: Federico Kamelhar
Date: Wed, 26 Nov 2025 17:32:13 -0500
Subject: [PATCH 18/20] Simplify parallel tool calls: use provider property instead of model_id parsing

Addresses reviewer feedback:
- Add supports_parallel_tool_calls property to Provider base class (False)
- Override in GenericProvider to return True (supports parallel calls)
- CohereProvider inherits False (doesn't support parallel calls)
- Remove _supports_parallel_tool_calls method with hacky model_id parsing
- Simplify bind_tools to use provider property for validation
- Remove Llama version-specific validation (let API fail naturally)
- Update unit tests to focus on provider-based validation
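The resulting dispatch is plain attribute lookup rather than model_id string
parsing; schematically (simplified from the diff below):

    class Provider:
        @property
        def supports_parallel_tool_calls(self) -> bool:
            return False  # safe default, inherited by CohereProvider

    class GenericProvider(Provider):
        @property
        def supports_parallel_tool_calls(self) -> bool:
            return True  # GenericChatRequest models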
---
 .../chat_models/oci_generative_ai.py | 81 ++++---------
 .../chat_models/test_oci_data_science.py | 2 +-
 .../chat_models/test_parallel_tool_calling.py | 111 +++--------------
 3 files changed, 43 insertions(+), 151 deletions(-)

diff --git a/libs/oci/langchain_oci/chat_models/oci_generative_ai.py b/libs/oci/langchain_oci/chat_models/oci_generative_ai.py
index f866874..3ca4780 100644
--- a/libs/oci/langchain_oci/chat_models/oci_generative_ai.py
+++ b/libs/oci/langchain_oci/chat_models/oci_generative_ai.py
@@ -209,6 +209,18 @@ def process_stream_tool_calls(
         """Process streaming tool calls from event data into chunks."""
         ...
 
+    @property
+    def supports_parallel_tool_calls(self) -> bool:
+        """Whether this provider supports parallel tool calling.
+
+        Parallel tool calling allows the model to call multiple tools
+        simultaneously in a single response.
+
+        Returns:
+            bool: True if parallel tool calling is supported, False otherwise.
+        """
+        return False
+
 
 class CohereProvider(Provider):
     """Provider implementation for Cohere."""
@@ -593,6 +605,11 @@ class GenericProvider(Provider):
 
     stop_sequence_key: str = "stop"
 
+    @property
+    def supports_parallel_tool_calls(self) -> bool:
+        """GenericProvider models support parallel tool calling."""
+        return True
+
     def __init__(self) -> None:
         from oci.generative_ai_inference import models
 
@@ -943,8 +960,8 @@ def convert_to_oci_tool(
                     "required": parameters.get("required", []),
                 },
             )
-        elif isinstance(tool, BaseTool):  # type: ignore[unreachable]
-            return self.oci_function_definition(  # type: ignore[unreachable]
+        elif isinstance(tool, BaseTool):
+            return self.oci_function_definition(
                 name=tool.name,
                 description=OCIUtils.remove_signature_from_tool_description(
                     tool.name, tool.description
@@ -1216,52 +1233,6 @@ def _prepare_request(
 
         return request
 
-    def _supports_parallel_tool_calls(self, model_id: str) -> bool:
-        """Check if the model supports parallel tool calling.
-
-        Parallel tool calling is supported for:
-        - Llama 4+ only (tested and verified)
-        - Other GenericChatRequest models (xAI Grok, OpenAI, Mistral)
-
-        Not supported for:
-        - All Llama 3.x versions (3.0, 3.1, 3.2, 3.3)
-        - Cohere models
-
-        Args:
-            model_id: The model identifier
-                (e.g., "meta.llama-4-maverick-17b-128e-instruct-fp8")
-
-        Returns:
-            bool: True if model supports parallel tool calling, False otherwise
-        """
-        import re
-
-        # Extract provider from model_id
-        # (e.g., "meta" from "meta.llama-4-maverick-17b-128e-instruct-fp8")
-        provider = model_id.split(".")[0].lower()
-
-        # Cohere models don't support parallel tool calling
-        if provider == "cohere":
-            return False
-
-        # For Meta/Llama models, check version
-        if provider == "meta" and "llama" in model_id.lower():
-            # Extract version number
-            # (e.g., "4" from "meta.llama-4-maverick-17b-128e-instruct-fp8")
-            version_match = re.search(r"llama-(\d+)", model_id.lower())
-            if version_match:
-                major = int(version_match.group(1))
-
-                # Only Llama 4+ supports parallel tool calling
-                # Llama 3.x (including 3.3) does NOT support it based on testing
-                if major >= 4:
-                    return True
-
-            return False
-
-        # Other GenericChatRequest models (xAI Grok, OpenAI, Mistral) support it
-        return True
-
     def bind_tools(
         self,
         tools: Sequence[Union[Dict[str, Any], Type[BaseModel], Callable, BaseTool]],
         *,
@@ -1293,8 +1264,8 @@ def bind_tools(
             parallel_tool_calls: Whether to enable parallel function calling.
                 If True, the model can call multiple tools simultaneously.
                 If False or None (default), tools are called sequentially.
-                Supported for models using GenericChatRequest (Meta Llama 4+, xAI Grok,
-                OpenAI, Mistral). Not supported for Cohere models or Llama 3.x.
+                Supported for models using GenericChatRequest (Meta, xAI Grok,
+                OpenAI, Mistral). Not supported for Cohere models.
             kwargs: Any additional parameters are passed directly to
                 :meth:`~langchain_oci.chat_models.oci_generative_ai.ChatOCIGenAI.bind`.
         """
@@ -1306,14 +1277,10 @@ def bind_tools(
         # Add parallel tool calls support (only when explicitly enabled)
         if parallel_tool_calls:
-            # Validate Llama 3.x doesn't support parallel tool calls (early check)
-            model_id = self.model_id or ""
-            is_llama = "llama" in model_id.lower()
-            if is_llama and not self._supports_parallel_tool_calls(model_id):
+            if not self._provider.supports_parallel_tool_calls:
                 raise ValueError(
-                    f"Parallel tool calls not supported for {model_id}. "
-                    "Only Llama 4+ models support this feature. "
-                    "Llama 3.x (including 3.3) don't support parallel calls."
+                    "Parallel tool calls not supported for this provider. "
+                    "Only GenericChatRequest models support parallel tool calling."
                 )
             kwargs["is_parallel_tool_calls"] = True
diff --git a/libs/oci/tests/unit_tests/chat_models/test_oci_data_science.py b/libs/oci/tests/unit_tests/chat_models/test_oci_data_science.py
index e209960..d27d191 100644
--- a/libs/oci/tests/unit_tests/chat_models/test_oci_data_science.py
+++ b/libs/oci/tests/unit_tests/chat_models/test_oci_data_science.py
@@ -152,7 +152,7 @@ def test_stream_vllm(*args: Any) -> None:
         if output is None:
             output = chunk
         else:
-            output = output + chunk  # type: ignore[assignment]
+            output = output + chunk
         count += 1
     assert count == 5
     assert output is not None
diff --git a/libs/oci/tests/unit_tests/chat_models/test_parallel_tool_calling.py b/libs/oci/tests/unit_tests/chat_models/test_parallel_tool_calling.py
index c5bcb35..46357b9 100644
--- a/libs/oci/tests/unit_tests/chat_models/test_parallel_tool_calling.py
+++ b/libs/oci/tests/unit_tests/chat_models/test_parallel_tool_calling.py
@@ -99,7 +99,7 @@ def get_weather(city: str) -> str:
 
 @pytest.mark.requires("oci")
 def test_parallel_tool_calls_cohere_raises_error():
-    """Test that Cohere models raise error for parallel tool calls."""
+    """Test that Cohere models raise error for parallel tool calls at bind_tools."""
     oci_gen_ai_client = MagicMock()
     llm = ChatOCIGenAI(model_id="cohere.command-r-plus", client=oci_gen_ai_client)
 
@@ -107,84 +107,14 @@ def tool1(x: int) -> int:
         """Tool 1."""
         return x + 1
 
-    llm_with_tools = llm.bind_tools([tool1], parallel_tool_calls=True)
-
-    # Should raise ValueError when trying to prepare request
-    # RunnableBinding has _prepare_request and kwargs attributes at runtime
-    with pytest.raises(ValueError, match="not supported for Cohere"):
-        llm_with_tools._prepare_request(  # type: ignore[attr-defined]
-            [HumanMessage(content="test")],
-            stop=None,
-            stream=False,
-            **llm_with_tools.kwargs,  # type: ignore[attr-defined]
-        )
-
-
-@pytest.mark.requires("oci")
-def test_version_filter_llama_3_0_blocked():
-    """Test that Llama 3.0 models are blocked from parallel tool calling."""
-    oci_gen_ai_client = MagicMock()
-    llm = ChatOCIGenAI(model_id="meta.llama-3-70b-instruct", client=oci_gen_ai_client)
-
-    def tool1(x: int) -> int:
-        """Tool 1."""
-        return x + 1
-
-    # Should raise ValueError when trying to enable parallel tool calling
-    with pytest.raises(ValueError, match="Llama 4\\+"):
-        llm.bind_tools([tool1], parallel_tool_calls=True)
-
-
-@pytest.mark.requires("oci")
-def test_version_filter_llama_3_1_blocked():
-    """Test that Llama 3.1 models are blocked from parallel tool calling."""
-    oci_gen_ai_client = MagicMock()
-    llm = ChatOCIGenAI(model_id="meta.llama-3.1-70b-instruct", client=oci_gen_ai_client)
-
-    def tool1(x: int) -> int:
-        """Tool 1."""
-        return x + 1
-
-    # Should raise ValueError
-    with pytest.raises(ValueError, match="Llama 4\\+"):
-        llm.bind_tools([tool1], parallel_tool_calls=True)
-
-
-@pytest.mark.requires("oci")
-def test_version_filter_llama_3_2_blocked():
-    """Test that Llama 3.2 models are blocked from parallel tool calling."""
-    oci_gen_ai_client = MagicMock()
-    llm = ChatOCIGenAI(
-        model_id="meta.llama-3.2-11b-vision-instruct", client=oci_gen_ai_client
-    )
-
-    def tool1(x: int) -> int:
-        """Tool 1."""
-        return x + 1
-
-    # Should raise ValueError
-    with pytest.raises(ValueError, match="Llama 4\\+"):
-        llm.bind_tools([tool1], parallel_tool_calls=True)
-
-
-@pytest.mark.requires("oci")
-def test_version_filter_llama_3_3_blocked():
-    """Test that Llama 3.3 models are blocked from parallel tool calling."""
-    oci_gen_ai_client = MagicMock()
-    llm = ChatOCIGenAI(model_id="meta.llama-3.3-70b-instruct", client=oci_gen_ai_client)
-
-    def tool1(x: int) -> int:
-        """Tool 1."""
-        return x + 1
-
-    # Should raise ValueError - Llama 3.3 doesn't actually support parallel calls
-    with pytest.raises(ValueError, match="Llama 4\\+"):
-        llm.bind_tools([tool1], parallel_tool_calls=True)
-
-
-@pytest.mark.requires("oci")
-def test_version_filter_llama_4_allowed():
-    """Test that Llama 4 models are allowed parallel tool calling."""
+    # Should raise ValueError at bind_tools time (not at request time)
+    with pytest.raises(ValueError, match="not supported"):
+        llm.bind_tools([tool1], parallel_tool_calls=True)
+
+
+@pytest.mark.requires("oci")
+def test_parallel_tool_calls_meta_allowed():
+    """Test that Meta models are allowed parallel tool calling."""
     oci_gen_ai_client = MagicMock()
     llm = ChatOCIGenAI(
         model_id="meta.llama-4-maverick-17b-128e-instruct-fp8", client=oci_gen_ai_client
@@ -201,11 +131,11 @@ def tool1(x: int) -> int:
 
 @pytest.mark.requires("oci")
-def test_version_filter_other_models_allowed():
+def test_parallel_tool_calls_other_generic_models_allowed():
     """Test that other GenericChatRequest models are allowed parallel tool calling."""
     oci_gen_ai_client = MagicMock()
 
-    # Test with xAI Grok
+    # Test with xAI Grok (uses GenericProvider)
     llm_grok = ChatOCIGenAI(model_id="xai.grok-4-fast", client=oci_gen_ai_client)
 
     def tool1(x: int) -> int:
@@ -219,23 +149,18 @@ def tool1(x: int) -> int:
 
 @pytest.mark.requires("oci")
-def test_version_filter_supports_parallel_tool_calls_method():
-    """Test the _supports_parallel_tool_calls method directly."""
+def test_provider_supports_parallel_tool_calls_property():
+    """Test the provider supports_parallel_tool_calls property."""
     oci_gen_ai_client = MagicMock()
-    llm = ChatOCIGenAI(
-        model_id="meta.llama-4-maverick-17b-128e-instruct-fp8", client=oci_gen_ai_client
-    )
 
-    # Test various model IDs
-    model_id = "meta.llama-4-maverick-17b-128e-instruct-fp8"
-    assert llm._supports_parallel_tool_calls(model_id) is True
-    # Llama 3.3 NOT supported
-    assert llm._supports_parallel_tool_calls("meta.llama-3.3-70b-instruct") is False
-    model_id = "meta.llama-3.2-11b-vision-instruct"
-    assert llm._supports_parallel_tool_calls(model_id) is False
-    assert llm._supports_parallel_tool_calls("meta.llama-3.1-70b-instruct") is False
-    assert llm._supports_parallel_tool_calls("meta.llama-3-70b-instruct") is False
-    assert llm._supports_parallel_tool_calls("cohere.command-r-plus") is False
-    assert llm._supports_parallel_tool_calls("xai.grok-4-fast") is True
-    assert llm._supports_parallel_tool_calls("openai.gpt-4") is True
-    assert llm._supports_parallel_tool_calls("mistral.mistral-large") is True
+    # Meta model uses GenericProvider which supports parallel tool calls
+    llm_meta = ChatOCIGenAI(
+        model_id="meta.llama-4-maverick-17b-128e-instruct-fp8", client=oci_gen_ai_client
+    )
+    assert llm_meta._provider.supports_parallel_tool_calls is True
+
+    # Cohere model uses CohereProvider which does NOT support parallel tool calls
+    llm_cohere = ChatOCIGenAI(
+        model_id="cohere.command-r-plus", client=oci_gen_ai_client
+    )
+    assert llm_cohere._provider.supports_parallel_tool_calls is False

From d0d2c5dca4c821455376e0101402b427f3c2d002 Mon Sep 17 00:00:00 2001
From: Federico Kamelhar
Date: Wed, 26 Nov 2025 17:38:15 -0500
Subject: [PATCH 19/20] Fix integration test for bind_tools validation timing

---
 .../test_parallel_tool_calling_integration.py | 10 +++-------
 1 file changed, 3 insertions(+), 7 deletions(-)

diff --git a/libs/oci/tests/integration_tests/chat_models/test_parallel_tool_calling_integration.py b/libs/oci/tests/integration_tests/chat_models/test_parallel_tool_calling_integration.py
index bfd606f..9a9ceb4 100644
--- a/libs/oci/tests/integration_tests/chat_models/test_parallel_tool_calling_integration.py
+++ b/libs/oci/tests/integration_tests/chat_models/test_parallel_tool_calling_integration.py
@@ -227,19 +227,15 @@ def test_cohere_model_error():
         auth_type=os.environ.get("OCI_AUTH_TYPE", "SECURITY_TOKEN"),
     )
 
-    # Try to enable parallel tool calls with Cohere (should fail)
-    chat_with_tools = chat.bind_tools([get_weather], parallel_tool_calls=True)
-
     logging.info("\nAttempting to use parallel_tool_calls with Cohere model...")
 
+    # Try to enable parallel tool calls with Cohere (should fail at bind_tools)
     try:
-        _ = chat_with_tools.invoke(
-            [HumanMessage(content="What's the weather in Paris?")]
-        )
+        chat.bind_tools([get_weather], parallel_tool_calls=True)
         logging.info("❌ TEST FAILED: Should have raised ValueError")
         return False
     except ValueError as e:
-        if "not supported for Cohere" in str(e):
+        if "not supported" in str(e):
             logging.info(f"\n✓ Correctly raised error: {e}")
             logging.info("\n✓ TEST 4 PASSED: Cohere validation works")
             return True

From d17fc8ff4087c38d373004a2b9da615fddc645ba Mon Sep 17 00:00:00 2001
From: Federico Kamelhar
Date: Wed, 26 Nov 2025 17:47:49 -0500
Subject: [PATCH 20/20] Fix mypy linting issues for Python 3.9 compatibility

- Reorder convert_to_oci_tool checks to avoid unreachable code warning
- Fix type annotation in test_stream_vllm to use BaseMessageChunk
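A minimal sketch of why the reorder matters (BaseTool instances are
callable, so a callable() branch checked first would shadow them):

    if isinstance(tool, BaseTool):  # must come first
        ...  # use tool.name / tool.description directly
    elif (isinstance(tool, type) and issubclass(tool, BaseModel)) or callable(tool):
        ...  # convert via convert_to_openai_function(tool)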
" "Tool must be passed in as a BaseTool " diff --git a/libs/oci/tests/unit_tests/chat_models/test_oci_data_science.py b/libs/oci/tests/unit_tests/chat_models/test_oci_data_science.py index d27d191..e2d3b8a 100644 --- a/libs/oci/tests/unit_tests/chat_models/test_oci_data_science.py +++ b/libs/oci/tests/unit_tests/chat_models/test_oci_data_science.py @@ -5,7 +5,7 @@ from unittest import mock import pytest -from langchain_core.messages import AIMessage, AIMessageChunk +from langchain_core.messages import AIMessage, AIMessageChunk, BaseMessageChunk from requests.exceptions import HTTPError from langchain_oci.chat_models import ( @@ -145,7 +145,7 @@ def test_stream_vllm(*args: Any) -> None: endpoint=CONST_ENDPOINT, model=CONST_MODEL_NAME, streaming=True ) assert llm._headers().get("route") == CONST_COMPLETION_ROUTE - output: Optional[AIMessageChunk] = None + output: Optional[BaseMessageChunk] = None count = 0 for chunk in llm.stream(CONST_PROMPT): assert isinstance(chunk, AIMessageChunk) @@ -156,8 +156,7 @@ def test_stream_vllm(*args: Any) -> None: count += 1 assert count == 5 assert output is not None - if output is not None: - assert str(output.content).strip() == CONST_COMPLETION + assert str(output.content).strip() == CONST_COMPLETION async def mocked_async_streaming_response(