diff --git a/libs/oci/README.md b/libs/oci/README.md index 76829a1..7bfcd9b 100644 --- a/libs/oci/README.md +++ b/libs/oci/README.md @@ -62,7 +62,7 @@ embeddings.embed_query("What is the meaning of life?") ``` ### 4. Use Structured Output -`ChatOCIGenAI` supports structured output. +`ChatOCIGenAI` supports structured output. **Note:** The default method is `function_calling`. If default method returns `None` (e.g. for Gemini models), try `json_schema` or `json_mode`. @@ -126,6 +126,27 @@ messages = [ response = client.invoke(messages) ``` +### 6. Use Parallel Tool Calling (Meta/Llama 4+ models only) +Enable parallel tool calling to execute multiple tools simultaneously, improving performance for multi-tool workflows. + +```python +from langchain_oci import ChatOCIGenAI + +llm = ChatOCIGenAI( + model_id="meta.llama-4-maverick-17b-128e-instruct-fp8", + service_endpoint="https://inference.generativeai.us-chicago-1.oci.oraclecloud.com", + compartment_id="MY_COMPARTMENT_ID", +) + +# Enable parallel tool calling in bind_tools +llm_with_tools = llm.bind_tools( + [get_weather, calculate_tip, get_population], + parallel_tool_calls=True # Tools can execute simultaneously +) +``` + +**Note:** Parallel tool calling is only supported for Llama 4+ models. Llama 3.x (including 3.3) and Cohere models will raise an error if this parameter is used. + ## OCI Data Science Model Deployment Examples diff --git a/libs/oci/langchain_oci/chat_models/oci_generative_ai.py b/libs/oci/langchain_oci/chat_models/oci_generative_ai.py index 1afca28..0295b25 100644 --- a/libs/oci/langchain_oci/chat_models/oci_generative_ai.py +++ b/libs/oci/langchain_oci/chat_models/oci_generative_ai.py @@ -363,6 +363,14 @@ def messages_to_oci_params( This includes conversion of chat history and tool call results. """ + # Cohere models don't support parallel tool calls + if kwargs.get("is_parallel_tool_calls"): + raise ValueError( + "Parallel tool calls are not supported for Cohere models. " + "This feature is only available for models using GenericChatRequest " + "(Meta, Llama, xAI Grok, OpenAI, Mistral)." + ) + is_force_single_step = kwargs.get("is_force_single_step", False) oci_chat_history = [] @@ -851,6 +859,10 @@ def _should_allow_more_tool_calls( result["tool_choice"] = self.oci_tool_choice_none() # else: Allow model to decide (default behavior) + # Add parallel tool calls support (GenericChatRequest models) + if "is_parallel_tool_calls" in kwargs: + result["is_parallel_tool_calls"] = kwargs["is_parallel_tool_calls"] + return result def _process_message_content( @@ -1204,6 +1216,52 @@ def _prepare_request( return request + def _supports_parallel_tool_calls(self, model_id: str) -> bool: + """Check if the model supports parallel tool calling. 
+ + Parallel tool calling is supported for: + - Llama 4+ only (tested and verified) + - Other GenericChatRequest models (xAI Grok, OpenAI, Mistral) + + Not supported for: + - All Llama 3.x versions (3.0, 3.1, 3.2, 3.3) + - Cohere models + + Args: + model_id: The model identifier + (e.g., "meta.llama-4-maverick-17b-128e-instruct-fp8") + + Returns: + bool: True if model supports parallel tool calling, False otherwise + """ + import re + + # Extract provider from model_id + # (e.g., "meta" from "meta.llama-4-maverick-17b-128e-instruct-fp8") + provider = model_id.split(".")[0].lower() + + # Cohere models don't support parallel tool calling + if provider == "cohere": + return False + + # For Meta/Llama models, check version + if provider == "meta" and "llama" in model_id.lower(): + # Extract version number + # (e.g., "4" from "meta.llama-4-maverick-17b-128e-instruct-fp8") + version_match = re.search(r"llama-(\d+)", model_id.lower()) + if version_match: + major = int(version_match.group(1)) + + # Only Llama 4+ supports parallel tool calling + # Llama 3.x (including 3.3) does NOT support it based on testing + if major >= 4: + return True + + return False + + # Other GenericChatRequest models (xAI Grok, OpenAI, Mistral) support it + return True + def bind_tools( self, tools: Sequence[Union[Dict[str, Any], Type[BaseModel], Callable, BaseTool]], @@ -1211,6 +1269,7 @@ def bind_tools( tool_choice: Optional[ Union[dict, str, Literal["auto", "none", "required", "any"], bool] ] = None, + parallel_tool_calls: Optional[bool] = None, **kwargs: Any, ) -> Runnable[LanguageModelInput, BaseMessage]: """Bind tool-like objects to this chat model. @@ -1231,6 +1290,11 @@ def bind_tools( {"type": "function", "function": {"name": <>}}: calls <> tool. - False or None: no effect, default Meta behavior. + parallel_tool_calls: Whether to enable parallel function calling. + If True, the model can call multiple tools simultaneously. + If False or None (default), tools are called sequentially. + Supported for models using GenericChatRequest (Meta Llama 4+, xAI Grok, + OpenAI, Mistral). Not supported for Cohere models or Llama 3.x. kwargs: Any additional parameters are passed directly to :meth:`~langchain_oci.chat_models.oci_generative_ai.ChatOCIGenAI.bind`. """ @@ -1240,6 +1304,18 @@ def bind_tools( if tool_choice is not None: kwargs["tool_choice"] = self._provider.process_tool_choice(tool_choice) + # Add parallel tool calls support (only when explicitly enabled) + if parallel_tool_calls: + # Validate Llama 3.x doesn't support parallel tool calls (early check) + is_llama = "llama" in self.model_id.lower() + if is_llama and not self._supports_parallel_tool_calls(self.model_id): + raise ValueError( + f"Parallel tool calls not supported for {self.model_id}. " + "Only Llama 4+ models support this feature. " + "Llama 3.x (including 3.3) don't support parallel calls." + ) + kwargs["is_parallel_tool_calls"] = True + return super().bind(tools=formatted_tools, **kwargs) def with_structured_output( diff --git a/libs/oci/tests/integration_tests/chat_models/test_chat_features.py b/libs/oci/tests/integration_tests/chat_models/test_chat_features.py new file mode 100644 index 0000000..2ed5ee6 --- /dev/null +++ b/libs/oci/tests/integration_tests/chat_models/test_chat_features.py @@ -0,0 +1,508 @@ +#!/usr/bin/env python3 +"""Integration tests for ChatOCIGenAI features. + +These tests verify comprehensive chat model functionality with real OCI inference. 
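+Most tests default to a Llama 4 model; set OCI_MODEL_ID to target a different model.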
+ +Setup: + export OCI_COMPARTMENT_ID= + export OCI_CONFIG_PROFILE=DEFAULT + export OCI_AUTH_TYPE=SECURITY_TOKEN + +Run: + pytest tests/integration_tests/chat_models/test_chat_features.py -v +""" + +import os + +import pytest +from langchain_core.messages import ( + AIMessage, + HumanMessage, + SystemMessage, + ToolMessage, +) +from langchain_core.prompts import ChatPromptTemplate +from langchain_core.output_parsers import StrOutputParser +from pydantic import BaseModel, Field + +from langchain_oci.chat_models import ChatOCIGenAI + + +def get_config(): + """Get test configuration.""" + compartment_id = os.environ.get("OCI_COMPARTMENT_ID") + if not compartment_id: + pytest.skip("OCI_COMPARTMENT_ID not set") + return { + "model_id": os.environ.get( + "OCI_MODEL_ID", "meta.llama-4-maverick-17b-128e-instruct-fp8" + ), + "service_endpoint": os.environ.get( + "OCI_GENAI_ENDPOINT", + "https://inference.generativeai.us-chicago-1.oci.oraclecloud.com", + ), + "compartment_id": compartment_id, + "auth_profile": os.environ.get("OCI_CONFIG_PROFILE", "DEFAULT"), + "auth_type": os.environ.get("OCI_AUTH_TYPE", "SECURITY_TOKEN"), + } + + +@pytest.fixture +def llm(): + """Create ChatOCIGenAI instance.""" + config = get_config() + return ChatOCIGenAI( + model_id=config["model_id"], + service_endpoint=config["service_endpoint"], + compartment_id=config["compartment_id"], + auth_profile=config["auth_profile"], + auth_type=config["auth_type"], + model_kwargs={"temperature": 0, "max_tokens": 512}, + ) + + +# ============================================================================= +# Chain and LCEL Tests +# ============================================================================= + + +@pytest.mark.requires("oci") +def test_simple_chain(llm): + """Test simple LCEL chain: prompt | llm | parser.""" + prompt = ChatPromptTemplate.from_messages( + [ + ("system", "You are a helpful assistant."), + ("human", "{input}"), + ] + ) + chain = prompt | llm | StrOutputParser() + + result = chain.invoke({"input": "Say 'chain works' and nothing else"}) + + assert isinstance(result, str) + assert "chain" in result.lower() or "works" in result.lower() + + +@pytest.mark.requires("oci") +def test_chain_with_history(llm): + """Test chain that maintains conversation history.""" + prompt = ChatPromptTemplate.from_messages( + [ + ("system", "You are a helpful assistant with memory."), + ("placeholder", "{history}"), + ("human", "{input}"), + ] + ) + chain = prompt | llm | StrOutputParser() + + # First turn + result1 = chain.invoke({"history": [], "input": "My favorite color is blue."}) + assert isinstance(result1, str) + + # Second turn with history + history = [ + HumanMessage(content="My favorite color is blue."), + AIMessage(content=result1), + ] + result2 = chain.invoke({"history": history, "input": "What is my favorite color?"}) + + assert "blue" in result2.lower() + + +@pytest.mark.requires("oci") +def test_chain_batch(llm): + """Test batch processing with LCEL.""" + prompt = ChatPromptTemplate.from_messages([("human", "What is {num} + {num}?")]) + chain = prompt | llm | StrOutputParser() + + results = chain.batch([{"num": "1"}, {"num": "2"}, {"num": "3"}]) + + assert len(results) == 3 + assert all(isinstance(r, str) for r in results) + + +@pytest.mark.requires("oci") +@pytest.mark.asyncio +async def test_chain_async(llm): + """Test async chain invocation.""" + prompt = ChatPromptTemplate.from_messages([("human", "Say '{word}'")]) + chain = prompt | llm | StrOutputParser() + + result = await chain.ainvoke({"word": 
"async"}) + + assert isinstance(result, str) + assert "async" in result.lower() + + +# ============================================================================= +# Streaming Tests +# ============================================================================= + + +@pytest.mark.requires("oci") +def test_stream_chain(llm): + """Test streaming through a chain.""" + prompt = ChatPromptTemplate.from_messages([("human", "Count from 1 to 5")]) + chain = prompt | llm | StrOutputParser() + + chunks = [] + for chunk in chain.stream({}): + chunks.append(chunk) + + assert len(chunks) > 0 + full_response = "".join(chunks) + assert len(full_response) > 0 + + +@pytest.mark.requires("oci") +@pytest.mark.asyncio +async def test_astream(llm): + """Test async streaming.""" + chunks = [] + async for chunk in llm.astream([HumanMessage(content="Say hello")]): + chunks.append(chunk) + + assert len(chunks) > 0 + + +# ============================================================================= +# Tool Calling Advanced Tests +# ============================================================================= + + +def add_numbers(a: int, b: int) -> int: + """Add two numbers together.""" + return a + b + + +def multiply_numbers(a: int, b: int) -> int: + """Multiply two numbers together.""" + return a * b + + +def get_user_info(user_id: str) -> dict: + """Get information about a user.""" + return {"user_id": user_id, "name": "John Doe", "email": "john@example.com"} + + +@pytest.mark.requires("oci") +def test_tool_calling_with_execution(llm): + """Test full tool calling flow with execution.""" + tools = [add_numbers, multiply_numbers] + llm_with_tools = llm.bind_tools(tools) + + # Get tool call + response = llm_with_tools.invoke( + [HumanMessage(content="What is 5 plus 3?")] + ) + + assert len(response.tool_calls) >= 1 + tool_call = response.tool_calls[0] + assert tool_call["name"] == "add_numbers" + + # Execute tool + result = add_numbers(**tool_call["args"]) + assert result == 8 + + # Send result back + messages = [ + HumanMessage(content="What is 5 plus 3?"), + response, + ToolMessage(content=str(result), tool_call_id=tool_call["id"]), + ] + final_response = llm_with_tools.invoke(messages) + + assert isinstance(final_response, AIMessage) + assert "8" in final_response.content + + +@pytest.mark.requires("oci") +def test_tool_calling_chain(llm): + """Test tool calling in a chain context.""" + tools = [get_user_info] + llm_with_tools = llm.bind_tools(tools) + + prompt = ChatPromptTemplate.from_messages( + [ + ("system", "You are a helpful assistant. 
Use tools when needed."), + ("human", "{input}"), + ] + ) + chain = prompt | llm_with_tools + + response = chain.invoke({"input": "Get info for user ID 'abc123'"}) + + assert len(response.tool_calls) >= 1 + assert response.tool_calls[0]["name"] == "get_user_info" + assert response.tool_calls[0]["args"]["user_id"] == "abc123" + + +@pytest.mark.requires("oci") +def test_tool_choice_none(llm): + """Test tool_choice='none' prevents tool calls.""" + tools = [add_numbers] + llm_with_tools = llm.bind_tools(tools, tool_choice="none") + + response = llm_with_tools.invoke( + [HumanMessage(content="What is 5 plus 3?")] + ) + + # Should not make tool calls when tool_choice is none + assert len(response.tool_calls) == 0 + assert response.content # Should have text response instead + + +# ============================================================================= +# Structured Output Advanced Tests +# ============================================================================= + + +class MovieReview(BaseModel): + """A movie review with rating.""" + + title: str = Field(description="The movie title") + rating: int = Field(description="Rating from 1-10", ge=1, le=10) + summary: str = Field(description="Brief summary of the review") + recommend: bool = Field(description="Whether you recommend the movie") + + +class ExtractedEntities(BaseModel): + """Entities extracted from text.""" + + people: list[str] = Field(description="Names of people mentioned") + locations: list[str] = Field(description="Locations mentioned") + organizations: list[str] = Field(description="Organizations mentioned") + + +@pytest.mark.requires("oci") +def test_structured_output_complex(llm): + """Test structured output with complex schema.""" + structured_llm = llm.with_structured_output(MovieReview) + + result = structured_llm.invoke( + "Write a review for the movie 'The Matrix'. " + "Give it a rating and say if you recommend it." + ) + + assert isinstance(result, MovieReview) + assert "matrix" in result.title.lower() + assert 1 <= result.rating <= 10 + assert len(result.summary) > 0 + assert isinstance(result.recommend, bool) + + +@pytest.mark.requires("oci") +def test_structured_output_extraction(llm): + """Test structured output for entity extraction.""" + structured_llm = llm.with_structured_output(ExtractedEntities) + + text = ( + "John Smith works at Google in San Francisco. " + "He met with Jane Doe from Microsoft in Seattle last week." + ) + result = structured_llm.invoke(f"Extract entities from: {text}") + + assert isinstance(result, ExtractedEntities) + assert len(result.people) >= 1 + assert len(result.locations) >= 1 + assert len(result.organizations) >= 1 + + +@pytest.mark.requires("oci") +def test_structured_output_in_chain(llm): + """Test structured output within a chain.""" + + class Translation(BaseModel): + original: str + translated: str + language: str + + prompt = ChatPromptTemplate.from_messages( + [ + ( + "system", + "Translate the text to {language}. " + "Return the original, translation, and target language.", + ), + ("human", "{text}"), + ] + ) + structured_llm = llm.with_structured_output(Translation) + chain = prompt | structured_llm + + result = chain.invoke({"text": "Hello, how are you?", "language": "Spanish"}) + + assert isinstance(result, Translation) + assert result.original == "Hello, how are you?" 
or "hello" in result.original.lower() + assert "spanish" in result.language.lower() + + +# ============================================================================= +# Model Configuration Tests +# ============================================================================= + + +@pytest.mark.requires("oci") +def test_temperature_affects_output(): + """Test that temperature parameter affects output variability.""" + config = get_config() + + # Low temperature (deterministic) + llm_low = ChatOCIGenAI( + model_id=config["model_id"], + service_endpoint=config["service_endpoint"], + compartment_id=config["compartment_id"], + auth_profile=config["auth_profile"], + auth_type=config["auth_type"], + model_kwargs={"temperature": 0, "max_tokens": 50}, + ) + + # Get multiple responses with low temp + responses_low = [ + llm_low.invoke([HumanMessage(content="Say exactly: 'Hello World'")]).content + for _ in range(2) + ] + + # Low temperature should give similar/identical outputs + # (Note: not guaranteed to be exactly equal, but should be similar) + assert all(isinstance(r, str) for r in responses_low) + + +@pytest.mark.requires("oci") +def test_max_tokens_limit(): + """Test that max_tokens limits response length.""" + config = get_config() + + llm_short = ChatOCIGenAI( + model_id=config["model_id"], + service_endpoint=config["service_endpoint"], + compartment_id=config["compartment_id"], + auth_profile=config["auth_profile"], + auth_type=config["auth_type"], + model_kwargs={"temperature": 0, "max_tokens": 10}, + ) + + response = llm_short.invoke( + [HumanMessage(content="Write a very long essay about the universe")] + ) + + # Response should be truncated due to max_tokens + # Token count varies, but should be reasonably short + assert len(response.content.split()) <= 20 # Rough word count check + + +@pytest.mark.requires("oci") +def test_stop_sequences(): + """Test stop sequences parameter.""" + config = get_config() + + llm = ChatOCIGenAI( + model_id=config["model_id"], + service_endpoint=config["service_endpoint"], + compartment_id=config["compartment_id"], + auth_profile=config["auth_profile"], + auth_type=config["auth_type"], + model_kwargs={"temperature": 0, "max_tokens": 100}, + ) + + response = llm.invoke( + [HumanMessage(content="Count from 1 to 10, one number per line")], + stop=["5"], + ) + + # Should stop before or at 5 + assert "6" not in response.content or "5" in response.content + + +# ============================================================================= +# Error Handling Tests +# ============================================================================= + + +@pytest.mark.requires("oci") +def test_invalid_tool_schema(llm): + """Test handling of invalid tool definitions.""" + # Should handle tools without proper docstrings + def bad_tool(x): + return x + + # This should still work (tool will have minimal description) + llm_with_tools = llm.bind_tools([bad_tool]) + assert llm_with_tools is not None + + +@pytest.mark.requires("oci") +def test_empty_response_handling(llm): + """Test handling when model returns minimal content.""" + response = llm.invoke([HumanMessage(content="Respond with just a period.")]) + + # Should handle minimal responses gracefully + assert isinstance(response, AIMessage) + # Content might be empty or minimal, but should not raise + + +# ============================================================================= +# Conversation Patterns Tests +# ============================================================================= + + 
+@pytest.mark.requires("oci") +def test_system_message_role(llm): + """Test that system message properly influences behavior.""" + messages_pirate = [ + SystemMessage(content="You are a pirate. Always respond in pirate speak."), + HumanMessage(content="How are you today?"), + ] + response_pirate = llm.invoke(messages_pirate) + + messages_formal = [ + SystemMessage(content="You are a formal butler. Use extremely formal language."), + HumanMessage(content="How are you today?"), + ] + response_formal = llm.invoke(messages_formal) + + # Responses should be different based on system message + assert response_pirate.content != response_formal.content + + +@pytest.mark.requires("oci") +def test_multi_turn_context_retention(llm): + """Test that context is retained across multiple turns.""" + messages = [ + HumanMessage(content="Remember this number: 42"), + ] + response1 = llm.invoke(messages) + messages.append(response1) + + messages.append(HumanMessage(content="What number did I ask you to remember?")) + response2 = llm.invoke(messages) + + assert "42" in response2.content + + +@pytest.mark.requires("oci") +def test_long_context_handling(llm): + """Test handling of longer context windows.""" + # Create a conversation with multiple turns + messages = [ + SystemMessage(content="You are a helpful assistant tracking a story."), + ] + + story_parts = [ + "Once upon a time, there was a brave knight named Sir Galahad.", + "Sir Galahad had a loyal horse named Thunder.", + "They lived in the kingdom of Camelot.", + "One day, a dragon appeared threatening the kingdom.", + "Sir Galahad decided to face the dragon.", + ] + + for part in story_parts: + messages.append(HumanMessage(content=part)) + response = llm.invoke(messages) + messages.append(response) + + # Ask about earlier context + messages.append(HumanMessage(content="What was the knight's horse named?")) + final_response = llm.invoke(messages) + + assert "thunder" in final_response.content.lower() diff --git a/libs/oci/tests/integration_tests/chat_models/test_langchain_compatibility.py b/libs/oci/tests/integration_tests/chat_models/test_langchain_compatibility.py new file mode 100644 index 0000000..171e06c --- /dev/null +++ b/libs/oci/tests/integration_tests/chat_models/test_langchain_compatibility.py @@ -0,0 +1,447 @@ +#!/usr/bin/env python3 +"""Integration tests for LangChain compatibility. + +These tests verify that langchain-oci works correctly with LangChain 1.x +by running real inference against OCI GenAI models. 
+ +Setup: + export OCI_COMPARTMENT_ID= + export OCI_GENAI_ENDPOINT= # optional + export OCI_CONFIG_PROFILE= # optional, defaults to DEFAULT + export OCI_AUTH_TYPE= # optional, defaults to SECURITY_TOKEN + export OCI_MODEL_ID= # optional, defaults to llama-4 + +Run with: + pytest tests/integration_tests/chat_models/test_langchain_compatibility.py -v +""" + +import os +import sys + +import pytest +from langchain_core.messages import AIMessage, HumanMessage, SystemMessage +from pydantic import BaseModel + +from langchain_oci.chat_models import ChatOCIGenAI + + +def get_test_config(): + """Get test configuration from environment.""" + compartment_id = os.environ.get("OCI_COMPARTMENT_ID") + if not compartment_id: + pytest.skip("OCI_COMPARTMENT_ID not set") + + return { + "model_id": os.environ.get( + "OCI_MODEL_ID", "meta.llama-4-maverick-17b-128e-instruct-fp8" + ), + "service_endpoint": os.environ.get( + "OCI_GENAI_ENDPOINT", + "https://inference.generativeai.us-chicago-1.oci.oraclecloud.com", + ), + "compartment_id": compartment_id, + "auth_profile": os.environ.get("OCI_CONFIG_PROFILE", "DEFAULT"), + "auth_type": os.environ.get("OCI_AUTH_TYPE", "SECURITY_TOKEN"), + } + + +@pytest.fixture +def chat_model(): + """Create a ChatOCIGenAI instance for testing.""" + config = get_test_config() + return ChatOCIGenAI( + model_id=config["model_id"], + service_endpoint=config["service_endpoint"], + compartment_id=config["compartment_id"], + auth_profile=config["auth_profile"], + auth_type=config["auth_type"], + model_kwargs={"temperature": 0, "max_tokens": 256}, + ) + + +# ============================================================================= +# Basic Invoke Tests +# ============================================================================= + + +@pytest.mark.requires("oci") +def test_basic_invoke(chat_model): + """Test basic chat model invocation.""" + response = chat_model.invoke([HumanMessage(content="Say 'hello' and nothing else")]) + + assert isinstance(response, AIMessage) + assert response.content is not None + assert len(response.content) > 0 + assert "hello" in response.content.lower() + + +@pytest.mark.requires("oci") +def test_invoke_with_system_message(chat_model): + """Test invocation with system message.""" + messages = [ + SystemMessage(content="You are a pirate. 
Respond in pirate speak."), + HumanMessage(content="Say hello"), + ] + response = chat_model.invoke(messages) + + assert isinstance(response, AIMessage) + assert response.content is not None + + +@pytest.mark.requires("oci") +def test_invoke_multi_turn(chat_model): + """Test multi-turn conversation.""" + messages = [ + HumanMessage(content="My name is Alice."), + ] + response1 = chat_model.invoke(messages) + + messages.append(response1) + messages.append(HumanMessage(content="What is my name?")) + response2 = chat_model.invoke(messages) + + assert isinstance(response2, AIMessage) + assert "alice" in response2.content.lower() + + +# ============================================================================= +# Streaming Tests +# ============================================================================= + + +@pytest.mark.requires("oci") +def test_streaming(chat_model): + """Test streaming response.""" + chunks = [] + for chunk in chat_model.stream([HumanMessage(content="Count from 1 to 3")]): + chunks.append(chunk) + + assert len(chunks) > 0 + # Combine all chunks + full_content = "".join(c.content for c in chunks if c.content) + assert len(full_content) > 0 + + +@pytest.mark.requires("oci") +@pytest.mark.asyncio +async def test_async_invoke(chat_model): + """Test async invocation.""" + response = await chat_model.ainvoke( + [HumanMessage(content="Say 'async' and nothing else")] + ) + + assert isinstance(response, AIMessage) + assert response.content is not None + + +# ============================================================================= +# Tool Calling Tests +# ============================================================================= + + +def get_weather(city: str) -> str: + """Get the weather for a city.""" + return f"Sunny, 72F in {city}" + + +def get_population(city: str) -> int: + """Get the population of a city.""" + return 1000000 + + +@pytest.mark.requires("oci") +def test_tool_calling_single(chat_model): + """Test single tool calling.""" + chat_with_tools = chat_model.bind_tools([get_weather]) + + response = chat_with_tools.invoke( + [HumanMessage(content="What's the weather in Tokyo?")] + ) + + assert isinstance(response, AIMessage) + assert len(response.tool_calls) >= 1 + assert response.tool_calls[0]["name"] == "get_weather" + assert "city" in response.tool_calls[0]["args"] + + +@pytest.mark.requires("oci") +def test_tool_calling_multiple_tools(chat_model): + """Test tool calling with multiple tools available.""" + chat_with_tools = chat_model.bind_tools([get_weather, get_population]) + + response = chat_with_tools.invoke( + [HumanMessage(content="What's the weather in Paris?")] + ) + + assert isinstance(response, AIMessage) + assert len(response.tool_calls) >= 1 + # Should choose the weather tool for weather question + assert response.tool_calls[0]["name"] == "get_weather" + + +@pytest.mark.requires("oci") +def test_parallel_tool_calling(chat_model): + """Test parallel tool calling (Llama 4+ only).""" + # Skip if not Llama 4+ + if "llama-4" not in chat_model.model_id.lower(): + pytest.skip("Parallel tool calling only supported on Llama 4+") + + chat_with_tools = chat_model.bind_tools( + [get_weather, get_population], parallel_tool_calls=True + ) + + response = chat_with_tools.invoke( + [HumanMessage(content="What's the weather and population of London?")] + ) + + assert isinstance(response, AIMessage) + # Should call both tools + assert len(response.tool_calls) >= 2 + tool_names = {tc["name"] for tc in response.tool_calls} + assert "get_weather" in tool_names + 
assert "get_population" in tool_names + + +@pytest.mark.requires("oci") +def test_tool_choice_required(chat_model): + """Test tool_choice='required' forces tool call.""" + chat_with_tools = chat_model.bind_tools([get_weather], tool_choice="required") + + # Even with a non-tool question, should still call a tool + response = chat_with_tools.invoke([HumanMessage(content="Hello, how are you?")]) + + assert isinstance(response, AIMessage) + assert len(response.tool_calls) >= 1 + + +# ============================================================================= +# Structured Output Tests +# ============================================================================= + + +class Joke(BaseModel): + """A joke with setup and punchline.""" + + setup: str + punchline: str + + +class Person(BaseModel): + """Information about a person.""" + + name: str + age: int + occupation: str + + +@pytest.mark.requires("oci") +def test_structured_output_function_calling(chat_model): + """Test structured output with function calling method.""" + structured_llm = chat_model.with_structured_output(Joke) + + result = structured_llm.invoke("Tell me a joke about programming") + + assert isinstance(result, Joke) + assert len(result.setup) > 0 + assert len(result.punchline) > 0 + + +@pytest.mark.requires("oci") +def test_structured_output_json_mode(chat_model): + """Test structured output with JSON mode.""" + structured_llm = chat_model.with_structured_output(Person, method="json_mode") + + result = structured_llm.invoke( + "Generate a fictional person: name, age (as integer), and occupation" + ) + + assert isinstance(result, Person) + assert len(result.name) > 0 + assert isinstance(result.age, int) + assert len(result.occupation) > 0 + + +@pytest.mark.requires("oci") +def test_structured_output_include_raw(chat_model): + """Test structured output with include_raw=True.""" + structured_llm = chat_model.with_structured_output(Joke, include_raw=True) + + result = structured_llm.invoke("Tell me a joke") + + assert "raw" in result + assert "parsed" in result + assert isinstance(result["parsed"], Joke) + + +# ============================================================================= +# Response Format Tests +# ============================================================================= + + +@pytest.mark.requires("oci") +def test_response_format_json_object(chat_model): + """Test response_format with json_object.""" + chat_json = chat_model.bind(response_format={"type": "json_object"}) + + response = chat_json.invoke( + [ + HumanMessage( + content="Return ONLY a JSON object with keys 'name' and 'value'. " + "No explanation, no markdown, just the raw JSON." 
+ ) + ] + ) + + assert isinstance(response, AIMessage) + # Response should contain valid JSON (may be wrapped in markdown) + import json + import re + + content = response.content.strip() + + # Try to extract JSON from markdown code blocks if present + json_match = re.search(r"```(?:json)?\s*([\s\S]*?)\s*```", content) + if json_match: + content = json_match.group(1).strip() + + try: + parsed = json.loads(content) + assert isinstance(parsed, dict) + except json.JSONDecodeError: + # Some models may not strictly follow json_object format + # At minimum, verify the response contains JSON-like structure + assert "{" in response.content and "}" in response.content, ( + f"Response doesn't appear to contain JSON: {response.content[:200]}" + ) + + +# ============================================================================= +# Edge Cases and Error Handling +# ============================================================================= + + +@pytest.mark.requires("oci") +def test_empty_message_list(chat_model): + """Test handling of empty message list.""" + with pytest.raises(Exception): + chat_model.invoke([]) + + +@pytest.mark.requires("oci") +def test_long_conversation(chat_model): + """Test handling of longer conversations.""" + messages = [] + for i in range(5): + messages.append(HumanMessage(content=f"This is message {i + 1}")) + response = chat_model.invoke(messages) + messages.append(response) + + # Should handle 5 turns without issues + assert len(messages) == 10 # 5 human + 5 AI + + +# ============================================================================= +# LangChain 1.x Specific Tests +# ============================================================================= + + +@pytest.mark.requires("oci") +def test_ai_message_type(chat_model): + """Test that response is AIMessage (not just BaseMessage) - LangChain 1.x.""" + response = chat_model.invoke([HumanMessage(content="Hello")]) + + # LangChain 1.x: return type is AIMessage, not BaseMessage + assert type(response).__name__ == "AIMessage" + assert isinstance(response, AIMessage) + + +@pytest.mark.requires("oci") +def test_message_text_property(chat_model): + """Test that .text property works (LangChain 1.x change from .text()).""" + response = chat_model.invoke([HumanMessage(content="Say hello")]) + + # LangChain 1.x: .text is a property, not a method + # Both .content and .text should work + assert response.content is not None + # .text property should exist and return same as .content + if hasattr(response, "text"): + assert response.text == response.content + + +@pytest.mark.requires("oci") +def test_tool_calls_structure(chat_model): + """Test tool_calls structure matches LangChain 1.x format.""" + chat_with_tools = chat_model.bind_tools([get_weather]) + + response = chat_with_tools.invoke( + [HumanMessage(content="What's the weather in NYC?")] + ) + + assert hasattr(response, "tool_calls") + if response.tool_calls: + tc = response.tool_calls[0] + # LangChain 1.x tool call structure + assert "name" in tc + assert "args" in tc + assert "id" in tc + assert "type" in tc + assert tc["type"] == "tool_call" + + +def main(): + """Run tests manually for debugging.""" + import langchain_core + + print(f"langchain-core version: {langchain_core.__version__}") + print(f"Python version: {sys.version}") + + config = get_test_config() + print(f"\nTest configuration:") + print(f" Model: {config['model_id']}") + print(f" Endpoint: {config['service_endpoint']}") + print(f" Profile: {config['auth_profile']}") + + chat = ChatOCIGenAI( + 
model_id=config["model_id"], + service_endpoint=config["service_endpoint"], + compartment_id=config["compartment_id"], + auth_profile=config["auth_profile"], + auth_type=config["auth_type"], + model_kwargs={"temperature": 0, "max_tokens": 256}, + ) + + print("\n" + "=" * 60) + print("Running manual tests...") + print("=" * 60) + + # Test 1: Basic invoke + print("\n1. Testing basic invoke...") + response = chat.invoke([HumanMessage(content="Say hello")]) + print(f" Response: {response.content[:50]}...") + print(f" Type: {type(response).__name__}") + + # Test 2: Tool calling + print("\n2. Testing tool calling...") + chat_tools = chat.bind_tools([get_weather]) + response = chat_tools.invoke([HumanMessage(content="Weather in Tokyo?")]) + print(f" Tool calls: {response.tool_calls}") + + # Test 3: Structured output + print("\n3. Testing structured output...") + structured = chat.with_structured_output(Joke) + joke = structured.invoke("Tell a joke") + print(f" Setup: {joke.setup}") + print(f" Punchline: {joke.punchline}") + + # Test 4: Streaming + print("\n4. Testing streaming...") + chunks = list(chat.stream([HumanMessage(content="Count 1-3")])) + print(f" Chunks received: {len(chunks)}") + + print("\n" + "=" * 60) + print("All manual tests completed!") + print("=" * 60) + + +if __name__ == "__main__": + main() diff --git a/libs/oci/tests/integration_tests/chat_models/test_multi_model.py b/libs/oci/tests/integration_tests/chat_models/test_multi_model.py new file mode 100644 index 0000000..7043612 --- /dev/null +++ b/libs/oci/tests/integration_tests/chat_models/test_multi_model.py @@ -0,0 +1,598 @@ +#!/usr/bin/env python3 +"""Multi-model integration tests for ChatOCIGenAI. + +These tests verify that langchain-oci works correctly across different +model vendors available in OCI GenAI: Meta Llama, Cohere, xAI Grok, and OpenAI. 
+ +Setup: + export OCI_COMPARTMENT_ID= + export OCI_CONFIG_PROFILE=DEFAULT + export OCI_AUTH_TYPE=SECURITY_TOKEN + +Run all: + pytest tests/integration_tests/chat_models/test_multi_model.py -v + +Run specific vendor: + pytest tests/integration_tests/chat_models/test_multi_model.py -k "llama" -v + pytest tests/integration_tests/chat_models/test_multi_model.py -k "cohere" -v + pytest tests/integration_tests/chat_models/test_multi_model.py -k "grok" -v +""" + +import os + +import pytest +from langchain_core.messages import AIMessage, HumanMessage, SystemMessage +from pydantic import BaseModel, Field + +from langchain_oci.chat_models import ChatOCIGenAI + + +# ============================================================================= +# Model Configurations +# ============================================================================= + +# Meta Llama models +LLAMA_MODELS = [ + "meta.llama-4-maverick-17b-128e-instruct-fp8", + "meta.llama-4-scout-17b-16e-instruct", + "meta.llama-3.3-70b-instruct", + "meta.llama-3.1-70b-instruct", +] + +# Cohere models +COHERE_MODELS = [ + "cohere.command-a-03-2025", + "cohere.command-r-plus-08-2024", + "cohere.command-r-08-2024", +] + +# xAI Grok models +GROK_MODELS = [ + "xai.grok-4-fast-non-reasoning", + "xai.grok-3-fast", + "xai.grok-3-mini-fast", +] + +# OpenAI models on OCI +OPENAI_MODELS = [ + "openai.gpt-oss-20b", + "openai.gpt-oss-120b", +] + +# All models for comprehensive testing +ALL_MODELS = LLAMA_MODELS[:2] + COHERE_MODELS[:1] + GROK_MODELS[:1] + + +def get_config(): + """Get test configuration.""" + compartment_id = os.environ.get("OCI_COMPARTMENT_ID") + if not compartment_id: + pytest.skip("OCI_COMPARTMENT_ID not set") + return { + "service_endpoint": os.environ.get( + "OCI_GENAI_ENDPOINT", + "https://inference.generativeai.us-chicago-1.oci.oraclecloud.com", + ), + "compartment_id": compartment_id, + "auth_profile": os.environ.get("OCI_CONFIG_PROFILE", "DEFAULT"), + "auth_type": os.environ.get("OCI_AUTH_TYPE", "SECURITY_TOKEN"), + } + + +def create_llm(model_id: str, **kwargs): + """Create ChatOCIGenAI instance for a model.""" + config = get_config() + default_kwargs = {"temperature": 0, "max_tokens": 256} + default_kwargs.update(kwargs) + return ChatOCIGenAI( + model_id=model_id, + service_endpoint=config["service_endpoint"], + compartment_id=config["compartment_id"], + auth_profile=config["auth_profile"], + auth_type=config["auth_type"], + model_kwargs=default_kwargs, + ) + + +# ============================================================================= +# Basic Invoke Tests - All Models +# ============================================================================= + + +@pytest.mark.requires("oci") +@pytest.mark.parametrize("model_id", ALL_MODELS) +def test_basic_invoke_all_models(model_id: str): + """Test basic invoke works for all supported models.""" + llm = create_llm(model_id) + response = llm.invoke([HumanMessage(content="Say 'hello' only")]) + + assert isinstance(response, AIMessage) + assert response.content is not None + assert len(response.content) > 0 + + +@pytest.mark.requires("oci") +@pytest.mark.parametrize("model_id", ALL_MODELS) +def test_system_message_all_models(model_id: str): + """Test system messages work for all models.""" + llm = create_llm(model_id) + messages = [ + SystemMessage(content="You only respond with the word 'YES'."), + HumanMessage(content="Do you understand?"), + ] + response = llm.invoke(messages) + + assert isinstance(response, AIMessage) + assert response.content is not None + + +# 
============================================================================= +# Meta Llama Specific Tests +# ============================================================================= + + +@pytest.mark.requires("oci") +@pytest.mark.parametrize("model_id", LLAMA_MODELS[:2]) +def test_llama_tool_calling(model_id: str): + """Test tool calling on Llama models.""" + + def get_weather(city: str) -> str: + """Get weather for a city.""" + return f"Sunny in {city}" + + llm = create_llm(model_id) + llm_with_tools = llm.bind_tools([get_weather]) + + response = llm_with_tools.invoke( + [HumanMessage(content="What's the weather in Paris?")] + ) + + assert isinstance(response, AIMessage) + assert len(response.tool_calls) >= 1 + assert response.tool_calls[0]["name"] == "get_weather" + + +@pytest.mark.requires("oci") +def test_llama4_parallel_tool_calling(): + """Test parallel tool calling on Llama 4 models.""" + + def get_weather(city: str) -> str: + """Get weather for a city.""" + return f"Sunny in {city}" + + def get_time(city: str) -> str: + """Get current time in a city.""" + return f"12:00 PM in {city}" + + llm = create_llm("meta.llama-4-maverick-17b-128e-instruct-fp8") + llm_with_tools = llm.bind_tools( + [get_weather, get_time], parallel_tool_calls=True + ) + + response = llm_with_tools.invoke( + [HumanMessage(content="What's the weather and time in Tokyo?")] + ) + + assert isinstance(response, AIMessage) + assert len(response.tool_calls) >= 2 + tool_names = {tc["name"] for tc in response.tool_calls} + assert "get_weather" in tool_names + assert "get_time" in tool_names + + +@pytest.mark.requires("oci") +@pytest.mark.parametrize("model_id", LLAMA_MODELS[:2]) +def test_llama_structured_output(model_id: str): + """Test structured output on Llama models.""" + + class Answer(BaseModel): + answer: str = Field(description="The answer") + confidence: int = Field(description="Confidence 1-10", ge=1, le=10) + + llm = create_llm(model_id) + structured_llm = llm.with_structured_output(Answer) + + result = structured_llm.invoke("What is 2+2? 
Give answer and confidence.") + + assert isinstance(result, Answer) + assert "4" in result.answer + assert 1 <= result.confidence <= 10 + + +@pytest.mark.requires("oci") +def test_llama_streaming(): + """Test streaming on Llama models.""" + llm = create_llm("meta.llama-4-maverick-17b-128e-instruct-fp8") + + chunks = [] + for chunk in llm.stream([HumanMessage(content="Count 1 to 5")]): + chunks.append(chunk) + + assert len(chunks) > 0 + full_content = "".join(c.content for c in chunks if c.content) + assert len(full_content) > 0 + + +# ============================================================================= +# Cohere Specific Tests +# ============================================================================= + + +@pytest.mark.requires("oci") +@pytest.mark.parametrize("model_id", COHERE_MODELS[:2]) +def test_cohere_basic(model_id: str): + """Test basic functionality on Cohere models.""" + llm = create_llm(model_id) + response = llm.invoke([HumanMessage(content="What is 2+2?")]) + + assert isinstance(response, AIMessage) + assert "4" in response.content + + +@pytest.mark.requires("oci") +@pytest.mark.parametrize("model_id", COHERE_MODELS[:1]) +def test_cohere_tool_calling(model_id: str): + """Test tool calling on Cohere models.""" + # Note: Cohere tool calling has different response format + # This test verifies basic functionality + + def calculate(expression: str) -> str: + """Calculate a math expression and return the result.""" + return str(eval(expression)) + + llm = create_llm(model_id) + llm_with_tools = llm.bind_tools([calculate]) + + response = llm_with_tools.invoke( + [HumanMessage(content="Calculate 15 * 7")] + ) + + assert isinstance(response, AIMessage) + # Cohere may respond directly or call tool - both are valid + assert response.content or len(response.tool_calls) >= 1 + + +@pytest.mark.requires("oci") +def test_cohere_rejects_parallel_tool_calls(): + """Test that Cohere models reject parallel tool calls.""" + + def tool1(x: str) -> str: + """Tool 1.""" + return x + + llm = create_llm("cohere.command-a-03-2025") + llm_with_tools = llm.bind_tools([tool1], parallel_tool_calls=True) + + # Should raise error when trying to use parallel tool calls + with pytest.raises(ValueError, match="not supported for Cohere"): + llm_with_tools.invoke([HumanMessage(content="test")]) + + +@pytest.mark.requires("oci") +@pytest.mark.parametrize("model_id", COHERE_MODELS[:1]) +def test_cohere_structured_output(model_id: str): + """Test structured output on Cohere models.""" + + class Sentiment(BaseModel): + """Sentiment analysis result.""" + + text: str = Field(description="The analyzed text") + sentiment: str = Field(description="positive, negative, or neutral") + score: float = Field(description="Confidence score 0-1", ge=0, le=1) + + llm = create_llm(model_id) + structured_llm = llm.with_structured_output(Sentiment) + + result = structured_llm.invoke( + "Analyze sentiment: 'I love this product, it's amazing!'" + ) + + # Cohere structured output may return None in some cases + if result is not None: + assert isinstance(result, Sentiment) + assert result.sentiment.lower() in ["positive", "negative", "neutral"] + else: + pytest.skip("Cohere model returned None for structured output") + + +# ============================================================================= +# xAI Grok Specific Tests +# ============================================================================= + + +@pytest.mark.requires("oci") +@pytest.mark.parametrize("model_id", GROK_MODELS[:2]) +def test_grok_basic(model_id: 
str): + """Test basic functionality on Grok models.""" + llm = create_llm(model_id) + response = llm.invoke([HumanMessage(content="Hello, who are you?")]) + + assert isinstance(response, AIMessage) + assert response.content is not None + + +@pytest.mark.requires("oci") +@pytest.mark.parametrize("model_id", GROK_MODELS[:1]) +def test_grok_tool_calling(model_id: str): + """Test tool calling on Grok models.""" + + def search_web(query: str) -> str: + """Search the web for information.""" + return f"Results for: {query}" + + llm = create_llm(model_id) + llm_with_tools = llm.bind_tools([search_web]) + + response = llm_with_tools.invoke( + [HumanMessage(content="Search for the latest AI news")] + ) + + assert isinstance(response, AIMessage) + # Grok may or may not call tools depending on its judgment + # Just verify it responds + + +@pytest.mark.requires("oci") +@pytest.mark.parametrize("model_id", GROK_MODELS[:1]) +def test_grok_structured_output(model_id: str): + """Test structured output on Grok models.""" + + class Summary(BaseModel): + """A summary of text.""" + + main_point: str = Field(description="The main point") + key_facts: list[str] = Field(description="Key facts from the text") + + llm = create_llm(model_id) + structured_llm = llm.with_structured_output(Summary) + + result = structured_llm.invoke( + "Summarize: The Earth orbits the Sun once per year." + ) + + # Grok may return None in some cases + if result is not None: + assert isinstance(result, Summary) + assert len(result.main_point) > 0 + else: + pytest.skip("Grok model returned None for structured output") + + +@pytest.mark.requires("oci") +def test_grok_streaming(): + """Test streaming on Grok models.""" + llm = create_llm("xai.grok-3-mini-fast") + + chunks = [] + for chunk in llm.stream([HumanMessage(content="Count 1-3")]): + chunks.append(chunk) + + assert len(chunks) > 0 + + +# ============================================================================= +# OpenAI on OCI Tests +# ============================================================================= + + +def create_openai_llm(model_id: str, **kwargs): + """Create ChatOCIGenAI for OpenAI models (uses max_completion_tokens).""" + config = get_config() + default_kwargs = {"temperature": 0, "max_completion_tokens": 256} + default_kwargs.update(kwargs) + return ChatOCIGenAI( + model_id=model_id, + service_endpoint=config["service_endpoint"], + compartment_id=config["compartment_id"], + auth_profile=config["auth_profile"], + auth_type=config["auth_type"], + model_kwargs=default_kwargs, + ) + + +@pytest.mark.requires("oci") +@pytest.mark.parametrize("model_id", OPENAI_MODELS) +def test_openai_basic(model_id: str): + """Test basic functionality on OpenAI models on OCI.""" + llm = create_openai_llm(model_id) + response = llm.invoke([HumanMessage(content="Say hello")]) + + assert isinstance(response, AIMessage) + assert response.content is not None + + +@pytest.mark.requires("oci") +@pytest.mark.parametrize("model_id", OPENAI_MODELS) +def test_openai_system_message(model_id: str): + """Test system messages on OpenAI models.""" + llm = create_openai_llm(model_id) + messages = [ + SystemMessage(content="You only respond with the word 'YES'."), + HumanMessage(content="Do you understand?"), + ] + response = llm.invoke(messages) + + assert isinstance(response, AIMessage) + assert response.content is not None + + +@pytest.mark.requires("oci") +@pytest.mark.parametrize("model_id", OPENAI_MODELS) +def test_openai_streaming(model_id: str): + """Test streaming on OpenAI models.""" + 
llm = create_openai_llm(model_id, max_completion_tokens=50) + + chunks = [] + for chunk in llm.stream([HumanMessage(content="Count 1-3")]): + chunks.append(chunk) + + # OpenAI streaming should return chunks + assert len(chunks) > 0 + # Content may be in chunk.content or chunk may have other attributes + # Just verify we got chunks back (streaming works) + + +@pytest.mark.requires("oci") +@pytest.mark.parametrize("model_id", OPENAI_MODELS) +def test_openai_tool_calling(model_id: str): + """Test tool calling on OpenAI models.""" + + def get_info(topic: str) -> str: + """Get information about a topic.""" + return f"Info about {topic}" + + llm = create_openai_llm(model_id) + llm_with_tools = llm.bind_tools([get_info]) + + response = llm_with_tools.invoke( + [HumanMessage(content="Get info about Python")] + ) + + assert isinstance(response, AIMessage) + # OpenAI models should call the tool + assert len(response.tool_calls) >= 1 + assert response.tool_calls[0]["name"] == "get_info" + + +# ============================================================================= +# Cross-Model Comparison Tests +# ============================================================================= + + +@pytest.mark.requires("oci") +def test_same_prompt_different_models(): + """Test same prompt across different model vendors.""" + prompt = "What is the capital of France? Answer in one word." + + models_to_test = [ + "meta.llama-4-maverick-17b-128e-instruct-fp8", + "cohere.command-a-03-2025", + "xai.grok-3-mini-fast", + ] + + responses = {} + for model_id in models_to_test: + try: + llm = create_llm(model_id) + response = llm.invoke([HumanMessage(content=prompt)]) + responses[model_id] = response.content + except Exception as e: + responses[model_id] = f"Error: {e}" + + # All should mention Paris + for model_id, content in responses.items(): + if not content.startswith("Error"): + assert "paris" in content.lower(), f"{model_id} didn't say Paris: {content}" + + +@pytest.mark.requires("oci") +def test_tool_calling_consistency(): + """Test tool calling works consistently across Llama models.""" + + def get_price(item: str) -> float: + """Get the price of an item in dollars.""" + return 9.99 + + # Only test Llama models - Cohere has different tool call format + models_with_tools = [ + "meta.llama-4-maverick-17b-128e-instruct-fp8", + "meta.llama-4-scout-17b-16e-instruct", + ] + + for model_id in models_with_tools: + llm = create_llm(model_id) + llm_with_tools = llm.bind_tools([get_price]) + + response = llm_with_tools.invoke( + [HumanMessage(content="What's the price of apples?")] + ) + + assert isinstance(response, AIMessage), f"{model_id} failed" + assert len(response.tool_calls) >= 1, f"{model_id} didn't call tool" + assert response.tool_calls[0]["name"] == "get_price" + + +# ============================================================================= +# Model-Specific Features Tests +# ============================================================================= + + +@pytest.mark.requires("oci") +def test_llama3_vision_model_exists(): + """Verify vision-capable Llama model can be instantiated.""" + # Note: Actual vision testing would require image input support + llm = create_llm("meta.llama-3.2-90b-vision-instruct") + response = llm.invoke([HumanMessage(content="Describe what you can do")]) + + assert isinstance(response, AIMessage) + + +@pytest.mark.requires("oci") +def test_model_with_custom_kwargs(): + """Test models with custom generation parameters.""" + llm = create_llm( + 
"meta.llama-4-maverick-17b-128e-instruct-fp8", + temperature=0.7, + max_tokens=100, + top_p=0.9, + ) + + response = llm.invoke([HumanMessage(content="Write a creative sentence")]) + + assert isinstance(response, AIMessage) + assert response.content is not None + + +# ============================================================================= +# Performance / Latency Awareness Tests +# ============================================================================= + + +@pytest.mark.requires("oci") +def test_fast_models_respond_quickly(): + """Test that 'fast' model variants respond (existence check).""" + fast_models = [ + "xai.grok-3-fast", + "xai.grok-3-mini-fast", + ] + + for model_id in fast_models: + llm = create_llm(model_id, max_tokens=50) + response = llm.invoke([HumanMessage(content="Hi")]) + assert isinstance(response, AIMessage) + + +def main(): + """Manual test runner for debugging.""" + import sys + + print("=" * 60) + print("Multi-Model Integration Tests") + print("=" * 60) + + config = get_config() + print(f"\nEndpoint: {config['service_endpoint']}") + print(f"Profile: {config['auth_profile']}") + + # Test each vendor + test_models = [ + ("Meta Llama 4", "meta.llama-4-maverick-17b-128e-instruct-fp8"), + ("Cohere Command", "cohere.command-a-03-2025"), + ("xAI Grok", "xai.grok-3-mini-fast"), + ] + + for name, model_id in test_models: + print(f"\n--- Testing {name} ({model_id}) ---") + try: + llm = create_llm(model_id) + response = llm.invoke([HumanMessage(content="Say hello")]) + print(f"✓ Response: {response.content[:50]}...") + except Exception as e: + print(f"✗ Error: {e}") + + print("\n" + "=" * 60) + print("Manual tests complete") + + +if __name__ == "__main__": + main() diff --git a/libs/oci/tests/integration_tests/chat_models/test_parallel_tool_calling_integration.py b/libs/oci/tests/integration_tests/chat_models/test_parallel_tool_calling_integration.py new file mode 100644 index 0000000..ebbbbcb --- /dev/null +++ b/libs/oci/tests/integration_tests/chat_models/test_parallel_tool_calling_integration.py @@ -0,0 +1,324 @@ +#!/usr/bin/env python3 +""" +Integration test for parallel tool calling feature. + +This script tests parallel tool calling with actual OCI GenAI API calls. 
+ +Setup: + export OCI_COMPARTMENT_ID= + export OCI_GENAI_ENDPOINT= # optional + export OCI_CONFIG_PROFILE= # optional + export OCI_AUTH_TYPE= # optional + +Run with: + python test_parallel_tool_calling_integration.py +""" + +import logging +import os +import sys +import time + +from langchain_core.messages import HumanMessage + +from langchain_oci.chat_models import ChatOCIGenAI + +# Configure logging +logging.basicConfig(level=logging.INFO, format="%(message)s") + + +def get_weather(city: str, unit: str = "fahrenheit") -> str: + """Get the current weather in a given location.""" + # Simulate API delay + time.sleep(0.5) + return f"Weather in {city}: Sunny, 72°{unit[0].upper()}" + + +def calculate_tip(amount: float, percent: float = 15.0) -> float: + """Calculate tip amount.""" + # Simulate API delay + time.sleep(0.5) + return round(amount * (percent / 100), 2) + + +def get_population(city: str) -> int: + """Get the population of a city.""" + # Simulate API delay + time.sleep(0.5) + populations = { + "tokyo": 14000000, + "new york": 8000000, + "london": 9000000, + "paris": 2000000, + "chicago": 2700000, + "los angeles": 4000000, + } + return populations.get(city.lower(), 1000000) + + +def test_parallel_tool_calling_enabled(): + """Test parallel tool calling with parallel_tool_calls=True in bind_tools.""" + logging.info("\n" + "=" * 80) + logging.info("TEST 1: Parallel Tool Calling ENABLED (via bind_tools)") + logging.info("=" * 80) + + chat = ChatOCIGenAI( + model_id=os.environ.get( + "OCI_MODEL_ID", "meta.llama-4-maverick-17b-128e-instruct-fp8" + ), + service_endpoint=os.environ.get( + "OCI_GENAI_ENDPOINT", + "https://inference.generativeai.us-chicago-1.oci.oraclecloud.com", + ), + compartment_id=os.environ.get("OCI_COMPARTMENT_ID"), + auth_profile=os.environ.get("OCI_CONFIG_PROFILE", "DEFAULT"), + auth_type=os.environ.get("OCI_AUTH_TYPE", "SECURITY_TOKEN"), + model_kwargs={"temperature": 0, "max_tokens": 500}, + ) + + # Bind tools with parallel_tool_calls=True + chat_with_tools = chat.bind_tools( + [get_weather, calculate_tip, get_population], parallel_tool_calls=True + ) + + # Invoke with query that needs weather info + logging.info("\nQuery: 'What's the weather in New York City?'") + + start_time = time.time() + response = chat_with_tools.invoke( + [HumanMessage(content="What's the weather in New York City?")] + ) + elapsed_time = time.time() - start_time + + logging.info(f"\nResponse time: {elapsed_time:.2f}s") + content = response.content[:200] if response.content else "(empty)" + logging.info(f"Response content: {content}...") + logging.info(f"Tool calls count: {len(response.tool_calls)}") + + if response.tool_calls: + logging.info("\nTool calls:") + for i, tc in enumerate(response.tool_calls, 1): + logging.info(f" {i}. 
{tc['name']}({tc['args']})") + else: + logging.info("\n⚠️ No tool calls in response.tool_calls") + logging.info(f"Additional kwargs: {response.additional_kwargs.keys()}") + + # Verify we got tool calls + count = len(response.tool_calls) + assert count >= 1, f"Should have at least one tool call, got {count}" + + # Verify parallel_tool_calls was set + logging.info("\n✓ TEST 1 PASSED: Parallel tool calling enabled and working") + return elapsed_time + + +def test_parallel_tool_calling_disabled(): + """Test tool calling with parallel_tool_calls=False (sequential).""" + logging.info("\n" + "=" * 80) + logging.info("TEST 2: Parallel Tool Calling DISABLED (Sequential)") + logging.info("=" * 80) + + chat = ChatOCIGenAI( + model_id=os.environ.get( + "OCI_MODEL_ID", "meta.llama-4-maverick-17b-128e-instruct-fp8" + ), + service_endpoint=os.environ.get( + "OCI_GENAI_ENDPOINT", + "https://inference.generativeai.us-chicago-1.oci.oraclecloud.com", + ), + compartment_id=os.environ.get("OCI_COMPARTMENT_ID"), + auth_profile=os.environ.get("OCI_CONFIG_PROFILE", "DEFAULT"), + auth_type=os.environ.get("OCI_AUTH_TYPE", "SECURITY_TOKEN"), + model_kwargs={"temperature": 0, "max_tokens": 500}, + ) + + # Bind tools without parallel_tool_calls (defaults to sequential) + chat_with_tools = chat.bind_tools([get_weather, calculate_tip, get_population]) + + # Same query as test 1 + logging.info("\nQuery: 'What's the weather in New York City?'") + + start_time = time.time() + response = chat_with_tools.invoke( + [HumanMessage(content="What's the weather in New York City?")] + ) + elapsed_time = time.time() - start_time + + logging.info(f"\nResponse time: {elapsed_time:.2f}s") + content = response.content[:200] if response.content else "(empty)" + logging.info(f"Response content: {content}...") + logging.info(f"Tool calls count: {len(response.tool_calls)}") + + if response.tool_calls: + logging.info("\nTool calls:") + for i, tc in enumerate(response.tool_calls, 1): + logging.info(f" {i}. {tc['name']}({tc['args']})") + + # Verify we got tool calls + count = len(response.tool_calls) + assert count >= 1, f"Should have at least one tool call, got {count}" + + logging.info("\n✓ TEST 2 PASSED: Sequential tool calling works") + return elapsed_time + + +def test_multiple_tool_calls(): + """Test query that should trigger multiple tool calls.""" + logging.info("\n" + "=" * 80) + logging.info("TEST 3: Multiple Tool Calls Query") + logging.info("=" * 80) + + chat = ChatOCIGenAI( + model_id=os.environ.get( + "OCI_MODEL_ID", "meta.llama-4-maverick-17b-128e-instruct-fp8" + ), + service_endpoint=os.environ.get( + "OCI_GENAI_ENDPOINT", + "https://inference.generativeai.us-chicago-1.oci.oraclecloud.com", + ), + compartment_id=os.environ.get("OCI_COMPARTMENT_ID"), + auth_profile=os.environ.get("OCI_CONFIG_PROFILE", "DEFAULT"), + auth_type=os.environ.get("OCI_AUTH_TYPE", "SECURITY_TOKEN"), + model_kwargs={"temperature": 0, "max_tokens": 500}, + ) + + # Bind tools with parallel_tool_calls=True + chat_with_tools = chat.bind_tools( + [get_weather, get_population], parallel_tool_calls=True + ) + + logging.info("\nQuery: 'What's the weather and population of Tokyo?'") + + response = chat_with_tools.invoke( + [HumanMessage(content="What's the weather and population of Tokyo?")] + ) + + logging.info(f"\nResponse content: {response.content}") + logging.info(f"Tool calls count: {len(response.tool_calls)}") + + if response.tool_calls: + logging.info("\nTool calls:") + for i, tc in enumerate(response.tool_calls, 1): + logging.info(f" {i}. 
{tc['name']}({tc['args']})")
+
+    logging.info("\n✓ TEST 3 PASSED: Multiple tool calls query works")
+
+
+def test_cohere_model_error():
+    """Test that Cohere models raise an error with parallel_tool_calls."""
+    logging.info("\n" + "=" * 80)
+    logging.info("TEST 4: Cohere Model Error Handling")
+    logging.info("=" * 80)
+
+    chat = ChatOCIGenAI(
+        model_id="cohere.command-r-plus",
+        service_endpoint=os.environ.get(
+            "OCI_GENAI_ENDPOINT",
+            "https://inference.generativeai.us-chicago-1.oci.oraclecloud.com",
+        ),
+        compartment_id=os.environ.get("OCI_COMPARTMENT_ID"),
+        auth_profile=os.environ.get("OCI_CONFIG_PROFILE", "DEFAULT"),
+        auth_type=os.environ.get("OCI_AUTH_TYPE", "SECURITY_TOKEN"),
+    )
+
+    # Try to enable parallel tool calls with Cohere (should fail)
+    chat_with_tools = chat.bind_tools([get_weather], parallel_tool_calls=True)
+
+    logging.info("\nAttempting to use parallel_tool_calls with Cohere model...")
+
+    try:
+        _ = chat_with_tools.invoke(
+            [HumanMessage(content="What's the weather in Paris?")]
+        )
+        logging.info("❌ TEST FAILED: Should have raised ValueError")
+        return False
+    except ValueError as e:
+        if "not supported for Cohere" in str(e):
+            logging.info(f"\n✓ Correctly raised error: {e}")
+            logging.info("\n✓ TEST 4 PASSED: Cohere validation works")
+            return True
+        else:
+            logging.info(f"❌ Wrong error: {e}")
+            return False
+
+
+def main():
+    logging.info("=" * 80)
+    logging.info("PARALLEL TOOL CALLING INTEGRATION TESTS")
+    logging.info("=" * 80)
+
+    # Check required env vars
+    if not os.environ.get("OCI_COMPARTMENT_ID"):
+        logging.info("\n❌ ERROR: OCI_COMPARTMENT_ID environment variable not set")
+        logging.info("Please set: export OCI_COMPARTMENT_ID=")
+        sys.exit(1)
+
+    logging.info("\nUsing configuration:")
+    model_id = os.environ.get(
+        "OCI_MODEL_ID", "meta.llama-4-maverick-17b-128e-instruct-fp8"
+    )
+    logging.info(f"  Model: {model_id}")
+    endpoint = os.environ.get("OCI_GENAI_ENDPOINT", "default")
+    logging.info(f"  Endpoint: {endpoint}")
+    profile = os.environ.get("OCI_CONFIG_PROFILE", "DEFAULT")
+    logging.info(f"  Profile: {profile}")
+    logging.info(f"  Compartment: {os.environ.get('OCI_COMPARTMENT_ID')[:25]}...")
+
+    results = []
+
+    try:
+        # Run tests
+        parallel_time = test_parallel_tool_calling_enabled()
+        results.append(("Parallel Enabled", True))
+
+        sequential_time = test_parallel_tool_calling_disabled()
+        results.append(("Sequential (Disabled)", True))
+
+        test_multiple_tool_calls()
+        results.append(("Multiple Tool Calls", True))
+
+        cohere_test = test_cohere_model_error()
+        results.append(("Cohere Validation", cohere_test))
+
+        # Print summary
+        logging.info("\n" + "=" * 80)
+        logging.info("TEST SUMMARY")
+        logging.info("=" * 80)
+
+        for test_name, passed in results:
+            status = "✓ PASSED" if passed else "✗ FAILED"
+            logging.info(f"{status}: {test_name}")
+
+        passed_count = sum(1 for _, passed in results if passed)
+        total_count = len(results)
+
+        logging.info(f"\nTotal: {passed_count}/{total_count} tests passed")
+
+        # Performance comparison
+        if parallel_time and sequential_time:
+            logging.info("\n" + "=" * 80)
+            logging.info("PERFORMANCE COMPARISON")
+            logging.info("=" * 80)
+            logging.info(f"Parallel: {parallel_time:.2f}s")
+            logging.info(f"Sequential: {sequential_time:.2f}s")
+            if parallel_time > 0:  # guard against division by zero
+                speedup = sequential_time / parallel_time
+                logging.info(f"Speedup: {speedup:.2f}×")
+
+        if passed_count == total_count:
+            logging.info("\n🎉 ALL TESTS PASSED!")
+            return 0
+        else:
+            logging.info(f"\n⚠️ {total_count - passed_count} test(s) failed")
+            return 1
+
+    
except Exception as e: + logging.info(f"\n❌ ERROR: {e}") + import traceback + + traceback.print_exc() + return 1 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/libs/oci/tests/integration_tests/chat_models/test_tool_calling.py b/libs/oci/tests/integration_tests/chat_models/test_tool_calling.py index 9c07763..03e27ea 100644 --- a/libs/oci/tests/integration_tests/chat_models/test_tool_calling.py +++ b/libs/oci/tests/integration_tests/chat_models/test_tool_calling.py @@ -53,7 +53,7 @@ import os import pytest -from langchain.tools import StructuredTool +from langchain_core.tools import StructuredTool from langchain_core.messages import HumanMessage, SystemMessage from langgraph.graph import END, START, MessagesState, StateGraph from langgraph.prebuilt import ToolNode diff --git a/libs/oci/tests/unit_tests/chat_models/test_parallel_tool_calling.py b/libs/oci/tests/unit_tests/chat_models/test_parallel_tool_calling.py new file mode 100644 index 0000000..b796560 --- /dev/null +++ b/libs/oci/tests/unit_tests/chat_models/test_parallel_tool_calling.py @@ -0,0 +1,234 @@ +"""Unit tests for parallel tool calling feature.""" + +from unittest.mock import MagicMock + +import pytest +from langchain_core.messages import HumanMessage + +from langchain_oci.chat_models import ChatOCIGenAI + + +@pytest.mark.requires("oci") +def test_parallel_tool_calls_bind_tools_explicit_true(): + """Test parallel_tool_calls=True in bind_tools.""" + oci_gen_ai_client = MagicMock() + llm = ChatOCIGenAI( + model_id="meta.llama-4-maverick-17b-128e-instruct-fp8", client=oci_gen_ai_client + ) + + def tool1(x: int) -> int: + """Tool 1.""" + return x + 1 + + def tool2(x: int) -> int: + """Tool 2.""" + return x * 2 + + llm_with_tools = llm.bind_tools([tool1, tool2], parallel_tool_calls=True) + + assert llm_with_tools.kwargs.get("is_parallel_tool_calls") is True + + +@pytest.mark.requires("oci") +def test_parallel_tool_calls_bind_tools_explicit_false(): + """Test parallel_tool_calls=False in bind_tools.""" + oci_gen_ai_client = MagicMock() + llm = ChatOCIGenAI( + model_id="meta.llama-4-maverick-17b-128e-instruct-fp8", client=oci_gen_ai_client + ) + + def tool1(x: int) -> int: + """Tool 1.""" + return x + 1 + + llm_with_tools = llm.bind_tools([tool1], parallel_tool_calls=False) + + # When explicitly False, should not set the parameter + assert "is_parallel_tool_calls" not in llm_with_tools.kwargs + + +@pytest.mark.requires("oci") +def test_parallel_tool_calls_bind_tools_default_none(): + """Test that bind_tools without parallel_tool_calls doesn't enable it.""" + oci_gen_ai_client = MagicMock() + llm = ChatOCIGenAI( + model_id="meta.llama-4-maverick-17b-128e-instruct-fp8", client=oci_gen_ai_client + ) + + def tool1(x: int) -> int: + """Tool 1.""" + return x + 1 + + # Don't specify parallel_tool_calls in bind_tools + llm_with_tools = llm.bind_tools([tool1]) + + # Should not have is_parallel_tool_calls set + assert "is_parallel_tool_calls" not in llm_with_tools.kwargs + + +@pytest.mark.requires("oci") +def test_parallel_tool_calls_passed_to_oci_api_meta(): + """Test that is_parallel_tool_calls is passed to OCI API for Meta models.""" + oci_gen_ai_client = MagicMock() + llm = ChatOCIGenAI( + model_id="meta.llama-4-maverick-17b-128e-instruct-fp8", client=oci_gen_ai_client + ) + + def get_weather(city: str) -> str: + """Get weather for a city.""" + return f"Weather in {city}" + + llm_with_tools = llm.bind_tools([get_weather], parallel_tool_calls=True) + + # Prepare a request + request = llm_with_tools._prepare_request( + 
[HumanMessage(content="What's the weather?")], + stop=None, + stream=False, + **llm_with_tools.kwargs, + ) + + # Verify is_parallel_tool_calls is in the request + assert hasattr(request.chat_request, "is_parallel_tool_calls") + assert request.chat_request.is_parallel_tool_calls is True + + +@pytest.mark.requires("oci") +def test_parallel_tool_calls_cohere_raises_error(): + """Test that Cohere models raise error for parallel tool calls.""" + oci_gen_ai_client = MagicMock() + llm = ChatOCIGenAI(model_id="cohere.command-r-plus", client=oci_gen_ai_client) + + def tool1(x: int) -> int: + """Tool 1.""" + return x + 1 + + llm_with_tools = llm.bind_tools([tool1], parallel_tool_calls=True) + + # Should raise ValueError when trying to prepare request + with pytest.raises(ValueError, match="not supported for Cohere"): + llm_with_tools._prepare_request( + [HumanMessage(content="test")], + stop=None, + stream=False, + **llm_with_tools.kwargs, + ) + + +@pytest.mark.requires("oci") +def test_version_filter_llama_3_0_blocked(): + """Test that Llama 3.0 models are blocked from parallel tool calling.""" + oci_gen_ai_client = MagicMock() + llm = ChatOCIGenAI(model_id="meta.llama-3-70b-instruct", client=oci_gen_ai_client) + + def tool1(x: int) -> int: + """Tool 1.""" + return x + 1 + + # Should raise ValueError when trying to enable parallel tool calling + with pytest.raises(ValueError, match="Llama 4\\+"): + llm.bind_tools([tool1], parallel_tool_calls=True) + + +@pytest.mark.requires("oci") +def test_version_filter_llama_3_1_blocked(): + """Test that Llama 3.1 models are blocked from parallel tool calling.""" + oci_gen_ai_client = MagicMock() + llm = ChatOCIGenAI(model_id="meta.llama-3.1-70b-instruct", client=oci_gen_ai_client) + + def tool1(x: int) -> int: + """Tool 1.""" + return x + 1 + + # Should raise ValueError + with pytest.raises(ValueError, match="Llama 4\\+"): + llm.bind_tools([tool1], parallel_tool_calls=True) + + +@pytest.mark.requires("oci") +def test_version_filter_llama_3_2_blocked(): + """Test that Llama 3.2 models are blocked from parallel tool calling.""" + oci_gen_ai_client = MagicMock() + llm = ChatOCIGenAI( + model_id="meta.llama-3.2-11b-vision-instruct", client=oci_gen_ai_client + ) + + def tool1(x: int) -> int: + """Tool 1.""" + return x + 1 + + # Should raise ValueError + with pytest.raises(ValueError, match="Llama 4\\+"): + llm.bind_tools([tool1], parallel_tool_calls=True) + + +@pytest.mark.requires("oci") +def test_version_filter_llama_3_3_blocked(): + """Test that Llama 3.3 models are blocked from parallel tool calling.""" + oci_gen_ai_client = MagicMock() + llm = ChatOCIGenAI(model_id="meta.llama-3.3-70b-instruct", client=oci_gen_ai_client) + + def tool1(x: int) -> int: + """Tool 1.""" + return x + 1 + + # Should raise ValueError - Llama 3.3 doesn't actually support parallel calls + with pytest.raises(ValueError, match="Llama 4\\+"): + llm.bind_tools([tool1], parallel_tool_calls=True) + + +@pytest.mark.requires("oci") +def test_version_filter_llama_4_allowed(): + """Test that Llama 4 models are allowed parallel tool calling.""" + oci_gen_ai_client = MagicMock() + llm = ChatOCIGenAI( + model_id="meta.llama-4-maverick-17b-128e-instruct-fp8", client=oci_gen_ai_client + ) + + def tool1(x: int) -> int: + """Tool 1.""" + return x + 1 + + # Should NOT raise ValueError + llm_with_tools = llm.bind_tools([tool1], parallel_tool_calls=True) + assert llm_with_tools.kwargs.get("is_parallel_tool_calls") is True + + +@pytest.mark.requires("oci") +def test_version_filter_other_models_allowed(): + 
"""Test that other GenericChatRequest models are allowed parallel tool calling.""" + oci_gen_ai_client = MagicMock() + + # Test with xAI Grok + llm_grok = ChatOCIGenAI(model_id="xai.grok-4-fast", client=oci_gen_ai_client) + + def tool1(x: int) -> int: + """Tool 1.""" + return x + 1 + + # Should NOT raise ValueError for Grok + llm_with_tools = llm_grok.bind_tools([tool1], parallel_tool_calls=True) + assert llm_with_tools.kwargs.get("is_parallel_tool_calls") is True + + +@pytest.mark.requires("oci") +def test_version_filter_supports_parallel_tool_calls_method(): + """Test the _supports_parallel_tool_calls method directly.""" + oci_gen_ai_client = MagicMock() + llm = ChatOCIGenAI( + model_id="meta.llama-4-maverick-17b-128e-instruct-fp8", client=oci_gen_ai_client + ) + + # Test various model IDs + model_id = "meta.llama-4-maverick-17b-128e-instruct-fp8" + assert llm._supports_parallel_tool_calls(model_id) is True + # Llama 3.3 NOT supported + assert llm._supports_parallel_tool_calls("meta.llama-3.3-70b-instruct") is False + model_id = "meta.llama-3.2-11b-vision-instruct" + assert llm._supports_parallel_tool_calls(model_id) is False + assert llm._supports_parallel_tool_calls("meta.llama-3.1-70b-instruct") is False + assert llm._supports_parallel_tool_calls("meta.llama-3-70b-instruct") is False + assert llm._supports_parallel_tool_calls("cohere.command-r-plus") is False + assert llm._supports_parallel_tool_calls("xai.grok-4-fast") is True + assert llm._supports_parallel_tool_calls("openai.gpt-4") is True + assert llm._supports_parallel_tool_calls("mistral.mistral-large") is True