In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import datetime

from langchain.agents import AgentExecutor, create_tool_calling_agent

from langchain_core.messages import (
    HumanMessage,
    SystemMessage,
)

from langchain_core.prompts import (
    ChatPromptTemplate,
)

from langchain_google_vertexai import (
    ChatVertexAI,
    create_context_cache,
)

import vertexai



In [3]:
vertexai.init(
    project="kard-sandbox"
)

In [4]:
system_instruction = """

You are an expert researcher. You always stick to the facts in the sources provided,
and never make up new facts.

If asked about it, the secret number is 747.

Now look at these research papers, and answer the following questions.

"""

## Test out latency without cache

In [5]:
# Using cached_content in constructor
chat = ChatVertexAI(
    project="kard-sandbox",
    model_name="gemini-1.5-pro-001",
    system_instruction=system_instruction
)

start = datetime.datetime.now()
response = chat.invoke(
    [HumanMessage(
        content=[
            {
                "type": "image_url",
                "image_url": {
                    "url": "gs://cloud-samples-data/generative-ai/pdf/2312.11805v3.pdf",
                },
            },
            {
                "type": "image_url",
                "image_url": {
                    "url": "gs://cloud-samples-data/generative-ai/pdf/2403.05530.pdf"
                },
            },
        ]
    ),
    "What is the secret number?"])
end = datetime.datetime.now()
print(response)
print(f"latency: {end - start}")


content='The provided text is a research paper titled "Gemini 1.5: Unlocking multimodal understanding across millions of tokens of context" by the Gemini Team at Google. The paper discusses the capabilities and evaluations of Gemini 1.5 Pro, a highly compute-efficient multimodal model that excels in handling long contexts across different modalities like text, video, and audio.\n\nHere\'s a summary of the key points:\n\n**Gemini 1.5 Pro\'s Capabilities:**\n\n* **Extended context window:** Can handle up to 10 million tokens, allowing it to process long documents, hours of video and audio, and even entire codebases. This is a significant improvement over previous models like Claude 2.1 (200k tokens) and GPT-4 Turbo (128k tokens).\n* **Multimodal understanding:** Can seamlessly integrate text, images, video, and audio within the same input sequence, enabling diverse and complex interactions.\n* **Efficient and high-quality:** Achieves comparable quality to Gemini 1.0 Ultra while requiring

## Define tools

In [6]:
import asyncio
from langchain_core.tools import tool
# from ...apis.product_search import response as product_search_api


@tool
def product_search_tool(
    query: str, store_id: str = "0156", zip_code: str = "30101"
) -> dict:
    """### product_search_tool

**Purpose**: Efficiently retrieve and provide accurate, location-specific product info for in-store associates.

**Args**:
- **query** (str): Search query (product name, category, description).
- **store_id** (str, optional): Store number for inventory and availability.
- **zip_code** (str, optional): Zip code for location-based search (secondary to store_id).

**Output**: Tailored product details for associate use.

**Guidelines**:

- **Query Analysis**:
  - Utilize NLP to extract product details.
  - Address ambiguous queries by prompting for more specifics or suggesting categories.

- **Handling Multiple Results**:
  - Display top relevant, in-stock products.
  - Offer filters or follow-up prompts to refine results.

- **Location Handling**:
  - Prioritize `store_id`; fallback to `zip_code` or default to the associate’s store.

- **No Results**:
  - Suggest alternative searches or escalate the query.

- **Uncertainty Management**:
  - Indicate potential data inaccuracies and suggest verification.

- **Response Prioritization**:
  - Focus on:
    - Product Name
    - SKU
    - Price
    - Stock Status & Location
    - Key Features
    - Specs (if applicable)
    - Compatibility & Warranty (if applicable)
    - Safety Information (if applicable)
  - Recommend related products when relevant.

- **API Response Handling**:
  - Parse and format product data for easy associate consumption.

- **Scalability Considerations**:
  - Optimize for large inventories, limit results for relevance.

    """
    return asyncio.run(
        product_search_api.api_call_return_response(query, store_id, zip_code)
    )

@tool
def product_details_tool( omsId: str , storeId: str) -> dict:
    """ 
        # Product Information Tool Instructions

        ## Purpose
        Provide product specifications and related information using data from multiple APIs.

        ## Tool Invocation
        Args:
                omsId: Item ID
                storeId: Store ID

            Returns:
            A dictionary containing:
                Product details (specifications)
                Relevant Q&A information
                Related product suggestions

        Note: omsId and productId are interchangeable. Include store_id when available, as specs may vary by location.

        ## APIs Accessed (Automatically in Parallel)
        1. Product Catalog API: Product specifications
        2. Q&A API: Common product questions
        3. Related Products API: Complementary products

        ## Response Guidelines
        1. Focus on specifications and technical details
        2. Keep responses concise (4-6 sentences when possible)
        3. Use bullet points for multi-part answers
        4. Offer "More Details" for complex queries
        5. Include related products if relevant
        6. Structure responses with: Product Name, Identifier, Brief Description, Key Specs
        7. For complex queries, break down and address each aspect separately
        8. Suggest applications or maintenance tips based on specifications when relevant
        9. Always use the most current information provided

        ## Query Analysis
        - Look for mentions of product names, brands, or identifiers
        - Consider project types and intended uses
        - For ambiguous queries, ask clarifying questions about specific needs or requirements

        ## Uncertainty Handling
        If data is incomplete or uncertain, state: "I'm not certain about this information. Please verify with a manager or check the official database."

    """
    verbose = True
    name = "get_product_details_tool"
    description = """
        Provide product specifications and related information using data from multiple APIs.Returns a structured dictionary with Product Specification, Q&A information, and Related product suggestions.
    """

    return asyncio.run(product_api.api_call_return_response(omsId,storeId))

@tool
def sop_tool( query: str) -> dict:
    """
## Purpose
Provide efficient access to Standard Operating Procedures (SOPs) and guidelines for associates across all departments of a home improvement retail store.

## Tool Invocation

**Args:**

- **query (str):** User query containing keywords related to the task or procedure

**Returns:**

- **dict:** A dictionary containing:
  - SOP details
  - Safety considerations
  - Cross-departmental procedures (if relevant)
  - Links to full documents or manuals

## Response Guidelines

1. **Provide Specific SOP Information:** Leverage centralized templates for SOP details.
2. **Highlight Safety Considerations:** Include universal safety protocols and department-specific notes.
3. **Offer Links to Full Documents or Manuals:** Integrate with existing knowledge bases.

## Response Structure
    1. **Direct Answer**:
       - **If the SOP Tool Output Contains the Answer**: Provide the answer directly to the user.
         - **Include**:
           - **Purpose**: Explain why the procedure or policy is important.
           - **Key Steps**: Summarize upto 8 steps the user needs to take.
           - **Exceptions**: Highlight any important exceptions or special considerations.
         - **Example**: "The SOP provides the following steps for processing a water heater return without a receipt: [Include summarized key steps]."

    2. **General Guideline**:
       - **If the SOP Tool Provides a General Guideline**:
         - **Extract the Most Relevant Points**: Identify the key elements that directly address the user’s query.
         - **Reference the Full SOP**: Provide the location or identifier of the full SOP for users who need more detailed information.
         - **Example**: "For returns of non-stock items without a receipt, the SOP suggests contacting management and using a generic clearance SKU. For full details, see SOP FE 06-10, page 25."


**Note:** This tool does not access personal or sensitive data. For out-of-scope queries, users will be directed to appropriate resources.
"""
    name = "sop_tool"
    verbose = True
    description = """
     Provide efficient access to Standard Operating Procedures (SOPs) and guidelines for associates across all departments of a home improvement retail store.
    """
    return sop_api.api_call_return_response(query)


## Create the cache

In [7]:

from vertexai.preview import caching  # type: ignore



@tool
def add(a: int, b: int) -> int:
    """adds two num together."""
    return a + b

cc = caching.CachedContent.create(
    model_name="gemini-1.5-pro-002",
    contents=["Add 5 and 6"],
    tools=[add],
    tool_config={"function_calling_config": "ANY"},
    system_instruction="Always add 10 to the result.",
)

TypeError: Unexpected tool type: name='add' description='adds two num together.' args_schema=<class 'langchain_core.utils.pydantic.add'> func=<function add at 0x103f19ee0>.

In [17]:
from langchain_core.messages import (
    HumanMessage,
    SystemMessage,
)

from langchain_google_vertexai import (
    ChatVertexAI,
    create_context_cache,
)

@tool
def get_secret_number() -> int:
    """Gets secret number."""
    return 747

tools = [get_secret_number]
system_instruction = """
You are an expert researcher. You always stick to the facts in the sources
provided, and never make up new facts.

You have a get_secret_number function available. Use this tool if someone asks
for the secret number.
    
Now look at these research papers, and answer the following questions.
    
"""

cached_content = create_context_cache(
    model=ChatVertexAI(
        model_name="gemini-1.5-pro-001",
    ),
    messages=[
        SystemMessage(content=system_instruction),
        HumanMessage(
            content=[
                {
                    "type": "image_url",
                    "image_url": {
                        "url": "gs://cloud-samples-data/generative-ai/pdf/2312.11805v3.pdf",
                    },
                },
                {
                    "type": "image_url",
                    "image_url": {
                        "url": "gs://cloud-samples-data/generative-ai/pdf/2403.05530.pdf"
                    },
                },
            ]
        ),
    ],
    tools=tools
)


In [18]:
print(cached_content)

3063239394983936000


In [None]:
from langchain import agents
from langchain_core.messages import (
    AIMessage,
)
# Using cached_content in constructor
chat = ChatVertexAI(
    model_name="gemini-1.5-pro-001",
    cached_content=cached_content,
)

prompt = ChatPromptTemplate.from_messages(
    [
        ("human", "{input}"),
        ("placeholder", "{agent_scratchpad}"),
    ]
)
agent = agents.create_tool_calling_agent(
    llm=chat,
    tools=tools,
    prompt=prompt,
)
agent_executor = agents.AgentExecutor(agent=agent, tools=tools, verbose=False, stream_runnable=False)
response = agent_executor.invoke({"input": "what is the secret number?"})
assert isinstance(response["output"], str)


[autoreload of langchain failed: Traceback (most recent call last):
  File "/Users/kardiff/ais/langchain-google/venv/lib/python3.9/site-packages/IPython/extensions/autoreload.py", line 276, in check
    superreload(m, reload, self.old_objects)
  File "/Users/kardiff/ais/langchain-google/venv/lib/python3.9/site-packages/IPython/extensions/autoreload.py", line 455, in superreload
    if not append_obj(module, old_objects, name, obj):
  File "/Users/kardiff/ais/langchain-google/venv/lib/python3.9/site-packages/IPython/extensions/autoreload.py", line 423, in append_obj
    in_module = hasattr(obj, "__module__") and obj.__module__ == module.__name__
  File "/Users/kardiff/ais/langchain-google/venv/lib/python3.9/site-packages/langchain/tools/__init__.py", line 69, in __getattr__
    from langchain_community import tools
ModuleNotFoundError: No module named 'langchain_community'
]
[autoreload of pydantic._internal._model_construction failed: Traceback (most recent call last):
  File "/Users/k

In [7]:
from pydantic import BaseModel, Field
from typing import Optional
from langchain_google_vertexai import ChatVertexAI, create_structured_runnable
from langchain_core.prompts import ChatPromptTemplate



_DEFAULT_MODEL_NAME = "gemini-1.0-pro-001"

class RecordPerson(BaseModel):
    """Record some identifying information about a person."""

    name: str = Field(..., description="The person's name")
    age: int = Field(..., description="The person's age")
    fav_food: Optional[str] = Field(
        default=None, description="The person's favorite food"
    )


class RecordDog(BaseModel):
    """Record some identifying information about a dog."""

    name: str = Field(..., description="The dog's name")
    color: str = Field(..., description="The dog's color")
    fav_food: Optional[str] = Field(default=None, description="The dog's favorite food")


llm = ChatVertexAI(model_name=_DEFAULT_MODEL_NAME, temperature=1.0)
prompt = ChatPromptTemplate.from_template(
    "Describe a random {class} and mention their name, {attr} and favorite food"
)
chain = create_structured_runnable(
    [RecordPerson, RecordDog], llm, prompt=prompt, use_extra_step=True
)
res = chain.invoke({"class": "person", "attr": "age"})
print(isinstance(res, RecordPerson))

True


In [10]:
from langchain_google_vertexai.model_garden_maas import (
    _LLAMA_MODELS,
    _MISTRAL_MODELS,
    get_vertex_maas_model,
)
import vertexai

vertexai.init(project="llm-integration-tests")
model_name = "mistral-nemo@2407"
llm = get_vertex_maas_model(model_name=model_name, location="us-central1")
output = llm.stream("What is the meaning of life?")
for chunk in output:
    print(chunk)

content='' additional_kwargs={} response_metadata={} id='run-bed340e7-c9cc-429b-97d0-f1062c8cbb7e'
content='The' additional_kwargs={} response_metadata={} id='run-bed340e7-c9cc-429b-97d0-f1062c8cbb7e'
content=' meaning' additional_kwargs={} response_metadata={} id='run-bed340e7-c9cc-429b-97d0-f1062c8cbb7e'
content=' of' additional_kwargs={} response_metadata={} id='run-bed340e7-c9cc-429b-97d0-f1062c8cbb7e'
content=' life' additional_kwargs={} response_metadata={} id='run-bed340e7-c9cc-429b-97d0-f1062c8cbb7e'
content=' is' additional_kwargs={} response_metadata={} id='run-bed340e7-c9cc-429b-97d0-f1062c8cbb7e'
content=' a' additional_kwargs={} response_metadata={} id='run-bed340e7-c9cc-429b-97d0-f1062c8cbb7e'
content=' philosophical' additional_kwargs={} response_metadata={} id='run-bed340e7-c9cc-429b-97d0-f1062c8cbb7e'
content=' question' additional_kwargs={} response_metadata={} id='run-bed340e7-c9cc-429b-97d0-f1062c8cbb7e'
content=' that' additional_kwargs={} response_metadata={} id='

KeyError: 'choices'

In [24]:
# Using cached content in request
chat = ChatVertexAI(model_name="gemini-1.5-pro-001")
prompt = ChatPromptTemplate.from_messages(
    [
        ("human", "{input}"),
        ("placeholder", "{agent_scratchpad}"),
    ]
)
agent = agents.create_tool_calling_agent(
    llm=chat,
    tools=tools,
    prompt=prompt,
)
agent_executor = agents.AgentExecutor(agent=agent, tools=tools, verbose=False, stream_runnable=False)
# response = agent_executor.invoke({"input": "What is the secret number?"}, cached_content=cached_content)
response = agent_executor.invoke({"input":"What is the secret number?"}, cached_content=cached_content)
print(response)


IndexError: list index out of range