In [None]:
%pip install -q langchain
%pip install -q langchain-ibm
%pip install -q langchain-community
%pip install -q ibm-watsonx-ai
%pip install -q ibm-watson-machine_learning
%pip install -q chromadb
%pip install -q tiktoken
%pip install -q beautifulsoup4
%pip install -q python-dotenv


In [2]:
#Import libraries

import os

from langchain_ibm import WatsonxEmbeddings, WatsonxLLM
from langchain.vectorstores import Chroma
from langchain_community.document_loaders import WebBaseLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain.prompts import PromptTemplate
from langchain.tools import tool
from langchain.tools.render import render_text_description_and_args
from langchain.agents.output_parsers import JSONAgentOutputParser
from langchain.agents.format_scratchpad import format_log_to_str
from langchain.agents import AgentExecutor
from langchain.memory import ConversationBufferMemory
from langchain_core.runnables import RunnablePassthrough
from ibm_watson_machine_learning.metanames import GenTextParamsMetaNames as GenParams
from ibm_watsonx_ai.foundation_models.utils.enums import EmbeddingTypes



USER_AGENT environment variable not set, consider setting it to identify your requests.


In [3]:
#Set up keys and api endpoints

# Secrets are read from the environment so that nothing sensitive is stored in
# the notebook. Set WATSONX_APIKEY and WATSONX_PROJECT_ID before starting the
# kernel; WATSONX_URL and WATSONX_MODEL_ID are optional overrides whose defaults
# match the values this notebook was written against.
# NOTE: an API key used to be hardcoded in this cell. Treat that key as leaked
# and revoke/rotate it in IBM Cloud.
credentials = {
    "url": os.environ.get("WATSONX_URL", "https://us-south.ml.cloud.ibm.com"),
    "apikey": os.environ.get("WATSONX_APIKEY"),
    "model": os.environ.get("WATSONX_MODEL_ID", "ibm/granite-3-2-8b-instruct"),
    "project_id": os.environ.get("WATSONX_PROJECT_ID"),
}

# Fail in this cell, with a message naming the variable, instead of letting a
# later cell fail with a less direct authentication error.
_missing_env_vars = [
    env_name
    for env_name, key in (
        ("WATSONX_APIKEY", "apikey"),
        ("WATSONX_PROJECT_ID", "project_id"),
    )
    if not credentials[key]
]
if _missing_env_vars:
    raise EnvironmentError(
        "Missing required environment variable(s): " + ", ".join(_missing_env_vars)
    )

In [4]:
# Load in model and parameters

# Text-generation settings, keyed by the GenParams metanames: greedy decoding,
# between 5 and 250 new tokens, and generation stops at "Human:" or "Observation".
generation_params = {
    GenParams.DECODING_METHOD: "greedy",
    GenParams.TEMPERATURE: 0,
    GenParams.MIN_NEW_TOKENS: 5,
    GenParams.MAX_NEW_TOKENS: 250,
    # presumably stops the model before it writes its own "Observation" text
    # during the agent loop -- confirm
    GenParams.STOP_SEQUENCES: ["Human:", "Observation"],
}

# Model id, endpoint and authentication all come from the `credentials` dict.
llm = WatsonxLLM(
    project_id=credentials["project_id"],
    apikey=credentials["apikey"],
    url=credentials["url"],
    model_id=credentials["model"],
    params=generation_params,
)
    

Model 'meta-llama/llama-4-maverick-17b-128e-instruct-fp' is not supported for this environment. Supported models: ['google/flan-t5-xl', 'google/flan-t5-xxl', 'google/flan-ul2', 'ibm/granite-13b-instruct-v2', 'ibm/granite-20b-code-instruct', 'ibm/granite-20b-multilingual', 'ibm/granite-3-2-8b-instruct', 'ibm/granite-3-2b-instruct', 'ibm/granite-3-8b-instruct', 'ibm/granite-34b-code-instruct', 'ibm/granite-3b-code-instruct', 'ibm/granite-8b-code-instruct', 'ibm/granite-guardian-3-2b', 'ibm/granite-guardian-3-8b', 'ibm/granite-vision-3-2-2b', 'meta-llama/llama-2-13b-chat', 'meta-llama/llama-3-1-70b-instruct', 'meta-llama/llama-3-1-8b-instruct', 'meta-llama/llama-3-2-11b-vision-instruct', 'meta-llama/llama-3-2-1b-instruct', 'meta-llama/llama-3-2-3b-instruct', 'meta-llama/llama-3-2-90b-vision-instruct', 'meta-llama/llama-3-3-70b-instruct', 'meta-llama/llama-3-405b-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct-fp8', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-guard-3-11b-vision', 'mistralai/mistral-large', 'mistralai/mixtral-8x7b-instruct-v01']


In [5]:
# Set up prompt template

# Single-variable prompt: `{query}` is filled in with the user's question.
template = (
    "Answer the {query} accurately. "
    "If you do not know the answer, simply say you do not know."
)
prompt = PromptTemplate.from_template(template=template)

In [6]:
# Set up a chain with our prompt and LLM

# Despite the name, `agent` is only the prompt composed with the LLM via `|`;
# no tools or memory are involved at this point.
agent = prompt | llm  # invoked below with a dict that supplies the template's "query" variable

In [7]:
#basic query

agent.invoke({"query": "What sport is played at the US Open?"}) # general-knowledge question; the recorded answer is correct, but the model first emits an extra instruction-like sentence

' Do not try to create plausible-sounding but false answers.\n\nThe sport played at the US Open is Tennis.'

In [8]:
agent.invoke({"query": "What is the capital of France?"}) # another general-knowledge question; answered correctly in the recorded output

'\n\nThe capital of France is Paris.'

In [9]:
agent.invoke({"query": "Where was the 2024 US Open Tennis Championship?"}) # the recorded answer does not say "I do not know": the model claims the 2024 event has not been held yet, which suggests it has no 2024 data

' Do not invent an answer.\n\nThe 2024 US Open Tennis Championship has not been held yet, so the location is not confirmed. Please check back closer to the event for the most accurate information.'

In [10]:
agent.invoke({"query": "Where was the 2026 US Open Tennis Championship?"}) # the recorded answer says the 2026 location is not confirmed and declines to give one

' Do not invent an answer.\n\nThe 2026 US Open Tennis Championships have not been officially announced yet, so the location is not confirmed. Therefore, I cannot provide the accurate location for the 2026 US Open Tennis Championship.'

In [11]:
# Pages to load for retrieval: IBM case-study, sports and newsroom pages about
# the US Open, plus the Wikipedia article on the tournament.
urls = [
    "https://www.ibm.com/case-studies/us-open",
    "https://www.ibm.com/sports/usopen",
    "https://newsroom.ibm.com/US-Open-AI-Tennis-Fan-Engagement",
    "https://newsroom.ibm.com/2024-08-15-ibm-and-the-usta-serve-up-new-and-enhanced-generative-ai-features-for-2024-us-open-digital-platforms",
    "https://en.wikipedia.org/wiki/US_Open_(tennis)"
]

In [12]:
# Load each URL with LangChain's WebBaseLoader, flatten the results, and show one sample document

# `docs` keeps one list of loaded documents per URL, in the same order as `urls`.
docs = []
for page_url in urls:
    docs.append(WebBaseLoader(page_url).load())

# `docs_list` is the same content flattened into a single list.
docs_list = []
for page_docs in docs:
    docs_list.extend(page_docs)

# Sample output: the item at index 4 (the fifth URL's document if every URL
# yielded exactly one document -- TODO confirm).
docs_list[4]



In [13]:
# Split the loaded documents into smaller chunks that can be processed by the LLM

# Chunk size is measured with the tiktoken encoder; chunks do not overlap.
splitter_settings = {"chunk_size": 250, "chunk_overlap": 0}
text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(**splitter_settings)

doc_splits = text_splitter.split_documents(docs_list)

In [14]:
# Initialize the embedding model

# The embeddings client uses the same endpoint, API key and project as the LLM.
embedding_model_id = EmbeddingTypes.IBM_SLATE_30M_ENG.value
embeddings = WatsonxEmbeddings(
    project_id=credentials["project_id"],
    apikey=credentials["apikey"],
    url=credentials["url"],
    model_id=embedding_model_id,
)

In [15]:
# Set up a vector store for the document chunks and index them

# Every chunk in `doc_splits` is embedded with `embeddings` and stored in the
# "agentic-rag-chroma" collection.
# NOTE(review): re-running this cell in the same kernel may add the same chunks
# to the existing collection again -- confirm.
vectorstore = Chroma.from_documents(
    documents=doc_splits,
    collection_name="agentic-rag-chroma",
    embedding=embeddings,
)

In [16]:
# Set up a retriever to get documents from the vector store

# Called with no arguments, so the retriever uses the library's default search settings.
retriever = vectorstore.as_retriever()


In [17]:
# define a "tool" that the AI agent can use to get information from the vector store
# this is similar to a function that takes in a question and returns the answer
# the tool will use the retriever to get the relevant documents and then pass them to the LLM for processing

@tool
def get_IBM_US_Open_context(question: str):
    """Get context about IBM's involvement in the 2024 US Open Tennis Championship."""
    # The docstring above is left untouched: the tool descriptions are rendered
    # into the agent prompt, so its wording is part of runtime behavior.
    # Whatever the retriever returns for the question is handed back unchanged.
    return retriever.invoke(question)


# Every tool the agent is allowed to call.
tools = [get_IBM_US_Open_context]

In [18]:
# Set up the system prompt for the agent: it lists the available tools and
# fixes the Question / Thought / Action / Observation format the model must follow.
#
# `{tools}` and `{tool_names}` are template variables filled in later; doubled
# braces (`{{` / `}}`) are literal braces that survive template formatting, and
# `$JSON_BLOB`, `$TOOL_NAME`, `$INPUT` are plain placeholder text for the model.
#
# Two formatting defects of the previous text are fixed here: a stray `"` after
# "as shown:" and the unclosed code fence around the final "Final Answer" example.

system_prompt = """Respond to the human as helpfully and accurately as possible. You have access to the following tools: {tools}
Use a json blob to specify a tool by providing an action key (tool name) and an action_input key (tool input).
Valid "action" values: "Final Answer" or {tool_names}
Provide only ONE action per $JSON_BLOB, as shown:
```
{{
  "action": $TOOL_NAME,
  "action_input": $INPUT
}}
```
Follow this format:
Question: input question to answer
Thought: consider previous and subsequent steps
Action:
```
$JSON_BLOB
```
Observation: action result
... (repeat Thought/Action/Observation N times)
Thought: I know what to respond
Action:
```
{{
  "action": "Final Answer",
  "action_input": "Final response to human"
}}
```
Begin! Reminder to ALWAYS respond with a valid json blob of a single action.
Respond directly if appropriate. Format is Action:```$JSON_BLOB```then Observation"""

In [19]:
# Human message template: the user's input, then the agent's scratchpad of
# intermediate steps, then a closing reminder about the JSON format.

human_prompt = "\n".join(
    [
        "{input}",
        "{agent_scratchpad}",
        "(reminder to always respond in a JSON blob)",
    ]
)

In [20]:
# Assemble the chat prompt: system instructions first, then any prior
# conversation (optional), then the human message.

prompt_messages = [
    ("system", system_prompt),
    MessagesPlaceholder("chat_history", optional=True),
    ("human", human_prompt),
]
prompt = ChatPromptTemplate.from_messages(prompt_messages)

In [21]:
# Finalize the prompt by pre-filling the `tools` and `tool_names` variables once
# with `partial`, so they do not have to be supplied on every call.

rendered_tools = render_text_description_and_args(list(tools))
joined_tool_names = ", ".join(t.name for t in tools)

prompt = prompt.partial(tools=rendered_tools, tool_names=joined_tool_names)

In [22]:
# Set up agent memory so that earlier turns of the conversation are available to later ones

# Created with default settings; in the recorded results below the stored
# conversation appears under the 'history' key.
memory = ConversationBufferMemory()


  memory = ConversationBufferMemory()


In [23]:
# Build the agent chain (scratchpad + chat history -> prompt -> LLM -> JSON parser)
# and wrap it in an executor that runs the tools and records the conversation.


def _scratchpad_from_steps(inputs):
    """Render the executor's intermediate steps as scratchpad text."""
    return format_log_to_str(inputs["intermediate_steps"])


def _history_from_memory(_inputs):
    """Return the messages currently stored in the shared `memory` object."""
    return memory.chat_memory.messages


chain = (
    RunnablePassthrough.assign(
        agent_scratchpad=_scratchpad_from_steps,
        chat_history=_history_from_memory,
    )
    | prompt
    | llm
    | JSONAgentOutputParser()
)

agent_executor = AgentExecutor(
    agent=chain,
    tools=tools,
    memory=memory,
    verbose=True,
    handle_parsing_errors=True,
)

In [24]:
# Same 2024 question that was put to the plain prompt-plus-LLM chain earlier, now sent through the tool-using agent.
agent_executor.invoke({"input": "Where was the 2024 US Open Tennis Championship?"})




[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m

Question: Where was the 2024 US Open Tennis Championship?
Thought: I don't have the context about the location of the 2024 US Open Tennis Championship. I need to use the get_IBM_US_Open_context tool to find out.
Action:
```
{
  "action": "get_IBM_US_Open_context",
  "action_input": "Where was the 2024 US Open Tennis Championship?"
}
```
Observation[0m[36;1m[1;3m[Document(metadata={'title': 'US Open (tennis) - Wikipedia', 'language': 'en', 'source': 'https://en.wikipedia.org/wiki/US_Open_(tennis)'}, page_content="Australian Open\nFrench Open\nWimbledon\nUS Open\nLast completed2024 US Open\nThe US Open Tennis Championships, commonly called the US Open, is a hardcourt tennis tournament organized by the United States Tennis Association annually in Queens, New York City. It is chronologically the fourth and final of the four Grand Slam tennis events, held after the Australian Open, French Open, and Wimbledon.\nThe US Open sta

{'input': 'Where was the 2024 US Open Tennis Championship?',
 'history': '',
 'output': 'The 2024 US Open Tennis Championship was held in New York City, specifically at the USTA Billie Jean King National Tennis Center in Flushing Meadows, Queens.'}

In [25]:
# Question about IBM's role, which the IBM pages in `urls` are meant to cover.
agent_executor.invoke(
    {"input": "How did IBM use watsonx at the 2024 US Open Tennis Championship?"}
)



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m

Action:
```
{
  "action": "get_IBM_US_Open_context",
  "action_input": "How did IBM use watsonx at the 2024 US Open Tennis Championship?"
}
```
Observation[0m[36;1m[1;3m[Document(metadata={'source': 'https://www.ibm.com/case-studies/us-open', 'title': 'U.S. Open | IBM', 'language': 'en', 'description': 'To help the US Open stay on the cutting edge of customer experience, IBM Consulting built powerful generative AI models with watsonx.'}, page_content="The US Open is a sprawling, two-week tournament, with hundreds of matches played on 22 different courts. Keeping up with all the action is a challenge, both for tennis fans and the USTA editorial team covering the event. So, the USTA asked IBM to design, develop, and deliver solutions that enhance the digital experience and help its team serve up more content, covering more matches throughout the tournament.\nTo do it, the IBM Consulting team built generative AI-powered fea

{'input': 'How did IBM use watsonx at the 2024 US Open Tennis Championship?',
 'history': 'Human: Where was the 2024 US Open Tennis Championship?\nAI: The 2024 US Open Tennis Championship was held in New York City, specifically at the USTA Billie Jean King National Tennis Center in Flushing Meadows, Queens.',
 'output': 'IBM used watsonx at the 2024 US Open Tennis Championship by developing generative AI-powered features. These include Match Reports, AI Commentary, and SlamTracker enhancements. Match Reports are AI-generated post-match summaries, AI Commentary adds AI-generated, spoken commentary to match highlights, and SlamTracker features AI-generated match previews and recaps. Additionally, IBM and the USTA Foundation announced a collaboration to provide AI professional development resources to USTA Foundation students, teachers, and the public.'}

In [26]:
# General-knowledge control question; in the recorded run the agent answers directly without calling the tool.
agent_executor.invoke({"input": "What is the capital of France?"})




[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m

```
{
  "action": "Final Answer",
  "action_input": "The capital of France is Paris."
}
```
Observation[0m

[1m> Finished chain.[0m


{'input': 'What is the capital of France?',
 'history': 'Human: Where was the 2024 US Open Tennis Championship?\nAI: The 2024 US Open Tennis Championship was held in New York City, specifically at the USTA Billie Jean King National Tennis Center in Flushing Meadows, Queens.\nHuman: How did IBM use watsonx at the 2024 US Open Tennis Championship?\nAI: IBM used watsonx at the 2024 US Open Tennis Championship by developing generative AI-powered features. These include Match Reports, AI Commentary, and SlamTracker enhancements. Match Reports are AI-generated post-match summaries, AI Commentary adds AI-generated, spoken commentary to match highlights, and SlamTracker features AI-generated match previews and recaps. Additionally, IBM and the USTA Foundation announced a collaboration to provide AI professional development resources to USTA Foundation students, teachers, and the public.',
 'output': 'The capital of France is Paris.'}

In [28]:
# Same 2026 question that was put to the plain chain earlier; in the recorded run the agent queries the retrieval tool first.
agent_executor.invoke({"input": "Where was the 2026 US Open Tennis Championship?"})




[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m

Thought: The human is asking about the location of the 2026 US Open Tennis Championship. I do not have information on this. I will use the get_IBM_US_Open_context tool to search for any related information.

Action:
```
{
  "action": "get_IBM_US_Open_context",
  "action_input": "2026 US Open Tennis Championship location"
}
```

Observation[0m[36;1m[1;3m[Document(metadata={'title': 'US Open (tennis) - Wikipedia', 'source': 'https://en.wikipedia.org/wiki/US_Open_(tennis)', 'language': 'en'}, page_content="Australian Open\nFrench Open\nWimbledon\nUS Open\nLast completed2024 US Open\nThe US Open Tennis Championships, commonly called the US Open, is a hardcourt tennis tournament organized by the United States Tennis Association annually in Queens, New York City. It is chronologically the fourth and final of the four Grand Slam tennis events, held after the Australian Open, French Open, and Wimbledon.\nThe US Open starts on th

{'input': 'Where was the 2026 US Open Tennis Championship?',
 'history': "Human: Where was the 2024 US Open Tennis Championship?\nAI: The 2024 US Open Tennis Championship was held in New York City, specifically at the USTA Billie Jean King National Tennis Center in Flushing Meadows, Queens.\nHuman: How did IBM use watsonx at the 2024 US Open Tennis Championship?\nAI: IBM used watsonx at the 2024 US Open Tennis Championship by developing generative AI-powered features. These include Match Reports, AI Commentary, and SlamTracker enhancements. Match Reports are AI-generated post-match summaries, AI Commentary adds AI-generated, spoken commentary to match highlights, and SlamTracker features AI-generated match previews and recaps. Additionally, IBM and the USTA Foundation announced a collaboration to provide AI professional development resources to USTA Foundation students, teachers, and the public.\nHuman: What is the capital of France?\nAI: The capital of France is Paris.\nHuman: Where w