In [1]:
!pip install langchain-cohere

Collecting langchain-cohere
  Downloading langchain_cohere-0.4.4-py3-none-any.whl.metadata (6.6 kB)
Collecting cohere<6.0,>=5.12.0 (from langchain-cohere)
  Downloading cohere-5.15.0-py3-none-any.whl.metadata (3.4 kB)
Collecting types-pyyaml<7.0.0.0,>=6.0.12.20240917 (from langchain-cohere)
  Downloading types_pyyaml-6.0.12.20250516-py3-none-any.whl.metadata (1.8 kB)
Collecting fastavro<2.0.0,>=1.9.4 (from cohere<6.0,>=5.12.0->langchain-cohere)
  Downloading fastavro-1.11.1-cp312-cp312-win_amd64.whl.metadata (5.9 kB)
Downloading langchain_cohere-0.4.4-py3-none-any.whl (42 kB)
Downloading cohere-5.15.0-py3-none-any.whl (259 kB)
Downloading types_pyyaml-6.0.12.20250516-py3-none-any.whl (20 kB)
Downloading fastavro-1.11.1-cp312-cp312-win_amd64.whl (442 kB)
Installing collected packages: types-pyyaml, fastavro, cohere, langchain-cohere
Successfully installed cohere-5.15.0 fastavro-1.11.1 langchain-cohere-0.4.4 types-pyyaml-6.0.12.20250516


In [2]:
from dotenv import load_dotenv
# Load variables from a local .env file into the process environment.
# The call's return value is shown as the cell output.
# NOTE(review): presumably this supplies the API keys for the Cohere and
# Google clients created in later cells — confirm which keys are required.
load_dotenv()

True

In [None]:
### Build Index

from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_cohere import CohereEmbeddings
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.vectorstores import Chroma
from langchain_google_genai import ChatGoogleGenerativeAI
# Embedding model used to vectorize every chunk stored in the index.
embd = CohereEmbeddings(model="embed-v4.0")

# Docs to index
urls = [
    "https://lilianweng.github.io/posts/2023-06-23-agent/",
    "https://lilianweng.github.io/posts/2023-03-15-prompt-engineering/",
    "https://lilianweng.github.io/posts/2023-10-25-adv-attack-llm/",
]

# Load: each loader call returns a list, so `docs` is a list of lists that is
# flattened into a single list of documents right after.
docs = [WebBaseLoader(url).load() for url in urls]
docs_list = [item for sublist in docs for item in sublist]

# Split the pages into non-overlapping chunks sized with the tiktoken encoder.
text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
    chunk_size=256,  # integer size; the previous `512/2` evaluated to the float 256.0
    chunk_overlap=0,
)

doc_splits = text_splitter.split_documents(docs_list)

# Embed the chunks and store them in a Chroma vector store.
vectorstore = Chroma.from_documents(
    documents=doc_splits,
    embedding=embd,
)

# Retriever used by later cells to fetch chunks for a question.
retriever = vectorstore.as_retriever()

In [39]:
from typing import Literal

from langchain_core.prompts import ChatPromptTemplate
from langchain_core.pydantic_v1 import BaseModel, Field
from langchain_cohere import ChatCohere

# Data Model
# Routing option for questions outside the indexed topics. The class is handed
# to the Cohere chat model as a tool via `llm.bind_tools` further down.
# NOTE(review): the class docstring and the Field description are presumably
# forwarded to the model as the tool / argument descriptions, so treat them as
# prompt text rather than ordinary documentation — confirm before rewording.
class web_search(BaseModel):
    """
    The internet. Use web_search for questions that are related to anything else than agents, prompt engineering, and adversarial attacks.
    """

    # Search string the model fills in when it selects this tool.
    query: str = Field(description="The query to use when searching the internet.")


# Routing option for questions covered by the indexed documents. The class is
# handed to the Cohere chat model as a tool via `llm.bind_tools` further down.
# NOTE(review): the index-building cell assigns the Chroma store to the same
# name `vectorstore`; defining this class rebinds that name. `retriever` was
# created from the store before this point, but any later `vectorstore.<method>`
# call would reach this class instead of the store. The class name is presumably
# also the tool name reported in the model's tool calls, so renaming it is not a
# purely local change — confirm before touching either name.
class vectorstore(BaseModel):
    """
    A vectorstore containing documents related to agents, prompt engineering, and adversarial attacks. Use the vectorstore for questions on these topics.
    """

    # Search string the model fills in when it selects this tool.
    query: str = Field(description="The query to use when searching the vectorstore.")

# Routing instructions passed to the Cohere model as its preamble.
preamble = """You are an expert at routing a user question to a vectorstore or web search.
The vectorstore contains documents related to agents, prompt engineering, and adversarial attacks.
Use the vectorstore for questions related to these topics. Otherwise, use web-search"""

# Chat model with the two routing options bound as tools.
llm = ChatCohere(model="command-r", temperature=0)
structured_llm_router = llm.bind_tools(
    tools=[web_search, vectorstore], preamble=preamble
)

# Prompt: the user's question is forwarded unchanged as the only message.
route_prompt = ChatPromptTemplate.from_messages(
    [
        ("human", "{question}"),
    ]
)

question_router = route_prompt | structured_llm_router

# Smoke test 1: a question outside the indexed topics. The recorded output for
# this cell shows a `web_search` tool call. The router is invoked once; the
# earlier version called it twice with near-identical text and discarded the
# first result, which only added an extra remote call.
response = question_router.invoke(
    {"question": "Who will the Bears draft first in the NFL draft?"}
)
print(response.response_metadata["tool_calls"])

# Smoke test 2: small talk. The result is left in `response` for inspection and
# is not printed.
# NOTE(review): presumably no tool is selected for this input, in which case
# `response_metadata` may lack a "tool_calls" key — confirm before indexing it.
response = question_router.invoke({"question": "Hi how are you"})


[{'id': 'web_search_6ffm90czpecn', 'type': 'function', 'function': {'name': 'web_search', 'arguments': '{"query":"who will the bears pick first in the NFL draft"}'}}]


In [43]:
from langchain_core.utils.function_calling import convert_to_openai_tool
from langchain_google_genai import ChatGoogleGenerativeAI

# Data model
# Output schema for the relevance grader; passed to `llm.with_structured_output`
# further down so the model's answer comes back as an instance of this class.
# NOTE(review): the docstring and Field description are presumably sent to the
# model as part of the schema — confirm before rewording them.
class GradeDocuments(BaseModel):
    """Binary score for relevance check on retrieved documents."""
    # Described as 'yes' or 'no', but typed as a plain `str`, so the two allowed
    # values are not enforced by this model.
    binary_score: str = Field(description="Documents are relevant to the question, 'yes' or 'no'")
# Grader instructions, used as the system message of the prompt built below.
preamble = """You are a grader assessing relevance of a retrieved document to a user question. \n
If the document contains keyword(s) or semantic meaning related to the user question, grade it as relevant. \n
Give a binary score 'yes' or 'no' score to indicate whether the document is relevant to the question."""

# Gemini chat model whose output is structured as a GradeDocuments object.
llm = ChatGoogleGenerativeAI(temperature=0, model="gemini-2.0-flash")
structured_llm_grader = llm.with_structured_output(GradeDocuments)

# Two-message prompt: grader instructions, then the document/question pair.
grade_prompt = ChatPromptTemplate.from_messages([
    ("system", preamble),
    ("human", "Retrieved document: \n\n {document} \n\n User question: {question}"),
])

# Pipeline: fill in the prompt, then ask the structured-output model for a grade.
retrieval_grader = grade_prompt | structured_llm_grader

# Smoke test: grade one retrieved chunk against a question unrelated to the
# indexed topics; the recorded output for this cell is binary_score='no'.
question = "NFL match 2025 schedule"
docs = retriever.invoke(question)
doc_txt = docs[1].page_content  # the second retrieved document
response = retrieval_grader.invoke({"document": doc_txt, "question": question})
print(response)

binary_score='no'
