In [1]:
from dotenv import load_dotenv
import yaml
import pandas as pd
from io import StringIO

from cohere import V2RerankResponse, ClientV2, V2RerankResponseResultsItem

from langchain_community.document_loaders import TextLoader
from langchain_community.vectorstores import FAISS
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter

from langchain.retrievers.contextual_compression import ContextualCompressionRetriever
from langchain.chains import RetrievalQA, RetrievalQAWithSourcesChain
from langchain_cohere import CohereRerank, CohereEmbeddings, ChatCohere

In [2]:
load_dotenv()

True

# Cohere samples

In [3]:
co = ClientV2()

## Text Data

In [4]:
# Define the documents
faqs = [
    {
        "text": "Reimbursing Travel Expenses: Easily manage your travel expenses by submitting them through our finance tool. Approvals are prompt and straightforward."
    },
    {
        "text": "Working from Abroad: Working remotely from another country is possible. Simply coordinate with your manager and ensure your availability during core hours."
    },
    {
        "text": "Health and Wellness Benefits: We care about your well-being and offer gym memberships, on-site yoga classes, and comprehensive health insurance."
    },
    {
        "text": "Performance Reviews Frequency: We conduct informal check-ins every quarter and formal performance reviews twice a year."
    },
]


In [5]:
# Add the user query
query = "Are there fitness-related perks?"

# Rerank the documents
results = co.rerank(
    model="rerank-v3.5",
    query=query,
    documents=faqs,
    top_n=3,
)

print(type(results))
print(type(results.results))
print(results)

<class 'cohere.v2.types.v2rerank_response.V2RerankResponse'>
<class 'list'>


In [6]:
# Display the reranking results
def return_results(results: V2RerankResponse, documents: list[dict]):
    # result: V2RerankResponseResultsItem
    for idx, result in enumerate(results.results):
        print(f"Rank: {idx+1}")
        print(f"Score: {result.relevance_score}")
        print(f"Document: {documents[result.index]}\n")

In [7]:
return_results(results, faqs)

Rank: 1
Score: 0.115670934
Document: {'text': 'Health and Wellness Benefits: We care about your well-being and offer gym memberships, on-site yoga classes, and comprehensive health insurance.'}

Rank: 2
Score: 0.01729751
Document: {'text': 'Working from Abroad: Working remotely from another country is possible. Simply coordinate with your manager and ensure your availability during core hours.'}

Rank: 3
Score: 0.01667148
Document: {'text': 'Performance Reviews Frequency: We conduct informal check-ins every quarter and formal performance reviews twice a year.'}



In [8]:
query = "هل هناك مزايا تتعلق باللياقة البدنية؟"  # Are there fitness benefits?
# Rerank the documents
results = co.rerank(
    model="rerank-v3.5",
    query=query,
    documents=faqs,
    top_n=2,
)
return_results(results, faqs)

Rank: 1
Score: 0.22864738
Document: {'text': 'Health and Wellness Benefits: We care about your well-being and offer gym memberships, on-site yoga classes, and comprehensive health insurance.'}

Rank: 2
Score: 0.01610483
Document: {'text': 'Working from Abroad: Working remotely from another country is possible. Simply coordinate with your manager and ensure your availability during core hours.'}



In [9]:
query = "What is the capital of the United States?"
docs = [
    "Carson City is the capital city of the American state of Nevada. At the 2010 United States Census, Carson City had a population of 55,274.",
    "The Commonwealth of the Northern Mariana Islands is a group of islands in the Pacific Ocean that are a political division controlled by the United States. Its capital is Saipan.",
    "Charlotte Amalie is the capital and largest city of the United States Virgin Islands. It has about 20,000 people. The city is on the island of Saint Thomas.",
    "Washington, D.C. (also known as simply Washington or D.C., and officially as the District of Columbia) is the capital of the United States. It is a federal district. The President of the USA and many major national government offices are in the territory. This makes it the political center of the United States of America.",
    "Capital punishment has existed in the United States since before the United States was a country. As of 2017, capital punishment is legal in 30 of the 50 states. The federal government (including the United States military) also uses capital punishment.",
]
results = co.rerank(
    model="rerank-v3.5", query=query, documents=docs, top_n=5, return_documents=True
)

In [11]:
rerank_results = results.results
for result in rerank_results:
    print(type(result))
    print(result.document.text, result.relevance_score, result.index)
    print("---"*10)

<class 'cohere.v2.types.v2rerank_response_results_item.V2RerankResponseResultsItem'>
Washington, D.C. (also known as simply Washington or D.C., and officially as the District of Columbia) is the capital of the United States. It is a federal district. The President of the USA and many major national government offices are in the territory. This makes it the political center of the United States of America. 0.8858788 3
------------------------------
<class 'cohere.v2.types.v2rerank_response_results_item.V2RerankResponseResultsItem'>
Charlotte Amalie is the capital and largest city of the United States Virgin Islands. It has about 20,000 people. The city is on the island of Saint Thomas. 0.23744369 2
------------------------------
<class 'cohere.v2.types.v2rerank_response_results_item.V2RerankResponseResultsItem'>
Carson City is the capital city of the American state of Nevada. At the 2010 United States Census, Carson City had a population of 55,274. 0.21675707 0
-------------------------

## Semi-structured Data

In [12]:
# Define the documents
emails = [
    {
        "from": "hr@co1t.com",
        "to": "david@co1t.com",
        "date": "2024-06-24",
        "subject": "A Warm Welcome to Co1t!",
        "text": "We are delighted to welcome you to the team! As you embark on your journey with us, you'll find attached an agenda to guide you through your first week.",
    },
    {
        "from": "it@co1t.com",
        "to": "david@co1t.com",
        "date": "2024-06-24",
        "subject": "Setting Up Your IT Needs",
        "text": "Greetings! To ensure a seamless start, please refer to the attached comprehensive guide, which will assist you in setting up all your work accounts.",
    },
    {
        "from": "john@co1t.com",
        "to": "david@co1t.com",
        "date": "2024-06-24",
        "subject": "First Week Check-In",
        "text": "Hello! I hope you're settling in well. Let's connect briefly tomorrow to discuss how your first week has been going. Also, make sure to join us for a welcoming lunch this Thursday at noon—it's a great opportunity to get to know your colleagues!",
    },
]


In [14]:
# Convert the documents to YAML format
yaml_docs: list[str] = [yaml.dump(doc, sort_keys=False) for doc in emails]
for doc in yaml_docs:
    print(type(doc))
    print(doc)

<class 'str'>
from: hr@co1t.com
to: david@co1t.com
date: '2024-06-24'
subject: A Warm Welcome to Co1t!
text: We are delighted to welcome you to the team! As you embark on your journey with
  us, you'll find attached an agenda to guide you through your first week.

<class 'str'>
from: it@co1t.com
to: david@co1t.com
date: '2024-06-24'
subject: Setting Up Your IT Needs
text: Greetings! To ensure a seamless start, please refer to the attached comprehensive
  guide, which will assist you in setting up all your work accounts.

<class 'str'>
from: john@co1t.com
to: david@co1t.com
date: '2024-06-24'
subject: First Week Check-In
text: "Hello! I hope you're settling in well. Let's connect briefly tomorrow to discuss\
  \ how your first week has been going. Also, make sure to join us for a welcoming\
  \ lunch this Thursday at noon\u2014it's a great opportunity to get to know your\
  \ colleagues!"



In [15]:
# Add the user query
query = "Any email about check ins?"

# Rerank the documents
results = co.rerank(
    model="rerank-v3.5",
    query=query,
    documents=yaml_docs,
    top_n=2,
)

In [16]:
return_results(results, emails)

Rank: 1
Score: 0.73868835
Document: {'from': 'john@co1t.com', 'to': 'david@co1t.com', 'date': '2024-06-24', 'subject': 'First Week Check-In', 'text': "Hello! I hope you're settling in well. Let's connect briefly tomorrow to discuss how your first week has been going. Also, make sure to join us for a welcoming lunch this Thursday at noon—it's a great opportunity to get to know your colleagues!"}

Rank: 2
Score: 0.11693554
Document: {'from': 'hr@co1t.com', 'to': 'david@co1t.com', 'date': '2024-06-24', 'subject': 'A Warm Welcome to Co1t!', 'text': "We are delighted to welcome you to the team! As you embark on your journey with us, you'll find attached an agenda to guide you through your first week."}



## Tabular Data

In [17]:
# Create a demo CSV file
data = """name,role,join_date,email,status
Rebecca Lee,Senior Software Engineer,2024-07-01,rebecca@co1t.com,Full-time
Emma Williams,Product Designer,2024-06-15,emma@co1t.com,Full-time
Michael Jones,Marketing Manager,2024-05-20,michael@co1t.com,Full-time
Amelia Thompson,Sales Representative,2024-05-20,amelia@co1t.com,Part-time
Ethan Davis,Product Designer,2024-05-25,ethan@co1t.com,Contractor"""
data_csv = StringIO(data)
# Load the CSV file
df = pd.read_csv(data_csv)
df.head(1)

Unnamed: 0,name,role,join_date,email,status
0,Rebecca Lee,Senior Software Engineer,2024-07-01,rebecca@co1t.com,Full-time


In [18]:
# Define the documents
employees: list[dict] = df.to_dict("records")

# Convert the documents to YAML format
yaml_docs: list[str] = [yaml.dump(doc, sort_keys=False) for doc in employees]

# Add the user query
query = "Any full-time product designers who joined recently?"

# Rerank the documents
results = co.rerank(
    model="rerank-v3.5",
    query=query,
    documents=yaml_docs,
    top_n=1,
)

In [19]:
return_results(results, employees)

Rank: 1
Score: 0.81875163
Document: {'name': 'Emma Williams', 'role': 'Product Designer', 'join_date': '2024-06-15', 'email': 'emma@co1t.com', 'status': 'Full-time'}



# LangChain Example

In [28]:
# Helper function for printing docs
def pretty_print_docs(docs):
    print(
        f"\n{'-' * 100}\n".join(
            [f"Document {i+1}:\n\n" + d.page_content for i, d in enumerate(docs)]
        )
    )

In [29]:
documents = TextLoader("data/state_of_the_union.txt").load()
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=100)
texts = text_splitter.split_documents(documents)

In [30]:
# oai_embedding = OpenAIEmbeddings(model="text-embedding-3-small")
cohere_embedding = CohereEmbeddings(model="embed-v4.0")
retriever = FAISS.from_documents(texts, cohere_embedding).as_retriever(
    search_kwargs={"k": 20}
)

In [31]:
query = "What did the president say about Ketanji Brown Jackson"

In [32]:
docs = retriever.invoke(query)
pretty_print_docs(docs)

Document 1:

One of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. 

And I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nation’s top legal minds, who will continue Justice Breyer’s legacy of excellence.
----------------------------------------------------------------------------------------------------
Document 2:

As I said last year, especially to our younger transgender Americans, I will always have your back as your President, so you can be yourself and reach your God-given potential. 

While it often appears that we never agree, that isn’t true. I signed 80 bipartisan bills into law last year. From preventing government shutdowns to protecting Asian-Americans from still-too-common hate crimes to reforming military justice.
----------------------------------------------------------------------------------------------------
Document 3:

A for

## Cohere Reranking

In [37]:
pure_retrieved_docs = retriever.invoke(query)
pretty_print_docs(pure_retrieved_docs)

Document 1:

One of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. 

And I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nation’s top legal minds, who will continue Justice Breyer’s legacy of excellence.
----------------------------------------------------------------------------------------------------
Document 2:

As I said last year, especially to our younger transgender Americans, I will always have your back as your President, so you can be yourself and reach your God-given potential. 

While it often appears that we never agree, that isn’t true. I signed 80 bipartisan bills into law last year. From preventing government shutdowns to protecting Asian-Americans from still-too-common hate crimes to reforming military justice.
----------------------------------------------------------------------------------------------------
Document 3:

A for

In [38]:
compressor = CohereRerank(top_n=5, model="rerank-v3.5")
compression_retriever = ContextualCompressionRetriever(
    base_compressor=compressor, base_retriever=retriever
)

compressed_docs = compression_retriever.invoke(query)
pretty_print_docs(compressed_docs)

Document 1:

One of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. 

And I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nation’s top legal minds, who will continue Justice Breyer’s legacy of excellence.
----------------------------------------------------------------------------------------------------
Document 2:

He will never extinguish their love of freedom. He will never weaken the resolve of the free world. 

We meet tonight in an America that has lived through two of the hardest years this nation has ever faced. 

The pandemic has been punishing. 

And so many families are living paycheck to paycheck, struggling to keep up with the rising cost of food, gas, housing, and so much more. 

I understand.
----------------------------------------------------------------------------------------------------
Document 3:

I spoke with their families

In [39]:
cohere_llm = ChatCohere(model="command-a-03-2025", temperature=0)

chain = RetrievalQA.from_chain_type(
    llm=cohere_llm, retriever=compression_retriever
)

In [40]:
chain.invoke({"query": query})
# chain({"question": query})

{'query': 'What did the president say about Ketanji Brown Jackson',
 'result': 'The president described Ketanji Brown Jackson as "one of our nation’s top legal minds, who will continue Justice Breyer’s legacy of excellence." He highlighted her background as a former top litigator in private practice, a former federal public defender, and her family ties to public school educators and police officers. He also emphasized her ability as a "consensus builder," noting the broad range of support she has received, including from the Fraternal Order of Police and former judges appointed by both Democrats and Republicans.'}