In [1]:
!pip install cohere

Collecting cohere
  Downloading cohere-5.5.8-py3-none-any.whl (173 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m173.8/173.8 kB[0m [31m1.3 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting boto3<2.0.0,>=1.34.0 (from cohere)
  Downloading boto3-1.34.138-py3-none-any.whl (139 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m139.2/139.2 kB[0m [31m3.0 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting fastavro<2.0.0,>=1.9.4 (from cohere)
  Downloading fastavro-1.9.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.1/3.1 MB[0m [31m20.5 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting httpx>=0.21.2 (from cohere)
  Downloading httpx-0.27.0-py3-none-any.whl (75 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m75.6/75.6 kB[0m [31m5.0 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting httpx-sse<0.5.0,>=0.4.0 (from cohere)
  Downloading httpx_sse-0.4.0-py3-no

In [2]:
from google.colab import userdata

In [3]:
import cohere
co = cohere.Client(api_key=userdata.get('COHERE_API_KEY'))

co.chat(
  model="command-r-plus",
  message="Where do the tallest penguins live?",
  documents=[
    {"title": "Tall penguins", "snippet": "Emperor penguins are the tallest."},
    {"title": "Penguin habitats", "snippet": "Emperor penguins only live in Antarctica."},
    {"title": "What are animals?", "snippet": "Animals are different from plants."}
  ])



In [4]:
!pip install wikipedia --quiet

  Preparing metadata (setup.py) ... [?25l[?25hdone
  Building wheel for wikipedia (setup.py) ... [?25l[?25hdone


In [5]:
import wikipedia

In [6]:
article = wikipedia.page('Dune Part Two')
text = article.content
print(f"The text has roughly {len(text.split())} words.")

The text has roughly 5918 words.


In [7]:
# For chunking let's use langchain to help us split the text
%pip install -qU langchain-text-splitters --quiet
from langchain_text_splitters import RecursiveCharacterTextSplitter

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m337.4/337.4 kB[0m [31m2.6 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m127.5/127.5 kB[0m [31m13.2 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m141.1/141.1 kB[0m [31m14.4 MB/s[0m eta [36m0:00:00[0m
[?25h

In [8]:
# Create basic configurations to chunk the text
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=512,
    chunk_overlap=50,
    length_function=len,
    is_separator_regex=False,
)

# Split the text into chunks with some overlap
chunks_ = text_splitter.create_documents([text])
chunks = [c.page_content for c in chunks_]
print(f"The text has been broken down in {len(chunks)} chunks.")

The text has been broken down in 113 chunks.


In [9]:
# Because the texts being embedded are the chunks we are searching over, we set the input type as search_doc
model="embed-english-v3.0"
response = co.embed(
    texts= chunks,
    model=model,
    input_type="search_document",
    embedding_types=['float']
)
embeddings = response.embeddings.float
print(f"We just computed {len(embeddings)} embeddings.")

We just computed 113 embeddings.


In [10]:
# We use the simplest vector database ever: a python dictionary
!pip install numpy --quiet

In [11]:
import numpy as np
vector_database = {i: np.array(embedding) for i, embedding in enumerate(embeddings)}

In [12]:
query = "Name everyone involved in writing the script, directing, and producing 'Dune: Part Two'?"

In [13]:
# Because the text being embedded is the search query, we set the input type as search_query
response = co.embed(
    texts=[query],
    model=model,
    input_type="search_query",
    embedding_types=['float']
)
query_embedding = response.embeddings.float[0]
print("query_embedding: ", query_embedding)

query_embedding:  [-0.068603516, -0.02947998, -0.06274414, -0.015449524, -0.033294678, 0.0056877136, -0.047210693, 0.04714966, -0.024871826, 0.008148193, 0.0770874, 0.023880005, -0.058685303, -0.052520752, 0.012832642, 0.024398804, 0.0053215027, 0.035491943, 0.02961731, -0.0069847107, 0.01083374, -0.0011358261, -0.002199173, 0.018417358, 0.027389526, -0.002691269, -0.026535034, 0.015197754, 0.024368286, 0.03729248, 0.0057754517, -0.02229309, -0.014694214, 0.019989014, -0.0036315918, -0.013793945, 0.02835083, 0.006011963, 0.011428833, 0.008682251, 0.046142578, -0.040039062, -0.032196045, -0.002653122, -0.012580872, -0.0041618347, 0.03111267, -0.016799927, 0.014801025, -0.00030636787, -0.033050537, 0.033966064, -0.016021729, -0.025009155, -0.007534027, -0.017074585, 0.008415222, -0.10620117, 0.019195557, -0.015686035, -0.0043182373, -0.045440674, 0.05404663, 0.030776978, -0.014129639, -0.01499939, -0.007286072, 0.009933472, 0.06390381, 0.02444458, -0.010345459, 0.041931152, 0.032989502, 

In [14]:
def cosine_similarity(a, b):
    return np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b))

# Calculate similarity between the user question & each chunk
similarities = [cosine_similarity(query_embedding, chunk) for chunk in embeddings]
print("similarity scores: ", similarities)

# Get indices of the top 10 most similar chunks
sorted_indices = np.argsort(similarities)[::-1]

# Keep only the top 10 indices
top_indices = sorted_indices[:10]
print("Here are the indices of the top 10 chunks after retrieval: ", top_indices)

# Retrieve the top 10 most similar chunks
top_chunks_after_retrieval = [chunks[i] for i in top_indices]
print("Here are the top 10 chunks after retrieval: ")
for t in top_chunks_after_retrieval:
    print("== " + t)

similarity scores:  [0.6875888718302366, 0.39269418952694124, 0.6864932971056473, 0.3145181964299242, 0.4413361575890713, 0.24433414846393067, 0.40676914179902307, 0.281908452794666, 0.31863783609279156, 0.2958806029346864, 0.425560940613036, 0.1650776631876737, 0.3997435318744615, 0.3750338306016755, 0.40399861875637233, 0.32328429500327127, 0.3207388394289174, 0.3413780643673337, 0.23206035063597272, 0.49714879938520146, 0.34348653658592415, 0.28827907305792067, 0.5791464776697353, 0.5490710710857941, 0.7784154002245387, 0.5236541350938924, 0.5483334106328851, 0.714889033919447, 0.5205381827616469, 0.5881525276753472, 0.2649990410674255, 0.6410858426784225, 0.5369446831119411, 0.6821327220127418, 0.3900514608549773, 0.4824043505431881, 0.4509175921745556, 0.24611453192320207, 0.44276081774137727, 0.3915032292959836, 0.20537145812198718, 0.43674179465662816, 0.3752260732974342, 0.46209230119742634, 0.30051869114968277, 0.36398118915824396, 0.3599629755871494, 0.39313887429683014, 0.23

In [27]:
response = co.rerank(
    query=query,
    documents=top_chunks_after_retrieval,
    top_n=3,
    model="rerank-english-v2.0",
)

In [33]:
## Was unable to figure out the bug here. I think the API may have been updated since the notebook was run.##
top_chunks_after_rerank = [result for result in response]
print("Here are the top 3 chunks after rerank: ")
for t in top_chunks_after_rerank:
    print("== " + t)

Here are the top 3 chunks after rerank: 


TypeError: can only concatenate str (not "tuple") to str

In [40]:
# preamble containing instructions about the task and the desired style for the output.
preamble = """
## Task & Context
You help people answer their questions and other requests interactively. You will be asked a very wide array of requests on all kinds of topics. You will be equipped with a wide range of search engines or similar tools to help you, which you use to research your answer. You should focus on serving the user's needs as best you can, which will be wide-ranging.

## Style Guide
Unless the user asks for a different style of answer, you should answer in full sentences, using proper grammar and spelling.
"""

In [42]:
# retrieved documents
documents = [
    {"title": "chunk 0", "snippet": top_chunks_after_retrieval[0]},
    {"title": "chunk 1", "snippet": top_chunks_after_retrieval[1]},
    {"title": "chunk 2", "snippet": top_chunks_after_retrieval[2]},
  ]

# get model response
response = co.chat(
  message=query,
  documents=documents,
  preamble=preamble,
  model="command-r",
  temperature=0.3
)

print("Final answer:")
print(response.text)

Final answer:
Here's a list of everyone involved in writing the script, directing, and producing 'Dune: Part Two'.
- Denis Villeneuve (director and producer)
- Jon Spaihts (screenwriter and producer)
- Mary Parent (producer)
- Cale Boyter (producer)

The following people were also credited as executive producers:
- Tanya Lapointe
- Brian Herbert
- Byron Merritt
- Kim Herbert
- Thomas Tull
- Richard P. Rubinstein
- John Harrison
- Herbert W. Gain
Alternatively, the following statement credits the entire production team:
> Dune: Part Two was produced by Villeneuve, Mary Parent, and Cale Boyter, with Tanya Lapointe, Brian Herbert, Byron Merritt, Kim Herbert, Thomas Tull, Jon Spaihts, Richard P. Rubinstein, John Harrison, and Herbert W. Gain serving as executive producers


In [43]:
print("Citations that support the final answer:")
for cite in response.citations:
  print(cite)

Citations that support the final answer:
start=103 end=119 text='Denis Villeneuve' document_ids=['doc_1', 'doc_2']
start=120 end=129 text='(director' document_ids=['doc_2']
start=134 end=142 text='producer' document_ids=['doc_2']
start=146 end=157 text='Jon Spaihts' document_ids=['doc_0', 'doc_2']
start=158 end=171 text='(screenwriter' document_ids=['doc_2']
start=176 end=184 text='producer' document_ids=['doc_0']
start=188 end=199 text='Mary Parent' document_ids=['doc_0']
start=200 end=210 text='(producer)' document_ids=['doc_0']
start=213 end=224 text='Cale Boyter' document_ids=['doc_0']
start=225 end=235 text='(producer)' document_ids=['doc_0']
start=280 end=299 text='executive producers' document_ids=['doc_0']
start=303 end=317 text='Tanya Lapointe' document_ids=['doc_0']
start=320 end=333 text='Brian Herbert' document_ids=['doc_0']
start=336 end=349 text='Byron Merritt' document_ids=['doc_0']
start=352 end=363 text='Kim Herbert' document_ids=['doc_0']
start=366 end=377 text='Thoma