In [1]:
%pip install "cohere<5" --quiet

Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 24.2 -> 25.2
[notice] To update, run: python.exe -m pip install --upgrade pip


In [2]:
import cohere

In [None]:
API_KEY = "COHERE_API_KEY"

In [4]:
co = cohere.Client(API_KEY)

In [5]:
!pip install wikipedia --quiet
import wikipedia


[notice] A new release of pip is available: 24.2 -> 25.2
[notice] To update, run: python.exe -m pip install --upgrade pip


In [6]:
article = wikipedia.page('Wild Robot', auto_suggest = False)
text = article.content
print(f"The article is titled: {article.title}")
print(f"The text has roughly {len(text.split())} words.")

The article is titled: The Wild Robot
The text has roughly 3065 words.


In [7]:
%pip install -qU langchain-text-splitters --quiet
from langchain_text_splitters import RecursiveCharacterTextSplitter

Note: you may need to restart the kernel to use updated packages.


In [8]:
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=512,
    chunk_overlap=50,
    length_function=len,
    is_separator_regex=False,
)
chunks_ = text_splitter.create_documents([text])
chunks = [c.page_content for c in chunks_]
print(f"The text has been broken down in {len(chunks)} chunks.")

The text has been broken down in 63 chunks.


In [9]:
model="embed-english-v3.0"
response = co.embed(
    texts=chunks,
    model=model,
    input_type="search_document",
    embedding_types=['float']
)
embeddings = response.embeddings.float
print(f"We just computed {len(embeddings)} embeddings.")


We just computed 63 embeddings.


In [10]:
!pip install numpy
import numpy as np

Collecting numpy
  Using cached numpy-2.3.2-cp312-cp312-win_amd64.whl.metadata (60 kB)
Using cached numpy-2.3.2-cp312-cp312-win_amd64.whl (12.8 MB)
Installing collected packages: numpy
Successfully installed numpy-2.3.2



[notice] A new release of pip is available: 24.2 -> 25.2
[notice] To update, run: python.exe -m pip install --upgrade pip


In [11]:
vector_database = {i: np.array(embedding) for i, embedding in enumerate(embeddings)}


In [12]:
query = "Who is the director of the movie The Wild Robot? List out the names of all actors in the movie and their role in the movie?"


In [13]:
response = co.embed(
    texts=[query],
    model=model,
    input_type="search_query",
    embedding_types=['float']
)
query_embedding = response.embeddings.float[0]
print("query_embedding: ", query_embedding)


query_embedding:  [-0.039245605, -0.0029792786, -0.057525635, 0.013801575, 0.010322571, -0.03668213, 0.03781128, 0.03201294, -0.04437256, 0.022888184, -0.0032081604, 0.02458191, -0.09240723, -0.012565613, 0.0064926147, -0.063964844, 0.0027065277, 0.09680176, 0.08782959, 0.022659302, 0.021636963, 0.038269043, -0.03338623, -0.035888672, 0.043762207, -0.010627747, -0.017608643, -0.011962891, 0.014465332, 0.018310547, 0.014968872, 0.011802673, 0.0132369995, 0.0018453598, 0.00020682812, -0.024505615, 0.0065307617, -0.011146545, 0.0009727478, -0.012565613, 0.008514404, -0.0022830963, 0.0026721954, 0.00075387955, -0.016677856, -0.012161255, 0.021606445, -0.007129669, 0.031951904, 0.016281128, -0.02067566, -0.021850586, -0.012939453, -0.02293396, -0.038146973, -0.037750244, -0.0022506714, -0.06390381, 0.015838623, 0.047821045, 0.013153076, -0.0069885254, 0.012237549, -0.02848816, 0.0030879974, -0.0011091232, 0.0158844, 0.01763916, 0.056365967, 0.0009160042, -0.031433105, 0.035491943, 0.0209503

In [14]:
def cosine_similarity(a, b):
    return np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b))

similarities = [cosine_similarity(query_embedding, chunk) for chunk in embeddings]
print("similarity scores: ", similarities)

sorted_indices = np.argsort(similarities)[::-1]
top_indices = sorted_indices[:10]
print("Here are the indices of the top 10 chunks after retrieval: ", top_indices)

top_chunks_after_retrieval = [chunks[i] for i in top_indices]
print("Here are the top 10 chunks after retrieval: ")
for t in top_chunks_after_retrieval:
    print("== " + t)



similarity scores:  [np.float64(0.7816325949972139), np.float64(0.15574661290089367), np.float64(0.3327048454848402), np.float64(0.6050063766120897), np.float64(0.18578365447234477), np.float64(0.2800438304298471), np.float64(0.29853918913290883), np.float64(0.17814177627071265), np.float64(0.18311135456179842), np.float64(0.06064340689027584), np.float64(0.2050079764886597), np.float64(0.10976479378468372), np.float64(0.27122340937427425), np.float64(0.3402325874140159), np.float64(0.16118026869699917), np.float64(0.17712139135269347), np.float64(0.30036985663166815), np.float64(0.32481583620825183), np.float64(0.2919675757621855), np.float64(0.41239239261819843), np.float64(0.37172499253381985), np.float64(0.19626280948495775), np.float64(0.6364201488870006), np.float64(0.320562737748284), np.float64(0.2187956657190147), np.float64(0.2531005925704008), np.float64(0.2468638244984641), np.float64(0.33007202740478314), np.float64(0.29196991710997444), np.float64(0.5095763215462779), np.

In [15]:
preamble = """
## Task & Context
You help people answer their questions and other requests interactively...
"""

documents = [
    {"title": "chunk 0", "snippet": top_chunks_after_retrieval[0]},
    {"title": "chunk 1", "snippet": top_chunks_after_retrieval[1]},
    {"title": "chunk 2", "snippet": top_chunks_after_retrieval[2]},
]

response = co.chat(
  message=query,
  documents=documents,
  preamble=preamble,
  model="command-r",
  temperature=0.3
)

print("Final answer:")
print(response.text)



Final answer:
The Wild Robot is an American animated science fiction movie directed by Chris Sanders. Since it is an animated film, it features the voices of actors rather than live-action performances. The list of voice actors and their respective roles are as follows:
- Lupita Nyong'o
- Role: Robot
- Pedro Pascal
- Kit Connor
- Bill Nighy
- Stephanie Hsu
- Matt Berry
- Ving Rhames
- Mark Hamill
- Catherine O'Hara

The movie is an adaptation of a novel of the same name, written by Peter Brown. Dreamworks Animation produced the film on a budget of $78 million and collaborated with Universal's GreenerLight Program and the Natural Resources Defense Council's Rewrite the Future initiative for the environmental themes depicted in the movie.
