## Setup

In [None]:
import os
from dotenv import load_dotenv
from llama_index.llms.cohere import Cohere

# Load values from a .env file into the process environment, then read the
# Cohere key from it. `api_key` is None when API_KEY is not set.
load_dotenv()
api_key = os.environ.get("API_KEY")

### Call `complete` with a prompt

In [None]:
# Build the Cohere LLM client once; the cells below reuse `llm`.
llm = Cohere(api_key=api_key)

# `complete` takes a single prompt string.
prompt = "Paul Graham is"
response = llm.complete(prompt)

# Example output (truncated):
#   An influential entrepreneur and investor, Paul Graham has significantly shaped the world of computer
print(response)

### Call `chat` with a list of messages

In [None]:
from llama_index.core.llms import ChatMessage, MessageRole

# Conversation so far, as (role, text) pairs in chronological order.
turns = [
    (MessageRole.USER, "Hello there"),
    (MessageRole.ASSISTANT, "Arrrr, matey! How can I help ye today?"),
    (MessageRole.USER, "What is your name"),
]

# Wrap every turn in a ChatMessage; `messages` is reused by the stream_chat cell.
messages = [ChatMessage(role=role, content=text) for role, text in turns]

# `chat` receives the whole history and answers the final user message.
response = llm.chat(messages=messages)
print(response)

### Streaming

In [None]:
# stream_complete: iterate over the completion as it is produced instead of
# waiting for the full response.
response = llm.stream_complete("Paul Graham is ")
for piece in response:
    # Print only the increment carried by this chunk; end="" keeps the
    # pieces on one continuous line.
    print(piece.delta, end="")

In [None]:
# stream_chat: same streaming pattern, driven by the `messages` history
# built in the chat cell.
response = llm.stream_chat(messages=messages)
for piece in response:
    # Print only the increment carried by this chunk; end="" keeps the
    # pieces on one continuous line.
    print(piece.delta, end="")

### Async

In [None]:
response = await llm.acomplete("Paul Graham is")
print(response)

In [None]:
from llama_index.embeddings.cohere import CohereEmbedding

# Embeddings: create the Cohere embedding client with the same API key and
# embed a first sentence. `embeddings` and `response1` are reused below.
embeddings = CohereEmbedding(cohere_api_key=api_key)

first_text = "This is a first document."
response1 = embeddings.get_query_embedding(first_text)

# Example output (truncated):
#   [0.45874023, -1.4619141, 1.0244141, -1.0947266, 1.1396484,
print(response1)

In [None]:
# Embed two more texts: one that overlaps heavily with the first sentence and
# one on an unrelated topic.
response2 = embeddings.get_query_embedding("This is a first duc")
response3 = embeddings.get_query_embedding("Why did you think so?")

# Score each of them against the first embedding (`response1`), in order.
similarity1, similarity2 = (
    embeddings.similarity(response1, other)
    for other in (response2, response3)
)

# One score per line.
print(similarity1, similarity2, sep="\n")