In [26]:
from langchain.retrievers import ContextualCompressionRetriever, CohereRagRetriever
from langchain.retrievers.document_compressors import CohereRerank
from langchain_community.embeddings import CohereEmbeddings
from langchain_community.chat_models import ChatCohere
from langchain.text_splitter import CharacterTextSplitter
from langchain_community.document_loaders import TextLoader
from langchain_community.vectorstores import Chroma

user_query = "Provide a simple answer: Who is Cora?"
# Create cohere's chat model and embeddings objects
cohere_chat_model = ChatCohere()
cohere_embeddings = CohereEmbeddings()
# Load text files and split into chunks, you can also use data gathered elsewhere in your application
raw_documents = TextLoader('fable.txt').load()
text_splitter = CharacterTextSplitter(chunk_size=100, chunk_overlap=0)
documents = text_splitter.split_documents(raw_documents)
# Create a vector store from the documents
db = Chroma.from_documents(documents, cohere_embeddings)
input_docs = db.as_retriever().get_relevant_documents(user_query)

print(input_docs)

# Create the cohere rag retriever using the chat model 
rag = CohereRagRetriever(llm=cohere_chat_model)
docs = rag.get_relevant_documents(
    user_query,
    source_documents=input_docs,
)
# Print the documents
for doc in docs[:-1]:
    print(doc.metadata)
    print("\n\n" + doc.page_content)
    print("\n\n" + "-" * 30 + "\n\n")
# Print the final generation 
answer = docs[-1].page_content
print(answer)
# Print the final citations 
citations = docs[-1].metadata['citations']
print(citations)

Created a chunk of size 292, which is longer than the specified 100
Created a chunk of size 177, which is longer than the specified 100
Created a chunk of size 234, which is longer than the specified 100
Created a chunk of size 107, which is longer than the specified 100
Created a chunk of size 333, which is longer than the specified 100
Created a chunk of size 221, which is longer than the specified 100


[Document(page_content='One day, Reynard observed Cora perched high on a branch, holding a shiny piece of silver in her beak. Curious and intrigued, Reynard decided to put his cunning plan into action.', metadata={'source': 'fable.txt'}), Document(page_content='One day, Reynard observed Cora perched high on a branch, holding a shiny piece of silver in her beak. Curious and intrigued, Reynard decided to put his cunning plan into action.', metadata={'source': 'fable.txt'}), Document(page_content='One day, Reynard observed Cora perched high on a branch, holding a shiny piece of silver in her beak. Curious and intrigued, Reynard decided to put his cunning plan into action.', metadata={'source': 'fable.txt'}), Document(page_content='One day, Reynard observed Cora perched high on a branch, holding a shiny piece of silver in her beak. Curious and intrigued, Reynard decided to put his cunning plan into action.', metadata={'source': 'fable.txt'})]
{'id': 'doc-0', 'snippet': 'One day, Reynard ob

In [23]:
from langchain.retrievers import ContextualCompressionRetriever, CohereRagRetriever
from langchain.retrievers.document_compressors import CohereRerank
from langchain_community.embeddings import CohereEmbeddings
from langchain_community.chat_models import ChatCohere
from langchain.text_splitter import CharacterTextSplitter
from langchain_community.document_loaders import TextLoader
from langchain_community.vectorstores import Chroma

user_query = "Who is Cora?"
# Create cohere's chat model and embeddings objects
cohere_chat_model = ChatCohere()
cohere_embeddings = CohereEmbeddings()
# Load text files and split into chunks, you can also use data gathered elsewhere in your application
raw_documents = TextLoader('fable.txt').load()
text_splitter = CharacterTextSplitter(chunk_size=100, chunk_overlap=0)
documents = text_splitter.split_documents(raw_documents)
# Create a vector store from the documents
db = Chroma.from_documents(documents, cohere_embeddings)
input_docs = db.as_retriever().get_relevant_documents(user_query)

print(input_docs)

# Create the cohere rag retriever using the chat model 
rag = CohereRagRetriever(llm=cohere_chat_model)
docs = rag.get_relevant_documents(
    user_query,
    source_documents=input_docs,
)
# Print the documents
for doc in docs[:-1]:
    print(doc.metadata)
    print("\n\n" + doc.page_content)
    print("\n\n" + "-" * 30 + "\n\n")
# Print the final generation 
answer = docs[-1].page_content
print(answer)
# Print the final citations 
citations = docs[-1].metadata['citations']
print(citations)

Created a chunk of size 292, which is longer than the specified 100
Created a chunk of size 177, which is longer than the specified 100
Created a chunk of size 234, which is longer than the specified 100
Created a chunk of size 107, which is longer than the specified 100
Created a chunk of size 333, which is longer than the specified 100
Created a chunk of size 221, which is longer than the specified 100


I could not find any information about Cora. 
Are you perhaps referring to the character Cora from the children's story Reynard the Fox? 

In this story, Cora is a crow who possesses a shiny piece of silver, which catches the eye of the cunning Reynard fox. 
Cora is typically portrayed perched on a branch with the silver item in her beak, perhaps dropping it, although this is unconfirmed in the sources I could find.


In [3]:
# imports

from langchain.vectorstores import Chroma
from langchain.embeddings import SentenceTransformerEmbeddings, OpenAIEmbeddings, CohereEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.llms import Cohere
from langchain.chains import VectorDBQA
from langchain.document_loaders import TextLoader
import os
import dotenv

In [12]:
# set up loader to load text document
loader = TextLoader("fable.txt")

# load data from text file
document = loader.load()
document

[Document(page_content='The Clever Fox and the Gullible Crow\n\nOnce upon a time, in the heart of the enchanted forest, there lived a clever fox named Reynard. Reynard was renowned for his wit and cunning ways, always outsmarting the other animals in the forest. However, there was one creature who seemed immune to his tricks – a gullible crow named Cora.\n\nOne day, Reynard observed Cora perched high on a branch, holding a shiny piece of silver in her beak. Curious and intrigued, Reynard decided to put his cunning plan into action.\n\nApproaching Cora with a sly grin, Reynard spoke, "Oh, magnificent Cora, your feathers shimmer like the morning dew, and your voice is as melodious as a songbird. Surely, you must possess the most beautiful voice in the entire forest."\n\nCora, flattered by the fox\'s words, puffed up her feathers with pride. "Do you really think so?" she asked.\n\nReynard, with a mischievous twinkle in his eye, continued, "Indeed, dear Cora. I am convinced that your voice

In [9]:
# set up splitter to split document into chunks
splitter = RecursiveCharacterTextSplitter(
    chunk_size=100,
    chunk_overlap=0,
    length_function=len,
)

# split text into chunks of text
texts = splitter.split_documents(document)

In [10]:
texts

[Document(page_content='The Clever Fox and the Gullible Crow', metadata={'source': 'fable.txt'}),
 Document(page_content='Once upon a time, in the heart of the enchanted forest, there lived a clever fox named Reynard.', metadata={'source': 'fable.txt'}),
 Document(page_content='Reynard was renowned for his wit and cunning ways, always outsmarting the other animals in the', metadata={'source': 'fable.txt'}),
 Document(page_content='forest. However, there was one creature who seemed immune to his tricks – a gullible crow named', metadata={'source': 'fable.txt'}),
 Document(page_content='Cora.', metadata={'source': 'fable.txt'}),
 Document(page_content='One day, Reynard observed Cora perched high on a branch, holding a shiny piece of silver in her', metadata={'source': 'fable.txt'}),
 Document(page_content='beak. Curious and intrigued, Reynard decided to put his cunning plan into action.', metadata={'source': 'fable.txt'}),
 Document(page_content='Approaching Cora with a sly grin, Reynard

In [6]:
# set up embeddings
# embed_model = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")
embed_model = CohereEmbeddings(model="embed-english-light-v3.0")
# embeddings = OpenAIEmbeddings

# embeddings = embed_model.embed_documents(texts)
embeddings = [embed_model.embed_documents(text.page_content) for text in texts]
print(embeddings)

# set up vector database
# db = Chroma.from_documents(texts, embeddings)

ValidationError: 1 validation error for CohereEmbeddings
__root__
  Did not find cohere_api_key, please add an environment variable `COHERE_API_KEY` which contains it, or pass `cohere_api_key` as a named parameter. (type=value_error)

# Attempt 2

In [8]:
# imports

from langchain.retrievers import ContextualCompressionRetriever, CohereRagRetriever
from langchain.retrievers.document_compressors import CohereRerank
from langchain_community.embeddings import CohereEmbeddings
from langchain_community.chat_models import ChatCohere
from langchain.llms import Cohere
from langchain.text_splitter import CharacterTextSplitter, RecursiveCharacterTextSplitter
from langchain_community.document_loaders import TextLoader
from langchain_community.vectorstores import Chroma
from langchain import hub
import os
import dotenv

In [9]:
# API keys

dotenv.load_dotenv()

os.environ["COHERE_API_KEY"] = os.getenv("COHERE_API_KEY")

In [10]:
# load data from text file
docs = TextLoader('fable.txt').load()
print(f"Original docs:")
print(docs)

Original docs:
[Document(page_content='The Clever Fox and the Gullible Crow\n\nOnce upon a time, in the heart of the enchanted forest, there lived a clever fox named Reynard. Reynard was renowned for his wit and cunning ways, always outsmarting the other animals in the forest. However, there was one creature who seemed immune to his tricks – a gullible crow named Cora.\n\nOne day, Reynard observed Cora perched high on a branch, holding a shiny piece of silver in her beak. Curious and intrigued, Reynard decided to put his cunning plan into action.\n\nApproaching Cora with a sly grin, Reynard spoke, "Oh, magnificent Cora, your feathers shimmer like the morning dew, and your voice is as melodious as a songbird. Surely, you must possess the most beautiful voice in the entire forest."\n\nCora, flattered by the fox\'s words, puffed up her feathers with pride. "Do you really think so?" she asked.\n\nReynard, with a mischievous twinkle in his eye, continued, "Indeed, dear Cora. I am convinced 

In [11]:
# setup text splitter to break docs into chunks
splitter = RecursiveCharacterTextSplitter(
    chunk_size=200,
    chunk_overlap=0,
    length_function=len,
)

# separate docs into chunks for vector storage
split_docs = splitter.split_documents(docs)
print(f"Split docs:")
print(split_docs)

Split docs:
[Document(page_content='The Clever Fox and the Gullible Crow', metadata={'source': 'fable.txt'}), Document(page_content='Once upon a time, in the heart of the enchanted forest, there lived a clever fox named Reynard. Reynard was renowned for his wit and cunning ways, always outsmarting the other animals in the forest.', metadata={'source': 'fable.txt'}), Document(page_content='However, there was one creature who seemed immune to his tricks – a gullible crow named Cora.', metadata={'source': 'fable.txt'}), Document(page_content='One day, Reynard observed Cora perched high on a branch, holding a shiny piece of silver in her beak. Curious and intrigued, Reynard decided to put his cunning plan into action.', metadata={'source': 'fable.txt'}), Document(page_content='Approaching Cora with a sly grin, Reynard spoke, "Oh, magnificent Cora, your feathers shimmer like the morning dew, and your voice is as melodious as a songbird. Surely, you must possess the most', metadata={'source'

In [12]:
# setup Chroma vector database
db = Chroma.from_documents(
    documents=split_docs,
    embedding=CohereEmbeddings()
)

# setup retriever for similarity search of vector database
retriever = db.as_retriever()

# retrieve docs to check whether embedding, storage and retrieval works as expected
retrieved_docs = retriever.invoke("What type of an animal is Reynard?")
print(retrieved_docs)

[Document(page_content='Once upon a time, in the heart of the enchanted forest, there lived a clever fox named Reynard. Reynard was renowned for his wit and cunning ways, always outsmarting the other animals in the forest.', metadata={'source': 'fable.txt'}), Document(page_content='One day, Reynard observed Cora perched high on a branch, holding a shiny piece of silver in her beak. Curious and intrigued, Reynard decided to put his cunning plan into action.', metadata={'source': 'fable.txt'}), Document(page_content='Reynard, with a mischievous twinkle in his eye, continued, "Indeed, dear Cora. I am convinced that your voice is so enchanting that it could rival the melodious nightingale. But to truly prove your', metadata={'source': 'fable.txt'}), Document(page_content='Approaching Cora with a sly grin, Reynard spoke, "Oh, magnificent Cora, your feathers shimmer like the morning dew, and your voice is as melodious as a songbird. Surely, you must possess the most', metadata={'source': 'fa

In [13]:
# setup large language model to be used
llm = Cohere(model="command-xlarge-nightly", temperature=0.5, max_tokens=800, k=0, p=1)

# setup prompt from LangSmith
prompt = hub.pull("rlm/rag-prompt")

In [14]:
# setup RAG chain with LCEL Runnable protocol
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough

def format_docs(docs):
    return "\n\n".join(doc.page_content for doc in docs)

rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

In [15]:
# stream generation from chain
for chunk in rag_chain.stream("What type of an animal is Reynard? Answer in a single word."):
    print(chunk, end="", flush=True)

 Fox