## Environment Setup

In [33]:
import os
from dotenv import load_dotenv 
from langchain.document_loaders import PyPDFDirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_cohere import CohereEmbeddings
from langchain_cohere import ChatCohere
from langchain.chains import RetrievalQA
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate
from langchain_pinecone import PineconeVectorStore
from pinecone import Pinecone


In [34]:
load_dotenv()

True

## load document 

In [35]:
def read_doc(directory):
    file_loader=PyPDFDirectoryLoader(directory)
    documents=file_loader.load()
    return documents

In [None]:
docs=read_doc('documents/')


[Document(metadata={'producer': 'Microsoft® Word LTSC', 'creator': 'Microsoft® Word LTSC', 'creationdate': '2025-02-13T13:17:53+01:00', 'author': 'Anthony Orji', 'moddate': '2025-02-13T13:17:53+01:00', 'source': 'documents\\To the Love of my life.pdf', 'total_pages': 4, 'page': 0, 'page_label': '1'}, page_content="To the Love of my life,  \nthe one whose smile lightens up my mood, whose voice, so calm and tender, \nmakes me feel like a child in the embrace of her father. I can’t completely \ndescribe how I feel anytime I hear your voic e, the way it speaks to my \nsoul, the positivity I feel, the consciousness that this feeling happens only \nbecause of you. I love your cheerfulness, your happiness, the way you find \nenergy to put up a smiling face  anytime we talk.  I really find it  \nfascinating and I pray I get to see this side of you forever. \nWhen the year started, I never imagined I w ould have a reason to write \nthis to anyone. The year started with a mixed feeling because I

In [37]:
def chunk_data(docs,chunk_size=100,chunk_overlap=20):
    text_splitter=RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)
    doc=text_splitter.split_documents(docs)
    return doc

## split text into chunks

In [38]:
documents = chunk_data(docs=docs)

## embeddings and vector store

In [39]:
embeddings=CohereEmbeddings(model="embed-english-light-v3.0",
                            cohere_api_key=os.environ['COHERE_API_KEY'])
embeddings

CohereEmbeddings(client=<cohere.client.Client object at 0x00000187B578DB50>, async_client=<cohere.client.AsyncClient object at 0x00000187B585C290>, model='embed-english-light-v3.0', truncate=None, cohere_api_key=SecretStr('**********'), embedding_types=['float'], max_retries=3, request_timeout=None, user_agent='langchain:partner', base_url=None)

In [40]:
vectors=embeddings.embed_query("what does the writer love about the recipient?")

In [41]:
len(vectors)

384

In [42]:


pc = Pinecone(api_key=os.environ['PINECONE_API_KEY'])

index_name = "pineconedemo"

if not pc.has_index(index_name):
    pc.create_index_for_model(
        name=index_name,
        cloud="aws",
        region="us-east-1",
        embed={
            "model":"llama-text-embed-v2",
            "field_map":{"text": "chunk_text"}
        }
    )

## query the vector database

In [43]:
vectorstore_from_docs = PineconeVectorStore.from_documents(
        documents,
        index_name=index_name,
        embedding=embeddings
    )
retriever = vectorstore_from_docs.as_retriever()

In [44]:


llm = ChatCohere(model="command-r", cohere_api_key=os.environ['COHERE_API_KEY'])


In [45]:
system_prompt = (
    "Use the given context to answer the question. "
    "If you don't know the answer, say you don't know. "
    "Use three sentence maximum and keep the answer concise.\n\n"
    "Context: {context}"
)
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_prompt),
        ("human", "{input}"),
    ]
)

In [46]:
question_answer_chain = create_stuff_documents_chain(llm, prompt)
chain = create_retrieval_chain(retriever, question_answer_chain)


In [None]:
query = "what does the writer love about the recipient?"
response = chain.invoke({"input": query})
#print(response)

{'input': 'what does the writer love about the recipient?', 'context': [Document(id='0fcd0f0f-3373-45a4-83e0-b9f11fdc5e53', metadata={'author': 'Anthony Orji', 'creationdate': '2025-02-13T13:17:53+01:00', 'creator': 'Microsoft® Word LTSC', 'moddate': '2025-02-13T13:17:53+01:00', 'page': 1.0, 'page_label': '2', 'producer': 'Microsoft® Word LTSC', 'source': 'documents\\To the Love of my life.pdf', 'total_pages': 4.0}, page_content='Now I know what it feels to have someone genuinely care so much about'), Document(id='26394cac-fe87-493f-9905-186bf10c33c3', metadata={'author': 'Anthony Orji', 'creationdate': '2025-02-13T13:17:53+01:00', 'creator': 'Microsoft® Word LTSC', 'moddate': '2025-02-13T13:17:53+01:00', 'page': 1.0, 'page_label': '2', 'producer': 'Microsoft® Word LTSC', 'source': 'documents\\To the Love of my life.pdf', 'total_pages': 4.0}, page_content='Now I know what it feels to have someone genuinely care so much about'), Document(id='3fda264b-a2ad-4484-b51f-5ff7c3ce996e', metada