In [44]:
import os
from langchain_community.document_loaders import PyPDFLoader
from langchain_openai import OpenAIEmbeddings
from langchain_experimental.text_splitter import SemanticChunker
from dotenv import load_dotenv
from langchain.schema import Document
from langchain.vectorstores import chroma
from langchain_openai import ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains import create_retrieval_chain
load_dotenv()


True

In [4]:
pdfdoc = PyPDFLoader("RAMAYANA.pdf")
ramayadocloader = pdfdoc.load()

In [26]:
pdfdoc

<langchain_community.document_loaders.pdf.PyPDFLoader at 0x207b43125a0>

In [None]:
print(len(ramayadocloader))
ramayadocloader

In [11]:
### Initialing the Embedding
embedding =OpenAIEmbeddings()

In [25]:
def clean_doc(doc:Document) -> Document:
    clean_text = " ".join(doc.page_content.split())
    return Document(page_content=clean_text,metadata=doc.metadata)

clean_docs =[clean_doc(doc) for doc in ramayadocloader]

print(len(clean_docs))


45


In [29]:
### Chunking of documents using SemanticChunker

chuncking_rules = SemanticChunker(embeddings=embedding,breakpoint_threshold_amount=0.7)
chunks = chuncking_rules.split_documents(clean_docs)
print(len(chunks))

1185


In [None]:
for index, doc in enumerate(chunks):
    print(f"Chunk Number:{index}")
    print(chunks[index].page_content)


In [35]:
## Initiating the Chromadb
from langchain_community.vectorstores import Chroma
persist_directory ="./ramayanadb"

vectorstore = Chroma.from_documents(
    persist_directory=persist_directory,
    embedding = OpenAIEmbeddings(),
    collection_name ="ramayana_collection",
    documents=chunks
)


In [36]:
print(vectorstore._collection.count())

1185


In [46]:
## Converting Vector as retervier

retriver = vectorstore.as_retriever(
    search_kwarg={"k":3}
)

In [59]:
query ="What is the name of Rama's wife"
response = vectorstore.similarity_search(query,k=1)
for index, doc in enumerate(response):
    print(query)
    print(f"Document{index}")
    print(f"Document Content:{response[index].page_content}")

What is the name of Rama's wife
Document0
Document Content:Her husband Sri Rama is the Lord of the Universe.


In [48]:
### Creating a LLM connection
llm = ChatOpenAI(model="gpt-4o-mini")

### Creating a prompt
system_prompt = """You are assistant for question and answer tasks, your specilist in Hindu mytholy and hindu ramayana. Use the following retrieved context to answer the question
                if you don't know the answer, just say "Sorry, i don't know the answer. user three sentacnes at maximum and please be very polite in answer,
                keep the answer concies. Context:{context}"""

prompt = ChatPromptTemplate(
    [("system",system_prompt),("user","{input}")]

)


In [49]:
prompt

ChatPromptTemplate(input_variables=['context', 'input'], input_types={}, partial_variables={}, messages=[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context'], input_types={}, partial_variables={}, template='You are assistant for question and answer tasks, your specilist in Hindu mytholy and hindu ramayana. Use the following retrieved context to answer the question\n                if you don\'t know the answer, just say "Sorry, i don\'t know the answer. user three sentacnes at maximum and please be very polite in answer,\n                keep the answer concies. Context:{context}'), additional_kwargs={}), HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['input'], input_types={}, partial_variables={}, template='{input}'), additional_kwargs={})])

In [65]:
### Creating document stuff

document_chain = create_stuff_documents_chain(llm,prompt)

### Create document retrival

rag_chain = create_retrieval_chain(retriver,document_chain)
response = rag_chain.invoke({"input":"Name Rama's sibilings names"})



In [66]:
response

{'input': "Name Rama's sibilings names",
 'context': [Document(metadata={'page_label': '17', 'producer': 'Microsoft® Word 2010', 'author': 'Sony', 'source': 'RAMAYANA.pdf', 'subject': 'Compiled by', 'page': 16, 'creationdate': '2013-04-14T19:39:50-07:00', 'total_pages': 45, 'creator': 'Microsoft® Word 2010', 'title': 'RAMAYANA FOR CHILDREN', 'moddate': '2013-04-14T19:39:50-07:00'}, page_content='Rama, my son!'),
  Document(metadata={'moddate': '2013-04-14T19:39:50-07:00', 'source': 'RAMAYANA.pdf', 'producer': 'Microsoft® Word 2010', 'title': 'RAMAYANA FOR CHILDREN', 'subject': 'Compiled by', 'page': 6, 'total_pages': 45, 'creationdate': '2013-04-14T19:39:50-07:00', 'author': 'Sony', 'creator': 'Microsoft® Word 2010', 'page_label': '7'}, page_content="While Rama wed Sita, Uremia was married to Lakshmana and Janaka's nieces Mondovi and Shruakirthi to Bharata and Shatrugna."),
  Document(metadata={'creationdate': '2013-04-14T19:39:50-07:00', 'page_label': '22', 'total_pages': 45, 'subject

In [67]:
print(response["answer"])

Rama's siblings are Lakshmana, Bharata, and Shatrugna. If you have any more questions about them, feel free to ask!


In [63]:
print(response["input"])

Name Rama's sibilings names
