In [7]:
from langchain.document_loaders import PyMuPDFLoader
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chat_models import ChatOpenAI
from langchain.prompts import PromptTemplate
from langchain.chains import RetrievalQA

# 1. Load and Split the Ramayana PDF
loader = PyMuPDFLoader("data/RAMAYANA.pdf")
docs = loader.load()

text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=100)
splits = text_splitter.split_documents(docs)




In [8]:
# 2. Create a vector store
embedding = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2",
                                        model_kwargs={"device": "cpu"})
vectorstore = FAISS.from_documents(splits, embedding)



In [9]:
# 3. Define LLM and RetrievalQA chain
llm = ChatOpenAI(temperature=0, model_name="gpt-4")
retriever = vectorstore.as_retriever()
qa_chain = RetrievalQA.from_chain_type(llm=llm, retriever=retriever, return_source_documents=True)



In [10]:
# 4. Ask a question
query = "Why did Rama go to the forest?"
response = qa_chain(query)
generated_answer = response['result']
context_pages = response['source_documents']

# Show response
print("\n=== Generated Answer ===")
print(generated_answer)




=== Generated Answer ===
Rama went to the forest to carry out his father's promises. It was his duty to fulfill these promises, despite the hardship it would cause him.


In [11]:
# 5. Prepare the evaluation prompt
scoring_prompt_template = PromptTemplate.from_template("""
You are a strict evaluator. Your task is to evaluate the quality of the following answer on 3 criteria:

1. Completeness (Is the answer complete and does it cover all relevant aspects?)
2. Factuality (Is the answer factually correct based on the context provided?)
3. Fluency (Is the answer grammatically correct, coherent, and easy to read?)

Return your result in JSON format as follows:
{{
  "completeness": "<score out of 5>",
  "factuality": "<score out of 5>",
  "fluency": "<score out of 5>",
  "comments": "<brief feedback>"
}}

### Context:
{context}

### Answer:
{answer}
""")



In [12]:
# 6. Prepare context for scoring
context_text = "\n\n".join([doc.page_content[:1000] for doc in context_pages[:2]])  # Limit for brevity
scoring_prompt = scoring_prompt_template.format(context=context_text, answer=generated_answer)

# 7. Score the generated answer
scoring_llm = ChatOpenAI(model_name="gpt-4", temperature=0)
scoring_result = scoring_llm.predict(scoring_prompt)

# 8. Display scoring results
print("\n=== LLM-Based Scoring ===")
print(scoring_result)

  scoring_result = scoring_llm.predict(scoring_prompt)



=== LLM-Based Scoring ===
{
  "completeness": "3 out of 5",
  "factuality": "5 out of 5",
  "fluency": "5 out of 5",
  "comments": "The answer is factually correct and fluently written, but it does not cover all aspects of the context. It does not mention Rama's mother's sadness, Bharatha's rule over Ayodhya, or Rama's decision to leave Chitrkut."
}
