SETTING UP ENVIRONMENT VARIABLES

In [100]:
from dotenv import load_dotenv
load_dotenv()

True

In [101]:
MODEL_NAME = "gpt-3.5-turbo"
#MODEL_NAME = "llama2"

LOAD MODEL

In [77]:
from langchain_openai import ChatOpenAI
from langchain_openai.embeddings import OpenAIEmbeddings
from langchain_community.llms import Ollama
from langchain_community.embeddings import OllamaEmbeddings

if MODEL_NAME.startswith("gpt"):
    llm = ChatOpenAI(model=MODEL_NAME, temperature=0.0)
    embeddings = OpenAIEmbeddings()
else:
    llm = Ollama(model = MODEL_NAME)
    embeddings = OllamaEmbeddings()


CREATING PROMPT

In [43]:
from langchain.prompts import PromptTemplate

template = '''
Answer the question based on the context given below. If you can't answer the question, reply "I can't answer this as this is beyond the context provided to me."

context: {context}

question: {question}
'''

prompt_template = PromptTemplate.from_template(template)

LOAD DOCUMENT

In [59]:
from langchain.document_loaders import PyPDFLoader

loader = PyPDFLoader("Ramayana Short Story.pdf")
data = loader.load()
data

[Document(page_content="Ramayana Story in ShortThe epic Ramayana has been told in countless ways in several Indian and\nSoutheast Asian languages and geographical locations. Nonetheless, the\nfollowing are the two most well-known and signiﬁcant versions:\nThe Valmiki Ramayana: Written by the sage Valmiki, this version is regarded as\nthe oldest and original. It provides a thorough and objective narrative and is\nwritten in Sanskrit.\nTulsi Ramayana or Ramcharitmanas: Written in the sixteenth century by the\npoet-saint Tulsidas. Composed in the Hindi dialect of Awadhi, a shortened and\nmore concise rendition that emphasizes Ram, Sita, and Lakshmana's emotional10/06/2024, 02:58 Ramayana Short Story\nhttps://blog.jkyog.org/ramayana-short-story/ 2/17", metadata={'source': 'Ramayana Short Story.pdf', 'page': 0}),
 Document(page_content="Chapter 1 - The Bal Kand of\nRamayana\nRam's Birthjourney. This narrative is is poetic and aims for the reader to develop love for\nGod.\nFor the purposes o

CHUNKING DATA

In [58]:
def chunk_data(data, chunk_size=256):
  from langchain.text_splitter import RecursiveCharacterTextSplitter
  text_splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=20)
  chunks = text_splitter.split_documents(data)
  
  return chunks

In [61]:
documents = chunk_data(data)
print(f"Total number of chunks: {len(documents)}")

Total number of chunks: 104


CREATE PINECONE EMBEDDINGS

In [62]:
def insert_or_fetch_embeddings(index_name, chunks, embeddings):
  import pinecone
  from langchain_community.vectorstores import Pinecone
  from pinecone import PodSpec
  pc = pinecone.Pinecone()

  if index_name in pc.list_indexes().names():
    print(f'Index {index_name} already exists. Loading embeddings ...', end='')
    vectorstore = Pinecone.from_existing_index(index_name, embeddings)
    print('Ok')
  else:
    print(f'Creating index {index_name} and embeddings ...', end='')
    pc.create_index(name=index_name, dimension=1536, metric="cosine", spec=PodSpec(environment="gcp-starter"))
    vectorstore = Pinecone.from_documents(chunks, embeddings, index_name=index_name)
    print('Ok')

  return vectorstore

In [63]:
index_name = 'ramayana'
vectorstore = insert_or_fetch_embeddings(index_name, documents, embeddings)

Creating index ramayana and embeddings ...Ok


In [64]:
retriever = vectorstore.as_retriever()
retriever.invoke("Tell me about Arjun")

[Document(page_content='https://blog.jkyog.org/ramayana-short-story/ 6/17', metadata={'page': 4.0, 'source': 'Ramayana Short Story.pdf'}),
 Document(page_content='https://blog.jkyog.org/ramayana-short-story/ 13/17', metadata={'page': 11.0, 'source': 'Ramayana Short Story.pdf'}),
 Document(page_content='Sugriv Challanges Bali for Battle\nSugriv gets Ready With His Army to Search\nfor Sita\nHanuman is Reminded of His Extraordinary\nPowers by JambavanSugriv shares his story of how his brother Bali deceived him. They both agree to\nhelp each other.', metadata={'page': 8.0, 'source': 'Ramayana Short Story.pdf'}),
 Document(page_content='Everyone in the kingdom adores Ram, and He is said to be gorgeous, clever, and\nkind.\nWhen the sage Vishwamitra arrives in Ayodhya, he begs Ram for assistance in\ndefending his yagya (ﬁre sacriﬁce) from demons that interfere with the', metadata={'page': 2.0, 'source': 'Ramayana Short Story.pdf'})]

Plot similarity between Questions and embedding vectors

In [122]:
from openai import OpenAI
client = OpenAI()

def get_embedding(text, model="text-embedding-ada-002"):
    response = client.embeddings.create(
    input=text,
    model=model
)
    embedding = response.data[0].embedding
    
    return embedding

In [None]:
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
import pinecone

pc = pinecone.Pinecone()
index = pc.Index(index_name)

questions = ["Tell me about Ram and Ravan"]
embed = list(get_embedding(questions[0]))
results = index.query(embeddings=embed, top_k=10)
similarity_matrix = cosine_similarity(results)

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# Plot similarity matrix
plt.figure(figsize=(10, 8))
sns.heatmap(similarity_matrix, annot=True, xticklabels=[questions], yticklabels=questions, cmap='coolwarm')
plt.title('Question Similarity Heatmap')
plt.show()

CREATING CHAIN

In [87]:
from langchain_core.output_parsers import StrOutputParser
from operator import itemgetter

parser = StrOutputParser()

chain =(
        {
            "context": itemgetter("question") | retriever, "question": itemgetter("question")
        }
        | prompt_template 
        | llm 
        | parser)
for chr in chain.stream({
    "question":"Why ram and ravan are fighting?"
}):
    print(chr, end="", flush=True)

Ram and Ravan are fighting because Ravan kidnapped Sita, Ram's wife, and Ram is trying to rescue her from Ravan's clutches.

CREATING A MULTI-QUERY RETRIEVER

In [78]:
from langchain.retrievers.multi_query import MultiQueryRetriever

multi_query_retriever = MultiQueryRetriever.from_llm(retriever=vectorstore.as_retriever(), llm=llm)

In [82]:
question = "Why ram and ravan are fighting?"
queries = multi_query_retriever.invoke(question)


In [84]:
queries = "\n\n".join([query.page_content for query in queries])
queries

"battle with Ram. Realizing that it is futile to try and persuade Ravan, he\nchooses to depart Lanks and join Ram and Lakshman in their quest to rescue\nSita from Ravan's clutches. Ram welcomes Vibhishan with open arms and\ncrowns him as the King of Lanka.\n\nand his army. The battle is inevitable now.\nThe army of Ram arrives in Lanka and conﬂicts break out between the Ram's\narmy and the Ravana's sons and generals. Ravana's son Indrajit aims his Shakti\n\nRavan adamantly refuses to consider releasing Sita or take any of his advice\ninto account.\nRavan then instructed his men to put Hanuman's tail on ﬁre and let him go.\nWith his tail ablaze, Hanuman started jumping from one palace on to the other,\n\nRam sends Angada as a peace messenger to Ravana's court before the war,\noﬀering one last chance for Ravana to release Sita and prevent the conﬂict.\xa0\nAngada urges Ravana to release Sita and avoid a catastrophic battle.10/06/2024, 02:58 Ramayana Short Story\n\ncleverness and prowess 

In [85]:
chain =(
        {
            "context": itemgetter("context"), "question": itemgetter("question")
        }
        | prompt_template 
        | llm 
        | parser)
for chr in chain.stream({
    "context": queries,
    "question":"Why ram and ravan are fighting?"
}):
    print(chr, end="", flush=True)

Ram and Ravan are fighting because Ravan abducted Sita, Ram's wife, and refused to release her despite multiple attempts at negotiation and peaceful resolution.

EXPLORING CONTEXT COMPRESSION

In [88]:
compression_prompt_template = """Given the following question and context, extract any part of the context *AS IS* that is relevant to answer the question. If none of the context is relevant return {no_output_str}.

Remember, *DO NOT* edit the extracted parts of the context.

> Question: {{question}}
> Context:
>>>
{{context}}
>>>
Extracted relevant parts:"""

In [91]:
from langchain.retrievers.document_compressors import LLMChainExtractor
from langchain.retrievers.document_compressors.chain_extract import NoOutputParser

question = "Why ram and ravan are fighting?"

docs = vectorstore.as_retriever().invoke(question)
docs

[Document(page_content="battle with Ram. Realizing that it is futile to try and persuade Ravan, he\nchooses to depart Lanks and join Ram and Lakshman in their quest to rescue\nSita from Ravan's clutches. Ram welcomes Vibhishan with open arms and\ncrowns him as the King of Lanka.", metadata={'page': 11.0, 'source': 'Ramayana Short Story.pdf'}),
 Document(page_content="and his army. The battle is inevitable now.\nThe army of Ram arrives in Lanka and conﬂicts break out between the Ram's\narmy and the Ravana's sons and generals. Ravana's son Indrajit aims his Shakti", metadata={'page': 12.0, 'source': 'Ramayana Short Story.pdf'}),
 Document(page_content='Ram and oﬀers powerful weapons. He also suggests them to go to Panchavati,\non the banks of river Godavari, and settle there. On the way to Panchavati, the\nmighty old eagle Jatayu greets Ram.', metadata={'page': 6.0, 'source': 'Ramayana Short Story.pdf'}),
 Document(page_content="Ravan adamantly refuses to consider releasing Sita or take 

In [96]:
output_parser = NoOutputParser()
compression_template = compression_prompt_template.format(no_output_str=output_parser.no_output_str)
COMPRESS_DOC_PROMPT = PromptTemplate(
        input_variables= ["question","context"],
        template=compression_template,
        output_parser=NoOutputParser()
)
print(COMPRESS_DOC_PROMPT)

input_variables=['context', 'question'] output_parser=NoOutputParser() template='Given the following question and context, extract any part of the context *AS IS* that is relevant to answer the question. If none of the context is relevant return NO_OUTPUT.\n\nRemember, *DO NOT* edit the extracted parts of the context.\n\n> Question: {question}\n> Context:\n>>>\n{context}\n>>>\nExtracted relevant parts:'


In [97]:
compressor = LLMChainExtractor.from_llm(llm,prompt=COMPRESS_DOC_PROMPT)
compress_docs = compressor.compress_documents(documents=docs,query=question)
compress_docs

[Document(page_content="battle with Ram. Realizing that it is futile to try and persuade Ravan, he\nchooses to depart Lanks and join Ram and Lakshman in their quest to rescue\nSita from Ravan's clutches. Ram welcomes Vibhishan with open arms and\ncrowns him as the King of Lanka.", metadata={'page': 11.0, 'source': 'Ramayana Short Story.pdf'}),
 Document(page_content="The army of Ram arrives in Lanka and conﬂicts break out between the Ram's\narmy and the Ravana's sons and generals.", metadata={'page': 12.0, 'source': 'Ramayana Short Story.pdf'})]

In [98]:
queries = "\n\n".join([compress_doc.page_content for compress_doc in compress_docs])
queries

"battle with Ram. Realizing that it is futile to try and persuade Ravan, he\nchooses to depart Lanks and join Ram and Lakshman in their quest to rescue\nSita from Ravan's clutches. Ram welcomes Vibhishan with open arms and\ncrowns him as the King of Lanka.\n\nThe army of Ram arrives in Lanka and conﬂicts break out between the Ram's\narmy and the Ravana's sons and generals."

In [99]:
chain =(
        {
            "context": itemgetter("context"), "question": itemgetter("question")
        }
        | prompt_template 
        | llm 
        | parser)
for chr in chain.stream({
    "context": queries,
    "question":"Why ram and ravan are fighting?"
}):
    print(chr, end="", flush=True)

Ram and Ravan are fighting because Ravan had kidnapped Sita, Ram's wife, and Ram is trying to rescue her from Ravan's clutches.

EVALUATING MODEL AND OUTPUTS

Answer Relevance

In [128]:
def get_relevance_of_answers(provider, prompt, response):
    relevance_score = provider.relevance(
        prompt=prompt,
        response=response
    )

    return relevance_score

In [134]:
from trulens_eval import OpenAI as relevanceOpenAI

openai_provider = relevanceOpenAI()
prompt = """"battle with Ram. Realizing that it is futile to try and persuade Ravan, he\nchooses to depart Lanks and join Ram and Lakshman in their quest to rescue\nSita from Ravan's clutches. Ram welcomes Vibhishan with open arms and\ncrowns him as the King of Lanka.\n\nThe army of Ram arrives in Lanka and conﬂicts break out between the Ram's\narmy and the Ravana's sons and generals."
"""
response = "Ram and Ravan are fighting because Ravan had kidnapped Sita, Ram's wife, and Ram is trying to rescue her from Ravan's clutches."
response_openai = get_relevance_of_answers(openai_provider, prompt, response)
print("Open AI Response ", response_openai)

Open AI Response  0.0


In [None]:
prompt = """"battle with Ram. Realizing that it is futile to try and persuade Ravan, he\nchooses to depart Lanks and join Ram and Lakshman in their quest to rescue\nSita from Ravan's clutches. Ram welcomes Vibhishan with open arms and\ncrowns him as the King of Lanka.\n\nThe army of Ram arrives in Lanka and conﬂicts break out between the Ram's\narmy and the Ravana's sons and generals."
"""
response = "Ram and Ravan are fighting because Ravan took pencil from Ram's geometery."
response_openai = get_relevance_of_answers(openai_provider, prompt, response)
print("Open AI Response ", response_openai)