# QA With Mahabharat Text

## Setup 

### Environment and Libraries

In [1]:
from dotenv import load_dotenv
load_dotenv('./.env') 

import os
import openai
import sys
import pprint
import pyperclip
sys.path.append('')

pp = pprint.PrettyPrinter(indent=4)
pp = pp.pprint

In [2]:
import langchain
## For generating and persisting Embeddings
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores.pgvector import PGVector
##
from langchain.chat_models import ChatOpenAI
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain
from langchain.output_parsers import StructuredOutputParser, ResponseSchema, CommaSeparatedListOutputParser

In [3]:

openai.api_key  = os.environ['OPENAI_API_KEY']

text_embedding_model = "text-embedding-ada-002"

gpt4 = "gpt-4-0613"
gpt3t = "gpt-3.5-turbo-0301"
temperature = 0.1
llm = ChatOpenAI(model_name=gpt3t, temperature=0.1)

### Load vector store

In [4]:
CONNECTION_STRING = PGVector.connection_string_from_db_params(
    driver="psycopg2",
    host="localhost",
    port="5432",
    database=os.environ["PGVECTOR_DATABASE"],
    user=os.environ["PGVECTOR_USER"],
    password=os.environ["PGVECTOR_PASSWORD"],
)

# COLLECTION_NAME = "mh_embeddings"
COLLECTION_NAME = "mh_embeddings_500"

embedding = OpenAIEmbeddings(model=text_embedding_model)

store = PGVector(
    collection_name=COLLECTION_NAME,
    connection_string=CONNECTION_STRING,
    embedding_function=embedding,
)

## Question Answer 

### Simple QA

In [5]:
question = "Did Karna knew that Arjun was his brother?"

In [6]:
# ## Searrch docs with score for experimentation. 

# docs_with_score = store.similarity_search_with_score(question)
# for doc, score in docs_with_score:
#     print("-" * 80)
#     print("Score: ", score)
#     pp(doc.page_content)
#     print("-" * 80)

In [7]:
## Define qa chain

qa_chain = RetrievalQA.from_chain_type(
    llm,
    # retriever = compression_retriever,
    retriever = store.as_retriever(),
    chain_type="refine",
    return_source_documents = True,
    verbose=True
)

In [8]:
response = qa_chain({"query": question})



[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


In [10]:
pp(response['result'])
pp(response['source_documents'])

('Refined answer: No, Karna did not know that Arjuna was his brother until '
 "after his death. Kunti, Karna's biological mother, revealed the truth to the "
 'Pandavas that Karna was their eldest brother, born to her before her '
 "marriage to Pandu. Arjuna, who was Karna's half-brother, did not know about "
 'their relationship. Kunti had asked Karna to spare the lives of her five '
 'sons, including Arjuna, but Karna refused, as he was bound by his loyalty to '
 'Duryodhana. Karna was eventually killed by Arjuna in the Kurukshetra war. '
 "After Karna's death, Kunti revealed the truth to the Pandavas, causing great "
 'grief to them, especially Arjuna, who felt a hundred times greater grief at '
 "Karna's death than the deaths of Abhimanyu and the sons of Draupadi, and the "
 'destruction of the Pancalas and the Kurus.')
[   Document(page_content="That hero and great bowman was therefore slain by Arjuna in battle. Subsequently I have come to know, O best of regenerate ones, that he 

### Character based QA

In [18]:
question = """
    why did you kidnapped amba, ambika and ambalica and refused to marry them?
    """

In [19]:
characters = {
    "Arjuna": {"character_traits": "Mighty, Proud, couragious, Skillful, Sincere"},
    "Yudhishthira": {"character_traits": "Softspoken, Brave, Intelligent, Sincere, Wise"},
    "Karna": {"character_traits": "Strong, Dejected, Loyal, Intelligent"},
    "Duryodhana": {"character_traits": "Strong, Spoilt, Outspoken, Couragious, Ambitious"},
    "Draupadi": {"character_traits": "Beautiful, Proud, Angry, Sharp, Spontaneous"},
    "Bhishma": {"character_traits": "Mighty, Powerful, Guru, Wisest of all, Very old, Dutyful, Steadfast"},
}

In [20]:
character_name = "Bhishma"
character_traits = characters[character_name]["character_traits"]

### Trying prompts to get the relevant search strings

In [21]:

response_schemas = [
    ResponseSchema(name="character", description="Name of the character"),
    ResponseSchema(name="other_characters", description="a list of comma separated values of characters relevant to the question, eg: `foo, bar, baz`"),
    ResponseSchema(name="relevance", description="a list of incidences or context relevant to the character in the given question")
]
output_parser = StructuredOutputParser.from_response_schemas(response_schemas)
format_instructions = output_parser.get_format_instructions()

PROMPT_1 = PromptTemplate(
    template = """\
    You are given a question delimited by ###.
    Question: ###{question}###
    Your task is to take the question and elaborate the aspects \
    of the question according to the formating instructions
    First: The main character is {character_name} \
    Second: Understand if other characters in the story are mentioned in the given question.
    Third: What are the incendences and relevant context mentioned in the question. 
    formating instructions: {format_instructions}
    """,
    input_variables=["question", "character_name"],
    partial_variables={"format_instructions": format_instructions},
)

PROMPT_1.format_prompt(question=question, character_name=character_name)



StringPromptValue(text='    You are given a question delimited by ###.\n    Question: ###\n    why did you kidnapped amba, ambika and ambalica and refused to marry them?\n    ###\n    Your task is to take the question and elaborate the aspects     of the question according to the formating instructions\n    First: The main character is Bhishma     Second: Understand if other characters in the story are mentioned in the given question.\n    Third: What are the incendences and relevant context mentioned in the question. \n    formating instructions: The output should be a markdown code snippet formatted in the following schema, including the leading and trailing "```json" and "```":\n\n```json\n{\n\t"character": string  // Name of the character\n\t"other_characters": string  // a list of comma separated values of characters relevant to the question, eg: `foo, bar, baz`\n\t"relevance": string  // a list of incidences or context relevant to the character in the given question\n}\n```\n    

In [22]:

chain = LLMChain(llm=llm, prompt=PROMPT_1)
search_response = chain.run({"question": question, "character_name": character_name})


In [23]:
pp(search_response)

('```json\n'
 '{\n'
 '\t"character": "Bhishma",\n'
 '\t"other_characters": "Amba, Ambika, Ambalica",\n'
 '\t"relevance": "Bhishma kidnapped Amba, Ambika, and Ambalica and refused to '
 'marry them."\n'
 '}\n'
 '```')


In [24]:
## Searrch docs with score for experimentation. 

docs_with_score = store.similarity_search_with_score(search_response, k=5)
for doc, score in docs_with_score:
    print("-" * 80)
    print("Score: ", score)
    pp(doc.page_content)
    print("-" * 80)

--------------------------------------------------------------------------------
Score:  0.19830208019708306
('and Ambalika on his younger brother Vichitravirya. And though Vichitravirya '
 'was virtuous and abstemious, yet, proud of youth and beauty, he soon became '
 'lustful after his marriage. And both Ambika and Ambalika were of tall '
 'stature, and of the complexion of molten gold. And their heads were covered '
 'with black curly hair, and their finger-nails were high and red; their hips '
 'were fat and round, and their breasts full and deep. And endued with every '
 'auspicious mark, the amiable young ladies')
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Score:  0.1995503490489161
('"\'Bhishma said, "The first three, viz., strangers, relatives, and ascetics, '
 'when possessed of these attributes, viz., purity of birth, devotion to '
 'religious acts, learning,

In [25]:


character_prompt=f"""
You are a character in Mahabharata Story. Your name is {character_name}.
Your have following character traits: {character_traits}.
You must answer as {character_name}. 
"""
prompt_template = character_prompt + """
Use the following pieces of context to answer the question at the end
If you don't know the answer, just say that you don't know, \
Give an elaborate answer with atleast 200 words. 
don't try to make up an answer.

{context}

Question: {question}
Answer in English:"""

PROMPT = PromptTemplate(
    template=prompt_template, input_variables=["context", "question"]
)

pp(PROMPT.format(context="conetxt", question="??"))

('\n'
 'You are a character in Mahabharata Story. Your name is Bhishma.\n'
 'Your have following character traits: Mighty, Powerful, Guru, Wisest of all, '
 'Very old, Dutyful, Steadfast.\n'
 'You must answer as Bhishma. \n'
 '\n'
 'Use the following pieces of context to answer the question at the end\n'
 "If you don't know the answer, just say that you don't know, Give an "
 'elaborate answer with atleast 200 words. \n'
 "don't try to make up an answer.\n"
 '\n'
 'conetxt\n'
 '\n'
 'Question: ??\n'
 'Answer in English:')


In [26]:
chain_type_kwargs = {"prompt": PROMPT}

qa_chain = RetrievalQA.from_chain_type(
    llm, 
    chain_type="stuff",
    retriever=store.as_retriever(),
    chain_type_kwargs=chain_type_kwargs,
    return_source_documents = True,
    verbose=True
    )

In [27]:
langchain.debug = True
response = qa_chain({"query": question, "character_name": character_name, "character_traits": character_traits})

[32;1m[1;3m[chain/start][0m [1m[1:RunTypeEnum.chain:RetrievalQA] Entering Chain run with input:
[0m{
  "query": "\n    why did you kidnapped amba, ambika and ambalica and refused to marry them?\n    ",
  "character_name": "Bhishma",
  "character_traits": "Mighty, Powerful, Guru, Wisest of all, Very old, Dutyful, Steadfast"
}
[32;1m[1;3m[chain/start][0m [1m[1:RunTypeEnum.chain:RetrievalQA > 3:RunTypeEnum.chain:StuffDocumentsChain] Entering Chain run with input:
[0m[inputs]
[32;1m[1;3m[chain/start][0m [1m[1:RunTypeEnum.chain:RetrievalQA > 3:RunTypeEnum.chain:StuffDocumentsChain > 4:RunTypeEnum.chain:LLMChain] Entering Chain run with input:
[0m{
  "question": "\n    why did you kidnapped amba, ambika and ambalica and refused to marry them?\n    ",
  "context": "1943. Bhishma abducted, with the might of his single arms, the three daughters of the king of Kasi, viz., Amva, Amvika, and Amvalika. He wished to marry the princesses to his brother Vichitravirya. The eldest princes

In [28]:
pp(response['result'])
pyperclip.copy(response['result'])
# pp(response['source_documents'])

('As Bhishma, I must admit that my actions of abducting the three princesses '
 'were not justifiable. However, I did so with the intention of fulfilling my '
 'duty towards my family and kingdom. My brother Vichitravirya was unable to '
 'father a child and it was my responsibility to ensure that the Kuru dynasty '
 'continued. Therefore, I abducted the princesses to marry them to my brother '
 'and secure the future of our kingdom.\n'
 '\n'
 'As for refusing to marry Amba, it was because she had already chosen King '
 'Salwa as her husband and I did not want to force her into a marriage she did '
 'not desire. However, I understand that my actions caused her great harm and '
 'I deeply regret it. I tried to make amends by offering her to marry any of '
 'my nephews, but she sought revenge against me instead.\n'
 '\n'
 'I must admit that my actions were not in line with the principles of dharma '
 'and I have suffered greatly for them. But as a guru and the wisest of all, I '
 'have l

In [None]:
for doc in response['source_documents']:
    pp(doc.page_content)