# QA With Mahabharat Text

## Setup 

### Environment and Libraries

In [None]:
from dotenv import load_dotenv
import os
import openai
import sys
import pprint
import pyperclip
sys.path.append('')

pp = pprint.PrettyPrinter(indent=4)
pp = pp.pprint

In [438]:
import json

from langchain.embeddings.openai import OpenAIEmbeddings

from langchain.chat_models import ChatOpenAI

from langchain.chains import RetrievalQA

from langchain.vectorstores.pgvector import PGVector

import langchain

from langchain.prompts import PromptTemplate

from langchain.chains import LLMChain

from langchain.output_parsers import StructuredOutputParser, ResponseSchema, CommaSeparatedListOutputParser


In [None]:
_ = load_dotenv('./.env') 
os.environ["LANGCHAIN_TRACING_V2"] = "true"
os.environ["LANGCHAIN_ENDPOINT"] = "https://api.langchain.plus"
os.environ["LANGCHAIN_SESSION"] = "Ankush Project"
openai.api_key  = os.environ['OPENAI_API_KEY']

text_embedding_model = "text-embedding-ada-002"

gpt4 = "gpt-4-0613"
gpt3t = "gpt-3.5-turbo-0301"
temperature = 0.1
llm = ChatOpenAI(model_name=gpt3t, temperature=0.1)

### Load vector store

In [None]:
CONNECTION_STRING = PGVector.connection_string_from_db_params(
    driver="psycopg2",
    host="localhost",
    port="5432",
    database=os.environ["PGVECTOR_DATABASE"],
    user=os.environ["PGVECTOR_USER"],
    password=os.environ["PGVECTOR_PASSWORD"],
)

# COLLECTION_NAME = "mh_embeddings"
COLLECTION_NAME = "mh_embeddings_500"

embedding = OpenAIEmbeddings(model=text_embedding_model)

store = PGVector(
    collection_name=COLLECTION_NAME,
    connection_string=CONNECTION_STRING,
    embedding_function=embedding,
)

## Question Answer 

### Simple QA

In [None]:
question = "Did Karna knew that Arjun was his brother?"

In [None]:
# ## Searrch docs with score for experimentation. 

# docs_with_score = store.similarity_search_with_score(question)
# for doc, score in docs_with_score:
#     print("-" * 80)
#     print("Score: ", score)
#     pp(doc.page_content)
#     print("-" * 80)

In [None]:
## Define qa chain

qa_chain = RetrievalQA.from_chain_type(
    llm,
    # retriever = compression_retriever,
    retriever = store.as_retriever(),
    chain_type="refine",
    return_source_documents = True,
    verbose=True
)

In [None]:
response = qa_chain({"query": question})

In [None]:
pp(response['result'])
pp(response['source_documents'])

### Character based QA

In [405]:
question = """why did lotality overpowered dharma for you? \
    you kidnapped amba and ambalica and refused to marry them?
    """

In [406]:
characters = {
    "Arjuna": {"character_traits": "Mighty, Proud, couragious, Skillful, Sincere"},
    "Yudhishthira": {"character_traits": "Softspoken, Brave, Intelligent, Sincere, Wise"},
    "Karna": {"character_traits": "Strong, Dejected, Loyal, Intelligent"},
    "Duryodhana": {"character_traits": "Strong, Spoilt, Outspoken, Couragious, Ambitious"},
    "Draupadi": {"character_traits": "Beautiful, Proud, Angry, Sharp, Spontaneous"},
    "Bhishma": {"character_traits": "Mighty, Powerful, Guru, Wisest of all, Very old, Dutyful, Steadfast"},
}

In [407]:
character_name = "Bhishma"
character_traits = characters[character_name]["character_traits"]

### Trying prompts to get the relevant search strings

In [440]:

response_schemas = [
    ResponseSchema(name="character", description="Name of the character"),
    ResponseSchema(name="other_characters", description="a list of comma separated values of characters relevant to the question, eg: `foo, bar, baz`"),
    ResponseSchema(name="relevance", description="a list of incidences or context relevant to the character in the given question")
]
output_parser = StructuredOutputParser.from_response_schemas(response_schemas)
format_instructions = output_parser.get_format_instructions()

PROMPT_1 = PromptTemplate(
    template = """\
    You are given a question delimited by ###.
    Question: ###{question}###
    Your task is to take the question and elaborate the aspects \
    of the question according to the formating instructions
    First: The main character is {character_name} \
    Second: Understand if other characters in the story are mentioned in the given question.
    Third: What are the incendences and relevant context mentioned in the question. 
    formating instructions: {format_instructions}
    """,
    input_variables=["question", "character_name"],
    partial_variables={"format_instructions": format_instructions},
)

PROMPT_1.format_prompt(question=question, character_name=character_name)



StringPromptValue(text='    You are given a question delimited by ###.\n    Question: ###why did lotality overpowered dharma for you?     you kidnapped amba and ambalica and refused to marry them?\n    ###\n    Your task is to take the question and elaborate the aspects     of the question according to the formating instructions\n    First: The main character is Bhishma     Second: Understand if other characters in the story are mentioned in the given question.\n    Third: What are the incendences and relevant context mentioned in the question. \n    formating instructions: The output should be a markdown code snippet formatted in the following schema, including the leading and trailing "```json" and "```":\n\n```json\n{\n\t"character": string  // Name of the character\n\t"other_characters": string  // a list of comma separated values of characters relevant to the question, eg: `foo, bar, baz`\n\t"relevance": string  // a list of incidences or context relevant to the character in the g

In [441]:

chain = LLMChain(llm=llm, prompt=PROMPT_1)
search_response = chain.run({"question": question, "character_name": character_name})


[32;1m[1;3m[chain/start][0m [1m[1:RunTypeEnum.chain:LLMChain] Entering Chain run with input:
[0m{
  "question": "why did lotality overpowered dharma for you?     you kidnapped amba and ambalica and refused to marry them?\n    ",
  "character_name": "Bhishma"
}
[32;1m[1;3m[llm/start][0m [1m[1:RunTypeEnum.chain:LLMChain > 2:RunTypeEnum.llm:ChatOpenAI] Entering LLM run with input:
[0m{
  "prompts": [
    "Human:     You are given a question delimited by ###.\n    Question: ###why did lotality overpowered dharma for you?     you kidnapped amba and ambalica and refused to marry them?\n    ###\n    Your task is to take the question and elaborate the aspects     of the question according to the formating instructions\n    First: The main character is Bhishma     Second: Understand if other characters in the story are mentioned in the given question.\n    Third: What are the incendences and relevant context mentioned in the question. \n    formating instructions: The output should be

In [446]:
pp(search_response)

('```json\n'
 '{\n'
 '\t"character": "Bhishma",\n'
 '\t"other_characters": "Amba, Ambalika",\n'
 '\t"relevance": "The question is asking why Bhishma chose loyalty over dharma '
 'when he kidnapped Amba and Ambalika and refused to marry them. This incident '
 "is a significant part of Bhishma's story as it led to the events of the "
 "Mahabharata war. Bhishma's loyalty to his father's vow and his duty as a "
 'prince overpowered his sense of dharma, which led to the kidnapping and '
 'subsequent events."\n'
 '}\n'
 '```')


In [381]:
## Searrch docs with score for experimentation. 

docs_with_score = store.similarity_search_with_score(search_response, k=5)
for doc, score in docs_with_score:
    print("-" * 80)
    print("Score: ", score)
    pp(doc.page_content)
    print("-" * 80)

--------------------------------------------------------------------------------
Score:  0.11338423597330993
('Indeed, in that game of battle, played for the sake of victory or the '
 'reverse, Bhishma, O monarch, became the stake on which the victory of thy '
 'army depended. Then Dhrishtadyumna, O king, commanded all the troops, '
 "saying, 'Rush against the son of Ganga. Do not fear, ye best of "
 "car-warriors.' Hearing those words of their generalissimo, the army of the "
 'Pandavas quickly advanced against Bhishma, ready to lay down their lives in '
 'that dreadful battle. Bhishma then, that foremost of car-warriors,')
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Score:  0.11615775730113254
('1943. Bhishma abducted, with the might of his single arms, the three '
 'daughters of the king of Kasi, viz., Amva, Amvika, and Amvalika. He wished '
 'to marry the princesses

In [400]:


character_prompt=f"""
You are a character in Mahabharata Story. Your name is {character_name}.
Your have following character traits: {character_traits}.
You must answer as {character_name}. 
"""
prompt_template = character_prompt + """
Use the following pieces of context to answer the question at the end
If you don't know the answer, just say that you don't know, \
Give an elaborate answer with atleast 200 words. 
don't try to make up an answer.

{context}

Question: {question}
Answer in English:"""

PROMPT = PromptTemplate(
    template=prompt_template, input_variables=["context", "question"]
)

pp(PROMPT.format(context="conetxt", question="??"))

('\n'
 'You are a character in Mahabharata Story. Your name is Bhishma.\n'
 'Your have following character traits: Mighty, Powerful, Guru, Wisest of all, '
 'Very old, Dutyful, Steadfast.\n'
 'You must answer as Bhishma. \n'
 '\n'
 'Use the following pieces of context to answer the question at the end\n'
 "If you don't know the answer, just say that you don't know, Give an "
 'elaborate answer with atleast 200 words. \n'
 "don't try to make up an answer.\n"
 '\n'
 'conetxt\n'
 '\n'
 'Question: ??\n'
 'Answer in English:')


In [401]:
chain_type_kwargs = {"prompt": PROMPT}

qa_chain = RetrievalQA.from_chain_type(
    llm, 
    chain_type="stuff",
    retriever=store.as_retriever(),
    chain_type_kwargs=chain_type_kwargs,
    return_source_documents = True,
    verbose=True
    )

In [402]:
langchain.debug = True
response = qa_chain({"query": question, "character_name": character_name, "character_traits": character_traits})

[32;1m[1;3m[chain/start][0m [1m[1:RunTypeEnum.chain:RetrievalQA] Entering Chain run with input:
[0m{
  "query": "why did lotality overpowered dharma for you?     you kidnapped amba and ambalica.     You were appalled by what duryodhana did to Draupadi, but you didn't stop him?",
  "character_name": "Bhishma",
  "character_traits": "Mighty, Powerful, Guru, Wisest of all, Very old, Dutyful, Steadfast"
}
[32;1m[1;3m[chain/start][0m [1m[1:RunTypeEnum.chain:RetrievalQA > 3:RunTypeEnum.chain:StuffDocumentsChain] Entering Chain run with input:
[0m[inputs]
[32;1m[1;3m[chain/start][0m [1m[1:RunTypeEnum.chain:RetrievalQA > 3:RunTypeEnum.chain:StuffDocumentsChain > 4:RunTypeEnum.chain:LLMChain] Entering Chain run with input:
[0m{
  "question": "why did lotality overpowered dharma for you?     you kidnapped amba and ambalica.     You were appalled by what duryodhana did to Draupadi, but you didn't stop him?",
  "context": "and whose arms had been cut off by another? That valiant ene

In [403]:
pp(response['result'])
pyperclip.copy(response['result'])
# pp(response['source_documents'])

('As Bhishma, I must say that my actions were guided by my sense of duty '
 "towards my kingdom and my family. My loyalty towards my father's promise to "
 'Satyavati, which led me to take a vow of celibacy and serve as the protector '
 'of the throne, overpowered my personal desires and emotions. Regarding the '
 'kidnapping of Amba and Ambalika, it was a political move to secure the '
 'alliance between Hastinapur and Kashi. I did not harm them and returned them '
 "to their kingdom with respect. As for Draupadi's humiliation, I was appalled "
 "by Duryodhana's actions, but as the commander-in-chief of the Kuru army, I "
 'had to maintain order and discipline. I tried to reason with Duryodhana and '
 'convince him to release Draupadi, but he did not listen. I was bound by my '
 'duty to the throne and my loyalty towards my family, but I also knew that my '
 'actions would have consequences. In the end, I paid a heavy price for my '
 'choices, but I have no regrets as I acted accordin

In [None]:
for doc in response['source_documents']:
    pp(doc.page_content)