# Create a Research Agent

This is a concept agent that is inspired by my process of researching on a subject. <br>
I also took some inspiration from BabyAGI (without tools) implementation 

In [50]:
from dotenv import load_dotenv
load_dotenv('./.env') 
import uuid
import os
from langchain.chat_models import ChatOpenAI
from langchain.embeddings import OpenAIEmbeddings
from langchain.prompts import PromptTemplate


In [51]:
text_embedding_model = "text-embedding-ada-002"
gpt3t = "gpt-3.5-turbo"
gpt4 = "gpt-4"

embeddings = OpenAIEmbeddings(model=text_embedding_model)


## Set up the vector store

In [52]:
use_localdb = True

SUPABASE_PASSWORD = os.environ['SUPABASE_PASSWORD']
SUPABASE_DBUSER = os.environ['SUPABASE_DBUSER']
SUPABASE_DATABASE = os.environ['SUPABASE_DATABASE']
supabasedb_string = f"postgresql://{SUPABASE_DBUSER}:{SUPABASE_PASSWORD}@db.doxggeyqopdnxfhseufq.supabase.co:5432/{SUPABASE_DATABASE}"

PGVECTOR_USER = os.environ['PGVECTOR_USER']
PGVECTOR_PASSWORD = os.environ['PGVECTOR_PASSWORD']
PGVECTOR_DATABASE = os.environ['PGVECTOR_DATABASE']
localdb_string = f"postgresql://{PGVECTOR_USER}:{PGVECTOR_PASSWORD}@localhost:5432/{PGVECTOR_DATABASE}"

connection_string = localdb_string if use_localdb else supabasedb_string

### Main text store

In [53]:
from langchain.vectorstores import PGVector

combined_text_store = PGVector(
    collection_name='mahabharat_combined_text',
    connection_string=connection_string,
    embedding_function=embeddings,
)
print(type(combined_text_store))

<class 'langchain.vectorstores.pgvector.PGVector'>


### Supabase vector store for storing runs

The supabase client here is not used as a vector store. 
I am only using it to save the runs data. 
You can remove it if you dont need it. 

In [54]:
from supabase.client import Client, create_client
from langchain.vectorstores import SupabaseVectorStore

supabase_url = os.environ.get("SUPABASE_URL")
supabase_key = os.environ.get("SUPABASE_SERVICE_KEY")
supabase: Client = create_client(supabase_url, supabase_key)

runs_store = SupabaseVectorStore(embedding=embeddings, client=supabase, table_name='runs',query_name="match_runs")

# ## Testing store
# run_id = str(uuid.uuid4())
# runs_store.add_texts(texts=["testing the store"], metadatas=[{"key": "value"}], ids=[run_id])
# matched_docs = runs_store.similarity_search_with_relevance_scores("testing store", 1)
# matched_docs



### Helper Function for converting question string to list of questions

In [55]:
def split2Dict(question):
    question = question.strip().split(".", 1)
    question_dict = {question[0].strip(): question[-1].strip()}
    return question_dict


def result2DictOfQuestions(result: str):
    questions = result.split("\n")
    qdict = {}
    for q in questions:
        qdict = {**qdict, **split2Dict(q)}
    return qdict


## Import Chains

#### Question creator
Idea: Generate new questions based on 
- `question` - Original question. This is important so that the pertinence to the original question is always maintained. Or else the context can diverge quickly into impertinent results.
- `unanswered_questions`: So that the new questions do not overlap with the old ones.
- `context`: The document found during the current run. So the questions are derived from the fresh information based on the current question being asked. 
- `num_questions`: Configurable hyper parameter.
- `start_id`: This is passed so that the ids of the newly generated question do overlap with the current list. 

---
#### Most pertinent Question chain
Pick the most pertinent question out of the given list of questions <br>
I am not using any additional context other than the `original_question` for decising the pertinence.

---
#### Retrieval QA
This chain is used to answer the intermediate questions. The idea is to generate succinct answers which can be used as notes to finally answer the original question

--- 
#### Result Analyser
Not using this right now. I am not able to get this piece working well. So currently I will just run the agent for a fixed number of iterations and then compile the answer. 

In [56]:
## Import all the chains. 
from chains.create_questions import QuestionCreationChain 
from chains.most_pertinent_question import MostPertinentQuestion
from chains.retrieval_qa import retrieval_qa
from chains.research_compiler import research_compiler

## Model with parameters
def language_model(model_name: str = gpt3t, temperature: float = 0, verbose: bool = False):
    llm = ChatOpenAI(model_name=model_name, temperature=temperature)
    return llm


#### Test Data

In [57]:

scratchpad = {
    'original_question': "Why was Arjun not present in the battle when Abhimanyu was slain?",
    'unanswered_questions': {},
    'answerpad': [],
    'current_question': '',
    'current_answer': "Based on the given context, it is not explicitly mentioned why Arjuna was absent during the battle when Abhimanyu was slain. The context only provides information about Arjuna's grief and anger upon learning about his son's death. Arjuna expresses his disappointment and blames his allies, the Panchalas, for not protecting Abhimanyu. He questions their manliness and prowess, suggesting that they failed to fulfill their promise of protecting his son.\n\nArjuna's absence during the battle could be due to various reasons, but without further information, it is difficult to determine the exact reason.",
    'notes': [],
    'current_documents': [],
}

In [58]:

# ---
## Test Question Creator Chain
# question_creation_chain = QuestionCreationChain.from_llm(llm, verbose=False)

# question_response = question_creation_chain.run(
#         question=scratchpad['original_question'],
#         context=scratchpad['current_answer'],
#         unanswered_questions=scratchpad['unanswered_questions'],
#         num_questions = 4,
#         start_id=5,
#     )

# scratchpad['unanswered_questions'] = {**scratchpad['unanswered_questions'], **result2DictOfQuestions(result = question_response)}

# print(scratchpad['original_question'], "\n")
# print(scratchpad['unanswered_questions'], "\n")

# ---
## Test Pertinent Question picker chain
# print("\033[93m\033[1m", ",\n".join(f"'{key}. {value}'" for key, value in scratchpad['unanswered_questions'].items()), "\033[0m\033[0m")

# most_pertinent_question = MostPertinentQuestion.from_llm(llm = ChatOpenAI(model_name=gpt3t, temperature=0.5), verbose = False)
# response = most_pertinent_question.run(
#         original_question=scratchpad['original_question'],
#         unanswered_questions="\n".join(f"'{key}. {value}'," for key, value in scratchpad['unanswered_questions'].items()),
#     )

# next_question = response

# print('Original Question: ', scratchpad['original_question'])
# print('Chosen question', "\033[92m\033[1m", next_question, "\033[0m\033[0m")

# ---
## Test Retrieval QA Chain
# r, d = retrieval_qa(
#     llm = ChatOpenAI(model_name=gpt3t, temperature=0.1),
#     store = combined_text_store,
#     question = scratchpad['original_question'], 
#     verbose = verbose)

## Test Research Compiler chain
# result = research_compiler(
#     llm = ChatOpenAI(model_name=gpt3t, temperature=0.1),
#     question = scratchpad['original_question'], 
#     context = scratchpad['notes'], 
#     prev_answer = scratchpad['answerpad'],
#     verbose = verbose)

# print(result)

# The Research Agent

This is the final research Agent. 

In [59]:
## First create a run id. 
run_id = str(uuid.uuid4())

## Define scratchpad for keeping the run data
scratchpad = {
    'original_question': "Why did Kunti abandoned her first born??",
    'unanswered_questions': {},
    'answerpad': [],
    'notes': [],
    'current_documents': [],
    'documents': [],
    'answered_questions': [],
}
verbose = False
first_run = True
max_iterations = 3
current_iteration = 0
num_questions_per_iteration = 4
question_creation_temperature = 0.5
question_prioritisation_temperature = 0.5
analyser_temperature = 0
store = combined_text_store
current_question_id = None


## ---- The researcher -----
while True: 
    current_iteration += 1

    print("\033[91m\033[1m" + f"\n ------  Iteration {current_iteration} ------- \n" + "\033[0m\033[0m")
    if current_iteration >= max_iterations:
        print("\033[91m\033[1m" + f"\n ------ Max iterations reached." + "\033[0m\033[0m")
        break
    
    ## 1. First run the retrieval_qa chain on current question
    if first_run:
        current_question = scratchpad['original_question']
        current_answer, current_documents = retrieval_qa( 
            llm = language_model(temperature=0), 
            store = combined_text_store, 
            question = current_question, 
            verbose = verbose)
        scratchpad['answerpad'] += [current_answer]
        first_run = False
    else:
        current_answer, current_documents = retrieval_qa( 
            llm = language_model(temperature=0), 
            store = combined_text_store, 
            question = current_question, 
            verbose = verbose)
        scratchpad['notes'] += [{'question': current_question, 'answer': current_answer}]

    scratchpad['current_documents'] = current_documents
    scratchpad['documents'] += current_documents
    print(
        "\033[36;2m",
        f"\nCurrent Question: {current_question}\n", 
        f"Current Answer: {current_answer} \n",
        "\033[0m\033[0m")

    ## 2. Ask more questions based on current_answer as context
    ## ----
    start_id = (current_iteration-1)*num_questions_per_iteration + 1
    question_creation_chain = QuestionCreationChain.from_llm(
        llm = language_model(temperature=question_creation_temperature), 
        verbose=verbose
        )
    question_creation_response = question_creation_chain.run(
        question=scratchpad['original_question'],
        context="\n".join(list(map(lambda x: x.page_content, scratchpad['current_documents']))),
        unanswered_questions=scratchpad['unanswered_questions'],
        num_questions = num_questions_per_iteration,
        start_id=start_id,
    )
    scratchpad['unanswered_questions'] = {
        **scratchpad['unanswered_questions'], 
        **result2DictOfQuestions(result = question_creation_response)
        }

    if current_question_id:
        scratchpad['answered_questions'] += [scratchpad['unanswered_questions'].pop(current_question_id)]
        # Remove the current question here after generating the new questions so that the 
        # current question is not regenerated. 
       
    print(
        "\033[35;3m",
        "** Unanswered Questions **\n",
        "\n".join(f"'{key}. {value}'" for key, value in scratchpad['unanswered_questions'].items()), 
        "\033[32;5m",
        "\n** Answered Questions **\n",
        "\n".join(scratchpad['answered_questions']),
        "\033[0m\033[0m")
    ### --- ###

    ## 3. Find the most pertinent question to ask next 
    ## ----
    find_next_question = MostPertinentQuestion.from_llm(
        llm = language_model(temperature=question_prioritisation_temperature),
        verbose = verbose
        )
    current_question = find_next_question.run(
        original_question=scratchpad['original_question'],
        unanswered_questions="\n".join(f"'{key}. {value}'," for key, value in scratchpad['unanswered_questions'].items()),
    )

    # print("next q response -->", current_question)
    # Calculate question id so it can be deleted in the next iteration
    current_question_id = current_question.split('.', 1)[0]
    current_question = current_question.split('.', 1)[-1]
    print(
        "\033[93m\033[1m", 
        f"\nNext Question I need to ask: {current_question}\nQuestion Id: {current_question_id}", 
        "\033[0m\033[0m")
    ### --- ###



## ----- The Compiler -----
final_answer= research_compiler(
    llm = language_model(temperature = analyser_temperature, verbose=True),
    question = scratchpad['original_question'], 
    context = scratchpad['notes'], 
    prev_answer = scratchpad['answerpad'],
    verbose = verbose)

print("\033[1;37m" + f"\nFinal Answer: \n {final_answer['text']}\n" + "\033[0m\033[0m")  

## ----- Save the run data -----
## If you do not need this, feel free to comment it out. 
_ = runs_store.add_texts(
    texts=[scratchpad['original_question']], 
    metadatas=[{
        'docs_metadata': [document.metadata for document in scratchpad['documents']], 
        'answer': final_answer['text'], 
        'context': final_answer['context']
        }], 
    ids=[run_id])
    

[91m[1m
 ------  Iteration 1 ------- 
[0m[0m
[36;2m 
Current Question: Why did Kunti abandoned her first born??
 Current Answer: Kunti abandoned her first-born child because she had summoned the sun-god, Surya, using a mantra given to her by the rishi Durvasa. When Surya appeared and Kunti realized that she was going to have a child, she became fearful and regretful. However, Surya informed her that the mantra could not be undone, and she had to bear the consequences of her actions. Therefore, when the baby was born, Kunti put him in a basket and set him adrift in a river. The exact reason for her decision to abandon the child is not explicitly mentioned in the given context, but it can be inferred that Kunti did so out of fear, regret, and possibly a sense of responsibility towards her other sons and her position as a queen. 
 [0m[0m
[35;3m ** Unanswered Questions **
 '1. Why did Kunti use the mantra to summon Surya, the sun-god?'
'2. What was the reason behind Kunti putting h

2023-09-12 23:40:26,654:INFO - HTTP Request: POST https://doxggeyqopdnxfhseufq.supabase.co/rest/v1/runs "HTTP/1.1 201 Created"


In [60]:
scratchpad

{'original_question': 'Why did Kunti abandoned her first born??',
 'unanswered_questions': {'1': 'Why did Kunti use the mantra to summon Surya, the sun-god?',
  '3': "Why did Gandhari become angered when she heard about Kunti's child?",
  '4': 'What was the significance of Gandhari giving birth to a piece of flesh and Vyasadeva appearing?',
  '5': "How did Kunti's relatives react when they found out about her first-born son?",
  '6': 'What was the reaction of Adhiratha and Radha when they found the child in the basket?',
  '7': "How did Karna's upbringing by Adhiratha and Radha influence his character and skills?",
  '8': 'Can you provide more details about the city of Champa and its ruler, who found and raised Karna?'},
 'answerpad': ['Kunti abandoned her first-born child because she had summoned the sun-god, Surya, using a mantra given to her by the rishi Durvasa. When Surya appeared and Kunti realized that she was going to have a child, she became fearful and regretful. However, Sur