In [None]:
import os
import json
import autogen
from autogen.agentchat.contrib.retrieve_user_proxy_agent import RetrieveUserProxyAgent
from autogen.agentchat.contrib.retrieve_assistant_agent import RetrieveAssistantAgent
import chromadb
from google.colab import userdata

# Read the OpenAI key from the Colab secret store (google.colab.userdata),
# not from the process environment.
api_key = userdata.get("OPEN_AI_API_KEY")

# Fail fast on an empty value instead of letting the first LLM call fail later.
if not api_key:
    raise ValueError(
        "Please set your OpenAI API key in the Colab secret 'OPEN_AI_API_KEY' "
        "and grant this notebook access to it."
    )


# Model/credential list shared by the agents configured further down.
config_list = [
    {
        "model": "gpt-3.5-turbo",
        "api_key": api_key,
    }
]

# LLM-backed agent instructed to answer from the context it is sent.
assistant = RetrieveAssistantAgent(
    name="assistant",
    system_message="You are a helpful assistant. Answer questions based on the provided context.",
    llm_config=dict(
        timeout=600,
        cache_seed=42,  # fixed seed value, kept constant across runs
        config_list=config_list,
    ),
)

# Remote text file used as the document source for retrieval.
corpus_file = "https://huggingface.co/datasets/thinkall/NaturalQuestions-QA/resolve/main/corpus.txt"

# Proxy agent that runs without human input and carries the retrieval settings.
ragproxyagent = RetrieveUserProxyAgent(
    name="ragproxyagent",
    human_input_mode="NEVER",
    max_consecutive_auto_reply=10,
    retrieve_config=dict(
        task="qa",
        docs_path=corpus_file,
        chunk_token_size=2000,
        # Reuse the model name declared in config_list so the two stay in sync.
        model=config_list[0]["model"],
        # Chroma client backed by the /tmp/chromadb directory.
        client=chromadb.PersistentClient(path="/tmp/chromadb"),
        collection_name="natural-questions",
        chunk_mode="one_line",
        embedding_model="all-MiniLM-L6-v2",
    ),
)

# Evaluation queries in JSON Lines form: one {"_id", "text"} object per line.
queries = '''
{"_id": "ce2342e1feb4e119cb273c05356b33309d38fa132a1cbeac2368a337e38419b8", "text": "what is non controlling interest on balance sheet"}
{"_id": "3a10ff0e520530c0aa33b2c7e8d989d78a8cd5d699201fc4b13d3845010994ee", "text": "how many episodes are in chicago fire season 4"}
{"_id": "fcdb6b11969d5d3b900806f52e3d435e615c333405a1ff8247183e8db6246040", "text": "what are bulls used for on a farm"}
{"_id": "26c3b53ec44533bbdeeccffa32e094cfea0cc2a78c9f6a6c7a008ada1ad0792e", "text": "has been honoured with the wisden leading cricketer in the world award for 2016"}
{"_id":"0868d0964c719a52cbcfb116971b0152123dad908ac4e0a01bc138f16a907ab3", "text": "who carried the usa flag in opening ceremony"}
'''

# Decode every non-empty line; the leading and trailing newlines of the literal
# produce empty strings that must be skipped.
queries = [json.loads(record) for record in queries.splitlines() if record]
questions = [entry["text"] for entry in queries]

# Reference answers, positionally aligned with `questions`; each entry lists
# the acceptable answers for that question.
answers = [
    ["the portion of a subsidiary corporation's stock that is not owned by the parent corporation"],
    ["23"],
    ["breeding", "as work oxen", "slaughtered for meat"],
    ["Virat Kohli"],
    ["Erin Hamlin"],
]

print("Questions:", questions)
print("Expected Answers:", answers)

# Ask every question through the retrieval proxy and print the assistant's
# final reply next to the reference answer.
for i, qa_problem in enumerate(questions):
    print(f"\n\n>>>>>>>>>>>>>>> Below are outputs of Case {i+1} <<<<<<<<<<<<\n\n")
    assistant.reset()
    # Let the proxy build the opening message from `problem` so that it can
    # attach retrieved context; passing the raw question as `message` sends
    # the question alone and skips retrieval entirely.
    response = ragproxyagent.initiate_chat(
        assistant,
        message=ragproxyagent.message_generator,
        problem=qa_problem,
    )

    # Look at the assistant's own history: there, role "assistant" marks the
    # messages the assistant wrote. In ragproxyagent's history the same role
    # marks the proxy's outgoing messages (the question), which is why the
    # question used to be printed as the "System Answer".
    for message in reversed(assistant.chat_messages[ragproxyagent]):
        if message["role"] == "assistant":
            print(f"Question: {qa_problem}")
            print(f"System Answer: {message['content']}")
            print(f"Expected Answer: {answers[i]}")
            break
    else:
        # No reply was recorded; say so instead of printing nothing.
        print(f"Question: {qa_problem}")
        print("System Answer: <no reply from assistant>")
        print(f"Expected Answer: {answers[i]}")


Questions: ['what is non controlling interest on balance sheet', 'how many episodes are in chicago fire season 4', 'what are bulls used for on a farm', 'has been honoured with the wisden leading cricketer in the world award for 2016', 'who carried the usa flag in opening ceremony']
Expected Answers: [["the portion of a subsidiary corporation's stock that is not owned by the parent corporation"], ['23'], ['breeding', 'as work oxen', 'slaughtered for meat'], ['Virat Kohli'], ['Erin Hamlin']]


>>>>>>>>>>>>>>> Below are outputs of Case 1 <<<<<<<<<<<<


ragproxyagent (to assistant):

what is non controlling interest on balance sheet

--------------------------------------------------------------------------------
assistant (to ragproxyagent):

Non-controlling interest (NCI) on the balance sheet represents the portion of a subsidiary company that is not owned by the parent company. This means that the parent company does not have full ownership (controlling interest) of the subsidiary. NCI is reported in the equity section of the balance sheet and represents the equity interest held by external investors or minority shareholders in the subsidiary.

--------------------------------------------------------------------------------
Question: what is non controlling interest on balance sheet
System Answer: what is non controlling interest on balance sheet
Expected Answer: ["the portion of a subsidiary corporation's stock that is not owned by the parent corporation"]


>>>>>>>>>>>>>>> Below are outputs of Case 2 <<<<<<<<<<<<


ragproxyagent (to assistant):

how many episodes are in chicago fire season 4

--------------------------------------------------------------------------------
assistant (to ragproxyagent):

Chicago Fire Season 4 consists of 23 episodes.

--------------------------------------------------------------------------------
Question: how many episodes are in chicago fire season 4
System Answer: how many episodes are in chicago fire season 4
Expected Answer: ['23']


>>>>>>>>>>>>>>> Below are outputs of Case 3 <<<<<<<<<<<<


ragproxyagent (to assistant):

what are bulls used for on a farm

--------------------------------------------------------------------------------
assistant (to ragproxyagent):

Bulls are typically used for breeding purposes on a farm. They are primarily responsible for mating with cows to produce calves, which helps maintain the farm's livestock population.

--------------------------------------------------------------------------------
Question: what are bulls used for on a farm
System Answer: what are bulls used for on a farm
Expected Answer: ['breeding', 'as work oxen', 'slaughtered for meat']


>>>>>>>>>>>>>>> Below are outputs of Case 4 <<<<<<<<<<<<


ragproxyagent (to assistant):

has been honoured with the wisden leading cricketer in the world award for 2016

--------------------------------------------------------------------------------
assistant (to ragproxyagent):

During the 2016 season, Indian cricketer Virat Kohli was honored with the Wisden Leading Cricketer in the World award.

--------------------------------------------------------------------------------
Question: has been honoured with the wisden leading cricketer in the world award for 2016
System Answer: has been honoured with the wisden leading cricketer in the world award for 2016
Expected Answer: ['Virat Kohli']


>>>>>>>>>>>>>>> Below are outputs of Case 5 <<<<<<<<<<<<


ragproxyagent (to assistant):

who carried the usa flag in opening ceremony

--------------------------------------------------------------------------------
assistant (to ragproxyagent):

The flag bearer for the United States during the opening ceremony can vary from event to event, as it is typically a chosen representative athlete. Since you did not specify an event or year, I am unable to provide you with the specific name of the flag bearer.

--------------------------------------------------------------------------------
Question: who carried the usa flag in opening ceremony
System Answer: who carried the usa flag in opening ceremony
Expected Answer: ['Erin Hamlin']
