# Setup

In [1]:
import os
import openai
import time
 
openai.api_key = os.environ["OPENAI_API_KEY"]

def llm_legacy(prompt, stop=["\n"]):
    """Complete a plain-text prompt with the OpenAI completions endpoint.

    Uses ``openai.completions.create`` with the ``gpt-3.5-turbo-instruct``
    model (the call previously went through ``openai.Completion.create`` with
    ``text-davinci-002``).

    Args:
        prompt: Text passed as the ``prompt`` of the completion request.
        stop: Stop sequence(s) forwarded unchanged to the API.

    Returns:
        The ``text`` attribute of the first choice in the response.
    """
    completion = openai.completions.create(
        model="gpt-3.5-turbo-instruct",
        prompt=prompt,
        stop=stop,
        max_tokens=300,
        temperature=0,
        top_p=1,
        frequency_penalty=0.0,
        presence_penalty=0.0,
    )
    first_choice = completion.choices[0]
    return first_choice.text

def llm(prompt, stop=["\n"]):
    """Send a chat conversation to the OpenAI chat-completions endpoint.

    Args:
        prompt: Value passed as the ``messages`` argument of
            ``openai.chat.completions.create``.
        stop: Stop sequence(s) forwarded unchanged to the API.

    Returns:
        The ``message.content`` of the first choice in the response.
    """
    chat_completion = openai.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=prompt,
        stop=stop,
        max_tokens=200,
        temperature=0,
        top_p=1,
        frequency_penalty=0.0,
        presence_penalty=0.0,
    )
    reply = chat_completion.choices[0].message
    return reply.content

In [2]:
import wikienv, wrappers
env = wikienv.WikiEnv()
env = wrappers.HotPotQAWrapper(env, split="dev")
env = wrappers.LoggingWrapper(env)

def step(env, action, max_attempts=10):
    """Call ``env.step(action)``, retrying when the request times out.

    Args:
        env: Object exposing a ``step(action)`` method.
        action: Action forwarded unchanged to ``env.step``.
        max_attempts: Maximum number of calls to ``env.step`` before giving up.

    Returns:
        Whatever ``env.step(action)`` returns on the first attempt that does
        not raise ``requests.exceptions.Timeout``.

    Raises:
        RuntimeError: If every attempt timed out. The last timeout is chained
            as the cause. (Previously the function fell through and returned
            ``None``, which made callers fail later while unpacking.)
    """
    # Imported here because the notebook never imports ``requests`` at the top;
    # without it the ``except`` clause below raises NameError on the first
    # exception coming out of ``env.step``.
    import requests

    last_timeout = None
    for _ in range(max_attempts):
        try:
            return env.step(action)
        except requests.exceptions.Timeout as exc:
            last_timeout = exc
    raise RuntimeError(
        f"env.step({action!r}) timed out on all {max_attempts} attempts"
    ) from last_timeout

Example of the `messages` list passed to `llm` (OpenAI chat format):
```
messages=[
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "Who won the world series in 2020?"},
    {"role": "assistant", "content": "The Los Angeles Dodgers won the World Series in 2020."},
    {"role": "user", "content": "Where was it played?"}
  ]
```

Full documentation: https://platform.openai.com/docs/api-reference/chat

In [3]:
def append_message(messages, role, content):
    """Append a ``{"role", "content"}`` entry to ``messages`` in place.

    Args:
        messages: List that receives the new entry (it is mutated).
        role: Value stored under the ``"role"`` key.
        content: Value stored under the ``"content"`` key.

    Returns:
        The same ``messages`` list, so calls can be chained.
    """
    entry = {"role": role, "content": content}
    messages.append(entry)
    return messages

def get_one_message(messages, role="system"):
    """Flatten a message list into one newline-terminated string.

    Every message whose ``"role"`` differs from ``role`` contributes its
    ``"content"`` followed by a newline; messages with a matching role are
    left out.

    NOTE(review): the filter excludes ``role`` rather than selecting it;
    confirm this is the intended direction.

    Args:
        messages: Iterable of dicts with ``"role"`` and ``"content"`` keys.
        role: Role to leave out of the result.

    Returns:
        The concatenated contents, or ``""`` when nothing is kept.
    """
    kept_lines = [entry["content"] + "\n" for entry in messages if entry["role"] != role]
    return "".join(kept_lines)

In [4]:
from difflib import SequenceMatcher

def string_similarity(str1, str2):
    """Return the ``difflib.SequenceMatcher`` ratio of two sequences.

    Args:
        str1: First sequence to compare.
        str2: Second sequence to compare.

    Returns:
        The value of ``SequenceMatcher(None, str1, str2).ratio()``.
    """
    return SequenceMatcher(None, str1, str2).ratio()

# Implementation - main function

In [5]:
import json
import sys

# Load the few-shot prompt collection from ./prompts/prompts_naive.json
# (path is relative to the notebook's working directory).
folder = './prompts/'
prompt_file = 'prompts_naive.json'
with open(folder + prompt_file, 'r') as f:
    prompt_dict = json.load(f)

# Few-shot examples for the agent, stored under the 'webthink_simple6' key.
# They are concatenated onto `instruction` below, so the value must be a string.
webthink_examples = prompt_dict['webthink_simple6']
# Task description placed in front of the examples; it names the three action
# types (Search, Lookup, Finish) the agent may emit.
instruction = """Solve a question answering task with interleaving Thought, Action, Observation steps. Thought can reason about the current situation, and Action can be three types: 
(1) Search[entity], which searches the exact entity on Wikipedia and returns the first paragraph if it exists. If not, it will return some similar entities to search.
(2) Lookup[keyword], which returns the next sentence containing keyword in the current passage.
(3) Finish[answer], which returns the answer and finishes the task.
Here are some examples.
"""
# Default agent prompt: instruction followed by the few-shot examples.
webthink_prompt = instruction + webthink_examples

# Few-shot examples for the critique prompt.
webthink_critique_examples = prompt_dict['webthink_critique_examples']
# instruction_critique = """You are a helpful assistant. You are assisting another agent who's trying to solve a question answering task. You will find Thought, Action steps. Thought can reason about the current situation, and Action can be three types:
# (1) Search[entity], which searches the exact entity on Wikipedia and returns the first paragraph if it exists. If not, it will return some similar entities to search.
# (2) Lookup[keyword], which returns the next sentence containing the keyword in the current passage.
# (3) Finish[answer], which returns the answer and finishes the task.
# Ensure that the thoughts and actions are directed towards the most efficient and direct path to answer the given question, avoiding unnecessary steps or exhaustive searches.
# Always include a Critique in your response. Suggest a new Thought, Action pair in the Critique only if corrections are necessary.
# Be sure to not include indices or Observation when suggesting a new Thought, Action pair.
# Here are some examples.
# """ ##
# instruction_critique = """You are a helpful assistant. You are assisting another agent who's trying to solve a question answering task. Ensure that the thoughts and actions are directed towards the most correct and efficient path to answer the given question. You will find Thought, Action steps. Thought can reason about the current situation, and Action can be three types:
# # (1) Search[entity], which searches the exact entity on Wikipedia and returns the first paragraph if it exists. If not, it will return some similar entities to search.
# # (2) Lookup[keyword], which returns the next sentence containing the keyword in the current passage.
# # (3) Finish[answer], which returns the answer and finishes the task.
# Always include a Critique in your response. Suggest a new Thought, Action pair in the Critique only if corrections are necessary.
# Be sure to not include indices of the type 1, 2, 3 after Thought or Action. Do not include new Observation.
# Here are some examples.
# """ ##
# instruction_critique = """You are a helpful assistant. You are assisting another agent who's trying to solve a question answering task. You will find Thought, Action steps. Thought can reason about the current situation, and Action can be three types:
# # (1) Search[entity], which searches the exact entity on Wikipedia and returns the first paragraph if it exists. If not, it will return some similar entities to search.
# # (2) Lookup[keyword], which returns the next sentence containing the keyword in the current passage.
# # (3) Finish[answer], which returns the answer and finishes the task.
# Ensure that the thoughts and actions are directed towards the most correct and efficient path to answer the given question
# Always include a Critique in your response. Suggest a new Thought, Action pair in the Critique only if corrections are necessary.
# Here are some examples.
# """ ##
# Active critique instruction (the commented-out blocks above are earlier drafts).
# NOTE(review): the "# " prefixes on the three action-type lines are inside the
# string literal, so they are sent to the model as-is. This looks like a
# leftover from the commented-out drafts; confirm whether it is intended.
instruction_critique = """Critique the trajectory of a question answering task, suggesting a new Action only if necessary. Critique can reason about the current situation, and Action can be three types:
# (1) Search[entity], which searches the exact entity on Wikipedia and returns the first paragraph if it exists. If not, it will return some similar entities to search. entity should be appropriate for a Wikipedia search, meaning that it should contain only keywords and not full sentences.
# (2) Lookup[keyword], which returns the next sentence containing the keyword in the current passage.
# (3) Finish[answer], which returns the answer and finishes the task.
Ensure that the trajectory is directed towards the most correct and efficient path to answer the given question.
Do not suggest a new Action in the Critique if no corrections are necessary.
Here are some examples.
"""
# Full critique prompt: instruction followed by the critique few-shot examples.
webthink_prompt_critique = instruction_critique + webthink_critique_examples

def webthink(idx=None, prompt=webthink_prompt, to_print=True, critique=False):
    """Run one question-answering episode of Thought / Action / Observation steps.

    The question returned by ``env.reset(idx=idx)`` is appended to ``prompt``.
    Then, for up to 7 steps, ``llm_legacy`` proposes a thought and an action,
    the action is executed through ``step(env, ...)`` and the observation is
    appended to the prompt. If the episode is still not done after the last
    step, ``finish[]`` is submitted.

    Args:
        idx: Forwarded to ``env.reset`` to select the question.
        prompt: Prompt text to which the question and every completed step are
            appended.
        to_print: If true, print the question, each step and the final ``info``.
        critique: If true, every step after the first is sent (with
            probability 0.8) to the critique prompt; the critique's suggested
            action may replace the proposed thought/action pair.

    Returns:
        ``(r, info)`` from the last environment step. ``info`` is updated with
        ``n_calls``, ``n_badcalls`` and ``traj`` (the prompt including the full
        trajectory). Critique calls are not counted in ``n_calls``.
    """
    # Local import so the function does not rely on a later cell importing it.
    import random

    question = env.reset(idx=idx)
    if to_print:
        print(idx, question)
    prompt += question + "\n"
    n_calls, n_badcalls = 0, 0
    critique_last = ""
    for i in range(1, 8):
        n_calls += 1
        thought_action = llm_legacy(prompt + f"Thought {i}:", stop=[f"\nObservation {i}:"])
        try:
            thought, action = thought_action.strip().split(f"\nAction {i}: ")
        except ValueError:
            # The completion did not contain exactly one "Action i: " marker:
            # keep its first line as the thought and ask again for the action.
            print('ohh...', thought_action)
            n_badcalls += 1
            n_calls += 1
            thought = thought_action.strip().split('\n')[0]
            action = llm_legacy(prompt + f"Thought {i}: {thought}\nAction {i}:", stop=[f"\n"]).strip()

        # Critique the (Thought i, Action i) pair; the first step is never critiqued.
        if critique and i != 1 and random.random() < 0.8:
            # Swap the agent's few-shot prefix for the critique one, keeping
            # the question and the trajectory so far.
            prompt_critique = webthink_prompt_critique + '\n'
            prompt_critique += prompt
            prompt_critique = prompt_critique.replace(webthink_prompt, '')
            prompt_critique += f"Thought {i}: {thought}\nAction {i}: {action}\nCritique:"
            # TODO & obs: at some point the "mentor" stops critiquing and just copies the agent's
            # actions (add critique to the prompt instead of substituting it)
            # TODO & obs: the mentor Thought is not detailed enough, it's very brief
            # (quick fix: remove Thought and keep only Critique)
            # Stored under its own name so the boolean `critique` flag is not
            # overwritten (an empty response used to disable critiquing for
            # the rest of the episode).
            critique_text = llm_legacy(prompt_critique, stop=None).strip()
            if "Action:" in critique_text:
                try:
                    thought_tmp, action_tmp = critique_text.split("\nAction: ", 1)
                except ValueError:
                    # "Action:" is present but not on its own line: ignore the suggestion.
                    pass
                else:
                    # Drop everything after the first closing bracket of the action.
                    action_tmp = action_tmp.split(']', 1)[0] + ']'
                    action_tmp = action_tmp.replace("Action:", f"Action {i}:")
                    cr = string_similarity(critique_last, critique_text)
                    ac = string_similarity(action, action_tmp)
                    # Accept the suggestion only if the critique is not a near
                    # repeat of the previous one and the action actually differs.
                    if cr < 0.7 and ac < 0.95:
                        thought = thought_tmp
                        action = action_tmp
            critique_last = critique_text

        # Lower-case the first character before sending the action (e.g.
        # "Search[...]" -> "search[...]"). Slicing instead of indexing avoids
        # an IndexError when the model returned an empty action.
        obs, r, done, info = step(env, action[:1].lower() + action[1:])
        obs = obs.replace('\\n', '')
        step_str = f"Thought {i}: {thought}\nAction {i}: {action}\nObservation {i}: {obs}\n"
        prompt += step_str
        if to_print:
            print(step_str)
        if done:
            break
    if not done:
        # Step budget exhausted: submit an empty answer to close the episode.
        obs, r, done, info = step(env, "finish[]")
    if to_print:
        print(info, '\n')
    info.update({'n_calls': n_calls, 'n_badcalls': n_badcalls, 'traj': prompt})
    return r, info

## ReAct

In [6]:
import random
import time

# Evaluate plain ReAct (no critique) on 100 questions drawn with a fixed-seed
# shuffle of the indices 0..7404 (presumably the size of the dev split; confirm).
idxs = list(range(7405))
random.Random(222).shuffle(idxs)

rs, infos = [], []
old_time = time.time()

for i in idxs[:100]:
    r, info = webthink(i, to_print=True, critique=False)
    rs.append(info['em'])
    infos.append(info)
    # Running totals: sum of 'em', episodes so far, mean 'em', mean seconds per episode.
    print(sum(rs), len(rs), sum(rs) / len(rs), (time.time() - old_time) / len(rs))
    print('-----------', end='\n\n')

3046 Question: Are both Stacey McClean and Biff Byford American singer?
Thought 1: I need to search Stacey McClean and Biff Byford, find their nationality, then find if they are both American singers.
Action 1: Search[Stacey McClean]
Observation 1: Stacey Franks (née McClean, born 17 February 1989) is an English pop singer. She was part of the S Club 7 spin-off band, S Club 8 and in 2009 took part in the sixth series of The X Factor.. Franks was born in Bispham, Blackpool, Lancashire. In 2001 she auditioned for a place in the group S Club 7 on their 2002 Carnival Tour. She won a part in the S Club Juniors, aged 12 years old.[1] In 2002 the group released four singles and guest starred in Viva S Club..

Thought 2: Stacey McClean is an English singer. I need to search Biff Byford next and find its nationality.
Action 2: Search[Biff Byford]
Observation 2: Peter Rodney "Biff" Byford (born 15 January 1951)[1][2] is an English singer best known as the lead vocalist of the heavy metal band Sa

## ReAct-SR

In [7]:
import random
import time

# Evaluate ReAct with the critique step enabled on 100 questions drawn with a
# fixed-seed shuffle of the indices 0..7404 (presumably the size of the dev split; confirm).
idxs = list(range(7405))
random.Random(222).shuffle(idxs)

rs, infos = [], []
old_time = time.time()

for i in idxs[:100]:
    r, info = webthink(i, to_print=True, critique=True)
    rs.append(info['em'])
    infos.append(info)
    # Running totals: sum of 'em', episodes so far, mean 'em', mean seconds per episode.
    print(sum(rs), len(rs), sum(rs) / len(rs), (time.time() - old_time) / len(rs))
    print('-----------', end='\n\n')

3046 Question: Are both Stacey McClean and Biff Byford American singer?
Thought 1: I need to search Stacey McClean and Biff Byford, find their nationality, then find if they are both American singers.
Action 1: Search[Stacey McClean]
Observation 1: Stacey Franks (née McClean, born 17 February 1989) is an English pop singer. She was part of the S Club 7 spin-off band, S Club 8 and in 2009 took part in the sixth series of The X Factor.. Franks was born in Bispham, Blackpool, Lancashire. In 2001 she auditioned for a place in the group S Club 7 on their 2002 Carnival Tour. She won a part in the S Club Juniors, aged 12 years old.[1] In 2002 the group released four singles and guest starred in Viva S Club..

Thought 2: The thought process involves searching for the nationality of both singers separately, which is time-consuming and inefficient. It would be more efficient to search for both singers' nationalities at the same time.
Action 2: Search[Stacey McClean and Biff Byford nationality]
O