In [1]:
import os

import joblib
import numpy as np

from mocks import DocStoreExplorerMock, LLMMock
from react_cls import CoTAgent

In [2]:
def summarize_trial(agents):
    """Partition agents by the outcome of their latest attempt.

    Args:
        agents: Iterable of objects exposing ``is_correct()`` and
            ``is_finished()``. It is traversed exactly once, so one-shot
            iterables such as generators are handled correctly.

    Returns:
        A ``(correct, incorrect)`` tuple of lists. ``correct`` holds the
        agents whose ``is_correct()`` is truthy; ``incorrect`` holds the
        agents that are finished but not correct. Agents that are neither
        correct nor finished appear in neither list.
    """
    correct, incorrect = [], []
    for agent in agents:
        # Evaluate is_correct() once per agent and reuse the outcome for both buckets.
        if agent.is_correct():
            correct.append(agent)
        elif agent.is_finished():
            incorrect.append(agent)
    return correct, incorrect

def remove_fewshot(prompt: str) -> str:
    """Strip the few-shot examples section out of a prompt.

    The section starts at the first ``'Here are some examples:'`` marker and
    ends at the first ``'(END OF EXAMPLES)'`` marker that follows it. The
    text before and after the section is whitespace-trimmed and joined with
    a single newline.

    Args:
        prompt: The full prompt text.

    Returns:
        The prompt without its examples section. If the prompt does not
        contain a complete section (either marker is missing), it is
        returned unchanged instead of raising ``IndexError``.
    """
    head, start_marker, remainder = prompt.partition('Here are some examples:')
    # Look for the end marker only after the start marker so the two always pair up,
    # and keep everything after it (a later repeat of the marker is not a cut point).
    _examples, end_marker, tail = remainder.partition('(END OF EXAMPLES)')
    if not (start_marker and end_marker):
        return prompt
    # str.strip() with no argument already removes newlines along with other whitespace.
    return head.strip() + '\n' + tail.strip()

def log_trial(agents, trial_n):
    """Render a plain-text report for one trial.

    Args:
        agents: Agents to report on; they are split with ``summarize_trial``.
        trial_n: Trial number shown in the report banner.

    Returns:
        A string made of a banner with the correct/incorrect counts, then one
        entry per correct agent, then one entry per incorrect agent. Each
        entry is the agent's prompt with the few-shot examples removed,
        followed by the agent's ``key`` labelled as the correct answer.
    """
    correct, incorrect = summarize_trial(agents)

    banner = f"""
########################################
BEGIN TRIAL {trial_n}
Trial summary: Correct: {len(correct)}, Incorrect: {len(incorrect)}
#######################################
"""

    def _entry(agent):
        # One report entry: trimmed prompt plus the expected answer.
        return remove_fewshot(agent._build_agent_prompt()) + f'\nCorrect answer: {agent.key}\n\n'

    pieces = [banner, '------------- BEGIN CORRECT AGENTS -------------\n\n']
    pieces.extend(_entry(agent) for agent in correct)
    pieces.append('------------- BEGIN INCORRECT AGENTS -----------\n\n')
    pieces.extend(_entry(agent) for agent in incorrect)

    return ''.join(pieces)

In [4]:
# Load the question sample and replace its index with a fresh 0..n-1 range.
# The loaded object is iterated with .iterrows() when the agents are built.
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary code;
# only load files from a trusted source.
hotpot = joblib.load('data/hotpot-qa-distractor-sample.joblib').reset_index(drop = True)

In [5]:
from prompts import cot_simple_reflect_agent_prompt, cot_simple_reflect_prompt
from fewshots import COTQA_SIMPLE6, COT_SIMPLE_REFLECTION
# Build one CoTAgent per row of `hotpot`, using the simple-reflect prompts and examples.
# NOTE(review): the second positional argument is an empty string — presumably the
# context, given the empty "Context:" lines in the logged prompts; confirm against CoTAgent.
agents = [
    CoTAgent(
        row['question'],
        '',
        row['answer'],
        agent_prompt=cot_simple_reflect_agent_prompt,
        reflect_prompt=cot_simple_reflect_prompt,
        cot_examples=COTQA_SIMPLE6,
        reflect_examples=COT_SIMPLE_REFLECTION,
    )
    for _, row in hotpot.iterrows()
]

In [6]:
# Running state for the trial loop: `trial` is incremented once per trial and
# `log` accumulates the text returned by log_trial.
# Re-run this cell before re-running the loop to start from a clean slate.
trial = 0
log = ''

In [7]:
# Run six trials. Each trial re-runs only the agents that are not yet correct,
# then appends that trial's report to `log` and prints a one-line summary.
for i in range(6):
    # Snapshot the unsolved agents before running any of them.
    pending_agents = [agent for agent in agents if not agent.is_correct()]
    for agent in pending_agents:
        agent.run(reflect_strategy='last_attempt + reflexion')
        # agent.key is the value log_trial labels "Correct answer".
        print(f'Answer: {agent.key}')
    trial = trial + 1
    log = log + log_trial(agents, trial)
    correct, incorrect = summarize_trial(agents)
    print(f'Finished Trial {trial}, Correct: {len(correct)}, Incorrect: {len(incorrect)}')

Thought: Let's think step by step. VIVA Media AG changed its name to Viacom International Media Networks in 2004. The acronym for Viacom International Media Networks is VIMN, so the answer is VIMN.
Action: Finish[VIMN]
Answer: Gesellschaft mit beschränkter Haftung
Thought: Let's think step by step. Jonny Craig has been a member of six bands, while Pete Doherty has been a member of seven bands. So Pete Doherty has been a member of more bands.
Action: Finish[Pete Doherty]
Answer: Jonny" Craig
Thought: Let's think step by step. The Missouri Compromise was an agreement between the United States and the state of Missouri in 1820. The first governor after the Missouri Compromise was Alexander McNair, who was from Missouri. So the answer is Missouri.
Action: Finish[Missouri]
Answer: Bath, Maine
Thought: Let's think step by step. The creator of "Wallace and Gromit" is Nick Park. Nick Park created the animation comedy "Creature Comforts" which matched animated zoo animals with a soundtrack of p

In [8]:
# Print the accumulated trial log as plain text (the output is long).
print(log)


########################################
BEGIN TRIAL 1
Trial summary: Correct: 31, Incorrect: 69
#######################################
------------- BEGIN CORRECT AGENTS -------------

Solve a question answering task by having a Thought, then Finish with your answer. Thought can reason about the current situation. Finish[answer] returns the answer and finishes the task.
Question: The creator of "Wallace and Gromit" also created what animation comedy that matched animated zoo animals with a soundtrack of people talking about their homes? 
Thought: Let's think step by step. The creator of "Wallace and Gromit" is Nick Park. Nick Park created the animation comedy "Creature Comforts" which matched animated zoo animals with a soundtrack of people talking about their homes. So the answer is "Creature Comforts".
Action: Finish["Creature Comforts"]
Observation: Answer is CORRECT
Correct answer: Creature Comforts

Solve a question answering task by having a Thought, then Finish with your answ

In [16]:
# NOTE(review): a bare `print` only evaluates to the built-in function object and
# prints nothing. The output recorded for this cell is a string repr of the log,
# so the cell appears to have been edited after it last ran — confirm the
# intended expression or delete the cell.
print

'\n########################################\nBEGIN TRIAL 1\nTrial summary: Correct: 32, Incorrect: 68\n#######################################\n------------- BEGIN CORRECT AGENTS -------------\n\nContext: \nQuestion: The creator of "Wallace and Gromit" also created what animation comedy that matched animated zoo animals with a soundtrack of people talking about their homes? \nThought: Let\'s think step by step. The creator of "Wallace and Gromit" is Nick Park. Nick Park created the animation comedy "Creature Comforts" which matched animated zoo animals with a soundtrack of people talking about their homes. So the answer is "Creature Comforts".\nAction: Finish["Creature Comforts"]\nObservation: Answer is CORRECT\nReflections: \nCorrect answer: Creature Comforts\n\nContext: \nQuestion: Jaclyn Stapp is married to the former frontman of a band that disbanded in what year?\nThought: Let\'s think step by step. Jaclyn Stapp is married to Scott Stapp, the former frontman of the band Creed. Cre

In [10]:
# Write the accumulated trial log to a text file.
# NOTE(review): the file name says "5_trials" while the trial loop in this notebook
# iterates range(6) — confirm which is intended.
log_path = 'output/last_trial_reflect_cot_no_context/100_questions_5_trials.txt'
# Create the target directory first so the write does not fail on a fresh checkout.
os.makedirs(os.path.dirname(log_path), exist_ok=True)
# The log contains non-ASCII text (answer keys are written into it), so pin the
# encoding instead of relying on the platform default.
with open(log_path, 'w', encoding='utf-8') as f:
    f.write(log)

In [29]:
import os
def save_agents(agents, dir: str):
    """Dump every agent to its own joblib file inside ``dir``.

    Args:
        agents: Iterable of objects to serialize with ``joblib.dump``.
        dir: Target directory; created (with parents) if it does not exist.

    Files are named ``<position>.joblib``, where position is the agent's
    zero-based place in ``agents``.
    """
    os.makedirs(dir, exist_ok=True)
    for position, agent in enumerate(agents):
        target = f'{dir}/{position}.joblib'
        joblib.dump(agent, target)

In [30]:
# Persist every agent, one joblib file per agent, under the 6-trials output directory.
save_agents(agents, 'output/last_trial_cot_no_context/100_questions_6_trials')

In [6]:
# Scratch check: serialize the first agent to a file in the working directory.
joblib.dump(agents[0], 'test.joblib')

['test.joblib']

In [7]:
# Scratch check: read the dumped agent back; the cell output shows the loaded object's repr.
# NOTE(review): joblib.load unpickles the file — only use it on files you trust.
joblib.load('test.joblib')

<react_cls.CoTAgent at 0x12cd97d30>

In [11]:
# Snapshot each agent's instance attributes as a plain dict whose values are
# converted with str(), then dump the list of snapshots.
dicts = [
    {name: str(value) for name, value in agent.__dict__.items()}
    for agent in agents
]

joblib.dump(dicts, 'output/last_trial_cot_no_context/cot_reflect_no_context_40_correct_dicts-5-trials.joblib')

['output/reflect_cot_no_context/cot_reflect_no_context_40_correct_dicts-5-trials.joblib']

In [3]:
# Load a previously saved joblib file — presumably a list of stringified agent
# dicts like the one dumped elsewhere in this notebook; confirm.
# NOTE(review): this rebinds `dicts`, so any value built from `agents` under the
# same name is lost. joblib.load unpickles — only use it on files you trust.
dicts = joblib.load('output/base_cot/cot_reflect_50_correct_dicts-8-trials.joblib')

In [12]:
# Print the accumulated trial log again as plain text (the output is long).
print(log)


########################################
BEGIN TRIAL 1
Trial summary: Correct: 32, Incorrect: 68
#######################################
------------- BEGIN CORRECT AGENTS -------------

Context: 
Question: The creator of "Wallace and Gromit" also created what animation comedy that matched animated zoo animals with a soundtrack of people talking about their homes? 
Thought: Let's think step by step. The creator of "Wallace and Gromit" is Nick Park. Nick Park created the animation comedy "Creature Comforts" which matched animated zoo animals with a soundtrack of people talking about their homes. So the answer is "Creature Comforts".
Action: Finish["Creature Comforts"]
Observation: Answer is CORRECT
Reflections: 
Correct answer: Creature Comforts

Context: 
Question: Jaclyn Stapp is married to the former frontman of a band that disbanded in what year?
Thought: Let's think step by step. Jaclyn Stapp is married to Scott Stapp, the former frontman of the band Creed. Creed disbanded in 2004