In [20]:
import ast

import joblib
import numpy as np

from mocks import DocStoreExplorerMock, LLMMock
from react_cls import CoTAgent

In [21]:
def summarize_trial(agents):
    """Partition agents by outcome.

    Args:
        agents: Sequence of objects exposing ``is_correct()`` and
            ``is_finished()``. It is traversed twice.

    Returns:
        A ``(correct, incorrect)`` tuple of lists. ``correct`` holds every
        agent whose ``is_correct()`` is truthy; ``incorrect`` holds agents that
        are finished but not correct. Agents that are neither finished nor
        correct appear in neither list.
    """
    correct = []
    for agent in agents:
        if agent.is_correct():
            correct.append(agent)

    incorrect = []
    for agent in agents:
        # Unfinished agents are skipped rather than counted as failures.
        if not agent.is_finished():
            continue
        if not agent.is_correct():
            incorrect.append(agent)

    return correct, incorrect

def log_trial(agents, trial_n):
    """Render a plain-text report for one trial.

    Args:
        agents: Agents to report on; they are split with ``summarize_trial``.
        trial_n: Trial number shown in the banner.

    Returns:
        A string made of a banner with the correct/incorrect counts, followed
        by one entry per correct agent and then one entry per incorrect agent.
    """
    solved, failed = summarize_trial(agents)

    def render(agent):
        # The scratchpad is emitted directly after the question, with no
        # separator added here.
        return f'Context: {agent.context}\nQuestion: {agent.question}{agent.scratchpad}\nCorrect answer: {agent.key}\n\n'

    banner = f"""
########################################
BEGIN TRIAL {trial_n}
Trial summary: Correct: {len(solved)}, Incorrect: {len(failed)}
#######################################
"""

    parts = [banner, '------------- BEGIN CORRECT AGENTS -------------\n\n']
    parts.extend(render(agent) for agent in solved)
    parts.append('------------- BEGIN INCORRECT AGENTS -----------\n\n')
    parts.extend(render(agent) for agent in failed)
    return ''.join(parts)

In [22]:
# Load the saved question sample and drop its stored index in favour of a
# fresh one (presumably a pandas DataFrame — it is iterated with .iterrows()
# further down).
# NOTE(review): joblib.load unpickles the file; only load trusted data.
hotpot = (
    joblib.load('data/hotpot-qa-distractor-sample.joblib')
    .reset_index(drop=True)
)

In [23]:
from prompts import cot_simple_agent_prompt
from fewshots import COTQA_SIMPLE6 
# One agent per dataset row, built from the row's question and answer.
# The second positional argument is an empty string (the restore cell passes
# d['context'] in that position, so this looks like a no-context run).
agents = [
    CoTAgent(
        row['question'],
        '',
        row['answer'],
        agent_prompt=cot_simple_agent_prompt,
        cot_examples=COTQA_SIMPLE6,
    )
    for _, row in hotpot.iterrows()
]

In [24]:
# Running trial counter and accumulated text log; both are updated by the
# trial loop, so re-run this cell before re-running the loop from scratch.
trial, log = 0, ''

In [25]:
# Five rounds: each round re-runs only the agents that are not yet correct,
# then appends that round's report to `log` and prints the tally.
for i in range(5):
    # The unsolved agents are collected into a list before any of them runs.
    for agent in list(filter(lambda a: not a.is_correct(), agents)):
        agent.run(reflect=False)
        # agent.key is the value log_trial labels "Correct answer".
        print(f'Answer: {agent.key}')
    trial = trial + 1
    log = log + log_trial(agents, trial)
    correct, incorrect = summarize_trial(agents)
    print(f'Finished Trial {trial}, Correct: {len(correct)}, Incorrect: {len(incorrect)}')

Thought: Let's think step by step. VIVA Media AG changed its name to Viacom International Media Networks in 2004. The acronym for Viacom International Media Networks is VIMN, so the answer is VIMN.
Action: Finish[VIMN]
Answer: Gesellschaft mit beschränkter Haftung
Thought: Let's think step by step. Jonny Craig has been a member of five bands, including Emarosa and Dance Gavin Dance. Pete Doherty has been a member of seven bands, including The Libertines and Babyshambles. So Pete Doherty has been a member of more bands.
Action: Finish[Pete Doherty]
Answer: Jonny" Craig
Thought: Let's think step by step. The Missouri Compromise was signed in 1820. The first governor after the The Missouri Compromise was Alexander McNair, who was from Missouri. So the answer is Missouri.
Action: Finish[Missouri]
Answer: Bath, Maine
Thought: Let's think step by step. The creator of "Wallace and Gromit" is Nick Park. Nick Park created the animation comedy "Creature Comforts" which matched animated zoo anima

In [27]:
# Persist the accumulated trial log. The log embeds each agent's key, and the
# dataset contains non-ASCII answers, so the encoding is pinned to UTF-8
# instead of being left to the platform default.
with open('output/base_cot_no_context/100_questions_5_trials.txt', 'w', encoding='utf-8') as f:
    f.write(log)

In [28]:
# Snapshot every agent's attributes as a plain dict in which each value has
# been converted with str(), then persist the list. Because of the str()
# conversion, non-text attributes come back as strings when reloaded.
dicts = [
    {name: str(value) for name, value in vars(a).items()}
    for a in agents
]

joblib.dump(dicts, 'output/base_cot_no_context/cot_33_correct_dicts-5-trials.joblib')

['output/base_cot_no_context/cot_33_correct_dicts-5-trials.joblib']

In [28]:
# NOTE(review): this writes the same `log` variable as the earlier write cell,
# but under an "8 trials" file name in output/base_cot/. It looks like a
# leftover from a different experiment — confirm before re-running so it does
# not overwrite that run's log.
# The encoding is pinned to UTF-8 because the log can contain non-ASCII
# answers and the platform default encoding is not guaranteed to handle them.
with open('output/base_cot/100_questions_8_trials.txt', 'w', encoding='utf-8') as f:
    f.write(log)

In [3]:
# Rebind `dicts` to a previously saved list of agent attribute dicts.
# NOTE(review): joblib.load unpickles the file; only load trusted data.
dicts = joblib.load(
    'output/base_cot/cot_reflect_50_correct_dicts-8-trials.joblib'
)

In [5]:
def _restore_literal(value):
    """Return the Python literal encoded in ``value``, or ``value`` unchanged.

    Non-string values, and strings that are not valid Python literals, are
    returned as-is.
    """
    if not isinstance(value, str):
        return value
    try:
        return ast.literal_eval(value)
    except (ValueError, SyntaxError):
        return value


# Rebuild the agents from the saved attribute dicts. Start from an empty list
# so the result does not depend on whatever `agents` already holds in the
# kernel (or fail with NameError on a fresh one).
agents = []
for d in dicts:
    agent = CoTAgent(d['question'], d['context'], d['key'])
    # NOTE(review): the dump cell in this notebook stores every attribute as
    # str(value). Assuming this file was written the same way — TODO confirm —
    # the non-text fields are decoded so that e.g. the string 'False' does not
    # end up as a truthy `finished` flag. Values that are not strings pass
    # through untouched.
    agent.reflections = _restore_literal(d['reflections'])
    agent.scratchpad = d['scratchpad']
    agent.answer = d['answer']
    agent.step_n = _restore_literal(d['step_n'])
    agent.finished = _restore_literal(d['finished'])
    agents.append(agent)

# Kept as the last expression of the cell so the available keys are displayed.
dicts[0].keys()

dict_keys(['question', 'context', 'key', 'agent_prompt', 'reflect_prompt', 'cot_examples', 'reflect_examples', 'llm', 'reflections', 'answer', 'step_n', 'scratchpad', 'finished'])