#### Notebook for running Chain-of-Thought with supporting context experiments 

In [10]:
import sys, os
sys.path.append('..')
root = '../root/'

In [11]:
import dotenv
dotenv.load_dotenv()

True

In [12]:
import joblib
import json
import logging
import numpy as np
from llm import AnyOpenAILLM
from agents import CoTAgent, ReflexionStrategy
from util import summarize_trial, log_trial, save_agents, save_results

#### Load the HotPotQA Sample

In [13]:
hotpot = joblib.load('../data/hotpot-qa-distractor-sample.joblib').reset_index(drop = True)

hotpot['supporting_paragraphs'] = None
for ind, row in hotpot.iterrows():
    supporting_articles = row['supporting_facts']['title']
    articles = row['context']['title']
    sentences = row['context']['sentences'] 
    supporting_paragraphs = []
    for article in supporting_articles:
        supporting_paragraph = ''.join(sentences[np.where(articles == article)][0])
        supporting_paragraphs.append(supporting_paragraph)
    supporting_paragraphs = '\n\n'.join(supporting_paragraphs)
    hotpot.at[ind, 'supporting_paragraphs'] = supporting_paragraphs

#### Define the Reflexion Strategy

In [14]:
print(ReflexionStrategy.__doc__)


    NONE: No reflection
    LAST_ATTEMPT: Use last reasoning trace in context 
    REFLEXION: Apply reflexion to the next reasoning trace 
    LAST_ATTEMPT_AND_REFLEXION: Use last reasoning trace in context and apply reflexion to the next reasoning trace 
    


In [15]:
strategy: ReflexionStrategy = ReflexionStrategy.REFLEXION

#### Define the LLMs

In [16]:
self_reflect_llm = "gpt-4"
action_llm = "gpt-4"

#### Initialize a CoTAgent for each question

In [17]:
from prompts import cot_agent_prompt, cot_reflect_agent_prompt, cot_reflect_prompt
from fewshots import COT, COT_REFLECT
agents = [CoTAgent(row['question'],
                   row['supporting_paragraphs'],
                   row['answer'],
                   agent_prompt=cot_agent_prompt if strategy == ReflexionStrategy.NONE else cot_reflect_agent_prompt,
                   cot_examples=COT,
                   reflect_prompt=cot_reflect_prompt,
                   reflect_examples=COT_REFLECT,
                   self_reflect_llm= AnyOpenAILLM(
                                            temperature=0,
                                            max_tokens=250,
                                            model_name=self_reflect_llm,
                                            model_kwargs={"stop": "\n"},
                                            openai_api_key=os.environ['OPENAI_API_KEY']),
                    action_llm= AnyOpenAILLM(
                                            temperature=0,
                                            max_tokens=250,
                                            model_name=action_llm,
                                            model_kwargs={"stop": "\n"},
                                            openai_api_key=os.environ['OPENAI_API_KEY']),
                    ) for _, row in hotpot.iterrows()]

#### Run `n` trials

In [18]:
n = 9 
trial = 5 
log = ''
results = []

llm = 'gpt4'
run_dir = os.path.join(root, 'CoT', 'context', strategy.value + f'_{llm}')
os.makedirs(run_dir, exist_ok=True)

# Basic config
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s [%(levelname)s] %(message)s",
    handlers=[
        logging.FileHandler(os.path.join(run_dir, f'{n}_{trial}.log')),
    ]
)

#### Optional: Load agents

In [24]:
agents = []
agents_dir = os.path.join(run_dir, 'agents')
for file in os.listdir(agents_dir):
    agents.append(joblib.load(os.path.join(agents_dir, file)))

In [11]:
for i in range(3):
    for agent in [a for a in agents if not a.is_correct()]:
        agent.run(reflexion_strategy = strategy)
        print(f'Answer: {agent.key}')
    trial += 1
    log += log_trial(agents, trial)
    correct, incorrect = summarize_trial(agents)
    results.append({'trial': trial, 'correct': len(correct), 'incorrect': len(incorrect)})
    save_results(agents, results, run_dir)
    print(f'Finished Trial {trial}, Correct: {len(correct)}, Incorrect: {len(incorrect)}')

Answer: 22 November
Answer: Roman
Answer: in the village of Aldenham
Answer: author
Answer: a failed coup attempt
Answer: novelist
Answer: California
Answer: super-regional shopping mall
Answer: singer, songwriter
Answer: German
Answer: the port of Baltimore west to Sandy Hook
Answer: New York
Answer: Frederick Alexander


ERROR:root:Invalid action: No, only Affiliated Managers Group.


Answer: no
Answer: cleaning, catering and security
Answer: chronological collection of critical quotations
Answer: The Bad Hemingway Contest
Answer: "Now and Then" (1995)
Answer: no
Answer: the Cold War (1947–91)
Answer: fortnightly women interest magazine
Answer: 2 March 1972


ERROR:root:Invalid action: Upon further research, I have confirmed that Larnelle Steward Harris was indeed born in the month of July. Therefore, the answer to the question is Yes, David Huntsinger has worked with a gospel singer born in the month of July.


Answer: Larnelle Harris
Answer: The Bears
Answer: Vivendi S.A.


ERROR:root:Invalid action: No, only Maxillaria is a genus of orchids.


Answer: no
Answer: fictional character
Answer: 10 January 1920
Answer: Ricard Rubio i Vives
Finished Trial 6, Correct: 71, Incorrect: 26
Answer: 22 November
Answer: Roman
Answer: in the village of Aldenham
Answer: author
Answer: a failed coup attempt
Answer: novelist
Answer: California
Answer: super-regional shopping mall
Answer: singer, songwriter
Answer: German
Answer: the port of Baltimore west to Sandy Hook
Answer: New York
Answer: Frederick Alexander
Answer: no
Answer: cleaning, catering and security
Answer: chronological collection of critical quotations
Answer: The Bad Hemingway Contest
Answer: "Now and Then" (1995)
Answer: no
Answer: the Cold War (1947–91)
Answer: fortnightly women interest magazine
Answer: 2 March 1972
Answer: Larnelle Harris


ERROR:root:Invalid action: The context does not provide information about the mascot of Mercer University.


Answer: The Bears
Answer: Vivendi S.A.
Answer: no
Answer: fictional character
Answer: 10 January 1920
Answer: Ricard Rubio i Vives
Finished Trial 7, Correct: 71, Incorrect: 28
Answer: 22 November
Answer: Roman
Answer: in the village of Aldenham
Answer: author
Answer: a failed coup attempt
Answer: novelist
Answer: California
Answer: super-regional shopping mall
Answer: singer, songwriter
Answer: German
Answer: the port of Baltimore west to Sandy Hook
Answer: New York
Answer: Frederick Alexander
Answer: no
Answer: cleaning, catering and security
Answer: chronological collection of critical quotations
Answer: The Bad Hemingway Contest
Answer: "Now and Then" (1995)
Answer: no
Answer: the Cold War (1947–91)
Answer: fortnightly women interest magazine
Answer: 2 March 1972
Answer: Larnelle Harris
Answer: The Bears
Answer: Vivendi S.A.
Answer: no
Answer: fictional character
Answer: 10 January 1920
Answer: Ricard Rubio i Vives
Finished Trial 8, Correct: 71, Incorrect: 29


#### Save the result log

In [13]:
with open(os.path.join(run_dir, f'{len(agents)}_questions_{trial}_trials.txt'), 'w') as f:
    f.write(log)
save_agents(agents, os.path.join(run_dir, 'agents'))

Saving agent 0...
Saving agent 1...
Saving agent 2...
Saving agent 3...
Saving agent 4...
Saving agent 5...
Saving agent 6...
Saving agent 7...
Saving agent 8...
Saving agent 9...
Saving agent 10...
Saving agent 11...
Saving agent 12...
Saving agent 13...
Saving agent 14...
Saving agent 15...
Saving agent 16...
Saving agent 17...
Saving agent 18...
Saving agent 19...
Saving agent 20...
Saving agent 21...
Saving agent 22...
Saving agent 23...
Saving agent 24...
Saving agent 25...
Saving agent 26...
Saving agent 27...
Saving agent 28...
Saving agent 29...
Saving agent 30...
Saving agent 31...
Saving agent 32...
Saving agent 33...
Saving agent 34...
Saving agent 35...
Saving agent 36...
Saving agent 37...
Saving agent 38...
Saving agent 39...
Saving agent 40...
Saving agent 41...
Saving agent 42...
Saving agent 43...
Saving agent 44...
Saving agent 45...
Saving agent 46...
Saving agent 47...
Saving agent 48...
Saving agent 49...
Saving agent 50...
Saving agent 51...
Saving agent 52...
Sav