# OpenAI Simple Chat Model

In [None]:
! pip3 install openai

In [1]:
import os

# Read the key from the environment instead of pasting a secret into the notebook.
# Falls back to the original empty string when OPENAI_API_KEY is not set.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", "")

In [3]:
from agentquest.drivers.mastermind import MasterMindDriver
from agentquest.metrics import get_mastermind_repetitions, get_mastermind_progress
from agentquest.utils import Action, load_data, cpprint
from termcolor import cprint

Load a generic 4-digit Mastermind game

In [4]:
# Load the 'mastermind' / '4 digits' data and keep its first entry as the game
# to solve. `len(game)` is later used to normalise the progress metric.
game = load_data('mastermind', '4 digits')[0]

Initialize the prompt and the LLM

In [5]:
from openai import OpenAI

# System prompt sent to the model as the first message of the conversation.
# The THOUGHT:/ACTION: format requested at the end is what the solving loop
# parses (it splits the reply on 'ACTION: ' and 'THOUGHT: ').
# NOTE(review): the text contains typos ("gess", "can contains", "hosts");
# they are left as-is because editing them changes the model input.
prompt = '''You are tasked to play the Mastermind game. The host chooses a 
number and gives you the amount of digits. You have to gess the correct number
as fast as you can.
The number can contains repetitions.
The number can contain any possible digit between:
0, 1, 2, 3, 4, 5, 6, 7, 8, 9

At each round you provide a number as guess. The hosts provides you 
1. The number of correct digits in the wrong position
2. The number of correct digits in the correct position

The game ends when the hosts outputs 'You Won!'

Carefully choose your strategy. Avoid bruteforce.

Use the following format:
THOUGHT: think about your guess
ACTION: only your guessed number without additional info.
'''

# Create the OpenAI client using the key configured earlier in the notebook
client = OpenAI(api_key=OPENAI_API_KEY)

# Seed the conversation history with the system prompt as its only entry
messages = [{"role": "system", "content": prompt}]

Initialize the mastermind driver and get the first observation

In [6]:
# Wrap the loaded game in the Mastermind driver
driver = MasterMindDriver(game)
obs = driver.reset() # Get the first observation
# Show the opening message; `obs.output` and `obs.done` drive the solving loop
cpprint(f'OBSERVATION: {obs.output}', 'cyan')

[36mOBSERVATION: Start guessing the 4 digits number.[0m


Run the LLM loop to solve the task

In [7]:
# Upper bound on the number of LLM rounds played before giving up
MAX_STEPS = 10

# Per-step history: submitted guesses, progress values and repetition counts
actions, progress, repetitions = [], [], []
step_cnt = 0
while not obs.done and step_cnt < MAX_STEPS:
    # Provide the observation to the LLM
    messages.append({"role": "user", "content": obs.output})

    # Get the LLM output. `message.content` may be None, so fall back to an
    # empty string to keep the parsing below from raising.
    completion = client.chat.completions.create(
        model="gpt-4",
        messages=messages
    )
    agent_choice = completion.choices[0].message.content or ''

    # Provide the choice to the LLM as history
    messages.append({"role": "assistant", "content": agent_choice})

    # Extract the LLM thought and guess. Surrounding whitespace is stripped so
    # that e.g. '0123' and '0123\n' are treated as the same action by the
    # driver and by the repetition metric.
    thought = agent_choice.split('ACTION: ')[0].split('THOUGHT: ')[-1].strip()
    guess = agent_choice.split('ACTION: ')[-1].strip()

    # Build the action
    action = Action(action_value=guess)
    # Store action
    actions.append(action.action_value)

    # Run a mastermind round and get the observation
    obs = driver.step(action)

    # Compute current progress and repetition
    repeated_actions = get_mastermind_repetitions(actions)
    current_progress = get_mastermind_progress(driver.state, game)
    repetitions.append(repeated_actions)
    progress.append(current_progress)

    print()
    cpprint(f'THOUGHT: {thought}', 'magenta')
    cpprint(f'ACTION: {action.action_value}', 'yellow')
    cpprint(f'OBSERVATION: {obs.output}', 'cyan')
    cpprint(f'METRICS: {repeated_actions} repeated actions', 'red')
    cpprint(f'         {current_progress} reached milestones', 'red')

    step_cnt += 1


[35mTHOUGHT: I will start by guessing four different digits. Following the 
 sequence from 0-9, I will start with 0123.[0m
[33mACTION: 0123[0m
[36mOBSERVATION: Your guess has 1 correct numbers in the wrong position and 0 
 correct numbers in the correct position. Keep guessing.[0m
[31mMETRICS: 0 repeated actions[0m
[31m         0 reached milestones[0m

[35mTHOUGHT: This means there may be one digit among 0, 1, 2, 3 but it is 
 located at a different position other than my first guess. However, we can 
 not confirm that one of this digits is in the answer yet. It could be a 0, 
 1, 2, or 3 in the correct number but in the wrong position. I will now guess 
 the next four numbers in the sequence, 4567.[0m
[33mACTION: 4567[0m
[36mOBSERVATION: Your guess has 1 correct numbers in the wrong position and 0 
 correct numbers in the correct position. Keep guessing.[0m
[31mMETRICS: 0 repeated actions[0m
[31m         0 reached milestones[0m

[35mTHOUGHT: Now I know that one o

In [9]:
# Compute final metrics
# PR: per-step progress values normalised by len(game)
PR = [x / len(game) for x in progress]

# RR: per-step repetition counts normalised by len(actions) - 1.
# Clamp the denominator to 1 so a run with a single action yields 0.0
# instead of raising ZeroDivisionError.
rr_denominator = max(len(actions) - 1, 1)
RR = [x / rr_denominator for x in repetitions]

print(f'PR = {PR}')
print(f'RR = {RR}')  # was mislabelled as 'PR'

PR = [0.0, 0.0, 0.25, 0.25, 0.25, 0.25, 0.0, 0.5, 0.0, 0.0]
PR = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
