# An agent to play Sudoku

In this tutorial we shall show how we can build an agent that plays Sudoku in AgentQuest.

In [1]:
# ! pip3 install agentquest

In [2]:
from agentquest.benchmarks.sudoku import SudokuDriver, SudokuUtils
from dotenv import load_dotenv

# Load environment settings via python-dotenv before anything else runs.
load_dotenv()

# Take the first puzzle of the "easy" category from the default data path.
game = SudokuUtils.load_data(data_path="__default__", category="easy")[0]

# The puzzle's "board" entry is the starting grid and its "answer" entry is the
# solved grid; both are converted to list-of-lists form for the driver.
initial, goal = (
    SudokuUtils.convert_board_to_list_of_lists(board_state=game.get(field))
    for field in ("board", "answer")
)

Initialize the Sudoku driver and get the first observation.

In [3]:
# Build the driver from the starting grid and the solved grid.
driver = SudokuDriver(goal=goal, initial_state=initial)

# reset() returns the first observation; keep a second reference to it in
# first_obs because obs is reassigned on every step of the agent loop.
obs = first_obs = driver.reset()
print(f"OBSERVATION: {obs.output}")

OBSERVATION: Welcome to Sudoku.
Sudoku is a logic-based number puzzle game played on a 9x9 grid, divided into nine 3x3 subgrids called "regions."
The objective is to fill the grid so that each row, each column, and each 3x3 region contains all digits from 1 to 9 without repetition.
The puzzle starts with some cells pre-filled with numbers, which serve as clues.
You will be provided the state of the game every time the game progresses.
Your response should contain what the next number will be and in which row and column.
The rows and columns both range from 0 to 8 (zero-based indexing), and the value of digits is from 1 to 9.
The response should be exactly in the following format:
Row: <row_number>, Column: <column_number>, Value: <value>
Current game state: 
[['5', '6', '*', '*', '*', '8', '2', '3', '1'], ['*', '*', '7', '*', '1', '2', '5', '*', '*'], ['2', '3', '1', '6', '4', '5', '*', '*', '*'], ['6', '5', '3', '*', '*', '*', '1', '4', '2'], ['9', '*', '*', '4', '*', '*', '6', '*', '

Initialize the prompt and the agent.

In [4]:
from langchain_openai import ChatOpenAI
from langchain_core.prompts import PromptTemplate


# Chat model (OpenAI "gpt-4o" through LangChain) that lf_invoke() queries for each move.
llm = ChatOpenAI(model_name="gpt-4o")


class CustomPromptTemplate(PromptTemplate):
    """Prompt template that renders recent agent interactions into the prompt.

    The ``history`` keyword passed to :meth:`format` is expected to be a
    sequence of ``(action, _, observation)`` triples. Only the last five
    entries are rendered, in chronological order.
    """

    template: str

    def format(self, **kwargs) -> str:
        """Fill the template, replacing ``history`` with its textual rendering.

        Args:
            **kwargs: Template variables. Must include ``history``, a sequence
                of ``(action, _, observation)`` triples. ``action`` may be
                falsy (e.g. ``None`` for the initial state), in which case no
                action line is emitted; the same holds for ``observation``.

        Returns:
            The template string with all placeholders substituted.

        Raises:
            KeyError: If ``history`` is not supplied.
        """
        history = kwargs.pop("history")

        thoughts = ""
        # Render each of the last five interactions. The loop variables are
        # used directly: re-reading history[-1] here would repeat the newest
        # entry once per iteration instead of showing distinct steps.
        for action, _, observation in history[-5:]:
            if action:
                thoughts += f"\n Action: {str(action.value)}\n"
            if observation:
                thoughts += f"\n Observation: {str(observation.output)}\n"

        kwargs["history"] = thoughts
        return self.template.format(**kwargs)

In [5]:
# Raw prompt text. "{input}" receives the task instructions and "{history}"
# receives the text that CustomPromptTemplate.format() builds from the recent
# (action, observation) interactions.
prompt = """
You are an agent with a particular task. The task and it's rules are as follows.
Input task: {input}
Use the optimal strategy possible for the task without making mistakes. The actions are irreversible.
Here are the recent game states: {history}
"""

# Wrap the raw text in the custom template so the history list is rendered to
# text before substitution.
prompt_template = CustomPromptTemplate(
    template=prompt, input_variables=["input", "history"]
)

In [None]:
from langfuse.decorators import observe
import os

# Langfuse settings are supplied through environment variables. Use setdefault
# so values that are already present (for example, loaded earlier by
# load_dotenv()) are kept; an unconditional assignment would overwrite real
# credentials with blanks. Provide the values via the environment or a .env
# file rather than hardcoding secrets in the notebook.
os.environ.setdefault("LANGFUSE_SECRET_KEY", "")
os.environ.setdefault("LANGFUSE_PUBLIC_KEY", "")
os.environ.setdefault("LANGFUSE_HOST", "")


@observe
def lf_invoke(data):
    """Render the prompt from ``data`` and send it to the LLM.

    Args:
        data: Mapping of template variables (``input`` and ``history``) that
            is expanded into ``prompt_template.format``.

    Returns:
        Whatever ``llm.invoke`` returns for the rendered prompt.
    """
    rendered_prompt = prompt_template.format(**data)
    return llm.invoke(rendered_prompt)

Start the agent loop to solve the task.

In [7]:
step_cnt = 0
while True:
    # Stop when the puzzle is solved, the driver refuses further steps, or the
    # 50-step budget is used up.
    if obs.success or not obs.can_proceed or step_cnt >= 50:
        break

    print("Step: ", step_cnt)
    if step_cnt % 10 == 0:
        # Show the full observation only on every tenth step.
        print(obs.output)

    # driver.metrics.interactions does not contain the initial configuration,
    # so before the first step the history is seeded with the current
    # observation alone.
    history = (
        driver.metrics.interactions
        if len(driver.metrics.interactions) != 0
        else [(None, None, obs)]
    )

    response = lf_invoke(
        dict(input=SudokuUtils.get_initial_instructions(), history=history)
    )
    # Hand the raw model text to the driver, which returns the next observation.
    obs = driver.step_raw(response.content)
    step_cnt += 1

Step:  0
Welcome to Sudoku.
Sudoku is a logic-based number puzzle game played on a 9x9 grid, divided into nine 3x3 subgrids called "regions."
The objective is to fill the grid so that each row, each column, and each 3x3 region contains all digits from 1 to 9 without repetition.
The puzzle starts with some cells pre-filled with numbers, which serve as clues.
You will be provided the state of the game every time the game progresses.
Your response should contain what the next number will be and in which row and column.
The rows and columns both range from 0 to 8 (zero-based indexing), and the value of digits is from 1 to 9.
The response should be exactly in the following format:
Row: <row_number>, Column: <column_number>, Value: <value>
Current game state: 
[['5', '6', '*', '*', '*', '8', '2', '3', '1'], ['*', '*', '7', '*', '1', '2', '5', '*', '*'], ['2', '3', '1', '6', '4', '5', '*', '*', '*'], ['6', '5', '3', '*', '*', '*', '1', '4', '2'], ['9', '*', '*', '4', '*', '*', '6', '*', '3'],

In [8]:
# Show the text of the last observation returned by the driver after the loop ended.
print(obs.output)

Could not parse agent output. Make sure it is in format: Row: <row_number>, Column: <column_number>, Value: <value>


In [9]:
# Export the metrics collected by the driver; as the cell's last expression the
# result is displayed below.
# NOTE(review): num_execution_steps=50 mirrors the 50-step cap of the agent
# loop — presumably these should stay in sync; confirm. The meaning of theta_a
# is not visible in this notebook.
driver.metrics.export(
    repetition_function_kwargs={"theta_a": 1, "num_execution_steps": 50}
)

{'goal': [['5', '6', '4', '9', '7', '8', '2', '3', '1'],
  ['8', '9', '7', '3', '1', '2', '5', '6', '4'],
  ['2', '3', '1', '6', '4', '5', '8', '9', '7'],
  ['6', '5', '3', '7', '8', '9', '1', '4', '2'],
  ['9', '7', '8', '4', '2', '1', '6', '5', '3'],
  ['1', '4', '2', '5', '3', '6', '9', '7', '8'],
  ['3', '1', '5', '2', '6', '4', '7', '8', '9'],
  ['7', '8', '9', '1', '5', '3', '4', '2', '6'],
  ['4', '2', '6', '8', '9', '7', '3', '1', '5']],
 'success': False,
 'actions': [{'value': '9', 'row': 0, 'column': 3},
  {'value': '4', 'row': 0, 'column': 2},
  {'value': '7', 'row': 0, 'column': 4},
  {'value': '8', 'row': 1, 'column': 0},
  {'value': '6', 'row': 1, 'column': 7},
  {'value': '9', 'row': 1, 'column': 1},
  {'value': '4', 'row': 1, 'column': 8},
  {'value': '3', 'row': 1, 'column': 3},
  {'value': '7', 'row': 2, 'column': 6},
  {'value': '9', 'row': 2, 'column': 7},
  {'value': '8', 'row': 2, 'column': 8},
  {'value': '5', 'row': 0, 'column': 0}],
 'states': [{'value': [['5'