# 2048 playing agent

In this tutorial, we will see how to benchmark an agent that plays the 2048 game using AgentQuest.

In [1]:
# ! pip3 install agentquest

In [2]:
from agentquest.benchmarks.twothousandfortyeight import (
    TTFEDriver,
    TTFEUtils,
    TTFEAction,
)
from dotenv import load_dotenv

# Load environment variables from a .env file (python-dotenv).
# Presumably this supplies the API credentials needed by the LLM client below — verify.
load_dotenv()

True

Initialize the 2048 (`twothousandfortyeight`) driver and get the first observation.

In [3]:
# Create the 2048 benchmark driver; `driver` and `obs` are reused by the cells below.
driver = TTFEDriver()
obs = driver.reset()  # Get the first observation
# obs.output is the text shown to the agent (game instructions followed by the board).
print(f"OBSERVATION: {obs.output}")

OBSERVATION: Welcome to 2048!
Use 'w', 'a', 's', 'd' to move the tiles, where each move respectively moves all blocks that can move in the given direction:
'up', 'left', 'down', 'right'.
Your goal is to make the 2048 block, you work towards this by merging tiles of the same value with your movement keys.
The merged block will be the sum of the previous values of the blocks. You lose when no valid moves are possible.
Initially, two blocks appear, after each move following that a block will spawn. There is a 90 percent chance for it to be a '2' and a 10 percent chance for it to be a '4'.
You will receive feedback after you make a move: verbally and with a matrix. Base your moves on the feedback provided.
Your response should be strictly in the following format: 
Move: <w|a|s|d>
╔══════╦══════╦══════╦══════╗
║      ║      ║      ║      ║
╠══════╬══════╬══════╬══════╣
║      ║      ║      ║  2   ║
╠══════╬══════╬══════╬══════╣
║  4   ║      ║      ║      ║
╠══════╬══════╬══════╬══════╣
║  

Initialize the prompt and the agent.

In [4]:
from langchain_openai import ChatOpenAI
from langchain_core.prompts import PromptTemplate


# Chat model that is invoked with the formatted prompt to choose each move.
llm = ChatOpenAI(model_name="gpt-4o-mini")


class CustomPromptTemplate(PromptTemplate):
    """Prompt template that renders recent interaction history as plain text.

    ``format`` expects a ``history`` keyword argument holding a sequence of
    3-tuples ``(action, _, observation)``. Only the last five entries are
    rendered, oldest first, and the resulting text is substituted for the
    ``{history}`` placeholder of ``template``.
    """

    template: str

    def format(self, **kwargs) -> str:
        """Render the template, replacing ``history`` with a text transcript.

        Args:
            **kwargs: Template variables. Must include ``history``, a sequence
                of ``(action, _, observation)`` tuples. A falsy ``action`` or
                ``observation`` (e.g. ``None`` for the initial state, which
                has no action) is skipped.

        Returns:
            The formatted prompt string.

        Raises:
            KeyError: If ``history`` is not supplied.
        """
        history = kwargs.pop("history")

        thoughts = ""
        # Walk the last five entries themselves. Re-reading history[-1] inside
        # the loop would repeat the newest state instead of showing the
        # sequence of recent states.
        for action, _, observation in history[-5:]:
            if action:
                thoughts += f"\n Action: {str(action.value)}\n"
            if observation:
                thoughts += f"\n Observation: {str(observation.output)}\n"

        kwargs["history"] = thoughts
        return self.template.format(**kwargs)

In [5]:
# Prompt text with two placeholders: {input} receives the task instructions and
# {history} receives the transcript built by CustomPromptTemplate.format.
prompt = """
You are an agent with a particular task. The task and its rules are as follows.
Input task: {input}
Use the optimal strategy possible for the task.
Here are the recent game states: {history}
"""

# input_variables names the placeholders used in the prompt text above.
prompt_template = CustomPromptTemplate(
    template=prompt, input_variables=["input", "history"]
)

In [None]:
from langfuse.decorators import observe
import os

# Langfuse settings are left blank as placeholders; fill in your own values before running.
# NOTE(review): these assignments overwrite any LANGFUSE_* values already present in
# the environment, including ones loaded earlier by load_dotenv() — confirm this is intended.
os.environ["LANGFUSE_SECRET_KEY"] = ""
os.environ["LANGFUSE_PUBLIC_KEY"] = ""
os.environ["LANGFUSE_HOST"] = ""


@observe
def lf_invoke(data):
    """Format the prompt from ``data`` and send it to the LLM.

    ``data`` is unpacked as keyword arguments into ``prompt_template.format``;
    whatever ``llm.invoke`` returns is handed back unchanged. The function is
    wrapped with Langfuse's ``observe`` decorator.
    """
    rendered_prompt = prompt_template.format(**data)
    # Uncomment to inspect the exact prompt sent to the model:
    # print(rendered_prompt)
    return llm.invoke(rendered_prompt)

Start the agent loop to solve the task.

In [7]:
step_cnt = 0
while not obs.success and obs.can_proceed and step_cnt < 50:
    print("Step: ", step_cnt)
    if step_cnt % 10 == 0:
        print(obs.output)

    if len(driver.metrics.interactions) == 0:
        # Metrics interactions does not contain initial configuration. Keeping this in history.
        history = [(None, None, obs)]
    else:
        history = driver.metrics.interactions

    response = lf_invoke(
        {"input": TTFEUtils.get_initial_instructions(), "history": history}
    )
    # print(response.content)
    obs = driver.step_raw(response.content)
    step_cnt += 1

    obs = driver.step(TTFEAction(value="a"))

Step:  0
Welcome to 2048!
Use 'w', 'a', 's', 'd' to move the tiles, where each move respectively moves all blocks that can move in the given direction:
'up', 'left', 'down', 'right'.
Your goal is to make the 2048 block, you work towards this by merging tiles of the same value with your movement keys.
The merged block will be the sum of the previous values of the blocks. You lose when no valid moves are possible.
Initially, two blocks appear, after each move following that a block will spawn. There is a 90 percent chance for it to be a '2' and a 10 percent chance for it to be a '4'.
You will receive feedback after you make a move: verbally and with a matrix. Base your moves on the feedback provided.
Your response should be strictly in the following format: 
Move: <w|a|s|d>
╔══════╦══════╦══════╦══════╗
║      ║      ║      ║      ║
╠══════╬══════╬══════╬══════╣
║      ║      ║      ║  2   ║
╠══════╬══════╬══════╬══════╣
║  4   ║      ║      ║      ║
╠══════╬══════╬══════╬══════╣
║      

In [8]:
# Show the observation returned by the most recent driver step.
print(obs.output)

Move did not change the board. Please enter 'w', 'a', 's', or 'd'.

╔══════╦══════╦══════╦══════╗
║  2   ║  32  ║  4   ║  8   ║
╠══════╬══════╬══════╬══════╣
║  32  ║  4   ║  2   ║      ║
╠══════╬══════╬══════╬══════╣
║  8   ║      ║      ║      ║
╠══════╬══════╬══════╬══════╣
║      ║      ║      ║      ║
╚══════╩══════╩══════╩══════╝



In [10]:
# Export the metrics recorded by the driver during the run.
# NOTE(review): num_execution_steps=50 equals the 50-step cap of the agent loop;
# the meaning of theta_a is not visible in this notebook — check the agentquest docs.
driver.metrics.export(
    repetition_function_kwargs={"theta_a": 1, "num_execution_steps": 50}
)

{'goal': 2048,
 'success': False,
 'actions': [{'value': 'd'},
  {'value': 'a'},
  {'value': 'a'},
  {'value': 'a'},
  {'value': 'w'},
  {'value': 'w'},
  {'value': 'w'},
  {'value': 'w'},
  {'value': 'w'},
  {'value': 'w'},
  {'value': 'w'},
  {'value': 'w'},
  {'value': 's'},
  {'value': 's'},
  {'value': 's'},
  {'value': 's'},
  {'value': 'a'},
  {'value': 'a'},
  {'value': 'a'},
  {'value': 'a'},
  {'value': 'a'},
  {'value': 'w'},
  {'value': 'w'},
  {'value': 'w'},
  {'value': 'w'},
  {'value': 'a'},
  {'value': 'a'},
  {'value': 'a'},
  {'value': 'a'},
  {'value': 's'},
  {'value': 's'},
  {'value': 'a'},
  {'value': 'w'},
  {'value': 'w'},
  {'value': 'w'},
  {'value': 'w'},
  {'value': 'w'},
  {'value': 'a'},
  {'value': 'a'},
  {'value': 'a'},
  {'value': 'a'},
  {'value': 'a'},
  {'value': 'w'},
  {'value': 'w'},
  {'value': 'w'},
  {'value': 'w'},
  {'value': 'w'},
  {'value': 'w'},
  {'value': 'a'},
  {'value': 'w'}],
 'states': [{'value': [[2, 0, 0, 0],
    [0, 0, 0, 2],