In [1]:
# the purpose of the notebook is to test LangChain agent architectures on clembench games

In [50]:
import langchain

# 1. Environment setup and dependency installation

## 1.1. Setup environment



In [51]:
import os

# Specify the game name here (this code can be adapted to any 2-player game)
GAME_NAME = "textmapworld"

# Local clone location of the clembench repository.
# A CLEMBENCH_HOME that is already exported in the environment takes precedence, so the
# notebook can run on another machine without editing this cell; the hard-coded path
# below is only the fallback.
CLEMBENCH_HOME = os.environ.get("CLEMBENCH_HOME") or (
    r"C:\Users\white\Desktop\agents_experiments\clembench_v3"
)

# Expose CLEMBENCH_HOME so the clem framework can find the games
os.environ["CLEMBENCH_HOME"] = CLEMBENCH_HOME

## 1.2. Install games and dependencies

In [None]:
# One-time setup: uncomment the next line to clone the clembench repo into CLEMBENCH_HOME (safe to re-run; git will warn if it already exists)
#!git clone https://github.com/clp-research/clembench $CLEMBENCH_HOME

# Install the requirements into the Python kernel
%pip install -r $CLEMBENCH_HOME/requirements.txt

# Make tqdm usable in Jupyter notebooks
%pip install --upgrade ipywidgets jupyter_client

In [None]:
%pip install --upgrade clemcore

In [52]:
# Sanity check: version + confirm that the game is an available game
!clem --version
!clem list games -s $GAME_NAME

clem 3.4.2
Listing all available games (use -v option to see the whole specs)
Found '1' game specs that match the game_selector='{'game_name': 'textmapworld'}'
textmapworld:
 	Exhaustively exploring a map.


# 2. Register and configure a model backend

# 3. Agent implementation

In [53]:
# pip install -qU langchain "langchain[anthropic]"
from langchain.agents import create_agent
from dataclasses import dataclass
from langchain.agents.structured_output import ToolStrategy
from langchain_openai import ChatOpenAI
from langchain.tools import tool
from langchain_core.messages import AIMessage

In [5]:
# Standalone chat-model handle, configured with the same arguments that
# MyAgenticPlayer passes when it builds its own model.
# NOTE(review): base_url and api_key are empty strings here; presumably they were
# blanked before sharing and must be filled in for your endpoint.
my_model = ChatOpenAI(
    model="gpt-oss:20b",
    base_url="",
    api_key="",
    temperature=0,
    max_tokens=300,
)

## 3.1. An Agentic Player with a Tag Extraction tool

In [6]:
# System prompt for the tag-extraction sub-agent: it is shown a piece of game rules and
# must answer with only the required tag(s), or with NO TAG when it finds none.
# The example input previously contained a mis-encoded apostrophe; it is plain ASCII now.
subagent_prompt = """
You are given a small piece of text which contains gameplay rules. You need to extract the necessary tags (often written in CAPITAL LETTERS), so that the player can use them for the answer. Do not output any text apart from the tag(s). Example IO pair:

INPUT:
Let's play a guessing game! Your task is to answer the other player's questions. Based on your knowledge of the word: $TARGET WORD$, respond to the following questions or guesses. Limit your response to only 'yes' or 'no' with no explanation or other words. Never reveal the answer in your response.

You must reply using the format below and DO NOT ADD ANY TEXT OTHER THAN THIS:

ANSWER: <some text>

Target Word: $TARGET WORD$

OUTPUT:
ANSWER:

If you identified no tags, please return NO TAG as an answer.
"""

In [8]:
from playpen.agents import ClemAgent, ClemObservation
from langgraph.checkpoint.memory import InMemorySaver 
from langchain.agents import create_agent, AgentState
from langgraph.runtime import Runtime
from langchain_core.runnables import RunnableConfig



class MyAgenticPlayer(ClemAgent):
    """Game-playing LangChain agent equipped with a tag-extraction tool.

    The main agent stores its conversation in an in-memory checkpointer, using one
    thread id per episode. It can call the ``extract_tags`` tool, which forwards the
    given text to a tool-less sub-agent driven by ``subagent_prompt``.

    Args:
        thread_id: Prefix of the checkpointer thread id; the current episode number
            is appended to it on every ``act`` call.
        fallback_response: Text returned by ``act`` when the agent run contains no
            AI message with non-empty content. Defaults to the previously
            hard-coded ``"CLUE: (fallback)"``; pass a value that matches the answer
            format of the game being played.
    """

    def __init__(self, thread_id: str = "default",
                 fallback_response: str = "CLUE: (fallback)"):
        super().__init__()
        # NOTE(review): base_url and api_key are empty strings; presumably they were
        # blanked before sharing and must be set for your endpoint.
        self.model = ChatOpenAI(
            model="gpt-oss:20b",
            base_url="",
            api_key="",
            temperature=0,
            max_tokens=300,
        )

        self.memory = InMemorySaver()
        self.base_thread_id = thread_id
        self.episode = 0
        self.fallback_response = fallback_response

        # The sub-agent has no tools and no checkpointer, so it is built once here
        # instead of being re-created on every tool call.
        subagent_extractor = create_agent(
            model=self.model,
            tools=[],
            system_prompt=subagent_prompt,
        )

        @tool
        def extract_tags(initial_prompt: str) -> str:
            """
            Extract the tags that are necessary for the player from the game rules.
            """
            print(initial_prompt)
            tag_answer = subagent_extractor.invoke(
                {"messages": [{"role": "user", "content": initial_prompt}]}
            )
            # next() with a default: a run without any AIMessage must not raise
            # StopIteration out of the tool.
            final_ai_message = next(
                (msg for msg in reversed(tag_answer["messages"]) if isinstance(msg, AIMessage)),
                None,
            )
            if final_ai_message is None:
                # Same sentinel the sub-agent prompt asks for when no tag is found.
                return "NO TAG"
            return final_ai_message.content

        self.agent = create_agent(
            model=self.model,
            tools=[extract_tags],
            checkpointer=self.memory,
            system_prompt="You're going to play a game. You're a professional agent game player, but you also have a helpful tool extract_tags, that can help you. First of all, call the extract_tags tool, and use the result ")

    def reset(self):
        """Reset the base agent and advance the episode counter.

        The counter is part of the thread id, so the next ``act`` call uses a new
        checkpointer thread.
        """
        super().reset()
        self.episode += 1

    def act(self, last: ClemObservation) -> str:
        """Send the observation's content to the agent and return its reply.

        Returns:
            The content of the most recent AI message with non-empty content, or
            ``self.fallback_response`` when there is none.
        """
        print("Last content", last.content)
        result = self.agent.invoke(
            {"messages": [{"role": "user", "content": last.content}]},
            config={"configurable": {"thread_id": f"{self.base_thread_id}_ep{self.episode}"}},
        )

        # Walk backwards so that tool-call messages with empty content are skipped.
        for msg in reversed(result["messages"]):
            if isinstance(msg, AIMessage) and msg.content:
                return msg.content

        return self.fallback_response


# Player instance that the single-episode cells map to "player_0"; the thread id is the
# prefix of its checkpointer conversation threads.
describer = MyAgenticPlayer(thread_id="describer_game1")

## 3.2. A Human Model Player (for comparison)

In [10]:
from playpen.agents import ClemAgent, ClemObservation
from clemcore.backends import load_model
from clemcore.clemgame.player import Player
from clemcore.backends import HumanModel

class MyHumanPlayer(ClemAgent):
    """Agent whose answers are typed in by a person at the input prompt."""

    def __init__(self):
        super().__init__()
        self.model = HumanModel()

    def act(self, last: ClemObservation) -> str:
        """Show the newest history entry, read the human's answer and return it.

        The answer is also passed to ``self.observe`` as an assistant message.
        """
        # Only print something when the history already holds at least one entry.
        if self.history:
            newest_entry = self.history[-1]
            shown_text = newest_entry.get("content", "")
            print(f"\n{shown_text}")

        typed_answer = input(f"Your response as {self.__class__.__name__}:\n")

        self.observe({"role": "assistant", "content": typed_answer})
        return typed_answer



# Human-controlled player instance (not referenced by any agent mapping in the cells visible here).
human_player = MyHumanPlayer()

# 4. GameEnv and running

In [14]:
from clemcore.clemgame import env
from clemcore.clemgame import env, episode_results_folder_callbacks
from clemcore.clemgame.envs.pettingzoo.wrappers import AgentControlWrapper

# Create callbacks to record the interactions in a folder; here we name the folder after the models the agent uses
callbacks = episode_results_folder_callbacks(
    run_dir="clp-chat1",
    result_dir_path="playpen-records",
    player_model_infos="MyAgenticDescriber",
)

# Use the game chosen in the setup cell (GAME_NAME) rather than repeating its name,
# so that changing the constant switches the environment as well.
game_env = env(
    GAME_NAME,
    single_pass=True,
    callbacks=callbacks,
)

# reset() creates the GameMaster, which the following cells inspect.
# NOTE(review): with single_pass=True this call presumably already draws one instance
# from the queue - confirm against clemcore.
game_env.reset()

2026-02-07 20:26:10,652 - clemcore.cli - INFO - Found '1' game matching the game_selector="textmapworld"
2026-02-07 20:26:10,654 - clemcore.cli - INFO - {
  "game_name": "textmapworld",
  "description": "Exhaustively exploring a map.",
  "main_game": "textmapworld",
  "players": 1,
  "image": "none",
  "languages": [
    "en"
  ],
  "benchmark": [
    "2.0",
    "3.0"
  ],
  "regression": "large",
  "roles": [
    "Describer"
  ],
  "game_path": "C:\\Users\\white\\Desktop\\agents_experiments\\clembench_v3\\textmapworld\\textmapworld_main"
}
2026-02-07 20:26:10,655 - clemcore.run - INFO - Loading game benchmark for textmapworld
2026-02-07 20:26:10,662 - clemcore.run - INFO - Loading game benchmark for textmapworld took: 0:00:00.005032
2026-02-07 20:26:10,668 - clemcore.run - INFO - Prepared instance queue for textmapworld using 5 experiments ['small', 'medium', 'large', 'medium_cycle', 'large_cycle'] and 50 instances in total.
2026-02-07 20:26:10,670 - clemcore.run - INFO - Detected sin

In [12]:
# Reset the environment again (the cell that creates game_env already calls reset() once).
game_env.reset()

In [16]:
# Player ids only exist once reset() has created the GameMaster.
print("possible agents:", game_env.possible_agents)
# The role list of the game spec usually describes what the player ids stand for.
print("likely mapping:", game_env.unwrapped.game_master.game_spec["roles"])
# player_0 is played by the agent; player_1 is a stub that always answers None.
agent_mapping = dict(player_0=describer, player_1=lambda _obs: None)

possible agents: ['player_0', 'player_1']
likely mapping: ['Describer']


In [17]:
# Play one complete episode automatically and keep every step as an
# (agent_id, context, response, reward) tuple.
game_env.reset()
context_response_pairs = []
for agent_id in game_env.agent_iter():
    context, reward, termination, truncation, info = game_env.last()
    # A finished agent still takes one None step, which removes it from the env
    # (its final reward was already observed through last()).
    response = None if (termination or truncation) else agent_mapping[agent_id](context)
    context_response_pairs.append((agent_id, context, response, reward))
    game_env.step(response)


# Advance the agent to a new conversation thread for the next episode
# (if another agent object plays, that one has to be reset instead).
describer.reset()


print(f"Episode took these {len(context_response_pairs)} steps:")
print("-" * 20)
for idx, (agent_id, context, response, reward) in enumerate(context_response_pairs):
    print(f"Step {idx} / Reward {reward:.2f}:")
    print(f"Agent({agent_id}) <- Context:", context)
    print(f"Agent({agent_id}) -> Response:", response)
    print("-" * 20)

# Note that there is a last None-action step to collect any rewards that happened between the player's turns.
# For example, when the guesser guesses the word on its final turn, then also the describer should receive the positive game reward; if both cannot find the word until the last turn, then also the describer receives a negative game reward, although the guesser makes the last turn.
# You have to decide whether
# - to merge them with the second last observation
# - to ignore them (because the other player caused to abort the game, or was incapable of guessing an obvious word)
# - to keep them as a separate observation with a None action

Last content Please help me with the following task. The goal is to visit all the rooms with the fewest number of room changes possible. In each room, you need to decide the direction to go in. Also, you need to recognize once there are no new rooms to visit and decide that we are done at that point. Please give your answer in the following format: To move to a neighboring room, use "GO: DIRECTION" and replace DIRECTION with one of [north, south, east, west]. To stop the exploration, answer with "DONE" instead. Omit any other text.
Here is an example:
You are in the Kitchen. Currently available directions: south, west. What is your next instruction?
GO: west
You have made a step and entered a Lobby. Currently available directions: east, north. What is your next instruction?
GO: north
...
You have made a step and entered a Bedroom. Currently available directions: south. What is your next instruction?
DONE
Let us start. You are in the Closet. Currently available directions: north, south.

# 5. Some remarks about the scoring

In [115]:
# Show where the episode records are written, read from the results folder of the first callback in the list.
print(f"Results dir: {callbacks.callbacks[0].results_folder.results_dir_path}")
print(f"Run dir: {callbacks.callbacks[0].results_folder.run_dir}")

Results dir: playpen-records
Run dir: clp-chat1


In [None]:
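# How to evaluate:
# with the playpen-env activated, cd agents_experiments
# (the example commands below select the taboo game; pass the game whose records you want to score, e.g. textmapworld)

# clem score -g taboo -r "C:\Users\white\Desktop\agents_experiments\notebooks\playpen-records"
# clem score -g taboo -r playpen-records
# clem eval -g taboo -r playpen-records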

# 6. How not to launch every episode manually

In [27]:
from clemcore.clemgame import env
from clemcore.clemgame import env, episode_results_folder_callbacks
from clemcore.clemgame.envs.pettingzoo.wrappers import AgentControlWrapper

# Create callbacks to record the interactions in a folder; here we name the folder after the models the agent uses
callbacks = episode_results_folder_callbacks(
    run_dir="clp-chat1",
    result_dir_path="playpen-records",
    player_model_infos="MyAgenticDescriber",
)

# Use the game chosen in the setup cell (GAME_NAME) rather than repeating its name,
# so that changing the constant switches the environment as well.
game_env = env(
    GAME_NAME,
    single_pass=True,
    callbacks=callbacks,
)

# reset() creates the GameMaster, which the following cells inspect.
# NOTE(review): with single_pass=True this call presumably already draws one instance
# from the queue - confirm against clemcore.
game_env.reset()

2026-02-07 20:59:31,229 - clemcore.cli - INFO - Found '1' game matching the game_selector="textmapworld"
2026-02-07 20:59:31,234 - clemcore.cli - INFO - {
  "game_name": "textmapworld",
  "description": "Exhaustively exploring a map.",
  "main_game": "textmapworld",
  "players": 1,
  "image": "none",
  "languages": [
    "en"
  ],
  "benchmark": [
    "2.0",
    "3.0"
  ],
  "regression": "large",
  "roles": [
    "Describer"
  ],
  "game_path": "C:\\Users\\white\\Desktop\\agents_experiments\\clembench_v3\\textmapworld\\textmapworld_main"
}
2026-02-07 20:59:31,235 - clemcore.run - INFO - Loading game benchmark for textmapworld
2026-02-07 20:59:31,255 - clemcore.run - INFO - Loading game benchmark for textmapworld took: 0:00:00.016524
2026-02-07 20:59:31,265 - clemcore.run - INFO - Prepared instance queue for textmapworld using 5 experiments ['small', 'medium', 'large', 'medium_cycle', 'large_cycle'] and 50 instances in total.
2026-02-07 20:59:31,267 - clemcore.run - INFO - Detected sin

In [47]:
# Let's peek at the possible player ids (only available after reset, because reset() initiates the GameMaster)
print("possible agents:", game_env.possible_agents)
# In most cases, roles will be the content description of what player_0 and player_1 are
print("likely mapping:", game_env.unwrapped.game_master.game_spec["roles"])
# NOTE(review): `guesser` is not defined in any cell visible in this notebook; it presumably
# survived from an earlier kernel session. Define it (or substitute the intended agent) before
# running this cell on a fresh kernel.
# player_1 is mapped to the `describer` attribute of the game master.
agent_mapping = {"player_0": guesser, "player_1": game_env.unwrapped.game_master.describer}

possible agents: ['player_0', 'player_1']
likely mapping: ['Describer']


In [49]:
# Play several episodes in a row and collect the steps of each one.
# NOTE(review): `guesser` and `process_move_and_get_response` are not defined in any cell
# visible in this notebook, so this cell fails on a fresh kernel until they are provided.
num_episodes = 25

# One list of (agent_id, context, response, reward) tuples per episode
all_episodes_data = []

for episode in range(num_episodes):
    game_env.reset()  # keep in mind that if guesser is an agent, then IT should be reset
    guesser.reset()

    context_response_pairs = []
    for agent_id in game_env.agent_iter():
        context, reward, termination, truncation, info = game_env.last()
        if termination or truncation:
            response = None # we step one more time to remove the agent from the env (final reward was observed in last)
        elif agent_id == "player_1":
          # context here is the guesser's move (e.g., "GO: west")
          gm = game_env.unwrapped.game_master
          # The context may be a dict carrying the text under 'content', or the text itself
          move_text = context['content'] if isinstance(context, dict) else context
          response = process_move_and_get_response(gm.describer, move_text)
        else:
            response = agent_mapping[agent_id](context)
        context_response_pairs.append((agent_id, context, response, reward))
        game_env.step(response)
   #     state = describer.memory.get({"configurable": {"thread_id": f"{describer.base_thread_id}_ep{describer.episode}"}})  #also change this if the player is called some other way

        #print("MEMORY",state)
    all_episodes_data.append(context_response_pairs)

    print(f"Episode {episode + 1}/{num_episodes} completed with {len(context_response_pairs)} steps")

    # Step-by-step dump of the episode that just finished
    print(f"Episode took these {len(context_response_pairs)} steps:")
    print("-" * 20)
    for idx, (agent_id, context, response, reward) in enumerate(context_response_pairs):
        print(f"Step {idx} / Reward {reward:.2f}:")
        print(f"Agent({agent_id}) <- Context:", context)
        print(f"Agent({agent_id}) -> Response:", response)
        print("-" * 20)

# Note that there is a last None-action step to collect any rewards that happened between the player's turns.
# For example, when the guesser guesses the word on its final turn, then also the describer should receive the positive game reward; if both cannot find the word until the last turn, then also the describer receives a negative game reward, although the guesser makes the last turn.
# You have to decide whether
# - to merge them with the second last observation
# - to ignore them (because the other player caused to abort the game, or was incapable of guessing an obvious word)
# - to keep them as a separate observation with a None action

Last content Please help me with the following task. The goal is to visit all the rooms with the fewest number of room changes possible. In each room, you need to decide the direction to go in. Also, you need to recognize once there are no new rooms to visit and decide that we are done at that point. Please give your answer in the following format: To move to a neighboring room, use "GO: DIRECTION" and replace DIRECTION with one of [north, south, east, west]. To stop the exploration, answer with "DONE" instead. Omit any other text.
Here is an example:
You are in the Kitchen. Currently available directions: south, west. What is your next instruction?
GO: west
You have made a step and entered a Lobby. Currently available directions: east, north. What is your next instruction?
GO: north
...
You have made a step and entered a Bedroom. Currently available directions: south. What is your next instruction?
DONE
Let us start. You are in the Dining room. Currently available directions: north. W

# 7. Agent + core tool set

In [54]:
class CoreToolsAgent(ClemAgent):
    """LangChain agent player with a key/value memory and an observation log.

    The agent gets four tools that operate on per-instance state: ``remember`` and
    ``recall`` on ``self.store`` (a dict), ``observe`` and ``get_observations`` on
    ``self.observations`` (a list). Conversation state lives in an in-memory
    checkpointer with one thread id per episode.

    Args:
        thread_id: Prefix of the checkpointer thread id; the current episode number
            is appended to it on every ``act`` call.
    """

    def __init__(self, thread_id: str = "default"):
        import os  # local import keeps this class independent of other cells

        super().__init__()
        self.base_thread_id = thread_id
        self.episode = 0
        self.memory = InMemorySaver()
        self.model = ChatOpenAI(
            model="openai/gpt-4o-mini",
            base_url="https://openrouter.ai/api/v1",
            # Take the key from the environment so it never has to be stored in the
            # notebook; an unset variable falls back to the previous empty string.
            api_key=os.environ.get("OPENROUTER_API_KEY", ""),
            temperature=0,
            max_tokens=150,
        )
        self.store = {}
        self.observations = []

        tools = [
            self._remember(),
            self._recall(),
            self._observe(),
            self._get_observations(),
        ]

        system_prompt = """You're a professional game player with memory tools.

  STRATEGY:
  1. On FIRST turn: understand the rules, store key info with remember()
  2. After 1-2 tool calls, you MUST give your final answer.
  Do NOT keep calling tools."""

        self.agent = create_agent(
            model=self.model,
            tools=tools,
            checkpointer=self.memory,
            system_prompt=system_prompt,
        )

    def reset(self):
        """Reset the base agent, advance the episode counter and empty both memories.

        ``store`` and ``observations`` are cleared in place because the tool
        closures hold references to these exact objects.
        """
        super().reset()
        self.episode += 1
        self.store.clear()
        self.observations.clear()

    def _remember(self):
        """Build the ``remember`` tool, which writes into ``self.store``."""
        store = self.store

        @tool
        def remember(key: str, value: str) -> str:
            """
            Store any important information.

            Examples:
                remember("goal", "describe the target without forbidden words")
                remember("format", "CLUE: <text>")
                remember("target", "first grid")
                remember("forbidden", "cat, dog, pet")
            """
            store[key] = value
            return f"Stored: {key} = {value}"
        return remember

    def _recall(self):
        """Build the ``recall`` tool, which reads from ``self.store``."""
        store = self.store

        @tool
        def recall(key: str = "") -> str:
            """
            Retrieve stored information.

            Args:
                key: Specific key, or empty for everything
            """
            if not store:
                return "Memory empty."
            if key and key in store:
                return f"{key}: {store[key]}"
            # An empty or unknown key falls through to a dump of everything stored.
            return "\n".join(f"- {k}: {v}" for k, v in store.items())
        return recall

    def _observe(self):
        """Build the ``observe`` tool, which appends to ``self.observations``."""
        observations = self.observations

        @tool
        def observe(observation: str) -> str:
            """
            Note something important you noticed.

            Examples:
                observe("Grid 1 has a red circle")
                observe("The clue mentions 'round shape'")
                observe("Player said 'no' to animal question")
            """
            observations.append(observation)
            return f"Noted: {observation}"
        return observe

    def _get_observations(self):
        """Build the ``get_observations`` tool, which lists ``self.observations``."""
        observations = self.observations

        @tool
        def get_observations() -> str:
            """Get all observations you've noted."""
            if not observations:
                return "No observations yet."
            return "\n".join(f"{i+1}. {o}" for i, o in enumerate(observations))
        return get_observations

    def act(self, last: ClemObservation) -> str:
        """Send the observation's content to the agent and return its reply.

        Returns:
            The content of the most recent AI message with non-empty content, or
            ``"(no response)"`` when there is none.
        """
        result = self.agent.invoke(
            {"messages": [{"role": "user", "content": last.content}]},
            config={
                "configurable": {"thread_id": f"{self.base_thread_id}_ep{self.episode}"},
                "recursion_limit": 100,  # Limits total steps
            },
        )

        # Walk backwards so that tool-call messages with empty content are skipped.
        for msg in reversed(result["messages"]):
            if isinstance(msg, AIMessage) and msg.content:
                return msg.content

        return "(no response)"

In [55]:
from clemcore.clemgame import env
from clemcore.clemgame import env, episode_results_folder_callbacks
from clemcore.clemgame.envs.pettingzoo.wrappers import AgentControlWrapper

# Create callbacks to record the interactions in a folder; here we name the folder after the models the agent uses
# NOTE(review): the label below still reads "MyAgenticDescriber" and the run directory is
# the one used by the earlier sections, although the next cells play CoreToolsAgent
# instances - confirm that mixing these records is intended.
callbacks = episode_results_folder_callbacks(
    run_dir="clp-chat1",
    result_dir_path="playpen-records",
    player_model_infos="MyAgenticDescriber",
)

# Use the game chosen in the setup cell (GAME_NAME) rather than repeating its name,
# so that changing the constant switches the environment as well.
game_env = env(
    GAME_NAME,
    single_pass=True,
    callbacks=callbacks,
)

# reset() creates the GameMaster, which the following cells inspect.
# NOTE(review): with single_pass=True this call presumably already draws one instance
# from the queue - confirm against clemcore.
game_env.reset()

2026-02-09 21:35:13,953 - clemcore.cli - INFO - Found '1' game matching the game_selector="textmapworld"
2026-02-09 21:35:13,966 - clemcore.cli - INFO - {
  "game_name": "textmapworld",
  "description": "Exhaustively exploring a map.",
  "main_game": "textmapworld",
  "players": 1,
  "image": "none",
  "languages": [
    "en"
  ],
  "benchmark": [
    "2.0",
    "3.0"
  ],
  "regression": "large",
  "roles": [
    "Describer"
  ],
  "game_path": "C:\\Users\\white\\Desktop\\agents_experiments\\clembench_v3\\textmapworld\\textmapworld_main"
}
2026-02-09 21:35:13,973 - clemcore.run - INFO - Loading game benchmark for textmapworld
2026-02-09 21:35:14,380 - clemcore.run - INFO - Loading game benchmark for textmapworld took: 0:00:00.404748
2026-02-09 21:35:14,408 - clemcore.run - INFO - Prepared instance queue for textmapworld using 5 experiments ['small', 'medium', 'large', 'medium_cycle', 'large_cycle'] and 50 instances in total.
2026-02-09 21:35:14,411 - clemcore.run - INFO - Detected sin

In [56]:
# Two independent tool-equipped agents, created in the same order as before
# (describer first, then guesser); each gets its own thread-id prefix.
describer1, guesser1 = (
    CoreToolsAgent(thread_id=prefix) for prefix in ("describer", "guesser")
)
# Player ids only exist once reset() has created the GameMaster.
print("possible agents:", game_env.possible_agents)
# The role list of the game spec usually describes what the player ids stand for.
print("likely mapping:", game_env.unwrapped.game_master.game_spec["roles"])
# player_0 is played by guesser1; player_1 is mapped to the game master's `describer` attribute.
agent_mapping = dict(
    player_0=guesser1,
    player_1=game_env.unwrapped.game_master.describer,
)

possible agents: ['player_0', 'player_1']
likely mapping: ['Describer']


In [58]:
# Play up to num_episodes episodes with the CoreToolsAgent players and collect the steps.
num_episodes = 50

# One list of (agent_id, context, response, reward) tuples per episode
all_episodes_data = []

for episode in range(num_episodes):
    try:
        game_env.reset()
    except StopIteration:
        # The recorded run of this cell ended in a bare StopIteration. Presumably the
        # single-pass instance queue (50 instances, one of them already drawn by the
        # reset() in the cell that creates game_env) was exhausted - TODO confirm against
        # clemcore. Stop cleanly and keep the episodes collected so far.
        print(f"No more game instances available; stopping after {episode} of {num_episodes} episodes")
        break
    describer1.reset()  # keep in mind that if guesser is an agent, then IT should be reset
    guesser1.reset()

    context_response_pairs = []
    for agent_id in game_env.agent_iter():
        context, reward, termination, truncation, info = game_env.last()
        if termination or truncation:
            response = None  # we step one more time to remove the agent from the env (final reward was observed in last)
        else:
            response = agent_mapping[agent_id](context)
        context_response_pairs.append((agent_id, context, response, reward))
        game_env.step(response)
        # Debug dump of guesser1's checkpoint for the current episode thread
        # (also change this if the player is called some other way).
        state = guesser1.memory.get(
            {"configurable": {"thread_id": f"{guesser1.base_thread_id}_ep{guesser1.episode}"}}
        )

        print("MEMORY1", state)

    all_episodes_data.append(context_response_pairs)

    print(f"Episode {episode + 1}/{num_episodes} completed with {len(context_response_pairs)} steps")

    # Step-by-step dump of the episode that just finished
    print(f"Episode took these {len(context_response_pairs)} steps:")
    print("-" * 20)
    for idx, (agent_id, context, response, reward) in enumerate(context_response_pairs):
        print(f"Step {idx} / Reward {reward:.2f}:")
        print(f"Agent({agent_id}) <- Context:", context)
        print(f"Agent({agent_id}) -> Response:", response)
        print("-" * 20)

MEMORY1 {'v': 4, 'ts': '2026-02-09T20:35:58.168391+00:00', 'id': '1f105f6f-0967-6ccb-8001-1b0325b09ad7', 'channel_versions': {'__start__': '00000000000000000000000000000002.0.794347276465807', 'messages': '00000000000000000000000000000003.0.052969138929204185', 'branch:to:model': '00000000000000000000000000000003.0.052969138929204185'}, 'versions_seen': {'__input__': {}, '__start__': {'__start__': '00000000000000000000000000000001.0.9106806634441679'}, 'model': {'branch:to:model': '00000000000000000000000000000002.0.794347276465807'}}, 'updated_channels': ['messages'], 'channel_values': {'messages': [HumanMessage(content='Please help me with the following task. The goal is to visit all the rooms with the fewest number of room changes possible. In each room, you need to decide the direction to go in. Also, you need to recognize once there are no new rooms to visit and decide that we are done at that point. Please give your answer in the following format: To move to a neighboring room, u

StopIteration: 