In [1]:
# Purpose of this notebook: test LangChain agent architectures on clembench games.

In [2]:
import langchain

# 1. Environment setup and dependency installation

## 1.1. Setup environment



In [3]:
import os

# Name of the game to play (this code can be adapted to any 2-player game)
GAME_NAME = "taboo"

# Local clone location of the clembench repository.
# An already exported, non-empty CLEMBENCH_HOME environment variable takes
# precedence, so the notebook can run on another machine without editing this
# cell; the literal path below is only the fallback.
CLEMBENCH_HOME = (
    os.environ.get("CLEMBENCH_HOME")
    or r"C:\Users\white\Desktop\agents_experiments\clembench"
)

# Expose CLEMBENCH_HOME so the clem framework can find the games
os.environ["CLEMBENCH_HOME"] = CLEMBENCH_HOME

## 1.2. Install games and dependencies

In [None]:
# Clone the clembench repo into CLEMBENCH_HOME (disabled by default; uncomment for a first-time setup.
# Note: git refuses to clone into an existing non-empty directory, so a re-run fails instead of overwriting)
#!git clone https://github.com/clp-research/clembench $CLEMBENCH_HOME

# Install the clembench requirements into the Python kernel
# ($CLEMBENCH_HOME is filled in by IPython from the Python variable of the same name)
%pip install -r $CLEMBENCH_HOME/requirements.txt

# Make tqdm usable in Jupyter notebooks
%pip install --upgrade ipywidgets jupyter_client

In [5]:
# Sanity check: print the installed clem version, then confirm that GAME_NAME is
# listed as an available game ($GAME_NAME is filled in by IPython from the Python variable)
!clem --version
!clem list games -s $GAME_NAME

clem 3.4.2
Listing all available games (use -v option to see the whole specs)
Found '2' game specs that match the game_selector='{'game_name': 'taboo'}'
taboo:
 	Taboo game between two agents where one has to describe a word for
	the other to guess.
taboo:
 	Taboo game between two agents where one has to describe a word for
	the other to guess.


# 2. Register and configure a model backend

In [6]:
from clemcore.backends import ModelRegistry

# Register the model id "gpt-oss:20b" under the name "clp-chat" for the
# "openai_compatible" backend.
registry = ModelRegistry.register(
    "clp-chat",
    backend="openai_compatible",
    model_id="gpt-oss:20b",
)

# Cell output: the first registered model spec that matches "clp-chat"
registry.get_first_model_spec_that_unify_with("clp-chat")

ModelSpec({'model_name': 'clp-chat', 'backend': 'openai_compatible', 'lookup_source': 'C:\\Users\\white\\Desktop\\agents_experiments\\model_registry.json', 'model_id': 'gpt-oss:20b'})

In [7]:
from clemcore.backends import KeyRegistry

# Connection settings for the "openai_compatible" backend.
# NOTE(review): "1234" looks like a placeholder key for a local server, and
# BASE_URL is empty here -- fill in the real endpoint before running.
API_KEY = "1234"
ORGANIZATION = "<insert-organisation-here>"
BASE_URL = ""

# Pass the API_KEY constant instead of repeating the literal, so that changing
# the constant above actually changes what gets registered.
# NOTE(review): force_cwd=True presumably stores key.json in the current
# working directory (see the path in the cell output) -- confirm in clemcore.
KeyRegistry.register(
    "openai_compatible",
    api_key=API_KEY,
    organisation=ORGANIZATION,
    base_url=BASE_URL,
    force_cwd=True,
)

KeyRegistry(file='C:\Users\white\Desktop\agents_experiments\key.json', backends=[openai_compatible])

# 3. Agent implementation

In [8]:
# pip install -qU langchain langchain-openai
from langchain.agents import create_agent
from dataclasses import dataclass
from langchain.agents.structured_output import ToolStrategy
from langchain_openai import ChatOpenAI
from langchain.tools import tool
from langchain_core.messages import AIMessage

In [9]:
# Stand-alone chat model handle; the settings are collected in a dict first so
# they can be inspected or tweaked before the client is built.
my_model_kwargs = {
    "model": "gpt-oss:20b",
    "base_url": "",
    "api_key": "",
    "temperature": 0,
    "max_tokens": 300,
}
my_model = ChatOpenAI(**my_model_kwargs)

## 3.1. An Agentic Player with a Tag Extraction tool

In [10]:
# System prompt for the tag-extraction sub-agent: it receives a game's rule text
# and must answer with nothing but the response tag(s), or "NO TAG".
# The example input previously contained a mis-encoded apostrophe in "Let's";
# it is now plain ASCII, matching the other apostrophes in the prompt.
subagent_prompt = """
You are given a small piece of text which contains gameplay rules. You need to extract the necessary tags (often written in CAPITAL LETTERS), so that the player can use them for the answer. Do not output any text apart from the tag(s). Example IO pair:

INPUT:
Let's play a guessing game! Your task is to answer the other player's questions. Based on your knowledge of the word: $TARGET WORD$, respond to the following questions or guesses. Limit your response to only 'yes' or 'no' with no explanation or other words. Never reveal the answer in your response.

You must reply using the format below and DO NOT ADD ANY TEXT OTHER THAN THIS:

ANSWER: <some text>

Target Word: $TARGET WORD$

OUTPUT:
ANSWER:

If you identified no tags, please return NO TAG as an answer.
"""

In [11]:
from playpen.agents import ClemAgent, ClemObservation



class MyAgenticPlayer(ClemAgent):
    """LangChain-based player that answers game prompts with a tool-using agent.

    The main agent owns a single tool, ``extract_tags``, which forwards the
    text it is given to a tool-less sub-agent (prompted with
    ``subagent_prompt``) and returns that sub-agent's reply.
    """

    def __init__(self):
        """Build the chat model, the tag-extraction sub-agent and the main agent."""
        super().__init__()
        self.model = ChatOpenAI(
            model="gpt-oss:20b",
            base_url="",
            api_key="1234",
            temperature=0,
            max_tokens=300,
        )

        # Build the sub-agent once here instead of on every tool call: it only
        # depends on the model and the fixed system prompt.
        subagent_extractor = create_agent(
            model=self.model,
            tools=[],
            system_prompt=subagent_prompt,
        )

        @tool
        def extract_tags(initial_prompt: str) -> str:
            """
            Extract the tags that are necessary for the player from the game rules.
            """
            print(initial_prompt)
            tag_answer = subagent_extractor.invoke(
                {"messages": [{"role": "user", "content": initial_prompt}]}
            )
            response_text = self._last_ai_content(tag_answer["messages"])
            # Without any AI message there is nothing to extract; answer with the
            # "no tag" marker from subagent_prompt instead of raising StopIteration.
            return "NO TAG" if response_text is None else response_text

        # NOTE(review): the system prompt ends mid-sentence ("... and use the
        # result "); it is kept unchanged here -- confirm the intended wording.
        self.agent = create_agent(
            model=self.model,
            tools=[extract_tags],
            system_prompt="You're going to play a game. You're a professional agent game player, but you also have a helpful tool extract_tags, that can help you. First of all, call the extract_tags tool, and use the result ",
        )

    @staticmethod
    def _last_ai_content(messages, skip_empty=False):
        """Return the content of the most recent ``AIMessage`` in ``messages``.

        Args:
            messages: Sequence of LangChain messages, oldest first.
            skip_empty: When True, AI messages with falsy content are ignored.

        Returns:
            The matching message's ``content``, or ``None`` if there is none.
        """
        for msg in reversed(messages):
            if isinstance(msg, AIMessage) and (msg.content or not skip_empty):
                return msg.content
        return None

    def act(self, last: ClemObservation) -> str:
        """Answer the observation ``last`` with the main agent's final reply.

        Only ``last.content`` is sent to the agent. The reply is the content of
        the newest non-empty ``AIMessage``; if there is none, a fixed fallback
        string is returned.
        """
        print("Last content", last.content)
        # NOTE(review): no checkpointer is passed to create_agent, so the
        # thread_id presumably does not persist any conversation between
        # calls -- confirm against the LangGraph persistence docs.
        result = self.agent.invoke(
            {"messages": [{"role": "user", "content": last.content}]},
            config={"configurable": {"thread_id": "1"}},
        )

        response_text = self._last_ai_content(result["messages"], skip_empty=True)
        if response_text is None:
            # NOTE(review): the fallback uses the describer's "CLUE:" tag even
            # when this player is seated in another role.
            return "CLUE: (fallback)"
        return response_text


# One independent player instance per role
describer = MyAgenticPlayer()
guesser = MyAgenticPlayer()


.--------------..--------------..--------------..--------------..--------------..--------------..--------------.
|   ______     ||   _____      ||      __      ||  ____  ____  ||   ______     ||  _________   || ____  _____  |
|  |_   __ \   ||  |_   _|     ||     /  \     || |_  _||_  _| ||  |_   __ \   || |_   ___  |  |||_   \|_   _| |
|    | |__) |  ||    | |       ||    / /\ \    ||   \ \  / /   ||    | |__) |  ||   | |_  \_|  ||  |   \ | |   |
|    |  ___/   ||    | |   _   ||   / ____ \   ||    \ \/ /    ||    |  ___/   ||   |  _|  _   ||  | |\ \| |   |
|   _| |_      ||   _| |__/ |  || _/ /    \ \_ ||    _|  |_    ||   _| |_      ||  _| |___/ |  || _| |_\   |_  |
|  |_____|     ||  |________|  |||____|  |____|||   |______|   ||  |_____|     || |_________|  |||_____|\____| |
'--------------''--------------''--------------''--------------''--------------''--------------''--------------'



## 3.2. A Human Model Player (for comparison)

In [12]:
from playpen.agents import ClemAgent, ClemObservation
from clemcore.backends import load_model
from clemcore.clemgame.player import Player
from clemcore.backends import HumanModel

class MyHumanPlayer(ClemAgent):
    """Player whose moves are typed in by a person through ``input()``."""

    def __init__(self):
        """Set up the base agent and attach a ``HumanModel`` as its model."""
        super().__init__()
        self.model = HumanModel()

    def act(self, last: ClemObservation) -> str:
        """Show the newest history entry, read a reply and record it.

        The typed reply is passed to ``self.observe`` as an assistant message
        and then returned unchanged.
        """
        # Only print something when the history already has at least one entry
        if self.history:
            newest_entry = self.history[-1]
            print(f"\n{newest_entry.get('content', '')}")

        prompt_text = f"Your response as {type(self).__name__}:\n"
        reply = input(prompt_text)

        self.observe({"role": "assistant", "content": reply})
        return reply



human_player = MyHumanPlayer()

# 4. GameEnv and running

In [13]:
from clemcore.clemgame import env
from playpen.callbacks import episode_results_folder_callbacks
from clemcore.clemgame.envs.pettingzoo.wrappers import AgentControlWrapper

# Create callbacks to record the interactions in a folder; here we name the folder after the models the agent uses
callbacks = episode_results_folder_callbacks(
    run_dir="clp-chat",
    result_dir_path="playpen-records",
    agent_infos="MyAgenticDescriber",
)

# Build the environment for GAME_NAME (set in the environment-setup cell) rather
# than a second hard-coded "taboo", so switching games needs only one edit.
# The filter returns the same ids for every experiment, i.e. only the instances
# with game ids 1, 5 and 10 are selected.
game_env = env(
    GAME_NAME,
    game_instance_filter=lambda game_name, experiment_name: [1, 5, 10],
    single_pass=False,
    callbacks=callbacks,
)

game_env.reset()

2026-01-30 19:43:36,531 - clemcore.cli - INFO - Found '1' game matching the game_selector="taboo"
2026-01-30 19:43:36,534 - clemcore.cli - INFO - {
  "game_name": "taboo",
  "description": "Taboo game between two agents where one has to describe a word for the other to guess.",
  "main_game": "taboo",
  "players": 2,
  "image": "none",
  "languages": [
    "en"
  ],
  "benchmark": [
    "0.9",
    "1.0",
    "1.5",
    "2.0"
  ],
  "regression": "small",
  "roles": [
    "Describer",
    "Guesser"
  ],
  "game_path": "C:\\Users\\white\\Desktop\\agents_experiments\\clembench\\taboo"
}
2026-01-30 19:43:36,539 - clemcore.run - INFO - Loading game benchmark for taboo
2026-01-30 19:43:37,368 - clemcore.run - INFO - Loading game benchmark for taboo took: 0:00:00.826434
2026-01-30 19:43:37,376 - clemcore.run - INFO - Sub-select for taboo experiment high_en instances with game_ids: [1, 5, 10]
2026-01-30 19:43:37,379 - clemcore.run - INFO - Sub-select for taboo experiment medium_en instances wi

In [14]:
# The player ids are only available after reset(), because reset() initiates the GameMaster
print("possible agents:", game_env.possible_agents)

# In most cases the roles describe, in order, what player_0 and player_1 are
game_roles = game_env.unwrapped.game_master.game_spec["roles"]
print("likely mapping:", game_roles)

# Seat the agentic describer as player_0 and the human player as player_1
agent_mapping = dict(player_0=describer, player_1=human_player)

possible agents: ['player_0', 'player_1']
likely mapping: ['Describer', 'Guesser']


In [15]:
# Play one episode automatically and record every step as
# (agent_id, context, response, reward)
game_env.reset()
context_response_pairs = []
for agent_id in game_env.agent_iter():
    context, reward, termination, truncation, info = game_env.last()
    if termination or truncation:
        response = None # we step one more time to remove the agent from the env (final reward was observed in last)
    else:
        response = agent_mapping[agent_id](context)
    context_response_pairs.append((agent_id, context, response, reward))
    game_env.step(response)


# Reset every seated agent, not only the describer, so that no player carries
# state from this episode into the next one (whichever object sits in the
# guesser seat is covered as well).
for agent in agent_mapping.values():
    agent.reset()

print(f"Episode took these {len(context_response_pairs)} steps:")
print("-" * 20)
for idx, (agent_id, context, response, reward) in enumerate(context_response_pairs):
    print(f"Step {idx} / Reward {reward:.2f}:")
    print(f"Agent({agent_id}) <- Context:", context)
    print(f"Agent({agent_id}) -> Response:", response)
    print("-" * 20)

# Note that there is a last None-action step to collect any rewards that happened between the player's turns.
# For example, when the guesser guesses the word on its final turn, then also the describer should receive the positive game reward; if both cannot find the word until the last turn, then also the describer receives a negative game reward, although the guesser makes the last turn.
# You have to decide whether
# - to merge them with the second last observation
# - to ignore them (because the other player caused to abort the game, or was incapable of guessing an obvious word)
# - to keep them as a separate observation with a None action

Last content You are playing a collaborative word guessing game in which you have to describe a target word for another player to guess.

Rules:
(a) You have to reply in the form: CLUE: <some text>. Guesses from the other player will start with GUESS.
(b) You cannot use the target word itself, parts or morphological variants of it in your description.
(c) In addition, the same rules apply for related words which are provided below.

End conditions:
(i) If you use the target word or a related word in your description, then you lose.
(ii) If the other player can guess the target word in 3 tries, you both win.

Let us start.

This is the target word that you need to describe and that the other player needs to guess:

energy

Related words are:

- solar
- power
- electricity

Important: You are under time pressure, give short descriptions that are to the point!
You are playing a collaborative word guessing game in which you have to describe a target word for another player to guess.

Rules

Your response as MyHumanPlayer:
 GUESS: gasoline


Last content GUESS: gasoline
GUESS: gasoline
Episode took these 5 steps:
--------------------
Step 0 / Reward 0.00:
Agent(player_0) <- Context: {'role': 'user', 'content': 'You are playing a collaborative word guessing game in which you have to describe a target word for another player to guess.\n\nRules:\n(a) You have to reply in the form: CLUE: <some text>. Guesses from the other player will start with GUESS.\n(b) You cannot use the target word itself, parts or morphological variants of it in your description.\n(c) In addition, the same rules apply for related words which are provided below.\n\nEnd conditions:\n(i) If you use the target word or a related word in your description, then you lose.\n(ii) If the other player can guess the target word in 3 tries, you both win.\n\nLet us start.\n\nThis is the target word that you need to describe and that the other player needs to guess:\n\nenergy\n\nRelated words are:\n\n- solar\n- power\n- electricity\n\nImportant: You are under time pres

# 5. Some remarks about the scoring

In [16]:
# The first registered callback holds the results folder the records are written to
results_folder = callbacks.callbacks[0].results_folder
print(f"Results dir: {results_folder.results_dir_path}")
print(f"Run dir: {results_folder.run_dir}")

Results dir: playpen-records
Run dir: clp-chat


In [17]:
# How to evaluate:
# with the playpen-env activated, cd into agents_experiments and run
#   clem score -g taboo -r playpen-records
#   clem eval -g taboo -r playpen-records