In [None]:
# Install the packages this notebook imports into the kernel's environment.
# NOTE(review): no versions are pinned here, so a re-run may pick up newer releases.
%pip install taskgen-pro textworld gym torch numpy

In [3]:
# Set up API key and do the necessary imports
from taskgen import *
import os
from google.colab import userdata

# Only required when OpenAI is the LLM backend: read the secret stored in Colab's
# userdata under `keyname` and export it as the process environment variable of
# the same name.
keyname = 'OPENAI_API_KEY'
os.environ[keyname] = userdata.get(keyname)

In [4]:
# Generate three tw-simple games with the same seed (18) but different
# --rewards / --goal settings. Each one is written under games/ as a .z8 file;
# these are the files the play(...) cells further down load.
# NOTE(review): -f presumably overwrites an existing output file — confirm with `tw-make --help`.
!tw-make tw-simple --rewards dense --goal detailed --seed 18 --test --silent -f --output games/tw-rewardsDense_goalDetailed.z8
!tw-make tw-simple --rewards sparse --goal brief --seed 18 --test --silent -f --output games/tw-rewardsSparse_goalBrief.z8
!tw-make tw-simple --rewards sparse --goal none --seed 18 --test --silent -f --output games/tw-rewardsSparse_goalNone.z8

In [5]:
def llm(system_prompt: str, user_prompt: str) -> str:
    """Send one system + user prompt pair to an LLM and return the reply text.

    OpenAI is used here purely as an illustration; swap in your own LLM as needed.

    Args:
        system_prompt: content of the "system" message.
        user_prompt: content of the "user" message.

    Returns:
        The message content of the first choice in the completion response.
    """
    # Keep every LLM-related import inside this function.
    from openai import OpenAI

    conversation = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt},
    ]

    # Fixed seed and zero temperature are requested on every call.
    completion = OpenAI().chat.completions.create(
        messages=conversation,
        model='gpt-4o',
        temperature=0,
        seed=42,
    )
    first_choice = completion.choices[0]
    return first_choice.message.content

In [8]:
import os
from glob import glob
import numpy as np
import textworld.gym
from typing import Mapping, Any
import torch


def play(agent, path, max_step=100, nb_episodes=10, verbose=True):
    """Let `agent` play a TextWorld game (or a directory of games) and report statistics.

    Args:
        agent: object exposing an ``infos_to_request`` attribute/property and an
            ``act(obs, score, done, infos)`` method that returns the next command.
        path: path to a single game file, or a directory; for a directory every
            ``*.z8`` file directly inside it is registered.
        max_step: forwarded as ``max_episode_steps`` when registering the games.
        nb_episodes: number of episodes to play.
        verbose: when true, print the game name, one dot per finished episode and
            a final summary line (average steps and average score).

    Returns:
        None. Results are only printed.

    The environment is closed even when the agent or the environment raises in
    the middle of an episode; the exception itself is propagated unchanged.
    """
    torch.manual_seed(20211021)  # For reproducibility when using action sampling.

    infos_to_request = agent.infos_to_request
    infos_to_request.max_score = True  # Needed to normalize the scores.

    is_dir = os.path.isdir(path)
    gamefiles = glob(os.path.join(path, "*.z8")) if is_dir else [path]

    env_id = textworld.gym.register_games(gamefiles,
                                          request_infos=infos_to_request,
                                          max_episode_steps=max_step)
    env = textworld.gym.make(env_id)  # Create a Gym environment to play the text game.
    if verbose:
        # Note: os.path.dirname() only yields the directory itself when `path`
        # ends with a separator (e.g. "./games/"); otherwise it is the parent.
        label = os.path.dirname(path) if is_dir else os.path.basename(path)
        print(label, end="")

    # Collect some statistics: nb_steps, final reward.
    avg_moves, avg_scores, avg_norm_scores = [], [], []
    max_score = None
    try:
        for _ in range(nb_episodes):
            obs, infos = env.reset()  # Start new episode.

            score = 0
            done = False
            nb_moves = 0
            while not done:
                command = agent.act(obs, score, done, infos)
                obs, score, done, infos = env.step(command)
                nb_moves += 1

            agent.act(obs, score, done, infos)  # Let the agent know the game is done.

            if verbose:
                print(".", end="")
            max_score = infos["max_score"]
            avg_moves.append(nb_moves)
            avg_scores.append(score)
            avg_norm_scores.append(score / max_score)
    finally:
        # Always release the environment, including when an episode blew up.
        env.close()

    if not verbose:
        return

    if not avg_moves:
        # Nothing was played (nb_episodes <= 0): there is no score to average
        # and no `infos` to read the maximum score from.
        print("  \tno episodes played.")
        return

    if is_dir:
        msg = "  \tavg. steps: {:5.1f}; avg. normalized score: {:4.1f} / {}."
        print(msg.format(np.mean(avg_moves), np.mean(avg_norm_scores), 1))
    else:
        msg = "  \tavg. steps: {:5.1f}; avg. score: {:4.1f} / {}."
        print(msg.format(np.mean(avg_moves), np.mean(avg_scores), max_score))

In [9]:
def remove_ascii_art(text):
    """Drop decorative lines (ASCII-art banners, rules, blank lines) from game text.

    A line is kept only if it contains at least one letter or digit. The kept
    lines are joined with newlines so that words on adjacent lines do not run
    together.

    Args:
        text: raw multi-line text, e.g. an observation returned by the game.

    Returns:
        The text with every line that has no alphanumeric character removed;
        an empty string if no line qualifies.
    """
    # Python's `re` has no POSIX classes: "[:alphanum:]" is just the set of the
    # literal characters ':', 'a', 'l', 'p', 'h', 'n', 'u', 'm'. str.isalnum()
    # expresses the intended "contains a letter or digit" check directly.
    kept_lines = [
        line for line in text.splitlines()
        if any(ch.isalnum() for ch in line)
    ]
    return "\n".join(kept_lines)

class TaskgenAgent(textworld.gym.Agent):
    """TextWorld agent that asks a TaskGen ConversableAgent for the next command."""

    def __init__(self, seed=1234):
        """Build the underlying TaskGen player and open the first conversation."""
        self._seed = seed
        # The backslash continuations below are inside the string literal, so the
        # leading whitespace of every continued line is part of the prompt text.
        self._player = Agent('interactive fiction player',
                            'You are the player of an interactive fiction game. \
                            Consider which comands have been successful and try variations throughout the game. \
                            As the player, you may only issue commands in this game. \
                            complete game objectives then explore the game world. \
                            Consider whether the previous command was poorly formed or referred to an object not present. \
                            Commands are like "LOOK AT BOB"; "EAST"; "EXAMINE KNIFE". \
                            Respond only with a single command.',
                            llm = llm,
                            debug=False)
        self.new_conversation()

    def new_conversation(self):
        """Replace the conversation wrapper with a newly constructed one around the same player."""
        # Descriptions of the persistent-memory fields handed to the ConversableAgent.
        memory_schema = {
            'game objectives': 'an array of remaining game objectives. exploration may inform new objectives. array excludes objectives achieved.',
            'successful commands': "an array of commands that succeeded in the game.",
            'objects' : "an array of objects discovered and where they were last seen e.g.: 'red car in the northern carpark'; 'cup on the low table'; 'cardboard box in the cellar'.",
            'rooms': "an array of rooms discovered and how they are organised e.g.: 'northern carpark east of the kitchen'; 'landing at the top of the stairs'.",
        }
        self._agent = ConversableAgent(self._player,
                                       persistent_memory=memory_schema,
                                       person='Game',
                                       verbose=False)

    @property
    def infos_to_request(self) -> textworld.EnvInfos:
        """Extra information requested from the environment: the inventory only."""
        # admissible_commands=True is left out here; `act` copes with either case.
        return textworld.EnvInfos(inventory=True)

    def act(self, obs: str, score: int, done: bool, infos: Mapping[str, Any]) -> str:
        """Return the next command, or reset the conversation once the game is done.

        When `done` is true a new conversation is started and "quit" is returned.
        Otherwise the cleaned observation and the inventory (plus the admissible
        commands, if present in `infos`) are sent to the conversation agent and
        its reply, capped at 99 characters, is returned and remembered in
        `self._last_command`.
        """
        if done:
            self.new_conversation()
            return "quit"

        # result must be shorter than 200 chars https://gitlab.com/DavidGriffith/frotz/-/blob/master/Makefile#L120
        cleaned_obs = remove_ascii_art(obs)
        prompt = f"{cleaned_obs}\n\n{infos['inventory']}\n"
        if 'admissible_commands' in infos:
            bullet_list = "\n".join(f"- {cmd}" for cmd in infos['admissible_commands'])
            prompt += f"\npossible commands:\n{bullet_list}"

        reply = self._agent.chat(prompt)
        reply = reply if len(reply) <= 99 else reply[:99]
        self._last_command = reply
        return reply

In [16]:
from openai import OpenAI
# Module-level OpenAI client; LlmAgent.act sends every chat completion request through it.
client = OpenAI()

class LlmAgent(textworld.gym.Agent):
    """Baseline TextWorld agent that asks a chat model directly for each command.

    The last few (observation, command) exchanges are replayed to the model as
    conversation history. The history is cleared whenever an episode ends.
    """

    def __init__(self, seed=42):
        """Store the sampling seed and start with an empty conversation history."""
        self.seed = seed
        # List of (role, content) tuples, oldest first.
        self.previous_messages = []

    @property
    def infos_to_request(self) -> textworld.EnvInfos:
        """Extra information requested from the environment: admissible commands and inventory."""
        return textworld.EnvInfos(
            admissible_commands=True,
            inventory=True)

    def act(self, obs: str, score: int, done: bool, infos: Mapping[str, Any]) -> str:
        """Return the next command for the current observation.

        Args:
            obs: text returned by the game for the previous command.
            score: current score (unused).
            done: true when the episode has ended.
            infos: requested extra information; must contain 'inventory' and may
                contain 'admissible_commands'.

        Returns:
            "quit" when `done` is true; otherwise the model's reply truncated to
            200 characters (an empty string if the model returned no content).
        """
        if done:
            # End-of-episode notification: no command is needed, so skip the API
            # call and do not let this episode's history leak into the next one.
            self.previous_messages = []
            return "quit"

        # result should be shorter than 200 chars https://gitlab.com/DavidGriffith/frotz/-/blob/master/Makefile#L120
        messages = [{"role": "system", "content": "You are the player of an interactive fiction game. As the player, you may only issue certain commands in this game. You must play the game by issuing commands of up to 200 characters."}]
        # Replay only the 10 most recent history entries.
        messages.extend(
            {"role": role, "content": content}
            for role, content in self.previous_messages[-10:]
        )

        obs = remove_ascii_art(obs)
        chat_input = f"{obs}\n\n{infos['inventory']}\n"
        if 'admissible_commands' in infos:
            commands = "\n".join(f"- {x}" for x in infos['admissible_commands'])
            chat_input += f"\npossible commands:\n{commands}"
        current_state = "\n\n".join([chat_input, "What is your command?"])
        messages.append({"role": "user", "content": current_state})

        completion = client.chat.completions.create(model="gpt-4o", max_tokens=200, seed=self.seed, messages=messages)
        # The message content can be None; treat that as an empty reply instead
        # of failing on len()/slicing below.
        out_str = completion.choices[0].message.content or ""

        # Only the cleaned observation (not the inventory/command list) is kept
        # in the history, together with the untruncated reply.
        self.previous_messages.append(("user", obs))
        self.previous_messages.append(("assistant", out_str))
        return out_str[:200]

In [12]:
class RandomAgent(textworld.gym.Agent):
    """Baseline agent: each turn it picks one of the admissible commands at random."""

    def __init__(self, seed=1234):
        """Create the agent with its own seeded NumPy random generator."""
        self.seed = seed
        self.rng = np.random.RandomState(seed)

    @property
    def infos_to_request(self) -> textworld.EnvInfos:
        """Extra information requested from the environment: the admissible commands."""
        requested = textworld.EnvInfos(admissible_commands=True)
        return requested

    def act(self, obs: str, score: int, done: bool, infos: Mapping[str, Any]) -> str:
        """Ignore the observation and draw a command from infos["admissible_commands"]."""
        candidates = infos["admissible_commands"]
        return self.rng.choice(candidates)

In [None]:
# Evaluate the TaskGen agent on each generated game, with a fresh agent per game.
for game_file in (
    "./games/tw-rewardsDense_goalDetailed.z8",  # Dense rewards, detailed goals
    "./games/tw-rewardsSparse_goalBrief.z8",    # Sparse rewards, brief goals
    "./games/tw-rewardsSparse_goalNone.z8",     # Sparse rewards, no goal
):
    play(TaskgenAgent(), game_file)

In [None]:
# Evaluate the plain LLM agent on each generated game, with a fresh agent per game.
for game_file in (
    "./games/tw-rewardsDense_goalDetailed.z8",  # Dense rewards, detailed goals
    "./games/tw-rewardsSparse_goalBrief.z8",    # Sparse rewards, brief goals
    "./games/tw-rewardsSparse_goalNone.z8",     # Sparse rewards, no goal
):
    play(LlmAgent(), game_file)

In [None]:
# Evaluate the random baseline on each generated game, with a fresh agent per game.
for game_file in (
    "./games/tw-rewardsDense_goalDetailed.z8",  # Dense rewards, detailed goals
    "./games/tw-rewardsSparse_goalBrief.z8",    # Sparse rewards, brief goals
    "./games/tw-rewardsSparse_goalNone.z8",     # Sparse rewards, no goal
):
    play(RandomAgent(), game_file)