In [3]:
import os
import sys
import time

import ollama
import jericho
import re
import json
import random

os.environ['PATH'] = f'{os.environ["PATH"]}:./ollama/bin' # Add local ollama
!echo $PATH

/s/chopin/b/grad/tmoleary/cs542/cs542-adventure/env/bin:/s/chopin/b/grad/tmoleary/.vscode-server/cli/servers/Stable-1e3c50d64110be466c0b4a45222e81d2c9352888/server/bin/remote-cli:/usr/local/nodejs/18.17.1/bin:/usr/local/postman/latest:/usr/local/maven/3.9.4/bin:/usr/lib/jvm/java-11-openjdk/bin:/usr/share/Modules/bin:/s/chopin/b/grad/tmoleary/.nvm/versions/node/v20.5.0/bin:/usr/lib64/openmpi/bin:/usr/local/cuda/latest/bin:/s/chopin/b/grad/tmoleary/bin:/bin:/usr/bin:/sbin:/usr/sbin:/usr/local/bin:.:./ollama/bin


In [4]:
response = ollama.generate(model='llama3.2:1b', prompt='What is a large language model?')
print(response)

KeyboardInterrupt: 

In [5]:
game = 'zork1.z5'
GAMES_DIR = "z-machine-games-master/jericho-game-suite"
env = jericho.FrotzEnv(f"{GAMES_DIR}/{game}")

In [None]:
# Just a try. Not in use
class GamePlayer:
    
    def __init__(self, game: str, max_retry=30):
        GAMES_DIR = "z-machine-games-master/jericho-game-suite"
        self.env = jericho.FrotzEnv(f"{GAMES_DIR}/{game}")
        
        # Properties
        self.max_retry = max_retry
        self.game = game
        
        # State
        self.info = None
        self.game_response = ''
        self.done = False
        self.retries = 0
        self.prev_score = -1
        
        # Metrics
        self.retries_per_score = []
        self.generate_times = []
        self.unique_hashes = set()
        self.unique_rooms = set()
        self.unique_items = set()
    
    def start(self):
        self.game_response, self.info = self.env.reset()
        
    def game_loop(self):
        while not self.done and self.retries <= self.max_retry:

            start = time.time()
            # Get action #######################################
            prompt = make_prompt(f'Game prompt:\n{self.game_response}')
            response = ollama.generate(model='llama3.2:1b', prompt=prompt).response
            print(f'[r{self.retries}] LLM Response: {response}')
            player_in = response.splitlines()[-1].strip()
            ####################################################
            self.generate_times.append(time.time() - start)
            print(player_in)

            # Take Action #######################################
            self.game_response, reward, self.done, self.info = self.env.step(player_in)
            print(self.game_response)
            ####################################################
            self.unique_rooms.add(self.env.get_player_location().name)
            self.unique_hashes.add(self.env.get_world_state_hash())
            self.unique_items.update([item.name for item in self.env.get_inventory()])

            self.retries += 1
            if self.info['score'] != self.prev_score: # If score changes, major step has been made so number of retries resets
                self.retries_per_score.append(self.retries)
                self.retries = 0
            self.prev_score = self.info['score']
        
        print('Score: ', self.info['score'], 'out of', self.env.get_max_score())
        
    def get_results(self):
        return {
            'unique_rooms': len(self.unique_rooms),
            'unique_items': len(self.unique_items),
            'unique_hashes': len(self.unique_hashes),
            'score': self.info['score'],
            'max_score': self.env.get_max_score(),
            'avg_retries': sum(self.retries_per_score) / len(self.retries_per_score) if len(self.retries_per_score) != 0 else 0,
            'avg_generate_time': sum(self.generate_times) / len(self.generate_times)
        }
        
    


In [10]:
def memory_analyze_provided_commands_chat():
    model = 'llama3.2:1b'
    system_prompt = (
        f'You are a smart video game tester who is skilled at playing old-school text adventure games. You are playing {game}\n'
        'Respond only with a single imperative command in plain lowercase from the list of possible actions below.\n'
        'Do not write sentences, explanations, strategies, or narratives.\n'
        'Act rationally based on the game’s last description.\n'
        "Notice that the memory you receive contains commands you have issued in the past. Don't repeat commands that won't help you move forward\n"
    )

    make_prompt = lambda x: f'{system_prompt}\n\n{x}'
    memory = [
        ollama.Message(role='system', content=system_prompt)
    ]
    analysis_prompt = 'Concisely describe the current state of the game and a potential action to take to move forward.'
    memory.append(ollama.Message(role='system', content=analysis_prompt))
    
    game_response, info = env.reset()
    print(game_response)

    def turn_func():
        nonlocal model, make_prompt, analysis_prompt, memory, game_response
        actions_list = env.get_valid_actions()
        random.shuffle(actions_list)
        actions = ', '.join(actions_list)

        memory.append(ollama.Message(role='user', content=f'{game_response}\n\nValid game actions: {actions}'))
        response = ollama.chat(model=model, messages=memory)
        memory.append(response.message)
        response = response.message.content
        print(f'LLM description: {response}')

        prompt = make_prompt(f'Given your analysis of the game state, issue a rational action to take to progress in the game. Respond only with a single imperative command in plain lowercase. Use only standard text-adventure verbs. IMPORTANT: Your response will be used directly as input to the game. Minimize the number of words you use.\n\nYour analysis:\n{response}\n\n Only use one of these valid actions: {actions}\n\n')
        print('[action prompt]', prompt)
        response = ollama.generate(model=model, prompt=prompt).response
        print(f'LLM action: {response}')
        response = response.removeprefix('type').strip() # Give it a shot. Keeps saying type! TODO: Probably remove. Bandaid

        player_in = response.splitlines()[-1].strip()

        # Take an action in the environment using the step fuction.
        # The resulting text-observation, reward, and game-over indicator is returned.
        game_response, reward, done, info = env.step(player_in)
        game_response = f'Received command: {player_in}\n{game_response}' # Add text the game received so the LLM can hopefully improve it's formatting
        print(game_response)
        
        return done, info

    results = n_steps(turn_func, env)
    print(results)
    

def n_steps(turn_func, env, n=100):
    # Begin
    generate_times = []
    unique_rooms = set()
    unique_items = set()
    unique_hashes = set()
    retries = 0
    retries_per_score = []
    prev_score = 0

    for _ in range(n):

        # Turn
        start = time.time()
        done, info = turn_func()
        generate_times.append(time.time() - start)
        
        unique_rooms.add(env.get_player_location().name)
        unique_items.update([item.name for item in env.get_inventory()])
        unique_hashes.add(env.get_world_state_hash())
        
        retries += 1
        if info['score'] != prev_score: # If score changes, major step has been made so number of retries resets
            retries_per_score.append(retries)
            retries = 0
        prev_score = info['score']

        if done:
            break

    return {
        'unique_rooms': len(unique_rooms),
        'unique_items': len(unique_items),
        'unique_hashes': len(unique_hashes),
        'score': info['score'],
        'max_score': env.get_max_score(),
        'avg_retries': sum(retries_per_score) / len(retries_per_score) if len(retries_per_score) != 0 else f'{n}+',
        'avg_generate_time': sum(generate_times) / len(generate_times)
    }

In [11]:
memory_analyze_provided_commands_chat()

Copyright (c) 1981, 1982, 1983 Infocom, Inc. All rights reserved.
ZORK is a registered trademark of Infocom, Inc.
Revision 88 / Serial number 840726

West of House
You are standing in an open field west of a white house, with a boarded front door.
There is a small mailbox here.


LLM description: open mailbox
[action prompt] You are a smart video game tester who is skilled at playing old-school text adventure games. You are playing zork1.z5
Respond only with a single imperative command in plain lowercase from the list of possible actions below.
Do not write sentences, explanations, strategies, or narratives.
Act rationally based on the game’s last description.
Notice that the memory you receive contains commands you have issued in the past. Don't repeat commands that won't help you move forward


Given your analysis of the game state, issue a rational action to take to progress in the game. Respond only with a single imperative command in plain lowercase. Use only standard text-adventu

In [4]:
def basic_llm(max_retry=30):
    # Basic
    system_prompt = (
        'You are a smart video game enthusiast who is skilled at playing old-school text adventure games. Given a description of your environment, explain your thought process and then give a command that is compatible with the game you are playing. Always put the command on its own line at the end of your response with nothing else. It needs to be easy and consistent to read with simple python'
        'This game does not run on an LLM, so it only recognizes a small vocabulary of commands. An example of a command is exactly the string "go west". This means you should only give exactly the command that the game recognizes '
    )

    make_prompt = lambda x: f'{system_prompt}\n\n{x}'

    initial_observation, info = env.reset()
    print(initial_observation)

    game_response = initial_observation

    done = False
    retries = 0
    retries_per_score = []
    generate_times = []
    prev_score = -1

    unique_rooms = set()
    unique_items = set()
    unique_hashes = set()

    while not done and retries <= max_retry:

        prompt = make_prompt(f'Game prompt:\n{game_response}')
        start = time.time()
        response = ollama.generate(model='llama3.2:1b', prompt=prompt).response
        generate_times.append(time.time() - start)
        print(f'[r{retries}] LLM Response: {response}')
        player_in = response.splitlines()[-1].strip()
        print(player_in)

        # Take an action in the environment using the step fuction.
        # The resulting text-observation, reward, and game-over indicator is returned.
        game_response, reward, done, info = env.step(player_in)
        print(game_response)
        unique_rooms.add(env.get_player_location().name)
        unique_items.update([item.name for item in env.get_inventory()])
        unique_hashes.add(env.get_world_state_hash())

        retries += 1
        if info['score'] != prev_score: # If score changes, major step has been made so number of retries resets
            retries_per_score.append(retries)
            retries = 0
        prev_score = info['score']
    
    print('Scored', info['score'], 'out of', env.get_max_score())

    return {
        'unique_rooms': len(unique_rooms),
        'unique_items': len(unique_items),
        'unique_hashes': len(unique_hashes),
        'score': info['score'],
        'max_score': env.get_max_score(),
        'avg_retries': sum(retries_per_score) / len(retries_per_score) if len(retries_per_score) != 0 else 0,
        'avg_generate_time': sum(generate_times) / len(generate_times)
    }

In [None]:
# basic_llm()

Playing game. 'q' to quit
Copyright (c) 1981, 1982, 1983 Infocom, Inc. All rights reserved.
ZORK is a registered trademark of Infocom, Inc.
Revision 88 / Serial number 840726

West of House
You are standing in an open field west of a white house, with a boarded front door.
There is a small mailbox here.


[r0] LLM Response: I'm currently standing in an open field west of the white house, with a boarded front door. The field stretches out in both directions, with no visible obstacles or hazards. I see a small mailbox at the far end of the field.

My thought process: Considering my current location and the fact that there is a mailbox, it's likely that my goal is to retrieve something from the mailbox. Given the layout of the scene, I can think of several possible directions to take:

* I could try to open the boarded front door (if it's not locked).
* I could search for another way into or out of the house.
* I could investigate the mailbox more closely.

My command: go north
My command

{'score': 0,
 'max_score': 350,
 'avg_retries': 1.0,
 'avg_generate_time': 31.602583374295914}

In [5]:
# Basic with memory
system_prompt_memory = (
    'You are a smart video game enthusiast who is skilled at playing old-school text adventure games. Given a description of your environment, explain your thought process and then give a command that is compatible with the game you are playing. Always put the command on its own line at the end of your response with nothing else. It needs to be easy and consistent to read with simple python'
    'At the beginning of your prompt, you will also receive up to 5 of the most recent interactions you\'ve had with the game'
    'This game does not run on an LLM, so it only recognizes a set vocabulary of commands. An example of a command is exactly the string "go west". This means you should only give exactly the command that the game recognizes '
)

In [6]:
def basic_llm_with_memory(memory_size=5, max_retry=30, system_prompt=system_prompt_memory):
    
    make_prompt = lambda x: f'{system_prompt}\n\n{x}'

    print("Playing game. 'q' to quit")

    initial_observation, info = env.reset()
    print(initial_observation)

    game_response = initial_observation

    memory = []

    done = False
    total_steps = 0
    retries = 0
    retries_per_score = []
    generate_times = []
    prev_score = 0

    unique_rooms = set()
    unique_items = set()
    unique_hashes = set()

    while not done and retries <= max_retry:

        prompt = make_prompt(f'Game prompt:\n{game_response}')
        combined_memory = "\n".join(memory)
        prompt_with_memory = f'{combined_memory}\n\n{prompt}'
        start = time.time()
        response = ollama.generate(model='llama3.2:1b', prompt=prompt_with_memory).response
        generate_times.append(time.time() - start)
        print(f'[r{retries}] LLM Response: {response}')
        player_in = response.splitlines()[-1].strip()

        memory.append(f'{prompt}\n{response}')
        if len(memory) > memory_size:
            memory.pop(0)

        # Take an action in the environment using the step fuction.
        # The resulting text-observation, reward, and game-over indicator is returned.
        game_response, reward, done, info = env.step(player_in)
        game_response = f'Received command: {player_in}\n{game_response}' # Add text the game received so the LLM can hopefully improve it's formatting
        print(game_response)
        unique_rooms.add(env.get_player_location().name)
        unique_items.update([item.name for item in env.get_inventory()])
        unique_hashes.add(env.get_world_state_hash())

        retries += 1
        if info['score'] != prev_score: # If score changes, major step has been made so number of retries resets
            retries_per_score.append(retries)
            retries = 0
        prev_score = info['score']
        total_steps += 1
    
    print('Scored', info['score'], 'out of', env.get_max_score())

    return {
        'unique_rooms': len(unique_rooms),
        'unique_items': len(unique_items),
        'unique_hashes': len(unique_hashes),
        'score': info['score'],
        'max_score': env.get_max_score(),
        'avg_retries': sum(retries_per_score) / len(retries_per_score) if len(retries_per_score) != 0 else 0,
        'avg_generate_time': sum(generate_times) / len(generate_times)
    }

In [None]:
# with open('./basic_llm_with_memory.out', 'w') as f:
#     print(basic_llm_with_memory(out_file=f, max_retry=30))

{'score': 0, 'max_score': 350, 'avg_retries': 0, 'avg_generate_time': 7.857473757959181}


In [7]:
# ChatGPT Provided. Is it better?
# Prompt:
#   What would be a good system prompt for a local LLM to prime it to play text adventure games?
#   The LLM is not running the text adventure game, it is acting as the player for existing games
#
system_prompt_memory_chatgpt = (
    "You are a player of a classic parser-based interactive fiction game.\n"
    "Respond only with a single imperative command in plain lowercase (e.g., “look”, “go east”, “get key”).\n"
    "Do not write sentences, explanations, strategies, or narratives.\n"
    "Use only standard text-adventure verbs: look, examine, go, take, drop, open, close, use, talk to, attack, inventory, etc.\n"
    "Act rationally based on the game’s last description.\n"
    "If confused, issue “look”.\n"
    "Notice that the memory you receive contains commands you have issued in the past. Don't repeat commands that won't help you move forward.\n"
)

The chatgpt given prompt was ineffective. It tended to repeat commands over and over again, even when we provided the memory of the command failing

In [8]:
def memory_analyze_then_command(memory_size=5, max_retry=30, model='llama3.2:1b'):

    # Basic with memory
    system_prompt = (
        f'You are a smart video game enthusiast who is skilled at playing old-school text adventure games. You are playing {game}.\n'
        "You don't enjoy playing video games, but you must reach the proper end of the game to get paid.\n"
    )

    make_prompt = lambda x: f'{system_prompt}\n\n{x}'

    print("Playing game. 'q' to quit")

    initial_observation, info = env.reset()
    print(initial_observation)

    game_response = initial_observation

    memory = []

    done = False
    total_steps = 0
    retries = 0
    retries_per_score = []
    generate_times = []
    prev_score = 0

    unique_rooms = set()
    unique_items = set()
    unique_hashes = set()

    while not done and retries <= max_retry:

        combined_memory = "\n".join(memory)
        prompt = make_prompt(f'Analyze the following sequence of game environment descriptions or responses and actions already taken by you, the player. Concisely describe the current state of the game and a potential action to take to move forward.\n\n{combined_memory}\n\nGame text:\n{game_response}')
        start = time.time()
        response = ollama.generate(model=model, prompt=prompt).response
        generate_times.append(time.time() - start)
        print(f'[r{retries}] LLM description: {response}')

        prompt = make_prompt(f'Given your analysis of the game state, issue a rational action to take to progress in the game. Respond only with a single imperative command in plain lowercase. Use only standard text-adventure verbs. IMPORTANT: Your response will be used directly as input to the game. Minimize the number of words you use.\n\nYour analysis:\n{response}')
        start = time.time()
        response = ollama.generate(model=model, prompt=prompt).response
        generate_times.append(time.time() - start)
        print(f'[r{retries}] LLM action: {response}')
        response = response.removeprefix('type').strip() # Give it a shot. Keeps saying type! TODO: Probably remove. Bandaid

        player_in = response.splitlines()[-1].strip()

        memory.append(f'{game_response}\n{response}')
        if len(memory) > memory_size:
            memory.pop(0)

        # Take an action in the environment using the step fuction.
        # The resulting text-observation, reward, and game-over indicator is returned.
        game_response, reward, done, info = env.step(player_in)
        game_response = f'Received command: {player_in}\n{game_response}' # Add text the game received so the LLM can hopefully improve it's formatting
        print(game_response)
        unique_rooms.add(env.get_player_location().name)
        unique_items.update([item.name for item in env.get_inventory()])
        unique_hashes.add(env.get_world_state_hash())

        retries += 1
        if info['score'] != prev_score: # If score changes, major step has been made so number of retries resets
            retries_per_score.append(retries)
            retries = 0
        prev_score = info['score']
        total_steps += 1
    
    print('Scored', info['score'], 'out of', env.get_max_score())

    return {
        'unique_rooms': len(unique_rooms),
        'unique_items': len(unique_items),
        'unique_hashes': len(unique_hashes),
        'score': info['score'],
        'max_score': env.get_max_score(),
        'avg_retries': sum(retries_per_score) / len(retries_per_score) if len(retries_per_score) != 0 else 0,
        'avg_generate_time': sum(generate_times) / len(generate_times)
    }

In [None]:
# memory_analyze_then_command(max_retry=5, model='gpt-oss:20b')

Playing game. 'q' to quit
Copyright (c) 1981, 1982, 1983 Infocom, Inc. All rights reserved.
ZORK is a registered trademark of Infocom, Inc.
Revision 88 / Serial number 840726

West of House
You are standing in an open field west of a white house, with a boarded front door.
There is a small mailbox here.


[r0] LLM description: **Current state:**  
- You are standing in an open field west of a white house.  
- The front door of the house is boarded up.  
- There is a small mailbox on the ground.

**Suggested next move:**  
Open or examine the mailbox: `open mailbox` or `look mailbox`. This is a classic first step in Zork to retrieve a letter that can give you a clue or key. If you prefer to approach the house, you could also try `go east` or `enter house`, but the mailbox is a more straightforward path to progress.
[r0] LLM action: open mailbox
Received command: open mailbox
Opening the small mailbox reveals a leaflet.


[r1] LLM description: You’re still standing in the open field west

{'score': 0,
 'max_score': 350,
 'avg_retries': 0,
 'avg_generate_time': 60.48966830968857}

gpt-oss did much better than llama. Could be because of the number of parameters. Could also be that gpt-oss has seen zork in its training data (it seems like this is true). Or maybe it's really better at it? I think that's less likely.

In [9]:
def memory_and_provided_commands(memory_size=5, max_retry=30):
    system_prompt = (
        f'You are a smart video game tester who is skilled at playing old-school text adventure games. You are playing {game}\n'
        'Respond only with a single imperative command in plain lowercase from the list of possible actions below.\n'
        'Do not write sentences, explanations, strategies, or narratives.\n'
        'Act rationally based on the game’s last description.\n'
        "Notice that the memory you receive contains commands you have issued in the past. Don't repeat commands that won't help you move forward\n"
    )

    make_prompt = lambda x: f'{system_prompt}\n\n{x}'

    print("Playing game. 'q' to quit")

    initial_observation, info = env.reset()
    print(initial_observation)

    game_response = initial_observation

    memory = []

    done = False
    total_steps = 0
    retries = 0
    retries_per_score = []
    generate_times = []
    prev_score = 0

    unique_rooms = set()
    unique_items = set()
    unique_hashes = set()

    while not done and retries <= max_retry:

        combined_memory = "\n".join(memory)
        actions = ', '.join(env.get_valid_actions())
        prompt = make_prompt(f'{combined_memory}\n\nGame text:\n{game_response}\n\nValid actions: {actions}')
        start = time.time()
        response = ollama.generate(model='llama3.2:1b', prompt=prompt).response
        generate_times.append(time.time() - start)
        print(f'[r{retries}] LLM Response: {response}')
        player_in = response.splitlines()[-1].strip()

        memory.append(f'{game_response}\n{response}')
        if len(memory) > memory_size:
            memory.pop(0)

        # Take an action in the environment using the step fuction.
        # The resulting text-observation, reward, and game-over indicator is returned.
        game_response, reward, done, info = env.step(player_in)
        game_response = f'Received command: {player_in}\n{game_response}' # Add text the game received so the LLM can hopefully improve it's formatting
        print(game_response)
        unique_rooms.add(env.get_player_location().name)
        unique_items.update([item.name for item in env.get_inventory()])
        unique_hashes.add(env.get_world_state_hash())

        retries += 1
        if info['score'] != prev_score: # If score changes, major step has been made so number of retries resets
            retries_per_score.append(retries)
            retries = 0
        prev_score = info['score']
        total_steps += 1
    
    print('Scored', info['score'], 'out of', env.get_max_score())

    return {
        'unique_rooms': len(unique_rooms),
        'unique_items': len(unique_items),
        'unique_hashes': len(unique_hashes),
        'score': info['score'],
        'max_score': env.get_max_score(),
        'avg_retries': sum(retries_per_score) / len(retries_per_score) if len(retries_per_score) != 0 else 0,
        'avg_generate_time': sum(generate_times) / len(generate_times)
    }

In [None]:
# memory_and_provided_commands()

Playing game. 'q' to quit
Copyright (c) 1981, 1982, 1983 Infocom, Inc. All rights reserved.
ZORK is a registered trademark of Infocom, Inc.
Revision 88 / Serial number 840726

West of House
You are standing in an open field west of a white house, with a boarded front door.
There is a small mailbox here.


[r0] LLM Response: open mailbox
Received command: open mailbox
Opening the small mailbox reveals a leaflet.


[r1] LLM Response: take leaflet
Received command: take leaflet
Taken.


[r2] LLM Response: put down leaflet
Received command: put down leaflet
Dropped.


[r3] LLM Response: close mailbox
Received command: close mailbox
Closed.


[r4] LLM Response: open door
Received command: open door
The door cannot be opened.


[r5] LLM Response: north
Received command: north
North of House
You are facing the north side of a white house. There is no door here, and all the windows are boarded up. To the north a narrow path winds through the trees.


[r6] LLM Response: open door
Received comma

{'score': 0,
 'max_score': 350,
 'avg_retries': 0,
 'avg_generate_time': 1.5271822021853538}

This certainly improved it's commands, but not for actually completing the game. Maybe having an analysis step could help. It does not have a sense of curiosity

In [10]:
def memory_analyze_provided_commands(memory_size=5, max_retry=30, model='llama3.2:1b'):

    system_prompt = (
        f'You are a smart video game tester who is skilled at playing old-school text adventure games. You are playing {game}\n'
        'Respond only with a single imperative command in plain lowercase from the list of possible actions below.\n'
        'Do not write sentences, explanations, strategies, or narratives.\n'
        'Act rationally based on the game’s last description.\n'
        "Notice that the memory you receive contains commands you have issued in the past. Don't repeat commands that won't help you move forward\n"
    )

    make_prompt = lambda x: f'{system_prompt}\n\n{x}'

    print("Playing game. 'q' to quit")

    initial_observation, info = env.reset()
    print(initial_observation)

    game_response = initial_observation

    memory = []

    done = False
    total_steps = 0
    retries = 0
    retries_per_score = []
    generate_times = []
    prev_score = 0

    unique_rooms = set()
    unique_items = set()
    unique_hashes = set()

    while not done and retries <= max_retry:

        combined_memory = "\n".join(memory)
        prompt = make_prompt(f'Analyze the following sequence of game environment descriptions or responses and actions already taken by you, the player. Concisely describe the current state of the game and a potential action to take to move forward.\n\n{combined_memory}\n\nGame text:\n{game_response}')
        start = time.time()
        response = ollama.generate(model=model, prompt=prompt).response
        generate_times.append(time.time() - start)
        print(f'[r{retries}] LLM description: {response}')

        actions = ', '.join(env.get_valid_actions())
        prompt = make_prompt(f'Given your analysis of the game state, issue a rational action to take to progress in the game. Respond only with a single imperative command in plain lowercase. Use only standard text-adventure verbs. IMPORTANT: Your response will be used directly as input to the game. Minimize the number of words you use.\n\nYour analysis:\n{response}\n\nValid actions: {actions}')
        start = time.time()
        response = ollama.generate(model=model, prompt=prompt).response
        generate_times.append(time.time() - start)
        print(f'[r{retries}] LLM action: {response}')
        response = response.removeprefix('type').strip() # Give it a shot. Keeps saying type! TODO: Probably remove. Bandaid

        player_in = response.splitlines()[-1].strip()
        
        memory.append(f'{game_response}\n{response}')
        if len(memory) > memory_size:
            memory.pop(0)

        # Take an action in the environment using the step fuction.
        # The resulting text-observation, reward, and game-over indicator is returned.
        game_response, reward, done, info = env.step(player_in)
        game_response = f'Received command: {player_in}\n{game_response}' # Add text the game received so the LLM can hopefully improve it's formatting
        print(game_response)
        unique_rooms.add(env.get_player_location().name)
        unique_items.update([item.name for item in env.get_inventory()])
        unique_hashes.add(env.get_world_state_hash())

        retries += 1
        if info['score'] != prev_score: # If score changes, major step has been made so number of retries resets
            retries_per_score.append(retries)
            retries = 0
        prev_score = info['score']
        total_steps += 1
    
    print('Scored', info['score'], 'out of', env.get_max_score())

    return {
        'unique_rooms': len(unique_rooms),
        'unique_items': len(unique_items),
        'unique_hashes': len(unique_hashes),
        'score': info['score'],
        'max_score': env.get_max_score(),
        'avg_retries': sum(retries_per_score) / len(retries_per_score) if len(retries_per_score) != 0 else 0,
        'avg_generate_time': sum(generate_times) / len(generate_times)
    }

In [None]:
# memory_analyze_provided_commands()

Playing game. 'q' to quit
Copyright (c) 1981, 1982, 1983 Infocom, Inc. All rights reserved.
ZORK is a registered trademark of Infocom, Inc.
Revision 88 / Serial number 840726

West of House
You are standing in an open field west of a white house, with a boarded front door.
There is a small mailbox here.


[r0] LLM description: Based on the game environment descriptions and responses, I can summarize the current state of the game as follows:

The player has just entered a new area, a field west of a white house. The player's current location is indicated by the text "West of House". There are two objects in this area: an open field with no specific description or features, and a small mailbox.

Given my previous actions (not specified), I can infer that:

* The player has entered the field.
* The player may have exited the house at some point to access the field.
* The player is likely familiar with the basic layout of this area.

A potential action for me to take to move forward could 

{'score': 0,
 'max_score': 350,
 'avg_retries': 0,
 'avg_generate_time': 6.211477264281242}

In [11]:
def memory_analyze_provided_commands_chat(max_retry=30, model='llama3.2:1b'):

    system_prompt = (
        f'You are a smart video game tester who is skilled at playing old-school text adventure games. You are playing {game}\n'
        'Respond only with a single imperative command in plain lowercase from the list of possible actions below.\n'
        'Do not write sentences, explanations, strategies, or narratives.\n'
        'Act rationally based on the game’s last description.\n'
        "Notice that the memory you receive contains commands you have issued in the past. Don't repeat commands that won't help you move forward\n"
    )

    make_prompt = lambda x: f'{system_prompt}\n\n{x}'

    print("Playing game. 'q' to quit")

    initial_observation, info = env.reset()
    print(initial_observation)

    game_response = initial_observation

    memory = [
        ollama.Message(role='system', content=system_prompt)
    ]
    analysis_prompt = 'Concisely describe the current state of the game and a potential action to take to move forward.'
    memory.append(ollama.Message(role='system', content=analysis_prompt))

    done = False
    total_steps = 0
    retries = 0
    retries_per_score = []
    generate_times = []
    prev_score = 0

    unique_rooms = set()
    unique_items = set()
    unique_hashes = set()

    while not done and retries <= max_retry:

        actions_list = env.get_valid_actions()
        random.shuffle(actions_list)
        actions = ', '.join(actions_list)

        memory.append(ollama.Message(role='user', content=f'{game_response}\n\nValid game actions: {actions}'))
        start = time.time()
        response = ollama.chat(model=model, messages=memory)
        memory.append(response.message)
        response = response.message.content
        generate_times.append(time.time() - start)
        print(f'[r{retries}] LLM description: {response}')

        prompt = make_prompt(f'Given your analysis of the game state, issue a rational action to take to progress in the game. Respond only with a single imperative command in plain lowercase. Use only standard text-adventure verbs. IMPORTANT: Your response will be used directly as input to the game. Minimize the number of words you use.\n\nYour analysis:\n{response}\n\n Only use one of these valid actions: {actions}\n\n')
        print('[action prompt]', prompt)
        start = time.time()
        response = ollama.generate(model=model, prompt=prompt).response
        generate_times.append(time.time() - start)
        print(f'[r{retries}] LLM action: {response}')
        response = response.removeprefix('type').strip() # Give it a shot. Keeps saying type! TODO: Probably remove. Bandaid

        player_in = response.splitlines()[-1].strip()

        # Take an action in the environment using the step fuction.
        # The resulting text-observation, reward, and game-over indicator is returned.
        game_response, reward, done, info = env.step(player_in)
        game_response = f'Received command: {player_in}\n{game_response}' # Add text the game received so the LLM can hopefully improve it's formatting
        print(game_response)
        unique_rooms.add(env.get_player_location().name)
        unique_items.update([item.name for item in env.get_inventory()])
        unique_hashes.add(env.get_world_state_hash())

        retries += 1
        if info['score'] != prev_score: # If score changes, major step has been made so number of retries resets
            retries_per_score.append(retries)
            retries = 0
        prev_score = info['score']
        total_steps += 1
    
    print('Scored', info['score'], 'out of', env.get_max_score())
    print('Memory at end:')
    print(json.dumps(memory, indent=4, default=str))

    return {
        'unique_rooms': len(unique_rooms),
        'unique_items': len(unique_items),
        'unique_hashes': len(unique_hashes),
        'score': info['score'],
        'max_score': env.get_max_score(),
        'avg_retries': sum(retries_per_score) / len(retries_per_score) if len(retries_per_score) != 0 else 0,
        'avg_generate_time': sum(generate_times) / len(generate_times)
    }

In [None]:
# memory_analyze_provided_commands_chat()

Playing game. 'q' to quit
Copyright (c) 1981, 1982, 1983 Infocom, Inc. All rights reserved.
ZORK is a registered trademark of Infocom, Inc.
Revision 88 / Serial number 840726

West of House
You are standing in an open field west of a white house, with a boarded front door.
There is a small mailbox here.


[r0] LLM description: I'm currently standing in the middle of this open field, surrounded by tall grass and wildflowers. The only landmarks are a few scattered rocks and a tall wooden fence on the east side of the field. In front of me is a small mailbox with a rusty lock.

To move forward, I can:

* Open the mailbox to investigate its contents
* Head north towards the boarded front door of the white house

I'm not really sure what's going on here, but I don't see any obvious way out...
[action prompt] You are a smart video game enthusiast who is skilled at playing old-school text adventure games. You are playing zork1.z5.
You don't enjoy playing video games, but you must reach the pr

Process SpawnPoolWorker-42:
Process SpawnPoolWorker-41:
Traceback (most recent call last):
Traceback (most recent call last):
  File "/Library/Developer/CommandLineTools/Library/Frameworks/Python3.framework/Versions/3.9/lib/python3.9/multiprocessing/process.py", line 315, in _bootstrap
    self.run()
  File "/Library/Developer/CommandLineTools/Library/Frameworks/Python3.framework/Versions/3.9/lib/python3.9/multiprocessing/process.py", line 108, in run
    self._target(*self._args, **self._kwargs)
  File "/Library/Developer/CommandLineTools/Library/Frameworks/Python3.framework/Versions/3.9/lib/python3.9/multiprocessing/pool.py", line 114, in worker
    task = get()
  File "/Library/Developer/CommandLineTools/Library/Frameworks/Python3.framework/Versions/3.9/lib/python3.9/multiprocessing/process.py", line 315, in _bootstrap
    self.run()
  File "/Library/Developer/CommandLineTools/Library/Frameworks/Python3.framework/Versions/3.9/lib/python3.9/multiprocessing/queues.py", line 365, in ge

KeyboardInterrupt: 

In [None]:
def agent(max_retry=30, model='qwen3'):

    system_prompt = (
        f'Think step by step. You are playing {game}, an interactive fiction game. You must analyze the scenario the game presents to you and choose an action that will make progress. Your goal is to finish the game\n'
        'Use the tools provided to you to take actions, view possible actions for your current location, and view the game walkthrough if necessary'
    )

    initial_observation, info = env.reset()
    print(initial_observation)

    game_response = initial_observation

    memory = [
        ollama.Message(role='system', content=system_prompt)
    ]

    done = False
    unique_rooms = set()
    unique_items = set()
    unique_hashes = set()

    def do_game_action(action: str) -> str:
        """Perform an action in the active text adventure game and see the result"""
        """
        Args:
          action: game action string

        Returns:
          The game's response after performing the action
        """
        nonlocal unique_hashes, unique_rooms
        game_response, reward, done, info = env.step(action)
        unique_rooms.add(env.get_player_location().name)
        unique_items.update([item.name for item in env.get_inventory()])
        unique_hashes.add(env.get_world_state_hash())
        return game_response
    
    def view_possible_actions() -> str:
        """View a list of the actions that can be performed in the game's current state"""
        """
        Returns:
          String containg actions separated by commas
        """
        return ', '.join(env.get_valid_actions())
    
    def view_walkthrough():
        """View the full game walkthrough as a list of actions"""
        """
        Returns:
          String containing actions separated by newlines
        """
        return env.get_walkthrough()
    
    available_functions = {
        'do_game_action': do_game_action,
        'view_possible_actions': view_possible_actions,
        'view_walkthrough': view_walkthrough
    }

    generate_times = []
    prev_score = 0
    retries = 0
    retries_per_score = []
    while not done:

        memory.append(ollama.Message(role='user', content=f'{game_response}'))

        while True: # Keep looping until it finishes tool calls (which is kind of never)
            print(f'[r{retries}]')
            
            start = time.time()
            response = ollama.chat(model=model, messages=memory, think=True, tools=[do_game_action, view_possible_actions, view_walkthrough], options={'num_ctx': 2048})
            generate_times.append(time.time() - start)
            memory.append(response.message)

            print("Thinking: ", response.message.thinking)
            print("Content: ", response.message.content)

            if response.message.tool_calls:
                for tc in response.message.tool_calls:
                    if tc.function.name in available_functions:
                        print(f"Calling {tc.function.name} with arguments {tc.function.arguments}")
                        result = available_functions[tc.function.name](**tc.function.arguments)
                        print(f"Result: {result}")
                        # add the tool result to the messages
                        memory.append({'role': 'tool', 'tool_name': tc.function.name, 'content': str(result)})
            else:
                break
            
            retries += 1
            if info['score'] != prev_score: # If score changes, major step has been made so number of retries resets
                retries_per_score.append(retries)
                retries = 0
                print('Score changed! New score:', info['score'])
            prev_score = info['score']
        
        if retries >= max_retry:
            break
            
    print('Scored', info['score'], 'out of', env.get_max_score())
    print('Memory at end:')
    print(json.dumps(memory, indent=4, default=str))

    return {
        'unique_rooms': len(unique_rooms),
        'unique_items': len(unique_items),
        'unique_hashes': len(unique_hashes),
        'score': info['score'],
        'max_score': env.get_max_score(),
        'avg_retries': sum(retries_per_score) / len(retries_per_score) if len(retries_per_score) != 0 else 0,
        'avg_generate_time': sum(generate_times) / len(generate_times)
    }

In [None]:
# agent()

Playing game. 'q' to quit
Copyright (c) 1981, 1982, 1983 Infocom, Inc. All rights reserved.
ZORK is a registered trademark of Infocom, Inc.
Revision 88 / Serial number 840726

West of House
You are standing in an open field west of a white house, with a boarded front door.
There is a small mailbox here.


Thinking:  Okay, so I'm playing Zork I, right? Let me think about what I need to do here. The user provided the initial setup: I'm west of a white house with a boarded front door and there's a small mailbox. The goal is to finish the game, so I need to figure out the right steps.

First, I should check what actions are possible here. The user mentioned using the view_possible_actions function. Let me call that to see the available commands. Common Zork commands include looking around, opening doors, checking mailboxes, etc. Once I know the possible actions, I can choose the most logical next step. For example, maybe I should check the mailbox first since it's mentioned here. Alternati

Process SpawnPoolWorker-1:
Process SpawnPoolWorker-7:
Process SpawnPoolWorker-6:
Process SpawnPoolWorker-4:
Process SpawnPoolWorker-9:
Process SpawnPoolWorker-5:
Process SpawnPoolWorker-8:
Process SpawnPoolWorker-3:
Process SpawnPoolWorker-2:
Process SpawnPoolWorker-10:
Traceback (most recent call last):
  File "/Library/Developer/CommandLineTools/Library/Frameworks/Python3.framework/Versions/3.9/lib/python3.9/multiprocessing/process.py", line 315, in _bootstrap
    self.run()
  File "/Library/Developer/CommandLineTools/Library/Frameworks/Python3.framework/Versions/3.9/lib/python3.9/multiprocessing/process.py", line 108, in run
    self._target(*self._args, **self._kwargs)
  File "/Library/Developer/CommandLineTools/Library/Frameworks/Python3.framework/Versions/3.9/lib/python3.9/multiprocessing/pool.py", line 114, in worker
    task = get()
  File "/Library/Developer/CommandLineTools/Library/Frameworks/Python3.framework/Versions/3.9/lib/python3.9/multiprocessing/queues.py", line 365, i

KeyboardInterrupt: 

# Data Collection

In [13]:
import os
import pandas as pd
import datetime

output_dir = f'outputs/{datetime.datetime.now().strftime("%Y%m%d_%H%M%S")}'
os.makedirs(output_dir, exist_ok=True)

In [None]:

calls = {
    'basic_llm': basic_llm,
    'basic_llm_with_memory': basic_llm_with_memory,
    'basic_llm_with_memory_chatgpt': lambda: basic_llm_with_memory(system_prompt=system_prompt_memory_chatgpt),
    'memory_analyze_then_command': memory_analyze_then_command,
    'memory_and_provided_commands': memory_and_provided_commands,
    'memory_analyze_provided_commands': memory_analyze_provided_commands,
    'memory_analyze_provided_commands_chat': memory_analyze_provided_commands_chat,
}

full_results = []
for name, func in calls.items():
    
    # for repeat in range(3):
    #     with open(os.path.join(output_dir, f'{name}_output{repeat}.txt'), 'w') as f:
    #         results = func(out_file=f, )
    #     f = None # For some reason
    results = func()
    f = None # For some reason
        
    full_results.append({'name': name, **results})

df = pd.DataFrame(full_results)
df

Copyright (c) 1981, 1982, 1983 Infocom, Inc. All rights reserved.
ZORK is a registered trademark of Infocom, Inc.
Revision 88 / Serial number 840726

West of House
You are standing in an open field west of a white house, with a boarded front door.
There is a small mailbox here.


[r0] LLM Response: I am currently standing in the open field west of the white house. I see a small mailbox nearby. My thought process is to try and interact with my environment, which involves examining objects and their surroundings. Given this game's vocabulary limitations, my goal is to find something useful or interesting to do.

To explore further, I will examine the mailbox.

My current state:
- Location: West of House
- Object: Mailbox

The mailbox appears to be locked, but it might contain a message or an object that can help me.
The mailbox appears to be locked, but it might contain a message or an object that can help me.
I don't know the word "appears".


[r0] LLM Response: You are standing in a da



[r25] LLM Response: My thought process is to analyze the context and identify any relevant information. In this case, when I said "look", I was trying to describe my surroundings but got it wrong because the game only recognizes specific commands like "go west". Since I can only use a small vocabulary of commands, I need to think outside the box.

My thought process is: what if "look" means something else? What if it's not just about looking around, but rather about examining or investigating something?

Given this realization, my command for the game would be:

move north
move north
You used the word "north" in a way that I don't understand.


[r26] LLM Response: My thought process: The description mentions using a small vocabulary of commands, but it seems like we're actually playing with an older text-based system. Since there are no LLMs involved, my goal is to respond accordingly.

Game prompt:
You used the word "north" in a way that I don't understand.

I'm thinking that if I wer

DUMB-FROTZ: unknown escape char: b
Enter \help to see the list


[r16] LLM Response: You are exploring the vast, dimly lit chamber filled with rows upon rows of ancient, leather-bound books. Shelves stretch out before you, stacked high with tomes that seem to whisper secrets to each other.

As you browse through the shelves, you notice a few texts that catch your attention. One book appears to be bound in a strange, glowing material that seems to pulse with an inner light. Another tome has yellowed pages filled with cryptic symbols and phrases that seem to match the words of the whispering voices.

You begin to search through the books, hoping to find any information that might help you unlock the secrets of the Shadow Realm. As you turn a page in the glowing tome, you notice that it has a passage written in an ancient language that seems to match the whispers of the other texts.

The words seem to hint at the location of a hidden chamber within the Shadow Realm, where one can find the key to unlocking the secrets of this mysterious place. You study

DUMB-FROTZ: unknown escape char: b
Enter \help to see the list


[r17] LLM Response: You are standing in a vast, dimly lit chamber filled with rows upon rows of ancient, leather-bound books. Shelves stretch out before you, stacked high with tomes that seem to whisper secrets to each other.

As you browse through the shelves, you notice a few texts that catch your attention. One book appears to be bound in a strange, glowing material that seems to pulse with an inner light. Another tome has yellowed pages filled with cryptic symbols and phrases that seem to match the words of the whispering voices.

You begin to search through the books, hoping to find any information that might help you unlock the secrets of the Shadow Realm. As you turn a page in the glowing tome, you notice that it has a passage written in an ancient language that seems to match the whispers of the other texts.

The words seem to hint at the location of a hidden chamber within the Shadow Realm, where one can find the key to unlocking the secrets of this mysterious place. You study

DUMB-FROTZ: unknown escape char: b
Enter \help to see the list


[r18] LLM Response: You are standing in a vast, dimly lit chamber filled with rows upon rows of ancient, leather-bound books. Shelves stretch out before you, stacked high with tomes that seem to whisper secrets to each other.

As you browse through the shelves, you notice a few texts that catch your attention. One book appears to be bound in a strange, glowing material that seems to pulse with an inner light. Another tome has yellowed pages filled with cryptic symbols and phrases that seem to match the words of the whispering voices.

You begin to search through the books, hoping to find any information that might help you unlock the secrets of the Shadow Realm. As you turn a page in the glowing tome, you notice that it has a passage written in an ancient language that seems to match the whispers of the other texts.

The words seem to hint at the location of a hidden chamber within the Shadow Realm, where one can find the key to unlocking the secrets of this mysterious place. You study

DUMB-FROTZ: unknown escape char: b
Enter \help to see the list


[r19] LLM Response: You are standing in a vast, dimly lit chamber filled with rows upon rows of ancient, leather-bound books. Shelves stretch out before you, stacked high with tomes that seem to whisper secrets to each other.

As you browse through the shelves, you notice two texts that catch your attention. One book appears to be bound in a strange, glowing material that seems to pulse with an inner light. Another tome has yellowed pages filled with cryptic symbols and phrases that seem to match the words of the whispering voices.

You begin to search through the books, hoping to find any information that might help you unlock the secrets of the Shadow Realm. As you turn a page in the glowing tome, you notice that it has a passage written in an ancient language that seems to match the whispers of the other texts.

The words seem to hint at the location of a hidden chamber within the Shadow Realm, where one can find the key to unlocking the secrets of this mysterious place. You study t

DUMB-FROTZ: unknown escape char: b
Enter \help to see the list


[r20] LLM Response: You are standing in a vast, dimly lit chamber filled with rows upon rows of ancient, leather-bound books. Shelves stretch out before you, stacked high with tomes that seem to whisper secrets to each other.

As you browse through the shelves, you notice two texts that catch your attention. One book appears to be bound in a strange, glowing material that seems to pulse with an inner light. Another tome has yellowed pages filled with cryptic symbols and phrases that seem to match the words of the whispering voices.

You begin to search through the books, hoping to find any information that might help you unlock the secrets of the Shadow Realm. As you turn a page in the glowing tome, you notice that it has a passage written in an ancient language that seems to match the whispers of the other texts.

The words seem to hint at the location of a hidden chamber within the Shadow Realm, where one can find the key to unlocking the secrets of this mysterious place. You study t

DUMB-FROTZ: unknown escape char: b
Enter \help to see the list


[r21] LLM Response: You are currently standing in a vast, dimly lit chamber filled with rows upon rows of ancient, leather-bound books. Shelves stretch out before you, stacked high with tomes that seem to whisper secrets to each other.

As you browse through the shelves, you notice two texts that catch your attention. One book appears to be bound in a strange, glowing material that seems to pulse with an inner light. Another tome has yellowed pages filled with cryptic symbols and phrases that seem to match the words of the whispering voices.

You begin to search through the books, hoping to find any information that might help you unlock the secrets of the Shadow Realm. As you turn a page in the glowing tome, you notice that it has a passage written in an ancient language that seems to match the whispers of the other texts.

The words seem to hint at the location of a hidden chamber within the Shadow Realm, where one can find the key to unlocking the secrets of this mysterious place. Y

DUMB-FROTZ: unknown escape char: b
Enter \help to see the list


[r24] LLM Response: You continue to study the mysterious text once more, looking for any clues that might have been missed. After a few moments, you notice something that catches your eye: a faint symbol etched into the wall of the passageway, partially hidden by the dust.

Considering your options and the passage you followed, I recommend exploring the narrow tunnel further for safety and potential discovery.
You are a smart video game enthusiast who is skilled at playing old-school text adventure games. Given a description of your environment, explain your thought process and then give a command that is compatible with the game you are playing. Always put the command on its own line at the end of your response with nothing else. It needs to be easy and consistent to read with simple pythonAt the beginning of your prompt, you will also receive up to 5 of the most recent interactions you've had with the gameThis game does not run on an LLM, so it only recognizes a set vocabulary of com

Unnamed: 0,name,unique_rooms,unique_items,unique_hashes,score,max_score,avg_retries,avg_generate_time
0,basic_llm,5,0,5,0,350,1.0,0.818798
1,basic_llm_with_memory,1,0,2,0,350,0.0,12.66072
2,basic_llm_with_memory_chatgpt,1,0,1,0,350,0.0,0.154052
3,memory_analyze_then_command,1,0,2,0,350,0.0,0.499695
4,memory_and_provided_commands,1,1,5,0,350,0.0,0.099993
5,memory_analyze_provided_commands,4,0,5,0,350,0.0,0.098295
6,memory_analyze_provided_commands_chat,5,0,10,0,350,0.0,0.153376


Process ForkPoolWorker-23:
Process ForkPoolWorker-3:
Process ForkPoolWorker-7:
Process ForkPoolWorker-5:
Process ForkPoolWorker-22:
Process ForkPoolWorker-17:
Process ForkPoolWorker-18:
Process ForkPoolWorker-28:
Process ForkPoolWorker-8:
Process ForkPoolWorker-4:
Process ForkPoolWorker-16:
Process ForkPoolWorker-29:
Process ForkPoolWorker-15:
Process ForkPoolWorker-21:
Process ForkPoolWorker-20:
Process ForkPoolWorker-1:
Process ForkPoolWorker-6:
Process ForkPoolWorker-27:
Process ForkPoolWorker-2:
Process ForkPoolWorker-19:
Process ForkPoolWorker-24:
Process ForkPoolWorker-31:
Process ForkPoolWorker-26:
Process ForkPoolWorker-10:
Process ForkPoolWorker-13:
Process ForkPoolWorker-30:
Process ForkPoolWorker-9:
Process ForkPoolWorker-14:
Process ForkPoolWorker-32:
Process ForkPoolWorker-25:
Process ForkPoolWorker-11:
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last

In [20]:
long_calls = {
    'agent': agent
}

full_results = []
for name, func in long_calls.items():
    results = func()
    full_results.append({'name': name, **results})

long_df = pd.DataFrame(full_results)
long_df

Copyright (c) 1981, 1982, 1983 Infocom, Inc. All rights reserved.
ZORK is a registered trademark of Infocom, Inc.
Revision 88 / Serial number 840726

West of House
You are standing in an open field west of a white house, with a boarded front door.
There is a small mailbox here.


Thinking:  Okay, let's see. I'm playing Zork I, and I'm at the starting location: West of House. The description says there's a small mailbox here. The goal is to figure out what to do next to progress in the game.

First, I should check what actions are possible here. The user mentioned using the view_possible_actions function. So, maybe I should call that to see the available commands. Common actions in such games usually include looking around, checking objects, opening doors, or interacting with items like the mailbox.

Alternatively, since the user provided the initial prompt, maybe the mailbox is a key item. In many text adventures, mailboxes can contain letters or items needed later. So, perhaps the fir

Unnamed: 0,name,unique_rooms,unique_items,unique_hashes,score,max_score,avg_retries,avg_generate_time
0,agent,7,0,10,0,350,0,7.883621


In [None]:
df = pd.concat([df, long_df])

In [None]:
df.to_csv(os.path.join(output_dir, 'results.csv'))