In [1]:
import os
import sys
import time

import ollama
import jericho
import re
import json
import random

# Make the locally installed ollama binaries discoverable on PATH.
# The membership check keeps this cell idempotent: re-running it no longer
# appends the same entry again. os.pathsep replaces the hard-coded ':'.
OLLAMA_BIN_DIR = './ollama/bin'
if OLLAMA_BIN_DIR not in os.environ.get('PATH', '').split(os.pathsep):
    os.environ['PATH'] = f'{os.environ.get("PATH", "")}{os.pathsep}{OLLAMA_BIN_DIR}'
print(os.environ['PATH'])  # Show the resulting PATH (pure-Python stand-in for `!echo $PATH`)

/s/chopin/b/grad/tmoleary/cs542/cs542-adventure/env/bin:/s/chopin/b/grad/tmoleary/.vscode-server/cli/servers/Stable-1e3c50d64110be466c0b4a45222e81d2c9352888/server/bin/remote-cli:/usr/local/nodejs/18.17.1/bin:/usr/local/postman/latest:/usr/local/maven/3.9.4/bin:/usr/lib/jvm/java-11-openjdk/bin:/usr/share/Modules/bin:/s/chopin/b/grad/tmoleary/.nvm/versions/node/v20.5.0/bin:/usr/lib64/openmpi/bin:/usr/local/cuda/latest/bin:/s/chopin/b/grad/tmoleary/bin:/bin:/usr/bin:/sbin:/usr/sbin:/usr/local/bin:.:./ollama/bin


In [2]:
# Directory holding the Jericho game suite, and the story file to load from it.
GAMES_DIR = "z-machine-games-master/jericho-game-suite"
game = 'zork1.z5'

# Single shared emulator instance; the strategy functions below reset and step it.
env = jericho.FrotzEnv("/".join((GAMES_DIR, game)))

In [3]:
import time
from adventure.metrics import ScoreTracker

def n_steps(turn_func, env, n=100):
    """Play up to ``n`` turns with ``turn_func`` and return the tracker's stats.

    Args:
        turn_func: Zero-argument callable that plays one turn and returns a
            ``(done, info)`` pair.
        env: Game environment; handed to ``ScoreTracker`` and ``get_stats``
            and not otherwise used here.
        n: Maximum number of turns to play. Must be at least 1.

    Returns:
        The value of ``ScoreTracker.get_stats(env, info)``, where ``info`` is
        the one returned by the last turn played.

    Raises:
        ValueError: If ``n`` is less than 1. Without this check the loop body
            never runs and ``info`` is unbound at the final ``get_stats`` call.
    """
    if n < 1:
        raise ValueError(f'n must be at least 1, got {n}')

    score_tracker = ScoreTracker(env)

    for _ in range(n):
        # Time only the turn itself so the tracker sees per-turn latency.
        start = time.time()
        done, info = turn_func()
        end = time.time()

        score_tracker.update(info, start, end)

        # Stop early once the turn function reports the game is over.
        if done:
            break

    return score_tracker.get_stats(env, info)

In [4]:
def basic_llm():
    """Play the game with a stateless prompt and return the run statistics.

    Each turn sends the system prompt plus the latest game text to the
    ``llama3.2:3b`` model, takes the last non-blank line of the reply as the
    command, and steps the shared ``env`` with it.

    Returns:
        Whatever ``n_steps`` returns for this run.
    """
    # Adjacent string literals are concatenated verbatim, so the first
    # fragment ends with '. ' to keep the two sentences from running together.
    system_prompt = (
        'You are a smart video game enthusiast who is skilled at playing old-school text adventure games. Given a description of your environment, explain your thought process and then give a command that is compatible with the game you are playing. Always put the command on its own line at the end of your response with nothing else. It needs to be easy and consistent to read with simple python. '
        'This game does not run on an LLM, so it only recognizes a small vocabulary of commands. An example of a command is exactly the string "go west". This means you should only give exactly the command that the game recognizes '
    )

    make_prompt = lambda x: f'{system_prompt}\n\n{x}'

    game_response, info = env.reset()
    print(game_response)

    def turn():
        """Ask the model for one command, send it to the game, return (done, info)."""
        nonlocal game_response
        prompt = make_prompt(f'Game prompt:\n{game_response}')
        response = ollama.generate(model='llama3.2:3b', prompt=prompt).response
        print(f'LLM Response: {response}')

        # Strip first so trailing blank lines do not become the command, and
        # fall back to an empty command when the model returns nothing at all
        # (indexing [-1] on an empty list would raise IndexError).
        lines = response.strip().splitlines()
        player_in = lines[-1].strip() if lines else ''
        print(player_in)

        game_response, _, done, info = env.step(player_in)
        print(game_response)
        return done, info

    results = n_steps(turn, env)
    print(results)
    return results

In [5]:
# basic_llm()

In [6]:
# Basic with memory
# Adjacent string literals are concatenated verbatim, so each fragment ends
# with an explicit '. ' separator; without it the sentences run together
# ("...simple pythonAt the beginning...", "...the gameThis game...").
system_prompt_memory = (
    'You are a smart video game enthusiast who is skilled at playing old-school text adventure games. Given a description of your environment, explain your thought process and then give a command that is compatible with the game you are playing. Always put the command on its own line at the end of your response with nothing else. It needs to be easy and consistent to read with simple python. '
    'At the beginning of your prompt, you will also receive up to 5 of the most recent interactions you\'ve had with the game. '
    'This game does not run on an LLM, so it only recognizes a set vocabulary of commands. An example of a command is exactly the string "go west". This means you should only give exactly the command that the game recognizes '
)

In [7]:
def basic_llm_with_memory(memory_size=5, system_prompt=system_prompt_memory):
    """Play the game while prepending recent prompt/response pairs to each prompt.

    Args:
        memory_size: Maximum number of past interactions kept; the oldest is
            dropped once this is exceeded.
        system_prompt: Instruction text placed before the game text in every
            prompt.

    Returns:
        Whatever ``n_steps`` returns for this run.
    """
    make_prompt = lambda x: f'{system_prompt}\n\n{x}'

    game_response, info = env.reset()
    print(game_response)

    memory = []

    def turn():
        """Ask the model for one command, send it to the game, return (done, info)."""
        nonlocal game_response
        prompt = make_prompt(f'Game prompt:\n{game_response}')
        combined_memory = "\n".join(memory)
        prompt_with_memory = f'{combined_memory}\n\n{prompt}'
        response = ollama.generate(model='llama3.2:3b', prompt=prompt_with_memory).response
        print(f'LLM Response: {response}')

        # Strip first so trailing blank lines do not become the command, and
        # fall back to an empty command when the model returns nothing at all
        # (indexing [-1] on an empty list would raise IndexError).
        lines = response.strip().splitlines()
        player_in = lines[-1].strip() if lines else ''

        # Each remembered entry is the full prompt (system prompt included)
        # followed by the model's reply.
        memory.append(f'{prompt}\n{response}')
        if len(memory) > memory_size:
            memory.pop(0)

        # Take an action in the environment using the step function.
        # The resulting text-observation, reward, and game-over indicator is returned.
        game_response, _, done, info = env.step(player_in)
        game_response = f'Received command: {player_in}\n{game_response}' # Add the text the game received so the LLM can hopefully improve its formatting
        print(game_response)

        return done, info

    result = n_steps(turn, env)
    print(result)
    return result

In [8]:
# with open('./basic_llm_with_memory.out', 'w') as f:
#     print(basic_llm_with_memory(out_file=f, max_retry=30))

In [9]:
# System prompt suggested by ChatGPT -- is it any better?
# The question it was asked:
#   What would be a good system prompt for a local LLM to prime it to play text adventure games?
#   The LLM is not running the text adventure game, it is acting as the player for existing games
#
# Built one instruction per entry; every instruction is terminated by a newline.
system_prompt_memory_chatgpt = "".join(
    f"{instruction}\n"
    for instruction in (
        "You are a player of a classic parser-based interactive fiction game.",
        "Respond only with a single imperative command in plain lowercase (e.g., “look”, “go east”, “get key”).",
        "Do not write sentences, explanations, strategies, or narratives.",
        "Use only standard text-adventure verbs: look, examine, go, take, drop, open, close, use, talk to, attack, inventory, etc.",
        "Act rationally based on the game’s last description.",
        "If confused, issue “look”.",
        "Notice that the memory you receive contains commands you have issued in the past. Don't repeat commands that won't help you move forward.",
    )
)

The ChatGPT-provided prompt was ineffective. The model tended to repeat the same commands over and over, even when the memory we provided showed that the command had already failed.

In [10]:
def memory_analyze_then_command(memory_size=5, max_retry=30, model='llama3.2:3b'):
    """Play with a two-step prompt: analyze the situation, then pick a command.

    Args:
        memory_size: Maximum number of past (game text, command) entries kept;
            the oldest is dropped once this is exceeded.
        max_retry: Not used by this function; kept so existing callers that
            pass it keep working.
        model: Name of the ollama model used for both the analysis and the
            action prompt.

    Returns:
        Whatever ``n_steps`` returns for this run.
    """
    # Basic with memory
    system_prompt = (
        f'You are a smart video game enthusiast who is skilled at playing old-school text adventure games. You are playing {game}.\n'
        "You don't enjoy playing video games, but you must reach the proper end of the game to get paid.\n"
    )

    make_prompt = lambda x: f'{system_prompt}\n\n{x}'

    game_response, info = env.reset()
    print(game_response)

    memory = []

    def turn():
        """Run the analysis and action prompts, step the game, return (done, info)."""
        nonlocal game_response
        combined_memory = "\n".join(memory)

        # Step 1: have the model summarize the state and suggest a direction.
        prompt = make_prompt(f'Analyze the following sequence of game environment descriptions or responses and actions already taken by you, the player. Concisely describe the current state of the game and a potential action to take to move forward.\n\n{combined_memory}\n\nGame text:\n{game_response}')
        analysis = ollama.generate(model=model, prompt=prompt).response
        print(f'LLM description: {analysis}')

        # Step 2: turn that analysis into a single parser command.
        prompt = make_prompt(f'Given your analysis of the game state, issue a rational action to take to progress in the game. Respond only with a single imperative command in plain lowercase. Use only standard text-adventure verbs. IMPORTANT: Your response will be used directly as input to the game. Minimize the number of words you use.\n\nYour analysis:\n{analysis}')
        response = ollama.generate(model=model, prompt=prompt).response
        print(f'LLM action: {response}')
        # Band-aid: the model keeps prefixing its answer with "type".
        # Strip surrounding whitespace first, otherwise a leading space or
        # newline stops removeprefix from matching. TODO: Probably remove.
        response = response.strip().removeprefix('type').strip()

        # An empty reply has no lines; send an empty command instead of
        # raising IndexError on [-1].
        lines = response.splitlines()
        player_in = lines[-1].strip() if lines else ''

        memory.append(f'{game_response}\n{response}')
        if len(memory) > memory_size:
            memory.pop(0)

        # Take an action in the environment using the step function.
        # The resulting text-observation, reward, and game-over indicator is returned.
        game_response, _, done, info = env.step(player_in)
        game_response = f'Received command: {player_in}\n{game_response}' # Add the text the game received so the LLM can hopefully improve its formatting
        print(game_response)
        return done, info

    result = n_steps(turn, env)
    print(result)
    return result

gpt-oss did much better than llama. This could be because of the number of parameters. It could also be that gpt-oss has seen Zork in its training data (this seems to be the case). Or maybe it is genuinely better at this kind of task? I think that's less likely.

In [11]:
def memory_and_provided_commands(memory_size=5):
    """Play with memory plus the environment's list of valid actions in the prompt.

    Args:
        memory_size: Maximum number of past (game text, response) entries
            kept; the oldest is dropped once this is exceeded.

    Returns:
        Whatever ``n_steps`` returns for this run.
    """
    system_prompt = (
        f'You are a smart video game tester who is skilled at playing old-school text adventure games. You are playing {game}\n'
        'Respond only with a single imperative command in plain lowercase from the list of possible actions below.\n'
        'Do not write sentences, explanations, strategies, or narratives.\n'
        'Act rationally based on the game’s last description.\n'
        "Notice that the memory you receive contains commands you have issued in the past. Don't repeat commands that won't help you move forward\n"
    )

    make_prompt = lambda x: f'{system_prompt}\n\n{x}'

    game_response, info = env.reset()
    print(game_response)

    memory = []

    def turn():
        """Ask the model to pick a command, send it to the game, return (done, info)."""
        nonlocal game_response
        combined_memory = "\n".join(memory)
        actions = ', '.join(env.get_valid_actions())
        prompt = make_prompt(f'{combined_memory}\n\nGame text:\n{game_response}\n\nValid actions: {actions}')
        response = ollama.generate(model='llama3.2:3b', prompt=prompt).response
        print(f'LLM Response: {response}')

        # Strip first so trailing blank lines do not become the command, and
        # fall back to an empty command when the model returns nothing at all
        # (indexing [-1] on an empty list would raise IndexError).
        lines = response.strip().splitlines()
        player_in = lines[-1].strip() if lines else ''

        memory.append(f'{game_response}\n{response}')
        if len(memory) > memory_size:
            memory.pop(0)

        # Take an action in the environment using the step function.
        # The resulting text-observation, reward, and game-over indicator is returned.
        game_response, _, done, info = env.step(player_in)
        game_response = f'Received command: {player_in}\n{game_response}' # Add the text the game received so the LLM can hopefully improve its formatting
        print(game_response)
        return done, info

    result = n_steps(turn, env)
    print(result)
    return result

This certainly improved its commands, but it did not help with actually completing the game. Maybe adding an analysis step could help. The model does not seem to have a sense of curiosity.

In [12]:
def memory_analyze_provided_commands(memory_size=5, model='llama3.2:3b'):
    """Play with an analysis step followed by a command chosen from valid actions.

    Args:
        memory_size: Maximum number of past (game text, command) entries kept;
            the oldest is dropped once this is exceeded.
        model: Name of the ollama model used for both prompts.

    Returns:
        Whatever ``n_steps`` returns for this run.
    """
    system_prompt = (
        f'You are a smart video game tester who is skilled at playing old-school text adventure games. You are playing {game}\n'
        'Respond only with a single imperative command in plain lowercase from the list of possible actions below.\n'
        'Do not write sentences, explanations, strategies, or narratives.\n'
        'Act rationally based on the game’s last description.\n'
        "Notice that the memory you receive contains commands you have issued in the past. Don't repeat commands that won't help you move forward\n"
    )

    make_prompt = lambda x: f'{system_prompt}\n\n{x}'

    game_response, info = env.reset()
    print(game_response)

    memory = []

    def turn():
        """Run the analysis and action prompts, step the game, return (done, info)."""
        nonlocal game_response
        combined_memory = "\n".join(memory)

        # Step 1: have the model summarize the state and suggest a direction.
        prompt = make_prompt(f'Analyze the following sequence of game environment descriptions or responses and actions already taken by you, the player. Concisely describe the current state of the game and a potential action to take to move forward.\n\n{combined_memory}\n\nGame text:\n{game_response}')
        analysis = ollama.generate(model=model, prompt=prompt).response
        print(f'LLM description: {analysis}')

        # Step 2: turn the analysis into one command, offering the valid actions.
        actions = ', '.join(env.get_valid_actions())
        prompt = make_prompt(f'Given your analysis of the game state, issue a rational action to take to progress in the game. Respond only with a single imperative command in plain lowercase. Use only standard text-adventure verbs. IMPORTANT: Your response will be used directly as input to the game. Minimize the number of words you use.\n\nYour analysis:\n{analysis}\n\nValid actions: {actions}')
        response = ollama.generate(model=model, prompt=prompt).response
        print(f'LLM action: {response}')
        # Band-aid: the model keeps prefixing its answer with "type".
        # Strip surrounding whitespace first, otherwise a leading space or
        # newline stops removeprefix from matching. TODO: Probably remove.
        response = response.strip().removeprefix('type').strip()

        # An empty reply has no lines; send an empty command instead of
        # raising IndexError on [-1].
        lines = response.splitlines()
        player_in = lines[-1].strip() if lines else ''

        memory.append(f'{game_response}\n{response}')
        if len(memory) > memory_size:
            memory.pop(0)

        # Take an action in the environment using the step function.
        # The resulting text-observation, reward, and game-over indicator is returned.
        game_response, _, done, info = env.step(player_in)
        game_response = f'Received command: {player_in}\n{game_response}' # Add the text the game received so the LLM can hopefully improve its formatting
        print(game_response)
        return done, info

    result = n_steps(turn, env)
    print(result)
    return result

In [13]:
def memory_analyze_provided_commands_chat():
    """Play using a growing chat history for analysis, then a one-shot command prompt.

    The analysis step uses ``ollama.chat`` with the whole conversation so far
    (nothing is ever dropped from it); the command step uses a separate
    ``ollama.generate`` call built from that analysis and the shuffled list of
    valid actions.

    Returns:
        Whatever ``n_steps`` returns for this run.
    """
    model = 'llama3.2:3b'
    system_prompt = (
        f'You are a smart video game tester who is skilled at playing old-school text adventure games. You are playing {game}\n'
        'Act rationally based on the game’s last description.\n'
        "Notice that the memory you receive contains commands you have issued in the past. Don't repeat commands that won't help you move forward\n"
    )

    make_prompt = lambda x: f'{system_prompt}\n\n{x}'
    memory = [
        ollama.Message(role='system', content=system_prompt)
    ]
    analysis_prompt = 'Concisely describe the current state of the game and a potential action to take to move forward.'
    memory.append(ollama.Message(role='system', content=analysis_prompt))

    game_response, info = env.reset()
    print(game_response)

    def turn_func():
        """Run the chat analysis and the action prompt, step the game, return (done, info)."""
        # Only game_response is rebound here; everything else is just read
        # (or mutated in place) and needs no nonlocal declaration.
        nonlocal game_response
        actions_list = env.get_valid_actions()
        random.shuffle(actions_list)
        actions = ', '.join(actions_list)

        memory.append(ollama.Message(role='user', content=f'{game_response}\n\nValid game actions: {actions}'))
        chat_reply = ollama.chat(model=model, messages=memory)
        memory.append(chat_reply.message)
        analysis = chat_reply.message.content
        print(f'LLM description: {analysis}')

        prompt = make_prompt(f'Given your analysis of the game state, issue a rational action to take to progress in the game. Respond only with a single imperative command in plain lowercase. Use only standard text-adventure verbs. IMPORTANT: Your response will be used directly as input to the game. Minimize the number of words you use.\n\nYour analysis:\n{analysis}\n\n Only use one of these valid actions: {actions}\n\n')
        print('[action prompt]', prompt)
        response = ollama.generate(model=model, prompt=prompt).response
        print(f'LLM action: {response}')
        # Band-aid: the model keeps prefixing its answer with "type".
        # Strip surrounding whitespace first, otherwise a leading space or
        # newline stops removeprefix from matching. TODO: Probably remove.
        response = response.strip().removeprefix('type').strip()

        # An empty reply has no lines; send an empty command in that case.
        lines = response.splitlines()
        player_in = lines[-1].strip() if lines else ''

        # Take an action in the environment using the step function.
        # The resulting text-observation, reward, and game-over indicator is returned.
        game_response, _, done, info = env.step(player_in)
        game_response = f'Received command: {player_in}\n{game_response}' # Add the text the game received so the LLM can hopefully improve its formatting
        print(game_response)

        return done, info

    results = n_steps(turn_func, env)
    print(results)
    return results

In [14]:
#memory_analyze_provided_commands_chat()

In [21]:
def agent(max_retry=30, model='gpt-oss'):
    """Play the game with a tool-calling chat model and return the run statistics.

    The model is given three tools (take a game action, list valid actions,
    view the walkthrough). Each turn sends the latest game text as a user
    message, lets the model respond, and executes any tool calls it made.

    Args:
        max_retry: Not used by this function; kept so existing callers that
            pass it keep working.
        model: Name of the ollama chat model to use.

    Returns:
        Whatever ``n_steps`` returns for this run.
    """
    system_prompt = (
        f'Think step by step. You are playing {game}, an interactive fiction game. You must analyze the scenario the game presents to you and choose an action that will make progress. Your goal is to finish the game\n'
        'Use the tools provided to you to take actions, view possible actions for your current location, and view the game walkthrough if necessary'
    )

    game_response, info = env.reset()
    print(game_response)
    done = False

    memory = [
        ollama.Message(role='system', content=system_prompt)
    ]

    # NOTE: each tool has ONE docstring. Previously the Args/Returns text sat in
    # a second string literal, which Python treats as a no-op expression, so it
    # never reached ``__doc__`` (the only text available to describe the tool).

    def do_game_action(action: str) -> str:
        """Perform an action in the active text adventure game and see the result

        Args:
          action: game action string

        Returns:
          The game's response after performing the action
        """
        # game_response must be nonlocal too: otherwise the assignment below
        # creates a local and turn() keeps re-sending the opening description.
        nonlocal done, info, game_response
        game_response, _, done, info = env.step(action)
        return game_response

    def view_possible_actions() -> str:
        """View a list of the actions that can be performed in the game's current state

        Returns:
          String containing actions separated by commas
        """
        return ', '.join(env.get_valid_actions())

    def view_walkthrough() -> str:
        """View the full game walkthrough as a list of actions

        Returns:
          String containing actions separated by newlines
        """
        walkthrough = env.get_walkthrough()
        # Return what the docstring promises: join a sequence of actions with
        # newlines, but pass an already-formatted string through untouched.
        if isinstance(walkthrough, str):
            return walkthrough
        return '\n'.join(walkthrough)

    available_functions = {
        'do_game_action': do_game_action,
        'view_possible_actions': view_possible_actions,
        'view_walkthrough': view_walkthrough
    }
    tools = list(available_functions.values())

    def turn():
        """Send the latest game text, run the model's tool calls, return (done, info)."""
        memory.append(ollama.Message(role='user', content=f'{game_response}'))

        response = ollama.chat(model=model, messages=memory, think=True, tools=tools, options={'num_ctx': 2048})
        memory.append(response.message)

        print("Thinking: ", response.message.thinking)
        print("Content: ", response.message.content)

        # tool_calls may be empty/None when the model only replied with text.
        for tc in response.message.tool_calls or []:
            tool = available_functions.get(tc.function.name)
            if tool is None:
                # Calls to tools we did not offer are ignored.
                continue
            print(f"Calling {tc.function.name} with arguments {tc.function.arguments}")
            result = tool(**tc.function.arguments)
            print(f"Result: {result}")
            # add the tool result to the messages
            memory.append({'role': 'tool', 'tool_name': tc.function.name, 'content': str(result)})
        return done, info

    result = n_steps(turn, env)
    print(result)
    print('Memory at end:')
    print(json.dumps(memory, indent=4, default=str))
    return result


In [34]:
# agent()

# Data Collection

In [35]:
import os
import pandas as pd
import datetime

# Every execution of this cell gets its own results folder, named after the
# current local time as YYYYMMDD_HHMMSS under outputs/.
output_dir = 'outputs/{:%Y%m%d_%H%M%S}'.format(datetime.datetime.now())
os.makedirs(output_dir, exist_ok=True)

In [36]:

# Strategies to benchmark, keyed by the label stored in the results table.
# Currently disabled: memory_analyze_then_command, memory_analyze_provided_commands.
calls = dict(
    basic_llm=basic_llm,
    basic_llm_with_memory=basic_llm_with_memory,
    basic_llm_with_memory_chatgpt=lambda: basic_llm_with_memory(system_prompt=system_prompt_memory_chatgpt),
    memory_and_provided_commands=memory_and_provided_commands,
    memory_analyze_provided_commands_chat=memory_analyze_provided_commands_chat,
)

# Run every strategy three times. Rows are appended as each run finishes, so
# an interrupted cell still leaves the completed runs in full_results.
full_results = []
for name, func in calls.items():
    for repeat in range(3):
        results = func()
        full_results.append({'name': name, 'iter': repeat} | results)

df = pd.DataFrame(data=full_results)
df

Copyright (c) 1981, 1982, 1983 Infocom, Inc. All rights reserved.
ZORK is a registered trademark of Infocom, Inc.
Revision 88 / Serial number 840726

West of House
You are standing in an open field west of a white house, with a boarded front door.
There is a small mailbox here.


LLM Response: To navigate this environment, I'll start by taking stock of my surroundings. I see that I'm standing in a field, and there's a house to the east (since I'm described as being west of it). There's also a mailbox nearby. Since I don't have any objects with me, I won't be able to pick anything up yet.

My next step would be to approach the mailbox to see if it contains any useful information or items. 

open mailbox
open mailbox
Opening the small mailbox reveals a leaflet.


LLM Response: Given the description of my environment, I'm thinking that the leaflet might be an important clue or a hint about what's going on. The fact that it was inside the mailbox suggests that it might be related to the ma



LLM Response: As I read through the description of my environment, I'm trying to visualize the setting and understand how it relates to me. However, the phrase "I don't know the word 'since'" seems out of place in a text adventure game. In a typical adventure game, players might be interacting with a location, object, or non-player character (NPC), but this phrase appears to be more related to language and vocabulary.

Given that I'm trying to navigate a game where my commands are limited to basic actions like moving or taking objects, I decide to ignore the linguistic reference. Instead, I focus on understanding the physical space around me. Unfortunately, the description doesn't provide enough information about my location, so I'll need to gather more clues.

Let's try to move forward and see if that helps me understand my surroundings better.

Look
Look
Rocky Ledge
You are on a ledge about halfway up the wall of the river canyon. You can see from here that the main flow from Aragain

Unnamed: 0,name,iter,moves,unique_rooms,unique_hashes,unique_items,score,max_score,avg_retries,avg_generate_time
0,basic_llm,0,52,7,10,0,0,350,1.0,1.859966
1,basic_llm,1,46,6,13,1,0,350,1.0,1.568749
2,basic_llm,2,40,6,11,0,0,350,1.0,1.55334
3,basic_llm_with_memory,0,19,5,8,0,10,350,30.0,1.382176
4,basic_llm_with_memory,1,54,8,10,1,0,350,1.0,2.517573
5,basic_llm_with_memory,2,79,8,13,0,0,350,1.0,1.628464
6,basic_llm_with_memory_chatgpt,0,96,5,13,1,0,350,1.0,0.255709
7,basic_llm_with_memory_chatgpt,1,74,7,16,1,0,350,1.0,0.261063
8,basic_llm_with_memory_chatgpt,2,92,8,11,1,0,350,1.0,0.271224
9,memory_and_provided_commands,0,98,8,53,5,10,350,6.0,0.156347


In [22]:
# Strategies that are run once each (no repeat loop), collected in their own table.
long_calls = dict(agent=agent)

# Note: this rebinds full_results, replacing the list built by the earlier cell.
full_results = []
for name, func in long_calls.items():
    results = func()
    full_results.append({'name': name} | results)

long_df = pd.DataFrame(data=full_results)
long_df

Copyright (c) 1981, 1982, 1983 Infocom, Inc. All rights reserved.
ZORK is a registered trademark of Infocom, Inc.
Revision 88 / Serial number 840726

West of House
You are standing in an open field west of a white house, with a boarded front door.
There is a small mailbox here.


Thinking:  We need to proceed with the game. The user hasn't provided a command yet. They provided the game description: "West of House". We need to choose an action. Typically, in Zork, to start you might "look" or "north" or "east". The user hasn't given a command; maybe they want us to play the game? The instructions say we must choose an action that will make progress. So we should pick a likely starting action: "look" to see details. But "look" is default; but we can also "go east" to go to the house. The typical first action is to "go east". Let's see what is at east: "You are standing in an open field west of a white house, with a boarded front door. There is a small mailbox here." So moving east leads 

KeyboardInterrupt: 

In [None]:
# Stack the single-run rows (built without an 'iter' key) under the
# repeated-run rows. This rebinds df, so running the cell again appends
# long_df a second time.
df = pd.concat(objs=(df, long_df))
df

Unnamed: 0,name,iter,unique_rooms,unique_items,unique_hashes,score,max_score,avg_retries,avg_generate_time
0,basic_llm,0.0,5,0,7,0,350,100+,1.886766
1,basic_llm,1.0,5,0,8,0,350,100+,1.689031
2,basic_llm,2.0,5,0,7,0,350,100+,1.691821
3,basic_llm_with_memory,0.0,1,0,1,0,350,100+,6.023718
4,basic_llm_with_memory,1.0,2,0,2,0,350,100+,4.256784
5,basic_llm_with_memory,2.0,2,0,2,0,350,100+,10.554098
6,basic_llm_with_memory_chatgpt,0.0,1,0,2,0,350,100+,0.315102
7,basic_llm_with_memory_chatgpt,1.0,1,0,1,0,350,100+,0.224428
8,basic_llm_with_memory_chatgpt,2.0,1,0,1,0,350,100+,0.286968
9,memory_analyze_then_command,0.0,3,0,3,0,350,100+,2.415618


In [None]:
# Save the combined results table as results.csv inside this run's output folder.
df.to_csv(path_or_buf=os.path.join(output_dir, 'results.csv'))