In [1]:
import sys
import time

import ollama
import jericho
import re

In [4]:
response = ollama.generate(model='llama3.2:1b', prompt='What is a large language model?')
print(response)

KeyboardInterrupt: 

In [2]:
GAMES_DIR = "z-machine-games-master/jericho-game-suite"
game = 'zork1.z5'
env = jericho.FrotzEnv(f"{GAMES_DIR}/{game}")

# System Prompts

In [None]:
# Basic
system_prompt = (
    'You are a smart video game enthusiast who is skilled at playing old-school text adventure games. Given a description of your environment, explain your thought process and then give a command that is compatible with the game you are playing. Always put the command on its own line at the end of your response with nothing else. It needs to be easy and consistent to read with simple python'
    'This game does not run on an LLM, so it only recognizes a small vocabulary of commands. An example of a command is exactly the string "go west". This means you should only give exactly the command that the game recognizes '
)

In [None]:
def basic_llm(max_retry=5, out_file=sys.stdout):
    original_stdout = sys.stdout
    sys.stdout = out_file

    make_prompt = lambda x: f'{system_prompt}\n\n{x}'

    print("Playing game. 'q' to quit")

    initial_observation, info = env.reset()
    print(initial_observation)

    game_response = initial_observation

    done = False
    total_steps = 0
    retries = 0
    retries_per_score = []
    generate_times = []
    prev_score = -1
    while not done and retries <= max_retry:

        prompt = make_prompt(f'Game prompt:\n{game_response}')
        start = time.time()
        response = ollama.generate(model='llama3.2:1b', prompt=prompt).response
        generate_times.append(time.time() - start)
        print(f'[r{retries}] LLM Response: {response}')
        player_in = response.splitlines()[-1].strip()
        print(player_in)

        # Take an action in the environment using the step fuction.
        # The resulting text-observation, reward, and game-over indicator is returned.
        game_response, reward, done, info = env.step(player_in)
        print(game_response)

        retries += 1
        if info['score'] != prev_score: # If score changes, major step has been made so number of retries resets
            retries_per_score.append(retries)
            retries = 0
        prev_score = info['score']
        total_steps += 1
    
    print('Scored', info['score'], 'out of', env.get_max_score())

    sys.stdout = original_stdout
    return {
        'score': info['score'],
        'max_score': env.get_max_score(),
        'avg_retries': sum(retries_per_score) / len(retries_per_score) if len(retries_per_score) != 0 else 0,
        'avg_generate_time': sum(generate_times) / len(generate_times)
    }

In [12]:
basic_llm()

Playing game. 'q' to quit
Copyright (c) 1981, 1982, 1983 Infocom, Inc. All rights reserved.
ZORK is a registered trademark of Infocom, Inc.
Revision 88 / Serial number 840726

West of House
You are standing in an open field west of a white house, with a boarded front door.
There is a small mailbox here.


[r0] LLM Response: I'm currently standing in an open field west of the white house, with a boarded front door. The field stretches out in both directions, with no visible obstacles or hazards. I see a small mailbox at the far end of the field.

My thought process: Considering my current location and the fact that there is a mailbox, it's likely that my goal is to retrieve something from the mailbox. Given the layout of the scene, I can think of several possible directions to take:

* I could try to open the boarded front door (if it's not locked).
* I could search for another way into or out of the house.
* I could investigate the mailbox more closely.

My command: go north
My command

{'score': 0,
 'max_score': 350,
 'avg_retries': 1.0,
 'avg_generate_time': 31.602583374295914}

In [None]:
# Basic with memory
system_prompt = (
    'You are a smart video game enthusiast who is skilled at playing old-school text adventure games. Given a description of your environment, explain your thought process and then give a command that is compatible with the game you are playing. Always put the command on its own line at the end of your response with nothing else. It needs to be easy and consistent to read with simple python'
    'At the beginning of your prompt, you will also receive up to 5 of the most recent interactions you\'ve had with the game'
    'This game does not run on an LLM, so it only recognizes a set vocabulary of commands. An example of a command is exactly the string "go west". This means you should only give exactly the command that the game recognizes '
)

In [None]:
def basic_llm_with_memory(memory_size=5, max_retry=5, out_file=sys.stdout, system_prompt=system_prompt):
    original_stdout = sys.stdout
    sys.stdout = out_file

    make_prompt = lambda x: f'{system_prompt}\n\n{x}'

    print("Playing game. 'q' to quit")

    initial_observation, info = env.reset()
    print(initial_observation)

    game_response = initial_observation

    memory = []

    done = False
    total_steps = 0
    retries = 0
    retries_per_score = []
    generate_times = []
    prev_score = 0
    while not done and retries <= max_retry:

        prompt = make_prompt(f'Game prompt:\n{game_response}')
        combined_memory = "\n".join(memory)
        prompt_with_memory = f'{combined_memory}\n\n{prompt}'
        start = time.time()
        response = ollama.generate(model='llama3.2:1b', prompt=prompt_with_memory).response
        generate_times.append(time.time() - start)
        print(f'[r{retries}] LLM Response: {response}')
        player_in = response.splitlines()[-1].strip()

        memory.append(f'{prompt}\n{response}')
        if len(memory) > memory_size:
            memory.pop(0)

        # Take an action in the environment using the step fuction.
        # The resulting text-observation, reward, and game-over indicator is returned.
        game_response, reward, done, info = env.step(player_in)
        game_response = f'Received command: {player_in}\n{game_response}' # Add text the game received so the LLM can hopefully improve it's formatting
        print(game_response)

        retries += 1
        if info['score'] != prev_score: # If score changes, major step has been made so number of retries resets
            retries_per_score.append(retries)
            retries = 0
        prev_score = info['score']
        total_steps += 1
    
    print('Scored', info['score'], 'out of', env.get_max_score())

    sys.stdout = original_stdout
    return {
        'score': info['score'],
        'max_score': env.get_max_score(),
        'avg_retries': sum(retries_per_score) / len(retries_per_score) if len(retries_per_score) != 0 else 0,
        'avg_generate_time': sum(generate_times) / len(generate_times)
    }

In [9]:
with open('./basic_llm_with_memory.out', 'w') as f:
    print(basic_llm_with_memory(out_file=f, max_retry=30))

{'score': 0, 'max_score': 350, 'avg_retries': 0, 'avg_generate_time': 7.857473757959181}


In [None]:
# ChatGPT Provided. Is it better?
system_prompt = (
    "You are a player of a classic parser-based interactive fiction game."
    "Respond only with a single imperative command in plain lowercase (e.g., “look”, “go east”, “get key”)."
    "Do not write sentences, explanations, strategies, or narratives."
    "Use only standard text-adventure verbs: look, examine, go, take, drop, open, close, use, talk to, attack, inventory, etc."
    "Act rationally based on the game’s last description."
    "If confused, issue “look”."
    "Notice that the memory you receive contains commands you have issued in the past. Don't repeat commands that won't help you move forward"
)

In [None]:
with open('./basic_llm_with_memory.out', 'w') as f:
    print(basic_llm_with_memory(out_file=f, max_retry=30))

The chatgpt given prompt was ineffective. It tended to repeat commands over and over again, even when we provided the memory of the command failing

In [3]:
# Basic with memory
system_prompt = (
    f'You are a smart video game enthusiast who is skilled at playing old-school text adventure games. You are playing {game}'
)

In [4]:
def memory_analyze_then_command(memory_size=5, max_retry=5, out_file=sys.stdout, model='llama3.2:1b'):
    original_stdout = sys.stdout
    sys.stdout = out_file

    make_prompt = lambda x: f'{system_prompt}\n\n{x}'

    print("Playing game. 'q' to quit")

    initial_observation, info = env.reset()
    print(initial_observation)

    game_response = initial_observation

    memory = []

    done = False
    total_steps = 0
    retries = 0
    retries_per_score = []
    generate_times = []
    prev_score = 0
    while not done and retries <= max_retry:

        combined_memory = "\n".join(memory)
        prompt = make_prompt(f'Analyze the following sequence of game environment descriptions or responses and actions already taken by you, the player. Concisely describe the current state of the game and a potential action to take to move forward.\n\n{combined_memory}\n\nGame text:\n{game_response}')
        start = time.time()
        response = ollama.generate(model=model, prompt=prompt).response
        generate_times.append(time.time() - start)
        print(f'[r{retries}] LLM description: {response}')

        prompt = make_prompt(f'Given your analysis of the game state, issue a rational action to take to progress in the game. Respond only with a single imperative command in plain lowercase. Use only standard text-adventure verbs. IMPORTANT: Your response will be used directly as input to the game. Minimize the number of words you use.\n\nYour analysis:\n{response}')
        start = time.time()
        response = ollama.generate(model=model, prompt=prompt).response
        generate_times.append(time.time() - start)
        print(f'[r{retries}] LLM action: {response}')
        response = response.removeprefix('type').strip() # Give it a shot. Keeps saying type! TODO: Probably remove. Bandaid

        player_in = response.splitlines()[-1].strip()

        memory.append(f'{game_response}\n{response}')
        if len(memory) > memory_size:
            memory.pop(0)

        # Take an action in the environment using the step fuction.
        # The resulting text-observation, reward, and game-over indicator is returned.
        game_response, reward, done, info = env.step(player_in)
        game_response = f'Received command: {player_in}\n{game_response}' # Add text the game received so the LLM can hopefully improve it's formatting
        print(game_response)

        retries += 1
        if info['score'] != prev_score: # If score changes, major step has been made so number of retries resets
            retries_per_score.append(retries)
            retries = 0
        prev_score = info['score']
        total_steps += 1
    
    print('Scored', info['score'], 'out of', env.get_max_score())

    sys.stdout = original_stdout
    return {
        'score': info['score'],
        'max_score': env.get_max_score(),
        'avg_retries': sum(retries_per_score) / len(retries_per_score) if len(retries_per_score) != 0 else 0,
        'avg_generate_time': sum(generate_times) / len(generate_times)
    }

In [5]:
memory_analyze_then_command(max_retry=5, model='gpt-oss:20b')

Playing game. 'q' to quit
Copyright (c) 1981, 1982, 1983 Infocom, Inc. All rights reserved.
ZORK is a registered trademark of Infocom, Inc.
Revision 88 / Serial number 840726

West of House
You are standing in an open field west of a white house, with a boarded front door.
There is a small mailbox here.


[r0] LLM description: **Current state:**  
- You are standing in an open field west of a white house.  
- The front door of the house is boarded up.  
- There is a small mailbox on the ground.

**Suggested next move:**  
Open or examine the mailbox: `open mailbox` or `look mailbox`. This is a classic first step in Zork to retrieve a letter that can give you a clue or key. If you prefer to approach the house, you could also try `go east` or `enter house`, but the mailbox is a more straightforward path to progress.
[r0] LLM action: open mailbox
Received command: open mailbox
Opening the small mailbox reveals a leaflet.


[r1] LLM description: You’re still standing in the open field west

{'score': 0,
 'max_score': 350,
 'avg_retries': 0,
 'avg_generate_time': 60.48966830968857}

gpt-oss did much better than llama. Could be because of the number of parameters. Could also be that gpt-oss has seen zork in its training data. Or maybe it's really better at it? I think that's less likely.

In [35]:
system_prompt = (
    f'You are a smart video game enthusiast who is skilled at playing old-school text adventure games. You are playing {game}'
    'Respond only with a single imperative command in plain lowercase from the list of possible actions below.'
    'Do not write sentences, explanations, strategies, or narratives.'
    'Act rationally based on the game’s last description.'
    "Notice that the memory you receive contains commands you have issued in the past. Don't repeat commands that won't help you move forward"
)

In [36]:
def memory_and_provided_commands(memory_size=5, max_retry=5, out_file=sys.stdout, system_prompt=system_prompt):
    original_stdout = sys.stdout
    sys.stdout = out_file

    make_prompt = lambda x: f'{system_prompt}\n\n{x}'

    print("Playing game. 'q' to quit")

    initial_observation, info = env.reset()
    print(initial_observation)

    game_response = initial_observation

    memory = []

    done = False
    total_steps = 0
    retries = 0
    retries_per_score = []
    generate_times = []
    prev_score = 0
    while not done and retries <= max_retry:

        combined_memory = "\n".join(memory)
        actions = ', '.join(env.get_valid_actions())
        prompt = make_prompt(f'{combined_memory}\n\nGame text:\n{game_response}\n\nValid actions: {actions}')
        start = time.time()
        response = ollama.generate(model='llama3.2:1b', prompt=prompt).response
        generate_times.append(time.time() - start)
        print(f'[r{retries}] LLM Response: {response}')
        player_in = response.splitlines()[-1].strip()

        memory.append(f'{game_response}\n{response}')
        if len(memory) > memory_size:
            memory.pop(0)

        # Take an action in the environment using the step fuction.
        # The resulting text-observation, reward, and game-over indicator is returned.
        game_response, reward, done, info = env.step(player_in)
        game_response = f'Received command: {player_in}\n{game_response}' # Add text the game received so the LLM can hopefully improve it's formatting
        print(game_response)

        retries += 1
        if info['score'] != prev_score: # If score changes, major step has been made so number of retries resets
            retries_per_score.append(retries)
            retries = 0
        prev_score = info['score']
        total_steps += 1
    
    print('Scored', info['score'], 'out of', env.get_max_score())

    sys.stdout = original_stdout
    return {
        'score': info['score'],
        'max_score': env.get_max_score(),
        'avg_retries': sum(retries_per_score) / len(retries_per_score) if len(retries_per_score) != 0 else 0,
        'avg_generate_time': sum(generate_times) / len(generate_times)
    }

In [38]:
memory_and_provided_commands(max_retry=30)

Playing game. 'q' to quit
Copyright (c) 1981, 1982, 1983 Infocom, Inc. All rights reserved.
ZORK is a registered trademark of Infocom, Inc.
Revision 88 / Serial number 840726

West of House
You are standing in an open field west of a white house, with a boarded front door.
There is a small mailbox here.


[r0] LLM Response: open mailbox
Received command: open mailbox
Opening the small mailbox reveals a leaflet.


[r1] LLM Response: take leaflet
Received command: take leaflet
Taken.


[r2] LLM Response: put down leaflet
Received command: put down leaflet
Dropped.


[r3] LLM Response: close mailbox
Received command: close mailbox
Closed.


[r4] LLM Response: open door
Received command: open door
The door cannot be opened.


[r5] LLM Response: north
Received command: north
North of House
You are facing the north side of a white house. There is no door here, and all the windows are boarded up. To the north a narrow path winds through the trees.


[r6] LLM Response: open door
Received comma

{'score': 0,
 'max_score': 350,
 'avg_retries': 0,
 'avg_generate_time': 1.5271822021853538}

This certainly improved it's commands, but not for actually completing the game. Maybe having an analysis step could help. It does not have a sense of curiosity

In [7]:
def memory_analyze_provided_commands(memory_size=5, max_retry=5, out_file=sys.stdout, model='llama3.2:1b'):
    original_stdout = sys.stdout
    sys.stdout = out_file

    make_prompt = lambda x: f'{system_prompt}\n\n{x}'

    print("Playing game. 'q' to quit")

    initial_observation, info = env.reset()
    print(initial_observation)

    game_response = initial_observation

    memory = []

    done = False
    total_steps = 0
    retries = 0
    retries_per_score = []
    generate_times = []
    prev_score = 0
    while not done and retries <= max_retry:

        combined_memory = "\n".join(memory)
        prompt = make_prompt(f'Analyze the following sequence of game environment descriptions or responses and actions already taken by you, the player. Concisely describe the current state of the game and a potential action to take to move forward.\n\n{combined_memory}\n\nGame text:\n{game_response}')
        start = time.time()
        response = ollama.generate(model=model, prompt=prompt).response
        generate_times.append(time.time() - start)
        print(f'[r{retries}] LLM description: {response}')

        actions = ', '.join(env.get_valid_actions())
        prompt = make_prompt(f'Given your analysis of the game state, issue a rational action to take to progress in the game. Respond only with a single imperative command in plain lowercase. Use only standard text-adventure verbs. IMPORTANT: Your response will be used directly as input to the game. Minimize the number of words you use.\n\nYour analysis:\n{response}\n\nValid actions: {actions}')
        start = time.time()
        response = ollama.generate(model=model, prompt=prompt).response
        generate_times.append(time.time() - start)
        print(f'[r{retries}] LLM action: {response}')
        response = response.removeprefix('type').strip() # Give it a shot. Keeps saying type! TODO: Probably remove. Bandaid

        player_in = response.splitlines()[-1].strip()

        memory.append(f'{game_response}\n{response}')
        if len(memory) > memory_size:
            memory.pop(0)

        # Take an action in the environment using the step fuction.
        # The resulting text-observation, reward, and game-over indicator is returned.
        game_response, reward, done, info = env.step(player_in)
        game_response = f'Received command: {player_in}\n{game_response}' # Add text the game received so the LLM can hopefully improve it's formatting
        print(game_response)

        retries += 1
        if info['score'] != prev_score: # If score changes, major step has been made so number of retries resets
            retries_per_score.append(retries)
            retries = 0
        prev_score = info['score']
        total_steps += 1
    
    print('Scored', info['score'], 'out of', env.get_max_score())

    sys.stdout = original_stdout
    return {
        'score': info['score'],
        'max_score': env.get_max_score(),
        'avg_retries': sum(retries_per_score) / len(retries_per_score) if len(retries_per_score) != 0 else 0,
        'avg_generate_time': sum(generate_times) / len(generate_times)
    }

In [8]:
memory_analyze_provided_commands(max_retry=30)

Playing game. 'q' to quit
Copyright (c) 1981, 1982, 1983 Infocom, Inc. All rights reserved.
ZORK is a registered trademark of Infocom, Inc.
Revision 88 / Serial number 840726

West of House
You are standing in an open field west of a white house, with a boarded front door.
There is a small mailbox here.




ResponseError: model 'llama3.2:1b' not found (status code: 404)