In [1]:
import sys
sys.path.append('..')

In [2]:
%load_ext autoreload
%autoreload 2

In [3]:
import numpy as np
from environments.ALFWorldEnvironment import ALFWorldEnvironment
from agents.alfworld_llm_policy import ALFWorldLLMPolicyAgent
from agents.alfworld_llmzero import ALFWorldLLMZeroAgent

  from tqdm.autonotebook import tqdm, trange


# LLM Policy

In [4]:
env = ALFWorldEnvironment(config_path='../configs/alfworld_env.yaml')
agent = ALFWorldLLMPolicyAgent(env, device="cuda", debug=False, env_params={ "system_prompt_path": "../prompts/prompt_alfworld_policy.txt" }, save_buffer_interval=100)

Initializing AlfredTWEnv...


100%|██████████| 1/1 [00:00<00:00, 646.27it/s]

Overall we have 1 games in split=train
Training with 1 games





In [5]:
state, _ = env.reset()

valid_actions_text = env.get_valid_actions_text(state)
state_text = env.state_to_text(state)
print(state_text)
print(valid_actions_text)

-= Welcome to TextWorld, ALFRED! =-

You are in the middle of a room. Looking quickly around you, you see a armchair 1, a bed 1, a cabinet 4, a cabinet 3, a cabinet 2, a cabinet 1, a drawer 6, a drawer 5, a drawer 4, a drawer 3, a drawer 2, a drawer 1, a dresser 1, a garbagecan 1, a sidetable 2, and a sidetable 1.

Your task is to: put a cellphone in bed.
['go to armchair 1', 'go to bed 1', 'go to cabinet 1', 'go to cabinet 2', 'go to cabinet 3', 'go to cabinet 4', 'go to drawer 1', 'go to drawer 2', 'go to drawer 3', 'go to drawer 4', 'go to drawer 5', 'go to drawer 6', 'go to dresser 1', 'go to garbagecan 1', 'go to sidetable 1', 'go to sidetable 2', 'inventory', 'look']


In [6]:
dist = agent.get_action_distribution(state)
print(valid_actions_text)
print(dist)
print(valid_actions_text[np.argmax(dist)])

['go to armchair 1', 'go to bed 1', 'go to cabinet 1', 'go to cabinet 2', 'go to cabinet 3', 'go to cabinet 4', 'go to drawer 1', 'go to drawer 2', 'go to drawer 3', 'go to drawer 4', 'go to drawer 5', 'go to drawer 6', 'go to dresser 1', 'go to garbagecan 1', 'go to sidetable 1', 'go to sidetable 2', 'inventory', 'look']
[2.1824786e-02 2.4233224e-02 1.6378373e-02 1.2040823e-02 8.2198270e-03
 5.6722960e-03 1.8627536e-02 1.5834114e-02 1.2858202e-02 1.1313704e-02
 9.4612231e-03 7.4650673e-03 2.1822326e-02 8.6408770e-03 4.2228645e-01
 3.8256669e-01 1.0908217e-04 6.4548216e-04]
go to sidetable 1


In [9]:
state, _, _, _, _ = env.step('open drawer 2')
valid_actions_text = env.get_valid_actions_text(state)
state_text = env.state_to_text(state)
print(state_text)
print(valid_actions_text)

You open the drawer 2. The drawer 2 is open. In it, you see a creditcard 2.
['close drawer 2', 'examine drawer 2', 'go to bed 1', 'go to desk 1', 'go to drawer 1', 'go to drawer 3', 'go to garbagecan 1', 'go to safe 1', 'go to shelf 1', 'go to shelf 2', 'go to shelf 3', 'go to shelf 4', 'go to shelf 5', 'go to sidetable 1', 'go to sidetable 2', 'inventory', 'look', 'take creditcard 2 from drawer 2']


# LLM Zero

In [64]:
env = ALFWorldEnvironment(config_path='../configs/alfworld_env.yaml')
cfg = {
        "llm_policy": {
            "env_params": {
                "system_prompt_path": "../prompts/prompt_alfworld_policy.txt",
                "extract_action_regex": r"optimal action: (.*)",
            },
            "load_prompt_buffer_path": None, # update this path to the path of the saved prompt buffer
            "prompt_buffer_prefix": "prompt_buffer/alfworld_policy",
            "save_buffer_interval": 100,
            "overwrite_prompt_buffer": False
        } ,
        "llm_transition": {
            "env_params": {
                "system_prompt_path": "../prompts/prompt_alfworld_transition.txt",
                "extract_state_regex": r"next state:(.*?)```",
                "extract_state_regex_fallback": [r"next state:(.*)", r"```plaintext(.*)```", r"\*\*Next State\*\*:\n(.*)"],
                "extract_action_regex": r"valid actions:\n?(.*?)\n?```",
                "extract_action_regex_fallback": [r"valid actions:\n?(.*)", r"```plaintext(.*)```", r"\*\*Valid Actions\*\*:\n(.*)"],
            },
            "load_prompt_buffer_path": None, # update this path to the path of the saved prompt buffer
            "prompt_buffer_prefix": "prompt_buffer/alfworld_transition",
            "save_buffer_interval": 100,
            "overwrite_prompt_buffer": False
        },
        "llm_reward": {
            "env_params": {
                "system_prompt_path": "../prompts/prompt_alfworld_reward.txt",
                "extract_reward_regex": r"TOTAL_REWARD_FINAL = (-?\d+)", # only use the first match, same line
                "extract_reward_regex_fallback": [],
                "extract_done_regex": r"done: (.*)",
                "extract_done_regex_fallback": [r"done: (.*)"],
            },
            "load_prompt_buffer_path": None, # update this path to the path of the saved prompt buffer
            "prompt_buffer_prefix": "prompt_buffer/alfworld_reward",
            "save_buffer_interval": 100,
            "overwrite_prompt_buffer": False
        },
        "llm_value": {
            "env_params": {
                "system_prompt_path": "../prompts/prompt_alfworld_value.txt",
                "extract_value_regex": r"\\boxed\{(-?\d*\.?\d+)\}",
                "extract_value_regex_fallback": [],
            },
            "load_prompt_buffer_path": None, # update this path to the path of the saved prompt buffer
            "prompt_buffer_prefix": "prompt_buffer/alfworld_value",
            "save_buffer_interval": 100,
            "overwrite_prompt_buffer": False
        }
}
llmzero = ALFWorldLLMZeroAgent(env, cfg=cfg)

Initializing AlfredTWEnv...


100%|██████████| 1/1 [00:00<00:00, 697.77it/s]

Overall we have 1 games in split=train
Training with 1 games





Error saving prompt buffer: [Errno 2] No such file or directory: 'prompt_buffer/alfworld_transition_20241122_045541.pkl'
Prompt buffer saved to prompt_buffer/alfworld_transition_20241122_045541.pkl


### Value

In [115]:
state, _ = env.reset()

In [116]:
valid_actions_text = env.get_valid_actions_text(state)
state_text = env.state_to_text(state)
print(state_text)
print(valid_actions_text)

-= Welcome to TextWorld, ALFRED! =-

You are in the middle of a room. Looking quickly around you, you see a bathtubbasin 1, a cabinet 4, a cabinet 3, a cabinet 2, a cabinet 1, a countertop 1, a drawer 4, a drawer 3, a drawer 2, a drawer 1, a dresser 1, a garbagecan 1, a handtowelholder 1, a sinkbasin 2, a sinkbasin 1, a toilet 1, a toiletpaperhanger 1, and a towelholder 1.

Your task is to: put a candle in toilet.
['go to bathtubbasin 1', 'go to cabinet 1', 'go to cabinet 2', 'go to cabinet 3', 'go to cabinet 4', 'go to countertop 1', 'go to drawer 1', 'go to drawer 2', 'go to drawer 3', 'go to drawer 4', 'go to dresser 1', 'go to garbagecan 1', 'go to handtowelholder 1', 'go to sinkbasin 1', 'go to sinkbasin 2', 'go to toilet 1', 'go to toiletpaperhanger 1', 'go to towelholder 1', 'inventory', 'look']


In [121]:
state_history, action_history = env.get_state_and_action_history()
value, _ = llmzero.value_model.get_value(state, state_history, action_history)
print(value)

--------------------------------------

State: -= Welcome to TextWorld, ALFRED! =-

You are in the middle of a room. Looking quickly around you, you see a bathtubbasin 1, a cabinet 4, a cabinet 3, a cabinet 2, a cabinet 1, a countertop 1, a drawer 4, a drawer 3, a drawer 2, a drawer 1, a dresser 1, a garbagecan 1, a handtowelholder 1, a sinkbasin 2, a sinkbasin 1, a toilet 1, a toiletpaperhanger 1, and a towelholder 1.

Your task is to: put a candle in toilet.
Action: go to cabinet 4


State: You arrive at loc 10. The cabinet 4 is closed.
Action: go to dresser 1


State: You arrive at loc 20. On the dresser 1, you see a spraybottle 1.
Action: go to cabinet 2


State: You arrive at loc 12. The cabinet 2 is closed.
Action: open cabinet 2


State: You open the cabinet 2. The cabinet 2 is open. In it, you see a soapbar 2.


**Steps**:
1. The task is to put a candle in the toilet. The goal is not reached as the object has not been placed yet. Move to step 2.
2. The target object (candle) ha

In [120]:
state, _, _, _, _ = env.step('open cabinet 2')
valid_actions_text = env.get_valid_actions_text(state)
state_text = env.state_to_text(state)
print(state_text)
print(valid_actions_text)

You open the cabinet 2. The cabinet 2 is open. In it, you see a soapbar 2.
['close cabinet 2', 'examine cabinet 2', 'examine garbagecan 1', 'go to bathtubbasin 1', 'go to cabinet 1', 'go to cabinet 3', 'go to cabinet 4', 'go to countertop 1', 'go to drawer 1', 'go to drawer 2', 'go to drawer 3', 'go to drawer 4', 'go to dresser 1', 'go to handtowelholder 1', 'go to sinkbasin 1', 'go to sinkbasin 2', 'go to toilet 1', 'go to toiletpaperhanger 1', 'go to towelholder 1', 'inventory', 'look', 'take soapbar 2 from cabinet 2', 'take soapbottle 2 from garbagecan 1', 'take toiletpaper 2 from garbagecan 1']


### Transition

In [57]:
state, _ = env.reset()

In [58]:
valid_actions_text = env.get_valid_actions_text(state)
state_text = env.state_to_text(state)
print(state_text)
print(valid_actions_text)

-= Welcome to TextWorld, ALFRED! =-

You are in the middle of a room. Looking quickly around you, you see a armchair 1, a bed 1, a cabinet 4, a cabinet 3, a cabinet 2, a cabinet 1, a drawer 6, a drawer 5, a drawer 4, a drawer 3, a drawer 2, a drawer 1, a dresser 1, a garbagecan 1, a sidetable 2, and a sidetable 1.

Your task is to: put a cellphone in bed.
['go to armchair 1', 'go to bed 1', 'go to cabinet 1', 'go to cabinet 2', 'go to cabinet 3', 'go to cabinet 4', 'go to drawer 1', 'go to drawer 2', 'go to drawer 3', 'go to drawer 4', 'go to drawer 5', 'go to drawer 6', 'go to dresser 1', 'go to garbagecan 1', 'go to sidetable 1', 'go to sidetable 2', 'inventory', 'look']


In [59]:
action = 'go to drawer 6'
state, _, _, _, _ = env.step(action)

valid_actions_text = env.get_valid_actions_text(state)
state_text = env.state_to_text(state)
print(state_text)
print(valid_actions_text)

You arrive at loc 1. On the drawer 6, you see nothing.
['examine drawer 6', 'go to armchair 1', 'go to bed 1', 'go to cabinet 1', 'go to cabinet 2', 'go to cabinet 3', 'go to cabinet 4', 'go to drawer 1', 'go to drawer 2', 'go to drawer 3', 'go to drawer 4', 'go to drawer 5', 'go to dresser 1', 'go to garbagecan 1', 'go to sidetable 1', 'go to sidetable 2', 'inventory', 'look']


In [60]:
state

{'text_state': 'You arrive at loc 1. On the drawer 6, you see nothing.',
 'valid_actions': ['examine drawer 6',
  'go to armchair 1',
  'go to bed 1',
  'go to cabinet 1',
  'go to cabinet 2',
  'go to cabinet 3',
  'go to cabinet 4',
  'go to drawer 1',
  'go to drawer 2',
  'go to drawer 3',
  'go to drawer 4',
  'go to drawer 5',
  'go to dresser 1',
  'go to garbagecan 1',
  'go to sidetable 1',
  'go to sidetable 2',
  'inventory',
  'look']}

In [61]:
pred_state = {'text_state': 'You arrive at loc 1. On the drawer 6, you see nothing.',
 'valid_actions': ['examine drawer 6',
  'go to armchair 1',
  'go to bed 1',
  'go to cabinet 1',
  'go to cabinet 2',
  'go to cabinet 3',
  'go to cabinet 4',
  'go to drawer 1',
  'go to drawer 2',
  'go to drawer 3',
  'go to drawer 4',
  'go to drawer 5',
  'go to dresser 1',
  'go to garbagecan 1',
  'go to sidetable 1',
  'go to sidetable 2',
  'inventory',
  'look']}
pred_action = 'examine drawer 6'

In [62]:
def append_state_action_to_prompt(prompt, s, a):
    prompt += "\n**State**: "
    prompt += s['text_state']
    prompt += "\n**Valid actions**: "
    prompt += ', '.join(s['valid_actions'])
    prompt += "\n**Action**: "
    prompt += a
    prompt += "\n\n"
    return prompt

# construct user prompt
state_history, action_history = env.get_state_and_action_history()
user_prompt = ""
for s_h, a_h in zip(state_history, action_history):
    user_prompt = append_state_action_to_prompt(user_prompt, s_h, a_h)
user_prompt = append_state_action_to_prompt(user_prompt, pred_state, pred_action)
print(user_prompt)


**State**: -= Welcome to TextWorld, ALFRED! =-

You are in the middle of a room. Looking quickly around you, you see a armchair 1, a bed 1, a cabinet 4, a cabinet 3, a cabinet 2, a cabinet 1, a drawer 6, a drawer 5, a drawer 4, a drawer 3, a drawer 2, a drawer 1, a dresser 1, a garbagecan 1, a sidetable 2, and a sidetable 1.

Your task is to: put a cellphone in bed.
**Valid actions**: go to armchair 1, go to bed 1, go to cabinet 1, go to cabinet 2, go to cabinet 3, go to cabinet 4, go to drawer 1, go to drawer 2, go to drawer 3, go to drawer 4, go to drawer 5, go to drawer 6, go to dresser 1, go to garbagecan 1, go to sidetable 1, go to sidetable 2, inventory, look
**Action**: go to drawer 6


**State**: You arrive at loc 1. On the drawer 6, you see nothing.
**Valid actions**: examine drawer 6, go to armchair 1, go to bed 1, go to cabinet 1, go to cabinet 2, go to cabinet 3, go to cabinet 4, go to drawer 1, go to drawer 2, go to drawer 3, go to drawer 4, go to drawer 5, go to dresser 

In [63]:
# Prediction
llm_state, _ = llmzero.transition_model.get_next_state(pred_state, pred_action, state_history, action_history)
print(llm_state)

{'text_state': ' On the drawer 6, you see nothing.\n', 'valid_actions': ['examine drawer 6', 'go to armchair 1', 'go to bed 1', 'go to cabinet 1', 'go to cabinet 2', 'go to cabinet 3', 'go to cabinet 4', 'go to drawer 1', 'go to drawer 2', 'go to drawer 3', 'go to drawer 4', 'go to drawer 5', 'go to dresser 1', 'go to garbagecan 1', 'go to sidetable 1', 'go to sidetable 2', 'inventory', 'look']}


In [145]:
valid_actions_text = env.get_valid_actions_text(llm_state)
state_text = env.state_to_text(llm_state)
print(state_text)
print(valid_actions_text)

 You open the cabinet 2. The cabinet 2 is open. In it, you see a soapbottle 2, a candle 1, and a sponge 1.

['close cabinet 2', 'examine cabinet 2', 'go to bathtubbasin 1', 'go to cabinet 1', 'go to cabinet 3', 'go to cabinet 4', 'go to countertop 1', 'go to drawer 1', 'go to drawer 2', 'go to drawer 3', 'go to drawer 4', 'go to dresser 1', 'go to handtowelholder 1', 'go to sinkbasin 1', 'go to sinkbasin 2', 'go to toilet 1', 'go to toiletpaperhanger 1', 'go to towelholder 1', 'inventory', 'look', 'take soapbottle 2 from cabinet 2', 'take candle 1 from cabinet 2', 'take sponge 1 from cabinet 2']


In [35]:
# Actual Value
actual_state, _, _, _, _ = env.step(pred_action)

valid_actions_text = env.get_valid_actions_text(actual_state)
state_text = env.state_to_text(actual_state)
print(state_text)
print(valid_actions_text)

The drawer 2 is closed.
['examine drawer 2', 'go to armchair 1', 'go to bed 1', 'go to cabinet 1', 'go to cabinet 2', 'go to cabinet 3', 'go to cabinet 4', 'go to drawer 1', 'go to drawer 3', 'go to drawer 4', 'go to drawer 5', 'go to drawer 6', 'go to dresser 1', 'go to garbagecan 1', 'go to sidetable 1', 'go to sidetable 2', 'inventory', 'look', 'open drawer 2']


### Reward

In [34]:
state, _ = env.reset()
valid_actions_text = env.get_valid_actions_text(state)
state_text = env.state_to_text(state)
print(state_text)
print(valid_actions_text)

-= Welcome to TextWorld, ALFRED! =-

You are in the middle of a room. Looking quickly around you, you see a bed 1, a desk 1, a drawer 2, a drawer 1, a garbagecan 1, a shelf 8, a shelf 7, a shelf 6, a shelf 5, a shelf 4, a shelf 3, a shelf 2, a shelf 1, and a sidetable 1.

Your task is to: put a book in bed.
['go to bed 1', 'go to desk 1', 'go to drawer 1', 'go to drawer 2', 'go to garbagecan 1', 'go to shelf 1', 'go to shelf 2', 'go to shelf 3', 'go to shelf 4', 'go to shelf 5', 'go to shelf 6', 'go to shelf 7', 'go to shelf 8', 'go to sidetable 1', 'inventory', 'look']


In [42]:
action = 'go to desk 1'
state, _, _, _, _ = env.step(action)

valid_actions_text = env.get_valid_actions_text(state)
state_text = env.state_to_text(state)
print(state_text)
print(valid_actions_text)

You arrive at loc 15. On the desk 1, you see a book 1.
['examine desk 1', 'go to bed 1', 'go to drawer 1', 'go to drawer 2', 'go to garbagecan 1', 'go to shelf 1', 'go to shelf 2', 'go to shelf 3', 'go to shelf 4', 'go to shelf 5', 'go to shelf 6', 'go to shelf 7', 'go to shelf 8', 'go to sidetable 1', 'inventory', 'look', 'take book 1 from desk 1']


In [43]:
state_history, action_history = env.get_state_and_action_history()
reward, done, _ = llmzero.reward_model.get_reward_done(state, state_history, action_history)
# print(reward)
# print(done)


State: -= Welcome to TextWorld, ALFRED! =-

You are in the middle of a room. Looking quickly around you, you see a bed 1, a desk 1, a drawer 2, a drawer 1, a garbagecan 1, a shelf 8, a shelf 7, a shelf 6, a shelf 5, a shelf 4, a shelf 3, a shelf 2, a shelf 1, and a sidetable 1.

Your task is to: put a book in bed.
Action: go to desk 1

State: You arrive at loc 15. On the desk 1, you see a book 1.
Action: go to bed 1

State: You arrive at loc 14. On the bed 1, you see a laptop 1, a pillow 2, and a pillow 1.
Action: go to desk 1

State: You arrive at loc 15. On the desk 1, you see a book 1.
Action: go to desk 1

State: Nothing happens.
Action: go to bed 1

State: You arrive at loc 14. On the bed 1, you see a laptop 1, a pillow 2, and a pillow 1.
Action: go to desk 1

State: You arrive at loc 15. On the desk 1, you see a book 1.

**Steps**:
1. The goal is to put **book** in **bed**. The book is not in the bed. Goal not reached, IS_TERMINAL=False. Move to step 2.
2. We did not pick up the