In [3]:
import warnings
warnings.filterwarnings("ignore")

In [4]:
import sys
import gym
import warnings
import babyai_text
import ollama

In [5]:
# Maps BabyAI actions to text:
action_to_text = {
    0: "turn left",
    1: "turn right",
    2: "go forward",
    3: "pick up",
    4: "drop",
    5: "toggle",
    6: "done",
}
text_to_action = {v: k for k, v in action_to_text.items()}

In [18]:
# Helper functions
def get_instructions(goal: str):
    return f"""
You are an agent playing a simple navigation game. Your goal is to **{goal}**. The following are the possible actions you can take in the game, followed by a short description of each action:

turn left: turn to the left,
turn right: turn to the right,
go forward: take one step forward,
pick up: pick up the object below you,
drop: drop the object that you are holding,
toggle: manipulate the object in front of you.

In a moment I will present you an observation.

Tips:
- Once the desired object you want to interact or pickup in front of you, you can use the 'toggle' action to interact with it.
- It doesn't make sense to repeat the same action over and over if the observation doesn't change.

PLAY!"""

def parse_observation(obs, info) -> str:
    """
    Returns a string with the textual descriptions stored in info['descriptions']
    """
    out = ""
    for description in info["descriptions"]:
        out += description + "\n"
    return out


def parse_action(action: str) -> int:
    """
    Returns the action ID corresponding to the given action text, or None if the action is invalid.
    """
    return text_to_action.get(action, None)

invalid_action_message = "Invalid action, the valid actions are: " + ", ".join(action_to_text.values()) + ".\n"
invalid_action_message += "Please output one of the above actions and nothing else."
print(invalid_action_message)

Invalid action, the valid actions are: turn left, turn right, go forward, pick up, drop, toggle, done.
Please output one of the above actions and nothing else.


In [20]:
# Set-up
env = gym.make("BabyAI-GoToObj-v0")
model = "llama3.2"

In [60]:
obs, info = env.reset()
goal = obs["mission"]
done = False
instructions = get_instructions(goal)
obs_text = parse_observation(obs, info)
messages = [
{"role": "system", "content": instructions},
    {"role": "user", "content": obs_text},
]
print(instructions)
print(obs_text)

while not done:
    
    # get action from LLM
    action_text = None
    while action_text not in text_to_action:
        if action_text is not None:
            messages.append({"role": "user", "content": invalid_action_message})
            print(invalid_action_message)
        response = ollama.chat(model, messages)
        action_text = response.message.content.lower().strip()
        messages.append({"role": "assistant", "content": action_text})
        print(action_text)
    
    # apply action
    action = text_to_action[action_text]
    obs, reward, done, info = env.step(action)
    obs_text = parse_observation(obs, info)
    messages.append({"role": "user", "content": obs_text})
    print(obs_text)
    
    if done:
        print("Done!")
        print(f"Reward: {reward}")
        break


You are an agent playing a simple navigation game. Your goal is to **go to the blue key**. The following are the possible actions you can take in the game, followed by a short description of each action:

turn left: turn to the left,
turn right: turn to the right,
go forward: take one step forward,
pick up: pick up the object below you,
drop: drop the object that you are holding,
toggle: manipulate the object in front of you.

In a moment I will present you an observation.

Tips:
- Once the desired object you want to interact or pickup in front of you, you can use the 'toggle' action to interact with it.
- It doesn't make sense to repeat the same action over and over if the observation doesn't change.

PLAY!
You see a wall 1 step forward
You see a wall 3 steps left
You see a blue key 2 steps right

i'll start by going forward, trying to get away from the wall. 

go forward
Invalid action, the valid actions are: turn left, turn right, go forward, pick up, drop, toggle, done.
Please out

## Observations
- Small models (Like SmolLM2-135M-Instruct and DistilGPT2) are not able to follow instructions well at all. It is difficult to get them to even respond with a valid action. It is likely that they won't be able to reason effectively either.
- A larger model llama3.2:3b is able to complete the game sometimes.