In [1]:
# Change the working directory to the shared source root so that the project
# packages imported in the following cell can be resolved.
import os

# Remember the directory the kernel started in the first time this cell runs.
# Resolving the relative path from that fixed anchor keeps the cell idempotent:
# re-running it no longer climbs three more levels from the already-changed
# working directory.
if "_NOTEBOOK_START_DIR" not in globals():
    _NOTEBOOK_START_DIR = os.getcwd()
os.chdir(os.path.abspath(os.path.join(_NOTEBOOK_START_DIR, '../../../00_src')))

In [2]:
import numpy as np
from agents.agent_ollama import AgentOllama
from environment.environment import SokobanEnvImpl
import environment.util as env_util
import environment.const as env_const
import environment.visualization as env_vis
from knowledge_graph.knowledge_graph import KnowledgeGraph
%matplotlib inline

In [None]:
# Number of this run; it is formatted into the file name of the saved animation.
attempt = 1

# Action constants, pulled into the notebook namespace for brevity.
UP = env_const.UP
DOWN = env_const.DOWN
LEFT = env_const.LEFT
RIGHT = env_const.RIGHT

# Tile constants, pulled into the notebook namespace for brevity.
WALL = env_const.WALL
FLOOR = env_const.FLOOR
BOX_TARGET = env_const.BOX_TARGET
BOX_ON_TARGET = env_const.BOX_ON_TARGET
BOX = env_const.BOX
PLAYER = env_const.PLAYER

# Environment (default level) and the knowledge graph built on top of it.
env = SokobanEnvImpl(use_default_env=True)
kg = KnowledgeGraph(env)

In [4]:
# System message that describes the task to the model. It is kept verbatim:
# the recorded outputs of this notebook were produced with exactly this text.
SYSTEM_PROMPT = """
You are an agent who plays the game Sokoban.
As input you get the game state and all possible actions.
As output you choose one of the possible action ["UP" "DOWN" "LEFT" "RIGHT"].
The game is finished, when every BOX is placed on top of a BOX_TARGET.
"""

# Create the LLM-backed player and send the system message; the model's reply
# is the value of the cell's last expression and is therefore displayed.
agent_player = AgentOllama("qwen2.5-coder:7b")
agent_player.prompt([("system", SYSTEM_PROMPT)])

'Sure! Please provide me with the current game state and the list of possible actions so I can make my move.'

In [5]:
# Tile constant -> caption used when the board is rendered as text.
room_caption_map = {
    WALL: "WALL",
    FLOOR: "FLOOR",
    BOX_TARGET: "BOX_TARGET",
    BOX_ON_TARGET: "BOX_ON_TARGET",
    BOX: "BOX",
    PLAYER: "PLAYER",
}

# Action constant -> caption; 0 is the fallback action used when the model's
# reply names no known action.
action_caption_map = {
    0: "WAIT",
    UP: "UP",
    DOWN: "DOWN",
    LEFT: "LEFT",
    RIGHT: "RIGHT",
}

# Caption -> action constant, used to decode the model's reply.
caption_action_map = {
    "UP": UP,
    "DOWN": DOWN,
    "LEFT": LEFT,
    "RIGHT": RIGHT,
}

def generate_prompt():
    """Build the human-turn message for the current game situation.

    Translates ``env.room_state`` through ``room_caption_map`` and the result
    of ``kg.get_possible_actions()`` through ``action_caption_map``, then
    embeds both caption arrays (in their NumPy string form) in the message.

    Returns:
        A list holding a single ``("human", text)`` tuple.
    """
    to_room_caption = np.vectorize(room_caption_map.get)
    to_action_caption = np.vectorize(action_caption_map.get)

    board = to_room_caption(env.room_state)
    moves = to_action_caption(kg.get_possible_actions())

    message = f"game state :\n{board}\npossible actions:\n{moves}"
    return [("human", message)]

def doStep(step:int) -> bool:
    """Apply one action to the environment and refresh the knowledge graph.

    Args:
        step: Action passed unchanged to ``env.step``.

    Returns:
        The ``done`` flag, i.e. the third of the four values returned by
        ``env.step``.
    """
    # Only the termination flag is needed here; the other values are ignored.
    _observation, _reward, finished, _info = env.step(step)
    # Refresh the graph only after the environment has moved on.
    kg.update()
    return finished

In [None]:
# Play until the environment reports that the episode is over, recording every
# action so the run can be replayed afterwards.
done = False
trajectory = []

while not done:
    agent_answer = agent_player.prompt(generate_prompt())

    # Collect the action of every caption that occurs in the reply, in the
    # order of caption_action_map.
    # NOTE(review): this is a plain substring test, and when the reply names
    # several actions the one listed last in caption_action_map wins, not the
    # one the model mentions first or last. Confirm this tie-break is intended.
    mentioned_actions = [
        action for caption, action in caption_action_map.items()
        if caption in agent_answer
    ]
    # Fall back to 0 (do nothing) when no known caption is found.
    next_action = mentioned_actions[-1] if mentioned_actions else 0

    print(action_caption_map.get(next_action), end=" ")
    trajectory.append(next_action)
    done = doStep(next_action)

# Reset the environment once the episode has finished.
env.reset()

RIGHT RIGHT RIGHT RIGHT RIGHT RIGHT RIGHT RIGHT RIGHT RIGHT RIGHT RIGHT RIGHT RIGHT RIGHT RIGHT RIGHT RIGHT LEFT RIGHT RIGHT RIGHT RIGHT RIGHT RIGHT RIGHT RIGHT RIGHT RIGHT RIGHT RIGHT RIGHT RIGHT RIGHT RIGHT RIGHT RIGHT RIGHT RIGHT RIGHT RIGHT LEFT RIGHT LEFT RIGHT LEFT RIGHT LEFT RIGHT LEFT RIGHT LEFT RIGHT LEFT RIGHT LEFT RIGHT LEFT RIGHT LEFT RIGHT LEFT RIGHT LEFT RIGHT LEFT RIGHT LEFT RIGHT LEFT RIGHT LEFT RIGHT LEFT RIGHT LEFT RIGHT LEFT RIGHT LEFT RIGHT LEFT RIGHT LEFT RIGHT LEFT RIGHT LEFT RIGHT LEFT RIGHT LEFT RIGHT LEFT RIGHT LEFT RIGHT LEFT RIGHT LEFT RIGHT LEFT RIGHT LEFT RIGHT LEFT RIGHT LEFT RIGHT LEFT RIGHT LEFT RIGHT LEFT RIGHT LEFT RIGHT LEFT RIGHT LEFT 

In [7]:
# Show the agent's message chain: the (role, text) tuples exchanged so far.
agent_player.chain

[('system',
  '\nYou are an agent who plays the game Sokoban.\nAs input you get the game state and all possible actions.\nAs output you choose one of the possible action ["UP" "DOWN" "LEFT" "RIGHT"].\nThe game is finished, when every BOX is placed on top of a BOX_TARGET.\n'),
 ('ai',
  'Sure! Please provide me with the current game state and the list of possible actions so I can make my move.'),
 ('human',
  "game state :\n[['WALL' 'WALL' 'WALL' 'WALL' 'WALL' 'WALL' 'WALL' 'WALL' 'WALL' 'WALL']\n ['WALL' 'WALL' 'WALL' 'WALL' 'WALL' 'WALL' 'WALL' 'WALL' 'WALL' 'WALL']\n ['WALL' 'WALL' 'WALL' 'WALL' 'WALL' 'WALL' 'WALL' 'WALL' 'WALL' 'WALL']\n ['WALL' 'WALL' 'WALL' 'FLOOR' 'FLOOR' 'WALL' 'FLOOR' 'WALL' 'WALL'\n  'WALL']\n ['WALL' 'FLOOR' 'FLOOR' 'BOX' 'PLAYER' 'BOX_TARGET' 'FLOOR' 'FLOOR'\n  'FLOOR' 'WALL']\n ['WALL' 'FLOOR' 'FLOOR' 'FLOOR' 'BOX_TARGET' 'BOX' 'FLOOR' 'BOX' 'FLOOR'\n  'WALL']\n ['WALL' 'FLOOR' 'FLOOR' 'FLOOR' 'WALL' 'FLOOR' 'FLOOR' 'BOX_TARGET'\n  'FLOOR' 'WALL']\n ['WALL

In [None]:
# Replay the recorded trajectory and save it as a GIF; the attempt number is
# zero-padded to two digits in the file name.
gif_path = f"../03_resource/09_LLM_KG/output/default_env_{attempt:02}_attempt_trajectory.gif"
env_vis.animate(env=env, path=trajectory, save_ani=gif_path, dpi=300)