In [1]:
import os
from functools import partial

import matplotlib.pyplot as plt
import numpy as np
from gym.spaces.discrete import Discrete
from gym_sokoban.envs import SokobanEnv
from matplotlib.animation import FuncAnimation, PillowWriter
%matplotlib inline

In [2]:
class SokobanEnvFixated(SokobanEnv):
    """Sokoban environment that always starts from the same hand-written 10x10 room.

    ``reset`` installs the hard-coded layout below instead of generating a
    random puzzle, so every replay starts from an identical board.

    NOTE(review): the cell codes presumably follow gym_sokoban's encoding
    (0 wall, 1 floor, 2 target, 4 box, 5 player) -- only the player code 5 is
    relied on directly in this file; confirm the rest against the library.
    """

    # Board without movable pieces (player and boxes left out).
    _ROOM_FIXED_ROWS = (
        (0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
        (0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
        (0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
        (0, 0, 0, 1, 1, 0, 1, 0, 0, 0),
        (0, 1, 1, 1, 1, 2, 1, 1, 1, 0),
        (0, 1, 1, 1, 2, 1, 1, 1, 1, 0),
        (0, 1, 1, 1, 0, 1, 1, 2, 1, 0),
        (0, 2, 1, 0, 0, 0, 0, 0, 0, 0),
        (0, 1, 1, 0, 0, 0, 0, 0, 0, 0),
        (0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
    )
    # Same board including the starting positions of the player (5) and the pieces coded 4.
    _ROOM_STATE_ROWS = (
        (0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
        (0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
        (0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
        (0, 0, 0, 1, 1, 0, 1, 0, 0, 0),
        (0, 1, 1, 4, 5, 2, 1, 1, 1, 0),
        (0, 1, 1, 1, 2, 4, 1, 4, 1, 0),
        (0, 1, 1, 1, 0, 1, 1, 2, 1, 0),
        (0, 2, 4, 0, 0, 0, 0, 0, 0, 0),
        (0, 1, 1, 0, 0, 0, 0, 0, 0, 0),
        (0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
    )

    def __init__(self):
        # reset=True makes the base constructor call our reset() override,
        # so the fixed room is in place as soon as the object exists.
        super().__init__(
            dim_room=(10, 10),
            max_steps=40,
            num_boxes=4,
            num_gen_steps=None,
            reset=True,
        )
        # Restrict the agent to action ids 0..4 (the original notes this as
        # "limit to push actions").
        self.action_space = Discrete(5)

    def reset(self, second_player=False, render_mode='rgb_array'):
        """Restore the fixed starting board and return its rendering.

        Args:
            second_player: Accepted for signature compatibility; not used here.
            render_mode: Mode forwarded to ``self.render`` for the returned observation.

        Returns:
            Whatever ``self.render(render_mode)`` produces for the starting board.
        """
        # Fresh arrays on every reset so changes made while stepping never
        # leak into the next episode.
        self.room_fixed = np.array(self._ROOM_FIXED_ROWS)
        self.room_state = np.array(self._ROOM_STATE_ROWS)
        # (row, col) pairs; the keys are the cells coded 2 in room_fixed and the
        # values are the cells coded 4 in room_state.
        self.box_mapping = {(6, 7): (5, 7), (5, 4): (5, 5), (4, 5): (4, 3), (7, 1): (7, 2)}

        # First (row, col) cell holding the player code.
        self.player_position = np.argwhere(self.room_state == 5)[0]

        # Episode bookkeeping starts from zero.
        self.num_env_steps = 0
        self.reward_last = 0
        self.boxes_on_target = 0

        return self.render(render_mode)

In [3]:
# Action ids handed to env.step() for the four directions used in the recorded paths.
UP, DOWN, LEFT, RIGHT = 1, 2, 3, 4
# (dx, dy) offset per action in (x = column, y = row) coordinates; y grows downwards.
directions = {UP: (0, -1), DOWN: (0, 1), LEFT: (-1, 0), RIGHT: (1, 0)}


def findPlayer(room_state):
    """Return the (x, y) position of the first cell equal to 5 (the player).

    Args:
        room_state: 2-D grid (nested sequence or array) of cell codes, indexed
            as ``room_state[y][x]``.

    Returns:
        Tuple ``(x, y)`` -- column index first, row index second.

    Raises:
        ValueError: If no cell holds the player code 5.
    """
    for y, row in enumerate(room_state):
        for x, val in enumerate(row):
            if val == 5:
                return (x, y)
    raise ValueError("room_state contains no player tile (value 5)")

def _format_step_label(step_indexes, max_shown=4):
    """Build the label text for the 0-based step indexes that share one midpoint.

    Step numbers are shown 1-based, two per line; when more than ``max_shown``
    steps share the spot, an ellipsis is appended after the last shown number.
    """
    shown = step_indexes[:max_shown]
    last = len(shown) - 1
    parts = []
    for pos, index in enumerate(shown):
        parts.append(str(index + 1))
        if pos != last:
            # Line break after every second number, comma otherwise.
            parts.append('\n' if pos % 2 == 1 else ', ')
    if len(step_indexes) > max_shown:
        parts.append('...')
    return ''.join(parts)


def take_step(env, path, step, fig, draw_arrows):
    """Advance the replay to ``path[step]`` and optionally redraw the trajectory overlay.

    Without arrows the environment is simply stepped once, continuing from its
    current state. With arrows the environment is reset and the whole prefix
    ``path[:step + 1]`` is replayed, so the overlay (one arrow per step plus
    step-number labels) can be rebuilt from scratch on the figure's axes.

    Args:
        env: Environment exposing ``reset()``, ``step(action)`` and ``room_state``.
        path: Sequence of action ids.
        step: 0-based index of the last action to apply.
        fig: Figure whose current axes (``fig.gca()``) carry the overlay.
        draw_arrows: Whether to draw the trajectory overlay.
    """
    if not draw_arrows:
        env.step(path[step])
        return

    ax = fig.gca()

    # Drop the previous frame's overlay. Snapshot both containers first:
    # removing while iterating a live list skips elements. Arrows live in
    # ax.patches and previously were never removed, so they piled up on
    # every frame. NOTE: this clears every text and patch on the axes.
    for artist in list(ax.texts) + list(ax.patches):
        artist.remove()
    env.reset()

    # Pixel geometry; assumes each tile is rendered as 16x16 px (as in the
    # original scaling) -- TODO confirm against env.render.
    tile_px = 16
    half_tile = tile_px // 2
    arrow_inset = 1  # pulls both arrow ends in so consecutive arrows do not touch

    def centre(cell):
        return cell * tile_px + half_tile

    # Replay the prefix and record (step index, start cell, end cell, direction).
    moves = []
    for index, action in enumerate(path[:step + 1]):
        start = findPlayer(env.room_state)
        env.step(action)
        end = findPlayer(env.room_state)
        delta = directions.get(action)
        if delta is None:
            # No direction is known for this action (e.g. a no-op): nothing to draw.
            continue
        if start == end:
            # The player did not move: point the arrow at the cell it tried to enter.
            end = (start[0] + delta[0], start[1] + delta[1])
        moves.append((index, start, end, delta))

    # Draw one arrow per move and group step indexes by arrow midpoint.
    steps_by_mid = {}
    for index, start, end, delta in moves:
        inset_x = delta[0] * arrow_inset
        inset_y = delta[1] * arrow_inset
        ax.arrow(
            centre(start[0]) + inset_x,
            centre(start[1]) + inset_y,
            centre(end[0]) - centre(start[0]) - 2 * inset_x,
            centre(end[1]) - centre(start[1]) - 2 * inset_y,
            head_width=2,
            head_length=2,
            fc='green',
            ec='green',
            length_includes_head=True
        )
        mid = ((centre(end[0]) + centre(start[0])) / 2,
               (centre(end[1]) + centre(start[1])) / 2)
        steps_by_mid.setdefault(mid, []).append(index)

    # One label per midpoint listing the steps that crossed it.
    for mid, indexes in steps_by_mid.items():
        ax.text(mid[0], mid[1], _format_step_label(indexes), color='white', fontsize=6,
                ha='center', va='center', bbox=dict(boxstyle="round,pad=0.2", fc="black", ec="none", alpha=0.7))

def animate(frame, env, path, im, fig, draw_arrows=False):
    """Per-frame callback: bring ``env`` to the given frame and refresh the image.

    Frame 0 shows the freshly reset environment; frame ``k`` (k >= 1) shows the
    state after ``path[k - 1]`` has been applied via ``take_step``.

    Args:
        frame: Frame number supplied by the animation.
        env: Environment being replayed.
        path: Sequence of action ids.
        im: Image artist whose pixel data is replaced every frame.
        fig: Figure forwarded to ``take_step`` for the overlay.
        draw_arrows: Forwarded to ``take_step``.

    Returns:
        A one-element list holding ``im``.
    """
    if frame != 0:
        take_step(env, path, frame - 1, fig, draw_arrows)
    else:
        env.reset()

    rendered = env.render("rgb_array")
    im.set_array(rendered)
    return [im]

def animate_path(env, path, saveAs):
    """Replay ``path`` in ``env`` and export it as two GIF animations.

    Two files named ``<saveAs>.gif`` are written:
      * ``./animations/gif/`` -- the plain replay,
      * ``./animations/gif_trajectory/`` -- the replay with arrows and step
        numbers overlaid.
    Missing output directories are created. The figure is always closed
    afterwards, even if saving fails.

    Args:
        env: The Sokoban environment.
        path: List of actions [UP, DOWN, LEFT, RIGHT] representing the agent's trajectory.
        saveAs: Base file name (without extension) for both GIFs.
    """
    fig = plt.figure(figsize=(5, 5), dpi=300, frameon=False)
    try:
        # A single borderless axes that fills the whole canvas.
        ax = plt.Axes(fig, [0., 0., 1., 1.])
        ax.set_axis_off()
        fig.add_axes(ax)

        # Placeholder image; every frame overwrites its pixel data.
        im = ax.imshow(env.render("rgb_array"))

        # Same export twice: first without, then with the trajectory overlay.
        # Frame 0 resets the environment, so the second pass starts clean.
        for out_dir, draw_arrows in (("./animations/gif/", False),
                                     ("./animations/gif_trajectory/", True)):
            os.makedirs(out_dir, exist_ok=True)
            frame_callback = partial(animate, env=env, path=path, im=im, fig=fig, draw_arrows=draw_arrows)
            ani = FuncAnimation(fig, frame_callback, frames=len(path)+1)
            ani.save(out_dir + saveAs + ".gif", dpi=300, writer=PillowWriter(fps=3), savefig_kwargs={"pad_inches":0})
    finally:
        # Close this specific figure rather than whichever one is current.
        plt.close(fig)

In [4]:
# Human attempt: replay the recorded actions on a fresh fixed-layout environment
# and export both GIFs under the name "SokobanHumanAttempt".
env = SokobanEnvFixated()
path = [RIGHT, RIGHT, RIGHT, DOWN, LEFT, LEFT, UP, LEFT, UP, LEFT, DOWN, LEFT, DOWN, DOWN, RIGHT, UP, LEFT, UP, RIGHT, RIGHT, LEFT, LEFT, LEFT, DOWN, DOWN, DOWN, DOWN, RIGHT, UP, LEFT, UP, UP, RIGHT, RIGHT, DOWN, LEFT, UP, LEFT, DOWN]
animate_path(env, path, "SokobanHumanAttempt")

In [5]:
# ChatGPT-4o, instruction set 1: replay the recorded actions and export both GIFs
# under the name "SokobanGPT4oSetAttempt1".
env = SokobanEnvFixated()
path = [RIGHT, DOWN, LEFT, UP, LEFT, DOWN, RIGHT, RIGHT, UP, LEFT, DOWN, LEFT, UP]
animate_path(env, path, "SokobanGPT4oSetAttempt1")

In [6]:
# ChatGPT-4o, instruction set 2: replay the recorded actions and export both GIFs
# under the name "SokobanGPT4oSetAttempt2".
env = SokobanEnvFixated()
path = [RIGHT, DOWN, RIGHT, UP, LEFT, DOWN, LEFT, DOWN, RIGHT, RIGHT, UP, LEFT, LEFT, DOWN]
animate_path(env, path, "SokobanGPT4oSetAttempt2")

In [7]:
# ChatGPT-4o (Reasoning), instruction set: replay the recorded actions and export
# both GIFs under the name "SokobanGPT4oReasoningSetAttempt".
env = SokobanEnvFixated()
path = [RIGHT, RIGHT, DOWN, LEFT, LEFT, UP, LEFT, UP]
animate_path(env, path, "SokobanGPT4oReasoningSetAttempt")

In [8]:
# ChatGPT-4o, step by step: replay the recorded actions and export both GIFs
# under the name "SokobanGPT4oStepAttempt".
env = SokobanEnvFixated()
path = [RIGHT, DOWN, LEFT, DOWN, RIGHT, RIGHT, RIGHT, LEFT, LEFT, UP]
animate_path(env, path, "SokobanGPT4oStepAttempt")

In [9]:
# ChatGPT-4o (Reasoning), step by step: replay the recorded actions and export
# both GIFs under the name "SokobanGPT4oReasoningStepAttempt".
env = SokobanEnvFixated()
path = [LEFT, RIGHT, RIGHT, RIGHT, DOWN, LEFT, UP, RIGHT, RIGHT, DOWN, LEFT, UP, LEFT, UP, LEFT, UP, RIGHT, DOWN, DOWN, RIGHT, DOWN, LEFT, DOWN, RIGHT, UP, LEFT, UP, UP, UP, UP]
animate_path(env, path, "SokobanGPT4oReasoningStepAttempt")

In [10]:
# DeepSeek-R1, instruction set: replay the recorded actions and export both GIFs
# under the name "SokobanDeepseekR1SetAttempt".
env = SokobanEnvFixated()
path = [RIGHT, RIGHT, RIGHT, DOWN, LEFT, LEFT, UP, LEFT, RIGHT, DOWN, DOWN, LEFT, LEFT, LEFT, UP, UP, LEFT, DOWN, LEFT, UP]
animate_path(env, path, "SokobanDeepseekR1SetAttempt")

In [11]:
# DeepSeek-R1, step by step: replay the recorded actions and export both GIFs
# under the name "SokobanDeepseekR1StepAttempt".
env = SokobanEnvFixated()
path = [RIGHT, RIGHT, DOWN, LEFT, UP, RIGHT, RIGHT, DOWN, LEFT, LEFT, LEFT, LEFT, RIGHT, LEFT, LEFT, UP, RIGHT, RIGHT, DOWN, LEFT, UP, LEFT, LEFT, DOWN, DOWN, RIGHT, DOWN, LEFT]
animate_path(env, path, "SokobanDeepseekR1StepAttempt")

In [12]:
# Claude 3.7 Sonnet, instruction set 1: replay the recorded actions and export
# both GIFs under the name "SokobanClaude3.7SetAttempt1".
env = SokobanEnvFixated()
path = [RIGHT, RIGHT, UP, LEFT, DOWN, LEFT, UP, UP, RIGHT, DOWN, LEFT, DOWN, RIGHT, RIGHT, UP, LEFT, UP, LEFT, DOWN, RIGHT, DOWN, LEFT, UP, RIGHT, RIGHT, UP, LEFT, LEFT, DOWN, DOWN, RIGHT, UP, LEFT, UP, RIGHT, DOWN, DOWN, RIGHT, UP, UP, LEFT, DOWN, RIGHT, DOWN, LEFT, UP]
animate_path(env, path, "SokobanClaude3.7SetAttempt1")

In [13]:
# Claude 3.7 Sonnet, instruction set 2: replay the recorded actions and export
# both GIFs under the name "SokobanClaude3.7SetAttempt2".
env = SokobanEnvFixated()
path = [LEFT, DOWN, LEFT, LEFT, UP, RIGHT, RIGHT, DOWN, LEFT, UP, LEFT, DOWN, DOWN, RIGHT, UP, RIGHT, UP, LEFT, DOWN, LEFT, UP, UP, RIGHT, DOWN, LEFT, DOWN, RIGHT, RIGHT, UP, UP, LEFT, DOWN, RIGHT, DOWN, LEFT, LEFT, UP, RIGHT, RIGHT, UP, LEFT, LEFT, DOWN, LEFT, UP, RIGHT]
animate_path(env, path, "SokobanClaude3.7SetAttempt2")

In [14]:
# Claude 3.7 Sonnet, step by step: replay the recorded actions and export both
# GIFs under the name "SokobanClaude3.7StepAttempt".
env = SokobanEnvFixated()
path = [RIGHT, DOWN, RIGHT, RIGHT, LEFT, DOWN]
animate_path(env, path, "SokobanClaude3.7StepAttempt")

In [15]:
# DQN, attempt 1: replay the recorded actions and export both GIFs under the
# name "SokobanDQNAttempt1".
env = SokobanEnvFixated()
path = [RIGHT, RIGHT, RIGHT, DOWN, LEFT, LEFT, UP, LEFT, LEFT, DOWN, LEFT, LEFT, UP, RIGHT, RIGHT, RIGHT, DOWN, DOWN, DOWN, DOWN, DOWN, DOWN, DOWN, DOWN, DOWN, DOWN, DOWN, DOWN, DOWN, DOWN, DOWN, DOWN, DOWN, DOWN, DOWN, DOWN, DOWN, DOWN, DOWN, DOWN]
animate_path(env, path, "SokobanDQNAttempt1")

In [16]:
# DQN, attempt 2: replay the recorded actions and export both GIFs under the
# name "SokobanDQNAttempt2".
env = SokobanEnvFixated()
path = [RIGHT, RIGHT, RIGHT, DOWN, LEFT, DOWN, LEFT, UP, LEFT, UP, UP, LEFT, DOWN, LEFT, DOWN, RIGHT, LEFT, LEFT, DOWN, DOWN, DOWN, RIGHT, UP, UP, RIGHT, UP, LEFT, UP, LEFT, DOWN, DOWN]
animate_path(env, path, "SokobanDQNAttempt2")

In [17]:
# PPO
env = SokobanEnvFixated()
path = [RIGHT, RIGHT, RIGHT, DOWN, LEFT, LEFT, RIGHT, UP, UP, RIGHT, RIGHT, RIGHT, RIGHT, RIGHT, RIGHT, RIGHT, RIGHT, RIGHT, RIGHT, RIGHT, RIGHT, RIGHT, RIGHT, RIGHT, RIGHT, RIGHT, RIGHT, RIGHT, RIGHT, RIGHT, RIGHT, RIGHT, RIGHT, RIGHT, RIGHT, RIGHT, RIGHT, RIGHT, RIGHT, RIGHT, RIGHT, RIGHT, RIGHT, RIGHT, RIGHT, RIGHT, RIGHT, RIGHT, RIGHT, RIGHT, RIGHT, RIGHT, RIGHT, RIGHT, RIGHT, RIGHT, RIGHT, RIGHT, RIGHT, RIGHT, RIGHT, RIGHT, RIGHT, RIGHT, RIGHT, RIGHT, RIGHT, RIGHT, RIGHT, RIGHT, RIGHT, RIGHT, RIGHT, RIGHT, RIGHT, RIGHT, RIGHT, RIGHT, RIGHT, RIGHT, RIGHT, RIGHT, RIGHT, RIGHT, RIGHT, RIGHT, RIGHT, RIGHT, RIGHT, RIGHT, RIGHT, RIGHT, RIGHT, RIGHT, RIGHT, RIGHT, RIGHT, RIGHT, RIGHT, RIGHT]
animate_path(env, path, "SokobanPPOAttempt1")

In [18]:
# PPO, attempt 2: replay the recorded actions and export both GIFs under the
# name "SokobanPPOAttempt2".
env = SokobanEnvFixated()
path = [RIGHT, RIGHT, RIGHT, DOWN, LEFT, LEFT, UP, LEFT, LEFT, DOWN, LEFT, LEFT, UP, RIGHT, RIGHT, RIGHT, LEFT, DOWN, LEFT, DOWN, LEFT, DOWN, DOWN, RIGHT, UP, UP, RIGHT, UP, LEFT, UP, LEFT, DOWN, DOWN]
animate_path(env, path, "SokobanPPOAttempt2")

In [19]:
# Breadth-first search: replay the recorded actions and export both GIFs under
# the name "SokobanBfsAttempt".
env = SokobanEnvFixated()
path = [RIGHT, RIGHT, RIGHT, DOWN, LEFT, LEFT, LEFT, LEFT, DOWN, LEFT, LEFT, DOWN, DOWN, RIGHT, UP, LEFT, UP, UP, RIGHT, RIGHT, DOWN, LEFT, UP, LEFT, DOWN, UP, UP, RIGHT, RIGHT, RIGHT]
animate_path(env, path, "SokobanBfsAttempt")