In [1]:
import gym
import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.optim as optim
from IPython.display import clear_output
from mazelab.generators import random_maze

from actor import Actor
from agent import Agent
from environment import Env, maze_config
from tabular import GVFTable


In [2]:
# PARAMETERS
# All tunable constants of the experiment live in this cell.

# --- Maze / environment ---
MAZE_SIZE = [10, 10]  # [width, height] handed to random_maze and to the GVF table
MAZE_COMPLEXITY = 0.6  # used for both `complexity` and `density` of the generated walls
MAX_STEPS_MAZE = 200  # max_episode_steps of the registered gym environment
NUM_ACTIONS = 4

# --- Training schedule ---
NUM_EPOCHS = 1500  # total number of episodes run by the training loop
CHANGE_MAZE_FREQ = 17  # a new maze is generated every CHANGE_MAZE_FREQ episodes
NUM_EXPLO_EPISODES = 15  # episodes per maze run with agent.exploration = True
BATCH_SIZE = 300  # forwarded to agent.train_one_epoch

# --- GVFs (tabular) ---
NUM_LEVELS_GVF = 6
GAMMA_GVFS = 0.99
LR_GVFS = 0.5

# --- Actor ---
NUM_PRIMITIVES = 4
LR_ACTOR = 1e-3  # learning rate of the actor's Adam optimizer

# --- Agent ---
EPSILON_EXPLO = 0.05  # passed to Agent at construction


In [3]:
# Register the maze environment in gym.
# Re-running this cell on a live kernel can raise because the id is already
# taken; that is reported instead of aborting the notebook. The underlying
# message is printed as well, since gym.error.Error is not limited to
# duplicate registrations.
try:
    gym.envs.register(
        id="RandomMaze-v0", entry_point=Env, max_episode_steps=MAX_STEPS_MAZE
    )
except gym.error.Error as err:
    print(f"Environment already registered ({err})")

env = gym.make("RandomMaze-v0")

# INITIALIZE Agent, GVFs and Actor
agent = Agent(env, EPSILON_EXPLO)
gvfs = GVFTable(MAZE_SIZE, NUM_LEVELS_GVF, NUM_ACTIONS, GAMMA_GVFS, LR_GVFS)
# NOTE(review): the meaning of the leading `1` is defined in actor.py (not
# visible here) -- consider naming it in the parameters cell.
actor = Actor(1, NUM_ACTIONS, NUM_PRIMITIVES)
# Use the `optim` alias imported at the top of the notebook.
actor_optimizer = optim.Adam(actor.parameters(), lr=LR_ACTOR)


In [4]:
# Train
# Every maze is used for CHANGE_MAZE_FREQ consecutive episodes. Within a maze,
# the first NUM_EXPLO_EPISODES episodes run with agent.exploration = True; only
# the remaining episodes are recorded in `total_reward` and plotted.
total_reward = []
for i in range(NUM_EPOCHS):
    clear_output(wait=True)
    episode_in_maze = i % CHANGE_MAZE_FREQ  # 0 on the first episode of each maze

    if episode_in_maze == 0:
        gvfs.reset()  # Reset GVFs to zero for the new maze

        # Set parameters for the new maze and create a new environment
        maze_config["shape"] = random_maze(
            width=MAZE_SIZE[0],
            height=MAZE_SIZE[1],
            complexity=MAZE_COMPLEXITY,
            density=MAZE_COMPLEXITY,
        )
        env = gym.make("RandomMaze-v0")
        free_positions = env.maze.objects.free.positions
        maze_config["goal_idx"] = [
            free_positions[np.random.randint(0, len(free_positions))]
        ]  # random goal position

    # Episodes at offset 1 and 2 from the start of each maze begin on the goal
    # cell to ease exploration; every other episode (including the very first
    # one on a maze) begins on a random free cell.
    # NOTE(review): the original comment said "first episode" -- confirm that
    # offsets 1 and 2, not offset 0, are the intended ones.
    if any((i - offset) % CHANGE_MAZE_FREQ == 0 for offset in (1, 2)):
        maze_config["start_idx"] = maze_config["goal_idx"]
    else:
        maze_config["start_idx"] = [
            free_positions[np.random.randint(0, len(free_positions))]
        ]

    # The first NUM_EXPLO_EPISODES episodes on each maze are exploration episodes.
    agent.exploration = episode_in_maze < NUM_EXPLO_EPISODES

    _ = env.reset()

    performance = agent.train_one_epoch(env, gvfs, actor, actor_optimizer, BATCH_SIZE)

    # Exploration episodes are excluded from the reward curve.
    if not agent.exploration:
        total_reward.append(performance)

    clear_output(wait=True)
    print(f"Episode: {i}     Reward:{performance} ")
    # np.round replaces np.round_, which was removed in NumPy 2.0
    print(np.round(gvfs.primitives, decimals=2))

    plt.plot(total_reward)
    plt.xlabel("Non-exploration episode")
    plt.ylabel("Reward")
    plt.show()

    # On the last episode of each maze, show one heat map per (level, primitive).
    if (i + 1) % CHANGE_MAZE_FREQ == 0:
        plt.figure(dpi=100)
        idx = 0
        for level in range(gvfs.num_levels):
            for prim in range(gvfs.num_primitives):
                idx += 1
                # Columns must match the inner loop (primitives); using
                # num_actions here only worked while both counts were equal.
                plt.subplot(gvfs.num_levels, gvfs.num_primitives, idx)
                plt.imshow(gvfs.values[:, :, level, prim])
                if level == 0:
                    # Label each column with its primitive's values (top row only)
                    plt.title(
                        np.round(gvfs.primitives[prim, :], decimals=1), fontsize=5
                    )
                plt.axis("off")
        plt.show()

Episode: 18     Reward:-140.0 
[[0.25 0.25 0.25 0.25]
 [0.3  0.26 0.21 0.23]
 [0.23 0.21 0.27 0.29]
 [0.25 0.26 0.26 0.22]]


KeyboardInterrupt: 