In [1]:
#from google.colab import drive
#drive.mount('/content/gdrive')
#path = '/content/gdrive/My Drive/Colab Notebooks/707/Labs/'
#import sys
#sys.path.append(path)

In [2]:
# !pip install torch
# !pip install -U "ray[rllib]" torch

In [1]:
from dungeon.dungeon import Dungeon, index_to_actions

## Step 1 - Make a Wrapper to use Dungeon with RLLib.

Use the following guides:

Environment wrapper:
https://github.com/ray-project/ray/blob/81dcf9ff351d62da9c7e0493213c01765c9a2534/rllib/examples/custom_env.py#L77

Gym spaces:
https://gym.openai.com/docs/




In [12]:
import gym
import random, math
import numpy as np

from ray.rllib.env.env_context import EnvContext
from ray.rllib.models import ModelCatalog

from gym.spaces import Discrete, Box

class DungeonEnv(gym.Env):
    """Class that wrapps the Dungeon Environment to make it 
    compatible with RLLib."""
    
    def __init__(self, config: EnvContext):

        # Get the size from the config.
        dungeon_size = config.get("size_env")

        # Create a dungeon instance
        self.dungeon = Dungeon(N=dungeon_size)

        # Define the action spaces
        # We will normalize observations between -1 ans 1.
        self.action_space = Discrete(3)
        self.observation_space = Box(low=-1, high=1, shape=(dungeon_size, dungeon_size))
        
    def reset(self):
        obs_dungeon = self.dungeon.reset()
        obs = self.convert_observations(obs_dungeon)
        return obs

    def step(self, action):
        assert action in [0, 1, 2, 3]
        action_str = index_to_actions[action].name

        obs_dungeon, reward, done = self.dungeon.step(action_str)
        obs = self.convert_observations(obs_dungeon)

        return obs, reward, done, {}

    def seed(self, seed=None):
        random.seed(seed)

    def convert_observations(self, dungeon_obs):
        
        # We normalize and concatenate observations
        # Look into the Dungeon class to know what dungeon obs contains.
        
        relative_coord = dungeon_obs["relative_coordinates"]
        surroundings = dungeon_obs["surroundings"]
        
        obs = np.concatenate([relative_coord, surroundings])

        return obs


## Use DQN and train your algorithm on the Dungeon environment.

You can take inspiration from:
https://docs.ray.io/en/latest/rllib/rllib-training.html#basic-python-api

Experiment with the different parameters of the configuration:
https://docs.ray.io/en/latest/rllib/rllib-algorithms.html#deep-q-networks-dqn-rainbow-parametric-dqn





In [13]:
import ray
import ray.rllib.agents.dqn as dqn
from ray.tune.logger import pretty_print

config = dqn.DEFAULT_CONFIG.copy()
config["framework"] = "torch"
config["env"] = DungeonEnv
config["env_config"] = { "size_env": 15}

# Modify the config to disable dueling, double_q
# Use a fully connected network with 2 layers of 64 hidden units.
# Use relu activation function
#...


trainer = dqn.DQNTrainer(config=config)

# Can optionally call trainer.restore(path) to load a checkpoint.

avg_rewards = []

for i in range(100):
    # Perform one iteration of training the policy with DQN
    result = trainer.train()
    #print(pretty_print(result))
    print(result['episode_reward_mean'])
    avg_rewards.append(result['episode_reward_mean'])

    if i % 10 == 0:
        checkpoint = trainer.save()
        print("checkpoint saved at", checkpoint)


ValueError: all the input arrays must have same number of dimensions, but the array at index 0 has 1 dimension(s) and the array at index 1 has 2 dimension(s)

In [None]:
import matplotlib.pyplot as plt
plt.plot(avg_rewards, 'b.', alpha=.1)