In [1]:
import gym
import minerl
import copy
from collections import OrderedDict
from logging import getLogger
import cv2
from collections import deque

# from env_wrappers import (SerialDiscreteActionWrapper, FrameSkip, PoVWithCompassAngleWrapper)
from chainerrl.wrappers.atari_wrappers import LazyFrames
import numpy as np
                          
logger = getLogger(__name__)
# import logging

# Download Single experiment
# minerl.data.download('/home/sapanostic/Courses/WPI_RL/Project/minerl_data/',experiment='MineRLObtainDiamond-v0')

Please reinstall CuPy after you install cudnn
(see https://docs-cupy.chainer.org/en/stable/install.html#install-cudnn).
  'cuDNN is not enabled.\n'


In [3]:
# Uncomment below line to see the logging process
# logging.basicConfig(level=logging.DEBUG)
env = gym.make('MineRLNavigate-v0')

In [4]:
obs = env.reset()

### Observation
The obs variable will be a dictionary containing the following observations returned by the environment. In the case of the MineRLNavigate-v0 environment, three observations are returned:

1) pov: an RGB image of the agent’s first person perspective (64,64,3)

2) compassAngle, a float giving the angle of the agent to its (approximate) target

3) inventory, a dictionary containing the amount of 'dirt' blocks in the agent’s inventory



In [5]:
print(env.action_space)
print("number of objects in Observation Dict: ", len(obs))
print("First person view (pov) image size: ", np.shape(obs['pov']))
print("Inventory: ", obs['inventory'])

Dict(attack:Discrete(2), back:Discrete(2), camera:Box(2,), forward:Discrete(2), jump:Discrete(2), left:Discrete(2), place:Enum(none,dirt), right:Discrete(2), sneak:Discrete(2), sprint:Discrete(2))
number of objects in Observation Dict:  3
First person view (pov) image size:  (64, 64, 3)
Inventory:  {'dirt': 0}


In [6]:
print(env.observation_space.spaces['pov'])
a_ = env.observation_space.spaces['pov']
print(a_.shape[2])

Box(64, 64, 3)
3


In [9]:
class FrameStack(gym.Wrapper):
    def __init__(self, env, k, channel_order='hwc', use_tuple=False):
        """Stack k last frames.

        Returns lazy array, which is much more memory efficient.
        """
        gym.Wrapper.__init__(self, env)
        self.k = k
        self.observations = deque([], maxlen=k)
        self.stack_axis = {'hwc': 2, 'chw': 0}[channel_order]
        self.use_tuple = use_tuple

        if self.use_tuple:
            pov_space = env.observation_space[0]
            inv_space = env.observation_space[1]
        else:
            pov_space = env.observation_space

        low_pov = np.repeat(pov_space.low, k, axis=self.stack_axis)
        high_pov = np.repeat(pov_space.high, k, axis=self.stack_axis)
        pov_space = gym.spaces.Box(low=low_pov, high=high_pov, dtype=pov_space.dtype)

        if self.use_tuple:
            low_inv = np.repeat(inv_space.low, k, axis=0)
            high_inv = np.repeat(inv_space.high, k, axis=0)
            inv_space = gym.spaces.Box(low=low_inv, high=high_inv, dtype=inv_space.dtype)
            self.observation_space = gym.spaces.Tuple(
                (pov_space, inv_space))
        else:
            self.observation_space = pov_space

    def reset(self):
        ob = self.env.reset()
        for _ in range(self.k):
            self.observations.append(ob)
        return self._get_ob()

    def step(self, action):
        ob, reward, done, info = self.env.step(action)
        self.observations.append(ob)
        return self._get_ob(), reward, done, info

    def _get_ob(self):
        assert len(self.observations) == self.k
        if self.use_tuple:
            frames = [x[0] for x in self.observations]
            inventory = [x[1] for x in self.observations]
            return (LazyFrames(list(frames), stack_axis=self.stack_axis),
                    LazyFrames(list(inventory), stack_axis=0))
        else:
            return LazyFrames(list(self.observations), stack_axis=self.stack_axis)

class FrameSkip(gym.Wrapper):
    """Return every `skip`-th frame and repeat given action during skip.

    Note that this wrapper does not "maximize" over the skipped frames.
    """
    def __init__(self, env, skip=4):
        super().__init__(env)

        self._skip = skip

    def step(self, action):
        total_reward = 0.0
        for _ in range(self._skip):
            obs, reward, done, info = self.env.step(action)
            total_reward += reward
            if done:
                break
        return obs, total_reward, done, info


class PoVWithCompassAngleWrapper(gym.ObservationWrapper):
    """Take 'pov' value (current game display) and concatenate compass angle information with it, as a new channel of image;
    resulting image has RGB+compass (or K+compass for gray-scaled image) channels.
    """
    def __init__(self, env):
        super().__init__(env)

        self._compass_angle_scale = 180 / 255  # NOTE: `ScaledFloatFrame` will scale the pixel values with 255.0 later

        pov_space = self.env.observation_space.spaces['pov']
        compass_angle_space = self.env.observation_space.spaces['compassAngle']

        low = self.observation({'pov': pov_space.low, 'compassAngle': compass_angle_space.low})
        high = self.observation({'pov': pov_space.high, 'compassAngle': compass_angle_space.high})

        self.observation_space = gym.spaces.Box(low=low, high=high)

    def observation(self, observation):
        pov = observation['pov']
        compass_scaled = observation['compassAngle'] / self._compass_angle_scale
        compass_channel = np.ones(shape=list(pov.shape[:-1]) + [1], dtype=pov.dtype) * compass_scaled
        return np.concatenate([pov, compass_channel], axis=-1)

class GrayScaleWrapper(gym.ObservationWrapper):
    def __init__(self, env, dict_space_key=None):
        super().__init__(env)

        self._key = dict_space_key

        if self._key is None:
            original_space = self.observation_space
        else:
            original_space = self.observation_space.spaces[self._key]
        height, width = original_space.shape[0], original_space.shape[1]

        # sanity checks
        ideal_image_space = gym.spaces.Box(low=0, high=255, shape=(height, width, 3), dtype=np.uint8)
        if original_space != ideal_image_space:
            raise ValueError('Image space should be {}, but given {}.'.format(ideal_image_space, original_space))
        if original_space.dtype != np.uint8:
            raise ValueError('Image should `np.uint8` typed, but given {}.'.format(original_space.dtype))

        height, width = original_space.shape[0], original_space.shape[1]
        new_space = gym.spaces.Box(low=0, high=255, shape=(height, width, 1), dtype=np.uint8)
        if self._key is None:
            self.observation_space = new_space
        else:
            new_space_dict = copy.deepcopy(self.observation_space)
            new_space_dict.spaces[self._key] = new_space
            self.observation_space = new_space_dict

    def observation(self, obs):
        if self._key is None:
            frame = obs
        else:
            frame = obs[self._key]
        frame = cv2.cvtColor(frame, cv2.COLOR_RGB2GRAY)
        frame = np.expand_dims(frame, -1)
        if self._key is None:
            obs = frame
        else:
            obs[self._key] = frame
        return obs
    
class SerialDiscreteActionWrapper(gym.ActionWrapper):
    """Convert MineRL env's `Dict` action space as a serial discrete action space.

    The term "serial" means that this wrapper can only push one key at each step.
    "attack" action will be alwarys triggered.

    Parameters
    ----------
    env
        Wrapping gym environment.
    always_keys
        List of action keys, which should be always pressed throughout interaction with environment.
        If specified, the "noop" action is also affected.
    reverse_keys
        List of action keys, which should be always pressed but can be turn off via action.
        If specified, the "noop" action is also affected.
    exclude_keys
        List of action keys, which should be ignored for discretizing action space.
    exclude_noop
        The "noop" will be excluded from discrete action list.
    num_camera_discretize
        Number of discretization of yaw control (must be odd).
    allow_pitch
        If specified, this wrapper appends commands to control pitch.
    max_camera_range
        Maximum value of yaw control.
    """

    BINARY_KEYS = ['forward', 'back', 'left', 'right', 'jump', 'sneak', 'sprint', 'attack']

    def __init__(self, env, always_keys=None, reverse_keys=None, exclude_keys=None, exclude_noop=False,
                 num_camera_discretize=3, allow_pitch=False,
                 max_camera_range=10):
        super().__init__(env)

        self.always_keys = [] if always_keys is None else always_keys
        self.reverse_keys = [] if reverse_keys is None else reverse_keys
        self.exclude_keys = [] if exclude_keys is None else exclude_keys
        if len(set(self.always_keys) | set(self.reverse_keys) | set(self.exclude_keys)) != \
                len(self.always_keys) + len(self.reverse_keys) + len(self.exclude_keys):
            raise ValueError('always_keys ({}) or reverse_keys ({}) or exclude_keys ({}) intersect each other.'.format(
                self.always_keys, self.reverse_keys, self.exclude_keys))
        self.exclude_noop = exclude_noop

        self.wrapping_action_space = self.env.action_space
        self.num_camera_discretize = num_camera_discretize
        self._noop_template = OrderedDict([
            ('forward', 0),
            ('back', 0),
            ('left', 0),
            ('right', 0),
            ('jump', 0),
            ('sneak', 0),
            ('sprint', 0),
            ('attack', 0),
            ('camera', np.zeros((2, ), dtype=np.float32)),
            # 'none', 'dirt' (Obtain*:)+ 'stone', 'cobblestone', 'crafting_table', 'furnace', 'torch'
            ('place', 0),
            # (Obtain* tasks only) 'none', 'wooden_axe', 'wooden_pickaxe', 'stone_axe', 'stone_pickaxe', 'iron_axe', 'iron_pickaxe'
            ('equip', 0),
            # (Obtain* tasks only) 'none', 'torch', 'stick', 'planks', 'crafting_table'
            ('craft', 0),
            # (Obtain* tasks only) 'none', 'wooden_axe', 'wooden_pickaxe', 'stone_axe', 'stone_pickaxe', 'iron_axe', 'iron_pickaxe', 'furnace'
            ('nearbyCraft', 0),
            # (Obtain* tasks only) 'none', 'iron_ingot', 'coal'
            ('nearbySmelt', 0),
        ])
        for key, space in self.wrapping_action_space.spaces.items():
            if key not in self._noop_template:
                raise ValueError('Unknown action name: {}'.format(key))

        # get noop
        self.noop = copy.deepcopy(self._noop_template)
        for key in self._noop_template:
            if key not in self.wrapping_action_space.spaces:
                del self.noop[key]

        # check&set always_keys
        for key in self.always_keys:
            if key not in self.BINARY_KEYS:
                raise ValueError('{} is not allowed for `always_keys`.'.format(key))
            self.noop[key] = 1
        logger.info('always pressing keys: {}'.format(self.always_keys))
        # check&set reverse_keys
        for key in self.reverse_keys:
            if key not in self.BINARY_KEYS:
                raise ValueError('{} is not allowed for `reverse_keys`.'.format(key))
            self.noop[key] = 1
        logger.info('reversed pressing keys: {}'.format(self.reverse_keys))
        # check exclude_keys
        for key in self.exclude_keys:
            if key not in self.noop:
                raise ValueError('unknown exclude_keys: {}'.format(key))
        logger.info('always ignored keys: {}'.format(self.exclude_keys))

        # get each discrete action
        self._actions = [self.noop]
        for key in self.noop:
            if key in self.always_keys or key in self.exclude_keys:
                continue
            if key in self.BINARY_KEYS:
                # action candidate : {1}  (0 is ignored because it is for noop), or {0} when `reverse_keys`.
                op = copy.deepcopy(self.noop)
                if key in self.reverse_keys:
                    op[key] = 0
                else:
                    op[key] = 1
                self._actions.append(op)
            elif key == 'camera':
                # action candidate : {[0, -max_camera_range], [0, -max_camera_range + delta_range], ..., [0, max_camera_range]}
                # ([0, 0] is excluded)
                delta_range = max_camera_range * 2 / (self.num_camera_discretize - 1)
                if self.num_camera_discretize % 2 == 0:
                    raise ValueError('Number of camera discretization must be odd.')
             
                for i in range(self.num_camera_discretize):
                    op = copy.deepcopy(self.noop)
                    if i < self.num_camera_discretize // 2:
                        op[key] = np.array([0, -max_camera_range + delta_range * i], dtype=np.float32)
                    elif i > self.num_camera_discretize // 2:
                        op[key] = np.array([0, -max_camera_range + delta_range * (i - 1)], dtype=np.float32)
                    elif i == self.num_camera_discretize // 2:
                        op[key] = np.array([0, -max_camera_range + delta_range * (i + 1)], dtype=np.float32)                            
                    else:
                        continue
                    self._actions.append(op)

                if allow_pitch:
                    for i in range(self.num_camera_discretize):
                        op = copy.deepcopy(self.noop)
                        if i < self.num_camera_discretize // 2:
                            op[key] = np.array([-max_camera_range + delta_range * i, 0], dtype=np.float32)
                        elif i > self.num_camera_discretize // 2:
                            op[key] = np.array([-max_camera_range + delta_range * (i - 1), 0], dtype=np.float32)
                        else:
                            continue
                        self._actions.append(op)

            elif key in {'place', 'equip', 'craft', 'nearbyCraft', 'nearbySmelt'}:
                # action candidate : {1, 2, ..., len(space)-1}  (0 is ignored because it is for noop)
                for a in range(1, self.wrapping_action_space.spaces[key].n):
                    op = copy.deepcopy(self.noop)
                    op[key] = a
                    self._actions.append(op)
        if self.exclude_noop:
            del self._actions[0]

        n = len(self._actions)
        self.action_space = gym.spaces.Discrete(n)
        logger.info('{} is converted to {}.'.format(self.wrapping_action_space, self.action_space))

    def action(self, action):
        if not self.action_space.contains(action):
            raise ValueError('action {} is invalid for {}'.format(action, self.action_space))

        original_space_action = self._actions[action]
        logger.debug('discrete action {} -> original action {}'.format(action, original_space_action))
        return original_space_action
    
    def sample(self):
        rand_action = np.random.choice(np.array(range(self.action_space.n)),1)
        return rand_action[0]  

In [10]:
always_keys = ['forward', 'attack', 'jump']
exclude_keys = ['back', 'place', 'sneak']
reverse_keys=None
exclude_noop=False
num_camera_discretize=3
allow_pitch=False
max_camera_range=10


# env_Gray = GrayScaleWrapper(env, dict_space_key='pov')

env_FSkip= FrameSkip(env)
env_Gray = GrayScaleWrapper(env_FSkip, dict_space_key='pov')
env_pov_comm = PoVWithCompassAngleWrapper(env_Gray)
env_FStack = FrameStack(env_pov_comm, 4)

env_serial = SerialDiscreteActionWrapper(env_FStack, always_keys, reverse_keys, exclude_keys, exclude_noop,
                 num_camera_discretize, allow_pitch,
                 max_camera_range)

print('Action Space: ', env_serial.action_space.n)
print('observation_space: ', env_serial.observation_space.shape)

# print(env_serial.noop)
print(env_serial.action(1))

#0:Noop 
#1:left 
#2:right 
#3:sprint 
#4:Camera[0,-10] 
#5:Camera[0,10]
#6:Camera[0,0]

Action Space:  7
observation_space:  (64, 64, 8)
OrderedDict([('forward', 1), ('back', 0), ('left', 1), ('right', 0), ('jump', 1), ('sneak', 0), ('sprint', 0), ('attack', 1), ('camera', array([0., 0.], dtype=float32)), ('place', 0)])


In [None]:
import cv2
from matplotlib import pyplot as plt
obs_ = np.asarray(obs[:,:,:3]/255, dtype=np.float32)
# print(np.shape(obs_[:,:,:3]))
# print(obs_[:,:,0:3])
plt.imshow(obs_[:,:,0:3])
y = cv2.cvtColor(obs_[:,:,:3], cv2.COLOR_RGB2GRAY)
print(y)
np.concatenate((y, obs[:,:,3]), axis=2)

In [63]:
done = False
net_reward = 0
obs_ = env_serial.reset()

for i in range(100):
    a_ = env_serial.sample()
    print(a_)
#     action_ = env_serial.action(a_)
#     print(action_)
    obs, reward, done, info = env_serial.step(a_)
    net_reward += reward
    print("Total reward: ", net_reward)
    
# env_serial.close()    

3
Total reward:  0.0
3
Total reward:  0.0
5
Total reward:  0.0
2
Total reward:  0.0
4
Total reward:  0.0
6
Total reward:  0.0
3
Total reward:  0.0
0
Total reward:  0.0
0
Total reward:  0.0
2
Total reward:  0.0
6
Total reward:  0.0
3
Total reward:  0.0
4
Total reward:  0.0
2
Total reward:  0.0
0
Total reward:  0.0
5
Total reward:  0.0
6
Total reward:  0.0
4
Total reward:  0.0
3
Total reward:  0.0
3
Total reward:  0.0
1
Total reward:  0.0
5
Total reward:  0.0
2
Total reward:  0.0
3
Total reward:  0.0
2
Total reward:  0.0
0
Total reward:  0.0
2
Total reward:  0.0
5
Total reward:  0.0
6
Total reward:  0.0
2
Total reward:  0.0
2
Total reward:  0.0
3
Total reward:  0.0
4
Total reward:  0.0
5
Total reward:  0.0
6
Total reward:  0.0
0
Total reward:  0.0
3
Total reward:  0.0
1
Total reward:  0.0
2
Total reward:  0.0
0
Total reward:  0.0
1
Total reward:  0.0
3
Total reward:  0.0
6
Total reward:  0.0
4
Total reward:  0.0
3
Total reward:  0.0
3
Total reward:  0.0
5
Total reward:  0.0
6
Total rewar

In [None]:
print(info['life'])

In [95]:
print(np.array(obs_._frames).shape)
s_ = np.array(obs_._frames)
c = s_.swapaxes(1,3)
print(c.shape)
d = c.reshape(8,64,64)
print(d.shape)
d[0]

(4, 64, 64, 2)
(4, 2, 64, 64)
(8, 64, 64)


array([[157., 157., 157., ...,  70.,  70., 101.],
       [157., 157., 157., ...,  70., 101., 102.],
       [157., 157., 157., ..., 101., 102., 102.],
       ...,
       [157., 157., 157., ...,  99., 100.,  98.],
       [157., 157., 157., ...,  96.,  99.,  99.],
       [157., 157., 156., ..., 101.,  96.,  99.]])

array([[[[157.        , 157.        , 157.        , 157.        ],
         [157.        , 157.        , 157.        , 157.        ],
         [157.        , 157.        , 157.        , 157.        ],
         ...,
         [157.        , 157.        , 157.        , 157.        ],
         [157.        , 157.        , 157.        , 157.        ],
         [157.        , 157.        , 157.        , 157.        ]],

        [[157.        , 157.        , 157.        , 157.        ],
         [157.        , 157.        , 157.        , 157.        ],
         [157.        , 157.        , 157.        , 157.        ],
         ...,
         [157.        , 157.        , 157.        , 157.        ],
         [157.        , 157.        , 157.        , 157.        ],
         [157.        , 157.        , 157.        , 157.        ]],

        [[157.        , 157.        , 157.        , 157.        ],
         [157.        , 157.        , 157.        , 157.        ],
         [157.        , 157.  