In [11]:
from gym import Env, spaces
import numpy as np
import random

In [12]:
class GridWorld(Env):
    """Grid-world environment whose cells carry the rewards given at construction.

    The agent occupies one cell of a 2-D reward grid and moves one cell per
    step in one of four directions. Entering a cell yields that cell's reward;
    an action that would leave the grid keeps the agent where it is and yields
    ``OUT_OF_BOUNDS_REWARD`` instead.

    Positions are 2-element integer arrays ``[i, j]`` that index
    ``rewards[i][j]``.
    """

    # An episode is reported as done once this many steps have been taken.
    MAX_TRAJECTORY_LENGTH = 9
    # Reward returned when an action would move the agent off the grid.
    OUT_OF_BOUNDS_REWARD = -1

    def __init__(self, rewards):
        """Create the environment and place the agent on a random cell.

        Args:
            rewards: non-empty 2-D grid (numpy array or nested sequence) with
                one reward per cell.

        Raises:
            ValueError: if ``rewards`` has no rows or no columns.
        """
        if len(rewards) == 0 or len(rewards[0]) == 0:
            raise ValueError("rewards must be a non-empty 2-D grid")
        self.rewards = rewards
        self.action_space = spaces.Discrete(4)
        # Observations are grid coordinates: one discrete axis per grid dimension.
        self.observation_space = spaces.MultiDiscrete(
            [len(rewards), len(rewards[0])]
        )
        # Offsets are expressed in index space, i.e. added to [i, j].
        self.action_directions = {
            0: np.array([1, 0]),   # N
            1: np.array([0, 1]),   # E
            2: np.array([-1, 0]),  # S
            3: np.array([0, -1]),  # W
        }
        self.reset()

    def _get_action(self, action):
        """Return the ``[di, dj]`` offset for a discrete action id (0-3)."""
        return self.action_directions[int(action)]

    def _get_next_location_reward(self, action):
        """Compute where ``action`` leads and the reward collected there.

        Returns:
            ``(location, reward)``. For an off-grid move the location is a copy
            of the current position and the reward is ``OUT_OF_BOUNDS_REWARD``.
        """
        next_location = self.current_position + self._get_action(action)
        if not self.is_valid(next_location):
            return self.current_position.copy(), self.OUT_OF_BOUNDS_REWARD
        # Index per axis: indexing with the array itself would select whole
        # rows (fancy indexing) rather than the single cell.
        i, j = next_location
        return next_location, self.rewards[i][j]

    def is_valid(self, location):
        """Return True when ``location`` (``[i, j]``) lies inside the grid."""
        i, j = location
        return bool(
            0 <= i < len(self.rewards) and 0 <= j < len(self.rewards[0])
        )

    def step(self, action):
        """Apply ``action`` and advance the episode by one step.

        Returns:
            ``(observation, reward, done, info)`` where ``observation`` is the
            agent's new position, ``done`` becomes True on the step that brings
            the episode to ``MAX_TRAJECTORY_LENGTH`` steps, and ``info`` is an
            empty dict.
        """
        next_location, reward = self._get_next_location_reward(action)
        # Commit the move so that subsequent steps start from the new cell.
        self.current_position = next_location
        self.trajectory_size += 1
        done = self.trajectory_size >= self.MAX_TRAJECTORY_LENGTH
        info = {}
        return self.current_position.copy(), reward, done, info

    def reset(self):
        """Start a new episode on a uniformly random cell and return its position."""
        x_pos = np.random.randint(len(self.rewards))
        y_pos = np.random.randint(len(self.rewards[0]))
        self.current_position = np.array([x_pos, y_pos])
        self.trajectory_size = 0
        return self.current_position.copy()

    def seed(self, seed=42):
        """Seed numpy's global random generator, which ``reset`` draws from."""
        np.random.seed(seed)
        return [seed]

    def render(self, mode="human"):
        """Rendering is not implemented; this is a no-op."""
        pass

    def close(self):
        """Nothing to release; this is a no-op."""
        pass
        
        

In [13]:
# Instantiate the environment on a 5x5 grid of randomly generated rewards.
x = GridWorld(rewards=np.random.random(size=(5, 5)))

['MAX_TRAJECTORY_LENGTH',
 '__class__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__enter__',
 '__eq__',
 '__exit__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 '_get_action',
 '_get_next_location_reward',
 'action_directions',
 'action_space',
 'close',
 'current_position',
 'is_valid',
 'metadata',
 'observation_space',
 'render',
 'reset',
 'reward_range',
 'rewards',
 'seed',
 'spec',
 'step',
 'trajectory_size',
 'unwrapped']