https://www.gymlibrary.dev/content/environment_creation/

In [1]:
import gym
from gym import spaces
import pygame
import numpy as np

The __init__ method of our environment will accept the integer size, which determines the size of the square grid. We will set up some variables for rendering and define self.observation_space and self.action_space. In our case, observations should provide information about the location of the agent and the target on the 2-dimensional grid. We will choose to represent observations in the form of dictionaries with keys "agent" and "target". An observation may look like {"agent": array([1, 0]), "target": array([0, 3])}. Since we have 4 actions in our environment (“right”, “up”, “left”, “down”), we will use Discrete(4) as the action space. Here is the declaration of GridWorldEnv and the implementation of __init__:

In [None]:
class GridWorldEnv(gym.Env):
    """A square grid world in which an agent walks towards a fixed target.

    Observations are dictionaries ``{"agent": array([x, y]), "target":
    array([x, y])}`` holding integer grid coordinates in ``[0, size - 1]``.
    The four discrete actions move the agent one cell along an axis; moves
    that would leave the grid are clipped at the border. An episode
    terminates (with reward 1) when the agent steps onto the target; every
    other step yields reward 0.
    """

    metadata = {"render_modes": ["human", "rgb_array"], "render_fps": 4}

    def __init__(self, render_mode=None, size=5):
        """Set up the spaces and the (lazily created) rendering state.

        Args:
            render_mode: ``None``, ``"human"`` or ``"rgb_array"``.
            size: Side length of the square grid, in cells. Must be >= 2.

        Raises:
            ValueError: If ``size`` is smaller than 2.
            AssertionError: If ``render_mode`` is not a supported mode.
        """
        # With a single cell the target can never differ from the agent, so
        # the rejection sampling in `reset` would loop forever; fail early.
        if size < 2:
            raise ValueError(f"size must be an integer >= 2, got {size!r}")

        self.size = size  # The size of the square grid
        self.window_size = 512  # The size of the PyGame window

        # Observations are dictionaries with the agent's and the target's
        # location.
        self.observation_space = spaces.Dict(
            {
                "agent": spaces.Box(0, size - 1, shape=(2,), dtype=int),
                "target": spaces.Box(0, size - 1, shape=(2,), dtype=int),
            }
        )
        # We have 4 actions, corresponding to "right", "up", "left", "down"
        self.action_space = spaces.Discrete(4)
        # The following dictionary maps abstract actions from
        # `self.action_space` to the direction we will walk in if that
        # action is taken.
        self._action_to_direction = {
            0: np.array([1, 0]),
            1: np.array([0, 1]),
            2: np.array([-1, 0]),
            3: np.array([0, -1]),
        }

        assert render_mode is None or render_mode in self.metadata["render_modes"], (
            f"unsupported render_mode: {render_mode!r}"
        )
        self.render_mode = render_mode

        # If human-rendering is used, `self.window` will be a reference to
        # the window that we draw to, and `self.clock` will be a clock that
        # is used to ensure that the environment is rendered at the correct
        # framerate in human-mode. They will remain `None` until human-mode
        # is used for the first time.
        self.window = None
        self.clock = None

    def _get_obs(self):
        """Return the current observation dictionary."""
        return {"agent": self._agent_location, "target": self._target_location}

    def _get_info(self):
        """Return auxiliary info: the Manhattan (L1) distance to the target."""
        return {
            "distance": np.linalg.norm(
                self._agent_location - self._target_location, ord=1
            )
        }

    def reset(self, seed=None, options=None):
        """Start a new episode with random, distinct agent/target locations.

        Args:
            seed: Optional seed forwarded to ``gym.Env.reset`` to seed
                ``self.np_random``.
            options: Unused; accepted for API compatibility.

        Returns:
            A tuple ``(observation, info)``.
        """
        # We need the following line to seed self.np_random
        super().reset(seed=seed)

        # Choose the agent's location uniformly at random
        self._agent_location = self.np_random.integers(
            0, self.size, size=2, dtype=int
        )

        # We will sample the target's location randomly until it does not
        # coincide with the agent's location. This terminates because
        # `__init__` guarantees the grid has more than one cell.
        self._target_location = self._agent_location
        while np.array_equal(self._target_location, self._agent_location):
            self._target_location = self.np_random.integers(
                0, self.size, size=2, dtype=int
            )

        observation = self._get_obs()
        info = self._get_info()

        if self.render_mode == "human":
            self._render_frame()

        return observation, info

    def step(self, action):
        """Move the agent one cell and report the outcome.

        Args:
            action: An element of {0, 1, 2, 3} ("right", "up", "left",
                "down"). Any other value raises ``KeyError``.

        Returns:
            A tuple ``(observation, reward, terminated, truncated, info)``;
            ``truncated`` is always ``False``.
        """
        # Map the action (element of {0,1,2,3}) to the direction we walk in
        direction = self._action_to_direction[action]
        # We use `np.clip` to make sure we do not leave the grid
        self._agent_location = np.clip(
            self._agent_location + direction, 0, self.size - 1
        )

        # An episode is done if the agent has reached the target
        terminated = np.array_equal(self._agent_location, self._target_location)
        reward = 1 if terminated else 0
        observation = self._get_obs()
        info = self._get_info()

        if self.render_mode == "human":
            self._render_frame()

        return observation, reward, terminated, False, info

    def render(self):
        """Return an RGB frame in ``"rgb_array"`` mode, otherwise ``None``.

        In ``"human"`` mode frames are drawn automatically by ``reset`` and
        ``step``, so there is nothing to do here.
        """
        if self.render_mode == "rgb_array":
            return self._render_frame()
        return None

    def _render_frame(self):
        """Draw the current state; show it ("human") or return it as pixels."""
        # The window and clock are created lazily, on first human-mode use.
        if self.window is None and self.render_mode == "human":
            pygame.init()
            pygame.display.init()
            self.window = pygame.display.set_mode(
                (self.window_size, self.window_size)
            )
        if self.clock is None and self.render_mode == "human":
            self.clock = pygame.time.Clock()

        canvas = pygame.Surface((self.window_size, self.window_size))
        canvas.fill((255, 255, 255))
        # The size of a single grid square in pixels
        pix_square_size = self.window_size / self.size

        # First we draw the target
        pygame.draw.rect(
            canvas,
            (255, 0, 0),
            pygame.Rect(
                pix_square_size * self._target_location,
                (pix_square_size, pix_square_size),
            ),
        )
        # Now we draw the agent
        pygame.draw.circle(
            canvas,
            (0, 0, 255),
            (self._agent_location + 0.5) * pix_square_size,
            pix_square_size / 3,
        )

        # Finally, add some gridlines
        for i in range(self.size + 1):
            offset = pix_square_size * i
            pygame.draw.line(
                canvas, (0, 0, 0), (0, offset), (self.window_size, offset), width=3
            )
            pygame.draw.line(
                canvas, (0, 0, 0), (offset, 0), (offset, self.window_size), width=3
            )

        if self.render_mode == "human":
            # Copy our drawings from `canvas` to the visible window
            self.window.blit(canvas, canvas.get_rect())
            pygame.event.pump()
            pygame.display.update()

            # Keep human-rendering at the predefined framerate; `tick` adds
            # the delay needed to hold it stable.
            self.clock.tick(self.metadata["render_fps"])
            return None

        # rgb_array: pygame stores pixels as (width, height, 3); transpose
        # to the conventional (height, width, 3) image layout.
        return np.transpose(
            np.array(pygame.surfarray.pixels3d(canvas)), axes=(1, 0, 2)
        )

    def close(self):
        """Shut down the PyGame window, if one was opened."""
        if self.window is not None:
            pygame.display.quit()
            pygame.quit()
            # Drop the stale handles so a later render re-creates the window
            # instead of drawing to a destroyed one.
            self.window = None
            self.clock = None

