# Environment

In [2]:
import gymnasium as gym

In [3]:
env = gym.make("LunarLander-v2")

In [4]:
obs, info = env.reset()

In [5]:
obs

array([ 0.00797129,  1.4154449 ,  0.807394  ,  0.20107688, -0.00922997,
       -0.18288693,  0.        ,  0.        ], dtype=float32)

In [6]:
info

{}

In [7]:
env.observation_space

Box([-1.5       -1.5       -5.        -5.        -3.1415927 -5.
 -0.        -0.       ], [1.5       1.5       5.        5.        3.1415927 5.        1.
 1.       ], (8,), float32)

In [8]:
env.close()

In [9]:
env.reward_range

(-inf, inf)

In [10]:
env.action_space

Discrete(4)

# Spaces

In [11]:
import numpy as np

### Box

In [12]:
sp = gym.spaces.Box(low=-1.0, high=2.0, shape=(5,), dtype=np.float32)

In [13]:
sp.sample()

array([ 1.3424195 ,  1.1621513 ,  0.5732033 , -0.01178603,  1.3438237 ],
      dtype=float32)

### Dict

In [14]:
# The legacy `gym` package is not installed in this environment; the notebook
# uses its maintained successor `gymnasium` (imported as `gym` above), so the
# space classes are imported from there.
from gymnasium.spaces import Box, Discrete

In [15]:
gym.spaces.Dict({"position": gym.spaces.Box(-1, 1, shape=(2,)), "color": gym.spaces.Discrete(3)})

Dict('color': Discrete(3), 'position': Box(-1.0, 1.0, (2,), float32))

In [16]:
tx = gym.spaces.Text(5)

In [17]:
tx.sample()

'3A'

In [18]:
gym.spaces.Text(5)

Text(1, 5, charset=0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz)

# Own environment

In [23]:
from gymnasium import spaces
import pygame
import numpy as np

In [42]:
class GridWorld(gym.Env):
    """Minimal grid-world environment on a ``size`` x ``size`` integer grid.

    An agent and a target each occupy one cell. Every step moves the agent by
    one cell along one axis (clipped to the grid); the episode terminates with
    reward 1 as soon as the agent lands on the target cell.

    Observation: ``{"agent": array([x, y]), "target": array([x, y])}``.
    Actions: ``Discrete(4)`` indices into the move table built in ``__init__``.
    """

    def __init__(self, size):
        """Build the observation/action spaces for a ``size`` x ``size`` grid.

        Args:
            size: Number of cells along each side of the grid. Must be >= 2,
                otherwise ``reset`` could never place the target on a cell
                different from the agent's.

        Raises:
            ValueError: If ``size`` is smaller than 2.
        """
        if size < 2:
            raise ValueError(f"size must be at least 2, got {size}")

        self.size = size
        self.observation_space = spaces.Dict({
            "agent": spaces.Box(0, size - 1, shape=(2,), dtype=int),
            "target": spaces.Box(0, size - 1, shape=(2,), dtype=int),
        })

        self.action_space = spaces.Discrete(4)

        # Maps each discrete action to the displacement applied in `step`.
        self._action_to_direction = {
            0: np.array([1, 0]),
            1: np.array([0, 1]),
            2: np.array([-1, 0]),
            3: np.array([0, -1]),
        }
        # Public alias kept for backward compatibility: the table was
        # originally stored under this name, while `step` reads the
        # underscore-prefixed one.
        self.action_to_direction = self._action_to_direction

        # Placeholders until `reset` is called; shaped like the Box spaces
        # above so `_get_obs`/`_get_info` work even before the first reset.
        self._agent_location = np.zeros(2, dtype=int)
        self._target_location = np.zeros(2, dtype=int)

    def _get_obs(self):
        """Return the current observation dict (agent and target positions)."""
        return {"agent": self._agent_location, "target": self._target_location}

    def _get_info(self):
        """Return auxiliary info: the L1 (Manhattan) agent-target distance."""
        return {"distance": np.linalg.norm(self._agent_location - self._target_location, ord=1)}

    def reset(self, seed=None, options=None):
        """Start a new episode with random, distinct agent and target cells.

        Args:
            seed: Forwarded to ``gym.Env.reset``, which seeds ``self.np_random``.
            options: Accepted for API compatibility; not used here.

        Returns:
            Tuple ``(observation, info)``.
        """
        super().reset(seed=seed)

        self._agent_location = self.np_random.integers(0, self.size, size=2, dtype=int)

        # Re-sample the target until it differs from the agent's cell.
        self._target_location = self._agent_location
        while np.array_equal(self._target_location, self._agent_location):
            self._target_location = self.np_random.integers(0, self.size, size=2, dtype=int)

        observation = self._get_obs()
        info = self._get_info()

        return observation, info

    def step(self, action):
        """Move the agent according to ``action`` and report the outcome.

        Args:
            action: Key of the move table (0-3).

        Returns:
            Tuple ``(observation, reward, terminated, truncated, info)`` where
            ``reward`` is 1 when the agent reaches the target and 0 otherwise,
            and ``truncated`` is always ``False``.
        """
        direction = self._action_to_direction[action]
        # Clip so the agent cannot leave the grid.
        self._agent_location = np.clip(self._agent_location + direction, 0, self.size - 1)

        terminated = np.array_equal(self._target_location, self._agent_location)
        reward = 1 if terminated else 0
        observation = self._get_obs()
        info = self._get_info()

        return observation, reward, terminated, False, info

In [43]:
gworld_env = GridWorld(size=5)

In [44]:
gworld_env.reset()

({'agent': array([4, 1]), 'target': array([2, 1])}, {'distance': 2.0})

In [45]:
gworld_env._get_info()

{'distance': 2.0}

In [46]:
gworld_env._get_obs()

{'agent': array([4, 1]), 'target': array([2, 1])}