<a href="https://colab.research.google.com/github/lywgit/rlbook-hands-on/blob/main/01_gymnasium_basic.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# 01 gym basics

- Use [gymnasium](https://gymnasium.farama.org/) instead of gym

In [1]:
!pip install gymnasium
!pip install ale-py

Collecting gymnasium
  Downloading gymnasium-1.0.0-py3-none-any.whl.metadata (9.5 kB)
Collecting farama-notifications>=0.0.1 (from gymnasium)
  Downloading Farama_Notifications-0.0.4-py3-none-any.whl.metadata (558 bytes)
Downloading gymnasium-1.0.0-py3-none-any.whl (958 kB)
[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/958.1 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━━━━━[0m[90m╺[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m225.3/958.1 kB[0m [31m6.9 MB/s[0m eta [36m0:00:01[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m [32m952.3/958.1 kB[0m [31m16.3 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m958.1/958.1 kB[0m [31m12.6 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading Farama_Notifications-0.0.4-py3-none-any.whl (2.5 kB)
Installing collected packages: farama-notifications, gymnasium
Successfully installed farama-notifications-0.0.4 gymnasium-1.0.0
Collecting ale-py
  Dow

In [5]:
import gymnasium as gym
import ale_py
# Hand the ale_py module to gymnasium so its (Atari) environments can be
# created through gym.make.
gym.register_envs(ale_py)
# Uncomment to print the registry of available environments:
# gym.pprint_registry()

## environment

- an environment instance knows its observation space and action space
- an environment instance can take a step forward upon receiving your input action, and gives feedback on the reward you obtained
- it also knows if the game is done

In [39]:
from pprint import pprint
import gymnasium as gym

# Single seed shared by the environment reset and the action sampling below,
# so the whole cell prints the same values on every run.
SEED = 42

# initialize environment
env = gym.make('CartPole-v1')

print('-- env --')
print(env)

print('-- env.observation_space --')
print(env.observation_space)

print('-- env.action_space --')
print(env.action_space)
# Discrete spaces additionally expose how many actions exist and where the
# numbering starts.
if isinstance(env.action_space, gym.spaces.Discrete):
    print(' n =', env.action_space.n)
    print(' start =', env.action_space.start)


# first observation: reset() must be called before the first step()
observation, info = env.reset(seed=SEED)
print('-- (first observation before any action) -- ')
print('-- observation --')
print(observation)
print('-- info --')
print(info)

# choose an action at random
# The action space owns its own random generator, which reset(seed=...) does
# not touch; seed it explicitly so the sampled action is reproducible too.
env.action_space.seed(SEED)
action = env.action_space.sample()
print('-- a random action')
print(action)

# a step forward
observation, reward, terminated, truncated, info = env.step(action)
print('-- (after a step) --')
print('-- observation --')
print(observation)
print('-- reward --')
print(reward)
print('-- terminated --')
print(terminated)
print('-- truncated --')
print(truncated)
print('-- info --')
print(info)

# Release the environment; later cells build their own instance.
env.close()


-- env --
<TimeLimit<OrderEnforcing<PassiveEnvChecker<CartPoleEnv<CartPole-v1>>>>>
-- env.observation_space --
Box([-4.8               -inf -0.41887903        -inf], [4.8               inf 0.41887903        inf], (4,), float32)
-- env.action_space --
Discrete(2)
 n = 2
 start = 0
-- (first observation before any action) -- 
-- observation --
[ 0.0273956  -0.00611216  0.03585979  0.0197368 ]
-- info --
{}
-- a random action
1
-- (after a step) --
-- observation --
[ 0.02727336  0.18847767  0.03625453 -0.26141977]
-- reward --
1.0
-- terminated --
False
-- truncated --
False
-- info --
{}


## wrappers
- There is a long list of predefined wrappers under `gymnasium.wrappers`:
    - ex: `NumpyToTorch, RescaleAction` etc
- To [implement a custom wrapper](https://gymnasium.farama.org/tutorials/gymnasium_basics/implementing_custom_wrappers/), inherit from one of these classes:
    - `Wrapper, ActionWrapper, ObservationWrapper, RewardWrapper`


In [49]:
import gymnasium as gym
from gymnasium.wrappers import NumpyToTorch, RescaleAction
from gymnasium import Wrapper, ActionWrapper, ObservationWrapper, RewardWrapper

# Look at the wrapper layers that gym.make puts around the raw environment.
env = gym.make('CartPole-v1')
for label, layer in (
    ('current env (can be a wrapper):', env),
    ('the env wrapped by current env:', env.env),
    ('the env underneeth all wrappers:', env.unwrapped),
):
    print(label, layer)
# print(env.unwrapped.env)


current env (can be a wrapper): <TimeLimit<OrderEnforcing<PassiveEnvChecker<CartPoleEnv<CartPole-v1>>>>>
the env wrapped by current env: <OrderEnforcing<PassiveEnvChecker<CartPoleEnv<CartPole-v1>>>>
the env underneeth all wrappers: <CartPoleEnv<CartPole-v1>>


## ActionWrapper example

In [53]:
from gymnasium import ActionWrapper

class RandomActionWrapper(ActionWrapper):
    """Replace the agent's action with a randomly sampled one now and then.

    Each time an action passes through the wrapper it is swapped, with
    probability ``epsilon``, for a sample drawn from the wrapped
    environment's action space. Every action is also printed so the
    substitutions are visible in the cell output.

    Args:
        env: The environment to wrap.
        epsilon: Probability of substituting a random action.
    """

    def __init__(self, env, epsilon=0.1):
        super().__init__(env)
        self.epsilon = epsilon
        # The wrapper does not change which actions are valid, so it keeps
        # the wrapped environment's action space instead of hard-coding one.
        # Assign self.action_space here only if the wrapper exposes a
        # different action space than the environment underneath.

    def action(self, action): # override
        """Return the action to forward to the wrapped environment.

        Args:
            action: The action requested by the caller.

        Returns:
            Either ``action`` unchanged or, with probability ``epsilon``,
            a sample from the wrapped environment's action space.
        """
        if self.env.np_random.uniform() < self.epsilon:
            # Substitute a sampled action and flag it in the output.
            action = self.env.action_space.sample()
            print(f'(random!) {action}', end=', ')
        else:
            print(action, end=', ')
        return action

# Build CartPole and wrap it in one go.
env = RandomActionWrapper(gym.make('CartPole-v1'))

# The loop always requests the same action; any other action that shows up
# in the output was substituted by the wrapper.
fixed_action = 0
observation, _ = env.reset()
for _ in range(100):
    observation, reward, terminated, truncated, info = env.step(fixed_action)
    episode_over = terminated or truncated
    if episode_over:
        # Start a new episode so stepping can continue.
        observation, _ = env.reset()
env.close()



(random!) 0, (random!) 0, 0, 0, 0, (random!) 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, (random!) 0, 0, 0, 0, 0, 0, (random!) 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, (random!) 1, 0, (random!) 0, (random!) 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, (random!) 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 