# Sandbox

### <span style="color:red">Please clear all cell outputs before you add/commit/push</span>.

In [None]:
import numpy as np
import gymnasium as gym

### Gymnasium environment demo

In [None]:
env = gym.make("Blackjack-v1", sab=True) # create the Blackjack environment following the Sutton & Barto description

In [None]:
# The agent can see the following: its current sum, whether it has a usable ace, and the dealer's one showing card
print("Observation space:", env.observation_space)

# The agent can only choose between 2 actions: Hit (1) or Stick (0)
print("\nSize of the Action space =", env.action_space.n)

In [None]:
# Reset the environment to an initial internal state (a random state here, to ensure exploration).
# The call returns an initial observation and an info object, shown as the cell output.
env.reset()

In [None]:
# Check whether a given action belongs to the action space.
# Only actions 0 (Stick) and 1 (Hit) exist, so action 2 is expected to be rejected.
action = 2
has_action = env.action_space.contains(action)
print(f"Env contains action {action}: {has_action}")

# Sample a random action from the action space.
random_action = env.action_space.sample()
# f-string so that the sampled value is interpolated instead of printing the literal "{random_action}"
print(f"\nRandomly sample action {random_action}")

In [None]:
env.step(1) # execute action 1 (Hit) in the environment; returns next_state, reward, terminated, truncated and info

After receiving our first observation, we only use the `env.step(action)` function to interact with the environment. This function takes an action as input and executes it in the environment. Because that action changes the state of the environment, it returns five useful variables to us. These are:

* `next_state` - This is the observation that the agent will receive after taking the action.
* `reward` - This is the reward that the agent will receive after taking the action.
* `terminated` - This is a boolean variable that indicates whether or not the environment has terminated.
* `truncated` - This is a boolean variable that also indicates whether the episode ended by early truncation, i.e., a time limit is reached.
* `info` - This is a dictionary that might contain additional information about the environment.

### Demo of why `defaultdict` can be useful

In [None]:
from collections import defaultdict 
    
# Count how often each value of L occurs, once for each way of declaring
# the default value: the `int` type and an explicit `lambda: 0`.
L = [1, 2, 3, 4, 2, 4, 1, 2]
for factory in (int, lambda: 0):
    d = defaultdict(factory)
    for i in L:
        # a missing key starts at the default value 0, therefore there is
        # no need to initialize it => no error
        d[i] += 1
    print(d)

### Gymnasium environments management

In [None]:
gym.pprint_registry() # print the registered environments
# Fetch all (env_id, registered entry) pairs; note that this is the result of .items(), not a dict itself
dict_envs = gym.envs.registration.registry.items()

In [None]:
# Placeholder environment whose only purpose is to be registered below
class FakeEnv(gym.Env):
    """Stub environment that defines an action space and an observation space only."""

    def __init__(self, n: int):
        """Create the two spaces.

        Args:
            n: size of the discrete action space.
        """
        spaces = gym.spaces
        # 3x3 grid of uint8 values bounded by 0 and 1
        self.observation_space = spaces.Box(low=0, high=1, shape=(3, 3), dtype=np.uint8)
        self.action_space = spaces.Discrete(n=n)

In [None]:
# register a custom env

# Register FakeEnv while the "my_envs" namespace is active; the env is then
# addressed with the namespace as a prefix of its id, i.e. "my_envs/fake_env".
with gym.envs.registration.namespace(ns="my_envs"):
    gym.register(
        id="fake_env",
        entry_point=FakeEnv  # the class to build when this id is passed to gym.make()
    )
    # Printed inside the `with` block, i.e. while the namespace context is still active
    print(f"New environment was registered in the namespace: {gym.envs.registration.current_namespace}")

# then run gym.pprint_registry() to see your env registered in the list

In [None]:
gym.spec(env_id="my_envs/fake_env") # look up the specification registered under this id (shown as the cell output)

In [None]:
# Create the registered environment; n=10 is passed along with the env id
fake_env = gym.make(id="my_envs/fake_env", n=10)

# Show both spaces of the unwrapped environment, one per line
print(
    fake_env.unwrapped.action_space,
    fake_env.unwrapped.observation_space,
    sep="\n",
)

### free zone

In [None]:
pass # placeholder: scratch cell for free experimentation