In [2]:
import gymnasium as gym

1. Termination
2. Truncation


Termination: the episode ends because the environment has reached a terminal state, and no further actions can be taken until the environment is reset.
Truncation: the episode is cut off by a condition outside the task itself, typically a step limit, so that an episode cannot run for an infinite number of steps.

The action space describes the set of actions available to your agent. In this example (CartPole) there are two: push the cart left or push it right.

In [5]:
import gymnasium as gym
# Roll out a uniformly random policy on CartPole and log every transition.
SEED = 42       # seeds both the action sampler and the first reset
N_STEPS = 1000  # total environment steps, summed over all episodes

env = gym.make("CartPole-v1", render_mode="human")
env.action_space.seed(SEED)

try:
    observation, info = env.reset(seed=SEED)

    print("Initial Observation:", observation)
    print("Initial Info:", info)

    for _ in range(N_STEPS):
        # Sample a random action and advance the environment by one step.
        observation, reward, terminated, truncated, info = env.step(env.action_space.sample())

        print("Observation:", observation)
        print("Reward:", reward)
        print("Terminated:", terminated)
        print("Truncated:", truncated)
        print("Info:", info)

        if terminated or truncated:
            # Episode over: start a new one (only the first reset is seeded).
            observation, info = env.reset()
finally:
    # Always close the environment, even if the rollout raises or is
    # interrupted part-way through (e.g. while the human renderer is open).
    env.close()


Initial Observation: [ 0.0273956  -0.00611216  0.03585979  0.0197368 ]
Initial Info: {}
Observation: [ 0.02727336 -0.20172954  0.03625453  0.32351476]
Reward: 1.0
Terminated: False
Truncated: False
Info: {}
Observation: [ 0.02323877 -0.00714208  0.04272482  0.04248186]
Reward: 1.0
Terminated: False
Truncated: False
Info: {}
Observation: [ 0.02309593  0.187342    0.04357446 -0.23642075]
Reward: 1.0
Terminated: False
Truncated: False
Info: {}
Observation: [ 0.02684277 -0.00837454  0.03884605  0.06968222]
Reward: 1.0
Terminated: False
Truncated: False
Info: {}
Observation: [ 0.02667528 -0.20403126  0.04023969  0.37436375]
Reward: 1.0
Terminated: False
Truncated: False
Info: {}
Observation: [ 0.02259465 -0.00950332  0.04772697  0.09463533]
Reward: 1.0
Terminated: False
Truncated: False
Info: {}
Observation: [ 0.02240459 -0.20527568  0.04961967  0.40198588]
Reward: 1.0
Terminated: False
Truncated: False
Info: {}
Observation: [ 0.01829907 -0.01089135  0.05765939  0.12535046]
Reward: 1.0
Term

In [6]:
# Display the action space of the environment currently bound to `env`
# (the recorded output for this cell is Discrete(2)).
env.action_space

Discrete(2)

In [7]:
# Display the observation space of the environment currently bound to `env`
# (the recorded output for this cell is a 4-element float32 Box).
env.observation_space

Box([-4.8000002e+00 -3.4028235e+38 -4.1887903e-01 -3.4028235e+38], [4.8000002e+00 3.4028235e+38 4.1887903e-01 3.4028235e+38], (4,), float32)

In [1]:
def get_action(observation):
    """Return the action chosen by the fixed policy for a given state.

    The policy is a hard-coded lookup (described in the notebook as the
    optimal policy already found by Q-learning) from a state index to an
    action index.

    Args:
        observation: State index to look up.

    Returns:
        The action index (0, 1, 2 or 3) for a listed state, or ``None`` when
        the state is not part of the policy table.
    """
    # Each entry is (action, states that select it); the first match wins.
    action_table = (
        (0, (0, 4, 6, 10)),
        (1, (9, 14)),
        (2, (13,)),
        (3, (1, 2, 3, 8)),
    )
    for action, states in action_table:
        if observation in states:
            return action
    return None


In [3]:
import gymnasium as gym
# Evaluate the fixed policy from `get_action` on slippery FrozenLake and
# report the share of episodes that end on the goal state.
N_EPISODES = 100  # evaluation episodes; also the denominator of the success rate
GOAL_STATE = 15   # state index counted as success (presumably the goal tile of the default 4x4 map; confirm against the FrozenLake docs)

env = gym.make("FrozenLake-v1", is_slippery=True)

success_count = 0
failure_count = 0

try:
    for episode in range(N_EPISODES):
        observation, info = env.reset()
        done = False
        while not done:
            observation, reward, terminated, truncated, info = env.step(get_action(observation))
            done = terminated or truncated

        # The episode's last observation tells us how it ended.
        if observation == GOAL_STATE:
            success_count += 1
        else:
            failure_count += 1
finally:
    # Close the environment even if an episode raises or is interrupted.
    env.close()

# Multiply before dividing: `success_count / 100 * 100` can print float
# artifacts such as 56.99999999999999 for 57 successes.
print(f"Success Percentage: {100 * success_count / N_EPISODES}%")

Success Percentage: 67.0%


1. gym.make - just makes the environment
2. reward - the reward the agent gets at every time step
3. truncated - the episode is cut off when the number of steps exceeds a particular value
4. env.step - applies the given action and returns the next observation, the reward, the terminated and truncated flags, and info
5. terminated - true when a terminal condition has been reached, at which point the environment terminates
6. env.close - Just closes the environment
7. env.reset - Resets the environment
8. observation - The state in which the environment is in at that particular time step
9. action_space - This gives us information about how many actions are possible to take from any one state (discrete or continuous)
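10. observation_space - describes the set of possible observations in this particular environment: their shape, type and bounds (for CartPole, a continuous Box of 4 values rather than a countable number of states)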
11. env.close() - just closes the environment
