In [3]:
import gymnasium as gym
import imageio
import numpy as np
import os

# Environments to preview: for each one, print its basic specs and record a
# single episode driven by uniformly sampled (random) actions as a GIF.
env_names = ['Pendulum-v1', 'BipedalWalker-v3',
             'HalfCheetah-v4', 'Humanoid-v4',
             'Ant-v4', 'Walker2d-v4',
             'CarRacing-v2']

# Directory to save the GIFs
overview = 'overview'
os.makedirs(overview, exist_ok=True)

for env_name in env_names:

    print(f"Exploring {env_name} environment...\n")

    gif_path = os.path.join(overview, f'{env_name}.gif')

    # An existing GIF doubles as a cache marker: the rollout (and the spec
    # printout) is skipped so that re-running the cell stays cheap.
    if os.path.exists(gif_path):
        continue

    # Create the environment
    env = gym.make(env_name, render_mode='rgb_array')

    # try/finally guarantees the simulator/renderer is released even when
    # reset/render/step raises part-way through the episode.
    try:
        # Initialize variables
        frames = []
        state, _ = env.reset()

        # Print observation space and shape
        print(f"Observation Space: {env.observation_space}")
        print(f"Observation Space Shape: {env.observation_space.shape}")

        # Print action space and shape
        print(f"Action Space: {env.action_space}")
        print(f"Action Space Shape: {env.action_space.shape}")

        # Print reward range
        # NOTE(review): `reward_range` is not available in every Gymnasium
        # release -- confirm against the installed version.
        print(f"Reward Range: {env.reward_range}")

        # Print max episode steps
        print(f"Max Episode Steps: {env.spec.max_episode_steps}")

        # One sampled action / the initial observation, shown as examples only.
        action = env.action_space.sample()
        print(f"Action Sample:\n {action}")
        print(f"State Sample:\n {state}")

        done = False        # environment reported termination
        truncated = False   # episode cut off (e.g. by the step limit)
        total_reward = 0

        # Roll out one episode with random actions until it terminates or
        # is truncated.
        while not (done or truncated):
            # Capture the frame (taken before the step is applied)
            frames.append(env.render())

            action = env.action_space.sample()
            state, reward, done, truncated, info = env.step(action)
            total_reward += reward

            if done:
                print(f"Reward at termination: {reward:.2f}")
    finally:
        env.close()

    # Save the frames as a GIF
    imageio.mimsave(gif_path, frames, fps=30)
    print(f"Total Reward: {total_reward:.2f}\n")


Exploring Pendulum-v1 environment...

Observation Space: Box([-1. -1. -8.], [1. 1. 8.], (3,), float32)
Observation Space Shape: (3,)
Action Space: Box(-2.0, 2.0, (1,), float32)
Action Space Shape: (1,)
Reward Range: (-inf, inf)
Max Episode Steps: 200
Action Sample:
 [0.8790874]
State Sample:
 [ 0.33019865  0.94391143 -0.9206078 ]
Total Reward: -966.87

Exploring BipedalWalker-v3 environment...

Observation Space: Box([-3.1415927 -5.        -5.        -5.        -3.1415927 -5.
 -3.1415927 -5.        -0.        -3.1415927 -5.        -3.1415927
 -5.        -0.        -1.        -1.        -1.        -1.
 -1.        -1.        -1.        -1.        -1.        -1.       ], [3.1415927 5.        5.        5.        3.1415927 5.        3.1415927
 5.        5.        3.1415927 5.        3.1415927 5.        5.
 1.        1.        1.        1.        1.        1.        1.
 1.        1.        1.       ], (24,), float32)
Observation Space Shape: (24,)
Action Space: Box(-1.0, 1.0, (4,), float32)


In [5]:
import gymnasium as gym
import imageio
import numpy as np
import os
from gymnasium.wrappers import RescaleAction

# Environments to probe: for each one, wrap the action space with
# RescaleAction(min_action=-1, max_action=1), print its basic specs and run a
# single episode driven by uniformly sampled (random) actions.
env_names = ['Pendulum-v1', 'BipedalWalker-v3',
             'HalfCheetah-v4', 'Humanoid-v4',
             'Ant-v4', 'Walker2d-v4',
             'CarRacing-v2']

for env_name in env_names:

    print(f"Exploring {env_name} environment...\n")

    # Create the environment
    env = gym.make(env_name, render_mode='rgb_array')
    env = RescaleAction(env, min_action=-1, max_action=1)

    # try/finally guarantees the simulator is released even when reset/step
    # raises part-way through the episode.
    try:
        state, _ = env.reset()

        # Print observation space and shape
        print(f"Observation Space: {env.observation_space}")
        print(f"Observation Space Shape: {env.observation_space.shape}")

        # Print action space and shape (as exposed by the RescaleAction wrapper)
        print(f"Action Space: {env.action_space}")
        print(f"Action Space Shape: {env.action_space.shape}")

        # Print reward range
        # NOTE(review): `reward_range` is not available in every Gymnasium
        # release -- confirm against the installed version.
        print(f"Reward Range: {env.reward_range}")

        # Print max episode steps
        print(f"Max Episode Steps: {env.spec.max_episode_steps}")

        # One sampled action, shown as an example only.
        action = env.action_space.sample()
        print(f"Action Sample:\n {action}")

        done = False        # environment reported termination
        truncated = False   # episode cut off (e.g. by the step limit)
        total_reward = 0

        # Roll out one episode with random actions until it terminates or
        # is truncated.
        while not (done or truncated):

            action = env.action_space.sample()
            state, reward, done, truncated, info = env.step(action)
            total_reward += reward

            if done:
                print(f"Reward at termination: {reward:.2f}")
    finally:
        env.close()

    print(f"Total Reward: {total_reward:.2f}\n")


Exploring Pendulum-v1 environment...

Observation Space: Box([-1. -1. -8.], [1. 1. 8.], (3,), float32)
Observation Space Shape: (3,)
Action Space: Box(-1.0, 1.0, (1,), float32)
Action Space Shape: (1,)
Reward Range: (-inf, inf)
Max Episode Steps: 200
Action Sample:
 [-0.91727823]
Total Reward: -867.86

Exploring BipedalWalker-v3 environment...

Observation Space: Box([-3.1415927 -5.        -5.        -5.        -3.1415927 -5.
 -3.1415927 -5.        -0.        -3.1415927 -5.        -3.1415927
 -5.        -0.        -1.        -1.        -1.        -1.
 -1.        -1.        -1.        -1.        -1.        -1.       ], [3.1415927 5.        5.        5.        3.1415927 5.        3.1415927
 5.        5.        3.1415927 5.        3.1415927 5.        5.
 1.        1.        1.        1.        1.        1.        1.
 1.        1.        1.       ], (24,), float32)
Observation Space Shape: (24,)
Action Space: Box(-1.0, 1.0, (4,), float32)
Action Space Shape: (4,)
Reward Range: (-inf, inf)
