In [None]:
import gymnasium as gym
import ale_py
import numpy as np
import time
from ale_py.roms import Superman

from stable_baselines3 import PPO, DQN, A2C	
from sb3_contrib import TRPO, QRDQN


In [None]:
# Report the installed library versions so the run can be reproduced later.
print(f"gymnasium version: {gym.__version__}")
print(f"ale_py version: {ale_py.__version__}")

In [None]:
# Inspect the Superman environment's action/observation spaces.
# render_mode=None: no rendering is requested for this short-lived environment.
env = gym.make("ALE/Superman-v5", render_mode=None)

# try/finally guarantees the environment is closed even if one of the
# inspection steps below raises.
try:
    action_space = env.action_space
    obs_space = env.observation_space
    print("Action space:", action_space)
    print("Number of actions:", action_space.n)

    print("Action meanings:", env.unwrapped.get_action_meanings())
    # Output recorded by the author:
    # ['NOOP', 'FIRE', 'UP', 'RIGHT', 'LEFT', 'DOWN', 'UPRIGHT', 'UPLEFT', 'DOWNRIGHT', 'DOWNLEFT', 'UPFIRE', 'RIGHTFIRE', 'LEFTFIRE', 'DOWNFIRE', 'UPRIGHTFIRE', 'UPLEFTFIRE', 'DOWNRIGHTFIRE', 'DOWNLEFTFIRE']

    print("\nObservation space:", obs_space)

    # reset() returns (observation, info). The second value is bound to `info`
    # rather than `_`, because IPython uses `_` for the last cell output.
    obs, info = env.reset()
    print("Observation shape:", obs.shape)
finally:
    env.close()

In [None]:
# Create the Superman environment that the rollout cell below steps through,
# with visual rendering enabled so a person can watch the game.
RENDER_MODE = "human"
# Alternative settings left by the author (swap in by editing the line above):
#   "rgb_array", "ansi", None
# NOTE(review): confirm that "ansi" is actually accepted by this environment.

env = gym.make("ALE/Superman-v5", render_mode=RENDER_MODE)
# With RENDER_MODE = "human", a Python window appears at this point.

# Reset the environment to its initial state.
# obs: initial game state
# info: extra info about the environment state
obs, info = env.reset()

In [None]:
# Random-agent rollout: play several episodes using uniformly sampled actions.
# One episode is one complete game session, capped at max_steps steps.
total_episodes_to_run = 50
max_steps = 1000
# Re-create the environment after every `restart_every` episodes.
restart_every = 10

# try/finally guarantees the environment is closed (and its resources
# released) even if the cell is interrupted or a step raises part-way through.
try:
    for episode in range(total_episodes_to_run):
        # Periodically swap in a fresh environment instance. Doing this at the
        # START of an episode (never after the last one) avoids resetting an
        # environment that is about to be closed, and avoids building one that
        # would be closed again without ever being used.
        if episode > 0 and episode % restart_every == 0:
            env.close()
            env = gym.make("ALE/Superman-v5", render_mode=RENDER_MODE)

        # Every episode starts from a freshly reset environment, so this cell
        # does not depend on a reset performed elsewhere.
        obs, info = env.reset()

        # Per-episode counters for accumulated reward and steps taken.
        total_reward = 0
        steps = 0

        # Bounded loop: ends when the episode finishes or after max_steps steps.
        while steps < max_steps:
            # Pick a random action from the set of all possible actions.
            action = env.action_space.sample()

            # env.step executes the chosen action and returns:
            #   obs:        new observed state after taking the action
            #   reward:     reward received for this step
            #   terminated: True if the episode ended because of game rules
            #   truncated:  True if the episode ended because of external factors
            #   info:       extra environment info
            obs, reward, terminated, truncated, info = env.step(action)

            total_reward += reward
            steps += 1

            # Stop stepping as soon as the episode has ended.
            if terminated or truncated:
                break

        print(f"Episode {episode+1}: Reward={total_reward}, Steps={steps}")
finally:
    # Close the environment & release any resources.
    env.close()