In [None]:
import gymnasium as gym
from stable_baselines3 import PPO
import numpy as np

In [None]:
# Build the Superman Atari environment. Settings are collected in one mapping
# so they can be inspected or tweaked in a single place.
env_options = {
    "render_mode": None,
    "frameskip": 4,
    "full_action_space": False,
}
env = gym.make("ALE/Superman-v5", **env_options)

print("Environment created!")
# Report the spaces the agent will act in and observe.
for heading, space in (
    ("Action space:", env.action_space),
    ("Observation space:", env.observation_space),
):
    print(heading, space)

In [2]:
# Initialize a PPO agent with a CNN policy for pixel observations.
#
# `env` comes from a single gym.make(...) call, so each rollout collects
# n_steps * n_envs = 128 * 1 = 128 transitions. batch_size must not exceed
# that buffer size; the previous value of 256 did, which made SB3 warn about
# a truncated mini-batch and train on 128-sample batches anyway. Using 128
# keeps the effective mini-batch identical while making the configuration
# consistent. If the environment is later vectorized (n_envs > 1), batch_size
# can be raised to any divisor of n_steps * n_envs.
model = PPO(
    policy="CnnPolicy",           # Use CNN for pixel input
    env=env,
    learning_rate=2.5e-4,         # Adam learning rate
    n_steps=128,                  # Steps per environment per update
    batch_size=128,               # Minibatch size (<= n_steps * n_envs)
    n_epochs=4,                   # Number of epochs when optimizing the surrogate
    gamma=0.99,                   # Discount factor
    gae_lambda=0.95,              # Factor for trade-off of bias vs variance (GAE)
    clip_range=0.1,               # Surrogate (policy) clipping coefficient
    clip_range_vf=None,           # Value function clipping (None disables it)
    normalize_advantage=True,     # Normalize advantages
    ent_coef=0.01,                # Entropy coefficient (encourages exploration)
    vf_coef=0.5,                  # Value function coefficient in loss
    max_grad_norm=0.5,            # Maximum gradient norm (gradient clipping)
    tensorboard_log="./ppo_superman_tensorboard/",  # Log for TensorBoard
    verbose=1                     # Print training progress
)

print("PPO model created successfully!")

In [None]:
# Training budget passed to PPO.learn (1M timesteps).
TOTAL_TIMESTEPS = 1_000_000

# Kick off training; this is the long-running cell of the notebook.
print("Starting PPO training...")
model.learn(total_timesteps=TOTAL_TIMESTEPS)

print("PPO training completed!")

In [None]:
# Persist the trained agent so it can be reloaded without retraining.
model.save(path="ppo_superman")
print("PPO model saved as 'ppo_superman'")