In [None]:
# Mount Google Drive into the Colab filesystem at /content/drive so that
# files stored on Drive can be read and written from this runtime.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# NOTE(review): `os` is not used by this cell itself.
import os
# Make the project folder on the mounted Drive the working directory, then
# list its contents as a quick sanity check that the expected sub-folders
# are present.
%cd /content/drive/MyDrive/MECE689_Superman
!ls -la

/content/drive/MyDrive/MECE689_Superman
total 20
drwx------ 2 root root 4096 Sep 25 04:10 checkpoints
drwx------ 2 root root 4096 Sep 25 04:10 code
drwx------ 2 root root 4096 Sep 25 04:10 logs
drwx------ 2 root root 4096 Sep 25 04:10 models
drwx------ 2 root root 4096 Sep 25 04:10 results


In [None]:
# !pip install stable-baselines3
# !pip install sb3_contrib
# !pip install "gymnasium==0.28.1"
# !pip install "gymnasium[atari,accept-rom-license]"
# !pip install ale-py

!pip install stable-baselines3 gymnasium[atari,accept-rom-license] ale-py

Collecting gymnasium<1.3.0,>=0.29.1 (from stable-baselines3)
  Using cached gymnasium-1.2.1-py3-none-any.whl.metadata (10.0 kB)
INFO: pip is looking at multiple versions of gymnasium[accept-rom-license,atari] to determine which version is compatible with other requirements. This could take a while.
Using cached gymnasium-1.2.1-py3-none-any.whl (951 kB)
Installing collected packages: gymnasium
  Attempting uninstall: gymnasium
    Found existing installation: gymnasium 0.28.1
    Uninstalling gymnasium-0.28.1:
      Successfully uninstalled gymnasium-0.28.1
Successfully installed gymnasium-1.2.1


In [None]:
import torch
import gymnasium as gym
import stable_baselines3
from ale_py import ALEInterface

# Report whether a CUDA device is visible and, if so, which one.
has_cuda = torch.cuda.is_available()
print("GPU available:", has_cuda)
if has_cuda:
    gpu_name = torch.cuda.get_device_name(0)
    print("GPU:", gpu_name)

# Reaching this line means every import at the top of the cell succeeded.
print("All imports working")

GPU available: True
GPU: Tesla T4
All imports working


In [None]:
import gymnasium as gym
import ale_py
import numpy as np
from stable_baselines3 import DQN
import matplotlib.pyplot as plt
from collections import deque
import torch

# For debugging
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.callbacks import CheckpointCallback
from stable_baselines3.common.callbacks import BaseCallback
import time

# Action masking
# from gymnasium import ActionWrapper

# Vector environment
from stable_baselines3.common.env_util import make_atari_env
from stable_baselines3.common.vec_env import VecFrameStack, VecEnvWrapper

In [None]:
# # SIMPLE TEST
# env = gym.make("ALE/Superman-v5", render_mode=None)

# action_space = env.action_space
# obs_space = env.observation_space
# print("Action space:", action_space)
# print("Number of actions:", action_space.n)
# action_meanings = env.unwrapped.get_action_meanings()
# print("Action meanings:", action_meanings)
# print("\nObservation space:", obs_space)

# obs, _ = env.reset()
# print("Observation shape:", obs.shape)
# env.close()

# Action meanings: ['NOOP', 'FIRE', 'UP', 'RIGHT', 'LEFT', 'DOWN', 'UPRIGHT', 'UPLEFT', 'DOWNRIGHT', 'DOWNLEFT', 'UPFIRE', 'RIGHTFIRE', 'LEFTFIRE', 'DOWNFIRE', 'UPRIGHTFIRE', 'UPLEFTFIRE', 'DOWNRIGHTFIRE', 'DOWNLEFTFIRE']

# {0: 'NOOP', 1: 'FIRE', 2: 'UP', 3: 'RIGHT', 4: 'LEFT', 5: 'DOWN', 6: 'UPRIGHT', 7: 'UPLEFT', 8: 'DOWNRIGHT', 9: 'DOWNLEFT', 10: 'UPFIRE', 11: 'RIGHTFIRE', 12: 'LEFTFIRE', 13: 'DOWNFIRE', 14: 'UPRIGHTFIRE', 15: 'UPLEFTFIRE', 16: 'DOWNRIGHTFIRE', 17: 'DOWNLEFTFIRE'}

# # ACTIONS
# 0: NOOP
# 1: FIRE

# 2: UP
# 3: RIGHT
# 4: LEFT
# 5: DOWN
# 6: UPRIGHT
# 7: UPLEFT
# 8: DOWNRIGHT
# 9: DOWNLEFT

# 10: UPFIRE
# 11: RIGHTFIRE
# 12: LEFTFIRE
# 13: DOWNFIRE

# 14: UPRIGHTFIRE
# 15: UPLEFTFIRE
# 16: DOWNRIGHTFIRE
# 17: DOWNLEFTFIRE

In [None]:
def convert(seconds):
    """Format a duration given in seconds as an ``H:MM:SS`` string.

    Hours are not wrapped at 24, so a run longer than one day is reported
    as e.g. ``25:00:00`` instead of rolling over to ``1:00:00``.

    Args:
        seconds: Duration in seconds (int or float). Fractional seconds are
            truncated; negative values are treated as zero.

    Returns:
        The duration as a string with unpadded hours and zero-padded
        minutes and seconds.
    """
    # Work on whole, non-negative seconds so divmod yields clean integers.
    total_seconds = max(int(seconds), 0)
    hours, remainder = divmod(total_seconds, 3600)
    minutes, secs = divmod(remainder, 60)

    return "%d:%02d:%02d" % (hours, minutes, secs)

In [None]:
class SupermanActionReducer(VecEnvWrapper):
    """Vectorized wrapper that exposes only Superman's eight movement actions.

    The agent sees a ``Discrete(8)`` action space. Each reduced index is
    translated to the matching original ALE action id (2-9) before it is
    forwarded to the wrapped vector environment.

    The translation is done in ``step_async`` rather than ``step``: outer
    wrappers such as ``VecFrameStack`` drive this wrapper through
    ``step_async``/``step_wait``, so a mapping placed only in ``step`` is
    bypassed and the raw reduced indices 0-7 would reach the game as
    NOOP/FIRE/UP/... instead of the intended movement actions.

    Args:
        venv: The vectorized environment to wrap.
    """

    def __init__(self, venv):
        super().__init__(venv)

        # Basic movement only: cardinal directions (2-5) & diagonals (6-9)
        self.allowed_actions = [2, 3, 4, 5, 6, 7, 8, 9]

        self.action_space = gym.spaces.Discrete(len(self.allowed_actions))

    def step_async(self, actions):
        """Map reduced action indices to original action ids and dispatch them.

        Args:
            actions: One reduced action index per sub-environment.
        """
        original_actions = np.array([self.allowed_actions[a] for a in actions])
        self.venv.step_async(original_actions)

    def step_wait(self):
        """Return the wrapped environment's step result unchanged."""
        return self.venv.step_wait()

    def reset(self):
        """Reset the wrapped environment and return its observations."""
        return self.venv.reset()


CREATE ENVIRONMENT

In [None]:
# Build the training environment from the inside out:
#   1. four seeded Superman environments created by make_atari_env,
#   2. the action reducer, which exposes only the movement actions,
#   3. frame stacking with n_stack=4.
atari_venv = make_atari_env("ALE/Superman-v5", seed=0, n_envs=4)
env = VecFrameStack(SupermanActionReducer(atari_venv), n_stack=4)

In [None]:
# # Confirm number of possible actions is smaller now
# print(f"Original action space: {make_atari_env('ALE/Superman-v5', n_envs=1).action_space}")
# print(f"Reduced action space: {env.action_space}")

Original action space: Discrete(18)
Reduced action space: Discrete(8)


In [None]:
# Settings for the DQN agent:
#   - policy "CnnPolicy": use a convolutional network
#   - env: the environment built earlier in the notebook
#   - learning_rate: alpha = 1e-4
dqn_settings = dict(
    env=env,
    policy="CnnPolicy",
    learning_rate=1e-4,
    batch_size=32,
    buffer_size=100_000,
    target_update_interval=1_000,
    device="cuda",
    verbose=1,
)
model = DQN(**dqn_settings)

print("DQN model created successfully!")

Using cuda device
Wrapping the env in a VecTransposeImage.
DQN model created successfully!


In [None]:
# Checkpoint every 500K environment timesteps so an interrupted session loses
# only a bounded amount of training. CheckpointCallback counts calls to the
# vectorized env.step(), and every call advances each parallel environment by
# one step, so the timestep interval is divided by the number of environments.
checkpoint_every_timesteps = 500000
checkpoint_callback = CheckpointCallback(
    save_freq=max(checkpoint_every_timesteps // env.num_envs, 1),
    save_path="/content/drive/MyDrive/MECE689_Superman/checkpoints",
    name_prefix="dqn_baseline_10M"
)

total_timesteps = 10000000    # 10M
# total_timesteps =  2000000    # 2M
# total_timesteps =  1000000    # 1M
# total_timesteps =   100000    # 100K
# total_timesteps =    10000    # 10K
# total_timesteps =     5000    # 5K

# Time how long it takes
print("Training phase started.")
start_time = time.time()
model.learn(total_timesteps=total_timesteps, callback=checkpoint_callback)
end_time = time.time()
print("Training phase completed!")

# Report the wall-clock duration (formatted by convert) and the throughput
training_duration = end_time - start_time
time_in_minutes_and_seconds = convert(training_duration)
print(f"Time taken: {time_in_minutes_and_seconds}")
print(f"Speed: {total_timesteps/training_duration:.2f} steps/second")

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
|    n_updates        | 62384    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 2.7e+04  |
|    ep_rew_mean      | 0        |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 152      |
|    fps              | 348      |
|    time_elapsed     | 2943     |
|    total_timesteps  | 1025220  |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 3.98e-09 |
|    n_updates        | 64070    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 2.7e+04  |
|    ep_rew_mean      | 0        |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 156      |
|    fps              | 347      |
|    time_elapsed     | 3024     |
|    total_timesteps  | 1

In [None]:
# Close the training environment; it is not used again in the cells that follow.
env.close()
print("Environment closed.")

Environment closed.


In [None]:
# Save the trained model under the project's models folder on the mounted
# Google Drive.
model.save("/content/drive/MyDrive/MECE689_Superman/models/dqn_baseline_10M")
print("Model saved to Google Drive")

Model saved to Google Drive


[Errno 2] No such file or directory: '/content/MECE689_Superman'
/content
total 16
drwxr-xr-x 1 root root 4096 Sep 23 13:39 .
drwxr-xr-x 1 root root 4096 Sep 25 14:20 ..
drwxr-xr-x 4 root root 4096 Sep 23 13:39 .config
drwxr-xr-x 1 root root 4096 Sep 23 13:39 sample_data
