In [1]:
# Attach Google Drive so the project folder under /content/drive is reachable.
from google.colab import drive

drive.mount("/content/drive")

Mounted at /content/drive


In [2]:
# Switch the working directory to the project's code folder on the mounted Drive.
%cd /content/drive/MyDrive/MECE689_Bowling/MECE689_RL_Bowling_Atari/code

/content/drive/MyDrive/MECE689_Bowling/MECE689_RL_Bowling_Atari/code


In [3]:
!pip install gymnasium[atari,accept-rom-license] ale-py sb3_contrib stable-baselines3

Collecting sb3_contrib
  Downloading sb3_contrib-2.7.0-py3-none-any.whl.metadata (4.1 kB)
Collecting stable-baselines3
  Downloading stable_baselines3-2.7.0-py3-none-any.whl.metadata (4.8 kB)
Downloading sb3_contrib-2.7.0-py3-none-any.whl (93 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m93.2/93.2 kB[0m [31m6.6 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading stable_baselines3-2.7.0-py3-none-any.whl (187 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m187.2/187.2 kB[0m [31m18.4 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: stable-baselines3, sb3_contrib
Successfully installed sb3_contrib-2.7.0 stable-baselines3-2.7.0


In [68]:
import os
import torch
import gymnasium as gym
import stable_baselines3
import ale_py
import numpy as np

# RL Algorithm
from stable_baselines3 import A2C


# Visualization
from PIL import Image
import io
import base64
from IPython.display import display, HTML



# For debugging
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.callbacks import CheckpointCallback
from stable_baselines3.common.callbacks import BaseCallback
import time

# Action masking
from gymnasium import ActionWrapper
from stable_baselines3.common.atari_wrappers import AtariWrapper

# Vector environment
from stable_baselines3.common.env_util import make_atari_env
from stable_baselines3.common.vec_env import VecFrameStack, VecEnvWrapper, DummyVecEnv, VecTransposeImage
from stable_baselines3.common.env_util import make_vec_env

import gc
import random

from IPython import display
import matplotlib.pyplot as plt

print("All imports working")

All imports working


In [5]:
def convert(seconds):
    """Format a duration given in seconds as an ``H:MM:SS`` string.

    The input is first wrapped to a single day (modulo 24 hours), so a value
    of one day or more rolls over and starts again from ``0:00:00``.
    Fractional parts are truncated by the ``%d`` formatting.
    """
    within_day = seconds % (24 * 3600)
    hour, remainder = divmod(within_day, 3600)
    minutes, secs = divmod(remainder, 60)
    return "%d:%02d:%02d" % (hour, minutes, secs)

In [11]:
# Shared run settings, used again by the evaluation and video cells.
seed = 316
game_name = "ALE/Bowling-v5"

# Seed every RNG source the notebook imports before any environment is built.
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)

# Training environment:
#   * make_atari_env builds the vectorized env (it uses make_vec_env internally);
#     n_envs=4 creates four environment copies that are stepped together.
#   * wrapper_kwargs forwards clip_reward=False to the AtariWrapper.
#   * VecFrameStack(n_stack=4) feeds 4 consecutive frames per env as the observation.
# NOTE(review): "-v5" ALE ids presumably apply their own frame skip in addition to
# the one in AtariWrapper - confirm the effective frame skip is what is intended.
env = VecFrameStack(
    make_atari_env(
        game_name,
        n_envs=4,
        seed=seed,
        wrapper_kwargs={"clip_reward": False},
    ),
    n_stack=4,
)

In [12]:
def train_a2c_model(config_name, total_timesteps, learning_rate, gamma, n_steps, ent_coef,
                    train_env=None, seed=None, device="cuda"):
    """Build an A2C agent with a CNN policy and train it for ``total_timesteps`` steps.

    Args:
        config_name: Label used only in the progress message.
        total_timesteps: Number of environment steps passed to ``model.learn``.
        learning_rate: Optimizer learning rate.
        gamma: Discount factor.
        n_steps: Rollout length per environment between updates.
        ent_coef: Entropy bonus coefficient.
        train_env: Environment to train on. When omitted, the notebook-level ``env``
            is used, which matches the original behaviour.
        seed: Optional seed forwarded to ``A2C``. ``None`` (the default) leaves
            seeding exactly as before.
        device: Torch device string forwarded to ``A2C``; defaults to ``"cuda"``
            as in the original implementation.

    Returns:
        The trained ``A2C`` model.
    """
    print(f"Testing {config_name}...")

    # Fall back to the notebook-level environment so existing calls keep working,
    # while letting callers pass an explicit env instead of relying on hidden state.
    if train_env is None:
        train_env = env

    model = A2C(
        "CnnPolicy",
        train_env,

        # Hyperparameters being compared across configurations
        learning_rate=learning_rate,
        gamma=gamma,
        n_steps=n_steps,
        ent_coef=ent_coef,

        # Settings held fixed for every configuration
        gae_lambda=0.99,
        vf_coef=0.5,
        max_grad_norm=0.5,
        use_rms_prop=True,
        verbose=0,
        seed=seed,
        device=device,
    )

    # Train for the requested budget (kept short when comparing configurations)
    model.learn(total_timesteps=total_timesteps, progress_bar=True)

    return model

In [31]:
# Candidate values for each hyperparameter in the sweep
learning_rates = [0.0001, 0.0005, 0.001]
gammas = [0.99, 0.995, 0.999]
n_steps_list = [5, 10, 15, 30, 40, 50]
ent_coefs = [0.01, 0.05, 0.1]

# Lazily enumerate the full grid; ent_coef varies fastest, learning rate slowest.
grid = (
    (lr, gamma, n_steps, ent_coef)
    for lr in learning_rates
    for gamma in gammas
    for n_steps in n_steps_list
    for ent_coef in ent_coefs
)

# Map a running integer id -> (lr, gamma, n_steps, ent_coef)
configs = {}
for lr, gamma, n_steps, ent_coef in grid:
    configs[len(configs)] = (lr, gamma, n_steps, ent_coef)
config_id = len(configs)

print(f"Generated {len(configs)} configurations")

Generated 162 configurations


In [15]:
# Pick a handful of grid entries (by their id in `configs`) to try first.
config_1, config_2, config_3, config_4, config_5 = (
    configs[idx] for idx in (0, 10, 14, 24, 67)
)

# One configuration tuple per line
print(config_1, config_2, config_3, config_4, config_5, sep="\n")

(0.0001, 0.99, 5, 0.01)
(0.0001, 0.99, 30, 0.05)
(0.0001, 0.99, 40, 0.1)
(0.0001, 0.995, 15, 0.01)
(0.0005, 0.99, 40, 0.05)


In [16]:
# Training budget in environment steps.
# Other budgets used while iterating: 10_000_000 (10M), 10_000 (10K), 5_000 (5K).
total_timesteps = 50_000  # 50K

# Not updated anywhere in this cell; presumably placeholders for a sweep over
# every entry of `configs` - TODO confirm.
best_config = None
best_reward = float("-inf")

# Train a single configuration for now.
name = "config_1"
model = train_a2c_model(name, total_timesteps, *config_1)
print("Model done training")

Testing config_1...


Output()

Model done training


In [17]:
# Persist the trained model to Drive; the file name records the step budget.
model_name = "a2c_simple_test_{}".format(total_timesteps)
trained_model_save_path = (
    "/content/drive/MyDrive/MECE689_Bowling/MECE689_RL_Bowling_Atari/models/" + model_name
)
model.save(trained_model_save_path)
print("Model saved to Google Drive")

Model saved to Google Drive


In [24]:
# Reload the saved model from the project's models folder on Drive
# (/content/drive/MyDrive/MECE689_Bowling/MECE689_RL_Bowling_Atari); the path is
# rebuilt from `model_name`.
model = A2C.load(
    "/content/drive/MyDrive/MECE689_Bowling/MECE689_RL_Bowling_Atari/models/" + model_name,
    device="cuda",
)

print("Model loaded")

Model loaded


In [88]:
def evaluate_model(model, n_eval_episodes):
    """Play ``n_eval_episodes`` episodes with ``model`` and return the mean episode reward.

    A fresh single-env vectorized Atari environment is created from the
    notebook-level ``game_name`` and ``seed`` (unclipped rewards, 4 stacked frames).
    Actions are selected with ``deterministic=True``. One summary line is printed
    per episode. The evaluation environment is always closed before returning.

    Args:
        model: Trained agent exposing ``predict(obs, deterministic=...)``.
        n_eval_episodes: Number of episodes to play; must be at least 1.

    Returns:
        Mean of the per-episode total rewards.

    Raises:
        ValueError: If ``n_eval_episodes`` is less than 1 (there would be nothing
            to average).
    """
    if n_eval_episodes < 1:
        raise ValueError("n_eval_episodes must be at least 1")

    # Create an environment to do evaluations in
    eval_env = make_atari_env(
        game_name,
        n_envs=1,
        seed=seed,
        wrapper_kwargs=dict(clip_reward=False)
    )
    eval_env = VecFrameStack(eval_env, n_stack=4)

    # Action ids for reference when debugging:
    # NOOP(0), FIRE(1), UP(2), DOWN(3), UPFIRE(4), DOWNFIRE(5)
    rewards = []
    try:
        for episode in range(n_eval_episodes):
            obs = eval_env.reset()
            episode_reward = 0
            done = [False]
            steps = 0

            # The vectorized env returns arrays; index 0 is the only env.
            while not done[0]:
                action, _ = model.predict(obs, deterministic=True)
                obs, reward, done, _ = eval_env.step(action)
                episode_reward += reward[0]
                steps += 1

            print(f"Episode {episode+1}: Reward = {episode_reward:6.1f}, Steps = {steps}")
            rewards.append(episode_reward)
    finally:
        # Release the emulator even if prediction or stepping fails.
        eval_env.close()

    return np.mean(rewards)

In [89]:
# print("Model observation space:", model.observation_space)
# print("Model policy:", model.policy)

In [90]:
# eval_env = make_atari_env(game_name, n_envs=1, seed=seed, wrapper_kwargs=dict(clip_reward=False))
# eval_env = VecFrameStack(eval_env, n_stack=4)
# eval_env = VecTransposeImage(eval_env)
# print("Eval env observation space:", eval_env.observation_space)

# # Also check the actual observation shape
# obs = eval_env.reset()
# print("Actual obs shape:", obs.shape)

In [91]:
# Number of evaluation episodes (alternative settings: 10, 100)
n_eval_episodes = 30

# Average episode reward of the current model
reward = evaluate_model(model, n_eval_episodes)
print("{}: {:.4f} mean reward".format(name, reward))

0.0
Episode 1: Reward =    0.0, Steps = 499
3.0
Episode 2: Reward =    3.0, Steps = 498
3.0
Episode 3: Reward =    3.0, Steps = 498
3.0
Episode 4: Reward =    3.0, Steps = 498
3.0
Episode 5: Reward =    3.0, Steps = 498
3.0
Episode 6: Reward =    3.0, Steps = 498
3.0
Episode 7: Reward =    3.0, Steps = 498
3.0
Episode 8: Reward =    3.0, Steps = 498
3.0
Episode 9: Reward =    3.0, Steps = 498
3.0
Episode 10: Reward =    3.0, Steps = 498
3.0
Episode 11: Reward =    3.0, Steps = 498
3.0
Episode 12: Reward =    3.0, Steps = 498
3.0
Episode 13: Reward =    3.0, Steps = 498
3.0
Episode 14: Reward =    3.0, Steps = 498
3.0
Episode 15: Reward =    3.0, Steps = 498
3.0
Episode 16: Reward =    3.0, Steps = 498
3.0
Episode 17: Reward =    3.0, Steps = 498
3.0
Episode 18: Reward =    3.0, Steps = 498
3.0
Episode 19: Reward =    3.0, Steps = 498
3.0
Episode 20: Reward =    3.0, Steps = 498
3.0
Episode 21: Reward =    3.0, Steps = 498
3.0
Episode 22: Reward =    3.0, Steps = 498
3.0
Episode 23: Rew

In [94]:
!pip install moviepy

!apt-get install -y xvfb ffmpeg
# !pip install gymnasium[atari, accept-rom-license] ale-py stable-baselines3==2.3.2 moviepy


Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
ffmpeg is already the newest version (7:4.4.2-0ubuntu0.22.04.1).
xvfb is already the newest version (2:21.1.4-2ubuntu1.7~22.04.15).
0 upgraded, 0 newly installed, 0 to remove and 38 not upgraded.


In [99]:
import moviepy.editor as mpy
from IPython.display import HTML
from base64 import b64encode

In [108]:
# Roll out the trained policy in a fresh single-env copy of the Atari setup and
# collect rendered frames for a video.
# render() is expected to return off-screen RGB arrays (rgb_array mode) - confirm
# against the installed stable-baselines3 version.
video_env = make_atari_env(
    game_name,
    n_envs=1,
    seed=seed,
    wrapper_kwargs=dict(clip_reward=False)
)
video_env = VecFrameStack(video_env, n_stack=4)

frames = []
try:
    obs = video_env.reset()

    for _ in range(1000):
        # Predict next action
        action, _ = model.predict(obs, deterministic=True)

        # No manual reset when an episode ends: the vectorized env resets a
        # finished sub-environment itself and the returned `obs` is already the
        # first observation of the next episode.
        obs, _, _, _ = video_env.step(action)

        # Get a frame from the first environment inside the VecEnv
        frames.append(video_env.envs[0].render())
finally:
    # Always release the emulator, even if the rollout fails part-way.
    video_env.close()

# Save to mp4
video_name = "a2c_bowling_simple_test_2.mp4"
video_save_path = f"/content/drive/MyDrive/MECE689_Bowling/MECE689_RL_Bowling_Atari/videos/{video_name}"
# Make sure the target folder exists before the encoder tries to write into it.
os.makedirs(os.path.dirname(video_save_path), exist_ok=True)
clip = mpy.ImageSequenceClip(frames, fps=30)
clip.write_videofile(video_save_path)

t:   0%|          | 2/1001 [02:46<23:05:14, 83.20s/it, now=None]

Moviepy - Building video /content/drive/MyDrive/MECE689_Bowling/MECE689_RL_Bowling_Atari/videos/a2c_bowling_simple_test_2.mp4.
Moviepy - Writing video /content/drive/MyDrive/MECE689_Bowling/MECE689_RL_Bowling_Atari/videos/a2c_bowling_simple_test_2.mp4




t:   0%|          | 0/1001 [00:00<?, ?it/s, now=None][A
t:  13%|█▎        | 127/1001 [00:00<00:00, 1251.28it/s, now=None][A
t:  25%|██▌       | 253/1001 [00:00<00:00, 1081.68it/s, now=None][A
t:  36%|███▋      | 363/1001 [00:00<00:00, 1060.86it/s, now=None][A
t:  47%|████▋     | 470/1001 [00:00<00:00, 971.77it/s, now=None] [A
t:  57%|█████▋    | 569/1001 [00:00<00:00, 839.00it/s, now=None][A
t:  66%|██████▌   | 656/1001 [00:00<00:00, 758.60it/s, now=None][A
t:  74%|███████▍  | 741/1001 [00:00<00:00, 778.90it/s, now=None][A
t:  82%|████████▏ | 821/1001 [00:00<00:00, 736.32it/s, now=None][A
t:  90%|████████▉ | 896/1001 [00:01<00:00, 698.23it/s, now=None][A
t:  97%|█████████▋| 967/1001 [00:01<00:00, 683.21it/s, now=None][A
t:   0%|          | 2/1001 [02:47<23:16:17, 83.86s/it, now=None]

Moviepy - Done !
Moviepy - video ready /content/drive/MyDrive/MECE689_Bowling/MECE689_RL_Bowling_Atari/videos/a2c_bowling_simple_test_2.mp4


In [109]:
# Display the video inline as part of Google Colab.
# The file is read inside a context manager so the handle is closed right away.
with open(video_save_path, "rb") as video_file:
    mp4 = video_file.read()

# Embed the bytes as a base64 data URL so the player works without a file server.
data_url = "data:video/mp4;base64," + b64encode(mp4).decode()
HTML(f'<video width=480 controls><source src="{data_url}" type="video/mp4"></video>')