Evaluate Baseline DQN

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
%cd /content/drive/MyDrive/MECE689_Bowling/MECE689_RL_Bowling_Atari
!ls -la

/content/drive/MyDrive/MECE689_Bowling/MECE689_RL_Bowling_Atari
total 28
drwx------ 2 root root 4096 Sep 25 15:17 checkpoints
drwx------ 2 root root 4096 Sep 25 15:17 code
drwx------ 2 root root 4096 Sep 25 15:04 .git
-rw------- 1 root root 6686 Sep 26 22:56 github_terminal.ipynb
-rw------- 1 root root   33 Sep 26 19:25 .gitignore
drwx------ 2 root root 4096 Sep 25 15:17 models
-rw------- 1 root root   26 Sep 25 15:04 README.md
drwx------ 2 root root 4096 Sep 25 15:17 results


In [3]:
!pip install stable-baselines3 gymnasium[atari,accept-rom-license] ale-py

Collecting stable-baselines3
  Downloading stable_baselines3-2.7.0-py3-none-any.whl.metadata (4.8 kB)
Downloading stable_baselines3-2.7.0-py3-none-any.whl (187 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m187.2/187.2 kB[0m [31m6.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: stable-baselines3
Successfully installed stable-baselines3-2.7.0


In [4]:
from stable_baselines3.common.evaluation import evaluate_policy

import os
import torch
import gymnasium as gym
import stable_baselines3
import ale_py
import numpy as np
from stable_baselines3 import DQN


# Visualization
from PIL import Image
import io
import base64
from IPython.display import display, HTML



# For debugging
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.callbacks import CheckpointCallback
from stable_baselines3.common.callbacks import BaseCallback
import time

# Action masking
from gymnasium import ActionWrapper
from stable_baselines3.common.atari_wrappers import AtariWrapper

# Vector environment
from stable_baselines3.common.env_util import make_vec_env
from stable_baselines3.common.env_util import make_atari_env
from stable_baselines3.common.vec_env import VecFrameStack, VecEnvWrapper, DummyVecEnv


import gc

print("All imports working")


All imports working


Gym has been unmaintained since 2022 and does not support NumPy 2.0 amongst other critical functionality.
Please upgrade to Gymnasium, the maintained drop-in replacement of Gym, or contact the authors of your software and request that they upgrade.
See the migration guide at https://gymnasium.farama.org/introduction/migration_guide/ for additional information.
  return datetime.utcnow().replace(tzinfo=utc)


In [5]:
print("GPU available:", torch.cuda.is_available())
if torch.cuda.is_available():
    print("GPU:", torch.cuda.get_device_name(0))

GPU available: True
GPU: Tesla T4


In [6]:
def convert(seconds):
    seconds = seconds % (24 * 3600)
    hour = seconds // 3600
    seconds %= 3600
    minutes = seconds // 60
    seconds %= 60

    return "%d:%02d:%02d" % (hour, minutes, seconds)

Create Environment

In [7]:
class ActionReducer(ActionWrapper):
  def __init__(self, env):
    super().__init__(env)

    # NOOP, FIRE, UP, and DOWN only. No UPFIRE. No DOWNFIRE.
    self.allowed_actions = [0,1,2,3]

    self.action_space = gym.spaces.Discrete(len(self.allowed_actions))

  def action(self, action):
    return self.allowed_actions[action]

In [8]:
def make_env():
  # DQN only supports single environments (not vectorized)
  env = gym.make("ALE/Bowling-v5")
  env = ActionReducer(env)
  env = Monitor(env)
  # disable reward clipping
  env = AtariWrapper(env, clip_reward=False)
  return env

In [9]:
seed = 316
torch.manual_seed(seed)

env = DummyVecEnv([make_env])
env = VecFrameStack(env, n_stack=4)


Load Model

In [10]:
# Current working directory:
# /content/drive/MyDrive/MECE689_Bowling/MECE689_RL_Bowling_Atari

# Load model
model_name = "dqn_baseline_10000000"

model = DQN.load(
    f"models/{model_name}",
    env=env,
    device="cuda"
)

print("Model loaded")

Wrapping the env in a VecTransposeImage.
Model loaded


  return datetime.utcnow().replace(tzinfo=utc)


Evaluate Model

In [11]:
all_rewards = []
all_lengths = []
total_episodes = 10

print("Running evaluation of trained DQN agent")
for episode in range(total_episodes):
    obs = env.reset()
    done = False
    total_reward = 0
    steps = 0

    # Time how long it takes
    print(f"Episode {episode+1} training started")
    start_time = time.time()
    while not done:
        action, _ = model.predict(obs, deterministic=True)
        obs, reward, done, info = env.step(action)

        total_reward += reward[0]
        steps += 1
    # Episode is DONE
    end_time = time.time()
    print(f"Episode {episode+1} training done")

    # Calculate run time
    training_duration = end_time - start_time
    time_in_minutes_and_seconds = convert(training_duration)
    print(f"Time taken: {time_in_minutes_and_seconds}")

    # Record some values
    all_rewards.append(total_reward)
    all_lengths.append(steps)

    # print(type(total_reward))
    # print(total_reward)
    print(f"Episode {episode+1}: Reward = {total_reward:6.1f}, Steps = {steps}\n")

# NOTE: this is faster than training bc no back prop is done, so no gradients need to be calculated

Running evaluation of trained DQN agent
Episode 1 training started
Episode 1 training done
Time taken: 0:00:03
Episode 1: Reward =   75.0, Steps = 540

Episode 2 training started
Episode 2 training done
Time taken: 0:00:02
Episode 2: Reward =   75.0, Steps = 536

Episode 3 training started
Episode 3 training done
Time taken: 0:00:01
Episode 3: Reward =   78.0, Steps = 546

Episode 4 training started
Episode 4 training done
Time taken: 0:00:01
Episode 4: Reward =   78.0, Steps = 546

Episode 5 training started
Episode 5 training done
Time taken: 0:00:01
Episode 5: Reward =   75.0, Steps = 538

Episode 6 training started
Episode 6 training done
Time taken: 0:00:01
Episode 6: Reward =   72.0, Steps = 536

Episode 7 training started
Episode 7 training done
Time taken: 0:00:01
Episode 7: Reward =   75.0, Steps = 535

Episode 8 training started
Episode 8 training done
Time taken: 0:00:02
Episode 8: Reward =   73.0, Steps = 538

Episode 9 training started
Episode 9 training done
Time taken: 0

In [12]:
env.close()

Evaluate Performance

In [13]:
mean_reward = sum(all_rewards) / total_episodes

print(f"Average reward over {total_episodes} episodes: {mean_reward}")

Average reward over 10 episodes: 75.30000305175781


In [15]:
# Calculate comprehensive statistics
rewards_array = np.array(all_rewards)
lengths_array = np.array(all_lengths)

print("DQN PERFORMANCE REPORT:")
print(f"Mean reward: {np.mean(rewards_array):.2f}")
print(f"Median reward: {np.median(rewards_array):.2f}")
print(f"Min reward: {np.min(rewards_array):.2f}")
print(f"Max reward: {np.max(rewards_array):.2f}")
print(f"Standard deviation: {np.std(rewards_array):.2f}")
print(f"Average episode length: {np.mean(lengths_array):.1f} steps")

DQN PERFORMANCE REPORT:
Mean reward: 75.30
Median reward: 75.00
Min reward: 72.00
Max reward: 78.00
Standard deviation: 1.85
Average episode length: 539.6 steps


In [20]:
# Calculate Human World Record Normalized Score (HWRNS)
# Max score is 300 via Perfect Game aka 12 strikes in a row

# Divide score by 300 to normalize it to Human World Record

HWRN_score = (mean_reward/300) * 100
print(f"Human World Record Normalized Score: {(HWRN_score):.2f}%")

Human World Record Normalized Score: 25.10%
