In [3]:
import gymnasium as gym
from stable_baselines3 import DQN
from stable_baselines3.common.env_util import make_vec_env
from stable_baselines3.common.evaluation import evaluate_policy

import numpy as np


In [4]:
# Re-create the Superman Atari environment for evaluation.
# render_mode=None is forwarded to gym.make as-is (no rendering requested).
env = gym.make(id="ALE/Superman-v5", render_mode=None)


In [5]:
# Build a fresh DQN agent around `env`. Its policy object is overwritten with
# the saved one in the loading cell further down, so this instance mainly
# serves as a container with a matching CnnPolicy setup.
model = DQN(policy="CnnPolicy", env=env, buffer_size=20_000)




## Load the saved policy file

In [6]:
# Restore the saved policy from disk and install it on the model.
# NOTE(review): only `model.policy` is rebound here; any other references the
# DQN object keeps to the previous policy's networks are presumably left
# untouched -- fine for predict(), but verify before training further.
restored_policy = model.policy.load("dqn_superman_policy")
model.policy = restored_policy


## Evaluate Policy

In [None]:
# Evaluate the trained model over 10 episodes
# Run the agent through 10 complete games & calculate the average score. 
# deterministic=True means the agent always chooses the best action it knows (instead of exploring randomly)

# mean_reward, std_reward = evaluate_policy(model, env, n_eval_episodes=10, deterministic=True)
# print("Done evaluating!")

In [7]:
# Roll out the loaded policy for a fixed number of episodes and record each
# episode's total reward and length; the report cell below reads
# `all_rewards` and `all_lengths`.
N_EVAL_EPISODES = 10
# Safety cap: an episode that never terminates would otherwise hang the cell.
MAX_STEPS_PER_EPISODE = 5000

all_rewards = []
all_lengths = []

print("Running evaluation of trained DQN agent")
try:
    for episode in range(N_EVAL_EPISODES):
        # NOTE(review): reset() is called without a seed, so results may vary
        # between runs if the environment is stochastic.
        obs, _ = env.reset()
        done = False
        total_reward = 0
        steps = 0

        while not done and steps < MAX_STEPS_PER_EPISODE:
            action, _ = model.predict(obs, deterministic=True)
            # Keep `terminated` and `truncated` separate instead of
            # overwriting one flag; the episode ends when either is set.
            obs, reward, terminated, truncated, info = env.step(action)
            total_reward += reward
            steps += 1
            done = terminated or truncated

        # An episode that stops at MAX_STEPS_PER_EPISODE is recorded with
        # steps == MAX_STEPS_PER_EPISODE and done still False.
        all_rewards.append(total_reward)
        all_lengths.append(steps)
        print(f"Episode {episode+1}: Reward = {total_reward:6.1f}, Steps = {steps}")
finally:
    # Release the environment even when a rollout step raises.
    env.close()

Running evaluation of trained DQN agent...
Episode 1: Reward =    0.0, Steps = 5000
Episode 2: Reward =    0.0, Steps = 5000
Episode 3: Reward =    0.0, Steps = 5000
Episode 4: Reward =    0.0, Steps = 5000
Episode 5: Reward =    0.0, Steps = 5000
Episode 6: Reward =    0.0, Steps = 5000
Episode 7: Reward =    0.0, Steps = 5000
Episode 8: Reward =    0.0, Steps = 5000
Episode 9: Reward =    0.0, Steps = 5000
Episode 10: Reward =    0.0, Steps = 5000


In [8]:
# Summarise the evaluation run: convert the per-episode records to arrays and
# print reward statistics plus the mean episode length.
rewards_array = np.asarray(all_rewards)
lengths_array = np.asarray(all_lengths)

print("DQN PERFORMANCE REPORT:")
print(f"Mean reward: {rewards_array.mean():.2f}")
print(f"Median reward: {np.median(rewards_array):.2f}")
print(f"Min reward: {rewards_array.min():.2f}")
print(f"Max reward: {rewards_array.max():.2f}")
# ndarray.std() uses the same default (population, ddof=0) as np.std().
print(f"Standard deviation: {rewards_array.std():.2f}")
# Fraction of episodes with strictly positive total reward, as a percentage.
print(f"Success rate (reward > 0): {(rewards_array > 0).mean() * 100:.1f}%")
print(f"Average episode length: {lengths_array.mean():.1f} steps")

DQN PERFORMANCE REPORT:
Mean reward: 0.00
Median reward: 0.00
Min reward: 0.00
Max reward: 0.00
Standard deviation: 0.00
Success rate (reward > 0): 0.0%
Average episode length: 5000.0 steps


In [10]:
# print(f"Mean reward: {mean_reward:.2f} +/- {std_reward:.2f}")

# If mean_reward > 0: agent has learned something!
# This is because the agent usually gets 0 reward in Superman
# This will be the baseline performance...