# DQN Multi-Seed Report

This notebook loads pre-trained models (one per seed) and runs evaluation and visualization.
No training is required — run lab005_dqn.ipynb first to generate the model files.

---

In [None]:
import os, sys

import numpy as np
import pandas as pd
import torch
import matplotlib.pyplot as plt

import gymnasium as gym
from stable_baselines3 import DQN
from stable_baselines3.common.evaluation import evaluate_policy
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.monitor import Monitor

import imageio
from IPython.display import Image, display

In [None]:
# Configuration
#
# Every value a reader might want to tune lives in this cell; later cells
# only read these constants.

# --- What to evaluate ---
SELECTED_ALGORITHM = "dqn"
ALGORITHM_CLASS = DQN
SEED_LIST = [42, 123, 3407]

# --- Environment ---
GYMNASIUM_MODEL = "LunarLander-v3"
WIND_ENABLED = False

# --- Evaluation ---
EVALUATION_EPISODES = 20
DEVICE = "cpu"

# --- Paths ---
# NOTE: "__file__" is a quoted string literal here, not the module attribute,
# so abspath() resolves it against the current working directory and
# NOTEBOOK_DIR ends up being the directory the kernel was started in.
NOTEBOOK_DIR = os.path.dirname(os.path.abspath("__file__"))
MODELS_DIR = os.path.join(NOTEBOOK_DIR, "../../../models", SELECTED_ALGORITHM)
OUTPUT_DIR = os.path.join(NOTEBOOK_DIR, "outputs_" + SELECTED_ALGORITHM)
os.makedirs(OUTPUT_DIR, exist_ok=True)  # created eagerly so later cells can write into it

# Echo the effective configuration, one setting per line.
print(
    f"Algorithm: {SELECTED_ALGORITHM.upper()}",
    f"Seeds: {SEED_LIST}",
    f"Wind enabled: {WIND_ENABLED}",
    f"Evaluation episodes per seed: {EVALUATION_EPISODES}",
    f"Models directory: {MODELS_DIR}",
    sep="\n",
)

In [None]:
# Load all models and evaluate
#
# For each seed: load the saved model, run EVALUATION_EPISODES deterministic
# episodes on a fresh Monitor-wrapped environment, and store the per-episode
# rewards in evaluation_results[seed] as a NumPy array.

evaluation_results = {}

for seed in SEED_LIST:
    print(f"Loading and evaluating model for seed {seed}...")

    load_path = os.path.join(MODELS_DIR, f"lab005_{SELECTED_ALGORITHM}_{seed}")

    # Fail early with an actionable message when this seed's model is missing.
    # Stable-Baselines3 appends ".zip" to extension-less paths, so accept either form.
    if not (os.path.exists(load_path) or os.path.exists(load_path + ".zip")):
        raise FileNotFoundError(
            f"No saved model found at '{load_path}' (or '{load_path}.zip'). "
            "Run the training notebook first to generate the model files."
        )

    def make_env(s=seed):
        """Create the environment handed to the model at load time, seeded with `s`."""
        # `s=seed` binds the current loop value at definition time.
        env = gym.make(GYMNASIUM_MODEL, render_mode="rgb_array", enable_wind=WIND_ENABLED)
        env.reset(seed=s)
        return env

    # Keep a handle on the load-time environment so it can be closed; previously
    # one environment per seed was created and never released.
    model_env = DummyVecEnv([make_env])
    try:
        model = ALGORITHM_CLASS.load(load_path, env=model_env, device=DEVICE)

        eval_env = Monitor(gym.make(GYMNASIUM_MODEL, enable_wind=WIND_ENABLED))
        try:
            eval_env.reset(seed=seed)

            rewards, _ = evaluate_policy(
                model,
                eval_env,
                n_eval_episodes=EVALUATION_EPISODES,
                deterministic=True,
                return_episode_rewards=True
            )
        finally:
            # Close the evaluation environment even if evaluation raised.
            eval_env.close()
    finally:
        model_env.close()

    evaluation_results[seed] = np.array(rewards)

print(f"\nEvaluation complete for all {len(SEED_LIST)} seeds.")

In [None]:
# Evaluation Summary Table
#
# One row per seed plus an "Overall" row pooled over every evaluated episode.


def _summary_row(label, episode_rewards):
    """Return one pre-formatted table row for the given reward array.

    `label` goes into the "Seed" column unchanged; all statistics are
    rendered as strings (two decimals, success rate as a percentage of
    episodes with reward >= 200).
    """
    solved_fraction = (episode_rewards >= 200).sum() / len(episode_rewards)
    return {
        "Seed": label,
        "Mean Reward": f"{np.mean(episode_rewards):.2f}",
        "Std Dev": f"{np.std(episode_rewards):.2f}",
        "Min Reward": f"{np.min(episode_rewards):.2f}",
        "Max Reward": f"{np.max(episode_rewards):.2f}",
        "Success Rate": f"{solved_fraction * 100:.1f}%"
    }


# Per-seed rows, in SEED_LIST order.
rows = [_summary_row(seed, evaluation_results[seed]) for seed in SEED_LIST]

# Pooled row over all episodes of all evaluated seeds.
all_rewards = np.concatenate(list(evaluation_results.values()))
rows.append(_summary_row("Overall", all_rewards))

df_summary = pd.DataFrame(rows)

print(f"*** {SELECTED_ALGORITHM.upper()} MULTI-SEED EVALUATION SUMMARY ***")
print(f"Episodes per seed: {EVALUATION_EPISODES}")
print(f"Total episodes: {len(all_rewards)}")
print()
print(df_summary.to_string(index=False))

In [None]:
# Per-Seed: Evaluation Convergence Plots
#
# One panel per seed showing each episode's reward, the running mean over the
# episodes seen so far, and a +/- 1 running-standard-deviation band.

# squeeze=False always yields a 2-D array of Axes, so a single-seed run needs
# no special casing; ravel() flattens it to one Axes per seed.
fig, axes = plt.subplots(
    1, len(SEED_LIST), figsize=(6 * len(SEED_LIST), 5), sharey=True, squeeze=False
)
axes = axes.ravel()

for ax, seed in zip(axes, SEED_LIST):
    rewards = evaluation_results[seed]
    episodes = np.arange(1, len(rewards) + 1)
    running_mean = np.cumsum(rewards) / episodes
    # Population std of the first i episodes, for i = 1..N.
    running_std = np.array([np.std(rewards[:i]) for i in episodes])

    ax.scatter(episodes, rewards, color='gray', alpha=0.4, s=20, label='Episode Reward')
    ax.plot(episodes, running_mean, color='blue', linewidth=2, label='Running Mean')
    ax.fill_between(episodes, running_mean - running_std, running_mean + running_std,
                    color='blue', alpha=0.15, label='Running Mean ± 1 Std')
    ax.axhline(y=200, color='red', linestyle='--', label='Solved Threshold (200)')
    ax.set_title(f"Seed {seed}")
    ax.set_xlabel("Episode")
    ax.grid(True, alpha=0.3)

axes[0].set_ylabel("Reward")
# The labels above were never rendered without a legend; every panel uses the
# same styling, so a single legend on the first panel is enough.
axes[0].legend(loc="best", fontsize=9)
fig.suptitle(f"{SELECTED_ALGORITHM.upper()} Evaluation: {EVALUATION_EPISODES} Episodes per Seed", fontsize=14)
plt.tight_layout()
plt.show()

In [None]:
# Aggregated: Evaluation Bar Chart (mean reward per seed with error bars)
#
# Bar height is the per-seed mean reward; error bars are the per-seed standard
# deviation. Horizontal lines mark the 200 threshold and the pooled mean.

colors = plt.cm.tab10.colors

labels = [str(s) for s in SEED_LIST]
means = [np.mean(evaluation_results[s]) for s in SEED_LIST]
stds = [np.std(evaluation_results[s]) for s in SEED_LIST]
overall_mean = np.mean(all_rewards)

# Explicit figure/axes handles instead of the implicit pyplot state machine.
bar_fig, bar_ax = plt.subplots(figsize=(max(8, 3 * len(SEED_LIST)), 6))
bars = bar_ax.bar(labels, means, yerr=stds, capsize=5, color=colors[:len(SEED_LIST)], alpha=0.8)

bar_ax.axhline(y=200, color='red', linestyle='--', label='Solved Threshold (200)')
bar_ax.axhline(y=overall_mean, color='blue', linestyle='-', linewidth=2,
               label=f'Overall Mean ({overall_mean:.1f})')

bar_ax.set_title(f"{SELECTED_ALGORITHM.upper()} Mean Reward per Seed ({EVALUATION_EPISODES} episodes each)", fontsize=14)
bar_ax.set_xlabel("Seed")
bar_ax.set_ylabel("Mean Reward")
bar_ax.legend()
bar_ax.grid(True, alpha=0.3, axis='y')
plt.show()

In [None]:
# Reward Distribution Histograms (overlaid)
#
# One semi-transparent histogram per seed, drawn on a common set of bins so
# that bar positions and widths are directly comparable between seeds.

# Defined locally so this cell does not rely on a palette variable created by
# another plotting cell.
palette = plt.cm.tab10.colors

# Shared bin edges spanning the pooled rewards; with a plain `bins=10` each
# seed would get its own edges and the overlaid bars would not line up.
shared_bins = np.histogram_bin_edges(all_rewards, bins=10)

plt.figure(figsize=(12, 5))
for i, seed in enumerate(SEED_LIST):
    # Cycle through the palette so more seeds than colours cannot raise IndexError.
    plt.hist(evaluation_results[seed], bins=shared_bins, alpha=0.5,
             color=palette[i % len(palette)],
             edgecolor='black', label=f"Seed {seed}")

plt.axvline(x=200, color='red', linestyle='--', label='Solved Threshold (200)')
plt.axvline(x=np.mean(all_rewards), color='blue', linestyle='-', linewidth=2,
            label=f'Overall Mean ({np.mean(all_rewards):.1f})')
plt.title('Reward Distribution across Seeds')
plt.xlabel('Reward')
plt.ylabel('Frequency')
plt.legend()
plt.grid(True, alpha=0.3)
plt.show()

In [None]:
# GIF Visualizations (one per seed)
#
# For each seed: load the saved model, roll out one deterministic episode on a
# freshly seeded rendering environment, and save the rendered frames as a GIF
# in OUTPUT_DIR, then display it inline.

for seed in SEED_LIST:
    print(f"Generating GIF for seed {seed}...")

    load_path = os.path.join(MODELS_DIR, f"lab005_{SELECTED_ALGORITHM}_{seed}")

    def make_vis_env(s=seed):
        """Create the environment handed to the model at load time, seeded with `s`."""
        # `s=seed` binds the current loop value at definition time.
        env = gym.make(GYMNASIUM_MODEL, render_mode="rgb_array", enable_wind=WIND_ENABLED)
        env.reset(seed=s)
        return env

    # Keep a handle on the load-time environment so it can be closed; previously
    # one environment per seed was created and never released.
    model_env = DummyVecEnv([make_vis_env])
    try:
        vis_model = ALGORITHM_CLASS.load(load_path, env=model_env, device=DEVICE)

        vis_env = gym.make(GYMNASIUM_MODEL, render_mode="rgb_array", enable_wind=WIND_ENABLED)
        try:
            frames = []
            obs, info = vis_env.reset(seed=seed)
            done = False

            # Step until the episode terminates or is truncated, capturing the
            # frame rendered after every step.
            while not done:
                action, _ = vis_model.predict(obs, deterministic=True)
                obs, reward, terminated, truncated, info = vis_env.step(action)
                done = terminated or truncated
                frames.append(vis_env.render())
        finally:
            # Close the rendering environment even if the rollout raised.
            vis_env.close()
    finally:
        model_env.close()

    gif_path = os.path.join(OUTPUT_DIR, f"{SELECTED_ALGORITHM}_seed{seed}.gif")
    imageio.mimsave(gif_path, frames, fps=30)
    print(f"  Saved: {gif_path}")
    display(Image(filename=gif_path))

---

## Additional analysis cells can go below