<a href="https://colab.research.google.com/github/reponseashimwe/dqn-atari/blob/main/FM3_Atari_Environments.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# 1. ⚙️ Installation & Imports
Run this cell first. It installs all dependencies and handles the necessary environment registration.

In [None]:
# 1. INSTALLATION, REGISTRATION, AND IMPORTS

# Install core DRL libraries
!pip install stable-baselines3[extra] gymnasium[atari] ale-py -q
# Install OpenCV and AutoROM (required dependencies for Atari wrappers)
!pip install opencv-python autorom[accept-rom-license] -q

import gymnasium as gym
import ale_py
import os
import numpy as np
from stable_baselines3 import DQN
from stable_baselines3.common.vec_env import VecFrameStack, DummyVecEnv
from stable_baselines3.common.env_util import make_atari_env
from stable_baselines3.common.callbacks import CheckpointCallback
from stable_baselines3.common.atari_wrappers import AtariWrapper
from collections import defaultdict
import gc
import json

# FIX: Explicitly register the ALE environments to avoid NamespaceNotFound
gym.register_envs(ale_py)

print("Setup complete. Running core imports.")


[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m187.2/187.2 kB[0m [31m6.6 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m434.7/434.7 kB[0m [31m11.3 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
  Building wheel for AutoROM.accept-rom-license (pyproject.toml) ... [?25l[?25hdone
Setup complete. Running core imports.


Gym has been unmaintained since 2022 and does not support NumPy 2.0 amongst other critical functionality.
Please upgrade to Gymnasium, the maintained drop-in replacement of Gym, or contact the authors of your software and request that they upgrade.
See the migration guide at https://gymnasium.farama.org/introduction/migration_guide/ for additional information.
  return datetime.utcnow().replace(tzinfo=utc)


# 2. 📁 Drive Setup and Experiment Definitions
This cells handles mounting the drive and defines the five hyperparameter sets, including the one that achieved your best result (Set 1).

In [None]:
# Drive Mount
from google.colab import drive

# --- Drive Configuration ---
# Mount Drive first
drive.mount('/content/drive')

DRIVE_PATH = '/content/drive/MyDrive'
ASSIGNMENT_FOLDER = 'DQN_Breakout_Final_Submission'
BASE_DEST_PATH = f'{DRIVE_PATH}/{ASSIGNMENT_FOLDER}/Experiments'
os.makedirs(BASE_DEST_PATH, exist_ok=True)

Mounted at /content/drive


In [None]:
# --- Global DRL Configuration ---
ENV_ID = "BreakoutNoFrameskip-v4"
TOTAL_TIMESTEPS = 1_000_000
N_ENVS = 4
TEMP_MODEL_DIR = "./temp_models/"
os.makedirs(TEMP_MODEL_DIR, exist_ok=True)

RESULTS_FILE = f'{BASE_DEST_PATH}/experiment_results.json'

# Dictionary to store results for final comparison
results_comparison = defaultdict(lambda: {'avg_reward': 0, 'params': {}})

# --- Hyperparameter Definitions (5 Sets) ---
HYPER_SETS = {
    # Set 1: Baseline
    'Set_1_Best_Policy': {
        'lr': 1e-4, 'gamma': 0.99, 'batch_size': 32,
        'epsilon_start': 1.0, 'epsilon_end': 0.05, 'epsilon_decay': 0.1
    },
    # Set 2: High LR
    'Set_2_High_LR': {
        'lr': 5e-4, 'gamma': 0.99, 'batch_size': 32,
        'epsilon_start': 1.0, 'epsilon_end': 0.05, 'epsilon_decay': 0.1
    },
    # Set 3: Low Gamma
    'Set_3_Low_Gamma': {
        'lr': 1e-4, 'gamma': 0.90, 'batch_size': 32,
        'epsilon_start': 1.0, 'epsilon_end': 0.05, 'epsilon_decay': 0.1
    },
    # Set 4: Extended Exploration
    'Set_4_Extended_Eps': {
        'lr': 1e-4, 'gamma': 0.99, 'batch_size': 32,
        'epsilon_start': 1.0, 'epsilon_end': 0.05, 'epsilon_decay': 0.5
    },
    # Set 5: Larger Batch Size (New experiment for robust tuning)
    'Set_5_Large_Batch': {
        'lr': 1e-4, 'gamma': 0.99, 'batch_size': 128,
        'epsilon_start': 1.0, 'epsilon_end': 0.05, 'epsilon_decay': 0.1
    }
}
print("Experiment definitions complete. Starting training runs.")

Experiment definitions complete. Starting training runs.


#3. 🧪 Training, Evaluation, and Drive Saving Loop (Core Logic)
This cell defines the functions and runs all five experiments, saving each to a distinct Drive folder.

In [None]:
def setup_environment(env_id, n_envs):
    env = make_atari_env(env_id, n_envs=n_envs, seed=np.random.randint(1000))
    env = VecFrameStack(env, n_stack=4)
    return env

def evaluate_agent(model, env_id, n_episodes=5):
    """Runs evaluation episodes, PRINTS individual results, and returns average."""
    eval_env = setup_environment(env_id, n_envs=1)
    episode_rewards = []

    print(f"\nStarting evaluation ({n_episodes} episodes)...")

    for i in range(n_episodes):
        obs = eval_env.reset()
        done = False
        total_reward = 0
        while not done:
            action, _ = model.predict(obs, deterministic=True)
            obs, reward, done_array, info = eval_env.step(action)
            total_reward += reward[0]
            done = done_array[0]
        episode_rewards.append(total_reward)
        print(f"Episode {i+1} finished with reward: {total_reward:.2f}")

    eval_env.close()
    mean_reward = np.mean(episode_rewards)
    print(f"--> Average Reward over {n_episodes} episodes: {mean_reward:.2f}")
    return mean_reward

def save_result_to_drive(name, reward, model_path):
    """Reads the existing JSON on Drive, updates it, and saves it back."""
    data = {}
    # 1. Load existing data if file exists
    if os.path.exists(RESULTS_FILE):
        try:
            with open(RESULTS_FILE, 'r') as f:
                data = json.load(f)
        except:
            data = {} # Start fresh if file is corrupt

    reward_value = float(reward)

    # 2. Update with new result
    data[name] = {
        'avg_reward': reward_value,
        'model_path': model_path
    }

    # 3. Write back to Drive
    with open(RESULTS_FILE, 'w') as f:
        json.dump(data, f, indent=4)
    print(f"📝 Result saved to persistent log: {RESULTS_FILE}")

def run_single_experiment(set_name):
    """Runs ONE experiment based on the set name key."""
    hyperparams = HYPER_SETS[set_name]

    print(f"\n" + "="*60)
    print(f"🚀 STARTING EXPERIMENT: {set_name}")
    print(f"Parameters: {hyperparams}")
    print(f"="*60)

    # Paths
    LOG_DIR = os.path.join(TEMP_MODEL_DIR, f"{set_name}_logs")
    TEMP_MODEL_PATH = os.path.join(TEMP_MODEL_DIR, f"{set_name}.zip")
    DRIVE_DEST_FOLDER = os.path.join(BASE_DEST_PATH, set_name)
    os.makedirs(DRIVE_DEST_FOLDER, exist_ok=True)

    # Init Environment & Model
    env = setup_environment(ENV_ID, N_ENVS)

    # Callback
    checkpoint_callback = CheckpointCallback(
        save_freq=200000 // N_ENVS, save_path=LOG_DIR, name_prefix="dqn_chkpt"
    )

    model = DQN(
        "CnnPolicy", env,
        learning_rate=hyperparams['lr'], gamma=hyperparams['gamma'],
        batch_size=hyperparams['batch_size'],
        exploration_initial_eps=hyperparams['epsilon_start'],
        exploration_final_eps=hyperparams['epsilon_end'],
        exploration_fraction=hyperparams['epsilon_decay'],
        buffer_size=50000,
        train_freq=4,
        verbose=0,
        tensorboard_log=LOG_DIR
    )

    # Train
    print("⏳ Training started... (Wait for progress bar)")
    model.learn(total_timesteps=TOTAL_TIMESTEPS, callback=checkpoint_callback, progress_bar=True)
    model.save(TEMP_MODEL_PATH)

    # Evaluate
    print(f"\n📊 Evaluating {set_name}...")
    avg_reward = evaluate_agent(model, ENV_ID, n_episodes=5)

    # Save Artifacts to Drive
    print(f"\n💾 Copying artifacts to Drive Folder: {DRIVE_DEST_FOLDER}...")
    !cp -r "$LOG_DIR" "$DRIVE_DEST_FOLDER/"
    !cp "$TEMP_MODEL_PATH" "$DRIVE_DEST_FOLDER/"

    # --- AUTOMATION STEP: Save Score to JSON ---
    final_drive_model_path = os.path.join(DRIVE_DEST_FOLDER, f"{set_name}.zip")
    save_result_to_drive(set_name, avg_reward, final_drive_model_path)

    # Cleanup
    env.close()
    del model
    del env
    gc.collect()

#▶️ Execution Cells (Run One by One)

## Run Set 1 (Best Policy)

In [None]:
run_single_experiment('Set_1_Best_Policy')


🚀 STARTING EXPERIMENT: Set_1_Best_Policy
Parameters: {'lr': 0.0001, 'gamma': 0.99, 'batch_size': 32, 'epsilon_start': 1.0, 'epsilon_end': 0.05, 'epsilon_decay': 0.1}
⏳ Training started... (Wait for progress bar)


  return datetime.utcnow().replace(tzinfo=utc)


Output()


📊 Evaluating Set_1_Best_Policy...

Starting evaluation (5 episodes)...
Episode 1 finished with reward: 4.00
Episode 2 finished with reward: 5.00
Episode 3 finished with reward: 8.00
Episode 4 finished with reward: 2.00
Episode 5 finished with reward: 8.00
--> Average Reward over 5 episodes: 5.40

💾 Copying artifacts to Drive Folder: /content/drive/MyDrive/DQN_Breakout_Final_Submission/Experiments/Set_1_Best_Policy...
📝 Result saved to persistent log: /content/drive/MyDrive/DQN_Breakout_Final_Submission/Experiments/experiment_results.json


### Run Set 2 (High LR)

In [None]:
run_single_experiment('Set_2_High_LR')


📊 Evaluating Set_2_High_LR...

Starting evaluation (5 episodes)...
Episode 1 finished with reward: 0.00
Episode 2 finished with reward: 0.00
Episode 3 finished with reward: 0.00
Episode 4 finished with reward: 0.00
Episode 5 finished with reward: 0.00
--> Average Reward over 5 episodes: 0.00

💾 Copying artifacts to Drive Folder: /content/drive/MyDrive/DQN_Breakout_Final_Submission/Experiments/Set_2_High_LR...
📝 Result saved to persistent log: /content/drive/MyDrive/DQN_Breakout_Final_Submission/Experiments/experiment_results.json


### Run Set 3 (Low Gamma)

In [None]:
run_single_experiment('Set_3_Low_Gamma')


📊 Evaluating Set_3_Low_Gamma...

Starting evaluation (5 episodes)...
Episode 1 finished with reward: 3.00
Episode 2 finished with reward: 1.00
Episode 3 finished with reward: 1.00
Episode 4 finished with reward: 1.00
Episode 5 finished with reward: 1.00
--> Average Reward over 5 episodes: 1.40

💾 Copying artifacts to Drive Folder: /content/drive/MyDrive/DQN_Breakout_Final_Submission/Experiments/Set_3_Low_Gamma...
📝 Result saved to persistent log: /content/drive/MyDrive/DQN_Breakout_Final_Submission/Experiments/experiment_results.json


### Run Set 4 (Extended Exploration)

In [None]:
run_single_experiment('Set_4_Extended_Eps')


📊 Evaluating Set_4_Extended_Eps...

Starting evaluation (5 episodes)...
Episode 1 finished with reward: 6.00
Episode 2 finished with reward: 2.00
Episode 3 finished with reward: 5.00
Episode 4 finished with reward: 1.00
Episode 5 finished with reward: 1.00
--> Average Reward over 5 episodes: 3.00

💾 Copying artifacts to Drive Folder: /content/drive/MyDrive/DQN_Breakout_Final_Submission/Experiments/Set_4_Extended_Eps...
📝 Result saved to persistent log: /content/drive/MyDrive/DQN_Breakout_Final_Submission/Experiments/experiment_results.json


### Run Set 5 (Large Batch)

In [None]:
run_single_experiment('Set_5_Large_Batch')


📊 Evaluating Set_5_Large_Batch...

Starting evaluation (5 episodes)...
Episode 1 finished with reward: 4.00
Episode 2 finished with reward: 12.00
Episode 3 finished with reward: 13.00
Episode 4 finished with reward: 4.00
Episode 5 finished with reward: 2.00
--> Average Reward over 5 episodes: 7.00

💾 Copying artifacts to Drive Folder: /content/drive/MyDrive/DQN_Breakout_Final_Submission/Experiments/Set_5_Large_Batch...
📝 Result saved to persistent log: /content/drive/MyDrive/DQN_Breakout_Final_Submission/Experiments/experiment_results.json


#4. 🏆 Final Selection and Play Script Setup
This cell determines the best model path and sets up the final play.py environment to use it.

In [None]:
# 4. AUTOMATED FINAL EVALUATION & PLAY

import gymnasium as gym
import json
from stable_baselines3 import DQN
from stable_baselines3.common.vec_env import DummyVecEnv, VecFrameStack
from stable_baselines3.common.atari_wrappers import AtariWrapper

# --- 1. AUTOMATICALLY LOAD BEST MODEL ---
if not os.path.exists(RESULTS_FILE):
    print(f"❌ Error: Results file not found at {RESULTS_FILE}")
    print("Did you run the experiments successfully?")
else:
    with open(RESULTS_FILE, 'r') as f:
        data = json.load(f)

    if not data:
        print("❌ Error: Results file is empty.")
    else:
        # Find the key with the highest 'avg_reward'
        best_set_name = max(data, key=lambda k: data[k]['avg_reward'])
        best_reward = data[best_set_name]['avg_reward']
        drive_model_path = data[best_set_name]['model_path']

        print(f"\n" + "="*60)
        print(f"🏆 AUTOMATIC WINNER: {best_set_name}")
        print(f"⭐ Average Reward: {best_reward:.2f}")
        print(f"📂 Loading Model from: {drive_model_path}")
        print("="*60)

        # Copy to local for playing
        final_path = "dqn_final_best_model.zip"
        !cp "$drive_model_path" "$final_path"

        # --- 2. PLAY SCRIPT ---
        def make_env_render():
            env = gym.make("BreakoutNoFrameskip-v4", render_mode="rgb_array")
            env = AtariWrapper(env)
            return env

        def play_agent(model_path):
            print(f"\n🎥 Starting Agent Evaluation Video Generation...")
            try:
                model = DQN.load(model_path)
            except:
                print("Model file not found. Cannot play.")
                return

            eval_env = DummyVecEnv([make_env_render])
            eval_env = VecFrameStack(eval_env, n_stack=4)

            for i in range(3):
                obs = eval_env.reset()
                done = False
                total_reward = 0
                while not done:
                    action, _ = model.predict(obs, deterministic=True)
                    obs, reward, done_array, info = eval_env.step(action)
                    eval_env.render() # Needed for internal state updates
                    total_reward += reward[0]
                    done = done_array[0]
                print(f"Visual Episode {i+1} finished with reward: {total_reward:.2f}")
            eval_env.close()
            print("\n✅ READY!")

        play_agent(final_path)


🏆 AUTOMATIC WINNER: Set_5_Large_Batch
⭐ Average Reward: 7.00
📂 Loading Model from: /content/drive/MyDrive/DQN_Breakout_Final_Submission/Experiments/Set_5_Large_Batch/Set_5_Large_Batch.zip

🎥 Starting Agent Evaluation Video Generation...
Visual Episode 1 finished with reward: 3.00
Visual Episode 2 finished with reward: 13.00
Visual Episode 3 finished with reward: 5.00

✅ READY!
