# Prince's DQN Atari Experiments

This notebook runs 10 Deep Q-Network (DQN) experiments with Stable-Baselines3, each on a different Atari game with its own hyperparameter configuration.

## Installing Dependencies

In [None]:
print("Installing required packages...")
!pip uninstall numpy -y -q

print("Installing compatible numpy...")
!pip install "numpy>=1.26.0,<2.1" --force-reinstall -q

print("\nVerifying installations...")
import gymnasium as gym
import ale_py
from stable_baselines3 import DQN
import torch
import numpy

print(f"Gymnasium version: {gym.__version__}")
print(f"ALE-Py version: {ale_py.__version__}")
print(f"PyTorch version: {torch.__version__}")
print(f"Numpy version: {numpy.__version__}")
print(f"CUDA available: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    print(f"GPU: {torch.cuda.get_device_name(0)}")

gym.register_envs(ale_py)

## Create Training Script

In [None]:
import os

# Output directories shared by all experiments: saved models and TensorBoard logs.
os.makedirs('models', exist_ok=True)
os.makedirs('logs', exist_ok=True)

# Source of the standalone training script. It is written to train.py below so
# that each experiment can be launched as its own process.
train_script = '''# Standalone DQN training script for a single Atari environment.
# Usage: python train.py --env ALE/Seaquest-v5 --experiment my_run [options]
import argparse
import gymnasium as gym
import ale_py
from stable_baselines3 import DQN
from stable_baselines3.common.atari_wrappers import AtariWrapper
from stable_baselines3.common.vec_env import VecFrameStack, DummyVecEnv
from stable_baselines3.common.callbacks import CheckpointCallback
import os

gym.register_envs(ale_py)


def create_atari_env(env_name):
    """Return a zero-argument factory that builds the wrapped Atari env.

    DummyVecEnv expects callables, so the environment is created lazily
    inside the returned function.
    """
    def _init():
        env = gym.make(env_name, render_mode=None)
        env = AtariWrapper(env)
        return env
    return _init


def train_dqn(env_name, total_timesteps, learning_rate, gamma, batch_size,
              exploration_initial_eps, exploration_final_eps, exploration_fraction,
              experiment_name, seed=None):
    """Train a DQN agent and save the final model.

    Args:
        env_name: Gymnasium environment id, e.g. "ALE/Seaquest-v5".
        total_timesteps: Number of environment steps to train for.
        learning_rate: Optimizer learning rate passed to DQN.
        gamma: Discount factor passed to DQN.
        batch_size: Minibatch size passed to DQN.
        exploration_initial_eps: Initial epsilon of the exploration schedule.
        exploration_final_eps: Final epsilon of the exploration schedule.
        exploration_fraction: Fraction of training over which epsilon decays.
        experiment_name: Used to name the log directory, the checkpoint
            directory and the final model file.
        seed: Optional random seed passed to DQN; None keeps the default
            unseeded behavior.

    Returns:
        Path of the saved final model ("models/<experiment_name>_final.zip").
    """
    print(f"Starting experiment: {experiment_name}")
    print(f"Environment: {env_name}")
    print(f"Timesteps: {total_timesteps:,}")
    print("Hyperparameters:")
    print(f"  LR: {learning_rate}, Gamma: {gamma}, Batch: {batch_size}")
    print(f"  Exploration: {exploration_initial_eps} -> {exploration_final_eps}")

    log_dir = f"logs/{experiment_name}"
    os.makedirs(log_dir, exist_ok=True)
    # Make sure the output directory exists when the script is run on its own.
    os.makedirs("models", exist_ok=True)

    # Adjust parameters for short runs
    buffer_size = min(50000, total_timesteps)
    learning_starts = min(1000, total_timesteps // 10)
    checkpoint_freq = max(5000, total_timesteps // 2)

    env = DummyVecEnv([create_atari_env(env_name)])
    env = VecFrameStack(env, n_stack=4)

    # Close the environment even if model construction or training fails.
    try:
        model = DQN(
            "CnnPolicy",
            env,
            learning_rate=learning_rate,
            gamma=gamma,
            batch_size=batch_size,
            buffer_size=buffer_size,
            learning_starts=learning_starts,
            target_update_interval=500,
            exploration_initial_eps=exploration_initial_eps,
            exploration_final_eps=exploration_final_eps,
            exploration_fraction=exploration_fraction,
            train_freq=4,
            gradient_steps=1,
            verbose=1,
            tensorboard_log=log_dir,
            seed=seed
        )

        checkpoint_callback = CheckpointCallback(
            save_freq=checkpoint_freq,
            save_path=f"models/{experiment_name}",
            name_prefix="dqn"
        )

        model.learn(
            total_timesteps=total_timesteps,
            callback=[checkpoint_callback],
            progress_bar=True
        )

        model_path = f"models/{experiment_name}_final.zip"
        model.save(model_path)
        print(f"Model saved to {model_path}")
    finally:
        env.close()

    return model_path


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--env", type=str, required=True)
    parser.add_argument("--timesteps", type=int, default=10000)
    parser.add_argument("--lr", type=float, default=0.0001)
    parser.add_argument("--gamma", type=float, default=0.99)
    parser.add_argument("--batch-size", type=int, default=32)
    parser.add_argument("--eps-start", type=float, default=1.0)
    parser.add_argument("--eps-end", type=float, default=0.05)
    parser.add_argument("--exp-fraction", type=float, default=0.1)
    parser.add_argument("--experiment", type=str, required=True)
    parser.add_argument("--seed", type=int, default=None)

    args = parser.parse_args()

    train_dqn(
        env_name=args.env,
        total_timesteps=args.timesteps,
        learning_rate=args.lr,
        gamma=args.gamma,
        batch_size=args.batch_size,
        exploration_initial_eps=args.eps_start,
        exploration_final_eps=args.eps_end,
        exploration_fraction=args.exp_fraction,
        experiment_name=args.experiment,
        seed=args.seed
    )
'''

# Explicit encoding so the written file does not depend on the platform default.
with open('train.py', 'w', encoding='utf-8') as f:
    f.write(train_script)

print("Training script created successfully")

## Define Experiments

Currently set to 10,000 timesteps for quick testing.


In [None]:
# Change this to 1000000 for full training
TIMESTEPS = 10000

# Hyperparameters shared by every experiment unless explicitly overridden.
BASELINE_HYPERPARAMETERS = {
    "learning_rate": 0.0001,
    "gamma": 0.99,
    "batch_size": 32,
    "exploration_initial_eps": 1.0,
    "exploration_final_eps": 0.05,
    "exploration_fraction": 0.1,
}


def make_experiment(name, env, description, **overrides):
    """Build one experiment config from the baseline plus overrides.

    Args:
        name: Experiment identifier.
        env: Gymnasium environment id.
        description: Short human-readable summary of the experiment.
        **overrides: Hyperparameters that differ from BASELINE_HYPERPARAMETERS.

    Returns:
        Dict with the keys name, env, timesteps, the six hyperparameters and
        description.

    Raises:
        ValueError: If an override is not a known hyperparameter name
            (guards against typos silently creating an unused key).
    """
    unknown = set(overrides) - set(BASELINE_HYPERPARAMETERS)
    if unknown:
        raise ValueError(f"Unknown hyperparameter(s): {sorted(unknown)}")
    return {
        "name": name,
        "env": env,
        "timesteps": TIMESTEPS,
        **BASELINE_HYPERPARAMETERS,
        **overrides,
        "description": description,
    }


def describe_run(timesteps):
    """Return (mode label, estimated total time) for a timestep budget.

    Only the two documented budgets have a known label and estimate; any other
    value is reported as a custom run instead of being mislabeled as the full
    1M-timestep mode.
    """
    known_modes = {
        10000: ("TEST MODE (10K timesteps)", "10-20 minutes"),
        1000000: ("FULL MODE (1M timesteps)", "8-12 hours"),
    }
    fallback = (
        f"CUSTOM MODE ({timesteps:,} timesteps)",
        "unknown (depends on timesteps and hardware)",
    )
    return known_modes.get(timesteps, fallback)


EXPERIMENTS = [
    make_experiment(
        "prince_exp_1_seaquest_baseline", "ALE/Seaquest-v5",
        "Baseline with standard hyperparameters",
    ),
    make_experiment(
        "prince_exp_2_asterix_high_lr", "ALE/Asterix-v5",
        "High learning rate test",
        learning_rate=0.0005,
    ),
    make_experiment(
        "prince_exp_3_boxing_low_gamma", "ALE/Boxing-v5",
        "Low gamma for short-term rewards",
        gamma=0.95,
    ),
    make_experiment(
        "prince_exp_4_krull_large_batch", "ALE/Krull-v5",
        "Large batch size for stability",
        batch_size=128,
    ),
    make_experiment(
        "prince_exp_5_riverraid_extended_exploration", "ALE/Riverraid-v5",
        "Extended exploration phase",
        exploration_fraction=0.3,
    ),
    make_experiment(
        "prince_exp_6_qbert_high_gamma", "ALE/Qbert-v5",
        "High gamma for long-term planning",
        gamma=0.995,
    ),
    make_experiment(
        "prince_exp_7_mspacman_combined", "ALE/MsPacman-v5",
        "Balanced hyperparameters",
        learning_rate=0.0003, gamma=0.98, batch_size=64, exploration_fraction=0.2,
    ),
    make_experiment(
        "prince_exp_8_zaxxon_very_large_batch", "ALE/Zaxxon-v5",
        "Very large batch size",
        batch_size=256,
    ),
    make_experiment(
        "prince_exp_9_battlezone_slow_exploration", "ALE/BattleZone-v5",
        "Very slow exploration decay",
        exploration_fraction=0.5,
    ),
    make_experiment(
        "prince_exp_10_frostbite_aggressive", "ALE/Frostbite-v5",
        "Aggressive hyperparameters",
        learning_rate=0.0005, gamma=0.98, batch_size=128,
        exploration_final_eps=0.02, exploration_fraction=0.15,
    ),
]

# Experiment names are used for log/model paths, so they must not collide.
assert len({exp["name"] for exp in EXPERIMENTS}) == len(EXPERIMENTS), "duplicate experiment name"

mode_text, estimated_time = describe_run(TIMESTEPS)

print(f"Mode: {mode_text}")
print(f"Timesteps per experiment: {TIMESTEPS:,}")
print(f"Estimated total time: {estimated_time}")
print(f"\nDefined {len(EXPERIMENTS)} experiments:")
for i, exp in enumerate(EXPERIMENTS, 1):
    print(f"{i}. {exp['name']}: {exp['env']}")

## Run All Experiments

In [None]:
import json
import subprocess
import sys
import time
from datetime import datetime

RESULTS_PATH = 'experiment_results.json'


def save_results(results, path=RESULTS_PATH):
    """Write the list of result records to `path` as indented JSON."""
    with open(path, 'w') as f:
        json.dump(results, f, indent=2)


results = []

print(f"Starting {len(EXPERIMENTS)} experiments at {datetime.now()}")
print("=" * 80)

for i, exp in enumerate(EXPERIMENTS, 1):
    print(f"\n[{i}/{len(EXPERIMENTS)}] {exp['name']}")
    print(f"Environment: {exp['env']}")
    print(f"Timesteps: {exp['timesteps']:,}")
    print("-" * 80)

    # sys.executable launches train.py with the same interpreter (and therefore
    # the same installed packages) as this kernel, rather than whatever
    # `python` happens to resolve to on PATH.
    cmd = [
        sys.executable, 'train.py',
        '--env', exp['env'],
        '--timesteps', str(exp['timesteps']),
        '--lr', str(exp['learning_rate']),
        '--gamma', str(exp['gamma']),
        '--batch-size', str(exp['batch_size']),
        '--eps-start', str(exp['exploration_initial_eps']),
        '--eps-end', str(exp['exploration_final_eps']),
        '--exp-fraction', str(exp['exploration_fraction']),
        '--experiment', exp['name']
    ]

    # Fields common to completed and failed runs, so that every record exposes
    # the same descriptive keys to whatever reads the results file.
    result = {
        'experiment': exp['name'],
        'env': exp['env'],
        'timesteps': exp['timesteps'],
        'hyperparameters': {
            'learning_rate': exp['learning_rate'],
            'gamma': exp['gamma'],
            'batch_size': exp['batch_size'],
            'exploration_fraction': exp['exploration_fraction']
        },
        'description': exp['description']
    }

    start_time = time.time()
    try:
        subprocess.run(cmd, check=True)
    except subprocess.CalledProcessError as e:
        # A failing experiment is recorded and the loop moves on to the next one.
        print(f"\nError: {e}")
        result['status'] = 'failed'
        result['error'] = str(e)
    else:
        result['status'] = 'completed'

    elapsed_time = time.time() - start_time
    result['duration_seconds'] = elapsed_time
    result['duration_minutes'] = f"{elapsed_time/60:.1f}"
    results.append(result)

    if result['status'] == 'completed':
        print(f"\nCompleted in {elapsed_time/60:.1f} minutes")

    # Persist after every experiment so an interrupted long run keeps the
    # results gathered so far.
    save_results(results)

    print("=" * 80)

print(f"\nAll experiments completed at {datetime.now()}")

# Final write also covers the case where EXPERIMENTS is empty.
save_results(results)

print("Results saved to experiment_results.json")

## View Results Summary

In [None]:
import json

# Load the records written by the experiment runner.
with open('experiment_results.json', 'r') as f:
    results = json.load(f)

print("EXPERIMENT RESULTS SUMMARY")
print("=" * 100)

completed = [r for r in results if r['status'] == 'completed']
failed = [r for r in results if r['status'] == 'failed']

print(f"\nCompleted: {len(completed)}/{len(results)}")
if failed:
    print(f"Failed: {len(failed)}")

print("\nDetailed Results:")
print("-" * 100)

for i, result in enumerate(results, 1):
    print(f"\n{i}. {result['experiment']}")
    # Failed records may carry only experiment/status/error, so 'env' is read
    # defensively instead of raising KeyError on the first failure.
    print(f"   Environment: {result.get('env', 'unknown')}")
    print(f"   Status: {result['status']}")
    if result['status'] == 'completed':
        print(f"   Duration: {result['duration_minutes']} minutes")
        print(f"   Timesteps: {result['timesteps']:,}")
        print(f"   Description: {result['description']}")
    elif 'error' in result:
        # Surface the recorded failure reason.
        print(f"   Error: {result['error']}")

print("\n" + "=" * 100)

## Download Results

In [None]:
import os
import shutil

print("Creating ZIP archives...")

# (source directory, archive base name, confirmation message) for each bundle.
# A directory is archived only when it exists.
archive_jobs = (
    ('models', 'prince_models', "Created: prince_models.zip"),
    ('logs', 'prince_logs', "Created: prince_logs.zip"),
)
for source_dir, archive_base, created_message in archive_jobs:
    if not os.path.exists(source_dir):
        continue
    shutil.make_archive(archive_base, 'zip', source_dir)
    print(created_message)

print("\nFiles ready for download:")
for listing_line in (
    "- experiment_results.json",
    "- prince_models.zip",
    "- prince_logs.zip",
):
    print(listing_line)
print("\nUse the file browser on the left to download.")