# ViZDoom Deep RL Ablation Study

**Master's RL Course Final Project**

## Experiment Plan (12-Hour Budget)

| Priority | Phase | Experiment | Scenarios | Seeds | Est. Time | Status |
|----------|-------|-----------|-----------|-------|-----------|--------|
| - | 1 | DQN vs Deep SARSA | All 3 | 3 | ~8h | **DONE** |
| 1 | 2 | N-step (n=3) | All 3 | 2 | ~4h | Pending |
| 2 | 3a | PER | Basic + TakeCover | 2 | ~3h | Pending |
| 3 | 3b | DDQN | Basic + TakeCover | 2 | ~2.5h | Pending |
| 4 | 3c | Dueling+DDQN | Basic | 2 | ~1.5h | Pending |

---
## 1. Environment Setup

In [None]:
# ONE-CLICK SETUP
# On Colab: installs system and Python dependencies, starts a virtual display,
# mounts Google Drive, fetches the repository and links its results/ directory
# to a Drive folder so that results are written to Drive.
# Anywhere else: only makes sure a local results/ directory exists.
import sys, os

IN_COLAB = 'google.colab' in sys.modules
print(f"Running on Colab: {IN_COLAB}")


def _run_step(command, required=False):
    """Run a shell command and report a non-zero exit status.

    Args:
        command: Shell command line handed to ``os.system``.
        required: When True a failure raises ``RuntimeError``; otherwise the
            failure is only printed so best-effort steps do not abort setup.

    Returns:
        True if the command exited with status 0, False otherwise.

    Raises:
        RuntimeError: If the command fails and ``required`` is True.
    """
    status = os.system(command)
    if status == 0:
        return True
    message = f"Command failed (status {status}): {command}"
    if required:
        raise RuntimeError(message)
    print(f"WARNING: {message}")
    return False


if IN_COLAB:
    import shutil
    # xvfb-run on PATH is used as the marker that the apt packages are present.
    if not shutil.which('xvfb-run'):
        print("[1/5] Installing system dependencies...")
        _run_step('apt-get update -qq && apt-get install -qq -y libboost-all-dev libsdl2-dev libopenal-dev xvfb python3-opengl')
    else:
        print("[1/5] System dependencies already installed")

    try:
        import vizdoom
        print("[2/5] Python packages already installed")
    except ImportError:
        print("[2/5] Installing Python packages...")
        _run_step('pip install -q vizdoom==1.2.4 gymnasium==1.2.3 torch torchvision wandb hydra-core omegaconf matplotlib opencv-python numpy')

    # Started in the background with '&', so the exit status of this call says
    # nothing about whether Xvfb itself came up; it is therefore not checked.
    os.system('Xvfb :1 -screen 0 1024x768x24 &')
    os.environ['DISPLAY'] = ':1'
    print("[3/5] Virtual display configured")

    from google.colab import drive
    if not os.path.exists('/content/drive/MyDrive'):
        drive.mount('/content/drive')
    print("[4/5] Google Drive mounted")

    DRIVE_OUTPUT = '/content/drive/MyDrive/vizdoom-ablation-results'
    REPO_PATH = '/content/vizdoom-ablation'
    os.makedirs(DRIVE_OUTPUT, exist_ok=True)

    if os.path.exists(REPO_PATH):
        # A failed pull still leaves a usable (if stale) checkout: warn only.
        _run_step(f'cd {REPO_PATH} && git pull')
    else:
        # Without a checkout nothing below can work, so stop with a clear error.
        _run_step(f'git clone https://github.com/lynxrafu/visdoom-ablation.git {REPO_PATH}', required=True)
    print("[5/5] Repository ready")

    os.chdir(REPO_PATH)
    if os.path.islink('results'):
        # Drop a link that is dangling or points somewhere other than the
        # current DRIVE_OUTPUT; only the link is removed, never its target.
        if os.path.realpath('results') != os.path.realpath(DRIVE_OUTPUT):
            os.remove('results')
    elif os.path.isdir('results'):
        # A real results/ directory in the checkout is discarded (as before)
        # so that the path can be replaced by the link to Drive.
        shutil.rmtree('results')
    elif os.path.exists('results'):
        os.remove('results')
    # lexists() is also True for a dangling link, unlike exists().
    if not os.path.lexists('results'):
        os.symlink(DRIVE_OUTPUT, 'results')

    from pathlib import Path
    # Each metadata.json found under the Drive folder is counted as one run.
    runs = list(Path(DRIVE_OUTPUT).glob('**/metadata.json'))
    print(f"\nReady! Existing runs: {len(runs)}")
else:
    os.makedirs('results', exist_ok=True)
    print("Local mode - results saved to: results/")

In [None]:
# Verify imports and WandB login
# Fails early if a core dependency is missing, before any long training run starts.
import sys
# Put the current working directory first on the module search path.
# NOTE(review): presumably so the repository's own packages (e.g. src) resolve — confirm.
sys.path.insert(0, '.')

import torch, gymnasium, vizdoom, wandb
print(f"PyTorch: {torch.__version__}, CUDA: {torch.cuda.is_available()}")

# Import the ViZDoom Gymnasium wrapper; the fallback retries the same submodule
# with the from-import spelling.
# NOTE(review): presumably imported for the side effect of registering the
# Vizdoom* environment ids with Gymnasium — confirm.
try:
    import vizdoom.gymnasium_wrapper
except ImportError:
    from vizdoom import gymnasium_wrapper

# Authenticate with Weights & Biases before training starts.
wandb.login()
print("All imports successful!")

---
## 2. Configuration

In [None]:
# CONFIGURATION - EDIT HERE ONLY!
# Shared settings read by the experiment cells below.

# --- Environment ids passed as env.scenario ---
SCENARIO_BASIC = 'VizdoomBasic-v0'
SCENARIO_TAKECOVER = 'VizdoomTakeCover-v0'
SCENARIO_DEATHMATCH = 'VizdoomDeathmatch-v0'

# --- Training budget ---
EPISODES = 2000  # passed as training.num_episodes
SEEDS = [1, 2]   # seeds looped over by the experiment cells

# --- Ablation hyperparameters ---
N_STEP_VALUE = 3      # passed as agent.n_step (Phase 2)
PER_ALPHA = 0.6       # passed as buffer.per_alpha (Phase 3a)
PER_BETA_START = 0.4  # passed as buffer.per_beta_start (Phase 3a)

print("Episodes: {}, Seeds: {}, N-step: {}".format(EPISODES, SEEDS, N_STEP_VALUE))

---
## 3. Experiment Status

In [None]:
# Check experiment status
import json
from pathlib import Path
from collections import defaultdict

def print_status():
    """Print a per-phase completion table for the runs stored under results/.

    Every ``metadata.json`` found below ``results/`` is read and the run is
    assigned to an experiment phase from its ``agent_type``, ``n_step`` and
    ``buffer_prioritized`` fields. A run is reported as done once a
    ``summary.json`` sits next to its metadata file; any other run is counted
    as still running. Metadata files that cannot be read, parsed or
    classified are skipped with a one-line warning instead of silently.

    The six planned phases are always listed (as ``NOT STARTED`` when empty);
    any further category that was found, such as a different n-step value or
    an ``Other_*`` agent, is listed after them.
    """
    results_path = Path('results')
    if not results_path.exists():
        print("No results yet")
        return

    experiments = defaultdict(list)
    for meta_file in results_path.glob('**/metadata.json'):
        try:
            with open(meta_file) as f:
                meta = json.load(f)
            agent = meta.get('agent_type', 'unknown')
            n_step = meta.get('n_step', 1)
            per = meta.get('buffer_prioritized', False)
            scenario = meta.get('scenario_short', 'unknown')
            seed = meta.get('seed', 0)
            finished = (meta_file.parent / 'summary.json').exists()

            # Order matters: the plain-DQN and Deep SARSA baselines are matched
            # first, then n-step, then PER, and only then the agent variants.
            if agent == 'dqn' and n_step == 1 and not per:
                cat = 'Phase1_DQN'
            elif agent == 'deep_sarsa':
                cat = 'Phase1_DeepSARSA'
            elif n_step > 1:
                cat = f'Phase2_Nstep{n_step}'
            elif per:
                cat = 'Phase3a_PER'
            elif agent == 'ddqn':
                cat = 'Phase3b_DDQN'
            elif 'dueling' in agent:
                cat = 'Phase3c_Dueling'
            else:
                cat = f'Other_{agent}'
        except (OSError, ValueError, TypeError, AttributeError) as err:
            # OSError: unreadable file; ValueError: invalid JSON or encoding;
            # TypeError/AttributeError: fields of an unexpected type or shape.
            print(f"Skipping unreadable metadata {meta_file}: {err}")
            continue

        experiments[cat].append((scenario, seed, 'DONE' if finished else 'RUNNING'))

    planned = ['Phase1_DQN', 'Phase1_DeepSARSA', 'Phase2_Nstep3', 'Phase3a_PER', 'Phase3b_DDQN', 'Phase3c_Dueling']
    # Categories outside the plan would otherwise be collected but never shown.
    unplanned = sorted(cat for cat in experiments if cat not in planned)

    print("=" * 60)
    for cat in planned + unplanned:
        runs = experiments.get(cat, [])
        if runs:
            num_done = sum(1 for r in runs if r[2] == 'DONE')
            print(f"{cat:25} | {num_done}/{len(runs)} done")
        else:
            print(f"{cat:25} | NOT STARTED")
    print("=" * 60)

# Show the completion table for whatever is currently stored under results/.
print_status()

---
## 4. Phase 2: N-Step (n=3)
Explores the TD–Monte Carlo spectrum: a larger n reduces bootstrapping bias at the cost of higher variance.

In [None]:
# PHASE 2: N-STEP EXPERIMENTS (~4h)
for seed in SEEDS:
    print(f"\n>>> N-step: Basic, seed={seed}")
    !python experiments/train.py agent.type=dqn agent.n_step={N_STEP_VALUE} env.scenario={SCENARIO_BASIC} training.num_episodes={EPISODES} seed={seed}

for seed in SEEDS:
    print(f"\n>>> N-step: TakeCover, seed={seed}")
    !python experiments/train.py agent.type=dqn agent.n_step={N_STEP_VALUE} env.scenario={SCENARIO_TAKECOVER} training.num_episodes={EPISODES} seed={seed}

print(f"\n>>> N-step: Deathmatch, seed=1")
!python experiments/train.py agent.type=dqn agent.n_step={N_STEP_VALUE} env.scenario={SCENARIO_DEATHMATCH} training.num_episodes={EPISODES} seed=1

print("\nPHASE 2 COMPLETE!")

---
## 5. Phase 3a: PER
Prioritized Experience Replay (PER) samples transitions with a high TD error more often than uniform replay would.

In [None]:
# PHASE 3a: PER EXPERIMENTS (~3h)
for seed in SEEDS:
    print(f"\n>>> PER: Basic, seed={seed}")
    !python experiments/train.py agent.type=dqn buffer.prioritized=true buffer.per_alpha={PER_ALPHA} buffer.per_beta_start={PER_BETA_START} env.scenario={SCENARIO_BASIC} training.num_episodes={EPISODES} seed={seed}

for seed in SEEDS:
    print(f"\n>>> PER: TakeCover, seed={seed}")
    !python experiments/train.py agent.type=dqn buffer.prioritized=true buffer.per_alpha={PER_ALPHA} buffer.per_beta_start={PER_BETA_START} env.scenario={SCENARIO_TAKECOVER} training.num_episodes={EPISODES} seed={seed}

print("\nPHASE 3a PER COMPLETE!")

---
## 6. Phase 3b: DDQN
Double DQN (DDQN) decouples action selection from action evaluation, which reduces the overestimation of Q-values.

In [None]:
# PHASE 3b: DDQN EXPERIMENTS (~2.5h)
for seed in SEEDS:
    print(f"\n>>> DDQN: Basic, seed={seed}")
    !python experiments/train.py agent.type=ddqn env.scenario={SCENARIO_BASIC} training.num_episodes={EPISODES} seed={seed}

for seed in SEEDS:
    print(f"\n>>> DDQN: TakeCover, seed={seed}")
    !python experiments/train.py agent.type=ddqn env.scenario={SCENARIO_TAKECOVER} training.num_episodes={EPISODES} seed={seed}

print("\nPHASE 3b DDQN COMPLETE!")

---
## 7. Phase 3c: Dueling+DDQN
The dueling architecture estimates the state value V(s) and the action advantages A(s,a) separately, which helps in states where the choice of action matters little.

In [None]:
# PHASE 3c: DUELING+DDQN EXPERIMENTS (~1.5h)
for seed in SEEDS:
    print(f"\n>>> Dueling+DDQN: Basic, seed={seed}")
    !python experiments/train.py agent.type=dueling_ddqn env.scenario={SCENARIO_BASIC} training.num_episodes={EPISODES} seed={seed}

print("\nPHASE 3c DUELING COMPLETE!")
print("\nALL EXPERIMENTS COMPLETE! Run Section 8 to analyze.")

---
## 8. Results Analysis

In [None]:
# ANALYZE RESULTS
# Loads the stored runs through the project's ResultsAnalyzer, shows the
# summary table and writes a report (with plots) to results/report.
from src.utils.analysis import ResultsAnalyzer

analyzer = ResultsAnalyzer("results/")
# NOTE(review): load_all() appears to return the number of runs it loaded —
# confirm against src/utils/analysis.py.
num_loaded = analyzer.load_all()
print(f"Loaded {num_loaded} experiment results")

# Skip the summary and the report entirely when nothing was loaded.
if num_loaded > 0:
    summary_df = analyzer.summary()
    # display() is not imported here; it relies on the notebook environment
    # providing it.
    display(summary_df)
    
    analyzer.generate_report(output_dir="results/report", include_plots=True)
    print("\nReport generated in results/report/")