# ViZDoom Ablation Study - Results Analysis

**IEEE Report Data Generation**

Analyzes all 34 experiment runs and generates tables/plots for the report.

**Sections:**
1. Setup & Load Data
2. Experiment Overview
3. Phase 1: DQN vs Deep SARSA
4. Phase 2: N-Step Ablation
5. Phase 3: Extensions (PER, DDQN, Dueling)
   - 5b. Phase 2 & 3 Learning Curves
   - 5c. Statistical Significance Tests (t-test)
6. IEEE Report Tables (LaTeX)
7. Export Results
8. Key Findings Summary
9. Export All Outputs to Drive

---
## 1. Setup & Load Data

In [None]:
import json
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from pathlib import Path
from collections import defaultdict
import warnings
# Silence library warnings for the whole notebook session.
warnings.filterwarnings('ignore')

# Plot appearance shared by every figure below
plt.style.use('seaborn-v0_8-whitegrid')
plt.rcParams.update({'figure.dpi': 150, 'font.size': 10})

# Colab is detected by checking whether its package is already loaded
import sys
IN_COLAB = 'google.colab' in sys.modules

if not IN_COLAB:
    # Local run: results are read from a folder relative to the working directory
    RESULTS_DIR = Path('results')
else:
    # Colab run: results live on the mounted Google Drive
    from google.colab import drive
    drive.mount('/content/drive')
    RESULTS_DIR = Path('/content/drive/MyDrive/vizdoom-ablation-results')

results_dir_exists = RESULTS_DIR.exists()
print(f"Results directory: {RESULTS_DIR}")
print(f"Exists: {results_dir_exists}")

# Show the visible (non-hidden) sub-folders, one per run date
if results_dir_exists:
    date_folders = []
    for entry in RESULTS_DIR.iterdir():
        if entry.is_dir() and not entry.name.startswith('.'):
            date_folders.append(entry.name)
    print(f"Date folders: {sorted(date_folders)}")

In [None]:
def _categorize_run(agent, n_step, per):
    """Return the (phase, method) labels for one run configuration.

    Rules are checked in order and the first match wins, so a run with
    n_step > 1 is labelled Phase2 regardless of PER or a non-DQN agent
    (except deep_sarsa, which is always Phase1).
    """
    if agent == 'dqn' and n_step == 1 and not per:
        return 'Phase1', 'DQN'
    if agent == 'deep_sarsa':
        return 'Phase1', 'DeepSARSA'
    if n_step > 1:
        return 'Phase2', f'DQN_n{n_step}'
    if per:
        return 'Phase3a', 'DQN_PER'
    if agent == 'ddqn':
        return 'Phase3b', 'DDQN'
    if 'dueling' in agent:
        return 'Phase3c', 'Dueling_DDQN'
    return 'Other', agent


def load_all_results(results_dir):
    """Load all experiment metadata and summaries.

    Walks ``results_dir/<date folder>/<run folder>`` and builds one record
    per run folder that contains a ``metadata.json``. Only date folders
    whose name starts with ``202`` are scanned; hidden folders are skipped.

    Args:
        results_dir: Root results directory (``str`` or ``Path``).

    Returns:
        A list of dicts, one per run, holding the phase/method labels, the
        hyper-parameters read from ``metadata.json``, the headline numbers
        read from ``summary.json`` (``None`` when that file or key is
        absent) and ``training_df``: the parsed ``training_log.csv`` or
        ``None`` when the CSV does not exist. An empty list is returned
        when ``results_dir`` is not an existing directory.
    """
    results_dir = Path(results_dir)
    results = []

    # A missing root used to raise from iterdir(); report it and return nothing.
    if not results_dir.is_dir():
        print(f"Results directory not found: {results_dir}")
        return results

    for date_folder in sorted(results_dir.iterdir()):
        if not date_folder.is_dir() or date_folder.name.startswith('.'):
            continue
        if not date_folder.name.startswith('202'):  # Skip non-date folders
            continue

        for run_folder in sorted(date_folder.iterdir()):
            if not run_folder.is_dir():
                continue

            meta_file = run_folder / 'metadata.json'
            summary_file = run_folder / 'summary.json'
            csv_file = run_folder / 'training_log.csv'

            if not meta_file.exists():
                continue

            try:
                with open(meta_file, encoding='utf-8') as f:
                    meta = json.load(f)

                # Summary is optional; every summary-derived field falls back to None
                summary = {}
                if summary_file.exists():
                    with open(summary_file, encoding='utf-8') as f:
                        summary = json.load(f)

                # Per-episode training log is optional as well
                training_df = None
                if csv_file.exists():
                    training_df = pd.read_csv(csv_file)

                agent = meta.get('agent_type', 'unknown')
                n_step = meta.get('n_step', 1)
                per = meta.get('buffer_prioritized', False)
                phase, method = _categorize_run(agent, n_step, per)

                results.append({
                    'phase': phase,
                    'method': method,
                    'scenario': meta.get('scenario_short', meta.get('scenario', 'unknown')),
                    'seed': meta.get('seed', 0),
                    'n_step': n_step,
                    'per': per,
                    'lr': meta.get('learning_rate', 0.0001),
                    'gamma': meta.get('gamma', 0.99),
                    # Correct field names from summary.json
                    'final_reward': summary.get('final_avg_reward_100', None),
                    'best_eval': summary.get('best_eval_reward', None),
                    'total_episodes': summary.get('num_episodes', meta.get('num_episodes', None)),
                    'training_hours': summary.get('total_time_hours', None),
                    'run_dir': str(run_folder),
                    'run_name': meta.get('run_name', run_folder.name),
                    'training_df': training_df,
                    'date': date_folder.name
                })
            except Exception as e:
                # Deliberately best-effort: one unreadable run must not abort
                # the whole analysis, so it is reported and skipped.
                print(f"Error loading {run_folder.name}: {e}")
                continue

    return results

# Read every run found under RESULTS_DIR
all_results = load_all_results(RESULTS_DIR)
print(f"\nLoaded {len(all_results)} experiment runs")

# One table row per run; the per-episode training_df is kept out of the table
summary_rows = []
for run in all_results:
    summary_rows.append({key: value for key, value in run.items() if key != 'training_df'})
df = pd.DataFrame(summary_rows)

# Quick sanity overview, only meaningful when at least one run was loaded
if len(df):
    phases_seen = df['phase'].unique()
    methods_seen = df['method'].unique()
    scenarios_seen = df['scenario'].unique()
    print(f"\nPhases: {phases_seen}")
    print(f"Methods: {methods_seen}")
    print(f"Scenarios: {scenarios_seen}")

---
## 2. Experiment Overview

In [None]:
# Per-run listing of every loaded experiment
banner = "=" * 80
print(banner)
print("ALL EXPERIMENTS")
print(banner)

display_cols = ['phase', 'method', 'scenario', 'seed', 'final_reward', 'best_eval', 'training_hours']
ordered_runs = df[display_cols].sort_values(by=['phase', 'method', 'scenario', 'seed'])
print(ordered_runs.to_string(index=False))

In [None]:
# Aggregate statistics for each (phase, method, scenario) group
print("\n" + "=" * 80)
print("SUMMARY BY METHOD AND SCENARIO")
print("=" * 80)

# Named aggregation yields the flat column names directly
grouped_runs = df.groupby(['phase', 'method', 'scenario'])
summary = grouped_runs.agg(
    n_runs=('seed', 'count'),
    final_mean=('final_reward', 'mean'),
    final_std=('final_reward', 'std'),
    best_mean=('best_eval', 'mean'),
    best_std=('best_eval', 'std'),
    hours=('training_hours', 'mean'),
).round(2)

print(summary.to_string())

In [None]:
# Run count and cumulative training time for each phase
print("\n" + "=" * 80)
print("RUNS PER PHASE")
print("=" * 80)

phase_summary = df.groupby('phase').agg(
    runs=('seed', 'count'),
    total_hours=('training_hours', 'sum'),
).round(2)

# Each tuple is (phase label, run count, summed hours)
for phase, run_count, hours in phase_summary.itertuples(name=None):
    print(f"{phase:10} | {int(run_count):2} runs | {hours:.1f} hours")

print(f"{'TOTAL':10} | {len(df):2} runs | {df['training_hours'].sum():.1f} hours")

---
## 3. Phase 1: DQN vs Deep SARSA

In [None]:
# Phase 1 detailed comparison
phase1 = df[df['phase'] == 'Phase1'].copy()

print("=" * 80)
print("PHASE 1: DQN vs Deep SARSA (Off-policy vs On-policy)")
print("=" * 80)

# Pivot table: mean/std of both reward metrics per scenario and method
phase1_pivot = phase1.pivot_table(
    values=['final_reward', 'best_eval'],
    index='scenario',
    columns='method',
    aggfunc=['mean', 'std']
).round(2)

print(phase1_pivot.to_string())

# Winner per scenario, decided on the mean best-eval reward across seeds
print("\n--- Winner by Scenario ---")
for scenario in ['Basic', 'TakeCover', 'Deathmatch']:
    dqn_data = phase1[(phase1['method'] == 'DQN') & (phase1['scenario'] == scenario)]
    sarsa_data = phase1[(phase1['method'] == 'DeepSARSA') & (phase1['scenario'] == scenario)]

    # mean() is NaN when a method has no runs or no recorded best_eval
    dqn_best = dqn_data['best_eval'].mean()
    sarsa_best = sarsa_data['best_eval'].mean()

    # Never declare a winner from missing data (previously -inf produced a
    # bogus winner with an inf/nan difference).
    if pd.isna(dqn_best) or pd.isna(sarsa_best):
        print(f"{scenario:12} | Data missing")
        continue

    winner = 'DQN' if dqn_best > sarsa_best else 'DeepSARSA'
    diff = dqn_best - sarsa_best
    print(f"{scenario:12} | DQN: {dqn_best:8.2f} | SARSA: {sarsa_best:8.2f} | Winner: {winner} ({diff:+.2f})")

In [None]:
# Phase 1 Learning Curves: one panel per scenario, one line per method
fig, axes = plt.subplots(1, 3, figsize=(14, 4))

scenarios = ['Basic', 'TakeCover', 'Deathmatch']
colors = {'DQN': '#1f77b4', 'DeepSARSA': '#ff7f0e'}

for ax, scenario in zip(axes, scenarios):
    for method in ('DQN', 'DeepSARSA'):
        # Gather one reward series (indexed by episode) per matching run
        reward_series = []
        for run in all_results:
            if run['phase'] != 'Phase1' or run['method'] != method or run['scenario'] != scenario:
                continue
            log = run['training_df']
            if log is None:
                continue
            if 'episode' in log.columns and 'reward' in log.columns:
                reward_series.append(log.set_index('episode')['reward'])

        if not reward_series:
            continue

        # Align the runs on the episode index, then aggregate across them
        per_seed = pd.concat(reward_series, axis=1)

        # Rolling average for smoothing
        window = 50
        mean_smooth = per_seed.mean(axis=1).rolling(window, min_periods=1).mean()
        std_smooth = per_seed.std(axis=1).rolling(window, min_periods=1).mean()

        band_low = mean_smooth.values - std_smooth.values
        band_high = mean_smooth.values + std_smooth.values

        ax.plot(mean_smooth.index, mean_smooth.values,
                label=method, color=colors[method], linewidth=1.5)
        ax.fill_between(mean_smooth.index, band_low, band_high,
                        alpha=0.2, color=colors[method])

    ax.set_title(f'{scenario}', fontsize=11, fontweight='bold')
    ax.set_xlabel('Episode')
    ax.set_ylabel('Reward')
    ax.legend(loc='lower right')
    ax.grid(True, alpha=0.3)

plt.suptitle('Phase 1: DQN vs Deep SARSA Learning Curves (3 seeds avg)', fontsize=12, fontweight='bold')
plt.tight_layout()
plt.savefig('phase1_learning_curves.png', dpi=150, bbox_inches='tight')
plt.show()
print("Saved: phase1_learning_curves.png")

---
## 4. Phase 2: N-Step Ablation (n=1 vs n=3)

In [None]:
# Phase 2: Compare DQN n=1 (baseline) vs DQN n=3
print("=" * 80)
print("PHASE 2: N-STEP ABLATION (TD/MC Bridge)")
print("=" * 80)

# Phase 1 DQN runs double as the n=1 baseline, relabelled for the comparison
is_phase1_dqn = (df['method'] == 'DQN') & (df['phase'] == 'Phase1')
baseline_n1 = df[is_phase1_dqn].assign(method='DQN_n1')

# Every run categorised as Phase2
nstep_n3 = df[df['phase'] == 'Phase2'].copy()

# Stack baseline and n-step runs into one frame
nstep_compare = pd.concat([baseline_n1, nstep_n3])

# Mean / std / run count of best_eval per scenario and method
nstep_pivot = pd.pivot_table(
    nstep_compare,
    values='best_eval',
    index='scenario',
    columns='method',
    aggfunc=['mean', 'std', 'count'],
).round(2)

print(nstep_pivot.to_string())

# Impact analysis
print("\n--- N-step Impact (n=3 vs n=1) ---")
for scenario in ['Basic', 'TakeCover', 'Deathmatch']:
    scenario_rows = nstep_compare[nstep_compare['scenario'] == scenario]
    n1 = scenario_rows.loc[scenario_rows['method'] == 'DQN_n1', 'best_eval'].mean()
    n3 = scenario_rows.loc[scenario_rows['method'] == 'DQN_n3', 'best_eval'].mean()

    if pd.isna(n1) or pd.isna(n3):
        print(f"{scenario:12} | Data missing")
        continue

    diff = n3 - n1
    # Relative change is reported as 0 when the baseline mean is exactly zero
    if n1 != 0:
        pct = (diff / abs(n1)) * 100
    else:
        pct = 0
    verdict = "BETTER" if diff > 0 else "WORSE"
    print(f"{scenario:12} | n=1: {n1:8.2f} | n=3: {n3:8.2f} | Diff: {diff:+8.2f} ({pct:+6.1f}%) {verdict}")

---
## 5. Phase 3: Extensions (PER, DDQN, Dueling)

In [None]:
# Phase 3: All extensions vs baseline
print("=" * 80)
print("PHASE 3: DQN EXTENSIONS")
print("=" * 80)

# Baseline DQN (the Phase 1 DQN runs, relabelled)
baseline = df[(df['method'] == 'DQN') & (df['phase'] == 'Phase1')].copy()
baseline['method'] = 'Baseline_DQN'

# All Phase 3 (Phase3a / Phase3b / Phase3c)
phase3 = df[df['phase'].str.startswith('Phase3')].copy()

# Combine
ext_compare = pd.concat([baseline, phase3])

# Pivot for best_eval: rows are scenarios, columns are methods
ext_pivot = ext_compare.pivot_table(
    values='best_eval',
    index='scenario',
    columns='method',
    aggfunc='mean'
).round(2)

print("Mean Best Eval by Method:")
print(ext_pivot.to_string())

# Improvement over baseline
print("\n--- Improvement over Baseline DQN ---")
for scenario in ['Basic', 'TakeCover']:
    print(f"\n{scenario}:")

    # The baseline column is absent when no baseline run has a best_eval,
    # and the cell is NaN when only this scenario lacks one.
    has_baseline = scenario in ext_pivot.index and 'Baseline_DQN' in ext_pivot.columns
    base_val = ext_pivot.loc[scenario, 'Baseline_DQN'] if has_baseline else None

    if base_val is None or pd.isna(base_val):
        print("  Baseline data missing")
        continue

    for method in ['DQN_PER', 'DDQN', 'Dueling_DDQN']:
        if method not in ext_pivot.columns:
            continue
        val = ext_pivot.loc[scenario, method]
        if pd.notna(val):
            diff = val - base_val
            # Relative change is reported as 0 when the baseline is exactly zero
            pct = (diff / abs(base_val)) * 100 if base_val != 0 else 0
            print(f"  {method:15} | {val:8.2f} | vs baseline: {diff:+8.2f} ({pct:+6.1f}%)")

In [None]:
# Bar chart: All methods comparison (mean best-eval reward, std as error bars)
fig, axes = plt.subplots(1, 2, figsize=(12, 5))

scenarios = ['Basic', 'TakeCover']
method_order = ['Baseline_DQN', 'DQN_n3', 'DQN_PER', 'DDQN', 'Dueling_DDQN']
method_labels = ['DQN\n(baseline)', 'DQN\nn=3', 'DQN\n+PER', 'DDQN', 'Dueling\n+DDQN']
colors = ['#7f7f7f', '#1f77b4', '#2ca02c', '#ff7f0e', '#d62728']

# Combine all data
all_compare = pd.concat([baseline, nstep_n3, phase3])

for ax, scenario in zip(axes, scenarios):
    means = []
    stds = []
    valid_labels = []
    valid_colors = []

    for method, label, color in zip(method_order, method_labels, colors):
        in_group = (all_compare['method'] == method) & (all_compare['scenario'] == scenario)
        # Runs without a recorded best_eval must not count as data points,
        # otherwise they yield NaN bars / NaN error bars.
        data = all_compare[in_group]['best_eval'].dropna()
        if data.empty:
            continue
        means.append(data.mean())
        # std is undefined for a single run; draw no error bar in that case
        stds.append(data.std() if len(data) > 1 else 0)
        valid_labels.append(label)
        valid_colors.append(color)

    if not means:
        continue

    x = range(len(means))
    bars = ax.bar(x, means, yerr=stds, capsize=4, color=valid_colors, alpha=0.8, edgecolor='black', linewidth=0.5)
    ax.set_xticks(x)
    ax.set_xticklabels(valid_labels, fontsize=9)
    ax.set_ylabel('Best Eval Reward')
    ax.set_title(f'{scenario}', fontsize=11, fontweight='bold')
    ax.grid(True, alpha=0.3, axis='y')

    # Value labels: padding is in points, so placement no longer depends on
    # the reward scale of the scenario (the old offset was 5 data units).
    ax.bar_label(bars, labels=[f'{mean:.0f}' for mean in means], padding=3, fontsize=8)

plt.suptitle('All Methods Comparison (Best Eval Reward)', fontsize=12, fontweight='bold')
plt.tight_layout()
plt.savefig('all_methods_comparison.png', dpi=150, bbox_inches='tight')
plt.show()
print("Saved: all_methods_comparison.png")

---
## 5b. Phase 2 & 3 Learning Curves

In [None]:
# Phase 2: N-step Learning Curves
# One panel per scenario; each panel overlays the Phase 1 DQN runs (n=1)
# and the Phase 2 runs, averaged across runs and smoothed over episodes.
fig, axes = plt.subplots(1, 3, figsize=(14, 4))

scenarios = ['Basic', 'TakeCover', 'Deathmatch']
colors = {'DQN': '#1f77b4', 'DQN_n3': '#ff7f0e'}
labels = {'DQN': 'DQN (n=1)', 'DQN_n3': 'DQN (n=3)'}

for idx, scenario in enumerate(scenarios):
    ax = axes[idx]

    # n=1 curve comes from Phase 1, the n-step curve from Phase 2
    for method, phase_filter in [('DQN', 'Phase1'), ('DQN_n3', 'Phase2')]:
        # NOTE(review): for the 'DQN_n3' entry, every Phase 2 run whose method
        # starts with 'DQN_n' is pooled under the "n=3" label, so runs with
        # another n (if any exist) would be averaged in as well; confirm.
        method_results = [r for r in all_results
                         if r['phase'] == phase_filter
                         and (r['method'] == method or (method == 'DQN_n3' and r['method'].startswith('DQN_n')))
                         and r['scenario'] == scenario
                         and r['training_df'] is not None]

        if not method_results:
            continue

        # One reward series per run, indexed by episode; logs lacking the
        # 'episode' or 'reward' column are ignored
        all_rewards = []
        for r in method_results:
            tdf = r['training_df']
            if 'episode' in tdf.columns and 'reward' in tdf.columns:
                rewards = tdf.set_index('episode')['reward']
                all_rewards.append(rewards)

        if all_rewards:
            # Columns are runs, rows are episodes; aggregate across runs
            combined = pd.concat(all_rewards, axis=1)
            mean_reward = combined.mean(axis=1)
            std_reward = combined.std(axis=1)

            # Rolling mean over up to 50 episodes (shorter at the start)
            window = 50
            mean_smooth = mean_reward.rolling(window, min_periods=1).mean()
            std_smooth = std_reward.rolling(window, min_periods=1).mean()

            ax.plot(mean_smooth.index, mean_smooth.values,
                   label=labels[method], color=colors[method], linewidth=1.5)
            # Shaded band: smoothed mean +/- smoothed across-run std
            ax.fill_between(mean_smooth.index,
                           mean_smooth.values - std_smooth.values,
                           mean_smooth.values + std_smooth.values,
                           alpha=0.2, color=colors[method])

    ax.set_title(f'{scenario}', fontsize=11, fontweight='bold')
    ax.set_xlabel('Episode')
    ax.set_ylabel('Reward')
    ax.legend(loc='lower right')
    ax.grid(True, alpha=0.3)

plt.suptitle('Phase 2: N-Step Learning Curves (n=1 vs n=3)', fontsize=12, fontweight='bold')
plt.tight_layout()
# Written to the current working directory
plt.savefig('phase2_learning_curves.png', dpi=150, bbox_inches='tight')
plt.show()
print("Saved: phase2_learning_curves.png")

In [None]:
# Phase 3: Extensions Learning Curves (Basic and TakeCover)
fig, axes = plt.subplots(1, 2, figsize=(12, 5))

scenarios = ['Basic', 'TakeCover']
method_colors = {
    'DQN': '#7f7f7f',
    'DQN_PER': '#2ca02c',
    'DDQN': '#ff7f0e',
    'Dueling_DDQN': '#d62728'
}
method_labels = {
    'DQN': 'DQN (baseline)',
    'DQN_PER': 'DQN + PER',
    'DDQN': 'DDQN',
    'Dueling_DDQN': 'Dueling + DDQN'
}

for ax, scenario in zip(axes, scenarios):
    for method in ('DQN', 'DQN_PER', 'DDQN', 'Dueling_DDQN'):
        # Gather one reward series (indexed by episode) per matching run.
        # The baseline is taken from Phase 1, the extensions from Phase 3*.
        reward_series = []
        for run in all_results:
            if method == 'DQN':
                phase_matches = run['phase'] == 'Phase1'
            else:
                phase_matches = run['phase'].startswith('Phase3')
            if not phase_matches or run['method'] != method or run['scenario'] != scenario:
                continue
            log = run['training_df']
            if log is None:
                continue
            if 'episode' in log.columns and 'reward' in log.columns:
                reward_series.append(log.set_index('episode')['reward'])

        if not reward_series:
            continue

        # Mean across runs, then a rolling mean across episodes
        per_seed = pd.concat(reward_series, axis=1)
        window = 50
        mean_smooth = per_seed.mean(axis=1).rolling(window, min_periods=1).mean()

        ax.plot(mean_smooth.index, mean_smooth.values,
                label=method_labels[method], color=method_colors[method], linewidth=1.5)

    ax.set_title(f'{scenario}', fontsize=11, fontweight='bold')
    ax.set_xlabel('Episode')
    ax.set_ylabel('Reward')
    ax.legend(loc='lower right', fontsize=8)
    ax.grid(True, alpha=0.3)

plt.suptitle('Phase 3: DQN Extensions Learning Curves', fontsize=12, fontweight='bold')
plt.tight_layout()
plt.savefig('phase3_learning_curves.png', dpi=150, bbox_inches='tight')
plt.show()
print("Saved: phase3_learning_curves.png")

---
## 5c. Statistical Significance Tests (t-test)

In [None]:
# Statistical Significance Tests (Independent t-tests)
from scipy import stats

print("=" * 80)
print("STATISTICAL SIGNIFICANCE (Independent t-tests)")
print("=" * 80)

def ttest_comparison(group1, group2, equal_var=True):
    """Run an independent two-sample t-test and label its significance.

    Args:
        group1: Sequence of observations for the first group.
        group2: Sequence of observations for the second group.
        equal_var: Forwarded to ``scipy.stats.ttest_ind``. ``True`` (the
            default, matching the previous behaviour) assumes equal
            variances; ``False`` runs Welch's t-test.

    Returns:
        ``(t_stat, p_value, sig)``. ``sig`` is ``"***"`` for p < 0.001,
        ``"**"`` for p < 0.01, ``"*"`` for p < 0.05 and ``"ns"`` otherwise.
        When either group has fewer than two observations the result is
        ``(None, None, "n/a")``. When the test is undefined (NaN p-value,
        e.g. both groups constant and equal) the NaN statistics are returned
        with ``sig == "n/a"`` instead of being reported as "ns".
    """
    if len(group1) < 2 or len(group2) < 2:
        return None, None, "n/a"
    t_stat, p_value = stats.ttest_ind(group1, group2, equal_var=equal_var)
    # Every comparison with NaN is False, which used to fall through to "ns"
    if np.isnan(p_value):
        return t_stat, p_value, "n/a"
    sig = "***" if p_value < 0.001 else "**" if p_value < 0.01 else "*" if p_value < 0.05 else "ns"
    return t_stat, p_value, sig

def _best_eval_values(frame, method, scenario):
    """Return the non-missing best_eval values for one method/scenario pair."""
    selected = frame[(frame['method'] == method) & (frame['scenario'] == scenario)]
    return selected['best_eval'].dropna()


def _print_ttest_row(row_label, first, second):
    """Print one result row; nothing is printed when either group is empty."""
    if len(first) == 0 or len(second) == 0:
        return
    t_stat, p_val, sig = ttest_comparison(first.values, second.values)
    if t_stat is None:
        print(f"{row_label} | {'n/a':>8} | {'n/a':>8} | {sig:>4}")
    else:
        print(f"{row_label} | {t_stat:>+8.3f} | {p_val:>8.4f} | {sig:>4}")


# Phase 1: DQN vs Deep SARSA
print("\n--- Phase 1: DQN vs Deep SARSA ---")
print(f"{'Scenario':<12} | {'t-stat':>8} | {'p-value':>8} | {'Sig':>4}")
print("-" * 45)
for scenario in ['Basic', 'TakeCover', 'Deathmatch']:
    _print_ttest_row(
        f"{scenario:<12}",
        _best_eval_values(phase1, 'DQN', scenario),
        _best_eval_values(phase1, 'DeepSARSA', scenario),
    )

# Phase 2: n=1 vs n=3
print("\n--- Phase 2: N-step (n=1 vs n=3) ---")
print(f"{'Scenario':<12} | {'t-stat':>8} | {'p-value':>8} | {'Sig':>4}")
print("-" * 45)
for scenario in ['Basic', 'TakeCover', 'Deathmatch']:
    _print_ttest_row(
        f"{scenario:<12}",
        _best_eval_values(nstep_compare, 'DQN_n1', scenario),
        _best_eval_values(nstep_compare, 'DQN_n3', scenario),
    )

# Phase 3: Extensions vs Baseline
print("\n--- Phase 3: Extensions vs Baseline DQN ---")
print(f"{'Scenario':<10} {'Method':<15} | {'t-stat':>8} | {'p-value':>8} | {'Sig':>4}")
print("-" * 60)
for scenario in ['Basic', 'TakeCover']:
    baseline_data = _best_eval_values(all_compare, 'Baseline_DQN', scenario)

    for method in ['DQN_PER', 'DDQN', 'Dueling_DDQN']:
        _print_ttest_row(
            f"{scenario:<10} {method:<15}",
            baseline_data,
            _best_eval_values(all_compare, method, scenario),
        )

print("\n" + "=" * 60)
print("Significance levels: *** p<0.001, ** p<0.01, * p<0.05, ns=not significant")

---
## 6. IEEE Report Tables (LaTeX)

In [None]:
print("=" * 80)
print("LATEX TABLES FOR IEEE REPORT")
print("=" * 80)


def _format_mean_std(values):
    """Format a metric column as a LaTeX cell.

    Returns "-" when no value is recorded, "$mean$" when the std is
    undefined (a single value) and "$mean \\pm std$" otherwise.
    """
    values = values.dropna()
    if values.empty:
        return "-"
    mean = values.mean()
    std = values.std()
    if pd.isna(std):
        return f"${mean:.1f}$"
    return f"${mean:.1f} \\pm {std:.1f}$"


# Table 1: Phase 1
print("\n% ===== TABLE 1: DQN vs Deep SARSA =====")
print(r"\begin{table}[h]")
print(r"\centering")
print(r"\caption{Phase 1: DQN vs Deep SARSA Comparison}")
print(r"\label{tab:phase1}")
print(r"\begin{tabular}{|l|l|c|c|}")
print(r"\hline")
print(r"\textbf{Scenario} & \textbf{Method} & \textbf{Best Eval} & \textbf{Final Reward} \\")
print(r"\hline")

for scenario in ['Basic', 'TakeCover', 'Deathmatch']:
    for method in ['DQN', 'DeepSARSA']:
        data = phase1[(phase1['scenario'] == scenario) & (phase1['method'] == method)]
        if len(data) > 0:
            # Both metric columns share one formatter, so a single-seed final
            # reward shows its mean (it used to collapse to "-") and an
            # all-missing metric shows "-" rather than "$nan$".
            best_str = _format_mean_std(data['best_eval'])
            final_str = _format_mean_std(data['final_reward'])

            print(f"{scenario} & {method} & {best_str} & {final_str} \\\\")
    # Rule after each scenario group
    print(r"\hline")

print(r"\end{tabular}")
print(r"\end{table}")

In [None]:
# Table 2: All Extensions
print("\n% ===== TABLE 2: All Extensions Comparison =====")
print(r"\begin{table}[h]")
print(r"\centering")
print(r"\caption{DQN Extensions Comparison (Best Eval Reward)}")
print(r"\label{tab:extensions}")
print(r"\begin{tabular}{|l|c|c|c|}")
print(r"\hline")
print(r"\textbf{Method} & \textbf{Basic} & \textbf{TakeCover} & \textbf{Deathmatch} \\")
print(r"\hline")

all_compare = pd.concat([baseline, nstep_n3, phase3])
# Internal method label -> name shown in the report
method_names = {
    'Baseline_DQN': 'DQN (baseline)',
    'DQN_n3': 'DQN + n-step (n=3)',
    'DQN_PER': 'DQN + PER',
    'DDQN': 'Double DQN',
    'Dueling_DDQN': 'Dueling + DDQN'
}

for method in ['Baseline_DQN', 'DQN_n3', 'DQN_PER', 'DDQN', 'Dueling_DDQN']:
    row = [method_names.get(method, method)]
    for scenario in ['Basic', 'TakeCover', 'Deathmatch']:
        in_group = (all_compare['method'] == method) & (all_compare['scenario'] == scenario)
        # Runs without a recorded best_eval are excluded so they can neither
        # produce a "$nan$" cell nor silently drop the +/- term.
        data = all_compare[in_group]['best_eval'].dropna()
        if len(data) > 0:
            mean = data.mean()
            # std is undefined for a single run; show the mean alone then
            std = data.std() if len(data) > 1 else 0
            row.append(f"${mean:.1f} \\pm {std:.1f}$" if std > 0 else f"${mean:.1f}$")
        else:
            row.append("-")
    print(" & ".join(row) + " \\\\")

print(r"\hline")
print(r"\end{tabular}")
print(r"\end{table}")

---
## 7. Export Results

In [None]:
# Write the per-run results table to a CSV in the working directory
export_cols = ['phase', 'method', 'scenario', 'seed', 'n_step', 'per',
               'final_reward', 'best_eval', 'training_hours', 'date', 'run_name']
sort_keys = ['phase', 'method', 'scenario', 'seed']
export_df = df.loc[:, export_cols].sort_values(by=sort_keys)

export_df.to_csv('all_results.csv', index=False)
print("Exported: all_results.csv")

# Show the same table inline
print("\n" + "=" * 80)
print("COMPLETE RESULTS TABLE")
print("=" * 80)
display(export_df)

---
## 8. Key Findings Summary

In [ ]:
print("=" * 80)
print("KEY FINDINGS FOR IEEE REPORT")
print("=" * 80)

# Baseline, n-step and Phase 3 runs in one frame
all_compare = pd.concat([baseline, nstep_n3, phase3])

print("\n1. PHASE 1: DQN vs Deep SARSA")
print("-" * 40)
for scenario in ['Basic', 'TakeCover', 'Deathmatch']:
    dqn = phase1[(phase1['method'] == 'DQN') & (phase1['scenario'] == scenario)]['best_eval'].mean()
    sarsa = phase1[(phase1['method'] == 'DeepSARSA') & (phase1['scenario'] == scenario)]['best_eval'].mean()
    # Same guard as the Phase 2 section below: comparing against NaN used to
    # report "DeepSARSA wins" whenever either side had no data.
    if pd.isna(dqn) or pd.isna(sarsa):
        print(f"   {scenario}: data missing")
        continue
    winner = 'DQN' if dqn > sarsa else 'DeepSARSA'
    print(f"   {scenario}: {winner} wins (DQN={dqn:.1f}, SARSA={sarsa:.1f})")

print("\n2. PHASE 2: N-Step Impact")
print("-" * 40)
for scenario in ['Basic', 'TakeCover', 'Deathmatch']:
    n1 = nstep_compare[(nstep_compare['method'] == 'DQN_n1') & (nstep_compare['scenario'] == scenario)]['best_eval'].mean()
    n3 = nstep_compare[(nstep_compare['method'] == 'DQN_n3') & (nstep_compare['scenario'] == scenario)]['best_eval'].mean()
    if pd.notna(n1) and pd.notna(n3):
        verdict = "IMPROVED" if n3 > n1 else "DEGRADED"
        print(f"   {scenario}: n=3 {verdict} ({n3-n1:+.1f})")

print("\n3. PHASE 3: Best Extension per Scenario")
print("-" * 40)
for scenario in ['Basic', 'TakeCover']:
    scenario_data = all_compare[all_compare['scenario'] == scenario]
    # Mean best_eval per method, computed once. Methods with no recorded
    # value are dropped so idxmax never runs on an all-NaN series.
    # Note: the candidates include Baseline_DQN and the n-step runs.
    method_means = scenario_data.groupby('method')['best_eval'].mean().dropna()
    if not method_means.empty:
        best = method_means.idxmax()
        best_val = method_means.max()
        print(f"   {scenario}: {best} ({best_val:.1f})")

print("\n4. TOTAL TRAINING TIME")
print("-" * 40)
print(f"   {df['training_hours'].sum():.1f} hours across {len(df)} experiments")

---
## 9. Export All Outputs to Drive

Save all plots, tables, and analysis data to Drive for local sync.

In [None]:
# =============================================================================
# Copy every generated artefact into the results folder (Drive on Colab)
# =============================================================================
import shutil

# Destination folder, created next to the run folders if not already there
OUTPUT_DIR = RESULTS_DIR / 'analysis_output'
OUTPUT_DIR.mkdir(exist_ok=True)

print("=" * 80)
print("EXPORTING ALL OUTPUTS TO DRIVE")
print("=" * 80)

# 1. Figures written to the working directory by the plotting cells
plot_files = [
    'phase1_learning_curves.png',
    'phase2_learning_curves.png',
    'phase3_learning_curves.png',
    'all_methods_comparison.png'
]

print("\n1. Saving plots...")
for fig_name in plot_files:
    if not Path(fig_name).exists():
        print(f"   ✗ {fig_name} (not found)")
        continue
    shutil.copy(fig_name, OUTPUT_DIR / fig_name)
    print(f"   ✓ {fig_name}")

# 2. Results table
print("\n2. Saving CSV...")
csv_target = OUTPUT_DIR / 'all_results.csv'
export_df.to_csv(csv_target, index=False)
print(f"   ✓ all_results.csv ({len(export_df)} rows)")

# 3. Export complete analysis JSON for report writing
print("\n3. Saving analysis JSON...")

def safe_float(val):
    """Return ``val`` as a built-in float, or None when pandas treats it as missing."""
    return None if pd.isna(val) else float(val)

# Skeleton of the exported document; the per-phase sections are filled below
analysis_export = {
    'meta': {
        'total_experiments': len(df),
        'total_training_hours': safe_float(df['training_hours'].sum()),
        'date_generated': pd.Timestamp.now().isoformat(),
        'scenarios': list(df['scenario'].unique()),
        'methods': list(df['method'].unique()),
        'phases': list(df['phase'].unique()),
    },
    'phase1': {},       # DQN vs Deep SARSA
    'phase2': {},       # N-step ablation
    'phase3': {},       # Extensions
    'all_results': []   # Complete results table
}


def _select_runs(frame, scenario, method):
    """Rows of ``frame`` belonging to one scenario/method pair."""
    return frame[(frame['scenario'] == scenario) & (frame['method'] == method)]


def _best_eval_stats(runs):
    """Mean/std of best_eval plus the number of rows in ``runs``."""
    return {
        'best_eval_mean': safe_float(runs['best_eval'].mean()),
        'best_eval_std': safe_float(runs['best_eval'].std()),
        'n_seeds': len(runs)
    }


# Phase 1 detailed stats (a scenario entry exists even when it has no runs)
for scenario in ['Basic', 'TakeCover', 'Deathmatch']:
    scenario_entry = {}
    for method in ['DQN', 'DeepSARSA']:
        data = _select_runs(phase1, scenario, method)
        if len(data) == 0:
            continue
        scenario_entry[method] = {
            'best_eval_mean': safe_float(data['best_eval'].mean()),
            'best_eval_std': safe_float(data['best_eval'].std()),
            'final_reward_mean': safe_float(data['final_reward'].mean()),
            'final_reward_std': safe_float(data['final_reward'].std()),
            'training_hours_mean': safe_float(data['training_hours'].mean()),
            'n_seeds': len(data)
        }
    analysis_export['phase1'][scenario] = scenario_entry

# Phase 2 stats, keyed by the short labels 'n1' / 'n3'
for scenario in ['Basic', 'TakeCover', 'Deathmatch']:
    scenario_entry = {}
    for method_label, method_filter in [('n1', 'DQN_n1'), ('n3', 'DQN_n3')]:
        data = _select_runs(nstep_compare, scenario, method_filter)
        if len(data) == 0:
            continue
        scenario_entry[method_label] = _best_eval_stats(data)
    analysis_export['phase2'][scenario] = scenario_entry

# Phase 3 stats
for scenario in ['Basic', 'TakeCover']:
    scenario_entry = {}
    for method in ['Baseline_DQN', 'DQN_PER', 'DDQN', 'Dueling_DDQN']:
        data = _select_runs(all_compare, scenario, method)
        if len(data) == 0:
            continue
        scenario_entry[method] = _best_eval_stats(data)
    analysis_export['phase3'][scenario] = scenario_entry

# All results as records, with missing numbers stored as null
analysis_export['all_results'].extend(
    {
        'phase': row['phase'],
        'method': row['method'],
        'scenario': row['scenario'],
        'seed': int(row['seed']) if pd.notna(row['seed']) else None,
        'final_reward': safe_float(row['final_reward']),
        'best_eval': safe_float(row['best_eval']),
        'training_hours': safe_float(row['training_hours'])
    }
    for _, row in export_df.iterrows()
)

# Save JSON
json_path = OUTPUT_DIR / 'analysis_complete.json'
with open(json_path, 'w') as f:
    json.dump(analysis_export, f, indent=2)
print(f"   ✓ analysis_complete.json")

# 4. Summary
print("\n" + "=" * 80)
print("EXPORT COMPLETE")
print("=" * 80)
print(f"\nAll outputs saved to: {OUTPUT_DIR}")
print(f"\nLocal sync path (after Google Drive sync):")
print(f"   G:\\Drive'ım\\vizdoom-ablation-results\\analysis_output\\")
print(f"\nFiles exported:")
print(f"   - 4 PNG plots (learning curves, bar charts)")
print(f"   - all_results.csv ({len(export_df)} experiments)")
print(f"   - analysis_complete.json (full analysis data)")