# Transfer Learning Analysis for Atari Games

This notebook analyzes transfer learning results including:
1. Learning curves for each experiment
2. Transfer benefit analysis (comparing transfer vs from-scratch performance)
3. Visualizations split by algorithm

In [None]:
import os
import json
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path
from tensorboard.backend.event_processing import event_accumulator
import warnings
warnings.filterwarnings('ignore')

# Set style
# Notebook-wide plotting defaults: seaborn "whitegrid" theme, a 12x6 default
# figure size and a base font size of 10. The plotting cells below pass their
# own figsize where they need a different one.
sns.set_style("whitegrid")
plt.rcParams['figure.figsize'] = (12, 6)
plt.rcParams['font.size'] = 10

## Configuration

Define the results directory and experiment parameters.

In [None]:
# Configuration
# RESULTS_DIR is the root directory scanned for experiment sub-directories;
# OUTPUT_DIR receives every figure and CSV written by the cells below.
RESULTS_DIR = "results"
OUTPUT_DIR = "analysis_plots"
# Create the output directory up front; exist_ok=True makes re-running this cell harmless.
os.makedirs(OUTPUT_DIR, exist_ok=True)

# Games and algorithms from your config
# NOTE(review): GAMES and ALGORITHMS are not referenced by any code visible in
# this notebook (the algorithm and game names actually used are parsed from
# the experiment directory names) - confirm whether they are still needed.
GAMES = ["Pong", "Breakout", "SpaceInvaders", "Tennis"]
ALGORITHMS = ["dqn", "ppo", "qrdqn"]

# Metrics to extract from TensorBoard
# NOTE(review): the functions below take a single `metric` argument that
# defaults to "rollout/ep_rew_mean"; this list itself is not read by the
# visible code.
METRICS = [
    "rollout/ep_rew_mean",  # Episode reward
    "rollout/ep_len_mean",  # Episode length
    "train/loss",            # Training loss
]

## Helper Functions

In [None]:
def load_tensorboard_data(log_dir, metric="rollout/ep_rew_mean"):
    """
    Read one scalar series out of a TensorBoard log directory.

    Loading is best-effort: any failure is reported with a printed message
    and an empty result is returned instead of raising.

    Args:
        log_dir: Directory containing the TensorBoard event files
        metric: Tag of the scalar series to read

    Returns:
        DataFrame with columns: step, value. It is empty when the tag is
        not present in the logs or when loading fails.
    """
    try:
        accumulator = event_accumulator.EventAccumulator(log_dir)
        accumulator.Reload()

        scalar_tags = accumulator.Tags()['scalars']
        if metric in scalar_tags:
            # One record per logged scalar event
            records = []
            for event in accumulator.Scalars(metric):
                records.append({'step': event.step, 'value': event.value})
            return pd.DataFrame(records)

        print(f"Warning: Metric '{metric}' not found in {log_dir}")
        return pd.DataFrame(columns=['step', 'value'])
    except Exception as err:
        print(f"Error loading {log_dir}: {err}")
        return pd.DataFrame(columns=['step', 'value'])


def parse_experiment_name(exp_name):
    """
    Parse experiment name to extract metadata.

    Expected format: {algorithm}_{source}_to_{target}_{timestamp}
    or: {algorithm}_{source}_to_{target}_pretrained_{timestamp}

    The name is split on '_'; the first token is the algorithm, the tokens
    directly before and after the first 'to' are the source and target games,
    and everything after the target (or after 'pretrained', when present) is
    re-joined with '_' to form the timestamp.

    Args:
        exp_name: Experiment directory name

    Returns:
        dict with algorithm, source, target, pretrained, timestamp,
        or None when the name does not follow the expected format (fewer
        than four tokens, no 'to' token, or no algorithm/source token before
        'to' or target token after it).
    """
    parts = exp_name.split('_')

    if len(parts) < 4:
        return None

    # Find 'to' index
    try:
        to_idx = parts.index('to')
    except ValueError:
        return None

    # 'to' must have an algorithm and a source before it and a target after
    # it. Without this guard a leading 'to' would make parts[to_idx - 1] wrap
    # around to the last token, and a trailing 'to' would raise IndexError.
    if to_idx < 2 or to_idx + 1 >= len(parts):
        return None

    algorithm = parts[0]
    source = parts[to_idx - 1]
    target = parts[to_idx + 1]

    # The timestamp starts after the 'pretrained' marker when there is one,
    # otherwise right after the target token.
    pretrained = 'pretrained' in parts
    if pretrained:
        timestamp_start = parts.index('pretrained') + 1
    else:
        timestamp_start = to_idx + 2
    timestamp = '_'.join(parts[timestamp_start:])

    return {
        'algorithm': algorithm,
        'source': source,
        'target': target,
        'pretrained': pretrained,
        'timestamp': timestamp
    }


def find_experiments(results_dir, pretrained_only=True):
    """
    Find all experiments in the results directory.

    Every sub-directory of `results_dir` whose name is accepted by
    parse_experiment_name() counts as one experiment. Directory names are
    visited in sorted order so the returned list is the same on every run.

    Args:
        results_dir: Path to results directory
        pretrained_only: If True, only include pretrained experiments

    Returns:
        List of dicts with experiment metadata and paths. Each dict holds the
        parsed fields (algorithm, source, target, pretrained, timestamp) plus
        'name', 'path', 'source_logs' and 'target_logs'; a log entry is None
        when that sub-directory does not exist.
        - If multiple experiments exist for the same
          (algorithm, source, target, pretrained) combination, only the
          latest one (by timestamp) is kept. The pretrained flag is part of
          the key so that, with pretrained_only=False, a pretrained run and
          a from-scratch run of the same pair are both retained instead of
          one evicting the other.
        An empty list is returned (after printing a message) when
        `results_dir` is not an existing directory.
    """
    experiments = []

    # isdir (rather than exists) also rejects a plain file, on which
    # os.listdir would raise.
    if not os.path.isdir(results_dir):
        print(f"Results directory '{results_dir}' not found!")
        return experiments

    # Non-experiment directories that live next to the experiment folders
    skipped_dirs = {'slurm_scripts', 'slurm_scripts_pretrained', 'slurm_logs'}

    for exp_name in sorted(os.listdir(results_dir)):
        exp_path = os.path.join(results_dir, exp_name)

        if exp_name in skipped_dirs or not os.path.isdir(exp_path):
            continue

        metadata = parse_experiment_name(exp_name)
        if metadata is None:
            continue

        # Filter for pretrained only if requested
        if pretrained_only and not metadata['pretrained']:
            continue

        # Record the log directories, or None when they have not been written
        source_logs = os.path.join(exp_path, 'source_logs')
        target_logs = os.path.join(exp_path, 'target_logs')

        metadata['name'] = exp_name
        metadata['path'] = exp_path
        metadata['source_logs'] = source_logs if os.path.exists(source_logs) else None
        metadata['target_logs'] = target_logs if os.path.exists(target_logs) else None

        experiments.append(metadata)

    # Keep only the latest experiment per (algorithm, source, target, pretrained).
    # Timestamps are compared as strings, which orders them correctly for the
    # YYYYMMDD_HHMMSS format; on a tie the first experiment seen is kept.
    latest_by_key = {}
    for exp in experiments:
        key = (exp['algorithm'], exp['source'], exp['target'], exp['pretrained'])
        current = latest_by_key.get(key)
        if current is None or exp['timestamp'] > current['timestamp']:
            latest_by_key[key] = exp
    latest_experiments = list(latest_by_key.values())

    print(f"\nFiltering: pretrained_only={pretrained_only}")
    print(f"Found {len(experiments)} total experiments")
    print(f"Kept {len(latest_experiments)} latest unique experiments")

    # Show which experiments were discarded as older duplicates
    if len(experiments) > len(latest_experiments):
        print("\nDuplicate experiments found (keeping only latest):")
        kept_names = {exp['name'] for exp in latest_experiments}
        for exp in experiments:
            if exp['name'] not in kept_names:
                print(f"  Discarded: {exp['name']}")

    return latest_experiments

## Load Experiments

In [None]:
# Discover the experiments on disk (defaults of find_experiments apply:
# pretrained runs only) and print a short overview table.
experiments = find_experiments(RESULTS_DIR)

print(f"Found {len(experiments)} experiments")
print("\nExperiment summary:")
exp_df = pd.DataFrame(experiments)
if len(exp_df) == 0:
    print("No experiments found!")
else:
    # Show only the identifying fields, not the paths
    print(exp_df.loc[:, ['algorithm', 'source', 'target', 'pretrained']].to_string())

## 1. Learning Curves

Plot learning curves for each experiment, organized by algorithm.

In [None]:
def plot_learning_curves_by_algorithm(experiments, metric="rollout/ep_rew_mean"):
    """
    Draw one two-panel figure per algorithm.

    The left panel shows the curves read from each experiment's source logs,
    the right panel the curves read from its target logs. Every figure is
    saved to OUTPUT_DIR as learning_curves_{algorithm}.png and then shown.
    An experiment is left out of a panel when the corresponding log
    directory is missing or yields no data for `metric`.

    Args:
        experiments: Experiment dicts (as produced by find_experiments)
        metric: TensorBoard scalar tag to plot
    """
    # Bucket the experiments per algorithm, keeping their input order
    runs_by_algo = {}
    for run in experiments:
        runs_by_algo.setdefault(run['algorithm'], []).append(run)

    for algo in sorted(runs_by_algo):
        algo_runs = runs_by_algo[algo]

        fig, (ax_source, ax_target) = plt.subplots(1, 2, figsize=(16, 6))
        fig.suptitle(f'{algo.upper()} Learning Curves - {metric}', fontsize=14, fontweight='bold')

        # Left panel: training on the source game
        for run in algo_runs:
            log_dir = run['source_logs']
            if log_dir is not None and os.path.exists(log_dir):
                curve = load_tensorboard_data(log_dir, metric)
                if len(curve) > 0:
                    ax_source.plot(curve['step'], curve['value'],
                                   label=f"{run['source']}", alpha=0.7)

        ax_source.set_xlabel('Timesteps')
        ax_source.set_ylabel('Reward')
        ax_source.set_title('Source Game Training (from scratch)')
        ax_source.legend()
        ax_source.grid(True, alpha=0.3)

        # Right panel: training on the target game after transfer
        for run in algo_runs:
            log_dir = run['target_logs']
            if log_dir is not None and os.path.exists(log_dir):
                curve = load_tensorboard_data(log_dir, metric)
                if len(curve) > 0:
                    suffix = " (pretrained)" if run['pretrained'] else ""
                    ax_target.plot(curve['step'], curve['value'],
                                   label=f"{run['source']} → {run['target']}{suffix}",
                                   alpha=0.7)

        ax_target.set_xlabel('Timesteps')
        ax_target.set_ylabel('Reward')
        ax_target.set_title('Target Game Training (with transfer)')
        # The legend is placed outside the axes, to the right of the panel
        ax_target.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
        ax_target.grid(True, alpha=0.3)

        plt.tight_layout()
        out_file = os.path.join(OUTPUT_DIR, f'learning_curves_{algo}.png')
        plt.savefig(out_file, dpi=300, bbox_inches='tight')
        plt.show()

# Plot learning curves: one figure per algorithm for the experiments loaded
# above, using the default metric (rollout/ep_rew_mean). Each figure is also
# saved to OUTPUT_DIR.
plot_learning_curves_by_algorithm(experiments)

## 2. Individual Transfer Comparisons

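For each transfer pair, plot the transfer run on the target game against a from-scratch baseline for that same target game. The baseline is taken from an experiment whose source and target are both the target game, and is drawn as a dashed line only when such an experiment exists.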

In [None]:
def plot_transfer_comparison(experiments, metric="rollout/ep_rew_mean"):
    """
    Draw one figure per (algorithm, source, target) transfer pair.

    Each figure shows the target-log curve of every experiment of that pair.
    When `experiments` also contains a run of the same algorithm whose source
    and target are both the pair's target game, that run's source-log curve
    is drawn first as a dashed "from scratch" reference. Figures are saved to
    OUTPUT_DIR as transfer_{algorithm}_{source}_to_{target}.png and shown.

    Args:
        experiments: Experiment dicts (as produced by find_experiments)
        metric: TensorBoard scalar tag to plot
    """
    # Bucket the experiments per transfer pair
    runs_by_pair = {}
    for run in experiments:
        pair = (run['algorithm'], run['source'], run['target'])
        runs_by_pair.setdefault(pair, []).append(run)

    for pair in sorted(runs_by_pair):
        algo, source, target = pair
        fig, ax = plt.subplots(1, 1, figsize=(10, 6))

        # Reference curve: first experiment that trains the target game
        # against itself (source == target) with the same algorithm
        baseline_exp = None
        for candidate in experiments:
            if (candidate['algorithm'] == algo
                    and candidate['source'] == target
                    and candidate['target'] == target):
                baseline_exp = candidate
                break

        if baseline_exp and baseline_exp['source_logs']:
            baseline_curve = load_tensorboard_data(baseline_exp['source_logs'], metric)
            if len(baseline_curve) > 0:
                ax.plot(baseline_curve['step'], baseline_curve['value'],
                        label=f'{target} (from scratch)',
                        linestyle='--', linewidth=2, alpha=0.7)

        # Transfer curves of this pair
        for run in runs_by_pair[pair]:
            log_dir = run['target_logs']
            if log_dir is not None and os.path.exists(log_dir):
                curve = load_tensorboard_data(log_dir, metric)
                if len(curve) > 0:
                    suffix = " (pretrained)" if run['pretrained'] else ""
                    ax.plot(curve['step'], curve['value'],
                            label=f'{source} → {target}{suffix}',
                            linewidth=2, alpha=0.8)

        ax.set_xlabel('Timesteps', fontsize=12)
        ax.set_ylabel('Episode Reward', fontsize=12)
        ax.set_title(f'{algo.upper()}: {source} → {target} Transfer Learning',
                     fontsize=14, fontweight='bold')
        ax.legend()
        ax.grid(True, alpha=0.3)

        plt.tight_layout()
        out_file = os.path.join(OUTPUT_DIR, f'transfer_{algo}_{source}_to_{target}.png')
        plt.savefig(out_file, dpi=300, bbox_inches='tight')
        plt.show()

# Plot transfer comparisons: one figure per (algorithm, source, target) pair
# for the experiments loaded above, using the default metric. Each figure is
# also saved to OUTPUT_DIR.
plot_transfer_comparison(experiments)

## 3. Transfer Benefit Analysis

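Compute transfer benefit as the percentage change of final performance relative to a from-scratch baseline: `(target − baseline) / |baseline| × 100`. Final performance is the mean episode reward over the last 50,000 timesteps of a run. The baseline used here is the final from-scratch performance on the source game, read from the same experiment's source logs.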

In [None]:
def compute_final_performance(log_dir, metric="rollout/ep_rew_mean", last_n_steps=50000):
    """
    Average a logged metric over the tail end of a run.

    The tail is every logged point whose step lies within `last_n_steps` of
    the largest logged step. If that selection is empty, the mean over the
    whole series is used instead.

    Args:
        log_dir: TensorBoard log directory to read
        metric: TensorBoard scalar tag to average
        last_n_steps: Width of the tail window, in steps

    Returns:
        Mean metric value over the tail window, or None when no data could
        be loaded for `metric`.
    """
    curve = load_tensorboard_data(log_dir, metric)
    if len(curve) == 0:
        return None

    # Everything at or after this step counts as "final" performance
    cutoff = curve['step'].max() - last_n_steps
    tail_values = curve.loc[curve['step'] >= cutoff, 'value']

    selected = tail_values if len(tail_values) > 0 else curve['value']
    return selected.mean()


def compute_transfer_benefits(experiments):
    """
    Compute transfer benefit for each experiment.

    Transfer benefit = (target performance - baseline) / |baseline| * 100
    where target performance is the final performance read from the
    experiment's target logs and baseline is the final performance read from
    the same experiment's source logs (the source game's from-scratch run).

    NOTE(review): the baseline is measured on the source game while the
    target performance is measured on the target game, so the percentage
    compares two different games. plot_transfer_comparison() instead uses a
    from-scratch run on the target game as its reference - confirm which
    baseline is intended here.

    Experiments are skipped when their target or source logs are missing,
    when either final performance cannot be computed, or when the baseline
    is exactly 0 (the ratio would be undefined).

    Args:
        experiments: Experiment dicts (as produced by find_experiments)

    Returns:
        DataFrame with one row per usable experiment and the columns
        algorithm, source, target, pretrained, baseline_performance,
        target_performance, transfer_benefit_pct. The columns are present
        even when no experiment is usable, so the result always has the
        same schema.
    """
    columns = [
        'algorithm',
        'source',
        'target',
        'pretrained',
        'baseline_performance',
        'target_performance',
        'transfer_benefit_pct',
    ]
    results = []

    for exp in experiments:
        # Get target performance (with transfer)
        target_logs = exp['target_logs']
        if target_logs is None or not os.path.exists(target_logs):
            continue

        target_perf = compute_final_performance(target_logs)
        if target_perf is None:
            continue

        # Get baseline performance (source game from scratch).
        # This is the source_logs of the same experiment.
        source_logs = exp['source_logs']
        if not source_logs or not os.path.exists(source_logs):
            continue

        baseline_perf = compute_final_performance(source_logs)
        if baseline_perf is None or baseline_perf == 0:
            continue

        # Relative change in percent; abs() keeps the sign meaningful when
        # the baseline reward is negative.
        benefit = ((target_perf - baseline_perf) / abs(baseline_perf)) * 100

        results.append({
            'algorithm': exp['algorithm'],
            'source': exp['source'],
            'target': exp['target'],
            'pretrained': exp['pretrained'],
            'baseline_performance': baseline_perf,
            'target_performance': target_perf,
            'transfer_benefit_pct': benefit
        })

    # Passing columns explicitly keeps the schema stable for an empty result
    return pd.DataFrame(results, columns=columns)


# Compute the transfer benefit table, print it and persist it as CSV
benefits_df = compute_transfer_benefits(experiments)

if len(benefits_df) == 0:
    print("No transfer benefit data available yet.")
else:
    print("\nTransfer Benefit Analysis:")
    print("=" * 80)
    print(benefits_df.to_string(index=False))

    # Save to CSV (the row index carries no information, so it is left out)
    benefits_csv_path = os.path.join(OUTPUT_DIR, 'transfer_benefits.csv')
    benefits_df.to_csv(benefits_csv_path, index=False)
    print(f"\nSaved to {benefits_csv_path}")

## 4. Transfer Benefit Visualization

In [None]:
if len(benefits_df) == 0:
    print("No transfer benefit data to visualize.")
else:
    # One horizontal bar chart per algorithm, bars ordered by benefit
    algorithms = benefits_df['algorithm'].unique()

    for algo in algorithms:
        algo_df = benefits_df[benefits_df['algorithm'] == algo]

        if len(algo_df) == 0:
            continue

        # Label every row "source → target" and order the rows by benefit;
        # assign() returns a new frame, so benefits_df is left untouched
        algo_df = algo_df.assign(
            transfer_pair=algo_df['source'] + ' → ' + algo_df['target']
        ).sort_values('transfer_benefit_pct')

        # The figure grows with the number of bars, but is never shorter than 6
        fig, ax = plt.subplots(figsize=(12, max(6, len(algo_df) * 0.4)))

        # Green for a positive benefit, red otherwise
        colors = ['green' if x > 0 else 'red' for x in algo_df['transfer_benefit_pct']]
        bars = ax.barh(algo_df['transfer_pair'], algo_df['transfer_benefit_pct'],
                       color=colors, alpha=0.7)

        ax.axvline(x=0, color='black', linestyle='-', linewidth=0.8)
        ax.set_xlabel('Transfer Benefit (%)', fontsize=12)
        ax.set_ylabel('Transfer Pair', fontsize=12)
        ax.set_title(f'{algo.upper()} Transfer Benefit Analysis', fontsize=14, fontweight='bold')
        ax.grid(True, alpha=0.3, axis='x')

        # Write each value just beyond the end of its bar: to the right of
        # positive bars, to the left of the others
        for i, value in enumerate(algo_df['transfer_benefit_pct']):
            if value > 0:
                x_pos, ha = value + 5, 'left'
            else:
                x_pos, ha = value - 5, 'right'
            ax.text(x_pos, i, f'{value:.1f}%', ha=ha, va='center', fontweight='bold')

        plt.tight_layout()
        plt.savefig(os.path.join(OUTPUT_DIR, f'transfer_benefit_{algo}.png'),
                    dpi=300, bbox_inches='tight')
        plt.show()

    # Summary statistics
    print("\n" + "="*80)
    print("TRANSFER BENEFIT SUMMARY BY ALGORITHM")
    print("="*80)
    summary = (
        benefits_df
        .groupby('algorithm')['transfer_benefit_pct']
        .agg(['mean', 'std', 'min', 'max', 'count'])
    )
    summary.columns = ['Mean (%)', 'Std (%)', 'Min (%)', 'Max (%)', 'N']
    print(summary.to_string())

## 5. Transfer Matrix Heatmap

Create a heatmap showing transfer benefits between all game pairs.

In [None]:
if len(benefits_df) == 0:
    print("No data available for transfer matrix.")
else:
    algorithms = benefits_df['algorithm'].unique()

    for algo in algorithms:
        algo_df = benefits_df[benefits_df['algorithm'] == algo]

        if len(algo_df) == 0:
            continue

        # Square source x target matrix over every game that appears on
        # either side; pairs without a result stay NaN
        games = sorted(set(algo_df['source']) | set(algo_df['target']))
        matrix = pd.DataFrame(index=games, columns=games, dtype=float)

        pair_benefits = zip(algo_df['source'], algo_df['target'],
                            algo_df['transfer_benefit_pct'])
        for source_game, target_game, benefit in pair_benefits:
            matrix.loc[source_game, target_game] = benefit

        # Plot heatmap with a diverging palette centred on 0
        fig, ax = plt.subplots(figsize=(10, 8))

        sns.heatmap(matrix, annot=True, fmt=".1f", cmap="RdYlGn", center=0,
                    cbar_kws={'label': 'Transfer Benefit (%)'}, ax=ax,
                    linewidths=0.5, linecolor='gray')

        ax.set_xlabel('Target Game', fontsize=12)
        ax.set_ylabel('Source Game', fontsize=12)
        ax.set_title(f'{algo.upper()} Transfer Learning Matrix\n(Source → Target)',
                     fontsize=14, fontweight='bold')

        plt.tight_layout()
        plt.savefig(os.path.join(OUTPUT_DIR, f'transfer_matrix_{algo}.png'),
                    dpi=300, bbox_inches='tight')
        plt.show()

## 6. Compare Pretrained vs From-Scratch Source

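If you have both pretrained and from-scratch experiments, compare them. Note that the experiments above are loaded with the default `pretrained_only=True`, so this comparison is only produced if you load them with `find_experiments(RESULTS_DIR, pretrained_only=False)`.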

In [None]:
# Grouped bar chart: mean transfer benefit per algorithm, split into
# from-scratch and pretrained source models. It is only drawn when the
# benefit table contains both kinds of experiment.
if len(benefits_df) > 0 and 'pretrained' in benefits_df.columns:
    # Cast to bool so that `~` is a logical NOT even if the column was read
    # back with a non-bool dtype
    is_pretrained = benefits_df['pretrained'].astype(bool)

    # Check if we have both pretrained and non-pretrained experiments
    has_pretrained = is_pretrained.any()
    has_scratch = (~is_pretrained).any()

    if has_pretrained and has_scratch:
        fig, ax = plt.subplots(figsize=(12, 6))

        algo_names = list(benefits_df['algorithm'].unique())
        n_algos = len(algo_names)

        # Two groups (from scratch, pretrained) at x = 0 and x = 1. Bars are
        # 0.25 wide, shrinking when needed so one group never spans more
        # than 0.8 and neighbouring groups cannot overlap.
        x = np.arange(2)
        width = min(0.25, 0.8 / n_algos)

        for position, algo in enumerate(algo_names):
            algo_mask = benefits_df['algorithm'] == algo
            algo_benefit = benefits_df.loc[algo_mask, 'transfer_benefit_pct']
            algo_pretrained = is_pretrained[algo_mask]

            # The mean of an empty selection is NaN, which draws no bar
            pretrained_mean = algo_benefit[algo_pretrained].mean()
            scratch_mean = algo_benefit[~algo_pretrained].mean()

            offset = position * width
            ax.bar(x + offset, [scratch_mean, pretrained_mean], width,
                   label=algo.upper(), alpha=0.8)

        ax.set_xlabel('Source Model Type', fontsize=12)
        ax.set_ylabel('Average Transfer Benefit (%)', fontsize=12)
        ax.set_title('Pretrained vs From-Scratch Source Models', fontsize=14, fontweight='bold')
        # Centre each tick under its group of bars for any number of
        # algorithms (this gives 0.25 and 1.25 for three algorithms)
        ax.set_xticks(x + width * (n_algos - 1) / 2)
        ax.set_xticklabels(['From Scratch', 'Pretrained (Zoo)'])
        ax.legend()
        ax.grid(True, alpha=0.3, axis='y')
        ax.axhline(y=0, color='black', linestyle='-', linewidth=0.8)

        plt.tight_layout()
        plt.savefig(os.path.join(OUTPUT_DIR, 'pretrained_vs_scratch.png'), dpi=300, bbox_inches='tight')
        plt.show()
    else:
        print("Need both pretrained and from-scratch experiments for comparison.")
else:
    print("No pretrained comparison data available.")

## Summary

All plots have been saved to the `analysis_plots/` directory.

In [None]:
# Closing report: how many experiments were analyzed, which algorithms they
# cover, and every file currently present in the output directory.
banner = "=" * 80
print("\n" + banner)
print("ANALYSIS COMPLETE")
print(banner)

algorithm_names = sorted({exp['algorithm'] for exp in experiments})
print(f"\nTotal experiments analyzed: {len(experiments)}")
print(f"Algorithms: {', '.join(algorithm_names)}")

print(f"\nPlots saved to: {OUTPUT_DIR}/")
print("\nGenerated files:")
for f in sorted(os.listdir(OUTPUT_DIR)):
    print(f"  - {f}")