In [1]:
import os
import json
import numpy as np
import matplotlib.pyplot as plt
import glob
from collections import defaultdict
import argparse

# Seeds of the runs; each seed names one sub-directory of an algorithm's run folder.
SEEDS = [13, 19, 42, 94, 1337]
# Step values at which metrics are looked up in every run (100k ... 500k).
STEPS = list(range(100_000, 500_001, 100_000))
# Algorithm names, in the order they are handed to the plotting helper.
ALGORITHMS = ['Dreamer', 'R2I', 'DRQN']

def extract_metrics(metrics_file, metric_name="episodes/score_mean"):
    """Read one metric from a JSONL metrics file.

    Args:
        metrics_file: Path of the JSONL file, as a ``str`` or a path-like
            object such as ``pathlib.Path``.
        metric_name: Key of the metric to pull out of each record.

    Returns:
        Dict mapping each record's ``"step"`` value to its ``metric_name``
        value. Records that do not contain ``metric_name`` are ignored; when a
        step occurs more than once, the last record wins.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
        KeyError: If a record contains ``metric_name`` but no ``"step"`` key.
    """
    steps_to_values = {}
    # The built-in open() takes both plain strings and path-like objects.
    with open(metrics_file, 'r', encoding='utf-8') as f:
        for line in f:
            line = line.strip()
            if not line:
                # Blank lines (e.g. an extra trailing newline) hold no record.
                continue
            data = json.loads(line)
            if metric_name in data:
                steps_to_values[data["step"]] = data[metric_name]

    return steps_to_values

def collect_algorithm_metrics(algorithm_dir, metric_name="episodes/score_mean"):
    """Collect one metric at every step in STEPS for each seed of an algorithm.

    For every seed in SEEDS the file ``<algorithm_dir>/<seed>/eval/metrics.jsonl``
    is read. A seed is reported on stdout and left out of the result when its
    file is missing or when the file lacks the metric at any of the STEPS.

    Args:
        algorithm_dir: Directory holding one sub-directory per seed, as a
            ``str`` or ``pathlib.Path``.
        metric_name: Key of the metric to read from each record.

    Returns:
        np.ndarray with one row per usable seed and one column per entry of
        STEPS. When no seed is usable the shape is ``(0, len(STEPS))``.
    """
    # Local import so the function does not rely on a later cell importing pathlib.
    from pathlib import Path

    algorithm_dir = Path(algorithm_dir)
    all_seed_metrics = []

    for seed in SEEDS:
        metrics_file = algorithm_dir / f"{seed}" / "eval" / "metrics.jsonl"
        if not metrics_file.exists():
            print(f"Metrics file {metrics_file} does not exist")
            continue
        steps_to_values = extract_metrics(metrics_file, metric_name)
        # Treat an incomplete run like a missing one instead of failing with a bare KeyError.
        missing_steps = [step for step in STEPS if step not in steps_to_values]
        if missing_steps:
            print(f"Metrics file {metrics_file} has no '{metric_name}' at steps {missing_steps}; skipping seed {seed}")
            continue
        all_seed_metrics.append([steps_to_values[step] for step in STEPS])

    if not all_seed_metrics:
        # Keep the seeds x steps layout even when there are no rows.
        return np.empty((0, len(STEPS)))
    # return array of seeds x steps
    return np.array(all_seed_metrics)

In [2]:
# NOTE(review): disabled legacy driver, kept for reference only. It cannot be
# re-enabled as is: `args` and `plot_algorithms_comparison` are not defined in the
# visible part of this notebook, and `collect_algorithm_metrics` returns a single
# array, not the `(algo_name, metrics)` pair unpacked below.
# # Find all algorithm directories
# algo_dirs = [
    
# ]

# all_algorithms_data = {}

# for algo_dir in algo_dirs:
#     algo_name, metrics = collect_algorithm_metrics(algo_dir, args.metric)
#     all_algorithms_data[algo_name] = metrics

# plot_algorithms_comparison(all_algorithms_data, args.metric, args.output)

# print(f"Processed {len(all_algorithms_data)} algorithms:")
# for algo in all_algorithms_data:
#     print(f"  - {algo}")

In [3]:
import pathlib
from rliable import metrics
from rliable import library as rly
from rliable import plot_utils





def scores_2_iqm(scores):
    """Apply ``metrics.aggregate_iqm`` separately at every index of the last axis.

    Args:
        scores: Array whose last axis enumerates the evaluation points.

    Returns:
        np.ndarray holding one ``metrics.aggregate_iqm`` result per index of
        the last axis of ``scores``, in order.
    """
    n_points = scores.shape[-1]
    iqm_per_point = [metrics.aggregate_iqm(scores[..., point_idx])
                     for point_idx in range(n_points)]
    return np.array(iqm_per_point)


# Root folder that holds one run directory per (algorithm, difficulty).
# Defaults to the original cluster location; set the MORDOR_HIKE_EXPERIMENTS_DIR
# environment variable to run the notebook against a copy stored elsewhere.
base_dir = pathlib.Path(os.environ.get(
    'MORDOR_HIKE_EXPERIMENTS_DIR',
    '/work/dlclarge1/ramans-powm/powm/experiments/mordor_hike',
))

# Directory-name prefix of each algorithm's runs.
_RUN_PREFIXES = {
    'Dreamer': '044_dreamer',
    'R2I': '046_r2i',
    'DRQN': '045_drqn',
}

# env2algos[difficulty][algorithm] -> run directory "<prefix>_<difficulty>_tuned"
env2algos = {
    env_name: {
        algo_name: base_dir / f'{prefix}_{env_name}_tuned'
        for algo_name, prefix in _RUN_PREFIXES.items()
    }
    for env_name in ('easy', 'medium', 'hard')
}

# Output directory for the generated figures; created if it does not exist yet.
save_path = pathlib.Path('metrics')
save_path.mkdir(parents=True, exist_ok=True)

# metric key -> y-axis label and stem of the output file name
metric_meta = {
    metric_key: {'ylabel': axis_label, 'file_name': file_stem}
    for metric_key, axis_label, file_stem in (
        ('score_mean', 'Score (IQM)', 'iqm_scores'),
        ('episodic_kldiv', 'KL Divergence', 'episodic_kldiv'),
        ('score_episodic_kldiv_corr_pearson', "Pearson's r", 'pearson_r'),
        ('score_episodic_kldiv_corr_spearman', "Spearman's r", 'spearman_r'),
    )
}

# prefix of the metric name in the records -> suffix used in output file names
trajectory_meta = dict(
    episodes='in_distribution',
    noisy_episodes='noisy_ood',
    waypoint_episodes='waypoint_ood',
)


# NOTE(review): earlier, disabled variant of the plotting loop. It wrote one figure
# per (trajectory type, metric, environment), each with its own legend and with the
# environment name in the file name. Kept commented out for reference.
# for trajectory_type, trajectory_name in trajectory_meta.items():
#     for metric, metric_info in metric_meta.items():
#         for env in ['easy', 'medium', 'hard']:
#             algo_scores = {}
#             for algo, path in env2algos[env].items():
#                 algo_scores[algo] = collect_algorithm_metrics(pathlib.Path(path),f'{trajectory_type}/{metric}')
#             iqm_scores, iqm_cis = rly.get_interval_estimates(
#             algo_scores, scores_2_iqm, reps=50000)
#             fig, ax = plt.subplots(figsize=(7, 5))
#             plot_utils.plot_sample_efficiency_curve(
#                 [f"{step / 1000:.0f}k" for step in STEPS], iqm_scores, iqm_cis, algorithms=ALGORITHMS,
#                 ax=ax,
#                 xlabel=r'Steps',
#                 ylabel=metric_info['ylabel'], legend=True)
#             fig.savefig(save_path / f'{env}_{metric_info["file_name"]}_{trajectory_name}.pdf', dpi=300, bbox_inches='tight')
# Settings shared by every figure, hoisted out of the loops.
ENV_NAMES = ['easy', 'medium', 'hard']
N_BOOTSTRAP_REPS = 50000
# Fixed seed so the bootstrap confidence intervals are identical on every run.
BOOTSTRAP_SEED = 0
# Tick labels for the x-axis, e.g. 100000 -> "100k".
STEP_LABELS = [f"{step / 1000:.0f}k" for step in STEPS]

for trajectory_type, trajectory_name in trajectory_meta.items():
    for metric, metric_info in metric_meta.items():
        # One figure per (trajectory type, metric): one panel per environment,
        # all panels sharing the y-axis.
        fig, axes = plt.subplots(1, len(ENV_NAMES), figsize=(18, 5), sharey=True)

        # Point estimates and intervals per environment; not used by the plotting
        # below, kept available for inspection after the cell has run.
        all_env_data = {}
        for i, env in enumerate(ENV_NAMES):
            algo_scores = {}
            for algo, path in env2algos[env].items():
                # Records are keyed "<trajectory type>/<metric>".
                algo_scores[algo] = collect_algorithm_metrics(path, f'{trajectory_type}/{metric}')

            # A fresh, identically seeded generator per call keeps each estimate
            # reproducible regardless of which other figures are produced.
            iqm_scores, iqm_cis = rly.get_interval_estimates(
                algo_scores, scores_2_iqm, reps=N_BOOTSTRAP_REPS,
                random_state=np.random.RandomState(BOOTSTRAP_SEED))

            all_env_data[env] = {
                'iqm_scores': iqm_scores,
                'iqm_cis': iqm_cis
            }

            # Draw this environment's curves on its own panel.
            plot_utils.plot_sample_efficiency_curve(
                STEP_LABELS,
                iqm_scores, iqm_cis,
                algorithms=ALGORITHMS,
                ax=axes[i],
                xlabel=r'Steps',
                ylabel=metric_info['ylabel'] if i == 0 else '',  # y-label only on the first panel
                legend=False)  # a single figure-level legend is added below

            axes[i].set_title(f'{env.capitalize()}')

        # Single legend for the whole figure, taken from the first panel's handles.
        handles, labels = axes[0].get_legend_handles_labels()
        fig.legend(handles, labels, loc='lower center', bbox_to_anchor=(0.5, -0.12), ncol=len(ALGORITHMS))

        # Lay out this figure explicitly rather than via pyplot's "current figure".
        fig.tight_layout()
        fig.subplots_adjust(bottom=0.2)  # make room for the legend
        fig.savefig(save_path / f'{metric_info["file_name"]}_{trajectory_name}.pdf', dpi=300, bbox_inches='tight')
        # Release the figure; many are created by these loops.
        plt.close(fig)

Metrics file /work/dlclarge1/ramans-powm/powm/experiments/mordor_hike/046_r2i_medium_tuned/42/eval/metrics.jsonl does not exist
Metrics file /work/dlclarge1/ramans-powm/powm/experiments/mordor_hike/046_r2i_medium_tuned/94/eval/metrics.jsonl does not exist
Metrics file /work/dlclarge1/ramans-powm/powm/experiments/mordor_hike/046_r2i_medium_tuned/42/eval/metrics.jsonl does not exist
Metrics file /work/dlclarge1/ramans-powm/powm/experiments/mordor_hike/046_r2i_medium_tuned/94/eval/metrics.jsonl does not exist
Metrics file /work/dlclarge1/ramans-powm/powm/experiments/mordor_hike/046_r2i_medium_tuned/42/eval/metrics.jsonl does not exist
Metrics file /work/dlclarge1/ramans-powm/powm/experiments/mordor_hike/046_r2i_medium_tuned/94/eval/metrics.jsonl does not exist
Metrics file /work/dlclarge1/ramans-powm/powm/experiments/mordor_hike/046_r2i_medium_tuned/42/eval/metrics.jsonl does not exist
Metrics file /work/dlclarge1/ramans-powm/powm/experiments/mordor_hike/046_r2i_medium_tuned/94/eval/metri