In [1]:
import numpy as np 
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import pandas as pd 
import json
import os
import glob

In [None]:
# Root directory of the hyperparameter sweep. Later cells look for
# '<hyper_path>/<run_folder>/episodic_rewards.csv' and the matching 'args.txt'.
# NOTE(review): the value below appears to be a placeholder — set it to the real
# sweep directory before running the notebook.
hyper_path = 'DIR_TO_HYPERPARAMETERS'

In [None]:

# Group runs by configuration (ignoring the 'seed' value).
#
# Produces, for use by later cells:
#   csv_files        - sorted list of every '<run_folder>/episodic_rewards.csv' under hyper_path
#   grouped_runs     - {config JSON string (seed removed): [run folder names]}
#   smallest_eps_len - number of rows in the shortest CSV (None if no CSV was found)
#   episodic_rewards - the 'episode' column of that shortest CSV (used as the x-axis later)
grouped_runs = {}

# glob returns matches in arbitrary, filesystem-dependent order; sorting keeps the
# order of folders inside each group (and anything derived from "the first folder
# of a group") reproducible across kernels and machines.
csv_files = sorted(glob.glob(os.path.join(hyper_path, '*', 'episodic_rewards.csv')))
if not csv_files:
    print(f"Warning: no 'episodic_rewards.csv' files found under {hyper_path!r}")

# Iterate over each csv file, extract its folder name and its args.txt.
episodic_rewards = []
# None (rather than a large float) so the value is always either a real row count
# or a valid "no limit" slice bound.
smallest_eps_len = None
for csv_file in csv_files:
    folder_path = os.path.dirname(csv_file)
    folder_name = os.path.basename(folder_path)
    args_file = os.path.join(folder_path, "args.txt")

    # Parse the file once, and only the column that is needed here.
    episodes = pd.read_csv(csv_file, usecols=['episode'])['episode']
    if smallest_eps_len is None or len(episodes) < smallest_eps_len:
        smallest_eps_len = len(episodes)
        episodic_rewards = episodes

    # Read the JSON config from args.txt.
    with open(args_file, "r") as f:
        config = json.load(f)

    # Remove the seed attribute, since it's the only differing part.
    config.pop("seed", None)

    # Create a key from the remaining configuration; sorting ensures consistent keys.
    config_key = json.dumps(config, sort_keys=True)

    # Group folder names by the configuration key.
    grouped_runs.setdefault(config_key, []).append(folder_name)

In [None]:
# Overlay the raw per-episode reward curve of every individual run on one figure,
# labelling each curve with the name of the run's folder.
fig, ax = plt.subplots(figsize=(12, 8))
for run_csv in csv_files:
    run_df = pd.read_csv(run_csv)
    run_name = os.path.basename(os.path.dirname(run_csv))
    ax.plot(run_df['episode'], run_df['reward'], label=run_name)

ax.set_xlabel("Episode")
ax.set_ylabel("Reward")
ax.set_title("Reward per Episode for All Runs")
# Legend is anchored outside the axes (to the right) so it does not cover the curves.
ax.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
fig.tight_layout()
plt.show()

In [9]:

# Average the reward curves of all runs that share a configuration, plot one mean
# curve per configuration, pick the configuration whose mean curve has the highest
# average over its final episodes, and plot that configuration on its own figure.

LAST_N_EPISODES = 100   # window (at the end of the curve) used to rank configurations
REWARD_YLIM = (0, 700)  # fixed y-range for both figures; values outside it are not visible


def _load_group_rewards(folders):
    """Load the reward curves of several runs as one 2-D array.

    Parameters
    ----------
    folders : list of str
        Run folder names relative to ``hyper_path``.

    Returns
    -------
    numpy.ndarray
        One row per folder, holding that run's 'reward' column truncated to
        ``smallest_eps_len`` entries so that rows can be averaged element-wise.
    """
    return np.array([
        pd.read_csv(os.path.join(hyper_path, folder, 'episodic_rewards.csv'))['reward']
        .values[:smallest_eps_len]
        for folder in folders
    ])


fig_groups, ax_groups = plt.subplots(figsize=(10, 6))
best_avg = -np.inf
best_config_key = None
best_folder = None
all_run_rewards = None        # per-run reward curves of the best configuration
avg_best_run_rewards = None   # their element-wise mean

for config_key, folders in grouped_runs.items():
    group_rewards = _load_group_rewards(folders)
    avg_rewards = group_rewards.mean(axis=0)
    avg_last100 = avg_rewards[-LAST_N_EPISODES:].mean()

    label = config_key[:30] + '...' if len(config_key) > 30 else config_key
    ax_groups.plot(episodic_rewards, avg_rewards, label=label)

    if avg_last100 > best_avg:
        best_avg = avg_last100
        best_config_key = config_key
        best_folder = folders[0]  # choose the first folder in this group
        # Keep the data of the current best group so it does not have to be
        # re-read from disk for the second figure.
        all_run_rewards = group_rewards
        avg_best_run_rewards = avg_rewards

if best_config_key is None:
    # Either grouped_runs is empty or no group yielded a comparable (non-NaN)
    # average; fail with an explicit message instead of a TypeError further down.
    raise ValueError(
        "No best configuration could be selected: grouped_runs is empty or no "
        "group has a finite average reward."
    )

print("Best group config (highest average reward over the last 100 episodes):")
print(best_config_key)
# Also show the full args.txt of one representative run (includes its seed).
args_path = os.path.join(hyper_path, best_folder, 'args.txt')
with open(args_path, 'r') as f:
    print(f.read())

ax_groups.set_xlabel("Episode")
ax_groups.set_ylabel("Average Episodic Reward")
ax_groups.set_title("Average Episodic Reward per Episode for each grouped_run")
# Legend left disabled, as in the original analysis (presumably one entry per
# configuration would crowd the figure — re-enable with ax_groups.legend()).
ax_groups.set_ylim(*REWARD_YLIM)
plt.show()

# Plot the best run in a separate figure
# All folders of the best configuration (one per seed).
best_group_folders = grouped_runs[best_config_key]
n_seeds = len(best_group_folders)
seed_word = "seed" if n_seeds == 1 else "seeds"

fig_best, ax_best = plt.subplots(figsize=(10, 6))
ax_best.plot(episodic_rewards, avg_best_run_rewards, color='red', linewidth=2,
             label=f'Best Run (Avg over {n_seeds} {seed_word})')
ax_best.set_xlabel("Episode")
ax_best.set_ylabel("Reward")
ax_best.set_title(f"Reward per Episode for Best Run (Averaged over {n_seeds} {seed_word})")
ax_best.legend()
ax_best.set_ylim(*REWARD_YLIM)
plt.show()