Our notebooks run on ZIP files, which you can create from your Wandb runs using this script.

In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
import os
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from scipy.optimize import curve_fit
from ast import arg
import matplotlib.pyplot as plt
from tqdm import tqdm
import seaborn as sns
import scipy
from scipy.stats import norm
from concurrent.futures import ProcessPoolExecutor, as_completed
from itertools import product

from collector import RunCollector, CompleteRunCollector
from constants import *
from defaults import *
from run_configs import *

In [None]:
"""
Label your Wandb runs using tags, and include the relevant ones.

This saves an `npy` to the `collector_path` directory. If this file does not
already exist, this fetches runs from Wandb. Otherwise, it just loads from the file.
"""

# Directory name under ../output/ that holds the collector cache (data.npy).
collector_path = 'sac_utd_bs_lr_250121_complete'
# Directory name under data/ that save_data() writes the exported arrays into.
zip_path = 'gym_250121_remake'

sac_utd_bs_lr_collector = CompleteRunCollector.create(
    project='prestonfu/crl',
    load=True,
    # NOTE(review): max_steps is not defined in this notebook; presumably it
    # comes from one of the star imports (constants/defaults/run_configs) -- confirm.
    max_steps=max_steps,
    # Wandb tags of the runs to include.
    tags=[
        'sac_utd_bs_lr_250109', 
        'sac_utd_bs_lr_retry_250111', 
        'sac_utd_bs_lr_retry_250112', 
        'sac_utd_bs_lr_retry_250114', 
        'sac_utd_bs_lr_retry_250115', 
        'sac_utd_bs_lr_retry_250116',  
        'sac_utd_bs_lr_250118', 
        'sac_utd_bs_lr_cheetah_250121'
    ],
    path=f'../output/{collector_path}/data.npy',
    # NOTE(review): presumably the number of parallel workers used while
    # fetching runs -- confirm against CompleteRunCollector.create.
    parallel=32
)

# Generic alias used by the cells below.
collector = sac_utd_bs_lr_collector

In [None]:
"""
Drop runs that do not go for the full number of steps, and extra runs beyond 
desired number of seeds.
"""

# Desired number of seeds per configuration.
num_seeds = 8
# NOTE(review): 0.95 is presumably the minimum fraction of the full step count
# a run must reach to be kept -- confirm against CompleteRunCollector.remove_short.
collector.remove_short(0.95)
# NOTE(review): presumably keeps at most `num_seeds` runs per configuration --
# confirm against CompleteRunCollector.trim.
collector.trim(num_seeds)

In [None]:
from functools import reduce
from copy import deepcopy

def get_data(collector: 'CompleteRunCollector', env_name, varname, utd, step_bin=1000):
    """Collect one metric for every (bs, lr) configuration of an env/utd pair.

    For each configuration returned by
    ``collector.filter(env=env_name, utd=utd)``, the per-seed run summaries are
    outer-joined on ``_step``, steps are floored to multiples of ``step_bin``,
    and each seed's values are averaged within a bin. Bins in which any seed
    has no value are dropped.

    Args:
        collector: Run collector providing ``filter`` and ``category_index``.
        env_name: Environment name passed to ``collector.filter``.
        varname: Summary column to extract, e.g. ``'episode/return'``.
        utd: ``utd`` value passed to ``collector.filter``.
        step_bin: Width of the step bins used to align non-uniformly logged
            steps. Must be positive. Defaults to 1000.

    Returns:
        Dict mapping ``(env_name, utd, bs, lr)`` to a 2-D array with one row
        per step bin: column 0 is the first ``_step`` seen in the bin and
        column ``i + 1`` is the bin mean of ``varname`` for seed ``i``.
        Configurations without any run summaries are omitted.

    Raises:
        ValueError: If ``step_bin`` is not positive.
    """
    if step_bin <= 0:
        raise ValueError(f'step_bin must be positive, got {step_bin!r}')

    data_dict = {}
    for key, summaries in collector.filter(env=env_name, utd=utd).items():
        bs = key[collector.category_index['bs']]
        lr = key[collector.category_index['lr']]

        # One two-column frame per seed, with the metric column made unique
        # so the frames can be joined side by side.
        per_seed = [
            df[['_step', varname]].rename(columns={varname: f'seed{i}/{varname}'})
            for i, df in enumerate(summaries)
        ]
        if not per_seed:
            # reduce() raises TypeError on an empty sequence, and there is
            # nothing to export for a configuration without runs.
            continue

        merged_df = reduce(
            lambda left, right: pd.merge(left, right, on='_step', how='outer'),
            per_seed,
        )

        # Floor steps onto a common grid to resolve non-uniform logging.
        merged_df['rounded_step'] = (merged_df['_step'] // step_bin) * step_bin
        agg_dict = {
            '_step': 'first',
            **{col: 'mean' for col in merged_df.columns if col.startswith('seed')},
        }
        result_df = (
            merged_df.groupby('rounded_step')
            .agg(agg_dict)
            .dropna()  # keep only bins where every seed logged a value
            .reset_index(drop=True)
        )

        data_dict[(env_name, utd, bs, lr)] = result_df.to_numpy()

    return data_dict


def save_data(collector, env_name, varname, utd):
    """Write one ``.npy`` file per (bs, lr) configuration for a single metric.

    Files go to ``data/<zip_path>/utd_<utd>/<env_name>/<varname>/`` with any
    ``/`` in ``varname`` replaced by ``.``. Each file is named
    ``bs_<bs>_lr_<lr>``; ``np.save`` appends the ``.npy`` extension.
    The arrays are those returned by ``get_data`` for the same arguments.
    """
    dirname = f'data/{zip_path}/utd_{utd}/{env_name}/{varname.replace("/", ".")}'
    os.makedirs(dirname, exist_ok=True)

    # Keys are (env_name, utd, bs, lr); only bs and lr are needed for the
    # file name, since env and utd are already encoded in the directory.
    for (_, _, bs, lr), array in get_data(collector, env_name, varname, utd).items():
        name = f'bs_{bs}_lr_{lr}'
        np.save(f'{dirname}/{name}', array)

def save_loop(collector):
    """Export every tracked metric for each (env, utd) pair in ``collector``.

    Iterates over the unique environments, then the unique ``utd`` values of
    each environment, and calls ``save_data`` once per metric.
    """
    metrics = ('episode/return', 'val/critic_loss', 'training/critic_pnorm_l2')
    for env in collector.get_unique('env'):
        utd_values = collector.get_unique('utd', env=env)
        for utd in utd_values:
            for varname in metrics:
                save_data(collector, env, varname, utd)

# Export every (env, utd, metric) combination as .npy files under data/<zip_path>/.
save_loop(collector)