In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import numpy as np
import pandas as pd
from itertools import product
from scripts.core.fitting import fit_quadratic, params_to_width
from scripts.core.bootstrapping import (
    predict_inverse_power_product,
    predict_sum_of_powerlaw_shared_exponent,
)

In [3]:
# Known critic sizes, one tuple per (environment, width) combination:
#   (env_name, benchmark, critic_width, critic_params)
# The same field order is used as DataFrame column names further down.
# Not every environment has an entry for every width.
model_sizes = [
    ('h1-crawl-v0', 'humanoid_bench', 128, 178632),
    ('h1-crawl-v0', 'humanoid_bench', 256, 619208),
    ('h1-crawl-v0', 'humanoid_bench', 512, 2286792),
    ('h1-crawl-v0', 'humanoid_bench', 1024, 8767688),
    ('h1-crawl-v0', 'humanoid_bench', 2048, 34312392),
    ('h1-crawl-v0', 'humanoid_bench', 4096, 135733448),
    ('h1-pole-v0', 'humanoid_bench', 256, 619208),
    ('h1-pole-v0', 'humanoid_bench', 512, 2286792),
    ('h1-pole-v0', 'humanoid_bench', 1024, 8767688),
    ('h1-pole-v0', 'humanoid_bench', 2048, 34312392),
    ('h1-pole-v0', 'humanoid_bench', 4096, 135733448),
    ('h1-stand-v0', 'humanoid_bench', 256, 619208),
    ('h1-stand-v0', 'humanoid_bench', 512, 2286792),
    ('h1-stand-v0', 'humanoid_bench', 1024, 8767688),
    ('h1-stand-v0', 'humanoid_bench', 2048, 34312392),
    ('h1-stand-v0', 'humanoid_bench', 4096, 135733448),
    ('humanoid-stand', 'dmc', 128, 183240),
    ('humanoid-stand', 'dmc', 256, 628424),
    ('humanoid-stand', 'dmc', 512, 2305224),
    ('humanoid-stand', 'dmc', 1024, 8804552),
    ('humanoid-stand', 'dmc', 2048, 34386120),
]

# Batch size fit

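`inverse_power_product`, i.e. $B^* \sim \dfrac{a_{\text{env}}}{\sigma^{\alpha_{\text{env}}} \left( 1 + (b_{\text{env}} / N)^{c_{\text{env}}} \right) }$, where $B^*$ is the fitted batch size, $\sigma$ is the UTD ratio (`utd`), $N$ is the number of critic parameters (`critic_params`), and $a_{\text{env}}$, $b_{\text{env}}$, $c_{\text{env}}$, $\alpha_{\text{env}}$ are fitted separately for each environment.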

In [4]:
# Saved batch-size fit: an object array stored in a .npy file, unwrapped with .item()
# and used below as a mapping from environment name to a parameter vector.
batch_size_params_path = (
    '../saved_fits/utd_critic_params_best_bs_bootstrap_mean_inverse_power_product_250505.npy'
)
# NOTE: allow_pickle=True unpickles arbitrary objects -- only load files you trust.
params_dict = np.load(batch_size_params_path, allow_pickle=True).item()

# Show the fit as a table: one row per environment, where the i-th named column
# holds entry i of that environment's parameter vector.
pd.DataFrame(
    [
        {
            'env_name': env_name,
            **{
                column: fitted[position]
                for position, column in enumerate(
                    ('a', 'b', 'c', 'alpha', 'a_unscaled', 'b_unscaled')
                )
            },
        }
        for env_name, fitted in params_dict.items()
    ]
)

Unnamed: 0,env_name,a,b,c,alpha,a_unscaled,b_unscaled
0,h1-crawl-v0,54406.086168,26569530000.0,0.554514,0.479077,130.488638,148738.896038
1,h1-pole-v0,45553.278728,21510640000.0,0.648595,-0.048744,100.888729,9406.47084
2,h1-stand-v0,1658.998085,821760.1,3.10509,0.382536,1.625321,1.327115
3,humanoid-stand,1301.950726,4731815.0,0.384982,0.493316,3.237266,25.823049


In [5]:
def batch_size_predictor(df):
    """Return the fitted batch size for every row of ``df``.

    Thin wrapper around ``predict_inverse_power_product`` that always passes the
    ``'utd'`` and ``'critic_params'`` columns as inputs and the module-level
    ``params_dict`` as the fitted parameters. ``params_dict`` is looked up when the
    function is called, so reloading the fit is picked up automatically.

    Args:
        df: DataFrame that the notebook builds with ``env_name``, ``utd`` and
            ``critic_params`` columns.

    Returns:
        Whatever ``predict_inverse_power_product`` returns; callers assign it to a
        DataFrame column, so it is expected to have one value per row.
    """
    return predict_inverse_power_product(df, ['utd', 'critic_params'], params_dict)


def make_batch_size_fit_df():
    """Tabulate fitted batch sizes on the UTD x model-size grid and save them as CSVs.

    Each UTD in ``(1, 2, 4, 8, 16)`` is paired with every entry of the module-level
    ``model_sizes``. ``batch_size_predictor`` provides ``fitted_batch_size``, and
    ``batch_size_rounded`` is that value rounded (``np.round``) to a multiple of 16.

    Rows with ``utd <= 8`` and ``critic_width <= 2048`` are written to the
    "interpolate" CSV; rows with ``utd > 8`` or ``critic_width > 2048`` are written
    to the "extrapolate" CSV. Nothing is returned.
    """
    records = [
        {
            'env_name': env_name,
            'benchmark': benchmark,
            'utd': utd,
            'critic_width': critic_width,
            'critic_params': critic_params,
        }
        # product() iterates UTD in the outer position, model size in the inner one.
        for utd, (env_name, benchmark, critic_width, critic_params) in product(
            (1, 2, 4, 8, 16), model_sizes
        )
    ]
    grid_df = pd.DataFrame(records)
    grid_df['fitted_batch_size'] = batch_size_predictor(grid_df)
    grid_df['batch_size_rounded'] = np.round(grid_df['fitted_batch_size'] / 16).astype(int) * 16

    in_range = (grid_df['utd'] <= 8) & (grid_df['critic_width'] <= 2048)
    out_of_range = (grid_df['utd'] > 8) | (grid_df['critic_width'] > 2048)
    destinations = (
        (in_range, 'proposed_hparams/interpolate_batch_size_inverse_product_250505.csv'),
        (out_of_range, 'proposed_hparams/extrapolate_batch_size_inverse_product_250505.csv'),
    )
    for row_mask, csv_path in destinations:
        grid_df[row_mask].reset_index(drop=True).to_csv(csv_path, index=False)


make_batch_size_fit_df()

In [6]:
def make_interpolate_batch_size_fit_df():
    """Write fitted batch sizes for in-between UTDs and critic widths to a CSV.

    The widths are the five base widths (128 ... 2048) scaled by sqrt(2) and rounded
    to a multiple of 16. Their parameter counts are evaluated as
    ``a * w**2 + b * w + c`` using the coefficients that ``fit_quadratic`` returns
    for the (width, params) pairs in ``model_sizes``. Each environment uses its own
    sub-range of those widths, and every UTD in ``(3, 6, 12)`` is covered.

    ``batch_size_predictor`` provides ``fitted_batch_size``; ``batch_size_rounded``
    is that value rounded to a multiple of 16. Nothing is returned.
    """
    known_sizes = pd.DataFrame(
        model_sizes, columns=['env_name', 'benchmark', 'critic_width', 'critic_params']
    )
    base_widths = np.array([128, 256, 512, 1024, 2048])
    new_widths = np.round(base_widths * np.sqrt(2) / 16).astype(int) * 16
    a, b, c = fit_quadratic(known_sizes['critic_width'], known_sizes['critic_params'])
    new_params = a * new_widths**2 + b * new_widths + c

    # Which of the five new widths each environment uses.
    widths_per_env = {
        'h1-crawl-v0': slice(1, None),
        'h1-pole-v0': slice(1, -1),
        'h1-stand-v0': slice(1, -1),
        'humanoid-stand': slice(None, -1),
    }

    records = []
    for utd in (3, 6, 12):
        for env_name, keep in widths_per_env.items():
            benchmark = 'humanoid_bench' if env_name.startswith('h1') else 'dmc'
            for width, n_params in zip(new_widths[keep], new_params[keep]):
                records.append(
                    {
                        'env_name': env_name,
                        'benchmark': benchmark,
                        'utd': utd,
                        'critic_width': width,
                        'critic_params': n_params,
                    }
                )

    fit_df = pd.DataFrame(records)
    fit_df['fitted_batch_size'] = batch_size_predictor(fit_df)
    fit_df['batch_size_rounded'] = np.round(fit_df['fitted_batch_size'] / 16).astype(int) * 16

    fit_df.to_csv(
        'proposed_hparams/interpolate_batch_size_inverse_product_250510.csv', index=False
    )


make_interpolate_batch_size_fit_df()

In [7]:
def remake_utd3_batch_size_fit_df():
    """Regenerate the UTD=3 batch-size table and write it to a CSV.

    Uses the five base widths scaled by sqrt(2) (rounded to a multiple of 16), with
    parameter counts evaluated from the ``fit_quadratic`` coefficients as
    ``a * w**2 + b * w + c``. Only ``utd == 3`` is produced. ``batch_size_predictor``
    fills ``fitted_batch_size``, which is also rounded to a multiple of 16.
    Nothing is returned.
    """
    model_size_df = pd.DataFrame(
        model_sizes, columns=['env_name', 'benchmark', 'critic_width', 'critic_params']
    )
    # sqrt(2)-scaled widths, snapped to a multiple of 16 (a NumPy integer array).
    critic_widths = (
        np.round(np.array([128, 256, 512, 1024, 2048]) * np.sqrt(2) / 16).astype(int) * 16
    )
    a, b, c = fit_quadratic(model_size_df['critic_width'], model_size_df['critic_params'])
    critic_params = a * critic_widths**2 + b * critic_widths + c
    # NOTE(review): `critic_params` is derived from a NumPy array, so `+ [4096]`
    # broadcasts and adds 4096 to every parameter count -- it does not append an
    # element the way list concatenation would. If the intent was to add a
    # width-4096 model, both the width and the params arrays need an appended
    # entry (e.g. via np.append); otherwise the `+ [4096]` should go. Confirm.
    env_to_model_size = {
        'h1-crawl-v0': (critic_widths[1:], critic_params[1:] + [4096]),
        'h1-pole-v0': (critic_widths[1:-1], critic_params[1:-1] + [4096]),
        'h1-stand-v0': (critic_widths[1:-1], critic_params[1:-1] + [4096]),
        'humanoid-stand': (critic_widths[:-1], critic_params[:-1] + [4096]),
    }

    # The comprehension's own `critic_widths` / `critic_params` shadow the outer
    # arrays only inside the comprehension; zip() pairs each width with its params.
    inputs = [
        {
            'env_name': env_name,
            'benchmark': 'humanoid_bench' if env_name.startswith('h1') else 'dmc',
            'utd': utd,
            'critic_width': critic_width,
            'critic_params': critic_params_,
        }
        for utd in [3]
        for env_name, (critic_widths, critic_params) in env_to_model_size.items()
        for (critic_width, critic_params_) in zip(critic_widths, critic_params)
    ]
    batch_size_fit_df = pd.DataFrame(inputs)
    batch_size_fit_df['fitted_batch_size'] = batch_size_predictor(batch_size_fit_df)
    # Round the fitted batch size to a multiple of 16.
    batch_size_fit_df['batch_size_rounded'] = (
        np.round(batch_size_fit_df['fitted_batch_size'] / 16).astype(int) * 16
    )

    batch_size_fit_df.to_csv(
        'proposed_hparams/utd3_batch_size_inverse_product_250513.csv', index=False
    )


remake_utd3_batch_size_fit_df()

# Compute optimal hparams

In [11]:
# Three saved data-efficiency fits, each paired with a predictor that evaluates it on
# the 'utd' and 'critic_params' columns of a DataFrame. Each predictor reads its
# module-level parameter dict at call time.
# NOTE: allow_pickle=True unpickles arbitrary objects -- only load files you trust.

mine_data_efficiency_params_path = (
    '../saved_fits/utd_critic_params_time_to_threshold_sum_of_powerlaw_shared_exp_250508.npy'
)
mine_data_efficiency_params_dict = np.load(
    mine_data_efficiency_params_path, allow_pickle=True
).item()


def mine_data_efficiency_predictor(df):
    """Evaluate the fit loaded from ``mine_data_efficiency_params_path`` on ``df``."""
    return predict_sum_of_powerlaw_shared_exponent(
        df, ['utd', 'critic_params'], mine_data_efficiency_params_dict
    )


dmc_data_efficiency_params_path = (
    '../saved_fits/dmc_utd_critic_params_last_crossing_sum_of_powerlaw_shared_exp_250520.npy'
)
dmc_data_efficiency_params_dict = np.load(dmc_data_efficiency_params_path, allow_pickle=True).item()


def dmc_data_efficiency_predictor(df):
    """Evaluate the fit loaded from ``dmc_data_efficiency_params_path`` on ``df``."""
    return predict_sum_of_powerlaw_shared_exponent(
        df, ['utd', 'critic_params'], dmc_data_efficiency_params_dict
    )


dog_humanoid_data_efficiency_params_path = (
    '../saved_fits/'
    'dog_humanoid_utd_critic_params_last_crossing_sum_of_powerlaw_shared_exp_250520.npy'
)
dog_humanoid_data_efficiency_params_dict = np.load(
    dog_humanoid_data_efficiency_params_path, allow_pickle=True
).item()


def dog_humanoid_data_efficiency_predictor(df):
    """Evaluate the fit loaded from ``dog_humanoid_data_efficiency_params_path`` on ``df``."""
    return predict_sum_of_powerlaw_shared_exponent(
        df, ['utd', 'critic_params'], dog_humanoid_data_efficiency_params_dict
    )


def make_compute_optimal_hparams():
    """Propose (UTD, critic size, batch size) settings for a set of compute budgets.

    For every environment found in the three compute-fit CSVs:

    1. Critic size is tied to UTD along a 1000-point log-spaced UTD grid via
       ``critic_params = exp(intercept) ** (-1 / slope) * utd ** (1 / slope)``,
       using that environment's ``slope`` / ``intercept`` columns.
    2. Compute is estimated as
       ``10 * batch_size * critic_params * utd * data_efficiency`` with a fixed
       ``batch_size`` of 256, where ``data_efficiency`` comes from the predictor
       matching the CSV the environment was found in.
    3. For each budget of the environment, the grid point with the largest index
       whose compute does not exceed the budget is selected.

    The selected points then get a critic width (``params_to_width``), a batch size
    (``batch_size_predictor`` for environments in the first CSV, the fixed default
    otherwise), rounded columns, and are filtered to ``1 <= utd <= 25`` after
    rounding UTD to an integer. Two CSVs are written: one for the environments in
    the first fit CSV, one for all others.

    Returns:
        The filtered DataFrame of proposed hyperparameters.

    Raises:
        ValueError: if an environment belongs to none of the fit CSVs, or if no
            point on the UTD grid fits within one of its compute budgets.
        KeyError: if an environment has no entry in ``compute_budgets_per_env``.
    """
    compute_budgets_per_env = {
        'h1-stand-v0': [1e16, 2e16, 4e16, 8e16, 16e16],
        'h1-crawl-v0': [1e16, 2e16, 4e16, 8e16, 16e16],
        'h1-pole-v0': [2e16, 4e16, 8e16, 16e16, 32e16],
        'humanoid-stand': [0.25e16, 0.5e16, 1e16, 2e16, 4e16],
        'acrobot-swingup': [1e16, 2e16, 4e16, 8e16, 16e16],
        'cheetah-run': [1e16, 2e16, 4e16, 8e16, 16e16],
        'dog-run': [1e16, 2e16, 4e16, 8e16, 16e16],
        'finger-turn': [1e16, 2e16, 4e16, 8e16, 16e16],
        'fish-swim': [1e16, 2e16, 4e16, 8e16, 16e16],
        'hopper-hop': [0.5e16, 1e16, 2e16, 4e16, 8e16],
        'pendulum-swingup': [0.5e16, 1e16, 2e16, 4e16, 8e16],
        'quadruped-run': [1e16, 2e16, 4e16, 8e16, 16e16],
        'walker-run': [0.25e16, 0.5e16, 1e16, 2e16, 4e16],
        'dog-stand': [1e16, 2e16, 4e16, 8e16, 16e16],
        'dog-trot': [1e16, 2e16, 4e16, 8e16, 16e16],
        'dog-walk': [1e16, 2e16, 4e16, 8e16, 16e16],
        'humanoid-run': [1e16, 2e16, 4e16, 8e16, 16e16],
        'humanoid-walk': [1e16, 2e16, 4e16, 8e16, 16e16],
    }
    model_size_df = pd.DataFrame(
        model_sizes, columns=['env_name', 'benchmark', 'critic_width', 'critic_params']
    )
    mine_fit_df = pd.read_csv(
        'compute_fits/compute_optimal_fits_250508_234641.csv',
    )
    dmc_easy_df = pd.read_csv(
        'compute_fits/dmc_compute_optimal_shared_exp_fits_250520.csv',
    )
    # humanoid-stand is dropped from this file before the three fits are combined.
    dmc_dog_humanoid_df = pd.read_csv(
        'compute_fits/dog_humanoid_compute_optimal_shared_exp_fits_250520.csv',
    ).query('env_name != "humanoid-stand"')
    combined_df = pd.concat([mine_fit_df, dmc_easy_df, dmc_dog_humanoid_df])
    combined_df['benchmark'] = combined_df['env_name'].apply(
        lambda x: 'humanoid_bench' if x.startswith('h1') else 'dmc'
    )

    # Per-environment line coefficients; if an env appeared twice the last row wins.
    slopes = dict(zip(combined_df['env_name'], combined_df['slope']))
    intercepts = dict(zip(combined_df['env_name'], combined_df['intercept']))

    mine_envs = set(mine_fit_df['env_name'])
    dmc_easy_envs = set(dmc_easy_df['env_name'])
    dog_humanoid_envs = set(dmc_dog_humanoid_df['env_name'])

    batch_size = 256  # TODO
    utds = np.logspace(np.log10(1e-3), np.log10(1e3), 1000)
    results = []

    for (env, benchmark), _ in combined_df.groupby(['env_name', 'benchmark']):
        slope, intercept = slopes[env], intercepts[env]
        critic_params = np.exp(intercept) ** (-1 / slope) * utds ** (1 / slope)
        if env in mine_envs:
            data_efficiency_predictor = mine_data_efficiency_predictor
        elif env in dmc_easy_envs:
            data_efficiency_predictor = dmc_data_efficiency_predictor
        elif env in dog_humanoid_envs:
            data_efficiency_predictor = dog_humanoid_data_efficiency_predictor
        else:
            raise ValueError(f'{env} not found in any fit')
        data_efficiency = data_efficiency_predictor(
            pd.DataFrame({'env_name': env, 'utd': utds, 'critic_params': critic_params})
        )
        compute = 10 * batch_size * critic_params * utds * data_efficiency
        for budget in compute_budgets_per_env[env]:
            # Positions on the UTD grid whose estimated compute fits the budget.
            affordable = np.flatnonzero(np.asarray(compute <= budget))
            if affordable.size == 0:
                # Fail with a message naming the env and budget instead of NumPy's
                # generic "zero-size array" reduction error.
                raise ValueError(
                    f'{env}: no point on the UTD grid fits compute budget {budget:g}'
                )
            idx = affordable[-1]  # flatnonzero is ascending, so this is the largest index
            results.append(
                {
                    'env_name': env,
                    'benchmark': benchmark,
                    'utd': utds[idx],
                    'critic_params': critic_params[idx],
                    'compute_budget': budget,
                }
            )

    df = pd.DataFrame(results)

    # Use model size from existing data and only batch size fit for mine.
    # Every env is temporarily renamed to the first env of its benchmark listed in
    # `model_sizes`, so that the lookups below see an env they have data for.
    fake_env_mapping = {
        env: model_size_df.query(f'benchmark == "{benchmark}"')['env_name'].values[0]
        for (env, benchmark), _ in combined_df.groupby(['env_name', 'benchmark'])
    }
    actual_envs = df['env_name'].tolist()
    df['env_name'] = df['env_name'].map(fake_env_mapping)
    df['critic_width'] = params_to_width(model_size_df, df, 'critic_params')
    use_bs_prediction = [env in mine_envs for env in actual_envs]
    # NOTE(review): the predictor runs while `env_name` still holds the stand-in
    # env. If it selects fit parameters by env_name, envs from `mine_fit_df` get the
    # stand-in env's batch-size fit rather than their own -- confirm this is intended.
    df['batch_size'] = np.where(use_bs_prediction, batch_size_predictor(df), batch_size)
    df['env_name'] = actual_envs

    df['batch_size_rounded'] = np.round(df['batch_size'] / 16).astype(int) * 16
    # NOTE(review): width is truncated down to a multiple of 16, whereas batch size
    # above is rounded to the nearest multiple -- confirm the asymmetry is wanted.
    df['critic_width'] = (df['critic_width'] / 16).astype(int) * 16
    df['utd'] = np.round(df['utd']).astype(int)

    df = df.query('utd >= 1 and utd <= 25')

    is_mine = df['env_name'].isin(mine_fit_df['env_name'].values)
    df[is_mine].to_csv('proposed_hparams/hb_compute_optimal_250520.csv', index=False)
    df[~is_mine].to_csv(
        'proposed_hparams/dmc_compute_optimal_shared_exp_250520.csv', index=False
    )
    return df


# Print environments that kept fewer than their five budgets after the UTD-range
# filter. Environments with no surviving rows are absent from the groupby and are
# therefore not printed.
for env, group in make_compute_optimal_hparams().groupby('env_name'):
    if len(group) < 5:
        print(env)

finger-turn
quadruped-run
