# Dataloading

In [1]:
# Enable IPython's autoreload extension in mode 2, so imported modules are
# reloaded before each cell executes and edits to project code are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns


from qscaled.preprocessing import bootstrap_crossings
from qscaled.bootstrap_envsteps_to_thresh import (
    grid_best_uncertainty_lr,
    grid_best_uncertainty_bs,
    get_bootstrap_optimal,
    compute_bootstrap_averages,
    linear_fit_shared,
    tabulate_proposed_params,
    tabulate_baseline_params
)
from utils.wandb_utils import CRLRunCollector, MyRunCollector
from utils.create_zip import save_and_load
from utils.configs import Config

# Seed NumPy's global RNG once, up front, so a fresh-kernel "Run All" is repeatable.
# NOTE(review): presumably the bootstrap resampling draws from this global RNG — confirm.
np.random.seed(42)

To use this code:
1. Label your Wandb runs with tags.
2. Fill in `MyRunCollector` in `utils/wandb_utils.py`.
3. Update the following cell to match your setup (collector, Wandb tags, `name`, and `max_returns`).

The latter two steps take ~5 minutes!

If you set `wandb_collect = True`, your `zip` file will be rebuilt using your
Wandb collector. Otherwise, the `zip` file must already be present.

In [3]:
name = 'gym_sweep'
wandb_collect = False

# Default to no collector; one is only built when a rebuild from Wandb is requested.
wandb_collector = None
if wandb_collect:
    wandb_collector = CRLRunCollector('prestonfu', 'crl').create(
        load=True,
        tags=['sac_grid_manual_250206'],  # Wandb tags to include
        path=f'../../cache/collector/{name}.npy',
        parallel=True,
    )
    # Optional filtering steps; remove these lines if you'd like
    wandb_collector.remove_short(0.95)
    wandb_collector.trim(num_seeds=8, verbose=True)

# Maximum possible returns (estimated with infinite data and compute) on each
# environment. The reference values below are mostly eyeballed such that runs
# reach 80% of the maximum (hence the 1.25 multiplier) but not 90%. There is
# some variation for different environments.
reference_returns = {
    "HalfCheetah-v4": 7300,
    "Walker2d-v4": 4000,
    "Ant-v4": 5300,
    "Humanoid-v4": 5200,
}
max_returns = {env: ret * 1.25 for env, ret in reference_returns.items()}

config = Config(
    name=name,
    max_returns=max_returns,
    return_key='episode/return',
    utds_to_predict=[0.25, 0.5, 1, 2, 4, 8, 16],
    wandb_collector=wandb_collector,
    logging_freq=1000,
)

**Note:** Due to bootstrapping, outputs are randomized. We have set a random seed,
so if you restart the kernel and run all cells, your final `csv` output should
be deterministic.

If you used the code above with `save_loop`, use `UTDGroupedLoader` below.
(`FullGroupedLoaderUnlabeled` is used below for DMC data, which was collected independently.)

In [4]:
# Load the sweep described by `config`, together with the grid axes it spans.
raw_grid_df, grid_axes = save_and_load(config)
envs, utds, batch_sizes, learning_rates = grid_axes

# Add bootstrapped threshold crossings; results are cached on disk per sweep name.
bootstrap_cache_path = f'../../cache/bootstrap_results/{name}.pkl'
grid_search_df = bootstrap_crossings(
    raw_grid_df,
    config.thresholds,
    bootstrap_cache_file=bootstrap_cache_path,
)

Average standard deviation across all conditions: 2771.15


In [5]:
# Summarise the grid search along the learning-rate and batch-size axes, then
# combine them with the per-(env, UTD) bootstrap optimum.
best_lr = grid_best_uncertainty_lr(grid_search_df)
best_bs = grid_best_uncertainty_bs(grid_search_df)
optimum_by_env_utd = best_lr.groupby(['env_name', 'utd']).apply(
    get_bootstrap_optimal, include_groups=False
)
best_lr_bs = compute_bootstrap_averages(best_lr, best_bs, optimum_by_env_utd.reset_index())

# Fit learning rate and batch size against UTD; the printed fits use one
# exponent shared across environments and a separate coefficient per environment.
shared_fit = linear_fit_shared(
    config.utds_to_predict,
    grid_search_df,
    best_lr_bs,
    envs,
    name,
    plot=False,
)
(
    proposed_lr_values,
    proposed_bs_values,
    lr_shared_slope,
    lr_env_intercepts,
    bs_shared_slope,
    bs_env_intercepts,
) = shared_fit

# Render floats in scientific notation for every DataFrame displayed from here on.
pd.set_option('display.float_format', '{:.2e}'.format)

proposed_values_df = tabulate_proposed_params(
    envs,
    config.utds_to_predict,
    proposed_lr_values,
    proposed_bs_values,
    name,
)
baseline_values_df = tabulate_baseline_params(
    grid_search_df,
    utds,
    config.utds_to_predict,
    len(envs),
    name,
)

Ant-v4: lr ~ 0.000150 * UTD^-0.284872
HalfCheetah-v4: lr ~ 0.001871 * UTD^-0.284872
Humanoid-v4: lr ~ 0.000191 * UTD^-0.284872
Walker2d-v4: lr ~ 0.000824 * UTD^-0.284872
Ant-v4: batch size ~ 435.654339 * UTD^-0.351999
HalfCheetah-v4: batch size ~ 432.333192 * UTD^-0.351999
Humanoid-v4: batch size ~ 357.492090 * UTD^-0.351999
Walker2d-v4: batch size ~ 339.933091 * UTD^-0.351999
Baseline based on UTD 2.0


  best_bs = group.loc[group['last_crossing'].idxmin(skipna=True), 'batch_size']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  utd_data['last_crossing'] = utd_data['crossings'].apply(lambda x: x[-1])


In [6]:
# Display the table returned by `tabulate_proposed_params` (one row per environment/UTD pair).
proposed_values_df

Unnamed: 0,Environment,UTD,Learning Rate,Learning Rate x√2,Learning Rate x√0.5,Batch Size,Batch Size x√2,Batch Size x√0.5,Batch Size(rounded),Batch Size x√2(rounded),Batch Size x√0.5(rounded)
0,Ant-v4,0.25,0.000223,0.000316,0.000158,710,1004,502,704,1008,496
1,Ant-v4,0.5,0.000182,0.000258,0.000129,552,781,390,544,784,384
2,Ant-v4,1.0,0.000149,0.00021,0.000105,429,607,304,432,608,304
3,Ant-v4,2.0,0.000123,0.000173,8.67e-05,339,479,240,336,480,240
4,Ant-v4,4.0,0.0001,0.000142,7.08e-05,264,373,186,256,368,192
5,Ant-v4,8.0,8.27e-05,0.000117,5.84e-05,208,294,147,208,288,144
6,Ant-v4,16.0,6.82e-05,9.65e-05,4.83e-05,164,232,116,160,224,112
7,HalfCheetah-v4,0.25,0.00278,0.00393,0.00196,704,996,498,704,992,496
8,HalfCheetah-v4,0.5,0.00227,0.0032,0.0016,548,775,387,544,768,384
9,HalfCheetah-v4,1.0,0.00185,0.00261,0.00131,426,602,301,432,608,304


In [7]:
# Display the table returned by `tabulate_baseline_params` for comparison with the proposed values.
baseline_values_df

Unnamed: 0,Environment,UTD,Learning Rate,Batch Size
0,Ant-v4,0.25,0.0001,256
1,Ant-v4,0.5,0.0001,256
2,Ant-v4,1.0,0.0001,256
3,Ant-v4,2.0,0.0001,256
4,Ant-v4,4.0,0.0001,256
5,Ant-v4,8.0,0.0001,256
6,Ant-v4,16.0,0.0001,256
7,HalfCheetah-v4,0.25,0.002,512
8,HalfCheetah-v4,0.5,0.002,512
9,HalfCheetah-v4,1.0,0.002,512
