# Note

This data was collected and saved as a `zip` file independently.
As a result, the code here looks somewhat different from the gym notebooks
and may be harder to use directly. We recommend referring to the gym notebooks instead.

# Dataloading

In [7]:
# Enable IPython's autoreload extension; mode 2 reloads imported modules before
# each cell runs, so edits to the local packages are picked up without a restart.
%load_ext autoreload
%autoreload 2

In [1]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from qscaled.preprocessing import bootstrap_crossings, FullGroupedLoaderUnlabeled
from qscaled.bootstrap_envsteps_to_thresh import (
    grid_best_uncertainty_lr,
    grid_best_uncertainty_bs,
    get_bootstrap_optimal,
    compute_bootstrap_averages,
    linear_fit_shared,
    tabulate_proposed_params,
    tabulate_baseline_params
)
from utils.create_zip import save_and_load
from utils.configs import Config

# Seed NumPy's global RNG so NumPy-driven randomness in later cells is repeatable.
# NOTE(review): presumably this covers the bootstrap resampling — confirm the
# helpers draw from the global NumPy RNG rather than their own generator.
np.random.seed(42)

In [2]:
# Run identifiers for this sweep. No W&B collector is used because the data
# comes from a pre-built zip file.
wandb_collect = False
wandb_collector = None  # Zip data was collected separately.
name = 'dmc_sweep'

# NOTE: `max_returns` is meant to hold the maximum possible returns (estimated
# with infinite data and compute) on each environment, eyeballed such that runs
# reach 80% (hence a 1.25 multiplier) but not 90%. None of that applies in this
# notebook: the mapping is left empty because returns are not normalized here.

config = Config(
    name=name,
    max_returns={},  # No need to normalize returns, DMC is already 0-1000.
    return_key=None,  # Not present in zip data.
    # UTD values passed to the fit and to the parameter tables in the bootstrapping cell.
    utds_to_predict=[0.25, 0.5, 1, 2, 4, 8, 16],
    wandb_collector=wandb_collector,
    zip_load_cls=FullGroupedLoaderUnlabeled,
    # NOTE(review): presumably the logging interval and first logged env step
    # of the zipped runs — confirm against the data collection script.
    env_step_freq=1000,
    env_step_start=5000
)

In [3]:
# Load the sweep described by `config`. The second return value unpacks into the
# environments, UTD values, batch sizes and learning rates (per the tuple names).
grid_search_df, (envs, utds, batch_sizes, learning_rates) = save_and_load(config)

# Bootstrap the threshold crossings. Results are keyed to a file named after this
# sweep (presumably a cache that is reused on re-runs — confirm in `bootstrap_crossings`).
bootstrap_cache_path = f'../../cache/bootstrap_results/{name}.pkl'
grid_search_df = bootstrap_crossings(
    grid_search_df, config.thresholds, bootstrap_cache_file=bootstrap_cache_path
)

Average standard deviation across all conditions: 9428.81


# Bootstrapping

In [4]:
# Per-hyperparameter selections from the grid search (learning rate, then batch size).
best_lr = grid_best_uncertainty_lr(grid_search_df)
best_bs = grid_best_uncertainty_bs(grid_search_df)

# Apply `get_bootstrap_optimal` once per (environment, UTD) group of `best_lr`,
# then combine the result with both tables via `compute_bootstrap_averages`.
best_lr_bs = (
    best_lr
    .groupby(['env_name', 'utd'])
    .apply(get_bootstrap_optimal, include_groups=False)
    .reset_index()
)
best_lr_bs = compute_bootstrap_averages(best_lr, best_bs, best_lr_bs)

# Fit learning rate and batch size against UTD. The printed fits have the form
# `value ~ intercept * UTD^slope`, with one slope shared by all environments and
# one intercept per environment. Plotting is turned off here.
(
    proposed_lr_values, proposed_bs_values,
    lr_shared_slope, lr_env_intercepts,
    bs_shared_slope, bs_env_intercepts,
) = linear_fit_shared(
    config.utds_to_predict,
    grid_search_df,
    best_lr_bs,
    envs,
    name,
    plot=False,
)

# Render floats in scientific notation from here on (a global pandas display option).
pd.set_option('display.float_format', '{:.2e}'.format)
proposed_values_df = tabulate_proposed_params(
    envs, config.utds_to_predict, proposed_lr_values, proposed_bs_values, name
)
baseline_values_df = tabulate_baseline_params(
    grid_search_df, utds, config.utds_to_predict, len(envs), name
)

cartpole-swingup: lr ~ 0.000514 * UTD^0.054981
cheetah-run: lr ~ 0.000474 * UTD^0.054981
dog-stand: lr ~ 0.000538 * UTD^0.054981
finger-spin: lr ~ 0.000533 * UTD^0.054981
humanoid-stand: lr ~ 0.000575 * UTD^0.054981
quadruped-walk: lr ~ 0.000533 * UTD^0.054981
walker-walk: lr ~ 0.000551 * UTD^0.054981
cartpole-swingup: batch size ~ 246.843352 * UTD^-0.000581
cheetah-run: batch size ~ 200.467029 * UTD^-0.000581
dog-stand: batch size ~ 112.182332 * UTD^-0.000581
finger-spin: batch size ~ 226.862506 * UTD^-0.000581
humanoid-stand: batch size ~ 183.556686 * UTD^-0.000581
quadruped-walk: batch size ~ 220.970519 * UTD^-0.000581
walker-walk: batch size ~ 199.395989 * UTD^-0.000581
Baseline based on UTD 2.0


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  utd_data['last_crossing'] = utd_data['crossings'].apply(lambda x: x[-1])


In [5]:
# Show the proposed learning rates and batch sizes per environment and UTD,
# including the x√2 / x√0.5 variants and the rounded batch sizes.
proposed_values_df

Unnamed: 0,Environment,UTD,Learning Rate,Learning Rate x√2,Learning Rate x√0.5,Batch Size,Batch Size x√2,Batch Size x√0.5,Batch Size(rounded),Batch Size x√2(rounded),Batch Size x√0.5(rounded)
0,cartpole-swingup,0.25,0.000476,0.000674,0.000337,247,349,175,240,352,176
1,cartpole-swingup,0.5,0.000495,0.000701,0.00035,247,349,175,240,352,176
2,cartpole-swingup,1.0,0.000515,0.000729,0.000364,247,349,175,240,352,176
3,cartpole-swingup,2.0,0.000535,0.000756,0.000378,247,349,174,240,352,176
4,cartpole-swingup,4.0,0.000556,0.000786,0.000393,247,349,174,240,352,176
5,cartpole-swingup,8.0,0.000577,0.000816,0.000408,247,349,174,240,352,176
6,cartpole-swingup,16.0,0.000599,0.000847,0.000423,246,349,174,240,352,176
7,cheetah-run,0.25,0.000439,0.000621,0.000311,201,284,142,208,288,144
8,cheetah-run,0.5,0.000457,0.000646,0.000323,201,284,142,208,288,144
9,cheetah-run,1.0,0.000475,0.000672,0.000336,200,283,142,192,288,144


In [6]:
# Show the baseline learning rate and batch size per environment and UTD.
baseline_values_df

Unnamed: 0,Environment,UTD,Learning Rate,Batch Size
0,cartpole-swingup,0.25,0.0006,512
1,cartpole-swingup,0.5,0.0006,512
2,cartpole-swingup,1.0,0.0006,512
3,cartpole-swingup,2.0,0.0006,512
4,cartpole-swingup,4.0,0.0006,512
5,cartpole-swingup,8.0,0.0006,512
6,cartpole-swingup,16.0,0.0006,512
7,cheetah-run,0.25,0.0003,256
8,cheetah-run,0.5,0.0003,256
9,cheetah-run,1.0,0.0003,256
