# Note

This data was collected and saved as a `zip` file independently.
As a result, the code here looks somewhat different from the gym notebooks
and may be harder to use directly. We recommend referring to the gym notebooks instead.

# Dataloading

In [8]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [9]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from qscaled.preprocessing import bootstrap_crossings, FullGroupedLoaderUnlabeled
from qscaled.bootstrap_envsteps_to_thresh import (
    grid_best_uncertainty_lr,
    grid_best_uncertainty_bs,
    get_bootstrap_optimal,
    compute_bootstrap_averages,
    linear_fit_shared,
    tabulate_proposed_params,
    tabulate_baseline_params
)
from utils.create_zip import save_and_load
from utils.configs import Config

# Seed NumPy's legacy global RNG so repeated runs start from the same state.
# NOTE(review): this only makes the bootstrap repeatable if the qscaled helpers
# draw from the global `np.random` state — confirm.
np.random.seed(42)

In [10]:
# NOTE(review): `wandb_collect` is not read by any cell visible here —
# presumably kept for parity with the gym notebooks; confirm before removing.
wandb_collect = False
wandb_collector = None  # Zip data was collected separately.
name = 'dmc_sweep'  # Sweep identifier; also used to name the bootstrap cache file.

# Per-environment maximum returns (eyeballed so that runs reach 80% but not
# 90%, hence a 1.25 multiplier) are NOT supplied in this notebook:
# `max_returns` is left empty below because DMC returns are used unnormalized.

config = Config(
    name=name,
    max_returns={},  # No need to normalize returns, DMC is already 0-1000.
    return_key=None,  # Not present in zip data.
    # UTD values later passed to the fit and to both parameter tables.
    utds_to_predict=[0.25, 0.5, 1, 2, 4, 8, 16],
    wandb_collector=wandb_collector,
    zip_load_cls=FullGroupedLoaderUnlabeled,
    # NOTE(review): presumably the env-step logging interval and the first
    # logged step — confirm against utils.configs.Config.
    env_step_freq=1000,
    env_step_start=5000
)

In [11]:
# Load the grid-search frame described by `config`. `save_and_load` also
# returns the environments, UTDs, batch sizes and learning rates.
# NOTE(review): presumably these are the distinct sweep values found in the
# data — confirm in utils.create_zip.
grid_search_df, (envs, utds, batch_sizes, learning_rates) = save_and_load(config)

# Rebind the frame to the output of `bootstrap_crossings`, run with the
# thresholds from `config`. The cache path is relative, so it resolves against
# the kernel's working directory, and it is keyed by the sweep `name`.
grid_search_df = bootstrap_crossings(
    grid_search_df, 
    config.thresholds, 
    bootstrap_cache_file=f'../../cache/bootstrap_results/{name}.pkl'
)

Average standard deviation across all conditions: 9428.81


# Bootstrapping

In [12]:
# Per-hyperparameter bootstrap summaries computed from the grid-search frame.
best_lr = grid_best_uncertainty_lr(grid_search_df)
best_bs = grid_best_uncertainty_bs(grid_search_df)

# Apply `get_bootstrap_optimal` to every (environment, UTD) group of `best_lr`
# (grouping columns are excluded from what the function receives), turn the
# group keys back into columns, and hand the result to
# `compute_bootstrap_averages` together with both summaries.
best_lr_bs = compute_bootstrap_averages(
    best_lr,
    best_bs,
    best_lr.groupby(['env_name', 'utd'])
    .apply(get_bootstrap_optimal, include_groups=False)
    .reset_index(),
)

# Fit learning rate and batch size against UTD without plotting. In the
# recorded output each fit uses one exponent for all environments and a
# separate coefficient per environment.
(
    proposed_lr_values, proposed_bs_values,
    lr_shared_slope, lr_env_intercepts,
    bs_shared_slope, bs_env_intercepts,
) = linear_fit_shared(
    config.utds_to_predict,
    grid_search_df,
    best_lr_bs,
    envs,
    name,
    plot=False,
)

# Kernel-wide pandas option: floats in every later DataFrame display are
# formatted in scientific notation with two decimals.
pd.options.display.float_format = '{:.2e}'.format

# Build the tables of proposed and baseline hyperparameters.
proposed_values_df = tabulate_proposed_params(
    envs, config.utds_to_predict, proposed_lr_values, proposed_bs_values, name
)
baseline_values_df = tabulate_baseline_params(
    grid_search_df, utds, config.utds_to_predict, len(envs), name
)

cartpole-swingup: lr ~ 0.006269 * UTD^-0.799489
cheetah-run: lr ~ 0.003352 * UTD^-0.799489
dog-stand: lr ~ 0.001086 * UTD^-0.799489
finger-spin: lr ~ 0.007129 * UTD^-0.799489
humanoid-stand: lr ~ 0.002225 * UTD^-0.799489
quadruped-walk: lr ~ 0.007101 * UTD^-0.799489
walker-walk: lr ~ 0.008157 * UTD^-0.799489
cartpole-swingup: batch size ~ 579.893172 * UTD^-0.466023
cheetah-run: batch size ~ 579.133611 * UTD^-0.466023
dog-stand: batch size ~ 124.986402 * UTD^-0.466023
finger-spin: batch size ~ 611.136865 * UTD^-0.466023
humanoid-stand: batch size ~ 430.850662 * UTD^-0.466023
quadruped-walk: batch size ~ 529.683893 * UTD^-0.466023
walker-walk: batch size ~ 317.278598 * UTD^-0.466023
Baseline based on UTD 2.0


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  utd_data['last_crossing'] = utd_data['crossings'].apply(lambda x: x[-1])


In [13]:
# Display the proposed learning rates and batch sizes per environment and UTD,
# including the x√2 / x√0.5 variants and the rounded batch sizes.
proposed_values_df

Unnamed: 0,Environment,UTD,Learning Rate,Learning Rate x√2,Learning Rate x√0.5,Batch Size,Batch Size x√2,Batch Size x√0.5,Batch Size(rounded),Batch Size x√2(rounded),Batch Size x√0.5(rounded)
0,cartpole-swingup,0.25,0.019,0.0269,0.0134,1106,1565,782,1104,1568,784
1,cartpole-swingup,0.5,0.0107,0.0152,0.00759,793,1122,561,800,1120,560
2,cartpole-swingup,1.0,0.00606,0.00857,0.00429,569,804,402,576,800,400
3,cartpole-swingup,2.0,0.00354,0.00501,0.0025,416,588,294,416,592,288
4,cartpole-swingup,4.0,0.002,0.00283,0.00142,298,421,211,304,416,208
5,cartpole-swingup,8.0,0.00117,0.00165,0.000827,218,308,154,224,304,160
6,cartpole-swingup,16.0,0.000683,0.000966,0.000483,159,225,113,160,224,112
7,cheetah-run,0.25,0.0102,0.0144,0.00718,1105,1563,781,1104,1568,784
8,cheetah-run,0.5,0.00574,0.00811,0.00406,792,1120,560,800,1120,560
9,cheetah-run,1.0,0.00324,0.00458,0.00229,568,803,402,576,800,400


In [14]:
# Display the baseline learning rate and batch size per environment and UTD.
baseline_values_df

Unnamed: 0,Environment,UTD,Learning Rate,Batch Size
0,cartpole-swingup,0.25,0.0006,512
1,cartpole-swingup,0.5,0.0006,512
2,cartpole-swingup,1.0,0.0006,512
3,cartpole-swingup,2.0,0.0006,512
4,cartpole-swingup,4.0,0.0006,512
5,cartpole-swingup,8.0,0.0006,512
6,cartpole-swingup,16.0,0.0006,512
7,cheetah-run,0.25,0.0003,256
8,cheetah-run,0.5,0.0003,256
9,cheetah-run,1.0,0.0003,256
