# Note

This data was collected and saved as a `zip` file independently.
Thus, the code here looks somewhat different, and may be harder
to use directly. We recommend referring to the gym notebooks.

# Dataloading

In [3]:
# Reload imported modules automatically before each cell executes, so edits to
# the local packages are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from qscaled.preprocessing import (
    bootstrap_crossings, 
    FullGroupedLoaderUnlabeled
)
from qscaled.bootstrap_envsteps_to_thresh import (
    grid_best_uncertainty_lr,
    grid_best_uncertainty_bs,
    get_bootstrap_optimal,
    compute_bootstrap_averages,
)
from qscaled.linear_fit import linear_fit_separate, linear_fit_shared
from qscaled.save_params import tabulate_proposed_params, tabulate_baseline_params

from utils.create_zip import save_and_load
from utils.configs import Config

# Seed NumPy's global RNG: the bootstrapping later in this notebook is
# randomized, and a fixed seed makes a fresh "restart kernel + run all" reproducible.
np.random.seed(42)

In [None]:
wandb_collect = False  # NOTE(review): not referenced again in the cells visible here.
wandb_collector = None  # Zip data was collected separately.
name = 'dmc_sweep'

# `max_returns` is the place for per-environment maximum possible returns
# (estimated with infinite data and compute). The heuristic of eyeballing
# those values so that runs reach 80% (hence a 1.25 multiplier) but not 90%
# is NOT applied in this cell: the mapping is left empty below.
# NOTE(review): the earlier wording of this comment read as if values were set
# here; it was presumably carried over from another notebook. Confirm.

config = Config(
    name=name,
    max_returns={},  # No need to normalize returns, DMC is already 0-1000.
    return_key=None,  # Not present in zip data.
    utds_to_predict=[0.25, 0.5, 1, 2, 4, 8, 16],  # UTD values passed to the fits and tables below.
    wandb_collector=wandb_collector,
    zip_load_cls=FullGroupedLoaderUnlabeled,  # Loader class for the independently collected zip data.
    env_step_freq=1000,  # presumably the spacing of logged env steps (TODO confirm)
    env_step_start=5000  # presumably the first logged env step (TODO confirm)
)

**Note:** Due to bootstrapping, outputs are randomized. We have set a random seed,
so if you restart the kernel and run all cells, your final `csv` output should
be deterministic.

If you collected your own data with `save_loop`, pass `UTDGroupedLoader` as `zip_load_cls` in the `Config` above.
(`FullGroupedLoaderUnlabeled` is used in this notebook for DMC data, which was collected independently.)

In [6]:
# Load the sweep data described by `config`, then run the bootstrap over the
# threshold crossings. The pickle path below is handed to bootstrap_crossings
# as its cache file.
bootstrap_cache_file = f'../../cache/bootstrap_results/{name}.pkl'
grid_search_df = save_and_load(config)
grid_search_df = bootstrap_crossings(
    grid_search_df, config.thresholds, bootstrap_cache_file=bootstrap_cache_file
)

Average standard deviation across all conditions: 9387.54


# Bootstrapping and Fitting

In [7]:
# Grid-search summaries for learning rate and batch size (the exact selection
# rule lives in qscaled.bootstrap_envsteps_to_thresh).
best_lr = grid_best_uncertainty_lr(grid_search_df)
best_bs = grid_best_uncertainty_bs(grid_search_df)
# Apply get_bootstrap_optimal to each (env_name, utd) group. include_groups=False
# keeps the grouping columns out of the frame handed to the function, and
# reset_index() turns the group keys back into ordinary columns.
best_lr_bs = best_lr.groupby(['env_name', 'utd']).apply(get_bootstrap_optimal, include_groups=False).reset_index()
# Combine the three tables; the result replaces best_lr_bs and feeds both fits below.
best_lr_bs = compute_bootstrap_averages(best_lr, best_bs, best_lr_bs)

# Two fits are run on the same inputs: one with a separate slope per
# environment and one with a slope shared across environments.
# Empirically, we find that using a shared slope does better, so only the
# shared-fit values are tabulated further down. The separate fit is run for
# comparison and is given save_path=None.
(
    proposed_lr_values_separate,
    proposed_bs_values_separate,
    lr_slopes_separate,
    lr_intercepts_separate,
    bs_slopes_separate,
    bs_intercepts_separate
) = linear_fit_separate(
    config.utds_to_predict, grid_search_df, best_lr_bs, save_path=None, plot=False
)

# Shared-slope fit; this is the one whose outputs are used for the proposed table.
(
    proposed_lr_values_shared,
    proposed_bs_values_shared,
    lr_shared_slope_shared,
    lr_env_intercepts_shared,
    bs_shared_slope_shared,
    bs_env_intercepts_shared
) = linear_fit_shared(
    config.utds_to_predict, grid_search_df, best_lr_bs, save_path=name, plot=False
)

# Global pandas display option: from here on, floats in displayed DataFrames
# are rendered in scientific notation with two decimals.
pd.options.display.float_format = '{:.2e}'.format
# Table of proposed learning rates / batch sizes from the shared-slope fit.
proposed_values_df = tabulate_proposed_params(
    config.utds_to_predict, 
    proposed_lr_values_shared, 
    proposed_bs_values_shared, 
    save_path=name
)

# Table of baseline parameters built from the grid-search data for the same UTD list.
baseline_values_df = tabulate_baseline_params(
    config.utds_to_predict, 
    grid_search_df, 
    save_path=name
)

cartpole-swingup: learning rate ~ 0.001048 * UTD^-0.538793
cheetah-run: learning rate ~ 0.000536 * UTD^-0.054655
dog-stand: learning rate ~ 0.000445 * UTD^-0.229081
finger-spin: learning rate ~ 0.000828 * UTD^-0.214160
humanoid-stand: learning rate ~ 0.000603 * UTD^-0.584208
quadruped-walk: learning rate ~ 0.000943 * UTD^-0.342499
walker-walk: learning rate ~ 0.000898 * UTD^-0.135157
cartpole-swingup: batch size ~ 288.720667 * UTD^0.143446
cheetah-run: batch size ~ 490.006858 * UTD^-0.321182
dog-stand: batch size ~ 173.319018 * UTD^-0.801320
finger-spin: batch size ~ 477.405014 * UTD^-0.200032
humanoid-stand: batch size ~ 477.568622 * UTD^-0.501833
quadruped-walk: batch size ~ 444.118959 * UTD^-0.249561
walker-walk: batch size ~ 441.993244 * UTD^-0.904269
cartpole-swingup: lr ~ 0.000784 * UTD^-0.315195
cheetah-run: lr ~ 0.000679 * UTD^-0.315195
dog-stand: lr ~ 0.000524 * UTD^-0.315195
finger-spin: lr ~ 0.000932 * UTD^-0.315195
humanoid-stand: lr ~ 0.000476 * UTD^-0.315195
quadruped-wal

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  utd_data['last_crossing'] = utd_data['crossings'].apply(lambda x: x[-1])


In [8]:
# Show the proposed-parameter table returned by tabulate_proposed_params.
proposed_values_df

Unnamed: 0,Environment,UTD,Learning Rate,Learning Rate x√2,Learning Rate x√0.5,Batch Size,Batch Size x√2,Batch Size x√0.5,Batch Size(rounded),Batch Size x√2(rounded),Batch Size x√0.5(rounded)
0,cartpole-swingup,1.00e+00,1.21e-03,1.72e-03,8.58e-04,1006,1422,711,1008,1424,704
1,cartpole-swingup,2.00e+00,9.76e-04,1.38e-03,6.90e-04,723,1022,511,720,1024,512
2,cartpole-swingup,4.00e+00,6.30e-04,8.91e-04,4.46e-04,373,528,264,368,528,256
3,cartpole-swingup,8.00e+00,4.07e-04,5.76e-04,2.88e-04,193,272,136,192,272,128
4,cartpole-swingup,1.00e+00,1.21e-03,1.72e-03,8.58e-04,1006,1422,711,1008,1424,704
...,...,...,...,...,...,...,...,...,...,...,...
163,walker-walk,8.00e+00,5.27e-04,7.45e-04,3.73e-04,110,156,78,112,160,80
164,walker-walk,1.00e+00,1.57e-03,2.22e-03,1.11e-03,575,813,407,576,816,400
165,walker-walk,2.00e+00,1.26e-03,1.79e-03,8.93e-04,413,584,292,416,576,288
166,walker-walk,4.00e+00,8.16e-04,1.15e-03,5.77e-04,213,302,151,208,304,144


In [9]:
# Show the baseline-parameter table returned by tabulate_baseline_params.
baseline_values_df

Unnamed: 0,Environment,UTD,Learning Rate,Batch Size
0,cartpole-swingup,0.25,0.0006,512
1,cartpole-swingup,0.5,0.0006,512
2,cartpole-swingup,1.0,0.0006,512
3,cartpole-swingup,2.0,0.0006,512
4,cartpole-swingup,4.0,0.0006,512
5,cartpole-swingup,8.0,0.0006,512
6,cartpole-swingup,16.0,0.0006,512
7,cheetah-run,0.25,0.0003,256
8,cheetah-run,0.5,0.0003,256
9,cheetah-run,1.0,0.0003,256
