In [1]:
import pandas as pd
from pandas.api.types import is_numeric_dtype

In [2]:
# Directory that holds the result pickles, relative to the kernel's working
# directory. Kept in one place so the location can be changed without editing
# every path below.
HO_RESULTS_DIR = "../../ho_results"

# first round
first_round_lr_file = f"{HO_RESULTS_DIR}/rdc_ho_learningrate_20250724-121736_final.pickle"
first_round_exploration_file = f"{HO_RESULTS_DIR}/rdc_ho_exploration_20250725-114436_final.pickle"
first_round_architecture_file = f"{HO_RESULTS_DIR}/rdc_ho_architecture_20250725-083924_final.pickle"
first_round_replay_buffer_file = f"{HO_RESULTS_DIR}/rdc_ho_replaybuffer_20250725-114402_final.pickle"

# second round
second_round_lr_file = f"{HO_RESULTS_DIR}/rdc_ho_learningrate_20250726-094043_final.pickle"
second_round_exploration_file = f"{HO_RESULTS_DIR}/rdc_ho_exploration_20250726-094246_final.pickle"
second_round_architecture_file = f"{HO_RESULTS_DIR}/rdc_ho_architecture_20250726-094115_final.pickle"
second_round_replay_buffer_file = f"{HO_RESULTS_DIR}/rdc_ho_replaybuffer_20250726-094148_final.pickle"

In [3]:
# Unpickle the four first-round result files in one pass. The paths are listed
# in the same order as the names they are unpacked into.
(
    first_round_lr,
    first_round_architecture,
    first_round_exploration,
    first_round_replay_buffer,
) = (
    pd.read_pickle(pickle_path)
    for pickle_path in (
        first_round_lr_file,
        first_round_architecture_file,
        first_round_exploration_file,
        first_round_replay_buffer_file,
    )
)

In [4]:
# Unpickle the four second-round result files in one pass. The paths are listed
# in the same order as the names they are unpacked into.
(
    second_round_lr,
    second_round_architecture,
    second_round_exploration,
    second_round_replay_buffer,
) = (
    pd.read_pickle(pickle_path)
    for pickle_path in (
        second_round_lr_file,
        second_round_architecture_file,
        second_round_exploration_file,
        second_round_replay_buffer_file,
    )
)

In [5]:
# Peek at the first five rows to check which columns the result frames carry.
first_round_lr.head(n=5)

Unnamed: 0,number,params_rdc_gamma,params_rdc_lr,params_rdc_lr_gamma,params_rdc_lr_step_size,value,datetime_start,duration
0,0,0.915,1e-06,0.86,4250,76.54501,2025-07-24 12:17:36.282400,0 days 01:39:43.952331
1,1,0.942,5.5e-05,0.77,4250,75.131343,2025-07-24 12:17:36.284322,0 days 01:37:15.296595
2,2,0.988,0.00027,0.68,1000,75.842182,2025-07-24 12:17:36.286424,0 days 01:36:14.874939
3,3,0.923,7e-06,0.82,1500,74.082101,2025-07-24 12:17:36.288980,0 days 01:38:32.619206
4,4,0.915,2e-06,0.78,1500,74.742332,2025-07-24 12:17:36.291418,0 days 01:38:32.898711


In [6]:
def extract_new_bounds(df: pd.DataFrame, top_n=5, slack=0.1, verbose=True) -> dict:
    """Derive a widened (low, high) range per numeric column from the best rows.

    The frame is ordered by its ``value`` column, largest first, and the first
    ``top_n`` rows are kept. For every remaining numeric column the minimum and
    maximum over those rows are widened on both sides by ``slack`` times their
    span.

    Args:
        df: Result frame. Must contain a ``value`` column. The columns
            ``number``, ``value``, ``datetime_start`` and ``duration`` are
            treated as bookkeeping and never produce bounds; any of them may be
            absent.
        top_n: Number of highest-``value`` rows to derive the bounds from.
        slack: Fraction of the observed span added below the minimum and above
            the maximum.
        verbose: When True (the default), print the value counts of each column
            among the selected rows and a note for each skipped column.

    Returns:
        Dict mapping column name to a ``(low, high)`` tuple of floats. Columns
        that are non-numeric, or have no non-missing value among the selected
        rows, are left out.

    Notes:
        * The lower bound is floored at 0.0, so negative-valued columns are not
          supported.
        * The upper bound is not clamped. NOTE(review): callers should clip it
          to the column's valid domain where one exists - confirm against the
          consumer of these bounds.
        * The slack is linear in the observed span. NOTE(review): for columns
          searched on a logarithmic scale this can push the lower bound to 0.0
          - confirm how the bounds are consumed.
    """
    # Largest ``value`` first; sort_values places missing values last by default.
    ranked = df.sort_values("value", ascending=False)
    best_rows = ranked.head(top_n)

    # errors="ignore" keeps the function usable on frames that lack some of the
    # bookkeeping columns instead of failing with a KeyError.
    params = df.columns.drop(
        ["number", "value", "datetime_start", "duration"], errors="ignore"
    )

    new_bounds = {}
    for param in params:
        column = best_rows[param]
        if verbose:
            print(f"Top N values: {column.value_counts()}")

        if not is_numeric_dtype(column):
            if verbose:
                print(f"Skipping parameter '{param}' as it is of type string.")
            continue

        # With no observed value, min()/max() would be NaN and the resulting
        # bounds meaningless, so such a column yields no entry.
        observed = column.dropna()
        if observed.empty:
            if verbose:
                print(f"Skipping parameter '{param}' as it has no values in the top rows.")
            continue

        mn = float(observed.min())
        mx = float(observed.max())
        margin = slack * (mx - mn)

        new_bounds[param] = (max(0.0, mn - margin), mx + margin)

    return new_bounds

In [7]:
# Bounds from the 10 highest-value rows of first_round_lr, widened by 20% of the span.
second_round_lr_bounds = extract_new_bounds(df=first_round_lr, top_n=10, slack=0.2)
second_round_lr_bounds

Top N values: params_rdc_gamma
0.915    2
0.998    2
0.909    1
0.999    1
0.996    1
0.997    1
0.957    1
0.985    1
Name: count, dtype: int64
Top N values: params_rdc_lr
0.000001    1
0.000009    1
0.000013    1
0.000003    1
0.000006    1
0.000001    1
0.000011    1
0.000007    1
0.000093    1
0.000019    1
Name: count, dtype: int64
Top N values: params_rdc_lr_gamma
0.92    3
0.96    1
0.94    1
0.86    1
0.87    1
0.89    1
0.85    1
0.95    1
Name: count, dtype: int64
Top N values: params_rdc_lr_step_size
4250    4
4500    2
4000    2
3500    1
2750    1
Name: count, dtype: int64


{'params_rdc_gamma': (0.891, 1.017),
 'params_rdc_lr': (0.0, 0.0001110369807297679),
 'params_rdc_lr_gamma': (0.8280000000000001, 0.982),
 'params_rdc_lr_step_size': (2400.0, 4850.0)}

In [8]:
# Bounds from the 10 highest-value rows of first_round_architecture, widened by 20% of the span.
second_round_architecture_bounds = extract_new_bounds(df=first_round_architecture, top_n=10, slack=0.2)
second_round_architecture_bounds

Top N values: params_rdc_batch_size
512    6
128    3
64     1
Name: count, dtype: int64
Top N values: params_rdc_hidden_dim
256    7
128    2
512    1
Name: count, dtype: int64


{'params_rdc_batch_size': (0.0, 601.6),
 'params_rdc_hidden_dim': (51.19999999999999, 588.8)}

In [9]:
# Bounds from the 10 highest-value rows of first_round_exploration, widened by 20% of the span.
second_round_exploration_bounds = extract_new_bounds(df=first_round_exploration, top_n=10, slack=0.2)
second_round_exploration_bounds

Top N values: params_rdc_epsilon_decay
0.979071    1
0.955694    1
0.955712    1
0.972414    1
0.970773    1
0.990276    1
0.969950    1
0.968358    1
0.961744    1
0.966319    1
Name: count, dtype: int64
Top N values: params_rdc_epsilon_end
0.045    1
0.087    1
0.019    1
0.076    1
0.028    1
0.130    1
0.010    1
0.095    1
0.080    1
0.093    1
Name: count, dtype: int64


{'params_rdc_epsilon_decay': (0.9487780013493988, 0.9971918126605767),
 'params_rdc_epsilon_end': (0.0, 0.154)}

In [10]:
# Bounds from the 10 highest-value rows of first_round_replay_buffer, widened by 20% of the span.
second_round_replay_buffer_bounds = extract_new_bounds(df=first_round_replay_buffer, top_n=10, slack=0.2)
second_round_replay_buffer_bounds

Top N values: params_rdc_replay_buffer_alpha
0.1    5
0.4    1
0.0    1
0.5    1
0.2    1
0.7    1
Name: count, dtype: int64
Top N values: params_rdc_replay_buffer_beta_frames
14989    1
17423    1
9115     1
24971    1
40425    1
34969    1
18747    1
16923    1
11770    1
3083     1
Name: count, dtype: int64
Top N values: params_rdc_replay_buffer_beta_start
0.2    5
0.1    3
0.5    1
0.3    1
Name: count, dtype: int64
Top N values: params_rdc_replay_buffer_capacity
29048    1
22852    1
38170    1
13304    1
13071    1
14035    1
9985     1
34017    1
28731    1
7462     1
Name: count, dtype: int64
Top N values: params_rdc_tau
0.003318    1
0.001606    1
0.000604    1
0.004072    1
0.003021    1
0.002527    1
0.000419    1
0.002123    1
0.000489    1
0.000618    1
Name: count, dtype: int64


{'params_rdc_replay_buffer_alpha': (0.0, 0.8400000000000001),
 'params_rdc_replay_buffer_beta_frames': (0.0, 47893.4),
 'params_rdc_replay_buffer_beta_start': (0.01999999999999999,
  0.5800000000000001),
 'params_rdc_replay_buffer_capacity': (1320.3999999999996, 44311.6),
 'params_rdc_tau': (0.0, 0.004802495068027235)}

In [11]:
# Bounds from the 5 highest-value rows of second_round_lr, widened by 10% of the span.
final_round_lr_bounds = extract_new_bounds(df=second_round_lr, top_n=5, slack=0.1)
final_round_lr_bounds

Top N values: params_rdc_gamma
0.958    2
0.997    1
0.994    1
0.941    1
Name: count, dtype: int64
Top N values: params_rdc_lr
0.000002    1
0.000001    1
0.000002    1
0.000002    1
0.000002    1
Name: count, dtype: int64
Top N values: params_rdc_lr_gamma
0.95    4
0.96    1
Name: count, dtype: int64
Top N values: params_rdc_lr_step_size
4500    3
4750    1
5000    1
Name: count, dtype: int64


{'params_rdc_gamma': (0.9354, 1.0026),
 'params_rdc_lr': (1.318734973143514e-06, 1.7899887728187195e-06),
 'params_rdc_lr_gamma': (0.949, 0.961),
 'params_rdc_lr_step_size': (4450.0, 5050.0)}

In [12]:
# Bounds from the 5 highest-value rows of second_round_architecture, widened by 10% of the span.
final_round_architecture_bounds = extract_new_bounds(df=second_round_architecture, top_n=5, slack=0.1)
final_round_architecture_bounds

Top N values: params_rdc_batch_size
128    5
Name: count, dtype: int64
Top N values: params_rdc_hidden_dim
128    4
256    1
Name: count, dtype: int64


{'params_rdc_batch_size': (128.0, 128.0),
 'params_rdc_hidden_dim': (115.2, 268.8)}

In [13]:
# Bounds from the 5 highest-value rows of second_round_exploration, widened by 10% of the span.
final_round_exploration_bounds = extract_new_bounds(df=second_round_exploration, top_n=5, slack=0.1)
final_round_exploration_bounds

Top N values: params_rdc_epsilon_decay
0.964167    1
0.977051    1
0.978554    1
0.961609    1
0.973855    1
Name: count, dtype: int64
Top N values: params_rdc_epsilon_end
0.041    1
0.068    1
0.141    1
0.065    1
0.045    1
Name: count, dtype: int64


{'params_rdc_epsilon_decay': (0.9599144971089095, 0.9802487514942165),
 'params_rdc_epsilon_end': (0.031, 0.15100000000000002)}

In [14]:
# Bounds from the 5 highest-value rows of second_round_replay_buffer, widened by 10% of the span.
final_round_replay_buffer_bounds = extract_new_bounds(df=second_round_replay_buffer, top_n=5, slack=0.1)
final_round_replay_buffer_bounds

Top N values: params_rdc_replay_buffer_alpha
0.2    2
0.5    2
0.0    1
Name: count, dtype: int64
Top N values: params_rdc_replay_buffer_beta_frames
3346    1
2075    1
1466    1
1372    1
1175    1
Name: count, dtype: int64
Top N values: params_rdc_replay_buffer_beta_start
0.28    2
0.47    1
0.43    1
0.44    1
Name: count, dtype: int64
Top N values: params_rdc_replay_buffer_capacity
10703    1
5969     1
11084    1
10173    1
6054     1
Name: count, dtype: int64
Top N values: params_rdc_tau
0.002019    1
0.022924    1
0.056362    1
0.028296    1
0.040059    1
Name: count, dtype: int64


{'params_rdc_replay_buffer_alpha': (0.0, 0.55),
 'params_rdc_replay_buffer_beta_frames': (957.9, 3563.1),
 'params_rdc_replay_buffer_beta_start': (0.261, 0.489),
 'params_rdc_replay_buffer_capacity': (5457.5, 11595.5),
 'params_rdc_tau': (0.0, 0.061795906643134405)}