In [1]:
import pandas as pd
from pandas.api.types import is_numeric_dtype

In [2]:
# Directory that holds the result pickles, relative to the kernel's working
# directory. Change it here (once) if the results are moved.
HO_RESULTS_DIR = "../../ho_results"

# first round
first_round_lr_file = f"{HO_RESULTS_DIR}/rdc_ho_learningrate_20250730-095559_final.pickle"
first_round_exploration_file = f"{HO_RESULTS_DIR}/rdc_ho_exploration_20250730-095800_final.pickle"
first_round_architecture_file = f"{HO_RESULTS_DIR}/rdc_ho_architecture_20250730-095643_final.pickle"
first_round_replay_buffer_file = f"{HO_RESULTS_DIR}/rdc_ho_replaybuffer_20250730-095727_final.pickle"

# second round
# NOTE(review): these files carry an earlier timestamp (20250726) than the
# "first round" files above (20250730) -- confirm the round labels are not swapped.
second_round_lr_file = f"{HO_RESULTS_DIR}/rdc_ho_learningrate_20250726-094043_final.pickle"
second_round_exploration_file = f"{HO_RESULTS_DIR}/rdc_ho_exploration_20250726-094246_final.pickle"
second_round_architecture_file = f"{HO_RESULTS_DIR}/rdc_ho_architecture_20250726-094115_final.pickle"
second_round_replay_buffer_file = f"{HO_RESULTS_DIR}/rdc_ho_replaybuffer_20250726-094148_final.pickle"

In [3]:
# Load the four first-round result tables in one go.
# NOTE: unpickling can execute arbitrary code -- only read files we produced ourselves.
(
    first_round_lr,
    first_round_architecture,
    first_round_exploration,
    first_round_replay_buffer,
) = (
    pd.read_pickle(pickle_path)
    for pickle_path in (
        first_round_lr_file,
        first_round_architecture_file,
        first_round_exploration_file,
        first_round_replay_buffer_file,
    )
)

In [4]:
# Load the four second-round result tables in one go.
# NOTE: unpickling can execute arbitrary code -- only read files we produced ourselves.
(
    second_round_lr,
    second_round_architecture,
    second_round_exploration,
    second_round_replay_buffer,
) = (
    pd.read_pickle(pickle_path)
    for pickle_path in (
        second_round_lr_file,
        second_round_architecture_file,
        second_round_exploration_file,
        second_round_replay_buffer_file,
    )
)

In [5]:
# Peek at the first five rows to check which columns the result table has.
first_round_lr.head(n=5)

Unnamed: 0,number,params_rdc_gamma,params_rdc_lr,params_rdc_lr_gamma,params_rdc_lr_step_size,value,datetime_start,duration
0,0,0.9,1e-06,0.87,3250,77.661561,2025-07-30 09:55:59.345913,0 days 00:37:35.949596
1,1,0.9,5e-06,0.83,2750,78.267392,2025-07-30 09:55:59.347575,0 days 00:35:42.244834
2,2,0.99,1e-05,0.61,3250,78.03107,2025-07-30 09:55:59.351069,0 days 00:39:25.995437
3,3,0.93,1e-06,0.84,2750,78.060979,2025-07-30 09:55:59.357206,0 days 00:38:20.837748
4,4,0.92,6.1e-05,0.93,3000,77.498272,2025-07-30 09:55:59.359219,0 days 00:38:50.410561


In [6]:
def extract_new_bounds(df: pd.DataFrame, top_n: int = 5, slack: float = 0.1) -> dict:
    """Derive a narrowed search range per parameter from the best trials.

    Rows are ranked by ``value`` in descending order (a higher ``value`` is
    treated as better) and the ``top_n`` best rows are kept. For every numeric
    parameter column, the range spanned by those rows is widened on each side
    by ``slack`` times the span.

    Parameters
    ----------
    df : pd.DataFrame
        One row per trial. Must contain a ``value`` column. The bookkeeping
        columns ``number``, ``datetime_start`` and ``duration`` are ignored
        when present; every other column is treated as a parameter.
    top_n : int, default 5
        Number of best rows the bounds are derived from.
    slack : float, default 0.1
        Fraction of the observed span added below the minimum and above the
        maximum.

    Returns
    -------
    dict
        Maps each numeric parameter column name to a ``(lower, upper)`` tuple
        of floats. Non-numeric columns are reported on stdout and left out.

    Notes
    -----
    * The lower bound is clamped at ``0.0`` only when the smallest observed
      value is itself non-negative. Clamping a parameter whose best values
      are negative would push the lower bound above those values (and
      possibly above the upper bound).
    * The upper bound is never clamped, so it can exceed a parameter's valid
      domain (e.g. above 1 for a quantity limited to ``[0, 1]``). Callers
      must clip the result to the real domain.
    * When all top rows share one value the span is zero and
      ``lower == upper``.
    * The value counts of the top rows are printed for every parameter.
    """
    top_trials = df.sort_values("value", ascending=False).head(top_n)

    # errors="ignore": a frame that lacks one of the bookkeeping columns is
    # still usable; only "value" is really required (by the sort above).
    params = df.columns.drop(
        ["number", "value", "datetime_start", "duration"], errors="ignore"
    )

    new_bounds = {}
    for param in params:
        column = top_trials[param]
        print(f"Top N values: {column.value_counts()}")
        if not is_numeric_dtype(column):
            # The counts were already printed above; do not repeat them.
            print(f"Skipping parameter '{param}' as it is of type string.")
            continue

        mn = float(column.min())
        mx = float(column.max())
        margin = slack * (mx - mn)

        if mn < 0.0:
            # Negative-valued parameter: zero is not a meaningful floor.
            lower = mn - margin
        else:
            lower = max(0.0, mn - margin)

        new_bounds[param] = (lower, mx + margin)

    return new_bounds

In [7]:
# Bounds from the ten best first-round learning-rate trials, widened by 20 %
# of their span on each side.
# NOTE(review): the recorded output has an upper bound of 1.032 for
# params_rdc_lr_gamma; upper bounds are never clamped -- clip it if the
# parameter must stay <= 1.
second_round_lr_bounds = extract_new_bounds(first_round_lr, top_n=10, slack=0.2)
second_round_lr_bounds

Top N values: params_rdc_gamma
0.97    4
0.96    2
0.95    2
0.93    1
0.98    1
Name: count, dtype: int64
Top N values: params_rdc_lr
0.000082    1
0.000050    1
0.000063    1
0.000092    1
0.000073    1
0.000014    1
0.000097    1
0.000054    1
0.000031    1
0.000007    1
Name: count, dtype: int64
Top N values: params_rdc_lr_gamma
0.72    2
0.76    1
0.88    1
0.89    1
0.54    1
0.71    1
0.95    1
0.66    1
0.74    1
Name: count, dtype: int64
Top N values: params_rdc_lr_step_size
3250    4
4750    2
1500    1
2250    1
3000    1
3750    1
Name: count, dtype: int64


{'params_rdc_gamma': (0.92, 0.99),
 'params_rdc_lr': (0.0, 0.00011511938752805936),
 'params_rdc_lr_gamma': (0.4580000000000001, 1.032),
 'params_rdc_lr_step_size': (850.0, 5400.0)}

In [8]:
# Bounds from the ten best first-round architecture trials, widened by 20 %
# of their span on each side.
second_round_architecture_bounds = extract_new_bounds(first_round_architecture, top_n=10, slack=0.2)
second_round_architecture_bounds

Top N values: params_rdc_batch_size
512    5
256    4
64     1
Name: count, dtype: int64
Top N values: params_rdc_hidden_dim
512    6
256    4
Name: count, dtype: int64


{'params_rdc_batch_size': (0.0, 601.6),
 'params_rdc_hidden_dim': (204.8, 563.2)}

In [9]:
# Bounds from the ten best first-round exploration trials, widened by 20 %
# of their span on each side.
second_round_exploration_bounds = extract_new_bounds(first_round_exploration, top_n=10, slack=0.2)
second_round_exploration_bounds

Top N values: params_rdc_epsilon_decay
0.971479    1
0.969283    1
0.977040    1
0.984120    1
0.969024    1
0.972279    1
0.978975    1
0.985371    1
0.982829    1
0.961795    1
Name: count, dtype: int64
Top N values: params_rdc_epsilon_end
0.09    2
0.10    2
0.11    2
0.02    1
0.07    1
0.08    1
0.12    1
Name: count, dtype: int64


{'params_rdc_epsilon_decay': (0.9570798768748315, 0.9900856769516452),
 'params_rdc_epsilon_end': (0.0, 0.13999999999999999)}

In [10]:
# Bounds from the ten best first-round replay-buffer trials, widened by 20 %
# of their span on each side.
# NOTE(review): the recorded output has an upper bound of 1.2 for
# params_rdc_replay_buffer_alpha; upper bounds are never clamped -- clip it if
# the parameter must stay <= 1.
second_round_replay_buffer_bounds = extract_new_bounds(first_round_replay_buffer, top_n=10, slack=0.2)
second_round_replay_buffer_bounds

Top N values: params_rdc_replay_buffer_alpha
0.8    2
0.0    2
0.1    2
0.3    1
0.9    1
0.5    1
1.0    1
Name: count, dtype: int64
Top N values: params_rdc_replay_buffer_beta_frames
28041    1
32139    1
25939    1
34055    1
4472     1
32497    1
4486     1
48687    1
4424     1
25005    1
Name: count, dtype: int64
Top N values: params_rdc_replay_buffer_beta_start
0.3    6
0.2    3
0.5    1
Name: count, dtype: int64
Top N values: params_rdc_replay_buffer_capacity
12714    1
39289    1
10346    1
18125    1
27001    1
8914     1
73450    1
48358    1
78261    1
10709    1
Name: count, dtype: int64
Top N values: params_rdc_tau
0.000289    1
0.004680    1
0.005444    1
0.006049    1
0.051832    1
0.000485    1
0.000487    1
0.000353    1
0.000928    1
0.012255    1
Name: count, dtype: int64


{'params_rdc_replay_buffer_alpha': (0.0, 1.2),
 'params_rdc_replay_buffer_beta_frames': (0.0, 57539.6),
 'params_rdc_replay_buffer_beta_start': (0.14, 0.56),
 'params_rdc_replay_buffer_capacity': (0.0, 92130.4),
 'params_rdc_tau': (0.0, 0.0621405999803569)}

In [11]:
# Bounds from the five best second-round learning-rate trials, widened by
# 10 % of their span on each side.
# NOTE(review): the recorded output has an upper bound of 1.0026 for
# params_rdc_gamma; upper bounds are never clamped -- clip it if the parameter
# must stay <= 1.
final_round_lr_bounds = extract_new_bounds(second_round_lr, top_n=5, slack=0.1)
final_round_lr_bounds

Top N values: params_rdc_gamma
0.958    2
0.997    1
0.994    1
0.941    1
Name: count, dtype: int64
Top N values: params_rdc_lr
0.000002    1
0.000001    1
0.000002    1
0.000002    1
0.000002    1
Name: count, dtype: int64
Top N values: params_rdc_lr_gamma
0.95    4
0.96    1
Name: count, dtype: int64
Top N values: params_rdc_lr_step_size
4500    3
4750    1
5000    1
Name: count, dtype: int64


{'params_rdc_gamma': (0.9354, 1.0026),
 'params_rdc_lr': (1.318734973143514e-06, 1.7899887728187195e-06),
 'params_rdc_lr_gamma': (0.949, 0.961),
 'params_rdc_lr_step_size': (4450.0, 5050.0)}

In [12]:
# Bounds from the five best second-round architecture trials, widened by
# 10 % of their span on each side.
final_round_architecture_bounds = extract_new_bounds(second_round_architecture, top_n=5, slack=0.1)
final_round_architecture_bounds

Top N values: params_rdc_batch_size
128    5
Name: count, dtype: int64
Top N values: params_rdc_hidden_dim
128    4
256    1
Name: count, dtype: int64


{'params_rdc_batch_size': (128.0, 128.0),
 'params_rdc_hidden_dim': (115.2, 268.8)}

In [13]:
# Bounds from the five best second-round exploration trials, widened by
# 10 % of their span on each side.
final_round_exploration_bounds = extract_new_bounds(second_round_exploration, top_n=5, slack=0.1)
final_round_exploration_bounds

Top N values: params_rdc_epsilon_decay
0.964167    1
0.977051    1
0.978554    1
0.961609    1
0.973855    1
Name: count, dtype: int64
Top N values: params_rdc_epsilon_end
0.041    1
0.068    1
0.141    1
0.065    1
0.045    1
Name: count, dtype: int64


{'params_rdc_epsilon_decay': (0.9599144971089095, 0.9802487514942165),
 'params_rdc_epsilon_end': (0.031, 0.15100000000000002)}

In [14]:
# Bounds from the five best second-round replay-buffer trials, widened by
# 10 % of their span on each side.
final_round_replay_buffer_bounds = extract_new_bounds(second_round_replay_buffer, top_n=5, slack=0.1)
final_round_replay_buffer_bounds

Top N values: params_rdc_replay_buffer_alpha
0.2    2
0.5    2
0.0    1
Name: count, dtype: int64
Top N values: params_rdc_replay_buffer_beta_frames
3346    1
2075    1
1466    1
1372    1
1175    1
Name: count, dtype: int64
Top N values: params_rdc_replay_buffer_beta_start
0.28    2
0.47    1
0.43    1
0.44    1
Name: count, dtype: int64
Top N values: params_rdc_replay_buffer_capacity
10703    1
5969     1
11084    1
10173    1
6054     1
Name: count, dtype: int64
Top N values: params_rdc_tau
0.002019    1
0.022924    1
0.056362    1
0.028296    1
0.040059    1
Name: count, dtype: int64


{'params_rdc_replay_buffer_alpha': (0.0, 0.55),
 'params_rdc_replay_buffer_beta_frames': (957.9, 3563.1),
 'params_rdc_replay_buffer_beta_start': (0.261, 0.489),
 'params_rdc_replay_buffer_capacity': (5457.5, 11595.5),
 'params_rdc_tau': (0.0, 0.061795906643134405)}