In [1]:
import pandas as pd
from pandas.api.types import is_numeric_dtype

In [2]:
# First-round result pickles, one file per study: learning rate, exploration,
# architecture, replay buffer and reward weights. The paths are relative, so
# they resolve against the notebook's working directory.
# NOTE(review): "ho" presumably stands for hyperparameter optimisation - confirm.
first_round_lr_file = "../../ho_results/rdc_ho_learningrate_20250704-083610_final.pickle"
first_round_exploration_file = "../../ho_results/rdc_ho_exploration_20250707-074432_final.pickle"
first_round_architecture_file = "../../ho_results/rdc_ho_architecture_20250706-120351_final.pickle"
first_round_replay_buffer_file = "../../ho_results/rdc_ho_replaybuffer_20250705-153041_final.pickle"
first_round_reward_weights_file = "../../ho_results/rdc_ho_rewardweights_20250707-202017_final.pickle"

# Second-round result pickles for the same five studies.
second_round_lr_file = "../../ho_results/rdc_ho_learningrate_20250708-081844_final.pickle"
second_round_exploration_file = "../../ho_results/rdc_ho_exploration_20250709-204731_final.pickle"
second_round_architecture_file = "../../ho_results/rdc_ho_architecture_20250709-071406_final.pickle"
second_round_replay_buffer_file = "../../ho_results/rdc_ho_replaybuffer_20250708-200649_final.pickle"
second_round_reward_weights_file = "../../ho_results/rdc_ho_rewardweights_20250710-112452_final.pickle"

In [3]:
# Load every first-round result pickle, in the same order as before.
# NOTE: unpickling can execute arbitrary code; only load files you trust.
(
    first_round_lr,
    first_round_architecture,
    first_round_exploration,
    first_round_replay_buffer,
    first_round_reward_weights,
) = (
    pd.read_pickle(pickle_path)
    for pickle_path in (
        first_round_lr_file,
        first_round_architecture_file,
        first_round_exploration_file,
        first_round_replay_buffer_file,
        first_round_reward_weights_file,
    )
)

In [4]:
# Load every second-round result pickle, in the same order as before.
# NOTE: unpickling can execute arbitrary code; only load files you trust.
(
    second_round_lr,
    second_round_architecture,
    second_round_exploration,
    second_round_replay_buffer,
    second_round_reward_weights,
) = (
    pd.read_pickle(pickle_path)
    for pickle_path in (
        second_round_lr_file,
        second_round_architecture_file,
        second_round_exploration_file,
        second_round_replay_buffer_file,
        second_round_reward_weights_file,
    )
)

In [5]:
# Peek at the first rows to see which columns a results table carries.
first_round_lr.head()

Unnamed: 0,number,params_rdc_gamma,params_rdc_lr,params_rdc_lr_gamma,params_rdc_lr_step_size,value,datetime_start,duration
0,0,0.98,0.000126,0.93,2500,94.871165,2025-07-04 08:36:11.063451,0 days 01:11:35.545510
1,1,0.95,2e-06,0.98,1500,97.260013,2025-07-04 08:36:11.065447,0 days 01:04:38.181229
2,2,0.98,1.6e-05,0.93,500,91.576332,2025-07-04 08:36:11.068118,0 days 01:06:47.324733
3,3,0.9,1.7e-05,0.98,500,95.528275,2025-07-04 08:36:11.070628,0 days 01:07:41.901750
4,4,0.91,0.000219,0.96,500,90.889619,2025-07-04 08:36:11.073899,0 days 01:09:34.605880


In [6]:
def extract_new_bounds(df: pd.DataFrame, top_n=5, slack=0.1):
    """Derive narrowed search bounds from the best-scoring rows of ``df``.

    Rows are ranked by the ``value`` column, highest first. For every numeric
    parameter column, the min/max over the ``top_n`` best rows is widened on
    both sides by ``slack`` times the observed span (max - min).

    Parameters
    ----------
    df : pd.DataFrame
        Results table. Must contain a ``value`` column. The bookkeeping
        columns ``number``, ``datetime_start`` and ``duration`` are excluded
        when present; every other column is treated as a parameter.
    top_n : int, default 5
        Number of highest-``value`` rows to take the range from.
    slack : float, default 0.1
        Fraction of the span added as margin below the min and above the max.

    Returns
    -------
    dict
        ``{column: (lower, upper)}`` with float bounds, one entry per numeric
        parameter column. Non-numeric columns are reported on stdout and
        left out.

    Notes
    -----
    * The lower bound is floored at 0.0 only when the smallest top value is
      itself non-negative. Flooring unconditionally would turn an all-negative
      range such as [-2, -1] into the inverted interval (0.0, -0.8).
    * The upper bound is never capped, so it can exceed a parameter's valid
      range (e.g. above 1 for a value that must stay <= 1).
    * If all top rows share one value the span is 0 and lower == upper.
    """
    ranked = df.sort_values("value", ascending=False)
    top_rows = ranked.head(top_n)

    # errors="ignore" lets tables without some bookkeeping columns through
    # instead of failing with a KeyError.
    param_columns = df.columns.drop(
        ["number", "value", "datetime_start", "duration"], errors="ignore"
    )

    new_bounds = {}
    for param in param_columns:
        top_values = top_rows[param]
        # Printed once per column, numeric or not, so the distribution behind
        # each bound can be inspected.
        print(f"Top N values: {top_values.value_counts()}")
        if not is_numeric_dtype(top_values):
            print(f"Skipping parameter '{param}' as it is of type string.")
            continue

        mn = float(top_values.min())
        mx = float(top_values.max())
        margin = slack * (mx - mn)

        lower = mn - margin
        if mn >= 0.0:
            # Do not let the margin push a non-negative parameter below zero.
            lower = max(0.0, lower)

        new_bounds[param] = (lower, mx + margin)

    return new_bounds

In [7]:
# Bounds from the 10 highest-`value` first-round learning-rate trials, widened
# by 20% of each parameter's span.
# NOTE(review): the recorded output has a lower bound of 0.0 for params_rdc_lr
# (the margin pushed it below zero and it was floored) - confirm that 0 is an
# acceptable learning-rate bound for whatever consumes these values.
second_round_lr_bounds = extract_new_bounds(
    first_round_lr,
    top_n=10,
    slack=0.2
)
second_round_lr_bounds

Top N values: params_rdc_gamma
0.96    3
0.99    3
0.98    2
0.97    1
0.94    1
Name: count, dtype: int64
Top N values: params_rdc_lr
0.000002    1
0.000002    1
0.000010    1
0.000005    1
0.000010    1
0.000011    1
0.000009    1
0.000006    1
0.000048    1
0.000007    1
Name: count, dtype: int64
Top N values: params_rdc_lr_gamma
0.97    3
0.94    2
0.96    2
0.92    2
0.93    1
Name: count, dtype: int64
Top N values: params_rdc_lr_step_size
3000    5
2500    4
2000    1
Name: count, dtype: int64


{'params_rdc_gamma': (0.93, 1.0),
 'params_rdc_lr': (0.0, 5.731865945184508e-05),
 'params_rdc_lr_gamma': (0.91, 0.98),
 'params_rdc_lr_step_size': (1800.0, 3200.0)}

In [8]:
# Bounds from the 10 highest-`value` first-round architecture trials, widened
# by 20% of each parameter's span.
# NOTE(review): the recorded output is (0.0, 601.6) for both batch size and
# hidden dim; the top values are all integers between 64 and 512, so these
# float bounds (and the 0.0 floor) probably need rounding/clipping - confirm.
second_round_architecture_bounds = extract_new_bounds(
    first_round_architecture,
    top_n=10,
    slack=0.2
)
second_round_architecture_bounds

Top N values: params_rdc_batch_size
256    6
128    2
64     1
512    1
Name: count, dtype: int64
Top N values: params_rdc_hidden_dim
256    6
512    2
128    1
64     1
Name: count, dtype: int64


{'params_rdc_batch_size': (0.0, 601.6), 'params_rdc_hidden_dim': (0.0, 601.6)}

In [9]:
# Bounds from the 10 highest-`value` first-round exploration trials, widened
# by 20% of each parameter's span.
# NOTE(review): the recorded upper bound for params_rdc_epsilon_decay is
# 1.0008; if this is a multiplicative decay factor, values above 1 are
# presumably invalid - confirm and cap before reuse.
second_round_exploration_bounds = extract_new_bounds(
    first_round_exploration,
    top_n=10,
    slack=0.2
)
second_round_exploration_bounds

Top N values: params_rdc_epsilon_decay
0.999    2
0.994    2
0.990    2
0.992    1
0.997    1
0.995    1
0.991    1
Name: count, dtype: int64
Top N values: params_rdc_epsilon_end
0.05    2
0.08    2
0.17    1
0.11    1
0.06    1
0.10    1
0.02    1
0.15    1
Name: count, dtype: int64


{'params_rdc_epsilon_decay': (0.9882, 1.0008),
 'params_rdc_epsilon_end': (0.0, 0.19999999999999998)}

In [10]:
# Bounds from the 10 highest-`value` first-round replay-buffer trials, widened
# by 20% of each parameter's span.
second_round_replay_buffer_bounds = extract_new_bounds(
    first_round_replay_buffer,
    top_n=10,
    slack=0.2
)
second_round_replay_buffer_bounds

Top N values: params_rdc_replay_buffer_alpha
0.4    5
0.3    3
0.5    1
0.8    1
Name: count, dtype: int64
Top N values: params_rdc_replay_buffer_beta_frames
200000    4
50000     2
150000    2
100000    2
Name: count, dtype: int64
Top N values: params_rdc_replay_buffer_beta_start
0.6    5
0.5    4
0.2    1
Name: count, dtype: int64
Top N values: params_rdc_replay_buffer_capacity
5000     6
15000    3
20000    1
Name: count, dtype: int64
Top N values: params_rdc_tau
0.006    3
0.007    2
0.008    2
0.004    1
0.009    1
0.005    1
Name: count, dtype: int64


{'params_rdc_replay_buffer_alpha': (0.19999999999999998, 0.9),
 'params_rdc_replay_buffer_beta_frames': (20000.0, 230000.0),
 'params_rdc_replay_buffer_beta_start': (0.12000000000000001,
  0.6799999999999999),
 'params_rdc_replay_buffer_capacity': (2000.0, 23000.0),
 'params_rdc_tau': (0.003, 0.010000000000000002)}

In [11]:
# Bounds from the 10 highest-`value` first-round reward-weight trials, widened
# by 20% of each parameter's span.
# NOTE(review): all 10 top trials have params_rdc_reward_weight_gini == 0.0,
# so its span is 0 and the recorded bound collapses to (0.0, 0.0).
second_round_reward_weights_bounds = extract_new_bounds(
    first_round_reward_weights,
    top_n=10,
    slack=0.2
)
second_round_reward_weights_bounds

Top N values: params_rdc_reward_weight_demand
0.3    4
0.2    4
0.8    1
1.5    1
Name: count, dtype: int64
Top N values: params_rdc_reward_weight_gini
0.0    10
Name: count, dtype: int64
Top N values: params_rdc_reward_weight_rebalancing
0.9    4
2.0    1
0.7    1
0.6    1
1.6    1
1.0    1
0.8    1
Name: count, dtype: int64


{'params_rdc_reward_weight_demand': (0.0, 1.76),
 'params_rdc_reward_weight_gini': (0.0, 0.0),
 'params_rdc_reward_weight_rebalancing': (0.3200000000000001, 2.28)}

In [12]:
# Show the five highest-`value` first-round reward-weight trials for a manual
# look at the rows behind the bounds.
first_round_reward_weights.sort_values("value", ascending=False).head()

Unnamed: 0,number,params_rdc_reward_weight_demand,params_rdc_reward_weight_gini,params_rdc_reward_weight_rebalancing,value,datetime_start,duration
48,48,0.3,0.0,2.0,98.788562,2025-07-08 06:39:25.519081,0 days 00:57:52.263727
16,16,0.2,0.0,0.7,98.708349,2025-07-07 23:41:04.500940,0 days 01:07:20.908051
39,39,0.3,0.0,0.9,98.679879,2025-07-08 04:31:08.363082,0 days 01:09:54.101399
35,35,0.3,0.0,0.6,98.664284,2025-07-08 04:03:23.109113,0 days 01:06:54.568030
14,14,0.2,0.0,0.9,98.606539,2025-07-07 22:42:11.708084,0 days 01:09:41.879130


In [13]:
# Bounds from the 5 highest-`value` second-round learning-rate trials, widened
# by 10% of each parameter's span (tighter than the previous 10 rows / 20%).
final_round_lr_bounds = extract_new_bounds(
    second_round_lr,
    top_n=5,
    slack=0.1
)
final_round_lr_bounds

Top N values: params_rdc_gamma
0.99    4
0.98    1
Name: count, dtype: int64
Top N values: params_rdc_lr
0.000003    1
0.000001    1
0.000008    1
0.000001    1
0.000004    1
Name: count, dtype: int64
Top N values: params_rdc_lr_gamma
0.95    3
0.93    1
0.92    1
Name: count, dtype: int64
Top N values: params_rdc_lr_step_size
2800    2
3000    1
2200    1
3200    1
Name: count, dtype: int64


{'params_rdc_gamma': (0.9790000000000001, 0.991),
 'params_rdc_lr': (3.9896904636627405e-07, 8.318892467996226e-06),
 'params_rdc_lr_gamma': (0.917, 0.9530000000000001),
 'params_rdc_lr_step_size': (2100.0, 3300.0)}

In [14]:
# Bounds from the 5 highest-`value` second-round architecture trials, widened
# by 10% of each parameter's span.
# NOTE(review): the recorded bounds are fractional (e.g. 89.6, 550.4) while
# the top values are integers - presumably they must be rounded before use.
final_round_architecture_bounds = extract_new_bounds(
    second_round_architecture,
    top_n=5,
    slack=0.1
)
final_round_architecture_bounds

Top N values: params_rdc_batch_size
128    3
512    2
Name: count, dtype: int64
Top N values: params_rdc_hidden_dim
256    3
128    2
Name: count, dtype: int64


{'params_rdc_batch_size': (89.6, 550.4),
 'params_rdc_hidden_dim': (115.2, 268.8)}

In [15]:
# Bounds from the 5 highest-`value` second-round exploration trials, widened
# by 10% of each parameter's span.
final_round_exploration_bounds = extract_new_bounds(
    second_round_exploration,
    top_n=5,
    slack=0.1
)
final_round_exploration_bounds

Top N values: params_rdc_epsilon_decay
0.996    2
0.992    1
0.998    1
0.999    1
Name: count, dtype: int64
Top N values: params_rdc_epsilon_end
0.05    2
0.06    1
0.07    1
0.08    1
Name: count, dtype: int64


{'params_rdc_epsilon_decay': (0.9913, 0.9997),
 'params_rdc_epsilon_end': (0.047, 0.083)}

In [16]:
# Bounds from the 5 highest-`value` second-round replay-buffer trials, widened
# by 10% of each parameter's span.
final_round_replay_buffer_bounds = extract_new_bounds(
    second_round_replay_buffer,
    top_n=5,
    slack=0.1
)
final_round_replay_buffer_bounds

Top N values: params_rdc_replay_buffer_alpha
0.8    3
0.7    1
0.9    1
Name: count, dtype: int64
Top N values: params_rdc_replay_buffer_beta_frames
80000     1
50000     1
30000     1
200000    1
20000     1
Name: count, dtype: int64
Top N values: params_rdc_replay_buffer_beta_start
0.3    2
0.2    2
0.5    1
Name: count, dtype: int64
Top N values: params_rdc_replay_buffer_capacity
10000    2
3000     1
6000     1
2000     1
Name: count, dtype: int64
Top N values: params_rdc_tau
0.010    3
0.008    1
0.009    1
Name: count, dtype: int64


{'params_rdc_replay_buffer_alpha': (0.6799999999999999, 0.92),
 'params_rdc_replay_buffer_beta_frames': (2000.0, 218000.0),
 'params_rdc_replay_buffer_beta_start': (0.17, 0.53),
 'params_rdc_replay_buffer_capacity': (1200.0, 10800.0),
 'params_rdc_tau': (0.0078000000000000005, 0.0102)}

In [17]:
# Bounds from the 5 highest-`value` second-round reward-weight trials, widened
# by 10% of each parameter's span.
# NOTE(review): all 5 top trials have params_rdc_reward_weight_gini == 0.05,
# so its span is 0 and the recorded bound collapses to (0.05, 0.05).
final_round_reward_weights_bounds = extract_new_bounds(
    second_round_reward_weights,
    top_n=5,
    slack=0.1
)
final_round_reward_weights_bounds

Top N values: params_rdc_reward_weight_demand
0.2    4
0.3    1
Name: count, dtype: int64
Top N values: params_rdc_reward_weight_gini
0.05    5
Name: count, dtype: int64
Top N values: params_rdc_reward_weight_rebalancing
2.1    1
1.9    1
1.6    1
2.0    1
1.4    1
Name: count, dtype: int64


{'params_rdc_reward_weight_demand': (0.19, 0.31000000000000005),
 'params_rdc_reward_weight_gini': (0.05, 0.05),
 'params_rdc_reward_weight_rebalancing': (1.3299999999999998, 2.17)}