In [74]:
import pandas as pd
from pandas.api.types import is_numeric_dtype

In [75]:
# first round
# The four first-round result files share one directory; keeping it in a
# single constant means a relocated results folder is a one-line change.
HO_RESULTS_DIR = "../../ho_results"

first_round_core_file = f"{HO_RESULTS_DIR}/uic_ho_ppo-core_20250704-105628_final.pickle"
first_round_architecture_file = f"{HO_RESULTS_DIR}/uic_ho_architecture_20250705-132457_final.pickle"
first_round_stability_file = f"{HO_RESULTS_DIR}/uic_ho_stability_20250706-120526_final.pickle"
first_round_reward_weights_file = f"{HO_RESULTS_DIR}/uic_ho_rewardweights_20250707-074136_final.pickle"

# second round

In [76]:
# Load the pickled first-round results, one object per study.
# NOTE: unpickling can execute arbitrary code, so only load files that were
# produced by a trusted run.
(
    first_round_core,
    first_round_architecture,
    first_round_reward_weights,
    first_round_stability,
) = (
    pd.read_pickle(results_path)
    for results_path in (
        first_round_core_file,
        first_round_architecture_file,
        first_round_reward_weights_file,
        first_round_stability_file,
    )
)

In [77]:
# Preview the first five rows of the core study results.
first_round_core.head(n=5)

Unnamed: 0,number,params_batch_size,params_clip_range,params_ent_coef,params_learning_rate,params_n_steps,params_vf_coef,value,datetime_start,duration
0,0,64,0.12,2e-06,0.000141,512,0.9,180.713818,2025-07-04 10:56:28.848295,0 days 00:20:02.800548
1,1,64,0.12,9e-06,0.000121,512,0.4,181.083621,2025-07-04 11:16:31.650241,0 days 00:20:02.164734
2,2,64,0.22,3e-06,6e-06,512,0.3,181.106377,2025-07-04 11:36:33.816262,0 days 00:20:00.182664
3,3,32,0.14,8.5e-05,3.6e-05,256,0.6,180.773171,2025-07-04 11:56:33.999679,0 days 00:22:56.034392
4,4,32,0.12,0.000727,4e-06,512,0.7,181.415604,2025-07-04 12:19:30.035209,0 days 00:23:10.669907


In [78]:
def extract_new_bounds(df: pd.DataFrame, top_n=5, slack=0.1, lower_clip=0.0):
    """Derive widened (lower, upper) bounds from the best-scoring rows.

    Rows are ranked by the ``value`` column in descending order (a larger
    value is treated as better) and the first ``top_n`` rows are kept. For
    every remaining numeric column, the [min, max] range observed in those
    rows is widened on both sides by ``slack`` times its span.

    Parameters
    ----------
    df : pd.DataFrame
        One row per trial. Must contain a ``value`` column. The columns
        ``number``, ``value``, ``datetime_start`` and ``duration`` are treated
        as metadata and excluded when present; every other column is treated
        as a parameter.
    top_n : int, default 5
        Number of best rows used to compute the ranges.
    slack : float, default 0.1
        Fraction of the observed span added below the minimum and above the
        maximum.
    lower_clip : float or None, default 0.0
        Floor applied to every lower bound. With the default, a parameter
        whose best values are negative gets a lower bound of 0.0, which is
        above its upper bound; pass ``None`` (or a suitable negative floor)
        for such parameters.

    Returns
    -------
    dict
        Maps each numeric parameter column to a ``(lower, upper)`` tuple of
        floats.

    Notes
    -----
    * Non-numeric columns are skipped; a message and the value counts of the
      top rows are printed instead.
    * Bounds are always floats, even for integer-valued columns, so they may
      need rounding before being used for an integer parameter.
    * If all top rows share the same value the span is zero and the bounds
      collapse to that single value (before the floor is applied).
    """
    top_trials = df.sort_values("value", ascending=False).head(top_n)

    # errors="ignore" keeps the function usable on frames that do not carry
    # every metadata column instead of failing with a KeyError.
    param_columns = df.columns.drop(
        ["number", "value", "datetime_start", "duration"], errors="ignore"
    )

    new_bounds = {}
    for param in param_columns:
        column = top_trials[param]
        if not is_numeric_dtype(column):
            print(f"Skipping parameter '{param}' as it is of type string.")
            print(f"Top N values: {column.value_counts()}")
            continue

        mn = float(column.min())
        mx = float(column.max())
        margin = slack * (mx - mn)

        lower = mn - margin
        if lower_clip is not None:
            # Keep the floor as the first argument of max() so the result is
            # identical to the previous hard-coded max(0.0, ...) clamp.
            lower = max(float(lower_clip), lower)

        new_bounds[param] = (lower, mx + margin)

    return new_bounds

In [79]:
# Second-round bounds for the core study: 10 best first-round rows, 20% slack.
second_round_core_bounds = extract_new_bounds(first_round_core, top_n=10, slack=0.2)
second_round_core_bounds

{'params_batch_size': (32.0, 32.0),
 'params_clip_range': (0.09999999999999999, 0.24),
 'params_ent_coef': (0.0, 0.0056188837849982945),
 'params_learning_rate': (0.0, 3.503179856741068e-05),
 'params_n_steps': (512.0, 512.0),
 'params_vf_coef': (0.28, 1.12)}

In [80]:
# Second-round bounds for the architecture study: 10 best first-round rows, 20% slack.
second_round_architecture_bounds = extract_new_bounds(first_round_architecture, top_n=10, slack=0.2)
second_round_architecture_bounds

Skipping parameter 'params_activation' as it is of type string.
Top N values: params_activation
ReLU         7
Tanh         2
LeakyReLU    1
Name: count, dtype: int64


{'params_hidden_size': (25.599999999999994, 294.4),
 'params_n_layers': (1.8, 3.2)}

In [81]:
# Second-round bounds for the stability study: 10 best first-round rows, 20% slack.
second_round_stability_bounds = extract_new_bounds(first_round_stability, top_n=10, slack=0.2)
second_round_stability_bounds

{'params_gae_lambda': (0.878, 0.962),
 'params_gamma': (0.9132, 0.9538),
 'params_use_target_kl': (0.008, 0.022)}

In [82]:
# Second-round bounds for the reward-weights study: 10 best first-round rows, 20% slack.
second_round_reward_weights_bounds = extract_new_bounds(first_round_reward_weights, top_n=10, slack=0.2)
second_round_reward_weights_bounds

{'params_reward_weight_demand': (0.07999999999999999, 0.92),
 'params_reward_weight_gini': (0.0, 0.41800000000000004),
 'params_reward_weight_rebalancing': (1.04, 2.16)}