In [1]:
import pandas as pd
from pandas.api.types import is_numeric_dtype

In [2]:
# first round: result files for the core, architecture and stability groups
first_round_core_file = "../../ho_results/uic_ho_ppo-core_20250802-114428_final.pickle"
first_round_architecture_file = "../../ho_results/uic_ho_architecture_20250802-114625_final.pickle"
first_round_stability_file = "../../ho_results/uic_ho_stability_20250802-114603_final.pickle"

# second round
# NOTE(review): these three paths are identical to the first-round paths above,
# so every "second round" object derived from them is computed from first-round
# results. Presumably the second-round result files belong here — TODO confirm
# and replace once they exist.
second_round_core_file = "../../ho_results/uic_ho_ppo-core_20250802-114428_final.pickle"
second_round_architecture_file = "../../ho_results/uic_ho_architecture_20250802-114625_final.pickle"
second_round_stability_file = "../../ho_results/uic_ho_stability_20250802-114603_final.pickle"

In [3]:
# Load the three first-round result tables in one pass.
# pd.read_pickle unpickles arbitrary objects: only point it at trusted files.
first_round_core, first_round_architecture, first_round_stability = map(
    pd.read_pickle,
    (
        first_round_core_file,
        first_round_architecture_file,
        first_round_stability_file,
    ),
)

In [4]:
# Load the three second-round result tables in one pass.
# pd.read_pickle unpickles arbitrary objects: only point it at trusted files.
second_round_core, second_round_architecture, second_round_stability = map(
    pd.read_pickle,
    (
        second_round_core_file,
        second_round_architecture_file,
        second_round_stability_file,
    ),
)

In [5]:
# Preview the first rows of the first-round core results to check the column layout.
first_round_core.head()

Unnamed: 0,number,params_batch_size,params_clip_range,params_ent_coef,params_learning_rate,params_n_steps,params_vf_coef,value,datetime_start,duration
0,0,128,0.1,0.000197,4.4e-05,2048,0.75,163.999148,2025-08-02 11:44:28.704519,0 days 00:36:34.671666
1,1,128,0.2,0.000538,0.000342,2048,0.55,163.917301,2025-08-02 12:21:03.377555,0 days 00:36:38.683952
2,2,64,0.1,2.2e-05,0.000362,1024,0.7,164.297104,2025-08-02 12:57:42.062744,0 days 00:37:20.470249
3,3,256,0.15,0.00012,0.000366,1024,0.55,164.456855,2025-08-02 13:35:02.534326,0 days 00:35:07.611855
4,4,32,0.15,0.000158,0.000282,512,0.75,164.681203,2025-08-02 14:10:10.146930,0 days 00:40:13.621571


In [13]:
def extract_new_bounds(df: pd.DataFrame, top_n=5, slack=0.1):
    """Derive narrowed search bounds from the best trials in ``df``.

    Trials are ranked by ``value`` (highest first) and the ``top_n`` best are
    kept. For every numeric parameter column, the ``[min, max]`` range observed
    among those trials is widened on both sides by ``slack * (max - min)``.

    Args:
        df: Trial table with a ``value`` column plus one column per parameter.
            The bookkeeping columns ``number``, ``value``, ``datetime_start``
            and ``duration`` are ignored when present.
        top_n: Number of best trials the bounds are derived from.
        slack: Fraction of the observed span added as margin on each side.

    Returns:
        dict mapping each numeric parameter column name to a
        ``(lower, upper)`` tuple of floats. Non-numeric columns are reported
        on stdout and left out of the result.

    Notes:
        * Bounds are floats even for integer-valued parameters; callers have
          to round them before using them as integer ranges.
        * When the smallest observed value is non-negative, the lower bound is
          floored at ``0.0`` so the margin cannot push it below zero. ``0.0``
          may still be invalid for parameters that must be strictly positive.
        * The value counts of every parameter are printed as a side effect.
    """
    # Columns describing the trial itself rather than a tunable parameter.
    non_param_columns = {"number", "value", "datetime_start", "duration"}

    ranked = df.sort_values("value", ascending=False)
    topN = ranked.head(top_n)

    new_bounds = {}
    for param in df.columns:
        # Skip bookkeeping columns only if they exist; a table that lacks one
        # of them must not make the whole extraction fail.
        if param in non_param_columns:
            continue

        if not is_numeric_dtype(topN[param]):
            print(f"Skipping parameter '{param}' as it is of type string.")
            print(f"Top N values: {topN[param].value_counts()}")
            continue

        print(f"Top N values: {topN[param].value_counts()}")
        mn = float(topN[param].min())
        mx = float(topN[param].max())
        margin = slack * (mx - mn)

        lower = mn - margin
        # Floor at zero only for non-negative parameters. Applying the floor
        # unconditionally would lift the lower bound above the upper bound
        # for parameters whose best values are negative.
        if mn >= 0.0:
            lower = max(0.0, lower)

        new_bounds[param] = (lower, mx + margin)

    return new_bounds

In [14]:
# Second-round core bounds: span of the 10 best first-round trials, padded by 20%.
second_round_core_bounds = extract_new_bounds(
    first_round_core, top_n=10, slack=0.2
)
second_round_core_bounds

Top N values: params_batch_size
32     8
256    1
128    1
Name: count, dtype: int64
Top N values: params_clip_range
0.25    3
0.15    2
0.10    2
0.30    2
0.20    1
Name: count, dtype: int64
Top N values: params_ent_coef
0.000037    1
0.000036    1
0.000065    1
0.000158    1
0.000046    1
0.000017    1
0.000047    1
0.000120    1
0.000074    1
0.000059    1
Name: count, dtype: int64
Top N values: params_learning_rate
0.000014    1
0.000026    1
0.000088    1
0.000282    1
0.000066    1
0.000011    1
0.000013    1
0.000366    1
0.000083    1
0.000098    1
Name: count, dtype: int64
Top N values: params_n_steps
512     7
256     2
1024    1
Name: count, dtype: int64
Top N values: params_vf_coef
0.75    4
0.70    4
0.65    1
0.55    1
Name: count, dtype: int64


{'params_batch_size': (0.0, 300.8),
 'params_clip_range': (0.060000000000000005, 0.33999999999999997),
 'params_ent_coef': (0.0, 0.00018576619024450198),
 'params_learning_rate': (0.0, 0.0004375139700098582),
 'params_n_steps': (102.39999999999998, 1177.6),
 'params_vf_coef': (0.51, 0.79)}

In [15]:
# Second-round architecture bounds: span of the 10 best first-round trials, padded by 20%.
second_round_architecture_bounds = extract_new_bounds(
    first_round_architecture, top_n=10, slack=0.2
)
second_round_architecture_bounds

Skipping parameter 'params_activation' as it is of type string.
Top N values: params_activation
ReLU         5
LeakyReLU    3
Tanh         2
Name: count, dtype: int64
Top N values: params_hidden_size
256    5
128    3
96     2
Name: count, dtype: int64
Top N values: params_n_layers
3    7
2    3
Name: count, dtype: int64


{'params_hidden_size': (64.0, 288.0), 'params_n_layers': (1.8, 3.2)}

In [16]:
# Second-round stability bounds: span of the 10 best first-round trials, padded by 20%.
second_round_stability_bounds = extract_new_bounds(
    first_round_stability, top_n=10, slack=0.2
)
second_round_stability_bounds

Top N values: params_gae_lambda
0.90    6
0.96    1
0.92    1
0.97    1
0.91    1
Name: count, dtype: int64
Top N values: params_gamma
0.953255    1
0.959492    1
0.950719    1
0.953952    1
0.973148    1
0.953168    1
0.955099    1
0.951897    1
0.957707    1
0.956295    1
Name: count, dtype: int64
Top N values: params_use_target_kl
0.015    4
0.010    3
0.020    2
Name: count, dtype: int64


{'params_gae_lambda': (0.886, 0.984),
 'params_gamma': (0.9462326259010265, 0.9776340549472341),
 'params_use_target_kl': (0.008, 0.022)}

## Final Round Bounds

In [10]:
# Final-round core bounds: span of the 5 best second-round trials, padded by 10%.
# NOTE(review): the saved output of this cell lacks the "Top N values" printout
# that extract_new_bounds emits for numeric parameters, so it appears to come
# from an earlier version of the function — re-run to refresh.
final_round_core_bounds = extract_new_bounds(
    second_round_core, top_n=5, slack=0.1
)
final_round_core_bounds

{'params_batch_size': (32.0, 32.0),
 'params_clip_range': (0.085, 0.265),
 'params_ent_coef': (2.3977304316759788e-05, 0.0001698329651584891),
 'params_learning_rate': (0.0, 0.0003085764943436267),
 'params_n_steps': (230.4, 537.6),
 'params_vf_coef': (0.64, 0.76)}

In [11]:
# Final-round architecture bounds: span of the 5 best second-round trials, padded by 10%.
# NOTE(review): the saved output of this cell lacks the "Top N values" printout
# that extract_new_bounds emits for numeric parameters, so it appears to come
# from an earlier version of the function — re-run to refresh.
final_round_architecture_bounds = extract_new_bounds(
    second_round_architecture, top_n=5, slack=0.1
)
final_round_architecture_bounds

Skipping parameter 'params_activation' as it is of type string.
Top N values: params_activation
ReLU         2
Tanh         2
LeakyReLU    1
Name: count, dtype: int64


{'params_hidden_size': (80.0, 272.0), 'params_n_layers': (1.9, 3.1)}

In [12]:
# Final-round stability bounds: span of the 5 best second-round trials, padded by 10%.
# NOTE(review): the saved output of this cell lacks the "Top N values" printout
# that extract_new_bounds emits for numeric parameters, so it appears to come
# from an earlier version of the function — re-run to refresh.
final_round_stability_bounds = extract_new_bounds(
    second_round_stability, top_n=5, slack=0.1
)
final_round_stability_bounds

{'params_gae_lambda': (0.894, 0.966),
 'params_gamma': (0.9484755851186129, 0.9753910957296478),
 'params_use_target_kl': (0.009000000000000001, 0.021)}