In [77]:
import optuna
import pandas as pd
import json
import optuna.visualization as vis
from optuna.distributions import IntDistribution, FloatDistribution, CategoricalDistribution

In [65]:
# Load the exported hyperparameter-study results; orient='records' expects the
# JSON file to be a list of objects, one per row.
# NOTE(review): later cells presumably rely on the columns shown in the frame
# below (value, problem_instance, params_*) -- confirm against the export script.
df_trials = pd.read_json('hyperparameter_study_results.json', orient='records')

In [66]:
# Compute the gap (in percent) between the value obtained in the study and the
# best known solution of the benchmark: 100 * (value - optimum) / optimum.
optimum_values = {'C101.txt':7093.45,
                    'C201.txt':5695.02,
                    'R101.txt':4314.36,
                    'R201.txt':3446.78,
                    'RC101.txt':5150.86,
                    'RC201.txt':4374.09}

# Keep failing loudly (KeyError, as the per-row dict lookup did) when a trial
# belongs to an instance that has no reference value, instead of silently
# producing NaN gaps through the vectorized lookup below.
missing_instances = set(df_trials['problem_instance']) - set(optimum_values)
if missing_instances:
    raise KeyError(f"no best known value for instances: {sorted(missing_instances, key=str)}")

# Vectorized lookup: one reference value per row, aligned on the frame's index.
best_known = df_trials['problem_instance'].map(optimum_values)
df_trials['gap_optimal'] = 100 * (df_trials['value'] - best_known) / best_known

In [67]:
# Gap (in percent) between each trial and the worst (largest) value recorded
# for the same problem instance: 0 for the worst trial, negative otherwise.

df_trials['worst_value'] = df_trials.groupby('problem_instance')['value'].transform('max')
df_trials['gap_worst'] = (
    df_trials['value']
    .sub(df_trials['worst_value'])
    .mul(100)
    .div(df_trials['worst_value'])
)

In [37]:
# Inspect the trials together with the derived gap_optimal / worst_value / gap_worst columns.
df_trials

Unnamed: 0,number,value,datetime_start,datetime_complete,duration,params_accepted_score,params_cooling_function,params_final_temp,params_global_best_score,params_init_temp,params_iterations,params_local_best_score,state,problem_instance,gap_optimal,worst_value,gap_worst
0,0,7774.113291,1742221719811,1742221895600,175789,1,exponential,46.466478,16,1000,2000,4,COMPLETE,C101.txt,9.595659,8122.068859,-4.284076
1,1,7687.050837,1742221895600,1742222069879,174279,6,linear,0.218388,20,1100,2000,16,COMPLETE,C101.txt,8.368295,8122.068859,-5.356000
2,2,7893.828320,1742222069879,1742222411216,341336,4,exponential,56.213530,20,2000,4000,5,COMPLETE,C101.txt,11.283343,8122.068859,-2.810128
3,3,7651.897286,1742222411216,1742222661156,249940,3,exponential,41.452035,20,1000,3000,4,COMPLETE,C101.txt,7.872718,8122.068859,-5.788815
4,4,8122.068859,1742222661156,1742222740270,79113,3,linear,0.725310,18,600,1000,4,COMPLETE,C101.txt,14.500967,8122.068859,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
115,15,4937.543039,1742216221885,1742217718767,1496882,1,exponential,66.704386,9,3000,8000,4,COMPLETE,RC201.txt,12.881606,4991.242289,-1.075869
116,16,4817.185446,1742217718767,1742218265844,547076,14,exponential,19.956398,17,2000,3000,16,COMPLETE,RC201.txt,10.130003,4991.242289,-3.487245
117,17,4933.892075,1742218265845,1742219028281,762436,2,exponential,80.200929,13,3000,4000,6,COMPLETE,RC201.txt,12.798138,4991.242289,-1.149017
118,18,4991.242289,1742219028281,1742220136655,1108373,1,exponential,53.205766,7,2000,6000,3,COMPLETE,RC201.txt,14.109273,4991.242289,0.000000


In [3]:
# Benchmark instance file names; these are matched against the values of
# df_trials['problem_instance'] when one study per instance is rebuilt.
instances = ["C101.txt", "R101.txt", "RC101.txt", "C201.txt", "R201.txt", "RC201.txt"]

In [51]:
# Rebuild one in-memory Optuna study per problem instance from the recorded
# trials, then ask Optuna for the parameter importances of each study.
#
# NOTE(review): the importances displayed for this run only list iterations,
# global_best_score and cooling_function. Presumably the other four parameters
# are left out because their distributions differ from trial to trial
# (bounds depend on cooling_function / global_best_score / local_best_score)
# -- confirm against Optuna's importance documentation before relying on it.


def _params_from_row(row):
    """Return the sampled hyperparameters of one trial row, keyed without the 'params_' prefix."""
    names = ('iterations', 'init_temp', 'final_temp', 'global_best_score',
             'local_best_score', 'accepted_score', 'cooling_function')
    return {name: row[f'params_{name}'] for name in names}


def _distributions_for(params):
    """Return the search-space description that matches one trial's parameter values.

    The temperature ranges depend on the cooling function, and the upper
    bounds of local_best_score / accepted_score depend on the trial's own
    global_best_score / local_best_score.
    """
    is_linear = params["cooling_function"] == 'linear'
    return {
        "iterations": IntDistribution(1000, 8000, step=1000),
        "global_best_score": IntDistribution(5, 20),
        "cooling_function": CategoricalDistribution(["exponential", "linear"]),
        "init_temp": (IntDistribution(100, 1100, step=500) if is_linear
                      else IntDistribution(1000, 3000, step=1000)),
        "final_temp": (FloatDistribution(0.001, 1) if is_linear
                       else FloatDistribution(0, 100)),
        "local_best_score": IntDistribution(3, params["global_best_score"]-1),
        "accepted_score": IntDistribution(1, params["local_best_score"] - 1),
    }


param_importances_per_instance = {}
for instance in instances:
    study = optuna.create_study(direction='minimize', study_name=f"study_{instance}")
    instance_rows = df_trials[df_trials['problem_instance'] == instance]

    for _, row in instance_rows.iterrows():
        params = _params_from_row(row)
        distributions = _distributions_for(params)

        # Every recorded row is registered as an already finished trial.
        trial = optuna.trial.create_trial(
            state=optuna.trial.TrialState.COMPLETE,
            value=row['value'],
            params=params,
            distributions=distributions,
            user_attrs={"problem_instance": row["problem_instance"]},
        )
        study.add_trial(trial)

    importance = optuna.importance.get_param_importances(study)
    param_importances_per_instance[instance] = importance

[I 2025-03-19 17:44:22,328] A new study created in memory with name: study_C101.txt
[I 2025-03-19 17:44:22,717] A new study created in memory with name: study_R101.txt
[I 2025-03-19 17:44:23,104] A new study created in memory with name: study_RC101.txt
[I 2025-03-19 17:44:23,433] A new study created in memory with name: study_C201.txt
[I 2025-03-19 17:44:23,723] A new study created in memory with name: study_R201.txt
[I 2025-03-19 17:44:24,044] A new study created in memory with name: study_RC201.txt


In [52]:
# Show the importance scores computed per problem instance (instance -> {parameter: importance}).
param_importances_per_instance

{'C101.txt': {'iterations': np.float64(0.5207379617802412),
  'global_best_score': np.float64(0.2922854442882003),
  'cooling_function': np.float64(0.1869765939315584)},
 'R101.txt': {'global_best_score': np.float64(0.5105713515705841),
  'iterations': np.float64(0.3701264854356416),
  'cooling_function': np.float64(0.11930216299377427)},
 'RC101.txt': {'iterations': np.float64(0.7545906729303403),
  'global_best_score': np.float64(0.22192947065306087),
  'cooling_function': np.float64(0.02347985641659876)},
 'C201.txt': {'iterations': np.float64(0.6122031806059243),
  'global_best_score': np.float64(0.32005835767443386),
  'cooling_function': np.float64(0.06773846171964196)},
 'R201.txt': {'iterations': np.float64(0.6409525589855095),
  'global_best_score': np.float64(0.32610256623765854),
  'cooling_function': np.float64(0.032944874776831966)},
 'RC201.txt': {'global_best_score': np.float64(0.49446835121641436),
  'iterations': np.float64(0.40726235855028836),
  'cooling_function': n

In [38]:
# For every problem instance, pick the trial with the lowest objective value
# and show its gaps next to the hyperparameters that produced it.
best_indices = df_trials.groupby("problem_instance")["value"].idxmin()
best_trials = df_trials.loc[best_indices]

best_trials.loc[:, [
    'problem_instance', 'value', 'gap_optimal', 'gap_worst',
    'params_iterations', 'params_global_best_score', 'params_cooling_function',
    'params_init_temp', 'params_final_temp',
    'params_local_best_score', 'params_accepted_score',
]]

Unnamed: 0,problem_instance,value,gap_optimal,gap_worst,params_iterations,params_global_best_score,params_cooling_function,params_init_temp,params_final_temp,params_local_best_score,params_accepted_score
19,C101.txt,7509.701492,5.868111,-7.539549,8000,8,linear,100,0.322956,6,3
62,C201.txt,5695.021829,3.2e-05,-0.403503,7000,8,linear,600,0.677996,4,2
38,R101.txt,4575.002141,6.04127,-2.525036,3000,7,linear,600,0.021617,4,2
91,R201.txt,3526.49516,2.312743,-2.227978,3000,15,exponential,2000,42.447784,5,3
47,RC101.txt,5608.502729,8.884783,-3.522823,4000,20,exponential,2000,87.533306,16,2
105,RC201.txt,4731.413632,8.169096,-5.205691,2000,5,exponential,2000,41.665363,4,1


In [39]:
# List the trials of every problem instance from best (lowest value) to worst.
# A two-key sort yields the same listing as sorting inside groupby(...).apply(...)
# (instances in ascending order, original row labels kept) without going through
# groupby.apply, which triggered a pandas deprecation warning in the recorded run.
# Differences are limited to corner cases: exactly tied values may be ordered
# differently, and rows with a missing problem_instance (none are expected)
# would be listed last instead of being dropped.
df_trials.sort_values(['problem_instance', 'value'], ascending=True)[[
    'problem_instance', 'value', 'gap_optimal', 'gap_worst',
    'params_iterations', 'params_global_best_score', 'params_cooling_function',
    'params_init_temp', 'params_final_temp',
    'params_local_best_score', 'params_accepted_score',
]]

  df_trials.groupby("problem_instance", group_keys=False).apply(


Unnamed: 0,problem_instance,value,gap_optimal,gap_worst,params_iterations,params_global_best_score,params_cooling_function,params_init_temp,params_final_temp,params_local_best_score,params_accepted_score
19,C101.txt,7509.701492,5.868111,-7.539549,8000,8,linear,100,0.322956,6,3
3,C101.txt,7651.897286,7.872718,-5.788815,3000,20,exponential,1000,41.452035,4,3
1,C101.txt,7687.050837,8.368295,-5.356000,2000,20,linear,1100,0.218388,16,6
11,C101.txt,7715.126374,8.764090,-5.010330,3000,20,linear,600,0.073131,18,17
9,C101.txt,7737.547838,9.080177,-4.734274,6000,11,exponential,3000,46.365124,3,1
...,...,...,...,...,...,...,...,...,...,...,...
104,RC201.txt,4923.120417,12.551877,-1.364828,8000,19,exponential,3000,42.609855,7,5
119,RC201.txt,4929.751968,12.703487,-1.231964,2000,5,exponential,2000,36.864727,3,1
117,RC201.txt,4933.892075,12.798138,-1.149017,4000,13,exponential,3000,80.200929,6,2
115,RC201.txt,4937.543039,12.881606,-1.075869,8000,9,exponential,3000,66.704386,4,1


In [50]:
# Spread (standard deviation) of the objective value across the trials of each problem instance.
df_trials.groupby('problem_instance')['value'].std()

problem_instance
C101.txt     149.513836
C201.txt       5.506455
R101.txt      33.002747
R201.txt      24.251229
RC101.txt     48.076666
RC201.txt     67.880755
Name: value, dtype: float64

In [78]:
# Function to normalize the 'value' column within each instance
def normalize(df):
    """Min-max scale ``df['value']`` into a ``normalized_value`` column.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a numeric ``value`` column (one problem instance's rows
        when called through ``groupby(...).apply``).

    Returns
    -------
    pandas.DataFrame
        A copy of ``df`` with ``normalized_value`` added: 0.0 for the smallest
        value, 1.0 for the largest, and 0.0 everywhere when all values are
        equal. The frame passed in is not modified, so callers must use the
        return value.
    """
    min_val = df['value'].min()
    max_val = df['value'].max()
    # Avoid division by zero if all values are the same
    if max_val == min_val:
        return df.assign(normalized_value=0.0)
    # assign() works on a copy, so a group slice handed in by groupby is never
    # written to in place.
    return df.assign(normalized_value=(df['value'] - min_val) / (max_val - min_val))

In [76]:
# Apply normalization for each problem instance.
# The columns are selected explicitly after the groupby so that the grouping
# column is still handed to normalize() and kept in the result; relying on
# groupby.apply to include it implicitly is deprecated in pandas.
# NOTE(review): the KeyError recorded for this cell means df_trials had no
# 'problem_instance' column when it ran -- presumably stale kernel state;
# re-run the notebook from the loading cell to confirm.
df_trials_normalized = (
    df_trials
    .groupby('problem_instance')[df_trials.columns.tolist()]
    .apply(normalize)
)
df_trials_normalized


KeyError: 'problem_instance'

In [64]:
# Now aggregate the normalized performance per hyperparameter configuration.
# The grouping key is limited to these three parameters; an earlier assignment
# of the full seven-parameter key was overwritten immediately and has been
# removed as dead code.
group_cols = ['params_iterations',
              'params_global_best_score', 'params_cooling_function']
# Calculate the average normalized value (lower is better)
aggregated = df_trials_normalized.groupby(group_cols)['normalized_value'].mean().reset_index()

# Sort to get the best overall parameters
best_overall_params = aggregated.sort_values('normalized_value')
best_overall_params

Unnamed: 0,params_iterations,params_global_best_score,params_cooling_function,normalized_value
26,3000,7,linear,0.000000
40,4000,5,exponential,0.000000
69,6000,12,exponential,0.000000
68,6000,11,linear,0.000135
75,6000,19,exponential,0.013823
...,...,...,...,...
15,2000,11,exponential,0.966629
22,2000,16,linear,0.996087
9,1000,18,linear,1.000000
5,1000,13,linear,1.000000


In [74]:
group_cols = ['params_iterations',
              'params_global_best_score', 'params_cooling_function']
# Count the non-null entries per column for every (iterations, global_best_score,
# cooling_function) group, i.e. how many trials back each group.
# Note that this rebinds `aggregated` to the count table.
aggregated = df_trials_normalized.groupby(group_cols).count()
aggregated

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,number,value,datetime_start,datetime_complete,duration,params_accepted_score,params_final_temp,params_init_temp,params_local_best_score,state,problem_instance,normalized_value
params_iterations,params_global_best_score,params_cooling_function,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
1000,6,linear,1,1,1,1,1,1,1,1,1,1,1,1
1000,7,exponential,1,1,1,1,1,1,1,1,1,1,1,1
1000,7,linear,1,1,1,1,1,1,1,1,1,1,1,1
1000,10,exponential,2,2,2,2,2,2,2,2,2,2,2,2
1000,12,exponential,1,1,1,1,1,1,1,1,1,1,1,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8000,13,exponential,1,1,1,1,1,1,1,1,1,1,1,1
8000,13,linear,1,1,1,1,1,1,1,1,1,1,1,1
8000,14,exponential,1,1,1,1,1,1,1,1,1,1,1,1
8000,18,exponential,1,1,1,1,1,1,1,1,1,1,1,1


In [87]:
# Load the follow-up runs from hyperparameters.csv. header=0 together with
# names= replaces the header row found in the file with the column names below.
# NOTE(review): presumably each candidate configuration was re-run on every
# benchmark instance -- confirm with the script that produced the file.
final_trial_columns = [
    'problem_instance', 'duration', 'value',
    'iterations', 'global_best_score', 'local_best_score', 'accepted_score',
    'cooling_function', 'init_temp', 'final_temp',
]
df_final_trials = pd.read_csv('hyperparameters.csv', names=final_trial_columns, header=0)
df_final_trials

Unnamed: 0,problem_instance,duration,value,iterations,global_best_score,local_best_score,accepted_score,cooling_function,init_temp,final_temp
0,C101.txt,310.633278,7961.255404,3000,20,4,3,exponential,1000,41.452035
1,R101.txt,386.944922,4656.402618,3000,20,4,3,exponential,1000,41.452035
2,RC101.txt,328.962212,5768.161955,3000,20,4,3,exponential,1000,41.452035
3,C201.txt,347.608479,5696.858521,3000,20,4,3,exponential,1000,41.452035
4,R201.txt,518.544194,3538.887086,3000,20,4,3,exponential,1000,41.452035
5,RC201.txt,719.979089,5007.416623,3000,20,4,3,exponential,1000,41.452035
6,C101.txt,523.034944,7881.970426,4000,5,3,1,exponential,2000,49.059518
7,R101.txt,629.77688,4586.233522,4000,5,3,1,exponential,2000,49.059518
8,RC101.txt,588.775702,5705.081364,4000,5,3,1,exponential,2000,49.059518
9,C201.txt,565.821986,5695.021829,4000,5,3,1,exponential,2000,49.059518


In [89]:
# Normalize the values of the final runs within each problem instance.
# The columns are selected explicitly after the groupby so that the grouping
# column is still handed to normalize() and kept in the result; relying on
# groupby.apply to include it implicitly is deprecated in pandas and produced
# the warning recorded for this cell.
df_final_trials_normalized = (
    df_final_trials
    .groupby('problem_instance')[df_final_trials.columns.tolist()]
    .apply(normalize)
)
df_final_trials_normalized

  df_final_trials_normalized = df_final_trials.groupby('problem_instance').apply(normalize)


Unnamed: 0_level_0,Unnamed: 1_level_0,problem_instance,duration,value,iterations,global_best_score,local_best_score,accepted_score,cooling_function,init_temp,final_temp,normalized_value
problem_instance,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
C101.txt,0,C101.txt,310.633278,7961.255404,3000,20,4,3,exponential,1000,41.452035,1.0
C101.txt,6,C101.txt,523.034944,7881.970426,4000,5,3,1,exponential,2000,49.059518,0.500787
C101.txt,12,C101.txt,306.390782,7916.719148,3000,7,4,2,linear,600,0.021617,0.71958
C101.txt,18,C101.txt,341.855043,7823.453434,3000,15,5,3,exponential,2000,42.447784,0.132338
C101.txt,24,C101.txt,475.414803,7857.481599,4000,20,16,2,exponential,2000,87.533306,0.346594
C101.txt,30,C101.txt,194.562145,7802.435596,2000,5,4,1,exponential,2000,41.665363,0.0
C201.txt,3,C201.txt,347.608479,5696.858521,3000,20,4,3,exponential,1000,41.452035,0.174749
C201.txt,9,C201.txt,565.821986,5695.021829,4000,5,3,1,exponential,2000,49.059518,0.0
C201.txt,15,C201.txt,345.650104,5702.116893,3000,7,4,2,linear,600,0.021617,0.675047
C201.txt,21,C201.txt,327.416763,5705.532308,3000,15,5,3,exponential,2000,42.447784,1.0


In [92]:
# Group the final runs by the complete hyperparameter configuration.
group_cols = [
    'iterations', 'global_best_score', 'local_best_score', 'accepted_score',
    'cooling_function', 'init_temp', 'final_temp',
]
# Mean normalized value per configuration (lower is better)
aggregated = (
    df_final_trials_normalized
    .groupby(group_cols)['normalized_value']
    .mean()
    .reset_index()
)

# Configurations ordered from best to worst; the first row is printed on its own.
best_overall_params = aggregated.sort_values(by='normalized_value')
print(best_overall_params.iloc[0])
best_overall_params

iterations                  2000
global_best_score              5
local_best_score               4
accepted_score                 1
cooling_function     exponential
init_temp                   2000
final_temp             41.665363
normalized_value        0.280457
Name: 0, dtype: object


Unnamed: 0,iterations,global_best_score,local_best_score,accepted_score,cooling_function,init_temp,final_temp,normalized_value
0,2000,5,4,1,exponential,2000,41.665363,0.280457
4,4000,5,3,1,exponential,2000,49.059518,0.348033
5,4000,20,16,2,exponential,2000,87.533306,0.359222
2,3000,15,5,3,exponential,2000,42.447784,0.364234
3,3000,20,4,3,exponential,1000,41.452035,0.692598
1,3000,7,4,2,linear,600,0.021617,0.723666
