In [78]:
import pickle
import pandas as pd
import numpy as np
import optuna

In [79]:
# Database URL handed to Optuna as `storage` by the cells that follow.
# NOTE(review): the path is relative, so it presumably resolves against the
# kernel's working directory — confirm the notebook is launched from there.
storage = "sqlite:///optuna_results_parameter_tuning.sqlite3"

In [80]:
# Summaries of every study found in the storage; the following cell only uses
# each summary's `study_name` to load the full study.
studies = optuna.study.get_all_study_summaries(storage=storage)

In [81]:
# Combine the trials of every study in the storage into a single DataFrame
# `df` with a fresh 0..n-1 index.
#
# For each study, every entry of `system_attrs` is written as a column on that
# study's trials, so the study-level value is repeated on each of its rows.
if not studies:
    # Fail with a clear message instead of an IndexError / "No objects to
    # concatenate" when the storage contains no studies.
    raise ValueError(f"No Optuna studies found in storage {storage!r}")

study_frames = []
for study in studies:
    loaded_study = optuna.load_study(study_name=study.study_name, storage=storage)
    df_new = loaded_study.trials_dataframe()
    for key, value in loaded_study.system_attrs.items():
        df_new[key] = value
    study_frames.append(df_new)

# One concat over all frames (instead of re-copying the growing result on every
# iteration); `ignore_index=True` replaces the separate in-place reset_index.
df = pd.concat(study_frames, ignore_index=True)

In [82]:
# List the columns of the combined frame: trial fields, `params_*`,
# `user_attrs_*`, and the un-prefixed columns copied from the study attributes.
df.columns

Index(['number', 'value', 'datetime_start', 'datetime_complete', 'duration',
       'params_init_learning_rate', 'params_learning_rate_decay',
       'params_max_depth', 'params_n_experts', 'params_optimization_method',
       'params_use_2_dim_gate_based_on', 'params_use_posterior',
       'user_attrs_accuracy_train', 'user_attrs_accuracy_val',
       'user_attrs_std_train', 'user_attrs_std_val', 'state', 'Data X',
       'Data y', 'initialization_method', 'params_alpha', 'params_beta',
       'params_gamma', 'params_mean_precision_prior',
       'params_weight_concentration_prior',
       'params_weight_concentration_prior_type', 'params_weight_cutoff'],
      dtype='object')

In [83]:
# Peek at a single row to sanity-check the combined frame.
df.head(1)

Unnamed: 0,number,value,datetime_start,datetime_complete,duration,params_init_learning_rate,params_learning_rate_decay,params_max_depth,params_n_experts,params_optimization_method,...,Data X,Data y,initialization_method,params_alpha,params_beta,params_gamma,params_mean_precision_prior,params_weight_concentration_prior,params_weight_concentration_prior_type,params_weight_cutoff
0,0,0.780093,2021-06-27 15:28:59.792766,2021-06-27 15:29:01.645766,0 days 00:00:01.853000,77.537892,0.995921,2,3,lasso_regression,...,cars_input.pd,cars_target.pd,str,,,,,,,


In [84]:
# Number of trials per initialization method.
# NOTE(review): the output contains a category literally named 'str', which
# looks like a type name recorded in place of a real method name — confirm
# against the code that wrote this study attribute.
df.initialization_method.value_counts()

str              300
BGM_init         300
KDTmeans_init    300
Kmeans_init      300
Name: initialization_method, dtype: int64

In [85]:
# Sorted distinct initialization methods (same categories as the value_counts
# cell, without the counts).
np.unique(df.initialization_method)

array(['BGM_init', 'KDTmeans_init', 'Kmeans_init', 'str'], dtype=object)

In [86]:
# Sorted distinct dataset identifiers; the per-dataset top-N selection iterates
# over exactly these values.
np.unique(df["Data X"])

array(['cars_input.pd', 'students_input.pd'], dtype=object)

In [87]:
# For every dataset (distinct value of "Data X", in sorted order) keep the
# `top` trials with the highest validation accuracy, and stack the selections
# into `df_best` with a fresh 0..n-1 index.
top = 10

best_per_dataset = []
for dataset in np.unique(df["Data X"]):
    df_new = (
        df[df["Data X"] == dataset]
        .sort_values(by=["user_attrs_accuracy_val"], ascending=False)
        .head(top)
    )
    best_per_dataset.append(df_new)

if best_per_dataset:
    # Single concat instead of growing an initially empty DataFrame inside the
    # loop; `ignore_index=True` replaces the in-place reset_index.
    df_best = pd.concat(best_per_dataset, ignore_index=True)
else:
    # No rows in `df`: return an empty selection that still has df's columns.
    df_best = df.head(0)


In [88]:
# Sanity check: at most `top` rows per dataset, with the same columns as `df`.
df_best.shape

(20, 27)

In [89]:
# Show the two best-scoring trials of the first dataset in the selection.
df_best.head(2)

Unnamed: 0,number,value,datetime_start,datetime_complete,duration,params_init_learning_rate,params_learning_rate_decay,params_max_depth,params_n_experts,params_optimization_method,...,Data X,Data y,initialization_method,params_alpha,params_beta,params_gamma,params_mean_precision_prior,params_weight_concentration_prior,params_weight_concentration_prior_type,params_weight_cutoff
0,61,0.918981,2021-06-27 15:30:43.710765,2021-06-27 15:30:45.429765,0 days 00:00:01.719000,136.819742,0.994125,2,3,least_squares_linear_regression,...,cars_input.pd,cars_target.pd,str,,,,,,,
1,122,0.916667,2021-06-27 15:32:32.982089,2021-06-27 15:32:34.669090,0 days 00:00:01.687001,149.730407,0.992079,2,3,least_squares_linear_regression,...,cars_input.pd,cars_target.pd,str,,,,,,,


In [90]:
# Persist the per-dataset selection as a pickle file.
# The context manager closes (and flushes) the file handle deterministically;
# the bare `open(...)` argument used before was never closed.
# NOTE(review): the file name hard-codes "top10" although the cut-off is the
# configurable `top` variable, and the `dataframes/` directory must already
# exist or `open` raises FileNotFoundError.
with open("dataframes/df_top10_hyperparameters_per_dataset.pd", "wb") as pickle_file:
    pickle.dump(df_best, pickle_file)

In [92]:
# Per-dataset mean of every numeric/boolean column.
# `numeric_only=True` is explicit because the frame also holds string,
# datetime and timedelta columns: older pandas dropped those silently (which is
# what the displayed output reflects), while pandas 2.x raises a TypeError on
# them unless this flag is passed.
df.groupby(["Data X"]).mean(numeric_only=True)

Unnamed: 0_level_0,number,value,params_init_learning_rate,params_learning_rate_decay,params_max_depth,params_n_experts,params_use_posterior,user_attrs_accuracy_train,user_attrs_accuracy_val,user_attrs_std_train,user_attrs_std_val,params_alpha,params_beta,params_gamma,params_mean_precision_prior,params_weight_concentration_prior,params_weight_cutoff
Data X,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
cars_input.pd,74.5,0.82903,95.747788,0.990093,2.0,3.0,0.298333,0.840195,0.82903,0.012467,0.018739,1.727876,0.148894,0.216165,0.31772,0.60947,0.0
students_input.pd,74.5,0.464243,94.522937,0.985094,2.0,3.0,0.08,0.538959,0.464243,0.015877,0.042211,2.244846,0.373535,0.239302,0.320073,0.293613,0.0


In [93]:
# Display the dataset label of every trial (pandas truncates the middle rows).
df["Data X"]

0           cars_input.pd
1           cars_input.pd
2           cars_input.pd
3           cars_input.pd
4           cars_input.pd
              ...        
1195    students_input.pd
1196    students_input.pd
1197    students_input.pd
1198    students_input.pd
1199    students_input.pd
Name: Data X, Length: 1200, dtype: object

In [97]:
# Frequency of each gate-dimension choice; `dropna=False` keeps the NaN bucket
# so rows without a value are counted too.
# NOTE(review): NaN presumably marks trials that never sampled this
# parameter — confirm against the objective function.
df["params_use_2_dim_gate_based_on"].value_counts(dropna=False)

NaN                       531
feature_importance_lda    511
feature_importance         87
PCA                        71
Name: params_use_2_dim_gate_based_on, dtype: int64