# Using the hyperopt library

In [2]:
import numpy as np
import hyperopt
from hyperopt import fmin, tpe, hp, SparkTrials, Trials, space_eval
from sklearn.svm import SVC
from hyperopt import STATUS_OK
from src.utils import base_skf_test
import warnings
import pickle
from src.utils import load_synthetic_datasets
# Load the datasets once for the whole notebook; the optimisation cells below
# iterate over this collection and unpack every entry as (dataset, dataset_name).
datasets = load_synthetic_datasets()

In [38]:
def write_result(classifier_name, dataset_name, trials, space_e, result_dict=None):
    """Append one summary row for a finished hyperopt search to the classifier's CSV.

    The row is appended to ``./results/hyperopt/performance/{classifier_name}.csv``
    and has the form
    ``dataset_name, 1 - best_loss, mean(f1), mean(mac_rec), <hyperparameter items>``,
    with every number rounded to 5 decimals.

    Parameters
    ----------
    classifier_name : str
        File stem of the CSV the row is appended to.
    dataset_name : str
        Written as the first column of the row.
    trials : object exposing ``best_trial``
        Only ``trials.best_trial['result']['loss']`` is read.
    space_e : dict
        Hyperparameters to record; every ``(name, value)`` item is written with ``str``.
    result_dict : dict, optional
        Maps a trial's loss to a ``(f1_scores, macro_recall_scores)`` pair. When it
        is omitted, or has no entry for the best loss, ``nan`` is written for both
        score columns instead of raising.

    Raises
    ------
    FileNotFoundError
        If ``./results/hyperopt/performance`` does not exist (``open`` does not
        create parent directories).
    """
    best_loss = trials.best_trial['result']['loss']

    # Fall back to NaN so a missing score entry never loses the loss/parameter columns.
    no_scores = (float('nan'), float('nan'))
    f1, mac_rec = (result_dict or {}).get(best_loss, no_scores)

    # NOTE(review): the items are written as Python tuples, e.g. "('degree', 2)",
    # which contain commas themselves, so a row cannot be split on ',' reliably.
    # The format is kept unchanged so new rows match rows already in the file.
    params = ','.join([str(x) for x in space_e.items()])
    row = f"{dataset_name}, {round(1-best_loss,5)}, {round(np.mean(f1),5)}, {round(np.mean(mac_rec),5)}, {params}\n"

    # Build the row first: a failure above must not create or touch the CSV.
    with open(f"./results/hyperopt/performance/{classifier_name}.csv", mode="a+") as f:
        f.write(row)



# Parameter Optimization SVM

In [40]:
from src.parameter_spaces import svm_space as space
# Installed once for the whole cell; repeating it per dataset adds nothing.
warnings.filterwarnings('ignore')

# Run one independent TPE search per dataset and append its best result to svm.csv.
for dataset, dataset_name in datasets:
    # Maps each trial's loss to its (f1s, mac_recs) pair so that write_result
    # can look up the scores belonging to the best trial.
    result_dict = {}

    def objective(params):
        """Score one SVC configuration.

        The loss is 1 minus the mean of the `gmeans` values returned by
        base_skf_test, so minimising it maximises that mean.
        """
        clf = SVC(kernel=params['kernel'], gamma=params['gamma'], degree=params['degree'], max_iter=1000)
        gmeans, f1s, mac_recs = base_skf_test(dataset.data, dataset.target, clf)
        loss = 1 - np.mean(gmeans)
        result_dict[loss] = (f1s, mac_recs)
        return {'loss': loss, 'status': STATUS_OK}

    trials = Trials()
    best = fmin(objective,
            space = space,
            algo=tpe.suggest,
            max_evals = 100,
            trials = trials
            )
    # `best` is printed exactly as fmin returns it; space_eval resolves it
    # against the search space before it is written to the CSV.
    print(f"{dataset_name} {best}")
    write_result("svm",dataset_name, trials, space_eval(space, best), result_dict)

100%|██████████| 10/10 [00:08<00:00,  1.21trial/s, best loss: 0.07569903120057864]
02a {'degree': 2, 'gamma': 7, 'kernel': 0}
100%|██████████| 10/10 [00:08<00:00,  1.24trial/s, best loss: 0.2394355197213408]
02b {'degree': 0, 'gamma': 6, 'kernel': 0}
100%|██████████| 10/10 [00:07<00:00,  1.28trial/s, best loss: 0.19761792303318093]
subcl5 {'degree': 1, 'gamma': 1, 'kernel': 0}
100%|██████████| 10/10 [00:10<00:00,  1.05s/trial, best loss: 0.5522773504459862]
subcl5-noise {'degree': 2, 'gamma': 0, 'kernel': 2}
100%|██████████| 10/10 [00:07<00:00,  1.27trial/s, best loss: 0.18807762866458044]
clover {'degree': 3, 'gamma': 2, 'kernel': 0}
100%|██████████| 10/10 [00:08<00:00,  1.23trial/s, best loss: 0.4808119988833537]
clover-noise {'degree': 2, 'gamma': 1, 'kernel': 0}
100%|██████████| 10/10 [00:08<00:00,  1.18trial/s, best loss: 0.44974145263799836]
paw-2d {'degree': 3, 'gamma': 8, 'kernel': 0}
100%|██████████| 10/10 [00:08<00:00,  1.18trial/s, best loss: 0.2676912663938753]
paw-2d-borde

KeyboardInterrupt: 

# Parameter Optimization kNN 

In [42]:
from sklearn.neighbors import KNeighborsClassifier
from src.parameter_spaces import knn_space as space

# One TPE search per dataset; the best trial of each search is appended to knn.csv.
for dataset, dataset_name in datasets:
    # loss -> (f1s, mac_recs) for every evaluated configuration of this dataset.
    result_dict = {}

    def objective(params):
        """Return the hyperopt result for one k-nearest-neighbours configuration."""
        model = KNeighborsClassifier(n_neighbors=params['n_neighbors'])
        gmean_scores, f1_scores, macro_recalls = base_skf_test(dataset.data, dataset.target, model)
        loss = 1 - np.mean(gmean_scores)
        result_dict[loss] = (f1_scores, macro_recalls)
        return {'loss': loss, 'status': STATUS_OK}

    trials = Trials()
    best = fmin(objective, space=space, algo=tpe.suggest, max_evals=100, trials=trials)
    print(f"{dataset_name} {best}")
    write_result("knn", dataset_name, trials, space_eval(space, best), result_dict)

100%|██████████| 100/100 [01:22<00:00,  1.21trial/s, best loss: 0.07247943173624871]
02a {'n_neighbors': 0}
 93%|█████████▎| 93/100 [01:16<00:05,  1.22trial/s, best loss: 0.040572458602614936]


KeyboardInterrupt: 

# Parameter Optimization Multi-layer Perceptron 

In [2]:
from sklearn.neural_network import MLPClassifier
from src.parameter_spaces import get_mlp_space

# One TPE search per dataset; the best trial of each search is appended to mlp.csv.
for dataset, dataset_name in datasets:
    m = len(dataset.data[0])  # Number of variables

    # The search space is rebuilt per dataset because get_mlp_space takes m.
    space = get_mlp_space(m)
    # loss -> (f1s, mac_recs) for every evaluated configuration of this dataset.
    result_dict = {}

    def objective(params):
        """Return the hyperopt result for one multi-layer perceptron configuration."""
        model = MLPClassifier(
            hidden_layer_sizes=params['hidden_layer_sizes'],
            learning_rate_init=params['learning_rate_init'],
        )
        gmean_scores, f1_scores, macro_recalls = base_skf_test(dataset.data, dataset.target, model)
        loss = 1 - np.mean(gmean_scores)
        result_dict[loss] = (f1_scores, macro_recalls)
        return {'loss': loss, 'status': STATUS_OK}

    trials = Trials()
    best = fmin(objective, space=space, algo=tpe.suggest, max_evals=100, trials=trials)
    print(f"{dataset_name} {best}")
    write_result("mlp", dataset_name, trials, space_eval(space, best), result_dict)

 12%|█▏        | 12/100 [00:14<01:47,  1.22s/trial, best loss: 0.9409661114274183]


KeyboardInterrupt: 

# Parameter Optimization Random Forest

In [3]:
from sklearn.ensemble import RandomForestClassifier
from src.parameter_spaces import get_rf_space

# One TPE search per dataset, evaluated in parallel through SparkTrials; the best
# trial of each search is appended to rf.csv.
for dataset, dataset_name in datasets:

    m = len(dataset.data[0]) # Number of variables

    space = get_rf_space(m)

    def objective(params):
        """Score one random-forest configuration.

        With SparkTrials this function runs on Spark workers, so anything it
        writes to a driver-side dict is lost. The per-trial scores are therefore
        returned inside the result dict, which hyperopt stores on the trial.
        """
        clf = RandomForestClassifier(n_estimators=params['n_estimators'], max_features=params['max_features'])
        gmeans, f1s, mac_recs = base_skf_test(dataset.data, dataset.target, clf)
        return {
            'loss': 1-np.mean(gmeans),
            'status': STATUS_OK,
            # Plain Python numbers keep the stored result simple to serialise.
            # assumes f1s / mac_recs are numeric sequences (they are only ever
            # passed to np.mean) -- TODO confirm against base_skf_test
            'f1s': np.asarray(f1s, dtype=float).tolist(),
            'mac_recs': np.asarray(mac_recs, dtype=float).tolist(),
        }

    trials = SparkTrials(parallelism=32)
    best = fmin(objective, 
            space = space, 
            algo=tpe.suggest,
            max_evals = 100,
            trials = trials
            )

    # Rebuild the loss -> (f1s, mac_recs) lookup on the driver from the stored
    # trial results; only successfully finished trials carry the score keys.
    result_dict = {
        res['loss']: (res['f1s'], res['mac_recs'])
        for res in trials.results
        if res.get('status') == STATUS_OK
    }

    print(f"{dataset_name} {best}")
    write_result("rf",dataset_name, trials, space_eval(space, best), result_dict)

: 

: 

In [6]:
import pickle

# Reload the object previously pickled to 02b_svm.hyperopt.
# NOTE: unpickling can execute arbitrary code -- only open result files that
# were produced by this project.
with open("./results/hyperopt/02b_svm.hyperopt", mode="rb") as f:
    best_params = pickle.loads(f.read())

In [None]:
# Draw and print one random configuration from the search space. `space` is
# rebound by several cells above (SVM, kNN, MLP, RF), so the output depends on
# which of them was executed last.
print(hyperopt.pyll.stochastic.sample(space))

In [4]:
# Display the best trial of the object unpickled from
# ./results/hyperopt/02b_svm.hyperopt (`best_params` is assigned in another cell).
best_params.best_trial

{'state': 2,
 'tid': 23,
 'spec': None,
 'result': {'loss': 0.06167879369142748, 'status': 'ok'},
 'misc': {'tid': 23,
  'cmd': ('domain_attachment', 'FMinIter_Domain'),
  'workdir': None,
  'idxs': {'degree': [23], 'gamma': [23], 'kernel': [23]},
  'vals': {'degree': [1], 'gamma': [0], 'kernel': [0]}},
 'exp_key': None,
 'owner': None,
 'version': 0,
 'book_time': datetime.datetime(2022, 6, 29, 18, 18, 45, 846000),
 'refresh_time': datetime.datetime(2022, 6, 29, 18, 18, 46, 501000)}

In [33]:
from src.parameter_spaces import svm_space    
from hyperopt import Trials,fmin,tpe,STATUS_OK, space_eval
from sklearn.svm import SVC
import pickle 
from src.utils import base_skf_test
from ds_pipe.datasets.dataset_loader import DatasetCollections
import numpy as np 
import warnings 
warnings.filterwarnings('ignore')
dc = DatasetCollections()

# NOTE(review): `dataset` and `dataset_name` are not assigned in this cell; they
# must already exist in the kernel (e.g. left over from one of the loops above).
# TODO: select the dataset explicitly here so the cell survives Restart & Run All.

# loss -> (f1s, mac_recs) for every configuration evaluated in this run.
result_dict = {}

def objective(params):
    """Score one SVC configuration drawn from svm_space.

    Defined here so the search never picks up whichever `objective` an earlier
    cell left in the kernel (the kNN/MLP/RF objectives read keys that svm_space
    samples may not contain). Mirrors the objective of the SVM optimisation cell.
    """
    clf = SVC(kernel=params['kernel'], gamma=params['gamma'], degree=params['degree'], max_iter=1000)
    gmeans, f1s, mac_recs = base_skf_test(dataset.data, dataset.target, clf)
    loss = 1 - np.mean(gmeans)
    result_dict[loss] = (f1s, mac_recs)
    return {'loss': loss, 'status': STATUS_OK}

trials = Trials()
best = fmin(objective, 
        space = svm_space, 
        algo=tpe.suggest,
        max_evals = 10,
        trials = trials
        )

# Persist the full trial history so it can be inspected later without re-running.
with open(f"./results/hyperopt/{dataset_name}_svm.hyperopt", mode="wb") as f:
    pickle.dump(trials, f)  
print(f"{dataset_name} {best}")

write_result("svm",dataset_name, trials, space_eval(svm_space, best), result_dict)

100%|██████████| 10/10 [00:07<00:00,  1.25trial/s, best loss: 0.3145188900394126]
multi_modal_overlap {'degree': 1, 'gamma': 10, 'kernel': 0}
