In [3]:
from src.tests import test_dataset_random, test_dataset_mlp
from sklearn.datasets import load_breast_cancer, fetch_covtype
import numpy as np
import warnings
# Silence every Python warning for the rest of the session (no category or
# module filter is given). NOTE(review): this also hides warnings unrelated to
# the optimisation runs; consider narrowing the filter once the category of
# the expected warning is known.
warnings.filterwarnings('ignore')

In [None]:
# Experiment: breast cancer dataset (easy example), tuned via test_dataset_random.
data = load_breast_cancer()
X, y = data.data, data.target

# One slot per trial. result[0] is recorded as the "sample" score and
# result[1] as the "random" score.
trials = 5
sample, random = [0] * trials, [0] * trials

# `seed` is passed to test_dataset_random for reproducibility; trial i uses
# seed 40 + i. The warning that triggers during a run is due to a lack of
# variance of the Y outcomes during Bayesian optimisation.
for i, trial_seed in enumerate(range(40, 40 + trials)):
    print(f'------Trial number {i + 1}------')
    result = test_dataset_random(X, y, seed=trial_seed)
    sample[i] = result[0]
    random[i] = result[1]

# Mean and standard deviation (np.std defaults) of the best accuracy over all trials.
print(f'The average of best sample accuracy is {np.mean(sample)}')
print(f'The std of best sample accuracy is {np.std(sample)}')

print(f'The average of best random accuracy is {np.mean(random)}')
print(f'The std of best random accuracy is {np.std(random)}')

------Trial number 1------


Bayesian Optimization Progress: 100%|██████████| 20/20 [00:15<00:00,  1.31iter/s, Best Score=0.9626]


Best Hyperparameters (Random Init): {'n_estimators': 54, 'min_samples_split': 5, 'min_samples_leaf': 1}
Best accuracy: 0.9626373626373628



Bayesian Optimization Progress: 100%|██████████| 20/20 [00:11<00:00,  1.71iter/s, Best Score=0.9626]


Best Hyperparameters (Sample Init): {'n_estimators': 59, 'min_samples_split': 6, 'min_samples_leaf': 1}
Best accuracy: 0.9626373626373628

------Trial number 2------


Bayesian Optimization Progress: 100%|██████████| 20/20 [00:15<00:00,  1.33iter/s, Best Score=0.9648]


Best Hyperparameters (Random Init): {'n_estimators': 200, 'min_samples_split': 2, 'min_samples_leaf': 1}
Best accuracy: 0.9648351648351647



Bayesian Optimization Progress: 100%|██████████| 20/20 [00:14<00:00,  1.35iter/s, Best Score=0.9670]


Best Hyperparameters (Sample Init): {'n_estimators': 98, 'min_samples_split': 3, 'min_samples_leaf': 1}
Best accuracy: 0.9670329670329672

------Trial number 3------


Bayesian Optimization Progress: 100%|██████████| 20/20 [00:18<00:00,  1.08iter/s, Best Score=0.9626]


Best Hyperparameters (Random Init): {'n_estimators': 200, 'min_samples_split': 2, 'min_samples_leaf': 1}
Best accuracy: 0.9626373626373628



Bayesian Optimization Progress: 100%|██████████| 20/20 [00:17<00:00,  1.16iter/s, Best Score=0.9626]


Best Hyperparameters (Sample Init): {'n_estimators': 177, 'min_samples_split': 2, 'min_samples_leaf': 1}
Best accuracy: 0.9626373626373628

------Trial number 4------


Bayesian Optimization Progress: 100%|██████████| 20/20 [00:18<00:00,  1.08iter/s, Best Score=0.9582]


Best Hyperparameters (Random Init): {'n_estimators': 101, 'min_samples_split': 2, 'min_samples_leaf': 3}
Best accuracy: 0.9582417582417582



Bayesian Optimization Progress: 100%|██████████| 20/20 [00:19<00:00,  1.05iter/s, Best Score=0.9604]


Best Hyperparameters (Sample Init): {'n_estimators': 123, 'min_samples_split': 10, 'min_samples_leaf': 4}
Best accuracy: 0.9604395604395604

------Trial number 5------


Bayesian Optimization Progress: 100%|██████████| 20/20 [00:21<00:00,  1.07s/iter, Best Score=0.9714]


Best Hyperparameters (Random Init): {'n_estimators': 182, 'min_samples_split': 4, 'min_samples_leaf': 1}
Best accuracy: 0.9714285714285713



Bayesian Optimization Progress: 100%|██████████| 20/20 [00:17<00:00,  1.14iter/s, Best Score=0.9758]

Best Hyperparameters (Sample Init): {'n_estimators': 97, 'min_samples_split': 3, 'min_samples_leaf': 1}
Best accuracy: 0.9758241758241757

The average of best sample accuracy is 0.9657142857142859
The std of best sample accuracy is 0.005490108130460071
The average of best random accuracy is 0.9639560439560441
The std of best random accuracy is 0.004306795152146205





In [None]:
# Experiment: Covertype dataset (a little more complicated), tuned via
# test_dataset_random on a reduced number of data points.
data = fetch_covtype()

# Keep only the first `subset_size` rows to bound the run time.
# NOTE(review): the slice takes rows in dataset order (fetch_covtype does not
# shuffle unless asked to) - confirm that this subset is representative.
subset_size = 10 ** 4
X = data.data[:subset_size]
y = data.target[:subset_size]

# Declare the trial count and allocate fresh result lists here so the cell
# runs on its own and never reuses (or overwrites) the lists that hold another
# experiment's scores.
trials = 5
sample = [0] * trials
random = [0] * trials

# Run `trials` independent repetitions; trial i uses seed 40 + i.
# result[0] is recorded as the "sample" score, result[1] as the "random" score.
for i in range(trials):
    print(f'------Trial number {i + 1}------')
    result = test_dataset_random(X, y, seed=40 + i)
    sample[i] = result[0]
    random[i] = result[1]

# Mean and standard deviation (np.std defaults) of the best accuracy over all trials.
print(f'The average of best sample accuracy is {np.mean(sample)}')
print(f'The std of best sample accuracy is {np.std(sample)}')

print(f'The average of best random accuracy is {np.mean(random)}')
print(f'The std of best random accuracy is {np.std(random)}')


------Trial number 1------


Bayesian Optimization Progress: 100%|██████████| 20/20 [00:49<00:00,  2.48s/iter, Best Score=0.8448]


Best Hyperparameters (Random Init): {'n_estimators': 200, 'min_samples_split': 3, 'min_samples_leaf': 1}
Best accuracy: 0.8447500000000001



Bayesian Optimization Progress: 100%|██████████| 20/20 [00:41<00:00,  2.09s/iter, Best Score=0.8471]


Best Hyperparameters (Sample Init): {'n_estimators': 145, 'min_samples_split': 2, 'min_samples_leaf': 1}
Best accuracy: 0.8471250000000001

------Trial number 2------


Bayesian Optimization Progress: 100%|██████████| 20/20 [00:46<00:00,  2.31s/iter, Best Score=0.8435]


Best Hyperparameters (Random Init): {'n_estimators': 190, 'min_samples_split': 4, 'min_samples_leaf': 1}
Best accuracy: 0.8435



Bayesian Optimization Progress: 100%|██████████| 20/20 [00:40<00:00,  2.04s/iter, Best Score=0.8444]


Best Hyperparameters (Sample Init): {'n_estimators': 133, 'min_samples_split': 4, 'min_samples_leaf': 1}
Best accuracy: 0.844375

------Trial number 3------


Bayesian Optimization Progress: 100%|██████████| 20/20 [00:56<00:00,  2.81s/iter, Best Score=0.8480]


Best Hyperparameters (Random Init): {'n_estimators': 200, 'min_samples_split': 2, 'min_samples_leaf': 1}
Best accuracy: 0.8480000000000001



Bayesian Optimization Progress: 100%|██████████| 20/20 [00:55<00:00,  2.77s/iter, Best Score=0.8480]


Best Hyperparameters (Sample Init): {'n_estimators': 200, 'min_samples_split': 2, 'min_samples_leaf': 1}
Best accuracy: 0.8480000000000001

------Trial number 4------


Bayesian Optimization Progress: 100%|██████████| 20/20 [00:50<00:00,  2.52s/iter, Best Score=0.8421]


Best Hyperparameters (Random Init): {'n_estimators': 200, 'min_samples_split': 2, 'min_samples_leaf': 1}
Best accuracy: 0.842125



Bayesian Optimization Progress: 100%|██████████| 20/20 [00:53<00:00,  2.66s/iter, Best Score=0.8430]


Best Hyperparameters (Sample Init): {'n_estimators': 171, 'min_samples_split': 2, 'min_samples_leaf': 1}
Best accuracy: 0.843

------Trial number 5------


Bayesian Optimization Progress: 100%|██████████| 20/20 [00:50<00:00,  2.50s/iter, Best Score=0.8425]


Best Hyperparameters (Random Init): {'n_estimators': 165, 'min_samples_split': 2, 'min_samples_leaf': 1}
Best accuracy: 0.8425



Bayesian Optimization Progress: 100%|██████████| 20/20 [00:50<00:00,  2.52s/iter, Best Score=0.8432]

Best Hyperparameters (Sample Init): {'n_estimators': 200, 'min_samples_split': 3, 'min_samples_leaf': 1}
Best accuracy: 0.8432499999999999

The average of best sample accuracy is 0.8451500000000001
The std of best sample accuracy is 0.0020423638265500764
The average of best random accuracy is 0.8441750000000001
The std of best random accuracy is 0.002117781858454766





: 

In [4]:
# Experiment: breast cancer dataset (easy example), tuned via test_dataset_mlp.
data = load_breast_cancer()
X, y = data.data, data.target

# One slot per trial. result[0] is recorded as the "sample" score and
# result[1] as the "random" score.
trials = 5
sample, random = [0] * trials, [0] * trials

# `seed` is passed to test_dataset_mlp for reproducibility; trial i uses
# seed 40 + i. The warning that triggers during a run is due to a lack of
# variance of the Y outcomes during Bayesian optimisation.
for i, trial_seed in enumerate(range(40, 40 + trials)):
    print(f'------Trial number {i + 1}------')
    result = test_dataset_mlp(X, y, seed=trial_seed)
    sample[i] = result[0]
    random[i] = result[1]

# Mean and standard deviation (np.std defaults) of the best accuracy over all trials.
print(f'The average of best sample accuracy is {np.mean(sample)}')
print(f'The std of best sample accuracy is {np.std(sample)}')

print(f'The average of best random accuracy is {np.mean(random)}')
print(f'The std of best random accuracy is {np.std(random)}')

------Trial number 1------


Bayesian Optimization Progress: 100%|██████████| 5/5 [00:02<00:00,  1.69iter/s, Best Score=0.9692]


Best Score: 0.9692
Best Parameters: {'learning_rate_init': 0.09020209354408544, 'alpha': 0.0019768710853122367}
Best Hyperparameters (Random Init): {'learning_rate_init': 0.09020209354408544, 'alpha': 0.0019768710853122367}
Best accuracy: 0.9692307692307693



Bayesian Optimization Progress: 100%|██████████| 5/5 [00:02<00:00,  1.68iter/s, Best Score=0.9670]


Best Score: 0.9670
Best Parameters: {'learning_rate_init': 0.01863658973996561, 'alpha': 0.05933405212084762}
Best Hyperparameters (Sample Init): {'learning_rate_init': 0.01863658973996561, 'alpha': 0.05933405212084762}
Best accuracy: 0.9670329670329669

------Trial number 2------


Bayesian Optimization Progress: 100%|██████████| 5/5 [00:02<00:00,  1.80iter/s, Best Score=0.9736]


Best Score: 0.9736
Best Parameters: {'learning_rate_init': 0.03813240733652989, 'alpha': 0.07386504871155573}
Best Hyperparameters (Random Init): {'learning_rate_init': 0.03813240733652989, 'alpha': 0.07386504871155573}
Best accuracy: 0.9736263736263737



Bayesian Optimization Progress: 100%|██████████| 5/5 [00:02<00:00,  1.88iter/s, Best Score=0.9692]


Best Score: 0.9692
Best Parameters: {'learning_rate_init': 0.008554530154514241, 'alpha': 9.999999747378752e-06}
Best Hyperparameters (Sample Init): {'learning_rate_init': 0.008554530154514241, 'alpha': 9.999999747378752e-06}
Best accuracy: 0.9692307692307693

------Trial number 3------


Bayesian Optimization Progress: 100%|██████████| 5/5 [00:02<00:00,  2.03iter/s, Best Score=0.9802]


Best Score: 0.9802
Best Parameters: {'learning_rate_init': 0.01040246747469625, 'alpha': 0.03254078521524321}
Best Hyperparameters (Random Init): {'learning_rate_init': 0.01040246747469625, 'alpha': 0.03254078521524321}
Best accuracy: 0.9802197802197803



Bayesian Optimization Progress: 100%|██████████| 5/5 [00:02<00:00,  1.77iter/s, Best Score=0.9802]


Best Score: 0.9802
Best Parameters: {'learning_rate_init': 0.01388634884608099, 'alpha': 0.04097728426137092}
Best Hyperparameters (Sample Init): {'learning_rate_init': 0.01388634884608099, 'alpha': 0.04097728426137092}
Best accuracy: 0.9802197802197803

------Trial number 4------


Bayesian Optimization Progress: 100%|██████████| 5/5 [00:02<00:00,  2.03iter/s, Best Score=0.9780]


Best Score: 0.9780
Best Parameters: {'learning_rate_init': 0.03414977842706519, 'alpha': 9.999999747378752e-06}
Best Hyperparameters (Random Init): {'learning_rate_init': 0.03414977842706519, 'alpha': 9.999999747378752e-06}
Best accuracy: 0.9780219780219779



Bayesian Optimization Progress: 100%|██████████| 5/5 [00:02<00:00,  2.09iter/s, Best Score=0.9802]


Best Score: 0.9802
Best Parameters: {'learning_rate_init': 0.014733291365691752, 'alpha': 0.009063465592257668}
Best Hyperparameters (Sample Init): {'learning_rate_init': 0.014733291365691752, 'alpha': 0.009063465592257668}
Best accuracy: 0.9802197802197803

------Trial number 5------


Bayesian Optimization Progress: 100%|██████████| 5/5 [00:02<00:00,  1.78iter/s, Best Score=0.9758]


Best Score: 0.9758
Best Parameters: {'learning_rate_init': 0.09857896625330118, 'alpha': 9.999999747378752e-06}
Best Hyperparameters (Random Init): {'learning_rate_init': 0.09857896625330118, 'alpha': 9.999999747378752e-06}
Best accuracy: 0.9758241758241759



Bayesian Optimization Progress: 100%|██████████| 5/5 [00:02<00:00,  1.92iter/s, Best Score=0.9780]

Best Score: 0.9780
Best Parameters: {'learning_rate_init': 0.02756694019242078, 'alpha': 0.014054598845997444}
Best Hyperparameters (Sample Init): {'learning_rate_init': 0.02756694019242078, 'alpha': 0.014054598845997444}
Best accuracy: 0.9780219780219781

The average of best sample accuracy is 0.974945054945055
The std of best sample accuracy is 0.005663340099659438
The average of best random accuracy is 0.9753846153846155
The std of best random accuracy is 0.003781241875623088





In [5]:
# Experiment: Covertype dataset (a little more complicated), tuned via
# test_dataset_mlp on a reduced number of data points.
data = fetch_covtype()

# Keep only the first `subset_size` rows to bound the run time.
# NOTE(review): the slice takes rows in dataset order (fetch_covtype does not
# shuffle unless asked to) - confirm that this subset is representative.
subset_size = 10 ** 4
X = data.data[:subset_size]
y = data.target[:subset_size]

# Declare the trial count and allocate fresh result lists here so the cell
# runs on its own and never reuses (or overwrites) the lists that hold another
# experiment's scores.
trials = 5
sample = [0] * trials
random = [0] * trials

# Run `trials` independent repetitions; trial i uses seed 40 + i.
# result[0] is recorded as the "sample" score, result[1] as the "random" score.
for i in range(trials):
    print(f'------Trial number {i + 1}------')
    result = test_dataset_mlp(X, y, seed=40 + i)
    sample[i] = result[0]
    random[i] = result[1]

# Mean and standard deviation (np.std defaults) of the best accuracy over all trials.
print(f'The average of best sample accuracy is {np.mean(sample)}')
print(f'The std of best sample accuracy is {np.std(sample)}')

print(f'The average of best random accuracy is {np.mean(random)}')
print(f'The std of best random accuracy is {np.std(random)}')

------Trial number 1------


Bayesian Optimization Progress: 100%|██████████| 5/5 [00:26<00:00,  5.38s/iter, Best Score=0.8019]


Best Score: 0.8019
Best Parameters: {'learning_rate_init': 0.030651388559844603, 'alpha': 9.999999747378752e-06}
Best Hyperparameters (Random Init): {'learning_rate_init': 0.030651388559844603, 'alpha': 9.999999747378752e-06}
Best accuracy: 0.8018749999999999



Bayesian Optimization Progress: 100%|██████████| 5/5 [00:21<00:00,  4.31s/iter, Best Score=0.8024]


Best Score: 0.8024
Best Parameters: {'learning_rate_init': 0.013824308477512042, 'alpha': 9.999999747378752e-06}
Best Hyperparameters (Sample Init): {'learning_rate_init': 0.013824308477512042, 'alpha': 9.999999747378752e-06}
Best accuracy: 0.8023750000000002

------Trial number 2------


Bayesian Optimization Progress: 100%|██████████| 5/5 [00:29<00:00,  5.95s/iter, Best Score=0.7930]


Best Score: 0.7930
Best Parameters: {'learning_rate_init': 0.029844580737943785, 'alpha': 9.999999747378752e-06}
Best Hyperparameters (Random Init): {'learning_rate_init': 0.029844580737943785, 'alpha': 9.999999747378752e-06}
Best accuracy: 0.7929999999999999



Bayesian Optimization Progress: 100%|██████████| 5/5 [00:25<00:00,  5.06s/iter, Best Score=0.8044]


Best Score: 0.8044
Best Parameters: {'learning_rate_init': 0.003670313289467468, 'alpha': 0.10000001026128302}
Best Hyperparameters (Sample Init): {'learning_rate_init': 0.003670313289467468, 'alpha': 0.10000001026128302}
Best accuracy: 0.804375

------Trial number 3------


Bayesian Optimization Progress: 100%|██████████| 5/5 [00:28<00:00,  5.79s/iter, Best Score=0.8043]


Best Score: 0.8043
Best Parameters: {'learning_rate_init': 0.010655684277454423, 'alpha': 0.004418588693356359}
Best Hyperparameters (Random Init): {'learning_rate_init': 0.010655684277454423, 'alpha': 0.004418588693356359}
Best accuracy: 0.80425



Bayesian Optimization Progress: 100%|██████████| 5/5 [00:27<00:00,  5.43s/iter, Best Score=0.8107]


Best Score: 0.8107
Best Parameters: {'learning_rate_init': 0.013711242479867403, 'alpha': 9.999999747378752e-06}
Best Hyperparameters (Sample Init): {'learning_rate_init': 0.013711242479867403, 'alpha': 9.999999747378752e-06}
Best accuracy: 0.81075

------Trial number 4------


Bayesian Optimization Progress: 100%|██████████| 5/5 [00:26<00:00,  5.24s/iter, Best Score=0.7930]


Best Score: 0.7930
Best Parameters: {'learning_rate_init': 0.028659315299275984, 'alpha': 9.999999747378752e-06}
Best Hyperparameters (Random Init): {'learning_rate_init': 0.028659315299275984, 'alpha': 9.999999747378752e-06}
Best accuracy: 0.7929999999999999



Bayesian Optimization Progress: 100%|██████████| 5/5 [00:24<00:00,  4.84s/iter, Best Score=0.8020]


Best Score: 0.8020
Best Parameters: {'learning_rate_init': 0.008378159958474896, 'alpha': 0.10000001026128302}
Best Hyperparameters (Sample Init): {'learning_rate_init': 0.008378159958474896, 'alpha': 0.10000001026128302}
Best accuracy: 0.8019999999999999

------Trial number 5------


Bayesian Optimization Progress: 100%|██████████| 5/5 [00:28<00:00,  5.65s/iter, Best Score=0.8017]


Best Score: 0.8017
Best Parameters: {'learning_rate_init': 0.02122612372988869, 'alpha': 9.999999747378752e-06}
Best Hyperparameters (Random Init): {'learning_rate_init': 0.02122612372988869, 'alpha': 9.999999747378752e-06}
Best accuracy: 0.80175



Bayesian Optimization Progress: 100%|██████████| 5/5 [00:24<00:00,  4.83s/iter, Best Score=0.8004]

Best Score: 0.8004
Best Parameters: {'learning_rate_init': 0.014159247002564926, 'alpha': 9.999999747378752e-06}
Best Hyperparameters (Sample Init): {'learning_rate_init': 0.014159247002564926, 'alpha': 9.999999747378752e-06}
Best accuracy: 0.8003750000000001

The average of best sample accuracy is 0.803975
The std of best sample accuracy is 0.0036188741343130145
The average of best random accuracy is 0.798775
The std of best random accuracy is 0.004798697740012409



