### Comparing GridSearch, Random Search, Hyperband, BOHB and GP-UCB

### Load Dataset

In [1]:
import pandas as pd

In [2]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Read the CSV and normalise its header: strip stray whitespace first,
# then replace the header with short, positional column names.
data = 'data/pulsar_stars.csv'
df = pd.read_csv(data)
df.columns = df.columns.str.strip()
df.columns = [
    'IP Mean', 'IP Sd', 'IP Kurtosis', 'IP Skewness',
    'DM-SNR Mean', 'DM-SNR Sd', 'DM-SNR Kurtosis', 'DM-SNR Skewness',
    'target_class',
]

# The label is 'target_class'; every other column is a feature.
y = df['target_class']
X = df.drop(columns=['target_class'])

# Hold out 20% of the rows for testing (fixed seed for a repeatable split).
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=0
)
# Remember the feature names: scaling below turns the frames into arrays.
cols = X_train.columns

# Standardise features: statistics are fitted on the training split only
# and then applied unchanged to the test split.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

### Algorithm Comparison

### 1. GridSearch

In [None]:
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC

# Exhaustive search: integer C values 1..10 with an RBF kernel.
rf_params = {
    'C': list(range(1, 11)),
    "kernel": ['rbf'],
}
clf = SVC()
grid = GridSearchCV(estimator=clf, param_grid=rf_params, cv=3, scoring='accuracy')
grid.fit(X_train, y_train)

# Report the winning configuration and its mean cross-validated accuracy.
print(grid.best_params_)
print("Accuracy:" + str(grid.best_score_))

In [None]:
# Display the per-candidate cross-validation results recorded by the grid search.
grid.cv_results_

### 2. RandomSearch

In [None]:
import scipy.stats as stats
from scipy.stats import randint as sp_randint
from sklearn.model_selection import RandomizedSearchCV
from sklearn.svm import SVC  # imported here so the cell does not rely on the grid-search cell

# scipy's uniform(loc, scale) samples from [loc, loc + scale], so
# uniform(1, 9) covers C in [1, 10] -- the same range as the grid search.
# (The previous uniform(1, 10) actually sampled from [1, 11].)
rf_params = {
    'C': stats.uniform(1, 9),
    "kernel": ['rbf']
}
n_iter_search = 20
clf = SVC(gamma='scale')
# random_state pins the sampled candidates so the cell is reproducible.
Random = RandomizedSearchCV(clf, param_distributions=rf_params, n_iter=n_iter_search,
                            cv=3, scoring='accuracy', random_state=0)
# Fit on the standardised training split, like the other searches in this
# notebook, instead of the raw, unsplit X / y (which leaked the test rows
# into the search and skipped scaling).
Random.fit(X_train, y_train)
print(Random.best_params_)
print("Accuracy:" + str(Random.best_score_))

In [None]:
# Display the per-candidate cross-validation results recorded by the random search.
Random.cv_results_

### 3. Hyperband

In [None]:
import os
import sys

# Location of the local scikit-hyperband checkout. Set the
# SCIKIT_HYPERBAND_PATH environment variable to point somewhere else; the
# fallback is the path this notebook was originally written against, so
# existing setups keep working unchanged.
hyperband_path = os.environ.get(
    "SCIKIT_HYPERBAND_PATH",
    "/home/oem/Desktop/ODM_FinalProject/scikit-hyperband",
)
# Guard against piling up duplicate entries when the cell is re-run.
if hyperband_path not in sys.path:
    sys.path.append(hyperband_path)

In [None]:
from hyperband import HyperbandSearchCV
## Hack since the sklearn version has some issues, https://github.com/scikit-optimize/scikit-optimize/issues/978
# NOTE(review): the linked issue is filed against scikit-optimize; presumably
# the same incompatibility affects scikit-hyperband -- confirm.
def hyperband_search_CV_init(self, estimator, param_distributions,
                 resource_param='n_estimators', eta=3, min_iter=1,
                 max_iter=81, skip_last=0, scoring=None, n_jobs=1,
                 iid=True, refit=True, cv=None,
                 verbose=0, pre_dispatch='2*n_jobs', random_state=None,
                 error_score='raise', return_train_score=False):
        """Replacement ``__init__`` that is monkey-patched onto HyperbandSearchCV.

        Stores the Hyperband-specific settings (``param_distributions``,
        ``resource_param``, ``eta``, ``min_iter``, ``max_iter``, ``skip_last``,
        ``random_state``) on the instance and forwards the generic search
        arguments to the parent class constructor.

        ``iid`` is accepted so existing call sites keep working, but it is
        neither stored nor forwarded to the parent constructor.
        """

        self.param_distributions = param_distributions
        self.resource_param = resource_param
        self.eta = eta
        self.min_iter = min_iter
        self.max_iter = max_iter
        self.skip_last = skip_last
        self.random_state = random_state
        self.multimetric_ = False ## Again Hack to avoid error

        # Explicit two-argument super() is required: this function is defined
        # outside the class body, so zero-argument super() would not work.
        super(HyperbandSearchCV, self).__init__(estimator=estimator, scoring=scoring, n_jobs=n_jobs,
            refit=refit, cv=cv, verbose=verbose,
            pre_dispatch=pre_dispatch, error_score=error_score,
            return_train_score=return_train_score)
        
# Install the replacement constructor on the class.
HyperbandSearchCV.__init__ = hyperband_search_CV_init

In [None]:
import numpy as np  # np is used below; previously it was only imported in a later cell
import scipy
from scipy.stats import randint as sp_randint
from random import randrange as sp_randrange
from sklearn.svm import SVC

# Log-spaced candidate values: param_space[0] is for C (1e-2 .. 1e1),
# param_space[1] is for gamma (1e-4 .. 1e1).
param_space = [list(np.logspace(np.log10(1e-2), np.log10(1e1), base = 10, num = 10)),
               list(np.logspace(np.log10(1e-4), np.log10(1e1), base = 10, num = 10)),
              ]
# The original literal had a bare 'gamma' entry with no value, which is a
# SyntaxError; both parameters now take their candidates from param_space.
rf_params = {
    'C': param_space[0],
    'gamma': param_space[1],
}
clf = SVC(gamma='scale')
# NOTE(review): resource_param='C' names C as Hyperband's budget parameter
# while C is also listed in rf_params -- confirm this is intended (an
# iteration-style parameter such as max_iter is the more usual resource).
hyper = HyperbandSearchCV(clf, param_distributions=rf_params, cv=3, min_iter=1, max_iter=20,
                          scoring='accuracy', resource_param='C')

hyper.fit(X_train, y_train)
print(hyper.best_params_)
print("Accuracy:" + str(hyper.best_score_))

In [None]:
## Reference: https://github.com/Yard1/hpbandster-sklearn

from sklearn.svm import SVC
import numpy as np
from sklearn.utils.validation import check_is_fitted
from hpbandster_sklearn import HpBandSterSearchCV
import ConfigSpace as cs
import ConfigSpace.hyperparameters as CSH


clf = SVC(random_state=0)
np.random.seed(0)

# Search space for HpBandSter: a single continuous hyperparameter C in [1, 10].
param_distributions = cs.ConfigurationSpace(
    seed=0,
    space={
        # Float bounds (note the decimals) make C a float hyperparameter;
        # the previous integer bounds (1, 10) restricted it to whole numbers.
        "C": (1.0, 10.0),
    }
)

# Run the search with the plain Hyperband optimizer and keep the fitted
# search object (fit() is chained onto the constructor call).
hyperband = HpBandSterSearchCV(
    clf, param_distributions, optimizer='hyperband', random_state=0, n_jobs=1,
    cv=3, n_iter=20, verbose=1, scoring='accuracy',
).fit(X_train, y_train)
# hyperband.best_params_

In [None]:
# Display the cross-validation results recorded by the HpBandSter Hyperband search.
hyperband.cv_results_

In [None]:
# Report the best cross-validated score found by the HpBandSter Hyperband search.
print("Accuracy:"+ str(hyperband.best_score_))

### 4. GP-UCB

In [4]:
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# NOTE: this cell rebinds X, y and the train/test splits to scikit-learn's
# digits data, so every search that runs after it is evaluated on a
# different dataset from the searches in the earlier sections.
d = datasets.load_digits()
X, y = d.data, d.target

# Hold out 20% of the samples for testing (fixed seed for a repeatable split).
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=0
)

# Standardise features using statistics fitted on the training split only.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [9]:
## Reference: https://github.com/LiYangHart/Hyperparameter-Optimization-of-Machine-Learning-Algorithms

from skopt import BayesSearchCV 
from sklearn.svm import SVC
## Hack since the sklearn version has some issues, https://github.com/scikit-optimize/scikit-optimize/issues/978
def bayes_search_CV_init(self, estimator, search_spaces, optimizer_kwargs=None,
                         n_iter=50, scoring=None, fit_params=None, n_jobs=1,
                         n_points=1, iid=True, refit=True, cv=None, verbose=0,
                         pre_dispatch='2*n_jobs', random_state=None,
                         error_score='raise', return_train_score=False):
        """Replacement ``__init__`` that is monkey-patched onto BayesSearchCV.

        Stores the Bayesian-search settings (``search_spaces``, ``n_iter``,
        ``n_points``, ``random_state``, ``optimizer_kwargs``, ``fit_params``)
        on the instance, validates the search space via
        ``self._check_search_space``, and forwards the generic search
        arguments to the parent class constructor.

        ``iid`` is accepted so existing call sites keep working, but it is
        neither stored nor forwarded to the parent constructor.
        """

        self.search_spaces = search_spaces
        self.n_iter = n_iter
        self.n_points = n_points
        self.random_state = random_state
        self.optimizer_kwargs = optimizer_kwargs
        self._check_search_space(self.search_spaces)
        self.fit_params = fit_params

        # Explicit two-argument super() is required: this function is defined
        # outside the class body, so zero-argument super() would not work.
        super(BayesSearchCV, self).__init__(
             estimator=estimator, scoring=scoring,
             n_jobs=n_jobs, refit=refit, cv=cv, verbose=verbose,
             pre_dispatch=pre_dispatch, error_score=error_score,
             return_train_score=return_train_score)
        
# Install the replacement constructor on the class.
BayesSearchCV.__init__ = bayes_search_CV_init



from skopt import Optimizer
from skopt.space import Real, Categorical, Integer

# Both ranges span several orders of magnitude, so sample them on a log
# scale (matching the log-spaced candidates used in the Hyperband cell)
# instead of uniformly on a linear scale.
rf_params = {
    'C': Real(0.01, 10.0, prior='log-uniform'),
    'gamma': Real(1e-4, 1e1, prior='log-uniform'),
}
clf = SVC()
# 'LCB' selects the lower-confidence-bound acquisition function;
# random_state pins the optimizer so the cell is reproducible.
Bayes = BayesSearchCV(clf, rf_params, optimizer_kwargs={'acq_func': 'LCB'}, cv=5,
                      n_iter=20, n_jobs=1, scoring='accuracy', random_state=0)
Bayes.fit(X_train, y_train)
print(Bayes.best_params_)
bclf = Bayes.best_estimator_
print("Accuracy:" + str(Bayes.best_score_))



OrderedDict([('C', 10.0), ('gamma', 0.0001)])
Accuracy:0.9464237320944638


In [10]:
# Display the cross-validation results recorded by the Bayesian search.
# (The original trailing empty subscript `[]` was a SyntaxError.)
Bayes.cv_results_

{'mean_fit_time': array([0.12605648, 0.16359534, 0.16378541, 0.15394454, 0.15086856,
        0.12464113, 0.11287956, 0.1135735 , 0.11346483, 0.14633694,
        0.04523683, 0.04898419, 0.04874325, 0.13656893, 0.04663172,
        0.05051475, 0.05333457, 0.05411663, 0.04868598, 0.04855709]),
 'std_fit_time': array([0.00329496, 0.00225543, 0.00709409, 0.00211515, 0.00120157,
        0.00075555, 0.00061261, 0.00103106, 0.00508652, 0.0185607 ,
        0.00992022, 0.01084053, 0.01137152, 0.00264122, 0.0127674 ,
        0.01051861, 0.01466269, 0.00814189, 0.01109374, 0.00753251]),
 'mean_score_time': array([0.02186146, 0.03300776, 0.03281164, 0.03138375, 0.02872086,
        0.02312083, 0.01892171, 0.01935439, 0.02091961, 0.02713552,
        0.01561208, 0.01838746, 0.01638551, 0.02564349, 0.01699338,
        0.01916728, 0.01759872, 0.02124896, 0.01646986, 0.01865001]),
 'std_score_time': array([1.02258460e-04, 1.88961403e-04, 5.86958826e-04, 7.26296972e-04,
        1.02929256e-03, 1.70244516e-

### 5. BOHB

In [None]:
## Reference: https://github.com/Yard1/hpbandster-sklearn

from sklearn.svm import SVC
import numpy as np
from sklearn.utils.validation import check_is_fitted
from hpbandster_sklearn import HpBandSterSearchCV
import ConfigSpace as cs
import ConfigSpace.hyperparameters as CSH


clf = SVC(random_state=0)
np.random.seed(0)

# Search space for HpBandSter: a single continuous hyperparameter C in [1, 10].
param_distributions = cs.ConfigurationSpace(
    seed=0,
    space={
        # Float bounds (note the decimals) make C a float hyperparameter;
        # the previous integer bounds (1, 10) restricted it to whole numbers.
        "C": (1.0, 10.0),
    }
)

# NOTE(review): X_train / y_train were rebound to the digits dataset in the
# GP-UCB section above, so this search does not run on the pulsar data used
# by the earlier sections -- confirm this is intended.
search = HpBandSterSearchCV(
    clf, param_distributions, optimizer='bohb', random_state=0, n_jobs=1,
    n_iter=20, cv=3, scoring='accuracy', verbose=1,
).fit(X_train, y_train)
search.best_params_

In [None]:
# Display the estimator refitted with the best configuration found by BOHB.
search.best_estimator_

In [None]:
# Display the best cross-validated score found by the BOHB search.
search.best_score_

In [None]:
import numpy as np
# Seed NumPy's global RNG; the noisy objective f defined in the next cell
# draws its noise from np.random.randn.
np.random.seed(237)
import matplotlib.pyplot as plt
from skopt.plots import plot_gaussian_process

In [None]:
noise_level = 0.1


def f(x, noise_level=noise_level):
    """Noisy one-dimensional test objective.

    Parameters
    ----------
    x : sequence
        Point to evaluate; only ``x[0]`` is used.
    noise_level : float
        Scale applied to a standard-normal draw that is added to the
        noise-free value. Defaults to the module-level ``noise_level`` as it
        was when the function was defined.

    Returns
    -------
    float
        ``sin(5 * x0) * (1 - tanh(x0 ** 2))`` plus Gaussian noise.
    """
    x0 = x[0]
    clean_value = np.sin(5 * x0) * (1 - np.tanh(x0 ** 2))
    return clean_value + np.random.randn() * noise_level

In [None]:
from skopt import gp_minimize

# Minimise the noisy objective f with a Gaussian-process surrogate.
res = gp_minimize(f,                       # the function to minimize
                  [(-2.0, 2.0)],           # the bounds on each dimension of x
                  acq_func="LCB",          # the acquisition function
                  n_calls=15,              # the number of evaluations of f
                  n_initial_points=5,      # random initialization points (replaces deprecated n_random_starts)
                  noise=noise_level**2,    # the noise variance, tied to f's noise_level (optional)
                  random_state=1234)       # the random seed

In [None]:
# Print the optimisation result object returned by gp_minimize.
print(res)

In [None]:
from skopt.plots import plot_convergence
# Plot the convergence trace of the gp_minimize run; the trailing ';'
# suppresses the textual repr of the returned object in the cell output.
plot_convergence(res);

In [None]:
import numpy as np
from sklearn.svm import SVC




# Function mapping hyperparameters to a real-valued score
def objective(hyperparameters):
    """Train an SVC with the given hyperparameters and return its validation RMSE.

    Parameters
    ----------
    hyperparameters : dict
        Keyword arguments forwarded unchanged to ``SVC(**hyperparameters)``.

    Returns
    -------
    float
        Root-mean-squared difference between the predicted and true labels
        on ``X_valid`` / ``y_valid``.

    Notes
    -----
    Reads ``X_train``, ``y_train``, ``X_valid`` and ``y_valid`` from the
    notebook's global namespace.
    NOTE(review): ``X_valid`` / ``y_valid`` are not defined in any cell
    before this one -- confirm they are created before this is called.
    NOTE(review): RMSE over class labels is an unusual score for a
    classifier -- confirm it is the intended metric.
    """
    # Machine learning model
    svm = SVC(**hyperparameters)

    # Training
    svm.fit(X_train, y_train)

    # Making predictions and evaluating
    predictions = svm.predict(X_valid)
    # Fixed: the original read the undefined name `prediction` (NameError).
    rmse = np.sqrt(np.mean(np.square(predictions - y_valid)))

    return rmse