# Tuning SVM hyperparameters on spambase dataset

There are two hyperparameters

1. C - the regularization parameter
2. gamma - the coefficient of the Gaussian (RBF) kernel, which is inversely related to the kernel bandwidth

Both hyperparameters are optimized on the log scale. The loss function used here is the misclassification rate (one minus the accuracy); a square-root variant of the loss is left commented out in the code below.

In [1]:
import numpy as np
import pandas as pd

from fcvopt.optimizers.fcvopt import FCVOpt

from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score

import matplotlib as mpl
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
# Render every figure at 300 dpi so the plots below display at high resolution.
mpl.rc('figure', dpi=300)

In [3]:
# Read the headerless spambase file; the last column goes to y and every
# other column goes to X.
dat = pd.read_csv('data/spambase.data', header=None)
X, y = dat.values[:, :-1], dat.values[:, -1]

In [4]:
# Classifier: standardize the inputs, then fit a support vector classifier.
# Both tuned hyperparameters get the same search interval [e^-10, e^10];
# each key receives its own freshly built list.
clf = Pipeline(steps=[
    ('scale', StandardScaler()),
    ('svc', SVC()),
])
param_bounds = {
    name: [np.exp(-10), np.exp(10)]
    for name in ('svc__C', 'svc__gamma')
}

In [None]:
# optimizing the misclassification rate (the square-root variant below is kept for reference)
# def sqrt_mcr(y_true,y_pred):
#     return np.sqrt(1-accuracy_score(y_true,y_pred))

def mcr(y_true, y_pred):
    """Return the misclassification rate: one minus the accuracy score.

    Both arguments are passed straight through to ``accuracy_score``.
    """
    accuracy = accuracy_score(y_true, y_pred)
    return 1 - accuracy

# Build the optimizer around the pipeline, its search bounds and the loss.
# NOTE(review): the meaning of each keyword is defined by fcvopt and is not
# visible in this notebook -- confirm against the FCVOpt documentation.
opt = FCVOpt(
    clf, param_bounds, mcr,
    kernel="matern",
    logscale=np.array([0, 1]),
    max_iter=30,
    integer=[],
    seed=1234,
    verbose=2,
    n_folds=5,
    n_init=3,
)

# Run the hyperparameter search on the loaded data.
opt.run(X, y)

  iter    f_best   acq_best    sigma_f
     0 2.717e-01 1.225e-01 1.032e-01
     1 2.703e-01 1.142e-01 1.106e-01
     2 2.884e-01 1.449e-01 9.905e-02
     3 2.098e-01 7.832e-02 9.911e-02
     4 1.695e-01 2.597e-02 1.111e-01
     5 8.934e-02 -3.732e-02 1.108e-01
     6 8.846e-02 -7.255e-02 1.161e-01
     7 6.604e-02 -3.380e-02 1.213e-01
     8 6.355e-02 -2.511e-02 1.281e-01
     9 5.905e-02 3.039e-03 1.220e-01
  iter    f_best   acq_best    sigma_f
    10 5.656e-02 -1.108e-02 1.294e-01
    11 5.761e-02 -6.179e-03 1.340e-01


In [None]:
# Trace of the predicted incumbent value at each iteration.
plt.figure()
plt.plot(opt.y_inc)
plt.gca().set(
    xlabel='Iteration',
    ylabel=' MCR',
    title='Predicted incumbent at each iteration',
)
plt.grid()
plt.show()

In [None]:
# Termination metric per iteration, with a dashed red reference line at 0.05
# drawn from iteration 0 to max_iter - 1.
plt.figure()
plt.hlines(y=0.05, xmin=0, xmax=opt.max_iter - 1,
           colors="red", linestyles="dashed")
plt.plot(opt.term_crit())
plt.gca().set(xlabel='Iteration', ylabel='Termination metric')
plt.grid()
plt.show()

In [None]:
# Total optimizer time divided by 60 (presumably seconds -> minutes; confirm units).
opt.total_time/60

In [None]:
# Time recorded by the optimizer for MCMC (units not shown here; presumably seconds -- confirm).
opt.mcmc_time