# 1. Grid Search Function

### Preparation

In [2]:
# Import modules
import numpy as np

from sklearn.model_selection import GridSearchCV  # Grid search
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

### Prepare Dataset

In [5]:
# Set Random Seed
rng = np.random.RandomState(0)

# Generate data
x = rng.randn(100, 10)  # (Number of samples, Number of features)
y = rng.binomial(1, 0.5, 100)

print(x, y)

[[ 1.76405235e+00  4.00157208e-01  9.78737984e-01  2.24089320e+00
   1.86755799e+00 -9.77277880e-01  9.50088418e-01 -1.51357208e-01
  -1.03218852e-01  4.10598502e-01]
 [ 1.44043571e-01  1.45427351e+00  7.61037725e-01  1.21675016e-01
   4.43863233e-01  3.33674327e-01  1.49407907e+00 -2.05158264e-01
   3.13067702e-01 -8.54095739e-01]
 [-2.55298982e+00  6.53618595e-01  8.64436199e-01 -7.42165020e-01
   2.26975462e+00 -1.45436567e+00  4.57585173e-02 -1.87183850e-01
   1.53277921e+00  1.46935877e+00]
 [ 1.54947426e-01  3.78162520e-01 -8.87785748e-01 -1.98079647e+00
  -3.47912149e-01  1.56348969e-01  1.23029068e+00  1.20237985e+00
  -3.87326817e-01 -3.02302751e-01]
 [-1.04855297e+00 -1.42001794e+00 -1.70627019e+00  1.95077540e+00
  -5.09652182e-01 -4.38074302e-01 -1.25279536e+00  7.77490356e-01
  -1.61389785e+00 -2.12740280e-01]
 [-8.95466561e-01  3.86902498e-01 -5.10805138e-01 -1.18063218e+00
  -2.81822283e-02  4.28331871e-01  6.65172224e-02  3.02471898e-01
  -6.34322094e-01 -3.62741166e-01

In [7]:
# Create Pipelines
svm_pipeline = Pipeline([
    ('scaler', StandardScaler()),
    ('svm', SVC())
])

print(svm_pipeline)

Pipeline(steps=[('scaler', StandardScaler()), ('svm', SVC())])


In [11]:
# Generate Hyperparameter grid
param_grid = {
    'svm__C': [0.1, 1, 10],
    'svm__kernel' : ['linear', 'rbf'],
    'svm__gamma' : ['scale', 'auto']
}

### Run Grid Search

In [12]:
# Define grid search
grid_search = GridSearchCV(svm_pipeline,
                           param_grid, 
                           cv = 5)

# Run Grid search
grid_search.fit(x, y)

print('The Best Parameters: ', grid_search.best_params_)
print('The Best Cross-Validation Score: ', grid_search.best_score_)

The Best Parameters:  {'svm__C': 0.1, 'svm__gamma': 'scale', 'svm__kernel': 'rbf'}
The Best Cross-Validation Score:  0.5799999999999998


# 2. Random Search

### Preparation

In [13]:
# Import modules
from sklearn.model_selection import RandomizedSearchCV
from sklearn.svm import SVC
from scipy.stats import loguniform

from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

### Prepare Dataset

In [15]:
# Load dataset
digits = load_digits()
print(digits)

{'data': array([[ 0.,  0.,  5., ...,  0.,  0.,  0.],
       [ 0.,  0.,  0., ..., 10.,  0.,  0.],
       [ 0.,  0.,  0., ..., 16.,  9.,  0.],
       ...,
       [ 0.,  0.,  1., ...,  6.,  0.,  0.],
       [ 0.,  0.,  2., ..., 12.,  0.,  0.],
       [ 0.,  0., 10., ..., 12.,  1.,  0.]]), 'target': array([0, 1, 2, ..., 8, 9, 8]), 'frame': None, 'feature_names': ['pixel_0_0', 'pixel_0_1', 'pixel_0_2', 'pixel_0_3', 'pixel_0_4', 'pixel_0_5', 'pixel_0_6', 'pixel_0_7', 'pixel_1_0', 'pixel_1_1', 'pixel_1_2', 'pixel_1_3', 'pixel_1_4', 'pixel_1_5', 'pixel_1_6', 'pixel_1_7', 'pixel_2_0', 'pixel_2_1', 'pixel_2_2', 'pixel_2_3', 'pixel_2_4', 'pixel_2_5', 'pixel_2_6', 'pixel_2_7', 'pixel_3_0', 'pixel_3_1', 'pixel_3_2', 'pixel_3_3', 'pixel_3_4', 'pixel_3_5', 'pixel_3_6', 'pixel_3_7', 'pixel_4_0', 'pixel_4_1', 'pixel_4_2', 'pixel_4_3', 'pixel_4_4', 'pixel_4_5', 'pixel_4_6', 'pixel_4_7', 'pixel_5_0', 'pixel_5_1', 'pixel_5_2', 'pixel_5_3', 'pixel_5_4', 'pixel_5_5', 'pixel_5_6', 'pixel_5_7', 'pixel_6_0', '

In [17]:
# Set values
x = digits.data
y = digits.target

print(x)
print(y)

[[ 0.  0.  5. ...  0.  0.  0.]
 [ 0.  0.  0. ... 10.  0.  0.]
 [ 0.  0.  0. ... 16.  9.  0.]
 ...
 [ 0.  0.  1. ...  6.  0.  0.]
 [ 0.  0.  2. ... 12.  0.  0.]
 [ 0.  0. 10. ... 12.  1.  0.]]
[0 1 2 ... 8 9 8]


### Split Dataset

In [18]:
# Split dataset
x_train, x_test, y_train, y_test = train_test_split(x, y,
                                                    test_size = 0.3,
                                                    random_state = 85)

print('x_train: ', x_train)
print('x_test: ', x_test)
print('y_train: ', y_train)
print('x_test: ', x_test)

x_train:  [[ 0.  3. 10. ...  2.  0.  0.]
 [ 0.  0.  4. ... 16. 15.  8.]
 [ 0.  0. 12. ...  0.  0.  0.]
 ...
 [ 0.  0.  4. ...  7.  0.  0.]
 [ 0.  0.  2. ...  0.  0.  0.]
 [ 0.  0.  1. ... 14.  9.  0.]]
x_test:  [[ 0.  0.  0. ... 12.  0.  0.]
 [ 0.  1.  8. ...  7.  0.  0.]
 [ 0.  0. 12. ... 15.  1.  0.]
 ...
 [ 0.  0.  8. ... 12. 13.  1.]
 [ 0.  0.  1. ...  0.  0.  0.]
 [ 0.  0.  3. ...  0.  0.  0.]]
y_train:  [5 1 7 ... 6 7 6]
x_test:  [[ 0.  0.  0. ... 12.  0.  0.]
 [ 0.  1.  8. ...  7.  0.  0.]
 [ 0.  0. 12. ... 15.  1.  0.]
 ...
 [ 0.  0.  8. ... 12. 13.  1.]
 [ 0.  0.  1. ...  0.  0.  0.]
 [ 0.  0.  3. ...  0.  0.  0.]]


### Set Hyperparameters

In [19]:
# Set parameters
params = {
    'C': loguniform(1e-4, 100),   # Regulation parameter for SVM model: Set a range of log scale from '1e-4' to '100'
    'kernel': ['linear', 'poly', 'rbf', 'sigmoid'],
    'gamma': ['scale', 'auto'] + list(loguniform(1e-4, 10).rvs(10)),  
    'degree': range(1, 6),    # Degree for 'Poly' kernel function -> 'range(1, 6)': between 1 and 5
    'coef0': loguniform(1e-4, 10).rvs(10)  # Constant term for Poly & Sigmoid kernel functions
}

### Run Random Search

In [23]:
# Define a model
svm_model = SVC()

# Define Random Search
random_search = RandomizedSearchCV(svm_model,
                                   params,
                                   n_iter = 100,
                                   cv = 5,
                                   verbose = 2,
                                   n_jobs = -1)

# Run Random Search
random_search.fit(x_train, y_train)

print('The Best Hyperparameters: ', random_search.best_params_)

# Save the best model
best_model = random_search.best_estimator_

Fitting 5 folds for each of 100 candidates, totalling 500 fits
The Best Hyperparameters:  {'C': 5.251438137958801, 'coef0': 3.5756433831559002, 'degree': 3, 'gamma': 8.600296266996716, 'kernel': 'poly'}


### Validate Test Data

In [25]:
# Validate test data
y_pred = best_model.predict(x_test)

print('Test Accuracy: ', accuracy_score(y_test, y_pred))

Test Accuracy:  0.9814814814814815
