# Grid Search
Grid Search builds a model for every combination of the candidate hyperparameter values supplied for the model, evaluates each one, and reports the best-scoring combination.

In [1]:
# import libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

In [2]:
# relative path of the CSV file holding the social network ads data
ads_csv_path = "datasets/social_network_ads.csv"

# read the data set into a DataFrame
ads_df = pd.read_csv(ads_csv_path)

# preview the first five rows
ads_df.head(n=5)

Unnamed: 0,User ID,Gender,Age,EstimatedSalary,Purchased
0,15624510,Male,19,19000,0
1,15810944,Male,35,20000,0
2,15668575,Female,26,43000,0
3,15603246,Female,27,57000,0
4,15804002,Male,19,76000,0


In [3]:
# Select the features and the target by column name instead of by position:
# if the CSV gains, loses, or reorders a column, this raises a KeyError
# rather than silently feeding the wrong columns to the model.

# x is the Age and Estimated Salary columns (as a 2-column NumPy array)
x = ads_df[["Age", "EstimatedSalary"]].values

# y is the Purchased column (as a 1-D NumPy array)
y = ads_df["Purchased"].values

In [4]:
# hold out 25% of the rows for testing; the fixed random_state keeps the split reproducible
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.25,
    random_state=0,
)

In [5]:
# import a Standardization Scaler for Feature Scaling
from sklearn.preprocessing import StandardScaler

# learn the scaling statistics from the training set only,
# then apply that same scaling to both the training and testing sets
sc_x = StandardScaler().fit(x_train)
x_train = sc_x.transform(x_train)
x_test = sc_x.transform(x_test)



# Kernel RBF Support Vector Machine

In [6]:
# import the support vector classifier class
from sklearn.svm import SVC

In [7]:
# build an SVC with its default hyperparameters (they are tuned by the grid search later)
classifier = SVC()

# train it on the scaled training set; the fitted estimator is the cell's output
classifier.fit(X=x_train, y=y_train)

SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto_deprecated',
  kernel='rbf', max_iter=-1, probability=False, random_state=None,
  shrinking=True, tol=0.001, verbose=False)

# Grid Search
This Grid Search tells us whether a linear or a kernel (RBF) SVM performs better on this data, and which values of the "C" (penalty) and "gamma" hyperparameters to use.

As seen below, the best parameters determined by grid search were C = 1, gamma = 0.5, and kernel = rbf.

In [8]:
# import grid search
from sklearn.model_selection import GridSearchCV

In [9]:
"""
the hyperparameters that we want to optimize for the model

The variable is a list of dictionaries. Each dictionary is a separate grid that
maps a hyperparameter name to the list of candidate values to try for it.
"""
# candidate values: C is tried with both kernels, gamma only with the rbf kernel
c_candidates = [1, 10, 100, 1000]
gamma_candidates = [0.5, 0.1, 0.01, 0.001]

# one sub-grid per kernel; each sub-grid gets its own copy of the candidate lists
parameters = [
    dict(C=list(c_candidates), kernel=["linear"]),
    dict(C=list(c_candidates), kernel=["rbf"], gamma=list(gamma_candidates)),
]

In [10]:
"""
perform a grid search on the classification model using the parameter grid above
- estimator = classifier is the model
- param_grid = parameters is the list of hyperparameters to optimize
- scoring = accuracy is the scoring mechanism to determine the optimal hyperparameters
- cv = 10 means to use 10 folds (k-fold cross validation) to evaluate each combination
- n_jobs = -1 means to use all the CPUs on the machine (used on large data sets)
"""
# gather the search configuration as data, then hand it to GridSearchCV in one go
search_options = {
    "estimator": classifier,
    "param_grid": parameters,
    "scoring": "accuracy",
    "cv": 10,
    "n_jobs": -1,
}
grid_search = GridSearchCV(**search_options)

In [11]:
# run the search over every hyperparameter combination using the training set
grid_search = grid_search.fit(X=x_train, y=y_train)



In [12]:
# accuracy achieved by the best hyperparameter combination (90% in the recorded run)
best_accuracy = grid_search.best_score_
best_accuracy

0.9

In [13]:
# the hyperparameter combination that grid search scored highest
best_hyperparameters = grid_search.best_params_
best_hyperparameters

{'C': 1, 'gamma': 0.5, 'kernel': 'rbf'}