In [None]:
#grid search - hyperparameter tuning

"""
Grid search is a hyperparameter tuning technique: it trains and evaluates a model for every combination of candidate hyperparameter values and keeps the 
combination that scores best. This is important because the performance of the model depends heavily on the hyperparameter values it is given.
"""

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets, svm

# GridSearchCV lives in sklearn.model_selection since scikit-learn 0.18; the old
# sklearn.grid_search module was removed in 0.20. Fall back to the legacy location
# only when running on a very old installation.
try:
    from sklearn.model_selection import GridSearchCV
except ImportError:
    from sklearn.grid_search import GridSearchCV

In [None]:
# Fetch the digits dataset that ships with scikit-learn
digits = datasets.load_digits()

# Display the feature values of the first observation (a one-row slice)
digits.data[:1]

# digits.target is the matching vector that holds the true digit of each image.

In [None]:
# Number of leading observations assigned to dataset 1; everything after it
# becomes dataset 2. Kept in one place so the four slices cannot drift apart.
N_DATA1 = 1000

# Create dataset 1 (the first N_DATA1 observations)
data1_features = digits.data[:N_DATA1]
data1_target = digits.target[:N_DATA1]

# Create dataset 2 (all remaining observations)
data2_features = digits.data[N_DATA1:]
data2_target = digits.target[N_DATA1:]

In [None]:
"""
Before looking for which combination of parameter values produces the most accurate model, we must specify the different candidate values we want to try. 
In the code below we have a number of candidate parameter values, including four different values for C (1, 10, 100, 1000), two values for gamma (0.001, 0.0001), and two 
kernels (linear, rbf). The grid search will try all combinations of parameter values and select the set of parameters which provides the most accurate model.
"""


# Two sub-grids: the linear kernel only varies C, while the RBF kernel varies
# both C and gamma.
linear_grid = dict(kernel=['linear'], C=[1, 10, 100, 1000])
rbf_grid = dict(kernel=['rbf'], C=[1, 10, 100, 1000], gamma=[0.001, 0.0001])
parameter_candidates = [linear_grid, rbf_grid]

In [None]:
"""
Conduct Grid Search To Find Parameters Producing Highest Score
Now we are ready to conduct the grid search using scikit-learn’s GridSearchCV, which stands for grid search cross validation. GridSearchCV’s cross validation uses 
KFold or StratifiedKFold depending on the situation; the default number of folds is 3 in scikit-learn versions before 0.22 and 5 from version 0.22 onwards.
"""

# Create a classifier object with the classifier and parameter candidates.
# cv is pinned to 3 folds because the library default changed between
# scikit-learn releases (3 before 0.22, 5 afterwards); stating it explicitly
# keeps the search reproducible whichever version is installed.
clf = GridSearchCV(
    estimator=svm.SVC(),
    param_grid=parameter_candidates,
    cv=3,
    n_jobs=-1,  # use every available CPU core
)

# Train the classifier on data1's feature and target data
clf.fit(data1_features, data1_target)

In [None]:
# Report the best cross-validated score reached during the search
print('Best score for data1:', clf.best_score_)

# Report the hyperparameters carried by the winning estimator
best_model = clf.best_estimator_
for label, value in (
    ('Best C:', best_model.C),
    ('Best Kernel:', best_model.kernel),
    ('Best Gamma:', best_model.gamma),
):
    print(label, value)

In [None]:
"""
Remember the second dataset we created? Now we will use it to prove that those parameters are actually used by the model. First, we apply the classifier we just trained to the
second dataset. Then we will train a new support vector classifier from scratch using the parameters found using the grid search. We should get the same results for both models.
"""


# Apply the classifier trained using data1 to data2 and keep the accuracy score.
# (Only the last expression of a cell is displayed, so the score is stored and
# printed explicitly instead of being silently discarded.)
grid_search_score = clf.score(data2_features, data2_target)

# Train a new classifier from scratch using the best parameters found by the
# grid search. Reading them from clf.best_params_ instead of hard-coding them
# keeps this check valid whatever combination the search selects.
fresh_svc = svm.SVC(**clf.best_params_).fit(data1_features, data1_target)
fresh_svc_score = fresh_svc.score(data2_features, data2_target)

# Show both scores side by side; they should match
print('Grid search classifier score on data2:', grid_search_score)
print('Freshly trained SVC score on data2:', fresh_svc_score)