In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.svm import SVC
from sklearn.model_selection import validation_curve, GridSearchCV
from sklearn import datasets

In [None]:
# Generate sklearn's two-moons toy dataset (200 noisy samples, fixed seed) and scatter-plot it.
xdat, ydat = datasets.make_moons(
    n_samples=200,
    noise=0.25,
    random_state=2,
)

fig0, ax0 = plt.subplots()
# Colour by the flipped label (1 - ydat) so the two moons swap colours in the 'coolwarm' map.
ax0.scatter(*xdat.T, c=1 - ydat, cmap='coolwarm')
ax0.set(xlabel='x1', ylabel='x2')

In [None]:
# Fit a single RBF-kernel SVC with hand-picked hyperparameters; edit C / gamma here to explore.
clf = SVC(kernel='rbf', C=1, gamma=1).fit(xdat, ydat)

# Plot window: the data's bounding box padded by 0.5 on each side, sampled every `meshstep`.
meshstep = 0.01
x1_min, x2_min = xdat.min(axis=0) - .5
x1_max, x2_max = xdat.max(axis=0) + .5
xx1, xx2 = np.meshgrid(
    np.arange(x1_min, x1_max, meshstep),
    np.arange(x2_min, x2_max, meshstep),
)

# Evaluate the decision function (proportional to the distance from the decision
# boundary) at every grid node, then fold the flat result back into the grid's shape.
z = clf.decision_function(
    np.column_stack((xx1.ravel(), xx2.ravel()))
).reshape(xx1.shape)

# Data points first, then the semi-transparent filled contours on top of them
# (lighter colors cover points closer to the decision boundary).
fig1, ax1 = plt.subplots()
ax1.scatter(*xdat.T, c=1 - ydat, cmap='coolwarm')
ax1.set(xlabel='x1', ylabel='x2')
ax1.contourf(xx1, xx2, z, cmap=plt.cm.RdBu, alpha=0.35)

In [None]:
## IN THE FOLLOWING SECTION, WE WILL TRAIN RBF KERNEL SVC USING FIXED C=1

# Sweep gamma over seven log-spaced values (1e-3 ... 1e3) with C held at 1 and
# score every value with 10-fold cross-validation.
# (Note: n_jobs has been added to speed up calculations via parallelization,
# you may adjust this based on your processor or default to 1;
# verbose has also been added to print out information while the function is running.)
clf_fixedC = SVC(C=1, kernel='rbf')
gammalist = np.logspace(-3, 3, num=7)
# param_name / param_range are passed by keyword: recent scikit-learn releases
# declare them keyword-only, so passing them positionally raises a TypeError there.
# The keyword form is accepted by older releases as well.
train_scores, valid_scores = validation_curve(
    clf_fixedC, xdat, ydat,
    param_name='gamma', param_range=gammalist,
    cv=10, n_jobs=4, verbose=1,
)

In [None]:
# Average the scores along axis 1 (over the k folds) so that each gamma value is
# left with one mean training score and one mean validation score.
train_scores_mean = train_scores.mean(axis=1)
valid_scores_mean = valid_scores.mean(axis=1)

In [None]:
# Draw the mean training and validation error (1 - mean score) against gamma,
# using a logarithmic x-axis because gamma spans several orders of magnitude.
fig2, ax2 = plt.subplots()
for mean_scores, line_format, curve_label in (
    (train_scores_mean, 'k-o', 'training'),
    (valid_scores_mean, 'r-o', 'validation'),
):
    ax2.plot(gammalist, 1 - mean_scores, line_format, label=curve_label)
ax2.set_xscale('log')
ax2.set_ylim([0, 1])
ax2.set_xlabel('gamma')
ax2.set_ylabel('1-accuracy')
ax2.set_title('rbf kernel SVC')
ax2.legend(loc='best')

In [None]:
## IN THE FOLLOWING SECTION, WE WILL USE GRID SEARCH OVER C AND GAMMA, WITH 10-FOLD CV

# Candidate values: 8 log-spaced C values (1e-8 ... 1e6) crossed with
# 7 log-spaced gamma values (1e-3 ... 1e3).
# (Note: n_jobs parallelizes the search -- adjust it to your processor or fall back to 1;
# verbose makes the search print progress information while it runs.)
Clist = np.logspace(-8, 6, num=8)
gammalist = np.logspace(-3, 3, num=7)
parameters = dict(C=Clist, gamma=gammalist)
clf_gridsearch = GridSearchCV(
    estimator=SVC(kernel='rbf'),
    param_grid=parameters,
    cv=10,
    n_jobs=4,
    verbose=1,
)
clf_gridsearch.fit(xdat, ydat)

In [None]:
# Report the best hyperparameters found by the grid search, then draw a heat map
# of the mean validation accuracy over the (C, gamma) grid.
print("The best parameters are %s with a validation accuracy of %0.2f" % (clf_gridsearch.best_params_, clf_gridsearch.best_score_))

# One row per C value, one column per gamma value.
# NOTE(review): the reshape assumes cv_results_ lists the grid with C varying slowest
# and gamma varying fastest -- confirm if the parameter names are ever changed.
scores = clf_gridsearch.cv_results_['mean_test_score'].reshape(len(Clist), len(gammalist))
fig3, ax3 = plt.subplots()
currentimage = ax3.imshow(scores, cmap=plt.cm.hot)
ax3.set(xlabel='gamma', ylabel='C', title='validation accuracy')
fig3.colorbar(currentimage, ax=ax3)

# Set the ticks on ax3 itself instead of through plt.xticks / plt.yticks: the pyplot
# helpers act on whichever axes is "current", which is fragile right after a colorbar
# axes has been added to the figure. '%g' keeps the labels compact (e.g. 0.001, 1, 1e+06).
ax3.set_xticks(np.arange(len(gammalist)))
ax3.set_xticklabels(['%g' % gamma_value for gamma_value in gammalist], rotation=45)
ax3.set_yticks(np.arange(len(Clist)))
ax3.set_yticklabels(['%g' % c_value for c_value in Clist])