In [1]:
import numpy as np
from sklearn.model_selection import cross_val_score
from sklearn import svm
from time import time
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import RandomizedSearchCV
from sklearn.neighbors import KNeighborsClassifier

In [2]:
train = np.loadtxt('P4_train.txt', delimiter=',')
test = np.loadtxt('P4_test.txt', delimiter=',')

In [3]:
train_X = train[:, :-1]
train_Y = train[:, -1]

test_X = test[:, :-1]
test_Y = test[:, -1]

## Quadratic Kernel

In [12]:
def report(results, n_top=3):
    for i in range(1, n_top + 1):
        candidates = np.flatnonzero(results['rank_test_score'] == i)
        for candidate in candidates:
            print("Model with rank: {0}".format(i))
            print("Mean validation score: {0:.3f} (std: {1:.3f})".format(
                  results['mean_test_score'][candidate],
                  results['std_test_score'][candidate]))
            print("Parameters: {0}".format(results['params'][candidate]))
            print("")

In [47]:
svc_quad = svm.SVC(kernel='poly', degree = 2)

# specify parameters and distributions to sample from
param_dist = {'gamma' : np.arange(0.01, 1.01, 0.01), 'C' : np.arange(0.1, 10.1, 0.1)}


In [48]:
n_iter_search = 1000
random_search = RandomizedSearchCV(svc_quad, param_distributions=param_dist,
                                   n_iter=n_iter_search, n_jobs = 4)

start = time()
random_search.fit(train_X, train_Y)
print("RandomizedSearchCV took %.2f seconds for %d candidates"
      " parameter settings." % ((time() - start), n_iter_search))
report(random_search.cv_results_)

RandomizedSearchCV took 11824.73 seconds for 1000 candidates parameter settings.
Model with rank: 1
Mean validation score: 0.842 (std: 0.004)
Parameters: {'C': 8.3000000000000007, 'gamma': 0.040000000000000001}

Model with rank: 1
Mean validation score: 0.842 (std: 0.004)
Parameters: {'C': 5.2999999999999998, 'gamma': 0.050000000000000003}

Model with rank: 3
Mean validation score: 0.842 (std: 0.003)
Parameters: {'C': 0.90000000000000002, 'gamma': 0.12}



### Best Quadratic SVC Classifier using RandomizedSearch with gamma = 0.12 and C = 0.9

In [4]:
best_quad_svc = svm.SVC(kernel='poly', degree = 2, gamma = 0.12, C = 0.9)

best_quad_svc.fit(train_X, train_Y)
print("Accuracy on test set is {0:.3f}".format(best_quad_svc.score(test_X, test_Y)))

Accuracy on test set is 0.846


## Gaussian(RBF) Kernel

In [49]:
svc_gaussian = svm.SVC(kernel='rbf')

# specify parameters and distributions to sample from
param_dist = {'gamma' : np.arange(0.01, 1.01, 0.01), 'C' : np.arange(0.1, 10.1, 0.1)}

In [50]:
n_iter_search = 100
random_search = RandomizedSearchCV(svc_gaussian, param_distributions=param_dist,
                                   n_iter=n_iter_search, n_jobs = 4)

start = time()
random_search.fit(train_X, train_Y)
print("RandomizedSearchCV took %.2f seconds for %d candidates"
      " parameter settings." % ((time() - start), n_iter_search))
report(random_search.cv_results_)

RandomizedSearchCV took 644.57 seconds for 100 candidates parameter settings.
Model with rank: 1
Mean validation score: 0.841 (std: 0.001)
Parameters: {'C': 2.6000000000000001, 'gamma': 0.029999999999999999}

Model with rank: 2
Mean validation score: 0.841 (std: 0.003)
Parameters: {'C': 1.7000000000000002, 'gamma': 0.069999999999999993}

Model with rank: 3
Mean validation score: 0.840 (std: 0.004)
Parameters: {'C': 0.80000000000000004, 'gamma': 0.13}



### Best Gaussian SVC Classifier using RandomizedSearch with gamma = 0.03 and C = 2.6

In [5]:
best_gaussian_svc = svm.SVC(kernel='rbf', gamma = 0.03, C = 2.6)

best_gaussian_svc.fit(train_X, train_Y)

print("Accuracy on test set is {0:.3f}".format(best_gaussian_svc.score(test_X, test_Y)))

Accuracy on test set is 0.845


## kNN Classifier

In [14]:
knn_clf = KNeighborsClassifier()

param_dist = {'n_neighbors' : range(1, 11)}

In [15]:
grid_search = GridSearchCV(knn_clf, param_grid=param_dist, n_jobs=4)
start = time()
grid_search.fit(train_X, train_Y)

print("GridSearchCV took %.2f seconds for %d candidate parameter settings."
      % (time() - start, len(grid_search.cv_results_['params'])))
report(grid_search.cv_results_)

GridSearchCV took 66.38 seconds for 10 candidate parameter settings.
Model with rank: 1
Mean validation score: 0.826 (std: 0.005)
Parameters: {'n_neighbors': 10}

Model with rank: 2
Mean validation score: 0.822 (std: 0.006)
Parameters: {'n_neighbors': 7}

Model with rank: 3
Mean validation score: 0.822 (std: 0.007)
Parameters: {'n_neighbors': 8}



### Best kNN Classifier with n_neighbors = 10

In [8]:
best_knn_clf = KNeighborsClassifier(n_neighbors=10)

best_knn_clf.fit(train_X, train_Y)

print("Accuracy on test set is {0:.3f}".format(best_knn_clf.score(test_X, test_Y)))

Accuracy on test set is 0.828
