In [1]:
import random
import numpy as np
import matplotlib.pyplot as plt

# This is a bit of magic to make matplotlib figures appear inline in the notebook
# rather than in a new window.

%matplotlib inline
plt.rcParams['figure.figsize'] = (10.0, 8.0) # set default size of plots
plt.rcParams['image.interpolation'] = 'nearest'
plt.rcParams['image.cmap'] = 'gray'

# Some more magic so that the notebook will reload external python modules;
# see http://stackoverflow.com/questions/1907993/autoreload-of-modules-in-ipython

%load_ext autoreload
%autoreload 2

In [3]:
from sklearn import preprocessing, metrics
import utils
import scipy.io
import numpy as np
from linear_classifier import LinearSVM_twoclass

# load the SPAM email training dataset

X,y = utils.load_mat('data/spamTrain.mat')
yy = np.ones(y.shape)
yy[y==0] = -1

# load the SPAM email test dataset

test_data = scipy.io.loadmat('data/spamTest.mat')
X_test = test_data['Xtest']
y_test = test_data['ytest'].flatten()
yy_test = np.ones(y_test.shape)
yy_test[y_test == 0] = -1

##################################################################################
#  YOUR CODE HERE for training the best performing SVM for the data above.       #
#  what should C be? What should num_iters be? Should X be scaled?               #
#  should X be kernelized? What should the learning rate be? What should the     #
#  number of iterations be?                                                      #
##################################################################################
Cvals = [100]
iterationvals = [2000,5000,10000]
learning_ratevals = [1e-3,1e-2,1e-1]

best_C = 0
best_learning_rate = 0
best_iteration = 0
best_val_acc = 0

for C in Cvals:
    for iteration in iterationvals: 
        for learning_rate in learning_ratevals:
            print "training with C: {}, num_iters: {}, learning_rate: {}".format(C, iteration, learning_rate)
            
            svm = LinearSVM_twoclass()
            svm.theta = np.zeros((X.shape[1],))
            svm.train(X, yy, learning_rate, reg=C, num_iters=iteration, verbose=False)
    
            val_acc = np.mean(svm.predict(X_test) == yy_test)
        
            print "When C = {}, num_iters = {}, learning_rate = {}, validation accuracy = {}".format(C, iteration, learning_rate, val_acc)
            print("-"*70)
            
            if val_acc >= best_val_acc:
                best_C = C
                best_iteration = iteration
                best_learning_rate = learning_rate
                best_val_acc=val_acc
                
print "So the best paramater set we derive is:", best_C, best_iteration, best_learning_rate
            


training with C: 100, num_iters: 2000, learning_rate: 0.001
When C = 100, num_iters = 2000, learning_rate = 0.001, validation accuracy = 0.991
----------------------------------------------------------------------
training with C: 100, num_iters: 2000, learning_rate: 0.01
When C = 100, num_iters = 2000, learning_rate = 0.01, validation accuracy = 0.985
----------------------------------------------------------------------
training with C: 100, num_iters: 2000, learning_rate: 0.1
When C = 100, num_iters = 2000, learning_rate = 0.1, validation accuracy = 0.988
----------------------------------------------------------------------
training with C: 100, num_iters: 5000, learning_rate: 0.001
When C = 100, num_iters = 5000, learning_rate = 0.001, validation accuracy = 0.987
----------------------------------------------------------------------
training with C: 100, num_iters: 5000, learning_rate: 0.01
When C = 100, num_iters = 5000, learning_rate = 0.01, validation accuracy = 0.982
---------