In [20]:
# Useful starting lines
%matplotlib inline

import random
from datetime import datetime
from scipy.sparse import diags

import numpy as np
import matplotlib.pyplot as plt
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


# Support Vector Machines
## Classification Using SVM
Load dataset. We will re-use the CERN dataset from project 1, available from https://inclass.kaggle.com/c/epfml-project-1/data

In [2]:
from helpers import load_csv_data

DATA_TRAIN_PATH = 'data/train.csv'

y, X1, ids = load_csv_data(DATA_TRAIN_PATH, sub_sample=True)

NameError: name 'X' is not defined

In [6]:
def standardize(x, std_x = None, mean_x = None, ignore_first = True):
    """Standardize the original data set."""
    x = np.copy(x)
    if type(mean_x) == type(None):
        mean_x = np.mean(x, axis=0)
    x = x - mean_x
    if ignore_first:
        x[:,0] = 1
    if type(std_x) == type(None):
        std_x = np.std(x, axis=0)
    for i in range(std_x.shape[0]):
        if std_x[i] > 0: x[:, i] = x[:, i] / std_x[i]
    return x, mean_x, std_x

In [8]:
X, _, _ = standardize(X1, ignore_first = False)
print(y.shape, X.shape)

(5000,) (5000, 30)


## Prepare cost and prediction functions

In [10]:
def calculate_primal_objective(y, X, w, lambda_):
    """compute the full cost (the primal objective), that is loss plus regularizer.
    X: the full dataset matrix, shape = (num_examples, num_features)
    y: the corresponding +1 or -1 labels, shape = (num_examples)
    w: shape = (num_features)
    """
    
    return np.sum(np.maximum(0, 1 - np.multiply(y.flatten(), (X @ w).flatten()))) + lambda_ / 2 * (w.T @ w)

In [11]:
def calculate_accuracy(y, X, w):
    """compute the training accuracy on the training set (can be called for test set as well).
    X: the full dataset matrix, shape = (num_examples, num_features)
    y: the corresponding +1 or -1 labels, shape = (num_examples)
    w: shape = (num_features)
    """
    
    return np.mean((X @ w > 0) == (y > 0))

## Stochastic Gradient Descent for SVM

Compute the (stochastic) subgradient for the n-th summand of the SVM optimization objective

In [12]:
def calculate_stochastic_gradient(y, X, w, lambda_, n, num_examples):
    """compute the stochastic gradient of loss plus regularizer.
    X: the dataset matrix, shape = (num_examples, num_features)
    y: the corresponding +1 or -1 labels, shape = (num_examples)
    w: shape = (num_features)
    n: the index of the (one) datapoint we have sampled
    num_examples: N
    """
    # Be careful about the constant N (size) term!
    # The complete objective for SVM is a sum, not an average as in earlier SGD examples!
    x_n, y_n = X[n], y[n]
    
    return (-num_examples * y_n * x_n * np.maximum(0, 1 - y_n * x_n.T @ w) + lambda_ * w)

Implement stochastic gradient descent: Pick a data point uniformly at random and update w based on the gradient for the n-th summand of the objective

In [13]:
X.shape

(5000, 30)

In [15]:
def sgd_for_svm_demo(y, X):
    
    max_iter = 100000
    gamma = 1e-4
    lambda_ = 0.1
    
    num_examples, num_features = X.shape
    w = np.zeros(num_features)
    
    for it in range(max_iter):
        # n = sample one data point uniformly at random data from x
        n = random.randint(0,num_examples-1)
        
        grad = calculate_stochastic_gradient(y, X, w, lambda_, n, num_examples)
        w -= gamma/(it+1) * grad
        
        if it % 10000 == 0:
            cost = calculate_primal_objective(y, X, w, lambda_)
            print("iteration={i}, cost={c}, acc={a}".format(i=it, c=cost, a=calculate_accuracy(y, X, w)))
    
    print("training accuracy = {l}".format(l=calculate_accuracy(y, X, w)))

sgd_for_svm_demo(y, X)

iteration=0, cost=9162.76155325376, acc=0.4612
iteration=10000, cost=4524.505362916388, acc=0.6354
iteration=20000, cost=4382.754845841041, acc=0.6374
iteration=30000, cost=4304.855052218118, acc=0.6418
iteration=40000, cost=4288.665283215413, acc=0.6426
iteration=50000, cost=4256.959610216728, acc=0.645
iteration=60000, cost=4233.196572494346, acc=0.6462
iteration=70000, cost=4202.458438127098, acc=0.6492
iteration=80000, cost=4205.018803121497, acc=0.6478
iteration=90000, cost=4178.252816428664, acc=0.6504
training accuracy = 0.653


## Coordinate Descent (Ascent) for SVM

Compute the closed-form update for the n-th variable alpha, in the dual optimization problem, given alpha and the current corresponding w

In [84]:
def calculate_coordinate_update(y, X, lambda_, alpha, w, n):
    """compute a coordinate update (closed form) for coordinate n.
    X: the dataset matrix, shape = (num_examples, num_features)
    y: the corresponding +1 or -1 labels, shape = (num_examples)
    w: shape = (num_features)
    n: the coordinate to be updated
    """
    # ***************************************************
    # INSERT YOUR CODE HERE
    # TODO
    # ***************************************************
    # calculate the update of coordinate at index=n.
    x_n, y_n = X[n], y[n]
    old_alpha_n = np.copy(alpha[n])
    
    # w/o tricks
    #Y = diags(y)
    #alpha[n] = old_alpha_n + 1 - 1 / lambda_ * (Y @ X @ X.T @ Y @ alpha)[n]
    #w = 1 / lambda_ * X.T @ Y @ alpha
    
    alpha_n_delta = np.mean(1. - y_n * x_n @ w) / X.shape[0]
    
    #(Y @ X @ X.T @ Y)[n] @ alpha = lambda_
    
    if old_alpha_n + alpha_n_delta > 1:
        alpha[n] = 1
        alpha_n_delta = 1 - old_alpha_n
    elif old_alpha_n + alpha_n_delta < 0:
        alpha[n] = 0
        alpha_n_delta = -old_alpha_n
    else: alpha[n] = old_alpha_n + alpha_n_delta
    
    print(alpha_n_delta)
    
    w += 1. / lambda_ * x_n * y_n * alpha_n_delta
    
    return w, alpha

In [85]:
def calculate_dual_objective(y, X, w, alpha, lambda_):
    """calculate the objective for the dual problem."""
    #Y = diags(y)
    #return np.sum(alpha) - 0.5 / lambda_ * alpha.T @ Y @ X @ X.T @ Y @ alpha#return np.sum(alpha) - 0.5 / lambda_ * alpha.T @ Y @ X @ X.T @ Y @ alpha
    return np.sum(alpha) - 0.5 / lambda_ * w.T @ w

In [86]:
def coordinate_descent_for_svm_demo(y, X):
    max_iter = 300
    lambda_ = 0.1

    num_examples, num_features = X.shape
    w = np.zeros(num_features)
    alpha = np.zeros(num_examples)
    
    for it in range(max_iter):
        # n = sample one data point uniformly at random data from x
        n = random.randint(0,num_examples-1)
        
        w, alpha = calculate_coordinate_update(y, X, lambda_, alpha, w, n)
        #print(np.linalg.norm(w), np.linalg.norm(alpha))
        #break
            
        if it % 100 == 0:
            # primal objective
            primal_value = calculate_primal_objective(y, X, w, lambda_)
            # dual objective
            dual_value = calculate_dual_objective(y, X, w, alpha, lambda_)
            # primal dual gap
            duality_gap = primal_value - dual_value
            print('iteration=%i, primal:%.5f, dual:%.5f, gap:%.5f alpha_comp:%d alpha_norm:%.5f w_norm:%.5f'%(
                    it, primal_value, dual_value, duality_gap, np.sum(alpha > 0), np.linalg.norm(alpha),
            np.linalg.norm(w)))
    print("training accuracy = {l}".format(l=calculate_accuracy(y, X, w)))

coordinate_descent_for_svm_demo(y, X)

0.0002
iteration=0, primal:4987.50162, dual:-0.00028, gap:4987.50190 alpha_comp:1 alpha_norm:0.00020 w_norm:0.00980
0.000201405892279
0.000197257020213
0.000207570140295
0.000207218805539
0.000193194363066
0.0002046815101
0.000191508248147
0.000217543243692
0.000212446551568
0.00019956542187
0.000190832466701
0.00020645231403
0.000200177240568
0.000206780994966
0.000204612023452
0.000205717362062
0.00020077451535
0.000197108102753
0.000194197447643
0.000200186813588
0.000188176760829
0.000186060098671
0.000211908038626
0.000185708544028
0.00025443595152
0.000200227047287
0.000198982928405
0.000199270238783
0.000205035369609
0.000193048850457
0.000214762470196
0.000211406141171
0.000188820669982
0.000211271654975
0.00020554573034
0.000202557948462
0.000207333352149
0.000209977375049
0.000216923661062
0.000206232694413
0.000196932042985
0.000187533268602
0.000212317660916
0.000201306232551
0.000191752929948
0.000197688697702
0.000198456796599
0.000191714314237
0.0001970936693
0.000192586