In [1]:
import numpy as np
import cvxpy as cvx

### Preprocessing data

In [2]:
def read_in(file_path):
    X = []
    y = []
    with open(file_path, 'r') as f:
        for line in f:
            info = line.strip('\n').split(',')
            # label is the first column
            X.append([float(i) for i in info[1:]])
            y.append(float(info[0]))
    X = np.array(X)
    y = np.array(y)
    # change the output to *-1 and 1* instead of *0 and 1*
    #y[y == 0] = -1
    return X, y

In [3]:
# read in the data
X_train, y_train = read_in('./hw2_data/wdbc_train.data') 
X_valid, y_valid = read_in('./hw2_data/wdbc_valid.data')
X_test, y_test = read_in('./hw2_data/wdbc_test.data')

In [4]:
print(X_train.shape)
print(y_train.shape)

(336, 10)
(336,)


### Train the Primal SVM classifier

In [5]:
# parameters
D = X_train.shape[1] # number of features
N = X_train.shape[0] # number of samples
c = 1

In [6]:
def svm_primal_train(X, y, c=1):
    D = X.shape[1]
    W = cvx.Variable(D)
    b = cvx.Variable()
    loss = (0.5*cvx.sum_squares(W) +
            c*cvx.sum_entries(cvx.pos(1 - cvx.mul_elemwise(y, X*W + b))))
    # need to minimize loss/N to avoid error
    prob = cvx.Problem(cvx.Minimize(loss/N))
    prob.solve(solver=None)
    return W.value, b.value

def predict(W, b, X):
    preds = np.dot(X, W) + b
    preds[preds >= 0] = 1
    preds[preds < 0] = -1
    return preds

def accuracy(y_pred, y_truth):
    return np.mean(y_pred == y_truth)

In [7]:
best_valid_acc = -1
best_c = -1
c_test = [10**i for i in range(9)]
for c in c_test:
    W, b = svm_primal_train(X_train, y_train, c=c)
    pred = predict(W, b, X_valid)
    acc = accuracy(pred, y_valid)
    print('c =', c, ';', 'accuracy:', acc)
    if acc > best_valid_acc:
        best_valid_acc = acc
        best_c = c

print("Best C:", best_c)
print("Best validation accuracy:", best_valid_acc)

c = 1 ; accuracy: 0.527058823529
c = 10 ; accuracy: 0.524705882353
c = 100 ; accuracy: 0.524705882353
c = 1000 ; accuracy: 0.524705882353
c = 10000 ; accuracy: 0.517647058824
c = 100000 ; accuracy: 0.517647058824
c = 1000000 ; accuracy: 0.517647058824
c = 10000000 ; accuracy: 0.517647058824
c = 100000000 ; accuracy: 0.517647058824
Best C: 1
Best validation accuracy: 0.527058823529


### Test set accuracy

In [8]:
W, b = svm_primal_train(X_train, y_train, c=1)

pred = predict(W, b, X_train)
acc = accuracy(pred, y_train)
print("Train set accuracy:", acc)

pred = predict(W, b, X_test)
acc = accuracy(pred, y_test)
print("Test set accuracy:", acc)

Train set accuracy: 0.528946995465
Test set accuracy: 0.559349890431


### LinearSVC from sklearn

In [9]:
from sklearn import svm
clf = svm.LinearSVC(C=1)
clf.fit(X_train, y_train)

preds = clf.predict(X_train)
print(np.mean(preds == y_train))

preds = clf.predict(X_valid)
print(np.mean(preds == y_valid))

preds = clf.predict(X_test)
print(np.mean(preds == y_test))

0.535714285714
0.529411764706
0.466216216216


### Dual SVM with Slack

In [69]:
y = y_train
X = X_train
print(X[5])

[  1.28700000e+01   1.95400000e+01   8.26700000e+01   5.09200000e+02
   9.13600000e-02   7.88300000e-02   1.79700000e-02   2.09000000e-02
   1.86100000e-01   6.34700000e-02]


In [104]:
# parameters
D = X_train.shape[1] # number of features
N = X_train.shape[0] # number of samples
c = 10**8
sigma = 100

In [105]:
def gaussian_kernel(X, Z, sigma):
    return np.exp(-np.dot((X-Z),(X-Z)) / (2*sigma**2))
    # return np.exp(-1 * (np.linalg.norm(X-Z)**2 / (2*sigma**2)))

In [106]:
kernel_matrix = np.zeros((N, N))
for i in range(N):
    # print progress
    if i % 50 == 0:
        print(i/N * 100)
    for j in range(N):
        value = gaussian_kernel(X[i], X[j], sigma)
        if np.isclose(value, 0):
#         if np.abs(value) > 10e-6:
            kernel_matrix[i, j] = 0.
        elif np.isclose(value, 1):
            kernel_matrix[i, j] = 1.
        else:
            kernel_matrix[i, j] = value

0.0
14.880952380952381
29.761904761904763
44.642857142857146
59.523809523809526
74.40476190476191
89.28571428571429


In [107]:
print(kernel_matrix[30])
# for each in kernel_matrix[450]:
#     print(each)

[  0.00000000e+00   5.41818851e-01   0.00000000e+00   0.00000000e+00
   0.00000000e+00   0.00000000e+00   1.09791487e-05   0.00000000e+00
   1.03572916e-04   3.30546049e-03   3.05155705e-01   0.00000000e+00
   0.00000000e+00   0.00000000e+00   0.00000000e+00   0.00000000e+00
   0.00000000e+00   8.81336051e-01   0.00000000e+00   5.81163750e-08
   0.00000000e+00   0.00000000e+00   0.00000000e+00   0.00000000e+00
   0.00000000e+00   0.00000000e+00   8.05583335e-07   0.00000000e+00
   0.00000000e+00   0.00000000e+00   1.00000000e+00   6.02126463e-08
   8.68689224e-01   4.76028062e-01   0.00000000e+00   0.00000000e+00
   0.00000000e+00   1.70513712e-02   1.43922305e-03   0.00000000e+00
   0.00000000e+00   0.00000000e+00   1.15603050e-04   0.00000000e+00
   0.00000000e+00   1.24842117e-06   0.00000000e+00   1.40110782e-01
   0.00000000e+00   0.00000000e+00   0.00000000e+00   1.60553847e-01
   9.78148570e-01   0.00000000e+00   3.43744948e-08   2.71402613e-01
   0.00000000e+00   0.00000000e+00

In [108]:
Delta = cvx.Variable(N) # Lagrangian multipliers

first_term = cvx.quad_form(cvx.mul_elemwise(y, Delta), kernel_matrix)
# for i in range(N):
#     for j in range(N):
#         print(i, j)
#         first_term += Delta[i]*Delta[j]*y[i]*y[j]*gaussian_kernel(X[i], X[j], sigma)
        # first_term += y[i]*y[j]*gaussian_kernel(X[i], X[j], sigma)
# print('Done')

second_term = cvx.sum_entries(Delta)        
# for i in range(N):
#     second_term += Delta[1]
    
loss = -0.5*first_term + second_term


In [109]:
# constraints
constraints = [Delta >= 0, Delta <= c]
# for i in range(N):
#     constraints.append(Delta[i] >= 0)
#     constraints.append(Delta[i] <= c)
dual_sum = cvx.sum_entries(cvx.mul_elemwise(y, Delta))
constraints.append(dual_sum == 0)

In [110]:
prob = cvx.Problem(cvx.Maximize(loss), constraints)

In [111]:
prob.solve(verbose=True, max_iters=200)


ECOS 2.0.4 - (C) embotech GmbH, Zurich Switzerland, 2012-15. Web: www.embotech.com/ECOS

It     pcost       dcost      gap   pres   dres    k/t    mu     step   sigma     IR    |   BT
 0  -1.545e+10  -7.049e+10  +3e+11  6e-01  3e-03  1e+00  5e+08    ---    ---    1  1  - |  -  - 
 1  -2.284e+10  -9.163e+10  +3e+11  8e-01  4e-02  1e+07  5e+08  0.0184  1e+00   1  1  1 |  0  0
 2  +1.480e+10  -1.051e+10  +3e+11  3e-01  7e-02  2e+08  5e+08  0.7961  8e-01   1  1  0 |  0  0
 3  -3.693e+09  -9.749e+09  +9e+10  9e-02  3e-02  4e+07  1e+08  0.9890  2e-01   1  1  0 |  0  0
 4  -7.027e+09  -8.930e+09  +2e+10  3e-02  1e-02  2e+07  4e+07  0.8060  8e-02   1  1  1 |  0  0
 5  -7.587e+09  -8.254e+09  +8e+09  1e-02  8e-03  6e+06  1e+07  0.7765  2e-01   1  1  1 |  0  0
 6  -7.631e+09  -8.225e+09  +7e+09  1e-02  4e-03  5e+06  1e+07  0.3449  6e-01   1  1  1 |  0  0
 7  -7.410e+09  -7.586e+09  +2e+09  4e-03  3e-03  2e+06  3e+06  0.9890  3e-01   1  1  1 |  0  0
 8  -7.432e+09  -7.586e+09  +2e+09  3e-03  2e-

100    nan    nan  nan  nan  nan  nan  nan  0.9890  1e-04   0  0  0 |  0  0
101    nan    nan  nan  nan  nan  nan  nan  0.9890  1e-04   0  0  0 |  0  0
102    nan    nan  nan  nan  nan  nan  nan  0.9890  1e-04   0  0  0 |  0  0
103    nan    nan  nan  nan  nan  nan  nan  0.9890  1e-04   0  0  0 |  0  0
104    nan    nan  nan  nan  nan  nan  nan  0.9890  1e-04   0  0  0 |  0  0
105    nan    nan  nan  nan  nan  nan  nan  0.9890  1e-04   0  0  0 |  0  0
106    nan    nan  nan  nan  nan  nan  nan  0.9890  1e-04   0  0  0 |  0  0
107    nan    nan  nan  nan  nan  nan  nan  0.9890  1e-04   0  0  0 |  0  0
108    nan    nan  nan  nan  nan  nan  nan  0.9890  1e-04   0  0  0 |  0  0
109    nan    nan  nan  nan  nan  nan  nan  0.9890  1e-04   0  0  0 |  0  0
110    nan    nan  nan  nan  nan  nan  nan  0.9890  1e-04   0  0  0 |  0  0
111    nan    nan  nan  nan  nan  nan  nan  0.9890  1e-04   0  0  0 |  0  0
112    nan    nan  nan  nan  nan  nan  nan  0.9890  1e-04   0  0  0 |  0  0
113    nan  

SolverError: Solver 'ECOS' failed. Try another solver.

In [40]:
print(Delta.value)

[[  3.25066077e-07]
 [  7.06009834e-01]
 [  2.05169368e+00]
 [  3.57321572e-08]
 [  1.87718772e-08]
 [  3.59353441e-08]
 [  3.24790438e+00]
 [  1.49941957e-06]
 [  6.71077603e+00]
 [  1.66982006e-01]
 [  2.04960187e+00]
 [  1.97826382e-08]
 [  5.61087833e-08]
 [  1.98609423e+02]
 [  1.41073543e-08]
 [  1.15677579e-08]
 [  3.14449119e-08]
 [  1.58885181e+00]
 [  4.89719790e-08]
 [  3.32165601e-08]
 [  7.31217464e-08]
 [  1.09608561e-07]
 [  1.89467850e-08]
 [  1.06184095e+02]
 [  8.01829279e-08]
 [  1.56743979e-06]
 [  2.98167040e+00]
 [  4.95848938e-01]
 [  3.02676066e-07]
 [  1.50631917e-06]
 [  1.10941109e+00]
 [  2.23486263e+02]
 [  1.89198870e-06]
 [  2.04899886e+00]
 [  2.03812488e-08]
 [  4.52674492e-07]
 [  4.68962560e-08]
 [  2.32839812e-07]
 [  1.09605931e-05]
 [  8.70278969e-09]
 [  2.81988471e-08]
 [  3.65406489e+02]
 [  6.28810473e+00]
 [  2.72382081e-07]
 [  9.96084906e+01]
 [  1.20673923e+00]
 [  1.15692942e-06]
 [  1.52968136e+00]
 [  2.56798378e+01]
 [  8.78182501e-08]


In [39]:
test_sum = 0
for i, val in enumerate(Delta.value):
    test_sum += val * y[i]
print(test_sum)

[[  7.32747196e-15]]


In [40]:
print(Delta.value)

[[ 0.7738046 ]
 [ 1.22618274]
 [ 1.22618286]
 [ 0.77380459]
 [ 0.7738046 ]
 [ 0.7738046 ]
 [ 0.7738046 ]
 [ 0.77380459]
 [ 0.7738046 ]
 [ 1.22618286]
 [ 1.22618269]
 [ 0.7738046 ]
 [ 0.77380459]
 [ 1.22618286]
 [ 0.7738046 ]
 [ 0.7738046 ]
 [ 0.7738046 ]
 [ 1.22618286]
 [ 0.7738046 ]
 [ 0.7738046 ]
 [ 1.22618228]
 [ 0.7738046 ]
 [ 0.7738046 ]
 [ 1.22618286]
 [ 0.77380459]
 [ 0.77380459]
 [ 1.22618228]
 [ 0.77380459]
 [ 0.7738046 ]
 [ 0.77380459]
 [ 1.22618243]
 [ 0.77380459]
 [ 1.22618286]
 [ 1.22618286]
 [ 0.7738046 ]
 [ 0.7738046 ]
 [ 0.77380459]
 [ 1.22618259]
 [ 1.22618229]
 [ 0.77380459]
 [ 0.7738046 ]
 [ 0.7738046 ]
 [ 1.22618245]
 [ 0.7738046 ]
 [ 1.22618286]
 [ 1.22618268]
 [ 0.7738046 ]
 [ 1.22618282]
 [ 0.7738046 ]
 [ 0.7738046 ]
 [ 0.7738046 ]
 [ 1.22618286]
 [ 1.2261828 ]
 [ 0.7738046 ]
 [ 0.7738046 ]
 [ 1.22618286]
 [ 0.7738046 ]
 [ 0.7738046 ]
 [ 0.7738046 ]
 [ 1.22618286]
 [ 1.22618286]
 [ 0.7738046 ]
 [ 0.7738046 ]
 [ 1.22618286]
 [ 1.22618286]
 [ 1.22618286]
 [ 1.22618

In [41]:
# Make prediction
# first, finding the bias term
bias = None
for i in range(N):
    if (not np.isclose(c, Delta.value[i])) and (not np.isclose(Delta.value[i], 0)):
        print('support vector idx:', i)
        print(Delta.value[i])
        result = 0
        for j in range(N):
            result += Delta.value[j]*y[j]*gaussian_kernel(X[i],X[j],sigma)
        bias = y[i] - result
        print('bias =', y[i] - result)
        break

support vector idx: 0
[[ 0.7738046]]
bias = [[-0.2261954]]


In [42]:
import random
# preds = np.sum(np.dot(X_test, (Delta.value*y*X_train).T), axis=1) + b 
preds = np.zeros(X_test.shape[0])

for i in range(X_test.shape[0]):
    pred = 0
    for j in range(N):
        pred += Delta.value[j]*y[j]*gaussian_kernel(X_test[i],X[j],sigma)
    # print(pred)
    pred += bias
    # print(pred)
    if pred < 0 and (not np.isclose(pred, 0)):
        preds[i] = -1
    elif np.isclose(pred, 0):
        preds[i] = random.choice([1, -1])
    else:
        preds[i] = 1


In [43]:
print(preds)

[-1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1.
 -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1.
 -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1.
 -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1.
 -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1.
 -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1.
 -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1.
 -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1.
 -1. -1. -1. -1.]


In [27]:
print(np.mean(preds == y_test))

0.675675675676


#### This is just one combination of c and sigma, I need to try every combination

In [45]:
# test on training set
import random
# preds = np.sum(np.dot(X_test, (Delta.value*y*X_train).T), axis=1) + b 
preds = np.zeros(X.shape[0])

for i in range(X.shape[0]):
    pred = 0
    for j in range(N):
        pred += Delta.value[j]*y[j]*gaussian_kernel(X[i],X[j],sigma)
    # print(pred)
    pred += bias
    # print(pred)
    if pred < 0 and (not np.isclose(pred, 0)):
        preds[i] = -1
    elif np.isclose(pred, 0):
        preds[i] = random.choice([1, -1])
    else:
        preds[i] = 1

In [46]:
print(preds)

[-1.  1.  1. -1. -1. -1. -1. -1. -1.  1.  1. -1. -1.  1. -1. -1. -1.  1.
 -1. -1.  1. -1. -1.  1. -1. -1.  1. -1. -1. -1.  1. -1.  1.  1. -1. -1.
 -1.  1.  1. -1. -1. -1.  1. -1.  1.  1. -1.  1. -1. -1. -1.  1.  1. -1.
 -1.  1. -1. -1. -1.  1.  1. -1. -1.  1.  1.  1.  1. -1. -1.  1. -1. -1.
 -1.  1.  1.  1. -1. -1. -1. -1. -1. -1.  1.  1.  1.  1. -1. -1. -1. -1.
  1.  1.  1.  1. -1.  1.  1. -1. -1.  1. -1. -1. -1.  1.  1. -1.  1.  1.
  1. -1. -1. -1. -1. -1. -1. -1.  1.  1. -1. -1. -1. -1. -1.  1. -1. -1.
 -1. -1.  1. -1.  1.  1.  1. -1. -1.  1. -1. -1. -1.  1.  1. -1. -1.  1.
  1. -1. -1.  1. -1.  1.  1. -1. -1. -1. -1. -1.  1. -1.  1. -1. -1. -1.
  1. -1. -1. -1.  1.  1. -1. -1. -1. -1.  1. -1.  1. -1. -1. -1. -1. -1.
 -1.  1.  1. -1. -1. -1.  1. -1.  1.  1. -1. -1. -1.  1.  1. -1.  1. -1.
  1.  1.  1. -1.  1. -1. -1.  1. -1. -1. -1.  1. -1.  1. -1.  1.  1. -1.
  1.  1. -1. -1. -1. -1. -1. -1. -1.  1. -1. -1. -1. -1.  1.  1. -1. -1.
 -1.  1.  1.  1.  1. -1. -1.  1. -1. -1. -1.  1.  1

In [47]:
print(np.mean(preds == y))

1.0


#### Good news
It seems I have the right answer.

In [2]:
for i in range(-1,4):
    print(i)

-1
0
1
2
3
