In [1]:
import numpy as np
import cvxpy as cvx

### Preprocessing data

In [2]:
def read_in(file_path):
    X = []
    y = []
    with open(file_path, 'r') as f:
        for line in f:
            info = line.strip('\n').split(',')
            # label is the first column
            X.append([float(i) for i in info[1:]])
            y.append(float(info[0]))
    X = np.array(X)
    y = np.array(y)
    # change the output to *-1 and 1* instead of *0 and 1*
    #y[y == 0] = -1
    return X, y

In [3]:
# read in the data
X_train, y_train = read_in('./hw2_data/wdbc_train.data') 
X_valid, y_valid = read_in('./hw2_data/wdbc_valid.data')
X_test, y_test = read_in('./hw2_data/wdbc_test.data')

In [4]:
print(X_train.shape)
print(y_train.shape)

(336, 10)
(336,)


### Train the Primal SVM classifier

In [5]:
# parameters
D = X_train.shape[1] # number of features
N = X_train.shape[0] # number of samples
c = 1

In [6]:
def svm_primal_train(X, y, c=1):
    D = X.shape[1]
    W = cvx.Variable(D)
    b = cvx.Variable()
    loss = (0.5*cvx.sum_squares(W) +
            c*cvx.sum_entries(cvx.pos(1 - cvx.mul_elemwise(y, X*W + b))))
    # need to minimize loss/N to avoid error
    prob = cvx.Problem(cvx.Minimize(loss/N))
    prob.solve(solver=None)
    return W.value, b.value

def predict(W, b, X):
    preds = np.dot(X, W) + b
    preds[preds >= 0] = 1
    preds[preds < 0] = -1
    return preds

def accuracy(y_pred, y_truth):
    return np.mean(y_pred == y_truth)

In [7]:
best_valid_acc = -1
best_c = -1
c_test = [10**i for i in range(9)]
for c in c_test:
    W, b = svm_primal_train(X_train, y_train, c=c)
    pred = predict(W, b, X_valid)
    acc = accuracy(pred, y_valid)
    print('c =', c, ';', 'accuracy:', acc)
    if acc > best_valid_acc:
        best_valid_acc = acc
        best_c = c

print("Best C:", best_c)
print("Best validation accuracy:", best_valid_acc)

c = 1 ; accuracy: 0.527058823529
c = 10 ; accuracy: 0.524705882353
c = 100 ; accuracy: 0.524705882353
c = 1000 ; accuracy: 0.524705882353
c = 10000 ; accuracy: 0.517647058824
c = 100000 ; accuracy: 0.517647058824
c = 1000000 ; accuracy: 0.517647058824
c = 10000000 ; accuracy: 0.517647058824
c = 100000000 ; accuracy: 0.517647058824
Best C: 1
Best validation accuracy: 0.527058823529


### Test set accuracy

In [8]:
W, b = svm_primal_train(X_train, y_train, c=1)

pred = predict(W, b, X_train)
acc = accuracy(pred, y_train)
print("Train set accuracy:", acc)

pred = predict(W, b, X_test)
acc = accuracy(pred, y_test)
print("Test set accuracy:", acc)

Train set accuracy: 0.528946995465
Test set accuracy: 0.559349890431


### LinearSVC from sklearn

In [9]:
from sklearn import svm
clf = svm.LinearSVC(C=1)
clf.fit(X_train, y_train)

preds = clf.predict(X_train)
print(np.mean(preds == y_train))

preds = clf.predict(X_valid)
print(np.mean(preds == y_valid))

preds = clf.predict(X_test)
print(np.mean(preds == y_test))

0.535714285714
0.529411764706
0.466216216216


### Dual SVM with Slack

In [30]:
y = y_train
X = X_train
print(X[5])

[  1.28700000e+01   1.95400000e+01   8.26700000e+01   5.09200000e+02
   9.13600000e-02   7.88300000e-02   1.79700000e-02   2.09000000e-02
   1.86100000e-01   6.34700000e-02]


In [31]:
# parameters
D = X_train.shape[1] # number of features
N = X_train.shape[0] # number of samples
c = 1
sigma = 0.1

In [32]:
def gaussian_kernel(X, Z, sigma):
    return np.exp(-np.dot((X-Z),(X-Z)) / (2*sigma**2))
    # return np.exp(-1 * (np.linalg.norm(X-Z)**2 / (2*sigma**2)))

In [33]:
kernel_matrix = np.zeros((N, N))
for i in range(N):
    # print progress
    if i % 50 == 0:
        print(i/N * 100)
    for j in range(N):
        kernel_matrix[i, j] = gaussian_kernel(X[i], X[j], sigma)

0.0
14.880952380952381
29.761904761904763
44.642857142857146
59.523809523809526
74.40476190476191
89.28571428571429


In [34]:
print(kernel_matrix[8])
# for each in kernel_matrix[450]:
#     print(each)

[ 0.  0.  0.  0.  0.  0.  0.  0.  1.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0

In [35]:
Delta = cvx.Variable(N) # Lagrangian multipliers

first_term = cvx.quad_form(cvx.mul_elemwise(y, Delta), kernel_matrix)
# for i in range(N):
#     for j in range(N):
#         print(i, j)
#         first_term += Delta[i]*Delta[j]*y[i]*y[j]*gaussian_kernel(X[i], X[j], sigma)
        # first_term += y[i]*y[j]*gaussian_kernel(X[i], X[j], sigma)
# print('Done')

second_term = cvx.sum_entries(Delta)        
# for i in range(N):
#     second_term += Delta[1]
    
loss = -0.5*first_term + second_term


In [36]:
# constraints
constraints = [Delta >= 0, Delta <= c]
# for i in range(N):
#     constraints.append(Delta[i] >= 0)
#     constraints.append(Delta[i] <= c)
dual_sum = cvx.sum_entries(cvx.mul_elemwise(y, Delta))
constraints.append(dual_sum == 0)

In [37]:
prob = cvx.Problem(cvx.Maximize(loss), constraints)

In [38]:
prob.solve(verbose=True)


ECOS 2.0.4 - (C) embotech GmbH, Zurich Switzerland, 2012-15. Web: www.embotech.com/ECOS

It     pcost       dcost      gap   pres   dres    k/t    mu     step   sigma     IR    |   BT
 0  -1.600e+03  -7.709e+03  +8e+03  9e-02  1e-03  1e+00  1e+01    ---    ---    1  1  - |  -  - 
 1  -2.103e+03  -5.512e+03  +5e+03  5e-02  8e-04  2e+00  7e+00  0.5005  2e-01   2  2  2 |  0  0
 2  -2.000e+03  -5.563e+03  +5e+03  5e-02  3e-04  2e+00  7e+00  0.0306  9e-01   2  2  2 |  0  0
 3  -2.298e+03  -3.762e+03  +2e+03  2e-02  9e-05  2e+00  3e+00  0.8127  3e-01   2  2  2 |  0  0
 4  -2.199e+03  -3.923e+03  +2e+03  3e-02  3e-05  3e+00  3e+00  0.0903  8e-01   2  2  3 |  0  0
 5  -2.078e+03  -2.909e+03  +1e+03  1e-02  2e-05  2e+00  2e+00  0.9890  5e-01   3  2  1 |  0  0
 6  -1.796e+03  -2.455e+03  +5e+02  1e-02  5e-06  2e+00  8e-01  0.7234  3e-01   2  2  2 |  0  0
 7  -1.592e+03  -1.894e+03  +2e+02  6e-03  2e-06  1e+00  3e-01  0.6940  2e-01   3  3  2 |  0  0
 8  -1.603e+03  -2.211e+03  +2e+02  2e-02  3e-

159.40476489464115

In [39]:
test_sum = 0
for i, val in enumerate(Delta.value):
    test_sum += val * y[i]
print(test_sum)

[[  7.32747196e-15]]


In [40]:
print(Delta.value)

[[ 0.7738046 ]
 [ 1.22618274]
 [ 1.22618286]
 [ 0.77380459]
 [ 0.7738046 ]
 [ 0.7738046 ]
 [ 0.7738046 ]
 [ 0.77380459]
 [ 0.7738046 ]
 [ 1.22618286]
 [ 1.22618269]
 [ 0.7738046 ]
 [ 0.77380459]
 [ 1.22618286]
 [ 0.7738046 ]
 [ 0.7738046 ]
 [ 0.7738046 ]
 [ 1.22618286]
 [ 0.7738046 ]
 [ 0.7738046 ]
 [ 1.22618228]
 [ 0.7738046 ]
 [ 0.7738046 ]
 [ 1.22618286]
 [ 0.77380459]
 [ 0.77380459]
 [ 1.22618228]
 [ 0.77380459]
 [ 0.7738046 ]
 [ 0.77380459]
 [ 1.22618243]
 [ 0.77380459]
 [ 1.22618286]
 [ 1.22618286]
 [ 0.7738046 ]
 [ 0.7738046 ]
 [ 0.77380459]
 [ 1.22618259]
 [ 1.22618229]
 [ 0.77380459]
 [ 0.7738046 ]
 [ 0.7738046 ]
 [ 1.22618245]
 [ 0.7738046 ]
 [ 1.22618286]
 [ 1.22618268]
 [ 0.7738046 ]
 [ 1.22618282]
 [ 0.7738046 ]
 [ 0.7738046 ]
 [ 0.7738046 ]
 [ 1.22618286]
 [ 1.2261828 ]
 [ 0.7738046 ]
 [ 0.7738046 ]
 [ 1.22618286]
 [ 0.7738046 ]
 [ 0.7738046 ]
 [ 0.7738046 ]
 [ 1.22618286]
 [ 1.22618286]
 [ 0.7738046 ]
 [ 0.7738046 ]
 [ 1.22618286]
 [ 1.22618286]
 [ 1.22618286]
 [ 1.22618

In [41]:
# Make prediction
# first, finding the bias term
bias = None
for i in range(N):
    if (not np.isclose(c, Delta.value[i])) and (not np.isclose(Delta.value[i], 0)):
        print('support vector idx:', i)
        print(Delta.value[i])
        result = 0
        for j in range(N):
            result += Delta.value[j]*y[j]*gaussian_kernel(X[i],X[j],sigma)
        bias = y[i] - result
        print('bias =', y[i] - result)
        break

support vector idx: 0
[[ 0.7738046]]
bias = [[-0.2261954]]


In [42]:
import random
# preds = np.sum(np.dot(X_test, (Delta.value*y*X_train).T), axis=1) + b 
preds = np.zeros(X_test.shape[0])

for i in range(X_test.shape[0]):
    pred = 0
    for j in range(N):
        pred += Delta.value[j]*y[j]*gaussian_kernel(X_test[i],X[j],sigma)
    # print(pred)
    pred += bias
    # print(pred)
    if pred < 0 and (not np.isclose(pred, 0)):
        preds[i] = -1
    elif np.isclose(pred, 0):
        preds[i] = random.choice([1, -1])
    else:
        preds[i] = 1


In [43]:
print(preds)

[-1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1.
 -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1.
 -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1.
 -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1.
 -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1.
 -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1.
 -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1.
 -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1.
 -1. -1. -1. -1.]


In [27]:
print(np.mean(preds == y_test))

0.675675675676


#### This is just one combination of c and sigma, I need to try every combination

In [45]:
# test on training set
import random
# preds = np.sum(np.dot(X_test, (Delta.value*y*X_train).T), axis=1) + b 
preds = np.zeros(X.shape[0])

for i in range(X.shape[0]):
    pred = 0
    for j in range(N):
        pred += Delta.value[j]*y[j]*gaussian_kernel(X[i],X[j],sigma)
    # print(pred)
    pred += bias
    # print(pred)
    if pred < 0 and (not np.isclose(pred, 0)):
        preds[i] = -1
    elif np.isclose(pred, 0):
        preds[i] = random.choice([1, -1])
    else:
        preds[i] = 1

In [46]:
print(preds)

[-1.  1.  1. -1. -1. -1. -1. -1. -1.  1.  1. -1. -1.  1. -1. -1. -1.  1.
 -1. -1.  1. -1. -1.  1. -1. -1.  1. -1. -1. -1.  1. -1.  1.  1. -1. -1.
 -1.  1.  1. -1. -1. -1.  1. -1.  1.  1. -1.  1. -1. -1. -1.  1.  1. -1.
 -1.  1. -1. -1. -1.  1.  1. -1. -1.  1.  1.  1.  1. -1. -1.  1. -1. -1.
 -1.  1.  1.  1. -1. -1. -1. -1. -1. -1.  1.  1.  1.  1. -1. -1. -1. -1.
  1.  1.  1.  1. -1.  1.  1. -1. -1.  1. -1. -1. -1.  1.  1. -1.  1.  1.
  1. -1. -1. -1. -1. -1. -1. -1.  1.  1. -1. -1. -1. -1. -1.  1. -1. -1.
 -1. -1.  1. -1.  1.  1.  1. -1. -1.  1. -1. -1. -1.  1.  1. -1. -1.  1.
  1. -1. -1.  1. -1.  1.  1. -1. -1. -1. -1. -1.  1. -1.  1. -1. -1. -1.
  1. -1. -1. -1.  1.  1. -1. -1. -1. -1.  1. -1.  1. -1. -1. -1. -1. -1.
 -1.  1.  1. -1. -1. -1.  1. -1.  1.  1. -1. -1. -1.  1.  1. -1.  1. -1.
  1.  1.  1. -1.  1. -1. -1.  1. -1. -1. -1.  1. -1.  1. -1.  1.  1. -1.
  1.  1. -1. -1. -1. -1. -1. -1. -1.  1. -1. -1. -1. -1.  1.  1. -1. -1.
 -1.  1.  1.  1.  1. -1. -1.  1. -1. -1. -1.  1.  1

In [47]:
print(np.mean(preds == y))

1.0


#### Good news
It seems I have the right answer.

In [2]:
for i in range(-1,4):
    print(i)

-1
0
1
2
3
