In [22]:
import numpy as np
from cvxpy import *

In [154]:
# read in the data
X = []
y = []

with open('./mystery.data', 'r') as f:
    for line in f:
        info = line.strip('\n').split(',')
        X.append([float(i) for i in info[:-1]])
        y.append(float(info[-1]))

In [None]:
X = np.vstack(X)
y = np.array(y)

# test with smaller number of samples
# X = X[500:505]
# y = y[500:505]


### Feature engineering

Idea:
- each feature squared
- product of features

In [169]:
D = X.shape[1]

In [157]:
features = []
for i in range(D):
    for j in range(i, D):
        features.append(X[:,i]*X[:,j])

In [170]:
for i in range(D):
    for j in range(i, D):
        for k in range(j, D):
            features.append(X[:,i]*X[:,j]*X[:,k])

In [126]:
for i in range(D):
    for j in range(i, D):
        for k in range(j, D):
            for m in range(k, D):
                features.append(X[:,i]*X[:,j]*X[:,k]*X[:,m])

In [171]:
X_add = np.vstack(features)

In [172]:
X_add.T.shape

(1000, 30)

In [173]:
X2 = np.column_stack((X, X_add.T))

In [174]:
print(X2.shape)

(1000, 34)


### Original Primal SVM

In [175]:
# parameters
D = X2.shape[1]
N = len(y)

In [176]:
W = Variable(D)
b = Variable()
loss = sum_squares(W)
# loss = W1**2 + W2**2 + W3**2 + W4**2

In [177]:
constraints = []
for i in range(N):
    forward = y[i] * (W.T*X2[i] + b)
    constraints.append(forward >= 1)

In [179]:
objective = Minimize(loss)
prob = Problem(objective, constraints)

In [180]:
result = prob.solve(solver=None, verbose=True)
print(result)


ECOS 2.0.4 - (C) embotech GmbH, Zurich Switzerland, 2012-15. Web: www.embotech.com/ECOS

It     pcost       dcost      gap   pres   dres    k/t    mu     step   sigma     IR    |   BT
 0  -4.948e-17  +1.000e+03  +2e+03  6e-01  9e+00  1e+00  2e+00    ---    ---    1  1  - |  -  - 
 1  +7.764e+03  +2.740e+04  +5e+02  1e+01  4e+02  2e+02  5e-01  0.8576  9e-02   1  1  1 |  0  0
 2  +1.539e+03  +5.054e+03  +4e+02  2e+00  4e+01  9e+01  4e-01  0.1890  8e-01   2  1  2 |  0  0
 3  +1.292e+03  +3.488e+03  +3e+02  1e+00  2e+01  6e+01  3e-01  0.3872  2e-01   2  1  1 |  0  0
 4  +2.766e+03  +5.621e+03  +9e+01  2e+00  5e+01  2e+02  9e-02  0.9890  3e-01   2  1  1 |  0  0
 5  +1.387e+03  +2.353e+03  +7e+01  6e-01  1e+01  1e+00  7e-02  0.4830  4e-01   2  1  2 |  0  0
 6  +7.975e+02  +1.105e+03  +4e+01  2e-01  1e+00  5e+00  4e-02  0.5232  3e-01   2  2  2 |  0  0
 7  +1.649e+03  +2.043e+03  +3e+01  2e-01  1e+00  1e+01  3e-02  0.4898  5e-01   2  2  2 |  0  0
 8  +2.327e+03  +2.547e+03  +2e+01  1e-01  4e-

In [181]:
print(W.value)
print(b.value)

[[ -8.41115659]
 [-25.6793397 ]
 [ 23.1111561 ]
 [  6.19060486]
 [-38.17264422]
 [ 19.0705384 ]
 [  7.30296488]
 [ -9.47574343]
 [-11.61375605]
 [-17.39065898]
 [ 11.95301097]
 [ 23.03928693]
 [-49.83705507]
 [ 62.24284997]
 [-52.2541133 ]
 [ 18.42088092]
 [  6.01150469]
 [ -4.55755844]
 [ -5.69525223]
 [  1.84246607]
 [  2.35545237]
 [ -3.45433686]
 [ -1.81663261]
 [ 14.72096588]
 [-68.06072162]
 [ -1.24896327]
 [-29.24493448]
 [ 19.49549445]
 [  5.18246701]
 [ -3.49098151]
 [ 59.00988784]
 [  3.29845913]
 [  7.94573781]
 [ 33.27565587]]
0.242384032314


In [182]:
W_val = W.value
b_val = b.value

### Support Vectors

In [187]:
num_wrong = 0
for i in range(1000):
    value = y[i]*(np.dot(W_val.T, X2[i]) + b_val)
    if value < 0:
        print(i, value)
        num_wrong += 1
    elif value < 1:
        print(i, value)

119 [[ 1.]]
151 [[ 1.]]
161 [[ 1.]]
187 [[ 1.]]
188 [[ 1.]]
219 [[ 1.]]
239 [[ 1.]]
282 [[ 1.]]
309 [[ 1.]]
516 [[ 1.]]
549 [[ 1.]]
551 [[ 0.99999999]]
645 [[ 1.]]
691 [[ 1.]]
840 [[ 1.]]
856 [[ 1.]]
911 [[ 1.]]
925 [[ 1.]]


### Margin value

In [188]:
np.linalg.norm(W.value)

157.18521923222443