In [1]:
import pandas as pd
import numpy as np
from cvxopt import matrix, solvers
from cvxopt.modeling import variable
from sklearn.svm import SVC

In [2]:
data = pd.read_csv('2019EE10577.csv', header = None)

t1 = 0
t2 = 1
num_f = 25
train_ex = 480

bin_data = data[(data[25] == t1) | (data[25] == t2)].sample(frac=1)
X_bin = np.array(bin_data.loc[:,:num_f-1])
t_bin = np.array(bin_data.loc[:,25])
t_bin[t_bin == 0] = -1

X_bin_train = X_bin[:train_ex]
t_bin_train = t_bin[:train_ex]
X_bin_val = X_bin[train_ex:]
t_bin_val = t_bin[train_ex:]

In [3]:
ker = 'linear'
if ker is 'linear':
    C = 1
if ker is 'rbf':
    C = 10
    gamma = 0.001

n_samples, n_features = X_bin_train.shape
K = np.zeros((n_samples, n_samples))

for i in range(n_samples):
    for j in range(n_samples):
        if ker is 'linear':
            K[i,j] = np.dot(X_bin_train[i],np.transpose(X_bin_train[j]))
        if ker is 'rbf':
            K[i,j] = np.exp(-1*gamma*np.sum(np.square(X_bin_train[i]-X_bin_train[j])))

P = matrix(np.outer(t_bin_train,t_bin_train) * K)
q = matrix(np.ones(n_samples) * -1)
A = matrix(t_bin_train, (1,n_samples))
A = matrix(A,(1,n_samples),'d')
b = matrix(0.0)
G = matrix(np.vstack((np.diag(np.ones(n_samples) * -1), np.identity(n_samples))))
h = matrix(np.hstack((np.zeros(n_samples), np.ones(n_samples) * C)))
solution = solvers.qp(P, q, G, h, A, b)
a = np.ravel(solution['x'])
sv = a > 1e-5

     pcost       dcost       gap    pres   dres
 0: -2.7501e+01 -8.2379e+02  4e+03  2e+00  6e-14
 1: -1.5087e+01 -4.0611e+02  7e+02  3e-01  4e-14
 2: -3.9615e+00 -8.2329e+01  1e+02  5e-02  2e-14
 3: -1.8183e-01 -1.5192e+01  2e+01  7e-03  6e-15
 4:  1.2470e-02 -1.3521e+00  2e+00  3e-04  1e-15
 5: -1.3634e-01 -5.8279e-01  5e-01  9e-05  9e-16
 6: -2.1093e-01 -4.2805e-01  2e-01  1e-05  8e-16
 7: -2.5068e-01 -3.5404e-01  1e-01  2e-16  8e-16
 8: -2.7671e-01 -3.0951e-01  3e-02  2e-16  1e-15
 9: -2.8407e-01 -2.9746e-01  1e-02  2e-16  9e-16
10: -2.9008e-01 -2.9041e-01  3e-04  2e-16  1e-15
11: -2.9023e-01 -2.9023e-01  3e-06  2e-16  1e-15
12: -2.9023e-01 -2.9023e-01  3e-08  2e-16  1e-15
Optimal solution found.


In [4]:


lm = a[sv]
lm = lm.reshape(len(lm),1)
sv_t = t_bin_train[sv]
sv_t = sv_t.reshape(len(sv_t),1)
sv_x = X_bin_train[sv]



In [6]:
w = np.sum(lm*sv_t*sv_x,axis=0)
print('w =', w)

w = [-0.563549   -0.09544524 -0.03698974  0.04727053  0.15937897  0.07517939
  0.05214242 -0.09049166 -0.11843432  0.06621365 -0.02536556  0.07187129
  0.0207943   0.1463733   0.1977143  -0.0357065  -0.20133262 -0.11939814
  0.1604992   0.01375913  0.16633819  0.00768156  0.00918114  0.00416929
 -0.11485394]


In [7]:
b = np.mean(t_bin_train - np.dot(X_bin_train,w))
print('b =', b)

b = 0.3950632229517911


In [8]:
y_train = np.dot(X_bin_train,w) + b
y_val = np.dot(X_bin_val,w)+b
y_train[y_train<0] = -1
y_train[y_train>=0] = 1
y_val[y_val<0] = -1
y_val[y_val>=0] = 1
y_train = y_train.astype(int)
y_val = y_val.astype(int)

In [9]:


res1 = (y_train == t_bin_train)
res2 = (y_val == t_bin_val)
print('Training score =',(res1==True).sum()/len(res1))
print('Validation score =',(res2==True).sum()/len(res2))
print(str(len(sv_x))+' support vectors using CVXOPT:', np.where(sv==True)[0].tolist())



Training score = 1.0
Validation score = 1.0
15 support vectors using CVXOPT: [2, 93, 108, 127, 146, 172, 249, 294, 300, 317, 387, 406, 409, 462, 463]


In [10]:
if ker is 'linear':
    s = SVC(kernel='linear',C = 1)
if ker is 'rbf':
    s = SVC(kernel='rbf', C=10, gamma=0.001)
s.fit(X_bin_train,t_bin_train)
print('Training score using SVC =',s.score(X_bin_train,t_bin_train))
print('Validation score using SVC =',s.score(X_bin_val,t_bin_val))
print(str(len(s.support_))+' support vectors using SVC:', sorted(s.support_))

Training score using SVC = 1.0
Validation score using SVC = 1.0
15 support vectors using SVC: [2, 93, 108, 127, 146, 172, 249, 294, 300, 317, 387, 406, 409, 462, 463]
