In [131]:
from cvxopt import matrix
from cvxopt import solvers
def SVM_primal(input,c):
    # Load input
    row_length,col_length = input.shape
    row_training  = row_length

    # Initialize training
    train_x = input[:,0:col_length-1]
    train_x = np.concatenate((train_x,np.ones((row_training,1))),axis = 1)#Add ones column as bias
    train_y = input[:,col_length-1:col_length]
    feature_num = train_x.shape[1] #11

    P = np.eye(feature_num-1)
    P = np.concatenate((P,np.zeros((feature_num-1,row_training+1))),axis = 1)
    P = np.concatenate((P,np.zeros((row_training+1,row_training+feature_num))),axis = 0) #347 x 347

    q = c * np.concatenate((np.zeros((1,feature_num)),train_y.T),axis=1)

    h = np.concatenate((-np.ones((row_training,1)),np.zeros((row_training,1))),axis=0)

    x = np.concatenate((train_x,np.zeros((row_training,feature_num))),axis=0) #336 x 347
    eye = np.eye(row_training) # 336 x 336
    eye = np.vstack((eye,eye)) #Add ones column as bias -> already added above
    x = np.concatenate((x,eye),axis=1) #672 x 347
    y = np.concatenate((train_y,train_y),axis=0)
    G = -y * x

    # Quadprop for Primal

    # Define QP parameters (directly)
    P_ = matrix(P)
    q_ = matrix(q.T)
    G_ = matrix(G)
    h_ = matrix(h)

    # Construct the QP, invoke solver
    sol = solvers.qp(P_,q_,G_,h_)

    # Get weight and b
    train_w = np.array(sol['x'])
    train_w = train_w[0:feature_num]
    
    return train_w

def load_data(filename):
    input = np.loadtxt(filename, delimiter=',')
    row_length,col_length = input.shape

    # Initialize training
    train_x = input[:,0:col_length-1]
    train_y = input[:,col_length-1:col_length]
    train_y[train_y == 0] = -1
    return [train_x,train_y]

def testing(filename,w,V_r):
    data = load_data(filename)
    train_x = data[0]
    train_y = data[1]
    
    row_length,col_length = train_x.shape

    
    #Projection of X along given eigenvectors
    X_r = np.dot(train_x,V_r) # Projection of X along eigen vectors
    Y = np.reshape(train_y,(train_y.shape[0],1))
    
#     SVM_data = np.concatenate((X_r,Y),axis=1) #672 x 347
    num_feature = X_r.shape[1]
    f = np.dot(X_r,w[0:num_feature]) + w[num_feature]
    f[f < 0] = -1
    f[f > 0] =  1 
    accuracy = 100*np.count_nonzero(Y*f+1)/row_length
    return accuracy

In [164]:
def PCA(filename,k):
    # Read data
    data = load_data(filename)
    X = data[0]
    Y = data[1]
    # Define input and output
    num_data = X.shape[0]
    num_feature = X.shape[1]
    # Construct matrix W - sample covariance matrix 
    X_mean = np.mean(X,axis=0)
    X_mean = np.reshape(X_mean,(num_feature,1))
    W = X - np.dot(np.ones((num_data,1)),X_mean.T)
    # Find SVD of W - covariance matrix
    U, s, V = np.linalg.svd(W, full_matrices=True)
    # eigen vectors and eigen value of covariance matrix 
    eigen_vec = U
    # s = np.reshape(s,(s.shape[0],1))
    # eigen_value = np.dot(s,s.T)
    eigen_value = s * s
    # Top k eigen value
    print("Top ",k," eigen values: ",eigen_value[:k])
    V_r = V[:,:k]
    return V_r

In [180]:
# Training

# Vr
train_file = "spam_train.data"
valid_file = "spam_validation.data"
k = 3
V_r = PCA(train_file,k)

# Prepare data
data = load_data(train_file)
X = data[0]
Y = data[1]
X_r = np.dot(X,V_r) # Projection of X along eigen vectors
Y = np.reshape(Y,(Y.shape[0],1))
SVM_data = np.concatenate((X_r,Y),axis=1) #672 x 347

# Feed data to SVM
for c in [1,10,100,1000]:
    train_w = SVM_primal(SVM_data,c)

    # Testing on validation set
    V_r = PCA(valid_file,k)
    print("c =",c," Accuracy on validation test =",testing(valid_file,train_w,V_r))

Top  3  eigen values:  [  1.52029434e+09   1.28884476e+08   3.44289156e+06]
     pcost       dcost       gap    pres   dres
 0: -6.7519e+02  2.0280e+04  3e+05  1e+01  5e+02
 1:  1.4814e+04 -2.7097e+04  5e+04  2e+00  9e+01
 2:  1.1306e+04 -2.1590e+03  1e+04  2e-01  1e+01
 3:  3.1309e+03  1.1590e+03  2e+03  3e-02  1e+00
 4:  2.4654e+03  1.4305e+03  1e+03  1e-02  7e-01
 5:  2.4413e+03  1.5393e+03  9e+02  6e-03  3e-01
 6:  1.9817e+03  1.7189e+03  3e+02  1e-03  6e-02
 7:  1.8859e+03  1.7731e+03  1e+02  4e-04  2e-02
 8:  1.8577e+03  1.7899e+03  7e+01  2e-04  1e-02
 9:  1.8355e+03  1.8042e+03  3e+01  8e-05  4e-03
10:  1.8291e+03  1.8083e+03  2e+01  5e-05  3e-03
11:  1.8222e+03  1.8130e+03  9e+00  2e-05  1e-03
12:  1.8187e+03  1.8152e+03  4e+00  5e-16  2e-13
13:  1.8172e+03  1.8167e+03  5e-01  6e-16  6e-14
14:  1.8170e+03  1.8169e+03  2e-01  5e-16  7e-12
15:  1.8170e+03  1.8169e+03  2e-02  6e-16  2e-12
16:  1.8169e+03  1.8169e+03  4e-04  5e-16  5e-12
Optimal solution found.
Top  3  eigen value

In [133]:
# Training
import numpy as np
# Read data
filename = "spam_train.data"
data = np.loadtxt(filename,delimiter=',')
# Define input and output
num_data = data.shape[0]
num_feature = data.shape[1]-1
X = data[:,0:num_feature]
Y = data[:,num_feature]
# Construct matrix W - sample covariance matrix 
X_mean = np.mean(X,axis=0)
X_mean = np.reshape(X_mean,(57,1))
W = X - np.dot(np.ones((num_data,1)),X_mean.T)
# Find SVD of W - covariance matrix
U, s, V = np.linalg.svd(W, full_matrices=True)
# eigen vectors and eigen value of covariance matrix 
eigen_vec = U
# s = np.reshape(s,(s.shape[0],1))
# eigen_value = np.dot(s,s.T)
eigen_value = s * s
# Top k eigen value
k = 1
print("Top ",k," eigen values: ",eigen_value[:k])

Top  1  eigen values:  [  1.52029434e+09]


In [176]:
# Testing on Validation set

# File Vr
filename = "spam_validation.data"
k = 6
V_r = PCA(filename,k)

# Prepare data
# data = load_data(filename)
# X = data[0]
# Y = data[1]
# X_r = np.dot(X,V_r) # Projection of X along eigen vectors
# Y = np.reshape(Y,(Y.shape[0],1))
# SVM_data = np.concatenate((X_r,Y),axis=1) #672 x 347

testing(filename,train_w,V_r)


Top  6  eigen values:  [  1.28577529e+08   3.92867543e+05   2.86376741e+04   6.70054786e+03
   4.29256011e+03   1.79410591e+03]


0.75

In [141]:
data = load_data("spam_train.data")
X = data[0]
Y = data[1]
# Define input and output
num_data = X.shape[0]
num_feature = X.shape[1]
num_feature

57

In [134]:
# PCA and SVM
# k eigen vector
V_r = V[:,0:k]
X_r = np.dot(X,V_r) # Projection of X along eigen vectors
Y = np.reshape(Y,(Y.shape[0],1))
Y[Y==0] = -1
SVM_data = np.concatenate((X_r,Y),axis=1) #672 x 347

In [135]:
c= 100
solv = SVM_primal(SVM_data,c)
# print("c =",c," Accuracy on validation test =",testing(SVM_data,solv[0],solv[1]))

     pcost       dcost       gap    pres   dres
 0: -1.5912e+07  4.8937e+07  3e+08  4e+02  1e+02
 1:  3.5692e+07 -1.9158e+07  7e+07  6e+01  2e+01
 2:  7.7086e+06  1.3826e+05  8e+06  3e-14  1e-13
 3:  3.6304e+05  1.4273e+05  2e+05  9e-15  2e-13
 4:  3.1430e+05  2.0362e+05  1e+05  4e-15  1e-13
 5:  2.5683e+05  2.3616e+05  2e+04  4e-16  9e-14
 6:  2.3765e+05  2.3737e+05  3e+02  3e-16  2e-13
 7:  2.3740e+05  2.3740e+05  3e+00  3e-16  2e-13
 8:  2.3740e+05  2.3740e+05  3e-02  3e-16  1e-13
Optimal solution found.


In [138]:
# test on validation 
filename = "spam_train.data"
testing(filename,solv,V_r)

60.43333333333333

In [143]:
solv

array([[  5.14359971e-08],
       [  9.99999777e-01]])

In [168]:
# test on validation 
filename = "spam_validation.data"
V_r = PCA(filename,2)
# V_r
testing(filename,solv,V_r)

Top  2  eigen values:  [  1.28577529e+08   3.92867543e+05]


IndexError: index 2 is out of bounds for axis 0 with size 2

In [167]:
V_r.shape

(57, 2)

In [88]:
# test on validation 
filename = "spam_validation.data"
data = np.loadtxt(filename,delimiter=',')
# Define input and output
num_data = data.shape[0]
num_feature = data.shape[1]-1
X = data[:,0:num_feature]
Y = data[:,num_feature]

#Projection of X along given eigenvectors
X_r = np.dot(X,V_r) # Projection of X along eigen vectors
Y = np.reshape(Y,(Y.shape[0],1))
Y[Y==0] = -1
SVM_data = np.concatenate((X_r,Y),axis=1) #672 x 347

In [None]:
testing(SVM_data,solv[0],solv[1],V)