In [1]:
import numpy as np
import cvxopt as copt

In [2]:
from google.colab import drive

In [3]:
#Data Loading and Pre-Processing
# Mount Google Drive first so the dataset path below resolves on a fresh Colab
# runtime (the path lives under /content/drive, which only exists once mounted).
drive.mount('/content/drive')

# Tab-separated file; the first row is skipped as a header.
data = np.loadtxt('/content/drive/MyDrive/PRNN/Assignment_2/binary_class/multi_class_classification_data_group_5_train.txt', delimiter='\t',skiprows=1)
print(data.shape)

train_ratio,test_ratio = 0.7,0.3

# Shuffle the rows in place with a seeded generator so that the train/test
# split (and every number reported further down) is reproducible across runs.
SEED = 42
rng = np.random.default_rng(SEED)
rng.shuffle(data)

num_samples = len(data)
num_train,num_test = int(train_ratio * num_samples),int(test_ratio * num_samples)

(14000, 11)


Data Splitting

In [None]:
# Data Splitting
# The first `num_train` rows form the training part; every remaining row is
# held out as the test part.
train_data = data[:num_train]
test_data = data[num_train:]

print("Training set size:", len(train_data))
print("Test set size:", len(test_data))

# The last column holds the label; all preceding columns are features.
X_train, y_train = train_data[:, :-1], train_data[:, -1]
X_test, y_test = test_data[:, :-1], test_data[:, -1]

# Test labels are converted to integers (training labels stay floating point).
float_array = np.array(y_test)
y_test = float_array.astype(int)
num_classes = 2

# Remap labels with y -> 2*y - 1, i.e. 0 -> -1 and 1 -> +1.
# NOTE(review): this assumes the raw labels are exactly 0/1 -- confirm against the data file.
y_train = np.array(2 * y_train - 1)
y_test = np.array(2 * y_test - 1)

Training set size: 11200
Test set size: 2800


Define Kernels

In [None]:
def linear_kernel(X1, X2):
    """Linear kernel: pairwise inner products between the rows of X1 and X2.

    Returns the matrix whose (i, j) entry is the dot product of row i of
    ``X1`` with row j of ``X2``.
    """
    gram = np.dot(X1, np.transpose(X2))
    return gram

def polynomial_kernel(X1, X2, degree=3):
    """Polynomial kernel ``(<x, z> + 1) ** degree`` for every row pair.

    ``degree`` is the exponent applied element-wise to the shifted inner
    products (default 3).
    """
    shifted = np.dot(X1, np.transpose(X2)) + 1
    return shifted ** degree

def rbf_kernel(X1, X2, gamma=1.0):
    """Gaussian (RBF) kernel matrix between the rows of X1 and X2.

    Entry (i, j) of the result is ``exp(-gamma * ||X1[i] - X2[j]||**2)``.

    Parameters
    ----------
    X1 : array-like, shape (n1, d) or (n1,)
    X2 : array-like, shape (n2, d) or (n2,)
        Sample matrices, one sample per row. A 1-D input is treated as a
        collection of scalar samples.
    gamma : float, default 1.0
        Width parameter multiplying the squared Euclidean distance.

    Returns
    -------
    numpy.ndarray, shape (n1, n2)
    """
    X1 = np.asarray(X1, dtype=float)
    X2 = np.asarray(X2, dtype=float)
    if X1.ndim == 1:
        X1 = X1[:, np.newaxis]
    if X2.ndim == 1:
        X2 = X2[:, np.newaxis]

    # Vectorised squared distances via ||a - b||^2 = ||a||^2 + ||b||^2 - 2 a.b,
    # replacing an n1 * n2 Python-level loop with a single matrix product.
    sq_norms_1 = np.sum(X1 * X1, axis=1)[:, np.newaxis]
    sq_norms_2 = np.sum(X2 * X2, axis=1)[np.newaxis, :]
    sq_dists = sq_norms_1 + sq_norms_2 - 2.0 * np.dot(X1, X2.T)

    # Floating-point cancellation can leave tiny negative values where the
    # true distance is zero; clamp so the kernel never exceeds 1.
    np.maximum(sq_dists, 0.0, out=sq_dists)

    return np.exp(-gamma * sq_dists)

Define Optimization Function (without slack)

In [None]:
def optimize_dual(X, y, kernel):
    """Solve the hard-margin (no slack) SVM dual problem with CVXOPT.

    The quadratic program handed to the solver is::

        minimize    1/2 * a^T P a - 1^T a
        subject to  a >= 0
                    y^T a = 0

    with ``P[i, j] = y[i] * y[j] * K[i, j]`` and ``K = kernel(X, X)``.

    Parameters
    ----------
    X : numpy.ndarray
        Training samples, one per row.
    y : numpy.ndarray
        Labels, one per row of ``X``.
        NOTE(review): the formulation presumes labels in {-1, +1} -- confirm at the call site.
    kernel : callable
        ``kernel(X, X)`` must return the Gram matrix of the training samples.

    Returns
    -------
    numpy.ndarray
        The Lagrange multipliers as converted from the solver's ``'x'`` entry
        (callers in this notebook flatten it before use).
    """
    import warnings

    n_samples = X.shape[0]

    # CVXOPT's QP solver only accepts double-precision ('d') matrices, so the
    # labels and the Gram matrix are forced to float before building P.
    # Integer labels combined with an integer-valued kernel would otherwise
    # produce an integer matrix that the solver rejects.
    y = np.ascontiguousarray(y, dtype=float)

    # Compute the Gram matrix
    K = np.asarray(kernel(X, X), dtype=float)

    # Define the quadratic and linear terms of the QP problem
    P = copt.matrix(np.outer(y, y) * K)
    q = copt.matrix(-np.ones(n_samples))
    G = copt.matrix(-np.eye(n_samples))  # -a <= 0, i.e. a >= 0 (no slack variables)
    h = copt.matrix(np.zeros(n_samples)) # no upper bound on a (no slack variables)
    A = copt.matrix(y, (1, n_samples))
    b = copt.matrix(0.0)

    # Solve the QP problem
    solution = copt.solvers.qp(P, q, G, h, A, b)

    # Surface a non-converged solve instead of silently using its iterate.
    if solution['status'] != 'optimal':
        warnings.warn(
            "QP solver finished with status %r; the multipliers may be inaccurate"
            % (solution['status'],),
            RuntimeWarning,
        )

    # Extract lagrange multipliers
    alpha = np.array(solution['x'])
    return alpha


Train Function

In [None]:
def train_svm(X_train, y_train, kernel):
    """Fit a kernel SVM and return its support vectors, labels, multipliers and bias.

    The dual problem is solved by ``optimize_dual``; samples whose multiplier
    exceeds ``1e-5`` are kept as support vectors. The bias is the mean, over
    the support vectors, of ``label - sum_j alpha_j * y_j * K(sv_j, sv)``.

    Returns
    -------
    tuple
        ``(support_vectors, support_vector_labels, alpha_sv, bias)``.
    """
    multipliers = optimize_dual(X_train, y_train, kernel)

    # Boolean mask over the training samples: True where the multiplier is
    # numerically non-zero.
    is_support = (multipliers > 1e-5).flatten()

    support_vectors = X_train[is_support]
    support_vector_labels = y_train[is_support]
    alpha_sv = multipliers[is_support].reshape(-1,)

    # Decision values of the support vectors without the bias term.
    gram_sv = kernel(support_vectors, support_vectors)
    signed_weights = support_vector_labels * alpha_sv
    unbiased_scores = np.dot(gram_sv, signed_weights)

    # Average residual between the labels and the unbiased scores.
    bias = np.mean(support_vector_labels - unbiased_scores)
    return support_vectors, support_vector_labels, alpha_sv, bias

Predict Function

In [None]:
def predict_svm(X_test, support_vectors, support_vector_labels, alpha_sv, bias, kernel):
    """Classify the rows of ``X_test`` with a trained kernel SVM.

    The score of each row is
    ``sum_j alpha_j * y_j * kernel(x, sv_j) + bias``; the returned array holds
    the sign of each score (so a score of exactly zero yields 0).
    """
    signed_weights = support_vector_labels * alpha_sv
    similarities = kernel(X_test, support_vectors)
    scores = np.dot(similarities, signed_weights) + bias
    return np.sign(scores)

Grid Search

In [None]:
def grid_search(X_train, y_train, X_test, y_test):
    """Train one SVM per candidate kernel and report the most accurate one.

    Each candidate is fitted on the training split and scored by the fraction
    of test samples whose predicted sign equals the test label.

    Returns
    -------
    dict
        ``{'kernel': <name of best kernel>, 'accuracy': <its accuracy>}``.
    """
    # Candidate kernels. Only the RBF kernel is active at the moment; the
    # linear and polynomial kernels can be added to this list to compare them.
    kernels = [rbf_kernel]

    # Display names for the known kernels; anything else is reported as 'rbf'.
    kernel_names = {linear_kernel: 'linear', polynomial_kernel: 'polynomial'}

    # Start below any achievable accuracy (minimum 32-bit signed integer).
    best_accuracy = -(1 << 31)
    best_kernel = None

    for kernel in kernels:
        # Fit on the training split, then classify the test split.
        model = train_svm(X_train, y_train, kernel)
        y_pred = predict_svm(X_test, *model, kernel)

        # Fraction of correctly classified test samples.
        accuracy = np.mean(y_pred == y_test)

        if not accuracy > best_accuracy:
            continue

        best_accuracy = accuracy
        best_kernel = kernel_names.get(kernel, 'rbf')

    return {'kernel': best_kernel, 'accuracy': best_accuracy}

Perform Grid Search

In [None]:
# Run the kernel search on the train/test split prepared in the earlier cells.
best_model = grid_search(X_train, y_train, X_test, y_test)

# Report the selected kernel and the accuracy it reached on the test split.
for caption, field in (("Best Kernel:", 'kernel'), ("Best Accuracy:", 'accuracy')):
    print(caption, best_model[field])

     pcost       dcost       gap    pres   dres
 0: -3.5263e+03 -1.3631e+04  5e+04  1e+02  3e+00
 1: -5.6272e+03 -2.1105e+04  3e+04  7e+01  1e+00
 2: -5.8875e+03 -1.8169e+04  2e+04  3e+01  7e-01
 3: -6.1747e+03 -1.1186e+04  5e+03  3e+00  5e-02
 4: -6.8821e+03 -8.1603e+03  1e+03  5e-01  1e-02
 5: -7.0627e+03 -7.3655e+03  3e+02  8e-02  2e-03
 6: -7.1129e+03 -7.1607e+03  5e+01  8e-03  2e-04
 7: -7.1227e+03 -7.1272e+03  5e+00  6e-04  1e-05
 8: -7.1238e+03 -7.1240e+03  2e-01  2e-05  3e-07
 9: -7.1239e+03 -7.1239e+03  6e-03  3e-07  6e-09
10: -7.1239e+03 -7.1239e+03  2e-04  4e-09  8e-11
Optimal solution found.
Best Kernel: rbf
Best Accuracy: 0.7364285714285714
