In [None]:
# import libraries
import pandas as pd
import numpy as np
import cvxpy as cp
from sklearn.preprocessing import StandardScaler

In [None]:
# load datasets
train_data = pd.read_csv("train.csv") # load training data
test_data = pd.read_csv("test.csv") # load testing data

In [None]:
# separate features (X) and labels (y) for training and test datasets
X_train = train_data.iloc[:4000, 1:].values
label_train = train_data.iloc[:4000, 0].values
X_val = train_data.iloc[4000:, 1:].values
label_val = train_data.iloc[4000:, 0].values
X_test = test_data.iloc[:, 1:].values
label_test = test_data.iloc[:, 0].values

In [None]:
# scale the data
scaler = StandardScaler()
data_train = scaler.fit_transform(X_train)
data_val = scaler.transform(X_val)
data_test = scaler.transform(X_test)

Question 2:

In [None]:
# training algorithm for soft-margin SVM from its primal form
def svm_train_primal(data_train, label_train, regularisation_para_C):
    N, num_features = data_train.shape

    # optimization variables
    w = cp.Variable(num_features) # weight vector
    b = cp.Variable() # bias term
    xi = cp.Variable(N) # slack variable for soft-margin

    # objective function to minimise
    hinge_loss = cp.sum(cp.maximum(0, 1 - cp.multiply(label_train, data_train @ w + b)))
    regularisation_term = 0.5 * cp.norm(w, 'fro') ** 2
    soft_margin_term = regularisation_para_C * cp.sum(xi)
    objective = cp.Minimize(hinge_loss + regularisation_term + soft_margin_term)

    # constraints
    constraints = [xi >= 0, xi >= 1 - cp.multiply(label_train, data_train @ w + b)]
    
    # create and solve optimisation problem
    prob = cp.Problem(objective, constraints)
    prob.solve()

    # extract solutions (w,b) as a dictionary
    svm_model = {'w': w.value, 'b': b.value}
    
    return svm_model

In [None]:
# prediction and accuracy calculation
def svm_predict_primal(data_test, label_test, svm_model):
    w = svm_model['w']
    b = svm_model['b']

    # make prediction
    prediction = np.sign(data_test @ w + b)

    # calculate accuracy
    accuracy = np.mean(prediction == label_test)

    return accuracy

In [None]:
regularisation_para_C = 100

In [None]:
# train SVM model and obtain model parameters
svm_model = svm_train_primal(data_train, label_train, regularisation_para_C)
test_accuracy = svm_predict_primal(data_test, label_test, svm_model)

In [None]:
# report solution of b and sum of all dimensions of w
b = svm_model['b']
w_sum = np.sum(svm_model['w'])
print("Solution of b:", b)
print("Sum of all dimensions of w:", w_sum)

In [None]:
# report test accuracy
print("Test accuracy:", test_accuracy)

Question 3:

In [None]:
def svm_train_dual(data_train, label_train, regularisation_para_C):
    N, num_features = data_train.shape

    # optimisation variable
    a = cp.Variable(N)

    # objective to maximise
    objective = cp.Maximize(cp.sum(a) - 0.5 * cp.norm(cp.multiply(label_train, a).T @ data_train, 'fro')**2)

    # constraints
    constraints = [a >= 0, a <= regularisation_para_C / N, cp.sum(cp.multiply(label_train, a)) == 0]

    # create and solve optimisation problem
    prob = cp.Problem(objective, constraints)
    prob.solve()

    # add optimal a to the dictionary
    svm_model = {'a': a.value}

    return svm_model

In [None]:
# train SVM model and obtain optimal alpha
svm_model = svm_train_dual(data_train, label_train, regularisation_para_C)

In [None]:
# report sum of all dimensions of optimal alpha
a_sum = np.sum(svm_model['a'])
print("Sum of all dimensions of optimal a:", a_sum)

Question 4:

In [None]:
def primal_solution(data_train, label_train, regularisation_para_C, alpha):
    # find indices of support vectors with 0 < alpha < C
    indices = np.where((alpha > 0) & (alpha < regularisation_para_C))[0]

    # calculate w* using formula: w* = Σ(a_i * y_i * x_i) for all support vectors (a_i > 0)
    primal_w = np.sum(alpha[indices][:, np.newaxis] * label_train[indices][:, np.newaxis] * data_train[indices], axis=0)

    # get b* values for each support vector
    primal_b_values = label_train[indices] - np.dot(data_train[indices], primal_w)

    # average the b* values
    optimal_b = np.mean(primal_b_values)

    return primal_w, optimal_b

In [None]:
primal_w, primal_b = primal_solution(data_train, label_train, regularisation_para_C, svm_model['a'])

In [None]:
# report values of w* and b*
print("Primal solution w*:", primal_w)
print("Primal solution b*:", primal_b)

Question 5:

In [None]:
# find support vectors from primal problem solutions
def find_support_vectors(data, labels, primal_w, primal_b):
    sv = [] # initialise empty list to store support vectors
    
    for i in range(len(data)):
        # calculate margin condition for the i-th data point
        margin_condition = 1 - labels[i] * (np.dot(primal_w, data[i]) + primal_b)
        
        # check if margin condition is <= 0
        if margin_condition <= 0:
            # if condition is met, data point is a support vector
            sv.append(data[i])
    
    # convert list of support vectors to numpy array for convenience
    return np.array(sv)

In [None]:
support_vectors = find_support_vectors(data_train, label_train, primal_w, primal_b)
print("Support Vectors:", support_vectors)

Question 6:

In [None]:
def find_support_vectors_dual(data_train, label_train, regularisation_para_C, alpha):
    # initialise empty list to store the indices of support vectors
    indices = np.where((alpha > 0) & (alpha < regularisation_para_C))[0]

    # extract support vectors from training data based on computed alpha values
    support_vectors = data_train[indices]
    
    # extract labels corresponding to support vectors
    support_vector_labels = label_train[indices]
    
    return support_vectors, support_vector_labels

In [None]:
support_vectors, support_vector_labels = find_support_vectors_dual(data_train, label_train, regularisation_para_C, svm_model['a'])
print("Support Vectors:", support_vectors)
print("Support Vector Labels:", support_vector_labels)

Question 7:

In [None]:
# define range of C values to search within
C_values = [2 ** i for i in range(-10, 11)]

# initialise variables to store best C and its validation accuracy
best_C = None
best_validation_accuracy = 0.0

# iterate through each C value and evaluate on the validation set
for C in C_values:
    # train SVM model with current C
    svm_model = svm_train_primal(data_train, label_train, C)
    
    # test SVM model on validation set
    validation_accuracy = svm_predict_primal(data_val, label_val, svm_model)
    
    # check if current C resulted in higher validation accuracy
    if validation_accuracy > best_validation_accuracy:
        best_C = C
        best_validation_accuracy = validation_accuracy

In [None]:
# report the best C and its validation accuracy
print("Best C:", best_C)
print("Validation Accuracy with Best C:", best_validation_accuracy)

Question 8:

In [150]:
from sklearn.svm import SVC

# use best C value found during validation
best_svm_model = SVC(C=best_C, kernel='linear')

# train SVM model on the entire training dataset
best_svm_model.fit(data_train, label_train)

# test SVM model on the test dataset
test_accuracy_sklearn = best_svm_model.score(data_test, label_test)

# print test accuracy
print("Test Accuracy with Scikit-learn SVM:", test_accuracy_sklearn)

Test Accuracy with Scikit-learn SVM: 0.9659773182121414
