In [26]:
import cvxopt
import numpy as np
import pandas as pd

In [27]:
def load_data(file_path):
    df = pd.read_csv(file_path, header=None)
    input_data = np.array(df[df.columns[1:]])
    temp = np.ones((input_data.shape[0], input_data.shape[1] + 1))
    temp[:, :-1] = input_data
    input_data = temp
    output_data = df[df.columns[0]]
    temp = []
    for x in output_data:
        if x == 0:
            temp.append(-1)
        else:
            temp.append(1)
    output_data = np.transpose(np.array(temp))
    return input_data, output_data

In [32]:
park_train_in, park_train_out = load_data(r"D:\sklearn\park_train.csv")
park_valid_in, park_valid_out = load_data("D:\sklearn\park_validation.csv")
park_test_in, park_test_out = load_data("D:\sklearn\park_test.csv")

In [33]:
def gaussian_kernel(x, y, sigma):
    z = x - y
    l2_norm = np.linalg.norm(z, ord=2)
    gaussian = np.exp((-1*l2_norm)/(2*(sigma)))
    return gaussian

In [34]:
def get_accuracy(input_data, output_data, train_input_data, train_output_data, lambdas, sigma):
    dataset_size = len(output_data)
    margin = np.zeros((dataset_size, 1))
    for k in range(0, dataset_size):
        for l in range(0, train_input_data.shape[0]):
            margin[k] = margin[k] + (train_output_data[l]*lambdas[l]*gaussian_kernel(input_data[k], train_input_data[l], sigma))
        margin[k] = output_data[k]*margin[k]
    acc = 100 - ((len(np.where(margin <= 0)[0])/len(input_data)) * 100)
    print(acc)
    return acc

In [35]:
def dual_svm_slack(i_data, o_data, c, sigma):
    if i_data.shape[0] != o_data.shape[0]:
        raise ValueError("Input and Output data size Mismatch")
    dataset_size = i_data.shape[0]
    input_size = i_data.shape[1]
    c_matrix = np.zeros((2*dataset_size, dataset_size))
    h = np.zeros((dataset_size * 2, 1))
    for j in range(0, dataset_size):
        c_matrix[j, j] = -1
        c_matrix[j+dataset_size, j] = 1
        h[j] = 0
        h[j+dataset_size] = c
    p = np.zeros((dataset_size, dataset_size))
    for i in range(0, dataset_size):
        for j in range(0, dataset_size):
            p[i][j] = o_data[i]*o_data[j]*gaussian_kernel(i_data[i], i_data[j], sigma)
    q = -1 * np.ones((dataset_size, 1))
    cvxopt.solvers.options['maxiters'] = 1000
    p = cvxopt.matrix(p, tc='d')
    q = cvxopt.matrix(q, tc='d')
    g = cvxopt.matrix(c_matrix, tc='d')
    h = cvxopt.matrix(h, tc='d')
    sol = cvxopt.solvers.qp(p, q, g, h)
    lambdas = sol['x']
    return list(lambdas)

In [36]:
input_size = park_train_in.shape[1]
accuracy_matrix = np.zeros((9, 5, 3))
for i in range(0, 9):
    for j in range(0, 5):
        c = np.power(10, i)
        sigma = 10 ** (j-1)
        lambda_values = dual_svm_slack(park_train_in, park_train_out, c, sigma)
        train_accuracy = get_accuracy(park_train_in, park_train_out, park_train_in, park_train_out, lambda_values, sigma)
        valid_accuracy = get_accuracy(park_valid_in, park_valid_out, park_train_in, park_train_out, lambda_values, sigma)
        test_accuracy = get_accuracy(park_test_in, park_test_out, park_train_in, park_train_out, lambda_values, sigma)
        accuracy_matrix[i, j, :] = [train_accuracy, valid_accuracy, test_accuracy]

print(accuracy_matrix)

     pcost       dcost       gap    pres   dres
 0: -3.4667e+01 -1.2133e+02  9e+01  0e+00  3e-16
 1: -3.6600e+01 -4.2180e+01  6e+00  1e-16  2e-16
 2: -3.8640e+01 -3.9438e+01  8e-01  1e-16  7e-17
 3: -3.8948e+01 -3.9058e+01  1e-01  1e-16  5e-17
 4: -3.8992e+01 -3.9008e+01  2e-02  2e-16  4e-17
 5: -3.8999e+01 -3.9001e+01  2e-03  2e-16  5e-17
 6: -3.9000e+01 -3.9000e+01  3e-04  3e-16  4e-17
 7: -3.9000e+01 -3.9000e+01  5e-05  2e-16  5e-17
 8: -3.9000e+01 -3.9000e+01  7e-06  1e-16  4e-17
Optimal solution found.
100.0
81.0344827586207
81.35593220338983
     pcost       dcost       gap    pres   dres
 0: -3.4138e+01 -1.2454e+02  9e+01  0e+00  4e-16
 1: -3.5917e+01 -4.1928e+01  6e+00  1e-16  2e-16
 2: -3.7839e+01 -3.8620e+01  8e-01  2e-16  1e-16
 3: -3.8109e+01 -3.8205e+01  1e-01  2e-16  9e-17
 4: -3.8141e+01 -3.8151e+01  1e-02  2e-16  9e-17
 5: -3.8144e+01 -3.8146e+01  1e-03  2e-16  8e-17
 6: -3.8145e+01 -3.8145e+01  1e-04  2e-16  9e-17
 7: -3.8145e+01 -3.8145e+01  2e-05  2e-16  7e-17
Optima