In [2]:
import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score

from cvxopt import solvers
from cvxopt import matrix

from scipy.spatial.distance import cdist

# RBF KERNEL:
def getRBFKernelMatrix(X1,X2,sigma):
    return np.exp(-cdist(X1, X2, 'sqeuclidean') / (2*sigma**2))

In [3]:
# Processed titanic dataset from Exercise 2
titanic_df = pd.read_csv('datasets/titanic_processed.csv')
X = titanic_df.drop('Survived', 1).values
y = titanic_df['Survived'].values
y[y==0] = -1
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

In [8]:
sigma=.5
C=.001
n=X_train.shape[0]
I_n = np.eye(n)
X_dash = X_train * y_train.reshape(-1,1)
P=getRBFKernelMatrix(X_dash,X_dash,sigma)
q=np.full(n,-1)
G=np.vstack((I_n,-1*I_n))
h=np.hstack((np.full(n,C),np.zeros(n)))
A=y_train.reshape(1,-1)
b=np.zeros(1)

P,q,G,h,A,b = map(lambda x : matrix(x,tc="d"),(P,q,G,h,A,b))

a = solvers.qp(P, q, G, h, A, b)
a = np.asarray(a['x']).squeeze()

     pcost       dcost       gap    pres   dres
 0: -7.8006e+01 -2.9033e+01  3e+03  5e+01  4e-16
 1: -3.0075e+01 -7.6546e+00  2e+02  4e+00  5e-16
 2: -2.5655e+00 -1.5058e+00  1e+01  2e-01  1e-15
 3: -5.5347e-01 -1.2688e+00  9e-01  3e-03  1e-15
 4: -5.2935e-01 -5.6159e-01  3e-02  8e-05  9e-16
 5: -5.3103e-01 -5.3768e-01  7e-03  6e-06  3e-16
 6: -5.3223e-01 -5.3401e-01  2e-03  1e-06  3e-16
 7: -5.3267e-01 -5.3302e-01  4e-04  1e-07  3e-16
 8: -5.3276e-01 -5.3288e-01  1e-04  2e-08  3e-16
 9: -5.3279e-01 -5.3284e-01  5e-05  5e-09  3e-16
10: -5.3280e-01 -5.3283e-01  3e-05  1e-09  3e-16
11: -5.3280e-01 -5.3282e-01  2e-05  6e-10  3e-16
12: -5.3281e-01 -5.3281e-01  5e-06  9e-18  3e-16
13: -5.3281e-01 -5.3281e-01  2e-06  5e-18  3e-16
14: -5.3281e-01 -5.3281e-01  8e-07  5e-18  3e-16
15: -5.3281e-01 -5.3281e-01  3e-07  6e-18  3e-16
Optimal solution found.


In [9]:
idx = np.logical_and(a>=1e-10,  a<C)
Xy = X_train * y_train.reshape(-1,1)
w0 = np.mean((y_train - getRBFKernelMatrix(Xy,X_train,sigma) @ a)[idx] )
y_pred = np.sign(getRBFKernelMatrix(X_test,Xy,sigma).dot(a)+w0)
print(f'Accuracy: {accuracy_score(y_test, y_pred)*100:.2f}')

Accuracy: 58.99


$$\newcommand {\norm}[1] {\lVert{#1}\rVert}$$
$$
\min \frac{1}{2}\norm{w}_2^2+C\sum_{i=1}^{n} \xi_i\\
s.t. y_i(w^Tx_i−b)≥1−\xi i,\xi i≥0,∀i
$$


$$\norm{W}$$