In [1]:
# Question 1

# Setup
import numpy as np

X = np.array([
    [-1, -1,],
    [-1,  1,],
    [ 1, -1,],
    [ 1,  1,],
]).reshape(4, 2, 1)

f = np.array([-1, 1, 1, -1]).reshape(4, 1) / 8

print(f'X\n{X}')
print()
print(f'f\n{f}')

X
[[[-1]
  [-1]]

 [[-1]
  [ 1]]

 [[ 1]
  [-1]]

 [[ 1]
  [ 1]]]

f
[[-0.125]
 [ 0.125]
 [ 0.125]
 [-0.125]]


In [2]:
# Question 1, part (a)
sqrt_2 = 2 ** 0.5

def PHI(x):
    x = x.flatten()
    return np.array([
        [1, x[0] ** 2, x[1] ** 2,
         sqrt_2 * x[0], sqrt_2 * x[1], sqrt_2 * np.prod(x)]
    ])

known_PHI = np.array([
    [1, 1, 1, -sqrt_2, -sqrt_2,  sqrt_2,],
    [1, 1, 1, -sqrt_2,  sqrt_2, -sqrt_2,],
    [1, 1, 1,  sqrt_2, -sqrt_2, -sqrt_2,],
    [1, 1, 1,  sqrt_2,  sqrt_2,  sqrt_2,],
])

phi = np.empty_like(known_PHI)

for idx, x in enumerate(X):
    phi[idx] = PHI(x.T)

np.testing.assert_array_almost_equal(known_PHI, phi) # floating point errors
print('Verified phi')

known_K = np.diag([9,] * 4)
known_K[known_K == 0] = 1

K_ = phi @ phi.T
np.testing.assert_array_almost_equal(known_K, K_) # floating point errors
print('Verified K')

Verified phi
Verified K


In [3]:
# Question 1, part(b)
def K(x, y):
    return (1 + np.dot(x.T, y)) ** 2

K_2 = np.empty_like(known_K)

for i, x in enumerate(X):
    for j, y in enumerate(X):
        K_2[i][j] = K(x, y)

np.testing.assert_array_almost_equal(known_K, K_2) # floating point errors
print('Verified K calculated directly')

Verified K calculated directly


In [4]:
# Question 1, part (c)
def _K(X, y):
    result = np.empty((1, len(X),))
    for i, x in enumerate(X):
        result[0][i] = K(x, y)
    return result

def g(y):
    return np.dot(f.T, _K(X, y).T)

y = np.array([[0.7, -0.2]]).T
print(f'g(y) = {g(y)[0][0]}')

g(y) = 0.14


In [5]:
# Question 2

filename = 'wine.csv'
data = np.genfromtxt(filename, dtype='f4', delimiter=',', skip_header=1)
X = data[:,:-1]
y = data[:,-1]

In [6]:
# Partition the data
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    train_size=0.7, 
    shuffle=True,
    stratify=y
)

In [7]:
# Preprocess the data
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [8]:
# Setup

from sklearn.svm import SVC
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix

In [9]:
# Question 2, part (a)

clf_a = SVC(kernel='linear').fit(X_train,y_train)

print('Linear Kernel')
print(confusion_matrix(y_test, clf_a.predict(X_test)))
print(classification_report(y_test, clf_a.predict(X_test),zero_division=0))

Linear Kernel
[[18  0  0]
 [ 0 21  1]
 [ 0  0 14]]
              precision    recall  f1-score   support

         1.0       1.00      1.00      1.00        18
         2.0       1.00      0.95      0.98        22
         3.0       0.93      1.00      0.97        14

    accuracy                           0.98        54
   macro avg       0.98      0.98      0.98        54
weighted avg       0.98      0.98      0.98        54



In [10]:
# Question 2, part (b)

clf_b = SVC(kernel='poly').fit(X_train,y_train)

print('Polynomial Kernel')
print(confusion_matrix(y_test, clf_b.predict(X_test)))
print(classification_report(y_test, clf_b.predict(X_test),zero_division=0))

Polynomial Kernel
[[15  3  0]
 [ 0 22  0]
 [ 0  0 14]]
              precision    recall  f1-score   support

         1.0       1.00      0.83      0.91        18
         2.0       0.88      1.00      0.94        22
         3.0       1.00      1.00      1.00        14

    accuracy                           0.94        54
   macro avg       0.96      0.94      0.95        54
weighted avg       0.95      0.94      0.94        54



In [11]:
# Question 2, part (c)

clf_c = SVC(kernel='rbf').fit(X_train,y_train)

print('Radial Basis (Gaussian) Kernel')
print(confusion_matrix(y_test, clf_c.predict(X_test)))
print(classification_report(y_test, clf_c.predict(X_test),zero_division=0))

Radial Basis (Gaussian) Kernel
[[18  0  0]
 [ 0 21  1]
 [ 0  0 14]]
              precision    recall  f1-score   support

         1.0       1.00      1.00      1.00        18
         2.0       1.00      0.95      0.98        22
         3.0       0.93      1.00      0.97        14

    accuracy                           0.98        54
   macro avg       0.98      0.98      0.98        54
weighted avg       0.98      0.98      0.98        54

