# Machine Learning
Yorick Juffer - s1993623 \
Leon Koole - s4436563 \
... \
Alejandro Sánchez Roncero - s5279402

## Libraries

In [22]:

import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split

from sklearn.metrics import confusion_matrix 

# new contribution
from sklearn import svm
from sklearn.model_selection import GridSearchCV

from sklearn.decomposition import PCA

# Loading data
This dataset contains 2000 grayscale images of handwritten digits, 200 from each class. The images are 15 × 16 
sized, making for n = 240 dimensional image vectors. The data are in the attached text file mfeat-pix.txt, 
one vector per row, sorted such that the first 200 rows are "0" digit examples, the next 200 are "1" digit examples 
etc. The grayscale encoding in mfeat-pix.txt is done by integer steps from 0 (white) to 6 (black). 

In [23]:
# Load lines from mfeat-pix.txt
features = open('mfeat-pix.txt').readlines()

# Create labels for each line, 0-9 for each 200 lines
labels = []
for i in range(10):
  for j in range(200):
    labels.append(i)
labels = np.array(labels)

# Convert each line to a numpy array
for i in range(len(features)):
  features[i] = np.array(features[i].split()).astype('float').reshape(16*15)
  # Normalize
  features[i] = features[i] / 6
features = np.array(features)

print('features: {}, labels: {}'.format(features.shape, labels.shape))

features: (2000, 240), labels: (2000,)


In [24]:
# Load augmented data
X_train_aug = np.load('X_train_augmented.npy')
X_train_aug = np.array([ff.reshape(240) for ff in X_train_aug])

X_test_aug = np.load('X_test_augmented.npy')
X_test_aug = np.array([ff.reshape(240) for ff in X_test_aug])

y_train_aug = np.array(np.load('y_train_augmented.npy'))
y_test_aug = np.array(np.load('y_test_augmented.npy'))
y_train_aug.reshape(y_train_aug.shape[0]); y_test_aug.reshape(y_test_aug.shape[0]);

## Split

In [26]:
split_train_test = 0.5

X_train, X_test, y_train, y_test = train_test_split(features, labels, test_size=split_train_test, random_state=40, stratify=labels)
y_train.reshape(y_train.shape[0]); y_test.reshape(y_test.shape[0]);

# SVM Model
Training of SVC and NuSVC.
- Both use one-vs-one approach
- C: by default 1 is fine. Less C means more regularization (more smooth surface), only useful for noisy observations
- Data has been scaled since they're not scale invariant
- kernel options: polynomial, rbf, sigmoid

In [28]:
# Grid search evaluation
# scores = ["precision"]
gamma_search = [1e-2, 2.5e-2, 5e-2, 7.5e-2, 1e-1, 5e-1]
C_search = [1e-1, 5e-1, 1, 1.5, 2, 3, 4, 5, 10]
r_search = [0, 0.5, 1, 5, 10]
degree_search = [2, 3]

pca = PCA(n_components = 50)
pca.fit(X_train)
X_train_reduced = pca.transform(X_train)
X_test_reduced = pca.transform(X_test)

pca_aug = PCA(n_components = 50)
pca_aug.fit(X_train_aug)
X_train_aug_reduced = pca.transform(X_train_aug)
X_test_aug_reduced = pca.transform(X_test_aug)

param_grid = [
    {"kernel": ["rbf"], "gamma": gamma_search, "C": C_search},
    {"kernel": ["poly"], "degree": degree_search, "coef0": r_search, "gamma": gamma_search, "C": C_search},
    {"kernel": ["sigmoid"], "coef0": r_search, "gamma": gamma_search, "C": C_search}
]

for param_dict in param_grid:
    print("Optimizing kernel: {}".format(param_dict["kernel"]))
    
    grid_search = GridSearchCV(
        svm.SVC(), param_dict, 
        verbose=3,
        return_train_score=True,
        n_jobs=4
    )
    
    grid_search.fit(X_train_reduced, y_train)
    best_score_on_test = grid_search.score(X_test_reduced, y_test)

    print("params: {} with mean test score: {}".format(grid_search.best_params_, grid_search.best_score_))

    # grid_search_df = pd.DataFrame(grid_search.cv_results_)
    # grid_search_df.to_csv("SVM/grid_search.csv")

Optimizing kernel: ['rbf']
Fitting 5 folds for each of 54 candidates, totalling 270 fits
params: {'C': 1.5, 'gamma': 0.05, 'kernel': 'rbf'} with mean test score: 0.9790000000000001
Optimizing kernel: ['poly']
Fitting 5 folds for each of 540 candidates, totalling 2700 fits
params: {'C': 0.1, 'coef0': 5, 'degree': 2, 'gamma': 0.5, 'kernel': 'poly'} with mean test score: 0.9829999999999999
Optimizing kernel: ['sigmoid']
Fitting 5 folds for each of 270 candidates, totalling 1350 fits
params: {'C': 2, 'coef0': 0, 'gamma': 0.01, 'kernel': 'sigmoid'} with mean test score: 0.969


In [30]:
# Training of the rbf-based kernel
svc_rbf = svm.SVC(decision_function_shape='ovo', C=1.5, gamma=0.05, kernel="rbf") 
svc_rbf.fit(X_train_reduced, y_train)

svc_rbf_aug = svm.SVC(decision_function_shape='ovo', C=1.5, gamma=0.05, kernel="rbf") 
svc_rbf_aug.fit(X_train_aug_reduced, y_train_aug)

# Training of the polynomial-based kernel
svc_poly = svm.SVC(decision_function_shape='ovo', C=0.1, coef0=5, degree=2, gamma=0.5, kernel="poly") 
svc_poly.fit(X_train_reduced, y_train)

svc_poly_aug = svm.SVC(decision_function_shape='ovo', C=0.1, coef0=5, degree=2, gamma=0.5, kernel="poly") 
svc_poly_aug.fit(X_train_aug_reduced, y_train_aug)

## Model evaluation -> average across several splits of the dataset

In [32]:
random_seeds = [0, 21, 42, 100, 200, 300, 400, 500, 700, 1000]
scores_rbf = []; scores_poly = []
for rs in random_seeds:

    split_train_test = 0.2

    X_train, X_test, y_train, y_test = train_test_split(features, labels, test_size=split_train_test, random_state=rs, stratify=labels)
    y_train.reshape(y_train.shape[0]); y_test.reshape(y_test.shape[0])

    pca = PCA(n_components = 50)
    pca.fit(X_train)
    X_train_reduced = pca.transform(X_train)
    X_test_reduced = pca.transform(X_test)
    
    svc_rbf = svm.SVC(decision_function_shape='ovo', C=1.5, gamma=0.05, kernel="rbf") 
    svc_rbf.fit(X_train_reduced, y_train)

    svc_poly = svm.SVC(decision_function_shape='ovo', C=0.1, coef0=5, degree=2, gamma=0.5, kernel="poly") 
    svc_poly.fit(X_train_reduced, y_train)
    
    scores_rbf.append(svc_rbf.score(X_test_reduced, y_test))
    scores_poly.append(svc_poly.score(X_test_reduced, y_test))

print("RBF: {:.3f}, POLY: {}".format(np.mean(scores_rbf), np.mean(scores_poly)))


RBF: 0.980, POLY: 0.9785


In [34]:
random_seeds = [0, 21, 42, 100, 200, 300, 400, 500, 700, 1000]
scores_rbf = []; scores_poly = []

pca_aug = PCA(n_components = 50)
pca_aug.fit(X_train_aug)
X_train_aug_reduced = pca.transform(X_train_aug)
X_test_aug_reduced = pca.transform(X_test_aug)

for rs in random_seeds:
    
    svc_rbf = svm.SVC(decision_function_shape='ovo', C=1.5, gamma=0.05, kernel="rbf") 
    svc_rbf.fit(X_train_aug_reduced, y_train_aug)

    svc_poly = svm.SVC(decision_function_shape='ovo', C=0.1, coef0=5, degree=2, gamma=0.5, kernel="poly") 
    svc_poly.fit(X_train_aug_reduced, y_train_aug)
    
    scores_rbf.append(svc_rbf.score(X_test_aug_reduced, y_test_aug))
    scores_poly.append(svc_poly.score(X_test_aug_reduced, y_test_aug))

print("RBF: {:.3f}, POLY: {}".format(np.mean(scores_rbf), np.mean(scores_poly)))

RBF: 0.985, POLY: 0.9925
