# Mklearn vs. Sklearn Comparison

In [21]:
# import standard packages
import sys
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import importlib
import sklearn
from sklearn import svm
import sklearn.multiclass
from sklearn import preprocessing
import scipy

# import mklearn and multiclass
LIB_PATH = 'mklearn/'
sys.path.append(LIB_PATH)
import mklearn
import multiclass
importlib.reload(mklearn)
importlib.reload(multiclass)

<module 'multiclass' from 'mklearn/multiclass.py'>

Again, we will use *Elements of Statistical Learning*'s Vowels dataset for performance comparison with scikit-learn. It can be downloaded in the following link:

https://web.stanford.edu/~hastie/ElemStatLearn/data.html

In [2]:
# load dataset
vowel_train = pd.read_csv('https://web.stanford.edu/~hastie/ElemStatLearn/datasets/vowel.train')
vowel_test = pd.read_csv('https://web.stanford.edu/~hastie/ElemStatLearn/datasets/vowel.test')
print(vowel_train.shape)
print(vowel_test.shape)
# divide into X and Y
X_train = vowel_train.drop('y', axis=1)
y_train = vowel_train['y']
X_test = vowel_test.drop('y', axis=1)
y_test = vowel_test['y']
# convert into numpy array
X_train, X_test = np.array(X_train), np.array(X_test)
y_train, y_test = np.array(y_train), np.array(y_test)
# check results
print("X_train shape:", X_train.shape)
print("y_train shape:", y_train.shape)
print("X_test shape:", X_test.shape)
print("y_test shape:", y_test.shape)

(528, 12)
(462, 12)
X_train shape: (528, 11)
y_train shape: (528,)
X_test shape: (462, 11)
y_test shape: (462,)


## Mklearn's LinearSVC

In [3]:
# input parameters
classes = np.arange(11) + 1
penalty = 1.0
max_iter = 1000
n_threads = 10
verbose = False
tol = 1e-6
multi_class = 'ovo'
# fit model
myLinearSVC_model = mklearn.myLinearSVC(loss='smooth_hinge', tol=tol, C=penalty)
myLinearSVC_ovo = multiclass.Multiclass(myLinearSVC_model, classes, multiclass=multi_class,
                                       n_threads=n_threads, Cs=None, k=1) 
myLinearSVC_ovo.fit(X_train, y_train)

ovo multiclass using 1-folds cross-validation
Fitting ovo model for pair (1, 2)
k = 1, no cross-validation case...
fitting with optimal penalty 1.0
Model fit complete. Final objective cost: 0.8820192799859071
Final training error: 0.23958333333333334

Fitting ovo model for pair (1, 3)
k = 1, no cross-validation case...
fitting with optimal penalty 1.0
Model fit complete. Final objective cost: 0.7459935035456859
Final training error: 0.0625

Fitting ovo model for pair (1, 4)
k = 1, no cross-validation case...
fitting with optimal penalty 1.0
Model fit complete. Final objective cost: 0.5907270478459653
Final training error: 0.0625

Fitting ovo model for pair (1, 5)
k = 1, no cross-validation case...
fitting with optimal penalty 1.0
Model fit complete. Final objective cost: 0.5592127172400974
Final training error: 0.0625

Fitting ovo model for pair (1, 6)
k = 1, no cross-validation case...
fitting with optimal penalty 1.0
Model fit complete. Final objective cost: 0.592851186695085
Final t

Model fit complete. Final objective cost: 0.7698410134303034
Final training error: 0.16666666666666666

Fitting ovo model for pair (7, 10)
k = 1, no cross-validation case...
fitting with optimal penalty 1.0
Model fit complete. Final objective cost: 0.6806773555399155
Final training error: 0.052083333333333336

Fitting ovo model for pair (7, 11)
k = 1, no cross-validation case...
fitting with optimal penalty 1.0
Model fit complete. Final objective cost: 0.6912627021297149
Final training error: 0.052083333333333336

Fitting ovo model for pair (8, 9)
k = 1, no cross-validation case...
fitting with optimal penalty 1.0
Model fit complete. Final objective cost: 0.7960111949654924
Final training error: 0.15625

Fitting ovo model for pair (8, 10)
k = 1, no cross-validation case...
fitting with optimal penalty 1.0
Model fit complete. Final objective cost: 0.7314357298811762
Final training error: 0.11458333333333333

Fitting ovo model for pair (8, 11)
k = 1, no cross-validation case...
fitting w

In [4]:
# predict and check accuracy
y_train_pred = multiclass.predict_multiclass(X_train, myLinearSVC_ovo.scalers, 
                                             myLinearSVC_ovo.fitted_betas, classes, 
                                             n_threads, 'ovo', multiclass.predict_binary_class)
y_test_pred = multiclass.predict_multiclass(X_test, myLinearSVC_ovo.scalers, 
                                            myLinearSVC_ovo.fitted_betas, classes, 
                                            n_threads, 'ovo', multiclass.predict_binary_class)
ovo_error_train = multiclass.compute_multi_classification_error(y_train, y_train_pred)
ovo_error_test = multiclass.compute_multi_classification_error(y_test, y_test_pred)
print('OVO training error: {}, OVO test error: {}'.format(round(ovo_error_train, 3), 
                                                          round(ovo_error_test, 3)))

Performing multiclass prediction using 10 threads...
Multiclass prediction complete. Elapsed time: 0.107s
Performing multiclass prediction using 10 threads...
Multiclass prediction complete. Elapsed time: 0.112s
OVO training error: 0.299, OVO test error: 0.574


## Sklearn's LinearSVC

For comparison with sklearn, we use their LinearSVC module. Sklearn's LinearSVC only supports one-vs-rest multiclass classification. 

In [42]:
# fit sklearn's LinearSVC model using OVR
LinearSVC_model = svm.LinearSVC(penalty='l2', loss='squared_hinge', 
                                tol=0.00001, C=1.0, multi_class='ovr')
LinearSVC_model.fit(X_train, y_train)
# predict on test set
y_pred_sklearn = LinearSVC_model.predict(X_test)
# get errors
error_train_sklearn = 1.0 - LinearSVC_model.score(X_train, y_train)
error_test_sklearn = 1.0 - LinearSVC_model.score(X_test, y_test)
# error_test_sklearn = multiclass.compute_multi_classification_error(y_test, y_pred_sklearn)
print('sklearn OVR LinearSVC train error: {}'.format(error_train_sklearn))
print('sklearn OVR LinearSVC test error: {}'.format(error_test_sklearn))

sklearn OVR LinearSVC train error: 0.5852272727272727
sklearn OVR LinearSVC test error: 0.7683982683982684


## Sklearn's SVC 

In [17]:
# fit sklearn's LinearSVC model
SVC_model = svm.SVC(C=1.0, kernel='rbf', tol=0.001, decision_function_shape='ovo')
SVC_model.fit(X_train, y_train)
# predict on test set
y_pred_sklearn = SVC_model.predict(X_test)
# get errors
error_train_sklearn = 1.0 - SVC_model.score(X_train, y_train)
error_test_sklearn = 1.0 - SVC_model.score(X_test, y_test)
# error_test_sklearn = multiclass.compute_multi_classification_error(y_test, y_pred_sklearn)
print('sklearn OVO SVC train error: {}'.format(error_train_sklearn))
print('sklearn OVO SVC test error: {}'.format(error_test_sklearn))

sklearn OVO SVC train error: 0.0
sklearn OVO SVC test error: 0.35064935064935066


MKlearn's LinearSVC implementation yields test error of $57.4\%$, which is noticeably better than that of sklearn's LinearSVC model using one-vs-rest classification. 

However, against sklearn's SVC, mklearn's LinearSVC implementation does not perform as well. With one-vs-rest classification, sklearn's SVC yields test error of $35.1\%$, exceeding mklearn's LinearSVC.