# TP3 : Support Vector Machine (SVM) with Sklearn

In [0]:
# import library
import numpy as np
import matplotlib.pyplot as plt
import time 

from sklearn.datasets import fetch_mldata
from sklearn.datasets import fetch_openml

from sklearn.model_selection import train_test_split

from sklearn.svm import SVC, LinearSVC, NuSVC

from sklearn.metrics import precision_score, classification_report, confusion_matrix

## Load MNIST dataset

In [0]:
# import data
mnist = fetch_openml('mnist_784')

In [3]:
images = mnist.data.reshape((-1,28,28))
data = mnist.data / 255
labels = mnist.target
print("Shape of training images : ", images.shape)
print("Shape of training data : ", data.shape)
print("Shape of label : ", labels.shape)

Shape of training images :  (70000, 28, 28)
Shape of training data :  (70000, 784)
Shape of label :  (70000,)


In [4]:
target_list = np.unique(mnist.target)
print(target_list)

['0' '1' '2' '3' '4' '5' '6' '7' '8' '9']


## Split training & test set

In [4]:
X_train, X_test, y_train, y_test = train_test_split(data,labels,train_size=0.5)
print("Traning data shape : ", X_train.shape)
print("Traning target shape : ", y_train.shape)
print("Testing data shape : ", X_test.shape)
print("Testing target shape : ", y_test.shape)

Traning data shape :  (35000, 784)
Traning target shape :  (35000,)
Testing data shape :  (35000, 784)
Testing target shape :  (35000,)


## Create a SVM classifier

In [6]:
# clf = SVC(gamma='scale', decision_function_shape='ovo')
clf = LinearSVC()
clf.fit(X_train,y_train)



LinearSVC(C=1.0, class_weight=None, dual=True, fit_intercept=True,
          intercept_scaling=1, loss='squared_hinge', max_iter=1000,
          multi_class='ovr', penalty='l2', random_state=None, tol=0.0001,
          verbose=0)

## Prediction score

In [8]:
y_pred = clf.predict(X_test)
y_pred
# precision_score(y_test,y_pred,labels = target_list,average='micro')

array(['8', '6', '0', ..., '0', '7', '1'], dtype=object)

In [9]:
print("Classification report for classifier %s:\n%s\n"
      % (clf, classification_report(y_test, y_pred)))
print("Confusion matrix:\n%s" % confusion_matrix(y_test, y_pred))

Classification report for classifier LinearSVC(C=1.0, class_weight=None, dual=True, fit_intercept=True,
          intercept_scaling=1, loss='squared_hinge', max_iter=1000,
          multi_class='ovr', penalty='l2', random_state=None, tol=0.0001,
          verbose=0):
              precision    recall  f1-score   support

           0       0.95      0.97      0.96      2084
           1       0.94      0.98      0.96      2376
           2       0.91      0.90      0.90      2102
           3       0.90      0.88      0.89      2111
           4       0.92      0.93      0.92      2036
           5       0.88      0.86      0.87      1912
           6       0.94      0.96      0.95      2073
           7       0.92      0.93      0.93      2202
           8       0.90      0.84      0.87      2024
           9       0.88      0.88      0.88      2080

    accuracy                           0.91     21000
   macro avg       0.91      0.91      0.91     21000
weighted avg       0.91     

## Test noyal function 

In [0]:
## poly
clf = SVC(kernel='poly')
start =  time.time()
clf.fit(X_train,y_train)
end_train = time.time()
print("Training execution time :", end_train - start)
y_pred = clf.predict(X_test)
end_pred = time.time() 
print("Prediction time :", end_pred - end_train)
print("Classification report for classifier %s:\n%s\n" % (clf, classification_report(y_test, y_pred)))
print("Confusion matrix:\n%s" % confusion_matrix(y_test, y_pred))



In [0]:
## rbf
clf = SVC(kernel='rbf')
start =  time.time()
clf.fit(X_train,y_train)
end_train = time.time()
print("Training execution time :", end_train - start)
y_pred = clf.predict(X_test)
end_pred = time.time() 
print("Prediction time :", end_pred - end_train)
print("Classification report for classifier %s:\n%s\n" % (clf, classification_report(y_test, y_pred)))
print("Confusion matrix:\n%s" % confusion_matrix(y_test, y_pred))

In [0]:
## sigmoid
clf = SVC(kernel='sigmoid')
start =  time.time()
clf.fit(X_train,y_train)
end_train = time.time()
print("Training execution time :", end_train - start)
y_pred = clf.predict(X_test)
end_pred = time.time() 
print("Prediction time :", end_pred - end_train)
print("Classification report for classifier %s:\n%s\n" % (clf, classification_report(y_test, y_pred)))
print("Confusion matrix:\n%s" % confusion_matrix(y_test, y_pred))

In [0]:
## precomputed
clf = SVC(kernel='precomputed')
start =  time.time()
clf.fit(X_train,y_train)
end_train = time.time()
print("Training execution time :", end_train - start)
y_pred = clf.predict(X_test)
end_pred = time.time() 
print("Prediction time :", end_pred - end_train)
print("Classification report for classifier %s:\n%s\n" % (clf, classification_report(y_test, y_pred)))
print("Confusion matrix:\n%s" % confusion_matrix(y_test, y_pred))