# SVM image classification

Tackle the Kaggle MNIST dataset with a simple LinearSVC, then run a grid search over the LinearSVC hyperparameters.

In [11]:
import math
import pandas as pd
import numpy as np

import sklearn
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, mean_squared_error

# Render matplotlib figures inline. Note that %pylab also pulls numpy and
# matplotlib names into the interactive namespace (see the cell output below).
%pylab inline
# Default figure size, in inches (width, height).
pylab.rcParams['figure.figsize'] = (15, 6)

# Do not use scientific notation when printing numbers; exponents can make it harder to compare values
pd.set_option('float_format', '{:f}'.format)

Populating the interactive namespace from numpy and matplotlib


## Import

Downloaded from https://www.kaggle.com/c/digit-recognizer

In [2]:
# Load the Kaggle digit-recognizer training set; the path is relative to the
# notebook's working directory.
mnist_data = pd.read_csv("../datasets/kaggle/mnist/train.csv")

## Explore

Every image is 28x28 = 784 pixels, greyscale

In [3]:
# Peek at the last rows: a `label` column followed by the pixel0..pixel783 columns.
mnist_data.tail()

Unnamed: 0,label,pixel0,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,...,pixel774,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783
41995,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
41996,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
41997,7,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
41998,6,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
41999,9,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


## Prepare

Initially the pixel intensities range 0..255, so divide by 255 to convert to 0..1

In [8]:
# Pick the pixel columns by name rather than by position, so the label can
# never leak into the features if the CSV column order changes.
features = mnist_data.columns.drop('label')

X = mnist_data[features]
Y = mnist_data['label']

# Scale pixel intensities from 0..255 to 0..1 and hold out 10% of the rows for
# evaluation; random_state is fixed so the split is reproducible.
# The lower-case `y_test` name is kept because the evaluation cells use it.
X_train, X_test, Y_train, y_test = train_test_split(X/255., Y, test_size=0.1, random_state=0)

## Train

Train a simple model

In [9]:
from sklearn.svm import LinearSVC

# Baseline model: L2-penalised linear SVM with a tight stopping tolerance.
# dual=False selects the primal formulation of the optimisation problem.
clf_svm = LinearSVC(penalty="l2", dual=False, tol=1e-5)
# Last expression of the cell, so the fitted estimator's repr is displayed.
clf_svm.fit(X_train, Y_train)

LinearSVC(C=1.0, class_weight=None, dual=False, fit_intercept=True,
     intercept_scaling=1, loss='squared_hinge', max_iter=1000,
     multi_class='ovr', penalty='l2', random_state=None, tol=1e-05,
     verbose=0)

Now grid search the model hyperparameters using the same underlying model

In [14]:
from sklearn.model_selection import GridSearchCV

# Candidate values for each hyperparameter: 2 penalties x 3 tolerances.
penalties = ["l1", "l2"]
tolerances = [1e-3, 1e-4, 1e-5]
param_grid = dict(penalty=penalties, tol=tolerances)

# Exhaustive search with 3-fold cross-validation over the training split.
# The base estimator keeps dual=False, matching the baseline model.
grid_search = GridSearchCV(
    estimator=LinearSVC(dual=False),
    param_grid=param_grid,
    cv=3,
)
grid_search.fit(X_train, Y_train)

GridSearchCV(cv=3, error_score='raise',
       estimator=LinearSVC(C=1.0, class_weight=None, dual=False, fit_intercept=True,
     intercept_scaling=1, loss='squared_hinge', max_iter=1000,
     multi_class='ovr', penalty='l2', random_state=None, tol=0.0001,
     verbose=0),
       fit_params=None, iid=True, n_jobs=1,
       param_grid={'penalty': ['l1', 'l2'], 'tol': [0.001, 0.0001, 1e-05]},
       pre_dispatch='2*n_jobs', refit=True, return_train_score='warn',
       scoring=None, verbose=0)

In [16]:
# Hyperparameter combination with the best mean cross-validated score.
grid_search.best_params_

{'penalty': 'l1', 'tol': 1e-05}

Train a model using the best hyperparameters found

In [17]:
# Build the final model from the grid search result instead of hand-copying the
# values, so this cell cannot drift out of sync if the grid or the data changes.
best_svc = LinearSVC(dual=False, **grid_search.best_params_)
# Last expression of the cell, so the fitted estimator's repr is displayed.
best_svc.fit(X_train, Y_train)

LinearSVC(C=1.0, class_weight=None, dual=False, fit_intercept=True,
     intercept_scaling=1, loss='squared_hinge', max_iter=1000,
     multi_class='ovr', penalty='l1', random_state=None, tol=1e-05,
     verbose=0)

## Evaluate

Evaluate the model trained without grid search

In [13]:
# Score the baseline model on the held-out 10% split.
y_pred_svm = clf_svm.predict(X_test)
acc_svm = accuracy_score(y_true=y_test, y_pred=y_pred_svm)

SVM accuracy: 0.9102380952380953


In [None]:
# Report the held-out accuracy of the baseline (non grid-searched) model.
print("SVM accuracy:", acc_svm)

Evaluate the grid searched model

In [18]:
# Score the grid-searched model on the same held-out split as the baseline.
y_pred_best_svm = best_svc.predict(X_test)
acc_best_svm = accuracy_score(y_true=y_test, y_pred=y_pred_best_svm)

In [19]:
# Report the held-out accuracy of the grid-searched model.
# The label previously read "Beast", a typo for "Best".
print("Best SVM accuracy:", acc_best_svm)

Beast SVM accuracy: 0.9121428571428571
