# Handwritten Digit Recognition with Scikit-Learn, Chainer, PyTorch & TensorFlow+Keras

## Checking the MNIST datasets available from each library

### sklearn

In [22]:
#!pip install sklearn
from sklearn import datasets

In [23]:
# Load scikit-learn's bundled handwritten-digits dataset and report the
# dimensions of its feature matrix.
skl_digits = datasets.load_digits()
digits_shape = skl_digits.data.shape
print('Shape: ', digits_shape)

# ==> (1797, 64): 1797 samples, each an 8x8 image flattened to 64 values

Shape:  (1797, 64)


In [96]:
# Display the raw feature vector of the sample at index 1.
skl_digits.data[1]

array([ 0.,  0.,  0., 12., 13.,  5.,  0.,  0.,  0.,  0.,  0., 11., 16.,
        9.,  0.,  0.,  0.,  0.,  3., 15., 16.,  6.,  0.,  0.,  0.,  7.,
       15., 16., 16.,  2.,  0.,  0.,  0.,  0.,  1., 16., 16.,  3.,  0.,
        0.,  0.,  0.,  1., 16., 16.,  6.,  0.,  0.,  0.,  0.,  1., 16.,
       16.,  6.,  0.,  0.,  0.,  0.,  0., 11., 16., 10.,  0.,  0.])

In [98]:
# Pixel values of one digit sample, pasted in by hand and laid out here as
# eight rows of eight values so the 8x8 image structure is visible.
test = [
    0.0, 0.0, 0.0, 12.0, 13.0,  5.0, 0.0, 0.0,
    0.0, 0.0, 0.0, 11.0, 16.0,  9.0, 0.0, 0.0,
    0.0, 0.0, 3.0, 15.0, 16.0,  6.0, 0.0, 0.0,
    0.0, 7.0, 15.0, 16.0, 16.0, 2.0, 0.0, 0.0,
    0.0, 0.0, 1.0, 16.0, 16.0,  3.0, 0.0, 0.0,
    0.0, 0.0, 1.0, 16.0, 16.0,  6.0, 0.0, 0.0,
    0.0, 0.0, 1.0, 16.0, 16.0,  6.0, 0.0, 0.0,
    0.0, 0.0, 0.0, 11.0, 16.0, 10.0, 0.0, 0.0,
]
# Number of values in the flattened image.
len(test)

64

### chainer

In [24]:
#!pip install chainer
import chainer

In [25]:
# Fetch MNIST through Chainer. Labels are requested alongside the images and
# ndim=1 asks for each image as a flat 1-D vector.
train, test = chainer.datasets.get_mnist(
    withlabel=True,
    ndim=1,
)

In [26]:
# Report how many samples each Chainer split contains.
for label, split in (('# of train data', train), ('# of test data', test)):
    print(label, len(split))

# of train data 60000
# of test data 10000


In [27]:
# Length of element 0 of the training item at index 10
# (presumably the image half of an (image, label) pair, since the split was
# loaded with withlabel=True).
len(train[10][0])

# => 784 = 28*28, i.e. a flattened 28x28 image

784

### PyTorch

In [28]:
#!pip install torch torchvision

In [29]:
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
from torchvision.datasets import MNIST

In [30]:
# Directory handed to torchvision as the dataset root.
data_folder = './torch-data'
# Number of samples per mini-batch.
BATCH_SIZE = 8

# MNIST training split (train=True), downloaded on demand, with ToTensor
# applied to every image.
to_tensor = transforms.ToTensor()
mnist_data = MNIST(root=data_folder, train=True, download=True, transform=to_tensor)

# Batches are served in dataset order because shuffling is disabled.
data_loader = DataLoader(dataset=mnist_data, batch_size=BATCH_SIZE, shuffle=False)


In [31]:
# Pull the first mini-batch from the loader and keep only the images.
# The built-in next() is used instead of the iterator's .next() method:
# .next() is a Python 2-style alias that recent PyTorch releases no longer
# provide, whereas next() works with any iterator.
data_iterator = iter(data_loader)
images, _ = next(data_iterator)

In [32]:
# Shape of the first image tensor in the batch.
images[0].shape

# => 1 * 28 * 28 (presumably channel x height x width)

torch.Size([1, 28, 28])

### Keras

In [33]:
#!pip install tensorflow
#!pip install keras

In [34]:
from keras.datasets import mnist

# load_data() yields a (train, test) pair, each of which is itself an
# (images, labels) pair; unpack in two steps for readability.
train_split, test_split = mnist.load_data()
train_data, train_teacher_labels = train_split
test_data, test_teacher_labels = test_split

# Dimensions of the training image array.
train_data.shape

# 60000 x 28 x 28

(60000, 28, 28)

## Fitting models for MNIST

### sklearn

In [37]:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.datasets import make_moons, make_circles, make_classification
from sklearn.neural_network import MLPClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.gaussian_process.kernels import RBF
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis

In [38]:
# Load the scikit-learn digits dataset that the models in this section are fitted on.
skl_digits = datasets.load_digits()

In [41]:
# Feature matrix (X) and class labels (y) taken from the digits dataset.
X, y = skl_digits.data, skl_digits.target

In [64]:
from sklearn.model_selection import train_test_split
# Hold out part of the data for evaluation. No test_size is passed, so
# scikit-learn's default split ratio applies; random_state=0 fixes the shuffle
# so the same split is produced on every run.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

In [65]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import Perceptron
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier

# Candidate classifiers as (display name, estimator) pairs. Every estimator
# uses its default settings apart from the explicit SVM kernels.
models = [
    ("LogisticRegression", LogisticRegression()),
    ("k-Nearest Neighbors", KNeighborsClassifier()),
    ("Decision Tree", DecisionTreeClassifier()),
    ("Support Vector Machine(linear)", SVC(kernel='linear')),
    ("Support Vector Machine(rbf)", SVC(kernel='rbf')),
    ("Random Forest", RandomForestClassifier()),
    ("Perceptron", Perceptron()),
    ("Multilayer Perceptron", MLPClassifier()),
]

# Fit each candidate on the training split, echo the fitted estimator, and
# record its score on the held-out test split. `names` and `results` stay
# index-aligned with `models`.
names = []
results = []
for label, estimator in models:
    fitted = estimator.fit(X_train, y_train)
    print(fitted, "\n")

    names.append(label)
    results.append(estimator.score(X_test, y_test))

LogisticRegression() 

KNeighborsClassifier() 

DecisionTreeClassifier() 

SVC(kernel='linear') 

SVC() 

RandomForestClassifier() 

Perceptron() 

MLPClassifier() 



In [66]:
import pandas as pd

# Build the score table in a single constructor call. The previous
# row-by-row DataFrame.append loop no longer runs on pandas >= 2.0 (the method
# was removed) and copied the whole frame on every iteration.
# `names` and `results` are index-aligned, so row i describes classifier i.
summary = pd.DataFrame({'Classifier': names, 'Score': results})

# Show the best-scoring classifiers first; the original row index is kept so
# each row can still be matched to its position in `names`.
summary.sort_values(by='Score', ascending=False)

Unnamed: 0,Classifier,Score
4,Support Vector Machine(rbf),0.991111
1,k-Nearest Neighbors,0.98
3,Support Vector Machine(linear),0.971111
5,Random Forest,0.971111
7,Multilayer Perceptron,0.971111
0,LogisticRegression,0.951111
6,Perceptron,0.926667
2,Decision Tree,0.835556


In this initial comparison, the SVM with an RBF kernel achieves the best test score.
Next, we use grid search to find its optimal hyperparameters.

In [67]:
from sklearn.model_selection import GridSearchCV

# Both hyperparameters are searched over the same six powers of ten; each key
# gets its own copy of the list.
candidate_values = [0.001, 0.01, 0.1, 1, 10, 100]
param_grid = {key: list(candidate_values) for key in ('C', 'gamma')}

# Exhaustive search over the grid with 5-fold cross-validation.
grid_search = GridSearchCV(estimator=SVC(), param_grid=param_grid, cv=5)

# Calling fit runs both the cross-validation and the grid search.
grid_search.fit(X_train, y_train)

GridSearchCV(cv=5, estimator=SVC(),
             param_grid={'C': [0.001, 0.01, 0.1, 1, 10, 100],
                         'gamma': [0.001, 0.01, 0.1, 1, 10, 100]})

In [68]:
# Summarise the grid search: score on the held-out test split, the winning
# parameter combination, and its cross-validation score.
report_lines = (
    ('Test set score: {}', grid_search.score(X_test, y_test)),
    ('Best parameters: {}', grid_search.best_params_),
    ('Best cross-validation: {}', grid_search.best_score_),
)
for template, value in report_lines:
    print(template.format(value))

Test set score: 0.9933333333333333
Best parameters: {'C': 10, 'gamma': 0.001}
Best cross-validation: 0.9918353297535452


In [69]:
# Train the final classifier with fixed hyperparameters C=10 and gamma=0.001.
svc_params = {'C': 10, 'gamma': 0.001}
model = SVC(**svc_params)
model.fit(X_train, y_train)

SVC(C=10, gamma=0.001)

In [70]:
# Predict labels for the held-out test split with the fitted model.
predicted = model.predict(X_test)

In [72]:
from sklearn.metrics import confusion_matrix, classification_report

In [74]:
# Per-class precision, recall, F1 and support, comparing true test labels with the predictions.
print(classification_report(y_test, predicted))

precision    recall  f1-score   support

           0       1.00      1.00      1.00        37
           1       0.98      1.00      0.99        43
           2       1.00      1.00      1.00        44
           3       1.00      1.00      1.00        45
           4       1.00      1.00      1.00        38
           5       0.98      0.98      0.98        48
           6       1.00      1.00      1.00        52
           7       1.00      1.00      1.00        48
           8       1.00      0.98      0.99        48
           9       0.98      0.98      0.98        47

    accuracy                           0.99       450
   macro avg       0.99      0.99      0.99       450
weighted avg       0.99      0.99      0.99       450



In [75]:
# Confusion matrix of true test labels (first argument) against predicted labels (second argument).
print(confusion_matrix(y_test, predicted))

[[37  0  0  0  0  0  0  0  0  0]
 [ 0 43  0  0  0  0  0  0  0  0]
 [ 0  0 44  0  0  0  0  0  0  0]
 [ 0  0  0 45  0  0  0  0  0  0]
 [ 0  0  0  0 38  0  0  0  0  0]
 [ 0  0  0  0  0 47  0  0  0  1]
 [ 0  0  0  0  0  0 52  0  0  0]
 [ 0  0  0  0  0  0  0 48  0  0]
 [ 0  1  0  0  0  0  0  0 47  0]
 [ 0  0  0  0  0  1  0  0  0 46]]


In [76]:
import pickle

In [87]:
# Persist the fitted classifier to disk. The context manager guarantees the
# file handle is flushed and closed even if pickling raises, instead of
# leaving an anonymous open() handle for the garbage collector.
with open('../models/sklearn.pickle', 'wb') as model_file:
    pickle.dump(model, model_file)

In [91]:
# Drop the in-memory model and restore it from disk to confirm that the
# pickle round-trips.
# NOTE: pickle.load can execute arbitrary code while deserialising; only load
# files that were produced by a trusted source such as this notebook.
del model
with open('../models/sklearn.pickle', 'rb') as model_file:
    model = pickle.load(model_file)