# Predicting handwritten digits - demo

AlgorithmHub. All rights reserved.

https://www.algorithmhub.com/

### Import the required libraries

In [1]:
import matplotlib.pyplot as plt
from sklearn import (
    datasets,
    svm,
    metrics,
    naive_bayes
)

### Load the digits dataset

In [2]:
# Load scikit-learn's bundled handwritten-digits dataset; `digits.images` and
# `digits.target` are used by the cells below.
digits = datasets.load_digits()

### Get the digit images and the target values

In [3]:
# Pair every image with its target label so a sample can be inspected as one item.
images_and_labels = [
    (image, label) for image, label in zip(digits.images, digits.target)
]

#### View the values inside the first item, an example for number '0'

In [4]:
# Show the pixel grid of the first sample, a blank line, then its label.
# print(...) with a single argument behaves the same on Python 2 and Python 3,
# whereas the bare `print x` statement is a SyntaxError on Python 3.
print(images_and_labels[0][0])
print("")  # blank separator line; print("") emits just a newline on both versions
print(images_and_labels[0][1])

[[  0.   0.   5.  13.   9.   1.   0.   0.]
 [  0.   0.  13.  15.  10.  15.   5.   0.]
 [  0.   3.  15.   2.   0.  11.   8.   0.]
 [  0.   4.  12.   0.   0.   8.   8.   0.]
 [  0.   5.   8.   0.   0.   9.   8.   0.]
 [  0.   4.  11.   0.   1.  12.   7.   0.]
 [  0.   2.  14.   5.  10.  12.   0.   0.]
 [  0.   0.   6.  13.  10.   0.   0.   0.]]

0


#### An example for number '1'

In [5]:
# Show the pixel grid of the second sample, a blank line, then its label.
# print(...) with a single argument behaves the same on Python 2 and Python 3,
# whereas the bare `print x` statement is a SyntaxError on Python 3.
print(images_and_labels[1][0])
print("")  # blank separator line; print("") emits just a newline on both versions
print(images_and_labels[1][1])

[[  0.   0.   0.  12.  13.   5.   0.   0.]
 [  0.   0.   0.  11.  16.   9.   0.   0.]
 [  0.   0.   3.  15.  16.   6.   0.   0.]
 [  0.   7.  15.  16.  16.   2.   0.   0.]
 [  0.   0.   1.  16.  16.   3.   0.   0.]
 [  0.   0.   1.  16.  16.   6.   0.   0.]
 [  0.   0.   1.  16.  16.   6.   0.   0.]
 [  0.   0.   0.  11.  16.  10.   0.   0.]]

1


#### Actual shape or dimension of the first item

In [6]:
# Shape of the image array in the first (image, label) pair.
images_and_labels[0][0].shape

(8, 8)

### Flatten the 2D array to 1D array

In [7]:
# One row per image: collapse each 2D image into a single 1D feature vector.
n_samples = digits.images.shape[0]
data = digits.images.reshape(n_samples, -1)

#### Actual array

In [8]:
# First image in its original 2D layout, for comparison with the flattened row.
digits.images[0]

array([[  0.,   0.,   5.,  13.,   9.,   1.,   0.,   0.],
       [  0.,   0.,  13.,  15.,  10.,  15.,   5.,   0.],
       [  0.,   3.,  15.,   2.,   0.,  11.,   8.,   0.],
       [  0.,   4.,  12.,   0.,   0.,   8.,   8.,   0.],
       [  0.,   5.,   8.,   0.,   0.,   9.,   8.,   0.],
       [  0.,   4.,  11.,   0.,   1.,  12.,   7.,   0.],
       [  0.,   2.,  14.,   5.,  10.,  12.,   0.,   0.],
       [  0.,   0.,   6.,  13.,  10.,   0.,   0.,   0.]])

#### Flattened array

In [9]:
# The same first image after flattening: one row of `data`.
data[0]

array([  0.,   0.,   5.,  13.,   9.,   1.,   0.,   0.,   0.,   0.,  13.,
        15.,  10.,  15.,   5.,   0.,   0.,   3.,  15.,   2.,   0.,  11.,
         8.,   0.,   0.,   4.,  12.,   0.,   0.,   8.,   8.,   0.,   0.,
         5.,   8.,   0.,   0.,   9.,   8.,   0.,   0.,   4.,  11.,   0.,
         1.,  12.,   7.,   0.,   0.,   2.,  14.,   5.,  10.,  12.,   0.,
         0.,   0.,   0.,   6.,  13.,  10.,   0.,   0.,   0.])

#### Actual shape of the entire digits dataset

In [10]:
# Shape of the full image stack before flattening.
digits.images.shape

(1797, 8, 8)

#### Flattened shape of the entire digits dataset

In [11]:
# Shape of the flattened feature matrix (one row per sample).
data.shape

(1797, 64)

#### Actual shape of single item

In [12]:
# Shape of a single image before flattening.
digits.images[0].shape

(8, 8)

#### Flattened shape of single item

In [13]:
# Shape of a single flattened sample.
data[0].shape

(64,)

### Splitting the dataset into training and testing sets (a simple hold-out split, not cross-validation)

In [14]:
# Hold-out split by position: the first 70% of the samples are used for
# training and the remaining 30% for testing. No shuffling is performed.
limit = 0.7
precent_of_sample = int(limit * n_samples)  # index where the test portion starts
X_train, X_test = data[:precent_of_sample], data[precent_of_sample:]
y_train, y_test = (
    digits.target[:precent_of_sample],
    digits.target[precent_of_sample:],
)

### Creating the model and passing a few parameters
### Support Vector Classifier - RBF kernel

In [15]:
# Support vector classifier with an RBF kernel; C and gamma are set explicitly.
clf_rbf = svm.SVC(kernel='rbf', C=1, gamma=0.001)

### Fitting the model

In [16]:
# Train the RBF-kernel SVC on the training split. As the cell's last
# expression, the value returned by fit() is shown as the cell output.
clf_rbf.fit(X_train, y_train)

SVC(C=1, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape=None, degree=3, gamma=0.001, kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

### Predicting the test values

In [17]:
# Predict labels for the held-out samples and keep the true labels alongside
# them for scoring in the following cells.
predicted = clf_rbf.predict(X_test)
expected = y_test

### Classification report

In [18]:
# Per-class precision / recall / f1 for the RBF-kernel SVC predictions.
report_rbf = metrics.classification_report(expected, predicted)
print("Classification report for classifier %s:\n%s" % (clf_rbf, report_rbf))

Classification report for classifier SVC(C=1, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape=None, degree=3, gamma=0.001, kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False):
             precision    recall  f1-score   support

          0       1.00      0.98      0.99        53
          1       0.96      1.00      0.98        53
          2       1.00      0.98      0.99        53
          3       0.96      0.89      0.92        53
          4       0.98      0.95      0.96        57
          5       0.95      0.98      0.96        56
          6       0.98      0.98      0.98        54
          7       1.00      1.00      1.00        54
          8       0.91      0.98      0.94        52
          9       0.96      0.96      0.96        55

avg / total       0.97      0.97      0.97       540



### Getting the confusion matrix

In [19]:
# Confusion matrix of true labels versus the RBF-kernel SVC predictions.
confusion_rbf = metrics.confusion_matrix(expected, predicted)
print("Confusion matrix:\n%s" % confusion_rbf)

Confusion matrix:
[[52  0  0  0  1  0  0  0  0  0]
 [ 0 53  0  0  0  0  0  0  0  0]
 [ 0  0 52  1  0  0  0  0  0  0]
 [ 0  0  0 47  0  2  0  0  4  0]
 [ 0  0  0  0 54  0  0  0  1  2]
 [ 0  0  0  0  0 55  1  0  0  0]
 [ 0  1  0  0  0  0 53  0  0  0]
 [ 0  0  0  0  0  0  0 54  0  0]
 [ 0  1  0  0  0  0  0  0 51  0]
 [ 0  0  0  1  0  1  0  0  0 53]]


### Support Vector Classifier - POLY kernel

In [20]:
# Support vector classifier with a polynomial kernel, same C and gamma as the RBF model.
clf_poly = svm.SVC(kernel='poly', C=1, gamma=0.001)

In [21]:
# Train the polynomial-kernel SVC on the training split. As the cell's last
# expression, the value returned by fit() is shown as the cell output.
clf_poly.fit(X_train, y_train)

SVC(C=1, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape=None, degree=3, gamma=0.001, kernel='poly',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

In [22]:
# Predict labels for the held-out samples with the polynomial-kernel SVC;
# `expected` and `predicted` are overwritten for the scoring cells below.
predicted = clf_poly.predict(X_test)
expected = y_test

In [23]:
# Per-class precision / recall / f1 for the polynomial-kernel SVC predictions.
report_poly = metrics.classification_report(expected, predicted)
print("Classification report for classifier %s:\n%s" % (clf_poly, report_poly))

Classification report for classifier SVC(C=1, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape=None, degree=3, gamma=0.001, kernel='poly',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False):
             precision    recall  f1-score   support

          0       0.96      0.96      0.96        53
          1       0.96      0.94      0.95        53
          2       1.00      0.94      0.97        53
          3       0.92      0.89      0.90        53
          4       0.98      0.95      0.96        57
          5       0.93      0.98      0.96        56
          6       0.96      0.98      0.97        54
          7       0.95      0.98      0.96        54
          8       0.91      0.94      0.92        52
          9       0.91      0.91      0.91        55

avg / total       0.95      0.95      0.95       540



In [24]:
# Confusion matrix of true labels versus the polynomial-kernel SVC predictions.
confusion_poly = metrics.confusion_matrix(expected, predicted)
print("Confusion matrix:\n%s" % confusion_poly)

Confusion matrix:
[[51  0  0  0  1  0  1  0  0  0]
 [ 0 50  0  1  0  0  0  0  0  2]
 [ 1  0 50  2  0  0  0  0  0  0]
 [ 0  0  0 47  0  2  0  1  3  0]
 [ 0  0  0  0 54  0  0  0  0  3]
 [ 0  0  0  0  0 55  1  0  0  0]
 [ 0  1  0  0  0  0 53  0  0  0]
 [ 0  0  0  0  0  0  0 53  1  0]
 [ 0  1  0  0  0  1  0  1 49  0]
 [ 1  0  0  1  0  1  0  1  1 50]]


### Naive Bayes - GaussianNB

In [25]:
# Gaussian naive Bayes classifier, constructed with its default parameters.
clf_gnb = naive_bayes.GaussianNB()

In [26]:
# Train the Gaussian naive Bayes model on the training split. As the cell's
# last expression, the value returned by fit() is shown as the cell output.
clf_gnb.fit(X_train, y_train)

GaussianNB()

In [27]:
# Predict labels for the held-out samples with Gaussian naive Bayes;
# `expected` and `predicted` are overwritten for the scoring cells below.
predicted = clf_gnb.predict(X_test)
expected = y_test

In [28]:
# Per-class precision / recall / f1 for the Gaussian naive Bayes predictions.
# The header must name clf_gnb: `predicted` was produced by clf_gnb, so
# formatting clf_poly here would label these scores as the polynomial SVC.
print("Classification report for classifier %s:\n%s"
      % (clf_gnb, metrics.classification_report(expected, predicted)))

Classification report for classifier SVC(C=1, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape=None, degree=3, gamma=0.001, kernel='poly',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False):
             precision    recall  f1-score   support

          0       0.96      0.96      0.96        53
          1       0.65      0.79      0.71        53
          2       0.98      0.81      0.89        53
          3       0.92      0.64      0.76        53
          4       1.00      0.86      0.92        57
          5       0.83      0.93      0.87        56
          6       0.96      0.98      0.97        54
          7       0.73      0.83      0.78        54
          8       0.59      0.71      0.64        52
          9       0.82      0.73      0.77        55

avg / total       0.84      0.83      0.83       540



In [29]:
# Confusion matrix of true labels versus the Gaussian naive Bayes predictions.
confusion_gnb = metrics.confusion_matrix(expected, predicted)
print("Confusion matrix:\n%s" % confusion_gnb)

Confusion matrix:
[[51  0  0  0  0  0  0  0  2  0]
 [ 0 42  1  0  0  0  0  0  3  7]
 [ 0  5 43  1  0  0  1  0  1  2]
 [ 0  3  0 34  0  3  0  2 11  0]
 [ 1  0  0  0 49  0  0  6  1  0]
 [ 0  2  0  0  0 52  1  1  0  0]
 [ 0  1  0  0  0  0 53  0  0  0]
 [ 0  0  0  0  0  2  0 45  7  0]
 [ 0 11  0  1  0  1  0  2 37  0]
 [ 1  1  0  1  0  5  0  6  1 40]]


### Naive Bayes - BernoulliNB

In [30]:
# Bernoulli naive Bayes classifier, constructed with its default parameters.
clf_bnb = naive_bayes.BernoulliNB()

In [31]:
# Train the Bernoulli naive Bayes model on the training split. As the cell's
# last expression, the value returned by fit() is shown as the cell output.
clf_bnb.fit(X_train, y_train)

BernoulliNB(alpha=1.0, binarize=0.0, class_prior=None, fit_prior=True)

In [32]:
# Predict labels for the held-out samples with Bernoulli naive Bayes;
# `expected` and `predicted` are overwritten for the scoring cells below.
predicted = clf_bnb.predict(X_test)
expected = y_test

In [33]:
# Per-class precision / recall / f1 for the Bernoulli naive Bayes predictions.
report_bnb = metrics.classification_report(expected, predicted)
print("Classification report for classifier %s:\n%s" % (clf_bnb, report_bnb))

Classification report for classifier BernoulliNB(alpha=1.0, binarize=0.0, class_prior=None, fit_prior=True):
             precision    recall  f1-score   support

          0       1.00      0.94      0.97        53
          1       0.69      0.58      0.63        53
          2       0.95      0.77      0.85        53
          3       0.75      0.75      0.75        53
          4       0.93      0.93      0.93        57
          5       0.90      0.79      0.84        56
          6       0.94      0.94      0.94        54
          7       0.88      0.96      0.92        54
          8       0.65      0.69      0.67        52
          9       0.64      0.87      0.74        55

avg / total       0.84      0.83      0.83       540



In [34]:
# Confusion matrix of true labels versus the Bernoulli naive Bayes predictions.
confusion_bnb = metrics.confusion_matrix(expected, predicted)
print("Confusion matrix:\n%s" % confusion_bnb)

Confusion matrix:
[[50  1  0  0  2  0  0  0  0  0]
 [ 0 31  0  0  0  0  0  0 10 12]
 [ 0  0 41 10  0  0  0  0  1  1]
 [ 0  2  2 40  0  1  0  2  5  1]
 [ 0  2  0  0 53  0  0  1  1  0]
 [ 0  1  0  1  1 44  3  0  0  6]
 [ 0  3  0  0  0  0 51  0  0  0]
 [ 0  0  0  0  1  0  0 52  1  0]
 [ 0  4  0  1  0  2  0  2 36  7]
 [ 0  1  0  1  0  2  0  2  1 48]]
