# Multi-class classification of handwritten digits using a deep CNN model

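This notebook uses the MNIST dataset of handwritten digits: a support vector machine is fitted as a baseline, and a deep CNN is then trained and evaluated on held-out test data.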

In [33]:
# Import packages
import random

import numpy as np

# Reproducible results
np.random.seed(42)
random.seed(42)

import sklearn.metrics
import sklearn.svm

import keras
import keras.datasets

from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras import backend as K

# Images are handled as (rows, cols, channels) throughout the notebook
K.set_image_data_format("channels_last")

# Load MNIST; the second part is divided below into validation and test data
(X, y), (X_val, y_val) = keras.datasets.mnist.load_data()

# Hold out the last 2000 samples as the test set, keep the rest for validation
n_test = 2000
X_val, X_test = X_val[:-n_test], X_val[-n_test:]
y_val, y_test = y_val[:-n_test], y_val[-n_test:]

# Append a trailing channel axis, convert to float32 and divide by 255
X = np.expand_dims(X, axis=-1).astype(np.float32) / 255.0
X_val = np.expand_dims(X_val, axis=-1).astype(np.float32) / 255.0
X_test = np.expand_dims(X_test, axis=-1).astype(np.float32) / 255.0

# One class per distinct label value in the training data
num_classes = len(np.unique(y))

# Keep copies of the integer labels (used by the baseline model) before the
# targets are one-hot encoded for the neural network
y_orig, y_val_orig = y.copy(), y_val.copy()
y, y_val, y_test = (
    keras.utils.to_categorical(labels, num_classes)
    for labels in (y, y_val, y_test)
)

# Shape of a single sample: (rows, cols, channels)
input_shape = X.shape[1:3] + (1,)

# Report the shapes of all data splits together with the derived settings
summary_lines = [
    f"Training set size X  : {X.shape}",
    f"Training set size y  : {y.shape}",
    f"Validation set size X: {X_val.shape}",
    f"Validation set size y: {y_val.shape}",
    f"Test set size X      : {X_test.shape}",
    f"Test set size y      : {y_test.shape}",
    f"Input shape          : {input_shape}",
    f"Number of classes    : {num_classes}",
]
print("\n".join(summary_lines))

Training set size X  : (60000, 28, 28, 1)
Training set size y  : (60000, 10)
Validation set size X: (8000, 28, 28, 1)
Validation set size y: (8000, 10)
Test set size X      : (2000, 28, 28, 1)
Test set size y      : (2000, 10)
Input shape          : (28, 28, 1)
Number of classes    : 10


In [34]:
# Create baseline model

# The SVC expects 2-D input, so flatten every image into one feature vector
X_ = X.reshape(len(X), -1)
X_val_ = X_val.reshape(len(X_val), -1)
X_test_ = X_test.reshape(len(X_test), -1)

# Settings of the baseline support vector classifier
svc_params = {
    "C": 1.0,
    "kernel": "rbf",
    "gamma": "auto",
    "shrinking": True,
    "tol": 0.001,
    "cache_size": 200,
    "class_weight": None,
    "verbose": True,
    "max_iter": 5,  # Note, only five iterations here!
    "decision_function_shape": "ovr",
}

# Fit baseline model on the integer (not one-hot) labels
model_baseline = sklearn.svm.SVC(**svc_params)
_ = model_baseline.fit(X_, y_orig)

[LibSVM]



In [36]:
# Evaluate baseline model
# accuracy_score returns the fraction of correctly classified samples, so the
# numbers below are accuracies (higher is better), not errors.
yhat = model_baseline.predict(X_)
yhat_val = model_baseline.predict(X_val_)
acc_baseline = sklearn.metrics.accuracy_score(y_orig, yhat)
acc_baseline_val = sklearn.metrics.accuracy_score(y_val_orig, yhat_val)
print(f"Training data accuracy  : {acc_baseline}")
print(f"Validation data accuracy: {acc_baseline_val}")

Training data error  : 0.4898166666666667
Validation data error: 0.47075


In [0]:
# Create deep CNN model

def create_model():
    """Build the (uncompiled) deep CNN used to classify the digit images.

    The network stacks two 3x3 convolution layers, a 2x2 max-pooling layer and
    a fully connected head, with dropout after the pooling layer and after the
    hidden dense layer. It reads the notebook-level variables ``input_shape``
    and ``num_classes`` set in the data-preparation cell.

    Returns:
        A ``Sequential`` model whose final layer has ``num_classes`` softmax
        outputs. The caller is expected to compile it.
    """
    model = Sequential([
        # Convolutional feature extractor
        Conv2D(32, kernel_size=(3, 3), activation="relu",
               input_shape=input_shape),
        Conv2D(64, kernel_size=(3, 3), activation="relu"),
        MaxPooling2D(pool_size=(2, 2)),
        Dropout(0.25),
        # Classifier head
        Flatten(),
        Dense(128, activation="relu"),
        Dropout(0.5),
        # One softmax output per class, matching the one-hot encoded targets
        Dense(num_classes, activation="softmax"),
    ])

    return model

# Instantiate the network and configure it for training on one-hot targets
model = create_model()
model.compile(
    optimizer="adam",
    loss="categorical_crossentropy",
    metrics=["accuracy"],
)

In [39]:
# Train model, monitoring the validation split after the epoch
model.fit(
    x=X,
    y=y,
    validation_data=(X_val, y_val),
    epochs=1,  # Note, only one epoch here!
    batch_size=64,
    verbose=1,
)

# evaluate() returns the loss followed by the compiled metrics
score = model.evaluate(x=X_val, y=y_val, verbose=0)
for label, value in zip(("Validation loss    :", "Validation accuracy:"), score):
    print(label, value)

Train on 60000 samples, validate on 8000 samples
Epoch 1/1
Validation loss    : 0.05683108211332001
Validation accuracy: 0.98225


In [30]:
# Evaluate the final model on the test data.
# This is only ever done once, and as the last thing we do.
# Training another model after this, based on the performance on the test data
# leads to biased results.
# evaluate() returns [loss, accuracy] because the model was compiled with
# metrics=["accuracy"]; index 1 is therefore an accuracy, not an error.
acc = model.evaluate(X, y, verbose=0)
acc_val = model.evaluate(X_val, y_val, verbose=0)
acc_test = model.evaluate(X_test, y_test, verbose=0)
print(f"Training data accuracy  : {acc[1]:.3f}")
print(f"Validation data accuracy: {acc_val[1]:.3f}")
print(f"Test data accuracy      : {acc_test[1]:.3f}")

Training data error  : 0.985
Validation data error: 0.982
Test data error      : 0.991
