The MNIST dataset with KNN, SVM, NN and CNN.

In [0]:
import struct
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
from sklearn import svm
from sklearn import neighbors

Manually loading MNIST for offline access.

Original files @ http://yann.lecun.com/exdb/mnist/

In [0]:
# Optional offline loader (disabled). Uncomment this whole cell to read the four
# IDX files from the working directory instead of downloading MNIST via Keras.
#
# read_idx parses the IDX header: a 4-byte prefix holding the data type and the
# number of dimensions, followed by one big-endian uint32 per dimension. The
# remaining bytes are reshaped to those dimensions.
# NOTE(review): data_type is read but ignored; the payload is always decoded as
# uint8 -- presumably fine for the MNIST files, confirm before reusing elsewhere.
# def read_idx(filename):
#     with open(filename, 'rb') as f:
#         zero, data_type, dims = struct.unpack('>HBB', f.read(4))
#         shape = tuple(struct.unpack('>I', f.read(4))[0] for d in range(dims))
#         return np.frombuffer(f.read(), dtype=np.uint8).reshape(shape)

# files = ['train-images-idx3-ubyte','train-labels-idx1-ubyte','t10k-images-idx3-ubyte','t10k-labels-idx1-ubyte']
# arrays = []

# for file in files:
#     arrays.append(read_idx(file))

# (x_train, y_train, x_test, y_test) = tuple(arrays)

Loading MNIST from keras

In [3]:
# Fetch the MNIST train/test splits bundled with Keras as (images, labels) pairs.
mnist = tf.keras.datasets.mnist
(x_train, y_train), (x_test, y_test) = mnist.load_data()

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz


Normalize dataset

In [0]:
# Scale pixel intensities by 1/255 so the images lie in [0.0, 1.0].
# Only integer-typed (i.e. still raw) arrays are scaled: after the first run the
# arrays are floating point, so re-running this cell will not divide the
# already-normalised data by 255 a second time.
# The labels (y_train / y_test) are class indices and are deliberately left untouched.
if np.issubdtype(x_train.dtype, np.integer):
  x_train = x_train / 255.0
if np.issubdtype(x_test.dtype, np.integer):
  x_test = x_test / 255.0

KNN model

There is significant over-fitting.

Score on training data = ~98%

Score on test data = ~96%

In [0]:
def knn_model(n_neighbors=5, p=2):
  """Build an unfitted k-nearest-neighbours classifier.

  Args:
    n_neighbors: Number of neighbours that vote on each prediction.
    p: Power parameter forwarded to ``KNeighborsClassifier``.

  Returns:
    A new, unfitted ``neighbors.KNeighborsClassifier``.
  """
  return neighbors.KNeighborsClassifier(n_neighbors=n_neighbors, p=p)

In [6]:
train_size_knn = 60000
test_size_knn = 10000

# Flatten every 28x28 image into a 784-long feature vector. Using -1 lets NumPy
# infer the row count, so a requested size larger than the dataset simply uses
# every available sample instead of making reshape() fail.
x_train_knn = x_train[:train_size_knn].reshape(-1, 28*28)
y_train_knn = y_train[:train_size_knn]
x_test_knn = x_test[:test_size_knn].reshape(-1, 28*28)
y_test_knn = y_test[:test_size_knn]

# Bind the estimator to its own name: assigning it to `knn_model` would overwrite
# the factory function and make this cell fail when it is run a second time.
knn_clf = knn_model()
knn_clf.fit(x_train_knn, y_train_knn)

print('Score on the training data: {}'.format(knn_clf.score(x_train_knn, y_train_knn)))
print('Score on the test data: {}'.format(knn_clf.score(x_test_knn, y_test_knn)))

Score on the training data: 0.9819166666666667
Score on the test data: 0.9688


SVM model

There is significant over-fitting.

Score on training data = ~99.5%

Score on test data = ~98%

In [0]:
def svm_model(C=1, kernel='rbf', gamma=0.02):
  """Build an unfitted support-vector classifier.

  Args:
    C: Regularisation parameter forwarded to ``svm.SVC``.
    kernel: Kernel type forwarded to ``svm.SVC``.
    gamma: Kernel coefficient forwarded to ``svm.SVC``.

  Returns:
    A new, unfitted ``svm.SVC``.
  """
  return svm.SVC(C=C, kernel=kernel, gamma=gamma)

In [8]:
train_size_svm = 60000
test_size_svm = 10000

# Flatten every 28x28 image into a 784-long feature vector. Using -1 lets NumPy
# infer the row count, so a requested size larger than the dataset simply uses
# every available sample instead of making reshape() fail.
x_train_svm = x_train[:train_size_svm].reshape(-1, 28*28)
y_train_svm = y_train[:train_size_svm]
x_test_svm = x_test[:test_size_svm].reshape(-1, 28*28)
y_test_svm = y_test[:test_size_svm]

# Bind the estimator to its own name: assigning it to `svm_model` would overwrite
# the factory function and make this cell fail when it is run a second time.
svm_clf = svm_model()
svm_clf.fit(x_train_svm, y_train_svm)

print('Score on the training data: {}'.format(svm_clf.score(x_train_svm, y_train_svm)))
print('Score on the test data: {}'.format(svm_clf.score(x_test_svm, y_test_svm)))

Score on the training data: 0.9946
Score on the test data: 0.9826


Baseline NN model

Less over-fitting, and took way less time to train.

Score on training data = ~98.5%

Score on test data = ~98%

In [0]:
def base_model():
  """Build and compile the baseline fully-connected network.

  The network flattens a 28x28 input, applies one 128-unit ReLU layer with
  dropout, and ends in a 10-way softmax. It is compiled with the Adam
  optimizer, sparse categorical cross-entropy loss and an accuracy metric.

  Returns:
    The compiled ``tf.keras`` Sequential model.
  """
  layers = tf.keras.layers
  network = tf.keras.models.Sequential([
      layers.Flatten(input_shape=(28, 28)),
      layers.Dense(128, activation=tf.nn.relu),
      # Dropout after the hidden layer to reduce overfitting
      layers.Dropout(rate=0.2),
      layers.Dense(10, kernel_initializer='normal', activation=tf.nn.softmax),
  ])
  network.compile(
      optimizer='adam',
      loss='sparse_categorical_crossentropy',
      metrics=['accuracy'],
  )
  return network

In [10]:
# Bind the network to its own name: assigning it to `base_model` would overwrite
# the factory function and make this cell fail when it is run a second time.
nn_model = base_model()
nn_model.fit(x_train, y_train, epochs=10)
# Last expression: the cell displays what evaluate() returns on the test set.
nn_model.evaluate(x_test, y_test)

Instructions for updating:
If using Keras pass *_constraint arguments to layers.
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
Train on 60000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


[0.07434345606982824, 0.9797]

CNN Model

Minimal over-fitting, and took little time.

Score on training data = ~99.7%

Score on test data = ~99.4%

There is still a lot of room to tune the (hyper)parameters.

In [0]:
def cnn_model():
  """Build and compile the convolutional network.

  Two blocks of (3x3 'same'-padded Conv2D with 64 filters -> 2x2 max-pool with
  stride 2) feed a 128-unit ReLU layer with 0.5 dropout and a 10-way softmax
  output. Inputs are expected with shape (28, 28, 1). The model is compiled
  with Adam, sparse categorical cross-entropy loss and an accuracy metric.

  Returns:
    The compiled ``tf.keras`` Sequential model.
  """
  layers = tf.keras.layers
  network = tf.keras.models.Sequential([
      # First convolution block; also declares the input shape
      layers.Conv2D(64, padding='same', kernel_size=(3, 3),
                    activation=tf.nn.relu, input_shape=(28, 28, 1)),
      layers.MaxPooling2D((2, 2), strides=2),
      # Second convolution block
      layers.Conv2D(64, padding='same', kernel_size=(3, 3),
                    activation=tf.nn.relu),
      layers.MaxPooling2D((2, 2), strides=2),
      # Classifier head
      layers.Flatten(),
      layers.Dense(128, activation=tf.nn.relu),
      layers.Dropout(0.5),
      layers.Dense(10, activation=tf.nn.softmax),
  ])
  network.compile(
      optimizer=tf.keras.optimizers.Adam(),
      loss=tf.keras.losses.sparse_categorical_crossentropy,
      metrics=['accuracy'],
  )
  return network

In [12]:
batch_size = 64
epochs = 30

# Bind the network to its own name: assigning it to `cnn_model` would overwrite
# the factory function and make this cell fail when it is run a second time.
cnn_clf = cnn_model()
# Conv2D was declared with input_shape=(28, 28, 1), so append a channel axis
# to the image arrays.
x_train_4d = np.expand_dims(x_train, axis=3)
x_test_4d = np.expand_dims(x_test, axis=3)
cnn_clf.fit(x_train_4d, y_train, batch_size=batch_size, epochs=epochs, verbose=1)
# Last expression: the cell displays what evaluate() returns on the test set.
cnn_clf.evaluate(x_test_4d, y_test)

Train on 60000 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


[0.036995108777207755, 0.9941]