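Classifying the MNIST handwritten-digit dataset with four models: k-nearest neighbours (KNN), a support vector machine (SVM), a baseline neural network (NN) and a convolutional neural network (CNN).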

In [0]:
import struct
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
from sklearn import svm
from sklearn import neighbors

Manually loading MNIST from local IDX files for offline access.

Original files @ http://yann.lecun.com/exdb/mnist/

In [0]:
# Offline alternative to the Keras loader, kept disabled. Uncomment the whole
# cell to read the four IDX files listed in `files` from the working directory.
#
# How read_idx parses a file:
#   * the first 4 bytes are unpacked big-endian ('>HBB') into a 2-byte field
#     (`zero`), a 1-byte data type code and a 1-byte dimension count;
#   * each dimension size follows as a big-endian unsigned 32-bit integer;
#   * the remaining bytes are read as uint8 and reshaped to those dimensions
#     (the data type code is unpacked but not used).
# def read_idx(filename):
#     with open(filename, 'rb') as f:
#         zero, data_type, dims = struct.unpack('>HBB', f.read(4))
#         shape = tuple(struct.unpack('>I', f.read(4))[0] for d in range(dims))
#         return np.frombuffer(f.read(), dtype=np.uint8).reshape(shape)

# The order of the file names must match the tuple unpacking at the end of the cell.
# files = ['train-images-idx3-ubyte','train-labels-idx1-ubyte','t10k-images-idx3-ubyte','t10k-labels-idx1-ubyte']
# arrays = []

# for file in files:
#     arrays.append(read_idx(file))

# (x_train, y_train, x_test, y_test) = tuple(arrays)

Loading MNIST from keras

In [0]:
# Fetch the MNIST train/test image and label arrays through the Keras dataset helper.
(x_train, y_train), (x_test, y_test) = (
    tf.keras.datasets.mnist.load_data()
)

Normalize dataset

In [0]:
def _scale_to_unit_range(images):
  """Return `images` divided by 255.0 if it holds integers, else unchanged.

  The dtype check makes this cell safe to re-run: once the arrays have been
  converted to floats, dividing them a second time would shrink the pixel
  values to [0, 1/255] instead of leaving them in [0, 1].
  """
  if np.issubdtype(images.dtype, np.integer):
    return images / 255.0
  return images

x_train, x_test = _scale_to_unit_range(x_train), _scale_to_unit_range(x_test)

# One-hot encoding stays disabled: the Keras models in this notebook are
# compiled with a sparse categorical cross-entropy loss, which takes the
# integer class labels directly.
#y_train = tf.keras.utils.to_categorical(y_train)
#y_test = tf.keras.utils.to_categorical(y_test)

KNN model

Fitting and scoring KNN on the full dataset takes too long, so only a subset is used.

With the reduced train_size = 5000 and test_size = 500, there is significant over-fitting.

Score on training data = ~96%

Score on test data = ~91%

We expect the overfitting to decrease when the model is trained on the full dataset.

In [0]:
def knn_model(n_neighbors=5, p=2):
  """Create an unfitted k-nearest-neighbours classifier.

  Args:
    n_neighbors: Number of neighbours that vote on each prediction.
    p: Power parameter of the Minkowski distance used to rank neighbours
      (1 = Manhattan, 2 = Euclidean).

  Returns:
    A new `neighbors.KNeighborsClassifier` built with the given settings.
    Calling the function without arguments yields the configuration used
    for the scores reported in this notebook (n_neighbors=5, p=2).
  """
  return neighbors.KNeighborsClassifier(n_neighbors=n_neighbors, p=p)

In [6]:
train_size_knn = 5000
test_size_knn = 500

# Flatten every 28x28 image into a 784-value feature row. The row count is
# inferred with -1 so the reshape still works when a requested size is larger
# than the number of available samples (the slice is then simply shorter).
x_train_knn = x_train[:train_size_knn].reshape(-1, 28*28)
y_train_knn = y_train[:train_size_knn]
x_test_knn = x_test[:test_size_knn].reshape(-1, 28*28)
y_test_knn = y_test[:test_size_knn]

### Uncomment to run knn
# The fitted classifier is bound to its own name (knn_clf) so that the
# knn_model() factory is not overwritten and this cell can be re-run.

# knn_clf = knn_model()
# knn_clf.fit(x_train_knn, y_train_knn)

# print('Score on the training data: {}'.format(knn_clf.score(x_train_knn, y_train_knn)))
# print('Score on the test data: {}'.format(knn_clf.score(x_test_knn, y_test_knn)))

Score on the training data: 0.9608
Score on the test data: 0.91


SVM model

Fitting and scoring the SVM on the full dataset takes too long, so only a subset is used.

With the reduced train_size = 5000 and test_size = 500, there is significant over-fitting.

Score on training data = ~99%

Score on test data = ~95%

We expect the overfitting to decrease when the model is trained on the full dataset.

In [0]:
def svm_model(C=1, kernel='rbf', gamma=0.02):
  """Create an unfitted support vector classifier.

  Args:
    C: Regularisation strength passed to `svm.SVC`.
    kernel: Kernel type passed to `svm.SVC`.
    gamma: Kernel coefficient passed to `svm.SVC`.

  Returns:
    A new `svm.SVC` built with the given settings. Calling the function
    without arguments yields the configuration used for the scores reported
    in this notebook (C=1, RBF kernel, gamma=0.02).
  """
  return svm.SVC(C=C, kernel=kernel, gamma=gamma)

In [0]:
train_size_svm = 5000
test_size_svm = 500

# Flatten every 28x28 image into a 784-value feature row. The row count is
# inferred with -1 so the reshape still works when a requested size is larger
# than the number of available samples (the slice is then simply shorter).
x_train_svm = x_train[:train_size_svm].reshape(-1, 28*28)
y_train_svm = y_train[:train_size_svm]
x_test_svm = x_test[:test_size_svm].reshape(-1, 28*28)
y_test_svm = y_test[:test_size_svm]

### uncomment to run svm
# The fitted classifier is bound to its own name (svm_clf) so that the
# svm_model() factory is not overwritten and this cell can be re-run.

# svm_clf = svm_model()
# svm_clf.fit(x_train_svm, y_train_svm)

# print('Score on the training data: {}'.format(svm_clf.score(x_train_svm, y_train_svm)))
# print('Score on the test data: {}'.format(svm_clf.score(x_test_svm, y_test_svm)))

Baseline NN model

Score on test data = ~97%

In [0]:
def base_model():
  """Build and compile the baseline fully connected digit classifier.

  Layer stack: Flatten (28x28 input) -> Dense(128, ReLU) -> Dropout(0.2)
  -> Dense(10, softmax). The model is compiled with the Adam optimizer,
  sparse categorical cross-entropy loss and an accuracy metric.

  Returns:
    The compiled, untrained Sequential model.
  """
  layers = tf.keras.layers
  network = tf.keras.models.Sequential([
      layers.Flatten(input_shape=(28, 28)),
      layers.Dense(128, activation=tf.nn.relu),
      # Dropout between the hidden and output layers to reduce overfitting.
      layers.Dropout(rate=0.2),
      layers.Dense(10, kernel_initializer='normal', activation=tf.nn.softmax),
  ])
  network.compile(
      optimizer='adam',
      loss='sparse_categorical_crossentropy',
      metrics=['accuracy'],
  )
  return network

In [0]:
### uncomment to run baseline neural network model
# The trained network is bound to its own name (nn_clf) so that the
# base_model() factory is not overwritten and this cell can be re-run.

# nn_clf = base_model()
# nn_clf.fit(x_train, y_train, epochs=5)
# nn_clf.evaluate(x_test, y_test)

CNN Model

Score on test data = ~99.3%

There is still a lot of room to tune the (hyper)parameters.

In [0]:
def cnn_model(learning_rate=1.0):
  """Build and compile the convolutional digit classifier.

  Layer stack (input 28x28x1):
    Conv2D(64, 3x3, same padding, ReLU) -> MaxPooling2D(2x2, stride 2)
    -> Conv2D(64, 3x3, same padding, ReLU) -> MaxPooling2D(2x2, stride 2)
    -> Flatten -> Dense(128, ReLU) -> Dropout(0.5) -> Dense(10, softmax)

  Args:
    learning_rate: Step size given to the Adadelta optimizer. It is set
      explicitly because the library default is not stable across releases
      (1.0 in older Keras, 0.001 in TensorFlow 2.x); with the smaller default
      the network learns far more slowly over the same number of epochs.

  Returns:
    The compiled, untrained Sequential model, using sparse categorical
    cross-entropy loss and an accuracy metric.
  """
  model = tf.keras.models.Sequential()
  model.add(tf.keras.layers.Conv2D(64, padding = 'same', kernel_size=(3, 3), activation=tf.nn.relu, input_shape=(28, 28, 1)))
  model.add(tf.keras.layers.MaxPooling2D((2, 2), strides=2))
  model.add(tf.keras.layers.Conv2D(64, padding = 'same', kernel_size=(3, 3), activation=tf.nn.relu))
  model.add(tf.keras.layers.MaxPooling2D((2, 2), strides=2))
  model.add(tf.keras.layers.Flatten())
  model.add(tf.keras.layers.Dense(128, activation=tf.nn.relu))
  model.add(tf.keras.layers.Dropout(0.5))
  model.add(tf.keras.layers.Dense(10, activation=tf.nn.softmax))
  # The rate is passed positionally because the keyword name changed between
  # releases (`lr` in older Keras, `learning_rate` in newer ones), while it is
  # the first positional parameter in both.
  optimizer = tf.keras.optimizers.Adadelta(learning_rate)
  model.compile(optimizer=optimizer, loss=tf.keras.losses.sparse_categorical_crossentropy, metrics=['accuracy'])
  return model

In [0]:
# Training settings passed to fit() in the commented-out run below.
batch_size = 64
epochs = 12

### uncomment to run convolutional neural network model
# The trained network is bound to its own name (cnn_clf) so that the
# cnn_model() factory is not overwritten and this cell can be re-run.
# np.expand_dims(..., axis=3) appends a channel axis so the images match the
# (28, 28, 1) input shape declared by the first Conv2D layer.

# cnn_clf = cnn_model()
# x_train_4d = np.expand_dims(x_train, axis=3)
# x_test_4d = np.expand_dims(x_test, axis=3)
# cnn_clf.fit(x_train_4d, y_train, batch_size=batch_size, epochs=epochs, verbose=1)
# cnn_clf.evaluate(x_test_4d, y_test)