In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import tensorflow as tf
from tensorflow import keras

In [3]:
# Kaggle MNIST Dataset - https://www.kaggle.com/c/digit-recognizer/data
# Load the train and test CSV files (relative to the working directory) as DataFrames
train_src, test_src = (pd.read_csv(csv_name) for csv_name in ('train.csv', 'test.csv'))

In [4]:
# Preview the first five rows of the training frame (a `label` column followed by pixel columns)
train_src.head()

Unnamed: 0,label,pixel0,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,...,pixel774,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783
0,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,4,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [5]:
# Preview the first five rows of the test frame (pixel columns only, no `label` column)
test_src.head()

Unnamed: 0,pixel0,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,pixel9,...,pixel774,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [6]:
# Check shape of the data: (rows, columns) per frame. The training frame has one
# more column than the test frame because it carries the `label` column.
train_src.shape, test_src.shape

((42000, 785), (28000, 784))

In [7]:
# Turn both DataFrames into NumPy arrays. reset_index() first moves the row index
# into a leading column, so each array ends up one column wider than its frame.
# NOTE: train_src/test_src are rebound from DataFrame to ndarray here, so this cell
# only works once per load; re-read the CSVs before running it again.
train_src, test_src = (frame.reset_index().values for frame in (train_src, test_src))
train_src.shape, test_src.shape

((42000, 786), (28000, 785))

In [8]:
# Drop the non-pixel columns and keep only X. Column 0 is the index column added by
# reset_index(); in the training array column 1 holds the target label.
train_src[:, 2:].shape, test_src[:, 1:].shape

((42000, 784), (28000, 784))

In [9]:
# Reshape train and test data into stacks of 28x28 images: (n_samples, 28, 28).
# -1 lets NumPy infer the sample count from the array itself, so the cell keeps
# working when the CSVs hold a different number of rows (previously the counts
# 42000 / 28000 were hard-coded and any other size raised a ValueError).
# Column 0 is the index added by reset_index(); in train, column 1 is the label.
train = np.reshape(train_src[:, 2:], (-1, 28, 28))
test = np.reshape(test_src[:, 1:], (-1, 28, 28))
# The 1:2 slice keeps the labels two-dimensional: (n_samples, 1)
y_train = train_src[:, 1:2]

train.shape, test.shape, y_train.shape

((42000, 28, 28), (28000, 28, 28), (42000, 1))

In [10]:
# Rescale pixel intensities into the range [0, 1] (assumes raw values lie in 0..255)
x_train, x_test = train / 255, test / 255

In [13]:
# Get the data to the Keras input shape of the model: (n_samples, 28, 28, 1).
# Reshaping to the full target shape (instead of np.expand_dims on a name that is
# rebound to its own result) makes the cell idempotent: re-running it no longer
# appends yet another trailing axis. -1 lets NumPy infer the sample count.
x_train = x_train.reshape(-1, 28, 28, 1)
x_test = x_test.reshape(-1, 28, 28, 1)
x_train.shape, y_train.shape, x_test.shape

((42000, 28, 28, 1), (42000, 1), (28000, 28, 28, 1))

In [11]:
## Build the model: a small CNN with two Conv2D + max-pooling stages, dropout and a softmax head
num_classes = 10
input_shape = (28, 28, 1)

model = keras.Sequential()
model.add(keras.Input(shape=input_shape))

# Stage 1: 32 filters of 3x3, then 2x2 max-pooling
model.add(keras.layers.Conv2D(32, kernel_size=(3, 3), activation="relu"))
model.add(keras.layers.MaxPooling2D(pool_size=(2, 2)))

# Stage 2: 64 filters of 3x3, then 2x2 max-pooling
model.add(keras.layers.Conv2D(64, kernel_size=(3, 3), activation="relu"))
model.add(keras.layers.MaxPooling2D(pool_size=(2, 2)))

# Classifier head: flatten, drop half of the activations during training, one softmax unit per class
model.add(keras.layers.Flatten())
model.add(keras.layers.Dropout(0.5))
model.add(keras.layers.Dense(num_classes, activation="softmax"))

In [12]:
# Print each layer's output shape and parameter count
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 26, 26, 32)        320       
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 13, 13, 32)        0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 11, 11, 64)        18496     
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 5, 5, 64)          0         
_________________________________________________________________
flatten (Flatten)            (None, 1600)              0         
_________________________________________________________________
dropout (Dropout)            (None, 1600)              0         
_________________________________________________________________
dense (Dense)                (None, 10)                1

In [14]:
# Convert the integer class labels to binary class matrices (one-hot rows).
# The labels are read from train_src (column 1) rather than from y_train itself,
# so re-running this cell does not one-hot encode targets that are already encoded.
# num_classes comes from the model cell instead of repeating the literal 10.
# There is no y_test to encode: the test data has no labels.
y_train = keras.utils.to_categorical(train_src[:, 1], num_classes)

In [15]:
# Sanity check: one row per training sample, one column per class
y_train.shape

(42000, 10)

In [16]:
# Train the model
# Adam optimiser on categorical cross-entropy, reporting accuracy
model.compile(optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"])

# Mini-batches of 128 for 15 epochs; validation_split=0.1 holds out 10% of the training data
batch_size, epochs = 128, 15
model.fit(x_train, y_train, epochs=epochs, batch_size=batch_size, validation_split=0.1)

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


<tensorflow.python.keras.callbacks.History at 0x2452caff388>

Model still not converged, after 15 epochs we are at 98.87 accuracy. 

In [17]:
# The test data has no labels, so all we can do with it is predict.
# predict() returns one score per class for every image; argmax over axis 1
# turns each row of scores into the index of the highest-scoring class.
preds = model.predict(x_test).argmax(axis=1)
preds.shape

(28000,)

In [18]:
preds # an array of predicted digit labels, one per test image

array([2, 0, 9, ..., 3, 9, 2], dtype=int64)