In [5]:
# Plotting support is optional and currently disabled:
# import matplotlib.pyplot as plt
# %matplotlib inline

from tensorflow.keras.datasets import mnist

# load_data() hands back two (images, labels) pairs: training first, then test.
train_split, test_split = mnist.load_data()
X_train, y_train = train_split
X_test, y_test = test_split

# To preview the first image in the dataset, enable plotting above and run:
# plt.imshow(X_train[0])

# Display the shape of a single image.
X_train[0].shape

(28, 28)

In [7]:
# Reshape the images to fit the model: (nr of images, 28, 28, 1), where the
# trailing 1 is the single greyscale channel.
# -1 lets NumPy infer the number of images instead of hard-coding
# 60000 / 10000, so the cell keeps working on a subset of the data and is
# safe to re-run on arrays that were already reshaped.
X_train = X_train.reshape(-1, 28, 28, 1)
X_test = X_test.reshape(-1, 28, 28, 1)
# NOTE(review): pixel values are passed to the network unscaled; consider
# scaling them to [0, 1] -- confirm that no other cell relies on raw values
# before changing this.

from tensorflow.keras.utils import to_categorical

# One-hot encode the target column: a column is created for each output
# category and a binary value marks the category of each sample.
# The ndim guard keeps this cell idempotent: without it, re-running the cell
# would encode the already one-hot labels a second time and change their shape.
# num_classes is pinned to 10 (digits 0-9) so both splits always get the same
# width, independent of which labels happen to be present.
if y_train.ndim == 1:
    y_train = to_categorical(y_train, num_classes=10)
if y_test.ndim == 1:
    y_test = to_categorical(y_test, num_classes=10)
y_train[0]

array([0., 0., 0., 0., 0., 1., 0., 0., 0., 0.], dtype=float32)

In [9]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv2D, Flatten, MaxPooling2D, Dropout

# Sequential builds the model layer by layer; here the whole stack is passed
# to the constructor in order.
# (An earlier variant used Conv2D layers with 64 and 32 filters and
# kernel_size=3; the current one uses 32 filters with 5x5 kernels.)
model = Sequential([
    # First convolution: 32 filters, 5x5 filter matrix, ReLU (Rectified
    # Linear Activation). As the first layer it also takes the input shape:
    # 28x28 pixels, 1 greyscale channel.
    Conv2D(32, (5, 5), activation='relu', input_shape=(28, 28, 1)),
    # 2x2 max pooling after the first convolution.
    MaxPooling2D(pool_size=(2, 2)),
    # Second convolution with the same filter count and kernel size.
    Conv2D(32, (5, 5), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    # Flatten serves as the connection between the convolution and dense layers.
    Flatten(),
    # Hidden dense layer with 256 nodes, followed by dropout with rate 0.3.
    Dense(256, activation='relu'),
    Dropout(0.3),
    # Output layer: 10 nodes, one per digit (0-9). Softmax makes the outputs
    # sum up to 1 so they can be interpreted as probabilities.
    Dense(10, activation='softmax'),
])

In [10]:
# Compiling the model takes three parameters:
#   optimizer - controls the learning rate ('adam' here)
#   loss      - categorical_crossentropy, the most common choice for
#               classification
#   metrics   - accuracy, used to measure model performance
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [None]:
# Training the model
# nr of epochs is the nr of times the model will cycle through the data
# The model will improve up to a certain nr of epochs
%time model.fit(X_train, y_train, batch_size=100, epochs=10, verbose=1)
# %time model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10