In [1]:
import matplotlib.pyplot as plt

In [2]:
from keras.datasets import mnist

Using TensorFlow backend.


In [3]:
#make the training and testing data
(X_train, y_train), (X_test, y_test) = mnist.load_data()

In [7]:
#plt.imshow(X_test[0])
X_train.shape

(60000, 28, 28)

In [5]:
#reshape the data to feed into the network
X_train = X_train.reshape(60000,28,28,1)
X_test = X_test.reshape(10000,28,28,1)

In [6]:
#column is created for each output category and a binary variable is inputted for each category
#essentially what this means is that we have a column with ten 0s [0-9], and for every number/digit that is 
#identified, a 1 is inputted to that position. So if a 5 is identified, the 0 at position 6 is put to a 1
#This is making our output a categorical variable

from keras.utils import to_categorical

#one-hot encode target column
#y_train = to_categorical(y_train)

y_train = to_categorical(y_train)
y_test = to_categorical(y_test)

In [7]:
#Sequential model allows you to build the network layer by layer
from keras.models import Sequential

#Conv2D allows each layer to have convolutions on 2D matrices
from keras.layers import Dense, Conv2D, Flatten

In [8]:
#create the model
model = Sequential()

In [9]:
#add layers to the model

#first layer is a convolutional layer with 64 nodes, 3x3 kernel matrix, and uses relu activation 
#the input size is set to 28,28,1 to indicate the length, height and channels of the input image
#channels of the image is set to 1 to indicate that the image is greyscale
model.add(Conv2D(64, kernel_size=3, activation='relu', input_shape=(28,28,1)))

#second layer is a convolutional layer with 32 nodes, 3x3 kernel matrix, and also uses relu activation
model.add(Conv2D(32, kernel_size=3, activation='relu'))

#Flatten is used to connect the convolutional input layers to the dense output layer
model.add(Flatten())

#Dense layer is the output layer which has 10 nodes, for each category of output from 0-9
#it uses the softmax function to generate probability distributions
model.add(Dense(10, activation='softmax'))

In [10]:
#compiling the model

#takes 3 items: Optimiser, loss function and metrics

#Optimiser controls the learning rate, here adam optimiser is being used. 
#Learning rate controls how the fast the weights are being calculated. Faster learning rate means that 
#the weights are calculated faster but the accuracy may decrease, and a slower learning rate means that the
#accuracy of the system is being ensured by the optimal calculation of weights

#The Loss function used here is the categorical_crossentropy. A lower score means the system is performing better

model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

In [11]:
#training the model

#training the model is done by taking into account the training data (X_train), target data(y_train), 
#validation data (X_test, y_test), and number of iterations or epochs
model.fit(X_train, y_train, validation_data=(X_test,y_test), epochs=7)

Train on 60000 samples, validate on 10000 samples
Epoch 1/7
Epoch 2/7
Epoch 3/7
Epoch 4/7
Epoch 5/7
Epoch 6/7
Epoch 7/7


<keras.callbacks.History at 0x7fcb1c690e10>

In [12]:
#making the model predict the first 5 digits of the testing set
prediction = model.predict(X_test[:5])

array([[1.4427099e-16, 9.3042950e-23, 1.9323265e-17, 5.8781777e-17,
        2.0721988e-24, 5.8083123e-20, 1.8270465e-27, 1.0000000e+00,
        5.9787913e-17, 4.1664704e-14],
       [2.5312275e-13, 7.1791664e-16, 1.0000000e+00, 6.3707333e-16,
        1.9963160e-22, 5.8991438e-22, 4.4988122e-10, 7.1922159e-23,
        7.9582929e-12, 4.0994105e-22],
       [3.6527323e-14, 1.0000000e+00, 2.0620748e-08, 1.0835665e-14,
        3.9185713e-10, 1.3211507e-12, 3.6665813e-14, 1.9338836e-12,
        5.9225993e-09, 2.0003471e-16],
       [1.0000000e+00, 5.9945935e-25, 3.0456144e-19, 5.1858157e-23,
        2.4402388e-18, 1.3705241e-15, 8.2802050e-17, 5.9118101e-20,
        1.2642033e-18, 1.1107185e-15],
       [7.0076974e-13, 7.7651160e-21, 2.7564328e-18, 1.4151109e-19,
        1.0000000e+00, 3.8685542e-18, 5.3635150e-21, 1.4542215e-13,
        4.7586419e-13, 5.7659827e-12]], dtype=float32)

In [17]:
#checking the labels for the first 5 digits from the validation set
y_test[:5]

array([[0., 0., 0., 0., 0., 0., 0., 1., 0., 0.],
       [0., 0., 1., 0., 0., 0., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 1., 0., 0., 0., 0., 0.]], dtype=float32)

In [2]:
import numpy as np

In [16]:
prediction = np.array([[1.4427099e-16, 9.3042950e-23, 1.9323265e-17, 5.8781777e-17,
        2.0721988e-24, 5.8083123e-20, 1.8270465e-27, 1.0000000e+00,
        5.9787913e-17, 4.1664704e-14],
       [2.5312275e-13, 7.1791664e-16, 1.0000000e+00, 6.3707333e-16,
        1.9963160e-22, 5.8991438e-22, 4.4988122e-10, 7.1922159e-23,
        7.9582929e-12, 4.0994105e-22],
       [3.6527323e-14, 1.0000000e+00, 2.0620748e-08, 1.0835665e-14,
        3.9185713e-10, 1.3211507e-12, 3.6665813e-14, 1.9338836e-12,
        5.9225993e-09, 2.0003471e-16],
       [1.0000000e+00, 5.9945935e-25, 3.0456144e-19, 5.1858157e-23,
        2.4402388e-18, 1.3705241e-15, 8.2802050e-17, 5.9118101e-20,
        1.2642033e-18, 1.1107185e-15],
       [7.0076974e-13, 7.7651160e-21, 2.7564328e-18, 1.4151109e-19,
        1.0000000e+00, 3.8685542e-18, 5.3635150e-21, 1.4542215e-13,
        4.7586419e-13, 5.7659827e-12]])

In [17]:
for elements in prediction:
    print(np.where(elements==1))

(array([7]),)
(array([2]),)
(array([1]),)
(array([0]),)
(array([4]),)
