In [6]:
# Configure TensorFlow's native log-level threshold before the library is imported below.
# NOTE(review): '2' presumably hides INFO and WARNING messages from the C++ backend,
# and the variable presumably has to be set before the import to take effect —
# confirm against the TensorFlow documentation.
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
import tensorflow as tf

In [7]:
!pip install tensorflow



In [8]:
# Cell 1: Importing necessary libraries

# TensorFlow is our primary deep learning library
import tensorflow as tf

# Keras is TensorFlow's high-level API for deep learning
from tensorflow.keras.datasets import mnist
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Flatten
from tensorflow.keras.layers import Conv2D, MaxPooling2D

# Matplotlib is a graphing library
import matplotlib.pyplot as plt

# Numpy is used for handling arrays
import numpy as np


In [9]:
# Cell 2: Load the MNIST dataset and perform preprocessing

# The MNIST database contains 60,000 training images and 10,000 testing images of handwritten digits.
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Inspect the raw training images before any reshaping. This should print (60000, 28, 28):
# 60,000 samples, each a 28x28 pixel image.
print('x_train shape:', x_train.shape)

# The convolutional layers expect a trailing channel axis, so we add one (the images are
# greyscale, hence a single channel) instead of flattening: (N, 28, 28) becomes (N, 28, 28, 1).
# The pixels are cast to float32 *before* scaling from [0, 255] to [0, 1]; dividing the raw
# uint8 arrays directly would yield float64 arrays that take twice the memory for no benefit.
# Neural networks work best with small input values.
x_train = x_train.reshape(-1, 28, 28, 1).astype('float32') / 255.0
x_test = x_test.reshape(-1, 28, 28, 1).astype('float32') / 255.0

# Each image is labeled with a digit from 0-9. We one-hot encode these labels, i.e., a label
# of '3' becomes the array [0, 0, 0, 1, 0, 0, 0, 0, 0, 0].
y_train = tf.keras.utils.to_categorical(y_train, 10)
y_test = tf.keras.utils.to_categorical(y_test, 10)


x_train shape: (60000, 28, 28)


In [10]:
# Cell 3: Define the model architecture

# Fix TensorFlow's global random seed before any layer is created, so that weight
# initialisation, dropout masks and batch shuffling repeat from run to run and re-running
# this cell rebuilds the same starting model. (Some GPU kernels may still introduce small
# run-to-run differences.)
tf.random.set_seed(42)

# We use the Keras Sequential API: layers are stacked in the order listed and the input
# flows through them from top to bottom.
model = Sequential([
    # Convolutional layer with 32 filters/kernels of size 3x3 and 'relu' (Rectified Linear
    # Unit) activation. It also declares the input shape: 28x28 pixels, 1 channel.
    Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=(28, 28, 1)),

    # Second convolutional layer with 64 filters of size 3x3 and 'relu' activation.
    Conv2D(64, (3, 3), activation='relu'),

    # Max pooling halves the spatial dimensions (height and width), which makes the model
    # less complex and speeds up training.
    MaxPooling2D(pool_size=(2, 2)),

    # Dropout is a regularization technique that randomly sets a fraction of the layer's
    # output features to zero during training. Here 25% of the activations are dropped
    # (individual values, not whole layers).
    Dropout(0.25),

    # Fully connected layers need one-dimensional input, so flatten the feature maps.
    Flatten(),

    # Fully connected layer with 128 units and 'relu' activation.
    Dense(128, activation='relu'),

    # Second dropout layer; this time 50% of the activations are dropped to curb overfitting.
    Dropout(0.5),

    # Output layer: 10 units (one per digit class, 0 to 9) with 'softmax' activation.
    # Softmax makes the outputs sum to 1 so they can be read as class probabilities; the
    # prediction is the class with the highest probability.
    Dense(10, activation='softmax'),
])

In [11]:
# Cell 4: Compile the model

# Loss: categorical cross-entropy, the usual choice for multi-class classification with
# one-hot labels. A lower value means the model is performing better.
# Optimizer: Adam with its default settings, an algorithm that can be used instead of
# classical stochastic gradient descent to update the network weights iteratively from
# the training data.
# Metric: 'accuracy' is reported alongside the loss to measure the model's performance.
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)


In [12]:
# Cell 5: Train the model

# We train for 10 epochs (an arbitrary choice); in each epoch the model sees every
# training example once.
# The batch size is the number of samples propagated through the network per weight
# update; we use 128. The larger the batch size, the more memory is needed.
# The test set is passed as validation data so its loss/accuracy is reported after every
# epoch. Caveat: because the same data is used for the final evaluation, do not tune the
# model against these validation numbers, or the test score stops being a held-out estimate.
# The returned History object is kept in `history` (its `.history` dict holds the
# per-epoch metrics) rather than being left to show up as a bare object repr in the output.
history = model.fit(
    x_train, y_train,
    batch_size=128,
    epochs=10,
    verbose=1,
    validation_data=(x_test, y_test),
)


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7fb0acc02dc0>

In [13]:
# Cell 6: Evaluate the model on the test data

# Final step: measure the trained model on the test images.
# Performance on test data is usually somewhat lower than on training data, because the
# weights were fitted to the training set only.
# evaluate() returns the loss followed by the compiled metrics, here [loss, accuracy].
score = model.evaluate(x_test, y_test, verbose=0)
test_loss, test_accuracy = score

print('Test loss:', test_loss)
print('Test accuracy:', test_accuracy)


Test loss: 0.026431087404489517
Test accuracy: 0.991599977016449
