In [1]:
# Importing the MNIST dataset from TensorFlow's Keras module
from tensorflow.keras.datasets import mnist

# Load MNIST and unpack the two (images, labels) pairs returned by mnist.load_data():
#   train_images / train_labels: images and matching labels used to fit the model
#   test_images  / test_labels:  held-out images and matching labels used for evaluation
# In the recorded run the training split holds 60000 images of 28x28 pixels and the
# test split holds 10000; the labels are uint8 digits.
(train_images, train_labels), (test_images, test_labels) = mnist.load_data()

In [2]:
# Training data

# Dimensions of the training image array; the recorded run shows
# (60000, 28, 28), i.e. 60000 images of 28x28 pixels.
train_images.shape

(60000, 28, 28)

In [3]:
# Count of training labels, read from the first axis of the label array
# (expected to equal the number of training images)
train_labels.shape[0]

60000

In [4]:
# Preview of the training labels: one integer digit class (0-9) per image,
# stored as uint8 in the recorded run.
train_labels

array([5, 0, 4, ..., 5, 6, 8], dtype=uint8)

In [5]:
# Test data

# Dimensions of the test image array; the recorded run shows
# (10000, 28, 28), i.e. 10000 images of 28x28 pixels.
test_images.shape

(10000, 28, 28)

In [6]:
# Count of test labels, read from the first axis of the label array
# (expected to equal the number of test images)
test_labels.shape[0]

10000

In [7]:
# Preview of the test labels: one integer digit class (0-9) per image,
# stored as uint8 in the recorded run.
test_labels

array([7, 2, 1, ..., 4, 5, 6], dtype=uint8)

In [8]:
# The network architecture

# Importing necessary modules from TensorFlow and Keras
from tensorflow import keras
from tensorflow.keras import layers

# Seed Python, NumPy and the Keras backend before any weights are created so
# that weight initialisation and batch shuffling repeat on every
# "Restart Kernel -> Run All". Seeding in this cell also means that re-running
# the cell rebuilds the model from the same starting point.
# NOTE: some GPU kernels are non-deterministic, so tiny run-to-run differences
# may remain on accelerated hardware.
keras.utils.set_random_seed(42)

# Creating a Sequential model using Keras: a stack of two fully connected layers.
model = keras.Sequential([
    # Hidden layer: 512 units with ReLU activation
    layers.Dense(512, activation="relu"),

    # Output layer: 10 units (one per digit class); softmax turns the scores
    # into a probability distribution over the classes
    layers.Dense(10, activation="softmax")
])

In [9]:
# The compilation step

# Attach the training configuration to the model:
#   loss      - sparse categorical cross-entropy, which takes integer class
#               labels directly (no one-hot encoding needed)
#   optimizer - RMSprop with its default settings
#   metrics   - classification accuracy, reported alongside the loss
model.compile(
    loss="sparse_categorical_crossentropy",
    optimizer="rmsprop",
    metrics=["accuracy"],
)

In [10]:
# Preparing the image data

def _flatten_and_scale(images):
    """Flatten every image to a 1-D float32 vector with pixels scaled to [0, 1].

    Args:
        images: NumPy array whose first axis indexes samples, e.g. the
            (n_samples, 28, 28) arrays loaded from MNIST. Integer input is
            assumed to hold raw pixel intensities in 0-255, as implied by
            the division by 255.

    Returns:
        A float32 array of shape (n_samples, pixels_per_image).
    """
    # -1 lets NumPy infer the per-image length, so neither the sample count
    # nor the image size is hard-coded.
    flat = images.reshape((len(images), -1))

    # Only raw integer pixels are rescaled. An array that is already floating
    # point has been through this step before (the cell was re-run), and
    # dividing by 255 a second time would silently shrink the inputs.
    if flat.dtype.kind in ("u", "i"):
        return flat.astype("float32") / 255
    return flat.astype("float32")


# The original variable names are kept because later cells read them; the
# dtype guard above is what makes re-running this cell safe.
train_images = _flatten_and_scale(train_images)
test_images = _flatten_and_scale(test_images)

In [11]:
# "Fitting" (training) the model

# Fit the network to the training set.
# With 60000 samples and 128 samples per gradient update, each of the
# 5 passes over the data (epochs) runs 469 update steps.
model.fit(
    x=train_images,
    y=train_labels,
    batch_size=128,
    epochs=5,
)

Epoch 1/5
469/469 ━━━━━━━━━━━━━━━━━━━━ 3s 5ms/step - accuracy: 0.8773 - loss: 0.4327
Epoch 2/5
469/469 ━━━━━━━━━━━━━━━━━━━━ 2s 4ms/step - accuracy: 0.9644 - loss: 0.1219
Epoch 3/5
469/469 ━━━━━━━━━━━━━━━━━━━━ 2s 5ms/step - accuracy: 0.9785 - loss: 0.0717
Epoch 4/5
469/469 ━━━━━━━━━━━━━━━━━━━━ 2s 4ms/step - accuracy: 0.9851 - loss: 0.0510
Epoch 5/5
469/469 ━━━━━━━━━━━━━━━━━━━━ 3s 5ms/step - accuracy: 0.9888 - loss: 0.0380


<keras.src.callbacks.history.History at 0x1ec8a2a49d0>

In [12]:
# Using the model to make predictions

# Run inference on the first ten test images
test_digits = test_images[:10]
predictions = model.predict(test_digits)

# Show the output for the first image: one softmax probability per digit class
predictions[0]

1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 63ms/step


array([6.8701539e-08, 4.7897281e-10, 2.2061920e-06, 7.6696117e-05,
       8.3021950e-11, 1.5852699e-07, 5.1362088e-13, 9.9989879e-01,
       2.8950703e-07, 2.1793907e-05], dtype=float32)

In [13]:
# Predicted label for the first test image: the position of the largest
# probability in its prediction vector (7 in the recorded run).
predictions[0].argmax()

7

In [14]:
# Probability the model assigns to class 7 for the first test image
# (row 0, column 7 of the prediction matrix)
predictions[0, 7]

0.9998988

In [15]:
# Ground-truth label of the first test image, for comparison with the
# model's prediction (7 in the recorded run, so the prediction is correct).
test_labels[0]

7

In [16]:
# Evaluating the model on new data

# Score the trained model on the held-out test set. evaluate() returns the
# loss followed by each compiled metric, here just accuracy.
test_loss, test_acc = model.evaluate(x=test_images, y=test_labels)

# Report the accuracy on the test set
print(f"test_acc: {test_acc}")

313/313 ━━━━━━━━━━━━━━━━━━━━ 0s 1ms/step - accuracy: 0.9773 - loss: 0.0792
test_acc: 0.9803000092506409
