In [3]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.activations import linear, relu, sigmoid
np.set_printoptions(precision=2)
from datasets import load_dataset


Data Capturing and Transformations

In [15]:
ds = load_dataset("ylecun/mnist")

# Each value in X_train is a list of integers representing the pixel intensities of the images, flattened into a single list.
X_train = np.array([np.array(sample['image']).flatten() for sample in ds['train']])
y_train = np.array([sample['label'] for sample in ds['train']])

X_test = np.array([np.array(sample['image']).flatten() for sample in ds['test']])
y_test = np.array([sample['label'] for sample in ds['test']])

# Print shapes to verify
print(X_train.shape, y_train.shape)
print(X_test.shape, y_test.shape)

(60000, 784) (60000,)
(10000, 784) (10000,)


Creating the neural network. Using the SparseCategoricalCrossentropy function with logits to perform softmax regression algorithm while also reducing numeric roundoff errors. (Note: the last layer is linear and not softmax since we have logits set to True).

In [18]:
model = Sequential(
    [               
        Dense(25, activation = 'relu'),
        Dense(15, activation = 'relu'),
        Dense(10, activation = 'linear')
    ], name = "digit_recognition" 
)

Training

In [20]:
# Using Adam ooptimization algorithm for auto selection of the learning rate value.
model.compile(
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    optimizer=tf.keras.optimizers.Adam(0.001),
)

model.fit(X_train, y_train, epochs=20)

Epoch 1/20
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 497us/step - loss: 0.1701
Epoch 2/20
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 486us/step - loss: 0.1551
Epoch 3/20
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 500us/step - loss: 0.1485
Epoch 4/20
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 523us/step - loss: 0.1518
Epoch 5/20
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 501us/step - loss: 0.1470
Epoch 6/20
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 493us/step - loss: 0.1388
Epoch 7/20
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 490us/step - loss: 0.1388
Epoch 8/20
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 588us/step - loss: 0.1336
Epoch 9/20
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 486us/step - loss: 0.1310
Epoch 10/20
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━

<keras.src.callbacks.history.History at 0x16a4f2bf0>

Testing and evaluating the neural network model.

In [None]:
results = model.evaluate(X_test, y_test)
print(f"Test Loss: {results}")

# Predict the labels for the test data
predictions = model.predict(X_test)
predicted_labels = np.argmax(predictions, axis=1)

# Print the first 10 predicted labels and their corresponding true labels
print("Predicted labels: ", predicted_labels[:10])
print("True labels: ", y_test[:10])

[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 419us/step - loss: 0.2516
Test Loss: 0.22124598920345306
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 285us/step
Predicted labels:  [7 2 1 0 4 1 4 9 5 9]
True labels:  [7 2 1 0 4 1 4 9 5 9]
