In [1]:
import tensorflow as tf
print("TensorFlow version:", tf.__version__)

TensorFlow version: 2.9.2


## Load a dataset

In [4]:
# Handwritten digits dataset: http://yann.lecun.com/exdb/mnist/
mnist = tf.keras.datasets.mnist

# Load the train and test data
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Normalize data?
x_train, x_test = x_train / 255.0, x_test / 255.0

## Build a machine learning model

In [17]:
# Returns a vector of logits/log-odds
model = tf.keras.models.Sequential([
    tf.keras.layers.Flatten(input_shape=(28,28)),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(10)
])

In [18]:
predictions = model(x_train[:1]).numpy()
predictions

array([[-0.94793516,  0.5231623 , -0.58516455, -0.27086276,  0.49766904,
         0.50770223,  0.07812713,  0.50418687, -0.12841138, -1.440588  ]],
      dtype=float32)

In [16]:
# Softmax converts logits to probabilities... these are easier to interpret as a human
# It is possible to add softmax as a layer in the model, but apparently makes the loss calculation unstable and inexact
tf.nn.softmax(predictions).numpy()

array([[0.03671921, 0.15987583, 0.05277674, 0.07226734, 0.15585157,
        0.15742314, 0.10244869, 0.15687071, 0.08333126, 0.02243556]],
      dtype=float32)

In [19]:
# loss_fn = (-) log probability of the true class
#   eg. for 10 classes, with random probabilities: -log(1/10)
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
loss_fn(y_train[:1], predictions).numpy()

1.8488178

In [20]:
# compile the model with the appropriate parameters (specified by the tutorial)
model.compile(  optimizer='adam',
                loss=loss_fn,
                metrics=['accuracy'])

In [21]:
# train the model!
model.fit(x_train, y_train, epochs=5)

Epoch 1/5


2022-08-15 11:50:05.107337: W tensorflow/core/platform/profile_utils/cpu_utils.cc:128] Failed to get CPU frequency: 0 Hz
2022-08-15 11:50:05.306512: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x1631ffd90>

In [22]:
# test the model!
model.evaluate(x_test, y_test, verbose=2)

2022-08-15 11:51:27.297764: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


313/313 - 1s - loss: 0.0754 - accuracy: 0.9770 - 1s/epoch - 4ms/step


[0.07540934532880783, 0.9770000576972961]

In [23]:
# Wrap the trained model and add the Softmax to make the outputs human-readable 
probability_model = tf.keras.Sequential([
    model,
    tf.keras.layers.Softmax()
])
probability_model(x_test[:5])

<tf.Tensor: shape=(5, 10), dtype=float32, numpy=
array([[5.5662863e-08, 2.4467342e-08, 1.0303500e-05, 3.8491041e-04,
        1.0933100e-12, 7.5878283e-08, 1.6313891e-13, 9.9960190e-01,
        3.3346063e-07, 2.4476724e-06],
       [1.5816752e-06, 7.6452343e-05, 9.9970508e-01, 9.0812980e-05,
        3.7614277e-14, 2.1114691e-05, 8.8706764e-07, 5.2392934e-10,
        1.0416993e-04, 4.9329624e-10],
       [1.1034965e-06, 9.9941552e-01, 2.4718798e-05, 7.4770119e-06,
        5.6506531e-05, 1.7350978e-05, 4.7811271e-05, 3.4993075e-04,
        7.7990611e-05, 1.4903596e-06],
       [9.9996841e-01, 1.3263765e-08, 1.3361727e-05, 3.9353516e-08,
        3.4649350e-08, 3.7001294e-07, 4.5343127e-06, 7.4466411e-06,
        3.8818076e-08, 5.8417263e-06],
       [6.3085719e-07, 6.5193331e-07, 2.3477245e-05, 1.2983535e-06,
        9.8691374e-01, 2.5127690e-06, 5.2059906e-05, 5.3191918e-04,
        2.6929862e-05, 1.2446856e-02]], dtype=float32)>

In [24]:
# Visually comparing the probability_model with the actual values shows a pretty successful model!
y_test[:5]

array([7, 2, 1, 0, 4], dtype=uint8)