In [1]:
import tensorflow as tf
from tensorflow.keras.callbacks import TensorBoard

Load and prepare the data

In [2]:
# Load the MNIST dataset; load_data() returns (train, test) pairs of (images, labels).
mnist = tf.keras.datasets.mnist

(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Scale the pixel values into [0, 1]. Cast to float32 first: dividing the integer
# images by 255.0 directly produces float64 arrays, which doubles their memory
# footprint only for Keras to cast them back to its default float32 on each call.
x_train = x_train.astype("float32") / 255.0
x_test = x_test.astype("float32") / 255.0

In [3]:
# inspect the training labels: integer class ids stored as uint8
y_train

array([5, 0, 4, ..., 5, 6, 8], dtype=uint8)

Build a sequential model

In [4]:
# Feed-forward classifier, assembled layer by layer:
#   1. flatten each 28x28 image into a 784-long vector
#   2. one hidden layer of 128 ReLU units
#   3. dropout (rate 0.2) for regularisation during training
#   4. a 10-unit output layer that emits raw logits (no softmax applied here)
model = tf.keras.models.Sequential()
model.add(tf.keras.layers.Flatten(input_shape=(28, 28)))
model.add(tf.keras.layers.Dense(128, activation='relu'))
model.add(tf.keras.layers.Dropout(0.2))
model.add(tf.keras.layers.Dense(10))

In [5]:
# print the layer-by-layer output shapes and parameter counts
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten (Flatten)            (None, 784)               0         
_________________________________________________________________
dense (Dense)                (None, 128)               100480    
_________________________________________________________________
dropout (Dropout)            (None, 128)               0         
_________________________________________________________________
dense_1 (Dense)              (None, 10)                1290      
Total params: 101,770
Trainable params: 101,770
Non-trainable params: 0
_________________________________________________________________


Getting raw predictions

In [6]:
# For each example the model returns a vector of logits (log-odds), one score per class.
# Here the still-untrained model is applied to the first training image only;
# the slice x_train[:1] keeps the leading batch dimension.
predictions = model(x_train[:1]).numpy()
predictions

array([[ 0.6489387 ,  0.53997725, -0.64532   ,  1.0014428 ,  0.765196  ,
        -0.6920389 , -0.7460827 , -0.00943998,  0.22190559, -0.15234366]],
      dtype=float32)

For multi-class problems, pass the logits through a softmax to get the class probabilities

In [7]:
# softmax turns the logits into positive values that sum to 1 across the classes
tf.nn.softmax(predictions).numpy()

array([[0.14609036, 0.13100873, 0.04004351, 0.20783187, 0.16410111,
        0.03821575, 0.03620524, 0.07562952, 0.09531549, 0.06555851]],
      dtype=float32)

Defining the loss

In [8]:
# `tf.keras.losses.SparseCategoricalCrossentropy` compares the model's logits with the
# true integer class index of each example and returns the loss as a scalar.
# `from_logits=True` tells it the model output has not been passed through a softmax.

loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)

In [9]:
# Sanity-check the loss on the first training example using the untrained model.
# The loss is the negative log of the probability assigned to the true class, so a
# model guessing uniformly over 10 classes would score -ln(1/10), about 2.3; randomly
# initialised weights should give a value of that order.

loss_fn(y_train[:1],predictions).numpy()

3.2645075

Compiling the model

In [10]:
# Configure training: Adam optimizer, the logits-based loss_fn, and accuracy as the reported metric.
model.compile(optimizer='adam', loss=loss_fn, metrics=['accuracy'])

Start training

In [11]:

# Train for 5 passes over the training data. Keep the returned History object in a
# variable so the per-epoch loss/accuracy stay available via `history.history`, and
# so the object's bare repr is not echoed as the cell output.
history = model.fit(x_train, y_train, epochs=5)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<tensorflow.python.keras.callbacks.History at 0x7ff43397b5e0>

Evaluate the model on the test set

In [12]:
# Score the trained model on the held-out (x_test, y_test) split; returns [loss, accuracy].
model.evaluate(x_test, y_test, verbose=2)

313/313 - 0s - loss: 0.0773 - accuracy: 0.9772


[0.07729168981313705, 0.9771999716758728]