# MNIST Beginner
https://www.tensorflow.org/tutorials/quickstart/beginner

In [113]:
import numpy as np
import tensorflow as tf
# Short alias for the Keras MNIST dataset module; `mnist.load_data()` is called below.
mnist = tf.keras.datasets.mnist

In [114]:
# Fetch MNIST as two (images, labels) pairs: one for training, one for testing.
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Pixel intensities arrive as integers; dividing by 255.0 rescales them to
# floating-point values in the range [0, 1.0].
x_train = x_train / 255.0
x_test = x_test / 255.0

In [115]:
# The training set has 60,000 samples. Each sample is a 28x28 grayscale image;
# the labels form a flat vector with one entry per image.
x_train.shape, y_train.shape

((60000, 28, 28), (60000,))

In [116]:
# The test set has 10,000 samples, with the same 28x28 image shape as the training set.
x_test.shape, y_test.shape

((10000, 28, 28), (10000,))

In [117]:
# Class balance of the training labels: one (label, count, percentage) tuple per digit.
l, c = np.unique(y_train, return_counts=True)
percentages = c / len(y_train) * 100
[(label, count, pct) for label, count, pct in zip(l, c, percentages)]

[(0, 5923, 9.871666666666666),
 (1, 6742, 11.236666666666666),
 (2, 5958, 9.93),
 (3, 6131, 10.218333333333334),
 (4, 5842, 9.736666666666666),
 (5, 5421, 9.035),
 (6, 5918, 9.863333333333333),
 (7, 6265, 10.441666666666666),
 (8, 5851, 9.751666666666667),
 (9, 5949, 9.915000000000001)]

In [118]:
# For reproducible results, fix TensorFlow's global random seed.
# This needs to run before the model is built.
SEED = 0xfafa
tf.random.set_seed(SEED)

In [119]:
# Assemble the classifier as a tf.keras.Sequential stack of layers:
#   28x28 image -> flattened vector -> 128 ReLU units -> dropout (rate 0.2)
#   -> 10 softmax outputs (one per digit class).
model = tf.keras.models.Sequential(
    [
        tf.keras.layers.Flatten(input_shape=(28, 28)),
        tf.keras.layers.Dense(units=128, activation='relu'),
        tf.keras.layers.Dropout(rate=0.2),
        tf.keras.layers.Dense(units=10, activation='softmax'),
    ]
)

In [120]:
# Print the model's architecture/configuration as text.
# `Model.to_yaml()` is not available everywhere: it needs PyYAML on older
# TensorFlow releases (ImportError), raises RuntimeError from TF 2.6 on, and
# does not exist at all in Keras 3 (AttributeError). Fall back to the JSON
# serialization, which describes the same layer configuration.
try:
    print(model.to_yaml())
except (RuntimeError, AttributeError, ImportError):
    print(model.to_json(indent=2))

backend: tensorflow
class_name: Sequential
config:
  layers:
  - class_name: Flatten
    config:
      batch_input_shape: !!python/tuple
      - null
      - 28
      - 28
      data_format: channels_last
      dtype: float32
      name: flatten_4
      trainable: true
  - class_name: Dense
    config:
      activation: relu
      activity_regularizer: null
      bias_constraint: null
      bias_initializer:
        class_name: Zeros
        config: {}
      bias_regularizer: null
      dtype: float32
      kernel_constraint: null
      kernel_initializer:
        class_name: GlorotUniform
        config:
          seed: null
      kernel_regularizer: null
      name: dense_8
      trainable: true
      units: 128
      use_bias: true
  - class_name: Dropout
    config:
      dtype: float32
      name: dropout_4
      noise_shape: null
      rate: 0.2
      seed: null
      trainable: true
  - class_name: Dense
    config:
      activation: softmax
      activity_regularizer: null
    

In [121]:
# The model's layer objects, in the order they were stacked.
model.layers

[<tensorflow.python.keras.layers.core.Flatten at 0x1504c95d0>,
 <tensorflow.python.keras.layers.core.Dense at 0x130d21590>,
 <tensorflow.python.keras.layers.core.Dropout at 0x130fc0090>,
 <tensorflow.python.keras.layers.core.Dense at 0x1326e9b10>]

In [122]:
# Layer 0 is the Flatten layer; compare its input shape with its output shape
# to see each 28x28 image become a single flat vector.
l0 = model.layers[0]
l0.input_shape, l0.output_shape

((None, 28, 28), (None, 784))

In [123]:
# Layer 1 is the first Dense layer: report its shapes and activation, then
# list the (name, shape) of everything exposed via `weights` and via `variables`.
l1 = model.layers[1]
print(f'input_shape: {l1.input_shape}, output_shape: {l1.output_shape}, activation: {l1.activation.__name__}')
weight_summary = [(w.name, w.shape) for w in l1.weights]
variable_summary = [(v.name, v.shape) for v in l1.variables]
weight_summary, variable_summary

input_shape: (None, 784), output_shape: (None, 128), activation: relu


([('dense_8/kernel:0', TensorShape([784, 128])),
  ('dense_8/bias:0', TensorShape([128]))],
 [('dense_8/kernel:0', TensorShape([784, 128])),
  ('dense_8/bias:0', TensorShape([128]))])

`weights` and `variables` are the same thing.

In [124]:
# Show the first four kernel values of row 0 as seen through `weights` and
# through `variables`, then compare the two lists directly.
for params in (l1.weights, l1.variables):
    print(params[0][0][:4])
l1.weights == l1.variables

tf.Tensor([-0.07438198  0.06026352 -0.06951502 -0.02914837], shape=(4,), dtype=float32)
tf.Tensor([-0.07438198  0.06026352 -0.06951502 -0.02914837], shape=(4,), dtype=float32)


True

In [125]:
# Sum of the bias terms, obtained in two different ways (`l1.bias` and
# `l1.weights[1]` refer to the same variable).
# tf.reduce_sum reduces over all elements in a single TensorFlow op; the
# Python builtin `sum` would instead iterate over the tensor element by
# element (and row by row for the kernel matrix).
print('Bias sum:', tf.reduce_sum(l1.bias), tf.reduce_sum(l1.weights[1]))
# Sum of the weights (kernel) matrix
print('Weights sum:', tf.reduce_sum(l1.weights[0]))

Bias sum: tf.Tensor(0.0, shape=(), dtype=float32) tf.Tensor(0.0, shape=(), dtype=float32)
Weights sum: tf.Tensor(16.841835, shape=(), dtype=float32)


In [126]:
# Probe the model's state before compiling. Both checks signal a failed
# precondition by raising RuntimeError, which is reported instead of propagated.
# NOTE: these are private Keras methods (leading underscore), so they may not
# exist in other Keras/TensorFlow versions.

# Has compile() been called yet?
try:
    model._assert_compile_was_called()
except RuntimeError as err:
    print('Error:', err)
else:
    print('model has been compiled')

# Have the layer weights been created yet?
try:
    model._assert_weights_created()
except RuntimeError as err:
    print(err)
else:
    print('weights have been created')

Error: You must compile your model before training/testing. Use `model.compile(optimizer, loss)`.
weights have been created


Compile the model.

In [127]:
# Configure training: Adam optimizer, sparse categorical cross-entropy loss
# (the labels are integer class ids rather than one-hot vectors), and
# accuracy as the reported metric.
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

In [128]:
# After compile() this check is expected to pass silently (no exception, no output).
# NOTE: private Keras method (leading underscore); may not exist in other versions.
model._assert_compile_was_called()

In [129]:
# Train the model for 5 epochs on the training set.
# fit() does not reset the weights, so re-running only this cell continues
# training from wherever the previous run left off.
model.fit(x=x_train, y=y_train, epochs=5)

Train on 60000 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<tensorflow.python.keras.callbacks.History at 0x13114a8d0>

In [130]:
# Layer-1 parameter sums after training, for comparison with the values
# printed before training.
# tf.reduce_sum reduces over all elements in a single TensorFlow op; the
# Python builtin `sum` would instead iterate over the tensor element by element.
print('Weights sum', tf.reduce_sum(l1.weights[0]))
print('Bias sum', tf.reduce_sum(l1.weights[1]))

Weights sum tf.Tensor(-536.99927, shape=(), dtype=float32)
Bias sum tf.Tensor(2.0992267, shape=(), dtype=float32)


In [131]:
# Score the model on the data it was trained on.
# Returns [loss, accuracy], matching the loss and metric given to compile().
model.evaluate(x=x_train, y=y_train, verbose=2)

60000/1 - 2s - loss: 0.0226 - accuracy: 0.9878


[0.039811225586694976, 0.9877833]

In [132]:
# Score the model on the held-out test set.
# Returns [loss, accuracy], matching the loss and metric given to compile().
model.evaluate(x=x_test, y=y_test, verbose=2)

10000/1 - 0s - loss: 0.0377 - accuracy: 0.9766


[0.0740614623770467, 0.9766]

In [133]:
# Calling model.fit again continues training from the current weights
# (here: 4 more epochs on top of the earlier ones).
model.fit(x=x_train, y=y_train, epochs=4)

Train on 60000 samples
Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4


<tensorflow.python.keras.callbacks.History at 0x132e98cd0>

### TODO
 * Try different dropout rates, e.g. `np.arange(0, 1, 0.1)` (the built-in `range` does not accept float arguments)
 * Try different sizes for layer 1: `[64, 128, 256]`