# Purpose of this notebook
- Use SimpleRNN, GRU, or LSTM layers to classify MNIST handwritten digits

# Import

In [1]:
from tensorflow import keras # type: ignore
from tensorflow.keras import layers, regularizers # type: ignore
from tensorflow.keras.datasets import mnist # type: ignore

# Load data

In [2]:
# Fetch MNIST as two (inputs, labels) pairs: one for training, one for testing.
train_split, test_split = mnist.load_data()
x_train, y_train = train_split
x_test, y_test = test_split
del train_split, test_split  # keep the notebook namespace limited to the four arrays

# Show the shape of every array, one per line, as a sanity check.
print(x_train.shape, y_train.shape, x_test.shape, y_test.shape, sep="\n")

(60000, 28, 28)
(60000,)
(10000, 28, 28)
(10000,)


# Before normalization, pixel values are in the range [0, 255]

In [3]:
# Peek at five pixels (row 10, columns 10-14) of the first training and
# first test image to see the raw value range.
print(x_train[0, 10, 10:15], x_test[0, 10, 10:15], sep="\n")

[  1 154 253  90   0]
[ 0 17 66 14 67]


# Normalize
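- Note: Run the following cell only once: the division by 255 must be applied exactly once, because dividing already-normalized data again would shrink the values far below the intended [0, 1] range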

In [4]:
# Scale pixel intensities from [0, 255] down to [0, 1].
#
# The integer-dtype guard makes this cell safe to re-run: the freshly loaded
# arrays hold integers, and the division turns them into floats, so a second
# execution finds a float array and leaves the already-normalized data alone
# instead of dividing by 255 again.
if x_train.dtype.kind in "iu":
    x_train = x_train / 255
if x_test.dtype.kind in "iu":
    x_test = x_test / 255

# After normalization, pixel values are in the range [0, 1]

In [5]:
# Same five pixels (row 10, columns 10-14) of the first training and first
# test image, printed again to confirm the values were rescaled.
print(x_train[0, 10, 10:15], x_test[0, 10, 10:15], sep="\n")

[0.00392157 0.60392157 0.99215686 0.35294118 0.        ]
[0.         0.06666667 0.25882353 0.05490196 0.2627451 ]


# Keras Sequential API using Simple RNN

In [10]:
# Two stacked SimpleRNN layers followed by a 10-unit linear classifier,
# assembled one layer at a time.
model = keras.Sequential(name="sequential_model")

# Each time step carries 28 features; the sequence length is left unspecified.
model.add(layers.Input(shape=(None, 28), name='input_layer'))

# First recurrent layer returns its output at every time step so that the
# second recurrent layer receives a full sequence.
model.add(layers.SimpleRNN(256, activation='tanh', return_sequences=True, name='rnn_layer1'))

# Second recurrent layer returns only its final output.
model.add(layers.SimpleRNN(256, activation='tanh', name='rnn_layer2'))

# No activation here: the layer emits raw scores (logits) for the 10 classes.
model.add(layers.Dense(10, name='output_layer'))

# model.summary 

In [11]:
# model.summary() prints the layer table itself and returns None, so wrapping
# it in print() only adds a stray "None" line to the output.
model.summary()

None


# model.compile

In [12]:
# Configure training: Adam optimizer, sparse categorical cross-entropy
# computed on raw logits (the output layer has no softmax), and accuracy
# as the reported metric.
model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=0.001),
    loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    metrics=["accuracy"],
)

# Note
- If the output layer is `layers.Dense(10)` (no activation, so it emits raw logits), use `from_logits=True`
- If the output layer is `layers.Dense(10, activation='softmax')`, use `from_logits=False`, or simply omit the argument, since `from_logits=False` is the default

# Train the model using model.fit

In [13]:
# Train for 5 epochs in mini-batches of 32; verbose=2 logs one line per epoch.
model.fit(
    x=x_train,
    y=y_train,
    batch_size=32,
    epochs=5,
    verbose=2,
)

Epoch 1/5
1875/1875 - 42s - 23ms/step - accuracy: 0.8887 - loss: 0.3644
Epoch 2/5
1875/1875 - 40s - 21ms/step - accuracy: 0.9276 - loss: 0.2448
Epoch 3/5
1875/1875 - 39s - 21ms/step - accuracy: 0.9345 - loss: 0.2271
Epoch 4/5
1875/1875 - 38s - 20ms/step - accuracy: 0.9394 - loss: 0.2109
Epoch 5/5
1875/1875 - 40s - 21ms/step - accuracy: 0.9380 - loss: 0.2159


<keras.src.callbacks.history.History at 0x7f3250532400>

# Note
- The `model.fit` call above ran in 3m19s (the longest time so far)
- It achieved a training accuracy of 0.9380 in the final epoch (good)

# Variants
- SimpleRNN, GRU, and LSTM are variants of the recurrent layer
- To use GRU, replace every "SimpleRNN" with "GRU"
- To use LSTM, replace every "SimpleRNN" with "LSTM"

# Evaluate test data using model.evaluate

In [14]:
# Score the trained model on the held-out test set; the displayed result is
# the [loss, accuracy] pair.
model.evaluate(
    x=x_test,
    y=y_test,
    batch_size=32,
    verbose=2,
)

313/313 - 3s - 10ms/step - accuracy: 0.9624 - loss: 0.1301


[0.13007856905460358, 0.9624000191688538]