# Name: Ololade Adetula 
# Course: Programming and Algorithms for Data Science

In [1]:
# Quiet TensorFlow's C++ backend logging. This is set before the tensorflow import
# in the next cell so that it is already in the environment when TensorFlow loads.
import os
os.environ.update({"TF_CPP_MIN_LOG_LEVEL": "2"})

In [2]:
# Packages used
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

In [3]:
# Short alias for the MNIST dataset module that ships with Keras (keras.datasets.mnist).
mnist = keras.datasets.mnist

In [6]:
# Load the predefined train/test split, then rescale every pixel value by 1/255.
(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train = x_train / 255.0
x_test = x_test / 255.0
# Each 28 x 28 image is fed to the RNN as a sequence:
#   seq_length = 28  (timesteps)
#   input_size = 28  (features per timestep)

By default, an RNN layer outputs a single vector per sample: the RNN cell's output at the last timestep, which summarizes the entire input sequence. The output therefore has shape `(N, units)`, where `N` is the number of samples.

Alternatively, setting `return_sequences=True` makes the RNN layer return the complete sequence of outputs for each sample: one vector per timestep, giving an output shape of `(N, timesteps, units)`.

In [7]:
# Sequential API approach using Keras
model = keras.models.Sequential()
model.add(keras.Input(shape=(28,28))) # shape= (seq_length, input_size)

# return_sequences=False keeps only the last timestep's output -> (N, 128).
# NOTE(review): activation='relu' replaces the LSTM's default tanh. ReLU adds non-linearity,
# but inside an LSTM it is unbounded, and per the Keras LSTM docs a non-tanh activation also
# rules out the fused cuDNN kernel on GPU. Kept as-is so the recorded results stay comparable;
# confirm this choice is intentional.
model.add(layers.LSTM(128, return_sequences=False, activation='relu')) # N, 128
model.add(layers.Dense(10))  # 10 raw class scores; no softmax is applied here
# summary() prints the table itself and returns None, so wrapping it in print()
# only appended a stray "None" line to the output.
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm (LSTM)                 (None, 128)               80384     
                                                                 
 dense (Dense)               (None, 10)                1290      
                                                                 
Total params: 81674 (319.04 KB)
Trainable params: 81674 (319.04 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
None


In [8]:
# Loss and optimizer
# from_logits=True because the final Dense layer outputs raw scores (no softmax);
# the Sparse variant of the loss takes integer class labels rather than one-hot vectors.
loss = keras.losses.SparseCategoricalCrossentropy(from_logits=True)
# Use `learning_rate`: the old `lr` alias is deprecated in current Keras releases
# (ignored with a warning or rejected outright, depending on the version).
optim = keras.optimizers.Adam(learning_rate=0.001)
metrics = ["accuracy"]



In [9]:
# Attach the optimizer, loss and metric list to the model before training.
model.compile(
    optimizer=optim,
    loss=loss,
    metrics=metrics,
)

In [10]:
# Training hyperparameters (batch size can be changed if need be).
batch_size, epochs = 64, 5

In [11]:
# Train the model. The returned History object is kept (per-epoch metrics are available
# in history.history) instead of letting its bare repr show up as the cell output.
history = model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs, verbose=2)

Epoch 1/5
938/938 - 51s - loss: 0.5123 - accuracy: 0.8374 - 51s/epoch - 54ms/step
Epoch 2/5
938/938 - 44s - loss: 0.1363 - accuracy: 0.9584 - 44s/epoch - 47ms/step
Epoch 3/5
938/938 - 42s - loss: 0.0938 - accuracy: 0.9707 - 42s/epoch - 45ms/step
Epoch 4/5
938/938 - 43s - loss: 0.0747 - accuracy: 0.9769 - 43s/epoch - 46ms/step
Epoch 5/5
938/938 - 43s - loss: 0.0630 - accuracy: 0.9811 - 43s/epoch - 46ms/step


<keras.src.callbacks.History at 0x2f04a79c5d0>

In [12]:
# Evaluation on the held-out test split; the cell displays the returned [loss, accuracy].
model.evaluate(
    x_test,
    y_test,
    batch_size=batch_size,
    verbose=2,
)

157/157 - 6s - loss: 0.0594 - accuracy: 0.9836 - 6s/epoch - 38ms/step


[0.059382110834121704, 0.9836000204086304]