In [0]:
import tensorflow as tf
import numpy as np
from tensorflow.keras import Model, layers
from __future__ import absolute_import,print_function,division

In [0]:
#Mnist parameters
num_classes = 10
num_features = 784

#Training Params
learning_rate = 0.001
training_steps = 2000
batch_size = 16
display_step = 100

#Network Paramters
# MNIST image shape is 28*28px, we will then handle 28 sequences of 28 timesteps for every sample.
num_input = 28 # number of sequences.
timesteps = 28 # timesteps.
num_units = 64 # number of neurons for the LSTM layer.

In [0]:
from tensorflow.keras.datasets import mnist
(x_train, y_train), (x_test, y_test) = mnist.load_data()

x_train, x_test = np.array(x_train, np.float32), np.array(x_test, np.float32)
x_train = x_train.reshape([-1, 28, 28])
x_test = x_test.reshape([-1, num_features])

x_train, x_test = x_train/255. , x_test/255.

In [0]:
train_data = tf.data.Dataset.from_tensor_slices((x_train, y_train))
train_data = train_data.repeat().shuffle(5000).batch(batch_size).prefetch(1)

In [0]:
class LSTM(Model):
  #set layers
  def __init__(self):
    super(LSTM, self).__init__()
    #RNN (LSTM) hidden layer
    self.lstm_layer = layers.LSTM(units=num_classes)
    self.out = layers.Dense(num_classes)

  #set forward pass
  def call(self, x, is_trainable=False):
    #LSTM layer
    x = self.lstm_layer(x)
    #output layers 
    x = self.out(x)

    if not is_trainable:
      x = tf.nn.softmax(x)

    return x

lstm_net = LSTM()

In [0]:
def cross_entropy_loss(x, y):
  y = tf.cast(y, tf.int64)

  loss = tf.nn.sparse_softmax_cross_entropy_with_logits(labels = y, logits=x)

  return tf.reduce_mean(loss)

def accuracy(y_pred, y_true):
  correct_prediction = tf.equal(tf.argmax(y_pred, 1), tf.cast(y_true, tf.int64))
  return tf.reduce_mean(tf.cast(correct_prediction, tf.float32), axis=-1)

optimizer = tf.optimizers.Adam(learning_rate)


In [0]:
def run_optimization(x, y):

  with tf.GradientTape() as g:

    pred = lstm_net(x, is_trainable=True)
    loss = cross_entropy_loss(pred, y)

  gradients = g.gradient(loss, lstm_net.trainable_variables)
  optimizer.apply_gradients(zip(gradients, lstm_net.trainable_variables))




In [49]:
for step, (batch_x, batch_y) in enumerate(train_data.take(training_steps), 1):

  run_optimization(batch_x, batch_y)

  if step % display_step == 0:
    pred = lstm_net(batch_x, is_trainable = True)
    loss = cross_entropy_loss(pred, batch_y)
    acc = accuracy(pred, batch_y)
    print("step: %i, loss: %f, accuracy: %f" %(step, loss, acc))

step: 100, loss: 2.280466, accuracy: 0.250000
step: 200, loss: 2.287636, accuracy: 0.062500
step: 300, loss: 2.042763, accuracy: 0.375000
step: 400, loss: 1.809712, accuracy: 0.375000
step: 500, loss: 1.716209, accuracy: 0.500000
step: 600, loss: 1.784284, accuracy: 0.375000
step: 700, loss: 1.107306, accuracy: 0.562500
step: 800, loss: 1.409682, accuracy: 0.375000
step: 900, loss: 1.433112, accuracy: 0.562500
step: 1000, loss: 1.285776, accuracy: 0.562500
step: 1100, loss: 1.040275, accuracy: 0.687500
step: 1200, loss: 1.198076, accuracy: 0.687500
step: 1300, loss: 0.670228, accuracy: 0.937500
step: 1400, loss: 1.424047, accuracy: 0.625000
step: 1500, loss: 1.398649, accuracy: 0.437500
step: 1600, loss: 1.353969, accuracy: 0.437500
step: 1700, loss: 0.832241, accuracy: 0.750000
step: 1800, loss: 0.761235, accuracy: 0.812500
step: 1900, loss: 0.929557, accuracy: 0.687500
step: 2000, loss: 0.727454, accuracy: 0.875000
