## Set-Up

In [3]:
import tensorflow as tf
from tensorflow import keras
import numpy as np

In [4]:
def mod_5(in_list):
  """Return the number encoded by a binary sequence, reduced modulo 5.

  The encoding is little-endian: ``in_list[0]`` is the least-significant bit,
  so ``[a, b, c, d, e]`` encodes ``a + 2*b + 4*c + 8*d + 16*e``.

  Args:
    in_list: Sliceable sequence (list, tuple, 1-D NumPy array) of 0/1 values.

  Returns:
    The encoded number mod 5 as a plain ``int``; 0 for an empty sequence.

  Raises:
    ValueError: If an element does not convert to 0 or 1.
  """
  remainder = 0
  # Horner's scheme, walking from the most-significant bit (the last element)
  # down to the least-significant one. Reducing at every step keeps the
  # running value in 0..4 regardless of the sequence length.
  for bit in in_list[::-1]:
    value = int(bit)
    if value not in (0, 1):
      raise ValueError("mod_5 expects only 0/1 entries, got %r" % (bit,))
    remainder = (2 * remainder + value) % 5
  return remainder

def div_5(in_list):
  """Binary label derived from ``mod_5``.

  Returns 1 when the encoded number leaves a non-zero remainder modulo 5 and
  0 when the remainder is zero.
  """
  # NOTE(review): despite the name, 1 means "NOT divisible by 5" -- confirm
  # that this polarity is the intended one.
  return 0 if mod_5(in_list) == 0 else 1


In [5]:
# Number of bits (time steps) in every generated sequence.
input_size = 20
# Labelling function selected for the experiment.
# NOTE(review): no cell visible here reads target_func; the batch helpers call
# mod_5 directly -- confirm whether it is still needed.
target_func = div_5

In [6]:
def get_batch(batch_size):
  """Generate random bit sequences labelled with their value modulo 5.

  Each row holds ``input_size`` random bits, least-significant bit first (the
  order in which ``mod_5`` reads its input).

  Args:
    batch_size: Number of sequences to draw; 0 yields empty arrays.

  Returns:
    Tuple ``(X, y)``. ``X`` has shape ``(batch_size, input_size, 1)`` and dtype
    ``uint32``. ``y`` has shape ``(batch_size,)`` and holds, for every row, the
    encoded number mod 5.
  """
  # randint's upper bound is exclusive, so the draws are already 0/1.
  X = np.random.randint(0, 2, (batch_size, input_size), dtype = np.uint8)
  # Weight of bit j is 2**j; only its residue mod 5 matters for the label, and
  # using the residue keeps the weighted sum tiny for any sequence length.
  bit_weights = np.array([pow(2, j, 5) for j in range(input_size)], dtype = np.int_)
  # One vectorised weighted sum per row replaces a Python call per row.
  y = (X @ bit_weights) % 5
  return (X[..., np.newaxis].astype(np.uint32), y)

# This variant returns one label per time step: the remainder mod 5 of every prefix of the sequence.
def get_batch_out_seq(batch_size):
  """Generate random bit sequences with a mod-5 label for every prefix.

  Each row holds ``input_size`` random bits, least-significant bit first.
  ``y[r, i]`` is the number encoded by the first ``i + 1`` bits of row ``r``,
  reduced modulo 5.

  Args:
    batch_size: Number of sequences to draw; 0 yields empty arrays.

  Returns:
    Tuple ``(X, y)``. ``X`` has shape ``(batch_size, input_size, 1)`` and dtype
    ``uint32``. ``y`` has shape ``(batch_size, input_size)`` and dtype
    ``uint64``.
  """
  # randint's upper bound is exclusive, so the draws are already 0/1.
  X = np.random.randint(0, 2, (batch_size, input_size), dtype = np.uint8)
  # Weight of bit j is 2**j; only its residue mod 5 matters for the labels.
  bit_weights = np.array([pow(2, j, 5) for j in range(input_size)], dtype = np.uint64)
  # A running sum of the weighted bits gives every prefix value in one pass,
  # instead of re-encoding each prefix from scratch for every time step.
  y = np.cumsum(X * bit_weights, axis = 1, dtype = np.uint64) % np.uint64(5)

  return (X[..., np.newaxis].astype(np.uint32), y)




## Network with GRU units

In [7]:
# Two stacked GRU layers that emit an output at every time step, followed by a
# 5-way softmax (one class per possible remainder 0-4).
model = keras.models.Sequential()
model.add(keras.layers.GRU(20, return_sequences=True, input_shape=[None, 1]))
model.add(keras.layers.GRU(20, return_sequences=True))
model.add(keras.layers.Dense(5, activation = "softmax"))

def last_time_step_scc(Y_true, Y_pred):
  """Sparse categorical accuracy evaluated at the final time step only.

  Args:
    Y_true: Integer class labels; the last index along axis 1 is used.
    Y_pred: Per-step class scores; the last index along axis 1 is used.

  Returns:
    Tensor with one value per sample: 1.0 where the arg-max class of the final
    step matches the label, 0.0 otherwise. Keras averages these values when the
    function is passed in ``metrics``.
  """
  # Use the stateless functional metric. Building a SparseCategoricalAccuracy
  # object per call and reading it back with .numpy() only works on eager
  # tensors and breaks when the metric is traced as part of model.fit.
  return keras.metrics.sparse_categorical_accuracy(Y_true[:, -1], Y_pred[:, -1])

# Integer labels per time step -> sparse categorical cross-entropy; track the
# custom last-time-step accuracy as the metric.
model.compile(
  optimizer = tf.keras.optimizers.Adam(),
  loss = "sparse_categorical_crossentropy",
  metrics = [last_time_step_scc],
)

model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 gru (GRU)                   (None, None, 20)          1380      
                                                                 
 gru_1 (GRU)                 (None, None, 20)          2520      
                                                                 
 dense (Dense)               (None, None, 5)           105       
                                                                 
Total params: 4,005
Trainable params: 4,005
Non-trainable params: 0
_________________________________________________________________


## Generate data and fit

In [8]:
# Independent random draws for each split; labels are given for every time step.
N_TRAIN, N_VALID, N_TEST = 70000, 10000, 2000

X_train, y_train = get_batch_out_seq(N_TRAIN)
X_valid, y_valid = get_batch_out_seq(N_VALID)
# NOTE(review): X_test / y_test are not used by any cell visible here.
X_test, y_test = get_batch_out_seq(N_TEST)


In [9]:
# Compile again, this time with the built-in "accuracy" metric instead of the
# custom last-time-step metric passed to the earlier compile call.
early_stopping = tf.keras.callbacks.EarlyStopping(patience=2)

model.compile(
  optimizer = tf.keras.optimizers.Adam(),
  loss = "sparse_categorical_crossentropy",
  metrics = ["accuracy"],
)

# Train for at most 20 epochs, validating on the held-out split after each one.
model.fit(
  X_train,
  y_train,
  epochs = 20,
  validation_data = (X_valid, y_valid),
  callbacks = [early_stopping],
)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20


<keras.callbacks.History at 0x7f24dc51ed50>

Works quite nicely! Let's try different network architectures...

## Different architectures

In [10]:
# Variant 2: a single GRU layer (20 units) feeding the softmax directly.
model_2 = keras.models.Sequential()
model_2.add(keras.layers.GRU(20, return_sequences=True, input_shape = [None, 1]))
model_2.add(keras.layers.Dense(5, activation = "softmax"))

model_2.compile(
  optimizer = tf.keras.optimizers.Adam(),
  loss = "sparse_categorical_crossentropy",
  metrics = ["accuracy"],
)

# Recorded result from the original run: 0.999 accuracy after 20 epochs.
early_stopping_2 = tf.keras.callbacks.EarlyStopping(patience=2)
model_2.fit(
  X_train,
  y_train,
  epochs = 20,
  validation_data = (X_valid, y_valid),
  callbacks = [early_stopping_2],
)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x7f24dc37b210>

In [12]:
# Variant 3: a single GRU layer with fewer units (10 instead of 20).
model_3 = keras.models.Sequential()
model_3.add(keras.layers.GRU(10, return_sequences=True, input_shape = [None, 1]))
model_3.add(keras.layers.Dense(5, activation = "softmax"))

model_3.compile(
  optimizer = tf.keras.optimizers.Adam(),
  loss = "sparse_categorical_crossentropy",
  metrics = ["accuracy"],
)

# Recorded result from the original run: 0.64 accuracy after 20 epochs.
early_stopping_3 = tf.keras.callbacks.EarlyStopping(patience=2)
model_3.fit(
  X_train,
  y_train,
  epochs = 20,
  validation_data = (X_valid, y_valid),
  callbacks = [early_stopping_3],
)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x7f24dbd92a10>

In [None]:
# Keep training the already-fitted model_3 on the current training set.
first_stop = tf.keras.callbacks.EarlyStopping(patience=2)
model_3.fit(
  X_train,
  y_train,
  epochs = 20,
  validation_data = (X_valid, y_valid),
  callbacks = [first_stop],
)

# No improvement anymore: draw a fresh training set (this rebinds X_train and
# y_train for all later cells) and continue training on it.
X_train, y_train = get_batch_out_seq(70000)

second_stop = tf.keras.callbacks.EarlyStopping(patience=2)
model_3.fit(
  X_train,
  y_train,
  epochs = 20,
  validation_data = (X_valid, y_valid),
  callbacks = [second_stop],
)



In [17]:
# Draw another fresh training set (rebinds X_train / y_train).
X_train, y_train = get_batch_out_seq(70000)

# Re-compile the same model_3 object with an explicit Adam learning rate of
# 0.003 (earlier compile calls used Adam() with no arguments), then keep fitting.
faster_adam = tf.keras.optimizers.Adam(learning_rate = 0.003)
model_3.compile(
  optimizer = faster_adam,
  loss = "sparse_categorical_crossentropy",
  metrics = ["accuracy"],
)

lr_stop = tf.keras.callbacks.EarlyStopping(patience=2)
model_3.fit(
  X_train,
  y_train,
  epochs = 20,
  validation_data = (X_valid, y_valid),
  callbacks = [lr_stop],
)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20


<keras.callbacks.History at 0x7f24d78e5310>