In [1]:
import tensorflow as tf
import tensorflow_datasets as tfds
import numpy as np

(ds_train, ds_test), ds_info = tfds.load('mnist', 
                                         split=['train', 'test'],
                                         as_supervised=True,
                                         with_info=True)

2022-11-29 22:40:56.750370: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE3 SSE4.1 SSE4.2 AVX AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2022-11-29 22:41:00.075345: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE3 SSE4.1 SSE4.2 AVX AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2022-11-29 22:41:00.076445: I tensorflow/core/common_runtime/process_util.cc:146] Creating new thread pool with default inter op setting: 2. Tune using inter_op_parallelism_threads for best performance.


In [2]:

def pipeline(ds, expression, batch_size=32):
  ds = ds.map(lambda img, label: (tf.cast(img, float), label))
  ds = ds.map(lambda img, label: (tf.reshape(img, [-1]), label))
  ds = ds.map(lambda img, label: (img / 128.0 - 1.0, label))
  
  zip_ds = tf.data.Dataset.zip((ds.shuffle(2000), ds.shuffle(2000)))
  zip_ds = zip_ds.map(lambda x1, x2: (x1[0], x2[0], expression(x1[1], x2[1])))
  zip_ds = zip_ds.cache()
  zip_ds = zip_ds.batch(batch_size)
  zip_ds = zip_ds.prefetch(tf.data.AUTOTUNE)
  return zip_ds


In [7]:
class MNISTMath(tf.keras.Model):
  def __init__(self, layer_list, optimizer, loss_fn, out_layer):
    super().__init__()
    
    self.metrics_list = [tf.keras.metrics.CategoricalAccuracy(name='categorical_accuracy'),
                         tf.keras.metrics.BinaryAccuracy(name='binary_accuracy'),
                         tf.keras.metrics.Mean(name='loss')]
    
    self.optimizer = optimizer
    self.loss_fn = loss_fn
    
    self.layer_list = [tf.keras.layers.Dense(layer_size, activation='relu') for layer_size in layer_list]
    #self.midlayer = tf.keras.layers.Dense(256, activation='relu')
    self.out_layer = out_layer
    
  def call(self, x, training=False):
    img1, img2 = x
    
    for layer in self.layer_list:
      img1 = layer(img1)
      img2 = layer(img2)
      
    x = tf.concat((img1, img2), axis=1)
    #x = self.midlayer(x)
            
    return self.out_layer(x)
  
  def reset_metrics(self):
    for metric in self.metrics:
      metric.reset_states()
      
  #@tf.function
  def train_step(self, data):
    img1, img2, label = data
    
    with tf.GradientTape() as tape:
      output = self((img1, img2), training=True)
      loss = self.loss_fn(label, output)
    
    grads = tape.gradient(loss, self.trainable_variables)
    self.optimizer.apply_gradients(zip(grads, self.trainable_variables))
    
    self.metrics[0].update_state(label, output)
    self.metrics[1].update_state(label, output)
    self.metrics[2].update_state(loss)
    
    return {m.name: m.result() for m in self.metrics}
  
  @tf.function
  def test_step(self, data):
    img1, img2, label = data
    
    output = self((img1, img2), training=False)
    loss = self.loss_fn(label, output)
    
    self.metrics[0].update_state(label, output)
    self.metrics[1].update_state(label, output)
    self.metrics[2].update_state(loss)
    
    return {m.name: m.result() for m in self.metrics}

In [8]:
import tqdm

def training_loop(model, ds_train, ds_test, epochs):
  train_metrics = {m.name: [] for m in model.metrics}
  test_metrics = {m.name: [] for m in model.metrics}
  
  for epoch in range(epochs):
    print(f'Epoch {epoch + 1}')
    for data in tqdm.tqdm(ds_train, position=0, leave=True):
      metrics = model.train_step(data)
    
    print([f'train_{k}: {v.numpy():.4f}' for k, v in metrics.items()])
    for key, value in metrics.items():
      train_metrics[key].append(value.numpy())
    model.reset_metrics()

    for data in ds_test:
      metrics = model.test_step(data)
      
    print([f'test_{k}: {v.numpy():.4f}' for k, v in metrics.items()])
    for key, value in metrics.items():
      test_metrics[key].append(value.numpy())
    model.reset_metrics()
    
  return train_metrics, test_metrics

### Preparing data for subtask 1

In [15]:
subtask_1 = lambda x1, x2: tf.cast(x1 + x2 >= 5, tf.int32)
ds_train_1 = pipeline(ds_train, subtask_1)
ds_test_1 = pipeline(ds_test, subtask_1)

#### Subtask 1 using Adam optimizer

In [16]:
out_layer = tf.keras.layers.Dense(1, activation=tf.nn.sigmoid)
loss_fn   = tf.keras.losses.BinaryCrossentropy()
optimizer = tf.keras.optimizers.Adam()

model_1_adam = MNISTMath([128, 128], optimizer, loss_fn, out_layer)

train_metrics_1_adam, test_metrics_1_adam = training_loop(model_1_adam, ds_train_1, ds_test_1, 5)

Epoch 1


100%|██████████| 1875/1875 [01:23<00:00, 22.36it/s]


['train_categorical_accuracy: 0.0389', 'train_binary_accuracy: 0.9268', 'train_loss: 0.1846']
['test_categorical_accuracy: 0.0543', 'test_binary_accuracy: 0.9468', 'test_loss: 0.1368']
Epoch 2


100%|██████████| 1875/1875 [01:40<00:00, 18.57it/s]


['train_categorical_accuracy: 0.0949', 'train_binary_accuracy: 0.9528', 'train_loss: 0.1238']
['test_categorical_accuracy: 0.2364', 'test_binary_accuracy: 0.9584', 'test_loss: 0.1226']
Epoch 3


100%|██████████| 1875/1875 [01:33<00:00, 19.96it/s]


['train_categorical_accuracy: 0.1723', 'train_binary_accuracy: 0.9610', 'train_loss: 0.1063']
['test_categorical_accuracy: 0.2780', 'test_binary_accuracy: 0.9607', 'test_loss: 0.1168']
Epoch 4


100%|██████████| 1875/1875 [01:18<00:00, 23.99it/s]


['train_categorical_accuracy: 0.2485', 'train_binary_accuracy: 0.9665', 'train_loss: 0.0962']
['test_categorical_accuracy: 0.3482', 'test_binary_accuracy: 0.9693', 'test_loss: 0.1097']
Epoch 5


100%|██████████| 1875/1875 [01:30<00:00, 20.69it/s]


['train_categorical_accuracy: 0.3067', 'train_binary_accuracy: 0.9698', 'train_loss: 0.0888']
['test_categorical_accuracy: 0.3706', 'test_binary_accuracy: 0.9652', 'test_loss: 0.1162']


#### Subtask 1 using SGD optimizer

In [17]:
out_layer = tf.keras.layers.Dense(1, activation=tf.nn.sigmoid)
loss_fn   = tf.keras.losses.BinaryCrossentropy()
optimizer = tf.keras.optimizers.Adam()

model_1_sgd = MNISTMath([128, 128], optimizer, loss_fn, out_layer)

train_metrics_1_sgd, test_metrics_1_sgd = training_loop(model_1_sgd, ds_train_1, ds_test_1, 5)

Epoch 1


100%|██████████| 1875/1875 [01:05<00:00, 28.67it/s]


['train_categorical_accuracy: 0.0411', 'train_binary_accuracy: 0.9265', 'train_loss: 0.1857']
['test_categorical_accuracy: 0.0575', 'test_binary_accuracy: 0.9514', 'test_loss: 0.1376']
Epoch 2


100%|██████████| 1875/1875 [01:09<00:00, 26.83it/s]


['train_categorical_accuracy: 0.1003', 'train_binary_accuracy: 0.9538', 'train_loss: 0.1243']
['test_categorical_accuracy: 0.1789', 'test_binary_accuracy: 0.9596', 'test_loss: 0.1206']
Epoch 3


100%|██████████| 1875/1875 [01:15<00:00, 24.77it/s]


['train_categorical_accuracy: 0.1765', 'train_binary_accuracy: 0.9624', 'train_loss: 0.1069']
['test_categorical_accuracy: 0.2780', 'test_binary_accuracy: 0.9660', 'test_loss: 0.1123']
Epoch 4


100%|██████████| 1875/1875 [01:17<00:00, 24.28it/s]


['train_categorical_accuracy: 0.2368', 'train_binary_accuracy: 0.9668', 'train_loss: 0.0978']
['test_categorical_accuracy: 0.3035', 'test_binary_accuracy: 0.9653', 'test_loss: 0.1140']
Epoch 5


100%|██████████| 1875/1875 [01:13<00:00, 25.50it/s]


['train_categorical_accuracy: 0.2795', 'train_binary_accuracy: 0.9705', 'train_loss: 0.0902']
['test_categorical_accuracy: 0.3514', 'test_binary_accuracy: 0.9674', 'test_loss: 0.1081']


### Preparing the data for subtask 2

In [None]:
subtask_2 = lambda x1, x2: tf.one_hot(x1 - x2, depth=19)
ds_train_2 = pipeline(ds_train, subtask_2)
ds_test_2 = pipeline(ds_test, subtask_2)

#### Subtask 2 using Adam optimizer

In [11]:
# seed for reproducible results
tf.keras.utils.set_random_seed(133742069)

out_layer = tf.keras.layers.Dense(19, activation=tf.nn.softmax)
optimizer = tf.keras.optimizers.Adam()
loss_fn = tf.keras.losses.CategoricalCrossentropy()

model_2_adam = MNISTMath([128, 128, 128], optimizer, loss_fn, out_layer)

test_metrics_2_adam, train_metrics_2_adam = training_loop(model_2_adam, ds_train_2, ds_test_2, 10)

Epoch 1


100%|██████████| 1875/1875 [01:16<00:00, 24.39it/s]


['train_categorical_accuracy: 0.5361', 'train_binary_accuracy: 0.9586', 'train_loss: 0.9398']
['test_categorical_accuracy: 0.6559', 'test_binary_accuracy: 0.9561', 'test_loss: 0.7735']
Epoch 2


100%|██████████| 1875/1875 [01:23<00:00, 22.37it/s]


['train_categorical_accuracy: 0.6701', 'train_binary_accuracy: 0.9548', 'train_loss: 0.7572']
['test_categorical_accuracy: 0.7114', 'test_binary_accuracy: 0.9545', 'test_loss: 0.7036']
Epoch 3


100%|██████████| 1875/1875 [01:25<00:00, 21.90it/s]


['train_categorical_accuracy: 0.7185', 'train_binary_accuracy: 0.9553', 'train_loss: 0.6936']
['test_categorical_accuracy: 0.7352', 'test_binary_accuracy: 0.9556', 'test_loss: 0.6970']
Epoch 4


100%|██████████| 1875/1875 [01:23<00:00, 22.37it/s]


['train_categorical_accuracy: 0.7488', 'train_binary_accuracy: 0.9561', 'train_loss: 0.6464']
['test_categorical_accuracy: 0.7553', 'test_binary_accuracy: 0.9563', 'test_loss: 0.6588']
Epoch 5


100%|██████████| 1875/1875 [01:25<00:00, 22.03it/s]


['train_categorical_accuracy: 0.7698', 'train_binary_accuracy: 0.9569', 'train_loss: 0.6124']
['test_categorical_accuracy: 0.7662', 'test_binary_accuracy: 0.9570', 'test_loss: 0.6581']
Epoch 6


100%|██████████| 1875/1875 [01:24<00:00, 22.07it/s]


['train_categorical_accuracy: 0.7822', 'train_binary_accuracy: 0.9575', 'train_loss: 0.5902']
['test_categorical_accuracy: 0.7839', 'test_binary_accuracy: 0.9579', 'test_loss: 0.6442']
Epoch 7


100%|██████████| 1875/1875 [01:23<00:00, 22.38it/s]


['train_categorical_accuracy: 0.7950', 'train_binary_accuracy: 0.9580', 'train_loss: 0.5698']
['test_categorical_accuracy: 0.7913', 'test_binary_accuracy: 0.9577', 'test_loss: 0.6555']
Epoch 8


100%|██████████| 1875/1875 [01:23<00:00, 22.33it/s]


['train_categorical_accuracy: 0.8061', 'train_binary_accuracy: 0.9587', 'train_loss: 0.5475']
['test_categorical_accuracy: 0.8021', 'test_binary_accuracy: 0.9587', 'test_loss: 0.6501']
Epoch 9


100%|██████████| 1875/1875 [01:23<00:00, 22.53it/s]


['train_categorical_accuracy: 0.8151', 'train_binary_accuracy: 0.9593', 'train_loss: 0.5353']
['test_categorical_accuracy: 0.8079', 'test_binary_accuracy: 0.9593', 'test_loss: 0.6630']
Epoch 10


100%|██████████| 1875/1875 [01:28<00:00, 21.16it/s]


['train_categorical_accuracy: 0.8278', 'train_binary_accuracy: 0.9602', 'train_loss: 0.5115']
['test_categorical_accuracy: 0.8086', 'test_binary_accuracy: 0.9593', 'test_loss: 0.6572']


#### Subtask 2 using SGD optimizer

In [14]:
# seed for reproducible results
tf.keras.utils.set_random_seed(133742069)

out_layer = tf.keras.layers.Dense(19, activation=tf.nn.softmax)
optimizer = tf.keras.optimizers.SGD()
loss_fn = tf.keras.losses.CategoricalCrossentropy()

model_2_sgd = MNISTMath([128, 128, 128], optimizer, loss_fn, out_layer)

test_metrics_2_sgd, train_metrics_2_sgd = training_loop(model_2_sgd, ds_train_2, ds_test_2, 10)

Epoch 1


100%|██████████| 1875/1875 [01:00<00:00, 30.83it/s]


['train_categorical_accuracy: 0.4204', 'train_binary_accuracy: 0.9654', 'train_loss: 1.0966']
['test_categorical_accuracy: 0.5322', 'test_binary_accuracy: 0.9606', 'test_loss: 0.9838']
Epoch 2


100%|██████████| 1875/1875 [01:05<00:00, 28.53it/s]


['train_categorical_accuracy: 0.5240', 'train_binary_accuracy: 0.9586', 'train_loss: 0.9527']
['test_categorical_accuracy: 0.5920', 'test_binary_accuracy: 0.9568', 'test_loss: 0.8876']
Epoch 3


100%|██████████| 1875/1875 [01:06<00:00, 28.05it/s]


['train_categorical_accuracy: 0.5889', 'train_binary_accuracy: 0.9557', 'train_loss: 0.8666']
['test_categorical_accuracy: 0.6337', 'test_binary_accuracy: 0.9541', 'test_loss: 0.8176']
Epoch 4


100%|██████████| 1875/1875 [01:00<00:00, 30.76it/s]


['train_categorical_accuracy: 0.6296', 'train_binary_accuracy: 0.9542', 'train_loss: 0.8034']
['test_categorical_accuracy: 0.6642', 'test_binary_accuracy: 0.9533', 'test_loss: 0.7701']
Epoch 5


100%|██████████| 1875/1875 [01:07<00:00, 27.60it/s]


['train_categorical_accuracy: 0.6599', 'train_binary_accuracy: 0.9538', 'train_loss: 0.7567']
['test_categorical_accuracy: 0.6828', 'test_binary_accuracy: 0.9530', 'test_loss: 0.7380']
Epoch 6


100%|██████████| 1875/1875 [01:13<00:00, 25.59it/s]


['train_categorical_accuracy: 0.6826', 'train_binary_accuracy: 0.9537', 'train_loss: 0.7201']
['test_categorical_accuracy: 0.6964', 'test_binary_accuracy: 0.9528', 'test_loss: 0.7128']
Epoch 7


100%|██████████| 1875/1875 [01:13<00:00, 25.39it/s]


['train_categorical_accuracy: 0.7016', 'train_binary_accuracy: 0.9540', 'train_loss: 0.6905']
['test_categorical_accuracy: 0.7071', 'test_binary_accuracy: 0.9529', 'test_loss: 0.6952']
Epoch 8


100%|██████████| 1875/1875 [01:14<00:00, 25.22it/s]


['train_categorical_accuracy: 0.7160', 'train_binary_accuracy: 0.9543', 'train_loss: 0.6645']
['test_categorical_accuracy: 0.7191', 'test_binary_accuracy: 0.9529', 'test_loss: 0.6819']
Epoch 9


100%|██████████| 1875/1875 [01:16<00:00, 24.56it/s]


['train_categorical_accuracy: 0.7303', 'train_binary_accuracy: 0.9546', 'train_loss: 0.6420']
['test_categorical_accuracy: 0.7307', 'test_binary_accuracy: 0.9532', 'test_loss: 0.6685']
Epoch 10


100%|██████████| 1875/1875 [01:00<00:00, 30.93it/s]


['train_categorical_accuracy: 0.7424', 'train_binary_accuracy: 0.9549', 'train_loss: 0.6216']
['test_categorical_accuracy: 0.7371', 'test_binary_accuracy: 0.9532', 'test_loss: 0.6623']
