2.1 Preparing MNIST math dataset

In [20]:
import tensorflow_datasets as tfds
import tensorflow as tf

# Load the MNIST train and test splits together with the dataset metadata.
# as_supervised=True makes every element an (image, label) pair.
(train_ds, test_ds), ds_info = tfds.load(
    'mnist',
    split=['train', 'test'],
    as_supervised=True,
    with_info=True,
)

2.2 Two MNIST math datasets

In [25]:
def prepare_data(dataset, subtask=1, batch_size=2**6, shuffle_buffer=2000):

    '''
    Build a batched two-image "MNIST math" dataset from an (image, label) dataset.

    Every image is cast to float32, flattened into a vector and rescaled with
    x / 128 - 1. Two independently shuffled copies of the dataset are zipped so
    that each example holds two images, and the two labels are combined into a
    single target according to the chosen subtask.

    :param dataset: the dataset to be prepared for input into the network,
        yielding (image, label) pairs
    :param subtask: 1 -> target is 1 if label1 + label2 >= 5, else 0 (int32);
        2 -> target is label1 - label2 (label dtype is left unchanged)
    :param batch_size: number of examples per batch
    :param shuffle_buffer: buffer size used by each of the two shuffles
    :return: batched and prefetched dataset yielding (image1, image2, target)
    :raises ValueError: if subtask is neither 1 nor 2
    '''

    if subtask not in (1, 2):
        raise ValueError(f"subtask must be 1 or 2, got {subtask!r}")

    # first step
    # a single map pass: uint8 -> float32, flatten, normalize
    def _preprocess(image, label):
        image = tf.cast(image, tf.float32)
        image = tf.reshape(image, (-1,))
        # input normalization: 0 -> -1 and 255 -> ~0.992, i.e. roughly [-1, 1)
        return (image / 128.) - 1., label

    dataset = dataset.map(_preprocess)

    # second step
    # we want two mnist images in each example, so two separately shuffled
    # views of the same data are zipped: a single example is ((x1,y1),(x2,y2))
    zipped_ds = tf.data.Dataset.zip((dataset.shuffle(shuffle_buffer),
                                     dataset.shuffle(shuffle_buffer)))

    # map ((x1,y1),(x2,y2)) to (x1, x2, target)
    def _pair_to_example(first, second):
        image1, label1 = first
        image2, label2 = second
        if subtask == 1:
            # subtask (1): boolean y1 + y2 >= 5, stored as int
            target = tf.cast(label1 + label2 >= 5, tf.int32)
        else:
            # subtask (2): y1 - y2
            target = label1 - label2
        return image1, image2, target

    zipped_ds = zipped_ds.map(_pair_to_example)

    # third step
    # batch, prefetch
    zipped_ds = zipped_ds.batch(batch_size)
    zipped_ds = zipped_ds.prefetch(tf.data.AUTOTUNE)

    return zipped_ds

# Run both splits through the same preparation pipeline (test first, then train).
# NOTE: the names are rebound to the prepared datasets, so re-running this cell
# would feed already prepared data back into prepare_data; reload the raw
# splits before executing it again.
test_ds, train_ds = prepare_data(test_ds), prepare_data(train_ds)

In [26]:
# sanity check: pull one batch and print the shape of each of its components
for img1, img2, label in train_ds.take(1):
    print(*(component.shape for component in (img1, img2, label)))

(64, 784) (64, 784) (64,)
