# IANNwTF HW 4
## Group 10

The following contains our solution to the exercises in IANNwTF HW 04. A Jupyter notebook versus a module format was chosen this time for purposes of organization.

### Assigment 1: Reviews
We review the homeworks for Groups 15 and 32.

### Assignment 2: MNIST Math

### 2.1 Preparing the MNIST Math Dataset

In [497]:
# Needed Imports
import tensorflow as tf
import tensorflow_datasets as tfds
from tensorflow.keras.layers import Dense
import tqdm
import datetime
import matplotlib.pyplot as plt
import numpy as np


In [498]:
# 2.1 Load Dataset
(train_ds, test_ds), ds_info = tfds.load ('mnist', split =['train', 'test'], as_supervised = True, with_info = True)

# Info on the dataset (refresher)
# print("ds_info: \n", ds_info)
# tfds.show_examples(train_ds, ds_info)

In [499]:
# 2.2 Data Pipeline
def prepare_data(dataset, batchsize):

    '''
    :param dataset: the dataset to be prepared for input into the network
    :return: 2 datasets, one each for each of the math problems defined (see below), created after the original database was preprocessed with the
    steps below
    '''

    # Step One - General Preprocessing

    # convert data from uint8 to float32
    dataset = dataset.map(lambda img, target: (tf.cast(img, tf.float32), target))

    # flatten the images into vectors
    dataset = dataset.map(lambda img, target: (tf.reshape(img, (-1,)), target))

    # input normalization, just bringing image values from range [0, 255] to [-1, 1]
    dataset = dataset.map(lambda img, target: ((img / 128.) - 1., target))

    # Step 2 - Pairing Data Tuples & Respective Parameterized Targets

    # create a dataset that contains 2000 samples from the overall dataset paired with 2000 other samples
    data = tf.data.Dataset.zip((dataset.shuffle(2000), dataset.shuffle(2000)))

    # create the dataset for the first math problem (a + b >= 5) - remembering to cast to int versus boolean!
    greateqfive = data.map(lambda x1, x2: (x1[0], x2[0], x1[1]+x2[1]>=5))
    greateqfive = greateqfive.map(lambda x1, x2, t: (x1, x2, tf.cast(t, tf.int32)))

    # create the dataset for the second math problem (a - b = y)
    subtr = data.map(lambda x1, x2: (x1[0], x2[0], x1[1]-x2[1]))

    # Step 3 - Batching & Prefetching

    # run batching and prefetching for both datasets
    greateqfive = greateqfive.batch(batchsize)
    greateqfive = greateqfive.prefetch(tf.data.AUTOTUNE)
    subtr = subtr.batch(batchsize)
    subtr = subtr.prefetch(tf.data.AUTOTUNE)

    # return BOTH datasets
    return greateqfive, subtr


In [500]:
# Check data pipeline by examining one example from each of the four created datasets (one for each math problem for train and test)

train_ds_gef, train_ds_subtr = prepare_data(train_ds, batchsize = 32)
test_ds_gef, test_ds_subtr = prepare_data(test_ds, batchsize = 32)

for img1, img2, label in train_ds_gef.take(1):
    print(img1.shape, img2.shape, label.shape)

for img1, img2, label in train_ds_subtr.take(1):
    print(img1.shape, img2.shape, label.shape)

for img1, img2, label in test_ds_gef.take(1):
    print(img1.shape, img2.shape, label.shape)

for img1, img2, label in test_ds_subtr.take(1):
    print(img1.shape, img2.shape, label.shape)


(32, 784) (32, 784) (32,)
(32, 784) (32, 784) (32,)
(32, 784) (32, 784) (32,)
(32, 784) (32, 784) (32,)


### Assignment 3: Building Shared Weight Models

In [501]:
# a suggestion:
class MyModel(tf.keras.Model):
    def __init__(self, subtask, optimizer): # numlayers, subtask):

        '''
        param: numlayers - the desired number of hidden layers
        param: subtask - the subtask the network is being asked to solve (relevant for output layer)
        '''
        super(MyModel, self).__init__()

        self.optimizer = optimizer # tf.keras.optimizers.Adam()
        self.subtask = subtask
        # self.numlayers = numlayers
        # self.layer_list = []

        # self.flatten = tf.keras.layers.Flatten()
        
        # create 2 hidden layers with 256 units and ReLU as the activation function
        self.hidden_layer_1 = tf.keras.layers.Dense(units=256, activation=tf.nn.relu)
        self.hidden_layer_2 = tf.keras.layers.Dense(units=256, activation=tf.nn.relu)

        # add desired number of hidden layers
        # for i in range(numlayers):
        #     self.layer_list.append(tf.keras.layers.Dense(units=256, activation=tf.nn.relu))
        
        if subtask == 0:
            self.output_layer = tf.keras.layers.Dense(units=1, activation=tf.nn.sigmoid)
            self.loss_function = tf.keras.losses.BinaryCrossentropy()
        elif subtask == 1:
            self.output_layer = tf.keras.layers.Dense(units=1, activation=tf.nn.softmax) # not 10 units, since the label.shape is (32,) not (32,10)
            self.loss_function = tf.keras.losses.MeanSquaredError()
        
        self.metrics_list = [
                    tf.keras.metrics.Mean(name="loss"),
                    tf.keras.metrics.BinaryAccuracy(name="acc"), # only for subtask 0, not for subtask 1
                    # tf.keras.metrics.TopKCategoricalAccuracy(3,name="top-3-acc") 
                    ]

    @tf.function
    def __call__(self, input: tuple, training = False):
        
        # # feed both inputs seperatedly into a layer, then concatenate the results before passing activity to the next layer
        # i1 = self.flatten(input[0])
        i1 = self.hidden_layer_1(input[0])
        i1 = self.hidden_layer_2(i1)

        # i2 = self.flatten(input[1])
        i2 = self.hidden_layer_1(input[1])
        i2 = self.hidden_layer_2(i2)

        i = tf.concat([i1, i2], axis=1)    

        # i1 = self.flatten(input[0])
        # i2 = self.flatten(input[1])
        
        # feed the activity through the network UP TO the output layer
        # for numlayers in self.layer_list:
        #     if numlayers == 0:
        #         i1 = self.hidden_layer_1(i1)
        #         i2 = self.hidden_layer_1(i2)
        #         i = tf.concat([i1, i2], axis=1)   # e.g. axis=1: (32,784) + (32,784) -> (32, 1568)
        #     else:
        #         i = self.layer_list[numlayers](i)

        out = self.output_layer(i)

        return out

    @property
    def metrics(self):
        return self.metrics_list

    def reset_metrics(self):
        for metric in self.metrics:
            metric.reset_states()

    @tf.function
    def train_step(self, input):
        img1, img2, label = input
        print("train begins")

        with tf.GradientTape() as tape:
            prediction = self((img1, img2), training=True)
            loss = self.loss_function(label, prediction)

        gradients = tape.gradient(loss, self.trainable_variables)
        self.optimizer.apply_gradients(zip(gradients, self.trainable_variables))
        
        # update loss metric
        self.metrics[0].update_state(loss)
        
        # for all metrics except loss, update states (accuracy etc.)
        for metric in self.metrics[1:]:
            metric.update_state(label, prediction) # + tf.reduce_sum(self.losses)

        # Return a dictionary mapping metric names to current value
        return {m.name: m.result() for m in self.metrics}

    @tf.function
    def test_step(self, input):

        img1, img2, label = input

        prediction = self((img1, img2), training=False)
        loss = self.loss_function(label, prediction) # + tf.reduce_sum(self.losses)

        # update loss metric
        self.metrics[0].update_state(loss)

        # for accuracy metrics:
        for metric in self.metrics[1:]:
            metric.update_state(label, prediction)

        # Return a dictionary mapping metric names to current value
        return {m.name: m.result() for m in self.metrics}


### Assignment 4: Training the Networks

In [502]:
# Initiate the logs and metrics
config_name= "config_name"
current_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")

train_log_path = f"logs/{config_name}/{current_time}/train"
val_log_path = f"logs/{config_name}/{current_time}/val"

# log writer for training metrics
train_summary_writer = tf.summary.create_file_writer(train_log_path)

# log writer for validation metrics
val_summary_writer = tf.summary.create_file_writer(val_log_path)

# Initiate epochs and learning rate as global variables
epochs = 2
learning_rate = 0.01

In [503]:
def training(subtask: int, optimizer): # numlayers: int, ):#, optimizer):
    '''
    :param subtask: defines the subtask to be solved, 0 is a + b >= 5, 1 is a - b = y
    :param optimizer: the optimizer function to use
    :return:
    '''

    # make the if statement

    # Initiate a model with the requested parameters
    network = MyModel(subtask, optimizer) # numlayers, subtask)

    # Initialize the datasets for the two problems

    # Note - ignore the fact that train_ds and test_ds may be flagged as not defined; when the whole program is run, this should not be an issue
    # train_ds_gef, train_ds_subtr = prepare_data(train_ds, batchsize = 32)
    # test_ds_gef, test_ds_subtr = prepare_data(test_ds, batchsize = 32)

    # train = train_ds_gef
    # test = test_ds_gef

    # Initialize the train and test datasets, and the loss function, based on the subtask
    if subtask == 0:
        train = train_ds_gef
        test = test_ds_gef

    else:
        train = train_ds_subtr
        test = test_ds_subtr

    # Train the model
    for epoch in range(epochs):
        print(f"Starting Epoch {epoch}")

        for data in tqdm.tqdm(train, position=0, leave=True):
            metrics = network.train_step(data)

            with train_summary_writer.as_default():
                for metric in network.metrics:
                    tf.summary.scalar(f"{metric.name}", metric.result(),step=epoch)


        # print the end acc and loss
        print([f"{key}: {value.numpy()}" for (key,value) in metrics.items()])

        # reset metrics for next round
        network.reset_metrics()

        # Testing
        for data in test:
            metrics = network.test_step(data)

            # log the accs and losses
            with val_summary_writer.as_default():
                for metric in network.metrics:
                    tf.summary.scalar(f"{metric.name}", metric.result(), step = epoch)

        # print the end acc and loss
        print([f"val_{key}: {value.numpy()}" for (key,value) in metrics.items()])

        # reset all metrics
        network.reset_metrics()


In [504]:
# Train a model to solve the first math problem
training(0, tf.keras.optimizers.Adam())
training(0, tf.keras.optimizers.SGD(learning_rate=learning_rate, momentum=0.0))
training(0, tf.keras.optimizers.SGD(learning_rate=learning_rate, momentum=0.5))
training(0, tf.keras.optimizers.RMSprop())
training(0, tf.keras.optimizers.Adagrad())

Starting Epoch 0


  0%|          | 0/1875 [00:00<?, ?it/s]

train begins
prediction starts
first prediction Tensor("StatefulPartitionedCall:0", shape=(32, 1), dtype=float32)
train begins
prediction starts
first prediction Tensor("StatefulPartitionedCall:0", shape=(32, 1), dtype=float32)


100%|██████████| 1875/1875 [00:18<00:00, 102.80it/s]


['loss: 0.17579670250415802', 'acc: 0.9319666624069214']
['val_loss: 0.13314342498779297', 'val_acc: 0.9517999887466431']
Starting Epoch 1


100%|██████████| 1875/1875 [00:12<00:00, 155.83it/s]


['loss: 0.12372072786092758', 'acc: 0.95496666431427']
['val_loss: 0.13623151183128357', 'val_acc: 0.9467999935150146']
Starting Epoch 0


  0%|          | 0/1875 [00:00<?, ?it/s]

train begins
prediction starts
first prediction Tensor("StatefulPartitionedCall:0", shape=(32, 1), dtype=float32)


  0%|          | 1/1875 [00:00<15:53,  1.96it/s]

train begins
prediction starts
first prediction Tensor("StatefulPartitionedCall:0", shape=(32, 1), dtype=float32)


100%|██████████| 1875/1875 [00:13<00:00, 134.91it/s]


['loss: 0.24911729991436005', 'acc: 0.8996666669845581']
['val_loss: 0.19633346796035767', 'val_acc: 0.9247999787330627']
Starting Epoch 1


100%|██████████| 1875/1875 [00:11<00:00, 158.65it/s]


['loss: 0.18332628905773163', 'acc: 0.9274166822433472']
['val_loss: 0.1577572077512741', 'val_acc: 0.9419999718666077']
Starting Epoch 0


  0%|          | 0/1875 [00:00<?, ?it/s]

train begins
prediction starts
first prediction Tensor("StatefulPartitionedCall:0", shape=(32, 1), dtype=float32)


  0%|          | 1/1875 [00:00<15:46,  1.98it/s]

train begins
prediction starts
first prediction Tensor("StatefulPartitionedCall:0", shape=(32, 1), dtype=float32)


100%|██████████| 1875/1875 [00:13<00:00, 143.50it/s]


['loss: 0.2229168713092804', 'acc: 0.9102333188056946']
['val_loss: 0.1666749268770218', 'val_acc: 0.9337000250816345']
Starting Epoch 1


100%|██████████| 1875/1875 [00:12<00:00, 155.85it/s]


['loss: 0.15060395002365112', 'acc: 0.94118332862854']
['val_loss: 0.13546736538410187', 'val_acc: 0.9470000267028809']
Starting Epoch 0


  0%|          | 0/1875 [00:00<?, ?it/s]

train begins
prediction starts
first prediction Tensor("StatefulPartitionedCall:0", shape=(32, 1), dtype=float32)
train begins
prediction starts
first prediction Tensor("StatefulPartitionedCall:0", shape=(32, 1), dtype=float32)


100%|██████████| 1875/1875 [00:13<00:00, 136.48it/s]


['loss: 0.18835093080997467', 'acc: 0.9263499975204468']
['val_loss: 0.21172402799129486', 'val_acc: 0.9398999810218811']
Starting Epoch 1


100%|██████████| 1875/1875 [00:13<00:00, 140.97it/s]


['loss: 0.1318303346633911', 'acc: 0.9547333121299744']
['val_loss: 0.12684421241283417', 'val_acc: 0.9545000195503235']
Starting Epoch 0


  0%|          | 0/1875 [00:00<?, ?it/s]

train begins
prediction starts
first prediction Tensor("StatefulPartitionedCall:0", shape=(32, 1), dtype=float32)


  0%|          | 1/1875 [00:00<15:56,  1.96it/s]

train begins
prediction starts
first prediction Tensor("StatefulPartitionedCall:0", shape=(32, 1), dtype=float32)


100%|██████████| 1875/1875 [00:12<00:00, 149.20it/s]


['loss: 0.300747811794281', 'acc: 0.8759499788284302']
['val_loss: 0.24697284400463104', 'val_acc: 0.8970999717712402']
Starting Epoch 1


100%|██████████| 1875/1875 [00:12<00:00, 149.65it/s]


['loss: 0.24272911250591278', 'acc: 0.9045000076293945']
['val_loss: 0.22105538845062256', 'val_acc: 0.9161999821662903']


In [505]:
# Train a model to solve the second math problem
training(1, tf.keras.optimizers.Adam())
training(1, tf.keras.optimizers.SGD(learning_rate=learning_rate, momentum=0.0))
training(1, tf.keras.optimizers.SGD(learning_rate=learning_rate, momentum=0.5))
training(1, tf.keras.optimizers.RMSprop())training(1, tf.keras.optimizers.Adagrad())

Starting Epoch 0


  0%|          | 0/1875 [00:00<?, ?it/s]

train begins
prediction starts
first prediction Tensor("StatefulPartitionedCall:0", shape=(32, 1), dtype=float32)
train begins
prediction starts
first prediction Tensor("StatefulPartitionedCall:0", shape=(32, 1), dtype=float32)


100%|██████████| 1875/1875 [00:13<00:00, 143.82it/s]


['loss: 17.715633392333984', 'acc: 0.09103333204984665']
['val_loss: 17.74460792541504', 'val_acc: 0.08479999750852585']
Starting Epoch 1


100%|██████████| 1875/1875 [00:20<00:00, 90.40it/s] 


['loss: 17.714799880981445', 'acc: 0.090549997985363']
['val_loss: 17.6551513671875', 'val_acc: 0.08730000257492065']
Starting Epoch 0


  0%|          | 0/1875 [00:00<?, ?it/s]

train begins
prediction starts
first prediction Tensor("StatefulPartitionedCall:0", shape=(32, 1), dtype=float32)
train begins
prediction starts
first prediction Tensor("StatefulPartitionedCall:0", shape=(32, 1), dtype=float32)


100%|██████████| 1875/1875 [00:21<00:00, 85.30it/s] 


['loss: 17.745433807373047', 'acc: 0.09003333002328873']
['val_loss: 17.745107650756836', 'val_acc: 0.0925000011920929']
Starting Epoch 1


100%|██████████| 1875/1875 [00:20<00:00, 90.48it/s] 


['loss: 17.7009334564209', 'acc: 0.08906666934490204']
['val_loss: 17.787939071655273', 'val_acc: 0.09139999747276306']
Starting Epoch 0


  0%|          | 0/1875 [00:00<?, ?it/s]

train begins
prediction starts
first prediction Tensor("StatefulPartitionedCall:0", shape=(32, 1), dtype=float32)
train begins
prediction starts
first prediction Tensor("StatefulPartitionedCall:0", shape=(32, 1), dtype=float32)


100%|██████████| 1875/1875 [00:20<00:00, 91.60it/s] 


['loss: 17.610933303833008', 'acc: 0.09061666578054428']
['val_loss: 17.755290985107422', 'val_acc: 0.09109999984502792']
Starting Epoch 1


100%|██████████| 1875/1875 [00:18<00:00, 100.29it/s]


['loss: 17.779666900634766', 'acc: 0.09123333543539047']
['val_loss: 17.68061065673828', 'val_acc: 0.08900000154972076']
Starting Epoch 0


  0%|          | 0/1875 [00:00<?, ?it/s]

train begins
prediction starts
first prediction Tensor("StatefulPartitionedCall:0", shape=(32, 1), dtype=float32)
train begins
prediction starts
first prediction Tensor("StatefulPartitionedCall:0", shape=(32, 1), dtype=float32)


100%|██████████| 1875/1875 [00:21<00:00, 87.52it/s] 


['loss: 17.64973258972168', 'acc: 0.09128333628177643']
['val_loss: 17.549320220947266', 'val_acc: 0.09480000287294388']
Starting Epoch 1


100%|██████████| 1875/1875 [00:21<00:00, 88.38it/s] 


['loss: 17.678966522216797', 'acc: 0.08951666951179504']
['val_loss: 17.660642623901367', 'val_acc: 0.0860000029206276']
Starting Epoch 0


  0%|          | 0/1875 [00:00<?, ?it/s]

train begins
prediction starts
first prediction Tensor("StatefulPartitionedCall:0", shape=(32, 1), dtype=float32)
train begins
prediction starts
first prediction Tensor("StatefulPartitionedCall:0", shape=(32, 1), dtype=float32)


100%|██████████| 1875/1875 [00:25<00:00, 72.84it/s]


['loss: 17.691165924072266', 'acc: 0.08833333104848862']
['val_loss: 17.84664535522461', 'val_acc: 0.08950000256299973']
Starting Epoch 1


100%|██████████| 1875/1875 [00:22<00:00, 83.54it/s]


['loss: 17.691967010498047', 'acc: 0.08953333646059036']
['val_loss: 17.899660110473633', 'val_acc: 0.08940000087022781']


### the loss is too high and the acc is too low for the 2nd math prob, need to figure out why & visualization is not done yet

### Assignment 5 - Experiments

Run training w/ classic SGD (no momentum)

Run training w/ Adam

Run training w/ SGD + Momentum

Run training w/ RMSrop

Run training w/ AdaGrad

In [506]:
# Visualize the results of the above training runs

# NEED TO BE WORKED ON
fig, axs = plt.subplots(5) 
fig.suptitle('Vertically stacked subplots')
x = np.linspace(0, epoch)
y = np.sin(x ** 2)
ax1.plot(x, y)
ax2.plot(x, -y)

line1, = plt.plot(train_losses)
line2, = plt.plot(test_losses)
line3, = plt.plot(train_accuracies)
line4, = plt.plot(test_accuracies)
plt.xlabel("Epochs")
plt.ylabel("Loss/Accuracy")
plt.legend((line1, line2, line3, line4),("Training Loss", "Test Loss", "Training Accuracy", "Test Accuracy"))
fig.savefig("Title-Of-The-Figure")
plt.show()


NameError: name 'plt' is not defined