# IANNwTF HW 4
## Group 10

The following contains our solution to the exercises in IANNwTF HW 04. A Jupyter notebook versus a module format was chosen this time for purposes of organization.

### Assigment 1: Reviews
We review the homeworks for Groups 15 and 32.

### Assignment 2: MNIST Math

### 2.1 Preparing the MNIST Math Dataset

In [181]:
# Needed Imports
import tensorflow as tf
import tensorflow_datasets as tfds
from tensorflow.keras.layers import Dense
import numpy as np
import tqdm
import datetime

In [182]:
# 2.1 Load Dataset
(train_ds, test_ds), ds_info = tfds.load ('mnist', split =['train', 'test'], as_supervised = True, with_info = True)

# Info on the dataset (refresher)
# print("ds_info: \n", ds_info)
# tfds.show_examples(train_ds, ds_info)

In [183]:
# 2.2 Data Pipeline
def prepare_data(dataset, batchsize):

    '''
    :param dataset: the dataset to be prepared for input into the network
    :return: 2 datasets, one each for each of the math problems defined (see below), created after the original database was preprocessed with the
    steps below
    '''

    # Step One - General Preprocessing

    # convert data from uint8 to float32
    dataset = dataset.map(lambda img, target: (tf.cast(img, tf.float32), target))

    # flatten the images into vectors
    dataset = dataset.map(lambda img, target: (tf.reshape(img, (-1,)), target))

    # input normalization, just bringing image values from range [0, 255] to [-1, 1]
    dataset = dataset.map(lambda img, target: ((img / 128.) - 1., target))

    # Step 2 - Pairing Data Tuples & Respective Parameterized Targets

    # create a dataset that contains 2000 samples from the overall dataset paired with 2000 other samples
    data = tf.data.Dataset.zip((dataset.shuffle(2000), dataset.shuffle(2000)))

    # create the dataset for the first math problem (a + b >= 5) - remembering to cast to int versus boolean!
    greateqfive = data.map(lambda x1, x2: (x1[0], x2[0], x1[1]+x2[1]>=5))
    greateqfive = greateqfive.map(lambda x1, x2, t: (x1, x2, tf.cast(t, tf.int32)))

    # create the dataset for the second math problem (a - b = y)
    subtr = data.map(lambda x1, x2: (x1[0], x2[0], x1[1]-x2[1]))

    # Step 3 - Batching & Prefetching

    # run batching and prefetching for both datasets
    greateqfive = greateqfive.batch(batchsize)
    greateqfive = greateqfive.prefetch(tf.data.AUTOTUNE)
    subtr = subtr.batch(batchsize)
    subtr = subtr.prefetch(tf.data.AUTOTUNE)

    # return BOTH datasets
    return greateqfive, subtr


In [184]:
# Check data pipeline by examining one example from each of the four created datasets (one for each math problem for train and test)

train_ds_gef, train_ds_subtr = prepare_data(train_ds, batchsize = 32)
test_ds_gef, test_ds_subtr = prepare_data(test_ds, batchsize = 32)

for img1, img2, label in train_ds_gef.take(1):
    print(img1.shape, img2.shape, label.shape)

for img1, img2, label in train_ds_subtr.take(1):
    print(img1.shape, img2.shape, label.shape)

for img1, img2, label in test_ds_gef.take(1):
    print(img1.shape, img2.shape, label.shape)

for img1, img2, label in test_ds_subtr.take(1):
    print(img1.shape, img2.shape, label.shape)


(32, 784) (32, 784) (32,)
(32, 784) (32, 784) (32,)
(32, 784) (32, 784) (32,)
(32, 784) (32, 784) (32,)


### Assignment 3: Building Shared Weight Models

In [185]:
# a suggestion:
class MyModel(tf.keras.Model):
    def __init__(self): #, subtask): # numlayers, subtask):

        '''
        param: numlayers - the desired number of hidden layers
        param: subtask - the subtask the network is being asked to solve (relevant for output layer)
        '''
    	
        self.optimizer = tf.keras.optimizers.Adam()

        super(MyModel, self).__init__()

        # create 2 hidden layers with 256 units and ReLU as the activation function
        self.hidden_layer_1 = Dense(units=256, activation=tf.nn.relu)
        self.hidden_layer_2 = Dense(units=256, activation=tf.nn.relu)
        
        self.output_layer = Dense(units=1, activation=tf.nn.sigmoid)
        # add desired number of hidden layers
        # for i in range(numlayers):
        #     self.layers.append(Dense(units=256, activation=tf.nn.relu))

        # self.subtask = subtask

        self.loss_function = tf.keras.losses.BinaryCrossentropy()

        # if subtask == 0:
        #     self.loss_function = tf.keras.losses.BinaryCrossentropy()
        # else:
        #     self.loss_function = tf.keras.losses.MeanSquaredError()

        self.metrics_list = [
                    tf.keras.metrics.Mean(name="loss"),
                    tf.keras.metrics.BinaryAccuracy(name="acc"), # only for subtask 0, not for subtask 1
                    # tf.keras.metrics.TopKCategoricalAccuracy(3,name="top-3-acc") 
                    ]

    @tf.function
    def __call__(self, input: tuple, training = False):
        
        # feed both inputs seperatedly into a layer, then concatenate the results before passing activity to the next layer

        i1 = self.hidden_layer_1(input[0])
        i2 = self.hidden_layer_1(input[1])
        
        i1 = self.hidden_layer_2(i1)
        i2 = self.hidden_layer_2(i2)

        i = tf.concat([i1, i2], axis=1)    # e.g. axis=1: (32,784) + (32,784) -> (32, 1568)
        # i = self.hidden_layer_2(i)

        signal = self.output_layer(i)

        # feed the activity through the network UP TO the output layer
        # for i in self.layers:
        #     if i == 0:
        #         i1 = self.layers[i](input[0])
        #         i2 = self.layers[i](input[1])
        #         signal = tf.concat([i1, i2], axis=0)
        #     else:
        #         signal = self.layers[i](signal)



        # create an output layer based on the
        # if self.subtask == 0:
        #     self.output_layer = Dense(units=2, activation=tf.nn.sigmoid)
        #     signal = self.output_layer(i)
        # elif self.subtask == 1:
        #     self.output_layer = Dense(units=10, activation=tf.nn.softmax)
        #     signal = self.output_layer(i)

        return signal

    @property
    def metrics(self):
        return self.metrics_list

    def reset_metrics(self):
        for metric in self.metrics:
            metric.reset_states()

    @tf.function
    def train_step(self, input):
        img1, img2, label = input
        print("train begins")

        with tf.GradientTape() as tape:
            print("prediction starts")
            prediction = self((img1, img2), training=True) ###????
            print("first prediction", prediction)
            loss = self.loss_function(label, prediction)

        gradients = tape.gradient(loss, self.trainable_variables)
        self.optimizer.apply_gradients(zip(gradients, self.trainable_variables))
        
        # update loss metric
        self.metrics[0].update_state(loss)
        
        # for all metrics except loss, update states (accuracy etc.)
        for metric in self.metrics[1:]:
            metric.update_state(label, prediction) # + tf.reduce_sum(self.losses)

        # Return a dictionary mapping metric names to current value
        return {m.name: m.result() for m in self.metrics}

    @tf.function
    def test_step(self, input):

        img1, img2, label = input

        prediction = self((img1, img2), training=False)
        loss = self.loss_function(label, prediction) # + tf.reduce_sum(self.losses)

        # update loss metric
        self.metrics[0].update_state(loss)

        # for accuracy metrics:
        for metric in self.metrics[1:]:
            metric.update_state(label, prediction)

        # Return a dictionary mapping metric names to current value
        return {m.name: m.result() for m in self.metrics}


### Assignment 4: Training the Networks

In [186]:
# Initiate the logs and metrics
config_name= "config_name"
current_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")

train_log_path = f"logs/{config_name}/{current_time}/train"
val_log_path = f"logs/{config_name}/{current_time}/val"

# log writer for training metrics
train_summary_writer = tf.summary.create_file_writer(train_log_path)

# log writer for validation metrics
val_summary_writer = tf.summary.create_file_writer(val_log_path)

# Initiate epochs and learning rate as global variables
epochs = 10
learning_rate = 0.01

In [187]:
def training():#subtask: int):#, optimizer):
    '''
    :param subtask: defines the subtask to be solved, 0 is a + b >= 5, 1 is a - b = y
    :param optimizer: the optimizer function to use
    :return:
    '''

    # make the if statement

    # Initiate a model with the requested parameters
    network = MyModel()#subtask)

    # Initialize the datasets for the two problems

    # Note - ignore the fact that train_ds and test_ds may be flagged as not defined; when the whole program is run, this should not be an issue
    # train_ds_gef, train_ds_subtr = prepare_data(train_ds, batchsize = 32)
    # test_ds_gef, test_ds_subtr = prepare_data(test_ds, batchsize = 32)

    train = train_ds_gef
    test = test_ds_gef

    # Initialize the train and test datasets, and the loss function, based on the subtask
    # if subtask == 0:
    #     train = train_ds_gef
    #     test = test_ds_gef
    #     # network.loss_function = tf.keras.losses.BinaryCrossentropy

    # else:
    #     train = train_ds_subtr
    #     test = test_ds_subtr
        # network.loss_function = tf.keras.losses.MeanSquaredError

    # Train the model
    for epoch in range(epochs):
        print(f"Starting Epoch {epoch}")

        for data in tqdm.tqdm(train, position=0, leave=True):
            metrics = network.train_step(data)

            with train_summary_writer.as_default():
                for metric in network.metrics:
                    tf.summary.scalar(f"{metric.name}", metric.result(),step=epoch)


        # print the end acc and loss
        print([f"{key}: {value.numpy()}" for (key,value) in metrics.items()])

        # reset metrics for next round
        network.reset_metrics()

        # Testing
        for data in test:
            metrics = network.test_step(data)

            # log the accs and losses
            with val_summary_writer.as_default():
                for metric in network.metrics:
                    tf.summary.scalar(f"{metric.name}", metric.result(), step = epoch)

        # print the end acc and loss
        print([f"val_{key}: {value.numpy()}" for (key,value) in metrics.items()])

        # reset all metrics
        network.reset_metrics()


In [188]:
# Train a model to solve the first math problem
training()

Starting Epoch 0


  0%|          | 0/1875 [00:00<?, ?it/s]

train begins
prediction starts
first prediction Tensor("StatefulPartitionedCall:0", shape=(32, 1), dtype=float32)
train begins
prediction starts
first prediction Tensor("StatefulPartitionedCall:0", shape=(32, 1), dtype=float32)


100%|██████████| 1875/1875 [00:18<00:00, 102.29it/s]


['loss: 0.18152807652950287', 'acc: 0.9291166663169861']
['val_loss: 0.13546493649482727', 'val_acc: 0.9487000107765198']
Starting Epoch 1


100%|██████████| 1875/1875 [00:12<00:00, 149.38it/s]


['loss: 0.12244006246328354', 'acc: 0.9562000036239624']
['val_loss: 0.12382649630308151', 'val_acc: 0.9580000042915344']
Starting Epoch 2


100%|██████████| 1875/1875 [00:11<00:00, 159.21it/s]


['loss: 0.10534211248159409', 'acc: 0.9646499752998352']
['val_loss: 0.10426236689090729', 'val_acc: 0.9641000032424927']
Starting Epoch 3


100%|██████████| 1875/1875 [00:12<00:00, 154.46it/s]


['loss: 0.09758567810058594', 'acc: 0.9687666893005371']
['val_loss: 0.12065297365188599', 'val_acc: 0.9571999907493591']
Starting Epoch 4


100%|██████████| 1875/1875 [00:11<00:00, 156.65it/s]


['loss: 0.09390214830636978', 'acc: 0.9699333310127258']
['val_loss: 0.09814545512199402', 'val_acc: 0.9718000292778015']
Starting Epoch 5


100%|██████████| 1875/1875 [00:11<00:00, 159.03it/s]


['loss: 0.0889250785112381', 'acc: 0.9728500247001648']
['val_loss: 0.09208716452121735', 'val_acc: 0.9729999899864197']
Starting Epoch 6


100%|██████████| 1875/1875 [00:11<00:00, 159.10it/s]


['loss: 0.08458562940359116', 'acc: 0.9741666913032532']
['val_loss: 0.087535560131073', 'val_acc: 0.9735999703407288']
Starting Epoch 7


100%|██████████| 1875/1875 [00:12<00:00, 155.76it/s]


['loss: 0.08216635137796402', 'acc: 0.9748166799545288']
['val_loss: 0.09740924835205078', 'val_acc: 0.972100019454956']
Starting Epoch 8


100%|██████████| 1875/1875 [00:11<00:00, 159.85it/s]


['loss: 0.07898574322462082', 'acc: 0.9768999814987183']
['val_loss: 0.09125948697328568', 'val_acc: 0.9696999788284302']
Starting Epoch 9


100%|██████████| 1875/1875 [00:11<00:00, 158.61it/s]


['loss: 0.07719042152166367', 'acc: 0.9774166941642761']
['val_loss: 0.08818335086107254', 'val_acc: 0.9779999852180481']


In [189]:
# Train a model to solve the second math problem
training(1, tf.optimizers.Adam)

TypeError: training() takes 0 positional arguments but 2 were given

### Assignment 5 - Experiments

Run training w/ classic SGD (no momentum)

Run training w/ Adam

Run training w/ SGD + Momentum

Run training w/ RMSrop

Run training w/ AdaGrad

In [None]:
# Visualize the results of the above training runs

