# IANNwTF HW 4
## Group 10

The following contains our solutions to the exercises in IANNwTF HW 04. This time we chose a Jupyter notebook rather than a module format, for organizational purposes.

### Assignment 1: Reviews
We review the homeworks for Groups 15 and 32.

### Assignment 2: MNIST Math

### 2.1 Preparing the MNIST Math Dataset

In [6]:
# Needed Imports
import tensorflow as tf
import tensorflow_datasets as tfds
from tensorflow.keras.layers import Dense
import numpy as np
import tqdm
import datetime

In [None]:
# 2.1 Load Dataset
# Fetch the MNIST train and test splits as supervised (image, label) pairs,
# together with the dataset metadata.
mnist_splits, ds_info = tfds.load(
    'mnist',
    split=['train', 'test'],
    as_supervised=True,
    with_info=True,
)
train_ds, test_ds = mnist_splits

# Optional refresher on the dataset (uncomment to inspect)
# print("ds_info: \n", ds_info)
# tfds.show_examples(train_ds, ds_info)

In [40]:
# 2.2 Data Pipeline
def prepare_data(dataset, batchsize):

    '''
    Build the two MNIST-math datasets from a dataset of (image, label) pairs.

    :param dataset: the dataset to be prepared for input into the network, yielding (image, label) tuples
    :param batchsize: the number of samples per batch in the returned datasets
    :return: 2 batched and prefetched datasets, one for each math problem, whose elements are
    (img1, img2, target): target is 1/0 for a + b >= 5 in the first dataset and a - b in the second
    '''

    # Step One - General Preprocessing (done in a single pass over the data)

    def preprocess(img, target):
        # uint8 -> float32, flatten the image into a vector, then bring values from [0, 255] to about [-1, 1]
        flat = tf.reshape(tf.cast(img, tf.float32), (-1,))
        return (flat / 128.) - 1., target

    prepared = dataset.map(preprocess)

    # Step 2 - Pairing Data Tuples & Respective Parameterized Targets

    # pair each sample with a partner by zipping two independently shuffled views of the same data
    # (2000 is the size of the shuffle buffer, not the number of samples in the result)
    first_view = prepared.shuffle(2000)
    second_view = prepared.shuffle(2000)
    pairs = tf.data.Dataset.zip((first_view, second_view))

    def sum_at_least_five(x1, x2):
        # first math problem (a + b >= 5), with the boolean target cast to int32
        return x1[0], x2[0], tf.cast(x1[1] + x2[1] >= 5, tf.int32)

    def difference(x1, x2):
        # second math problem (a - b = y)
        return x1[0], x2[0], x1[1] - x2[1]

    # Step 3 - Batching & Prefetching (same treatment for both datasets)
    greateqfive = pairs.map(sum_at_least_five).batch(batchsize).prefetch(tf.data.AUTOTUNE)
    subtr = pairs.map(difference).batch(batchsize).prefetch(tf.data.AUTOTUNE)

    # return BOTH datasets
    return greateqfive, subtr


In [39]:
# Sanity-check the data pipeline: print the shapes of one batch from each of the four created
# datasets (one for each math problem, for train and test)

train_ds_gef, train_ds_subtr = prepare_data(train_ds, batchsize = 32)
test_ds_gef, test_ds_subtr = prepare_data(test_ds, batchsize = 32)

for checked_ds in (train_ds_gef, train_ds_subtr, test_ds_gef, test_ds_subtr):
    for img1, img2, label in checked_ds.take(1):
        print(img1.shape, img2.shape, label.shape)


(32, 784) (32, 784) (32,)
(32, 784) (32, 784) (32,)
(32, 784) (32, 784) (32,)
(32, 784) (32, 784) (32,)


### Assignment 3: Building Shared Weight Models

In [None]:
# a suggestion:
class MyModel(tf.keras.Model):
    '''
    Shared-weight MLP for the two MNIST math problems.

    Both input images are passed through the SAME first hidden layer (shared weights); the two
    activations are concatenated along the feature axis and fed through the remaining hidden
    layers and a single-unit, subtask-specific output layer.
    '''

    def __init__(self,numlayers,subtask):

        '''
        param: numlayers - the desired number of hidden layers (at least 1, the first one is shared by both inputs)
        param: subtask - the subtask the network is being asked to solve (relevant for output layer):
                         0 is a + b >= 5 (binary classification), 1 is a - b = y (regression)
        raises: ValueError - if numlayers is smaller than 1 or subtask is neither 0 nor 1
        '''

        super(MyModel, self).__init__()

        if numlayers < 1:
            raise ValueError(f"numlayers must be at least 1, got {numlayers}")
        if subtask not in (0, 1):
            raise ValueError(f"subtask must be 0 or 1, got {subtask}")

        self.subtask = subtask

        # hidden layers with 256 units and ReLU; they are stored in an own attribute because
        # keras' `self.layers` is a read-only view and appending to it does not register anything
        self.hidden_layers = [Dense(units=256, activation=tf.nn.relu) for _ in range(numlayers)]

        # the output layer is created ONCE here so that its weights persist between calls
        # (it is not called `output`, as that name is a reserved read-only keras property)
        if subtask == 0:
            # one sigmoid unit: probability that a + b >= 5
            self.out_layer = Dense(units=1, activation=tf.nn.sigmoid)
            self.loss_function = tf.keras.losses.BinaryCrossentropy()
            task_metric = tf.keras.metrics.BinaryAccuracy(name="acc")
        else:
            # one linear unit: the difference a - b lies in [-9, 9], so it is treated as regression
            self.out_layer = Dense(units=1, activation=None)
            self.loss_function = tf.keras.losses.MeanSquaredError()
            task_metric = tf.keras.metrics.MeanAbsoluteError(name="mae")

        # defaults; both `optimizer` and `loss_function` may be overwritten before the first train step
        self.optimizer = tf.keras.optimizers.Adam()

        # running mean of the loss plus one subtask-specific metric
        self.metrics_list = [tf.keras.metrics.Mean(name="loss"), task_metric]

    @property
    def metrics(self):
        # expose our own metric objects so that logging and resetting see exactly these
        return self.metrics_list

    def call(self, input: tuple, training=False):
        '''
        param: input - tuple (img1, img2) of batches of flattened images
        param: training - standard keras flag, unused by the Dense layers
        return: the network output with shape (batch, 1)
        '''

        # feed both inputs separately through the shared first layer, then concatenate the results
        # along the feature axis, e.g. (32, 256) + (32, 256) -> (32, 512)
        shared_layer, *later_layers = self.hidden_layers
        signal = tf.concat([shared_layer(input[0]), shared_layer(input[1])], axis=-1)

        # feed the activity through the rest of the network UP TO the output layer
        for layer in later_layers:
            signal = layer(signal)

        return self.out_layer(signal)

    def reset_metrics(self):
        for metric in self.metrics:
            # reset_state() replaces the deprecated reset_states()
            metric.reset_state()

    def _prepare_label(self, label):
        # bring the targets from shape (batch,) to (batch, 1) and to float32, matching the output
        return tf.cast(tf.expand_dims(label, axis=-1), tf.float32)

    def _record_metrics(self, loss, label, output):
        # the first metric tracks the mean loss, all others compare targets with predictions
        self.metrics_list[0].update_state(loss)
        for metric in self.metrics_list[1:]:
            metric.update_state(label, output)

    @tf.function
    def train_step(self,input):
        '''
        param: input - one batch (img1, img2, label)
        return: dictionary mapping each metric name to its current result
        '''
        img1, img2, label = input
        label = self._prepare_label(label)

        with tf.GradientTape() as tape:
            output = self((img1,img2),training=True)
            loss = self.loss_function(label,output)

        gradients = tape.gradient(loss,self.trainable_variables)
        self.optimizer.apply_gradients(zip(gradients,self.trainable_variables))

        self._record_metrics(loss, label, output)
        return {metric.name: metric.result() for metric in self.metrics}

    @tf.function
    def test_step(self,input):
        '''
        param: input - one batch (img1, img2, label)
        return: dictionary mapping each metric name to its current result
        '''
        img1, img2, label = input
        label = self._prepare_label(label)

        # forward pass only, no weight update
        output = self((img1,img2),training=False)
        loss = self.loss_function(label,output)

        self._record_metrics(loss, label, output)
        return {metric.name: metric.result() for metric in self.metrics}


### Assignment 4: Training the Networks

In [None]:
# Epochs and learning rate, kept as global variables
epochs = 10
learning_rate = 0.01

# Log locations: one timestamped directory per run with separate train and val subfolders
config_name= "config_name"
current_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
train_log_path = f"logs/{config_name}/{current_time}/train"
val_log_path = f"logs/{config_name}/{current_time}/val"

# one log writer each for the training metrics and the validation metrics
train_summary_writer, val_summary_writer = (
    tf.summary.create_file_writer(log_path) for log_path in (train_log_path, val_log_path)
)

In [1]:
def training(subtask, optimizer):
    '''
    Train and evaluate a fresh two-hidden-layer MyModel on one of the two math problems.

    Reads the module-level train_ds, test_ds, epochs, learning_rate, train_summary_writer and
    val_summary_writer.

    :param subtask: defines the subtask to be solved, 0 is a + b >= 5, 1 is a - b = y
    :param optimizer: the optimizer to use; either an optimizer class (it is then instantiated with
    the global learning_rate) or an already constructed optimizer instance
    :return: None (the metrics are printed and written to the summary logs once per epoch)
    '''

    # Initiate a model with the requested parameters
    network = MyModel(2,subtask)

    # an optimizer class such as tf.optimizers.Adam still has to be instantiated
    if isinstance(optimizer, type):
        optimizer = optimizer(learning_rate=learning_rate)
    network.optimizer = optimizer

    # Pick the loss function (as an INSTANCE, not the class) and the dataset index for the subtask;
    # prepare_data returns (a + b >= 5 dataset, a - b dataset)
    if subtask == 0:
        task_index = 0
        network.loss_function = tf.keras.losses.BinaryCrossentropy()
    else:
        task_index = 1
        network.loss_function = tf.keras.losses.MeanSquaredError()

    # The local names deliberately differ from the global train_ds / test_ds: assigning to those
    # names here would make them local and the reads below would raise an UnboundLocalError
    train_data = prepare_data(train_ds, batchsize = 32)[task_index]
    test_data = prepare_data(test_ds, batchsize = 32)[task_index]

    # Train the model
    for epoch in range(epochs):
        print(f"Starting Epoch {epoch}")

        for data in tqdm.tqdm(train_data,position=1,leave=True):
            network.train_step(data)

        # log the accumulated training metrics once per epoch
        with train_summary_writer.as_default():
            for metric in network.metrics:
                tf.summary.scalar(f"{metric.name}",metric.result(),step=epoch)

        # print the end acc and loss
        print([f"{metric.name}: {metric.result().numpy()}" for metric in network.metrics])

        # reset metrics for next round
        network.reset_metrics()

        # Testing
        for data in test_data:
            network.test_step(data)

        # log the accs and losses
        with val_summary_writer.as_default():
            for metric in network.metrics:
                tf.summary.scalar(f"{metric.name}", metric.result(), step = epoch)

        # print the end acc and loss
        print([f"val_{metric.name}: {metric.result().numpy()}" for metric in network.metrics])

        # reset all metrics
        network.reset_metrics()


In [None]:
# First math problem (a + b >= 5): train a model with Adam
training(subtask=0, optimizer=tf.optimizers.Adam)

In [None]:
# Second math problem (a - b = y): train a model with Adam
training(subtask=1, optimizer=tf.optimizers.Adam)

### Assignment 5 - Experiments

Run training w/ classic SGD (no momentum)

Run training w/ Adam

Run training w/ SGD + Momentum

Run training w/ RMSprop

Run training w/ AdaGrad

In [None]:
# Visualize the results of the above training runs

