In [None]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt

# CONFIGS
# One seed for every library that draws random numbers in this notebook:
# NumPy picks the preview samples, TensorFlow initialises the layer weights.
SEED = 1234
np.random.seed(SEED)
tf.random.set_seed(SEED)
MINIBATCH_SIZE = 128  # samples per mini-batch for the train/val/test pipelines

In [None]:
# Fetch Fashion-MNIST through Keras and preview a few random training images.
fmnist = tf.keras.datasets.fashion_mnist
(X_train, y_train), (X_test, y_test) = fmnist.load_data()

# Sanity-check the array shapes of the canonical 60k/10k split.
assert X_train.shape == (60000, 28, 28)
assert X_test.shape == (10000, 28, 28)
assert y_train.shape == (60000,)
assert y_test.shape == (10000,)


# Draw three random training indices and show each image with its label.
indices = [drawn for drawn in np.random.randint(X_train.shape[0], size=3)]
for i, sample_idx in enumerate(indices):
    plt.subplot(1, 3, i + 1)
    picture = X_train[sample_idx].reshape(28, 28)
    plt.imshow(picture, cmap='gray', interpolation='none')
    plt.title(f"Index {sample_idx} Class {y_train[sample_idx]}")
    plt.tight_layout()

In [None]:
# Preproc X and Y

# Sizes used below, named once instead of being repeated as magic numbers.
NUM_TRAIN_SAMPLES = 50000   # first 50k images train, the remainder validate
NUM_CLASSES = 10            # depth of the one-hot label vectors

# This cell overwrites X_train / y_train in place. Re-running it on the
# already flattened arrays would silently yield an empty validation split,
# so fail early with a clear message instead.
assert X_train.ndim == 3, "X_train is already preprocessed; re-run the data loading cell first"

# Split train dataset into train and validation
X_val = X_train[NUM_TRAIN_SAMPLES:]
X_train = X_train[:NUM_TRAIN_SAMPLES]
y_val = y_train[NUM_TRAIN_SAMPLES:]
y_train = y_train[:NUM_TRAIN_SAMPLES]

print("size of training set is", str(X_train.shape[0]), "samples")
print("every train example is", str(X_train.shape[1]), "by", str(X_train.shape[2]))

print("size of validation set is", str(X_val.shape[0]), "samples")
print("every validation example is", str(X_val.shape[1]), "by", str(X_val.shape[2]))

# Flatten every image into one float32 feature vector; the row count is
# taken from the array itself rather than hard-coded.
X_train = X_train.astype(np.float32).reshape(X_train.shape[0], -1)
X_val = X_val.astype(np.float32).reshape(X_val.shape[0], -1)
X_test = X_test.astype(np.float32).reshape(X_test.shape[0], -1)

print("After reshaping ->")
print("size of training set is", str(X_train.shape[0]), "samples")
print("every train example has", str(X_train.shape[1]), "features")

print("size of validation set is", str(X_val.shape[0]), "samples")
print("every validation example has", str(X_val.shape[1]), "features")

# Normalize Data
X_train = X_train/255
X_val = X_val/255
X_test = X_test/255
# Check the scaling instead of computing a maximum that is never shown.
assert 0.0 <= np.min(X_train) and np.max(X_train) <= 1.0


size_input = X_train.shape[1]
print("\nInput shape = ", size_input)
size_hidden = 256
size_hidden1 = 256
size_hidden2 = 256

number_of_train_examples = X_train.shape[0]
number_of_test_examples = X_test.shape[0]

# one hot encoding for y
y_train = tf.one_hot(y_train, depth=NUM_CLASSES)
y_val = tf.one_hot(y_val, depth=NUM_CLASSES)
y_test = tf.one_hot(y_test, depth=NUM_CLASSES)

print("\nPreproc done...")

In [None]:
class BatchNormLayer(tf.keras.layers.Layer):
    '''
    Hand-written batch normalisation over the batch axis (axis 0).

    In training mode the input is standardised with the statistics of the
    current batch and those statistics are folded into running averages.
    In inference mode the running averages are used instead. The result is
    scaled by `gamma` and shifted by `beta`, both trainable.

    The running statistics are plain tensors stored as attributes, not
    tf.Variable objects.
    '''

    # Added to the variance before the square root to avoid division by zero.
    EPSILON = 1e-8

    def __init__(self, units):
        '''
        Take in num units and init BN layer

        Args:
            units: size of the last axis of the inputs this layer receives.
        '''
        super(BatchNormLayer, self).__init__()
        self.units = units
        self.gamma = tf.Variable(initial_value=tf.ones((1, units), dtype="float32"),
                                 trainable=True)
        self.beta = tf.Variable(initial_value=tf.zeros((1, units), dtype="float32"),
                                trainable=True)

        # rmean -> running mean
        self.rmeanx = tf.zeros((1, units))
        # Start the running variance at one (as the built-in Keras layer does)
        # so that inference before any training step leaves the scale of the
        # input roughly unchanged instead of dividing by sqrt(EPSILON).
        self.rvarx = tf.ones((1, units))
        self.meanx = tf.zeros((1, units))
        self.varx = tf.zeros((1, units))

        self.stddevx = tf.zeros((1, units))
        self.standardx = tf.zeros((1, units))
        self.ravg_weight = 0.9

        self.training = True
        # Explicit "have we seen a training batch yet" flag. Comparing the
        # running mean with zeros is not reliable: a batch whose mean is
        # exactly zero looks like "never initialised".
        self._stats_initialized = False

    def set_training_ON(self):
        '''Make training mode the default for calls that omit `training`.'''
        self.training = True
        return

    def set_training_OFF(self):
        '''Make inference mode the default for calls that omit `training`.'''
        self.training = False
        return

    def call(self, x, training=None):
        '''
        Forward pass: standardise `x` and apply the learned scale and shift.

        Args:
            x: input tensor; its batch statistics are reshaped to
               (1, units), so the last axis must have `units` entries.
            training: True to use (and accumulate) batch statistics, False
                      to use the running statistics. When None, the mode
                      chosen via set_training_ON / set_training_OFF (or the
                      mode of the previous call) is used.

        Returns:
            gamma * (x - mean) / sqrt(var + EPSILON) + beta
        '''
        if training is None:
            training = self.training
        # Remember the mode of the most recent call.
        self.training = training

        if self.training:  # training, update running mean and var
            self.meanx = tf.reshape(tf.reduce_mean(x, 0), (1, self.units))
            self.varx = tf.reshape(tf.reduce_mean((x - self.meanx)**2, 0),
                                   (1, self.units))

            if not self._stats_initialized:
                # its the first time, set rmeanx, rvarx
                self.rmeanx = tf.identity(self.meanx)
                self.rvarx = tf.identity(self.varx)
                self._stats_initialized = True
            else:
                # take weighted avg
                wt = self.ravg_weight
                self.rmeanx = wt * self.rmeanx + (1-wt)*self.meanx
                self.rvarx = wt * self.rvarx + (1-wt)*self.varx

        else:  # not in training, use global mean and varx
            self.meanx = tf.identity(self.rmeanx)
            self.varx = tf.identity(self.rvarx)

        # compute out
        # `+=` rebinds self.varx to a new tensor, so the running variance
        # itself is never modified by the epsilon.
        self.varx += self.EPSILON
        self.stddevx = tf.sqrt(self.varx)
        self.standardx = (x - self.meanx) / (self.stddevx)

        return self.gamma*self.standardx + self.beta
                        

In [None]:

class MLPImageRecognition(tf.keras.Model):
    '''
    Fully connected classifier with four hidden layers and a custom
    training loop.

    Three forward variants share the same Dense layers: without batch norm,
    with batch norm before the ReLU, and with batch norm after the ReLU.
    `predict` selects which variant is used for loss, gradients and
    accuracy.

    The layer widths and the input width come from the notebook globals
    `size_hidden1`, `size_hidden2` and `size_input`.
    '''

    def __init__(self, num_classes, device='cpu:0', checkpoint_directory=None):
        '''
        Args:
                num_classes: the number of labels in the network.
                device: string, 'cpu:n' or 'gpu:n' (n can vary). Default, 'cpu:0'.
                checkpoint_directory: the directory where you would like to save or 
                                      restore a model.
        '''
        super(MLPImageRecognition, self).__init__()
        self.num_classes = num_classes

        # MLP with four hidden layers, all activations turned-OFF; the ReLU
        # is applied explicitly in the forward pass so that batch norm can
        # be placed before or after it.
        self.hlayer1 = tf.keras.layers.Dense(size_hidden1,
                                             input_shape=(size_input,),
                                             activation=None)
        self.bnlayer1 = BatchNormLayer(size_hidden1)
        self.hlayer2 = tf.keras.layers.Dense(size_hidden2, activation=None)
        self.bnlayer2 = BatchNormLayer(size_hidden2)
        self.hlayer3 = tf.keras.layers.Dense(size_hidden2, activation=None)
        self.bnlayer3 = BatchNormLayer(size_hidden2)
        self.hlayer4 = tf.keras.layers.Dense(size_hidden2, activation=None)
        self.bnlayer4 = BatchNormLayer(size_hidden2)
        self.outlayer = tf.keras.layers.Dense(num_classes, activation=None)

        self.device = device

        self.checkpoint_directory = checkpoint_directory
        self.acc = tf.keras.metrics.Accuracy()

    def _hidden_blocks(self):
        '''Return the (Dense, BatchNormLayer) pairs in forward order.'''
        return (
            (self.hlayer1, self.bnlayer1),
            (self.hlayer2, self.bnlayer2),
            (self.hlayer3, self.bnlayer3),
            (self.hlayer4, self.bnlayer4),
        )

    def _run_network(self, images, training, bn_position):
        '''
        Shared forward pass behind the three public predict_* variants.

        Args:
            images: batch of flattened inputs.
            training: forwarded to the batch-norm layers.
            bn_position: 'pre' (batch norm before ReLU), 'post' (batch norm
                         after ReLU) or None (no batch norm at all).

        Returns:
            Logits reshaped to (-1, 1, num_classes).
        '''
        x = images
        for dense, bnorm in self._hidden_blocks():
            x = dense(x)
            if bn_position == 'pre':
                x = tf.nn.relu(bnorm(x, training))
            elif bn_position == 'post':
                x = bnorm(tf.nn.relu(x), training)
            else:
                x = tf.nn.relu(x)

        x = self.outlayer(x)
        return tf.reshape(x, (-1, 1, self.num_classes))

    def predict_BN_PREactivation(self, images, training=True):
        '''
        Forward pass with batch norm applied before each ReLU.

            Args:
                images: batch of flattened inputs fed to the first Dense
                        layer.
                training: Boolean. Either the network is predicting in
                          training mode or not.

            Returns:
                Unnormalised class scores (logits), shape (-1, 1, num_classes).
        '''
        return self._run_network(images, training, 'pre')

    def predict_BN_POSTactivation(self, images, training=True):
        '''
        Forward pass with batch norm applied after each ReLU.

            Args:
                images: batch of flattened inputs fed to the first Dense
                        layer.
                training: Boolean. Either the network is predicting in
                          training mode or not.

            Returns:
                Unnormalised class scores (logits), shape (-1, 1, num_classes).
        '''
        return self._run_network(images, training, 'post')

    def predict_plain(self, images, training):
        '''
        Forward pass without any batch norm. `training` is accepted for a
        uniform signature and is not used.
        '''
        return self._run_network(images, training, None)

    def predict(self, images, training):
        '''
        Forward pass used by the loss, gradient and accuracy code.
        Swap the call below to compare the three network variants.
        '''
        # return self.predict_plain(images, training)
        # return self.predict_BN_PREactivation(images, training)
        # The bound method already carries `self`; passing it again shifted
        # every argument by one and raised a TypeError.
        return self.predict_BN_POSTactivation(images, training)

    def loss_fn(self, images, target, training):
        '''
        Per-example softmax cross-entropy between the logits and `target`.

        Args:
            images: batch of flattened inputs.
            target: one-hot labels with as many elements as the logits.
            training: forwarded to `predict`.

        Returns:
            Tensor with one loss value per example.
        '''
        preds = self.predict(images, training)
        # The logits carry an extra axis of size one; give the labels the
        # same shape explicitly instead of relying on the op to reconcile
        # them.
        target = tf.reshape(target, tf.shape(preds))
        loss = tf.nn.softmax_cross_entropy_with_logits(labels=target, logits=preds)
        return loss

    def grads_fn(self, images, target, training):
        """ 
        Dynamically computes the gradients of the loss value
            with respect to the parameters of the model, in each
            forward pass.

        Returns one entry per element of `self.variables`, in the same
        order. An entry is None when the loss does not depend on that
        variable (for example the batch-norm scale/shift while
        `predict_plain` is selected).
        """
        with tf.GradientTape() as tape:
            loss = self.loss_fn(images, target, training)
        return tape.gradient(loss, self.variables)

    def _evaluate(self, dataset):
        '''
        Mean loss and accuracy over `dataset` in inference mode.

        Batch accuracies are weighted by batch size so that a smaller final
        batch does not skew the average.

        Returns:
            (mean_loss, accuracy) as numpy scalars.
        '''
        loss_metric = tf.keras.metrics.Mean()
        acc_metric = tf.keras.metrics.Mean()
        for images, target in dataset:
            loss_metric(self.loss_fn(images, target, False))
            acc_metric(self.compute_accuracy_2(images, target),
                       sample_weight=int(images.shape[0]))
        return loss_metric.result().numpy(), acc_metric.result().numpy()

    def run_tests(self, test_data):
        '''
        Run the model on the test data

        Stores the results under history['test_loss'] / history['test_acc']
        and prints them. Entries written by `fit_fc` are kept.
        '''
        history = getattr(self, 'history', None)
        if not isinstance(history, dict):
            history = {}
        history['test_loss'] = []
        history['test_acc'] = []
        self.history = history

        mean_loss, accuracy = self._evaluate(test_data)
        self.history['test_loss'].append(mean_loss)
        self.history['test_acc'].append(accuracy)
        print("Test Accuracy = " + str(self.history['test_acc'][-1]) + "\nAverage Loss = " + str(self.history['test_loss'][-1]))

    def _require_checkpoint_directory(self):
        '''Raise a clear error when no checkpoint directory was configured.'''
        if self.checkpoint_directory is None:
            raise ValueError("checkpoint_directory was not set for this model")

    def restore_model(self):
        """ 
        Function to restore trained model.

        Raises:
            ValueError: no checkpoint directory was configured.
            FileNotFoundError: the directory holds no checkpoint.

        NOTE(review): only state reachable as tf.Variable is checkpointed;
        statistics that a layer keeps as plain tensors are not restored.
        """
        self._require_checkpoint_directory()
        with tf.device(self.device):
            # Run the model once to initialize variables. The dummy batch
            # must match the flattened input width the first layer expects.
            dummy_input = tf.zeros((1, size_input))
            self.predict(dummy_input, training=False)
            # Restore the variables of the model
            latest = tf.train.latest_checkpoint(self.checkpoint_directory)
            if latest is None:
                raise FileNotFoundError(
                    "no checkpoint found in " + str(self.checkpoint_directory))
            tf.train.Checkpoint(model=self).restore(latest)

    def save_model(self, global_step=0):
        """ 
        Function to save trained model.

        Args:
            global_step: number appended to the checkpoint file name.

        Returns:
            Path of the written checkpoint.

        Raises:
            ValueError: no checkpoint directory was configured.
        """
        self._require_checkpoint_directory()
        checkpoint = tf.train.Checkpoint(model=self)
        # The manager also maintains the bookkeeping file that
        # tf.train.latest_checkpoint reads in `restore_model`.
        manager = tf.train.CheckpointManager(
            checkpoint, self.checkpoint_directory, max_to_keep=None)
        return manager.save(checkpoint_number=global_step)

    def compute_accuracy_2(self, images, targets):
        """ 
        Compute the accuracy on the input data.

        Args:
            images: batch of flattened inputs.
            targets: one-hot labels for the batch.

        Returns:
            Fraction of correctly classified examples in this batch, as a
            numpy scalar.
        """
        with tf.device(self.device):
            logits = self.predict(images, training=False)

            # Select the class with the highest score. Softmax does not
            # change which entry is largest, so argmax is taken on the
            # logits directly.
            logits = tf.reshape(logits, [-1, self.num_classes])
            targets = tf.reshape(targets, [-1, self.num_classes])
            preds = tf.argmax(logits, axis=1)
            goal = tf.argmax(targets, axis=1)

            # The metric object is shared by every call. Clear it first,
            # otherwise the result is a running accuracy over all batches
            # ever seen (train, validation and test mixed together).
            self.acc.reset_states()
            self.acc.update_state(goal, preds)

            # Compute the accuracy
            result = self.acc.result().numpy()
        return result

    def fit_fc(self, training_data, eval_data, optimizer, num_epochs=500, 
            early_stopping_rounds=10, verbose=10, train_from_scratch=False):
        """ 
            1. Function to train the model, using the selected optimizer and for the desired number of epochs. 
            2. You can either train from scratch or load the latest model trained. 
            3. Early stopping is used in order to mitigate the risk of overfitting the network.
            
            Args:
                training_data         : tf.data.Dataset - the data you would like to train the model on.
                eval_data             : tf.data.Dataset - the data you would like to evaluate the model on.
                optimizer             : Object - the optimizer used during training.
                num_epochs            : int - the maximum number of iterations you would like to train the model.
                early_stopping_rounds : int - stop training if the loss on the eval dataset does not decrease after n epochs.
                verbose               : int - currently unused; losses are printed after every epoch.
                train_from_scratch    : boolean - initialize variables of the last trained model or randomly.

            Per-epoch results are stored in self.history under 'train_loss',
            'eval_loss', 'train_acc' and 'val_acc' and printed at the end.
        """ 

        if not train_from_scratch:
            self.restore_model()

        # Lowest eval loss seen so far and the number of epochs left before
        # giving up. Both are set before the loop so the countdown is
        # defined even if the first epoch does not improve on `best_loss`
        # (e.g. a NaN loss).
        best_loss = float('inf')
        count = early_stopping_rounds

        # Initialize dictionary to store the loss history
        self.history = {}
        self.history['train_loss'] = []
        self.history['eval_loss'] = []
        self.history['train_acc'] = []
        self.history['val_acc'] = []

        # Begin training
        with tf.device(self.device):
            for epoch in range(num_epochs):
                # Training with gradient descent
                for images, target in training_data:
                    grads = self.grads_fn(images, target, True)
                    # Skip variables that received no gradient instead of
                    # handing None to the optimizer.
                    grads_and_vars = [(g, v)
                                      for g, v in zip(grads, self.variables)
                                      if g is not None]
                    optimizer.apply_gradients(grads_and_vars)

                print("[DBG] TRAINING DONE")
                # Compute the loss on the training data after one epoch
                train_loss, train_acc = self._evaluate(training_data)
                self.history['train_loss'].append(train_loss)
                self.history['train_acc'].append(train_acc)

                # Compute the loss on the eval data after one epoch
                eval_loss, val_acc = self._evaluate(eval_data)
                self.history['eval_loss'].append(eval_loss)
                self.history['val_acc'].append(val_acc)

                # Print train and eval losses
                print('Epoch %d' %(epoch+1))
                print('Train loss = ', self.history['train_loss'][-1], 'Accuracy = ', self.history['train_acc'][-1])
                print('Eval loss = ', self.history['eval_loss'][-1], 'Accuracy = ', self.history['val_acc'][-1])
                print()

                # Check for early stopping
                if self.history['eval_loss'][-1] < best_loss:
                    best_loss = self.history['eval_loss'][-1]
                    count = early_stopping_rounds
                else:
                    count -= 1
                if count == 0:
                    break

        print("Train loss")
        for value in self.history['train_loss']:
            print(value)

        print("Train Accuracy")
        for value in self.history['train_acc']:
            print(value)

        print("Val loss")
        for value in self.history['eval_loss']:
            print(value)

        print("Val accuracy")
        for value in self.history['val_acc']:
            print(value)

In [None]:
# Setup and train network ->
import time

# Specify the path where you want to save/restore the trained variables.
checkpoint_directory = 'models_checkpoints/fmnist-mlp/'

# Check if Gpu is activated
print('tensorflow version',tf.__version__)
print("Num GPUs Available: ", len(tf.config.experimental.list_physical_devices('GPU')))
print(tf.test.is_built_with_cuda())
print(tf.config.list_physical_devices('GPU'))
# Use the GPU if available, otherwise fall back to the CPU instead of
# requesting a device that does not exist.
device = 'gpu:0' if tf.config.list_physical_devices('GPU') else 'cpu:0'

# train, test and val minibatches
train_dataset = tf.data.Dataset.from_tensor_slices((X_train, y_train))

# Shuffle seed taken from the wall clock (last five decimal digits of the
# Unix time) so each run sees a different order. Integer arithmetic avoids
# the rounding of the float round-trip.
t = int(time.time())
rseed = t % 100000

train_dataset = train_dataset.shuffle(buffer_size=1024, seed=rseed).batch(MINIBATCH_SIZE)

# Validation and test data are only evaluated, so they are batched unshuffled.
val_dataset = tf.data.Dataset.from_tensor_slices((X_val, y_val))
val_dataset = val_dataset.batch(MINIBATCH_SIZE)

test_dataset = tf.data.Dataset.from_tensor_slices((X_test, y_test))
test_dataset = test_dataset.batch(MINIBATCH_SIZE)

# Number of mini-batches per pass over the train and test sets.
print(len(train_dataset))
print(len(test_dataset))

# Define optimizer.
optimizer = tf.compat.v1.train.AdamOptimizer(learning_rate=1e-4)

# Instantiate model. This doesn't initialize the variables yet.
model = MLPImageRecognition(num_classes=10, device=device, 
                              checkpoint_directory=checkpoint_directory)

In [None]:
# Train the model on the train/validation pipelines, then evaluate it once on
# the held-out test pipeline, reporting the wall-clock time of each phase.
import time
time_start = time.time()
# Train model
# train_from_scratch=True skips restoring a checkpoint; training stops after
# at most 20 epochs, or earlier once the validation loss has failed to
# improve for 4 consecutive epochs.
model.fit_fc(train_dataset, val_dataset, optimizer, num_epochs=20, 
          early_stopping_rounds=4, verbose=2, train_from_scratch=True)

# Elapsed training time, truncated to whole seconds for display.
dur = time.time() - time_start
print("\nTime Taken = ", int(dur), "sec")

# Time the test-set evaluation separately from training.
time_start = time.time()
model.run_tests(test_dataset)
dur = time.time() - time_start
print("\nTime on test data:", int(dur), "sec")