# This notebook just shows that a neural network using the Context Layer has the capacity to learn several MNIST tasks mapped to its different contexts.
# However, unlike the logic gate experiment, this is not dynamic: the active context is set manually for each task.

In [1]:
import tensorflow as tf
from tensorflow.keras.layers import Input, Dense, Conv2D, Flatten, MaxPooling2D, GlobalMaxPooling2D
from tensorflow.keras.models import Model
from tensorflow.keras import backend as K
import os
import numpy as np

from tensorflow.keras.datasets import mnist


from Context_Layer import Context as Ntask

import matplotlib.pyplot as plt


from tqdm.notebook import tqdm
import time



# Seed NumPy's global RNG only. The TensorFlow seeding below is commented out,
# so this cell does not pin any TensorFlow-side randomness.
from numpy.random import seed
seed(1)
#from tensorflow import random
#random.set_seed(2)


#### Load MNIST Test & Train Data

In [5]:
# load_data() yields two (images, labels) pairs: the training split and the test split.
train, test = mnist.load_data()

# Create General MNIST train and test numpy datasets

#### X_train := train[0] & y_train := train[1]

In [12]:
# Unpack the training split into its images (X_train) and digit labels (y_train).
X_train, y_train = train

#### X_test := test[0] & y_test := test[1]

In [13]:
# Unpack the test split into its images (X_test) and digit labels (y_test).
X_test, y_test = test

# Normalize input images for train and test datasets

#### Cast X_train values to 32-bit floats and Normalize X_train

In [14]:
# Cast the training images to float32 and divide by 255.0 in one expression.
X_train = X_train.astype(np.float32) / 255.0

#### Cast X_test values to 32-bit floats and Normalize X_test

In [15]:
# Cast the test images to float32 and divide by 255.0 in one expression.
X_test = X_test.astype(np.float32) / 255.0

#### Reshape X_train so that it is the shape that Conv2D expects

In [16]:
# Image layout handed to Conv2D: rows x cols x channels
img_rows, img_cols, num_channels = 28, 28, 1

# Keep the example count and give every image an explicit channel axis
X_train = X_train.reshape((len(X_train), img_rows, img_cols, num_channels))

print(X_train.shape)

X_train[0].shape

(60000, 28, 28, 1)


(28, 28, 1)

#### Reshape X_test so that it is the shape that Conv2D expects


In [17]:
# Same 4-D (examples, rows, cols, channels) layout for the test images
X_test = X_test.reshape((len(X_test), img_rows, img_cols, num_channels))

print(X_test.shape)

X_test[0].shape

(10000, 28, 28, 1)


(28, 28, 1)

# Create MNIST tf dataset

#### tf dataset for training images and labels

In [18]:
# Pair the training images with their digit labels as a tf.data dataset.
train_ds = tf.data.Dataset.from_tensor_slices((X_train, y_train))

In [19]:
# Pair the test images with their digit labels as a tf.data dataset.
test_ds = tf.data.Dataset.from_tensor_slices((X_test, y_test))

# MNIST Shuffle and minibatch train_ds & test_ds

#### Shuffle entire dataset (shuffle_buffer_size := 60k = num_train_examples) & make mini-batches of 32 examples

In [20]:
# Explanation from TensorFlow: "This dataset fills a buffer with buffer_size elements, 
# then randomly samples elements from this buffer, replacing the selected elements with new elements. 
# For perfect shuffling, a buffer size greater than or equal to the full size of the dataset is required." 
# https://www.tensorflow.org/api_docs/python/tf/data/Dataset?version=stable

In [21]:
# Buffer covers all 60000 training examples, then group into mini-batches of 32
train_ds = train_ds.shuffle(buffer_size=60000, seed=1)
train_ds = train_ds.batch(batch_size=32)

Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


In [22]:
# Buffer covers all 10000 test examples, then group into mini-batches of 32
test_ds = test_ds.shuffle(buffer_size=10000, seed=1)
test_ds = test_ds.batch(batch_size=32)

# Create "is it odd?" tf dataset

In [23]:
# Unlike the other task labels, the "is it odd?" training labels are read from a local text file
# instead of being derived from y_train.
# NOTE(review): presumably 1 = odd digit, 0 = even, in the same order as X_train — confirm against the file.
y_odd_train = np.loadtxt("train_labels_mnist_odd_classification").astype(np.uint8)

In [24]:
# "is it odd?" test labels, likewise read from a local text file.
# NOTE(review): presumably 1 = odd digit, 0 = even, in the same order as X_test — confirm against the file.
y_odd_test = np.loadtxt("test_labels_mnist_odd_classification").astype(np.uint8)

In [25]:
# Same training images as the MNIST task, paired with the "is it odd?" labels.
odd_train_ds = tf.data.Dataset.from_tensor_slices((X_train, y_odd_train))

In [26]:
# Same test images as the MNIST task, paired with the "is it odd?" labels.
odd_test_ds = tf.data.Dataset.from_tensor_slices((X_test, y_odd_test))

# "is it odd?" Shuffle and minibatch train and test datasets

In [27]:
# Buffer covers all 60000 training examples, then group into mini-batches of 32
odd_train_ds = odd_train_ds.shuffle(buffer_size=60000, seed=1)
odd_train_ds = odd_train_ds.batch(batch_size=32)

In [28]:
# Buffer covers all 10000 test examples, then group into mini-batches of 32
odd_test_ds = odd_test_ds.shuffle(buffer_size=10000, seed=1)
odd_test_ds = odd_test_ds.batch(batch_size=32)

# Dataset label creation

In [29]:
# div by 3 training and testing labels
div_3_y_train = (y_train%3 == 0).astype(np.uint8)
div_3_y_test = (y_test%3 == 0).astype(np.uint8)

# div by 5 training and testing labels
div_5_y_train = (y_train%5 == 0).astype(np.uint8)
div_5_y_test = (y_test%5 == 0).astype(np.uint8)

# even training and testing labels
even_y_train = (y_train%2 == 0).astype(np.uint8)
even_y_test = (y_test%2 == 0).astype(np.uint8)

# bottom half training and testing labels
bottom_y_train = (y_train<5).astype(np.uint8)
bottom_y_test = (y_test<5).astype(np.uint8)

# top half training and testing labels
top_y_train = (y_train>4).astype(np.uint8)
top_y_test = (y_test>4).astype(np.uint8)


In [30]:
def are_labels_correct(mnist_train_labels, mnist_test_labels, custom_task_train_labels, custom_task_test_labels):
    """Print the first ten MNIST labels next to the first ten custom-task labels.

    Nothing is checked or returned: the four printed rows (train pair, then
    test pair) are meant to be compared by eye.
    """
    first_ten = slice(None, 10)
    rows = (
        ("mnist train y: ", mnist_train_labels),
        ("custom train y:", custom_task_train_labels),
        ("mnist test y: ", mnist_test_labels),
        ("custom test y:", custom_task_test_labels),
    )
    for caption, labels in rows:
        print(caption, labels[first_ten])


In [31]:
# Spot-check the divisible-by-3 labels against the raw digit labels.
are_labels_correct(y_train, y_test, div_3_y_train, div_3_y_test)

mnist train y:  [5 0 4 1 9 2 1 3 1 4]
custom train y: [0 1 0 0 1 0 0 1 0 0]
mnist test y:  [7 2 1 0 4 1 4 9 5 9]
custom test y: [0 0 0 1 0 0 0 1 0 1]


In [32]:
# Spot-check the divisible-by-5 labels against the raw digit labels.
are_labels_correct(y_train, y_test, div_5_y_train, div_5_y_test)

mnist train y:  [5 0 4 1 9 2 1 3 1 4]
custom train y: [1 1 0 0 0 0 0 0 0 0]
mnist test y:  [7 2 1 0 4 1 4 9 5 9]
custom test y: [0 0 0 1 0 0 0 0 1 0]


In [33]:
# Spot-check the even-digit labels against the raw digit labels.
are_labels_correct(y_train, y_test, even_y_train, even_y_test)

mnist train y:  [5 0 4 1 9 2 1 3 1 4]
custom train y: [0 1 1 0 0 1 0 0 0 1]
mnist test y:  [7 2 1 0 4 1 4 9 5 9]
custom test y: [0 1 0 1 1 0 1 0 0 0]


In [34]:
# Spot-check the bottom-half (digit < 5) labels against the raw digit labels.
are_labels_correct(y_train, y_test, bottom_y_train, bottom_y_test)

mnist train y:  [5 0 4 1 9 2 1 3 1 4]
custom train y: [0 1 1 1 0 1 1 1 1 1]
mnist test y:  [7 2 1 0 4 1 4 9 5 9]
custom test y: [0 1 1 1 1 1 1 0 0 0]


In [35]:
# Spot-check the top-half (digit > 4) labels against the raw digit labels.
are_labels_correct(y_train, y_test, top_y_train, top_y_test)

mnist train y:  [5 0 4 1 9 2 1 3 1 4]
custom train y: [1 0 0 0 1 0 0 0 0 0]
mnist test y:  [7 2 1 0 4 1 4 9 5 9]
custom test y: [1 0 0 0 0 0 0 1 1 1]


# tf dataset creation

In [36]:
def make_new_task_train_and_test_datasets(new_train_labels, new_test_labels):
    """Pair the shared MNIST images with one task's labels as tf.data datasets.

    The images come from the notebook-level X_train / X_test arrays.

    Returns:
        (train_dataset, test_dataset); no shuffling or batching is applied here.
    """
    splits = ((X_train, new_train_labels), (X_test, new_test_labels))
    train_dataset, test_dataset = (
        tf.data.Dataset.from_tensor_slices(split) for split in splits
    )
    return train_dataset, test_dataset
    

In [37]:
# One (train, test) dataset pair per extra task, all built on the same images
div_3_train_ds, div_3_test_ds = make_new_task_train_and_test_datasets(
    new_train_labels=div_3_y_train, new_test_labels=div_3_y_test
)

div_5_train_ds, div_5_test_ds = make_new_task_train_and_test_datasets(
    new_train_labels=div_5_y_train, new_test_labels=div_5_y_test
)

even_train_ds, even_test_ds = make_new_task_train_and_test_datasets(
    new_train_labels=even_y_train, new_test_labels=even_y_test
)

bottom_train_ds, bottom_test_ds = make_new_task_train_and_test_datasets(
    new_train_labels=bottom_y_train, new_test_labels=bottom_y_test
)

top_train_ds, top_test_ds = make_new_task_train_and_test_datasets(
    new_train_labels=top_y_train, new_test_labels=top_y_test
)



# Shuffle and minibatch

In [38]:
# Shuffle buffers are sized to the split they shuffle, so every split gets a full shuffle.
# The test splits hold 10000 examples, matching the buffer used for test_ds and odd_test_ds
# (previously 60000 was used here for the test splits as well).
TRAIN_SHUFFLE_BUFFER = 60000  # number of training examples
TEST_SHUFFLE_BUFFER = 10000   # number of test examples
TASK_BATCH_SIZE = 32

div_3_train_ds = div_3_train_ds.shuffle(TRAIN_SHUFFLE_BUFFER, seed=1).batch(TASK_BATCH_SIZE)
div_3_test_ds = div_3_test_ds.shuffle(TEST_SHUFFLE_BUFFER, seed=1).batch(TASK_BATCH_SIZE)

div_5_train_ds = div_5_train_ds.shuffle(TRAIN_SHUFFLE_BUFFER, seed=1).batch(TASK_BATCH_SIZE)
div_5_test_ds = div_5_test_ds.shuffle(TEST_SHUFFLE_BUFFER, seed=1).batch(TASK_BATCH_SIZE)

even_train_ds = even_train_ds.shuffle(TRAIN_SHUFFLE_BUFFER, seed=1).batch(TASK_BATCH_SIZE)
even_test_ds = even_test_ds.shuffle(TEST_SHUFFLE_BUFFER, seed=1).batch(TASK_BATCH_SIZE)

bottom_train_ds = bottom_train_ds.shuffle(TRAIN_SHUFFLE_BUFFER, seed=1).batch(TASK_BATCH_SIZE)
bottom_test_ds = bottom_test_ds.shuffle(TEST_SHUFFLE_BUFFER, seed=1).batch(TASK_BATCH_SIZE)

top_train_ds = top_train_ds.shuffle(TRAIN_SHUFFLE_BUFFER, seed=1).batch(TASK_BATCH_SIZE)
top_test_ds = top_test_ds.shuffle(TEST_SHUFFLE_BUFFER, seed=1).batch(TASK_BATCH_SIZE)

# Architecture

In [39]:
# model based off of https://www.tensorflow.org/guide/data

In [40]:
# Five identical conv layers: 16 filters, 3x3 kernel, "same" padding, ReLU
conv_stack = [
    tf.keras.layers.Conv2D(filters=16, kernel_size=(3, 3), padding="same", activation="relu", use_bias=True)
    for _ in range(5)
]

# Layer order matters: the Ntask (Context) layer is later addressed by its position in conv_model.layers.
# NOTE(review): num_hrrs=7 presumably provides one context per task trained below — confirm in Context_Layer.
conv_model = tf.keras.Sequential(
    conv_stack
    + [
        tf.keras.layers.Flatten(),
        Ntask(num_hrrs=7),
        tf.keras.layers.Dense(10, activation="softmax", use_bias=True),
    ]
)

init called


In [41]:
# Position of the Ntask (Context) layer in conv_model.layers:
# five Conv2D layers (0-4), Flatten (5), Ntask (6). Update this if the architecture changes.
n_task_idx = 6

In [42]:
# Integer class labels with a softmax output -> sparse categorical cross-entropy
sparse_xent_loss = tf.keras.losses.SparseCategoricalCrossentropy()

conv_model.compile(
    optimizer="adam",
    loss=sparse_xent_loss,
    metrics=["accuracy"],
)

In [43]:
# conv_model.summary()

##### Test model performance utility function

In [44]:
def eval_performance(model, test_dataset):
    """Evaluate `model` on `test_dataset` and print the first two result entries.

    Entry 0 of the value returned by `model.evaluate` is reported as the test
    loss and entry 1 as the test accuracy. Nothing is returned.
    """
    results = model.evaluate(test_dataset)
    for caption, position in (('Test loss:', 0), ('Test accuracy:', 1)):
        print(caption, results[position])

# Training

# This works: 100 multi_epochs, 1 epoch per task, 5 conv layers(16, (3,3)) 

In [45]:
# Number of passes over the full set of tasks
n_epochs = 100
# Epochs passed to fit() for each task within one pass.
# NOTE: the name is misspelled ("epcohs"); the training cell reads it under this exact name.
epcohs_per_task=1

# Manually training each task on a respective context -- NOT DYNAMIC

In [46]:
start_time = time.time()

# Task i is always trained with context i hot; the order of this list defines that mapping.
task_train_datasets = [
    train_ds,         # 0: MNIST digits
    odd_train_ds,     # 1: odd
    div_3_train_ds,   # 2: divisible by 3
    div_5_train_ds,   # 3: divisible by 5
    even_train_ds,    # 4: even
    bottom_train_ds,  # 5: bottom half
    top_train_ds,     # 6: top half
]

for multitask_epoch in tqdm(range(n_epochs)):
    for context_idx, task_ds in enumerate(task_train_datasets):
        try:
            conv_model.layers[n_task_idx].set_hot_context(context_idx)
        except Exception:
            # Can't set hot until the model has been fit or built with a Sequential model,
            # so a failure is tolerated only for the first task of the first pass
            # (presumably the layer then starts on context 0 — confirm in Context_Layer).
            # Any later failure would silently train a task on the wrong context, so re-raise it.
            if multitask_epoch > 0 or context_idx > 0:
                raise

        conv_model.fit(task_ds, epochs=epcohs_per_task, shuffle=False)

end_time = time.time()
# Report the elapsed minutes with two decimal places
print("Elapsed time: {:.2f} minutes".format((end_time - start_time)/60))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

build called
hardcoed_contexts=False







Elapsed time: 116.574230 minutes


In [47]:
#(end_time-start_time)/60

#### Evaluate model

# The model has learned the 7 tasks fairly accurately, one task per context

In [47]:
# MNIST
# (printed name, test dataset) in context order: entry i is evaluated with context i hot
evaluation_plan = [
    ("MNIST", test_ds),
    ("Odd", odd_test_ds),
    ("Div 3", div_3_test_ds),
    ("Div 5", div_5_test_ds),
    ("Even", even_test_ds),
    ("Bottom", bottom_test_ds),   # bottom half digits
    ("Top", top_test_ds),         # top half digits
]

for context_idx, (task_name, task_test_ds) in enumerate(evaluation_plan):
    print(task_name)
    conv_model.layers[n_task_idx].set_hot_context(context_idx)
    eval_performance(conv_model, task_test_ds)
    print()

MNIST
Test loss: 0.13293178665411146
Test accuracy: 0.9854

Odd
Test loss: 0.0846629048427571
Test accuracy: 0.9836

Div 3
Test loss: 0.24879794012116918
Test accuracy: 0.9484

Div 5
Test loss: 0.119822266672538
Test accuracy: 0.9765

Even
Test loss: 0.11996755623433414
Test accuracy: 0.9784

Bottom
Test loss: 0.08186503138546346
Test accuracy: 0.9838

Top
Test loss: 0.08748524494700581
Test accuracy: 0.9833

