In [60]:
#!pip freeze > requirements.txt

In [4]:
# Widen the notebook page so wide outputs are not clipped.
# `IPython.display` is the public import location; importing `display` from
# `IPython.core.display` is deprecated in newer IPython releases.
from IPython.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))

In [5]:
# TensorFlow
import tensorflow as tf
from tensorflow.keras.layers import Input, Dense
from tensorflow.keras.models import Model
from Context_Layer_auto_switch_numpy_untrainable_weights import Context as Ntask

# Graphing
import matplotlib.pyplot as plt
%matplotlib inline

# Seeding
from random import randrange 
import random
random.seed(5)
tf.set_random_seed(5)
tf.enable_eager_execution()

# Datasets
import dataset_8_logic_gates as data
import logic_gate_test

# General
import numpy as np
from tqdm.notebook import tqdm

### Turn off GPU


In [6]:
# Turn off the GPU by hiding every CUDA device from this process.
# NOTE(review): this runs after `import tensorflow`; presumably it still takes
# effect because no TensorFlow op has executed yet — confirm, or move it above
# the TensorFlow import.
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "-1"

### Datasets

In [7]:
# Notebook-level aliases for the per-gate datasets exposed by dataset_8_logic_gates.
xor_data = data.xor_data
xnor_data = data.xnor_data
and_data = data.and_data
or_data = data.or_data
nor_data = data.nor_data
nand_data = data.nand_data
custom_gate_0_data = data.custom_gate_0_data
custom_gate_1_data = data.custom_gate_1_data

# List of all datasets; training and testing index into it by task number.
# NOTE(review): presumably in the same order as the aliases above — confirm in the module.
all_data = data.all_data

In [8]:
# all_data = [
    
#     xor_data,
#     xnor_data,
#     and_data,
#     #or_data,
#     #nor_data,
#     #nand_data,
#     #custom_gate_0_data
    
# ]

In [9]:

len(all_data)

8

### Architecture

In [10]:
# Position of the Ntask (Context) layer in model.layers
# (0 = InputLayer, 1 = first Dense, 2 = Context, 3 = output Dense).
NTASK_LAYER_IDX = 2
# One context per task, i.e. per dataset in all_data.
num_task_contexts=len(all_data)

In [11]:
# Network: 2 inputs -> Dense(20, relu) -> Ntask context layer -> Dense(1, sigmoid).
# The input shape is spelled out as a tuple; `Input(2,)` passed the bare int 2
# (a trailing comma inside a call does not build a tuple).
inp = Input(shape=(2,))
x = Dense(20, activation='relu')(inp)
# calc_context_loss assumes the layer right after the Ntask layer is a Dense
# layer with a bias, so keep the output Dense directly behind it.
x = Ntask(num_task_contexts, hardcoded_contexts=False)(x)
#x = Dense(20, activation='relu')(x)
x = Dense(1, activation="sigmoid")(x)

init called
build called
hardcoed_contexts=False


### Model

In [12]:
model = Model(inputs=inp, outputs=x)

In [13]:
model.summary()

Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 2)]               0         
_________________________________________________________________
dense (Dense)                (None, 20)                60        
_________________________________________________________________
context (Context)            (None, 20)                160       
_________________________________________________________________
dense_1 (Dense)              (None, 1)                 21        
Total params: 241
Trainable params: 81
Non-trainable params: 160
_________________________________________________________________


### Loss

In [14]:
def loss_fn(labels, predictions):
    """Return the Keras binary cross-entropy of `predictions` against `labels`."""
    binary_crossentropy = tf.keras.losses.binary_crossentropy
    return binary_crossentropy(y_true=labels, y_pred=predictions)

### Optimizer

In [15]:
optimizer = tf.keras.optimizers.Adam(1e-4)

# Context Switching Functions

In [16]:
def switch_to_better_fitting_context(next_context_idx, model, NTASK_LAYER_IDX, epoch_grads, cur_epoch_context_loss):
    """
    Switch the Ntask layer to a better fitting context and reset the per-epoch
    bookkeeping that belonged to the previous context.

    Args:
        next_context_idx: index of the context to make hot.
        model: model whose `layers[NTASK_LAYER_IDX]` exposes `set_hot_context`.
        NTASK_LAYER_IDX: position of the Ntask layer in `model.layers`.
        epoch_grads: list of gradients collected this epoch; emptied in place.
        cur_epoch_context_loss: per-context loss list; the new context's entry is zeroed.

    Returns:
        The new hot context index (equal to `next_context_idx`).
    """
    ntask_layer = model.layers[NTASK_LAYER_IDX]
    ntask_layer.set_hot_context(next_context_idx)

    # Gradients gathered under the old context must not be applied to the new one.
    epoch_grads.clear()
    # The new context starts accumulating its loss from zero.
    cur_epoch_context_loss[next_context_idx] = 0

    return next_context_idx

In [17]:
def switch_to_next_context(next_context_idx, model, context_idx, hot_context_idx):
    """
    Make `next_context_idx` the hot context of the Ntask layer and return it.

    Args:
        next_context_idx: index of the context to activate.
        model: model whose `layers[context_idx]` exposes `set_hot_context`.
        context_idx: position of the Ntask layer in `model.layers`.
        hot_context_idx: the currently hot context; kept so existing calls keep
            working, but its value is not read.

    Returns:
        The new hot context index (equal to `next_context_idx`).

    ex: called with next_context_idx == 3, the context at idx 3 becomes the hot
        context and 3 is returned for the caller to store as hot_context_idx.
    """
    # Use the layer index handed in by the caller rather than the notebook-level
    # NTASK_LAYER_IDX global, so the function works for any model layout.
    model.layers[context_idx].set_hot_context(next_context_idx)

    return next_context_idx

In [18]:
def calc_context_loss(gradients, model, ntask_layer_idx_in_model, idx_of_next_layer_bias_gradient, idx_of_next_layer_weights_in_get_weights_call=0):
    """
    Compute the context loss for one sample: the bias gradient of the layer that
    follows the Ntask layer is taken as that layer's delta, multiplied by the
    transpose of that layer's weights, and the mean squared error of the result
    against zeros is returned.

    IMPORTANT:
    1) Assumes no use of activation function on Ntask layer
    2) Assumes that the layer following the Ntask layer:
        a) Is a Dense layer
        b) Is using bias
           — ex: Dense(20, ... , use_bias=True)
           — note Keras Dense layer uses bias by default if no value is given for use_bias param
    3) Assumes index of the next layer's gradient is known within the gradients list returned from gradient tape in a tape.gradient call

    Args:
        gradients: list returned by `tape.gradient(loss, model.trainable_variables)`.
        model: model containing the Ntask layer.
        ntask_layer_idx_in_model: index of the Ntask layer in `model.layers`.
        idx_of_next_layer_bias_gradient: index in `gradients` of the bias gradient
            of the layer after the Ntask layer.
        idx_of_next_layer_weights_in_get_weights_call: index of that layer's weight
            matrix in its `get_weights()` list (default 0).

    Returns:
        The value of `tf.keras.losses.mean_squared_error(zeros, context_delta)`.
    """

    delta_at_next_layer = gradients[idx_of_next_layer_bias_gradient]
    transpose_of_weights_at_next_layer = tf.transpose(model.layers[ntask_layer_idx_in_model+1].get_weights()[idx_of_next_layer_weights_in_get_weights_call])

    # Calculate delta at ntask layer.
    # np.float64 replaces the `np.float` alias (same dtype); the alias is
    # deprecated and no longer exists in recent NumPy releases.
    context_delta = np.dot( delta_at_next_layer, transpose_of_weights_at_next_layer ).astype(np.float64)

    # Calculate Context Error
    # Keras MSE must have both args be arrs of floats, if one or both are arrs of ints, the output will be rounded to an int
    context_loss = tf.keras.losses.mean_squared_error(np.zeros(len(context_delta)), context_delta)

    return context_loss

### Training Functions

In [19]:
def custom_forward_pass(dataset, model, epoch_grads, context_idx, cur_epoch_context_loss, all_epoch_losses):
    """
    Run one epoch of forward passes over `dataset` without applying any gradients.

    For every (x, y) sample the prediction loss and its gradients are computed;
    the gradients are appended to `epoch_grads` and the sample's context loss is
    added to the hot context's entry of `cur_epoch_context_loss`. The epoch's mean
    prediction loss is appended to `all_epoch_losses`.

    Args:
        dataset: sized iterable of (x, y) samples.
        model: the Keras model being trained.
        epoch_grads: list that receives one gradient list per sample.
        context_idx: index of the Ntask layer in `model.layers` (forwarded to
            calc_context_loss).
        cur_epoch_context_loss: per-context loss accumulators.
        all_epoch_losses: running list of per-epoch mean losses.

    Note:
        Reads the notebook-level `hot_context_idx` to decide which accumulator
        to update; it is not passed in as an argument.
    """
    sum_loss = 0

    for x, y in dataset:

        # A one-shot tape is enough here: the gradient is requested exactly once
        # per sample, and a non-persistent tape releases its resources right after
        # that call (a persistent tape holds them until it is deleted).
        with tf.GradientTape() as tape:
            predictions = model(x, training=True) # forward pass
            pred_loss = loss_fn(y, predictions)   # get loss

        sum_loss += pred_loss

        gradients = tape.gradient(pred_loss, model.trainable_variables)
        epoch_grads.append(gradients)

        # Index 3 is expected to be the bias gradient of the output Dense layer.
        # NOTE(review): this assumes trainable_variables is ordered
        # [dense kernel, dense bias, dense_1 kernel, dense_1 bias] — confirm if
        # the architecture changes.
        context_loss = calc_context_loss(gradients,
                                           model,
                                           context_idx,
                                           idx_of_next_layer_bias_gradient=3,
                                           idx_of_next_layer_weights_in_get_weights_call=0)

        cur_epoch_context_loss[hot_context_idx] += context_loss

    avg_loss_for_epoch = sum_loss / len(dataset)

    all_epoch_losses.append(avg_loss_for_epoch)

    return


In [20]:
def apply_grads_and_update_conditional_vars_etc(moving_avg_context_loss, hot_context_idx, cur_epoch_context_loss, epoch_grads, optimizer, model):
    """
    Commit an epoch on the hot context: update its moving-average loss, apply the
    epoch's gradients and hand back freshly reset switching state.

    Args:
        moving_avg_context_loss: per-context moving-average losses; the hot
            context's entry is updated in place.
        hot_context_idx: index of the hot context.
        cur_epoch_context_loss: per-context losses of the current epoch.
        epoch_grads: list of per-sample gradient lists collected this epoch.
        optimizer: object exposing `apply_gradients(grads_and_vars)`.
        model: model exposing `trainable_variables`.

    Returns:
        (diff_errs, num_epochs_without_learning): a zero-filled list with one
        entry per context, and 0.
    """
    # Moving average: mean of the previous average and this epoch's loss.
    moving_avg_context_loss[hot_context_idx] = (moving_avg_context_loss[hot_context_idx] + cur_epoch_context_loss[hot_context_idx]) / 2.0

    # Backprop
    for grads in epoch_grads:
        optimizer.apply_gradients(zip(grads, model.trainable_variables))

    # One slot per context. The size comes from the per-context list that was
    # passed in instead of the notebook-level num_task_contexts global.
    diff_errs = [0] * len(moving_avg_context_loss)
    num_epochs_without_learning = 0

    return diff_errs, num_epochs_without_learning

### Training Hyperparameters

In [21]:
# Notebook-level state shared (via `global`) by train() and test().
# Starting value for every context's moving-average loss and diff_errs entry.
thresh = 1.0
# Per-context moving average of the context loss.
moving_avg_context_loss = [thresh for x in range(num_task_contexts)]
# Index of the context currently active ("hot") in the Ntask layer.
hot_context_idx = 0
# Per-context (moving average - current epoch loss); the largest entry is treated as the best fit.
diff_errs = [thresh for x in range(num_task_contexts)]
# Per-context loss accumulated during the current epoch.
cur_epoch_context_loss = [0 for x in range(num_task_contexts)]
# Mean prediction loss of every forward-pass epoch, in execution order.
all_epoch_losses = []
# A context is rejected when its diff_errs entry falls below this value.
context_switch_threshold = 0


# Train

In [22]:
# Count of epochs in which the hot context was rejected; shared with train()
# through `global` and reset to 0 only after a best-fit context has been picked.
num_epochs_without_learning = 0

def train(model, dataset, n_epochs, debug, plotting_debug, num_task_contexts):
    """
    Train `model` on one task's `dataset` for `n_epochs` epochs, switching the
    Ntask layer's hot context whenever the current context does not fit.

    Per epoch:
      1. Reset the hot context's epoch loss and run a forward pass that collects
         gradients without applying them.
      2. If `num_task_contexts` rejections have accumulated, pick the context
         with the largest diff_errs entry; switch to it if needed (re-running the
         forward pass), apply the gradients, reset the counters and go to the
         next epoch.
      3. Otherwise compare the hot context's moving-average loss with this
         epoch's loss: below the threshold -> count a rejection and move on to
         the next context without learning; else -> update the moving average
         and apply the gradients.

    Args:
        model: Keras model containing the Ntask layer.
        dataset: samples of the current task.
        n_epochs: number of epochs to run.
        debug: not used by this function.
        plotting_debug: not used by this function.
        num_task_contexts: number of contexts available in the Ntask layer.

    Reads and mutates notebook-level state through `global`, and uses the
    notebook-level `optimizer` and `NTASK_LAYER_IDX`.
    """

    global all_epoch_losses

    global hot_context_idx

    global cur_epoch_context_loss
    global moving_avg_context_loss
    global diff_errs
    global context_switch_threshold

    global num_epochs_without_learning


    for epoch in range(n_epochs):

        #avg_loss_for_epoch = 0
        # Gradients of this epoch; applied only if the hot context is kept.
        epoch_grads = []
        cur_epoch_context_loss[hot_context_idx] = 0


        #======================#
        # General Forward Pass #
        #----------------------#
        custom_forward_pass(dataset, model, epoch_grads, NTASK_LAYER_IDX, cur_epoch_context_loss, all_epoch_losses)


        # If gone num_task_contexts epochs without learning on a context
        # And No Context Fits Well, Need To Pick Best Fit
        if num_epochs_without_learning >= num_task_contexts:


            # Find Best Fitting Context For Current Task
            # bc went over all the contexts on the current task
            # the diff_errs accurately tells us which context fits best for this task
            # (best fit = largest diff_errs entry; ties go to the lowest index)
            next_context_idx = diff_errs.index(max(diff_errs))

            # Best fitting context is the one that just had a forward pass performed on it
            # So -> Apply the Gradients
            # Continue to next epoch
            if next_context_idx == hot_context_idx:
                diff_errs, num_epochs_without_learning = apply_grads_and_update_conditional_vars_etc(moving_avg_context_loss, hot_context_idx, cur_epoch_context_loss, epoch_grads, optimizer, model)
                continue

            # Current Context does not have the best fit, so don't apply its grads
            # Now that the best fitting context has been found, train on it
            else:

                # Switch to best fitting Context
                # (this also empties epoch_grads and zeroes the new context's epoch loss)
                hot_context_idx = switch_to_better_fitting_context(next_context_idx, model, NTASK_LAYER_IDX, epoch_grads, cur_epoch_context_loss)

                #==================#
                # General Training #
                #==================#
                # Second forward pass within the same epoch, now on the best fitting context.
                custom_forward_pass(dataset, model, epoch_grads, NTASK_LAYER_IDX, cur_epoch_context_loss, all_epoch_losses)


                diff_errs, num_epochs_without_learning = apply_grads_and_update_conditional_vars_etc(moving_avg_context_loss, hot_context_idx, cur_epoch_context_loss, epoch_grads, optimizer, model)
                continue

        # Candidate to try next if the hot context is rejected (round-robin).
        next_context_idx = (hot_context_idx + 1) % len(moving_avg_context_loss)


        # Positive when this epoch's context loss is below the moving average.
        diff_errs[hot_context_idx] = moving_avg_context_loss[hot_context_idx] - cur_epoch_context_loss[hot_context_idx]

        ##########################
        # Open question: should this happen if right context too?
        # If so, where should this occur?
        ###########################
        if diff_errs[hot_context_idx] < context_switch_threshold:
            num_epochs_without_learning += 1


            # TODO: track which epoch this block happens in and plot it on a graph.
            # Over time, this block should get executed less,
            # so when tasks are learned well, should see this executed only once: first epoch of new task.

            # find best context
            # if it is my current context -> learn
            # else switch to best

            # We don't lose epochs here by doing it this way.

            # Gradients of this epoch are dropped: no learning on a rejected context.
            hot_context_idx = switch_to_next_context(next_context_idx, model, NTASK_LAYER_IDX, hot_context_idx)




        #--------------------------
        # didn't switch, so apply grads
        else:
            # update moving avg err
            moving_avg_context_loss[hot_context_idx] = (moving_avg_context_loss[hot_context_idx] + cur_epoch_context_loss[hot_context_idx]) / 2.0

            for grads in epoch_grads:
                optimizer.apply_gradients(zip(grads, model.trainable_variables))


### Train Randomly on tasks for N cycles

In [23]:
def random_training_in_cycles(all_data, model, num_tasks, num_cycles, num_epochs):
    """
    Train on randomly chosen tasks, never picking the same task twice in a row.

    Runs `num_cycles * num_tasks` training rounds; each round draws a task index
    with `randrange(num_tasks)` and calls `train` on `all_data[index]` for
    `num_epochs` epochs.

    Args:
        all_data: list of task datasets, indexed by task number.
        model: model passed through to `train`.
        num_tasks: number of tasks to draw from (also passed to `train` as
            `num_task_contexts`).
        num_cycles: number of cycles; total rounds = num_cycles * num_tasks.
        num_epochs: epochs per round.

    Returns:
        List of the task indices in the order they were trained on.
    """

    prev_task_data_idx = num_tasks-1    # init first choice as last task
    order_of_tasks_learned_on = []

    # One flat loop is equivalent to:
    # for c in range(num_cycles): for t in range(num_tasks):
    for _ in tqdm(range( num_cycles * num_tasks )):

        cur_task_data_idx = randrange(num_tasks)

        # Re-draw until the task differs from the previous one. With a single
        # task there is no other choice, so the re-draw is skipped instead of
        # looping forever.
        while num_tasks > 1 and cur_task_data_idx == prev_task_data_idx:
            cur_task_data_idx = randrange(num_tasks)

        cur_task_data = all_data[cur_task_data_idx]
        order_of_tasks_learned_on.append(cur_task_data_idx)

        train(model, cur_task_data, n_epochs=num_epochs, debug=False, plotting_debug=False, num_task_contexts=num_tasks)

        prev_task_data_idx = cur_task_data_idx

    return order_of_tasks_learned_on

# ------------------------------------------

# Testing prep

In [40]:
def update_conditional_vars_etc(moving_avg_context_loss, hot_context_idx, cur_epoch_context_loss, epoch_grads, optimizer, model):
    """
    Testing counterpart of apply_grads_and_update_conditional_vars_etc: updates
    the hot context's moving-average loss and resets the switching state, but
    applies NO gradients, so the model weights stay untouched.

    Args:
        moving_avg_context_loss: per-context moving-average losses; the hot
            context's entry is updated in place.
        hot_context_idx: index of the hot context.
        cur_epoch_context_loss: per-context losses of the current epoch.
        epoch_grads: not used; kept so the signature matches the training version.
        optimizer: not used; kept so the signature matches the training version.
        model: not used; kept so the signature matches the training version.

    Returns:
        (diff_errs, num_epochs_without_learning): a zero-filled list with one
        entry per context, and 0.
    """
    # Moving average: mean of the previous average and this epoch's loss.
    moving_avg_context_loss[hot_context_idx] = (moving_avg_context_loss[hot_context_idx] + cur_epoch_context_loss[hot_context_idx]) / 2.0

    # One slot per context. The size comes from the per-context list that was
    # passed in instead of the notebook-level num_task_contexts global.
    diff_errs = [0] * len(moving_avg_context_loss)

    # Backprop intentionally disabled while testing:
    #for grads in epoch_grads:
    #    optimizer.apply_gradients(zip(grads, model.trainable_variables))

    num_epochs_without_learning = 0

    return diff_errs, num_epochs_without_learning

In [41]:
epoch_accuracies = []
double_epoch_count = 0

In [42]:
# Counter read and advanced by test_forward_pass (which declares it `global`).
# NaN means "not counting". A `global` statement at module level does nothing,
# so it has been dropped here.
test_switch_epoch_counter = np.nan

In [43]:
def test_forward_pass(dataset, model, epoch_grads, context_idx, cur_epoch_context_loss, all_epoch_losses):
    """
    Forward pass used while testing: like custom_forward_pass, but it also
    records the epoch's accuracy in the notebook-level `epoch_accuracies`.

    While `test_switch_epoch_counter` is a number below `num_task_contexts - 1`,
    the call only increments the counter, appends the marker "switch" to
    `epoch_accuracies` and returns without evaluating anything. Once the counter
    reaches `num_task_contexts - 1` it is set to NaN and every later call
    evaluates normally.

    Args:
        dataset: sized iterable of (x, y) samples.
        model: the Keras model under test.
        epoch_grads: list that receives one gradient list per sample.
        context_idx: index of the Ntask layer in `model.layers` (forwarded to
            calc_context_loss).
        cur_epoch_context_loss: per-context loss accumulators.
        all_epoch_losses: running list of per-epoch mean losses.

    Note:
        Reads the notebook-level `hot_context_idx` and `num_task_contexts`.
    """
    global epoch_accuracies

    global test_switch_epoch_counter

    if  ( not np.isnan(test_switch_epoch_counter) ) and ( test_switch_epoch_counter < num_task_contexts-1 ):
        test_switch_epoch_counter += 1
        #append switch
        epoch_accuracies.append("switch")

        return

    # Comparisons with NaN are False, so this only fires on the call where the
    # counter has just reached its limit.
    if test_switch_epoch_counter >= num_task_contexts-1:
        test_switch_epoch_counter = np.nan


    sum_loss = 0

    acc = 0.0
    for x, y in dataset:

        # A one-shot tape is enough here: the gradient is requested exactly once
        # per sample, and a non-persistent tape releases its resources right after
        # that call (a persistent tape holds them until it is deleted).
        with tf.GradientTape() as tape:
            predictions = model(x, training=True) # forward pass
            pred_loss = loss_fn(y, predictions)   # get loss

        # Round the prediction to 0/1 and compare it with the label.
        rounded_pred = int(tf.math.round(predictions).numpy()[0][0])

        if rounded_pred == y:
            acc += 1.0

        sum_loss += pred_loss

        # Gradients are still needed for the context loss, even though the test
        # loop never applies them.
        gradients = tape.gradient(pred_loss, model.trainable_variables)
        epoch_grads.append(gradients)

        context_loss = calc_context_loss(gradients,
                                           model,
                                           context_idx,
                                           idx_of_next_layer_bias_gradient=3,
                                           idx_of_next_layer_weights_in_get_weights_call=0)


        cur_epoch_context_loss[hot_context_idx] += context_loss

    # Fraction of samples predicted correctly in this epoch.
    acc /= len(dataset)

    epoch_accuracies.append(acc)

    avg_loss_for_epoch = sum_loss / len(dataset)

    all_epoch_losses.append(avg_loss_for_epoch)

    return

# General dynamic testing

In [44]:
# Reset the rejection counter shared with train() before testing starts.
num_epochs_without_learning = 0

def test(model, dataset, n_epochs, debug, plotting_debug, num_task_contexts):
    """
    Dynamic testing loop: same context-switching logic as train(), but no
    gradients are ever applied, so the model weights do not change.

    Differences from train():
      * uses test_forward_pass, which records per-epoch accuracy;
      * uses update_conditional_vars_etc, which skips the optimizer step;
      * counts every extra forward pass after a best-fit switch in the
        notebook-level `double_epoch_count`.

    Args:
        model: Keras model containing the Ntask layer.
        dataset: samples of the current task.
        n_epochs: number of epochs to run.
        debug: not used by this function.
        plotting_debug: not used by this function.
        num_task_contexts: number of contexts available in the Ntask layer.

    NOTE(review): a later cell defines another function named `test` with the
    signature (num_contexts, model, cont_idx). Once that cell has run, this
    function is shadowed and random_testing_in_cycles will call the wrong one.
    """

    global double_epoch_count

    global all_epoch_losses

    global hot_context_idx

    global cur_epoch_context_loss
    global moving_avg_context_loss
    global diff_errs
    global context_switch_threshold

    global num_epochs_without_learning


    for epoch in range(n_epochs):

        #avg_loss_for_epoch = 0
        epoch_grads = []
        cur_epoch_context_loss[hot_context_idx] = 0


        #======================#
        # General Forward Pass #
        #----------------------#
        test_forward_pass(dataset, model, epoch_grads, NTASK_LAYER_IDX, cur_epoch_context_loss, all_epoch_losses)


        # If gone num_task_contexts epochs without learning on a context
        # And No Context Fits Well, Need To Pick Best Fit
        if num_epochs_without_learning >= num_task_contexts:


            # Find Best Fitting Context For Current Task
            # bc went over all the contexts on the current task
            # the diff_errs accurately tells us which context fits best for this task
            # (best fit = largest diff_errs entry; ties go to the lowest index)
            next_context_idx = diff_errs.index(max(diff_errs))

            # Best fitting context is the one that just had a forward pass performed on it
            # So -> update the bookkeeping (no gradients are applied while testing)
            # Continue to next epoch
            if next_context_idx == hot_context_idx:
                diff_errs, num_epochs_without_learning = update_conditional_vars_etc(moving_avg_context_loss, hot_context_idx, cur_epoch_context_loss, epoch_grads, optimizer, model)

                continue

            # Current Context does not have the best fit
            # Now that the best fitting context has been found, evaluate on it
            else:

                # Switch to best fitting Context
                # (this also empties epoch_grads and zeroes the new context's epoch loss)
                hot_context_idx = switch_to_better_fitting_context(next_context_idx, model, NTASK_LAYER_IDX, epoch_grads, cur_epoch_context_loss)

                #==================#
                # General Training #
                #==================#
                test_forward_pass(dataset, model, epoch_grads, NTASK_LAYER_IDX, cur_epoch_context_loss, all_epoch_losses)

                # this is running an extra epoch, we need to keep track
                double_epoch_count += 1

                diff_errs, num_epochs_without_learning = update_conditional_vars_etc(moving_avg_context_loss, hot_context_idx, cur_epoch_context_loss, epoch_grads, optimizer, model)

                continue

        # Candidate to try next if the hot context is rejected (round-robin).
        next_context_idx = (hot_context_idx + 1) % len(moving_avg_context_loss)


        # Positive when this epoch's context loss is below the moving average.
        diff_errs[hot_context_idx] = moving_avg_context_loss[hot_context_idx] - cur_epoch_context_loss[hot_context_idx]

        ##########################
        # Open question: should this happen if right context too?
        # If so, where should this occur?
        ###########################
        if diff_errs[hot_context_idx] < context_switch_threshold:
            num_epochs_without_learning += 1


            # TODO: track which epoch this block happens in and plot it on a graph.
            # Over time, this block should get executed less,
            # so when tasks are learned well, should see this executed only once: first epoch of new task.

            # find best context
            # if it is my current context -> learn
            # else switch to best

            # We don't lose epochs here by doing it this way.

            hot_context_idx = switch_to_next_context(next_context_idx, model, NTASK_LAYER_IDX, hot_context_idx)




        #--------------------------
        # didn't switch: update the moving average only (no gradients while testing)
        else:
            # update moving avg err
            moving_avg_context_loss[hot_context_idx] = (moving_avg_context_loss[hot_context_idx] + cur_epoch_context_loss[hot_context_idx]) / 2.0

            #for grads in epoch_grads:
            #    optimizer.apply_gradients(zip(grads, model.trainable_variables))


In [45]:
def random_testing_in_cycles(all_data, model, num_tasks, num_cycles, num_epochs):
    """
    Test on randomly chosen tasks, never picking the same task twice in a row.

    Resets the notebook-level `epoch_accuracies`, `double_epoch_count`,
    `weights_before` and `weights_after`, then runs `num_cycles * num_tasks`
    rounds. Each round draws a task index with `randrange(num_tasks)`, sets
    `test_switch_epoch_counter` to 0 and calls
    `test(model, dataset, n_epochs=..., ...)` on that task.

    Args:
        all_data: list of task datasets, indexed by task number.
        model: model passed through to `test`.
        num_tasks: number of tasks to draw from (also passed to `test` as
            `num_task_contexts`).
        num_cycles: number of cycles; total rounds = num_cycles * num_tasks.
        num_epochs: epochs per round.

    Returns:
        List of the task indices in the order they were tested on.

    NOTE(review): a later cell redefines `test` with a different signature;
    this function needs the training-style `test(model, dataset, n_epochs, ...)`
    to be the one in scope.
    """

    global epoch_accuracies
    global double_epoch_count
    global weights_after
    global weights_before
    global test_switch_epoch_counter

    double_epoch_count = 0

    epoch_accuracies = []

    prev_task_data_idx = num_tasks-1    # init first choice as last task
    order_of_tasks_learned_on = []

    weights_before = []
    weights_after = []

    # One flat loop is equivalent to:
    # for c in range(num_cycles): for t in range(num_tasks):
    for _ in tqdm(range( num_cycles * num_tasks )):

        cur_task_data_idx = randrange(num_tasks)

        # Re-draw until the task differs from the previous one. With a single
        # task there is no other choice, so the re-draw is skipped instead of
        # looping forever.
        while num_tasks > 1 and cur_task_data_idx == prev_task_data_idx:
            cur_task_data_idx = randrange(num_tasks)

        cur_task_data = all_data[cur_task_data_idx]
        order_of_tasks_learned_on.append(cur_task_data_idx)

        # Restart the "switch" marker counter consumed by test_forward_pass.
        test_switch_epoch_counter = 0

        test(model, cur_task_data, n_epochs=num_epochs, debug=False, plotting_debug=False, num_task_contexts=num_tasks)

        prev_task_data_idx = cur_task_data_idx

    return order_of_tasks_learned_on

# For checking percent of correct epochs during testing

In [46]:
def percent_correct_epochs(epoch_acc_list):
    """
    Return the fraction of scored epochs that were fully correct.

    An entry equal to 1.0 counts as a correct epoch; the string "switch" marks
    an epoch that was not scored and is excluded from the denominator. Summary
    counts are printed along the way.

    Args:
        epoch_acc_list: list of per-epoch accuracies and/or "switch" markers.

    Returns:
        num_correct / (number of entries - number of "switch" markers), or 0.0
        when there is no scored epoch (empty list or only "switch" markers).
    """

    print( "number of epochs:", len( epoch_acc_list ) )

    num_switches = 0
    num_correct = 0
    for acc in epoch_acc_list:
        if acc == 1.0:
            num_correct += 1

        elif acc == "switch":
            num_switches += 1

    print("num_correct:",num_correct)
    # Kept so the printed output stays the same (the count used to be shown twice).
    print(num_correct)

    print("num switches", num_switches)

    num_scored_epochs = len( epoch_acc_list ) - num_switches
    # Nothing was scored: avoid dividing by zero.
    if num_scored_epochs == 0:
        return 0.0

    return num_correct / float(num_scored_epochs)
            
    

#### Test

In [None]:
epoch_accuracies = []
double_epoch_count = 0

In [None]:
task_order = random_testing_in_cycles(all_data, model, num_tasks=num_task_contexts, num_cycles=10, num_epochs=100)



In [None]:
plt.plot(task_order)
plt.ylabel("Task")
plt.xlabel("Num Cycles * Num Tasks")
plt.title("Pre-Training Task Order")

In [None]:
pre_train_percent_correct_epochs = percent_correct_epochs( epoch_accuracies )

In [None]:
double_epoch_count

In [None]:
pre_train_percent_correct_epochs

#### Train

In [25]:
task_order = random_training_in_cycles(all_data, model, num_tasks=num_task_contexts, num_cycles=20, num_epochs=500)

#task_order = random_training_in_cycles(all_data, model, num_tasks=num_task_contexts, num_cycles=400, num_epochs=500)

#task_order = random_training_in_cycles(all_data, model, num_tasks=num_task_contexts, num_cycles=10, num_epochs=100)

#training_percent_correct_epochs = percent_correct_epochs( epoch_accuracies )
#training_percent_correct_epochs

HBox(children=(FloatProgress(value=0.0, max=160.0), HTML(value='')))

Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where



#### Test

In [47]:
epoch_accuracies = []
double_epoch_count = 0

In [48]:
task_order = random_testing_in_cycles(all_data, model, num_tasks=num_task_contexts, num_cycles=10, num_epochs=100)

post_train_percent_correct_epochs = percent_correct_epochs( epoch_accuracies )
post_train_percent_correct_epochs

HBox(children=(FloatProgress(value=0.0, max=80.0), HTML(value='')))


number of epochs: 8043
num_correct: 2526
2526
num switches 560


0.33756514766804757

In [None]:
task_order

In [None]:
len(task_order)

In [None]:
epoch_accuracies


In [None]:
plt.plot(task_order)
plt.ylabel("Task")
plt.xlabel("Num Cycles * Num Tasks")
plt.title("Post-Training Task Order")

In [None]:
# Bar chart comparing the share of fully correct epochs before and after training.
# Needs both the pre-train and the post-train testing cells to have been run.
objects = ('Pre-Train Testing', 'Post-Train Testing')
y_pos = np.arange(len(objects))
performance = [pre_train_percent_correct_epochs, post_train_percent_correct_epochs]

# NOTE(review): the plotted values are fractions in [0, 1] although the axis
# label says "%".
plt.ylim(0, 1)

plt.bar(y_pos, performance, align='center', alpha=0.5)
plt.xticks(y_pos, objects)
plt.ylabel('% Correct Epochs')
plt.title('{} Logic Gates'.format(num_task_contexts))

plt.show()

In [None]:
hot = 1

In [None]:
model.layers[NTASK_LAYER_IDX].set_hot_context(hot)

In [None]:

for i in all_data[1]:
    print(model.predict(i[0]) )



In [None]:
all_data[0]

In [None]:
all_data[1]

In [None]:
all_data[2]

### Prepare for Plotting

In [None]:
# 1-based epoch numbers, one per recorded loss.
epochs = list(range(1, len(all_epoch_losses) + 1))
# Show only the size and the first few values; the full list has one entry per epoch.
print(len(epochs), epochs[:10])

In [None]:
all_epoch_losses_as_float = [x.numpy()[0] for x in all_epoch_losses]

### Plotting

### Try using a log plot
#### Use a logarithmic transform; log base 10 is fine
#### It compresses the top of the range but leaves the bottom the same

In [None]:
# `sns` (seaborn) is never imported in this notebook, so the seaborn version of
# this cell raised NameError. The same line plot is drawn with matplotlib,
# which is already imported; a plain grid stands in for the "darkgrid" style.
plt.figure(figsize=(100, 10))
plt.grid(True)

plt.plot(epochs, all_epoch_losses_as_float)
plt.title("Title", fontsize=100)



### Test

In [49]:
def test(num_contexts, model, cont_idx):
    """
    Print the rounded predictions of `logic_gate_test.test(model)` once for
    every context, making each context hot in turn.

    Args:
        num_contexts: number of contexts to cycle through (0 .. num_contexts-1).
        model: model whose `layers[cont_idx]` exposes `set_hot_context`.
        cont_idx: index of the Ntask layer in `model.layers`.

    NOTE(review): this definition replaces the earlier
    `test(model, dataset, n_epochs, ...)` testing loop; after this cell has run,
    random_testing_in_cycles can no longer call that loop.
    """
    for context in range(num_contexts):
        model.layers[cont_idx].set_hot_context(context)
        # First element: raw predictions (unused here); second: rounded predictions.
        _raw_preds, rounded_preds = logic_gate_test.test(model)

        print(rounded_preds)
        print()

In [50]:
test(num_task_contexts, model, NTASK_LAYER_IDX)

[array([[1.]], dtype=float32), array([[1.]], dtype=float32), array([[1.]], dtype=float32), array([[0.]], dtype=float32)]

[array([[1.]], dtype=float32), array([[0.]], dtype=float32), array([[0.]], dtype=float32), array([[1.]], dtype=float32)]

[array([[1.]], dtype=float32), array([[0.]], dtype=float32), array([[1.]], dtype=float32), array([[0.]], dtype=float32)]

[array([[0.]], dtype=float32), array([[1.]], dtype=float32), array([[1.]], dtype=float32), array([[1.]], dtype=float32)]

[array([[0.]], dtype=float32), array([[1.]], dtype=float32), array([[0.]], dtype=float32), array([[1.]], dtype=float32)]

[array([[0.]], dtype=float32), array([[1.]], dtype=float32), array([[1.]], dtype=float32), array([[0.]], dtype=float32)]

[array([[0.]], dtype=float32), array([[0.]], dtype=float32), array([[0.]], dtype=float32), array([[1.]], dtype=float32)]

[array([[1.]], dtype=float32), array([[0.]], dtype=float32), array([[0.]], dtype=float32), array([[0.]], dtype=float32)]



In [None]:
all_data

In [28]:
labels

NameError: name 'labels' is not defined

In [None]:
b

In [None]:
#raw_preds, rounded_preds = logic_gate_test.test(model)

In [51]:
def consolidate_task_preds(preds):
    """Return the scalar stored at position [0][0] of every prediction in `preds`."""
    return [pred[0][0] for pred in preds]

In [52]:
def get_all_raw_and_rounded_preds(num_tasks, cont_idx, model):
    """
    Collect the raw and rounded predictions of `logic_gate_test.test(model)`
    for every context.

    Each context 0 .. num_tasks-1 is made hot in turn; the predictions are
    flattened with consolidate_task_preds. The model is left with the last
    context hot.

    Returns:
        (all_raw_preds, all_rounded_preds): two lists holding one list of
        predictions per context.
    """
    raw_by_context, rounded_by_context = [], []

    for context in range(num_tasks):
        model.layers[cont_idx].set_hot_context(context)

        raw, rounded = logic_gate_test.test(model)

        raw_by_context.append(consolidate_task_preds(raw))
        rounded_by_context.append(consolidate_task_preds(rounded))

    return raw_by_context, rounded_by_context



In [53]:
def remove_duplicates(all_rounded_preds):
    """
    Return the distinct prediction rows of `all_rounded_preds` as lists, in
    order of first appearance.

    Rows are compared by value (as tuples), so they may be lists, tuples or
    other iterables of hashable items. A single pass with a "seen" set replaces
    the earlier set-then-sort-by-`.index` approach, which was quadratic and
    raised ValueError when a row was not a list.

    Args:
        all_rounded_preds: iterable of prediction rows.

    Returns:
        List of unique rows, each converted to a list.
    """
    seen = set()
    unique_rows = []
    for row in all_rounded_preds:
        key = tuple(row)
        if key not in seen:
            seen.add(key)
            unique_rows.append(list(key))

    return unique_rows

In [54]:
def get_accuracy_over_all_tasks(all_rounded_preds, dups_removed_rounded_preds, labels, num_tasks):
    """
    Purpose:
        Accuracy (in percent) over all tasks.
        A task only counts as correct when the model produces EXACTLY the right output.
        The same output produced more than once is counted a single time;
        every repeat is counted as wrong.
        If labels are: [ [a, b], [a, c], [b, c] ] and model produces: [ [a, b], [a, b], [a, a] ] -> 33% accurate
        If labels are: [ [a, b], [a, c], [b, c] ] and model produces: [ [a, b], [a, c], [a, a] ] -> 66% accurate

    Args:
        all_rounded_preds: one rounded prediction list per context.
        dups_removed_rounded_preds: the same lists with duplicates removed.
        labels: one label list per task.
        num_tasks: total number of tasks.

    Returns:
        (num_correct / num_tasks) * 100
    """
    # Every repeated output is treated as a miss.
    duplicate_count = len(all_rounded_preds) - len(dups_removed_rounded_preds)

    # Distinct outputs that match no task's labels.
    unmatched_count = sum(1 for preds in dups_removed_rounded_preds if preds not in labels)

    num_correct = num_tasks - (unmatched_count + duplicate_count)

    return ( num_correct / num_tasks ) * 100

In [55]:
def get_labels(all_data):
    """
    Build one label list per task.

    For every task dataset in `all_data`, the last element of each sample is
    taken as that sample's label.

    Returns:
        List of label lists, in the same order as `all_data`.
    """
    return [[sample[-1] for sample in task_data] for task_data in all_data]

In [56]:
all_raw_preds, all_rounded_preds = get_all_raw_and_rounded_preds(num_task_contexts, NTASK_LAYER_IDX, model)

In [57]:
no_duplicates_all_rounded_preds = remove_duplicates(all_rounded_preds)


In [58]:
labels = get_labels(all_data)


In [59]:
acc = get_accuracy_over_all_tasks(all_rounded_preds, no_duplicates_all_rounded_preds, labels, num_task_contexts)
acc

100.0

In [None]:
# Rows are lists (unhashable), so each one is turned into a tuple before it goes into the set.
my_set = set(tuple(preds) for preds in all_rounded_preds)

In [None]:
# Flag every context whose rounded predictions match no task's labels.
# Iterates over whatever is in all_rounded_preds instead of a hard-coded 8.
for preds in all_rounded_preds:
    if preds not in labels:
        print("Not")

In [None]:
raw_preds

In [None]:
def consolidate_all_tasks_preds():
    """
    Unfinished placeholder.

    The cell held only a function header with no colon or body, which is a
    SyntaxError. It is now a valid stub so the notebook can be parsed.
    """
    # TODO: implement, or delete this scratch cell.
    raise NotImplementedError("consolidate_all_tasks_preds is an unfinished stub")

In [None]:
a = consolidate_preds(rounded_preds)

In [None]:
test(num_task_contexts, model, NTASK_LAYER_IDX)

In [None]:
all_data

In [None]:
# to test model, need to see if learned all representations 1 TIME

In [None]:
def acc(model, labels):