In [4]:
"""
4/15: 
Neuron selection for (1) store (2) counter (3) ig
Neuron verification on these two tasks.
"""

import numpy as np 
import matplotlib.pyplot as plt
import matplotlib
import importlib
import os
import pickle
import sys
from sklearn.utils import shuffle
matplotlib.use('GTK')
sys.path.insert(0, '../src/')


import call_classifier
import call_integrated_gradient as call_ig
import evaluator
import sample_getter
import state_getter
import utils 
import verification
from neuron_mapping import evaluate_intersection
from neuron_mapping import get_intersection
importlib.reload(utils)



# ---- Experiment configuration ----
task = 'autoenc-last'
data_name = 'auto-last-toy'
units = 32
random_seed = 2
token = 3
# Results are written under ../result/<data_name>_units=<units>_seed=<seed>.
# (A per-token sub-directory, 'neuron_selection_token=%d' % token, is
# currently disabled.)
saved_path = os.path.join('../result/', "%s_units=%d_seed=%d" % (data_name, units, random_seed))
print('saved_path =', saved_path)
# Neuron-selection budget: half of the hidden units.
target_units = units // 2


# Create the output directory together with any missing parent (e.g. a fresh
# checkout without ../result/). An already existing directory is not an error.
os.makedirs(saved_path, exist_ok=True)

seq2seq = utils.get_trained_model(task, data_name, units, random_seed=random_seed)

saved_path = ../result/auto-last-toy_units=32_seed=2
Instructions for updating:
non-resource variables are not supported in the long term

get_data
(Load data) token_size  = 6 6
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where
Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
encoder_inputs (InputLayer)  [(None, None)]            0         
_________________________________________________________________
encoder_emb (Embedding)      (None, None, 32)          192       
_________________________________________________________________
forward (GRU)                [(None, None, 32), (None, 6240      
Total params: 6,432
Trainable params: 6,432
Non-trainable params: 0
_________________________________________________________________

In [None]:
def get_store_neuron(token, T_list):
    """Select "store" neurons for `token` at every position in `T_list`.

    For each position T two groups of test samples are drawn: samples that
    have `token` at position T (labelled 1) and samples that have any other
    token there (labelled 0). For every time step t in range(T + 2), the
    hidden states of both groups at step t are shuffled and handed to
    `call_classifier.call_recursive_rfe`, and whatever it returns is stored.

    Args:
        token: token id used as the sampling condition.
        T_list: iterable of positions to analyse.

    Returns:
        dict of the form {T: {t: features}}. A position T is left out when the
        sampler returns None or fewer than 5 entries along axis 1 of the
        sample index array.
    """
    print("\tGet Store")
    selected_by_T = {}
    for T in T_list:
        print("=" * 50 + "\nT = %d" % T)
        with_token = sample_getter.get_sample_by_one_condition(
            seq2seq.decoder_in_test, token=token, position=T, N=1000)
        without_token = sample_getter.get_sample_by_one_condition(
            seq2seq.decoder_in_test, token=token, position=T, N=1000,
            except_this_token=True)
        sample_index = sample_getter.get_different_amount_sample([with_token, without_token])

        enough_samples = sample_index is not None and sample_index.shape[1] >= 5
        if not enough_samples:
            print("\tToo less samples in this condition.")
            continue  # Nothing usable for this position; try the next one.

        per_step = selected_by_T[T] = {}
        state = state_getter.get_hidden_state(seq2seq, sample_index)
        for t in range(T + 2):
            print("=" * 50 + "\nt = %d" % t)
            step_state = state[:, :, t, :]
            group_size = step_state.shape[1]
            # First group (token present) -> 1, second group (token absent) -> 0.
            labels = np.concatenate([np.full(group_size, 1, dtype=int),
                                     np.full(group_size, 0, dtype=int)])
            inputs = np.reshape(step_state, [-1, seq2seq.units])
            inputs, labels = shuffle(inputs, labels, random_state=42)
            features = call_classifier.call_recursive_rfe(
                inputs, labels, max_count=target_units, one_threshold=0.5)
            per_step[t] = features
            print("features =", features)
    return selected_by_T

In [18]:
def get_counter_neuron(token, T_list):
    """Select "counter" neurons for `token` at every position in `T_list`.

    For each position T, test samples with `token` at position T are drawn and
    their hidden states for the first T time steps are collected. Every hidden
    state is labelled with its time-step index, and the shuffled
    (state, time-step) pairs are handed to
    `call_classifier.call_recursive_rfe`.

    Args:
        token: token id used as the sampling condition.
        T_list: iterable of positions to analyse.

    Returns:
        dict of the form {T: features}. A position T is left out when the
        sampler returns None or fewer than 5 entries along axis 1 of the
        sample index array.
    """
    print("\tGet Counter")
    result = {}
    for T in T_list:
        print("=" * 50 + "\nT = %d" % T)
        si = sample_getter.get_sample_by_one_condition(seq2seq.decoder_in_test,
                                                       token=token, position=T, N=1000)
        sample_index = sample_getter.get_different_amount_sample([si])
        if sample_index is None or sample_index.shape[1] < 5:
            print("\tToo less samples in this condition.")
            # Skip only this position: leaving the function here would return
            # None and throw away the results gathered for other positions.
            continue
        state = state_getter.get_hidden_state(seq2seq, sample_index)
        state = state[:, :, :T]

        x = state[0].transpose([1, 0, 2])  # [N, t, units] -> [t, N, units]
        n_steps, n_samples = x.shape[0], x.shape[1]
        # Label layout matches the row-major reshape below:
        # n_samples zeros, then n_samples ones, ... up to n_steps - 1.
        y = np.repeat(np.arange(n_steps, dtype=int), n_samples)
        x = np.reshape(x, [-1, seq2seq.units])
        x, y = shuffle(x, y, random_state=42)
        result[T] = call_classifier.call_recursive_rfe(x, y, max_count=target_units, one_threshold=0.5)
    return result

In [16]:
def get_ig_neuron(token, T_list):
    """Select neurons by integrated-gradient score for `token` at each position.

    For each position T, test samples with `token` at position T are drawn.
    For every time step t in range(T + 1), a model from step t to output step
    T is built via `call_ig.get_model_without_argmax`, scores are computed
    with `call_ig.compute_ig_steps` for target class `token`, and
    `call_ig.get_important_neurons_by_IG` picks `target_units` neurons.

    Args:
        token: token id used both as sampling condition and as target class.
        T_list: iterable of positions to analyse.

    Returns:
        dict of the form {T: {t: selected neurons}}.
    """
    print("\tGet IG")
    selected_by_T = {}
    for T in T_list:
        print("=" * 50 + "\nT = %d" % T)
        sample_index = sample_getter.get_sample_by_one_condition(
            seq2seq.decoder_in_test, token=token, position=T, N=1000)
        per_step = selected_by_T[T] = {}
        decoder_states, decoder_inputs = call_ig.get_state_by_sample_index(seq2seq, sample_index)
        for t in range(T + 1):
            print("=" * 50 + "\nt = %d" % t)
            decoder_model = call_ig.get_model_without_argmax(seq2seq, input_t=t, output_t=T)
            ig_score = call_ig.compute_ig_steps(
                decoder_model, decoder_states[t], decoder_inputs, target_class=token)
            top_neurons = call_ig.get_important_neurons_by_IG(ig_score, k=target_units)
            per_step[t] = top_neurons
            print("\tselected =", top_neurons)
    return selected_by_T

In [19]:
# Positions to analyse.
T_list = [5, 7]

# Run the three selection procedures for the configured token.
store_neuron = get_store_neuron(token, T_list)
counter_neuron = get_counter_neuron(token, T_list)
ig_neuron = get_ig_neuron(token, T_list)

# Bundle the three results and persist them for the verification cells.
data = dict(store=store_neuron, counter=counter_neuron, ig=ig_neuron)
with open(os.path.join(saved_path, 'neuron_token=%d.pickle' % token), 'wb') as handle:
    pickle.dump(data, handle)

	Get Store
T = 5
	Find 1000 samples with token==3 and position=5.
	Find 1000 samples with token!=3 and position=5.
	(Hidden value) container.shape = (2, 1000, 15, 32)
t = 0
	count = 8, 	(RFE, one) Train accuracy = 1.000, 	Test accuracy = 1.000
	count = 16, 	(RFE, one) Train accuracy = 1.000, 	Test accuracy = 1.000
features = [1, 2, 4, 7, 13, 17, 22, 27, 3, 9, 12, 15, 18, 21, 23, 25]
t = 1
	count = 8, 	(RFE, one) Train accuracy = 1.000, 	Test accuracy = 1.000
	count = 16, 	(RFE, one) Train accuracy = 1.000, 	Test accuracy = 1.000
features = [2, 4, 7, 13, 15, 17, 18, 23, 1, 6, 14, 22, 24, 26, 27, 28]
t = 2
	count = 8, 	(RFE, one) Train accuracy = 1.000, 	Test accuracy = 1.000
	count = 16, 	(RFE, one) Train accuracy = 1.000, 	Test accuracy = 1.000
features = [2, 8, 9, 15, 17, 20, 21, 23, 0, 4, 7, 16, 25, 29, 30, 31]
t = 3
	count = 8, 	(RFE, one) Train accuracy = 1.000, 	Test accuracy = 1.000
	count = 16, 	(RFE, one) Train accuracy = 1.000, 	Test accuracy = 1.000
features = [0, 4, 8, 16, 2

In [35]:
# Report the accuracy of the original (unmodified) model, as a baseline to compare with the results after the important neurons are replaced.
from tensorflow.keras.preprocessing.sequence import pad_sequences

def verify_original_model(seq2seq, sample, real, token, T):
    """Print baseline results of the unmodified model on `sample`.

    Runs `seq2seq.inference_batch`, post-pads/truncates the predictions to
    `seq2seq.tgt_max_len`, prints the first three predictions, and then calls
    the evaluator for time step `T` and for `token`. Nothing is returned.

    Args:
        seq2seq: trained model wrapper.
        sample: encoder inputs to decode.
        real: reference sequences passed to the evaluator.
        token: token id for the per-token evaluation.
        T: time step for the per-time-step evaluation.
    """
    print("\tVerify on original model.")
    decoded = seq2seq.inference_batch(sample)
    # Bring every prediction to the same length so it can be compared with `real`.
    pred = pad_sequences(decoded,
                         maxlen=seq2seq.tgt_max_len,
                         padding='post',
                         truncating='post')
    print(pred[:3])
    print("", end="\t")
    evaluator.evaluate_autoencoder_at_time(real, pred, time_step=T, verbose=2)
    evaluator.evaluate_autoencoder_token(real, pred, token=token)

In [36]:
# Verify store neurons


def verify_store_one_step(T, t, feature1, feature2, seq2seq, sample, real):
    """Evaluate the model at step `T` after intervening on shared neurons at step `t`.

    The intersection of `feature1` and `feature2` is passed to
    `verification.verify_decoder` twice, once with mode="disable" and once
    with mode="enable" (both with replace_by="zero"). Each prediction is
    scored with `evaluator.evaluate_autoencoder_at_time`; the first three
    "enable" predictions are also printed.

    Args:
        T: time step at which the prediction is evaluated.
        t: time step at which the intervention is applied.
        feature1, feature2: two neuron selections to intersect.
        seq2seq: trained model wrapper.
        sample: encoder inputs to decode.
        real: reference sequences passed to the evaluator.
    """
    header = "T=%d\tt=%d\tN1=%d\tN2=%d" % (T, t, len(feature1), len(feature2))
    print(header, end="\t")
    evaluate_intersection(feature1, feature2, verbose=2)
    shared = get_intersection(feature1, feature2)

    # Pass 1: mode="disable".
    print("\n\t\t\t", end="")
    disabled_pred = verification.verify_decoder(
        seq2seq, sample, shared, time_step=t,
        mode="disable", replace_by="zero", verbose=2)
    evaluator.evaluate_autoencoder_at_time(real, disabled_pred, time_step=T, verbose=2)

    # Pass 2: mode="enable".
    print("\t\t\t", end="")
    enabled_pred = verification.verify_decoder(
        seq2seq, sample, shared, time_step=t,
        mode="enable", replace_by="zero", verbose=2)
    print(enabled_pred[:3])
    evaluator.evaluate_autoencoder_at_time(real, enabled_pred, time_step=T, verbose=2)

    
def verify_store_one_token(token):
    """Verify the saved store neurons of `token` against the IG neurons.

    Loads 'neuron_token=<token>.pickle' from `saved_path`, then, for the first
    position T found in the IG results only, prints the baseline of the
    original model and runs `verify_store_one_step` for t = 1, 2 and 4.

    Args:
        token: token id whose pickle file is loaded and verified.
    """
    pickle_path = os.path.join(saved_path, 'neuron_token=%d.pickle' % token)
    # NOTE: pickle.load can execute arbitrary code; only load files written by
    # this notebook.
    with open(pickle_path, 'rb') as handle:
        result = pickle.load(handle)
    store_neuron, ig_neuron = result['store'], result['ig']

    # Only the first stored position is verified.
    for T in list(ig_neuron)[:1]:
        print("-" * 50)
        si1 = sample_getter.get_sample_by_one_condition(
            seq2seq.decoder_in_test, token=token, position=T, N=100)
        sample = seq2seq.encoder_in_test[si1]
        real = evaluator.get_evaluate_real(seq2seq, si1)
        verify_original_model(seq2seq, sample, real, token, T)
        # A fixed subset of steps is checked instead of every key of ig_neuron[T].
        for t in (1, 2, 4):
            verify_store_one_step(T, t, store_neuron[T][t], ig_neuron[T][t],
                                  seq2seq, sample, real)

# Run the store-neuron verification for the token configured at the top of the notebook.
verify_store_one_token(token=token)

--------------------------------------------------
	Find 100 samples with token==3 and position=5.
	Verify on original model.
[[5 5 5 5 3 2 0 0 0 0 0 0 0 0 0]
 [5 5 5 5 3 2 0 0 0 0 0 0 0 0 0]
 [5 5 5 5 3 2 0 0 0 0 0 0 0 0 0]]
	(t=5)	1.0000	1.0000	1.0000
	Each accuracy: [0.0, 0.0, 0.0, 0.0, 1.0, 0.0]
T=5	t=1	N1=16	N2=16	N=10	jac=0.45	
			disable	(t=5)	0.5000	0.0000	1.0000
			enable	[[5. 5. 5. 4. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]
 [5. 5. 5. 4. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]
 [5. 5. 5. 4. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]]
(t=5)	0.7500	0.0000	1.0000
T=5	t=2	N1=16	N2=16	N=9	jac=0.39	
			disable	(t=5)	0.7500	0.0000	1.0000
			enable	[[5. 5. 5. 3. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]
 [5. 5. 5. 3. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]
 [5. 5. 5. 3. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]]
(t=5)	0.7500	0.0000	1.0000
T=5	t=4	N1=16	N2=16	N=11	jac=0.52	
			disable	(t=5)	1.0000	0.3800	1.0000
			enable	[[5. 5. 5. 5. 3. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]
 [5. 5. 5. 5. 3. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]
 [5. 5. 5. 

In [39]:
# Verify counter neurons
import neuron_mapping
def verify_one_time_step(T, t, feature1, feature2, seq2seq, sample, real, token=3):
    """Evaluate the model after disabling the shared neurons at step `t`.

    The intersection of `feature1` and `feature2` is passed to
    `verification.verify_decoder` with mode="disable" and
    replace_by="last_h". The prediction is then scored at time step `T` and
    for `token`.

    Args:
        T: time step at which the prediction is evaluated.
        t: time step at which the neurons are disabled.
        feature1, feature2: two neuron selections to intersect.
        seq2seq: trained model wrapper.
        sample: encoder inputs to decode.
        real: reference sequences passed to the evaluator.
        token: token id for the per-token evaluation. Defaults to 3, the value
            that used to be hard-coded, so existing callers are unaffected.
    """
    print("T=%d\tt=%d\tN1=%d\tN2=%d" % (T, t, len(feature1), len(feature2)), end="\t")
    neuron_mapping.evaluate_intersection(feature1, feature2, verbose=2)
    feature = neuron_mapping.get_intersection(feature1, feature2)
    print("\n", end="\t")
    pred = verification.verify_decoder(seq2seq, sample, feature, time_step=t,
                                       mode="disable", replace_by="last_h", verbose=2)
    evaluator.evaluate_autoencoder_at_time(real, pred, time_step=T, verbose=2)
    # Score the token that is actually being verified, not a fixed one.
    evaluator.evaluate_autoencoder_token(real, pred, token=token)


def verify_counter_one_token(token):
    """Verify the saved counter neurons of `token` against the IG neurons.

    Loads 'neuron_token=<token>.pickle' from `saved_path`, then, for the first
    position T found in the IG results only, prints the baseline of the
    original model and runs `verify_one_time_step` for every time step stored
    in the IG results of that position.

    Args:
        token: token id whose pickle file is loaded and verified.
    """
    # NOTE: pickle.load can execute arbitrary code; only load files written by
    # this notebook.
    with open(os.path.join(saved_path, 'neuron_token=%d.pickle' % token), 'rb') as handle:
        result = pickle.load(handle)
        counter_neuron = result['counter']
        ig_neuron = result['ig']

    for T in ig_neuron:
        si1 = sample_getter.get_sample_by_one_condition(seq2seq.decoder_in_test,
                                                        token=token, position=T, N=100)
        sample = seq2seq.encoder_in_test[si1]
        real = evaluator.get_evaluate_real(seq2seq, si1)
        verify_original_model(seq2seq, sample, real, token, T)
        for t in ig_neuron[T]:
            # Forward the token so the per-token accuracy matches the samples.
            verify_one_time_step(T, t, counter_neuron[T], ig_neuron[T][t],
                                 seq2seq, sample, real, token=token)
        break  # Only the first stored position is verified.
        
# Run the counter-neuron verification for the token configured at the top of the notebook.
verify_counter_one_token(token)

	Find 100 samples with token==3 and position=5.
	Verify on original model.
[[5 5 5 5 3 2 0 0 0 0 0 0 0 0 0]
 [5 5 5 5 3 2 0 0 0 0 0 0 0 0 0]
 [5 5 5 5 3 2 0 0 0 0 0 0 0 0 0]]
	(t=5)	1.0000	1.0000	1.0000
	Each accuracy: [0.0, 0.0, 0.0, 0.0, 1.0, 0.0]
T=5	t=0	N1=16	N2=16	N=10	jac=0.45	
	disable	(t=5)	1.0000	1.0000	1.0000
	Each accuracy: [0.0, 0.0, 0.0, 0.0, 1.0, 0.0]
T=5	t=1	N1=16	N2=16	N=10	jac=0.45	
	disable	(t=5)	1.0000	1.0000	1.0000
	Each accuracy: [0.0, 0.0, 0.0, 0.0, 1.0, 0.0]
T=5	t=2	N1=16	N2=16	N=9	jac=0.39	
	disable	(t=5)	1.0000	0.9400	0.9400
	Each accuracy: [0.0, 0.0, 0.0, 0.0, 0.94, 0.06]
T=5	t=3	N1=16	N2=16	N=9	jac=0.39	
	disable	(t=5)	1.0000	0.0000	0.0000
	Each accuracy: [0.0, 0.0, 0.0, 0.0, 0.0, 1.0]
T=5	t=4	N1=16	N2=16	N=8	jac=0.33	
	disable	(t=5)	1.0000	0.9400	0.9400
	Each accuracy: [0.0, 0.0, 0.0, 0.0, 0.94, 0.06]
T=5	t=5	N1=16	N2=16	N=6	jac=0.23	
	disable	(t=5)	1.0000	1.0000	1.0000
	Each accuracy: [0.0, 0.0, 0.0, 0.0, 1.0, 0.0]
