In [2]:
import keras
import numpy as np
import pickle
from time import time 

In [3]:
from snli_rnn import build_model
from attacks import EntailmentAttack


82
62
59
55
57
30
RNN / Embed / Sent = None, 300, 300
GloVe / Trainable Word Embeddings = True, False


In [4]:
# Enable IPython's autoreload extension; mode 2 re-imports all modules before
# each cell executes, so edits to the local project modules are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [5]:
# Load the tokenizer object from its pickle file; later cells read its
# `word_index` and `word_counts` attributes.
# NOTE(review): pickle.load can execute arbitrary code - only use it on a
# file from a trusted source.
with open('./nli_tokenizer.pkl', 'rb') as fh:
    tokenizer = pickle.load(fh)

In [6]:
# Load the pickled test split; later cells index it as test[0], test[1]
# (the two model inputs) and test[2] (the labels).
# NOTE(review): pickle.load can execute arbitrary code - trusted files only.
with open('./nli_testing.pkl', 'rb') as fh:
    test = pickle.load(fh)

# Word -> index mapping copied from the tokenizer, and its inverse
# (index -> word) used to turn id sequences back into text.
vocab = dict(tokenizer.word_index.items())
inv_vocab = dict(zip(vocab.values(), vocab.keys()))

In [7]:
def reconstruct(sent, inv_vocab):
    """Turn a sequence of token ids back into a space-separated string.

    Args:
        sent: iterable of token ids.
        inv_vocab: mapping from token id to word.

    Returns:
        The words for every non-zero id in ``sent``, joined by single spaces.
        Ids equal to 0 are skipped (presumably padding - confirm).

    Raises:
        KeyError: if a non-zero id is missing from ``inv_vocab``.
    """
    return ' '.join(inv_vocab[token_id] for token_id in sent if token_id != 0)
    

## Building the model

In [8]:
# One more than the number of distinct words the tokenizer counted
# (presumably the extra slot is for id 0 / padding - confirm).
VOCAB = 1 + len(tokenizer.word_counts)
# Class name -> integer class id.
LABELS = {name: idx for idx, name in enumerate(('contradiction', 'neutral', 'entailment'))}

In [11]:
# Construct the model with build_model (imported from snli_rnn), using its
# default arguments.
model = build_model()

Loading GloVe
Total number of null word embeddings:
4043


  W_regularizer=l2(L2) if L2 else None)(joint)
  model = Model(input=[premise, hypothesis], output=pred)


In [12]:
# Load saved weights from ./nli_model.h5 into the model built above.
model.load_weights('./nli_model.h5')

## Test accuracy

In [13]:
# Evaluate on the test split; element 1 of the returned metrics is what this
# notebook reports as accuracy.
eval_inputs = [test[0], test[1]]
eval_metrics = model.evaluate(eval_inputs, test[2])
test_accuracy = eval_metrics[1]
print('\nTest accuracy = ', test_accuracy)


Test accuracy =  0.8136197068403909


## Fooling the model

In [14]:
# Precomputed auxiliary arrays for the attack.
# dist_mat is passed to EntailmentAttack below; skip_words is loaded but not
# referenced by any cell visible in this notebook.
# NOTE(review): the file names suggest a word-distance matrix and a list of
# words lacking embeddings - confirm against the script that produced them.
dist_mat = np.load('./aux_files/nli_dist_counter_42390.npy')
skip_words = np.load('./aux_files/nli_missed_embeddings_counter_42390.npy')

In [15]:
def visulaize_result(model, attack_input, attack_output):
    """Print the model's prediction and text before and after an attack.

    Args:
        model: object exposing ``predict``; called once on the original
            input and once on the adversarial input.
        attack_input: two-element list of batched id arrays, passed to
            ``model.predict`` unchanged.
        attack_output: indexable whose items 0 and 1 are unbatched id
            arrays; a leading batch axis is added before prediction.

    Prints the predicted label with its score and the decoded sentence pair
    for the original input, a separator line, then the same for the
    adversarial input. Decoding uses the notebook-level ``reconstruct`` and
    ``inv_vocab``.
    """
    label_names = ['Contradiction', 'neutral', 'entailment']
    adv_batch = [attack_output[0][np.newaxis, :], attack_output[1][np.newaxis, :]]

    # Only the first row of each prediction is reported.
    scores_before = model.predict(attack_input)[0]
    scores_after = model.predict(adv_batch)[0]

    label_before = label_names[np.argmax(scores_before)]
    print('Original pred = {} ({:.2f})'.format(label_before, np.max(scores_before)))
    first_before = reconstruct(attack_input[0].ravel(), inv_vocab)
    second_before = reconstruct(attack_input[1].ravel(), inv_vocab)
    print(first_before, ' || ', second_before)

    print('-' * 40)

    label_after = label_names[np.argmax(scores_after)]
    print('New pred = {} ({:.2f})'.format(label_after, np.max(scores_after)))
    first_after = reconstruct(attack_output[0].ravel(), inv_vocab)
    second_after = reconstruct(attack_output[1].ravel(), inv_vocab)
    print(first_after, ' || ', second_after)


In [16]:
# Attack object built from the loaded model and the distance matrix.
# NOTE(review): the meaning of pop_size / max_iters / n1 is defined in
# attacks.EntailmentAttack, which is not visible here - presumably population
# size, iteration cap and a neighbour count; confirm before tuning.
adversary = EntailmentAttack(model, dist_mat, pop_size=128, max_iters=12, n1=5)

In [17]:
# Number of test examples sampled for the attack experiment.
TEST_SIZE = 500
# Seed NumPy's global RNG so the sampled indices (and any later np.random
# draws made when the cells are run in order) are the same on every
# Restart & Run All.
SEED = 1234
np.random.seed(SEED)
# Sample TEST_SIZE distinct example indices from the test split.
test_idxs = np.random.choice(len(test[0]), size=TEST_SIZE, replace=False)
# Accumulators filled by the attack loop.
test_list = []    # indices of the examples that were attacked
input_list = []   # model inputs for those examples
output_list = []  # attack results (None when the attack failed)
dist_list = []    # not written to by any cell visible in this notebook

In [18]:
# Elapsed wall-clock times (seconds, from time.time) recorded by the attack
# loop; an entry is appended only when an attack succeeds.
test_times = []

In [19]:
# Run the attack over the sampled test examples and print the success rate.
#
# For each sampled index:
#   * skip pairs whose second input has fewer than MIN_HYPOTHESIS_TOKENS
#     non-zero ids;
#   * report, but do not attack, pairs the model already misclassifies;
#   * otherwise choose a target label different from the true one and attack.
MIN_HYPOTHESIS_TOKENS = 10

# Reset the accumulators together with the counter, so that re-running this
# cell does not mix a previous run's entries into the success rate.
test_list, input_list, output_list, test_times = [], [], [], []
success_count = 0
for i in range(TEST_SIZE):
    print('\n')
    test_idx = test_idxs[i]
    # Add a leading batch axis to each of the two inputs.
    attack_input = [test[0][test_idx][np.newaxis,:], test[1][test_idx][np.newaxis,:]]
    # np.sign maps every positive id to 1, so the sum counts non-zero tokens
    # (assumes ids are non-negative).
    if np.sum(np.sign(attack_input[1])) < MIN_HYPOTHESIS_TOKENS:
        continue
    attack_pred = np.argmax(model.predict(attack_input))
    true_label = np.argmax(test[2][test_idx])
    if attack_pred != true_label:
        print('Wrong classified')
    else:
        # Target: 2 -> 0, 0 -> 2, and for label 1 a coin flip between 0 and 2.
        if true_label == 2:
            target = 0
        elif true_label == 0:
            target = 2
        else:
            target = 0 if np.random.uniform() < 0.5 else 2
        start_time = time()
        attack_result = adversary.attack(attack_input, target)
        # Stop the clock before visualising, so the recorded time covers the
        # attack only and not the extra predictions and printing.
        attack_seconds = time() - start_time
        if attack_result is None:
            print('**** Attack failed **** ')
        else:
            success_count += 1
            print('***** DONE ', len(test_list) , '------' )
            visulaize_result(model, attack_input, attack_result)
            test_times.append(attack_seconds)
        test_list.append(test_idx)
        input_list.append(attack_input)
        output_list.append(attack_result)
# Guard against every example having been skipped or misclassified.
if test_list:
    print(success_count / len(test_list))
else:
    print('No examples were attacked.')





0  :  0.037441637
1  :  0.074287295
2  :  0.23543617
3  :  0.23543617
4  :  0.23543617
5  :  0.23543617
6  :  0.32830888
7  :  0.32830888
8  :  0.32830888
9  :  0.32830888
10  :  0.32830888
11  :  0.32830888
**** Attack failed **** 


0  :  0.41141328
***** DONE  1 ------
Original pred = neutral (0.72)
A man speaking with two women .  ||  A man is giving two women directions at work .
----------------------------------------
New pred = Contradiction (0.64)
A man speaking with two women .  ||  A man is giving two women directions into working .












Wrong classified






***** DONE  2 ------
Original pred = neutral (0.50)
The black woman is laying out literature .  ||  A woman is placing pamphlets on a display table .
----------------------------------------
New pred = Contradiction (0.63)
The black woman is laying out literature .  ||  A woman is placing pamphlets on a display chalkboard .




0  :  0.18540055
1  :  0.29953915
***** DONE  3 ------
Original pred = neutral (0

4  :  0.18250787
5  :  0.34263983
6  :  0.34263983
7  :  0.34263983
8  :  0.34263983
9  :  0.34263983
10  :  0.34263983
***** DONE  21 ------
Original pred = Contradiction (0.99)
A little girl with pointy pigtails is sitting in the sand on the beach .  ||  The girl runs through the grass outside her school .
----------------------------------------
New pred = entailment (0.57)
A little girl with pointy pigtails is sitting in the sand on the beach .  ||  The woman operates through the grass outside her educational .
























***** DONE  22 ------
Original pred = neutral (0.49)
A group of women and men are in the water wit their clothes on .  ||  The group of people are swimming in the water .
----------------------------------------
New pred = Contradiction (0.53)
A group of women and men are in the water wit their clothes on .  ||  The group of people are swam in the water .




0  :  0.31411994
***** DONE  23 ------
Original pred = Contradiction (0.52)
A man and a woman

4  :  0.030581737
5  :  0.033141002
6  :  0.0592722
7  :  0.0798739
8  :  0.0798739
9  :  0.0798739
10  :  0.0798739
11  :  0.0798739
**** Attack failed **** 




























0  :  0.01514198
1  :  0.059656058
2  :  0.07494654
3  :  0.14267392
4  :  0.14267392
5  :  0.14267392
6  :  0.14267392
7  :  0.14267392
8  :  0.14267392
9  :  0.14267392
10  :  0.14267392
11  :  0.14267392
**** Attack failed **** 








0  :  0.05510516
1  :  0.116687335
2  :  0.16135907
3  :  0.21018296
4  :  0.21018296
5  :  0.21018296
6  :  0.23277274
7  :  0.23277274
8  :  0.26863194
9  :  0.26863194
10  :  0.26863194
11  :  0.26863194
**** Attack failed **** 




0  :  0.06383987
1  :  0.12741238
2  :  0.16217782
3  :  0.16217782
4  :  0.22260858
5  :  0.22260858
6  :  0.41564083
7  :  0.41564083
8  :  0.41564083
9  :  0.41564083
10  :  0.41564083
11  :  0.41564083
**** Attack failed **** 




0  :  0.023409246
1  :  0.034021143
2  :  0.045576684
3  :  0.045576684
4  :  0.045576684
5  :  0.0455

***** DONE  65 ------
Original pred = neutral (0.79)
A woman is leaning against a wall with her shoe off .  ||  A woman has her shoe off to adjust her sock .
----------------------------------------
New pred = entailment (0.56)
A woman is leaning against a wall with her shoe off .  ||  A woman possesses her footwear off of adaptation her sock .


0  :  0.1465907
1  :  0.2047897
2  :  0.39871255
3  :  0.39871255
4  :  0.39871255
***** DONE  66 ------
Original pred = neutral (0.93)
A group of people sitting around tables with computer monitors on them paying attention to something on the left .  ||  The group of people are game developers creating a new video game in their office .
----------------------------------------
New pred = Contradiction (0.50)
A group of people sitting around tables with computer monitors on them paying attention to something on the left .  ||  The panels de citizens be ballgame builders creating a new videos ballgame in their office .








0  :  0.041457094

1  :  0.19818516
2  :  0.31211862
3  :  0.31211862
4  :  0.31211862
5  :  0.37988463
6  :  0.37988463
***** DONE  87 ------
Original pred = entailment (0.76)
Six rescuers cooperate to place an injured man on a transport device in snowy , mountainous terrain .  ||  six people act together to get another , injured man on a transport in snowy terrain
----------------------------------------
New pred = Contradiction (0.51)
Six rescuers cooperate to place an injured man on a transport device in snowy , mountainous terrain .  ||  six people act whole to get latest , wound man on a transport onto snowy terra


***** DONE  88 ------
Original pred = neutral (0.57)
A young dark-haired woman with red sun visor holding an open white umbrella amidst a crowd of people  ||  The attractive brunette in a red visor gets a lot of attention from the crowd as she holds a white umbrella .
----------------------------------------
New pred = entailment (0.56)
A young dark-haired woman with red sun visor holdi

10  :  0.3279172
11  :  0.3279172
**** Attack failed **** 




























***** DONE  107 ------
Original pred = entailment (0.79)
a snowboarder wearing a green hat is riding along a metal railing .  ||  A snowboarder is riding along a metal railing while wearing a green hat .
----------------------------------------
New pred = Contradiction (0.55)
a snowboarder wearing a green hat is riding along a metal railing .  ||  A skier is riding along a metal railing while wearing a green hat .


0  :  0.14045653
1  :  0.298401
2  :  0.298401
3  :  0.32850206
4  :  0.3548379
***** DONE  108 ------
Original pred = entailment (0.84)
A woman in an office making a phone call  ||  A woman is making a phone call in an office environment .
----------------------------------------
New pred = Contradiction (0.47)
A woman in an office making a phone call  ||  A girl is doing a phones invitation in an office surroundings .
0.6788990825688074


In [24]:
# Fraction of attacked examples for which the attack returned a result.
success_rate = success_count / len(test_list)
print('Success rate: ', success_rate)

Success rate:  0.6788990825688074
