In [1]:
import numpy as np
import tensorflow as tf

from keras.preprocessing.sequence import pad_sequences
import pickle

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
import data_utils
import glove_utils
import models
import display_utils
from goog_lm import LM

In [3]:
import lm_data_utils
import lm_utils

In [5]:
# Seed both NumPy and TensorFlow with the same value so runs are repeatable.
SEED = 1001
np.random.seed(SEED)
tf.set_random_seed(SEED)



In [6]:
# Enable IPython's autoreload extension in mode 2, so edits to the imported
# project modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [7]:
# Vocabulary size; it is also used to select the matching auxiliary files on disk.
VOCAB_SIZE = 50000
dataset_path = 'aux_files/dataset_%d.pkl' % VOCAB_SIZE

# NOTE: unpickling can execute arbitrary code -- only load files you produced yourself.
with open(dataset_path, 'rb') as dataset_file:
    dataset = pickle.load(dataset_file)

In [8]:
# Token count of every test sequence, in dataset order.
doc_len = [len(seq) for seq in dataset.test_seqs2]

In [9]:
# Word-to-word distance matrix and the list of word ids to skip, both stored
# per vocabulary size under aux_files/.
dist_mat = np.load('aux_files/dist_counter_%d.npy' % VOCAB_SIZE)

# Id 0 is not part of the dictionary: push its row and column far away so it is
# never returned as a "most similar" word.
dist_mat[0, :] = dist_mat[:, 0] = 100000

skip_list = np.load('aux_files/missed_embeddings_counter_%d.npy' % VOCAB_SIZE)

### Demonstrating how we find the most similar words

In [10]:
# Look up the integer id that the dataset's word -> id dictionary assigns to 'elwes'.
src_word = dataset.dict['elwes']

In [11]:
src_word

34090

In [12]:
# Print the nearest neighbours of a few sample word ids (300..304).
# A dedicated loop variable is used so that `src_word`, set by the lookup cell
# above, is not silently overwritten by this demo.
# NOTE(review): 20 and 0.5 are presumably the maximum number of neighbours and
# the distance cut-off -- confirm against glove_utils.pick_most_similar_words.
for word_id in range(300, 305):
    nearest, nearest_dist = glove_utils.pick_most_similar_words(word_id, dist_mat, 20, 0.5)

    print('Closest to `%s` are:' %(dataset.inv_dict[word_id]))
    for w_id, w_dist in zip(nearest, nearest_dist):
        print(' -- ', dataset.inv_dict[w_id], ' ', w_dist)

    print('----')

Closest to `later` are:
 --  subsequent   0.18323109771400015
 --  subsequently   0.1867195991340007
 --  afterward   0.2509214012219996
 --  afterwards   0.2576958961479996
 --  thereafter   0.2741981096589998
 --  trailing   0.3368002712810001
 --  after   0.34520261237799876
 --  then   0.36472839338299834
 --  posterior   0.4310855888389997
 --  following   0.4833073676040003
----
Closest to `takes` are:
 --  pick   0.31130546563200046
 --  taking   0.42471158462800007
 --  picked   0.48527412495900113
----
Closest to `instead` are:
 --  conversely   0.30340380498499964
 --  however   0.3475382865829997
 --  alternatively   0.39540487543000014
 --  alternately   0.4439627395600003
 --  nevertheless   0.477163975792001
----
Closest to `seem` are:
 --  seems   0.007052995653001215
 --  appears   0.32837244735200044
 --  looks   0.33534638306400066
 --  transpires   0.456207185493001
----
Closest to `beautiful` are:
 --  gorgeous   0.019236443661999614
 --  wonderful   0.1014964337829

### Preparing the dataset

In [13]:
# Pad (or truncate) every sequence to a fixed length, appending padding at the end.
max_len = 250
train_x, test_x = (
    pad_sequences(seqs, maxlen=max_len, padding='post')
    for seqs in (dataset.train_seqs2, dataset.test_seqs2)
)
# Labels as NumPy arrays, aligned with the padded inputs.
train_y, test_y = np.array(dataset.train_y), np.array(dataset.test_y)

### Loading the sentiment analysis model

In [14]:
# Close the session left over from a previous run of this cell *before* the
# graph is reset. tf.get_default_session() only reports a session that was
# entered as a context (or an InteractiveSession), so checking it never closed
# the plain `tf.Session()` created here and leaked one session per re-run.
if 'sess' in globals():
    sess.close()
tf.reset_default_graph()
sess = tf.Session()

# Single-example model used to classify one review at a time.
batch_size = 1
lstm_size = 128

with tf.variable_scope('imdb', reuse=False):
    model = models.SentimentModel(batch_size=batch_size,
                                  lstm_size=lstm_size,
                                  max_len=max_len,
                                  embeddings_dim=300,
                                  vocab_size=dist_mat.shape[1],
                                  is_train=False)

# Restore the trained weights from the checkpoint into the fresh session.
saver = tf.train.Saver()
saver.restore(sess, './models/imdb_model')



Instructions for updating:

Future major versions of TensorFlow will allow gradients to flow
into the labels input on backprop by default.

See @{tf.nn.softmax_cross_entropy_with_logits_v2}.

INFO:tensorflow:Restoring parameters from ./models/imdb_model


## Loading the Google Language model

In [15]:
# Instantiate the language-model wrapper imported from goog_lm. Per the cell
# output, construction loads the LM vocabulary and recovers the graph and checkpoint.
goog_lm = LM()

LM vocab loading done


Recovering graph.


INFO:tensorflow:Recovering Graph goog_lm/graph-2016-09-10.pbtxt


Recovering checkpoint goog_lm/ckpt-*


#### Demonstrating the Google language model

In [16]:
# Collect the 20 nearest neighbours of 'play' and map their ids back to words.
src_word = dataset.dict['play']
nearest, nearest_dist = glove_utils.pick_most_similar_words(src_word, dist_mat, 20)
nearest_w = [dataset.inv_dict[neighbour_id] for neighbour_id in nearest]
print('Closest to `%s` are %s' %(dataset.inv_dict[src_word], nearest_w))

Closest to `play` are ['playing', 'gaming', 'games', 'toy', 'playback', 'game', 'plaything', 'cheek', 'gambling', 'toys', 'toying', 'replay', 'stake', 'plays', 'jeu', 'gamble', 'staking', 'reproduction', 'casino', 'sets']


In [17]:
# Score each candidate word between the given left and right context with the
# language model, then report the highest-scoring candidate.
prefix, suffix = 'is', 'with'
lm_preds = goog_lm.get_words_probs(prefix, nearest_w, suffix)
best_candidate = nearest_w[np.argmax(lm_preds)]
print('most probable is ', best_candidate)


most probable is  game


## Try Attack

In [52]:
from attacks import GreedyAttack, GeneticAtack, NewBaseline

## Main Attack 

In [55]:
# Batch sizes of the two extra model copies; both values are also passed to the attack.
pop_size = 32
n1 = 8

# Constructor arguments common to both copies of the sentiment model.
shared_model_kwargs = dict(lstm_size=lstm_size,
                           max_len=max_len,
                           embeddings_dim=300,
                           vocab_size=dist_mat.shape[1],
                           is_train=False)

# reuse=True makes both copies share the variables already created under 'imdb'.
with tf.variable_scope('imdb', reuse=True):
    batch_model = models.SentimentModel(batch_size=pop_size, **shared_model_kwargs)

with tf.variable_scope('imdb', reuse=True):
    neighbour_model = models.SentimentModel(batch_size=n1, **shared_model_kwargs)

base_atttack = NewBaseline(sess, model, batch_model, neighbour_model, dataset, dist_mat,
                           skip_list, goog_lm,
                           max_iters=20,
                           pop_size=pop_size,
                           n1=n1,
                           n2=4,
                           use_lm=True,
                           use_suffix=False)

In [56]:
# Draw TEST_SIZE distinct test examples and report the shortest one.
TEST_SIZE = 1000
test_idx = np.random.choice(len(dataset.test_y), TEST_SIZE, replace=False)
test_len = [len(dataset.test_seqs2[sample_idx]) for sample_idx in test_idx]
print('Shortest sentence in our test set is %d words' %np.min(test_len))

# Parallel result lists: one entry per attacked example.
test_list, orig_list, orig_label_list, adv_list, dist_list = [], [], [], [], []

for i, sample_idx in enumerate(test_idx):
    x_orig = test_x[sample_idx]
    orig_label = test_y[sample_idx]
    orig_preds = model.predict(sess, x_orig[np.newaxis, :])[0]

    # Only attack examples the model classifies correctly to begin with.
    if np.argmax(orig_preds) != orig_label:
        print('skipping wrong classifed ..')
        print('--------------------------')
        continue

    # Padding is 0, so summing the signs counts the real (non-padding) tokens.
    x_len = np.sum(np.sign(x_orig))
    if x_len >= 100:
        print('skipping too long input..')
        print('--------------------------')
        continue

    print('****** ', len(test_list) + 1, ' ********')
    test_list.append(sample_idx)
    orig_list.append(x_orig)
    orig_label_list.append(orig_label)

    # Binary task: the attack targets the opposite label.
    if orig_label == 0:
        target_label = 1
    else:
        target_label = 0

    x_adv = base_atttack.attack( x_orig, target_label)
    adv_list.append(x_adv)
    if x_adv is not None:
        num_changes = np.sum(x_orig != x_adv)
        print('%d - %d changed.' %(i+1, num_changes))
        dist_list.append(num_changes)
    else:
        # 100000 is the sentinel distance recorded for a failed attack.
        print('%d failed' %(i+1))
        dist_list.append(100000)
    print('--------------------------')

    # Stop once 50 examples have been attacked.
    if len(test_list) >= 50:
        break

Shortest sentence in our test set is 18 words
skipping wrong classifed ..
--------------------------
skipping too long input..
--------------------------
skipping too long input..
--------------------------
skipping wrong classifed ..
--------------------------
skipping too long input..
--------------------------
skipping too long input..
--------------------------
skipping too long input..
--------------------------
skipping too long input..
--------------------------
skipping too long input..
--------------------------
skipping too long input..
--------------------------
skipping too long input..
--------------------------
skipping wrong classifed ..
--------------------------
skipping too long input..
--------------------------
skipping wrong classifed ..
--------------------------
skipping wrong classifed ..
--------------------------
skipping too long input..
--------------------------
******  1  ********
   0   0.23210569
   1   0.23784746
   2   0.2623802
   3   0.2623802
   4  

   0   0.1559615
   1   0.15812121
   2   0.17499195
   3   0.17571938
   4   0.18338975
   5   0.26433367
   6   0.2773033
   7   0.2862654
   8   0.29815322
   9   0.29815322
   10   0.30741596
   11   0.31927732
   12   0.3369692
   13   0.38798693
   14   0.4382376
   15   0.46514332
   16   0.46514332
   17   0.48210397
   18   0.48876822
   19   0.4908003
99 failed
--------------------------
skipping too long input..
--------------------------
skipping too long input..
--------------------------
skipping too long input..
--------------------------
skipping too long input..
--------------------------
skipping too long input..
--------------------------
skipping too long input..
--------------------------
skipping too long input..
--------------------------
******  12  ********
   0   0.41798633
   1   0.42007938
   2   0.42107067
   3   0.42557657
   4   0.4661164
   5   0.4673812
   6   0.52662814
107 - 6 changed.
--------------------------
skipping too long input..
-------------

skipping too long input..
--------------------------
skipping too long input..
--------------------------
******  21  ********
   0   0.21502261
   1   0.2295747
   2   0.37856516
   3   0.3840299
   4   0.41174752
   5   0.41223103
   6   0.41992217
   7   0.4735098
   8   0.48998052
   9   0.48998052
   10   0.49558276
   11   0.4991076
   12   0.5514155
200 - 11 changed.
--------------------------
******  22  ********
   0   0.031959638
   1   0.031959638
   2   0.042491388
   3   0.047284983
   4   0.055870786
   5   0.064582966
   6   0.06590499
   7   0.10670218
   8   0.11919804
   9   0.120056555
   10   0.120056555
   11   0.120056555
   12   0.2415046
   13   0.27905315
   14   0.28999278
   15   0.3528862
   16   0.3546689
   17   0.36605182
   18   0.40327713
   19   0.45806977
201 failed
--------------------------
skipping too long input..
--------------------------
skipping too long input..
--------------------------
skipping too long input..
--------------------------
sk

skipping too long input..
--------------------------
skipping too long input..
--------------------------
skipping too long input..
--------------------------
skipping too long input..
--------------------------
skipping too long input..
--------------------------
skipping too long input..
--------------------------
skipping too long input..
--------------------------
skipping too long input..
--------------------------
skipping too long input..
--------------------------
******  30  ********
   0   0.2534697
   1   0.2558621
   2   0.2579595
   3   0.2579595
   4   0.30280936
   5   0.30863488
   6   0.31693015
   7   0.31907302
   8   0.32444727
   9   0.34049797
   10   0.36807513
   11   0.41131827
   12   0.41131827
   13   0.52804327
316 - 11 changed.
--------------------------
skipping too long input..
--------------------------
skipping too long input..
--------------------------
skipping too long input..
--------------------------
skipping too long input..
--------------------

   2   0.24318717
   3   0.37166297
   4   0.37862012
   5   0.3920549
   6   0.39294758
   7   0.42792192
   8   0.47635013
   9   0.47635013
   10   0.47795597
   11   0.49107504
   12   0.51356685
417 - 11 changed.
--------------------------
skipping wrong classifed ..
--------------------------
skipping wrong classifed ..
--------------------------
skipping too long input..
--------------------------
skipping too long input..
--------------------------
skipping too long input..
--------------------------
skipping too long input..
--------------------------
skipping wrong classifed ..
--------------------------
skipping too long input..
--------------------------
skipping too long input..
--------------------------
skipping too long input..
--------------------------
skipping too long input..
--------------------------
******  39  ********
   0   0.43435755
   1   0.4387269
   2   0.43882665
   3   0.47210252
   4   0.493502
   5   0.493502
   6   0.493502
   7   0.5102028
429 - 5 c

skipping wrong classifed ..
--------------------------
skipping wrong classifed ..
--------------------------
skipping too long input..
--------------------------
skipping too long input..
--------------------------
******  48  ********
   0   0.046202816
   1   0.046566263
   2   0.046566263
   3   0.047685396
   4   0.049216777
   5   0.05433861
   6   0.055251013
   7   0.06379594
   8   0.07161685
   9   0.08525872
   10   0.091364056
   11   0.11213515
   12   0.11213515
   13   0.11213515
   14   0.124794126
   15   0.12595299
   16   0.13024926
   17   0.13065724
   18   0.13065724
   19   0.13288482
526 failed
--------------------------
skipping too long input..
--------------------------
skipping too long input..
--------------------------
skipping too long input..
--------------------------
skipping too long input..
--------------------------
******  49  ********
   0   0.36103743
   1   0.37091884
   2   0.37997788
   3   0.3845485
   4   0.38513982
   5   0.39234382
   6   

In [None]:
#with open('attack_results.pkl', 'wb') as f:
#    pickle.dump((orig_list, orig_label_list, adv_list, dist_list), f)

## Visualize some output results

In [61]:
# Number of non-padding tokens in each attacked example.
orig_len = [np.sum(np.sign(seq)) for seq in orig_list]
# Fraction of tokens modified per example (failed attacks carry the 100000 sentinel).
ratio = [dist_list[k] / n_tokens for k, n_tokens in enumerate(orig_len)]

In [39]:
# Drop the 100000 failure sentinel before taking the median.
successful_dists = [dist for dist in dist_list if dist != 100000]
print('Median number of modifications ' , np.median(successful_dists))

Median number of modifications  8.0


In [57]:
# True where the attack produced an adversarial example, False where it returned None.
success = [adv is not None for adv in adv_list]

In [62]:
# Keep only ratios below 1.0; failed attacks (sentinel distance) land far above it.
ratio_success = list(filter(lambda r: r < 1.0, ratio))

In [63]:
# Mean fraction of modified tokens over the ratios kept in `ratio_success`.
np.mean(ratio_success)

0.16605805177778876

In [60]:
# Per-example number of changed words (100000 marks a failed attack).
# The cell previously held the truncated, undefined name `orig_`; the recorded
# output below matches `dist_list`.
dist_list

[100000,
 100000,
 100000,
 13,
 100000,
 7,
 9,
 13,
 100000,
 100000,
 100000,
 6,
 11,
 14,
 100000,
 100000,
 100000,
 9,
 5,
 100000,
 11,
 100000,
 100000,
 100000,
 7,
 100000,
 4,
 100000,
 7,
 11,
 16,
 9,
 100000,
 3,
 18,
 100000,
 10,
 11,
 5,
 10,
 7,
 100000,
 100000,
 11,
 4,
 100000,
 100000,
 100000,
 9,
 100000]

In [None]:
test_x.r

In [65]:
# Pick one attacked example by its position in the result lists and display
# the model's prediction on the original versus the adversarial input.
visual_idx = 24
display_utils.visualize_attack(sess, model, dataset, orig_list[visual_idx], adv_list[visual_idx])

Original Prediction = Negative. (Confidence = 76.12) 


---------  After attack -------------
New Prediction = Positive. (Confidence = 53.23) 


In [None]:
# Failed attacks are stored with the 100000 sentinel; exclude them so the mean
# reflects real modification counts (same filter as the median above).
successful_dists = [dist for dist in dist_list if dist != 100000]
print('Average number of changed words = ', np.mean(successful_dists))

In [34]:
## Save success
with open('attack_results_final.pkl', 'wb') as f:
    pickle.dump((test_list, orig_list, orig_label_list, adv_list, dist_list), f)
    


In [66]:
import display_utils

In [68]:
# Show the same example again through the second visualisation helper, which
# additionally takes the test-set index and the original label.
visual_idx = 24
display_utils.visualize_attack2(
    dataset,
    test_list[visual_idx],
    orig_list[visual_idx],
    adv_list[visual_idx],
    orig_label_list[visual_idx],
)

RAW TEXT: 


--------------------
Original Prediction = Negative.  


---------  After attack -------------
New Prediction = Positive.


In [None]:
src_text[0]

In [None]:
orig_text[0]

In [None]:
adv_text[0]

In [None]:
## Baseline
koontz's