In [None]:
import numpy as np
import tensorflow as tf

from keras.preprocessing.sequence import pad_sequences
import pickle

In [2]:
import data_utils
import glove_utils
import models
import display_utils
from goog_lm import LM

In [3]:
import lm_data_utils
import lm_utils

In [12]:
# Seed NumPy's global RNG and TensorFlow's graph-level RNG with the same value.
RANDOM_SEED = 1001
np.random.seed(RANDOM_SEED)
tf.set_random_seed(RANDOM_SEED)



In [5]:
%load_ext autoreload
%autoreload 2

In [6]:
# Vocabulary size; it is also part of the auxiliary file names loaded below.
VOCAB_SIZE = 50000
dataset_path = 'aux_files/dataset_%d.pkl' % VOCAB_SIZE
# NOTE: pickle.load executes arbitrary code from the file; only load a
# dataset pickle that you produced yourself or otherwise trust.
with open(dataset_path, 'rb') as dataset_file:
    dataset = pickle.load(dataset_file)

In [7]:
# Length of every test sequence (before padding).
doc_len = [len(seq) for seq in dataset.test_seqs2]

In [8]:
dist_mat = np.load('aux_files/dist_counter_%d.npy' % VOCAB_SIZE)
# Index 0 is not part of the dictionary, so give its row and column a huge
# distance; that way it is never returned as a most-similar word.
dist_mat[0, :] = dist_mat[:, 0] = 100000

skip_list = np.load('aux_files/missed_embeddings_counter_%d.npy' % VOCAB_SIZE)

### Demonstrating how we find the most similar words

In [9]:
src_word = dataset.dict['elwes']

In [10]:
src_word

34090

In [13]:
# Print the nearest neighbours of five consecutive word ids.
# The arguments 20 and 0.5 presumably cap the neighbour count and the
# maximum distance — confirm against glove_utils.pick_most_similar_words.
for src_word in range(300, 305):
    nearest, nearest_dist = glove_utils.pick_most_similar_words(
        src_word, dist_mat, 20, 0.5)

    print('Closest to `%s` are:' %(dataset.inv_dict[src_word]))
    for neighbour_id, neighbour_dist in zip(nearest, nearest_dist):
        print(' -- ', dataset.inv_dict[neighbour_id], ' ', neighbour_dist)

    print('----')

Closest to `later` are:
 --  subsequent   0.18323109771400015
 --  subsequently   0.1867195991340007
 --  afterward   0.2509214012219996
 --  afterwards   0.2576958961479996
 --  thereafter   0.2741981096589998
 --  trailing   0.3368002712810001
 --  after   0.34520261237799876
 --  then   0.36472839338299834
 --  posterior   0.4310855888389997
 --  following   0.4833073676040003
----
Closest to `takes` are:
 --  pick   0.31130546563200046
 --  taking   0.42471158462800007
 --  picked   0.48527412495900113
----
Closest to `instead` are:
 --  conversely   0.30340380498499964
 --  however   0.3475382865829997
 --  alternatively   0.39540487543000014
 --  alternately   0.4439627395600003
 --  nevertheless   0.477163975792001
----
Closest to `seem` are:
 --  seems   0.007052995653001215
 --  appears   0.32837244735200044
 --  looks   0.33534638306400066
 --  transpires   0.456207185493001
----
Closest to `beautiful` are:
 --  gorgeous   0.019236443661999614
 --  wonderful   0.1014964337829

### Preparing the dataset

In [14]:
max_len = 250
# Pad (or truncate) every sequence to max_len; padding is appended at the end.
train_x, test_x = (
    pad_sequences(seqs, maxlen=max_len, padding='post')
    for seqs in (dataset.train_seqs2, dataset.test_seqs2)
)
# Labels as NumPy arrays so they can be indexed alongside train_x / test_x.
train_y, test_y = np.array(dataset.train_y), np.array(dataset.test_y)

### Loading the sentiment analysis model

In [15]:
# Clear the default graph so the model can be rebuilt when this cell is re-run.
tf.reset_default_graph()
# Close the session left over from a previous run of this cell, if any.
# tf.get_default_session() is only non-None inside a `with sess:` /
# `sess.as_default()` block, so testing it here never released the old session.
if 'sess' in globals():
    sess.close()
sess = tf.Session()
batch_size = 1
lstm_size = 128
#max_len =  100

# Build the single-example model under the 'imdb' variable scope; this first
# construction creates the variables (reuse=False).
with tf.variable_scope('imdb', reuse=False):
    model = models.SentimentModel(batch_size=batch_size,
                           lstm_size = lstm_size,
                           max_len = max_len,
                           embeddings_dim=300, vocab_size=dist_mat.shape[1],is_train = False)
# Restore the trained weights from the checkpoint into the session.
saver = tf.train.Saver()
saver.restore(sess, './models/imdb_model')



Instructions for updating:

Future major versions of TensorFlow will allow gradients to flow
into the labels input on backprop by default.

See @{tf.nn.softmax_cross_entropy_with_logits_v2}.

INFO:tensorflow:Restoring parameters from ./models/imdb_model


## Loading the Google Language model

In [16]:
goog_lm = LM()

LM vocab loading done


Recovering graph.


INFO:tensorflow:Recovering Graph goog_lm/graph-2016-09-10.pbtxt


Recovering checkpoint goog_lm/ckpt-*


#### Demonstrating the Google LM

In [17]:
# Look up the 20 nearest neighbours of 'play' and map their ids back to words.
src_word = dataset.dict['play']
nearest, nearest_dist = glove_utils.pick_most_similar_words(
    src_word, dist_mat, 20)
nearest_w = [dataset.inv_dict[word_id] for word_id in nearest]
print('Closest to `%s` are %s' %(dataset.inv_dict[src_word], nearest_w))

Closest to `play` are ['playing', 'gaming', 'games', 'toy', 'playback', 'game', 'plaything', 'cheek', 'gambling', 'toys', 'toying', 'replay', 'stake', 'plays', 'jeu', 'gamble', 'staking', 'reproduction', 'casino', 'sets']


In [18]:
# Score each candidate word between the prefix and suffix with the language
# model, then report the candidate with the highest score.
prefix, suffix = 'is', 'with'
lm_preds = goog_lm.get_words_probs(prefix, nearest_w, suffix)
best_candidate = nearest_w[np.argmax(lm_preds)]
print('most probable is ', best_candidate)


most probable is  game


## Try Attack

In [19]:
from attacks import GreedyAttack, GeneticAtack

## Main attack

In [21]:
pop_size = 32
n1 = 8

# Settings shared by both extra copies of the sentiment model; only the batch
# size differs between them.
shared_model_kwargs = dict(
    lstm_size=lstm_size,
    max_len=max_len,
    embeddings_dim=300,
    vocab_size=dist_mat.shape[1],
    is_train=False,
)

# Both copies are built under the 'imdb' scope with reuse=True, so they reuse
# the variables already created for `model`.
with tf.variable_scope('imdb', reuse=True):
    batch_model = models.SentimentModel(batch_size=pop_size, **shared_model_kwargs)

with tf.variable_scope('imdb', reuse=True):
    neighbour_model = models.SentimentModel(batch_size=n1, **shared_model_kwargs)

ga_atttack = GeneticAtack(
    sess, model, batch_model, neighbour_model, dataset, dist_mat,
    skip_list,
    goog_lm,
    max_iters=30,
    pop_size=pop_size,
    n1=n1,
    n2=4,
    use_lm=True,
    use_suffix=False,
)

In [22]:
# Draw TEST_SIZE distinct test examples at random and attack them in turn,
# stopping once 50 examples have been attacked.
TEST_SIZE = 1000
test_idx = np.random.choice(len(dataset.test_y), TEST_SIZE, replace=False)
test_len = [len(dataset.test_seqs2[idx]) for idx in test_idx]
print('Shortest sentence in our test set is %d words' %np.min(test_len))

# Parallel result lists: entry k of every list describes the k-th attacked
# example. Later cells index them with the same position.
test_list = []        # index of the example within the test set
orig_list = []        # original padded sequence (row of test_x)
orig_label_list = []  # label taken from test_y
adv_list = []         # adversarial sequence, or None if the attack failed
dist_list = []        # number of changed positions, or 100000 on failure

for i in range(TEST_SIZE):
    x_orig = test_x[test_idx[i]]
    orig_label = test_y[test_idx[i]]
    orig_preds = model.predict(sess, x_orig[np.newaxis, :])[0]
    # print(orig_label, orig_preds, np.argmax(orig_preds))
    # Only attack examples the model classifies correctly to begin with.
    if np.argmax(orig_preds) != orig_label:
        print('skipping wrong classifed ..')
        print('--------------------------')
        continue
    # Count of non-padding tokens: sign() maps every non-zero id to 1
    # (assumes word ids are non-negative and 0 is the padding value).
    x_len = np.sum(np.sign(x_orig))
    if x_len >= 100:
        print('skipping too long input..')
        print('--------------------------')
        continue
    # if np.max(orig_preds) < 0.90:
    #    print('skipping low confidence .. \n-----\n')
    #    continue
    print('****** ', len(test_list) + 1, ' ********')
    # Binary task: the attack targets the opposite label.
    target_label = 1 if orig_label == 0 else 0
    x_adv = ga_atttack.attack( x_orig, target_label)
    if x_adv is None:
        print('%d failed' %(i+1))
        # Sentinel distance marking a failed attack.
        num_changes = 100000
    else:
        num_changes = np.sum(x_orig != x_adv)
        print('%d - %d changed.' %(i+1, num_changes))
        # display_utils.visualize_attack(sess, model, dataset, x_orig, x_adv)
    # Record all five entries together, only after the attack has returned.
    # If the attack is interrupted or raises, nothing is appended and the
    # lists stay the same length.
    test_list.append(test_idx[i])
    orig_list.append(x_orig)
    orig_label_list.append(orig_label)
    adv_list.append(x_adv)
    dist_list.append(num_changes)
    print('--------------------------')
    if (len(test_list)>= 50):
        break

Shortest sentence in our test set is 16 words
skipping wrong classifed ..
--------------------------
skipping wrong classifed ..
--------------------------
skipping too long input..
--------------------------
skipping too long input..
--------------------------
skipping too long input..
--------------------------
******  1  ********
		 0  --  0.35230252
		 1  --  0.36194578
		 2  --  0.40592965
		 3  --  0.47743848
		 4  --  0.5287754
6 - 6 changed.
--------------------------
******  2  ********
		 0  --  0.05999096
		 1  --  0.083567075
		 2  --  0.11059701
		 3  --  0.13808562
		 4  --  0.13883431
		 5  --  0.14607526
		 6  --  0.14607526
		 7  --  0.14607526
		 8  --  0.16570853
		 9  --  0.19092996
		 10  --  0.24687818
		 11  --  0.24687818
		 12  --  0.25345075
		 13  --  0.29303515
		 14  --  0.31488502
		 15  --  0.33291987
		 16  --  0.42738083
		 17  --  0.42738083
		 18  --  0.5090042
7 - 21 changed.
--------------------------
skipping too long input..
----------------------

skipping wrong classifed ..
--------------------------
skipping too long input..
--------------------------
skipping too long input..
--------------------------
******  14  ********
		 0  --  0.2780798
		 1  --  0.42535764
		 2  --  0.46358812
		 3  --  0.59846497
117 - 5 changed.
--------------------------
skipping wrong classifed ..
--------------------------
skipping too long input..
--------------------------
skipping too long input..
--------------------------
******  15  ********
		 0  --  0.25391755
		 1  --  0.26152465
		 2  --  0.31985328
		 3  --  0.31985328
		 4  --  0.33081475
		 5  --  0.38068584
		 6  --  0.38959813
		 7  --  0.38959813
		 8  --  0.4041461
		 9  --  0.4041461
		 10  --  0.4359135
		 11  --  0.47622898
		 12  --  0.52183485
121 - 14 changed.
--------------------------
skipping too long input..
--------------------------
skipping too long input..
--------------------------
skipping too long input..
--------------------------
skipping too long input..
------

skipping too long input..
--------------------------
skipping wrong classifed ..
--------------------------
skipping too long input..
--------------------------
skipping too long input..
--------------------------
skipping too long input..
--------------------------
skipping too long input..
--------------------------
skipping too long input..
--------------------------
skipping too long input..
--------------------------
skipping too long input..
--------------------------
skipping too long input..
--------------------------
skipping too long input..
--------------------------
skipping too long input..
--------------------------
skipping too long input..
--------------------------
******  27  ********
		 0  --  0.08497903
		 1  --  0.116107926
		 2  --  0.12578022
		 3  --  0.14885618
		 4  --  0.17253011
		 5  --  0.1941384
		 6  --  0.22847949
		 7  --  0.23138979
		 8  --  0.24679948
		 9  --  0.24801531
		 10  --  0.27816123
		 11  --  0.29613268
		 12  --  0.29613268
		 13  --  0

KeyboardInterrupt: 

In [None]:
#with open('attack_results.pkl', 'wb') as f:
#    pickle.dump((orig_list, orig_label_list, adv_list, dist_list), f)

## Visualize some output results

In [37]:
# Non-zero token count of each attacked original sequence.
orig_len = [np.sum(np.sign(seq)) for seq in orig_list]
# Fraction of tokens modified per example; failed attacks carry the 100000
# sentinel from dist_list and therefore produce a very large ratio.
ratio = [dist_list[pos] / length for pos, length in enumerate(orig_len)]

In [39]:
# Drop the 100000 sentinel entries before taking the median.
successful_dists = [dist for dist in dist_list if dist != 100000]
print('Median number of modifications ' , np.median(successful_dists))

Median number of modifications  8.0


In [40]:
# Keep only ratios below 1.0.
ratio_success = list(filter(lambda r: r < 1.0, ratio))

In [43]:
np.mean(ratio_success)

0.14703799051733282

In [None]:
# The original `test_x.r` was an incomplete attribute access that raises
# AttributeError; show the array's shape instead.
# NOTE(review): the intended attribute is a guess — confirm or remove this cell.
test_x.shape

In [65]:
# Position in the result lists of the example to display.
visual_idx = 24
x_orig_vis, x_adv_vis = orig_list[visual_idx], adv_list[visual_idx]
display_utils.visualize_attack(sess, model, dataset, x_orig_vis, x_adv_vis)

Original Prediction = Negative. (Confidence = 76.12) 


---------  After attack -------------
New Prediction = Positive. (Confidence = 53.23) 


In [None]:
# Failed attacks are stored in dist_list as the sentinel 100000. Exclude them
# (as the median cell does) so they do not dominate the average.
print('Average number of changed wrods = ', np.mean([x for x in dist_list if x != 100000]))

In [34]:
## Save success
# Persist the five parallel result lists as one tuple.
attack_results = (test_list, orig_list, orig_label_list, adv_list, dist_list)
with open('attack_results_final.pkl', 'wb') as results_file:
    pickle.dump(attack_results, results_file)
    


In [66]:
import display_utils

In [68]:
# Position in the result lists of the example to display.
visual_idx = 24

display_utils.visualize_attack2(
    dataset,
    test_list[visual_idx],
    orig_list[visual_idx],
    adv_list[visual_idx],
    orig_label_list[visual_idx],
)

RAW TEXT: 


--------------------
Original Prediction = Negative.  


---------  After attack -------------
New Prediction = Positive.


In [None]:
# NOTE(review): `src_text` is not defined in any visible cell of this notebook;
# presumably left over from an earlier session — define it or remove this cell.
src_text[0]

In [None]:
# NOTE(review): `orig_text` is not defined in any visible cell of this notebook;
# presumably left over from an earlier session — define it or remove this cell.
orig_text[0]

In [None]:
# NOTE(review): `adv_text` is not defined in any visible cell of this notebook;
# presumably left over from an earlier session — define it or remove this cell.
adv_text[0]

In [None]:
## Baseline
koontz's