In [3]:
import numpy as np
import tensorflow as tf

from keras.preprocessing.sequence import pad_sequences
import pickle

In [5]:
import data_utils
import glove_utils
import models
import display_utils
import goog_lm

In [14]:
import lm_data_utils
import lm_utils

In [34]:
# Seed NumPy and TensorFlow with the same fixed value so that random sampling
# in later cells is repeatable from a fresh kernel.
seed_value = 1000
np.random.seed(seed_value)
tf.set_random_seed(seed_value)



In [6]:
# Enable IPython's autoreload extension in mode 2 so that edits to the imported
# project modules are picked up live, without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [8]:
# Vocabulary size; it is part of the file name of the pickled dataset to load.
VOCAB_SIZE = 50000
dataset_path = 'aux_files/dataset_%d.pkl' % VOCAB_SIZE

# NOTE: pickle.load can execute arbitrary code; only load files you created or trust.
with open(dataset_path, 'rb') as dataset_file:
    dataset = pickle.load(dataset_file)

In [9]:
# Length (number of tokens) of every test sequence, in dataset order.
doc_len = [len(seq) for seq in dataset.test_seqs2]

In [11]:
dist_mat_path = 'aux_files/dist_counter_%d.npy' % VOCAB_SIZE
dist_mat = np.load(dist_mat_path)

# Index 0 is not part of the dictionary. Overwrite its whole row and column
# with a very large distance so it is never returned as a most-similar word.
far_away = 100000
dist_mat[0, :] = far_away
dist_mat[:, 0] = far_away

### Demonstrating how we find the most similar words

In [19]:
# For a handful of word ids, print the word together with the neighbours
# returned by pick_most_similar_words (called with 20 as its third argument).
for src_word in range(300, 305):
    nearest, nearest_dist = glove_utils.pick_most_similar_words(src_word, dist_mat, 20)
    neighbour_words = [dataset.inv_dict[word_id] for word_id in nearest]
    print('Closest to `%s` are %s' % (dataset.inv_dict[src_word], neighbour_words))
    print('----')

Closest to `later` are ['subsequent', 'subsequently', 'afterward', 'afterwards', 'thereafter', 'trailing', 'after', 'then', 'posterior', 'following', 'resultant', 'eventually', 'farther', 'next', 'ultimately', 'further', 'successive', 'latter', 'ensuing', 'upon']
----
Closest to `takes` are ['pick', 'taking', 'picked', 'take', 'surrounds', 'took', 'assumes', 'taken', 'selected', 'chosen', 'select', 'choose', 'selecting', 'opted', 'chooses', 'selects', 'choosing', 'picks', 'picking', 'chose']
----
Closest to `instead` are ['conversely', 'however', 'alternatively', 'alternately', 'nevertheless', 'rather', 'nonetheless', 'similarly', 'equally', 'likewise', 'yet', 'also', 'notwithstanding', 'though', 'despite', 'even', 'otherwise', 'still', 'albeit', 'additionally']
----
Closest to `seem` are ['seems', 'appears', 'looks', 'transpires', 'sounds', 'emerges', 'appear', 'seemingly', 'apparently', 'look', 'emerge', 'occurs', 'gaze', 'noises', 'appearing', 'listens', 'arise', 'evidently', 'happe

### Preparing the dataset

In [20]:
# Bring every sequence to a fixed length of `max_len`, padding at the end
# (padding='post'), and turn the label lists into NumPy arrays.
max_len = 250
pad_options = dict(maxlen=max_len, padding='post')

train_x = pad_sequences(dataset.train_seqs2, **pad_options)
test_x = pad_sequences(dataset.test_seqs2, **pad_options)

train_y = np.array(dataset.train_y)
test_y = np.array(dataset.test_y)

### Loading the sentiment analysis model

In [21]:
# Make this cell safe to re-run: close the session created by a previous run
# (if any) *before* resetting the default graph. The earlier guard,
# `tf.get_default_session()`, is only non-None inside a `with sess.as_default()`
# block, so it never triggered here and the old session was left open.
if 'sess' in globals():
    sess.close()
tf.reset_default_graph()
sess = tf.Session()

# Single-example model used for per-document predictions.
batch_size = 1
lstm_size = 128

# Build the sentiment model under the 'imdb' variable scope; the vocabulary
# size is taken from the width of the distance matrix.
with tf.variable_scope('imdb', reuse=False):
    model = models.SentimentModel(batch_size=batch_size,
                                  lstm_size=lstm_size,
                                  max_len=max_len,
                                  embeddings_dim=300,
                                  vocab_size=dist_mat.shape[1],
                                  is_train=False)

# Restore the trained weights from the checkpoint into the new session.
saver = tf.train.Saver()
saver.restore(sess, './models/imdb_model')



INFO:tensorflow:Restoring parameters from ./models/imdb_model


## Loading the Google Language model

In [23]:
# The name `goog_lm` is rebound here from the imported module to an LM
# instance, which is what the later cells use. Resolve the module through
# importlib instead of the (possibly already rebound) global name, so that
# re-running this cell does not fail with an AttributeError on the instance.
import importlib
goog_lm = importlib.import_module('goog_lm').LM()

LM vocab loading done


Recovering graph.


INFO:tensorflow:Recovering Graph goog_lm/graph-2016-09-10.pbtxt


Recovering checkpoint goog_lm/ckpt-*


#### Demonstrating the Google LM

In [27]:
# Look up the id of 'play', fetch its neighbours from the distance matrix and
# map the returned ids back to words; `nearest_w` is reused by the LM demo.
src_word = dataset.dict['play']
nearest, nearest_dist = glove_utils.pick_most_similar_words(src_word, dist_mat, 20)
nearest_w = [dataset.inv_dict[word_id] for word_id in nearest]

src_text = dataset.inv_dict[src_word]
print('Closest to `%s` are %s' % (src_text, nearest_w))

Closest to `play` are ['playing', 'gaming', 'games', 'toy', 'playback', 'game', 'plaything', 'cheek', 'gambling', 'toys', 'toying', 'replay', 'stake', 'plays', 'jeu', 'gamble', 'staking', 'reproduction', 'casino', 'sets']


In [32]:
# Score each candidate word given the prefix and report the one whose score
# returned by the language model is the largest.
prefix = 'he'
lm_preds = goog_lm.get_words_probs(prefix, nearest_w)
best_candidate = np.argmax(lm_preds)
print('most probable is ', nearest_w[best_candidate])


most probable is  plays


## Try Attack

In [33]:
from attacks import GreedyAttack, GeneticAtack

In [36]:
# Number of test documents to sample, and the sampled indices (drawn without
# replacement from all test documents).
TEST_SIZE = 10
num_test_docs = len(dataset.test_y)
test_idx = np.random.choice(num_test_docs, size=TEST_SIZE, replace=False)

In [39]:
## Compute the length of every sampled test document.
# Iterate over test_idx itself rather than a hard-coded range(10), so the
# result stays correct when TEST_SIZE is changed.
test_len = [len(dataset.test_seqs2[doc_idx]) for doc_idx in test_idx]
print('Shortest sentence in our test set is %d words' % np.min(test_len))

Shortest sentence in our test set is 68 words


## Main Attack 

In [49]:
# Population size of the genetic attack; also the batch size of the second
# model instance, which is passed to the attack alongside the single-example model.
pop_size = 60

# reuse=True: build the batched model on the variables that already exist
# under the 'imdb' scope instead of creating new ones.
with tf.variable_scope('imdb', reuse=True):
    batch_model = models.SentimentModel(
        batch_size=pop_size,
        lstm_size=lstm_size,
        max_len=max_len,
        embeddings_dim=300,
        vocab_size=dist_mat.shape[1],
        is_train=False,
    )

ga_atttack = GeneticAtack(
    sess, model, batch_model, dataset, dist_mat, goog_lm,
    max_iters=50,
    pop_size=pop_size,
    n1=8,
    n2=3,
    use_lm=True,
)

In [50]:
# Draw a fresh random sample of test documents to attack.
test_idx = np.random.choice(len(dataset.test_y), TEST_SIZE, replace=False)

# Book-keeping for the documents that are actually attacked: dataset index,
# original input, original label, attack result (None when the attack failed)
# and, for successful attacks only, the number of changed positions.
test_list, orig_list, orig_label_list, adv_list, dist_list = [], [], [], [], []

separator = '--------------------------'

for i, doc_idx in enumerate(test_idx):
    x_orig = test_x[doc_idx]
    orig_label = test_y[doc_idx]
    orig_preds = model.predict(sess, x_orig[np.newaxis, :])[0]

    # Only attack documents the model classifies correctly to begin with.
    if np.argmax(orig_preds) != orig_label:
        print('skipping wrong classifed ..')
        print(separator)
        continue

    # Sum of element signs; presumably the count of non-padding tokens
    # (assumes ids are non-negative and padding is 0 -- TODO confirm).
    x_len = np.sum(np.sign(x_orig))
    if x_len >= 150:
        print('skipping too long input..')
        print(separator)
        continue

    # Optional extra filter, currently disabled:
    # if np.max(orig_preds) < 0.90:
    #    print('skipping low confidence .. \n-----\n')
    #    continue

    # The attack targets the opposite of the original label.
    target_label = int(orig_label == 0)

    test_list.append(doc_idx)
    orig_list.append(x_orig)
    orig_label_list.append(orig_label)

    x_adv = ga_atttack.attack(x_orig, target_label)
    adv_list.append(x_adv)

    if x_adv is not None:
        num_changes = np.sum(x_orig != x_adv)
        print('%d - %d changed.' % (i + 1, num_changes))
        dist_list.append(num_changes)
    else:
        print('%d failed' % (i + 1))
    print(separator)

skpping long ..
0  --  0.45439288
1  --  0.47749314
2  --  0.4910039
3  --  0.5279712
2 - 6 changed.
--------------------------
0  --  0.06369818
1  --  0.15067357
2  --  0.15067357
3  --  0.15697743
4  --  0.1793845


KeyboardInterrupt: 

In [51]:
#with open('attack_results.pkl', 'wb') as f:
#    pickle.dump((orig_list, orig_label_list, adv_list, dist_list), f)

## Visualize some output results

In [52]:

# adv_list stores None for attacks that failed, so entry 0 is not guaranteed
# to be a usable adversarial example. Visualize the first successful attack
# instead (this is still entry 0 whenever the first attack succeeded).
first_success = next(
    (pos for pos, adv in enumerate(adv_list) if adv is not None), None)
if first_success is None:
    print('No successful attack to visualize.')
else:
    display_utils.visualize_attack(sess, model, dataset,
                                   orig_list[first_success],
                                   adv_list[first_success])

Original Prediction = Negative. (Confidence = 62.18) 


---------  After attack -------------
New Prediction = Positive. (Confidence = 52.80) 


In [53]:
# dist_list only has entries for successful attacks; guard the empty case so
# that np.mean does not return nan (with a RuntimeWarning) when none succeeded.
if len(dist_list) > 0:
    print('Average number of changed wrods = ', np.mean(dist_list))
else:
    print('No successful attacks, so there is no average to report.')

Average number of changed wrods =  6.0
