In [2]:
%load_ext autoreload
%autoreload 2

import matplotlib.pyplot as plt
%matplotlib inline

from IPython.core.display import HTML
from IPython.display import display

In [3]:

import numpy as np
import os
import data_utils
import pickle
import attacks
import random
from keras.models import load_model

import greedy_utils

Using TensorFlow backend.


In [4]:
# Locations of the on-disk inputs used by this notebook (relative to the working directory).
MODEL_PATH = '20news_model.h5'        # handed to keras' load_model
TEXT_DATA_DIR = '20_newsgroup/'       # corpus root; its directory listing provides the class labels
TOKENIZER_PATH = 'tokenizer.pickle'   # handed to data_utils.load_tokenizer

In [5]:
# Load the tokenizer pair returned by data_utils.load_tokenizer and the saved classifier.
tokenizer, inverse_tokenizer = data_utils.load_tokenizer(TOKENIZER_PATH)
model = load_model(MODEL_PATH)

# Class names are the sorted entries of the corpus directory; a class index used
# elsewhere in the notebook is a position in this list.
labels = sorted(os.listdir(TEXT_DATA_DIR))

In [None]:
# Draw one document from the corpus; the helper returns its path together with
# the label that is later printed as the true label.
picked = data_utils.pick_random_file(TEXT_DATA_DIR)
sample_file, true_label = picked

# Read the raw text, then convert it into the feature array fed to the model.
file_text = data_utils.load_textfile(sample_file)
file_features = data_utils.process_text(tokenizer, file_text)

In [None]:
# Classify the sampled document and report the highest-scoring class.
pred_scores = model.predict(file_features)
doc_scores = pred_scores[0]                 # scores of the first (only) row passed in
orig_prediction = np.argmax(doc_scores)     # index into `labels`

print('TrueLabel = %s' % true_label)
print('Predicted "%s" with %f .' % (labels[orig_prediction], doc_scores[orig_prediction]))

## Randomised Attack (Non-targeted)

 The random attack selects and replaces words at random.

In [6]:
# Topic-word statistics used by the greedy attack. Set COMPUTE_PROBS to True to
# recompute them from the corpus and refresh the two pickle caches; leave it False
# to reuse the caches already on disk.
COMPUTE_PROBS = False
if not COMPUTE_PROBS:
    # NOTE(review): pickle.load can execute arbitrary code; only load cache files
    # that were produced by this notebook.
    with open('topic_words.pickle', 'rb') as words_file:
        topics_words = pickle.load(words_file)
    # The file name is misspelt ("topc"), but it must stay as-is so that it matches
    # the name used when the cache is written below.
    with open('topc_words_probs.pickle', 'rb') as probs_file:
        topics_words_probs = pickle.load(probs_file)
else:
    num_words = len(inverse_tokenizer)
    topics_words, topics_words_probs = greedy_utils.compute_topic_words(
        TEXT_DATA_DIR, tokenizer, labels, num_words)
    with open('topic_words.pickle', 'wb') as words_file:
        pickle.dump(topics_words, words_file, protocol=pickle.HIGHEST_PROTOCOL)
    with open('topc_words_probs.pickle', 'wb') as probs_file:
        pickle.dump(topics_words_probs, probs_file, protocol=pickle.HIGHEST_PROTOCOL)

The greedy attack chooses the word in the document with the highest $p(\text{original class} \mid \text{word})$ and replaces it with the most similar word among the top 200 words ranked by $p(\text{goal class} \mid \text{word})$.

In [None]:
# Build the attacker around the loaded classifier and the topic-word statistics.
# temp is forwarded unchanged; presumably a temperature for the attack's word
# selection -- TODO confirm against attacks.GreedyAttack.
greedy_attack = attacks.GreedyAttack(
    model,
    topics_words,
    topics_words_probs,
    temp=0.15,
)

In [None]:
# Candidate targets: every class index except the one the model currently predicts.
other_labels = [idx for idx in range(len(labels)) if idx != orig_prediction]
random_target = np.random.choice(other_labels)
print('Random target = %s' % labels[random_target])

# Attack a copy so that file_features itself is never handed to the attacker.
x_orig = file_features.copy()
attack_output = greedy_attack.attack(x_orig, random_target)
x_adv, o_hist, t_hist = attack_output

In [None]:
# Report the outcome of the targeted attack. x_adv is None when the attack failed;
# otherwise print a summary and plot the two histories returned by the attack.
if x_adv is not None:
    adv_prediction = np.argmax(model.predict(x_adv))
    print('Attack succeeded after %d iterations.' % len(t_hist))

    orig_name = labels[orig_prediction]
    adv_name = labels[adv_prediction]
    print('Original class: %s, Attack class: %s' % (orig_name, adv_name))

    # Positions whose value differs between the adversarial and the original input,
    # expressed also as a percentage of the original's non-zero entries.
    num_changed = np.count_nonzero(x_adv != x_orig)
    print("Number of changed words = %d (%0.2f %%)"
          % (num_changed, 100*num_changed/np.count_nonzero(x_orig)))

    plt.plot(o_hist, 'g', label=orig_name + ' - greedy attack v2')
    plt.plot(t_hist, 'r', label=adv_name + '- greedy attack v2')
    plt.legend()
else:
    print('Attack failed. !')
In [None]:
"""
with open('ohist.p', 'wb') as handle:
        pickle.dump(o_hist, handle, protocol=pickle.HIGHEST_PROTOCOL)
with open('thist.p', 'wb') as handle:
        pickle.dump(t_hist, handle, protocol=pickle.HIGHEST_PROTOCOL)
"""

## Reconstruct Text Document

In [None]:
# Turn the first row of the original and of the adversarial features back into text.
# NOTE(review): x_adv is None after a failed attack, in which case the indexing
# below raises; run this cell only after a successful attack.
orig_row = file_features[0]
orig_text = data_utils.reconstruct_text(inverse_tokenizer, orig_row)
adv_row = x_adv[0]
adv_text = data_utils.reconstruct_text(inverse_tokenizer, adv_row)

## Visualize Attack

In [None]:
# Render both texts; the helper returns one value per text, each of which is
# passed to HTML(...) for display further down.
rendered = data_utils.render_attack(orig_text, adv_text)
orig_html, adv_html = rendered

In [None]:
# Only the last expression of a cell is displayed automatically, so the heading
# has to be shown explicitly; otherwise it is built and silently dropped.
display(HTML("<b> Original Text </b>"))
display(HTML(orig_html))

In [None]:
# Only the last expression of a cell is displayed automatically, so the heading
# has to be shown explicitly; otherwise it is built and silently dropped.
display(HTML("<b> Adversarial Text </b>"))
display(HTML(adv_html))

## Experiment Across All Classes

In [None]:
# Experiment: for every class directory, sample a few documents, keep those the
# model classifies correctly, attack each of them towards every other class, and
# record per target class the mean fraction of changed entries.
#
# `results` is a flat list holding len(labels) values per class directory, appended
# in directory order. A value of -1 means "no data": either no sampled document of
# that class was usable, or no attack towards that target succeeded. The slot of
# the class itself is 0 whenever at least one document was usable.
results = []
num_docs_per_class = 3   # should be changed to a larger number (e.g. 50) later
num_classes = len(labels)

# Enumerate the same sorted listing that `labels` is built from, so class_ind stays
# aligned with the indices of `labels` (non-directory entries still consume an index).
for class_ind, name in enumerate(sorted(os.listdir(TEXT_DATA_DIR))):
    path = os.path.join(TEXT_DATA_DIR, name)
    print(path)
    if not os.path.isdir(path):
        continue

    doc_names = sorted(os.listdir(path))
    # random.sample raises ValueError when asked for more items than exist.
    to_test = random.sample(doc_names, min(num_docs_per_class, len(doc_names)))
    docnum = len(to_test)               # documents that end up being attacked
    changed_sum = [0.0] * num_classes   # summed change ratio per target class
    success_count = [0] * num_classes   # successful attacks per target class

    for fname in to_test:
        file_text = data_utils.load_textfile(os.path.join(path, fname))
        file_features = data_utils.process_text(tokenizer, file_text)
        pred_scores = model.predict(file_features)
        orig_prediction = np.argmax(pred_scores[0])
        if orig_prediction != class_ind:
            # Misclassified documents are excluded from the experiment.
            docnum -= 1
            continue

        other_labels = [x for x in range(len(labels)) if x != orig_prediction]
        for label in other_labels:
            greedy_attack = attacks.GreedyAttack(model, topics_words, topics_words_probs, temp=0.15)
            x_orig = file_features.copy()
            x_adv, o_hist, t_hist = greedy_attack.attack(x_orig, label)
            num_tokens = np.count_nonzero(x_orig)
            if x_adv is None or num_tokens == 0:
                # A failed attack returns None; comparing None with the array would
                # add a meaningless ratio to the average, so it is skipped. An input
                # with no non-zero entries would otherwise divide by zero.
                continue
            changed_sum[label] += np.count_nonzero(x_adv != x_orig) / num_tokens
            success_count[label] += 1

    if docnum == 0:
        changed_avg = [-1] * num_classes
    else:
        changed_avg = []
        for target in range(num_classes):
            if target == class_ind:
                changed_avg.append(0.0)     # a class is never attacked towards itself
            elif success_count[target]:
                changed_avg.append(changed_sum[target] / success_count[target])
            else:
                changed_avg.append(-1)      # every attack towards this target failed
    print(changed_avg)
    results.extend(changed_avg)

print(results)
with open('results.p', 'wb') as handle:
    pickle.dump(results, handle, protocol=pickle.HIGHEST_PROTOCOL)

20_newsgroup/alt.atheism
