## Solving dependencies

### Git repo, embeddings, NLTK

In [0]:
# Clone the project repository and switch the kernel's working directory to
# its src/ folder (presumably so that the local modules imported in later
# cells -- preprocessing, data_utils, models -- can be found; TODO confirm).
! git clone https://github.com/josipjukic/Adversarial-NLP.git
% cd /content/Adversarial-NLP/src

# Copy the GloVe tensor file and the counter-fitted vectors from Drive into a
# local .vector_cache directory.
# NOTE(review): the /content/drive paths assume Google Drive is already
# mounted (Colab); nothing in this cell mounts it.
% mkdir .vector_cache
% cp '/content/drive/My Drive/Master Thesis/glove/glove.6B.100d.txt.pt' .vector_cache/
% cp '/content/drive/My Drive/Master Thesis/glove/counter-fitted-vectors.txt' .vector_cache/

# Download the NLTK resources: stop-word list, WordNet and the averaged
# perceptron POS tagger.
import nltk
nltk.download('stopwords')
nltk.download('wordnet')
nltk.download('averaged_perceptron_tagger')

## Dataset save/load

In [0]:
import torch
from torchtext import data
from torchtext import datasets
import spacy
import random
from preprocessing import imdb_preprocess
from data_utils import load_dataset

In [0]:
# Seed PyTorch's RNG. NOTE(review): only torch is seeded here; the NumPy RNG
# used by the character-level transforms further down is not seeded in any
# visible cell.
SEED = 42
torch.manual_seed(SEED)
# Directory on Drive the dataset is loaded from.
LOAD_PATH = '/content/drive/My Drive/Master Thesis/IMDB'
# Upper bound on the size of the TEXT vocabulary.
MAX_VOCAB_SIZE = 25_000
# Name of the pretrained vectors handed to TEXT.build_vocab.
EMBEDDINGS_FILE = 'glove.6B.100d'

# load_dataset is a project helper; it returns the data splits and the
# field objects, which are unpacked below.
splits, fields = load_dataset(LOAD_PATH,
                              include_lengths=True,
                              lower=False,
                              stop_words=None)
train_data, valid_data, test_data = splits
TEXT, LABEL, RAW, ID = fields
# Flag the RAW and ID fields as non-target fields.
RAW.is_target = ID.is_target = False
# Both vocabularies are built from the training split only.
LABEL.build_vocab(train_data)
TEXT.build_vocab(train_data, 
                 max_size = MAX_VOCAB_SIZE, 
                 vectors = EMBEDDINGS_FILE, 
                 unk_init = torch.Tensor.normal_)

In [0]:
from argparse import Namespace
from data_utils import expand_paths
from models import PackedLSTM

# Single container for every setting used by the following cells.
# NOTE(review): several entries (train_state_file, save_dir, input_dim, seed,
# learning_rate, batch_size, num_epochs, early_stopping_criteria,
# reload_from_files, expand_filepaths_to_save_dir) are not read by any cell
# visible in this notebook; they look like leftovers from a training setup.
args = Namespace(
    # Data and path settings
    model_path='/content/drive/My Drive/Master Thesis/torch_models/imdb/imdb_model.torch',
    train_state_file='train_state.json',
    save_dir='/content/drive/My Drive/Master Thesis/torch_models/imdb/',
    # Vocabulary indices of the padding and unknown tokens
    PAD_IDX = TEXT.vocab.stoi[TEXT.pad_token],
    UNK_IDX = TEXT.vocab.stoi[TEXT.unk_token],
    # Model hyper-parameters
    input_dim = len(TEXT.vocab),
    embedding_dim=100,
    hidden_dim=256,
    output_dim = 1,
    num_layers=2,
    bidirectional=True,
    # Training hyper-parameters
    seed=SEED,
    learning_rate=0.001,
    dropout_p=0.5,
    batch_size=64,
    num_epochs=20,
    early_stopping_criteria=5,
    # Runtime options; the device falls back to CPU when CUDA is unavailable
    reload_from_files=True,
    expand_filepaths_to_save_dir=True,
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
)

# pretrained_embeddings aliases TEXT.vocab.vectors (no copy is made), so the
# two assignments below zero the <unk> and <pad> rows of the vocabulary's
# vector table in place.
pretrained_embeddings = TEXT.vocab.vectors
pretrained_embeddings[args.UNK_IDX] = torch.zeros(args.embedding_dim)
pretrained_embeddings[args.PAD_IDX] = torch.zeros(args.embedding_dim)

# Build the project's PackedLSTM with the settings from `args`.
model = PackedLSTM(
    args.embedding_dim, 
    args.hidden_dim, 
    args.output_dim, 
    args.num_layers,
    pretrained_embeddings,
    args.bidirectional,
    args.dropout_p, 
    args.PAD_IDX,
    args.device
)
# Restore the saved weights (mapped onto args.device), move the model to
# that device and switch it to evaluation mode.
# NOTE(review): torch.load unpickles the file; only load checkpoints from a
# trusted location.
model.load_state_dict(torch.load(args.model_path, map_location=args.device))
model = model.to(args.device)
model.eval()

# One BucketIterator per split. Examples are sorted by text length inside
# each batch (sort_within_batch with the sort_key below).
# NOTE(review): the batch size is hard-coded to 512 here; args.batch_size
# (64) is not used.
train_iterator, valid_iterator, test_iterator = data.BucketIterator.splits(
    (train_data, valid_data, test_data),
    batch_size=512,
    sort_within_batch=True,
    sort_key = lambda x: len(x.text),
    device=args.device)
# Look-up of the iterators by split name.
iterator = dict(train=train_iterator, valid=valid_iterator, test=test_iterator)

In [0]:
def word_target(model, batch, y_preds, num_classes, device, mask_idx=0):
    """Score each row of the input by how much masking it hurts the prediction.

    For every index ``i`` along dim 0 of ``batch[0]`` the whole row ``i`` is
    temporarily overwritten with ``mask_idx``, ``model.predict_proba(batch)``
    is evaluated, and the probability the model assigns to the class given in
    ``y_preds`` is recorded. The returned score is ``1 - probability``, so a
    larger value means masking that row lowered the model's confidence more.

    Args:
        model: object exposing ``predict_proba(batch)``.
        batch: sequence whose first element is a 2-D integer tensor; it is
            modified in place while scoring and restored before returning
            (also when ``predict_proba`` raises). Presumably laid out as
            (sequence position, example) -- TODO confirm against the iterator.
        y_preds: index tensor passed to ``gather(1, ...)`` on the class
            probabilities, i.e. one class index per example in a column.
        num_classes: when equal to 2 the model output ``p`` is expanded to the
            two columns ``[1 - p, p]`` before gathering.
        device: unused; kept so all scoring functions share one signature.
        mask_idx: value written into the masked row. Defaults to 0, the value
            that used to be hard-coded.

    Returns:
        Tensor created with ``torch.zeros(inputs.shape)`` holding
        ``1 - probability`` for every (row, example) pair.
    """
    inputs = batch[0]
    losses = torch.zeros(inputs.shape)
    for i in range(inputs.shape[0]):
        saved_row = inputs[i, :].clone()
        inputs[i, :] = mask_idx
        try:
            with torch.no_grad():
                out = model.predict_proba(batch)
                if num_classes == 2:
                    out = torch.cat([1. - out, out], dim=1)
                losses[i, :] = out.gather(1, y_preds).squeeze()
        finally:
            # Always undo the masking so the caller's tensor is never left
            # corrupted, even if the model call fails.
            inputs[i, :] = saved_row
    return 1. - losses


def temporal(model, batch, y_preds, num_classes, device):
    """Score each position by the confidence change when it is appended.

    The model is queried on every prefix ``inputs[:k]`` (k = 1 .. number of
    rows), with the lengths capped at ``k``. For each prefix the probability
    of the class given in ``y_preds`` is recorded. The score of row ``k-1`` is
    that probability minus the one obtained for the previous prefix; the first
    row is compared against the uniform prior ``1 / num_classes``.

    Args:
        model: object exposing ``predict_proba((inputs, lengths))``.
        batch: pair ``(inputs, lengths)``; ``inputs`` is a 2-D tensor sliced
            along dim 0.
        y_preds: index tensor used with ``gather(1, ...)`` on the class
            probabilities.
        num_classes: when 2, the model output ``p`` is expanded to
            ``[1 - p, p]``.
        device: device the length cap (and the expanded probabilities) are
            moved to.

    Returns:
        Tensor created with ``torch.zeros(inputs.shape)`` holding the
        per-position confidence differences.
    """
    inputs, lengths = batch
    n_steps = inputs.shape[0]
    prefix_conf = torch.zeros(inputs.shape)
    scores = torch.zeros(inputs.shape)

    with torch.no_grad():
        for step in range(1, n_steps + 1):
            capped = torch.min(lengths, torch.tensor(step).to(device))
            probs = model.predict_proba((inputs[:step, :], capped))
            if num_classes == 2:
                probs = torch.cat([1. - probs, probs], dim=1).to(device)
            prefix_conf[step - 1, :] = probs.gather(1, y_preds).squeeze()

    # First position: difference to the uniform prior; the rest: difference
    # to the preceding prefix.
    scores[0, :] = prefix_conf[0, :] - 1.0 / num_classes
    scores[1:, :] = prefix_conf[1:, :] - prefix_conf[:-1, :]
    return scores


def temporal_tail(model, batch, y_preds, num_classes, device):
    """Score each position by the confidence change when it is prepended.

    Mirror image of the prefix-based score: the model is queried on every
    suffix ``inputs[k:]`` with lengths ``max(lengths - k, 1)``. For each
    suffix the probability of the class given in ``y_preds`` is recorded. The
    score of row ``k`` is that probability minus the one for the suffix
    starting at ``k + 1``; the last row is compared against the uniform prior
    ``1 / num_classes``.

    Args:
        model: object exposing ``predict_proba((inputs, lengths))``.
        batch: pair ``(inputs, lengths)``; ``inputs`` is a 2-D tensor sliced
            along dim 0.
        y_preds: index tensor used with ``gather(1, ...)`` on the class
            probabilities.
        num_classes: when 2, the model output ``p`` is expanded to
            ``[1 - p, p]``.
        device: device the length floor (and the expanded probabilities) are
            moved to.

    Returns:
        Tensor created with ``torch.zeros(inputs.shape)`` holding the
        per-position confidence differences.
    """
    inputs, lengths = batch
    n_steps = inputs.shape[0]
    suffix_conf = torch.zeros(inputs.shape)
    scores = torch.zeros(inputs.shape)

    with torch.no_grad():
        for start in range(n_steps):
            # Lengths shrink with the suffix but never drop below 1.
            remaining = torch.max(lengths - start, torch.tensor(1).to(device))
            probs = model.predict_proba((inputs[start:, :], remaining))
            if num_classes == 2:
                probs = torch.cat([1. - probs, probs], dim=1).to(device)
            suffix_conf[start, :] = probs.gather(1, y_preds).squeeze()

    # Last position: difference to the uniform prior; the rest: difference
    # to the following suffix.
    scores[-1, :] = suffix_conf[-1, :] - 1.0 / num_classes
    scores[:-1, :] = suffix_conf[:-1, :] - suffix_conf[1:, :]
    return scores


def combined_temporal(model, batch, y_preds, num_classes, device, alpha=1.):
    """Blend the prefix-based and suffix-based position scores.

    Returns ``temporal(...) + alpha * temporal_tail(...)``, both evaluated
    with the same arguments; ``alpha`` weights the suffix-based term.
    """
    shared_args = (model, batch, y_preds, num_classes, device)
    head_scores = temporal(*shared_args)
    tail_scores = temporal_tail(*shared_args)
    return head_scores + alpha * tail_scores


def random(inputs, *args, **kwargs):
    """Baseline scorer: uniform random scores, one per element of ``inputs``.

    Only the sizes of the first two dimensions of ``inputs`` are used; any
    further positional or keyword arguments are accepted and ignored.

    NOTE(review): this definition rebinds the name ``random``, which the
    import cell of this notebook binds to the standard-library module. Its
    first parameter is also ``inputs`` rather than ``model``/``batch``, so it
    cannot be called with the same keyword arguments as the other scorers.
    """
    n_rows = inputs.size(0)
    n_cols = inputs.size(1)
    return torch.rand(n_rows, n_cols)

In [0]:
from data_utils import spacy_revtok
import numpy as np

# Number of entries in the label vocabulary.
num_classes = len(LABEL.vocab)
# spaCy pipeline with parser, tagger, NER and textcat disabled; it is passed
# to adversarial_text, which only reads each token's text_with_ws.
# NOTE(review): the 'en' shortcut name is only accepted by older spaCy
# releases -- confirm the installed version.
nlp = spacy.load('en', disable=['parser', 'tagger', 'ner', 'textcat'])

def reconstruct(tensor, vocab):
    """Turn a sequence of vocabulary indices back into a space-joined string.

    Each element of ``tensor`` is looked up in ``vocab.itos`` and the
    resulting tokens are joined with single spaces.
    """
    return ' '.join(vocab.itos[token_id] for token_id in tensor)

def adversarial_text(raw, nlp, indices, transform):
    """Apply ``transform`` to selected tokens of ``raw`` and re-assemble it.

    ``raw`` is tokenised with ``nlp``; every token keeps its trailing
    whitespace (``text_with_ws``), so concatenating the pieces reproduces the
    original spacing. Each index in ``indices`` that is not past the last
    token has ``transform`` applied to its piece; indices beyond the end are
    skipped. An index listed twice is transformed twice.
    """
    pieces = [tok.text_with_ws for tok in nlp(raw)]
    n_pieces = len(pieces)
    for pos in indices:
        if pos < n_pieces:
            pieces[pos] = transform(pieces[pos])
    return ''.join(pieces)

def binary_accuracy(model, batch):
    """Fraction of examples in ``batch`` the model labels correctly.

    Predictions come from ``model.predict(batch.text)``, are moved to the
    notebook-level ``args.device``, squeezed and compared element-wise with
    ``batch.label``. Returns a plain Python float.
    """
    predicted = model.predict(batch.text).to(args.device).squeeze()
    hits = (predicted == batch.label).float()
    accuracy = hits.sum() / len(hits)
    return accuracy.item()

# Look-alike replacement for digits, '-', "'" and lower-case ASCII letters.
homos = {'-':'˗','9':'৭','8':'Ȣ','7':'𝟕','6':'б','5':'Ƽ','4':'Ꮞ','3':'Ʒ','2':'ᒿ','1':'l','0':'O',
         "'":'`','a': 'ɑ', 'b': 'Ь', 'c': 'ϲ', 'd': 'ԁ', 'e': 'е', 'f': '𝚏', 'g': 'ɡ', 'h': 'հ',
         'i': 'і', 'j': 'ϳ', 'k': '𝒌', 'l': 'ⅼ', 'm': 'ｍ', 'n': 'ո', 'o':'о', 'p': 'р', 'q': 'ԛ',
         'r': 'ⲅ', 's': 'ѕ', 't': '𝚝', 'u': 'ս', 'v': 'ѵ', 'w': 'ԝ', 'x': '×', 'y': 'у', 'z': 'ᴢ'}

def homoglyph(word):
    """Replace one randomly chosen character of ``word`` by a look-alike.

    The position is drawn with ``np.random.randint``; a single trailing space
    (as produced by spaCy's ``text_with_ws``) is excluded from the candidates
    unless the word consists of nothing else. Characters without an entry in
    ``homos`` are left unchanged, so the result may equal the input.

    An empty string is returned unchanged (it previously raised IndexError).
    """
    if not word:
        return word
    N = len(word) - 1 if word[-1] == ' ' else len(word)
    s = np.random.randint(0, max(1, N))
    return word[:s] + homos.get(word[s], word[s]) + word[s + 1:]

def remove_char(word):
    """Delete one randomly chosen character from ``word``.

    The position is drawn with ``np.random.randint``; a single trailing space
    is excluded from the candidates unless the word consists of nothing else.

    An empty string is returned unchanged (it previously raised IndexError).
    """
    if not word:
        return word
    N = len(word) - 1 if word[-1] == ' ' else len(word)
    s = np.random.randint(0, max(1, N))
    return word[:s] + word[s + 1:]

def flip_char(word):
    """Overwrite one randomly chosen character of ``word`` with a random
    lower-case ASCII letter.

    The position is drawn with ``np.random.randint``; a single trailing space
    is excluded from the candidates unless the word consists of nothing else.
    The replacement letter is drawn uniformly from 'a'..'z' and may coincide
    with the character it replaces.

    An empty string is returned unchanged (it previously raised IndexError).
    """
    if not word:
        return word
    N = len(word) - 1 if word[-1] == ' ' else len(word)
    s = np.random.randint(0, max(1, N))
    # randint's upper bound is exclusive: 26 is needed for 'z' to be reachable.
    adv_char = chr(np.random.randint(0, 26) + ord('a'))
    return word[:s] + adv_char + word[s + 1:]

# Number of top-ranked token positions that are masked in every example.
attack_power = 20
# Running means of the per-batch accuracy before and after the attack.
reg_acc = 0.
adv_acc = 0.
for batch_index, batch in enumerate(iterator['test'], 1):
    # print('Length: ', batch.text[0].shape[0])
    print('Batch: ', batch_index)
    x_in, lengths = batch.text
    # The model's own predictions are the classes whose confidence the
    # attack tries to reduce.
    y_preds = model.predict(batch.text).to(args.device)
    losses = word_target(model=model, batch=batch.text,
                         y_preds=y_preds, num_classes=num_classes,
                         device=args.device)
    # Rank the positions of every example, highest score first.
    sorted_losses, indices = torch.sort(losses, dim=0, descending=True)
    acc_t = binary_accuracy(model, batch)
    reg_acc += (acc_t - reg_acc) / batch_index

    # Mask the `attack_power` highest-ranked positions of each example.
    # x_in is the tensor held by batch.text, so the batch itself is modified
    # and the second accuracy below is measured on the attacked input.
    # The unknown-token index is used instead of a bare 0 so the mask stays
    # correct should the vocabulary ever place <unk> elsewhere.
    # NOTE(review): positions past an example's length are not excluded
    # from the ranking -- confirm this is intended.
    for i in range(x_in.shape[1]):
        inds = indices[0:attack_power,i]
        x_in[inds,i] = args.UNK_IDX
        # print(adversarial_text(batch.raw[i], nlp, inds, flip_char))
        # print(batch.raw[i])

    acc_t = binary_accuracy(model, batch)
    adv_acc += (acc_t - adv_acc) / batch_index
    print(reg_acc, adv_acc)
    print('-----------------------------')

    # Only the first test batch is evaluated.
    break

In [0]:
from data_utils import save_dataset

# Target directory for the saved splits.
# NOTE(review): this is the same directory as LOAD_PATH, so saving writes
# back to the location the dataset was loaded from -- confirm this is
# intended.
SAVE_PATH = '/content/drive/My Drive/Master Thesis/IMDB'

# Bundle the three splits by name and hand them to the project's
# save_dataset helper.
dataset = dict(train=train_data, test=test_data, valid=valid_data)
save_dataset(dataset, SAVE_PATH)