In [1]:
import torchtext as tt
import torch
from torchtext.datasets import IMDB
import torch.nn as nn
import torch.functional as F
from torchtext.data.utils import get_tokenizer
from collections import Counter
from torchtext.vocab import GloVe, vocab, Vectors
from torchtext.data.utils import get_tokenizer
from torchtext.vocab import build_vocab_from_iterator
import spacy

In [2]:
SEED = 1234

torch.manual_seed(SEED)
torch.backends.cudnn.deterministic = True

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

cuda


In [3]:
import torch
from torchtext.legacy import data, datasets, vocab

TEXT = data.Field(tokenize='spacy', lower=True, batch_first=True, include_lengths=True)
LABEL = data.LabelField(dtype=torch.long, batch_first=True, sequential=False)

train_data, test_data = datasets.IMDB.splits(TEXT, LABEL)
TEXT.build_vocab(train_data, vectors='glove.6B.100d')
LABEL.build_vocab(train_data)

downloading aclImdb_v1.tar.gz


100%|██████████| 84.1M/84.1M [00:02<00:00, 37.1MB/s]
.vector_cache/glove.6B.zip: 862MB [02:40, 5.36MB/s]                           
100%|█████████▉| 399999/400000 [00:25<00:00, 15929.53it/s]


In [4]:
#set batch size
BATCH_SIZE = 64

train_itr, test_itr = data.BucketIterator.splits(
    (train_data, test_data),
    batch_size=BATCH_SIZE,
    sort_key=lambda x:len(x.text),
    device=device,
    shuffle=True,
    sort_within_batch=True,
    sort=False
)

In [142]:
class birlm(nn.Module):

  def __init__(self, vocab_dim, embedding_dim, lstm_dim, lstm_stacks, dropout, output_dim):

    super(birlm, self).__init__()

    # Define layers to be used
    self.embed_layer = nn.Embedding(vocab_dim, embedding_dim)
    self.lstm = nn.LSTM(embedding_dim, lstm_dim, num_layers=lstm_stacks, bidirectional=True, batch_first=True, dropout=dropout)
    self.fc = nn.Linear(2*lstm_dim, output_dim)
    self.soft = nn.Softmax()

  
  def forward(self, inputs, input_len):
    
    # Embedding
    x = self.embed_layer(inputs)

    x_packed = nn.utils.rnn.pack_padded_sequence(x, input_len, batch_first=True)

    # Bidirectional LSTM
    packed_output, (hidden, cell) = self.lstm(x_packed)

    # Concat the final forward and backward hidden state
    hidden = torch.cat((hidden[-2,:,:], hidden[-1,:,:]), dim = 1)

    # Fully Connected Logits
    logits = self.fc(hidden.squeeze(0))

    outs = self.soft(logits)

    return outs


In [143]:
len(TEXT.vocab)

101520

In [144]:
TEXT.vocab.vectors.shape[1]

100

In [145]:
# Hyperparams

vocab_len = len(TEXT.vocab)
embedding_len = 100
lstm_len = 32
output_len = 2
stack = 2
dropout = 0.2

model = birlm(vocab_len, embedding_len, lstm_len, stack, dropout, output_len)
model.embed_layer.from_pretrained(TEXT.vocab.vectors)

Embedding(101520, 100)

In [146]:
opt = torch.optim.Adam(model.parameters(), lr=0.001)
criterion = nn.CrossEntropyLoss()
model.to(device)

birlm(
  (embed_layer): Embedding(101520, 100)
  (lstm): LSTM(100, 32, num_layers=2, batch_first=True, dropout=0.2, bidirectional=True)
  (fc): Linear(in_features=64, out_features=2, bias=True)
  (soft): Softmax(dim=None)
)

In [147]:
def accuracy(preds, y):
    _, preds = torch.max(preds, dim= 1)
    acc = torch.sum(preds == y) / len(y)
    return acc
def calculateLoss(model, batch, criterion):
    text, text_len = batch.text
    preds = model(text, text_len.to('cpu'))
    loss = criterion(preds, batch.label)
    acc = accuracy(preds, batch.label)
    return loss, len(batch.label), acc

In [148]:
import numpy as np

N_EPOCH = 10
for i in range(N_EPOCH):
    model.train()
    train_len, train_acc, train_loss  = 0, [], []
    for batch_no, batch in enumerate(train_itr):
        opt.zero_grad()
        loss, blen, acc = calculateLoss( model, batch,
                          criterion)
        train_loss.append(loss * blen)
        train_acc.append(acc * blen)
        train_len = train_len + blen
        loss.backward()
        opt.step()
    train_epoch_loss = np.sum(train_loss) / train_len
    train_epoch_acc = np.sum( train_acc ) / train_len
    model.eval()
    with torch.no_grad():
        for batch in test_itr:
            test_results = [calculateLoss( model, batch,
                                          criterion)
                           for batch in test_itr]
            loss, batch_len, acc = zip(*test_results)
            epoch_loss = np.sum(np.multiply(loss, batch_len))/ np.sum(batch_len)
            epoch_acc = np.sum(np.multiply(acc , batch_len))/ np.sum(batch_len)
        print('epoch:{}/{} epoch_train_loss:{:.4f},epoch_train_acc:{:.4f}'
              ' epoch_val_loss:{:.4f},epoch_val_acc:{:.4f}'.format(i+1, N_EPOCH,
                train_epoch_loss.item(), train_epoch_acc.item(),
                epoch_loss.item(), epoch_acc.item()))



epoch:1/10 epoch_train_loss:0.6215,epoch_train_acc:0.6537 epoch_val_loss:0.6376,epoch_val_acc:0.6610
epoch:2/10 epoch_train_loss:0.5264,epoch_train_acc:0.7756 epoch_val_loss:0.6896,epoch_val_acc:0.5870
epoch:3/10 epoch_train_loss:0.5510,epoch_train_acc:0.7505 epoch_val_loss:0.5546,epoch_val_acc:0.7454
epoch:4/10 epoch_train_loss:0.5009,epoch_train_acc:0.8042 epoch_val_loss:0.5186,epoch_val_acc:0.7826
epoch:5/10 epoch_train_loss:0.6037,epoch_train_acc:0.6963 epoch_val_loss:0.5386,epoch_val_acc:0.7658
epoch:6/10 epoch_train_loss:0.5036,epoch_train_acc:0.7982 epoch_val_loss:0.5561,epoch_val_acc:0.7342
epoch:7/10 epoch_train_loss:0.4557,epoch_train_acc:0.8526 epoch_val_loss:0.5015,epoch_val_acc:0.8002
epoch:8/10 epoch_train_loss:0.4198,epoch_train_acc:0.8901 epoch_val_loss:0.4745,epoch_val_acc:0.8308
epoch:9/10 epoch_train_loss:0.4018,epoch_train_acc:0.9087 epoch_val_loss:0.4698,epoch_val_acc:0.8357
epoch:10/10 epoch_train_loss:0.3878,epoch_train_acc:0.9250 epoch_val_loss:0.4657,epoch_val_

In [None]:
!pip install textattack[tensorflow,optional]

In [157]:
import textattack
import torchtext

model_wrapper = CustomPyTorchModelWrapper(model)

dataset = textattack.datasets.HuggingFaceDataset("imdb", split="test")
attack = textattack.attack_recipes.pwws_ren_2019.PWWSRen2019.build(model_wrapper)
attack_args = textattack.AttackArgs(num_examples=-1)
attacker = textattack.Attacker(attack, dataset, attack_args)
attacker.attack_dataset()

Reusing dataset imdb (/root/.cache/huggingface/datasets/imdb/plain_text/1.0.0/90099cb476936b753383ba2ae6ab2eae419b2e87f71cd5189cb9c8e5814d12a3)
textattack: Loading [94mdatasets[0m dataset [94mimdb[0m, split [94mtest[0m.
textattack: Unknown if model of class <class '__main__.birlm'> compatible with goal function <class 'textattack.goal_functions.classification.untargeted_classification.UntargetedClassification'>.


Attack(
  (search_method): GreedyWordSwapWIR(
    (wir_method):  weighted-saliency
  )
  (goal_function):  UntargetedClassification
  (transformation):  WordSwapWordNet
  (constraints): 
    (0): RepeatModification
    (1): StopwordModification
  (is_black_box):  True
) 









 10%|█         | 1/10 [00:04<00:40,  4.54s/it][A[A[A


[Succeeded / Failed / Skipped / Total] 1 / 0 / 0 / 1:  10%|█         | 1/10 [00:04<00:41,  4.56s/it][A[A[A

--------------------------------------------- Result 1 ---------------------------------------------

[[I]] went and saw this movie last night after being coaxed to by a few friends of mine. I'll admit that I was reluctant to see it because from what I knew of Ashton Kutcher he was only able to do comedy. I was wrong. Kutcher played the character of Jake Fischer very well, and Kevin Costner played Ben Randall with such professionalism. The sign of a good movie is that it can toy with our emotions. This one did exactly that. The entire theater (which was sold out) was overcome by laughter during the first half of the movie, and were moved to tears during the second half. While exiting the theater I not only saw many women in tears, but many full grown men as well, trying desperately not to let anyone see them crying. This movie was great, and I suggest that you go see it before you judge.

[[1]] went and saw this movie last night after being coaxed to by a few friends of mine. I'll admi

  0%|          | 0/10 [01:13<?, ?it/s]



[Succeeded / Failed / Skipped / Total] 1 / 0 / 0 / 1:  20%|██        | 2/10 [00:30<02:00, 15.00s/it][A[A[A


[Succeeded / Failed / Skipped / Total] 2 / 0 / 0 / 2:  20%|██        | 2/10 [00:30<02:00, 15.01s/it][A[A[A

--------------------------------------------- Result 2 ---------------------------------------------

[[Actor]] turned director Bill Paxton follows up his promising debut, the Gothic-horror "Frailty", with this family friendly sports drama about the 1913 U.S. Open where a young American caddy rises from his humble background to play against his Bristish idol in what was dubbed as "The Greatest Game Ever Played." I'm no fan of golf, and these scrappy underdog sports flicks are a dime a dozen (most recently done to grand effect with "Miracle" and "Cinderella Man"), but some how this film was enthralling all the same.<br /><br />The film starts with some creative opening credits (imagine a Disneyfied version of the animated opening credits of HBO's "Carnivale" and "Rome"), but lumbers along slowly for its first by-the-numbers hour. Once the action moves to the U.S. Open things pick up very well. Paxton does a nice job and shows a knack for effective directorial flourishes (I loved the rai




[Succeeded / Failed / Skipped / Total] 2 / 0 / 0 / 2:  30%|███       | 3/10 [00:41<01:36, 13.77s/it][A[A[A


[Succeeded / Failed / Skipped / Total] 3 / 0 / 0 / 3:  30%|███       | 3/10 [00:41<01:36, 13.78s/it][A[A[A

--------------------------------------------- Result 3 ---------------------------------------------

[[As]] a recreational golfer with some knowledge of the sport's history, I was pleased with Disney's sensitivity to the issues of class in golf in the early twentieth century. The movie depicted well the psychological battles that Harry Vardon fought within himself, from his childhood trauma of being evicted to his own inability to break that glass ceiling that prevents him from being accepted as an equal in English golf society. Likewise, the young Ouimet goes through his own class struggles, being a mere caddie in the eyes of the upper crust Americans who scoff at his attempts to rise above his standing. <br /><br />What I loved best, however, is how this theme of class is manifested in the characters of Ouimet's parents. His father is a working-class drone who sees the value of hard work but is intimidated by the upper class; his mother, however, recognizes her son's talent and desi




[Succeeded / Failed / Skipped / Total] 3 / 0 / 0 / 3:  40%|████      | 4/10 [00:44<01:06, 11.06s/it][A[A[A


[Succeeded / Failed / Skipped / Total] 4 / 0 / 0 / 4:  40%|████      | 4/10 [00:44<01:06, 11.06s/it][A[A[A

--------------------------------------------- Result 4 ---------------------------------------------

[[I]] saw this film in a sneak preview, and it is delightful. The cinematography is unusually creative, the acting is good, and the story is fabulous. If this movie does not do well, it won't be because it doesn't deserve to. Before this film, I didn't realize how charming Shia Lebouf could be. He does a marvelous, self-contained, job as the lead. There's something incredibly sweet about him, and it makes the movie even better. The other actors do a good job as well, and the film contains moments of really high suspense, more than one might expect from a movie about golf. Sports movies are a dime a dozen, but this one stands out. <br /><br />This is one I'd recommend to anyone.

[[1]] saw this film in a sneak preview, and it is delightful. The cinematography is unusually creative, the acting is good, and the story is fabulous. If this movie does not do well, it won't be because it does




[Succeeded / Failed / Skipped / Total] 4 / 0 / 0 / 4:  50%|█████     | 5/10 [00:52<00:52, 10.48s/it][A[A[A


[Succeeded / Failed / Skipped / Total] 5 / 0 / 0 / 5:  50%|█████     | 5/10 [00:52<00:52, 10.48s/it][A[A[A

--------------------------------------------- Result 5 ---------------------------------------------

[[Bill]] Paxton has taken the true story of the 1913 US golf open and made a film that is about much more than an extra-ordinary game of golf. The film also deals directly with the class tensions of the early twentieth century and touches upon the profound anti-Catholic prejudices of both the British and American establishments. But at heart the film is about that perennial favourite of triumph against the odds.<br /><br />The acting is exemplary throughout. Stephen Dillane is excellent as usual, but the revelation of the movie is Shia LaBoeuf who delivers a disciplined, dignified and highly sympathetic performance as a working class Franco-Irish kid fighting his way through the prejudices of the New England WASP establishment. For those who are only familiar with his slap-stick performances in "Even Stevens" this demonstration of his maturity is a delightful surprise. And Josh Flitter




[Succeeded / Failed / Skipped / Total] 5 / 0 / 0 / 5:  60%|██████    | 6/10 [01:16<00:50, 12.69s/it][A[A[A


[Succeeded / Failed / Skipped / Total] 6 / 0 / 0 / 6:  60%|██████    | 6/10 [01:16<00:50, 12.69s/it][A[A[A


[Succeeded / Failed / Skipped / Total] 6 / 0 / 0 / 6:  70%|███████   | 7/10 [01:16<00:32, 10.89s/it][A[A[A


[Succeeded / Failed / Skipped / Total] 6 / 0 / 1 / 7:  70%|███████   | 7/10 [01:16<00:32, 10.89s/it][A[A[A

--------------------------------------------- Result 6 ---------------------------------------------

[[I]] saw this film on September 1st, 2005 in Indianapolis. I am one of the judges for the Heartland Film Festival that screens films for their Truly Moving Picture Award. A Truly Moving Picture "...explores the human journey by artistically expressing hope and respect for the positive values of life." Heartland gave that award to this film.<br /><br />This is a story of golf in the early part of the 20th century. At that time, it was the game of upper class and rich "gentlemen", and working people could only participate by being caddies at country clubs. With this backdrop, this based-on-a-true-story unfolds with a young, working class boy who takes on the golf establishment and the greatest golfer in the world, Harry Vardon.<br /><br />And the story is inspirational. Against all odds, Francis Ouimet (played by Shia LaBeouf of "Holes") gets to compete against the greatest golfers of t




[Succeeded / Failed / Skipped / Total] 6 / 0 / 1 / 7:  80%|████████  | 8/10 [01:20<00:20, 10.04s/it][A[A[A


[Succeeded / Failed / Skipped / Total] 7 / 0 / 1 / 8:  80%|████████  | 8/10 [01:20<00:20, 10.05s/it][A[A[A

--------------------------------------------- Result 8 ---------------------------------------------

[[I]] felt this film did have many good qualities. The cinematography was certainly different exposing the stage aspect of the set and story. The original characters as actors was certainly an achievement and I felt most played quite convincingly, of course they are playing themselves, but definitely unique. The cultural aspects may leave many disappointed as a familiarity with the Chinese and Oriental culture will answer a lot of questions regarding parent/child relationships and the stigma that goes with any drug use. I found the Jia Hongsheng story interesting. On a down note, the story is in Beijing and some of the fashion and music reek of early 90s even though this was made in 2001, so it's really cheesy sometimes (the Beatles crap, etc). Whatever, not a top ten or twenty but if it's on the television, check it out.

[[1]] felt this film did have many good qualities. The cinemato




[Succeeded / Failed / Skipped / Total] 7 / 0 / 1 / 8:  90%|█████████ | 9/10 [01:24<00:09,  9.44s/it][A[A[A


[Succeeded / Failed / Skipped / Total] 8 / 0 / 1 / 9:  90%|█████████ | 9/10 [01:24<00:09,  9.44s/it][A[A[A

--------------------------------------------- Result 9 ---------------------------------------------

This movie is amazing because the fact that the real people portray themselves and their real life experience and do such a good job it's like they're almost living the past over again. Jia Hongsheng plays himself an actor who quit everything except music and drugs struggling with depression and searching for the meaning of life while being angry at everyone especially the people who care for him most. There's moments in the movie that will make you wanna cry because the family especially the father did such a good job. However, this movie is not for everyone. Many people who suffer from depression will understand Hongsheng's problem and why he does the things he does for example keep himself shut in a dark room or go for walks or bike rides by himself. Others might see the [[movie]] as boring because it's just so real that its almost like a documentary. Overall this movie is great and




[Succeeded / Failed / Skipped / Total] 8 / 0 / 1 / 9: 100%|██████████| 10/10 [04:28<00:00, 26.89s/it][A[A[A


[Succeeded / Failed / Skipped / Total] 9 / 0 / 1 / 10: 100%|██████████| 10/10 [04:28<00:00, 26.90s/it]

--------------------------------------------- Result 10 ---------------------------------------------

"[[Quitting]]" may be as much about exiting a pre-ordained identity as about drug withdrawal. As a rural guy coming to Beijing, class and success must have struck this young artist face on as an appeal to separate from his roots and far surpass his peasant parents' acting success. Troubles arise, however, when the new man is too new, when it demands too big a departure from family, history, nature, and personal identity. The ensuing splits, and confusion between the imaginary and the real and the dissonance between the ordinary and the heroic are the stuff of a gut check on the one hand or a complete escape from self on the other. Hongshen slips into the latter and his long and lonely road back to self can be grim.<br /><br />But what an exceptionally convincing particularity, honesty, and sensuousness director Zhang Yang, and his actors, bring to this journey. No clichés, no stereoty




[<textattack.attack_results.successful_attack_result.SuccessfulAttackResult at 0x7fca7bc54050>,
 <textattack.attack_results.successful_attack_result.SuccessfulAttackResult at 0x7fca23919450>,
 <textattack.attack_results.successful_attack_result.SuccessfulAttackResult at 0x7fca20ac95d0>,
 <textattack.attack_results.successful_attack_result.SuccessfulAttackResult at 0x7fca7a1de290>,
 <textattack.attack_results.successful_attack_result.SuccessfulAttackResult at 0x7fca7a96ab10>,
 <textattack.attack_results.successful_attack_result.SuccessfulAttackResult at 0x7fca27c65e50>,
 <textattack.attack_results.skipped_attack_result.SkippedAttackResult at 0x7fca7c188950>,
 <textattack.attack_results.successful_attack_result.SuccessfulAttackResult at 0x7fca76f46f50>,
 <textattack.attack_results.successful_attack_result.SuccessfulAttackResult at 0x7fca70344550>,
 <textattack.attack_results.successful_attack_result.SuccessfulAttackResult at 0x7fca52f40050>]

In [156]:
class CustomPyTorchModelWrapper(textattack.models.wrappers.model_wrapper.ModelWrapper):
    def __init__(self, model, tokenizer=torchtext.data.utils.get_tokenizer("spacy")):
        self.model = model
        self.tokenizer = tokenizer
    
    def __call__(self, text_input_list):
      preds = torch.zeros(size=(len(text_input_list),2))
      for i, review in enumerate(text_input_list):
        tokens = TEXT.preprocess(review)
        indexes = torch.tensor([TEXT.vocab.stoi[s] for s in tokens], device=device)
      with torch.no_grad():
        prediction = self.model(torch.unsqueeze(indexes,dim=0), torch.tensor([len(indexes)], dtype=torch.int64))
        preds[i] = prediction

      return preds