In [None]:
import torch
from torchtext.datasets import IMDB, AG_NEWS, YahooAnswers
from torchtext.vocab import GloVe
from torchtext.data import to_map_style_dataset
from torchtext.data.utils import get_tokenizer
from torch.nn import LSTM, GRU, Linear, Softmax, CrossEntropyLoss
from torch.nn.utils.rnn import pad_sequence
from torch.utils.data import DataLoader, random_split, Dataset
from torch.optim import Adam
from tqdm import tqdm
import numpy as np
!pip install transformers
from transformers import BertTokenizer, BertForSequenceClassification, AdamW



In [None]:
# Dataset to load: one of 'IMDB', 'AG_NEWS' or 'YahooAnswers' (any other value raises ValueError below).
DATASET = 'IMDB'
# 'BERT' selects the BERT tokenizer and collate_BERT; any other value selects the
# basic_english tokenizer and collate_batch.
MODEL = 'BERT'
VALIDATION_SPLIT = 0.5 # fraction of the test split that is held out as the validation set
BATCH_SIZE = 64  # passed as batch_size to every DataLoader
SHUFFLE = True  # passed as shuffle to every DataLoader (train, test and validation)
NUM_EPOCHS = 5  # number of training epochs; shown in the training progress-bar label

# Run on the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [None]:
'''from google.colab import drive
drive.mount('/content/drive')
PATH = '/content/drive/MyDrive/Checkpoints/model'
!mkdir '/content/drive/MyDrive/Checkpoints/model'''

"from google.colab import drive\ndrive.mount('/content/drive')\nPATH = '/content/drive/MyDrive/Checkpoints/model'\n!mkdir '/content/drive/MyDrive/Checkpoints/model"

BERT FREEZER

In [None]:
def get_child(model, *arg):
    """Walk down the module tree by positional child index.

    Each value in ``arg`` selects one entry of the current module's
    ``children()`` (negative indices count from the end). With no indices the
    module itself is returned.
    """
    node = model
    for position in arg:
        node = tuple(node.children())[position]
    return node

def freeze_model(model):
    """Turn off gradient tracking for every parameter of ``model`` (in place)."""
    model.requires_grad_(False)
            
def unfreeze_model(model):
    """Turn on gradient tracking for every parameter of ``model`` (in place)."""
    model.requires_grad_(True)

def count_parameters(model, trainable_only = True):
    """Return the total number of scalar parameters in ``model``.

    When ``trainable_only`` is true, only parameters with
    ``requires_grad=True`` are counted; otherwise all of them are.
    """
    total = 0
    for tensor in model.parameters():
        if tensor.requires_grad or not trainable_only:
            total += tensor.numel()
    return total

def custom_freezer(model):
    """Freeze a BERT classifier except for its top encoder layers, pooler and head.

    Steps, all applied in place:
      1. every parameter is made trainable;
      2. each direct child whose class name starts with ``'Bert'`` is frozen;
      3. the last two entries of the encoder layer list are made trainable again;
      4. the pooling layer is made trainable again.

    The encoder layers and pooler are located by child position
    (``get_child(model, 0, 1, 0)`` and ``get_child(model, 0, 2)``), so the model
    is assumed to have the child layout of ``BertForSequenceClassification``.

    Prints the resulting trainable-parameter count and returns the same model.
    """
    unfreeze_model(model)

    # Freeze the whole BERT backbone. The class name is checked directly:
    # str(child) would render the repr of the entire sub-module tree just to
    # read its first few characters.
    for child in model.children():
        if type(child).__name__.startswith('Bert'):
            freeze_model(child)

    # Unfreeze the top 2 layers of the encoder (a slice also copes with
    # encoders that have fewer than two layers).
    bert_encoder = get_child(model, 0, 1, 0)
    for layer in bert_encoder[-2:]:
        unfreeze_model(layer)

    # Unfreeze the pooling layer.
    bert_pooling = get_child(model, 0, 2)
    unfreeze_model(bert_pooling)

    print('Trainable parameters: {}'.format(count_parameters(model, True)))
    return model

In [None]:
class BidirectionalLSTMClassifier(torch.nn.Module):
    """Sequence classifier: bidirectional LSTM followed by a linear head.

    Args:
        num_classes: number of output classes.
        hidden_size: hidden size of each LSTM direction.
        num_layers: number of stacked LSTM layers.
        input_size: width of each input embedding vector (default 50, the
            width that was previously hard-coded).
    """

    def __init__(self, num_classes, hidden_size, num_layers, input_size=50):
        super().__init__()
        self.num_layers = num_layers
        self.LSTM = LSTM(input_size, hidden_size, num_layers=num_layers, batch_first=True, bidirectional=True)
        self.linear = Linear(2 * hidden_size, num_classes)
        # Kept so callers can turn logits into probabilities: self.softmax(logits).
        self.softmax = Softmax(dim=1)

    def forward(self, x):
        """Return raw class logits of shape (batch, num_classes).

        ``x`` is a batch-first tensor of embedded tokens. Logits, not
        probabilities, are returned because CrossEntropyLoss applies
        log-softmax itself; feeding it softmax output squashes the gradients.
        ``argmax`` over the logits gives the same predicted class.
        """
        _, (h_n, _) = self.LSTM(x)
        # The last two entries of h_n are the final layer's forward and
        # backward hidden states (indices 2*num_layers-2 and 2*num_layers-1).
        features = torch.cat((h_n[-2], h_n[-1]), dim=1)
        return self.linear(features)
    
    
class BidirectionalGRUClassifier(torch.nn.Module):
    """Sequence classifier: bidirectional GRU followed by a linear head.

    Args:
        num_classes: number of output classes.
        hidden_size: hidden size of each GRU direction.
        num_layers: number of stacked GRU layers.
        input_size: width of each input embedding vector (default 50, the
            width that was previously hard-coded).
    """

    def __init__(self, num_classes, hidden_size, num_layers, input_size=50):
        super().__init__()
        self.num_layers = num_layers
        self.GRU = GRU(input_size, hidden_size, num_layers=num_layers, batch_first=True, bidirectional=True)
        self.linear = Linear(2 * hidden_size, num_classes)
        # Kept so callers can turn logits into probabilities: self.softmax(logits).
        self.softmax = Softmax(dim=1)

    def forward(self, x):
        """Return raw class logits of shape (batch, num_classes).

        ``x`` is a batch-first tensor of embedded tokens. Logits, not
        probabilities, are returned because CrossEntropyLoss applies
        log-softmax itself; feeding it softmax output squashes the gradients.
        ``argmax`` over the logits gives the same predicted class.
        """
        _, h_n = self.GRU(x)
        # The last two entries of h_n are the final layer's forward and
        # backward hidden states (indices 2*num_layers-2 and 2*num_layers-1).
        features = torch.cat((h_n[-2], h_n[-1]), dim=1)
        return self.linear(features)

from transformers import BertForSequenceClassification
import torch.nn as nn


class BERTClassifier(nn.Module):
    """Sequence classifier built on a partially frozen ``bert-base-uncased``.

    The pretrained ``BertForSequenceClassification`` is loaded with
    ``num_classes`` labels and passed through ``custom_freezer``.
    """

    def __init__(self, num_classes):
        super(BERTClassifier, self).__init__()

        self.bert = BertForSequenceClassification.from_pretrained(
            'bert-base-uncased', num_labels=num_classes)
        self.bert = custom_freezer(self.bert)
        # Kept so callers can turn logits into probabilities: self.softmax(logits).
        self.softmax = nn.Softmax(dim=1)

    def forward(self, input_ids, token_type_ids, attention_mask):
        """Return raw class logits of shape (batch, num_classes).

        The tensors are forwarded by keyword. The Hugging Face forward
        signature is ``(input_ids, attention_mask, token_type_ids, ...)``, so
        passing them positionally in this method's order would feed the
        segment ids as the attention mask and vice versa.

        Logits, not probabilities, are returned because CrossEntropyLoss
        applies log-softmax itself; ``argmax`` gives the same predicted class.
        """
        y = self.bert(input_ids=input_ids,
                      token_type_ids=token_type_ids,
                      attention_mask=attention_mask)
        return y.logits


In [None]:
class ClassificationDataset(Dataset):
    """Map-style dataset yielding ``(label, tokenized_text)`` pairs.

    Wraps an indexable dataset of ``(label, text)`` items. String labels are
    mapped to 0 for ``'neg'`` and 1 for anything else; all other labels are
    converted with ``int(label) - 1``. When ``model`` is ``'BERT'`` the
    tokenizer is called with max-length padding/truncation to 512 and
    ``return_tensors='pt'``; otherwise it is called on the text alone.
    """

    def __init__(self, dataset, num_classes, tokenizer, model):
        self.dataset = dataset
        self.num_classes = num_classes
        self.tokenizer = tokenizer
        self.model = model

    def __len__(self):
        return len(self.dataset)

    def __getitem__(self, idx):
        raw_label, text = self.dataset[idx]

        if type(raw_label) is str:
            target = 0 if raw_label == 'neg' else 1
        else:
            target = int(raw_label) - 1

        if self.model != 'BERT':
            return target, self.tokenizer(text)

        encoded = self.tokenizer(text, padding="max_length", return_tensors='pt', max_length=512, truncation=True)
        return target, encoded

In [None]:
# Load the raw train/test splits and the class count for the configured dataset.
if DATASET == 'IMDB':
    train_set = IMDB(split='train')
    test_set = IMDB(split='test')
    num_classes = 2
elif DATASET == 'AG_NEWS':
    train_set = AG_NEWS(split='train')
    test_set = AG_NEWS(split='test')
    num_classes = 4
elif DATASET == 'YahooAnswers':
    train_set = YahooAnswers(split='train')
    test_set = YahooAnswers(split='test')
    num_classes = 10
else:
    raise ValueError(
        f"Unknown DATASET {DATASET!r}; expected 'IMDB', 'AG_NEWS' or 'YahooAnswers'")

if MODEL == 'BERT':
    tokenizer = BertTokenizer.from_pretrained('bert-base-uncased', do_lower_case=True)
else:
    tokenizer = get_tokenizer('basic_english')
    # collate_batch looks tokens up in `embedding`, so the GloVe vectors must
    # exist on the non-BERT path; they are skipped for BERT, which never uses them.
    embedding = GloVe(name='6B', dim=50)

train_set = to_map_style_dataset(train_set)
test_set = to_map_style_dataset(test_set)

train_set = ClassificationDataset(train_set, num_classes, tokenizer, MODEL)
test_set = ClassificationDataset(test_set, num_classes, tokenizer, MODEL)

# Carve the validation set out of the test split with a fixed seed so the
# partition is the same on every run.
num_val = int(VALIDATION_SPLIT * len(test_set))
test_set, val_set = random_split(
    test_set,
    [len(test_set) - num_val, num_val],
    generator=torch.Generator().manual_seed(42))

Epoch [1/5]:   0%|          | 0/391 [02:37<?, ?it/s]


In [None]:
# print(len(train_set.__getitem__(7)))

In [None]:
def collate_batch(batch):
    """Collate ``(label, tokens)`` samples for the embedding-based models.

    Every token list is converted with ``embedding.get_vecs_by_tokens``, the
    resulting sequences are padded to a common length (batch first), and the
    labels become an int64 tensor. Both tensors are moved to ``device``.

    Returns:
        ``(labels, padded_embeddings)``.
    """
    labels = [lbl for lbl, _ in batch]
    vectors = [embedding.get_vecs_by_tokens(toks) for _, toks in batch]
    padded = pad_sequence(vectors, batch_first=True)
    label_tensor = torch.tensor(labels, dtype=torch.int64)
    return label_tensor.to(device), padded.to(device)

def collate_BERT(batch):
    """Collate ``(label, encoding)`` samples for the BERT model.

    For each of the ``'input_ids'``, ``'token_type_ids'`` and
    ``'attention_mask'`` entries, the per-sample tensors are concatenated
    along dim 0; the labels become an int64 tensor. Everything is moved to
    ``device``.

    Returns:
        ``(labels, input_ids, token_type_ids, attention_mask)``.
    """
    def _gather(key):
        # Concatenate one field of every sample's encoding into a single tensor.
        return torch.cat([encoding[key] for _, encoding in batch], dim=0).to(device)

    labels = torch.tensor([lbl for lbl, _ in batch], dtype=torch.int64).to(device)
    return labels, _gather('input_ids'), _gather('token_type_ids'), _gather('attention_mask')

# Build the three loaders with identical settings; only the collate function
# depends on which model family is configured.
train_loader, test_loader, val_loader = (
    DataLoader(
        split,
        batch_size=BATCH_SIZE,
        collate_fn=collate_BERT if MODEL == 'BERT' else collate_batch,
        shuffle=SHUFFLE,
    )
    for split in (train_set, test_set, val_set)
)

In [None]:
def evaluate(model, data_loader, loss=CrossEntropyLoss()):
    """Return the classification accuracy of ``model`` over ``data_loader``.

    Each batch must be a sequence whose first element is the label tensor;
    the remaining elements are passed to the model as positional arguments.
    This covers both the ``(labels, text)`` batches and the
    ``(labels, input_ids, token_type_ids, attention_mask)`` batches without
    consulting the global ``MODEL`` setting.

    Args:
        model: module returning per-class scores of shape (batch, classes).
        data_loader: iterable of batches as described above.
        loss: accepted for backward compatibility only; accuracy does not
            depend on it, so it is no longer evaluated.

    Returns:
        Fraction of correctly classified samples, or 0.0 when the loader
        yields no samples.
    """
    model.eval()
    correct, seen = 0, 0

    with torch.no_grad():
        for labels, *inputs in data_loader:
            predictions = model(*inputs)
            correct += (predictions.argmax(1) == labels).sum().item()
            seen += labels.size(0)

    # Guard against an empty loader instead of raising ZeroDivisionError.
    return correct / seen if seen else 0.0


def train(model, optimizer, train_loader, loss=CrossEntropyLoss(), log_interval=50, epoch=None):
    """Train ``model`` for one pass over ``train_loader``.

    Each batch must be a sequence whose first element is the label tensor;
    the remaining elements are passed to the model as positional arguments.
    When the global ``MODEL`` is ``'BERT'`` gradients are clipped to a norm of
    1.0 before every optimizer step.

    Args:
        model: module to optimise.
        optimizer: optimizer over the model's parameters.
        train_loader: iterable of batches as described above.
        loss: criterion called as ``loss(output, labels)``.
        log_interval: the progress bar's loss/accuracy postfix is refreshed
            every ``log_interval`` batches; the running accuracy is reset
            after each refresh.
        epoch: zero-based epoch index used only for the progress-bar label.
            When omitted, a module-level ``epoch`` variable is used if one
            exists (the behaviour older callers relied on), otherwise 0.
    """
    if epoch is None:
        # Backward compatibility with callers that only set a global loop variable.
        epoch = globals().get('epoch', 0)

    model.train()
    total_acc, total_count = 0, 0
    clip_gradients = MODEL == 'BERT'

    # The context manager closes the bar even when training is interrupted.
    with tqdm(total=len(train_loader), desc=f'Epoch [{epoch + 1}/{NUM_EPOCHS}]') as pbar:
        for idx, (labels, *inputs) in enumerate(train_loader):
            output = model(*inputs)
            loss_ = loss(output, labels)
            optimizer.zero_grad()
            loss_.backward()
            if clip_gradients:
                nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
            optimizer.step()
            total_acc += (output.argmax(1) == labels).sum().item()
            total_count += labels.size(0)
            pbar.update()
            if idx % log_interval == 0 and idx > 0:
                # .item() shows a plain float rather than a tensor repr that
                # still references the autograd graph.
                pbar.set_postfix(loss=loss_.item(), accuracy=total_acc / total_count)
                total_acc, total_count = 0, 0

In [None]:
model = BERTClassifier(num_classes).to(device)
#optim = Adam(model.parameters())
optim = AdamW(model.parameters(), lr=3e-5, correct_bias=False)

# Run the configured number of epochs; the progress bar reports
# "Epoch [k/NUM_EPOCHS]", so the loop bound has to match that constant.
for epoch in range(NUM_EPOCHS):
    train(model, optim, train_loader)
    val_accuracy = evaluate(model, val_loader)
    print(val_accuracy)

    '''torch.save({
        'epoch' : epoch,
        'model_state_dict' : model.state_dict(),
        'optimizer_state_dict': optim.state_dict(),
        'val_accuracy' : val_accuracy
    }, PATH + '_' + str(epoch) + '.pt' )'''

    # How to load a checkpoint written by the (disabled) torch.save call above:
    #checkpoint = torch.load(PATH + '_' + str(epoch) + '.pt')
    #model.load_state_dict(checkpoint['model_state_dict'])
    #optim.load_state_dict(checkpoint['optimizer_state_dict'])
    #epoch = checkpoint['epoch']
    #val_accuracy = checkpoint['val_accuracy']

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Trainable parameters: 14767874



Epoch [1/5]:   0%|          | 0/391 [00:00<?, ?it/s][A
Epoch [1/5]:   0%|          | 1/391 [00:06<40:58,  6.30s/it][A
Epoch [1/5]:   1%|          | 2/391 [00:12<41:05,  6.34s/it][A
Epoch [1/5]:   1%|          | 3/391 [00:19<41:09,  6.36s/it][A
Epoch [1/5]:   1%|          | 4/391 [00:25<41:09,  6.38s/it][A
Epoch [1/5]:   1%|▏         | 5/391 [00:31<41:19,  6.42s/it][A
Epoch [1/5]:   2%|▏         | 6/391 [00:38<41:01,  6.39s/it][A
Epoch [1/5]:   2%|▏         | 7/391 [00:44<40:48,  6.38s/it][A
Epoch [1/5]:   2%|▏         | 8/391 [00:50<40:36,  6.36s/it][A
Epoch [1/5]:   2%|▏         | 9/391 [00:57<40:25,  6.35s/it][A
Epoch [1/5]:   3%|▎         | 10/391 [01:03<40:18,  6.35s/it][A
Epoch [1/5]:   3%|▎         | 11/391 [01:09<40:08,  6.34s/it][A
Epoch [1/5]:   3%|▎         | 12/391 [01:16<40:03,  6.34s/it][A
Epoch [1/5]:   3%|▎         | 13/391 [01:22<40:04,  6.36s/it][A
Epoch [1/5]:   4%|▎         | 14/391 [01:29<40:01,  6.37s/it][A
Epoch [1/5]:   4%|▍         | 15/391 [01:3

KeyboardInterrupt: ignored

In [None]:
# Score the model on the part of the test split that was not held out for validation.
test_accuracy = evaluate(model, test_loader)
print('Test accuracy: {}'.format(test_accuracy))

Test accuracy: 0.81512


In [None]:
!pip3 install textattack[tensorflow]



In [None]:
import textattack
import torchtext
import torch
from transformers import BertTokenizer

class CustomPyTorchModelWrapper(textattack.models.wrappers.model_wrapper.ModelWrapper):
    """TextAttack wrapper for the embedding-based (LSTM/GRU) classifiers.

    Args:
        model: classifier called on a (1, seq_len, dim) tensor of word vectors.
        outdim: number of classes, i.e. the width of each prediction row.
        vocab: object providing ``get_vecs_by_tokens``; defaults to 50-d
            GloVe 6B vectors.
        tokenizer: callable turning a string into tokens; defaults to
            torchtext's ``basic_english`` tokenizer.

    The defaults are created lazily inside ``__init__``. As default argument
    values they were evaluated when the class was defined, loading GloVe even
    if the wrapper was never used.
    """

    def __init__(self, model, outdim, vocab=None, tokenizer=None):
        self.model = model
        self.outdim = outdim
        self.vocab = vocab if vocab is not None else torchtext.vocab.GloVe("6B", dim=50)
        self.tokenizer = (tokenizer if tokenizer is not None
                          else torchtext.data.utils.get_tokenizer("basic_english"))

    def __call__(self, text_input_list):
        """Return a (len(text_input_list), outdim) CPU tensor of model outputs."""
        preds = torch.zeros(size=(len(text_input_list), self.outdim))

        # Feed inputs on whatever device the model lives on; a model without
        # parameters is fed the tensors unchanged.
        first_param = next(self.model.parameters(), None)
        model_device = first_param.device if first_param is not None else None

        # Predict in eval mode so dropout does not make the scores
        # nondeterministic, then restore the caller's mode.
        was_training = self.model.training
        self.model.eval()
        try:
            with torch.no_grad():
                for i, review in enumerate(text_input_list):
                    tokens = self.tokenizer(review)
                    vectors = torch.unsqueeze(self.vocab.get_vecs_by_tokens(tokens), dim=0)
                    if model_device is not None:
                        vectors = vectors.to(model_device)
                    preds[i] = self.model(vectors)
        finally:
            self.model.train(was_training)

        return preds

class CustomBERTModelWrapper(textattack.models.wrappers.model_wrapper.ModelWrapper):
    """TextAttack wrapper for ``BERTClassifier``-style models.

    Args:
        model: classifier called as
            ``model(input_ids, token_type_ids, attention_mask)``.
        outdim: number of classes, i.e. the width of each prediction row.
        tokenizer: BERT tokenizer; defaults to the lower-casing
            ``bert-base-uncased`` tokenizer.

    The default tokenizer is created lazily inside ``__init__``. As a default
    argument value it was loaded when the class was defined, even if a custom
    tokenizer was always supplied.
    """

    def __init__(self, model, outdim, tokenizer=None):
        self.model = model
        self.outdim = outdim
        self.tokenizer = (tokenizer if tokenizer is not None
                          else BertTokenizer.from_pretrained('bert-base-uncased', do_lower_case=True))

    def __call__(self, text_input_list):
        """Return a (len(text_input_list), outdim) CPU tensor of model outputs."""
        preds = torch.zeros(size=(len(text_input_list), self.outdim))

        # Use the model's own device rather than a notebook-level global; a
        # model without parameters is fed the tensors unchanged.
        first_param = next(self.model.parameters(), None)
        model_device = first_param.device if first_param is not None else None

        def _place(tensor):
            return tensor.to(model_device) if model_device is not None else tensor

        # Predict in eval mode so dropout does not make the scores
        # nondeterministic, then restore the caller's mode.
        was_training = self.model.training
        self.model.eval()
        try:
            with torch.no_grad():
                for i, review in enumerate(text_input_list):
                    dict_ = self.tokenizer(review, padding="max_length", return_tensors='pt', max_length=512, truncation=True)
                    preds[i] = self.model(_place(dict_["input_ids"]),
                                          _place(dict_["token_type_ids"]),
                                          _place(dict_["attention_mask"]))
        finally:
            self.model.train(was_training)

        return preds

In [None]:
# Attack the model produced by the training cells when one exists. Rebuilding
# it unconditionally would replace the fine-tuned weights with a fresh model
# whose classification head is untrained. A new model is only created when no
# `model` is defined yet (e.g. after a kernel restart).
if 'model' not in globals():
    model = BERTClassifier(num_classes).to(device)

In [None]:
# Wrap the classifier so TextAttack can query it with lists of raw strings.
model_wrapper = CustomBERTModelWrapper(model, outdim=num_classes)
# NOTE(review): the attacked data is always the Hugging Face "imdb" test split,
# independent of the DATASET setting — confirm this is intended when DATASET changes.
dataset = textattack.datasets.HuggingFaceDataset("imdb", split="test")
# Build the PWWSRen2019 attack recipe against the wrapped model.
attack = textattack.attack_recipes.pwws_ren_2019.PWWSRen2019.build(model_wrapper)
# Limit the run to 10 examples.
attack_args = textattack.AttackArgs(num_examples=10)
attacker = textattack.Attacker(attack, dataset, attack_args)
attacker.attack_dataset()

Reusing dataset imdb (/root/.cache/huggingface/datasets/imdb/plain_text/1.0.0/90099cb476936b753383ba2ae6ab2eae419b2e87f71cd5189cb9c8e5814d12a3)
textattack: Loading [94mdatasets[0m dataset [94mimdb[0m, split [94mtest[0m.
textattack: Unknown if model of class <class '__main__.BERTClassifier'> compatible with goal function <class 'textattack.goal_functions.classification.untargeted_classification.UntargetedClassification'>.



  0%|          | 0/100 [00:00<?, ?it/s][A[A[A


  1%|          | 1/100 [00:00<00:10,  9.22it/s][A[A[A


[Succeeded / Failed / Skipped / Total] 0 / 0 / 1 / 1:   1%|          | 1/100 [00:00<00:11,  8.39it/s][A[A[A

Attack(
  (search_method): GreedyWordSwapWIR(
    (wir_method):  weighted-saliency
  )
  (goal_function):  UntargetedClassification
  (transformation):  WordSwapWordNet
  (constraints): 
    (0): RepeatModification
    (1): StopwordModification
  (is_black_box):  True
) 

--------------------------------------------- Result 1 ---------------------------------------------

I went and saw this movie last night after being coaxed to by a few friends of mine. I'll admit that I was reluctant to see it because from what I knew of Ashton Kutcher he was only able to do comedy. I was wrong. Kutcher played the character of Jake Fischer very well, and Kevin Costner played Ben Randall with such professionalism. The sign of a good movie is that it can toy with our emotions. This one did exactly that. The entire theater (which was sold out) was overcome by laughter during the first half of the movie, and were moved to tears during the second half. While exiting the theater I not only saw many women 

KeyboardInterrupt: ignored