In [2]:
!pip install bertviz transformers pytorch-transformers

# Mount Google Drive
from google.colab import drive
drive.mount(f'/content/drive')

# Library imports
from bertviz import head_view, model_view, neuron_view
from bertviz.neuron_view import show
from bertviz.transformers_neuron_view import RobertaModel, RobertaTokenizer
from transformers import AutoConfig, AutoTokenizer, RobertaForSequenceClassification, RobertaTokenizer
import sys

# Set directories
%cd drive/MyDrive/Colab Notebooks/NLP_CW2/
sys.path.insert(0,'/content/drive/MyDrive/Colab Notebooks/NLP_CW2')

# Local imports
from utils import *

Collecting bertviz
[?25l  Downloading https://files.pythonhosted.org/packages/15/8b/f4226c75b35df80504ef41399fc1569b550332e3e4796618e5669c91af55/bertviz-1.0.0-py3-none-any.whl (162kB)
[K     |██                              | 10kB 16.3MB/s eta 0:00:01[K     |████                            | 20kB 21.8MB/s eta 0:00:01[K     |██████                          | 30kB 23.3MB/s eta 0:00:01[K     |████████                        | 40kB 27.0MB/s eta 0:00:01[K     |██████████                      | 51kB 24.5MB/s eta 0:00:01[K     |████████████                    | 61kB 17.9MB/s eta 0:00:01[K     |██████████████                  | 71kB 17.4MB/s eta 0:00:01[K     |████████████████▏               | 81kB 18.4MB/s eta 0:00:01[K     |██████████████████▏             | 92kB 18.0MB/s eta 0:00:01[K     |████████████████████▏           | 102kB 18.0MB/s eta 0:00:01[K     |██████████████████████▏         | 112kB 18.0MB/s eta 0:00:01[K     |████████████████████████▏       | 122kB 18.0

In [3]:
class MyArgs:
  """Plain container for the run settings used in this notebook.

  Every constructor argument is stored unchanged as an instance attribute of
  the same name.

  Args:
    model: model identifier; this notebook hands it to `from_pretrained`.
    ngpus: number of GPUs; this notebook hands it to `load_model`.
    weight_decay: stored as given (default 0.01).
    learning_rate: stored as given (default 2e-5).
    nepochs: stored as given (default 2).
    batch_size: stored as given (default 16).
    max_length: stored as given (default 64).
    nruns: stored as given (default 1).
  """

  def __init__(self, model, ngpus, weight_decay=0.01, learning_rate=2e-5, nepochs=2, batch_size=16, max_length=64,
               nruns=1):
    # Collect the settings in declaration order, then attach each one to the
    # instance under its own name.
    settings = {
        'model': model,
        'ngpus': ngpus,
        'weight_decay': weight_decay,
        'learning_rate': learning_rate,
        'nepochs': nepochs,
        'batch_size': batch_size,
        'max_length': max_length,
        'nruns': nruns,
    }
    for attr_name, attr_value in settings.items():
      setattr(self, attr_name, attr_value)

In [4]:
def load_model(model, ngpus, load_path):
    """Build a sequence-classification model and restore weights from a checkpoint.

    Args:
        model: model name or path passed to the `from_pretrained` calls.
        ngpus: number of GPUs to use. When greater than 0 the model is moved
            to CUDA and wrapped in `torch.nn.DataParallel` over devices
            0..ngpus-1; when 0 the model is left on the CPU.
        load_path: path of the checkpoint passed to `torch.load`.

    Returns:
        The loaded model, wrapped in `torch.nn.DataParallel` when `ngpus > 0`.
    """
    # num_labels=1 gives a single-output head; output_attentions=True makes the
    # forward pass also return the attention weights.
    # NOTE(review): `output_pretrained` is passed through unchanged from the
    # original code - confirm it is a setting the config actually uses.
    config = AutoConfig.from_pretrained(model, num_labels=1, output_attentions=True, output_pretrained=True)
    classifier = AutoModelForSequenceClassification.from_pretrained(model, config=config)

    # Load onto the CPU first so a checkpoint saved from a GPU can still be
    # read on a CPU-only host; the model is moved to CUDA afterwards if asked.
    # NOTE: torch.load unpickles the file - only load checkpoints you trust.
    state_dict = torch.load(load_path, map_location='cpu')

    # Use strict=False since the provided models were originally trained with an earlier version of Huggingface
    classifier.load_state_dict(state_dict, strict=False)

    # Decide on the *parameter*, not on a notebook-level `args` global.
    if ngpus > 0:
        classifier = classifier.cuda()
        classifier = torch.nn.DataParallel(classifier, device_ids=list(range(ngpus)))
    return classifier

def load_process_sentences(model, sentences, max_length=512):
    """Turn raw sentences into a pair of tensors for the model.

    Each sentence is prefixed with "[CLS] ", then handed together with a
    tokenizer for `model` to `get_ids_mask` (defined outside this cell).

    Args:
        model: model name or path used to load the tokenizer.
        sentences: iterable of strings.
        max_length: forwarded unchanged to `get_ids_mask` (default 512).

    Returns:
        A 2-tuple of tensors built from the two values returned by
        `get_ids_mask` - presumably token ids and attention masks (confirm
        against utils.py).
    """
    prefixed = []
    for text in sentences:
        prefixed.append("[CLS] " + text)

    encoded = get_ids_mask(prefixed, AutoTokenizer.from_pretrained(model), max_length)
    token_ids, attention_masks = encoded
    return torch.tensor(token_ids), torch.tensor(attention_masks)

In [5]:
# Checkpoint to restore. The path is relative to the current working directory;
# the absolute alternative is
# '/content/drive/MyDrive/Colab Notebooks/NLP_CW2/util_roberta-large.pt'
load_path = 'util_roberta-large.pt'

# Run settings: which pretrained model to use and how many GPUs.
args = MyArgs('roberta-large', 1)

# Build the model, restore its weights and switch it to evaluation mode.
# eval() returns the module itself, so the chained call binds the same object.
model = load_model(args.model, args.ngpus, load_path).eval()
tokenizer = AutoTokenizer.from_pretrained(args.model)

# Sentence pair fed to the BertViz cells below.
sentence_a = "I decided to go look at the animals in the zoo. They were all hiding in the bushes."
sentence_b = "I decided to go look at the animals in the zoo."

HBox(children=(FloatProgress(value=0.0, description='Downloading', max=482.0, style=ProgressStyle(description_…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=1425941629.0, style=ProgressStyle(descr…




Some weights of the model checkpoint at roberta-large were not used when initializing RobertaForSequenceClassification: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.dense.weight', 'classif

HBox(children=(FloatProgress(value=0.0, description='Downloading', max=898823.0, style=ProgressStyle(descripti…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=456318.0, style=ProgressStyle(descripti…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=1355863.0, style=ProgressStyle(descript…




In [6]:
# sentences = ["[CLS] " + s for s in [sentence_a, sentence_b]]
# ids, amasks = get_ids_mask(sentences, tokenizer, args.max_length)
# inputs = torch.tensor(ids)
# masks = torch.tensor(amasks)

In [None]:
# BertViz: Head view
import torch  # local import so this cell does not rely on a star import for `torch`

# Encode the two sentences as one pair and keep the id tensor (batch of 1).
inputs = tokenizer.encode_plus(sentence_a, sentence_b, return_tensors='pt', add_special_tokens=True)
input_ids = inputs['input_ids']

# Inference only: no_grad avoids building an autograd graph that is never used.
# The model was configured with output_attentions=True, and the last element
# of its output is taken as the attention weights.
with torch.no_grad():
    attention = model(input_ids)[-1]

input_id_list = input_ids[0].tolist() # Batch index 0
tokens = tokenizer.convert_ids_to_tokens(input_id_list)
head_view(attention, tokens)

In [None]:
# BertViz: Model view
import torch  # local import so this cell does not rely on a star import for `torch`

# Encode the two sentences as one pair and keep the id tensor (batch of 1).
inputs = tokenizer.encode_plus(sentence_a, sentence_b, return_tensors='pt', add_special_tokens=True)
input_ids = inputs['input_ids']

# Inference only: no_grad avoids building an autograd graph that is never used.
# The model was configured with output_attentions=True, and the last element
# of its output is taken as the attention weights.
with torch.no_grad():
    attention = model(input_ids)[-1]

input_id_list = input_ids[0].tolist() # Batch index 0
tokens = tokenizer.convert_ids_to_tokens(input_id_list)
model_view(attention, tokens)

In [None]:
# BertViz: Neuron view
# The neuron view needs BertViz's own model/tokenizer classes. They are
# re-imported here because the setup cell's later `from transformers import ...`
# rebinds RobertaTokenizer to the transformers class.
from bertviz.neuron_view import show
from bertviz.transformers_neuron_view import RobertaModel, RobertaTokenizer

model_type = 'roberta'
model_version = 'roberta-large'

# Load from the model *name*. Separate variable names keep the fine-tuned
# `model` / `tokenizer` used by the head and model view cells intact, so those
# cells can still be re-run after this one.
# NOTE(review): this visualises the pretrained roberta-large weights, not the
# checkpoint restored by load_model - confirm that is the intent.
neuron_model = RobertaModel.from_pretrained(model_version, output_attentions=True)
neuron_tokenizer = RobertaTokenizer.from_pretrained(model_version)

show(neuron_model, model_type, neuron_tokenizer, sentence_a, sentence_b)