In [6]:
from transformers import AutoModelForSequenceClassification, AutoTokenizer, GPT2LMHeadModel, GPT2Tokenizer, BertForMaskedLM
import torch

# Prefer the GPU when torch can see one; otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# --- sentiment classifier ---------------------------------------------------
# Binary label mapping handed to the classifier's config; the reverse mapping
# is derived from the forward one so the two cannot drift apart.
id2label = {0: "NEGATIVE", 1: "POSITIVE"}
label2id = {name: idx for idx, name in id2label.items()}

model_name = "textattack/bert-base-uncased-SST-2"

# `.to(device)` returns the module itself, so it can be chained onto the load.
sentiment_model = AutoModelForSequenceClassification.from_pretrained(
    model_name,
    num_labels=len(id2label),
    id2label=id2label,
    label2id=label2id,
).to(device)

sentiment_model_tokenizer = AutoTokenizer.from_pretrained(model_name)

# --- masked language model --------------------------------------------------
# NOTE: unlike the classifier, this model is not moved to `device` in this cell.
LM_model = BertForMaskedLM.from_pretrained("bert-base-uncased")
# Make the masked-LM head reachable as `lm_head` as well; both attribute names
# refer to the same module object (`cls`).
LM_model.lm_head = LM_model.cls

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForMaskedLM: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForMaskedLM from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForMaskedLM from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [28]:
# Echo the masked-LM model so the notebook prints its module tree
# (embeddings, encoder layers, ...) as the cell output.
LM_model

BertForMaskedLM(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-11): 12 x BertLayer(
          (attention): BertAttention(
            (self): BertSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwi

In [33]:
# Example sentence used for the rest of the walkthrough.
sentence = "The quick brown fox jumps over the lazy dog."
# Calling the tokenizer directly and taking "input_ids" gives the plain list of
# token ids, with the special start/end tokens added around the sentence.
ids = sentiment_model_tokenizer(sentence)["input_ids"]
ids

[101, 1996, 4248, 2829, 4419, 14523, 2058, 1996, 13971, 3899, 1012, 102]

In [36]:
# First convert the token ids to a batch of word embeddings.
#
# `torch.as_tensor` keeps this cell idempotent: on a re-run `ids` is already a
# tensor and is reused as-is, instead of being copy-constructed through
# `torch.tensor(...)` (which emits a UserWarning). The ids are placed on the
# same device as the language model so the lookup works wherever it lives.
ids = torch.as_tensor(ids, device=LM_model.device).view(1, -1)  # (1, seq_len)

# Look up ONLY the word-embedding table. The full `LM_model.bert.embeddings`
# module additionally adds position and token-type embeddings and applies
# LayerNorm/dropout; the model performs those steps itself on whatever is
# passed as `inputs_embeds`, so feeding it the full module's output would apply
# them twice.
word_embedding = LM_model.get_input_embeddings()(ids)
word_embedding.shape

  ids = torch.tensor(ids).view(1, -1)


torch.Size([1, 12, 768])

In [49]:
# Run the encoder once on the embeddings and keep its final hidden states.
# Calling the full masked-LM model here would also run the prediction head and
# then throw its logits away, only for the head to be applied again below.
final_hidden = LM_model.bert(inputs_embeds=word_embedding).last_hidden_state

# Project the hidden states through the masked-LM head (`lm_head` is the alias
# for `LM_model.cls` set up when the model was loaded).
predictions = LM_model.lm_head(final_hidden)
# NOTE(review): expected to be (batch, seq_len, vocab_size) — confirm on a fresh run.
predictions.shape

torch.Size([1, 12, 768])