In [1]:
import pandas as pd

In [2]:
# Read the logged CSV into a DataFrame. Judging by the variable name these are
# the texts the baselines were run on -- TODO confirm against the file's columns.
baseline_log_csv = "roberta-base-imdb-short-10-log.csv"
same_texts_as_baselines = pd.read_csv(baseline_log_csv)

In [3]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification

# Label vocabulary for the two-class head. The reverse mapping is derived from
# the forward one so the two dictionaries cannot drift apart.
id2label = {0: "NEGATIVE", 1: "POSITIVE"}
label2id = {name: index for index, name in id2label.items()}

# Load the `textattack/bert-base-uncased-imdb` checkpoint as a sequence
# classifier and attach the label names to its config.
model = AutoModelForSequenceClassification.from_pretrained(
    "textattack/bert-base-uncased-imdb",
    num_labels=len(id2label),
    id2label=id2label,
    label2id=label2id,
)

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
# Display the model's repr to inspect its layer structure.
model

BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-11): 12 x BertLayer(
          (attention): BertAttention(
            (self): BertSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e

In [5]:
# Load the tokenizer from the same checkpoint name that the model cell uses.
tokenizer_checkpoint = "textattack/bert-base-uncased-imdb"
tokenizer = AutoTokenizer.from_pretrained(tokenizer_checkpoint)

In [6]:
# Example input; the commented-out line is an alternative sentence to try.
sentence = "I loved the movie. It was fantastic."
# sentence = "I hated the movie. It was terrible."
# return_tensors="pt" makes encode() return a PyTorch tensor rather than a plain list of ids.
ids_tensor = tokenizer.encode(sentence, return_tensors="pt")
ids_tensor

tensor([[  101,  1045,  3866,  1996,  3185,  1012,  2009,  2001, 10392,  1012,
           102]])

In [7]:
# tokenize() returns the token strings only; the displayed result has no
# [CLS]/[SEP] markers.
tokens = tokenizer.tokenize(sentence)
tokens

['i', 'loved', 'the', 'movie', '.', 'it', 'was', 'fantastic', '.']

In [8]:
# Encode to ids first, then map the ids back to token strings; this route also
# surfaces the [CLS] and [SEP] tokens that encode() adds.
# Note: this rebinds `tokens`, replacing the list produced by tokenize().
id_list = tokenizer.encode(sentence)
tokens = tokenizer.convert_ids_to_tokens(id_list)
tokens

['[CLS]',
 'i',
 'loved',
 'the',
 'movie',
 '.',
 'it',
 'was',
 'fantastic',
 '.',
 '[SEP]']

In [9]:
import torch

# Inference only, so gradient tracking is switched off for the forward pass.
with torch.no_grad():
    model_output = model(ids_tensor)

# Raw class scores taken from the model output (not normalised to probabilities).
logits = model_output.logits
logits

tensor([[-1.9893,  2.8492]])

In [62]:
# Index of the highest-scoring class. The reduction runs over the class axis
# (dim=-1): a bare argmax() flattens the whole tensor first, which only equals
# the class id when the batch holds a single sequence. .item() converts the
# one-element result to a plain Python int and raises if there is more than one.
predicted_class_id = logits.argmax(dim=-1).item()
# Show the id -> label mapping stored on the model config.
model.config.id2label

{0: 'NEGATIVE', 1: 'POSITIVE'}

In [63]:
# Translate the predicted class id into its label via the model config's mapping.
model.config.id2label[predicted_class_id]

'POSITIVE'

In [2]:
import torch

# 3x3 integer matrix holding 1..9 in row-major order.
t = torch.arange(1, 10).view(3, 3)
t

tensor([[1, 2, 3],
        [4, 5, 6],
        [7, 8, 9]])

In [6]:
# Reinterpret the tensor as rows of 9 columns; -1 lets PyTorch infer the row count.
t.view(-1, 9)

tensor([[1, 2, 3, 4, 5, 6, 7, 8, 9]])