In [1]:
# Source: https://huggingface.co/learn/nlp-course/chapter2/5

In [4]:
import torch 
from transformers import AutoTokenizer, AutoModelForSequenceClassification

In [6]:
checkpoint = "distilbert-base-uncased-finetuned-sst-2-english"
tokenizer = AutoTokenizer.from_pretrained(checkpoint)

In [7]:
tokenizer

PreTrainedTokenizerFast(name_or_path='distilbert-base-uncased-finetuned-sst-2-english', vocab_size=30522, model_max_len=512, is_fast=True, padding_side='right', truncation_side='right', special_tokens={'unk_token': '[UNK]', 'sep_token': '[SEP]', 'pad_token': '[PAD]', 'cls_token': '[CLS]', 'mask_token': '[MASK]'})

In [8]:
model = AutoModelForSequenceClassification.from_pretrained(checkpoint)

In [9]:
model

DistilBertForSequenceClassification(
  (distilbert): DistilBertModel(
    (embeddings): Embeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (transformer): Transformer(
      (layer): ModuleList(
        (0): TransformerBlock(
          (attention): MultiHeadSelfAttention(
            (dropout): Dropout(p=0.1, inplace=False)
            (q_lin): Linear(in_features=768, out_features=768, bias=True)
            (k_lin): Linear(in_features=768, out_features=768, bias=True)
            (v_lin): Linear(in_features=768, out_features=768, bias=True)
            (out_lin): Linear(in_features=768, out_features=768, bias=True)
          )
          (sa_layer_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
          (ffn): FFN(
            (dropout): Dropout(p=0.1, inplace=False)
       

In [15]:
print(f"Number of Model Parameters: {round(model.num_parameters()/1e6, 2)} M")

Number of Model Parameters: 66.96 M


In [16]:
sequence = "I am Md Abul Hayat"

In [42]:
tokens = tokenizer.tokenize(sequence)
tokens

['i', 'am', 'md', 'abu', '##l', 'hay', '##at']

In [44]:
ids = tokenizer.convert_tokens_to_ids(tokens)
ids

[1045, 2572, 9108, 8273, 2140, 10974, 4017]

In [45]:
input_ids = torch.tensor(ids)
input_ids.shape

torch.Size([7])

In [38]:
# model(input_ids)

In [40]:
tokenized_inputs = tokenizer(sequence)
tokenized_inputs

{'input_ids': [101, 1045, 2572, 9108, 8273, 2140, 10974, 4017, 102], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1]}

In [41]:
tokenized_inputs = tokenizer(sequence, return_tensors = 'pt')
tokenized_inputs

{'input_ids': tensor([[  101,  1045,  2572,  9108,  8273,  2140, 10974,  4017,   102]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1]])}

In [47]:
input_ids = torch.tensor([ids])
input_ids.shape

torch.Size([1, 7])

In [48]:
output = model(input_ids)

In [54]:
output.logits

tensor([[ 0.4859, -0.1915]], grad_fn=<AddmmBackward0>)

### Padding

In [60]:
sequence1_ids = [[200, 200, 200]]
sequence2_ids = [[200, 200]]

batched_ids = [
    [200, 200, 200],
    [200, 200, tokenizer.pad_token_id]
]

In [65]:
model(torch.tensor(sequence1_ids)).logits

tensor([[ 1.5694, -1.3895]], grad_fn=<AddmmBackward0>)

In [66]:
model(torch.tensor(sequence2_ids)).logits

tensor([[ 0.5803, -0.4125]], grad_fn=<AddmmBackward0>)

In [67]:
model(torch.tensor(batched_ids)).logits

tensor([[ 1.5694, -1.3895],
        [ 1.3373, -1.2163]], grad_fn=<AddmmBackward0>)

In [68]:
batched_ids = [
    [200, 200, 200],
    [200, 200, tokenizer.pad_token_id]
]

attention_mask = [
    [1, 1, 1],
    [1, 1, 0],
]

In [71]:
outputs = model(torch.tensor(batched_ids), attention_mask = torch.tensor(attention_mask))

In [72]:
outputs.logits

tensor([[ 1.5694, -1.3895],
        [ 0.5803, -0.4125]], grad_fn=<AddmmBackward0>)

In [74]:
seq1 = "I’ve been waiting for a HuggingFace course my whole life"
seq2 = "I hate this so much!"

In [78]:
seq1_tokens = tokenizer.tokenize(seq1)
seq1_tokens

['i',
 '’',
 've',
 'been',
 'waiting',
 'for',
 'a',
 'hugging',
 '##face',
 'course',
 'my',
 'whole',
 'life']

In [83]:
seq1_tokenized = tokenizer(seq1)
seq1_tokenized

{'input_ids': [101, 1045, 1521, 2310, 2042, 3403, 2005, 1037, 17662, 12172, 2607, 2026, 2878, 2166, 102], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]}

In [98]:
seq2_tokenized = tokenizer(seq1)
seq2_tokenized

{'input_ids': [101, 1045, 1521, 2310, 2042, 3403, 2005, 1037, 17662, 12172, 2607, 2026, 2878, 2166, 102], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]}

In [102]:
output = model(torch.tensor([seq1_tokenized["input_ids"]]), torch.tensor(seq1_tokenized["attention_mask"]))

In [101]:
output.logits

tensor([[-1.5098,  1.5443]], grad_fn=<AddmmBackward0>)

### Longer Sequences

In [104]:
from transformers import LongformerConfig, LongformerModel

# Initializing a Longformer configuration
configuration = LongformerConfig()

# Initializing a model from the configuration
model = LongformerModel(configuration)

# Accessing the model configuration
configuration = model.config

In [105]:
configuration

LongformerConfig {
  "attention_probs_dropout_prob": 0.1,
  "attention_window": [
    512,
    512,
    512,
    512,
    512,
    512,
    512,
    512,
    512,
    512,
    512,
    512
  ],
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "longformer",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "onnx_export": false,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "sep_token_id": 2,
  "transformers_version": "4.24.0",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 30522
}

In [106]:
model

LongformerModel(
  (embeddings): LongformerEmbeddings(
    (word_embeddings): Embedding(30522, 768, padding_idx=1)
    (position_embeddings): Embedding(512, 768, padding_idx=1)
    (token_type_embeddings): Embedding(2, 768)
    (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
    (dropout): Dropout(p=0.1, inplace=False)
  )
  (encoder): LongformerEncoder(
    (layer): ModuleList(
      (0): LongformerLayer(
        (attention): LongformerAttention(
          (self): LongformerSelfAttention(
            (query): Linear(in_features=768, out_features=768, bias=True)
            (key): Linear(in_features=768, out_features=768, bias=True)
            (value): Linear(in_features=768, out_features=768, bias=True)
            (query_global): Linear(in_features=768, out_features=768, bias=True)
            (key_global): Linear(in_features=768, out_features=768, bias=True)
            (value_global): Linear(in_features=768, out_features=768, bias=True)
          )
          (ou

In [107]:
model.model_max_len

AttributeError: 'LongformerModel' object has no attribute 'model_max_len'