In [1]:
from transformers import BertConfig, BertModel

# Building the config
config = BertConfig()

# Building the model from the config
model = BertModel(config)

In [2]:
model

BertModel(
  (embeddings): BertEmbeddings(
    (word_embeddings): Embedding(30522, 768, padding_idx=0)
    (position_embeddings): Embedding(512, 768)
    (token_type_embeddings): Embedding(2, 768)
    (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
    (dropout): Dropout(p=0.1, inplace=False)
  )
  (encoder): BertEncoder(
    (layer): ModuleList(
      (0): BertLayer(
        (attention): BertAttention(
          (self): BertSelfAttention(
            (query): Linear(in_features=768, out_features=768, bias=True)
            (key): Linear(in_features=768, out_features=768, bias=True)
            (value): Linear(in_features=768, out_features=768, bias=True)
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (output): BertSelfOutput(
            (dense): Linear(in_features=768, out_features=768, bias=True)
            (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
            (dropout): Dropout(p=0.1, inplace=False)
          

In [3]:
print(config)

BertConfig {
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.20.1",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 30522
}



In [5]:
from transformers import BertConfig, BertModel
import torch
config = BertConfig()
model = BertModel(config)


# use untrained model to get gibberish outcome
from transformers import BertTokenizer, BertForMaskedLM

tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
model = BertForMaskedLM.from_pretrained('bert-base-uncased')
# these lines has been suggested by github copilot and not working
text = "I like to play [MASK] with my friends."

tokenized_text = tokenizer.tokenize(text)
masked_index = tokenized_text.index('[MASK]')
tokenized_text[masked_index] = '[MASK]'
indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)
tokens_tensor = torch.tensor([indexed_tokens])

model.eval()

with torch.no_grad():
    predictions = model(tokens_tensor)

predicted_index = torch.argmax(predictions[0, masked_index]).item()
predicted_token = tokenizer.convert_ids_to_tokens([predicted_index])[0]
print(predicted_token)


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForMaskedLM: ['cls.seq_relationship.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForMaskedLM from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForMaskedLM from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


TypeError: tuple indices must be integers or slices, not tuple

In [7]:
encoded_sequences = [
    [101, 7592, 999, 102],
    [101, 4658, 1012, 102],
    [101, 3835, 999, 102],
]


In [8]:
import torch

model_inputs = torch.tensor(encoded_sequences)

In [9]:
output = model(model_inputs)

In [10]:
output

MaskedLMOutput(loss=None, logits=tensor([[[ -6.8784,  -6.7924,  -6.8190,  ...,  -6.0074,  -6.0348,  -3.9604],
         [-11.6788, -11.7675, -11.6625,  ..., -11.7927, -10.5997,  -7.2536],
         [-12.3819, -12.3635, -12.4734,  ..., -11.8237, -10.5368,  -3.9985],
         [-11.2931, -11.3137, -11.1896,  ...,  -9.0773,  -9.1393, -11.0844]],

        [[ -8.2021,  -8.0694,  -8.0732,  ...,  -7.3225,  -7.1235,  -4.8801],
         [-10.2058, -10.8255, -10.5265,  ..., -10.8788,  -7.7826,  -8.1547],
         [-12.3718, -12.2703, -12.6460,  ..., -11.9417, -10.2464,  -7.1874],
         [-12.9754, -12.7574, -12.6669,  ..., -10.8493, -10.2634, -11.0019]],

        [[ -6.7858,  -6.6812,  -6.7136,  ...,  -6.0624,  -6.0357,  -3.9575],
         [ -9.8298,  -9.9867,  -9.9050,  ..., -11.5350,  -8.7891,  -9.5959],
         [-10.8475, -10.6842, -11.0430,  ..., -10.2337, -10.4369,  -3.5392],
         [-12.1120, -11.9225, -11.7792,  ...,  -9.5271,  -9.6156,  -9.9003]]],
       grad_fn=<ViewBackward0>), hidd