In [None]:
### Creating a Transformer

In [1]:
from transformers import BertConfig, BertModel

In [2]:
# Build a configuration object using BertConfig's default hyperparameters.
config = BertConfig()

# Instantiating BertModel from a config alone (no checkpoint) creates a
# randomly initialized model.
model = BertModel(config)

In [4]:
# At this point we can either load existing (pretrained) weights or train the
# model ourselves. Below we simply load pretrained weights.
# NOTE: this rebinds `model`, replacing the randomly initialized instance
# created from `config` in the previous cell.

model = BertModel.from_pretrained("bert-base-cased")



In [3]:
# Print the `config` object built with BertConfig() earlier in the notebook.
# NOTE(review): this is the default configuration, not one read back from the
# "bert-base-cased" checkpoint — confirm that is the object meant to be inspected.
print(config)

BertConfig {
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.40.1",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 30522
}



In [None]:
# saving the model
# model.save_pretrained("<model_name>") # this will save the model config and the weights as a binary file

In [None]:
# Note: transformer models can only process numbers, so any text input needs to be tokenized into a numeric format the model understands.

In [22]:
# Toy batch: three single-word inputs to run through the tokenizer.
sequences = [
    "Hello",
    "Cool",
    "Nice",
]

In [23]:
from transformers import BertTokenizer

# Load the tokenizer for the same "bert-base-cased" checkpoint name that was
# used to load the model, so token ids line up with the model's vocabulary.
tokenizer = BertTokenizer.from_pretrained("bert-base-cased")



In [24]:
# Tokenize all sequences in a single call. No padding, truncation or
# return_tensors arguments are passed, so the result holds plain Python lists
# (see the displayed output in the next cell).
encoded_sentences = tokenizer(sequences)

In [25]:
# Display the full encoding: input_ids, token_type_ids and attention_mask,
# each with one inner list per input sequence.
encoded_sentences

{'input_ids': [[101, 8667, 102], [101, 13297, 102], [101, 8835, 102]], 'token_type_ids': [[0, 0, 0], [0, 0, 0], [0, 0, 0]], 'attention_mask': [[1, 1, 1], [1, 1, 1], [1, 1, 1]]}

In [26]:
# Print the token ids of each sequence on its own line.
# Every list in the output starts with 101 and ends with 102 — presumably the
# special tokens the tokenizer wraps around each sequence (TODO confirm).
for encoded in encoded_sentences["input_ids"]:
    print(encoded)

[101, 8667, 102]
[101, 13297, 102]
[101, 8835, 102]


In [27]:
import torch

# Convert the nested list of token ids into a single tensor. This works here
# because every encoding has the same length (3 ids each); sequences of unequal
# length would have to be padded before they can form one tensor.
model_inputs = torch.tensor(encoded_sentences["input_ids"])

In [28]:
# Run a forward pass for inference only. Gradient tracking is disabled because
# nothing is trained here: without it, the result carries an autograd graph
# (visible as `grad_fn=...` on the returned tensors) that costs memory for no
# benefit. The computed hidden-state values are unchanged.
with torch.no_grad():
    output = model(model_inputs)

In [30]:
# Inspect the model's last hidden states: one block per input sequence and one
# row per token (3 blocks of 3 rows in the output below).
output.last_hidden_state

tensor([[[ 0.2331,  0.3568,  0.6697,  ..., -0.2591,  0.4786,  0.1082],
         [ 0.4608, -0.4626,  0.6036,  ..., -0.5210,  0.4498,  0.1813],
         [ 0.8368,  0.3394, -0.0480,  ...,  0.3450,  1.2842, -0.0550]],

        [[ 0.2332,  0.1323,  0.3212,  ...,  0.1298,  0.2753,  0.0279],
         [ 0.3232, -0.2685,  0.6295,  ..., -0.4133,  0.4378,  0.1633],
         [ 0.1452,  0.7259, -0.2229,  ...,  0.5730,  1.4919, -0.3813]],

        [[ 0.1454,  0.1195,  0.0471,  ..., -0.0505,  0.0894,  0.0574],
         [ 0.4470, -0.3779,  0.4225,  ..., -0.2240,  0.5730,  0.2702],
         [-0.2411,  0.1996, -1.1288,  ...,  0.2231,  1.1469,  0.2048]]],
       grad_fn=<NativeLayerNormBackward0>)