<h2 align="center">Codebasics DL Course: BERT Basics Using Hugging Face</h2>

In [16]:
from transformers import BertTokenizer, BertModel

### Tokenization

In [17]:
# Load the pretrained tokenizer published under the 'bert-base-uncased'
# checkpoint name (the same name is used for the model further down).
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

In [18]:
# Encode a single sentence; return_tensors='pt' makes the tokenizer hand back
# PyTorch tensors. The result has three entries: input_ids, token_type_ids
# and attention_mask, each with one row for the one sentence.
tokens = tokenizer("I love apple", return_tensors='pt')
tokens

{'input_ids': tensor([[ 101, 1045, 2293, 6207,  102]]), 'token_type_ids': tensor([[0, 0, 0, 0, 0]]), 'attention_mask': tensor([[1, 1, 1, 1, 1]])}

### Padding and Truncation

In [33]:
# Encode a batch of two sentences of different lengths.
# With padding=True the shorter sentence is right-padded with id 0 up to the
# length of the longer one, and its attention_mask is 0 on the padded slots
# (see the output below).
tokens = tokenizer(
    ["I love apple", "you are what you eat"],
    padding=True,
    truncation=True,
    return_tensors='pt',
)
tokens

{'input_ids': tensor([[ 101, 1045, 2293, 6207,  102,    0,    0],
        [ 101, 2017, 2024, 2054, 2017, 4521,  102]]), 'token_type_ids': tensor([[0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 0, 0],
        [1, 1, 1, 1, 1, 1, 1]])}

In [23]:
# Same batch, but every sequence is forced to exactly max_length=5 ids.
# The first sentence already encodes to 5 ids, so nothing is padded; the
# second is cut down to 5 ids and still ends with the closing id 102
# (see the output below).
tokens = tokenizer(
    ["I love apple", "you are what you eat"],
    padding="max_length",
    max_length=5,
    truncation=True,
    return_tensors='pt',
)
tokens

{'input_ids': tensor([[ 101, 1045, 2293, 6207,  102],
        [ 101, 2017, 2024, 2054,  102]]), 'token_type_ids': tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]]), 'attention_mask': tensor([[1, 1, 1, 1, 1],
        [1, 1, 1, 1, 1]])}

In [24]:
# Re-encode the single sentence so that `tokens` once again holds the
# one-sentence encoding; the padding cells above rebound it to a two-sentence
# batch. The cells below (id -> token lookup and the model call) read this value.
tokens = tokenizer("I love apple", return_tensors='pt')
tokens

{'input_ids': tensor([[ 101, 1045, 2293, 6207,  102]]), 'token_type_ids': tensor([[0, 0, 0, 0, 0]]), 'attention_mask': tensor([[1, 1, 1, 1, 1]])}

In [32]:
# Map the ids of the first (and only) sequence back to their token strings.
# The output shows the special [CLS] / [SEP] markers wrapped around the
# sentence, and that "I" comes back lowercased as 'i'.
tokenizer.convert_ids_to_tokens(tokens['input_ids'][0])

['[CLS]', 'i', 'love', 'apple', '[SEP]']

### Contextual Embeddings

In [26]:
# Load the pretrained BERT model under the same checkpoint name as the tokenizer.
model = BertModel.from_pretrained("bert-base-uncased")
# Unpack the tokenizer result (input_ids, token_type_ids, attention_mask)
# as keyword arguments to the model call.
# NOTE(review): the tensor displayed in the next cell carries a grad_fn, so
# this forward pass runs with autograd tracking enabled. For inference-only
# use, consider wrapping the call in torch.no_grad() -- confirm first, since
# that changes the displayed repr and needs `import torch`.
output = model(**tokens)

In [27]:
# The `last_hidden_state` entry: one embedding row per input token
# (5 rows here, matching the 5 tokens of the encoded sentence).
output['last_hidden_state']

tensor([[[ 0.0348,  0.2395, -0.0695,  ..., -0.1550,  0.0836,  0.0415],
         [ 0.0349,  0.4142,  0.0838,  ..., -0.2773,  0.4387,  0.0628],
         [ 0.8293,  0.8872,  0.7468,  ..., -0.0368,  0.1503, -0.1132],
         [-0.4576, -0.0591, -0.6812,  ...,  0.8809,  0.1841, -0.6385],
         [ 0.7952,  0.2601, -0.1495,  ..., -0.3544, -0.5979, -0.4172]]],
       grad_fn=<NativeLayerNormBackward0>)

In [28]:
# Shape is (1, 5, 768): 1 sequence, 5 tokens, a 768-value vector per token.
output['last_hidden_state'].shape

torch.Size([1, 5, 768])

In [30]:
# `pooler_output` has shape (1, 768): a single 768-value vector for the whole
# sequence rather than one vector per token.
# NOTE(review): how this vector is derived is not visible in this notebook --
# see the BertModel documentation before relying on it as a sentence embedding.
output['pooler_output'].shape

torch.Size([1, 768])