# transformers: Bidirectional LMs

In [None]:
import torch
from transformers import (
    set_seed,
    AutoTokenizer,
    AutoModel # feature extractors (without task-specific head)
    # AutoModelForMaskedLM, # bidirectional models (BERT-like, encoder-only)
    # AutoModelForSequenceClassification # sequence classification (replaces final head)
)

In [None]:
# fix the random seed so that repeated runs are reproducible
set_seed(seed=123)

## Load model

In [None]:
# identifier of the pretrained checkpoint used for both tokenizer and model
model_name: str = "distilbert-base-uncased"

In [None]:
# create the tokenizer that matches the checkpoint named by `model_name`
tokenizer = AutoTokenizer.from_pretrained(model_name)

# show the tokenizer object for inspection
print(tokenizer)

In [None]:
# load the pretrained model as a plain feature extractor (no task-specific head)
model = AutoModel.from_pretrained(
    model_name,
    device_map='cpu',
    torch_dtype=torch.bfloat16, # use brain floating point format
)
model = model.eval() # switch to evaluation mode (e.g. turns off dropout)

print('Model device: {}'.format(model.device))
print('Model dtype: {}'.format(model.dtype))
# get_memory_footprint() reports bytes; divide by 2**30 so the number matches
# the binary "GiB" unit in the label (multiplying by 1e-9 would give decimal GB)
print('Memory footprint: {:.2f} GiB'.format(model.get_memory_footprint() / 2**30))

print(model)

## Run model

In [None]:
# two example sentences that form one batch
raw_input = [
    'A rabbit goes into the supermarket.',
    'One, two, three, four, five.',
]

# tokenize the batch: padding on (batched inputs), truncation on (max. length),
# results returned as PyTorch tensors
model_input = tokenizer(raw_input, padding=True, truncation=True, return_tensors='pt')

# move every tensor onto the device the model lives on
model_input = {
    name: value.to(model.device)
    for name, value in model_input.items()
}

print(model_input)

In [None]:
# extract features; this is pure inference, so disable gradient tracking to
# avoid building an autograd graph and holding on to intermediate activations
with torch.no_grad():
    features = model(**model_input)

# `features` is the model output object; its 'last_hidden_state' entry has
# shape (batch, sequence, features)
print('Features shape: {}'.format(features['last_hidden_state'].shape))