## Import packages

In [2]:
from transformers import pipeline, AutoModel, AutoTokenizer
import torch
import torch.nn.functional as F

## Don't run these, takes 8min (already saved)

In [None]:
bertweet = AutoModel.from_pretrained('vinai/bertweet-large')
tokenizer = AutoTokenizer.from_pretrained('vinai/bertweet-large', use_fast=False)

In [6]:
save_directory = 'saved'
tokenizer.save_pretrained(save_directory)
bertweet.save_pretrained(save_directory)

## Start from here

In [4]:
# Load Bertweet base model
save_directory = 'saved'
bertweet = AutoModel.from_pretrained(save_directory)
tokenizer = AutoTokenizer.from_pretrained(save_directory)

In [5]:
# Example string input to tokenizer
line = "Hello world, this is my first time using Bertweet!"
res = tokenizer(line)
res

{'input_ids': [0, 31414, 232, 6, 42, 16, 127, 78, 86, 634, 12975, 21210, 328, 2], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]}

In [7]:
# Creating batch for input into Bertweet
X_train = ['This is my first sentence.', 'And this is my second sentence, I hope you understand it.']
batch = tokenizer(X_train, padding=True, truncation=True, max_length=512, return_tensors='pt')
batch

{'input_ids': tensor([[   0,  713,   16,  127,   78, 3645,    4,    2,    1,    1,    1,    1,
            1,    1,    1],
        [   0, 2409,   42,   16,  127,  200, 3645,    6,   38, 1034,   47, 1346,
           24,    4,    2]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0],
        [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]])}

In [11]:
# 
with torch.no_grad():
    outputs = bertweet(**batch)
print(outputs.last_hidden_state)
print(outputs.last_hidden_state.shape)

tensor([[[-0.0453, -0.0353,  0.1269,  ...,  0.1318,  0.1431, -0.0268],
         [ 0.1216, -0.1689, -0.8237,  ...,  0.3062, -0.3307,  0.3082],
         [ 0.0916,  0.1439, -0.9215,  ...,  0.3699, -0.0623,  0.2387],
         ...,
         [-0.0136, -0.0934, -0.3526,  ...,  0.0756,  0.0351,  0.0351],
         [-0.0136, -0.0934, -0.3526,  ...,  0.0756,  0.0351,  0.0351],
         [-0.0136, -0.0934, -0.3526,  ...,  0.0756,  0.0351,  0.0351]],

        [[-0.2916, -0.2605,  0.1840,  ...,  0.2022, -0.0896,  0.2360],
         [ 0.0496,  0.3750, -0.7756,  ..., -0.0128,  0.2076,  0.1194],
         [ 0.0489, -0.4376, -1.0619,  ...,  0.2605, -0.5458,  0.1574],
         ...,
         [-0.0306, -0.0894, -0.0339,  ..., -0.1659,  0.0283, -0.0898],
         [ 0.0696,  0.0282,  0.0196,  ..., -0.0197,  0.2267, -0.0093],
         [ 0.0338, -0.0204, -0.0525,  ..., -0.0569,  0.1598, -0.0067]]])
torch.Size([2, 15, 1024])


In [13]:
# Alternative way to create input to bertweet
input_ids = torch.tensor([tokenizer.encode(line)])
print(input_ids)
bertweet(input_ids)

tensor([[    0, 31414,   232,     6,    42,    16,   127,    78,    86,   634,
         12975, 21210,   328,     2]])


BaseModelOutputWithPoolingAndCrossAttentions(last_hidden_state=tensor([[[-0.0285,  0.0044, -0.0314,  ...,  0.0243,  0.0553,  0.0443],
         [-0.3286,  0.6190, -1.5636,  ..., -0.1887, -0.4045,  0.0190],
         [ 0.0090,  0.2959, -0.4406,  ...,  0.4447, -0.1542,  0.1028],
         ...,
         [-0.1125, -0.3202, -0.2327,  ...,  0.1368,  0.3177,  0.0216],
         [-0.4010,  0.1004, -0.4539,  ...,  0.0684, -0.1501, -0.0831],
         [-0.0237, -0.0087, -0.0484,  ...,  0.0073,  0.0453,  0.0352]]],
       grad_fn=<NativeLayerNormBackward0>), pooler_output=tensor([[ 0.2569,  0.4870, -0.4550,  ...,  0.3592, -0.4953, -0.7143]],
       grad_fn=<TanhBackward0>), hidden_states=None, past_key_values=None, attentions=None, cross_attentions=None)