In [3]:
!pip install transformers==3.2.0 -q
!pip install sentencepiece -q

In [1]:
import transformers
# Display the installed version to confirm it matches the version pinned at install time.
transformers.__version__

'3.2.0'

In [2]:
from transformers import T5Tokenizer, T5ForConditionalGeneration
import torch

In [57]:
# Load the pretrained "t5-small" tokenizer and sequence-to-sequence model.
tokenizer = T5Tokenizer.from_pretrained("t5-small")
model = T5ForConditionalGeneration.from_pretrained("t5-small")

# Task-specific hyperparameters: truncation limits for the source (input)
# and target (output) token sequences.
max_source_length = 512
max_target_length = 128

# Two toy training examples. Each input holds two clauses separated by ";",
# and each target is a single sentence combining them.
input_sequence_1 = "Welcome to NYC; how are doing"
output_sequence_1 = "How welcome are you in NYC"

input_sequence_2 = "HuggingFace is a company; we are people"
output_sequence_2 = "We are people in Huggingface company"

# Encode the inputs: prepend the task prefix to every source sequence, pad the
# batch to its longest member and truncate anything beyond max_source_length.
task_prefix = "mix two sentences: "
input_sequences = [input_sequence_1, input_sequence_2]

encoding = tokenizer(
    [task_prefix + sequence for sequence in input_sequences],
    padding="longest",
    max_length=max_source_length,
    truncation=True,
    return_tensors="pt",
)

input_ids, attention_mask = encoding.input_ids, encoding.attention_mask

# Encode the targets the same way, bounded by max_target_length.
target_encoding = tokenizer(
    [output_sequence_1, output_sequence_2],
    padding="longest",
    max_length=max_target_length,
    truncation=True,
    return_tensors="pt",
)

# Build the labels as a NEW tensor in which padding token ids are replaced by
# -100 so they are ignored by the loss. `masked_fill` (not in-place) leaves
# `target_encoding.input_ids` untouched instead of mutating it through an alias.
labels = target_encoding.input_ids.masked_fill(
    target_encoding.input_ids == tokenizer.pad_token_id, -100
)

In [58]:
# Shape of the attention mask for the padded input batch.
attention_mask.shape

torch.Size([2, 17])


In [59]:
# Dimensions of the padded input id batch.
input_ids.size()

torch.Size([2, 17])

In [60]:
# Inspect the label ids; padding positions appear as -100.
labels

tensor([[  571,  2222,    33,    25,    16, 13465,     1,  -100,  -100],
        [  101,    33,   151,    16, 11560,  3896,  4861,   349,     1]])

In [61]:
# Forward pass over the encoded batch, with `labels` supplied alongside the inputs.
# NOTE: despite its name, `loss` receives everything the model returns;
# the cells below index into it to pull out individual elements.
loss = model(
    input_ids=input_ids,
    attention_mask=attention_mask,
    labels=labels,
)

In [62]:
# Element 0 of the model output is the scalar loss. Element 1 is presumably the
# logits, shaped (batch, target_len, vocab_size) — TODO confirm against the model docs.
loss[0], loss[1].shape

(tensor(4.1771, grad_fn=<NllLossBackward0>), torch.Size([2, 9, 32128]))

In [63]:
# Length of the fourth output element — presumably the encoder's last hidden state,
# whose leading dimension would be the batch size; TODO confirm for this transformers version.
len(loss[3])

2

In [65]:
# Extract the loss tensor's single value as a plain Python float.
loss[0].item()

4.177064418792725

In [69]:
# Encode a single unseen prompt for inference. The prompt reuses `task_prefix`
# so it always carries exactly the prefix used for the training inputs.
# NOTE: this rebinds `input_ids`, replacing the training batch; re-run the
# encoding cell before repeating the forward pass.
inference_text = task_prefix + "Who are these people?; we are going"
input_ids = tokenizer(inference_text, return_tensors="pt").input_ids

In [70]:
# Generate output token ids for the encoded prompt; no generation options are
# passed, so the library's default settings apply.
outputs = model.generate(input_ids=input_ids)

In [71]:
# Turn the first generated sequence back into text, leaving out special tokens,
# and print it.
decoded_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
print(decoded_text)

: Who are these people?; we are going
