In [1]:
import torch
from transformers import BartTokenizer, BartForConditionalGeneration, BartConfig

In [2]:
model = BartForConditionalGeneration.from_pretrained("facebook/bart-large", force_bos_token_to_be_generated=True)
tok = BartTokenizer.from_pretrained("facebook/bart-large")

In [3]:
example_english_phrase = "New Celtic manager <mask> says he has been discussing his future plans with some of his senior players."
batch = tok(example_english_phrase, return_tensors='pt')
print(batch) 

{'input_ids': tensor([[    0,  4030, 11955,  1044, 50264,   161,    37,    34,    57,  7345,
            39,   499,   708,    19,   103,     9,    39,   949,   472,     4,
             2]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]])}


In [4]:
outputs = model.generate(batch['input_ids'], num_beams=1 , max_length=60, early_stopping=True, 
                         output_scores=True, output_hidden_states=True, return_dict_in_generate=True)

In [5]:
outputs.keys()

odict_keys(['sequences', 'scores', 'encoder_hidden_states', 'decoder_hidden_states'])

In [6]:
len(outputs['scores'])

22

In [7]:
decoding = [2]
for s in outputs['scores']:
    decoding.append(torch.argmax(s[0]).item())
print(decoding)

[2, 4030, 4030, 11955, 1044, 13015, 9122, 161, 37, 34, 57, 7345, 39, 499, 708, 19, 103, 9, 39, 949, 472, 4, 2]


In [8]:
print(outputs['sequences'].tolist()[0])

[2, 4030, 4030, 11955, 1044, 13015, 9122, 161, 37, 34, 57, 7345, 39, 499, 708, 19, 103, 9, 39, 949, 472, 4, 2]


In [9]:
outputs['sequences'].shape

torch.Size([1, 23])

In [10]:
print(tok.batch_decode(outputs['sequences'], skip_special_tokens=True))

['NewNew Celtic manager Brendan Rodgers says he has been discussing his future plans with some of his senior players.']


In [11]:
print(tok.batch_decode(torch.tensor(decoding).unsqueeze(0), skip_special_tokens=True))

['NewNew Celtic manager Brendan Rodgers says he has been discussing his future plans with some of his senior players.']
