In [22]:
from transformers import pipeline, set_seed
from transformers import GPT2Tokenizer, GPT2LMHeadModel
import torch

In [42]:
# https://huggingface.co/docs/transformers/internal/generation_utils#generate-outputs
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
model = GPT2LMHeadModel.from_pretrained("gpt2")

inputs = tokenizer("Hello, my dog is cute and ", return_tensors="pt")
# Greedy sampling
output_greedy = model.generate(
    **inputs, 
    return_dict_in_generate=True, 
    output_scores=True, 
    max_new_tokens=128,
    num_return_sequences=1
)


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


In [43]:
tokenizer.batch_decode(output_greedy.sequences)

["Hello, my dog is cute and icky. I'm not sure if she's a good dog, but she's cute and icky. I'm not sure if she's a good dog, but she's cute and icky.\n\nI'm not sure if she's a good dog, but she's cute and icky. I'm not sure if she's a good dog, but she's cute and icky.\n\nI'm not sure if she's a good dog, but she's cute and icky. I'm not sure if she's a good dog, but she's cute and icky.\n\nI'm not sure if she"]

In [87]:
# Manual greedy sampling
sm = torch.nn.functional.softmax(torch.stack(output_greedy['scores']), dim=-1)
topk = sm.topk(k=1, dim=-1)
" ".join(tokenizer.batch_decode(topk.indices.reshape(-1), skip_special_tokens=True))

"icky .  I 'm  not  sure  if  she 's  a  good  dog ,  but  she 's  cute  and   icky .  I 'm  not  sure  if  she 's  a  good  dog ,  but  she 's  cute  and   icky . \n \n I 'm  not  sure  if  she 's  a  good  dog ,  but  she 's  cute  and   icky .  I 'm  not  sure  if  she 's  a  good  dog ,  but  she 's  cute  and   icky . \n \n I 'm  not  sure  if  she 's  a  good  dog ,  but  she 's  cute  and   icky .  I 'm  not  sure  if  she 's  a  good  dog ,  but  she 's  cute  and   icky . \n \n I 'm  not  sure  if  she"

## Other sampling methods

https://huggingface.co/blog/how-to-generate

https://huggingface.co/blog/introducing-csearch

In [98]:
## Beam search
beam_output = model.generate(
    **inputs, 
    max_length=50, 
    num_beams=5, 
    early_stopping=True
)
tokenizer.batch_decode(beam_output, skip_special_tokens=True)

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


["Hello, my dog is cute and icky, but I'm not sure if he's a good dog or not. I'm not sure if he's a good dog or not. I'm not sure if he's a good dog or not."]

In [97]:
## Beam search with n-gram penalty
beam_output = model.generate(
    **inputs, 
    max_length=50, 
    num_beams=5, 
    no_repeat_ngram_size=2,  # n-gram penalty
    early_stopping=True
)
tokenizer.batch_decode(beam_output, skip_special_tokens=True)

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


["Hello, my dog is cute and icky, but I'm not sure if he's a good dog or not.\n\nI'm a big fan of dogs, and I love them. But I don't think I've ever seen a dog"]

In [101]:
## Sampling
# set seed to reproduce results. Feel free to change the seed though to get different results
torch.manual_seed(0.)

# activate sampling and deactivate top_k by setting top_k sampling to 0
sample_output = model.generate(
    **inputs, 
    do_sample=True, 
    max_length=50, 
    top_k=0
)
tokenizer.batch_decode(sample_output, skip_special_tokens=True)

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


['Hello, my dog is cute and \xa0fancy. \xa0I was thinking at the time the bunny was quite far and moved out of the equation but I hear she so far has been very affectionate and supportive with everyone around her and her']

In [103]:
## Top-K sampling
sample_output = model.generate(
    **inputs, 
    do_sample=True, 
    max_length=50, 
    top_k=50
)
tokenizer.batch_decode(sample_output, skip_special_tokens=True)

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


['Hello, my dog is cute and icky and she\'s always been my new pet."\n\nHe added that it helped him that he was comfortable with his owner\'s love of the dog and the positive attention that went with his actions.']

In [104]:
## Nucleous sampling
nucleous_output = model.generate(
    **inputs, 
    do_sample=True, 
    max_length=50, 
    top_p=0.95, 
    top_k=0
)
tokenizer.batch_decode(nucleous_output, skip_special_tokens=True)

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


['Hello, my dog is cute and iced and tastes like a treat! Today I gave my 2 year old veggie litters 3-4 steamed treats, and treat on it, and the kids love it, and the father loves it!']

In [109]:
## Nucleous sampling
nucleous_output = model.generate(
    **inputs, 
    do_sample=True, 
    max_length=50, 
    top_p=0.95, 
    top_k=0,
    num_return_sequences=3
)
for i, sample_output in enumerate(nucleous_output):
    print("{}: {}".format(i, tokenizer.decode(sample_output, skip_special_tokens=True)))
    print()

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


0: Hello, my dog is cute and ive always liked...I wanted the weight off. Unfortunately, everything failed at my pooch car wash.Also your child's final look.


User Info: Xxxxy Xxxxy 2 years ago

1: Hello, my dog is cute and ive done everything i have ever wanted but I haven't seen a dog with flesh. I can't even imagine what it's like to be apart. He's some fucking torokoo dog to shit on while

2: Hello, my dog is cute and icky. She is 9 years old, and still gives me great pleasure and affection. However, some point in her adolescence slowly sets in (she does not love me!). This lead me to one of the last



In [111]:
## Contrastive search

contrastive_output = model.generate(
    **inputs, 
    penalty_alpha=0.6, 
    top_k=4, 
    max_length=128
)
tokenizer.batch_decode(contrastive_output, skip_special_tokens=True)


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


["Hello, my dog is cute and icky. I'm going to try to get her to eat a bowl of cereal.\n\nI'm going to try to get her to eat a bowl of cereal. I'm going to try to get her to eat a bowl of cereal. I'm going to try to get her to eat a bowl of cereal. I'm going to try to get her to eat a bowl of cereal. I'm going to try to get her to eat a bowl of cereal. I'm going to try to get her to eat a bowl of cereal. I'm going to try to get her to eat a"]