# Install dep

In [None]:
!pip install -q git+https://github.com/huggingface/transformers.git

# Import dep

In [None]:
from transformers import GPT2LMHeadModel, GPT2Tokenizer
import torch

# Init and load tokenizer and model

In [None]:
# load tokenizer and model
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
model = GPT2LMHeadModel.from_pretrained("gpt2", pad_token_id=tokenizer.eos_token_id)

In [4]:
# greedy search
# encode context the generation is conditioned on
input_ids = tokenizer.encode('I enjoy walking with my cute dog', return_tensors='pt')
print(input_ids)

# generate text until the output length (which includes the context length) reaches 50
greedy_output = model.generate(input_ids, max_length=50)

print("Output:\n" + 100 * '-')
print(tokenizer.decode(greedy_output[0], skip_special_tokens=True))

tensor([[   40,  2883,  6155,   351,   616, 13779,  3290]])
Output:
----------------------------------------------------------------------------------------------------
I enjoy walking with my cute dog, but I'm not sure if I'll ever be able to walk with my dog. I'm not sure if I'll ever be able to walk with my dog.

I'm not sure if I'll


In [5]:
# beam search
# encode context the generation is conditioned on
input_ids = tokenizer.encode('I enjoy walking with my cute dog', return_tensors='pt')

# activate beam search and early_stopping
beam_output = model.generate(
    input_ids, 
    max_length=50, 
    num_beams=5, 
    early_stopping=True
)

print("Output:\n" + 100 * '-')
print(tokenizer.decode(beam_output[0], skip_special_tokens=True))

Output:
----------------------------------------------------------------------------------------------------
I enjoy walking with my cute dog, but I'm not sure if I'll ever be able to walk with him again.

I'm not sure if I'll ever be able to walk with him again. I'm not sure if I'll


In [6]:
input_ids = tokenizer.encode('I enjoy walking with my cute dog', return_tensors='pt')

# set no_repeat_ngram_size to 2
beam_output = model.generate(
    input_ids, 
    max_length=50, 
    num_beams=5, 
    no_repeat_ngram_size=2, 
    early_stopping=True
)

print("Output:\n" + 100 * '-')
print(tokenizer.decode(beam_output[0], skip_special_tokens=True))

Output:
----------------------------------------------------------------------------------------------------
I enjoy walking with my cute dog, but I'm not sure if I'll ever be able to walk with him again.

I've been thinking about this for a while now, and I think it's time for me to take a break


In [8]:
input_ids = tokenizer.encode('I enjoy walking with my cute dog', return_tensors='pt')

# set return_num_sequences > 1
beam_outputs = model.generate(
    input_ids, 
    max_length=50, 
    num_beams=5, 
    no_repeat_ngram_size=2, 
    num_return_sequences=5, 
    early_stopping=True
)

# now we have 3 output sequences
print("Output:\n" + 100 * '-')
for i, beam_output in enumerate(beam_outputs):
  print('---')
  print("{}: {}".format(i, tokenizer.decode(beam_output, skip_special_tokens=True)))

Output:
----------------------------------------------------------------------------------------------------
---
0: I enjoy walking with my cute dog, but I'm not sure if I'll ever be able to walk with him again.

I've been thinking about this for a while now, and I think it's time for me to take a break
---
1: I enjoy walking with my cute dog, but I'm not sure if I'll ever be able to walk with him again.

I've been thinking about this for a while now, and I think it's time for me to get back to
---
2: I enjoy walking with my cute dog, but I'm not sure if I'll ever be able to walk with her again.

I've been thinking about this for a while now, and I think it's time for me to take a break
---
3: I enjoy walking with my cute dog, but I'm not sure if I'll ever be able to walk with her again.

I've been thinking about this for a while now, and I think it's time for me to get back to
---
4: I enjoy walking with my cute dog, but I'm not sure if I'll ever be able to walk with him again.

I've 

In [10]:
# set seed to reproduce results
seed_val = 17
torch.manual_seed(seed_val)
torch.cuda.manual_seed_all(seed_val)

# tokenize
input_ids = tokenizer.encode('I enjoy walking with my cute dog', return_tensors='pt')

# activate sampling and deactivate top_k by setting top_k sampling to 0
sample_output = model.generate(
    input_ids, 
    do_sample=True, 
    max_length=50, 
    top_k=0
)

print("Output:\n" + 100 * '-')
print(tokenizer.decode(sample_output[0], skip_special_tokens=True))

Output:
----------------------------------------------------------------------------------------------------
I enjoy walking with my cute dog here, and I have only one dog that uses me as guest pi in almost every hour. Ms. Elliott is a happy, vivacious dog like most of her friends," said marks.

Since Jan


In [11]:
# set seed to reproduce results
seed_val = 17
torch.manual_seed(seed_val)
torch.cuda.manual_seed_all(seed_val)

# tokenize
input_ids = tokenizer.encode('I enjoy walking with my cute dog', return_tensors='pt')

# use temperature to decrease the sensitivity to low probability candidates
sample_output = model.generate(
    input_ids, 
    do_sample=True, 
    max_length=50, 
    top_k=0, 
    temperature=0.7
)

print("Output:\n" + 100 * '-')
print(tokenizer.decode(sample_output[0], skip_special_tokens=True))

Output:
----------------------------------------------------------------------------------------------------
I enjoy walking with my cute dog here, and I have only one dog that is so small and adorable in my life. :)

Thank you so much for taking the time to like us on Facebook and follow us on Twitter. We love your


In [12]:
# set seed to reproduce results
seed_val = 17
torch.manual_seed(seed_val)
torch.cuda.manual_seed_all(seed_val)

# tokenize
input_ids = tokenizer.encode('I enjoy walking with my cute dog', return_tensors='pt')

# set top_k to 50
sample_output = model.generate(
    input_ids, 
    do_sample=True, 
    max_length=50, 
    top_k=50
)

print("Output:\n" + 100 * '-')
print(tokenizer.decode(sample_output[0], skip_special_tokens=True))

Output:
----------------------------------------------------------------------------------------------------
I enjoy walking with my cute dog here, and I have had many occasions when they say it's fine in my house, but you better believe I have never had any problems here. Not even one big one of those who didn't like having my


In [13]:
# set seed to reproduce results
seed_val = 17
torch.manual_seed(seed_val)
torch.cuda.manual_seed_all(seed_val)

input_ids = tokenizer.encode('I enjoy walking with my cute dog', return_tensors='pt')

# deactivate top_k sampling and sample only from 92% most likely words
sample_output = model.generate(
    input_ids, 
    do_sample=True, 
    max_length=50, 
    top_p=0.92, 
    top_k=0
)

print("Output:\n" + 100 * '-')
print(tokenizer.decode(sample_output[0], skip_special_tokens=True))

Output:
----------------------------------------------------------------------------------------------------
I enjoy walking with my cute dog here, and I have only one dog that uses me as an app in almost every day. We are hiking near Castlewreck, which is the perfect place for hiking when you need to relax.

Let's


In [16]:
# set seed to reproduce results
seed_val = 17
torch.manual_seed(seed_val)
torch.cuda.manual_seed_all(seed_val)

# tokenize
input_ids = tokenizer.encode('What is Philosophy?', return_tensors='pt')

# set top_k = 50 and set top_p = 0.95 and num_return_sequences = 3
sample_outputs = model.generate(
    input_ids,
    do_sample=True, 
    max_length=50, 
    top_k=50, 
    top_p=0.95, 
    temperature=0.7
)

# print out the result
print("Output:\n" + 100 * '-')
for i, sample_output in enumerate(sample_outputs):
  print("{}: {}".format(i, tokenizer.decode(sample_output, skip_special_tokens=True)))

Output:
----------------------------------------------------------------------------------------------------
0: What is Philosophy? It is an examination of the nature of the human mind. It is the study of the mind's powers and powers of action, of the passions and passions of man, and of the motives and motives of the mind. It is
