<a href="https://colab.research.google.com/github/pjheslin/colab-notebooks/blob/main/gpt2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Text generation with GPT-2

Code adapted from the [Hugging Face blog](https://huggingface.co/blog/how-to-generate).

In [None]:
!pip3 install -q git+https://github.com/huggingface/transformers.git

In [None]:
import transformers

In [None]:
import tensorflow as tf

In [None]:
# Show which GPU devices TensorFlow reports for this runtime.
print(tf.config.list_physical_devices(device_type="GPU"))

In [None]:
from transformers import TFGPT2LMHeadModel, GPT2Tokenizer

In [None]:
# Load the tokenizer that belongs to the pretrained "gpt2" checkpoint.
tokenizer = GPT2Tokenizer.from_pretrained(
    "gpt2",
)

In [None]:
# Load the pretrained "gpt2" language model. The tokenizer's EOS token id is
# passed as the PAD token id so that generation does not emit padding warnings.
model = TFGPT2LMHeadModel.from_pretrained(
    "gpt2",
    pad_token_id=tokenizer.eos_token_id,
)

## Greedy Search

In [None]:
# Encode the prompt into token ids, returned as a TensorFlow tensor; every
# generation call below continues from this same prompt.
input_ids = tokenizer.encode(
    "To be or not to be, that is the",
    return_tensors="tf",
)

In [None]:
# No search or sampling options are passed here, so generate() runs with its
# defaults; max_length=100 caps the total output length in tokens.
greedy_output = model.generate(
    input_ids,
    max_length=100,
)

In [None]:
# Turn the first generated sequence back into text, dropping special tokens.
greedy_text = tokenizer.decode(greedy_output[0], skip_special_tokens=True)
print(greedy_text)

## Beam search
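We activate beam search by setting `num_beams=5`, and enable `early_stopping` so that generation finishes once all beam hypotheses have reached the EOS token.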

In [None]:
# Beam search: keep 5 candidate sequences (beams) during generation and
# enable early stopping.
beam_output = model.generate(
    input_ids,
    num_beams=5,
    early_stopping=True,
    max_length=100,
)

In [None]:
# Turn the first returned beam back into text, dropping special tokens.
beam_text = tokenizer.decode(beam_output[0], skip_special_tokens=True)
print(beam_text)

We set `no_repeat_ngram_size=2` so that no 2-gram (pair of consecutive tokens) appears twice in the generated text.

In [None]:
# Same beam search as before, with an added n-gram penalty:
# no_repeat_ngram_size=2 is passed so that 2-grams are not repeated.
beam_output = model.generate(
    input_ids,
    num_beams=5,
    early_stopping=True,
    no_repeat_ngram_size=2,
    max_length=100,
)
beam_text = tokenizer.decode(beam_output[0], skip_special_tokens=True)
print(beam_text)

## Sampling from the conditional probability distribution

In [None]:
# Fix the global TensorFlow seed so the sampled text can be reproduced.
tf.random.set_seed(2)

# Plain sampling: do_sample=True turns sampling on, and top_k=0 switches
# top-k filtering off.
sample_output = model.generate(
    input_ids,
    max_length=100,
    do_sample=True,
    top_k=0,
)
sampled_text = tokenizer.decode(sample_output[0], skip_special_tokens=True)
print(sampled_text)

Lowering the temperature decreases the likelihood of low probability events.

In [None]:
# Fix the global TensorFlow seed so the sampled text can be reproduced.
tf.random.set_seed(1)

# Sampling with temperature=0.7, which makes the sampler less sensitive to
# low-probability candidates; top_k=0 keeps top-k filtering switched off.
sample_output = model.generate(
    input_ids,
    max_length=100,
    do_sample=True,
    temperature=0.7,
    top_k=0,
)

sampled_text = tokenizer.decode(sample_output[0], skip_special_tokens=True)
print(sampled_text)

### Top-K sampling

In [None]:
# Fix the global TensorFlow seed so the sampled text can be reproduced.
tf.random.set_seed(1)

# Top-K sampling with K = 50.
sample_output = model.generate(
    input_ids,
    max_length=100,
    do_sample=True,
    top_k=50,
)

sampled_text = tokenizer.decode(sample_output[0], skip_special_tokens=True)
print(sampled_text)

### Top-p (nucleus) sampling

In [None]:
# Fix the global TensorFlow seed so the sampled text can be reproduced.
tf.random.set_seed(1)

# Top-p (nucleus) sampling: top_k=0 switches top-k filtering off, and
# top_p=0.92 limits sampling to the most likely tokens that together
# account for 92% of the probability mass.
sample_output = model.generate(
    input_ids,
    max_length=100,
    do_sample=True,
    top_k=0,
    top_p=0.92,
)

sampled_text = tokenizer.decode(sample_output[0], skip_special_tokens=True)
print(sampled_text)