# Outline

The idea is to use the pre-trained GPT2 model to generate text starting with a prompt sentence.

In [1]:
from transformers import GPT2Tokenizer, TFGPT2Model, TFGPT2LMHeadModel
from transformers import pipeline, set_seed
import tensorflow as tf

import numpy as np
import json

# Import GPT2 Model and its Tokenizer

In [2]:
# Checkpoint to load; "gpt2-xl" is noted as an alternative.
model_name = "gpt2"  # "gpt2-xl"
# Tokenizer belonging to the same checkpoint as the model.
tokenizer = GPT2Tokenizer.from_pretrained(model_name)

# The LM-head variant is loaded (instead of the bare TFGPT2Model kept below for
# reference) because later cells read next-token logits from the model output.
# model = TFGPT2Model.from_pretrained("gpt2")
model = TFGPT2LMHeadModel.from_pretrained(model_name)

2023-04-08 10:33:53.459732: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:306] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2023-04-08 10:33:53.460116: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:272] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


Metal device set to: Apple M1

systemMemory: 8.00 GB
maxCacheSize: 2.67 GB



All model checkpoint layers were used when initializing TFGPT2LMHeadModel.

All the layers of TFGPT2LMHeadModel were initialized from the model checkpoint at gpt2.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFGPT2LMHeadModel for predictions without further training.


In [3]:
# Pretty-print the model configuration as indented JSON.
print(json.dumps(model.config.to_dict(), indent=4))

{
    "vocab_size": 50257,
    "n_positions": 1024,
    "n_embd": 768,
    "n_layer": 12,
    "n_head": 12,
    "n_inner": null,
    "activation_function": "gelu_new",
    "resid_pdrop": 0.1,
    "embd_pdrop": 0.1,
    "attn_pdrop": 0.1,
    "layer_norm_epsilon": 1e-05,
    "initializer_range": 0.02,
    "summary_type": "cls_index",
    "summary_use_proj": true,
    "summary_activation": null,
    "summary_first_dropout": 0.1,
    "summary_proj_to_labels": true,
    "scale_attn_weights": true,
    "use_cache": true,
    "scale_attn_by_inverse_layer_idx": false,
    "reorder_and_upcast_attn": false,
    "bos_token_id": 50256,
    "eos_token_id": 50256,
    "return_dict": true,
    "output_hidden_states": false,
    "output_attentions": false,
    "torchscript": false,
    "torch_dtype": null,
    "use_bfloat16": false,
    "tf_legacy_loss": false,
    "pruned_heads": {},
    "tie_word_embeddings": true,
    "is_encoder_decoder": false,
    "is_decoder": false,
    "cross_attention_hidde

# ~~Generate Text using HF's API~~

~~Currently, only the PyTorch model is supported...~~

In [None]:
# Disabled experiment: text generation through the Hugging Face `pipeline` helper
# with the PyTorch backend. Left commented out; the cells below generate text
# directly from the TensorFlow model instead.
# generator = pipeline('text-generation', model="gpt2", framework="pt")

# # Generate text:
# set_seed(42)
# prompt_text = "Replace me by any text you'd"
# generator(prompt_text, max_length=15, num_return_sequences=5)

# Generate the Next Token Distribution

In [None]:
# Short prompt; the remainder of the sentence is left commented out.
prompt_text = "Replace me" # by any text you'd like"
# Tokenize into TensorFlow tensors and run a single forward pass of the model.
encoded_input = tokenizer(prompt_text, return_tensors='tf')
output = model(encoded_input)

In [None]:
# Shape of the tokenized prompt.
print(encoded_input["input_ids"].shape)

# Fields available on the model output, followed by the shapes inspected here.
print(output.keys())
# presumably (batch, sequence, vocab) — later cells index logits as [0, -1, :]
print(output["logits"].shape)
# Shapes of the first two entries of `past_key_values`.
print(output["past_key_values"][0].shape, output["past_key_values"][1].shape)

In [None]:
# Decode the prompt's token ids (first sequence in the batch) back to text.
print(tokenizer.decode(
    token_ids=encoded_input["input_ids"][0])
)

print(output["logits"].shape)
# Highest-scoring token id at every position (argmax over the last axis).
greedy_output_tokens = output["logits"].numpy().argmax(axis=-1)
print(greedy_output_tokens.shape)
# Decode only the prediction at the final position of the first sequence,
# i.e. the greedy choice for the token that follows the prompt.
print(tokenizer.decode(
    token_ids=greedy_output_tokens[0][-1])
)

In [None]:
# Minimum probability a token needs in order to be listed.
probability_threshold = 0.05

# Softmax over the logits at the last position of the first sequence.
last_token_probs = tf.nn.softmax(logits=output["logits"][0, -1, :], axis=-1).numpy()
# Token ids ordered from most to least probable ...
last_token_ids = tf.argsort(last_token_probs, axis=-1, direction='DESCENDING').numpy()
# ... keeping only those whose probability is at or above the threshold.
last_token_ids = last_token_ids[last_token_probs[last_token_ids] >= probability_threshold]

# Print each remaining token with its probability; newlines and tabs are
# escaped so that every token stays on a single output line.
for token, prob in zip(last_token_ids, last_token_probs[last_token_ids]):
    token_str = tokenizer.decode(token_ids=token).replace('\n', '\\n').replace('\t', '\\t')
    print(f"\t[{round(float(prob), 3)}] {token_str}")

Generate sequentially using GREEDY search:

In [None]:
# Greedy decoding: repeatedly append the single most probable next token to
# the prompt and print the growing text after every step.
max_generate_length = 10   # maximum number of tokens to generate
prob_threshold = 1.0e-3    # stop once the chosen token's probability is not above this
prompt_text = "At least it is a nice day out to enjoy"
initial_prompt_length = len(prompt_text)

token_prob = 1.0
gen_count = 0
# Strict `<` so that at most `max_generate_length` tokens are produced
# (`<=` would run one extra iteration).
while gen_count < max_generate_length and token_prob > prob_threshold:
    # Re-encode the text generated so far and run a full forward pass.
    encoded_input = tokenizer(prompt_text, return_tensors='tf')
    output = model(encoded_input)
    # Distribution over the vocabulary for the position after the last token.
    last_token_probs = tf.nn.softmax(logits=output["logits"][0, -1, :], axis=-1).numpy()
    # Only the most probable token is needed, so argmax replaces a full sort.
    next_token_id = int(np.argmax(last_token_probs))
    token_prob = last_token_probs[next_token_id]
    next_token = tokenizer.decode(token_ids=next_token_id)
    prompt_text = prompt_text + next_token
    # Show the chosen token's probability and the text so far, with newlines
    # and tabs escaped so each step prints on one line.
    print("\t-> [{:5}] {}".format(
        str(round(float(token_prob), 3)), prompt_text.replace('\n', '\\n').replace('\t', '\\t')))
    gen_count += 1

Generate sequentially using probabilistic sampling:

In [None]:
# Sampling-based decoding: each next token is drawn at random from the model's
# full next-token distribution. This cell does not seed NumPy's RNG, so the
# generated text differs from run to run.
max_generate_length = 10     # maximum number of tokens to generate
stop_prob_threshold = 1.0e-2 # stop once even the most likely token is not above this
prompt_text = "At least it is a nice day out to enjoy"
initial_prompt_length = len(prompt_text)

max_token_prob = 1.0
gen_count = 0
# Strict `<` so that at most `max_generate_length` tokens are produced
# (`<=` would run one extra iteration).
while gen_count < max_generate_length and max_token_prob > stop_prob_threshold:
    # Re-encode the text generated so far and run a full forward pass.
    encoded_input = tokenizer(prompt_text, return_tensors='tf')
    output = model(encoded_input)
    last_token_probs = tf.nn.softmax(logits=output["logits"][0, -1, :], axis=-1).numpy()
    # Inverse-CDF sampling: locate a uniform draw within the cumulative
    # distribution. A scalar draw yields a scalar index rather than a
    # 1-element array.
    token_id = int(np.searchsorted(
        a=np.cumsum(last_token_probs),
        v=np.random.uniform(low=0.0, high=1.0),
        side="right", sorter=None))
    # Floating-point rounding can leave the final cumulative sum slightly
    # below 1.0; a draw above it would yield an index one past the end.
    token_id = min(token_id, last_token_probs.size - 1)
    max_token_prob = last_token_probs.max()
    token_prob = last_token_probs[token_id]
    next_token = tokenizer.decode(token_ids=token_id)
    prompt_text = prompt_text + next_token
    # Show the sampled token's probability and the text so far, with newlines
    # and tabs escaped so each step prints on one line.
    print("\t-> [{:5}] {}".format(
        str(round(float(token_prob), 3)), prompt_text.replace('\n', '\\n').replace('\t', '\\t')))
    gen_count += 1

# Final text with the original prompt in square brackets.
print(f"[{prompt_text[:initial_prompt_length]}]{prompt_text[initial_prompt_length:]}")

In [None]:
# Sampling-based decoding of a longer continuation: each next token is drawn
# at random from the model's full next-token distribution. This cell does not
# seed NumPy's RNG, so the generated text differs from run to run.
max_generate_length = 200    # maximum number of tokens to generate
stop_prob_threshold = 1.0e-2 # stop once even the most likely token is not above this
prompt_text = "She always leaves behind a trace of golden fabulousness"
initial_prompt_length = len(prompt_text)

max_token_prob = 1.0
gen_count = 0
# Strict `<` so that at most `max_generate_length` tokens are produced
# (`<=` would run one extra iteration).
while gen_count < max_generate_length and max_token_prob > stop_prob_threshold:
    # Re-encode the text generated so far and run a full forward pass.
    encoded_input = tokenizer(prompt_text, return_tensors='tf')
    output = model(encoded_input)
    last_token_probs = tf.nn.softmax(logits=output["logits"][0, -1, :], axis=-1).numpy()
    # Inverse-CDF sampling: locate a uniform draw within the cumulative
    # distribution. A scalar draw yields a scalar index rather than a
    # 1-element array.
    token_id = int(np.searchsorted(
        a=np.cumsum(last_token_probs),
        v=np.random.uniform(low=0.0, high=1.0),
        side="right", sorter=None))
    # Floating-point rounding can leave the final cumulative sum slightly
    # below 1.0; a draw above it would yield an index one past the end.
    token_id = min(token_id, last_token_probs.size - 1)
    max_token_prob = last_token_probs.max()
    token_prob = last_token_probs[token_id]
    next_token = tokenizer.decode(token_ids=token_id)
    prompt_text = prompt_text + next_token
    gen_count += 1

# Final text with the original prompt in square brackets.
print(f"[{prompt_text[:initial_prompt_length]}]{prompt_text[initial_prompt_length:]}")

In [None]:
# Sampling-based decoding with another prompt: each next token is drawn at
# random from the model's full next-token distribution. This cell does not
# seed NumPy's RNG, so the generated text differs from run to run.
max_generate_length = 100    # maximum number of tokens to generate
stop_prob_threshold = 1.0e-2 # stop once even the most likely token is not above this
prompt_text = "Glad you're having fun" #"Tomorrow I will go lift..."
initial_prompt_length = len(prompt_text)

max_token_prob = 1.0
gen_count = 0
# Strict `<` so that at most `max_generate_length` tokens are produced
# (`<=` would run one extra iteration).
while gen_count < max_generate_length and max_token_prob > stop_prob_threshold:
    # Re-encode the text generated so far and run a full forward pass.
    encoded_input = tokenizer(prompt_text, return_tensors='tf')
    output = model(encoded_input)
    last_token_probs = tf.nn.softmax(logits=output["logits"][0, -1, :], axis=-1).numpy()
    # Inverse-CDF sampling: locate a uniform draw within the cumulative
    # distribution. A scalar draw yields a scalar index rather than a
    # 1-element array.
    token_id = int(np.searchsorted(
        a=np.cumsum(last_token_probs),
        v=np.random.uniform(low=0.0, high=1.0),
        side="right", sorter=None))
    # Floating-point rounding can leave the final cumulative sum slightly
    # below 1.0; a draw above it would yield an index one past the end.
    token_id = min(token_id, last_token_probs.size - 1)
    max_token_prob = last_token_probs.max()
    token_prob = last_token_probs[token_id]
    next_token = tokenizer.decode(token_ids=token_id)
    prompt_text = prompt_text + next_token
    gen_count += 1

# Final text with the original prompt in square brackets.
print(f"[{prompt_text[:initial_prompt_length]}]{prompt_text[initial_prompt_length:]}")

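Sample only from the most probable tokens (top-p / nucleus sampling; the cell below keeps the top 60% of the probability mass):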

In [None]:
# Truncated sampling: tokens are sorted by descending probability and the
# uniform draw is limited to [0, top_p), so only tokens within the first
# `top_p` of cumulative probability mass can be selected. This cell does not
# seed NumPy's RNG, so the generated text differs from run to run.
max_generate_length = 100    # maximum number of tokens to generate
stop_prob_threshold = 1.0e-2 # stop once even the most likely token is not above this
top_p = 0.6                  # cumulative probability mass that sampling is restricted to
prompt_text = "I want to hear about your AI" 
initial_prompt_length = len(prompt_text)

max_token_prob = 1.0
gen_count = 0
# Strict `<` so that at most `max_generate_length` tokens are produced
# (`<=` would run one extra iteration).
while gen_count < max_generate_length and max_token_prob > stop_prob_threshold:
    # Re-encode the text generated so far and run a full forward pass.
    encoded_input = tokenizer(prompt_text, return_tensors='tf')
    output = model(encoded_input)
    last_token_probs = tf.nn.softmax(logits=output["logits"][0, -1, :], axis=-1).numpy()
    # Token ids ordered from most to least probable.
    last_token_argsort = np.argsort(-1.0 * last_token_probs)
    # Position of the draw within the cumulative distribution of the sorted
    # probabilities. A scalar draw yields a scalar index rather than a
    # 1-element array.
    sorted_index = int(np.searchsorted(
        a=np.cumsum(last_token_probs[last_token_argsort]),
        v=np.random.uniform(low=0.0, high=top_p),
        side="right", sorter=None))
    # Guard against an index one past the end (possible only if `top_p` is
    # raised to about 1.0 and rounding leaves the cumulative sum below the draw).
    sorted_index = min(sorted_index, last_token_argsort.size - 1)
    # Map the position in the sorted order back to a vocabulary token id.
    token_id = int(last_token_argsort[sorted_index])
    max_token_prob = last_token_probs.max()
    token_prob = last_token_probs[token_id]
    next_token = tokenizer.decode(token_ids=token_id)
    prompt_text = prompt_text + next_token
    gen_count += 1

# Final text with the original prompt in angle brackets.
print(f"<{prompt_text[:initial_prompt_length]}>{prompt_text[initial_prompt_length:]}")