# Text Generation with GPT2 and Hugging Face

See [this overview](https://huggingface.co/course/chapter1/1) for an introduction to Hugging Face.

## Load Libraries and Data

In [1]:
# If the Hugging Face Transformers library is not installed, run:
# %pip install transformers

import torch
from transformers import (
    AutoTokenizer, GPT2LMHeadModel,
    StoppingCriteriaList, MaxLengthCriteria
)

In [2]:
# Load the tokenizer and the language model from the same pretrained
# checkpoint. The name is kept in one place so the two cannot drift apart.
model_name = 'gpt2'
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = GPT2LMHeadModel.from_pretrained(model_name)

In [3]:
# Number of highest-probability candidate next tokens (and their
# probabilities) to show at each generation step
num_tokens = 3

## Generate Text

In [4]:
# Specify our prompt: the starting text that the model will continue
prompt = 'AI will support banks by'

# Encode the input: tokenize the prompt and return PyTorch tensors ('pt')
inputs = tokenizer(prompt, return_tensors='pt')

### First Token

In [5]:
# Run a single forward pass over the encoded prompt. Gradient tracking is
# disabled because this is inference only, and no labels are passed because
# the loss is not needed to predict the next token.
with torch.no_grad():
    outputs = model(**inputs)

In [6]:
# Get the logits, then convert only the last position's scores (the
# prediction for the token that follows the prompt) to probabilities.
# Slicing before the softmax avoids normalizing positions that are discarded.
logits = outputs.logits
probs = logits[:, -1, :].softmax(dim=-1)

In [7]:
# Select the num_tokens largest probabilities and their token ids
max_probs = probs.flatten().topk(num_tokens)

# Turn each selected token id back into its text form
tokens = list(map(tokenizer.decode, max_probs.indices))

print(tokens)
print(max_probs.values)

[' providing', ' offering', ' making']
tensor([0.1313, 0.0586, 0.0254], grad_fn=<TopkBackward0>)


In [8]:
# Extend the prompt with the top-ranked candidate token
prompt += tokens[0]

# Show the extended prompt
print(prompt)

AI will support banks by providing


### Additional Tokens

In [9]:
# Number of additional tokens to generate
output_tokens = 12

# Greedy decoding: at every step, append the single most likely next token.
for i in range(output_tokens):
    # Re-encode the prompt so far, including previously appended tokens
    inputs = tokenizer(prompt, return_tensors='pt')

    # Inference only: skip gradient tracking, and pass no labels because the
    # loss is not used to choose the next token
    with torch.no_grad():
        outputs = model(**inputs)

    # Normalize only the last position's logits into next-token probabilities
    logits = outputs.logits
    probs = logits[:, -1, :].softmax(dim=-1)

    # Top candidates for the next token and their decoded text
    max_probs = torch.topk(probs.flatten(), num_tokens)
    tokens = [tokenizer.decode(x) for x in max_probs.indices]

    print(f'Probabilities {i}: {max_probs.values}')
    print(f'Tokens {i}: {tokens}')

    # Append the most likely token and show the running result
    prompt = prompt + tokens[0]
    print(f'Prompt {i}: {prompt}\n')


Probabilities 0: tensor([0.0935, 0.0558, 0.0315], grad_fn=<TopkBackward0>)
Tokens 0: [' a', ' them', ' loans']
Prompt 0: AI will support banks by providing a

Probabilities 1: tensor([0.0221, 0.0184, 0.0183], grad_fn=<TopkBackward0>)
Tokens 1: [' range', ' secure', ' "']
Prompt 1: AI will support banks by providing a range

Probabilities 2: tensor([9.9720e-01, 4.8669e-04, 2.6866e-04], grad_fn=<TopkBackward0>)
Tokens 2: [' of', ' and', ' to']
Prompt 2: AI will support banks by providing a range of

Probabilities 3: tensor([0.0877, 0.0449, 0.0210], grad_fn=<TopkBackward0>)
Tokens 3: [' services', ' financial', ' loans']
Prompt 3: AI will support banks by providing a range of services

Probabilities 4: tensor([0.2312, 0.1404, 0.1302], grad_fn=<TopkBackward0>)
Tokens 4: [' to', ' including', ',']
Prompt 4: AI will support banks by providing a range of services to

Probabilities 5: tensor([0.0538, 0.0518, 0.0472], grad_fn=<TopkBackward0>)
Tokens 5: [' help', ' the', ' ensure']
Prompt 5: AI 