## Import Required Libraries

In [1]:
!pip install -q pytorch-transformers

In [2]:
import torch
from pytorch_transformers import GPT2Tokenizer, GPT2LMHeadModel

## Set Up the Tokenizer

The script uses the `GPT2Tokenizer` class from the `pytorch_transformers` library to load the **pre-trained** tokenizer associated with the `gpt2` model. The tokenizer is responsible for converting input text into a format that the model can understand. This includes splitting the text into tokens (words, subwords, or characters), mapping the tokens to their corresponding IDs in the model’s vocabulary, and creating the necessary inputs for the model (like attention masks).

In [3]:
# Load the pre-trained tokenizer that belongs to the 'gpt2' model.
tokenizer = GPT2Tokenizer.from_pretrained('gpt2')

## Encode and Convert Text to Tokenized Tensor

In [12]:
# Prompt to be tokenized; the model predicts the sub-word that follows it.
text = "What is the most populated"

In [13]:
# Encode the prompt into its list of vocabulary IDs and show the mapping.
indexed_tokens = tokenizer.encode(text)
print(f'"{text}" tokenized becomes {indexed_tokens}')

"What is the most populated" tokenized becomes [1867, 318, 262, 749, 22331]


In [14]:
# Convert the indexed tokens into a PyTorch tensor and add a leading
# batch dimension, giving shape (1, number_of_tokens).
tokens_tensor = torch.tensor(indexed_tokens).unsqueeze(0)
tokens_tensor

tensor([[ 1867,   318,   262,   749, 22331]])

## Load Pre-Trained Model (Weights)

In [15]:
# Load the pre-trained 'gpt2' weights into a GPT-2 model with a language-modelling head.
model = GPT2LMHeadModel.from_pretrained('gpt2')

## Set Model To Evaluation Mode

This deactivates the dropout modules, so the model's predictions are deterministic at inference time.

In [16]:
# Switch the model to evaluation mode. As the last expression of the cell,
# the call's return value (the model) is displayed as the module tree below.
model.eval()

GPT2LMHeadModel(
  (transformer): GPT2Model(
    (wte): Embedding(50257, 768)
    (wpe): Embedding(1024, 768)
    (drop): Dropout(p=0.1, inplace=False)
    (h): ModuleList(
      (0-11): 12 x Block(
        (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (attn): Attention(
          (c_attn): Conv1D()
          (c_proj): Conv1D()
          (attn_dropout): Dropout(p=0.1, inplace=False)
          (resid_dropout): Dropout(p=0.1, inplace=False)
        )
        (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (mlp): MLP(
          (c_fc): Conv1D()
          (c_proj): Conv1D()
          (dropout): Dropout(p=0.1, inplace=False)
        )
      )
    )
    (ln_f): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
  )
  (lm_head): Linear(in_features=768, out_features=50257, bias=False)
)

In [17]:
# Use the GPU when one is available; otherwise everything stays on the CPU,
# so this cell is safe to run on any machine without editing it.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# The input tensor and the model must live on the same device for the forward pass.
tokens_tensor = tokens_tensor.to(device)
model = model.to(device)

In [18]:
# Run the forward pass with gradient tracking switched off (inference only).
with torch.no_grad():
    outputs = model(tokens_tensor)

# The first element of the model output holds the predictions for all tokens.
predictions = outputs[0]

In [19]:
# Scores at the last position of the first (and only) sequence in the batch
last_position_scores = predictions[0, -1]

# The highest-scoring vocabulary ID is the predicted next sub-word
predicted_index = last_position_scores.argmax().item()

# Append the predicted ID to the prompt's IDs and decode everything back to text
predicted_text = tokenizer.decode([*indexed_tokens, predicted_index])

# Print the prompt followed by the predicted word
print(predicted_text)

 What is the most populated city
