In [1]:
import torch
import torch.nn as nn
import tiktoken

In [2]:
from gpt_model import *

In [3]:
# Load tiktoken's "gpt2" encoding; the cells below pass it to the helpers that
# convert between text and token IDs.
tokenizer = tiktoken.get_encoding("gpt2")

In [6]:
# Seed PyTorch's RNG before the weights are created so that a
# "Restart Kernel -> Run All" reproduces the same model and outputs.
torch.manual_seed(123)

# Build the model from the 124M-parameter configuration.
model = GPTModel(GPT_CONFIG_124M)

# Switch to inference mode: every later cell only generates text or evaluates
# probabilities, so layers that behave differently in training must be disabled.
# NOTE(review): assumes GPTModel is an nn.Module that contains dropout -- confirm
# against gpt_model.py. The trailing ';' suppresses the module repr in the output.
model.eval();

In [4]:
# Prompt text that the generation cell below encodes and feeds to the model.
start_context = "Every effort moves you"

In [7]:
# Encode the prompt, let the model append 10 new tokens, then decode the
# whole sequence (prompt + continuation) back to text.
prompt_ids = text_to_token_ids(start_context, tokenizer)

token_ids = generate_text_simple(
    model=model,
    idx=prompt_ids,
    max_new_tokens=10,
    context_size=GPT_CONFIG_124M["context_length"],
)

generated_text = token_ids_to_text(token_ids, tokenizer)
print("Output text:\n", generated_text)

Output text:
 Every effort moves you Kay potion scriptleader Meth grants command Baileyacterbert


In [8]:
# Two toy examples, one per row. The trailing comments give the text each
# row of token IDs stands for.
inputs = torch.tensor([
    [16833, 3626, 6100],   # "every effort moves"
    [40, 1107, 588],       # "I really like"
])

# Targets are the inputs shifted one position to the left, i.e. the token
# that should follow each input position.
targets = torch.tensor([
    [3626, 6100, 345],     # " effort moves you"
    [1107, 588, 11311],    # " really like chocolate"
])

In [10]:
# Forward pass only -- no gradients are needed to inspect the predictions.
with torch.no_grad():
    logits = model(inputs)

# Softmax over the last axis turns the logits into a probability
# distribution over the vocabulary for every position.
probs = logits.softmax(dim=-1)
print(probs.shape)  # (batch_size, num_tokens, vocab_size)

torch.Size([2, 3, 50257])


In [16]:
# Display the probability tensor (PyTorch abbreviates the printed values).
probs

tensor([[[7.8191e-06, 6.7431e-06, 5.2584e-05,  ..., 3.6262e-05,
          3.7508e-06, 9.7038e-06],
         [2.1822e-05, 7.7028e-06, 1.1528e-05,  ..., 1.0450e-05,
          8.5603e-06, 1.6912e-05],
         [2.1899e-05, 4.0850e-06, 1.1161e-05,  ..., 2.8772e-05,
          1.3037e-05, 1.5984e-05]],

        [[1.8993e-05, 1.6090e-05, 1.4722e-05,  ..., 2.4365e-05,
          4.7262e-06, 1.0836e-05],
         [4.3924e-05, 2.0622e-05, 1.8945e-05,  ..., 2.8352e-05,
          1.4134e-05, 9.4686e-06],
         [4.9139e-05, 1.4359e-05, 1.1475e-05,  ..., 3.0191e-05,
          2.4759e-05, 1.3044e-05]]])

In [17]:
# Predicted token at each position = index of the largest probability.
# keepdim=True keeps a trailing axis of size 1, hence the .flatten() calls
# when these IDs are decoded later.
token_ids = probs.argmax(dim=-1, keepdim=True)
print("Token IDs:\n", token_ids)

Token IDs:
 tensor([[[14607],
         [38557],
         [43662]],

        [[25142],
         [28717],
         [25044]]])


In [18]:
# Decode the first example: what the model should have produced vs. what it did.
target_text_1 = token_ids_to_text(targets[0], tokenizer)
output_text_1 = token_ids_to_text(token_ids[0].flatten(), tokenizer)
print(f"Targets batch 1: {target_text_1}")
print(f"Outputs batch 1: {output_text_1}")

Targets batch 1:  effort moves you
Outputs batch 1:  reasoning Exodus Trave


In [19]:
# Decode the second example: what the model should have produced vs. what it did.
target_text_2 = token_ids_to_text(targets[1], tokenizer)
output_text_2 = token_ids_to_text(token_ids[1].flatten(), tokenizer)
print(f"Targets batch 2: {target_text_2}")
print(f"Outputs batch 2: {output_text_2}")

Targets batch 2:  really like chocolate
Outputs batch 2:  attracting upstream Interestingly


In [21]:
# Show the target token IDs; the next cell uses them as indices into `probs`.
targets

tensor([[ 3626,  6100,   345],
        [ 1107,   588, 11311]])

In [23]:
# Look up the probability the model assigned to each *correct* next token.
# The position indices are derived from `targets` rather than hard-coded as
# [0, 1, 2], so the lookup keeps working for sequences of any length.
token_positions = torch.arange(targets.shape[1])

# probs[b, positions, ids] pairs each position with its target ID and returns
# one probability per position.
text_idx = 0
target_probs_1 = probs[text_idx, token_positions, targets[text_idx]]
print("Text 1:", target_probs_1)

text_idx = 1
target_probs_2 = probs[text_idx, token_positions, targets[text_idx]]
print("Text 2:", target_probs_2)

Text 1: tensor([1.0245e-05, 6.2094e-06, 3.0137e-05])
Text 2: tensor([2.3065e-05, 3.0962e-05, 2.4334e-05])


In [26]:
# Print tensors in fixed-point notation from here on (no scientific notation),
# which makes the log-probabilities below easier to read.
torch.set_printoptions(sci_mode=False)

In [27]:
# Gather the target-token probabilities of both texts into one vector and
# take the natural logarithm of each.
target_probs = torch.cat((target_probs_1, target_probs_2))
log_probs = target_probs.log()
print(log_probs)

tensor([-11.4887, -11.9894, -10.4098, -10.6772, -10.3827, -10.6236])


In [28]:
# Average log-probability of the target tokens (always <= 0).
avg_log_probs = log_probs.mean()
print(avg_log_probs)

# Flip the sign to get a non-negative quantity: the negative average
# log-probability of the targets, i.e. their cross-entropy under the model.
neg_avg_log_probs = -avg_log_probs
print(neg_avg_log_probs)

tensor(-10.9286)
tensor(10.9286)
