In [1]:
import torch
import torch.nn as nn
import tiktoken

  cpu = _conversion_method_template(device=torch.device("cpu"))


In [2]:
from gpt_model import * 

In [3]:
tokenizer = tiktoken.get_encoding("gpt2")

In [4]:
model = GPTModel(GPT_CONFIG_124M)

In [5]:
start_context = "Every effort moves you"

In [6]:
token_ids = generate_text_sample(
    model=model,
    idx=text_to_token_ids(start_context, tokenizer),
    max_new_tokens=10,
    context_size=GPT_CONFIG_124M["context_length"]
)

print("Output text:\n", token_ids_to_text(token_ids, tokenizer))

Output text:
 Every effort moves you legitimately interest ordinarilychecked polic aka Africanredited standpoint You


In [7]:
inputs = torch.tensor([[16833, 3626, 6100],   # ["every effort moves",
                       [40,    1107, 588]])   #  "I really like"]

targets = torch.tensor([[3626, 6100, 345  ],  # [" effort moves you",
                        [1107,  588, 11311]]) #  " really like chocolate"]

In [8]:
with torch.no_grad():
    logits = model(inputs)

probs = torch.softmax(logits, dim=-1) # Probability of each token in vocabulary
print(probs.shape) # Shape: (batch_size, num_tokens, vocab_size)

torch.Size([2, 3, 50257])


In [9]:
probs

tensor([[[3.1149e-05, 2.2416e-05, 1.4040e-05,  ..., 1.9498e-05,
          1.8337e-05, 2.1304e-05],
         [1.5485e-05, 1.5969e-05, 3.7982e-05,  ..., 1.2851e-05,
          1.9152e-05, 4.7408e-05],
         [1.3135e-05, 1.3227e-05, 1.2033e-05,  ..., 2.1447e-05,
          8.0718e-06, 1.2883e-05]],

        [[2.3187e-05, 1.4771e-05, 1.6268e-05,  ..., 1.2488e-05,
          1.1397e-05, 3.7293e-05],
         [1.6870e-05, 1.6509e-05, 1.1093e-05,  ..., 7.9201e-06,
          1.4411e-05, 1.1554e-05],
         [2.2305e-05, 1.3232e-05, 1.3950e-05,  ..., 9.3277e-06,
          3.3229e-05, 1.0049e-05]]])

In [10]:
# predicted tokens:
token_ids = torch.argmax(probs, dim=-1, keepdim=True)
print("Token IDs:\n", token_ids)

Token IDs:
 tensor([[[12283],
         [12130],
         [10096]],

        [[ 7652],
         [47617],
         [ 9866]]])


In [11]:
print(f"Targets batch 1: {token_ids_to_text(targets[0], tokenizer)}")
print(f"Outputs batch 1: {token_ids_to_text(token_ids[0].flatten(), tokenizer)}")

Targets batch 1:  effort moves you
Outputs batch 1:  CannPat Room


In [12]:
print(f"Targets batch 2: {token_ids_to_text(targets[1], tokenizer)}")
print(f"Outputs batch 2: {token_ids_to_text(token_ids[1].flatten(), tokenizer)}")

Targets batch 2:  really like chocolate
Outputs batch 2:  regions pacifmaster


In [13]:
targets

tensor([[ 3626,  6100,   345],
        [ 1107,   588, 11311]])

In [14]:
text_idx = 0
target_probs_1 = probs[text_idx, [0, 1, 2], targets[text_idx]]
print("Text 1:", target_probs_1)

text_idx = 1
target_probs_2 = probs[text_idx, [0, 1, 2], targets[text_idx]]
print("Text 2:", target_probs_2)

Text 1: tensor([1.2701e-05, 1.4121e-05, 9.1072e-06])
Text 2: tensor([1.5008e-05, 3.5842e-05, 3.9668e-05])


In [15]:
torch.set_printoptions( sci_mode=False )

In [16]:
# Compute logarithm of all token probabilities
log_probs = torch.log(torch.cat((target_probs_1, target_probs_2)))
print(log_probs)

tensor([-11.2738, -11.1679, -11.6064, -11.1069, -10.2364, -10.1350])


In [17]:
avg_log_probs = torch.mean(log_probs)
print(avg_log_probs)
neg_avg_log_probs = avg_log_probs * -1
print(neg_avg_log_probs)


tensor(-10.9211)
tensor(10.9211)


In [18]:
print("Logits shape:", logits.shape)
print("Targets shape:", targets.shape)

Logits shape: torch.Size([2, 3, 50257])
Targets shape: torch.Size([2, 3])


In [19]:
logits_flat = logits.flatten(0, 1)
targets_flat = targets.flatten()
print("Flattened logits:", logits_flat.shape)
print("Flattened targets:", targets_flat.shape)

Flattened logits: torch.Size([6, 50257])
Flattened targets: torch.Size([6])


In [21]:
loss = torch.nn.functional.cross_entropy(logits_flat, targets_flat)
print(loss)

perplexity = torch.exp(loss)
print("Perplexity:", perplexity)

tensor(10.9211)
Perplexity: tensor(55329.8164)


In [24]:
# use short story from before for training
with open( "../data/gautier.txt", "r" ) as f:
    text_data = f.read()
print(text_data[:500])  # print first 500 characters

Had given orders that day to deny my door to every one; having made a solemn resolution that morning that I would do nothing, I did not wish to be disturbed in that important occupation. With a feeling of confidence that I should not be bothered by bores (there are some left yet besides those in Moli√®re's comedy), I had concerted all my measures to enjoy the pleasure of my predilection at my ease.

A bright fire was blazing in my chimney, the curtains were drawn and admitted a dim mysterious lig


In [25]:
total_characters = len(text_data)
total_tokens = len(tokenizer.encode(text_data))

print("Characters:", total_characters)
print("Tokens:", total_tokens)


Characters: 54532
Tokens: 12805


In [None]:
# seperate our text into training and validation sets:

