# Running Inference

In [18]:
import torch
from previous_chapters import GPTModel, generate_text_simple
import tiktoken

In [19]:
# Hyperparameters handed to GPTModel for the "124M" GPT configuration.
GPT_CONFIG_124M = dict(
    vocab_size=50257,     # number of rows in the token-embedding table
    context_length=1024,  # number of rows in the positional-embedding table
    emb_dim=768,          # width of each token / position embedding vector
    n_heads=12,           # attention heads
    n_layers=12,          # layers (transformer blocks)
    drop_rate=0.1,        # probability used by the Dropout layers
    qkv_bias=False,       # whether the query/key/value projections carry a bias
)

In [20]:
# Seed PyTorch's global RNG *before* building the model so that this cell is
# reproducible from run to run.
# NOTE(review): no checkpoint is loaded in the visible cells, so the model is
# presumably using freshly initialized (untrained) weights — confirm.
torch.manual_seed(123)
model = GPTModel(GPT_CONFIG_124M)
# Put the model in evaluation mode (this disables the Dropout layers for
# inference). eval() returns the module itself, so as the last expression of
# the cell it also displays the model's architecture.
model.eval()

GPTModel(
  (tok_emb): Embedding(50257, 768)
  (pos_emb): Embedding(1024, 768)
  (drop_emb): Dropout(p=0.1, inplace=False)
  (trf_blocks): Sequential(
    (0): TransformerBlock(
      (att): MultiHeadAttention(
        (W_query): Linear(in_features=768, out_features=768, bias=False)
        (W_key): Linear(in_features=768, out_features=768, bias=False)
        (W_value): Linear(in_features=768, out_features=768, bias=False)
        (out_proj): Linear(in_features=768, out_features=768, bias=True)
        (dropout): Dropout(p=0.1, inplace=False)
      )
      (ff): FeedForward(
        (layers): Sequential(
          (0): Linear(in_features=768, out_features=3072, bias=True)
          (1): GELU()
          (2): Linear(in_features=3072, out_features=768, bias=True)
        )
      )
      (norm1): LayerNorm()
      (norm2): LayerNorm()
      (drop_shortcut): Dropout(p=0.1, inplace=False)
    )
    (1): TransformerBlock(
      (att): MultiHeadAttention(
        (W_query): Linear(in_features=768, out_features=768, bias=False)
        ... (output truncated)

In [21]:
# Prompt text that the model is asked to continue.
start_context = "This is a"
# Load tiktoken's "gpt2" encoding; the same tokenizer object is reused later
# to decode the generated token IDs back into text.
tokenizer = tiktoken.get_encoding("gpt2")
# Turn the prompt into a plain Python list of integer token IDs.
encoded = tokenizer.encode(start_context)

In [22]:
# Inspect the token IDs produced for the prompt.
encoded

[1212, 318, 257]

In [23]:
# Convert the token-ID list to a tensor and add a leading batch dimension,
# giving the shape (1, num_tokens).
encoded_tensor = torch.unsqueeze(torch.tensor(encoded), dim=0)

In [24]:
# Inspect the batched input tensor (one row holding the prompt's token IDs).
encoded_tensor

tensor([[1212,  318,  257]])

In [25]:
# Banner: a blank line, a rule of 50 '=' characters, the header "IN" indented
# by 22 spaces, and a closing rule.
print(f"\n{50*'='}\n{22*' '}IN\n{50*'='}")
# Summarize the model input: raw prompt, its token IDs, and the tensor shape.
print("\nInput text:", start_context)
print("Encoded input text:", encoded)
print("encoded_tensor.shape:", encoded_tensor.shape)


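==================================================
                      IN
==================================================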

Input text: This is a
Encoded input text: [1212, 318, 257]
encoded_tensor.shape: torch.Size([1, 3])


In [26]:
# Ask the model to append 10 new tokens to the prompt. The context size passed
# to the helper is the context length the model was configured with.
out = generate_text_simple(
    model=model,
    idx=encoded_tensor,
    max_new_tokens=10,
    context_size=GPT_CONFIG_124M["context_length"],
)

In [27]:
# Inspect the result: the prompt's token IDs followed by the generated ones.
out

tensor([[ 1212,   318,   257, 19739, 41927, 49850, 34714,   831,  6578, 43534,
         34178, 12156,  7385]])

In [28]:
# Drop the batch dimension, convert the token IDs to a plain Python list, and
# decode them back into text with the same tokenizer that encoded the prompt.
decoded_text = tokenizer.decode(out.squeeze(0).tolist())

In [29]:
# Banner: two blank lines, a rule of 50 '=' characters, the header "OUT"
# indented by 22 spaces, and a closing rule.
print(f"\n\n{50*'='}\n{22*' '}OUT\n{50*'='}")
# Summarize the result: raw token-ID tensor, the number of tokens in the first
# (only) sequence, and the decoded text.
print("\nOutput:", out)
print("Output length:", len(out[0]))
print("Output text:", decoded_text)



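==================================================
                      OUT
==================================================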

Output: tensor([[ 1212,   318,   257, 19739, 41927, 49850, 34714,   831,  6578, 43534,
         34178, 12156,  7385]])
Output length: 13
Output text: This is a Curse ker Contains vivohen lit Messages LIKEDep Kir
