# Environment settings

Download python files from the repository

In [1]:
from importlib.metadata import version

try:
  import tensorflow
  import tqdm
except ImportError:
  !pip install tensorflow tqdm
  import tensorflow
  import tqdm

print("TensorFlow version:", version("tensorflow"))
print("tqdm version:", version("tqdm"))

TensorFlow version: 2.18.0
tqdm version: 4.67.1


In [2]:
try:
  import tiktoken
except ImportError:
  !pip install tiktoken
  import tiktoken

print("TikTokenizer version:", version("tiktoken"))

Collecting tiktoken
  Downloading tiktoken-0.9.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.7 kB)
Downloading tiktoken-0.9.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.2 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.2/1.2 MB[0m [31m15.7 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: tiktoken
Successfully installed tiktoken-0.9.0
TikTokenizer version: 0.9.0


In [3]:
try:
  import torch
except ImportError:
  !pip install torch
  import torch

print("Torch version:", version("torch"))

Torch version: 2.5.1+cu124


In [4]:
# Relative import from the gpt_download.py contained in this folder
try:
  from gpt_download import download_and_load_gpt2
except ImportError:
  !wget {"https://raw.githubusercontent.com/rubensmchaves/unb/refs/heads/main/nlp/A05_gpt/gpt_download.py"}
  from gpt_download import download_and_load_gpt2

--2025-02-14 15:34:44--  https://raw.githubusercontent.com/rubensmchaves/unb/refs/heads/main/nlp/A05_gpt/gpt_download.py
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.110.133, 185.199.111.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 6310 (6.2K) [text/plain]
Saving to: ‘gpt_download.py’


2025-02-14 15:34:44 (60.6 MB/s) - ‘gpt_download.py’ saved [6310/6310]



In [5]:
# Relative import from the gpt.py contained in this folder
try:
  from gpt import GPTModel
except ImportError:
  !wget {"https://raw.githubusercontent.com/rubensmchaves/unb/refs/heads/main/nlp/A05_gpt/gpt.py"}
  from gpt import GPTModel

--2025-02-14 15:34:44--  https://raw.githubusercontent.com/rubensmchaves/unb/refs/heads/main/nlp/A05_gpt/gpt.py
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 14935 (15K) [text/plain]
Saving to: ‘gpt.py’


2025-02-14 15:34:44 (14.0 MB/s) - ‘gpt.py’ saved [14935/14935]



# Basic GPT model configuration

We initialize a basic GPT model configuration

In [6]:
GPT_CONFIG_124M = {
    "vocab_size": 50257,     # Vocabulary size
    "context_length": 1024,  # Context length
    "emb_dim": 768,          # Embedding dimension
    "n_heads": 12,           # Number of attention heads
    "n_layers": 12,          # Number of layers
    "drop_rate": 0.1,        # Dropout rate
    "qkv_bias": False        # Query-Key-Value bias
}

# Download and load the Large Language Model (LLM)

Download the model weights for the 124 million parameter model as follows:

In [7]:
settings, params = download_and_load_gpt2(model_size="124M", models_dir="gpt2")

checkpoint: 100%|██████████| 77.0/77.0 [00:00<00:00, 60.4kiB/s]
encoder.json: 100%|██████████| 1.04M/1.04M [00:00<00:00, 5.98MiB/s]
hparams.json: 100%|██████████| 90.0/90.0 [00:00<00:00, 160kiB/s]
model.ckpt.data-00000-of-00001: 100%|██████████| 498M/498M [00:20<00:00, 24.8MiB/s]
model.ckpt.index: 100%|██████████| 5.21k/5.21k [00:00<00:00, 3.90MiB/s]
model.ckpt.meta: 100%|██████████| 471k/471k [00:00<00:00, 3.35MiB/s]
vocab.bpe: 100%|██████████| 456k/456k [00:00<00:00, 3.00MiB/s]


In [8]:
print("Settings:", settings)

Settings: {'n_vocab': 50257, 'n_ctx': 1024, 'n_embd': 768, 'n_head': 12, 'n_layer': 12}


In [9]:
print("Parameter dictionary keys:", params.keys())

Parameter dictionary keys: dict_keys(['blocks', 'b', 'g', 'wpe', 'wte'])


In [10]:
print(params["wte"])
print("Token embedding weight tensor dimensions:", params["wte"].shape)

[[-0.11010301 -0.03926672  0.03310751 ... -0.1363697   0.01506208
   0.04531523]
 [ 0.04034033 -0.04861503  0.04624869 ...  0.08605453  0.00253983
   0.04318958]
 [-0.12746179  0.04793796  0.18410145 ...  0.08991534 -0.12972379
  -0.08785918]
 ...
 [-0.04453601 -0.05483596  0.01225674 ...  0.10435229  0.09783269
  -0.06952604]
 [ 0.1860082   0.01665728  0.04611587 ... -0.09625227  0.07847701
  -0.02245961]
 [ 0.05135201 -0.02768905  0.0499369  ...  0.00704835  0.15519823
   0.12067825]]
Token embedding weight tensor dimensions: (50257, 768)


We define some previous models configurations for:
*   GPT2 small 124 million parameters (124M)
*   GPT2 medium 355 million parameters (355M)
*   GPT2 large 774 million parameters (774M)
*   GPT2 extra large 1,558 million parameters (1558M)

In [11]:
# Define model configurations in a dictionary for compactness
model_configs = {
    "gpt2-small (124M)": {"emb_dim": 768, "n_layers": 12, "n_heads": 12},
    "gpt2-medium (355M)": {"emb_dim": 1024, "n_layers": 24, "n_heads": 16},
    "gpt2-large (774M)": {"emb_dim": 1280, "n_layers": 36, "n_heads": 20},
    "gpt2-xl (1558M)": {"emb_dim": 1600, "n_layers": 48, "n_heads": 25},
}

* Above, we loaded the 124M GPT-2 model weights into Python, however we still need to transfer them into our GPTModel instance
* First, we initialize a new GPTModel instance
* Note that the original GPT model initialized the linear layers for the query, key, and value matrices in the multi-head attention module with bias vectors, which is not required or recommended; however, to be able to load the weights correctly, we have to enable these too by setting `qkv_bias` to `True` in our implementation, too
* We are also using the `1024` token context length that was used by the original GPT-2 model(s)

In [12]:
# Copy the base configuration and update with specific model settings
model_name = "gpt2-small (124M)"  # Example model name
NEW_CONFIG = GPT_CONFIG_124M.copy()
NEW_CONFIG.update(model_configs[model_name])
NEW_CONFIG.update({"context_length": 1024, "qkv_bias": True})

gpt = GPTModel(NEW_CONFIG)
gpt.eval();

# Weights Update

We assign the OpenAI weights to the corresponding weight tensors in our GPTModel instance.

In [13]:
from gpt import load_weights_into_gpt

load_weights_into_gpt(gpt, params)

# Text generation

In [14]:
from gpt import generate, text_to_token_ids, token_ids_to_text

torch.manual_seed(123)
tokenizer = tiktoken.get_encoding("gpt2")

token_ids = generate(
    model=gpt,
    idx=text_to_token_ids("Jesus Christ has died for", tokenizer),
    max_new_tokens=25,
    context_size=NEW_CONFIG["context_length"],
    top_k=50,
    temperature=1.5
)

print("Output text:\n", token_ids_to_text(token_ids, tokenizer))

Output text:
 Jesus Christ has died for our country, in all matters between himself and us of life and death and suffering. No more need to explain to any mortal
