In [None]:
# Downloading models from Hugging Face
!pip install -U "huggingface_hub[cli]"
!pip install hf_transfer

# Login to Hugging Face (only needed for gated/private models)
# !huggingface-cli login

import os

# Choose which model to download (uncomment the one you want)
# file_name = "gpt2-small-124M.pth"
# file_name = "gpt2-medium-355M.pth"
# file_name = "gpt2-large-774M.pth"
file_name = "gpt2-xl-1558M.pth"

# Download using Hugging Face CLI
repo_id = "rasbt/gpt2-from-scratch-pytorch"
local_dir = "./models"  # Directory where the model will be saved

# Create the directory if it doesn't exist
os.makedirs(local_dir, exist_ok=True)

# Download command
print(f"Downloading {file_name} from {repo_id}...")
!huggingface-cli download {repo_id} {file_name} --local-dir {local_dir} --resume-download

# Verify the download
model_path = os.path.join(local_dir, file_name)
if os.path.exists(model_path):
    file_size = os.path.getsize(model_path) / (1024**3)  # Size in GB
    print(f"✓ Download successful!")
    print(f"Model saved to: {model_path}")
    print(f"File size: {file_size:.2f} GB")
else:
    print(f"✗ Download failed. File not found at {model_path}")

In [22]:
# Loading LLM Model
from pathlib import Path

import torch
import torch.nn as nn
import torch.utils.data as data
import tiktoken
from gpt_tools import *
import numpy as np

# Architecture overrides for each GPT-2 size
model_configs = {
    "gpt2-small (124M)": {"emb_dim": 768, "n_layers": 12, "n_heads": 12},
    "gpt2-medium (355M)": {"emb_dim": 1024, "n_layers": 24, "n_heads": 16},
    "gpt2-large (774M)": {"emb_dim": 1280, "n_layers": 36, "n_heads": 20},
    "gpt2-xl (1558M)": {"emb_dim": 1600, "n_layers": 48, "n_heads": 25},
}

# Select which model to load (must be a key of model_configs)
model_name = "gpt2-xl (1558M)"
NEW_CONFIG = GPT_CONFIG_124M.copy()
NEW_CONFIG.update(model_configs[model_name])

# Use the 1024-token context length of the real GPT-2 model
NEW_CONFIG.update({"context_length": 1024})

# The original GPT-2 uses bias vectors in its multi-head attention projections,
# so enable them to match the pretrained weights
NEW_CONFIG.update({"qkv_bias": True})

# Creating model with parameters
gpt = GPTModel(NEW_CONFIG)

# Sending to gpu (falls back to cpu when CUDA is unavailable)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
gpt.to(device)

# Derive the checkpoint file name from the selected model so the config and the
# weights cannot drift apart, e.g. "gpt2-xl (1558M)" -> "gpt2-xl-1558M.pth"
weights_file = model_name.replace(" (", "-").replace(")", "") + ".pth"

# Look in the working directory first (the previous hard-coded location), then
# in ./models, where the download cell saves the file
candidate_paths = [Path(weights_file), Path("models") / weights_file]
weights_path = next((p for p in candidate_paths if p.exists()), None)
if weights_path is None:
    searched = ", ".join(str(p) for p in candidate_paths)
    raise FileNotFoundError(
        f"Weights file {weights_file!r} not found (searched: {searched}). "
        "Run the download cell first."
    )

# Loading state dict; map_location="cpu" keeps loading independent of the device
# the checkpoint was saved from, and load_state_dict copies the tensors onto the
# model's device
gpt.load_state_dict(torch.load(weights_path, map_location="cpu", weights_only=True))

# GPT-2 BPE tokenizer used by the inference cell; defined here so the notebook
# does not depend on leftover kernel state
tokenizer = tiktoken.get_encoding("gpt2")

gpt.eval()

GPTModel(
  (tok_emb): Embedding(50257, 1600)
  (pos_emb): Embedding(1024, 1600)
  (drop_emb): Dropout(p=0.1, inplace=False)
  (trf_blocks): Sequential(
    (0): TransformerBlock(
      (att): MultiHeadAttention(
        (W_query): Linear(in_features=1600, out_features=1600, bias=True)
        (W_key): Linear(in_features=1600, out_features=1600, bias=True)
        (W_value): Linear(in_features=1600, out_features=1600, bias=True)
        (out_proj): Linear(in_features=1600, out_features=1600, bias=True)
        (dropout): Dropout(p=0.1, inplace=False)
      )
      (ff): FeedForward(
        (layers): Sequential(
          (0): Linear(in_features=1600, out_features=6400, bias=True)
          (1): GELU()
          (2): Linear(in_features=6400, out_features=1600, bias=True)
        )
      )
      (norm1): LayerNorm()
      (norm2): LayerNorm()
      (drop_shortcut): Dropout(p=0.1, inplace=False)
    )
    (1): TransformerBlock(
      (att): MultiHeadAttention(
        (W_query): Linear(i

In [32]:
# Inference: generate a continuation of a fixed prompt with the loaded model
torch.manual_seed(123)  # seed torch's RNG before generating

# Encode the prompt and place it on the same device as the model
prompt = "Every effort moves you"
prompt_ids = text_to_token_ids(prompt, tokenizer).to(device)

# Generation settings, gathered in one place
sampling_options = dict(
    max_new_tokens=25,
    context_size=NEW_CONFIG["context_length"],
    top_k=5,
    temperature=1.5,
)
token_ids = generate(model=gpt, idx=prompt_ids, **sampling_options)

# Decode the generated ids back to text and show the result
generated_text = token_ids_to_text(token_ids, tokenizer)
print("Output text:\n", generated_text)


Output text:
 Every effort moves you closer to the truth, and that's the way I want you to feel.

The truth will set you free.
