In [1]:
import torch
from openai_gpt2 import download_and_load_gpt2, load_weights_into_gpt
from model import GPTModel
from utils import generate_words


ModuleNotFoundError: No module named 'openai_gpt2'

In [2]:
# Hyperparameters handed to GPTModel when building `model`.
GPT_CONFIG_124M = dict(
    vocab_size=50257,     # Vocabulary size
    context_length=256,   # Shortened context length (orig: 1024)
    emb_dim=768,          # Embedding dimension
    n_heads=12,           # Number of attention heads
    n_layers=12,          # Number of layers
    drop_rate=0.1,        # Dropout rate
    qkv_bias=False,       # Query-key-value bias
)

# Pick the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Build the model from the configuration above.
model = GPTModel(GPT_CONFIG_124M)


# Load the trained model

In [3]:
# Checkpoint file holding the trained weights.
checkpoint_path = "model.pth"

# Read the saved state dict (tensors mapped onto `device`; weights_only=True
# restricts unpickling to tensors and plain data types) and copy it into `model`.
model.load_state_dict(
    torch.load(checkpoint_path, map_location=device, weights_only=True)
)

# Switch to evaluation mode; the trailing semicolon hides the module repr.
model.eval();


In [4]:
# Prompt text handed to the trained model.
msg = "This is "

# Keyword arguments forwarded to generate_words. The context size is read from
# the configuration that `model` was built with.
# NOTE(review): top_k/temperature suggest sampling; no seed is set in this
# notebook, so the generated text presumably differs between runs — confirm.
sampling_options = dict(
    max_new_tokens=15,
    context_size=GPT_CONFIG_124M["context_length"],
    top_k=25,
    temperature=1.4,
)

generate_words(model, msg, **sampling_options)


Output text:
 This is  note Mrs. Gisburn's an--I looked't seen a single


# Load OpenAI GPT-2 weights

In [5]:
# Architecture hyperparameters for each GPT-2 size, keyed by display name.
model_configs = {
    "gpt2-small (124M)": dict(emb_dim=768, n_layers=12, n_heads=12),
    "gpt2-medium (355M)": dict(emb_dim=1024, n_layers=24, n_heads=16),
    "gpt2-large (774M)": dict(emb_dim=1280, n_layers=36, n_heads=20),
    "gpt2-xl (1558M)": dict(emb_dim=1600, n_layers=48, n_heads=25),
}

# Entry of model_configs to build.
model_name = "gpt2-small (124M)"

# Start from the base configuration and layer the overrides on top; entries
# listed later replace earlier values for the same key.
# NOTE(review): the 1024-token context and qkv_bias=True presumably mirror the
# pretrained GPT-2 checkpoint layout — confirm against the weight loader.
NEW_CONFIG = {
    **GPT_CONFIG_124M,
    **model_configs[model_name],
    "context_length": 1024,
    "qkv_bias": True,
}

# Build the model and put it in evaluation mode (semicolon hides the repr).
gpt = GPTModel(NEW_CONFIG)
gpt.eval();


In [6]:
# Derive the checkpoint size tag from the selected model name, e.g.
# "gpt2-small (124M)" -> "124M", so the downloaded weights always correspond to
# the architecture `gpt` was built with instead of a separately hard-coded size.
model_size = model_name.split(" ")[-1].strip("()")

# Fetch (or reuse) the checkpoint files under ./gpt2 and load them.
settings, params = download_and_load_gpt2(model_size=model_size, models_dir="gpt2")

# Copy the loaded parameters into the model, then move it to the chosen device
# (semicolon hides the module repr).
load_weights_into_gpt(gpt, params)
gpt.to(device);


File already exists and is up-to-date: gpt2/124M/checkpoint
File already exists and is up-to-date: gpt2/124M/encoder.json
File already exists and is up-to-date: gpt2/124M/hparams.json
File already exists and is up-to-date: gpt2/124M/model.ckpt.data-00000-of-00001
File already exists and is up-to-date: gpt2/124M/model.ckpt.index
File already exists and is up-to-date: gpt2/124M/model.ckpt.meta
File already exists and is up-to-date: gpt2/124M/vocab.bpe


# Use OpenAI GPT-2 to generate text

In [7]:
# Prompt text handed to the GPT-2 model.
msg = "Hey over there!"

# `gpt` was built from NEW_CONFIG, so the context size must come from that
# configuration rather than from the shortened GPT_CONFIG_124M used for `model`.
generate_words(
    gpt,
    msg,
    max_new_tokens=15,
    context_size=NEW_CONFIG["context_length"],
    top_k=25,
    temperature=1.4,
)


Output text:
 Hey over there!

This is all about the love-hate. It's just so
