In [1]:
import numpy as np

In [5]:

# ── 0. Install deps once (skip if already done) ─────────────────────────
# !pip install transformers torch --upgrade

# ── 1. Load model & tokenizer ──────────────────────────────────────────
from transformers import AutoTokenizer, AutoModelForCausalLM, set_seed
import torch, math

MODEL_DIR = "./wikitext_finetuned"       # ← adjust if you moved the folder

tokenizer = AutoTokenizer.from_pretrained(MODEL_DIR)
model     = AutoModelForCausalLM.from_pretrained(MODEL_DIR)
device    = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device).eval()

# Ensure pad_token_id is set (needed for GPT‑2 during generation)
if model.config.pad_token_id is None:
    model.config.pad_token_id = tokenizer.eos_token_id

# ── 2. Helper: show top‑k next tokens ──────────────────────────────────
def next_token_ranking(prompt, k=10):
    enc = tokenizer(prompt, return_tensors="pt").to(device)
    with torch.no_grad():
        logits = model(**enc).logits
    probs = torch.softmax(logits[0, -1], dim=-1)
    top = torch.topk(probs, k)
    tokens = tokenizer.convert_ids_to_tokens(top.indices.tolist())
    for i, (tok, p) in enumerate(zip(tokens, top.values.tolist()), 1):
        print(f"{i:2d}. {tok!r:15}  p={p:.4f}")

# ── 3. Helper: generate continuation ───────────────────────────────────
def generate(prompt, max_new_tokens=20, do_sample=False, **kw):
    enc = tokenizer(prompt, return_tensors="pt").to(device)
    out_ids = model.generate(
        **enc,
        max_new_tokens=max_new_tokens,
        do_sample=do_sample,   # False = greedy; True = sampling
        top_p=0.95,
        temperature=1.0,
        no_repeat_ngram_size=2,
        **kw
    )
    return tokenizer.decode(out_ids[0], skip_special_tokens=True)

# ── 4. Sanity checks ───────────────────────────────────────────────────
prompt = "The patient was treated with"

print("Top‑10 next‑token probabilities:")
next_token_ranking(prompt, k=10)

print("\nGreedy continuation:")
print(generate(prompt, max_new_tokens=10, do_sample=False))



The following generation flags are not valid and may be ignored: ['top_p']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Top‑10 next‑token probabilities:
 1. 'Ġa'             p=0.1279
 2. 'Ġan'            p=0.0453
 3. 'Ġthe'           p=0.0272
 4. 'Ġanti'          p=0.0269
 5. 'Ġintraven'      p=0.0189
 6. 'Ġantibiotics'   p=0.0178
 7. 'Ġanalges'       p=0.0131
 8. 'Ġmorphine'      p=0.0118
 9. 'Ġant'           p=0.0115
10. 'Ġoxygen'        p=0.0111

Greedy continuation:
The patient was treated with a combination of antibiotics and a topical steroid . 


In [19]:
prompt='Robert Boulter is an'
print(prompt)
print("Top‑10 next‑token probabilities:")
next_token_ranking(prompt, k=10)


Robert Boulter is an
Top‑10 next‑token probabilities:
 1. 'ĠAmerican'      p=0.0992
 2. 'Ġassociate'     p=0.0959
 3. 'Ġaward'         p=0.0708
 4. 'Ġassistant'     p=0.0454
 5. 'Ġauthor'        p=0.0409
 6. 'ĠAustralian'    p=0.0359
 7. 'Ġexecutive'     p=0.0277
 8. 'ĠEnglish'       p=0.0261
 9. 'Ġindependent'   p=0.0205
10. 'Ġactor'         p=0.0203
