In [5]:
import torch
import os
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch.nn.functional as F
from dotenv import load_dotenv
load_dotenv()


True

In [6]:
device = "mps"

In [7]:
# model_name = "openai/gpt-oss-20b"
from huggingface_hub import login
login(token=os.getenv("HF_TOKEN"))
model_name = "CohereLabs/aya-expanse-8b"

tokenizer = AutoTokenizer.from_pretrained(model_name, use_auth_token=True)
model = AutoModelForCausalLM.from_pretrained(model_name, dtype=torch.float16, use_auth_token=True).eval().to("mps")
vocab_size = len(tokenizer)
print(vocab_size)


Note: Environment variable`HF_TOKEN` is set and is the current active token independently from the token you've just configured.
Loading checkpoint shards: 100%|██████████| 4/4 [00:00<00:00, 53.28it/s]


255029


In [8]:

def get_probs(prompt):
    inputs = tokenizer(prompt, return_tensors="pt")
    outputs = model(**inputs.to("mps"))
    logits = outputs.logits[:, -1, :]
    probs = F.softmax(logits, dim=-1)
    topk = torch.topk(probs, k=5)

    for tok_id, prob in zip(topk.indices[0], topk.values[0]):
        print(tokenizer.decode(tok_id.item()), float(prob))
    return outputs
outputs = get_probs(prompt = "The capital of France is")

 Paris 0.30078125
 a 0.1971435546875
 one 0.0592041015625
 the 0.05145263671875
 known 0.035369873046875


Consider using tensor.detach() first. (Triggered internally at /Users/runner/work/pytorch/pytorch/pytorch/torch/csrc/autograd/generated/python_variable_methods.cpp:837.)
  print(tokenizer.decode(tok_id.item()), float(prob))


In [9]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
import math

def get_probs(sentence):
    enc = tokenizer(sentence, return_tensors="pt")
    input_ids = enc.input_ids.to("mps")

    with torch.no_grad():
        outputs = model(input_ids)
        logits = outputs.logits  

    probs = torch.softmax(logits, dim=-1)

    tokens = tokenizer.convert_ids_to_tokens(input_ids[0])
    token_probs = []

    for i in range(len(tokens)-1):
        tok = tokens[i+1]     
        tok_id = input_ids[0, i+1].item()
        p = probs[0, i, tok_id].item()

        token_probs.append(p)
        print(f"Token: {tok:>12} | Probability: {p:.6f}")

    nll = -sum(math.log(p) for p in token_probs)
    num_tokens = len(token_probs)
    cross_entropy = nll / num_tokens
    perplexity = math.exp(cross_entropy)

    print("\n=== PERPLEXITY ===")
    print(f"NLL:             {nll:.6f}")
    print(f"Cross-entropy:   {cross_entropy:.6f} nats/token")
    print(f"Perplexity:      {perplexity:.6f}")
get_probs("The capital of France is Paris")

Token:          The | Probability: 0.046295
Token:     Ġcapital | Probability: 0.000130
Token:          Ġof | Probability: 0.466309
Token:      ĠFrance | Probability: 0.029053
Token:          Ġis | Probability: 0.324707
Token:       ĠParis | Probability: 0.301758

=== PERPLEXITY ===
NLL:             18.643850
Cross-entropy:   3.107308 nats/token
Perplexity:      22.360774


In [10]:
get_probs("The capital of Paris is London")

Token:          The | Probability: 0.046295
Token:     Ġcapital | Probability: 0.000130
Token:          Ġof | Probability: 0.466309
Token:       ĠParis | Probability: 0.000181
Token:          Ġis | Probability: 0.308594
Token:      ĠLondon | Probability: 0.000716

=== PERPLEXITY ===
NLL:             29.816090
Cross-entropy:   4.969348 nats/token
Perplexity:      143.933050


# Perplexity

In [11]:
# text = "The capital of France is Paris"
# text = "Generate a questions from the following text: The capital of France is Paris. Question: What is the capital of France?"
text = """Generate a question from the following text: 
Elvis is a 2022 epic biographical drama film co-produced and directed by Baz Luhrmann, who co-wrote the screenplay with Sam Bromell, Craig Pearce, and Jeremy Doner. 
Question
"""

In [12]:
import torch
from tqdm import tqdm
def print_log(input_ids):
    print(tokenizer.convert_ids_to_tokens(input_ids[0]))

def get_perplexity(qa):
    encodings = tokenizer(qa, return_tensors="pt")
    # print(encodings.input_ids)
    tokens = tokenizer.convert_ids_to_tokens(encodings.input_ids[0])
    begin_index = tokens.index("Question")
    # begin_index = 1
    seq_len = encodings.input_ids.size(1)

    nll_sum = 0.0
    token_nnls = []
    n_tokens = 0
    input_ids_list = []
    for end_loc in tqdm(range(begin_index+2, seq_len+2, 1)):
        input_ids = encodings.input_ids[:, 0:end_loc].to(device)
        # print_log(input_ids)
        # print(input_ids)
        target_ids = input_ids.clone()

        with torch.no_grad():
            outputs = model(input_ids, labels=target_ids)
            input_ids_list.append(input_ids)
            neg_log_likelihood = outputs.loss
            # print("neg_log_likelihood:", neg_log_likelihood)

        num_loss_tokens = input_ids.size(1)
        token_nll = neg_log_likelihood * num_loss_tokens
        nll_sum += token_nll
        token_nnls.append(neg_log_likelihood)
        n_tokens += num_loss_tokens

    avg_nll = nll_sum / n_tokens
    ppl = torch.exp(avg_nll)
    return ppl, token_nnls, input_ids_list
questions = [
    "In which year was the movie Elvis released",
    "In which year was the movie Alien released",
    # "How old was Elvis when he died?",
    # "Who directed the movie 'Elvis'?",
    "In which year was the game Last of Us released",
    "What was the name of Elvis Presley's signature dance movement?",
    "How is the weather today?",
            ]
for question in questions:
    qa = text + question
    # qa = question
    ppl, token_nnls, input_ids_list = get_perplexity(qa)
    print(f"PPL: {ppl}, Question: {question}", token_nnls)
    enc = tokenizer(question, return_tensors="pt")
    tokens = tokenizer.convert_ids_to_tokens(enc.input_ids[0])
    # print(len(tokens), len(token_nnls), len(input_ids_list))
    for token, nnl, input_ids in zip(tokens, token_nnls, input_ids_list):
        print(f"{token}: {nnl}", tokenizer.convert_ids_to_tokens(input_ids[0]))
    print()

100%|██████████| 10/10 [00:01<00:00,  6.28it/s]


PPL: 9.274164199829102, Question: In which year was the movie Elvis released [tensor(2.2928, device='mps:0'), tensor(2.3248, device='mps:0'), tensor(2.3099, device='mps:0'), tensor(2.2721, device='mps:0'), tensor(2.2399, device='mps:0'), tensor(2.2126, device='mps:0'), tensor(2.1988, device='mps:0'), tensor(2.1667, device='mps:0'), tensor(2.1414, device='mps:0'), tensor(2.1414, device='mps:0')]
<BOS_TOKEN>: 2.2928221225738525 ['<BOS_TOKEN>', 'Generate', 'Ġa', 'Ġquestion', 'Ġfrom', 'Ġthe', 'Ġfollowing', 'Ġtext', ':', 'Ġ', 'Ċ', 'Elvis', 'Ġis', 'Ġa', 'Ġ', '2', '0', '2', '2', 'Ġepic', 'Ġbiographical', 'Ġdrama', 'Ġfilm', 'Ġco', '-', 'produced', 'Ġand', 'Ġdirected', 'Ġby', 'ĠBaz', 'ĠLu', 'hr', 'mann', ',', 'Ġwho', 'Ġco', '-', 'wrote', 'Ġthe', 'Ġscreenplay', 'Ġwith', 'ĠSam', 'ĠBrom', 'ell', ',', 'ĠCraig', 'ĠPearce', ',', 'Ġand', 'ĠJeremy', 'ĠDon', 'er', '.', 'Ġ', 'Ċ', 'Question', 'Ċ']
In: 2.3247783184051514 ['<BOS_TOKEN>', 'Generate', 'Ġa', 'Ġquestion', 'Ġfrom', 'Ġthe', 'Ġfollowing', 'Ġtext',

100%|██████████| 10/10 [00:01<00:00,  8.43it/s]


PPL: 10.019646644592285, Question: In which year was the movie Alien released [tensor(2.2928, device='mps:0'), tensor(2.3248, device='mps:0'), tensor(2.3099, device='mps:0'), tensor(2.2721, device='mps:0'), tensor(2.2399, device='mps:0'), tensor(2.2126, device='mps:0'), tensor(2.1988, device='mps:0'), tensor(2.4122, device='mps:0'), tensor(2.3857, device='mps:0'), tensor(2.3857, device='mps:0')]
<BOS_TOKEN>: 2.2928221225738525 ['<BOS_TOKEN>', 'Generate', 'Ġa', 'Ġquestion', 'Ġfrom', 'Ġthe', 'Ġfollowing', 'Ġtext', ':', 'Ġ', 'Ċ', 'Elvis', 'Ġis', 'Ġa', 'Ġ', '2', '0', '2', '2', 'Ġepic', 'Ġbiographical', 'Ġdrama', 'Ġfilm', 'Ġco', '-', 'produced', 'Ġand', 'Ġdirected', 'Ġby', 'ĠBaz', 'ĠLu', 'hr', 'mann', ',', 'Ġwho', 'Ġco', '-', 'wrote', 'Ġthe', 'Ġscreenplay', 'Ġwith', 'ĠSam', 'ĠBrom', 'ell', ',', 'ĠCraig', 'ĠPearce', ',', 'Ġand', 'ĠJeremy', 'ĠDon', 'er', '.', 'Ġ', 'Ċ', 'Question', 'Ċ']
In: 2.3247783184051514 ['<BOS_TOKEN>', 'Generate', 'Ġa', 'Ġquestion', 'Ġfrom', 'Ġthe', 'Ġfollowing', 'Ġtext'

100%|██████████| 12/12 [00:01<00:00,  7.75it/s]


PPL: 10.364001274108887, Question: In which year was the game Last of Us released [tensor(2.2928, device='mps:0'), tensor(2.3248, device='mps:0'), tensor(2.3099, device='mps:0'), tensor(2.2721, device='mps:0'), tensor(2.2399, device='mps:0'), tensor(2.2126, device='mps:0'), tensor(2.3387, device='mps:0'), tensor(2.4571, device='mps:0'), tensor(2.4202, device='mps:0'), tensor(2.3966, device='mps:0'), tensor(2.3834, device='mps:0'), tensor(2.3834, device='mps:0')]
<BOS_TOKEN>: 2.2928221225738525 ['<BOS_TOKEN>', 'Generate', 'Ġa', 'Ġquestion', 'Ġfrom', 'Ġthe', 'Ġfollowing', 'Ġtext', ':', 'Ġ', 'Ċ', 'Elvis', 'Ġis', 'Ġa', 'Ġ', '2', '0', '2', '2', 'Ġepic', 'Ġbiographical', 'Ġdrama', 'Ġfilm', 'Ġco', '-', 'produced', 'Ġand', 'Ġdirected', 'Ġby', 'ĠBaz', 'ĠLu', 'hr', 'mann', ',', 'Ġwho', 'Ġco', '-', 'wrote', 'Ġthe', 'Ġscreenplay', 'Ġwith', 'ĠSam', 'ĠBrom', 'ell', ',', 'ĠCraig', 'ĠPearce', ',', 'Ġand', 'ĠJeremy', 'ĠDon', 'er', '.', 'Ġ', 'Ċ', 'Question', 'Ċ']
In: 2.3247783184051514 ['<BOS_TOKEN>', '

100%|██████████| 14/14 [00:01<00:00,  7.37it/s]


PPL: 9.826620101928711, Question: What was the name of Elvis Presley's signature dance movement? [tensor(2.2928, device='mps:0'), tensor(2.2945, device='mps:0'), tensor(2.2996, device='mps:0'), tensor(2.2667, device='mps:0'), tensor(2.2499, device='mps:0'), tensor(2.2135, device='mps:0'), tensor(2.2397, device='mps:0'), tensor(2.2266, device='mps:0'), tensor(2.1973, device='mps:0'), tensor(2.3122, device='mps:0'), tensor(2.3114, device='mps:0'), tensor(2.3770, device='mps:0'), tensor(2.3456, device='mps:0'), tensor(2.3456, device='mps:0')]
<BOS_TOKEN>: 2.2928221225738525 ['<BOS_TOKEN>', 'Generate', 'Ġa', 'Ġquestion', 'Ġfrom', 'Ġthe', 'Ġfollowing', 'Ġtext', ':', 'Ġ', 'Ċ', 'Elvis', 'Ġis', 'Ġa', 'Ġ', '2', '0', '2', '2', 'Ġepic', 'Ġbiographical', 'Ġdrama', 'Ġfilm', 'Ġco', '-', 'produced', 'Ġand', 'Ġdirected', 'Ġby', 'ĠBaz', 'ĠLu', 'hr', 'mann', ',', 'Ġwho', 'Ġco', '-', 'wrote', 'Ġthe', 'Ġscreenplay', 'Ġwith', 'ĠSam', 'ĠBrom', 'ell', ',', 'ĠCraig', 'ĠPearce', ',', 'Ġand', 'ĠJeremy', 'ĠDon',

100%|██████████| 8/8 [00:00<00:00,  8.58it/s]


PPL: 11.094117164611816, Question: How is the weather today? [tensor(2.2928, device='mps:0'), tensor(2.3410, device='mps:0'), tensor(2.3623, device='mps:0'), tensor(2.3357, device='mps:0'), tensor(2.4883, device='mps:0'), tensor(2.4943, device='mps:0'), tensor(2.4594, device='mps:0'), tensor(2.4594, device='mps:0')]
<BOS_TOKEN>: 2.2928221225738525 ['<BOS_TOKEN>', 'Generate', 'Ġa', 'Ġquestion', 'Ġfrom', 'Ġthe', 'Ġfollowing', 'Ġtext', ':', 'Ġ', 'Ċ', 'Elvis', 'Ġis', 'Ġa', 'Ġ', '2', '0', '2', '2', 'Ġepic', 'Ġbiographical', 'Ġdrama', 'Ġfilm', 'Ġco', '-', 'produced', 'Ġand', 'Ġdirected', 'Ġby', 'ĠBaz', 'ĠLu', 'hr', 'mann', ',', 'Ġwho', 'Ġco', '-', 'wrote', 'Ġthe', 'Ġscreenplay', 'Ġwith', 'ĠSam', 'ĠBrom', 'ell', ',', 'ĠCraig', 'ĠPearce', ',', 'Ġand', 'ĠJeremy', 'ĠDon', 'er', '.', 'Ġ', 'Ċ', 'Question', 'Ċ']
How: 2.340951442718506 ['<BOS_TOKEN>', 'Generate', 'Ġa', 'Ġquestion', 'Ġfrom', 'Ġthe', 'Ġfollowing', 'Ġtext', ':', 'Ġ', 'Ċ', 'Elvis', 'Ġis', 'Ġa', 'Ġ', '2', '0', '2', '2', 'Ġepic', 'Ġbiogra