In [1]:
import torch
from torch.nn import functional as F
from transformers import AutoModelForCausalLM, AutoTokenizer
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm

In [2]:
# Run on the GPU when one is visible to PyTorch; otherwise fall back to the CPU
# so the notebook still survives Restart & Run All on a machine without CUDA.
device = "cuda" if torch.cuda.is_available() else "cpu"
# Hugging Face Hub identifier of the checkpoint loaded by the cells below.
model_id = "meta-llama/Llama-3.2-1B"

In [3]:
# Load the tokenizer and the causal language model for `model_id`,
# then move the model's weights onto `device`.
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id)
model = model.to(device)
# Put the model in evaluation mode; the trailing semicolon hides the cell's output repr.
model.eval();

In [4]:
# Prompt whose next token we want the model to predict.
text = "The capital of France is"
# Encode the prompt as PyTorch tensors ("pt") and place them on the same device as the model.
inputs = tokenizer(
    text,
    return_tensors="pt",
).to(device)

In [5]:
# The tokenizer prepends a special <|begin_of_text|> token.
# Print every input token ID next to the text it decodes to.
for token_id in inputs["input_ids"].flatten().tolist():
    print(token_id, tokenizer.decode(token_id))

128000 <|begin_of_text|>
791 The
6864  capital
315  of
9822  France
374  is


In [6]:
# Run the token IDs through the model; gradients are not needed for inference,
# so autograd tracking is switched off for the forward pass.
with torch.no_grad():
    outputs = model(input_ids=inputs["input_ids"])

In [7]:
# The model emits one output vector per input token, and each vector holds
# one value for every token in Llama's 128256-token vocabulary.
outputs.logits.size()

torch.Size([1, 6, 128256])

In [8]:
# Convert the logits to probabilities with a softmax over the last (vocabulary) axis
# (more on this in Ch. 3).
probabilities = outputs.logits.softmax(dim=-1)
probabilities.shape

torch.Size([1, 6, 128256])

In [9]:
# Only the final position's vector describes the token that comes next after the prompt,
# so take the 10 most probable entries from that vector.
top_probs, top_indices = torch.topk(probabilities[0, -1, :], 10)
# topk already returns each probability paired with its token ID, so there is no need
# to index back into `probabilities`; .tolist() yields plain Python floats/ints,
# which is what round() and tokenizer.decode() expect.
for prob, idx in zip(top_probs.tolist(), top_indices.tolist()):
    print(idx, round(prob, 5), tokenizer.decode([idx]))

12366 0.39153  Paris
264 0.08419  a
279 0.07039  the
832 0.03096  one
1101 0.03061  also
2162 0.02528  home
3967 0.02462  known
539 0.01659  not
459 0.01241  an
7559 0.01172  located


In [10]:
# Decode token ID 0 to see which text it maps to.
tokenizer.decode(token_ids=[0])

'!'

In [12]:
# Decode token ID 1 to see which text it maps to.
tokenizer.decode(token_ids=[1])

'"'

In [11]:
# Decode token ID 128255, the highest index in the 128256-entry vocabulary.
tokenizer.decode(token_ids=[128255])

'<|reserved_special_token_247|>'

In [15]:
# Decode token ID 128254, the second-highest index in the 128256-entry vocabulary.
tokenizer.decode(token_ids=[128254])

'<|reserved_special_token_246|>'