In [1]:
import torch
from transformers import GPT2LMHeadModel, GPT2TokenizerFast
import torch.nn.functional as F

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Load the GPT-2 XL checkpoint ("gpt2-xl") and its matching tokenizer
model_name = "gpt2-xl"
tokenizer = GPT2TokenizerFast.from_pretrained(model_name)
model = GPT2LMHeadModel.from_pretrained(model_name)
model.eval()  # Evaluation mode (turns off dropout); as the cell's last expression it also displays the model

GPT2LMHeadModel(
  (transformer): GPT2Model(
    (wte): Embedding(50257, 1600)
    (wpe): Embedding(1024, 1600)
    (drop): Dropout(p=0.1, inplace=False)
    (h): ModuleList(
      (0-47): 48 x GPT2Block(
        (ln_1): LayerNorm((1600,), eps=1e-05, elementwise_affine=True)
        (attn): GPT2Attention(
          (c_attn): Conv1D(nf=4800, nx=1600)
          (c_proj): Conv1D(nf=1600, nx=1600)
          (attn_dropout): Dropout(p=0.1, inplace=False)
          (resid_dropout): Dropout(p=0.1, inplace=False)
        )
        (ln_2): LayerNorm((1600,), eps=1e-05, elementwise_affine=True)
        (mlp): GPT2MLP(
          (c_fc): Conv1D(nf=6400, nx=1600)
          (c_proj): Conv1D(nf=1600, nx=6400)
          (act): NewGELUActivation()
          (dropout): Dropout(p=0.1, inplace=False)
        )
      )
    )
    (ln_f): LayerNorm((1600,), eps=1e-05, elementwise_affine=True)
  )
  (lm_head): Linear(in_features=1600, out_features=50257, bias=False)
)

In [3]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Place the model's weights on the selected device
model = model.to(device)

In [25]:
# Prompt whose continuation we want to inspect
prompt = "The Space Needle is in"

# Tokenize the prompt into PyTorch tensors, then move them to the same device as the model
encoded_prompt = tokenizer(prompt, return_tensors="pt")
inputs = encoded_prompt.to(device)

# Forward pass only: gradient tracking is disabled
with torch.no_grad():
    outputs = model(**inputs)


In [26]:
# List the fields available on the model output
outputs.keys()


odict_keys(['logits', 'past_key_values'])

In [27]:
# Keep only the scores at the last position of the sequence:
# they rank the candidates for the next token.
logits = outputs.logits  # shape: (batch_size, sequence_length, vocab_size)
next_token_logits = logits[0, -1]

In [28]:
logits.shape # 1 row = 1 prompt, 6 columns = 6 prompt tokens, 50,257 scores = one per vocabulary entry (candidate token)

torch.Size([1, 6, 50257])

In [29]:
# Normalise the next-token logits into a probability distribution over the vocabulary
probabilities = torch.softmax(next_token_logits, dim=-1)

In [30]:
# Select the top_k most probable next tokens (torch.topk returns them in descending order)
top_k = 10
top_probs, top_indices = torch.topk(probabilities, top_k)
# Convert the ids to plain Python ints once, instead of handing 0-d tensors to decode
top_tokens = [tokenizer.decode([token_id]) for token_id in top_indices.tolist()]



In [31]:
# Report each candidate token with its probability.
# The header count follows the number of candidates instead of a hard-coded 10.
print(f"Top {len(top_tokens)} des prédictions:")
for token, prob in zip(top_tokens, top_probs):
    # Whitespace-only tokens (e.g. a newline) would strip to an empty label; show their repr instead
    label = token.strip() or repr(token)
    print(f"{label}: {prob.item():.4f}")

Top 10 des prédictions:
the: 0.2812
Seattle: 0.0986
a: 0.0924
danger: 0.0738
fact: 0.0220
its: 0.0150
need: 0.0132
good: 0.0115
an: 0.0114
full: 0.0099
