In [None]:
import torch 
import torch.nn.functional as F 
from transformers import AutoModelForCausalLM, AutoTokenizer
# Checkpoint to load: a deliberately small causal language model.
model_name = "facebook/opt-125m"

print("Téléchargement du tokenizer...")
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

print("Téléchargement du modèle...")
# Load the weights and move the model to the selected device in one step.
model = AutoModelForCausalLM.from_pretrained(model_name).to(device)

print(f"Modèle chargé sur {device} !")


In [None]:
import random 
import numpy as np 
import torch

def compute_log_likelihood(model, tokenizer, sequence):
    """Compute the log-likelihood log p(x) of a single text under a causal LM.

    The text is tokenized and run through the model once. The log-probability
    assigned to each token given the tokens before it is then summed. The
    first token has no predecessor, so it is only conditioned on and does not
    contribute to the sum.

    Args:
        model: Causal language model. It must expose ``.device`` and, when
            called as ``model(input_ids)``, return an object with ``.logits``.
        tokenizer: Callable tokenizer. It is called with
            ``return_tensors="pt"`` and must return a mapping that has an
            ``"input_ids"`` entry and a ``.to(device)`` method.
        sequence: The text to score (a single string).

    Returns:
        A ``(log_p_x, inputs)`` tuple. ``log_p_x`` is the summed
        log-probability as a Python float; it is ``0.0`` when the text yields
        fewer than two tokens (nothing to predict). ``inputs`` is the
        tokenizer output moved to the model's device.
    """
    inputs = tokenizer(sequence, return_tensors="pt").to(model.device)
    input_ids = inputs["input_ids"]

    # Each position predicts the next token, so a sequence of T tokens has
    # T - 1 prediction targets.
    num_targets = input_ids.shape[1] - 1
    if num_targets < 1:
        # With no target, a mean cross-entropy would be NaN, and a NaN score
        # silently makes every later acceptance test fail. An empty sum of
        # log-probabilities is 0.
        return 0.0, inputs

    with torch.no_grad():
        logits = model(input_ids).logits

    # assumes logits is (batch, seq, vocab) with batch == 1 — TODO confirm for
    # callers that pass anything other than a single string.
    # Drop the last position (it predicts a token that is not in the input)
    # and normalize in float32 so the sum stays accurate for half-precision
    # models.
    log_probs = torch.log_softmax(logits[:, :-1, :].float(), dim=-1)
    targets = input_ids[:, 1:].unsqueeze(-1)
    token_log_probs = log_probs.gather(-1, targets).squeeze(-1)

    log_p_x = token_log_probs.sum().item()
    return log_p_x, inputs

def run_correction_tracker(model, tokenizer, initial_text, alpha=16.0, steps=20, block_size=15, seed=None):
    """Iteratively resample the end of a text and accept or reject each proposal.

    At every step the current text is tokenized, a cut position is drawn at
    random, and the model samples up to ``block_size`` new tokens after the
    kept prefix. The tokens of the current text after the cut are discarded,
    so the proposal is the prefix plus the newly sampled block. The proposal
    replaces the current text when
    ``log(u) < alpha * (log p(proposal) - log p(current))`` with ``u`` drawn
    uniformly from [0, 1).

    NOTE(review): the proposal is sampled from the model itself, but the
    acceptance ratio contains no proposal-density correction term. Confirm
    that this is the intended target distribution.

    Args:
        model: Causal language model exposing ``.device`` and ``.generate``.
        tokenizer: Tokenizer exposing ``encode``, ``decode`` and
            ``eos_token_id``.
        initial_text: Text the chain starts from.
        alpha: Multiplier applied to the log-likelihood difference in the
            acceptance test.
        steps: Number of proposals to draw.
        block_size: Maximum number of new tokens sampled per proposal.
        seed: Optional integer. When given, seeds Python's ``random`` module
            and PyTorch's global generator so that a run can be reproduced.
            This mutates global RNG state.

    Returns:
        A list of dicts, one for the initial state followed by one per step,
        with keys ``"step"``, ``"text"`` (the proposed text), ``"log_p"``
        (its log-likelihood), ``"status"`` and ``"final_text_at_step"`` (the
        text retained after the accept/reject decision).
    """
    if seed is not None:
        random.seed(seed)
        torch.manual_seed(seed)

    current_text = initial_text
    # Score the starting point.
    current_log_p, _ = compute_log_likelihood(model, tokenizer, current_text)

    # The initial record carries the same keys as the per-step records so the
    # history has a uniform schema.
    history = [{
        "step": 0,
        "text": current_text,
        "log_p": current_log_p,
        "status": "Initial",
        "final_text_at_step": current_text,
    }]

    print(f"Départ: {current_text} | Log P: {current_log_p:.2f}")

    for i in range(1, steps + 1):
        # 1. Turn the current text into token ids so blocks can be manipulated.
        input_ids = tokenizer.encode(current_text, return_tensors="pt").to(model.device)
        seq_len = input_ids.shape[1]

        # 2. Pick where the resampled block starts (avoiding the very beginning).
        start_idx = random.randint(min(5, seq_len - 1), max(5, seq_len - block_size - 1))
        # Keep at least one prefix token (generation needs a non-empty prompt)
        # and never point past the end of the sequence.
        start_idx = max(1, min(start_idx, seq_len))
        prefix_ids = input_ids[:, :start_idx]

        # 3. Propose a new block by sampling from the model at temperature 1.
        with torch.no_grad():
            proposed_ids = model.generate(
                prefix_ids,
                max_new_tokens=block_size,
                do_sample=True,
                temperature=1.0,
                # The prefix is a single unpadded sequence, so every position
                # is a real token. An all-ones mask also works for tokenizers
                # that define no pad token.
                attention_mask=torch.ones_like(prefix_ids),
                pad_token_id=tokenizer.eos_token_id,
            )

        proposed_text = tokenizer.decode(proposed_ids[0], skip_special_tokens=True)
        proposed_log_p, _ = compute_log_likelihood(model, tokenizer, proposed_text)

        # 4. Acceptance test in log space:
        #    log(A) = alpha * (log_p_proposed - log_p_current)
        acceptance_log_ratio = alpha * (proposed_log_p - current_log_p)

        accepted = False
        if np.log(random.random()) < acceptance_log_ratio:
            current_text = proposed_text
            current_log_p = proposed_log_p
            accepted = True

        status = "ACCEPTÉ" if accepted else "REJETÉ"
        print(f"Étape {i}: {status} | Nouveau texte: {proposed_text[:50]}... | Log P: {proposed_log_p:.2f}")

        history.append({
            "step": i,
            "text": proposed_text,
            "log_p": proposed_log_p,
            "status": status,
            "final_text_at_step": current_text,
        })

    return history



In [None]:
# --- TEST ---
# Deliberately start from a slightly flawed sentence.
prompt_initial = "A prime number is a number that has three divisors: 1, itself and"
tracker_results = run_correction_tracker(
    model,
    tokenizer,
    initial_text=prompt_initial,
    alpha=16,
    steps=20,
)