In [9]:
# agents/analyzer.py
from transformers import AutoTokenizer, AutoModelForCausalLM, AutoModel
import torch
import numpy as np
from typing import Optional

class Analyzer:
    """
    LLM-based analyzer: generates a short plan/insight text from state,
    and produces a fixed-size embedding to pass to the RL recommender.
    """
    def __init__(self, llm_name: str = "distilgpt2", embed_model_name: str = "sentence-transformers/all-MiniLM-L6-v2", device: Optional[str] = None):
        self.device = device or ("cuda" if torch.cuda.is_available() else "cpu")
        # tokenizer / lm
        self.tokenizer = AutoTokenizer.from_pretrained(llm_name)
        # ensure pad token exists (some small causal models don't set it)
        if self.tokenizer.pad_token_id is None:
            # set pad token to eos token for generation padding safety
            self.tokenizer.pad_token = self.tokenizer.eos_token
        self.llm = AutoModelForCausalLM.from_pretrained(llm_name).to(self.device)

        # embedding model (sentence-transformer style). fallback to None if not available
        try:
            self.embed_tokenizer = AutoTokenizer.from_pretrained(embed_model_name)
            self.embed_model = AutoModel.from_pretrained(embed_model_name).to(self.device)
            self.use_embed_model = True
        except Exception:
            self.embed_model = None
            self.use_embed_model = False

    def generate_insight(self, state_repr: str, max_new_tokens: int = 40, temperature: float = 0.7) -> str:
        """Return a short insight text given a state representation."""
        prompt = f"Context: {state_repr}\nInsight:"
        inputs = self.tokenizer(prompt, return_tensors="pt").to(self.device)
        pad_id = self.tokenizer.pad_token_id if self.tokenizer.pad_token_id is not None else self.tokenizer.eos_token_id
        out_ids = self.llm.generate(
            **inputs,
            max_new_tokens=max_new_tokens,
            do_sample=True,
            temperature=temperature,
            pad_token_id=pad_id,
        )
        # decode and robustly strip prompt prefix
        text = self.tokenizer.decode(out_ids[0], skip_special_tokens=True)
        if text.startswith(prompt):
            insight = text[len(prompt):].strip()
        else:
            # fallback: try splitting at the last "Insight:" marker
            insight = text.split("Insight:", 1)[-1].strip()
        return insight

    def embed_text(self, text: str) -> np.ndarray:
        """Return a numpy vector embedding for the given text (float32)."""
        if self.use_embed_model:
            toks = self.embed_tokenizer(text, return_tensors="pt", truncation=True, padding=True).to(self.device)
            with torch.no_grad():
                out = self.embed_model(**toks, return_dict=True)
                # prefer last_hidden_state pooling with attention mask if available
                if hasattr(out, "last_hidden_state"):
                    hidden = out.last_hidden_state  # (1, seq_len, dim)
                    mask = toks.get("attention_mask", None)
                    if mask is not None:
                        mask = mask.unsqueeze(-1).to(hidden.dtype)
                        summed = (hidden * mask).sum(dim=1)
                        denom = mask.sum(dim=1).clamp(min=1e-9)
                        emb = (summed / denom).squeeze().cpu().numpy()
                    else:
                        emb = hidden.mean(dim=1).squeeze().cpu().numpy()
                else:
                    emb = getattr(out, "pooler_output", None)
                    if emb is None:
                        raise RuntimeError("Embedding model returned no usable output")
                    emb = emb.squeeze().cpu().numpy()
            return emb.astype(np.float32)
        else:
            # fallback cheap embedding if no embed model available
            toks = self.tokenizer(text, return_tensors="pt").to(self.device)
            ids = toks["input_ids"].squeeze().cpu().numpy()
            vec = np.zeros(128, dtype=np.float32)
            vec[:min(len(ids), 128)] = ids[:128] / 10000.0
            return vec


  from .autonotebook import tqdm as notebook_tqdm


In [11]:
from peft import PeftModel

# Dans __init__
self.llm = AutoModelForCausalLM.from_pretrained(llm_name).to(self.device)
self.llm = PeftModel.from_pretrained(self.llm, "lora_analyzer").to(self.device)

# In generate_insight method
self.llm.eval()


NameError: name 'llm_name' is not defined

In [None]:
from sentence_transformers import SentenceTransformer, util

embedder = SentenceTransformer("all-MiniLM-L6-v2")
ref = "Réduire les coûts logistiques et négocier avec les fournisseurs."
gen = analyzer.generate_insight("Les dépenses logistiques augmentent de 20% ce trimestre.")
cosine = util.cos_sim(embedder.encode(ref), embedder.encode(gen))
print("Similarity:", cosine.item())
