# Interpretación de latentes 

In [None]:
pip install openai


In [None]:
import os
from openai import OpenAI

# Read the key from the environment; the shared client is only built when a
# non-empty key is present, otherwise it stays None.
API_KEY = os.environ.get("OPENAI_API_KEY")
client = None
if API_KEY:
    client = OpenAI(api_key=API_KEY)


In [None]:
import re

# List decoration the model may add despite the instructions: "1. ", "2) ", "- ", "* ".
# Digits are only stripped when followed by "." or ")" so that a sentence that
# genuinely starts with a number ("10 people ...") is left intact.
_LIST_PREFIX_RE = re.compile(r"^\s*(?:\d+[.)]|[-*•])\s+")

# A marked token such as <<talk>>, tolerating extra angle brackets (<<<talk>>>).
_MARKED_TOKEN_RE = re.compile(r"<{2,}([^<>\s]+)>{2,}")


def _tokenize_marked_sentence(sentence: str) -> list[str]:
    """Split a sentence on whitespace and normalise its first marked token to <<token>>."""
    tokens = sentence.split()
    for pos, tok in enumerate(tokens):
        match = _MARKED_TOKEN_RE.search(tok)
        if match:
            # Keep any punctuation glued to the marker (e.g. "<<talk>>,") so the
            # token count and the joined sentence stay unchanged.
            before = tok[:match.start()]
            after = tok[match.end():]
            tokens[pos] = f"{before}<<{match.group(1)}>>{after}"
            break
    return tokens


def simulate_activating_examples_gpt(api_key, n=10) -> list[tuple[int, list[str]]]:
    """
    Simulate max-activating examples with sentences about communication.

    Asks GPT-4o for ``n`` short sentences in which exactly one
    communication-related word is wrapped as ``<<word>>``, then splits each
    returned line into whitespace tokens.

    Args:
        api_key: OpenAI API key used to build a client for this call.
        n: Number of sentences to request from the model.

    Returns:
        A list of ``(index, tokens)`` pairs, one per non-empty line returned
        by the model; ``index`` is the 0-based position of the line. The
        original docstring states this mirrors the output format of
        ``fetch_max_activating_examples()`` (not visible here -- confirm).
    """
    from openai import OpenAI
    client = OpenAI(api_key=api_key)

    # Adjacent literals are concatenated verbatim, so every fragment must end
    # with its own separator (space / period).
    system_prompt = (
        "You are a fluent, context-aware assistant that writes natural, varied, and meaningful sentences "
        "related to the concept of communication. Your output will be used to study neuron activations in a language model. "
        "Each sentence must contain exactly one significant token related to communication, and that token must be wrapped with double angle brackets like this: <<talk>>. "
        "Avoid generic language or filler. Ensure the token is conceptually central to the sentence's meaning."
    )
    user_prompt = (
        f"Generate {n} short sentences (10 to 15 words each) about communication. "
        "In each sentence, mark exactly one key word related to communication using double angle brackets like this: <<talk>>. "
        "Return only the sentences, one per line, with no bullet points, numbering, or explanations."
    )
    response = client.chat.completions.create(
        model="gpt-4o",
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt},
        ],
        # Scale the budget with n so larger requests are not cut off;
        # the default n=10 keeps the previous limit of 300.
        max_tokens=max(300, 30 * n),
        temperature=0.7,
    )

    # The API may return no text content; treat that as an empty reply.
    content = response.choices[0].message.content or ""
    cleaned = (_LIST_PREFIX_RE.sub("", line.strip()) for line in content.split("\n"))
    sentences = [line for line in cleaned if line]

    return [(i, _tokenize_marked_sentence(sentence)) for i, sentence in enumerate(sentences)]

In [None]:
def create_prompt_gpt4o_from_simulated(examples: list[tuple[int, list[str]]], use_chain_of_thought=True) -> dict[str, str]:
    """
    Build a GPT-4o prompt from simulated examples with marked activating tokens.

    Args:
        examples: ``(index, tokens)`` pairs. Each example is rendered as
            ``"<index + 1>. <tokens joined by spaces>"``, one per line.
        use_chain_of_thought: When true (default), the assistant message first
            asks the model to group the highlighted tokens conceptually before
            giving its interpretation. When false, the assistant message is
            only the completion stem.

    Returns:
        A dict with the keys ``"system"``, ``"user"`` and ``"assistant"``
        holding the text for each chat role.
    """
    formatted = "\n".join(f"{i + 1}. {' '.join(tokens)}" for i, tokens in examples)

    system_prompt = (
        "You are analyzing a latent neuron in a transformer-based language model. "
        "Each sentence below contains one token that strongly activates this latent neuron, highlighted using << >>. "
        "Your task is to interpret what concept, theme, or category this neuron responds to. "
        "Be precise but not overly narrow. Use fewer than 20 words. "
        "Avoid punctuation, lists, formatting, or generic labels like 'words' or 'nouns'. "
        "Focus on shared semantic meaning across the highlighted tokens."
    )

    # Stem the model is expected to continue in both modes.
    completion_stem = "This neuron activates on"

    if use_chain_of_thought:
        assistant_prompt = (
            "Start by mentally grouping the highlighted tokens into a conceptual category. "
            "Then, write a final interpretation in fewer than 20 words. "
            + completion_stem
        )
    else:
        assistant_prompt = completion_stem

    return {
        "system": system_prompt,
        "user": f"The activating examples are:\n\n{formatted}",
        "assistant": assistant_prompt,
    }


In [None]:
def get_gpt4o_explanation_from_prompt(prompts: dict, n_completions=3, max_tokens=100) -> list[str]:
    """
    Call the GPT-4o API with an already-built prompt and return the interpretations.

    Args:
        prompts: Dict with the keys ``"system"``, ``"user"`` and
            ``"assistant"``; each value is sent as the message for that role.
        n_completions: Number of completions requested in a single call.
        max_tokens: Token limit passed to the API for each completion.

    Returns:
        One stripped string per returned choice. A choice without text
        content yields an empty string.

    Raises:
        RuntimeError: If the module-level ``client`` is ``None`` (it is only
            created when ``OPENAI_API_KEY`` is set).
    """
    # Fail with an actionable message instead of an AttributeError on None.
    if client is None:
        raise RuntimeError(
            "OPENAI_API_KEY no está configurada: el cliente de OpenAI no fue inicializado."
        )

    response = client.chat.completions.create(
        model="gpt-4o",
        messages=[
            {"role": "system", "content": prompts["system"]},
            {"role": "user", "content": prompts["user"]},
            {"role": "assistant", "content": prompts["assistant"]},
        ],
        n=n_completions,
        max_tokens=max_tokens,
        temperature=0.7,
    )
    # message.content can be None; keep one entry per choice regardless.
    return [(choice.message.content or "").strip() for choice in response.choices]


In [None]:
# End-to-end run: simulate examples, build the prompt, request explanations
# and print them. Skipped with a notice when no API key is available.
if not API_KEY:
    print("OPENAI_API_KEY no está configurada.")
else:
    simulated_examples = simulate_activating_examples_gpt(api_key=API_KEY, n=10)
    prompt = create_prompt_gpt4o_from_simulated(
        simulated_examples,
        use_chain_of_thought=True,
    )
    explanations = get_gpt4o_explanation_from_prompt(prompt, n_completions=3)

    # Number the explanations starting at 1 for display.
    for number, explanation in enumerate(explanations, start=1):
        print(f"[Explicación {number}]: {explanation}")