<a href="https://colab.research.google.com/github/markup1970/Multi-agentic-model/blob/main/Copia_di_May_2nd%2C_2025%2C_multi_LLM_Based_Multi_Agent_System_with_Born_Roles_and_Full_Seeding_multi_web_scraping.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# ============================================================
# ==================== 📦 CELL 1 — SETUP AND INITIALIZATION ===
# ============================================================



import os
import datetime
import sys
import shutil

import google.generativeai as genai
%matplotlib inline
import importlib
import subprocess

from typing import Optional


# --- Utility for timestamped logging ---
def log(message):
    """Print ``message`` to stdout, prefixed with the current local timestamp."""
    stamp = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    print(f"[{stamp}] {message}")



## --- Install core dependencies only if missing ---
def ensure_pip(package, module_name=None):
    """
    Make sure a dependency is importable, pip-installing it when it is not.

    Args:
        package: Distribution name handed to ``pip install``.
        module_name: Importable module name, for distributions whose import
            name differs from the distribution name (e.g. ``faiss-cpu`` ->
            ``faiss``). Defaults to ``package``.

    Raises:
        subprocess.CalledProcessError: If the ``pip install`` command exits
            with a non-zero status.
    """
    name = module_name or package
    try:
        importlib.import_module(name)
    except ImportError:
        log(f"📦 Installing {package}…")
        subprocess.check_call([sys.executable, "-m", "pip", "install", "-q", "-U", package])
        # The import system caches directory listings; without this the
        # freshly installed module may not be found by imports that follow
        # in the same interpreter session.
        importlib.invalidate_caches()
    else:
        log(f"📦 {package} already installed.")

# ── End helper definition ─────────────────────────────────────
# Conditionally install only what is missing. Each call passes the pip
# distribution name and, where it differs, the importable module name.
# NOTE(review): `google.generativeai` is already imported near the top of this
# cell, so if that package were absent the import would fail before this check
# could install it — confirm whether that import should move below these calls.
ensure_pip("google-generativeai", "google.generativeai")
ensure_pip("openai")
ensure_pip("faiss-cpu", "faiss")
ensure_pip("sentence-transformers", "sentence_transformers")
ensure_pip("python-dotenv", "dotenv")


# ── end conditional installs ─────────────────────────

from dotenv import load_dotenv

# --- Load .env (for local environments) ---
load_dotenv()

# --- Universal Chrome + Selenium + Chromedriver Setup ---
def is_colab():
    """Report whether the ``google.colab`` module is loaded in this interpreter."""
    loaded_modules = sys.modules
    return "google.colab" in loaded_modules


def has_chrome():
    """
    Locate a Chrome/Chromium executable on PATH.

    The candidate binary names are probed in order; the path of the first one
    found is returned, or ``None`` when none of them is on PATH.
    """
    candidates = (
        "google-chrome",         # Debian/Ubuntu
        "google-chrome-stable",  # Alt Debian name
        "chrome",                # macOS Homebrew cask
        "chromium-browser",      # Chromium on Linux
        "chromium",              # Generic Chromium
    )
    for binary in candidates:
        location = shutil.which(binary)
        if location:
            return location
    return None


def has_chromedriver():
    """Return the path of ``chromedriver`` found on PATH, or ``None`` if absent."""
    driver_path = shutil.which("chromedriver")
    return driver_path

def install_chrome_and_chromedriver():
    """
    Install Selenium together with a Chrome/Chromium browser and chromedriver.

    In Colab: apt-installs ``chromium-browser`` and ``chromium-chromedriver``,
    pip-installs ``selenium`` and makes sure ``/usr/bin`` is on PATH.

    Elsewhere: pip-installs ``selenium`` and ``chromedriver-autoinstaller``,
    makes a best-effort attempt to apt-install ``google-chrome-stable`` when
    no browser binary is found, then lets ``chromedriver_autoinstaller``
    install the driver.

    Raises:
        subprocess.CalledProcessError: If a required apt-get/pip command
            fails. The local Chrome install is best effort and only logs.
    """
    if is_colab():
        # --- Colab branch ---
        print("📦 Installing Chrome and Chromedriver for Colab…")
        subprocess.check_call(["apt-get", "update", "-y"])
        subprocess.check_call(["apt-get", "install", "-y",
                               "chromium-browser", "chromium-chromedriver"])
        subprocess.check_call([sys.executable, "-m", "pip", "install", "selenium"])
        # Append /usr/bin only when it is not already listed, so re-running the
        # cell does not keep growing PATH; tolerate an unset PATH as well.
        current_path = os.environ.get("PATH", "")
        if "/usr/bin" not in current_path.split(os.pathsep):
            os.environ["PATH"] = (
                f"{current_path}{os.pathsep}/usr/bin" if current_path else "/usr/bin"
            )
    else:
        # --- LOCAL branch (laptop / server) ---
        print("📦 Installing Selenium and Chromedriver locally…")
        subprocess.check_call([sys.executable, "-m", "pip", "install",
                               "selenium", "chromedriver-autoinstaller"])

        # Safety check: ensure a Chrome/Chromium binary exists
        if not has_chrome():
            try:
                # Quick install for Debian/Ubuntu users
                subprocess.check_call(["apt-get", "update", "-y"])
                subprocess.check_call(["apt-get", "install", "-y",
                                       "google-chrome-stable"])
                log("✅ Google‑Chrome installed automatically.")
            except Exception as exc:
                # Best effort only: report why it failed instead of hiding it,
                # then continue so the driver install below still runs.
                log("⚠️  Chrome not installed automatically — "
                    f"please install it manually for Selenium. ({exc})")

        # Finally install/update the matching chromedriver.
        # Imported here because the package is only pip-installed just above.
        import chromedriver_autoinstaller
        chromedriver_autoinstaller.install()


def ensure_chrome_and_chromedriver():
    """Run the installer unless both a browser and chromedriver are already on PATH."""
    already_present = has_chrome() and has_chromedriver()
    if not already_present:
        install_chrome_and_chromedriver()
        return
    print("✅ Chrome and Chromedriver are already installed.")

# --- Ensure Chrome + Chromedriver are available ---
ensure_chrome_and_chromedriver()

# --- Unified API Key Loader (safe fallback included) ---
def get_api_key(key_name):
    """
    Look up an API key by name: Colab secrets first, then the environment.

    Args:
        key_name: Name of the secret / environment variable.

    Returns:
        The key value, or ``None`` when it is found in neither place. Values
        from a ``.env`` file are visible here only because ``load_dotenv()``
        has already copied them into the process environment.
    """
    api_key = None

    try:
        from google.colab import userdata
    except ImportError:
        userdata = None  # Not in Colab

    if userdata is not None:
        try:
            api_key = userdata.get(key_name)
        except Exception as e:
            # Colab may raise here instead of returning None. Keep going so
            # the environment fallback still runs, but name the exception
            # type so different failure causes can be told apart in the log.
            log(f"{key_name} not found in Colab secrets "
                f"(exception handled: {type(e).__name__}).")
        else:
            if api_key:
                log(f"{key_name} loaded from Colab userdata.")
            else:
                log(f"{key_name} not found in Colab secrets.")

    if not api_key:
        api_key = os.getenv(key_name)
        if api_key:
            log(f"{key_name} loaded from environment variables or .env file.")

    return api_key

# --- Load API keys ---
# Each value is falsy (None or empty) when the key is found neither in Colab
# secrets nor in the environment; get_available_models() skips any provider
# whose key is falsy.
google_api_key = get_api_key("GOOGLE_API_KEY")
openai_api_key = get_api_key("OPENAI_API_KEY")
claude_api_key = get_api_key("CLAUDE_API_KEY")
llama_api_key = get_api_key("LLAMA_API_KEY")

# --- Configurable model names and endpoints ---
# Each name can be overridden by an environment variable of the same name;
# the literal is the default. llm_generate() passes these as the `model`
# argument of the provider call.
# NOTE(review): the Claude and Llama clients point at the OpenRouter endpoint —
# confirm these default identifiers match OpenRouter's model IDs.
GEMINI_MODEL_NAME = os.getenv("GEMINI_MODEL_NAME", "models/gemini-2.5-pro-preview-03-25")
OPENAI_MODEL_NAME = os.getenv("OPENAI_MODEL_NAME", "gpt-4o")
CLAUDE_MODEL_NAME = os.getenv("CLAUDE_MODEL_NAME", "anthropic/claude-3-sonnet-20240229")
LLAMA_MODEL_NAME = os.getenv("LLAMA_MODEL_NAME", "meta-llama-3-8b-instruct")

from openai import OpenAI   # v1.x SDK

def get_available_models():
    """
    Build the registry of usable LLM clients.

    A provider is attempted only when its API key is truthy. A provider whose
    client construction raises is logged and left out of the result.

    Returns:
        dict mapping nickname ("gemini", "openai", "claude", "llama") to the
        constructed client/model object.
    """
    openrouter_url = "https://openrouter.ai/api/v1"
    registry = {}

    def _make_gemini():
        genai.configure(api_key=google_api_key)
        return genai.GenerativeModel(GEMINI_MODEL_NAME)

    # (nickname, label used in the failure log, api key, zero-arg factory)
    providers = (
        ("gemini", "Google Gemini", google_api_key, _make_gemini),
        ("openai", "OpenAI", openai_api_key,
         lambda: OpenAI(api_key=openai_api_key)),  # default base URL
        ("claude", "Claude", claude_api_key,
         lambda: OpenAI(api_key=claude_api_key, base_url=openrouter_url)),
        ("llama", "Llama", llama_api_key,
         lambda: OpenAI(api_key=llama_api_key, base_url=openrouter_url)),
    )

    for nick, label, key, build in providers:
        if not key:
            continue
        try:
            registry[nick] = build()
        except Exception as err:
            log(f"❌ {label} init failed: {err}")

    return registry


# Initialize available models


available_models = get_available_models()
log(f"Available models: {', '.join(available_models.keys())}")



def llm_generate(model_key: str, prompt: str, temperature: float = 1.0) -> str:
    """
    Send ``prompt`` to the registered model ``model_key`` and return its text.

    Args:
        model_key: Nickname in ``available_models`` ("gemini", "openai",
            "claude" or "llama").
        prompt: User prompt, sent as a single user message.
        temperature: Sampling temperature. Forwarded to every provider,
            including Gemini (via ``generation_config``).

    Returns:
        The generated text; an empty string when an OpenAI-compatible
        provider returns no message content.

    Raises:
        ValueError: If ``model_key`` is not registered.
        RuntimeError: If the key is registered but no call path is known.
    """
    client = available_models.get(model_key)
    if client is None:
        raise ValueError(f"No such model registered: {model_key!r}")

    # ── Google Gemini branch ────────────────────────────────────────────────
    if model_key == "gemini":
        try:
            try:
                # Honor the caller's temperature instead of silently dropping it.
                resp = client.generate_content(
                    prompt,
                    generation_config={"temperature": temperature},
                )
            except TypeError:
                # Client does not accept generation_config: prompt-only call.
                resp = client.generate_content(prompt)
            return resp.text
        except TypeError:
            # Fallback to call_with_response or call (which do accept temperature)
            if hasattr(client, "call_with_response"):
                resp = client.call_with_response(
                    prompt=prompt,
                    temperature=temperature,
                    max_output_tokens=512,
                )
                return resp.candidates[0].output
            if hasattr(client, "call"):
                return client.call(
                    prompt=prompt,
                    temperature=temperature,
                    max_output_tokens=512,
                )
            raise RuntimeError("Gemini client doesn’t support any known generate methods.")

    # ── OpenAI-compatible branches (OpenAI direct, Claude/Llama via OpenRouter) ──
    chat_model_names = {
        "openai": OPENAI_MODEL_NAME,
        "claude": CLAUDE_MODEL_NAME,
        "llama": LLAMA_MODEL_NAME,
    }
    if model_key in chat_model_names:
        resp = client.chat.completions.create(
            model=chat_model_names[model_key],
            messages=[{"role": "user", "content": prompt}],
            temperature=temperature,
        )
        # message.content can be None; keep the declared `-> str` contract.
        return resp.choices[0].message.content or ""

    # ── Fallback ─────────────────────────────────────────────────────────
    raise RuntimeError(f"Don't know how to call model: {model_key!r}")




# --- Embedding model (for semantic similarity and task-role mapping) ---
from sentence_transformers import SentenceTransformer

# Name of the sentence-transformers model; override with EMBEDDING_MODEL_NAME.
EMB_NAME = os.getenv("EMBEDDING_MODEL_NAME", "all-MiniLM-L6-v2")

# Load the model only once per interpreter session: re-running this cell
# reuses the object already stored in globals().
# NOTE(review): because of that guard, changing EMBEDDING_MODEL_NAME and
# re-running the cell keeps the previously loaded model — confirm this is intended.
if "embedding_model" not in globals():
    # Model files are cached under ~/.cache so they can be reused across runs
    embedding_model = SentenceTransformer(EMB_NAME, cache_folder=os.path.expanduser("~/.cache"))
    log(f"✅ Embedding model '{EMB_NAME}' loaded.")
else:
    log("✅ Embedding model already in memory.")

# Embedding dimension, used elsewhere to size fallback zero-vectors
EMB_DIM = embedding_model.get_sentence_embedding_dimension()
log(f"ℹ️  Embedding dimension: {EMB_DIM}")





[2025-05-19 00:33:55] 📦 google-generativeai already installed.
[2025-05-19 00:33:59] 📦 openai already installed.
[2025-05-19 00:33:59] 📦 Installing faiss-cpu…
[2025-05-19 00:34:46] 📦 sentence-transformers already installed.
[2025-05-19 00:34:46] 📦 Installing python-dotenv…
📦 Installing Chrome and Chromedriver for Colab…
[2025-05-19 00:35:22] GOOGLE_API_KEY loaded from Colab userdata.
[2025-05-19 00:35:23] OPENAI_API_KEY not found in Colab secrets (exception handled).
[2025-05-19 00:35:23] CLAUDE_API_KEY not found in Colab secrets (exception handled).
[2025-05-19 00:35:23] LLAMA_API_KEY not found in Colab secrets (exception handled).
[2025-05-19 00:35:23] Available models: gemini


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.5k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

[2025-05-19 00:35:33] ✅ Embedding model 'all-MiniLM-L6-v2' loaded.
[2025-05-19 00:35:33] ℹ️  Embedding dimension: 384


In [None]:
# ============================================================
# ==================== 🧰 CELL 2 — UTILITY FUNCTIONS ==========
# ============================================================

import collections
import re


# --- General utilities (display and text processing) ---
from IPython.display import HTML
import html

import numpy as np

from sklearn.metrics.pairwise import cosine_similarity

# Global constants for task‐scoring bonuses
SCORE_BONUS_KEYWORDS = ["ai", "model", "data", "algorithm"]


def is_repetitive(text: str, fraction: float = 0.7) -> bool:
    """
    Tell whether one single character makes up at least ``fraction`` of ``text``.

    Flags strings such as "aaaaaaa" or "!!!!!!!!!!". An empty string is never
    considered repetitive.
    """
    if text:
        top_count = max(collections.Counter(text).values())
        return top_count / len(text) >= fraction
    return False

def too_few_words(text: str, min_words: int = 3) -> bool:
    """
    Tell whether ``text`` contains fewer than ``min_words`` tokens.

    Tokens are obtained by splitting on whitespace; used to reject very short
    gibberish.
    """
    word_count = len(text.split())
    return min_words > word_count

def low_alnum_fraction(text: str, threshold: float = 0.5) -> bool:
    """
    Tell whether word characters make up less than ``threshold`` of ``text``.

    A "word character" is anything matched by the regex ``\\w`` (letters,
    digits and underscore). Flags strings dominated by punctuation or
    symbols. An empty string is never flagged.
    """
    if not text:
        return False
    word_chars = sum(1 for _ in re.finditer(r"\w", text))
    ratio = word_chars / len(text)
    return ratio < threshold


# --- Detect placeholder or blank output ---
def is_placeholder_output(output):
    """
    Tell whether ``output`` is blank (empty or whitespace only) or contains
    the placeholder marker "Gemini Error".
    """
    if not output.strip():
        return True
    return "Gemini Error" in output

# --- Compute semantic similarity to detect meaningless outputs ---

def is_meaningless_output(prompt, output, threshold=0.4):
    """
    Decide whether an LLM output is meaningless.

    Two stages:
      1) Cheap heuristics on the raw string (blank/placeholder, repetitive,
         too few words, mostly non-word characters).
      2) Cosine similarity between the prompt and output embeddings.

    Args:
        prompt: The prompt that produced ``output``.
        output: The model output. A non-string value (e.g. ``None``) is
            treated as meaningless instead of raising.
        threshold: Similarity below this value flags the output.

    Returns:
        ``(is_meaningless, similarity)`` as a plain ``bool`` and ``float``.
        The similarity is ``0.0`` whenever a heuristic fires or the
        embedding check fails.
    """
    # ── 1) Heuristic bail-outs ───────────────────────────────────────────────
    # Nothing usable came back at all
    if not isinstance(output, str):
        return True, 0.0

    # Empty or placeholder
    if not output.strip() or "Gemini Error" in output:
        return True, 0.0

    # Too repetitive, too few words, or dominated by non-word characters
    if is_repetitive(output) or too_few_words(output) or low_alnum_fraction(output):
        return True, 0.0

    # ── 2) Embedding-based semantic check ────────────────────────────────────
    try:
        prompt_vec = embedding_model.encode(prompt)
        output_vec = embedding_model.encode(output)

        # Normalize to numpy arrays
        if prompt_vec is None:
            prompt_vec = np.zeros(EMB_DIM)
        elif isinstance(prompt_vec, list):
            prompt_vec = np.array(prompt_vec)

        if output_vec is None:
            output_vec = np.zeros(EMB_DIM)
        elif isinstance(output_vec, list):
            output_vec = np.array(output_vec)

        # Cast to a builtin float so callers get plain Python types rather
        # than numpy scalars.
        sim = float(cosine_similarity([prompt_vec], [output_vec])[0][0])
        return sim < threshold, sim

    except Exception as e:
        print(f"⚠️ Embedding check failed (treating as meaningless): {e}")
        return True, 0.0



# ---------------------------------------------------------------------
# 📊  Physiology curves with mid‑interval adjustments
# --------------------------------------------------------------------

# Sampling convention: the k-1 sample is taken at the very start of an interval (after the bump); the k-0.5 sample is taken immediately after drop-out and pruning; the final k sample is added later, just before act() returns.
import matplotlib.pyplot as plt


# ---------------------------------------------------------------------
# 📊  Physiology curves with length check + mid‑interval adjustments
# ---------------------------------------------------------------------
import matplotlib.pyplot as plt




def plot_agent_physiology(runner, *, agents=None, figsize=(14, 7)):
    """
    Draw attention / fatigue / hunger histories, one subplot per quantity.

    For each agent, if the three history lists differ in length they are all
    trimmed to the shortest one and a warning is printed. Samples are placed
    every half interval (x = 0, 0.5, 1.0, …) with ticks at whole intervals.

    The x-axis spans at least intervals 0–10 and grows when any agent's
    history extends further, so longer runs are not clipped.

    Args:
        runner: Object exposing ``.agents``; used when ``agents`` is falsy.
        agents: Optional iterable of agents to plot. Each agent must expose
            ``name``, ``attention_history``, ``fatigue_history`` and
            ``hunger_history``.
        figsize: Figure size passed to ``plt.subplots``.
    """
    agents = agents or runner.agents
    _, axs = plt.subplots(3, 1, sharex=True, figsize=figsize)
    labels = ["Attention", "Fatigue", "Hunger"]
    longest = 0  # largest number of plotted samples across all agents

    for ag in agents:
        histories = (ag.attention_history, ag.fatigue_history, ag.hunger_history)

        # 1) Detect mismatches and pick a common length
        lens = [len(h) for h in histories]
        min_len = min(lens)
        if len(set(lens)) > 1:
            print(f"⚠️  {ag.name}: mismatched history lengths {lens}; trimming to {min_len} samples.")
        longest = max(longest, min_len)

        # 2) Half-interval sample positions: 0, 0.5, 1.0, …
        xs = [i * 0.5 for i in range(min_len)]

        # 3) Plot each trimmed series on its own subplot
        for ax, series in zip(axs, histories):
            ax.plot(xs, series[:min_len], marker="o", label=ag.name)

    # The last sample sits at x = (longest - 1) / 2; rounded up to a whole
    # interval that is longest // 2. Never show fewer than 10 intervals.
    x_max = max(10, longest // 2)

    for ax, lab in zip(axs, labels):
        ax.set_title(lab)
        ax.set_xticks(range(0, x_max + 1))
        ax.set_xlim(0, x_max)
        ax.set_ylim(0, 1)              # y-axis fixed to [0, 1]
        ax.set_xlabel(f"Interval #   (0 = start, … up to {x_max})")
        ax.grid(alpha=0.3)
        if ax.get_legend_handles_labels()[0]:
            ax.legend(loc="upper right")

    plt.tight_layout()
    plt.show()



In [None]:
# ============================================================
# ==================== 🧠 CELL 3 — ROLES, TASKS AND CAPABILITIES ===
# ============================================================

import random
import numpy as np

# --- Capabilities (always active internal functions) ---
# Maps capability name -> one-sentence description of that capability.
# NOTE(review): nothing in the visible part of this cell reads this dict;
# confirm where it is consumed.
CAPABILITY_DESCRIPTIONS = {
    "memory": "Store and retrieve previous outputs or mission paths in short- and long-term memory for reuse, similarity, or validation purposes.",
    "validation": "Assess outputs based on internal metrics (e.g., efficiency, meaningfulness) and enforce quality gates for meta-learning.",
    "foresight": "Predict scenario types (e.g., Black Swan, Tipping Point) based on prompt/output similarity and divergence from stored paths.",
    "logic": "Support consistent reasoning through entailment, deduction, and structural alignment.",
    "computation": "Perform symbolic or numeric calculations as needed to support analytical outputs.",
    "hardcoded_retrieval": "Use embedding-based similarity search within local memory or external/internal persistent db to support context-aware generation.",
    "triggerable_retrieval": "Use non-similarity search within local memory or external/internal persistent db",
    "embedding_similarity": "Use vector space similarity to compare prompts, tasks, or past outputs based on their semantic embeddings.",
    "llm_call": "Delegate sub-decisions or refinement tasks to a language model in-context during reasoning or generation.",
    "gated_inline_role_activation_mechanism": "Any role may  be activated on a second pass (suitable for a full digital-twin or simulation or a domain-specific need) only activated inline if their associated metric exceeds a threshold and they were not already selected via semantic similarity in the initial role selection."
}












# Maps role name -> natural-language description of the role.
# The keys define POSSIBLE_ROLES, and each description is embedded further
# down this cell to build ROLE_EMBEDDINGS, so editing a description changes
# that role's embedding.
ROLE_DESCRIPTIONS = {
    "Strategist":   "Analyze long-term risks and plan adaptive strategies.",
    "Scout":        "Retrieve evidence, search sources, and gather data.",
    "Executor":     "Summarize content, finalize decisions, and synthesize findings.Ensure completion of the intended task by applying summarization, formatting, or domain-specific finalization inline.",
    "Analyst":      "Verify facts, perform calculations, and assess logical validity.",
    "Builder":      "Design structured models, write code, optimize solutions, and assemble systems.",
    "Decomposer":   "Break down complex systems, reverse-engineer processes, and abstract key components.",
    "Verifier":     "Substantiate claims, provide evidence, and ground arguments in facts.",
    "Mathematician":"Compute results, formalize ideas mathematically, and solve equations.",
    "Logician":     "Apply deductive and inductive reasoning to structure arguments and rationalize decisions.",
    "Interpreter":  "Infer hidden meanings, clarify ambiguous information, and specify precise interpretations.",
    "Contextualizer": "Embed facts into relevant local or global context for clarity and relevance.",
    "Tester":       "Validate assumptions or outputs by testing behavior, reliability, or edge cases.",
    "Tracker":      "Follow evolving patterns, updates, or trajectories across time or datasets.",
    "Monitor":      "Audit systems or data streams for changes, drift, or anomalies.",
    "Translator":   "Convert information between different formats, languages, or conceptual systems.",
    "Forecaster":   "Model time-based developments, anticipate outcomes, and evaluate future scenarios.",
    "Planner":      "Formulate multi-step procedures, organize goal hierarchies, and structure action plans.",
    "Grapher":      "Extract and represent causal relationships or process flows using logical or visual structures.",
    "Statisticien": "Perform statistical inference, analyze quantitative patterns, and support deductive or abductive reasoning.",
    "Agent_Orchestrator": "Coordinate distributed agent behavior, manage task delegation, and align collaborative execution.",
    "Twin_Digitalizer": "Translate outputs into structured, executable representations such as causal graphs, workflows, or digital schematics.",
    "Simulator": "Run scenarios or test logic under controlled conditions to evaluate outcomes, consequences, or counterfactuals.",
    "Refiner": "Improve clarity, coherence, or quality of the output before it is finalized.",
    "Validator": "Assess the generated content for factual accuracy, logical consistency, or internal coherence during generation."
}


# --- Role → Task mapping ---
# ROLE_TASK_MAP is a fixed reference, declared once (e.g., in a config or constants cell) and re-created each time this cell is run.
# It maps the ideal/default tasks to each role.
# It does not change during execution.
# By contrast, self.role_task_map is a dynamic clone created from ROLE_TASK_MAP. It is mutated at runtime — meta-learned priority and pruning apply, so tasks get refreshed and pruned across rounds — and it reflects pruning, reactivation, and current task availability. It is the map used in the actual role-task assignment logic.



# Maps role name -> list of default task names for that role.
# Every task name listed here is checked against TASK_DESCRIPTIONS further
# down this cell.
ROLE_TASK_MAP = {
    "Strategist":     ["analyze", "synthesize", "plan"],
    "Scout":          ["retrieve", "search", "synthesize"],
    "Executor":       ["summarize", "finalize", "synthesize"],
    "Analyst":        ["analyze", "verify", "compute"],
    "Builder":        ["model", "code", "optimize", "assemble"],
    "Decomposer":     ["reverse-engineer", "abstractize", "decompose"],
    "Verifier":       ["substantiate", "ground", "fact-check"],
    "Mathematician":  ["compute", "mathematicize", "formalize"],
    "Logician":       ["deduct", "infer", "logicalize", "validate"],
    "Interpreter":    ["clarify", "infer", "disambiguate"],
    "Contextualizer": ["contextualize", "generalize", "specify"],
    "Tester":         ["test", "verify", "debug", "stress-test"],
    "Tracker":        ["track", "monitor", "forecast", "follow"],
    "Monitor":        ["monitor", "track", "verify", "audit"],
    "Translator":     ["translate", "clarify", "restructure"],
    "Forecaster":     ["forecast", "project", "model_trends"],
    "Planner":        ["sequence", "structure", "organize", "prioritize"],
    "Grapher":        ["extract_causality", "represent_relations", "build_graph"],
    "Statisticien":   ["infer", "estimate", "analyze_stats", "reason_quantitatively"],
    "Agent_Orchestrator": ["delegate", "coordinate", "synchronize", "align"],
    "Twin_Digitalizer": ["extract_structure", "encode_logic", "digitalize_output"],
    "Simulator": ["simulate", "test_outcomes", "explore_scenarios"],
    "Refiner": ["refine", "enhance", "improve"],
    "Validator": ["validate", "check_consistency", "fact_check_inline"]
}





# All role names, in ROLE_DESCRIPTIONS insertion order.
POSSIBLE_ROLES = list(ROLE_DESCRIPTIONS.keys())


# --- Born role preferences ---
# Maps node name -> list of preferred roles; every list is empty here.
# NOTE(review): presumably the node names must match the agent names created
# elsewhere — confirm against the code that reads BORN_ROLES.
BORN_ROLES = {
    "Node_0": [],
    "Node_1": [],
    "Node_2": []
}

# --- Precompute role embeddings ---
# Look the model up through globals() rather than by bare name: if Cell 1 has
# not been run the name does not exist, and a bare reference would raise
# NameError before the explanatory RuntimeError below could be reached.
if globals().get("embedding_model") is None:
    raise RuntimeError("embedding_model is not initialized. Please check SETUP (Cell 1).")

# Maps role name -> unit-length embedding of the role's description.
ROLE_EMBEDDINGS = {}

for role, description in ROLE_DESCRIPTIONS.items():
    embed = embedding_model.encode(description)

    # Coerce to a numpy array; a missing embedding becomes a zero vector
    if embed is None:
        embed = np.zeros(EMB_DIM)
    elif isinstance(embed, list):
        embed = np.array(embed)

    # Scale to unit length (a zero vector is left as is to avoid dividing by zero)
    norm = np.linalg.norm(embed)
    if norm > 0:
        embed = embed / norm

    ROLE_EMBEDDINGS[role] = embed

# --- Validation ---
for role, embedding in ROLE_EMBEDDINGS.items():
    if embedding is None or (hasattr(embedding, "__len__") and len(embedding) == 0):
        raise ValueError(f"❌ Missing or empty embedding for role: {role}")

print("✅ All role embeddings initialized correctly.")

# --- Task descriptions (semantic grounding) ---
# Maps task name -> one-sentence description. Defined BEFORE the validation
# loop below, which reads it. Every task name used in ROLE_TASK_MAP needs an
# entry here.
TASK_DESCRIPTIONS = {
    "analyze": "Perform deep analysis of the content or data.",
    "external_retrieval": "Query external APIs, structured tools, or function-based services to fetch up-to-date or specialized information. Does not include web scraping.",
    "retrieve": "Fetch relevant information from stored memory or internal/external persistent databases using embedding similarity or non-similarity.",
    "synthesize": "Combine multiple inputs into a coherent output.",
    "summarize": "Produce a concise summary of the key points.",
    "compute": "Perform numerical or logical calculations.",
    "deduct": "Draw logical conclusions from given facts.",
    "infer": "Make inferences based on context and evidence.",
    "plan": "Lay out a step-by-step strategy or roadmap.",
    "model": "Build a structured model or simulation.",
    "code": "Write or generate executable code.",
    "verify": "Check facts or validate data against trusted sources.",
    "substantiate": "Provide evidence or references to support a claim.",
    "ground": "Anchor abstract reasoning in concrete examples or data.",
    "design": "Architect a system, workflow, or high-level structure.",
    "debug": "Identify and fix errors in code or logic.",
    "optimize": "Improve performance or efficiency of a solution.",
    "critique": "Evaluate an argument or design and suggest improvements.",
    "visualize": "Generate or describe a chart, diagram, or visual aid.",
    "reverse-engineer": "Deconstruct a system or process to understand its components.",
    "forecast": "Predict future trends or outcomes based on data.",
    "mathematicize": "Formalize an idea in mathematical terms or equations.",
    "logicalize": "Apply formal logic to structure arguments or proofs.",
    "argument": "Construct or analyze a persuasive argument.",
    "factualize": "Convert a statement into factual, evidence-based form.",
    "rationalize": "Explain the reasoning or logic behind a decision or idea.",
    "create": "Generate new content or ideas from scratch.",
    "idealize": "Envision the perfect or optimal version of something.",
    "abstractize": "Extract the underlying essence or abstraction of a concept.",
    "generalize": "Form broad principles or patterns from specific examples.",
    "specify": "Provide detailed, precise information or requirements.",
    "contextualize": "Place information or findings in an appropriate and meaningful context.",
    "test": "Evaluate accuracy, performance, or validity under defined conditions.",
    "track": "Follow the progression or change of elements over time.",
    "monitor": "Continuously observe data, systems, or activity for change or irregularities.",
    "translate": "Convert information between languages, formats, or conceptual domains.",
    "clarify": "Make complex, ambiguous, or vague information more understandable and precise.",
    "restructure": "Reorganize the format, structure, or sequence of information to improve logic, usability, or coherence.",
    "project": "Extend current patterns or trends into plausible future states.",
    "model_trends": "Represent multi-factor temporal dynamics using structured abstractions.",
    "sequence": "Organize actions or steps in the correct temporal or logical order.",
    "structure": "Lay out components in a coherent, functional configuration.",
    "organize": "Arrange information or elements to improve usability or clarity.",
    "prioritize": "Assign importance or urgency levels to tasks or elements.",
    "extract_causality": "Identify and isolate causal relationships between elements.",
    "represent_relations": "Format connections or dependencies in a logical or visual structure.",
    "build_graph": "Construct a structured representation of causality or process flow.",
    "estimate": "Approximate quantities or outcomes using partial or statistical information.",
    "analyze_stats": "Interpret numerical trends, distributions, or correlations.",
    "reason_quantitatively": "Apply statistical or numerical reasoning to draw conclusions.",
    "delegate": "Assign responsibilities or subtasks to appropriate agents or subcomponents.",
    "coordinate": "Align timing, responsibilities, and dependencies across components or agents.",
    "synchronize": "Ensure concurrent elements act in harmony or follow a shared schedule.",
    "align": "Bring tasks, outputs, or strategies into logical and goal-consistent order.",
    "extract_structure": "Identify and extract underlying structure, logic, or dependencies from the output.",
    "encode_logic": "Convert implicit reasoning into a structured, machine-interpretable form.",
    "digitalize_output": "Transform qualitative content into a digital twin, graph, or schema.",
    "simulate": "Execute a scenario or logic structure under defined parameters to observe results.",
    "test_outcomes": "Assess results or consequences by running defined simulations or trials.",
    "explore_scenarios": "Investigate alternative outcomes through structured simulation or branching logic.",
    # Tasks named in ROLE_TASK_MAP that previously had no description; without
    # these entries the validation loop below can never pass.
    "search": "Look through available sources to locate relevant evidence or data.",
    "finalize": "Complete the output by applying final decisions, formatting, or domain-specific finishing steps.",
    "assemble": "Combine individual components into a complete, working system.",
    "decompose": "Break a complex system or problem down into smaller, manageable parts.",
    "fact-check": "Check individual claims against evidence to confirm they are factually correct.",
    "formalize": "Express an idea in rigorous, formal (e.g., mathematical or symbolic) notation.",
    "validate": "Confirm that reasoning or output is logically sound and internally coherent.",
    "disambiguate": "Resolve ambiguity by identifying the intended meaning among several possible interpretations.",
    "stress-test": "Probe behavior under extreme inputs, heavy load, or edge cases to find failure points.",
    "follow": "Keep up with an evolving pattern, update, or trajectory as it develops.",
    "audit": "Systematically inspect systems or data streams for changes, drift, or anomalies.",
    "refine": "Polish the output to improve its clarity and coherence before it is finalized.",
    "enhance": "Enrich the output to raise its overall quality or usefulness.",
    "improve": "Revise the output to correct weaknesses and raise its quality.",
    "check_consistency": "Check the generated content for logical consistency and internal coherence.",
    "fact_check_inline": "Assess the factual accuracy of content while it is being generated.",
}

# Validation: every role's task must exist in TASK_DESCRIPTIONS
for role, tasks in ROLE_TASK_MAP.items():
    for task in tasks:
        if task not in TASK_DESCRIPTIONS:
            raise ValueError(f"🚨 Task '{task}' for role '{role}' is not defined in TASK_DESCRIPTIONS.")




# --- Task name list, in TASK_DESCRIPTIONS insertion order ---
POSSIBLE_TASKS = [*TASK_DESCRIPTIONS]

# --- Precompute task embeddings ---
# Maps task name -> unit-length embedding of the task's description.
TASK_EMBEDDINGS = {}

for task, desc in TASK_DESCRIPTIONS.items():
    embed = embedding_model.encode(desc)

    # Coerce to a numpy array; a missing embedding becomes a zero vector
    if isinstance(embed, list):
        embed = np.array(embed)
    elif embed is None:
        embed = np.zeros(EMB_DIM)

    # Scale to unit length; a zero vector is stored unchanged
    norm = np.linalg.norm(embed)
    embed = embed / norm if norm > 0 else embed

    TASK_EMBEDDINGS[task] = embed








# Human-readable description of every evaluation metric; the text is what gets
# embedded below, so wording changes alter the resulting vectors.
METRIC_DESCRIPTIONS = {
    "accuracy": "The correctness and precision of the output.",
    "coverage": "How completely the solution addresses all requirements.",
    "coherence": "Logical consistency and connectedness of the answer.",
    "novelty": "Degree of originality or innovation.",
    "speed": "How quickly the solution or output is produced.",
    "resource_usage": "Efficiency in the use of time or compute resources.",
    "factuality": "Groundedness in established facts or data.",
    "strategic_alignment": "Alignment with long-term goals or strategies.",
    "relevance": "Appropriateness to the prompt or objectives.",
    "temporal_dependency_score": "The extent to which the output reasoning depends on time-sequenced events or time-based causality (e.g., forecasts, policy timelines).",
    "trend_complexity": "Intricacy of multi-factor trends described.",
    "factual_conflictuality": "Presence of factual tension or contradiction within the output.",
    "entailment_discrepancy": "How well the output logically follows from the prompt or retrieved grounding context.",
    "causal_complexity": "Number and depth of causal relations (chains, branches) within the output.",
    "inferential_depth": "Presence of layered inference steps (abduction, deduction, induction) required or performed.",
    "numerical_volatility": "Detects unstable or non-linear numeric behavior in reasoning (e.g., switching units, wide ranges, or poorly justified estimates).",
    "mission_chain_depth": "The number of conceptual or subgoal layers in the mission prompt itself.",
    "action_sequence_complexity": "Evaluates how elaborate or ordered the required actions or procedural steps are in the output.",
    "digitalizability": "How well an output (or prompt) can be converted into a structured, machine-interpretable representation.",
    "simulation_utility": "Estimates the potential value or feasibility of simulating the output to test outcomes, run scenarios, or evaluate consequences."
}

POSSIBLE_METRICS = [*METRIC_DESCRIPTIONS]

# --- Precompute one unit-length embedding per metric description ---
# None -> zero vector, list -> ndarray, then L2-normalise non-zero vectors.
METRIC_EMBEDDINGS = {}
for metric, desc in METRIC_DESCRIPTIONS.items():
    emb = embedding_model.encode(desc)
    if emb is None:
        emb = np.zeros(EMB_DIM)
    if isinstance(emb, list):
        emb = np.array(emb)
    norm = np.linalg.norm(emb)
    emb = emb / norm if norm > 0 else emb
    METRIC_EMBEDDINGS[metric] = emb

# Sanity check: fail fast if any metric ended up without a usable vector.
for metric, emb in METRIC_EMBEDDINGS.items():
    if emb is None or (hasattr(emb, "__len__") and not len(emb)):
        raise ValueError(f"❌ Missing or empty embedding for metric: {metric}")
print("✅ All metric embeddings initialized correctly.")


# Static role -> gating-metric table. It is only consulted at runtime to decide
# inline role activation via metric thresholds; it is never refreshed or pruned.
# NOTE(review): only "causal_complexity", "inferential_depth",
# "digitalizability" and "simulation_utility" appear in METRIC_DESCRIPTIONS as
# declared in this cell; the other metric names are presumably produced
# elsewhere — confirm before relying on them for embedding lookups.
ROLE_METRIC_MAP = dict(
    Grapher="causal_complexity",
    Statisticien="inferential_depth",
    Twin_Digitalizer="digitalizability",
    Simulator="simulation_utility",
    Strategist="strategic_depth",
    Planner="planning_span",
    Analyst="analytic_load",
    Interpreter="ambiguity_level",
    Verifier="grounding_gap",
    Forecaster="predictive_uncertainty",
    Decomposer="problem_entropy",
    Agent_Orchestrator="coordination_cost",
)












# Description of every optional agent feature; the text is embedded below.
FEATURE_DESCRIPTIONS = {
    "external_retrieval": "Query external APIs, structured tools, or function-based services to fetch up-to-date or specialized information. Does not include web scraping.",
    "web_scraping":          "Access live, current, or real-time data from the web.",
    "knowledge_graph_query": "Query structured knowledge or relationships between entities.",
    "code_execution":        "Write or execute code, implement functions, or provide scripts.",
    "sentiment_analysis":    "Analyze sentiment, feedback, or opinions in the text.",
    "entity_extraction":     "Extract names, entities, or lists from the content.",
    "data_query":            "Query numeric data, statistics, tables, or perform calculations.",
    "validation":            "Verify, validate, or fact-check the given information.",
    "summarization":         "Summarize, brief, or condense the provided content.",
    "groundedness_check": "Use factuality scoring, entailment, or statistical verification to validate whether output is consistent with real-world references.",
    "inline_refinement": "Enable inline refinement of output during initial generation based on internal thresholds, avoiding a second LLM call."
}


def compute_feature_description_embeddings(embedding_model, descriptions=FEATURE_DESCRIPTIONS, emb_dim=384):
    """
    Encode each description and return a dict of unit-length embeddings.

    Args:
        embedding_model: object exposing ``encode(text)``.
        descriptions: mapping of feature key -> description text. The default
            is bound to FEATURE_DESCRIPTIONS when this function is defined.
        emb_dim: length of the zero vector used when ``encode`` returns None.

    Returns:
        dict mapping each key to its embedding. A None result becomes a zero
        vector, a plain list is converted to an ndarray, and non-zero vectors
        are scaled to unit L2 norm.
    """
    embeddings = {}
    for key, desc in descriptions.items():
        embed = embedding_model.encode(desc)
        if embed is None:
            embed = np.zeros(emb_dim)
        elif isinstance(embed, list):
            embed = np.array(embed)
        norm = np.linalg.norm(embed)
        if norm > 0:
            embed = embed / norm
        embeddings[key] = embed
    return embeddings


# Must run AFTER the function definition above: calling it earlier raises
# NameError on a fresh kernel.
FEATURE_EMBEDDINGS = compute_feature_description_embeddings(embedding_model, FEATURE_DESCRIPTIONS, EMB_DIM)




def compute_uvr_status(values, agent):
    """
    Compare the variance of the most recent window against the window before it.

    Args:
        values: ordered numeric history (list, tuple, deque, or any iterable).
        agent: object whose ``meta_parameters`` dict may provide
            ``uvr_min_window`` (default 5) and ``uvr_inflection_ratio``
            (default 1.5).

    Returns:
        Tuple ``(uvr_triggered, recent_var, prior_var, var_ratio)``.
        With fewer than ``2 * min_window`` samples (or a window size below 1)
        the neutral result ``(False, 0.0, 0.0, 1.0)`` is returned.
    """
    min_window = agent.meta_parameters.get("uvr_min_window", 5)
    inflection_threshold = agent.meta_parameters.get("uvr_inflection_ratio", 1.5)

    # Materialise once: deques (used for the agent's variance history) do not
    # support slicing, and generic iterables have no len().
    values = list(values)

    if min_window < 1 or len(values) < 2 * min_window:
        return False, 0.0, 0.0, 1.0

    recent = values[-min_window:]
    prior = values[-2 * min_window:-min_window]

    recent_var = float(np.var(recent))
    prior_var = float(np.var(prior))

    if prior_var > 0:
        var_ratio = recent_var / (prior_var + 1e-8)
    elif recent_var > 0:
        # Variance appeared out of a perfectly flat window: unbounded increase.
        var_ratio = float("inf")
    else:
        # Both windows are flat: nothing changed, so report the neutral ratio
        # instead of a spurious infinite jump.
        var_ratio = 1.0

    uvr_triggered = bool(var_ratio > inflection_threshold)
    return uvr_triggered, recent_var, prior_var, var_ratio


# Pre-output born roles and semantic-similarity-based role assignment at initialization: no meta-parameters.
# Pre-output inline role injection (first pass) = default inline role prompt shaping (at every round). A role's activation probability or meta-weight is boosted if the role is among the born roles. Meta-parameters influence the LLM's first generation and need to be tightly coupled to prompt shaping and generation dynamics.
# Post-output rerouting is triggered via fallback logic only if validation or metric thresholds fail (second pass). Meta-parameters here should mirror those used for the pre-output inline roles, or at least include similar attributes.
# Post-output inline roles via metric-triggered activation never cause rerouting if the first output is valid (no second pass, just metric-based tagging). They are used for tagging, scoring, or internal logging. Meta-parameters here are more policy-based (e.g., thresholds for tagging) and don't control LLM behavior, so they don't need temperature/top-p.
# The post-output inline roles (used for tagging) are the core of the gating mechanism that supplements or overrides the semantic-similarity-based meaningfulness score.
# Tagging roles do not alter the output; they only log that they would have been useful.
# Annotation-based modification roles, like tagging roles, do not call the LLM, but they do alter outputs. Some roles can do both tagging and modification.
# True rewriting (structured post-processing) roles do not involve an LLM call, but structurally change the output.
# Per-role behaviour flags for inline roles. Pre-output roles additionally
# carry the sampling hints (temperature / top_p) that are written into the
# prompt; post-output-only roles carry flags only.
INLINE_ROLE_BEHAVIOR = {
    # ──────────────── Pre-output inline roles (shape LLM generation) ────────────────
    "Refiner": dict(
        temperature=0.2, top_p=0.9, reroutable=True, pre_output=True, post_output=False
    ),
    # Validator is also used in metric tagging, hence post_output=True.
    "Validator": dict(
        temperature=0.3, top_p=0.8, reroutable=True, pre_output=True, post_output=True
    ),
    "Executor": dict(
        temperature=0.1, top_p=0.95, reroutable=True, pre_output=True, post_output=True
    ),

    # ──────────────── Post-output only (no prompt shaping) ────────────────
    # Each role gets its own independent dict with identical flags.
    **{
        post_only_role: {"pre_output": False, "post_output": True, "reroutable": False}
        for post_only_role in ("Simulator", "Digitalizer", "Statisticien", "Grapher")
    },
}









✅ All role embeddings initialized correctly.


In [None]:
# ============================================================
# ==================== 🧠 CELL 4 — PROMPT EMBEDDING CACHE =====
# ============================================================

import numpy as np
# Global cache for prompt embeddings to avoid recomputation.
# Keys are the stripped, lower-cased prompt text (see get_prompt_embedding);
# entries are never evicted, so the dict grows with every distinct prompt.
PROMPT_EMBED_CACHE = {}

def get_task_embedding(task: str) -> np.ndarray:
    """
    Look up the precomputed embedding of `task` in TASK_EMBEDDINGS.

    Unknown task names yield a fresh zero vector of length EMB_DIM.
    """
    zero_fallback = np.zeros(EMB_DIM)
    return TASK_EMBEDDINGS.get(task, zero_fallback)

def get_prompt_embedding(prompt):
    """
    Fetch the embedding of `prompt`, encoding it only on a cache miss.

    The cache key is the prompt stripped of surrounding whitespace and
    lower-cased, so prompts that differ only in case or padding share one
    entry. That same normalized text is what gets encoded.
    A None encoder result is stored as a zero vector of length EMB_DIM and a
    plain list is stored as a numpy array.
    """
    cache_key = prompt.strip().lower()

    try:
        return PROMPT_EMBED_CACHE[cache_key]
    except KeyError:
        pass

    vector = embedding_model.encode(cache_key)

    # Coerce the encoder result so the cache only ever holds array-like values.
    if vector is None:
        vector = np.zeros(EMB_DIM)
    elif isinstance(vector, list):
        vector = np.array(vector)

    PROMPT_EMBED_CACHE[cache_key] = vector
    return vector


# Create the FAISS index for graph embeddings.
# IndexFlatL2 stores vectors of length GRAPH_EMB_DIM; search results are
# reported as distances (see find_similar_graphs).
graph_index = faiss.IndexFlatL2(GRAPH_EMB_DIM)
# Maps FAISS index positions to mission keys.
# NOTE(review): presumably populated wherever vectors are added to
# graph_index, in insertion order — confirm against the writer.
graph_key_map = {}  # To map FAISS index positions to mission keys

def find_similar_graphs(graph_embedding, top_k=3):
    """
    Retrieve the most similar causal graphs from FAISS based on graph embedding.

    Args:
        graph_embedding: array-like vector; it is cast to float32 and reshaped
            to a single query row.
        top_k: maximum number of neighbours to return.

    Returns:
        A list of (prompt_key, distance) pairs, nearest first. Empty when the
        index holds no vectors or `top_k` is not positive.
    """
    vec = np.array(graph_embedding).astype("float32").reshape(1, -1)
    if graph_index.ntotal == 0 or top_k <= 0:
        return []

    # Never request more neighbours than are stored: FAISS pads the missing
    # slots with label -1 and a sentinel distance.
    k = min(top_k, graph_index.ntotal)
    D, I = graph_index.search(vec, k)
    return [
        (graph_key_map.get(int(i), "UNKNOWN_KEY"), D[0][j])
        for j, i in enumerate(I[0])
        # `i >= 0` drops FAISS padding; the upper bound keeps the original
        # guard against positions that have no entry in the key map yet.
        if 0 <= i < len(graph_key_map)
    ]




In [None]:
# ============================================================
# ==================== 🌐 CELL 5 — WEB SCRAPING + UTILITIES ===
# ============================================================

import requests
from bs4 import BeautifulSoup
import time
from math import exp
from typing import List
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By

# --- Utility sigmoid (optional in scoring) ---
def sigmoid(x: float) -> float:
    """
    Return the logistic function 1 / (1 + e**-x) for any finite float.

    The branch on the sign of `x` keeps the argument of exp() non-positive,
    so large-magnitude inputs saturate to 0.0 / 1.0 instead of raising
    OverflowError.
    """
    if x >= 0:
        return 1 / (1 + exp(-x))
    z = exp(x)
    return z / (1 + z)

# --- Per-engine Selenium scrapers ------------------------------------------

def _scrape_search_snippets(engine_label, url_template, css_selector, query, max_chars, user_agent=None):
    """
    Shared Selenium routine behind the per-engine scrapers.

    Args:
        engine_label: engine name used in the status messages ("Google", ...).
        url_template: search URL containing a ``{query}`` placeholder.
        css_selector: selector matching the result-snippet elements.
        query: raw search text; it is URL-encoded before being inserted.
        max_chars: maximum length of the returned text.
        user_agent: optional value for Chrome's ``--user-agent`` switch.

    Returns:
        The non-blank element texts joined by spaces and cut to `max_chars`,
        or a "No <engine> web results found." / "⚠️ <engine> scraping failed"
        status string.
    """
    from urllib.parse import quote_plus  # local import: no new file-level dependency

    options = Options()
    options.add_argument("--headless")
    options.add_argument("--disable-gpu")
    options.add_argument("--no-sandbox")
    options.add_argument("--window-size=1920,1080")
    if user_agent:
        options.add_argument(f"--user-agent={user_agent}")
    driver = webdriver.Chrome(options=options)
    try:
        # quote_plus turns spaces into '+' and also escapes '&', '#', '+', ...
        # which would otherwise corrupt the query string.
        url = url_template.format(query=quote_plus(query))
        driver.get(url)
        time.sleep(2)  # give the results page time to render
        els = driver.find_elements(By.CSS_SELECTOR, css_selector)
        # Read each element's text once; every `.text` access is a driver call.
        texts = [el.text for el in els]
        text = " ".join(t for t in texts if t.strip())
        return text[:max_chars] if text else f"No {engine_label} web results found."
    except Exception as e:
        # Best-effort scraper: report the failure as text instead of raising.
        return f"⚠️ {engine_label} scraping failed: {e}"
    finally:
        driver.quit()


def scrape_google_with_selenium(query: str, max_chars: int = 1000) -> str:
    """Return concatenated snippets from Google search results."""
    return _scrape_search_snippets(
        "Google",
        "https://www.google.com/search?q={query}",
        "div.MjjYud div.VwiC3b",
        query,
        max_chars,
        user_agent="Mozilla/5.0",
    )


def scrape_bing_with_selenium(query: str, max_chars: int = 1000) -> str:
    """Return concatenated snippets from Bing search results."""
    return _scrape_search_snippets(
        "Bing",
        "https://www.bing.com/search?q={query}",
        "div.b_caption p",
        query,
        max_chars,
        user_agent="Mozilla/5.0",
    )


def scrape_baidu_with_selenium(query: str, max_chars: int = 1000) -> str:
    """Return concatenated snippets from Baidu search results."""
    # Baidu is queried with Chrome's default user agent, as before.
    return _scrape_search_snippets(
        "Baidu",
        "https://www.baidu.com/s?wd={query}",
        "div.result",
        query,
        max_chars,
    )

# --- Semantic‐filtering stub -----------------------------------------------

def semantic_filter(
    snippets: List[str],
    query: str,
    top_k: int,
    similarity_threshold: float
) -> List[str]:
    """
    Rank `snippets` by embedding cosine similarity to `query`.

    Snippets scoring below `similarity_threshold` are discarded; the survivors
    are returned best-first, at most `top_k` of them. An empty input yields an
    empty list without touching the embedding model.
    """
    if not snippets:
        return []

    # Query vector: None -> zero vector of the model's dimension, list -> ndarray.
    query_vec = embedding_model.encode(query)
    if query_vec is None:
        query_vec = np.zeros(embedding_model.get_sentence_embedding_dimension())
    elif isinstance(query_vec, list):
        query_vec = np.array(query_vec)

    # Snippet matrix, encoded in a single batch call.
    snip_vecs = embedding_model.encode(snippets, convert_to_numpy=True)
    if snip_vecs is None:
        snip_vecs = np.zeros((len(snippets), query_vec.shape[0]))
    elif isinstance(snip_vecs, list):
        snip_vecs = np.array(snip_vecs)

    scores = cosine_similarity([query_vec], snip_vecs)[0]

    # Threshold first, then a stable descending sort on the score.
    ranked = sorted(
        (
            (snippet, score)
            for snippet, score in zip(snippets, scores)
            if score >= similarity_threshold
        ),
        key=lambda pair: pair[1],
        reverse=True,
    )
    return [snippet for snippet, _ in ranked[:top_k]]



# --- Unified wrapper with semantic filtering ------------------------------
def web_scrape(
    query: str,
    engine: str            = "google",
    max_chars: int         = 1000,
    top_k: int             = 5,
    similarity_threshold: float = 0.3
) -> List[str]:
    """
    Scrape one search engine and return the snippets most relevant to `query`.

    1) Run the Selenium scraper for the chosen engine.
    2) Split the raw text into candidate snippets (by line, then by "·").
    3) Pass them through semantic_filter() to keep the `top_k` best matches.

    Args:
        query: search text.
        engine: "google", "bing" or "baidu".
        max_chars: maximum number of characters requested from the scraper.
        top_k: maximum number of snippets to return.
        similarity_threshold: minimum cosine similarity a snippet must reach.

    Returns:
        List of snippets; empty when the scraper found nothing or failed.

    Raises:
        ValueError: if `engine` is not one of the supported names.
    """
    if engine == "google":
        raw = scrape_google_with_selenium(query, max_chars)
    elif engine == "bing":
        raw = scrape_bing_with_selenium(query, max_chars)
    elif engine == "baidu":
        raw = scrape_baidu_with_selenium(query, max_chars)
    else:
        raise ValueError(f"Unsupported search engine: {engine}")

    # The scrapers report "nothing found" and failures as plain text. Those
    # status strings are not search results and must not reach the caller.
    label = engine.capitalize()
    if raw == f"No {label} web results found." or raw.startswith(f"⚠️ {label} scraping failed:"):
        return []

    # Turn the returned string into candidate snippets: one per non-blank
    # line, each further split on the "·" separator.
    snippets: List[str] = []
    for line in raw.splitlines():
        snippets.extend(part.strip() for part in line.split("·") if part.strip())

    # finally apply semantic filtering
    return semantic_filter(snippets, query, top_k, similarity_threshold)







In [None]:
# ============================================================
# ==================== 🧪 CELL 6 — TEST WEB SCRAPING =========
# ============================================================

# Make sure you’ve already run CELL 5 so web_scrape() is defined.

def test_scraping_semantic(
    query: str,
    engines: Optional[list[str]] = None,
    top_k: int = 3,
    similarity_threshold: float = 0.4
):
    """
    Smoke-test web_scrape() against several engines and print the hits.

    Args:
        query: search text sent to every engine.
        engines: engine names to try; defaults to google, bing and baidu.
        top_k: maximum number of snippets requested per engine.
        similarity_threshold: minimum similarity passed to web_scrape().

    Errors raised by web_scrape() are printed and the next engine is tried.
    """
    # None sentinel instead of a list literal: a mutable default would be
    # shared between calls.
    if engines is None:
        engines = ["google", "bing", "baidu"]

    for engine in engines:
        print(f"\n🔎 Testing {engine.capitalize()} for “{query}” (top_k={top_k}, sim_thresh={similarity_threshold})…")
        try:
            hits = web_scrape(
                query,
                engine=engine,
                max_chars=1000,
                top_k=top_k,
                similarity_threshold=similarity_threshold
            )
        except Exception as e:
            # Diagnostic helper: keep going so the remaining engines are tested.
            print(f"⚠️ Error calling web_scrape: {e}")
            continue

        if not hits:
            print(" - No snippets returned.")
        else:
            for idx, snippet in enumerate(hits, start=1):
                print(f" {idx}. {snippet}")

# Run the semantic test
test_scraping_semantic("Artificial Intelligence")







🔎 Testing Google for “Artificial Intelligence” (top_k=3, sim_thresh=0.4)…
 - No snippets returned.

🔎 Testing Bing for “Artificial Intelligence” (top_k=3, sim_thresh=0.4)…
 - No snippets returned.

🔎 Testing Baidu for “Artificial Intelligence” (top_k=3, sim_thresh=0.4)…
 1. 百度百科 artificial intelligence是什么意思_artificial intellige...
 2. 金山词霸 artificial-intelligence - 搜索 词典


In [None]:
# ============================================================
# ==================== 🤖 CELL 7 — STRATEGIC AGENT =====
# ============================================================
import random
import time
import math               # ← new
import difflib            # ← new
import numpy as np        # already in globals but explicit here is safer
from IPython.display import display, Markdown  # ← for highlight_text_differences
from scipy import stats
from collections import Counter
import networkx as nx
from collections import deque



# =======================
# 7.1. **Initialization and Setup**
# =======================
class StrategicAgent:


    def __init__(
        self,
        name,
        model_key,
        born_roles=None,
        born_metrics=None,
        runner=None
    ):
        """
        Build an agent bound to one LLM backend and seed its roles, metrics,
        tasks, physiology counters and meta-parameters.

        Args:
            name: display name of the agent.
            model_key: one of "openai", "gemini", "claude", "llama"; selects
                the model name constant and the entry of `available_models`.
            born_roles: initial roles; copied, so the caller's list is not
                shared. Defaults to no roles.
            born_metrics: initial metrics; copied likewise. Defaults to none.
            runner: optional object stored as-is on `self.runner`.

        Raises:
            ValueError: if `model_key` is not one of the four known keys.
            KeyError: if `available_models` has no entry for `model_key`.
        """
        self.name = name
        self.model_key = model_key

        # Model selection. Kept as an if/elif chain so only the constant of
        # the requested backend is looked up.
        if model_key == "openai":
            self.model_name = OPENAI_MODEL_NAME
        elif model_key == "gemini":
            self.model_name = GEMINI_MODEL_NAME
        elif model_key == "claude":
            self.model_name = CLAUDE_MODEL_NAME
        elif model_key == "llama":
            self.model_name = LLAMA_MODEL_NAME
        else:
            raise ValueError(f"Unknown model_key {model_key!r}; cannot set self.model_name")

        self.born_roles = born_roles[:] if born_roles else []
        self.roles = born_roles[:] if born_roles else []
        self.roles_history = [self.roles[:]]
        self.purge_log = []
        self.model = available_models[model_key]
        self.runner = runner
        # One bookkeeping record per known task; embeddings are filled lazily.
        self.task_store = {
            key: {
                "key": key,
                "desc": TASK_DESCRIPTIONS[key],
                "embedding": None,
                "last_score": 0.0
            }
            for key in TASK_DESCRIPTIONS
        }
        self.born_metrics = born_metrics[:] if born_metrics else []
        self.metrics = born_metrics[:] if born_metrics else []
        self.metrics_history = [self.metrics[:]]  # (Optional, for tracking each round)

        # Compute born_tasks from born_roles (de-duplicated via a set, so the
        # resulting order is not guaranteed).
        self.born_tasks = list(set(
            t for r in self.born_roles for t in ROLE_TASK_MAP.get(r, [])
        ))
        self.tasks = self.born_tasks[:]  # Initialize with full set (can be pruned later)
        self.tasks_history = [self.tasks[:]]  # Optional: track task assignment over rounds

        # Spike memory and logging
        self.signal_spike_log = deque(maxlen=100)  # Store recent high-signal events
        self.signal_clusters = []

        # --- Physiology tracking ---
        self.attention = 1.0
        self.fatigue = 0.0
        self.hunger = 0.0
        self.attention_history = []
        self.fatigue_history = []
        self.hunger_history = []
        self._phys_log_current = []

        # --- Validation & time tracking ---
        self.local_time_log = []
        self.local_interval_log = []
        self.local_round = 1
        self.interval = 1
        self.local_mission_time_log = []

        # --- Recovery/stagnation tracking ---
        self.stagnation_counter = 0

        self.features = []

        self.available_models = available_models
        self.short_memory = []
        self.long_memory = {}
        self.usage_count = 0
        self.in_cooperation = False
        self.meaningless_output_counter = 0
        self.external_access_count = 0
        self.enable_web_scraping = True
        self.start_time = time.time()
        # 🆕 Reroute safeguard flag
        self.has_been_rerouted = False

        # Dual-layer variance tracking for foresight reactivation
        self.variance_history = {
            "uvr_similarity_short": deque(maxlen=5),      # short-term rolling window
            "uvr_similarity_reference": deque(maxlen=10), # stable past window
            "reactivation_flag": False                    # dynamic gate
        }

        # --- Unified meta-parameters (short-term & long-term) ---
        self.meta_parameters = {
            # --- Structure: roles, pruning, dropout, reactivation ---
            "role_assignment_threshold": 0.7,
            "role_threshold_range": (0.4, 0.9),
            "pruning_threshold": 0.2,
            "pruning_threshold_range": (0.2, 0.8),
            "pruning_decay_rate": 3.0,
            "pruning_decay_range": (1.0, 5.0),
            "pruning_reactivation_prob": 0.1,
            "pruning_reactivation_range": (0.05, 0.3),
            "dropout_base_probability": 0.1,
            "dropout_base_prob_range": (0.1, 0.5),
            "dropout_decay_rate": 1.5,
            "dropout_decay_range": (1.0, 5.0),

            # --- Task and role selection ---
            "max_roles": 3,
            "max_roles_range": (1, 5),
            "max_tasks": 5,
            "max_tasks_range": (1, 7),
            "top_k_tasks": 2,
            "top_k_tasks_range": (1, 5),
            "task_threshold": 0.4,
            "task_threshold_range": (0.2, 0.7),
            "task_similarity_threshold": 0.4,
            "task_similarity_range": (0.2, 0.8),
            "reuse_similarity_threshold": 0.75,
            "reuse_similarity_range": (0.6, 0.9),

            # Metric selection
            "metric_threshold_range": (0.3, 0.8),
            "metric_selection_threshold": 0.5,
            "metric_threshold_lr": 0.01,
            "top_k_metrics": 3,  # the comma here was missing (SyntaxError)

            "max_metrics": 3,
            "max_metrics_range": (1, 5),
            "top_k_metrics_range": (1, 5),
            "metric_similarity_threshold": 0.4,
            "metric_similarity_range": (0.2, 0.8),

            "entropy_expected": 0.65,
            "entropy_margin": 0.15,
            "entropy_softening_factor": 0.05,

            # --- Jollycard & strategic exploration ---
            "jollycard_injection_weight": 0.5,
            "jollycard_injection_weight_range": (0.1, 0.9),
            "jollycard_importance_threshold": 0.3,
            "jollycard_importance_threshold_range": (0.2, 0.7),
            "jollycard_sampling_temperature": 1.0,
            "jollycard_sampling_temperature_range": (0.5, 2.0),

            # --- Noise, randomness, learning ---
            "score_noise_min": 0.01,
            "score_noise_max": 0.2,
            "score_noise_min_floor": 0.001,
            "score_noise_max_ceiling": 0.3,
            "score_noise_lr": 0.01,
            "lr_efficiency_weight": 0.4,
            "lr_meaningfulness_weight": 0.4,
            "lr_decay_rate": 0.05,
            "lr_min": 0.001,
            "lr_max": 0.05,
            "lr_min_range": (0.0005, 0.005),
            "lr_max_range": (0.02, 0.1),

            # --- Decay, stagnation, and misc. ---
            "stagnation_recovery_threshold": 5,
            "lambda_time": 0.5,
            "lambda_time_range": (0.1, 1.0),
            "lambda_usage": 0.3,
            "lambda_usage_range": (0.1, 0.8),
            "external_decay_time_weight": 0.05,
            "external_decay_usage_weight": 1.0,
            "external_decay_time_range": (0.01, 0.2),
            "external_decay_usage_range": (0.1, 2.0),
            "external_decay_score_threshold": 0.2,
            "external_decay_score_range": (0.05, 0.5),

            # --- Physiology meta-parameters ---
            "attention_time_weight": 0.05,
            "attention_fatigue_weight": 0.05,
            "attention_hunger_weight": 0.05,
            "attention_threshold": 0.3,
            "fatigue_task_weight": 0.15,
            "fatigue_time_weight": 0.03,
            "dropout_time_weight": 0.05,
            "hunger_usage_weight": 0.1,
            "dropout_attention_weight": 0.1,
            "dropout_fatigue_weight": 0.1,
            "dropout_hunger_weight": 0.1,
            "pruning_attention_weight": 0.2,
            "pruning_fatigue_weight": 0.2,
            "pruning_hunger_weight": 0.2,
            "external_call_base_prob": 0.4,
            "external_attention_weight": 0.2,
            "external_fatigue_weight": 0.2,
            "external_hunger_weight": 0.2,

            # --- Short-term meta-parameters (merged here) ---
            "task_feature_coupling": 0.5,
            "strategy_fit": 0.5,
            "cooperation_bias": 0.5,
            "cooperation_baseline": 0.5,
            "cooperation_baseline_range": (0.2, 0.8),
            "cooperation_randomness_range": (0.05, 0.3),

            # --- UVR (variance-ratio) parameters ---
            # The literal used to repeat the uvr_weight_* and uvr_min_window
            # keys with identical values; each key is now listed once.
            "uvr_weight_prompt": 0.2,          # weight of prompt similarity in UVR
            "uvr_weight_output": 0.2,          # weight of output similarity in UVR
            "uvr_weight_graph":  0.2,          # weight of graph similarity in UVR
            "uvr_weight_path":   0.2,          # weight of path similarity in UVR
            "uvr_weight_physio": 0.2,          # weight of physiological similarity in UVR
            "uvr_volatility_threshold": 0.1,   # total volatility needed to trigger short-term UVR adjustment
            "uvr_min_window": 5,               # rolling window size for computing variance
            "uvr_inflection_ratio": 1.5,        # how much recent variance must exceed prior to trigger UVR

            # ─── Pre-Output Inline Role Parameters ───────────────────────────────

            # Temperature and top-p for prompt shaping during first LLM call
            "refiner_temperature": 0.2,
            "refiner_top_p": 0.9,

            "validator_temperature": 0.3,
            "validator_top_p": 0.8,

            "executor_temperature": 0.1,
            "executor_top_p": 0.95,

            # Meta-weights for pre-output roles (used in activation decision)
            "refiner_weight": 1.0,
            "validator_weight": 1.0,
            "executor_weight": 1.0,

            # Optional: Boost if role is also a born role (pre-output only)
            "born_role_boost": 0.2,                        # ← applies to pre-output weights
            "inline_role_weight_boost_if_born": 1.2,      # ← alternate label (keep one)

            # ─── Post-Output Inline Role Thresholds (Tagging & Gating) ───────────

            "validation_threshold": 0.75,         # trigger Validator tag
            "refinement_trigger_score": 0.6,      # trigger Refiner tag
            "inline_rerouting_threshold": 0.6,    # reroute only if fallback needed

            # Meta-weights for post-output taggable roles (used in utility or memory tagging)
            "simulator_weight": 1.0,
            "digitalizer_weight": 1.0,
            "grapher_weight": 1.0,
            "statistician_weight": 1.0,
            "strategist_weight": 1.0,
            "analyst_weight": 1.0,
            "builder_weight": 1.0,
            "scout_weight": 1.0,

            # NOTE(review): compute_uvr_status() reads "uvr_inflection_ratio";
            # this differently named key is kept for any other reader —
            # confirm whether it is still used.
            "uvr_inflection_threshold": 1.5,
        }

        # --- Static task pool per role ---
        # `last_mission_prompt` is not set anywhere above, so the getattr
        # fallback passes prompt=None unless a subclass defines it.
        self.role_task_map = {}
        for role in self.born_roles:
            tasks = self.get_task_candidates_for_roles(
                roles=[role],
                prompt=getattr(self, 'last_mission_prompt', None),
                top_k=self.meta_parameters.get("max_tasks", 5)
            )
            self.role_task_map[role] = tasks


    def inject_inline_roles_into_prompt(self, base_prompt):
        """
        Inject pre-output inline roles (Refiner, Validator, Executor) into the LLM prompt
        based on meta-learned weights and born-role boosting.
        """
        inline_roles_to_inject = []

        for role in ["Refiner", "Validator", "Executor"]:
            if self.should_inject_inline_role(role):
                inline_roles_to_inject.append(role)

        if not inline_roles_to_inject:
            return base_prompt  # No changes

        # Build role-shaping instructions
        role_instructions = []
        for role in inline_roles_to_inject:
            behavior = INLINE_ROLE_BEHAVIOR.get(role, {})
            temperature = self.meta_parameters.get(f"{role.lower()}_temperature", behavior.get("temperature", 0.7))
            top_p = self.meta_parameters.get(f"{role.lower()}_top_p", behavior.get("top_p", 0.9))

            instruction = (
                f"\n\n---\n"
                f"🔧 Role: {role}\n"
                f"• Style Guidance: Behave as a {role.lower()}.\n"
                f"• Generation Parameters: temperature={temperature}, top_p={top_p}\n"
                f"---\n"
            )
            role_instructions.append(instruction)

        # Prepend to the original prompt
        return "".join(role_instructions) + base_prompt





    def get_effective_meta_weight(self, role_name, base_weight_key, boost_if_born=1.2):
        """
        Compute the effective meta-weight for an inline role.
        - `role_name`: The inline role being evaluated (e.g., "Refiner").
        - `base_weight_key`: The meta-parameter key for this role (e.g., "refiner_weight").
        - `boost_if_born`: Optional multiplicative boost if the role is a born role.
        This utility applies only to pre-output inline roles that influence the first LLM generation. It uses: meta_parameters["<role>_weight"] and a boost if the role is also a born role.
        """
        # Get the base weight (meta-learned or default)
        base_weight = self.meta_parameters.get(base_weight_key, 1.0)

        # Apply boost if this role is a born role
        if role_name in self.born_roles:
            return base_weight * boost_if_born
        return base_weight


    def should_inject_inline_role(self, role_name):
        """
        Tell whether a pre-output inline role should be injected into the prompt.

        The role's effective weight is obtained from
        ``get_effective_meta_weight`` using:
        - meta_parameters["<role>_weight"] (role name lower-cased)
        - meta_parameters["inline_role_weight_boost_if_born"] (default 1.2)

        Returns:
            True when the effective weight is strictly greater than 0.5.
        """
        weight_key = f"{role_name.lower()}_weight"
        born_boost = self.meta_parameters.get("inline_role_weight_boost_if_born", 1.2)

        # Fixed cut-off for now; the effective weight must exceed it strictly.
        injection_cutoff = 0.5
        return self.get_effective_meta_weight(
            role_name, weight_key, boost_if_born=born_boost
        ) > injection_cutoff


    def load_long_memory(self, path="long_memory.json"):
        """
        Load long-term memory entries and output clusters from a JSON file.

        The file holds one object per memory key plus an optional ``"__meta__"``
        object whose ``"output_clusters"`` list stores the clusters (the layout
        written by ``save_long_memory``). List-valued ``graph_embedding`` fields
        and cluster ``centroid`` fields are converted back to NumPy arrays.

        The whole file is parsed before any attribute is touched, so a malformed
        file (or a cluster without a ``centroid``) can no longer leave the agent
        with half-restored state.

        Args:
            path: Location of the JSON file to read.

        Side effects:
            On success sets ``self.long_memory`` and ``self.output_clusters``.
            When the file does not exist both are reset to empty containers.
            On any other failure a warning is printed and the current state is
            left untouched.
        """
        try:
            with open(path, "r", encoding="utf-8") as f:
                raw = json.load(f)

            # The meta section is stored next to the entries under a reserved key.
            meta = raw.pop("__meta__", {})

            restored = {}
            for key, value in raw.items():
                entry = dict(value)
                if isinstance(entry.get("graph_embedding"), list):
                    entry["graph_embedding"] = np.array(entry["graph_embedding"])
                restored[key] = entry

            clusters = []
            for cluster in meta.get("output_clusters", []):
                # A missing centroid is tolerated (restored as None) instead of
                # aborting the whole load with a KeyError.
                centroid = cluster.get("centroid")
                if isinstance(centroid, list):
                    centroid = np.array(centroid)
                clusters.append({
                    "members": cluster.get("members", []),
                    "centroid": centroid,
                    "prompt_key": cluster.get("prompt_key"),
                    "task": cluster.get("task"),
                    "usage_count": cluster.get("usage_count", 1),
                    "best_output": cluster.get("best_output"),
                    "best_score": cluster.get("best_score"),
                })
        except FileNotFoundError:
            print(f"⚠️ {self.name} no existing long memory at {path} — starting fresh.")
            self.long_memory = {}
            self.output_clusters = []
        except Exception as e:
            # Best-effort load: report and keep whatever state the agent had.
            print(f"⚠️ {self.name} failed to load long memory: {e}")
        else:
            # Commit only after everything parsed successfully.
            self.long_memory = restored
            self.output_clusters = clusters
            print(f"✅ {self.name} loaded long memory ({len(restored)} entries + {len(self.output_clusters)} output clusters) ← {path}")





    def save_long_memory(self, path="long_memory.json"):
        """
        Persist long-term memory and output clusters to a JSON file.

        Each entry of ``self.long_memory`` is shallow-copied and a NumPy
        ``graph_embedding`` is converted to a list. If the agent has an
        ``output_clusters`` attribute, the clusters are stored under the
        reserved ``"__meta__"`` key with NumPy centroids converted to lists.

        The document is serialized to a string *before* the target file is
        opened, so a value that cannot be JSON-encoded no longer truncates an
        existing memory file.

        Args:
            path: Destination of the JSON file.

        Side effects:
            Writes ``path``. Failures are reported with a printed warning and
            never raised (best-effort persistence).
        """
        try:
            serializable = {}
            for key, value in self.long_memory.items():
                entry = dict(value)
                if isinstance(entry.get("graph_embedding"), np.ndarray):
                    entry["graph_embedding"] = entry["graph_embedding"].tolist()
                serializable[key] = entry

            if hasattr(self, "output_clusters"):
                clusters = []
                for cluster in self.output_clusters:
                    # A cluster without a centroid is saved with None rather
                    # than failing the whole save with a KeyError.
                    centroid = cluster.get("centroid")
                    if isinstance(centroid, np.ndarray):
                        centroid = centroid.tolist()
                    clusters.append({
                        "members": cluster.get("members", []),
                        "centroid": centroid,
                        "prompt_key": cluster.get("prompt_key"),
                        "task": cluster.get("task"),
                        "usage_count": cluster.get("usage_count", 1),
                        "best_output": cluster.get("best_output"),
                        "best_score": cluster.get("best_score"),
                    })
                serializable["__meta__"] = {"output_clusters": clusters}

            # Encode first: opening with "w" truncates the file immediately, so
            # any encoding error must surface before that point.
            payload = json.dumps(serializable)
            with open(path, "w", encoding="utf-8") as f:
                f.write(payload)
            # Report real memory entries only (the "__meta__" section is not one).
            print(f"✅ {self.name} saved long memory ({len(self.long_memory)} entries) → {path}")
        except Exception as e:
            print(f"⚠️ {self.name} failed to save long memory: {e}")





    # =======================
    # 7.2. **Model Interaction & Initialization**
    # =======================

    # ── Physiology helpers ────────────────────────────────────────────────

    def _append_phys(self, att, fat, hung):
        """Record one physiology sample: attention, fatigue and hunger, in that order."""
        history_names = ("attention_history", "fatigue_history", "hunger_history")
        for history_name, sample in zip(history_names, (att, fat, hung)):
            getattr(self, history_name).append(sample)


    def seed_first_phys_sample(self):
        """
        Push the baseline physiology sample; meant to be called right after a
        local round is (re-)started.

        Baseline values:
        • attention = 1.0
        • fatigue   = 0.0
        • hunger    = 0.0
        """
        baseline_attention = 1.0
        baseline_fatigue = 0.0
        baseline_hunger = 0.0
        self._append_phys(baseline_attention, baseline_fatigue, baseline_hunger)





    def select_model(self):
        """
        Choose the active model client and record the choice on the agent.

        Keys of ``self.available_models`` are tried in the order
        gemini → openai → claude → llama. If none of them is present, the first
        key of ``self.available_models`` is used instead.

        Returns:
            tuple: ``(model_key, model_client)``.

        Raises:
            RuntimeError: If ``self.available_models`` is empty (previously this
                leaked a bare ``StopIteration`` from the fallback lookup).

        Side effects:
            Sets ``self.model_key``, ``self.model`` and ``self.model_name``.
            ``self.model_name`` is ``None`` for a key that has no configured
            model name.
        """
        # Map internal keys to the configured model names.
        model_names = {
            "gemini": GEMINI_MODEL_NAME,
            "openai": OPENAI_MODEL_NAME,
            "claude": CLAUDE_MODEL_NAME,
            "llama": LLAMA_MODEL_NAME,
        }
        priority = ("gemini", "openai", "claude", "llama")

        available = self.available_models
        if not available:
            raise RuntimeError("select_model: no models are available")

        # First preferred key that is available, otherwise any available key.
        key = next((candidate for candidate in priority if candidate in available), None)
        if key is None:
            key = next(iter(available))

        self.model_key = key
        self.model = available[key]
        self.model_name = model_names.get(key)
        return key, self.model

    def execute_task(self, task_type, prompt):
        """
        Send ``prompt`` to the model chosen by ``select_model`` and return its text.

        Args:
            task_type: Not used yet; reserved for future routing logic.
            prompt: Text sent as a single user message.

        Returns:
            The generated text: ``.text`` of ``generate_content`` for the
            "gemini" key, otherwise the first choice of a chat-completions call
            made with ``self.model_name``.
        """
        backend, client = self.select_model()

        if backend != "gemini":
            # openai / claude / llama are driven through the chat-completions API.
            response = client.chat.completions.create(
                model=self.model_name,
                messages=[{"role": "user", "content": prompt}]
            )
            return response.choices[0].message.content

        return client.generate_content(prompt).text

    def initialize_from_prompt(
        self,
        prompt,
        global_round=1,
        top_k_roles=3,
        top_k_tasks=2,
        threshold=0.4,
        verbose=True
    ):
        """
        Reset the per-mission counters and assign roles by prompt similarity.

        Counters (same for a new mission, ``global_round == 1``, and for a
        prompt reuse, ``global_round > 1``):
            • ``global_round`` is stored as passed in by the runner
            • ``local_round`` → 1
            • ``interval``    → 1
            • ``start_time``  → current time

        Roles: every role in ``ROLE_EMBEDDINGS`` whose cosine similarity to the
        prompt embedding reaches the threshold is a candidate, best match first.
        Rounds 1-2 keep only the best candidate; later rounds keep at most
        ``max_roles`` candidates. Without any candidate,
        ``fallback_choose_role_tasks`` is called and the method returns.

        Args:
            prompt: Mission prompt text.
            global_round: Current global round (1 means a brand-new mission).
            top_k_roles: Role cap from round 3 on, unless the ``max_roles``
                meta-parameter is set.
            top_k_tasks: Currently unused (task assignment is disabled below).
            threshold: Minimum similarity for a role, unless the
                ``role_assignment_threshold`` meta-parameter is set.
            verbose: Print progress messages.
        """
        # ── Reset all per-mission counters & timers ────────────────────────────
        self.global_round = global_round
        self.local_round = 1
        self.interval = 1
        self.start_time = time.time()

        if verbose:
            tag = "NEW" if global_round == 1 else "REUSE"
            print(f"{tag} MISSION ({self.name}) global_round={global_round}, local_round=1, interval=1: “{prompt[:60]}…”")

        # ── Role assignment ───────────────────────────────────────────────────
        # The embedding is computed once (it used to be computed twice in a row).
        prompt_vec = get_prompt_embedding(prompt)

        # Use meta-learned threshold if available
        threshold = self.meta_parameters.get("role_assignment_threshold", threshold)

        similarities = {
            role: cosine_similarity([prompt_vec], [vec])[0][0]
            for role, vec in ROLE_EMBEDDINGS.items()
        }

        sorted_roles = sorted(similarities.items(), key=lambda x: x[1], reverse=True)
        new_roles = [role for role, score in sorted_roles if score >= threshold]

        if not new_roles:
            print(f"⚠️ {self.name} no strong role match found — using fallback.")
            self.fallback_choose_role_tasks(POSSIBLE_ROLES, POSSIBLE_TASKS, verbose=verbose)
            return

        if global_round <= 2:
            # ⏳ Restrict to the single best role in early rounds
            new_roles = [new_roles[0]]
        else:
            # 🚀 From round 3 on, cap the role count with the meta-learned parameter
            max_roles = self.meta_parameters.get("max_roles", top_k_roles)
            new_roles = new_roles[:max_roles]

        # Drop duplicates while keeping the similarity order.
        self.roles = list(dict.fromkeys(new_roles))

        if verbose:
            print(f"✅ {self.name} assigned roles: {self.roles}")

        # NOTE: task assignment is currently disabled here; the former call was:
        # allow_jollycard = global_round >= 2
        # self.assign_tasks_from_roles_multi_round(
        #     prompt=prompt,
        #     global_round=global_round,
        #     top_k=top_k_tasks,
        #     threshold=threshold,
        #     allow_jollycard=allow_jollycard,
        #     verbose=verbose
        # )


    def initialize_from_memory(self, strategy="most_similar", top_k_roles=3, top_k_tasks=2, threshold=0.4, allow_jollycard=True, verbose=True):
        """
        Re-derive the agent's roles from a prompt stored in long-term memory.

        Strategy:
        - 'last': prompt of the most recently inserted memory entry.
        - 'most_similar': stored non-blank prompt whose embedding is closest to
          the embedding of "<agent name> role-task intent".

        Nothing happens (apart from an optional warning) when the memory is
        empty or, for 'most_similar', holds no non-blank prompt. Otherwise the
        chosen prompt is passed to ``assign_roles_from_prompt`` and then to
        ``reevaluate_roles`` with an efficiency threshold of 0.55.

        ``top_k_tasks``, ``threshold`` and ``allow_jollycard`` are unused while
        the task-assignment call below stays disabled.

        Raises:
            ValueError: For an unknown ``strategy`` (only when memory is non-empty).
        """
        memory = self.long_memory
        if not memory:
            if verbose:
                print(f"⚠️ {self.name} has no long-term memory. Cannot initialize.")
            return

        if strategy not in ("last", "most_similar"):
            raise ValueError(f"Unknown strategy: {strategy}")

        if strategy == "last":
            past_prompt = list(memory.values())[-1].get("prompt", "")
        else:
            stored_prompts = [entry.get("prompt", "") for entry in memory.values()]

            # Reference vector built from the agent's name, not from a live prompt.
            query_vec = get_prompt_embedding(self.name + " role-task intent")

            # Keep the first prompt that reaches the highest similarity.
            found = False
            best_prompt = None
            best_score = None
            for candidate in stored_prompts:
                if not candidate.strip():
                    continue
                score = cosine_similarity([query_vec], [get_prompt_embedding(candidate)])[0][0]
                if not found or score > best_score:
                    found = True
                    best_prompt = candidate
                    best_score = score

            if not found:
                if verbose:
                    print(f"⚠️ {self.name} has no valid prompts in memory.")
                return

            past_prompt = best_prompt

        if verbose:
            print(f"\n🔁 {self.name} reusing memory-based prompt:\n\"{past_prompt}\"\n")

        # Roles first, then prune them against the same prompt.
        self.assign_roles_from_prompt(past_prompt, top_k=top_k_roles, verbose=verbose)

        # Task assignment is currently disabled; the former call was:
        # self.assign_tasks_from_roles_multi_round(
        #     prompt=past_prompt,
        #     global_round=1,
        #     allow_jollycard=allow_jollycard,
        #     verbose=verbose
        # )

        self.reevaluate_roles(past_prompt, efficiency_threshold=0.55, verbose=verbose)


    def cluster_spikes(self, time_window=30.0, var_threshold=0.02):
        """
        Group logged signal spikes into time-bounded clusters.

        Spikes from ``self.signal_spike_log`` are walked in timestamp order. A
        spike joins the open cluster when it lies within ``time_window`` of the
        cluster's first spike (same unit as the timestamps) and its
        ``output_similarity_variance`` is at least ``var_threshold``; otherwise
        it closes the open cluster and starts a new one. Clusters holding a
        single spike are discarded. No tag is attached to the clusters here.

        Args:
            time_window: Maximum distance from the first spike of a cluster.
            var_threshold: Minimum variance for a spike to join a cluster. A
                spike without a variance value is treated as 0.0.

        Side effects:
            ``self.signal_clusters`` is rebuilt on every call. It is reset to an
            empty list even when there is no spike log, so stale clusters from a
            previous call never survive and the attribute always exists.
        """
        self.signal_clusters = []

        spike_log = getattr(self, "signal_spike_log", None)
        if not spike_log:
            return

        open_cluster = []
        window_start = None

        for spike in sorted(spike_log, key=lambda s: s["timestamp"]):
            ts = spike["timestamp"]
            variance = spike.get("output_similarity_variance", 0.0)

            if open_cluster and ts - window_start <= time_window and variance >= var_threshold:
                open_cluster.append(spike)
                continue

            # Close the open cluster (kept only if it has company) and restart.
            if len(open_cluster) > 1:
                self.signal_clusters.append(open_cluster)
            open_cluster = [spike]
            window_start = ts

        if len(open_cluster) > 1:
            self.signal_clusters.append(open_cluster)


    def is_signal_clear(self, signal, threshold=0.05, margin=0.02, window=3, scenario_sensitive=True, entropy_sensitive=True, verbose=True):
        """
        Decide whether a foresight signal is strong enough to justify rerouting.

        The signal has to pass every enabled gate, checked in this order:
        1. magnitude   – its variance reaches ``threshold + margin``;
        2. persistence – at least two of the last ``window`` values of
           ``self.uvr_similarity_history`` exceed ``threshold``;
        3. cluster tag – ``cluster_tag`` is one of the actionable tags;
        4. scenario    – (``scenario_sensitive``) ``scenario`` is a volatile one;
        5. entropy     – (``entropy_sensitive``) neither the role entropy nor the
           task entropy of the last 10 ``self.interval_score_log`` entries lies
           inside the expected band taken from the meta-parameters;
        6. gain        – meta-parameter ``expected_reroute_gain`` is at least 0.01.

        Arguments:
        - signal: the signal dictionary from signal_spike_log
        - threshold: base variance threshold
        - margin: extra margin on top of the threshold for the magnitude gate
        - window: number of recent history values used for the persistence gate
        - scenario_sensitive: enable the scenario gate
        - entropy_sensitive: enable the entropy gate
        - verbose: print why the entropy/gain gates fired

        Returns:
        - True if every enabled gate passes, False otherwise
        """
        actionable_tags = {"strategy drift", "unresolved oscillation", "behavioral divergence", "strategy collapse"}
        # NOTE(review): other code in this file keys scenarios with emoji-prefixed
        # labels (e.g. "🕳️ Black Swan"); confirm the labels stored on spikes
        # really match these plain names.
        volatile_scenarios = {"Black Swan", "Tipping Point", "Strategy Collapse"}

        # ── 1. Magnitude ──
        if signal.get("output_similarity_variance", 0.0) < threshold + margin:
            return False

        # ── 2. Persistence ──
        elevated = sum(1 for v in self.uvr_similarity_history[-window:] if v > threshold)
        if elevated < 2:
            return False

        # ── 3. Cluster tag ──
        if signal.get("cluster_tag", "") not in actionable_tags:
            return False

        # ── 4. Scenario ──
        if scenario_sensitive and signal.get("scenario", "") not in volatile_scenarios:
            return False

        # ── 5. Adaptive entropy ──
        if entropy_sensitive:
            from collections import Counter
            import math

            recent = self.interval_score_log[-10:]
            role_counts = Counter(r for entry in recent for r in entry.get("roles", []))
            task_counts = Counter(t for entry in recent for t in entry.get("tasks", []))

            def shannon(counts):
                # Shannon entropy in bits; defined as 0.0 for an empty counter.
                total = sum(counts.values())
                if not total > 0:
                    return 0.0
                return -sum((v / total) * math.log2(v / total) for v in counts.values())

            role_entropy = shannon(role_counts)
            task_entropy = shannon(task_counts)

            expected = self.meta_parameters.get("entropy_expected", 0.65)
            band_margin = self.meta_parameters.get("entropy_margin", 0.15)
            soften = self.meta_parameters.get("entropy_softening_factor", 0.05)
            lower = max(expected - band_margin - soften, 0.0)
            upper = min(expected + band_margin + soften, 1.0)

            # Entropy inside the expected band means nothing unusual is going on.
            within_band = lower <= role_entropy <= upper or lower <= task_entropy <= upper
            if within_band:
                return False
            if verbose:
                print(f"⚠️ Entropy trigger → role={role_entropy:.2f}, task={task_entropy:.2f} (expected={expected:.2f})")

        # ── 6. Meta-learned expected gain ──
        if self.meta_parameters.get("expected_reroute_gain", 0.0) < 0.01:
            if verbose:
                print(f"⚠️ Reroute gain too low → skipping reroute.")
            return False

        return True







    def reroute_strategy(self, severity="moderate", verbose=True):
        """
        Adapt the agent's strategy according to ``severity``.

        - "mild":     only the tasks are re-chosen (``fallback_choose_tasks``).
        - "moderate": roles are re-evaluated against the runner's last mission
                      prompt, then tasks are reassigned for the next global round.
        - "severe":   roles, tasks, features and short memory are wiped, then
                      roles and tasks are reassigned for the next global round.

        Any other value performs no rerouting; the summary line is still printed
        when ``verbose`` is set.
        """
        def reassign_tasks():
            # Tasks are always reassigned for the round after the runner's current one.
            self.assign_tasks_from_roles_multi_round(
                prompt=self.runner.last_mission_prompt,
                global_round=self.runner.global_round + 1,
                verbose=verbose,
            )

        if severity == "mild":
            self.fallback_choose_tasks(verbose=verbose)

        elif severity == "moderate":
            cutoff = self.meta_parameters.get("reroute_efficiency_threshold", 0.5)
            self.reevaluate_roles(
                prompt=self.runner.last_mission_prompt,
                efficiency_threshold=cutoff,
                verbose=verbose,
            )
            reassign_tasks()

        elif severity == "severe":
            # Start from a clean slate before reassigning everything.
            self.roles, self.tasks, self.features = [], [], []
            self.short_memory.clear()
            self.assign_roles_from_prompt(
                prompt=self.runner.last_mission_prompt,
                global_round=self.runner.global_round + 1,
                verbose=verbose,
            )
            reassign_tasks()

        if verbose:
            print(f"🔄 {self.name} performed rerouting ({severity}) → Roles: {self.roles}, Tasks: {self.tasks}")



    # =======================
    # 7.3. **Performance Evaluation & Learning**
    # =======================


    def update_long_memory(
        self,
        prompt,
        output,
        score,
        signals,
        mission_metrics=None,
        prompt_key=None,
        verbose=False
    ):
        """
        Store the current mission in long-term memory and persist it to disk.

        The entry is written unconditionally (no acceptance criteria are checked
        here) under ``prompt_key``, replacing any previous entry with that key,
        and the memory is then saved to ``"<agent name>_long_memory.json"``.

        Args:
            prompt: Mission prompt text.
            output: Output produced for the mission.
            score: Score of the output.
            signals: Mapping read for ``uvr_similarity`` and
                ``output_similarity_variance``; ``None`` is treated as empty.
            mission_metrics: Optional metrics mapping (stored as ``{}`` if falsy).
            prompt_key: Memory key; derived with ``generate_prompt_key(prompt)``
                when omitted.
            verbose: Print a one-line confirmation.

        Notes:
            The ``attention_history`` / ``fatigue_history`` / ``hunger_history``
            fields hold only the latest sample of each history (or ``None``).
            ``graph_embedding`` is stored as a list when the agent's
            ``last_graph_embedding`` offers ``tolist()``; a missing attribute,
            ``None`` or an already plain value is stored as is instead of
            raising.
        """
        if prompt_key is None:
            prompt_key = self.generate_prompt_key(prompt)

        signals = signals or {}

        # Convert array-like embeddings to JSON-friendly lists; tolerate None/lists.
        graph_embedding = getattr(self, "last_graph_embedding", None)
        if hasattr(graph_embedding, "tolist"):
            graph_embedding = graph_embedding.tolist()

        entry = {
            "prompt": prompt,
            "prompt_key": prompt_key,
            "output": output,
            "score": score,
            "metrics": mission_metrics or {},
            "timestamp": time.time(),
            "graph_embedding": graph_embedding,
            "graph_text": getattr(self, "last_graph_text", None),
            "attention_history": self.attention_history[-1] if self.attention_history else None,
            "fatigue_history": self.fatigue_history[-1] if self.fatigue_history else None,
            "hunger_history": self.hunger_history[-1] if self.hunger_history else None,
            # Snapshots: copies so later changes on the agent do not alter memory.
            "roles": self.roles[:],
            "tasks": self.tasks[:],
            "features": self.features[:],
            "inline_activated_roles": self.inline_activated_roles[:],
            "role_task_map": deepcopy(self.role_task_map),
            "role_metric_map": deepcopy(self.role_metric_map),
            "uvr_similarity": signals.get("uvr_similarity"),
            "output_similarity_variance": signals.get("output_similarity_variance"),
        }

        self.long_memory[prompt_key] = entry
        self.save_long_memory(f"{self.name}_long_memory.json")

        if verbose:
            print(f"🧠 {self.name} long memory updated → key: {prompt_key[:12]}... | score={score:.3f}")


    def wants_global_promotion(self, new_score, previous_score):
        """
        Vote on whether the new mission should be promoted globally.

        Arguments:
        - new_score: score of the newly validated output.
        - previous_score: score currently stored in the external DB.

        Returns:
        - True as soon as one of these holds, checked in order:
          1. ``new_score`` beats ``previous_score`` by more than the
             ``promotion_margin`` meta-parameter (default 0.05);
          2. the latest logged spike passes ``is_signal_clear``;
          3. ``self.variance_history["phase"]`` is "inflection" or
             "post-inflection".
          False otherwise.
        """
        required_margin = self.meta_parameters.get("promotion_margin", 0.05)
        phase = self.variance_history.get("phase", "unknown")
        latest_spike = self.signal_spike_log[-1] if self.signal_spike_log else {}

        # 1. Strong improvement over the stored score.
        if new_score > previous_score + required_margin:
            return True

        # 2. A clear rerouting signal on the most recent spike.
        if self.is_signal_clear(latest_spike, verbose=False):
            return True

        # 3. Entropy phase hints at a strategy shift.
        return phase in ("inflection", "post-inflection")


    def evaluate_interval(self, score, attention, efficiency, verbose=True):
        """
        Stage 1 check: does this interval clear the meaningfulness threshold?

        The interval is validated when ``score`` is at least the
        ``meaningfulness_threshold`` meta-parameter (default 0.65). A validated
        interval is logged (interval number and wall-clock time) and advances
        the local round. ``attention`` and ``efficiency`` are accepted but not
        used by the check.

        Returns:
            The outcome of the comparison (truthy when validated).
        """
        cutoff = self.meta_parameters.get("meaningfulness_threshold", 0.65)
        validated = score >= cutoff

        if not validated:
            if verbose:
                print(f"⏳ {self.name} did not pass Stage 1 (score={score:.3f}, threshold={cutoff:.3f})")
            return validated

        # Record when and at which interval the validation happened.
        self.local_interval_log.append(self.interval)
        self.local_time_log.append(time.time())
        self.increment_local_round(reason="validated")
        if verbose:
            print(f"✅ {self.name} passed Stage 1 (meaningfulness): score={score:.3f} (threshold={cutoff:.3f})")
        return validated

    def evaluate_local_mission(self, score, previous_score, prompt_key, output, signals, mission_metrics=None, verbose=True):
        """
        Stage 2 check: accept the output only if it beats the previous score.

        When ``score`` is strictly greater than ``previous_score`` the output is
        written to long-term memory (using the runner's last mission prompt),
        the local state is reset and the local round is advanced. Otherwise
        nothing is changed.

        Returns:
            The outcome of the comparison (truthy when the output was accepted).
        """
        improved = score > previous_score

        if not improved:
            if verbose:
                print(f"🔁 {self.name} rejected output (score={score:.3f} ≤ prev={previous_score:.3f})")
            return improved

        self.update_long_memory(
            prompt=self.runner.last_mission_prompt,
            output=output,
            score=score,
            signals=signals,
            mission_metrics=mission_metrics,
            prompt_key=prompt_key,
            verbose=verbose,
        )
        self.reset_local_state()
        self.increment_local_round(reason="validated")

        if verbose:
            print(f"✅ {self.name} passed Stage 2 (efficiency). New local mission accepted.")
        return improved



    def increment_local_round(self, reason=None):
        """
        Advance ``self.local_round`` by one, but only for a validated step.

        Callers must pass ``reason="validated"``; going through this method
        instead of a direct ``+= 1`` guards against accidental increments.

        Raises:
            RuntimeError: If ``reason`` is anything other than "validated".
        """
        if reason == "validated":
            self.local_round += 1
            return
        raise RuntimeError(f"🚫 Attempted to increment local_round without validation (reason='{reason}')")


    def compute_learning_rate(self, efficiency_score, meaningfulness_score):
        """
        Derive the current learning rate from efficiency and meaningfulness.

        The weighted sum of both scores is scaled by an exploration factor
        ``1 / (1 + local_round)`` and a stability factor
        ``exp(-lr_decay_rate * local_round)``, then clamped to
        ``[lr_min, lr_max]``. Weights, decay rate and bounds come from
        ``self.meta_parameters``. Attention is not an input here.

        Returns:
            The clamped learning rate.
        """
        params = self.meta_parameters

        # Quality term: efficiency and meaningfulness only.
        weighted_quality = (
            params.get("lr_efficiency_weight", 0.4) * efficiency_score
            + params.get("lr_meaningfulness_weight", 0.4) * meaningfulness_score
        )

        decay_rate = params.get("lr_decay_rate", 0.05)
        floor = params.get("lr_min", 0.001)
        ceiling = params.get("lr_max", 0.05)

        # Both modifiers shrink as the local round number grows.
        exploration = 1.0 / (1.0 + self.local_round)
        stability = math.exp(-decay_rate * self.local_round)

        unclamped = weighted_quality * exploration * stability
        return min(max(unclamped, floor), ceiling)


    @staticmethod
    def get_global_round_scenario_mode(local_round_score_log):
        """
        Return the most frequent (mode) scenario label of a global round.

        Declared as a static method because it takes no ``self``: without the
        decorator a call on an instance passed the instance as an extra argument
        and raised ``TypeError``. Calls through the class keep working.

        Args:
            local_round_score_log: Iterable of per-interval collections of score
                entries (mappings). Entries without a ``"scenario"`` key are
                ignored.

        Returns:
            The most common label after each one has been passed through
            ``normalize_foresight_key`` (per the original note this normalizes
            causal suffixes such as "(Causal)" — behavior of that helper is not
            visible here), or ``""`` when no entry carries a scenario. Ties go
            to the label encountered first.
        """
        from collections import Counter

        scenarios = [
            normalize_foresight_key(entry["scenario"])
            for interval_scores in local_round_score_log
            for entry in interval_scores
            if "scenario" in entry
        ]
        if not scenarios:
            return ""
        return Counter(scenarios).most_common(1)[0][0]




    def update_long_term_meta_parameters(
        self,
        foresight_signal=None,
        avg_efficiency_long_term=None,
        avg_meaningfulness_long_term=None,
        global_avg_attn=None,
        global_avg_fatigue=None,
        global_avg_hunger=None,
        verbose=True
    ):
        """
        Update all meta-parameters based on efficiency, meaningfulness, and attention (local mechanisms).
        Update long-term meta-parameters (self.meta_parameters) based on
        efficiency, meaningfulness, and attention averaged over the GLOBAL round.
        """
        efficiency_score = avg_efficiency_long_term if avg_efficiency_long_term is not None else 0.5
        meaningfulness_score = avg_meaningfulness_long_term if avg_meaningfulness_long_term is not None else 0.5

        learning_rate = self.compute_learning_rate(efficiency_score, meaningfulness_score)
        self.meta_parameters["current_learning_rate"] = learning_rate

        meaningful_and_efficient = meaningfulness_score > 0.6 and efficiency_score > 0.6


        if long_term_uvr_volatility > 0.15:  # 🔧 Meta-learn later
            self.meta_parameters["pruning_threshold"] = min(
                self.meta_parameters.get("pruning_threshold", 0.4) + 0.02, 0.7
            )
            self.meta_parameters["dropout_base_probability"] = min(
                self.meta_parameters.get("dropout_base_probability", 0.3) + 0.01, 0.5
            )
            self.meta_parameters["role_assignment_threshold"] = max(
                self.meta_parameters.get("role_assignment_threshold", 0.7) - 0.01, 0.4
            )
            if verbose:
                print(f"⚠️ [META] {self.name} sustained UVR volatility — increased dropout/pruning, lowered role threshold")


        # --- Meta-learn UVR detection thresholds ---
        current_window = self.meta_parameters.get("uvr_min_window", 5)
        current_thresh = self.meta_parameters.get("uvr_inflection_threshold", 1.5)

        if meaningful_and_efficient:
            # Slightly stabilize (reduce sensitivity to noise)
            new_window = min(current_window + 1, 15)         # Cap window
            new_thresh = min(current_thresh + 0.05, 2.0)     # Cap threshold
        else:
            # Be more sensitive to volatility
            new_window = max(current_window - 1, 3)          # Floor window
            new_thresh = max(current_thresh - 0.05, 1.0)     # Floor threshold

        self.meta_parameters["uvr_min_window"] = new_window
        self.meta_parameters["uvr_inflection_threshold"] = round(new_thresh, 3)

        if verbose:
            print(f"📉 {self.name} UVR window → {new_window}, threshold → {new_thresh:.2f}")

        # 🔄 Environmental scenario-based inflection sensitivity
        scenario_raw = (foresight_signal or {}).get("scenario", "")
        scenario = normalize_foresight_key(scenario_raw)

        scenario_sensitivity_map = {
            "✅ Convergent Paths":         (-1, -0.05),
            "🧭 White Swan":               (-1, -0.03),
            "🌫️ Grey Swan":                (0, 0.0),
            "🤝 Grey Rhino":               (+1, +0.02),
            "🪂 Tipping Point":            (+1, +0.05),
            "🧩 Cascading Discontinuity":  (+2, +0.08),
            "🐉 Wild Card":                (+2, +0.10),
            "🕳️ Black Swan":               (+3, +0.15),
        }

        delta_w, delta_thresh = scenario_sensitivity_map.get(scenario, (0, 0.0))

        # Apply adaptive update
        self.meta_parameters["uvr_min_window"] = np.clip(
            self.meta_parameters["uvr_min_window"] + delta_w, 3, 15
        )
        self.meta_parameters["uvr_inflection_threshold"] = round(np.clip(
            self.meta_parameters["uvr_inflection_threshold"] + delta_thresh, 1.0, 2.5
        ), 3)

        if verbose:
           print(f"🌐 {self.name} env-aware UVR tuning: +window={delta_w}, +thresh={delta_thresh:.2f} → "
                  f"new window={self.meta_parameters['uvr_min_window']}, "
                  f"new threshold={self.meta_parameters['uvr_inflection_threshold']:.3f}")





        # --- GLOBAL ATTENTION ADAPTATION (insert this block here!) ---
        # --- Adapt all attention- and dropout-related weights to global round average ---
        if global_avg_attn is not None and global_avg_fatigue is not None and global_avg_hunger is not None:
            configs = [
                ("attention_time_weight",    global_avg_attn,    0.05, 0.3),
                ("attention_fatigue_weight", global_avg_fatigue, 0.05, 0.3),
                ("attention_hunger_weight",  global_avg_hunger,  0.05, 0.3),
                ("dropout_time_weight",      global_avg_attn,    0.05, 0.3),
                ("dropout_fatigue_weight",   global_avg_fatigue, 0.05, 0.3),
                ("dropout_hunger_weight",    global_avg_hunger,  0.05, 0.3),
                ("external_attention_weight", global_avg_attn,    0.05, 0.5),
                ("external_fatigue_weight",   global_avg_fatigue, 0.05, 0.5),
                ("external_hunger_weight",    global_avg_hunger,  0.05, 0.5),
            ]
            for key, avg, min_val, max_val in configs:
                current = self.meta_parameters.get(key, min_val)
                if avg > 0.6:
                    self.meta_parameters[key] = max(current * 0.99, min_val)
                else:
                    self.meta_parameters[key] = min(current * 1.01, max_val)
            if verbose:
                print(
                    f"🌐 {self.name} [Long-term META-PHYSIO] Weights adapted:"
                    f"\n  attention_time_weight={self.meta_parameters['attention_time_weight']:.3f},"
                    f" attention_fatigue_weight={self.meta_parameters['attention_fatigue_weight']:.3f},"
                    f" attention_hunger_weight={self.meta_parameters['attention_hunger_weight']:.3f},"
                    f"\n  dropout_time_weight={self.meta_parameters['dropout_time_weight']:.3f},"
                    f" dropout_fatigue_weight={self.meta_parameters['dropout_fatigue_weight']:.3f},"
                    f" dropout_hunger_weight={self.meta_parameters['dropout_hunger_weight']:.3f},"
                    f"\n  external_attention_weight={self.meta_parameters['external_attention_weight']:.3f},"
                    f" external_fatigue_weight={self.meta_parameters['external_fatigue_weight']:.3f},"
                    f" external_hunger_weight={self.meta_parameters['external_hunger_weight']:.3f}"
                )



        # --- Learning rate bounds ---
        lr_min_range = self.meta_parameters.get("lr_min_range", (0.0005, 0.005))
        lr_max_range = self.meta_parameters.get("lr_max_range", (0.02, 0.1))
        lr_min = self.meta_parameters.get("lr_min", 0.001)
        lr_max = self.meta_parameters.get("lr_max", 0.05)

        if meaningful_and_efficient:
            new_lr_min = max(lr_min * 0.99, lr_min_range[0])
            new_lr_max = max(lr_max * 0.99, new_lr_min + 0.001)
        else:
            new_lr_min = min(lr_min * 1.01, lr_max - 0.001)
            new_lr_max = min(lr_max * 1.01, lr_max_range[1])

        self.meta_parameters["lr_min"] = new_lr_min
        self.meta_parameters["lr_max"] = new_lr_max

        # --- Noise range ---It adapts the range for noise (used in scoring, e.g., for controlled randomness) based on whether the agent had a "meaningful and efficient" global round
        # The noise block below the learning rate adjusts score_noise_min and score_noise_max dynamically based on recent round performance (i.e., whether the round was meaningful/efficient).
        # If the round was good, noise shrinks. If the round was poor, noise grows.
        noise_lr = self.meta_parameters.get("score_noise_lr", 0.01)
        min_floor = self.meta_parameters.get("score_noise_min_floor", 0.001)
        max_ceiling = self.meta_parameters.get("score_noise_max_ceiling", 0.3)
        score_min = self.meta_parameters.get("score_noise_min", 0.01)
        score_max = self.meta_parameters.get("score_noise_max", 0.2)

        if meaningful_and_efficient:
            new_min = max(score_min * (1 - noise_lr), min_floor)
            new_max = max(score_max * (1 - noise_lr), new_min + 0.01)
        else:
            new_min = min(score_min * (1 + noise_lr), score_max - 0.01)
            new_max = min(score_max * (1 + noise_lr), max_ceiling)

        self.meta_parameters["score_noise_min"] = new_min
        self.meta_parameters["score_noise_max"] = new_max

        # --- Noise learning rate ---
        current = self.meta_parameters.get("score_noise_lr", 0.01)
        if meaningful_and_efficient:
            self.meta_parameters["score_noise_lr"] = max(current * 0.999, 0.0005)
        else:
            self.meta_parameters["score_noise_lr"] = min(current * 1.001, 0.05)


        # --- Role and Pruning thresholds ---
        role_min, role_max = self.meta_parameters.get("role_threshold_range", (0.4, 0.9))
        pruning_min, pruning_max = self.meta_parameters.get("pruning_threshold_range", (0.2, 0.8))

        current_role = self.meta_parameters.get("role_assignment_threshold", 0.7)
        current_pruning = self.meta_parameters.get("pruning_threshold", 0.4)

        self.meta_parameters["role_assignment_threshold"] = min(max(current_role - learning_rate * (1 - meaningfulness_score), role_min), role_max)
        self.meta_parameters["pruning_threshold"] = min(max(current_pruning - learning_rate * (1 - efficiency_score), pruning_min), pruning_max)

        if verbose:
            print(f"📏 {self.name} role_assignment_threshold → {self.meta_parameters['role_assignment_threshold']:.3f}")
            print(f"✂️ {self.name} pruning_threshold → {self.meta_parameters['pruning_threshold']:.3f}")


        # --- Threshold: Stagnation Recovery ---
        stagnation_thresh_min, stagnation_thresh_max = 3, 10  # Define your min/max bounds here (safe zone)

        current_stagnation_threshold = self.meta_parameters.get("stagnation_recovery_threshold", 5)

        # Adapt threshold based on low efficiency → slower reaction if generally efficient, faster if inefficient
        delta_stagnation = learning_rate * (1.0 - efficiency_score)

        self.meta_parameters["stagnation_recovery_threshold"] = min(
            max(current_stagnation_threshold - delta_stagnation, stagnation_thresh_min), stagnation_thresh_max
        )

        if verbose:
            print(f"🛑 {self.name} stagnation_recovery_threshold → {self.meta_parameters['stagnation_recovery_threshold']:.3f}")


        # --- Reinforce role/pruning/reuse thresholds ---
        for key, score, floor, ceil in [("role_assignment_threshold", meaningfulness_score, 0.4, 0.9),
                                        ("pruning_threshold", efficiency_score, 0.2, 0.7),
                                        ("reuse_similarity_threshold", meaningfulness_score, 0.6, 0.9)]:

            current = self.meta_parameters.get(key, (floor + ceil) / 2)
            delta = 0.005 if score > 0.6 else -0.005
            self.meta_parameters[key] = min(max(current + delta, floor), ceil)

        # --- Reuse Similarity Threshold ---
        reuse_min, reuse_max = self.meta_parameters.get("reuse_similarity_range", (0.6, 0.9))
        current_reuse = self.meta_parameters.get("reuse_similarity_threshold", 0.75)

        if meaningfulness_score > 0.6 and efficiency_score > 0.6:
            self.meta_parameters["reuse_similarity_threshold"] = min(current_reuse + learning_rate * 0.01, reuse_max)
        else:
            self.meta_parameters["reuse_similarity_threshold"] = max(current_reuse - learning_rate * 0.01, reuse_min)

        if verbose:
            print(f"🔄 {self.name} reuse_similarity_threshold → {self.meta_parameters['reuse_similarity_threshold']:.3f}")


        # ── Stagnation tracking via runner ─────────────────────────
        if hasattr(self, "runner") and self.runner:
            # Increment or reset based on latest efficiency & meaningfulness
            self.runner.maybe_update_stagnation_counter(
                agent=self,
                meaningfulness_score=meaningfulness_score,
                efficiency_score=efficiency_score
            )

        # --- Role/pruning learning rates ---
        for key, score in [("role_threshold_lr", meaningfulness_score), ("pruning_threshold_lr", efficiency_score)]:
            bounds = (0.001, 0.05)
            current = self.meta_parameters.get(key, 0.01)
            if score > 0.6:
                updated = max(current * (1 - learning_rate), bounds[0])
            else:
                updated = min(current * (1 + learning_rate), bounds[1])
            self.meta_parameters[key] = updated


        # Decay rate adaptation
        ddr_min, ddr_max = self.meta_parameters.get("dropout_decay_range", (1.0, 5.0))
        current_ddr = self.meta_parameters.get("dropout_decay_rate", 3.0)

        if meaningful_and_efficient:
            self.meta_parameters["dropout_decay_rate"] = max(current_ddr * 0.99, ddr_min)
        else:
            self.meta_parameters["dropout_decay_rate"] = min(current_ddr * 1.01, ddr_max)


        # --- Decay rates ---
        lt_min, lt_max = self.meta_parameters.get("lambda_time_range", (0.1, 1.0))
        lu_min, lu_max = self.meta_parameters.get("lambda_usage_range", (0.1, 0.8))
        lambda_time = self.meta_parameters.get("lambda_time", 0.5)
        lambda_usage = self.meta_parameters.get("lambda_usage", 0.3)

        if meaningful_and_efficient:
            self.meta_parameters["lambda_time"] = max(lambda_time * 0.99, lt_min)
            self.meta_parameters["lambda_usage"] = max(lambda_usage * 0.99, lu_min)
        else:
            self.meta_parameters["lambda_time"] = min(lambda_time * 1.01, lt_max)
            self.meta_parameters["lambda_usage"] = min(lambda_usage * 1.01, lu_max)


        # --- Foresight scenario influence ---
        # Each tuple in scenario_update_map holds the direct adjustments ("deltas") applied to the meta-parameters:
        # - first value: pruning threshold delta (how aggressively roles/tasks are pruned)
        # - second value: dropout base probability delta (how likely things are dropped out randomly)
        # - third value: noise delta, added to both score_noise_min and score_noise_max
        #   (negative for "good" scenarios, positive for "bad" ones, so uncertainty rises in crisis/disruption)

        scenario_raw = (foresight_signal or {}).get("scenario", "")
        scenario = normalize_foresight_key(scenario_raw)

        scenario_update_map = {
            "✅ Convergent Paths":         (+0.02, +0.02, -0.005),
            "🧭 White Swan":               (+0.01, +0.01, -0.002),
            "🌫️ Grey Swan":                ( 0.0,  0.0,  0.0),
            "🤝 Grey Rhino":               (-0.01, 0.0,  +0.003),
            "🪂 Tipping Point":            (-0.02, -0.01, +0.005),
            "🧩 Cascading Discontinuity":  (-0.03, -0.02, +0.008),
            "🐉 Wild Card":                (-0.04, -0.03, +0.010),
            "🕳️ Black Swan":               (-0.05, -0.04, +0.015),
        }
        delta_prune, delta_dropout, delta_noise = scenario_update_map.get(scenario, (0, 0, 0))  # DONE

        # 🧠 Output similarity volatility trend (used in UVR diagnostics)

        uvr_output_vars = [
            x.get("output_similarity_variance", 0.0)
            for x in intervals
            if "output_similarity_variance" in x
        ]

        avg_output_var = np.mean(uvr_output_vars) if uvr_output_vars else 0.0

        output_similarity_volatility = avg_output_var  # Replaces prior long_term_uvr_volatility




        # Adjust relevant thresholds:
        self.meta_parameters["pruning_threshold"] = np.clip(
            self.meta_parameters["pruning_threshold"] + delta_prune, pruning_min, pruning_max)
        self.meta_parameters["dropout_base_probability"] = np.clip(
            self.meta_parameters["dropout_base_probability"] + delta_dropout, dropout_min, dropout_max)

        # Adjust noise min/max (keep within bounds)
        score_noise_min_floor = self.meta_parameters.get("score_noise_min_floor", 0.001)
        score_noise_max_ceiling = self.meta_parameters.get("score_noise_max_ceiling", 0.3)
        new_noise_min = np.clip(self.meta_parameters["score_noise_min"] + delta_noise, score_noise_min_floor, self.meta_parameters["score_noise_max"])
        new_noise_max = np.clip(self.meta_parameters["score_noise_max"] + delta_noise, new_noise_min, score_noise_max_ceiling)
        self.meta_parameters["score_noise_min"] = new_noise_min
        self.meta_parameters["score_noise_max"] = new_noise_max


        # --- Cooperation baseline and randomness ---We have added an override in the short-term update method to make the system more reactive.
        coop_min, coop_max = self.meta_parameters.get("cooperation_randomness_range", (0.05, 0.3))
        if meaningful_and_efficient:
            self.meta_parameters["cooperation_baseline"] = min(self.meta_parameters.get("cooperation_baseline", 0.5) + 0.01, 0.8)
            self.meta_parameters["cooperation_randomness_range"] = (max(coop_min * 0.98, 0.01), max(coop_max * 0.98, 0.05))
        else:
            self.meta_parameters["cooperation_baseline"] = max(self.meta_parameters.get("cooperation_baseline", 0.5) - 0.01, 0.2)
            self.meta_parameters["cooperation_randomness_range"] = (min(coop_min * 1.02, 0.2), min(coop_max * 1.02, 0.6))

        # --- Pruning decay rate ---
        prune_min, prune_max = self.meta_parameters.get("pruning_decay_range", (1.0, 5.0))
        current_decay = self.meta_parameters.get("pruning_decay_rate", 3.0)

        if meaningful_and_efficient:
            self.meta_parameters["pruning_decay_rate"] = max(current_decay * 0.99, prune_min)
        else:
            self.meta_parameters["pruning_decay_rate"] = min(current_decay * 1.01, prune_max)

        # --- Attention weighting factors ---These control how much time, fatigue, and hunger contribute to the attention calculation
        for key, min_val, max_val in [("attention_time_weight", 0.05, 0.3),
                                      ("attention_fatigue_weight", 0.05, 0.3),
                                      ("attention_hunger_weight", 0.05, 0.3)]:
            current = self.meta_parameters.get(key, 0.1)
            if meaningful_and_efficient:
                self.meta_parameters[key] = max(current * 0.99, min_val)
            else:
                self.meta_parameters[key] = min(current * 1.01, max_val)

        # --- GLOBAL-ROUND ADAPTATION FOR ATTENTION/PHYSIOLOGY WEIGHTS ---

        if global_avg_attn is not None:
            # Attention time weight
            attn_time_w = self.meta_parameters.get("attention_time_weight", 0.1)
            if global_avg_attn > 0.6:
                self.meta_parameters["attention_time_weight"] = max(attn_time_w * 0.99, 0.05)
            else:
                self.meta_parameters["attention_time_weight"] = min(attn_time_w * 1.01, 0.3)

            # Dropout/other weights can be similarly adapted
            attn_fatigue_w = self.meta_parameters.get("attention_fatigue_weight", 0.1)
            if global_avg_attn > 0.6:
                self.meta_parameters["attention_fatigue_weight"] = max(attn_fatigue_w * 0.99, 0.05)
            else:
                self.meta_parameters["attention_fatigue_weight"] = min(attn_fatigue_w * 1.01, 0.3)

            attn_hunger_w = self.meta_parameters.get("attention_hunger_weight", 0.1)
            if global_avg_attn > 0.6:
                self.meta_parameters["attention_hunger_weight"] = max(attn_hunger_w * 0.99, 0.05)
            else:
                self.meta_parameters["attention_hunger_weight"] = min(attn_hunger_w * 1.01, 0.3)

        # (Repeat/adapt for any other weights you want to globally modulate)


        # --- Attention threshold (optional meta‑learning) ---This is the global attention threshold for triggering dropout in agents
        thresh_min, thresh_max = 0.1, 0.5
        current_thresh = self.meta_parameters.get("attention_threshold", 0.3)

        # Raise threshold if attention is chronically low; lower if high
        if attention_score < 0.4:
            # make it easier to trigger dropout
            new_thresh = min(current_thresh + learning_rate * 0.01, thresh_max)
        else:
            # harder to trigger dropout
            new_thresh = max(current_thresh - learning_rate * 0.01, thresh_min)

        self.meta_parameters["attention_threshold"] = round(new_thresh, 3)


        # --- Dropout weighting factors ---
        for key, min_val, max_val in [("dropout_time_weight", 0.05, 0.3),
                                      ("dropout_fatigue_weight", 0.05, 0.3),
                                      ("dropout_hunger_weight", 0.05, 0.3)]:
            current = self.meta_parameters.get(key, 0.1)
            if meaningful_and_efficient:
                # productive round → slightly reduce dropout influence
                self.meta_parameters[key] = max(current * 0.99, min_val)
            else:
                # unproductive round → increase dropout influence
                self.meta_parameters[key] = min(current * 1.01, max_val)


        # --- Task selection parameters ---
        min_k, max_k = self.meta_parameters.get("top_k_tasks_range", (1, 5))
        min_thresh, max_thresh = self.meta_parameters.get("task_threshold_range", (0.3, 0.7))
        current_k = self.meta_parameters.get("top_k_tasks", 2)
        current_thresh = self.meta_parameters.get("task_threshold", 0.4)

        if meaningful_and_efficient:
            self.meta_parameters["top_k_tasks"] = min(current_k + 1, max_k)
            self.meta_parameters["task_threshold"] = max(current_thresh - 0.01, min_thresh)
        else:
            self.meta_parameters["top_k_tasks"] = max(current_k - 1, min_k)
            self.meta_parameters["task_threshold"] = min(current_thresh + 0.01, max_thresh)

        # --- Max roles ---
        min_roles, max_roles = self.meta_parameters.get("max_roles_range", (1, 5))
        current_max_roles = self.meta_parameters.get("max_roles", 3)
        delta = 1 if meaningful_and_efficient else -1
        self.meta_parameters["max_roles"] = min(max(current_max_roles + delta, min_roles), max_roles)

        # --- Jollycard importance threshold and sampling temperature ---
        importance_min, importance_max = self.meta_parameters.get("jollycard_importance_threshold_range", (0.2, 0.7))
        temperature_min, temperature_max = self.meta_parameters.get("jollycard_sampling_temperature_range", (0.5, 2.0))

        importance = self.meta_parameters.get("jollycard_importance_threshold", 0.3)
        temperature = self.meta_parameters.get("jollycard_sampling_temperature", 1.0)

        if meaningful_and_efficient:
            self.meta_parameters["jollycard_importance_threshold"] = min(importance + 0.01, importance_max)
            self.meta_parameters["jollycard_sampling_temperature"] = max(temperature * 0.98, temperature_min)
        else:
            self.meta_parameters["jollycard_importance_threshold"] = max(importance - 0.01, importance_min)
            self.meta_parameters["jollycard_sampling_temperature"] = min(temperature * 1.02, temperature_max)

        # --- Meta-learn jollycard injection weight ---
        jollycard_min, jollycard_max = self.meta_parameters.get("jollycard_injection_weight_range", (0.1, 0.9))
        current_injection_weight = self.meta_parameters.get("jollycard_injection_weight", 0.5)

        if meaningful_and_efficient:
            self.meta_parameters["jollycard_injection_weight"] = min(current_injection_weight + 0.01, jollycard_max)
        else:
            self.meta_parameters["jollycard_injection_weight"] = max(current_injection_weight - 0.01, jollycard_min)

        if verbose:
            print(f"🎲 {self.name} adjusted jollycard_injection_weight to {self.meta_parameters['jollycard_injection_weight']:.2f}")


        # --- Adapt external call weights (for inline role execution) ---
        # These weights adjust reliance on external data/tools during inline role-based tasks

        for key, perf in [
            ("external_attention_weight", efficiency_score),    # can use either or both efficiency/meaningfulness
            ("external_fatigue_weight", efficiency_score),
            ("external_hunger_weight", efficiency_score),
        ]:
            min_v, max_v = 0.05, 0.5
            cur = self.meta_parameters.get(key, 0.2)
            # If efficient & meaningful, slightly reduce dependency on that resource, else increase it
            if meaningful_and_efficient:
                self.meta_parameters[key] = max(cur * 0.99, min_v)
            else:
                self.meta_parameters[key] = min(cur * 1.01, max_v)

        # --- Adapt base probability for external tool activation (inline roles) ---
        base_prob_min, base_prob_max = 0.1, 0.8
        cur_base_prob = self.meta_parameters.get("external_call_base_prob", 0.4)
        if meaningful_and_efficient:
            self.meta_parameters["external_call_base_prob"] = max(cur_base_prob * 0.99, base_prob_min)
        else:
            self.meta_parameters["external_call_base_prob"] = min(cur_base_prob * 1.01, base_prob_max)


        # --- Metric selection threshold adaptation ---
        metric_min, metric_max = self.meta_parameters.get("metric_threshold_range", (0.3, 0.8))
        current_thresh = self.meta_parameters.get("metric_selection_threshold", 0.5)
        metric_lr = self.meta_parameters.get("metric_threshold_lr", 0.01)

        if meaningful_and_efficient:   # or other metric validation signal
            self.meta_parameters["metric_selection_threshold"] = max(current_thresh - metric_lr, metric_min)
        else:
            self.meta_parameters["metric_selection_threshold"] = min(current_thresh + metric_lr, metric_max)

        # Optionally adapt learning rate
        bounds = (0.001, 0.05)
        if meaningful_and_efficient:
            self.meta_parameters["metric_threshold_lr"] = max(metric_lr * (1 - learning_rate), bounds[0])
        else:
            self.meta_parameters["metric_threshold_lr"] = min(metric_lr * (1 + learning_rate), bounds[1])

        # --- Meta-learn UVR Reactivation Thresholds ---
        delta_thresh = self.meta_parameters.get("uvr_reactivation_delta_threshold", 0.05)
        novelty_thresh = self.meta_parameters.get("uvr_reactivation_novelty_threshold", 0.3)

        if meaningfulness_score > 0.6 and efficiency_score > 0.6:
            # More stable → raise thresholds slightly to reduce false positives
            delta_thresh = min(delta_thresh * 1.01, 0.2)
            novelty_thresh = min(novelty_thresh * 1.01, 0.6)
        else:
            # Unstable → lower thresholds to become more reactive
            delta_thresh = max(delta_thresh * 0.99, 0.01)
            novelty_thresh = max(novelty_thresh * 0.99, 0.1)

        self.meta_parameters["uvr_reactivation_delta_threshold"] = round(delta_thresh, 4)
        self.meta_parameters["uvr_reactivation_novelty_threshold"] = round(novelty_thresh, 4)

        if verbose:
            print(f"🔁 {self.name} updated reactivation thresholds → Δ={delta_thresh:.3f}, novelty={novelty_thresh:.3f}")


    def gate_inline_roles(agent, metrics, thresholds, role_metric_map):
        """
        Activate inline every mapped role whose trigger metric exceeds its threshold.

        Arguments:
        - agent: object exposing `roles` (membership-tested) and `inline_activate(role)`
        - metrics: dict of internal metric name -> value; a missing metric counts as 0
        - thresholds: dict of metric name -> threshold; a missing entry defaults to 0.7
        - role_metric_map: dict of role -> name of the metric that triggers it

        Returns:
        - List of the roles activated by this call, in `role_metric_map` order
        """
        default_threshold = 0.7
        newly_activated = []

        for candidate_role, trigger_metric in role_metric_map.items():
            # Roles the agent already holds are never re-activated.
            if candidate_role in agent.roles:
                continue

            observed = metrics.get(trigger_metric, 0)
            cutoff = thresholds.get(trigger_metric, default_threshold)

            # Strictly greater-than: a metric sitting exactly on its threshold does not trigger.
            if not observed > cutoff:
                continue

            agent.inline_activate(candidate_role)
            newly_activated.append(candidate_role)

        return newly_activated







    def compute_task_score(self, prompt, task):
        """
        Score how relevant `task` is to `prompt`.

        The score is the cosine similarity between the prompt embedding and the
        task embedding, plus a fixed 0.05 bonus for every entry of
        SCORE_BONUS_KEYWORDS found in the lower-cased prompt. The result is
        clamped to [0, 1].
        """
        keyword_bonus = 0.05

        # Embedding-based relevance (prompt embedding first, then the task's).
        prompt_vec = get_prompt_embedding(prompt)
        task_vec = get_task_embedding(task)
        relevance = cosine_similarity([prompt_vec], [task_vec])[0][0]

        # One bonus increment per domain keyword present in the prompt.
        matched_keywords = [kw for kw in SCORE_BONUS_KEYWORDS if kw in prompt.lower()]
        for _ in matched_keywords:
            relevance += keyword_bonus

        # Cap at 1 first, then floor at 0.
        capped = min(1.0, relevance)
        return max(0.0, capped)


    def adjust_role_task_priority(self, performance_data):
        """
        Set a binary priority flag per task from performance feedback.

        `performance_data` maps role -> {task: score}. For every role this
        agent holds, each of its tasks gets `meta_parameters["priority_" + task]`
        set to 1 when its score is strictly above 0.7 and to 0 otherwise.
        Roles absent from `performance_data` contribute nothing.
        """
        high_performance_cutoff = 0.7

        for held_role in self.roles:
            task_scores = performance_data.get(held_role, {})
            for task_name, task_score in task_scores.items():
                # 1 = high-performing task (raise priority), 0 = low-performing (lower it).
                flag = 1 if task_score > high_performance_cutoff else 0
                self.meta_parameters["priority_" + task_name] = flag



    def fallback_choose_role_tasks(self, possible_roles, possible_tasks, verbose=True):
        """
        Fallback method to assign a random role (if missing) and basic task selection.
        Used in exploration or failure recovery cases.

        Arguments:
        - possible_roles: candidate roles; one is drawn at random only when the
          agent has no `roles` attribute or an empty one
        - possible_tasks: candidate tasks to draw from
        - verbose: print what was selected

        Side effects:
        - sets `self.roles` (only when it was missing/empty) and `self.tasks`

        Task selection:
        - in cooperation with >= 2 candidates: a random sample of 2 or 3 tasks
          (never more than are available)
        - in cooperation with exactly 1 candidate: that single task
        - not in cooperation: one random task

        Empty candidate pools no longer raise IndexError from `random.choice`:
        an empty `possible_roles` leaves `self.roles` as `[]`, and an empty
        `possible_tasks` leaves `self.tasks` as `[]`.
        """
        # --- Role: only filled in when the agent currently has none ---
        if not getattr(self, "roles", None):
            if possible_roles:
                self.roles = [random.choice(possible_roles)]
                if verbose:
                    print(f"🎭 {self.name} randomly assigned fallback role: {self.roles[0]}")
            else:
                # Nothing to draw from; make sure the attribute exists for later code.
                self.roles = []
                if verbose:
                    print(f"⚠️ {self.name} (fallback) no candidate roles available; roles left empty")

        # --- Tasks: bail out early when there is nothing to choose from ---
        if not possible_tasks:
            self.tasks = []
            if verbose:
                print(f"⚠️ {self.name} (fallback) no candidate tasks available; tasks left empty")
            return

        if self.in_cooperation:
            if len(possible_tasks) >= 2:
                upper = min(3, len(possible_tasks))
                self.tasks = random.sample(possible_tasks, k=random.randint(2, upper))
            else:
                # only one task available—just take it
                self.tasks = possible_tasks[:]
            if verbose:
                print(f"🫂 {self.name} (fallback) in cooperation, selected multiple tasks: {self.tasks}")
        else:
            self.tasks = [random.choice(possible_tasks)]
            if verbose:
                print(f"🧍 {self.name} (fallback) independent, selected single task: {self.tasks[0]}")






    def score_output(self, output, prompt=None, verbose=False):
        """
        Score an LLM output based on:
          - A base score (`score_base`)
          - Keyword bonus: `score_bonus_weight` for each SCORE_BONUS_KEYWORDS term
            found in the lower-cased output
          - Semantic similarity to the prompt (cosine similarity of embeddings,
            weighted by `score_semantic_weight`; 0.0 when no prompt is given or
            the embedding step fails)
          - A random noise component drawn uniformly from the noise range

        Noise range lookup: an explicit `score_random_min` / `score_random_max`
        wins; otherwise the `score_noise_min` / `score_noise_max` values that the
        long-term meta-parameter update adapts are used; otherwise 0.05 / 0.15.

        Returns (final_score, details_dict). `final_score` is a plain Python
        float clamped to [0, 1] and rounded to 3 decimals; the numeric entries of
        `details_dict` computed here are plain floats as well.
        """
        mp = self.meta_parameters

        # Core weights
        base            = mp.get("score_base", 0.5)
        bonus_weight    = mp.get("score_bonus_weight", 0.05)
        semantic_weight = mp.get("score_semantic_weight", 0.2)
        # Fall back to the adapted noise range so meta-learned noise actually reaches scoring.
        noise_min       = mp.get("score_random_min", mp.get("score_noise_min", 0.05))
        noise_max       = mp.get("score_random_max", mp.get("score_noise_max", 0.15))

        # 1) Keyword bonus (lower-case the output once, not once per keyword)
        lowered_output = output.lower()
        bonus = sum(term in lowered_output for term in SCORE_BONUS_KEYWORDS) * bonus_weight

        # 2) Semantic similarity
        semantic_score = 0.0
        if prompt:
            try:
                vec_out = embedding_model.encode(output)

                # Ensure vec_out is a numpy array
                if vec_out is None:
                    vec_out = np.zeros(EMB_DIM)
                elif isinstance(vec_out, list):
                    vec_out = np.array(vec_out)

                vec_prompt = get_prompt_embedding(prompt)
                semantic_score = float(cosine_similarity([vec_out], [vec_prompt])[0][0])

            except Exception as e:
                # Deliberate best-effort: a failed embedding must not abort scoring.
                if verbose:
                    print(f"⚠️ Semantic scoring failed: {e}")
                semantic_score = 0.0

        # 3) Random noise (the stored bounds may be numpy scalars, so normalise to float)
        noise = float(random.uniform(noise_min, noise_max))

        # 4) Final aggregated score, clamped to [0, 1]
        raw_score = base + bonus + semantic_weight * semantic_score + noise
        final_score = round(float(max(0.0, min(raw_score, 1.0))), 3)

        if verbose:
            snippet = (output[:75] + '…') if len(output) > 75 else output
            print(f"📝 Scoring output: \"{snippet}\"")
            print(f"   base={base}, bonus={bonus:.3f}, sem*wt={semantic_weight*semantic_score:.3f}, noise={noise:.3f} → final={final_score}")

        # Optionally return details for debugging
        details = {
            "base": base,
            "bonus": bonus,
            "semantic_score": semantic_score,
            "semantic_weight": semantic_weight,
            "noise": noise
        }
        return final_score, details



    def cluster_output(self, output_text, task=None, prompt_key=None, score=None, verbose=False,
                       similarity_threshold=0.85):
        """
        Assign an output to the first sufficiently similar cluster, or start a new one.

        The output is embedded with `embedding_model.encode` and compared, in
        list order, against the stored "centroid" of each entry in
        `self.output_clusters`. The centroid is the embedding of the cluster's
        first member and is not updated as members are added.

        Arguments:
        - output_text: text to cluster
        - task, prompt_key: stored on a newly created cluster only
        - score: optional score; a matched cluster keeps the highest score seen
          and the corresponding output as `best_score` / `best_output`
        - verbose: print whether an existing cluster was reused or a new one created
        - similarity_threshold: minimum cosine similarity to join an existing
          cluster (default 0.85, the previously hard-coded value)

        Returns:
        - the matched or newly created cluster dict, or None when no embedding
          could be produced
        """
        vec = embedding_model.encode(output_text)
        if vec is None:
            return None

        vec = np.array(vec) if isinstance(vec, list) else vec

        # Compare with existing clusters (first match wins)
        for cluster in self.output_clusters:
            sim = cosine_similarity([vec], [cluster["centroid"]])[0][0]
            if sim >= similarity_threshold:
                cluster["members"].append(output_text)
                cluster["usage_count"] += 1

                if score is not None:
                    # A cluster created without a score stores best_score=None;
                    # comparing a number against None would raise TypeError.
                    previous_best = cluster.get("best_score")
                    if previous_best is None or score > previous_best:
                        cluster["best_score"] = score
                        cluster["best_output"] = output_text

                if verbose:
                    print(f"🔁 Output clustered with existing cluster (sim={sim:.2f})")
                return cluster

        # Create new cluster
        new_cluster = {
            "centroid": vec,
            "members": [output_text],
            "prompt_key": prompt_key,
            "task": task,
            "usage_count": 1,
            "best_score": score,
            "best_output": output_text
        }
        self.output_clusters.append(new_cluster)

        if verbose:
            print("🆕 New output cluster created.")
        return new_cluster








    @staticmethod
    def get_local_round_scenario_mode(interval_score_log):
        """
        Return the most frequent (mode) scenario label for the current local round.

        Each entry of `interval_score_log` that has a "scenario" key contributes
        its label after normalization by `normalize_foresight_key`; entries
        without that key are ignored. On a tie, the label that appeared first
        in the log is returned.

        Declared as a static method because it takes no `self`: without the
        decorator, calling it through an instance would bind the instance to
        `interval_score_log`.

        Returns "" when the log is None/empty or no entry carries a scenario.
        """
        if not interval_score_log:
            return ""

        label_counts = Counter(
            normalize_foresight_key(entry["scenario"])
            for entry in interval_score_log
            if "scenario" in entry
        )
        if not label_counts:
            return ""

        mode_scenario, _ = label_counts.most_common(1)[0]
        return mode_scenario






    def update_short_term_meta_parameters(
        self,
        foresight_signal=None,
        avg_efficiency_short_term=None,
        avg_meaningfulness_short_term=None,
        attention_score=None,
        verbose=True
    ):
        """
        Short-term (interval / local round) update for meta-parameters that may require fast adaptation
        (e.g., cooperation overrides, strategy/tactics fit, dropout prob), run at the end of each
        local round or interval.

        Uses the adaptive learning rate (same function as the long-term update), but with local round
        or interval stats.

        Uses:
            - foresight_signal/scenario (for direction & scale)
            - avg_efficiency_short_term (average across local round)
            - avg_meaningfulness_short_term (average across local round)
            - dedicated learning rates

        Physiology-based adaptation happens here, but the corresponding weights are adapted in the
        long-term meta-parameter update method.
        This update includes a temporary override or nudge for cooperation-related meta-parameters.
        """
        # Use short-term averages or fallback
        avg_eff = avg_efficiency_short_term if avg_efficiency_short_term is not None else 0.5
        avg_mean = avg_meaningfulness_short_term if avg_meaningfulness_short_term is not None else 0.5
        short_term_success = avg_mean > 0.6 and avg_eff > 0.6

        # Compute adaptive learning rate (local round/interval scope)
        learning_rate = self.compute_learning_rate(avg_eff, avg_mean)

        # --- Scenario-driven deltas (for local short-term adaptation) ---

        if interval_score_log is not None:
            scenario = get_local_round_scenario_mode(interval_score_log)
        else:
            scenario_raw = (foresight_signal or {}).get("scenario", "")
            scenario = normalize_foresight_key(scenario_raw)


        scenario_update_map = {
            "✅ Convergent Paths":         (+1.0, +1.0,  +1.0),
            "🧭 White Swan":               (+0.8, +0.8, +1.0),
            "🌫️ Grey Swan":                (+0.2, +0.5, +0.6),
            "🤝 Grey Rhino":               (-0.4, -0.2, +0.6),
            "🪂 Tipping Point":            (-0.6, -0.8, -0.6),
            "🧩 Cascading Discontinuity":  (-1.6, -1.6, -0.8),
            "🐉 Wild Card":                (-1.0, -1.2, -1.0),
            "🕳️ Black Swan":               (-2.0, -2.0, -1.4),
        }
        delta_c, delta_f, delta_coop = scenario_update_map.get(scenario, (0.0, 0.0, 0.0))

        uvr_similarity = (foresight_signal or {}).get("uvr_similarity", 0.0)

        uvr_output_var = (foresight_signal or {}).get("output_similarity_variance", 0.0)


        # --- Meta-learn UVR weights (w1–w5) based on signal variance
        uvr_weight_keys = [
            ("uvr_weight_prompt",  uvr_prompt_var),
            ("uvr_weight_output",  uvr_output_var),
            ("uvr_weight_graph",   0.0),  # Keep stable unless you add graph var later
            ("uvr_weight_path",    uvr_path_var),
            ("uvr_weight_physio",  0.0),  # Physiological UVR variance not tracked (yet)
        ]

        for key, signal_var in uvr_weight_keys:
            current = self.meta_parameters.get(key, 0.2)
            delta = learning_rate * signal_var  # ⬅️ grow with instability
            new_weight = np.clip(current + delta, 0.05, 0.5)  # keep it bounded
            self.meta_parameters[key] = new_weight


        # --- UVR Volatility-Based Adjustments ---
        uvr_volatility = uvr_prompt_var + uvr_output_var + uvr_path_var

        if uvr_volatility > 0.1:  # 🧠 You can meta-learn this threshold later
            self.meta_parameters["dropout_base_probability"] = min(
                self.meta_parameters.get("dropout_base_probability", 0.3) + 0.01, 0.5
            )
            self.meta_parameters["strategy_fit"] = max(
                self.meta_parameters.get("strategy_fit", 0.5) - 0.05, 0.0
            )
            self.meta_parameters["cooperation_bias"] = max(
                self.meta_parameters.get("cooperation_bias", 0.5) - 0.05, 0.0
            )
            if verbose:
                print(f"⚠️ {self.name} UVR volatility adjustment triggered — increased dropout, reduced fit/cooperation")



        # --- Retrieve current values & bounds ---
        coop_bias = self.meta_parameters.get("cooperation_bias", 0.5)
        coop_baseline = self.meta_parameters.get("cooperation_baseline", 0.5)
        rng_lo, rng_hi = self.meta_parameters.get("cooperation_randomness_range", (0.05, 0.3))
        coop_baseline_min, coop_baseline_max = self.meta_parameters.get("cooperation_baseline_range", (0.2, 0.8))
        rng_lo_min, rng_lo_max = 0.01, 0.2
        rng_hi_min, rng_hi_max = 0.05, 0.6
        coupling = self.meta_parameters.get("task_feature_coupling", 0.5)
        fit = self.meta_parameters.get("strategy_fit", 0.5)

        # --- Scenario-driven updates for meta-parameters ---
        # 1. task_feature_coupling (scenario + local performance)
        new_coupling = coupling + learning_rate * delta_c
        new_coupling += learning_rate * (avg_eff - 0.5) * 0.2
        self.meta_parameters["task_feature_coupling"] = np.clip(new_coupling, 0.0, 1.0)

        # 2. strategy_fit (scenario + local performance)
        new_fit = fit + learning_rate * delta_f
        new_fit += learning_rate * (avg_mean - 0.5) * 0.2
        self.meta_parameters["strategy_fit"] = np.clip(new_fit, 0.0, 1.0)

        # 3. cooperation_bias (scenario + short-term success)
        delta_coop_st = 0.05 if short_term_success else -0.05
        coop_bias = np.clip(coop_bias + learning_rate * (delta_coop + delta_coop_st), 0.0, 1.0)
        self.meta_parameters["cooperation_bias"] = coop_bias

        # 4. cooperation_baseline (scenario + nudge for Convergent Paths)
        if scenario.startswith("✅ Convergent Paths"):
            coop_baseline = min(coop_baseline + learning_rate * 0.01, coop_baseline_max)
        else:
            coop_baseline = max(coop_baseline - learning_rate * 0.01, coop_baseline_min)
        coop_baseline = np.clip(coop_baseline + learning_rate * delta_coop * 0.005, coop_baseline_min, coop_baseline_max)
        self.meta_parameters["cooperation_baseline"] = coop_baseline

        # 5. cooperation_randomness_range (scenario nudge)
        rng_lo = np.clip(rng_lo + learning_rate * delta_f * 0.01, rng_lo_min, rng_lo_max)
        rng_hi = np.clip(rng_hi + learning_rate * delta_f * 0.01, rng_hi_min, rng_hi_max)
        self.meta_parameters["cooperation_randomness_range"] = (rng_lo, rng_hi)

        # --- Attention-driven Dropout probability (short-term) ---
        dropout_min, dropout_max = self.meta_parameters.get("dropout_base_prob_range", (0.1, 0.5))
        current_dropout = self.meta_parameters.get("dropout_base_probability", 0.3)
        attn = 0.5 if attention_score is None else attention_score

        if attn < 0.4:
            self.meta_parameters["dropout_base_probability"] = min(current_dropout + 0.005, dropout_max)
        else:
            self.meta_parameters["dropout_base_probability"] = max(current_dropout - 0.005, dropout_min)

        if verbose:
            print(
                f"⏩ [Short-term META] "
                f"task_feature_coupling={self.meta_parameters['task_feature_coupling']:.3f}, "
                f"strategy_fit={self.meta_parameters['strategy_fit']:.3f}, "
                f"cooperation_bias={self.meta_parameters['cooperation_bias']:.3f}, "
                f"cooperation_baseline={self.meta_parameters['cooperation_baseline']:.3f}, "
                f"cooperation_randomness_range={self.meta_parameters['cooperation_randomness_range']}, "
                f"dropout_base_probability={self.meta_parameters['dropout_base_probability']:.3f}"
            )
            print(
                f"🔧 [META SUMMARY] {self.name} | "
                f"Coupling={self.meta_parameters['task_feature_coupling']:.3f}, "
                f"Strategy Fit={self.meta_parameters['strategy_fit']:.3f}"
            )
            print(
                f"🔧 [META PARAMS] {self.name} | "
                f"CoopBias={self.meta_parameters['cooperation_bias']:.3f}, "
                f"CoopBase={self.meta_parameters['cooperation_baseline']:.3f}, "
                f"CoopRng={self.meta_parameters['cooperation_randomness_range']}"
            )




    def retrieve_from_memory(self, prompt, top_k=3):
        """
        Build a retrieval context block from the agent's short-term memory.

        The prompt is embedded, every entry of ``self.short_memory`` that
        carries an "embedding" value is scored against it with cosine
        similarity, and the "output" fields of the ``top_k`` best-scoring
        entries are joined with newlines (best match first).

        Entries whose "embedding" is missing or None are ignored. Returns an
        empty string when no entry can be scored.
        """
        query_vec = get_prompt_embedding(prompt)

        # Pair each memory entry with its stored vector so the lookup is done once.
        embedded_entries = ((item.get("embedding"), item) for item in self.short_memory)
        scored = [
            (cosine_similarity([query_vec], [vec])[0][0], item["output"])
            for vec, item in embedded_entries
            if vec is not None
        ]

        # Rank on the score only; the stable sort keeps memory order among ties.
        ranked = sorted(scored, key=lambda pair: pair[0], reverse=True)
        return "\n".join(text for _, text in ranked[:top_k])




    # =======================
    # 7.4. **Role and Task Assignment**
    # =======================

    def _compute_dynamic_temperature(self, global_round: int) -> float:
        """
        Return the sampling temperature to use in ``global_round``.

        The value starts at the "jollycard_sampling_temperature"
        meta-parameter (default 1.0) and increases by
        "temperature_growth_per_round" (default 0.1) for every round after
        round 2, so rounds 1 and 2 both use the base temperature.
        """
        params = self.meta_parameters
        start_temperature = params.get("jollycard_sampling_temperature", 1.0)
        per_round_step = params.get("temperature_growth_per_round", 0.1)

        # No growth before round 3: injection only begins at round 2.
        extra_rounds = global_round - 2 if global_round > 2 else 0
        return start_temperature + extra_rounds * per_round_step

    def assign_metrics_from_prompt(self, prompt, top_k=3, verbose=True):
        """
        Choose the agent's evaluation metrics from the prompt text.

        Steps:
          1. A blank prompt gets one randomly chosen metric and returns early.
          2. Otherwise the prompt is encoded with ``self.embedding_model`` and
             scored against every vector in METRIC_EMBEDDINGS via a dot product.
          3. Metrics scoring at least the "metric_similarity_threshold"
             meta-parameter (default 0.4) are ranked and the best ``top_k`` kept.
          4. If nothing clears the threshold, the single best metric is used.

        The result is stored in ``self.metrics`` and, when the agent has a
        ``metrics_history`` attribute, appended to it. Nothing is returned.
        Selection depends only on the current prompt; no per-metric
        meta-weights are consulted here.
        """
        # Blank prompt: nothing to embed, so fall back to a random metric.
        if not prompt.strip():
            random_metric = random.choice(list(METRIC_EMBEDDINGS.keys()))
            self.metrics = [random_metric]
            if verbose:
                print(f"⚠️ {self.name} fallback metric (blank prompt): {random_metric}")
            return

        # Score every known metric against the prompt embedding.
        query_emb = self.embedding_model.encode(prompt)
        scores = {}
        for metric_name in METRIC_EMBEDDINGS:
            scores[metric_name] = float(np.dot(query_emb, METRIC_EMBEDDINGS[metric_name]))

        # Keep the metrics above the adaptive threshold, best first.
        min_score = self.meta_parameters.get("metric_similarity_threshold", 0.4)
        ranked = sorted(
            ((name, score) for name, score in scores.items() if score >= min_score),
            key=lambda pair: pair[1],
            reverse=True,
        )
        chosen = [name for name, _ in ranked[:top_k]]

        # Nothing passed: settle for the single closest metric.
        if not chosen:
            top_metric = max(scores, key=scores.get)
            chosen = [top_metric]
            if verbose:
                print(f"⚠️ {self.name} no metrics passed threshold; fallback to best: {top_metric}")

        if verbose:
            print(f"🎯 {self.name} assigned metrics from prompt: {chosen}")

        self.metrics = chosen
        if hasattr(self, 'metrics_history'):
            self.metrics_history.append(chosen)



    def assign_roles_from_prompt(self, prompt, top_k=3, global_round=1, verbose=True):
        """
        Set ``self.roles`` from the semantic similarity between the prompt and
        the known role embeddings.

        Behaviour:
          * Blank prompts, or prompts containing "Gemini Error", get one random
            role from POSSIBLE_ROLES.
          * Otherwise roles are ranked by cosine similarity and filtered by a
            threshold that depends on prompt length (0.5 for up to 6 words,
            0.65 for up to 15, 0.7 beyond). If none pass, the top role is used.
          * With non-empty ``self.born_roles``: the surviving roles that are
            also born roles are preferred (all survivors if there is no
            overlap). From round 3 on, up to "max_roles" are kept; before
            that only the first.
          * With empty ``self.born_roles``: one role is drawn at random from
            the first ``top_k`` survivors.
        """
        # 1) Unusable prompt -> random role
        unusable = (not prompt.strip()) or ("Gemini Error" in prompt)
        if unusable:
            self.roles = [random.choice(POSSIBLE_ROLES)]
            if verbose:
                print(f"⚠️ {self.name} fallback role (blank/error): {self.roles}")
            return

        # 2) Rank every role by similarity to the prompt (best first)
        query_vec = get_prompt_embedding(prompt)
        ranked = sorted(
            (
                (role_name, cosine_similarity([query_vec], [role_vec])[0][0])
                for role_name, role_vec in ROLE_EMBEDDINGS.items()
            ),
            key=lambda pair: pair[1],
            reverse=True,
        )

        # 3) Longer prompts must match more strongly
        word_count = len(prompt.split())
        if word_count <= 6:
            threshold = 0.5
        elif word_count <= 15:
            threshold = 0.65
        else:
            threshold = 0.7

        if verbose:
            preview = [role_name for role_name, _ in ranked[:top_k]]
            print(f"🔍 {self.name} top_{top_k} role candidates (pre‑threshold): {preview}")

        # 4) Apply the threshold
        survivors = [role_name for role_name, score in ranked if score >= threshold]
        if verbose:
            print(f"🔍 {self.name} meaningful roles (score ≥ {threshold}): {survivors}")

        # 5) Never end up with nothing: keep the best-ranked role
        if not survivors:
            top_role = ranked[0][0]
            survivors = [top_role]
            if verbose:
                print(f"⚠️ {self.name} assigning top role despite threshold: {top_role}")

        # 6a) No born roles -> random pick among the leading survivors
        if not self.born_roles:
            pool_size = min(top_k, len(survivors))
            picked = random.choice(survivors[:pool_size])
            self.roles = [picked]
            if verbose:
                print(f"🎲 {self.name} randomly assigned role from top_{pool_size}: {self.roles}")
            return

        # 6b) Born roles present -> prefer the overlap, else all survivors
        preferred = [role_name for role_name in survivors if role_name in self.born_roles] or survivors
        if global_round >= 3:
            # The learned role cap only applies from round 3 onward.
            role_cap = self.meta_parameters.get("max_roles", len(preferred))
            self.roles = preferred[:role_cap]
        else:
            self.roles = preferred[:1]
        if verbose:
            print(f"✅ {self.name} assigned roles from born_roles: {self.roles}")






    def reseed_roles_from_prompt(
        self, mission_prompt, global_round=1, top_k=3,
        threshold=0.4, allow_fallback=True, verbose=True
    ):
        """
        Re-derive ``self.roles`` from the mission prompt.

        Roles are ranked by cosine similarity to the prompt and those scoring
        at least ``threshold`` are kept. From round 3 on, up to the
        "max_roles" meta-parameter (default 3) are assigned; earlier rounds
        get only the best one. ``self.global_round`` is updated as a side
        effect.

        When no role passes the threshold, ``fallback_choose_role_tasks`` is
        invoked if ``allow_fallback`` is true; otherwise a message is printed.
        In both cases the method returns without touching ``self.roles`` here.

        ``top_k`` is accepted but not used by this method. Tasks are not
        reassigned here: the follow-up call to
        ``assign_tasks_from_roles_multi_round`` is currently disabled.
        """
        self.global_round = global_round

        # Score and rank all known roles against the mission text.
        mission_vec = get_prompt_embedding(mission_prompt)
        scored = [
            (role_name, cosine_similarity([mission_vec], [role_vec])[0][0])
            for role_name, role_vec in ROLE_EMBEDDINGS.items()
        ]
        scored.sort(key=lambda pair: pair[1], reverse=True)
        eligible = [role_name for role_name, score in scored if score >= threshold]

        if not eligible:
            if not allow_fallback:
                print(f"❌ No matching roles found for {self.name}.")
                return
            print(f"⚠️ {self.name} reseeding failed — fallback triggered.")
            self.fallback_choose_role_tasks(POSSIBLE_ROLES, POSSIBLE_TASKS, verbose=verbose)
            return

        if global_round >= 3:
            max_roles = self.meta_parameters.get("max_roles", 3)
            picked = eligible[:max_roles]
            if verbose:
                print(f"🎯 {self.name} applying max_roles={max_roles} (round {global_round})")
        else:
            picked = eligible[:1]
            if verbose:
                print(f"⏳ {self.name} selecting only 1 role (round {global_round})")

        # Order-preserving de-duplication.
        self.roles = list(dict.fromkeys(picked))
        if verbose:
            print(f"🔄 Roles reseeded for {self.name}: {self.roles}")



    def get_prompt_relevant_tasks(self, prompt, top_k=2, threshold=None, verbose=False):
        """
        Pick the tasks whose embeddings are closest to the prompt.

        Every task in TASK_EMBEDDINGS is scored by cosine similarity against
        the prompt embedding. Tasks scoring at least ``threshold`` (or the
        "task_similarity_threshold" meta-parameter, default 0.4, when
        ``threshold`` is None) are ranked and the best ``top_k`` returned.
        If none qualify, the single best-scoring task is returned instead.

        Returns a list of task keys, best match first.
        """
        if threshold is None:
            threshold = self.meta_parameters.get("task_similarity_threshold", 0.4)

        query_vec = get_prompt_embedding(prompt)
        scored = [
            (task_key, cosine_similarity([query_vec], [task_vec])[0][0])
            for task_key, task_vec in TASK_EMBEDDINGS.items()
        ]

        ranked = sorted(
            (pair for pair in scored if pair[1] >= threshold),
            key=lambda pair: pair[1],
            reverse=True,
        )
        chosen = [task_key for task_key, _ in ranked[:top_k]]

        # Nothing cleared the bar: return the closest task anyway.
        if not chosen:
            chosen = [max(scored, key=lambda pair: pair[1])[0]]
            if verbose:
                print(f"⚠️ No prompt-relevant task passed threshold; fallback to best match: {chosen[0]}")

        if verbose:
            print(f"🔍 Prompt-relevant tasks selected: {chosen}")

        return chosen


    def assign_tasks_from_roles_multi_round(
        self,
        prompt: str,
        global_round: int,
        allow_jollycard: bool = True,
        verbose: bool = False,
        top_k: Optional[int] = None
    ):
        """
        Assign tasks to this agent based on its roles, global_round, and prompt.

        1) Rounds 1–2: restrict to the single (primary) role.
        2) Round ≥3: use all of the agent's roles.
        3) Round ≥2 (if allow_jollycard): ask ``sample_jollycard_task`` for one
           extra task; when it is new it is appended and, if the agent has an
           active role, recorded in ``self.role_task_map`` for the primary role.
        4) Round ≥3 (if allow_jollycard): ask ``sample_jollycard_role`` for one
           extra role and append it to ``self.born_roles`` when new.
        5) De-duplicate and cap the list to the "max_tasks" meta-parameter.

        Args:
            prompt: Mission text used for seeding and for the jollycard prompts.
            global_round: Current global round number (1-based).
            allow_jollycard: Enables steps 3 and 4.
            verbose: Print progress messages.
            top_k: Number of seeded tasks to request; defaults to "max_tasks".

        Side effects: sets ``self.tasks``; may extend ``self.role_task_map``
        and ``self.born_roles``. Returns None.
        """
        # ── 1) Role selection ─────────────────────────────────────
        if global_round < 3:
            primary = self.roles[0] if self.roles else None
            roles_to_use = [primary] if primary else []
            if verbose:
                print(f"⏳ Restricting to single role (global round {global_round}): {roles_to_use}")
        else:
            roles_to_use = list(self.roles)
            if verbose:
                print(f"🚀 Multi-role mode (round {global_round}): {roles_to_use}")

        # ── 2) Seed tasks from roles ─────────────────────────────
        if roles_to_use:
            cap = top_k if top_k is not None else self.meta_parameters.get("max_tasks", 5)
            seeded = self.get_task_candidates_for_roles(
                roles=roles_to_use,
                prompt=prompt,
                top_k=cap
            )
            if verbose:
                print(f"🔍 {self.name} seeded tasks from store (top_{cap}): {seeded}")
            # Copy so later appends never mutate the list handed back by the store helper.
            task_list = list(seeded)
        else:
            task_list = []

        # ── 3) Jollycard-task injection (round ≥2) ──────────────────
        if allow_jollycard and global_round >= 2:
            temp = self._compute_dynamic_temperature(global_round)
            wc = self.sample_jollycard_task(prompt, global_round, temperature=temp)
            if wc and wc not in task_list:
                task_list.append(wc)
                if roles_to_use:
                    # Persist it into the static pool for the primary role,
                    # without creating duplicate entries there.
                    primary = roles_to_use[0]
                    pool = self.role_task_map.setdefault(primary, [])
                    if wc not in pool:
                        pool.append(wc)
                    if verbose:
                        print(f"🎲 {self.name} injected jollycard task '{wc}' (temp={temp}) "
                              f"into static pool for role '{primary}'")
                elif verbose:
                    # An agent without roles has no pool to persist into; the
                    # task is still assigned for this round.
                    print(f"🎲 {self.name} injected jollycard task '{wc}' (temp={temp}); "
                          f"no active role to persist it under")

        # ── 4) Jollycard-role injection (round ≥3) ──────────────────
        if allow_jollycard and global_round >= 3:
            # sample_jollycard_role derives the same round-based temperature
            # itself, so it is computed here only for the log line. Passing it
            # as a keyword would fail against a (prompt, global_round) signature.
            temp = self._compute_dynamic_temperature(global_round)
            wr = self.sample_jollycard_role(prompt, global_round)
            if wr and wr not in self.born_roles:
                self.born_roles.append(wr)
                if verbose:
                    print(f"🎲 {self.name} injected jollycard role '{wr}' (temp={temp}) "
                          f"into born_roles")

        # ── 5) Deduplicate & cap to max_tasks ───────────────────
        # NOTE(review): the jollycard task is appended last, so it is the first
        # to be cut when the seeded list already fills max_tasks — confirm intent.
        max_t = self.meta_parameters.get("max_tasks", 5)
        # dict.fromkeys preserves order and removes duplicates
        self.tasks = list(dict.fromkeys(task_list))[:max_t]

        if verbose:
            print(f"🧠 {self.name} assigned tasks: {self.tasks}")



    def fallback_choose_tasks(self, possible_tasks=None, verbose=True):
        """
        Assign tasks from the agent's current roles alone, without a prompt.

        The task candidates of each role are gathered in role order,
        optionally restricted to ``possible_tasks``, and de-duplicated into
        ``self.tasks``. If that leaves nothing, one task is drawn at random
        from ``possible_tasks`` (or from all TASK_DESCRIPTIONS keys when
        ``possible_tasks`` is empty or None).
        """
        gathered = []
        for role in getattr(self, "roles", []):
            gathered.extend(
                self.get_task_candidates_for_roles([role], prompt=None, top_k=None)
            )

        # Restrict to the permitted set when one is supplied.
        if possible_tasks is not None:
            gathered = [task for task in gathered if task in possible_tasks]

        # Order-preserving de-duplication.
        self.tasks = list(dict.fromkeys(gathered))

        if self.tasks:
            if verbose:
                print(f"⚙️ {self.name} fallback-assigned tasks: {self.tasks}")
            return

        # Last resort: a random task so the agent is never left idle.
        fallback_pool = possible_tasks if possible_tasks else list(TASK_DESCRIPTIONS.keys())
        self.tasks = [random.choice(fallback_pool)]
        if verbose:
            print(f"⚠️ {self.name} had no tasks after fallback. Randomly assigned: {self.tasks[0]}")


    def get_tasks_for_roles(self, roles=None, verbose=False):
        """
        Collect the tasks linked to one or more roles.

        ``roles`` defaults to ``self.roles``; a single non-list value is
        wrapped in a list. Each role's candidates are fetched with
        ``get_task_candidates_for_roles``, merged in role order without
        duplicates, and truncated to the "max_tasks" meta-parameter
        (default 5).

        Returns the resulting list, or an empty list when there are no roles.
        """
        role_list = self.roles if roles is None else roles

        if not role_list:
            if verbose:
                print(f"⚠️ {self.name} received empty role list. No tasks returned.")
            return []

        if not isinstance(role_list, list):
            role_list = [role_list]

        collected = []
        for role in role_list:
            role_tasks = self.get_task_candidates_for_roles([role], prompt=None, top_k=None)
            collected.extend(role_tasks)
            if verbose:
                print(f"🔍 {self.name} tasks for role '{role}': {role_tasks or '[none]'}")

        # De-duplicate while keeping first-seen order, then apply the cap.
        limit = self.meta_parameters.get("max_tasks", 5)
        result = list(dict.fromkeys(collected))[:limit]

        if verbose:
            print(f"✅ {self.name} aggregated tasks (capped to {limit}): {result}")

        return result

    def get_task_candidates_for_roles(self, roles, prompt=None, top_k=None):
        """
        List the task keys available to the given roles, best first.

        A task from ``self.task_store`` qualifies when its key appears in
        ``self.role_task_map`` for at least one of ``roles``.

        Ranking:
          * ``prompt`` is None: by each entry's "last_score", descending.
          * otherwise: by 0.7 * last_score + 0.3 * cosine similarity between
            the prompt and the entry's "embedding". The blended value is also
            written back onto the store entry as "combined_score".

        The list is truncated to ``top_k``; when ``top_k`` is None (or 0) the
        "max_tasks" meta-parameter is used, and if that is absent all
        candidates are returned.
        """
        # Every task key mapped to any requested role.
        permitted_keys = [
            task_key
            for role in roles
            for task_key in self.role_task_map.get(role, [])
        ]
        matching = [
            record for record in self.task_store.values()
            if record["key"] in permitted_keys
        ]

        if prompt is None:
            ranked = sorted(matching, key=lambda record: record["last_score"], reverse=True)
        else:
            # Blend historical score with semantic closeness to the prompt.
            query_vec = get_prompt_embedding(prompt)
            closeness = {
                record["key"]: cosine_similarity([query_vec], [record["embedding"]])[0][0]
                for record in matching
            }
            for record in matching:
                record["combined_score"] = (
                    0.7 * record["last_score"] + 0.3 * closeness[record["key"]]
                )
            ranked = sorted(matching, key=lambda record: record["combined_score"], reverse=True)

        limit = top_k
        if not limit:
            limit = self.meta_parameters.get("max_tasks", len(ranked))

        return [record["key"] for record in ranked[:limit]]


    # =======================
    # 7.5. **Role Re-evaluation & Adjustment**
    # =======================

    def reevaluate_roles(self, prompt, efficiency_threshold=0.55, verbose=True):
        """
        Collapse a multi-role agent to a single role when its strategy fit is low.

        If the "strategy_fit" meta-parameter (default 0.5) is below
        ``efficiency_threshold`` and the agent holds more than one role, only
        the role whose embedding is most similar to the prompt is kept.
        Otherwise the roles are left as they are.

        A missing or non-list ``self.roles`` is first reset to an empty list.
        """
        if not (hasattr(self, "roles") and isinstance(self.roles, list)):
            self.roles = []

        fit = self.meta_parameters.get("strategy_fit", 0.5)
        should_collapse = fit < efficiency_threshold and len(self.roles) > 1

        if not should_collapse:
            if verbose:
                print(f"✅ {self.name} kept roles: {self.roles}")
            return

        # Keep only the role closest in meaning to the prompt.
        query_vec = get_prompt_embedding(prompt)
        closeness = {}
        for role in self.roles:
            closeness[role] = cosine_similarity([query_vec], [ROLE_EMBEDDINGS[role]])[0][0]
        self.roles = [max(closeness, key=closeness.get)]

        if verbose:
            print(f"⚠️ {self.name} dropped to single role based on strategy fit: {self.roles[0]}")

    from typing import Optional


    def sample_jollycard_task(self, prompt: str, global_round: int, temperature: float) -> Optional[str]:
        """
        Ask the LLM for one extra ("jollycard") task and register it.

        Before round 2 nothing is sampled and None is returned. Otherwise the
        LLM is prompted, on behalf of the agent's first role (or "General"
        when it has none), for a task not already in ``self.tasks``, answered
        as ``key: ...`` / ``desc: ...`` lines.

        Parsing: everything after the first "desc:" is the description and the
        text after the last "key:" before it is the key. If "desc:" is absent,
        the first word of the reply is the key and the whole reply is the
        description.

        A key that is not yet in ``self.task_store`` is added there with a
        unit-normalised embedding of its description and ``last_score`` 0.0.

        Args:
            prompt: Mission text inserted into the LLM prompt.
            global_round: Current global round number.
            temperature: Sampling temperature passed to ``llm_generate``.

        Returns:
            The task key, or None when the round is too early or the LLM
            reply is empty or yields no key.
        """
        if global_round < 2:
            return None

        existing = set(self.tasks)
        role = (self.roles[0] if self.roles else "General")

        llm_prompt = (
            f"As the “{role}” on mission “{prompt}”,\n"
            "Please suggest exactly one additional task, compatible with that role,\n"
            f"that is NOT already in this list: {list(existing)}.\n"
            "Respond in the format:\n"
            "  key: <one-word-key>\n"
            "  desc: <detailed description>\n"
            "Just give me the key and desc lines."
        )

        raw = (llm_generate(
            model_key=self.model_key,
            prompt=llm_prompt,
            temperature=temperature
        ) or "").strip()

        # An empty reply has no first word to fall back on; treat it as "no suggestion".
        if not raw:
            return None

        # Split on the FIRST "desc:" only, so a description that itself
        # contains "desc:" does not break the key/description parse.
        head, marker, tail = raw.partition("desc:")
        if marker:
            key = head.split("key:")[-1].strip()
            desc = tail.strip()
        else:
            # Fallback: assume the first word is the key
            key = raw.split()[0]
            desc = raw

        # Never register a task under an empty key.
        if not key:
            return None

        # Only add new tasks to the store
        if key not in self.task_store:
            emb = embedding_model.encode(desc)
            norm = np.linalg.norm(emb)
            if norm > 0:
                emb = emb / norm
            self.task_store[key] = {
                "key": key,
                "desc": desc,
                "embedding": emb,
                "last_score": 0.0
            }

        # Return the task key for assignment
        return key


    def sample_jollycard_role(
        self,
        prompt: str,
        global_round: int,
        temperature: Optional[float] = None,
    ) -> Optional[str]:
        """
        Ask the LLM for one extra ("jollycard") role compatible with the
        agent's current roles.

        Before round 3 nothing is sampled and None is returned.

        Args:
            prompt: Mission text inserted into the LLM prompt.
            global_round: Current global round number.
            temperature: Sampling temperature for ``llm_generate``. When
                omitted it is derived from the round via
                ``_compute_dynamic_temperature``. Accepting it keeps this
                method call-compatible with ``sample_jollycard_task`` and
                with callers that pass ``temperature=`` explicitly.

        Returns:
            The suggested role name, or None when the round is too early, the
            reply is empty, or the role is already in ``self.born_roles``.
        """
        if global_round < 3:
            return None

        existing = set(self.born_roles)
        if temperature is None:
            temperature = self._compute_dynamic_temperature(global_round)

        llm_prompt = (
            f"As the agent playing roles {self.roles} on mission:\n"
            f"  “{prompt}”\n"
            "Please suggest exactly ONE additional role, compatible\n"
            f"with these, that is NOT already in {list(existing)}.\n"
            "Just respond with the role name."
        )

        # `or ""` guards against a None reply before stripping.
        suggestion = (llm_generate(
            model_key=self.model_key,
            prompt=llm_prompt,
            temperature=temperature
        ) or "").strip()

        return suggestion if suggestion and suggestion not in existing else None



    # =======================
    # 7.6. Conflict Resolution
    # =======================

    def detect_and_resolve_role_task_conflict(self, prompt, verbose=True):
        """
        Drop roles that have none of their expected tasks assigned, then
        rebuild the task list.

        A role is in conflict when no task returned by
        ``get_task_candidates_for_roles`` for it appears in ``self.tasks``.
        When conflicts exist:
          1. the conflicting roles are removed from ``self.roles``;
          2. ``self.tasks`` is reduced to tasks valid for the remaining roles;
          3. if ``self.global_round`` exists and is ≥ 2, a jollycard task from
             outside the valid set is added with probability
             "jollycard_injection_weight" (default 0.5);
          4. tasks are reassigned via ``assign_tasks_from_roles_multi_round``.

        Without conflicts nothing is changed. Returns None.
        """
        # A role conflicts when none of its expected tasks is currently assigned.
        conflicts = [
            role for role in self.roles
            if not any(
                task in self.tasks
                for task in self.get_task_candidates_for_roles([role], prompt=None, top_k=None)
            )
        ]

        if not conflicts:
            if verbose:
                print(f"✅ {self.name} has no role-task conflicts.")
            return

        if verbose:
            print(f"⚠️ {self.name} detected role-task conflict(s): {conflicts}")

        # Remove conflicting roles
        self.roles = [r for r in self.roles if r not in conflicts]

        # Keep only tasks that still belong to a remaining role
        valid_tasks = set()
        for role in self.roles:
            valid_tasks.update(
                self.get_task_candidates_for_roles([role], prompt=None, top_k=None)
            )
        self.tasks = [t for t in self.tasks if t in valid_tasks]

        # Decide whether to inject a jollycard based on the meta-learned weight
        inject_prob = self.meta_parameters.get("jollycard_injection_weight", 0.5)

        # Jollycards only from global_round >= 2
        if hasattr(self, "global_round") and self.global_round >= 2:
            if random.random() < inject_prob:
                jollycard_pool = [t for t in self.task_store.keys() if t not in valid_tasks]
                if jollycard_pool:
                    chosen_task = self._pick_jollycard_from_pool(prompt, jollycard_pool)
                    self.tasks.append(chosen_task)
                    if verbose:
                        print(f"🎲 {self.name} added jollycard task (meta-learned): {chosen_task}")
            else:
                if verbose:
                    print(f"🚫 {self.name} skipped jollycard injection this time.")

        # Reassign tasks based on updated roles (this call is intentional; keep it).
        # NOTE(review): assign_tasks_from_roles_multi_round rebuilds self.tasks
        # from scratch, so the jollycard appended above does not appear to
        # survive this call — confirm whether it should be re-added afterwards.
        self.assign_tasks_from_roles_multi_round(prompt, global_round=getattr(self, "global_round", 1), allow_jollycard=True, verbose=verbose)

        if verbose:
            print(f"✅ {self.name} resolved role-task conflicts. Roles → {self.roles}, Tasks → {self.tasks}")

    def _pick_jollycard_from_pool(self, prompt, jollycard_pool):
        """
        Sample one task from ``jollycard_pool``, favouring prompt-relevant ones.

        Each task is scored by cosine similarity between the prompt and its
        TASK_EMBEDDINGS vector (0.0 when it has none). Tasks scoring at least
        "jollycard_importance_threshold" (default 0.3) are eligible; if none
        are, the whole pool is. One task is then drawn with weight
        ``max(score, 0) ** (1 / jollycard_sampling_temperature)``; the draw is
        uniform when the temperature is not positive or all weights are zero.
        """
        importance_threshold = self.meta_parameters.get("jollycard_importance_threshold", 0.3)
        sampling_temperature = self.meta_parameters.get("jollycard_sampling_temperature", 1.0)

        prompt_vec = get_prompt_embedding(prompt)
        scores = []
        for task in jollycard_pool:
            task_vec = TASK_EMBEDDINGS.get(task)
            if task_vec is not None:
                sim = cosine_similarity([prompt_vec], [task_vec])[0][0]
            else:
                sim = 0.0
            scores.append((task, sim))

        # Filter by importance threshold, falling back to every candidate.
        eligible = [(t, s) for t, s in scores if s >= importance_threshold] or scores
        candidates = [t for t, _ in eligible]

        if sampling_temperature > 0:
            exponent = 1.0 / sampling_temperature
            # Clamp at zero: a negative similarity raised to a fractional
            # power is not a real number and would poison the weights.
            weights = [max(float(s), 0.0) ** exponent for _, s in eligible]
        else:
            # A non-positive temperature has no valid exponent; draw uniformly.
            weights = []

        if sum(weights) > 0:
            # random.choices accepts unnormalised relative weights.
            return random.choices(candidates, weights=weights, k=1)[0]
        return random.choice(candidates)

    # =======================
    # 7.7. **Resource Management and Pruning & Dropout**
    # =======================

    def compute_fatigue(self):
        """
        Estimate the agent's fatigue as a value rounded to three decimals.

        The raw load is ``usage_count`` plus the number of assigned tasks
        weighted by "fatigue_task_weight" (default 0.15) plus the seconds
        since ``start_time`` weighted by "fatigue_time_weight" (default 0.03).
        The load is passed through ``sigmoid`` before rounding.
        """
        params = self.meta_parameters
        seconds_alive = time.time() - self.start_time
        task_weight = params.get("fatigue_task_weight", 0.15)
        time_weight = params.get("fatigue_time_weight", 0.03)

        # usage_count enters unweighted; only tasks and time are scaled.
        load = self.usage_count + task_weight * len(self.tasks) + time_weight * seconds_alive
        return round(sigmoid(load), 3)

    def compute_hunger(self):
        """
        Estimate the agent's hunger from its usage count alone.

        ``usage_count`` is scaled by the "dropout_hunger_weight"
        meta-parameter (default 0.1), passed through ``sigmoid``, and rounded
        to three decimals.
        """
        scale = self.meta_parameters.get("dropout_hunger_weight", 0.1)
        return round(sigmoid(scale * self.usage_count), 3)

    def compute_attention(self):
        """
        Compute the agent's attention, rounded to three decimals.

        Attention is ``1 / (1 + usage_count + w_time * elapsed
        + w_fatigue * fatigue + w_hunger * hunger)``, so it shrinks as usage,
        elapsed time, fatigue, or hunger grow. The weights come from the
        "dropout_time_weight" (default 0.05), "dropout_fatigue_weight"
        (default 0.2) and "dropout_hunger_weight" (default 0.2)
        meta-parameters.
        """
        elapsed = time.time() - self.start_time
        fatigue_level = self.compute_fatigue()
        hunger_level = self.compute_hunger()

        params = self.meta_parameters
        # Accumulate the denominator term by term, in a fixed order.
        load = 1.0 + self.usage_count
        load += params.get("dropout_time_weight", 0.05) * elapsed
        load += params.get("dropout_fatigue_weight", 0.2) * fatigue_level
        load += params.get("dropout_hunger_weight", 0.2) * hunger_level

        return round(1.0 / load, 3)


    def compute_base_dropout_probability(self):
        """
        Return the base dropout probability after exponential decay.

        The "dropout_base_probability" meta-parameter (default 0.3) is
        multiplied by ``exp(-decay_rate * progress)``, where ``decay_rate`` is
        "dropout_decay_rate" (default 3.0) and ``progress`` is
        ``self.interval`` divided by ``self.local_round`` (floored at 1 to
        avoid dividing by zero).
        """
        params = self.meta_parameters
        prior = params.get("dropout_base_probability", 0.3)
        rate = params.get("dropout_decay_rate", 3.0)

        rounds = max(1, self.local_round)
        decay = math.exp(-rate * (self.interval / rounds))
        return prior * decay

    def apply_dropout(self, prompt, verbose=True):
        """
        Stochastically deactivate roles and tasks for the current interval.

        A role is dropped outright when attention is below the
        ``attention_threshold`` meta-parameter. Otherwise it is dropped with
        probability ``base * sigmoid(-w_att*attention + w_fat*fatigue +
        w_hun*hunger)``, where ``base`` comes from
        ``compute_base_dropout_probability()`` and the weights are the
        ``dropout_*_weight`` meta-parameters. For every surviving role, each
        candidate task returned by ``get_task_candidates_for_roles`` is dropped
        with that same probability. The drop decision itself is a coin toss;
        no score is consulted in this method.

        Side effects: ``self.roles`` and ``self.tasks`` are replaced by the
        surviving roles / kept tasks, after which
        ``detect_and_resolve_role_task_conflict`` is run quietly.

        Args:
            prompt: Current prompt, forwarded to task-candidate selection and
                to conflict resolution.
            verbose: When True, print the physiology header and a summary.

        Returns:
            tuple: ``(dropped_roles, kept_tasks_by_role)`` — the list of roles
            removed, and a dict mapping each surviving role to its kept tasks
            (roles that kept no task are omitted).
        """
        from math import exp

        # 1) Current physiology
        attention = self.compute_attention()
        fatigue = self.compute_fatigue()
        hunger = self.compute_hunger()

        # 2) Base probability, threshold and the adjusted drop probability.
        #    All inputs are fixed for the duration of this call, so the
        #    probability is computed once instead of on every sample.
        base_dropout_prob = self.compute_base_dropout_probability()
        meta = self.meta_parameters
        attention_threshold = meta.get("attention_threshold", 0.3)

        influence = (- meta.get("dropout_attention_weight", 0.1) * attention +
                     meta.get("dropout_fatigue_weight", 0.1) * fatigue +
                     meta.get("dropout_hunger_weight", 0.1) * hunger)
        drop_prob = base_dropout_prob * (1 / (1 + exp(-influence)))

        def coin_toss():
            """Return True when this draw says "drop"."""
            return random.random() < drop_prob

        # 3) Summary header (verbose)
        if verbose:
            print(f"🔧 [Dropout] {self.name}: "
                  f"attention={attention:.2f}, fatigue={fatigue:.2f}, hunger={hunger:.2f}")

        # 4) Drop roles: deterministically on low attention, else by coin toss
        #    (the short-circuit means no random draw is made on low attention).
        low_attention = attention < attention_threshold
        dropped_roles = [role for role in list(self.roles)
                         if low_attention or coin_toss()]
        self.roles = [r for r in self.roles if r not in dropped_roles]

        # 5) Drop tasks per remaining role
        kept_tasks_by_role = {}
        top_k = meta.get("max_tasks", 5)
        for role in self.roles:
            candidates = self.get_task_candidates_for_roles(
                roles=[role],
                prompt=prompt,
                top_k=top_k
            )
            kept = [t for t in candidates if not coin_toss()]
            if kept:
                kept_tasks_by_role[role] = kept

        # 6) Aggregate kept tasks
        self.tasks = [t for tasks in kept_tasks_by_role.values() for t in tasks]

        # 7) Resolve any role↔task conflicts, quietly
        self.detect_and_resolve_role_task_conflict(prompt, verbose=False)

        # 8) Unified summary (verbose)
        if verbose:
            print(f"🧠 [Dropout Results] {self.name} → dropped roles: {dropped_roles or '—'}, remaining tasks: {self.tasks or '—'}")

        return dropped_roles, kept_tasks_by_role




    def compute_base_pruning_threshold(self):
        """
        Return the score threshold below which tasks become pruning candidates.

        The configured ``pruning_threshold`` is scaled by
        ``exp(-rate * progress)``, where ``progress`` is ``self.interval``
        divided by ``self.local_round`` (the divisor is clamped to at least 1).
        """
        params = self.meta_parameters
        ceiling = params.get("pruning_threshold", 0.4)
        rate = params.get("pruning_decay_rate", 3.0)

        rounds = max(1, self.local_round)  # guard against division by zero
        progress = self.interval / rounds
        decay = math.exp(-rate * progress)
        return ceiling * decay


    def compute_metric_score(self, prompt, metric):
        """
        Return the cosine similarity between a prompt and a metric.

        Both inputs are embedded through the module-level helpers
        ``get_prompt_embedding`` and ``get_metric_embedding``; the single
        entry of the resulting 1x1 similarity matrix is returned.
        """
        prompt_row = [get_prompt_embedding(prompt)]
        metric_row = [get_metric_embedding(metric)]
        similarity_matrix = cosine_similarity(prompt_row, metric_row)
        return similarity_matrix[0][0]


    def _compute_pruning(self, prompt):
        """
        Select roles and tasks whose scores fall below the pruning threshold.

        Only tasks (and the roles owning them) are considered; metrics are
        not pruned. This method itself does not modify roles or tasks — the
        caller applies the result.

        Each distinct task is scored once via ``compute_task_score`` and the
        score is reused for both the role and the task decision. Tasks are
        reported in first-seen order of ``self.role_task_map`` so the output
        is reproducible across runs (iterating a ``set`` is not).

        Args:
            prompt: Prompt passed through to ``compute_task_score``.

        Returns:
            tuple: ``(pruned_roles, pruned_tasks)``. A role is listed when it
            has at least one task and its best task score is below the
            threshold; a task is listed when its own score is below it.
        """
        threshold = self.compute_base_pruning_threshold()

        # Score every distinct task exactly once, remembering first-seen order.
        scores = {}
        for tasks in self.role_task_map.values():
            for task in tasks:
                if task not in scores:
                    scores[task] = self.compute_task_score(prompt, task)

        # A role goes when even its best task is below the threshold.
        pruned_roles = [
            role
            for role, tasks in self.role_task_map.items()
            if tasks and max(scores[t] for t in tasks) < threshold
        ]

        # A task goes when its own score is below the threshold.
        pruned_tasks = [task for task, score in scores.items() if score < threshold]

        return pruned_roles, pruned_tasks


    def apply_pruning(self, prompt, verbose=True):
        """
        Prune low-performing tasks/roles, with a stochastic "second chance".

        Steps:
          1. Tasks for which ``self._prune_condition`` holds are removed from
             ``self.tasks`` immediately.
          2. Each remaining task whose average score over its last three
             ``short_memory`` entries is below the decayed threshold is either
             reactivated (kept) with probability
             ``pruning_reactivation_prob * sigmoid(-w_a*attention +
             w_f*fatigue + w_h*hunger)``, or removed from ``self.tasks`` and
             ``self.task_store``. A reactivated task re-adds any born role
             that owns it in ``ROLE_TASK_MAP`` and is currently inactive.
          3. ``_compute_pruning`` supplies bulk role/task removals, which are
             purged from ``self.born_roles`` and ``self.role_task_map``.
          4. A summary entry is appended to ``self.purge_log``.

        Physiology only influences the reactivation probability, not the
        pruning decision, and metrics are never pruned. Reactivation and role
        restoration happen regardless of ``verbose``; ``verbose`` controls
        printing only.

        Args:
            prompt: Current prompt, forwarded to conflict resolution and to
                ``_compute_pruning``.
            verbose: When True, print per-task logs and the final summary.

        Returns:
            list: Tasks removed in step 1. Empty when ``short_memory`` holds
            fewer than three entries (in which case nothing is done).

        Raises:
            KeyError: If ``pruning_reactivation_prob`` is missing from the
                meta-parameters, or a ``pruning_*_weight`` is missing when a
                reactivation roll is needed.
        """
        # ── Not enough history yet? ──────────────────────────────────────────
        if len(self.short_memory) < 3:
            return []

        # ── 1) Compute threshold & physiology ───────────────────────────────
        threshold = self.compute_base_pruning_threshold()
        attention = self.compute_attention()
        fatigue = self.compute_fatigue()
        hunger = self.compute_hunger()
        meta = self.meta_parameters
        base_prob = meta["pruning_reactivation_prob"]

        # ── 2) Reactivation roll ────────────────────────────────────────────
        def reactivation_roll():
            """Return (reactivate?, probability used) for one candidate."""
            from math import exp
            influence = ((-meta["pruning_attention_weight"] * attention)
                         + (meta["pruning_fatigue_weight"] * fatigue)
                         + (meta["pruning_hunger_weight"] * hunger))
            probability = base_prob * (1 / (1 + exp(-influence)))
            return random.random() < probability, probability

        # ── 3) Condition-based pruning ──────────────────────────────────────
        removed_tasks = []
        for task in list(self.tasks):
            if self._prune_condition(task):
                removed_tasks.append(task)
                self.tasks.remove(task)
                if verbose:
                    print(f"🗑️ [Prune] {self.name} removed task: {task}")
        # The bulk summary below is suppressed when per-task lines were printed.
        per_task_logged = bool(verbose and removed_tasks)

        # ── 4) Score-based pruning + reactivation ───────────────────────────
        reactivated_tasks = []
        locally_pruned = []
        for task in list(self.tasks):
            recent = [m for m in self.short_memory
                      if isinstance(m, dict) and m.get("task") == task][-3:]
            avg_score = sum(m.get("score", 0) for m in recent) / max(1, len(recent))
            if avg_score < threshold:
                reactivate, p = reactivation_roll()
                if reactivate:
                    reactivated_tasks.append(task)
                    if verbose:
                        print(f"♻️ {self.name} reactivated '{task}' (p={p:.3f})")
                    # Restore a born role that owns the reactivated task.
                    for role, role_tasks in ROLE_TASK_MAP.items():
                        if task in role_tasks and role not in self.roles and role in self.born_roles:
                            self.roles.append(role)
                            if verbose:
                                print(f"🎭 {self.name} re-added role '{role}' due to reactivated task '{task}'")
                else:
                    locally_pruned.append(task)
                    if verbose:
                        print(f"✂️ {self.name} pruned '{task}' (avg={avg_score:.2f}<thr={threshold:.2f})")

        # Remove those pruned by score, from the active list and the store.
        self.tasks = [t for t in self.tasks if t not in locally_pruned]
        for key in locally_pruned:
            self.task_store.pop(key, None)

        # Resolve any role/task conflicts silently.
        self.detect_and_resolve_role_task_conflict(prompt, verbose=False)

        # ── 5) Bulk pruning hook ────────────────────────────────────────────
        pruned_roles, pruned_tasks = self._compute_pruning(prompt)

        # Only print a bulk summary if no per-task logs occurred.
        if verbose and not per_task_logged and (pruned_roles or pruned_tasks):
            print(f"🗑️ Bulk pruning → roles: {pruned_roles}, tasks: {pruned_tasks}")

        # ── 6) Update static definitions & purge_log ────────────────────────
        before_roles = set(self.born_roles)  # snapshot BEFORE removal

        for r in pruned_roles:
            if r in self.born_roles:
                self.born_roles.remove(r)
            self.role_task_map.pop(r, None)

        for t in pruned_tasks:
            for ts in self.role_task_map.values():
                if t in ts:
                    ts.remove(t)

        after_roles = set(self.born_roles)
        all_before_tasks = set(self.born_tasks)
        all_after_tasks = {t for task_list in self.role_task_map.values() for t in task_list}

        removed_r = sorted(before_roles - after_roles)
        retained_r = sorted(after_roles)
        removed_t = sorted(all_before_tasks - all_after_tasks)
        retained_t = sorted(all_after_tasks)
        retained_m = sorted(set(self.metrics))

        self.purge_log.append({
            "global_round":      self.runner.global_round + 1,
            "removed_roles":     removed_r,
            "retained_roles":    retained_r,
            "removed_tasks":     removed_t,
            "retained_tasks":    retained_t,
            "retained_metrics":  retained_m,
            "reactivated_tasks": sorted(reactivated_tasks)
        })

        # ── 7) Final static-purge summary ───────────────────────────────────
        if verbose:
            print(f"   ↪️ Metrics retained: {retained_m}")
            print(f"   ↪️ Static roles removed: {removed_r or '–'}")
            print(f"   ↪️ Static roles retained: {retained_r}")
            print(f"   ↪️ Static tasks removed: {removed_t or '–'}")
            print(f"   ↪️ Static tasks retained: {retained_t}")
            print(f"   ♻️ Reactivated tasks: {reactivated_tasks or '–'}")

        return removed_tasks






    # =======================
    # 7.8. **Memory & Redundancy Management**
    # =======================

    def trigger_memory_based_recovery(self, verbose=True):
        """
        Escape a stagnant state by reloading roles and tasks from memory.

        Delegates to ``initialize_from_memory`` with the "most_similar"
        strategy, then zeroes ``self.stagnation_counter``.
        """
        if verbose:
            announcement = f"🧠 {self.name} triggering memory‑based recovery."
            print(announcement)

        recovery_options = {"strategy": "most_similar", "verbose": verbose}
        self.initialize_from_memory(**recovery_options)

        self.stagnation_counter = 0



    def is_redundant_content(self, task, content, memory, threshold=0.85):
        """
        Report whether ``content`` closely matches an earlier output of ``task``.

        ``content`` and the ``"output"`` of every entry in ``memory`` recorded
        for the same task are encoded with the module-level
        ``embedding_model`` and compared by cosine similarity.

        Returns:
            tuple: ``(True, similarity)`` for the first entry whose similarity
            reaches ``threshold``; otherwise ``(False, 0.0)``. Any exception
            raised during the check is printed and also yields
            ``(False, 0.0)``.
        """
        def as_vector(encoded):
            # Fall back to a zero vector when the encoder returns nothing.
            # NOTE(review): 384 is presumably the embedding width — confirm.
            if encoded is None:
                return np.zeros(384)
            if isinstance(encoded, list):
                return np.array(encoded)
            return encoded

        try:
            current_vec = as_vector(embedding_model.encode(content))

            same_task_entries = (e for e in memory if e.get("task") == task)
            for entry in same_task_entries:
                past_vec = as_vector(embedding_model.encode(entry.get("output")))
                similarity = cosine_similarity([current_vec], [past_vec])[0][0]
                if similarity >= threshold:
                    print(f"♻️ {self.name} found redundant output (sim={similarity:.2f})")
                    return True, similarity
        except Exception as e:
            print(f"⚠️ Redundancy check failed: {e}")
            return False, 0.0

        return False, 0.0


    def has_already_done(self, task, prompt=None, content=None, verbose=False):
        """
        Check whether this task was already executed for a prompt or content.

        ``short_memory`` entries are matched on either schema found in this
        class: a singular ``"task"`` key (as written by
        ``remember_task_output``) or a ``"tasks"`` collection. Previously only
        ``"tasks"`` was inspected, so entries stored by
        ``remember_task_output`` were never recognised. Non-dict entries are
        ignored.

        Args:
            task: Task identifier to look for.
            prompt: If truthy, match entries whose ``"prompt"`` equals it.
            content: If truthy, match entries whose ``"output"`` equals it.
            verbose: When True, print which kind of match was found.

        Returns:
            bool: True if a matching entry exists (prompt match is checked
            first, then content match); False otherwise.
        """
        def covers_task(entry):
            # Singular key first; then the list-style key (None treated as empty).
            if "task" in entry and entry["task"] == task:
                return True
            return task in (entry.get("tasks") or [])

        records = [item for item in self.short_memory if isinstance(item, dict)]

        # 1) By exact prompt match
        if prompt:
            for record in records:
                if record.get("prompt") == prompt and covers_task(record):
                    if verbose:
                        print(f"♻️ {self.name} has already done task '{task}' for this prompt.")
                    return True

        # 2) By exact content match
        if content:
            for record in records:
                if record.get("output") == content and covers_task(record):
                    if verbose:
                        print(f"♻️ {self.name} has already produced this content for task '{task}'.")
                    return True

        return False

    def remember_task_output(self, task, output, prompt=None):
        """
        Append a task's output to short-term memory unless already recorded.

        The duplicate check is delegated to ``has_already_done`` (by prompt
        and by content). New entries are scored with ``score_output`` and
        stored together with the current roles, features, cooperation flag
        and a timestamp.
        """
        # Guard: nothing to do when this output/prompt is already on record.
        if self.has_already_done(task, prompt=prompt, content=output):
            return

        score, _ = self.score_output(output, prompt=prompt)

        record = {"task": task, "output": output, "prompt": prompt}
        record["prompt_key"] = self.generate_prompt_key(prompt) if prompt else None
        record["timestamp"] = time.time()
        record["roles"] = list(self.roles)
        record["features"] = list(self.features)
        record["score"] = score
        record["cooperation"] = self.in_cooperation

        self.short_memory.append(record)

    def generate_prompt_key(self, prompt):
        """
        Build a normalized lookup key from a prompt.

        The prompt is stripped of surrounding whitespace, lower-cased, and
        has every space and newline character removed.
        """
        normalized = prompt.strip().lower()
        # Delete spaces and newlines only; other whitespace (e.g. tabs) stays.
        return normalized.translate({ord(" "): None, ord("\n"): None})


    # =======================
    # 7.9. Cooperation & Sharing
    # =======================

    def maybe_cooperate(self, verbose=True):
        """
        Randomly decide whether the agent enters cooperation mode.

        The chance is ``baseline * sigmoid(influence)`` plus a uniform jitter,
        clamped to [0, 1]. ``influence`` is a weighted sum of fatigue and
        hunger minus weighted attention, with all weights, the baseline and
        the jitter range read from ``self.meta_parameters``. The outcome is
        stored in ``self.in_cooperation``; nothing is returned.
        """
        from math import exp

        attention = self.compute_attention()
        fatigue = self.compute_fatigue()
        hunger = self.compute_hunger()

        params = self.meta_parameters
        attention_w = params.get("cooperation_attention_weight", 0.2)
        fatigue_w = params.get("cooperation_fatigue_weight", 0.2)
        hunger_w = params.get("cooperation_hunger_weight", 0.2)
        influence = (-attention_w * attention) + (fatigue_w * fatigue) + (hunger_w * hunger)

        baseline = params.get("cooperation_baseline", 0.5)
        # First draw: width of the jitter band for this decision.
        jitter_low, jitter_high = params.get("cooperation_randomness_range", (0.05, 0.3))
        jitter_width = random.uniform(jitter_low, jitter_high)

        # Second draw: the jitter itself, added to the squashed baseline.
        raw_chance = (baseline * (1 / (1 + exp(-influence)))
                      + random.uniform(-jitter_width, jitter_width))
        floored = max(raw_chance, 0.0)
        coop_chance = min(floored, 1.0)

        # Third draw: the actual decision.
        self.in_cooperation = random.random() < coop_chance

        if verbose:
            if self.in_cooperation:
                state = "🫂 joined"
            else:
                state = "🧍 stayed independent"
            print(f"🔁 {self.name} {state} (coop chance = {coop_chance:.2f})")



    # =======================
    # 7.10. Feature Selection & Capabilities
    # =======================

    def select_features(self, prompt, embedding_model=None, verbose=True):
        """
        Choose this round's features and retune exploration meta-parameters.

        1. ``self.features`` is set to every key of ``FEATURE_EMBEDDINGS``
           whose cosine similarity with the prompt embedding reaches the
           ``task_similarity_threshold`` meta-parameter.
        2. The phase is read from ``self.variance_history`` (default
           "stable"). In the "inflection" / "post-inflection" phases one
           random role from ``POSSIBLE_ROLES`` not yet in ``self.roles`` is
           added, and the task / jollycard sampling meta-parameters are
           nudged towards exploration; otherwise they are nudged towards
           consolidation.
        3. The ``"inline_refinement"`` feature is appended with probability
           ``external_call_base_prob * sigmoid(-w_a*attention + w_f*fatigue +
           w_h*hunger)``; when it is, ``self.external_access_count`` is
           incremented.

        The phase is resolved before its first use; previously it was read
        before being assigned, so every call raised ``UnboundLocalError``.

        Args:
            prompt: Text to embed and compare against the feature archetypes.
            embedding_model: Encoder exposing ``encode``; defaults to
                ``self.embedding_model``.
            verbose: When True, print the decisions taken.

        Returns:
            None. Results are stored on ``self``.
        """
        import random
        from math import exp

        if embedding_model is None:
            embedding_model = self.embedding_model

        meta = self.meta_parameters

        # --- Compute embedding for the prompt ---
        prompt_emb = embedding_model.encode(prompt)
        if prompt_emb is None:
            prompt_emb = np.zeros(384)  # adjust dimension if necessary

        # --- Compute similarities to cached archetype embeddings ---
        feature_scores = {}
        for feature, ref_emb in FEATURE_EMBEDDINGS.items():
            # The 1e-8 term keeps the division defined for zero vectors.
            score = np.dot(prompt_emb, ref_emb) / (np.linalg.norm(prompt_emb) * np.linalg.norm(ref_emb) + 1e-8)
            feature_scores[feature] = score

        # --- Select features above threshold ---
        # Dict keys are already unique, so no dedupe pass is needed and the
        # selection keeps the (deterministic) order of FEATURE_EMBEDDINGS.
        threshold = meta.get("task_similarity_threshold", 0.4)
        self.features = [f for f, s in feature_scores.items() if s >= threshold]

        # --- Resolve the phase once, before anything depends on it ---
        phase = self.variance_history.get("phase", "stable")
        exploring = phase in ("inflection", "post-inflection")

        # 🎲 Role-level entropy (exploration of latent roles)
        if exploring:
            latent_roles = [r for r in POSSIBLE_ROLES if r not in self.roles]
            if latent_roles:
                new_role = random.choice(latent_roles)
                self.roles.append(new_role)
                if verbose:
                    print(f"🎲 {self.name} exploring latent role: {new_role}")

        # ── Exploration vs exploitation tuning (tasks + jollycards) ─────────
        if exploring:
            meta["task_sampling_temperature"] = 1.2  # Encourage exploration
            meta["top_k_tasks"] = min(meta.get("top_k_tasks", 3) + 1, 5)
            meta["task_threshold"] = max(meta.get("task_threshold", 0.4) - 0.05, 0.3)
            meta["jollycard_sampling_temperature"] = min(
                meta.get("jollycard_sampling_temperature", 1.0) * 1.05, 2.0
            )
            meta["jollycard_injection_weight"] = min(
                meta.get("jollycard_injection_weight", 0.5) + 0.01, 0.9
            )
            if verbose:
                print(f"🔍 {self.name} exploring more — phase={phase}")
        else:
            meta["task_sampling_temperature"] = 0.8  # Exploit/Consolidate
            meta["top_k_tasks"] = max(meta.get("top_k_tasks", 3) - 1, 1)
            meta["task_threshold"] = min(meta.get("task_threshold", 0.4) + 0.05, 0.6)
            meta["jollycard_sampling_temperature"] = max(
                meta.get("jollycard_sampling_temperature", 1.0) * 0.95, 0.5
            )
            meta["jollycard_injection_weight"] = max(
                meta.get("jollycard_injection_weight", 0.5) - 0.01, 0.1
            )
            if verbose:
                print(f"♻️ {self.name} consolidating (reuse favored) — phase={phase}")

        # --- Inline refinement logic (formerly 'post_process') ---
        attention = self.compute_attention()
        fatigue = self.compute_fatigue()
        hunger = self.compute_hunger()

        w_attention = meta.get("external_attention_weight", 0.2)
        w_fatigue = meta.get("external_fatigue_weight", 0.2)
        w_hunger = meta.get("external_hunger_weight", 0.2)
        base_prob = meta.get("external_call_base_prob", 0.4)

        influence = (-w_attention * attention +
                     w_fatigue * fatigue +
                     w_hunger * hunger)
        adjusted_prob = base_prob * (1 / (1 + exp(-influence)))

        if random.random() < adjusted_prob:
            self.features.append("inline_refinement")
            self.external_access_count += 1
            if verbose:
                print(f"🔌 {self.name} selected 'inline_refinement' feature (p={adjusted_prob:.3f})")

        if verbose:
            print(f"🧩 {self.name} selected features: {self.features}")
            if hasattr(self, 'relevant_capabilities') and self.relevant_capabilities:
                print(f"🧠 Relevant capabilities activated: {self.relevant_capabilities}")




    def should_use_web_scraping(self, prompt, verbose=True):
        """
        Decide whether this agent scrapes the web this round.

        Returns True only when the ``enable_web_scraping`` switch (default
        on) is truthy and ``"web_scraping"`` is among ``self.features``.
        ``prompt`` is used solely for the verbose log line.
        """
        scraping_enabled = bool(getattr(self, "enable_web_scraping", True))
        # Features are only consulted when the global switch is on.
        decision = scraping_enabled and ("web_scraping" in getattr(self, "features", []))

        if verbose:
            state = "🚫 will skip" if not decision else "✅ will use"
            print(f"🌐 {self.name} {state} web scraping for prompt: \"{prompt[:60]}...\"")

        return decision

    def perform_web_scraping(self, prompt, verbose=True):
        """
        Build a short "Web Context" block for ``prompt``.

        Pipeline: derive search keywords with ``make_search_query``, fetch the
        top 3 hits via ``search_api.search`` (cached per query string in
        ``self.web_cache``), clean each hit's page with ``fetch_and_clean``,
        summarize the first 500 characters with ``llm_generate``, then keep
        the two summaries containing the most keywords.

        Returns:
            str: One ``[Context from <url>]: <summary>`` line per kept hit,
            joined by newlines.
        """
        # 1) Generate search keywords
        keywords = make_search_query(prompt)
        query = " ".join(keywords)
        if verbose:
            print(f"🔍 Searching web for: {query}")

        # 2) Fetch & cache top-k results (the cache is created on first use)
        if not hasattr(self, "web_cache"):
            self.web_cache = {}
        cache = self.web_cache
        if query not in cache:
            cache[query] = search_api.search(query, top_k=3)
        hits = cache[query]

        # 3) Extract, clean & summarize each hit (the search snippet is unused)
        def summarize_page(url):
            cleaned = fetch_and_clean(url)
            reply = llm_generate(
                model_key=self.model_key,
                prompt=f"Summarize in 2 sentences: {cleaned[:500]}",
                temperature=0.2
            )
            return reply.strip()

        contexts = [(url, summarize_page(url)) for url, _snippet in hits]

        # 4) Select top-2 by keyword overlap
        def keyword_overlap(pair):
            summary_lower = pair[1].lower()
            return sum(1 for kw in keywords if kw in summary_lower)

        ranked = sorted(contexts, key=keyword_overlap, reverse=True)[:2]

        # 5) Build Web Context block
        lines = [f"[Context from {url}]: {summary}" for url, summary in ranked]
        return "\n".join(lines)


    def perform_data_query(self, prompt):
        """
        Placeholder for a data lookup (e.g. parsing a SQL or CSV source).

        Currently ignores ``prompt`` and returns a fixed stand-in string.
        """
        placeholder = "…data table or stats…"
        return placeholder

    def extract_entities(self, text):
        """
        Placeholder for named-entity extraction (e.g. spaCy or regex NER).

        Currently ignores ``text`` and returns a fresh two-item stand-in list.
        """
        placeholder_entities = ["Entity1", "Entity2"]
        return placeholder_entities

    def analyze_sentiment(self, text):
        """
        Placeholder for a sentiment model call.

        Currently ignores ``text`` and returns a fresh dict with fixed
        ``"polarity"`` and ``"subjectivity"`` values.
        """
        placeholder_sentiment = {"polarity": 0.2, "subjectivity": 0.5}
        return placeholder_sentiment

    def execute_code_snippet(self, code):
        """
        Placeholder for sandboxed code execution with captured stdout.

        Currently does NOT run ``code``; it returns a fixed stand-in string.
        """
        placeholder = "…code output…"
        return placeholder

    def summarize_text(self, text):
        """
        Ask the agent's model for a summary of ``text``.

        Sends ``"Summarize: <text>"`` to ``llm_generate`` with this agent's
        ``model_key`` at temperature 0.2 and returns its result unchanged.
        """
        request = f"Summarize: {text}"
        return llm_generate(self.model_key, request, temperature=0.2)

    def validate_output(self, text):
        """
        Ask the agent's model to fact-check ``text``.

        Sends ``"Fact-check this: <text>"`` to ``llm_generate`` with this
        agent's ``model_key`` at temperature 0.2 and returns its result
        unchanged.
        """
        request = f"Fact-check this: {text}"
        return llm_generate(self.model_key, request, temperature=0.2)



    # =======================
    # 7.11. Execution & Action
    # =======================

    def act(self, mission_prompt, verbose=True):
        """
        End-to-end agent execution:
          1. Feature selection & optional web scraping
          2. Dynamic model selection
          3. Model invocation
          4. Output scoring & memory update
          5. Meta-parameter updates & round advancement
        Returns: (output, score, attention, fatigue, hunger, _, meta_parameters, status)
        """
        if verbose:
            print(f"🔁 ACT method triggered for {self.name}")
        self.usage_count += 1

        # ── Inject planning hints based on scenario-cluster map ──
        if hasattr(self.runner, "scenario_cluster_map") and scene in self.runner.scenario_cluster_map:
            cluster_hints = self.runner.scenario_cluster_map[scene]
            if verbose:
                print(f"🧠 Injected planning hint → scenario '{scene}' linked to clusters: {cluster_hints}")
            self.meta_parameters["planning_hints"] = cluster_hints
        else:
            self.meta_parameters["planning_hints"] = []

        # 1) Feature selection
        self.select_features(mission_prompt)

        # 2) Web scraping if enabled
        scraped_info = None
        if self.should_use_web_scraping(mission_prompt, verbose=verbose):
            if verbose:
                print(f"🌍 {self.name} is performing enhanced web scraping…")
            web_ctx = self.perform_web_scraping(mission_prompt, verbose)
            mission_prompt += f"\n\n[Web Context]:\n{web_ctx}"
            if verbose:
                print(f"🌐 Context added:\n{web_ctx[:200]}…\n")


        # ── PASTE PRE-PROCESSING HERE ───────────────────────────────────
        # 2.1) Data query
        if "data_query" in self.features:
            data_ctx = self.perform_data_query(mission_prompt)
            if verbose:
                print(f"📈 Data context:\n{data_ctx}")
            mission_prompt += f"\n\n[Data Context]: {data_ctx}"

        # 2.2) Entity extraction
        if "entity_extraction" in self.features:
            entities = self.extract_entities(mission_prompt)
            if verbose:
                print(f"🏷️ Extracted entities: {entities}")
            mission_prompt += f"\n\n[Entities]: {entities}"

        # 2.3) Sentiment analysis (pre-tagging)
        if "sentiment_analysis" in self.features:
            sentiment = self.analyze_sentiment(mission_prompt)
            if verbose:
                print(f"😊 Sentiment score: {sentiment}")
            mission_prompt += f"\n\n[Sentiment]: {sentiment}"

        # ── END PRE-PROCESSING ─────────────────────────────────────────

        # ── PASTE RETRIEVAL INJECTION HERE ─────────────────────────────────
        # 2.x) Retrieval‐augmented context
        if "retrieval" in self.features:
            mem_ctx = self.retrieve_from_memory(mission_prompt, top_k=3)
            if verbose:
                print(f"🗄️ {self.name} retrieved memory context:\n{mem_ctx}\n")
            mission_prompt += f"\n\n[Memory Context]:\n{mem_ctx}"
        # ── END RETRIEVAL ────────────────────────────────────────────────

        # ── END PRE-PROCESSING ─────────────────────────────────────────


        # 3) Dynamic model selection
        model_key, self.model = self.select_model()
        if verbose:
            print(f"🔄 {self.name} selected model → {model_key}")


        # 4) Core LLM call with inline role injection
        try:
            # Inject inline roles (pre-output shaping)
            shaped_prompt = self.inject_inline_roles_into_prompt(mission_prompt)

            if model_key == "gemini":
                raw_output = self.model.generate_content(shaped_prompt).text
            else:
                raw_output = (
                    self.model.chat.completions.create(
                        model=self.model_name,
                        messages=[{"role": "user", "content": shaped_prompt}]
                    )
                    .choices[0]
                    .message
                    .content
                )

            self.last_raw_output = raw_output
            output = raw_output.strip()

        except Exception as e:
            # Handle rate limits by retrying once
            if "429" in str(e):
                if verbose:
                    print("⚠️ Rate limit, retrying…")
                time.sleep(3)
                try:
                    shaped_prompt = self.inject_inline_roles_into_prompt(mission_prompt)
                    if model_key == "gemini":
                        raw_output = self.model.generate_content(shaped_prompt).text
                    else:
                        raw_output = (
                            self.model.chat.completions.create(
                                model=self.model_name,
                                messages=[{"role": "user", "content": shaped_prompt}]
                            )
                            .choices[0]
                            .message
                            .content
                        )
                    self.last_raw_output = raw_output
                    output = raw_output.strip()
                except Exception as e2:
                    output = f"⚠️ Error after retry: {e2}"
            else:
                output = f"⚠️ Model Error: {e}"



        # 6) Compute internal metrics
        attention = self.compute_attention()
        fatigue   = self.compute_fatigue()
        hunger    = self.compute_hunger()

        # append once
        self.attention_history.append(attention)
        self.fatigue_history.append(fatigue)
        self.hunger_history.append(hunger)



        # 7) Score & efficiency
        score, _ = self.score_output(output, prompt=mission_prompt, verbose=verbose)
        efficiency_score = round(
            score * attention /
            (1 + 0.1*self.interval +
                 0.5*self.meaningless_output_counter +
                 0.3*self.external_access_count),
            3
        )

        if verbose:
            print(
                f"🎯 {self.name} act() → "
                f"Score={score}, Efficiency={efficiency_score:.3f}, "
                f"attention={attention:.3f}, fatigue={fatigue:.3f}, hunger={hunger:.3f}"
            )



        for task_key in self.tasks:
            if task_key in self.task_store:
                self.task_store[task_key]["last_score"] = score



        # 7.1) Stagnation tracking (increment on low performance, reset otherwise)
        if score < 0.5 and efficiency_score < 0.5:
            self.stagnation_counter += 1
            if verbose:
                print(f"⚠️ {self.name} stagnation_counter → {self.stagnation_counter}")
        else:
            if self.stagnation_counter > 0 and verbose:
                print(f"✅ {self.name} recovered from stagnation.")
            self.stagnation_counter = 0




        # ── STEP 1: Injection Control ────────────────────────────────
        allow_injection = False
        phase = getattr(self, "variance_history", {}).get("phase", "stable")
        reactivation = getattr(self, "variance_history", {}).get("reactivation_flag", False)

        if phase in ("inflection", "post-inflection") or reactivation:
            allow_injection = True

        if not allow_injection:
            print(f"🚫 {self.name} skipping short memory injection — phase={phase}, reactivation={reactivation}")
            return  # 🚫 Block the rest of this method
        else:
            print(f"✅ {self.name} injection allowed — phase={phase}, reactivation={reactivation}")

        # 7.2) Memory updates
        entry = {
            "prompt": mission_prompt,
            "prompt_key": self.generate_prompt_key(mission_prompt),
            "output": output,
            "score": score,
            "embedding": get_prompt_embedding(mission_prompt),
            "features": list(self.features),
            "roles": list(self.roles),
            "cooperation": self.in_cooperation,
            "timestamp": time.time()
        }


        self.short_memory.append(entry)
        if "web_scraping" in self.features and scraped_info:
            self.short_memory.append({
                **entry,
                "task": "web_scraping",
                "source": "web",
                "output": scraped_info
            })
        self.last_output = output

        # 8) Meta-learning & housekeeping
        self.update_meta_parameters(attention, fatigue, hunger)
        self.interval += 1
        for task in self.tasks:
            self.remember_task_output(task, output, prompt=mission_prompt)



        # (Placeholder meaningfulness = 1.0 for now)
        self.update_meta_parameters(
            meaningfulness_score=1.0,
            efficiency_score=efficiency_score,
            attention_score=attention,
            verbose=verbose
        )
        self.local_round += 1

        return output, score, attention, fatigue, hunger, "", dict(self.meta_parameters), "generated"

    # === Dispatcher already present ===
    def perform_inline_module(self, role, output):
        """
        Route *output* through the inline module that belongs to *role*.

        Every known role name is paired with one method of this agent; the
        matching method is called with *output* and its return value is
        handed back. A role that matches no known name returns *output*
        unchanged.
        """
        role_handlers = (
            ("Grapher", "insert_causal_graph"),
            ("Statisticien", "enhance_statistical_inference"),
            ("Twin_Digitalizer", "generate_digital_twin_annotation"),
            ("Simulator", "append_simulation_block"),
            ("Refiner", "inline_refine_output"),
            ("Validator", "inline_validate_output"),
            ("Executor", "symbolic_execute_plan"),
            ("Analyst", "enrich_with_contextual_analysis"),
            ("Strategist", "add_high_level_strategy"),
            ("Builder", "sketch_solution_architecture"),
            ("Scout", "add_exploration_paths"),
        )

        # Scan in declaration order; the handler is only looked up on a match.
        for known_role, handler_name in role_handlers:
            if role == known_role:
                return getattr(self, handler_name)(output)

        return output  # fallback


    def add_exploration_paths(self, output):
        """Return *output* with the exploration-paths placeholder block appended."""
        suffix = "\n\n🧭 Exploration Paths: [Alternative directions or domains suggested]"
        return output + suffix

    def sketch_solution_architecture(self, output):
        """Return *output* with the architecture placeholder block appended."""
        suffix = "\n\n🏗 Architecture: [Functional building blocks proposed]"
        return output + suffix

    def add_high_level_strategy(self, output):
        """Return *output* with the strategic-layer placeholder block appended."""
        suffix = "\n\n♟️ Strategic Layer: [Long-term goal alignment and tradeoffs considered]"
        return output + suffix

    def enrich_with_contextual_analysis(self, output):
        """Return *output* with the contextual-insight placeholder block appended."""
        suffix = "\n\n🔍 Contextual Insight: [Environmental, temporal, or geopolitical context]"
        return output + suffix

    def symbolic_execute_plan(self, output):
        """Return *output* with the execution-plan placeholder block appended."""
        suffix = "\n\n🛠 Execution Plan: [Next steps derived from proposal]"
        return output + suffix

    def inline_refine_output(self, output):
        """Return *output* with the fixed inline-refinement note appended."""
        suffix = "\n\n🔧 Inline Refinement: improved clarity, structure, and flow."
        return output + suffix

    def inline_validate_output(self, output):
        """Return *output* with the fixed inline-validation note appended."""
        suffix = "\n\n🧪 Inline Validation: internally consistent and fact-aligned."
        return output + suffix

    def insert_causal_graph(self, output):
        """
        Append a small agent-state causal graph to *output*.

        Edges come from the recent state summary: ``fatigue → attention`` is
        added when both series are non-empty, and ``attention → efficiency``
        likewise. The edge list is rendered as text and remembered in
        ``self.last_graph_text``. When the agent has an ``embedding_model``,
        the text is also embedded (``self.last_graph_embedding``) and passed
        to ``find_similar_graphs``; that result is kept in
        ``self.last_graph_similarity`` and printed when non-empty.

        Returns *output* followed by the graph section.
        """
        recent = self.get_recent_state_summary()
        causal = nx.DiGraph()

        # An edge is only drawn when both endpoint series hold data.
        for cause, effect in (("fatigue", "attention"), ("attention", "efficiency")):
            if recent[cause] and recent[effect]:
                causal.add_edge(cause, effect)

        # Plain-text rendering, one edge per line
        edge_lines = [f"{src} → {dst}" for src, dst in causal.edges]
        graph_text = "\n".join(edge_lines)
        self.last_graph_text = graph_text

        # Embedding is optional: skipped when no embedding model is attached
        if hasattr(self, "embedding_model"):
            vector = self.embedding_model.encode("CausalGraph: " + graph_text)
        else:
            vector = None

        if vector is not None:
            self.last_graph_embedding = np.array(vector)

            # Kept on the agent for reuse or validation scoring
            neighbours = find_similar_graphs(self.last_graph_embedding)
            self.last_graph_similarity = neighbours

            if neighbours:
                print(f"🔁 {self.name} found similar causal graphs:")
                for key, dist in neighbours:
                    print(f"   → {key} (dist={dist:.2f})")

        return output + "\n\n📈 Causal Graph (agent-state):\n" + graph_text






    def get_recent_state_summary(self, window: int = 5):
        """
        Collect the latest per-interval metrics for the inline modules.

        Reads the last *window* entries of ``self.interval_score_log`` and
        returns a dict of parallel lists: ``efficiency``, ``meaningfulness``,
        ``attention``, ``fatigue`` and ``hunger`` (each 0 when an entry lacks
        the key) plus ``timestamps``, taken from each entry's ``interval``
        field (``None`` when absent).
        """
        tail = self.interval_score_log[-window:]

        metric_names = ("efficiency", "meaningfulness", "attention", "fatigue", "hunger")
        summary = {
            metric: [row.get(metric, 0) for row in tail]
            for metric in metric_names
        }
        summary["timestamps"] = [row.get("interval") for row in tail]
        return summary


    def enhance_statistical_inference(self, output):
        """
        Append a short statistical summary of recent agent state to *output*.

        Uses the ``efficiency`` and ``attention`` series from
        ``get_recent_state_summary``. With efficiency data present, the report
        shows the efficiency mean and standard deviation and the mean
        attention; otherwise it states that no data is available.
        """
        recent = self.get_recent_state_summary()
        efficiency = recent["efficiency"]
        attention = recent["attention"]

        if not efficiency:
            report = "No recent efficiency data available."
        else:
            mu = np.mean(efficiency)
            sigma = np.std(efficiency)
            avg_attention = np.mean(attention)
            report = (
                f"Efficiency μ={mu:.2f}, σ={sigma:.2f} | "
                f"Avg attention={avg_attention:.2f}"
            )

        return output + "\n\n📊 Statistical Summary:\n" + report





    def generate_digital_twin_annotation(self, output):
        """
        Append a fixed digital-twin snapshot to *output*.

        The snapshot is a hard-coded description of an energy grid; it is
        rendered as one ``key: value`` line per field.
        """
        twin_state = {
            "system": "energy grid",
            "inputs": {"solar": 0.6, "wind": 0.3, "thermal": 0.1},
            "load": 92.5,
            "status": "stable"
        }
        rendered = [f"{field}: {value}" for field, value in twin_state.items()]
        return output + "\n\n🧿 Digital Twin:\n" + "\n".join(rendered)


    def append_simulation_block(self, output):
        """
        Append a fixed ten-year decay forecast to *output*.

        Covers the years 2025 to 2034 with ``100 * exp(-0.05 * (year - 2025))``,
        one ``year: value`` line per year, values shown to one decimal.
        """
        horizon = np.arange(2025, 2035)
        projected = 100 * np.exp(-0.05 * (horizon - 2025))

        rows = []
        for year, level in zip(horizon, projected):
            rows.append(f"{year}: {level:.1f}")

        return output + "\n\n🧪 Simulation Result:\n" + "\n".join(rows)





    def highlight_text_differences(self, verbose=True):
        """
        Show how the final output differs from the raw model output.

        ``self.last_raw_output`` and ``self.last_output`` are aligned with
        ``difflib.SequenceMatcher``. Unchanged text is kept verbatim, while
        inserted, deleted and replaced spans are wrapped in coloured
        ``<span>`` markers. With *verbose* set, the annotated text is
        displayed as Markdown when it differs from the raw output; otherwise a
        confirmation line is printed. Nothing is shown when *verbose* is
        false. Always returns ``None``.
        """
        outputs_missing = (
            not hasattr(self, 'last_raw_output')
            or not hasattr(self, 'last_output')
        )
        if outputs_missing:
            if verbose:
                print("⚠️ No outputs available for comparison.")
            return

        before = self.last_raw_output
        after = self.last_output

        fragments = []
        opcodes = difflib.SequenceMatcher(None, before, after).get_opcodes()
        for tag, i1, i2, j1, j2 in opcodes:
            old_part = before[i1:i2]
            new_part = after[j1:j2]
            if tag == 'equal':
                fragments.append(old_part)
            elif tag == 'insert':
                fragments.append(
                    f"<span style='background-color:#d4edda;'>[[INSERT → {new_part}]]</span>")
            elif tag == 'delete':
                fragments.append(
                    f"<span style='background-color:#f8d7da;'>[[DELETE → {old_part}]]</span>")
            elif tag == 'replace':
                fragments.append(
                    f"<span style='background-color:#fff3cd;'>[[REPLACE {old_part} → {new_part}]]</span>")

        annotated = ''.join(fragments)

        # Silent mode computes the diff but shows nothing.
        if not verbose:
            return

        if annotated != before:
            display(Markdown(
                f"🧩 **Differences between raw and final output:**\n\n{annotated}"))
        else:
            print("✅ No textual differences detected between raw and final output.")



    def reset_local_state(self):
        """
        Soft reset, applied when the agent starts a new local mission.

        Empties the attention/fatigue/hunger histories and the short-term
        memory in place, restarts the interval and usage counters and the
        timer, blanks the last outputs and foresight, seeds the first
        physiology sample and, when a ``runner`` is attached, asks it to clear
        this agent's penalty. Long-term memory and meta-parameters are not
        touched here.
        """
        # 1) Histories and short-term memory are cleared in place, in this order.
        for container_name in (
            "attention_history",
            "fatigue_history",
            "hunger_history",
            "short_memory",
        ):
            getattr(self, container_name).clear()

        # 2) Counters and timer (interval numbering starts at 1)
        self.interval, self.usage_count, self.meaningless_output_counter = 1, 0, 0
        self.start_time = time.time()

        # 3) Last outputs and foresight
        self.last_output = self.last_raw_output = ""
        self.last_foresight = None
        self.last_foresight_signals = {}

        # Seed the very first sample for the new local round
        self.seed_first_phys_sample()

        # 4) Notify the runner, if one is attached
        if hasattr(self, "runner"):
            self.runner.clear_penalty_for_agent(self)





    def reset_meaningless_counter(self):
        """
        Set ``self.meaningless_output_counter`` back to zero.

        Intended for use after validation or a reset, so earlier generic
        outputs stop counting against the agent.
        """
        self.meaningless_output_counter = 0



    def reset_global_state(self):
        """
        Full reset of the agent's session and strategic assignments.

        Runs ``reset_local_state`` first, then:
          - restores ``roles`` from ``born_roles`` (empty list when absent)
          - empties ``tasks`` and ``features`` (tasks are not reseeded here)
          - leaves cooperation mode
          - restarts ``local_round``, ``interval`` and ``start_time``

        Meta-parameters are not modified by this method.
        """
        # Local/session data goes first
        self.reset_local_state()

        # Back to the initial strategic parameters
        self.roles = list(getattr(self, "born_roles", []))
        self.tasks, self.features = [], []  # Do NOT reseed tasks here
        self.in_cooperation = False

        # Round counters and timer
        self.local_round = 1
        self.interval = 1
        self.start_time = time.time()



    def share_memory_with(self, cooperating_agents):
        """
        Pull short- and long-term memory from peer agents into this agent.

        For every agent in *cooperating_agents* other than ``self``:
          - its short-memory entries are appended to this agent's
          - its long-memory entries are copied over when the key is new here
            or the peer's ``score`` is strictly higher

        Short memory is then deduplicated on ``(prompt, output)``, keeping the
        first occurrence, and the latest fatigue and hunger samples are each
        lowered by 0.1 (never below 0.0).
        """
        # 1) Merge memories from peers
        own_long = self.long_memory
        for peer in cooperating_agents:
            if peer is self:
                continue
            self.short_memory.extend(peer.short_memory)
            for key, candidate in peer.long_memory.items():
                if key in own_long:
                    # Keep the local entry unless the peer's score is strictly higher
                    if not candidate.get("score", 0) > own_long[key].get("score", 0):
                        continue
                own_long[key] = candidate

        # 2) Deduplicate short_memory by (prompt, output); first occurrence wins
        unique_entries = {}
        for item in self.short_memory:
            unique_entries.setdefault((item.get("prompt"), item.get("output")), item)
        self.short_memory = list(unique_entries.values())

        print(f"🔗 {self.name} shared memory with peers. Short_memory size: {len(self.short_memory)}")

        # 3) Cooperation eases fatigue & hunger slightly
        for history_name in ("fatigue_history", "hunger_history"):
            history = getattr(self, history_name)
            if history:
                history[-1] = max(history[-1] - 0.1, 0.0)


    # =======================
    # 7.12. Foresight & Planning
    # =======================

    def analyze_foresight_signals(self, prompt, output, current_path, stored_entry, verbose=True):
        """
        Compute foresight signals for planning and update the agent's UVR state.

        Signals:
          - prompt_similarity: cosine similarity between current prompt and stored prompt
          - output_similarity: cosine similarity between current output and stored output
          - path_difference: count of differences in roles, tasks, features, cooperation

        Args:
            prompt: Current mission prompt text.
            output: Current output text.
            current_path: Mapping with ``roles``, ``tasks``, ``features``
                (iterables) and ``cooperation`` for the current configuration.
            stored_entry: Previously stored record. Keys read here are
                ``original_prompt``, ``output``, ``roles``, ``tasks``,
                ``features``, ``cooperation``, ``scenario``,
                ``attention_history``, ``fatigue_history``, ``hunger_history``.
            verbose: When True, print the variance windows, the inflection
                phase and the context-logging note.

        Returns:
            dict with ``prompt_similarity``, ``output_similarity`` and
            ``uvr_similarity`` (rounded to 3 decimals), ``path_difference``
            and ``output_similarity_variance``.

        Side effects:
            Appends to the prompt/output/path similarity histories, to
            ``uvr_similarity_history``, ``signal_spike_log`` and
            ``variance_context_log``; updates ``variance_history`` (short and
            reference windows, ``phase``, ``reactivation_flag``). Missing
            containers are created on first use.
        """
        prompt_sim = 0.0
        output_sim = 0.0
        path_diff = 0

        def _embed(text):
            # ``vec or zeros`` raises "truth value of an array is ambiguous" for
            # multi-element ndarrays, so a missing/empty result is tested explicitly.
            vec = embedding_model.encode(text)
            if vec is None or len(vec) == 0:
                return np.zeros(EMB_DIM)
            return np.asarray(vec)

        # Prompt similarity
        try:
            vec1 = _embed(prompt)
            vec2 = _embed(stored_entry.get("original_prompt", prompt))
            prompt_sim = cosine_similarity([vec1], [vec2])[0][0]
        except Exception as e:
            print(f"⚠️ Foresight prompt similarity failed: {e}")

        # Output similarity
        try:
            vec1 = _embed(output)
            vec2 = _embed(stored_entry.get("output", output))
            output_sim = cosine_similarity([vec1], [vec2])[0][0]
        except Exception as e:
            print(f"⚠️ Foresight output similarity failed: {e}")

        # Path comparison (both sides normalised to sets so they compare cleanly)
        stored_path = {
            "roles": set(stored_entry.get("roles", [])),
            "tasks": set(stored_entry.get("tasks", [])),
            "features": set(stored_entry.get("features", [])),
            "cooperation": stored_entry.get("cooperation", False)
        }
        curr_path = {
            "roles": set(current_path.get("roles", [])),
            "tasks": set(current_path.get("tasks", [])),
            "features": set(current_path.get("features", [])),
            "cooperation": current_path.get("cooperation", False)
        }
        path_diff = sum([
            curr_path["roles"] != stored_path["roles"],
            curr_path["tasks"] != stored_path["tasks"],
            curr_path["features"] != stored_path["features"],
            curr_path["cooperation"] != stored_path["cooperation"]
        ])

        # Initialize similarity history if missing
        if not hasattr(self, "variance_history"):
            self.variance_history = {
                "uvr_similarity_short": deque(maxlen=5),
                "uvr_similarity_reference": deque(maxlen=10),
                "reactivation_flag": False
            }

        if not hasattr(self, "prompt_similarity_history"):
            self.prompt_similarity_history = []
            self.output_similarity_history = []
            self.path_difference_history = []

        self.prompt_similarity_history.append(prompt_sim)
        self.output_similarity_history.append(output_sim)
        self.path_difference_history.append(path_diff)

        # Compute UVR status per component
        # (compute_uvr_status is used as returning: triggered, recent variance,
        #  prior variance, ratio — per the unpacking and messages below)
        min_window = self.meta_parameters.get("uvr_min_window", 5)
        inflection_threshold = self.meta_parameters.get("uvr_inflection_threshold", 1.5)

        prompt_uvr, var_prompt_recent, var_prompt_prior, _ = compute_uvr_status(self.prompt_similarity_history, min_window, inflection_threshold)
        output_uvr, var_output_recent, var_output_prior, _ = compute_uvr_status(self.output_similarity_history, min_window, inflection_threshold)
        path_uvr, var_path_recent, var_path_prior, _ = compute_uvr_status(self.path_difference_history, min_window, inflection_threshold)

        if prompt_uvr:
            print(f"⚠️ Prompt UVR — var spike: {var_prompt_recent:.3f} vs {var_prompt_prior:.3f}")
        if output_uvr:
            print(f"⚠️ Output UVR — var spike: {var_output_recent:.3f} vs {var_output_prior:.3f}")
        if path_uvr:
            print(f"⚠️ Path UVR — var spike: {var_path_recent:.3f} vs {var_path_prior:.3f}")

        # Physiology and graph similarities
        graph_matches = getattr(self, "last_graph_similarity", None)
        graph_sim = 1.0 - min(graph_matches[0][1], 1.0) if graph_matches else 0.0

        # Built from the normalised sets so list-valued or partial paths also work.
        path_vec = np.array([
            len(curr_path["roles"] & stored_path["roles"]),
            len(curr_path["tasks"] & stored_path["tasks"]),
            len(curr_path["features"] & stored_path["features"]),
            float(curr_path["cooperation"] == stored_path["cooperation"])
        ]) / 4.0
        # NOTE(review): the vector is compared with itself, so the similarity
        # carries no information about how the paths differ — confirm the
        # intended reference vector.
        path_sim = cosine_similarity([path_vec], [path_vec])[0][0]

        physio_vec = np.array([
            self.attention_history[-1] if self.attention_history else 0.5,
            self.fatigue_history[-1] if self.fatigue_history else 0.5,
            self.hunger_history[-1] if self.hunger_history else 0.5
        ])
        # NOTE(review): assumes the stored *_history values are scalars — confirm.
        prior_physio_vec = np.array([
            stored_entry.get("attention_history", 0.5),
            stored_entry.get("fatigue_history", 0.5),
            stored_entry.get("hunger_history", 0.5)
        ])
        physio_sim = cosine_similarity([physio_vec], [prior_physio_vec])[0][0]

        # Compute composite UVR similarity
        w1 = self.meta_parameters.get("uvr_weight_prompt", 0.2)
        w2 = self.meta_parameters.get("uvr_weight_output", 0.2)
        w3 = self.meta_parameters.get("uvr_weight_graph", 0.2)
        w4 = self.meta_parameters.get("uvr_weight_path", 0.2)
        w5 = self.meta_parameters.get("uvr_weight_physio", 0.2)

        UVR_similarity = (
            w1 * prompt_sim +
            w2 * output_sim +
            w3 * graph_sim +
            w4 * path_sim +
            w5 * physio_sim
        )

        # Dual-window variance tracking: once the short window is full, its
        # contents are copied into the reference window on every call.
        self.variance_history["uvr_similarity_short"].append(UVR_similarity)
        if len(self.variance_history["uvr_similarity_short"]) == self.variance_history["uvr_similarity_short"].maxlen:
            for val in self.variance_history["uvr_similarity_short"]:
                self.variance_history["uvr_similarity_reference"].append(val)

        if verbose:
            print(f"🧠 {self.name} short variance → {list(self.variance_history['uvr_similarity_short'])}")
            print(f"📘 {self.name} ref variance → {list(self.variance_history['uvr_similarity_reference'])}")

        # Compute delta and novelty
        var_short = np.var(list(self.variance_history["uvr_similarity_short"]))
        var_long = np.var(list(self.variance_history["uvr_similarity_reference"]))
        delta_var = var_short - var_long
        novelty_score = 1.0 - UVR_similarity

        # ── STEP 6: Classify inflection phase based on short vs. reference variance ──
        phase_margin = self.meta_parameters.get("inflection_phase_margin", 0.03)

        if var_long < 1e-5:
            inflection_phase = "undetermined"
        else:
            ratio = var_short / var_long

            if abs(ratio - 1.0) < phase_margin:
                inflection_phase = "inflection"
            elif ratio > 1.0 + phase_margin:
                inflection_phase = "post-inflection"
            elif ratio < 1.0 - phase_margin:
                inflection_phase = "pre-inflection"
            else:
                inflection_phase = "stable"

        self.variance_history["phase"] = inflection_phase

        if verbose:
            print(f"📊 {self.name} inflection phase → {inflection_phase}")

        delta_thresh = self.meta_parameters.get("uvr_reactivation_delta_threshold", 0.05)
        novelty_thresh = self.meta_parameters.get("uvr_reactivation_novelty_threshold", 0.3)
        should_reactivate = delta_var > delta_thresh and novelty_score > novelty_thresh
        self.variance_history["reactivation_flag"] = should_reactivate

        if should_reactivate:
            print(f"🔄 Reactivation triggered → ΔVar={delta_var:.3f}, Novelty={novelty_score:.3f}")

        # ── PHASE 1: Spike Logging ──────────────────────────────
        if not hasattr(self, "signal_spike_log"):
            self.signal_spike_log = deque(maxlen=100)

        self.signal_spike_log.append({
            "timestamp": time.time(),
            "uvr_similarity": round(UVR_similarity, 3),
            "output_similarity_variance": round(var_output_recent, 4),
            "prompt_similarity": round(prompt_sim, 4),
            "scenario": getattr(self, "last_scenario", "unknown"),
            "roles": self.roles[:],
            "tasks": self.tasks[:],
            "features": self.features[:],
            "inline_activated_roles": self.inline_activated_roles[:],
            # NOTE(review): logged as True on every call, independent of
            # should_reactivate — confirm whether this is intended.
            "reactivation_flag": True,
            "inflection_phase": self.variance_history.get("phase", "unknown")
        })

        # Optional: single-track UVR variance for logging
        if not hasattr(self, "uvr_similarity_history"):
            self.uvr_similarity_history = []
        self.uvr_similarity_history.append(UVR_similarity)
        uvr_triggered, var_recent, var_prior, var_ratio = compute_uvr_status(
            self.uvr_similarity_history, min_window, inflection_threshold
        )
        if uvr_triggered:
            print(f"⚠️ UVR triggered — Similarity variance spike (ratio={var_ratio:.2f})")

        # ── STEP 5: Log contextual delta variance for later meta-learning or clustering ──
        # This must run before the return below; placed after it, it never executes.
        if not hasattr(self, "variance_context_log"):
            self.variance_context_log = []

        self.variance_context_log.append({
            "agent": self.name,
            "delta_var": round(delta_var, 4),
            "novelty_score": round(novelty_score, 4),
            "uvr_similarity": round(UVR_similarity, 4),
            "roles": list(curr_path["roles"]),
            "tasks": list(curr_path["tasks"]),
            "features": list(curr_path["features"]),
            "cooperation": curr_path["cooperation"],
            "scenario": (stored_entry.get("scenario") or "unknown"),
            "timestamp": time.time()
        })

        if verbose:
            print(f"🧾 {self.name} logged delta variance context for meta-analysis.")

        return {
            "prompt_similarity": round(prompt_sim, 3),
            "output_similarity": round(output_sim, 3),
            "path_difference": path_diff,
            "uvr_similarity": round(UVR_similarity, 3),
            "output_similarity_variance": var_output_recent
        }




    def compute_foresight(self, prompt, path_reuse, similarity_score, external_db=None):
        """
        Classify the current mission into a foresight scenario.

        Inputs considered:
          - *path_reuse*: reuse flag (convergent paths)
          - *similarity_score*: similarity supplied by the caller
          - foresight signals against the stored record for this prompt, looked
            up by prompt key in long memory and then in *external_db*; without
            a stored record, zero similarities and a path difference of 4 are
            assumed

        When graph-similarity results are present on the agent, the label is
        chosen from the smallest graph distance alone and takes precedence.
        The chosen label is stored in ``self.last_foresight`` and returned;
        the signals are stored in ``self.last_foresight_signals``.
        """
        # Build key and current context path
        prompt_key = self.generate_prompt_key(prompt)
        current_path = {
            "tasks": set(self.tasks),
            "features": set(self.features),
            "roles": set(self.roles),
            "cooperation": self.in_cooperation
        }

        # Stored context: local long memory first, external DB as fallback
        stored = self.long_memory.get(prompt_key)
        if not stored and external_db:
            stored = external_db.get(prompt_key)

        # Count the external record as used when it exists
        if stored and external_db and prompt_key in external_db:
            record = external_db[prompt_key]
            record["usage_count"] = record.get("usage_count", 0) + 1

        # Signals from the stored record, or defaults when nothing is stored
        signals = (
            self.analyze_foresight_signals(prompt, self.last_output, current_path, stored)
            if stored
            else {"prompt_similarity": 0.0, "output_similarity": 0.0, "path_difference": 4}
        )
        self.last_foresight_signals = signals

        # Causal override comes first: smallest graph distance decides
        causal_matches = getattr(self, "last_graph_similarity", None)
        if causal_matches:
            _nearest_key, nearest = min(causal_matches, key=lambda pair: pair[1])
            causal_bands = (
                (0.25, "✅ Convergent Paths (Causal)"),
                (0.4, "🌫️ Grey Swan (Causal)"),
                (0.6, "🐉 Wild Card (Causal)"),
            )
            for upper_bound, label in causal_bands:
                if nearest < upper_bound:
                    scenario = label
                    break
            else:
                scenario = "🕳️ Black Swan (Causal)"
            self.last_foresight = scenario
            return scenario

        # Signal-based classification; the first matching rule wins
        if path_reuse:
            scenario = "✅ Convergent Paths"
        else:
            path_gap = signals["path_difference"]
            out_sim = signals["output_similarity"]
            if similarity_score < 0.3 and path_gap >= 3:
                scenario = "🧩 Cascading Discontinuity"
            elif similarity_score < 0.5 and out_sim > 0.9:
                scenario = "🪂 Tipping Point"
            elif path_gap >= 2 or out_sim < 0.5:
                scenario = "🐉 Wild Card"
            elif similarity_score > 0.8 and out_sim > 0.8:
                scenario = "🧭 White Swan"
            elif similarity_score > 0.6 and out_sim > 0.6:
                scenario = "🌫️ Grey Swan"
            elif similarity_score > 0.4:
                scenario = "🤝 Grey Rhino"
            else:
                scenario = "🕳️ Black Swan"

        self.last_foresight = scenario
        return scenario




    def assign_cluster_tag(self, cluster):
        """
        Assign a semantic label to a signal cluster, in place.

        This helps interpret root causes behind volatility spikes.

        Args:
            cluster: Sequence of spike dicts, each with
                ``output_similarity_variance`` and ``roles``; ``scenario`` is
                read when present.

        Effects:
            Every spike receives ``cluster_tag`` and ``cluster_roles`` (the
            roles shared by all spikes in the cluster). An empty cluster is
            left alone. Returns ``None``.

        Tag rules, first match wins:
            - mean variance > 0.06 and a "Black Swan" scenario → instability
            - mean variance > 0.06 and a "Tipping Point" scenario → strategy shift
            - mean variance < 0.02 → residual drift
            - otherwise → adaptive turbulence
        """
        if not cluster:
            # Nothing to tag; set.intersection() with no arguments would raise.
            return

        avg_var = np.mean([s["output_similarity_variance"] for s in cluster])
        common_roles = set.intersection(*(set(s["roles"]) for s in cluster))
        scenarios = {str(s.get("scenario", "")) for s in cluster}

        def _scenario_seen(label):
            # Scenario labels carry an emoji prefix and sometimes a "(Causal)"
            # suffix, so match on the name inside the label rather than on
            # exact equality.
            return any(label in scenario for scenario in scenarios)

        # Rule-based tag assignment
        if avg_var > 0.06 and _scenario_seen("Black Swan"):
            tag = "instability (foresight shock)"
        elif avg_var > 0.06 and _scenario_seen("Tipping Point"):
            tag = "strategy shift (critical phase)"
        elif avg_var < 0.02:
            tag = "residual drift"
        else:
            tag = "adaptive turbulence"

        # Assign to each spike in cluster
        for spike in cluster:
            spike["cluster_tag"] = tag
            spike["cluster_roles"] = list(common_roles)


    def build_scenario_cluster_map(self):
        """
        Build and print a map from foresight scenario to the cluster tags it
        has co-occurred with.

        Spikes are gathered from the ``signal_spike_log`` of every agent in
        ``self.agents`` (agents without a log contribute nothing). Spikes
        without a ``cluster_tag`` are skipped; a missing ``scenario`` is filed
        under ``"Unknown"``. Tags are deduplicated per scenario and the result
        is stored in ``self.scenario_cluster_map``.
        """
        # Flatten every agent's spike log into one sequence
        spikes = [
            spike
            for agent in self.agents
            for spike in getattr(agent, "signal_spike_log", [])
        ]

        grouped = {}
        for spike in spikes:
            tag = spike.get("cluster_tag", None)
            if tag is None:
                continue  # Skip untagged
            grouped.setdefault(spike.get("scenario", "Unknown"), []).append(tag)

        # Deduplicate the tags of each scenario
        scenario_cluster_map = {
            scenario: list(set(tags)) for scenario, tags in grouped.items()
        }
        self.scenario_cluster_map = scenario_cluster_map

        print("📌 Scenario–Cluster Co-Occurrence Map:")
        for scen, tags in scenario_cluster_map.items():
            print(f"  - {scen}: {tags}")






    # ============================================================
    # 📘 PROMPT_KEY vs SEMANTIC REUSE — DESIGN GUIDELINES
    # ============================================================

    """
    IMPORTANT DESIGN NOTE:

    This system uses TWO DIFFERENT mechanisms to handle prompts:

    —————————————————————————————
    📌 1. prompt_key → (generate_prompt_key)

    - Purpose → For EXTERNAL DB and LONG MEMORY storage/retrieval
    - How → Uses normalized (lowercase, no space/newline) prompt text
    - Use cases →
        - Quickly check if the EXACT same prompt was previously stored
        - Retrieve stored mission results (local memory or external database)
        - Avoid exact duplicates (redundancy filtering)

    - NOT semantic → purely based on raw text format

    —————————————————————————————
    📌 2. Semantic Similarity → (similarity_to_db + reuse_similarity_threshold)

    - Purpose → For mission REUSE detection (similarity, not identity)
    - How → Uses embeddings + cosine similarity
    - Use cases →
        - Detect if new prompt is semantically similar to past prompts
        - Decide if mission should be treated as NEW or CONTINUATION
        - Adaptive (meta-learned threshold) → allows exploration vs convergence

    - Fully semantic → uses embeddings for meaning-based comparison

    —————————————————————————————
    ✅ Summary

    prompt_key → storage + lookup (exact match / fast redundancy filter)
    semantic similarity → mission reuse detection (meta-learned + meaning-based)

    THESE TWO SHOULD NOT BE CONFUSED.
    They work together, but have distinct roles.

    """

    # ============================================================
    # 📘 PROMPT FLOW DIAGRAM — FROM INPUT TO REUSE / STORAGE / FORESIGHT
    # ============================================================

    """
    Diagram — How prompts flow through the system:

    [ USER PROMPT ]
       ↓
    (similarity_to_db → semantic similarity → meta-learned reuse threshold → YES/NO reuse decision)

       ↓                                        ↓
    [ Same mission → CONTINUE global round ]   [ New mission → RESET global round ]

       ↓
    [ ACT + local validation + result generation ]
       ↓
    [ Store in SHORT MEMORY → (prompt, output, score) ]    → (uses raw prompt)
       ↓
    [ Store in LONG MEMORY → (prompt_key, entry) ]         → (uses generate_prompt_key)

       ↓
    [ If validated as GLOBAL best → store in EXTERNAL DB → (prompt_key, entry) ]

       ↓
    [ Later missions → use similarity_to_db + semantic similarity → compare against EXTERNAL DB ]

       ↓
    [ When foresight analysis is needed → retrieve via prompt_key → compare semantic similarity (embedding) to stored prompt/output → classify scenario (Black Swan, Grey Rhino, etc.) ]
    """

    # ============================================================
    # ==================== 🧭 CELL 7.13 — SIMILARITY TO EXTERNAL DB ===
    # ============================================================

    def similarity_to_long_memory_db(self, prompt):
        """
        Return the highest cosine similarity between *prompt* and any prompt
        stored in this agent's long memory.

        Each stored entry's ``prompt`` text is embedded again on every call
        and compared with the embedding of *prompt*. Returns 0.0 when long
        memory is missing or empty; the result is never below 0.0.
        """
        memory = getattr(self, "long_memory", None)
        if not memory:                    # no long memory, or nothing in it
            return 0.0

        def _to_vector(raw):
            # Normalise an encoder result: None → zero vector, list → ndarray
            if raw is None:
                return np.zeros(EMB_DIM)
            if isinstance(raw, list):
                return np.array(raw)
            return raw

        query_vec = _to_vector(embedding_model.encode(prompt))

        best = 0.0
        for record in memory.values():
            stored_vec = _to_vector(embedding_model.encode(record.get("prompt", "")))
            score = cosine_similarity([query_vec], [stored_vec])[0][0]
            if score > best:
                best = score

        return best





In [None]:
# ============================================================
# ==================== 🏁 CELL 8 — MULTI-AGENT ROUNDS =========
# ============================================================

import pandas as pd
from sklearn.cluster import KMeans
import numpy as np
import math
import textwrap
from IPython.display import display, Markdown


# Module-level round counter shared by every run_agents_round() call
GLOBAL_ROUND = 1


def run_agents_round(
    agents,
    mission_prompt,
    verbose: bool = False,
    new_mission: bool = True
):
    """
    Run a single global round: every agent in *agents* is re-initialised for
    *mission_prompt*, then asked to act on it once.

    - new_mission=True  → GLOBAL_ROUND restarts at 1 before the round runs
    - new_mission=False → the round uses the current GLOBAL_ROUND value

    Side effects:
    - writes the round number to the module-level ``runner.global_round``
    - calls ``initialize_from_prompt`` and then ``act`` on each agent
    - displays a styled summary table and increments GLOBAL_ROUND

    Returns the summary as a pandas DataFrame (one row per agent).
    """
    global GLOBAL_ROUND

    # A brand-new mission restarts the numbering
    if new_mission:
        GLOBAL_ROUND = 1

    # Keep the shared runner object in step with the module counter
    runner.global_round = GLOBAL_ROUND

    print(f"\n🌍 GLOBAL ROUND {GLOBAL_ROUND} — Mission: {mission_prompt}\n")

    # Phase 1: every agent is (re)initialised before anyone acts
    for member in agents:
        member.initialize_from_prompt(
            mission_prompt,
            global_round=GLOBAL_ROUND,
            verbose=verbose
        )

    def _act_and_summarize(member):
        # act() must yield exactly eight values; the sixth is not reported
        output, score, attention, fatigue, hunger, _, meta_parameters, status = member.act(
            mission_prompt, verbose=verbose
        )
        return {
            "Agent": member.name,
            "Roles": ", ".join(member.roles),
            "Tasks": ", ".join(member.tasks),
            "Score": score,
            "Attention": attention,
            "Fatigue": fatigue,
            "Hunger": hunger,
            "Meta_StrategyFit": round(meta_parameters.get("strategy_fit", 0.0), 3),
            "Meta_TaskCoupling": round(meta_parameters.get("task_feature_coupling", 0.0), 3),
            "ShortMem": len(member.short_memory),
            "LongMem": len(member.long_memory),
            "ExternalCalls": member.external_access_count,
            "Status": status
        }

    # Phase 2: agents act in list order, one summary row each
    rows = [_act_and_summarize(member) for member in agents]

    df = pd.DataFrame(rows)
    styled = df.style.set_caption(f"📊 Summary: Global Round {GLOBAL_ROUND}").format(precision=3)
    display(styled)

    # Next call (with new_mission=False) continues from here
    GLOBAL_ROUND += 1

    return df







In [None]:
# ============================================================
# ==================== 🛡️ CELL 9 — REDUNDANCY FILTER ==========
# ============================================================

import numpy as np
from sklearn.metrics.pairwise import cosine_similarity


class RedundancyFilter:
    """
    Namespace of static redundancy checks used to decide whether an agent's
    work would duplicate something already seen.

    Four independent levels are offered: prompt (against the external DB),
    task (against the agent's own short memory), content (against peers'
    short memory) and causal graph (against a FAISS index).
    Text embeddings come from the module-level ``embedding_model``.
    """

    # Width of the zero vector substituted when the encoder returns None
    # for the *query* prompt.
    _FALLBACK_DIM = 384

    @staticmethod
    def _encode(text):
        """Embed *text*; returns an ndarray, or None if the encoder yields None."""
        raw = embedding_model.encode(text)
        if raw is None:
            return None
        return np.array(raw) if isinstance(raw, list) else raw

    @staticmethod
    def is_prompt_redundant(prompt, external_db, agent, reuse_threshold=None):
        """
        Returns True if the prompt is redundant (based on semantic similarity
        to the ``"original_prompt"`` of any entry in *external_db*).
        Also bumps the matched entry's ``usage_count`` → for smarter decay handling.

        - `prompt`: text to test
        - `external_db`: mapping key → entry dict; an empty/None DB is never redundant
        - `agent`: supplies ``meta_parameters["reuse_similarity_threshold"]``
          (default 0.75) when `reuse_threshold` is None
        - `reuse_threshold`: explicit similarity threshold (inclusive)
        """
        if not external_db:
            return False

        prompt_vec = RedundancyFilter._encode(prompt)
        if prompt_vec is None:
            prompt_vec = np.zeros(RedundancyFilter._FALLBACK_DIM)

        highest_similarity = 0.0
        most_similar_key = None

        for key, entry in external_db.items():
            # "__meta__" is a bookkeeping record, not a stored prompt; anything
            # that is not a dict cannot carry an "original_prompt" either.
            if key == "__meta__" or not isinstance(entry, dict):
                continue

            stored_vec = RedundancyFilter._encode(entry.get("original_prompt", ""))
            if stored_vec is None:
                # Size the stand-in like the query so the comparison cannot
                # fail on a dimension mismatch.
                stored_vec = np.zeros(len(prompt_vec))

            sim = cosine_similarity([prompt_vec], [stored_vec])[0][0]

            if sim > highest_similarity:
                highest_similarity = sim
                most_similar_key = key

        # Use agent meta-parameter threshold if not explicitly provided
        if reuse_threshold is None:
            reuse_threshold = agent.meta_parameters.get("reuse_similarity_threshold", 0.75)

        if highest_similarity >= reuse_threshold:
            print(f"🛑 [FILTER] Prompt-level redundant: similarity={highest_similarity:.3f} >= threshold={reuse_threshold:.3f}")

            # ✅ Bump usage count → prevents fast decay.
            # `is not None` so falsy keys (0, "") still count; `.get` so entries
            # written before usage tracking existed do not raise KeyError.
            if most_similar_key is not None:
                matched = external_db[most_similar_key]
                matched["usage_count"] = matched.get("usage_count", 0) + 1

            return True

        return False

    @staticmethod
    def is_task_redundant(agent, task, prompt=None, output=None, role=None, role_aware=False):
        """
        Check if *task* has already been executed by *agent* under the same prompt/output.
        If role_aware=True, require that the same role was involved.

        Scans ``agent.short_memory``; an entry matches when its ``"task"``
        equals *task* and either its ``"prompt"`` equals *prompt* or its
        ``"output"`` equals *output* (each only tested when truthy).
        """
        for item in agent.short_memory:
            if item.get("task") != task:
                continue

            # Role‑aware filtering: skip if the stored entry wasn’t done under the same role.
            # `or []` tolerates entries whose "roles" field is explicitly None.
            if role_aware and role and role not in (item.get("roles") or []):
                continue

            # Redundancy by prompt or by exact output
            if prompt and item.get("prompt") == prompt:
                print(f"♻️ [FILTER] Task‑level redundant on prompt: {task}")
                return True
            if output and item.get("output") == output:
                print(f"♻️ [FILTER] Task‑level redundant on output: {task}")
                return True

        return False

    @staticmethod
    def is_content_redundant(agent, task, prompt, cooperating_agents, threshold=0.85):
        """
        Return True if *agent* recently produced content for *task/prompt*
        that is semantically very similar to content already produced by any
        peer in *cooperating_agents*.

        Peers' entries are matched on task only (not on prompt). Entries with
        no recorded output are ignored rather than raising.
        """
        # 1. Find the latest output by this agent for the task+prompt
        curr_entry = next(
            (e for e in reversed(agent.short_memory)
             if e.get("task") == task and e.get("prompt") == prompt),
            None
        )
        if not curr_entry:
            return False

        curr_output = curr_entry.get("output")
        if curr_output is None:
            return False  # nothing to compare yet

        curr_vec = RedundancyFilter._encode(curr_output)
        if curr_vec is None:
            return False

        # 2. Compare with peers’ outputs for the same task
        for peer in cooperating_agents:
            if peer is agent:
                continue
            for e in peer.short_memory:
                if e.get("task") != task:
                    continue
                peer_output = e.get("output")
                if peer_output is None:
                    continue
                peer_vec = RedundancyFilter._encode(peer_output)
                if peer_vec is None:
                    continue

                sim = cosine_similarity([curr_vec], [peer_vec])[0][0]
                if sim >= threshold:
                    print(f"♻️ [FILTER] Content redundant between {agent.name} and {peer.name} "
                          f"(task={task}, sim={sim:.2f})")
                    return True
        return False

    @staticmethod
    def is_graph_redundant(graph_embedding, graph_index, graph_key_map, reuse_threshold=0.25):
        """
        Returns True if the graph embedding is close enough to a past one
        to be considered redundant. Uses FAISS nearest neighbor search.

        - `graph_embedding`: np.ndarray of current causal graph
        - `graph_index`: FAISS index (already built)
        - `graph_key_map`: mapping from FAISS idx → mission key
        - `reuse_threshold`: distance threshold (exclusive) to flag as redundant
        """

        if graph_index.ntotal == 0:
            return False  # No prior graphs

        if graph_embedding is None or not isinstance(graph_embedding, np.ndarray):
            return False

        # FAISS expects a contiguous float32 matrix of shape (n_queries, dim);
        # reshape also accepts an embedding that is already (1, dim).
        query = np.ascontiguousarray(graph_embedding, dtype=np.float32).reshape(1, -1)
        D, I = graph_index.search(query, k=1)
        dist = D[0][0]

        if dist < reuse_threshold:
            # Plain int so the lookup does not depend on numpy scalar hashing
            matched_key = graph_key_map.get(int(I[0][0]), "[unknown]")
            print(f"🛑 [FILTER] Graph-level redundant: dist={dist:.3f} < threshold={reuse_threshold:.3f}")
            print(f"    Most similar graph mission key: {matched_key}")
            return True

        return False




In [None]:
# ============================================================
# ==================== 🚦 CELL 10 — FORESIGHT BEHAVIOR & COUNTERS =========
# ============================================================



# 📜 Global foresight behavior map
# Scenario label → behavior profile. Every profile has the same three fields:
#   retrieval  – which memory/lookup mode to use
#   randomness – how much exploration noise to apply
#   response   – name of the reaction to trigger
FORESIGHT_BEHAVIOR_MAP = {
    "✅ Convergent Paths": dict(
        retrieval="M_global", randomness="none", response="reuse_best_path"),
    "🤝 Grey Rhino": dict(
        retrieval="M_long", randomness="low", response="force_attention_reset"),
    "🧭 White Swan": dict(
        retrieval="M_long_nearest_neighbor", randomness="low_moderate", response="prepare_adjust_weights"),
    "🌫️ Grey Swan": dict(
        retrieval="partial_match_shadow", randomness="moderate", response="probe_with_low_weight"),
    "🐉 Wild Card": dict(
        retrieval="low_confidence_match", randomness="high", response="promote_exploration"),
    "🕳️ Black Swan": dict(
        retrieval="none_blank_slate", randomness="maximum", response="activate_high_entropy"),
    "🧩 Cascading Discontinuity": dict(
        retrieval="cascade_segment", randomness="varies", response="activate_chain_analysis"),
    "🪂 Tipping Point": dict(
        retrieval="fragile_prior_path", randomness="moderate_high", response="add_redundancy"),
}



def normalize_foresight_key(scenario: str) -> str:
    """
    Reduce a scenario label to its base form: everything from the first
    " (" onward is dropped and surrounding whitespace is trimmed, so the
    result can be looked up in FORESIGHT_BEHAVIOR_MAP.
    """
    base_label, _, _ = scenario.partition(" (")
    return base_label.strip()




def update_decision_counter(agent_name, interval_counters):
    """
    Advance the cooperation decision counter stored under *agent_name* in
    *interval_counters* and return the new value.

    A counter that is missing or below 1 is (re)started at 1; otherwise it
    is incremented by one. *interval_counters* is modified in place.
    """
    previous = interval_counters.get(agent_name, 0)
    if previous >= 1:
        updated = previous + 1
    else:
        updated = 1
    interval_counters[agent_name] = updated
    return updated

def update_interval_counter(agent_name, interval_counters, active=True):
    """
    Increment the interval counter for an agent if active;
    otherwise leave it unchanged.

    - `agent_name`: key into *interval_counters*
    - `interval_counters`: dict of per-agent counters, modified in place
      only when *active* is true
    - `active`: whether the agent took part in this interval

    Returns the agent's counter after the call. An inactive agent that has
    no counter yet reports 0 and no entry is created for it (previously
    this case raised KeyError).
    """
    current = interval_counters.get(agent_name, 0)
    if active:
        current += 1
        interval_counters[agent_name] = current
    return current


In [None]:
# ============================================================
# CELL 11. RUNNER CLASS
# ============================================================

import pandas as pd
from sklearn.cluster import KMeans
import numpy as np
import math
import random
import textwrap
from IPython.display import display, Markdown
import time
from sklearn.metrics.pairwise import cosine_similarity
from copy import deepcopy
import matplotlib.pyplot as plt
import matplotlib.cm as cm
from collections import defaultdict



# --------------------
# 11.1. Prompt Clustering
# --------------------

class EnhancedStrategicRunner:


    def process_mission_prompt(self, mission_prompt, verbose=False):
        """
        Derive and cache the lookup key and embedding for *mission_prompt*.

        The key is generated from a normalized copy of the prompt (trimmed,
        lower-cased, newlines turned into spaces); the embedding is computed
        from the prompt exactly as given.

        Attributes written:
        - self.last_mission_prompt     (the unmodified prompt)
        - self.last_mission_key        (result of self.generate_prompt_key)
        - self.last_mission_embedding  (array; zeros of width EMB_DIM if the
                                        encoder returns None)

        Returns the tuple (last_mission_key, last_mission_embedding).
        """
        # ── Key: built from the normalized text ──
        key_text = " ".join(mission_prompt.strip().lower().split("\n")).strip()
        self.last_mission_prompt = mission_prompt
        self.last_mission_key = self.generate_prompt_key(key_text)

        # ── Embedding: built from the raw text ──
        emb = embedding_model.encode(mission_prompt)
        if emb is None:
            emb = np.zeros(EMB_DIM)
        elif isinstance(emb, list):
            emb = np.array(emb)
        self.last_mission_embedding = emb

        if verbose:
            print(f"📌 Processed mission prompt: {mission_prompt[:80]}...")
            print(f"   ↪️ Key: {self.last_mission_key[:12]}... | Embedding dim: {emb.shape[0]}")

        return self.last_mission_key, self.last_mission_embedding






    def cluster_prompts(self, n_clusters=3):
        """
        Group the prompts held in ``self.external_db`` with KMeans, using the
        ``"embedding"`` already stored on each entry.

        Entries without an ``"embedding"`` are ignored. A prompt's display
        text is its ``"original_prompt"``, falling back to the DB key.

        Prints the clusters and returns {cluster index: [prompt text, ...]}.
        Returns an empty dict (after a warning) when fewer than *n_clusters*
        embedded prompts are available.
        """
        # (embedding, text) for every entry that carries a precomputed vector
        usable = [
            (entry["embedding"], entry.get("original_prompt", key))
            for key, entry in list(self.external_db.items())
            if "embedding" in entry
        ]
        vectors = [vec for vec, _ in usable]
        texts = [text for _, text in usable]

        # KMeans cannot form more clusters than there are points
        if len(vectors) < n_clusters:
            print(f"⚠️ Not enough prompts to form {n_clusters} clusters "
                  f"(only {len(vectors)} available).")
            return {}

        model = KMeans(n_clusters=n_clusters, random_state=42)
        assignments = model.fit_predict(vectors)

        # Bucket prompt texts by assigned label
        grouped = {index: [] for index in range(n_clusters)}
        for text, label in zip(texts, assignments):
            grouped[label].append(text)

        print("\n📊 Prompt Clusters (semantic similarity):")
        for label, members in grouped.items():
            print(f"🔹 Cluster {label + 1}:")
            for text in members:
                print(f"  - {text[:80]}")
        return grouped


    def score_prompt(prompt_text):
        """
        Score a prompt based on length, structure, keyword richness, etc.
        Can be adjusted based on your scoring philosophy.
        """
        if not prompt_text.strip():
            return 0.0

        base = 0.3
        word_bonus = 0.01 * len(prompt_text.split())      # e.g. 10 words → +0.10
        symbol_penalty = 0.1 if "???" in prompt_text else 0.0

        return min(base + word_bonus - symbol_penalty, 1.0)





    def update_prompt_clusters(self, new_prompt, new_embedding, recluster_threshold=5):
        """
        Update prompt clusters with a new prompt and embedding.
        Track best-scored representative prompt per cluster.

        - `new_prompt`: prompt text to file
        - `new_embedding`: its embedding vector
        - `recluster_threshold`: once this many clusters exist, a full
          recluster (self.recluster_prompt_centroids) is triggered

        The prompt joins the existing cluster whose centroid is most similar
        to it, provided that cosine similarity exceeds 0.85; otherwise it
        founds a new cluster with its own embedding as centroid. Existing
        centroids are not moved by this method.

        Side effects: mutates self.prompt_clusters and always calls
        self.save_external_db() at the end. Returns None.
        """
        if not hasattr(self, "prompt_clusters"):
            self.prompt_clusters = []

        # score_prompt is defined in the class body, so it is not visible as a
        # bare name from inside a method; go through the class instead.
        prompt_score = type(self).score_prompt(new_prompt)

        # Pick the closest cluster above the similarity floor (not merely the
        # first one that happens to clear it).
        best_cluster = None
        best_similarity = 0.85
        for cluster in self.prompt_clusters:
            centroid = cluster.get("centroid")
            if centroid is None:
                # A cluster without a centroid cannot be compared against
                continue
            similarity = cosine_similarity([new_embedding], [centroid])[0][0]
            if similarity > best_similarity:
                best_similarity = similarity
                best_cluster = cluster

        if best_cluster is not None:
            best_cluster["prompts"].append(new_prompt)

            # Update best prompt if applicable
            if "best_score" not in best_cluster or prompt_score > best_cluster["best_score"]:
                best_cluster["best_prompt"] = new_prompt
                best_cluster["best_score"] = prompt_score
        else:
            # Not close to any existing cluster → create a new one
            self.prompt_clusters.append({
                "prompts": [new_prompt],
                "centroid": new_embedding,
                "best_prompt": new_prompt,
                "best_score": prompt_score
            })

        if len(self.prompt_clusters) >= recluster_threshold:
            self.recluster_prompt_centroids()

        self.save_external_db()


    def recluster_prompt_centroids(self, n_clusters=3):
        """
        Recluster all stored prompts based on their embeddings and rebuild prompt_clusters.
        Tracks the best-scored prompt per cluster using score_prompt().

        - `n_clusters`: number of KMeans clusters to request

        Reads every entry of self.external_db except the reserved "__meta__"
        key; entries without an "embedding" are ignored. Does nothing when
        the DB is empty, and only prints a warning when fewer than
        *n_clusters* embedded prompts exist.

        On success self.prompt_clusters is replaced by a list of dicts with
        keys "prompts", "centroid" (mean embedding), "best_prompt" and
        "best_score". Clusters that received no members are left out, so
        every stored cluster has a usable centroid. Returns None.
        """
        if not self.external_db:
            return

        prompts = []
        embeddings = []

        for key, entry in self.external_db.items():
            if key == "__meta__":
                continue
            if "embedding" in entry:
                embeddings.append(entry["embedding"])
                prompts.append(entry.get("original_prompt", key))

        if len(embeddings) < n_clusters:
            print(f"⚠️ Not enough data to recluster ({len(embeddings)} prompts < {n_clusters} clusters).")
            return

        kmeans = KMeans(n_clusters=n_clusters, random_state=42)
        labels = kmeans.fit_predict(embeddings)

        # Single pass: bucket prompts and vectors by label together
        member_prompts = [[] for _ in range(n_clusters)]
        member_vectors = [[] for _ in range(n_clusters)]
        for prompt, vector, label in zip(prompts, embeddings, labels):
            member_prompts[label].append(prompt)
            member_vectors[label].append(vector)

        # score_prompt is defined in the class body, so it is not visible as a
        # bare name from inside a method; go through the class instead.
        score = type(self).score_prompt

        rebuilt = []
        for cluster_prompts, cluster_vectors in zip(member_prompts, member_vectors):
            if not cluster_prompts:
                # An empty cluster would have no centroid to compare against
                continue

            # First prompt with the highest score wins (ties keep the earliest)
            scored = [(score(p), p) for p in cluster_prompts]
            best_score, best_prompt = max(scored, key=lambda pair: pair[0])

            rebuilt.append({
                "prompts": cluster_prompts,
                "centroid": np.mean(cluster_vectors, axis=0),
                "best_prompt": best_prompt,
                "best_score": best_score,
            })

        self.prompt_clusters = rebuilt
        print(f"🔄 Reclustered prompts into {n_clusters} clusters.")






    # --------------------
    # 11.2. Runner Initialization
    # --------------------

    def __init__(self, agents, external_db=None):
        self.agents = agents
        self.prompt_clusters = []

        self.external_db = external_db or {}

        # ── AUTO-LOAD prompt clusters (stored on disk) ──
        self.prompt_clusters = []
        self.output_clusters = []  # Stores clustered outputs with centroids and metadata



        # ── Global prompt & mission tracking ─────────────────────
        self.last_mission_prompt = None       # the last full prompt seen
        self.global_mission_key   = None      # hash key of best global output
        self.global_output        = None      # the best global output text
        self.global_round        = 1         # global round counter
        self.global_time_log      = []        # timestamps of global validations

        # ── Metrics & scenarios logs ────────────────────────────
        self.scenario_log        = []
        self.mission_metrics_log = []

        # ── Per-agent state trackers ────────────────────────────
        self.coop_decision_counters = {agent.name: 1     for agent in agents}
        self.local_validated_agents  = {agent.name: False for agent in agents}

        # ── Initialize each agent’s own local counters & logs ──
        for agent in self.agents:
            agent.local_time_log     = []
            agent.local_interval_log = []
            agent.local_round       = 1
            agent.interval      = 1

        # ── Penalty & decay ─────────────────────────────────────
        self.penalty_tracker = {}  # maps prompt_key → (current_penalty, decay_counter)





    # -----------------------------
    # 11.3. Execution Loop & Orchestration
    # -----------------------------

    def maybe_update_stagnation_counter(self, agent, meaningfulness_score, efficiency_score):
        """
        Track consecutive low-performing rounds on ``agent.stagnation_counter``.

        When both scores are below 0.5 the counter is incremented and True is
        returned. In every other case the counter is reset to 0 (announcing a
        recovery if it was above 0) and False is returned.
        """
        both_low = efficiency_score < 0.5 and meaningfulness_score < 0.5

        if not both_low:
            # Any acceptable score clears the streak
            if agent.stagnation_counter > 0:
                print(f"✅ {agent.name} recovered from stagnation.")
            agent.stagnation_counter = 0
            return False

        agent.stagnation_counter += 1
        print(f"⚠️ {agent.name} stagnation counter incremented → {agent.stagnation_counter}")
        return True


    def execute(self, mission_prompt, verbose=False):
        promote = False

        # ✅ Step 0: Process and cache prompt before execution
        prompt_key, embedding = self.process_mission_prompt(mission_prompt, verbose=True)


        # ── 1. Early sanity check for a garbled prompt ──
        meaningless, sim = is_meaningless_output(mission_prompt, mission_prompt)
        if meaningless:
            print(f"⚠️ Your prompt appears meaningless (sim={sim:.2f}). Please provide a new mission prompt.")
            return False

        # ── 2. Global-prompt similarity & new-vs-same mission logic ──
        reactivated = any(getattr(agent, "variance_history", {}).get("reactivation_flag", False) for agent in self.agents)
        phase_set = {getattr(agent, "variance_history", {}).get("phase", "stable") for agent in self.agents}

        if reactivated or "inflection" in phase_set or "post-inflection" in phase_set:
            print("🔁 Reuse filtering triggered — environment volatile or novel.")
            best_similarity = 0.0     # Pretend nothing is similar
            threshold = 1.1           # Effectively disable reuse threshold
        else:
            best_similarity = self.runner_similarity_to_db(mission_prompt)
            threshold = (
                self.agents[0].meta_parameters.get("reuse_similarity_threshold", 0.75)
                if self.agents else 0.75
            )

        print(f"📈 Best similarity found: {best_similarity:.4f} vs reuse threshold: {threshold:.4f}")
        is_new_prompt = best_similarity < threshold

        if best_similarity >= threshold:
            print("🔁 Similar mission prompt detected — continuing global round.")
        else:
            print("🆕 New mission prompt detected — resetting global counter.")
            self.global_round = 1
            self.global_mission_key = None
            self.global_time_log.clear()





        # ── 3. Central Counter Reset (Stage 3 setup) ──
        MEANING_THRESHOLD = 0.65
        self.meaning_validated = {a.name: False for a in self.agents}
        self.local_validated_agents = {a.name: False for a in self.agents}

        for ag in self.agents:
            ag.local_round        = 1
            ag.interval           = 1
            self.coop_decision_counters[ag.name] = 1
            ag.local_time_log     = []
            ag.local_interval_log = []

        print("\n---- Interval 1 (mission start) ----")
        for ag in self.agents:
            print(f" • {ag.name}: local_round={ag.local_round}, interval={ag.interval}")

        # 4. Metric selection (now per-agent, adaptive)
        for ag in self.agents:
            ag.assign_metrics_from_prompt(
                prompt=mission_prompt,
                top_k=3,
                verbose=verbose
            )

        # Optionally: aggregate all metrics for mission-level log
        from collections import Counter
        all_metrics = [m for ag in self.agents for m in ag.metrics]
        mission_metrics = [m for m, _ in Counter(all_metrics).most_common(3)]

        # ── 5. SEED INITIAL ROLES & TASKS for Interval 1 ──
        for ag in self.agents:
            ag.assign_roles_from_prompt(
                prompt=self.last_mission_prompt,
                top_k=3,
                verbose=True
            )
            ag.reevaluate_roles(
                self.last_mission_prompt,
                efficiency_threshold=ag.meta_parameters["strategy_fit"],
                verbose=True
            )
            ag.role_task_map = {}
            for role in ag.roles:
                ag.role_task_map[role] = ROLE_TASK_MAP.get(role, []).copy()
            ag.assign_tasks_from_roles_multi_round(
                prompt=self.last_mission_prompt,
                global_round=self.global_round,
                allow_jollycard=True,
                verbose=True
            )

        print("\n🔩 Seeded initial roles/tasks for Interval 1")

        for ag in self.agents:
            print(f" • {ag.name}: roles={ag.roles}, tasks={ag.tasks}")
            print("\n📦 Dynamic task_store contents after initial seed (per agent):")
            for ag2 in self.agents:
                print(f" • {ag2.name}.task_store:")
                if not ag2.task_store:
                    print("     (empty)")
                else:
                    for key, entry in ag2.task_store.items():
                        desc = entry.get("desc", "<no desc>")
                        score = entry.get("last_score", 0.0)
                        print(f"     • {key!r}: last_score={score:.3f}, desc='{desc[:40]}…'")

        # ── 6. Reset per-mission counters & initialize Stage-flags ──
        # (This is somewhat redundant with above, but ensures fresh state.)
        self.meaning_validated = {agent.name: False for agent in self.agents}
        self.local_validated_agents = {agent.name: False for agent in self.agents}
        for agent in self.agents:
            agent.local_round        = 1
            agent.interval           = 1
            self.coop_decision_counters[agent.name] = 1
            agent.local_interval_log = []

        # ── 7. Main agent loop — repeat until at least one local validation ──
        local_validated = []
        interval_cycles = 0

        while not local_validated:
            interval_cycles += 1
            # ✅ Reset rerouting flags for this interval
            for agent in self.agents:
                agent.has_been_rerouted = False

            if interval_cycles > 10:
                print("⚠️  Aborting after 10 unvalidated local cycles.")
                break

            current_iv = self.agents[0].interval
            print(f"\n==== Interval {current_iv} ====")


            meaningful_found_this_interval = False


            # ── Start-of-interval physiology snapshot (x = k–1)
            for ag in self.agents:
                ag.attention_history.append(ag.compute_attention())
                ag.fatigue_history.append(ag.compute_fatigue())
                ag.hunger_history.append(ag.compute_hunger())

            local_outputs = []
            scenario_tally = {}

            # ── 7.1. Loop over each agent, in full (repeat until at least one local validation)
            for agent in self.agents:
                # A-1) Role/task logic
                if agent.interval == 1:
                    print(f"🔒 {agent.name} retains roles/tasks: {agent.roles} → {agent.tasks}")
                else:
                    agent.assign_roles_from_prompt(
                        prompt=mission_prompt,
                        top_k=3,
                        verbose=True
                    )
                    agent.reevaluate_roles(
                        prompt=mission_prompt,
                        efficiency_threshold=agent.meta_parameters["strategy_fit"],
                        verbose=True
                    )
                    agent.assign_tasks_from_roles_multi_round(
                        prompt=mission_prompt,
                        global_round=self.global_round + 1,
                        allow_jollycard=True,
                        verbose=True
                    )
                    print(f"🔁 {agent.name} reassigned → {agent.roles} → {agent.tasks}")

                # A-2) Fallback if still no roles/tasks
                if not agent.roles or not agent.tasks:
                    print(f"⚠️ {agent.name} has no roles/tasks → fallback.")
                    agent.fallback_choose_role_tasks(POSSIBLE_ROLES, POSSIBLE_TASKS, verbose=True)
                    print(f"🔄 {agent.name} fallback → {agent.roles} → {agent.tasks}")

                # A-3) Persist role_task_map pruning from before
                if hasattr(agent, "role_task_map"):
                    for role, tasks in agent.role_task_map.items():
                        agent.role_task_map[role] = [t for t in tasks if t in agent.tasks]


                # A-4) Redundancy filters & skip
                redundancy_reason = None

                if (
                    agent.in_cooperation and any(
                        RedundancyFilter.is_content_redundant(agent, t, mission_prompt, self.agents)
                        for t in agent.tasks)
                ):
                    redundancy_reason = "content-level redundancy"

                elif any(
                    RedundancyFilter.is_task_redundant(
                        agent, t, prompt=mission_prompt,
                        role=(agent.roles[0] if agent.roles else None),
                        role_aware=True)
                    for t in agent.tasks
                ):
                    redundancy_reason = "task-level redundancy"

                elif RedundancyFilter.is_prompt_redundant(mission_prompt, self.external_db, agent):
                    redundancy_reason = "prompt-level redundancy"

                elif hasattr(agent, "last_graph_embedding") and RedundancyFilter.is_graph_redundant(
                    agent.last_graph_embedding, graph_index, graph_key_map
                ):
                    redundancy_reason = "causal graph redundancy"

                if redundancy_reason:
                    print(f"🛑 {agent.name} skipped due to {redundancy_reason}.")
                    continue

                # Skip if no roles or tasks
                if not agent.roles or not agent.tasks:
                    print(f"⏩ {agent.name} skipped act() — no roles/tasks.")
                    agent.increment_local_round(reason="validated")
                    continue




                # A-5) ACT → get output, raw physio (x = k-1 start)
                output, score, attention, fatigue, hunger, *_ = agent.act(
                    mission_prompt, verbose=True
                )


                # A-5.5) Inline gating based on internal metrics — PRE-LLM CALL
                metrics = agent.compute_internal_metrics(mission_prompt, output=None)


                # ── STEP 5: Inline Role Gating Sensitivity ──────────────────────────────
                phase = getattr(agent, "variance_history", {}).get("phase", "stable")
                reactivation = getattr(agent, "variance_history", {}).get("reactivation_flag", False)

                # Use base thresholds as fallback
                base_thresholds = agent.meta_parameters.get("inline_thresholds", {})
                adjusted_thresholds = {}

                for role, base in base_thresholds.items():
                    if phase in ("inflection", "post-inflection") or reactivation:
                        # Soften threshold → encourage activation
                        adjusted = max(base * 0.85, 0.1)
                    elif phase in ("stable", "pre-inflection"):
                        # Tighten threshold → conserve resources
                        adjusted = min(base * 1.15, 0.9)
                    else:
                        adjusted = base
                    adjusted_thresholds[role] = round(adjusted, 3)

                # Store or override
                agent.meta_parameters["inline_thresholds_adaptive"] = adjusted_thresholds


                # Gate roles using the adjusted thresholds
                inline_roles = gate_inline_roles(
                    agent,
                    metrics,
                    adjusted_thresholds,
                    ROLE_METRIC_MAP
                )


                agent.inline_activated_roles = inline_roles


                if inline_roles:  # only store if something actually activated
                    agent.update_long_memory(
                        prompt=mission_prompt,
                        output=output,
                        score=score,
                        signals=signals,
                        prompt_key=self.generate_prompt_key(mission_prompt),
                        verbose=False



                # Modify prompt inline based on active roles
                # Modify prompt inline based on active roles
                role_instructions = {
                    "Refiner":  "Refine your response carefully.",
                    "Validator": "Validate your answer as you generate it.",
                    "Executor": "Include logic or code execution details."
                }

                prefix_lines = [role_instructions[r] for r in inline_roles if r in role_instructions]
                prefix_block = "\n".join(prefix_lines)

                final_prompt = f"{prefix_block}\n\nOriginal prompt:\n\n{mission_prompt}" if prefix_block else mission_prompt

                if verbose:
                    print(f"🎯 {agent.name} inline roles active → {inline_roles}")
                    print(f"📝 Modified prompt:\n{final_prompt[:300]}...")





                # A-6) Generate the output (single LLM call, now with inline role effects)

                # Start from defaults
                llm_kwargs = {
                    "temperature": 0.5,
                    "top_p": 1.0
                }

                # Apply inline role overrides conservatively
                for role in inline_roles:
                    if role in INLINE_ROLE_BEHAVIOR:
                        for k, v in INLINE_ROLE_BEHAVIOR[role].items():
                            llm_kwargs[k] = min(llm_kwargs.get(k, 1.0), v)

                # Call LLM
                output = llm_generate(
                    model_key=agent.model_key,
                    prompt=final_prompt,
                    **llm_kwargs
                ).strip()







                # ✅ Optional: Reassign tasks based on updated roles
                agent.detect_and_resolve_role_task_conflict(mission_prompt, verbose=True)


                # Append end-of-interval physiology snapshot (x = k)
                agent.attention_history.append(attention)
                agent.fatigue_history.append(fatigue)
                agent.hunger_history.append(hunger)



                # Compute true efficiency
                efficiency_score = round(
                    score * attention /
                    (1 + 0.1 * agent.interval
                      + 0.5 * agent.meaningless_output_counter
                      + 0.3 * agent.external_access_count),
                    3
                )
                print(
                    f"🚀 {agent.name} act() → "
                    f"Score={score:.3f}, Eff={efficiency_score:.3f}, "
                    f"att={attention:.3f}, fat={fatigue:.3f}, hung={hunger:.3f}"
                )

                # --- Log interval scores and state for this agent ---
                agent.interval_score_log.append({
                    "efficiency":      efficiency_score,
                    "meaningfulness":  score,
                    "attention":       attention,
                    "fatigue":         fatigue,
                    "hunger":          hunger,
                    "scenario":        scene,  # scenario label from compute_foresight()
                    "roles":           agent.roles[:],        # copy current roles
                    "tasks":           agent.tasks[:],        # copy current tasks
                    "features":        agent.features[:],     # copy current features
                    "cooperation":     agent.in_cooperation,  # True/False
                    "interval":        agent.interval,        # optional: interval counter
                    "external_access": agent.external_access_count,  # optional: how many external calls this interval
                    "uvr_similarity": signals.get("uvr_similarity"),
                    "output_similarity_variance": signals.get("output_similarity_variance")
                })


                # Dropout & pruning
                agent.apply_dropout(mission_prompt, verbose=True)
                agent.apply_pruning(mission_prompt, verbose=True)
                if hasattr(agent, "role_task_map"):
                    for role, tasks in agent.role_task_map.items():
                        agent.role_task_map[role] = [t for t in tasks if t in agent.tasks]

                # Mid-interval physiology snapshot (x = k-0.5)
                att_mid  = agent.compute_attention()
                fat_mid  = agent.compute_fatigue()
                hung_mid = agent.compute_hunger()
                agent.attention_history.append(att_mid)
                agent.fatigue_history.append(fat_mid)
                agent.hunger_history.append(hung_mid)
                if verbose:
                    print(f"[MID] {agent.name}  att={att_mid:.3f}, fat={fat_mid:.3f}, hung={hung_mid:.3f}")

                # Dropout & pruning again (for mid-interval)
                agent.apply_dropout(mission_prompt, verbose=True)
                agent.apply_pruning(mission_prompt, verbose=True)
                if hasattr(agent, "role_task_map"):
                    for role, tasks in agent.role_task_map.items():
                        agent.role_task_map[role] = [t for t in tasks if t in agent.tasks]




                # Stagnation tracking & recovery
                stagnated = self.maybe_update_stagnation_counter(
                    agent,
                    meaningfulness_score=1.0,  # TODO: real meaningfulness if needed
                    efficiency_score=efficiency_score
                )
                self.maybe_trigger_recovery(agent)

                # Stage 1: Let agent decide if this interval is meaningful
                if not self.meaning_validated[agent.name]:
                    validated = agent.evaluate_interval(score, threshold=MEANING_THRESHOLD, verbose=True)
                    if validated:
                        self.meaning_validated[agent.name] = True
                        meaningful_found_this_interval = True


                # Immediate global promotion for new prompts
                if is_new_prompt:
                    promote = True
                    best = {
                        "agent":      agent,
                        "score":      score,
                        "efficiency": efficiency_score
                    }
                    print(f"⏹️ {agent.name} is first to validate → ending global loop early.")

                    # Store in external_db
                    raw_emb = embedding_model.encode(self.last_mission_prompt)
                    if raw_emb is None:
                        emb = np.zeros(EMB_DIM)
                    elif isinstance(raw_emb, list):
                        emb = np.array(raw_emb)
                    else:
                        emb = raw_emb
                    # ── STEP 1: Injection Control for external DB ───────────────────────────────
                    allow_injection = False
                    phase = getattr(agent, "variance_history", {}).get("phase", "stable")
                    reactivation = getattr(agent, "variance_history", {}).get("reactivation_flag", False)

                    if phase in ("inflection", "post-inflection") or reactivation:
                        allow_injection = True

                    if not allow_injection:
                        print(f"🚫 {agent.name} skipping external DB injection — phase={phase}, reactivation={reactivation}")
                    else:
                        print(f"✅ {agent.name} external DB injection allowed — phase={phase}, reactivation={reactivation}")

                if allow_injection:

                    prompt_key = self.last_mission_key


                    self.external_db[prompt_key] = {
                        "output": output,
                        "score": score,
                        "metrics": mission_metrics,
                        "embedding": emb if isinstance(emb, np.ndarray) else np.array(emb),
                        "prompt_key": prompt_key,
                        "original_prompt": mission_prompt,
                        "timestamp": time.time(),
                        "usage_count": 0,

                        # Causal graph
                        "graph_embedding": (
                            agent.last_graph_embedding.tolist()
                            if hasattr(agent, "last_graph_embedding") else None
                        ),
                        "graph_text": (
                            agent.last_graph_text
                            if hasattr(agent, "last_graph_text") else None
                        ),

                        # Agent state snapshot
                        "attention": attention,
                        "fatigue": fatigue,
                        "hunger": hunger,
                        "roles": agent.roles[:],
                        "tasks": agent.tasks[:],
                        "features": agent.features[:],
                        "inline_activated_roles": agent.inline_activated_roles[:],

                        # Reasoning context
                        "role_task_map": deepcopy(agent.role_task_map),
                        "role_metric_map": deepcopy(agent.role_metric_map),

                        "uvr_similarity": signals.get("uvr_similarity"),
                        "output_similarity_variance": signals.get("output_similarity_variance")
                    }



                    # ✅ Save to disk
                    self.save_external_db("external_db.json")


                    self.global_time_log.append(time.time())
                    print(f"\n🥇 GLOBAL PROMOTED (g={self.global_round})")
                    plot_agent_physiology(self)
                    plot_real_timelines_with_shading(self)
                    self.global_round += 1
                    self.reset_all()
                    return True


                # Stage 2: Local mission validation (delegated to agent)
                if self.meaning_validated[agent.name] and not self.local_validated_agents[agent.name]:
                    validated = agent.evaluate_local_mission(
                        mission_key=self.last_mission_prompt,
                        score=score,
                        output=output,
                        signals=signals,
                        prompt=mission_prompt,
                        verbose=verbose
                    )
                    if validated:
                        self.local_validated_agents[agent.name] = True
                        # Cluster the validated output
                        self.cluster_output(
                            output_text=output,
                            task=agent.tasks[0] if agent.tasks else None,
                            prompt_key=self.last_mission_prompt,
                            score=score,  # ✅ score from evaluate_local_mission
                            verbose=verbose
                        )




                        print(f"\n📈 {agent.name} physiology this local round:")
                        plot_agent_physiology(self, agents=[agent])
                        break

                # Tally & metrics collection
                scene = agent.compute_foresight(
                    mission_prompt,
                    path_reuse=False,
                    similarity_score=score,
                    external_db=self.external_db
                )

                agent.signal_spike_log.append({
                    "timestamp": time.time(),
                    "agent": agent.name,
                    "roles": agent.roles[:],
                    "output_similarity_variance": signals.get("output_similarity_variance"),
                    "inflection_phase": getattr(agent, "variance_history", {}).get("phase", "unknown"),
                    "reactivation_flag": getattr(agent, "variance_history", {}).get("reactivation_flag", False),
                    "scenario": scene  # ← Add the foresight scenario label here
                })

                # ── SIGNAL-BASED REROUTING TRIGGER ──────────────────────
                latest_signal = agent.signal_spike_log[-1] if agent.signal_spike_log else None


                if latest_signal and agent.is_signal_clear(latest_signal):

                    if agent.has_been_rerouted:
                        print(f"⚠️  {agent.name} has already rerouted this interval → skipping.")
                    else:
                        print(f"🔁 {agent.name} detected a clear rerouting signal → triggering strategy reset.")
                        agent.has_been_rerouted = True

                        severity = "severe" if latest_signal.get("cluster_tag") == "strategy collapse" else "moderate"
                        agent.reroute_strategy(severity=severity, verbose=True)
                        agent.reset_local_state()

                    continue








                scenario_tally[scene] = scenario_tally.get(scene, 0) + 1
                local_outputs.append({
                    "agent": agent,
                    "output": output,
                    "score": score,
                    "attention": attention,
                    "fatigue": fatigue,
                    "hunger": hunger,
                    "scenario": scene
                })


            if not meaningful_found_this_interval:
                old_thresh = MEANING_THRESHOLD
                MEANING_THRESHOLD = max(MEANING_THRESHOLD - 0.05, 0.1)
                print(f"⬇️  MEANING_THRESHOLD reduced: {old_thresh:.2f} → {MEANING_THRESHOLD:.2f}")


            # BREAK OUT if someone already validated
            if any(self.local_validated_agents.values()):
                break

            # 4.2) Print scenario tally & handle cooperation
            if not is_new_prompt and not promote:
                candidates = [r for r in local_outputs if r["score"] >= MEANING_THRESHOLD]
                if candidates:
                    old = self.external_db[self.global_mission_key]
                    old_mean = old["metrics"]["meaningfulness"]
                    old_eff  = old["metrics"]["efficiency"]
                    valid = [
                        r for r in candidates
                        if (r["score"] > old_mean and r["efficiency"] >= old_eff)
                          or (r["score"] == old_mean and r["efficiency"] > old_eff)
                    ]
                    if valid:
                        best = max(valid, key=lambda r: (r["score"], r["efficiency"]))
                        promote = True
                        print(f"⏹️ {best['agent'].name} wins repeat‐prompt promotion at interval {current_iv}.")
                        break

            if not promote:
                print("\n📊 Scenario tally across agents:")
                for scen, cnt in scenario_tally.items():
                    print(f" - {scen}: {cnt}")
                self.handle_cooperation(scenario_tally)

            # ── (Optional) Show UVR spikes after this round ──
            for agent in self.agents:
                if hasattr(agent, "signal_spike_log") and agent.signal_spike_log:
                    print(f"\n📌 Spike log for {agent.name}:")
                    for spike in agent.signal_spike_log:
                        print(f"  ⏱️ [{spike['timestamp']:.0f}] Phase={spike['inflection_phase']}, "
                              f"Roles={spike['roles']}, ΔVar={spike['output_similarity_variance']:.4f}")



            # 4.3) Log scenario tally
            self.scenario_log.append({
                "global_round": self.global_round + 1,
                "scenario_tally": dict(scenario_tally)
            })


            # 4.3b) Run spike clustering and scenario-cluster correlation
            self.cluster_signal_spikes()           # Step 2
            self.build_scenario_cluster_map()      # Step 3

            # 4.3c) Save scenario–cluster co-occurrence map
            self.save_scenario_cluster_map("scenario_cluster_map.json")


            # 4.4) Local validation check & logging
            local_validated = []
            for entry in local_outputs:
                agent, sc = entry["agent"], entry["score"]
                if sc >= MEANING_THRESHOLD:
                    local_validated.append(agent)
                    self.local_validated_agents[agent.name] = True
                    print(f"✅ {agent.name} validated locally (score={sc:.2f})")
                else:
                    self.local_validated_agents[agent.name] = False

            for entry in local_outputs:
                ag = entry["agent"]
                self.mission_metrics_log.append({
                    "global_round": self.global_round + 1,
                    "agent":        ag.name,
                    "score":        entry["score"],
                    "validated":    self.local_validated_agents[ag.name],
                    "metrics":      mission_metrics,
                    "scenario":     entry["scenario"],
                    "timestamp":    time.time()
                })

        # ── end while not local_validated ──

        # --- LOCAL ROUND END: Update short-term meta-parameters and logs for EACH AGENT ---
        for agent in self.agents:
            intervals = getattr(agent, "interval_score_log", [])
            if intervals:  # Only if there was at least one interval
                avg_eff = np.mean([x["efficiency"] for x in intervals])
                avg_mean = np.mean([x["meaningfulness"] for x in intervals])
                avg_attn = np.mean([x["attention"] for x in intervals])

                # Get the mode scenario label for the local round
                scenario_mode = get_local_round_scenario_mode(agent.local_round_score_log)
                foresight_signal = {"scenario": scenario_mode}

                # Update short-term meta-parameters
                agent.update_short_term_meta_parameters(
                    foresight_signal=foresight_signal,
                    avg_efficiency_short_term=avg_eff,
                    avg_meaningfulness_short_term=avg_mean,
                    attention_score=avg_attn,
                    interval_score_log=agent.interval_score_log
                )

                # Log/clear interval scores for this local round
                if not hasattr(agent, "local_round_score_log"):
                    agent.local_round_score_log = []
                agent.local_round_score_log.append(agent.interval_score_log[:])
                agent.interval_score_log.clear()



        # 5) Global Mission Promotion
        validated = [
            r for r in local_outputs
            if self.local_validated_agents.get(r["agent"].name, False)
        ]
        if validated:
            best = max(validated, key=lambda r: r["score"])
            agent, score = best["agent"], best["score"]
            if agent.stagnation_counter >= agent.meta_parameters.get(
                    "stagnation_recovery_threshold", 5):
                print(f"⚡ {agent.name} stagnated → forcing global promotion.")
                promote = True
            else:
                key = self.global_mission_key
                old = self.external_db[key]["score"] if key else -float("inf")
                if self.global_round == 1 or not key or agent.wants_global_promotion(score, old):
                    promote = True

        if promote:

            emb = self.last_mission_embedding
            prompt_key = self.last_mission_key



            self.external_db[prompt_key] = {
                "output":          best["output"],
                "score":           best["score"],
                "metrics":         mission_metrics,
                "embedding":       emb,
                "prompt_key": prompt_key,
                "original_prompt": mission_prompt,
                "timestamp":       time.time(),
                "usage_count":     0,

                # Agent state
                "attention": agent.attention_history[-1] if agent.attention_history else None,
                "fatigue": agent.fatigue_history[-1] if agent.fatigue_history else None,
                "hunger": agent.hunger_history[-1] if agent.hunger_history else None,
                "roles": agent.roles[:],
                "tasks": agent.tasks[:],
                "features": agent.features[:],
                "inline_activated_roles": agent.inline_activated_roles[:],

                # Causal graph info
                "graph_embedding": agent.last_graph_embedding.tolist()
                    if hasattr(agent, "last_graph_embedding") else None,
                "graph_text": agent.last_graph_text
                    if hasattr(agent, "last_graph_text") else None,

                # Reasoning maps
                "role_task_map": deepcopy(agent.role_task_map),
                "role_metric_map": deepcopy(agent.role_metric_map)
            }

            # 🔁 Update semantic prompt clusters (canonical injection only)
            self.update_prompt_clusters(
                new_prompt=mission_prompt,
                new_embedding=emb
            )

            # ✅ Auto-save updated clusters
            self.save_prompt_clusters("prompt_clusters.json")

            # ✅ Save to disk
            self.save_external_db("external_db.json")

            self.global_time_log.append(time.time())
            print(f"\n🥇 GLOBAL PROMOTED (g={self.global_round})")
            self.global_round += 1

            # --- Long-term meta-parameter updates for all agents ---
            global_avg_eff = np.mean([
                np.mean([x["efficiency"] for x in getattr(agent, "local_round_score_log", [[]])[-1]])
                if getattr(agent, "local_round_score_log", []) else 0.5
                for agent in self.agents
            ])
            global_avg_mean = np.mean([
                np.mean([x["meaningfulness"] for x in getattr(agent, "local_round_score_log", [[]])[-1]])
                if getattr(agent, "local_round_score_log", []) else 0.5
                for agent in self.agents
            ])

            global_avg_attn = np.mean([
                np.mean([x["attention"] for x in getattr(agent, "local_round_score_log", [[]])[-1]])
                if getattr(agent, "local_round_score_log", []) else 0.5
                for agent in self.agents
            ])


            global_avg_fatigue = np.mean([
                np.mean([x.get("fatigue", 0.5) for x in getattr(agent, "local_round_score_log", [[]])[-1]])
                if getattr(agent, "local_round_score_log", []) else 0.5
                for agent in self.agents
            ])

            global_avg_hunger = np.mean([
                np.mean([x.get("hunger", 0.5) for x in getattr(agent, "local_round_score_log", [[]])[-1]])
                if getattr(agent, "local_round_score_log", []) else 0.5
                for agent in self.agents
            ])



            for agent in self.agents:
                # --- Compute scenario mode for the global round
                global_scenario_mode = get_global_round_scenario_mode(agent.local_round_score_log)
                foresight_signal = {"scenario": global_scenario_mode}

                agent.update_long_term_meta_parameters(
                    foresight_signal=foresight_signal,
                    avg_efficiency_long_term=global_avg_eff,
                    avg_meaningfulness_long_term=global_avg_mean,
                    global_avg_attn=global_avg_attn,
                    global_avg_fatigue=global_avg_fatigue,
                    global_avg_hunger=global_avg_hunger,
                    verbose=True
                )


            plot_agent_physiology(self)
            plot_real_timelines_with_shading(self)
            for agent in self.agents:
                agent.local_round        = 1
                gent.interval           = 1
                agent.local_time_log     = []
                agent.local_interval_log = []
                agent.stagnation_counter = 0
            self.reset_all()

        # 6) Abortion: no agent passed Stage 2 within the max intervals
        if not local_validated:
            print("🔴 No agent completed Stage 2 within the interval limit → aborting round.")
            self.global_round += 1

            self.reset_all()

        return promote


    # --------------------
    # 11.4. Stagnation Recovery
    # --------------------

    def maybe_trigger_recovery(self, agent):
        """
        Run memory-based recovery for an agent whose stagnation counter is too high.

        The limit is read from the agent's own
        ``meta_parameters["stagnation_recovery_threshold"]`` (5 when the key is
        absent). When ``agent.stagnation_counter`` has reached that limit, the
        agent's ``trigger_memory_based_recovery`` is called and the counter is
        set back to zero; below the limit nothing happens.
        """
        limit = agent.meta_parameters.get("stagnation_recovery_threshold", 5)
        is_stuck = agent.stagnation_counter >= limit
        if not is_stuck:
            return

        print(f"🚨 {agent.name} is stuck → triggering memory-based recovery.")
        agent.trigger_memory_based_recovery(verbose=True)
        # Reset the counter now that recovery has been triggered.
        agent.stagnation_counter = 0



    # --------------------
    # 11.5. Agent Timeline Plotting
    # --------------------

    def plot_agent_timelines(self):
        """
        Print one ASCII bar per agent marking the intervals found in its log.

        Every bar spans interval 1 up to the largest value recorded in any
        agent's ``local_interval_log``. A filled cell (▓) means that interval
        is present in the agent's log, an empty cell (░) means it is not.
        When no agent has any logged interval, a placeholder text is printed
        instead of a bar.
        """
        rule = "=" * 80
        print("\n🕒 Agent Timelines Based on Local Validation Speed:")
        print(rule)

        # Bar width = latest interval found in any non-empty log (0 if none).
        latest_per_agent = [
            max(member.local_interval_log)
            for member in self.agents
            if member.local_interval_log
        ]
        width = max(latest_per_agent, default=0)

        for member in self.agents:
            cells = [
                "▓" if step in member.local_interval_log else "░"
                for step in range(1, width + 1)
            ]
            bar = "".join(cells)
            if not bar:
                bar = "(no validations yet)"
            print(f"🤖 {member.name}: {bar}")

        print(rule + "\n")

    # --------------------
    # 11.6. Agent Snapshot Reporting
    # --------------------
    def print_agent_snapshot(self):
        """
        Print a quick snapshot of every agent in ``self.agents``.

        For each agent this shows its roles, tasks, local round, the last
        entry of ``local_interval_log`` (0 when the log is empty), the number
        of entries in ``local_time_log`` with up to the three most recent
        timestamps, and the size of ``short_memory`` with up to the three most
        recent entries (roles plus the first 60 characters of the prompt).
        """
        print("\n🧠 Agent Snapshot:")
        print("=" * 80)
        for agent in self.agents:
            print(f"\n🤖 {agent.name}")
            print(f"  🎭 Roles:     {', '.join(agent.roles) if agent.roles else 'None'}")
            print(f"  🧪 Tasks:     {', '.join(agent.tasks) if agent.tasks else 'None'}")
            print(f"  🔢 Local round:   {agent.local_round}")
            last_interval = agent.local_interval_log[-1] if agent.local_interval_log else 0
            print(f"  ⏳ Last validated interval: {last_interval}")

            # Fetch the log once with a default so an agent that has no
            # `local_time_log` attribute reports zero validations instead of
            # raising AttributeError in the timestamp loop below.
            local_times = getattr(agent, "local_time_log", [])
            print(f"  ✅ Local validations: {len(local_times)}")
            # Show up to the last 3 validation timestamps
            for idx, ts in enumerate(local_times[-3:], start=1):
                print(f"    {idx}. {time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(ts))}")

            print(f"  📚 Short mem entries: {len(agent.short_memory)}")
            # Show the last 3 memory entries
            for idx, entry in enumerate(agent.short_memory[-3:], start=1):
                snip = entry.get("prompt", "")[:60]
                roles = entry.get("roles", [])
                # Roles may be stored as a list or as an already-joined value.
                roles = ", ".join(roles) if isinstance(roles, list) else roles
                print(f"    {idx}. [Roles: {roles or 'None'}] Prompt: {snip}...")
        print("=" * 80)



    def plot_scenario_cluster_timeline(self):
        """
        Plot how scenario labels and signal-cluster tags evolve over time.

        Collects every entry of each agent's ``signal_spike_log`` that carries
        both a ``"scenario"`` and a ``"cluster_tag"`` key, then draws two
        side-by-side scatter plots against the time elapsed since the earliest
        collected spike: one for scenario labels, one for cluster tags.
        Prints a warning and returns without plotting when no such entry exists.
        """
        timestamps = []
        scenario_labels = []
        cluster_tags = []

        # Collect all signals with both scenario + cluster tag
        for agent in self.agents:
            for spike in getattr(agent, "signal_spike_log", []):
                if "scenario" in spike and "cluster_tag" in spike:
                    timestamps.append(spike["timestamp"])
                    scenario_labels.append(spike["scenario"])
                    cluster_tags.append(spike["cluster_tag"])

        if not timestamps:
            print("⚠️ No annotated scenario-cluster signals to plot.")
            return

        # Normalize timestamps so the x-axis starts at the first spike
        min_t = min(timestamps)
        norm_times = [t - min_t for t in timestamps]

        # Encode categories to ints (imported lazily: only needed when plotting)
        from sklearn.preprocessing import LabelEncoder
        scen_enc = LabelEncoder()
        clus_enc = LabelEncoder()

        y1 = scen_enc.fit_transform(scenario_labels)
        y2 = clus_enc.fit_transform(cluster_tags)

        # `matplotlib.cm.get_cmap` was removed in Matplotlib 3.9;
        # `pyplot.get_cmap` accepts the same (name, lut) arguments and is
        # available on both older and newer releases.
        cmap1 = plt.get_cmap("tab10", len(scen_enc.classes_))
        cmap2 = plt.get_cmap("tab20", len(clus_enc.classes_))

        plt.figure(figsize=(12, 5))

        # Plot scenario labels
        plt.subplot(1, 2, 1)
        plt.scatter(norm_times, y1, c=y1, cmap=cmap1, s=40)
        plt.yticks(range(len(scen_enc.classes_)), scen_enc.classes_)
        plt.xlabel("Time (s since first spike)")
        plt.title("⏳ Scenario Evolution")

        # Plot cluster tags
        plt.subplot(1, 2, 2)
        plt.scatter(norm_times, y2, c=y2, cmap=cmap2, s=40)
        plt.yticks(range(len(clus_enc.classes_)), clus_enc.classes_)
        plt.xlabel("Time (s since first spike)")
        plt.title("🧠 Signal Cluster Evolution")

        plt.tight_layout()
        plt.show()


    # --------------------
    # 11.7. Foresight Response
    # --------------------
    def respond_to_foresight(self, agent, foresight_scenario):
        """
        Apply the behavior mapped to a foresight scenario to the given agent.

        The scenario label is normalized with ``normalize_foresight_key`` and
        looked up in ``FORESIGHT_BEHAVIOR_MAP``. If nothing is mapped, a
        warning is printed and the agent is left untouched. Otherwise the
        entry's ``"response"`` value selects one of the adjustments below; a
        response type that is not handled here is silently ignored.

        Args:
            agent: Agent whose attention history, cooperation flag or
                meta-parameters are adjusted in place.
            foresight_scenario: Scenario label to respond to.

        Raises:
            KeyError: If the selected adjustment reads a meta-parameter
                (``"task_feature_coupling"`` or ``"strategy_fit"``) that is
                absent from ``agent.meta_parameters`` (assuming it is a dict).
        """
        behavior = FORESIGHT_BEHAVIOR_MAP.get(normalize_foresight_key(foresight_scenario))

        if not behavior:
            print(f"⚠️ No mapped behavior for foresight scenario: {foresight_scenario}")
            return

        response_type = behavior["response"]

        if response_type == "reuse_best_path":
            print(f"🔁 {agent.name} reusing best global path.")
            # No special action yet (already retrieved best)
        elif response_type == "force_attention_reset":
            print(f"⚡ {agent.name} forcing attention reset.")
            agent.attention_history = [1.0]  # Full attention restored
        elif response_type == "prepare_adjust_weights":
            print(f"🛠️ {agent.name} preparing by adjusting feature weights.")
            # Raise coupling by 0.1, capped at 1.0
            agent.meta_parameters["task_feature_coupling"] = min(
                agent.meta_parameters["task_feature_coupling"] + 0.1, 1.0
            )
        elif response_type == "probe_with_low_weight":
            print(f"🧪 {agent.name} probing fringe path.")
            # Lower strategy fit by 0.1, floored at 0.1
            agent.meta_parameters["strategy_fit"] = max(
                agent.meta_parameters["strategy_fit"] - 0.1, 0.1
            )
        elif response_type == "promote_exploration":
            print(f"🚀 {agent.name} promoting high-priority exploration.")
            # Raise strategy fit by 0.2, capped at 1.0
            agent.meta_parameters["strategy_fit"] = min(
                agent.meta_parameters["strategy_fit"] + 0.2, 1.0
            )
        elif response_type == "activate_high_entropy":
            print(f"🎲 {agent.name} entering high-entropy exploration mode.")
            agent.in_cooperation = False  # Agent goes solo to explore
        elif response_type == "activate_chain_analysis":
            print(f"🔗 {agent.name} activating chain analysis mode.")
            # You could flag for cascade-specific behavior later
        elif response_type == "add_redundancy":
            print(f"🛡️ {agent.name} adding redundancy buffers.")
            # Raise coupling by 0.15, capped at 1.0
            agent.meta_parameters["task_feature_coupling"] = min(
                agent.meta_parameters["task_feature_coupling"] + 0.15, 1.0
            )

    # --------------------
    # 11.8. Metric Selection Simulation
    # --------------------

    def meta_learned_metric_selection(self, mission_prompt):
        """
        Pick the metrics whose reference embedding best matches the mission prompt.

        The prompt is encoded with ``self.embedding_model`` and compared by cosine
        similarity against every vector in ``self.metric_reference_embeddings``.
        Metrics scoring strictly above ``metric_selection_threshold`` (default 0.5)
        are kept and, if there are more than ``top_k_metrics`` (default 3), only
        the most similar ones survive. When nothing clears the threshold the
        single most similar metric is returned instead.

        Args:
            mission_prompt: Prompt text to match against the reference metrics.

        Returns:
            list: Selected keys of ``self.metric_reference_embeddings``. Empty
            only when no reference embeddings are registered.
        """
        # Without reference vectors there is nothing to rank; the fallback
        # `max()` below would raise ValueError on an empty mapping.
        if not self.metric_reference_embeddings:
            return []

        prompt_vec = self.embedding_model.encode(mission_prompt)
        sims = {
            m: cosine_similarity([prompt_vec], [ref_vec])[0][0]
            for m, ref_vec in self.metric_reference_embeddings.items()
        }

        # Select metrics above adaptive threshold
        thresh = self.meta_parameters.get("metric_selection_threshold", 0.5)
        selected_metrics = [m for m, score in sims.items() if score > thresh]

        # Cap at top_k_metrics (the list is only re-ordered when it is trimmed)
        top_k = self.meta_parameters.get("top_k_metrics", 3)
        if len(selected_metrics) > top_k:
            selected_metrics = sorted(selected_metrics, key=sims.get, reverse=True)[:top_k]

        # Fallback to most similar if none pass threshold
        if not selected_metrics:
            selected_metrics = [max(sims, key=sims.get)]
        return selected_metrics



    # --------------------
    # 11.9. External Database Similarity (runner-side)
    # --------------------
    def runner_similarity_to_db(self, prompt):
        """
        Return the best cosine similarity between `prompt` and the embeddings
        stored in self.external_db.

        The result is 0.0 when the DB is empty, when no entry carries an
        embedding, or when no similarity exceeds zero.
        """
        if not self.external_db:
            return 0.0

        query = embedding_model.encode(prompt)
        if query is None:
            # Encoder returned nothing: compare with an all-zero vector instead.
            query = np.zeros(EMB_DIM)
        elif isinstance(query, list):
            query = np.array(query)

        top_score = 0.0
        for record in self.external_db.values():
            candidate = record.get("embedding")
            if candidate is None:
                continue  # nothing to compare against for this entry
            if isinstance(candidate, list):
                candidate = np.array(candidate)
            score = cosine_similarity([query], [candidate])[0][0]
            if score > top_score:
                top_score = score
        return top_score



    # --------------------
    # 11.10. Cooperation Handling
    # --------------------
    def handle_cooperation(self, scenario_tally):
        """
        Put every agent into cooperation mode when any critical foresight
        scenario was tallied at least twice.

        `scenario_tally` maps scenario names to how often they occurred; only
        the four critical scenario names listed below are inspected.
        """
        critical = ("Grey Rhino", "Wild Card", "Black Swan", "Cascading Discontinuity")

        triggered = False
        for label in critical:
            if label in scenario_tally and scenario_tally[label] >= 2:
                triggered = True
                break
        if not triggered:
            return

        print("🤝 Triggering cooperative mode across agents.\n")
        for member in self.agents:
            member.in_cooperation = True

    # --------------------
    # 11.11. Full System Reset
    # --------------------





    def reset_all(self):
        """
        Reset the whole multi-agent system.

        For every agent: call ``reset_global_state()`` and, when the agent has
        roles and a mission prompt was recorded, re-seed its tasks through
        ``assign_tasks_from_roles_multi_round``. Afterwards every agent's
        cooperation-decision counter is set back to 1 and the penalty tracker
        is emptied in place.
        """
        print("🔁 Full system reset (fatigue, hunger, memory, meta-weights).\n")

        for member in self.agents:
            member.reset_global_state()
            print(f"🔄 {member.name} reset — roles now: {member.roles}")

            # Nothing to seed from without roles or a remembered mission prompt.
            if not (member.roles and self.last_mission_prompt):
                continue

            # NOTE: the original author asks that this re-seeding call stays enabled.
            member.assign_tasks_from_roles_multi_round(
                prompt=self.last_mission_prompt,
                global_round=self.global_round + 1,  # seed for the next global round
                allow_jollycard=True,
                verbose=True,
            )
            print(f"🔧 {member.name} re-seeded tasks after reset: {member.tasks}")

        # Cooperation / penalty trackers
        self.coop_decision_counters = dict.fromkeys((a.name for a in self.agents), 1)
        self.penalty_tracker.clear()


    # --------------------
    # 11.12. Penalty Clear Helper
    # --------------------
    def clear_penalty_for_agent(self, agent):
        """
        Delete every penalty-tracker entry whose key starts with the first
        eight characters of ``agent.generate_prompt_key("")``.

        The tracker dict is modified in place.
        """
        key_prefix = agent.generate_prompt_key("")[:8]
        # Collect the matches first: a dict must not change size while iterated.
        stale_keys = tuple(
            filter(lambda tracked: tracked.startswith(key_prefix), self.penalty_tracker)
        )
        for stale in stale_keys:
            del self.penalty_tracker[stale]



    # =====================================
    # 11.3 External Memory and Decay Logic
    # =====================================

    def apply_external_db_decay(self):
        """
        Decay the score of every external-DB entry and prune the weak ones.

        Each score is multiplied by ``exp(-lambda_time * months_elapsed)`` and
        ``exp(-lambda_usage * usage_count)``. Entries whose decayed score falls
        below ``external_decay_score_threshold`` are deleted; the others have
        their ``"score"`` overwritten with the decayed value. Both rates are
        jittered by up to ``external_db_decay_randomness`` (relative) per call.
        All four settings are read from the first agent's ``meta_parameters``.

        Entries without a ``"timestamp"`` are treated as created "now" and
        entries without a ``"usage_count"`` as never used. A summary report is
        printed at the end.

        Raises:
            KeyError: If an entry has no ``"score"`` field.
        """
        SECONDS_PER_MONTH = 30 * 24 * 3600

        if not self.agents:
            print("⚠️ No agents found for meta-parameters.")
            return

        # Decay settings are taken from the first agent only.
        agent_meta = self.agents[0].meta_parameters

        lambda_time = agent_meta.get("lambda_time", 0.5)
        lambda_usage = agent_meta.get("lambda_usage", 0.3)
        external_db_decay_randomness = agent_meta.get("external_db_decay_randomness", 0.1)
        MINIMUM_SCORE = agent_meta.get("external_decay_score_threshold", 0.2)

        # Apply multiplicative noise to both decay rates
        lambda_time *= 1 + random.uniform(-external_db_decay_randomness, external_db_decay_randomness)
        lambda_usage *= 1 + random.uniform(-external_db_decay_randomness, external_db_decay_randomness)

        print(f"🎲 Applied decay randomness: λ_time={lambda_time:.3f}, λ_usage={lambda_usage:.3f}")

        if not self.external_db:
            print("\n📉 External DB is empty. Skipping decay step.\n")
            return

        current_time = time.time()
        # Iterate over a snapshot because entries may be deleted on the way.
        for key, entry in list(self.external_db.items()):
            timestamp = entry.get("timestamp", current_time)
            usage = entry.get("usage_count", 0)

            months_elapsed = (current_time - timestamp) / SECONDS_PER_MONTH
            time_decay = math.exp(-lambda_time * months_elapsed)
            usage_decay = math.exp(-lambda_usage * usage)
            combined_decay = time_decay * usage_decay

            decayed_score = entry["score"] * combined_decay

            if decayed_score < MINIMUM_SCORE:
                print(f"🗑️ Score too low, removing: {key[:10]}... | score={decayed_score:.3f}")
                del self.external_db[key]
            else:
                entry["score"] = decayed_score
                print(f"♻️ Gradual decay applied to {key[:10]}... | new_score={decayed_score:.3f}")

        remaining = list(self.external_db.values())
        if remaining:
            total = len(remaining)
            avg_score = sum(e["score"] for e in remaining) / total
            # Same default as the decay loop: an entry without a timestamp
            # counts as age zero instead of raising KeyError in the report.
            oldest = max(
                (current_time - e.get("timestamp", current_time)) / SECONDS_PER_MONTH
                for e in remaining
            )

            print(f"\n📊 Decay Report:")
            print(f"🧠 Entries remaining: {total}")
            print(f"⭐ Avg. score: {avg_score:.3f}")
            print(f"📅 Oldest entry age: {oldest:.2f} months")
        else:
            print("\n📉 All entries decayed. External DB is empty.")

        print("\n✅ External DB decay step completed.\n")

    def apply_long_memory_decay(self):
        """
        Decay the scores held in self.long_memory and purge the weak entries.

        A score is multiplied by exp(-lambda_time * months_elapsed) and by
        exp(-lambda_usage * usage_count); missing fields default to score 0.0,
        timestamp "now" and usage_count 1. Entries whose decayed score is below
        the threshold are deleted, the others are updated in place. Settings
        come from self.meta_parameters when that attribute exists, otherwise
        the built-in defaults are used.
        """
        SECONDS_PER_MONTH = 30 * 24 * 3600

        params = getattr(self, "meta_parameters", {})
        noise = params.get("external_db_decay_randomness", 0.1)
        floor = params.get("external_decay_score_threshold", 0.2)

        # Jitter both rates; the time rate is drawn first, then the usage rate.
        rate_time = params.get("lambda_time", 0.5) * (1 + random.uniform(-noise, noise))
        rate_usage = params.get("lambda_usage", 0.3) * (1 + random.uniform(-noise, noise))

        now = time.time()
        purged = 0
        # Walk a snapshot so entries can be deleted during the pass.
        for mem_key, record in list(self.long_memory.items()):
            age_months = (now - record.get("timestamp", now)) / SECONDS_PER_MONTH
            usage = record.get("usage_count", 1)  # long memory counts as used once
            new_score = (
                record.get("score", 0.0)
                * math.exp(-rate_time * age_months)
                * math.exp(-rate_usage * usage)
            )

            if new_score < floor:
                del self.long_memory[mem_key]
                purged += 1
                continue
            record["score"] = new_score

        print(f"♻️ {self.name}: Decayed long memory → removed {purged}, remaining {len(self.long_memory)}")


    def save_external_db(self, path="external_db.json"):
        """
        Write self.external_db (plus prompt clusters, if present) to a JSON file.

        NumPy arrays under ``"embedding"`` / ``"graph_embedding"`` and cluster
        ``"centroid"`` values are converted to plain lists. Both embedding keys
        are always written; a missing one is stored as ``null``. When the
        instance has a ``prompt_clusters`` attribute, the clusters are stored
        under the reserved top-level key ``"__meta__"``.

        Args:
            path: Destination file; overwritten if it exists.

        Raises:
            TypeError: If some value is not JSON-serializable. The destination
                file is left untouched in that case.
        """
        def _jsonable(value):
            # NumPy arrays are not JSON-serializable; plain lists are.
            return value.tolist() if isinstance(value, np.ndarray) else value

        serializable_db = {
            key: {
                **entry,
                # .get(): entries without an embedding are tolerated elsewhere
                # (similarity lookup, loader), so saving must not fail on them.
                "embedding": _jsonable(entry.get("embedding")),
                "graph_embedding": _jsonable(entry.get("graph_embedding")),
            }
            for key, entry in self.external_db.items()
        }

        # Add prompt_clusters to __meta__ if available
        if hasattr(self, "prompt_clusters"):
            serializable_db["__meta__"] = {
                "prompt_clusters": [
                    {
                        "prompts": cluster["prompts"],
                        "centroid": _jsonable(cluster["centroid"]),
                        "best_prompt": cluster.get("best_prompt", None),
                        "best_score": cluster.get("best_score", None),
                    }
                    for cluster in self.prompt_clusters
                ]
            }

        # Serialize BEFORE opening the file: opening with "w" truncates it, so a
        # serialization error after that point would destroy the previous DB.
        payload = json.dumps(serializable_db)
        with open(path, "w", encoding="utf-8") as f:
            f.write(payload)


    def load_external_db(self, path="external_db.json"):
        """
        Populate self.external_db and self.prompt_clusters from a JSON file.

        List-valued ``"embedding"`` / ``"graph_embedding"`` fields and cluster
        ``"centroid"`` values are turned back into NumPy arrays. The optional
        top-level ``"__meta__"`` section carries the prompt clusters and is not
        kept as a DB entry. If the file does not exist, both attributes are
        reset to empty containers.
        """
        try:
            with open(path, "r") as handle:
                raw_db = json.load(handle)
        except FileNotFoundError:
            print(f"⚠️ No existing external DB file found at {path} — starting fresh.")
            self.external_db = {}
            self.prompt_clusters = []
            return

        def to_array(value):
            # JSON stores vectors as lists; anything else is passed through.
            return np.array(value) if isinstance(value, list) else value

        # Split the bookkeeping section off from the real entries.
        meta = raw_db.pop("__meta__", {})

        self.prompt_clusters = []
        if "prompt_clusters" in meta:
            for saved in meta["prompt_clusters"]:
                self.prompt_clusters.append({
                    "prompts": saved.get("prompts", []),
                    "centroid": to_array(saved["centroid"]),
                    "best_prompt": saved.get("best_prompt"),
                    "best_score": saved.get("best_score"),
                })

        restored_db = {}
        for key, stored in raw_db.items():
            entry = dict(stored)
            for field in ("embedding", "graph_embedding"):
                if isinstance(stored.get(field), list):
                    entry[field] = np.array(stored[field])
            restored_db[key] = entry

        self.external_db = restored_db
        print(f"✅ Loaded external DB from {path} ({len(restored_db)} entries + {len(self.prompt_clusters)} clusters)")








In [None]:
# ============================================================
# ==================== 📊 CELL 12 — DIAGNOSTIC HELPER + PRE/POST MISSION CHECK ===
# ============================================================
# ✅ Unified diagnostics helper class and pre/post mission checks.
#    - Replaces previous Cell 15 and Cell 16
#    - Provides easy-to-use diagnostic commands:
#         diag.pre_mission_check()
#         diag.post_mission_check_and_compare()
# ============================================================


class DiagnosticHelper:
    """
    Print-based diagnostics comparing runner state before and after a mission.

    ``pre_mission_check()`` dumps the runner's external DB and records a
    baseline (entry count plus each agent's long-memory size).
    ``post_mission_check_and_compare()`` dumps the DB again and reports what
    was added or removed relative to that baseline.
    """

    # Longest prompt prefix shown per entry before it is cut with "...".
    _PROMPT_PREVIEW_CHARS = 50

    def __init__(self, runner):
        """Bind to `runner`, which must expose `external_db` and `agents`."""
        self.runner = runner
        self.pre_external_count = None  # set by pre_mission_check()
        self.pre_long_memory = None     # {agent name: long-memory size}

    def _print_external_db_status(self, phase):
        """Print every external-DB entry under a `phase` banner; return the entry count."""
        print("\n======================")
        print(f"📦 EXTERNAL DB STATUS ({phase})")
        print("======================")

        external_count = len(self.runner.external_db)
        print(f"→ External DB entries: {external_count}")

        if external_count == 0:
            print("⚠️ External DB is currently empty.")
            return external_count

        limit = self._PROMPT_PREVIEW_CHARS
        for key, entry in self.runner.external_db.items():
            score = entry.get("score", "N/A")
            prompt = entry.get("prompt") or entry.get("original_prompt") or "N/A"
            timestamp = entry.get("timestamp", "N/A")
            if not isinstance(prompt, str):
                prompt_display = "N/A"
            elif len(prompt) > limit:
                prompt_display = prompt[:limit] + "..."
            else:
                # Short prompts are shown verbatim: no misleading ellipsis.
                prompt_display = prompt
            print(f"- Key: {key} | Score: {score} | Prompt: {prompt_display} | Timestamp: {timestamp}")

        return external_count

    def _long_memory_sizes(self):
        """Return {agent name: number of long-memory entries} for all runner agents."""
        return {agent.name: len(agent.long_memory) for agent in self.runner.agents}

    def pre_mission_check(self):
        """Print the external-DB status and store the pre-mission baseline."""
        self.pre_external_count = self._print_external_db_status("Pre-mission")
        self.pre_long_memory = self._long_memory_sizes()

    def post_mission_check_and_compare(self):
        """
        Print the external-DB status, then a comparison against the baseline.

        If ``pre_mission_check()`` was never called there is no baseline; a
        warning is printed and the comparison is skipped instead of failing.
        """
        post_external_count = self._print_external_db_status("Post-mission")
        post_long_memory = self._long_memory_sizes()

        print("\n======================")
        print("📌 COMPARISON SUMMARY")
        print("======================")

        if self.pre_external_count is None or self.pre_long_memory is None:
            print("⚠️ No pre-mission snapshot recorded — call pre_mission_check() first. Skipping comparison.")
            return

        # Compare External DB
        if self.pre_external_count == post_external_count:
            print(f"📦 External DB → SAME (entries: {post_external_count})")
        elif post_external_count > self.pre_external_count:
            print(f"📦 External DB → + ADDED (was {self.pre_external_count}, now {post_external_count})")
        else:
            print(f"📦 External DB → - REMOVED (was {self.pre_external_count}, now {post_external_count})")

        # Compare Agent Memories
        print("\n🧠 AGENT MEMORY CHANGES:")

        # Sorted so the report order is stable between runs (set order is not).
        all_names = sorted(self.pre_long_memory.keys() | post_long_memory.keys(), key=str)
        for agent_name in all_names:
            pre_count = self.pre_long_memory.get(agent_name, 0)
            post_count = post_long_memory.get(agent_name, 0)

            if pre_count == post_count:
                print(f" - {agent_name}: SAME (entries: {post_count})")
            elif post_count > pre_count:
                print(f" - {agent_name}: + ADDED (was {pre_count}, now {post_count})")
            else:
                print(f" - {agent_name}: - REMOVED (was {pre_count}, now {post_count})")





In [None]:
# ============================================================
# ==================== 🚦 CELL 13 — AGENT & RUNNER CREATION =
# ============================================================

# Utility for consistent task seeding from roles


def seed_tasks_from_roles(agent, mission_prompt=None):
    """
    Placeholder hook for seeding an agent's tasks from its roles.

    Currently a no-op: both arguments are ignored and None is returned. The
    call this helper used to make is disabled; for reference it was
    ``agent.assign_tasks_from_roles_multi_round(
    prompt=mission_prompt or agent.runner.last_mission_prompt,
    global_round=agent.runner.global_round + 1,
    allow_jollycard=True, verbose=True)``.
    """
    return None

# 0) Create the runner with no agents yet: each agent below receives this
#    runner in its constructor, so the runner has to exist first.
runner = EnhancedStrategicRunner(agents=[])

# Load the previously stored external DB from disk (the loader starts with an
# empty DB when the file does not exist).
runner.load_external_db("external_db.json")

# 1) Build the agents. Each one gets the born roles registered under its name
#    in BORN_ROLES (none if the name is absent) and a randomly chosen model key.

import random


model_keys = list(available_models.keys())  # e.g. ["gemini","openai","claude",…]
agents = []

for i in range(3):
    name = f"Node_{i}"
    born = BORN_ROLES.get(name, [])
    # Pick one of the loaded model keys at random
    # (random.choice raises IndexError if available_models is empty).
    chosen_key = random.choice(model_keys)
    agent = StrategicAgent(
        name=name,
        model_key=chosen_key,    # <-- pass the model key, not a file path
        born_roles=born,
        runner=runner
    )
    # NOTE(review): every agent loads the same "long_memory.json", while the
    # run cell saves to f"{agent.name}_long_memory.json" — confirm that
    # load_long_memory resolves a per-agent file, otherwise saved memories are
    # never read back.
    agent.load_long_memory("long_memory.json")

    agents.append(agent)

# 2) Now that the agents exist, hand them to the runner.
runner.agents = agents
# (re-initialize any runner caches if needed—often not required)




# Attach the diagnostics helper so later cells can call runner.diag.*
runner.diag = DiagnosticHelper(runner)



In [None]:
# ============================================================
# =============== CELL 14 — RUNNER VISUALIZATION =============
# ============================================================


import matplotlib.pyplot as plt



def plot_real_timelines_with_shading(runner, *, figsize=(14, 3)):
    """
    Draw one horizontal validation bar per agent with Matplotlib.

    • Light-grey background = the whole interval range of the plot
    • Sky-blue segments     = intervals listed in the agent's `local_interval_log`

    Interval k is drawn as the unit-wide slot [k, k + 1), so the x-axis spans
    0 .. max_x + 1, where max_x is the largest `interval` among the agents.

    Args:
        runner: Object exposing `agents`; each agent needs `name`, `interval`
            and `local_interval_log`.
        figsize: Figure size forwarded to `plt.subplots`.

    Returns:
        None. Prints a notice and returns early when there are no agents.
    """
    agents = runner.agents
    if not agents:
        print("⟐ No agents to plot.")
        return

    # Highest interval reached by any agent
    max_x = max((a.interval for a in agents), default=0)
    # The slot of interval max_x ends at max_x + 1; the axis must reach that far
    # or the most recent interval's shading is clipped away.
    span = max_x + 1

    _, ax = plt.subplots(1, 1, figsize=figsize)

    y_ticks = []
    y_labels = []
    for y, ag in enumerate(agents, start=1):  # y = vertical slot for this agent
        y_ticks.append(y)
        y_labels.append(ag.name)

        # Unique in-range intervals; negative values are ignored rather than
        # wrapping around to the end of the timeline.
        validated = sorted({iv for iv in ag.local_interval_log if 0 <= iv <= max_x})

        # Background bar across the full range
        ax.barh(y, span, left=0, height=0.6, color="lightgrey", alpha=0.5)

        # Overlay validated intervals
        for k in validated:
            ax.barh(y, 1, left=k, height=0.6, color="skyblue", alpha=0.9)

    ax.set_xlabel("Interval #")
    ax.set_xlim(0, span)  # span >= 1, so the limits can never coincide
    ax.set_xticks(range(0, max_x + 1))

    ax.set_yticks(y_ticks)
    ax.set_yticklabels(y_labels)
    ax.set_title("Agent local‑validation timeline")
    ax.grid(axis="x", alpha=0.3)
    plt.tight_layout()
    plt.show()




In [None]:
# ============================================================
# =============== 🚀 CELL 15 — FULL RUN & DIAGNOSTICS ========
# ============================================================

# 1) Define (or re‑define) the mission prompt and record it on the runner
mission_prompt = "Derive the closed-form solution for the Black-Scholes European call option pricing model and implement it in Python."
runner.last_mission_prompt = mission_prompt          # let agents see the prompt if needed

# ─────────────────────────────────────────────────────────────
# 🔍 PRE‑MISSION SNAPSHOT
# Records the baseline that the post-mission comparison uses.
# ─────────────────────────────────────────────────────────────
runner.diag.pre_mission_check()

# ─────────────────────────────────────────────────────────────
# 🚀 MAIN EXECUTION
# NOTE(review): the output captured for this cell ends with
# "AttributeError: 'StrategicAgent' object has no attribute 'act'",
# presumably raised inside runner.execute — confirm and fix before relying
# on the steps below, which would not have run.
# ─────────────────────────────────────────────────────────────
promoted = runner.execute(mission_prompt, verbose=True)
print("\n🚦 Promoted this round?", promoted)

# ─────────────────────────────────────────────────────────────
# 📸 POST‑MISSION SNAPSHOT & COMPARISON
# ─────────────────────────────────────────────────────────────
runner.diag.post_mission_check_and_compare()
runner.print_agent_snapshot()

# ♻️ Apply decay to the external DB
runner.apply_external_db_decay()

# ♻️ Apply decay to each agent’s long memory, then persist it per agent
# NOTE(review): memories are saved to "<agent name>_long_memory.json", but the
# creation cell loads "long_memory.json" — confirm the two paths are meant to
# differ.
for agent in runner.agents:
    agent.apply_long_memory_decay()
    agent.save_long_memory(f"{agent.name}_long_memory.json")


# ─────────────────────────────────────────────────────────────
# 🗑️  STATIC‑DEFINITION PURGE LOG
# Prints each agent's purge_log entries; agents with an empty log are skipped.
# ─────────────────────────────────────────────────────────────
print("\n🗑️  Static‑Definition Purge Log:")
print("="*80)
for agent in runner.agents:
    if not agent.purge_log:
        continue
    print(f"\n🔍 {agent.name} purge history:")
    for entry in agent.purge_log:
        gr   = entry["global_round"]
        remR = entry["removed_roles"]
        retR = entry["retained_roles"]
        remT = entry["removed_tasks"]
        retT = entry["retained_tasks"]
        print(f" • After global round {gr}:")
        print(f"    • Roles removed:   {remR or '–'}")   # '–' when nothing was removed
        print(f"    • Roles retained:  {retR}")
        print(f"    • Tasks removed:   {remT or '–'}")
        print(f"    • Tasks retained:  {retT}")
print("="*80)

# ─────────────────────────────────────────────────────────────
# 🩺 PHYSIOLOGY ARRAYS (debug print)
# ─────────────────────────────────────────────────────────────
print("\n── Physiology Histories ──")
for agent in runner.agents:
    print(f"{agent.name}:",
          f"attention={agent.attention_history},",
          f"fatigue={agent.fatigue_history},",
          f"hunger={agent.hunger_history}")
    # Only the attention history's length is reported here.
    print(f"{agent.name} length →", len(agent.attention_history))

# ─────────────────────────────────────────────────────────────
# 📊 VISUALS
# ─────────────────────────────────────────────────────────────
runner.plot_agent_timelines()                 # ASCII timeline
runner.plot_scenario_cluster_timeline()
plot_real_timelines_with_shading(runner)      # Matplotlib timeline (with validation shading)
plot_agent_physiology(runner)                 # Attention / Fatigue / Hunger charts











📦 EXTERNAL DB STATUS (Pre-mission)
→ External DB entries: 0
⚠️ External DB is currently empty.
📈 Best similarity found: 0.0000 vs reuse threshold: 0.7500
🆕 New mission prompt detected — resetting global counter.

---- Interval 1 (mission start) ----
 • Node_0: local_round=1, interval=1
 • Node_1: local_round=1, interval=1
 • Node_2: local_round=1, interval=1
🔍 Node_0 top_3 role candidates (pre‑threshold): ['Mathematician', 'Logician', 'Builder']
🔍 Node_0 meaningful roles (score ≥ 0.7): []
⚠️ Node_0 assigning top role despite threshold: Mathematician
🎲 Node_0 randomly assigned role from top_1: ['Mathematician']
✅ Node_0 kept roles: ['Mathematician']
⏳ Restricting to single role (global round 1): ['Mathematician']
🔍 Node_0 seeded tasks from store (top_5): ['compute', 'mathematicize']
🧠 Node_0 assigned tasks: ['compute', 'mathematicize']
🔍 Node_1 top_3 role candidates (pre‑threshold): ['Mathematician', 'Logician', 'Builder']
🔍 Node_1 meaningful roles (score ≥ 0.7): []
⚠️ Node_1 assigning

AttributeError: 'StrategicAgent' object has no attribute 'act'