<a href="https://colab.research.google.com/github/micah-shull/AI_Agents/blob/main/098_Research_Summarizer_Agent_aLittleLessDirty.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Agent Code
### Copy of completed code from previous notebook

In [None]:
# --- LLM Summarization Helper (uses your OpenAI client outside the loop) ---
def make_summarizer(openai_chat_fn: Callable[[List[Dict[str, str]]], str]):
    """Build a ``summarize_text`` callable bound to the supplied chat function.

    Args:
        openai_chat_fn: Callable that receives a list of ``{"role", "content"}``
            message dicts and returns the model's reply.

    Returns:
        ``summarize_text(text, max_points=5, style="bullet")``, which builds a
        system + user message pair and returns whatever ``openai_chat_fn``
        returns for it.
    """
    system_prompt = (
        "You are a precise technical summarizer. Extract key points, preserve facts, "
        "and avoid speculation. Keep it concise."
    )

    def summarize_text(text: str, max_points: int = 5, style: str = "bullet") -> str:
        # Only the exact value "bullet" selects bullets; any other style
        # value asks for short paragraphs.
        if style == "bullet":
            layout = "bullets"
        else:
            layout = "short paragraphs"
        instructions = (
            f"Summarize the following text into at most {max_points} key points. "
            f"Format: {layout}.\n\n"
        )
        return openai_chat_fn([
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": instructions + text},
        ])

    return summarize_text

# --- Wiring helper to build the registry for this agent ---
def build_research_actions(env: ResearchEnvironment, summarizer_fn: Callable[[str, int, str], str]) -> ActionRegistry:
    """Create the action registry for the research-summarizer agent.

    Four actions are registered, in this order: ``list_txt_files``,
    ``read_txt_file``, ``summarize_text`` and ``write_summary_file``. The file
    actions forward to the same-named methods on ``env``; ``summarize_text``
    forwards to ``summarizer_fn(text, max_points, style)``.

    Args:
        env: Object providing ``list_txt_files``, ``read_txt_file`` and
            ``write_summary_file``.
        summarizer_fn: Callable invoked positionally as
            ``summarizer_fn(text, max_points, style)``.

    Returns:
        A new ``ActionRegistry`` holding the four actions.
    """
    # Wrapper parameter names mirror the schema property names so the tool
    # arguments can be supplied as keyword arguments.
    def _list_txt_files():
        return env.list_txt_files()

    def _read_txt_file(file_name):
        return env.read_txt_file(file_name)

    def _summarize_text(text, max_points=5, style="bullet"):
        return summarizer_fn(text, max_points, style)

    def _write_summary_file(source_file, content):
        return env.write_summary_file(source_file, content)

    tool_specs = [
        Action(
            name="list_txt_files",
            fn=_list_txt_files,
            description="Return .txt file names from /content/files",
            parameters={"type": "object", "properties": {}, "required": []},
        ),
        Action(
            name="read_txt_file",
            fn=_read_txt_file,
            description="Read a text file from /content/files",
            parameters={
                "type": "object",
                "properties": {"file_name": {"type": "string"}},
                "required": ["file_name"],
            },
        ),
        Action(
            name="summarize_text",
            fn=_summarize_text,
            description="Summarize raw text into key points using the LLM",
            parameters={
                "type": "object",
                "properties": {
                    "text": {"type": "string"},
                    "max_points": {"type": "integer", "minimum": 1, "maximum": 12},
                    "style": {"type": "string", "enum": ["bullet", "paragraph"]},
                },
                "required": ["text"],
            },
        ),
        Action(
            name="write_summary_file",
            fn=_write_summary_file,
            description="Write summary text to /content/summaries (auto-named from source)",
            parameters={
                "type": "object",
                "properties": {
                    "source_file": {"type": "string"},
                    "content": {"type": "string"},
                },
                "required": ["source_file", "content"],
            },
        ),
    ]

    registry = ActionRegistry()
    for spec in tool_specs:
        registry.register(spec)
    return registry


# STEP 2 — Language & Prereqs (clean)
# Put this ABOVE the wiring cell. Defines: Goal, Memory, AgentLanguage, SummarizerLanguage.
from dataclasses import dataclass
from typing import List, Dict, Any

# --- Minimal prereqs --------------------------------------------------------
@dataclass(frozen=True)
class Goal:
    """Immutable description of one objective given to the agent."""
    # Sort key: SummarizerLanguage lists goals in ascending priority order.
    priority: int
    # Short label rendered in the prompt next to the priority.
    name: str
    # Free-text instructions; stripped of surrounding whitespace when rendered.
    description: str

class Memory:
    def __init__(self):
        self.items: List[Dict[str, Any]] = []
    def add_memory(self, m: Dict[str, Any]):
        self.items.append(m)
    def get_memories(self, limit: int | None = None):
        return self.items[-limit:] if limit else self.items

# --- AgentLanguage base + concrete SummarizerLanguage ----------------------
class AgentLanguage:
    """Interface between the orchestrator and the LLM driver.

    Subclasses turn actions/goals/memory into a prompt payload and may
    post-process the driver's decision.
    """

    def construct_prompt(
        self,
        actions: List[Any],
        environment: Any,
        goals: List[Goal],
        memory: Memory,
    ) -> Dict[str, Any]:
        """Build the prompt payload; concrete languages must override this.

        Raises:
            NotImplementedError: Always, in this base class.
        """
        raise NotImplementedError

    def parse_response(self, response: Dict[str, Any]) -> Dict[str, Any]:
        """Return ``response`` unchanged.

        The default assumes the driver already produced a structured
        ``{"tool": ..., "args": {...}}`` dict.
        """
        return response

class SummarizerLanguage(AgentLanguage):
    """Prompt builder for the summarizer agent.

    Only prompt construction is customised here; ``parse_response`` is
    inherited unchanged from ``AgentLanguage``.
    """

    def construct_prompt(self, actions, environment, goals: List[Goal], memory: Memory) -> Dict[str, Any]:
        """Return ``{"goals_text", "memory", "actions"}`` for the driver.

        ``goals_text`` is a fixed header followed by one bullet per goal in
        ascending priority order; ``memory`` is the last 8 memory records;
        ``actions`` is passed through untouched. ``environment`` is unused.
        """
        header = "You are a file summarizer. Follow these goals in order of priority:\n"
        bullets = []
        for goal in sorted(goals, key=lambda item: item.priority):
            bullets.append(f"- ({goal.priority}) {goal.name}: {goal.description.strip()}")
        goals_text = header + "\n".join(bullets)

        recent = memory.get_memories(8)
        return {"goals_text": goals_text, "memory": recent, "actions": actions}

# Research Summarizer — Orchestrator Wiring (function calling)
# REQUIREMENTS (already defined earlier in your notebook):
# - Agent (orchestrator template)
# - Goal, Action, ActionRegistry
# - ResearchEnvironment, make_summarizer, build_research_actions (from the previous cell)
# - Memory class from your template
#
# This cell wires those pieces together, adds an AgentLanguage
# and a generate_response() that uses OpenAI function calling.

import os, json
from typing import Dict, Any, List
from dotenv import load_dotenv
from openai import OpenAI

# ---------------- Load API key & client ----------------
# Load variables from the env file into the process environment
# (presumably this file defines OPENAI_API_KEY — confirm).
load_dotenv('/content/API_KEYS.env')
# os.getenv returns None when the variable is unset; the value is handed to the client as-is.
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
# Model name used by the chat-completions calls below.
MODEL = "gpt-4o-mini"

# ---------------- Tools export helper ------------------
def registry_to_openai_tools(registry: ActionRegistry) -> List[Dict[str, Any]]:
    """Convert every registered action into a ``tools`` entry for the chat API.

    Args:
        registry: Registry whose ``get_actions()`` supplies the actions.

    Returns:
        One ``{"type": "function", "function": {...}}`` dict per action, in
        registry order. An action with falsy ``parameters`` gets an empty
        object schema instead.
    """
    return [
        {
            "type": "function",
            "function": {
                "name": action.name,
                "description": action.description,
                "parameters": action.parameters or {"type": "object", "properties": {}, "required": []},
            },
        }
        for action in registry.get_actions()
    ]

# ---------------- AgentLanguage ------------------------
class SummarizerLanguage(AgentLanguage):
    """Prompt builder for the summarizer agent; response parsing is inherited."""

    def construct_prompt(self, actions, environment, goals, memory):
        """Assemble the payload consumed by the function-calling driver.

        Returns a dict with ``goals_text`` (header plus one bullet per goal,
        ascending priority), ``memory`` (last 8 records) and ``actions``
        (passed through). ``environment`` is not used.
        """
        ranked = sorted(goals, key=lambda entry: entry.priority)
        bullet_lines = [
            f"- ({entry.priority}) {entry.name}: {entry.description.strip()}"
            for entry in ranked
        ]
        goals_text = (
            "You are a file summarizer. Follow these goals in order of priority:\n"
            + "\n".join(bullet_lines)
        )
        # Small window so the prompt stays short.
        window = memory.get_memories(8)
        return {"goals_text": goals_text, "memory": window, "actions": actions}

# ------------- generate_response (OpenAI call) ---------
# NOTE: This returns a structured dict {"tool": name, "args": {...}} for the orchestrator.

def make_generate_response(registry: ActionRegistry):
    """Build the function-calling driver used by the orchestrator.

    The registry is exported to the chat API ``tools`` format once, when the
    driver is built; later registry changes are not picked up.

    Args:
        registry: Action registry whose actions are offered to the model.

    Returns:
        ``_generate_response(prompt_dict)``, which calls the chat-completions
        endpoint and returns ``{"tool": name, "args": {...}}``. ``args`` is
        always a dict.
    """
    tools_spec = registry_to_openai_tools(registry)
    allowed_roles = ("system", "user", "assistant")

    def build_messages(prompt_dict: Dict[str, Any]) -> List[Dict[str, str]]:
        """Turn the prompt payload into chat messages.

        Layout: one system message (goals + tool-use rules), the replayed
        memory records, then a fixed user instruction asking for a tool call.
        """
        system = (
            prompt_dict["goals_text"]
            + "\n\nYou must use tools via function calling to make progress. "
            + "Choose exactly one next tool per step. If you have saved all summaries, call a terminate tool if available; otherwise indicate completion."
        )
        memory_msgs = []
        for m in prompt_dict["memory"]:
            role = m.get("role") or m.get("type") or "user"
            content = m.get("content")
            if not isinstance(content, str):
                try:
                    content = json.dumps(content)
                except (TypeError, ValueError):
                    # A tool result may hold values json cannot encode (or a
                    # circular structure); degrade to plain text rather than
                    # aborting the whole step.
                    content = str(content)
            # Any other role (e.g. the orchestrator's "tool" records) is
            # replayed as a user message.
            memory_msgs.append({"role": role if role in allowed_roles else "user", "content": content})

        # Nudge the model with a fresh user instruction
        user_msg = {
            "role": "user",
            "content": (
                "Pick the best next tool from the available functions to progress toward summarizing the files. "
                "Return a function call, not prose."
            ),
        }
        return [{"role": "system", "content": system}] + memory_msgs + [user_msg]

    def _generate_response(prompt_dict: Dict[str, Any]) -> Dict[str, Any]:
        """Ask the model for the next tool call and return it as a plain dict."""
        messages = build_messages(prompt_dict)
        resp = client.chat.completions.create(
            model=MODEL,
            messages=messages,
            tools=tools_spec,
            tool_choice="auto",
            temperature=0.2,
        )
        msg = resp.choices[0].message
        if msg.tool_calls:
            # Only the first tool call is honoured; the loop runs one tool per step.
            call = msg.tool_calls[0]
            try:
                args = json.loads(call.function.arguments or "{}")
            except json.JSONDecodeError:
                # Malformed arguments: fall back to no args so validation can
                # report the missing required fields.
                args = {}
            if not isinstance(args, dict):
                # Valid JSON that is not an object (null, list, number, ...)
                # cannot be used as keyword arguments.
                args = {}
            return {"tool": call.function.name, "args": args}
        # Fallback if no tool was called; gently kick off with list_txt_files
        return {"tool": "list_txt_files", "args": {}}

    return _generate_response

# ---------------- Build environment & tools -------------
# Domain environment instance; passed to build_research_actions and to the Agent below.
env = ResearchEnvironment()

# Summarizer uses your OpenAI client under the hood

def openai_chat_fn(messages):
    """Send ``messages`` to the chat-completions endpoint and return the first choice's content."""
    completion = client.chat.completions.create(messages=messages, model=MODEL)
    first_choice = completion.choices[0]
    return first_choice.message.content

# Bind the summarizer to the chat function, then register it alongside the env-backed file tools.
summarizer = make_summarizer(openai_chat_fn)
registry = build_research_actions(env, summarizer)

# ---------------- Goals --------------------------------
# Single goal; the description spells out the intended tool order for the model.
file_summary_goal = Goal(
    priority=1,
    name="file_summary",
    description=(
        "Summarize key points of text documents in /content/files.\n"
        "Steps: 1) list files, 2) read each file, 3) summarize to ≤5 bullets, 4) write to /content/summaries."
    ),
)

# ---------------- Orchestrator instance -----------------
language = SummarizerLanguage()
# The driver snapshots the registry's tool specs when it is built.
generate_response = make_generate_response(registry)

# NOTE(review): build_research_actions registers no action with terminal=True,
# so a run of this agent stops only when max_iterations is exhausted.
agent = Agent(
    goals=[file_summary_goal],
    agent_language=language,
    action_registry=registry,
    generate_response=generate_response,
    environment=env,
)


# STEP 1 — Base Orchestrator (GAME skeleton)
# Run this cell first. It defines the core classes we'll reuse.
from typing import List, Dict, Any, Optional, Callable
from dataclasses import dataclass

# ---- G: Goals --------------------------------------------------------------
@dataclass(frozen=True)
class Goal:
    """Immutable description of one objective given to the agent."""
    # Sort key: AgentLanguage.construct_prompt orders goals by ascending priority.
    priority: int
    # Short label for the goal.
    name: str
    # Free-text instructions placed in the prompt.
    description: str

# ---- A: Actions + Registry -------------------------------------------------
class Action:
    """A named tool: a callable plus the metadata advertised for it.

    Attributes are stored exactly as given: ``name``, ``fn``, ``description``,
    ``parameters`` (a JSON-schema-like dict) and ``terminal`` (the
    orchestrator stops its loop after a terminal action succeeds).
    """

    def __init__(self, name: str, fn: Callable, description: str, parameters: Dict, terminal: bool=False):
        self.name = name
        self.fn = fn
        self.description = description
        self.parameters = parameters
        self.terminal = terminal

    def execute(self, **kwargs):
        """Call the wrapped function with ``kwargs`` and return its result."""
        handler = self.fn
        return handler(**kwargs)

class ActionRegistry:
    """Name-indexed collection of actions, kept in registration order."""

    def __init__(self):
        self._actions: Dict[str, "Action"] = {}

    def register(self, action: "Action"):
        """Add ``action`` under ``action.name``.

        Raises:
            ValueError: If an action with the same name is already registered.
        """
        if action.name in self._actions:
            raise ValueError(f"Action already registered: {action.name}")
        self._actions[action.name] = action

    def get_action(self, name: str) -> Optional["Action"]:
        """Return the action registered as ``name``, or ``None`` if unknown."""
        return self._actions.get(name)

    def get_actions(self) -> List["Action"]:
        """Return a new list of all registered actions, in registration order."""
        return list(self._actions.values())

    def validate_args(self, action: "Action", args: Dict[str, Any]) -> tuple[bool, str]:
        """Check ``args`` against the ``required`` list of the action's schema.

        Only the presence of required keys is checked; value types and
        unknown keys are not validated.

        Returns:
            ``(True, "ok")`` when every required key is present, otherwise
            ``(False, reason)``. A non-dict ``args`` is reported as a
            validation failure instead of raising.
        """
        if not isinstance(args, dict):
            # A decoded tool call can be None, a list or a scalar; the
            # membership test below would raise TypeError on some of those.
            return False, f"Arguments must be an object, got {type(args).__name__}"
        schema = action.parameters or {"type": "object", "properties": {}, "required": []}
        for key in schema.get("required", []):
            if key not in args:
                return False, f"Missing required arg: {key}"
        return True, "ok"

# ---- M: Memory -------------------------------------------------------------
class Memory:
    """Append-only log of records exchanged during an agent run."""

    def __init__(self):
        self.items: List[Dict[str, Any]] = []  # each item: {role, content}

    def add_memory(self, m: Dict[str, Any]):
        """Append one record to the end of the log."""
        self.items.append(m)

    def get_memories(self, limit: Optional[int]=None) -> List[Dict[str, Any]]:
        """Return the most recent ``limit`` records, oldest first.

        Args:
            limit: Maximum number of records to return. ``None`` (the
                default) returns the whole log.

        Returns:
            The live ``self.items`` list when ``limit`` is ``None``; otherwise
            a new list with at most ``limit`` trailing records. A ``limit``
            of zero or less yields an empty list.
        """
        if limit is None:
            return self.items
        if limit <= 0:
            # ``items[-0:]`` is the whole list and a negative limit would
            # slice from the front, so non-positive limits are handled here.
            return []
        return self.items[-limit:]

# ---- E: Environment --------------------------------------------------------
class Environment:
    """Runs actions and wraps the outcome in a uniform result envelope."""

    def execute_action(self, action: Action, args: Dict[str, Any]) -> Dict[str, Any]:
        """Execute ``action`` with ``args`` as keyword arguments.

        Returns:
            ``{"tool_executed": True, "result": ...}`` on success, or
            ``{"tool_executed": False, "error": str(exc)}`` if the call raised
            any ``Exception``; the exception is not re-raised.
        """
        try:
            outcome = action.execute(**args)
        except Exception as exc:
            return {"tool_executed": False, "error": str(exc)}
        else:
            return {"tool_executed": True, "result": outcome}



# ---- AgentLanguage (prompt builder + parser) ------------------------------
class AgentLanguage:
    """Default prompt builder and response parser for the orchestrator."""

    def construct_prompt(self, actions: List[Action], environment: Environment, goals: List[Goal], memory: Memory) -> Dict[str, Any]:
        """Return a payload with goal descriptions, tool names and recent memory.

        Goals are ordered by ascending priority; the memory window is the
        last 6 records. ``environment`` is not used here.
        """
        by_priority = sorted(goals, key=lambda goal: goal.priority)
        goal_descriptions = [goal.description for goal in by_priority]
        tool_names = [action.name for action in actions]
        recent = memory.get_memories(6)
        return {"goals": goal_descriptions, "tools": tool_names, "memory": recent}

    def parse_response(self, response: Dict[str, Any]) -> Dict[str, Any]:
        """Return ``response`` as-is; it is expected to be ``{"tool": name, "args": {...}}``."""
        return response

# ---- Orchestrator (Agent) -------------------------------------------------
class Agent:
    """Orchestrator loop: prompt -> decision -> validate -> execute -> remember.

    Args:
        goals: Goals handed to the agent language on every iteration.
        agent_language: Object with ``construct_prompt`` and ``parse_response``.
        action_registry: Registry with ``get_actions``, ``get_action`` and
            ``validate_args``.
        generate_response: Callable taking the prompt payload and returning
            a ``{"tool": ..., "args": {...}}`` decision.
        environment: Object whose ``execute_action(action, args)`` returns a
            result envelope containing ``tool_executed``.
    """

    def __init__(self, goals, agent_language, action_registry, generate_response, environment):
        self.goals = goals
        self.agent_language = agent_language
        self.actions = action_registry
        self.generate_response = generate_response  # Callable[prompt_dict] -> {tool,args}
        self.environment = environment

    def construct_prompt(self, goals, memory, actions):
        """Delegate prompt construction to the agent language."""
        return self.agent_language.construct_prompt(actions=actions.get_actions(),
                                                    environment=self.environment,
                                                    goals=goals,
                                                    memory=memory)

    def prompt_llm_for_action(self, full_prompt):
        """Ask the driver for the next decision."""
        return self.generate_response(full_prompt)

    def get_action(self, response):
        """Parse ``response`` and look up the chosen action.

        Returns:
            ``(action, invocation)``; ``action`` is ``None`` for an unknown tool.
        """
        invocation = self.agent_language.parse_response(response)
        action = self.actions.get_action(invocation.get("tool"))
        return action, invocation

    def should_terminate(self, response):
        """Return True when the chosen action exists and is marked terminal."""
        action_def, _ = self.get_action(response)
        return bool(action_def and action_def.terminal)

    def run(self, user_input: str, memory: Optional["Memory"]=None, max_iterations: int=3, verbose: bool=True) -> "Memory":
        """Run the loop for at most ``max_iterations`` steps.

        Each step records the tool result (or a structured error) in memory
        with role ``"tool"``. An unknown tool stops the loop; invalid
        arguments or a failed execution move on to the next iteration; a
        successful terminal action stops the loop.

        Args:
            user_input: Initial request, stored as the first ``user`` record.
            memory: Existing memory to continue from; a new one is created
                only when this is ``None``.
            max_iterations: Upper bound on loop iterations.
            verbose: Print the prompt, decision and result of each step.

        Returns:
            The memory object used for the run.
        """
        # Identity check: a caller-supplied memory that happens to be falsy
        # must not be silently replaced by a fresh one.
        if memory is None:
            memory = Memory()
        memory.add_memory({"role": "user", "content": user_input})
        for _ in range(max_iterations):
            prompt = self.construct_prompt(self.goals, memory, self.actions)
            if verbose:
                print("Prompt →", prompt)
            response = self.prompt_llm_for_action(prompt)
            if verbose:
                print("Decision ←", response)
            action, invocation = self.get_action(response)
            if not action:
                err = {"tool_executed": False, "error": f"Unknown action: {invocation.get('tool')}"}
                memory.add_memory({"role": "tool", "content": err})
                break
            # A missing or explicitly-None "args" both mean "no arguments".
            args = invocation.get("args") or {}
            ok, msg = self.actions.validate_args(action, args)
            if not ok:
                err = {"tool_executed": False, "error": f"Invalid args: {msg}"}
                memory.add_memory({"role": "tool", "content": err})
                continue
            result = self.environment.execute_action(action, args)
            if verbose:
                print("Result ←", result)
            memory.add_memory({"role": "tool", "content": result})
            if not result.get("tool_executed", False):
                memory.add_memory({"role": "assistant", "content": "Got an error; choosing another action next."})
                continue
            if self.should_terminate(response):
                if verbose:
                    print("Terminate signal: stopping loop.")
                break
        return memory

# ---- Smoke test (no OpenAI, no files) -------------------------------------
# Define a tiny tool and a mock "LLM" that always selects it

def hello_tool(name: str = "world"):
    """Return the greeting ``"hello, <name>!"``."""
    return "hello, {}!".format(name)

# Smoke-test registry holding only hello_tool; "name" is optional because
# "required" is empty and hello_tool has a default for it.
reg = ActionRegistry()
reg.register(Action(
    name="hello_tool",
    fn=hello_tool,
    description="Say hello",
    parameters={"type":"object","properties":{"name":{"type":"string"}},"required":[]}
))

# Base language: prompt payload is goal descriptions, tool names and recent memory.
lang = AgentLanguage()

def mock_generate_response(prompt_dict: Dict[str, Any]) -> Dict[str, Any]:
    """Stand-in for the LLM driver: ignore the prompt and always pick ``hello_tool`` with no args."""
    decision = {"tool": "hello_tool", "args": {}}
    return decision

# NOTE(review): this rebinds the module-level name `env` (and `agent` below),
# which the research-summarizer wiring earlier in this file also assigns —
# keep the cell execution order in mind.
env = Environment()
goals = [Goal(1, "demo", "Run a single tool to confirm wiring works.")]

# Run the smoke test only when this module is the entry point.
# With the default max_iterations=3 and a non-terminal tool, the mock
# decision is executed three times.
if __name__ == "__main__":
    agent = Agent(goals, lang, reg, mock_generate_response, env)
    _ = agent.run("Say hi", verbose=True)



# Agent Build Blueprint (GAME)

Use this checklist **every time** you spin up a new agent. Treat it like a recipe.

### 0) Idea → 1-paragraph spec

* **What:** “Summarize key points”
* **Why:** “Create concise notes to speed up review.”
* **Inputs/Outputs:** Inputs: files in `/content/files`. Outputs: `.summary.txt` in `/content/summaries`.
* **Success criteria:** At most 5 bullets per file, factual, saved to disk.

### 1) G — Goals (what & how)

Write 1–3 concise goals, ordered by priority.

```python
file_summary_goal = Goal(
    priority=1,
    name="file_summary",
    description=(
        "Summarize key points of text documents in /content/files.\n"
        "Steps: 1) list files  2) read each  3) summarize ≤5 bullets  4) save to /content/summaries."
    ),
)
```

### 2) E — Environment (the body)

Encapsulate domain actions with **safe paths** and **uniform envelopes**. Keep side effects minimal and explicit.

* `list_txt_files()` → list source docs
* `read_txt_file(file_name)` → return `{file_name, content, truncated}`
* `write_summary_file(source_file, content)` → return output path

(You already have this; keep path safety and truncation guard.)

### 3) A — Actions (tools) + Registry

Register each environment method as an `Action` with JSON-Schema-like `parameters`. Keep tools **specific**.

```python
registry.register(Action(
    name="read_txt_file",
    fn=lambda file_name: env.read_txt_file(file_name),
    description="Read a text file from /content/files",
    parameters={"type":"object","properties":{"file_name":{"type":"string"}},"required":["file_name"]},
))
```

Also register:

* `list_txt_files` (no args)
* `summarize_text` (DI-backed; see next step)
* `write_summary_file` (source\_file, content)
* *(Optional)* `terminate` with `terminal=True`

### 4) Inject dependencies (LLM) cleanly

Use **dependency injection** so your action isn’t married to one model/provider.

```python
def make_summarizer(openai_chat_fn):
    def summarize_text(text, max_points=5, style="bullet"):
        messages=[{"role":"system","content":"Precise technical summarizer."},
                  {"role":"user","content": f"Summarize into ≤{max_points} key points, {style}.\n\n{text}"}]
        return openai_chat_fn(messages)
    return summarize_text
```

### 5) M — Memory (what happened)

Start simple: list of `{role, content}`. Keep the window tight (e.g., last 6–8 messages). Consider upgrade paths (summaries/chunk recall) later, but don’t complicate the MVP.

### 6) AgentLanguage (prompt builder & response parsing)

* **Construct prompt**: deterministic text; sort goals/actions; cap memory window.
* **Parse response**: if you use **function calling**, parsing happens in `generate_response`; otherwise parse the model’s JSON/text here.

### 7) Function calling driver

Export registry to the API’s `tools` format and implement a `generate_response(prompt) -> {"tool": name, "args": {...}}`.

* Use a **narrow system message** (“pick exactly one tool per step”).
* If the model doesn’t call a tool, **default** to a safe starting tool (e.g., `list_txt_files`).

### 8) Orchestrator (the glue)

Keep the loop small and boring:

1. Build prompt (G + A + M + E)
2. Get decision (`generate_response`)
3. Validate args; execute via Environment
4. Write results/errors back to **memory**
5. Respect `terminal=True` or `max_iterations`

### 9) Observability (optional but helpful)

Add simple hooks/prints: `Prompt →`, `Decision ←`, `Result ←`. Later, you can redirect to logs.

### 10) Safety & guardrails

* File size limits & truncation flags
* JSON schema validation before execution
* Timeouts or retry limits if you add network tools
* Principle of least privilege (the env only exposes what’s needed)

---

## Skeleton you can copy between projects

```python
# 0) Spec: one paragraph (keep near the top of your notebook)

# 1) Goals
goals = [Goal(1, "task_name", "Short, stepwise description…")]

# 2) Environment
class MyEnv(Environment):
    # domain-specific methods: list_*, read_*, write_*
    ...

env = MyEnv()

# 3) Actions & Registry
registry = ActionRegistry()
# registry.register(Action(...))  # repeat per tool

# 4) DI-backed helpers (e.g., make_summarizer)
# summarizer = make_summarizer(openai_chat_fn)

# 5) Memory (from skeleton)
memory = Memory()

# 6) AgentLanguage
class MyLanguage(AgentLanguage):
    def construct_prompt(self, actions, environment, goals, memory):
        # deterministic text + small memory window
        return {"goals_text": "...", "memory": memory.get_memories(8), "actions": actions}

language = MyLanguage()

# 7) generate_response (function calling)
generate_response = make_generate_response(registry)  # your FC driver

# 8) Orchestrator
agent = Agent(goals, language, registry, generate_response, env)
_ = agent.run("Natural language task request here", max_iterations=8, verbose=True)
```

---

## Why this works well

* **Separation of concerns (GAME):** brain/body split keeps cognitive load low (for you and the model).
* **Stable interfaces:** you can swap any component without rewriting the loop.
* **Schema-first tools:** fewer tool-call mistakes, easier recovery.
* **DI:** easy to test offline; easy to switch models/providers.



Let’s treat this like a **repeatable kitchen recipe** we’ll refine over 3–5 agents. Here’s a tight, reusable **Agent Recipe v1.0** plus a test plan you can run each time.

# Agent Recipe v1.0 (GAME)

## 0) One-paragraph spec (write first)

* **What** (task), **Why** (value), **Inputs/Outputs**, **Done when** (acceptance).
* Example: “Summarize key points from `.txt` files in `/content/files`; write `.summary.txt` to `/content/summaries`; ≤5 bullets, factual.”

## 1) Goals (G)

* 1–3 goals, prioritized, stepwise.

```python
goals = [Goal(1, "file_summary", "1) list files 2) read 3) summarize ≤5 bullets 4) save")]
```

## 2) Environment (E)

* Domain verbs only (list/read/write/etc), **no business logic**.
* Safety: path whitelist, size guard, deterministic outputs.
* Uniform envelope returned by base `Environment.execute_action`.

## 3) Actions (A) + Registry

* One `Action` per Env verb (plus LLM helpers), **specific names**, JSON-schema params.
* Optional `terminate` action (`terminal=True`).

## 4) Dependency Injection (LLM helpers)

* Wrap model calls (e.g., `make_summarizer(chat_fn)`), never hardcode clients.

## 5) Memory (M)

* `{role, content}` items; small window (6–8). Keep it dumb at first.

## 6) AgentLanguage

* Deterministic prompt constructor; sort goals/actions; cap memory.
* If using function calling, parsing lives in `generate_response`.

## 7) Function-Calling Driver

* Export registry → `tools`.
* `generate_response(prompt) -> {"tool": name, "args": {...}}`.
* Safe default if no tool called (e.g., `list_*`).

## 8) Orchestrator

* Loop: prompt → decision → validate → execute → write result to memory → (terminate?).
* Guardrails: `max_iterations`, schema validation, error envelopes.

---

# Test Plan v1.0 (run every agent)

## A) Unit-ish checks (fast)

* **Env**: `list_*` returns list; `read_*` returns `{content, truncated}`; writes land in expected folder.
* **Registry**: missing required arg → validation error.
* **LLM helper**: with a **fake chat\_fn**, returns plausible summary (string).

## B) Integration smoke

* **Mock driver**: hardcode `{"tool": "list_*"}` once; ensure result envelope looks right.
* **Function calling**: real model picks tools in expected order on a tiny dataset.

## C) Acceptance (spec → done)

* Produce outputs matching “Done when.” Fail if any file skipped or bullets > limit.

---

# Iteration Loop (how we improve the recipe)

1. **Log**: Keep `Prompt →`, `Decision ←`, `Result ←` prints. Save last N messages.
2. **Find pain**: Where did it stumble? (wrong tool? arg mistake? long file?)
3. **Fix**:

   * Tool too generic? split it.
   * Error ambiguous? add `hint` in envelope.
   * Prompt noisy? trim or sort more deterministically.
   * Files too big? add `read_chunk` action.
4. **Re-run acceptance** on the same dataset.
5. **Update the recipe** (checklist + code stub) so the fix is reusable.

---

# Experiment Set (3–5 agents to validate the recipe)

1. **Research Summarizer** (current): local `.txt` → summaries.
2. **Task Planner**: goal → tasks CRUD (add/list/complete/remove).
3. **CSV Explorer**: list/read csv → summarize columns → write simple report.
4. **Code Snippet Finder**: list/read `.py` → search patterns → explain.
5. **KB Q\&A**: search markdown → answer with citations.

Each uses the same **recipe**; only the **Environment + Actions** change.

---

# Portable Notebook Scaffold (sections)

1. Spec
2. STEP 1: GAME skeleton (import once per session)
3. STEP 2: Environment (domain)
4. STEP 3: Actions + Registry (+ optional terminate)
5. STEP 4: DI helpers (LLM wrappers)
6. STEP 5: AgentLanguage
7. STEP 6: Function-calling driver
8. STEP 7: Final wiring + run
9. Tests: unit-ish, integration, acceptance
10. Notes: issues found → recipe updates





# **Agent Idea**

## A file-summarizing assistant that:

* Reads `.txt` (and optionally `.md`, `.docx`, or `.pdf` in later iterations) from a given folder.
* Produces clear, concise bullet-point summaries.
* Saves the summaries into an output folder.
* Works in an iterative “plan → act → check” loop, calling the right tools in sequence without manual intervention.

---

### **Key Features**

1. **File Management**

   * List available text files in the input directory.
   * Read file contents safely (with truncation for very large files).
   * Write summaries to a separate output directory with sanitized filenames.

2. **Summarization**

   * Use an LLM to convert file content into ≤5 concise bullet points.
   * Ensure summaries retain key facts and avoid fluff.

3. **Action Orchestration**

   * Automatically choose next steps (list → read → summarize → write) without hard-coding the order.
   * Allow retrying or skipping files if an error occurs.

4. **Memory & Context**

   * Keep a rolling memory of the last N steps so the agent can track progress and avoid repeating work.
   * Optionally include truncated file contents in memory for multi-step summarization.

5. **Extensibility**

   * Easy to add new actions (e.g., translate summary, create PDF version).
   * Portable “plug-and-play” environment so the same code can be used for other agent ideas.

6. **Safety & Robustness**

   * Path sanitization to prevent unsafe file access.
   * Graceful handling of missing files or bad inputs.
   * JSON-schema-like argument validation before tool execution.






# **Who Does What — Brain vs Body**

### **1) File Management — Body**

* **List files** → **Body**
  Python: `list_txt_files()` → `["a.txt", "b.txt", ...]`.
* **Read file contents (with truncation)** → **Body**
  Python: `read_txt_file(file_name)` → `{file_name, content, truncated: bool}`.
* **Write sanitized summaries to output** → **Body**
  Python: `write_summary_file(source_file, content)` → `"/content/summaries/a.summary.txt"`.

---

### **2) Summarization — Brain**

* **Convert content to ≤5 bullet points** → **Brain**
  LLM: `summarize_text(text, max_points=5, style="bullet")` (via dependency injection).
* **Maintain tone/quality (“concise, factual, no fluff”)** → **Brain**
  System prompt ensures style compliance.

---

### **3) Action Orchestration — Brain + Body**

* **Choose next tool (list → read → summarize → write)** → **Brain**
  LLM planning via function calling.
* **Validate args & execute tool** → **Body**
  Python: schema checks + `Environment.execute_action(...)`.
* **Handle failures (retry/skip)** → **Both**
  **Body**: returns structured error.
  **Brain**: decides next action.

---

### **4) Memory & Context — Brain + Body**

* **Store outcomes & last N steps** → **Body**
  Python: `Memory.add_memory({role, content})`.
* **Use memory to plan and avoid repeats** → **Brain**
  LLM: reads memory in prompt.

---

### **5) Extensibility — Brain + Body**

* **Add new tools (translate, export PDF)** → **Body**
  Python: implement env functions + register in action registry.
* **Decide when to use new tools** → **Brain**
  LLM: selects via function calling.

---

### **6) Safety & Robustness — Brain + Body**

* **Path sanitization, size limits, arg validation, error envelopes** → **Body**
  Deterministic Python guardrails.
* **Interpret errors & re-plan** → **Brain**
  LLM recovery logic.



## **Brain vs Body** table

| **Function**             | **Task**                         | **Who**   | **Notes**                                       |
| ------------------------ | -------------------------------- | --------- | ----------------------------------------------- |
| **File Management**      | List files                       | **Body**  | `list_txt_files()` returns list of file names   |
|                          | Read file contents               | **Body**  | `read_txt_file(file_name)` with truncation flag |
|                          | Write summaries                  | **Body**  | Sanitized filenames → output folder             |
| **Summarization**        | Summarize to ≤5 bullets          | **Brain** | LLM, concise/factual tone                       |
|                          | Maintain style guardrails        | **Brain** | Via system prompt                               |
| **Action Orchestration** | Choose next tool                 | **Brain** | LLM planning via function calling               |
|                          | Validate args & execute tool     | **Body**  | Schema checks + `Environment.execute_action`    |
|                          | Handle failures                  | **Both**  | Body returns error, Brain decides retry/skip    |
| **Memory & Context**     | Store last N steps               | **Body**  | `Memory.add_memory()`                           |
|                          | Use memory for planning          | **Brain** | Reads memory in prompt                          |
| **Extensibility**        | Add new tools                    | **Body**  | New Python functions + registry entries         |
|                          | Decide when to use tools         | **Brain** | LLM picks tools                                 |
| **Safety & Robustness**  | Path/size checks, arg validation | **Body**  | Guardrails in Python                            |
|                          | Interpret errors & re-plan       | **Brain** | LLM adjusts plan                                |




# Agent Recipe v1.1
# 1) Idea

**Idea:**
Create an assistant that reads plain-text documents in a folder, writes short, factual bullet-point summaries for each file, and saves those summaries in a separate folder. It should work step-by-step on its own, choosing the right action at each step, and handle errors gracefully.

---

# 2) Architecture Map (Brain vs Body)

| Function Area   | Task                     | Who       | What it does (plain English)                                                        |
| --------------- | ------------------------ | --------- | ----------------------------------------------------------------------------------- |
| File management | List files               | **Body (Python)**  | Get the list of text files we can work on.                                          |
|                 | Read file                | **Body (Python)**  | Open a file safely and return its text (with a truncation flag if it’s huge).       |
|                 | Write summary            | **Body (Python)**  | Save a summary to the summaries folder with a safe filename.                        |
| Summarization   | Make a summary           | **Brain (LLM)** | Turn raw text into ≤5 clear, factual bullet points.                                 |
| Action planning | Choose next step         | **Brain (LLM)** | Decide which tool to call next (list → read → summarize → write).                   |
|                 | Execute the tool safely  | **Body (Python)**  | Validate inputs and run the tool; return a structured result or a structured error. |
| Errors          | Handle failures          | **Both**  | Body (Python) reports a clear error; Brain decides the next recovery step.                   |
| Memory          | Store progress           | **Body (Python)**  | Keep a small rolling log of decisions and results.                                  |
|                 | Use memory               | **Brain (LLM)** | Read recent steps to avoid repeating work and to plan the next step.                |
| Extensibility   | Add new tools            | **Body (Python)**  | Implement new functions (e.g., translate, export PDF) and register them.            |
|                 | Choose new tools         | **Brain (LLM)** | Decide when to use new tools.                                                       |
| Safety          | Guardrails               | **Body (Python)**  | Path safety, size limits, argument checks, consistent result format.                |
|                 | Adjust plan after errors | **Brain (LLM)** | Interpret errors and pick a better next step.                                       |

---

# 3) Tool Inventory (who + what + short description)

1. **list\_txt\_files** — **Body**
   Return all `.txt` file names in the input folder.

2. **read\_txt\_file(file\_name)** — **Body**
   Read a specific text file. If it’s very large, return a truncated version and mark it as truncated.

3. **summarize\_text(text, max\_points=5, style="bullet")** — **Brain**
   Produce a short, factual summary of the text. Default to ≤5 bullet points.

4. **write\_summary\_file(source\_file, content)** — **Body**
   Save the summary to an output folder with a sanitized filename.

5. *(Optional)* **terminate(message?)** — **Body**
   Signal that the work is completed and the loop can stop.

> We’ll review this list before coding to make sure nothing is missing.

# 3.5) ActionContext  

**What it is:** A tiny object passed around during execution that carries shared dependencies and state the actions might need.

**Why:** Keeps function signatures clean and prevents hard-coding globals. Makes tests easier.

**Includes (suggested):**

* `env` (Environment instance)
* `registry` (ActionRegistry)
* `memory` (Memory)
* `llm` deps (e.g., `openai_chat_fn`, other helper callables)
* optional `config` (limits, paths, flags)

**Acceptance (quick check):**

* Adding a new dependency does **not** require changing action function signatures.
* Unit tests can swap in fakes by replacing fields on `ActionContext`.
---

# 4) Tool Schemas (stacked for easy reading)

**read\_txt\_file**

```json
{
  "type": "object",
  "properties": {
    "file_name": { "type": "string" }
  },
  "required": ["file_name"]
}
```

**summarize\_text**

```json
{
  "type": "object",
  "properties": {
    "text": { "type": "string" },
    "max_points": { "type": "integer", "minimum": 1, "maximum": 12 },
    "style": { "type": "string", "enum": ["bullet", "paragraph"] }
  },
  "required": ["text"]
}
```

**write\_summary\_file**

```json
{
  "type": "object",
  "properties": {
    "source_file": { "type": "string" },
    "content": { "type": "string" }
  },
  "required": ["source_file", "content"]
}
```

**terminate** *(optional)*

```json
{
  "type": "object",
  "properties": {
    "message": { "type": "string" }
  },
  "required": []
}
```

---

# 5) Memory Policy (What we remember)

**Decide:** What we store and how much of it we show the model.

**Write it down (plain English):**

* Each memory item looks like: `{role: "user" | "assistant" | "tool", content: <text or small dict>}`.
* Keep only the **last 8** items when talking to the model (small window = less noise).
* Always log the tool’s result (success or error) and the agent’s decision.

**Quick check:** If the model saw only the last 8 items, could it keep going without confusion?

---

# 6) Goals (What we’re doing and how we’ll do it)

**Decide:** One or two short goals in order of importance.

**Write it down (example):**

```
Goal(priority=1, name="file_summary",
     description="1) list files  2) read each  3) summarize to ≤5 bullets  4) save the summary")
```

**Quick check:** Does this still make sense in a blank notebook with no other context?

---

# 7) Message Plan (what we tell the model)

**Decide:** The exact messages we’ll send: system rules, a simple user nudge, recent memory, and the available tools.

**Write it down (outline):**

* **System message:**
  “You are a precise, factual summarizer. Choose exactly one tool per step. After saving all summaries, end the session.”
* **User message:**
  “Pick the best next tool to make progress. Return a function call, not prose.”
* **Memory:**
  Include the last 8 items, converting any dicts to short strings.
* **Tools:**
  Provide tool names, plain-English descriptions, and parameter schemas.

**Quick check:** If we run this twice, the messages should be similar and predictable.

---

# 8) Decision Maker (how we pick the next tool)

**Decide:** How we turn our messages into a “do this next” instruction.

**Write it down (rules):**

* The model should return **exactly one** tool to call each step, with arguments.
  Example result: `{"tool": "read_txt_file", "args": {"file_name": "a.txt"}}`
* If the model doesn’t pick a tool, we **default** to:
  `{"tool": "list_txt_files", "args": {}}`
* Keep creativity low (we want steady choices, not flair).

**Quick check:** From a cold start (empty memory), do we still move forward?

---
# 9) Orchestrator (Capabilities)

**What they are:** Small, optional modules that hook into the loop at defined points (before decision, after execution, on error, on terminate).

**Examples:**

* `PlanFirstCapability` — nudges the model to plan or pre-select a safe first tool (e.g., `list_txt_files`).
* `ProgressTrackingCapability` — updates memory with which files are done/remaining.
* `RetryBackoffCapability` — suggests pauses/retries after repeated failures.

**Why:** Extend behavior without rewriting the loop.

**Hooks (suggested):**

* `on_before_decision(context)`
* `on_after_decision(context, decision)`
* `on_after_execution(context, result)`
* `on_error(context, error)`
* `on_terminate(context)`

**Acceptance:**

* You can enable/disable a capability without changing the orchestrator code.
* Capabilities don’t alter tool contracts; they only observe/annotate or add hints.

# 9.1) PlanFirstCapability (example capability)

**Purpose:** Ensures the first step is safe and deterministic (e.g., list files) to avoid the model guessing filenames.

**Behavior:**

* If memory is “cold” (no prior tool results), inject a “start with `list_txt_files`” hint or directly return that decision once.

**Acceptance:**

* From a cold start, the first decision is always `list_txt_files` (unless you configure otherwise).


# 9.2) The Loop (glue that runs everything)

**Decide:** The simple steps the agent repeats.

**Write it down (6 steps):**

1. Build the messages (goals + recent memory + tools + system rules).
2. Ask the model to choose the **one next tool**.
3. Check the tool’s arguments; if something’s missing, log the error and let the model try again.
4. Run the tool safely and capture the result (or error) in a **uniform shape**.
5. Add the decision and the result to memory. If there was an error, the model will recover next step.
6. Stop if a **stop tool** is called (like `terminate`) or if we hit a maximum number of steps.

**Quick check:** No business rules here—this loop should work for many agents.

---

# 10) What We Log (so we can debug fast)

**Decide:** What to print or store on each step so we can see what happened.

**Write it down (three prints):**

* `Prompt →` the messages or a compact summary of them
* `Decision ←` the chosen tool and arguments
* `Result ←` the tool’s success or error in the uniform result shape

Also keep a short tail of memory (last 6 items) handy for quick inspection.

**Quick check:** If something breaks, can you spot *what* and *why* in under a minute?

---

# 11) Testing Checklist

**What to test before integration:**

* **Tools:**

  * list returns expected files
  * read returns `{content, truncated}` and handles missing files with a helpful hint
  * summarize returns a non-empty string for sample text
  * write creates a file with sanitized name
* **Validation:**

  * Missing required arg → validation error (no execution)
  * Bad types (if enforced) → clear error
* **Orchestrator smoke:**

  * Mock decision `{"tool": "list_txt_files"}` runs and logs a valid result envelope
* **Function-calling path:**

  * Real model picks a tool on a tiny dataset
  * Safe default (list) kicks in if no tool call is returned

**Acceptance:**

* All unit checks pass locally without the full loop.
* One integration smoke test completes a single file end-to-end.


---

# 12) One Pass Storyboard (without code)

**Walkthrough:**

1. **Brain**: “list files” → **Body** returns `["a.txt","b.txt"]`.
2. **Brain**: “read a.txt” → **Body** returns `{content, truncated:false}`.
3. **Brain**: “summarize” → **Body** (LLM helper) returns ≤5 bullets.
4. **Brain**: “write summary for a.txt” → **Body** returns saved path.
5. Repeat 2–4 for `b.txt`.
6. **Brain**: “terminate” with message “All summaries written.”

**Failure example:**
If “read a.txt” fails with “file not found” + hint “list files first”, the **Brain** calls “list files” and tries again.

**Quick check:** Does every failure have a clear next step?

---

# 13) Known Risks and Early Choices

**Decide now:**

* Very large files → add a “read chunk” tool next iteration if needed.
* Mixed encodings → always open with `errors="replace"` so we never crash.
* Model wandering → keep the system rule strict and memory short.
* Tool names drifting → stick to `verb_object_context` (e.g., `write_summary_file`).

**Quick check:** Are we okay shipping v1 with these choices?

---

# 14) Ready-to-Code Checklist (final gate)

* [ ] Brain/Body table finalized
* [ ] Tool list + their schemas finalized
* [ ] Environment rules agreed (paths, truncation, envelope)
* [ ] Memory policy set (shape + window)
* [ ] Goal text written
* [ ] Message plan written (system + user + memory + tools)
* [ ] Decision maker rules written (one tool per step, default start)
* [ ] Loop unchanged (simple and reusable)
* [ ] “Done” checklist ready

---




## **Agent Build Recipe – Final Structure**

1. **Idea** – Plain-English description of what the agent should do, without implementation details.
2. **Brain vs Body Table** – Divide responsibilities between LLM (“Brain”) and Python code (“Body”).
3. **Tool Inventory** – List every tool, label Brain or Body, and give a short description of what it does.
4. **ActionContext** – Central object holding dependencies (memory, auth, services, registry) for tools.
5. **Tool Schemas** – JSON schema definitions for tool inputs/outputs, ensuring safety and clarity.
6. **Environment Contracts** – Rules for safe execution (path restrictions, truncation, result envelopes).
7. **Memory Policy** – What to remember, in what format, and for how long (rolling window).
8. **Goals** – Clear, prioritized statements of what the agent should achieve.
9. **Message Plan** – How prompts are built from Goals, Actions, Memory, and Environment for the LLM.
10. **Function Calling Driver** – Turns prompts into `{tool, args}` with safe defaults.
11. **Capabilities Layer** – Optional modular behaviors that enhance the loop (planning, tracking, retries).
12. **Orchestrator Loop** – The minimal “glue” logic to run the agent until done.
13. **Logging** – Decide what to record at each step for debugging and transparency.
14. **Testing & Acceptance Criteria** – Unit/integration tests plus checklist for pass/fail outcomes.
15. **Storyboard (Dry-Run)** – One complete, code-free walk-through of a happy path and an error path.
16. **Risks & Early Choices** – Known limitations and pre-decisions to avoid surprises later.
17. **Ready-to-Code Checklist** – Final preflight check before implementation.

The improved **Agent Build Recipe** is rewritten below so that it fully integrates the parts that were previously missing:

* **ActionContext** explicitly between Tool Inventory and Tool Schemas.
* **Capabilities Layer** explicitly between Function Calling Driver and Orchestrator Loop.
* **Testing & Acceptance** renamed and broadened to include explicit test planning.
* **Known Risks & Early Choices** aligned in order with the checklist.

Here’s the revised **final master template**:

---

## **Agent Build Recipe – Master Template**

1. **Idea**
   Plain-English description of what the agent should do. No technical paths or code — just the “what” and “why.”

2. **Brain vs Body Table**
   Map responsibilities between **Brain** (LLM) and **Body** (Python code), with explicit labels.

3. **Tool Inventory**
   List every tool with:

   * Name
   * Brain or Body
   * Short description of what it does.

4. **ActionContext**
   Central object that holds and injects dependencies into tools (memory, auth keys, services, registries).

5. **Tool Schemas**
   JSON schema for each tool’s inputs/outputs, stacked clearly for visibility.

   ```json
   {
     "type": "object",
     "properties": { "file_name": { "type": "string" } },
     "required": ["file_name"]
   }
   ```

6. **Environment Contracts**
   Rules for safe execution: allowed paths, truncation limits, structured success/failure envelopes.

7. **Memory Policy**
   Define format, size, and what’s always stored (e.g., tool results, decisions).

8. **Goals**
   Short, prioritized statements of what the agent is trying to achieve.

9. **Message Plan**
   How prompts are built from Goals, Actions, Memory, and Environment.

10. **Function Calling Driver**
    How the prompt is converted into `{tool, args}`. Include safe defaults and temperature settings.

11. **Capabilities Layer**
    Optional plug-in behaviors like:

    * **PlanFirstCapability** (always start with planning tool)
    * **ProgressTrackingCapability** (monitor % complete)

12. **Orchestrator Loop**
    Minimal glue logic that runs until a terminal condition is met.

13. **Logging**
    Decide exactly what to print/store at each step for debugging and transparency.

14. **Testing & Acceptance**
    Unit and integration test plan + clear pass/fail checklist.

15. **Storyboard (Dry-Run)**
    One happy-path and one error-path walkthrough, no code.

16. **Known Risks & Early Choices**
    Limitations and pre-decisions that affect the build.

17. **Ready-to-Code Checklist**
    Quick preflight before implementation.





## Agent Build Recipe Doc

In [None]:
# Agent Build Recipe – Final Structure (outline only; this cell writes no file)

# 1. Idea – Plain-English description of what the agent should do, without implementation details.
# 2. Brain vs Body Table – Divide responsibilities between LLM (“Brain”) and Python code (“Body”).
# 3. Tool Inventory – List every tool, label Brain or Body, and give a short description of what it does.
# 4. ActionContext – Central object holding dependencies (memory, auth, services, registry) for tools.
# 5. Tool Schemas – JSON schema definitions for tool inputs/outputs, ensuring safety and clarity.
# 6. Environment Contracts – Rules for safe execution (path restrictions, truncation, result envelopes).
# 7. Memory Policy – What to remember, in what format, and for how long (rolling window).
# 8. Goals – Clear, prioritized statements of what the agent should achieve.
# 9. Message Plan – How prompts are built from Goals, Actions, Memory, and Environment for the LLM.
# 10. Function Calling Driver – Turns prompts into {tool, args} with safe defaults.
# 11. Capabilities Layer – Optional modular behaviors that enhance the loop (planning, tracking, retries).
# 12. Orchestrator Loop – The minimal “glue” logic to run the agent until done.
# 13. Logging – Decide what to record at each step for debugging and transparency.
# 14. Testing & Acceptance Criteria – Unit/integration tests plus checklist for pass/fail outcomes.
# 15. Storyboard (Dry-Run) – One complete, code-free walk-through of a happy path and an error path.
# 16. Risks & Early Choices – Known limitations and pre-decisions to avoid surprises later.
# 17. Ready-to-Code Checklist – Final preflight check before implementation.

In [2]:
# Persist the polished "Agent Build Recipe – Final Structure" as a plain-text file.
# The recipe body is kept as a single literal so the saved document matches it exactly.

recipe_text = """Agent Build Recipe – Final Structure (polished)

1. Idea – Plain-English description of what the agent should do and why (no paths or code here).
2. Brain vs Body Table – Divide responsibilities between LLM (“Brain”) and Python (“Body”) with explicit task ownership.
3. Tool Inventory – List every tool, mark Brain/Body owner, add a one-line description and any pre/postconditions.
4. ActionContext – Central object holding dependencies (memory, env, registry, LLM helpers, config) that tools receive (no globals).
5. Tool Schemas – JSON schema for inputs to each tool; outputs must use the standard result envelope (see Environment).
6. Environment Contracts – Safe execution rules (path whitelist, size caps, deterministic side-effects) and the result envelope:
   Success -> {"tool_executed": true, "result": ...}
   Failure -> {"tool_executed": false, "error": "...", "hint": "...", "retryable": true|false}
7. Memory Policy – What to remember and how (e.g., sliding window vs summarized), item shape, and window size (details live in the particulars sheet).
8. Goals – Short, prioritized statements (what + how) that are stable across runs.
9. Message Plan – Deterministic prompts built from Goals, Actions, Memory, and Environment; keep them minimal and consistent.
10. Function Calling Driver – Convert prompts into exactly one {tool, args} per step; define a safe default if none is chosen.
11. Capabilities Layer – Optional plug-ins (e.g., plan-first, progress tracking, retry) that hook into the loop without changing tool contracts.
12. Orchestrator Loop – Thin glue only: build -> decide -> validate -> execute -> log -> repeat; no business logic here.
13. Logging – Print/store a compact Prompt ->, Decision <-, Result <-; redact secrets and keep a short memory tail for debugging.
14. Testing & Acceptance Criteria – Unit + integration tests and a pass/fail checklist tied to the Idea and Goals.
15. Storyboard (Dry-Run) – One happy path and one error-recovery path described step-by-step, no code.
16. Risks & Early Choices – Known failure modes and guardrails (e.g., large files, encoding, naming conventions, drift controls).
17. Ready-to-Code Checklist – Final preflight (tools & schemas frozen, env rules agreed, driver defaults set, tests defined).
"""

# Destination of the saved recipe.
output_path = "/content/Agent_Build_Recipe_Master.txt"

# Write as UTF-8 explicitly: the text contains non-ASCII characters (curly quotes, en dashes).
with open(output_path, mode="w", encoding="utf-8") as recipe_file:
    recipe_file.write(recipe_text)

# Confirm where the file landed.
print(f"Saved recipe to: {output_path}")



Saved recipe to: /content/Agent_Build_Recipe_Master.txt


# Master Recipe Dress Rehearsal
---

# 1) Idea

Create an assistant that reads plain-text documents in a folder, produces short, factual bullet-point summaries for each file, and saves those summaries in a separate folder. It should proceed step-by-step on its own, choose the right action each step, and handle errors gracefully.

---

# 2) Brain vs Body Table

| Function Area   | Task                 | Who       | What it does (plain English)                                      |
| --------------- | -------------------- | --------- | ----------------------------------------------------------------- |
| File management | List files           | **Body (Python)**  | Get the list of `.txt` files we can work on.                      |
|                 | Read file            | **Body (Python)**  | Open a file safely and return its text (mark if truncated).       |
|                 | Write summary        | **Body (Python)**  | Save a summary with a sanitized filename.                         |
| Summarization   | Make a summary       | **Brain (LLM)** | Turn raw text into ≤5 factual bullet points.                      |
| Action planning | Choose next step     | **Brain (LLM)** | Decide which tool to call next (list → read → summarize → write). |
|                 | Execute tool safely  | **Body (Python)**  | Validate inputs and run the tool; return a structured result.     |
| Errors          | Handle failures      | **Both**  | Body reports a clear error; Brain decides the next recovery step. |
| Memory          | Store progress       | **Body (Python)**  | Keep a small rolling log of decisions and results.                |
|                 | Use memory           | **Brain (LLM)** | Read recent steps to avoid repeats and plan next.                 |
| Extensibility   | Add new tools        | **Body (Python)**  | Implement new functions and register them.                        |
|                 | Use new tools        | **Brain (LLM)** | Choose when to call new tools.                                    |
| Safety          | Guardrails           | **Body (Python)**  | Path safety, size limits, argument checks, result envelope.       |
|                 | Re-plan after errors | **Brain (LLM)** | Interpret errors and pick a better next step.                     |

---

# 3) Tool Inventory

1. **list\_txt\_files** — **Body**
   Return all `.txt` filenames in the input folder.

2. **read\_txt\_file(file\_name)** — **Body**
   Read a text file. If very large, truncate and mark `truncated: true`.

3. **summarize\_text(text, max\_points=5, style="bullet")** — **Brain**
   Produce a concise, factual summary of the text (default ≤5 bullets).

4. **write\_summary\_file(source\_file, content)** — **Body**
   Save the summary to the output folder using a sanitized filename.

5. **terminate(message?)** — **Body** *(optional, terminal)*
   Signal that all work is complete and end the loop.

---

# 4) ActionContext

A small object passed around that carries shared dependencies so tool functions stay clean.

**Fields (for this agent):**

* `env` — the environment (file ops)
* `registry` — action registry
* `memory` — rolling store of recent steps
* `llm` — callable(s) for summarization (e.g., `openai_chat_fn`, or a small dict of helpers)
* `config` — paths, truncation limit, etc.

---

# 5) Tool Schemas (stacked)

**read\_txt\_file**

```json
{
  "type": "object",
  "properties": {
    "file_name": { "type": "string" }
  },
  "required": ["file_name"]
}
```

**summarize\_text**

```json
{
  "type": "object",
  "properties": {
    "text": { "type": "string" },
    "max_points": { "type": "integer", "minimum": 1, "maximum": 12 },
    "style": { "type": "string", "enum": ["bullet", "paragraph"] }
  },
  "required": ["text"]
}
```

**write\_summary\_file**

```json
{
  "type": "object",
  "properties": {
    "source_file": { "type": "string" },
    "content": { "type": "string" }
  },
  "required": ["source_file", "content"]
}
```

**terminate (optional)**

```json
{
  "type": "object",
  "properties": {
    "message": { "type": "string" }
  },
  "required": []
}
```

---

# 6) Environment Contracts

* **Folders**: input folder for sources; output folder for summaries.
* **Path safety**: only allow files inside those folders; sanitize filenames.
* **Large files**: cap reads at a safe character limit (e.g., 12,000) and set `truncated: true`.
* **Result envelope**:

  * Success → `{"tool_executed": true, "result": ...}`
  * Failure → `{"tool_executed": false, "error": "...", "hint": "...", "retryable": boolean}`

---

# 7) Memory Policy

* Each item is `{role: "user"|"assistant"|"tool", content: <text or small dict>}`.
* Keep only the **last 8** items in the prompt.
* Always log: the agent’s decision and the tool result (success or error).

---

# 8) Goals

```
Goal(priority=1, name="file_summary",
     description="1) list files  2) read each  3) summarize to ≤5 bullets  4) save the summary")
```

---

# 9) Message Plan (what we tell the model)

* **System:** “You are a precise, factual summarizer. Choose exactly one tool per step. After saving all summaries, end the session.”
* **User:** “Pick the best next tool to make progress. Return a function call, not prose.”
* **Memory:** last 8 items (dicts coerced to short strings if needed).
* **Tools:** names, descriptions, and parameter schemas.

---

# 10) Function Calling Driver (decision interface)

* Convert the messages into **one** `{ "tool": "...", "args": {...} }` per step.
* If the model doesn’t pick a tool, **default** to `{"tool":"list_txt_files","args":{}}`.
* Keep temperature low for stable choices.

---

# 11) Capabilities Layer (optional plug-ins)

* **PlanFirstCapability** — if memory is “cold”, nudge “start with `list_txt_files`.”
* **ProgressTrackingCapability** — track which files are done/remaining in memory.
* **RetryBackoffCapability** — suggest retry spacing after repeated failures.

*These hook into the loop but don’t change tool contracts.*

---

# 12) Orchestrator Loop (glue)

1. Build messages (Goals + Memory + Tools + System rules).
2. Ask model to choose the next tool.
3. Validate arguments; on failure → log error + continue.
4. Execute tool via the Environment; write result to memory.
5. If execution failed → Brain will re-plan next step.
6. Stop on a terminal tool or `max_iterations`.

---

# 13) Logging (what we record each step)

* `Prompt →` (compact summary)
* `Decision ←` (tool + args)
* `Result ←` (success/failure envelope)
* Keep a short tail of memory (last \~6) for quick inspection.

---

# 14) Testing & Acceptance

**Unit checks**

* list returns expected files
* read returns `{content, truncated}` and handles missing files with a useful `hint`
* summarize returns a non-empty string for sample text
* write creates a file with sanitized name
* validation: missing required arg → clean error (no execution)

**Integration smoke**

* Mock decision `{"tool":"list_txt_files"}` runs and logs a valid result.

**Function-calling**

* Real model picks a tool on a tiny dataset.
* Default to `list_txt_files` if no tool call returned.

**Acceptance (Go/No-Go)**

* [ ] One `.summary.txt` per source file
* [ ] Each has **≤5** bullet points, factual tone
* [ ] No unhandled errors in memory tail
* [ ] Nonexistent file requests yield helpful hints
* [ ] Large files set `truncated: true` and the run still completes

---

# 15) Storyboard (dry-run)

1. **Brain**: list files → **Body**: `["a.txt","b.txt"]`
2. **Brain**: read `a.txt` → **Body**: `{content, truncated:false}`
3. **Brain**: summarize → **Body**: bullets string
4. **Brain**: write summary for `a.txt` → **Body**: saved path
5. Repeat for `b.txt`
6. **Brain**: terminate (“All summaries written.”)

**Failure branch**
Read fails (“file not found”, hint “list files first”) → **Brain** calls list, recovers.

---

# 16) Known Risks & Early Choices

* Very large files → add a `read_txt_chunk` tool next iteration.
* Mixed encodings → always open with `errors="replace"`.
* LLM drift → strict system message + short memory + low temperature.
* Naming → stick to `verb_object_context` (e.g., `write_summary_file`).

---

# 17) Ready-to-Code Checklist

* [ ] Brain/Body table finalized
* [ ] Tool list + schemas finalized
* [ ] Environment rules agreed (paths, truncation, envelope)
* [ ] Memory policy set (shape + window)
* [ ] Goal text written
* [ ] Message plan written
* [ ] Function-calling defaults set
* [ ] Capabilities selected (optional)
* [ ] Loop remains minimal and reusable
* [ ] Testing & Acceptance checklist ready




## Dress Rehearsal Doc

In [3]:
dress_rehearsal = """Master Recipe Dress Reheral
---

1) Idea

Create an assistant that reads plain-text documents in a folder, produces short, factual bullet-point summaries for each file, and saves those summaries in a separate folder. It should proceed step-by-step on its own, choose the right action each step, and handle errors gracefully.

---

2) Brain vs Body Table

| Function Area   | Task                 | Who            | What it does (plain English)                                      |
| --------------- | -------------------- | -------------- | ----------------------------------------------------------------- |
| File management | List files           | Body (Python)  | Get the list of `.txt` files we can work on.                      |
|                 | Read file            | Body (Python)  | Open a file safely and return its text (mark if truncated).       |
|                 | Write summary        | Body (Python)  | Save a summary with a sanitized filename.                         |
| Summarization   | Make a summary       | Brain (LLM)    | Turn raw text into ≤5 factual bullet points.                      |
| Action planning | Choose next step     | Brain (LLM)    | Decide which tool to call next (list → read → summarize → write). |
|                 | Execute tool safely  | Body (Python)  | Validate inputs and run the tool; return a structured result.     |
| Errors          | Handle failures      | Both           | Body reports a clear error; Brain decides the next recovery step. |
| Memory          | Store progress       | Body (Python)  | Keep a small rolling log of decisions and results.                |
|                 | Use memory           | Brain (LLM)    | Read recent steps to avoid repeats and plan next.                 |
| Extensibility   | Add new tools        | Body (Python)  | Implement new functions and register them.                        |
|                 | Use new tools        | Brain (LLM)    | Choose when to call new tools.                                    |
| Safety          | Guardrails           | Body (Python)  | Path safety, size limits, argument checks, result envelope.       |
|                 | Re-plan after errors | Brain (LLM)    | Interpret errors and pick a better next step.                     |

---

3) Tool Inventory

1. list_txt_files — Body
   Return all `.txt` filenames in the input folder.

2. read_txt_file(file_name) — Body
   Read a text file. If very large, truncate and mark `truncated: true`.

3. summarize_text(text, max_points=5, style="bullet") — Brain
   Produce a concise, factual summary of the text (default ≤5 bullets).

4. write_summary_file(source_file, content) — Body
   Save the summary to the output folder using a sanitized filename.

5. terminate(message?) — Body (optional, terminal)
   Signal that all work is complete and end the loop.

---

4) ActionContext

A small object passed around that carries shared dependencies so tool functions stay clean.

Fields (for this agent):
- env — the environment (file ops)
- registry — action registry
- memory — rolling store of recent steps
- llm — callable(s) for summarization (e.g., openai_chat_fn, or a small dict of helpers)
- config — paths, truncation limit, etc.

---

5) Tool Schemas (stacked)

read_txt_file
{
  "type": "object",
  "properties": {
    "file_name": { "type": "string" }
  },
  "required": ["file_name"]
}

summarize_text
{
  "type": "object",
  "properties": {
    "text": { "type": "string" },
    "max_points": { "type": "integer", "minimum": 1, "maximum": 12 },
    "style": { "type": "string", "enum": ["bullet", "paragraph"] }
  },
  "required": ["text"]
}

write_summary_file
{
  "type": "object",
  "properties": {
    "source_file": { "type": "string" },
    "content": { "type": "string" }
  },
  "required": ["source_file", "content"]
}

terminate (optional)
{
  "type": "object",
  "properties": {
    "message": { "type": "string" }
  },
  "required": []
}

---

6) Environment Contracts

- Folders: input folder for sources; output folder for summaries.
- Path safety: only allow files inside those folders; sanitize filenames.
- Large files: cap reads at a safe character limit (e.g., 12,000) and set `truncated: true`.
- Result envelope:
  - Success → {"tool_executed": true, "result": ...}
  - Failure → {"tool_executed": false, "error": "...", "hint": "...", "retryable": boolean}

---

7) Memory Policy

- Each item is {role: "user"|"assistant"|"tool", content: <text or small dict>}.
- Keep only the last 8 items in the prompt.
- Always log: the agent’s decision and the tool result (success or error).

---

8) Goals

Goal(priority=1, name="file_summary",
     description="1) list files  2) read each  3) summarize to ≤5 bullets  4) save the summary")

---

9) Message Plan (what we tell the model)

- System: “You are a precise, factual summarizer. Choose exactly one tool per step. After saving all summaries, end the session.”
- User: “Pick the best next tool to make progress. Return a function call, not prose.”
- Memory: last 8 items (dicts coerced to short strings if needed).
- Tools: names, descriptions, and parameter schemas.

---

10) Function Calling Driver (decision interface)

- Convert the messages into one {"tool": "...", "args": {...}} per step.
- If the model doesn’t pick a tool, default to {"tool":"list_txt_files","args":{}}.
- Keep temperature low for stable choices.

---

11) Capabilities Layer (optional plug-ins)

- PlanFirstCapability — if memory is “cold”, nudge “start with list_txt_files.”
- ProgressTrackingCapability — track which files are done/remaining in memory.
- RetryBackoffCapability — suggest retry spacing after repeated failures.

(These hook into the loop but don’t change tool contracts.)

---

12) Orchestrator Loop (glue)

1. Build messages (Goals + Memory + Tools + System rules).
2. Ask model to choose the next tool.
3. Validate arguments; on failure → log error + continue.
4. Execute tool via the Environment; write result to memory.
5. If execution failed → Brain will re-plan next step.
6. Stop on a terminal tool or max_iterations.

---

13) Logging (what we record each step)

- Prompt → (compact summary)
- Decision ← (tool + args)
- Result ← (success/failure envelope)
- Keep a short tail of memory (last ~6) for quick inspection.

---

14) Testing & Acceptance

Unit checks
- list returns expected files
- read returns {content, truncated} and handles missing files with a useful hint
- summarize returns a non-empty string for sample text
- write creates a file with sanitized name
- validation: missing required arg → clean error (no execution)

Integration smoke
- Mock decision {"tool":"list_txt_files"} runs and logs a valid result.

Function-calling
- Real model picks a tool on a tiny dataset.
- Default to list_txt_files if no tool call returned.

Acceptance (Go/No-Go)
- [ ] One .summary.txt per source file
- [ ] Each has ≤5 bullet points, factual tone
- [ ] No unhandled errors in memory tail
- [ ] Nonexistent file requests yield helpful hints
- [ ] Large files set truncated: true and the run still completes

---

15) Storyboard (dry-run)

1. Brain: list files → Body: ["a.txt","b.txt"]
2. Brain: read a.txt → Body: {content, truncated:false}
3. Brain: summarize → Body: bullets string
4. Brain: write summary for a.txt → Body: saved path
5. Repeat for b.txt
6. Brain: terminate (“All summaries written.”)

Failure branch
Read fails (“file not found”, hint “list files first”) → Brain calls list, recovers.

---

16) Known Risks & Early Choices

- Very large files → add a read_txt_chunk tool next iteration.
- Mixed encodings → always open with errors="replace".
- LLM drift → strict system message + short memory + low temperature.
- Naming → stick to verb_object_context (e.g., write_summary_file).

---

17) Ready-to-Code Checklist

- [ ] Brain/Body table finalized
- [ ] Tool list + schemas finalized
- [ ] Environment rules agreed (paths, truncation, envelope)
- [ ] Memory policy set (shape + window)
- [ ] Goal text written
- [ ] Message plan written
- [ ] Function-calling defaults set
- [ ] Capabilities selected (optional)
- [ ] Loop remains minimal and reusable
- [ ] Testing & Acceptance checklist ready
"""

# Save the dress-rehearsal text verbatim (UTF-8), then report the destination.
output_path = "/content/Master_Recipe_Dress_Rehearsal.txt"
with open(output_path, mode="w", encoding="utf-8") as rehearsal_file:
    rehearsal_file.write(dress_rehearsal)
print(f"Saved: {output_path}")


Saved: /content/Master_Recipe_Dress_Rehearsal.txt


# What is Missing

## Can you build a fully functioning Agent with the Code?

Your checklist is strong enough that I could build a working agent from it, **with a few small clarifications** to remove guesswork and ensure the result matches your expectations exactly.

Here’s what I’d tighten before coding:

### Must-clarify (to avoid ambiguity)

1. **Input/Output folders (exact paths & permissions)**

   * Confirm canonical paths (e.g., `/content/files`, `/content/summaries`) and that writing is allowed.

2. **File scope**

   * v1 = only `.txt`? (You mention future `.md/.pdf/.docx`; specify **not** in scope now.)

3. **Truncation rule**

   * Exact cap (e.g., **12,000 chars**), and whether the LLM should be told “truncated: true” in the prompt.

4. **Termination criteria**

   * End when all listed files have summaries **or** the LLM explicitly calls `terminate`, whichever comes first (support both stop conditions).

5. **Function-calling defaults**

   * Temperature = **0.2**; **one** tool per step; fallback to `list_txt_files` if no tool chosen.

6. **Memory window**

   * **8** items, roles limited to `user|assistant|tool`. Confirm we stringify dicts before sending to the model.

7. **Error envelope**

   * Exact shape:

     * Success → `{ "tool_executed": true, "result": ... }`
     * Failure → `{ "tool_executed": false, "error": "…", "hint": "…", "retryable": true|false }`

8. **Acceptance test corpus**

   * Provide 2–3 sample files (e.g., `short.txt`, `long.txt` > 12k chars, and a missing-file scenario) so “Done” is objectively testable.

### Should-clarify (quality/reproducibility)

1. **Summary format**

   * Bullets with `-` prefix; **≤5 bullets**; no intro/outro prose; no markdown headings.

2. **LLM prompt style**

   * Keep the system message **strict** (“concise, factual, no speculation; one tool per step”).

3. **Logging**

   * Print the triad every step: `Prompt →` (compact), `Decision ←` (tool+args), `Result ←` (envelope).
   * Keep last **6** memory items for quick inspection.

4. **Config surface**

   * Put paths, truncation cap, temperature, and max iterations in a small `config` dict held by `ActionContext`.

5. **Rate limits / retries (optional)**

   * If an OpenAI call fails, should the agent retry once after a fixed delay, or several times with exponential backoff? (Simple default: retry once after 1s.)

### Nice-to-have (but not blockers)

* **`terminate` tool** with `terminal=True`.
* **PlanFirstCapability** enabled by default for cold starts.
* **Testing plan**: run unit-ish tests for each tool + one integration pass on the sample corpus.




| #      | Master List Item                   | Why Particulars Are Needed                                            |
| ------ | ---------------------------------- | --------------------------------------------------------------------- |
| **1**  | **Idea & One-Paragraph Spec**      | Needs the exact English goal for *this* agent.                        |
| **2**  | **Brain/Body Table**               | Needs the final list of actions with role assignments for this agent. |
| **3**  | **Architecture Map**               | Needs actual tools, paths, and flow for *this* build.                 |
| **4**  | **Tool Inventory**                 | Needs exact tool names and functions.                                 |
| **5**  | **Tool Schemas**                   | Needs JSON schema for each tool.                                      |
| **6**  | **ActionContext**                  | Needs the injected dependencies and config for this agent.            |
| **7**  | **Environment Contracts**          | Needs final path rules, truncation settings, result envelope format.  |
| **8**  | **Memory Policy**                  | Needs memory size, item format, and coercion rules.                   |
| **9**  | **Goals**                          | Needs exact goal object(s) with priority and description.             |
| **10** | **Agent Language (Prompt Design)** | Needs final system/user prompts and style constraints.                |
| **11** | **Function Calling Driver**        | Needs model name, temperature, defaults, and fallback tool.           |
| **12** | **Capabilities**                   | Needs which optional capabilities are enabled (e.g., PlanFirst).      |
| **13** | **Orchestrator Loop**              | Needs loop limits (max iterations), stop conditions, logging prefs.   |
| **14** | **Observability**                  | Needs logging level and which artifacts to store.                     |
| **15** | **Testing & Acceptance**           | Needs final pass/fail criteria and test plan.                         |
| **16** | **Risks & Early Choices**          | Needs known risks + chosen mitigations for this agent.                |
| **17** | **Config Bundle**                  | Needs final key-value pairs for runtime config.                       |


This “What is Missing” doc is spot-on. It cleanly captures the exact particulars an LLM (and you) need before writing code. I’d keep everything you have and make a few small upgrades so it’s truly turnkey and reusable across agents:

## Small upgrades (to make it bulletproof)

1. **Model + API settings (explicit)**

   * Add: `model = gpt-4o-mini`, `temperature = 0.2`, `top_p = default`, `max_tokens = N (optional)`, `timeout = N sec`.

2. **Fallbacks & cold start**

   * Already noted: default to `list_txt_files`. Also add: “if tool args fail validation twice in a row → call `list_txt_files` again.”

3. **Chunking (future-proof toggle)**

   * Add an on/off flag with a stubbed API (`read_txt_chunk(file, start, n_chars)`), even if off in v1.

4. **File naming policy (deterministic)**

   * Specify: summary filename pattern `f"{stem}.summary.txt"` (ASCII-safe, snake/slug policy).
   * Collisions: overwrite vs version (v1: overwrite).

5. **Security & privacy**

   * State: no network/file writes outside whitelisted dirs; redact secrets from logs; never echo API keys in memory.

6. **Determinism knobs**

   * Temperature low (0.2), seed where supported (if not, document that runs may vary slightly).

7. **Observability**

   * Log level `INFO` by default, `DEBUG` optional toggle; cap log size (e.g., last 2000 chars per payload).

8. **Versioning**

   * Add `AGENT_VERSION = "1.0.0"`; record it in the first memory item and in each output file header (comment line).

9. **Operational limits**

   * `max_files_per_run` (v1: all), `max_iterations` (v1: 20), `per_file_retry_limit` (v1: 1).

10. **Acceptance corpus**

    * Name the exact sample files to include in the repo/notebook (`short.txt`, `very_long.txt`), and document the missing-file case (e.g., a request for `missing_case.txt`, which is deliberately absent).

## Turn it into a fill-in sheet (ready for each new agent)

If you like this structure, here’s a compact “Particulars Sheet v1.0” you can paste into a text file and fill quickly:





In [7]:
# Flat "Particulars Sheet v1.0" template held as a single text literal.
# Everything between the triple quotes is runtime text: the lines starting
# with '#' and the trailing '# ...' notes are part of the string, not Python
# comments, so edits to them change the text this cell saves.
particulars = """

PARTICULARS SHEET v1.0

# Paths & Scope
input_dir: /content/files
output_dir: /content/summaries
in_scope_extensions: [.txt]
out_of_scope_extensions: [.md, .pdf, .docx]   # ignored in v1
write_permissions_confirmed: true

# File Handling
truncate_chars: 12000
set_truncated_flag: true
chunking_enabled: false   # if true, provide read_txt_chunk API

# Model & Driver
model: gpt-4o-mini
temperature: 0.2
top_p: default
max_tokens: default
one_tool_per_step: true
fallback_tool: list_txt_files
timeout_seconds: 60

# Memory
memory_window: 8
coerce_dicts_to_strings: true
allowed_roles: [user, assistant, tool]

# Termination & Limits
stop_condition: all_files_processed_or_terminate
max_iterations: 20
max_files_per_run: all
per_file_retry_limit: 1

# Error Envelope (MUST USE)
success_shape: {"tool_executed": true, "result": ...}
failure_shape: {"tool_executed": false, "error": "...", "hint": "...", "retryable": true|false}
always_include_hint_on_failure: true

# Summary Style
bullet_prefix: "-"
max_bullets: 5
disallow_headings_and_fluff: true
tone: concise_factual_no_speculation

# Capabilities
plan_first_enabled: true
progress_tracking_enabled: false
retry_backoff_enabled: false

# Logging & Privacy
log_triad_each_step: true       # Prompt → / Decision ← / Result ←
memory_tail_visible: 6
log_level: INFO                 # DEBUG optional
redact_secrets_in_logs: true

# Determinism & Versioning
seed_supported: false
agent_version: "1.0.0"

# File Naming Policy
summary_filename_pattern: "{stem}.summary.txt"
filename_sanitization: ascii_safe_slug
on_collision: overwrite

# Tests & Acceptance
unit_tests: [list, read, summarize, write, validation_errors]
integration_smoke: one_file_end_to_end
function_calling_sanity: choose_a_tool_on_tiny_dataset
acceptance_checklist:
  - one_output_per_input_file
  - leq_5_bullets_factual
  - no_unhandled_errors
  - missing_file_yields_hint
  - truncated_true_for_long_files_and_run_completes

# Risks & Early Choices
large_files_plan: add_read_chunk_in_v2
encoding_policy: errors="replace"
naming_convention: verb_object_context
"""

# Write the sheet text to disk unchanged (UTF-8), then echo the destination.
output_path = "/content/Master_Recipe_Particulars_A.txt"
with open(output_path, mode="w", encoding="utf-8") as sheet_file:
    sheet_file.write(particulars)
print(f"Saved: {output_path}")

Saved: /content/Master_Recipe_Particulars_A.txt


In [6]:
# YAML-style particulars sheet, keyed by recipe step (step_01 ... step_16),
# held as a single text literal. The '#' lines inside the triple quotes are
# part of the string, not Python comments.
# NOTE(review): the body appears to match the `yaml_text` literal in another
# cell of this notebook (that one is saved as .yaml) — confirm whether both
# copies are needed and keep them in sync if so.
particulars = """

# Agent Particulars — Indexed by Recipe Steps

step_01_idea:
  summary: "<one paragraph, plain English>"

step_02_brain_body_table:
  tasks:  # short bullets; this mirrors the table, not the prose
    - { area: "File management", task: "List files", who: "Body" }
    - { area: "Summarization", task: "Make summary", who: "Brain" }

step_03_tool_inventory:
  tools:
    - name: "list_txt_files"
      owner: "Body"
      desc: "Return .txt filenames in input folder"
      preconditions: []
      postconditions: ["returns: [str]"]
    - name: "read_txt_file"
      owner: "Body"
      desc: "Read text; mark truncation if over cap"
      preconditions: ["file exists under input_dir"]
      postconditions: ["returns: {file_name, content, truncated: bool}"]
    - name: "summarize_text"
      owner: "Brain"
      desc: "≤5 factual bullets"
      preconditions: ["text non-empty"]
      postconditions: ["returns: str (bullets)"]
    - name: "write_summary_file"
      owner: "Body"
      desc: "Save sanitized summary"
      preconditions: ["output_dir writable"]
      postconditions: ["returns: output_path"]
    - name: "terminate"
      owner: "Body"
      desc: "Signal completion"
      preconditions: []
      postconditions: ["terminal: true"]

step_03_5_action_context:
  env: true
  registry: true
  memory: true
  llm_helpers:
    openai_chat_fn: "enabled"
  config:
    input_dir: "/content/files"
    output_dir: "/content/summaries"
    truncate_chars: 12000
    model: "gpt-4o-mini"
    temperature: 0.2
    max_iterations: 20
    memory_window: 8
    fallback_tool: "list_txt_files"

step_04_tool_schemas:
  read_txt_file:
    type: object
    properties: { file_name: { type: string } }
    required: [file_name]
  summarize_text:
    type: object
    properties:
      text: { type: string }
      max_points: { type: integer, minimum: 1, maximum: 12 }
      style: { type: string, enum: [bullet, paragraph] }
    required: [text]
  write_summary_file:
    type: object
    properties:
      source_file: { type: string }
      content: { type: string }
    required: [source_file, content]
  terminate:
    type: object
    properties: { message: { type: string } }
    required: []

step_05_environment_contracts:
  path_whitelist: ["${config.input_dir}", "${config.output_dir}"]
  filename_policy: "ascii_safe_slug"
  summary_filename_pattern: "{stem}.summary.txt"
  on_collision: "overwrite"
  truncation_cap_chars: "${config.truncate_chars}"
  result_envelope:
    success: { tool_executed: true, result: "<any>" }
    failure: { tool_executed: false, error: "<string>", hint: "<string>", retryable: "<bool>" }

step_06_memory_policy:
  item_shape: "{role: user|assistant|tool, content: str|dict}"
  window_items: "${config.memory_window}"
  coerce_dicts_to_strings: true
  always_log: ["decision", "tool_result"]

step_07_goals:
  list:
    - { priority: 1, name: "file_summary",
        description: "1) list files  2) read each  3) summarize ≤5 bullets  4) save the summary" }

step_08_message_plan:
  system: "You are a precise, factual summarizer. Choose exactly one tool per step. After saving all summaries, end the session."
  user: "Pick the best next tool to make progress. Return a function call, not prose."
  include_tools: true
  include_memory_tail_items: "${config.memory_window}"

step_09_function_calling_driver:
  model: "${config.model}"
  temperature: "${config.temperature}"
  one_tool_per_step: true
  fallback_decision: { tool: "${config.fallback_tool}", args: {} }
  timeout_seconds: 60

step_10_capabilities:
  plan_first_enabled: true
  progress_tracking_enabled: false
  retry_backoff_enabled: false

step_11_orchestrator_loop:
  stop_conditions: ["terminal_tool", "max_iterations"]
  max_iterations: "${config.max_iterations}"

step_12_logging:
  log_triad_each_step: true
  memory_tail_visible: 6
  log_level: "INFO"
  redact_secrets: true

step_13_testing_and_acceptance:
  unit_tests: ["list", "read", "summarize", "write", "validation_errors"]
  integration_smoke: "one_file_end_to_end"
  function_calling_sanity: "choose_tool_on_tiny_dataset"
  acceptance_checklist:
    - "one_output_per_input_file"
    - "leq_5_bullets_factual"
    - "no_unhandled_errors"
    - "missing_file_yields_hint"
    - "truncated_true_for_long_files_and_run_completes"

step_14_storyboard:
  happy_path:
    - "Brain: list files → Body: ['a.txt','b.txt']"
    - "Brain: read a.txt → Body: {content, truncated: false}"
    - "Brain: summarize → Body: bullets string"
    - "Brain: write summary for a.txt → Body: saved path"
    - "Repeat for b.txt"
    - "Brain: terminate ('All summaries written.')"
  error_branch:
    - "read a.txt fails: file not found + hint 'list files'"
    - "Brain: call list_txt_files to recover"

step_15_risks_and_early_choices:
  large_files_plan: "add read_txt_chunk in v2"
  encoding_policy: "errors='replace'"
  naming_convention: "verb_object_context"
  determinism: { temperature_low: true, seed_supported: false }

step_16_ready_to_code_checklist:
  items:
    - "Brain/Body table finalized"
    - "Tool list + schemas finalized"
    - "Environment rules agreed"
    - "Memory policy set"
    - "Goal text written"
    - "Message plan written"
    - "Function-calling defaults set"
    - "Capabilities chosen"
    - "Loop thin & reusable"
    - "Testing & Acceptance checklist ready"
"""

# Write the step-indexed sheet to disk unchanged (UTF-8), then echo the destination.
output_path = "/content/Master_Recipe_Particulars_B.txt"
with open(output_path, mode="w", encoding="utf-8") as sheet_file:
    sheet_file.write(particulars)
print(f"Saved: {output_path}")


Saved: /content/Master_Recipe_Particulars_B.txt


In [5]:
import textwrap, os

# YAML-style particulars sheet, keyed by recipe step (step_01 ... step_16),
# held as a single text literal. The '#' lines inside the triple quotes are
# part of the string, not Python comments.
# NOTE(review): the body appears to match the `particulars` literal that
# another cell of this notebook saves as Master_Recipe_Particulars_B.txt —
# confirm whether both copies are needed and keep them in sync if so.
yaml_text = """
# Agent Particulars — Indexed by Recipe Steps

step_01_idea:
  summary: "<one paragraph, plain English>"

step_02_brain_body_table:
  tasks:  # short bullets; this mirrors the table, not the prose
    - { area: "File management", task: "List files", who: "Body" }
    - { area: "Summarization", task: "Make summary", who: "Brain" }

step_03_tool_inventory:
  tools:
    - name: "list_txt_files"
      owner: "Body"
      desc: "Return .txt filenames in input folder"
      preconditions: []
      postconditions: ["returns: [str]"]
    - name: "read_txt_file"
      owner: "Body"
      desc: "Read text; mark truncation if over cap"
      preconditions: ["file exists under input_dir"]
      postconditions: ["returns: {file_name, content, truncated: bool}"]
    - name: "summarize_text"
      owner: "Brain"
      desc: "≤5 factual bullets"
      preconditions: ["text non-empty"]
      postconditions: ["returns: str (bullets)"]
    - name: "write_summary_file"
      owner: "Body"
      desc: "Save sanitized summary"
      preconditions: ["output_dir writable"]
      postconditions: ["returns: output_path"]
    - name: "terminate"
      owner: "Body"
      desc: "Signal completion"
      preconditions: []
      postconditions: ["terminal: true"]

step_03_5_action_context:
  env: true
  registry: true
  memory: true
  llm_helpers:
    openai_chat_fn: "enabled"
  config:
    input_dir: "/content/files"
    output_dir: "/content/summaries"
    truncate_chars: 12000
    model: "gpt-4o-mini"
    temperature: 0.2
    max_iterations: 20
    memory_window: 8
    fallback_tool: "list_txt_files"

step_04_tool_schemas:
  read_txt_file:
    type: object
    properties: { file_name: { type: string } }
    required: [file_name]
  summarize_text:
    type: object
    properties:
      text: { type: string }
      max_points: { type: integer, minimum: 1, maximum: 12 }
      style: { type: string, enum: [bullet, paragraph] }
    required: [text]
  write_summary_file:
    type: object
    properties:
      source_file: { type: string }
      content: { type: string }
    required: [source_file, content]
  terminate:
    type: object
    properties: { message: { type: string } }
    required: []

step_05_environment_contracts:
  path_whitelist: ["${config.input_dir}", "${config.output_dir}"]
  filename_policy: "ascii_safe_slug"
  summary_filename_pattern: "{stem}.summary.txt"
  on_collision: "overwrite"
  truncation_cap_chars: "${config.truncate_chars}"
  result_envelope:
    success: { tool_executed: true, result: "<any>" }
    failure: { tool_executed: false, error: "<string>", hint: "<string>", retryable: "<bool>" }

step_06_memory_policy:
  item_shape: "{role: user|assistant|tool, content: str|dict}"
  window_items: "${config.memory_window}"
  coerce_dicts_to_strings: true
  always_log: ["decision", "tool_result"]

step_07_goals:
  list:
    - { priority: 1, name: "file_summary",
        description: "1) list files  2) read each  3) summarize ≤5 bullets  4) save the summary" }

step_08_message_plan:
  system: "You are a precise, factual summarizer. Choose exactly one tool per step. After saving all summaries, end the session."
  user: "Pick the best next tool to make progress. Return a function call, not prose."
  include_tools: true
  include_memory_tail_items: "${config.memory_window}"

step_09_function_calling_driver:
  model: "${config.model}"
  temperature: "${config.temperature}"
  one_tool_per_step: true
  fallback_decision: { tool: "${config.fallback_tool}", args: {} }
  timeout_seconds: 60

step_10_capabilities:
  plan_first_enabled: true
  progress_tracking_enabled: false
  retry_backoff_enabled: false

step_11_orchestrator_loop:
  stop_conditions: ["terminal_tool", "max_iterations"]
  max_iterations: "${config.max_iterations}"

step_12_logging:
  log_triad_each_step: true
  memory_tail_visible: 6
  log_level: "INFO"
  redact_secrets: true

step_13_testing_and_acceptance:
  unit_tests: ["list", "read", "summarize", "write", "validation_errors"]
  integration_smoke: "one_file_end_to_end"
  function_calling_sanity: "choose_tool_on_tiny_dataset"
  acceptance_checklist:
    - "one_output_per_input_file"
    - "leq_5_bullets_factual"
    - "no_unhandled_errors"
    - "missing_file_yields_hint"
    - "truncated_true_for_long_files_and_run_completes"

step_14_storyboard:
  happy_path:
    - "Brain: list files → Body: ['a.txt','b.txt']"
    - "Brain: read a.txt → Body: {content, truncated: false}"
    - "Brain: summarize → Body: bullets string"
    - "Brain: write summary for a.txt → Body: saved path"
    - "Repeat for b.txt"
    - "Brain: terminate ('All summaries written.')"
  error_branch:
    - "read a.txt fails: file not found + hint 'list files'"
    - "Brain: call list_txt_files to recover"

step_15_risks_and_early_choices:
  large_files_plan: "add read_txt_chunk in v2"
  encoding_policy: "errors='replace'"
  naming_convention: "verb_object_context"
  determinism: { temperature_low: true, seed_supported: false }

step_16_ready_to_code_checklist:
  items:
    - "Brain/Body table finalized"
    - "Tool list + schemas finalized"
    - "Environment rules agreed"
    - "Memory policy set"
    - "Goal text written"
    - "Message plan written"
    - "Function-calling defaults set"
    - "Capabilities chosen"
    - "Loop thin & reusable"
    - "Testing & Acceptance checklist ready"
"""
# Normalise the literal before saving: drop common leading indentation, trim
# surrounding whitespace, and terminate the file with exactly one newline.
out_path = "/content/Agent_Particulars_By_Recipe_Step.yaml"
yaml_body = textwrap.dedent(yaml_text).strip()
with open(out_path, mode="w", encoding="utf-8") as yaml_file:
    yaml_file.write(yaml_body + "\n")
print(f"Saved: {out_path}")


Saved: /content/Agent_Particulars_By_Recipe_Step.yaml
