diff --git a/src/conductor/cli/app.py b/src/conductor/cli/app.py index 8f0add6..fd80ef1 100644 --- a/src/conductor/cli/app.py +++ b/src/conductor/cli/app.py @@ -306,6 +306,24 @@ def run( ), ), ] = False, + workspace_instructions: Annotated[ + bool, + typer.Option( + "--workspace-instructions", + help=( + "Auto-discover workspace instruction files " + "(AGENTS.md, CLAUDE.md, .github/copilot-instructions.md) " + "and prepend them to all agent prompts." + ), + ), + ] = False, + raw_instructions: Annotated[ + list[str] | None, + typer.Option( + "--instructions", + help="Path to instruction file(s) to prepend to all agent prompts. Can be repeated.", + ), + ] = None, ) -> None: """Run a workflow from a YAML file. @@ -329,6 +347,8 @@ def run( conductor run workflow.yaml --web conductor run workflow.yaml --web --web-port 8080 conductor run workflow.yaml --web-bg + conductor run workflow.yaml --workspace-instructions + conductor run workflow.yaml --instructions AGENTS.md """ import asyncio import json @@ -418,6 +438,8 @@ def run( no_interactive=True, # Always non-interactive in background web_port=web_port, metadata=cli_metadata, + workspace_instructions=workspace_instructions, + cli_instructions=raw_instructions, ) console.print(f"[bold cyan]Dashboard:[/bold cyan] {url}") console.print( @@ -443,6 +465,8 @@ def run( web_port=web_port, web_bg=web_bg, metadata=cli_metadata, + workspace_instructions=workspace_instructions, + cli_instructions=raw_instructions, ) ) diff --git a/src/conductor/cli/bg_runner.py b/src/conductor/cli/bg_runner.py index 462764b..d415ee9 100644 --- a/src/conductor/cli/bg_runner.py +++ b/src/conductor/cli/bg_runner.py @@ -63,6 +63,8 @@ def launch_background( no_interactive: bool = True, web_port: int = 0, metadata: dict[str, str] | None = None, + workspace_instructions: bool = False, + cli_instructions: list[str] | None = None, ) -> str: """Fork a detached child process running the workflow with a web dashboard. 
@@ -79,6 +81,8 @@ def launch_background( no_interactive: Whether to disable interactive mode (always True for bg). web_port: Desired port (0 = auto-select). metadata: Optional CLI metadata key=value pairs. + workspace_instructions: Whether to auto-discover workspace instruction files. + cli_instructions: Optional list of instruction file paths. Returns: The dashboard URL (e.g. ``http://127.0.0.1:8080``). @@ -123,6 +127,13 @@ def launch_background( if log_file: cmd.extend(["--log-file", str(log_file)]) + if workspace_instructions: + cmd.append("--workspace-instructions") + + if cli_instructions: + for instr_path in cli_instructions: + cmd.extend(["--instructions", instr_path]) + # Launch detached child kwargs: dict[str, Any] = { "stdout": subprocess.DEVNULL, diff --git a/src/conductor/cli/run.py b/src/conductor/cli/run.py index 7e75ab8..2726666 100644 --- a/src/conductor/cli/run.py +++ b/src/conductor/cli/run.py @@ -1026,6 +1026,8 @@ async def run_workflow_async( web_port: int = 0, web_bg: bool = False, metadata: dict[str, str] | None = None, + workspace_instructions: bool = False, + cli_instructions: list[str] | None = None, ) -> dict[str, Any]: """Execute a workflow asynchronously. @@ -1040,6 +1042,8 @@ async def run_workflow_async( web_port: Port for the web dashboard (0 = auto-select). web_bg: If True, auto-shutdown dashboard after workflow + client disconnect. metadata: Optional CLI metadata to merge on top of YAML-declared metadata. + workspace_instructions: If True, auto-discover workspace instruction files. + cli_instructions: Optional list of instruction file paths from CLI. Returns: The workflow output as a dictionary. 
@@ -1118,6 +1122,22 @@ async def run_workflow_async( verbose_log(f"Provider override: {provider_override}", style="yellow") config.workflow.runtime.provider = provider_override # type: ignore[assignment] + # Build workspace instructions preamble + instructions_preamble: str | None = None + if workspace_instructions or cli_instructions or config.workflow.instructions: + from conductor.config.instructions import build_instructions_preamble + + instructions_preamble = build_instructions_preamble( + auto_discover_dir=Path.cwd() if workspace_instructions else None, + yaml_instructions=config.workflow.instructions or None, + cli_instruction_paths=cli_instructions, + ) + if instructions_preamble: + verbose_log( + f"Workspace instructions loaded ({len(instructions_preamble)} chars)", + style="cyan", + ) + # Convert MCP servers from workflow config to SDK format mcp_servers = await _build_mcp_servers(config) @@ -1159,6 +1179,7 @@ async def run_workflow_async( event_emitter=emitter, keyboard_listener=listener, web_dashboard=dashboard, + instructions_preamble=instructions_preamble, run_context=RunContext( run_id=event_log_subscriber.run_id if event_log_subscriber else "", log_file=str(event_log_subscriber.path) if event_log_subscriber else "", @@ -1583,6 +1604,7 @@ async def resume_workflow_async( workflow_path=resolved_workflow_path, interrupt_event=interrupt_event, keyboard_listener=listener, + instructions_preamble=cp.instructions_preamble, ) engine.set_context(restored_context) engine.set_limits(restored_limits) diff --git a/src/conductor/config/instructions.py b/src/conductor/config/instructions.py new file mode 100644 index 0000000..c0cf432 --- /dev/null +++ b/src/conductor/config/instructions.py @@ -0,0 +1,284 @@ +"""Workspace instruction file discovery and loading. + +This module discovers and loads workspace instruction files (AGENTS.md, +CLAUDE.md, copilot-instructions.md, etc.) 
that provide context about a +repository's conventions, coding style, and architecture to workflow agents. + +The primary use case is enabling conductor workflows (which may live in +distant skill directories) to automatically pick up the target repository's +instruction files when invoked from within that repo. +""" + +from __future__ import annotations + +import logging +from pathlib import Path + +logger = logging.getLogger(__name__) + +# Convention instruction files to discover, in deterministic order. +# Each entry is a relative path from a directory to check. +CONVENTION_FILES: list[str] = [ + "AGENTS.md", + ".github/copilot-instructions.md", + "CLAUDE.md", +] + +# Warn when total instruction content exceeds this threshold (bytes). +INSTRUCTION_SIZE_WARNING_THRESHOLD = 50 * 1024 # 50 KB + + +def _find_git_root(start_dir: Path) -> Path | None: + """Find the git repository root by walking up from start_dir. + + Looks for a `.git` directory or file (worktrees use a `.git` file + pointing to the main repo's git dir). + + Args: + start_dir: Directory to start searching from. + + Returns: + The git root directory, or None if not in a git repo. + """ + current = start_dir.resolve() + while True: + git_path = current / ".git" + if git_path.exists(): + return current + parent = current.parent + if parent == current: + # Reached filesystem root + return None + current = parent + + +def discover_workspace_instructions(start_dir: Path) -> list[Path]: + """Discover convention instruction files by walking up to the git root. + + Searches from ``start_dir`` up to the git repository root for known + convention files (AGENTS.md, .github/copilot-instructions.md, CLAUDE.md). + Files closer to ``start_dir`` take precedence when the same filename + exists at multiple levels. + + Args: + start_dir: Directory to start discovery from (typically CWD). + + Returns: + List of discovered instruction file paths in deterministic order, + grouped by convention file type. 
+ """ + git_root = _find_git_root(start_dir) + stop_at = git_root if git_root is not None else start_dir.resolve() + + # Track which convention file names we've already found (closest wins). + found_names: set[str] = set() + discovered: dict[str, Path] = {} + + current = start_dir.resolve() + while True: + for convention_file in CONVENTION_FILES: + if convention_file in found_names: + continue + candidate = current / convention_file + if candidate.is_file(): + found_names.add(convention_file) + discovered[convention_file] = candidate + logger.debug("Discovered instruction file: %s", candidate) + + if current == stop_at or current.parent == current: + break + current = current.parent + + # Return in the deterministic order defined by CONVENTION_FILES + return [discovered[name] for name in CONVENTION_FILES if name in discovered] + + +def load_instruction_files(paths: list[Path]) -> str: + """Read and concatenate instruction files into a single text block. + + Each file's content is wrapped with a source header for traceability. + Files that cannot be read are skipped with a warning. + + Args: + paths: List of instruction file paths to load. + + Returns: + Concatenated instruction content, or empty string if no files loaded. + """ + sections: list[str] = [] + + for path in paths: + try: + content = path.read_text(encoding="utf-8").strip() + if content: + sections.append(f"# Instructions from: {path.name}\n\n{content}") + logger.debug("Loaded instruction file: %s (%d bytes)", path, len(content)) + except (OSError, UnicodeDecodeError) as e: + logger.warning("Failed to read instruction file %s: %s", path, e) + + combined = "\n\n---\n\n".join(sections) + + if combined and len(combined.encode("utf-8")) > INSTRUCTION_SIZE_WARNING_THRESHOLD: + size_kb = len(combined.encode("utf-8")) / 1024 + logger.warning( + "Total workspace instructions are %.1fKB (threshold: %dKB). 
" + "Large instructions consume tokens on every agent call.", + size_kb, + INSTRUCTION_SIZE_WARNING_THRESHOLD // 1024, + ) + + return combined + + +def build_inner_instructions( + *, + auto_discover_dir: Path | None = None, + yaml_instructions: list[str] | None = None, + cli_instruction_paths: list[str] | None = None, +) -> str | None: + """Combine all instruction sources into raw (unwrapped) content. + + This returns the inner text without ```` tags. + Use :func:`build_instructions_preamble` for the fully wrapped version, + or call this directly when merging multiple instruction sources before + wrapping once at the outermost layer (e.g. sub-workflow merging). + + Sources are combined in this order: + 1. Auto-discovered workspace files (if ``auto_discover_dir`` is provided) + 2. Workflow YAML ``instructions`` field entries + 3. CLI ``--instructions`` file paths + + Args: + auto_discover_dir: Directory to start auto-discovery from (typically CWD). + If None, auto-discovery is skipped. + yaml_instructions: Instruction entries from the workflow YAML ``instructions`` + field. Each entry can be inline text or content already loaded via ``!file``. + cli_instruction_paths: File paths provided via ``--instructions`` CLI flag. + + Returns: + Combined raw instruction text, or None if no instructions were found. + """ + parts: list[str] = [] + + # 1. Auto-discovered workspace files + if auto_discover_dir is not None: + discovered = discover_workspace_instructions(auto_discover_dir) + if discovered: + content = load_instruction_files(discovered) + if content: + parts.append(content) + logger.info("Auto-discovered %d workspace instruction file(s)", len(discovered)) + + # 2. Workflow YAML instructions field + if yaml_instructions: + for entry in yaml_instructions: + text = entry.strip() + if text: + parts.append(text) + + # 3. 
CLI --instructions file paths + if cli_instruction_paths: + cli_paths = [] + missing: list[str] = [] + for path_str in cli_instruction_paths: + p = Path(path_str) + if p.is_file(): + cli_paths.append(p) + else: + missing.append(path_str) + if missing: + raise FileNotFoundError(f"Instruction file(s) not found: {', '.join(missing)}") + if cli_paths: + content = load_instruction_files(cli_paths) + if content: + parts.append(content) + + if not parts: + return None + + return "\n\n---\n\n".join(parts) + + +def _wrap_preamble(inner: str) -> str: + """Wrap raw instruction content in workspace_instructions tags.""" + return ( + "\n" + "The following workspace instructions describe the conventions, patterns, " + "and practices for the repository you are working in. Follow them when " + "writing code, reviewing changes, or designing solutions.\n\n" + f"{inner}\n" + "\n\n" + ) + + +_OPEN_TAG = "\n" +_CLOSE_TAG = "\n\n\n" + +# The preamble header text inserted after the opening tag +_HEADER = ( + "The following workspace instructions describe the conventions, patterns, " + "and practices for the repository you are working in. Follow them when " + "writing code, reviewing changes, or designing solutions.\n\n" +) + + +def _unwrap_preamble(preamble: str) -> str: + """Extract inner content from a wrapped preamble string. + + Strips the ```` tags and header text, + returning only the raw instruction content. + + Args: + preamble: A preamble string produced by :func:`_wrap_preamble`. + + Returns: + The inner instruction content without wrapper tags. 
+ """ + inner = preamble + if inner.startswith(_OPEN_TAG): + inner = inner[len(_OPEN_TAG) :] + if inner.startswith(_HEADER): + inner = inner[len(_HEADER) :] + if inner.endswith(_CLOSE_TAG): + inner = inner[: -len(_CLOSE_TAG)] + elif inner.endswith("\n\n"): + inner = inner[: -len("\n\n")] + return inner.strip() + + +def build_instructions_preamble( + *, + auto_discover_dir: Path | None = None, + yaml_instructions: list[str] | None = None, + cli_instruction_paths: list[str] | None = None, +) -> str | None: + """Combine all instruction sources into a single wrapped preamble string. + + Sources are combined in this order: + 1. Auto-discovered workspace files (if ``auto_discover_dir`` is provided) + 2. Workflow YAML ``instructions`` field entries + 3. CLI ``--instructions`` file paths + + The combined content is wrapped in ```` tags. + Use :func:`build_inner_instructions` to get the unwrapped content + (e.g. for merging with a parent preamble before wrapping once). + + Args: + auto_discover_dir: Directory to start auto-discovery from (typically CWD). + If None, auto-discovery is skipped. + yaml_instructions: Instruction entries from the workflow YAML ``instructions`` + field. Each entry can be inline text or content already loaded via ``!file``. + cli_instruction_paths: File paths provided via ``--instructions`` CLI flag. + + Returns: + Combined preamble string to prepend to agent prompts, or None if no + instructions were found from any source. 
+ """ + inner = build_inner_instructions( + auto_discover_dir=auto_discover_dir, + yaml_instructions=yaml_instructions, + cli_instruction_paths=cli_instruction_paths, + ) + if inner is None: + return None + return _wrap_preamble(inner) diff --git a/src/conductor/config/schema.py b/src/conductor/config/schema.py index a0e8791..83510c7 100644 --- a/src/conductor/config/schema.py +++ b/src/conductor/config/schema.py @@ -795,6 +795,28 @@ class WorkflowDef(BaseModel): consumers can use it for enrichment without parsing the YAML source. """ + instructions: list[str] = Field(default_factory=list) + """Workspace instruction file contents or inline text. + + Each entry can be: + - A ``!file`` tag reference (resolved by the YAML loader) + - Inline text included as-is + + Instructions from all entries are concatenated and prepended to every + agent's prompt as workspace context. Use this for self-contained + workflows where the YAML lives alongside the code. + + For workflows distributed as skills (where the YAML lives far from + the target repo), use the ``--workspace-instructions`` CLI flag + instead for automatic discovery. + + Example:: + + instructions: + - !file ../AGENTS.md + - "Always respond in English." 
+ """ + class WorkflowConfig(BaseModel): """Complete workflow configuration file.""" diff --git a/src/conductor/engine/checkpoint.py b/src/conductor/engine/checkpoint.py index 596861e..a2122ea 100644 --- a/src/conductor/engine/checkpoint.py +++ b/src/conductor/engine/checkpoint.py @@ -91,6 +91,8 @@ class CheckpointData: limits: dict[str, Any] copilot_session_ids: dict[str, str] = field(default_factory=dict) file_path: Path = field(default_factory=lambda: Path()) + instructions_preamble: str | None = None + """Workspace instructions preamble that was active during the original run.""" class CheckpointManager: @@ -137,6 +139,7 @@ def save_checkpoint( inputs: dict[str, Any], copilot_session_ids: dict[str, str] | None = None, system_metadata: dict[str, Any] | None = None, + instructions_preamble: str | None = None, ) -> Path | None: """Serialize workflow state to a checkpoint file. @@ -155,6 +158,7 @@ def save_checkpoint( inputs: Workflow inputs. copilot_session_ids: Optional mapping of agent names to session IDs. system_metadata: Optional system metadata captured at workflow start. + instructions_preamble: Optional workspace instructions preamble to persist. Returns: Path to the saved checkpoint file, or ``None`` if saving failed. 
@@ -196,6 +200,7 @@ def save_checkpoint( "limits": _make_json_serializable(limits.to_dict()), "copilot_session_ids": copilot_session_ids or {}, "system": system_metadata or {}, + "instructions_preamble": instructions_preamble, } # Serialize to JSON @@ -311,6 +316,7 @@ def load_checkpoint(checkpoint_path: Path) -> CheckpointData: limits=data["limits"], copilot_session_ids=data.get("copilot_session_ids", {}), file_path=checkpoint_path, + instructions_preamble=data.get("instructions_preamble"), ) @staticmethod diff --git a/src/conductor/engine/workflow.py b/src/conductor/engine/workflow.py index cc0c7fc..6967499 100644 --- a/src/conductor/engine/workflow.py +++ b/src/conductor/engine/workflow.py @@ -286,6 +286,7 @@ def __init__( _subworkflow_depth: int = 0, run_context: RunContext | None = None, _dashboard_context_path: list[str] | None = None, + instructions_preamble: str | None = None, ) -> None: """Initialize the WorkflowEngine. @@ -322,6 +323,10 @@ def __init__( ``subworkflow_path`` on outgoing events so the dashboard can route per-context state under concurrency. Callers should not set this directly. + instructions_preamble: Optional workspace instructions text to prepend + to every agent's rendered prompt. Built from auto-discovered + workspace files, YAML ``instructions`` field, and/or CLI + ``--instructions`` flags. Inherited by sub-workflows. Note: If both provider and registry are provided, registry takes precedence. 
@@ -356,10 +361,17 @@ def __init__( self._registry = registry self._single_provider = provider + # Workspace instructions preamble (inherited by sub-workflows) + self._instructions_preamble = instructions_preamble + # For backward compatibility, create a default executor with single provider # This is used when registry is None if provider is not None: - self.executor = AgentExecutor(provider, workflow_tools=config.tools) + self.executor = AgentExecutor( + provider, + workflow_tools=config.tools, + instructions_preamble=self._instructions_preamble, + ) self.provider = provider # Keep for backward compatibility else: # Create a placeholder - will be created per-agent when using registry @@ -540,7 +552,11 @@ async def _get_executor_for_agent(self, agent: AgentDef) -> AgentExecutor: if self._registry is not None: # Multi-provider mode: get provider from registry provider = await self._registry.get_provider(agent) - return AgentExecutor(provider, workflow_tools=self.config.tools) + return AgentExecutor( + provider, + workflow_tools=self.config.tools, + instructions_preamble=self._instructions_preamble, + ) elif self.executor is not None: # Single provider mode (backward compatibility) return self.executor @@ -687,6 +703,28 @@ async def _execute_subworkflow( # Build sub-workflow inputs from the parent context sub_inputs = self._build_subworkflow_inputs(agent, context) + # Merge instructions preamble: parent preamble + sub-workflow's own instructions. + # Uses inner (unwrapped) content to avoid nested tags, + # then wraps once at the outermost layer. 
+ child_preamble = self._instructions_preamble + if sub_config.workflow.instructions: + from conductor.config.instructions import ( + _unwrap_preamble, + _wrap_preamble, + build_inner_instructions, + ) + + sub_inner = build_inner_instructions( + yaml_instructions=sub_config.workflow.instructions, + ) + if sub_inner: + if child_preamble: + # Parent preamble is already wrapped — unwrap, merge, re-wrap + parent_inner = _unwrap_preamble(child_preamble) + child_preamble = _wrap_preamble(parent_inner + "\n\n---\n\n" + sub_inner) + else: + child_preamble = _wrap_preamble(sub_inner) + # Create child engine inheriting provider/registry but with deeper depth child_engine = WorkflowEngine( config=sub_config, @@ -703,6 +741,7 @@ async def _execute_subworkflow( *self._dashboard_context_path, slot_key or agent.name, ], + instructions_preamble=child_preamble, ) return await child_engine.run(sub_inputs) @@ -794,6 +833,27 @@ async def _execute_subworkflow_with_inputs( *dashboard_path, slot_key or agent.name, ] + + # Merge instructions preamble: parent preamble + sub-workflow's own instructions + child_preamble = self._instructions_preamble + if sub_config.workflow.instructions: + from conductor.config.instructions import ( + _unwrap_preamble, + _wrap_preamble, + build_inner_instructions, + ) + + sub_inner = build_inner_instructions( + yaml_instructions=sub_config.workflow.instructions, + ) + if sub_inner: + if child_preamble: + parent_inner = _unwrap_preamble(child_preamble) + child_preamble = _wrap_preamble(parent_inner + "\n\n---\n\n" + sub_inner) + else: + child_preamble = _wrap_preamble(sub_inner) + child_engine_kwargs["instructions_preamble"] = child_preamble + child_engine = WorkflowEngine(**child_engine_kwargs) output = await child_engine.run(sub_inputs) @@ -988,6 +1048,7 @@ def _save_checkpoint_on_failure(self, error: BaseException) -> None: inputs=self.context.workflow_inputs, copilot_session_ids=copilot_session_ids, system_metadata=self._system_metadata, + 
instructions_preamble=self._instructions_preamble, ) self._last_checkpoint_path = checkpoint_path if checkpoint_path is not None: diff --git a/src/conductor/executor/agent.py b/src/conductor/executor/agent.py index e68c22d..48a7e03 100644 --- a/src/conductor/executor/agent.py +++ b/src/conductor/executor/agent.py @@ -97,15 +97,19 @@ def __init__( self, provider: AgentProvider, workflow_tools: list[str] | None = None, + instructions_preamble: str | None = None, ) -> None: """Initialize the AgentExecutor. Args: provider: The agent provider to use for execution. workflow_tools: Tools defined at workflow level. Defaults to empty list. + instructions_preamble: Optional workspace instructions text to prepend + to every agent's rendered prompt. """ self.provider = provider self.workflow_tools = workflow_tools or [] + self.instructions_preamble = instructions_preamble self.renderer = TemplateRenderer() async def execute( @@ -153,6 +157,10 @@ async def execute( # Render prompt with context rendered_prompt = self.renderer.render(agent.prompt, context) + # Prepend workspace instructions preamble if available + if self.instructions_preamble: + rendered_prompt = self.instructions_preamble + rendered_prompt + # Append user guidance section if provided if guidance_section: rendered_prompt = rendered_prompt + guidance_section @@ -222,7 +230,7 @@ async def execute( return output def render_prompt(self, agent: AgentDef, context: dict[str, Any]) -> str: - """Render an agent's prompt template. + """Render an agent's prompt template including workspace instructions. This is useful for debugging or dry-run mode. @@ -231,9 +239,12 @@ def render_prompt(self, agent: AgentDef, context: dict[str, Any]) -> str: context: Context for prompt rendering. Returns: - Rendered prompt string. + Rendered prompt string with workspace instructions prepended if configured. Raises: TemplateError: If prompt rendering fails. 
""" - return self.renderer.render(agent.prompt, context) + rendered = self.renderer.render(agent.prompt, context) + if self.instructions_preamble: + rendered = self.instructions_preamble + rendered + return rendered diff --git a/tests/test_config/test_instructions.py b/tests/test_config/test_instructions.py new file mode 100644 index 0000000..6a45324 --- /dev/null +++ b/tests/test_config/test_instructions.py @@ -0,0 +1,656 @@ +"""Tests for workspace instruction file discovery and loading.""" + +from __future__ import annotations + +import logging +from pathlib import Path + +import pytest + +from conductor.config.instructions import ( + INSTRUCTION_SIZE_WARNING_THRESHOLD, + _find_git_root, + build_instructions_preamble, + discover_workspace_instructions, + load_instruction_files, +) + +# --------------------------------------------------------------------------- +# _find_git_root +# --------------------------------------------------------------------------- + + +class TestFindGitRoot: + """Tests for _find_git_root().""" + + def test_finds_git_directory(self, tmp_path: Path) -> None: + (tmp_path / ".git").mkdir() + sub = tmp_path / "a" / "b" + sub.mkdir(parents=True) + assert _find_git_root(sub) == tmp_path + + def test_finds_git_file_worktree(self, tmp_path: Path) -> None: + """Git worktrees use a .git file instead of a directory.""" + (tmp_path / ".git").write_text("gitdir: /somewhere/else/.git/worktrees/foo") + sub = tmp_path / "src" + sub.mkdir() + assert _find_git_root(sub) == tmp_path + + def test_returns_none_outside_git(self, tmp_path: Path) -> None: + sub = tmp_path / "norepo" + sub.mkdir() + assert _find_git_root(sub) is None + + def test_git_root_is_start_dir(self, tmp_path: Path) -> None: + (tmp_path / ".git").mkdir() + assert _find_git_root(tmp_path) == tmp_path + + +# --------------------------------------------------------------------------- +# discover_workspace_instructions +# 
--------------------------------------------------------------------------- + + +class TestDiscoverWorkspaceInstructions: + """Tests for discover_workspace_instructions().""" + + def test_discovers_agents_md(self, tmp_path: Path) -> None: + (tmp_path / ".git").mkdir() + (tmp_path / "AGENTS.md").write_text("# Agent instructions") + result = discover_workspace_instructions(tmp_path) + assert len(result) == 1 + assert result[0].name == "AGENTS.md" + + def test_discovers_all_convention_files(self, tmp_path: Path) -> None: + (tmp_path / ".git").mkdir() + (tmp_path / "AGENTS.md").write_text("agents") + (tmp_path / ".github").mkdir() + (tmp_path / ".github" / "copilot-instructions.md").write_text("copilot") + (tmp_path / "CLAUDE.md").write_text("claude") + + result = discover_workspace_instructions(tmp_path) + assert len(result) == 3 + # Deterministic order matches CONVENTION_FILES + assert [p.name for p in result] == [ + "AGENTS.md", + "copilot-instructions.md", + "CLAUDE.md", + ] + + def test_discovers_files_in_parent_directory(self, tmp_path: Path) -> None: + (tmp_path / ".git").mkdir() + (tmp_path / "AGENTS.md").write_text("root agents") + sub = tmp_path / "src" / "pkg" + sub.mkdir(parents=True) + + result = discover_workspace_instructions(sub) + assert len(result) == 1 + assert result[0] == tmp_path / "AGENTS.md" + + def test_closest_file_wins(self, tmp_path: Path) -> None: + """If AGENTS.md exists at multiple levels, closest to start_dir wins.""" + (tmp_path / ".git").mkdir() + (tmp_path / "AGENTS.md").write_text("root agents") + sub = tmp_path / "subdir" + sub.mkdir() + (sub / "AGENTS.md").write_text("local agents") + + result = discover_workspace_instructions(sub) + assert len(result) == 1 + assert result[0] == sub / "AGENTS.md" + + def test_stops_at_git_root(self, tmp_path: Path) -> None: + """Discovery should not walk above the git root.""" + (tmp_path / "AGENTS.md").write_text("above git root") + repo = tmp_path / "myrepo" + repo.mkdir() + (repo / 
".git").mkdir() + + result = discover_workspace_instructions(repo) + assert len(result) == 0 + + def test_no_git_repo_only_checks_start_dir(self, tmp_path: Path) -> None: + """Without .git, discovery stops at start_dir.""" + (tmp_path / "AGENTS.md").write_text("parent") + sub = tmp_path / "child" + sub.mkdir() + + result = discover_workspace_instructions(sub) + assert len(result) == 0 + + def test_no_files_found(self, tmp_path: Path) -> None: + (tmp_path / ".git").mkdir() + result = discover_workspace_instructions(tmp_path) + assert result == [] + + def test_mixed_levels(self, tmp_path: Path) -> None: + """AGENTS.md in sub, CLAUDE.md in root — both found.""" + (tmp_path / ".git").mkdir() + (tmp_path / "CLAUDE.md").write_text("claude root") + sub = tmp_path / "src" + sub.mkdir() + (sub / "AGENTS.md").write_text("agents local") + + result = discover_workspace_instructions(sub) + assert len(result) == 2 + names = [p.name for p in result] + assert "AGENTS.md" in names + assert "CLAUDE.md" in names + + +# --------------------------------------------------------------------------- +# load_instruction_files +# --------------------------------------------------------------------------- + + +class TestLoadInstructionFiles: + """Tests for load_instruction_files().""" + + def test_loads_single_file(self, tmp_path: Path) -> None: + f = tmp_path / "AGENTS.md" + f.write_text("# Instructions\nDo stuff.") + result = load_instruction_files([f]) + assert "# Instructions from: AGENTS.md" in result + assert "Do stuff." 
in result + + def test_loads_multiple_files(self, tmp_path: Path) -> None: + a = tmp_path / "AGENTS.md" + a.write_text("agents content") + c = tmp_path / "CLAUDE.md" + c.write_text("claude content") + + result = load_instruction_files([a, c]) + assert "agents content" in result + assert "claude content" in result + assert "---" in result # separator + + def test_skips_empty_files(self, tmp_path: Path) -> None: + a = tmp_path / "AGENTS.md" + a.write_text(" \n ") # whitespace only + result = load_instruction_files([a]) + assert result == "" + + def test_skips_unreadable_files(self, tmp_path: Path, caplog: pytest.LogCaptureFixture) -> None: + missing = tmp_path / "nonexistent.md" + with caplog.at_level(logging.WARNING): + result = load_instruction_files([missing]) + assert result == "" + assert "Failed to read" in caplog.text + + def test_warns_on_large_content(self, tmp_path: Path, caplog: pytest.LogCaptureFixture) -> None: + f = tmp_path / "large.md" + f.write_text("x" * (INSTRUCTION_SIZE_WARNING_THRESHOLD + 1)) + with caplog.at_level(logging.WARNING): + load_instruction_files([f]) + assert "workspace instructions" in caplog.text.lower() + + def test_strips_whitespace(self, tmp_path: Path) -> None: + f = tmp_path / "AGENTS.md" + f.write_text(" content with spaces \n\n") + result = load_instruction_files([f]) + assert "content with spaces" in result + + +# --------------------------------------------------------------------------- +# build_instructions_preamble +# --------------------------------------------------------------------------- + + +class TestBuildInstructionsPreamble: + """Tests for build_instructions_preamble().""" + + def test_returns_none_with_no_sources(self) -> None: + result = build_instructions_preamble() + assert result is None + + def test_auto_discovery(self, tmp_path: Path) -> None: + (tmp_path / ".git").mkdir() + (tmp_path / "AGENTS.md").write_text("discovered instructions") + + result = build_instructions_preamble(auto_discover_dir=tmp_path) 
+ assert result is not None + assert "discovered instructions" in result + assert "" in result + assert "" in result + + def test_yaml_instructions(self) -> None: + result = build_instructions_preamble( + yaml_instructions=["Always use Python 3.12", "Follow PEP 8"] + ) + assert result is not None + assert "Python 3.12" in result + assert "PEP 8" in result + + def test_yaml_instructions_skip_empty(self) -> None: + result = build_instructions_preamble(yaml_instructions=["content", " ", ""]) + assert result is not None + assert "content" in result + + def test_cli_instructions(self, tmp_path: Path) -> None: + f = tmp_path / "custom.md" + f.write_text("custom instructions") + + result = build_instructions_preamble(cli_instruction_paths=[str(f)]) + assert result is not None + assert "custom instructions" in result + + def test_cli_missing_file_raises(self, tmp_path: Path) -> None: + with pytest.raises(FileNotFoundError, match="not found"): + build_instructions_preamble(cli_instruction_paths=[str(tmp_path / "missing.md")]) + + def test_combines_all_sources(self, tmp_path: Path) -> None: + (tmp_path / ".git").mkdir() + (tmp_path / "AGENTS.md").write_text("auto-discovered") + custom = tmp_path / "custom.md" + custom.write_text("cli-provided") + + result = build_instructions_preamble( + auto_discover_dir=tmp_path, + yaml_instructions=["yaml-inline"], + cli_instruction_paths=[str(custom)], + ) + assert result is not None + assert "auto-discovered" in result + assert "yaml-inline" in result + assert "cli-provided" in result + + def test_precedence_order(self, tmp_path: Path) -> None: + """Auto-discovered → YAML → CLI ordering is preserved.""" + (tmp_path / ".git").mkdir() + (tmp_path / "AGENTS.md").write_text("FIRST") + custom = tmp_path / "custom.md" + custom.write_text("THIRD") + + result = build_instructions_preamble( + auto_discover_dir=tmp_path, + yaml_instructions=["SECOND"], + cli_instruction_paths=[str(custom)], + ) + assert result is not None + first_idx = 
result.index("FIRST") + second_idx = result.index("SECOND") + third_idx = result.index("THIRD") + assert first_idx < second_idx < third_idx + + def test_wraps_with_tags(self, tmp_path: Path) -> None: + result = build_instructions_preamble( + yaml_instructions=["test content"], + ) + assert result is not None + assert result.startswith("") + assert result.rstrip().endswith("") + + def test_no_discovery_when_dir_is_none(self, tmp_path: Path) -> None: + """auto_discover_dir=None should skip discovery entirely.""" + (tmp_path / ".git").mkdir() + (tmp_path / "AGENTS.md").write_text("should not appear") + + result = build_instructions_preamble(auto_discover_dir=None) + assert result is None + + +# --------------------------------------------------------------------------- +# Wrap / unwrap roundtrip +# --------------------------------------------------------------------------- + + +class TestWrapUnwrapPreamble: + """Tests for _wrap_preamble and _unwrap_preamble roundtrip.""" + + def test_roundtrip(self) -> None: + from conductor.config.instructions import _unwrap_preamble, _wrap_preamble + + inner = "Follow PEP 8.\n\n---\n\nUse pytest for tests." 
+ wrapped = _wrap_preamble(inner) + assert "" in wrapped + assert "" in wrapped + unwrapped = _unwrap_preamble(wrapped) + assert unwrapped == inner + + def test_unwrap_passthrough(self) -> None: + """Unwrapping a string without tags returns it stripped.""" + from conductor.config.instructions import _unwrap_preamble + + result = _unwrap_preamble("plain text") + assert result == "plain text" + + +# --------------------------------------------------------------------------- +# AgentExecutor integration +# --------------------------------------------------------------------------- + + +class TestAgentExecutorInstructionsPreamble: + """Tests for instructions preamble integration in AgentExecutor.""" + + def test_preamble_prepended_to_prompt(self) -> None: + """Instructions preamble is prepended to the rendered prompt.""" + from unittest.mock import AsyncMock, MagicMock + + from conductor.config.schema import AgentDef + from conductor.executor.agent import AgentExecutor + from conductor.providers.base import AgentOutput + + provider = MagicMock() + provider.execute = AsyncMock( + return_value=AgentOutput(content={"result": "ok"}, raw_response='{"result":"ok"}') + ) + + preamble = "\nFollow PEP 8\n\n\n" + executor = AgentExecutor( + provider, + instructions_preamble=preamble, + ) + + agent = AgentDef(name="test", prompt="Do the thing.") + + import asyncio + + asyncio.run(executor.execute(agent, {})) + + # Verify the prompt passed to provider includes the preamble + call_args = provider.execute.call_args + rendered = call_args.kwargs.get("rendered_prompt") or call_args[1].get("rendered_prompt") + assert rendered.startswith("") + assert "Follow PEP 8" in rendered + assert "Do the thing." 
in rendered + + def test_no_preamble_when_none(self) -> None: + """Without preamble, prompt is rendered normally.""" + from unittest.mock import AsyncMock, MagicMock + + from conductor.config.schema import AgentDef + from conductor.executor.agent import AgentExecutor + from conductor.providers.base import AgentOutput + + provider = MagicMock() + provider.execute = AsyncMock( + return_value=AgentOutput(content={"result": "ok"}, raw_response='{"result":"ok"}') + ) + + executor = AgentExecutor(provider, instructions_preamble=None) + agent = AgentDef(name="test", prompt="Do the thing.") + + import asyncio + + asyncio.run(executor.execute(agent, {})) + + call_args = provider.execute.call_args + rendered = call_args.kwargs.get("rendered_prompt") or call_args[1].get("rendered_prompt") + assert rendered == "Do the thing." + + def test_render_prompt_includes_preamble(self) -> None: + """render_prompt() should include the preamble for dry-run consistency.""" + from unittest.mock import MagicMock + + from conductor.config.schema import AgentDef + from conductor.executor.agent import AgentExecutor + + provider = MagicMock() + executor = AgentExecutor( + provider, + instructions_preamble="PREAMBLE\n\n", + ) + agent = AgentDef(name="test", prompt="Hello {{ name }}") + result = executor.render_prompt(agent, {"name": "World"}) + assert result == "PREAMBLE\n\nHello World" + + def test_render_prompt_without_preamble(self) -> None: + from unittest.mock import MagicMock + + from conductor.config.schema import AgentDef + from conductor.executor.agent import AgentExecutor + + provider = MagicMock() + executor = AgentExecutor(provider) + agent = AgentDef(name="test", prompt="Hello {{ name }}") + result = executor.render_prompt(agent, {"name": "World"}) + assert result == "Hello World" + + +# --------------------------------------------------------------------------- +# Sub-workflow instruction merging +# --------------------------------------------------------------------------- + + 
class TestSubWorkflowInstructionMerging:
    """Tests for sub-workflow instruction preamble merging in WorkflowEngine."""

    @pytest.mark.asyncio
    async def test_subworkflow_inherits_parent_preamble(self, tmp_path: Path) -> None:
        """A child workflow with no instructions of its own still sees the parent preamble."""
        import textwrap

        from conductor.config.schema import (
            AgentDef,
            ContextConfig,
            LimitsConfig,
            RouteDef,
            RuntimeConfig,
            WorkflowConfig,
            WorkflowDef,
        )
        from conductor.engine.workflow import WorkflowEngine
        from conductor.providers.copilot import CopilotProvider

        # Child workflow written to disk; it declares no instructions.
        child_yaml = textwrap.dedent("""\
            workflow:
              name: sub
              entry_point: inner
              runtime:
                provider: copilot
              limits:
                max_iterations: 5
            agents:
              - name: inner
                prompt: "Do inner work"
                routes:
                  - to: "$end"
            output:
              result: "{{ inner.output.result }}"
            """)
        (tmp_path / "sub.yaml").write_text(child_yaml, encoding="utf-8")

        # The parent file only has to exist; the parent config itself is built in code below.
        parent_file = tmp_path / "parent.yaml"
        parent_file.write_text("dummy", encoding="utf-8")

        parent_workflow = WorkflowDef(
            name="parent",
            entry_point="step",
            runtime=RuntimeConfig(provider="copilot"),
            context=ContextConfig(mode="accumulate"),
            limits=LimitsConfig(max_iterations=10),
        )
        delegating_agent = AgentDef(
            name="step",
            type="workflow",
            workflow="sub.yaml",
            routes=[RouteDef(to="$end")],
        )
        config = WorkflowConfig(
            workflow=parent_workflow,
            agents=[delegating_agent],
            output={"result": "{{ step.output.result }}"},
        )

        captured_prompts: list[str] = []

        def record_prompt(agent, prompt, context):
            # Remember every prompt the provider is asked to run.
            captured_prompts.append(prompt)
            return {"result": "done"}

        engine = WorkflowEngine(
            config,
            CopilotProvider(mock_handler=record_prompt),
            workflow_path=parent_file,
            instructions_preamble="PARENT_PREAMBLE\n\n",
        )
        await engine.run({})

        # Exactly one agent ran (the inner one) and its prompt carries the parent preamble.
        assert len(captured_prompts) == 1
        assert "PARENT_PREAMBLE" in captured_prompts[0]

@pytest.mark.asyncio + async def test_subworkflow_merges_own_instructions(self, tmp_path: Path) -> None: + """Sub-workflow with its own instructions field should merge with parent preamble. + + The merged result should contain a single block, + not nested tags. + """ + import textwrap + + from conductor.config.instructions import _wrap_preamble + from conductor.config.schema import ( + AgentDef, + ContextConfig, + LimitsConfig, + RouteDef, + RuntimeConfig, + WorkflowConfig, + WorkflowDef, + ) + from conductor.engine.workflow import WorkflowEngine + from conductor.providers.copilot import CopilotProvider + + # Create sub-workflow YAML with its own instructions + sub_yaml = tmp_path / "sub.yaml" + sub_yaml.write_text( + textwrap.dedent("""\ + workflow: + name: sub + entry_point: inner + runtime: + provider: copilot + limits: + max_iterations: 5 + instructions: + - "SUB_INSTRUCTION" + agents: + - name: inner + prompt: "Do inner work" + routes: + - to: "$end" + output: + result: "{{ inner.output.result }}" + """), + encoding="utf-8", + ) + + parent_path = tmp_path / "parent.yaml" + parent_path.write_text("dummy", encoding="utf-8") + + config = WorkflowConfig( + workflow=WorkflowDef( + name="parent", + entry_point="step", + runtime=RuntimeConfig(provider="copilot"), + context=ContextConfig(mode="accumulate"), + limits=LimitsConfig(max_iterations=10), + ), + agents=[ + AgentDef( + name="step", + type="workflow", + workflow="sub.yaml", + routes=[RouteDef(to="$end")], + ), + ], + output={"result": "{{ step.output.result }}"}, + ) + + prompts_seen: list[str] = [] + + def mock_handler(agent, prompt, context): + prompts_seen.append(prompt) + return {"result": "done"} + + # Pass a properly wrapped preamble (as build_instructions_preamble would produce) + parent_preamble = _wrap_preamble("PARENT_CONTENT") + + provider = CopilotProvider(mock_handler=mock_handler) + engine = WorkflowEngine( + config, + provider, + workflow_path=parent_path, + 
instructions_preamble=parent_preamble, + ) + await engine.run({}) + + # Inner agent should see both parent and sub instruction content + assert len(prompts_seen) == 1 + prompt = prompts_seen[0] + assert "PARENT_CONTENT" in prompt + assert "SUB_INSTRUCTION" in prompt + + # Critically: only ONE set of tags (not nested) + assert prompt.count("") == 1 + assert prompt.count("") == 1 + + +# --------------------------------------------------------------------------- +# bg_runner flag forwarding +# --------------------------------------------------------------------------- + + +class TestBgRunnerInstructionFlags: + """Tests for --workspace-instructions and --instructions forwarding in bg_runner.""" + + def test_workspace_instructions_flag_forwarded(self) -> None: + """--workspace-instructions should appear in the subprocess command.""" + import contextlib + from unittest.mock import patch + + from conductor.cli.bg_runner import launch_background + + with ( + patch("conductor.cli.bg_runner.subprocess.Popen") as mock_popen, + patch("conductor.cli.bg_runner._wait_for_server", return_value=True), + ): + mock_popen.return_value.pid = 12345 + + with contextlib.suppress(Exception): + launch_background( + workflow_path=Path("test.yaml"), + inputs={}, + workspace_instructions=True, + web_port=9999, + ) + + if mock_popen.called: + cmd = mock_popen.call_args[0][0] + assert "--workspace-instructions" in cmd + + def test_cli_instructions_forwarded(self) -> None: + """--instructions paths should appear in the subprocess command.""" + import contextlib + from unittest.mock import patch + + from conductor.cli.bg_runner import launch_background + + with ( + patch("conductor.cli.bg_runner.subprocess.Popen") as mock_popen, + patch("conductor.cli.bg_runner._wait_for_server", return_value=True), + ): + mock_popen.return_value.pid = 12345 + + with contextlib.suppress(Exception): + launch_background( + workflow_path=Path("test.yaml"), + inputs={}, + cli_instructions=["AGENTS.md", "CLAUDE.md"], + 
web_port=9999, + ) + + if mock_popen.called: + cmd = mock_popen.call_args[0][0] + # Should have --instructions AGENTS.md --instructions CLAUDE.md + instr_indices = [i for i, x in enumerate(cmd) if x == "--instructions"] + assert len(instr_indices) == 2 + assert cmd[instr_indices[0] + 1] == "AGENTS.md" + assert cmd[instr_indices[1] + 1] == "CLAUDE.md"