diff --git a/apps/openant-cli/cmd/analyze.go b/apps/openant-cli/cmd/analyze.go
index 986213b..e9daf80 100644
--- a/apps/openant-cli/cmd/analyze.go
+++ b/apps/openant-cli/cmd/analyze.go
@@ -66,6 +66,9 @@ func runAnalyze(cmd *cobra.Command, args []string) {
 		if analyzeAnalyzerOutput == "" {
 			analyzeAnalyzerOutput = ctx.scanFile("analyzer_output.json")
 		}
+		if analyzeAppContext == "" {
+			analyzeAppContext = ctx.scanFile("application_context.json")
+		}
 		if analyzeRepoPath == "" {
 			analyzeRepoPath = ctx.RepoPath
 		}
diff --git a/apps/openant-cli/cmd/generatecontext.go b/apps/openant-cli/cmd/generatecontext.go
new file mode 100644
index 0000000..9b2e402
--- /dev/null
+++ b/apps/openant-cli/cmd/generatecontext.go
@@ -0,0 +1,127 @@
+package cmd
+
+import (
+	"fmt"
+	"os"
+
+	"github.com/knostic/open-ant-cli/internal/output"
+	"github.com/knostic/open-ant-cli/internal/python"
+	"github.com/spf13/cobra"
+)
+
+// generateContextCmd implements `openant generate-context`. It forwards to the
+// Python CLI's generate-context subcommand and prints a summary of the result.
+var generateContextCmd = &cobra.Command{
+	Use:   "generate-context [repository-path]",
+	Short: "Generate application security context for a repository",
+	Long: `Analyzes a repository and produces an application_context.json file
+that describes the application type, trust boundaries, intended
+behaviors, and patterns that should not be flagged as vulnerabilities.
+
+This context is automatically used by the analyze and verify commands
+to reduce false positives.
+
+If no repository path is given, the active project is used (see: openant init).
+
+The command checks for a manual override file (OPENANT.md or OPENANT.json)
+in the repository root before falling back to LLM-based generation.
+Use --force to skip the manual override check.`,
+	Args: cobra.MaximumNArgs(1),
+	Run:  runGenerateContext,
+}
+
+// Flag values for the generate-context command.
+var (
+	gcOutput     string
+	gcForce      bool
+	gcShowPrompt bool
+)
+
+func init() {
+	generateContextCmd.Flags().StringVarP(&gcOutput, "output", "o", "", "Output path (default: application_context.json in the project scan directory, or in the repository when no project is active)")
+	generateContextCmd.Flags().BoolVar(&gcForce, "force", false, "Force regeneration, ignoring OPENANT.md override files")
+	generateContextCmd.Flags().BoolVar(&gcShowPrompt, "show-prompt", false, "Include formatted prompt text in output")
+}
+
+// runGenerateContext resolves the target repository, applies the project
+// default for --output, and invokes the Python generate-context subcommand.
+// It always terminates the process: exit code 2 when the repository argument,
+// the Python runtime, or the invocation fails, otherwise result.ExitCode.
+func runGenerateContext(cmd *cobra.Command, args []string) {
+	repoPath, ctx, err := resolveRepoArg(args)
+	if err != nil {
+		output.PrintError(err.Error())
+		os.Exit(2)
+	}
+
+	// With an active project, default the output to the project's scan file
+	// path. Without one, --output is omitted and the Python side picks its
+	// own default.
+	if ctx != nil && gcOutput == "" {
+		gcOutput = ctx.scanFile("application_context.json")
+	}
+
+	rt, err := ensurePython()
+	if err != nil {
+		output.PrintError(err.Error())
+		os.Exit(2)
+	}
+
+	// Build Python CLI args; optional flags are forwarded only when set.
+	pyArgs := []string{"generate-context", repoPath}
+	if gcOutput != "" {
+		pyArgs = append(pyArgs, "--output", gcOutput)
+	}
+	if gcForce {
+		pyArgs = append(pyArgs, "--force")
+	}
+	if gcShowPrompt {
+		pyArgs = append(pyArgs, "--show-prompt")
+	}
+
+	result, err := python.Invoke(rt.Path, pyArgs, "", quiet, requireAPIKey())
+	if err != nil {
+		output.PrintError(err.Error())
+		os.Exit(2)
+	}
+
+	switch {
+	case jsonOutput:
+		output.PrintJSON(result.Envelope)
+	case result.Envelope.Status == "success":
+		if data, ok := result.Envelope.Data.(map[string]any); ok {
+			printGenerateContextSummary(data)
+		}
+	default:
+		output.PrintErrors(result.Envelope.Errors)
+	}
+
+	os.Exit(result.ExitCode)
+}
+
+// printGenerateContextSummary prints the human-readable summary of a
+// successful generate-context result. Fields that are missing or have an
+// unexpected type are skipped.
+func printGenerateContextSummary(data map[string]any) {
+	output.PrintHeader("Application Context Generated")
+	if v, ok := data["application_type"].(string); ok {
+		output.PrintKeyValue("Type", v)
+	}
+	if v, ok := data["purpose"].(string); ok {
+		output.PrintKeyValue("Purpose", v)
+	}
+	// A numeric confidence is shown as a percentage. A string value is shown
+	// as-is rather than dropped.
+	// NOTE(review): confirm whether the Python side can ever send a string.
+	switch v := data["confidence"].(type) {
+	case float64:
+		output.PrintKeyValue("Confidence", fmt.Sprintf("%.0f%%", v*100))
+	case string:
+		output.PrintKeyValue("Confidence", v)
+	}
+	if v, ok := data["source"].(string); ok {
+		output.PrintKeyValue("Source", v)
+	}
+	if v, ok := data["app_context_path"].(string); ok {
+		output.PrintKeyValue("Output", v)
+	}
+	fmt.Println()
+}
diff --git a/apps/openant-cli/cmd/root.go b/apps/openant-cli/cmd/root.go
index 334dc9a..e584308 100644
--- a/apps/openant-cli/cmd/root.go
+++ b/apps/openant-cli/cmd/root.go
@@ -31,16 +31,17 @@ Stage 1: Detect potential vulnerabilities via code analysis
 Stage 2: Simulate an attacker to eliminate false positives
 
 Commands:
-  scan          Full pipeline: parse → enhance → detect → verify → report
-  diff          Scan only code changed vs a base ref or GitHub PR
-  parse         Extract code units from a repository
-  enhance       Add security context to a parsed dataset
-  analyze       Run Stage 1 vulnerability detection
-  verify        Run Stage 2 attacker simulation
-  build-output  Assemble pipeline_output.json from verified results
-  dynamic-test  Docker-isolated exploit testing
-  report        Generate reports from analysis results
-  config        Manage CLI configuration (API key, etc.)`,
+  scan              Full pipeline: parse → enhance → detect → verify → report
+  diff              Scan only code changed vs a base ref or GitHub PR
+  parse             Extract code units from a repository
+  generate-context  Generate application security context
+  enhance           Add security context to a parsed dataset
+  analyze           Run Stage 1 vulnerability detection
+  verify            Run Stage 2 attacker simulation
+  build-output      Assemble pipeline_output.json from verified results
+  dynamic-test      Docker-isolated exploit testing
+  report            Generate reports from analysis results
+  config            Manage CLI configuration (API key, etc.)`,
 }
 
 // Execute adds all child commands to the root command and sets flags appropriately.
@@ -82,6 +83,7 @@ func init() { rootCmd.AddCommand(scanCmd) rootCmd.AddCommand(diffCmd) rootCmd.AddCommand(parseCmd) + rootCmd.AddCommand(generateContextCmd) rootCmd.AddCommand(enhanceCmd) rootCmd.AddCommand(analyzeCmd) rootCmd.AddCommand(verifyCmd) diff --git a/apps/openant-cli/cmd/verify.go b/apps/openant-cli/cmd/verify.go index cad9b8a..b486db5 100644 --- a/apps/openant-cli/cmd/verify.go +++ b/apps/openant-cli/cmd/verify.go @@ -61,6 +61,9 @@ func runVerify(cmd *cobra.Command, args []string) { if verifyAnalyzerOutput == "" { verifyAnalyzerOutput = ctx.scanFile("analyzer_output.json") } + if verifyAppContext == "" { + verifyAppContext = ctx.scanFile("application_context.json") + } if verifyRepoPath == "" { verifyRepoPath = ctx.RepoPath } diff --git a/libs/openant-core/CURRENT_IMPLEMENTATION.md b/libs/openant-core/CURRENT_IMPLEMENTATION.md index f2524c3..07f246a 100644 --- a/libs/openant-core/CURRENT_IMPLEMENTATION.md +++ b/libs/openant-core/CURRENT_IMPLEMENTATION.md @@ -227,13 +227,17 @@ Unsupported types (desktop apps, mobile apps, games, embedded systems) are rejec **Usage:** ```bash -# List supported types -python -m context.generate_context --list-types +# Generate context via CLI (recommended) +openant generate-context /path/to/repo -# Generate context for a repository +# Generate context via Python module python -m context.generate_context /path/to/repo -# Context is saved to application_context.json in the dataset directory +# List supported types +python -m context.generate_context --list-types + +# Context is saved to application_context.json in the scan/dataset directory +# analyze and verify auto-discover it when using a project ``` **Generated Context Structure:** diff --git a/libs/openant-core/DOCUMENTATION.md b/libs/openant-core/DOCUMENTATION.md index 5f1f434..beb1761 100644 --- a/libs/openant-core/DOCUMENTATION.md +++ b/libs/openant-core/DOCUMENTATION.md @@ -221,7 +221,8 @@ For AI assistants working on the code, here are the key source files: | File 
| Purpose | |------|---------| | `context/application_context.py` | Context detection & formatting | -| `context/generate_context.py` | CLI for context generation | +| `context/generate_context.py` | Python module CLI for context generation | +| `openant/cli.py` (`generate-context`) | Primary CLI command (`openant generate-context`) | ### Report Generator diff --git a/libs/openant-core/PIPELINE_MANUAL.md b/libs/openant-core/PIPELINE_MANUAL.md index fe77b78..ef1f590 100644 --- a/libs/openant-core/PIPELINE_MANUAL.md +++ b/libs/openant-core/PIPELINE_MANUAL.md @@ -534,15 +534,26 @@ For typical web applications, entry-point filtering achieves 60-95% reduction. Classifies the repository type to reduce false positives. -**Location:** `context/generate_context.py` +**Location:** `context/application_context.py`, `openant/cli.py` -**Command:** +**Command (via CLI):** +```bash +openant generate-context # Uses active project +openant generate-context /path/to/repo # Explicit repo path +openant generate-context /path/to/repo -o ctx.json # Custom output path +openant generate-context --force # Skip OPENANT.md override +openant generate-context --show-prompt # Include prompt format in output +``` + +**Command (via Python module):** ```bash python -m context.generate_context /path/to/repo python -m context.generate_context /path/to/repo -o application_context.json python -m context.generate_context --list-types # Show supported types ``` +When using a project (`openant init`), the output defaults to the project scan directory and is automatically discovered by `analyze` and `verify` — no need to pass `--app-context`. + **Supported Application Types:** | Type | Description | Attack Model | @@ -885,7 +896,7 @@ python parsers/python/parse_repository.py /path/to/flask-app \ python validate_dataset_schema.py datasets/flask-app/dataset.json # 3. Generate application context -python -m context.generate_context /path/to/flask-app +openant generate-context /path/to/flask-app # 4. 
Run Stage 1 + Stage 2 on first 20 units python experiment.py --dataset flask-app --verify --limit 20 @@ -907,7 +918,7 @@ python parsers/javascript/test_pipeline.py /path/to/node-app \ python validate_dataset_schema.py datasets/node-app/dataset.json # 3. Generate application context -python -m context.generate_context /path/to/node-app +openant generate-context /path/to/node-app # 4. Run full analysis python experiment.py --dataset node-app --verify @@ -953,7 +964,7 @@ python parsers/python/parse_repository.py /repo --output datasets/name/dataset.j python parsers/javascript/test_pipeline.py /repo --analyzer-path /analyzer.js --output datasets/name --processing-level codeql # Generate app context -python -m context.generate_context /repo +openant generate-context /repo # Run Stage 1 python experiment.py --dataset name diff --git a/libs/openant-core/README.md b/libs/openant-core/README.md index 9d466ed..fdc2d80 100644 --- a/libs/openant-core/README.md +++ b/libs/openant-core/README.md @@ -131,16 +131,18 @@ OpenAnt generates application context to understand what type of application is ### Generate Context ```bash -# Generate context for a repository -python -m context.generate_context /path/to/repo - -# View formatted prompt output -python -m context.generate_context /path/to/repo --show-prompt +# Generate context via CLI (recommended) +openant generate-context /path/to/repo +openant generate-context /path/to/repo --show-prompt # Include prompt format +openant generate-context --force # Skip OPENANT.md override -# List supported types -python -m context.generate_context --list-types +# Generate context via Python module +python -m context.generate_context /path/to/repo +python -m context.generate_context --list-types # Show supported types ``` +When using a project (`openant init`), `analyze` and `verify` auto-discover the generated context — no need to pass `--app-context`. 
+
 ### Manual Override
 
 Create `OPENANT.md` or `OPENANT.json` in your repository root to provide manual security context. This is useful when:
diff --git a/libs/openant-core/openant/cli.py b/libs/openant-core/openant/cli.py
index b0ce345..39034e1 100644
--- a/libs/openant-core/openant/cli.py
+++ b/libs/openant-core/openant/cli.py
@@ -5,6 +5,7 @@ Commands:
 
     openant scan /path/to/repo --output /tmp/results
     openant parse /path/to/repo --output /tmp/results
+    openant generate-context /path/to/repo -o /tmp/results/application_context.json
     openant enhance dataset.json --analyzer-output ao.json --repo-path /repo -o enhanced.json
     openant analyze dataset.json --output /tmp/results
     openant verify results.json --analyzer-output ao.json --output /tmp/results
@@ -29,6 +30,20 @@ def _output_json(data: dict):
     sys.stdout.write("\n")
 
 
+def _find_app_context(*candidate_dirs: str | None) -> str | None:
+    """Return the first application_context.json found in the candidate dirs.
+
+    Dirs are tried in order; falsy entries (None, "") are skipped and only a
+    regular file counts as a match. Returns None when nothing matches.
+    """
+    for d in candidate_dirs:
+        if not d:
+            continue
+        path = os.path.join(d, "application_context.json")
+        if os.path.isfile(path):
+            return path
+    return None
+
+
 def _load_step_reports(directory: str) -> list[dict]:
     """Load all {step}.report.json files from a directory.
 
@@ -152,6 +167,70 @@ def cmd_parse(args):
         return 2
 
 
+def cmd_generate_context(args):
+    """Generate application security context for a repository.
+
+    Writes the context to ``args.output`` (default: application_context.json
+    inside ``args.repo``) and prints a success or error envelope as JSON.
+
+    Returns 0 on success, or 2 if any step raised.
+    """
+    from pathlib import Path
+    from context.application_context import (
+        generate_application_context,
+        save_context,
+        format_context_for_prompt,
+    )
+    from core.schemas import success, error
+    from core.step_report import step_context
+
+    output_path = os.path.abspath(
+        args.output or os.path.join(args.repo, "application_context.json")
+    )
+    output_dir = os.path.dirname(output_path)
+
+    try:
+        # NOTE(review): callers such as the Go CLI pass an --output inside the
+        # project scan directory; creating it here assumes neither save_context
+        # nor step_context does so -- confirm.
+        os.makedirs(output_dir, exist_ok=True)
+
+        with step_context("generate-context", output_dir, inputs={
+            "repo_path": os.path.abspath(args.repo),
+            "force": args.force,
+        }) as ctx:
+            app_context = generate_application_context(
+                Path(args.repo),
+                force_regenerate=args.force,
+            )
+            save_context(app_context, Path(output_path))
+
+            ctx.summary = {
+                "application_type": app_context.application_type,
+                "confidence": app_context.confidence,
+                "source": app_context.source,
+            }
+            ctx.outputs = {"app_context_path": output_path}
+
+        result = {
+            "app_context_path": output_path,
+            "application_type": app_context.application_type,
+            "purpose": app_context.purpose,
+            "confidence": app_context.confidence,
+            "source": app_context.source,
+        }
+
+        if args.show_prompt:
+            result["prompt_format"] = format_context_for_prompt(app_context)
+
+        _output_json(success(result))
+        return 0
+
+    except Exception as e:
+        _output_json(error(str(e)))
+        return 2
+
+
 def cmd_enhance(args):
     """Enhance a dataset with security context."""
     from core.enhancer import enhance_dataset
@@ -225,6 +304,18 @@ def cmd_analyze(args):
 
     exploitable_filter = "all" if args.exploitable_all else ("strict" if args.exploitable_only else None)
 
+    # Auto-discover application context if not explicitly provided
+    app_context_path = args.app_context
+    if not app_context_path:
+        app_context_path = _find_app_context(
+            output_dir,
+            args.repo_path,
+            os.path.dirname(os.path.abspath(args.dataset)),
+        )
+        if app_context_path:
+            print(f"[Analyze] Auto-discovered application context: {app_context_path}",
+                  file=sys.stderr)
+
     try:
         with step_context("analyze", output_dir, inputs={
             "dataset_path": os.path.abspath(args.dataset),
@@ -236,7 +327,7 @@ def cmd_analyze(args):
                 dataset_path=args.dataset,
                 output_dir=output_dir,
                 analyzer_output_path=args.analyzer_output,
-                app_context_path=args.app_context,
+                app_context_path=app_context_path,
                 repo_path=args.repo_path,
                 limit=args.limit,
                 model=args.model,
@@ -277,7 +368,7 @@ def cmd_analyze(args):
                     results_path=result.results_path,
                     output_dir=output_dir,
                     analyzer_output_path=args.analyzer_output,
-                    app_context_path=args.app_context,
+                    app_context_path=app_context_path,
                     repo_path=args.repo_path,
                     workers=args.workers,
                     backoff_seconds=args.backoff,
@@ -322,18 +413,30 @@ def cmd_verify(args):
 
     output_dir = args.output or tempfile.mkdtemp(prefix="open_ant_verify_")
 
+    # Auto-discover application context if not explicitly provided
+    app_context_path = args.app_context
+    if not app_context_path:
+        app_context_path = _find_app_context(
+            output_dir,
+            args.repo_path,
+            os.path.dirname(os.path.abspath(args.results)),
+        )
+        if app_context_path:
+            print(f"[Verify] Auto-discovered application context: {app_context_path}",
+                  file=sys.stderr)
+
     try:
         with step_context("verify", output_dir, inputs={
             "results_path": os.path.abspath(args.results),
             "analyzer_output_path": os.path.abspath(args.analyzer_output),
-            "app_context_path": os.path.abspath(args.app_context) if args.app_context else None,
+            "app_context_path": os.path.abspath(app_context_path) if app_context_path else None,
             "repo_path": os.path.abspath(args.repo_path) if args.repo_path else None,
         }) as ctx:
             result = run_verification(
                 results_path=args.results,
                 output_dir=output_dir,
                 analyzer_output_path=args.analyzer_output,
-                app_context_path=args.app_context,
+                app_context_path=app_context_path,
                 repo_path=args.repo_path,
                 workers=args.workers,
                 checkpoint_path=getattr(args, "checkpoint", None),
@@ -1019,6 +1122,22 @@ def main():
     parse_p.add_argument("--diff-manifest", help="Path to diff_manifest.json; tags units with diff_selected")
     parse_p.set_defaults(func=cmd_parse)
 
+    # ---------------------------------------------------------------
+    # generate-context — generate application security context
+    # ---------------------------------------------------------------
+    gc_p = subparsers.add_parser(
+        "generate-context",
+        help="Generate application security context for a repository",
+    )
+    gc_p.add_argument("repo", help="Path to repository")
+    gc_p.add_argument("--output", "-o",
+                      help="Output path (default: application_context.json inside the repository)")
+    gc_p.add_argument("--force", action="store_true",
+                      help="Force regeneration, ignoring OPENANT.md override files")
+    gc_p.add_argument("--show-prompt", action="store_true",
+                      help="Include formatted prompt text in output")
+    gc_p.set_defaults(func=cmd_generate_context)
+
     # ---------------------------------------------------------------
     # enhance — add security context to a dataset
     # ---------------------------------------------------------------
diff --git a/libs/openant-core/tests/test_app_context_discovery.py b/libs/openant-core/tests/test_app_context_discovery.py
new file mode 100644
index 0000000..74949d6
--- /dev/null
+++ b/libs/openant-core/tests/test_app_context_discovery.py
@@ -0,0 +1,86 @@
+"""Tests for application_context.json auto-discovery in the Python CLI.
+
+These tests exercise the `_find_app_context` helper used by `analyze` and
+`verify` to locate `application_context.json` automatically when
+`--app-context` is not passed.
+"""
+import json
+from pathlib import Path
+
+from openant.cli import _find_app_context
+
+
+def _write_dummy_context(path: Path) -> None:
+    path.write_text(json.dumps({
+        "application_type": "web_app",
+        "purpose": "test",
+        "confidence": "high",
+        "source": "test",
+    }))
+
+
+class TestFindAppContext:
+    def test_returns_none_when_no_dirs(self):
+        assert _find_app_context() is None
+
+    def test_returns_none_when_dirs_empty(self):
+        assert _find_app_context("", None) is None
+
+    def test_returns_none_when_no_file_present(self, tmp_path):
+        d1 = tmp_path / "out"
+        d1.mkdir()
+        d2 = tmp_path / "repo"
+        d2.mkdir()
+        assert _find_app_context(str(d1), str(d2)) is None
+
+    def test_finds_in_first_dir(self, tmp_path):
+        out_dir = tmp_path / "out"
+        out_dir.mkdir()
+        ctx_path = out_dir / "application_context.json"
+        _write_dummy_context(ctx_path)
+
+        result = _find_app_context(str(out_dir), str(tmp_path / "repo"))
+        assert result == str(ctx_path)
+
+    def test_finds_in_second_dir_when_first_missing(self, tmp_path):
+        out_dir = tmp_path / "out"
+        out_dir.mkdir()
+        repo_dir = tmp_path / "repo"
+        repo_dir.mkdir()
+        ctx_path = repo_dir / "application_context.json"
+        _write_dummy_context(ctx_path)
+
+        result = _find_app_context(str(out_dir), str(repo_dir))
+        assert result == str(ctx_path)
+
+    def test_first_match_wins(self, tmp_path):
+        out_dir = tmp_path / "out"
+        out_dir.mkdir()
+        repo_dir = tmp_path / "repo"
+        repo_dir.mkdir()
+        first = out_dir / "application_context.json"
+        second = repo_dir / "application_context.json"
+        _write_dummy_context(first)
+        _write_dummy_context(second)
+
+        result = _find_app_context(str(out_dir), str(repo_dir))
+        assert result == str(first)
+
+    def test_skips_falsy_dirs(self, tmp_path):
+        repo_dir = tmp_path / "repo"
+        repo_dir.mkdir()
+        ctx_path = repo_dir / "application_context.json"
+        _write_dummy_context(ctx_path)
+
+        # First two are falsy (empty / None) — should be skipped without error
+        result = _find_app_context("", None, str(repo_dir))
+        assert result == str(ctx_path)
+
+    def test_ignores_directory_named_application_context_json(self, tmp_path):
+        """A *directory* with the magic name should not be treated as a hit."""
+        out_dir = tmp_path / "out"
+        out_dir.mkdir()
+        # Create a directory (not file) with the target name
+        (out_dir / "application_context.json").mkdir()
+
+        assert _find_app_context(str(out_dir)) is None
diff --git a/libs/openant-core/tests/test_go_cli.py b/libs/openant-core/tests/test_go_cli.py
index fc92113..519e6ae 100644
--- a/libs/openant-core/tests/test_go_cli.py
+++ b/libs/openant-core/tests/test_go_cli.py
@@ -163,6 +163,28 @@ def test_parse_json_output_is_valid(self, sample_python_repo, tmp_path):
         assert "status" in envelope
 
 
+class TestGenerateContextHelp:
+    """Tests for `openant generate-context --help`."""
+
+    def test_help(self):
+        result = run_cli("generate-context", "--help")
+        assert result.returncode == 0
+        output = result.stdout + result.stderr
+        assert "repository" in output.lower()
+        assert "context" in output.lower()
+
+
+class TestGenerateContext:
+    """Tests for `openant generate-context` (no API key)."""
+
+    def test_requires_api_key(self, sample_python_repo):
+        """generate-context should fail without an API key."""
+        result = run_cli("generate-context", sample_python_repo)
+        output = result.stderr + result.stdout
+        assert result.returncode != 0
+        assert "api key" in output.lower()
+
+
 class TestApiKeyHandling:
     def test_scan_requires_api_key(self, sample_python_repo):
         """Scan should fail without an API key."""