From c3ba2f62fef5ff6020ca77fbf6db8a061c91357f Mon Sep 17 00:00:00 2001 From: dacharyc Date: Fri, 17 Oct 2025 16:57:15 -0400 Subject: [PATCH 1/7] Add audit-cli --- audit-cli/.gitignore | 1 + audit-cli/README.md | 796 ++++++++++++++++++ .../extract/code-examples/code_examples.go | 181 ++++ .../code-examples/code_examples_test.go | 597 +++++++++++++ .../extract/code-examples/language.go | 177 ++++ .../commands/extract/code-examples/parser.go | 253 ++++++ .../commands/extract/code-examples/report.go | 114 +++ .../commands/extract/code-examples/types.go | 94 +++ .../commands/extract/code-examples/writer.go | 97 +++ audit-cli/commands/extract/extract.go | 34 + .../search/find-string/find_string.go | 193 +++++ .../commands/search/find-string/report.go | 51 ++ .../commands/search/find-string/types.go | 45 + audit-cli/commands/search/search.go | 33 + audit-cli/go.mod | 10 + audit-cli/go.sum | 11 + audit-cli/internal/rst/directive_parser.go | 490 +++++++++++ audit-cli/internal/rst/file_utils.go | 72 ++ audit-cli/internal/rst/include_resolver.go | 360 ++++++++ audit-cli/internal/rst/parser.go | 91 ++ audit-cli/main.go | 35 + .../code-block-test.code-block.1.js | 2 + .../code-block-test.code-block.2.py | 3 + .../code-block-test.code-block.3.js | 41 + .../code-block-test.code-block.4.txt | 2 + .../code-block-test.code-block.5.sh | 3 + .../code-block-test.code-block.6.ts | 4 + .../code-block-test.code-block.7.cpp | 6 + .../examples.literalinclude.1.go | 7 + ...o-code-block-test.io-code-block.1.input.js | 1 + ...-code-block-test.io-code-block.1.output.js | 5 + ...o-code-block-test.io-code-block.3.input.py | 6 + ...-code-block-test.io-code-block.3.output.py | 1 + ...o-code-block-test.io-code-block.4.input.sh | 1 + ...code-block-test.io-code-block.4.output.txt | 4 + ...o-code-block-test.io-code-block.5.input.ts | 7 + ...code-block-test.io-code-block.5.output.txt | 1 + ...o-code-block-test.io-code-block.6.input.js | 1 + ...-code-block-test.io-code-block.6.output.js | 4 + 
...o-code-block-test.io-code-block.7.input.go | 15 + .../literalinclude-test.literalinclude.1.py | 4 + .../literalinclude-test.literalinclude.2.go | 7 + .../literalinclude-test.literalinclude.3.js | 5 + .../literalinclude-test.literalinclude.4.php | 6 + .../literalinclude-test.literalinclude.5.rb | 10 + .../literalinclude-test.literalinclude.6.ts | 9 + .../literalinclude-test.literalinclude.7.cpp | 8 + .../nested-code-block-test.code-block.1.js | 3 + .../nested-code-block-test.code-block.10.rb | 2 + .../nested-code-block-test.code-block.11.txt | 9 + .../nested-code-block-test.code-block.2.js | 4 + .../nested-code-block-test.code-block.3.js | 2 + .../nested-code-block-test.code-block.4.py | 5 + .../nested-code-block-test.code-block.5.go | 9 + .../nested-code-block-test.code-block.6.ts | 9 + .../nested-code-block-test.code-block.7.ts | 5 + .../nested-code-block-test.code-block.8.sh | 3 + .../nested-code-block-test.code-block.9.rb | 2 + .../input-files/source/code-block-test.rst | 112 +++ .../source/code-examples/example.cpp | 9 + .../source/code-examples/example.go | 8 + .../source/code-examples/example.js | 10 + .../source/code-examples/example.php | 12 + .../source/code-examples/example.py | 16 + .../source/code-examples/example.rb | 11 + .../source/code-examples/example.ts | 10 + .../input-files/source/include-test.rst | 14 + .../input-files/source/includes/examples.rst | 8 + .../input-files/source/includes/intro.rst | 5 + .../input-files/source/io-code-block-test.rst | 146 ++++ .../source/literalinclude-test.rst | 53 ++ .../source/nested-code-block-test.rst | 167 ++++ 72 files changed, 4532 insertions(+) create mode 100644 audit-cli/.gitignore create mode 100644 audit-cli/README.md create mode 100644 audit-cli/commands/extract/code-examples/code_examples.go create mode 100644 audit-cli/commands/extract/code-examples/code_examples_test.go create mode 100644 audit-cli/commands/extract/code-examples/language.go create mode 100644 
audit-cli/commands/extract/code-examples/parser.go create mode 100644 audit-cli/commands/extract/code-examples/report.go create mode 100644 audit-cli/commands/extract/code-examples/types.go create mode 100644 audit-cli/commands/extract/code-examples/writer.go create mode 100644 audit-cli/commands/extract/extract.go create mode 100644 audit-cli/commands/search/find-string/find_string.go create mode 100644 audit-cli/commands/search/find-string/report.go create mode 100644 audit-cli/commands/search/find-string/types.go create mode 100644 audit-cli/commands/search/search.go create mode 100644 audit-cli/go.mod create mode 100644 audit-cli/go.sum create mode 100644 audit-cli/internal/rst/directive_parser.go create mode 100644 audit-cli/internal/rst/file_utils.go create mode 100644 audit-cli/internal/rst/include_resolver.go create mode 100644 audit-cli/internal/rst/parser.go create mode 100644 audit-cli/main.go create mode 100644 audit-cli/testdata/expected-output/code-block-test.code-block.1.js create mode 100644 audit-cli/testdata/expected-output/code-block-test.code-block.2.py create mode 100644 audit-cli/testdata/expected-output/code-block-test.code-block.3.js create mode 100644 audit-cli/testdata/expected-output/code-block-test.code-block.4.txt create mode 100644 audit-cli/testdata/expected-output/code-block-test.code-block.5.sh create mode 100644 audit-cli/testdata/expected-output/code-block-test.code-block.6.ts create mode 100644 audit-cli/testdata/expected-output/code-block-test.code-block.7.cpp create mode 100644 audit-cli/testdata/expected-output/examples.literalinclude.1.go create mode 100644 audit-cli/testdata/expected-output/io-code-block-test.io-code-block.1.input.js create mode 100644 audit-cli/testdata/expected-output/io-code-block-test.io-code-block.1.output.js create mode 100644 audit-cli/testdata/expected-output/io-code-block-test.io-code-block.3.input.py create mode 100644 audit-cli/testdata/expected-output/io-code-block-test.io-code-block.3.output.py 
create mode 100644 audit-cli/testdata/expected-output/io-code-block-test.io-code-block.4.input.sh create mode 100644 audit-cli/testdata/expected-output/io-code-block-test.io-code-block.4.output.txt create mode 100644 audit-cli/testdata/expected-output/io-code-block-test.io-code-block.5.input.ts create mode 100644 audit-cli/testdata/expected-output/io-code-block-test.io-code-block.5.output.txt create mode 100644 audit-cli/testdata/expected-output/io-code-block-test.io-code-block.6.input.js create mode 100644 audit-cli/testdata/expected-output/io-code-block-test.io-code-block.6.output.js create mode 100644 audit-cli/testdata/expected-output/io-code-block-test.io-code-block.7.input.go create mode 100644 audit-cli/testdata/expected-output/literalinclude-test.literalinclude.1.py create mode 100644 audit-cli/testdata/expected-output/literalinclude-test.literalinclude.2.go create mode 100644 audit-cli/testdata/expected-output/literalinclude-test.literalinclude.3.js create mode 100644 audit-cli/testdata/expected-output/literalinclude-test.literalinclude.4.php create mode 100644 audit-cli/testdata/expected-output/literalinclude-test.literalinclude.5.rb create mode 100644 audit-cli/testdata/expected-output/literalinclude-test.literalinclude.6.ts create mode 100644 audit-cli/testdata/expected-output/literalinclude-test.literalinclude.7.cpp create mode 100644 audit-cli/testdata/expected-output/nested-code-block-test.code-block.1.js create mode 100644 audit-cli/testdata/expected-output/nested-code-block-test.code-block.10.rb create mode 100644 audit-cli/testdata/expected-output/nested-code-block-test.code-block.11.txt create mode 100644 audit-cli/testdata/expected-output/nested-code-block-test.code-block.2.js create mode 100644 audit-cli/testdata/expected-output/nested-code-block-test.code-block.3.js create mode 100644 audit-cli/testdata/expected-output/nested-code-block-test.code-block.4.py create mode 100644 
audit-cli/testdata/expected-output/nested-code-block-test.code-block.5.go create mode 100644 audit-cli/testdata/expected-output/nested-code-block-test.code-block.6.ts create mode 100644 audit-cli/testdata/expected-output/nested-code-block-test.code-block.7.ts create mode 100644 audit-cli/testdata/expected-output/nested-code-block-test.code-block.8.sh create mode 100644 audit-cli/testdata/expected-output/nested-code-block-test.code-block.9.rb create mode 100644 audit-cli/testdata/input-files/source/code-block-test.rst create mode 100644 audit-cli/testdata/input-files/source/code-examples/example.cpp create mode 100644 audit-cli/testdata/input-files/source/code-examples/example.go create mode 100644 audit-cli/testdata/input-files/source/code-examples/example.js create mode 100644 audit-cli/testdata/input-files/source/code-examples/example.php create mode 100644 audit-cli/testdata/input-files/source/code-examples/example.py create mode 100644 audit-cli/testdata/input-files/source/code-examples/example.rb create mode 100644 audit-cli/testdata/input-files/source/code-examples/example.ts create mode 100644 audit-cli/testdata/input-files/source/include-test.rst create mode 100644 audit-cli/testdata/input-files/source/includes/examples.rst create mode 100644 audit-cli/testdata/input-files/source/includes/intro.rst create mode 100644 audit-cli/testdata/input-files/source/io-code-block-test.rst create mode 100644 audit-cli/testdata/input-files/source/literalinclude-test.rst create mode 100644 audit-cli/testdata/input-files/source/nested-code-block-test.rst diff --git a/audit-cli/.gitignore b/audit-cli/.gitignore new file mode 100644 index 0000000..bf1138a --- /dev/null +++ b/audit-cli/.gitignore @@ -0,0 +1 @@ +audit-cli diff --git a/audit-cli/README.md b/audit-cli/README.md new file mode 100644 index 0000000..eaeb37a --- /dev/null +++ b/audit-cli/README.md @@ -0,0 +1,796 @@ +# audit-cli + +A Go CLI tool for extracting and analyzing code examples from MongoDB documentation 
written in reStructuredText (RST). + +## Table of Contents + +- [Overview](#overview) +- [Installation](#installation) +- [Usage](#usage) + - [Extract Commands](#extract-commands) + - [Search Commands](#search-commands) +- [Development](#development) + - [Project Structure](#project-structure) + - [Adding New Commands](#adding-new-commands) + - [Testing](#testing) + - [Code Patterns](#code-patterns) +- [Supported RST Directives](#supported-rst-directives) + +## Overview + +This CLI tool helps maintain code quality across MongoDB's documentation by: + +1. **Extracting code examples** from RST files into individual, testable files +2. **Searching extracted code** for specific patterns or substrings +3. **Following include directives** to process entire documentation trees +4. **Handling MongoDB-specific conventions** like steps files, extracts, and template variables + +## Installation + +### Build from Source + +```bash +cd audit-cli +go build +``` + +This creates an `audit-cli` executable in the current directory. + +### Run Without Building + +```bash +cd audit-cli +go run main.go [command] [flags] +``` + +## Usage + +The CLI is organized into parent commands with subcommands: + +``` +audit-cli +├── extract # Extract content from RST files +│ └── code-examples +└── search # Search through extracted content + └── find-string +``` + +### Extract Commands + +#### `extract code-examples` + +Extract code examples from reStructuredText files into individual files. 
+ +**Basic Usage:** + +```bash +# Extract from a single file +./audit-cli extract code-examples path/to/file.rst -o ./output + +# Extract from a directory (non-recursive) +./audit-cli extract code-examples path/to/docs -o ./output + +# Extract recursively from all subdirectories +./audit-cli extract code-examples path/to/docs -o ./output -r + +# Follow include directives +./audit-cli extract code-examples path/to/file.rst -o ./output -f + +# Combine recursive scanning and include following +./audit-cli extract code-examples path/to/docs -o ./output -r -f + +# Dry run (show what would be extracted without writing files) +./audit-cli extract code-examples path/to/file.rst -o ./output --dry-run + +# Verbose output +./audit-cli extract code-examples path/to/file.rst -o ./output -v +``` + +**Flags:** + +- `-o, --output <dir>` - Output directory for extracted files (default: `./output`) +- `-r, --recursive` - Recursively scan directories for RST files +- `-f, --follow-includes` - Follow `.. include::` directives in RST files +- `--dry-run` - Show what would be extracted without writing files +- `-v, --verbose` - Show detailed processing information + +**Output Format:** + +Extracted files are named: `{source-base}.{directive-type}.{index}.{ext}` + +Examples: +- `my-doc.code-block.1.js` - First code-block from my-doc.rst +- `my-doc.literalinclude.2.py` - Second literalinclude from my-doc.rst +- `my-doc.io-code-block.1.input.js` - Input from first io-code-block +- `my-doc.io-code-block.1.output.json` - Output from first io-code-block + +**Report:** + +After extraction, a report is displayed showing: +- Number of files traversed +- Number of output files written +- Code examples by language +- Code examples by directive type + +### Search Commands + +#### `search find-string` + +Search through extracted code example files for a specific substring. 
+ +**Basic Usage:** + +```bash +# Search in a single file +./audit-cli search find-string path/to/file.js "substring" + +# Search in a directory (non-recursive) +./audit-cli search find-string path/to/output "substring" + +# Search recursively +./audit-cli search find-string path/to/output "substring" -r + +# Verbose output (show file paths and language breakdown) +./audit-cli search find-string path/to/output "substring" -r -v +``` + +**Flags:** + +- `-r, --recursive` - Recursively search all files in subdirectories +- `-v, --verbose` - Show file paths and language breakdown + +**Report:** + +The search report shows: +- Number of files scanned +- Number of files containing the substring (each file counted once) + +With `-v` flag, also shows: +- List of file paths where substring appears +- Count broken down by language (file extension) + +## Development + +### Project Structure + +``` +audit-cli/ +├── main.go # CLI entry point +├── commands/ # Command implementations +│ ├── extract/ # Extract parent command +│ │ ├── extract.go # Parent command definition +│ │ └── code-examples/ # Code examples subcommand +│ │ ├── code_examples.go # Command logic +│ │ ├── code_examples_test.go # Tests +│ │ ├── parser.go # RST directive parsing +│ │ ├── writer.go # File writing logic +│ │ ├── report.go # Report generation +│ │ ├── types.go # Type definitions +│ │ └── language.go # Language normalization +│ └── search/ # Search parent command +│ ├── search.go # Parent command definition +│ └── find-string/ # Find string subcommand +│ ├── find_string.go # Command logic +│ ├── types.go # Type definitions +│ └── report.go # Report generation +├── internal/ # Internal packages +│ └── rst/ # RST parsing utilities +│ ├── include_resolver.go # Include directive resolution +│ ├── file_utils.go # Directory traversal +│ └── directive_parser.go # Directive parsing +└── testdata/ # Test fixtures + ├── input-files/ # Test RST files + │ └── source/ # Source directory (required) + │ ├── *.rst # Test files + │ ├── 
includes/ # Included RST files + │ └── code-examples/ # Code files for literalinclude + └── expected-output/ # Expected extraction results +``` + +### Adding New Commands + +#### 1. Adding a New Subcommand to an Existing Parent + +Example: Adding `extract tables` subcommand + +1. **Create the subcommand directory:** + ```bash + mkdir -p commands/extract/tables + ``` + +2. **Create the command file** (`commands/extract/tables/tables.go`): + ```go + package tables + + import ( + "github.com/spf13/cobra" + ) + + func NewTablesCommand() *cobra.Command { + cmd := &cobra.Command{ + Use: "tables [filepath]", + Short: "Extract tables from RST files", + Args: cobra.ExactArgs(1), + RunE: func(cmd *cobra.Command, args []string) error { + // Implementation here + return nil + }, + } + + // Add flags + cmd.Flags().StringP("output", "o", "./output", "Output directory") + + return cmd + } + ``` + +3. **Register the subcommand** in `commands/extract/extract.go`: + ```go + import ( + "github.com/mongodb/code-example-tooling/audit-cli/commands/extract/tables" + ) + + func NewExtractCommand() *cobra.Command { + cmd := &cobra.Command{...} + + cmd.AddCommand(codeexamples.NewCodeExamplesCommand()) + cmd.AddCommand(tables.NewTablesCommand()) // Add this line + + return cmd + } + ``` + +#### 2. Adding a New Parent Command + +Example: Adding `analyze` parent command + +1. **Create the parent directory:** + ```bash + mkdir -p commands/analyze + ``` + +2. **Create the parent command** (`commands/analyze/analyze.go`): + ```go + package analyze + + import ( + "github.com/spf13/cobra" + ) + + func NewAnalyzeCommand() *cobra.Command { + cmd := &cobra.Command{ + Use: "analyze", + Short: "Analyze extracted content", + } + + // Add subcommands here + + return cmd + } + ``` + +3. 
**Register in main.go:** + ```go + import ( + "github.com/mongodb/code-example-tooling/audit-cli/commands/analyze" + ) + + func main() { + rootCmd.AddCommand(extract.NewExtractCommand()) + rootCmd.AddCommand(search.NewSearchCommand()) + rootCmd.AddCommand(analyze.NewAnalyzeCommand()) // Add this line + } + ``` + + + +### Testing + +#### Running Tests + +```bash +# Run all tests +cd audit-cli +go test ./... + +# Run tests for a specific package +go test ./commands/extract/code-examples -v + +# Run a specific test +go test ./commands/extract/code-examples -run TestRecursiveDirectoryScanning -v + +# Run tests with coverage +go test ./... -cover +``` + +#### Test Structure + +Tests use a table-driven approach with test fixtures in the `testdata/` directory: + +- **Input files**: `testdata/input-files/source/` - RST files and referenced code +- **Expected output**: `testdata/expected-output/` - Expected extracted files +- **Test pattern**: Compare actual extraction output against expected files + +**Note**: The `testdata` directory name is special in Go - it's automatically ignored during builds, which is important since it contains non-Go files (`.cpp`, `.rst`, etc.). + +#### Adding New Tests + +1. **Create test input files** in `testdata/input-files/source/`: + ```bash + # Create a new test RST file + cat > testdata/input-files/source/my-test.rst << 'EOF' + .. code-block:: javascript + + console.log("Hello, World!"); + EOF + ``` + +2. **Generate expected output**: + ```bash + ./audit-cli extract code-examples testdata/input-files/source/my-test.rst \ + -o testdata/expected-output + ``` + +3. **Verify the output** is correct before committing + +4. 
**Add test case** in the appropriate `*_test.go` file: + ```go + func TestMyNewFeature(t *testing.T) { + testDataDir := filepath.Join("..", "..", "..", "testdata") + inputFile := filepath.Join(testDataDir, "input-files", "source", "my-test.rst") + expectedDir := filepath.Join(testDataDir, "expected-output") + + tempDir, err := os.MkdirTemp("", "test-*") + if err != nil { + t.Fatalf("Failed to create temp directory: %v", err) + } + defer os.RemoveAll(tempDir) + + report, err := RunExtract(inputFile, tempDir, false, false, false, false) + if err != nil { + t.Fatalf("RunExtract failed: %v", err) + } + + // Add assertions here + } + ``` + +#### Test Conventions + +- **Relative paths**: Tests use `filepath.Join("..", "..", "..", "testdata")` to reference test data (three levels up from `commands/extract/code-examples/`) +- **Temporary directories**: Use `os.MkdirTemp()` for test output, clean up with `defer os.RemoveAll()` +- **Exact content matching**: Tests compare byte-for-byte content +- **No trailing newlines**: Expected output files should not have trailing blank lines + +#### Updating Expected Output + +If you've changed the parsing logic and need to regenerate expected output: + +```bash +cd audit-cli + +# Update all expected outputs +./audit-cli extract code-examples testdata/input-files/source/literalinclude-test.rst \ + -o testdata/expected-output + +./audit-cli extract code-examples testdata/input-files/source/code-block-test.rst \ + -o testdata/expected-output + +./audit-cli extract code-examples testdata/input-files/source/nested-code-block-test.rst \ + -o testdata/expected-output + +./audit-cli extract code-examples testdata/input-files/source/io-code-block-test.rst \ + -o testdata/expected-output + +./audit-cli extract code-examples testdata/input-files/source/include-test.rst \ + -o testdata/expected-output -f +``` + +**Important**: Always verify the new output is correct before committing! + +### Code Patterns + +#### 1. 
Command Structure Pattern + +All commands follow this pattern: + +```go +package mycommand + +import "github.com/spf13/cobra" + +func NewMyCommand() *cobra.Command { + var flagVar string + + cmd := &cobra.Command{ + Use: "my-command [args]", + Short: "Brief description", + Long: "Detailed description", + Args: cobra.ExactArgs(1), // Or MinimumNArgs, etc. + RunE: func(cmd *cobra.Command, args []string) error { + // Get flag values + flagValue, _ := cmd.Flags().GetString("flag-name") + + // Call the main logic function + return RunMyCommand(args[0], flagValue) + }, + } + + // Define flags + cmd.Flags().StringVarP(&flagVar, "flag-name", "f", "default", "Description") + + return cmd +} + +// Separate logic function for testability +func RunMyCommand(arg string, flagValue string) error { + // Implementation here + return nil +} +``` + +**Why this pattern?** +- Separates command definition from logic +- Makes logic testable without Cobra +- Consistent across all commands + +#### 2. Error Handling Pattern + +Use descriptive error wrapping: + +```go +import "fmt" + +// Wrap errors with context +file, err := os.Open(filePath) +if err != nil { + return fmt.Errorf("failed to open file %s: %w", filePath, err) +} + +// Check for specific conditions +if !fileInfo.IsDir() { + return fmt.Errorf("path %s is not a directory", path) +} +``` + +#### 3. File Processing Pattern + +Use the scanner pattern for line-by-line processing: + +```go +import ( + "bufio" + "os" +) + +func processFile(filePath string) error { + file, err := os.Open(filePath) + if err != nil { + return fmt.Errorf("failed to open file: %w", err) + } + defer file.Close() + + scanner := bufio.NewScanner(file) + lineNum := 0 + + for scanner.Scan() { + lineNum++ + line := scanner.Text() + + // Process line + } + + if err := scanner.Err(); err != nil { + return fmt.Errorf("error reading file: %w", err) + } + + return nil +} +``` + +#### 4. 
Directory Traversal Pattern + +Use `filepath.Walk` for recursive traversal: + +```go +import ( + "os" + "path/filepath" +) + +func traverseDirectory(rootPath string, recursive bool) ([]string, error) { + var files []string + + err := filepath.Walk(rootPath, func(path string, info os.FileInfo, err error) error { + if err != nil { + return err + } + + // Skip subdirectories if not recursive + if !recursive && info.IsDir() && path != rootPath { + return filepath.SkipDir + } + + // Collect files + if !info.IsDir() { + files = append(files, path) + } + + return nil + }) + + return files, err +} +``` + +#### 5. Testing Pattern + +Use table-driven tests where appropriate: + +```go +func TestLanguageNormalization(t *testing.T) { + tests := []struct { + name string + input string + expected string + }{ + {"TypeScript", "ts", "typescript"}, + {"C++", "c++", "cpp"}, + {"Golang", "golang", "go"}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := NormalizeLanguage(tt.input) + if result != tt.expected { + t.Errorf("NormalizeLanguage(%q) = %q, want %q", + tt.input, result, tt.expected) + } + }) + } +} +``` + +#### 6. Verbose Output Pattern + +Use a consistent pattern for verbose logging: + +```go +func processWithVerbose(filePath string, verbose bool) error { + if verbose { + fmt.Printf("Processing: %s\n", filePath) + } + + // Do work + + if verbose { + fmt.Printf("Completed: %s\n", filePath) + } + + return nil +} +``` + + + +## Supported RST Directives + +The tool extracts code examples from the following reStructuredText directives: + +### 1. `literalinclude` + +Extracts code from external files with support for partial extraction and dedenting. + +**Syntax:** +```rst +.. 
literalinclude:: /path/to/file.py + :language: python + :start-after: start-tag + :end-before: end-tag + :dedent: +``` + +**Supported Options:** +- `:language:` - Specifies the programming language (normalized: `ts` → `typescript`, `c++` → `cpp`, `golang` → `go`) +- `:start-after:` - Extract content after this tag (skips the entire line containing the tag) +- `:end-before:` - Extract content before this tag (cuts before the entire line containing the tag) +- `:dedent:` - Remove common leading whitespace from the extracted content + +**Example:** + +Given `code-examples/example.py`: +```python +def main(): + # start-example + result = calculate(42) + print(result) + # end-example +``` + +And RST: +```rst +.. literalinclude:: /code-examples/example.py + :language: python + :start-after: start-example + :end-before: end-example + :dedent: +``` + +Extracts: +```python +result = calculate(42) +print(result) +``` + +### 2. `code-block` + +Inline code blocks with automatic dedenting based on the first line's indentation. + +**Syntax:** +```rst +.. code-block:: javascript + :copyable: false + :emphasize-lines: 2,3 + + const greeting = "Hello, World!"; + console.log(greeting); +``` + +**Supported Options:** +- Language argument - `.. code-block:: javascript` (optional, defaults to `txt`) +- `:language:` - Alternative way to specify language +- `:copyable:` - Parsed but not used for extraction +- `:emphasize-lines:` - Parsed but not used for extraction + +**Automatic Dedenting:** + +The content is automatically dedented based on the indentation of the first content line. For example: + +```rst +.. note:: + + .. code-block:: python + + def hello(): + print("Hello") +``` + +The code has 6 spaces of indentation (3 from `note`, 3 from `code-block`). The tool automatically removes these 6 spaces, resulting in: + +```python +def hello(): + print("Hello") +``` + +### 3. `io-code-block` + +Input/output code blocks for interactive examples with nested sub-directives. 
+ +**Syntax:** +```rst +.. io-code-block:: + :copyable: true + + .. input:: + :language: javascript + + db.restaurants.aggregate([ + { $match: { category: "cafe" } } + ]) + + .. output:: + :language: json + + [ + { _id: 1, category: 'café', status: 'Open' } + ] +``` + +**Supported Options:** +- `:copyable:` - Parsed but not used for extraction +- Nested `.. input::` sub-directive (required) + - Can have filepath argument: `.. input:: /path/to/file.js` + - Or inline content with `:language:` option +- Nested `.. output::` sub-directive (optional) + - Can have filepath argument: `.. output:: /path/to/output.txt` + - Or inline content with `:language:` option + +**File-based Content:** +```rst +.. io-code-block:: + + .. input:: /code-examples/query.js + :language: javascript + + .. output:: /code-examples/result.json + :language: json +``` + +**Output Files:** + +Generates two files: +- `{source}.io-code-block.{index}.input.{ext}` - The input code +- `{source}.io-code-block.{index}.output.{ext}` - The output (if present) + +Example: `my-doc.io-code-block.1.input.js` and `my-doc.io-code-block.1.output.json` + +### 4. `include` + +Follows include directives to process entire documentation trees (when `-f` flag is used). + +**Syntax:** +```rst +.. 
include:: /includes/intro.rst +``` + +**Special MongoDB Conventions:** + +The tool handles several MongoDB-specific include patterns: + +#### Steps Files +Converts directory-based paths to filename-based paths: +- Input: `/includes/steps/run-mongodb-on-linux.rst` +- Resolves to: `/includes/steps-run-mongodb-on-linux.yaml` + +#### Extracts and Release Files +Resolves ref-based includes by searching YAML files: +- Input: `/includes/extracts/install-mongodb.rst` +- Searches: `/includes/extracts-*.yaml` for `ref: install-mongodb` +- Resolves to: The YAML file containing that ref + +#### Template Variables +Resolves template variables from YAML replacement sections: +```yaml +replacement: + release_specification_default: "/includes/release/install-windows-default.rst" +``` +- Input: `{{release_specification_default}}` +- Resolves to: `/includes/release/install-windows-default.rst` + +**Source Directory Resolution:** + +The tool walks up the directory tree to find a directory named "source" or containing a "source" subdirectory. This is used as the base for resolving relative include paths. + +## Internal Packages + +### `internal/rst` + +Provides reusable utilities for parsing and processing RST files: + +- **Include resolution** - Handles all include directive patterns +- **Directory traversal** - Recursive file scanning +- **Directive parsing** - Extracts structured data from RST directives +- **Template variable resolution** - Resolves YAML-based template variables +- **Source directory detection** - Finds the documentation root + +See the code in `internal/rst/` for implementation details. 
+ +## Language Normalization + +The tool normalizes language identifiers to standard file extensions: + +| Input | Normalized | Extension | +|-------|-----------|-----------| +| `ts` | `typescript` | `.ts` | +| `c++` | `cpp` | `.cpp` | +| `golang` | `go` | `.go` | +| `javascript` | `javascript` | `.js` | +| `python` | `python` | `.py` | +| `shell` / `sh` | `sh` | `.sh` | +| `json` | `json` | `.json` | +| `yaml` | `yaml` | `.yaml` | +| (none) | `txt` | `.txt` | + +## Contributing + +When contributing to this project: + +1. **Follow the established patterns** - Use the command structure, error handling, and testing patterns described above +2. **Write tests** - All new functionality should have corresponding tests +3. **Update documentation** - Keep this README up to date with new features +4. **Run tests before committing** - Ensure `go test ./...` passes +5. **Use meaningful commit messages** - Describe what changed and why + +## License + +[Add license information here] diff --git a/audit-cli/commands/extract/code-examples/code_examples.go b/audit-cli/commands/extract/code-examples/code_examples.go new file mode 100644 index 0000000..475806f --- /dev/null +++ b/audit-cli/commands/extract/code-examples/code_examples.go @@ -0,0 +1,181 @@ +// Package code_examples provides functionality for extracting code examples from RST files. +// +// This package implements the "extract code-examples" subcommand, which parses +// reStructuredText files and extracts code examples from various directives: +// - literalinclude: External file references with optional partial extraction +// - code-block: Inline code blocks with automatic dedenting +// - io-code-block: Input/output examples with nested directives +// +// The extracted code examples are written to individual files with standardized naming: +// {source-base}.{directive-type}.{index}.{ext} +// +// Supports recursive directory scanning and following include directives to process +// entire documentation trees. 
+package code_examples + +import ( + "fmt" + "os" + + "github.com/spf13/cobra" +) + +// NewCodeExamplesCommand creates the code-examples subcommand. +// +// This command extracts code examples from RST files and writes them to individual +// files in the output directory. Supports various flags for controlling behavior: +// - -r, --recursive: Recursively scan directories for RST files +// - -f, --follow-includes: Follow .. include:: directives +// - -o, --output: Output directory for extracted files +// - --dry-run: Show what would be extracted without writing files +// - -v, --verbose: Show detailed processing information +func NewCodeExamplesCommand() *cobra.Command { + var ( + recursive bool + followIncludes bool + outputDir string + dryRun bool + verbose bool + ) + + cmd := &cobra.Command{ + Use: "code-examples [filepath]", + Short: "Extract code examples from reStructuredText files", + Long: `Extract code examples from reStructuredText directives (code-block, literalinclude, io-code-block) +and output them as individual files.`, + Args: cobra.ExactArgs(1), + RunE: func(cmd *cobra.Command, args []string) error { + filePath := args[0] + return runExtract(filePath, recursive, followIncludes, outputDir, dryRun, verbose) + }, + } + + cmd.Flags().BoolVarP(&recursive, "recursive", "r", false, "Recursively scan directories for files to process") + cmd.Flags().BoolVarP(&followIncludes, "follow-includes", "f", false, "Follow .. include:: directives in RST files") + cmd.Flags().StringVarP(&outputDir, "output", "o", "./output", "Output directory for code example files") + cmd.Flags().BoolVar(&dryRun, "dry-run", false, "Show what would be outputted without writing files") + cmd.Flags().BoolVarP(&verbose, "verbose", "v", false, "Provide additional information during execution") + + return cmd +} + +// RunExtract executes the extraction operation and returns the report. +// +// This function is exported for use in tests. 
It extracts code examples from the +// specified file or directory and writes them to the output directory. +// +// Parameters: +// - filePath: Path to RST file or directory to process +// - outputDir: Directory where extracted files will be written +// - recursive: If true, recursively scan directories for RST files +// - followIncludes: If true, follow .. include:: directives +// - dryRun: If true, show what would be extracted without writing files +// - verbose: If true, show detailed processing information +// +// Returns: +// - *Report: Statistics about the extraction operation +// - error: Any error encountered during extraction +func RunExtract(filePath string, outputDir string, recursive bool, followIncludes bool, dryRun bool, verbose bool) (*Report, error) { + report, err := runExtractInternal(filePath, recursive, followIncludes, outputDir, dryRun, verbose) + return report, err +} + +// runExtract executes the extraction operation (internal wrapper for CLI). +// +// This is a thin wrapper around runExtractInternal that discards the report +// and only returns errors, suitable for use in the CLI command handler. 
+func runExtract(filePath string, recursive bool, followIncludes bool, outputDir string, dryRun bool, verbose bool) error { + _, err := runExtractInternal(filePath, recursive, followIncludes, outputDir, dryRun, verbose) + return err +} + +// runExtractInternal executes the extraction operation +func runExtractInternal(filePath string, recursive bool, followIncludes bool, outputDir string, dryRun bool, verbose bool) (*Report, error) { + fileInfo, err := os.Stat(filePath) + if err != nil { + return nil, fmt.Errorf("failed to access path %s: %w", filePath, err) + } + + report := NewReport() + + var filesToProcess []string + + if fileInfo.IsDir() { + if verbose { + fmt.Printf("Scanning directory: %s (recursive: %v)\n", filePath, recursive) + } + filesToProcess, err = TraverseDirectory(filePath, recursive) + if err != nil { + return nil, fmt.Errorf("failed to traverse directory: %w", err) + } + } else { + filesToProcess = []string{filePath} + } + + var filteredFiles []string + for _, file := range filesToProcess { + if ShouldProcessFile(file) { + filteredFiles = append(filteredFiles, file) + } + } + filesToProcess = filteredFiles + + if verbose { + fmt.Printf("Found %d files to process\n", len(filesToProcess)) + } + + if !dryRun { + if err := EnsureOutputDirectory(outputDir); err != nil { + return nil, fmt.Errorf("failed to create output directory: %w", err) + } + } + + // Track visited files to prevent circular includes + visited := make(map[string]bool) + + for _, file := range filesToProcess { + if verbose { + fmt.Printf("Processing: %s\n", file) + } + + // Use ParseFileWithIncludes to follow include directives when followIncludes flag is set + examples, processedFiles, err := ParseFileWithIncludes(file, followIncludes, visited, verbose) + if err != nil { + fmt.Fprintf(os.Stderr, "Warning: failed to parse %s: %v\n", file, err) + continue + } + + // Add all processed files (including includes) to the report + for _, processedFile := range processedFiles { + 
report.AddTraversedFile(processedFile) + } + + for _, example := range examples { + outputPath, err := WriteCodeExample(example, outputDir, dryRun) + if err != nil { + fmt.Fprintf(os.Stderr, "Warning: failed to write code example: %v\n", err) + continue + } + + if verbose { + if dryRun { + fmt.Printf(" [DRY RUN] Would write: %s\n", outputPath) + } else { + fmt.Printf(" Wrote: %s\n", outputPath) + } + } + + report.AddCodeExample(example, outputPath) + if !dryRun { + report.OutputFilesWritten++ + } + } + } + + if dryRun { + fmt.Println("\n[DRY RUN MODE - No files were written]") + } + PrintReport(report, verbose) + + return report, nil +} diff --git a/audit-cli/commands/extract/code-examples/code_examples_test.go b/audit-cli/commands/extract/code-examples/code_examples_test.go new file mode 100644 index 0000000..ba8a16e --- /dev/null +++ b/audit-cli/commands/extract/code-examples/code_examples_test.go @@ -0,0 +1,597 @@ +package code_examples + +import ( + "os" + "path/filepath" + "testing" +) + +// TestLiteralIncludeDirective tests the parsing and extraction of literalinclude directives +func TestLiteralIncludeDirective(t *testing.T) { + // Setup paths + testDataDir := filepath.Join("..", "..", "..", "testdata") + inputFile := filepath.Join(testDataDir, "input-files", "source", "literalinclude-test.rst") + expectedOutputDir := filepath.Join(testDataDir, "expected-output") + + // Create temporary output directory + tempDir, err := os.MkdirTemp("", "audit-test-*") + if err != nil { + t.Fatalf("Failed to create temp directory: %v", err) + } + defer os.RemoveAll(tempDir) + + // Run the extract command + report, err := RunExtract(inputFile, tempDir, false, false, false, false) + if err != nil { + t.Fatalf("RunExtract failed: %v", err) + } + + // Verify the report + if report.FilesTraversed != 1 { + t.Errorf("Expected 1 file traversed, got %d", report.FilesTraversed) + } + + if report.OutputFilesWritten != 7 { + t.Errorf("Expected 7 output files, got %d", 
report.OutputFilesWritten) + } + + // Expected output files + expectedFiles := []string{ + "literalinclude-test.literalinclude.1.py", + "literalinclude-test.literalinclude.2.go", + "literalinclude-test.literalinclude.3.js", + "literalinclude-test.literalinclude.4.php", + "literalinclude-test.literalinclude.5.rb", + "literalinclude-test.literalinclude.6.ts", + "literalinclude-test.literalinclude.7.cpp", + } + + // Compare each output file with expected + for _, filename := range expectedFiles { + actualPath := filepath.Join(tempDir, filename) + expectedPath := filepath.Join(expectedOutputDir, filename) + + // Read actual output + actualContent, err := os.ReadFile(actualPath) + if err != nil { + t.Errorf("Failed to read actual output file %s: %v", filename, err) + continue + } + + // Read expected output + expectedContent, err := os.ReadFile(expectedPath) + if err != nil { + t.Errorf("Failed to read expected output file %s: %v", filename, err) + continue + } + + // Compare content + if string(actualContent) != string(expectedContent) { + t.Errorf("Content mismatch for %s\nExpected:\n%s\n\nActual:\n%s", + filename, string(expectedContent), string(actualContent)) + } + } + + // Verify language counts + expectedLanguages := map[string]int{ + "python": 1, + "go": 1, + "javascript": 1, + "php": 1, + "ruby": 1, + "typescript": 1, + "cpp": 1, + } + + for lang, expectedCount := range expectedLanguages { + if actualCount := report.LanguageCounts[lang]; actualCount != expectedCount { + t.Errorf("Expected %d %s examples, got %d", expectedCount, lang, actualCount) + } + } + + // Verify directive counts + if count := report.DirectiveCounts[LiteralInclude]; count != 7 { + t.Errorf("Expected 7 literalinclude directives, got %d", count) + } +} + +// TestIncludeDirectiveFollowing tests that include directives are followed correctly +func TestIncludeDirectiveFollowing(t *testing.T) { + // Setup paths + testDataDir := filepath.Join("..", "..", "..", "testdata") + inputFile := 
// TestCodeBlockDirective tests the parsing and extraction of code-block directives.
//
// It runs RunExtract on code-block-test.rst with every flag disabled, checks the
// traversed-file and written-file counts in the report, and compares each of the
// seven generated files byte-for-byte with its counterpart in expected-output.
func TestCodeBlockDirective(t *testing.T) {
	// Setup paths
	testDataDir := filepath.Join("..", "..", "..", "testdata")
	inputFile := filepath.Join(testDataDir, "input-files", "source", "code-block-test.rst")
	expectedOutputDir := filepath.Join(testDataDir, "expected-output")

	// Create temp directory for output
	tempDir, err := os.MkdirTemp("", "audit-test-code-block-*")
	if err != nil {
		t.Fatal(err)
	}
	defer os.RemoveAll(tempDir)

	// Run extract on code-block test file
	report, err := RunExtract(inputFile, tempDir, false, false, false, false)
	if err != nil {
		t.Fatalf("RunExtract failed: %v", err)
	}

	// Verify report
	if report.FilesTraversed != 1 {
		t.Errorf("Expected 1 file traversed, got %d", report.FilesTraversed)
	}

	if report.OutputFilesWritten != 7 {
		t.Errorf("Expected 7 output files, got %d", report.OutputFilesWritten)
	}

	// Expected output files
	expectedFiles := []string{
		"code-block-test.code-block.1.js",  // JavaScript with language
		"code-block-test.code-block.2.py",  // Python with options
		"code-block-test.code-block.3.js",  // JSON array example
		"code-block-test.code-block.4.txt", // No language (undefined)
		"code-block-test.code-block.5.sh",  // Shell script
		"code-block-test.code-block.6.ts",  // TypeScript normalization
		"code-block-test.code-block.7.cpp", // C++ normalization
	}

	// Compare each output file with expected
	for _, filename := range expectedFiles {
		actualPath := filepath.Join(tempDir, filename)
		expectedPath := filepath.Join(expectedOutputDir, filename)

		actualContent, err := os.ReadFile(actualPath)
		if err != nil {
			t.Errorf("Failed to read actual file %s: %v", filename, err)
			continue
		}

		expectedContent, err := os.ReadFile(expectedPath)
		if err != nil {
			t.Errorf("Failed to read expected file %s: %v", filename, err)
			continue
		}

		if string(actualContent) != string(expectedContent) {
			t.Errorf("Content mismatch for %s\nExpected:\n%s\n\nActual:\n%s",
				filename, string(expectedContent), string(actualContent))
		}
	}
}
testDataDir := filepath.Join("..", "..", "..", "testdata") + inputFile := filepath.Join(testDataDir, "input-files", "source", "nested-code-block-test.rst") + expectedOutputDir := filepath.Join(testDataDir, "expected-output") + + // Create temp directory for output + tempDir, err := os.MkdirTemp("", "audit-test-nested-code-block-*") + if err != nil { + t.Fatal(err) + } + defer os.RemoveAll(tempDir) + + // Run extract on nested code-block test file + report, err := RunExtract(inputFile, tempDir, false, false, false, false) + if err != nil { + t.Fatalf("RunExtract failed: %v", err) + } + + // Verify we found 11 code blocks + if report.OutputFilesWritten != 11 { + t.Errorf("Expected 11 output files, got %d", report.OutputFilesWritten) + } + + // Verify all are code-block directives + if report.DirectiveCounts[CodeBlock] != 11 { + t.Errorf("Expected 11 code-block directives, got %d", report.DirectiveCounts[CodeBlock]) + } + + // Expected files and their languages + expectedFiles := map[string]string{ + "nested-code-block-test.code-block.1.js": "javascript", + "nested-code-block-test.code-block.2.js": "javascript", + "nested-code-block-test.code-block.3.js": "javascript", + "nested-code-block-test.code-block.4.py": "python", + "nested-code-block-test.code-block.5.go": "go", + "nested-code-block-test.code-block.6.ts": "typescript", + "nested-code-block-test.code-block.7.ts": "typescript", + "nested-code-block-test.code-block.8.sh": "shell", + "nested-code-block-test.code-block.9.rb": "ruby", + "nested-code-block-test.code-block.10.rb": "ruby", + "nested-code-block-test.code-block.11.txt": "undefined", + } + + // Verify each expected file exists and matches + for filename := range expectedFiles { + actualPath := filepath.Join(tempDir, filename) + expectedPath := filepath.Join(expectedOutputDir, filename) + + // Check file exists + if _, err := os.Stat(actualPath); os.IsNotExist(err) { + t.Errorf("Expected output file not created: %s", filename) + continue + } + + // 
Compare content + actualContent, err := os.ReadFile(actualPath) + if err != nil { + t.Errorf("Failed to read actual file %s: %v", filename, err) + continue + } + + expectedContent, err := os.ReadFile(expectedPath) + if err != nil { + t.Errorf("Failed to read expected file %s: %v", filename, err) + continue + } + + if string(actualContent) != string(expectedContent) { + t.Errorf("Content mismatch for %s\nExpected:\n%s\n\nActual:\n%s", + filename, string(expectedContent), string(actualContent)) + } + } +} + +func TestIoCodeBlockDirective(t *testing.T) { + // Setup paths + testDataDir := filepath.Join("..", "..", "..", "testdata") + inputFile := filepath.Join(testDataDir, "input-files", "source", "io-code-block-test.rst") + expectedOutputDir := filepath.Join(testDataDir, "expected-output") + + // Create temp directory for output + tempDir, err := os.MkdirTemp("", "audit-test-io-code-block-*") + if err != nil { + t.Fatal(err) + } + defer os.RemoveAll(tempDir) + + // Run extract on io-code-block test file + report, err := RunExtract(inputFile, tempDir, false, false, false, false) + if err != nil { + t.Fatalf("RunExtract failed: %v", err) + } + + // Verify we found 11 code examples (7 directives, but Test 2 fails, Test 7 has no output) + // Test 1: input + output = 2 + // Test 2: fails (file not found) = 0 + // Test 3: input + output = 2 + // Test 4: input + output = 2 + // Test 5: input + output = 2 + // Test 6: input + output = 2 + // Test 7: input only = 1 + // Total: 11 + if report.OutputFilesWritten != 11 { + t.Errorf("Expected 11 output files, got %d", report.OutputFilesWritten) + } + + // Verify all are io-code-block directives + if report.DirectiveCounts[IoCodeBlock] != 11 { + t.Errorf("Expected 11 io-code-block examples, got %d", report.DirectiveCounts[IoCodeBlock]) + } + + // Expected files + expectedFiles := []string{ + // Test 1: Inline input/output (JavaScript) + "io-code-block-test.io-code-block.1.input.js", + "io-code-block-test.io-code-block.1.output.js", 
+ // Test 2: File-based (skipped - files don't exist) + // Test 3: Python inline + "io-code-block-test.io-code-block.3.input.py", + "io-code-block-test.io-code-block.3.output.py", + // Test 4: Shell command + "io-code-block-test.io-code-block.4.input.sh", + "io-code-block-test.io-code-block.4.output.txt", + // Test 5: TypeScript + "io-code-block-test.io-code-block.5.input.ts", + "io-code-block-test.io-code-block.5.output.txt", + // Test 6: Nested in procedure + "io-code-block-test.io-code-block.6.input.js", + "io-code-block-test.io-code-block.6.output.js", + // Test 7: Input only (Go) + "io-code-block-test.io-code-block.7.input.go", + } + + // Verify each expected file exists and matches + for _, filename := range expectedFiles { + actualPath := filepath.Join(tempDir, filename) + expectedPath := filepath.Join(expectedOutputDir, filename) + + // Check file exists + if _, err := os.Stat(actualPath); os.IsNotExist(err) { + t.Errorf("Expected output file not created: %s", filename) + continue + } + + // Compare content + actualContent, err := os.ReadFile(actualPath) + if err != nil { + t.Errorf("Failed to read actual file %s: %v", filename, err) + continue + } + + expectedContent, err := os.ReadFile(expectedPath) + if err != nil { + t.Errorf("Failed to read expected file %s: %v", filename, err) + continue + } + + if string(actualContent) != string(expectedContent) { + t.Errorf("Content mismatch for %s\nExpected:\n%s\n\nActual:\n%s", + filename, string(expectedContent), string(actualContent)) + } + } +} + +func TestEmptyFile(t *testing.T) { + // Create a temporary file with no directives + tempDir, err := os.MkdirTemp("", "audit-test-*") + if err != nil { + t.Fatalf("Failed to create temp directory: %v", err) + } + defer os.RemoveAll(tempDir) + + // Create a source directory structure + sourceDir := filepath.Join(tempDir, "source") + if err := os.MkdirAll(sourceDir, 0755); err != nil { + t.Fatalf("Failed to create source directory: %v", err) + } + + emptyFile := 
filepath.Join(sourceDir, "empty.rst") + if err := os.WriteFile(emptyFile, []byte("Empty File\n==========\n\nNo directives here."), 0644); err != nil { + t.Fatalf("Failed to create empty file: %v", err) + } + + outputDir := filepath.Join(tempDir, "output") + if err := os.MkdirAll(outputDir, 0755); err != nil { + t.Fatalf("Failed to create output directory: %v", err) + } + + // Run the extract command + report, err := RunExtract(emptyFile, outputDir, false, false, false, false) + if err != nil { + t.Fatalf("RunExtract failed: %v", err) + } + + // Verify no output files were created + if report.OutputFilesWritten != 0 { + t.Errorf("Expected 0 output files for empty file, got %d", report.OutputFilesWritten) + } + + // Verify the file was still traversed + if report.FilesTraversed != 1 { + t.Errorf("Expected 1 file traversed, got %d", report.FilesTraversed) + } +} + +// TestRecursiveDirectoryScanning tests that -r flag scans all files in subdirectories +func TestRecursiveDirectoryScanning(t *testing.T) { + // Setup paths + testDataDir := filepath.Join("..", "..", "..", "testdata") + inputDir := filepath.Join(testDataDir, "input-files", "source") + + // Create temporary output directory + tempDir, err := os.MkdirTemp("", "audit-test-recursive-*") + if err != nil { + t.Fatalf("Failed to create temp directory: %v", err) + } + defer os.RemoveAll(tempDir) + + // Run the extract command with recursive=true, followIncludes=false + report, err := RunExtract(inputDir, tempDir, true, false, false, false) + if err != nil { + t.Fatalf("RunExtract failed: %v", err) + } + + // Verify that multiple files were traversed + // Should find all .rst files in source/ and source/includes/ + // Expected: code-block-test.rst, include-test.rst, io-code-block-test.rst, + // literalinclude-test.rst, nested-code-block-test.rst, + // includes/examples.rst, includes/intro.rst + expectedMinFiles := 7 + if report.FilesTraversed < expectedMinFiles { + t.Errorf("Expected at least %d files traversed with 
recursive scan, got %d", + expectedMinFiles, report.FilesTraversed) + } + + // Verify that code examples were extracted from multiple files + // Without following includes, include-test.rst should have 0 examples + // but all other files should have examples + if report.OutputFilesWritten < 30 { + t.Errorf("Expected at least 30 output files from recursive scan, got %d", + report.OutputFilesWritten) + } + + // Verify we have examples from different directive types + if report.DirectiveCounts[CodeBlock] == 0 { + t.Error("Expected code-block directives to be found") + } + if report.DirectiveCounts[LiteralInclude] == 0 { + t.Error("Expected literalinclude directives to be found") + } + if report.DirectiveCounts[IoCodeBlock] == 0 { + t.Error("Expected io-code-block directives to be found") + } +} + +// TestFollowIncludesWithoutRecursive tests that -f flag follows includes in a single file +func TestFollowIncludesWithoutRecursive(t *testing.T) { + // Setup paths + testDataDir := filepath.Join("..", "..", "..", "testdata") + inputFile := filepath.Join(testDataDir, "input-files", "source", "include-test.rst") + + // Create temporary output directory + tempDir, err := os.MkdirTemp("", "audit-test-follow-*") + if err != nil { + t.Fatalf("Failed to create temp directory: %v", err) + } + defer os.RemoveAll(tempDir) + + // Run the extract command with recursive=false, followIncludes=true + report, err := RunExtract(inputFile, tempDir, false, true, false, false) + if err != nil { + t.Fatalf("RunExtract failed: %v", err) + } + + // Verify that multiple files were traversed (main file + includes) + // include-test.rst includes intro.rst and examples.rst + expectedFiles := 3 + if report.FilesTraversed != expectedFiles { + t.Errorf("Expected %d files traversed (main + 2 includes), got %d", + expectedFiles, report.FilesTraversed) + } + + // Verify that the code example from the included file was extracted + // examples.rst has 1 literalinclude directive + if report.OutputFilesWritten 
!= 1 { + t.Errorf("Expected 1 output file from included files, got %d", + report.OutputFilesWritten) + } + + // Verify the directive type + if report.DirectiveCounts[LiteralInclude] != 1 { + t.Errorf("Expected 1 literalinclude directive, got %d", + report.DirectiveCounts[LiteralInclude]) + } +} + +// TestRecursiveWithFollowIncludes tests that -r and -f together work correctly +func TestRecursiveWithFollowIncludes(t *testing.T) { + // Setup paths + testDataDir := filepath.Join("..", "..", "..", "testdata") + inputDir := filepath.Join(testDataDir, "input-files", "source") + + // Create temporary output directory + tempDir, err := os.MkdirTemp("", "audit-test-both-*") + if err != nil { + t.Fatalf("Failed to create temp directory: %v", err) + } + defer os.RemoveAll(tempDir) + + // Run the extract command with recursive=true, followIncludes=true + report, err := RunExtract(inputDir, tempDir, true, true, false, false) + if err != nil { + t.Fatalf("RunExtract failed: %v", err) + } + + // Verify that multiple files were traversed + // Should find all .rst files in source/ and source/includes/ + expectedMinFiles := 7 + if report.FilesTraversed < expectedMinFiles { + t.Errorf("Expected at least %d files traversed, got %d", + expectedMinFiles, report.FilesTraversed) + } + + // Verify that code examples were extracted + // This should be the same as recursive-only since the include files + // are already found by recursive directory scanning + if report.OutputFilesWritten < 30 { + t.Errorf("Expected at least 30 output files, got %d", + report.OutputFilesWritten) + } + + // Verify we have examples from all directive types + if report.DirectiveCounts[CodeBlock] == 0 { + t.Error("Expected code-block directives to be found") + } + if report.DirectiveCounts[LiteralInclude] == 0 { + t.Error("Expected literalinclude directives to be found") + } + if report.DirectiveCounts[IoCodeBlock] == 0 { + t.Error("Expected io-code-block directives to be found") + } +} + +// 
// TestNoFlagsOnDirectory tests that, without the -r flag, only the files directly
// inside the given directory are processed and subdirectories are skipped.
//
// It runs RunExtract on the source/ test directory with recursive and
// followIncludes both false, then checks that exactly the five top-level files
// were traversed and that at least 30 output files were written.
func TestNoFlagsOnDirectory(t *testing.T) {
	// Setup paths
	testDataDir := filepath.Join("..", "..", "..", "testdata")
	inputDir := filepath.Join(testDataDir, "input-files", "source")

	// Create temporary output directory
	tempDir, err := os.MkdirTemp("", "audit-test-noflags-*")
	if err != nil {
		t.Fatalf("Failed to create temp directory: %v", err)
	}
	defer os.RemoveAll(tempDir)

	// Run the extract command with recursive=false, followIncludes=false on a directory
	report, err := RunExtract(inputDir, tempDir, false, false, false, false)
	if err != nil {
		t.Fatalf("RunExtract failed: %v", err)
	}

	// Without recursive flag, should only process files in the top-level directory
	// Should NOT include files in includes/ subdirectory
	// Expected: code-block-test.rst, include-test.rst, io-code-block-test.rst,
	// literalinclude-test.rst, nested-code-block-test.rst (5 files)
	expectedFiles := 5
	if report.FilesTraversed != expectedFiles {
		t.Errorf("Expected %d files traversed (top-level only), got %d",
			expectedFiles, report.FilesTraversed)
	}

	// Without followIncludes, include-test.rst should have 0 examples
	// So we should have examples from the other 4 files
	if report.OutputFilesWritten < 30 {
		t.Errorf("Expected at least 30 output files, got %d",
			report.OutputFilesWritten)
	}
}
// Language constants define canonical language names used throughout the tool.
// These are used for normalization and file extension mapping.
const (
	Bash       = "bash"
	C          = "c"
	CPP        = "cpp"
	CSharp     = "csharp"
	Console    = "console"
	Go         = "go"
	Java       = "java"
	JavaScript = "javascript"
	Kotlin     = "kotlin"
	PHP        = "php"
	PowerShell = "powershell"
	PS5        = "ps5"
	Python     = "python"
	Ruby       = "ruby"
	Rust       = "rust"
	Scala      = "scala"
	Shell      = "shell"
	Swift      = "swift"
	Text       = "text"
	TypeScript = "typescript"
	Undefined  = "undefined"
)

// File extension constants define the file extensions for each language.
// Used when generating output filenames for extracted code examples.
const (
	BashExtension       = ".sh"
	CExtension          = ".c"
	CPPExtension        = ".cpp"
	CSharpExtension     = ".cs"
	ConsoleExtension    = ".sh"
	GoExtension         = ".go"
	JavaExtension       = ".java"
	JavaScriptExtension = ".js"
	KotlinExtension     = ".kt"
	PHPExtension        = ".php"
	PowerShellExtension = ".ps1"
	PS5Extension        = ".ps1"
	PythonExtension     = ".py"
	RubyExtension       = ".rb"
	RustExtension       = ".rs"
	ScalaExtension      = ".scala"
	ShellExtension      = ".sh"
	SwiftExtension      = ".swift"
	TextExtension       = ".txt"
	TypeScriptExtension = ".ts"
	UndefinedExtension  = ".txt"
)

// GetFileExtensionFromLanguage returns the file extension for a language identifier.
//
// The identifier is trimmed and lower-cased before lookup, so matching is
// case-insensitive and ignores surrounding whitespace. Both canonical names and
// common aliases are recognized (e.g., "ts" -> ".ts", "c++" -> ".cpp",
// "golang" -> ".go"). Anything unrecognized, including the empty string and
// "none", maps to ".txt".
//
// The lookup is a constant switch rather than a map literal so that no map is
// allocated and populated on every call.
//
// Parameters:
//   - language: The language identifier (case-insensitive)
//
// Returns:
//   - string: The file extension including the leading dot (e.g., ".js", ".py")
func GetFileExtensionFromLanguage(language string) string {
	switch strings.ToLower(strings.TrimSpace(language)) {
	case Bash:
		return BashExtension
	case C:
		return CExtension
	case CPP, "c++":
		return CPPExtension
	case CSharp, "c#", "cs":
		return CSharpExtension
	case Console:
		return ConsoleExtension
	case Go, "golang":
		return GoExtension
	case Java:
		return JavaExtension
	case JavaScript, "js":
		return JavaScriptExtension
	case Kotlin, "kt":
		return KotlinExtension
	case PHP:
		return PHPExtension
	case PowerShell, "ps1":
		return PowerShellExtension
	case PS5:
		return PS5Extension
	case Python, "py":
		return PythonExtension
	case Ruby, "rb":
		return RubyExtension
	case Rust, "rs":
		return RustExtension
	case Scala:
		return ScalaExtension
	case Shell, "sh":
		return ShellExtension
	case Swift:
		return SwiftExtension
	case Text, "txt":
		return TextExtension
	case TypeScript, "ts":
		return TypeScriptExtension
	case Undefined, "", "none":
		return UndefinedExtension
	default:
		// Unknown languages fall back to the same extension as undefined ones.
		return UndefinedExtension
	}
}
+// +// Parameters: +// - language: The language identifier (case-insensitive) +// +// Returns: +// - string: The normalized language name, or the original string if no normalization is defined +func NormalizeLanguage(language string) string { + lang := strings.ToLower(strings.TrimSpace(language)) + + normalizeMap := map[string]string{ + Bash: Bash, + C: C, + CPP: CPP, + CSharp: CSharp, + Console: Console, + Go: Go, + Java: Java, + JavaScript: JavaScript, + Kotlin: Kotlin, + PHP: PHP, + PowerShell: PowerShell, + PS5: PS5, + Python: Python, + Ruby: Ruby, + Rust: Rust, + Scala: Scala, + Shell: Shell, + Swift: Swift, + Text: Text, + TypeScript: TypeScript, + "c++": CPP, + "c#": CSharp, + "cs": CSharp, + "golang": Go, + "js": JavaScript, + "kt": Kotlin, + "py": Python, + "rb": Ruby, + "rs": Rust, + "sh": Shell, + "ts": TypeScript, + "txt": Text, + "ps1": PowerShell, + "": Undefined, + "none": Undefined, + } + + if normalized, exists := normalizeMap[lang]; exists { + return normalized + } + + return lang +} diff --git a/audit-cli/commands/extract/code-examples/parser.go b/audit-cli/commands/extract/code-examples/parser.go new file mode 100644 index 0000000..0681428 --- /dev/null +++ b/audit-cli/commands/extract/code-examples/parser.go @@ -0,0 +1,253 @@ +package code_examples + +import ( + "fmt" + "os" + + "github.com/mongodb/code-example-tooling/audit-cli/internal/rst" +) + +// ParseFile parses a file and extracts code examples from reStructuredText directives. +// +// This function parses all supported RST directives (literalinclude, code-block, io-code-block) +// and converts them into CodeExample structs ready for writing to files. 
+// +// Parameters: +// - filePath: Path to the RST file to parse +// +// Returns: +// - []CodeExample: Slice of extracted code examples +// - error: Any error encountered during parsing +func ParseFile(filePath string) ([]CodeExample, error) { + // Parse all directives from the file + directives, err := rst.ParseDirectives(filePath) + if err != nil { + return nil, err + } + + var examples []CodeExample + directiveCounts := make(map[rst.DirectiveType]int) + + for _, directive := range directives { + // Track directive index for this type + directiveCounts[directive.Type]++ + index := directiveCounts[directive.Type] + + switch directive.Type { + case rst.LiteralInclude: + example, err := parseLiteralInclude(filePath, directive, index) + if err != nil { + // Log warning but continue processing + fmt.Fprintf(os.Stderr, "Warning: failed to parse literalinclude at line %d in %s: %v\n", + directive.LineNum, filePath, err) + continue + } + examples = append(examples, example) + + case rst.CodeBlock: + example, err := parseCodeBlock(filePath, directive, index) + if err != nil { + // Log warning but continue processing + fmt.Fprintf(os.Stderr, "Warning: failed to parse code-block at line %d in %s: %v\n", + directive.LineNum, filePath, err) + continue + } + examples = append(examples, example) + + case rst.IoCodeBlock: + examples = append(examples, parseIoCodeBlock(filePath, directive, index)...) 
+ continue + } + } + + return examples, nil +} + +// parseLiteralInclude parses a literalinclude directive and extracts the code content +func parseLiteralInclude(sourceFile string, directive rst.Directive, index int) (CodeExample, error) { + // Extract the content from the referenced file + content, err := rst.ExtractLiteralIncludeContent(sourceFile, directive) + if err != nil { + return CodeExample{}, err + } + + // Get the language from the :language: option + language := directive.Options["language"] + if language == "" { + language = Undefined + } + + // Normalize the language + language = NormalizeLanguage(language) + + return CodeExample{ + SourceFile: sourceFile, + DirectiveName: DirectiveType(directive.Type), + Language: language, + Content: content, + Index: index, + }, nil +} + +// parseCodeBlock parses a code-block directive and extracts the inline code content. +// +// The content is already dedented by the directive parser based on the first line's indentation. +// Language can be specified either as an argument (.. code-block:: javascript) or as an option (:language: javascript). +func parseCodeBlock(sourceFile string, directive rst.Directive, index int) (CodeExample, error) { + // The content is already parsed and dedented by the directive parser + content := directive.Content + if content == "" { + return CodeExample{}, fmt.Errorf("code-block has no content") + } + + // Get the language from the directive argument (e.g., .. 
code-block:: javascript) + // or from the :language: option + language := directive.Argument + if language == "" { + language = directive.Options["language"] + } + if language == "" { + language = Undefined + } + + // Normalize the language + language = NormalizeLanguage(language) + + return CodeExample{ + SourceFile: sourceFile, + DirectiveName: DirectiveType(directive.Type), + Language: language, + Content: content, + Index: index, + }, nil +} + +// ParseFileWithIncludes parses a file and recursively follows include directives. +// +// This function wraps the internal RST package's ParseFileWithIncludes to extract +// code examples from the main file and all included files. +// +// Parameters: +// - filePath: Path to the RST file to parse +// - followIncludes: If true, recursively follow .. include:: directives +// - visited: Map tracking already-processed files to prevent circular includes +// - verbose: If true, print detailed processing information +// +// Returns: +// - []CodeExample: All code examples from the file and its includes +// - []string: List of all processed file paths +// - error: Any error encountered during parsing +func ParseFileWithIncludes(filePath string, followIncludes bool, visited map[string]bool, verbose bool) ([]CodeExample, []string, error) { + var examples []CodeExample + + // Define the parse function that will be called for each file + parseFunc := func(path string) error { + fileExamples, err := ParseFile(path) + if err != nil { + return err + } + examples = append(examples, fileExamples...) + return nil + } + + // Use the internal RST package to handle include following + processedFiles, err := rst.ParseFileWithIncludes(filePath, followIncludes, visited, verbose, parseFunc) + if err != nil { + return nil, processedFiles, err + } + + return examples, processedFiles, nil +} + +// TraverseDirectory recursively traverses a directory and returns all file paths. 
+// +// This is a wrapper around the internal RST package's TraverseDirectory function. +// +// Parameters: +// - rootPath: Root directory to traverse +// - recursive: If true, recursively scan subdirectories +// +// Returns: +// - []string: List of all file paths found +// - error: Any error encountered during traversal +func TraverseDirectory(rootPath string, recursive bool) ([]string, error) { + return rst.TraverseDirectory(rootPath, recursive) +} + +// ShouldProcessFile determines if a file should be processed based on its extension. +// +// This is a wrapper around the internal RST package's ShouldProcessFile function. +// Returns true for files with .rst, .txt, or .md extensions. +func ShouldProcessFile(filePath string) bool { + return rst.ShouldProcessFile(filePath) +} + +// parseIoCodeBlock parses an io-code-block directive and extracts input/output code examples +// Returns a slice of CodeExample (one for input, one for output if present) +func parseIoCodeBlock(sourceFile string, directive rst.Directive, index int) []CodeExample { + var examples []CodeExample + + // Process input directive + if directive.InputDirective != nil { + inputExample, err := parseSubDirective(sourceFile, directive.InputDirective, "input", index) + if err != nil { + fmt.Fprintf(os.Stderr, "Warning: failed to parse input directive at line %d in %s: %v\n", + directive.LineNum, sourceFile, err) + } else { + examples = append(examples, inputExample) + } + } + + // Process output directive + if directive.OutputDirective != nil { + outputExample, err := parseSubDirective(sourceFile, directive.OutputDirective, "output", index) + if err != nil { + fmt.Fprintf(os.Stderr, "Warning: failed to parse output directive at line %d in %s: %v\n", + directive.LineNum, sourceFile, err) + } else { + examples = append(examples, outputExample) + } + } + + return examples +} + +// parseSubDirective parses an input or output sub-directive within an io-code-block +func parseSubDirective(sourceFile string, 
subDir *rst.SubDirective, dirType string, index int) (CodeExample, error) { + var content string + var err error + + // If there's a filepath argument, read from the file + if subDir.Argument != "" { + content, err = rst.ExtractLiteralIncludeContent(sourceFile, rst.Directive{ + Argument: subDir.Argument, + Options: subDir.Options, + }) + if err != nil { + return CodeExample{}, fmt.Errorf("failed to read file %s: %w", subDir.Argument, err) + } + } else { + // Use inline content + content = subDir.Content + if content == "" { + return CodeExample{}, fmt.Errorf("%s directive has no content or filepath", dirType) + } + } + + // Get language from options + language := subDir.Options["language"] + if language == "" { + language = Undefined + } + + language = NormalizeLanguage(language) + + return CodeExample{ + SourceFile: sourceFile, + DirectiveName: DirectiveType(rst.IoCodeBlock), + Language: language, + Content: content, + Index: index, + SubType: dirType, // "input" or "output" + }, nil +} diff --git a/audit-cli/commands/extract/code-examples/report.go b/audit-cli/commands/extract/code-examples/report.go new file mode 100644 index 0000000..3a6728e --- /dev/null +++ b/audit-cli/commands/extract/code-examples/report.go @@ -0,0 +1,114 @@ +package code_examples + +import ( + "fmt" + "sort" + "strings" +) + +// PrintReport prints the extraction report to stdout. 
// PrintReport writes the extraction report to stdout.
//
// The report always shows the number of files traversed and output files
// written. Language counts are shown as a sorted per-language breakdown when
// verbose is true and as a single total otherwise. Directive counts are
// listed in a fixed order (code-block, literalinclude, io-code-block).
// With verbose set, the traversed file paths and a per-source-file breakdown
// (sorted by path) are printed as well.
//
// Parameters:
//   - report: The report to print
//   - verbose: If true, show detailed breakdown including file paths and per-source stats
func PrintReport(report *Report, verbose bool) {
	rule := strings.Repeat("=", 60)
	directiveOrder := []DirectiveType{CodeBlock, LiteralInclude, IoCodeBlock}

	// sortedNames returns the keys of a count map in ascending order so the
	// output is deterministic.
	sortedNames := func(counts map[string]int) []string {
		names := make([]string, 0, len(counts))
		for name := range counts {
			names = append(names, name)
		}
		sort.Strings(names)
		return names
	}

	fmt.Println("\n" + rule)
	fmt.Println("CODE EXTRACTION REPORT")
	fmt.Println(rule)

	fmt.Printf("\nFiles Traversed: %d\n", report.FilesTraversed)
	if verbose && len(report.TraversedFilepaths) > 0 {
		fmt.Println("\nTraversed Filepaths:")
		for _, p := range report.TraversedFilepaths {
			fmt.Printf(" - %s\n", p)
		}
	}

	fmt.Printf("\nOutput Files Written: %d\n", report.OutputFilesWritten)

	if len(report.LanguageCounts) > 0 {
		fmt.Println("\nCode Examples by Language:")
		if verbose {
			for _, lang := range sortedNames(report.LanguageCounts) {
				fmt.Printf(" %-15s: %d\n", lang, report.LanguageCounts[lang])
			}
		} else {
			sum := 0
			for _, n := range report.LanguageCounts {
				sum += n
			}
			fmt.Printf(" Total: %d (use --verbose for breakdown)\n", sum)
		}
	}

	if len(report.DirectiveCounts) > 0 {
		fmt.Println("\nCode Examples by Directive Type:")
		for _, d := range directiveOrder {
			if n, ok := report.DirectiveCounts[d]; ok {
				fmt.Printf(" %-20s: %d\n", d, n)
			}
		}
	}

	if verbose && len(report.SourcePathStats) > 0 {
		fmt.Println("\nStatistics by Source File:")

		paths := make([]string, 0, len(report.SourcePathStats))
		for p := range report.SourcePathStats {
			paths = append(paths, p)
		}
		sort.Strings(paths)

		for _, p := range paths {
			stats := report.SourcePathStats[p]
			fmt.Printf("\n %s:\n", p)

			if len(stats.DirectiveCounts) > 0 {
				fmt.Println(" Directives:")
				for _, d := range directiveOrder {
					if n, ok := stats.DirectiveCounts[d]; ok {
						fmt.Printf(" %-20s: %d\n", d, n)
					}
				}
			}

			if len(stats.LanguageCounts) > 0 {
				fmt.Println(" Languages:")
				for _, lang := range sortedNames(stats.LanguageCounts) {
					fmt.Printf(" %-15s: %d\n", lang, stats.LanguageCounts[lang])
				}
			}

			if len(stats.OutputFiles) > 0 {
				fmt.Printf(" Output Files: %d\n", len(stats.OutputFiles))
			}
		}
	}

	fmt.Println("\n" + rule)
}

// DirectiveType names the kind of reStructuredText directive an example came from.
type DirectiveType string

const (
	// CodeBlock is an inline code block (.. code-block::).
	CodeBlock DirectiveType = "code-block"
	// LiteralInclude is a reference to an external file (.. literalinclude::).
	LiteralInclude DirectiveType = "literalinclude"
	// IoCodeBlock is an input/output example (.. io-code-block::).
	IoCodeBlock DirectiveType = "io-code-block"
)

// CodeExample is one code example extracted from an RST file.
//
// Each code example corresponds to one directive occurrence in the source file
// and will be written to a separate output file.
type CodeExample struct {
	SourceFile    string        // Path to the source RST file
	DirectiveName DirectiveType // Type of directive (code-block, literalinclude, io-code-block)
	Language      string        // Programming language (normalized)
	Content       string        // The actual code content
	Index         int           // The occurrence index of this directive in the source file (1-based)
	SubType       string        // For io-code-block: "input" or "output"
}

// Report accumulates statistics about an extraction run.
//
// It holds run-wide totals plus a per-source-file breakdown used for verbose
// reporting.
type Report struct {
	FilesTraversed     int                     // Total number of RST files processed
	TraversedFilepaths []string                // List of all processed file paths
	OutputFilesWritten int                     // Total number of code example files written
	LanguageCounts     map[string]int          // Count of examples by language
	DirectiveCounts    map[DirectiveType]int   // Count of examples by directive type
	SourcePathStats    map[string]*SourceStats // Per-file statistics
}

// SourceStats holds the statistics for a single source file.
//
// Used for verbose reporting to show a detailed breakdown per source file.
type SourceStats struct {
	DirectiveCounts map[DirectiveType]int // Count of directives by type in this file
	LanguageCounts  map[string]int        // Count of examples by language in this file
	OutputFiles     []string              // List of output files generated from this source
}

// NewReport returns a Report whose maps and path slice are allocated and empty.
func NewReport() *Report {
	return &Report{
		TraversedFilepaths: []string{},
		LanguageCounts:     map[string]int{},
		DirectiveCounts:    map[DirectiveType]int{},
		SourcePathStats:    map[string]*SourceStats{},
	}
}

// NewSourceStats returns a SourceStats whose maps and slice are allocated and empty.
func NewSourceStats() *SourceStats {
	return &SourceStats{
		DirectiveCounts: map[DirectiveType]int{},
		LanguageCounts:  map[string]int{},
		OutputFiles:     []string{},
	}
}

// AddCodeExample records one extracted code example in the report.
//
// It bumps the run-wide language and directive counters, then does the same
// for the example's source file (creating that file's SourceStats on first
// use) and appends outputPath to the file's output list.
func (r *Report) AddCodeExample(example CodeExample, outputPath string) {
	r.LanguageCounts[example.Language]++
	r.DirectiveCounts[example.DirectiveName]++

	stats, ok := r.SourcePathStats[example.SourceFile]
	if !ok {
		stats = NewSourceStats()
		r.SourcePathStats[example.SourceFile] = stats
	}

	stats.DirectiveCounts[example.DirectiveName]++
	stats.LanguageCounts[example.Language]++
	stats.OutputFiles = append(stats.OutputFiles, outputPath)
}

// AddTraversedFile records that a file was processed.
//
// It increments FilesTraversed and appends the path to TraversedFilepaths;
// callers are expected to invoke it once per processed file.
func (r *Report) AddTraversedFile(filepath string) {
	r.FilesTraversed++
	r.TraversedFilepaths = append(r.TraversedFilepaths, filepath)
}
+// +// Parameters: +// - example: The code example to write +// - outputDir: Directory where the file should be written +// - dryRun: If true, skip writing and only return the filename +// +// Returns: +// - string: The full path to the output file +// - error: Any error encountered during writing +func WriteCodeExample(example CodeExample, outputDir string, dryRun bool) (string, error) { + filename := GenerateOutputFilename(example) + outputPath := filepath.Join(outputDir, filename) + + if dryRun { + return outputPath, nil + } + + if err := os.MkdirAll(outputDir, 0755); err != nil { + return "", fmt.Errorf("failed to create output directory: %w", err) + } + + if err := os.WriteFile(outputPath, []byte(example.Content), 0644); err != nil { + return "", fmt.Errorf("failed to write file %s: %w", outputPath, err) + } + + return outputPath, nil +} + +// GenerateOutputFilename generates a standardized filename for a code example. +// +// The filename format is: {source-base}.{directive-type}.{index}.{ext} +// For io-code-block directives: {source-base}.{directive-type}.{index}.{subtype}.{ext} +// +// Examples: +// - my-doc.code-block.1.js +// - my-doc.literalinclude.2.py +// - my-doc.io-code-block.1.input.js +// - my-doc.io-code-block.1.output.json +// +// Parameters: +// - example: The code example to generate a filename for +// +// Returns: +// - string: The generated filename (without directory path) +func GenerateOutputFilename(example CodeExample) string { + sourceBase := filepath.Base(example.SourceFile) + sourceBase = strings.TrimSuffix(sourceBase, filepath.Ext(sourceBase)) + + extension := GetFileExtensionFromLanguage(example.Language) + + // For io-code-block, include the subtype (input/output) in the filename + if example.DirectiveName == IoCodeBlock && example.SubType != "" { + filename := fmt.Sprintf("%s.%s.%d.%s%s", + sourceBase, + example.DirectiveName, + example.Index, + example.SubType, + extension, + ) + return filename + } + + filename := 
fmt.Sprintf("%s.%s.%d%s", + sourceBase, + example.DirectiveName, + example.Index, + extension, + ) + + return filename +} + +// EnsureOutputDirectory ensures the output directory exists. +// +// Creates the directory and any necessary parent directories with permissions 0755. +// +// Parameters: +// - outputDir: Path to the directory to create +// +// Returns: +// - error: Any error encountered during directory creation +func EnsureOutputDirectory(outputDir string) error { + return os.MkdirAll(outputDir, 0755) +} diff --git a/audit-cli/commands/extract/extract.go b/audit-cli/commands/extract/extract.go new file mode 100644 index 0000000..7f1926b --- /dev/null +++ b/audit-cli/commands/extract/extract.go @@ -0,0 +1,34 @@ +// Package extract provides the parent command for extracting content from RST files. +// +// This package serves as the parent command for various extraction operations. +// Currently supports: +// - code-examples: Extract code examples from RST directives +// +// Future subcommands could include extracting tables, images, or other structured content. +package extract + +import ( + "github.com/mongodb/code-example-tooling/audit-cli/commands/extract/code-examples" + "github.com/spf13/cobra" +) + +// NewExtractCommand creates the extract parent command. +// +// This command serves as a parent for various extraction operations on RST files. +// It doesn't perform any operations itself but provides a namespace for subcommands. +func NewExtractCommand() *cobra.Command { + cmd := &cobra.Command{ + Use: "extract", + Short: "Extract content from reStructuredText files", + Long: `Extract various types of content from reStructuredText files. + +Currently supports extracting code examples from directives like literalinclude, +code-block, and io-code-block. 
Future subcommands may support extracting other +types of structured content such as tables, images, or metadata.`, + } + + // Add subcommands + cmd.AddCommand(code_examples.NewCodeExamplesCommand()) + + return cmd +} diff --git a/audit-cli/commands/search/find-string/find_string.go b/audit-cli/commands/search/find-string/find_string.go new file mode 100644 index 0000000..8fa3569 --- /dev/null +++ b/audit-cli/commands/search/find-string/find_string.go @@ -0,0 +1,193 @@ +// Package find_string provides functionality for searching code example files for substrings. +// +// This package implements the "search find-string" subcommand, which searches through +// extracted code example files to find occurrences of a specific substring. +// +// The search is case-sensitive and counts each file only once, even if the substring +// appears multiple times in the same file. +// +// Supports: +// - Recursive directory scanning +// - Verbose output with file paths and language breakdown +// - Language detection based on file extension +package find_string + +import ( + "fmt" + "os" + "path/filepath" + "strings" + + "github.com/spf13/cobra" +) + +// NewFindStringCommand creates the find-string subcommand. +// +// This command searches through extracted code example files for a specific substring. +// Supports flags for recursive search and verbose output. +// +// Flags: +// - -r, --recursive: Recursively search all files in subdirectories +// - -v, --verbose: Show file paths and language breakdown +func NewFindStringCommand() *cobra.Command { + var ( + recursive bool + verbose bool + ) + + cmd := &cobra.Command{ + Use: "find-string [filepath] [substring]", + Short: "Search for a substring in extracted code example files", + Long: `Search through extracted code example files to find occurrences of a specific substring. 
+Reports the number of code examples containing the substring.`, + Args: cobra.ExactArgs(2), + RunE: func(cmd *cobra.Command, args []string) error { + filePath := args[0] + substring := args[1] + return runSearch(filePath, substring, recursive, verbose) + }, + } + + cmd.Flags().BoolVarP(&recursive, "recursive", "r", false, "Recursively search all files in subdirectories") + cmd.Flags().BoolVarP(&verbose, "verbose", "v", false, "Provide additional information during execution") + + return cmd +} + +// RunSearch executes the search operation and returns the report. +// +// This function is exported for use in tests. It searches for the substring in the +// specified file or directory and returns statistics about the search. +// +// Parameters: +// - filePath: Path to file or directory to search +// - substring: The substring to search for (case-sensitive) +// - recursive: If true, recursively search subdirectories +// - verbose: If true, show detailed information during search +// +// Returns: +// - *SearchReport: Statistics about the search operation +// - error: Any error encountered during search +func RunSearch(filePath string, substring string, recursive bool, verbose bool) (*SearchReport, error) { + return runSearchInternal(filePath, substring, recursive, verbose) +} + +// runSearch executes the search operation (internal wrapper for CLI). +// +// This is a thin wrapper around runSearchInternal that discards the report +// and only returns errors, suitable for use in the CLI command handler. 
// runSearch executes the search operation (internal wrapper for CLI).
//
// It delegates to runSearchInternal, discards the report, and returns only
// the error, which is what the CLI command handler needs.
func runSearch(filePath string, substring string, recursive bool, verbose bool) error {
	_, err := runSearchInternal(filePath, substring, recursive, verbose)
	return err
}

// runSearchInternal contains the core logic for the find-string command.
//
// filePath may name a single file or a directory. For a directory the files
// are gathered with collectFiles (honoring recursive). Every file is searched
// for substring; a file that cannot be read is reported on stderr and skipped
// without being counted. The finished report is printed with PrintReport and
// also returned.
//
// Returns an error only when filePath cannot be stat'ed or the directory
// cannot be traversed.
func runSearchInternal(filePath string, substring string, recursive bool, verbose bool) (*SearchReport, error) {
	fileInfo, err := os.Stat(filePath)
	if err != nil {
		return nil, fmt.Errorf("failed to access path %s: %w", filePath, err)
	}

	report := NewSearchReport()

	var filesToSearch []string

	if fileInfo.IsDir() {
		if verbose {
			fmt.Printf("Scanning directory: %s (recursive: %v)\n", filePath, recursive)
		}
		filesToSearch, err = collectFiles(filePath, recursive)
		if err != nil {
			return nil, fmt.Errorf("failed to traverse directory: %w", err)
		}
	} else {
		filesToSearch = []string{filePath}
	}

	if verbose {
		fmt.Printf("Found %d files to search\n", len(filesToSearch))
		fmt.Printf("Searching for substring: %q\n\n", substring)
	}

	for _, file := range filesToSearch {
		if verbose {
			fmt.Printf("Searching: %s\n", file)
		}

		result, err := searchFile(file, substring)
		if err != nil {
			// Best effort: one unreadable file must not abort the whole search.
			fmt.Fprintf(os.Stderr, "Warning: failed to search %s: %v\n", file, err)
			continue
		}

		report.AddResult(result)

		if verbose && result.Contains {
			fmt.Printf(" ✓ Found substring in %s\n", file)
		}
	}

	PrintReport(report, verbose)

	return report, nil
}

// collectFiles gathers the files to search under dirPath.
//
// With recursive set, the whole tree is walked with filepath.Walk and every
// non-directory entry is returned; any walk error aborts the collection.
// Otherwise only the non-directory entries directly inside dirPath are
// returned, each joined with dirPath.
func collectFiles(dirPath string, recursive bool) ([]string, error) {
	var files []string

	if recursive {
		err := filepath.Walk(dirPath, func(path string, info os.FileInfo, err error) error {
			if err != nil {
				return err
			}
			if !info.IsDir() {
				files = append(files, path)
			}
			return nil
		})
		if err != nil {
			return nil, err
		}
	} else {
		entries, err := os.ReadDir(dirPath)
		if err != nil {
			return nil, err
		}
		for _, entry := range entries {
			if !entry.IsDir() {
				files = append(files, filepath.Join(dirPath, entry.Name()))
			}
		}
	}

	return files, nil
}

// searchFile searches a single file for the substring.
//
// The whole file is read into memory and checked with a case-sensitive
// strings.Contains. The returned result always carries the file path and the
// language derived from the file name; Contains is only meaningful when the
// error is nil.
func searchFile(filePath string, substring string) (SearchResult, error) {
	result := SearchResult{
		FilePath: filePath,
		Language: extractLanguageFromFilename(filePath),
		Contains: false,
	}

	content, err := os.ReadFile(filePath)
	if err != nil {
		return result, err
	}

	result.Contains = strings.Contains(string(content), substring)

	return result, nil
}

// extractLanguageFromFilename derives a language label from the file extension.
//
// The label is the extension without its leading dot (for example "js" for
// "example.js"). A name with no extension, or one that ends in a bare dot,
// yields "unknown" so that such files still show up in the language
// breakdown instead of being dropped by AddResult's empty-language check.
func extractLanguageFromFilename(filePath string) string {
	// filepath.Ext returns "" for no extension and "." for a trailing dot;
	// both collapse to "" once the dot is trimmed.
	lang := strings.TrimPrefix(filepath.Ext(filePath), ".")
	if lang == "" {
		return "unknown"
	}
	return lang
}

// PrintReport prints the search report to stdout.
//
// Displays statistics about the search operation including:
//   - Number of files scanned
//   - Number of files containing the substring
//   - Files containing substring by language, sorted by language (if verbose is true)
//   - List of file paths containing the substring (if verbose is true)
//
// Parameters:
//   - report: The report to print
//   - verbose: If true, show detailed breakdown including file paths and language counts
func PrintReport(report *SearchReport, verbose bool) {
	fmt.Println("\n" + strings.Repeat("=", 60))
	fmt.Println("CODE SEARCH REPORT")
	fmt.Println(strings.Repeat("=", 60))

	fmt.Printf("\nFiles Scanned: %d\n", report.FilesScanned)
	fmt.Printf("Files Containing Substring: %d\n", report.FilesContaining)

	if verbose && len(report.LanguageCounts) > 0 {
		fmt.Println("\nFiles Containing Substring by Language:")

		// Map iteration order is random; sort for stable output.
		languages := make([]string, 0, len(report.LanguageCounts))
		for lang := range report.LanguageCounts {
			languages = append(languages, lang)
		}
		sort.Strings(languages)

		for _, lang := range languages {
			count := report.LanguageCounts[lang]
			fmt.Printf(" %-15s: %d\n", lang, count)
		}
	}

	if verbose && len(report.FilesWithSubstring) > 0 {
		fmt.Println("\nFiles Containing Substring:")
		for _, path := range report.FilesWithSubstring {
			fmt.Printf(" - %s\n", path)
		}
	}

	fmt.Println(strings.Repeat("=", 60))
}

// SearchResult contains the results of searching a single file.
//
// Used internally during the search operation to track results for each file.
type SearchResult struct {
	FilePath string // Path to the file that was searched
	Language string // Programming language (detected from file extension)
	Contains bool   // Whether the file contains the substring
}

// SearchReport contains statistics about the search operation.
//
// Tracks overall statistics for reporting to the user.
type SearchReport struct {
	FilesScanned       int            // Total number of files scanned
	FilesContaining    int            // Number of files containing the substring
	LanguageCounts     map[string]int // Count of files containing substring by language
	FilesWithSubstring []string       // List of file paths containing the substring
}

// NewSearchReport creates a new initialized SearchReport with empty maps and slices.
func NewSearchReport() *SearchReport {
	return &SearchReport{
		LanguageCounts:     make(map[string]int),
		FilesWithSubstring: make([]string, 0),
	}
}

// AddResult updates the report with a search result.
//
// Every call counts as one scanned file. When the result contains the
// substring, the containing-file count is incremented, the path is recorded,
// and the language counter is bumped unless the language is empty.
func (r *SearchReport) AddResult(result SearchResult) {
	r.FilesScanned++

	if result.Contains {
		r.FilesContaining++
		r.FilesWithSubstring = append(r.FilesWithSubstring, result.FilePath)

		if result.Language != "" {
			r.LanguageCounts[result.Language]++
		}
	}
}
+package search + +import ( + "github.com/mongodb/code-example-tooling/audit-cli/commands/search/find-string" + "github.com/spf13/cobra" +) + +// NewSearchCommand creates the search parent command. +// +// This command serves as a parent for various search operations on extracted content. +// It doesn't perform any operations itself but provides a namespace for subcommands. +func NewSearchCommand() *cobra.Command { + cmd := &cobra.Command{ + Use: "search", + Short: "Search through extracted content", + Long: `Search through extracted content such as code examples. + +Currently supports searching for substrings in extracted code example files. +Future subcommands may support pattern matching, regex search, or semantic search.`, + } + + // Add subcommands + cmd.AddCommand(find_string.NewFindStringCommand()) + + return cmd +} diff --git a/audit-cli/go.mod b/audit-cli/go.mod new file mode 100644 index 0000000..96d2b7c --- /dev/null +++ b/audit-cli/go.mod @@ -0,0 +1,10 @@ +module github.com/mongodb/code-example-tooling/audit-cli + +go 1.24 + +require github.com/spf13/cobra v1.10.1 + +require ( + github.com/inconshreveable/mousetrap v1.1.0 // indirect + github.com/spf13/pflag v1.0.10 // indirect +) diff --git a/audit-cli/go.sum b/audit-cli/go.sum new file mode 100644 index 0000000..989827e --- /dev/null +++ b/audit-cli/go.sum @@ -0,0 +1,11 @@ +github.com/cpuguy83/go-md2man/v2 v2.0.6/go.mod h1:oOW0eioCTA6cOiMLiUPZOpcVxMig6NIQQ7OS05n1F4g= +github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8= +github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw= +github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= +github.com/spf13/cobra v1.10.1 h1:lJeBwCfmrnXthfAupyUTzJ/J4Nc1RsHC/mSRU2dll/s= +github.com/spf13/cobra v1.10.1/go.mod h1:7SmJGaTHFVBY0jW4NXGluQoLvhqFQM+6XSKD+P4XaB0= +github.com/spf13/pflag v1.0.9/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= 
// DirectiveType names the kind of reStructuredText directive that was parsed.
type DirectiveType string

const (
	// CodeBlock is an inline code block (.. code-block::).
	CodeBlock DirectiveType = "code-block"
	// LiteralInclude is a reference to an external file (.. literalinclude::).
	LiteralInclude DirectiveType = "literalinclude"
	// IoCodeBlock is an input/output example (.. io-code-block::).
	IoCodeBlock DirectiveType = "io-code-block"
)

// Directive is one parsed reStructuredText directive.
//
// It carries what is needed to extract content from the directive: its type,
// argument, options, content, and the line it starts on. For io-code-block
// directives the nested input and output directives are attached as well.
type Directive struct {
	Type     DirectiveType     // Type of directive (code-block, literalinclude, io-code-block)
	Argument string            // Main argument (e.g., language for code-block, filepath for literalinclude)
	Options  map[string]string // Directive options (e.g., :language:, :start-after:, etc.)
	Content  string            // Content of the directive (for code-block and inline io-code-block)
	LineNum  int               // Line number where directive starts (1-based)

	// Populated only for io-code-block directives.
	InputDirective  *SubDirective // The .. input:: nested directive
	OutputDirective *SubDirective // The .. output:: nested directive
}

// SubDirective is a directive nested inside an io-code-block.
//
// It holds either a filepath argument (external file reference) or inline
// content (embedded code), plus any options.
type SubDirective struct {
	Argument string            // Filepath argument (if provided)
	Options  map[string]string // Directive options (e.g., :language:)
	Content  string            // Inline content (if no filepath)
}

// Patterns used to recognize directive lines. All of them are anchored at
// both ends. The directive patterns expect the line to start with "..",
// while the option pattern requires leading whitespace.
var (
	// literalinclude with a mandatory argument, e.g.
	// ".. literalinclude:: /path/to/file.php"; group 1 is the path.
	literalIncludeRegex = regexp.MustCompile(`^\.\.\s+literalinclude::\s+(.+)$`)

	// code-block with an optional argument, e.g. ".. code-block:: python";
	// group 1 is the language and may be empty.
	codeBlockRegex = regexp.MustCompile(`^\.\.\s+code-block::\s*(.*)$`)

	// io-code-block, which takes no argument: ".. io-code-block::".
	ioCodeBlockRegex = regexp.MustCompile(`^\.\.\s+io-code-block::\s*$`)

	// Nested input directive, e.g. ".. input:: /path/to/file.cs";
	// group 1 is the optional filepath.
	inputDirectiveRegex = regexp.MustCompile(`^\.\.\s+input::\s*(.*)$`)

	// Nested output directive, e.g. ".. output:: /path/to/file.txt";
	// group 1 is the optional filepath.
	outputDirectiveRegex = regexp.MustCompile(`^\.\.\s+output::\s*(.*)$`)

	// Indented directive option, e.g. "   :language: python";
	// group 1 is the option name and group 2 its value.
	optionRegex = regexp.MustCompile(`^\s+:([^:]+):\s*(.*)$`)
)
+// +// This function scans the file line-by-line and extracts all supported directives +// (literalinclude, code-block, io-code-block). For each directive, it parses: +// - The directive type and argument +// - All directive options (e.g., :language:, :start-after:) +// - The directive content (for code-block and io-code-block) +// - Nested directives (for io-code-block) +// +// Parameters: +// - filePath: Path to the RST file to parse +// +// Returns: +// - []Directive: Slice of all parsed directives in order of appearance +// - error: Any error encountered during parsing +func ParseDirectives(filePath string) ([]Directive, error) { + file, err := os.Open(filePath) + if err != nil { + return nil, err + } + defer file.Close() + + var directives []Directive + scanner := bufio.NewScanner(file) + lineNum := 0 + + for scanner.Scan() { + lineNum++ + line := scanner.Text() + trimmedLine := strings.TrimSpace(line) + + // Check for literalinclude directive + if matches := literalIncludeRegex.FindStringSubmatch(trimmedLine); len(matches) > 1 { + directive := Directive{ + Type: LiteralInclude, + Argument: strings.TrimSpace(matches[1]), + Options: make(map[string]string), + LineNum: lineNum, + } + + // Parse options on following lines + parseDirectiveOptions(scanner, &directive, &lineNum) + directives = append(directives, directive) + continue + } + + // Check for code-block directive + if matches := codeBlockRegex.FindStringSubmatch(trimmedLine); len(matches) > 1 { + directive := Directive{ + Type: CodeBlock, + Argument: strings.TrimSpace(matches[1]), + Options: make(map[string]string), + LineNum: lineNum, + } + + // Parse options and content on following lines + firstContentLine := parseDirectiveOptions(scanner, &directive, &lineNum) + parseDirectiveContent(scanner, &directive, &lineNum, firstContentLine) + directives = append(directives, directive) + continue + } + + // Check for io-code-block directive + if ioCodeBlockRegex.MatchString(trimmedLine) { + directive := 
Directive{ + Type: IoCodeBlock, + Options: make(map[string]string), + LineNum: lineNum, + } + + // Parse io-code-block with its nested input/output directives + parseIoCodeBlock(scanner, &directive, &lineNum) + directives = append(directives, directive) + continue + } + } + + if err := scanner.Err(); err != nil { + return nil, err + } + + return directives, nil +} + +// parseDirectiveOptions parses the options following a directive +// Returns the first content line if encountered, or empty string if not +func parseDirectiveOptions(scanner *bufio.Scanner, directive *Directive, lineNum *int) string { + for scanner.Scan() { + *lineNum++ + line := scanner.Text() + + // Check if this is an option line + if matches := optionRegex.FindStringSubmatch(line); len(matches) > 1 { + optionName := strings.TrimSpace(matches[1]) + optionValue := strings.TrimSpace(matches[2]) + directive.Options[optionName] = optionValue + continue + } + + // If we hit a blank line or non-indented line, we're done with options + trimmedLine := strings.TrimSpace(line) + if trimmedLine == "" { + continue // Skip blank lines between options and content + } + + // If the line is not indented and not an option, we're done + if len(line) > 0 && line[0] != ' ' && line[0] != '\t' { + // Non-indented line means end of directive + return "" + } + + // If we have indented content (not an option), this is the start of content + if len(line) > 0 && (line[0] == ' ' || line[0] == '\t') && !optionRegex.MatchString(line) { + return line + } + } + return "" +} + +// parseDirectiveContent parses the content block of a directive (for code-block, io-code-block) +// firstContentLine is the first line of content (if already consumed by parseDirectiveOptions) +func parseDirectiveContent(scanner *bufio.Scanner, directive *Directive, lineNum *int, firstContentLine string) { + var contentLines []string + var baseIndent int = -1 + + // Process the first content line if provided + if firstContentLine != "" { + // Calculate 
indentation + indent := len(firstContentLine) - len(strings.TrimLeft(firstContentLine, " \t")) + baseIndent = indent + + // Add the first line, removing the base indentation + contentLines = append(contentLines, firstContentLine[baseIndent:]) + } + + for scanner.Scan() { + *lineNum++ + line := scanner.Text() + + // Empty lines are part of the content + if strings.TrimSpace(line) == "" { + contentLines = append(contentLines, "") + continue + } + + // Calculate indentation + indent := len(line) - len(strings.TrimLeft(line, " \t")) + + // If this is the first content line, establish the base indentation + if baseIndent == -1 { + baseIndent = indent + } + + // If the line is less indented than the base, we're done with content + if indent < baseIndent { + break + } + + // Add the line to content, removing the base indentation + if indent >= baseIndent { + contentLines = append(contentLines, line[baseIndent:]) + } + } + + directive.Content = strings.TrimSpace(strings.Join(contentLines, "\n")) +} + +// ExtractLiteralIncludeContent extracts content from a literalinclude directive +// Handles start-after and end-before options +func ExtractLiteralIncludeContent(currentFilePath string, directive Directive) (string, error) { + if directive.Type != LiteralInclude { + return "", fmt.Errorf("directive is not a literalinclude") + } + + // Resolve the file path + resolvedPath, err := ResolveIncludePath(currentFilePath, directive.Argument) + if err != nil { + return "", fmt.Errorf("failed to resolve literalinclude path %s: %w", directive.Argument, err) + } + + // Read the file content + content, err := os.ReadFile(resolvedPath) + if err != nil { + return "", fmt.Errorf("failed to read literalinclude file %s: %w", resolvedPath, err) + } + + contentStr := string(content) + + // Handle start-after option + if startAfter, hasStartAfter := directive.Options["start-after"]; hasStartAfter { + startIdx := strings.Index(contentStr, startAfter) + if startIdx == -1 { + return "", 
// dedentContent removes the common leading whitespace from all lines.
//
// The common indent is the smallest number of leading space/tab bytes found on
// any non-blank line. Blank (whitespace-only) lines do not take part in that
// calculation and are emitted as empty strings. When no non-blank line exists,
// or the common indent is zero, the input is returned unchanged.
func dedentContent(content string) string {
	lines := strings.Split(content, "\n")

	// Find the minimum indentation, ignoring blank lines.
	minIndent := -1
	for _, line := range lines {
		if strings.TrimSpace(line) == "" {
			continue
		}
		indent := len(line) - len(strings.TrimLeft(line, " \t"))
		if minIndent == -1 || indent < minIndent {
			minIndent = indent
		}
	}

	if minIndent <= 0 {
		return content
	}

	// Every non-blank line has at least minIndent leading bytes (minIndent is
	// the minimum over exactly those lines), so the slice below is always in
	// range. Blank lines keep the zero value "".
	dedented := make([]string, len(lines))
	for i, line := range lines {
		if strings.TrimSpace(line) != "" {
			dedented[i] = line[minIndent:]
		}
	}

	return strings.Join(dedented, "\n")
}
Options: make(map[string]string), + } + pendingLine = parseSubDirective(scanner, subDir, lineNum) + directive.OutputDirective = subDir + continue + } + + // If we get here, the line is neither input nor output directive + // This means we've reached the end of the io-code-block + break + } +} + +// parseSubDirective parses a nested directive (input or output) within io-code-block +// Returns the last line read (which might be the start of the next directive) +func parseSubDirective(scanner *bufio.Scanner, subDir *SubDirective, lineNum *int) string { + var contentLines []string + var baseIndent int = -1 + var lastLine string + + // Parse options and content + for scanner.Scan() { + *lineNum++ + line := scanner.Text() + lastLine = line + trimmedLine := strings.TrimSpace(line) + + // Empty line - might be part of content or end of directive + if trimmedLine == "" { + if len(contentLines) > 0 { + contentLines = append(contentLines, "") + } + continue + } + + // Check if this is an option line + if matches := optionRegex.FindStringSubmatch(line); len(matches) > 2 { + subDir.Options[matches[1]] = strings.TrimSpace(matches[2]) + continue + } + + // Check if this is the start of another directive (input/output) + if inputDirectiveRegex.MatchString(trimmedLine) || outputDirectiveRegex.MatchString(trimmedLine) { + // Return this line so the caller can process it + break + } + + // Check if line is indented (content) + if len(line) > 0 && (line[0] == ' ' || line[0] == '\t') { + indent := len(line) - len(strings.TrimLeft(line, " \t")) + + // Set base indent from first content line + if baseIndent == -1 { + baseIndent = indent + } + + // If we've dedented back to or past the base level, we're done + if len(contentLines) > 0 && indent < baseIndent { + break + } + + // Add content line (remove base indentation) + if baseIndent >= 0 && len(line) >= baseIndent { + contentLines = append(contentLines, line[baseIndent:]) + } else { + contentLines = append(contentLines, 
// TraverseDirectory traverses a directory and returns all file paths.
//
// If recursive is true, walks the entire directory tree. If false, only
// returns files in the immediate directory (no subdirectories). Directories
// themselves are never included in the result.
//
// Parameters:
//   - rootPath: Root directory to traverse
//   - recursive: If true, recursively scan subdirectories
//
// Returns:
//   - []string: List of all file paths found
//   - error: Any error encountered during traversal
func TraverseDirectory(rootPath string, recursive bool) ([]string, error) {
	var files []string

	if !recursive {
		entries, err := os.ReadDir(rootPath)
		if err != nil {
			return nil, err
		}
		for _, entry := range entries {
			if !entry.IsDir() {
				files = append(files, filepath.Join(rootPath, entry.Name()))
			}
		}
		return files, nil
	}

	// WalkDir reports each entry's type from the directory listing instead of
	// calling lstat on every file the way filepath.Walk does. os.DirEntry is
	// an alias of fs.DirEntry, so no extra import is needed.
	err := filepath.WalkDir(rootPath, func(path string, entry os.DirEntry, err error) error {
		if err != nil {
			return err
		}
		if !entry.IsDir() {
			files = append(files, path)
		}
		return nil
	})
	if err != nil {
		return nil, err
	}

	return files, nil
}
// ShouldProcessFile determines if a file should be processed based on its extension.
//
// The extension is taken from the final path element and compared
// case-insensitively; only .rst, .txt and .md are accepted.
//
// Parameters:
//   - filePath: Path to the file to check
//
// Returns:
//   - bool: True if the file should be processed, false otherwise
func ShouldProcessFile(filePath string) bool {
	switch strings.ToLower(filepath.Ext(filePath)) {
	case ".rst", ".txt", ".md":
		return true
	default:
		return false
	}
}
+// +// Parameters: +// - filePath: Path to the RST file to scan +// +// Returns: +// - []string: List of resolved absolute paths to included files +// - error: Any error encountered during scanning +func FindIncludeDirectives(filePath string) ([]string, error) { + file, err := os.Open(filePath) + if err != nil { + return nil, err + } + defer file.Close() + + var includePaths []string + scanner := bufio.NewScanner(file) + + for scanner.Scan() { + line := strings.TrimSpace(scanner.Text()) + + // Check if this line is an include directive + matches := IncludeDirectiveRegex.FindStringSubmatch(line) + if len(matches) > 1 { + includePath := strings.TrimSpace(matches[1]) + + // Resolve the include path relative to the source directory + resolvedPath, err := ResolveIncludePath(filePath, includePath) + if err != nil { + fmt.Fprintf(os.Stderr, "Warning: failed to resolve include path %s: %v\n", includePath, err) + continue + } + + includePaths = append(includePaths, resolvedPath) + } + } + + if err := scanner.Err(); err != nil { + return nil, err + } + + return includePaths, nil +} + +// ResolveIncludePath resolves an include path relative to the source directory +// Handles multiple special cases: +// - Template variables ({{var_name}}) +// - Steps files (/includes/steps/name.rst -> /includes/steps-name.yaml) +// - Extracts files (ref-based YAML content blocks) +// - Release files (ref-based YAML content blocks) +// - Files without extensions (auto-append .rst) +func ResolveIncludePath(currentFilePath, includePath string) (string, error) { + // Handle template variables by looking up replacements in the current file + if strings.HasPrefix(includePath, "{{") && strings.HasSuffix(includePath, "}}") { + // Extract the variable name + varName := strings.TrimSuffix(strings.TrimPrefix(includePath, "{{"), "}}") + varName = strings.TrimSpace(varName) + + // Try to resolve the variable from the current file's replacement section + resolvedPath, err := 
ResolveTemplateVariable(currentFilePath, varName) + if err != nil { + return "", fmt.Errorf("failed to resolve template variable %s: %w", includePath, err) + } + + // Now resolve the replacement path as a normal include + includePath = resolvedPath + } + + // Find the source directory by walking up from the current file + sourceDir, err := FindSourceDirectory(currentFilePath) + if err != nil { + return "", err + } + + // Clean the include path (remove leading slash if present) + cleanIncludePath := strings.TrimPrefix(includePath, "/") + + // Special handling for steps/ includes + // Convert /includes/steps/filename.rst to /includes/steps-filename.yaml + if strings.Contains(cleanIncludePath, "steps/") { + fullPath, err := resolveSpecialIncludePath(sourceDir, cleanIncludePath, "steps") + if err == nil { + return fullPath, nil + } + // If steps resolution fails, continue with normal resolution + } + + // Special handling for extracts/ includes + // These reference content blocks in YAML files by ref ID + // Convert /includes/extracts/ref-name.rst to the YAML file containing that ref + if strings.Contains(cleanIncludePath, "extracts/") { + fullPath, err := resolveRefBasedIncludePath(sourceDir, cleanIncludePath, "extracts") + if err == nil { + return fullPath, nil + } + // If extracts resolution fails, continue with normal resolution + } + + // Special handling for release/ includes + // These also reference content blocks in YAML files by ref ID + if strings.Contains(cleanIncludePath, "release/") { + fullPath, err := resolveRefBasedIncludePath(sourceDir, cleanIncludePath, "release") + if err == nil { + return fullPath, nil + } + // If release resolution fails, continue with normal resolution + } + + // Construct the full path + fullPath := filepath.Join(sourceDir, cleanIncludePath) + + // If the file exists as-is, return it + if _, err := os.Stat(fullPath); err == nil { + return fullPath, nil + } + + // If the path doesn't have an extension, try adding .rst + if 
// resolveSpecialIncludePath maps a "<dirType>/" include onto its YAML file.
//
// The first "<dirType>/" in includePath is replaced by "<dirType>-", the file
// extension of the remainder is dropped, and ".yaml" is appended. For example,
// with dirType "steps":
//
//	includes/steps/run-mongodb-on-a-linux-distribution-systemd.rst
//	-> includes/steps-run-mongodb-on-a-linux-distribution-systemd.yaml
//
// The result is joined onto sourceDir and returned only if that file exists.
// An error is returned when the marker is absent or the file cannot be stat-ed.
func resolveSpecialIncludePath(sourceDir, includePath, dirType string) (string, error) {
	marker := dirType + "/"
	at := strings.Index(includePath, marker)
	if at < 0 {
		return "", fmt.Errorf("no %s/ found in path", dirType)
	}

	// Everything after the marker, minus its extension (if any).
	stem := includePath[at+len(marker):]
	stem = strings.TrimSuffix(stem, filepath.Ext(stem))

	candidate := filepath.Join(sourceDir, includePath[:at]+dirType+"-"+stem+".yaml")
	if _, statErr := os.Stat(candidate); statErr != nil {
		return "", fmt.Errorf("%s file not found: %s", dirType, candidate)
	}

	return candidate, nil
}
/includes/dirType/ref-name.rst -> ref-name + searchPattern := dirType + "/" + dirIndex := strings.Index(includePath, searchPattern) + if dirIndex == -1 { + return "", fmt.Errorf("no %s/ found in path", dirType) + } + + refName := includePath[dirIndex+len(searchPattern):] + refName = strings.TrimSuffix(refName, filepath.Ext(refName)) + + // Get the directory part before "dirType/" + beforeDir := includePath[:dirIndex] + searchDir := filepath.Join(sourceDir, beforeDir) + + // Find all dirType-*.yaml files in the includes directory + pattern := filepath.Join(searchDir, dirType+"-*.yaml") + matches, err := filepath.Glob(pattern) + if err != nil { + return "", fmt.Errorf("failed to search for %s files: %w", dirType, err) + } + + // Search each YAML file for the ref + for _, yamlFile := range matches { + hasRef, err := YAMLFileContainsRef(yamlFile, refName) + if err != nil { + continue // Skip files we can't read + } + if hasRef { + return yamlFile, nil + } + } + + return "", fmt.Errorf("no %s file found containing ref: %s", dirType, refName) +} + +// YAMLFileContainsRef checks if a YAML file contains a specific ref. +// +// This function scans a YAML file for a line matching "ref: ". +// Used to find the correct YAML file for ref-based includes (extracts, release). 
// YAMLFileContainsRef checks if a YAML file contains a specific ref.
//
// The file is scanned line by line; a line matches when, after trimming
// surrounding whitespace, it is exactly "ref: " followed by refName. Scanning
// stops at the first match.
//
// Parameters:
//   - filePath: Path to the YAML file to check
//   - refName: The ref name to search for
//
// Returns:
//   - bool: True if the file contains the ref, false otherwise
//   - error: Any error encountered while opening or reading the file
func YAMLFileContainsRef(filePath, refName string) (bool, error) {
	f, err := os.Open(filePath)
	if err != nil {
		return false, err
	}
	defer f.Close()

	want := "ref: " + refName
	sc := bufio.NewScanner(f)
	for sc.Scan() {
		if strings.TrimSpace(sc.Text()) != want {
			continue
		}
		return true, nil
	}

	// No match: report a read error if the scan ended abnormally.
	return false, sc.Err()
}
or document separator) + if len(line) > 0 && line[0] != ' ' && line[0] != '\t' { + // We've left the replacement section + break + } + if trimmedLine == "..." || trimmedLine == "---" { + // Document separator - we've left the replacement section + break + } + + // Look for our variable + if strings.HasPrefix(trimmedLine, searchPattern) { + // Extract the value (everything after "varName: ") + value := strings.TrimPrefix(trimmedLine, searchPattern) + value = strings.TrimSpace(value) + // Remove quotes if present + value = strings.Trim(value, "\"'") + return value, nil + } + } + } + + if err := scanner.Err(); err != nil { + return "", err + } + + return "", fmt.Errorf("template variable %s not found in replacement section of %s", varName, yamlFilePath) +} + +// FindSourceDirectory walks up the directory tree to find the "source" directory. +// +// MongoDB documentation is typically organized with a "source" directory at the root. +// This function walks up from the current file to find that directory, which is used +// as the base for resolving include paths. 
// FindSourceDirectory walks up the directory tree to find the "source" directory.
//
// Starting from the directory that contains filePath, each level is checked in
// turn: the level itself qualifies if it is named "source", otherwise a
// "source" subdirectory of that level qualifies. The search stops at the
// filesystem root.
//
// filePath is made absolute first. Without that step a relative path such as
// "page.rst" has the parent directory ".", whose own parent is also ".", so
// the walk would end immediately and never reach an enclosing "source"
// directory.
//
// Parameters:
//   - filePath: Path to a file within the documentation tree
//
// Returns:
//   - string: Absolute path to the source directory
//   - error: Error if the path cannot be made absolute or no source directory
//     is found
func FindSourceDirectory(filePath string) (string, error) {
	absPath, err := filepath.Abs(filePath)
	if err != nil {
		return "", fmt.Errorf("could not find source directory for %s: %w", filePath, err)
	}

	dir := filepath.Dir(absPath)
	for {
		// The current directory is itself the source directory.
		if filepath.Base(dir) == "source" {
			return dir, nil
		}

		// The current directory contains a "source" subdirectory.
		sourceSubdir := filepath.Join(dir, "source")
		if info, err := os.Stat(sourceSubdir); err == nil && info.IsDir() {
			return sourceSubdir, nil
		}

		// filepath.Dir is a fixed point at the root; stop there.
		parent := filepath.Dir(dir)
		if parent == dir {
			return "", fmt.Errorf("could not find source directory for %s", filePath)
		}
		dir = parent
	}
}
include:: directives +// - visited: Map tracking already-processed files (prevents circular includes) +// - verbose: If true, print detailed processing information +// - parseFunc: Function to call for each file to extract content +// +// Returns: +// - []string: List of all processed file paths (absolute paths) +// - error: Any error encountered during parsing +func ParseFileWithIncludes( + filePath string, + followIncludes bool, + visited map[string]bool, + verbose bool, + parseFunc func(string) error, +) ([]string, error) { + // Prevent infinite loops from circular includes + absPath, err := filepath.Abs(filePath) + if err != nil { + return nil, err + } + + if visited[absPath] { + return nil, nil // Already processed this file + } + visited[absPath] = true + + var processedFiles []string + processedFiles = append(processedFiles, absPath) + + // Parse the current file using the provided parse function + if parseFunc != nil { + if err := parseFunc(filePath); err != nil { + return processedFiles, err + } + } + + // If not following includes, return just this file + if !followIncludes { + return processedFiles, nil + } + + // Find and process include directives + includeFiles, err := FindIncludeDirectives(filePath) + if err != nil { + return processedFiles, nil // Continue even if we can't find includes + } + + if verbose && len(includeFiles) > 0 { + fmt.Printf(" Found %d include(s) in %s\n", len(includeFiles), filepath.Base(filePath)) + } + + // Recursively parse included files + for _, includeFile := range includeFiles { + if verbose { + fmt.Printf(" Following include: %s\n", includeFile) + } + + includedFiles, err := ParseFileWithIncludes(includeFile, followIncludes, visited, verbose, parseFunc) + if err != nil { + // Log warning but continue processing other files + fmt.Fprintf(os.Stderr, "Warning: failed to parse included file %s: %v\n", includeFile, err) + continue + } + processedFiles = append(processedFiles, includedFiles...) 
+ } + + return processedFiles, nil +} + diff --git a/audit-cli/main.go b/audit-cli/main.go new file mode 100644 index 0000000..8cad2e1 --- /dev/null +++ b/audit-cli/main.go @@ -0,0 +1,35 @@ +// Package main provides the entry point for the audit-cli tool. +// +// audit-cli is a command-line tool for extracting and analyzing code examples +// from MongoDB documentation written in reStructuredText (RST). +// +// The CLI is organized into parent commands with subcommands: +// - extract: Extract content from RST files +// - code-examples: Extract code examples from RST directives +// - search: Search through extracted content +// - find-string: Search for substrings in extracted files +package main + +import ( + "github.com/mongodb/code-example-tooling/audit-cli/commands/extract" + "github.com/mongodb/code-example-tooling/audit-cli/commands/search" + "github.com/spf13/cobra" +) + +func main() { + var rootCmd = &cobra.Command{ + Use: "audit-cli", + Short: "A CLI tool for extracting and analyzing code examples from MongoDB documentation", + Long: `audit-cli extracts code examples from reStructuredText files and provides +tools for searching and analyzing the extracted content. 
+ +Supports extraction from literalinclude, code-block, and io-code-block directives, +with special handling for MongoDB documentation conventions.`, + } + + // Add parent commands + rootCmd.AddCommand(extract.NewExtractCommand()) + rootCmd.AddCommand(search.NewSearchCommand()) + + rootCmd.Execute() +} diff --git a/audit-cli/testdata/expected-output/code-block-test.code-block.1.js b/audit-cli/testdata/expected-output/code-block-test.code-block.1.js new file mode 100644 index 0000000..cfdd3ad --- /dev/null +++ b/audit-cli/testdata/expected-output/code-block-test.code-block.1.js @@ -0,0 +1,2 @@ +const greeting = "Hello, World!"; +console.log(greeting); \ No newline at end of file diff --git a/audit-cli/testdata/expected-output/code-block-test.code-block.2.py b/audit-cli/testdata/expected-output/code-block-test.code-block.2.py new file mode 100644 index 0000000..e2cd691 --- /dev/null +++ b/audit-cli/testdata/expected-output/code-block-test.code-block.2.py @@ -0,0 +1,3 @@ +def calculate_sum(a, b): + result = a + b + return result \ No newline at end of file diff --git a/audit-cli/testdata/expected-output/code-block-test.code-block.3.js b/audit-cli/testdata/expected-output/code-block-test.code-block.3.js new file mode 100644 index 0000000..d77da9c --- /dev/null +++ b/audit-cli/testdata/expected-output/code-block-test.code-block.3.js @@ -0,0 +1,41 @@ +[ + { + _id: ObjectId("620ad555394d47411658b5ef"), + time: ISODate("2021-03-08T09:00:00.000Z"), + price: 500, + linearFillPrice: 500, + locfPrice: 500 + }, + { + _id: ObjectId("620ad555394d47411658b5f0"), + time: ISODate("2021-03-08T10:00:00.000Z"), + linearFillPrice: 507.5, + locfPrice: 500 + }, + { + _id: ObjectId("620ad555394d47411658b5f1"), + time: ISODate("2021-03-08T11:00:00.000Z"), + price: 515, + linearFillPrice: 515, + locfPrice: 515 + }, + { + _id: ObjectId("620ad555394d47411658b5f2"), + time: ISODate("2021-03-08T12:00:00.000Z"), + linearFillPrice: 505, + locfPrice: 515 + }, + { + _id: 
ObjectId("620ad555394d47411658b5f3"), + time: ISODate("2021-03-08T13:00:00.000Z"), + linearFillPrice: 495, + locfPrice: 515 + }, + { + _id: ObjectId("620ad555394d47411658b5f4"), + time: ISODate("2021-03-08T14:00:00.000Z"), + price: 485, + linearFillPrice: 485, + locfPrice: 485 + } +] \ No newline at end of file diff --git a/audit-cli/testdata/expected-output/code-block-test.code-block.4.txt b/audit-cli/testdata/expected-output/code-block-test.code-block.4.txt new file mode 100644 index 0000000..27d8210 --- /dev/null +++ b/audit-cli/testdata/expected-output/code-block-test.code-block.4.txt @@ -0,0 +1,2 @@ +This is a code block with no language specified. +It should still be extracted. \ No newline at end of file diff --git a/audit-cli/testdata/expected-output/code-block-test.code-block.5.sh b/audit-cli/testdata/expected-output/code-block-test.code-block.5.sh new file mode 100644 index 0000000..0cb7dcb --- /dev/null +++ b/audit-cli/testdata/expected-output/code-block-test.code-block.5.sh @@ -0,0 +1,3 @@ +#!/bin/bash +echo "Hello from shell" +exit 0 \ No newline at end of file diff --git a/audit-cli/testdata/expected-output/code-block-test.code-block.6.ts b/audit-cli/testdata/expected-output/code-block-test.code-block.6.ts new file mode 100644 index 0000000..34e7b57 --- /dev/null +++ b/audit-cli/testdata/expected-output/code-block-test.code-block.6.ts @@ -0,0 +1,4 @@ +interface User { + name: string; + age: number; +} \ No newline at end of file diff --git a/audit-cli/testdata/expected-output/code-block-test.code-block.7.cpp b/audit-cli/testdata/expected-output/code-block-test.code-block.7.cpp new file mode 100644 index 0000000..422591f --- /dev/null +++ b/audit-cli/testdata/expected-output/code-block-test.code-block.7.cpp @@ -0,0 +1,6 @@ +#include + +int main() { + std::cout << "Hello" << std::endl; + return 0; +} \ No newline at end of file diff --git a/audit-cli/testdata/expected-output/examples.literalinclude.1.go 
b/audit-cli/testdata/expected-output/examples.literalinclude.1.go new file mode 100644 index 0000000..bc4d6fa --- /dev/null +++ b/audit-cli/testdata/expected-output/examples.literalinclude.1.go @@ -0,0 +1,7 @@ +package main + +import "fmt" + +func main() { + fmt.Println("Hello from Go!") +} \ No newline at end of file diff --git a/audit-cli/testdata/expected-output/io-code-block-test.io-code-block.1.input.js b/audit-cli/testdata/expected-output/io-code-block-test.io-code-block.1.input.js new file mode 100644 index 0000000..e980d86 --- /dev/null +++ b/audit-cli/testdata/expected-output/io-code-block-test.io-code-block.1.input.js @@ -0,0 +1 @@ +db.restaurants.aggregate( [ { $match: { category: "cafe" } } ] ) \ No newline at end of file diff --git a/audit-cli/testdata/expected-output/io-code-block-test.io-code-block.1.output.js b/audit-cli/testdata/expected-output/io-code-block-test.io-code-block.1.output.js new file mode 100644 index 0000000..6449d1b --- /dev/null +++ b/audit-cli/testdata/expected-output/io-code-block-test.io-code-block.1.output.js @@ -0,0 +1,5 @@ +[ + { _id: 1, category: 'café', status: 'Open' }, + { _id: 2, category: 'cafe', status: 'open' }, + { _id: 3, category: 'cafE', status: 'open' } +] \ No newline at end of file diff --git a/audit-cli/testdata/expected-output/io-code-block-test.io-code-block.3.input.py b/audit-cli/testdata/expected-output/io-code-block-test.io-code-block.3.input.py new file mode 100644 index 0000000..a2967ac --- /dev/null +++ b/audit-cli/testdata/expected-output/io-code-block-test.io-code-block.3.input.py @@ -0,0 +1,6 @@ +from pymongo import MongoClient +client = MongoClient('mongodb://localhost:27017') +db = client.test_database +collection = db.test_collection +result = collection.insert_one({'name': 'Alice', 'age': 30}) +print(result.inserted_id) \ No newline at end of file diff --git a/audit-cli/testdata/expected-output/io-code-block-test.io-code-block.3.output.py 
b/audit-cli/testdata/expected-output/io-code-block-test.io-code-block.3.output.py new file mode 100644 index 0000000..2b106e5 --- /dev/null +++ b/audit-cli/testdata/expected-output/io-code-block-test.io-code-block.3.output.py @@ -0,0 +1 @@ +ObjectId('507f1f77bcf86cd799439011') \ No newline at end of file diff --git a/audit-cli/testdata/expected-output/io-code-block-test.io-code-block.4.input.sh b/audit-cli/testdata/expected-output/io-code-block-test.io-code-block.4.input.sh new file mode 100644 index 0000000..f815eb7 --- /dev/null +++ b/audit-cli/testdata/expected-output/io-code-block-test.io-code-block.4.input.sh @@ -0,0 +1 @@ +mongosh --eval "db.users.find({age: {$gt: 25}})" \ No newline at end of file diff --git a/audit-cli/testdata/expected-output/io-code-block-test.io-code-block.4.output.txt b/audit-cli/testdata/expected-output/io-code-block-test.io-code-block.4.output.txt new file mode 100644 index 0000000..12943f7 --- /dev/null +++ b/audit-cli/testdata/expected-output/io-code-block-test.io-code-block.4.output.txt @@ -0,0 +1,4 @@ +[ + { "_id": 1, "name": "Alice", "age": 30 }, + { "_id": 2, "name": "Bob", "age": 35 } +] \ No newline at end of file diff --git a/audit-cli/testdata/expected-output/io-code-block-test.io-code-block.5.input.ts b/audit-cli/testdata/expected-output/io-code-block-test.io-code-block.5.input.ts new file mode 100644 index 0000000..d5aaa42 --- /dev/null +++ b/audit-cli/testdata/expected-output/io-code-block-test.io-code-block.5.input.ts @@ -0,0 +1,7 @@ +import { MongoClient } from 'mongodb'; + +const client = new MongoClient('mongodb://localhost:27017'); +await client.connect(); +const db = client.db('mydb'); +const result = await db.collection('users').findOne({ name: 'Alice' }); +console.log(result); \ No newline at end of file diff --git a/audit-cli/testdata/expected-output/io-code-block-test.io-code-block.5.output.txt b/audit-cli/testdata/expected-output/io-code-block-test.io-code-block.5.output.txt new file mode 100644 index 
0000000..4f0d8a4 --- /dev/null +++ b/audit-cli/testdata/expected-output/io-code-block-test.io-code-block.5.output.txt @@ -0,0 +1 @@ +{ "_id": 1, "name": "Alice", "age": 30, "email": "alice@example.com" } \ No newline at end of file diff --git a/audit-cli/testdata/expected-output/io-code-block-test.io-code-block.6.input.js b/audit-cli/testdata/expected-output/io-code-block-test.io-code-block.6.input.js new file mode 100644 index 0000000..ea52620 --- /dev/null +++ b/audit-cli/testdata/expected-output/io-code-block-test.io-code-block.6.input.js @@ -0,0 +1 @@ +db.inventory.find({ status: "A" }) \ No newline at end of file diff --git a/audit-cli/testdata/expected-output/io-code-block-test.io-code-block.6.output.js b/audit-cli/testdata/expected-output/io-code-block-test.io-code-block.6.output.js new file mode 100644 index 0000000..fc4e206 --- /dev/null +++ b/audit-cli/testdata/expected-output/io-code-block-test.io-code-block.6.output.js @@ -0,0 +1,4 @@ +[ + { _id: 1, item: "journal", status: "A" }, + { _id: 2, item: "notebook", status: "A" } +] \ No newline at end of file diff --git a/audit-cli/testdata/expected-output/io-code-block-test.io-code-block.7.input.go b/audit-cli/testdata/expected-output/io-code-block-test.io-code-block.7.input.go new file mode 100644 index 0000000..b2ff291 --- /dev/null +++ b/audit-cli/testdata/expected-output/io-code-block-test.io-code-block.7.input.go @@ -0,0 +1,15 @@ +package main + +import ( + "context" + "go.mongodb.org/mongo-driver/mongo" + "go.mongodb.org/mongo-driver/mongo/options" +) + +func main() { + client, err := mongo.Connect(context.TODO(), options.Client().ApplyURI("mongodb://localhost:27017")) + if err != nil { + panic(err) + } + defer client.Disconnect(context.TODO()) +} \ No newline at end of file diff --git a/audit-cli/testdata/expected-output/literalinclude-test.literalinclude.1.py b/audit-cli/testdata/expected-output/literalinclude-test.literalinclude.1.py new file mode 100644 index 0000000..ac2eb81 --- /dev/null +++ 
b/audit-cli/testdata/expected-output/literalinclude-test.literalinclude.1.py @@ -0,0 +1,4 @@ +def hello_world(): + """Print hello world message.""" + print("Hello, World!") + return True \ No newline at end of file diff --git a/audit-cli/testdata/expected-output/literalinclude-test.literalinclude.2.go b/audit-cli/testdata/expected-output/literalinclude-test.literalinclude.2.go new file mode 100644 index 0000000..bc4d6fa --- /dev/null +++ b/audit-cli/testdata/expected-output/literalinclude-test.literalinclude.2.go @@ -0,0 +1,7 @@ +package main + +import "fmt" + +func main() { + fmt.Println("Hello from Go!") +} \ No newline at end of file diff --git a/audit-cli/testdata/expected-output/literalinclude-test.literalinclude.3.js b/audit-cli/testdata/expected-output/literalinclude-test.literalinclude.3.js new file mode 100644 index 0000000..7c75f16 --- /dev/null +++ b/audit-cli/testdata/expected-output/literalinclude-test.literalinclude.3.js @@ -0,0 +1,5 @@ +function greet(name) { + return `Hello, ${name}!`; +} + +console.log(greet("World")); \ No newline at end of file diff --git a/audit-cli/testdata/expected-output/literalinclude-test.literalinclude.4.php b/audit-cli/testdata/expected-output/literalinclude-test.literalinclude.4.php new file mode 100644 index 0000000..5e921e5 --- /dev/null +++ b/audit-cli/testdata/expected-output/literalinclude-test.literalinclude.4.php @@ -0,0 +1,6 @@ + 'localhost', + 'port' => 27017 +]; \ No newline at end of file diff --git a/audit-cli/testdata/expected-output/literalinclude-test.literalinclude.5.rb b/audit-cli/testdata/expected-output/literalinclude-test.literalinclude.5.rb new file mode 100644 index 0000000..6201a21 --- /dev/null +++ b/audit-cli/testdata/expected-output/literalinclude-test.literalinclude.5.rb @@ -0,0 +1,10 @@ +# Ruby example +class Greeter + def initialize(name) + @name = name + end + + def greet + puts "Hello, #{@name}!" 
+ end +end \ No newline at end of file diff --git a/audit-cli/testdata/expected-output/literalinclude-test.literalinclude.6.ts b/audit-cli/testdata/expected-output/literalinclude-test.literalinclude.6.ts new file mode 100644 index 0000000..721cc1e --- /dev/null +++ b/audit-cli/testdata/expected-output/literalinclude-test.literalinclude.6.ts @@ -0,0 +1,9 @@ +// TypeScript example +interface User { + name: string; + age: number; +} + +function greetUser(user: User): string { + return `Hello, ${user.name}!`; +} \ No newline at end of file diff --git a/audit-cli/testdata/expected-output/literalinclude-test.literalinclude.7.cpp b/audit-cli/testdata/expected-output/literalinclude-test.literalinclude.7.cpp new file mode 100644 index 0000000..28276a3 --- /dev/null +++ b/audit-cli/testdata/expected-output/literalinclude-test.literalinclude.7.cpp @@ -0,0 +1,8 @@ +#include +#include + +int main() { + std::string message = "Hello from C++!"; + std::cout << message << std::endl; + return 0; +} \ No newline at end of file diff --git a/audit-cli/testdata/expected-output/nested-code-block-test.code-block.1.js b/audit-cli/testdata/expected-output/nested-code-block-test.code-block.1.js new file mode 100644 index 0000000..05e0fa8 --- /dev/null +++ b/audit-cli/testdata/expected-output/nested-code-block-test.code-block.1.js @@ -0,0 +1,3 @@ +const { MongoClient } = require('mongodb'); +const client = new MongoClient('mongodb://localhost:27017'); +await client.connect(); \ No newline at end of file diff --git a/audit-cli/testdata/expected-output/nested-code-block-test.code-block.10.rb b/audit-cli/testdata/expected-output/nested-code-block-test.code-block.10.rb new file mode 100644 index 0000000..8aeee6e --- /dev/null +++ b/audit-cli/testdata/expected-output/nested-code-block-test.code-block.10.rb @@ -0,0 +1,2 @@ +require 'mongo' +client = Mongo::Client.new(['localhost:27017'], database: 'mydb') \ No newline at end of file diff --git 
a/audit-cli/testdata/expected-output/nested-code-block-test.code-block.11.txt b/audit-cli/testdata/expected-output/nested-code-block-test.code-block.11.txt new file mode 100644 index 0000000..509a91b --- /dev/null +++ b/audit-cli/testdata/expected-output/nested-code-block-test.code-block.11.txt @@ -0,0 +1,9 @@ +{ + "database": { + "host": "localhost", + "port": 27017 + }, + "logging": { + "level": "info" + } +} \ No newline at end of file diff --git a/audit-cli/testdata/expected-output/nested-code-block-test.code-block.2.js b/audit-cli/testdata/expected-output/nested-code-block-test.code-block.2.js new file mode 100644 index 0000000..54fd99e --- /dev/null +++ b/audit-cli/testdata/expected-output/nested-code-block-test.code-block.2.js @@ -0,0 +1,4 @@ +const db = client.db('myDatabase'); +const collection = db.collection('myCollection'); +const result = await collection.insertOne({ name: 'Alice', age: 30 }); +console.log('Inserted document:', result.insertedId); \ No newline at end of file diff --git a/audit-cli/testdata/expected-output/nested-code-block-test.code-block.3.js b/audit-cli/testdata/expected-output/nested-code-block-test.code-block.3.js new file mode 100644 index 0000000..a2b3626 --- /dev/null +++ b/audit-cli/testdata/expected-output/nested-code-block-test.code-block.3.js @@ -0,0 +1,2 @@ +const doc = await collection.findOne({ name: 'Alice' }); +console.log('Found document:', doc); \ No newline at end of file diff --git a/audit-cli/testdata/expected-output/nested-code-block-test.code-block.4.py b/audit-cli/testdata/expected-output/nested-code-block-test.code-block.4.py new file mode 100644 index 0000000..0bfccdb --- /dev/null +++ b/audit-cli/testdata/expected-output/nested-code-block-test.code-block.4.py @@ -0,0 +1,5 @@ +client = MongoClient('mongodb://localhost:27017') +session = client.start_session() +with session.start_transaction(): + collection.insert_one({'x': 1}, session=session) + collection.update_one({'x': 1}, {'$set': {'y': 2}}, 
session=session) \ No newline at end of file diff --git a/audit-cli/testdata/expected-output/nested-code-block-test.code-block.5.go b/audit-cli/testdata/expected-output/nested-code-block-test.code-block.5.go new file mode 100644 index 0000000..8776f29 --- /dev/null +++ b/audit-cli/testdata/expected-output/nested-code-block-test.code-block.5.go @@ -0,0 +1,9 @@ +func validateInput(input string) error { + if len(input) == 0 { + return errors.New("input cannot be empty") + } + if len(input) > 100 { + return errors.New("input too long") + } + return nil +} \ No newline at end of file diff --git a/audit-cli/testdata/expected-output/nested-code-block-test.code-block.6.ts b/audit-cli/testdata/expected-output/nested-code-block-test.code-block.6.ts new file mode 100644 index 0000000..2867d5e --- /dev/null +++ b/audit-cli/testdata/expected-output/nested-code-block-test.code-block.6.ts @@ -0,0 +1,9 @@ +interface Config { + host: string; + port: number; +} + +const config: Config = { + host: 'localhost', + port: 27017 +}; \ No newline at end of file diff --git a/audit-cli/testdata/expected-output/nested-code-block-test.code-block.7.ts b/audit-cli/testdata/expected-output/nested-code-block-test.code-block.7.ts new file mode 100644 index 0000000..2bcabc6 --- /dev/null +++ b/audit-cli/testdata/expected-output/nested-code-block-test.code-block.7.ts @@ -0,0 +1,5 @@ +import { MongoClient } from 'mongodb'; + +const client = new MongoClient(`mongodb://${config.host}:${config.port}`); +await client.connect(); +console.log('Connected successfully'); \ No newline at end of file diff --git a/audit-cli/testdata/expected-output/nested-code-block-test.code-block.8.sh b/audit-cli/testdata/expected-output/nested-code-block-test.code-block.8.sh new file mode 100644 index 0000000..4a7f92e --- /dev/null +++ b/audit-cli/testdata/expected-output/nested-code-block-test.code-block.8.sh @@ -0,0 +1,3 @@ +# This is insecure! 
+chmod 777 /var/lib/mongodb +chown nobody:nobody /var/lib/mongodb \ No newline at end of file diff --git a/audit-cli/testdata/expected-output/nested-code-block-test.code-block.9.rb b/audit-cli/testdata/expected-output/nested-code-block-test.code-block.9.rb new file mode 100644 index 0000000..71132c3 --- /dev/null +++ b/audit-cli/testdata/expected-output/nested-code-block-test.code-block.9.rb @@ -0,0 +1,2 @@ +require 'mongo' +client = Mongo::Client.new('mongodb://localhost:27017/mydb') \ No newline at end of file diff --git a/audit-cli/testdata/input-files/source/code-block-test.rst b/audit-cli/testdata/input-files/source/code-block-test.rst new file mode 100644 index 0000000..d655fed --- /dev/null +++ b/audit-cli/testdata/input-files/source/code-block-test.rst @@ -0,0 +1,112 @@ +Code Block Test +=============== + +This file tests various code-block directive scenarios. + +JavaScript with Language +------------------------ + +.. code-block:: javascript + + const greeting = "Hello, World!"; + console.log(greeting); + +Python with Options +------------------- + +.. code-block:: python + :copyable: false + :emphasize-lines: 2,3 + + def calculate_sum(a, b): + result = a + b + return result + +JSON Array Example +------------------ + +.. 
code-block:: javascript + :copyable: false + :emphasize-lines: 12,13,25,26,31,32 + + [ + { + _id: ObjectId("620ad555394d47411658b5ef"), + time: ISODate("2021-03-08T09:00:00.000Z"), + price: 500, + linearFillPrice: 500, + locfPrice: 500 + }, + { + _id: ObjectId("620ad555394d47411658b5f0"), + time: ISODate("2021-03-08T10:00:00.000Z"), + linearFillPrice: 507.5, + locfPrice: 500 + }, + { + _id: ObjectId("620ad555394d47411658b5f1"), + time: ISODate("2021-03-08T11:00:00.000Z"), + price: 515, + linearFillPrice: 515, + locfPrice: 515 + }, + { + _id: ObjectId("620ad555394d47411658b5f2"), + time: ISODate("2021-03-08T12:00:00.000Z"), + linearFillPrice: 505, + locfPrice: 515 + }, + { + _id: ObjectId("620ad555394d47411658b5f3"), + time: ISODate("2021-03-08T13:00:00.000Z"), + linearFillPrice: 495, + locfPrice: 515 + }, + { + _id: ObjectId("620ad555394d47411658b5f4"), + time: ISODate("2021-03-08T14:00:00.000Z"), + price: 485, + linearFillPrice: 485, + locfPrice: 485 + } + ] + +Code Block with No Language +---------------------------- + +.. code-block:: + + This is a code block with no language specified. + It should still be extracted. + +Shell Script +------------ + +.. code-block:: sh + + #!/bin/bash + echo "Hello from shell" + exit 0 + +TypeScript Normalization +------------------------ + +.. code-block:: ts + + interface User { + name: string; + age: number; + } + +C++ Normalization +----------------- + +.. 
code-block:: c++ + + #include + + int main() { + std::cout << "Hello" << std::endl; + return 0; + } + diff --git a/audit-cli/testdata/input-files/source/code-examples/example.cpp b/audit-cli/testdata/input-files/source/code-examples/example.cpp new file mode 100644 index 0000000..15f17b0 --- /dev/null +++ b/audit-cli/testdata/input-files/source/code-examples/example.cpp @@ -0,0 +1,9 @@ +#include +#include + +int main() { + std::string message = "Hello from C++!"; + std::cout << message << std::endl; + return 0; +} + diff --git a/audit-cli/testdata/input-files/source/code-examples/example.go b/audit-cli/testdata/input-files/source/code-examples/example.go new file mode 100644 index 0000000..6c4129b --- /dev/null +++ b/audit-cli/testdata/input-files/source/code-examples/example.go @@ -0,0 +1,8 @@ +package main + +import "fmt" + +func main() { + fmt.Println("Hello from Go!") +} + diff --git a/audit-cli/testdata/input-files/source/code-examples/example.js b/audit-cli/testdata/input-files/source/code-examples/example.js new file mode 100644 index 0000000..eb4f156 --- /dev/null +++ b/audit-cli/testdata/input-files/source/code-examples/example.js @@ -0,0 +1,10 @@ +// JavaScript example +console.log("Before function"); + +// start-greet +function greet(name) { + return `Hello, ${name}!`; +} + +console.log(greet("World")); + diff --git a/audit-cli/testdata/input-files/source/code-examples/example.php b/audit-cli/testdata/input-files/source/code-examples/example.php new file mode 100644 index 0000000..1763a58 --- /dev/null +++ b/audit-cli/testdata/input-files/source/code-examples/example.php @@ -0,0 +1,12 @@ + 'localhost', + 'port' => 27017 +]; +// end-init + +function connect($config) { + return new MongoDB\Client("mongodb://{$config['host']}:{$config['port']}"); +} + diff --git a/audit-cli/testdata/input-files/source/code-examples/example.py b/audit-cli/testdata/input-files/source/code-examples/example.py new file mode 100644 index 0000000..f932099 --- /dev/null +++ 
b/audit-cli/testdata/input-files/source/code-examples/example.py @@ -0,0 +1,16 @@ +# Python example file +import sys + +# start-hello + def hello_world(): + """Print hello world message.""" + print("Hello, World!") + return True +# end-hello + +def main(): + hello_world() + +if __name__ == "__main__": + main() + diff --git a/audit-cli/testdata/input-files/source/code-examples/example.rb b/audit-cli/testdata/input-files/source/code-examples/example.rb new file mode 100644 index 0000000..8374655 --- /dev/null +++ b/audit-cli/testdata/input-files/source/code-examples/example.rb @@ -0,0 +1,11 @@ + # Ruby example + class Greeter + def initialize(name) + @name = name + end + + def greet + puts "Hello, #{@name}!" + end + end + diff --git a/audit-cli/testdata/input-files/source/code-examples/example.ts b/audit-cli/testdata/input-files/source/code-examples/example.ts new file mode 100644 index 0000000..047b1ee --- /dev/null +++ b/audit-cli/testdata/input-files/source/code-examples/example.ts @@ -0,0 +1,10 @@ +// TypeScript example +interface User { + name: string; + age: number; +} + +function greetUser(user: User): string { + return `Hello, ${user.name}!`; +} + diff --git a/audit-cli/testdata/input-files/source/include-test.rst b/audit-cli/testdata/input-files/source/include-test.rst new file mode 100644 index 0000000..1d352cf --- /dev/null +++ b/audit-cli/testdata/input-files/source/include-test.rst @@ -0,0 +1,14 @@ +Include Directive Test +====================== + +This file tests include directive following. + +.. include:: /includes/intro.rst + +Main Content +------------ + +Some main content here. + +.. include:: /includes/examples.rst + diff --git a/audit-cli/testdata/input-files/source/includes/examples.rst b/audit-cli/testdata/input-files/source/includes/examples.rst new file mode 100644 index 0000000..96b554e --- /dev/null +++ b/audit-cli/testdata/input-files/source/includes/examples.rst @@ -0,0 +1,8 @@ +Examples +-------- + +Here's a simple example: + +.. 
literalinclude:: /code-examples/example.go + :language: golang + diff --git a/audit-cli/testdata/input-files/source/includes/intro.rst b/audit-cli/testdata/input-files/source/includes/intro.rst new file mode 100644 index 0000000..4bed946 --- /dev/null +++ b/audit-cli/testdata/input-files/source/includes/intro.rst @@ -0,0 +1,5 @@ +Introduction +------------ + +This is an included introduction section. + diff --git a/audit-cli/testdata/input-files/source/io-code-block-test.rst b/audit-cli/testdata/input-files/source/io-code-block-test.rst new file mode 100644 index 0000000..52402f7 --- /dev/null +++ b/audit-cli/testdata/input-files/source/io-code-block-test.rst @@ -0,0 +1,146 @@ +========================== +IO Code Block Test +========================== + +This file tests io-code-block directives with input and output sub-directives. + +Test 1: Inline Input and Output +================================= + +.. io-code-block:: + :copyable: true + + .. input:: + :language: javascript + + db.restaurants.aggregate( [ { $match: { category: "cafe" } } ] ) + + .. output:: + :language: javascript + + [ + { _id: 1, category: 'café', status: 'Open' }, + { _id: 2, category: 'cafe', status: 'open' }, + { _id: 3, category: 'cafE', status: 'open' } + ] + +Test 2: File-based Input and Output +===================================== + +.. io-code-block:: + + .. input:: /code-examples/example.js + :language: javascript + + .. output:: /code-examples/example-output.txt + :language: text + +Test 3: Python Example with Inline Code +========================================= + +.. io-code-block:: + + .. input:: + :language: python + + from pymongo import MongoClient + client = MongoClient('mongodb://localhost:27017') + db = client.test_database + collection = db.test_collection + result = collection.insert_one({'name': 'Alice', 'age': 30}) + print(result.inserted_id) + + .. 
output:: + :language: python + + ObjectId('507f1f77bcf86cd799439011') + +Test 4: Shell Command Example +=============================== + +.. io-code-block:: + :copyable: true + + .. input:: + :language: sh + + mongosh --eval "db.users.find({age: {$gt: 25}})" + + .. output:: + :language: json + + [ + { "_id": 1, "name": "Alice", "age": 30 }, + { "_id": 2, "name": "Bob", "age": 35 } + ] + +Test 5: TypeScript Example +============================ + +.. io-code-block:: + + .. input:: + :language: ts + + import { MongoClient } from 'mongodb'; + + const client = new MongoClient('mongodb://localhost:27017'); + await client.connect(); + const db = client.db('mydb'); + const result = await db.collection('users').findOne({ name: 'Alice' }); + console.log(result); + + .. output:: + :language: json + + { "_id": 1, "name": "Alice", "age": 30, "email": "alice@example.com" } + +Test 6: Nested Inside Procedure Step +====================================== + +.. procedure:: + + .. step:: Query the database + + Run the following query: + + .. io-code-block:: + :copyable: true + + .. input:: + :language: javascript + + db.inventory.find({ status: "A" }) + + .. output:: + :language: javascript + + [ + { _id: 1, item: "journal", status: "A" }, + { _id: 2, item: "notebook", status: "A" } + ] + +Test 7: Input Only (No Output) +================================ + +.. io-code-block:: + + .. 
input:: + :language: go + + package main + + import ( + "context" + "go.mongodb.org/mongo-driver/mongo" + "go.mongodb.org/mongo-driver/mongo/options" + ) + + func main() { + client, err := mongo.Connect(context.TODO(), options.Client().ApplyURI("mongodb://localhost:27017")) + if err != nil { + panic(err) + } + defer client.Disconnect(context.TODO()) + } + diff --git a/audit-cli/testdata/input-files/source/literalinclude-test.rst b/audit-cli/testdata/input-files/source/literalinclude-test.rst new file mode 100644 index 0000000..d6c4e44 --- /dev/null +++ b/audit-cli/testdata/input-files/source/literalinclude-test.rst @@ -0,0 +1,53 @@ +Literalinclude Test +=================== + +This file tests various literalinclude directive scenarios. + +Python with start-after and end-before +--------------------------------------- + +.. literalinclude:: /code-examples/example.py + :language: python + :start-after: start-hello + :end-before: end-hello + :dedent: + +Go full file +------------ + +.. literalinclude:: /code-examples/example.go + :language: go + +JavaScript with start-after only +--------------------------------- + +.. literalinclude:: /code-examples/example.js + :language: javascript + :start-after: start-greet + +PHP with end-before only +------------------------- + +.. literalinclude:: /code-examples/example.php + :language: php + :end-before: end-init + +Ruby with dedent +---------------- + +.. literalinclude:: /code-examples/example.rb + :language: ruby + :dedent: + +TypeScript language normalization +---------------------------------- + +.. literalinclude:: /code-examples/example.ts + :language: ts + +C++ language normalization +--------------------------- + +.. 
literalinclude:: /code-examples/example.cpp + :language: c++ + diff --git a/audit-cli/testdata/input-files/source/nested-code-block-test.rst b/audit-cli/testdata/input-files/source/nested-code-block-test.rst new file mode 100644 index 0000000..32f8ba2 --- /dev/null +++ b/audit-cli/testdata/input-files/source/nested-code-block-test.rst @@ -0,0 +1,167 @@ +========================== +Nested Code Block Test +========================== + +This file tests code-block directives that are nested inside other directives. + +Test 1: Code Block Inside Procedure Step +========================================== + +.. procedure:: + :style: normal + + .. step:: Create a database connection + + First, establish a connection to the database: + + .. code-block:: javascript + :copyable: true + + const { MongoClient } = require('mongodb'); + const client = new MongoClient('mongodb://localhost:27017'); + await client.connect(); + + .. step:: Insert a document + + Next, insert a document into the collection: + + .. code-block:: javascript + :copyable: true + + const db = client.db('myDatabase'); + const collection = db.collection('myCollection'); + const result = await collection.insertOne({ name: 'Alice', age: 30 }); + console.log('Inserted document:', result.insertedId); + + .. step:: Query the document + + Finally, query the document you just inserted: + + .. code-block:: javascript + + const doc = await collection.findOne({ name: 'Alice' }); + console.log('Found document:', doc); + +Test 2: Code Block Inside Note Directive +========================================== + +.. note:: + + When using transactions, you must use a session: + + .. 
code-block:: python + :emphasize-lines: 2,3 + + client = MongoClient('mongodb://localhost:27017') + session = client.start_session() + with session.start_transaction(): + collection.insert_one({'x': 1}, session=session) + collection.update_one({'x': 1}, {'$set': {'y': 2}}, session=session) + +Test 3: Code Block Inside Important Directive +=============================================== + +.. important:: + + Always validate user input before processing: + + .. code-block:: go + + func validateInput(input string) error { + if len(input) == 0 { + return errors.New("input cannot be empty") + } + if len(input) > 100 { + return errors.New("input too long") + } + return nil + } + +Test 4: Deeply Nested Code Block +================================== + +.. container:: example + + .. admonition:: Example: Multi-step Process + + This example shows a multi-step process: + + .. procedure:: + + .. step:: Initialize the system + + .. code-block:: typescript + + interface Config { + host: string; + port: number; + } + + const config: Config = { + host: 'localhost', + port: 27017 + }; + + .. step:: Connect to the database + + .. code-block:: typescript + + import { MongoClient } from 'mongodb'; + + const client = new MongoClient(`mongodb://${config.host}:${config.port}`); + await client.connect(); + console.log('Connected successfully'); + +Test 5: Code Block Inside Warning +=================================== + +.. warning:: + + Do not use this pattern in production: + + .. code-block:: sh + + # This is insecure! + chmod 777 /var/lib/mongodb + chown nobody:nobody /var/lib/mongodb + +Test 6: Multiple Code Blocks in Same Parent +============================================= + +.. tip:: + + You can use either syntax for connecting: + + **Option 1: Connection String** + + .. code-block:: ruby + + require 'mongo' + client = Mongo::Client.new('mongodb://localhost:27017/mydb') + + **Option 2: Hash Options** + + .. 
code-block:: ruby + + require 'mongo' + client = Mongo::Client.new(['localhost:27017'], database: 'mydb') + +Test 7: Code Block with No Language Inside Directive +====================================================== + +.. note:: + + Here's a sample configuration file: + + .. code-block:: + + { + "database": { + "host": "localhost", + "port": 27017 + }, + "logging": { + "level": "info" + } + } + From 99e316fc9801ab8f2ededc6fb98205096eb851d5 Mon Sep 17 00:00:00 2001 From: dacharyc Date: Fri, 17 Oct 2025 16:58:13 -0400 Subject: [PATCH 2/7] Add new CLI tool to the parent README.md --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index b4921a9..ee5c46d 100644 --- a/README.md +++ b/README.md @@ -11,6 +11,7 @@ corpus. database in Atlas. - `dodec`, or the Database of Devoured Example Code: a query tool that lets us find code examples and related metadata in the database for reporting or to perform manual updates. +- `audit-cli`: A Go CLI project to help us audit docs content from files on the local filesystem. - `examples-copier`: a Go app that runs as a GitHub App and copies files from the source code repo (generated code examples) to multiple target repos and branches. 
- `github-check-releases`: a Node.js script that gets the latest release versions From e37a21bb2da9f133d9c3938d956fc0008a858d85 Mon Sep 17 00:00:00 2001 From: dacharyc Date: Fri, 17 Oct 2025 17:29:38 -0400 Subject: [PATCH 3/7] Add a --follow-includes flag for the search find-string command --- audit-cli/README.md | 9 ++- .../search/find-string/find_string.go | 80 +++++++++++++++---- .../commands/search/find-string/report.go | 2 +- 3 files changed, 72 insertions(+), 19 deletions(-) diff --git a/audit-cli/README.md b/audit-cli/README.md index eaeb37a..f2e80e2 100644 --- a/audit-cli/README.md +++ b/audit-cli/README.md @@ -116,7 +116,7 @@ After extraction, a report is displayed showing: #### `search find-string` -Search through extracted code example files for a specific substring. +Search through files for a specific substring. Can search through extracted code example files or RST source files. **Basic Usage:** @@ -130,6 +130,12 @@ Search through extracted code example files for a specific substring. # Search recursively ./audit-cli search find-string path/to/output "substring" -r +# Search an RST file and all files it includes +./audit-cli search find-string path/to/source.rst "substring" -f + +# Search a directory recursively and follow includes in RST files +./audit-cli search find-string path/to/source "substring" -r -f + # Verbose output (show file paths and language breakdown) ./audit-cli search find-string path/to/output "substring" -r -v ``` @@ -137,6 +143,7 @@ Search through extracted code example files for a specific substring. **Flags:** - `-r, --recursive` - Recursively search all files in subdirectories +- `-f, --follow-includes` - Follow `.. 
include::` directives in RST files - `-v, --verbose` - Show file paths and language breakdown **Report:** diff --git a/audit-cli/commands/search/find-string/find_string.go b/audit-cli/commands/search/find-string/find_string.go index 8fa3569..63d6b83 100644 --- a/audit-cli/commands/search/find-string/find_string.go +++ b/audit-cli/commands/search/find-string/find_string.go @@ -8,6 +8,7 @@ // // Supports: // - Recursive directory scanning +// - Following include directives in RST files // - Verbose output with file paths and language breakdown // - Language detection based on file extension package find_string @@ -18,21 +19,24 @@ import ( "path/filepath" "strings" + "github.com/mongodb/code-example-tooling/audit-cli/internal/rst" "github.com/spf13/cobra" ) // NewFindStringCommand creates the find-string subcommand. // // This command searches through extracted code example files for a specific substring. -// Supports flags for recursive search and verbose output. +// Supports flags for recursive search, following includes, and verbose output. // // Flags: // - -r, --recursive: Recursively search all files in subdirectories +// - -f, --follow-includes: Follow .. include:: directives in RST files // - -v, --verbose: Show file paths and language breakdown func NewFindStringCommand() *cobra.Command { var ( - recursive bool - verbose bool + recursive bool + followIncludes bool + verbose bool ) cmd := &cobra.Command{ @@ -44,11 +48,12 @@ Reports the number of code examples containing the substring.`, RunE: func(cmd *cobra.Command, args []string) error { filePath := args[0] substring := args[1] - return runSearch(filePath, substring, recursive, verbose) + return runSearch(filePath, substring, recursive, followIncludes, verbose) }, } cmd.Flags().BoolVarP(&recursive, "recursive", "r", false, "Recursively search all files in subdirectories") + cmd.Flags().BoolVarP(&followIncludes, "follow-includes", "f", false, "Follow .. 
include:: directives in RST files") cmd.Flags().BoolVarP(&verbose, "verbose", "v", false, "Provide additional information during execution") return cmd @@ -63,26 +68,27 @@ Reports the number of code examples containing the substring.`, // - filePath: Path to file or directory to search // - substring: The substring to search for (case-sensitive) // - recursive: If true, recursively search subdirectories +// - followIncludes: If true, follow .. include:: directives // - verbose: If true, show detailed information during search // // Returns: // - *SearchReport: Statistics about the search operation // - error: Any error encountered during search -func RunSearch(filePath string, substring string, recursive bool, verbose bool) (*SearchReport, error) { - return runSearchInternal(filePath, substring, recursive, verbose) +func RunSearch(filePath string, substring string, recursive bool, followIncludes bool, verbose bool) (*SearchReport, error) { + return runSearchInternal(filePath, substring, recursive, followIncludes, verbose) } // runSearch executes the search operation (internal wrapper for CLI). // // This is a thin wrapper around runSearchInternal that discards the report // and only returns errors, suitable for use in the CLI command handler. 
-func runSearch(filePath string, substring string, recursive bool, verbose bool) error { - _, err := runSearchInternal(filePath, substring, recursive, verbose) +func runSearch(filePath string, substring string, recursive bool, followIncludes bool, verbose bool) error { + _, err := runSearchInternal(filePath, substring, recursive, followIncludes, verbose) return err } // runSearchInternal contains the core logic for the search-code-examples command -func runSearchInternal(filePath string, substring string, recursive bool, verbose bool) (*SearchReport, error) { +func runSearchInternal(filePath string, substring string, recursive bool, followIncludes bool, verbose bool) (*SearchReport, error) { fileInfo, err := os.Stat(filePath) if err != nil { return nil, fmt.Errorf("failed to access path %s: %w", filePath, err) @@ -106,24 +112,46 @@ func runSearchInternal(filePath string, substring string, recursive bool, verbos if verbose { fmt.Printf("Found %d files to search\n", len(filesToSearch)) - fmt.Printf("Searching for substring: %q\n\n", substring) + fmt.Printf("Searching for substring: %q\n", substring) + fmt.Printf("Follow includes: %v\n\n", followIncludes) } + // Track visited files to prevent circular includes + visited := make(map[string]bool) + for _, file := range filesToSearch { if verbose { fmt.Printf("Searching: %s\n", file) } - result, err := searchFile(file, substring) - if err != nil { - fmt.Fprintf(os.Stderr, "Warning: failed to search %s: %v\n", file, err) - continue + // If followIncludes is enabled, collect all files including those referenced by includes + var filesToSearchWithIncludes []string + if followIncludes { + // Use ParseFileWithIncludes to get all files (main + includes) + processedFiles, err := collectFilesWithIncludes(file, visited, verbose) + if err != nil { + fmt.Fprintf(os.Stderr, "Warning: failed to follow includes for %s: %v\n", file, err) + filesToSearchWithIncludes = []string{file} + } else { + filesToSearchWithIncludes = 
processedFiles + } + } else { + filesToSearchWithIncludes = []string{file} } - report.AddResult(result) + // Search all collected files + for _, fileToSearch := range filesToSearchWithIncludes { + result, err := searchFile(fileToSearch, substring) + if err != nil { + fmt.Fprintf(os.Stderr, "Warning: failed to search %s: %v\n", fileToSearch, err) + continue + } + + report.AddResult(result) - if verbose && result.Contains { - fmt.Printf(" ✓ Found substring in %s\n", file) + if verbose && result.Contains { + fmt.Printf(" ✓ Found substring in %s\n", fileToSearch) + } } } @@ -164,6 +192,24 @@ func collectFiles(dirPath string, recursive bool) ([]string, error) { return files, nil } +// collectFilesWithIncludes collects a file and all files it includes via .. include:: directives +func collectFilesWithIncludes(filePath string, visited map[string]bool, verbose bool) ([]string, error) { + // Use the RST package's ParseFileWithIncludes to get all files + // We pass a no-op parseFunc since we just want the list of files + processedFiles, err := rst.ParseFileWithIncludes( + filePath, + true, // followIncludes = true + visited, + verbose, + nil, // no-op parseFunc + ) + if err != nil { + return nil, err + } + + return processedFiles, nil +} + // searchFile searches a single file for the substring func searchFile(filePath string, substring string) (SearchResult, error) { result := SearchResult{ diff --git a/audit-cli/commands/search/find-string/report.go b/audit-cli/commands/search/find-string/report.go index c620d79..4a4a70c 100644 --- a/audit-cli/commands/search/find-string/report.go +++ b/audit-cli/commands/search/find-string/report.go @@ -19,7 +19,7 @@ import ( // - verbose: If true, show detailed breakdown including file paths and language counts func PrintReport(report *SearchReport, verbose bool) { fmt.Println("\n" + strings.Repeat("=", 60)) - fmt.Println("CODE SEARCH REPORT") + fmt.Println("SEARCH REPORT") fmt.Println(strings.Repeat("=", 60)) fmt.Printf("\nFiles Scanned: 
%d\n", report.FilesScanned) From c1884d1d1dacf82486f0470145c16fb3eaa24d7d Mon Sep 17 00:00:00 2001 From: dacharyc Date: Fri, 17 Oct 2025 17:54:25 -0400 Subject: [PATCH 4/7] Add a command to visit include paths and output either a list or tree structure from the start file --- audit-cli/README.md | 99 ++++++++-- audit-cli/commands/analyze/analyze.go | 34 ++++ .../commands/analyze/includes/analyzer.go | 169 ++++++++++++++++++ .../commands/analyze/includes/includes.go | 100 +++++++++++ audit-cli/commands/analyze/includes/output.go | 116 ++++++++++++ audit-cli/commands/analyze/includes/types.go | 23 +++ .../code-examples/code_examples_test.go | 7 +- audit-cli/main.go | 4 + .../source/duplicate-include-test.rst | 19 ++ .../source/includes/nested-include.rst | 7 + .../source/nested-include-test.rst | 14 ++ 11 files changed, 576 insertions(+), 16 deletions(-) create mode 100644 audit-cli/commands/analyze/analyze.go create mode 100644 audit-cli/commands/analyze/includes/analyzer.go create mode 100644 audit-cli/commands/analyze/includes/includes.go create mode 100644 audit-cli/commands/analyze/includes/output.go create mode 100644 audit-cli/commands/analyze/includes/types.go create mode 100644 audit-cli/testdata/input-files/source/duplicate-include-test.rst create mode 100644 audit-cli/testdata/input-files/source/includes/nested-include.rst create mode 100644 audit-cli/testdata/input-files/source/nested-include-test.rst diff --git a/audit-cli/README.md b/audit-cli/README.md index f2e80e2..10c20dd 100644 --- a/audit-cli/README.md +++ b/audit-cli/README.md @@ -9,6 +9,7 @@ A Go CLI tool for extracting and analyzing code examples from MongoDB documentat - [Usage](#usage) - [Extract Commands](#extract-commands) - [Search Commands](#search-commands) + - [Analyze Commands](#analyze-commands) - [Development](#development) - [Project Structure](#project-structure) - [Adding New Commands](#adding-new-commands) @@ -22,8 +23,9 @@ This CLI tool helps maintain code quality across 
MongoDB's documentation by: 1. **Extracting code examples** from RST files into individual, testable files 2. **Searching extracted code** for specific patterns or substrings -3. **Following include directives** to process entire documentation trees -4. **Handling MongoDB-specific conventions** like steps files, extracts, and template variables +3. **Analyzing include relationships** to understand file dependencies +4. **Following include directives** to process entire documentation trees +5. **Handling MongoDB-specific conventions** like steps files, extracts, and template variables ## Installation @@ -51,8 +53,10 @@ The CLI is organized into parent commands with subcommands: audit-cli ├── extract # Extract content from RST files │ └── code-examples -└── search # Search through extracted content - └── find-string +├── search # Search through extracted content +│ └── find-string +└── analyze # Analyze RST file structures + └── includes ``` ### Extract Commands @@ -156,6 +160,67 @@ With `-v` flag, also shows: - List of file paths where substring appears - Count broken down by language (file extension) +### Analyze Commands + +#### `analyze includes` + +Analyze include directive relationships in RST files to understand file dependencies. 
+ +**Basic Usage:** + +```bash +# Analyze a single file (shows summary) +./audit-cli analyze includes path/to/file.rst + +# Show hierarchical tree structure +./audit-cli analyze includes path/to/file.rst --tree + +# Show flat list of all included files +./audit-cli analyze includes path/to/file.rst --list + +# Show both tree and list +./audit-cli analyze includes path/to/file.rst --tree --list + +# Verbose output (show processing details) +./audit-cli analyze includes path/to/file.rst --tree -v +``` + +**Flags:** + +- `--tree` - Display results as a hierarchical tree structure +- `--list` - Display results as a flat list of all files +- `-v, --verbose` - Show detailed processing information + +**Output Formats:** + +**Summary** (default - no flags): +- Root file path +- Total number of files +- Maximum depth of include nesting +- Hints to use --tree or --list for more details + +**Tree** (--tree flag): +- Hierarchical tree structure showing include relationships +- Uses box-drawing characters for visual clarity +- Shows which files include which other files + +**List** (--list flag): +- Flat numbered list of all files +- Files listed in depth-first traversal order +- Shows absolute paths to all files + +**Use Cases:** + +This command helps writers: +- Understand the impact of changes to widely-included files +- Identify circular or repeated includes (files that appear more than once in the include tree) +- Document file relationships for maintenance +- Plan refactoring of complex include structures + +**Note on File Counting:** + +The total file count represents **unique files**: the root file plus every file discovered through include directives. If a file is included multiple times (e.g., file A includes file C, and file B also includes file C), it is counted only once in the total. However, the tree view will show it in all locations where it appears, with subsequent occurrences reported as circular includes in verbose mode, even when the repeat is not a true cycle.
+ ## Development ### Project Structure @@ -174,17 +239,25 @@ audit-cli/ │ │ ├── report.go # Report generation │ │ ├── types.go # Type definitions │ │ └── language.go # Language normalization -│ └── search/ # Search parent command -│ ├── search.go # Parent command definition -│ └── find-string/ # Find string subcommand -│ ├── find_string.go # Command logic -│ ├── types.go # Type definitions -│ └── report.go # Report generation +│ ├── search/ # Search parent command +│ │ ├── search.go # Parent command definition +│ │ └── find-string/ # Find string subcommand +│ │ ├── find_string.go # Command logic +│ │ ├── types.go # Type definitions +│ │ └── report.go # Report generation +│ └── analyze/ # Analyze parent command +│ ├── analyze.go # Parent command definition +│ └── includes/ # Includes analysis subcommand +│ ├── includes.go # Command logic +│ ├── analyzer.go # Include tree building +│ ├── output.go # Output formatting +│ └── types.go # Type definitions ├── internal/ # Internal packages │ └── rst/ # RST parsing utilities -│ ├── include.go # Include directive resolution -│ ├── traverse.go # Directory traversal -│ └── directive.go # Directive parsing +│ ├── parser.go # Generic parsing with includes +│ ├── include_resolver.go # Include directive resolution +│ ├── directive_parser.go # Directive parsing +│ └── file_utils.go # File utilities └── testdata/ # Test fixtures ├── input-files/ # Test RST files │ └── source/ # Source directory (required) diff --git a/audit-cli/commands/analyze/analyze.go b/audit-cli/commands/analyze/analyze.go new file mode 100644 index 0000000..dd4f9a8 --- /dev/null +++ b/audit-cli/commands/analyze/analyze.go @@ -0,0 +1,34 @@ +// Package analyze provides the parent command for analyzing RST file structures. +// +// This package serves as the parent command for various analysis operations. 
+// Currently supports: +// - includes: Analyze include directive relationships in RST files +// +// Future subcommands could include analyzing cross-references, broken links, or content metrics. +package analyze + +import ( + "github.com/mongodb/code-example-tooling/audit-cli/commands/analyze/includes" + "github.com/spf13/cobra" +) + +// NewAnalyzeCommand creates the analyze parent command. +// +// This command serves as a parent for various analysis operations on RST files. +// It doesn't perform any operations itself but provides a namespace for subcommands. +func NewAnalyzeCommand() *cobra.Command { + cmd := &cobra.Command{ + Use: "analyze", + Short: "Analyze reStructuredText file structures", + Long: `Analyze various aspects of reStructuredText files and their relationships. + +Currently supports analyzing include directive relationships to understand file dependencies. +Future subcommands may support analyzing cross-references, broken links, or content metrics.`, + } + + // Add subcommands + cmd.AddCommand(includes.NewIncludesCommand()) + + return cmd +} + diff --git a/audit-cli/commands/analyze/includes/analyzer.go b/audit-cli/commands/analyze/includes/analyzer.go new file mode 100644 index 0000000..52c1ff1 --- /dev/null +++ b/audit-cli/commands/analyze/includes/analyzer.go @@ -0,0 +1,169 @@ +package includes + +import ( + "fmt" + "os" + "path/filepath" + + "github.com/mongodb/code-example-tooling/audit-cli/internal/rst" +) + +// AnalyzeIncludes analyzes a file and builds a tree of include relationships. +// +// This function recursively follows include directives and builds both a tree structure +// and a flat list of all files discovered. It tracks the maximum depth of nesting. 
+// +// Parameters: +// - filePath: Path to the RST file to analyze +// - verbose: If true, print detailed processing information +// +// Returns: +// - *IncludeAnalysis: Analysis results including tree and file list +// - error: Any error encountered during analysis +func AnalyzeIncludes(filePath string, verbose bool) (*IncludeAnalysis, error) { + absPath, err := filepath.Abs(filePath) + if err != nil { + return nil, fmt.Errorf("failed to get absolute path: %w", err) + } + + // Verify the file exists + if _, err := os.Stat(absPath); err != nil { + return nil, fmt.Errorf("file not found: %s", absPath) + } + + if verbose { + fmt.Printf("Analyzing includes for: %s\n\n", absPath) + } + + // Build the tree structure + visited := make(map[string]bool) + tree, err := buildIncludeTree(absPath, visited, verbose, 0) + if err != nil { + return nil, err + } + + // Collect all unique files from the visited map + // The visited map contains all unique files that were processed + allFiles := make([]string, 0, len(visited)) + for file := range visited { + allFiles = append(allFiles, file) + } + + // Calculate max depth + maxDepth := calculateMaxDepth(tree, 0) + + analysis := &IncludeAnalysis{ + RootFile: absPath, + Tree: tree, + AllFiles: allFiles, + TotalFiles: len(allFiles), + MaxDepth: maxDepth, + } + + return analysis, nil +} + +// buildIncludeTree recursively builds a tree of include relationships. +// +// This function creates an IncludeNode for the given file and recursively +// processes all files it includes, preventing circular includes. 
+// +// Parameters: +// - filePath: Path to the file to process +// - visited: Map tracking already-processed files (prevents circular includes) +// - verbose: If true, print detailed processing information +// - depth: Current depth in the tree (for verbose output) +// +// Returns: +// - *IncludeNode: Tree node representing this file and its includes +// - error: Any error encountered during processing +func buildIncludeTree(filePath string, visited map[string]bool, verbose bool, depth int) (*IncludeNode, error) { + absPath, err := filepath.Abs(filePath) + if err != nil { + return nil, err + } + + // Create the node for this file + node := &IncludeNode{ + FilePath: absPath, + Children: []*IncludeNode{}, + } + + // Check if we've already visited this file (circular include) + if visited[absPath] { + if verbose { + indent := getIndent(depth) + fmt.Printf("%s⚠ Circular include detected: %s\n", indent, filepath.Base(absPath)) + } + return node, nil + } + visited[absPath] = true + + // Find include directives in this file + includeFiles, err := rst.FindIncludeDirectives(absPath) + if err != nil { + // Not a fatal error - file might not have includes + return node, nil + } + + if verbose && len(includeFiles) > 0 { + indent := getIndent(depth) + fmt.Printf("%s📄 %s (%d includes)\n", indent, filepath.Base(absPath), len(includeFiles)) + } + + // Recursively process each included file + for _, includeFile := range includeFiles { + childNode, err := buildIncludeTree(includeFile, visited, verbose, depth+1) + if err != nil { + fmt.Fprintf(os.Stderr, "Warning: failed to process include %s: %v\n", includeFile, err) + continue + } + node.Children = append(node.Children, childNode) + } + + return node, nil +} + +// calculateMaxDepth calculates the maximum depth of the include tree. +// +// This function recursively traverses the tree to find the deepest nesting level. 
+// +// Parameters: +// - node: Current node being processed +// - currentDepth: Depth of the current node +// +// Returns: +// - int: Maximum depth found in the tree +func calculateMaxDepth(node *IncludeNode, currentDepth int) int { + if node == nil || len(node.Children) == 0 { + return currentDepth + } + + maxChildDepth := currentDepth + for _, child := range node.Children { + childDepth := calculateMaxDepth(child, currentDepth+1) + if childDepth > maxChildDepth { + maxChildDepth = childDepth + } + } + + return maxChildDepth +} + +// getIndent returns an indentation string for the given depth level. +// +// This is used for verbose output to show the tree structure. +// +// Parameters: +// - depth: Nesting depth level +// +// Returns: +// - string: Indentation string (2 spaces per level) +func getIndent(depth int) string { + indent := "" + for i := 0; i < depth; i++ { + indent += " " + } + return indent +} + diff --git a/audit-cli/commands/analyze/includes/includes.go b/audit-cli/commands/analyze/includes/includes.go new file mode 100644 index 0000000..bb0d5f6 --- /dev/null +++ b/audit-cli/commands/analyze/includes/includes.go @@ -0,0 +1,100 @@ +// Package includes provides functionality for analyzing include directive relationships. +// +// This package implements the "analyze includes" subcommand, which analyzes RST files +// to understand their include directive relationships. It can display results as: +// - A hierarchical tree structure showing include relationships +// - A flat list of all files referenced through includes +// +// This helps writers understand the impact of changes to files that are widely included. +package includes + +import ( + "fmt" + + "github.com/spf13/cobra" +) + +// NewIncludesCommand creates the includes subcommand. +// +// This command analyzes include directive relationships in RST files. +// Supports flags for different output formats (tree or list). 
+// +// Flags: +// - --tree: Display results as a hierarchical tree structure +// - --list: Display results as a flat list of all files +// - -v, --verbose: Show detailed processing information +func NewIncludesCommand() *cobra.Command { + var ( + showTree bool + showList bool + verbose bool + ) + + cmd := &cobra.Command{ + Use: "includes [filepath]", + Short: "Analyze include directive relationships in RST files", + Long: `Analyze include directive relationships to understand file dependencies. + +This command recursively follows .. include:: directives and shows all files +that are referenced. This helps writers understand the impact of changes to +files that are widely included across the documentation. + +Output formats: + --tree: Show hierarchical tree structure of includes + --list: Show flat list of all included files + +If neither flag is specified, shows a summary with basic statistics.`, + Args: cobra.ExactArgs(1), + RunE: func(cmd *cobra.Command, args []string) error { + filePath := args[0] + return runAnalyze(filePath, showTree, showList, verbose) + }, + } + + cmd.Flags().BoolVar(&showTree, "tree", false, "Display results as a hierarchical tree structure") + cmd.Flags().BoolVar(&showList, "list", false, "Display results as a flat list of all files") + cmd.Flags().BoolVarP(&verbose, "verbose", "v", false, "Show detailed processing information") + + return cmd +} + +// runAnalyze executes the include analysis operation. +// +// This function analyzes the file's include relationships and displays +// the results according to the specified flags. 
+// +// Parameters: +// - filePath: Path to the RST file to analyze +// - showTree: If true, display tree structure +// - showList: If true, display flat list +// - verbose: If true, show detailed processing information +// +// Returns: +// - error: Any error encountered during analysis +func runAnalyze(filePath string, showTree bool, showList bool, verbose bool) error { + // Perform the analysis + analysis, err := AnalyzeIncludes(filePath, verbose) + if err != nil { + return fmt.Errorf("failed to analyze includes: %w", err) + } + + // Display results based on flags + if showTree && showList { + // Both flags specified - show both outputs + PrintTree(analysis) + fmt.Println() + PrintList(analysis) + } else if showTree { + // Only tree + PrintTree(analysis) + } else if showList { + // Only list + PrintList(analysis) + } else { + // Neither flag - show summary + PrintSummary(analysis) + } + + return nil +} + diff --git a/audit-cli/commands/analyze/includes/output.go b/audit-cli/commands/analyze/includes/output.go new file mode 100644 index 0000000..bd33aa7 --- /dev/null +++ b/audit-cli/commands/analyze/includes/output.go @@ -0,0 +1,116 @@ +package includes + +import ( + "fmt" + "path/filepath" +) + +// PrintTree prints the include tree structure. +// +// This function displays the hierarchical relationship of includes using +// tree-style formatting with box-drawing characters. 
+// +// Parameters: +// - analysis: The analysis results containing the tree structure +func PrintTree(analysis *IncludeAnalysis) { + fmt.Println("============================================================") + fmt.Println("INCLUDE TREE") + fmt.Println("============================================================") + fmt.Printf("Root File: %s\n", analysis.RootFile) + fmt.Printf("Total Files: %d\n", analysis.TotalFiles) + fmt.Printf("Max Depth: %d\n", analysis.MaxDepth) + fmt.Println("============================================================") + fmt.Println() + + if analysis.Tree != nil { + printTreeNode(analysis.Tree, "", true, true) + } + + fmt.Println() +} + +// printTreeNode recursively prints a tree node with proper formatting. +// +// This function uses box-drawing characters to create a visual tree structure. +// +// Parameters: +// - node: The node to print +// - prefix: Prefix string for indentation +// - isLast: Whether this is the last child of its parent +// - isRoot: Whether this is the root node +func printTreeNode(node *IncludeNode, prefix string, isLast bool, isRoot bool) { + if node == nil { + return + } + + // Print the current node + if isRoot { + fmt.Printf("%s\n", filepath.Base(node.FilePath)) + } else { + connector := "├── " + if isLast { + connector = "└── " + } + fmt.Printf("%s%s%s\n", prefix, connector, filepath.Base(node.FilePath)) + } + + // Print children + childPrefix := prefix + if !isRoot { + if isLast { + childPrefix += " " + } else { + childPrefix += "│ " + } + } + + for i, child := range node.Children { + isLastChild := i == len(node.Children)-1 + printTreeNode(child, childPrefix, isLastChild, false) + } +} + +// PrintList prints a flat list of all included files. +// +// This function displays all files discovered through include directives +// in the order they were discovered (depth-first traversal). 
+// +// Parameters: +// - analysis: The analysis results containing the file list +func PrintList(analysis *IncludeAnalysis) { + fmt.Println("============================================================") + fmt.Println("INCLUDE FILE LIST") + fmt.Println("============================================================") + fmt.Printf("Root File: %s\n", analysis.RootFile) + fmt.Printf("Total Files: %d\n", analysis.TotalFiles) + fmt.Println("============================================================") + fmt.Println() + + for i, file := range analysis.AllFiles { + fmt.Printf("%3d. %s\n", i+1, file) + } + + fmt.Println() +} + +// PrintSummary prints a brief summary of the analysis. +// +// This function is used when neither --tree nor --list is specified, +// providing basic statistics about the include structure. +// +// Parameters: +// - analysis: The analysis results +func PrintSummary(analysis *IncludeAnalysis) { + fmt.Println("============================================================") + fmt.Println("INCLUDE ANALYSIS SUMMARY") + fmt.Println("============================================================") + fmt.Printf("Root File: %s\n", analysis.RootFile) + fmt.Printf("Total Files: %d\n", analysis.TotalFiles) + fmt.Printf("Max Depth: %d\n", analysis.MaxDepth) + fmt.Println("============================================================") + fmt.Println() + fmt.Println("Use --tree to see the hierarchical structure") + fmt.Println("Use --list to see a flat list of all files") + fmt.Println() +} + diff --git a/audit-cli/commands/analyze/includes/types.go b/audit-cli/commands/analyze/includes/types.go new file mode 100644 index 0000000..5f7bcc9 --- /dev/null +++ b/audit-cli/commands/analyze/includes/types.go @@ -0,0 +1,23 @@ +package includes + +// IncludeNode represents a file and its included files in a tree structure. 
+// +// This type is used to build a hierarchical representation of include relationships, +// where each node represents a file and its children are the files it includes. +type IncludeNode struct { + FilePath string // Absolute path to the file + Children []*IncludeNode // Files included by this file +} + +// IncludeAnalysis contains the results of analyzing include directives. +// +// This type holds both the tree structure and the flat list of all files +// discovered through include directives. +type IncludeAnalysis struct { + RootFile string // The original file that was analyzed + Tree *IncludeNode // Tree structure of include relationships + AllFiles []string // Flat list of all files (in order discovered) + TotalFiles int // Total number of unique files + MaxDepth int // Maximum depth of include nesting +} + diff --git a/audit-cli/commands/extract/code-examples/code_examples_test.go b/audit-cli/commands/extract/code-examples/code_examples_test.go index ba8a16e..f1432de 100644 --- a/audit-cli/commands/extract/code-examples/code_examples_test.go +++ b/audit-cli/commands/extract/code-examples/code_examples_test.go @@ -580,9 +580,10 @@ func TestNoFlagsOnDirectory(t *testing.T) { // Without recursive flag, should only process files in the top-level directory // Should NOT include files in includes/ subdirectory - // Expected: code-block-test.rst, include-test.rst, io-code-block-test.rst, - // literalinclude-test.rst, nested-code-block-test.rst (5 files) - expectedFiles := 5 + // Expected: code-block-test.rst, duplicate-include-test.rst, include-test.rst, + // io-code-block-test.rst, literalinclude-test.rst, nested-code-block-test.rst, + // nested-include-test.rst (7 files) + expectedFiles := 7 if report.FilesTraversed != expectedFiles { t.Errorf("Expected %d files traversed (top-level only), got %d", expectedFiles, report.FilesTraversed) diff --git a/audit-cli/main.go b/audit-cli/main.go index 8cad2e1..cd688f3 100644 --- a/audit-cli/main.go +++ 
b/audit-cli/main.go @@ -8,9 +8,12 @@ // - code-examples: Extract code examples from RST directives // - search: Search through extracted content // - find-string: Search for substrings in extracted files +// - analyze: Analyze RST file structures +// - includes: Analyze include directive relationships package main import ( + "github.com/mongodb/code-example-tooling/audit-cli/commands/analyze" "github.com/mongodb/code-example-tooling/audit-cli/commands/extract" "github.com/mongodb/code-example-tooling/audit-cli/commands/search" "github.com/spf13/cobra" @@ -30,6 +33,7 @@ with special handling for MongoDB documentation conventions.`, // Add parent commands rootCmd.AddCommand(extract.NewExtractCommand()) rootCmd.AddCommand(search.NewSearchCommand()) + rootCmd.AddCommand(analyze.NewAnalyzeCommand()) rootCmd.Execute() } diff --git a/audit-cli/testdata/input-files/source/duplicate-include-test.rst b/audit-cli/testdata/input-files/source/duplicate-include-test.rst new file mode 100644 index 0000000..5dd89a0 --- /dev/null +++ b/audit-cli/testdata/input-files/source/duplicate-include-test.rst @@ -0,0 +1,19 @@ +Duplicate Include Test +====================== + +This file includes the same file twice to test deduplication. + +.. include:: /includes/intro.rst + +Middle Content +-------------- + +Some content in the middle. + +.. include:: /includes/intro.rst + +End Content +----------- + +More content at the end. + diff --git a/audit-cli/testdata/input-files/source/includes/nested-include.rst b/audit-cli/testdata/input-files/source/includes/nested-include.rst new file mode 100644 index 0000000..b461662 --- /dev/null +++ b/audit-cli/testdata/input-files/source/includes/nested-include.rst @@ -0,0 +1,7 @@ +Nested Include Example +====================== + +This file includes another file. + +.. 
include:: /includes/intro.rst + diff --git a/audit-cli/testdata/input-files/source/nested-include-test.rst b/audit-cli/testdata/input-files/source/nested-include-test.rst new file mode 100644 index 0000000..4d8c475 --- /dev/null +++ b/audit-cli/testdata/input-files/source/nested-include-test.rst @@ -0,0 +1,14 @@ +Nested Include Test +=================== + +This file tests nested include directives. + +.. include:: /includes/nested-include.rst + +Main Content +------------ + +Some main content here. + +.. include:: /includes/examples.rst + From 887896d248ce90182725910a2eb69e5ea0d77440 Mon Sep 17 00:00:00 2001 From: dacharyc Date: Mon, 20 Oct 2025 15:32:19 -0400 Subject: [PATCH 5/7] More explicit string matching behavior, a few new flags, and tests --- audit-cli/README.md | 21 +- .../search/find-string/find_string.go | 103 +++++++- .../search/find-string/find_string_test.go | 250 ++++++++++++++++++ .../search-test-files/curl-examples.txt | 4 + .../search-test-files/libcurl-examples.txt | 4 + .../testdata/search-test-files/mixed-case.txt | 4 + .../testdata/search-test-files/no-match.txt | 3 + .../testdata/search-test-files/python-code.py | 8 + .../search-test-files/word-boundaries.txt | 8 + 9 files changed, 390 insertions(+), 15 deletions(-) create mode 100644 audit-cli/commands/search/find-string/find_string_test.go create mode 100644 audit-cli/testdata/search-test-files/curl-examples.txt create mode 100644 audit-cli/testdata/search-test-files/libcurl-examples.txt create mode 100644 audit-cli/testdata/search-test-files/mixed-case.txt create mode 100644 audit-cli/testdata/search-test-files/no-match.txt create mode 100644 audit-cli/testdata/search-test-files/python-code.py create mode 100644 audit-cli/testdata/search-test-files/word-boundaries.txt diff --git a/audit-cli/README.md b/audit-cli/README.md index 10c20dd..7e25f7f 100644 --- a/audit-cli/README.md +++ b/audit-cli/README.md @@ -122,11 +122,17 @@ After extraction, a report is displayed showing: Search through 
files for a specific substring. Can search through extracted code example files or RST source files. +**Default Behavior:** +- **Case-insensitive** search (matches "curl", "CURL", "Curl", etc.) +- **Exact word matching** (excludes partial matches like "curl" in "libcurl") + +Use `--case-sensitive` to make the search case-sensitive, or `--partial-match` to allow matching the substring as part of larger words. + **Basic Usage:** ```bash -# Search in a single file -./audit-cli search find-string path/to/file.js "substring" +# Search in a single file (case-insensitive, exact word match) +./audit-cli search find-string path/to/file.js "curl" # Search in a directory (non-recursive) ./audit-cli search find-string path/to/output "substring" @@ -142,6 +148,15 @@ Search through files for a specific substring. Can search through extracted code # Verbose output (show file paths and language breakdown) ./audit-cli search find-string path/to/output "substring" -r -v + +# Case-sensitive search (only matches exact case) +./audit-cli search find-string path/to/output "CURL" --case-sensitive + +# Partial match (includes "curl" in "libcurl") +./audit-cli search find-string path/to/output "curl" --partial-match + +# Combine flags for case-sensitive partial matching +./audit-cli search find-string path/to/output "curl" --case-sensitive --partial-match ``` **Flags:** @@ -149,6 +164,8 @@ Search through files for a specific substring. Can search through extracted code - `-r, --recursive` - Recursively search all files in subdirectories - `-f, --follow-includes` - Follow `.. 
include::` directives in RST files - `-v, --verbose` - Show file paths and language breakdown +- `--case-sensitive` - Make search case-sensitive (default: case-insensitive) +- `--partial-match` - Allow partial matches within words (default: exact word matching) **Report:** diff --git a/audit-cli/commands/search/find-string/find_string.go b/audit-cli/commands/search/find-string/find_string.go index 63d6b83..be90ec5 100644 --- a/audit-cli/commands/search/find-string/find_string.go +++ b/audit-cli/commands/search/find-string/find_string.go @@ -3,14 +3,18 @@ // This package implements the "search find-string" subcommand, which searches through // extracted code example files to find occurrences of a specific substring. // -// The search is case-sensitive and counts each file only once, even if the substring -// appears multiple times in the same file. +// By default, the search is case-insensitive and matches exact words only (not partial matches +// within larger words). These behaviors can be changed with the --case-sensitive and +// --partial-match flags. Each file is counted only once, even if the substring appears +// multiple times in the same file. // // Supports: // - Recursive directory scanning // - Following include directives in RST files // - Verbose output with file paths and language breakdown // - Language detection based on file extension +// - Case-insensitive search (default) or case-sensitive search (--case-sensitive flag) +// - Exact word matching (default) or partial matching (--partial-match flag) package find_string import ( @@ -32,29 +36,39 @@ import ( // - -r, --recursive: Recursively search all files in subdirectories // - -f, --follow-includes: Follow .. 
include:: directives in RST files // - -v, --verbose: Show file paths and language breakdown +// - --case-sensitive: Make search case-sensitive (default: case-insensitive) +// - --partial-match: Allow partial matches within words (default: exact word matching) func NewFindStringCommand() *cobra.Command { var ( recursive bool followIncludes bool verbose bool + caseSensitive bool + partialMatch bool ) cmd := &cobra.Command{ Use: "find-string [filepath] [substring]", Short: "Search for a substring in extracted code example files", Long: `Search through extracted code example files to find occurrences of a specific substring. -Reports the number of code examples containing the substring.`, +Reports the number of code examples containing the substring. + +By default, the search is case-insensitive and matches exact words only. Use --case-sensitive +to make the search case-sensitive, or --partial-match to allow matching the substring as part +of larger words (e.g., "curl" matching "libcurl").`, Args: cobra.ExactArgs(2), RunE: func(cmd *cobra.Command, args []string) error { filePath := args[0] substring := args[1] - return runSearch(filePath, substring, recursive, followIncludes, verbose) + return runSearch(filePath, substring, recursive, followIncludes, verbose, caseSensitive, partialMatch) }, } cmd.Flags().BoolVarP(&recursive, "recursive", "r", false, "Recursively search all files in subdirectories") cmd.Flags().BoolVarP(&followIncludes, "follow-includes", "f", false, "Follow .. 
include:: directives in RST files") cmd.Flags().BoolVarP(&verbose, "verbose", "v", false, "Provide additional information during execution") + cmd.Flags().BoolVar(&caseSensitive, "case-sensitive", false, "Make search case-sensitive (default: case-insensitive)") + cmd.Flags().BoolVar(&partialMatch, "partial-match", false, "Allow partial matches within words (default: exact word matching)") return cmd } @@ -66,29 +80,31 @@ Reports the number of code examples containing the substring.`, // // Parameters: // - filePath: Path to file or directory to search -// - substring: The substring to search for (case-sensitive) +// - substring: The substring to search for // - recursive: If true, recursively search subdirectories // - followIncludes: If true, follow .. include:: directives // - verbose: If true, show detailed information during search +// - caseSensitive: If true, search is case-sensitive; if false, case-insensitive +// - partialMatch: If true, allow partial matches within words; if false, match exact words only // // Returns: // - *SearchReport: Statistics about the search operation // - error: Any error encountered during search -func RunSearch(filePath string, substring string, recursive bool, followIncludes bool, verbose bool) (*SearchReport, error) { - return runSearchInternal(filePath, substring, recursive, followIncludes, verbose) +func RunSearch(filePath string, substring string, recursive bool, followIncludes bool, verbose bool, caseSensitive bool, partialMatch bool) (*SearchReport, error) { + return runSearchInternal(filePath, substring, recursive, followIncludes, verbose, caseSensitive, partialMatch) } // runSearch executes the search operation (internal wrapper for CLI). // // This is a thin wrapper around runSearchInternal that discards the report // and only returns errors, suitable for use in the CLI command handler. 
-func runSearch(filePath string, substring string, recursive bool, followIncludes bool, verbose bool) error { - _, err := runSearchInternal(filePath, substring, recursive, followIncludes, verbose) +func runSearch(filePath string, substring string, recursive bool, followIncludes bool, verbose bool, caseSensitive bool, partialMatch bool) error { + _, err := runSearchInternal(filePath, substring, recursive, followIncludes, verbose, caseSensitive, partialMatch) return err } // runSearchInternal contains the core logic for the search-code-examples command -func runSearchInternal(filePath string, substring string, recursive bool, followIncludes bool, verbose bool) (*SearchReport, error) { +func runSearchInternal(filePath string, substring string, recursive bool, followIncludes bool, verbose bool, caseSensitive bool, partialMatch bool) (*SearchReport, error) { fileInfo, err := os.Stat(filePath) if err != nil { return nil, fmt.Errorf("failed to access path %s: %w", filePath, err) @@ -113,6 +129,8 @@ func runSearchInternal(filePath string, substring string, recursive bool, follow if verbose { fmt.Printf("Found %d files to search\n", len(filesToSearch)) fmt.Printf("Searching for substring: %q\n", substring) + fmt.Printf("Case sensitive: %v\n", caseSensitive) + fmt.Printf("Partial match: %v\n", partialMatch) fmt.Printf("Follow includes: %v\n\n", followIncludes) } @@ -141,7 +159,7 @@ func runSearchInternal(filePath string, substring string, recursive bool, follow // Search all collected files for _, fileToSearch := range filesToSearchWithIncludes { - result, err := searchFile(fileToSearch, substring) + result, err := searchFile(fileToSearch, substring, caseSensitive, partialMatch) if err != nil { fmt.Fprintf(os.Stderr, "Warning: failed to search %s: %v\n", fileToSearch, err) continue @@ -211,7 +229,7 @@ func collectFilesWithIncludes(filePath string, visited map[string]bool, verbose } // searchFile searches a single file for the substring -func searchFile(filePath string, 
substring string) (SearchResult, error) { +func searchFile(filePath string, substring string, caseSensitive bool, partialMatch bool) (SearchResult, error) { result := SearchResult{ FilePath: filePath, Language: extractLanguageFromFilename(filePath), @@ -223,11 +241,70 @@ func searchFile(filePath string, substring string) (SearchResult, error) { return result, err } - result.Contains = strings.Contains(string(content), substring) + contentStr := string(content) + searchStr := substring + + // Handle case sensitivity + if !caseSensitive { + contentStr = strings.ToLower(contentStr) + searchStr = strings.ToLower(searchStr) + } + + // Check if substring exists in content + if !strings.Contains(contentStr, searchStr) { + return result, nil + } + + // If partial match is allowed, we're done + if partialMatch { + result.Contains = true + return result, nil + } + + // For exact word matching, check if the match is a whole word + result.Contains = isExactWordMatch(contentStr, searchStr) return result, nil } +// isExactWordMatch checks if the substring appears as a complete word in the content. +// A word boundary is defined as the start/end of the string or any character that is not an ASCII letter, digit, or underscore. +func isExactWordMatch(content string, substring string) bool { + // Find all occurrences of the substring + index := 0 + for { + pos := strings.Index(content[index:], substring) + if pos == -1 { + break + } + + actualPos := index + pos + + // Check if this is a whole word match + // Check character before (if not at start) + beforeOK := actualPos == 0 || !isWordChar(rune(content[actualPos-1])) + + // Check character after (if not at end) + afterPos := actualPos + len(substring) + afterOK := afterPos >= len(content) || !isWordChar(rune(content[afterPos])) + + if beforeOK && afterOK { + return true + } + + // Move to next potential match + index = actualPos + 1 + } + + return false +} + +// isWordChar returns true if the character is alphanumeric or underscore.
+// These characters are considered part of a word. +func isWordChar(c rune) bool { + return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9') || c == '_' +} + // extractLanguageFromFilename extracts the language from the file extension func extractLanguageFromFilename(filePath string) string { ext := filepath.Ext(filePath) diff --git a/audit-cli/commands/search/find-string/find_string_test.go b/audit-cli/commands/search/find-string/find_string_test.go new file mode 100644 index 0000000..2bd6f7e --- /dev/null +++ b/audit-cli/commands/search/find-string/find_string_test.go @@ -0,0 +1,250 @@ +package find_string + +import ( + "path/filepath" + "testing" +) + +// TestDefaultBehaviorCaseInsensitive tests that search is case-insensitive by default +func TestDefaultBehaviorCaseInsensitive(t *testing.T) { + testDataDir := filepath.Join("..", "..", "..", "testdata", "search-test-files") + mixedCaseFile := filepath.Join(testDataDir, "mixed-case.txt") + + // Search for lowercase "curl" with default settings (case-insensitive) + report, err := RunSearch(mixedCaseFile, "curl", false, false, false, false, false) + if err != nil { + t.Fatalf("RunSearch failed: %v", err) + } + + // Should match because it's case-insensitive + if report.FilesContaining != 1 { + t.Errorf("Expected 1 file containing 'curl' (case-insensitive), got %d", report.FilesContaining) + } +} + +// TestCaseSensitiveFlag tests that --case-sensitive flag works correctly +func TestCaseSensitiveFlag(t *testing.T) { + testDataDir := filepath.Join("..", "..", "..", "testdata", "search-test-files") + mixedCaseFile := filepath.Join(testDataDir, "mixed-case.txt") + + // Search for uppercase "CURL" with case-sensitive flag + report, err := RunSearch(mixedCaseFile, "CURL", false, false, false, true, false) + if err != nil { + t.Fatalf("RunSearch failed: %v", err) + } + + // Should match only the uppercase version + if report.FilesContaining != 1 { + t.Errorf("Expected 1 file containing 'CURL' 
(case-sensitive), got %d", report.FilesContaining) + } + + // Search for lowercase "curl" with case-sensitive flag + report2, err := RunSearch(mixedCaseFile, "curl", false, false, false, true, false) + if err != nil { + t.Fatalf("RunSearch failed: %v", err) + } + + // Should match only the lowercase version + if report2.FilesContaining != 1 { + t.Errorf("Expected 1 file containing 'curl' (case-sensitive), got %d", report2.FilesContaining) + } +} + +// TestDefaultBehaviorExactWordMatch tests that exact word matching is the default +func TestDefaultBehaviorExactWordMatch(t *testing.T) { + testDataDir := filepath.Join("..", "..", "..", "testdata", "search-test-files") + + // Search for "curl" in a file that only has "curl" as a standalone word + curlFile := filepath.Join(testDataDir, "curl-examples.txt") + report1, err := RunSearch(curlFile, "curl", false, false, false, false, false) + if err != nil { + t.Fatalf("RunSearch failed: %v", err) + } + if report1.FilesContaining != 1 { + t.Errorf("Expected 1 file containing 'curl' as exact word, got %d", report1.FilesContaining) + } + + // Search for "curl" in a file that only has "libcurl" (should NOT match with exact word matching) + libcurlFile := filepath.Join(testDataDir, "libcurl-examples.txt") + report2, err := RunSearch(libcurlFile, "curl", false, false, false, false, false) + if err != nil { + t.Fatalf("RunSearch failed: %v", err) + } + if report2.FilesContaining != 0 { + t.Errorf("Expected 0 files containing 'curl' as exact word in libcurl file, got %d", report2.FilesContaining) + } +} + +// TestPartialMatchFlag tests that --partial-match flag allows substring matching +func TestPartialMatchFlag(t *testing.T) { + testDataDir := filepath.Join("..", "..", "..", "testdata", "search-test-files") + libcurlFile := filepath.Join(testDataDir, "libcurl-examples.txt") + + // Search for "curl" with partial match enabled (should match "libcurl") + report, err := RunSearch(libcurlFile, "curl", false, false, false, false, true) 
+ if err != nil { + t.Fatalf("RunSearch failed: %v", err) + } + + if report.FilesContaining != 1 { + t.Errorf("Expected 1 file containing 'curl' with partial match, got %d", report.FilesContaining) + } +} + +// TestWordBoundaries tests various word boundary scenarios +func TestWordBoundaries(t *testing.T) { + testDataDir := filepath.Join("..", "..", "..", "testdata", "search-test-files") + boundariesFile := filepath.Join(testDataDir, "word-boundaries.txt") + + // Test exact word match (should match "curl" but not "libcurl", "curlopt", etc.) + report, err := RunSearch(boundariesFile, "curl", false, false, false, false, false) + if err != nil { + t.Fatalf("RunSearch failed: %v", err) + } + + // The file contains "curl" as a standalone word, so should match + if report.FilesContaining != 1 { + t.Errorf("Expected 1 file containing 'curl' as exact word, got %d", report.FilesContaining) + } + + // Test partial match (should match all occurrences) + report2, err := RunSearch(boundariesFile, "curl", false, false, false, false, true) + if err != nil { + t.Fatalf("RunSearch failed: %v", err) + } + + // Should match because partial matching is enabled + if report2.FilesContaining != 1 { + t.Errorf("Expected 1 file containing 'curl' with partial match, got %d", report2.FilesContaining) + } +} + +// TestDirectorySearch tests searching across multiple files in a directory +func TestDirectorySearch(t *testing.T) { + testDataDir := filepath.Join("..", "..", "..", "testdata", "search-test-files") + + // Search for "curl" in the directory (exact word match, case-insensitive) + report, err := RunSearch(testDataDir, "curl", false, false, false, false, false) + if err != nil { + t.Fatalf("RunSearch failed: %v", err) + } + + // Should find "curl" in: + // - curl-examples.txt (has "curl" as standalone word) + // - mixed-case.txt (has "curl", "CURL", "Curl" - case insensitive) + // - word-boundaries.txt (has "curl" as standalone word) + // - python-code.py (has "curl" as standalone word) 
+ // Should NOT find in: + // - libcurl-examples.txt (only has "libcurl", not standalone "curl") + // - no-match.txt (doesn't contain "curl" at all) + expectedMatches := 4 + if report.FilesContaining != expectedMatches { + t.Errorf("Expected %d files containing 'curl', got %d", expectedMatches, report.FilesContaining) + } + + // Verify total files scanned + if report.FilesScanned != 6 { + t.Errorf("Expected 6 files scanned, got %d", report.FilesScanned) + } +} + +// TestDirectorySearchWithPartialMatch tests directory search with partial matching +func TestDirectorySearchWithPartialMatch(t *testing.T) { + testDataDir := filepath.Join("..", "..", "..", "testdata", "search-test-files") + + // Search for "curl" with partial match enabled + report, err := RunSearch(testDataDir, "curl", false, false, false, false, true) + if err != nil { + t.Fatalf("RunSearch failed: %v", err) + } + + // Should find "curl" in all files except no-match.txt: + // - curl-examples.txt + // - libcurl-examples.txt (now matches because of partial match) + // - mixed-case.txt + // - word-boundaries.txt + // - python-code.py + expectedMatches := 5 + if report.FilesContaining != expectedMatches { + t.Errorf("Expected %d files containing 'curl' with partial match, got %d", expectedMatches, report.FilesContaining) + } +} + +// TestCombinedFlags tests using both case-sensitive and partial-match flags together +func TestCombinedFlags(t *testing.T) { + testDataDir := filepath.Join("..", "..", "..", "testdata", "search-test-files") + mixedCaseFile := filepath.Join(testDataDir, "mixed-case.txt") + + // Search for lowercase "curl" with both case-sensitive and partial match + report, err := RunSearch(mixedCaseFile, "curl", false, false, false, true, true) + if err != nil { + t.Fatalf("RunSearch failed: %v", err) + } + + // Should match only lowercase "curl" + if report.FilesContaining != 1 { + t.Errorf("Expected 1 file containing 'curl' (case-sensitive + partial), got %d", report.FilesContaining) + } + + 
// Search for uppercase "CURL" with both flags + report2, err := RunSearch(mixedCaseFile, "CURL", false, false, false, true, true) + if err != nil { + t.Fatalf("RunSearch failed: %v", err) + } + + // Should match only uppercase "CURL" + if report2.FilesContaining != 1 { + t.Errorf("Expected 1 file containing 'CURL' (case-sensitive + partial), got %d", report2.FilesContaining) + } +} + +// TestLanguageDetection tests that language is correctly detected from file extensions +func TestLanguageDetection(t *testing.T) { + testDataDir := filepath.Join("..", "..", "..", "testdata", "search-test-files") + + // Search in directory and check language counts + report, err := RunSearch(testDataDir, "curl", false, false, false, false, false) + if err != nil { + t.Fatalf("RunSearch failed: %v", err) + } + + // Should have detected .txt and .py files + if _, hasTxt := report.LanguageCounts["txt"]; !hasTxt { + t.Error("Expected to find 'txt' in language counts") + } + + if _, hasPy := report.LanguageCounts["py"]; !hasPy { + t.Error("Expected to find 'py' in language counts") + } + + // Check that txt count is correct (3 txt files should match) + if report.LanguageCounts["txt"] != 3 { + t.Errorf("Expected 3 txt files, got %d", report.LanguageCounts["txt"]) + } + + // Check that py count is correct (1 py file should match) + if report.LanguageCounts["py"] != 1 { + t.Errorf("Expected 1 py file, got %d", report.LanguageCounts["py"]) + } +} + +// TestNoMatches tests searching for a string that doesn't exist +func TestNoMatches(t *testing.T) { + testDataDir := filepath.Join("..", "..", "..", "testdata", "search-test-files") + noMatchFile := filepath.Join(testDataDir, "no-match.txt") + + // Search for "curl" in a file that doesn't contain it + report, err := RunSearch(noMatchFile, "curl", false, false, false, false, false) + if err != nil { + t.Fatalf("RunSearch failed: %v", err) + } + + if report.FilesContaining != 0 { + t.Errorf("Expected 0 files containing 'curl', got %d", 
report.FilesContaining) + } + + if report.FilesScanned != 1 { + t.Errorf("Expected 1 file scanned, got %d", report.FilesScanned) + } +} + diff --git a/audit-cli/testdata/search-test-files/curl-examples.txt b/audit-cli/testdata/search-test-files/curl-examples.txt new file mode 100644 index 0000000..53e8960 --- /dev/null +++ b/audit-cli/testdata/search-test-files/curl-examples.txt @@ -0,0 +1,4 @@ +This file contains curl command examples. +Use curl to make HTTP requests. +The curl tool is very useful. + diff --git a/audit-cli/testdata/search-test-files/libcurl-examples.txt b/audit-cli/testdata/search-test-files/libcurl-examples.txt new file mode 100644 index 0000000..cc5a822 --- /dev/null +++ b/audit-cli/testdata/search-test-files/libcurl-examples.txt @@ -0,0 +1,4 @@ +This file uses libcurl library. +The libcurl API is powerful. +You can use libcurl in C programs. + diff --git a/audit-cli/testdata/search-test-files/mixed-case.txt b/audit-cli/testdata/search-test-files/mixed-case.txt new file mode 100644 index 0000000..a41a8e2 --- /dev/null +++ b/audit-cli/testdata/search-test-files/mixed-case.txt @@ -0,0 +1,4 @@ +This file has CURL in uppercase. +Also has Curl in mixed case. +And curl in lowercase. + diff --git a/audit-cli/testdata/search-test-files/no-match.txt b/audit-cli/testdata/search-test-files/no-match.txt new file mode 100644 index 0000000..3c642e7 --- /dev/null +++ b/audit-cli/testdata/search-test-files/no-match.txt @@ -0,0 +1,3 @@ +This file does not contain the search term. +It has other content but not what we're looking for. 
+ diff --git a/audit-cli/testdata/search-test-files/python-code.py b/audit-cli/testdata/search-test-files/python-code.py new file mode 100644 index 0000000..50c3648 --- /dev/null +++ b/audit-cli/testdata/search-test-files/python-code.py @@ -0,0 +1,8 @@ +import requests + +# Use curl or requests library +def fetch_data(): + # curl alternative in Python + response = requests.get('https://api.example.com') + return response.json() + diff --git a/audit-cli/testdata/search-test-files/word-boundaries.txt b/audit-cli/testdata/search-test-files/word-boundaries.txt new file mode 100644 index 0000000..3dc9ad5 --- /dev/null +++ b/audit-cli/testdata/search-test-files/word-boundaries.txt @@ -0,0 +1,8 @@ +Testing word boundaries: +curl is a tool +libcurl is a library +curlopt is an option +_curl_ with underscores +curl-config is a script +precurl and postcurl + From a10620d623d666d7bfe3435b60a3cdbfc3e1cf4f Mon Sep 17 00:00:00 2001 From: dacharyc Date: Mon, 20 Oct 2025 18:02:36 -0400 Subject: [PATCH 6/7] Add compare command and file-contents subcommand --- audit-cli/README.md | 202 ++++++- audit-cli/commands/compare/compare.go | 35 ++ .../compare/file-contents/comparer.go | 217 +++++++ .../commands/compare/file-contents/differ.go | 81 +++ .../compare/file-contents/file_contents.go | 197 +++++++ .../file-contents/file_contents_test.go | 535 ++++++++++++++++++ .../commands/compare/file-contents/output.go | 197 +++++++ .../commands/compare/file-contents/types.go | 77 +++ .../compare/file-contents/version_resolver.go | 160 ++++++ audit-cli/go.mod | 5 +- audit-cli/go.sum | 2 + audit-cli/main.go | 4 + audit-cli/testdata/compare/file1.txt | 4 + audit-cli/testdata/compare/file2.txt | 5 + audit-cli/testdata/compare/identical1.txt | 4 + audit-cli/testdata/compare/identical2.txt | 4 + .../manual/source/includes/example.rst | 40 ++ .../manual/source/includes/new-feature.rst | 13 + .../upcoming/source/includes/example.rst | 42 ++ .../upcoming/source/includes/new-feature.rst | 13 + 
.../product/v8.0/source/includes/example.rst | 38 ++ 21 files changed, 1863 insertions(+), 12 deletions(-) create mode 100644 audit-cli/commands/compare/compare.go create mode 100644 audit-cli/commands/compare/file-contents/comparer.go create mode 100644 audit-cli/commands/compare/file-contents/differ.go create mode 100644 audit-cli/commands/compare/file-contents/file_contents.go create mode 100644 audit-cli/commands/compare/file-contents/file_contents_test.go create mode 100644 audit-cli/commands/compare/file-contents/output.go create mode 100644 audit-cli/commands/compare/file-contents/types.go create mode 100644 audit-cli/commands/compare/file-contents/version_resolver.go create mode 100644 audit-cli/testdata/compare/file1.txt create mode 100644 audit-cli/testdata/compare/file2.txt create mode 100644 audit-cli/testdata/compare/identical1.txt create mode 100644 audit-cli/testdata/compare/identical2.txt create mode 100644 audit-cli/testdata/compare/product/manual/source/includes/example.rst create mode 100644 audit-cli/testdata/compare/product/manual/source/includes/new-feature.rst create mode 100644 audit-cli/testdata/compare/product/upcoming/source/includes/example.rst create mode 100644 audit-cli/testdata/compare/product/upcoming/source/includes/new-feature.rst create mode 100644 audit-cli/testdata/compare/product/v8.0/source/includes/example.rst diff --git a/audit-cli/README.md b/audit-cli/README.md index 7e25f7f..ee4e53d 100644 --- a/audit-cli/README.md +++ b/audit-cli/README.md @@ -10,6 +10,7 @@ A Go CLI tool for extracting and analyzing code examples from MongoDB documentat - [Extract Commands](#extract-commands) - [Search Commands](#search-commands) - [Analyze Commands](#analyze-commands) + - [Compare Commands](#compare-commands) - [Development](#development) - [Project Structure](#project-structure) - [Adding New Commands](#adding-new-commands) @@ -24,8 +25,9 @@ This CLI tool helps maintain code quality across MongoDB's documentation by: 1. 
**Extracting code examples** from RST files into individual, testable files 2. **Searching extracted code** for specific patterns or substrings 3. **Analyzing include relationships** to understand file dependencies -4. **Following include directives** to process entire documentation trees -5. **Handling MongoDB-specific conventions** like steps files, extracts, and template variables +4. **Comparing file contents** across documentation versions to identify differences +5. **Following include directives** to process entire documentation trees +6. **Handling MongoDB-specific conventions** like steps files, extracts, and template variables ## Installation @@ -55,8 +57,10 @@ audit-cli │ └── code-examples ├── search # Search through extracted content │ └── find-string -└── analyze # Analyze RST file structures - └── includes +├── analyze # Analyze RST file structures +│ └── includes +└── compare # Compare files across versions + └── file-contents ``` ### Extract Commands @@ -238,6 +242,166 @@ This command helps writers: The total file count represents **unique files** discovered through include directives. If a file is included multiple times (e.g., file A includes file C, and file B also includes file C), it is counted only once in the total. However, the tree view will show it in all locations where it appears, with subsequent occurrences marked as circular includes in verbose mode. +### Compare Commands + +#### `compare file-contents` + +Compare file contents to identify differences between files. Supports two modes: +1. **Direct comparison** - Compare two specific files +2. 
**Version comparison** - Compare the same file across multiple documentation versions + +**Use Cases:** + +This command helps writers: +- Identify content drift across documentation versions +- Verify that updates have been applied consistently +- Scope maintenance work when updating shared content +- Understand how files have diverged over time + +**Basic Usage:** + +```bash +# Direct comparison of two files +./audit-cli compare file-contents file1.rst file2.rst + +# Compare with diff output +./audit-cli compare file-contents file1.rst file2.rst --show-diff + +# Version comparison across MongoDB documentation versions +./audit-cli compare file-contents \ + /path/to/manual/manual/source/includes/example.rst \ + --product-dir /path/to/manual \ + --versions manual,upcoming,v8.0,v7.0 + +# Show which files differ +./audit-cli compare file-contents \ + /path/to/manual/manual/source/includes/example.rst \ + --product-dir /path/to/manual \ + --versions manual,upcoming,v8.0,v7.0 \ + --show-paths + +# Show detailed diffs +./audit-cli compare file-contents \ + /path/to/manual/manual/source/includes/example.rst \ + --product-dir /path/to/manual \ + --versions manual,upcoming,v8.0,v7.0 \ + --show-diff + +# Verbose output (show processing details) +./audit-cli compare file-contents file1.rst file2.rst -v +``` + +**Flags:** + +- `-p, --product-dir <path>` - Product directory path (required for version comparison) +- `-V, --versions <list>` - Comma-separated list of versions (e.g., `manual,upcoming,v8.0`) +- `--show-paths` - Display file paths grouped by status (matching, differing, not found) +- `-d, --show-diff` - Display unified diff output (implies `--show-paths`) +- `-v, --verbose` - Show detailed processing information + +**Comparison Modes:** + +**1.
Direct Comparison (Two Files)** + +Provide two file paths as arguments: + +```bash +./audit-cli compare file-contents path/to/file1.rst path/to/file2.rst +``` + +This mode: +- Compares exactly two files +- Reports whether they are identical or different +- Can show unified diff with `--show-diff` + +**2. Version Comparison (Product Directory)** + +Provide one file path plus `--product-dir` and `--versions`: + +```bash +./audit-cli compare file-contents \ + /path/to/manual/manual/source/includes/example.rst \ + --product-dir /path/to/manual \ + --versions manual,upcoming,v8.0 +``` + +This mode: +- Extracts the relative path from the reference file +- Resolves the same relative path in each version directory +- Compares all versions against the reference file +- Reports matching, differing, and missing files + +**Version Directory Structure:** + +The tool expects MongoDB documentation to be organized as: +``` +product-dir/ +├── manual/ +│ └── source/ +│ └── includes/ +│ └── example.rst +├── upcoming/ +│ └── source/ +│ └── includes/ +│ └── example.rst +└── v8.0/ + └── source/ + └── includes/ + └── example.rst +``` + +**Output Formats:** + +**Summary** (default - no flags): +- Total number of versions compared +- Count of matching, differing, and missing files +- Hints to use `--show-paths` or `--show-diff` for more details + +**With --show-paths:** +- Summary (as above) +- List of files that match (with ✓) +- List of files that differ (with ✗) +- List of files not found (with -) + +**With --show-diff:** +- Summary and paths (as above) +- Unified diff output for each differing file +- Shows added lines (prefixed with +) +- Shows removed lines (prefixed with -) +- Shows context lines around changes + +**Examples:** + +```bash +# Check if a file is consistent across all versions +./audit-cli compare file-contents \ + ~/workspace/docs-mongodb-internal/content/manual/manual/source/includes/fact-atlas-search.rst \ + --product-dir 
~/workspace/docs-mongodb-internal/content/manual \ + --versions manual,upcoming,v8.0,v7.0,v6.0 + +# Find differences and see what changed +./audit-cli compare file-contents \ + ~/workspace/docs-mongodb-internal/content/manual/manual/source/includes/fact-atlas-search.rst \ + --product-dir ~/workspace/docs-mongodb-internal/content/manual \ + --versions manual,upcoming,v8.0,v7.0,v6.0 \ + --show-diff + +# Compare two specific versions of a file +./audit-cli compare file-contents \ + ~/workspace/docs-mongodb-internal/content/manual/manual/source/includes/example.rst \ + ~/workspace/docs-mongodb-internal/content/manual/v8.0/source/includes/example.rst \ + --show-diff +``` + +**Exit Codes:** + +- `0` - Success (files compared successfully, regardless of whether they match) +- `1` - Error (invalid arguments, file not found, read error, etc.) + +**Note on Missing Files:** + +Files that don't exist in certain versions are reported separately and do not cause errors. This is expected behavior since features may be added or removed across versions. 
+ ## Development ### Project Structure @@ -262,13 +426,23 @@ audit-cli/ │ │ ├── find_string.go # Command logic │ │ ├── types.go # Type definitions │ │ └── report.go # Report generation -│ └── analyze/ # Analyze parent command -│ ├── analyze.go # Parent command definition -│ └── includes/ # Includes analysis subcommand -│ ├── includes.go # Command logic -│ ├── analyzer.go # Include tree building +│ ├── analyze/ # Analyze parent command +│ │ ├── analyze.go # Parent command definition +│ │ └── includes/ # Includes analysis subcommand +│ │ ├── includes.go # Command logic +│ │ ├── analyzer.go # Include tree building +│ │ ├── output.go # Output formatting +│ │ └── types.go # Type definitions +│ └── compare/ # Compare parent command +│ ├── compare.go # Parent command definition +│ └── file-contents/ # File contents comparison subcommand +│ ├── file_contents.go # Command logic +│ ├── file_contents_test.go # Tests +│ ├── comparer.go # Comparison logic +│ ├── differ.go # Diff generation │ ├── output.go # Output formatting -│ └── types.go # Type definitions +│ ├── types.go # Type definitions +│ └── version_resolver.go # Version path resolution ├── internal/ # Internal packages │ └── rst/ # RST parsing utilities │ ├── parser.go # Generic parsing with includes @@ -281,7 +455,13 @@ audit-cli/ │ ├── *.rst # Test files │ ├── includes/ # Included RST files │ └── code-examples/ # Code files for literalinclude - └── expected-output/ # Expected extraction results + ├── expected-output/ # Expected extraction results + └── compare/ # Compare command test data + ├── product/ # Version structure tests + │ ├── manual/ # Manual version + │ ├── upcoming/ # Upcoming version + │ └── v8.0/ # v8.0 version + └── *.txt # Direct comparison tests ``` ### Adding New Commands diff --git a/audit-cli/commands/compare/compare.go b/audit-cli/commands/compare/compare.go new file mode 100644 index 0000000..e353ce9 --- /dev/null +++ b/audit-cli/commands/compare/compare.go @@ -0,0 +1,35 @@ +// Package compare 
provides the parent command for comparing files across versions. +// +// This package serves as the parent command for various comparison operations. +// Currently supports: +// - file-contents: Compare file contents across different versions +// +// Future subcommands could include comparing metadata, structure, or other aspects. +package compare + +import ( + "github.com/mongodb/code-example-tooling/audit-cli/commands/compare/file-contents" + "github.com/spf13/cobra" +) + +// NewCompareCommand creates the compare parent command. +// +// This command serves as a parent for various comparison operations on documentation files. +// It doesn't perform any operations itself but provides a namespace for subcommands. +func NewCompareCommand() *cobra.Command { + cmd := &cobra.Command{ + Use: "compare", + Short: "Compare files across different versions", + Long: `Compare files across different versions of MongoDB documentation. + +Currently supports comparing file contents to identify differences between +the same file across multiple documentation versions. This helps writers +understand how content has diverged across versions and identify maintenance work.`, + } + + // Add subcommands + cmd.AddCommand(file_contents.NewFileContentsCommand()) + + return cmd +} + diff --git a/audit-cli/commands/compare/file-contents/comparer.go b/audit-cli/commands/compare/file-contents/comparer.go new file mode 100644 index 0000000..deabaa0 --- /dev/null +++ b/audit-cli/commands/compare/file-contents/comparer.go @@ -0,0 +1,217 @@ +package file_contents + +import ( + "fmt" + "os" + "path/filepath" +) + +// CompareFiles performs a direct comparison between two files. +// +// This function compares two files directly without version resolution. 
+// +// Parameters: +// - file1Path: Path to the first file +// - file2Path: Path to the second file +// - generateDiff: If true, generate unified diff for differences +// - verbose: If true, show detailed processing information +// +// Returns: +// - *ComparisonResult: The comparison result +// - error: Any error encountered during comparison +func CompareFiles(file1Path, file2Path string, generateDiff bool, verbose bool) (*ComparisonResult, error) { + if verbose { + fmt.Printf("Comparing files:\n") + fmt.Printf(" File 1: %s\n", file1Path) + fmt.Printf(" File 2: %s\n", file2Path) + } + + // Read the reference file + content1, err := os.ReadFile(file1Path) + if err != nil { + return nil, fmt.Errorf("failed to read file %s: %w", file1Path, err) + } + + // Read the comparison file + content2, err := os.ReadFile(file2Path) + if err != nil { + return nil, fmt.Errorf("failed to read file %s: %w", file2Path, err) + } + + // Compare contents + result := &ComparisonResult{ + ReferenceFile: file1Path, + TotalFiles: 1, + } + + comparison := FileComparison{ + Version: filepath.Base(filepath.Dir(file2Path)), + FilePath: file2Path, + } + + if AreFilesIdentical(string(content1), string(content2)) { + comparison.Status = FileMatches + result.MatchingFiles = 1 + } else { + comparison.Status = FileDiffers + result.DifferingFiles = 1 + + if generateDiff { + diff, err := GenerateDiff(file1Path, string(content1), file2Path, string(content2)) + if err != nil { + return nil, fmt.Errorf("failed to generate diff: %w", err) + } + comparison.Diff = diff + } + } + + result.Comparisons = []FileComparison{comparison} + + return result, nil +} + +// CompareVersions performs a version-based comparison. +// +// This function compares a reference file against the same file across +// multiple versions of the documentation. 
+// +// Parameters: +// - referenceFile: Path to the reference file +// - productDir: Path to the product directory +// - versions: List of version identifiers to compare +// - generateDiff: If true, generate unified diff for differences +// - verbose: If true, show detailed processing information +// +// Returns: +// - *ComparisonResult: The comparison result +// - error: Any error encountered during comparison +func CompareVersions(referenceFile, productDir string, versions []string, generateDiff bool, verbose bool) (*ComparisonResult, error) { + if verbose { + fmt.Printf("Comparing file across %d versions...\n", len(versions)) + fmt.Printf(" Reference file: %s\n", referenceFile) + fmt.Printf(" Product directory: %s\n", productDir) + fmt.Printf(" Versions: %v\n", versions) + } + + // Extract the reference version from the path + referenceVersion, err := ExtractVersionFromPath(referenceFile, productDir) + if err != nil { + return nil, fmt.Errorf("failed to extract version from reference file: %w", err) + } + + if verbose { + fmt.Printf(" Reference version: %s\n", referenceVersion) + } + + // Read the reference file + referenceContent, err := os.ReadFile(referenceFile) + if err != nil { + return nil, fmt.Errorf("failed to read reference file %s: %w", referenceFile, err) + } + + // Resolve version paths + versionPaths, err := ResolveVersionPaths(referenceFile, productDir, versions) + if err != nil { + return nil, fmt.Errorf("failed to resolve version paths: %w", err) + } + + // Initialize result + result := &ComparisonResult{ + ReferenceFile: referenceFile, + ReferenceVersion: referenceVersion, + TotalFiles: len(versionPaths), + } + + // Compare each version + for _, vp := range versionPaths { + if verbose { + fmt.Printf(" Checking %s: %s\n", vp.Version, vp.FilePath) + } + + comparison := compareFile(referenceFile, string(referenceContent), vp, generateDiff, verbose) + result.Comparisons = append(result.Comparisons, comparison) + + // Update counters + switch 
comparison.Status { + case FileMatches: + result.MatchingFiles++ + case FileDiffers: + result.DifferingFiles++ + case FileNotFound: + result.NotFoundFiles++ + case FileError: + result.ErrorFiles++ + } + } + + return result, nil +} + +// compareFile compares a single version file against the reference content. +// +// This is an internal helper function used by CompareVersions. +// +// Parameters: +// - referencePath: Path to the reference file (for diff labels) +// - referenceContent: Content of the reference file +// - versionPath: The version path to compare +// - generateDiff: If true, generate unified diff for differences +// - verbose: If true, show detailed processing information +// +// Returns: +// - FileComparison: The comparison result for this file +func compareFile(referencePath, referenceContent string, versionPath VersionPath, generateDiff bool, verbose bool) FileComparison { + comparison := FileComparison{ + Version: versionPath.Version, + FilePath: versionPath.FilePath, + } + + // Check if file exists + if _, err := os.Stat(versionPath.FilePath); os.IsNotExist(err) { + comparison.Status = FileNotFound + if verbose { + fmt.Printf(" → File not found\n") + } + return comparison + } + + // Read the file + content, err := os.ReadFile(versionPath.FilePath) + if err != nil { + comparison.Status = FileError + comparison.Error = fmt.Errorf("failed to read file: %w", err) + if verbose { + fmt.Printf(" → Error reading file: %v\n", err) + } + return comparison + } + + // Compare contents + if AreFilesIdentical(referenceContent, string(content)) { + comparison.Status = FileMatches + if verbose { + fmt.Printf(" → Matches\n") + } + } else { + comparison.Status = FileDiffers + if verbose { + fmt.Printf(" → Differs\n") + } + + if generateDiff { + diff, err := GenerateDiff(referencePath, referenceContent, versionPath.FilePath, string(content)) + if err != nil { + comparison.Status = FileError + comparison.Error = fmt.Errorf("failed to generate diff: %w", err) + if 
verbose { + fmt.Printf(" → Error generating diff: %v\n", err) + } + } else { + comparison.Diff = diff + } + } + } + + return comparison +} + diff --git a/audit-cli/commands/compare/file-contents/differ.go b/audit-cli/commands/compare/file-contents/differ.go new file mode 100644 index 0000000..7e11e70 --- /dev/null +++ b/audit-cli/commands/compare/file-contents/differ.go @@ -0,0 +1,81 @@ +package file_contents + +import ( + "github.com/aymanbagabas/go-udiff" +) + +// GenerateDiff generates a unified diff between two file contents. +// +// This function uses the Myers diff algorithm to compute the differences +// between two strings and formats the output as a unified diff. +// +// Parameters: +// - fromName: Name/label for the "from" file (e.g., "manual/source/file.rst") +// - fromContent: Content of the "from" file +// - toName: Name/label for the "to" file (e.g., "v8.0/source/file.rst") +// - toContent: Content of the "to" file +// +// Returns: +// - string: The unified diff output, or empty string if files are identical +// - error: Any error encountered during diff generation +func GenerateDiff(fromName, fromContent, toName, toContent string) (string, error) { + // If contents are identical, return empty string + if fromContent == toContent { + return "", nil + } + + // Generate unified diff using go-udiff + // This uses the default number of context lines (3) + diff := udiff.Unified(fromName, toName, fromContent, toContent) + + return diff, nil +} + +// GenerateDiffWithContext generates a unified diff with custom context lines. +// +// This function is similar to GenerateDiff but allows specifying the number +// of context lines to include around changes. 
+// +// Parameters: +// - fromName: Name/label for the "from" file +// - fromContent: Content of the "from" file +// - toName: Name/label for the "to" file +// - toContent: Content of the "to" file +// - contextLines: Number of context lines to show around changes (typically 3) +// +// Returns: +// - string: The unified diff output, or empty string if files are identical +// - error: Any error encountered during diff generation +func GenerateDiffWithContext(fromName, fromContent, toName, toContent string, contextLines int) (string, error) { + // If contents are identical, return empty string + if fromContent == toContent { + return "", nil + } + + // Compute edits + edits := udiff.Strings(fromContent, toContent) + + // Generate unified diff with custom context lines + // ToUnified returns a string directly + diff, err := udiff.ToUnified(fromName, toName, fromContent, edits, contextLines) + if err != nil { + return "", err + } + + return diff, nil +} + +// AreFilesIdentical checks if two file contents are identical. +// +// This is a simple byte-by-byte comparison. +// +// Parameters: +// - content1: First file content +// - content2: Second file content +// +// Returns: +// - bool: true if contents are identical, false otherwise +func AreFilesIdentical(content1, content2 string) bool { + return content1 == content2 +} + diff --git a/audit-cli/commands/compare/file-contents/file_contents.go b/audit-cli/commands/compare/file-contents/file_contents.go new file mode 100644 index 0000000..32a17bf --- /dev/null +++ b/audit-cli/commands/compare/file-contents/file_contents.go @@ -0,0 +1,197 @@ +// Package file_contents provides functionality for comparing file contents across versions. +// +// This package implements the "compare file-contents" subcommand, which compares +// file contents either directly between two files or across multiple versions of +// MongoDB documentation. +// +// The command supports two modes: +// 1. 
Direct comparison: Compare two specific files +// 2. Version comparison: Compare the same file across multiple versions +// +// Output can be progressively detailed: +// - Default: Summary of differences +// - --show-paths: Include file paths +// - --show-diff: Include unified diffs +package file_contents + +import ( + "fmt" + "strings" + + "github.com/spf13/cobra" +) + +// NewFileContentsCommand creates the file-contents subcommand. +// +// This command compares file contents either directly between two files +// or across multiple versions of documentation. +// +// Usage modes: +// 1. Direct comparison: +// compare file-contents file1.rst file2.rst +// +// 2. Version comparison: +// compare file-contents file.rst --product-dir /path/to/product --versions v1,v2,v3 +// +// Flags: +// - -p, --product-dir: Product directory path (required for version comparison) +// - -V, --versions: Comma-separated list of versions (required for version comparison) +// - --show-paths: Display file paths of files that differ +// - -d, --show-diff: Display unified diff output +// - -v, --verbose: Show detailed processing information +func NewFileContentsCommand() *cobra.Command { + var ( + productDir string + versions string + showPaths bool + showDiff bool + verbose bool + ) + + cmd := &cobra.Command{ + Use: "file-contents [file1] [file2]", + Short: "Compare file contents across versions or between two files", + Long: `Compare file contents to identify differences. + +This command supports two modes: + +1. Direct comparison (two file arguments): + Compare two specific files directly. + Example: compare file-contents file1.rst file2.rst + +2. Version comparison (one file argument + flags): + Compare the same file across multiple documentation versions. 
+ Example: compare file-contents /path/to/manual/manual/source/file.rst \ + --product-dir /path/to/manual \ + --versions manual,upcoming,v8.1,v8.0 + +The command provides progressive output detail: + - Default: Summary of differences + - --show-paths: Include file paths grouped by status + - --show-diff: Include unified diffs (implies --show-paths) + +Files that don't exist in certain versions are reported separately and +do not cause errors.`, + Args: cobra.RangeArgs(1, 2), + RunE: func(cmd *cobra.Command, args []string) error { + return runCompare(args, productDir, versions, showPaths, showDiff, verbose) + }, + } + + cmd.Flags().StringVarP(&productDir, "product-dir", "p", "", "Product directory path (e.g., /path/to/manual)") + cmd.Flags().StringVarP(&versions, "versions", "V", "", "Comma-separated list of versions (e.g., manual,upcoming,v8.1)") + cmd.Flags().BoolVar(&showPaths, "show-paths", false, "Display file paths of files that differ") + cmd.Flags().BoolVarP(&showDiff, "show-diff", "d", false, "Display unified diff output") + cmd.Flags().BoolVarP(&verbose, "verbose", "v", false, "Show detailed processing information") + + return cmd +} + +// runCompare executes the comparison operation. +// +// This function validates arguments and delegates to the appropriate +// comparison function based on the mode (direct or version comparison). 
+// +// Parameters: +// - args: Command line arguments (1 or 2 file paths) +// - productDir: Product directory path (for version comparison) +// - versions: Comma-separated version list (for version comparison) +// - showPaths: If true, show file paths +// - showDiff: If true, show diffs +// - verbose: If true, show detailed processing information +// +// Returns: +// - error: Any error encountered during comparison +func runCompare(args []string, productDir, versions string, showPaths, showDiff, verbose bool) error { + // Validate arguments based on mode + if len(args) == 2 { + // Direct comparison mode + if productDir != "" || versions != "" { + return fmt.Errorf("--product-dir and --versions cannot be used with two file arguments") + } + return runDirectComparison(args[0], args[1], showPaths, showDiff, verbose) + } else if len(args) == 1 { + // Version comparison mode + if productDir == "" { + return fmt.Errorf("--product-dir is required when comparing versions (use -p or --product-dir)") + } + if versions == "" { + return fmt.Errorf("--versions is required when comparing versions (use -V or --versions)") + } + return runVersionComparison(args[0], productDir, versions, showPaths, showDiff, verbose) + } + + return fmt.Errorf("expected 1 or 2 file arguments") +} + +// runDirectComparison performs a direct comparison between two files. 
+// +// Parameters: +// - file1: Path to the first file +// - file2: Path to the second file +// - showPaths: If true, show file paths +// - showDiff: If true, show diffs +// - verbose: If true, show detailed processing information +// +// Returns: +// - error: Any error encountered during comparison +func runDirectComparison(file1, file2 string, showPaths, showDiff, verbose bool) error { + result, err := CompareFiles(file1, file2, showDiff, verbose) + if err != nil { + return fmt.Errorf("comparison failed: %w", err) + } + + PrintComparisonResult(result, showPaths, showDiff) + return nil +} + +// runVersionComparison performs a version-based comparison. +// +// Parameters: +// - referenceFile: Path to the reference file +// - productDir: Product directory path +// - versionsStr: Comma-separated version list +// - showPaths: If true, show file paths +// - showDiff: If true, show diffs +// - verbose: If true, show detailed processing information +// +// Returns: +// - error: Any error encountered during comparison +func runVersionComparison(referenceFile, productDir, versionsStr string, showPaths, showDiff, verbose bool) error { + // Parse versions + versionList := parseVersions(versionsStr) + if len(versionList) == 0 { + return fmt.Errorf("no versions specified") + } + + result, err := CompareVersions(referenceFile, productDir, versionList, showDiff, verbose) + if err != nil { + return fmt.Errorf("comparison failed: %w", err) + } + + PrintComparisonResult(result, showPaths, showDiff) + return nil +} + +// parseVersions parses a comma-separated version string into a slice. +// +// This function splits the version string by commas and trims whitespace +// from each version identifier. 
+// +// Parameters: +// - versionsStr: Comma-separated version string (e.g., "manual, upcoming, v8.1") +// +// Returns: +// - []string: List of version identifiers +func parseVersions(versionsStr string) []string { + parts := strings.Split(versionsStr, ",") + var versions []string + for _, part := range parts { + trimmed := strings.TrimSpace(part) + if trimmed != "" { + versions = append(versions, trimmed) + } + } + return versions +} + diff --git a/audit-cli/commands/compare/file-contents/file_contents_test.go b/audit-cli/commands/compare/file-contents/file_contents_test.go new file mode 100644 index 0000000..2f9cb4a --- /dev/null +++ b/audit-cli/commands/compare/file-contents/file_contents_test.go @@ -0,0 +1,535 @@ +package file_contents + +import ( + "strings" + "testing" +) + +// TestCompareFiles tests direct file comparison +func TestCompareFiles(t *testing.T) { + testDataDir := "../../../testdata/compare" + + tests := []struct { + name string + file1 string + file2 string + generateDiff bool + expectError bool + expectDiff bool + expectMatching bool + }{ + { + name: "different files without diff", + file1: testDataDir + "/file1.txt", + file2: testDataDir + "/file2.txt", + generateDiff: false, + expectError: false, + expectDiff: true, + expectMatching: false, + }, + { + name: "different files with diff", + file1: testDataDir + "/file1.txt", + file2: testDataDir + "/file2.txt", + generateDiff: true, + expectError: false, + expectDiff: true, + expectMatching: false, + }, + { + name: "identical files", + file1: testDataDir + "/identical1.txt", + file2: testDataDir + "/identical2.txt", + generateDiff: false, + expectError: false, + expectDiff: false, + expectMatching: true, + }, + { + name: "nonexistent file", + file1: testDataDir + "/file1.txt", + file2: testDataDir + "/nonexistent.txt", + expectError: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result, err := CompareFiles(tt.file1, tt.file2, tt.generateDiff, false) + + 
if tt.expectError { + if err == nil { + t.Errorf("expected error but got none") + } + return + } + + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + if result == nil { + t.Fatal("expected result but got nil") + } + + if tt.expectMatching && result.MatchingFiles != 1 { + t.Errorf("expected 1 matching file, got %d", result.MatchingFiles) + } + + if tt.expectDiff && result.DifferingFiles != 1 { + t.Errorf("expected 1 differing file, got %d", result.DifferingFiles) + } + + if tt.generateDiff && tt.expectDiff { + if len(result.Comparisons) == 0 { + t.Fatal("expected comparisons but got none") + } + if result.Comparisons[0].Diff == "" { + t.Error("expected diff output but got empty string") + } + } + }) + } +} + +// TestCompareVersions tests version-based comparison +func TestCompareVersions(t *testing.T) { + testDataDir := "../../../testdata/compare" + + tests := []struct { + name string + referenceFile string + productDir string + versions []string + generateDiff bool + expectError bool + expectMatching int + expectDiffering int + expectNotFound int + }{ + { + name: "compare across three versions", + referenceFile: testDataDir + "/product/manual/source/includes/example.rst", + productDir: testDataDir + "/product", + versions: []string{"manual", "upcoming", "v8.0"}, + generateDiff: false, + expectError: false, + expectMatching: 1, // manual matches itself + expectDiffering: 2, // upcoming and v8.0 differ + expectNotFound: 0, + }, + { + name: "compare with diff generation", + referenceFile: testDataDir + "/product/manual/source/includes/example.rst", + productDir: testDataDir + "/product", + versions: []string{"manual", "upcoming"}, + generateDiff: true, + expectError: false, + expectMatching: 1, + expectDiffering: 1, + expectNotFound: 0, + }, + { + name: "file not found in some versions", + referenceFile: testDataDir + "/product/manual/source/includes/new-feature.rst", + productDir: testDataDir + "/product", + versions: []string{"manual", "upcoming", 
"v8.0"}, + generateDiff: false, + expectError: false, + expectMatching: 2, // manual and upcoming match + expectDiffering: 0, + expectNotFound: 1, // v8.0 doesn't have this file + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result, err := CompareVersions(tt.referenceFile, tt.productDir, tt.versions, tt.generateDiff, false) + + if tt.expectError { + if err == nil { + t.Errorf("expected error but got none") + } + return + } + + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + if result == nil { + t.Fatal("expected result but got nil") + } + + if result.MatchingFiles != tt.expectMatching { + t.Errorf("expected %d matching files, got %d", tt.expectMatching, result.MatchingFiles) + } + + if result.DifferingFiles != tt.expectDiffering { + t.Errorf("expected %d differing files, got %d", tt.expectDiffering, result.DifferingFiles) + } + + if result.NotFoundFiles != tt.expectNotFound { + t.Errorf("expected %d not found files, got %d", tt.expectNotFound, result.NotFoundFiles) + } + + if result.TotalFiles != len(tt.versions) { + t.Errorf("expected %d total files, got %d", len(tt.versions), result.TotalFiles) + } + + // Verify diff generation if requested + if tt.generateDiff && tt.expectDiffering > 0 { + foundDiff := false + for _, comp := range result.Comparisons { + if comp.Status == FileDiffers && comp.Diff != "" { + foundDiff = true + break + } + } + if !foundDiff { + t.Error("expected diff output but none found") + } + } + }) + } +} + +// TestResolveVersionPaths tests version path resolution +func TestResolveVersionPaths(t *testing.T) { + testDataDir := "../../../testdata/compare" + + tests := []struct { + name string + referenceFile string + productDir string + versions []string + expectError bool + expectedPaths map[string]string // version -> expected path suffix + }{ + { + name: "resolve paths for multiple versions", + referenceFile: testDataDir + "/product/manual/source/includes/example.rst", + productDir: testDataDir 
+ "/product", + versions: []string{"manual", "upcoming", "v8.0"}, + expectError: false, + expectedPaths: map[string]string{ + "manual": "manual/source/includes/example.rst", + "upcoming": "upcoming/source/includes/example.rst", + "v8.0": "v8.0/source/includes/example.rst", + }, + }, + { + name: "file not under product dir", + referenceFile: "/some/other/path/file.rst", + productDir: testDataDir + "/product", + versions: []string{"manual"}, + expectError: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + paths, err := ResolveVersionPaths(tt.referenceFile, tt.productDir, tt.versions) + + if tt.expectError { + if err == nil { + t.Errorf("expected error but got none") + } + return + } + + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + if len(paths) != len(tt.versions) { + t.Fatalf("expected %d paths, got %d", len(tt.versions), len(paths)) + } + + for _, vp := range paths { + expectedSuffix, ok := tt.expectedPaths[vp.Version] + if !ok { + t.Errorf("unexpected version: %s", vp.Version) + continue + } + + if !strings.HasSuffix(vp.FilePath, expectedSuffix) { + t.Errorf("expected path to end with %s, got %s", expectedSuffix, vp.FilePath) + } + } + }) + } +} + +// TestExtractVersionFromPath tests version extraction from file paths +func TestExtractVersionFromPath(t *testing.T) { + testDataDir := "../../../testdata/compare" + + tests := []struct { + name string + filePath string + productDir string + expectedVersion string + expectError bool + }{ + { + name: "extract manual version", + filePath: testDataDir + "/product/manual/source/includes/example.rst", + productDir: testDataDir + "/product", + expectedVersion: "manual", + expectError: false, + }, + { + name: "extract v8.0 version", + filePath: testDataDir + "/product/v8.0/source/includes/example.rst", + productDir: testDataDir + "/product", + expectedVersion: "v8.0", + expectError: false, + }, + { + name: "file not under product dir", + filePath: 
"/some/other/path/file.rst", + productDir: testDataDir + "/product", + expectError: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + version, err := ExtractVersionFromPath(tt.filePath, tt.productDir) + + if tt.expectError { + if err == nil { + t.Errorf("expected error but got none") + } + return + } + + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + if version != tt.expectedVersion { + t.Errorf("expected version %s, got %s", tt.expectedVersion, version) + } + }) + } +} + +// TestGenerateDiff tests unified diff generation +func TestGenerateDiff(t *testing.T) { + tests := []struct { + name string + fromName string + fromContent string + toName string + toContent string + expectEmpty bool + }{ + { + name: "identical content", + fromName: "file1.txt", + fromContent: "Line 1\nLine 2\n", + toName: "file2.txt", + toContent: "Line 1\nLine 2\n", + expectEmpty: true, + }, + { + name: "different content", + fromName: "file1.txt", + fromContent: "Line 1\nLine 2\n", + toName: "file2.txt", + toContent: "Line 1\nLine 2 modified\n", + expectEmpty: false, + }, + { + name: "added lines", + fromName: "file1.txt", + fromContent: "Line 1\n", + toName: "file2.txt", + toContent: "Line 1\nLine 2\n", + expectEmpty: false, + }, + { + name: "removed lines", + fromName: "file1.txt", + fromContent: "Line 1\nLine 2\n", + toName: "file2.txt", + toContent: "Line 1\n", + expectEmpty: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + diff, err := GenerateDiff(tt.fromName, tt.fromContent, tt.toName, tt.toContent) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + if tt.expectEmpty { + if diff != "" { + t.Errorf("expected empty diff but got: %s", diff) + } + } else { + if diff == "" { + t.Error("expected non-empty diff but got empty string") + } + // Verify it's a unified diff format + if !strings.Contains(diff, "---") || !strings.Contains(diff, "+++") { + t.Errorf("expected unified diff format 
but got: %s", diff) + } + } + }) + } +} + +// TestAreFilesIdentical tests file identity checking +func TestAreFilesIdentical(t *testing.T) { + tests := []struct { + name string + content1 string + content2 string + identical bool + }{ + { + name: "identical content", + content1: "Hello, world!\n", + content2: "Hello, world!\n", + identical: true, + }, + { + name: "different content", + content1: "Hello, world!\n", + content2: "Hello, Go!\n", + identical: false, + }, + { + name: "empty strings", + content1: "", + content2: "", + identical: true, + }, + { + name: "whitespace difference", + content1: "Hello\n", + content2: "Hello \n", + identical: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := AreFilesIdentical(tt.content1, tt.content2) + if result != tt.identical { + t.Errorf("expected %v but got %v", tt.identical, result) + } + }) + } +} + +// TestComparisonResultMethods tests ComparisonResult helper methods +func TestComparisonResultMethods(t *testing.T) { + t.Run("HasDifferences", func(t *testing.T) { + result := &ComparisonResult{ + DifferingFiles: 1, + } + if !result.HasDifferences() { + t.Error("expected HasDifferences to return true") + } + + result.DifferingFiles = 0 + if result.HasDifferences() { + t.Error("expected HasDifferences to return false") + } + }) + + t.Run("AllMatch", func(t *testing.T) { + result := &ComparisonResult{ + MatchingFiles: 3, + DifferingFiles: 0, + ErrorFiles: 0, + } + if !result.AllMatch() { + t.Error("expected AllMatch to return true") + } + + result.DifferingFiles = 1 + if result.AllMatch() { + t.Error("expected AllMatch to return false when files differ") + } + + result.DifferingFiles = 0 + result.ErrorFiles = 1 + if result.AllMatch() { + t.Error("expected AllMatch to return false when errors exist") + } + + result.ErrorFiles = 0 + result.MatchingFiles = 0 + if result.AllMatch() { + t.Error("expected AllMatch to return false when no matching files") + } + }) +} + +// 
TestParseVersions tests version string parsing +func TestParseVersions(t *testing.T) { + tests := []struct { + name string + versionsStr string + expectedCount int + expectedVersion []string + }{ + { + name: "single version", + versionsStr: "manual", + expectedCount: 1, + expectedVersion: []string{"manual"}, + }, + { + name: "multiple versions", + versionsStr: "manual,upcoming,v8.0", + expectedCount: 3, + expectedVersion: []string{"manual", "upcoming", "v8.0"}, + }, + { + name: "versions with spaces", + versionsStr: "manual, upcoming, v8.0", + expectedCount: 3, + expectedVersion: []string{"manual", "upcoming", "v8.0"}, + }, + { + name: "empty string", + versionsStr: "", + expectedCount: 0, + expectedVersion: []string{}, + }, + { + name: "trailing comma", + versionsStr: "manual,upcoming,", + expectedCount: 2, + expectedVersion: []string{"manual", "upcoming"}, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + versions := parseVersions(tt.versionsStr) + + if len(versions) != tt.expectedCount { + t.Errorf("expected %d versions, got %d", tt.expectedCount, len(versions)) + } + + for i, expected := range tt.expectedVersion { + if i >= len(versions) { + t.Errorf("missing expected version: %s", expected) + continue + } + if versions[i] != expected { + t.Errorf("expected version %s at index %d, got %s", expected, i, versions[i]) + } + } + }) + } +} diff --git a/audit-cli/commands/compare/file-contents/output.go b/audit-cli/commands/compare/file-contents/output.go new file mode 100644 index 0000000..d9db245 --- /dev/null +++ b/audit-cli/commands/compare/file-contents/output.go @@ -0,0 +1,197 @@ +package file_contents + +import ( + "fmt" + "strings" +) + +// PrintComparisonResult prints the comparison result with progressive detail levels. 
+// +// The output format depends on the flags: +// - Default: Summary only +// - showPaths: Summary + file paths +// - showDiff: Summary + paths + diffs +// +// Parameters: +// - result: The comparison result to print +// - showPaths: If true, show file paths +// - showDiff: If true, show diffs (implies showPaths) +func PrintComparisonResult(result *ComparisonResult, showPaths bool, showDiff bool) { + // If showDiff is true, we also need to show paths + if showDiff { + showPaths = true + } + + // Print summary + printSummary(result) + + // Print paths if requested + if showPaths { + fmt.Println() + printPaths(result) + } + + // Print diffs if requested + if showDiff { + fmt.Println() + printDiffs(result) + } +} + +// printSummary prints a summary of the comparison results. +func printSummary(result *ComparisonResult) { + if result.ReferenceVersion != "" { + // Version comparison mode + fmt.Printf("Comparing file across %d versions...\n", result.TotalFiles) + } else { + // Direct comparison mode + fmt.Println("Comparing files...") + } + + if result.AllMatch() { + // All files match + fmt.Printf("✓ All versions match (%d/%d files identical)\n", result.MatchingFiles, result.TotalFiles) + } else if result.HasDifferences() { + // Some files differ + fmt.Printf("⚠ Differences found: %d of %d versions differ", result.DifferingFiles, result.TotalFiles) + if result.ReferenceVersion != "" { + fmt.Printf(" from %s\n", result.ReferenceVersion) + } else { + fmt.Println() + } + + // Show breakdown + if result.MatchingFiles > 0 { + fmt.Printf(" - %d version(s) match\n", result.MatchingFiles) + } + if result.DifferingFiles > 0 { + fmt.Printf(" - %d version(s) differ\n", result.DifferingFiles) + } + if result.NotFoundFiles > 0 { + fmt.Printf(" - %d version(s) not found (file does not exist)\n", result.NotFoundFiles) + } + if result.ErrorFiles > 0 { + fmt.Printf(" - %d version(s) had errors\n", result.ErrorFiles) + } + + // Show hints + fmt.Println() + fmt.Println("Use --show-paths 
to see which files differ") + fmt.Println("Use --show-diff to see the differences") + } else if result.NotFoundFiles > 0 || result.ErrorFiles > 0 { + // No differences, but some files not found or had errors + fmt.Printf("✓ No differences found among existing files\n") + if result.NotFoundFiles > 0 { + fmt.Printf(" - %d version(s) not found (file does not exist)\n", result.NotFoundFiles) + } + if result.ErrorFiles > 0 { + fmt.Printf(" - %d version(s) had errors\n", result.ErrorFiles) + } + } +} + +// printPaths prints the file paths grouped by status. +func printPaths(result *ComparisonResult) { + // Group comparisons by status + var matching, differing, notFound, errors []FileComparison + for _, comp := range result.Comparisons { + switch comp.Status { + case FileMatches: + matching = append(matching, comp) + case FileDiffers: + differing = append(differing, comp) + case FileNotFound: + notFound = append(notFound, comp) + case FileError: + errors = append(errors, comp) + } + } + + // Print matching files + if len(matching) > 0 { + fmt.Println("Files that match:") + for _, comp := range matching { + if comp.Version == result.ReferenceVersion { + fmt.Printf(" ✓ %s (reference)\n", comp.FilePath) + } else { + fmt.Printf(" ✓ %s\n", comp.FilePath) + } + } + } + + // Print differing files + if len(differing) > 0 { + if len(matching) > 0 { + fmt.Println() + } + fmt.Println("Files that differ:") + for _, comp := range differing { + fmt.Printf(" ✗ %s\n", comp.FilePath) + } + } + + // Print not found files + if len(notFound) > 0 { + if len(matching) > 0 || len(differing) > 0 { + fmt.Println() + } + fmt.Println("Files not found:") + for _, comp := range notFound { + fmt.Printf(" - %s\n", comp.FilePath) + } + } + + // Print error files + if len(errors) > 0 { + if len(matching) > 0 || len(differing) > 0 || len(notFound) > 0 { + fmt.Println() + } + fmt.Println("Files with errors:") + for _, comp := range errors { + fmt.Printf(" ⚠ %s: %v\n", comp.FilePath, comp.Error) + } + } +} 
+ +// printDiffs prints the unified diffs for files that differ. +func printDiffs(result *ComparisonResult) { + // Find files with diffs + var diffsToShow []FileComparison + for _, comp := range result.Comparisons { + if comp.Status == FileDiffers && comp.Diff != "" { + diffsToShow = append(diffsToShow, comp) + } + } + + if len(diffsToShow) == 0 { + return + } + + fmt.Println("Diffs:") + fmt.Println(strings.Repeat("=", 80)) + + for i, comp := range diffsToShow { + if i > 0 { + fmt.Println() + } + + // Print header + if result.ReferenceVersion != "" { + fmt.Printf("Diff: %s vs %s\n", result.ReferenceVersion, comp.Version) + } else { + fmt.Printf("Diff: %s\n", comp.Version) + } + fmt.Println(strings.Repeat("-", 80)) + + // Print the diff + fmt.Print(comp.Diff) + + // Ensure there's a newline at the end + if !strings.HasSuffix(comp.Diff, "\n") { + fmt.Println() + } + } + + fmt.Println(strings.Repeat("=", 80)) +} + diff --git a/audit-cli/commands/compare/file-contents/types.go b/audit-cli/commands/compare/file-contents/types.go new file mode 100644 index 0000000..85c95d6 --- /dev/null +++ b/audit-cli/commands/compare/file-contents/types.go @@ -0,0 +1,77 @@ +// Package file_contents provides functionality for comparing file contents across versions. +package file_contents + +// FileStatus represents the status of a file in a comparison. +type FileStatus int + +const ( + // FileMatches indicates the file content matches the reference file + FileMatches FileStatus = iota + // FileDiffers indicates the file content differs from the reference file + FileDiffers + // FileNotFound indicates the file does not exist at the expected path + FileNotFound + // FileError indicates an error occurred while reading the file + FileError +) + +// String returns a string representation of the FileStatus. 
+func (s FileStatus) String() string { + switch s { + case FileMatches: + return "matches" + case FileDiffers: + return "differs" + case FileNotFound: + return "not found" + case FileError: + return "error" + default: + return "unknown" + } +} + +// FileComparison represents the comparison result for a single file. +type FileComparison struct { + // Version is the version identifier (e.g., "v8.0", "upcoming") + Version string + // FilePath is the absolute path to the file + FilePath string + // Status is the comparison status + Status FileStatus + // Error is any error encountered (only set if Status == FileError) + Error error + // Diff is the unified diff output (only set if Status == FileDiffers and diff was requested) + Diff string +} + +// ComparisonResult represents the overall comparison result. +type ComparisonResult struct { + // ReferenceFile is the path to the reference file being compared against + ReferenceFile string + // ReferenceVersion is the version of the reference file (empty for direct comparison) + ReferenceVersion string + // Comparisons is the list of file comparisons + Comparisons []FileComparison + // TotalFiles is the total number of files compared + TotalFiles int + // MatchingFiles is the number of files that match + MatchingFiles int + // DifferingFiles is the number of files that differ + DifferingFiles int + // NotFoundFiles is the number of files not found + NotFoundFiles int + // ErrorFiles is the number of files with errors + ErrorFiles int +} + +// HasDifferences returns true if any files differ from the reference. +func (r *ComparisonResult) HasDifferences() bool { + return r.DifferingFiles > 0 +} + +// AllMatch returns true if all files match the reference (excluding not found files). 
+func (r *ComparisonResult) AllMatch() bool { + return r.DifferingFiles == 0 && r.ErrorFiles == 0 && r.MatchingFiles > 0 +} + diff --git a/audit-cli/commands/compare/file-contents/version_resolver.go b/audit-cli/commands/compare/file-contents/version_resolver.go new file mode 100644 index 0000000..4f42d55 --- /dev/null +++ b/audit-cli/commands/compare/file-contents/version_resolver.go @@ -0,0 +1,160 @@ +package file_contents + +import ( + "fmt" + "path/filepath" + "strings" +) + +// VersionPath represents a resolved file path for a specific version. +type VersionPath struct { + Version string + FilePath string +} + +// ResolveVersionPaths resolves file paths for all specified versions. +// +// Given a reference file path and a list of versions, this function constructs +// the corresponding file paths for each version by replacing the version segment +// in the path. +// +// Example: +// Input: /path/to/manual/manual/source/includes/file.rst +// Versions: [manual, upcoming, v8.1, v8.0] +// Output: +// - manual: /path/to/manual/manual/source/includes/file.rst +// - upcoming: /path/to/manual/upcoming/source/includes/file.rst +// - v8.1: /path/to/manual/v8.1/source/includes/file.rst +// - v8.0: /path/to/manual/v8.0/source/includes/file.rst +// +// Parameters: +// - referenceFile: The absolute path to the reference file +// - productDir: The absolute path to the product directory (e.g., /path/to/manual) +// - versions: List of version identifiers +// +// Returns: +// - []VersionPath: List of resolved version paths +// - error: Any error encountered during resolution +func ResolveVersionPaths(referenceFile string, productDir string, versions []string) ([]VersionPath, error) { + // Clean the paths + referenceFile = filepath.Clean(referenceFile) + productDir = filepath.Clean(productDir) + + // Ensure productDir ends with a separator for proper prefix matching + if !strings.HasSuffix(productDir, string(filepath.Separator)) { + productDir += string(filepath.Separator) + } + 
+ // Check if referenceFile is under productDir + if !strings.HasPrefix(referenceFile, productDir) { + return nil, fmt.Errorf("reference file %s is not under product directory %s", referenceFile, productDir) + } + + // Extract the relative path from productDir + relativePath := strings.TrimPrefix(referenceFile, productDir) + + // Find the version segment and the path after it + // Expected format: {version}/source/{rest-of-path} + parts := strings.Split(relativePath, string(filepath.Separator)) + if len(parts) < 2 { + return nil, fmt.Errorf("invalid file path structure: expected {version}/source/... format, got %s", relativePath) + } + + // Find the "source" directory + sourceIndex := -1 + for i, part := range parts { + if part == "source" { + sourceIndex = i + break + } + } + + if sourceIndex == -1 { + return nil, fmt.Errorf("could not find 'source' directory in path: %s", relativePath) + } + + if sourceIndex == 0 { + return nil, fmt.Errorf("invalid path structure: 'source' cannot be the first segment in %s", relativePath) + } + + // The version is the segment before "source" + // Everything from "source" onwards is the path we want to preserve + pathFromSource := strings.Join(parts[sourceIndex:], string(filepath.Separator)) + + // Build version paths + var versionPaths []VersionPath + for _, version := range versions { + versionPath := filepath.Join(productDir, version, pathFromSource) + versionPaths = append(versionPaths, VersionPath{ + Version: version, + FilePath: versionPath, + }) + } + + return versionPaths, nil +} + +// ExtractVersionFromPath extracts the version identifier from a file path. +// +// Given a file path under a product directory, this function extracts the +// version segment (the directory name before "source"). 
+// +// Example: +// Input: /path/to/manual/v8.0/source/includes/file.rst +// Product Dir: /path/to/manual +// Output: v8.0 +// +// Parameters: +// - filePath: The absolute path to the file +// - productDir: The absolute path to the product directory +// +// Returns: +// - string: The version identifier +// - error: Any error encountered during extraction +func ExtractVersionFromPath(filePath string, productDir string) (string, error) { + // Clean the paths + filePath = filepath.Clean(filePath) + productDir = filepath.Clean(productDir) + + // Ensure productDir ends with a separator for proper prefix matching + if !strings.HasSuffix(productDir, string(filepath.Separator)) { + productDir += string(filepath.Separator) + } + + // Check if filePath is under productDir + if !strings.HasPrefix(filePath, productDir) { + return "", fmt.Errorf("file path %s is not under product directory %s", filePath, productDir) + } + + // Extract the relative path from productDir + relativePath := strings.TrimPrefix(filePath, productDir) + + // Split into parts + parts := strings.Split(relativePath, string(filepath.Separator)) + if len(parts) < 2 { + return "", fmt.Errorf("invalid file path structure: expected {version}/source/... 
format, got %s", relativePath) + } + + // Find the "source" directory + sourceIndex := -1 + for i, part := range parts { + if part == "source" { + sourceIndex = i + break + } + } + + if sourceIndex == -1 { + return "", fmt.Errorf("could not find 'source' directory in path: %s", relativePath) + } + + if sourceIndex == 0 { + return "", fmt.Errorf("invalid path structure: 'source' cannot be the first segment in %s", relativePath) + } + + // The version is the segment before "source" + version := parts[sourceIndex-1] + + return version, nil +} + diff --git a/audit-cli/go.mod b/audit-cli/go.mod index 96d2b7c..788992d 100644 --- a/audit-cli/go.mod +++ b/audit-cli/go.mod @@ -2,7 +2,10 @@ module github.com/mongodb/code-example-tooling/audit-cli go 1.24 -require github.com/spf13/cobra v1.10.1 +require ( + github.com/aymanbagabas/go-udiff v0.3.1 + github.com/spf13/cobra v1.10.1 +) require ( github.com/inconshreveable/mousetrap v1.1.0 // indirect diff --git a/audit-cli/go.sum b/audit-cli/go.sum index 989827e..ce2736b 100644 --- a/audit-cli/go.sum +++ b/audit-cli/go.sum @@ -1,3 +1,5 @@ +github.com/aymanbagabas/go-udiff v0.3.1 h1:LV+qyBQ2pqe0u42ZsUEtPiCaUoqgA9gYRDs3vj1nolY= +github.com/aymanbagabas/go-udiff v0.3.1/go.mod h1:G0fsKmG+P6ylD0r6N/KgQD/nWzgfnl8ZBcNLgcbrw8E= github.com/cpuguy83/go-md2man/v2 v2.0.6/go.mod h1:oOW0eioCTA6cOiMLiUPZOpcVxMig6NIQQ7OS05n1F4g= github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8= github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw= diff --git a/audit-cli/main.go b/audit-cli/main.go index cd688f3..cad9975 100644 --- a/audit-cli/main.go +++ b/audit-cli/main.go @@ -10,10 +10,13 @@ // - find-string: Search for substrings in extracted files // - analyze: Analyze RST file structures // - includes: Analyze include directive relationships +// - compare: Compare files across different versions +// - file-contents: Compare file contents across versions package main 
import ( "github.com/mongodb/code-example-tooling/audit-cli/commands/analyze" + "github.com/mongodb/code-example-tooling/audit-cli/commands/compare" "github.com/mongodb/code-example-tooling/audit-cli/commands/extract" "github.com/mongodb/code-example-tooling/audit-cli/commands/search" "github.com/spf13/cobra" @@ -34,6 +37,7 @@ with special handling for MongoDB documentation conventions.`, rootCmd.AddCommand(extract.NewExtractCommand()) rootCmd.AddCommand(search.NewSearchCommand()) rootCmd.AddCommand(analyze.NewAnalyzeCommand()) + rootCmd.AddCommand(compare.NewCompareCommand()) rootCmd.Execute() } diff --git a/audit-cli/testdata/compare/file1.txt b/audit-cli/testdata/compare/file1.txt new file mode 100644 index 0000000..c4d290d --- /dev/null +++ b/audit-cli/testdata/compare/file1.txt @@ -0,0 +1,4 @@ +Line 1 +Line 2 +Line 3 + diff --git a/audit-cli/testdata/compare/file2.txt b/audit-cli/testdata/compare/file2.txt new file mode 100644 index 0000000..90f6207 --- /dev/null +++ b/audit-cli/testdata/compare/file2.txt @@ -0,0 +1,5 @@ +Line 1 +Line 2 modified +Line 3 +Line 4 + diff --git a/audit-cli/testdata/compare/identical1.txt b/audit-cli/testdata/compare/identical1.txt new file mode 100644 index 0000000..2e25982 --- /dev/null +++ b/audit-cli/testdata/compare/identical1.txt @@ -0,0 +1,4 @@ +Identical content +Line 2 +Line 3 + diff --git a/audit-cli/testdata/compare/identical2.txt b/audit-cli/testdata/compare/identical2.txt new file mode 100644 index 0000000..2e25982 --- /dev/null +++ b/audit-cli/testdata/compare/identical2.txt @@ -0,0 +1,4 @@ +Identical content +Line 2 +Line 3 + diff --git a/audit-cli/testdata/compare/product/manual/source/includes/example.rst b/audit-cli/testdata/compare/product/manual/source/includes/example.rst new file mode 100644 index 0000000..313e078 --- /dev/null +++ b/audit-cli/testdata/compare/product/manual/source/includes/example.rst @@ -0,0 +1,40 @@ +.. 
_example-reference: + +================= +Example Document +================= + +This is an example RST file for testing the compare command. + +Introduction +------------ + +MongoDB is a document database designed for ease of application development and scaling. + +Features +-------- + +- Document-oriented storage +- Full index support +- Replication and high availability +- Auto-sharding +- Rich queries +- Fast in-place updates +- Professional support by MongoDB + +Code Example +------------ + +.. code-block:: javascript + + db.collection.insertOne({ + name: "John Doe", + age: 30, + status: "active" + }) + +Conclusion +---------- + +This concludes the example document. + diff --git a/audit-cli/testdata/compare/product/manual/source/includes/new-feature.rst b/audit-cli/testdata/compare/product/manual/source/includes/new-feature.rst new file mode 100644 index 0000000..00d5e49 --- /dev/null +++ b/audit-cli/testdata/compare/product/manual/source/includes/new-feature.rst @@ -0,0 +1,13 @@ +.. _new-feature: + +=========== +New Feature +=========== + +This feature only exists in manual and upcoming versions. + +Description +----------- + +This is a new feature that was added in recent versions. + diff --git a/audit-cli/testdata/compare/product/upcoming/source/includes/example.rst b/audit-cli/testdata/compare/product/upcoming/source/includes/example.rst new file mode 100644 index 0000000..de6b7cb --- /dev/null +++ b/audit-cli/testdata/compare/product/upcoming/source/includes/example.rst @@ -0,0 +1,42 @@ +.. _example-reference: + +================= +Example Document +================= + +This is an example RST file for testing the compare command. + +Introduction +------------ + +MongoDB is a document database designed for ease of application development and scaling. 
+ +Features +-------- + +- Document-oriented storage +- Full index support +- Replication and high availability +- Auto-sharding +- Rich queries +- Fast in-place updates +- Professional support by MongoDB +- New feature in upcoming version + +Code Example +------------ + +.. code-block:: javascript + + db.collection.insertOne({ + name: "John Doe", + age: 30, + status: "active", + version: "upcoming" + }) + +Conclusion +---------- + +This concludes the example document with updates for the upcoming version. + diff --git a/audit-cli/testdata/compare/product/upcoming/source/includes/new-feature.rst b/audit-cli/testdata/compare/product/upcoming/source/includes/new-feature.rst new file mode 100644 index 0000000..00d5e49 --- /dev/null +++ b/audit-cli/testdata/compare/product/upcoming/source/includes/new-feature.rst @@ -0,0 +1,13 @@ +.. _new-feature: + +=========== +New Feature +=========== + +This feature only exists in manual and upcoming versions. + +Description +----------- + +This is a new feature that was added in recent versions. + diff --git a/audit-cli/testdata/compare/product/v8.0/source/includes/example.rst b/audit-cli/testdata/compare/product/v8.0/source/includes/example.rst new file mode 100644 index 0000000..ce8892d --- /dev/null +++ b/audit-cli/testdata/compare/product/v8.0/source/includes/example.rst @@ -0,0 +1,38 @@ +.. _example-reference: + +================= +Example Document +================= + +This is an example RST file for testing the compare command. + +Introduction +------------ + +MongoDB is a document database designed for ease of application development and scaling. + +Features +-------- + +- Document-oriented storage +- Full index support +- Replication and high availability +- Auto-sharding +- Rich queries +- Fast in-place updates + +Code Example +------------ + +.. code-block:: javascript + + db.collection.insertOne({ + name: "John Doe", + age: 30 + }) + +Conclusion +---------- + +This concludes the example document. 
+ From a4af8a79399e44e3ee9a1f508989807efc8c7d7b Mon Sep 17 00:00:00 2001 From: dacharyc Date: Tue, 21 Oct 2025 09:45:28 -0400 Subject: [PATCH 7/7] Apply suggestions from review --- audit-cli/README.md | 159 ++++++++++++++++++++++++++++++++------------ audit-cli/main.go | 13 ++-- 2 files changed, 124 insertions(+), 48 deletions(-) diff --git a/audit-cli/README.md b/audit-cli/README.md index ee4e53d..8b2cb50 100644 --- a/audit-cli/README.md +++ b/audit-cli/README.md @@ -55,7 +55,7 @@ The CLI is organized into parent commands with subcommands: audit-cli ├── extract # Extract content from RST files │ └── code-examples -├── search # Search through extracted content +├── search # Search through extracted content or source files │ └── find-string ├── analyze # Analyze RST file structures │ └── includes @@ -67,7 +67,18 @@ audit-cli #### `extract code-examples` -Extract code examples from reStructuredText files into individual files. +Extract code examples from reStructuredText files into individual files. For details about what code example directives +are supported and how, refer to the [Supported rST Directives - Code Example Extraction](#code-example-extraction) +section below. + +**Use Cases:** + +This command helps writers: +- Examine all the code examples that make up a specific page or section +- Split out code examples into individual files for migration to test infrastructure +- Report on the number of code examples by language +- Report on the number of code examples by directive type +- Use additional commands, such as search, to find strings within specific code examples **Basic Usage:** @@ -97,8 +108,16 @@ Extract code examples from reStructuredText files into individual files. **Flags:** - `-o, --output ` - Output directory for extracted files (default: `./output`) -- `-r, --recursive` - Recursively scan directories for RST files -- `-f, --follow-includes` - Follow `.. 
include::` directives in RST files +- `-r, --recursive` - Recursively scan directories for RST files. If you do not provide this flag, the tool will only + extract code examples from the top-level RST file. If you do provide this flag, the tool will recursively scan all + subdirectories for RST files and extract code examples from all files. +- `-f, --follow-includes` - Follow `.. include::` directives in RST files. If you do not provide this flag, the tool + will only extract code examples from the top-level RST file. If you do provide this flag, the tool will follow any + `.. include::` directives in the RST file and extract code examples from all included files. When combined with `-r`, + the tool will recursively scan all subdirectories for RST files and follow `.. include::` directives in all files. If + an include filepath is *outside* the input directory, the `-r` flag would not parse it, but the `-f` flag would + follow the include directive and parse the included file. This effectively lets you parse all the files that make up + a single page, if you start from the page's root `.txt` file. - `--dry-run` - Show what would be extracted without writing files - `-v, --verbose` - Show detailed processing information @@ -114,7 +133,7 @@ Examples: **Report:** -After extraction, a report is displayed showing: +After extraction, the code extraction report shows: - Number of files traversed - Number of output files written - Code examples by language @@ -130,7 +149,17 @@ Search through files for a specific substring. Can search through extracted code - **Case-insensitive** search (matches "curl", "CURL", "Curl", etc.) - **Exact word matching** (excludes partial matches like "curl" in "libcurl") -Use `--case-sensitive` to make the search case-sensitive, or `--partial-match` to allow matching the substring as part of larger words. +Use `--case-sensitive` to make the search case-sensitive, or `--partial-match` to allow matching the substring as part +of larger words. 
+ +**Use Cases:** + +This command helps writers: +- Find specific strings across documentation files or pages + - Search for product names, command names, API methods, or other strings that may need to be updated +- Understand the number of references and impact of changes across documentation files or pages +- Identify files that need to be updated when a string needs to be changed +- Scope work related to specific changes **Basic Usage:** @@ -165,8 +194,16 @@ Use `--case-sensitive` to make the search case-sensitive, or `--partial-match` t **Flags:** -- `-r, --recursive` - Recursively search all files in subdirectories -- `-f, --follow-includes` - Follow `.. include::` directives in RST files +- `-r, --recursive` - Recursively scan directories for RST files. If you do not provide this flag, the tool will only + search within the top-level RST file or directory. If you do provide this flag, the tool will recursively scan all + subdirectories for RST files and search across all files. +- `-f, --follow-includes` - Follow `.. include::` directives in RST files. If you do not provide this flag, the tool + will search only the top-level RST file or directory. If you do provide this flag, the tool will follow any + `.. include::` directives in any RST file in the input path and search across all included files. When + combined with `-r`, the tool will recursively scan all subdirectories for RST files and follow `.. include::` directives + in all files. If an include filepath is *outside* the input directory, the `-r` flag would not parse it, but the `-f` + flag would follow the include directive and search the included file. This effectively lets you parse all the files + that make up a single page, if you start from the page's root `.txt` file. 
- `-v, --verbose` - Show file paths and language breakdown - `--case-sensitive` - Make search case-sensitive (default: case-insensitive) - `--partial-match` - Allow partial matches within words (default: exact word matching) @@ -185,7 +222,15 @@ With `-v` flag, also shows: #### `analyze includes` -Analyze include directive relationships in RST files to understand file dependencies. +Analyze `include` directive relationships in RST files to understand file dependencies. + +**Use Cases:** + +This command helps writers: +- Understand the impact of changes to widely-included files +- Identify circular include dependencies (files included multiple times) +- Document file relationships for maintenance +- Plan refactoring of complex include structures **Basic Usage:** @@ -230,17 +275,12 @@ Analyze include directive relationships in RST files to understand file dependen - Files listed in depth-first traversal order - Shows absolute paths to all files -**Use Cases:** - -This command helps writers: -- Understand the impact of changes to widely-included files -- Identify circular include dependencies (files included multiple times) -- Document file relationships for maintenance -- Plan refactoring of complex include structures - **Note on File Counting:** -The total file count represents **unique files** discovered through include directives. If a file is included multiple times (e.g., file A includes file C, and file B also includes file C), it is counted only once in the total. However, the tree view will show it in all locations where it appears, with subsequent occurrences marked as circular includes in verbose mode. +The total file count represents **unique files** discovered through include directives. If a file is included multiple +times (e.g., file A includes file C, and file B also includes file C), the file is counted only once in the total. 
+However, the tree view will show it in all locations where it appears, with subsequent occurrences marked as circular +includes in verbose mode. ### Compare Commands @@ -400,7 +440,8 @@ product-dir/ **Note on Missing Files:** -Files that don't exist in certain versions are reported separately and do not cause errors. This is expected behavior since features may be added or removed across versions. +Files that don't exist in certain versions are reported separately and do not cause errors. This is expected behavior +since features may be added or removed across versions. ## Development @@ -559,8 +600,6 @@ Example: Adding `analyze` parent command } ``` - - ### Testing #### Running Tests @@ -588,7 +627,8 @@ Tests use a table-driven approach with test fixtures in the `testdata/` director - **Expected output**: `testdata/expected-output/` - Expected extracted files - **Test pattern**: Compare actual extraction output against expected files -**Note**: The `testdata` directory name is special in Go - it's automatically ignored during builds, which is important since it contains non-Go files (`.cpp`, `.rst`, etc.). +**Note**: The `testdata` directory name is special in Go - it's automatically ignored during builds, which is important +since it contains non-Go files (`.cpp`, `.rst`, etc.). 
#### Adding New Tests @@ -634,7 +674,8 @@ Tests use a table-driven approach with test fixtures in the `testdata/` director #### Test Conventions -- **Relative paths**: Tests use `filepath.Join("..", "..", "..", "testdata")` to reference test data (three levels up from `commands/extract/code-examples/`) +- **Relative paths**: Tests use `filepath.Join("..", "..", "..", "testdata")` to reference test data (three levels up + from `commands/extract/code-examples/`) - **Temporary directories**: Use `os.MkdirTemp()` for test output, clean up with `defer os.RemoveAll()` - **Exact content matching**: Tests compare byte-for-byte content - **No trailing newlines**: Expected output files should not have trailing blank lines @@ -848,13 +889,13 @@ func processWithVerbose(filePath string, verbose bool) error { } ``` - - ## Supported RST Directives +### Code Example Extraction + The tool extracts code examples from the following reStructuredText directives: -### 1. `literalinclude` +#### 1. `literalinclude` Extracts code from external files with support for partial extraction and dedenting. @@ -899,7 +940,7 @@ result = calculate(42) print(result) ``` -### 2. `code-block` +#### 2. `code-block` Inline code blocks with automatic dedenting based on the first line's indentation. @@ -932,14 +973,15 @@ The content is automatically dedented based on the indentation of the first cont print("Hello") ``` -The code has 6 spaces of indentation (3 from `note`, 3 from `code-block`). The tool automatically removes these 6 spaces, resulting in: +The code has 6 spaces of indentation (3 from `note`, 3 from `code-block`). The tool automatically removes these 6 spaces, +resulting in: ```python def hello(): print("Hello") ``` -### 3. `io-code-block` +#### 3. `io-code-block` Input/output code blocks for interactive examples with nested sub-directives. @@ -991,7 +1033,9 @@ Generates two files: Example: `my-doc.io-code-block.1.input.js` and `my-doc.io-code-block.1.output.json` -### 4. 
`include` +### Include Handling + +#### 4. `include` Follows include directives to process entire documentation trees (when `-f` flag is used). @@ -1004,18 +1048,18 @@ Follows include directives to process entire documentation trees (when `-f` flag The tool handles several MongoDB-specific include patterns: -#### Steps Files +##### Steps Files Converts directory-based paths to filename-based paths: - Input: `/includes/steps/run-mongodb-on-linux.rst` - Resolves to: `/includes/steps-run-mongodb-on-linux.yaml` -#### Extracts and Release Files +##### Extracts and Release Files Resolves ref-based includes by searching YAML files: - Input: `/includes/extracts/install-mongodb.rst` - Searches: `/includes/extracts-*.yaml` for `ref: install-mongodb` - Resolves to: The YAML file containing that ref -#### Template Variables +##### Template Variables Resolves template variables from YAML replacement sections: ```yaml replacement: @@ -1026,7 +1070,8 @@ replacement: **Source Directory Resolution:** -The tool walks up the directory tree to find a directory named "source" or containing a "source" subdirectory. This is used as the base for resolving relative include paths. +The tool walks up the directory tree to find a directory named "source" or containing a "source" subdirectory. This is +used as the base for resolving relative include paths.
## Internal Packages @@ -1048,15 +1093,47 @@ The tool normalizes language identifiers to standard file extensions: | Input | Normalized | Extension | |-------|-----------|-----------| -| `ts` | `typescript` | `.ts` | +| `bash` | `bash` | `.sh` | +| `c` | `c` | `.c` | | `c++` | `cpp` | `.cpp` | +| `c#` | `csharp` | `.cs` | +| `console` | `console` | `.sh` | +| `cpp` | `cpp` | `.cpp` | +| `cs` | `csharp` | `.cs` | +| `csharp` | `csharp` | `.cs` | +| `go` | `go` | `.go` | | `golang` | `go` | `.go` | +| `java` | `java` | `.java` | | `javascript` | `javascript` | `.js` | +| `js` | `javascript` | `.js` | +| `kotlin` | `kotlin` | `.kt` | +| `kt` | `kotlin` | `.kt` | +| `php` | `php` | `.php` | +| `powershell` | `powershell` | `.ps1` | +| `ps1` | `powershell` | `.ps1` | +| `ps5` | `ps5` | `.ps1` | +| `py` | `python` | `.py` | | `python` | `python` | `.py` | -| `shell` / `sh` | `sh` | `.sh` | -| `json` | `json` | `.json` | -| `yaml` | `yaml` | `.yaml` | -| (none) | `txt` | `.txt` | +| `rb` | `ruby` | `.rb` | +| `rs` | `rust` | `.rs` | +| `ruby` | `ruby` | `.rb` | +| `rust` | `rust` | `.rs` | +| `scala` | `scala` | `.scala` | +| `sh` | `shell` | `.sh` | +| `shell` | `shell` | `.sh` | +| `swift` | `swift` | `.swift` | +| `text` | `text` | `.txt` | +| `ts` | `typescript` | `.ts` | +| `txt` | `text` | `.txt` | +| `typescript` | `typescript` | `.ts` | +| (empty string) | `undefined` | `.txt` | +| `none` | `undefined` | `.txt` | +| (unknown) | (unchanged) | `.txt` | + +**Notes:** +- Language identifiers are case-insensitive +- Unknown languages are returned unchanged by `NormalizeLanguage()` but map to `.txt` extension +- The normalization handles common aliases (e.g., `ts` → `typescript`, `golang` → `go`, `c++` → `cpp`) ## Contributing @@ -1067,7 +1144,3 @@ When contributing to this project: 3. **Update documentation** - Keep this README up to date with new features 4. **Run tests before committing** - Ensure `go test ./...` passes 5. 
**Use meaningful commit messages** - Describe what changed and why - -## License - -[Add license information here] diff --git a/audit-cli/main.go b/audit-cli/main.go index cad9975..a6ce75d 100644 --- a/audit-cli/main.go +++ b/audit-cli/main.go @@ -5,13 +5,13 @@ // // The CLI is organized into parent commands with subcommands: // - extract: Extract content from RST files -// - code-examples: Extract code examples from RST directives +// - code-examples: Extract code examples from RST directives // - search: Search through extracted content -// - find-string: Search for substrings in extracted files +// - find-string: Search for substrings in extracted files // - analyze: Analyze RST file structures -// - includes: Analyze include directive relationships +// - includes: Analyze include directive relationships // - compare: Compare files across different versions -// - file-contents: Compare file contents across versions +// - file-contents: Compare file contents across versions package main import ( @@ -39,5 +39,8 @@ with special handling for MongoDB documentation conventions.`, rootCmd.AddCommand(analyze.NewAnalyzeCommand()) rootCmd.AddCommand(compare.NewCompareCommand()) - rootCmd.Execute() + err := rootCmd.Execute() + if err != nil { + return + } }