From cfce6d548447bc6e79f317d8fd0d5516771038ce Mon Sep 17 00:00:00 2001 From: Dachary Carey Date: Fri, 5 Dec 2025 14:49:47 -0500 Subject: [PATCH 01/14] Show path for include files to disambiguate files with the same filename --- .../commands/analyze/includes/analyzer.go | 4 +- audit-cli/commands/analyze/includes/output.go | 73 ++++++++++++++++++- 2 files changed, 73 insertions(+), 4 deletions(-) diff --git a/audit-cli/commands/analyze/includes/analyzer.go b/audit-cli/commands/analyze/includes/analyzer.go index 2d02dad..0d9d155 100644 --- a/audit-cli/commands/analyze/includes/analyzer.go +++ b/audit-cli/commands/analyze/includes/analyzer.go @@ -93,7 +93,7 @@ func buildIncludeTree(filePath string, visited map[string]bool, verbose bool, de if visited[absPath] { if verbose { indent := getIndent(depth) - fmt.Printf("%s⚠ Circular include detected: %s\n", indent, filepath.Base(absPath)) + fmt.Printf("%s⚠ Circular include detected: %s\n", indent, formatDisplayPath(absPath)) } return node, nil } @@ -108,7 +108,7 @@ func buildIncludeTree(filePath string, visited map[string]bool, verbose bool, de if verbose && len(includeFiles) > 0 { indent := getIndent(depth) - fmt.Printf("%s📄 %s (%d includes)\n", indent, filepath.Base(absPath), len(includeFiles)) + fmt.Printf("%s📄 %s (%d includes)\n", indent, formatDisplayPath(absPath), len(includeFiles)) } // Recursively process each included file diff --git a/audit-cli/commands/analyze/includes/output.go b/audit-cli/commands/analyze/includes/output.go index bd33aa7..3fe3948 100644 --- a/audit-cli/commands/analyze/includes/output.go +++ b/audit-cli/commands/analyze/includes/output.go @@ -3,6 +3,9 @@ package includes import ( "fmt" "path/filepath" + "strings" + + "github.com/mongodb/code-example-tooling/audit-cli/internal/pathresolver" ) // PrintTree prints the include tree structure. 
@@ -45,13 +48,13 @@ func printTreeNode(node *IncludeNode, prefix string, isLast bool, isRoot bool) { // Print the current node if isRoot { - fmt.Printf("%s\n", filepath.Base(node.FilePath)) + fmt.Printf("%s\n", formatDisplayPath(node.FilePath)) } else { connector := "├── " if isLast { connector = "└── " } - fmt.Printf("%s%s%s\n", prefix, connector, filepath.Base(node.FilePath)) + fmt.Printf("%s%s%s\n", prefix, connector, formatDisplayPath(node.FilePath)) } // Print children @@ -114,3 +117,69 @@ func PrintSummary(analysis *IncludeAnalysis) { fmt.Println() } +// formatDisplayPath formats a file path for display in the tree or verbose output. +// +// This function returns: +// - If the file is in an "includes" directory: the path starting from "includes" +// (e.g., "includes/load-sample-data.rst" or "includes/php/connection.rst") +// - If the file is NOT in an "includes" directory: the path from the source directory +// (e.g., "get-started/node/language-connection-steps.rst") +// +// This helps writers understand the directory structure and disambiguate files +// with the same name in different directories. 
+// +// Parameters: +// - filePath: Absolute path to the file +// +// Returns: +// - string: Formatted path for display +func formatDisplayPath(filePath string) string { + // Try to find the source directory + sourceDir, err := pathresolver.FindSourceDirectory(filePath) + if err != nil { + // If we can't find source directory, just return the base name + return filepath.Base(filePath) + } + + // Check if the file is in an includes directory + // Walk up from the file to find if there's an "includes" directory + dir := filepath.Dir(filePath) + var includesDir string + + for { + // Check if the current directory is named "includes" + if filepath.Base(dir) == "includes" { + includesDir = dir + break + } + + // Move up one directory + parent := filepath.Dir(dir) + + // If we've reached the source directory or root, stop + if parent == dir || dir == sourceDir { + break + } + + dir = parent + } + + // If we found an includes directory, get the relative path from it + if includesDir != "" { + relPath, err := filepath.Rel(includesDir, filePath) + if err == nil && !strings.HasPrefix(relPath, "..") { + // Prepend "includes/" to show it's in the includes directory + return filepath.Join("includes", relPath) + } + } + + // Otherwise, get the relative path from the source directory + relPath, err := filepath.Rel(sourceDir, filePath) + if err != nil { + // If we can't get relative path, just return the base name + return filepath.Base(filePath) + } + + return relPath +} + From 0004f099d6e4291bb47153dcf02ad5b3b49c86f6 Mon Sep 17 00:00:00 2001 From: Dachary Carey Date: Fri, 5 Dec 2025 15:24:45 -0500 Subject: [PATCH 02/14] Clarify outputs and improve counts for include tracking --- .../commands/analyze/includes/analyzer.go | 136 +++++++++++++++--- audit-cli/commands/analyze/includes/output.go | 9 +- audit-cli/commands/analyze/includes/types.go | 11 +- 3 files changed, 126 insertions(+), 30 deletions(-) diff --git a/audit-cli/commands/analyze/includes/analyzer.go 
b/audit-cli/commands/analyze/includes/analyzer.go index 0d9d155..c482cb6 100644 --- a/audit-cli/commands/analyze/includes/analyzer.go +++ b/audit-cli/commands/analyze/includes/analyzer.go @@ -36,28 +36,31 @@ func AnalyzeIncludes(filePath string, verbose bool) (*IncludeAnalysis, error) { } // Build the tree structure - visited := make(map[string]bool) - tree, err := buildIncludeTree(absPath, visited, verbose, 0) + // Use a recursion path to detect true circular includes + recursionPath := make(map[string]bool) + // Track which files we've seen for verbose output (to show duplicates with different bullet) + seenFiles := make(map[string]bool) + tree, err := buildIncludeTree(absPath, recursionPath, seenFiles, verbose, 0) if err != nil { return nil, err } - // Collect all unique files from the visited map - // The visited map contains all unique files that were processed - allFiles := make([]string, 0, len(visited)) - for file := range visited { - allFiles = append(allFiles, file) - } + // Collect all unique files from the tree + allFiles := collectUniqueFiles(tree) // Calculate max depth maxDepth := calculateMaxDepth(tree, 0) + // Count total include directives + totalDirectives := countIncludeDirectives(tree) + analysis := &IncludeAnalysis{ - RootFile: absPath, - Tree: tree, - AllFiles: allFiles, - TotalFiles: len(allFiles), - MaxDepth: maxDepth, + RootFile: absPath, + Tree: tree, + AllFiles: allFiles, + TotalFiles: len(allFiles), + TotalIncludeDirectives: totalDirectives, + MaxDepth: maxDepth, } return analysis, nil @@ -66,18 +69,19 @@ func AnalyzeIncludes(filePath string, verbose bool) (*IncludeAnalysis, error) { // buildIncludeTree recursively builds a tree of include relationships. // // This function creates an IncludeNode for the given file and recursively -// processes all files it includes, preventing circular includes. +// processes all files it includes, preventing true circular includes. 
// // Parameters: // - filePath: Path to the file to process -// - visited: Map tracking already-processed files (prevents circular includes) +// - recursionPath: Map tracking files in the current recursion path (prevents circular includes) +// - seenFiles: Map tracking files we've already printed (for duplicate indicators in verbose mode) // - verbose: If true, print detailed processing information // - depth: Current depth in the tree (for verbose output) // // Returns: // - *IncludeNode: Tree node representing this file and its includes // - error: Any error encountered during processing -func buildIncludeTree(filePath string, visited map[string]bool, verbose bool, depth int) (*IncludeNode, error) { +func buildIncludeTree(filePath string, recursionPath map[string]bool, seenFiles map[string]bool, verbose bool, depth int) (*IncludeNode, error) { absPath, err := filepath.Abs(filePath) if err != nil { return nil, err @@ -89,15 +93,19 @@ func buildIncludeTree(filePath string, visited map[string]bool, verbose bool, de Children: []*IncludeNode{}, } - // Check if we've already visited this file (circular include) - if visited[absPath] { + // Check if this file is already in the current recursion path (true circular include) + if recursionPath[absPath] { if verbose { indent := getIndent(depth) fmt.Printf("%s⚠ Circular include detected: %s\n", indent, formatDisplayPath(absPath)) } return node, nil } - visited[absPath] = true + + // Add this file to the recursion path + recursionPath[absPath] = true + // Ensure we remove it when we're done processing this branch + defer delete(recursionPath, absPath) // Find include directives in this file includeFiles, err := rst.FindIncludeDirectives(absPath) @@ -106,14 +114,31 @@ func buildIncludeTree(filePath string, visited map[string]bool, verbose bool, de includeFiles = []string{} } - if verbose && len(includeFiles) > 0 { + // Print verbose output for this file + if verbose { indent := getIndent(depth) - fmt.Printf("%s📄 %s (%d 
includes)\n", indent, formatDisplayPath(absPath), len(includeFiles)) + // Use hollow bullet (◦) for files we've seen before, filled bullet (•) for first occurrence + bullet := "•" + if seenFiles[absPath] { + bullet = "◦" + } else { + seenFiles[absPath] = true + } + + if len(includeFiles) > 0 { + directiveWord := "include directives" + if len(includeFiles) == 1 { + directiveWord = "include directive" + } + fmt.Printf("%s%s %s (%d %s)\n", indent, bullet, formatDisplayPath(absPath), len(includeFiles), directiveWord) + } else { + fmt.Printf("%s%s %s\n", indent, bullet, formatDisplayPath(absPath)) + } } // Recursively process each included file for _, includeFile := range includeFiles { - childNode, err := buildIncludeTree(includeFile, visited, verbose, depth+1) + childNode, err := buildIncludeTree(includeFile, recursionPath, seenFiles, verbose, depth+1) if err != nil { fmt.Fprintf(os.Stderr, "Warning: failed to process file %s: %v\n", includeFile, err) continue @@ -167,3 +192,70 @@ func getIndent(depth int) string { return indent } +// collectUniqueFiles traverses the tree and collects all unique file paths. +// +// This function recursively walks the tree and builds a list of all unique +// files that appear in the tree, even if they appear multiple times. 
+// +// Parameters: +// - node: The root node of the tree to traverse +// +// Returns: +// - []string: List of unique file paths +func collectUniqueFiles(node *IncludeNode) []string { + if node == nil { + return []string{} + } + + visited := make(map[string]bool) + var files []string + + var traverse func(*IncludeNode) + traverse = func(n *IncludeNode) { + if n == nil { + return + } + + // Add this file if we haven't seen it before + if !visited[n.FilePath] { + visited[n.FilePath] = true + files = append(files, n.FilePath) + } + + // Traverse children + for _, child := range n.Children { + traverse(child) + } + } + + traverse(node) + return files +} + +// countIncludeDirectives counts the total number of include directive instances in the tree. +// +// This function counts every include directive in every file, including duplicates. +// For example, if file A includes file B, and file C also includes file B, +// that counts as 2 include directives (even though B is only one unique file). 
+// +// Parameters: +// - node: The root node of the tree to traverse +// +// Returns: +// - int: Total number of include directive instances +func countIncludeDirectives(node *IncludeNode) int { + if node == nil { + return 0 + } + + // Count the children of this node (these are the include directives in this file) + count := len(node.Children) + + // Recursively count include directives in all children + for _, child := range node.Children { + count += countIncludeDirectives(child) + } + + return count +} + diff --git a/audit-cli/commands/analyze/includes/output.go b/audit-cli/commands/analyze/includes/output.go index 3fe3948..b6d4e81 100644 --- a/audit-cli/commands/analyze/includes/output.go +++ b/audit-cli/commands/analyze/includes/output.go @@ -20,7 +20,8 @@ func PrintTree(analysis *IncludeAnalysis) { fmt.Println("INCLUDE TREE") fmt.Println("============================================================") fmt.Printf("Root File: %s\n", analysis.RootFile) - fmt.Printf("Total Files: %d\n", analysis.TotalFiles) + fmt.Printf("Unique Files: %d\n", analysis.TotalFiles) + fmt.Printf("Include Directives: %d\n", analysis.TotalIncludeDirectives) fmt.Printf("Max Depth: %d\n", analysis.MaxDepth) fmt.Println("============================================================") fmt.Println() @@ -85,7 +86,8 @@ func PrintList(analysis *IncludeAnalysis) { fmt.Println("INCLUDE FILE LIST") fmt.Println("============================================================") fmt.Printf("Root File: %s\n", analysis.RootFile) - fmt.Printf("Total Files: %d\n", analysis.TotalFiles) + fmt.Printf("Unique Files: %d\n", analysis.TotalFiles) + fmt.Printf("Include Directives: %d\n", analysis.TotalIncludeDirectives) fmt.Println("============================================================") fmt.Println() @@ -108,7 +110,8 @@ func PrintSummary(analysis *IncludeAnalysis) { fmt.Println("INCLUDE ANALYSIS SUMMARY") fmt.Println("============================================================") fmt.Printf("Root File: 
%s\n", analysis.RootFile) - fmt.Printf("Total Files: %d\n", analysis.TotalFiles) + fmt.Printf("Unique Files: %d\n", analysis.TotalFiles) + fmt.Printf("Include Directives: %d\n", analysis.TotalIncludeDirectives) fmt.Printf("Max Depth: %d\n", analysis.MaxDepth) fmt.Println("============================================================") fmt.Println() diff --git a/audit-cli/commands/analyze/includes/types.go b/audit-cli/commands/analyze/includes/types.go index 5f7bcc9..13cde82 100644 --- a/audit-cli/commands/analyze/includes/types.go +++ b/audit-cli/commands/analyze/includes/types.go @@ -14,10 +14,11 @@ type IncludeNode struct { // This type holds both the tree structure and the flat list of all files // discovered through include directives. type IncludeAnalysis struct { - RootFile string // The original file that was analyzed - Tree *IncludeNode // Tree structure of include relationships - AllFiles []string // Flat list of all files (in order discovered) - TotalFiles int // Total number of unique files - MaxDepth int // Maximum depth of include nesting + RootFile string // The original file that was analyzed + Tree *IncludeNode // Tree structure of include relationships + AllFiles []string // Flat list of all files (in order discovered) + TotalFiles int // Total number of unique files + TotalIncludeDirectives int // Total number of include directive instances across all files + MaxDepth int // Maximum depth of include nesting } From 0de775f20d60a2c7975fa7ed62ee7dc75db10d74 Mon Sep 17 00:00:00 2001 From: Dachary Carey Date: Fri, 5 Dec 2025 15:32:32 -0500 Subject: [PATCH 03/14] Update README wording to reflect broader use cases for tool --- audit-cli/README.md | 34 +++++++++++++++++++++------------- 1 file changed, 21 insertions(+), 13 deletions(-) diff --git a/audit-cli/README.md b/audit-cli/README.md index 57a0a24..326acb9 100644 --- a/audit-cli/README.md +++ b/audit-cli/README.md @@ -1,6 +1,6 @@ # audit-cli -A Go CLI tool for extracting and analyzing code examples 
from MongoDB documentation written in reStructuredText (RST). +A Go CLI tool for performing audit-related tasks in the MongoDB documentation monorepo. ## Table of Contents @@ -21,14 +21,17 @@ A Go CLI tool for extracting and analyzing code examples from MongoDB documentat ## Overview -This CLI tool helps maintain code quality across MongoDB's documentation by: +This CLI tool helps with maintenance and audit-related tasks across MongoDB's documentation by: -1. **Extracting code examples** from RST files into individual, testable files -2. **Searching extracted code** for specific patterns or substrings -3. **Analyzing include relationships** to understand file dependencies +1. **Extracting code examples** or **procedures** from RST files into individual, testable files +2. **Searching files** for specific patterns or substrings +3. **Analyzing reference relationships** to understand file dependencies 4. **Comparing file contents** across documentation versions to identify differences 5. **Following include directives** to process entire documentation trees -6. **Handling MongoDB-specific conventions** like steps files, extracts, and template variables +6. **Counting documentation pages** or **tested code examples** to track coverage and quality metrics + +This CLI provides built-in handling for MongoDB-specific conventions like steps files, extracts, version comprehension, +and template variables. ## Installation @@ -63,7 +66,6 @@ audit-cli │ ├── includes │ ├── usage │ └── procedures -│ └── usage ├── compare # Compare files across versions │ └── file-contents └── count # Count code examples and documentation pages @@ -256,7 +258,8 @@ After extraction, the report shows: #### `search find-string` -Search through files for a specific substring. Can search through extracted code example files or RST source files. +Search through files for a specific substring. Can search through extracted code example or procedure files or RST +source files. 
**Default Behavior:** - **Case-insensitive** search (matches "curl", "CURL", "Curl", etc.) @@ -337,13 +340,14 @@ With `-v` flag, also shows: Analyze `include` directive relationships in RST files to understand file dependencies. -This command recursively follows `.. include::` directives to show all files that are referenced from a starting file. This helps you understand which content is transcluded into a page. +This command recursively follows `.. include::` directives to show all files that are referenced from a starting file. +This helps you understand which content is transcluded into a page. **Use Cases:** This command helps writers: - Understand the impact of changes to widely-included files -- Identify circular include dependencies (files included multiple times) +- Identify files included multiple times - Document file relationships for maintenance - Plan refactoring of complex include structures - See what content is actually pulled into a page @@ -406,9 +410,12 @@ the `analyze usage` command with the `--include-toctree` flag. #### `analyze usage` -Find all files that use a target file through RST directives. This performs reverse dependency analysis, showing which files reference the target file through `include`, `literalinclude`, `io-code-block`, or `toctree` directives. +Find all files that use a target file through RST directives. This performs reverse dependency analysis, showing which +files reference the target file through `include`, `literalinclude`, `io-code-block`, or `toctree` directives. -The command searches all RST files (`.rst` and `.txt` extensions) and YAML files (`.yaml` and `.yml` extensions) in the source directory tree. YAML files are included because extract and release files contain RST directives within their content blocks. +The command searches all RST files (`.rst` and `.txt` extensions) and YAML files (`.yaml` and `.yml` extensions) in the +source directory tree. 
YAML files are included because extract and release files contain RST directives within their +content blocks. **Use Cases:** @@ -501,7 +508,8 @@ With `--include-toctree`, also tracks: getting-started ``` -**Note:** Only file-based references are tracked. Inline content (e.g., `.. input::` with `:language:` but no file path) is not tracked since it doesn't reference external files. +**Note:** Only file-based references are tracked. Inline content (e.g., `.. input::` with `:language:` but no file path) +is not tracked since it doesn't reference external files. **Output Formats:** From d1adae077291b8f0a2808073a8d2a7838c8e4114 Mon Sep 17 00:00:00 2001 From: Dachary Carey Date: Fri, 5 Dec 2025 15:34:25 -0500 Subject: [PATCH 04/14] Update output examples in README to match updated analyze includes output --- audit-cli/README.md | 57 ++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 51 insertions(+), 6 deletions(-) diff --git a/audit-cli/README.md b/audit-cli/README.md index 326acb9..c0c2225 100644 --- a/audit-cli/README.md +++ b/audit-cli/README.md @@ -380,8 +380,22 @@ This command helps writers: **Output Formats:** **Summary** (default - no flags): +``` +============================================================ +INCLUDE ANALYSIS SUMMARY +============================================================ +Root File: /path/to/file.rst +Unique Files: 18 +Include Directives: 56 +Max Depth: 2 +============================================================ + +Use --tree to see the hierarchical structure +Use --list to see a flat list of all files +``` - Root file path -- Total number of files +- Number of unique files discovered +- Total number of include directive instances (counting duplicates) - Maximum depth of include nesting - Hints to use --tree or --list for more details @@ -389,18 +403,49 @@ This command helps writers: - Hierarchical tree structure showing include relationships - Uses box-drawing characters for visual clarity - Shows which files include which 
other files +- Displays directory paths to help disambiguate files with the same name + - Files in `includes` directories: `includes/filename.rst` + - Files outside `includes`: `path/from/source/filename.rst` **List** (--list flag): -- Flat numbered list of all files +- Flat numbered list of all unique files - Files listed in depth-first traversal order - Shows absolute paths to all files +**Verbose** (-v flag): +- Shows complete dependency tree with all nodes (including duplicates) +- Each file displays the number of include directives it contains +- Uses visual indicators to show duplicate includes: + - `•` (filled bullet) - First occurrence of a file + - `◦` (hollow bullet) - Subsequent occurrences (duplicates) +- Example output: +``` +• get-started.txt (24 include directives) + • get-started/node/language-connection-steps.rst (3 include directives) + • includes/load-sample-data.rst + • includes/connection-string-note.rst + • includes/application-output.rst + • includes/next-steps.rst + • get-started/python/language-connection-steps.rst (3 include directives) + ◦ includes/load-sample-data.rst + ◦ includes/connection-string-note.rst + ◦ includes/application-output.rst + ◦ includes/next-steps.rst +``` + **Note on File Counting:** -The total file count represents **unique files** discovered through include directives. If a file is included multiple -times (e.g., file A includes file C, and file B also includes file C), the file is counted only once in the total. -However, the tree view will show it in all locations where it appears, with subsequent occurrences marked as circular -includes in verbose mode. +The command reports two distinct metrics: + +1. **Unique Files**: Number of distinct files discovered through include directives. If a file is included multiple + times (e.g., file A includes file C, and file B also includes file C), the file is counted only once. + +2. 
**Include Directives**: Total number of include directive instances across all files. This counts every occurrence, + including duplicates. For example, if `load-sample-data.rst` is included 12 times across different files, it + contributes 12 to this count. + +In verbose mode, the tree view shows files in all locations where they appear. Duplicate occurrences are marked with +a hollow bullet (`◦`) to help you identify files that are included multiple times. **Note on Toctree:** From 74ffe553c09ca29e3978dd673895039b63815e7e Mon Sep 17 00:00:00 2001 From: Dachary Carey Date: Fri, 5 Dec 2025 15:39:33 -0500 Subject: [PATCH 05/14] Update help text to reflect broader scope of the tool --- audit-cli/commands/count/count.go | 12 ++++--- .../search/find-string/find_string.go | 15 +++++---- audit-cli/commands/search/search.go | 14 ++++---- audit-cli/main.go | 33 ++++++++++--------- 4 files changed, 42 insertions(+), 32 deletions(-) diff --git a/audit-cli/commands/count/count.go b/audit-cli/commands/count/count.go index 4d291c2..94cf474 100644 --- a/audit-cli/commands/count/count.go +++ b/audit-cli/commands/count/count.go @@ -1,9 +1,11 @@ -// Package count provides the parent command for counting code examples and documentation pages. +// Package count provides the parent command for counting documentation content. // // This package serves as the parent command for various counting operations. // Currently supports: // - tested-examples: Count tested code examples in the MongoDB documentation monorepo // - pages: Count documentation pages (.txt files) in the MongoDB documentation monorepo +// +// These commands help writers track coverage metrics and report to stakeholders. package count import ( @@ -14,13 +16,15 @@ import ( // NewCountCommand creates the count parent command. // -// This command serves as a parent for various counting operations on code examples and documentation pages. 
+// This command serves as a parent for various counting operations on documentation content. // It doesn't perform any operations itself but provides a namespace for subcommands. func NewCountCommand() *cobra.Command { cmd := &cobra.Command{ Use: "count", - Short: "Count code examples and documentation pages", - Long: `Count various types of content in the MongoDB documentation. + Short: "Count documentation content for metrics and reporting", + Long: `Count various types of content in the MongoDB documentation monorepo. + +Helps writers track coverage metrics and report statistics to stakeholders. Currently supports: - tested-examples: Count tested code examples in the documentation monorepo diff --git a/audit-cli/commands/search/find-string/find_string.go b/audit-cli/commands/search/find-string/find_string.go index be90ec5..c60168e 100644 --- a/audit-cli/commands/search/find-string/find_string.go +++ b/audit-cli/commands/search/find-string/find_string.go @@ -1,7 +1,7 @@ -// Package find_string provides functionality for searching code example files for substrings. +// Package find_string provides functionality for searching documentation files for substrings. // // This package implements the "search find-string" subcommand, which searches through -// extracted code example files to find occurrences of a specific substring. +// RST source files or extracted content to find occurrences of a specific substring. // // By default, the search is case-insensitive and matches exact words only (not partial matches // within larger words). These behaviors can be changed with the --case-sensitive and @@ -29,7 +29,7 @@ import ( // NewFindStringCommand creates the find-string subcommand. // -// This command searches through extracted code example files for a specific substring. +// This command searches through documentation files or extracted content for a specific substring. // Supports flags for recursive search, following includes, and verbose output. 
// // Flags: @@ -49,9 +49,12 @@ func NewFindStringCommand() *cobra.Command { cmd := &cobra.Command{ Use: "find-string [filepath] [substring]", - Short: "Search for a substring in extracted code example files", - Long: `Search through extracted code example files to find occurrences of a specific substring. -Reports the number of code examples containing the substring. + Short: "Search for a substring in documentation files", + Long: `Search through RST source files or extracted content to find occurrences of a specific substring. +Reports the number of files containing the substring. + +Helps writers identify files that need updates when a string needs to be changed, +and scope maintenance work related to specific changes. By default, the search is case-insensitive and matches exact words only. Use --case-sensitive to make the search case-sensitive, or --partial-match to allow matching the substring as part diff --git a/audit-cli/commands/search/search.go b/audit-cli/commands/search/search.go index ed9bdee..e6c6cec 100644 --- a/audit-cli/commands/search/search.go +++ b/audit-cli/commands/search/search.go @@ -1,8 +1,8 @@ -// Package search provides the parent command for searching through extracted content. +// Package search provides the parent command for searching through documentation files. // // This package serves as the parent command for various search operations. // Currently supports: -// - find-string: Search for substrings in extracted code example files +// - find-string: Search for substrings in documentation files or extracted content // // Future subcommands could include pattern matching, regex search, or semantic search. package search @@ -14,15 +14,17 @@ import ( // NewSearchCommand creates the search parent command. // -// This command serves as a parent for various search operations on extracted content. +// This command serves as a parent for various search operations on documentation files. 
// It doesn't perform any operations itself but provides a namespace for subcommands. func NewSearchCommand() *cobra.Command { cmd := &cobra.Command{ Use: "search", - Short: "Search through extracted content", - Long: `Search through extracted content such as code examples. + Short: "Search through documentation files", + Long: `Search through documentation files or extracted content. + +Currently supports searching for substrings in RST source files or extracted content. +Helps writers identify files that need updates and scope maintenance work. -Currently supports searching for substrings in extracted code example files. Future subcommands may support pattern matching, regex search, or semantic search.`, } diff --git a/audit-cli/main.go b/audit-cli/main.go index 2345b84..c9a8d2c 100644 --- a/audit-cli/main.go +++ b/audit-cli/main.go @@ -1,19 +1,15 @@ // Package main provides the entry point for the audit-cli tool. // -// audit-cli is a command-line tool for extracting and analyzing code examples -// from MongoDB documentation written in reStructuredText (RST). +// audit-cli is a command-line tool for performing audit-related tasks in the +// MongoDB documentation monorepo. It helps technical writers with maintenance +// tasks, scoping work, and reporting information to stakeholders. 
// // The CLI is organized into parent commands with subcommands: -// - extract: Extract content from RST files -// - code-examples: Extract code examples from RST directives -// - search: Search through extracted content -// - find-string: Search for substrings in extracted files -// - analyze: Analyze RST file structures -// - includes: Analyze include directive relationships +// - extract: Extract content from RST files (code examples, procedures) +// - search: Search through documentation files +// - analyze: Analyze RST file structures and relationships // - compare: Compare files across different versions -// - file-contents: Compare file contents across versions -// - count: Count code examples -// - tested-examples: Count tested code examples in the monorepo +// - count: Count documentation content (code examples, pages) package main import ( @@ -28,12 +24,17 @@ import ( func main() { var rootCmd = &cobra.Command{ Use: "audit-cli", - Short: "A CLI tool for extracting and analyzing code examples from MongoDB documentation", - Long: `audit-cli extracts code examples from reStructuredText files and provides -tools for searching and analyzing the extracted content. 
+ Short: "A CLI tool for auditing and analyzing MongoDB documentation", + Long: `audit-cli helps MongoDB technical writers perform audit-related tasks in the +documentation monorepo, including: -Supports extraction from literalinclude, code-block, and io-code-block directives, -with special handling for MongoDB documentation conventions.`, + - Extracting content (code examples, procedures) for testing and migration + - Searching documentation files for specific strings or patterns + - Analyzing file dependencies and relationships + - Comparing files across documentation versions + - Counting documentation content for reporting and metrics + +Designed for maintenance tasks, scoping work, and reporting to stakeholders.`, } // Add parent commands From 81c5afdce00d710b1df72c7de160fbf672dd9fa0 Mon Sep 17 00:00:00 2001 From: Dachary Carey Date: Fri, 5 Dec 2025 16:15:18 -0500 Subject: [PATCH 06/14] Add step and extract file handling to usage analysis, add recursive handling --- audit-cli/commands/analyze/usage/analyzer.go | 299 +++++++++++++++++- audit-cli/commands/analyze/usage/output.go | 109 ++++--- audit-cli/commands/analyze/usage/usage.go | 27 +- .../commands/analyze/usage/usage_test.go | 126 ++++++++ 4 files changed, 513 insertions(+), 48 deletions(-) diff --git a/audit-cli/commands/analyze/usage/analyzer.go b/audit-cli/commands/analyze/usage/analyzer.go index e1b152f..39080ed 100644 --- a/audit-cli/commands/analyze/usage/analyzer.go +++ b/audit-cli/commands/analyze/usage/analyzer.go @@ -138,6 +138,155 @@ func AnalyzeUsage(targetFile string, includeToctree bool, verbose bool, excludeP return analysis, nil } +// AnalyzeUsageRecursive finds all .txt files that ultimately use the target file. +// +// This function recursively follows the usage tree upward until it reaches only .txt files, +// which represent documentation pages. For each non-.txt file that uses the target, it +// recursively analyzes what uses that file, continuing until all paths lead to .txt files. 
+// +// Parameters: +// - targetFile: Absolute path to the file to analyze +// - includeToctree: If true, include toctree entries in the search +// - verbose: If true, show progress information +// - excludePattern: Glob pattern for paths to exclude (empty string means no exclusion) +// +// Returns: +// - *UsageAnalysis: The analysis results containing only .txt files +// - error: Any error encountered during analysis +func AnalyzeUsageRecursive(targetFile string, includeToctree bool, verbose bool, excludePattern string) (*UsageAnalysis, error) { + // Track all .txt files we've found (as a set to avoid duplicates) + txtFilesSet := make(map[string]bool) + processed := make(map[string]bool) + + // Get absolute path + absTargetFile, err := filepath.Abs(targetFile) + if err != nil { + return nil, fmt.Errorf("failed to get absolute path: %w", err) + } + + // Find the source directory + sourceDir, err := pathresolver.FindSourceDirectory(absTargetFile) + if err != nil { + return nil, fmt.Errorf("failed to find source directory: %w\n\nThe source directory is detected by looking for a 'source' directory in the file's path.\nMake sure the target file is within a documentation repository with a 'source' directory.", err) + } + + if verbose { + fmt.Fprintf(os.Stderr, "Starting recursive analysis for: %s\n", absTargetFile) + fmt.Fprintf(os.Stderr, "Following usage tree until reaching .txt files...\n\n") + } + + // Recursively analyze usage + if err := analyzeUsageRecursiveHelper(absTargetFile, sourceDir, includeToctree, verbose, excludePattern, txtFilesSet, processed, 0); err != nil { + return nil, err + } + + if verbose { + fmt.Fprintf(os.Stderr, "\nRecursive analysis complete. 
Found %d .txt files.\n", len(txtFilesSet)) + } + + // Convert set to FileUsage slice + var allUsages []FileUsage + for txtFile := range txtFilesSet { + // Create a simple FileUsage entry for each .txt file + // We use "include" as a generic directive type since we're showing the final pages + allUsages = append(allUsages, FileUsage{ + FilePath: txtFile, + DirectiveType: "include", + UsagePath: txtFile, + LineNumber: 0, + }) + } + + // Sort by file path for consistent output + sort.Slice(allUsages, func(i, j int) bool { + return allUsages[i].FilePath < allUsages[j].FilePath + }) + + // Create analysis result + analysis := &UsageAnalysis{ + TargetFile: absTargetFile, + SourceDir: sourceDir, + UsingFiles: allUsages, + TotalUsages: len(allUsages), + TotalFiles: len(txtFilesSet), + } + + return analysis, nil +} + +// analyzeUsageRecursiveHelper is a helper function that recursively analyzes usage. +// +// This function analyzes the target file and for each non-.txt file that uses it, +// recursively analyzes what uses that file. It continues until all paths lead to .txt files. 
+// +// Parameters: +// - targetFile: Absolute path to the file to analyze +// - sourceDir: Source directory for the documentation +// - includeToctree: If true, include toctree entries in the search +// - verbose: If true, show progress information +// - excludePattern: Glob pattern for paths to exclude +// - txtFiles: Set to collect all .txt files found +// - processed: Set of files we've already processed to avoid cycles +// - depth: Current recursion depth (for indentation in verbose mode) +// +// Returns: +// - error: Any error encountered during analysis +func analyzeUsageRecursiveHelper(targetFile, sourceDir string, includeToctree, verbose bool, excludePattern string, txtFiles map[string]bool, processed map[string]bool, depth int) error { + // Skip if we've already processed this file + if processed[targetFile] { + return nil + } + processed[targetFile] = true + + if verbose { + relPath, _ := filepath.Rel(sourceDir, targetFile) + indent := strings.Repeat(" ", depth) + fmt.Fprintf(os.Stderr, "%sAnalyzing: %s\n", indent, relPath) + } + + // Analyze usage for this file + analysis, err := AnalyzeUsage(targetFile, includeToctree, false, excludePattern) + if err != nil { + return err + } + + // If no files use this file, we're done + if len(analysis.UsingFiles) == 0 { + if verbose { + indent := strings.Repeat(" ", depth) + fmt.Fprintf(os.Stderr, "%s (no usages found)\n", indent) + } + return nil + } + + // Process each file that uses the target + for _, usage := range analysis.UsingFiles { + ext := filepath.Ext(usage.FilePath) + + if ext == ".txt" { + // This is a documentation page - add it to our results + txtFiles[usage.FilePath] = true + if verbose { + relPath, _ := filepath.Rel(sourceDir, usage.FilePath) + indent := strings.Repeat(" ", depth) + fmt.Fprintf(os.Stderr, "%s -> [.txt] %s\n", indent, relPath) + } + } else { + // This is an include file (.rst, .yaml, etc.) 
- recursively analyze it + if verbose { + relPath, _ := filepath.Rel(sourceDir, usage.FilePath) + indent := strings.Repeat(" ", depth) + fmt.Fprintf(os.Stderr, "%s -> [%s] %s (following...)\n", indent, ext, relPath) + } + if err := analyzeUsageRecursiveHelper(usage.FilePath, sourceDir, includeToctree, verbose, excludePattern, txtFiles, processed, depth+1); err != nil { + return err + } + } + } + + return nil +} + // findUsagesInFile searches a single file for usages of the target file. // // This function scans through the file line by line looking for include, @@ -287,6 +436,10 @@ func findUsagesInFile(filePath, targetFile, sourceDir string, includeToctree boo // referencesTarget checks if a reference path points to the target file. // // This function resolves the reference path and compares it to the target file. +// It also handles special cases like: +// - Step files: "steps-something.yaml" referenced as "steps/something.rst" +// - Extract files: "extracts-name.yaml" with refs referenced as "extracts/ref-id.rst" +// - Release files: "release-name.yaml" with refs referenced as "release/ref-id.rst" // // Parameters: // - refPath: The path from the directive (e.g., "/includes/file.rst") @@ -316,8 +469,150 @@ func referencesTarget(refPath, targetFile, sourceDir, currentFile string) bool { return false } - // Compare with target file - return absResolvedPath == targetFile + // Direct match + if absResolvedPath == targetFile { + return true + } + + targetBase := filepath.Base(targetFile) + + // Special case: Check if the target is a step file (steps-*.yaml) + // These are referenced as steps/*.rst in includes + if strings.HasPrefix(targetBase, "steps-") && strings.HasSuffix(targetBase, ".yaml") { + // Transform the target path from steps-something.yaml to steps/something.rst + transformedPath := transformStepFilePath(targetFile) + if absResolvedPath == transformedPath { + return true + } + } + + // Special case: Check if the target is an extract or release file 
(extracts-*.yaml or release-*.yaml) + // These are referenced as extracts/ref-id.rst or release/ref-id.rst + if (strings.HasPrefix(targetBase, "extracts-") || strings.HasPrefix(targetBase, "release-")) && strings.HasSuffix(targetBase, ".yaml") { + // Get all refs from the YAML file + refs, err := getExtractRefs(targetFile) + if err != nil { + // If we can't read the refs, skip this check + return false + } + + // Check if the resolved path matches any of the transformed ref paths + for _, refID := range refs { + transformedPath := transformExtractFilePath(targetFile, refID) + if absResolvedPath == transformedPath { + return true + } + } + } + + return false +} + +// transformStepFilePath transforms a step file path from the YAML format to the RST format. +// +// MongoDB's build system transforms step files: +// - From: /path/to/includes/steps-shard-collection.yaml +// - To: /path/to/includes/steps/shard-collection.rst +// +// Parameters: +// - stepFilePath: Absolute path to the step YAML file +// +// Returns: +// - string: Transformed path as it would appear in include directives +func transformStepFilePath(stepFilePath string) string { + dir := filepath.Dir(stepFilePath) + base := filepath.Base(stepFilePath) + + // Remove "steps-" prefix and ".yaml" extension + if !strings.HasPrefix(base, "steps-") || !strings.HasSuffix(base, ".yaml") { + return stepFilePath + } + + // Extract the name part (e.g., "shard-collection" from "steps-shard-collection.yaml") + name := strings.TrimPrefix(base, "steps-") + name = strings.TrimSuffix(name, ".yaml") + + // Build the transformed path: /path/to/includes/steps/shard-collection.rst + transformedPath := filepath.Join(dir, "steps", name+".rst") + + return transformedPath +} + +// getExtractRefs extracts all ref IDs from an extract or release YAML file. +// +// MongoDB documentation uses extract and release files that contain multiple +// content blocks, each with a unique ref ID. 
These are referenced in includes as: +// /includes/extracts/ref-id.rst or /includes/release/ref-id.rst +// +// Parameters: +// - yamlFilePath: Absolute path to the extract or release YAML file +// +// Returns: +// - []string: List of ref IDs found in the file +// - error: Any error encountered during parsing +func getExtractRefs(yamlFilePath string) ([]string, error) { + file, err := os.Open(yamlFilePath) + if err != nil { + return nil, err + } + defer file.Close() + + var refs []string + scanner := bufio.NewScanner(file) + + for scanner.Scan() { + line := strings.TrimSpace(scanner.Text()) + // Look for lines that start with "ref:" + if strings.HasPrefix(line, "ref:") { + // Extract the ref value + refValue := strings.TrimPrefix(line, "ref:") + refValue = strings.TrimSpace(refValue) + // Remove quotes if present + refValue = strings.Trim(refValue, "\"'") + if refValue != "" { + refs = append(refs, refValue) + } + } + } + + if err := scanner.Err(); err != nil { + return nil, err + } + + return refs, nil +} + +// transformExtractFilePath transforms an extract/release file path and ref to the RST format. 
+// +// MongoDB's build system references extract and release files by ref: +// - From: /path/to/includes/extracts-single-threaded-driver.yaml with ref: c-driver-single-threaded +// - To: /path/to/includes/extracts/c-driver-single-threaded.rst +// +// Parameters: +// - yamlFilePath: Absolute path to the extract/release YAML file +// - refID: The ref ID to transform +// +// Returns: +// - string: Transformed path as it would appear in include directives +func transformExtractFilePath(yamlFilePath, refID string) string { + dir := filepath.Dir(yamlFilePath) + base := filepath.Base(yamlFilePath) + + // Determine the type (extracts or release) + var dirType string + if strings.HasPrefix(base, "extracts-") { + dirType = "extracts" + } else if strings.HasPrefix(base, "release-") { + dirType = "release" + } else { + // Not an extract or release file + return yamlFilePath + } + + // Build the transformed path: /path/to/includes/extracts/ref-id.rst + transformedPath := filepath.Join(dir, dirType, refID+".rst") + + return transformedPath } // referencesToctreeTarget checks if a toctree document name points to the target file. 
diff --git a/audit-cli/commands/analyze/usage/output.go b/audit-cli/commands/analyze/usage/output.go index 6af9fa0..15cc367 100644 --- a/audit-cli/commands/analyze/usage/output.go +++ b/audit-cli/commands/analyze/usage/output.go @@ -25,12 +25,13 @@ const ( // - analysis: The analysis results to print // - format: The output format (text or json) // - verbose: If true, show additional details -func PrintAnalysis(analysis *UsageAnalysis, format OutputFormat, verbose bool) error { +// - recursive: If true, indicates recursive mode was used +func PrintAnalysis(analysis *UsageAnalysis, format OutputFormat, verbose bool, recursive bool) error { switch format { case FormatJSON: return printJSON(analysis) case FormatText: - printText(analysis, verbose) + printText(analysis, verbose, recursive) return nil default: return fmt.Errorf("unknown output format: %s", format) @@ -38,23 +39,41 @@ func PrintAnalysis(analysis *UsageAnalysis, format OutputFormat, verbose bool) e } // printText prints the analysis results in human-readable text format. 
-func printText(analysis *UsageAnalysis, verbose bool) { +func printText(analysis *UsageAnalysis, verbose bool, recursive bool) { fmt.Println("============================================================") - fmt.Println("USAGE ANALYSIS") + if recursive { + fmt.Println("RECURSIVE USAGE ANALYSIS") + } else { + fmt.Println("USAGE ANALYSIS") + } fmt.Println("============================================================") fmt.Printf("Target File: %s\n", analysis.TargetFile) - fmt.Printf("Total Files: %d\n", analysis.TotalFiles) - fmt.Printf("Total Usages: %d\n", analysis.TotalUsages) + if recursive { + fmt.Printf("Total .txt Files: %d\n", analysis.TotalFiles) + fmt.Println("(Showing only .txt documentation pages)") + } else { + fmt.Printf("Total Files: %d\n", analysis.TotalFiles) + fmt.Printf("Total Usages: %d\n", analysis.TotalUsages) + } fmt.Println("============================================================") fmt.Println() if analysis.TotalUsages == 0 { - fmt.Println("No files use this file.") - fmt.Println() - fmt.Println("This could mean:") - fmt.Println(" - The file is not included in any documentation pages") - fmt.Println(" - The file might be orphaned (not used)") - fmt.Println(" - The file is used with a different path") + if recursive { + fmt.Println("No .txt files ultimately use this file.") + fmt.Println() + fmt.Println("This could mean:") + fmt.Println(" - The file is only used by other include files, not by any .txt pages") + fmt.Println(" - The file might be orphaned (not used)") + fmt.Println(" - The file is used with a different path") + } else { + fmt.Println("No files use this file.") + fmt.Println() + fmt.Println("This could mean:") + fmt.Println(" - The file is not included in any documentation pages") + fmt.Println(" - The file might be orphaned (not used)") + fmt.Println(" - The file is used with a different path") + } fmt.Println() fmt.Println("Note: By default, only content inclusion directives are searched.") fmt.Println("Use 
--include-toctree to also search for toctree navigation links.") @@ -62,29 +81,32 @@ func printText(analysis *UsageAnalysis, verbose bool) { return } - // Group usages by directive type - byDirectiveType := groupByDirectiveType(analysis.UsingFiles) - - // Print breakdown by directive type with file and reference counts - directiveTypes := []string{"include", "literalinclude", "io-code-block", "toctree"} - for _, directiveType := range directiveTypes { - if refs, ok := byDirectiveType[directiveType]; ok { - uniqueFiles := countUniqueFiles(refs) - totalRefs := len(refs) - if uniqueFiles == totalRefs { - // No duplicates - just show count - fmt.Printf("%-20s: %d\n", directiveType, uniqueFiles) - } else { - // Has duplicates - show both counts - if uniqueFiles == 1 { - fmt.Printf("%-20s: %d file, %d usages\n", directiveType, uniqueFiles, totalRefs) + // In recursive mode, skip the directive type breakdown since we only show .txt files + if !recursive { + // Group usages by directive type + byDirectiveType := groupByDirectiveType(analysis.UsingFiles) + + // Print breakdown by directive type with file and reference counts + directiveTypes := []string{"include", "literalinclude", "io-code-block", "toctree"} + for _, directiveType := range directiveTypes { + if refs, ok := byDirectiveType[directiveType]; ok { + uniqueFiles := countUniqueFiles(refs) + totalRefs := len(refs) + if uniqueFiles == totalRefs { + // No duplicates - just show count + fmt.Printf("%-20s: %d\n", directiveType, uniqueFiles) } else { - fmt.Printf("%-20s: %d files, %d usages\n", directiveType, uniqueFiles, totalRefs) + // Has duplicates - show both counts + if uniqueFiles == 1 { + fmt.Printf("%-20s: %d file, %d usages\n", directiveType, uniqueFiles, totalRefs) + } else { + fmt.Printf("%-20s: %d files, %d usages\n", directiveType, uniqueFiles, totalRefs) + } } } } + fmt.Println() } - fmt.Println() // Group usages by file grouped := GroupUsagesByFile(analysis.UsingFiles) @@ -97,19 +119,24 @@ func 
printText(analysis *UsageAnalysis, verbose bool) { relPath = group.FilePath } - // Print file path with directive type label - if group.Count > 1 { - // Multiple usages from this file - fmt.Printf("%3d. [%s] %s (%d usages)\n", i+1, group.DirectiveType, relPath, group.Count) + if recursive { + // In recursive mode, just show the .txt file paths + fmt.Printf("%3d. %s\n", i+1, relPath) } else { - // Single usage - fmt.Printf("%3d. [%s] %s\n", i+1, group.DirectiveType, relPath) - } + // Print file path with directive type label + if group.Count > 1 { + // Multiple usages from this file + fmt.Printf("%3d. [%s] %s (%d usages)\n", i+1, group.DirectiveType, relPath, group.Count) + } else { + // Single usage + fmt.Printf("%3d. [%s] %s\n", i+1, group.DirectiveType, relPath) + } - // Print line numbers in verbose mode - if verbose { - for _, usage := range group.Usages { - fmt.Printf(" Line %d: %s\n", usage.LineNumber, usage.UsagePath) + // Print line numbers in verbose mode + if verbose { + for _, usage := range group.Usages { + fmt.Printf(" Line %d: %s\n", usage.LineNumber, usage.UsagePath) + } } } } diff --git a/audit-cli/commands/analyze/usage/usage.go b/audit-cli/commands/analyze/usage/usage.go index 94198e4..a644562 100644 --- a/audit-cli/commands/analyze/usage/usage.go +++ b/audit-cli/commands/analyze/usage/usage.go @@ -37,6 +37,7 @@ import ( // - -t, --directive-type: Filter by directive type (include, literalinclude, io-code-block, toctree) // - --include-toctree: Include toctree entries (navigation links) in addition to content inclusion directives // - --exclude: Exclude paths matching this glob pattern (e.g., '*/archive/*') +// - -r, --recursive: Recursively follow usage tree until reaching only .txt files (documentation pages) func NewUsageCommand() *cobra.Command { var ( format string @@ -47,6 +48,7 @@ func NewUsageCommand() *cobra.Command { directiveType string includeToctree bool excludePattern string + recursive bool ) cmd := &cobra.Command{ @@ -103,10 
+105,13 @@ Examples: analyze usage /path/to/file.rst --exclude "*/archive/*" # Filter by directive type - analyze usage /path/to/file.rst --directive-type include`, + analyze usage /path/to/file.rst --directive-type include + + # Recursively follow usage tree to find all .txt documentation pages + analyze usage /path/to/includes/fact.rst --recursive`, Args: cobra.ExactArgs(1), RunE: func(cmd *cobra.Command, args []string) error { - return runUsage(args[0], format, verbose, countOnly, pathsOnly, summaryOnly, directiveType, includeToctree, excludePattern) + return runUsage(args[0], format, verbose, countOnly, pathsOnly, summaryOnly, directiveType, includeToctree, excludePattern, recursive) }, } @@ -118,6 +123,7 @@ Examples: cmd.Flags().StringVarP(&directiveType, "directive-type", "t", "", "Filter by directive type (include, literalinclude, io-code-block, toctree)") cmd.Flags().BoolVar(&includeToctree, "include-toctree", false, "Include toctree entries (navigation links) in addition to content inclusion directives") cmd.Flags().StringVar(&excludePattern, "exclude", "", "Exclude paths matching this glob pattern (e.g., '*/archive/*' or '*/deprecated/*')") + cmd.Flags().BoolVarP(&recursive, "recursive", "r", false, "Recursively follow usage tree until reaching only .txt files (documentation pages)") return cmd } @@ -136,10 +142,11 @@ Examples: // - directiveType: Filter by directive type (empty string means all types) // - includeToctree: If true, include toctree entries in the search // - excludePattern: Glob pattern for paths to exclude (empty string means no exclusion) +// - recursive: If true, recursively follow usage tree until reaching only .txt files // // Returns: // - error: Any error encountered during analysis -func runUsage(targetFile, format string, verbose, countOnly, pathsOnly, summaryOnly bool, directiveType string, includeToctree bool, excludePattern string) error { +func runUsage(targetFile, format string, verbose, countOnly, pathsOnly, summaryOnly 
bool, directiveType string, includeToctree bool, excludePattern string, recursive bool) error { // Validate directive type if specified if directiveType != "" { validTypes := map[string]bool{ @@ -178,7 +185,17 @@ func runUsage(targetFile, format string, verbose, countOnly, pathsOnly, summaryO } // Perform analysis - analysis, err := AnalyzeUsage(targetFile, includeToctree, verbose, excludePattern) + var analysis *UsageAnalysis + var err error + + if recursive { + // Perform recursive analysis to find all .txt files + analysis, err = AnalyzeUsageRecursive(targetFile, includeToctree, verbose, excludePattern) + } else { + // Perform standard single-level analysis + analysis, err = AnalyzeUsage(targetFile, includeToctree, verbose, excludePattern) + } + if err != nil { return fmt.Errorf("failed to analyze usage: %w", err) } @@ -205,6 +222,6 @@ func runUsage(targetFile, format string, verbose, countOnly, pathsOnly, summaryO } // Print full results - return PrintAnalysis(analysis, outputFormat, verbose) + return PrintAnalysis(analysis, outputFormat, verbose, recursive) } diff --git a/audit-cli/commands/analyze/usage/usage_test.go b/audit-cli/commands/analyze/usage/usage_test.go index 5c67413..9a61eb4 100644 --- a/audit-cli/commands/analyze/usage/usage_test.go +++ b/audit-cli/commands/analyze/usage/usage_test.go @@ -258,6 +258,27 @@ func TestReferencesTarget(t *testing.T) { currentFile: filepath.Join(absTestDataDir, "includes/nested-include.rst"), expected: false, }, + { + name: "Step file transformation - absolute path", + refPath: "/includes/steps/shard-collection.rst", + targetFile: filepath.Join(absTestDataDir, "includes/steps-shard-collection.yaml"), + currentFile: filepath.Join(absTestDataDir, "test.txt"), + expected: true, + }, + { + name: "Step file transformation - relative path", + refPath: "steps/shard-collection.rst", + targetFile: filepath.Join(absTestDataDir, "includes/steps-shard-collection.yaml"), + currentFile: filepath.Join(absTestDataDir, 
"includes/test.txt"), + expected: true, + }, + { + name: "Step file no match - different name", + refPath: "/includes/steps/other-steps.rst", + targetFile: filepath.Join(absTestDataDir, "includes/steps-shard-collection.yaml"), + currentFile: filepath.Join(absTestDataDir, "test.txt"), + expected: false, + }, } for _, tt := range tests { @@ -270,6 +291,111 @@ func TestReferencesTarget(t *testing.T) { } } +// TestTransformStepFilePath tests the transformStepFilePath function. +func TestTransformStepFilePath(t *testing.T) { + tests := []struct { + name string + input string + expected string + }{ + { + name: "Step file transformation", + input: "/path/to/includes/steps-shard-collection.yaml", + expected: "/path/to/includes/steps/shard-collection.rst", + }, + { + name: "Step file with complex name", + input: "/path/to/includes/steps-convert-replset-to-sharded-cluster.yaml", + expected: "/path/to/includes/steps/convert-replset-to-sharded-cluster.rst", + }, + { + name: "Non-step file - no transformation", + input: "/path/to/includes/fact-something.yaml", + expected: "/path/to/includes/fact-something.yaml", + }, + { + name: "Non-yaml file - no transformation", + input: "/path/to/includes/steps-something.rst", + expected: "/path/to/includes/steps-something.rst", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := transformStepFilePath(tt.input) + if result != tt.expected { + t.Errorf("expected %v, got %v", tt.expected, result) + } + }) + } +} + +// TestTransformExtractFilePath tests the transformExtractFilePath function. 
+func TestTransformExtractFilePath(t *testing.T) { + tests := []struct { + name string + filePath string + refID string + expected string + }{ + { + name: "Extract file transformation", + filePath: "/path/to/includes/extracts-single-threaded-driver.yaml", + refID: "c-driver-single-threaded", + expected: "/path/to/includes/extracts/c-driver-single-threaded.rst", + }, + { + name: "Release file transformation", + filePath: "/path/to/includes/release-pinning.yaml", + refID: "pin-repo-to-version-yum", + expected: "/path/to/includes/release/pin-repo-to-version-yum.rst", + }, + { + name: "Non-extract file - no transformation", + filePath: "/path/to/includes/fact-something.yaml", + refID: "some-ref", + expected: "/path/to/includes/fact-something.yaml", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := transformExtractFilePath(tt.filePath, tt.refID) + if result != tt.expected { + t.Errorf("expected %v, got %v", tt.expected, result) + } + }) + } +} + +// TestGetExtractRefs tests the getExtractRefs function. +func TestGetExtractRefs(t *testing.T) { + // Use the test extract file from testdata + testFile := "../../../testdata/input-files/source/includes/extracts-test.yaml" + + refs, err := getExtractRefs(testFile) + if err != nil { + t.Fatalf("getExtractRefs failed: %v", err) + } + + expectedRefs := []string{"test-extract-intro", "test-extract-examples"} + if len(refs) != len(expectedRefs) { + t.Errorf("expected %d refs, got %d", len(expectedRefs), len(refs)) + } + + // Check that all expected refs are present + refMap := make(map[string]bool) + for _, ref := range refs { + refMap[ref] = true + } + + for _, expectedRef := range expectedRefs { + if !refMap[expectedRef] { + t.Errorf("expected ref %s not found", expectedRef) + } + } +} + // TestGroupByDirectiveType tests the groupByDirectiveType function. 
func TestGroupByDirectiveType(t *testing.T) { usages := []FileUsage{ From 39ea70c915a5f3b429784c8970e48698294105b7 Mon Sep 17 00:00:00 2001 From: Dachary Carey Date: Fri, 5 Dec 2025 17:05:35 -0500 Subject: [PATCH 07/14] Remove unneeded product path flag from compare file-contents command --- audit-cli/README.md | 9 +--- .../compare/file-contents/file_contents.go | 50 ++++++++++++------- 2 files changed, 33 insertions(+), 26 deletions(-) diff --git a/audit-cli/README.md b/audit-cli/README.md index c0c2225..33a40d2 100644 --- a/audit-cli/README.md +++ b/audit-cli/README.md @@ -855,20 +855,17 @@ This command helps writers: # Version comparison across MongoDB documentation versions ./audit-cli compare file-contents \ /path/to/manual/manual/source/includes/example.rst \ - --product-dir /path/to/manual \ --versions manual,upcoming,v8.0,v7.0 # Show which files differ ./audit-cli compare file-contents \ /path/to/manual/manual/source/includes/example.rst \ - --product-dir /path/to/manual \ --versions manual,upcoming,v8.0,v7.0 \ --show-paths # Show detailed diffs ./audit-cli compare file-contents \ /path/to/manual/manual/source/includes/example.rst \ - --product-dir /path/to/manual \ --versions manual,upcoming,v8.0,v7.0 \ --show-diff @@ -878,7 +875,6 @@ This command helps writers: **Flags:** -- `-p, --product-dir ` - Product directory path (required for version comparison) - `-V, --versions ` - Comma-separated list of versions (e.g., `manual,upcoming,v8.0`) - `--show-paths` - Display file paths grouped by status (matching, differing, not found) - `-d, --show-diff` - Display unified diff output (implies `--show-paths`) @@ -901,12 +897,11 @@ This mode: **2. Version Comparison (Product Directory)** -Provide one file path plus `--product-dir` and `--versions`: +Provide one file path plus `--versions`.
The product directory is automatically detected from the file path: ```bash ./audit-cli compare file-contents \ /path/to/manual/manual/source/includes/example.rst \ - --product-dir /path/to/manual \ --versions manual,upcoming,v8.0 ``` @@ -961,13 +956,11 @@ product-dir/ # Check if a file is consistent across all versions ./audit-cli compare file-contents \ ~/workspace/docs-mongodb-internal/content/manual/manual/source/includes/fact-atlas-search.rst \ - --product-dir ~/workspace/docs-mongodb-internal/content/manual \ --versions manual,upcoming,v8.0,v7.0,v6.0 # Find differences and see what changed ./audit-cli compare file-contents \ ~/workspace/docs-mongodb-internal/content/manual/manual/source/includes/fact-atlas-search.rst \ - --product-dir ~/workspace/docs-mongodb-internal/content/manual \ --versions manual,upcoming,v8.0,v7.0,v6.0 \ --show-diff diff --git a/audit-cli/commands/compare/file-contents/file_contents.go b/audit-cli/commands/compare/file-contents/file_contents.go index 32a17bf..79fd6df 100644 --- a/audit-cli/commands/compare/file-contents/file_contents.go +++ b/audit-cli/commands/compare/file-contents/file_contents.go @@ -16,8 +16,10 @@ package file_contents import ( "fmt" + "path/filepath" "strings" + "github.com/mongodb/code-example-tooling/audit-cli/internal/pathresolver" "github.com/spf13/cobra" ) @@ -31,21 +33,21 @@ import ( // compare file-contents file1.rst file2.rst // // 2. Version comparison: -// compare file-contents file.rst --product-dir /path/to/product --versions v1,v2,v3 +// compare file-contents file.rst --versions v1,v2,v3 +// +// The product directory is automatically detected from the file path. 
// // Flags: -// - -p, --product-dir: Product directory path (required for version comparison) // - -V, --versions: Comma-separated list of versions (required for version comparison) // - --show-paths: Display file paths of files that differ // - -d, --show-diff: Display unified diff output // - -v, --verbose: Show detailed processing information func NewFileContentsCommand() *cobra.Command { var ( - productDir string - versions string - showPaths bool - showDiff bool - verbose bool + versions string + showPaths bool + showDiff bool + verbose bool ) cmd := &cobra.Command{ @@ -61,8 +63,8 @@ This command supports two modes: 2. Version comparison (one file argument + flags): Compare the same file across multiple documentation versions. + The product directory is automatically detected from the file path. Example: compare file-contents /path/to/manual/manual/source/file.rst \ - --product-dir /path/to/manual \ --versions manual,upcoming,v8.1,v8.0 The command provides progressive output detail: @@ -74,11 +76,10 @@ Files that don't exist in certain versions are reported separately and do not cause errors.`, Args: cobra.RangeArgs(1, 2), RunE: func(cmd *cobra.Command, args []string) error { - return runCompare(args, productDir, versions, showPaths, showDiff, verbose) + return runCompare(args, versions, showPaths, showDiff, verbose) }, } - cmd.Flags().StringVarP(&productDir, "product-dir", "p", "", "Product directory path (e.g., /path/to/manual)") cmd.Flags().StringVarP(&versions, "versions", "V", "", "Comma-separated list of versions (e.g., manual,upcoming,v8.1)") cmd.Flags().BoolVar(&showPaths, "show-paths", false, "Display file paths of files that differ") cmd.Flags().BoolVarP(&showDiff, "show-diff", "d", false, "Display unified diff output") @@ -94,7 +95,6 @@ do not cause errors.`, // // Parameters: // - args: Command line arguments (1 or 2 file paths) -// - productDir: Product directory path (for version comparison) // - versions: Comma-separated version list (for 
version comparison) // - showPaths: If true, show file paths // - showDiff: If true, show diffs @@ -102,23 +102,37 @@ do not cause errors.`, // // Returns: // - error: Any error encountered during comparison -func runCompare(args []string, productDir, versions string, showPaths, showDiff, verbose bool) error { +func runCompare(args []string, versions string, showPaths, showDiff, verbose bool) error { // Validate arguments based on mode if len(args) == 2 { // Direct comparison mode - if productDir != "" || versions != "" { - return fmt.Errorf("--product-dir and --versions cannot be used with two file arguments") + if versions != "" { + return fmt.Errorf("--versions cannot be used with two file arguments") } return runDirectComparison(args[0], args[1], showPaths, showDiff, verbose) } else if len(args) == 1 { // Version comparison mode - if productDir == "" { - return fmt.Errorf("--product-dir is required when comparing versions (use -p or --product-dir)") - } if versions == "" { return fmt.Errorf("--versions is required when comparing versions (use -V or --versions)") } - return runVersionComparison(args[0], productDir, versions, showPaths, showDiff, verbose) + + // Convert to absolute path + absPath, err := filepath.Abs(args[0]) + if err != nil { + return fmt.Errorf("failed to get absolute path: %w", err) + } + + // Auto-detect product directory from the file path + productDir, err := pathresolver.FindProductDirectory(absPath) + if err != nil { + return fmt.Errorf("failed to detect product directory from file path: %w\n\nPlease ensure the file is within a MongoDB documentation structure (e.g., /path/to/product/{version}/source/...)", err) + } + + if verbose { + fmt.Printf("Auto-detected product directory: %s\n", productDir) + } + + return runVersionComparison(absPath, productDir, versions, showPaths, showDiff, verbose) } return fmt.Errorf("expected 1 or 2 file arguments") From ec0d74b694bc2addbbbb7f272f953eff214df65b Mon Sep 17 00:00:00 2001 From: Dachary Carey 
Date: Fri, 5 Dec 2025 17:32:25 -0500 Subject: [PATCH 08/14] Move version handling logic out to internal package, rename internal package --- audit-cli/README.md | 52 ++++-- audit-cli/commands/analyze/includes/output.go | 4 +- audit-cli/commands/analyze/usage/analyzer.go | 6 +- .../compare/file-contents/comparer.go | 4 +- .../compare/file-contents/file_contents.go | 35 +++- .../compare/file-contents/version_resolver.go | 10 +- audit-cli/commands/count/pages/counter.go | 74 +++----- .../pathresolver.go | 2 +- .../pathresolver_test.go | 170 +++++++++++++++++- .../source_finder.go | 2 +- .../{pathresolver => projectinfo}/types.go | 2 +- .../version_resolver.go | 69 ++++++- audit-cli/internal/rst/include_resolver.go | 6 +- 13 files changed, 334 insertions(+), 102 deletions(-) rename audit-cli/internal/{pathresolver => projectinfo}/pathresolver.go (99%) rename audit-cli/internal/{pathresolver => projectinfo}/pathresolver_test.go (61%) rename audit-cli/internal/{pathresolver => projectinfo}/source_finder.go (98%) rename audit-cli/internal/{pathresolver => projectinfo}/types.go (98%) rename audit-cli/internal/{pathresolver => projectinfo}/version_resolver.go (76%) diff --git a/audit-cli/README.md b/audit-cli/README.md index 33a40d2..db7d09c 100644 --- a/audit-cli/README.md +++ b/audit-cli/README.md @@ -852,7 +852,11 @@ This command helps writers: # Compare with diff output ./audit-cli compare file-contents file1.rst file2.rst --show-diff -# Version comparison across MongoDB documentation versions +# Version comparison - auto-discovers all versions +./audit-cli compare file-contents \ + /path/to/manual/manual/source/includes/example.rst + +# Version comparison - specific versions only ./audit-cli compare file-contents \ /path/to/manual/manual/source/includes/example.rst \ --versions manual,upcoming,v8.0,v7.0 @@ -860,25 +864,25 @@ This command helps writers: # Show which files differ ./audit-cli compare file-contents \ /path/to/manual/manual/source/includes/example.rst \ - 
--versions manual,upcoming,v8.0,v7.0 \ --show-paths # Show detailed diffs ./audit-cli compare file-contents \ /path/to/manual/manual/source/includes/example.rst \ - --versions manual,upcoming,v8.0,v7.0 \ --show-diff -# Verbose output (show processing details) -./audit-cli compare file-contents file1.rst file2.rst -v +# Verbose output (show processing details and auto-discovered versions) +./audit-cli compare file-contents \ + /path/to/manual/manual/source/includes/example.rst \ + -v ``` **Flags:** -- `-V, --versions ` - Comma-separated list of versions (e.g., `manual,upcoming,v8.0`) +- `-V, --versions ` - Comma-separated list of versions (optional; auto-discovers all versions if not specified) - `--show-paths` - Display file paths grouped by status (matching, differing, not found) - `-d, --show-diff` - Display unified diff output (implies `--show-paths`) -- `-v, --verbose` - Show detailed processing information +- `-v, --verbose` - Show detailed processing information (including auto-discovered versions and product directory) **Comparison Modes:** @@ -897,15 +901,22 @@ This mode: **2. Version Comparison (Product Directory)** -Provide one file path plus `--versions`. The product directory is automatically detected from the file path: +Provide one file path. 
The product directory and versions are automatically detected from the file path: ```bash +# Auto-discover all versions +./audit-cli compare file-contents \ + /path/to/manual/manual/source/includes/example.rst + +# Or specify specific versions ./audit-cli compare file-contents \ /path/to/manual/manual/source/includes/example.rst \ --versions manual,upcoming,v8.0 ``` This mode: +- Automatically detects the product directory from the file path +- Auto-discovers all available versions (unless `--versions` is specified) - Extracts the relative path from the reference file - Resolves the same relative path in each version directory - Compares all versions against the reference file @@ -953,18 +964,21 @@ product-dir/ **Examples:** ```bash -# Check if a file is consistent across all versions +# Check if a file is consistent across all versions (auto-discovered) ./audit-cli compare file-contents \ - ~/workspace/docs-mongodb-internal/content/manual/manual/source/includes/fact-atlas-search.rst \ - --versions manual,upcoming,v8.0,v7.0,v6.0 + ~/workspace/docs-mongodb-internal/content/manual/manual/source/includes/fact-atlas-search.rst -# Find differences and see what changed +# Find differences and see what changed (all versions) ./audit-cli compare file-contents \ ~/workspace/docs-mongodb-internal/content/manual/manual/source/includes/fact-atlas-search.rst \ - --versions manual,upcoming,v8.0,v7.0,v6.0 \ --show-diff -# Compare two specific versions of a file +# Compare across specific versions only +./audit-cli compare file-contents \ + ~/workspace/docs-mongodb-internal/content/manual/manual/source/includes/fact-atlas-search.rst \ + --versions manual,upcoming,v8.0,v7.0,v6.0 + +# Compare two specific versions of a file directly ./audit-cli compare file-contents \ ~/workspace/docs-mongodb-internal/content/manual/manual/source/includes/example.rst \ ~/workspace/docs-mongodb-internal/content/manual/v8.0/source/includes/example.rst \ @@ -1237,7 +1251,7 @@ audit-cli/ │ ├── 
output.go # Output formatting │ └── types.go # Type definitions ├── internal/ # Internal packages -│ ├── pathresolver/ # Path resolution utilities +│ ├── projectinfo/ # Project structure and info utilities │ │ ├── pathresolver.go # Core path resolution │ │ ├── pathresolver_test.go # Tests │ │ ├── source_finder.go # Source directory detection @@ -1841,22 +1855,24 @@ used as the base for resolving relative include paths. ## Internal Packages -### `internal/pathresolver` +### `internal/projectinfo` -Provides centralized path resolution utilities for working with MongoDB documentation structure: +Provides centralized utilities for understanding MongoDB documentation project structure: - **Source directory detection** - Finds the documentation root by walking up the directory tree - **Project info detection** - Identifies product directory, version, and whether a project is versioned +- **Version discovery** - Automatically discovers all available versions in a product directory - **Version path resolution** - Resolves file paths across multiple documentation versions - **Relative path resolution** - Resolves paths relative to the source directory **Key Functions:** - `FindSourceDirectory(filePath string)` - Finds the source directory for a given file - `DetectProjectInfo(filePath string)` - Detects project structure information +- `DiscoverAllVersions(productDir string)` - Discovers all available versions in a product - `ResolveVersionPaths(referenceFile, productDir string, versions []string)` - Resolves paths across versions - `ResolveRelativeToSource(sourceDir, relativePath string)` - Resolves relative paths -See the code in `internal/pathresolver/` for implementation details. +See the code in `internal/projectinfo/` for implementation details. 
### `internal/rst` diff --git a/audit-cli/commands/analyze/includes/output.go b/audit-cli/commands/analyze/includes/output.go index b6d4e81..a71cdc2 100644 --- a/audit-cli/commands/analyze/includes/output.go +++ b/audit-cli/commands/analyze/includes/output.go @@ -5,7 +5,7 @@ import ( "path/filepath" "strings" - "github.com/mongodb/code-example-tooling/audit-cli/internal/pathresolver" + "github.com/mongodb/code-example-tooling/audit-cli/internal/projectinfo" ) // PrintTree prints the include tree structure. @@ -138,7 +138,7 @@ func PrintSummary(analysis *IncludeAnalysis) { // - string: Formatted path for display func formatDisplayPath(filePath string) string { // Try to find the source directory - sourceDir, err := pathresolver.FindSourceDirectory(filePath) + sourceDir, err := projectinfo.FindSourceDirectory(filePath) if err != nil { // If we can't find source directory, just return the base name return filepath.Base(filePath) diff --git a/audit-cli/commands/analyze/usage/analyzer.go b/audit-cli/commands/analyze/usage/analyzer.go index 39080ed..99aadc9 100644 --- a/audit-cli/commands/analyze/usage/analyzer.go +++ b/audit-cli/commands/analyze/usage/analyzer.go @@ -8,7 +8,7 @@ import ( "sort" "strings" - "github.com/mongodb/code-example-tooling/audit-cli/internal/pathresolver" + "github.com/mongodb/code-example-tooling/audit-cli/internal/projectinfo" "github.com/mongodb/code-example-tooling/audit-cli/internal/rst" ) @@ -44,7 +44,7 @@ func AnalyzeUsage(targetFile string, includeToctree bool, verbose bool, excludeP } // Find the source directory - sourceDir, err := pathresolver.FindSourceDirectory(absTargetFile) + sourceDir, err := projectinfo.FindSourceDirectory(absTargetFile) if err != nil { return nil, fmt.Errorf("failed to find source directory: %w\n\nThe source directory is detected by looking for a 'source' directory in the file's path.\nMake sure the target file is within a documentation repository with a 'source' directory.", err) } @@ -165,7 +165,7 @@ func 
AnalyzeUsageRecursive(targetFile string, includeToctree bool, verbose bool, } // Find the source directory - sourceDir, err := pathresolver.FindSourceDirectory(absTargetFile) + sourceDir, err := projectinfo.FindSourceDirectory(absTargetFile) if err != nil { return nil, fmt.Errorf("failed to find source directory: %w\n\nThe source directory is detected by looking for a 'source' directory in the file's path.\nMake sure the target file is within a documentation repository with a 'source' directory.", err) } diff --git a/audit-cli/commands/compare/file-contents/comparer.go b/audit-cli/commands/compare/file-contents/comparer.go index 4f23eb4..08b4fc0 100644 --- a/audit-cli/commands/compare/file-contents/comparer.go +++ b/audit-cli/commands/compare/file-contents/comparer.go @@ -5,7 +5,7 @@ import ( "os" "path/filepath" - "github.com/mongodb/code-example-tooling/audit-cli/internal/pathresolver" + "github.com/mongodb/code-example-tooling/audit-cli/internal/projectinfo" ) // CompareFiles performs a direct comparison between two files. 
@@ -162,7 +162,7 @@ func CompareVersions(referenceFile, productDir string, versions []string, genera // // Returns: // - FileComparison: The comparison result for this file -func compareFile(referencePath, referenceContent string, versionPath pathresolver.VersionPath, generateDiff bool, verbose bool) FileComparison { +func compareFile(referencePath, referenceContent string, versionPath projectinfo.VersionPath, generateDiff bool, verbose bool) FileComparison { comparison := FileComparison{ Version: versionPath.Version, FilePath: versionPath.FilePath, diff --git a/audit-cli/commands/compare/file-contents/file_contents.go b/audit-cli/commands/compare/file-contents/file_contents.go index 79fd6df..1eb73fd 100644 --- a/audit-cli/commands/compare/file-contents/file_contents.go +++ b/audit-cli/commands/compare/file-contents/file_contents.go @@ -19,7 +19,7 @@ import ( "path/filepath" "strings" - "github.com/mongodb/code-example-tooling/audit-cli/internal/pathresolver" + "github.com/mongodb/code-example-tooling/audit-cli/internal/projectinfo" "github.com/spf13/cobra" ) @@ -61,10 +61,19 @@ This command supports two modes: Compare two specific files directly. Example: compare file-contents file1.rst file2.rst -2. Version comparison (one file argument + flags): +2. Version comparison (one file argument): Compare the same file across multiple documentation versions. The product directory is automatically detected from the file path. - Example: compare file-contents /path/to/manual/manual/source/file.rst \ + + By default, all available versions are automatically discovered and compared. + You can optionally specify specific versions using --versions. 
+ + Examples: + # Compare across all versions (auto-discovered) + compare file-contents /path/to/manual/manual/source/file.rst + + # Compare across specific versions + compare file-contents /path/to/manual/manual/source/file.rst \ --versions manual,upcoming,v8.1,v8.0 The command provides progressive output detail: @@ -80,7 +89,7 @@ do not cause errors.`, }, } - cmd.Flags().StringVarP(&versions, "versions", "V", "", "Comma-separated list of versions (e.g., manual,upcoming,v8.1)") + cmd.Flags().StringVarP(&versions, "versions", "V", "", "Comma-separated list of versions (optional; auto-discovers all versions if not specified)") cmd.Flags().BoolVar(&showPaths, "show-paths", false, "Display file paths of files that differ") cmd.Flags().BoolVarP(&showDiff, "show-diff", "d", false, "Display unified diff output") cmd.Flags().BoolVarP(&verbose, "verbose", "v", false, "Show detailed processing information") @@ -112,10 +121,6 @@ func runCompare(args []string, versions string, showPaths, showDiff, verbose boo return runDirectComparison(args[0], args[1], showPaths, showDiff, verbose) } else if len(args) == 1 { // Version comparison mode - if versions == "" { - return fmt.Errorf("--versions is required when comparing versions (use -V or --versions)") - } - // Convert to absolute path absPath, err := filepath.Abs(args[0]) if err != nil { @@ -123,7 +128,7 @@ func runCompare(args []string, versions string, showPaths, showDiff, verbose boo } // Auto-detect product directory from the file path - productDir, err := pathresolver.FindProductDirectory(absPath) + productDir, err := projectinfo.FindProductDirectory(absPath) if err != nil { return fmt.Errorf("failed to detect product directory from file path: %w\n\nPlease ensure the file is within a MongoDB documentation structure (e.g., /path/to/product/{version}/source/...)", err) } @@ -132,6 +137,18 @@ func runCompare(args []string, versions string, showPaths, showDiff, verbose boo fmt.Printf("Auto-detected product directory: %s\n", 
productDir) } + // If no versions specified, auto-discover all versions + if versions == "" { + discoveredVersions, err := projectinfo.DiscoverAllVersions(productDir) + if err != nil { + return fmt.Errorf("failed to discover versions: %w\n\nYou can specify versions manually using --versions", err) + } + versions = strings.Join(discoveredVersions, ",") + if verbose { + fmt.Printf("Auto-discovered versions: %s\n", versions) + } + } + return runVersionComparison(absPath, productDir, versions, showPaths, showDiff, verbose) } diff --git a/audit-cli/commands/compare/file-contents/version_resolver.go b/audit-cli/commands/compare/file-contents/version_resolver.go index 52f43a1..487d51c 100644 --- a/audit-cli/commands/compare/file-contents/version_resolver.go +++ b/audit-cli/commands/compare/file-contents/version_resolver.go @@ -1,7 +1,7 @@ package file_contents import ( - "github.com/mongodb/code-example-tooling/audit-cli/internal/pathresolver" + "github.com/mongodb/code-example-tooling/audit-cli/internal/projectinfo" ) // ResolveVersionPaths resolves file paths for all specified versions. @@ -25,10 +25,10 @@ import ( // - versions: List of version identifiers // // Returns: -// - []pathresolver.VersionPath: List of resolved version paths +// - []projectinfo.VersionPath: List of resolved version paths // - error: Any error encountered during resolution -func ResolveVersionPaths(referenceFile string, productDir string, versions []string) ([]pathresolver.VersionPath, error) { - return pathresolver.ResolveVersionPaths(referenceFile, productDir, versions) +func ResolveVersionPaths(referenceFile string, productDir string, versions []string) ([]projectinfo.VersionPath, error) { + return projectinfo.ResolveVersionPaths(referenceFile, productDir, versions) } // ExtractVersionFromPath extracts the version identifier from a file path. 
@@ -49,6 +49,6 @@ func ResolveVersionPaths(referenceFile string, productDir string, versions []str // - string: The version identifier // - error: Any error encountered during extraction func ExtractVersionFromPath(filePath string, productDir string) (string, error) { - return pathresolver.ExtractVersionFromPath(filePath, productDir) + return projectinfo.ExtractVersionFromPath(filePath, productDir) } diff --git a/audit-cli/commands/count/pages/counter.go b/audit-cli/commands/count/pages/counter.go index 9b99f16..8bd8cfc 100644 --- a/audit-cli/commands/count/pages/counter.go +++ b/audit-cli/commands/count/pages/counter.go @@ -6,6 +6,8 @@ import ( "os" "path/filepath" "strings" + + "github.com/mongodb/code-example-tooling/audit-cli/internal/projectinfo" ) // CountPages counts .txt files in the content directory. @@ -147,7 +149,7 @@ func CountPages(dirPath string, forProject string, excludeDirs []string, current // For non-versioned projects, versionName will be empty, which is fine if len(versions) > 0 { // This is a versioned project - only count if in current version - if !isCurrentVersion(versionName) { + if !projectinfo.IsCurrentVersion(versionName) { return nil } } @@ -227,72 +229,38 @@ func extractVersionFromPath(relPath string, projectName string) string { } // Check if parts[1] looks like a version directory - if isVersionDirectory(parts[1]) { + if projectinfo.IsVersionDirectory(parts[1]) { return parts[1] } return "" } -// isVersionDirectory checks if a directory name looks like a version directory. -// Version directories can be: -// - "current" or "manual" (current version) -// - "upcoming" (upcoming version) -// - Starting with "v" (e.g., "v8.0", "v7.3") -func isVersionDirectory(dirName string) bool { - if dirName == "current" || dirName == "manual" || dirName == "upcoming" { - return true - } - return strings.HasPrefix(dirName, "v") -} - -// isCurrentVersion checks if a version name represents the current version. 
-// The current version is either "current" or "manual". -func isCurrentVersion(versionName string) bool { - return versionName == "current" || versionName == "manual" -} - // findVersionDirectories finds all version directories within a project directory. // Returns a list of VersionInfo structs with version names and whether they're current. // If the project has no versions (source is directly under project), returns empty slice. func findVersionDirectories(projectDir string) ([]VersionInfo, error) { - entries, err := os.ReadDir(projectDir) - if err != nil { - return nil, fmt.Errorf("failed to read project directory: %w", err) + // Check if there's a direct "source" directory (non-versioned project) + sourceDir := filepath.Join(projectDir, "source") + if _, err := os.Stat(sourceDir); err == nil { + // Non-versioned project + return []VersionInfo{}, nil } - var versions []VersionInfo - hasSourceDir := false - - for _, entry := range entries { - if !entry.IsDir() { - continue - } - - dirName := entry.Name() - - // Check if there's a direct "source" directory (non-versioned project) - if dirName == "source" { - hasSourceDir = true - continue - } - - // Check if this looks like a version directory - if isVersionDirectory(dirName) { - // Verify it has a source subdirectory - sourceDir := filepath.Join(projectDir, dirName, "source") - if _, err := os.Stat(sourceDir); err == nil { - versions = append(versions, VersionInfo{ - Name: dirName, - IsCurrent: isCurrentVersion(dirName), - }) - } - } + // Use projectinfo to discover all versions + versionNames, err := projectinfo.DiscoverAllVersions(projectDir) + if err != nil { + // If no versions found, treat as non-versioned + return []VersionInfo{}, nil } - // If there's a direct source directory, this is a non-versioned project - if hasSourceDir { - return []VersionInfo{}, nil + // Convert to VersionInfo structs with IsCurrent flag + var versions []VersionInfo + for _, name := range versionNames { + versions = 
append(versions, VersionInfo{ + Name: name, + IsCurrent: projectinfo.IsCurrentVersion(name), + }) } return versions, nil diff --git a/audit-cli/internal/pathresolver/pathresolver.go b/audit-cli/internal/projectinfo/pathresolver.go similarity index 99% rename from audit-cli/internal/pathresolver/pathresolver.go rename to audit-cli/internal/projectinfo/pathresolver.go index 7294b4a..850737d 100644 --- a/audit-cli/internal/pathresolver/pathresolver.go +++ b/audit-cli/internal/projectinfo/pathresolver.go @@ -1,4 +1,4 @@ -package pathresolver +package projectinfo import ( "fmt" diff --git a/audit-cli/internal/pathresolver/pathresolver_test.go b/audit-cli/internal/projectinfo/pathresolver_test.go similarity index 61% rename from audit-cli/internal/pathresolver/pathresolver_test.go rename to audit-cli/internal/projectinfo/pathresolver_test.go index 6766d97..96d5cf2 100644 --- a/audit-cli/internal/pathresolver/pathresolver_test.go +++ b/audit-cli/internal/projectinfo/pathresolver_test.go @@ -1,4 +1,4 @@ -package pathresolver +package projectinfo import ( "path/filepath" @@ -163,7 +163,7 @@ func TestExtractVersionFromPath(t *testing.T) { func TestResolveRelativeToSource(t *testing.T) { sourceDir := "/path/to/manual/v8.0/source" - + tests := []struct { name string relativePath string @@ -180,7 +180,7 @@ func TestResolveRelativeToSource(t *testing.T) { want: "/path/to/manual/v8.0/source/includes/file.rst", }, } - + for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { got, err := ResolveRelativeToSource(sourceDir, tt.relativePath) @@ -195,3 +195,167 @@ func TestResolveRelativeToSource(t *testing.T) { } } +func TestIsVersionDirectory(t *testing.T) { + tests := []struct { + name string + dirName string + want bool + }{ + { + name: "current version", + dirName: "current", + want: true, + }, + { + name: "manual version", + dirName: "manual", + want: true, + }, + { + name: "upcoming version", + dirName: "upcoming", + want: true, + }, + { + name: "v8.0 version", + dirName: 
"v8.0", + want: true, + }, + { + name: "v7.3 version", + dirName: "v7.3", + want: true, + }, + { + name: "v10.0 version", + dirName: "v10.0", + want: true, + }, + { + name: "source directory", + dirName: "source", + want: false, + }, + { + name: "includes directory", + dirName: "includes", + want: false, + }, + { + name: "random directory", + dirName: "random", + want: false, + }, + { + name: "build directory", + dirName: "build", + want: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := IsVersionDirectory(tt.dirName) + if got != tt.want { + t.Errorf("IsVersionDirectory(%q) = %v, want %v", tt.dirName, got, tt.want) + } + }) + } +} + +func TestIsCurrentVersion(t *testing.T) { + tests := []struct { + name string + versionName string + want bool + }{ + { + name: "current is current", + versionName: "current", + want: true, + }, + { + name: "manual is current", + versionName: "manual", + want: true, + }, + { + name: "upcoming is not current", + versionName: "upcoming", + want: false, + }, + { + name: "v8.0 is not current", + versionName: "v8.0", + want: false, + }, + { + name: "v7.3 is not current", + versionName: "v7.3", + want: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := IsCurrentVersion(tt.versionName) + if got != tt.want { + t.Errorf("IsCurrentVersion(%q) = %v, want %v", tt.versionName, got, tt.want) + } + }) + } +} + +func TestDiscoverAllVersions(t *testing.T) { + // Get absolute path to test data product directory + testFile := "../../testdata/compare/product/v8.0/source/includes/example.rst" + absTestFile, _ := filepath.Abs(testFile) + + // Get product directory (parent of v8.0) + sourceDir := filepath.Dir(absTestFile) // .../includes + sourceDir = filepath.Dir(sourceDir) // .../source + versionDir := filepath.Dir(sourceDir) // .../v8.0 + productDir := filepath.Dir(versionDir) // .../product + + got, err := DiscoverAllVersions(productDir) + if err != nil { + 
t.Fatalf("DiscoverAllVersions() error = %v", err) + } + + // We expect to find: manual, upcoming, v8.0 + expectedVersions := map[string]bool{ + "manual": true, + "upcoming": true, + "v8.0": true, + } + + if len(got) != len(expectedVersions) { + t.Errorf("DiscoverAllVersions() returned %d versions, want %d. Got: %v", len(got), len(expectedVersions), got) + } + + // Check that all expected versions are present + for _, version := range got { + if !expectedVersions[version] { + t.Errorf("DiscoverAllVersions() returned unexpected version: %v", version) + } + } + + // Check that all expected versions were found + foundVersions := make(map[string]bool) + for _, version := range got { + foundVersions[version] = true + } + for expectedVersion := range expectedVersions { + if !foundVersions[expectedVersion] { + t.Errorf("DiscoverAllVersions() did not find expected version: %v", expectedVersion) + } + } +} + +func TestDiscoverAllVersionsNonExistentDir(t *testing.T) { + _, err := DiscoverAllVersions("/nonexistent/path/to/product") + if err == nil { + t.Error("DiscoverAllVersions() expected error for non-existent directory, got nil") + } +} + diff --git a/audit-cli/internal/pathresolver/source_finder.go b/audit-cli/internal/projectinfo/source_finder.go similarity index 98% rename from audit-cli/internal/pathresolver/source_finder.go rename to audit-cli/internal/projectinfo/source_finder.go index 77395f6..fb2d836 100644 --- a/audit-cli/internal/pathresolver/source_finder.go +++ b/audit-cli/internal/projectinfo/source_finder.go @@ -1,4 +1,4 @@ -package pathresolver +package projectinfo import ( "fmt" diff --git a/audit-cli/internal/pathresolver/types.go b/audit-cli/internal/projectinfo/types.go similarity index 98% rename from audit-cli/internal/pathresolver/types.go rename to audit-cli/internal/projectinfo/types.go index f6b071c..575371f 100644 --- a/audit-cli/internal/pathresolver/types.go +++ b/audit-cli/internal/projectinfo/types.go @@ -1,4 +1,4 @@ -package pathresolver 
+package projectinfo // ProjectInfo contains information about a documentation project's structure. // diff --git a/audit-cli/internal/pathresolver/version_resolver.go b/audit-cli/internal/projectinfo/version_resolver.go similarity index 76% rename from audit-cli/internal/pathresolver/version_resolver.go rename to audit-cli/internal/projectinfo/version_resolver.go index f7c6344..16b2f86 100644 --- a/audit-cli/internal/pathresolver/version_resolver.go +++ b/audit-cli/internal/projectinfo/version_resolver.go @@ -1,7 +1,8 @@ -package pathresolver +package projectinfo import ( "fmt" + "os" "path/filepath" "strings" ) @@ -194,3 +195,69 @@ func IsVersionedProject(sourceDir string) (bool, error) { return parentName != "", nil } +// IsVersionDirectory checks if a directory name looks like a version directory. +// Version directories can be: +// - "current" or "manual" (current version) +// - "upcoming" (upcoming version) +// - Starting with "v" (e.g., "v8.0", "v7.3") +func IsVersionDirectory(dirName string) bool { + if dirName == "current" || dirName == "manual" || dirName == "upcoming" { + return true + } + return strings.HasPrefix(dirName, "v") +} + +// IsCurrentVersion checks if a version name represents the current version. +// The current version is either "current" or "manual". +func IsCurrentVersion(versionName string) bool { + return versionName == "current" || versionName == "manual" +} + +// DiscoverAllVersions finds all version directories within a product directory. +// +// This function scans the product directory to find all subdirectories that: +// 1. Look like version directories (e.g., "manual", "upcoming", "v8.0") +// 2. 
Contain a "source" subdirectory +// +// Parameters: +// - productDir: The absolute path to the product directory +// +// Returns: +// - []string: List of version identifiers found +// - error: Any error encountered during discovery +func DiscoverAllVersions(productDir string) ([]string, error) { + entries, err := os.ReadDir(productDir) + if err != nil { + return nil, fmt.Errorf("failed to read product directory: %w", err) + } + + var versions []string + + for _, entry := range entries { + if !entry.IsDir() { + continue + } + + dirName := entry.Name() + + // Skip the "source" directory itself (for non-versioned projects) + if dirName == "source" { + continue + } + + // Check if this looks like a version directory + if IsVersionDirectory(dirName) { + // Verify it has a source subdirectory + sourceDir := filepath.Join(productDir, dirName, "source") + if info, err := os.Stat(sourceDir); err == nil && info.IsDir() { + versions = append(versions, dirName) + } + } + } + + if len(versions) == 0 { + return nil, fmt.Errorf("no version directories found in %s", productDir) + } + + return versions, nil +} diff --git a/audit-cli/internal/rst/include_resolver.go b/audit-cli/internal/rst/include_resolver.go index d57243e..61bad68 100644 --- a/audit-cli/internal/rst/include_resolver.go +++ b/audit-cli/internal/rst/include_resolver.go @@ -7,7 +7,7 @@ import ( "path/filepath" "strings" - "github.com/mongodb/code-example-tooling/audit-cli/internal/pathresolver" + "github.com/mongodb/code-example-tooling/audit-cli/internal/projectinfo" ) // FindIncludeDirectives finds all include directives in a file and resolves their paths. 
@@ -137,7 +137,7 @@ func FindToctreeEntries(filePath string) ([]string, error) { // - error: Error if the document cannot be found func ResolveToctreePath(currentFilePath, docName string) (string, error) { // Find the source directory - sourceDir, err := pathresolver.FindSourceDirectory(currentFilePath) + sourceDir, err := projectinfo.FindSourceDirectory(currentFilePath) if err != nil { return "", err } @@ -196,7 +196,7 @@ func ResolveIncludePath(currentFilePath, includePath string) (string, error) { } // Find the source directory by walking up from the current file - sourceDir, err := pathresolver.FindSourceDirectory(currentFilePath) + sourceDir, err := projectinfo.FindSourceDirectory(currentFilePath) if err != nil { return "", err } From 8799055a1bfd4e0dceeb2cffc1e4fcd6bdf6b27f Mon Sep 17 00:00:00 2001 From: Dachary Carey Date: Fri, 5 Dec 2025 17:54:50 -0500 Subject: [PATCH 09/14] Remove unneeded hint from output in direct file comparison mode --- audit-cli/commands/compare/file-contents/output.go | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/audit-cli/commands/compare/file-contents/output.go b/audit-cli/commands/compare/file-contents/output.go index d9db245..4634d68 100644 --- a/audit-cli/commands/compare/file-contents/output.go +++ b/audit-cli/commands/compare/file-contents/output.go @@ -74,10 +74,16 @@ func printSummary(result *ComparisonResult) { fmt.Printf(" - %d version(s) had errors\n", result.ErrorFiles) } - // Show hints - fmt.Println() - fmt.Println("Use --show-paths to see which files differ") - fmt.Println("Use --show-diff to see the differences") + // Show hints (only in version comparison mode) + if result.ReferenceVersion != "" { + fmt.Println() + fmt.Println("Use --show-paths to see which files differ") + fmt.Println("Use --show-diff to see the differences") + } else { + // Direct comparison mode - only show diff hint + fmt.Println() + fmt.Println("Use --show-diff to see the differences") + } } else if 
result.NotFoundFiles > 0 || result.ErrorFiles > 0 { // No differences, but some files not found or had errors fmt.Printf("āœ“ No differences found among existing files\n") From f1a08d5683e9abf9dcbbf9ffdadbbdde3b81e118 Mon Sep 17 00:00:00 2001 From: Dachary Carey Date: Mon, 8 Dec 2025 10:49:47 -0500 Subject: [PATCH 10/14] Add a --preserve-dirs flag for recursive code example extraction --- audit-cli/README.md | 8 ++ .../extract/code-examples/code_examples.go | 21 ++-- .../code-examples/code_examples_test.go | 119 ++++++++++++++++-- .../commands/extract/code-examples/writer.go | 39 +++++- 4 files changed, 167 insertions(+), 20 deletions(-) diff --git a/audit-cli/README.md b/audit-cli/README.md index db7d09c..5ecbd17 100644 --- a/audit-cli/README.md +++ b/audit-cli/README.md @@ -102,6 +102,9 @@ This command helps writers: # Extract recursively from all subdirectories ./audit-cli extract code-examples path/to/docs -o ./output -r +# Extract recursively and preserve directory structure +./audit-cli extract code-examples path/to/docs -o ./output -r --preserve-dirs + # Follow include directives ./audit-cli extract code-examples path/to/file.rst -o ./output -f @@ -121,6 +124,11 @@ This command helps writers: - `-r, --recursive` - Recursively scan directories for RST files. If you do not provide this flag, the tool will only extract code examples from the top-level RST file. If you do provide this flag, the tool will recursively scan all subdirectories for RST files and extract code examples from all files. +- `--preserve-dirs` - Preserve directory structure in output (use with `--recursive`). By default, all extracted files + are written to a flat structure in the output directory. When this flag is enabled with `--recursive`, the tool will + preserve the directory structure relative to the input directory. 
For example, if extracting from `docs/source/` and + a file is located at `docs/source/includes/example.rst`, the output will be written to `output/includes/example.*.ext` + instead of `output/example.*.ext`. - `-f, --follow-includes` - Follow `.. include::` directives in RST files. If you do not provide this flag, the tool will only extract code examples from the top-level RST file. If you do provide this flag, the tool will follow any `.. include::` directives in the RST file and extract code examples from all included files. When combined with `-r`, diff --git a/audit-cli/commands/extract/code-examples/code_examples.go b/audit-cli/commands/extract/code-examples/code_examples.go index 475806f..1d8c568 100644 --- a/audit-cli/commands/extract/code-examples/code_examples.go +++ b/audit-cli/commands/extract/code-examples/code_examples.go @@ -29,6 +29,7 @@ import ( // - -o, --output: Output directory for extracted files // - --dry-run: Show what would be extracted without writing files // - -v, --verbose: Show detailed processing information +// - --preserve-dirs: Preserve directory structure when used with --recursive func NewCodeExamplesCommand() *cobra.Command { var ( recursive bool @@ -36,6 +37,7 @@ func NewCodeExamplesCommand() *cobra.Command { outputDir string dryRun bool verbose bool + preserveDirs bool ) cmd := &cobra.Command{ @@ -46,7 +48,7 @@ and output them as individual files.`, Args: cobra.ExactArgs(1), RunE: func(cmd *cobra.Command, args []string) error { filePath := args[0] - return runExtract(filePath, recursive, followIncludes, outputDir, dryRun, verbose) + return runExtract(filePath, recursive, followIncludes, outputDir, dryRun, verbose, preserveDirs) }, } @@ -55,6 +57,7 @@ and output them as individual files.`, cmd.Flags().StringVarP(&outputDir, "output", "o", "./output", "Output directory for code example files") cmd.Flags().BoolVar(&dryRun, "dry-run", false, "Show what would be outputted without writing files") cmd.Flags().BoolVarP(&verbose, 
"verbose", "v", false, "Provide additional information during execution") + cmd.Flags().BoolVar(&preserveDirs, "preserve-dirs", false, "Preserve directory structure in output (use with --recursive)") return cmd } @@ -71,12 +74,13 @@ and output them as individual files.`, // - followIncludes: If true, follow .. include:: directives // - dryRun: If true, show what would be extracted without writing files // - verbose: If true, show detailed processing information +// - preserveDirs: If true, preserve directory structure in output (use with recursive) // // Returns: // - *Report: Statistics about the extraction operation // - error: Any error encountered during extraction -func RunExtract(filePath string, outputDir string, recursive bool, followIncludes bool, dryRun bool, verbose bool) (*Report, error) { - report, err := runExtractInternal(filePath, recursive, followIncludes, outputDir, dryRun, verbose) +func RunExtract(filePath string, outputDir string, recursive bool, followIncludes bool, dryRun bool, verbose bool, preserveDirs bool) (*Report, error) { + report, err := runExtractInternal(filePath, recursive, followIncludes, outputDir, dryRun, verbose, preserveDirs) return report, err } @@ -84,13 +88,13 @@ func RunExtract(filePath string, outputDir string, recursive bool, followInclude // // This is a thin wrapper around runExtractInternal that discards the report // and only returns errors, suitable for use in the CLI command handler. 
-func runExtract(filePath string, recursive bool, followIncludes bool, outputDir string, dryRun bool, verbose bool) error { - _, err := runExtractInternal(filePath, recursive, followIncludes, outputDir, dryRun, verbose) +func runExtract(filePath string, recursive bool, followIncludes bool, outputDir string, dryRun bool, verbose bool, preserveDirs bool) error { + _, err := runExtractInternal(filePath, recursive, followIncludes, outputDir, dryRun, verbose, preserveDirs) return err } // runExtractInternal executes the extraction operation -func runExtractInternal(filePath string, recursive bool, followIncludes bool, outputDir string, dryRun bool, verbose bool) (*Report, error) { +func runExtractInternal(filePath string, recursive bool, followIncludes bool, outputDir string, dryRun bool, verbose bool, preserveDirs bool) (*Report, error) { fileInfo, err := os.Stat(filePath) if err != nil { return nil, fmt.Errorf("failed to access path %s: %w", filePath, err) @@ -99,6 +103,7 @@ func runExtractInternal(filePath string, recursive bool, followIncludes bool, ou report := NewReport() var filesToProcess []string + var rootPath string if fileInfo.IsDir() { if verbose { @@ -108,8 +113,10 @@ func runExtractInternal(filePath string, recursive bool, followIncludes bool, ou if err != nil { return nil, fmt.Errorf("failed to traverse directory: %w", err) } + rootPath = filePath } else { filesToProcess = []string{filePath} + rootPath = "" } var filteredFiles []string @@ -151,7 +158,7 @@ func runExtractInternal(filePath string, recursive bool, followIncludes bool, ou } for _, example := range examples { - outputPath, err := WriteCodeExample(example, outputDir, dryRun) + outputPath, err := WriteCodeExample(example, outputDir, rootPath, dryRun, preserveDirs) if err != nil { fmt.Fprintf(os.Stderr, "Warning: failed to write code example: %v\n", err) continue diff --git a/audit-cli/commands/extract/code-examples/code_examples_test.go 
b/audit-cli/commands/extract/code-examples/code_examples_test.go index b6c63ee..9bad1cf 100644 --- a/audit-cli/commands/extract/code-examples/code_examples_test.go +++ b/audit-cli/commands/extract/code-examples/code_examples_test.go @@ -21,7 +21,7 @@ func TestLiteralIncludeDirective(t *testing.T) { defer os.RemoveAll(tempDir) // Run the extract command - report, err := RunExtract(inputFile, tempDir, false, false, false, false) + report, err := RunExtract(inputFile, tempDir, false, false, false, false, false) if err != nil { t.Fatalf("RunExtract failed: %v", err) } @@ -110,7 +110,7 @@ func TestIncludeDirectiveFollowing(t *testing.T) { defer os.RemoveAll(tempDir) // Run the extract command with include following enabled - report, err := RunExtract(inputFile, tempDir, false, true, false, false) + report, err := RunExtract(inputFile, tempDir, false, true, false, false, false) if err != nil { t.Fatalf("RunExtract failed: %v", err) } @@ -166,7 +166,7 @@ func TestCodeBlockDirective(t *testing.T) { defer os.RemoveAll(tempDir) // Run extract on code-block test file - report, err := RunExtract(inputFile, tempDir, false, false, false, false) + report, err := RunExtract(inputFile, tempDir, false, false, false, false, false) if err != nil { t.Fatalf("RunExtract failed: %v", err) } @@ -229,7 +229,7 @@ func TestNestedCodeBlockDirective(t *testing.T) { defer os.RemoveAll(tempDir) // Run extract on nested code-block test file - report, err := RunExtract(inputFile, tempDir, false, false, false, false) + report, err := RunExtract(inputFile, tempDir, false, false, false, false, false) if err != nil { t.Fatalf("RunExtract failed: %v", err) } @@ -304,7 +304,7 @@ func TestIoCodeBlockDirective(t *testing.T) { defer os.RemoveAll(tempDir) // Run extract on io-code-block test file - report, err := RunExtract(inputFile, tempDir, false, false, false, false) + report, err := RunExtract(inputFile, tempDir, false, false, false, false, false) if err != nil { t.Fatalf("RunExtract failed: %v", err) 
} @@ -405,7 +405,7 @@ func TestEmptyFile(t *testing.T) { } // Run the extract command - report, err := RunExtract(emptyFile, outputDir, false, false, false, false) + report, err := RunExtract(emptyFile, outputDir, false, false, false, false, false) if err != nil { t.Fatalf("RunExtract failed: %v", err) } @@ -435,7 +435,7 @@ func TestRecursiveDirectoryScanning(t *testing.T) { defer os.RemoveAll(tempDir) // Run the extract command with recursive=true, followIncludes=false - report, err := RunExtract(inputDir, tempDir, true, false, false, false) + report, err := RunExtract(inputDir, tempDir, true, false, false, false, false) if err != nil { t.Fatalf("RunExtract failed: %v", err) } @@ -485,7 +485,7 @@ func TestFollowIncludesWithoutRecursive(t *testing.T) { defer os.RemoveAll(tempDir) // Run the extract command with recursive=false, followIncludes=true - report, err := RunExtract(inputFile, tempDir, false, true, false, false) + report, err := RunExtract(inputFile, tempDir, false, true, false, false, false) if err != nil { t.Fatalf("RunExtract failed: %v", err) } @@ -526,7 +526,7 @@ func TestRecursiveWithFollowIncludes(t *testing.T) { defer os.RemoveAll(tempDir) // Run the extract command with recursive=true, followIncludes=true - report, err := RunExtract(inputDir, tempDir, true, true, false, false) + report, err := RunExtract(inputDir, tempDir, true, true, false, false, false) if err != nil { t.Fatalf("RunExtract failed: %v", err) } @@ -573,7 +573,7 @@ func TestNoFlagsOnDirectory(t *testing.T) { defer os.RemoveAll(tempDir) // Run the extract command with recursive=false, followIncludes=false on a directory - report, err := RunExtract(inputDir, tempDir, false, false, false, false) + report, err := RunExtract(inputDir, tempDir, false, false, false, false, false) if err != nil { t.Fatalf("RunExtract failed: %v", err) } @@ -596,3 +596,102 @@ func TestNoFlagsOnDirectory(t *testing.T) { report.OutputFilesWritten) } } + +// TestPreserveDirs tests that --preserve-dirs flag 
preserves directory structure +func TestPreserveDirs(t *testing.T) { + // Setup paths + testDataDir := filepath.Join("..", "..", "..", "testdata") + inputDir := filepath.Join(testDataDir, "input-files", "source") + + // Create temporary output directory + tempDir, err := os.MkdirTemp("", "audit-test-preserve-dirs-*") + if err != nil { + t.Fatalf("Failed to create temp directory: %v", err) + } + defer os.RemoveAll(tempDir) + + // Run the extract command with recursive=true, preserveDirs=true + report, err := RunExtract(inputDir, tempDir, true, false, false, false, true) + if err != nil { + t.Fatalf("RunExtract failed: %v", err) + } + + // Verify that files were extracted + if report.OutputFilesWritten < 30 { + t.Errorf("Expected at least 30 output files, got %d", report.OutputFilesWritten) + } + + // Verify that directory structure is preserved + // Check that files from includes/ subdirectory are in the includes/ subdirectory of output + includesOutputDir := filepath.Join(tempDir, "includes") + if _, err := os.Stat(includesOutputDir); os.IsNotExist(err) { + t.Errorf("Expected includes/ subdirectory in output, but it doesn't exist") + } + + // Check that files from the root are in the root of output + // literalinclude-test.rst should produce files in the root + rootFiles, err := os.ReadDir(tempDir) + if err != nil { + t.Fatalf("Failed to read output directory: %v", err) + } + + hasRootFiles := false + for _, entry := range rootFiles { + if !entry.IsDir() { + hasRootFiles = true + break + } + } + + if !hasRootFiles { + t.Errorf("Expected files in the root of output directory") + } + + // Verify that files from includes/ are in the includes/ subdirectory + if _, err := os.Stat(includesOutputDir); err == nil { + includesFiles, err := os.ReadDir(includesOutputDir) + if err != nil { + t.Fatalf("Failed to read includes output directory: %v", err) + } + + if len(includesFiles) == 0 { + t.Errorf("Expected files in includes/ subdirectory of output") + } + } +} + +// 
TestPreserveDirsWithoutRecursive tests that --preserve-dirs without --recursive still works +func TestPreserveDirsWithoutRecursive(t *testing.T) { + // Setup paths + testDataDir := filepath.Join("..", "..", "..", "testdata") + inputFile := filepath.Join(testDataDir, "input-files", "source", "literalinclude-test.rst") + + // Create temporary output directory + tempDir, err := os.MkdirTemp("", "audit-test-preserve-single-*") + if err != nil { + t.Fatalf("Failed to create temp directory: %v", err) + } + defer os.RemoveAll(tempDir) + + // Run the extract command with recursive=false, preserveDirs=true + // This should work but have no effect since we're processing a single file + report, err := RunExtract(inputFile, tempDir, false, false, false, false, true) + if err != nil { + t.Fatalf("RunExtract failed: %v", err) + } + + // Verify that files were extracted + if report.OutputFilesWritten != 7 { + t.Errorf("Expected 7 output files, got %d", report.OutputFilesWritten) + } + + // All files should be in the root since we're processing a single file + files, err := os.ReadDir(tempDir) + if err != nil { + t.Fatalf("Failed to read output directory: %v", err) + } + + if len(files) != 7 { + t.Errorf("Expected 7 files in output directory, got %d", len(files)) + } +} diff --git a/audit-cli/commands/extract/code-examples/writer.go b/audit-cli/commands/extract/code-examples/writer.go index c8a6670..15f1d52 100644 --- a/audit-cli/commands/extract/code-examples/writer.go +++ b/audit-cli/commands/extract/code-examples/writer.go @@ -11,24 +11,57 @@ import ( // // Generates a standardized filename and writes the code content to that file. // If dryRun is true, returns the filename without actually writing the file. +// If preserveDirs is true and rootPath is provided, preserves the directory structure +// relative to rootPath in the output directory. 
// // Parameters: // - example: The code example to write // - outputDir: Directory where the file should be written +// - rootPath: Root directory for computing relative paths (empty string if not preserving dirs) // - dryRun: If true, skip writing and only return the filename +// - preserveDirs: If true, preserve directory structure in output // // Returns: // - string: The full path to the output file // - error: Any error encountered during writing -func WriteCodeExample(example CodeExample, outputDir string, dryRun bool) (string, error) { +func WriteCodeExample(example CodeExample, outputDir string, rootPath string, dryRun bool, preserveDirs bool) (string, error) { filename := GenerateOutputFilename(example) - outputPath := filepath.Join(outputDir, filename) + + var outputPath string + var targetDir string + + if preserveDirs && rootPath != "" { + // Compute the relative path from rootPath to the source file's directory + absSourceFile, err := filepath.Abs(example.SourceFile) + if err != nil { + return "", fmt.Errorf("failed to get absolute path for source file: %w", err) + } + + absRootPath, err := filepath.Abs(rootPath) + if err != nil { + return "", fmt.Errorf("failed to get absolute path for root: %w", err) + } + + sourceDir := filepath.Dir(absSourceFile) + relPath, err := filepath.Rel(absRootPath, sourceDir) + if err != nil { + return "", fmt.Errorf("failed to compute relative path: %w", err) + } + + // Create the target directory preserving the structure + targetDir = filepath.Join(outputDir, relPath) + outputPath = filepath.Join(targetDir, filename) + } else { + // Flat structure - all files in the output directory + targetDir = outputDir + outputPath = filepath.Join(outputDir, filename) + } if dryRun { return outputPath, nil } - if err := os.MkdirAll(outputDir, 0755); err != nil { + if err := os.MkdirAll(targetDir, 0755); err != nil { return "", fmt.Errorf("failed to create output directory: %w", err) } From a1a6445d447210e2970b404a034ce0bbf35939c8 Mon 
Sep 17 00:00:00 2001 From: Dachary Carey Date: Mon, 8 Dec 2025 12:52:45 -0500 Subject: [PATCH 11/14] Handle sub-procedures in procedure extraction, procedure bugfixes --- .../code-examples/code_examples_test.go | 5 +- .../commands/extract/procedures/procedures.go | 95 +++++- audit-cli/docs/PROCEDURE_PARSING.md | 184 ++++++++++- audit-cli/internal/rst/parse_procedures.go | 307 +++++++++++++++++- .../internal/rst/parse_procedures_test.go | 230 +++++++++++++ audit-cli/internal/rst/procedure_types.go | 21 +- .../source/continuation-marker-test.rst | 38 +++ .../source/rotate-key-sharded-cluster.txt | 269 +++++++++++++++ 8 files changed, 1123 insertions(+), 26 deletions(-) create mode 100644 audit-cli/testdata/input-files/source/continuation-marker-test.rst create mode 100644 audit-cli/testdata/input-files/source/rotate-key-sharded-cluster.txt diff --git a/audit-cli/commands/extract/code-examples/code_examples_test.go b/audit-cli/commands/extract/code-examples/code_examples_test.go index 9bad1cf..5688ca4 100644 --- a/audit-cli/commands/extract/code-examples/code_examples_test.go +++ b/audit-cli/commands/extract/code-examples/code_examples_test.go @@ -582,8 +582,9 @@ func TestNoFlagsOnDirectory(t *testing.T) { // Should NOT include files in includes/ subdirectory // Expected: code-block-test.rst, duplicate-include-test.rst, include-test.rst, // io-code-block-test.rst, literalinclude-test.rst, nested-code-block-test.rst, - // nested-include-test.rst, index.rst, procedure-test.rst, procedure-with-includes.rst (10 files) - expectedFiles := 11 + // nested-include-test.rst, index.rst, procedure-test.rst, procedure-with-includes.rst, + // rotate-key-sharded-cluster.txt, continuation-marker-test.rst (12 files) + expectedFiles := 13 if report.FilesTraversed != expectedFiles { t.Errorf("Expected %d files traversed (top-level only), got %d", expectedFiles, report.FilesTraversed) diff --git a/audit-cli/commands/extract/procedures/procedures.go 
b/audit-cli/commands/extract/procedures/procedures.go index 34595bf..0874982 100644 --- a/audit-cli/commands/extract/procedures/procedures.go +++ b/audit-cli/commands/extract/procedures/procedures.go @@ -31,11 +31,13 @@ import ( // - -v, --verbose: Show detailed processing information func NewProceduresCommand() *cobra.Command { var ( - selection string - outputDir string - dryRun bool - verbose bool - expandIncludes bool + selection string + outputDir string + dryRun bool + verbose bool + expandIncludes bool + showSteps bool + showSubProcedures bool ) cmd := &cobra.Command{ @@ -60,7 +62,7 @@ to inline the content of included files.`, Args: cobra.ExactArgs(1), RunE: func(cmd *cobra.Command, args []string) error { filePath := args[0] - return runExtract(filePath, selection, outputDir, dryRun, verbose, expandIncludes) + return runExtract(filePath, selection, outputDir, dryRun, verbose, expandIncludes, showSteps, showSubProcedures) }, } @@ -69,12 +71,14 @@ to inline the content of included files.`, cmd.Flags().BoolVar(&dryRun, "dry-run", false, "Show what would be extracted without writing files") cmd.Flags().BoolVarP(&verbose, "verbose", "v", false, "Provide additional information during execution") cmd.Flags().BoolVar(&expandIncludes, "expand-includes", false, "Expand include directives inline instead of preserving them") + cmd.Flags().BoolVar(&showSteps, "show-steps", false, "Show detailed information about each step in the procedure") + cmd.Flags().BoolVar(&showSubProcedures, "show-sub-procedures", false, "Show information about detected sub-procedures within steps") return cmd } // runExtract executes the extraction operation. 
-func runExtract(filePath string, selection string, outputDir string, dryRun bool, verbose bool, expandIncludes bool) error { +func runExtract(filePath string, selection string, outputDir string, dryRun bool, verbose bool, expandIncludes bool, showSteps bool, showSubProcedures bool) error { // Verify the file exists fileInfo, err := os.Stat(filePath) if err != nil { @@ -121,6 +125,83 @@ func runExtract(filePath string, selection string, outputDir string, dryRun bool } else { fmt.Printf(" Appears in: (no specific selections)\n") } + + // Show step details if requested + if showSteps { + fmt.Printf("\n Step Details:\n") + for stepIdx, step := range v.Procedure.Steps { + // Check if the title already contains numbering + hasNumbering := false + title := step.Title + if len(title) > 0 { + // Check for numbered (1., 2., etc.) or lettered (a., b., etc.) prefix + if (title[0] >= '0' && title[0] <= '9') || (title[0] >= 'a' && title[0] <= 'z') { + if len(title) > 1 && title[1] == '.' { + hasNumbering = true + } + } + } + + if hasNumbering { + fmt.Printf(" - %s\n", title) + } else { + fmt.Printf(" %d. 
%s\n", stepIdx+1, title) + } + + if len(step.SubProcedures) > 0 { + totalSubSteps := 0 + for _, subProc := range step.SubProcedures { + totalSubSteps += len(subProc.Steps) + } + fmt.Printf(" Contains %d sub-procedure(s) with a total of %d sub-step(s)\n", len(step.SubProcedures), totalSubSteps) + } + if len(step.Variations) > 0 { + fmt.Printf(" Contains %d variation(s)\n", len(step.Variations)) + } + } + } + + // Show sub-procedure information if requested + if showSubProcedures && v.Procedure.HasSubSteps { + fmt.Printf("\n Sub-Procedures:\n") + for stepIdx, step := range v.Procedure.Steps { + if len(step.SubProcedures) > 0 { + totalSubSteps := 0 + for _, subProc := range step.SubProcedures { + totalSubSteps += len(subProc.Steps) + } + fmt.Printf(" Step %d (%s) contains %d sub-procedure(s) with a total of %d sub-step(s)\n", + stepIdx+1, step.Title, len(step.SubProcedures), totalSubSteps) + + for subProcIdx, subProc := range step.SubProcedures { + fmt.Printf("\n Sub-procedure %d (%d step(s)):\n", subProcIdx+1, len(subProc.Steps)) + for subStepIdx, subStep := range subProc.Steps { + // Use the appropriate marker based on list type + marker := "" + if subProc.ListType == "lettered" { + // Convert index to letter (0->a, 1->b, etc.) + marker = string(rune('a' + subStepIdx)) + } else { + // Default to numbered + marker = fmt.Sprintf("%d", subStepIdx+1) + } + + // Strip any existing marker from the title + title := subStep.Title + // Check if title starts with a marker (e.g., "a. ", "b. ", "1. ", "2. ") + if len(title) > 2 && title[1] == '.' && title[2] == ' ' { + // Check if it's a letter or number marker + if (title[0] >= 'a' && title[0] <= 'z') || (title[0] >= '0' && title[0] <= '9') { + title = title[3:] // Strip the marker + } + } + + fmt.Printf(" %s. 
%s\n", marker, title) + } + } + } + } + } } fmt.Println() } diff --git a/audit-cli/docs/PROCEDURE_PARSING.md b/audit-cli/docs/PROCEDURE_PARSING.md index c792107..7b116ae 100644 --- a/audit-cli/docs/PROCEDURE_PARSING.md +++ b/audit-cli/docs/PROCEDURE_PARSING.md @@ -8,6 +8,7 @@ This document describes the business logic behind procedure parsing in the `audi - [What is a Procedure?](#what-is-a-procedure) - [Procedure Formats](#procedure-formats) - [Procedure Variations](#procedure-variations) +- [Sub-Procedures and List Type Tracking](#sub-procedures-and-list-type-tracking) - [Include Directive Handling](#include-directive-handling) - [Uniqueness and Grouping](#uniqueness-and-grouping) - [Analysis vs. Extraction Semantics](#analysis-vs-extraction-semantics) @@ -30,7 +31,7 @@ Procedures have: - A **title/heading** (the section heading above the procedure) - A **series of steps** (numbered or bulleted instructions) - Optional **variations** (different content for different contexts) -- Optional **sub-steps** (nested procedures within steps) +- Optional **sub-procedures** (ordered lists within steps, each tracked separately with its list marker type) ## Procedure Formats @@ -82,6 +83,55 @@ b. Second step c. Third step ``` +**Continuation Markers:** MongoDB documentation uses `#.` as a continuation marker for ordered lists, allowing the build system to automatically number items: + +```rst +a. First step + +#. Second step (automatically becomes 'b.') + +#. Third step (automatically becomes 'c.') +``` + +The parser recognizes `#.` as a continuation of the current list type (numbered or lettered) and converts it to the appropriate next marker. + +### 2a. Hierarchical Procedures with Numbered Headings + +Some procedures use numbered headings to represent top-level steps, with ordered lists as sub-steps: + +```rst +Procedure +--------- + +1. Modify the Keyfile +~~~~~~~~~~~~~~~~~~~~~ + +Update the keyfile to include both old and new keys. + +a. 
Open the keyfile in a text editor. + +#. Add the new key on a separate line. + +#. Save the file. + +2. Restart Each Member +~~~~~~~~~~~~~~~~~~~~~~ + +Restart all members one at a time. + +a. Shut down the member. + +#. Restart the member. +``` + +**Parser Behavior:** +- Detects "Procedure" heading followed by numbered headings (1., 2., 3., etc.) +- Treats numbered headings as top-level steps of a single procedure +- Parses ordered lists within each numbered heading as sub-steps +- Sets `HasSubSteps` flag to true if sub-steps are found +- **Analysis:** Shows 1 procedure with N steps (where N is the number of numbered headings) +- **Extraction:** Creates 1 file containing all numbered heading steps and their sub-steps + ### 3. YAML Steps Files MongoDB's build system converts YAML files to procedures: @@ -321,6 +371,90 @@ Installation Instructions **Rationale:** Each platform has a completely different installation procedure with different steps, so they should be extracted as separate files. However, for analysis/reporting, they're grouped as one logical "Installation Instructions" procedure with platform variations. +## Sub-Procedures and List Type Tracking + +MongoDB documentation often contains **sub-procedures** - ordered lists within procedure steps that represent nested sequences of actions. The parser tracks these sub-procedures separately and preserves their list marker type (numbered vs. lettered). + +### Sub-Procedure Structure + +Each step can contain multiple sub-procedures, where each sub-procedure is a separate ordered list: + +```rst +.. procedure:: + + .. step:: Restart Each Member + + **For each secondary member:** + + a. Shut down the member. + + b. Restart the member. + + **For the primary:** + + a. Step down the primary. + + #. Shut down the member. + + #. Restart the member. +``` + +In this example, step "Restart Each Member" contains **two separate sub-procedures**: +1. Sub-procedure 1 (2 steps): For each secondary member +2. 
Sub-procedure 2 (3 steps): For the primary + +### List Type Tracking + +The parser tracks whether each sub-procedure uses numbered (`1.`, `2.`, `3.`) or lettered (`a.`, `b.`, `c.`) markers: + +**Data Structure:** +```go +type SubProcedure struct { + Steps []Step // The steps in this sub-procedure + ListType string // "numbered" or "lettered" +} + +type Step struct { + Title string + Content string + SubProcedures []SubProcedure // Multiple sub-procedures within this step + SubSteps []Step // Deprecated: Flattened list for backward compatibility +} +``` + +**Parser Behavior:** +- Detects each ordered list within a step as a separate sub-procedure +- Determines list type from the first item (`1.` → numbered, `a.` → lettered) +- Stores each sub-procedure with its list type +- Maintains `SubSteps` as a flattened list for backward compatibility + +### Display with `--show-sub-procedures` Flag + +The `extract procedures` command includes a `--show-sub-procedures` flag that displays sub-procedures using their original list marker type: + +**Example Output:** +``` +Step 2 (Restart Each Member) contains 2 sub-procedure(s) with a total of 5 sub-step(s) + + Sub-procedure 1 (2 step(s)): + a. Shut down the member. + b. Restart the member. + + Sub-procedure 2 (3 step(s)): + a. Step down the primary. + b. Shut down the member. + c. Restart the member. +``` + +**Benefits:** +- Makes it easier for writers to match CLI output with source files +- Preserves the semantic meaning of list marker types +- Shows the structure of multiple sub-procedures within a step + +### Backward Compatibility + +The `SubSteps` field is still populated with a flattened list of all sub-steps for backward compatibility with existing code that depends on it. New code should use `SubProcedures` to access the structured sub-procedure information. + ## Include Directive Handling MongoDB documentation uses `.. include::` directives to reuse content across files. 
The parser handles includes with context-aware expansion: @@ -677,6 +811,54 @@ Setup Instructions - `HasSubSteps` flag is set to true - Sub-procedure is not extracted separately (only top-level procedures are extracted) +### Pattern: Hierarchical Procedure with Numbered Headings + +```rst +Procedure +--------- + +1. First Major Step +~~~~~~~~~~~~~~~~~~~ + +Description of the first step. + +a. Sub-step one + +#. Sub-step two + +2. Second Major Step +~~~~~~~~~~~~~~~~~~~~ + +Description of the second step. + +a. Sub-step one + +#. Sub-step two +``` + +**Result:** +- Analysis: 1 unique procedure with 2 steps +- `HasSubSteps` flag is set to true (because of the ordered lists) +- Extraction: 1 file containing both numbered heading steps and their sub-steps + +### Pattern: Continuation Markers in Ordered Lists + +```rst +Setup Steps +----------- + +a. First step + +#. Second step (becomes 'b.') + +#. Third step (becomes 'c.') +``` + +**Result:** +- Parser recognizes `#.` as continuation of lettered list +- Converts to: a., b., c. +- Works for both numbered (1., 2., 3.) and lettered (a., b., c.) 
lists + ## Testing Strategy The parser has comprehensive test coverage: diff --git a/audit-cli/internal/rst/parse_procedures.go b/audit-cli/internal/rst/parse_procedures.go index a992cac..1470545 100644 --- a/audit-cli/internal/rst/parse_procedures.go +++ b/audit-cli/internal/rst/parse_procedures.go @@ -114,6 +114,7 @@ import ( "os" "path/filepath" "sort" + "strconv" "strings" "gopkg.in/yaml.v3" @@ -178,7 +179,37 @@ func parseProceduresFromLines(lines []string, filePath string) ([]Procedure, err if isHeadingUnderline(nextLine) && len(nextLine) >= len(trimmedLine) { // Skip empty headings and generic headings that don't provide meaningful context headingLower := strings.ToLower(trimmedLine) - if trimmedLine != "" && headingLower != "procedure" && headingLower != "overview" && headingLower != "steps" { + + // Check if this is a "Procedure" heading + if headingLower == "procedure" || headingLower == "steps" { + currentHeading = trimmedLine + i += 2 // Skip heading and underline + + // Look ahead to see if the next heading is numbered + // If so, parse as hierarchical procedure + j := i + for j < len(lines) && strings.TrimSpace(lines[j]) == "" { + j++ + } + if j+1 < len(lines) { + nextHeading := strings.TrimSpace(lines[j]) + nextUnderline := strings.TrimSpace(lines[j+1]) + if isHeadingUnderline(nextUnderline) && isNumberedHeading(nextHeading) { + // Parse hierarchical procedure + procedure, endLine := parseHierarchicalProcedure(lines, j, currentHeading) + if len(procedure.Steps) > 0 { + procedure.LineNum = i - 1 // Line where "Procedure" heading starts + procedure.EndLineNum = endLine + 1 + procedures = append(procedures, procedure) + } + i = endLine + 1 + continue + } + } + continue + } + + if trimmedLine != "" && headingLower != "overview" { currentHeading = trimmedLine } i += 2 // Skip heading and underline @@ -272,6 +303,165 @@ func isHeadingUnderline(line string) bool { return true } +// isNumberedHeading checks if a heading starts with a number followed by a 
period +func isNumberedHeading(heading string) bool { + trimmed := strings.TrimSpace(heading) + if len(trimmed) < 3 { + return false + } + // Check if it starts with a digit followed by a period + if trimmed[0] >= '0' && trimmed[0] <= '9' { + // Find the period + for i := 1; i < len(trimmed); i++ { + if trimmed[i] == '.' { + return true + } + if trimmed[i] < '0' || trimmed[i] > '9' { + return false + } + } + } + return false +} + +// parseHierarchicalProcedure parses a procedure with numbered headings as steps +// This handles the pattern where a "Procedure" heading is followed by numbered headings +// like "1. First Step", "2. Second Step", etc. +func parseHierarchicalProcedure(lines []string, startIdx int, title string) (Procedure, int) { + procedure := Procedure{ + Type: OrderedList, + Title: title, + Steps: []Step{}, + } + + i := startIdx + + // Parse each numbered heading as a step + for i < len(lines) { + line := lines[i] + trimmedLine := strings.TrimSpace(line) + + // Empty line + if trimmedLine == "" { + i++ + continue + } + + // Check if this is a numbered heading + if i+1 < len(lines) { + nextLine := strings.TrimSpace(lines[i+1]) + if isHeadingUnderline(nextLine) && len(nextLine) >= len(trimmedLine) { + if isNumberedHeading(trimmedLine) { + // Parse this numbered heading as a step + step, endLine := parseNumberedHeadingStep(lines, i) + procedure.Steps = append(procedure.Steps, step) + i = endLine + 1 + continue + } else { + // Non-numbered heading - end of this procedure + break + } + } + } + + // Check for directive or other content that signals end of procedure + if strings.HasPrefix(trimmedLine, "..") { + break + } + + i++ + } + + // Check for sub-steps + for _, step := range procedure.Steps { + if len(step.SubSteps) > 0 { + procedure.HasSubSteps = true + break + } + } + + return procedure, i - 1 +} + +// parseNumberedHeadingStep parses a numbered heading and its content as a procedure step +func parseNumberedHeadingStep(lines []string, startIdx int) 
(Step, int) { + heading := strings.TrimSpace(lines[startIdx]) + _ = strings.TrimSpace(lines[startIdx+1]) // underline (not used but needed to skip) + + step := Step{ + Title: heading, + LineNum: startIdx + 1, + } + + i := startIdx + 2 // Skip heading and underline + var contentLines []string + var subSteps []Step + var subProcedures []SubProcedure + + // Parse the content under this heading + for i < len(lines) { + line := lines[i] + trimmedLine := strings.TrimSpace(line) + + // Empty line + if trimmedLine == "" { + contentLines = append(contentLines, "") + i++ + continue + } + + // Check if we've hit the next numbered heading + if i+1 < len(lines) { + nextLine := strings.TrimSpace(lines[i+1]) + if isHeadingUnderline(nextLine) && len(nextLine) >= len(trimmedLine) { + // This is a heading - check if it's numbered (next step) or a subheading + if isNumberedHeading(trimmedLine) { + // Next numbered step - we're done with this step + break + } + // Non-numbered heading - could be a subheading, include it in content + } + } + + // Check for ordered list (sub-steps) + if isOrderedListStart(trimmedLine) { + subProcedureSteps, listType, endLine := parseOrderedListSteps(lines, i) + // Add as a separate sub-procedure with its list type + subProcedures = append(subProcedures, SubProcedure{ + Steps: subProcedureSteps, + ListType: listType, + }) + // Also add to subSteps for backward compatibility + subSteps = append(subSteps, subProcedureSteps...) 
+ // Add the sub-steps to content as well + for j := i; j <= endLine; j++ { + contentLines = append(contentLines, lines[j]) + } + i = endLine + 1 + continue + } + + // Check for directive + if strings.HasPrefix(trimmedLine, "..") { + // Include directives in content + contentLines = append(contentLines, line) + i++ + continue + } + + // Regular content line + contentLines = append(contentLines, line) + i++ + } + + step.Content = strings.Join(contentLines, "\n") + step.SubSteps = subSteps + step.SubProcedures = subProcedures + + return step, i - 1 +} + + // computeProcedureContentHash generates a hash of the procedure's content // to detect when procedures are identical across different selections func computeProcedureContentHash(proc *Procedure) string { @@ -322,7 +512,7 @@ func computeProcedureContentHash(proc *Procedure) string { // isOrderedListStart checks if a line starts an ordered list func isOrderedListStart(line string) bool { - return numberedListRegex.MatchString(line) || letteredListRegex.MatchString(line) + return numberedListRegex.MatchString(line) || letteredListRegex.MatchString(line) || continuationMarkerRegex.MatchString(line) } // getIndentLevel returns the indentation level of a line @@ -703,8 +893,14 @@ func parseStepDirectiveFromLines(lines []string, startIdx int, title string, fil // Check for ordered list (sub-steps) if isOrderedListStart(trimmedLine) { - subSteps, endLine := parseOrderedListSteps(lines, i) - step.SubSteps = append(step.SubSteps, subSteps...) + subProcedureSteps, listType, endLine := parseOrderedListSteps(lines, i) + // Add as a separate sub-procedure with its list type + step.SubProcedures = append(step.SubProcedures, SubProcedure{ + Steps: subProcedureSteps, + ListType: listType, + }) + // Also add to SubSteps for backward compatibility + step.SubSteps = append(step.SubSteps, subProcedureSteps...) 
// Add the sub-steps to content as well for j := i; j <= endLine; j++ { contentLines = append(contentLines, lines[j]) @@ -782,18 +978,22 @@ func parseOrderedListProcedure(lines []string, startIdx int, title string) (Proc Steps: []Step{}, } - steps, endLine := parseOrderedListSteps(lines, startIdx) + steps, _, endLine := parseOrderedListSteps(lines, startIdx) procedure.Steps = steps return procedure, endLine } -// parseOrderedListSteps parses ordered list items as steps -func parseOrderedListSteps(lines []string, startIdx int) ([]Step, int) { +// parseOrderedListSteps parses ordered list items as steps and returns the list type +func parseOrderedListSteps(lines []string, startIdx int) ([]Step, string, int) { var steps []Step i := startIdx baseIndent := getIndentLevel(lines[i]) + // Track the list type (numbered or lettered) and the last marker + var listType string // "numbered" or "lettered" + var lastMarker string // last number or letter used + for i < len(lines) { line := lines[i] trimmedLine := strings.TrimSpace(line) @@ -808,8 +1008,28 @@ func parseOrderedListSteps(lines []string, startIdx int) ([]Step, int) { // Check if this is a list item at the same level if indent == baseIndent && isOrderedListStart(trimmedLine) { - step, endLine := parseOrderedListItem(lines, i) + // Determine list type from first item if not set + if listType == "" { + if numberedListRegex.MatchString(trimmedLine) { + listType = "numbered" + } else if letteredListRegex.MatchString(trimmedLine) { + listType = "lettered" + } + } + + step, endLine := parseOrderedListItem(lines, i, listType, lastMarker) steps = append(steps, step) + + // Update last marker based on the step we just parsed + marker := getListMarker(lines[i], listType) + if marker != "" { + // Regular marker - use it + lastMarker = marker + } else { + // Continuation marker - compute the next marker + lastMarker = getNextMarker(lastMarker, listType) + } + i = endLine + 1 continue } @@ -822,11 +1042,11 @@ func 
parseOrderedListSteps(lines []string, startIdx int) ([]Step, int) { i++ } - return steps, i - 1 + return steps, listType, i - 1 } // parseOrderedListItem parses a single ordered list item -func parseOrderedListItem(lines []string, startIdx int) (Step, int) { +func parseOrderedListItem(lines []string, startIdx int, listType string, lastMarker string) (Step, int) { line := lines[startIdx] var title string var contentLines []string @@ -836,6 +1056,14 @@ func parseOrderedListItem(lines []string, startIdx int) (Step, int) { title = strings.TrimSpace(matches[3]) } else if matches := letteredListRegex.FindStringSubmatch(line); len(matches) > 3 { title = strings.TrimSpace(matches[3]) + } else if matches := continuationMarkerRegex.FindStringSubmatch(line); len(matches) > 2 { + // Handle continuation marker (#.) - convert to next number/letter + nextMarker := getNextMarker(lastMarker, listType) + title = strings.TrimSpace(matches[2]) + // Prepend the computed marker to the title for display purposes + if nextMarker != "" { + title = nextMarker + ". 
" + title + } } baseIndent := getIndentLevel(line) @@ -892,6 +1120,65 @@ func parseOrderedListItem(lines []string, startIdx int) (Step, int) { return step, i - 1 } +// getListMarker extracts the marker (number or letter) from a list item line +func getListMarker(line string, listType string) string { + trimmedLine := strings.TrimSpace(line) + + // Check for continuation marker - return empty string as we'll compute it + if continuationMarkerRegex.MatchString(trimmedLine) { + return "" + } + + if listType == "numbered" { + if matches := numberedListRegex.FindStringSubmatch(trimmedLine); len(matches) > 2 { + return matches[2] + } + } else if listType == "lettered" { + if matches := letteredListRegex.FindStringSubmatch(trimmedLine); len(matches) > 2 { + return matches[2] + } + } + + return "" +} + +// getNextMarker computes the next marker in a sequence +func getNextMarker(lastMarker string, listType string) string { + if lastMarker == "" { + // If no last marker, start from 1 or 'a' + if listType == "numbered" { + return "1" + } else if listType == "lettered" { + return "a" + } + return "" + } + + if listType == "numbered" { + // Parse the number and increment + if num, err := strconv.Atoi(lastMarker); err == nil { + return strconv.Itoa(num + 1) + } + } else if listType == "lettered" { + // Increment the letter + if len(lastMarker) == 1 { + char := lastMarker[0] + if char >= 'a' && char < 'z' { + return string(char + 1) + } else if char >= 'A' && char < 'Z' { + return string(char + 1) + } else if char == 'z' { + return "aa" // Handle overflow (rare case) + } else if char == 'Z' { + return "AA" + } + } + } + + return lastMarker +} + + // parseTabsVariation parses a .. 
tabs:: directive and its tab content func parseTabsVariation(lines []string, startIdx int) (Variation, int) { variation := Variation{ diff --git a/audit-cli/internal/rst/parse_procedures_test.go b/audit-cli/internal/rst/parse_procedures_test.go index 36b422c..3b7baf8 100644 --- a/audit-cli/internal/rst/parse_procedures_test.go +++ b/audit-cli/internal/rst/parse_procedures_test.go @@ -182,3 +182,233 @@ func TestAbsolutePath(t *testing.T) { t.Logf("Successfully parsed with absolute path: %s", absPath) } + +func TestContinuationMarkers(t *testing.T) { + testFile := "../../testdata/input-files/source/continuation-marker-test.rst" + + procedures, err := ParseProceduresWithOptions(testFile, false) + if err != nil { + t.Fatalf("ParseProceduresWithOptions failed: %v", err) + } + + if len(procedures) != 2 { + t.Fatalf("Expected 2 procedures, got %d", len(procedures)) + } + + // Test lettered list with continuation markers + letteredProc := procedures[0] + if letteredProc.Title != "Lettered List with Continuation" { + t.Errorf("Expected title 'Lettered List with Continuation', got '%s'", letteredProc.Title) + } + + if len(letteredProc.Steps) != 3 { + t.Fatalf("Expected 3 steps in lettered list, got %d", len(letteredProc.Steps)) + } + + // Verify step titles (note: regular list items don't include the marker in the title) + // Only continuation markers get the computed marker prepended + expectedTitles := []string{"First step", "b. Second step", "c. 
Third step"} + for i, step := range letteredProc.Steps { + if step.Title != expectedTitles[i] { + t.Errorf("Step %d: expected title '%s', got '%s'", i, expectedTitles[i], step.Title) + } + } + + // Test numbered list with continuation markers + numberedProc := procedures[1] + if numberedProc.Title != "Numbered List with Continuation" { + t.Errorf("Expected title 'Numbered List with Continuation', got '%s'", numberedProc.Title) + } + + if len(numberedProc.Steps) != 4 { + t.Fatalf("Expected 4 steps in numbered list, got %d", len(numberedProc.Steps)) + } + + // Verify step titles (note: regular list items don't include the marker in the title) + // Only continuation markers get the computed marker prepended + expectedNumberedTitles := []string{"First step", "2. Second step", "3. Third step", "4. Fourth step"} + for i, step := range numberedProc.Steps { + if step.Title != expectedNumberedTitles[i] { + t.Errorf("Step %d: expected title '%s', got '%s'", i, expectedNumberedTitles[i], step.Title) + } + } + + t.Logf("Continuation markers parsed correctly") +} + +func TestHierarchicalProcedure(t *testing.T) { + testFile := "../../testdata/input-files/source/rotate-key-sharded-cluster.txt" + + procedures, err := ParseProceduresWithOptions(testFile, false) + if err != nil { + t.Fatalf("ParseProceduresWithOptions failed: %v", err) + } + + // Should parse as 1 procedure (not 10 separate procedures) + if len(procedures) != 1 { + t.Fatalf("Expected 1 procedure, got %d", len(procedures)) + } + + proc := procedures[0] + if proc.Title != "Procedure" { + t.Errorf("Expected title 'Procedure', got '%s'", proc.Title) + } + + // Should have 4 top-level steps (the numbered headings) + if len(proc.Steps) != 4 { + t.Fatalf("Expected 4 steps, got %d", len(proc.Steps)) + } + + // Verify step titles match the numbered headings + expectedStepTitles := []string{ + "1. Modify the Keyfile to Include Old and New Keys", + "2. Restart Each Member", + "3. 
Update Keyfile Content to the New Key Only", + "4. Restart Each Member", + } + + for i, step := range proc.Steps { + if step.Title != expectedStepTitles[i] { + t.Errorf("Step %d: expected title '%s', got '%s'", i, expectedStepTitles[i], step.Title) + } + } + + // Verify HasSubSteps is set + if !proc.HasSubSteps { + t.Error("Expected HasSubSteps to be true") + } + + // Verify that step 2 has sub-steps (the ordered lists) + step2 := proc.Steps[1] + if len(step2.SubSteps) == 0 { + t.Error("Expected step 2 to have sub-steps") + } + + t.Logf("Hierarchical procedure parsed correctly with %d steps", len(proc.Steps)) +} + +func TestSubProcedureDetection(t *testing.T) { + testFile := "../../testdata/input-files/source/procedure-test.rst" + + procedures, err := ParseProceduresWithOptions(testFile, false) + if err != nil { + t.Fatalf("ParseProceduresWithOptions failed: %v", err) + } + + // Find the "Procedure with Sub-steps" procedure + var subStepProc *Procedure + for i := range procedures { + if procedures[i].Title == "Procedure with Sub-steps" { + subStepProc = &procedures[i] + break + } + } + + if subStepProc == nil { + t.Fatal("Could not find 'Procedure with Sub-steps'") + } + + // Verify HasSubSteps is set + if !subStepProc.HasSubSteps { + t.Error("Expected HasSubSteps to be true for 'Procedure with Sub-steps'") + } + + // Verify at least one step has sub-steps + hasSubSteps := false + for _, step := range subStepProc.Steps { + if len(step.SubSteps) > 0 { + hasSubSteps = true + break + } + } + + if !hasSubSteps { + t.Error("Expected at least one step to have sub-steps") + } + + t.Logf("Sub-procedure detection working correctly") +} + +func TestSubProcedureListTypes(t *testing.T) { + testFile := "../../testdata/input-files/source/rotate-key-sharded-cluster.txt" + + procedures, err := ParseProceduresWithOptions(testFile, false) + if err != nil { + t.Fatalf("ParseProceduresWithOptions failed: %v", err) + } + + // Find the hierarchical procedure + if len(procedures) == 0 { 
+ t.Fatal("Expected at least one procedure") + } + + proc := procedures[0] + + // Verify it has steps with sub-procedures + if len(proc.Steps) < 2 { + t.Fatalf("Expected at least 2 steps, got %d", len(proc.Steps)) + } + + // Check step 2 (index 1) which should have sub-procedures + step := proc.Steps[1] + if len(step.SubProcedures) == 0 { + t.Fatal("Expected step 2 to have sub-procedures") + } + + // Verify all sub-procedures have the correct list type + for i, subProc := range step.SubProcedures { + if subProc.ListType != "lettered" { + t.Errorf("Sub-procedure %d: expected list type 'lettered', got '%s'", i+1, subProc.ListType) + } + + if len(subProc.Steps) == 0 { + t.Errorf("Sub-procedure %d: expected at least one step", i+1) + } + + // Verify steps are present + t.Logf("Sub-procedure %d has %d steps with list type '%s'", i+1, len(subProc.Steps), subProc.ListType) + } + + // Verify backward compatibility - SubSteps should still be populated + if len(step.SubSteps) == 0 { + t.Error("Expected SubSteps to be populated for backward compatibility") + } + + // Count total steps across all sub-procedures + totalSteps := 0 + for _, subProc := range step.SubProcedures { + totalSteps += len(subProc.Steps) + } + + // Verify SubSteps has the same total count + if len(step.SubSteps) != totalSteps { + t.Errorf("Expected SubSteps to have %d steps (flattened), got %d", totalSteps, len(step.SubSteps)) + } + + t.Logf("Sub-procedure list types tracked correctly: %d sub-procedures with %d total steps", + len(step.SubProcedures), totalSteps) +} + +func TestNumberedHeadingDetection(t *testing.T) { + tests := []struct { + heading string + expected bool + }{ + {"1. First Step", true}, + {"2. Second Step", true}, + {"10. Tenth Step", true}, + {"123. Large Number", true}, + {"Step 1", false}, + {"1 First Step", false}, + {"a. 
Lettered Step", false}, + {"Procedure", false}, + {"", false}, + } + + for _, tt := range tests { + result := isNumberedHeading(tt.heading) + if result != tt.expected { + t.Errorf("isNumberedHeading(%q) = %v, want %v", tt.heading, result, tt.expected) + } + } +} diff --git a/audit-cli/internal/rst/procedure_types.go b/audit-cli/internal/rst/procedure_types.go index cf4afb3..fda2f4d 100644 --- a/audit-cli/internal/rst/procedure_types.go +++ b/audit-cli/internal/rst/procedure_types.go @@ -36,12 +36,19 @@ type TabSetInfo struct { // Step represents a single step in a procedure. type Step struct { - Title string // Step title (for .. step:: directive) - Content string // Step content (raw RST) - Options map[string]string // Step options - LineNum int // Line number where step starts - Variations []Variation // Variations within this step (tabs or selected content) - SubSteps []Step // Sub-steps (ordered lists within this step) + Title string // Step title (for .. step:: directive) + Content string // Step content (raw RST) + Options map[string]string // Step options + LineNum int // Line number where step starts + Variations []Variation // Variations within this step (tabs or selected content) + SubSteps []Step // DEPRECATED: Use SubProcedures instead. Kept for backward compatibility. + SubProcedures []SubProcedure // Multiple sub-procedures (each is an ordered list within this step) +} + +// SubProcedure represents an ordered list within a step +type SubProcedure struct { + Steps []Step // The steps in this sub-procedure + ListType string // "numbered" or "lettered" - the type of ordered list marker used } // Variation represents a content variation within a step. @@ -97,6 +104,8 @@ var ( numberedListRegex = regexp.MustCompile(`^(\s*)(\d+)[\.\)]\s+(.*)$`) // Matches lettered lists: a. or a) or A. or A) letteredListRegex = regexp.MustCompile(`^(\s*)([a-zA-Z])[\.\)]\s+(.*)$`) + // Matches continuation marker: #. 
(used to continue an ordered list) + continuationMarkerRegex = regexp.MustCompile(`^(\s*)#[\.\)]\s+(.*)$`) ) // YAMLStep represents a step in a YAML steps file diff --git a/audit-cli/testdata/input-files/source/continuation-marker-test.rst b/audit-cli/testdata/input-files/source/continuation-marker-test.rst new file mode 100644 index 0000000..e38428f --- /dev/null +++ b/audit-cli/testdata/input-files/source/continuation-marker-test.rst @@ -0,0 +1,38 @@ +================================== +Continuation Marker Test +================================== + +Lettered List with Continuation +-------------------------------- + +a. First step + + This is the first step with some content. + +#. Second step + + This is the second step using a continuation marker. + +#. Third step + + This is the third step also using a continuation marker. + +Numbered List with Continuation +-------------------------------- + +1. First step + + This is the first numbered step. + +#. Second step + + This uses a continuation marker and should become step 2. + +#. Third step + + This should become step 3. + +#. Fourth step + + This should become step 4. + diff --git a/audit-cli/testdata/input-files/source/rotate-key-sharded-cluster.txt b/audit-cli/testdata/input-files/source/rotate-key-sharded-cluster.txt new file mode 100644 index 0000000..d14dac8 --- /dev/null +++ b/audit-cli/testdata/input-files/source/rotate-key-sharded-cluster.txt @@ -0,0 +1,269 @@ +.. meta:: + :robots: noindex, nosnippet + +================================ +Rotate Keys for Sharded Clusters +================================ + +.. default-domain:: mongodb + +Sharded cluster members can use :ref:`keyfiles ` +to authenticate each other as memers of the same deployment. + +Starting in version 4.2, a :ref:`keyfile ` can +contain multiple keys and membership authentication is established if +at least one key is common across members. This allows for rolling +upgrade of the keys without downtime. 
+ +The following tutorial steps through the process to update, without any +downtime, the key for a sharded cluster. [#exclude-encryption-keyfile]_ + +.. warning:: + + The example keys in this tutorial are for illustrative purposes + only. Do :red:`NOT` use for your deployement. Instead, generate a + keyfile using any method you choose (e.g. ``openssl rand -base64 + 756``, etc.). + +Consider a sharded cluster where each member's keyfile contains the +following key: + +.. figure:: /images/example-key1.png + :alt: Image of current key to replace. + :figwidth: 568px + +The following procedure updates the sharded cluster members to use a +new key: + +.. figure:: /images/example-key2.png + :alt: Image of new key. + :figwidth: 568px + +.. [#exclude-encryption-keyfile] + + This tutorial is not applicable to the :ref:`keyfile + ` used for the :doc:`MongoDB's encrypted + storage engine ` local key + management. That :ref:`keyfile ` can only + contain a single key. + +Procedure +--------- + +1. Modify the Keyfile to Include Old and New Keys +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Modify each member's keyfile to include both the old and new keys. + +.. warning:: + + The example keys in this tutorial are for illustrative purposes + only. Do :red:`NOT` use for your deployement. Instead, generate a + keyfile using any method you choose (e.g. ``openssl rand -base64 + 756``, etc.). + +You can specify multiple key strings as a sequence of key strings (optionally +enclosed in quotes): + +.. figure:: /images/example-multiple-keys2.png + :alt: Image of multiple key string sequence. + :figwidth: 600px + +2. Restart Each Member +~~~~~~~~~~~~~~~~~~~~~~ + +Once all the keyfiles contain both the old and new keys, restart each +member one at a time. + +Config Servers +`````````````` + +**For each secondary of the config server replica set (CSRS)**, +connect a :binary:`~bin.mongo` shell to the member and: + +a. 
Use the :method:`db.shutdownServer()` method to shut down the member: + + .. code-block:: javascript + + use admin + db.shutdownServer() + +b. Restart the member. + +**For the primary**, connect a :binary:`~bin.mongo` shell to the member and + +a. Use :method:`rs.stepDown()` to step down the member: + + .. code-block:: javascript + + rs.stepDown() + + +#. Use the :method:`db.shutdownServer()` method to shut down the member: + + .. code-block:: javascript + + use admin + db.shutdownServer() + +#. Restart the member. + +Shard Replica Sets +`````````````````` + +**For each secondary member of the shard replica sets**, connect a +:binary:`~bin.mongo` shell to the member and: + +a. Use the :method:`db.shutdownServer()` method to shut down the member: + + .. code-block:: javascript + + use admin + db.shutdownServer() + +b. Restart the member. + +**For the primary of each shard replica set**, connect a +:binary:`~bin.mongo` shell to the member and + +a. Use :method:`rs.stepDown()` to step down the member: + + .. code-block:: javascript + + rs.stepDown() + + +#. Use the :method:`db.shutdownServer()` method to shut down the member: + + .. code-block:: javascript + + use admin + db.shutdownServer() + +#. Restart the member. + + +``mongos`` Routers +`````````````````` + +**For each mongos/router instance**, connect a +:binary:`~bin.mongo` shell to the :binary:`~bin.mongos` instance and: + +a. Use the :method:`db.shutdownServer()` method to shut down the member: + + .. code-block:: javascript + + use admin + db.shutdownServer() + +b. Restart the member. + +Once all members have been restarted, the members now accept either the +old or new key for membership authentication. + +3. Update Keyfile Content to the New Key Only +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. warning:: + + The example keys in this tutorial are for illustrative purposes + only. Do :red:`NOT` use for your deployement. Instead, generate a + keyfile using any method you choose (e.g. 
``openssl rand -base64 + 756``, etc.). + +Modify each member's keyfile to include only the new password. + +.. figure:: /images/example-key2.png + :alt: Image of new key. + :figwidth: 558px + +4. Restart Each Member +~~~~~~~~~~~~~~~~~~~~~~ + +Once all the keyfiles contain the new key only, restart each member one +at a time. + +Config Servers +`````````````` + +**For each secondary of the config server replica set (CSRS)**, +connect a :binary:`~bin.mongo` shell to the member and: + +a. Use the :method:`db.shutdownServer()` method to shut down the member: + + .. code-block:: javascript + + use admin + db.shutdownServer() + +b. Restart the member. + +**For the primary**, connect a :binary:`~bin.mongo` shell to the member and + +a. Use :method:`rs.stepDown()` to step down the member: + + .. code-block:: javascript + + rs.stepDown() + + +#. Use the :method:`db.shutdownServer()` method to shut down the member: + + .. code-block:: javascript + + use admin + db.shutdownServer() + +#. Restart the member. + +Shard Replica Sets +`````````````````` + +**For each secondary member of the shard replica sets**, connect a +:binary:`~bin.mongo` shell to the member and: + +a. Use the :method:`db.shutdownServer()` method to shut down the member: + + .. code-block:: javascript + + use admin + db.shutdownServer() + +b. Restart the member. + +**For the primary of each shard replica set**, connect a +:binary:`~bin.mongo` shell to the member and + +a. Use :method:`rs.stepDown()` to step down the member: + + .. code-block:: javascript + + rs.stepDown() + + +#. Use the :method:`db.shutdownServer()` method to shut down the member: + + .. code-block:: javascript + + use admin + db.shutdownServer() + +#. Restart the member. + +``mongos`` Routers +`````````````````` + +**For each mongos/router instance**, connect a +:binary:`~bin.mongo` shell to the :binary:`~bin.mongos` instance and: + +a. Use the :method:`db.shutdownServer()` method to shut down the member: + + .. 
code-block:: javascript + + use admin + db.shutdownServer() + +b. Restart the member. + +Once all members have been restarted, the members now accept only the +new key for membership authentication. From 126d8837b35e579c165e806e735e505ba08f659c Mon Sep 17 00:00:00 2001 From: Dachary Carey Date: Mon, 8 Dec 2025 17:43:03 -0500 Subject: [PATCH 12/14] Add more info in verbose output for procedure parsing --- audit-cli/commands/extract/procedures/writer.go | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/audit-cli/commands/extract/procedures/writer.go b/audit-cli/commands/extract/procedures/writer.go index 7fb554d..9e545a6 100644 --- a/audit-cli/commands/extract/procedures/writer.go +++ b/audit-cli/commands/extract/procedures/writer.go @@ -67,6 +67,15 @@ func WriteAllVariations(variations []ProcedureVariation, outputDir string, dryRu return filesWritten, err } + if verbose { + outputPath := filepath.Join(outputDir, variation.OutputFile) + if dryRun { + fmt.Printf(" [DRY RUN] Would write: %s\n", outputPath) + } else { + fmt.Printf(" Wrote: %s\n", outputPath) + } + } + filesWritten++ } From 5e407a7c807ca502b3d735a3711548411bf163f4 Mon Sep 17 00:00:00 2001 From: Dachary Carey Date: Wed, 10 Dec 2025 09:07:43 -0500 Subject: [PATCH 13/14] Remove backward-compat SubStep, remove '(s)' in output --- .../commands/extract/procedures/procedures.go | 20 ++++++------- audit-cli/docs/PROCEDURE_PARSING.md | 12 ++------ audit-cli/internal/rst/parse_procedures.go | 29 +++++++++---------- .../internal/rst/parse_procedures_test.go | 28 ++++++------------ audit-cli/internal/rst/procedure_types.go | 1 - 5 files changed, 35 insertions(+), 55 deletions(-) diff --git a/audit-cli/commands/extract/procedures/procedures.go b/audit-cli/commands/extract/procedures/procedures.go index 0874982..f34ef66 100644 --- a/audit-cli/commands/extract/procedures/procedures.go +++ b/audit-cli/commands/extract/procedures/procedures.go @@ -8,7 +8,8 @@ // - Procedure directives // // The extracted 
procedures are written to individual RST files with standardized naming: -// {heading}-{selection}.rst +// +// {heading}-{selection}.rst // // Supports filtering to extract only specific variations using the --selection flag. package procedures @@ -109,7 +110,7 @@ func runExtract(filePath string, selection string, outputDir string, dryRun bool // Report what was found if verbose || dryRun { - fmt.Printf("\nFound %d unique procedure(s):\n", len(variations)) + fmt.Printf("\nFound %d unique procedures:\n", len(variations)) for i, v := range variations { fmt.Printf("\n%d. %s\n", i+1, v.Procedure.Title) fmt.Printf(" Output file: %s\n", v.OutputFile) @@ -118,7 +119,7 @@ func runExtract(filePath string, selection string, outputDir string, dryRun bool if v.VariationName != "" { // Split the selections and format as a list selections := strings.Split(v.VariationName, "; ") - fmt.Printf(" Appears in %d selection(s):\n", len(selections)) + fmt.Printf(" Appears in %d selections:\n", len(selections)) for _, sel := range selections { fmt.Printf(" - %s\n", sel) } @@ -153,10 +154,10 @@ func runExtract(filePath string, selection string, outputDir string, dryRun bool for _, subProc := range step.SubProcedures { totalSubSteps += len(subProc.Steps) } - fmt.Printf(" Contains %d sub-procedure(s) with a total of %d sub-step(s)\n", len(step.SubProcedures), totalSubSteps) + fmt.Printf(" Contains %d sub-procedures with a total of %d sub-steps\n", len(step.SubProcedures), totalSubSteps) } if len(step.Variations) > 0 { - fmt.Printf(" Contains %d variation(s)\n", len(step.Variations)) + fmt.Printf(" Contains %d variations\n", len(step.Variations)) } } } @@ -170,11 +171,11 @@ func runExtract(filePath string, selection string, outputDir string, dryRun bool for _, subProc := range step.SubProcedures { totalSubSteps += len(subProc.Steps) } - fmt.Printf(" Step %d (%s) contains %d sub-procedure(s) with a total of %d sub-step(s)\n", + fmt.Printf(" Step %d (%s) contains %d sub-procedures with a total 
of %d sub-steps\n", stepIdx+1, step.Title, len(step.SubProcedures), totalSubSteps) for subProcIdx, subProc := range step.SubProcedures { - fmt.Printf("\n Sub-procedure %d (%d step(s)):\n", subProcIdx+1, len(subProc.Steps)) + fmt.Printf("\n Sub-procedure %d (%d steps):\n", subProcIdx+1, len(subProc.Steps)) for subStepIdx, subStep := range subProc.Steps { // Use the appropriate marker based on list type marker := "" @@ -214,11 +215,10 @@ func runExtract(filePath string, selection string, outputDir string, dryRun bool // Print summary if dryRun { - fmt.Printf("Dry run complete. Would have written %d file(s) to %s\n", len(variations), outputDir) + fmt.Printf("Dry run complete. Would have written %d files to %s\n", len(variations), outputDir) } else { - fmt.Printf("Successfully extracted %d unique procedure(s) to %s\n", filesWritten, outputDir) + fmt.Printf("Successfully extracted %d unique procedures to %s\n", filesWritten, outputDir) } return nil } - diff --git a/audit-cli/docs/PROCEDURE_PARSING.md b/audit-cli/docs/PROCEDURE_PARSING.md index 7b116ae..db2fcb4 100644 --- a/audit-cli/docs/PROCEDURE_PARSING.md +++ b/audit-cli/docs/PROCEDURE_PARSING.md @@ -418,7 +418,6 @@ type Step struct { Title string Content string SubProcedures []SubProcedure // Multiple sub-procedures within this step - SubSteps []Step // Deprecated: Flattened list for backward compatibility } ``` @@ -426,7 +425,6 @@ type Step struct { - Detects each ordered list within a step as a separate sub-procedure - Determines list type from the first item (`1.` → numbered, `a.` → lettered) - Stores each sub-procedure with its list type -- Maintains `SubSteps` as a flattened list for backward compatibility ### Display with `--show-sub-procedures` Flag @@ -434,13 +432,13 @@ The `extract procedures` command includes a `--show-sub-procedures` flag that di **Example Output:** ``` -Step 2 (Restart Each Member) contains 2 sub-procedure(s) with a total of 5 sub-step(s) +Step 2 (Restart Each Member) contains 2 
sub-procedures with a total of 5 sub-steps - Sub-procedure 1 (2 step(s)): + Sub-procedure 1 (2 steps): a. Shut down the member. b. Restart the member. - Sub-procedure 2 (3 step(s)): + Sub-procedure 2 (3 steps): a. Step down the primary. b. Shut down the member. c. Restart the member. @@ -451,10 +449,6 @@ Step 2 (Restart Each Member) contains 2 sub-procedure(s) with a total of 5 sub-s - Preserves the semantic meaning of list marker types - Shows the structure of multiple sub-procedures within a step -### Backward Compatibility - -The `SubSteps` field is still populated with a flattened list of all sub-steps for backward compatibility with existing code that depends on it. New code should use `SubProcedures` to access the structured sub-procedure information. - ## Include Directive Handling MongoDB documentation uses `.. include::` directives to reuse content across files. The parser handles includes with context-aware expansion: diff --git a/audit-cli/internal/rst/parse_procedures.go b/audit-cli/internal/rst/parse_procedures.go index 1470545..736a759 100644 --- a/audit-cli/internal/rst/parse_procedures.go +++ b/audit-cli/internal/rst/parse_procedures.go @@ -372,9 +372,9 @@ func parseHierarchicalProcedure(lines []string, startIdx int, title string) (Pro i++ } - // Check for sub-steps + // Check for sub-procedures for _, step := range procedure.Steps { - if len(step.SubSteps) > 0 { + if len(step.SubProcedures) > 0 { procedure.HasSubSteps = true break } @@ -395,7 +395,6 @@ func parseNumberedHeadingStep(lines []string, startIdx int) (Step, int) { i := startIdx + 2 // Skip heading and underline var contentLines []string - var subSteps []Step var subProcedures []SubProcedure // Parse the content under this heading @@ -431,8 +430,6 @@ func parseNumberedHeadingStep(lines []string, startIdx int) (Step, int) { Steps: subProcedureSteps, ListType: listType, }) - // Also add to subSteps for backward compatibility - subSteps = append(subSteps, subProcedureSteps...) 
// Add the sub-steps to content as well for j := i; j <= endLine; j++ { contentLines = append(contentLines, lines[j]) @@ -455,7 +452,6 @@ func parseNumberedHeadingStep(lines []string, startIdx int) (Step, int) { } step.Content = strings.Join(contentLines, "\n") - step.SubSteps = subSteps step.SubProcedures = subProcedures return step, i - 1 @@ -496,12 +492,16 @@ func computeProcedureContentHash(proc *Procedure) string { } } - // Include substeps - for _, substep := range step.SubSteps { - content.WriteString(substep.Title) - content.WriteString("|") - content.WriteString(substep.Content) + // Include sub-procedures + for _, subProc := range step.SubProcedures { + content.WriteString(subProc.ListType) content.WriteString("|") + for _, substep := range subProc.Steps { + content.WriteString(substep.Title) + content.WriteString("|") + content.WriteString(substep.Content) + content.WriteString("|") + } } } @@ -817,9 +817,9 @@ func parseProcedureDirectiveFromLines(lines []string, startIdx int, title string i++ } - // Check for sub-steps + // Check for sub-procedures for _, step := range procedure.Steps { - if len(step.SubSteps) > 0 { + if len(step.SubProcedures) > 0 { procedure.HasSubSteps = true break } @@ -835,7 +835,6 @@ func parseStepDirectiveFromLines(lines []string, startIdx int, title string, fil Options: make(map[string]string), LineNum: startIdx + 1, Variations: []Variation{}, - SubSteps: []Step{}, } i := startIdx + 1 // Skip the .. step:: line @@ -899,8 +898,6 @@ func parseStepDirectiveFromLines(lines []string, startIdx int, title string, fil Steps: subProcedureSteps, ListType: listType, }) - // Also add to SubSteps for backward compatibility - step.SubSteps = append(step.SubSteps, subProcedureSteps...) 
// Add the sub-steps to content as well for j := i; j <= endLine; j++ { contentLines = append(contentLines, lines[j]) diff --git a/audit-cli/internal/rst/parse_procedures_test.go b/audit-cli/internal/rst/parse_procedures_test.go index 3b7baf8..0f09e0c 100644 --- a/audit-cli/internal/rst/parse_procedures_test.go +++ b/audit-cli/internal/rst/parse_procedures_test.go @@ -278,10 +278,10 @@ func TestHierarchicalProcedure(t *testing.T) { t.Error("Expected HasSubSteps to be true") } - // Verify that step 2 has sub-steps (the ordered lists) + // Verify that step 2 has sub-procedures (the ordered lists) step2 := proc.Steps[1] - if len(step2.SubSteps) == 0 { - t.Error("Expected step 2 to have sub-steps") + if len(step2.SubProcedures) == 0 { + t.Error("Expected step 2 to have sub-procedures") } t.Logf("Hierarchical procedure parsed correctly with %d steps", len(proc.Steps)) @@ -313,17 +313,17 @@ func TestSubProcedureDetection(t *testing.T) { t.Error("Expected HasSubSteps to be true for 'Procedure with Sub-steps'") } - // Verify at least one step has sub-steps - hasSubSteps := false + // Verify at least one step has sub-procedures + hasSubProcedures := false for _, step := range subStepProc.Steps { - if len(step.SubSteps) > 0 { - hasSubSteps = true + if len(step.SubProcedures) > 0 { + hasSubProcedures = true break } } - if !hasSubSteps { - t.Error("Expected at least one step to have sub-steps") + if !hasSubProcedures { + t.Error("Expected at least one step to have sub-procedures") } t.Logf("Sub-procedure detection working correctly") @@ -369,22 +369,12 @@ func TestSubProcedureListTypes(t *testing.T) { t.Logf("Sub-procedure %d has %d steps with list type '%s'", i+1, len(subProc.Steps), subProc.ListType) } - // Verify backward compatibility - SubSteps should still be populated - if len(step.SubSteps) == 0 { - t.Error("Expected SubSteps to be populated for backward compatibility") - } - // Count total steps across all sub-procedures totalSteps := 0 for _, subProc := range 
step.SubProcedures { totalSteps += len(subProc.Steps) } - // Verify SubSteps has the same total count - if len(step.SubSteps) != totalSteps { - t.Errorf("Expected SubSteps to have %d steps (flattened), got %d", totalSteps, len(step.SubSteps)) - } - t.Logf("Sub-procedure list types tracked correctly: %d sub-procedures with %d total steps", len(step.SubProcedures), totalSteps) } diff --git a/audit-cli/internal/rst/procedure_types.go b/audit-cli/internal/rst/procedure_types.go index fda2f4d..deade53 100644 --- a/audit-cli/internal/rst/procedure_types.go +++ b/audit-cli/internal/rst/procedure_types.go @@ -41,7 +41,6 @@ type Step struct { Options map[string]string // Step options LineNum int // Line number where step starts Variations []Variation // Variations within this step (tabs or selected content) - SubSteps []Step // DEPRECATED: Use SubProcedures instead. Kept for backward compatibility. SubProcedures []SubProcedure // Multiple sub-procedures (each is an ordered list within this step) } From 4a1a27e704c3893f5603c2ee567e9196d0197d13 Mon Sep 17 00:00:00 2001 From: Dachary Date: Wed, 10 Dec 2025 09:11:29 -0500 Subject: [PATCH 14/14] Provide more details about version comparison default Co-authored-by: cory <115956901+cbullinger@users.noreply.github.com> --- audit-cli/commands/compare/file-contents/file_contents.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/audit-cli/commands/compare/file-contents/file_contents.go b/audit-cli/commands/compare/file-contents/file_contents.go index 1eb73fd..87b74fa 100644 --- a/audit-cli/commands/compare/file-contents/file_contents.go +++ b/audit-cli/commands/compare/file-contents/file_contents.go @@ -38,7 +38,7 @@ import ( // The product directory is automatically detected from the file path. 
// // Flags: -// - -V, --versions: Comma-separated list of versions (required for version comparison) +// - -V, --versions: Comma-separated list of versions (optional; auto-discovers all versions if not specified) // - --show-paths: Display file paths of files that differ // - -d, --show-diff: Display unified diff output // - -v, --verbose: Show detailed processing information