diff --git a/.gitignore b/.gitignore index e1c2f5848437..1f228127426f 100644 --- a/.gitignore +++ b/.gitignore @@ -84,3 +84,7 @@ scripts/alias-verification/aliases-suspicious.txt scripts/alias-verification/deletes.txt scripts/alias-verification/renames.txt scripts/alias-verification/fixes-data.txt +scripts/alias-verification/historical-aliases-correct.txt +scripts/alias-verification/historical-aliases-missing.txt +scripts/alias-verification/historical-aliases-report.txt +scripts/alias-verification/historical-fixes.json diff --git a/AGENTS.md b/AGENTS.md index 7763639e6ef5..635cc83c5f89 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -42,7 +42,7 @@ Do not substitute other tools or commands. ## Absolute Prohibitions - **Package manager**: Do **not** change `package.json` to use pnpm. Yarn/npm only. -- **New files**: Must always end with a newline. +- **Markdown (.md) files**: Must always end with a newline. --- diff --git a/content/docs/administration/access-identity/oidc-client/kubernetes-eks.md b/content/docs/administration/access-identity/oidc-client/kubernetes-eks.md index 4b4f7eeef607..e053635072ed 100644 --- a/content/docs/administration/access-identity/oidc-client/kubernetes-eks.md +++ b/content/docs/administration/access-identity/oidc-client/kubernetes-eks.md @@ -14,8 +14,9 @@ menu: parent: openid-connect-client weight: 1 aliases: -- /docs/pulumi-cloud/oidc/client/kubernetes-eks/ -- /docs/pulumi-cloud/access-management/oidc-client/kubernetes-eks/ + - /docs/pulumi-cloud/access-management/oidc-client/kubernetes-eks/ + - /docs/pulumi-cloud/access-management/oidc/client/kubernetes-eks/ + - /docs/pulumi-cloud/oidc/client/kubernetes-eks/ --- This document outlines the steps required to configure Pulumi to accept Elastic Kubernetes Service (EKS) id_tokens to be exchanged for a personal access token. With this configuration, Kubernetes pods authenticate to Pulumi Cloud using OIDC tokens issued by EKS. diff --git a/content/docs/deployments/deployments/oidc/_index.md b/content/docs/deployments/deployments/oidc/_index.md index 2224c008422e..53c117955a0b 100644 --- a/content/docs/deployments/deployments/oidc/_index.md +++ b/content/docs/deployments/deployments/oidc/_index.md @@ -11,11 +11,12 @@ menu: weight: 60 identifier: deployments-deployments-oidc aliases: -- /docs/pulumi-cloud/oidc/ -- /docs/administration/access-identity/oidc/ -- /docs/pulumi-cloud/oidc/provider/ -- /docs/administration/access-identity/oidc/provider/ -- /docs/pulumi-cloud/deployments/oidc/ + - /docs/administration/access-identity/oidc/ + - /docs/administration/access-identity/oidc/provider/ + - /docs/pulumi-cloud/access-management/oidc/provider/ + - /docs/pulumi-cloud/deployments/oidc/ + - /docs/pulumi-cloud/oidc/ + - /docs/pulumi-cloud/oidc/provider/ --- Pulumi Deployments supports OpenID Connect (OIDC) integration with popular cloud providers. In order for a Pulumi IaC operation like `update` or `preview` to work, the Pulumi CLI must be able to access credentials that will allow it to perform the necessary CRUD operations on the resources in your stack. Pulumi Deployments' OIDC integrations allow your your deployments to use dynamic, short-lived cloud credentials for supported clouds instead of static credentials which are less secure and difficult to rotate. This page explains how to set up OIDC for Pulumi Deployments to access resources in your cloud provider accounts. 
diff --git a/content/docs/deployments/deployments/oidc/aws.md b/content/docs/deployments/deployments/oidc/aws.md index 5a440c8b41dd..42d7300c2bc5 100644 --- a/content/docs/deployments/deployments/oidc/aws.md +++ b/content/docs/deployments/deployments/oidc/aws.md @@ -11,13 +11,14 @@ menu: weight: 1 identifier: deployments-deployments-oidc-aws aliases: -- /docs/pulumi-cloud/deployments/oidc/aws/ -- /docs/guides/oidc/provider/aws -- /docs/intro/deployments/oidc/provider/aws/ -- /docs/pulumi-cloud/deployments/oidc/provider/aws/ -- /docs/pulumi-cloud/oidc/provider/aws/ -- /docs/pulumi-cloud/oidc/aws/ -- /docs/administration/access-identity/oidc/provider/aws/ + - /docs/administration/access-identity/oidc/provider/aws/ + - /docs/guides/oidc/provider/aws + - /docs/intro/deployments/oidc/provider/aws/ + - /docs/pulumi-cloud/access-management/oidc/provider/aws/ + - /docs/pulumi-cloud/deployments/oidc/aws/ + - /docs/pulumi-cloud/deployments/oidc/provider/aws/ + - /docs/pulumi-cloud/oidc/aws/ + - /docs/pulumi-cloud/oidc/provider/aws/ --- {{% notes type="info" %}} diff --git a/content/docs/deployments/deployments/oidc/azure.md b/content/docs/deployments/deployments/oidc/azure.md index 99af69631149..ba528e8ae093 100644 --- a/content/docs/deployments/deployments/oidc/azure.md +++ b/content/docs/deployments/deployments/oidc/azure.md @@ -11,13 +11,14 @@ menu: weight: 2 identifier: deployments-deployments-oidc-azure aliases: -- /docs/pulumi-cloud/deployments/oidc/azure/ -- /docs/guides/oidc/provider/azure -- /docs/intro/deployments/oidc/provider/azure/ -- /docs/pulumi-cloud/deployments/oidc/provider/azure/ -- /docs/pulumi-cloud/oidc/provider/azure/ -- /docs/pulumi-cloud/oidc/azure/ -- /docs/administration/access-identity/oidc/provider/azure/ + - /docs/administration/access-identity/oidc/provider/azure/ + - /docs/guides/oidc/provider/azure + - /docs/intro/deployments/oidc/provider/azure/ + - /docs/pulumi-cloud/access-management/oidc/provider/azure/ + - /docs/pulumi-cloud/deployments/oidc/azure/ + - /docs/pulumi-cloud/deployments/oidc/provider/azure/ + - /docs/pulumi-cloud/oidc/azure/ + - /docs/pulumi-cloud/oidc/provider/azure/ --- {{% notes type="info" %}} diff --git a/content/docs/deployments/deployments/oidc/gcp.md b/content/docs/deployments/deployments/oidc/gcp.md index 59ecdb581003..7790cb048316 100644 --- a/content/docs/deployments/deployments/oidc/gcp.md +++ b/content/docs/deployments/deployments/oidc/gcp.md @@ -11,13 +11,14 @@ menu: weight: 3 identifier: deployments-deployments-oidc-gcp aliases: -- /docs/pulumi-cloud/deployments/oidc/gcp/ -- /docs/guides/oidc/provider/gcp -- /docs/intro/deployments/oidc/provider/gcp/ -- /docs/pulumi-cloud/deployments/oidc/provider/gcp/ -- /docs/pulumi-cloud/oidc/provider/gcp/ -- /docs/pulumi-cloud/oidc/gcp/ -- /docs/administration/access-identity/oidc/provider/gcp/ + - /docs/administration/access-identity/oidc/provider/gcp/ + - /docs/guides/oidc/provider/gcp + - /docs/intro/deployments/oidc/provider/gcp/ + - /docs/pulumi-cloud/access-management/oidc/provider/gcp/ + - /docs/pulumi-cloud/deployments/oidc/gcp/ + - /docs/pulumi-cloud/deployments/oidc/provider/gcp/ + - /docs/pulumi-cloud/oidc/gcp/ + - /docs/pulumi-cloud/oidc/provider/gcp/ --- {{% notes type="info" %}} diff --git a/content/docs/deployments/deployments/using/post-automation.md b/content/docs/deployments/deployments/using/post-automation.md index d3261a8bab82..8fa9304e9964 100644 --- a/content/docs/deployments/deployments/using/post-automation.md +++ 
b/content/docs/deployments/deployments/using/post-automation.md @@ -5,7 +5,8 @@ title: "Post-Deployment Automation" h1: "Post-Deployment Automation" meta_image: /images/docs/meta-images/docs-meta.png aliases: -- /docs/pulumi-cloud/deployments/using/post-automation/ + - /docs/pulumi-cloud/deployments/reference/ + - /docs/pulumi-cloud/deployments/using/post-automation/ menu: deployments: parent: deployments-deployments-using diff --git a/content/docs/idp/developer-portals/templates/_index.md b/content/docs/idp/developer-portals/templates/_index.md index fb333cf1a589..2950d6717301 100644 --- a/content/docs/idp/developer-portals/templates/_index.md +++ b/content/docs/idp/developer-portals/templates/_index.md @@ -2,7 +2,7 @@ title: Organization templates title_tag: Get started with organization templates h1: Building developer portals with organization templates -meta_desc: Lean how to build template projects and configure them to work with your Pulumi organization. +meta_desc: Learn how to build template projects and configure them to work with your Pulumi organization. menu: idp: name: Organization templates @@ -12,6 +12,7 @@ menu: aliases: - /docs/idp/developer-portals/templates/ - /docs/pulumi-cloud/developer-platforms/templates/ + - /docs/pulumi-cloud/developer-portals/templates/ --- {{% notes "info" %}} diff --git a/content/docs/reference/cloud-rest-api/deployments/_index.md b/content/docs/reference/cloud-rest-api/deployments/_index.md index bc085e60f7a8..ac1d54af26fd 100644 --- a/content/docs/reference/cloud-rest-api/deployments/_index.md +++ b/content/docs/reference/cloud-rest-api/deployments/_index.md @@ -7,14 +7,15 @@ menu: parent: cloud-rest-api weight: 4.5 aliases: - - /docs/reference/cloud-rest-api/deployments/ - /docs/deployments/deployments/api - /docs/deployments/deployments/api/ - - /docs/reference/deployments-rest-api - - /docs/reference/deployments-rest-api/ - /docs/intro/deployments/api - /docs/intro/deployments/api/ + - /docs/pulumi-cloud/deployments/api/ - /docs/pulumi-cloud/reference/deployments/ + - /docs/reference/cloud-rest-api/deployments/ + - /docs/reference/deployments-rest-api + - /docs/reference/deployments-rest-api/ --- The Deployments API allows you to configure and manage Pulumi Deployments, which enable you to execute Pulumi updates and other operations through the Pulumi Cloud. With this API, you can configure deployment settings for your stacks, trigger deployments, view deployment status and logs, and manage deployment execution. diff --git a/scripts/alias-verification/README.md b/scripts/alias-verification/README.md index 01574f4fd1c5..d600646b832c 100644 --- a/scripts/alias-verification/README.md +++ b/scripts/alias-verification/README.md @@ -93,3 +93,146 @@ python3 verify-aliases.py 4. If issues found, use `generate-fixes.py` and `apply-fixes.py` 5. Re-run `python3 verify-aliases.py` until it passes (exit 0) 6. Merge your PR with confidence! + +--- + +## Comprehensive Historical Verification + +The scripts above check **branch-level changes** (current branch vs master). However, they can miss **pre-reorg moves** - files that were moved on master before your branch was created, or multi-hop moves (A→B→C where only B is aliased). 
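+For example, a page that moved from `/docs/old-section/page/` to `/docs/interim-section/page/` and finally to its current location may only carry an alias for the most recent hop. Complete coverage would look something like the frontmatter below (paths are illustrative, not real docs URLs):
+
+```yaml
+# Frontmatter of the page at its current location (hypothetical example)
+aliases:
+  - /docs/old-section/page/      # original path: a pre-reorg move the branch diff never sees
+  - /docs/interim-section/page/  # intermediate path from a multi-hop move (A→B→C)
+```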
+ +### When to Use Historical Verification + +Use these scripts when: +- You've completed a major documentation reorganization +- You want to ensure ALL historical paths have aliases (not just recent branch changes) +- You're investigating reports of missing aliases that the branch verification missed + +### Comprehensive Verification Workflow + +#### Step 1: Verify All Historical Aliases + +```bash +cd scripts/alias-verification +python3 verify-all-historical-aliases.py +``` + +This script: +- Checks the **complete git history** of every file (limited to past 6 months) +- Uses `git log --follow -M30% origin/master` to track all historical paths +- **Only checks master branch** - ignores development branches that were never merged/published +- **30% similarity detection** catches files that were significantly rewritten during moves (e.g., documentation revamps) +- Checks both frontmatter aliases AND S3 redirect files (`scripts/redirects/*.txt`) +- Identifies files missing any historical alias + +Output files: +- `historical-aliases-missing.txt` - Files with missing historical aliases ❌ +- `historical-aliases-correct.txt` - Files with complete coverage ✓ +- `historical-aliases-report.txt` - Detailed analysis with git history + +#### Step 2: Generate Fixes + +```bash +python3 generate-historical-fixes.py +``` + +This script: +- Parses the missing aliases log +- Reads current aliases from each file +- Generates a combined list of all aliases (existing + missing) +- Outputs `historical-fixes.json` with the complete fix data + +#### Step 3: Apply Fixes + +```bash +python3 apply-historical-fixes.py +``` + +This script: +- Reads `historical-fixes.json` +- Updates each file's frontmatter to add missing aliases +- Prompts for confirmation before modifying files +- Reports success/failure for each file + +**⚠️ WARNING**: This modifies files in place. Make sure to commit any important changes first! + +#### Step 3.5: Review for False Positives (IMPORTANT) + +Before committing the changes, **you must review them for false positives**. The 30% similarity detection can occasionally match unrelated files that happen to have similar content patterns. + +```bash +git diff +``` + +**Common false positive patterns to watch for:** + +1. **Unrelated file replacements**: Files with similar names but completely different purposes + - Example: `doppler.md` matched with historical path `infisical.md` (different products) + - Example: CLI commands that were never actually renamed (e.g., `pulumi_project` ← `pulumi_env_init`) + +2. **Content rewrites that aren't renames**: Files that were deleted and recreated with new content + - Low similarity can match files that share some boilerplate but are fundamentally different + +3. **Development-only paths**: Should be rare now that we only check master, but verify paths make sense + +**How to remove false positive aliases:** + +If you find an incorrect alias, simply edit the file and remove it from the `aliases:` list in the frontmatter. + +**Example of removing a false positive:** + +```yaml +# Before (with false positive) +aliases: + - /docs/esc/integrations/dynamic-login-credentials/infisical-login/ # ← FALSE POSITIVE + - /docs/esc/providers/doppler-login/ + +# After (false positive removed) +aliases: + - /docs/esc/providers/doppler-login/ +``` + +After removing false positives, re-run the apply script if needed, or proceed to verification. 
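+One way to sanity-check a suspicious alias is to inspect the old path's own history on master. If the old path has an unrelated commit history of its own (rather than ending where the current file's history begins), it was likely a separate page and the alias is a false positive. For example (the path below is a placeholder):
+
+```bash
+# List commits on master that touched the suspected old path directly
+git log --oneline origin/master -- content/docs/<suspected-old-path>.md
+```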
+ +#### Step 4: Re-verify + +```bash +python3 verify-all-historical-aliases.py +``` + +Run the comprehensive verification again to confirm all aliases are now present. + +### Example Output + +``` +================================================================================ +=== VERIFICATION SUMMARY === +================================================================================ +Total markdown files scanned: 699 +Files with historical moves: 353 +Files with complete aliases: ✓ 271 +Files with missing aliases: ❌ 82 +Total missing aliases: ❌ 82 +``` + +### What Gets Checked + +The comprehensive verification checks: +1. **Git History**: All paths a file has had in the past 6 months on the master branch +1. **Frontmatter Aliases**: The `aliases:` field in markdown frontmatter +1. **S3 Redirects**: Redirect mappings in `scripts/redirects/*.txt` files +1. **Multi-hop Moves**: Files moved multiple times (A→B→C) +1. **Pre-reorg Moves**: Files moved on master before your branch existed +1. **Low-Similarity Renames**: Files that were significantly rewritten during moves using git's 30% similarity detection (catches delete+add operations that are actually content revamps) + +**Note**: The script only checks `origin/master` history, not development branches. This prevents false positives from paths that only existed during development and were never published. + +### Differences from Branch Verification + +| Feature | Branch Verification | Historical Verification | +|---------|-------------------|------------------------| +| Scope | Current branch vs master | Complete git history | +| Time Range | Branch lifetime | Past 6 months | +| Catches pre-reorg moves | ❌ No | ✓ Yes | +| Catches multi-hop moves | ❌ No | ✓ Yes | +| Checks S3 redirects | ❌ No | ✓ Yes | +| When to use | Every PR with file moves | After major reorgs | diff --git a/scripts/alias-verification/apply-historical-fixes.py b/scripts/alias-verification/apply-historical-fixes.py new file mode 100755 index 000000000000..33f8034a2476 --- /dev/null +++ b/scripts/alias-verification/apply-historical-fixes.py @@ -0,0 +1,173 @@ +#!/usr/bin/env python3 +""" +Apply Historical Alias Fixes + +Reads the fixes generated by generate-historical-fixes.py and applies them +to the markdown files by updating their frontmatter aliases sections. +""" + +import sys +import json +from pathlib import Path +from typing import List, Tuple + +def update_frontmatter_aliases(file_path: Path, new_aliases: List[str]) -> bool: + """ + Update the aliases section in a markdown file's frontmatter. + Returns True if successful, False otherwise. 
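+
+    Note: any existing aliases block is replaced wholesale with the list passed
+    in, so new_aliases should already contain the combined set of old and new
+    aliases, e.g. (illustrative):
+
+        aliases:
+          - /docs/old-path/page/
+          - /docs/newer-old-path/page/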
+ """ + try: + with open(file_path, 'r', encoding='utf-8') as f: + lines = f.readlines() + + # Find frontmatter boundaries + frontmatter_start = -1 + frontmatter_end = -1 + for i, line in enumerate(lines): + if line.strip() == '---': + if frontmatter_start == -1: + frontmatter_start = i + else: + frontmatter_end = i + break + + if frontmatter_start == -1 or frontmatter_end == -1: + print(f"ERROR: Could not find frontmatter in {file_path}", file=sys.stderr) + return False + + # Find existing aliases section in frontmatter + aliases_start = -1 + aliases_end = -1 + for i in range(frontmatter_start + 1, frontmatter_end): + line = lines[i] + if line.strip().startswith('aliases:'): + aliases_start = i + # Find end of aliases section + for j in range(i + 1, frontmatter_end): + next_line = lines[j] + # End if we hit a non-indented line or a line that doesn't start with - + if next_line.strip() and not next_line.startswith((' ', '\t', '-')): + aliases_end = j + break + else: + # Aliases section goes to end of frontmatter + aliases_end = frontmatter_end + break + + # Generate new aliases section + new_aliases_lines = ['aliases:\n'] + for alias in new_aliases: + new_aliases_lines.append(f' - {alias}\n') + + # Reconstruct the file + new_lines = [] + + if aliases_start != -1: + # Replace existing aliases section + new_lines.extend(lines[:aliases_start]) + new_lines.extend(new_aliases_lines) + new_lines.extend(lines[aliases_end:]) + else: + # Add new aliases section after title (or at start of frontmatter if no title) + # Try to find title field to insert after it + title_line = -1 + for i in range(frontmatter_start + 1, frontmatter_end): + if lines[i].strip().startswith('title:'): + title_line = i + break + + if title_line != -1: + # Insert after title + new_lines.extend(lines[:title_line + 1]) + new_lines.extend(new_aliases_lines) + new_lines.extend(lines[title_line + 1:]) + else: + # Insert at start of frontmatter + new_lines.extend(lines[:frontmatter_start + 1]) + new_lines.extend(new_aliases_lines) + new_lines.extend(lines[frontmatter_start + 1:]) + + # Write back to file + with open(file_path, 'w', encoding='utf-8') as f: + f.writelines(new_lines) + + return True + + except Exception as e: + print(f"ERROR: Failed to update {file_path}: {e}", file=sys.stderr) + return False + +def main(): + script_dir = Path(__file__).parent + repo_root = script_dir.parent.parent + fixes_file = script_dir / 'historical-fixes.json' + + if not fixes_file.exists(): + print(f"ERROR: Fixes file not found: {fixes_file}", file=sys.stderr) + print("Run generate-historical-fixes.py first", file=sys.stderr) + sys.exit(1) + + # Load fixes + print("Loading fixes from historical-fixes.json...") + with open(fixes_file, 'r', encoding='utf-8') as f: + fixes = json.load(f) + + print(f"Found {len(fixes)} files to fix") + print() + + # Confirm before proceeding + print("⚠️ WARNING: This will modify markdown files in place!") + print(" Make sure you have committed any important changes first.") + print() + response = input("Proceed with applying fixes? 
(yes/no): ") + + if response.lower() not in ['yes', 'y']: + print("Aborted.") + sys.exit(0) + + print() + print("Applying fixes...") + print() + + # Apply fixes + success_count = 0 + error_count = 0 + + for i, fix in enumerate(fixes, 1): + file_path = repo_root / fix['file_path'] + new_aliases = fix['new_aliases'] + + print(f"[{i}/{len(fixes)}] Updating {fix['file_path']}...", end=' ') + + if update_frontmatter_aliases(file_path, new_aliases): + success_count += 1 + print("✓") + else: + error_count += 1 + print("❌") + + # Summary + print() + print("="*80) + print("=== APPLICATION SUMMARY ===") + print("="*80) + print(f"Total files processed: {len(fixes)}") + print(f"Successfully updated: ✓ {success_count}") + print(f"Errors: ❌ {error_count}") + print() + + if error_count == 0: + print("🎉 ALL FIXES APPLIED SUCCESSFULLY!") + print() + print("💡 Next steps:") + print(" 1. Review the changes with: git diff") + print(" 2. Re-run verify-all-historical-aliases.py to verify") + print(" 3. If everything looks good, commit the changes") + sys.exit(0) + else: + print("⚠️ SOME FIXES FAILED") + print(" Review the errors above and fix manually if needed") + sys.exit(1) + +if __name__ == '__main__': + main() diff --git a/scripts/alias-verification/generate-historical-fixes.py b/scripts/alias-verification/generate-historical-fixes.py new file mode 100755 index 000000000000..74fde831b05f --- /dev/null +++ b/scripts/alias-verification/generate-historical-fixes.py @@ -0,0 +1,189 @@ +#!/usr/bin/env python3 +""" +Generate Historical Alias Fixes + +Reads the output from verify-all-historical-aliases.py and generates +a structured fixes file that can be applied by apply-historical-fixes.py +""" + +import sys +import json +from pathlib import Path +from typing import Set, List, Dict + +def extract_aliases(filepath: Path) -> Set[str]: + """Extract aliases set from frontmatter.""" + aliases = set() + in_frontmatter = False + in_aliases = False + frontmatter_count = 0 + + try: + with open(filepath, 'r', encoding='utf-8') as f: + for line in f: + line = line.rstrip() + + # Track frontmatter boundaries + if line == '---': + frontmatter_count += 1 + if frontmatter_count == 1: + in_frontmatter = True + elif frontmatter_count == 2: + break + continue + + if not in_frontmatter: + continue + + # Check for aliases field + if line.startswith('aliases:'): + in_aliases = True + continue + + # End of aliases section + if in_aliases and line and not line.startswith((' ', '\t', '-')): + in_aliases = False + + # Extract alias entries + if in_aliases and line.strip().startswith('-'): + alias = line.strip()[1:].strip() + aliases.add(alias) + + except Exception as e: + print(f"Warning: Could not read {filepath}: {e}", file=sys.stderr) + + return aliases + +def parse_missing_log(missing_log: Path, repo_root: Path) -> List[Dict]: + """ + Parse the historical-aliases-missing.txt file. + Returns a list of dicts with file_path and missing_aliases. 
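+
+    Expected entry format in the log (file path and alias below are illustrative):
+
+        ❌ MISSING ALIASES: content/docs/example/page.md
+            ...
+            Missing aliases (1):
+            - /docs/old-location/page/
+
+    Entries are terminated by a blank line.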
+ """ + fixes = [] + current_file = None + current_missing = [] + in_missing_section = False + + with open(missing_log, 'r', encoding='utf-8') as f: + for line in f: + line = line.rstrip() + + # New file entry + if line.startswith('❌ MISSING ALIASES:'): + # Save previous entry if exists + if current_file and current_missing: + fixes.append({ + 'file_path': current_file, + 'missing_aliases': sorted(current_missing) + }) + + # Start new entry + current_file = line.split('❌ MISSING ALIASES:')[1].strip() + current_missing = [] + in_missing_section = False + + # Start of missing aliases list + elif 'Missing aliases' in line and line.strip().endswith(':'): + in_missing_section = True + + # Alias entry + elif in_missing_section and line.strip().startswith('-'): + alias = line.strip()[1:].strip() + current_missing.append(alias) + + # Empty line ends the current file entry + elif not line.strip() and current_file and current_missing: + fixes.append({ + 'file_path': current_file, + 'missing_aliases': sorted(current_missing) + }) + current_file = None + current_missing = [] + in_missing_section = False + + # Don't forget the last entry + if current_file and current_missing: + fixes.append({ + 'file_path': current_file, + 'missing_aliases': sorted(current_missing) + }) + + return fixes + +def main(): + script_dir = Path(__file__).parent + repo_root = script_dir.parent.parent + missing_log = script_dir / 'historical-aliases-missing.txt' + output_file = script_dir / 'historical-fixes.json' + + if not missing_log.exists(): + print(f"ERROR: Missing log file not found: {missing_log}", file=sys.stderr) + print("Run verify-all-historical-aliases.py first", file=sys.stderr) + sys.exit(1) + + print("Parsing historical-aliases-missing.txt...") + fixes_data = parse_missing_log(missing_log, repo_root) + + if not fixes_data: + print("No missing aliases found in log file!") + sys.exit(0) + + print(f"Found {len(fixes_data)} files with missing aliases") + print("\nGenerating fixes with current alias information...") + + # Enhance each fix with current aliases + enhanced_fixes = [] + for fix in fixes_data: + file_path = repo_root / fix['file_path'] + + if not file_path.exists(): + print(f"Warning: File not found: {file_path}", file=sys.stderr) + continue + + # Get current aliases + current_aliases = extract_aliases(file_path) + + # Combine current + missing aliases (deduplicated and sorted) + all_aliases = sorted(set(current_aliases) | set(fix['missing_aliases'])) + + enhanced_fixes.append({ + 'file_path': str(fix['file_path']), + 'current_aliases': sorted(current_aliases), + 'missing_aliases': fix['missing_aliases'], + 'new_aliases': all_aliases, + 'aliases_to_add': len(fix['missing_aliases']) + }) + + # Write output + with open(output_file, 'w', encoding='utf-8') as f: + json.dump(enhanced_fixes, f, indent=2) + + # Print summary + print("\n" + "="*80) + print("=== FIX GENERATION SUMMARY ===") + print("="*80) + print(f"Files to fix: {len(enhanced_fixes)}") + print(f"Total aliases to add: {sum(f['aliases_to_add'] for f in enhanced_fixes)}") + print() + print(f"📄 Fixes written to: {output_file}") + print() + print("💡 Next steps:") + print(" 1. Review the generated fixes in historical-fixes.json") + print(" 2. Run apply-historical-fixes.py to apply the fixes") + print(" 3. Re-run verify-all-historical-aliases.py to verify") + print() + + # Show a few examples + print("📋 Example fixes (first 5 files):") + for i, fix in enumerate(enhanced_fixes[:5], 1): + print(f"\n{i}. 
{fix['file_path']}") + print(f" Current aliases: {len(fix['current_aliases'])}") + print(f" Adding {len(fix['missing_aliases'])} missing aliases:") + for alias in fix['missing_aliases']: + print(f" + {alias}") + + if len(enhanced_fixes) > 5: + print(f"\n ... and {len(enhanced_fixes) - 5} more files") + +if __name__ == '__main__': + main() diff --git a/scripts/alias-verification/verify-all-historical-aliases.py b/scripts/alias-verification/verify-all-historical-aliases.py new file mode 100755 index 000000000000..15cb7846a0ad --- /dev/null +++ b/scripts/alias-verification/verify-all-historical-aliases.py @@ -0,0 +1,309 @@ +#!/usr/bin/env python3 +""" +Comprehensive Historical Alias Verification Script + +This script checks the COMPLETE git history of every file in content/docs/ +to ensure ALL historical paths have corresponding aliases. Unlike verify-aliases.py +which only checks branch diffs, this catches: +- Pre-reorg moves that happened on master +- Multi-hop moves (A→B→C where only B is aliased) +- Cross-branch moves +- Old repository migrations +""" + +import sys +import subprocess +from pathlib import Path +from typing import Set, List, Tuple, Dict + +def path_to_url(filepath: str) -> str: + """Convert file path to Hugo URL path.""" + # Remove content/ prefix + url = filepath.replace('content', '', 1) + # Remove .md extension + url = url.replace('.md', '') + # For _index.md files, remove _index + url = url.replace('/_index', '') + # Ensure trailing slash + if not url.endswith('/'): + url += '/' + return url + +def get_file_history(repo_root: Path, current_path: Path) -> Set[str]: + """ + Get all historical paths for a file using git log --follow. + Only checks the past 6 months to avoid very old/irrelevant paths. + Returns a set of unique file paths this file has ever had. + """ + try: + # Use --follow to track renames, --name-only to get just paths + # origin/master to check only published history (not dev branches) + # --since="6 months ago" to limit scope to recent history + # -M30% to detect renames even when content changed significantly (e.g., revamps) + result = subprocess.run( + ['git', 'log', '--follow', '-M30%', 'origin/master', '--since=6 months ago', '--name-only', '--format=', '--', str(current_path.relative_to(repo_root))], + cwd=repo_root, + capture_output=True, + text=True, + check=True + ) + + # Extract unique paths, filter out empty lines + paths = set(line.strip() for line in result.stdout.split('\n') if line.strip()) + + # Filter to only content/docs paths + docs_paths = {p for p in paths if p.startswith('content/docs/')} + + return docs_paths + + except subprocess.CalledProcessError as e: + print(f"Warning: Could not get history for {current_path}: {e}", file=sys.stderr) + return set() + +def load_s3_redirects(repo_root: Path) -> Dict[str, str]: + """Load all S3 redirect mappings from scripts/redirects/*.txt files. + + Returns a dict mapping source URLs to destination URLs. 
+ Format: {'/docs/esc-cli/': '/docs/esc/cli/'} + """ + redirects = {} + redirects_dir = repo_root / 'scripts' / 'redirects' + + if not redirects_dir.exists(): + return redirects + + for redirect_file in redirects_dir.glob('*.txt'): + try: + with open(redirect_file, 'r', encoding='utf-8') as f: + for line_num, line in enumerate(f, 1): + line = line.strip() + if not line or line.startswith('#'): + continue + + # Format: source|destination + if '|' not in line: + print(f"Warning: Invalid redirect format in {redirect_file}:{line_num}", file=sys.stderr) + continue + + source, destination = line.split('|', 1) + source = source.strip() + destination = destination.strip() + + # Convert source to URL format + # Remove /index.html suffix if present + source_url = '/' + source.replace('/index.html', '/') + + redirects[source_url] = destination + + except Exception as e: + print(f"Warning: Could not read redirect file {redirect_file}: {e}", file=sys.stderr) + + return redirects + +def extract_aliases(filepath: Path) -> Set[str]: + """Extract aliases set from frontmatter.""" + aliases = set() + in_frontmatter = False + in_aliases = False + frontmatter_count = 0 + + try: + with open(filepath, 'r', encoding='utf-8') as f: + for line in f: + line = line.rstrip() + + # Track frontmatter boundaries + if line == '---': + frontmatter_count += 1 + if frontmatter_count == 1: + in_frontmatter = True + elif frontmatter_count == 2: + break + continue + + if not in_frontmatter: + continue + + # Check for aliases field + if line.startswith('aliases:'): + in_aliases = True + continue + + # End of aliases section + if in_aliases and line and not line.startswith((' ', '\t', '-')): + in_aliases = False + + # Extract alias entries + if in_aliases and line.strip().startswith('-'): + alias = line.strip()[1:].strip() + aliases.add(alias) + + except Exception as e: + print(f"Warning: Could not read {filepath}: {e}", file=sys.stderr) + + return aliases + +def analyze_file(repo_root: Path, current_file: Path, s3_redirects: Dict[str, str]) -> Tuple[Set[str], Set[str], Set[str], Set[str]]: + """ + Analyze a single file for historical aliases. 
+ Returns (all_historical_urls, current_aliases, missing_aliases, s3_covered_urls) + """ + # Get all historical paths + historical_paths = get_file_history(repo_root, current_file) + + # Convert to URLs + all_historical_urls = {path_to_url(path) for path in historical_paths} + + # Get current URL (should be in the set) + current_url = path_to_url(str(current_file.relative_to(repo_root))) + + # Remove current URL from historical URLs (we don't need to alias to ourselves) + all_historical_urls.discard(current_url) + + # Get current aliases from frontmatter + current_aliases = extract_aliases(current_file) + + # Check which historical URLs are covered by S3 redirects + s3_covered_urls = {url for url in all_historical_urls if url in s3_redirects} + + # Find missing aliases (not in frontmatter AND not in S3 redirects) + missing_aliases = all_historical_urls - current_aliases - s3_covered_urls + + return all_historical_urls, current_aliases, missing_aliases, s3_covered_urls + +def main(): + script_dir = Path(__file__).parent + repo_root = script_dir.parent.parent + content_dir = repo_root / 'content' / 'docs' + + if not content_dir.exists(): + print(f"ERROR: Content directory not found: {content_dir}", file=sys.stderr) + sys.exit(1) + + # Output files + correct_log = script_dir / 'historical-aliases-correct.txt' + missing_log = script_dir / 'historical-aliases-missing.txt' + report_log = script_dir / 'historical-aliases-report.txt' + + correct_log.write_text('') + missing_log.write_text('') + report_log.write_text('') + + # Counters + total_files = 0 + files_with_history = 0 + files_correct = 0 + files_missing = 0 + total_missing_aliases = 0 + + # Load S3 redirects + print("Loading S3 redirect mappings from scripts/redirects/...") + s3_redirects = load_s3_redirects(repo_root) + print(f"Loaded {len(s3_redirects)} S3 redirect mappings\n") + + print("Scanning all content/docs/ files for complete git history...") + print("This may take a few minutes...\n") + + # Get all markdown files + all_md_files = sorted(content_dir.rglob('*.md')) + + with open(report_log, 'a') as report: + report.write("=== COMPREHENSIVE HISTORICAL ALIAS VERIFICATION ===\n") + report.write(f"Checking {len(all_md_files)} markdown files\n") + report.write(f"Git history scope: Past 6 months\n") + report.write(f"S3 redirects loaded: {len(s3_redirects)}\n\n") + + for md_file in all_md_files: + total_files += 1 + + # Progress indicator + if total_files % 50 == 0: + print(f"Processed {total_files}/{len(all_md_files)} files...", file=sys.stderr) + + # Analyze this file + historical_urls, current_aliases, missing_aliases, s3_covered = analyze_file(repo_root, md_file, s3_redirects) + + # Skip files with no history (only ever existed at current path) + if not historical_urls: + continue + + files_with_history += 1 + rel_path = md_file.relative_to(repo_root) + + if missing_aliases: + files_missing += 1 + total_missing_aliases += len(missing_aliases) + + with open(missing_log, 'a') as log: + log.write(f"❌ MISSING ALIASES: {rel_path}\n") + log.write(f" Historical paths found: {len(historical_urls)}\n") + log.write(f" Current aliases: {len(current_aliases)}\n") + log.write(f" S3 redirects: {len(s3_covered)}\n") + log.write(f" Missing aliases ({len(missing_aliases)}):\n") + for alias in sorted(missing_aliases): + log.write(f" - {alias}\n") + log.write("\n") + + with open(report_log, 'a') as report: + report.write(f"\n{'='*80}\n") + report.write(f"FILE: {rel_path}\n") + report.write(f"{'='*80}\n\n") + report.write(f"HISTORICAL URLS 
({len(historical_urls)}):\n") + for url in sorted(historical_urls): + if url in current_aliases: + status = "✓ ALIAS" + elif url in s3_covered: + status = "✓ S3" + else: + status = "❌ MISSING" + report.write(f" {status:12} {url}\n") + report.write(f"\nCURRENT ALIASES ({len(current_aliases)}):\n") + for alias in sorted(current_aliases): + report.write(f" - {alias}\n") + if s3_covered: + report.write(f"\nS3 REDIRECTS ({len(s3_covered)}):\n") + for url in sorted(s3_covered): + report.write(f" → {url}\n") + report.write(f"\nMISSING ALIASES ({len(missing_aliases)}):\n") + for alias in sorted(missing_aliases): + report.write(f" ❌ {alias}\n") + report.write("\n") + else: + files_correct += 1 + + with open(correct_log, 'a') as log: + log.write(f"✓ {rel_path}\n") + log.write(f" Historical paths: {len(historical_urls)}, Aliases: {len(current_aliases)}, S3: {len(s3_covered)}\n") + + # Print summary + print("\n" + "="*80) + print("=== VERIFICATION SUMMARY ===") + print("="*80) + print(f"Total markdown files scanned: {total_files}") + print(f"Files with historical moves: {files_with_history}") + print(f"Files with complete aliases: ✓ {files_correct}") + print(f"Files with missing aliases: ❌ {files_missing}") + print(f"Total missing aliases: ❌ {total_missing_aliases}") + print() + + if files_missing == 0: + print(f"🎉 ALL HISTORICAL ALIASES VERIFIED!") + print(f" All {files_with_history} files with history have complete alias coverage.") + sys.exit(0) + else: + print("❌ MISSING ALIASES FOUND!") + print() + print(f"📄 Detailed reports generated:") + print(f" ✓ Files with complete aliases: {correct_log}") + print(f" ❌ Files missing aliases: {missing_log}") + print(f" 📊 Full analysis report: {report_log}") + print() + print("💡 Next steps:") + print(" 1. Review historical-aliases-missing.txt") + print(" 2. Run generate-historical-fixes.py to generate historical-fixes.json") + print(" 3. Run apply-historical-fixes.py to apply the fixes") + print(" 4. Re-run this script to verify") + sys.exit(1) + +if __name__ == '__main__': + main()