From 0850522ce877c5931a266dc535fb032880950e1d Mon Sep 17 00:00:00 2001 From: Thomas Bartlett <67928676+thomas-bartlett@users.noreply.github.com> Date: Mon, 17 Nov 2025 13:41:58 -0500 Subject: [PATCH 1/4] Added optional tags field and filtering support --- sources/core/codeguard-0-api-web-services.md | 4 ++ .../core/codeguard-0-authentication-mfa.md | 3 + src/convert_to_ide_formats.py | 60 ++++++++++++++++--- src/converter.py | 14 ++++- src/formats/base.py | 2 + src/utils.py | 35 +++++++++++ src/validate_unified_rules.py | 9 ++- 7 files changed, 117 insertions(+), 10 deletions(-) diff --git a/sources/core/codeguard-0-api-web-services.md b/sources/core/codeguard-0-api-web-services.md index 94a51fc..0539f56 100644 --- a/sources/core/codeguard-0-api-web-services.md +++ b/sources/core/codeguard-0-api-web-services.md @@ -12,6 +12,10 @@ languages: - typescript - xml - yaml +tags: +- api +- web-security +- microservices alwaysApply: false --- diff --git a/sources/core/codeguard-0-authentication-mfa.md b/sources/core/codeguard-0-authentication-mfa.md index 2be26cc..ff8cd20 100644 --- a/sources/core/codeguard-0-authentication-mfa.md +++ b/sources/core/codeguard-0-authentication-mfa.md @@ -13,6 +13,9 @@ languages: - ruby - swift - typescript +tags: +- authentication +- web-security alwaysApply: false --- diff --git a/src/convert_to_ide_formats.py b/src/convert_to_ide_formats.py index 1d4f5fb..120a455 100644 --- a/src/convert_to_ide_formats.py +++ b/src/convert_to_ide_formats.py @@ -36,6 +36,23 @@ def sync_plugin_metadata(version: str) -> None: print(f"✅ Synced plugin metadata to {version}") +def matches_tag_filter(rule_tags: list[str], filter_tags: list[str]) -> bool: + """ + Check if rule has all required tags (case-insensitive AND logic). + + Args: + rule_tags: List of tags from the rule (already lowercase from parsing) + filter_tags: List of tags to filter by + + Returns: + True if rule has all filter tags (or no filter), False otherwise + """ + if not filter_tags: + return True # No filter means all pass + + return all(tag.lower() in rule_tags for tag in filter_tags) + + def update_skill_md(language_to_rules: dict[str, list[str]], skill_path: str) -> None: """ Update SKILL.md with language-to-rules mapping table. @@ -81,7 +98,7 @@ def update_skill_md(language_to_rules: dict[str, list[str]], skill_path: str) -> print(f"Updated SKILL.md with language mappings") -def convert_rules(input_path: str, output_dir: str = "dist", include_claudecode: bool = True, version: str = None) -> dict[str, list[str]]: +def convert_rules(input_path: str, output_dir: str = "dist", include_claudecode: bool = True, version: str = None, filter_tags: list[str] = None) -> dict[str, list[str]]: """ Convert rule file(s) to all supported IDE formats using RuleConverter. @@ -90,6 +107,7 @@ def convert_rules(input_path: str, output_dir: str = "dist", include_claudecode: output_dir: Output directory (default: 'dist/') include_claudecode: Whether to generate Claude Code plugin (default: True, only for core rules) version: Version string to use (default: read from pyproject.toml) + filter_tags: Optional list of tags to filter by (AND logic, case-insensitive) Returns: Dictionary with 'success' and 'errors' lists: @@ -138,7 +156,7 @@ def convert_rules(input_path: str, output_dir: str = "dist", include_claudecode: # Setup output directory output_base = Path(output_dir) - results = {"success": [], "errors": []} + results = {"success": [], "errors": [], "skipped": []} language_to_rules = defaultdict(list) # Process each file @@ -146,6 +164,11 @@ def convert_rules(input_path: str, output_dir: str = "dist", include_claudecode: try: # Convert the file (raises exceptions on error) result = converter.convert(md_file) + + # Apply tag filter if specified + if filter_tags and not matches_tag_filter(result.tags, filter_tags): + results["skipped"].append(result.filename) + continue # Write each format output_files = [] @@ -192,9 +215,14 @@ def convert_rules(input_path: str, output_dir: str = "dist", include_claudecode: results["errors"].append(error_msg) # Summary - print( - f"\nResults: {len(results['success'])} success, {len(results['errors'])} errors" - ) + if filter_tags: + print( + f"\nResults: {len(results['success'])} success, {len(results['skipped'])} skipped (tag filter), {len(results['errors'])} errors" + ) + else: + print( + f"\nResults: {len(results['success'])} success, {len(results['errors'])} errors" + ) # Generate SKILL.md with language mappings (only if Claude Code is included) if include_claudecode and language_to_rules: @@ -256,6 +284,12 @@ def _resolve_source_paths(args) -> list[Path]: default="dist", help="Output directory for generated bundles (default: dist).", ) + parser.add_argument( + "--tag", + "--tags", + dest="tags", + help="Filter rules by tags (comma-separated, case-insensitive, AND logic). Example: --tag api,web-security", + ) cli_args = parser.parse_args() source_paths = _resolve_source_paths(cli_args) @@ -316,7 +350,16 @@ def _resolve_source_paths(args) -> list[Path]: print() # Convert all sources - aggregated = {"success": [], "errors": []} + aggregated = {"success": [], "errors": [], "skipped": []} + # Parse comma-separated tags + filter_tags = None + if cli_args.tags: + filter_tags = [tag.strip() for tag in cli_args.tags.split(",") if tag.strip()] + + # Print tag filter info if active + if filter_tags: + print(f"Tag filter active: {', '.join(filter_tags)} (AND logic - rules must have all tags)\n") + for source_path in source_paths: is_core = source_path == Path("sources/core") @@ -325,11 +368,14 @@ def _resolve_source_paths(args) -> list[Path]: str(source_path), cli_args.output_dir, include_claudecode=is_core, - version=version + version=version, + filter_tags=filter_tags ) aggregated["success"].extend(results["success"]) aggregated["errors"].extend(results["errors"]) + if "skipped" in results: + aggregated["skipped"].extend(results["skipped"]) print("") if aggregated["errors"]: diff --git a/src/converter.py b/src/converter.py index 39f4fce..4a71a94 100644 --- a/src/converter.py +++ b/src/converter.py @@ -12,7 +12,7 @@ from pathlib import Path from language_mappings import languages_to_globs -from utils import parse_frontmatter_and_content +from utils import parse_frontmatter_and_content, validate_tags from formats import ( BaseFormat, ProcessedRule, @@ -45,6 +45,7 @@ class ConversionResult: basename: Filename without extension (e.g., 'my-rule') outputs: Dictionary mapping format names to their outputs languages: List of programming languages the rule applies to, empty list if always applies + tags: List of tags for categorizing and filtering rules Example: result = ConversionResult( filename="my-rule.md", @@ -56,7 +57,8 @@ class ConversionResult: subpath=".cursor/rules" ) }, - languages=["python", "javascript"] + languages=["python", "javascript"], + tags=["authentication", "web-security"] ) """ @@ -64,6 +66,7 @@ class ConversionResult: basename: str outputs: dict[str, FormatOutput] languages: list[str] + tags: list[str] class RuleConverter: @@ -159,6 +162,11 @@ def parse_rule(self, content: str, filename: str) -> ProcessedRule: f"'languages' must be a non-empty list in {filename} when alwaysApply is false" ) + # Parse and validate tags (optional field) + tags = [] + if "tags" in frontmatter: + tags = validate_tags(frontmatter["tags"], filename) + # Adding rule_id to the beginning of the content rule_id = Path(filename).stem markdown_content = f"rule_id: {rule_id}\n\n{markdown_content}" @@ -169,6 +177,7 @@ def parse_rule(self, content: str, filename: str) -> ProcessedRule: always_apply=always_apply, content=markdown_content, filename=filename, + tags=tags, ) def generate_globs(self, languages: list[str]) -> str: @@ -242,4 +251,5 @@ def convert(self, filepath: str) -> ConversionResult: basename=basename, outputs=outputs, languages=rule.languages, + tags=rule.tags, ) diff --git a/src/formats/base.py b/src/formats/base.py index 5af8732..65c75e5 100644 --- a/src/formats/base.py +++ b/src/formats/base.py @@ -25,6 +25,7 @@ class ProcessedRule: always_apply: Whether this rule should apply to all files content: The actual rule content in markdown format filename: Original filename of the rule + tags: List of tags for categorizing and filtering rules """ description: str @@ -32,6 +33,7 @@ class ProcessedRule: always_apply: bool content: str filename: str + tags: list[str] class BaseFormat(ABC): diff --git a/src/utils.py b/src/utils.py index fb0fed6..cc64646 100644 --- a/src/utils.py +++ b/src/utils.py @@ -57,6 +57,41 @@ def parse_frontmatter_and_content(content: str) -> tuple[dict | None, str]: return frontmatter, markdown_content.strip() +def validate_tags(tags, filename=None) -> list[str]: + """ + Validate tags list and return normalized (lowercase) tags. + + Args: + tags: The tags value to validate (should be a list) + filename: Optional filename for better error messages + + Returns: + List of normalized (lowercase) tags + + Raises: + ValueError: If tags are invalid (wrong type, contain whitespace, empty, etc.) + """ + context = f" in {filename}" if filename else "" + + if not isinstance(tags, list): + raise ValueError(f"'tags' must be a list{context}") + + normalized = [] + for tag in tags: + if not isinstance(tag, str): + raise ValueError(f"All tags must be strings{context}, found: {type(tag).__name__}") + + if any(c.isspace() for c in tag): + raise ValueError(f"Tags cannot contain whitespace: '{tag}'{context}") + + if not tag: + raise ValueError(f"Empty tag found{context}") + + normalized.append(tag.lower()) + + return normalized + + def get_version_from_pyproject() -> str: """ Read version from pyproject.toml using Python's built-in TOML parser. diff --git a/src/validate_unified_rules.py b/src/validate_unified_rules.py index bd509bc..a30e56c 100755 --- a/src/validate_unified_rules.py +++ b/src/validate_unified_rules.py @@ -12,7 +12,7 @@ from pathlib import Path from language_mappings import LANGUAGE_TO_EXTENSIONS -from utils import parse_frontmatter_and_content +from utils import parse_frontmatter_and_content, validate_tags def validate_rule(file_path: Path) -> dict[str, list[str]]: @@ -54,6 +54,13 @@ def validate_rule(file_path: Path) -> dict[str, list[str]]: if unknown: warnings.append(f"Unknown languages: {', '.join(unknown)}") + # Validate tags if present + if "tags" in frontmatter: + try: + validate_tags(frontmatter["tags"], file_path.name) + except ValueError as e: + errors.append(str(e)) + # Check content exists if not markdown_content.strip(): errors.append("Rule content cannot be empty") From 4f489ba2c6c340f027cbcf8a0c5a355ddd4ff19e Mon Sep 17 00:00:00 2001 From: Thomas Bartlett <67928676+thomas-bartlett@users.noreply.github.com> Date: Tue, 18 Nov 2025 09:30:13 -0500 Subject: [PATCH 2/4] Improved tag validation and normalization --- src/convert_to_ide_formats.py | 12 ++++++------ src/utils.py | 13 ++++++++++--- 2 files changed, 16 insertions(+), 9 deletions(-) diff --git a/src/convert_to_ide_formats.py b/src/convert_to_ide_formats.py index 120a455..3c3cab3 100644 --- a/src/convert_to_ide_formats.py +++ b/src/convert_to_ide_formats.py @@ -38,11 +38,11 @@ def sync_plugin_metadata(version: str) -> None: def matches_tag_filter(rule_tags: list[str], filter_tags: list[str]) -> bool: """ - Check if rule has all required tags (case-insensitive AND logic). + Check if rule has all required tags (AND logic). Args: - rule_tags: List of tags from the rule (already lowercase from parsing) - filter_tags: List of tags to filter by + rule_tags: List of tags from the rule (already normalized to lowercase) + filter_tags: List of tags to filter by (already normalized to lowercase) Returns: True if rule has all filter tags (or no filter), False otherwise @@ -50,7 +50,7 @@ def matches_tag_filter(rule_tags: list[str], filter_tags: list[str]) -> bool: if not filter_tags: return True # No filter means all pass - return all(tag.lower() in rule_tags for tag in filter_tags) + return all(tag in rule_tags for tag in filter_tags) def update_skill_md(language_to_rules: dict[str, list[str]], skill_path: str) -> None: @@ -351,10 +351,10 @@ def _resolve_source_paths(args) -> list[Path]: # Convert all sources aggregated = {"success": [], "errors": [], "skipped": []} - # Parse comma-separated tags + # Parse comma-separated tags and normalize to lowercase filter_tags = None if cli_args.tags: - filter_tags = [tag.strip() for tag in cli_args.tags.split(",") if tag.strip()] + filter_tags = [tag.strip().lower() for tag in cli_args.tags.split(",") if tag.strip()] # Print tag filter info if active if filter_tags: diff --git a/src/utils.py b/src/utils.py index cc64646..b37d2e9 100644 --- a/src/utils.py +++ b/src/utils.py @@ -62,20 +62,27 @@ def validate_tags(tags, filename=None) -> list[str]: Validate tags list and return normalized (lowercase) tags. Args: - tags: The tags value to validate (should be a list) + tags: The tags value to validate (should be a non-empty list) filename: Optional filename for better error messages Returns: List of normalized (lowercase) tags Raises: - ValueError: If tags are invalid (wrong type, contain whitespace, empty, etc.) + ValueError: If tags are invalid (wrong type, empty list, contain whitespace, etc.) + + Note: + An empty tags list (tags: []) is considered invalid. If you have no tags, + omit the 'tags' field entirely from the frontmatter. """ context = f" in {filename}" if filename else "" if not isinstance(tags, list): raise ValueError(f"'tags' must be a list{context}") + if not tags: + raise ValueError(f"'tags' list cannot be empty{context}. Omit the field if you have no tags.") + normalized = [] for tag in tags: if not isinstance(tag, str): @@ -89,7 +96,7 @@ def validate_tags(tags, filename=None) -> list[str]: normalized.append(tag.lower()) - return normalized + return list(set(normalized)) def get_version_from_pyproject() -> str: From 09839b8bcfeda3fa01eaf432a5264865914387b6 Mon Sep 17 00:00:00 2001 From: Thomas Bartlett <67928676+thomas-bartlett@users.noreply.github.com> Date: Tue, 18 Nov 2025 11:58:59 -0500 Subject: [PATCH 3/4] Added tag system for rule categorization and filtering. --- .../codeguard-0-additional-cryptography.md | 3 +++ sources/core/codeguard-0-api-web-services.md | 4 +--- .../core/codeguard-0-authentication-mfa.md | 2 +- .../codeguard-0-client-side-web-security.md | 2 ++ ...eguard-0-cloud-orchestration-kubernetes.md | 2 ++ sources/core/codeguard-0-data-storage.md | 3 +++ .../codeguard-0-devops-ci-cd-containers.md | 2 ++ sources/core/codeguard-0-iac-security.md | 2 ++ .../codeguard-0-input-validation-injection.md | 2 ++ sources/core/codeguard-0-logging.md | 2 ++ .../codeguard-0-privacy-data-protection.md | 2 ++ ...eguard-0-session-management-and-cookies.md | 3 +++ .../core/codeguard-1-digital-certificates.md | 2 ++ .../core/codeguard-1-hardcoded-credentials.md | 2 ++ src/tag_mappings.py | 21 +++++++++++++++++++ src/validate_unified_rules.py | 7 ++++++- 16 files changed, 56 insertions(+), 5 deletions(-) create mode 100644 src/tag_mappings.py diff --git a/sources/core/codeguard-0-additional-cryptography.md b/sources/core/codeguard-0-additional-cryptography.md index 6bcb4fd..db5edae 100644 --- a/sources/core/codeguard-0-additional-cryptography.md +++ b/sources/core/codeguard-0-additional-cryptography.md @@ -14,6 +14,9 @@ languages: - typescript - xml - yaml +tags: +- data-security +- secrets alwaysApply: false --- diff --git a/sources/core/codeguard-0-api-web-services.md b/sources/core/codeguard-0-api-web-services.md index 0539f56..2c78e45 100644 --- a/sources/core/codeguard-0-api-web-services.md +++ b/sources/core/codeguard-0-api-web-services.md @@ -13,9 +13,7 @@ languages: - xml - yaml tags: -- api -- web-security -- microservices +- web alwaysApply: false --- diff --git a/sources/core/codeguard-0-authentication-mfa.md b/sources/core/codeguard-0-authentication-mfa.md index ff8cd20..580af4e 100644 --- a/sources/core/codeguard-0-authentication-mfa.md +++ b/sources/core/codeguard-0-authentication-mfa.md @@ -15,7 +15,7 @@ languages: - typescript tags: - authentication -- web-security +- web alwaysApply: false --- diff --git a/sources/core/codeguard-0-client-side-web-security.md b/sources/core/codeguard-0-client-side-web-security.md index 17a4e89..c2b0c68 100644 --- a/sources/core/codeguard-0-client-side-web-security.md +++ b/sources/core/codeguard-0-client-side-web-security.md @@ -8,6 +8,8 @@ languages: - php - typescript - vlang +tags: +- web alwaysApply: false --- diff --git a/sources/core/codeguard-0-cloud-orchestration-kubernetes.md b/sources/core/codeguard-0-cloud-orchestration-kubernetes.md index ec2e982..828edd9 100644 --- a/sources/core/codeguard-0-cloud-orchestration-kubernetes.md +++ b/sources/core/codeguard-0-cloud-orchestration-kubernetes.md @@ -4,6 +4,8 @@ description: Kubernetes hardening (RBAC, admission policies, network policies, s languages: - javascript - yaml +tags: +- infrastructure alwaysApply: false --- diff --git a/sources/core/codeguard-0-data-storage.md b/sources/core/codeguard-0-data-storage.md index 6bd68f5..e01057b 100644 --- a/sources/core/codeguard-0-data-storage.md +++ b/sources/core/codeguard-0-data-storage.md @@ -6,6 +6,9 @@ languages: - javascript - sql - yaml +tags: +- data-security +- infrastructure alwaysApply: false --- diff --git a/sources/core/codeguard-0-devops-ci-cd-containers.md b/sources/core/codeguard-0-devops-ci-cd-containers.md index 1db3562..52bb26c 100644 --- a/sources/core/codeguard-0-devops-ci-cd-containers.md +++ b/sources/core/codeguard-0-devops-ci-cd-containers.md @@ -8,6 +8,8 @@ languages: - shell - xml - yaml +tags: +- infrastructure alwaysApply: false --- diff --git a/sources/core/codeguard-0-iac-security.md b/sources/core/codeguard-0-iac-security.md index 0785120..17fe6de 100644 --- a/sources/core/codeguard-0-iac-security.md +++ b/sources/core/codeguard-0-iac-security.md @@ -8,6 +8,8 @@ languages: - ruby - shell - yaml +tags: +- infrastructure alwaysApply: false --- diff --git a/sources/core/codeguard-0-input-validation-injection.md b/sources/core/codeguard-0-input-validation-injection.md index 9ae2ab1..fc15368 100644 --- a/sources/core/codeguard-0-input-validation-injection.md +++ b/sources/core/codeguard-0-input-validation-injection.md @@ -14,6 +14,8 @@ languages: - shell - sql - typescript +tags: +- web alwaysApply: false --- diff --git a/sources/core/codeguard-0-logging.md b/sources/core/codeguard-0-logging.md index 659be01..2a354aa 100644 --- a/sources/core/codeguard-0-logging.md +++ b/sources/core/codeguard-0-logging.md @@ -5,6 +5,8 @@ languages: - c - javascript - yaml +tags: +- privacy alwaysApply: false --- diff --git a/sources/core/codeguard-0-privacy-data-protection.md b/sources/core/codeguard-0-privacy-data-protection.md index f28876d..22f522d 100644 --- a/sources/core/codeguard-0-privacy-data-protection.md +++ b/sources/core/codeguard-0-privacy-data-protection.md @@ -5,6 +5,8 @@ languages: - javascript - matlab - yaml +tags: +- privacy alwaysApply: false --- diff --git a/sources/core/codeguard-0-session-management-and-cookies.md b/sources/core/codeguard-0-session-management-and-cookies.md index be73bf8..e0d53e8 100644 --- a/sources/core/codeguard-0-session-management-and-cookies.md +++ b/sources/core/codeguard-0-session-management-and-cookies.md @@ -11,6 +11,9 @@ languages: - python - ruby - typescript +tags: +- authentication +- web alwaysApply: false --- diff --git a/sources/core/codeguard-1-digital-certificates.md b/sources/core/codeguard-1-digital-certificates.md index 3d73c70..c333fa2 100644 --- a/sources/core/codeguard-1-digital-certificates.md +++ b/sources/core/codeguard-1-digital-certificates.md @@ -1,6 +1,8 @@ --- description: Certificate Best Practices languages: [] +tags: +- secrets alwaysApply: true --- diff --git a/sources/core/codeguard-1-hardcoded-credentials.md b/sources/core/codeguard-1-hardcoded-credentials.md index 5f885ec..978d48a 100644 --- a/sources/core/codeguard-1-hardcoded-credentials.md +++ b/sources/core/codeguard-1-hardcoded-credentials.md @@ -1,6 +1,8 @@ --- description: No Hardcoded Credentials languages: [] +tags: +- secrets alwaysApply: true --- diff --git a/src/tag_mappings.py b/src/tag_mappings.py new file mode 100644 index 0000000..304992b --- /dev/null +++ b/src/tag_mappings.py @@ -0,0 +1,21 @@ +# Copyright 2025 Cisco Systems, Inc. and its affiliates +# +# SPDX-License-Identifier: Apache-2.0 + +""" +Tag Mappings + +Centralized list of known tags for categorizing security rules. +""" + +# Known tags used in rules +# Add new tags here as they are introduced in rules +KNOWN_TAGS = { + "authentication", + "data-security", + "infrastructure", + "privacy", + "secrets", + "web", +} + diff --git a/src/validate_unified_rules.py b/src/validate_unified_rules.py index a30e56c..8fd454c 100755 --- a/src/validate_unified_rules.py +++ b/src/validate_unified_rules.py @@ -12,6 +12,7 @@ from pathlib import Path from language_mappings import LANGUAGE_TO_EXTENSIONS +from tag_mappings import KNOWN_TAGS from utils import parse_frontmatter_and_content, validate_tags @@ -57,7 +58,11 @@ def validate_rule(file_path: Path) -> dict[str, list[str]]: # Validate tags if present if "tags" in frontmatter: try: - validate_tags(frontmatter["tags"], file_path.name) + normalized_tags = validate_tags(frontmatter["tags"], file_path.name) + # Error on tags not in known list + unknown_tags = [tag for tag in normalized_tags if tag not in KNOWN_TAGS] + if unknown_tags: + errors.append(f"Unknown tags (add to KNOWN_TAGS): {', '.join(sorted(unknown_tags))}") except ValueError as e: errors.append(str(e)) From 46bac17e8723472a5286a83dd4473930c59192a4 Mon Sep 17 00:00:00 2001 From: Thomas Bartlett <67928676+thomas-bartlett@users.noreply.github.com> Date: Tue, 18 Nov 2025 12:11:20 -0500 Subject: [PATCH 4/4] Preserve tag order when deduplicating in validate_tags --- src/utils.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/src/utils.py b/src/utils.py index b37d2e9..a360e74 100644 --- a/src/utils.py +++ b/src/utils.py @@ -66,14 +66,17 @@ def validate_tags(tags, filename=None) -> list[str]: filename: Optional filename for better error messages Returns: - List of normalized (lowercase) tags + List of normalized (lowercase) tags with duplicates removed. + Original order is preserved. Raises: ValueError: If tags are invalid (wrong type, empty list, contain whitespace, etc.) Note: - An empty tags list (tags: []) is considered invalid. If you have no tags, - omit the 'tags' field entirely from the frontmatter. + - An empty tags list (tags: []) is considered invalid. If you have no tags, + omit the 'tags' field entirely from the frontmatter. + - Duplicate tags (after normalization) are automatically removed while + preserving the order of first occurrence. """ context = f" in {filename}" if filename else "" @@ -96,7 +99,7 @@ def validate_tags(tags, filename=None) -> list[str]: normalized.append(tag.lower()) - return list(set(normalized)) + return list(dict.fromkeys(normalized)) def get_version_from_pyproject() -> str: