project-codeguard · thomas-bartlett · Nov 18, 2025 · Nov 17, 2025 · Nov 18, 2025 · Nov 18, 2025
diff --git a/sources/core/codeguard-0-additional-cryptography.md b/sources/core/codeguard-0-additional-cryptography.md
@@ -14,6 +14,9 @@ languages:
 - typescript
 - xml
 - yaml
+tags:
+- data-security
+- secrets
 alwaysApply: false
 ---
 

diff --git a/sources/core/codeguard-0-api-web-services.md b/sources/core/codeguard-0-api-web-services.md
@@ -12,6 +12,8 @@ languages:
 - typescript
 - xml
 - yaml
+tags:
+- web
 alwaysApply: false
 ---
 

diff --git a/sources/core/codeguard-0-authentication-mfa.md b/sources/core/codeguard-0-authentication-mfa.md
@@ -13,6 +13,9 @@ languages:
 - ruby
 - swift
 - typescript
+tags:
+- authentication
+- web
 alwaysApply: false
 ---
 

diff --git a/sources/core/codeguard-0-client-side-web-security.md b/sources/core/codeguard-0-client-side-web-security.md
@@ -8,6 +8,8 @@ languages:
 - php
 - typescript
 - vlang
+tags:
+- web
 alwaysApply: false
 ---
 

diff --git a/sources/core/codeguard-0-cloud-orchestration-kubernetes.md b/sources/core/codeguard-0-cloud-orchestration-kubernetes.md
@@ -4,6 +4,8 @@ description: Kubernetes hardening (RBAC, admission policies, network policies, s
 languages:
 - javascript
 - yaml
+tags:
+- infrastructure
 alwaysApply: false
 ---
 

diff --git a/sources/core/codeguard-0-data-storage.md b/sources/core/codeguard-0-data-storage.md
@@ -6,6 +6,9 @@ languages:
 - javascript
 - sql
 - yaml
+tags:
+- data-security
+- infrastructure
 alwaysApply: false
 ---
 

diff --git a/sources/core/codeguard-0-devops-ci-cd-containers.md b/sources/core/codeguard-0-devops-ci-cd-containers.md
@@ -8,6 +8,8 @@ languages:
 - shell
 - xml
 - yaml
+tags:
+- infrastructure
 alwaysApply: false
 ---
 

diff --git a/sources/core/codeguard-0-iac-security.md b/sources/core/codeguard-0-iac-security.md
@@ -8,6 +8,8 @@ languages:
 - ruby
 - shell
 - yaml
+tags:
+- infrastructure
 alwaysApply: false
 ---
 

diff --git a/sources/core/codeguard-0-input-validation-injection.md b/sources/core/codeguard-0-input-validation-injection.md
@@ -14,6 +14,8 @@ languages:
 - shell
 - sql
 - typescript
+tags:
+- web
 alwaysApply: false
 ---
 

diff --git a/sources/core/codeguard-0-logging.md b/sources/core/codeguard-0-logging.md
@@ -5,6 +5,8 @@ languages:
 - c
 - javascript
 - yaml
+tags:
+- privacy
 alwaysApply: false
 ---
 

diff --git a/sources/core/codeguard-0-privacy-data-protection.md b/sources/core/codeguard-0-privacy-data-protection.md
@@ -5,6 +5,8 @@ languages:
 - javascript
 - matlab
 - yaml
+tags:
+- privacy
 alwaysApply: false
 ---
 

diff --git a/sources/core/codeguard-0-session-management-and-cookies.md b/sources/core/codeguard-0-session-management-and-cookies.md
@@ -11,6 +11,9 @@ languages:
 - python
 - ruby
 - typescript
+tags:
+- authentication
+- web
 alwaysApply: false
 ---
 

diff --git a/sources/core/codeguard-1-digital-certificates.md b/sources/core/codeguard-1-digital-certificates.md
@@ -1,6 +1,8 @@
 ---
 description: Certificate Best Practices
 languages: []
+tags:
+- secrets
 alwaysApply: true
 ---
 

diff --git a/sources/core/codeguard-1-hardcoded-credentials.md b/sources/core/codeguard-1-hardcoded-credentials.md
@@ -1,6 +1,8 @@
 ---
 description: No Hardcoded Credentials
 languages: []
+tags:
+- secrets
 alwaysApply: true
 ---
 

diff --git a/src/convert_to_ide_formats.py b/src/convert_to_ide_formats.py
@@ -36,6 +36,23 @@ def sync_plugin_metadata(version: str) -> None:
     print(f"✅ Synced plugin metadata to {version}")
 
 
+def matches_tag_filter(rule_tags: list[str], filter_tags: list[str]) -> bool:
+    """
+    Return True when the rule carries every tag in the filter (AND logic).
+
+    Args:
+        rule_tags: Tags declared by the rule; compared by exact match, so pass them lowercased
+        filter_tags: Tags that must all be present (lowercased); empty or None disables filtering
+
+    Returns:
+        True if filter_tags is empty/None or every filter tag is in rule_tags, False otherwise
+    """
+    if not filter_tags:
+        return True  # No filter means all pass
+
+    return all(tag in rule_tags for tag in filter_tags)
+
+
 def update_skill_md(language_to_rules: dict[str, list[str]], skill_path: str) -> None:
     """
     Update SKILL.md with language-to-rules mapping table.
@@ -81,7 +98,7 @@ def update_skill_md(language_to_rules: dict[str, list[str]], skill_path: str) ->
     print(f"Updated SKILL.md with language mappings")
 
 
-def convert_rules(input_path: str, output_dir: str = "dist", include_claudecode: bool = True, version: str = None) -> dict[str, list[str]]:
+def convert_rules(input_path: str, output_dir: str = "dist", include_claudecode: bool = True, version: str = None, filter_tags: list[str] = None) -> dict[str, list[str]]:
     """
     Convert rule file(s) to all supported IDE formats using RuleConverter.
 
@@ -90,6 +107,7 @@ def convert_rules(input_path: str, output_dir: str = "dist", include_claudecode:
         output_dir: Output directory (default: 'dist/')
         include_claudecode: Whether to generate Claude Code plugin (default: True, only for core rules)
         version: Version string to use (default: read from pyproject.toml)
+        filter_tags: Optional list of tags to filter by (AND logic, case-insensitive)
 
     Returns:
         Dictionary with 'success' and 'errors' lists:
@@ -138,14 +156,19 @@ def convert_rules(input_path: str, output_dir: str = "dist", include_claudecode:
     # Setup output directory
     output_base = Path(output_dir)
 
-    results = {"success": [], "errors": []}
+    results = {"success": [], "errors": [], "skipped": []}
     language_to_rules = defaultdict(list)
 
     # Process each file
     for md_file in md_files:
         try:
             # Convert the file (raises exceptions on error)
             result = converter.convert(md_file)
+
+            # Apply tag filter if specified
+            if filter_tags and not matches_tag_filter(result.tags, filter_tags):
+                results["skipped"].append(result.filename)
+                continue
 
             # Write each format
             output_files = []
@@ -192,9 +215,14 @@ def convert_rules(input_path: str, output_dir: str = "dist", include_claudecode:
             results["errors"].append(error_msg)
 
     # Summary
-    print(
-        f"\nResults: {len(results['success'])} success, {len(results['errors'])} errors"
-    )
+    if filter_tags:
+        print(
+            f"\nResults: {len(results['success'])} success, {len(results['skipped'])} skipped (tag filter), {len(results['errors'])} errors"
+        )
+    else:
+        print(
+            f"\nResults: {len(results['success'])} success, {len(results['errors'])} errors"
+        )
 
     # Generate SKILL.md with language mappings (only if Claude Code is included)
     if include_claudecode and language_to_rules:
@@ -256,6 +284,12 @@ def _resolve_source_paths(args) -> list[Path]:
         default="dist",
         help="Output directory for generated bundles (default: dist).",
     )
+    parser.add_argument(
+        "--tag",
+        "--tags",
+        dest="tags",  # both spellings store into cli_args.tags
+        help="Filter rules by tags (comma-separated, case-insensitive, AND logic). Example: --tag authentication,web",
+    )
 
     cli_args = parser.parse_args()
     source_paths = _resolve_source_paths(cli_args)
@@ -316,7 +350,16 @@ def _resolve_source_paths(args) -> list[Path]:
         print()
 
     # Convert all sources
-    aggregated = {"success": [], "errors": []}
+    aggregated = {"success": [], "errors": [], "skipped": []}
+    # Parse comma-separated tags and normalize to lowercase
+    filter_tags = None
+    if cli_args.tags:
+        filter_tags = [tag.strip().lower() for tag in cli_args.tags.split(",") if tag.strip()]
+
+    # Print tag filter info if active
+    if filter_tags:
+        print(f"Tag filter active: {', '.join(filter_tags)} (AND logic - rules must have all tags)\n")
+
     for source_path in source_paths:
         is_core = source_path == Path("sources/core")
 
@@ -325,11 +368,14 @@ def _resolve_source_paths(args) -> list[Path]:
             str(source_path), 
             cli_args.output_dir, 
             include_claudecode=is_core,
-            version=version
+            version=version,
+            filter_tags=filter_tags
         )
 
         aggregated["success"].extend(results["success"])
         aggregated["errors"].extend(results["errors"])
+        if "skipped" in results:
+            aggregated["skipped"].extend(results["skipped"])
         print("")
 
     if aggregated["errors"]:

diff --git a/src/converter.py b/src/converter.py
@@ -12,7 +12,7 @@
 from pathlib import Path
 
 from language_mappings import languages_to_globs
-from utils import parse_frontmatter_and_content
+from utils import parse_frontmatter_and_content, validate_tags
 from formats import (
     BaseFormat,
     ProcessedRule,
@@ -45,6 +45,7 @@ class ConversionResult:
         basename: Filename without extension (e.g., 'my-rule')
         outputs: Dictionary mapping format names to their outputs
         languages: List of programming languages the rule applies to, empty list if always applies
+        tags: List of tags for categorizing and filtering rules
     Example:
         result = ConversionResult(
             filename="my-rule.md",
@@ -56,14 +57,16 @@ class ConversionResult:
                     subpath=".cursor/rules"
                 )
             },
-            languages=["python", "javascript"]
+            languages=["python", "javascript"],
+            tags=["authentication", "web"]
         )
     """
 
     filename: str
     basename: str
     outputs: dict[str, FormatOutput]
     languages: list[str]
+    tags: list[str]
 
 
 class RuleConverter:
@@ -159,6 +162,11 @@ def parse_rule(self, content: str, filename: str) -> ProcessedRule:
                     f"'languages' must be a non-empty list in {filename} when alwaysApply is false"
                 )
 
+        # Parse and validate tags (optional field)
+        tags = []
+        if "tags" in frontmatter:
+            tags = validate_tags(frontmatter["tags"], filename)
+
         # Adding rule_id to the beginning of the content
         rule_id = Path(filename).stem
         markdown_content = f"rule_id: {rule_id}\n\n{markdown_content}"
@@ -169,6 +177,7 @@ def parse_rule(self, content: str, filename: str) -> ProcessedRule:
             always_apply=always_apply,
             content=markdown_content,
             filename=filename,
+            tags=tags,
         )
 
     def generate_globs(self, languages: list[str]) -> str:
@@ -242,4 +251,5 @@ def convert(self, filepath: str) -> ConversionResult:
             basename=basename,
             outputs=outputs,
             languages=rule.languages,
+            tags=rule.tags,
         )
diff --git a/src/formats/base.py b/src/formats/base.py
@@ -25,13 +25,15 @@ class ProcessedRule:
         always_apply: Whether this rule should apply to all files
         content: The actual rule content in markdown format
         filename: Original filename of the rule
+        tags: List of tags for categorizing and filtering rules
     """
 
     description: str
     languages: list[str]
     always_apply: bool
     content: str
     filename: str
+    tags: list[str]
 
 
 class BaseFormat(ABC):

diff --git a/src/tag_mappings.py b/src/tag_mappings.py
@@ -0,0 +1,21 @@
+# Copyright 2025 Cisco Systems, Inc. and its affiliates
+#
+# SPDX-License-Identifier: Apache-2.0
+
+"""
+Tag Mappings
+
+Centralized set of known tags for categorizing security rules.
+"""
+
+# Known tags used in rules (all lowercase, hyphen-separated)
+# Add new tags here as they are introduced in rules
+KNOWN_TAGS = {
+    "authentication",
+    "data-security",
+    "infrastructure",
+    "privacy",
+    "secrets",
+    "web",
+}
+
-Original file line number
+Diff line change
@@ Expand Up / @@ -14,6 +14,9 @@ languages: @@
     - typescript
     - xml
     - yaml
+    tags:
+    - data-security
+    - secrets
     alwaysApply: false
     ---
@@ Expand Down @@