From 0850522ce877c5931a266dc535fb032880950e1d Mon Sep 17 00:00:00 2001
From: Thomas Bartlett <67928676+thomas-bartlett@users.noreply.github.com>
Date: Mon, 17 Nov 2025 13:41:58 -0500
Subject: [PATCH 1/4] Added optional tags field and filtering support

---
 sources/core/codeguard-0-api-web-services.md  |  4 ++
 .../core/codeguard-0-authentication-mfa.md    |  3 +
 src/convert_to_ide_formats.py                 | 60 ++++++++++++++++---
 src/converter.py                              | 14 ++++-
 src/formats/base.py                           |  2 +
 src/utils.py                                  | 35 +++++++++++
 src/validate_unified_rules.py                 |  9 ++-
 7 files changed, 117 insertions(+), 10 deletions(-)

diff --git a/sources/core/codeguard-0-api-web-services.md b/sources/core/codeguard-0-api-web-services.md
index 94a51fc..0539f56 100644
--- a/sources/core/codeguard-0-api-web-services.md
+++ b/sources/core/codeguard-0-api-web-services.md
@@ -12,6 +12,10 @@ languages:
 - typescript
 - xml
 - yaml
+tags:
+- api
+- web-security
+- microservices
 alwaysApply: false
 ---
 
diff --git a/sources/core/codeguard-0-authentication-mfa.md b/sources/core/codeguard-0-authentication-mfa.md
index 2be26cc..ff8cd20 100644
--- a/sources/core/codeguard-0-authentication-mfa.md
+++ b/sources/core/codeguard-0-authentication-mfa.md
@@ -13,6 +13,9 @@ languages:
 - ruby
 - swift
 - typescript
+tags:
+- authentication
+- web-security
 alwaysApply: false
 ---
 
diff --git a/src/convert_to_ide_formats.py b/src/convert_to_ide_formats.py
index 1d4f5fb..120a455 100644
--- a/src/convert_to_ide_formats.py
+++ b/src/convert_to_ide_formats.py
@@ -36,6 +36,23 @@ def sync_plugin_metadata(version: str) -> None:
     print(f"✅ Synced plugin metadata to {version}")
 
 
+def matches_tag_filter(rule_tags: list[str], filter_tags: list[str]) -> bool:
+    """
+    Check if rule has all required tags (case-insensitive AND logic).
+    
+    Args:
+        rule_tags: List of tags from the rule (already lowercase from parsing)
+        filter_tags: List of tags to filter by
+    
+    Returns:
+        True if rule has all filter tags (or no filter), False otherwise
+    """
+    if not filter_tags:
+        return True  # No filter means all pass
+    
+    return all(tag.lower() in rule_tags for tag in filter_tags)
+
+
 def update_skill_md(language_to_rules: dict[str, list[str]], skill_path: str) -> None:
     """
     Update SKILL.md with language-to-rules mapping table.
@@ -81,7 +98,7 @@ def update_skill_md(language_to_rules: dict[str, list[str]], skill_path: str) ->
     print(f"Updated SKILL.md with language mappings")
 
 
-def convert_rules(input_path: str, output_dir: str = "dist", include_claudecode: bool = True, version: str = None) -> dict[str, list[str]]:
+def convert_rules(input_path: str, output_dir: str = "dist", include_claudecode: bool = True, version: str = None, filter_tags: list[str] = None) -> dict[str, list[str]]:
     """
     Convert rule file(s) to all supported IDE formats using RuleConverter.
 
@@ -90,6 +107,7 @@ def convert_rules(input_path: str, output_dir: str = "dist", include_claudecode:
         output_dir: Output directory (default: 'dist/')
         include_claudecode: Whether to generate Claude Code plugin (default: True, only for core rules)
         version: Version string to use (default: read from pyproject.toml)
+        filter_tags: Optional list of tags to filter by (AND logic). Pass lowercase tags: rule tags are stored lowercase.
 
     Returns:
         Dictionary with 'success' and 'errors' lists:
@@ -138,7 +156,7 @@ def convert_rules(input_path: str, output_dir: str = "dist", include_claudecode:
     # Setup output directory
     output_base = Path(output_dir)
 
-    results = {"success": [], "errors": []}
+    results = {"success": [], "errors": [], "skipped": []}
     language_to_rules = defaultdict(list)
 
     # Process each file
@@ -146,6 +164,11 @@ def convert_rules(input_path: str, output_dir: str = "dist", include_claudecode:
         try:
             # Convert the file (raises exceptions on error)
             result = converter.convert(md_file)
+            
+            # Apply tag filter if specified
+            if filter_tags and not matches_tag_filter(result.tags, filter_tags):
+                results["skipped"].append(result.filename)
+                continue
 
             # Write each format
             output_files = []
@@ -192,9 +215,14 @@ def convert_rules(input_path: str, output_dir: str = "dist", include_claudecode:
             results["errors"].append(error_msg)
 
     # Summary
-    print(
-        f"\nResults: {len(results['success'])} success, {len(results['errors'])} errors"
-    )
+    if filter_tags:
+        print(
+            f"\nResults: {len(results['success'])} success, {len(results['skipped'])} skipped (tag filter), {len(results['errors'])} errors"
+        )
+    else:
+        print(
+            f"\nResults: {len(results['success'])} success, {len(results['errors'])} errors"
+        )
 
     # Generate SKILL.md with language mappings (only if Claude Code is included)
     if include_claudecode and language_to_rules:
@@ -256,6 +284,12 @@ def _resolve_source_paths(args) -> list[Path]:
         default="dist",
         help="Output directory for generated bundles (default: dist).",
     )
+    parser.add_argument(
+        "--tag",
+        "--tags",
+        dest="tags",
+        help="Filter rules by tags (comma-separated, case-insensitive, AND logic). Example: --tag api,web-security",
+    )
     
     cli_args = parser.parse_args()
     source_paths = _resolve_source_paths(cli_args)
@@ -316,7 +350,16 @@ def _resolve_source_paths(args) -> list[Path]:
         print()
     
     # Convert all sources
-    aggregated = {"success": [], "errors": []}
+    aggregated = {"success": [], "errors": [], "skipped": []}
+    # Parse comma-separated tags
+    filter_tags = None
+    if cli_args.tags:
+        filter_tags = [tag.strip() for tag in cli_args.tags.split(",") if tag.strip()]
+    
+    # Print tag filter info if active
+    if filter_tags:
+        print(f"Tag filter active: {', '.join(filter_tags)} (AND logic - rules must have all tags)\n")
+    
     for source_path in source_paths:
         is_core = source_path == Path("sources/core")
         
@@ -325,11 +368,14 @@ def _resolve_source_paths(args) -> list[Path]:
             str(source_path), 
             cli_args.output_dir, 
             include_claudecode=is_core,
-            version=version
+            version=version,
+            filter_tags=filter_tags
         )
         
         aggregated["success"].extend(results["success"])
         aggregated["errors"].extend(results["errors"])
+        if "skipped" in results:
+            aggregated["skipped"].extend(results["skipped"])
         print("")
     
     if aggregated["errors"]:
diff --git a/src/converter.py b/src/converter.py
index 39f4fce..4a71a94 100644
--- a/src/converter.py
+++ b/src/converter.py
@@ -12,7 +12,7 @@
 from pathlib import Path
 
 from language_mappings import languages_to_globs
-from utils import parse_frontmatter_and_content
+from utils import parse_frontmatter_and_content, validate_tags
 from formats import (
     BaseFormat,
     ProcessedRule,
@@ -45,6 +45,7 @@ class ConversionResult:
         basename: Filename without extension (e.g., 'my-rule')
         outputs: Dictionary mapping format names to their outputs
         languages: List of programming languages the rule applies to, empty list if always applies
+        tags: List of tags for categorizing and filtering rules
     Example:
         result = ConversionResult(
             filename="my-rule.md",
@@ -56,7 +57,8 @@ class ConversionResult:
                     subpath=".cursor/rules"
                 )
             },
-            languages=["python", "javascript"]
+            languages=["python", "javascript"],
+            tags=["authentication", "web-security"]
         )
     """
 
@@ -64,6 +66,7 @@ class ConversionResult:
     basename: str
     outputs: dict[str, FormatOutput]
     languages: list[str]
+    tags: list[str]
 
 
 class RuleConverter:
@@ -159,6 +162,11 @@ def parse_rule(self, content: str, filename: str) -> ProcessedRule:
                     f"'languages' must be a non-empty list in {filename} when alwaysApply is false"
                 )
 
+        # Parse and validate tags (optional field)
+        tags = []
+        if "tags" in frontmatter:
+            tags = validate_tags(frontmatter["tags"], filename)
+
         # Adding rule_id to the beginning of the content
         rule_id = Path(filename).stem
         markdown_content = f"rule_id: {rule_id}\n\n{markdown_content}"
@@ -169,6 +177,7 @@ def parse_rule(self, content: str, filename: str) -> ProcessedRule:
             always_apply=always_apply,
             content=markdown_content,
             filename=filename,
+            tags=tags,
         )
 
     def generate_globs(self, languages: list[str]) -> str:
@@ -242,4 +251,5 @@ def convert(self, filepath: str) -> ConversionResult:
             basename=basename,
             outputs=outputs,
             languages=rule.languages,
+            tags=rule.tags,
         )
diff --git a/src/formats/base.py b/src/formats/base.py
index 5af8732..65c75e5 100644
--- a/src/formats/base.py
+++ b/src/formats/base.py
@@ -25,6 +25,7 @@ class ProcessedRule:
         always_apply: Whether this rule should apply to all files
         content: The actual rule content in markdown format
         filename: Original filename of the rule
+        tags: List of tags for categorizing and filtering rules
     """
 
     description: str
@@ -32,6 +33,7 @@ class ProcessedRule:
     always_apply: bool
     content: str
     filename: str
+    tags: list[str]
 
 
 class BaseFormat(ABC):
diff --git a/src/utils.py b/src/utils.py
index fb0fed6..cc64646 100644
--- a/src/utils.py
+++ b/src/utils.py
@@ -57,6 +57,41 @@ def parse_frontmatter_and_content(content: str) -> tuple[dict | None, str]:
     return frontmatter, markdown_content.strip()
 
 
+def validate_tags(tags, filename=None) -> list[str]:
+    """
+    Validate tags list and return normalized (lowercase) tags.
+    
+    Args:
+        tags: The tags value to validate (should be a list)
+        filename: Optional filename for better error messages
+    
+    Returns:
+        List of normalized (lowercase) tags
+    
+    Raises:
+        ValueError: If tags are invalid (wrong type, contain whitespace, empty, etc.)
+    """
+    context = f" in {filename}" if filename else ""
+    
+    if not isinstance(tags, list):
+        raise ValueError(f"'tags' must be a list{context}")
+    
+    normalized = []
+    for tag in tags:
+        if not isinstance(tag, str):
+            raise ValueError(f"All tags must be strings{context}, found: {type(tag).__name__}")
+        
+        if any(c.isspace() for c in tag):
+            raise ValueError(f"Tags cannot contain whitespace: '{tag}'{context}")
+        
+        if not tag:
+            raise ValueError(f"Empty tag found{context}")
+        
+        normalized.append(tag.lower())
+    
+    return normalized
+
+
 def get_version_from_pyproject() -> str:
     """
     Read version from pyproject.toml using Python's built-in TOML parser.
diff --git a/src/validate_unified_rules.py b/src/validate_unified_rules.py
index bd509bc..a30e56c 100755
--- a/src/validate_unified_rules.py
+++ b/src/validate_unified_rules.py
@@ -12,7 +12,7 @@
 from pathlib import Path
 
 from language_mappings import LANGUAGE_TO_EXTENSIONS
-from utils import parse_frontmatter_and_content
+from utils import parse_frontmatter_and_content, validate_tags
 
 
 def validate_rule(file_path: Path) -> dict[str, list[str]]:
@@ -54,6 +54,13 @@ def validate_rule(file_path: Path) -> dict[str, list[str]]:
             if unknown:
                 warnings.append(f"Unknown languages: {', '.join(unknown)}")
 
+        # Validate tags if present
+        if "tags" in frontmatter:
+            try:
+                validate_tags(frontmatter["tags"], file_path.name)
+            except ValueError as e:
+                errors.append(str(e))
+
         # Check content exists
         if not markdown_content.strip():
             errors.append("Rule content cannot be empty")

From 4f489ba2c6c340f027cbcf8a0c5a355ddd4ff19e Mon Sep 17 00:00:00 2001
From: Thomas Bartlett <67928676+thomas-bartlett@users.noreply.github.com>
Date: Tue, 18 Nov 2025 09:30:13 -0500
Subject: [PATCH 2/4] Improved tag validation and normalization

---
 src/convert_to_ide_formats.py | 12 ++++++------
 src/utils.py                  | 13 ++++++++++---
 2 files changed, 16 insertions(+), 9 deletions(-)

diff --git a/src/convert_to_ide_formats.py b/src/convert_to_ide_formats.py
index 120a455..3c3cab3 100644
--- a/src/convert_to_ide_formats.py
+++ b/src/convert_to_ide_formats.py
@@ -38,11 +38,11 @@ def sync_plugin_metadata(version: str) -> None:
 
 def matches_tag_filter(rule_tags: list[str], filter_tags: list[str]) -> bool:
     """
-    Check if rule has all required tags (case-insensitive AND logic).
+    Check if rule has all required tags (AND logic).
     
     Args:
-        rule_tags: List of tags from the rule (already lowercase from parsing)
-        filter_tags: List of tags to filter by
+        rule_tags: List of tags from the rule (already normalized to lowercase)
+        filter_tags: List of tags to filter by (already normalized to lowercase)
     
     Returns:
         True if rule has all filter tags (or no filter), False otherwise
@@ -50,7 +50,7 @@ def matches_tag_filter(rule_tags: list[str], filter_tags: list[str]) -> bool:
     if not filter_tags:
         return True  # No filter means all pass
     
-    return all(tag.lower() in rule_tags for tag in filter_tags)
+    return all(tag in rule_tags for tag in filter_tags)
 
 
 def update_skill_md(language_to_rules: dict[str, list[str]], skill_path: str) -> None:
@@ -351,10 +351,10 @@ def _resolve_source_paths(args) -> list[Path]:
     
     # Convert all sources
     aggregated = {"success": [], "errors": [], "skipped": []}
-    # Parse comma-separated tags
+    # Parse comma-separated tags and normalize to lowercase
     filter_tags = None
     if cli_args.tags:
-        filter_tags = [tag.strip() for tag in cli_args.tags.split(",") if tag.strip()]
+        filter_tags = [tag.strip().lower() for tag in cli_args.tags.split(",") if tag.strip()]
     
     # Print tag filter info if active
     if filter_tags:
diff --git a/src/utils.py b/src/utils.py
index cc64646..b37d2e9 100644
--- a/src/utils.py
+++ b/src/utils.py
@@ -62,20 +62,27 @@ def validate_tags(tags, filename=None) -> list[str]:
     Validate tags list and return normalized (lowercase) tags.
     
     Args:
-        tags: The tags value to validate (should be a list)
+        tags: The tags value to validate (should be a non-empty list)
         filename: Optional filename for better error messages
     
     Returns:
         List of normalized (lowercase) tags
     
     Raises:
-        ValueError: If tags are invalid (wrong type, contain whitespace, empty, etc.)
+        ValueError: If tags are invalid (wrong type, empty list, contain whitespace, etc.)
+    
+    Note:
+        An empty tags list (tags: []) is considered invalid. If you have no tags,
+        omit the 'tags' field entirely from the frontmatter.
     """
     context = f" in {filename}" if filename else ""
     
     if not isinstance(tags, list):
         raise ValueError(f"'tags' must be a list{context}")
     
+    if not tags:
+        raise ValueError(f"'tags' list cannot be empty{context}. Omit the field if you have no tags.")
+    
     normalized = []
     for tag in tags:
         if not isinstance(tag, str):
@@ -89,7 +96,7 @@ def validate_tags(tags, filename=None) -> list[str]:
         
         normalized.append(tag.lower())
     
-    return normalized
+    return list(set(normalized))
 
 
 def get_version_from_pyproject() -> str:

From 09839b8bcfeda3fa01eaf432a5264865914387b6 Mon Sep 17 00:00:00 2001
From: Thomas Bartlett <67928676+thomas-bartlett@users.noreply.github.com>
Date: Tue, 18 Nov 2025 11:58:59 -0500
Subject: [PATCH 3/4] Added KNOWN_TAGS allowlist and tagged remaining core rules

---
 .../codeguard-0-additional-cryptography.md    |  3 +++
 sources/core/codeguard-0-api-web-services.md  |  4 +---
 .../core/codeguard-0-authentication-mfa.md    |  2 +-
 .../codeguard-0-client-side-web-security.md   |  2 ++
 ...eguard-0-cloud-orchestration-kubernetes.md |  2 ++
 sources/core/codeguard-0-data-storage.md      |  3 +++
 .../codeguard-0-devops-ci-cd-containers.md    |  2 ++
 sources/core/codeguard-0-iac-security.md      |  2 ++
 .../codeguard-0-input-validation-injection.md |  2 ++
 sources/core/codeguard-0-logging.md           |  2 ++
 .../codeguard-0-privacy-data-protection.md    |  2 ++
 ...eguard-0-session-management-and-cookies.md |  3 +++
 .../core/codeguard-1-digital-certificates.md  |  2 ++
 .../core/codeguard-1-hardcoded-credentials.md |  2 ++
 src/tag_mappings.py                           | 21 +++++++++++++++++++
 src/validate_unified_rules.py                 |  7 ++++++-
 16 files changed, 56 insertions(+), 5 deletions(-)
 create mode 100644 src/tag_mappings.py

diff --git a/sources/core/codeguard-0-additional-cryptography.md b/sources/core/codeguard-0-additional-cryptography.md
index 6bcb4fd..db5edae 100644
--- a/sources/core/codeguard-0-additional-cryptography.md
+++ b/sources/core/codeguard-0-additional-cryptography.md
@@ -14,6 +14,9 @@ languages:
 - typescript
 - xml
 - yaml
+tags:
+- data-security
+- secrets
 alwaysApply: false
 ---
 
diff --git a/sources/core/codeguard-0-api-web-services.md b/sources/core/codeguard-0-api-web-services.md
index 0539f56..2c78e45 100644
--- a/sources/core/codeguard-0-api-web-services.md
+++ b/sources/core/codeguard-0-api-web-services.md
@@ -13,9 +13,7 @@ languages:
 - xml
 - yaml
 tags:
-- api
-- web-security
-- microservices
+- web
 alwaysApply: false
 ---
 
diff --git a/sources/core/codeguard-0-authentication-mfa.md b/sources/core/codeguard-0-authentication-mfa.md
index ff8cd20..580af4e 100644
--- a/sources/core/codeguard-0-authentication-mfa.md
+++ b/sources/core/codeguard-0-authentication-mfa.md
@@ -15,7 +15,7 @@ languages:
 - typescript
 tags:
 - authentication
-- web-security
+- web
 alwaysApply: false
 ---
 
diff --git a/sources/core/codeguard-0-client-side-web-security.md b/sources/core/codeguard-0-client-side-web-security.md
index 17a4e89..c2b0c68 100644
--- a/sources/core/codeguard-0-client-side-web-security.md
+++ b/sources/core/codeguard-0-client-side-web-security.md
@@ -8,6 +8,8 @@ languages:
 - php
 - typescript
 - vlang
+tags:
+- web
 alwaysApply: false
 ---
 
diff --git a/sources/core/codeguard-0-cloud-orchestration-kubernetes.md b/sources/core/codeguard-0-cloud-orchestration-kubernetes.md
index ec2e982..828edd9 100644
--- a/sources/core/codeguard-0-cloud-orchestration-kubernetes.md
+++ b/sources/core/codeguard-0-cloud-orchestration-kubernetes.md
@@ -4,6 +4,8 @@ description: Kubernetes hardening (RBAC, admission policies, network policies, s
 languages:
 - javascript
 - yaml
+tags:
+- infrastructure
 alwaysApply: false
 ---
 
diff --git a/sources/core/codeguard-0-data-storage.md b/sources/core/codeguard-0-data-storage.md
index 6bd68f5..e01057b 100644
--- a/sources/core/codeguard-0-data-storage.md
+++ b/sources/core/codeguard-0-data-storage.md
@@ -6,6 +6,9 @@ languages:
 - javascript
 - sql
 - yaml
+tags:
+- data-security
+- infrastructure
 alwaysApply: false
 ---
 
diff --git a/sources/core/codeguard-0-devops-ci-cd-containers.md b/sources/core/codeguard-0-devops-ci-cd-containers.md
index 1db3562..52bb26c 100644
--- a/sources/core/codeguard-0-devops-ci-cd-containers.md
+++ b/sources/core/codeguard-0-devops-ci-cd-containers.md
@@ -8,6 +8,8 @@ languages:
 - shell
 - xml
 - yaml
+tags:
+- infrastructure
 alwaysApply: false
 ---
 
diff --git a/sources/core/codeguard-0-iac-security.md b/sources/core/codeguard-0-iac-security.md
index 0785120..17fe6de 100644
--- a/sources/core/codeguard-0-iac-security.md
+++ b/sources/core/codeguard-0-iac-security.md
@@ -8,6 +8,8 @@ languages:
 - ruby
 - shell
 - yaml
+tags:
+- infrastructure
 alwaysApply: false
 ---
 
diff --git a/sources/core/codeguard-0-input-validation-injection.md b/sources/core/codeguard-0-input-validation-injection.md
index 9ae2ab1..fc15368 100644
--- a/sources/core/codeguard-0-input-validation-injection.md
+++ b/sources/core/codeguard-0-input-validation-injection.md
@@ -14,6 +14,8 @@ languages:
 - shell
 - sql
 - typescript
+tags:
+- web
 alwaysApply: false
 ---
 
diff --git a/sources/core/codeguard-0-logging.md b/sources/core/codeguard-0-logging.md
index 659be01..2a354aa 100644
--- a/sources/core/codeguard-0-logging.md
+++ b/sources/core/codeguard-0-logging.md
@@ -5,6 +5,8 @@ languages:
 - c
 - javascript
 - yaml
+tags:
+- privacy
 alwaysApply: false
 ---
 
diff --git a/sources/core/codeguard-0-privacy-data-protection.md b/sources/core/codeguard-0-privacy-data-protection.md
index f28876d..22f522d 100644
--- a/sources/core/codeguard-0-privacy-data-protection.md
+++ b/sources/core/codeguard-0-privacy-data-protection.md
@@ -5,6 +5,8 @@ languages:
 - javascript
 - matlab
 - yaml
+tags:
+- privacy
 alwaysApply: false
 ---
 
diff --git a/sources/core/codeguard-0-session-management-and-cookies.md b/sources/core/codeguard-0-session-management-and-cookies.md
index be73bf8..e0d53e8 100644
--- a/sources/core/codeguard-0-session-management-and-cookies.md
+++ b/sources/core/codeguard-0-session-management-and-cookies.md
@@ -11,6 +11,9 @@ languages:
 - python
 - ruby
 - typescript
+tags:
+- authentication
+- web
 alwaysApply: false
 ---
 
diff --git a/sources/core/codeguard-1-digital-certificates.md b/sources/core/codeguard-1-digital-certificates.md
index 3d73c70..c333fa2 100644
--- a/sources/core/codeguard-1-digital-certificates.md
+++ b/sources/core/codeguard-1-digital-certificates.md
@@ -1,6 +1,8 @@
 ---
 description: Certificate Best Practices
 languages: []
+tags:
+- secrets
 alwaysApply: true
 ---
 
diff --git a/sources/core/codeguard-1-hardcoded-credentials.md b/sources/core/codeguard-1-hardcoded-credentials.md
index 5f885ec..978d48a 100644
--- a/sources/core/codeguard-1-hardcoded-credentials.md
+++ b/sources/core/codeguard-1-hardcoded-credentials.md
@@ -1,6 +1,8 @@
 ---
 description: No Hardcoded Credentials
 languages: []
+tags:
+- secrets
 alwaysApply: true
 ---
 
diff --git a/src/tag_mappings.py b/src/tag_mappings.py
new file mode 100644
index 0000000..304992b
--- /dev/null
+++ b/src/tag_mappings.py
@@ -0,0 +1,21 @@
+# Copyright 2025 Cisco Systems, Inc. and its affiliates
+#
+# SPDX-License-Identifier: Apache-2.0
+
+"""
+Tag Mappings
+
+Centralized list of known tags for categorizing security rules.
+"""
+
+# Known tags used in rules
+# Add new tags here as they are introduced in rules
+KNOWN_TAGS = {
+    "authentication",
+    "data-security",
+    "infrastructure",
+    "privacy",
+    "secrets",
+    "web",
+}
+
diff --git a/src/validate_unified_rules.py b/src/validate_unified_rules.py
index a30e56c..8fd454c 100755
--- a/src/validate_unified_rules.py
+++ b/src/validate_unified_rules.py
@@ -12,6 +12,7 @@
 from pathlib import Path
 
 from language_mappings import LANGUAGE_TO_EXTENSIONS
+from tag_mappings import KNOWN_TAGS
 from utils import parse_frontmatter_and_content, validate_tags
 
 
@@ -57,7 +58,11 @@ def validate_rule(file_path: Path) -> dict[str, list[str]]:
         # Validate tags if present
         if "tags" in frontmatter:
             try:
-                validate_tags(frontmatter["tags"], file_path.name)
+                normalized_tags = validate_tags(frontmatter["tags"], file_path.name)
+                # Error on tags not in known list
+                unknown_tags = [tag for tag in normalized_tags if tag not in KNOWN_TAGS]
+                if unknown_tags:
+                    errors.append(f"Unknown tags (add to KNOWN_TAGS): {', '.join(sorted(unknown_tags))}")
             except ValueError as e:
                 errors.append(str(e))
 

From 46bac17e8723472a5286a83dd4473930c59192a4 Mon Sep 17 00:00:00 2001
From: Thomas Bartlett <67928676+thomas-bartlett@users.noreply.github.com>
Date: Tue, 18 Nov 2025 12:11:20 -0500
Subject: [PATCH 4/4] Preserve tag order when deduplicating in validate_tags

---
 src/utils.py | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/src/utils.py b/src/utils.py
index b37d2e9..a360e74 100644
--- a/src/utils.py
+++ b/src/utils.py
@@ -66,14 +66,17 @@ def validate_tags(tags, filename=None) -> list[str]:
         filename: Optional filename for better error messages
     
     Returns:
-        List of normalized (lowercase) tags
+        List of normalized (lowercase) tags with duplicates removed.
+        Original order is preserved.
     
     Raises:
         ValueError: If tags are invalid (wrong type, empty list, contain whitespace, etc.)
     
     Note:
-        An empty tags list (tags: []) is considered invalid. If you have no tags,
-        omit the 'tags' field entirely from the frontmatter.
+        - An empty tags list (tags: []) is considered invalid. If you have no tags,
+          omit the 'tags' field entirely from the frontmatter.
+        - Duplicate tags (after normalization) are automatically removed while
+          preserving the order of first occurrence.
     """
     context = f" in {filename}" if filename else ""
     
@@ -96,7 +99,7 @@ def validate_tags(tags, filename=None) -> list[str]:
         
         normalized.append(tag.lower())
     
-    return list(set(normalized))
+    return list(dict.fromkeys(normalized))
 
 
 def get_version_from_pyproject() -> str: