mandiant · akshat4703 · Mar 13, 2026 · Mar 13, 2026
diff --git a/capa/rules/__init__.py b/capa/rules/__init__.py
@@ -1650,6 +1650,9 @@ class _RuleFeatureIndex:
         # Mapping from rule name to list of Bytes features that have to match.
         # All these features will be evaluated whenever a Bytes feature is encountered.
         bytes_rules: dict[str, list[Feature]]
+        # Rules that cannot be safely handled by optimized candidate filtering.
+        # These are always evaluated via the unoptimized path.
+        fallback_rules: set[str]
 
     # this routine is unstable and may change before the next major release.
     @staticmethod
@@ -1802,13 +1805,18 @@ def and_score_key(item):
         # Ideally we find a way to get rid of all of these, eventually.
         string_rules: dict[str, list[Feature]] = {}
         bytes_rules: dict[str, list[Feature]] = {}
+        fallback_rules: set[str] = set()
 
         for rule in rules:
             rule_name = rule.meta["name"]
 
             root = rule.statement
             item = rec(rule_name, root)
-            assert item is not None
+            if item is None:
+                logger.debug("indexing: no stable index feature for rule, will use fallback matcher: %s", rule_name)
+                scores_by_rule[rule_name] = 0
+                fallback_rules.add(rule_name)
+                continue
             score, features = item
 
             string_features = [
@@ -1847,8 +1855,9 @@ def and_score_key(item):
         logger.debug(
             "indexing: %d scanning string features, %d scanning bytes features", len(string_rules), len(bytes_rules)
         )
+        logger.debug("indexing: %d rules evaluated via fallback matcher", len(fallback_rules))
 
-        return RuleSet._RuleFeatureIndex(rules_by_feature, string_rules, bytes_rules)
+        return RuleSet._RuleFeatureIndex(rules_by_feature, string_rules, bytes_rules, fallback_rules)
 
     @staticmethod
     def _get_rules_for_scope(rules, scope) -> list[Rule]:
@@ -1976,7 +1985,7 @@ def _match(self, scope: Scope, features: FeatureSet, addr: Address) -> tuple[Fea
         # Find all the rules that could match the given feature set.
         # Ideally we want this set to be as small and focused as possible,
         # and we can tune it by tweaking `_index_rules_by_feature`.
-        candidate_rule_names: set[str] = set()
+        candidate_rule_names: set[str] = set(feature_index.fallback_rules)
         for feature in features:
             candidate_rule_names.update(feature_index.rules_by_feature.get(feature, ()))
 
@@ -2109,13 +2118,9 @@ def match(
         This wrapper around _match exists so that we can assert it matches precisely
         the same as `capa.engine.match`, just faster.
 
-        This matcher does not handle some edge cases:
-          - top level NOT statements
-              - also top level counted features with zero occurances, like: `count(menmonic(mov)): 0`
-          - nested NOT statements (NOT: NOT: foo)
-
-        We should discourage/forbid these constructs from our rules and add lints for them.
-        TODO(williballenthin): add lints for logic edge cases
+        Some edge-case rule constructs cannot be safely optimized for candidate filtering.
+        In those cases, this matcher transparently falls back to unoptimized evaluation
+        for the affected rules.
 
         Args:
           paranoid: when true, demonstrate that the naive matcher agrees with this

diff --git a/tests/test_match.py b/tests/test_match.py
@@ -198,6 +198,31 @@ def test_match_range_with_zero():
     assert "test rule" not in matches
 
 
+def test_match_top_level_range_exact_zero():
+    rule = textwrap.dedent(
+        """
+        rule:
+            meta:
+                name: test rule
+                scopes:
+                    static: function
+                    dynamic: process
+            features:
+                - count(number(100)): 0
+        """
+    )
+    r = capa.rules.Rule.from_yaml(rule)
+
+    _, matches = match([r], {}, 0x0)
+    assert "test rule" in matches
+
+    _, matches = match([r], {capa.features.insn.Number(100): {}}, 0x0)
+    assert "test rule" in matches
+
+    _, matches = match([r], {capa.features.insn.Number(100): {1}}, 0x0)
+    assert "test rule" not in matches
+
+
 def test_match_adds_matched_rule_feature():
     """show that using `match` adds a feature for matched rules."""
     rule = textwrap.dedent(
@@ -585,7 +610,6 @@ def test_regex_get_value_str(pattern):
     assert capa.features.common.Regex(pattern).get_value_str() == pattern
 
 
-@pytest.mark.xfail(reason="can't have top level NOT")
 def test_match_only_not():
     rule = textwrap.dedent(
         """
@@ -630,7 +654,6 @@ def test_match_not():
     assert "test rule" in matches
 
 
-@pytest.mark.xfail(reason="can't have nested NOT")
 def test_match_not_not():
     rule = textwrap.dedent(
         """

diff --git a/tests/test_rules.py b/tests/test_rules.py
@@ -1685,3 +1685,62 @@ def test_circular_dependency():
     ]
     with pytest.raises(capa.rules.InvalidRule):
         list(capa.rules.get_rules_and_dependencies(rules, rules[0].name))
+
+
+def test_invalid_top_level_not_statement():
+    with pytest.raises(capa.rules.InvalidRule, match="top level statement may not be a `not` statement"):
+        capa.rules.Rule.from_yaml(
+            textwrap.dedent(
+                """
+                rule:
+                    meta:
+                        name: test rule
+                        scopes:
+                            static: function
+                            dynamic: process
+                    features:
+                        - not:
+                            - number: 1
+                """
+            )
+        )
+
+
+def test_invalid_nested_not_statement():
+    with pytest.raises(capa.rules.InvalidRule, match="nested `not` statements are not supported"):
+        capa.rules.Rule.from_yaml(
+            textwrap.dedent(
+                """
+                rule:
+                    meta:
+                        name: test rule
+                        scopes:
+                            static: function
+                            dynamic: process
+                    features:
+                        - and:
+                            - mnemonic: mov
+                            - not:
+                                - not:
+                                    - number: 1
+                """
+            )
+        )
+
+
+def test_invalid_top_level_count_zero_statement():
+    with pytest.raises(capa.rules.InvalidRule, match="top level statement may not be `count\\(...\\): 0`"):
+        capa.rules.Rule.from_yaml(
+            textwrap.dedent(
+                """
+                rule:
+                    meta:
+                        name: test rule
+                        scopes:
+                            static: function
+                            dynamic: process
+                    features:
+                        - count(number(100)): 0
+                """
+            )
+        )