Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 15 additions & 10 deletions capa/rules/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -1650,6 +1650,9 @@ class _RuleFeatureIndex:
# Mapping from rule name to list of Bytes features that have to match.
# All these features will be evaluated whenever a Bytes feature is encountered.
bytes_rules: dict[str, list[Feature]]
# Rules that cannot be safely handled by optimized candidate filtering.
# These are always evaluated via the unoptimized path.
fallback_rules: set[str]

# this routine is unstable and may change before the next major release.
@staticmethod
Expand Down Expand Up @@ -1802,13 +1805,18 @@ def and_score_key(item):
# Ideally we find a way to get rid of all of these, eventually.
string_rules: dict[str, list[Feature]] = {}
bytes_rules: dict[str, list[Feature]] = {}
fallback_rules: set[str] = set()

for rule in rules:
rule_name = rule.meta["name"]

root = rule.statement
item = rec(rule_name, root)
assert item is not None
if item is None:
logger.debug("indexing: no stable index feature for rule, will use fallback matcher: %s", rule_name)
scores_by_rule[rule_name] = 0
fallback_rules.add(rule_name)
continue
score, features = item

string_features = [
Expand Down Expand Up @@ -1847,8 +1855,9 @@ def and_score_key(item):
logger.debug(
"indexing: %d scanning string features, %d scanning bytes features", len(string_rules), len(bytes_rules)
)
logger.debug("indexing: %d rules evaluated via fallback matcher", len(fallback_rules))

return RuleSet._RuleFeatureIndex(rules_by_feature, string_rules, bytes_rules)
return RuleSet._RuleFeatureIndex(rules_by_feature, string_rules, bytes_rules, fallback_rules)

@staticmethod
def _get_rules_for_scope(rules, scope) -> list[Rule]:
Expand Down Expand Up @@ -1976,7 +1985,7 @@ def _match(self, scope: Scope, features: FeatureSet, addr: Address) -> tuple[Fea
# Find all the rules that could match the given feature set.
# Ideally we want this set to be as small and focused as possible,
# and we can tune it by tweaking `_index_rules_by_feature`.
candidate_rule_names: set[str] = set()
candidate_rule_names: set[str] = set(feature_index.fallback_rules)
for feature in features:
candidate_rule_names.update(feature_index.rules_by_feature.get(feature, ()))

Expand Down Expand Up @@ -2109,13 +2118,9 @@ def match(
This wrapper around _match exists so that we can assert it matches precisely
the same as `capa.engine.match`, just faster.

This matcher does not handle some edge cases:
- top level NOT statements
- also top level counted features with zero occurances, like: `count(menmonic(mov)): 0`
- nested NOT statements (NOT: NOT: foo)

We should discourage/forbid these constructs from our rules and add lints for them.
TODO(williballenthin): add lints for logic edge cases
Some edge-case rule constructs cannot be safely optimized for candidate filtering.
In those cases, this matcher transparently falls back to unoptimized evaluation
for the affected rules.

Args:
paranoid: when true, demonstrate that the naive matcher agrees with this
Expand Down
27 changes: 25 additions & 2 deletions tests/test_match.py
Original file line number Diff line number Diff line change
Expand Up @@ -198,6 +198,31 @@ def test_match_range_with_zero():
assert "test rule" not in matches


def test_match_top_level_range_exact_zero():
rule = textwrap.dedent(
"""
rule:
meta:
name: test rule
scopes:
static: function
dynamic: process
features:
- count(number(100)): 0
"""
)
r = capa.rules.Rule.from_yaml(rule)

_, matches = match([r], {}, 0x0)
assert "test rule" in matches

_, matches = match([r], {capa.features.insn.Number(100): {}}, 0x0)
assert "test rule" in matches

_, matches = match([r], {capa.features.insn.Number(100): {1}}, 0x0)
assert "test rule" not in matches


def test_match_adds_matched_rule_feature():
"""show that using `match` adds a feature for matched rules."""
rule = textwrap.dedent(
Expand Down Expand Up @@ -585,7 +610,6 @@ def test_regex_get_value_str(pattern):
assert capa.features.common.Regex(pattern).get_value_str() == pattern


@pytest.mark.xfail(reason="can't have top level NOT")
def test_match_only_not():
rule = textwrap.dedent(
"""
Expand Down Expand Up @@ -630,7 +654,6 @@ def test_match_not():
assert "test rule" in matches


@pytest.mark.xfail(reason="can't have nested NOT")
def test_match_not_not():
rule = textwrap.dedent(
"""
Expand Down
59 changes: 59 additions & 0 deletions tests/test_rules.py
Original file line number Diff line number Diff line change
Expand Up @@ -1685,3 +1685,62 @@ def test_circular_dependency():
]
with pytest.raises(capa.rules.InvalidRule):
list(capa.rules.get_rules_and_dependencies(rules, rules[0].name))


def test_invalid_top_level_not_statement():
with pytest.raises(capa.rules.InvalidRule, match="top level statement may not be a `not` statement"):
capa.rules.Rule.from_yaml(
textwrap.dedent(
"""
rule:
meta:
name: test rule
scopes:
static: function
dynamic: process
features:
- not:
- number: 1
"""
)
)


def test_invalid_nested_not_statement():
with pytest.raises(capa.rules.InvalidRule, match="nested `not` statements are not supported"):
capa.rules.Rule.from_yaml(
textwrap.dedent(
"""
rule:
meta:
name: test rule
scopes:
static: function
dynamic: process
features:
- and:
- mnemonic: mov
- not:
- not:
- number: 1
"""
)
)


def test_invalid_top_level_count_zero_statement():
with pytest.raises(capa.rules.InvalidRule, match="top level statement may not be `count\\(...\\): 0`"):
capa.rules.Rule.from_yaml(
textwrap.dedent(
"""
rule:
meta:
name: test rule
scopes:
static: function
dynamic: process
features:
- count(number(100)): 0
"""
)
)
Loading