Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

tighten rule pre-selection #2080

Closed
wants to merge 37 commits into from
Closed
Show file tree
Hide file tree
Changes from 30 commits
Commits
Show all changes
37 commits
Select commit Hold shift + click to select a range
8b0076b
features: mark format as a global feature
williballenthin May 14, 2024
8858537
pep8
williballenthin May 14, 2024
9c0c662
rules: optimize rule pre-filtering, first revision
williballenthin May 14, 2024
07f347b
Merge branch 'feat/1755' into perf-rule-pre-selection
williballenthin May 22, 2024
2d9c82f
lints
williballenthin May 22, 2024
0dc0c51
rules: add documentation for optimized match routine
williballenthin May 22, 2024
f86a60c
bytes: log length of bytes evaluations
williballenthin May 22, 2024
6e50f48
ruleset: document optimized match behavior
williballenthin May 22, 2024
b7d0734
changelog
williballenthin May 22, 2024
f853214
ruleset: infrastructure to test optimized matcher
williballenthin May 22, 2024
0bc9cb5
Merge branch 'feat/1755' into perf-rule-pre-selection
williballenthin Jun 1, 2024
d25d74f
Merge branch 'feat/1755' into perf-rule-pre-selection
williballenthin Jun 3, 2024
6f9c34b
Merge branch 'perf-rule-pre-selection' of github.com:mandiant/capa in…
williballenthin Jun 3, 2024
9b7fb4e
pep8
williballenthin Jun 3, 2024
e8ef897
linters
williballenthin Jun 3, 2024
e49d47d
rules: match: handle namespace match statements
williballenthin Jun 3, 2024
a4f4f0b
rules: more tests for logic edge cases
williballenthin Jun 3, 2024
bff7f0a
rules: match paranoid true
williballenthin Jun 3, 2024
d20f040
rules: document logic edge cases
williballenthin Jun 3, 2024
ad3643b
Merge branch 'feat/1755' into perf-rule-pre-selection
williballenthin Jun 3, 2024
ced0226
Merge branch 'perf-rule-pre-selection' of github.com:mandiant/capa in…
williballenthin Jun 3, 2024
a7e24e6
pep8
williballenthin Jun 3, 2024
a66524a
rules: match: better debug paranoid matching
williballenthin Jun 3, 2024
61d01bb
rules: matcher: more doc
williballenthin Jun 3, 2024
4f00118
Merge branch 'feat/1755' into perf-rule-pre-selection
williballenthin Jun 3, 2024
62c4452
rules: match: disable paranoid mode by default
williballenthin Jun 3, 2024
376a9ac
Merge branch 'perf-rule-pre-selection' of github.com:mandiant/capa in…
williballenthin Jun 3, 2024
8ccae6e
add tests demonstrating optimized matching behavior
williballenthin Jun 3, 2024
12a78f3
rules: match: remove inline closure comment
williballenthin Jun 3, 2024
960ee86
typo
williballenthin Jun 3, 2024
e21a70f
rules: clarify the term "unstable" with longer comments
williballenthin Jun 4, 2024
e1252b0
Merge branch 'perf-rule-pre-selection' of github.com:mandiant/capa in…
williballenthin Jun 4, 2024
457cfe0
rules: more comments describing how features are scored
williballenthin Jun 4, 2024
b34667f
black
williballenthin Jun 4, 2024
1f1e142
rules: match: re-introduce default rule scores
williballenthin Jun 6, 2024
347619d
rules: match: remove trace comments
williballenthin Jun 6, 2024
b376a84
black
williballenthin Jun 6, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

- add function in capa/helpers to load plain and compressed JSON reports #1883 @Rohit1123
- document Antivirus warnings and VirusTotal false positive detections #2028 @RionEV @mr-tz
- optimize rule matching #2080 @williballenthin
- replace Halo spinner with Rich #2086 @s-ff

### Breaking Changes
Expand Down
15 changes: 10 additions & 5 deletions capa/engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -270,6 +270,14 @@ def evaluate(self, features: FeatureSet, short_circuit=True):
MatchResults = Mapping[str, List[Tuple[Address, Result]]]


def get_rule_namespaces(rule: "capa.rules.Rule") -> Iterator[str]:
    """
    yield the namespace of the given rule, followed by each of its parent namespaces.

    namespaces are `/`-separated paths read from the rule's `namespace` metadata entry.
    the most specific namespace is yielded first, then each enclosing namespace in turn,
    for example: `a/b/c` yields `a/b/c`, `a/b`, and `a`.

    yields nothing when the rule has no namespace, or the namespace is empty.
    doesn't modify the rule.
    """
    namespace = rule.meta.get("namespace")
    # a missing (None) or empty namespace is falsy, so the loop condition alone
    # covers the "no namespace" case; no separate guard is needed.
    while namespace:
        yield namespace
        # drop the last path component; once no "/" remains,
        # rpartition returns "" as the head, which ends the loop.
        namespace, _, _ = namespace.rpartition("/")


def index_rule_matches(features: FeatureSet, rule: "capa.rules.Rule", locations: Iterable[Address]):
"""
record into the given featureset that the given rule matched at the given locations.
Expand All @@ -280,11 +288,8 @@ def index_rule_matches(features: FeatureSet, rule: "capa.rules.Rule", locations:
updates `features` in-place. doesn't modify the remaining arguments.
"""
features[capa.features.common.MatchedRule(rule.name)].update(locations)
namespace = rule.meta.get("namespace")
if namespace:
while namespace:
features[capa.features.common.MatchedRule(namespace)].update(locations)
namespace, _, _ = namespace.rpartition("/")
for namespace in get_rule_namespaces(rule):
features[capa.features.common.MatchedRule(namespace)].update(locations)


def match(rules: List["capa.rules.Rule"], features: FeatureSet, addr: Address) -> Tuple[FeatureSet, MatchResults]:
Expand Down
8 changes: 5 additions & 3 deletions capa/features/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -385,10 +385,12 @@ def __init__(self, value: bytes, description=None):
self.value = value

def evaluate(self, features: "capa.engine.FeatureSet", short_circuit=True):
assert isinstance(self.value, bytes)

capa.perf.counters["evaluate.feature"] += 1
capa.perf.counters["evaluate.feature.bytes"] += 1
capa.perf.counters["evaluate.feature.bytes." + str(len(self.value))] += 1

assert isinstance(self.value, bytes)
for feature, locations in features.items():
if not isinstance(feature, (Bytes,)):
continue
Expand Down Expand Up @@ -490,6 +492,6 @@ def __init__(self, value: str, description=None):
def is_global_feature(feature):
"""
is this a feature that is extracted at every scope?
today, these are OS and arch features.
today, these are OS, arch, and format features.
"""
return isinstance(feature, (OS, Arch))
return isinstance(feature, (OS, Arch, Format))
4 changes: 3 additions & 1 deletion capa/features/extractors/binexport2/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -261,7 +261,9 @@ def _find_base_address(self):
def _compute_thunks(self):
for addr, idx in self.idx.vertex_index_by_address.items():
vertex: BinExport2.CallGraph.Vertex = self.be2.call_graph.vertex[idx]
if not capa.features.extractors.binexport2.helpers.is_vertex_type(vertex, BinExport2.CallGraph.Vertex.Type.THUNK):
if not capa.features.extractors.binexport2.helpers.is_vertex_type(
vertex, BinExport2.CallGraph.Vertex.Type.THUNK
):
continue

curr_idx: int = idx
Expand Down
1 change: 1 addition & 0 deletions capa/features/extractors/binexport2/basicblock.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
from capa.features.basicblock import BasicBlock
from capa.features.extractors.binexport2 import FunctionContext, BasicBlockContext
from capa.features.extractors.base_extractor import BBHandle, FunctionHandle
from capa.features.extractors.binexport2.binexport2_pb2 import BinExport2


def extract_bb_tight_loop(fh: FunctionHandle, bbh: BBHandle) -> Iterator[Tuple[Feature, Address]]:
Expand Down
4 changes: 3 additions & 1 deletion capa/features/extractors/binexport2/extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,9 @@ def get_functions(self) -> Iterator[FunctionHandle]:
be2_vertex: BinExport2.CallGraph.Vertex = self.be2.call_graph.vertex[vertex_idx]

# skip thunks
if capa.features.extractors.binexport2.helpers.is_vertex_type(be2_vertex, BinExport2.CallGraph.Vertex.Type.THUNK):
if capa.features.extractors.binexport2.helpers.is_vertex_type(
be2_vertex, BinExport2.CallGraph.Vertex.Type.THUNK
):
continue

yield FunctionHandle(
Expand Down
4 changes: 3 additions & 1 deletion capa/features/extractors/binexport2/insn.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,9 @@ def extract_insn_api_features(fh: FunctionHandle, _bbh: BBHandle, ih: InsnHandle
vertex_idx: int = be2_index.vertex_index_by_address[addr]
vertex: BinExport2.CallGraph.Vertex = be2.call_graph.vertex[vertex_idx]

if not capa.features.extractors.binexport2.helpers.is_vertex_type(vertex, BinExport2.CallGraph.Vertex.Type.IMPORTED):
if not capa.features.extractors.binexport2.helpers.is_vertex_type(
vertex, BinExport2.CallGraph.Vertex.Type.IMPORTED
):
continue

if not vertex.HasField("mangled_name"):
Expand Down