-
Notifications
You must be signed in to change notification settings - Fork 566
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Integrate fast v2 #2836
Integrate fast v2 #2836
Changes from 3 commits
2d6a3d7
f021845
ec0c97b
d909198
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -535,10 +535,97 @@ def _run_rules( | |
match_time_matrix, | ||
) | ||
|
||
def _run_rules_direct_to_semgrep_core( | ||
self, | ||
rules: List[Rule], | ||
target_manager: TargetManager, | ||
profiler: ProfileManager, | ||
) -> Tuple[ | ||
Dict[Rule, List[RuleMatch]], | ||
Dict[Rule, List[Any]], | ||
List[SemgrepError], | ||
Set[Path], | ||
Dict[Any, Any], | ||
]: | ||
from itertools import chain | ||
from collections import defaultdict | ||
|
||
outputs: Dict[Rule, List[RuleMatch]] = defaultdict(list) | ||
errors: List[SemgrepError] = [] | ||
for rule, language in tuple( | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Later on we can try to pass multiple rules at the same time if they have the same language and same set of target files |
||
chain( | ||
*([(rule, language) for language in rule.languages] for rule in rules) | ||
) | ||
): | ||
with tempfile.NamedTemporaryFile( | ||
"w", suffix=".yaml" | ||
) as rule_file, tempfile.NamedTemporaryFile("w") as target_file: | ||
targets = self.get_files_for_language(language, rule, target_manager) | ||
target_file.write("\n".join(map(lambda p: str(p), targets))) | ||
target_file.flush() | ||
yaml = YAML() | ||
yaml.dump({"rules": [rule._raw]}, rule_file) | ||
rule_file.flush() | ||
|
||
cmd = [SEMGREP_PATH] + [ | ||
"-lang", | ||
language, | ||
"-fast", | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. In theory -fast is another thing, but we can bundle it in --experimental. |
||
"-json", | ||
"-config", | ||
rule_file.name, | ||
minusworld marked this conversation as resolved.
Show resolved
Hide resolved
|
||
"-j", | ||
str(self._jobs), | ||
"-target_file", | ||
target_file.name, | ||
"-timeout", | ||
str(self._timeout), | ||
"-max_memory", | ||
str(self._max_memory), | ||
] | ||
|
||
r = sub_run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE) | ||
out_bytes, err_bytes, returncode = r.stdout, r.stderr, r.returncode | ||
output_json = self._parse_core_output(out_bytes, err_bytes, returncode) | ||
|
||
if returncode != 0: | ||
if "error" in output_json: | ||
self._raise_semgrep_error_from_json(output_json, [], rule) | ||
else: | ||
raise SemgrepError( | ||
f"unexpected json output while invoking semgrep-core with rule '{rule.id}':\n{PLEASE_FILE_ISSUE_TEXT}" | ||
) | ||
|
||
# end with tempfile.NamedTemporaryFile(...) ... | ||
outputs[rule].extend( | ||
[ | ||
RuleMatch.from_pattern_match( | ||
rule.id, | ||
PatternMatch(pattern_match), | ||
message=rule.message, | ||
metadata=rule.metadata, | ||
severity=rule.severity, | ||
fix=rule.fix, | ||
fix_regex=rule.fix_regex, | ||
) | ||
for pattern_match in output_json["matches"] | ||
] | ||
) | ||
errors.extend( | ||
CoreException.from_json(e, language, rule.id).into_semgrep_error() | ||
for e in output_json["errors"] | ||
) | ||
# end for rule, language ... | ||
|
||
return outputs, {}, errors, set(Path(p) for p in target_manager.targets), {} | ||
|
||
# end _run_rules_direct_to_semgrep_core | ||
|
||
def invoke_semgrep( | ||
self, | ||
target_manager: TargetManager, | ||
rules: List[Rule], | ||
experimental: bool = False, | ||
) -> Tuple[ | ||
Dict[Rule, List[RuleMatch]], | ||
Dict[Rule, List[Dict[str, Any]]], | ||
|
@@ -553,13 +640,16 @@ def invoke_semgrep( | |
start = datetime.now() | ||
profiler = ProfileManager() | ||
|
||
runner_fxn = ( | ||
self._run_rules_direct_to_semgrep_core if experimental else self._run_rules | ||
) | ||
( | ||
findings_by_rule, | ||
debug_steps_by_rule, | ||
errors, | ||
all_targets, | ||
match_time_matrix, | ||
) = self._run_rules(rules, target_manager, profiler) | ||
) = runner_fxn(rules, target_manager, profiler) | ||
|
||
logger.debug( | ||
f"semgrep ran in {datetime.now() - start} on {len(all_targets)} files" | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can we name this something less generic like
--core-direct-rules
?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think it's probably fine; this is really only meant for internal use. I think Pad was thinking that this would be an entrypoint for future experimental work, too. In any case, we can always change it later - it's just a string after all.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
yep, --core-direct-rules sounds good.