Skip to content

Commit

Permalink
feat(metrics): add parse rate info to app scan data (#5844)
Browse files Browse the repository at this point in the history
* Add parse rates to app scan data

* Update test snapshots

* Update PRIVACY.md

Co-authored-by: github-actions <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: Austin Theriault <austin@cutedogs.org>
  • Loading branch information
3 people committed Aug 1, 2022
1 parent 2725f1c commit 3438b7b
Show file tree
Hide file tree
Showing 35 changed files with 308 additions and 8 deletions.
1 change: 1 addition & 0 deletions PRIVACY.md
Expand Up @@ -322,6 +322,7 @@ The classes of scan data are:
- Review and review-requester identifying data (e.g., pull-request ID, branch, merge base, request author)
- Scan metadata, including type of scan and scan parameters (e.g., paths scanned and extensions of ignored files)
- Timing metrics (e.g., time taken to scan per-rule and per-path)
- Parse metrics (e.g., number of files targeted and parsed per-language)
- Semgrep environment (e.g., version, interpreter, timestamp)

**Findings data** are used to provide human readable content for notifications and integrations,
Expand Down
11 changes: 11 additions & 0 deletions cli/src/semgrep/app/scans.py
Expand Up @@ -15,6 +15,7 @@
from boltons.iterutils import partition

from semgrep.error import SemgrepError
from semgrep.parsing_data import ParsingData
from semgrep.rule import Rule
from semgrep.rule_match import RuleMatchMap
from semgrep.state import get_state
Expand Down Expand Up @@ -160,6 +161,7 @@ def report_findings(
rules: List[Rule],
targets: Set[Path],
ignored_targets: Set[Path],
parse_rate: ParsingData,
total_time: float,
commit_date: str,
) -> None:
Expand Down Expand Up @@ -213,6 +215,15 @@ def report_findings(
"errors": [error.to_dict() for error in errors],
"total_time": total_time,
"unsupported_exts": dict(ignored_ext_freqs),
"parse_rate": {
lang: {
"targets_parsed": data.num_targets - data.targets_with_errors,
"num_targets": data.num_targets,
"bytes_parsed": data.num_bytes - data.error_bytes,
"num_bytes": data.num_bytes,
}
for (lang, data) in parse_rate.get_errors_by_lang().items()
},
},
}

Expand Down
2 changes: 2 additions & 0 deletions cli/src/semgrep/commands/ci.py
Expand Up @@ -333,6 +333,7 @@ def ci(
filtered_rules,
profiler,
profiling_data,
parsing_data,
shown_severities,
) = semgrep.semgrep_main.main(
core_opts_str=core_opts,
Expand Down Expand Up @@ -443,6 +444,7 @@ def ci(
filtered_rules,
all_targets,
ignore_log.unsupported_lang_paths,
parsing_data,
total_time,
metadata.commit_datetime,
)
Expand Down
1 change: 1 addition & 0 deletions cli/src/semgrep/commands/scan.py
Expand Up @@ -810,6 +810,7 @@ def scan(
filtered_rules,
profiler,
profiling_data,
_,
shown_severities,
) = semgrep.semgrep_main.main(
core_opts_str=core_opts,
Expand Down
62 changes: 59 additions & 3 deletions cli/src/semgrep/lsp/config.py
@@ -1,25 +1,36 @@
import glob
import urllib
from functools import partial
from pathlib import Path
from typing import Any
from typing import Callable
from typing import Collection
from typing import List
from typing import Mapping
from typing import Optional
from typing import Set
from typing import Tuple
from typing import Union

import semgrep.commands.ci
import semgrep.semgrep_main
from semgrep.app.scans import ScanHandler
from semgrep.config_resolver import get_config
from semgrep.constants import OutputFormat
from semgrep.constants import RuleSeverity
from semgrep.error import SemgrepError
from semgrep.meta import generate_meta_from_environment
from semgrep.metrics import MetricsState
from semgrep.output import OutputHandler
from semgrep.output import OutputSettings
from semgrep.parsing_data import ParsingData
from semgrep.profile_manager import ProfileManager
from semgrep.profiling import ProfilingData
from semgrep.project import get_project_url
from semgrep.rule import Rule
from semgrep.rule_match import RuleMatchMap
from semgrep.state import get_state
from semgrep.target_manager import FileTargetingLog
from semgrep.target_manager import TargetManager
from semgrep.types import JsonObject
from semgrep.util import git_check_output
Expand Down Expand Up @@ -215,7 +226,22 @@ def is_git_dir(self) -> bool:
except Exception:
return False

- def _scanner(self, configs: List[str]) -> Callable:
def _scanner(
self, configs: List[str]
) -> Callable[
...,
Tuple[
RuleMatchMap,
List[SemgrepError],
Set[Path],
FileTargetingLog,
List[Rule],
ProfileManager,
ProfilingData,
ParsingData,
Collection[RuleSeverity],
],
]:
"""Generate a scanner according to the config"""
output_settings = OutputSettings(output_format=OutputFormat.JSON)
output_handler = OutputHandler(output_settings)
Expand All @@ -242,11 +268,41 @@ def _scanner(self, configs: List[str]) -> Callable:
# I like doing it this way because then it's all in one spot
# but I can see an argument for this being a function that takes a config
@property
- def scanner(self) -> Callable:
def scanner(
self,
) -> Callable[
...,
Tuple[
RuleMatchMap,
List[SemgrepError],
Set[Path],
FileTargetingLog,
List[Rule],
ProfileManager,
ProfilingData,
ParsingData,
Collection[RuleSeverity],
],
]:
return self._scanner(configs=self.configs)

@property
- def scanner_ci(self) -> Callable:
def scanner_ci(
self,
) -> Callable[
...,
Tuple[
RuleMatchMap,
List[SemgrepError],
Set[Path],
FileTargetingLog,
List[Rule],
ProfileManager,
ProfilingData,
ParsingData,
Collection[RuleSeverity],
],
]:
return self._scanner(configs=[self.scan_url])

# =====================
Expand Down
2 changes: 2 additions & 0 deletions cli/src/semgrep/lsp/run_semgrep.py
Expand Up @@ -22,6 +22,7 @@ def run_rules(
_,
_,
_,
_,
) = config.scanner(target=targets)
# ignore this type since we're doing weird things with partial :O
return (filtered_matches_by_rule, all_targets)
Expand All @@ -39,6 +40,7 @@ def run_rules_ci(
_,
_,
_,
_,
) = config.scanner_ci(target=targets)
# ignore this type since we're doing weird things with partial :O
return (filtered_matches_by_rule, all_targets)
3 changes: 3 additions & 0 deletions cli/src/semgrep/semgrep_main.py
Expand Up @@ -109,6 +109,7 @@ def invoke_semgrep(
filtered_rules,
profiler,
profiling_data,
_,
shown_severities,
) = main(
output_handler=output_handler,
Expand Down Expand Up @@ -267,6 +268,7 @@ def main(
List[Rule],
ProfileManager,
ProfilingData,
ParsingData,
Collection[RuleSeverity],
]:
logger.debug(f"semgrep version {__VERSION__}")
Expand Down Expand Up @@ -473,5 +475,6 @@ def main(
filtered_rules,
profiler,
profiling_data,
parsing_data,
shown_severities,
)
8 changes: 8 additions & 0 deletions cli/tests/e2e/snapshots/test_ci/test_dryrun/results.txt
Expand Up @@ -316,6 +316,14 @@ Would have sent complete blob: {
"total_time": <MASKED>
"unsupported_exts": {
".txt": 1
},
"parse_rate": {
"python": {
"targets_parsed": 1,
"num_targets": 1,
"bytes_parsed": 336,
"num_bytes": 336
}
}
}
}
Expand Down
Expand Up @@ -6,6 +6,14 @@
"total_time": 0.5,
"unsupported_exts": {
".txt": 1
},
"parse_rate": {
"python": {
"targets_parsed": 1,
"num_targets": 1,
"bytes_parsed": 336,
"num_bytes": 336
}
}
}
}
Expand Up @@ -6,6 +6,14 @@
"total_time": 0.5,
"unsupported_exts": {
".txt": 1
},
"parse_rate": {
"python": {
"targets_parsed": 1,
"num_targets": 1,
"bytes_parsed": 336,
"num_bytes": 336
}
}
}
}
Expand Up @@ -6,6 +6,14 @@
"total_time": 0.5,
"unsupported_exts": {
".txt": 1
},
"parse_rate": {
"python": {
"targets_parsed": 1,
"num_targets": 1,
"bytes_parsed": 336,
"num_bytes": 336
}
}
}
}
Expand Up @@ -6,6 +6,14 @@
"total_time": 0.5,
"unsupported_exts": {
".txt": 1
},
"parse_rate": {
"python": {
"targets_parsed": 1,
"num_targets": 1,
"bytes_parsed": 336,
"num_bytes": 336
}
}
}
}
Expand Up @@ -6,6 +6,14 @@
"total_time": 0.5,
"unsupported_exts": {
".txt": 1
},
"parse_rate": {
"python": {
"targets_parsed": 1,
"num_targets": 1,
"bytes_parsed": 336,
"num_bytes": 336
}
}
}
}
Expand Up @@ -4,6 +4,14 @@
"findings": 9,
"errors": [],
"total_time": 0.5,
- "unsupported_exts": {}
"unsupported_exts": {},
"parse_rate": {
"python": {
"targets_parsed": 1,
"num_targets": 1,
"bytes_parsed": 336,
"num_bytes": 336
}
}
}
}
Expand Up @@ -6,6 +6,14 @@
"total_time": 0.5,
"unsupported_exts": {
".txt": 1
},
"parse_rate": {
"python": {
"targets_parsed": 1,
"num_targets": 1,
"bytes_parsed": 336,
"num_bytes": 336
}
}
}
}
Expand Up @@ -6,6 +6,14 @@
"total_time": 0.5,
"unsupported_exts": {
".txt": 1
},
"parse_rate": {
"python": {
"targets_parsed": 1,
"num_targets": 1,
"bytes_parsed": 336,
"num_bytes": 336
}
}
}
}
Expand Up @@ -4,6 +4,14 @@
"findings": 9,
"errors": [],
"total_time": 0.5,
- "unsupported_exts": {}
"unsupported_exts": {},
"parse_rate": {
"python": {
"targets_parsed": 1,
"num_targets": 1,
"bytes_parsed": 336,
"num_bytes": 336
}
}
}
}
Expand Up @@ -6,6 +6,14 @@
"total_time": 0.5,
"unsupported_exts": {
".txt": 1
},
"parse_rate": {
"python": {
"targets_parsed": 1,
"num_targets": 1,
"bytes_parsed": 336,
"num_bytes": 336
}
}
}
}

0 comments on commit 3438b7b

Please sign in to comment.