Add more pre-commit hooks (#4943)
* Add more pre-commit hooks
* Update pre-commit hooks
* Fix pre-commit errors
underyx committed Apr 5, 2022
1 parent 1408398 commit 8b7d9e3
Showing 45 changed files with 169 additions and 214 deletions.
51 changes: 42 additions & 9 deletions .pre-commit-config.yaml
@@ -1,21 +1,37 @@
exclude: "^semgrep/tests/e2e/(targets|snapshots)|semgrep-core/tests"
exclude: "^semgrep/tests/e2e/(targets|snapshots)|^semgrep-core/tests|^semgrep/semgrep/external|\\binvalid\\b"

repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.1.0
hooks:
- id: check-yaml
args: [--allow-multiple-documents]
exclude: ^semgrep\/tests\/.+$|^perf\/bench\/gitlab-rules\/.+$$
- id: end-of-file-fixer
- id: trailing-whitespace
args: [--markdown-linebreak-ext=md]
- id: check-case-conflict
- id: check-added-large-files
- id: check-ast
- id: check-builtin-literals
- id: check-case-conflict
- id: check-docstring-first
- id: check-executables-have-shebangs
- id: check-json
- id: check-merge-conflict
- id: check-shebang-scripts-are-executable
- id: check-symlinks
- id: check-toml
- id: check-vcs-permalinks
- id: check-xml
- id: check-yaml
args: [--allow-multiple-documents]
- id: debug-statements
- id: destroyed-symlinks
- id: detect-private-key
- id: end-of-file-fixer
- id: fix-byte-order-marker
- id: fix-encoding-pragma
args: [--remove]
- id: mixed-line-ending
args: [--fix=lf]
- id: no-commit-to-branch
- id: requirements-txt-fixer
- id: trailing-whitespace
args: [--markdown-linebreak-ext=md]

- repo: https://github.com/psf/black
rev: 22.3.0
@@ -27,7 +43,13 @@ repos:
rev: v3.0.1
hooks:
- id: reorder-python-imports
args: [--py37-plus]
args: ["--application-directories=.:semgrep", --py37-plus]

- repo: https://github.com/asottile/pyupgrade
rev: v2.31.1
hooks:
- id: pyupgrade
args: ["--py37-plus"]

- repo: https://github.com/pre-commit/mirrors-mypy
rev: "v0.942"
@@ -66,6 +88,17 @@ repos:
additional_dependencies: ["flake8-bugbear==22.1.11"]
args: ["--select=B,E9,F4,F63,F7,F82"]

- repo: https://github.com/myint/autoflake
rev: v1.4
hooks:
- id: autoflake
args:
- --in-place
- --remove-unused-variables
- --remove-duplicate-keys
- --remove-all-unused-imports
- --ignore-init-module-imports

- repo: https://github.com/returntocorp/semgrep
rev: "0.86.5-tmp-fix-pre-commit"
hooks:
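Most of the file-by-file changes below are mechanical rewrites produced by the newly added hooks, chiefly pyupgrade and autoflake. A minimal sketch of the rewrites pyupgrade --py37-plus applies (an assumption based on pyupgrade's documented behavior; the pairs mirror hunks later in this commit):

# Patterns pyupgrade --py37-plus rewrites (sketch, not exhaustive):
#   set(x for x in items)         -> {x for x in items}
#   "failed ({0})".format(name)   -> f"failed ({name})"
#   class Foo(object):            -> class Foo:
#   super(Foo, self).method()     -> super().method()
#   open(path, "r")               -> open(path)
name = "example"
assert "failed ({0})".format(name) == f"failed ({name})"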
3 changes: 1 addition & 2 deletions .vscode/tasks.json
@@ -1,6 +1,5 @@
{
// See https://go.microsoft.com/fwlink/?LinkId=733558
// for the documentation about the tasks.json format
"_comment": "See https://go.microsoft.com/fwlink/?LinkId=733558 for the documentation about the tasks.json format",
"version": "2.0.0",
"tasks": [
{
4 changes: 2 additions & 2 deletions perf/compare-bench-findings
@@ -15,8 +15,8 @@ FINDINGS_SNAPSHOT_PATH = "snapshots/benchmark_findings.json"

def findings_differ(expected: Dict[str, Any], findings: Dict[str, Any]) -> bool:
name = findings["name"]
baseline = set(json.dumps(result) for result in expected["findings"]["results"])
latest = set(json.dumps(result) for result in findings["findings"]["results"])
baseline = {json.dumps(result) for result in expected["findings"]["results"]}
latest = {json.dumps(result) for result in findings["findings"]["results"]}

def output_diff(diff: set) -> None:
for d in sorted(diff):
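The set() call wrapping a generator becomes a set comprehension; both build the same set, the comprehension just skips the extra name lookup and call. A minimal sketch with made-up results:

import json

# Duplicate findings collapse identically under both spellings.
results = [{"check": "a"}, {"check": "b"}, {"check": "a"}]
assert {json.dumps(r) for r in results} == set(json.dumps(r) for r in results)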
10 changes: 5 additions & 5 deletions perf/config.py
@@ -28,13 +28,13 @@


@define
class Repository(object):
class Repository:
url: str = field(default="")
commit_hash: str = field(default="HEAD")


@define
class RuleConfig(object):
class RuleConfig:
config_str: str = field(default="")

def _fetch_rule_config_from_url(self, rule_config_url: str) -> Optional[str]:
@@ -175,7 +175,7 @@ def resolve_to_cache(self, cache_path: Path) -> None:


@define
class BenchmarkRunSetupData(object):
class BenchmarkRunSetupData:
"""
Stores data about an individual benchmark run
"""
@@ -187,7 +187,7 @@ class BenchmarkRunSetupData(object):


@define
class SemgrepBenchmarkConfig(object):
class SemgrepBenchmarkConfig:
"""
Stores data needed to start a benchmarking run.
@@ -215,7 +215,7 @@ def parse_config(
cls: Type["SemgrepBenchmarkConfig"], config_file: Path
) -> "SemgrepBenchmarkConfig":
logger.debug(f"Using config at {config_file.absolute()}")
with open(config_file, "r") as fin:
with open(config_file) as fin:
config = yaml.load(fin)

return SemgrepBenchmarkConfig(
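Two Python 3 idioms drive the perf/config.py hunks: an explicit (object) base is redundant because every class already inherits from object, and open() defaults to mode "r". A quick sketch of both:

import inspect

class WithBase(object):   # pre-rewrite spelling
    pass

class WithoutBase:        # post-rewrite spelling, identical semantics
    pass

assert WithBase.__mro__[-1] is object and WithoutBase.__mro__[-1] is object

# open(path) == open(path, "r"): "r" is the documented default mode.
assert inspect.signature(open).parameters["mode"].default == "r"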
2 changes: 1 addition & 1 deletion perf/r2c-rules/r2c-security-audit.yml
@@ -3704,7 +3704,7 @@ rules:
Scripting')"
references:
- https://www.developsec.com/2017/11/09/xss-in-a-script-tag/
- https://github.com/bkimminich/juice-shop/blob/master/routes/videoHandler.js#L64
- https://github.com/juice-shop/juice-shop/blob/1ceb8751e986dacd3214a618c37e7411be6bc11a/routes/videoHandler.ts#L68
severity: WARNING
languages:
- javascript
4 changes: 2 additions & 2 deletions perf/run-benchmarks
@@ -321,7 +321,7 @@ def prepare_rule_cache_for_this_run(
logger.info(
f"Rule cache for run {setup_data.run_name} created at {rule_cache_for_this_run}"
)
rule_config_paths = list()
rule_config_paths = []
for rule_config in setup_data.rule_configs:
logger.info(f"Checking for rule config '{rule_config}' in cache")
normalized_rule_config = rule_config.normalize_rule_config_name()
@@ -350,7 +350,7 @@ def prepare_benchmark_run(
- downloads rule configs from an endpoint if necessary
- generates a 'prep' file which clones a repo from a URL and checks out the commit
"""
corpuses: List[Corpus] = list()
corpuses: List[Corpus] = []
for setup_data in benchmark_config.benchmark_setup_data:
logger.info(f"Setting up benchmark run for run '{setup_data.run_name}'")
rule_cache_dir, _ = prepare_rule_cache_for_this_run(setup_data, clean)
9 changes: 4 additions & 5 deletions scripts/generate_cheatsheet.py 100644 → 100755
@@ -169,9 +169,9 @@
"deep": ["expr_operator"],
}

NUM_ALPHA_FEATURES = sum([len(val) for val in ALPHA_FEATURES.values()])
NUM_BETA_FEATURES = sum([len(val) for val in BETA_FEATURES.values()])
NUM_GA_FEATURES = sum([len(val) for val in GA_FEATURES.values()])
NUM_ALPHA_FEATURES = sum(len(val) for val in ALPHA_FEATURES.values())
NUM_BETA_FEATURES = sum(len(val) for val in BETA_FEATURES.values())
NUM_GA_FEATURES = sum(len(val) for val in GA_FEATURES.values())


def find_path(
@@ -228,8 +228,7 @@ def run_semgrep_on_example(
print(">>> " + " ".join(cmd))
output = subprocess.run( # nosemgrep: python.lang.security.audit.dangerous-subprocess-use.dangerous-subprocess-use
cmd,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
capture_output=True,
)
if output.returncode == 0:
print(output.stderr.decode("utf-8"))
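Two more cleanups here: sum() consumes a generator directly, so the intermediate list from the listcomp was pure overhead, and capture_output=True (available since Python 3.7) is shorthand for passing both stdout=subprocess.PIPE and stderr=subprocess.PIPE. A small runnable sketch:

import subprocess
import sys

features = {"alpha": ["a", "b"], "beta": ["c"]}
assert sum(len(v) for v in features.values()) == 3  # no throwaway list

# capture_output=True pipes both streams in one keyword argument.
proc = subprocess.run([sys.executable, "-c", "print('ok')"], capture_output=True)
assert proc.returncode == 0 and proc.stdout.strip() == b"ok"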
Empty file modified scripts/merge-rules.py 100644 → 100755
2 changes: 1 addition & 1 deletion semgrep-core/perf/input/l300.py
@@ -44,7 +44,7 @@ def pytest_runtest_setup(item):
if "incremental" in item.keywords:
previousfailed = getattr(item.parent, "_previousfailed", None)
if previousfailed is not None:
pytest.xfail("previous test failed ({0})".format(previousfailed.name))
pytest.xfail(f"previous test failed ({previousfailed.name})")


def pytest_runtest_makereport(item, call):
4 changes: 2 additions & 2 deletions semgrep-core/scripts/run-coverage.py
@@ -9,7 +9,7 @@

def report_summary_stat() -> str:
stat = os.popen("bisect-ppx-report summary").read()
patt = re.compile("Coverage:\s+\d+/\d+\s+\((\d+\.\d*)%\)")
patt = re.compile(r"Coverage:\s+\d+/\d+\s+\((\d+\.\d*)%\)")
# mobj = patt.match("Coverage: 4/4 (4.4%)")
mobj = patt.match(stat)
if mobj is not None:
@@ -19,7 +19,7 @@ def report_summary_stat() -> str:

def report_summary_for_file_stat(file: str) -> str:
stat = os.popen("bisect-ppx-report summary --per-file").readlines()
patt = re.compile(f"\s*(\d+.\d*)\s+%\s+\d+/\d+\s+{file}")
patt = re.compile(rf"\s*(\d+.\d*)\s+%\s+\d+/\d+\s+{file}")
for line in stat:
mobj = patt.match(line)
if mobj is not None:
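Without the r prefix, escapes like \s and \d are invalid string escapes that Python merely tolerates with a DeprecationWarning; a raw string hands them to the regex engine verbatim. The commented example from the script itself doubles as a check:

import re

patt = re.compile(r"Coverage:\s+\d+/\d+\s+\((\d+\.\d*)%\)")
mobj = patt.match("Coverage: 4/4 (4.4%)")
assert mobj is not None and mobj.group(1) == "4.4"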
4 changes: 1 addition & 3 deletions semgrep/dependencyparser/find_lockfiles.py
@@ -23,9 +23,7 @@ def find_lockfiles(
if entry.is_dir() and (
seen_paths is None or not (resolved_path in seen_paths)
):
new_paths = set([resolved_path]).union(
seen_paths if seen_paths else set([])
)
new_paths = {resolved_path}.union(seen_paths if seen_paths else set())
yield from find_lockfiles(full_path, frozenset(new_paths))
if entry.is_file() and entry.name.lower() in TARGET_LOCKFILE_FILENAMES:
yield full_path
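Same idea as the set comprehensions above: {x} is a set display, while set([x]) builds a throwaway list first; union is unaffected. A sketch:

resolved = "/repo/sub"
seen = frozenset({"/repo"})
assert {resolved}.union(seen) == set([resolved]).union(seen) == {"/repo", "/repo/sub"}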
4 changes: 2 additions & 2 deletions semgrep/dependencyparser/package_restrictions.py
@@ -7,12 +7,12 @@
from typing import Tuple

import packaging.version
from packaging.specifiers import SpecifierSet

from dependencyparser.find_lockfiles import find_lockfiles
from dependencyparser.models import LockfileDependency
from dependencyparser.models import PackageManagers
from dependencyparser.parse_lockfile import parse_lockfile_str
from packaging.specifiers import SpecifierSet

from semgrep.error import SemgrepError


11 changes: 4 additions & 7 deletions semgrep/semgrep/commands/ci.py
@@ -92,8 +92,7 @@ def fix_head_if_github_action(metadata: GitMeta) -> Iterator[None]:
encoding="utf-8",
check=True,
timeout=GIT_SH_TIMEOUT,
stderr=subprocess.PIPE,
stdout=subprocess.PIPE,
capture_output=True,
)
logger.debug(f"git rev-parse stdout: {rev_parse.stdout}")
logger.debug(f"git rev-parse stderr: {rev_parse.stderr}")
@@ -105,8 +104,7 @@ def fix_head_if_github_action(metadata: GitMeta) -> Iterator[None]:
["git", "checkout", metadata.head_ref],
encoding="utf-8",
check=True,
stderr=subprocess.PIPE,
stdout=subprocess.PIPE,
capture_output=True,
timeout=GIT_SH_TIMEOUT,
)
logger.debug(f"git checkout stdout: {checkout.stdout}")
@@ -119,8 +117,7 @@ def fix_head_if_github_action(metadata: GitMeta) -> Iterator[None]:
subprocess.run(
["git", "checkout", stashed_rev],
encoding="utf-8",
stderr=subprocess.PIPE,
stdout=subprocess.PIPE,
capture_output=True,
check=True,
timeout=GIT_SH_TIMEOUT,
)
@@ -393,7 +390,7 @@ def ci(
match for match in matches if not match.is_ignored
]

num_cai_findings = sum(len(v) for v in cai_matches_by_rule.values())
sum(len(v) for v in cai_matches_by_rule.values())
num_nonblocking_findings = sum(len(v) for v in nonblocking_matches_by_rule.values())
num_blocking_findings = sum(len(v) for v in blocking_matches_by_rule.values())

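Note the num_cai_findings hunk: the unused binding is deleted but the call is kept as a bare expression, so any side effects of evaluating it are preserved. A sketch with hypothetical data mirroring the hunk above:

# Before: num_cai_findings = sum(len(v) for v in cai_matches_by_rule.values())
# After: the dead name is gone, the expression still runs.
cai_matches_by_rule = {"rule-a": [1, 2], "rule-b": [3]}
sum(len(v) for v in cai_matches_by_rule.values())  # evaluated, result discarded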
8 changes: 4 additions & 4 deletions semgrep/semgrep/config_resolver.py
@@ -234,7 +234,7 @@ def _make_config_request(self) -> str:
"text/yaml",
"text/vnd.yaml",
]
if content_type and any((ct in content_type for ct in yaml_types)):
if content_type and any(ct in content_type for ct in yaml_types):
return r.content.decode("utf-8", errors="replace")
else:
raise SemgrepError(
@@ -485,7 +485,7 @@ def parse_config_string(
try:
data = parse_yaml_preserve_spans(contents, filename)
return {config_id: data}
except EmptyYamlException as se:
except EmptyYamlException:
raise SemgrepError(
f"Empty configuration file {filename}",
code=UNPARSEABLE_YAML_EXIT_CODE,
@@ -641,7 +641,7 @@ def get_latest_version(ruleset_name: str) -> Version:
for version_string in versions_json:
try:
versions_parsed.append(Version(version_string))
except ValueError as e:
except ValueError:
logger.info(
f"Could not parse {version_string} in versions of {ruleset_name} pack as valid semver. Ignoring that version string."
)
@@ -811,7 +811,7 @@ def list_current_public_rulesets() -> List[JsonObject]:
headers = {"User-Agent": SEMGREP_USER_AGENT}
try:
r = requests.get(api_full_url, headers=headers, timeout=20)
except Exception as e:
except Exception:
raise SemgrepError(f"Failed to download list of public rulesets")

if not r.ok:
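Dropping the as e / as se bindings when the caught exception is never used is the cleanup flake8's unused-variable checks (F841) flag; the handlers behave identically. A minimal sketch:

def get_version(version_string):
    try:
        return int(version_string)
    except ValueError:  # "as e" removed: the exception object was unused
        return None

assert get_version("3") == 3 and get_version("v3") is None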
2 changes: 1 addition & 1 deletion semgrep/semgrep/core_output.py
@@ -130,7 +130,7 @@ def parse(cls, raw_json: JsonObject) -> "CoreError":
path = Path(location["path"])
start = core.Position.from_json(location["start"])
end = core.Position.from_json(location["end"])
_extra = raw_json.get("extra", {})
raw_json.get("extra", {})
message = CoreErrorMessage(raw_json.get("message", "<no error message>"))
level_str = raw_json["severity"]
if level_str.upper() == "WARNING":
3 changes: 1 addition & 2 deletions semgrep/semgrep/core_runner.py
@@ -264,7 +264,7 @@ class Task:
class Plan(List[Task]):
@property
def rule_count(self) -> int:
return len(set(rule for task in self for rule in task.rule_ids))
return len({rule for task in self for rule in task.rule_ids})

@property
def file_count(self) -> int:
@@ -616,7 +616,6 @@ def _run_rules_direct_to_semgrep_core(
stderr: Optional[int] = subprocess.PIPE
if is_debug():
cmd += ["--debug"]
stderr = None

if dump_command_for_core:
print(" ".join(cmd))
8 changes: 4 additions & 4 deletions semgrep/semgrep/default_group.py
@@ -39,7 +39,7 @@ def __init__()

def __init__(self, *args: Any, **kwargs: Any) -> None:
default_command = kwargs.pop("default_command", None)
super(DefaultGroup, self).__init__(*args, **kwargs)
super().__init__(*args, **kwargs)
self.default_command_name = None
if default_command is not None:
self.default_command_name = default_command
@@ -59,7 +59,7 @@ def parse_args(self, ctx: click.Context, args: List[str]) -> List[str]:
"""
if not args and self.default_command_name is not None:
args.insert(0, self.default_command_name)
return super(DefaultGroup, self).parse_args(ctx, args)
return super().parse_args(ctx, args)

def get_command(
self, ctx: click.Context, command_name: str
@@ -76,7 +76,7 @@ def get_command(
ctx._default_command_overwrite_args0 = command_name # type: ignore
command_name = self.default_command_name

return super(DefaultGroup, self).get_command(ctx, command_name)
return super().get_command(ctx, command_name)

def resolve_command(
self, ctx: click.Context, args: List[str]
@@ -92,7 +92,7 @@ def resolve_command(
If args[0] is actually a command name then _default_command_overwrite_args0
will not be set so this function is equivalent to existing behavior
"""
cmd_name, cmd, args = super(DefaultGroup, self).resolve_command(ctx, args)
cmd_name, cmd, args = super().resolve_command(ctx, args)
if hasattr(ctx, "_default_command_overwrite_args0"):
args.insert(0, ctx._default_command_overwrite_args0) # type: ignore
return cmd_name, cmd, args
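Zero-argument super(), available since Python 3, resolves the class and instance from the enclosing frame, so each call here is equivalent to the old explicit super(DefaultGroup, self) spelling. A sketch:

class Base:
    def label(self):
        return "base"

class Child(Base):
    def label(self):
        # Same resolution as super(Child, self).label() in Python 3.
        return super().label() + "/child"

assert Child().label() == "base/child"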
3 changes: 1 addition & 2 deletions semgrep/semgrep/dependency_aware_rule.py
@@ -5,12 +5,11 @@
from typing import List
from typing import Tuple

import semgrep.output_from_core as core
from dependencyparser.models import PackageManagers
from dependencyparser.package_restrictions import dependencies_range_match_any
from dependencyparser.package_restrictions import find_and_parse_lockfiles
from dependencyparser.package_restrictions import ProjectDependsOnEntry

import semgrep.output_from_core as core
from semgrep.error import SemgrepError
from semgrep.rule import Rule
from semgrep.rule_match import RuleMatch
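The import shuffle in this file presumably follows from the new --application-directories=.:semgrep flag above: reorder-python-imports treats packages resolvable under the listed roots (here, dependencyparser and semgrep) as first-party, which changes how these imports group and sort.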
