Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Simultaneous regex filtering at descriptor and linter levels #2669

Merged
merged 4 commits into from
May 19, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ Note: Can be used with `oxsecurity/megalinter@beta` in your GitHub Action mega-l
- Run stale workflow only on schedule, by @echoix in [#2641](https://github.com/oxsecurity/megalinter/pull/2641)
- Add explicit permissions to stale workflow, by @echoix in [#2641](https://github.com/oxsecurity/megalinter/pull/2641)
- Allow MEGALINTER_CONFIG to contain a full path to a MegaLinter config file
- Simultaneous regex filtering at descriptor and linter levels

- Documentation
- Apply many updates after the use of [Vale](https://vale.sh/) on MegaLinter's own sources and docs
Expand Down
2 changes: 1 addition & 1 deletion docs/descriptors/xml_xmllint.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ To apply file formatting you must set `XML_XMLLINT_CLI_LINT_MODE: file` and `XML
| Variable | Description | Default value |
|-----------------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------|
| XML_XMLLINT_AUTOFORMAT | If set to `true`, it will reformat and reindent the output | `false` |
| XML_XMLLINT_INDENT | The number of indentation spaces when `XML_XMLLINT_AUTOFORMAT` is `true` | ` ` |
| XML_XMLLINT_INDENT | The number of indentation spaces when `XML_XMLLINT_AUTOFORMAT` is `true` | `` |
| XML_XMLLINT_ARGUMENTS | User custom arguments to add in linter CLI call<br/>Ex: `-s --foo "bar"` | |
| XML_XMLLINT_FILTER_REGEX_INCLUDE | Custom regex including filter<br/>Ex: `(src\|lib)` | Include every file |
| XML_XMLLINT_FILTER_REGEX_EXCLUDE | Custom regex excluding filter<br/>Ex: `(test\|examples)` | Exclude no file |
Expand Down
26 changes: 15 additions & 11 deletions megalinter/Linter.py
Original file line number Diff line number Diff line change
Expand Up @@ -294,7 +294,8 @@ def __init__(self, params=None, linter_config=None):
self.ignore_file_label = None
self.ignore_file_error = None
self.filter_regex_include = None
self.filter_regex_exclude = None
self.filter_regex_exclude_descriptor = None
self.filter_regex_exclude_linter = None
self.post_linter_status = (
params["post_linter_status"]
if "post_linter_status" in params
Expand Down Expand Up @@ -687,17 +688,16 @@ def load_config_vars(self, params):
== "true"
):
self.disable_errors = True
# Exclude regex: try first NAME + _FILTER_REGEX_EXCLUDE, then LANGUAGE + _FILTER_REGEX_EXCLUDE
# Exclude regex: descriptor level
if config.exists(self.request_id, self.descriptor_id + "_FILTER_REGEX_EXCLUDE"):
self.filter_regex_exclude_descriptor = config.get(
self.request_id, self.descriptor_id + "_FILTER_REGEX_EXCLUDE"
)
# Exclude regex: linter level
if config.exists(self.request_id, self.name + "_FILTER_REGEX_EXCLUDE"):
self.filter_regex_exclude = config.get(
self.filter_regex_exclude_linter = config.get(
self.request_id, self.name + "_FILTER_REGEX_EXCLUDE"
)
elif config.exists(
self.request_id, self.descriptor_id + "_FILTER_REGEX_EXCLUDE"
):
self.filter_regex_exclude = config.get(
self.request_id, self.descriptor_id + "_FILTER_REGEX_EXCLUDE"
)
# Override default docker image version
if config.exists(self.request_id, self.name + "_DOCKER_IMAGE_VERSION"):
self.cli_docker_image_version = config.get(
Expand Down Expand Up @@ -832,7 +832,8 @@ def log_file_filters(self):
log_object = {
"name": self.name,
"filter_regex_include": self.filter_regex_include,
"filter_regex_exclude": self.filter_regex_exclude,
"filter_regex_exclude_descriptor": self.filter_regex_exclude_descriptor,
"filter_regex_exclude_linter": self.filter_regex_exclude_linter,
"files_sub_directory": self.files_sub_directory,
"lint_all_files": self.lint_all_files,
"lint_all_other_linters_files": self.lint_all_other_linters_files,
Expand All @@ -851,7 +852,10 @@ def collect_files(self, all_files):
self.files = utils.filter_files(
all_files=all_files,
filter_regex_include=self.filter_regex_include,
filter_regex_exclude=self.filter_regex_exclude,
filter_regex_exclude=[
self.filter_regex_exclude_descriptor,
self.filter_regex_exclude_linter,
],
file_names_regex=self.file_names_regex,
file_extensions=self.file_extensions,
ignored_files=[],
Expand Down
2 changes: 1 addition & 1 deletion megalinter/MegaLinter.py
Original file line number Diff line number Diff line change
Expand Up @@ -703,7 +703,7 @@ def collect_files(self):
filtered_files = utils.filter_files(
all_files=all_files,
filter_regex_include=self.filter_regex_include,
filter_regex_exclude=self.filter_regex_exclude,
filter_regex_exclude=[self.filter_regex_exclude],
file_names_regex=self.file_names_regex,
file_extensions=self.file_extensions,
ignored_files=ignored_files,
Expand Down
28 changes: 26 additions & 2 deletions megalinter/tests/test_megalinter/filters_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ def test_filter_files_with_ignored_files(self):
filtered_files = utils.filter_files(
all_files=all_files,
filter_regex_include=None,
filter_regex_exclude=None,
filter_regex_exclude=[None],
file_names_regex=[],
file_extensions=["", ".md", ".ext"],
ignored_files=ignored_files,
Expand All @@ -95,7 +95,7 @@ def test_filter_files_with_file_extensions(self):
filtered_files = utils.filter_files(
all_files=all_files,
filter_regex_include=None,
filter_regex_exclude=None,
filter_regex_exclude=[],
file_names_regex=[],
file_extensions=file_extensions,
ignored_files=[],
Expand All @@ -104,3 +104,27 @@ def test_filter_files_with_file_extensions(self):
self.assertListEqual(
sorted(filtered_files), sorted(expected), f"check {file_extensions}"
)

def test_filter_regex_exclude_multilevel(self):
    """Check that several exclude regexes are applied together.

    Two exclude patterns are passed to ``utils.filter_files`` at once
    (one matching the ``descriptor-level`` paths, one matching the
    ``linter-level`` paths) along with a ``.md`` extension filter.
    Only ``target/foo.md`` is expected in the result.
    """
    candidate_files = [
        "should/be/excluded/descriptor-level/test.md",
        "target/foo.md",
        "should/be/excluded/descriptor-level/test2.md",
        "should/be/excluded/linter-level/test.md",
        "should/be/excluded/linter-level/test2.md",
        "target/foo2.ext",
    ]
    # One pattern per filtering level; both are handed over in a single list.
    exclude_patterns = ["(descriptor-level)", "(linter-level)"]
    expected_files = ["target/foo.md"]

    kept_files = utils.filter_files(
        all_files=candidate_files,
        filter_regex_include=None,
        filter_regex_exclude=exclude_patterns,
        file_names_regex=[],
        file_extensions=[".md"],
        ignored_files=[],
        ignore_generated_files=False,
    )

    # Compare order-insensitively, as the other tests in this file do.
    self.assertListEqual(
        sorted(kept_files),
        sorted(expected_files),
        "check regex_exclude_multilevel",
    )
24 changes: 16 additions & 8 deletions megalinter/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ def get_excluded_directories(request_id):
def filter_files(
all_files: Sequence[str],
filter_regex_include: Optional[str],
filter_regex_exclude: Optional[str],
filter_regex_exclude: Sequence[str],
file_names_regex: Sequence[str],
file_extensions: Any,
ignored_files: Optional[Sequence[str]],
Expand All @@ -108,9 +108,12 @@ def filter_files(
filter_regex_include_object = (
re.compile(filter_regex_include) if filter_regex_include else None
)
filter_regex_exclude_object = (
re.compile(filter_regex_exclude) if filter_regex_exclude else None
)
filter_regex_exclude_objects = []
for filter_regex_exclude_item in filter_regex_exclude:
filter_regex_exclude_object = (
re.compile(filter_regex_exclude_item) if filter_regex_exclude_item else None
)
filter_regex_exclude_objects += [filter_regex_exclude_object]
file_names_regex_object = re.compile("|".join(file_names_regex))
filtered_files = []
file_contains_regex_object = (
Expand Down Expand Up @@ -152,10 +155,15 @@ def filter_files(
file_with_workspace
):
continue
# Skip according to FILTER_REGEX_EXCLUDE
if filter_regex_exclude_object and filter_regex_exclude_object.search(
file_with_workspace
):
# Skip according to FILTER_REGEX_EXCLUDE list
excluded_by_regex = False
for filter_regex_exclude_object in filter_regex_exclude_objects:
if filter_regex_exclude_object and filter_regex_exclude_object.search(
file_with_workspace
):
excluded_by_regex = True
break
if excluded_by_regex is True:
continue

# Skip according to file extension (only if lint_all_other_linters_files is false or file_extensions is defined)
Expand Down