From 63e74ad1eab40076f7462268c39bd9761a05c850 Mon Sep 17 00:00:00 2001 From: Nicolas Vuillamy Date: Fri, 19 May 2023 18:54:37 +0200 Subject: [PATCH] Simultaneous regex filtering at descriptor and linter levels (#2669) * Simultaneous regex filtering at descriptor and linter levels Fixes https://github.com/oxsecurity/megalinter/issues/2668 * Add test method * typo * [MegaLinter] Apply linters fixes --------- Co-authored-by: nvuillam --- CHANGELOG.md | 1 + docs/descriptors/xml_xmllint.md | 2 +- megalinter/Linter.py | 26 +++++++++-------- megalinter/MegaLinter.py | 2 +- .../tests/test_megalinter/filters_test.py | 28 +++++++++++++++++-- megalinter/utils.py | 24 ++++++++++------ 6 files changed, 60 insertions(+), 23 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 707e7f4c362..de6c621ec8c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -57,6 +57,7 @@ Note: Can be used with `oxsecurity/megalinter@beta` in your GitHub Action mega-l - Run stale workflow only on schedule, by @echoix in [#2641](https://github.com/oxsecurity/megalinter/pull/2641) - Add explicit permissions to stale workflow, by @echoix in [#2641](https://github.com/oxsecurity/megalinter/pull/2641) - Allow MEGALINTER_CONFIG to contain a full path to a MegaLinter config file + - Simultaneous regex filtering at descriptor and linter levels - Documentation - Apply many updates after the use of [Vale](https://vale.sh/) on MegaLinter own sources and docs diff --git a/docs/descriptors/xml_xmllint.md b/docs/descriptors/xml_xmllint.md index 2a0f09b98f8..3821a397df2 100644 --- a/docs/descriptors/xml_xmllint.md +++ b/docs/descriptors/xml_xmllint.md @@ -25,7 +25,7 @@ To apply file formatting you must set `XML_XMLLINT_CLI_LINT_MODE: file` and `XML | Variable | Description | Default value | 
|-----------------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------| | XML_XMLLINT_AUTOFORMAT | If set to `true`, it will reformat and reindent the output | `false` | -| XML_XMLLINT_INDENT | The number of indentation spaces when `XML_XMLLINT_AUTOFORMAT` is `true` | ` ` | +| XML_XMLLINT_INDENT | The number of indentation spaces when `XML_XMLLINT_AUTOFORMAT` is `true` | `` | | XML_XMLLINT_ARGUMENTS | User custom arguments to add in linter CLI call
Ex: `-s --foo "bar"` | | | XML_XMLLINT_FILTER_REGEX_INCLUDE | Custom regex including filter
Ex: `(src\|lib)` | Include every file | | XML_XMLLINT_FILTER_REGEX_EXCLUDE | Custom regex excluding filter
Ex: `(test\|examples)` | Exclude no file | diff --git a/megalinter/Linter.py b/megalinter/Linter.py index 28ee0d97f2a..d35c2c4c96a 100644 --- a/megalinter/Linter.py +++ b/megalinter/Linter.py @@ -294,7 +294,8 @@ def __init__(self, params=None, linter_config=None): self.ignore_file_label = None self.ignore_file_error = None self.filter_regex_include = None - self.filter_regex_exclude = None + self.filter_regex_exclude_descriptor = None + self.filter_regex_exclude_linter = None self.post_linter_status = ( params["post_linter_status"] if "post_linter_status" in params @@ -687,17 +688,16 @@ def load_config_vars(self, params): == "true" ): self.disable_errors = True - # Exclude regex: try first NAME + _FILTER_REGEX_EXCLUDE, then LANGUAGE + _FILTER_REGEX_EXCLUDE + # Exclude regex: descriptor level + if config.exists(self.request_id, self.descriptor_id + "_FILTER_REGEX_EXCLUDE"): + self.filter_regex_exclude_descriptor = config.get( + self.request_id, self.descriptor_id + "_FILTER_REGEX_EXCLUDE" + ) + # Exclude regex: linter level if config.exists(self.request_id, self.name + "_FILTER_REGEX_EXCLUDE"): - self.filter_regex_exclude = config.get( + self.filter_regex_exclude_linter = config.get( self.request_id, self.name + "_FILTER_REGEX_EXCLUDE" ) - elif config.exists( - self.request_id, self.descriptor_id + "_FILTER_REGEX_EXCLUDE" - ): - self.filter_regex_exclude = config.get( - self.request_id, self.descriptor_id + "_FILTER_REGEX_EXCLUDE" - ) # Override default docker image version if config.exists(self.request_id, self.name + "_DOCKER_IMAGE_VERSION"): self.cli_docker_image_version = config.get( @@ -832,7 +832,8 @@ def log_file_filters(self): log_object = { "name": self.name, "filter_regex_include": self.filter_regex_include, - "filter_regex_exclude": self.filter_regex_exclude, + "filter_regex_exclude_descriptor": self.filter_regex_exclude_descriptor, + "filter_regex_exclude_linter": self.filter_regex_exclude_linter, "files_sub_directory": self.files_sub_directory, 
"lint_all_files": self.lint_all_files, "lint_all_other_linters_files": self.lint_all_other_linters_files, @@ -851,7 +852,10 @@ def collect_files(self, all_files): self.files = utils.filter_files( all_files=all_files, filter_regex_include=self.filter_regex_include, - filter_regex_exclude=self.filter_regex_exclude, + filter_regex_exclude=[ + self.filter_regex_exclude_descriptor, + self.filter_regex_exclude_linter, + ], file_names_regex=self.file_names_regex, file_extensions=self.file_extensions, ignored_files=[], diff --git a/megalinter/MegaLinter.py b/megalinter/MegaLinter.py index 524496d1bf5..43e8f8e9b80 100644 --- a/megalinter/MegaLinter.py +++ b/megalinter/MegaLinter.py @@ -703,7 +703,7 @@ def collect_files(self): filtered_files = utils.filter_files( all_files=all_files, filter_regex_include=self.filter_regex_include, - filter_regex_exclude=self.filter_regex_exclude, + filter_regex_exclude=[self.filter_regex_exclude], file_names_regex=self.file_names_regex, file_extensions=self.file_extensions, ignored_files=ignored_files, diff --git a/megalinter/tests/test_megalinter/filters_test.py b/megalinter/tests/test_megalinter/filters_test.py index ed7f05e7112..845519cd033 100644 --- a/megalinter/tests/test_megalinter/filters_test.py +++ b/megalinter/tests/test_megalinter/filters_test.py @@ -68,7 +68,7 @@ def test_filter_files_with_ignored_files(self): filtered_files = utils.filter_files( all_files=all_files, filter_regex_include=None, - filter_regex_exclude=None, + filter_regex_exclude=[None], file_names_regex=[], file_extensions=["", ".md", ".ext"], ignored_files=ignored_files, @@ -95,7 +95,7 @@ def test_filter_files_with_file_extensions(self): filtered_files = utils.filter_files( all_files=all_files, filter_regex_include=None, - filter_regex_exclude=None, + filter_regex_exclude=[], file_names_regex=[], file_extensions=file_extensions, ignored_files=[], @@ -104,3 +104,27 @@ def test_filter_files_with_file_extensions(self): self.assertListEqual( sorted(filtered_files), 
sorted(expected), f"check {file_extensions}" ) + + def test_filter_regex_exclude_multilevel(self): + all_files = [ + "should/be/excluded/descriptor-level/test.md", + "target/foo.md", + "should/be/excluded/descriptor-level/test2.md", + "should/be/excluded/linter-level/test.md", + "should/be/excluded/linter-level/test2.md", + "target/foo2.ext", + ] + filtered_files = utils.filter_files( + all_files=all_files, + filter_regex_include=None, + filter_regex_exclude=["(descriptor-level)", "(linter-level)"], + file_names_regex=[], + file_extensions=[".md"], + ignored_files=[], + ignore_generated_files=False, + ) + self.assertListEqual( + sorted(filtered_files), + sorted(["target/foo.md"]), + "check regex_exclude_multilevel", + ) diff --git a/megalinter/utils.py b/megalinter/utils.py index 5f538a192e0..665f1f49d62 100644 --- a/megalinter/utils.py +++ b/megalinter/utils.py @@ -92,7 +92,7 @@ def get_excluded_directories(request_id): def filter_files( all_files: Sequence[str], filter_regex_include: Optional[str], - filter_regex_exclude: Optional[str], + filter_regex_exclude: Sequence[str], file_names_regex: Sequence[str], file_extensions: Any, ignored_files: Optional[Sequence[str]], @@ -108,9 +108,12 @@ def filter_files( filter_regex_include_object = ( re.compile(filter_regex_include) if filter_regex_include else None ) - filter_regex_exclude_object = ( - re.compile(filter_regex_exclude) if filter_regex_exclude else None - ) + filter_regex_exclude_objects = [] + for filter_regex_exclude_item in filter_regex_exclude: + filter_regex_exclude_object = ( + re.compile(filter_regex_exclude_item) if filter_regex_exclude_item else None + ) + filter_regex_exclude_objects += [filter_regex_exclude_object] file_names_regex_object = re.compile("|".join(file_names_regex)) filtered_files = [] file_contains_regex_object = ( @@ -152,10 +155,15 @@ def filter_files( file_with_workspace ): continue - # Skip according to FILTER_REGEX_EXCLUDE - if filter_regex_exclude_object and 
filter_regex_exclude_object.search( - file_with_workspace - ): + # Skip according to FILTER_REGEX_EXCLUDE list + excluded_by_regex = False + for filter_regex_exclude_object in filter_regex_exclude_objects: + if filter_regex_exclude_object and filter_regex_exclude_object.search( + file_with_workspace + ): + excluded_by_regex = True + break + if excluded_by_regex is True: continue # Skip according to file extension (only if lint_all_other_linter_files is false or file_extensions is defined)