Skip to content

Commit

Permalink
Implemented default-include configuration option
Browse files Browse the repository at this point in the history
  • Loading branch information
abyss638 committed Oct 3, 2023
1 parent ac44bb7 commit 221f528
Show file tree
Hide file tree
Showing 5 changed files with 275 additions and 98 deletions.
22 changes: 16 additions & 6 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -624,22 +624,32 @@ The data in each `result` object can then be used for filtering via the `--filte
# Optional description for the filter. If no title is specified, the filter file name is used.
description: Example filter from README.md

# Optional configuration section to override default values.
configuration:
# This option controls whether to include results where a property to check is missing; the default value is true.
default-include: false

# Items in `include` list are interpreted as inclusion filtering rules.
# Items are treated with the OR operator; the filtered results include objects matching any rule.
# Each item can be one rule or a list of rules, in the latter case rules in the list are treated with AND operator - all rules must match.
include:
# The following line includes issues whose author-mail field contains "@microsoft.com" AND found in Java files.
# The following line includes issues whose author-mail property contains "@microsoft.com" AND found in Java files.
# Values with special characters `\:;_()$%^@,` must be enclosed in quotes (single or double):
- author-mail: "@microsoft.com"
locations[*].physicalLocation.artifactLocation.uri: "*.java"
# Instead of a substring, a regular expression can be used, enclosed in "/" characters. Issues whose committer-mail field includes a string matching the regular expression are included. Use ^ and $ to match the whole committer-mail field.
- committer-mail: "/^<myname.*\\.com>$/"
# Instead of a substring, a regular expression can be used, enclosed in "/" characters.
# Issues whose committer-mail property includes a string matching the regular expression are included.
# Use ^ and $ to match the whole committer-mail property.
- committer-mail:
value: "/^<myname.*\\.com>$/"
# Configuration options can be overridden for any rule.
default-include: true

# Lines under `exclude` are interpreted as exclusion filtering rules.
exclude:
# The following line excludes issues whose location is in test Java files with names starting with the "Test" prefix.
- location: "Test*.java"
# The value for the field can be empty, in this case only existence of the field in
# The value for the property can be empty; in this case, only the existence of the property is checked.
- suppression:
```

Expand All @@ -658,7 +668,7 @@ exclude:
Field names must be specified in [JSONPath notation](https://goessner.net/articles/JsonPath/)
accessing data in the [SARIF `result` object](https://docs.oasis-open.org/sarif/sarif/v2.1.0/cs01/sarif-v2.1.0-cs01.html#_Toc16012594).

For commonly used fields the following shortcuts are defined:
For commonly used properties the following shortcuts are defined:
| Shortcut | Full JSONPath |
| -------- | -------- |
| author | properties.blame.author |
Expand All @@ -669,7 +679,7 @@ For commonly used fields the following shortcuts are defined:
| rule | ruleId |
| suppression | suppressions[*].kind |

For the field `uri` (e.g. in `locations[*].physicalLocation.artifactLocation.uri`) file name wildcard characters can be used as it represents a file location:
For the property `uri` (e.g. in `locations[*].physicalLocation.artifactLocation.uri`) file name wildcard characters can be used as it represents a file location:
- `?` - a single occurrence of any character in a directory or file name
- `*` - zero or more occurrences of any character in a directory or file name
- `**` - zero or more occurrences across multiple directory levels
Expand Down
13 changes: 13 additions & 0 deletions sarif/filter/filter_stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ def __init__(self, filter_description):
self.filter_datetime = None
self.filtered_in_result_count = 0
self.filtered_out_result_count = 0
self.missing_property_count = 0

def reset_counters(self):
"""
Expand All @@ -21,6 +22,7 @@ def reset_counters(self):
self.filter_datetime = datetime.datetime.now()
self.filtered_in_result_count = 0
self.filtered_out_result_count = 0
self.missing_property_count = 0

def add(self, other_filter_stats):
"""
Expand All @@ -35,6 +37,7 @@ def add(self, other_filter_stats):
self.filtered_out_result_count += (
other_filter_stats.filtered_out_result_count
)
self.missing_property_count += other_filter_stats.missing_property_count

def __str__(self):
"""
Expand All @@ -54,6 +57,12 @@ def to_string(self):
f": {self.filtered_out_result_count} filtered out, "
f"{self.filtered_in_result_count} passed the filter"
)
if self.missing_property_count:
ret += (
f", {self.missing_property_count} included by default "
"for lacking data to filter"
)

return ret

def to_json_camel_case(self):
Expand All @@ -65,6 +74,9 @@ def to_json_camel_case(self):
"filter": self.filter_description,
"in": self.filtered_in_result_count,
"out": self.filtered_out_result_count,
"default": {
"noProperty": self.missing_property_count,
},
}


Expand All @@ -79,4 +91,5 @@ def load_filter_stats_from_json(json_data):
ret.rehydrated = True
ret.filtered_in_result_count = json_data.get("in", 0)
ret.filtered_out_result_count = json_data.get("out", 0)
ret.missing_property_count = json_data.get("default", {}).get("noProperty", 0)
return ret
97 changes: 70 additions & 27 deletions sarif/filter/general_filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,13 @@
import re
from typing import Optional, List

import copy
import jsonpath_ng.ext
import yaml

from sarif.filter.filter_stats import FilterStats, load_filter_stats_from_json

# Commonly used fields can be specified using shortcuts
# Commonly used properties can be specified using shortcuts
# instead of full JSON path
FILTER_SHORTCUTS = {
"author": "properties.blame.author",
Expand All @@ -19,10 +20,15 @@
"suppression": "suppressions[*].kind",
}

# Some fields can have specific shortcuts to make it easier to write filters
# Some properties can have specific shortcuts to make it easier to write filters
# For example a file location can be specified using wildcards
FIELDS_REGEX_SHORTCUTS = {"uri": {"**": ".*", "*": "[^/]*", "?": "."}}

# Default configuration for all filters
DEFAULT_CONFIGURATION = {
"default-include": True,
}


def get_filter_function(filter_spec):
if filter_spec:
Expand All @@ -38,22 +44,22 @@ def get_filter_function(filter_spec):
return lambda value: True


def _convert_glob_to_regex(field_name, field_value_spec):
# skip if field_value_spec is a regex
if field_value_spec and not (
field_value_spec.startswith("/") and field_value_spec.endswith("/")
def _convert_glob_to_regex(property_name, property_value_spec):
# skip if property_value_spec is a regex
if property_value_spec and not (
property_value_spec.startswith("/") and property_value_spec.endswith("/")
):
# get last component of field name
last_component = field_name.split(".")[-1]
# get last component of property name
last_component = property_name.split(".")[-1]
if last_component in FIELDS_REGEX_SHORTCUTS:
shortcuts = FIELDS_REGEX_SHORTCUTS[last_component]
rx = re.compile("|".join(map(re.escape, shortcuts.keys())))
field_value_spec = rx.sub(
lambda match: shortcuts[match.group(0)], field_value_spec
property_value_spec = rx.sub(
lambda match: shortcuts[match.group(0)], property_value_spec
)

return f"/{field_value_spec}/"
return field_value_spec
return f"/{property_value_spec}/"
return property_value_spec


class GeneralFilter:
Expand All @@ -67,8 +73,11 @@ def __init__(self):
self.apply_inclusion_filter = False
self.exclude_filters = {}
self.apply_exclusion_filter = False
self.configuration = DEFAULT_CONFIGURATION

def init_filter(self, filter_description, include_filters, exclude_filters):
def init_filter(
self, filter_description, configuration, include_filters, exclude_filters
):
"""
Initialise the filter with the given filter patterns.
"""
Expand All @@ -77,6 +86,7 @@ def init_filter(self, filter_description, include_filters, exclude_filters):
self.apply_inclusion_filter = len(include_filters) > 0
self.exclude_filters = exclude_filters
self.apply_exclusion_filter = len(exclude_filters) > 0
self.configuration.update(configuration)

def rehydrate_filter_stats(self, dehydrated_filter_stats, filter_datetime):
"""
Expand All @@ -97,29 +107,30 @@ def _filter_append(self, filtered_results: List[dict], result: dict):
# Remove any existing filter log on the result
result.setdefault("properties", {}).pop("filtered", None)

matched_include_filters = []
included_stats = None
if self.apply_inclusion_filter:
matched_include_filters = self._filter_result(result, self.include_filters)
if not matched_include_filters:
included_stats = self._filter_result(result, self.include_filters)
if not included_stats["matchedFilter"]:
return

if self.apply_exclusion_filter:
if self._filter_result(result, self.exclude_filters):
excluded_stats = self._filter_result(result, self.exclude_filters)
if excluded_stats["matchedFilter"]:
self.filter_stats.filtered_out_result_count += 1
return

included = {
"state": "included",
"matchedFilter": matched_include_filters,
}
self.filter_stats.filtered_in_result_count += 1
included["filter"] = self.filter_stats.filter_description
result["properties"]["filtered"] = included
if included_stats["state"] == "included":
self.filter_stats.filtered_in_result_count += 1
else:
self.filter_stats.missing_property_count += 1
included_stats["filter"] = self.filter_stats.filter_description
result["properties"]["filtered"] = included_stats

filtered_results.append(result)

def _filter_result(self, result: dict, filters: List[str]) -> List[dict]:
def _filter_result(self, result: dict, filters: List[str]) -> dict:
matched_filters = []
warnings = []
if filters:
# filters contains rules which treated as OR.
# if any rule matches, the record is selected.
Expand All @@ -130,6 +141,14 @@ def _filter_result(self, result: dict, filters: List[str]) -> List[dict]:
for prop_path, prop_value_spec in filter_spec.items():
resolved_prop_path = FILTER_SHORTCUTS.get(prop_path, prop_path)
jsonpath_expr = jsonpath_ng.ext.parse(resolved_prop_path)
filter_configuration = self.configuration

# if prop_value_spec is a dict, update filter configuration from it
if isinstance(prop_value_spec, dict):
filter_configuration = copy.deepcopy(self.configuration)
filter_configuration.update(prop_value_spec)
# actual value for the filter is in "value" key
prop_value_spec = prop_value_spec.get("value", None)

found_results = jsonpath_expr.find(result)
if found_results:
Expand All @@ -140,11 +159,34 @@ def _filter_result(self, result: dict, filters: List[str]) -> List[dict]:
filter_function = get_filter_function(value_spec)
if filter_function(value):
continue
else:
# property to filter on is not found.
# if "default-include" is true, include the "result" with a warning.
if filter_configuration.get("default-include", True):
warnings.append(
f"Field '{prop_path}' is missing but the result included as default-include is true"
)
continue
matched = False
break
if matched:
matched_filters.append(filter_spec)
return matched_filters
break

stats = {
"state": "included",
"matchedFilter": matched_filters,
}

if warnings:
stats.update(
{
"state": "default",
"warnings": warnings,
}
)

return stats

def filter_results(self, results: List[dict]) -> List[dict]:
"""
Expand Down Expand Up @@ -177,8 +219,9 @@ def load_filter_file(file_path):
with open(file_path, encoding="utf-8") as file_in:
yaml_content = yaml.safe_load(file_in)
filter_description = yaml_content.get("description", file_name)
configuration = yaml_content.get("configuration", {})
include_filters = yaml_content.get("include", {})
exclude_filters = yaml_content.get("exclude", {})
except yaml.YAMLError as error:
raise IOError(f"Cannot read filter file {file_path}") from error
return filter_description, include_filters, exclude_filters
return filter_description, configuration, include_filters, exclude_filters
28 changes: 18 additions & 10 deletions sarif/sarif_file.py
Original file line number Diff line number Diff line change
Expand Up @@ -204,15 +204,19 @@ def init_default_line_number_1(self):
self._default_line_number = "1"
self._cached_records = None

def init_general_filter(
    self, filter_description, configuration, include_filters, exclude_filters
):
    """
    Set up general filtering. This is applied to all properties in results array in each SARIF file.

    If only inclusion criteria are provided, only issues matching the inclusion criteria are considered.
    If only exclusion criteria are provided, only issues not matching the exclusion criteria are considered.
    If both are provided, only issues matching the inclusion criteria and not matching the
    exclusion criteria are considered.

    :param filter_description: human-readable description of the filter.
    :param configuration: dict of configuration overrides (e.g. "default-include").
    :param include_filters: inclusion rules, treated with OR semantics.
    :param exclude_filters: exclusion rules, treated with OR semantics.
    """
    self._filter.init_filter(
        filter_description, configuration, include_filters, exclude_filters
    )
    # Clear the unfiltered records cached by get_records() above.
    self._cached_records = None

Expand Down Expand Up @@ -420,17 +424,19 @@ def init_default_line_number_1(self):
for run in self.runs:
run.init_default_line_number_1()

def init_general_filter(
    self, filter_description, configuration, include_filters, exclude_filters
):
    """
    Set up general filtering. This is applied to all properties in results array in each SARIF file.

    If only inclusion criteria are provided, only issues matching the inclusion criteria are considered.
    If only exclusion criteria are provided, only issues not matching the exclusion criteria are considered.
    If both are provided, only issues matching the inclusion criteria and not matching the
    exclusion criteria are considered.

    :param filter_description: human-readable description of the filter.
    :param configuration: dict of configuration overrides (e.g. "default-include").
    :param include_filters: inclusion rules, treated with OR semantics.
    :param exclude_filters: exclusion rules, treated with OR semantics.
    """
    # Delegate to every run in this SARIF file.
    for run in self.runs:
        run.init_general_filter(
            filter_description, configuration, include_filters, exclude_filters
        )

def get_abs_file_path(self) -> str:
Expand Down Expand Up @@ -619,21 +625,23 @@ def init_default_line_number_1(self):
for input_file in self.files:
input_file.init_default_line_number_1()

def init_general_filter(
    self, filter_description, configuration, include_filters, exclude_filters
):
    """
    Set up general filtering. This is applied to all properties in results array in each SARIF file.

    If only inclusion criteria are provided, only issues matching the inclusion criteria are considered.
    If only exclusion criteria are provided, only issues not matching the exclusion criteria are considered.
    If both are provided, only issues matching the inclusion criteria and not matching the
    exclusion criteria are considered.

    :param filter_description: human-readable description of the filter.
    :param configuration: dict of configuration overrides (e.g. "default-include").
    :param include_filters: inclusion rules, treated with OR semantics.
    :param exclude_filters: exclusion rules, treated with OR semantics.
    """
    # Propagate to nested directories first, then to the files at this level.
    for subdir in self.subdirs:
        subdir.init_general_filter(
            filter_description, configuration, include_filters, exclude_filters
        )
    for input_file in self.files:
        input_file.init_general_filter(
            filter_description, configuration, include_filters, exclude_filters
        )

def add_dir(self, sarif_file_set):
Expand Down

0 comments on commit 221f528

Please sign in to comment.