Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implemented general filtering (replaces blame filtering) #28

Merged
merged 6 commits into from
Oct 4, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
11 changes: 10 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,13 @@
*.pyc
*.orig
/.venv
/.vscode
/.vscode
/.idea
/.pytest_cache
.DS_Store
*.sarif
*.json
*.csv
.coverage
coverage.xml
*filter.yaml
231 changes: 130 additions & 101 deletions README.md

Large diffs are not rendered by default.

185 changes: 184 additions & 1 deletion poetry.lock

Large diffs are not rendered by default.

3 changes: 3 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -22,11 +22,14 @@ jinja2 = "^3.1.2"
python = "^3.8"
python-docx = "^0.8.11"
matplotlib = "^3.5.1"
pyyaml = "^6.0.1"
jsonpath-ng = "^1.6.0"

[tool.poetry.dev-dependencies]
black = "^22.3.0"
pylint = "^2.13.8"
pytest = "^5.2"
pytest-cov = "^4.1.0"

[tool.poetry.scripts]
sarif = "sarif.cmdline.main:main"
Expand Down
103 changes: 29 additions & 74 deletions sarif/cmdline/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import sys

from sarif import loader, sarif_file
from sarif.filter.general_filter import load_filter_file

from sarif.operations import (
blame_op,
Expand Down Expand Up @@ -92,13 +93,23 @@ def _create_arg_parser():
"--output", "-o", type=str, metavar="FILE", help="Output file"
)

for cmd in ["codeclimate", "copy", "csv", "diff", "summary", "html", "emacs", "trend", "word"]:
for cmd in [
"codeclimate",
"copy",
"csv",
"diff",
"summary",
"html",
"emacs",
"trend",
"word",
]:
subparser[cmd].add_argument(
"--blame-filter",
"--filter",
"-b",
type=str,
metavar="FILE",
help="Specify the blame filter file to apply. See README for format.",
help="Specify the filter file to apply. See README for format.",
)

# Command-specific options
Expand Down Expand Up @@ -204,66 +215,10 @@ def _check(input_files: sarif_file.SarifFileSet, check_level):
return ret


def _load_blame_filter_file(file_path):
filter_description = os.path.basename(file_path)
include_substrings = []
include_regexps = []
exclude_substrings = []
exclude_regexps = []
try:
with open(file_path, encoding="utf-8") as file_in:
for line in file_in.readlines():
if line.startswith("\ufeff"):
# Strip byte order mark
line = line[1:]
lstrip = line.strip()
if lstrip.startswith("#"):
# Ignore comment lines
continue
pattern_spec = None
is_include = True
if lstrip.startswith("description:"):
filter_description = lstrip[12:].strip()
elif lstrip.startswith("+: "):
is_include = True
pattern_spec = lstrip[3:].strip()
elif lstrip.startswith("-: "):
is_include = False
pattern_spec = lstrip[3:].strip()
else:
is_include = True
pattern_spec = lstrip
if pattern_spec:
pattern_spec_len = len(pattern_spec)
if (
pattern_spec_len > 2
and pattern_spec.startswith("/")
and pattern_spec.endswith("/")
):
(include_regexps if is_include else exclude_regexps).append(
pattern_spec[1 : pattern_spec_len - 1]
)
else:
(
include_substrings if is_include else exclude_substrings
).append(pattern_spec)
except UnicodeDecodeError as error:
raise IOError(
f"Cannot read blame filter file {file_path}: not UTF-8 encoded?"
) from error
return (
filter_description,
include_substrings,
include_regexps,
exclude_substrings,
exclude_regexps,
)


def _init_blame_filtering(input_files, args):
    """Apply the blame filter file named on the command line, if any."""
    if not args.blame_filter:
        return
    input_files.init_blame_filter(*_load_blame_filter_file(args.blame_filter))
def _init_filtering(input_files, args):
    """Apply the general filter file named on the command line, if any."""
    if not args.filter:
        return
    input_files.init_general_filter(*load_filter_file(args.filter))


def _init_path_prefix_stripping(input_files, args, strip_by_default):
Expand Down Expand Up @@ -337,15 +292,15 @@ def _codeclimate(args):
input_files = loader.load_sarif_files(*args.files_or_dirs)
input_files.init_default_line_number_1()
_init_path_prefix_stripping(input_files, args, strip_by_default=False)
_init_blame_filtering(input_files, args)
_init_filtering(input_files, args)
(output, multiple_file_output) = _prepare_output(input_files, args.output, ".json")
codeclimate_op.generate(input_files, output, multiple_file_output)
return _check(input_files, args.check)


def _copy(args):
input_files = loader.load_sarif_files(*args.files_or_dirs)
_init_blame_filtering(input_files, args)
_init_filtering(input_files, args)
output = args.output or "out.sarif"
output_sarif_file_set = copy_op.generate_sarif(
input_files,
Expand All @@ -361,7 +316,7 @@ def _csv(args):
input_files = loader.load_sarif_files(*args.files_or_dirs)
input_files.init_default_line_number_1()
_init_path_prefix_stripping(input_files, args, strip_by_default=False)
_init_blame_filtering(input_files, args)
_init_filtering(input_files, args)
(output, multiple_file_output) = _prepare_output(input_files, args.output, ".csv")
csv_op.generate_csv(input_files, output, multiple_file_output)
return _check(input_files, args.check)
Expand All @@ -370,16 +325,16 @@ def _csv(args):
def _diff(args):
original_sarif = loader.load_sarif_files(args.old_file_or_dir[0])
new_sarif = loader.load_sarif_files(args.new_file_or_dir[0])
_init_blame_filtering(original_sarif, args)
_init_blame_filtering(new_sarif, args)
_init_filtering(original_sarif, args)
_init_filtering(new_sarif, args)
return diff_op.print_diff(original_sarif, new_sarif, args.output, args.check)


def _html(args):
input_files = loader.load_sarif_files(*args.files_or_dirs)
input_files.init_default_line_number_1()
_init_path_prefix_stripping(input_files, args, strip_by_default=True)
_init_blame_filtering(input_files, args)
_init_filtering(input_files, args)
(output, multiple_file_output) = _prepare_output(input_files, args.output, ".html")
html_op.generate_html(input_files, args.image, output, multiple_file_output)
return _check(input_files, args.check)
Expand All @@ -389,7 +344,7 @@ def _emacs(args):
input_files = loader.load_sarif_files(*args.files_or_dirs)
input_files.init_default_line_number_1()
_init_path_prefix_stripping(input_files, args, strip_by_default=True)
_init_blame_filtering(input_files, args)
_init_filtering(input_files, args)
(output, multiple_file_output) = _prepare_output(input_files, args.output, ".txt")
emacs_op.generate_compile(input_files, output, multiple_file_output)
return _check(input_files, args.check)
Expand All @@ -413,7 +368,7 @@ def _ls(args):

def _summary(args):
input_files = loader.load_sarif_files(*args.files_or_dirs)
_init_blame_filtering(input_files, args)
_init_filtering(input_files, args)
(output, multiple_file_output) = (None, False)
if args.output:
(output, multiple_file_output) = _prepare_output(
Expand All @@ -426,7 +381,7 @@ def _summary(args):
def _trend(args):
input_files = loader.load_sarif_files(*args.files_or_dirs)
input_files.init_default_line_number_1()
_init_blame_filtering(input_files, args)
_init_filtering(input_files, args)
if args.output:
_ensure_dir(os.path.dirname(args.output))
output = args.output
Expand All @@ -453,7 +408,7 @@ def _word(args):
input_files = loader.load_sarif_files(*args.files_or_dirs)
input_files.init_default_line_number_1()
_init_path_prefix_stripping(input_files, args, strip_by_default=True)
_init_blame_filtering(input_files, args)
_init_filtering(input_files, args)
(output, multiple_file_output) = _prepare_output(input_files, args.output, ".docx")
word_op.generate_word_docs_from_sarif_inputs(
input_files, args.image, output, multiple_file_output
Expand All @@ -469,7 +424,7 @@ def _word(args):
"codeclimate": {
"fn": _codeclimate,
"desc": "Write a JSON representation in Code Climate format of SARIF file(s) "
"for viewing as a Code Quality report in GitLab UI",
"for viewing as a Code Quality report in GitLab UI",
},
"copy": {
"fn": _copy,
Expand Down
Empty file added sarif/filter/__init__.py
Empty file.
95 changes: 95 additions & 0 deletions sarif/filter/filter_stats.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
import datetime


class FilterStats:
    """
    Statistics that record the outcome of a filter.

    Tracks how many results passed the filter, how many were filtered out,
    and how many were included by default because they lacked the data
    being filtered on.
    """

    def __init__(self, filter_description):
        # Human-readable name for the filter, e.g. the filter file's basename.
        self.filter_description = filter_description
        # Filter stats can also be loaded from a file created by `sarif copy`.
        self.rehydrated = False
        # Time the filter was applied; None until reset_counters() is called.
        self.filter_datetime = None
        self.filtered_in_result_count = 0
        self.filtered_out_result_count = 0
        self.missing_property_count = 0

    def reset_counters(self):
        """
        Zero all the counters and record the current time as the filter time.
        """
        self.filter_datetime = datetime.datetime.now()
        self.filtered_in_result_count = 0
        self.filtered_out_result_count = 0
        self.missing_property_count = 0

    def add(self, other_filter_stats):
        """
        Add another set of filter stats to my totals.

        Differing descriptions are concatenated, comma-separated.
        """
        if other_filter_stats:
            other_description = other_filter_stats.filter_description
            if other_description and other_description != self.filter_description:
                if self.filter_description:
                    self.filter_description += f", {other_description}"
                else:
                    # Guard: self.filter_description may be None (e.g. when
                    # rehydrated from JSON with a null "filter" value);
                    # `+=` on None would raise TypeError.
                    self.filter_description = other_description
            self.filtered_in_result_count += other_filter_stats.filtered_in_result_count
            self.filtered_out_result_count += (
                other_filter_stats.filtered_out_result_count
            )
            self.missing_property_count += other_filter_stats.missing_property_count

    def __str__(self):
        """
        Automatic to_string()
        """
        return self.to_string()

    def to_string(self):
        """
        Generate a summary string for these filter stats.
        """
        ret = f"'{self.filter_description}'"
        if self.filter_datetime:
            ret += " at "
            ret += self.filter_datetime.strftime("%c")
        ret += (
            f": {self.filtered_out_result_count} filtered out, "
            f"{self.filtered_in_result_count} passed the filter"
        )
        if self.missing_property_count:
            ret += (
                f", {self.missing_property_count} included by default "
                "for lacking data to filter"
            )

        return ret

    def to_json_camel_case(self):
        """
        Generate filter stats as JSON using camelCase naming,
        to fit with SARIF standard section 3.8.1 (Property Bags).
        """
        return {
            "filter": self.filter_description,
            "in": self.filtered_in_result_count,
            "out": self.filtered_out_result_count,
            "default": {
                "noProperty": self.missing_property_count,
            },
        }


def load_filter_stats_from_json(json_data):
    """
    Load filter stats from a SARIF file property bag using camelCase naming
    as per SARIF standard section 3.8.1 (Property Bags).

    Returns None when json_data is empty or None.
    """
    if not json_data:
        return None
    stats = FilterStats(json_data["filter"])
    # Mark these stats as loaded from a file rather than freshly computed.
    stats.rehydrated = True
    stats.filtered_in_result_count = json_data.get("in", 0)
    stats.filtered_out_result_count = json_data.get("out", 0)
    stats.missing_property_count = json_data.get("default", {}).get("noProperty", 0)
    return stats