From ebf822cff92a83700dd5ced1fb8cb683a9f5a070 Mon Sep 17 00:00:00 2001 From: Xavier Morel Date: Tue, 25 Nov 2025 07:32:38 +0100 Subject: [PATCH] SARIF reporter SARIF is a unified file format used to exchange information between static analysis tools (like pylint) and various types of formatters, meta-runners, broadcasters / alert system, ... This implementation is ad-hoc, and non-validating. Spec v Github ------------- Turns out Github both doesn't implement all of SARIF (which makes sense) and requires a bunch of properties which the spec considers optional. The [official SARIF validator][] (linked to by both oasis and github) was used to validate the output of the reporter, ensuring that all the github requirements it flags are fulfilled, and fixing *some* of the validator's pet issues. As of now the following issues are left unaddressed: - azure requires `run.automationDetails`, looking at the spec I don't think it makes sense for the reporter to inject that, it's more up to the CI - the validator wants a `run.versionControlProvenance`, same as above - the validator wants rule names in PascalCase, lol - the validator wants templated result messages, but without pylint providing the args as part of the `Message` that's a bit of a chore - the validator wants `region` to include a snippet (the flagged content) - the validator wants `physicalLocation` to have a `contextRegion` (most likely with a snippet) On URIs ------- The reporter makes use of URIs for artifacts (~files). Per ["guidance on the use of artifactLocation objects"][3.4.7], `uri` *should* capture the deterministic part of the artifact location and `uriBaseId` *should* capture the non-deterministic part. However as far as I can tell pylint has no requirement (and no clean way to require) consistent resolution roots: `path` is just relative to the cwd, and there is no requirement to have project-level files to use pylint. This makes the use of relative uris dodgy, but absolute uris are pretty much always broken for the purpose of *interchange* so they're not really any better. As a side-note, Github [asserts][relative-uri-guidance] > While this [nb: `originalUriBaseIds`] is not required by GitHub for > the code scanning results to be displayed correctly, it is required > to produce a valid SARIF output when using relative URI references. However per [3.4.4][] this is incorrect, the `uriBaseId` can be resolved through end-user configuration, `originalUriBaseIds`, external information (e.g. envvars), or heuristics. It would be nice to document the "relative root" via `originalUriBaseIds` (which may be omitted for that purpose per [3.14.14][], but per the above claiming a consistent project root is dodgy. We *could* resolve known project files (e.g. pyproject.toml, tox.ini, etc...) in order to find a consistent root (project root, repo root, ...) and set / use that for relative URIs but that's a lot of additional complexity which I'm not sure is warranted at least for a first version. Fixes #5493 [3.4.4]: https://docs.oasis-open.org/sarif/sarif/v2.1.0/csprd01/sarif-v2.1.0-csprd01.html#_Toc10540869 [3.4.7]: https://docs.oasis-open.org/sarif/sarif/v2.1.0/csprd01/sarif-v2.1.0-csprd01.html#_Toc10540872 [3.14.14]: https://docs.oasis-open.org/sarif/sarif/v2.1.0/csprd01/sarif-v2.1.0-csprd01.html#_Toc10540936 [relative-uri-guidance]: https://docs.github.com/en/code-security/code-scanning/integrating-with-code-scanning/sarif-support-for-code-scanning#relative-uri-guidance-for-sarif-producers [official SARIF validator]: https://sarifweb.azurewebsites.net/ --- doc/whatsnew/fragments/5493.feature | 4 + pylint/lint/base_options.py | 3 +- pylint/reporters/__init__.py | 2 + pylint/reporters/sarif_reporter.py | 277 +++++++++++++++++++++ tests/lint/unittest_expand_modules.py | 8 + tests/reporters/unittest_sarif_reporter.py | 117 +++++++++ 6 files changed, 410 insertions(+), 1 deletion(-) create mode 100644 doc/whatsnew/fragments/5493.feature create mode 100644 pylint/reporters/sarif_reporter.py create mode 100644 tests/reporters/unittest_sarif_reporter.py diff --git a/doc/whatsnew/fragments/5493.feature b/doc/whatsnew/fragments/5493.feature new file mode 100644 index 0000000000..38614365c6 --- /dev/null +++ b/doc/whatsnew/fragments/5493.feature @@ -0,0 +1,4 @@ +Support for SARIF as an output format. + +Closes #5493 +Closes #10647 diff --git a/pylint/lint/base_options.py b/pylint/lint/base_options.py index 4ec84d2e9e..75abb32da0 100644 --- a/pylint/lint/base_options.py +++ b/pylint/lint/base_options.py @@ -104,7 +104,8 @@ def _make_linter_options(linter: PyLinter) -> Options: "group": "Reports", "help": "Set the output format. Available formats are: 'text', " "'parseable', 'colorized', 'json2' (improved json format), 'json' " - "(old json format), msvs (visual studio) and 'github' (GitHub actions). " + "(old json format), msvs (visual studio), 'github' (GitHub actions), " + "and 'sarif'. " "You can also give a reporter class, e.g. mypackage.mymodule." "MyReporterClass.", "kwargs": {"linter": linter}, diff --git a/pylint/reporters/__init__.py b/pylint/reporters/__init__.py index c24ea4be88..b357fe2015 100644 --- a/pylint/reporters/__init__.py +++ b/pylint/reporters/__init__.py @@ -14,6 +14,7 @@ from pylint.reporters.json_reporter import JSON2Reporter, JSONReporter from pylint.reporters.multi_reporter import MultiReporter from pylint.reporters.reports_handler_mix_in import ReportsHandlerMixIn +from pylint.reporters.sarif_reporter import SARIFReporter if TYPE_CHECKING: from pylint.lint.pylinter import PyLinter @@ -31,4 +32,5 @@ def initialize(linter: PyLinter) -> None: "JSONReporter", "MultiReporter", "ReportsHandlerMixIn", + "SARIFReporter", ] diff --git a/pylint/reporters/sarif_reporter.py b/pylint/reporters/sarif_reporter.py new file mode 100644 index 0000000000..629996b287 --- /dev/null +++ b/pylint/reporters/sarif_reporter.py @@ -0,0 +1,277 @@ +# Licensed under the GPL: https://www.gnu.org/licenses/old-licenses/gpl-2.0.html +# For details: https://github.com/pylint-dev/pylint/blob/main/LICENSE +# Copyright (c) https://github.com/pylint-dev/pylint/blob/main/CONTRIBUTORS.txt + +# pylint: disable=wrong-spelling-in-comment,wrong-spelling-in-docstring + +from __future__ import annotations + +import json +import os.path +from textwrap import shorten +from typing import TYPE_CHECKING, Literal, TypedDict +from urllib.parse import quote + +import pylint +import pylint.message +from pylint.constants import MSG_TYPES +from pylint.reporters import BaseReporter + +if TYPE_CHECKING: + from pylint.lint import PyLinter + from pylint.reporters.ureports.nodes import Section + + +def register(linter: PyLinter) -> None: + linter.register_reporter(SARIFReporter) + + +class SARIFReporter(BaseReporter): + name = "sarif" + extension = "sarif" + linter: PyLinter + + def display_reports(self, layout: Section) -> None: + """Don't do anything in this reporter.""" + + def _display(self, layout: Section) -> None: + """Do nothing.""" + + def display_messages(self, layout: Section | None) -> None: + """Launch layouts display.""" + output: Log = { + "version": "2.1.0", + "$schema": "https://docs.oasis-open.org/sarif/sarif/v2.1.0/cs01/schemas/sarif-schema-2.1.0.json", + "runs": [ + { + "tool": { + "driver": { + "name": "pylint", + "fullName": f"pylint {pylint.__version__}", + "version": pylint.__version__, + # should be versioned but not all versions are kept so... + "informationUri": "https://pylint.readthedocs.io/", + "rules": [ + { + "id": m.msgid, + "name": m.symbol, + "deprecatedIds": [ + msgid for msgid, _ in m.old_names + ], + "deprecatedNames": [ + name for _, name in m.old_names + ], + # per 3.19.19 shortDescription should be a + # single sentence which can't be guaranteed, + # however github requires it... + "shortDescription": { + "text": m.description.split(".", 1)[0] + }, + # github requires that this is less than 1024 characters + "fullDescription": { + "text": shorten( + m.description, 1024, placeholder="..." + ) + }, + "help": {"text": m.format_help()}, + "helpUri": f"https://pylint.readthedocs.io/en/stable/user_guide/messages/{MSG_TYPES[m.msgid[0]]}/{m.symbol}.html", + # handle_message only gets the formatted message, + # so to use `messageStrings` we'd need to + # convert the templating and extract the args + # out of the msg + } + for checker in self.linter.get_checkers() + for m in checker.messages + if m.symbol in self.linter.stats.by_msg + ], + } + }, + "results": [self.serialize(message) for message in self.messages], + } + ], + } + json.dump(output, self.out) + + @staticmethod + def serialize(message: pylint.message.Message) -> Result: + region: Region = { + "startLine": message.line, + "startColumn": message.column + 1, + "endLine": message.end_line or message.line, + "endColumn": (message.end_column or message.column) + 1, + } + + location: Location = { + "physicalLocation": { + "artifactLocation": { + "uri": path_to_uri(message.path), + }, + "region": region, + }, + } + if message.obj: + logical_location: LogicalLocation = { + "name": message.obj, + "fullyQualifiedName": f"{message.module}.{message.obj}", + } + location["logicalLocations"] = [logical_location] + + return { + "ruleId": message.msg_id, + "message": {"text": message.msg}, + "level": CATEGORY_MAP[message.category], + "locations": [location], + "partialFingerprints": { + # encoding the node path seems like it would be useful to dedup alerts? + "nodePath/v1": "", + }, + } + + +def path_to_uri(path: str) -> str: + """Converts a relative FS path to a relative URI. + + Does not check the validity of the path. + + An alternative would be to use `Path.as_uri` (on concrete path) on both the + artifact path and a reference path, then create a relative URI from this. + """ + if os.path.altsep: + path = path.replace(os.path.altsep, "/") + if os.path.sep != "/": + path = path.replace(os.path.sep, "/") + return quote(path) + + +CATEGORY_MAP: dict[str, ResultLevel] = { + "convention": "note", + "refactor": "note", + "statement": "note", + "info": "note", + "warning": "warning", + "error": "error", + "fatal": "error", +} + + +class Run(TypedDict): + tool: Tool + # invocation parameters / environment for the tool + # invocation: list[Invocations] + results: list[Result] + # originalUriBaseIds: dict[str, ArtifactLocation] + + +Log = TypedDict( + "Log", + { + "version": Literal["2.1.0"], + "$schema": Literal[ + "https://docs.oasis-open.org/sarif/sarif/v2.1.0/cs01/schemas/sarif-schema-2.1.0.json" + ], + "runs": list[Run], + }, +) + + +class Tool(TypedDict): + driver: Driver + + +class Driver(TypedDict): + name: Literal["pylint"] + # optional but azure wants it + fullName: str + version: str + informationUri: str # not required but validator wants it + rules: list[ReportingDescriptor] + + +class ReportingDescriptorOpt(TypedDict, total=False): + deprecatedIds: list[str] + deprecatedNames: list[str] + messageStrings: dict[str, MessageString] + + +class ReportingDescriptor(ReportingDescriptorOpt): + id: str + # optional but validator really wants it (then complains that it's not pascal cased) + name: str + # not required per spec but required by github + shortDescription: MessageString + fullDescription: MessageString + help: MessageString + helpUri: str + + +class MarkdownMessageString(TypedDict, total=False): + markdown: str + + +class MessageString(MarkdownMessageString): + text: str + + +ResultLevel = Literal["none", "note", "warning", "error"] + + +class ResultOpt(TypedDict, total=False): + ruleId: str + ruleIndex: int + + level: ResultLevel + + +class Result(ResultOpt): + message: Message + # not required per spec but required by github + locations: list[Location] + partialFingerprints: dict[str, str] + + +class Message(TypedDict, total=False): + # needs to have either text or id but it's a PITA to type + + #: plain text message string (can have markdown links but no other formatting) + text: str + #: formatted GFM text + markdown: str + #: rule id + id: str + #: arguments for templated rule messages + arguments: list[str] + + +class Location(TypedDict, total=False): + physicalLocation: PhysicalLocation # actually required by github + logicalLocations: list[LogicalLocation] + + +class PhysicalLocation(TypedDict): + artifactLocation: ArtifactLocation + # not required per spec, required by github + region: Region + + +class ArtifactLocation(TypedDict, total=False): + uri: str + #: id of base URI for resolving relative `uri` + uriBaseId: str + description: Message + + +class LogicalLocation(TypedDict, total=False): + name: str + fullyQualifiedName: str + #: schema is `str` with a bunch of *suggested* terms, of which this is a subset + kind: Literal[ + "function", "member", "module", "parameter", "returnType", "type", "variable" + ] + + +class Region(TypedDict): + # none required per spec, all required by github + startLine: int + startColumn: int + endLine: int + endColumn: int diff --git a/tests/lint/unittest_expand_modules.py b/tests/lint/unittest_expand_modules.py index 5b3a7ddbbe..96ef38ec60 100644 --- a/tests/lint/unittest_expand_modules.py +++ b/tests/lint/unittest_expand_modules.py @@ -151,6 +151,14 @@ def test__is_in_ignore_list_re_match() -> None: "name": "reporters.unittest_reporting", "isignored": False, }, + str(REPORTERS_PATH / "unittest_sarif_reporter.py"): { + "basename": "reporters", + "basepath": str(REPORTERS_PATH / "__init__.py"), + "isarg": False, + "path": str(REPORTERS_PATH / "unittest_sarif_reporter.py"), + "name": "reporters.unittest_sarif_reporter", + "isignored": False, + }, } diff --git a/tests/reporters/unittest_sarif_reporter.py b/tests/reporters/unittest_sarif_reporter.py new file mode 100644 index 0000000000..7df6ec293d --- /dev/null +++ b/tests/reporters/unittest_sarif_reporter.py @@ -0,0 +1,117 @@ +# Licensed under the GPL: https://www.gnu.org/licenses/old-licenses/gpl-2.0.html +# For details: https://github.com/pylint-dev/pylint/blob/main/LICENSE +# Copyright (c) https://github.com/pylint-dev/pylint/blob/main/CONTRIBUTORS.txt + +import io +import json + +from astroid import nodes + +from pylint import __version__, checkers +from pylint.lint import PyLinter +from pylint.reporters import SARIFReporter + + +def test_simple_sarif() -> None: + output = io.StringIO() + reporter = SARIFReporter(output) + linter = PyLinter(reporter=reporter) + checkers.initialize(linter) + linter.config.persistent = 0 + linter.open() + linter.set_current_module("0123") + linter.add_message( + "line-too-long", line=1, args=(1, 2), end_lineno=1, end_col_offset=4 + ) + reporter.display_messages(None) + assert json.loads(output.getvalue()) == { + "version": "2.1.0", + "$schema": "https://docs.oasis-open.org/sarif/sarif/v2.1.0/cs01/schemas/sarif-schema-2.1.0.json", + "runs": [ + { + "tool": { + "driver": { + "name": "pylint", + "fullName": f"pylint {__version__}", + "version": __version__, + "informationUri": "https://pylint.readthedocs.io/", + "rules": [ + { + "id": "C0301", + "deprecatedIds": [], + "name": "line-too-long", + "deprecatedNames": [], + "shortDescription": { + "text": "Used when a line is longer than a given number of characters" + }, + "fullDescription": { + "text": "Used when a line is longer than a given number of characters." + }, + "help": { + "text": ":line-too-long (C0301): *Line too long (%s/%s)*\n " + "Used when a line is longer than a given number of characters." + }, + "helpUri": "https://pylint.readthedocs.io/en/stable/user_guide/messages/convention/line-too-long.html", + } + ], + }, + }, + "results": [ + { + "ruleId": "C0301", + "message": {"text": "Line too long (1/2)"}, + "level": "note", + "locations": [ + { + "physicalLocation": { + "artifactLocation": { + "uri": "0123", + }, + "region": { + "startLine": 1, + "startColumn": 1, + "endLine": 1, + "endColumn": 5, + }, + }, + } + ], + "partialFingerprints": {"nodePath/v1": ""}, + } + ], + } + ], + } + + +def test_sarif_node() -> None: + output = io.StringIO() + reporter = SARIFReporter(output) + linter = PyLinter(reporter=reporter) + checkers.initialize(linter) + linter.config.persistent = 0 + linter.open() + linter.set_current_module("0123") + mod = nodes.Module("bar", "baz") + fn = nodes.FunctionDef( + "foo", lineno=1, col_offset=1, parent=mod, end_lineno=42, end_col_offset=86 + ) + linter.add_message( + "multiple-statements", + node=nodes.Import( + names=[("io", None), ("echo", None)], + parent=fn, + lineno=1, + end_lineno=1, + end_col_offset=4, + ), + ) + reporter.display_messages(None) + result = json.loads(output.getvalue())["runs"][0]["results"][0] + assert result["ruleId"] == "C0321" + assert result["locations"][0]["logicalLocations"] == [ + { + "name": "foo", + "fullyQualifiedName": "bar.foo", + } + ]