From 612173f37c6d121587a8f40968a5000e9bdadcfb Mon Sep 17 00:00:00 2001
From: Jacob Keller <jacob.e.keller@intel.com>
Date: Wed, 22 Oct 2025 10:54:22 -0700
Subject: [PATCH 1/2] tests: kdoc: re-implement in python

The existing kdoc test is written in shell, based on a simple heuristic of
checking whether the number of lines of output from scripts/kernel-doc
increases.

This test can pass in some cases even when new warnings are introduced,
such as if a single patch adds some new warnings while removing others. In
addition, the provided log output is not very useful to humans. It uses a
traditional diff to compare the prior and current warnings. Because the
warnings include line number data, this results in an unreadable mess.

Implementing a proper comparison in the shell script is possible, but the
resulting logic and code is difficult to maintain. Ultimately, what we want
to do is a sort of set intersection on the warnings while ignoring line
numbers. Implementing this in python is much more practical.

As a first step, convert the kdoc test into a python module. Commands are
executed via subprocess. The improved comparison algorithm will be
implemented in a following change.

Since we will be implementing a new comparison algorithm, do not bother
porting the "diff" implementation forward. This results in a temporary
regression as we no longer compute the difference or file summary for the
user. This will be resolved with the following change implementing the
improved algorithm.

This version has a couple of minor changes worth nothing:

1) The log output is now in stdout instead of stderr, since the
   python-based tests do not support logging output to the stderr file.
2) We no longer print out "Checking the tree..." lines.
3) The description line is included within the stdout log text as well as
   the desc file.

Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
---
 tests/patch/kdoc/info.json |  3 +-
 tests/patch/kdoc/kdoc.sh   | 52 ---------------------
 tests/patch/kdoc/test.py   | 92 ++++++++++++++++++++++++++++++++++++++
 3 files changed, 94 insertions(+), 53 deletions(-)
 delete mode 100755 tests/patch/kdoc/kdoc.sh
 create mode 100644 tests/patch/kdoc/test.py

diff --git a/tests/patch/kdoc/info.json b/tests/patch/kdoc/info.json
index e6d6d9a..bfac5c9 100644
--- a/tests/patch/kdoc/info.json
+++ b/tests/patch/kdoc/info.json
@@ -1,4 +1,5 @@
 {
-  "run": ["kdoc.sh"],
+  "pymod": "test",
+  "pyfunc": "kdoc",
   "pull-requests": true
 }
diff --git a/tests/patch/kdoc/kdoc.sh b/tests/patch/kdoc/kdoc.sh
deleted file mode 100755
index 63ae6eb..0000000
--- a/tests/patch/kdoc/kdoc.sh
+++ /dev/null
@@ -1,52 +0,0 @@
-#!/bin/bash
-# SPDX-License-Identifier: GPL-2.0
-#
-# Copyright (C) 2019 Netronome Systems, Inc.
-# Copyright (c) 2020 Facebook
-
-tmpfile_o=$(mktemp)
-tmpfile_n=$(mktemp)
-rc=0
-
-files_mod=$(git show --diff-filter=M  --pretty="" --name-only "HEAD")
-files_all=$(git show --diff-filter=AM --pretty="" --name-only "HEAD")
-
-HEAD=$(git rev-parse HEAD)
-
-echo "Checking the tree before the patch"
-git checkout -q HEAD~
-./scripts/kernel-doc -Wall -none $files_mod 2> >(tee $tmpfile_o >&2)
-
-incumbent=$(grep -v 'Error: Cannot open file ' $tmpfile_o | wc -l)
-
-echo "Checking the tree with the patch"
-
-git checkout -q $HEAD
-./scripts/kernel-doc -Wall -none $files_all 2> >(tee $tmpfile_n >&2)
-
-current=$(grep -v 'Error: Cannot open file ' $tmpfile_n | wc -l)
-
-echo "Errors and warnings before: $incumbent this patch: $current" >&$DESC_FD
-
-if [ $current -gt $incumbent ]; then
-  echo "New warnings added" 1>&2
-  diff $tmpfile_o $tmpfile_n 1>&2
-
-  echo "Per-file breakdown" 1>&2
-  tmpfile_fo=$(mktemp)
-  tmpfile_fn=$(mktemp)
-
-  grep -i "\(warn\|error\)" $tmpfile_o | sed -n 's@\(^[/a-zA-Z0-9_.-]*.[ch]\):.*@\1@p' | sort | uniq -c \
-    > $tmpfile_fo
-  grep -i "\(warn\|error\)" $tmpfile_n | sed -n 's@\(^[/a-zA-Z0-9_.-]*.[ch]\):.*@\1@p' | sort | uniq -c \
-    > $tmpfile_fn
-
-  diff $tmpfile_fo $tmpfile_fn 1>&2
-  rm $tmpfile_fo $tmpfile_fn
-
-  rc=1
-fi
-
-rm $tmpfile_o $tmpfile_n
-
-exit $rc
diff --git a/tests/patch/kdoc/test.py b/tests/patch/kdoc/test.py
new file mode 100644
index 0000000..ac53b30
--- /dev/null
+++ b/tests/patch/kdoc/test.py
@@ -0,0 +1,92 @@
+# SPDX-License-Identifier: GPL-2.0
+
+""" Test if kernel-doc generates new warnings """
+
+import os
+import subprocess
+from typing import List, Optional, Tuple
+
+def get_git_head(tree) -> str:
+    """ Get the git commit ID for head commit. """
+
+    cmd = ["git", "rev-parse", "HEAD"]
+    result = subprocess.run(cmd, cwd=tree.path, capture_output=True, text=True,
+                            check=True)
+
+    return result.stdout.strip()
+
+def run_kernel_doc(tree, commitish, files):
+    """ Run ./scripts/kdoc on a given commit and capture its results. """
+
+    cmd = ["git", "checkout", "-q", commitish]
+    subprocess.run(cmd, cwd=tree.path, capture_output=False, check=True)
+
+    cmd = ["./scripts/kernel-doc", "-Wall", "-none"] + files
+    result = subprocess.run(cmd, cwd=tree.path, text=True, check=False,
+                            stderr=subprocess.PIPE)
+
+    return result.stderr.strip().split('\n')
+
+def extract_files(patch):
+    """Extract paths added or modified by the patch."""
+
+    all_files = set()
+    mod_files = set()
+    lines = patch.raw_patch.split("\n")
+
+    # Walk lines, skip last since it doesn't have next
+    for i, line in enumerate(lines[:-1]):
+        next_line = lines[i + 1]
+
+        if not next_line.startswith("+++ b/"):
+            continue
+
+        file_path = next_line[6:]
+
+        all_files.add(file_path)
+
+        if line != "--- /dev/null":
+            mod_files.add(file_path)
+
+    return list(mod_files), list(all_files)
+
+def kdoc(tree, patch, result_dir) -> Tuple[int, str, str]:
+    """ Main function / entry point """
+
+    mod_files, all_files = extract_files(patch)
+
+    if not mod_files or not all_files:
+        return 1, "Patch has no modified files?", ""
+
+    ret = 0
+    desc = ""
+    log = []
+
+    head_commit = get_git_head(tree)
+
+    try:
+        incumbent_warnings = run_kernel_doc(tree, "HEAD~", mod_files)
+        log += ["Warnings before patch:"]
+        log.extend(map(str, incumbent_warnings))
+
+        current_warnings = run_kernel_doc(tree, head_commit, all_files)
+        log += ["", "Current warnings:"]
+        log.extend(map(str, current_warnings))
+    except subprocess.CalledProcessError as e:
+        desc = f'{e.cmd} failed with exit code {e.returncode}'
+        if e.stderr:
+            log += e.stderr.split('\n')
+        ret = 1
+
+        return ret, desc, "\n".join(log)
+
+    incumbent_count = len(incumbent_warnings)
+    current_count = len(current_warnings)
+
+    desc = f'Errors and warnings before: {incumbent_count} This patch: {current_count}'
+    log += ["", desc]
+
+    if current_count > incumbent_count:
+        ret = 1
+
+    return ret, desc, "\n".join(log)

From 1209a47b2707845bcef111e553eb04c2f2dc892a Mon Sep 17 00:00:00 2001
From: Jacob Keller <jacob.e.keller@intel.com>
Date: Wed, 22 Oct 2025 12:16:38 -0700
Subject: [PATCH 2/2] tests: kdoc: improve test comparison and output

The current algorithm for the kdoc test is a simple line number comparison
of the output from ./scripts/kernel-doc. This is not guaranteed to be
accurate, as a patch could fix some warnings while introducing others,
resulting in the total number of lines not changing.

In addition, it is difficult to quickly determine which warnings are new.
Historically, a simple "diff" was used to compare the before and after
results. This was difficult to parse because line numbers change due to
code being added or moved.

To fix this, use a set difference algorithm which compares based on data
from the warning lines without including the line number.

To do this, introduce a KdocWarning dataclass, which represents a warning
based on its kind, file, line, and content. Extract these from the lines
via a regular expression that looks for the expected pattern of output from
./scripts/kernel-doc.

Using a @dataclass allows specifying which fields to compare against. In
particular, the line number is not counted. Additionally, the original
message as output from ./scripts/kernel-doc is preserved as its own
(non-compared) field.

Some warnings are spread over two lines, indicated by the first line ending
in a semicolon. Handle this when iterating over the output of kernel-doc
and converting to the new warning objects.

Any output line which doesn't match the regular expression is converted so
that its entire message becomes the contents. This ensures that such lines
still get counted and still get compared in some form, rather than silently
ignored.

After obtaining the converted output from ./scripts/kernel-doc, convert the
current_warnings and incumbent_warnings lists into sets.

Calculate the set of new and removed warnings by iterating the original
lists. For new_warnings, find the sequence of warnings which are in the
current_warnings but not in the incumbent set. Do the same to calculate the
set of removed warnings.

This implementation is fast, as it can use the hash-based comparisons
for efficient set lookup. Using the original list also preserves the order
of the warnings as output by ./scripts/kernel-doc.

Calculating both the new and removed counts provides useful data for users
of the NIPA tests. It is much easier to see at a glance what warnings were
added.

Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
---
 tests/patch/kdoc/test.py | 120 ++++++++++++++++++++++++++++++++++++++-
 1 file changed, 117 insertions(+), 3 deletions(-)

diff --git a/tests/patch/kdoc/test.py b/tests/patch/kdoc/test.py
index ac53b30..6715161 100644
--- a/tests/patch/kdoc/test.py
+++ b/tests/patch/kdoc/test.py
@@ -2,7 +2,10 @@
 
 """ Test if kernel-doc generates new warnings """
 
+import collections
+import dataclasses
 import os
+import re
 import subprocess
 from typing import List, Optional, Tuple
 
@@ -15,7 +18,84 @@ def get_git_head(tree) -> str:
 
     return result.stdout.strip()
 
-def run_kernel_doc(tree, commitish, files):
+@dataclasses.dataclass(frozen=True, eq=True, order=True, init=True)
+class KdocWarning:
+    # The original warning message
+    message : str = dataclasses.field(repr=False, compare=False)
+    _ : dataclasses.KW_ONLY
+    # Kind of warning line, determined during init
+    kind : str = dataclasses.field(repr=True, compare=True)
+    # The file path, or None if unable to determine
+    file : Optional[str] = dataclasses.field(repr=True, compare=True)
+    # The line, or None if unable to determine
+    # Note: *not* part of comparison, or hash!
+    line : Optional[int] = dataclasses.field(repr=True, compare=False)
+    # The content of the warning (excluding kind, file, line)
+    content : str = dataclasses.field(repr=True, compare=True)
+
+    @classmethod
+    def from_text(self, line, extra=None):
+        message = line
+
+        if extra:
+            message += '\n' + extra
+
+        parser = re.compile(
+            r"""
+            ^                         # Start of string
+            (?P<kind>warning|error):  # Severity
+            \s+                       # Spacing
+            (?P<file>[/a-z0-9_.-]*):  # File path
+            (?P<line>[0-9]+)          # Line number
+            \s*                       # Spacing
+            (?P<content>.*)           # Warning content
+            $                         # End of string
+            """,
+            re.VERBOSE | re.IGNORECASE)
+
+        m = parser.match(line)
+        if m:
+            kind = m['kind']
+            file = m['file']
+            line = int(m['line'])
+            content = m['content']
+            if extra:
+                content += '\n' + extra
+        else:
+            kind = 'Unknown'
+            file = None
+            line = None
+            content = message
+
+        return KdocWarning(message, kind=kind, file=file, line=line,
+                           content=content)
+
+    def __str__(self):
+        return self.message
+
+def parse_warnings(lines) -> List[KdocWarning]:
+    skip = False
+    length = len(lines)
+
+    warnings = []
+
+    # Walk through lines and convert to warning objects
+    for i, line in enumerate(lines):
+        if skip:
+            skip = False
+            continue
+
+        if line.endswith(':') and i + 1 < length:
+            extra = lines[i + 1]
+            skip = True
+        else:
+            extra = None
+
+        warnings.append(KdocWarning.from_text(line, extra))
+
+    return warnings
+
+def run_kernel_doc(tree, commitish, files) -> List[KdocWarning]:
     """ Run ./scripts/kdoc on a given commit and capture its results. """
 
     cmd = ["git", "checkout", "-q", commitish]
@@ -25,7 +105,9 @@ def run_kernel_doc(tree, commitish, files):
     result = subprocess.run(cmd, cwd=tree.path, text=True, check=False,
                             stderr=subprocess.PIPE)
 
-    return result.stderr.strip().split('\n')
+    lines = result.stderr.strip().split('\n')
+
+    return parse_warnings(lines)
 
 def extract_files(patch):
     """Extract paths added or modified by the patch."""
@@ -80,13 +162,45 @@ def kdoc(tree, patch, result_dir) -> Tuple[int, str, str]:
 
         return ret, desc, "\n".join(log)
 
+    current_set = set(current_warnings)
+    incumbent_set = set(incumbent_warnings)
+
+    # This construction preserves ordering vs using set difference
+    new_warnings = [x for x in current_warnings if x not in incumbent_set]
+    rm_warnings = [x for x in incumbent_warnings if x not in current_set]
+
     incumbent_count = len(incumbent_warnings)
     current_count = len(current_warnings)
+    new_count = len(new_warnings)
+    rm_count = len(rm_warnings)
 
     desc = f'Errors and warnings before: {incumbent_count} This patch: {current_count}'
+    if new_count:
+        desc += f' New: {new_count}'
+    if rm_count:
+        desc += f' Removed: {rm_count}'
     log += ["", desc]
 
-    if current_count > incumbent_count:
+    if rm_count:
+        log += ["", "Warnings removed:"]
+        log.extend(map(str, rm_warnings))
+
+        file_breakdown = collections.Counter((x.file for x in rm_warnings))
+
+        log += ["Per-file breakdown:"]
+        for f, count in file_breakdown.items():
+            log += [f'{count:6} {f}']
+
+    if new_count:
         ret = 1
 
+        log += ["", "New warnings added:"]
+        log.extend(map(str, new_warnings))
+
+        file_breakdown = collections.Counter((x.file for x in new_warnings))
+
+        log += ["Per-file breakdown:"]
+        for f, count in file_breakdown.items():
+            log += [f'{count:6} {f}']
+
     return ret, desc, "\n".join(log)