Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .github/CODEOWNERS
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,9 @@ Doc/howto/clinic.rst @erlend-aasland @AA-Turner
# C Analyser
Tools/c-analyzer/ @ericsnowcurrently

# C API Documentation Checks
Tools/check-c-api-docs/ @ZeroIntensity

# Fuzzing
Modules/_xxtestfuzz/ @ammaraskar

Expand Down
3 changes: 3 additions & 0 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,9 @@ jobs:
- name: Check for unsupported C global variables
if: github.event_name == 'pull_request' # $GITHUB_EVENT_NAME
run: make check-c-globals
- name: Check for undocumented C APIs
run: make check-c-api-docs


build-windows:
name: >-
Expand Down
5 changes: 5 additions & 0 deletions Makefile.pre.in
Original file line number Diff line number Diff line change
Expand Up @@ -3313,6 +3313,11 @@ check-c-globals:
--format summary \
--traceback

# Check for undocumented C APIs.
.PHONY: check-c-api-docs
check-c-api-docs:
$(PYTHON_FOR_REGEN) $(srcdir)/Tools/check-c-api-docs/main.py

# Find files with funny names
.PHONY: funny
funny:
Expand Down
100 changes: 100 additions & 0 deletions Tools/check-c-api-docs/ignored_c_api.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
# descrobject.h
PyClassMethodDescr_Type
PyDictProxy_Type
PyGetSetDescr_Type
PyMemberDescr_Type
PyMethodDescr_Type
PyWrapperDescr_Type
# pydtrace_probes.h
PyDTrace_AUDIT
PyDTrace_FUNCTION_ENTRY
PyDTrace_FUNCTION_RETURN
PyDTrace_GC_DONE
PyDTrace_GC_START
PyDTrace_IMPORT_FIND_LOAD_DONE
PyDTrace_IMPORT_FIND_LOAD_START
PyDTrace_INSTANCE_DELETE_DONE
PyDTrace_INSTANCE_DELETE_START
PyDTrace_INSTANCE_NEW_DONE
PyDTrace_INSTANCE_NEW_START
PyDTrace_LINE
# fileobject.h
Py_FileSystemDefaultEncodeErrors
Py_FileSystemDefaultEncoding
Py_HasFileSystemDefaultEncoding
Py_UTF8Mode
# pyhash.h
Py_HASH_EXTERNAL
# exports.h
PyAPI_DATA
Py_EXPORTED_SYMBOL
Py_IMPORTED_SYMBOL
Py_LOCAL_SYMBOL
# modsupport.h
PyABIInfo_FREETHREADING_AGNOSTIC
# moduleobject.h
PyModuleDef_Type
# object.h
Py_INVALID_SIZE
Py_TPFLAGS_HAVE_VERSION_TAG
Py_TPFLAGS_INLINE_VALUES
Py_TPFLAGS_IS_ABSTRACT
# pyexpat.h
PyExpat_CAPI_MAGIC
PyExpat_CAPSULE_NAME
# pyport.h
Py_ALIGNED
Py_ARITHMETIC_RIGHT_SHIFT
Py_CAN_START_THREADS
Py_FORCE_EXPANSION
Py_GCC_ATTRIBUTE
Py_LL
Py_SAFE_DOWNCAST
Py_ULL
Py_VA_COPY
# unicodeobject.h
Py_UNICODE_SIZE
# cpython/methodobject.h
PyCFunction_GET_CLASS
# cpython/compile.h
PyCF_ALLOW_INCOMPLETE_INPUT
PyCF_COMPILE_MASK
PyCF_DONT_IMPLY_DEDENT
PyCF_IGNORE_COOKIE
PyCF_MASK
PyCF_MASK_OBSOLETE
PyCF_SOURCE_IS_UTF8
# cpython/descrobject.h
PyDescr_COMMON
PyDescr_NAME
PyDescr_TYPE
PyWrapperFlag_KEYWORDS
# cpython/fileobject.h
PyFile_NewStdPrinter
PyStdPrinter_Type
Py_UniversalNewlineFgets
# cpython/setobject.h
PySet_MINSIZE
# cpython/ceval.h
PyUnstable_CopyPerfMapFile
PyUnstable_PerfTrampoline_CompileCode
PyUnstable_PerfTrampoline_SetPersistAfterFork
# cpython/genobject.h
PyAsyncGenASend_CheckExact
# cpython/longintrepr.h
PyLong_BASE
PyLong_MASK
PyLong_SHIFT
# cpython/pyerrors.h
PyException_HEAD
# cpython/pyframe.h
PyUnstable_EXECUTABLE_KINDS
PyUnstable_EXECUTABLE_KIND_BUILTIN_FUNCTION
PyUnstable_EXECUTABLE_KIND_METHOD_DESCRIPTOR
PyUnstable_EXECUTABLE_KIND_PY_FUNCTION
PyUnstable_EXECUTABLE_KIND_SKIP
# cpython/pylifecycle.h
Py_FrozenMain
# cpython/unicodeobject.h
PyUnicode_IS_COMPACT
PyUnicode_IS_COMPACT_ASCII
193 changes: 193 additions & 0 deletions Tools/check-c-api-docs/main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,193 @@
import re
from pathlib import Path
import sys
import _colorize
import textwrap

SIMPLE_FUNCTION_REGEX = re.compile(r"PyAPI_FUNC(.+) (\w+)\(")
SIMPLE_MACRO_REGEX = re.compile(r"# *define *(\w+)(\(.+\))? ")
SIMPLE_INLINE_REGEX = re.compile(r"static inline .+( |\n)(\w+)")
SIMPLE_DATA_REGEX = re.compile(r"PyAPI_DATA\(.+\) (\w+)")

CPYTHON = Path(__file__).parent.parent.parent
INCLUDE = CPYTHON / "Include"
C_API_DOCS = CPYTHON / "Doc" / "c-api"
IGNORED = (
(CPYTHON / "Tools" / "check-c-api-docs" / "ignored_c_api.txt")
.read_text()
.split("\n")
)

for index, line in enumerate(IGNORED):
if line.startswith("#"):
IGNORED.pop(index)

MISTAKE = """
If this is a mistake and this script should not be failing, create an
issue and tag Peter (@ZeroIntensity) on it.\
"""


def found_undocumented(singular: bool) -> str:
some = "an" if singular else "some"
s = "" if singular else "s"
these = "this" if singular else "these"
them = "it" if singular else "them"
were = "was" if singular else "were"

return (
textwrap.dedent(
f"""
Found {some} undocumented C API{s}!

Python requires documentation on all public C API symbols, macros, and types.
If {these} API{s} {were} not meant to be public, prefix {them} with a
leading underscore (_PySomething_API) or move {them} to the internal C API
(pycore_*.h files).

In exceptional cases, certain APIs can be ignored by adding them to
Tools/c-api-docs-check/ignored_c_api.txt
"""
)
+ MISTAKE
)


def found_ignored_documented(singular: bool) -> str:
some = "a" if singular else "some"
s = "" if singular else "s"
them = "it" if singular else "them"
were = "was" if singular else "were"
they = "it" if singular else "they"

return (
textwrap.dedent(
f"""
Found {some} C API{s} listed in Tools/c-api-docs-check/ignored_c_api.txt, but
{they} {were} found in the documentation. To fix this, remove {them} from
ignored_c_api.txt.
"""
)
+ MISTAKE
)


def is_documented(name: str) -> bool:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think is_documented is a little different from is present. Why not improve the check to include verifying it's a doc declaration, i.e. if it's in the text check it's on the same line as the c:func:: etc.?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I thought about that, but I decided that it was a case of "practicality beats purity". Running Sphinx or using a regular expression will make this significantly slower, and probably won't save us much in practice.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As a compromise, we could go ever simpler, just checking if :: and .. are in the same line as the API name.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hm, that may work, but the list in #141004 was created using a full-text search, not a search for :: or .., so there are probably some APIs that still need work. Let's finish the list in #141004 first, and then we can look into finding more edge cases.

"""
Is a name present in the C API documentation?
"""
for path in C_API_DOCS.iterdir():
if path.is_dir():
continue
if path.suffix != ".rst":
continue

text = path.read_text(encoding="utf-8")
if name in text:
return True

return False


def scan_file_for_docs(filename: str, text: str) -> tuple[list[str], list[str]]:
"""
Scan a header file for C API functions.
"""
undocumented: list[str] = []
documented_ignored: list[str] = []
colors = _colorize.get_colors()

def check_for_name(name: str) -> None:
documented = is_documented(name)
if documented and (name in IGNORED):
documented_ignored.append(name)
elif not documented and (name not in IGNORED):
undocumented.append(name)

for function in SIMPLE_FUNCTION_REGEX.finditer(text):
name = function.group(2)
if not name.startswith("Py"):
continue

check_for_name(name)

for macro in SIMPLE_MACRO_REGEX.finditer(text):
name = macro.group(1)
if not name.startswith("Py"):
continue

if "(" in name:
name = name[: name.index("(")]

check_for_name(name)

for inline in SIMPLE_INLINE_REGEX.finditer(text):
name = inline.group(2)
if not name.startswith("Py"):
continue

check_for_name(name)

for data in SIMPLE_DATA_REGEX.finditer(text):
name = data.group(1)
if not name.startswith("Py"):
continue

check_for_name(name)

# Remove duplicates and sort alphabetically to keep the output deterministic
undocumented = list(set(undocumented))
undocumented.sort()

if undocumented or documented_ignored:
print(f"{filename} {colors.RED}BAD{colors.RESET}")
for name in undocumented:
print(f"{colors.BOLD_RED}UNDOCUMENTED:{colors.RESET} {name}")
for name in documented_ignored:
print(f"{colors.BOLD_YELLOW}DOCUMENTED BUT IGNORED:{colors.RESET} {name}")
else:
print(f"{filename} {colors.GREEN}OK{colors.RESET}")

return undocumented, documented_ignored


def main() -> None:
print("Scanning for undocumented C API functions...")
files = [*INCLUDE.iterdir(), *(INCLUDE / "cpython").iterdir()]
all_missing: list[str] = []
all_found_ignored: list[str] = []

for file in files:
if file.is_dir():
continue
assert file.exists()
text = file.read_text(encoding="utf-8")
missing, ignored = scan_file_for_docs(str(file.relative_to(INCLUDE)), text)
all_found_ignored += ignored
all_missing += missing

fail = False
to_check = [
(all_missing, "missing", found_undocumented(len(all_missing) == 1)),
(
all_found_ignored,
"documented but ignored",
found_ignored_documented(len(all_found_ignored) == 1),
),
]
for name_list, what, message in to_check:
if not name_list:
continue

s = "s" if len(name_list) != 1 else ""
print(f"-- {len(name_list)} {what} C API{s} --")
for name in name_list:
print(f" - {name}")
print(message)
fail = True

sys.exit(1 if fail else 0)


if __name__ == "__main__":
main()
Loading