From 198fd9af5c2aed05232d17e85ebb216686201a0a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Fri, 7 Nov 2025 17:14:52 +0100 Subject: [PATCH 1/7] document `Py_HASH_*` macros --- Doc/c-api/hash.rst | 55 +++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 54 insertions(+), 1 deletion(-) diff --git a/Doc/c-api/hash.rst b/Doc/c-api/hash.rst index b5fe93573a1456..0337b7497f7e89 100644 --- a/Doc/c-api/hash.rst +++ b/Doc/c-api/hash.rst @@ -17,15 +17,66 @@ See also the :c:member:`PyTypeObject.tp_hash` member and :ref:`numeric-hash`. .. versionadded:: 3.2 +.. c:macro:: Py_HASH_ALGORITHM + + A numerical value indicating the algorithm for hashing of :class:`str`, + :class:`bytes`, and :class:`memoryview`. + + The algorithm name is exposed by :data:`sys.hash_info.algorithm`. + +.. c:macro:: Py_HASH_FNV + Py_HASH_SIPHASH13 + Py_HASH_SIPHASH24 + + Numerical values to compare to :c:macro:`Py_HASH_ALGORITHM` to determine + which algorithm is used for hashing. The hash algorithm can be configured + via the configure :option:`--with-hash-algorithm` option. + +.. c:macro:: Py_HASH_EXTERNAL + + If :c:macro:`Py_HASH_ALGORITHM` is set to that value, this means that + the hash function is externally implemented, that is, embedders must + provide a definition for ``extern PyHash_FuncDef PyHash_Func`` when + building Python: + + .. code-block:: c + + static Py_hash_t + my_siphash24(const void *src, Py_ssize_t src_sz) { ... } + + PyHash_FuncDef PyHash_Func = { + .hash = my_siphash24, + .name = "my_siphash24", + .hash_bits = 64, + .seed_bits = 128, + }; + + .. availability:: Unix + +.. c:macro:: Py_HASH_CUTOFF + + Buffers of length in range ``[1, Py_HASH_CUTOFF)`` are hashed using DJBX33A + instead of the algorithm described by :c:macro:`Py_HASH_ALGORITHM`. + + - A :c:macro:`!Py_HASH_CUTOFF` of 0 disables the optimization. + - :c:macro:`!Py_HASH_CUTOFF` must non-negative and less or equal than 7. 
+ + 32-bit platforms should use a cutoff smaller than 64-bit platforms because + it is easier to create colliding strings. A cutoff of 7 on 64-bit platforms + and 5 on 32-bit platforms should provide a decent safety margin. + .. c:macro:: PyHASH_MODULUS - The `Mersenne prime `_ ``P = 2**n -1``, used for numeric hash scheme. + The `Mersenne prime `_ ``P = 2**n -1``, + used for numeric hash scheme. + This corresponds to the :data:`sys.hash_info.modulus` constant. .. versionadded:: 3.13 .. c:macro:: PyHASH_BITS The exponent ``n`` of ``P`` in :c:macro:`PyHASH_MODULUS`. + This corresponds to the :data:`sys.hash_info.hash_bits` constant. .. versionadded:: 3.13 @@ -38,12 +89,14 @@ See also the :c:member:`PyTypeObject.tp_hash` member and :ref:`numeric-hash`. .. c:macro:: PyHASH_INF The hash value returned for a positive infinity. + This corresponds to the :data:`sys.hash_info.inf` constant. .. versionadded:: 3.13 .. c:macro:: PyHASH_IMAG The multiplier used for the imaginary part of a complex number. + This corresponds to the :data:`sys.hash_info.imag` constant. .. versionadded:: 3.13 From f6c2c28680a29068388578595f9e40c293bad8ea Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Fri, 7 Nov 2025 17:15:46 +0100 Subject: [PATCH 2/7] correctly support `Py_HASH_EXTERNAL` for Unix platforms --- Doc/using/configure.rst | 5 +++-- configure | 6 +++++- configure.ac | 5 ++++- 3 files changed, 12 insertions(+), 4 deletions(-) diff --git a/Doc/using/configure.rst b/Doc/using/configure.rst index 1f773a3a547c2b..1afa2628f1d3a4 100644 --- a/Doc/using/configure.rst +++ b/Doc/using/configure.rst @@ -954,13 +954,14 @@ Libraries options Security Options ---------------- -.. option:: --with-hash-algorithm=[fnv|siphash13|siphash24] +.. option:: --with-hash-algorithm=[fnv|siphash13|siphash24|external] Select hash algorithm for use in ``Python/pyhash.c``: * ``siphash13`` (default); * ``siphash24``; - * ``fnv``. 
+ * ``fnv``; + * ``external``. .. versionadded:: 3.4 diff --git a/configure b/configure index 60521492755820..4dd2260b77afcc 100755 --- a/configure +++ b/configure @@ -1902,7 +1902,7 @@ Optional Packages: behaviour detector, 'ubsan' (default is no) --with-thread-sanitizer enable ThreadSanitizer data race detector, 'tsan' (default is no) - --with-hash-algorithm=[fnv|siphash13|siphash24] + --with-hash-algorithm=[fnv|siphash13|siphash24|external] select hash algorithm for use in Python/pyhash.c (default is SipHash13) --with-tzpath= @@ -14934,6 +14934,10 @@ then : { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $withval" >&5 printf "%s\n" "$withval" >&6; } case "$withval" in + external) + printf "%s\n" "#define Py_HASH_ALGORITHM 0" >>confdefs.h + + ;; siphash13) printf "%s\n" "#define Py_HASH_ALGORITHM 3" >>confdefs.h diff --git a/configure.ac b/configure.ac index 135492d82e08fd..91e6c15025c40d 100644 --- a/configure.ac +++ b/configure.ac @@ -3966,12 +3966,15 @@ dnl quadrigraphs "@<:@" and "@:>@" produce "[" and "]" in the output AC_ARG_WITH( [hash_algorithm], [AS_HELP_STRING( - [--with-hash-algorithm=@<:@fnv|siphash13|siphash24@:>@], + [--with-hash-algorithm=@<:@fnv|siphash13|siphash24|external@:>@], [select hash algorithm for use in Python/pyhash.c (default is SipHash13)] )], [ AC_MSG_RESULT([$withval]) case "$withval" in + external) + AC_DEFINE([Py_HASH_ALGORITHM], [0]) + ;; siphash13) AC_DEFINE([Py_HASH_ALGORITHM], [3]) ;; From aabd97fa0bbcadacbbcc62dcbd652a1f25db89f3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Fri, 7 Nov 2025 17:42:03 +0100 Subject: [PATCH 3/7] remove support `Py_HASH_EXTERNAL` for Unix platforms for now --- Doc/c-api/hash.rst | 21 --------------------- Doc/using/configure.rst | 5 ++--- configure | 6 +----- configure.ac | 5 +---- 4 files changed, 4 insertions(+), 33 deletions(-) diff --git a/Doc/c-api/hash.rst b/Doc/c-api/hash.rst index 0337b7497f7e89..e0c4b6066ece55 
100644 --- a/Doc/c-api/hash.rst +++ b/Doc/c-api/hash.rst @@ -32,27 +32,6 @@ See also the :c:member:`PyTypeObject.tp_hash` member and :ref:`numeric-hash`. which algorithm is used for hashing. The hash algorithm can be configured via the configure :option:`--with-hash-algorithm` option. -.. c:macro:: Py_HASH_EXTERNAL - - If :c:macro:`Py_HASH_ALGORITHM` is set to that value, this means that - the hash function is externally implemented, that is, embedders must - provide a definition for ``extern PyHash_FuncDef PyHash_Func`` when - building Python: - - .. code-block:: c - - static Py_hash_t - my_siphash24(const void *src, Py_ssize_t src_sz) { ... } - - PyHash_FuncDef PyHash_Func = { - .hash = my_siphash24, - .name = "my_siphash24", - .hash_bits = 64, - .seed_bits = 128, - }; - - .. availability:: Unix - .. c:macro:: Py_HASH_CUTOFF Buffers of length in range ``[1, Py_HASH_CUTOFF)`` are hashed using DJBX33A diff --git a/Doc/using/configure.rst b/Doc/using/configure.rst index 1afa2628f1d3a4..1f773a3a547c2b 100644 --- a/Doc/using/configure.rst +++ b/Doc/using/configure.rst @@ -954,14 +954,13 @@ Libraries options Security Options ---------------- -.. option:: --with-hash-algorithm=[fnv|siphash13|siphash24|external] +.. option:: --with-hash-algorithm=[fnv|siphash13|siphash24] Select hash algorithm for use in ``Python/pyhash.c``: * ``siphash13`` (default); * ``siphash24``; - * ``fnv``; - * ``external``. + * ``fnv``. .. 
versionadded:: 3.4 diff --git a/configure b/configure index 4dd2260b77afcc..60521492755820 100755 --- a/configure +++ b/configure @@ -1902,7 +1902,7 @@ Optional Packages: behaviour detector, 'ubsan' (default is no) --with-thread-sanitizer enable ThreadSanitizer data race detector, 'tsan' (default is no) - --with-hash-algorithm=[fnv|siphash13|siphash24|external] + --with-hash-algorithm=[fnv|siphash13|siphash24] select hash algorithm for use in Python/pyhash.c (default is SipHash13) --with-tzpath= @@ -14934,10 +14934,6 @@ then : { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $withval" >&5 printf "%s\n" "$withval" >&6; } case "$withval" in - external) - printf "%s\n" "#define Py_HASH_ALGORITHM 0" >>confdefs.h - - ;; siphash13) printf "%s\n" "#define Py_HASH_ALGORITHM 3" >>confdefs.h diff --git a/configure.ac b/configure.ac index 91e6c15025c40d..135492d82e08fd 100644 --- a/configure.ac +++ b/configure.ac @@ -3966,15 +3966,12 @@ dnl quadrigraphs "@<:@" and "@:>@" produce "[" and "]" in the output AC_ARG_WITH( [hash_algorithm], [AS_HELP_STRING( - [--with-hash-algorithm=@<:@fnv|siphash13|siphash24|external@:>@], + [--with-hash-algorithm=@<:@fnv|siphash13|siphash24@:>@], [select hash algorithm for use in Python/pyhash.c (default is SipHash13)] )], [ AC_MSG_RESULT([$withval]) case "$withval" in - external) - AC_DEFINE([Py_HASH_ALGORITHM], [0]) - ;; siphash13) AC_DEFINE([Py_HASH_ALGORITHM], [3]) ;; From 2443931a85e9a4bb8822168c24d45738fd1f2416 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Fri, 7 Nov 2025 18:52:25 +0100 Subject: [PATCH 4/7] add `.. versionadded::` --- Doc/c-api/hash.rst | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/Doc/c-api/hash.rst b/Doc/c-api/hash.rst index e0c4b6066ece55..de40b47431636e 100644 --- a/Doc/c-api/hash.rst +++ b/Doc/c-api/hash.rst @@ -24,14 +24,19 @@ See also the :c:member:`PyTypeObject.tp_hash` member and :ref:`numeric-hash`. 
The algorithm name is exposed by :data:`sys.hash_info.algorithm`. + .. versionadded:: 3.4 + .. c:macro:: Py_HASH_FNV - Py_HASH_SIPHASH13 Py_HASH_SIPHASH24 + Py_HASH_SIPHASH13 Numerical values to compare to :c:macro:`Py_HASH_ALGORITHM` to determine which algorithm is used for hashing. The hash algorithm can be configured via the configure :option:`--with-hash-algorithm` option. + .. versionadded:: 3.11 + Add :c:macro:`!Py_HASH_SIPHASH13`. + .. c:macro:: Py_HASH_CUTOFF Buffers of length in range ``[1, Py_HASH_CUTOFF)`` are hashed using DJBX33A @@ -44,6 +49,8 @@ See also the :c:member:`PyTypeObject.tp_hash` member and :ref:`numeric-hash`. it is easier to create colliding strings. A cutoff of 7 on 64-bit platforms and 5 on 32-bit platforms should provide a decent safety margin. + .. versionadded:: 3.4 + .. c:macro:: PyHASH_MODULUS The `Mersenne prime `_ ``P = 2**n -1``, From 8815b8bcd0c83a9bfa22bcb57dd4f79b29ca178e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Fri, 7 Nov 2025 18:54:19 +0100 Subject: [PATCH 5/7] add blank lines --- Doc/c-api/hash.rst | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/Doc/c-api/hash.rst b/Doc/c-api/hash.rst index de40b47431636e..f0059981f756d0 100644 --- a/Doc/c-api/hash.rst +++ b/Doc/c-api/hash.rst @@ -11,12 +11,14 @@ See also the :c:member:`PyTypeObject.tp_hash` member and :ref:`numeric-hash`. .. versionadded:: 3.2 + .. c:type:: Py_uhash_t Hash value type: unsigned integer. .. versionadded:: 3.2 + .. c:macro:: Py_HASH_ALGORITHM A numerical value indicating the algorithm for hashing of :class:`str`, @@ -26,6 +28,7 @@ See also the :c:member:`PyTypeObject.tp_hash` member and :ref:`numeric-hash`. .. versionadded:: 3.4 + .. c:macro:: Py_HASH_FNV Py_HASH_SIPHASH24 Py_HASH_SIPHASH13 @@ -37,6 +40,7 @@ See also the :c:member:`PyTypeObject.tp_hash` member and :ref:`numeric-hash`. .. versionadded:: 3.11 Add :c:macro:`!Py_HASH_SIPHASH13`. + .. 
c:macro:: Py_HASH_CUTOFF Buffers of length in range ``[1, Py_HASH_CUTOFF)`` are hashed using DJBX33A @@ -51,6 +55,7 @@ See also the :c:member:`PyTypeObject.tp_hash` member and :ref:`numeric-hash`. .. versionadded:: 3.4 + .. c:macro:: PyHASH_MODULUS The `Mersenne prime `_ ``P = 2**n -1``, @@ -59,6 +64,7 @@ See also the :c:member:`PyTypeObject.tp_hash` member and :ref:`numeric-hash`. .. versionadded:: 3.13 + .. c:macro:: PyHASH_BITS The exponent ``n`` of ``P`` in :c:macro:`PyHASH_MODULUS`. @@ -66,12 +72,14 @@ See also the :c:member:`PyTypeObject.tp_hash` member and :ref:`numeric-hash`. .. versionadded:: 3.13 + .. c:macro:: PyHASH_MULTIPLIER Prime multiplier used in string and various other hashes. .. versionadded:: 3.13 + .. c:macro:: PyHASH_INF The hash value returned for a positive infinity. @@ -79,6 +87,7 @@ See also the :c:member:`PyTypeObject.tp_hash` member and :ref:`numeric-hash`. .. versionadded:: 3.13 + .. c:macro:: PyHASH_IMAG The multiplier used for the imaginary part of a complex number. @@ -86,6 +95,7 @@ See also the :c:member:`PyTypeObject.tp_hash` member and :ref:`numeric-hash`. .. versionadded:: 3.13 + .. c:type:: PyHash_FuncDef Hash function definition used by :c:func:`PyHash_GetFuncDef`. From 32173a93b44a374fc3e2947cf55c428313826f17 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Fri, 7 Nov 2025 19:04:23 +0100 Subject: [PATCH 6/7] fixup --- Doc/c-api/hash.rst | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/Doc/c-api/hash.rst b/Doc/c-api/hash.rst index f0059981f756d0..f67688629895cc 100644 --- a/Doc/c-api/hash.rst +++ b/Doc/c-api/hash.rst @@ -37,7 +37,10 @@ See also the :c:member:`PyTypeObject.tp_hash` member and :ref:`numeric-hash`. which algorithm is used for hashing. The hash algorithm can be configured via the configure :option:`--with-hash-algorithm` option. - .. versionadded:: 3.11 + .. 
versionadded:: 3.4 + Add :c:macro:`!Py_HASH_FNV` and :c:macro:`!Py_HASH_SIPHASH24`. + + .. versionadded:: 3.11 Add :c:macro:`!Py_HASH_SIPHASH13`. From db8f82360bdfac2cd3d988587d348d8c0f935c09 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Fri, 7 Nov 2025 19:08:24 +0100 Subject: [PATCH 7/7] typo --- Doc/c-api/hash.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Doc/c-api/hash.rst b/Doc/c-api/hash.rst index f67688629895cc..e74f5b4d18ebe1 100644 --- a/Doc/c-api/hash.rst +++ b/Doc/c-api/hash.rst @@ -50,7 +50,7 @@ See also the :c:member:`PyTypeObject.tp_hash` member and :ref:`numeric-hash`. instead of the algorithm described by :c:macro:`Py_HASH_ALGORITHM`. - A :c:macro:`!Py_HASH_CUTOFF` of 0 disables the optimization. - - :c:macro:`!Py_HASH_CUTOFF` must non-negative and less or equal than 7. + - :c:macro:`!Py_HASH_CUTOFF` must be non-negative and less than or equal to 7. 32-bit platforms should use a cutoff smaller than 64-bit platforms because it is easier to create colliding strings. A cutoff of 7 on 64-bit platforms