From 1acabab2c9cd20ceefaf2d7fce84368bb3e41eb2 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Fri, 14 Nov 2025 11:23:19 +0200 Subject: [PATCH 1/3] gh-141370: Fix undefined behavior when using Py_ABS() --- Lib/test/test_bytes.py | 11 +++++++++++ Lib/test/test_marshal.py | 5 +++++ Lib/test/test_memoryview.py | 19 +++++++++++++++++++ Python/marshal.c | 4 +++- Python/pystrhex.c | 5 +++-- 5 files changed, 41 insertions(+), 3 deletions(-) diff --git a/Lib/test/test_bytes.py b/Lib/test/test_bytes.py index 86898bfcab9135..038351a0bc0f4e 100644 --- a/Lib/test/test_bytes.py +++ b/Lib/test/test_bytes.py @@ -549,6 +549,17 @@ def test_hex_separator_basics(self): self.assertEqual(three_bytes.hex(':', 2), 'b9:01ef') self.assertEqual(three_bytes.hex(':', 1), 'b9:01:ef') self.assertEqual(three_bytes.hex('*', -2), 'b901*ef') + self.assertEqual(three_bytes.hex(sep=':', bytes_per_sep=2), 'b9:01ef') + self.assertEqual(three_bytes.hex(sep='*', bytes_per_sep=-2), 'b901*ef') + for bytes_per_sep in 3, -3, 2**31-1, -(2**31-1): + with self.subTest(bytes_per_sep=bytes_per_sep): + self.assertEqual(three_bytes.hex(':', bytes_per_sep), 'b901ef') + for bytes_per_sep in 2**31, -2**31, 2**1000, -2**1000: + with self.subTest(bytes_per_sep=bytes_per_sep): + try: + self.assertEqual(three_bytes.hex(':', bytes_per_sep), 'b901ef') + except OverflowError: + pass value = b'{s\005\000\000\000worldi\002\000\000\000s\005\000\000\000helloi\001\000\000\0000' self.assertEqual(value.hex('.', 8), '7b7305000000776f.726c646902000000.730500000068656c.6c6f690100000030') diff --git a/Lib/test/test_marshal.py b/Lib/test/test_marshal.py index 8b1fb0eba1f8b6..662bdfccc79125 100644 --- a/Lib/test/test_marshal.py +++ b/Lib/test/test_marshal.py @@ -43,6 +43,11 @@ def test_ints(self): for expected in (-n, n): self.helper(expected) n = n >> 1 + n = 1 << 100 + while n: + for expected in (-n, -n+1, n-1, n): + self.helper(expected) + n = n >> 1 def test_int64(self): # Simulate int marshaling with TYPE_INT64. diff --git a/Lib/test/test_memoryview.py b/Lib/test/test_memoryview.py index 64f440f180bbf0..1bd58eb6408833 100644 --- a/Lib/test/test_memoryview.py +++ b/Lib/test/test_memoryview.py @@ -600,6 +600,25 @@ def test_memoryview_hex(self): m2 = m1[::-1] self.assertEqual(m2.hex(), '30' * 200000) + def test_memoryview_hex_separator(self): + x = bytes(range(97, 102)) + m1 = memoryview(x) + m2 = m1[::-1] + self.assertEqual(m2.hex(':'), '65:64:63:62:61') + self.assertEqual(m2.hex(':', 2), '65:6463:6261') + self.assertEqual(m2.hex(':', -2), '6564:6362:61') + self.assertEqual(m2.hex(sep=':', bytes_per_sep=2), '65:6463:6261') + self.assertEqual(m2.hex(sep=':', bytes_per_sep=-2), '6564:6362:61') + for bytes_per_sep in 5, -5, 2**31-1, -(2**31-1): + with self.subTest(bytes_per_sep=bytes_per_sep): + self.assertEqual(m2.hex(':', bytes_per_sep), '6564636261') + for bytes_per_sep in 2**31, -2**31, 2**1000, -2**1000: + with self.subTest(bytes_per_sep=bytes_per_sep): + try: + self.assertEqual(m2.hex(':', bytes_per_sep), '6564636261') + except OverflowError: + pass + def test_copy(self): m = memoryview(b'abc') with self.assertRaises(TypeError): diff --git a/Python/marshal.c b/Python/marshal.c index 8b56de6575559c..bdd9d489ddf91e 100644 --- a/Python/marshal.c +++ b/Python/marshal.c @@ -310,7 +310,9 @@ w_PyLong(const PyLongObject *ob, char flag, WFILE *p) } if (!long_export.digits) { int8_t sign = long_export.value < 0 ? -1 : 1; - uint64_t abs_value = Py_ABS(long_export.value); + uint64_t abs_value = long_export.value < -INT64_MAX + ? (uint64_t)INT64_MAX + (uint64_t)-(long_export.value + INT64_MAX) + : (uint64_t)Py_ABS(long_export.value); uint64_t d = abs_value; long l = 0; diff --git a/Python/pystrhex.c b/Python/pystrhex.c index 38484f5a7d4227..63441a585bbd79 100644 --- a/Python/pystrhex.c +++ b/Python/pystrhex.c @@ -42,8 +42,9 @@ static PyObject *_Py_strhex_impl(const char* argbuf, const Py_ssize_t arglen, else { bytes_per_sep_group = 0; } - - unsigned int abs_bytes_per_sep = Py_ABS(bytes_per_sep_group); + unsigned int abs_bytes_per_sep = (bytes_per_sep_group < -INT_MAX) + ? (unsigned int)INT_MAX + (unsigned int)-(bytes_per_sep_group + INT_MAX) + : (unsigned int)Py_ABS(bytes_per_sep_group); Py_ssize_t resultlen = 0; if (bytes_per_sep_group && arglen > 0) { /* How many sep characters we'll be inserting. */ From df24c4f24312f901c7a787ad31cc240bf1a315cf Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Mon, 17 Nov 2025 20:13:14 +0200 Subject: [PATCH 2/3] Use more efficient implementation. Co-authored-by: Sergey B Kirpichev --- Include/pymacro.h | 1 + Python/marshal.c | 4 +--- Python/pystrhex.c | 4 +--- 3 files changed, 3 insertions(+), 6 deletions(-) diff --git a/Include/pymacro.h b/Include/pymacro.h index 857cdf12db9bf2..6924f4228c8703 100644 --- a/Include/pymacro.h +++ b/Include/pymacro.h @@ -115,6 +115,7 @@ #define Py_MAX(x, y) (((x) > (y)) ? (x) : (y)) /* Absolute value of the number x */ +#define _Py_ABS_CAST(T,x) ((x) >= 0 ? ((T) (x)) : (- (((T) ((x) + 1)) - 1))) #define Py_ABS(x) ((x) < 0 ? -(x) : (x)) #define _Py_XSTRINGIFY(x) #x diff --git a/Python/marshal.c b/Python/marshal.c index bdd9d489ddf91e..69d6dd7cf0f802 100644 --- a/Python/marshal.c +++ b/Python/marshal.c @@ -310,9 +310,7 @@ w_PyLong(const PyLongObject *ob, char flag, WFILE *p) } if (!long_export.digits) { int8_t sign = long_export.value < 0 ? -1 : 1; - uint64_t abs_value = long_export.value < -INT64_MAX - ? (uint64_t)INT64_MAX + (uint64_t)-(long_export.value + INT64_MAX) - : (uint64_t)Py_ABS(long_export.value); + uint64_t abs_value = _Py_ABS_CAST(uint64_t, long_export.value); uint64_t d = abs_value; long l = 0; diff --git a/Python/pystrhex.c b/Python/pystrhex.c index 63441a585bbd79..af2f5c5dce5fca 100644 --- a/Python/pystrhex.c +++ b/Python/pystrhex.c @@ -42,9 +42,7 @@ static PyObject *_Py_strhex_impl(const char* argbuf, const Py_ssize_t arglen, else { bytes_per_sep_group = 0; } - unsigned int abs_bytes_per_sep = (bytes_per_sep_group < -INT_MAX) - ? (unsigned int)INT_MAX + (unsigned int)-(bytes_per_sep_group + INT_MAX) - : (unsigned int)Py_ABS(bytes_per_sep_group); + unsigned int abs_bytes_per_sep = _Py_ABS_CAST(unsigned int, bytes_per_sep_group); Py_ssize_t resultlen = 0; if (bytes_per_sep_group && arglen > 0) { /* How many sep characters we'll be inserting. */ From b29256168bb675d523d1ca1a55f3abf16365b253 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Tue, 18 Nov 2025 12:45:12 +0200 Subject: [PATCH 3/3] Try other code. Add a comment. --- Include/pymacro.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/Include/pymacro.h b/Include/pymacro.h index 6924f4228c8703..7ecce44a0d2a42 100644 --- a/Include/pymacro.h +++ b/Include/pymacro.h @@ -115,8 +115,13 @@ #define Py_MAX(x, y) (((x) > (y)) ? (x) : (y)) /* Absolute value of the number x */ -#define _Py_ABS_CAST(T,x) ((x) >= 0 ? ((T) (x)) : (- (((T) ((x) + 1)) - 1))) #define Py_ABS(x) ((x) < 0 ? -(x) : (x)) +/* Safer implementation that avoids an undefined behavior for the minimal + value of the signed integer type if its absolute value is larger than + the maximal value of the signed integer type (in the two's complement + representations, which is common). + */ +#define _Py_ABS_CAST(T, x) ((x) >= 0 ? ((T) (x)) : ((T) (((T) -((x) + 1)) + 1u))) #define _Py_XSTRINGIFY(x) #x