From bcac51fc09be79a66a6b7f4256d01941bfaa7acf Mon Sep 17 00:00:00 2001 From: Davide Date: Thu, 17 Oct 2024 18:15:49 +0200 Subject: [PATCH 1/6] gh-125651: Fix UUID hex parsing with underscores --- Lib/test/test_uuid.py | 2 ++ Lib/uuid.py | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/Lib/test/test_uuid.py b/Lib/test/test_uuid.py index 33045a78721aac..d95295f27acc3d 100755 --- a/Lib/test/test_uuid.py +++ b/Lib/test/test_uuid.py @@ -277,6 +277,8 @@ def test_exceptions(self): # Badly formed hex strings. badvalue(lambda: self.uuid.UUID('')) badvalue(lambda: self.uuid.UUID('abc')) + badvalue(lambda: self.uuid.UUID("123_4567812345678123456781234567")) + badvalue(lambda: self.uuid.UUID("123_4567812345678123456781_23456")) badvalue(lambda: self.uuid.UUID('1234567812345678123456781234567')) badvalue(lambda: self.uuid.UUID('123456781234567812345678123456789')) badvalue(lambda: self.uuid.UUID('123456781234567812345678z2345678')) diff --git a/Lib/uuid.py b/Lib/uuid.py index c0150a59d7cb9a..368942a571fe19 100644 --- a/Lib/uuid.py +++ b/Lib/uuid.py @@ -214,7 +214,7 @@ def __init__(self, hex=None, bytes=None, bytes_le=None, fields=None, pass elif hex is not None: hex = hex.replace('urn:', '').replace('uuid:', '') - hex = hex.strip('{}').replace('-', '') + hex = hex.strip("{}").replace("-", "").replace("_", "") if len(hex) != 32: raise ValueError('badly formed hexadecimal UUID string') int = int_(hex, 16) From 2f59b4049117d12e4be7264638e36afc92972788 Mon Sep 17 00:00:00 2001 From: Davide Date: Fri, 18 Oct 2024 09:50:16 +0200 Subject: [PATCH 2/6] gh-125651: Use regex to prevent any disallowed character from being passed to int --- Lib/test/test_uuid.py | 10 ++++++++-- Lib/uuid.py | 5 +++-- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/Lib/test/test_uuid.py b/Lib/test/test_uuid.py index d95295f27acc3d..b28141d74a45ce 100755 --- a/Lib/test/test_uuid.py +++ b/Lib/test/test_uuid.py @@ -277,11 +277,17 @@ def test_exceptions(self): # Badly formed hex strings. 
badvalue(lambda: self.uuid.UUID('')) badvalue(lambda: self.uuid.UUID('abc')) - badvalue(lambda: self.uuid.UUID("123_4567812345678123456781234567")) - badvalue(lambda: self.uuid.UUID("123_4567812345678123456781_23456")) + badvalue(lambda: self.uuid.UUID('123_4567812345678123456781234567')) + badvalue(lambda: self.uuid.UUID('123_4567812345678123456781_23456')) + badvalue(lambda: self.uuid.UUID('123_4567812345678123456781_23456')) badvalue(lambda: self.uuid.UUID('1234567812345678123456781234567')) badvalue(lambda: self.uuid.UUID('123456781234567812345678123456789')) badvalue(lambda: self.uuid.UUID('123456781234567812345678z2345678')) + badvalue(lambda: self.uuid.UUID('0x123456781234567812345678z23456')) + badvalue(lambda: self.uuid.UUID('0X123456781234567812345678z23456')) + badvalue(lambda: self.uuid.UUID('+123456781234567812345678z234567')) + badvalue(lambda: self.uuid.UUID(' 123456781234567812345678z23456 ')) + badvalue(lambda: self.uuid.UUID(' 123456781234567812345678z2345 ')) # Badly formed bytes. 
badvalue(lambda: self.uuid.UUID(bytes='abc')) diff --git a/Lib/uuid.py b/Lib/uuid.py index 368942a571fe19..0a637ecc731749 100644 --- a/Lib/uuid.py +++ b/Lib/uuid.py @@ -57,6 +57,7 @@ """ import os +import re import sys import time @@ -214,8 +215,8 @@ def __init__(self, hex=None, bytes=None, bytes_le=None, fields=None, pass elif hex is not None: hex = hex.replace('urn:', '').replace('uuid:', '') - hex = hex.strip("{}").replace("-", "").replace("_", "") - if len(hex) != 32: + hex = hex.strip('{}').replace('-', '') + if not re.fullmatch(r'[0-9A-Fa-f]{32}', hex): raise ValueError('badly formed hexadecimal UUID string') int = int_(hex, 16) elif bytes_le is not None: From 265ecb1b654e0ffde67502e298c807bfbca4305e Mon Sep 17 00:00:00 2001 From: Davide Date: Fri, 18 Oct 2024 11:03:46 +0200 Subject: [PATCH 3/6] gh-125651: Remove duplicated check in test --- Lib/test/test_uuid.py | 1 - 1 file changed, 1 deletion(-) diff --git a/Lib/test/test_uuid.py b/Lib/test/test_uuid.py index b28141d74a45ce..0bf639f99d7786 100755 --- a/Lib/test/test_uuid.py +++ b/Lib/test/test_uuid.py @@ -279,7 +279,6 @@ def test_exceptions(self): badvalue(lambda: self.uuid.UUID('abc')) badvalue(lambda: self.uuid.UUID('123_4567812345678123456781234567')) badvalue(lambda: self.uuid.UUID('123_4567812345678123456781_23456')) - badvalue(lambda: self.uuid.UUID('123_4567812345678123456781_23456')) badvalue(lambda: self.uuid.UUID('1234567812345678123456781234567')) badvalue(lambda: self.uuid.UUID('123456781234567812345678123456789')) badvalue(lambda: self.uuid.UUID('123456781234567812345678z2345678')) From f6668684302145e40bebcb42db1c1d244611ea8e Mon Sep 17 00:00:00 2001 From: "blurb-it[bot]" <43283697+blurb-it[bot]@users.noreply.github.com> Date: Fri, 18 Oct 2024 09:45:35 +0000 Subject: [PATCH 4/6] =?UTF-8?q?=F0=9F=93=9C=F0=9F=A4=96=20Added=20by=20blu?= =?UTF-8?q?rb=5Fit.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- 
.../next/Library/2024-10-18-09-45-34.gh-issue-125651.M0wSAS.rst | 1 + 1 file changed, 1 insertion(+) create mode 100644 Misc/NEWS.d/next/Library/2024-10-18-09-45-34.gh-issue-125651.M0wSAS.rst diff --git a/Misc/NEWS.d/next/Library/2024-10-18-09-45-34.gh-issue-125651.M0wSAS.rst b/Misc/NEWS.d/next/Library/2024-10-18-09-45-34.gh-issue-125651.M0wSAS.rst new file mode 100644 index 00000000000000..e5964c83fd8712 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-10-18-09-45-34.gh-issue-125651.M0wSAS.rst @@ -0,0 +1 @@ +Fix parsing of HEX encoded UUID string From 412da700dc457f03b6441c47f360de6d64d655f4 Mon Sep 17 00:00:00 2001 From: Davide Date: Fri, 18 Oct 2024 12:29:06 +0200 Subject: [PATCH 5/6] gh-125651: Add test for unicode sequence and adjust news message --- Lib/test/test_uuid.py | 1 + .../next/Library/2024-10-18-09-45-34.gh-issue-125651.M0wSAS.rst | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/Lib/test/test_uuid.py b/Lib/test/test_uuid.py index 0bf639f99d7786..88a9d693ed6c6e 100755 --- a/Lib/test/test_uuid.py +++ b/Lib/test/test_uuid.py @@ -287,6 +287,7 @@ def test_exceptions(self): badvalue(lambda: self.uuid.UUID('+123456781234567812345678z234567')) badvalue(lambda: self.uuid.UUID(' 123456781234567812345678z23456 ')) badvalue(lambda: self.uuid.UUID(' 123456781234567812345678z2345 ')) + badvalue(lambda: self.uuid.UUID('\uff10123456781234567812345678z234567')) # Badly formed bytes. badvalue(lambda: self.uuid.UUID(bytes='abc')) diff --git a/Misc/NEWS.d/next/Library/2024-10-18-09-45-34.gh-issue-125651.M0wSAS.rst b/Misc/NEWS.d/next/Library/2024-10-18-09-45-34.gh-issue-125651.M0wSAS.rst index e5964c83fd8712..00c79d2dd87366 100644 --- a/Misc/NEWS.d/next/Library/2024-10-18-09-45-34.gh-issue-125651.M0wSAS.rst +++ b/Misc/NEWS.d/next/Library/2024-10-18-09-45-34.gh-issue-125651.M0wSAS.rst @@ -1 +1 @@ -Fix parsing of HEX encoded UUID string +Fix :class:`uuid.UUID` to reject hexadecimal strings containing characters other than hexadecimal digits, such as underscores, a leading sign, a ``0x`` prefix or surrounding whitespace. 
From f21225aece430104724cdd419bb3a84550d428d8 Mon Sep 17 00:00:00 2001 From: Davide Date: Wed, 17 Sep 2025 15:25:32 +0200 Subject: [PATCH 6/6] Use str.maketrans to ensure that only hex characters are given to UUID init --- Lib/uuid.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/Lib/uuid.py b/Lib/uuid.py index 0a637ecc731749..b5641112d65a56 100644 --- a/Lib/uuid.py +++ b/Lib/uuid.py @@ -57,7 +57,6 @@ """ import os -import re import sys import time @@ -84,6 +83,7 @@ _MAC_DELIM = b'.' _MAC_OMITS_LEADING_ZEROES = True +_HEX_TT = str.maketrans('', '', 'abcdefABCDEF0123456789') RESERVED_NCS, RFC_4122, RESERVED_MICROSOFT, RESERVED_FUTURE = [ 'reserved for NCS compatibility', 'specified in RFC 4122', 'reserved for Microsoft compatibility', 'reserved for future definition'] @@ -216,7 +216,8 @@ def __init__(self, hex=None, bytes=None, bytes_le=None, fields=None, elif hex is not None: hex = hex.replace('urn:', '').replace('uuid:', '') hex = hex.strip('{}').replace('-', '') - if not re.fullmatch(r'[0-9A-Fa-f]{32}', hex): + # Require exactly 32 ASCII hex digits; int() alone would also accept underscores, signs, '0x' prefixes, whitespace and non-ASCII digits. + if len(hex) != 32 or hex.translate(_HEX_TT): raise ValueError('badly formed hexadecimal UUID string') int = int_(hex, 16) elif bytes_le is not None: