Merged

30 commits
822230d
gh-115952: Fix vulnerability in the pickle module
serhiy-storchaka May 20, 2024
88f1461
Try to fix tests on 32-bit platforms.
serhiy-storchaka May 20, 2024
048099b
Try to fix more tests on 32-bit platforms.
serhiy-storchaka May 20, 2024
d9d1d1d
Apply suggestions from code review
serhiy-storchaka May 22, 2024
6f6f765
Merge branch 'main' into unpickle-overallocate
serhiy-storchaka May 22, 2024
d0e667e
Remove empty lines.
serhiy-storchaka Jun 29, 2024
3462d0e
Merge branch 'main' into unpickle-overallocate
serhiy-storchaka Jun 29, 2024
becbd25
Merge remote-tracking branch 'refs/remotes/origin/unpickle-overalloca…
serhiy-storchaka Jun 29, 2024
b257974
Change names, add more comments and update the NEWS entry.
serhiy-storchaka Jun 30, 2024
1e487ca
Merge branch 'main' into unpickle-overallocate
serhiy-storchaka Sep 6, 2024
184984d
Support arbitrary non-contiguous memo keys.
serhiy-storchaka Sep 6, 2024
f0c0728
Reworded NEWS a bit.
gpshead Sep 27, 2024
1f4e2f1
Merge branch 'main' into unpickle-overallocate
serhiy-storchaka Sep 28, 2024
c72d095
Fix C to Python integer conversion.
serhiy-storchaka Sep 28, 2024
e89bfea
Add more comments.
serhiy-storchaka Sep 28, 2024
a80106c
Fix test on 32-bit platforms.
serhiy-storchaka Sep 28, 2024
01bc6b9
Merge branch 'main' into unpickle-overallocate
serhiy-storchaka Apr 8, 2025
20aa1bf
Fix __sizeof__.
serhiy-storchaka Apr 8, 2025
ab58869
Merge branch 'main' into unpickle-overallocate
serhiy-storchaka Apr 9, 2025
2a1cff8
Merge branch 'main' into unpickle-overallocate
serhiy-storchaka Nov 18, 2025
9d4af4e
Improve security in pickle module
serhiy-storchaka Nov 18, 2025
572a2f2
reword NEWS a bit
gpshead Nov 23, 2025
d6279ae
add a couple of comments
gpshead Nov 23, 2025
022108d
expand comment in test_too_large_long_binput
gpshead Nov 23, 2025
f5f50e7
Add memory DoS impact benchmark for pickle module
gpshead Nov 24, 2025
44dbe03
fix docs build?
gpshead Nov 24, 2025
a29c90c
Merge branch 'main' into unpickle-overallocate
gpshead Nov 24, 2025
583df53
Merge branch 'main' into unpickle-overallocate
serhiy-storchaka Nov 27, 2025
54dfd58
Merge branch 'main' into unpickle-overallocate
serhiy-storchaka Dec 1, 2025
7afe4e1
Update comments.
serhiy-storchaka Dec 1, 2025
34 changes: 28 additions & 6 deletions Lib/pickle.py
@@ -189,6 +189,11 @@ def __init__(self, value):
 __all__.extend(x for x in dir() if x.isupper() and not x.startswith('_'))
 
 
+# Data larger than this will be read in chunks, to prevent extreme
+# overallocation.
+_MIN_READ_BUF_SIZE = (1 << 20)
+
+
 class _Framer:
 
     _FRAME_SIZE_MIN = 4
@@ -287,7 +292,7 @@ def read(self, n):
                     "pickle exhausted before end of frame")
             return data
         else:
-            return self.file_read(n)
+            return self._chunked_file_read(n)
 
     def readline(self):
         if self.current_frame:
@@ -302,11 +307,23 @@ def readline(self):
         else:
             return self.file_readline()
 
+    def _chunked_file_read(self, size):
+        cursize = min(size, _MIN_READ_BUF_SIZE)
+        b = self.file_read(cursize)
+        while cursize < size and len(b) == cursize:
+            delta = min(cursize, size - cursize)
+            b += self.file_read(delta)
+            cursize += delta
+        return b
+
     def load_frame(self, frame_size):
         if self.current_frame and self.current_frame.read() != b'':
             raise UnpicklingError(
                 "beginning of a new frame before end of current frame")
-        self.current_frame = io.BytesIO(self.file_read(frame_size))
+        data = self._chunked_file_read(frame_size)
+        if len(data) < frame_size:
+            raise EOFError
+        self.current_frame = io.BytesIO(data)
 
 
 # Tools used for pickling.
@@ -1496,12 +1513,17 @@ def load_binbytes8(self):
     dispatch[BINBYTES8[0]] = load_binbytes8
 
     def load_bytearray8(self):
-        len, = unpack('<Q', self.read(8))
-        if len > maxsize:
+        size, = unpack('<Q', self.read(8))
+        if size > maxsize:
             raise UnpicklingError("BYTEARRAY8 exceeds system's maximum size "
                                   "of %d bytes" % maxsize)
-        b = bytearray(len)
-        self.readinto(b)
+        cursize = min(size, _MIN_READ_BUF_SIZE)
+        b = bytearray(cursize)
+        if self.readinto(b) == cursize:
+            while cursize < size and len(b) == cursize:
+                delta = min(cursize, size - cursize)
+                b += self.read(delta)
+                cursize += delta
         self.append(b)
     dispatch[BYTEARRAY8[0]] = load_bytearray8
 
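The heart of the fix above is the doubling read loop: never trust the announced size; grow the buffer geometrically and stop as soon as the stream runs dry. Below is a minimal standalone sketch of the same strategy; the helper name and constant mirror the patch but are illustrative, not the module's public API:

import io

_MIN_READ_BUF_SIZE = 1 << 20  # 1 MiB, mirrors the constant added above

def chunked_read(file_read, size):
    # Request at most 1 MiB first, then double the amount each pass.
    # A short read means the stream is truncated and ends the loop, so
    # a bogus multi-gigabyte length prefix costs at most about twice
    # the data actually present, never the full announced size.
    cursize = min(size, _MIN_READ_BUF_SIZE)
    b = file_read(cursize)
    while cursize < size and len(b) == cursize:
        delta = min(cursize, size - cursize)
        b += file_read(delta)
        cursize += delta
    return b

stream = io.BytesIO(b'0123456789')  # announces nothing; holds 10 bytes
print(len(chunked_read(stream.read, 8 << 30)))  # 10, not 8 GiB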
167 changes: 164 additions & 3 deletions Lib/test/pickletester.py
@@ -74,6 +74,15 @@ def count_opcode(code, pickle):
 def identity(x):
     return x
 
+def itersize(start, stop):
+    # Produce a geometrically increasing sequence from start to stop
+    # (inclusive) for tests.
+    size = start
+    while size < stop:
+        yield size
+        size <<= 1
+    yield stop
+
 
 class UnseekableIO(io.BytesIO):
     def peek(self, *args):
@@ -853,9 +862,8 @@ def assert_is_copy(self, obj, objcopy, msg=None):
             self.assertEqual(getattr(obj, slot, None),
                              getattr(objcopy, slot, None), msg=msg)
 
-    def check_unpickling_error(self, errors, data):
-        with self.subTest(data=data), \
-             self.assertRaises(errors):
+    def check_unpickling_error_strict(self, errors, data):
+        with self.assertRaises(errors):
             try:
                 self.loads(data)
             except BaseException as exc:
@@ -864,6 +872,10 @@ def check_unpickling_error(self, errors, data):
                           (data, exc.__class__.__name__, exc))
                 raise
 
+    def check_unpickling_error(self, errors, data):
+        with self.subTest(data=data):
+            self.check_unpickling_error_strict(errors, data)
+
     def test_load_from_data0(self):
         self.assert_is_copy(self._testdata, self.loads(DATA0))
 
@@ -1150,6 +1162,155 @@ def test_negative_32b_binput(self):
         dumped = b'\x80\x03X\x01\x00\x00\x00ar\xff\xff\xff\xff.'
         self.check_unpickling_error(ValueError, dumped)
 
+    def test_too_large_put(self):

[Review comment] Can you add a comment explaining why this and the next test method result in ([], []) being returned no matter what, rather than an error, when the values are too large? (I suspect readers with knowledge of the specific pickle protocol may understand, but it isn't obvious otherwise.)

+        # Test that PUT with a large id does not cause allocation of a
+        # too large memo table. The C implementation uses a dict-based memo
+        # for sparse indices (when idx > memo_len * 2) instead of allocating
+        # a massive array. This test verifies large sparse indices work
+        # without causing memory exhaustion.
+        #
+        # The following simple pickle creates an empty list, memoizes it
+        # using a large index, then loads it back on the stack, builds
+        # a tuple containing 2 identical empty lists and returns it.
+        data = lambda n: (b'((lp' + str(n).encode() + b'\n' +
+                          b'g' + str(n).encode() + b'\nt.')
+        #    0: (    MARK
+        #    1: (        MARK
+        #    2: l            LIST       (MARK at 1)
+        #    3: p        PUT        1000000000000
+        #   18: g        GET        1000000000000
+        #   33: t        TUPLE      (MARK at 0)
+        #   34: .    STOP
+        for idx in [10**6, 10**9, 10**12]:
+            if idx > sys.maxsize:
+                continue
+            self.assertEqual(self.loads(data(idx)), ([],)*2)
+
+    def test_too_large_long_binput(self):
+        # Test that LONG_BINPUT with a large id does not cause allocation
+        # of a too large memo table. The C implementation uses a dict-based
+        # memo for sparse indices (when idx > memo_len * 2) instead of
+        # allocating a massive array. This test verifies large sparse
+        # indices work without causing memory exhaustion.
+        #
+        # The following simple pickle creates an empty list, memoizes it
+        # using a large index, then loads it back on the stack, builds
+        # a tuple containing 2 identical empty lists and returns it.
+        data = lambda n: (b'(]r' + struct.pack('<I', n) +
+                          b'j' + struct.pack('<I', n) + b't.')
+        #    0: (    MARK
+        #    1: ]        EMPTY_LIST
+        #    2: r        LONG_BINPUT 4294967295
+        #    7: j        LONG_BINGET 4294967295
+        #   12: t        TUPLE      (MARK at 0)
+        #   13: .    STOP
+        for idx in itersize(1 << 20, min(sys.maxsize, (1 << 32) - 1)):
+            self.assertEqual(self.loads(data(idx)), ([],)*2)
+
+    def _test_truncated_data(self, dumped, expected_error=None):
+        # Test that opcodes which announce a large amount of data without
+        # actually providing it do not cause large memory usage.
+        if expected_error is None:
+            expected_error = self.truncated_data_error
+        # BytesIO
+        with self.assertRaisesRegex(*expected_error):
+            self.loads(dumped)
+        if hasattr(self, 'unpickler'):
+            try:
+                with open(TESTFN, 'wb') as f:
+                    f.write(dumped)
+                # buffered file
+                with open(TESTFN, 'rb') as f:
+                    u = self.unpickler(f)
+                    with self.assertRaisesRegex(*expected_error):
+                        u.load()
+                # unbuffered file
+                with open(TESTFN, 'rb', buffering=0) as f:
+                    u = self.unpickler(f)
+                    with self.assertRaisesRegex(*expected_error):
+                        u.load()
+            finally:
+                os_helper.unlink(TESTFN)
+
+    def test_truncated_large_binstring(self):
+        data = lambda size: b'T' + struct.pack('<I', size) + b'.' * 5
+        #    0: T    BINSTRING '....'
+        #    9: .    STOP
+        self.assertEqual(self.loads(data(4)), '....')  # self-testing
+        for size in itersize(1 << 10, min(sys.maxsize - 5, (1 << 31) - 1)):
+            self._test_truncated_data(data(size))
+        self._test_truncated_data(data(1 << 31),
+            (pickle.UnpicklingError, 'truncated|exceeds|negative byte count'))
+
+    def test_truncated_large_binunicode(self):
+        data = lambda size: b'X' + struct.pack('<I', size) + b'.' * 5
+        #    0: X    BINUNICODE '....'
+        #    9: .    STOP
+        self.assertEqual(self.loads(data(4)), '....')  # self-testing
+        for size in itersize(1 << 10, min(sys.maxsize - 5, (1 << 32) - 1)):
+            self._test_truncated_data(data(size))
+
+    def test_truncated_large_binbytes(self):
+        data = lambda size: b'B' + struct.pack('<I', size) + b'.' * 5
+        #    0: B    BINBYTES b'....'
+        #    9: .    STOP
+        self.assertEqual(self.loads(data(4)), b'....')  # self-testing
+        for size in itersize(1 << 10, min(sys.maxsize, 1 << 31)):
+            self._test_truncated_data(data(size))
+
+    def test_truncated_large_long4(self):
+        data = lambda size: b'\x8b' + struct.pack('<I', size) + b'.' * 5
+        #    0: \x8b LONG4 0x2e2e2e2e
+        #    9: .    STOP
+        self.assertEqual(self.loads(data(4)), 0x2e2e2e2e)  # self-testing
+        for size in itersize(1 << 10, min(sys.maxsize - 5, (1 << 31) - 1)):
+            self._test_truncated_data(data(size))
+        self._test_truncated_data(data(1 << 31),
+            (pickle.UnpicklingError, 'LONG pickle has negative byte count'))
+
+    def test_truncated_large_frame(self):
+        data = lambda size: b'\x95' + struct.pack('<Q', size) + b'N.'
+        #    0: \x95 FRAME 2
+        #    9: N    NONE
+        #   10: .    STOP
+        self.assertIsNone(self.loads(data(2)))  # self-testing
+        for size in itersize(1 << 10, sys.maxsize - 9):
+            self._test_truncated_data(data(size))
+        if sys.maxsize + 1 < 1 << 64:
+            self._test_truncated_data(data(sys.maxsize + 1),
+                ((OverflowError, ValueError),
+                 'FRAME length exceeds|frame size > sys.maxsize'))
+
+    def test_truncated_large_binunicode8(self):
+        data = lambda size: b'\x8d' + struct.pack('<Q', size) + b'.' * 5
+        #    0: \x8d BINUNICODE8 '....'
+        #   13: .    STOP
+        self.assertEqual(self.loads(data(4)), '....')  # self-testing
+        for size in itersize(1 << 10, sys.maxsize - 9):
+            self._test_truncated_data(data(size))
+        if sys.maxsize + 1 < 1 << 64:
+            self._test_truncated_data(data(sys.maxsize + 1), self.size_overflow_error)
+
+    def test_truncated_large_binbytes8(self):
+        data = lambda size: b'\x8e' + struct.pack('<Q', size) + b'.' * 5
+        #    0: \x8e BINBYTES8 b'....'
+        #   13: .    STOP
+        self.assertEqual(self.loads(data(4)), b'....')  # self-testing
+        for size in itersize(1 << 10, sys.maxsize):
+            self._test_truncated_data(data(size))
+        if sys.maxsize + 1 < 1 << 64:
+            self._test_truncated_data(data(sys.maxsize + 1), self.size_overflow_error)
+
+    def test_truncated_large_bytearray8(self):
+        data = lambda size: b'\x96' + struct.pack('<Q', size) + b'.' * 5
+        #    0: \x96 BYTEARRAY8 bytearray(b'....')
+        #   13: .    STOP
+        self.assertEqual(self.loads(data(4)), bytearray(b'....'))  # self-testing
+        for size in itersize(1 << 10, sys.maxsize):
+            self._test_truncated_data(data(size))
+        if sys.maxsize + 1 < 1 << 64:
+            self._test_truncated_data(data(sys.maxsize + 1), self.size_overflow_error)
+
     def test_badly_escaped_string(self):
         self.check_unpickling_error(ValueError, b"S'\\'\n.")
 
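To see why test_too_large_put and test_too_large_long_binput above expect ([], []) rather than an error, it helps to disassemble the handcrafted pickle. A short sketch using the stdlib pickletools on the payload the test's data lambda builds for n = 10**12 (the literal below is spelled out from that lambda):

import pickle, pickletools

payload = b'((lp1000000000000\ng1000000000000\nt.'
pickletools.dis(payload)
# PUT merely stores the empty list in the memo under the sparse key
# 10**12; with a dict-based memo there is nothing to overallocate, and
# GET pushes the same list object again, so loading succeeds.
print(pickle.loads(payload))  # ([], []) -- two references to one list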
8 changes: 7 additions & 1 deletion Lib/test/test_pickle.py
@@ -59,6 +59,8 @@ class PyUnpicklerTests(AbstractUnpickleTests, unittest.TestCase):
     truncated_errors = (pickle.UnpicklingError, EOFError,
                         AttributeError, ValueError,
                         struct.error, IndexError, ImportError)
+    truncated_data_error = (EOFError, '')
+    size_overflow_error = (pickle.UnpicklingError, 'exceeds')
 
     def loads(self, buf, **kwds):
         f = io.BytesIO(buf)
@@ -103,6 +105,8 @@ class InMemoryPickleTests(AbstractPickleTests, AbstractUnpickleTests,
     truncated_errors = (pickle.UnpicklingError, EOFError,
                         AttributeError, ValueError,
                         struct.error, IndexError, ImportError)
+    truncated_data_error = ((pickle.UnpicklingError, EOFError), '')
+    size_overflow_error = ((OverflowError, pickle.UnpicklingError), 'exceeds')
 
     def dumps(self, arg, protocol=None, **kwargs):
         return pickle.dumps(arg, protocol, **kwargs)
@@ -375,6 +379,8 @@ class CUnpicklerTests(PyUnpicklerTests):
     unpickler = _pickle.Unpickler
     bad_stack_errors = (pickle.UnpicklingError,)
     truncated_errors = (pickle.UnpicklingError,)
+    truncated_data_error = (pickle.UnpicklingError, 'truncated')
+    size_overflow_error = (OverflowError, 'exceeds')
 
 class CPicklingErrorTests(PyPicklingErrorTests):
     pickler = _pickle.Pickler
@@ -478,7 +484,7 @@ def test_pickler(self):
                          0)  # Write buffer is cleared after every dump().
 
     def test_unpickler(self):
-        basesize = support.calcobjsize('2P2n2P 2P2n2i5P 2P3n8P2n2i')
+        basesize = support.calcobjsize('2P2n3P 2P2n2i5P 2P3n8P2n2i')
         unpickler = _pickle.Unpickler
         P = struct.calcsize('P')  # Size of memo table entry.
         n = struct.calcsize('n')  # Size of mark table entry.
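The per-implementation truncated_data_error and size_overflow_error pairs above exist because the pure-Python and C unpicklers fail differently on the same truncated stream. A sketch of that difference (assumes a CPython build with the _pickle accelerator and this fix applied):

import io, pickle, struct
import _pickle

# BINUNICODE announcing a 1 GiB string, followed by a single byte.
payload = b'X' + struct.pack('<I', 1 << 30) + b'.'

try:  # pure-Python implementation: runs off the end of the stream
    pickle._Unpickler(io.BytesIO(payload)).load()
except EOFError:
    print('pure Python: EOFError')

try:  # C implementation: reports the truncation explicitly
    _pickle.Unpickler(io.BytesIO(payload)).load()
except pickle.UnpicklingError as exc:
    print('C accelerator:', exc)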
@@ -0,0 +1,7 @@
+Fix a potential memory denial of service in the :mod:`pickle` module.
+When reading pickled data received from an untrusted source, it could cause
+an arbitrary amount of memory to be allocated, even if the code that is
+allowed to execute is restricted by overriding the
+:meth:`~pickle.Unpickler.find_class` method.
+This could have led to symptoms including a :exc:`MemoryError`, swapping, out
+of memory (OOM) killed processes or containers, or even system crashes.
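A rough way to observe the impact described in this entry (a sketch in the spirit of the benchmark commit in this PR, not a copy of it; the announced size and the exact peak depend on the build):

import pickle, struct, tracemalloc

# BINBYTES8 claiming 4 GiB of payload, followed by one byte.
payload = b'\x8e' + struct.pack('<Q', 4 << 30) + b'.'

tracemalloc.start()
try:
    pickle.loads(payload)
except Exception as exc:
    print(type(exc).__name__, exc)
_, peak = tracemalloc.get_traced_memory()
tracemalloc.stop()
# With the fix the peak stays near the 1 MiB chunk size; before it,
# the unpickler allocated the full announced size up front.
print('peak traced allocation:', peak, 'bytes')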