Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 22 additions & 4 deletions Lib/_pyio.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,10 @@
# when the device block size is available.
DEFAULT_BUFFER_SIZE = 128 * 1024 # bytes

# Data larger than this will be read in chunks, to prevent extreme
# overallocation.
_MIN_READ_BUF_SIZE = 1 << 20

# NOTE: Base classes defined here are registered with the "official" ABCs
# defined in io.py. We don't use real inheritance though, because we don't want
# to inherit the C implementations.
Expand Down Expand Up @@ -611,15 +615,29 @@ def read(self, size=-1):
"""
if size is None:
size = -1
size = size.__index__()
if size < 0:
return self.readall()
b = bytearray(size.__index__())
b = bytearray(min(size, _MIN_READ_BUF_SIZE))
n = self.readinto(b)
if n is None:
return None
if n < 0 or n > len(b):
raise ValueError(f"readinto returned {n} outside buffer size {len(b)}")
del b[n:]
written = 0
while True:
if n != len(b) - written:
if n < 0 or n > len(b) - written:
raise ValueError(f"readinto returned {n} outside buffer size {len(b) - written}")
written += n
break
written += n
if written >= size:
break
b.resize(min(2*len(b), size))
with memoryview(b) as m:
n = self.readinto(m[written:])
if n is None:
break
del b[written:]
return b.take_bytes()

def readall(self):
Expand Down
10 changes: 10 additions & 0 deletions Lib/test/support/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3184,3 +3184,13 @@ def linked_to_musl():
return _linked_to_musl
_linked_to_musl = tuple(map(int, version.split('.')))
return _linked_to_musl


def itersize(start, stop):
    """Yield sizes doubling from *start*, finishing with *stop* (inclusive).

    Used by tests to sweep a geometrically increasing range of read sizes.
    """
    while start < stop:
        yield start
        start *= 2
    yield stop
19 changes: 19 additions & 0 deletions Lib/test/test_io/test_bufferedio.py
Original file line number Diff line number Diff line change
Expand Up @@ -433,6 +433,25 @@ def test_read_all(self):

self.assertEqual(b"abcdefg", bufio.read())

def test_large_read_from_small_file(self):
    # A read() request far larger than the available data must still
    # return just the available bytes, without overallocating the buffer.
    for request in support.itersize(1 << 20, sys.maxsize):
        raw = self.MockRawIO((b'abc',))
        buffered = self.tp(raw)
        self.assertEqual(buffered.read(request), b'abc')

def test_large_read_from_large_file(self):
    # Both an exact-size request and a huge request must return the
    # complete file contents.
    payload = b'abc' * ((5 << 20) + 54321)
    for request in (len(payload), sys.maxsize):
        raw = self.MockFileIO(payload)
        buffered = self.tp(raw)
        self.assertEqual(buffered.read(request), payload)

def test_large_read1_from_small_file(self):
    # read1() with an oversized request must return only the available
    # bytes and must not allocate a buffer of the requested size.
    for request in support.itersize(1 << 20, sys.maxsize):
        raw = self.MockRawIO((b'abc',))
        buffered = self.tp(raw)
        self.assertEqual(buffered.read1(request), b'abc')

@threading_helper.requires_working_threading()
@support.requires_resource('cpu')
def test_threads(self):
Expand Down
10 changes: 10 additions & 0 deletions Lib/test/test_io/test_fileio.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from weakref import proxy
from functools import wraps

from test import support
from test.support import (
cpython_only, swap_attr, gc_collect, is_wasi,
infinite_recursion, strace_helper
Expand Down Expand Up @@ -730,6 +731,15 @@ def __setattr__(self, name, value):
self.assertRaises(MyException, MyFileIO, fd)
os.close(fd) # should not raise OSError(EBADF)

def test_large_read_from_small_file(self):
    # FileIO.read() with a huge size argument on a tiny file must return
    # the file contents without overallocating.
    self.addCleanup(os.remove, TESTFN)
    payload = b'abc'
    with self.FileIO(TESTFN, 'wb') as out:
        out.write(payload)
    for request in support.itersize(1 << 20, sys.maxsize):
        with self.FileIO(TESTFN, 'rb') as inp:
            self.assertEqual(inp.read(request), payload)


class COtherFileTests(OtherFileTests, unittest.TestCase):
FileIO = _io.FileIO
Expand Down
5 changes: 5 additions & 0 deletions Lib/test/test_io/test_general.py
Original file line number Diff line number Diff line change
Expand Up @@ -628,6 +628,11 @@ def readinto(self, buf):
rawio = RawIOKeepsReference([b"1234"])
rawio.read(4)

def test_RawIOBase_large_read_from_small_file(self):
    # The default RawIOBase.read() (built on readinto) must handle a
    # huge size request against a tiny stream without overallocating.
    for request in support.itersize(1 << 20, sys.maxsize):
        stream = self.MockRawIOWithoutRead((b"abc",))
        self.assertEqual(stream.read(request), b'abc')

def test_types_have_dict(self):
test = (
self.IOBase(),
Expand Down
18 changes: 4 additions & 14 deletions Lib/test/test_os/test_os.py
Original file line number Diff line number Diff line change
Expand Up @@ -321,25 +321,15 @@ def test_readinto_badarg(self):
self.assertEqual(s, 4)
self.assertEqual(buffer, b"spam")

@support.cpython_only
# Skip the test on 32-bit platforms: the number of bytes must fit in a
# Py_ssize_t type
@unittest.skipUnless(INT_MAX < PY_SSIZE_T_MAX,
"needs INT_MAX < PY_SSIZE_T_MAX")
@support.bigmemtest(size=INT_MAX + 10, memuse=1, dry_run=False)
def test_large_read(self, size):
def test_large_read_from_small_file(self):
self.addCleanup(os_helper.unlink, os_helper.TESTFN)
create_file(os_helper.TESTFN, b'test')

# Issue #21932: Make sure that os.read() does not raise an
# OverflowError for size larger than INT_MAX
with open(os_helper.TESTFN, "rb") as fp:
data = os.read(fp.fileno(), size)

# The test does not try to read more than 2 GiB at once because the
# operating system is free to return less bytes than requested.
self.assertEqual(data, b'test')

for size in support.itersize(1 << 20, sys.maxsize):
with open(os_helper.TESTFN, "rb") as fp:
self.assertEqual(os.read(fp.fileno(), size), b'test')

@support.cpython_only
# Skip the test on 32-bit platforms: the number of bytes must fit in a
Expand Down
14 changes: 14 additions & 0 deletions Lib/test/test_os/test_posix.py
Original file line number Diff line number Diff line change
Expand Up @@ -304,6 +304,20 @@ def test_pread(self):
finally:
os.close(fd)

@unittest.skipUnless(hasattr(posix, 'pread'), "test needs posix.pread()")
def test_large_pread_from_small_file(self):
    # posix.pread() with a huge size on a 4-byte file must return the
    # bytes after the offset without overallocating.
    wfd = os.open(os_helper.TESTFN, os.O_WRONLY | os.O_CREAT)
    try:
        os.write(wfd, b'test')
    finally:
        os.close(wfd)
    rfd = os.open(os_helper.TESTFN, os.O_RDONLY)
    try:
        for request in support.itersize(1 << 20, sys.maxsize):
            self.assertEqual(posix.pread(rfd, request, 1), b'est')
    finally:
        os.close(rfd)

@unittest.skipUnless(hasattr(posix, 'preadv'), "test needs posix.preadv()")
def test_preadv(self):
fd = os.open(os_helper.TESTFN, os.O_RDWR | os.O_CREAT)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
Fix a potential memory denial of service when reading a large specified
number of bytes from a file, a file descriptor, or a buffered stream.
This affects the :func:`os.read` and :func:`os.pread` functions and
the :meth:`!read` and :meth:`!read1` methods of various :mod:`io` classes.
When the number of bytes to read comes from an untrusted source, it could
cause an arbitrary amount of memory to be allocated.
This could have led to symptoms including a :exc:`MemoryError`, swapping,
out-of-memory (OOM) killed processes or containers, or even system crashes.
4 changes: 4 additions & 0 deletions Modules/_io/_iomodule.h
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,10 @@ extern int _PyIO_trap_eintr(void);

#define DEFAULT_BUFFER_SIZE (128 * 1024) /* bytes */

// Data larger than this will be read in chunks, to prevent extreme
// overallocation.
#define MIN_READ_BUF_SIZE (1 << 20)

/*
* Offset type for positioning.
*/
Expand Down
59 changes: 45 additions & 14 deletions Modules/_io/bufferedio.c
Original file line number Diff line number Diff line change
Expand Up @@ -407,8 +407,8 @@ _enter_buffered_busy(buffered *self)

#define MINUS_LAST_BLOCK(self, size) \
(self->buffer_mask ? \
(size & ~self->buffer_mask) : \
(self->buffer_size * (size / self->buffer_size)))
((size) & ~self->buffer_mask) : \
(self->buffer_size * ((size) / self->buffer_size)))


static int
Expand Down Expand Up @@ -1071,6 +1071,7 @@ _io__Buffered_read1_impl(buffered *self, Py_ssize_t n)
}
_bufferedreader_reset_buf(self);

n = Py_MIN(n, self->buffer_size);
PyBytesWriter *writer = PyBytesWriter_Create(n);
if (writer == NULL) {
return NULL;
Expand Down Expand Up @@ -1795,25 +1796,32 @@ _bufferedreader_read_fast(buffered *self, Py_ssize_t n)
* or until an EOF occurs or until read() would block.
*/
static PyObject *
_bufferedreader_read_generic(buffered *self, Py_ssize_t n)
_bufferedreader_read_generic(buffered *self, Py_ssize_t size)
{
Py_ssize_t current_size, remaining, written;
Py_ssize_t current_size, written;

current_size = Py_SAFE_DOWNCAST(READAHEAD(self), Py_off_t, Py_ssize_t);
if (n <= current_size)
return _bufferedreader_read_fast(self, n);
if (size <= current_size)
return _bufferedreader_read_fast(self, size);

PyBytesWriter *writer = PyBytesWriter_Create(n);
Py_ssize_t chunksize = self->buffer_size;
if (chunksize < MIN_READ_BUF_SIZE) {
chunksize = MINUS_LAST_BLOCK(self, MIN_READ_BUF_SIZE);
}
Py_ssize_t allocated = size, resize_after = size;
if (size - current_size > chunksize) {
allocated = current_size + chunksize;
resize_after = allocated - Py_MAX(self->buffer_size, chunksize/4);
}
PyBytesWriter *writer = PyBytesWriter_Create(allocated);
if (writer == NULL) {
goto error;
}
char *out = PyBytesWriter_GetData(writer);

remaining = n;
written = 0;
if (current_size > 0) {
memcpy(out, self->buffer + self->pos, current_size);
remaining -= current_size;
written += current_size;
self->pos += current_size;
}
Expand All @@ -1825,12 +1833,28 @@ _bufferedreader_read_generic(buffered *self, Py_ssize_t n)
Py_DECREF(r);
}
_bufferedreader_reset_buf(self);
while (remaining > 0) {
while (written < size) {
/* We want to read a whole block at the end into buffer.
If we had readv() we could do this in one pass. */
Py_ssize_t r = MINUS_LAST_BLOCK(self, remaining);
if (r == 0)
If we had readv() we could do this in one pass for the last chunk. */
if (written > resize_after) {
if (size - allocated > chunksize) {
allocated += chunksize;
resize_after = allocated - Py_MAX(self->buffer_size, chunksize/4);
chunksize += Py_MIN(chunksize, size - allocated - chunksize);
}
else {
resize_after = allocated = size;
}
if (PyBytesWriter_Resize(writer, allocated) < 0) {
PyBytesWriter_Discard(writer);
goto error;
}
out = PyBytesWriter_GetData(writer);
}
Py_ssize_t r = MINUS_LAST_BLOCK(self, allocated - written);
if (r == 0) {
break;
}
r = _bufferedreader_raw_read(self, out + written, r);
if (r == -1)
goto error;
Expand All @@ -1842,13 +1866,20 @@ _bufferedreader_read_generic(buffered *self, Py_ssize_t n)
PyBytesWriter_Discard(writer);
Py_RETURN_NONE;
}
remaining -= r;
written += r;
}
Py_ssize_t remaining = size - written;
assert(remaining <= self->buffer_size);
self->pos = 0;
self->raw_pos = 0;
self->read_end = 0;
if (allocated < size) {
if (PyBytesWriter_Resize(writer, size) < 0) {
PyBytesWriter_Discard(writer);
goto error;
}
out = PyBytesWriter_GetData(writer);
}
/* NOTE: when the read is satisfied, we avoid issuing any additional
reads, which could block indefinitely (e.g. on a socket).
See issue #9550. */
Expand Down
10 changes: 5 additions & 5 deletions Modules/_io/clinic/iobase.c.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

42 changes: 29 additions & 13 deletions Modules/_io/fileio.c
Original file line number Diff line number Diff line change
Expand Up @@ -886,25 +886,41 @@ _io_FileIO_read_impl(fileio *self, PyTypeObject *cls, Py_ssize_t size)
size = _PY_READ_MAX;
}

PyBytesWriter *writer = PyBytesWriter_Create(size);
Py_ssize_t allocated = Py_MIN(size, MIN_READ_BUF_SIZE);
Py_ssize_t written = 0;

PyBytesWriter *writer = PyBytesWriter_Create(allocated);
if (writer == NULL) {
return NULL;
}
char *ptr = PyBytesWriter_GetData(writer);

Py_ssize_t n = _Py_read(self->fd, ptr, size);
if (n == -1) {
// copy errno because PyBytesWriter_Discard() can indirectly modify it
int err = errno;
PyBytesWriter_Discard(writer);
if (err == EAGAIN) {
PyErr_Clear();
Py_RETURN_NONE;
while (1) {
char *ptr = PyBytesWriter_GetData(writer);
Py_ssize_t n = _Py_read(self->fd, ptr + written, allocated - written);
if (n == -1) {
if (errno == EAGAIN) {
if (!written) {
// Nothing was read yet -- return None.
PyBytesWriter_Discard(writer);
PyErr_Clear();
Py_RETURN_NONE;
}
break;
}
PyBytesWriter_Discard(writer);
return NULL;
}
written += n;
if (written < allocated || allocated >= size) {
break;
}
allocated += Py_MIN(allocated, size - allocated);
if (PyBytesWriter_Resize(writer, allocated) < 0) {
PyBytesWriter_Discard(writer);
return NULL;
}
return NULL;
}

return PyBytesWriter_FinishWithSize(writer, n);
return PyBytesWriter_FinishWithSize(writer, written);
}

/*[clinic input]
Expand Down
Loading
Loading