Skip to content

Commit

Permalink
pythongh-95782: Fix io.BufferedReader.tell() etc. being able to retur…
Browse files Browse the repository at this point in the history
…n offsets < 0 (pythonGH-99709)

lseek() always returns 0 for character pseudo-devices like
`/dev/urandom` (for other non-regular files, e.g. `/dev/stdin`, it
always returns -1, to which CPython reacts by raising appropriate
exceptions). They are thus technically seekable despite not having seek
semantics.

When calling read() on e.g. an instance of `io.BufferedReader` that
wraps such a file, `BufferedReader` reads ahead, filling its buffer,
creating a discrepancy between the number of bytes read and the internal
`tell()` always returning 0, which previously resulted in e.g.
`BufferedReader.tell()` or `BufferedReader.seek()` being able to return
positions < 0 even though these are supposed to be always >= 0.

Invariably keep the return value non-negative by returning
max(former_return_value, 0) instead, and add some corresponding tests.
(cherry picked from commit 26800cf)

Co-authored-by: 6t8k <58048945+6t8k@users.noreply.github.com>
  • Loading branch information
6t8k authored and miss-islington committed Feb 17, 2024
1 parent ba2f2ca commit 24c736a
Show file tree
Hide file tree
Showing 4 changed files with 62 additions and 3 deletions.
3 changes: 2 additions & 1 deletion Lib/_pyio.py
Expand Up @@ -1224,7 +1224,8 @@ def _readinto(self, buf, read1):
return written

def tell(self):
return _BufferedIOMixin.tell(self) - len(self._read_buf) + self._read_pos
# GH-95782: Keep return value non-negative
return max(_BufferedIOMixin.tell(self) - len(self._read_buf) + self._read_pos, 0)

def seek(self, pos, whence=0):
if whence not in valid_seek_flags:
Expand Down
47 changes: 46 additions & 1 deletion Lib/test/test_io.py
Expand Up @@ -263,6 +263,27 @@ class PyMockUnseekableIO(MockUnseekableIO, pyio.BytesIO):
UnsupportedOperation = pyio.UnsupportedOperation


class MockCharPseudoDevFileIO(MockFileIO):
# GH-95782
# ftruncate() does not work on these special files (and CPython then raises
# appropriate exceptions), so truncate() does not have to be accounted for
# here.
def __init__(self, data):
super().__init__(data)

def seek(self, *args):
return 0

def tell(self, *args):
return 0

class CMockCharPseudoDevFileIO(MockCharPseudoDevFileIO, io.BytesIO):
pass

class PyMockCharPseudoDevFileIO(MockCharPseudoDevFileIO, pyio.BytesIO):
pass


class MockNonBlockWriterIO:

def __init__(self):
Expand Down Expand Up @@ -1556,6 +1577,30 @@ def test_truncate_on_read_only(self):
self.assertRaises(self.UnsupportedOperation, bufio.truncate)
self.assertRaises(self.UnsupportedOperation, bufio.truncate, 0)

def test_tell_character_device_file(self):
# GH-95782
# For the (former) bug in BufferedIO to manifest, the wrapped IO obj
# must be able to produce at least 2 bytes.
raw = self.MockCharPseudoDevFileIO(b"12")
buf = self.tp(raw)
self.assertEqual(buf.tell(), 0)
self.assertEqual(buf.read(1), b"1")
self.assertEqual(buf.tell(), 0)

def test_seek_character_device_file(self):
raw = self.MockCharPseudoDevFileIO(b"12")
buf = self.tp(raw)
self.assertEqual(buf.seek(0, io.SEEK_CUR), 0)
self.assertEqual(buf.seek(1, io.SEEK_SET), 0)
self.assertEqual(buf.seek(0, io.SEEK_CUR), 0)
self.assertEqual(buf.read(1), b"1")

# In the C implementation, tell() sets the BufferedIO's abs_pos to 0,
# which means that the next seek() could return a negative offset if it
# does not sanity-check:
self.assertEqual(buf.tell(), 0)
self.assertEqual(buf.seek(0, io.SEEK_CUR), 0)


class CBufferedReaderTest(BufferedReaderTest, SizeofTest):
tp = io.BufferedReader
Expand Down Expand Up @@ -4790,7 +4835,7 @@ def load_tests(loader, tests, pattern):
# classes in the __dict__ of each test.
mocks = (MockRawIO, MisbehavedRawIO, MockFileIO, CloseFailureIO,
MockNonBlockWriterIO, MockUnseekableIO, MockRawIOWithoutRead,
SlowFlushRawIO)
SlowFlushRawIO, MockCharPseudoDevFileIO)
all_members = io.__all__
c_io_ns = {name : getattr(io, name) for name in all_members}
py_io_ns = {name : getattr(pyio, name) for name in all_members}
Expand Down
@@ -0,0 +1,4 @@
Fix :func:`io.BufferedReader.tell`, :func:`io.BufferedReader.seek`,
:func:`_pyio.BufferedReader.tell`, :func:`io.BufferedRandom.tell`,
:func:`io.BufferedRandom.seek` and :func:`_pyio.BufferedRandom.tell`
being able to return negative offsets.
11 changes: 10 additions & 1 deletion Modules/_io/bufferedio.c
Expand Up @@ -1196,7 +1196,11 @@ buffered_tell(buffered *self, PyObject *Py_UNUSED(ignored))
if (pos == -1)
return NULL;
pos -= RAW_OFFSET(self);
/* TODO: sanity check (pos >= 0) */

// GH-95782
if (pos < 0)
pos = 0;

return PyLong_FromOff_t(pos);
}

Expand Down Expand Up @@ -1263,6 +1267,11 @@ _io__Buffered_seek_impl(buffered *self, PyObject *targetobj, int whence)
offset = target;
if (offset >= -self->pos && offset <= avail) {
self->pos += offset;

// GH-95782
if (current - avail + offset < 0)
return PyLong_FromOff_t(0);

return PyLong_FromOff_t(current - avail + offset);
}
}
Expand Down

0 comments on commit 24c736a

Please sign in to comment.