Skip to content

Commit

Permalink
bpo-43260: io: Prevent large data from remaining in the textio buffer. (GH-24592)
Browse files Browse the repository at this point in the history
When a very large amount of data remains in TextIOWrapper's internal buffer, flush() may keep failing forever.

To avoid this, prevent data larger than chunk_size from remaining in TextIOWrapper's internal
buffer.

Co-Authored-By: Eryk Sun.
(cherry picked from commit 01806d5)
  • Loading branch information
methane committed Feb 22, 2021
1 parent 6ddb255 commit 6e2f144
Show file tree
Hide file tree
Showing 3 changed files with 46 additions and 3 deletions.
27 changes: 27 additions & 0 deletions Lib/test/test_io.py
Expand Up @@ -3736,6 +3736,33 @@ def test_del__CHUNK_SIZE_SystemError(self):
with self.assertRaises(AttributeError):
del t._CHUNK_SIZE

def test_internal_buffer_size(self):
# bpo-43260: TextIOWrapper's internal buffer should not store
# data larger than chunk size.
#
# The test wraps a raw stream that rejects any single write larger than
# the chunk size, then checks (via buf._write_stack) which writes reach
# the raw stream and in what pieces.
chunk_size = 8192 # default chunk size; replaced by t._CHUNK_SIZE below

# Raw stream stand-in: raises RuntimeError for a write longer than
# chunk_size, otherwise defers to MockRawIO.write().  chunk_size is
# looked up when write() runs, so the value assigned after the wrapper
# is created is the one that is enforced.
class MockIO(self.MockRawIO):
def write(self, data):
if len(data) > chunk_size:
raise RuntimeError
return super().write(data)

buf = MockIO()
t = self.TextIOWrapper(buf, encoding="ascii")
# Use the wrapper's actual chunk size rather than the assumed default.
chunk_size = t._CHUNK_SIZE
t.write("abc")
t.write("def")
# Only six bytes have been written, which is below the chunk size, so t
# has not written any data to buf yet.
self.assertEqual([], buf._write_stack)

# Text one character longer than the chunk size must surface the raw
# stream's RuntimeError to the caller of t.write().
with self.assertRaises(RuntimeError):
t.write("x"*(chunk_size+1))

# The previously pending "abcdef" reached buf as a single write, and the
# oversized text was not recorded in the write stack.
self.assertEqual([b"abcdef"], buf._write_stack)
# After the failed write the wrapper must still be usable: "ghi" and a
# write of exactly chunk_size characters are expected to arrive as two
# separate writes, neither of them combined with the rejected data.
t.write("ghi")
t.write("x"*chunk_size)
self.assertEqual([b"abcdef", b"ghi", b"x"*chunk_size], buf._write_stack)


class PyTextIOWrapperTest(TextIOWrapperTest):
io = pyio
Expand Down
@@ -0,0 +1,2 @@
Fix a bug where TextIOWrapper could never flush its internal buffer after very large
text had been written.
20 changes: 17 additions & 3 deletions Modules/_io/textio.c
Expand Up @@ -1558,6 +1558,8 @@ _textiowrapper_writeflush(textio *self)
_PyIO_str_write, b, NULL);
} while (ret == NULL && _PyIO_trap_eintr());
Py_DECREF(b);
// NOTE: The buffer has already been cleared, but if an error occurred we
// don't know how many bytes were actually written.
if (ret == NULL)
return -1;
Py_DECREF(ret);
Expand Down Expand Up @@ -1615,7 +1617,10 @@ _io_TextIOWrapper_write_impl(textio *self, PyObject *text)

/* XXX What if we were just reading? */
if (self->encodefunc != NULL) {
if (PyUnicode_IS_ASCII(text) && is_asciicompat_encoding(self->encodefunc)) {
if (PyUnicode_IS_ASCII(text) &&
// See bpo-43260
PyUnicode_GET_LENGTH(text) <= self->chunk_size &&
is_asciicompat_encoding(self->encodefunc)) {
b = text;
Py_INCREF(b);
}
Expand All @@ -1624,9 +1629,10 @@ _io_TextIOWrapper_write_impl(textio *self, PyObject *text)
}
self->encoding_start_of_stream = 0;
}
else
else {
b = PyObject_CallMethodObjArgs(self->encoder,
_PyIO_str_encode, text, NULL);
}

Py_DECREF(text);
if (b == NULL)
Expand All @@ -1651,6 +1657,14 @@ _io_TextIOWrapper_write_impl(textio *self, PyObject *text)
self->pending_bytes_count = 0;
self->pending_bytes = b;
}
else if (self->pending_bytes_count + bytes_len > self->chunk_size) {
// Avoid concatenating more than chunk_size bytes of pending data.
if (_textiowrapper_writeflush(self) < 0) {
Py_DECREF(b);
return NULL;
}
self->pending_bytes = b;
}
else if (!PyList_CheckExact(self->pending_bytes)) {
PyObject *list = PyList_New(2);
if (list == NULL) {
Expand All @@ -1670,7 +1684,7 @@ _io_TextIOWrapper_write_impl(textio *self, PyObject *text)
}

self->pending_bytes_count += bytes_len;
if (self->pending_bytes_count > self->chunk_size || needflush ||
if (self->pending_bytes_count >= self->chunk_size || needflush ||
text_needflush) {
if (_textiowrapper_writeflush(self) < 0)
return NULL;
Expand Down

0 comments on commit 6e2f144

Please sign in to comment.