python · vstinner · Apr 30, 2024 · Apr 17, 2024 · Apr 18, 2024 · Apr 18, 2024
@@ -47,6 +47,10 @@ def write(self, s):
             raise TypeError(
                 f"write() argument must be str, not {type(s).__name__}")
 
+        # In case `s` is a str subclass that writes itself to stdout or stderr
+        # when we call its methods, convert it to an actual str.
+        s = str.__str__(s)
+
         # We want to emit one log message per line wherever possible, so split
         # the string before sending it to the superclass. Note that
         # "".splitlines() == [], so nothing will be logged for an empty string.
@@ -77,14 +81,17 @@ def write(self, b):
             raise TypeError(
                 f"write() argument must be bytes-like, not {type(b).__name__}"
             ) from None
-        else:
-            b_out = bytes(b)
 
-        # Encode null bytes using "modified UTF-8" to avoid truncating the
-        # message.
-        b_out = b_out.replace(b"\x00", b"\xc0\x80")
+        b_out = bytes(b)
+        b_len = len(b_out)  # May be different from len(b) if b is an array.
 
         # Writing an empty string to the stream should have no effect.
         if b_out:
+            # Encode null bytes using "modified UTF-8" to avoid truncating the
+            # message. This should not affect the return value, as the caller
+            # may be expecting it to match the length of the input.
+            b_out = b_out.replace(b"\x00", b"\xc0\x80")
+
             self.android_log_write(self.prio, self.tag, b_out)
-        return len(b)
+
+        return b_len
@@ -4,8 +4,10 @@
 import subprocess
 import sys
 import unittest
+from array import array
 from contextlib import contextmanager
 from threading import Thread
+from test.support import LOOPBACK_TIMEOUT
 from time import time
 
 
@@ -69,7 +71,7 @@ def assert_log(self, level, tag, expected, *, skip=False, timeout=0.5):
 
     def tearDown(self):
         self.logcat_process.terminate()
-        self.logcat_process.wait(0.1)
+        self.logcat_process.wait(LOOPBACK_TIMEOUT)
 
     @contextmanager
     def unbuffered(self, stream):
@@ -119,11 +121,17 @@ def write(s, lines=None):
                     # Non-BMP emoji
                     write("\U0001f600")
 
+                    # Non-encodable surrogates
+                    write("\ud800\udc00", ["\\ud800\\udc00"])
+
+                    # Code used by surrogateescape (which isn't enabled here)
+                    write("\udc80", ["\\udc80"])
+
                     # Null characters are logged using "modified UTF-8".
-                    write("\u0000", [r"\xc0\x80"])
-                    write("a\u0000", [r"a\xc0\x80"])
-                    write("\u0000b", [r"\xc0\x80b"])
-                    write("a\u0000b", [r"a\xc0\x80b"])
+                    write("\u0000", ["\\xc0\\x80"])
+                    write("a\u0000", ["a\\xc0\\x80"])
+                    write("\u0000b", ["\\xc0\\x80b"])
+                    write("a\u0000b", ["a\\xc0\\x80b"])
 
                 # Multi-line messages. Avoid identical consecutive lines, as
                 # they may activate "chatty" filtering and break the tests.
@@ -153,6 +161,22 @@ def write(s, lines=None):
                 write("hello\r\nworld\r\n", ["hello", "world"])
                 write("\r\n", [""])
 
+                # Non-standard line separators should be preserved.
+                write("before form feed\x0cafter form feed\n",
+                      ["before form feed\x0cafter form feed"])
+                write("before line separator\u2028after line separator\n",
+                      ["before line separator\u2028after line separator"])
+
+                # String subclasses are accepted, and if their methods write
+                # themselves, this doesn't cause infinite recursion.
+                class CustomStr(str):
+                    def splitlines(self, *args, **kwargs):
+                        sys.stdout.write(self)
+                        return super().splitlines(*args, **kwargs)
+
+                write(CustomStr("custom\n"), ["custom"])
+
+                # Non-string classes are not accepted.
                 for obj in [b"", b"hello", None, 42]:
                     with self.subTest(obj=obj):
                         with self.assertRaisesRegex(
@@ -171,9 +195,10 @@ def write(s, lines=None):
                 stream.flush()
                 self.assert_log(level, tag, "helloworld")
 
-                # Long lines are split into blocks of 1000 *characters*, but
-                # TextIOWrapper should then join them back together as much as
-                # possible without exceeding 4000 UTF-8 *bytes*.
+                # Long lines are split into blocks of 1000 characters
+                # (MAX_CHARS_PER_WRITE), but TextIOWrapper should then join them
+                # back together as much as possible without exceeding 4000 UTF-8
+                # bytes (MAX_BYTES_PER_WRITE).
                 #
                 # ASCII (1 byte per character)
                 write(("foobar" * 700) + "\n",
@@ -201,8 +226,10 @@ def test_bytes(self):
                 self.assertTrue(stream.writable())
                 self.assertFalse(stream.readable())
 
-                def write(b, lines=None):
-                    self.assertEqual(len(b), stream.write(b))
+                def write(b, lines=None, *, write_len=None):
+                    if write_len is None:
+                        write_len = len(b)
+                    self.assertEqual(write_len, stream.write(b))
                     if lines is None:
                         lines = [b.decode()]
                     self.assert_logs(level, tag, lines)
@@ -223,17 +250,17 @@ def write(b, lines=None):
                 # Non-BMP emoji
                 write(b"\xf0\x9f\x98\x80")
 
-                # Null characters are logged using "modified UTF-8".
-                write(b"\x00", [r"\xc0\x80"])
-                write(b"a\x00", [r"a\xc0\x80"])
-                write(b"\x00b", [r"\xc0\x80b"])
-                write(b"a\x00b", [r"a\xc0\x80b"])
+                # Null bytes are logged using "modified UTF-8".
+                write(b"\x00", ["\\xc0\\x80"])
+                write(b"a\x00", ["a\\xc0\\x80"])
+                write(b"\x00b", ["\\xc0\\x80b"])
+                write(b"a\x00b", ["a\\xc0\\x80b"])
 
                 # Invalid UTF-8
-                write(b"\xff", [r"\xff"])
-                write(b"a\xff", [r"a\xff"])
-                write(b"\xffb", [r"\xffb"])
-                write(b"a\xffb", [r"a\xffb"])
+                write(b"\xff", ["\\xff"])
+                write(b"a\xff", ["a\\xff"])
+                write(b"\xffb", ["\\xffb"])
+                write(b"a\xffb", ["a\\xffb"])
 
                 # Log entries containing newlines are shown differently by
                 # `logcat -v tag`, `logcat -v long`, and Android Studio. We
@@ -259,6 +286,34 @@ def write(b, lines=None):
                 write(b"hello\r\nworld\r\n", ["hello", "world"])
                 write(b"\r\n", [""])
 
+                # Other bytes-like objects are accepted.
+                write(bytearray(b"bytearray"))
+
+                mv = memoryview(b"memoryview")
+                write(mv, ["memoryview"])  # Contiguous
+                write(mv[::2], ["mmrve"])  # Non-contiguous
+
+                write(
+                    # Android only supports little-endian architectures, so the
+                    # bytes representation is as follows:
+                    array("H", [
+                        0,      # 00 00
+                        1,      # 01 00
+                        65534,  # FE FF
+                        65535,  # FF FF
+                    ]),
+
+                    # After encoding null bytes with modified UTF-8, the only
+                    # valid UTF-8 sequence is \x01. All other bytes are handled
+                    # by backslashreplace.
+                    ["\\xc0\\x80\\xc0\\x80"
+                     "\x01\\xc0\\x80"
+                     "\\xfe\\xff"
+                     "\\xff\\xff"],
+                    write_len=8,
+                )
+
+                # Non-bytes-like classes are not accepted.
                 for obj in ["", "hello", None, 42]:
                     with self.subTest(obj=obj):
                         with self.assertRaisesRegex(