From a553e8dbef795f99620c7a28d9abc9eefeae0644 Mon Sep 17 00:00:00 2001 From: CF Bolz-Tereick Date: Sat, 2 Mar 2024 12:51:06 +0100 Subject: [PATCH] make expandtabs not quadratic for bytes either --- pypy/objspace/std/stringmethods.py | 19 ++++++++++++------- pypy/objspace/std/unicodeobject.py | 1 + 2 files changed, 13 insertions(+), 7 deletions(-) diff --git a/pypy/objspace/std/stringmethods.py b/pypy/objspace/std/stringmethods.py index 2569e8ecf53..6509b3effcc 100644 --- a/pypy/objspace/std/stringmethods.py +++ b/pypy/objspace/std/stringmethods.py @@ -201,14 +201,19 @@ def descr_expandtabs(self, space, tabsize=8): ovfcheck(len(splitted) * tabsize) except OverflowError: raise oefmt(space.w_OverflowError, "new string is too long") - expanded = oldtoken = splitted.pop(0) - - for token in splitted: - expanded += self._multi_chr(self._chr(' ')) * self._tabindent(oldtoken, - tabsize) + token + newlen = self._len() - len(splitted) + 1 + builder = self._builder(len(value)) + oldtoken = splitted[0] + builder.append(oldtoken) + + for index in range(1, len(splitted)): + token = splitted[index] + dist = self._tabindent(oldtoken, tabsize) + builder.append_multiple_char(' ', dist) + builder.append(token) + newlen += dist oldtoken = token - - return self._new(expanded) + return self._new(builder.build()) def _tabindent(self, token, tabsize): """calculates distance behind the token to the next tabstop""" diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py index 0a5dec8773f..82e10d9d5f4 100644 --- a/pypy/objspace/std/unicodeobject.py +++ b/pypy/objspace/std/unicodeobject.py @@ -466,6 +466,7 @@ def descr_expandtabs(self, space, tabsize=8): if not replacements and type(self) is W_UnicodeObject: return self newlength = self._length - replacements + assert res is not None return W_UnicodeObject(res, newlength) splitted = value.split('\t')