Skip to content

Commit

Permalink
pythongh-115154: Fix untokenize handling of unicode named literals (pythonGH-115171)
Browse files Browse the repository at this point in the history

(cherry picked from commit ecf16ee)

Co-authored-by: Pablo Galindo Salgado <Pablogsal@gmail.com>
  • Loading branch information
pablogsal authored and miss-islington committed Feb 19, 2024
1 parent 8a5731e commit 954aaba
Show file tree
Hide file tree
Showing 3 changed files with 85 additions and 10 deletions.
40 changes: 37 additions & 3 deletions Lib/test/test_tokenize.py
Expand Up @@ -1874,6 +1874,43 @@ def test_roundtrip(self):
" print('Can not import' # comment2\n)"
"else: print('Loaded')\n")

self.check_roundtrip("f'\\N{EXCLAMATION MARK}'")
self.check_roundtrip(r"f'\\N{SNAKE}'")
self.check_roundtrip(r"f'\\N{{SNAKE}}'")
self.check_roundtrip(r"f'\N{SNAKE}'")
self.check_roundtrip(r"f'\\\N{SNAKE}'")
self.check_roundtrip(r"f'\\\\\N{SNAKE}'")
self.check_roundtrip(r"f'\\\\\\\N{SNAKE}'")

self.check_roundtrip(r"f'\\N{1}'")
self.check_roundtrip(r"f'\\\\N{2}'")
self.check_roundtrip(r"f'\\\\\\N{3}'")
self.check_roundtrip(r"f'\\\\\\\\N{4}'")

self.check_roundtrip(r"f'\\N{{'")
self.check_roundtrip(r"f'\\\\N{{'")
self.check_roundtrip(r"f'\\\\\\N{{'")
self.check_roundtrip(r"f'\\\\\\\\N{{'")
cases = [
"""
if 1:
"foo"
"bar"
""",
"""
if 1:
("foo"
"bar")
""",
"""
if 1:
"foo"
"bar"
""" ]
for case in cases:
self.check_roundtrip(case)


def test_continuation(self):
# Balancing continuation
self.check_roundtrip("a = (3,4, \n"
Expand Down Expand Up @@ -1908,9 +1945,6 @@ def test_random_files(self):
tempdir = os.path.dirname(__file__) or os.curdir
testfiles = glob.glob(os.path.join(glob.escape(tempdir), "test*.py"))

# TODO: Remove this once we can untokenize PEP 701 syntax
testfiles.remove(os.path.join(tempdir, "test_fstring.py"))

if not support.is_resource_enabled("cpu"):
testfiles = random.sample(testfiles, 10)

Expand Down
53 changes: 46 additions & 7 deletions Lib/tokenize.py
Expand Up @@ -170,6 +170,7 @@ def __init__(self):
self.tokens = []
self.prev_row = 1
self.prev_col = 0
self.prev_type = None
self.encoding = None

def add_whitespace(self, start):
Expand All @@ -185,6 +186,29 @@ def add_whitespace(self, start):
if col_offset:
self.tokens.append(" " * col_offset)

def escape_brackets(self, token):
    r"""Return *token* with literal f-string braces doubled.

    Used on FSTRING_MIDDLE tokens when untokenizing: every "{" or "}"
    that is plain text must be written back doubled ("{{" / "}}") so the
    regenerated source escapes it again.  The exception is a
    ``\N{...}`` named-unicode escape — an odd number of backslashes
    immediately before the "N" means the "{" really opens an escape, so
    that brace and its matching "}" are emitted single.
    """
    out = []
    skip_closing_brace = False  # inside a \N{...} escape: next "}" stays single
    for ch in token:
        if ch == "}":
            if skip_closing_brace:
                # This "}" terminates a \N{...} escape; the single
                # append below is all it gets.
                skip_closing_brace = False
            else:
                # Plain "}": doubled (once here, once below).
                out.append(ch)
        elif ch == "{":
            # Count the run of backslashes just before the character
            # preceding this "{" (that character is the "N" of a \N
            # escape when the count is odd).
            idx = len(out) - 2
            backslashes = 0
            while idx >= 0 and out[idx] == "\\":
                backslashes += 1
                idx -= 1
            if backslashes % 2 == 0:
                # Plain "{": doubled (once here, once below).
                out.append(ch)
            else:
                # Real \N{ escape: keep single, remember to keep the
                # closing brace single too.
                skip_closing_brace = True
        out.append(ch)
    return "".join(out)

def untokenize(self, iterable):
it = iter(iterable)
indents = []
Expand Down Expand Up @@ -216,25 +240,29 @@ def untokenize(self, iterable):
startline = False
elif tok_type == FSTRING_MIDDLE:
if '{' in token or '}' in token:
token = self.escape_brackets(token)
last_line = token.splitlines()[-1]
end_line, end_col = end
end = (end_line, end_col + token.count('{') + token.count('}'))
token = re.sub('{', '{{', token)
token = re.sub('}', '}}', token)

extra_chars = last_line.count("{{") + last_line.count("}}")
end = (end_line, end_col + extra_chars)
elif tok_type in (STRING, FSTRING_START) and self.prev_type in (STRING, FSTRING_END):
self.tokens.append(" ")

self.add_whitespace(start)
self.tokens.append(token)
self.prev_row, self.prev_col = end
if tok_type in (NEWLINE, NL):
self.prev_row += 1
self.prev_col = 0
self.prev_type = tok_type
return "".join(self.tokens)

def compat(self, token, iterable):
indents = []
toks_append = self.tokens.append
startline = token[0] in (NEWLINE, NL)
prevstring = False
in_fstring = 0

for tok in _itertools.chain([token], iterable):
toknum, tokval = tok[:2]
Expand All @@ -253,6 +281,10 @@ def compat(self, token, iterable):
else:
prevstring = False

if toknum == FSTRING_START:
in_fstring += 1
elif toknum == FSTRING_END:
in_fstring -= 1
if toknum == INDENT:
indents.append(tokval)
continue
Expand All @@ -265,11 +297,18 @@ def compat(self, token, iterable):
toks_append(indents[-1])
startline = False
elif toknum == FSTRING_MIDDLE:
if '{' in tokval or '}' in tokval:
tokval = re.sub('{', '{{', tokval)
tokval = re.sub('}', '}}', tokval)
tokval = self.escape_brackets(tokval)

# Insert a space between two consecutive brackets if we are in an f-string
if tokval in {"{", "}"} and self.tokens and self.tokens[-1] == tokval and in_fstring:
tokval = ' ' + tokval

# Insert a space between two consecutive f-strings
if toknum in (STRING, FSTRING_START) and self.prev_type in (STRING, FSTRING_END):
self.tokens.append(" ")

toks_append(tokval)
self.prev_type = toknum


def untokenize(iterable):
Expand Down
@@ -0,0 +1,2 @@
Fix a bug that was causing the :func:`tokenize.untokenize` function to
handle unicode named literals incorrectly. Patch by Pablo Galindo.

0 comments on commit 954aaba

Please sign in to comment.