From 42738b39e3dc132d7f94cc6a10cbc7b78a722d90 Mon Sep 17 00:00:00 2001 From: Stan Ulbrych Date: Sun, 19 Oct 2025 20:00:42 +0100 Subject: [PATCH 1/8] Commit --- Lib/idlelib/colorizer.py | 261 +++++++++++++++++++++++++++++++++++++-- 1 file changed, 252 insertions(+), 9 deletions(-) diff --git a/Lib/idlelib/colorizer.py b/Lib/idlelib/colorizer.py index bffa2ddd3cd9cd..7475bb798a2bd1 100644 --- a/Lib/idlelib/colorizer.py +++ b/Lib/idlelib/colorizer.py @@ -2,6 +2,12 @@ import keyword import re import time +import token as T +import tokenize +from collections import deque +from io import StringIO +from tokenize import TokenInfo as TI +from typing import Iterable, Iterator, Match, NamedTuple, Self from idlelib.config import idleConf from idlelib.delegator import Delegator @@ -9,6 +15,242 @@ DEBUG = False +ANSI_ESCAPE_SEQUENCE = re.compile(r"\x1b\[[ -@]*[A-~]") +ZERO_WIDTH_BRACKET = re.compile(r"\x01.*?\x02") +ZERO_WIDTH_TRANS = str.maketrans({"\x01": "", "\x02": ""}) +IDENTIFIERS_AFTER = {"def", "class"} +KEYWORD_CONSTANTS = {"True", "False", "None"} +BUILTINS = {str(name) for name in dir(builtins) if not name.startswith('_')} + + +class Span(NamedTuple): + """Span indexing that's inclusive on both ends.""" + + start: int + end: int + + @classmethod + def from_re(cls, m: Match[str], group: int | str) -> Self: + re_span = m.span(group) + return cls(re_span[0], re_span[1] - 1) + + @classmethod + def from_token(cls, token: TI, line_len: list[int]) -> Self: + end_offset = -1 + if (token.type in {T.FSTRING_MIDDLE, T.TSTRING_MIDDLE} + and token.string.endswith(("{", "}"))): + # gh-134158: a visible trailing brace comes from a double brace in input + end_offset += 1 + + return cls( + line_len[token.start[0] - 1] + token.start[1], + line_len[token.end[0] - 1] + token.end[1] + end_offset, + ) + + +class ColorSpan(NamedTuple): + span: Span + tag: str + + +def prev_next_window[T]( + iterable: Iterable[T] +) -> Iterator[tuple[T | None, ...]]: + """Generates three-tuples of (previous, current, next) items. + + On the first iteration previous is None. On the last iteration next + is None. In case of exception next is None and the exception is re-raised + on a subsequent next() call. + + Inspired by `sliding_window` from `itertools` recipes. + """ + + iterator = iter(iterable) + window = deque((None, next(iterator)), maxlen=3) + try: + for x in iterator: + window.append(x) + yield tuple(window) + except Exception: + raise + finally: + window.append(None) + yield tuple(window) + + +keyword_first_sets_match = {"False", "None", "True", "await", "lambda", "not"} +keyword_first_sets_case = {"False", "None", "True"} + + +def is_soft_keyword_used(*tokens: TI | None) -> bool: + """Returns True if the current token is a keyword in this context. + + For the `*tokens` to match anything, they have to be a three-tuple of + (previous, current, next). + """ + #trace("is_soft_keyword_used{t}", t=tokens) + match tokens: + case ( + None | TI(T.NEWLINE) | TI(T.INDENT) | TI(string=":"), + TI(string="match"), + TI(T.NUMBER | T.STRING | T.FSTRING_START | T.TSTRING_START) + | TI(T.OP, string="(" | "*" | "[" | "{" | "~" | "...") + ): + return True + case ( + None | TI(T.NEWLINE) | TI(T.INDENT) | TI(string=":"), + TI(string="match"), + TI(T.NAME, string=s) + ): + if keyword.iskeyword(s): + return s in keyword_first_sets_match + return True + case ( + None | TI(T.NEWLINE) | TI(T.INDENT) | TI(T.DEDENT) | TI(string=":"), + TI(string="case"), + TI(T.NUMBER | T.STRING | T.FSTRING_START | T.TSTRING_START) + | TI(T.OP, string="(" | "*" | "-" | "[" | "{") + ): + return True + case ( + None | TI(T.NEWLINE) | TI(T.INDENT) | TI(T.DEDENT) | TI(string=":"), + TI(string="case"), + TI(T.NAME, string=s) + ): + if keyword.iskeyword(s): + return s in keyword_first_sets_case + return True + case (TI(string="case"), TI(string="_"), TI(string=":")): + return True + case ( + None | TI(T.NEWLINE) | TI(T.INDENT) | TI(T.DEDENT) | TI(string=":"), + TI(string="type"), + TI(T.NAME, string=s) + ): + return not keyword.iskeyword(s) + case _: + return False + + +def recover_unterminated_string( + exc: tokenize.TokenError, + line_lengths: list[int], + last_emitted: ColorSpan | None, + buffer: str, +) -> Iterator[ColorSpan]: + msg, loc = exc.args + if loc is None: + return + + line_no, column = loc + + if msg.startswith( + ( + "unterminated string literal", + "unterminated f-string literal", + "unterminated t-string literal", + "EOF in multi-line string", + "unterminated triple-quoted f-string literal", + "unterminated triple-quoted t-string literal", + ) + ): + start = line_lengths[line_no - 1] + column - 1 + end = line_lengths[-1] - 1 + + # in case FSTRING_START was already emitted + if last_emitted and start <= last_emitted.span.start: + start = last_emitted.span.end + 1 + + span = Span(start, end) + yield ColorSpan(span, "STRING") + + +def gen_colors_from_token_stream( + token_generator: Iterator[TI], + line_lengths: list[int], +) -> Iterator[ColorSpan]: + token_window = prev_next_window(token_generator) + + is_def_name = False + bracket_level = 0 + for prev_token, token, next_token in token_window: + assert token is not None + if token.start == token.end: + continue + + match token.type: + case ( + T.STRING + | T.FSTRING_START | T.FSTRING_MIDDLE | T.FSTRING_END + | T.TSTRING_START | T.TSTRING_MIDDLE | T.TSTRING_END + ): + span = Span.from_token(token, line_lengths) + yield ColorSpan(span, "STRING") + case T.COMMENT: + span = Span.from_token(token, line_lengths) + yield ColorSpan(span, "COMMENT") + case T.NUMBER: + span = Span.from_token(token, line_lengths) + yield ColorSpan(span, "STRING") + case T.OP: + if token.string in "([{": + bracket_level += 1 + elif token.string in ")]}": + bracket_level -= 1 + # span = Span.from_token(token, line_lengths) + # yield ColorSpan(span, "op") + case T.NAME: + if is_def_name: + is_def_name = False + span = Span.from_token(token, line_lengths) + yield ColorSpan(span, "DEFINITION") + elif keyword.iskeyword(token.string): + span = Span.from_token(token, line_lengths) + yield ColorSpan(span, "KEYWORD") + if token.string in IDENTIFIERS_AFTER: + is_def_name = True + elif ( + keyword.issoftkeyword(token.string) + and bracket_level == 0 + and is_soft_keyword_used(prev_token, token, next_token) + ): + span = Span.from_token(token, line_lengths) + yield ColorSpan(span, "KEYWORD") + elif ( + token.string in BUILTINS + and not (prev_token and prev_token.exact_type == T.DOT) + ): + span = Span.from_token(token, line_lengths) + yield ColorSpan(span, "BUILTIN") + + +def gen_colors(buffer: str) -> Iterator[ColorSpan]: + """Returns a list of index spans to color using the given color tag. + + The input `buffer` should be a valid start of a Python code block, i.e. + it cannot be a block starting in the middle of a multiline string. + """ + sio = StringIO(buffer) + line_lengths = [0] + [len(line) for line in sio.readlines()] + # make line_lengths cumulative + for i in range(1, len(line_lengths)): + line_lengths[i] += line_lengths[i-1] + + sio.seek(0) + gen = tokenize.generate_tokens(sio.readline) + last_emitted: ColorSpan | None = None + try: + for color in gen_colors_from_token_stream(gen, line_lengths): + yield color + last_emitted = color + except SyntaxError: + return + except tokenize.TokenError as te: + yield from recover_unterminated_string( + te, line_lengths, last_emitted, buffer + ) + + def any(name, alternates): "Return a named group pattern matching list of alternates." return "(?P<%s>" % name + "|".join(alternates) + ")" @@ -333,21 +575,22 @@ def _add_tag(self, start, end, head, matched_group_name): f"{head}+{end:d}c") def _add_tags_in_section(self, chars, head): - """Parse and add highlighting tags to a given part of the text. + """Parse and add highlighting tags using pyrepl's tokenization. `chars` is a string with the text to parse and to which highlighting is to be applied. `head` is the index in the text widget where the text is found. """ - for m in self.prog.finditer(chars): - for name, matched_text in matched_named_groups(m): - a, b = m.span(name) - self._add_tag(a, b, head, name) - if matched_text in ("def", "class"): - if m1 := self.idprog.match(chars, b): - a, b = m1.span(1) - self._add_tag(a, b, head, "DEFINITION") + # Use pyrepl's gen_colors to get color spans + color_spans = list(gen_colors(chars)) + + # Convert pyrepl spans to IDLE text widget positions and add tags + for color_span in color_spans: + start_pos = color_span.span.start + end_pos = color_span.span.end + 1 # pyrepl spans are inclusive, tkinter expects exclusive end + tag = color_span.tag + self._add_tag(start_pos, end_pos, head, tag) def removecolors(self): "Remove all colorizing tags." From 0e5dc44cca34c306176a6e94fcc94400cad28df7 Mon Sep 17 00:00:00 2001 From: Stan Ulbrych Date: Sun, 19 Oct 2025 20:02:16 +0100 Subject: [PATCH 2/8] Commit --- Lib/idlelib/colorizer.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/Lib/idlelib/colorizer.py b/Lib/idlelib/colorizer.py index 7475bb798a2bd1..70fb4629d786a2 100644 --- a/Lib/idlelib/colorizer.py +++ b/Lib/idlelib/colorizer.py @@ -575,20 +575,18 @@ def _add_tag(self, start, end, head, matched_group_name): f"{head}+{end:d}c") def _add_tags_in_section(self, chars, head): - """Parse and add highlighting tags using pyrepl's tokenization. + """Parse and add highlighting tags to a given part of the text.. `chars` is a string with the text to parse and to which highlighting is to be applied. `head` is the index in the text widget where the text is found. """ - # Use pyrepl's gen_colors to get color spans color_spans = list(gen_colors(chars)) - # Convert pyrepl spans to IDLE text widget positions and add tags for color_span in color_spans: start_pos = color_span.span.start - end_pos = color_span.span.end + 1 # pyrepl spans are inclusive, tkinter expects exclusive end + end_pos = color_span.span.end + 1 tag = color_span.tag self._add_tag(start_pos, end_pos, head, tag) From a273099848d79a738ab8190d7731619504824016 Mon Sep 17 00:00:00 2001 From: Stan Ulbrych Date: Sun, 19 Oct 2025 20:02:31 +0100 Subject: [PATCH 3/8] !fixup Commit --- Lib/idlelib/colorizer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/idlelib/colorizer.py b/Lib/idlelib/colorizer.py index 70fb4629d786a2..162defbec42a1f 100644 --- a/Lib/idlelib/colorizer.py +++ b/Lib/idlelib/colorizer.py @@ -575,7 +575,7 @@ def _add_tag(self, start, end, head, matched_group_name): f"{head}+{end:d}c") def _add_tags_in_section(self, chars, head): - """Parse and add highlighting tags to a given part of the text.. + """Parse and add highlighting tags to a given part of the text. `chars` is a string with the text to parse and to which highlighting is to be applied. From 0156eb70df1bdb7f75fea8e97f4d20b7e8906296 Mon Sep 17 00:00:00 2001 From: Stan Ulbrych Date: Sun, 19 Oct 2025 20:14:06 +0100 Subject: [PATCH 4/8] Remove unused regex --- Lib/idlelib/colorizer.py | 73 +------------------------ Lib/idlelib/idle_test/test_colorizer.py | 33 ----------- 2 files changed, 1 insertion(+), 105 deletions(-) diff --git a/Lib/idlelib/colorizer.py b/Lib/idlelib/colorizer.py index 162defbec42a1f..3de0293ecfbc85 100644 --- a/Lib/idlelib/colorizer.py +++ b/Lib/idlelib/colorizer.py @@ -251,73 +251,6 @@ def gen_colors(buffer: str) -> Iterator[ColorSpan]: ) -def any(name, alternates): - "Return a named group pattern matching list of alternates." - return "(?P<%s>" % name + "|".join(alternates) + ")" - - -def make_pat(): - kw = r"\b" + any("KEYWORD", keyword.kwlist) + r"\b" - match_softkw = ( - r"^[ \t]*" + # at beginning of line + possible indentation - r"(?Pmatch)\b" + - r"(?![ \t]*(?:" + "|".join([ # not followed by ... - r"[:,;=^&|@~)\]}]", # a character which means it can't be a - # pattern-matching statement - r"\b(?:" + r"|".join(keyword.kwlist) + r")\b", # a keyword - ]) + - r"))" - ) - case_default = ( - r"^[ \t]*" + # at beginning of line + possible indentation - r"(?Pcase)" + - r"[ \t]+(?P_\b)" - ) - case_softkw_and_pattern = ( - r"^[ \t]*" + # at beginning of line + possible indentation - r"(?Pcase)\b" + - r"(?![ \t]*(?:" + "|".join([ # not followed by ... - r"_\b", # a lone underscore - r"[:,;=^&|@~)\]}]", # a character which means it can't be a - # pattern-matching case - r"\b(?:" + r"|".join(keyword.kwlist) + r")\b", # a keyword - ]) + - r"))" - ) - builtinlist = [str(name) for name in dir(builtins) - if not name.startswith('_') and - name not in keyword.kwlist] - builtin = r"([^.'\"\\#]\b|^)" + any("BUILTIN", builtinlist) + r"\b" - comment = any("COMMENT", [r"#[^\n]*"]) - stringprefix = r"(?i:r|u|f|fr|rf|b|br|rb|t|rt|tr)?" - sqstring = stringprefix + r"'[^'\\\n]*(\\.[^'\\\n]*)*'?" - dqstring = stringprefix + r'"[^"\\\n]*(\\.[^"\\\n]*)*"?' - sq3string = stringprefix + r"'''[^'\\]*((\\.|'(?!''))[^'\\]*)*(''')?" - dq3string = stringprefix + r'"""[^"\\]*((\\.|"(?!""))[^"\\]*)*(""")?' - string = any("STRING", [sq3string, dq3string, sqstring, dqstring]) - prog = re.compile("|".join([ - builtin, comment, string, kw, - match_softkw, case_default, - case_softkw_and_pattern, - any("SYNC", [r"\n"]), - ]), - re.DOTALL | re.MULTILINE) - return prog - - -prog = make_pat() -idprog = re.compile(r"\s+(\w+)") -prog_group_name_to_tag = { - "MATCH_SOFTKW": "KEYWORD", - "CASE_SOFTKW": "KEYWORD", - "CASE_DEFAULT_UNDERSCORE": "KEYWORD", - "CASE_SOFTKW2": "KEYWORD", -} - - -def matched_named_groups(re_match): - "Get only the non-empty named groups from an re.Match object." - return ((k, v) for (k, v) in re_match.groupdict().items() if v) def color_config(text): @@ -360,8 +293,6 @@ class ColorDelegator(Delegator): def __init__(self): Delegator.__init__(self) self.init_state() - self.prog = prog - self.idprog = idprog self.LoadTagDefs() def init_state(self): @@ -557,7 +488,7 @@ def recolorize_main(self): if DEBUG: print("colorizing stopped") return - def _add_tag(self, start, end, head, matched_group_name): + def _add_tag(self, start, end, head, tag): """Add a tag to a given range in the text widget. This is a utility function, receiving the range as `start` and @@ -568,8 +499,6 @@ def _add_tag(self, start, end, head, matched_group_name): the name of a regular expression "named group" as matched by by the relevant highlighting regexps. """ - tag = prog_group_name_to_tag.get(matched_group_name, - matched_group_name) self.tag_add(tag, f"{head}+{start:d}c", f"{head}+{end:d}c") diff --git a/Lib/idlelib/idle_test/test_colorizer.py b/Lib/idlelib/idle_test/test_colorizer.py index 40800df97b0bd3..3c2a8bd12c0692 100644 --- a/Lib/idlelib/idle_test/test_colorizer.py +++ b/Lib/idlelib/idle_test/test_colorizer.py @@ -63,39 +63,6 @@ def tearDownModule(): colorizer.idleConf.userCfg = usercfg -class FunctionTest(unittest.TestCase): - - def test_any(self): - self.assertEqual(colorizer.any('test', ('a', 'b', 'cd')), - '(?Pa|b|cd)') - - def test_make_pat(self): - # Tested in more detail by testing prog. - self.assertTrue(colorizer.make_pat()) - - def test_prog(self): - prog = colorizer.prog - eq = self.assertEqual - line = 'def f():\n print("hello")\n' - m = prog.search(line) - eq(m.groupdict()['KEYWORD'], 'def') - m = prog.search(line, m.end()) - eq(m.groupdict()['SYNC'], '\n') - m = prog.search(line, m.end()) - eq(m.groupdict()['BUILTIN'], 'print') - m = prog.search(line, m.end()) - eq(m.groupdict()['STRING'], '"hello"') - m = prog.search(line, m.end()) - eq(m.groupdict()['SYNC'], '\n') - - def test_idprog(self): - idprog = colorizer.idprog - m = idprog.match('nospace') - self.assertIsNone(m) - m = idprog.match(' space') - self.assertEqual(m.group(0), ' space') - - class ColorConfigTest(unittest.TestCase): @classmethod From af630ab5f2372fdcd1ca45a8ae635a6ddda67916 Mon Sep 17 00:00:00 2001 From: Stan Ulbrych Date: Sun, 19 Oct 2025 20:59:23 +0100 Subject: [PATCH 5/8] Try each line separately on error, add spans back --- Lib/idlelib/colorizer.py | 51 +++++++++++++++++++++++++++++++--------- 1 file changed, 40 insertions(+), 11 deletions(-) diff --git a/Lib/idlelib/colorizer.py b/Lib/idlelib/colorizer.py index 3de0293ecfbc85..cef5ec8f44acc6 100644 --- a/Lib/idlelib/colorizer.py +++ b/Lib/idlelib/colorizer.py @@ -128,6 +128,12 @@ def is_soft_keyword_used(*tokens: TI | None) -> bool: TI(T.NAME, string=s) ): return not keyword.iskeyword(s) + case ( + None | TI(T.NEWLINE) | TI(T.INDENT) | TI(T.DEDENT) | TI(string=":"), + TI(string="match" | "case" | "type"), + None | TI(T.ENDMARKER) | TI(T.NEWLINE) + ): + return True case _: return False @@ -189,9 +195,9 @@ def gen_colors_from_token_stream( case T.COMMENT: span = Span.from_token(token, line_lengths) yield ColorSpan(span, "COMMENT") - case T.NUMBER: + case T.NEWLINE: span = Span.from_token(token, line_lengths) - yield ColorSpan(span, "STRING") + yield ColorSpan(span, "SYNC") case T.OP: if token.string in "([{": bracket_level += 1 @@ -243,12 +249,37 @@ def gen_colors(buffer: str) -> Iterator[ColorSpan]: for color in gen_colors_from_token_stream(gen, line_lengths): yield color last_emitted = color - except SyntaxError: - return - except tokenize.TokenError as te: - yield from recover_unterminated_string( - te, line_lengths, last_emitted, buffer - ) + except (SyntaxError, tokenize.TokenError) as e: + recovered = False + if isinstance(e, tokenize.TokenError): + for recovered_color in recover_unterminated_string( + e, line_lengths, last_emitted, buffer + ): + yield recovered_color + recovered = True + + # fall back to trying each line seperetly + if not recovered: + lines = buffer.split('\n') + current_offset = 0 + for i, line in enumerate(lines): + if not line.strip(): + current_offset += len(line) + 1 + continue + try: + line_sio = StringIO(line + '\n') + line_gen = tokenize.generate_tokens(line_sio.readline) + line_line_lengths = [0, len(line) + 1] + + for color in gen_colors_from_token_stream(line_gen, line_line_lengths): + adjusted_span = Span( + color.span.start + current_offset, + color.span.end + current_offset + ) + yield ColorSpan(adjusted_span, color.tag) + except Exception: + pass + current_offset += len(line) + 1 @@ -511,9 +542,7 @@ def _add_tags_in_section(self, chars, head): `head` is the index in the text widget where the text is found. """ - color_spans = list(gen_colors(chars)) - - for color_span in color_spans: + for color_span in gen_colors(chars): start_pos = color_span.span.start end_pos = color_span.span.end + 1 tag = color_span.tag From 93db8e9a508b352e94b7ae12661fb56ac372f4cc Mon Sep 17 00:00:00 2001 From: Stan Ulbrych Date: Mon, 20 Oct 2025 18:03:04 +0100 Subject: [PATCH 6/8] Fix fall-back for multiline strings; drop types --- Lib/idlelib/colorizer.py | 73 +++++++++++++++++-------- Lib/idlelib/idle_test/test_colorizer.py | 4 +- 2 files changed, 52 insertions(+), 25 deletions(-) diff --git a/Lib/idlelib/colorizer.py b/Lib/idlelib/colorizer.py index cef5ec8f44acc6..6568c553987fb9 100644 --- a/Lib/idlelib/colorizer.py +++ b/Lib/idlelib/colorizer.py @@ -7,7 +7,7 @@ from collections import deque from io import StringIO from tokenize import TokenInfo as TI -from typing import Iterable, Iterator, Match, NamedTuple, Self +from typing import NamedTuple from idlelib.config import idleConf from idlelib.delegator import Delegator @@ -30,12 +30,12 @@ class Span(NamedTuple): end: int @classmethod - def from_re(cls, m: Match[str], group: int | str) -> Self: + def from_re(cls, m, group): re_span = m.span(group) return cls(re_span[0], re_span[1] - 1) @classmethod - def from_token(cls, token: TI, line_len: list[int]) -> Self: + def from_token(cls, token, line_len): end_offset = -1 if (token.type in {T.FSTRING_MIDDLE, T.TSTRING_MIDDLE} and token.string.endswith(("{", "}"))): @@ -53,9 +53,7 @@ class ColorSpan(NamedTuple): tag: str -def prev_next_window[T]( - iterable: Iterable[T] -) -> Iterator[tuple[T | None, ...]]: +def prev_next_window(iterable): """Generates three-tuples of (previous, current, next) items. On the first iteration previous is None. On the last iteration next @@ -82,7 +80,7 @@ def prev_next_window[T]( keyword_first_sets_case = {"False", "None", "True"} -def is_soft_keyword_used(*tokens: TI | None) -> bool: +def is_soft_keyword_used(*tokens): """Returns True if the current token is a keyword in this context. For the `*tokens` to match anything, they have to be a three-tuple of @@ -138,12 +136,7 @@ def is_soft_keyword_used(*tokens: TI | None) -> bool: return False -def recover_unterminated_string( - exc: tokenize.TokenError, - line_lengths: list[int], - last_emitted: ColorSpan | None, - buffer: str, -) -> Iterator[ColorSpan]: +def recover_unterminated_string(exc, line_lengths, last_emitted, buffer): msg, loc = exc.args if loc is None: return @@ -171,10 +164,7 @@ def recover_unterminated_string( yield ColorSpan(span, "STRING") -def gen_colors_from_token_stream( - token_generator: Iterator[TI], - line_lengths: list[int], -) -> Iterator[ColorSpan]: +def gen_colors_from_token_stream(token_generator, line_lengths): token_window = prev_next_window(token_generator) is_def_name = False @@ -195,6 +185,7 @@ def gen_colors_from_token_stream( case T.COMMENT: span = Span.from_token(token, line_lengths) yield ColorSpan(span, "COMMENT") + # XXX the old colorizer added SYNC on newlines, do we still need this? case T.NEWLINE: span = Span.from_token(token, line_lengths) yield ColorSpan(span, "SYNC") @@ -203,8 +194,7 @@ def gen_colors_from_token_stream( bracket_level += 1 elif token.string in ")]}": bracket_level -= 1 - # span = Span.from_token(token, line_lengths) - # yield ColorSpan(span, "op") + # IDLE does not color operators case T.NAME: if is_def_name: is_def_name = False @@ -230,7 +220,7 @@ def gen_colors_from_token_stream( yield ColorSpan(span, "BUILTIN") -def gen_colors(buffer: str) -> Iterator[ColorSpan]: +def gen_colors(buffer): """Returns a list of index spans to color using the given color tag. The input `buffer` should be a valid start of a Python code block, i.e. @@ -244,11 +234,14 @@ def gen_colors(buffer: str) -> Iterator[ColorSpan]: sio.seek(0) gen = tokenize.generate_tokens(sio.readline) - last_emitted: ColorSpan | None = None + last_emitted = None + maxpos = 0 + try: for color in gen_colors_from_token_stream(gen, line_lengths): yield color last_emitted = color + maxpos = max(maxpos, color.span.end) except (SyntaxError, tokenize.TokenError) as e: recovered = False if isinstance(e, tokenize.TokenError): @@ -257,12 +250,48 @@ def gen_colors(buffer: str) -> Iterator[ColorSpan]: ): yield recovered_color recovered = True + maxpos = max(maxpos, recovered_color.span.end) # fall back to trying each line seperetly if not recovered: + bad_line = 0 + for i, total_len in enumerate(line_lengths[1:], 1): + if total_len > maxpos: + bad_line = i - 1 + break + lines = buffer.split('\n') current_offset = 0 + in_multiline = False + multiline_start = 0 + multiline_quote = None + for i, line in enumerate(lines): + if i < bad_line: + current_offset += len(line) + 1 + continue + + if not in_multiline: + start = line.strip()[:3] + rest = line.strip()[3:] + if start == "'''" or start == '"""': + if not (rest.endswith(start) and len(rest) > 3): + in_multiline = True + multiline_start = current_offset + multiline_quote = start + current_offset += len(line) + 1 + continue + else: + if multiline_quote and line.strip().endswith(multiline_quote): + string_end = current_offset + len(line) + yield ColorSpan(Span(multiline_start, string_end), "STRING") + in_multiline = False + multiline_quote = None + current_offset += len(line) + 1 + continue + else: + current_offset += len(line) + 1 + continue if not line.strip(): current_offset += len(line) + 1 continue @@ -282,8 +311,6 @@ def gen_colors(buffer: str) -> Iterator[ColorSpan]: current_offset += len(line) + 1 - - def color_config(text): """Set color options of Text widget. diff --git a/Lib/idlelib/idle_test/test_colorizer.py b/Lib/idlelib/idle_test/test_colorizer.py index 3c2a8bd12c0692..863a206656caa1 100644 --- a/Lib/idlelib/idle_test/test_colorizer.py +++ b/Lib/idlelib/idle_test/test_colorizer.py @@ -363,7 +363,7 @@ def test_recolorize_main(self, mock_notify): #('23.4', ('KEYWORD',)), ('23.10', ('KEYWORD',)), ('23.14', ('KEYWORD',)), ('23.19', ('STRING',)), #('24.12', ('KEYWORD',)), ('25.8', ('KEYWORD',)), - ('26.4', ('KEYWORD',)), ('26.9', ('KEYWORD',)), + ('26.4', ('KEYWORD',)), # XXX ('26.9', ('KEYWORD',)), ('26.11', ('KEYWORD',)), ('26.15', ('STRING',)), ('26.19', ('KEYWORD',)), ('26.22', ()), ('26.24', ('KEYWORD',)), ('26.29', ('BUILTIN',)), ('26.37', ('KEYWORD',)), @@ -401,7 +401,7 @@ def test_recolorize_main(self, mock_notify): eq(text.tag_nextrange('STRING', '8.12'), ('8.14', '8.17')) eq(text.tag_nextrange('STRING', '8.17'), ('8.19', '8.26')) eq(text.tag_nextrange('SYNC', '8.0'), ('8.26', '9.0')) - eq(text.tag_nextrange('SYNC', '31.0'), ('31.10', '33.0')) + eq(text.tag_nextrange('SYNC', '31.0'), ('31.10', '32.0')) def _assert_highlighting(self, source, tag_ranges): """Check highlighting of a given piece of code. From 65e4a2215aa14b6bf44e27cc26bc54213d5997a7 Mon Sep 17 00:00:00 2001 From: Stan Ulbrych Date: Mon, 20 Oct 2025 19:58:34 +0100 Subject: [PATCH 7/8] Add blurb --- .../next/IDLE/2025-10-20-19-58-15.gh-issue-140347.4mMfYZ.rst | 1 + 1 file changed, 1 insertion(+) create mode 100644 Misc/NEWS.d/next/IDLE/2025-10-20-19-58-15.gh-issue-140347.4mMfYZ.rst diff --git a/Misc/NEWS.d/next/IDLE/2025-10-20-19-58-15.gh-issue-140347.4mMfYZ.rst b/Misc/NEWS.d/next/IDLE/2025-10-20-19-58-15.gh-issue-140347.4mMfYZ.rst new file mode 100644 index 00000000000000..d9b0b6cb06c52d --- /dev/null +++ b/Misc/NEWS.d/next/IDLE/2025-10-20-19-58-15.gh-issue-140347.4mMfYZ.rst @@ -0,0 +1 @@ +IDLE colorizer now uses a tokenizer. From 57e065a933343cb9809547812bb3cd46e3bb4154 Mon Sep 17 00:00:00 2001 From: Stan Ulbrych Date: Mon, 20 Oct 2025 20:19:31 +0100 Subject: [PATCH 8/8] Remove some unused constants, convert others to frozensets --- Lib/idlelib/colorizer.py | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/Lib/idlelib/colorizer.py b/Lib/idlelib/colorizer.py index 6568c553987fb9..b32397165e931a 100644 --- a/Lib/idlelib/colorizer.py +++ b/Lib/idlelib/colorizer.py @@ -14,13 +14,8 @@ DEBUG = False - -ANSI_ESCAPE_SEQUENCE = re.compile(r"\x1b\[[ -@]*[A-~]") -ZERO_WIDTH_BRACKET = re.compile(r"\x01.*?\x02") -ZERO_WIDTH_TRANS = str.maketrans({"\x01": "", "\x02": ""}) -IDENTIFIERS_AFTER = {"def", "class"} -KEYWORD_CONSTANTS = {"True", "False", "None"} -BUILTINS = {str(name) for name in dir(builtins) if not name.startswith('_')} +IDENTIFIERS_AFTER = frozenset({"def", "class"}) +BUILTINS = frozenset({str(name) for name in dir(builtins) if not name.startswith('_')}) class Span(NamedTuple): @@ -76,8 +71,8 @@ def prev_next_window(iterable): yield tuple(window) -keyword_first_sets_match = {"False", "None", "True", "await", "lambda", "not"} -keyword_first_sets_case = {"False", "None", "True"} +keyword_first_sets_match = frozenset({"False", "None", "True", "await", "lambda", "not"}) +keyword_first_sets_case = frozenset({"False", "None", "True"}) def is_soft_keyword_used(*tokens):