From 42738b39e3dc132d7f94cc6a10cbc7b78a722d90 Mon Sep 17 00:00:00 2001
From: Stan Ulbrych <stan@ulbrych.org>
Date: Sun, 19 Oct 2025 20:00:42 +0100
Subject: [PATCH 1/8] Use tokenize-based highlighting in IDLE's colorizer

---
 Lib/idlelib/colorizer.py | 261 +++++++++++++++++++++++++++++++++++++--
 1 file changed, 252 insertions(+), 9 deletions(-)

diff --git a/Lib/idlelib/colorizer.py b/Lib/idlelib/colorizer.py
index bffa2ddd3cd9cd..7475bb798a2bd1 100644
--- a/Lib/idlelib/colorizer.py
+++ b/Lib/idlelib/colorizer.py
@@ -2,6 +2,12 @@
 import keyword
 import re
 import time
+import token as T
+import tokenize
+from collections import deque
+from io import StringIO
+from tokenize import TokenInfo as TI
+from typing import Iterable, Iterator, Match, NamedTuple, Self
 
 from idlelib.config import idleConf
 from idlelib.delegator import Delegator
@@ -9,6 +15,242 @@
 DEBUG = False
 
 
+ANSI_ESCAPE_SEQUENCE = re.compile(r"\x1b\[[ -@]*[A-~]")
+ZERO_WIDTH_BRACKET = re.compile(r"\x01.*?\x02")
+ZERO_WIDTH_TRANS = str.maketrans({"\x01": "", "\x02": ""})
+IDENTIFIERS_AFTER = {"def", "class"}
+KEYWORD_CONSTANTS = {"True", "False", "None"}
+BUILTINS = {str(name) for name in dir(builtins) if not name.startswith('_')}
+
+
+class Span(NamedTuple):
+    """Span indexing that's inclusive on both ends."""
+
+    start: int
+    end: int
+
+    @classmethod
+    def from_re(cls, m: Match[str], group: int | str) -> Self:
+        re_span = m.span(group)
+        return cls(re_span[0], re_span[1] - 1)
+
+    @classmethod
+    def from_token(cls, token: TI, line_len: list[int]) -> Self:
+        end_offset = -1
+        if (token.type in {T.FSTRING_MIDDLE, T.TSTRING_MIDDLE}
+            and token.string.endswith(("{", "}"))):
+            # gh-134158: a visible trailing brace comes from a double brace in input
+            end_offset += 1
+
+        return cls(
+            line_len[token.start[0] - 1] + token.start[1],
+            line_len[token.end[0] - 1] + token.end[1] + end_offset,
+        )
+
+
+class ColorSpan(NamedTuple):
+    span: Span
+    tag: str
+
+
+def prev_next_window[T](
+    iterable: Iterable[T]
+) -> Iterator[tuple[T | None, ...]]:
+    """Generates three-tuples of (previous, current, next) items.
+
+    On the first iteration previous is None. On the last iteration next
+    is None. In case of exception next is None and the exception is re-raised
+    on a subsequent next() call.
+
+    Inspired by `sliding_window` from `itertools` recipes.
+    """
+
+    iterator = iter(iterable)
+    window = deque((None, next(iterator)), maxlen=3)
+    try:
+        for x in iterator:
+            window.append(x)
+            yield tuple(window)
+    except Exception:
+        raise
+    finally:
+        window.append(None)
+        yield tuple(window)
+
+
+keyword_first_sets_match = {"False", "None", "True", "await", "lambda", "not"}
+keyword_first_sets_case = {"False", "None", "True"}
+
+
+def is_soft_keyword_used(*tokens: TI | None) -> bool:
+    """Returns True if the current token is a keyword in this context.
+
+    For the `*tokens` to match anything, they have to be a three-tuple of
+    (previous, current, next).
+    """
+    #trace("is_soft_keyword_used{t}", t=tokens)
+    match tokens:
+        case (
+            None | TI(T.NEWLINE) | TI(T.INDENT) | TI(string=":"),
+            TI(string="match"),
+            TI(T.NUMBER | T.STRING | T.FSTRING_START | T.TSTRING_START)
+            | TI(T.OP, string="(" | "*" | "[" | "{" | "~" | "...")
+        ):
+            return True
+        case (
+            None | TI(T.NEWLINE) | TI(T.INDENT) | TI(string=":"),
+            TI(string="match"),
+            TI(T.NAME, string=s)
+        ):
+            if keyword.iskeyword(s):
+                return s in keyword_first_sets_match
+            return True
+        case (
+            None | TI(T.NEWLINE) | TI(T.INDENT) | TI(T.DEDENT) | TI(string=":"),
+            TI(string="case"),
+            TI(T.NUMBER | T.STRING | T.FSTRING_START | T.TSTRING_START)
+            | TI(T.OP, string="(" | "*" | "-" | "[" | "{")
+        ):
+            return True
+        case (
+            None | TI(T.NEWLINE) | TI(T.INDENT) | TI(T.DEDENT) | TI(string=":"),
+            TI(string="case"),
+            TI(T.NAME, string=s)
+        ):
+            if keyword.iskeyword(s):
+                return s in keyword_first_sets_case
+            return True
+        case (TI(string="case"), TI(string="_"), TI(string=":")):
+            return True
+        case (
+            None | TI(T.NEWLINE) | TI(T.INDENT) | TI(T.DEDENT) | TI(string=":"),
+            TI(string="type"),
+            TI(T.NAME, string=s)
+        ):
+            return not keyword.iskeyword(s)
+        case _:
+            return False
+
+
+def recover_unterminated_string(
+    exc: tokenize.TokenError,
+    line_lengths: list[int],
+    last_emitted: ColorSpan | None,
+    buffer: str,
+) -> Iterator[ColorSpan]:
+    msg, loc = exc.args
+    if loc is None:
+        return
+
+    line_no, column = loc
+
+    if msg.startswith(
+        (
+            "unterminated string literal",
+            "unterminated f-string literal",
+            "unterminated t-string literal",
+            "EOF in multi-line string",
+            "unterminated triple-quoted f-string literal",
+            "unterminated triple-quoted t-string literal",
+        )
+    ):
+        start = line_lengths[line_no - 1] + column - 1
+        end = line_lengths[-1] - 1
+
+        # in case FSTRING_START was already emitted
+        if last_emitted and start <= last_emitted.span.start:
+            start = last_emitted.span.end + 1
+
+        span = Span(start, end)
+        yield ColorSpan(span, "STRING")
+
+
+def gen_colors_from_token_stream(
+    token_generator: Iterator[TI],
+    line_lengths: list[int],
+) -> Iterator[ColorSpan]:
+    token_window = prev_next_window(token_generator)
+
+    is_def_name = False
+    bracket_level = 0
+    for prev_token, token, next_token in token_window:
+        assert token is not None
+        if token.start == token.end:
+            continue
+
+        match token.type:
+            case (
+                T.STRING
+                | T.FSTRING_START | T.FSTRING_MIDDLE | T.FSTRING_END
+                | T.TSTRING_START | T.TSTRING_MIDDLE | T.TSTRING_END
+            ):
+                span = Span.from_token(token, line_lengths)
+                yield ColorSpan(span, "STRING")
+            case T.COMMENT:
+                span = Span.from_token(token, line_lengths)
+                yield ColorSpan(span, "COMMENT")
+            case T.NUMBER:
+                span = Span.from_token(token, line_lengths)
+                yield ColorSpan(span, "STRING")
+            case T.OP:
+                if token.string in "([{":
+                    bracket_level += 1
+                elif token.string in ")]}":
+                    bracket_level -= 1
+                # span = Span.from_token(token, line_lengths)
+                # yield ColorSpan(span, "op")
+            case T.NAME:
+                if is_def_name:
+                    is_def_name = False
+                    span = Span.from_token(token, line_lengths)
+                    yield ColorSpan(span, "DEFINITION")
+                elif keyword.iskeyword(token.string):
+                    span = Span.from_token(token, line_lengths)
+                    yield ColorSpan(span, "KEYWORD")
+                    if token.string in IDENTIFIERS_AFTER:
+                        is_def_name = True
+                elif (
+                    keyword.issoftkeyword(token.string)
+                    and bracket_level == 0
+                    and is_soft_keyword_used(prev_token, token, next_token)
+                ):
+                    span = Span.from_token(token, line_lengths)
+                    yield ColorSpan(span, "KEYWORD")
+                elif (
+                    token.string in BUILTINS
+                    and not (prev_token and prev_token.exact_type == T.DOT)
+                ):
+                    span = Span.from_token(token, line_lengths)
+                    yield ColorSpan(span, "BUILTIN")
+
+
+def gen_colors(buffer: str) -> Iterator[ColorSpan]:
+    """Returns a list of index spans to color using the given color tag.
+
+    The input `buffer` should be a valid start of a Python code block, i.e.
+    it cannot be a block starting in the middle of a multiline string.
+    """
+    sio = StringIO(buffer)
+    line_lengths = [0] + [len(line) for line in sio.readlines()]
+    # make line_lengths cumulative
+    for i in range(1, len(line_lengths)):
+        line_lengths[i] += line_lengths[i-1]
+
+    sio.seek(0)
+    gen = tokenize.generate_tokens(sio.readline)
+    last_emitted: ColorSpan | None = None
+    try:
+        for color in gen_colors_from_token_stream(gen, line_lengths):
+            yield color
+            last_emitted = color
+    except SyntaxError:
+        return
+    except tokenize.TokenError as te:
+        yield from recover_unterminated_string(
+            te, line_lengths, last_emitted, buffer
+        )
+
+
 def any(name, alternates):
     "Return a named group pattern matching list of alternates."
     return "(?P<%s>" % name + "|".join(alternates) + ")"
@@ -333,21 +575,22 @@ def _add_tag(self, start, end, head, matched_group_name):
                      f"{head}+{end:d}c")
 
     def _add_tags_in_section(self, chars, head):
-        """Parse and add highlighting tags to a given part of the text.
+        """Parse and add highlighting tags using pyrepl's tokenization.
 
         `chars` is a string with the text to parse and to which
         highlighting is to be applied.
 
             `head` is the index in the text widget where the text is found.
         """
-        for m in self.prog.finditer(chars):
-            for name, matched_text in matched_named_groups(m):
-                a, b = m.span(name)
-                self._add_tag(a, b, head, name)
-                if matched_text in ("def", "class"):
-                    if m1 := self.idprog.match(chars, b):
-                        a, b = m1.span(1)
-                        self._add_tag(a, b, head, "DEFINITION")
+        # Use pyrepl's gen_colors to get color spans
+        color_spans = list(gen_colors(chars))
+
+        # Convert pyrepl spans to IDLE text widget positions and add tags
+        for color_span in color_spans:
+            start_pos = color_span.span.start
+            end_pos = color_span.span.end + 1  # pyrepl spans are inclusive, tkinter expects exclusive end
+            tag = color_span.tag
+            self._add_tag(start_pos, end_pos, head, tag)
 
     def removecolors(self):
         "Remove all colorizing tags."

From 0e5dc44cca34c306176a6e94fcc94400cad28df7 Mon Sep 17 00:00:00 2001
From: Stan Ulbrych <stan@ulbrych.org>
Date: Sun, 19 Oct 2025 20:02:16 +0100
Subject: [PATCH 2/8] Drop pyrepl-specific comments from _add_tags_in_section

---
 Lib/idlelib/colorizer.py | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/Lib/idlelib/colorizer.py b/Lib/idlelib/colorizer.py
index 7475bb798a2bd1..70fb4629d786a2 100644
--- a/Lib/idlelib/colorizer.py
+++ b/Lib/idlelib/colorizer.py
@@ -575,20 +575,18 @@ def _add_tag(self, start, end, head, matched_group_name):
                      f"{head}+{end:d}c")
 
     def _add_tags_in_section(self, chars, head):
-        """Parse and add highlighting tags using pyrepl's tokenization.
+        """Parse and add highlighting tags to a given part of the text..
 
         `chars` is a string with the text to parse and to which
         highlighting is to be applied.
 
             `head` is the index in the text widget where the text is found.
         """
-        # Use pyrepl's gen_colors to get color spans
         color_spans = list(gen_colors(chars))
 
-        # Convert pyrepl spans to IDLE text widget positions and add tags
         for color_span in color_spans:
             start_pos = color_span.span.start
-            end_pos = color_span.span.end + 1  # pyrepl spans are inclusive, tkinter expects exclusive end
+            end_pos = color_span.span.end + 1
             tag = color_span.tag
             self._add_tag(start_pos, end_pos, head, tag)
 

From a273099848d79a738ab8190d7731619504824016 Mon Sep 17 00:00:00 2001
From: Stan Ulbrych <stan@ulbrych.org>
Date: Sun, 19 Oct 2025 20:02:31 +0100
Subject: [PATCH 3/8] Fix doubled period in _add_tags_in_section docstring

---
 Lib/idlelib/colorizer.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Lib/idlelib/colorizer.py b/Lib/idlelib/colorizer.py
index 70fb4629d786a2..162defbec42a1f 100644
--- a/Lib/idlelib/colorizer.py
+++ b/Lib/idlelib/colorizer.py
@@ -575,7 +575,7 @@ def _add_tag(self, start, end, head, matched_group_name):
                      f"{head}+{end:d}c")
 
     def _add_tags_in_section(self, chars, head):
-        """Parse and add highlighting tags to a given part of the text..
+        """Parse and add highlighting tags to a given part of the text.
 
         `chars` is a string with the text to parse and to which
         highlighting is to be applied.

From 0156eb70df1bdb7f75fea8e97f4d20b7e8906296 Mon Sep 17 00:00:00 2001
From: Stan Ulbrych <stan@ulbrych.org>
Date: Sun, 19 Oct 2025 20:14:06 +0100
Subject: [PATCH 4/8] Remove unused regex

---
 Lib/idlelib/colorizer.py                | 73 +------------------------
 Lib/idlelib/idle_test/test_colorizer.py | 33 -----------
 2 files changed, 1 insertion(+), 105 deletions(-)

diff --git a/Lib/idlelib/colorizer.py b/Lib/idlelib/colorizer.py
index 162defbec42a1f..3de0293ecfbc85 100644
--- a/Lib/idlelib/colorizer.py
+++ b/Lib/idlelib/colorizer.py
@@ -251,73 +251,6 @@ def gen_colors(buffer: str) -> Iterator[ColorSpan]:
         )
 
 
-def any(name, alternates):
-    "Return a named group pattern matching list of alternates."
-    return "(?P<%s>" % name + "|".join(alternates) + ")"
-
-
-def make_pat():
-    kw = r"\b" + any("KEYWORD", keyword.kwlist) + r"\b"
-    match_softkw = (
-        r"^[ \t]*" +  # at beginning of line + possible indentation
-        r"(?P<MATCH_SOFTKW>match)\b" +
-        r"(?![ \t]*(?:" + "|".join([  # not followed by ...
-            r"[:,;=^&|@~)\]}]",  # a character which means it can't be a
-                                 # pattern-matching statement
-            r"\b(?:" + r"|".join(keyword.kwlist) + r")\b",  # a keyword
-        ]) +
-        r"))"
-    )
-    case_default = (
-        r"^[ \t]*" +  # at beginning of line + possible indentation
-        r"(?P<CASE_SOFTKW>case)" +
-        r"[ \t]+(?P<CASE_DEFAULT_UNDERSCORE>_\b)"
-    )
-    case_softkw_and_pattern = (
-        r"^[ \t]*" +  # at beginning of line + possible indentation
-        r"(?P<CASE_SOFTKW2>case)\b" +
-        r"(?![ \t]*(?:" + "|".join([  # not followed by ...
-            r"_\b",  # a lone underscore
-            r"[:,;=^&|@~)\]}]",  # a character which means it can't be a
-                                 # pattern-matching case
-            r"\b(?:" + r"|".join(keyword.kwlist) + r")\b",  # a keyword
-        ]) +
-        r"))"
-    )
-    builtinlist = [str(name) for name in dir(builtins)
-                   if not name.startswith('_') and
-                   name not in keyword.kwlist]
-    builtin = r"([^.'\"\\#]\b|^)" + any("BUILTIN", builtinlist) + r"\b"
-    comment = any("COMMENT", [r"#[^\n]*"])
-    stringprefix = r"(?i:r|u|f|fr|rf|b|br|rb|t|rt|tr)?"
-    sqstring = stringprefix + r"'[^'\\\n]*(\\.[^'\\\n]*)*'?"
-    dqstring = stringprefix + r'"[^"\\\n]*(\\.[^"\\\n]*)*"?'
-    sq3string = stringprefix + r"'''[^'\\]*((\\.|'(?!''))[^'\\]*)*(''')?"
-    dq3string = stringprefix + r'"""[^"\\]*((\\.|"(?!""))[^"\\]*)*(""")?'
-    string = any("STRING", [sq3string, dq3string, sqstring, dqstring])
-    prog = re.compile("|".join([
-                                builtin, comment, string, kw,
-                                match_softkw, case_default,
-                                case_softkw_and_pattern,
-                                any("SYNC", [r"\n"]),
-                               ]),
-                      re.DOTALL | re.MULTILINE)
-    return prog
-
-
-prog = make_pat()
-idprog = re.compile(r"\s+(\w+)")
-prog_group_name_to_tag = {
-    "MATCH_SOFTKW": "KEYWORD",
-    "CASE_SOFTKW": "KEYWORD",
-    "CASE_DEFAULT_UNDERSCORE": "KEYWORD",
-    "CASE_SOFTKW2": "KEYWORD",
-}
-
-
-def matched_named_groups(re_match):
-    "Get only the non-empty named groups from an re.Match object."
-    return ((k, v) for (k, v) in re_match.groupdict().items() if v)
 
 
 def color_config(text):
@@ -360,8 +293,6 @@ class ColorDelegator(Delegator):
     def __init__(self):
         Delegator.__init__(self)
         self.init_state()
-        self.prog = prog
-        self.idprog = idprog
         self.LoadTagDefs()
 
     def init_state(self):
@@ -557,7 +488,7 @@ def recolorize_main(self):
                     if DEBUG: print("colorizing stopped")
                     return
 
-    def _add_tag(self, start, end, head, matched_group_name):
+    def _add_tag(self, start, end, head, tag):
         """Add a tag to a given range in the text widget.
 
         This is a utility function, receiving the range as `start` and
@@ -568,8 +499,6 @@ def _add_tag(self, start, end, head, matched_group_name):
         the name of a regular expression "named group" as matched by
         by the relevant highlighting regexps.
         """
-        tag = prog_group_name_to_tag.get(matched_group_name,
-                                         matched_group_name)
         self.tag_add(tag,
                      f"{head}+{start:d}c",
                      f"{head}+{end:d}c")
diff --git a/Lib/idlelib/idle_test/test_colorizer.py b/Lib/idlelib/idle_test/test_colorizer.py
index 40800df97b0bd3..3c2a8bd12c0692 100644
--- a/Lib/idlelib/idle_test/test_colorizer.py
+++ b/Lib/idlelib/idle_test/test_colorizer.py
@@ -63,39 +63,6 @@ def tearDownModule():
     colorizer.idleConf.userCfg = usercfg
 
 
-class FunctionTest(unittest.TestCase):
-
-    def test_any(self):
-        self.assertEqual(colorizer.any('test', ('a', 'b', 'cd')),
-                         '(?P<test>a|b|cd)')
-
-    def test_make_pat(self):
-        # Tested in more detail by testing prog.
-        self.assertTrue(colorizer.make_pat())
-
-    def test_prog(self):
-        prog = colorizer.prog
-        eq = self.assertEqual
-        line = 'def f():\n    print("hello")\n'
-        m = prog.search(line)
-        eq(m.groupdict()['KEYWORD'], 'def')
-        m = prog.search(line, m.end())
-        eq(m.groupdict()['SYNC'], '\n')
-        m = prog.search(line, m.end())
-        eq(m.groupdict()['BUILTIN'], 'print')
-        m = prog.search(line, m.end())
-        eq(m.groupdict()['STRING'], '"hello"')
-        m = prog.search(line, m.end())
-        eq(m.groupdict()['SYNC'], '\n')
-
-    def test_idprog(self):
-        idprog = colorizer.idprog
-        m = idprog.match('nospace')
-        self.assertIsNone(m)
-        m = idprog.match(' space')
-        self.assertEqual(m.group(0), ' space')
-
-
 class ColorConfigTest(unittest.TestCase):
 
     @classmethod

From af630ab5f2372fdcd1ca45a8ae635a6ddda67916 Mon Sep 17 00:00:00 2001
From: Stan Ulbrych <stan@ulbrych.org>
Date: Sun, 19 Oct 2025 20:59:23 +0100
Subject: [PATCH 5/8] Try each line separately on error, add spans back

---
 Lib/idlelib/colorizer.py | 51 +++++++++++++++++++++++++++++++---------
 1 file changed, 40 insertions(+), 11 deletions(-)

diff --git a/Lib/idlelib/colorizer.py b/Lib/idlelib/colorizer.py
index 3de0293ecfbc85..cef5ec8f44acc6 100644
--- a/Lib/idlelib/colorizer.py
+++ b/Lib/idlelib/colorizer.py
@@ -128,6 +128,12 @@ def is_soft_keyword_used(*tokens: TI | None) -> bool:
             TI(T.NAME, string=s)
         ):
             return not keyword.iskeyword(s)
+        case (
+            None | TI(T.NEWLINE) | TI(T.INDENT) | TI(T.DEDENT) | TI(string=":"),
+            TI(string="match" | "case" | "type"),
+            None | TI(T.ENDMARKER) | TI(T.NEWLINE)
+        ):
+            return True
         case _:
             return False
 
@@ -189,9 +195,9 @@ def gen_colors_from_token_stream(
             case T.COMMENT:
                 span = Span.from_token(token, line_lengths)
                 yield ColorSpan(span, "COMMENT")
-            case T.NUMBER:
+            case T.NEWLINE:
                 span = Span.from_token(token, line_lengths)
-                yield ColorSpan(span, "STRING")
+                yield ColorSpan(span, "SYNC")
             case T.OP:
                 if token.string in "([{":
                     bracket_level += 1
@@ -243,12 +249,37 @@ def gen_colors(buffer: str) -> Iterator[ColorSpan]:
         for color in gen_colors_from_token_stream(gen, line_lengths):
             yield color
             last_emitted = color
-    except SyntaxError:
-        return
-    except tokenize.TokenError as te:
-        yield from recover_unterminated_string(
-            te, line_lengths, last_emitted, buffer
-        )
+    except (SyntaxError, tokenize.TokenError) as e:
+        recovered = False
+        if isinstance(e, tokenize.TokenError):
+            for recovered_color in recover_unterminated_string(
+                e, line_lengths, last_emitted, buffer
+            ):
+                yield recovered_color
+                recovered = True
+
+        # fall back to trying each line seperetly
+        if not recovered:
+            lines = buffer.split('\n')
+            current_offset = 0
+            for i, line in enumerate(lines):
+                if not line.strip():
+                    current_offset += len(line) + 1
+                    continue
+                try:
+                    line_sio = StringIO(line + '\n')
+                    line_gen = tokenize.generate_tokens(line_sio.readline)
+                    line_line_lengths = [0, len(line) + 1]
+
+                    for color in gen_colors_from_token_stream(line_gen, line_line_lengths):
+                        adjusted_span = Span(
+                            color.span.start + current_offset,
+                            color.span.end + current_offset
+                        )
+                        yield ColorSpan(adjusted_span, color.tag)
+                except Exception:
+                    pass
+                current_offset += len(line) + 1
 
 
 
@@ -511,9 +542,7 @@ def _add_tags_in_section(self, chars, head):
 
             `head` is the index in the text widget where the text is found.
         """
-        color_spans = list(gen_colors(chars))
-
-        for color_span in color_spans:
+        for color_span in gen_colors(chars):
             start_pos = color_span.span.start
             end_pos = color_span.span.end + 1
             tag = color_span.tag

From 93db8e9a508b352e94b7ae12661fb56ac372f4cc Mon Sep 17 00:00:00 2001
From: Stan Ulbrych <stan@ulbrych.org>
Date: Mon, 20 Oct 2025 18:03:04 +0100
Subject: [PATCH 6/8] Fix fall-back for multiline strings; drop types

---
 Lib/idlelib/colorizer.py                | 73 +++++++++++++++++--------
 Lib/idlelib/idle_test/test_colorizer.py |  4 +-
 2 files changed, 52 insertions(+), 25 deletions(-)

diff --git a/Lib/idlelib/colorizer.py b/Lib/idlelib/colorizer.py
index cef5ec8f44acc6..6568c553987fb9 100644
--- a/Lib/idlelib/colorizer.py
+++ b/Lib/idlelib/colorizer.py
@@ -7,7 +7,7 @@
 from collections import deque
 from io import StringIO
 from tokenize import TokenInfo as TI
-from typing import Iterable, Iterator, Match, NamedTuple, Self
+from typing import NamedTuple
 
 from idlelib.config import idleConf
 from idlelib.delegator import Delegator
@@ -30,12 +30,12 @@ class Span(NamedTuple):
     end: int
 
     @classmethod
-    def from_re(cls, m: Match[str], group: int | str) -> Self:
+    def from_re(cls, m, group):
         re_span = m.span(group)
         return cls(re_span[0], re_span[1] - 1)
 
     @classmethod
-    def from_token(cls, token: TI, line_len: list[int]) -> Self:
+    def from_token(cls, token, line_len):
         end_offset = -1
         if (token.type in {T.FSTRING_MIDDLE, T.TSTRING_MIDDLE}
             and token.string.endswith(("{", "}"))):
@@ -53,9 +53,7 @@ class ColorSpan(NamedTuple):
     tag: str
 
 
-def prev_next_window[T](
-    iterable: Iterable[T]
-) -> Iterator[tuple[T | None, ...]]:
+def prev_next_window(iterable):
     """Generates three-tuples of (previous, current, next) items.
 
     On the first iteration previous is None. On the last iteration next
@@ -82,7 +80,7 @@ def prev_next_window[T](
 keyword_first_sets_case = {"False", "None", "True"}
 
 
-def is_soft_keyword_used(*tokens: TI | None) -> bool:
+def is_soft_keyword_used(*tokens):
     """Returns True if the current token is a keyword in this context.
 
     For the `*tokens` to match anything, they have to be a three-tuple of
@@ -138,12 +136,7 @@ def is_soft_keyword_used(*tokens: TI | None) -> bool:
             return False
 
 
-def recover_unterminated_string(
-    exc: tokenize.TokenError,
-    line_lengths: list[int],
-    last_emitted: ColorSpan | None,
-    buffer: str,
-) -> Iterator[ColorSpan]:
+def recover_unterminated_string(exc, line_lengths, last_emitted, buffer):
     msg, loc = exc.args
     if loc is None:
         return
@@ -171,10 +164,7 @@ def recover_unterminated_string(
         yield ColorSpan(span, "STRING")
 
 
-def gen_colors_from_token_stream(
-    token_generator: Iterator[TI],
-    line_lengths: list[int],
-) -> Iterator[ColorSpan]:
+def gen_colors_from_token_stream(token_generator, line_lengths):
     token_window = prev_next_window(token_generator)
 
     is_def_name = False
@@ -195,6 +185,7 @@ def gen_colors_from_token_stream(
             case T.COMMENT:
                 span = Span.from_token(token, line_lengths)
                 yield ColorSpan(span, "COMMENT")
+            # XXX the old colorizer added SYNC on newlines, do we still need this?
             case T.NEWLINE:
                 span = Span.from_token(token, line_lengths)
                 yield ColorSpan(span, "SYNC")
@@ -203,8 +194,7 @@ def gen_colors_from_token_stream(
                     bracket_level += 1
                 elif token.string in ")]}":
                     bracket_level -= 1
-                # span = Span.from_token(token, line_lengths)
-                # yield ColorSpan(span, "op")
+                # IDLE does not color operators
             case T.NAME:
                 if is_def_name:
                     is_def_name = False
@@ -230,7 +220,7 @@ def gen_colors_from_token_stream(
                     yield ColorSpan(span, "BUILTIN")
 
 
-def gen_colors(buffer: str) -> Iterator[ColorSpan]:
+def gen_colors(buffer):
     """Returns a list of index spans to color using the given color tag.
 
     The input `buffer` should be a valid start of a Python code block, i.e.
@@ -244,11 +234,14 @@ def gen_colors(buffer: str) -> Iterator[ColorSpan]:
 
     sio.seek(0)
     gen = tokenize.generate_tokens(sio.readline)
-    last_emitted: ColorSpan | None = None
+    last_emitted = None
+    maxpos = 0
+
     try:
         for color in gen_colors_from_token_stream(gen, line_lengths):
             yield color
             last_emitted = color
+            maxpos = max(maxpos, color.span.end)
     except (SyntaxError, tokenize.TokenError) as e:
         recovered = False
         if isinstance(e, tokenize.TokenError):
@@ -257,12 +250,48 @@ def gen_colors(buffer: str) -> Iterator[ColorSpan]:
             ):
                 yield recovered_color
                 recovered = True
+                maxpos = max(maxpos, recovered_color.span.end)
 
         # fall back to trying each line seperetly
         if not recovered:
+            bad_line = 0
+            for i, total_len in enumerate(line_lengths[1:], 1):
+                if total_len > maxpos:
+                    bad_line = i - 1
+                    break
+
             lines = buffer.split('\n')
             current_offset = 0
+            in_multiline = False
+            multiline_start = 0
+            multiline_quote = None
+
             for i, line in enumerate(lines):
+                if i < bad_line:
+                    current_offset += len(line) + 1
+                    continue
+
+                if not in_multiline:
+                    start = line.strip()[:3]
+                    rest = line.strip()[3:]
+                    if start == "'''" or start == '"""':
+                        if not (rest.endswith(start) and len(rest) > 3):
+                            in_multiline = True
+                            multiline_start = current_offset
+                            multiline_quote = start
+                            current_offset += len(line) + 1
+                            continue
+                else:
+                    if multiline_quote and line.strip().endswith(multiline_quote):
+                        string_end = current_offset + len(line)
+                        yield ColorSpan(Span(multiline_start, string_end), "STRING")
+                        in_multiline = False
+                        multiline_quote = None
+                        current_offset += len(line) + 1
+                        continue
+                    else:
+                        current_offset += len(line) + 1
+                        continue
                 if not line.strip():
                     current_offset += len(line) + 1
                     continue
@@ -282,8 +311,6 @@ def gen_colors(buffer: str) -> Iterator[ColorSpan]:
                 current_offset += len(line) + 1
 
 
-
-
 def color_config(text):
     """Set color options of Text widget.
 
diff --git a/Lib/idlelib/idle_test/test_colorizer.py b/Lib/idlelib/idle_test/test_colorizer.py
index 3c2a8bd12c0692..863a206656caa1 100644
--- a/Lib/idlelib/idle_test/test_colorizer.py
+++ b/Lib/idlelib/idle_test/test_colorizer.py
@@ -363,7 +363,7 @@ def test_recolorize_main(self, mock_notify):
                     #('23.4', ('KEYWORD',)), ('23.10', ('KEYWORD',)), ('23.14', ('KEYWORD',)), ('23.19', ('STRING',)),
                     #('24.12', ('KEYWORD',)),
                     ('25.8', ('KEYWORD',)),
-                    ('26.4', ('KEYWORD',)), ('26.9', ('KEYWORD',)),
+                    ('26.4', ('KEYWORD',)), # XXX ('26.9', ('KEYWORD',)),
                     ('26.11', ('KEYWORD',)), ('26.15', ('STRING',)),
                     ('26.19', ('KEYWORD',)), ('26.22', ()),
                     ('26.24', ('KEYWORD',)), ('26.29', ('BUILTIN',)), ('26.37', ('KEYWORD',)),
@@ -401,7 +401,7 @@ def test_recolorize_main(self, mock_notify):
         eq(text.tag_nextrange('STRING', '8.12'), ('8.14', '8.17'))
         eq(text.tag_nextrange('STRING', '8.17'), ('8.19', '8.26'))
         eq(text.tag_nextrange('SYNC', '8.0'), ('8.26', '9.0'))
-        eq(text.tag_nextrange('SYNC', '31.0'), ('31.10', '33.0'))
+        eq(text.tag_nextrange('SYNC', '31.0'), ('31.10', '32.0'))
 
     def _assert_highlighting(self, source, tag_ranges):
         """Check highlighting of a given piece of code.

From 65e4a2215aa14b6bf44e27cc26bc54213d5997a7 Mon Sep 17 00:00:00 2001
From: Stan Ulbrych <stan@ulbrych.org>
Date: Mon, 20 Oct 2025 19:58:34 +0100
Subject: [PATCH 7/8] Add blurb

---
 .../next/IDLE/2025-10-20-19-58-15.gh-issue-140347.4mMfYZ.rst     | 1 +
 1 file changed, 1 insertion(+)
 create mode 100644 Misc/NEWS.d/next/IDLE/2025-10-20-19-58-15.gh-issue-140347.4mMfYZ.rst

diff --git a/Misc/NEWS.d/next/IDLE/2025-10-20-19-58-15.gh-issue-140347.4mMfYZ.rst b/Misc/NEWS.d/next/IDLE/2025-10-20-19-58-15.gh-issue-140347.4mMfYZ.rst
new file mode 100644
index 00000000000000..d9b0b6cb06c52d
--- /dev/null
+++ b/Misc/NEWS.d/next/IDLE/2025-10-20-19-58-15.gh-issue-140347.4mMfYZ.rst
@@ -0,0 +1 @@
+IDLE's syntax colorizer now uses the :mod:`tokenize` module instead of regular expressions.

From 57e065a933343cb9809547812bb3cd46e3bb4154 Mon Sep 17 00:00:00 2001
From: Stan Ulbrych <stan@ulbrych.org>
Date: Mon, 20 Oct 2025 20:19:31 +0100
Subject: [PATCH 8/8] Remove some unused constants, convert others to
 frozensets

---
 Lib/idlelib/colorizer.py | 13 ++++---------
 1 file changed, 4 insertions(+), 9 deletions(-)

diff --git a/Lib/idlelib/colorizer.py b/Lib/idlelib/colorizer.py
index 6568c553987fb9..b32397165e931a 100644
--- a/Lib/idlelib/colorizer.py
+++ b/Lib/idlelib/colorizer.py
@@ -14,13 +14,8 @@
 
 DEBUG = False
 
-
-ANSI_ESCAPE_SEQUENCE = re.compile(r"\x1b\[[ -@]*[A-~]")
-ZERO_WIDTH_BRACKET = re.compile(r"\x01.*?\x02")
-ZERO_WIDTH_TRANS = str.maketrans({"\x01": "", "\x02": ""})
-IDENTIFIERS_AFTER = {"def", "class"}
-KEYWORD_CONSTANTS = {"True", "False", "None"}
-BUILTINS = {str(name) for name in dir(builtins) if not name.startswith('_')}
+IDENTIFIERS_AFTER = frozenset({"def", "class"})
+BUILTINS = frozenset({str(name) for name in dir(builtins) if not name.startswith('_')})
 
 
 class Span(NamedTuple):
@@ -76,8 +71,8 @@ def prev_next_window(iterable):
         yield tuple(window)
 
 
-keyword_first_sets_match = {"False", "None", "True", "await", "lambda", "not"}
-keyword_first_sets_case = {"False", "None", "True"}
+keyword_first_sets_match = frozenset({"False", "None", "True", "await", "lambda", "not"})
+keyword_first_sets_case = frozenset({"False", "None", "True"})
 
 
 def is_soft_keyword_used(*tokens):