From 48ad67cabb3426ec8ce8c2d28953a273c6f3bb8a Mon Sep 17 00:00:00 2001 From: Tushar Sadhwani Date: Sat, 29 Jul 2023 23:45:56 +0530 Subject: [PATCH 01/77] Add PEP701 support --- src/blib2to3/pgen2/token.py | 5 +- src/blib2to3/pgen2/tokenize.py | 113 ++++++++++++++++++++++++++++++--- 2 files changed, 109 insertions(+), 9 deletions(-) diff --git a/src/blib2to3/pgen2/token.py b/src/blib2to3/pgen2/token.py index ed2fc4e85fc..761cc1c7e88 100644 --- a/src/blib2to3/pgen2/token.py +++ b/src/blib2to3/pgen2/token.py @@ -66,7 +66,10 @@ ASYNC: Final = 57 ERRORTOKEN: Final = 58 COLONEQUAL: Final = 59 -N_TOKENS: Final = 60 +FSTRING_START: Final = 60 +FSTRING_MIDDLE: Final = 61 +FSTRING_END: Final = 62 +N_TOKENS: Final = 63 NT_OFFSET: Final = 256 # --end constants-- diff --git a/src/blib2to3/pgen2/tokenize.py b/src/blib2to3/pgen2/tokenize.py index d0607f4b1e1..f02c76284c1 100644 --- a/src/blib2to3/pgen2/tokenize.py +++ b/src/blib2to3/pgen2/tokenize.py @@ -27,6 +27,7 @@ function to which the 5 fields described above are passed as 5 arguments, each time a new token is found.""" +import io import sys from typing import ( Callable, @@ -57,6 +58,11 @@ NUMBER, OP, STRING, + LBRACE, + RBRACE, + FSTRING_START, + FSTRING_MIDDLE, + FSTRING_END, tok_name, ) @@ -66,7 +72,7 @@ import re from codecs import BOM_UTF8, lookup -from . import token +from blib2to3.pgen2 import token __all__ = [x for x in dir(token) if x[0] != "_"] + [ "tokenize", @@ -468,10 +474,12 @@ def generate_tokens( raise TokenError("EOF in multi-line string", strstart) endmatch = endprog.match(line) if endmatch: + endquote = endmatch.group(0) pos = end = endmatch.end(0) - yield ( - STRING, + yield from tokenize_string( contstr + line[:end], + startquote, + endquote, strstart, (lnum, end), contline + line, @@ -590,15 +598,19 @@ def generate_tokens( stashed = None yield (COMMENT, token, spos, epos, line) elif token in triple_quoted: - endprog = endprogs[token] + startquote = token + endprog = endprogs[startquote] endmatch = endprog.match(line, pos) if endmatch: # all on one line + endquote = endmatch.group(0) pos = endmatch.end(0) token = line[start:pos] if stashed: yield stashed stashed = None - yield (STRING, token, spos, (lnum, pos), line) + yield from tokenize_string( + token, startquote, endquote, spos, (lnum, pos), line + ) else: strstart = (lnum, start) # multiple lines contstr = line[start:] @@ -627,7 +639,18 @@ def generate_tokens( if stashed: yield stashed stashed = None - yield (STRING, token, spos, epos, line) + + if initial in single_quoted: + startquote = initial + elif token[:2] in single_quoted: + startquote = token[:2] + else: + startquote = token[:3] + + endquote = token[-1] + yield from tokenize_string( + token, startquote, endquote, spos, epos, line + ) elif initial.isidentifier(): # ordinary name if token in ("async", "await"): if async_keywords or async_def: @@ -694,8 +717,82 @@ def generate_tokens( yield (ENDMARKER, "", (lnum, 0), (lnum, 0), "") +def tokenize_string( + string: str, + startquote: str, + endquote: str, + startpos: Coord, + endpos: Coord, + line: str, +) -> GoodTokenInfo: + if not string.startswith(("f", "F")): + # regular strings can still be returned as usual + yield (STRING, string, startpos, endpos, line) + return + + lnum = startpos[0] + yield (FSTRING_START, startquote, startpos, (lnum, len(startquote)), line) + pos = len(startquote) + max = len(string) - len(endquote) + while pos < max: + opening_bracket_index = string.find("{", pos) + if opening_bracket_index == -1: + string_part = string[pos:max] + yield 
(FSTRING_MIDDLE, string_part, (lnum, pos), (lnum, max), line) + pos = max + else: + string_part = string[pos:opening_bracket_index] + yield ( + FSTRING_MIDDLE, + string_part, + (lnum, pos), + (lnum, opening_bracket_index), + line, + ) + yield ( + LBRACE, + "{", + (lnum, opening_bracket_index), + (lnum, opening_bracket_index + 1), + line, + ) + pos = opening_bracket_index + 1 + + # TODO: skip over {{ + if pos < max: + inner_source = string[pos:max] + curly_brace_level = 1 + startpos = pos + for token in generate_tokens(io.StringIO(inner_source).readline): + pos = startpos + token[3][1] + + if token[0] == OP and token[1] == "{": + curly_brace_level += 1 + elif token[0] == OP and token[1] == "}": + curly_brace_level -= 1 + + if curly_brace_level == 0: + yield ( + RBRACE, + "}", + (lnum, pos), + (lnum, pos + 1), + line, + ) + break + + token_with_updated_pos = ( + token[0], + token[1], + (token[2][0], startpos + token[2][1]), + (token[3][0], startpos + token[3][1]), + token[4], + ) + yield token_with_updated_pos + + yield (FSTRING_END, endquote, (lnum, max), endpos, line) + + if __name__ == "__main__": # testing if len(sys.argv) > 1: tokenize(open(sys.argv[1]).readline) - else: - tokenize(sys.stdin.readline) From 175942b906da22583821c86369de5d5fe9cf38e3 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sat, 29 Jul 2023 18:18:29 +0000 Subject: [PATCH 02/77] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/blib2to3/pgen2/tokenize.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/blib2to3/pgen2/tokenize.py b/src/blib2to3/pgen2/tokenize.py index f02c76284c1..0e8815e9da5 100644 --- a/src/blib2to3/pgen2/tokenize.py +++ b/src/blib2to3/pgen2/tokenize.py @@ -51,18 +51,18 @@ DEDENT, ENDMARKER, ERRORTOKEN, + FSTRING_END, + FSTRING_MIDDLE, + FSTRING_START, INDENT, + LBRACE, NAME, NEWLINE, NL, NUMBER, OP, - STRING, - LBRACE, RBRACE, - FSTRING_START, - FSTRING_MIDDLE, - FSTRING_END, + STRING, tok_name, ) From 9e344f43283024d911a5bab1d0e8cb93c35aae49 Mon Sep 17 00:00:00 2001 From: Tushar Sadhwani Date: Mon, 14 Aug 2023 00:59:11 +0530 Subject: [PATCH 03/77] Add FSTRING_START and FSTRING_MIDDLE tokenizing --- src/blib2to3/pgen2/tokenize.py | 178 ++++++++++++--------------------- 1 file changed, 63 insertions(+), 115 deletions(-) diff --git a/src/blib2to3/pgen2/tokenize.py b/src/blib2to3/pgen2/tokenize.py index 0e8815e9da5..ea90cccd8c6 100644 --- a/src/blib2to3/pgen2/tokenize.py +++ b/src/blib2to3/pgen2/tokenize.py @@ -27,7 +27,6 @@ function to which the 5 fields described above are passed as 5 arguments, each time a new token is found.""" -import io import sys from typing import ( Callable, @@ -61,7 +60,6 @@ NL, NUMBER, OP, - RBRACE, STRING, tok_name, ) @@ -72,7 +70,7 @@ import re from codecs import BOM_UTF8, lookup -from blib2to3.pgen2 import token +from . import token __all__ = [x for x in dir(token) if x[0] != "_"] + [ "tokenize", @@ -127,13 +125,12 @@ def _combinations(*l: str) -> Set[str]: Single3 = r"[^'\\]*(?:(?:\\.|'(?!''))[^'\\]*)*'''" # Tail end of """ string. Double3 = r'[^"\\]*(?:(?:\\.|"(?!""))[^"\\]*)*"""' -_litprefix = r"(?:[uUrRbBfF]|[rR][fFbB]|[fFbBuU][rR])?" +_litprefix = r"(?:[uUrRbB]|[rR][bB]|[bBuU][rR])?" +_fstringlitprefix = r"(?:rF|FR|Fr|fr|RF|F|rf|f|Rf|fR)" Triple = group(_litprefix + "'''", _litprefix + '"""') -# Single-line ' or " string. 
-String = group( - _litprefix + r"'[^\n'\\]*(?:\\.[^\n'\\]*)*'", - _litprefix + r'"[^\n"\\]*(?:\\.[^\n"\\]*)*"', -) + +SingleLbrace = r"[^{\\]*(?:\\.[^{\\]*)*{" +DoubleLbrace = r"[^{\\]*(?:\\.[^{\\]*)*{" # Because of leftmost-then-longest match semantics, be sure to put the # longest operators first (e.g., if = came before ==, == would get @@ -155,41 +152,57 @@ def _combinations(*l: str) -> Set[str]: Funny = group(Operator, Bracket, Special) # First (or only) line of ' or " string. +# TODO: handle escaping `{{` ContStr = group( _litprefix + r"'[^\n'\\]*(?:\\.[^\n'\\]*)*" + group("'", r"\\\r?\n"), _litprefix + r'"[^\n"\\]*(?:\\.[^\n"\\]*)*' + group('"', r"\\\r?\n"), + rf"({_fstringlitprefix}')[^\n'\\{{]*(?:\\.[^\n'\\{{]*)*" + + group("'", "{", r"\\\r?\n"), + rf'({_fstringlitprefix}")[^\n"\\{{]*(?:\\.[^\n"\\{{]*)*' + + group('"', "{", r"\\\r?\n"), ) PseudoExtras = group(r"\\\r?\n", Comment, Triple) PseudoToken = Whitespace + group(PseudoExtras, Number, Funny, ContStr, Name) pseudoprog: Final = re.compile(PseudoToken, re.UNICODE) + +singleprog = re.compile(Single) +singleprog_plus_lbrace = re.compile(group(SingleLbrace, Single)) +doubleprog = re.compile(Double) +doubleprog_plus_lbrace = re.compile(group(DoubleLbrace, Double)) + single3prog = re.compile(Single3) +single3prog_plus_lbrace = re.compile(group(SingleLbrace, Single3)) double3prog = re.compile(Double3) +double3prog_plus_lbrace = re.compile(group(DoubleLbrace, Double3)) -_strprefixes = ( - _combinations("r", "R", "f", "F") - | _combinations("r", "R", "b", "B") - | {"u", "U", "ur", "uR", "Ur", "UR"} -) +_strprefixes = _combinations("r", "R", "b", "B") | {"u", "U", "ur", "uR", "Ur", "UR"} +_fstring_prefixes = _combinations("r", "R", "f", "F") - {"r", "R"} endprogs: Final = { - "'": re.compile(Single), - '"': re.compile(Double), + "'": singleprog, + '"': doubleprog, "'''": single3prog, '"""': double3prog, + **{f"{prefix}'": singleprog for prefix in _strprefixes}, + **{f'{prefix}"': doubleprog for prefix in _strprefixes}, + **{f"{prefix}'": singleprog_plus_lbrace for prefix in _fstring_prefixes}, + **{f'{prefix}"': doubleprog_plus_lbrace for prefix in _fstring_prefixes}, **{f"{prefix}'''": single3prog for prefix in _strprefixes}, **{f'{prefix}"""': double3prog for prefix in _strprefixes}, + **{f"{prefix}'''": single3prog_plus_lbrace for prefix in _fstring_prefixes}, + **{f'{prefix}"""': double3prog_plus_lbrace for prefix in _fstring_prefixes}, } triple_quoted: Final = ( {"'''", '"""'} - | {f"{prefix}'''" for prefix in _strprefixes} - | {f'{prefix}"""' for prefix in _strprefixes} + | {f"{prefix}'''" for prefix in _strprefixes | _fstring_prefixes} + | {f'{prefix}"""' for prefix in _strprefixes | _fstring_prefixes} ) single_quoted: Final = ( {"'", '"'} - | {f"{prefix}'" for prefix in _strprefixes} - | {f'{prefix}"' for prefix in _strprefixes} + | {f"{prefix}'" for prefix in _strprefixes | _fstring_prefixes} + | {f'{prefix}"' for prefix in _strprefixes | _fstring_prefixes} ) tabsize = 8 @@ -474,12 +487,10 @@ def generate_tokens( raise TokenError("EOF in multi-line string", strstart) endmatch = endprog.match(line) if endmatch: - endquote = endmatch.group(0) pos = end = endmatch.end(0) - yield from tokenize_string( + yield ( + STRING, contstr + line[:end], - startquote, - endquote, strstart, (lnum, end), contline + line, @@ -598,19 +609,15 @@ def generate_tokens( stashed = None yield (COMMENT, token, spos, epos, line) elif token in triple_quoted: - startquote = token - endprog = endprogs[startquote] + endprog = endprogs[token] endmatch = 
endprog.match(line, pos) if endmatch: # all on one line - endquote = endmatch.group(0) pos = endmatch.end(0) token = line[start:pos] if stashed: yield stashed stashed = None - yield from tokenize_string( - token, startquote, endquote, spos, (lnum, pos), line - ) + yield (STRING, token, spos, (lnum, pos), line) else: strstart = (lnum, start) # multiple lines contstr = line[start:] @@ -640,17 +647,32 @@ def generate_tokens( yield stashed stashed = None - if initial in single_quoted: - startquote = initial - elif token[:2] in single_quoted: - startquote = token[:2] + # TODO: move this logic to a function + if not token.endswith("{"): + yield (STRING, token, spos, epos, line) else: - startquote = token[:3] + if pseudomatch[20] is not None: + fstring_start = pseudomatch[20] + offset = pseudomatch.end(20) - pseudomatch.start() + start_epos = (lnum, start + offset) + else: + fstring_start = pseudomatch[22] + offset = pseudomatch.end(22) - pseudomatch.start() + start_epos = (lnum, start + offset - 1) + yield (FSTRING_START, fstring_start, spos, start_epos, line) + end_offset = pseudomatch.end() - 1 + fstring_middle = line[start + offset - 1 : end_offset] + middle_spos = (lnum, start + offset) + middle_epos = (lnum, end_offset + 1) + yield ( + FSTRING_MIDDLE, + fstring_middle, + middle_spos, + middle_epos, + line, + ) + yield (LBRACE, "{", (lnum, end_offset + 1), epos, line) - endquote = token[-1] - yield from tokenize_string( - token, startquote, endquote, spos, epos, line - ) elif initial.isidentifier(): # ordinary name if token in ("async", "await"): if async_keywords or async_def: @@ -717,82 +739,8 @@ def generate_tokens( yield (ENDMARKER, "", (lnum, 0), (lnum, 0), "") -def tokenize_string( - string: str, - startquote: str, - endquote: str, - startpos: Coord, - endpos: Coord, - line: str, -) -> GoodTokenInfo: - if not string.startswith(("f", "F")): - # regular strings can still be returned as usual - yield (STRING, string, startpos, endpos, line) - return - - lnum = startpos[0] - yield (FSTRING_START, startquote, startpos, (lnum, len(startquote)), line) - pos = len(startquote) - max = len(string) - len(endquote) - while pos < max: - opening_bracket_index = string.find("{", pos) - if opening_bracket_index == -1: - string_part = string[pos:max] - yield (FSTRING_MIDDLE, string_part, (lnum, pos), (lnum, max), line) - pos = max - else: - string_part = string[pos:opening_bracket_index] - yield ( - FSTRING_MIDDLE, - string_part, - (lnum, pos), - (lnum, opening_bracket_index), - line, - ) - yield ( - LBRACE, - "{", - (lnum, opening_bracket_index), - (lnum, opening_bracket_index + 1), - line, - ) - pos = opening_bracket_index + 1 - - # TODO: skip over {{ - if pos < max: - inner_source = string[pos:max] - curly_brace_level = 1 - startpos = pos - for token in generate_tokens(io.StringIO(inner_source).readline): - pos = startpos + token[3][1] - - if token[0] == OP and token[1] == "{": - curly_brace_level += 1 - elif token[0] == OP and token[1] == "}": - curly_brace_level -= 1 - - if curly_brace_level == 0: - yield ( - RBRACE, - "}", - (lnum, pos), - (lnum, pos + 1), - line, - ) - break - - token_with_updated_pos = ( - token[0], - token[1], - (token[2][0], startpos + token[2][1]), - (token[3][0], startpos + token[3][1]), - token[4], - ) - yield token_with_updated_pos - - yield (FSTRING_END, endquote, (lnum, max), endpos, line) - - if __name__ == "__main__": # testing if len(sys.argv) > 1: tokenize(open(sys.argv[1]).readline) + else: + tokenize(sys.stdin.readline) From 
dbdb02c5c1b8d8bcd89b3f5d6ca8ff657d48fea0 Mon Sep 17 00:00:00 2001 From: Tushar Sadhwani Date: Wed, 16 Aug 2023 00:46:13 +0530 Subject: [PATCH 04/77] Support escaping of `{{` --- src/blib2to3/pgen2/tokenize.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/src/blib2to3/pgen2/tokenize.py b/src/blib2to3/pgen2/tokenize.py index ea90cccd8c6..1158bfc1d29 100644 --- a/src/blib2to3/pgen2/tokenize.py +++ b/src/blib2to3/pgen2/tokenize.py @@ -152,14 +152,11 @@ def _combinations(*l: str) -> Set[str]: Funny = group(Operator, Bracket, Special) # First (or only) line of ' or " string. -# TODO: handle escaping `{{` ContStr = group( _litprefix + r"'[^\n'\\]*(?:\\.[^\n'\\]*)*" + group("'", r"\\\r?\n"), _litprefix + r'"[^\n"\\]*(?:\\.[^\n"\\]*)*' + group('"', r"\\\r?\n"), - rf"({_fstringlitprefix}')[^\n'\\{{]*(?:\\.[^\n'\\{{]*)*" - + group("'", "{", r"\\\r?\n"), - rf'({_fstringlitprefix}")[^\n"\\{{]*(?:\\.[^\n"\\{{]*)*' - + group('"', "{", r"\\\r?\n"), + group(_fstringlitprefix + "'") + r"[^\n'\\]*(?:\\.[^\n'\\]*)*({{)(? Date: Wed, 16 Aug 2023 00:50:56 +0530 Subject: [PATCH 05/77] typo --- src/blib2to3/pgen2/tokenize.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/blib2to3/pgen2/tokenize.py b/src/blib2to3/pgen2/tokenize.py index 1158bfc1d29..19dc35c2b52 100644 --- a/src/blib2to3/pgen2/tokenize.py +++ b/src/blib2to3/pgen2/tokenize.py @@ -155,8 +155,8 @@ def _combinations(*l: str) -> Set[str]: ContStr = group( _litprefix + r"'[^\n'\\]*(?:\\.[^\n'\\]*)*" + group("'", r"\\\r?\n"), _litprefix + r'"[^\n"\\]*(?:\\.[^\n"\\]*)*' + group('"', r"\\\r?\n"), - group(_fstringlitprefix + "'") + r"[^\n'\\]*(?:\\.[^\n'\\]*)*({{)(? Date: Sun, 27 Aug 2023 19:08:58 +0530 Subject: [PATCH 06/77] fix some problems with triple quoted strings --- src/blib2to3/pgen2/tokenize.py | 45 ++++++++++++++++++++++++++++------ 1 file changed, 37 insertions(+), 8 deletions(-) diff --git a/src/blib2to3/pgen2/tokenize.py b/src/blib2to3/pgen2/tokenize.py index 19dc35c2b52..6ce1a579bba 100644 --- a/src/blib2to3/pgen2/tokenize.py +++ b/src/blib2to3/pgen2/tokenize.py @@ -127,11 +127,20 @@ def _combinations(*l: str) -> Set[str]: Double3 = r'[^"\\]*(?:(?:\\.|"(?!""))[^"\\]*)*"""' _litprefix = r"(?:[uUrRbB]|[rR][bB]|[bBuU][rR])?" _fstringlitprefix = r"(?:rF|FR|Fr|fr|RF|F|rf|f|Rf|fR)" -Triple = group(_litprefix + "'''", _litprefix + '"""') +Triple = group( + _litprefix + "'''", + _litprefix + '"""', + _fstringlitprefix + '"""', + _fstringlitprefix + "'''", +) +# TODO: these two are the same. remove one SingleLbrace = r"[^{\\]*(?:\\.[^{\\]*)*{" DoubleLbrace = r"[^{\\]*(?:\\.[^{\\]*)*{" +Single3Lbrace = r"[^'\\]*(?:(?:\\.|'(?!''))[^'\\]*)*{" +Double3Lbrace = r'[^"\\]*(?:(?:\\.|"(?!""))[^"\\]*)*{' + # Because of leftmost-then-longest match semantics, be sure to put the # longest operators first (e.g., if = came before ==, == would get # recognized as two instances of =). @@ -151,12 +160,16 @@ def _combinations(*l: str) -> Set[str]: Special = group(r"\r?\n", r"[:;.,`@]") Funny = group(Operator, Bracket, Special) +# FSTRING_MIDDLE and LBRACE, inside a single quoted fstring +_fstring_middle_single = r"[^\n'\\]*(?:\\.[^\n'\\]*)*({)(? 
Set[str]: doubleprog_plus_lbrace = re.compile(group(DoubleLbrace, Double)) single3prog = re.compile(Single3) -single3prog_plus_lbrace = re.compile(group(SingleLbrace, Single3)) +single3prog_plus_lbrace = re.compile(group(Single3Lbrace, Single3)) double3prog = re.compile(Double3) -double3prog_plus_lbrace = re.compile(group(DoubleLbrace, Double3)) +double3prog_plus_lbrace = re.compile(group(Double3Lbrace, Double3)) _strprefixes = _combinations("r", "R", "b", "B") | {"u", "U", "ur", "uR", "Ur", "UR"} _fstring_prefixes = _combinations("r", "R", "f", "F") - {"r", "R"} @@ -609,12 +622,28 @@ def generate_tokens( endprog = endprogs[token] endmatch = endprog.match(line, pos) if endmatch: # all on one line - pos = endmatch.end(0) - token = line[start:pos] if stashed: yield stashed stashed = None - yield (STRING, token, spos, (lnum, pos), line) + # TODO: move this logic to a function + # TODO: not how you should identify FSTRING_START + if not token.startswith("f"): + pos = endmatch.end(0) + token = line[start:pos] + yield (STRING, token, spos, epos, line) + else: + # TODO: most of this is wrong + yield (FSTRING_START, token, spos, epos, line) + pos = endmatch.end(0) + token = line[start:pos] + yield ( + FSTRING_MIDDLE, + token, + spos, + epos, + line, + ) + yield (LBRACE, "{", epos, epos, line) else: strstart = (lnum, start) # multiple lines contstr = line[start:] From ee30cde47372e2fe8404ea3fffdd9becc75d1a92 Mon Sep 17 00:00:00 2001 From: Tushar Sadhwani Date: Sun, 27 Aug 2023 20:48:05 +0530 Subject: [PATCH 07/77] Add support for FSTRING_MIDDLE and FSTRING_END --- src/blib2to3/pgen2/tokenize.py | 63 +++++++++++++++++++++++++++------- 1 file changed, 50 insertions(+), 13 deletions(-) diff --git a/src/blib2to3/pgen2/tokenize.py b/src/blib2to3/pgen2/tokenize.py index 6ce1a579bba..1a963639ef0 100644 --- a/src/blib2to3/pgen2/tokenize.py +++ b/src/blib2to3/pgen2/tokenize.py @@ -60,6 +60,7 @@ NL, NUMBER, OP, + RBRACE, STRING, tok_name, ) @@ -465,7 +466,8 @@ def generate_tokens( and the line on which the token was found. The line passed is the logical line; continuation lines are included. 
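    F-strings additionally produce FSTRING_START, FSTRING_MIDDLE and
    FSTRING_END tokens, with LBRACE and RBRACE tokens around each
    replacement field.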
""" - lnum = parenlev = continued = 0 + lnum = parenlev = fstring_level = continued = 0 + inside_fstring_braces = False numchars: Final[str] = "0123456789" contstr, needcont = "", 0 contline: Optional[str] = None @@ -491,7 +493,7 @@ def generate_tokens( lnum += 1 pos, max = 0, len(line) - if contstr: # continued string + if contstr and not inside_fstring_braces: # continued string assert contline is not None if not line: raise TokenError("EOF in multi-line string", strstart) @@ -523,7 +525,8 @@ def generate_tokens( contline = contline + line continue - elif parenlev == 0 and not continued: # new statement + # new statement + elif parenlev == 0 and not continued and not inside_fstring_braces: if not line: break column = 0 @@ -591,6 +594,32 @@ def generate_tokens( continued = 0 while pos < max: + if fstring_level > 0 and not inside_fstring_braces: + endmatch = endprog.match(line, pos) + if endmatch: # all on one line + start, end = endmatch.span(0) + token = line[start:end] + pos = end + # TODO: unsure if this can be safely removed + if stashed: + yield stashed + stashed = None + if not token.endswith("{"): + # TODO: locations + yield (FSTRING_MIDDLE, token, (lnum, 0), (lnum, 0), line) + yield (FSTRING_END, token, (lnum, 0), (lnum, 0), line) + fstring_level -= 1 + else: + # TODO: most of the positions are wrong + yield (FSTRING_MIDDLE, token, (lnum, 0), (lnum, 0), line) + yield (LBRACE, "{", (lnum, 0), (lnum, 0), line) + inside_fstring_braces = True + else: # multiple lines + breakpoint() # TODO: see if the code below is correct + contstr += line + contline += line + break + pseudomatch = pseudoprog.match(line, pos) if pseudomatch: # scan for tokens start, end = pseudomatch.span(1) @@ -632,8 +661,9 @@ def generate_tokens( token = line[start:pos] yield (STRING, token, spos, epos, line) else: - # TODO: most of this is wrong + # TODO: most of the positions are wrong yield (FSTRING_START, token, spos, epos, line) + fstring_level += 1 pos = endmatch.end(0) token = line[start:pos] yield ( @@ -644,6 +674,7 @@ def generate_tokens( line, ) yield (LBRACE, "{", epos, epos, line) + inside_fstring_braces = True else: strstart = (lnum, start) # multiple lines contstr = line[start:] @@ -654,17 +685,17 @@ def generate_tokens( or token[:2] in single_quoted or token[:3] in single_quoted ): + maybe_endprog = ( + endprogs.get(initial) + or endprogs.get(token[1]) + or endprogs.get(token[2]) + ) + assert ( + maybe_endprog is not None + ), f"endprog not found for {token}" + endprog = maybe_endprog if token[-1] == "\n": # continued string strstart = (lnum, start) - maybe_endprog = ( - endprogs.get(initial) - or endprogs.get(token[1]) - or endprogs.get(token[2]) - ) - assert ( - maybe_endprog is not None - ), f"endprog not found for {token}" - endprog = maybe_endprog contstr, needcont = line[start:], 1 contline = line break @@ -686,6 +717,8 @@ def generate_tokens( offset = pseudomatch.end(22) - pseudomatch.start() start_epos = (lnum, start + offset - 1) yield (FSTRING_START, fstring_start, spos, start_epos, line) + fstring_level += 1 + end_offset = pseudomatch.end() - 1 fstring_middle = line[start + offset - 1 : end_offset] middle_spos = (lnum, start + offset) @@ -698,6 +731,7 @@ def generate_tokens( line, ) yield (LBRACE, "{", (lnum, end_offset + 1), epos, line) + inside_fstring_braces = True elif initial.isidentifier(): # ordinary name if token in ("async", "await"): @@ -743,6 +777,9 @@ def generate_tokens( stashed = None yield (NL, token, spos, (lnum, pos), line) continued = 1 + elif initial == '}' and parenlev 
== 0 and inside_fstring_braces: + inside_fstring_braces = False + yield (RBRACE, token, spos, epos, line) else: if initial in "([{": parenlev += 1 From e7b58500b91d00b55f4339d20d4bc72862d2d0f2 Mon Sep 17 00:00:00 2001 From: Tushar Sadhwani Date: Tue, 29 Aug 2023 18:35:51 +0530 Subject: [PATCH 08/77] bugfix and simplify the regexes --- src/blib2to3/pgen2/tokenize.py | 23 ++++++++++------------- 1 file changed, 10 insertions(+), 13 deletions(-) diff --git a/src/blib2to3/pgen2/tokenize.py b/src/blib2to3/pgen2/tokenize.py index 1a963639ef0..4ae4324074f 100644 --- a/src/blib2to3/pgen2/tokenize.py +++ b/src/blib2to3/pgen2/tokenize.py @@ -135,12 +135,11 @@ def _combinations(*l: str) -> Set[str]: _fstringlitprefix + "'''", ) -# TODO: these two are the same. remove one -SingleLbrace = r"[^{\\]*(?:\\.[^{\\]*)*{" -DoubleLbrace = r"[^{\\]*(?:\\.[^{\\]*)*{" +SingleLbrace = r"[^'\\{]*(?:(?:\\.|{{)[^'\\{]*)*{" +DoubleLbrace = r'[^"\\{]*(?:(?:\\.|{{)[^"\\{]*)*{' -Single3Lbrace = r"[^'\\]*(?:(?:\\.|'(?!''))[^'\\]*)*{" -Double3Lbrace = r'[^"\\]*(?:(?:\\.|"(?!""))[^"\\]*)*{' +Single3Lbrace = r"[^'\\{]*(?:(?:\\.|{{|'(?!''))[^'\\{]*)*{" +Double3Lbrace = r'[^"\\{]*(?:(?:\\.|{{|"(?!""))[^"\\{]*)*{' # Because of leftmost-then-longest match semantics, be sure to put the # longest operators first (e.g., if = came before ==, == would get @@ -162,8 +161,8 @@ def _combinations(*l: str) -> Set[str]: Funny = group(Operator, Bracket, Special) # FSTRING_MIDDLE and LBRACE, inside a single quoted fstring -_fstring_middle_single = r"[^\n'\\]*(?:\\.[^\n'\\]*)*({)(? Date: Wed, 6 Sep 2023 15:51:30 +0530 Subject: [PATCH 09/77] Fix small regex problems --- src/blib2to3/pgen2/tokenize.py | 44 +++++++++++++++++++++------------- 1 file changed, 28 insertions(+), 16 deletions(-) diff --git a/src/blib2to3/pgen2/tokenize.py b/src/blib2to3/pgen2/tokenize.py index 4ae4324074f..fdb5f4e74cf 100644 --- a/src/blib2to3/pgen2/tokenize.py +++ b/src/blib2to3/pgen2/tokenize.py @@ -135,11 +135,11 @@ def _combinations(*l: str) -> Set[str]: _fstringlitprefix + "'''", ) -SingleLbrace = r"[^'\\{]*(?:(?:\\.|{{)[^'\\{]*)*{" -DoubleLbrace = r'[^"\\{]*(?:(?:\\.|{{)[^"\\{]*)*{' +SingleLbrace = r"[^'\\{]*(?:(?:\\.|{{)[^'\\{]*)*{(?!{)" +DoubleLbrace = r'[^"\\{]*(?:(?:\\.|{{)[^"\\{]*)*{(?!{)' -Single3Lbrace = r"[^'\\{]*(?:(?:\\.|{{|'(?!''))[^'\\{]*)*{" -Double3Lbrace = r'[^"\\{]*(?:(?:\\.|{{|"(?!""))[^"\\{]*)*{' +Single3Lbrace = r"[^'\\{]*(?:(?:\\.|{{|'(?!''))[^'\\{]*)*{(?!{)" +Double3Lbrace = r'[^"\\{]*(?:(?:\\.|{{|"(?!""))[^"\\{]*)*{(?!{)' # Because of leftmost-then-longest match semantics, be sure to put the # longest operators first (e.g., if = came before ==, == would get @@ -499,13 +499,24 @@ def generate_tokens( endmatch = endprog.match(line) if endmatch: pos = end = endmatch.end(0) - yield ( - STRING, - contstr + line[:end], - strstart, - (lnum, end), - contline + line, - ) + token = contstr + line[:end] + spos = strstart + epos = (lnum, end) + tokenline = contline + line + # TODO: better way to detect fstring + if fstring_level == 0: + yield (STRING, token, spos, epos, tokenline) + else: + # TODO: positions are all wrong + yield (FSTRING_MIDDLE, token, spos, epos, tokenline) + if token.endswith("{"): + yield (LBRACE, "{", spos, epos, tokenline) + inside_fstring_braces = True + else: + yield (FSTRING_END, token, spos, epos, tokenline) + fstring_level -= 1 + # TODO: contstr reliance doesn't work now because we can be inside + # an fstring and still empty contstr right here. 
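+            # i.e. a multiline f-string can still be open when contstr is
+            # cleared below, so f-string state cannot live in contstr alone.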
contstr, needcont = "", 0 contline = None elif needcont and line[-2:] != "\\\n" and line[-3:] != "\\\r\n": @@ -614,9 +625,8 @@ def generate_tokens( yield (LBRACE, "{", (lnum, 0), (lnum, 0), line) inside_fstring_braces = True else: # multiple lines - breakpoint() # TODO: see if the code below is correct contstr += line - contline += line + contline = line break pseudomatch = pseudoprog.match(line, pos) @@ -647,6 +657,10 @@ def generate_tokens( stashed = None yield (COMMENT, token, spos, epos, line) elif token in triple_quoted: + if token.startswith("f"): + yield (FSTRING_START, token, spos, epos, line) + fstring_level += 1 + endprog = endprogs[token] endmatch = endprog.match(line, pos) if endmatch: # all on one line @@ -661,8 +675,6 @@ def generate_tokens( yield (STRING, token, spos, epos, line) else: # TODO: most of the positions are wrong - yield (FSTRING_START, token, spos, epos, line) - fstring_level += 1 pos = endmatch.end(0) token = line[start:pos] yield ( @@ -775,8 +787,8 @@ def generate_tokens( yield (NL, token, spos, (lnum, pos), line) continued = 1 elif initial == "}" and parenlev == 0 and inside_fstring_braces: - inside_fstring_braces = False yield (RBRACE, token, spos, epos, line) + inside_fstring_braces = False else: if initial in "([{": parenlev += 1 From c1ecc146f84682276f2490838f5c1b39aeb88197 Mon Sep 17 00:00:00 2001 From: Tushar Sadhwani Date: Thu, 7 Sep 2023 00:28:15 +0530 Subject: [PATCH 10/77] fix newline type --- src/blib2to3/pgen2/tokenize.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/blib2to3/pgen2/tokenize.py b/src/blib2to3/pgen2/tokenize.py index fdb5f4e74cf..95dd6aa63b6 100644 --- a/src/blib2to3/pgen2/tokenize.py +++ b/src/blib2to3/pgen2/tokenize.py @@ -641,7 +641,7 @@ def generate_tokens( yield (NUMBER, token, spos, epos, line) elif initial in "\r\n": newline = NEWLINE - if parenlev > 0: + if parenlev > 0 or inside_fstring_braces: newline = NL elif async_def: async_def_nl = True From 644c5cc05906effef9c77699541776cc71906612 Mon Sep 17 00:00:00 2001 From: Tushar Sadhwani Date: Sun, 10 Sep 2023 15:12:39 +0530 Subject: [PATCH 11/77] turn endprog into endprog_stack --- src/blib2to3/pgen2/tokenize.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/src/blib2to3/pgen2/tokenize.py b/src/blib2to3/pgen2/tokenize.py index 95dd6aa63b6..396b21a5c33 100644 --- a/src/blib2to3/pgen2/tokenize.py +++ b/src/blib2to3/pgen2/tokenize.py @@ -482,7 +482,7 @@ def generate_tokens( async_def_nl = False strstart: Tuple[int, int] - endprog: Pattern[str] + endprog_stack: list[Pattern[str]] = [] while 1: # loop over lines in stream try: @@ -496,6 +496,7 @@ def generate_tokens( assert contline is not None if not line: raise TokenError("EOF in multi-line string", strstart) + endprog = endprog_stack[-1] endmatch = endprog.match(line) if endmatch: pos = end = endmatch.end(0) @@ -515,6 +516,7 @@ def generate_tokens( else: yield (FSTRING_END, token, spos, epos, tokenline) fstring_level -= 1 + endprog_stack.pop() # TODO: contstr reliance doesn't work now because we can be inside # an fstring and still empty contstr right here. 
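            # endprog_stack keeps one compiled end pattern per still-open
            # (f-)string, e.g. f"{f'{x}'}" stacks the double-quote pattern
            # and then the single-quote pattern.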
contstr, needcont = "", 0 @@ -605,6 +607,7 @@ def generate_tokens( while pos < max: if fstring_level > 0 and not inside_fstring_braces: + endprog = endprog_stack[-1] endmatch = endprog.match(line, pos) if endmatch: # all on one line start, end = endmatch.span(0) @@ -619,6 +622,7 @@ def generate_tokens( yield (FSTRING_MIDDLE, token, (lnum, 0), (lnum, 0), line) yield (FSTRING_END, token, (lnum, 0), (lnum, 0), line) fstring_level -= 1 + endprog_stack.pop() else: # TODO: most of the positions are wrong yield (FSTRING_MIDDLE, token, (lnum, 0), (lnum, 0), line) @@ -657,11 +661,12 @@ def generate_tokens( stashed = None yield (COMMENT, token, spos, epos, line) elif token in triple_quoted: + endprog = endprogs[token] if token.startswith("f"): yield (FSTRING_START, token, spos, epos, line) fstring_level += 1 + endprog_stack.append(endprog) - endprog = endprogs[token] endmatch = endprog.match(line, pos) if endmatch: # all on one line if stashed: @@ -704,6 +709,7 @@ def generate_tokens( assert maybe_endprog is not None, f"endprog not found for {token}" endprog = maybe_endprog if token[-1] == "\n": # continued string + endprog_stack.append(endprog) strstart = (lnum, start) contstr, needcont = line[start:], 1 contline = line @@ -727,6 +733,8 @@ def generate_tokens( start_epos = (lnum, start + offset - 1) yield (FSTRING_START, fstring_start, spos, start_epos, line) fstring_level += 1 + endprog = endprogs[fstring_start] + endprog_stack.append(endprog) end_offset = pseudomatch.end() - 1 fstring_middle = line[start + offset - 1 : end_offset] From b23cdfd07242173fbdcc3c267b9a7ae573f11fb6 Mon Sep 17 00:00:00 2001 From: Tushar Sadhwani Date: Tue, 12 Sep 2023 00:11:57 +0530 Subject: [PATCH 12/77] Support fstrings with no braces --- src/blib2to3/pgen2/tokenize.py | 56 ++++++++++++++++++++++++++-------- 1 file changed, 44 insertions(+), 12 deletions(-) diff --git a/src/blib2to3/pgen2/tokenize.py b/src/blib2to3/pgen2/tokenize.py index 396b21a5c33..95eba97a53e 100644 --- a/src/blib2to3/pgen2/tokenize.py +++ b/src/blib2to3/pgen2/tokenize.py @@ -160,16 +160,21 @@ def _combinations(*l: str) -> Set[str]: Special = group(r"\r?\n", r"[:;.,`@]") Funny = group(Operator, Bracket, Special) +_string_middle_single = r"[^\n'\\]*(?:\\.[^\n'\\]*)*" +_string_middle_double = r'[^\n"\\]*(?:\\.[^\n"\\]*)*' + # FSTRING_MIDDLE and LBRACE, inside a single quoted fstring -_fstring_middle_single = r"[^\n'\\{]*(?:(?:\\.|{{)[^\n'\\{]*)*({)" -_fstring_middle_double = r'[^\n"\\{]*(?:(?:\\.|{{)[^\n"\\{]*)*({)' +_fstring_middle_single = r"[^\n'\\{]*(?:(?:\\.|{{)[^\n'\\{]*)*({)(?!{)" +_fstring_middle_double = r'[^\n"\\{]*(?:(?:\\.|{{)[^\n"\\{]*)*({)(?!{)' # First (or only) line of ' or " string. ContStr = group( - _litprefix + r"'[^\n'\\]*(?:\\.[^\n'\\]*)*" + group("'", r"\\\r?\n"), - _litprefix + r'"[^\n"\\]*(?:\\.[^\n"\\]*)*' + group('"', r"\\\r?\n"), + _litprefix + "'" + _string_middle_single + group("'", r"\\\r?\n"), + _litprefix + '"' + _string_middle_double + group('"', r"\\\r?\n"), group(_fstringlitprefix + "'") + _fstring_middle_single, group(_fstringlitprefix + '"') + _fstring_middle_double, + group(_fstringlitprefix + "'") + _string_middle_single + group("'", r"\\\r?\n"), + group(_fstringlitprefix + '"') + _string_middle_double + group('"', r"\\\r?\n"), ) PseudoExtras = group(r"\\\r?\n", Comment, Triple) PseudoToken = Whitespace + group(PseudoExtras, Number, Funny, ContStr, Name) @@ -492,6 +497,13 @@ def generate_tokens( lnum += 1 pos, max = 0, len(line) + + # TODO: probably inside_fstring_braces is not the best boolean. 
+ # what about a case of a string inside a multiline fstring inside a + # multiline fstring?? + # for eg. this doesn't work right now: f"{f'{2+2}'}" + # because inside_fstring_braces gets set to false after the first `}` + # print(f'{parenlev = } {continued = } {inside_fstring_braces = }') if contstr and not inside_fstring_braces: # continued string assert contline is not None if not line: @@ -514,7 +526,7 @@ def generate_tokens( yield (LBRACE, "{", spos, epos, tokenline) inside_fstring_braces = True else: - yield (FSTRING_END, token, spos, epos, tokenline) + yield (FSTRING_END, token[-1], spos, epos, tokenline) fstring_level -= 1 endprog_stack.pop() # TODO: contstr reliance doesn't work now because we can be inside @@ -620,7 +632,7 @@ def generate_tokens( if not token.endswith("{"): # TODO: locations yield (FSTRING_MIDDLE, token, (lnum, 0), (lnum, 0), line) - yield (FSTRING_END, token, (lnum, 0), (lnum, 0), line) + yield (FSTRING_END, token[-1], (lnum, 0), (lnum, 0), line) fstring_level -= 1 endprog_stack.pop() else: @@ -689,8 +701,14 @@ def generate_tokens( epos, line, ) - yield (LBRACE, "{", epos, epos, line) - inside_fstring_braces = True + if not token.endswith("{"): + yield (FSTRING_END, token[-1], epos, epos, line) + fstring_level -= 1 + endprog_stack.pop() + else: + # TODO: most of the positions are wrong + yield (LBRACE, "{", epos, epos, line) + inside_fstring_braces = True else: strstart = (lnum, start) # multiple lines contstr = line[start:] @@ -720,17 +738,25 @@ def generate_tokens( stashed = None # TODO: move this logic to a function - if not token.endswith("{"): + if not token.startswith("f"): yield (STRING, token, spos, epos, line) else: if pseudomatch[20] is not None: fstring_start = pseudomatch[20] offset = pseudomatch.end(20) - pseudomatch.start() start_epos = (lnum, start + offset) - else: + elif pseudomatch[22] is not None: fstring_start = pseudomatch[22] offset = pseudomatch.end(22) - pseudomatch.start() start_epos = (lnum, start + offset - 1) + elif pseudomatch[24] is not None: + fstring_start = pseudomatch[24] + offset = pseudomatch.end(24) - pseudomatch.start() + start_epos = (lnum, start + offset - 1) + else: + fstring_start = pseudomatch[26] + offset = pseudomatch.end(26) - pseudomatch.start() + start_epos = (lnum, start + offset - 1) yield (FSTRING_START, fstring_start, spos, start_epos, line) fstring_level += 1 endprog = endprogs[fstring_start] @@ -747,8 +773,14 @@ def generate_tokens( middle_epos, line, ) - yield (LBRACE, "{", (lnum, end_offset + 1), epos, line) - inside_fstring_braces = True + if not token.endswith("{"): + yield (FSTRING_END, token[-1], epos, epos, line) + fstring_level -= 1 + endprog_stack.pop() + else: + # TODO: most of the positions are wrong + yield (LBRACE, "{", epos, epos, line) + inside_fstring_braces = True elif initial.isidentifier(): # ordinary name if token in ("async", "await"): From bbbac0abf529ed5854f777f060bbbee4bdbfa3e4 Mon Sep 17 00:00:00 2001 From: Tushar Sadhwani Date: Mon, 18 Sep 2023 02:52:48 +0530 Subject: [PATCH 13/77] Add grammar changes --- src/blib2to3/Grammar.txt | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/blib2to3/Grammar.txt b/src/blib2to3/Grammar.txt index e48e66363fb..0b369ddd9ff 100644 --- a/src/blib2to3/Grammar.txt +++ b/src/blib2to3/Grammar.txt @@ -166,7 +166,7 @@ atom: ('(' [yield_expr|testlist_gexp] ')' | '[' [listmaker] ']' | '{' [dictsetmaker] '}' | '`' testlist1 '`' | - NAME | NUMBER | STRING+ | '.' '.' '.') + NAME | NUMBER | STRING+ | fstring+ | '.' '.' 
'.') listmaker: (namedexpr_test|star_expr) ( old_comp_for | (',' (namedexpr_test|star_expr))* [','] ) testlist_gexp: (namedexpr_test|star_expr) ( old_comp_for | (',' (namedexpr_test|star_expr))* [','] ) lambdef: 'lambda' [varargslist] ':' test @@ -257,3 +257,7 @@ case_block: "case" patterns [guard] ':' suite guard: 'if' namedexpr_test patterns: pattern (',' pattern)* [','] pattern: (expr|star_expr) ['as' expr] + +fstring: FSTRING_START fstring_middle* FSTRING_END +fstring_middle: fstring_replacement_field | FSTRING_MIDDLE +fstring_replacement_field: '{' (yield_expr | testlist_star_expr) '}' From dadaa64d4efbb9c246e96a6cc6f3ae9c40910006 Mon Sep 17 00:00:00 2001 From: Tushar Sadhwani Date: Mon, 18 Sep 2023 19:55:32 +0530 Subject: [PATCH 14/77] fix some locations --- src/blib2to3/pgen2/tokenize.py | 43 +++++++++++++++++++++++----------- 1 file changed, 29 insertions(+), 14 deletions(-) diff --git a/src/blib2to3/pgen2/tokenize.py b/src/blib2to3/pgen2/tokenize.py index 95eba97a53e..276ccab531f 100644 --- a/src/blib2to3/pgen2/tokenize.py +++ b/src/blib2to3/pgen2/tokenize.py @@ -497,7 +497,6 @@ def generate_tokens( lnum += 1 pos, max = 0, len(line) - # TODO: probably inside_fstring_braces is not the best boolean. # what about a case of a string inside a multiline fstring inside a # multiline fstring?? @@ -624,22 +623,35 @@ def generate_tokens( if endmatch: # all on one line start, end = endmatch.span(0) token = line[start:end] - pos = end + # TODO: triple quotes + middle_token, end_token = token[:-1], token[-1] # TODO: unsure if this can be safely removed if stashed: yield stashed stashed = None + yield ( + FSTRING_MIDDLE, + middle_token, + (lnum, pos), + (lnum, end - 1), + line, + ) if not token.endswith("{"): - # TODO: locations - yield (FSTRING_MIDDLE, token, (lnum, 0), (lnum, 0), line) - yield (FSTRING_END, token[-1], (lnum, 0), (lnum, 0), line) + # TODO: end-1 is probably wrong + yield ( + FSTRING_END, + end_token, + (lnum, end - 1), + (lnum, end), + line, + ) fstring_level -= 1 endprog_stack.pop() else: # TODO: most of the positions are wrong - yield (FSTRING_MIDDLE, token, (lnum, 0), (lnum, 0), line) yield (LBRACE, "{", (lnum, 0), (lnum, 0), line) inside_fstring_braces = True + pos = end else: # multiple lines contstr += line contline = line @@ -748,24 +760,24 @@ def generate_tokens( elif pseudomatch[22] is not None: fstring_start = pseudomatch[22] offset = pseudomatch.end(22) - pseudomatch.start() - start_epos = (lnum, start + offset - 1) + start_epos = (lnum, start + offset) elif pseudomatch[24] is not None: fstring_start = pseudomatch[24] offset = pseudomatch.end(24) - pseudomatch.start() - start_epos = (lnum, start + offset - 1) + start_epos = (lnum, start + offset) else: fstring_start = pseudomatch[26] offset = pseudomatch.end(26) - pseudomatch.start() - start_epos = (lnum, start + offset - 1) + start_epos = (lnum, start + offset) yield (FSTRING_START, fstring_start, spos, start_epos, line) fstring_level += 1 endprog = endprogs[fstring_start] endprog_stack.append(endprog) end_offset = pseudomatch.end() - 1 - fstring_middle = line[start + offset - 1 : end_offset] + fstring_middle = line[start + offset : end_offset] middle_spos = (lnum, start + offset) - middle_epos = (lnum, end_offset + 1) + middle_epos = (lnum, end_offset) yield ( FSTRING_MIDDLE, fstring_middle, @@ -774,12 +786,15 @@ def generate_tokens( line, ) if not token.endswith("{"): - yield (FSTRING_END, token[-1], epos, epos, line) + end_spos = (lnum, end_offset) + end_epos = (lnum, end_offset + 1) + yield (FSTRING_END, 
token[-1], end_spos, end_epos, line) fstring_level -= 1 endprog_stack.pop() else: - # TODO: most of the positions are wrong - yield (LBRACE, "{", epos, epos, line) + end_spos = (lnum, end_offset) + end_epos = (lnum, end_offset + 1) + yield (LBRACE, "{", end_spos, end_epos, line) inside_fstring_braces = True elif initial.isidentifier(): # ordinary name From a57e404c84b9b45158974e5774a0ed8ceb0894ce Mon Sep 17 00:00:00 2001 From: Tushar Sadhwani Date: Mon, 18 Sep 2023 23:43:40 +0530 Subject: [PATCH 15/77] remove padding from fstring_middle and fstring_end --- src/black/nodes.py | 10 +++++++++- src/blib2to3/pygram.py | 3 +++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/src/black/nodes.py b/src/black/nodes.py index 45423b2596b..3f9f0a9be2e 100644 --- a/src/black/nodes.py +++ b/src/black/nodes.py @@ -131,7 +131,12 @@ OPENING_BRACKETS: Final = set(BRACKET.keys()) CLOSING_BRACKETS: Final = set(BRACKET.values()) BRACKETS: Final = OPENING_BRACKETS | CLOSING_BRACKETS -ALWAYS_NO_SPACE: Final = CLOSING_BRACKETS | {token.COMMA, STANDALONE_COMMENT} +ALWAYS_NO_SPACE: Final = CLOSING_BRACKETS | { + token.COMMA, + STANDALONE_COMMENT, + token.FSTRING_MIDDLE, + token.FSTRING_END, +} RARROW = 55 @@ -197,6 +202,9 @@ def whitespace(leaf: Leaf, *, complex_subscript: bool) -> str: # noqa: C901 }: return NO + if t == token.LBRACE and p.type == syms.fstring_replacement_field: + return NO + prev = leaf.prev_sibling if not prev: prevp = preceding_leaf(p) diff --git a/src/blib2to3/pygram.py b/src/blib2to3/pygram.py index c30c630e816..8c93e4ddb13 100644 --- a/src/blib2to3/pygram.py +++ b/src/blib2to3/pygram.py @@ -71,6 +71,9 @@ class _python_symbols(Symbols): file_input: int flow_stmt: int for_stmt: int + fstring: int + fstring_middle: int + fstring_replacement_field: int funcdef: int global_stmt: int guard: int From fff25fbe9b7db2e8a383f69ab2a76d80bb8d4429 Mon Sep 17 00:00:00 2001 From: Tushar Sadhwani Date: Tue, 19 Sep 2023 00:37:58 +0530 Subject: [PATCH 16/77] Fix some positions --- src/blib2to3/pgen2/tokenize.py | 46 ++++++++++++++++++++++++---------- 1 file changed, 33 insertions(+), 13 deletions(-) diff --git a/src/blib2to3/pgen2/tokenize.py b/src/blib2to3/pgen2/tokenize.py index 276ccab531f..2ecfbfdf892 100644 --- a/src/blib2to3/pgen2/tokenize.py +++ b/src/blib2to3/pgen2/tokenize.py @@ -624,6 +624,7 @@ def generate_tokens( start, end = endmatch.span(0) token = line[start:end] # TODO: triple quotes + # TODO: check if the token will ever have any whitespace around? middle_token, end_token = token[:-1], token[-1] # TODO: unsure if this can be safely removed if stashed: @@ -703,24 +704,43 @@ def generate_tokens( token = line[start:pos] yield (STRING, token, spos, epos, line) else: - # TODO: most of the positions are wrong - pos = endmatch.end(0) - token = line[start:pos] - yield ( - FSTRING_MIDDLE, - token, - spos, - epos, - line, - ) + end = endmatch.end(0) + token = line[pos:end] + spos, epos = (lnum, pos), (lnum, end) + # TODO: confirm there will be no padding around the tokens + # TODO: don't detect like this perhaps? 
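                    # the end match stops either at the closing quote (the
                    # f-string is done) or at a lone "{" that opens a
                    # replacement field.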
if not token.endswith("{"): - yield (FSTRING_END, token[-1], epos, epos, line) + fstring_middle, fstring_end = token[:-3], token[-3:] + fstring_middle_epos = fstring_end_spos = (lnum, end - 3) + yield ( + FSTRING_MIDDLE, + fstring_middle, + spos, + fstring_middle_epos, + line, + ) + yield ( + FSTRING_END, + fstring_end, + fstring_end_spos, + epos, + line, + ) fstring_level -= 1 endprog_stack.pop() else: - # TODO: most of the positions are wrong - yield (LBRACE, "{", epos, epos, line) + fstring_middle, lbrace = token[:-1], token[-1] + fstring_middle_epos = lbrace_spos = (lnum, end - 1) + yield ( + FSTRING_MIDDLE, + fstring_middle, + spos, + fstring_middle_epos, + line, + ) + yield (LBRACE, lbrace, lbrace_spos, epos, line) inside_fstring_braces = True + pos = end else: strstart = (lnum, start) # multiple lines contstr = line[start:] From 95cd0bab0b63bf0300f82d4340c7f25d189ff05f Mon Sep 17 00:00:00 2001 From: Tushar Sadhwani Date: Tue, 19 Sep 2023 01:16:05 +0530 Subject: [PATCH 17/77] fix edge cases with padding --- src/blib2to3/pgen2/tokenize.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/blib2to3/pgen2/tokenize.py b/src/blib2to3/pgen2/tokenize.py index 2ecfbfdf892..3ddc44744eb 100644 --- a/src/blib2to3/pgen2/tokenize.py +++ b/src/blib2to3/pgen2/tokenize.py @@ -775,26 +775,26 @@ def generate_tokens( else: if pseudomatch[20] is not None: fstring_start = pseudomatch[20] - offset = pseudomatch.end(20) - pseudomatch.start() + offset = pseudomatch.end(20) - pseudomatch.start(1) start_epos = (lnum, start + offset) elif pseudomatch[22] is not None: fstring_start = pseudomatch[22] - offset = pseudomatch.end(22) - pseudomatch.start() + offset = pseudomatch.end(22) - pseudomatch.start(1) start_epos = (lnum, start + offset) elif pseudomatch[24] is not None: fstring_start = pseudomatch[24] - offset = pseudomatch.end(24) - pseudomatch.start() + offset = pseudomatch.end(24) - pseudomatch.start(1) start_epos = (lnum, start + offset) else: fstring_start = pseudomatch[26] - offset = pseudomatch.end(26) - pseudomatch.start() + offset = pseudomatch.end(26) - pseudomatch.start(1) start_epos = (lnum, start + offset) yield (FSTRING_START, fstring_start, spos, start_epos, line) fstring_level += 1 endprog = endprogs[fstring_start] endprog_stack.append(endprog) - end_offset = pseudomatch.end() - 1 + end_offset = pseudomatch.end(1) - 1 fstring_middle = line[start + offset : end_offset] middle_spos = (lnum, start + offset) middle_epos = (lnum, end_offset) From caafa758442d9abfab067b0d2eafd51546d389a2 Mon Sep 17 00:00:00 2001 From: Tushar Sadhwani Date: Wed, 20 Sep 2023 18:52:31 +0530 Subject: [PATCH 18/77] fix nested fstrings bug --- src/blib2to3/pgen2/tokenize.py | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/src/blib2to3/pgen2/tokenize.py b/src/blib2to3/pgen2/tokenize.py index 3ddc44744eb..3645a5022b9 100644 --- a/src/blib2to3/pgen2/tokenize.py +++ b/src/blib2to3/pgen2/tokenize.py @@ -497,12 +497,6 @@ def generate_tokens( lnum += 1 pos, max = 0, len(line) - # TODO: probably inside_fstring_braces is not the best boolean. - # what about a case of a string inside a multiline fstring inside a - # multiline fstring?? - # for eg. 
this doesn't work right now: f"{f'{2+2}'}" - # because inside_fstring_braces gets set to false after the first `}` - # print(f'{parenlev = } {continued = } {inside_fstring_braces = }') if contstr and not inside_fstring_braces: # continued string assert contline is not None if not line: @@ -861,7 +855,7 @@ def generate_tokens( stashed = None yield (NL, token, spos, (lnum, pos), line) continued = 1 - elif initial == "}" and parenlev == 0 and inside_fstring_braces: + elif initial == "}" and parenlev == 0 and fstring_level > 0: yield (RBRACE, token, spos, epos, line) inside_fstring_braces = False else: From 838f627175f1391646db4942535d73aa6d6dddea Mon Sep 17 00:00:00 2001 From: Tushar Sadhwani Date: Wed, 20 Sep 2023 22:44:14 +0530 Subject: [PATCH 19/77] Fix bugs in multiline fstrings --- src/blib2to3/pgen2/tokenize.py | 56 ++++++++++++++++++++++++++-------- 1 file changed, 43 insertions(+), 13 deletions(-) diff --git a/src/blib2to3/pgen2/tokenize.py b/src/blib2to3/pgen2/tokenize.py index 3645a5022b9..075daf667e1 100644 --- a/src/blib2to3/pgen2/tokenize.py +++ b/src/blib2to3/pgen2/tokenize.py @@ -504,7 +504,7 @@ def generate_tokens( endprog = endprog_stack[-1] endmatch = endprog.match(line) if endmatch: - pos = end = endmatch.end(0) + end = endmatch.end(0) token = contstr + line[:end] spos = strstart epos = (lnum, end) @@ -512,18 +512,40 @@ def generate_tokens( # TODO: better way to detect fstring if fstring_level == 0: yield (STRING, token, spos, epos, tokenline) + endprog_stack.pop() else: - # TODO: positions are all wrong - yield (FSTRING_MIDDLE, token, spos, epos, tokenline) if token.endswith("{"): - yield (LBRACE, "{", spos, epos, tokenline) + fstring_middle, lbrace = token[:-1], token[-1] + fstring_middle_epos = lbrace_spos = (lnum, end - 1) + yield ( + FSTRING_MIDDLE, + fstring_middle, + spos, + fstring_middle_epos, + line, + ) + yield (LBRACE, lbrace, lbrace_spos, epos, line) inside_fstring_braces = True else: - yield (FSTRING_END, token[-1], spos, epos, tokenline) + # TODO: -3 maybe not guaranteed + fstring_middle, fstring_end = token[:-3], token[-3:] + fstring_middle_epos = end_spos = (lnum, end - 3) + yield ( + FSTRING_MIDDLE, + fstring_middle, + spos, + fstring_middle_epos, + line, + ) + yield ( + FSTRING_END, + fstring_end, + end_spos, + epos, + line, + ) fstring_level -= 1 - endprog_stack.pop() - # TODO: contstr reliance doesn't work now because we can be inside - # an fstring and still empty contstr right here. 
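+            # pos now points past the closing match; the continuation-string
+            # buffers below are reset for whatever follows.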
+ pos = end contstr, needcont = "", 0 contline = None elif needcont and line[-2:] != "\\\n" and line[-3:] != "\\\r\n": @@ -648,7 +670,8 @@ def generate_tokens( inside_fstring_braces = True pos = end else: # multiple lines - contstr += line + strstart = (lnum, end) + contstr = line[end:] contline = line break @@ -681,10 +704,10 @@ def generate_tokens( yield (COMMENT, token, spos, epos, line) elif token in triple_quoted: endprog = endprogs[token] + endprog_stack.append(endprog) if token.startswith("f"): yield (FSTRING_START, token, spos, epos, line) fstring_level += 1 - endprog_stack.append(endprog) endmatch = endprog.match(line, pos) if endmatch: # all on one line @@ -697,6 +720,7 @@ def generate_tokens( pos = endmatch.end(0) token = line[start:pos] yield (STRING, token, spos, epos, line) + endprog_stack.pop() else: end = endmatch.end(0) token = line[pos:end] @@ -736,8 +760,14 @@ def generate_tokens( inside_fstring_braces = True pos = end else: - strstart = (lnum, start) # multiple lines - contstr = line[start:] + # multiple lines + # TODO: normalize fstring detection + if token.startswith("f"): + strstart = (lnum, pos) + contstr = line[pos:] + else: + strstart = (lnum, start) + contstr = line[start:] contline = line break elif ( @@ -758,7 +788,7 @@ def generate_tokens( contstr, needcont = line[start:], 1 contline = line break - else: # ordinary string + else: # single line string if stashed: yield stashed stashed = None From f5abd4b63b052b460fd2e9fbb436b9ebdea6379e Mon Sep 17 00:00:00 2001 From: Tushar Sadhwani Date: Thu, 21 Sep 2023 01:46:38 +0530 Subject: [PATCH 20/77] support fstring_middle ending with newline --- src/blib2to3/pgen2/driver.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/blib2to3/pgen2/driver.py b/src/blib2to3/pgen2/driver.py index e629843f8b9..45109eabf30 100644 --- a/src/blib2to3/pgen2/driver.py +++ b/src/blib2to3/pgen2/driver.py @@ -167,7 +167,9 @@ def parse_tokens(self, tokens: Iterable[GoodTokenInfo], debug: bool = False) -> if type in {token.INDENT, token.DEDENT}: prefix = _prefix lineno, column = end - if value.endswith("\n"): + # FSTRING_MIDDLE is the only character that can end with a newline, and + # `end` will point to the next line. For that case, don't increment lineno. + if value.endswith("\n") and type != token.FSTRING_MIDDLE: lineno += 1 column = 0 else: From fd3e5e1caa0ad5f2d8c87f9cda43529698c4c436 Mon Sep 17 00:00:00 2001 From: Tushar Sadhwani Date: Sat, 23 Sep 2023 21:20:02 +0530 Subject: [PATCH 21/77] fix edge case for triple quoted strings --- src/blib2to3/pgen2/tokenize.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/src/blib2to3/pgen2/tokenize.py b/src/blib2to3/pgen2/tokenize.py index 075daf667e1..e3c6da15c78 100644 --- a/src/blib2to3/pgen2/tokenize.py +++ b/src/blib2to3/pgen2/tokenize.py @@ -527,7 +527,7 @@ def generate_tokens( yield (LBRACE, lbrace, lbrace_spos, epos, line) inside_fstring_braces = True else: - # TODO: -3 maybe not guaranteed + # TODO: -3 maybe not guaranteed, could be \ separated single line string fstring_middle, fstring_end = token[:-3], token[-3:] fstring_middle_epos = end_spos = (lnum, end - 3) yield ( @@ -639,9 +639,13 @@ def generate_tokens( if endmatch: # all on one line start, end = endmatch.span(0) token = line[start:end] - # TODO: triple quotes # TODO: check if the token will ever have any whitespace around? 
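            # split off the closing quote(s): three characters for a
            # triple-quoted f-string, one otherwise.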
- middle_token, end_token = token[:-1], token[-1] + if token.endswith(('"""', "'''")): + middle_token, end_token = token[:-3], token[-3:] + middle_epos = end_spos = (lnum, end - 3) + else: + middle_token, end_token = token[:-1], token[-1] + middle_epos = end_spos = (lnum, end - 1) # TODO: unsure if this can be safely removed if stashed: yield stashed @@ -650,15 +654,14 @@ def generate_tokens( FSTRING_MIDDLE, middle_token, (lnum, pos), - (lnum, end - 1), + middle_epos, line, ) if not token.endswith("{"): - # TODO: end-1 is probably wrong yield ( FSTRING_END, end_token, - (lnum, end - 1), + end_spos, (lnum, end), line, ) From 0c6906915f36f8906d57eaba9d25f609a67b0dc0 Mon Sep 17 00:00:00 2001 From: Tushar Sadhwani Date: Sat, 23 Sep 2023 22:55:52 +0530 Subject: [PATCH 22/77] Add string normalization --- src/black/linegen.py | 25 ++++++++++++++++ src/black/strings.py | 69 ++++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 92 insertions(+), 2 deletions(-) diff --git a/src/black/linegen.py b/src/black/linegen.py index 507e860190f..1d4e701ebbb 100644 --- a/src/black/linegen.py +++ b/src/black/linegen.py @@ -60,6 +60,7 @@ from black.strings import ( fix_docstring, get_string_prefix, + normalize_fstring_quotes, normalize_string_prefix, normalize_string_quotes, normalize_unicode_escape_sequences, @@ -480,6 +481,30 @@ def visit_STRING(self, leaf: Leaf) -> Iterator[Line]: yield from self.visit_default(leaf) + def visit_fstring(self, node: Node) -> Iterator[Line]: + """Bunch of hacks here. Needs improvement.""" + fstring_start = node.children[0] + fstring_end = node.children[-1] + + quote_char = fstring_end.value[0] + quote_idx = fstring_start.value.index(quote_char) + prefix, quote = fstring_start.value[:quote_idx], fstring_start.value[quote_idx:] + assert 'f' in prefix or 'F' in prefix + assert quote == fstring_end.value + + is_raw_fstring = 'r' in prefix or 'R' in prefix + middles = [node for node in node.children if node.type == token.FSTRING_MIDDLE] + # if ''.join(m.value for m in middles) == 'foo': + # breakpoint() + + if self.mode.string_normalization: + middles, quote = normalize_fstring_quotes(quote, middles, is_raw_fstring) + + fstring_start.value = prefix + quote + fstring_end.value = quote + + yield from self.visit_default(node) + def __post_init__(self) -> None: """You are in a twisty little maze of passages.""" self.current_line = Line(mode=self.mode) diff --git a/src/black/strings.py b/src/black/strings.py index 0d30f09ed11..cd6da62b30d 100644 --- a/src/black/strings.py +++ b/src/black/strings.py @@ -173,8 +173,7 @@ def _cached_compile(pattern: str) -> Pattern[str]: def normalize_string_quotes(s: str) -> str: """Prefer double quotes but only if it doesn't cause more escaping. - Adds or removes backslashes as appropriate. Doesn't parse and fix - strings nested in f-strings. + Adds or removes backslashes as appropriate. 
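+    F-string bodies are handled separately by normalize_fstring_quotes.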
""" value = s.lstrip(STRING_PREFIX_CHARS) if value[:3] == '"""': @@ -215,6 +214,7 @@ def normalize_string_quotes(s: str) -> str: s = f"{prefix}{orig_quote}{body}{orig_quote}" new_body = sub_twice(escaped_orig_quote, rf"\1\2{orig_quote}", new_body) new_body = sub_twice(unescaped_new_quote, rf"\1\\{new_quote}", new_body) + # TODO: can probably be removed if "f" in prefix.casefold(): matches = re.findall( r""" @@ -243,6 +243,71 @@ def normalize_string_quotes(s: str) -> str: return f"{prefix}{new_quote}{new_body}{new_quote}" +def normalize_fstring_quotes( + quote: str, + middles: list[str], + is_raw_fstring: bool +) -> tuple[str, str]: + """Prefer double quotes but only if it doesn't cause more escaping. + + Adds or removes backslashes as appropriate. + """ + if quote == '"""': + return middles, quote + + elif quote == "'''": + new_quote = '"""' + elif quote == '"': + new_quote = "'" + else: + new_quote = '"' + + unescaped_new_quote = _cached_compile(rf"(([^\\]|^)(\\\\)*){new_quote}") + escaped_new_quote = _cached_compile(rf"([^\\]|^)\\((?:\\\\)*){new_quote}") + escaped_orig_quote = _cached_compile(rf"([^\\]|^)\\((?:\\\\)*){quote}") + if is_raw_fstring: + for middle in middles: + if unescaped_new_quote.search(middle.value): + # There's at least one unescaped new_quote in this raw string + # so converting is impossible + return middles, quote + + # Do not introduce or remove backslashes in raw strings + return middles, new_quote + + new_segments = [] + for middle in middles: + segment = middle.value + # remove unnecessary escapes + new_segment = sub_twice(escaped_new_quote, rf"\1\2{new_quote}", segment) + if segment != new_segment: + # Consider the string without unnecessary escapes as the original + middle.value = new_segment + + new_segment = sub_twice(escaped_orig_quote, rf"\1\2{quote}", new_segment) + new_segment = sub_twice(unescaped_new_quote, rf"\1\\{new_quote}", new_segment) + new_segments.append(new_segment) + + + if new_quote == '"""' and new_segments[-1][-1:] == '"': + # edge case: + new_segments[-1] = new_segments[-1][:-1] + '\\"' + + for middle, new_segment in zip(middles, new_segments): + orig_escape_count = middle.value.count("\\") + new_escape_count = new_segment.count("\\") + + if new_escape_count > orig_escape_count: + return middles, quote # Do not introduce more escaping + + if new_escape_count == orig_escape_count and quote == '"': + return middles, quote # Prefer double quotes + + for middle, new_segment in zip(middles, new_segments): + middle.value = new_segment + + return middles, new_quote + def normalize_unicode_escape_sequences(leaf: Leaf) -> None: """Replace hex codes in Unicode escape sequences with lowercase representation.""" From c4d457e742c5388ead4d9bdeaf795fe6a094a7e5 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sat, 23 Sep 2023 17:26:14 +0000 Subject: [PATCH 23/77] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/black/strings.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/black/strings.py b/src/black/strings.py index cd6da62b30d..1300055ccbc 100644 --- a/src/black/strings.py +++ b/src/black/strings.py @@ -283,7 +283,7 @@ def normalize_fstring_quotes( if segment != new_segment: # Consider the string without unnecessary escapes as the original middle.value = new_segment - + new_segment = sub_twice(escaped_orig_quote, rf"\1\2{quote}", new_segment) new_segment = sub_twice(unescaped_new_quote, 
rf"\1\\{new_quote}", new_segment) new_segments.append(new_segment) From ace80e01ccb5d472519cb170348933900a89d8a0 Mon Sep 17 00:00:00 2001 From: Tushar Sadhwani Date: Sat, 23 Sep 2023 23:45:12 +0530 Subject: [PATCH 24/77] small bugfixes --- src/blib2to3/pgen2/tokenize.py | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/src/blib2to3/pgen2/tokenize.py b/src/blib2to3/pgen2/tokenize.py index e3c6da15c78..ab846f0ae0d 100644 --- a/src/blib2to3/pgen2/tokenize.py +++ b/src/blib2to3/pgen2/tokenize.py @@ -471,6 +471,7 @@ def generate_tokens( logical line; continuation lines are included. """ lnum = parenlev = fstring_level = continued = 0 + parenlev_stack = [] inside_fstring_braces = False numchars: Final[str] = "0123456789" contstr, needcont = "", 0 @@ -513,6 +514,7 @@ def generate_tokens( if fstring_level == 0: yield (STRING, token, spos, epos, tokenline) endprog_stack.pop() + parenlev = parenlev_stack.pop() else: if token.endswith("{"): fstring_middle, lbrace = token[:-1], token[-1] @@ -667,11 +669,12 @@ def generate_tokens( ) fstring_level -= 1 endprog_stack.pop() + parenlev = parenlev_stack.pop() else: - # TODO: most of the positions are wrong - yield (LBRACE, "{", (lnum, 0), (lnum, 0), line) + yield (LBRACE, "{", (lnum, end-1), (lnum, end), line) inside_fstring_braces = True pos = end + continue else: # multiple lines strstart = (lnum, end) contstr = line[end:] @@ -708,7 +711,9 @@ def generate_tokens( elif token in triple_quoted: endprog = endprogs[token] endprog_stack.append(endprog) - if token.startswith("f"): + parenlev_stack.append(parenlev) + parenlev = 0 + if token.startswith(("f", "F")): yield (FSTRING_START, token, spos, epos, line) fstring_level += 1 @@ -719,11 +724,12 @@ def generate_tokens( stashed = None # TODO: move this logic to a function # TODO: not how you should identify FSTRING_START - if not token.startswith("f"): + if not token.startswith(("f", "F")): pos = endmatch.end(0) token = line[start:pos] yield (STRING, token, spos, epos, line) endprog_stack.pop() + parenlev = parenlev_stack.pop() else: end = endmatch.end(0) token = line[pos:end] @@ -749,6 +755,7 @@ def generate_tokens( ) fstring_level -= 1 endprog_stack.pop() + parenlev = parenlev_stack.pop() else: fstring_middle, lbrace = token[:-1], token[-1] fstring_middle_epos = lbrace_spos = (lnum, end - 1) @@ -765,7 +772,7 @@ def generate_tokens( else: # multiple lines # TODO: normalize fstring detection - if token.startswith("f"): + if token.startswith(("f", "F")): strstart = (lnum, pos) contstr = line[pos:] else: @@ -787,6 +794,8 @@ def generate_tokens( endprog = maybe_endprog if token[-1] == "\n": # continued string endprog_stack.append(endprog) + parenlev_stack.append(parenlev) + parenlev = 0 strstart = (lnum, start) contstr, needcont = line[start:], 1 contline = line @@ -797,7 +806,7 @@ def generate_tokens( stashed = None # TODO: move this logic to a function - if not token.startswith("f"): + if not token.startswith(("f", "F")): yield (STRING, token, spos, epos, line) else: if pseudomatch[20] is not None: @@ -820,6 +829,8 @@ def generate_tokens( fstring_level += 1 endprog = endprogs[fstring_start] endprog_stack.append(endprog) + parenlev_stack.append(parenlev) + parenlev = 0 end_offset = pseudomatch.end(1) - 1 fstring_middle = line[start + offset : end_offset] @@ -838,6 +849,7 @@ def generate_tokens( yield (FSTRING_END, token[-1], end_spos, end_epos, line) fstring_level -= 1 endprog_stack.pop() + parenlev = parenlev_stack.pop() else: end_spos = (lnum, end_offset) end_epos = 
(lnum, end_offset + 1) From c0a99c86530102b88079e84498a82c2c4b3f567e Mon Sep 17 00:00:00 2001 From: Tushar Sadhwani Date: Sun, 24 Sep 2023 00:17:40 +0530 Subject: [PATCH 25/77] fix some bugs that I introduced just now --- src/blib2to3/pgen2/tokenize.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/blib2to3/pgen2/tokenize.py b/src/blib2to3/pgen2/tokenize.py index ab846f0ae0d..7417dfd7f6b 100644 --- a/src/blib2to3/pgen2/tokenize.py +++ b/src/blib2to3/pgen2/tokenize.py @@ -547,6 +547,10 @@ def generate_tokens( line, ) fstring_level -= 1 + endprog_stack.pop() + parenlev = parenlev_stack.pop() + if fstring_level > 0: + inside_fstring_braces = True pos = end contstr, needcont = "", 0 contline = None @@ -670,6 +674,8 @@ def generate_tokens( fstring_level -= 1 endprog_stack.pop() parenlev = parenlev_stack.pop() + if fstring_level > 0: + inside_fstring_braces = True else: yield (LBRACE, "{", (lnum, end-1), (lnum, end), line) inside_fstring_braces = True @@ -756,6 +762,8 @@ def generate_tokens( fstring_level -= 1 endprog_stack.pop() parenlev = parenlev_stack.pop() + if fstring_level > 0: + inside_fstring_braces = True else: fstring_middle, lbrace = token[:-1], token[-1] fstring_middle_epos = lbrace_spos = (lnum, end - 1) @@ -850,6 +858,8 @@ def generate_tokens( fstring_level -= 1 endprog_stack.pop() parenlev = parenlev_stack.pop() + if fstring_level > 0: + inside_fstring_braces = True else: end_spos = (lnum, end_offset) end_epos = (lnum, end_offset + 1) From b02cf2a2f47ab218f1f1a4ff57d59b526b7d63e7 Mon Sep 17 00:00:00 2001 From: Tushar Sadhwani Date: Sun, 24 Sep 2023 03:31:53 +0530 Subject: [PATCH 26/77] strings and fstrings can have implicit concat --- src/blib2to3/Grammar.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/blib2to3/Grammar.txt b/src/blib2to3/Grammar.txt index 0b369ddd9ff..54395423c1e 100644 --- a/src/blib2to3/Grammar.txt +++ b/src/blib2to3/Grammar.txt @@ -166,7 +166,7 @@ atom: ('(' [yield_expr|testlist_gexp] ')' | '[' [listmaker] ']' | '{' [dictsetmaker] '}' | '`' testlist1 '`' | - NAME | NUMBER | STRING+ | fstring+ | '.' '.' '.') + NAME | NUMBER | (STRING | fstring)+ | '.' '.' 
'.') listmaker: (namedexpr_test|star_expr) ( old_comp_for | (',' (namedexpr_test|star_expr))* [','] ) testlist_gexp: (namedexpr_test|star_expr) ( old_comp_for | (',' (namedexpr_test|star_expr))* [','] ) lambdef: 'lambda' [varargslist] ':' test From acd3c79918c0a8719cfb6eb9fae054af83a3c2e4 Mon Sep 17 00:00:00 2001 From: Tushar Sadhwani Date: Sun, 24 Sep 2023 03:32:12 +0530 Subject: [PATCH 27/77] don't normalize docstring prefixes --- src/black/linegen.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/black/linegen.py b/src/black/linegen.py index 1d4e701ebbb..a4dc7825a77 100644 --- a/src/black/linegen.py +++ b/src/black/linegen.py @@ -489,13 +489,14 @@ def visit_fstring(self, node: Node) -> Iterator[Line]: quote_char = fstring_end.value[0] quote_idx = fstring_start.value.index(quote_char) prefix, quote = fstring_start.value[:quote_idx], fstring_start.value[quote_idx:] - assert 'f' in prefix or 'F' in prefix + + if not is_docstring(node): + prefix = normalize_string_prefix(prefix) + assert quote == fstring_end.value is_raw_fstring = 'r' in prefix or 'R' in prefix middles = [node for node in node.children if node.type == token.FSTRING_MIDDLE] - # if ''.join(m.value for m in middles) == 'foo': - # breakpoint() if self.mode.string_normalization: middles, quote = normalize_fstring_quotes(quote, middles, is_raw_fstring) From 5bca062047fde535bb63d545653e1e1dd92dd52b Mon Sep 17 00:00:00 2001 From: Tushar Sadhwani Date: Sun, 24 Sep 2023 04:02:01 +0530 Subject: [PATCH 28/77] Add !r format specifier support --- src/black/__init__.py | 1 + src/black/nodes.py | 5 +++++ src/blib2to3/Grammar.txt | 3 ++- src/blib2to3/pgen2/grammar.py | 1 + src/blib2to3/pgen2/token.py | 3 ++- src/blib2to3/pgen2/tokenize.py | 11 +++++++++++ 6 files changed, 22 insertions(+), 2 deletions(-) diff --git a/src/black/__init__.py b/src/black/__init__.py index 6fc91d2e6d3..425a2f9b3d3 100644 --- a/src/black/__init__.py +++ b/src/black/__init__.py @@ -1184,6 +1184,7 @@ def get_features_used( # noqa: C901 if is_string_token(n): value_head = n.value[:2] if value_head in {'f"', 'F"', "f'", "F'", "rf", "fr", "RF", "FR"}: + # TODO: this will need tweaking features.add(Feature.F_STRINGS) if Feature.DEBUG_F_STRINGS not in features: for span_beg, span_end in iter_fexpr_spans(n.value): diff --git a/src/black/nodes.py b/src/black/nodes.py index 3f9f0a9be2e..8abbf00b5bc 100644 --- a/src/black/nodes.py +++ b/src/black/nodes.py @@ -136,6 +136,7 @@ STANDALONE_COMMENT, token.FSTRING_MIDDLE, token.FSTRING_END, + token.BANG, } RARROW = 55 @@ -266,6 +267,9 @@ def whitespace(leaf: Leaf, *, complex_subscript: bool) -> str: # noqa: C901 elif prev.type in OPENING_BRACKETS: return NO + elif prev.type == token.BANG: + return NO + if p.type in {syms.parameters, syms.arglist}: # untyped function signatures or calls if not prev or prev.type != token.COMMA: @@ -384,6 +388,7 @@ def whitespace(leaf: Leaf, *, complex_subscript: bool) -> str: # noqa: C901 elif prevp.type == token.EQUAL and prevp_parent.type == syms.argument: return NO + # TODO: add fstring here? 
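
# Sketch of the spacing rule the BANG handling above encodes: inside an
# f-string replacement field, "!" introduces a conversion (one of s, r, a)
# and must be emitted with no whitespace on either side -- f"{x!r}", never
# f"{x ! r}". `render_field` is a hypothetical helper, shown only to make
# the intended token layout concrete; it is not part of Black.
def render_field(expression: str, conversion: str = "") -> str:
    assert conversion in ("", "s", "r", "a"), "only three conversions exist"
    suffix = f"!{conversion}" if conversion else ""
    return "{" + expression + suffix + "}"

assert render_field("x", "r") == "{x!r}"
assert render_field("x") == "{x}"
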
elif t in {token.NAME, token.NUMBER, token.STRING}: return NO diff --git a/src/blib2to3/Grammar.txt b/src/blib2to3/Grammar.txt index 54395423c1e..4e78145cfcb 100644 --- a/src/blib2to3/Grammar.txt +++ b/src/blib2to3/Grammar.txt @@ -260,4 +260,5 @@ pattern: (expr|star_expr) ['as' expr] fstring: FSTRING_START fstring_middle* FSTRING_END fstring_middle: fstring_replacement_field | FSTRING_MIDDLE -fstring_replacement_field: '{' (yield_expr | testlist_star_expr) '}' +fstring_replacement_field: '{' (yield_expr | testlist_star_expr) [ "!" NAME ] [ ':' fstring_format_spec* ] '}' +fstring_format_spec: FSTRING_MIDDLE | fstring_replacement_field diff --git a/src/blib2to3/pgen2/grammar.py b/src/blib2to3/pgen2/grammar.py index 1f3fdc55b97..804db1ad985 100644 --- a/src/blib2to3/pgen2/grammar.py +++ b/src/blib2to3/pgen2/grammar.py @@ -218,6 +218,7 @@ def report(self) -> None: //= DOUBLESLASHEQUAL -> RARROW := COLONEQUAL +! BANG """ opmap = {} diff --git a/src/blib2to3/pgen2/token.py b/src/blib2to3/pgen2/token.py index 761cc1c7e88..3068c3157fc 100644 --- a/src/blib2to3/pgen2/token.py +++ b/src/blib2to3/pgen2/token.py @@ -69,7 +69,8 @@ FSTRING_START: Final = 60 FSTRING_MIDDLE: Final = 61 FSTRING_END: Final = 62 -N_TOKENS: Final = 63 +BANG: Final = 63 +N_TOKENS: Final = 64 NT_OFFSET: Final = 256 # --end constants-- diff --git a/src/blib2to3/pgen2/tokenize.py b/src/blib2to3/pgen2/tokenize.py index 7417dfd7f6b..1f8514d977d 100644 --- a/src/blib2to3/pgen2/tokenize.py +++ b/src/blib2to3/pgen2/tokenize.py @@ -141,6 +141,10 @@ def _combinations(*l: str) -> Set[str]: Single3Lbrace = r"[^'\\{]*(?:(?:\\.|{{|'(?!''))[^'\\{]*)*{(?!{)" Double3Lbrace = r'[^"\\{]*(?:(?:\\.|{{|"(?!""))[^"\\{]*)*{(?!{)' +# ! format specifier inside an fstring brace +Bang = Whitespace + group("!") +bang = re.compile(Bang) + # Because of leftmost-then-longest match semantics, be sure to put the # longest operators first (e.g., if = came before ==, == would get # recognized as two instances of =). @@ -687,6 +691,13 @@ def generate_tokens( contline = line break + if fstring_level > 0 and inside_fstring_braces: + match = bang.match(line, pos) + if match: + start, end = match.span(1) + yield (OP, "!", (lnum, start), (lnum, end), line) + pos = end + pseudomatch = pseudoprog.match(line, pos) if pseudomatch: # scan for tokens start, end = pseudomatch.span(1) From b755281b74f13089aafdeb98148417b6e4d00673 Mon Sep 17 00:00:00 2001 From: Tushar Sadhwani Date: Sun, 24 Sep 2023 13:12:07 +0530 Subject: [PATCH 29/77] Support non nested format specifiers --- src/blib2to3/Grammar.txt | 2 +- src/blib2to3/pgen2/tokenize.py | 33 ++++++++++++++++++++++++++++++++- 2 files changed, 33 insertions(+), 2 deletions(-) diff --git a/src/blib2to3/Grammar.txt b/src/blib2to3/Grammar.txt index 4e78145cfcb..43b23c51453 100644 --- a/src/blib2to3/Grammar.txt +++ b/src/blib2to3/Grammar.txt @@ -260,5 +260,5 @@ pattern: (expr|star_expr) ['as' expr] fstring: FSTRING_START fstring_middle* FSTRING_END fstring_middle: fstring_replacement_field | FSTRING_MIDDLE -fstring_replacement_field: '{' (yield_expr | testlist_star_expr) [ "!" NAME ] [ ':' fstring_format_spec* ] '}' +fstring_replacement_field: '{' (yield_expr | testlist_star_expr) ['='] [ "!" 
NAME ] [ ':' fstring_format_spec ] '}' fstring_format_spec: FSTRING_MIDDLE | fstring_replacement_field diff --git a/src/blib2to3/pgen2/tokenize.py b/src/blib2to3/pgen2/tokenize.py index 1f8514d977d..8b96c4bee77 100644 --- a/src/blib2to3/pgen2/tokenize.py +++ b/src/blib2to3/pgen2/tokenize.py @@ -144,6 +144,11 @@ def _combinations(*l: str) -> Set[str]: # ! format specifier inside an fstring brace Bang = Whitespace + group("!") bang = re.compile(Bang) +Colon = Whitespace + group(":") +colon = re.compile(Colon) + +FstringMiddleAfterColon = Whitespace + group(r".+?") + group("{", "}") +fstring_middle_after_colon = re.compile(FstringMiddleAfterColon) # Because of leftmost-then-longest match semantics, be sure to put the # longest operators first (e.g., if = came before ==, == would get @@ -477,6 +482,7 @@ def generate_tokens( lnum = parenlev = fstring_level = continued = 0 parenlev_stack = [] inside_fstring_braces = False + inside_fstring_colon = False numchars: Final[str] = "0123456789" contstr, needcont = "", 0 contline: Optional[str] = None @@ -681,7 +687,7 @@ def generate_tokens( if fstring_level > 0: inside_fstring_braces = True else: - yield (LBRACE, "{", (lnum, end-1), (lnum, end), line) + yield (LBRACE, "{", (lnum, end - 1), (lnum, end), line) inside_fstring_braces = True pos = end continue @@ -691,12 +697,37 @@ def generate_tokens( contline = line break + # TODO: fstring_level > 0 is redundant in both cases here, + # remove it and ensure nothing breaks + if fstring_level > 0 and inside_fstring_colon: + match = fstring_middle_after_colon.match(line, pos) + if match is None: + raise TokenError("unterminated f-string literal", (lnum, pos)) + + start, end = match.span(1) + token = line[start:end] + yield (FSTRING_MIDDLE, token, (lnum, start), (lnum, end), line) + inside_fstring_colon = False + pos = end + continue + if fstring_level > 0 and inside_fstring_braces: match = bang.match(line, pos) if match: start, end = match.span(1) yield (OP, "!", (lnum, start), (lnum, end), line) pos = end + continue + + match = colon.match(line, pos) + if match: + start, end = match.span(1) + yield (OP, ":", (lnum, start), (lnum, end), line) + inside_fstring_colon = True + pos = end + continue + + # TODO: `=` is left, eg. 
f"{abc = }" pseudomatch = pseudoprog.match(line, pos) if pseudomatch: # scan for tokens From 8f7ecdfd6f044e0f8fdf97b4cad6cf8acba2be1c Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sun, 24 Sep 2023 07:42:32 +0000 Subject: [PATCH 30/77] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/blib2to3/pgen2/tokenize.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/blib2to3/pgen2/tokenize.py b/src/blib2to3/pgen2/tokenize.py index 8b96c4bee77..e8616b912bf 100644 --- a/src/blib2to3/pgen2/tokenize.py +++ b/src/blib2to3/pgen2/tokenize.py @@ -703,7 +703,7 @@ def generate_tokens( match = fstring_middle_after_colon.match(line, pos) if match is None: raise TokenError("unterminated f-string literal", (lnum, pos)) - + start, end = match.span(1) token = line[start:end] yield (FSTRING_MIDDLE, token, (lnum, start), (lnum, end), line) From 00dc7ac025751eff1e1d6612387af7ead4b0b8c4 Mon Sep 17 00:00:00 2001 From: Tushar Sadhwani Date: Sun, 24 Sep 2023 13:19:35 +0530 Subject: [PATCH 31/77] fix walrus edge case --- src/blib2to3/pgen2/tokenize.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/blib2to3/pgen2/tokenize.py b/src/blib2to3/pgen2/tokenize.py index e8616b912bf..8cdfdc2a823 100644 --- a/src/blib2to3/pgen2/tokenize.py +++ b/src/blib2to3/pgen2/tokenize.py @@ -711,7 +711,7 @@ def generate_tokens( pos = end continue - if fstring_level > 0 and inside_fstring_braces: + if fstring_level > 0 and parenlev == 0 and inside_fstring_braces: match = bang.match(line, pos) if match: start, end = match.span(1) From 306b9e926985bd61df09169c8386ed46deb4a2af Mon Sep 17 00:00:00 2001 From: Tushar Sadhwani Date: Sun, 24 Sep 2023 13:39:30 +0530 Subject: [PATCH 32/77] empty FSTRING_MIDDLE should not be truncated --- src/black/lines.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/black/lines.py b/src/black/lines.py index 0a307b45eff..ea8572e6c0f 100644 --- a/src/black/lines.py +++ b/src/black/lines.py @@ -71,7 +71,12 @@ def append( Inline comments are put aside. 
""" - has_value = leaf.type in BRACKETS or bool(leaf.value.strip()) + has_value = ( + leaf.type in BRACKETS + # empty fstring-middles must not be truncated + or leaf.type == token.FSTRING_MIDDLE + or bool(leaf.value.strip()) + ) if not has_value: return From 7323840a35d39b2bc9d7d9fa9599dc443a74c8b2 Mon Sep 17 00:00:00 2001 From: Tushar Sadhwani Date: Sun, 24 Sep 2023 14:04:54 +0530 Subject: [PATCH 33/77] support rf" tokens --- src/black/strings.py | 4 ++-- src/blib2to3/pgen2/tokenize.py | 26 +++++++++++++++++++------- 2 files changed, 21 insertions(+), 9 deletions(-) diff --git a/src/black/strings.py b/src/black/strings.py index 1300055ccbc..b346e538766 100644 --- a/src/black/strings.py +++ b/src/black/strings.py @@ -272,8 +272,8 @@ def normalize_fstring_quotes( # so converting is impossible return middles, quote - # Do not introduce or remove backslashes in raw strings - return middles, new_quote + # Do not introduce or remove backslashes in raw strings, just use double quote + return middles, '"' new_segments = [] for middle in middles: diff --git a/src/blib2to3/pgen2/tokenize.py b/src/blib2to3/pgen2/tokenize.py index 8cdfdc2a823..b8d6e28c86b 100644 --- a/src/blib2to3/pgen2/tokenize.py +++ b/src/blib2to3/pgen2/tokenize.py @@ -228,6 +228,12 @@ def _combinations(*l: str) -> Set[str]: | {f"{prefix}'" for prefix in _strprefixes | _fstring_prefixes} | {f'{prefix}"' for prefix in _strprefixes | _fstring_prefixes} ) +fstring_prefix: Final = ( + {f"{prefix}'" for prefix in _fstring_prefixes} + | {f'{prefix}"' for prefix in _fstring_prefixes} + | {f"{prefix}'''" for prefix in _fstring_prefixes} + | {f'{prefix}"""' for prefix in _fstring_prefixes} +) tabsize = 8 @@ -461,6 +467,15 @@ def untokenize(iterable: Iterable[TokenInfo]) -> str: return ut.untokenize(iterable) +def is_fstring_start(token: str) -> bool: + # TODO: builtins.any is shadowed :( + for prefix in fstring_prefix: + if token.startswith(prefix): + return True + + return False + + def generate_tokens( readline: Callable[[], str], grammar: Optional[Grammar] = None ) -> Iterator[GoodTokenInfo]: @@ -761,7 +776,7 @@ def generate_tokens( endprog_stack.append(endprog) parenlev_stack.append(parenlev) parenlev = 0 - if token.startswith(("f", "F")): + if is_fstring_start(token): yield (FSTRING_START, token, spos, epos, line) fstring_level += 1 @@ -771,8 +786,7 @@ def generate_tokens( yield stashed stashed = None # TODO: move this logic to a function - # TODO: not how you should identify FSTRING_START - if not token.startswith(("f", "F")): + if not is_fstring_start(token): pos = endmatch.end(0) token = line[start:pos] yield (STRING, token, spos, epos, line) @@ -821,8 +835,7 @@ def generate_tokens( pos = end else: # multiple lines - # TODO: normalize fstring detection - if token.startswith(("f", "F")): + if is_fstring_start(token): strstart = (lnum, pos) contstr = line[pos:] else: @@ -855,8 +868,7 @@ def generate_tokens( yield stashed stashed = None - # TODO: move this logic to a function - if not token.startswith(("f", "F")): + if not is_fstring_start(token): yield (STRING, token, spos, epos, line) else: if pseudomatch[20] is not None: From 4fc656d6a13ac5b4a50e2267b4260f6451913950 Mon Sep 17 00:00:00 2001 From: Tushar Sadhwani Date: Sun, 24 Sep 2023 14:35:17 +0530 Subject: [PATCH 34/77] fix fstring feature detection --- src/black/__init__.py | 16 ++++++---------- tests/test_black.py | 3 +-- 2 files changed, 7 insertions(+), 12 deletions(-) diff --git a/src/black/__init__.py b/src/black/__init__.py index 425a2f9b3d3..01ebfaebeaa 100644 --- 
a/src/black/__init__.py +++ b/src/black/__init__.py @@ -1181,16 +1181,12 @@ def get_features_used( # noqa: C901 } for n in node.pre_order(): - if is_string_token(n): - value_head = n.value[:2] - if value_head in {'f"', 'F"', "f'", "F'", "rf", "fr", "RF", "FR"}: - # TODO: this will need tweaking - features.add(Feature.F_STRINGS) - if Feature.DEBUG_F_STRINGS not in features: - for span_beg, span_end in iter_fexpr_spans(n.value): - if n.value[span_beg : span_end - 1].rstrip().endswith("="): - features.add(Feature.DEBUG_F_STRINGS) - break + if n.type == token.FSTRING_START: + features.add(Feature.F_STRINGS) + elif n.type == token.RBRACE and any( + child.type == token.EQUAL for child in n.parent.children + ): + features.add(Feature.DEBUG_F_STRINGS) elif is_number_token(n): if "_" in n.value: diff --git a/tests/test_black.py b/tests/test_black.py index 79930fabf1f..c0ab06af9c0 100644 --- a/tests/test_black.py +++ b/tests/test_black.py @@ -360,12 +360,11 @@ def test_detect_debug_f_strings(self) -> None: features = black.get_features_used(root) self.assertNotIn(black.Feature.DEBUG_F_STRINGS, features) - # We don't yet support feature version detection in nested f-strings root = black.lib2to3_parse( """f"heard a rumour that { f'{1+1=}' } ... seems like it could be true" """ ) features = black.get_features_used(root) - self.assertNotIn(black.Feature.DEBUG_F_STRINGS, features) + self.assertIn(black.Feature.DEBUG_F_STRINGS, features) @patch("black.dump_to_file", dump_to_stderr) def test_string_quotes(self) -> None: From ea70516b2e17e33fb75c222ba6d3b005c2d70b84 Mon Sep 17 00:00:00 2001 From: Tushar Sadhwani Date: Sat, 30 Sep 2023 18:23:35 +0530 Subject: [PATCH 35/77] fix edge cases in format specifier tokenizing --- src/blib2to3/pgen2/tokenize.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/blib2to3/pgen2/tokenize.py b/src/blib2to3/pgen2/tokenize.py index b8d6e28c86b..473bd8434b0 100644 --- a/src/blib2to3/pgen2/tokenize.py +++ b/src/blib2to3/pgen2/tokenize.py @@ -147,7 +147,7 @@ def _combinations(*l: str) -> Set[str]: Colon = Whitespace + group(":") colon = re.compile(Colon) -FstringMiddleAfterColon = Whitespace + group(r".+?") + group("{", "}") +FstringMiddleAfterColon = Whitespace + group(r".*?") + group("{", "}") fstring_middle_after_colon = re.compile(FstringMiddleAfterColon) # Because of leftmost-then-longest match semantics, be sure to put the @@ -726,7 +726,7 @@ def generate_tokens( pos = end continue - if fstring_level > 0 and parenlev == 0 and inside_fstring_braces: + if fstring_level > 0 and inside_fstring_braces: match = bang.match(line, pos) if match: start, end = match.span(1) From 4b80fe1b289e2355e59d9f1339c3f4478edb95fc Mon Sep 17 00:00:00 2001 From: Tushar Sadhwani Date: Mon, 2 Oct 2023 01:31:50 +0530 Subject: [PATCH 36/77] fix that one bug with depending on parenlev --- src/blib2to3/Grammar.txt | 3 ++- src/blib2to3/pgen2/tokenize.py | 19 ++++++++++++++++--- 2 files changed, 18 insertions(+), 4 deletions(-) diff --git a/src/blib2to3/Grammar.txt b/src/blib2to3/Grammar.txt index 43b23c51453..5829bb55bc9 100644 --- a/src/blib2to3/Grammar.txt +++ b/src/blib2to3/Grammar.txt @@ -259,6 +259,7 @@ patterns: pattern (',' pattern)* [','] pattern: (expr|star_expr) ['as' expr] fstring: FSTRING_START fstring_middle* FSTRING_END +# TODO making these FSTRING_MIDDLE makes them unformattable so maybe put a new token here? fstring_middle: fstring_replacement_field | FSTRING_MIDDLE -fstring_replacement_field: '{' (yield_expr | testlist_star_expr) ['='] [ "!" 
NAME ] [ ':' fstring_format_spec ] '}' +fstring_replacement_field: '{' (yield_expr | testlist_star_expr) ['='] [ "!" NAME ] [ ':' fstring_format_spec* ] '}' fstring_format_spec: FSTRING_MIDDLE | fstring_replacement_field diff --git a/src/blib2to3/pgen2/tokenize.py b/src/blib2to3/pgen2/tokenize.py index 473bd8434b0..d636875785e 100644 --- a/src/blib2to3/pgen2/tokenize.py +++ b/src/blib2to3/pgen2/tokenize.py @@ -498,6 +498,7 @@ def generate_tokens( parenlev_stack = [] inside_fstring_braces = False inside_fstring_colon = False + bracelev = 0 numchars: Final[str] = "0123456789" contstr, needcont = "", 0 contline: Optional[str] = None @@ -722,11 +723,19 @@ def generate_tokens( start, end = match.span(1) token = line[start:end] yield (FSTRING_MIDDLE, token, (lnum, start), (lnum, end), line) + + brace_start, brace_end = match.span(2) + brace = line[brace_start:brace_end] + if brace == '{': + yield (OP, brace, (lnum, brace_start), (lnum, brace_end), line) + bracelev += 1 + end = brace_end + inside_fstring_colon = False pos = end continue - if fstring_level > 0 and inside_fstring_braces: + if fstring_level > 0 and parenlev == 0 and inside_fstring_braces: match = bang.match(line, pos) if match: start, end = match.span(1) @@ -964,11 +973,13 @@ def generate_tokens( stashed = None yield (NL, token, spos, (lnum, pos), line) continued = 1 - elif initial == "}" and parenlev == 0 and fstring_level > 0: + elif initial == "}" and parenlev == 0 and bracelev == 0 and fstring_level > 0: yield (RBRACE, token, spos, epos, line) inside_fstring_braces = False else: - if initial in "([{": + if parenlev == 0 and bracelev > 0 and initial == '}': + bracelev -= 1 + elif initial in "([{": parenlev += 1 elif initial in ")]}": parenlev -= 1 @@ -987,6 +998,8 @@ def generate_tokens( for _indent in indents[1:]: # pop remaining indent levels yield (DEDENT, "", (lnum, 0), (lnum, 0), "") yield (ENDMARKER, "", (lnum, 0), (lnum, 0), "") + assert len(endprog_stack) == 0 + assert len(parenlev_stack) == 0 if __name__ == "__main__": # testing From 420867d51246a60e6ba15aa8146a85f0bc8730d4 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sun, 1 Oct 2023 20:02:11 +0000 Subject: [PATCH 37/77] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/blib2to3/pgen2/tokenize.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/blib2to3/pgen2/tokenize.py b/src/blib2to3/pgen2/tokenize.py index d636875785e..2a99c335124 100644 --- a/src/blib2to3/pgen2/tokenize.py +++ b/src/blib2to3/pgen2/tokenize.py @@ -723,7 +723,7 @@ def generate_tokens( start, end = match.span(1) token = line[start:end] yield (FSTRING_MIDDLE, token, (lnum, start), (lnum, end), line) - + brace_start, brace_end = match.span(2) brace = line[brace_start:brace_end] if brace == '{': From 160ef4ef977ba547f9242c8c8e9f0dad0e3bd319 Mon Sep 17 00:00:00 2001 From: Tushar Sadhwani Date: Mon, 2 Oct 2023 19:03:39 +0530 Subject: [PATCH 38/77] fix line location for triple quoted strings --- src/blib2to3/pgen2/tokenize.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/blib2to3/pgen2/tokenize.py b/src/blib2to3/pgen2/tokenize.py index 2a99c335124..043855d0083 100644 --- a/src/blib2to3/pgen2/tokenize.py +++ b/src/blib2to3/pgen2/tokenize.py @@ -798,6 +798,7 @@ def generate_tokens( if not is_fstring_start(token): pos = endmatch.end(0) token = line[start:pos] + epos = (lnum, pos) yield (STRING, token, spos, epos, line) endprog_stack.pop() parenlev = 
parenlev_stack.pop() From edf3d795d089aff0d2335287fa7b458c446d434f Mon Sep 17 00:00:00 2001 From: Tushar Sadhwani Date: Mon, 2 Oct 2023 19:18:17 +0530 Subject: [PATCH 39/77] try to fix mypy errors --- src/black/__init__.py | 2 +- src/black/linegen.py | 6 ++++-- src/black/nodes.py | 6 +++--- src/black/strings.py | 6 +++--- src/blib2to3/pgen2/tokenize.py | 4 ++-- 5 files changed, 13 insertions(+), 11 deletions(-) diff --git a/src/black/__init__.py b/src/black/__init__.py index 01ebfaebeaa..053592bb31f 100644 --- a/src/black/__init__.py +++ b/src/black/__init__.py @@ -1183,7 +1183,7 @@ def get_features_used( # noqa: C901 for n in node.pre_order(): if n.type == token.FSTRING_START: features.add(Feature.F_STRINGS) - elif n.type == token.RBRACE and any( + elif n.type == token.RBRACE and n.parent is not None and any( child.type == token.EQUAL for child in n.parent.children ): features.add(Feature.DEBUG_F_STRINGS) diff --git a/src/black/linegen.py b/src/black/linegen.py index a4dc7825a77..0aeec8b8dcb 100644 --- a/src/black/linegen.py +++ b/src/black/linegen.py @@ -481,10 +481,12 @@ def visit_STRING(self, leaf: Leaf) -> Iterator[Line]: yield from self.visit_default(leaf) - def visit_fstring(self, node: Node) -> Iterator[Line]: + def visit_fstring(self, node: Leaf) -> Iterator[Line]: """Bunch of hacks here. Needs improvement.""" fstring_start = node.children[0] fstring_end = node.children[-1] + assert isinstance(fstring_start, Leaf) + assert isinstance(fstring_end, Leaf) quote_char = fstring_end.value[0] quote_idx = fstring_start.value.index(quote_char) @@ -496,7 +498,7 @@ def visit_fstring(self, node: Node) -> Iterator[Line]: assert quote == fstring_end.value is_raw_fstring = 'r' in prefix or 'R' in prefix - middles = [node for node in node.children if node.type == token.FSTRING_MIDDLE] + middles = [node for node in node.leaves() if node.type == token.FSTRING_MIDDLE] if self.mode.string_normalization: middles, quote = normalize_fstring_quotes(quote, middles, is_raw_fstring) diff --git a/src/black/nodes.py b/src/black/nodes.py index 8abbf00b5bc..2723eb8baae 100644 --- a/src/black/nodes.py +++ b/src/black/nodes.py @@ -531,14 +531,14 @@ def is_arith_like(node: LN) -> bool: } -def is_docstring(leaf: Leaf) -> bool: +def is_docstring(node: NL) -> bool: if prev_siblings_are( - leaf.parent, [None, token.NEWLINE, token.INDENT, syms.simple_stmt] + node.parent, [None, token.NEWLINE, token.INDENT, syms.simple_stmt] ): return True # Multiline docstring on the same line as the `def`. - if prev_siblings_are(leaf.parent, [syms.parameters, token.COLON, syms.simple_stmt]): + if prev_siblings_are(node.parent, [syms.parameters, token.COLON, syms.simple_stmt]): # `syms.parameters` is only used in funcdefs and async_funcdefs in the Python # grammar. We're safe to return True without further checks. return True diff --git a/src/black/strings.py b/src/black/strings.py index b346e538766..7d8ba9b446d 100644 --- a/src/black/strings.py +++ b/src/black/strings.py @@ -5,7 +5,7 @@ import re import sys from functools import lru_cache -from typing import Final, List, Match, Pattern +from typing import Final, List, Match, Pattern, Tuple from black._width_table import WIDTH_TABLE from blib2to3.pytree import Leaf @@ -245,9 +245,9 @@ def normalize_string_quotes(s: str) -> str: def normalize_fstring_quotes( quote: str, - middles: list[str], + middles: List[Leaf], is_raw_fstring: bool -) -> tuple[str, str]: +) -> Tuple[List[Leaf], str]: """Prefer double quotes but only if it doesn't cause more escaping. 
Adds or removes backslashes as appropriate. diff --git a/src/blib2to3/pgen2/tokenize.py b/src/blib2to3/pgen2/tokenize.py index 043855d0083..c21b1223a80 100644 --- a/src/blib2to3/pgen2/tokenize.py +++ b/src/blib2to3/pgen2/tokenize.py @@ -495,7 +495,7 @@ def generate_tokens( logical line; continuation lines are included. """ lnum = parenlev = fstring_level = continued = 0 - parenlev_stack = [] + parenlev_stack: List[int] = [] inside_fstring_braces = False inside_fstring_colon = False bracelev = 0 @@ -514,7 +514,7 @@ def generate_tokens( async_def_nl = False strstart: Tuple[int, int] - endprog_stack: list[Pattern[str]] = [] + endprog_stack: List[Pattern[str]] = [] while 1: # loop over lines in stream try: From 23bee77e43caf30d415677c2ee30d7e814ef6571 Mon Sep 17 00:00:00 2001 From: Tushar Sadhwani Date: Mon, 2 Oct 2023 19:20:05 +0530 Subject: [PATCH 40/77] commit unstaged change --- src/black/linegen.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/black/linegen.py b/src/black/linegen.py index 0aeec8b8dcb..7ace29766f5 100644 --- a/src/black/linegen.py +++ b/src/black/linegen.py @@ -481,7 +481,7 @@ def visit_STRING(self, leaf: Leaf) -> Iterator[Line]: yield from self.visit_default(leaf) - def visit_fstring(self, node: Leaf) -> Iterator[Line]: + def visit_fstring(self, node: Node) -> Iterator[Line]: """Bunch of hacks here. Needs improvement.""" fstring_start = node.children[0] fstring_end = node.children[-1] From 6997d1474e075959a66f6d7c07abb02103d1b7be Mon Sep 17 00:00:00 2001 From: Tushar Sadhwani Date: Mon, 2 Oct 2023 22:08:02 +0530 Subject: [PATCH 41/77] Add `fstring_format_spec` to symbols --- src/blib2to3/pygram.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/blib2to3/pygram.py b/src/blib2to3/pygram.py index 8c93e4ddb13..af0bd0035ca 100644 --- a/src/blib2to3/pygram.py +++ b/src/blib2to3/pygram.py @@ -72,6 +72,7 @@ class _python_symbols(Symbols): flow_stmt: int for_stmt: int fstring: int + fstring_format_spec: int fstring_middle: int fstring_replacement_field: int funcdef: int From 4e201fc0c89e832110a0fc676ea8cfb38bd7f8bf Mon Sep 17 00:00:00 2001 From: Tushar Sadhwani Date: Mon, 2 Oct 2023 22:33:07 +0530 Subject: [PATCH 42/77] fix possible cause of mypyc crash --- src/black/linegen.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/black/linegen.py b/src/black/linegen.py index 7ace29766f5..c31b1c94543 100644 --- a/src/black/linegen.py +++ b/src/black/linegen.py @@ -498,7 +498,7 @@ def visit_fstring(self, node: Node) -> Iterator[Line]: assert quote == fstring_end.value is_raw_fstring = 'r' in prefix or 'R' in prefix - middles = [node for node in node.leaves() if node.type == token.FSTRING_MIDDLE] + middles = [leaf for leaf in node.leaves() if leaf.type == token.FSTRING_MIDDLE] if self.mode.string_normalization: middles, quote = normalize_fstring_quotes(quote, middles, is_raw_fstring) From 17a90630cca9ac4a1aec073f307f5093bec72259 Mon Sep 17 00:00:00 2001 From: Tushar Sadhwani Date: Sat, 7 Oct 2023 01:54:39 +0530 Subject: [PATCH 43/77] Fix edge case with wrapping format specs --- src/blib2to3/pgen2/tokenize.py | 31 ++++++++++++++++++++++++------- 1 file changed, 24 insertions(+), 7 deletions(-) diff --git a/src/blib2to3/pgen2/tokenize.py b/src/blib2to3/pgen2/tokenize.py index c21b1223a80..12266ccfa49 100644 --- a/src/blib2to3/pgen2/tokenize.py +++ b/src/blib2to3/pgen2/tokenize.py @@ -147,7 +147,7 @@ def _combinations(*l: str) -> Set[str]: Colon = Whitespace + group(":") colon = re.compile(Colon) -FstringMiddleAfterColon = Whitespace + 
group(r".*?") + group("{", "}") +FstringMiddleAfterColon = Whitespace + group(r".*?") + group("{", "}", "\n") fstring_middle_after_colon = re.compile(FstringMiddleAfterColon) # Because of leftmost-then-longest match semantics, be sure to put the @@ -498,6 +498,7 @@ def generate_tokens( parenlev_stack: List[int] = [] inside_fstring_braces = False inside_fstring_colon = False + formatspec = "" bracelev = 0 numchars: Final[str] = "0123456789" contstr, needcont = "", 0 @@ -515,6 +516,7 @@ def generate_tokens( strstart: Tuple[int, int] endprog_stack: List[Pattern[str]] = [] + formatspec_start: Tuple[int, int] while 1: # loop over lines in stream try: @@ -722,12 +724,21 @@ def generate_tokens( start, end = match.span(1) token = line[start:end] - yield (FSTRING_MIDDLE, token, (lnum, start), (lnum, end), line) + formatspec += token brace_start, brace_end = match.span(2) - brace = line[brace_start:brace_end] - if brace == '{': - yield (OP, brace, (lnum, brace_start), (lnum, brace_end), line) + brace_or_nl = line[brace_start:brace_end] + if brace_or_nl == "\n": + # TODO: in a triple quoted string we should infact add the \n here + # formatspec += "\n" + pos = brace_end + continue + + yield (FSTRING_MIDDLE, formatspec, formatspec_start, (lnum, end), line) + formatspec = "" + + if brace_or_nl == "{": + yield (OP, "{", (lnum, brace_start), (lnum, brace_end), line) bracelev += 1 end = brace_end @@ -748,6 +759,7 @@ def generate_tokens( start, end = match.span(1) yield (OP, ":", (lnum, start), (lnum, end), line) inside_fstring_colon = True + formatspec_start = (lnum, end) pos = end continue @@ -974,11 +986,16 @@ def generate_tokens( stashed = None yield (NL, token, spos, (lnum, pos), line) continued = 1 - elif initial == "}" and parenlev == 0 and bracelev == 0 and fstring_level > 0: + elif ( + initial == "}" + and parenlev == 0 + and bracelev == 0 + and fstring_level > 0 + ): yield (RBRACE, token, spos, epos, line) inside_fstring_braces = False else: - if parenlev == 0 and bracelev > 0 and initial == '}': + if parenlev == 0 and bracelev > 0 and initial == "}": bracelev -= 1 elif initial in "([{": parenlev += 1 From d0af0c14415be3adad3b7ea99d84d04d4f07e825 Mon Sep 17 00:00:00 2001 From: Tushar Sadhwani Date: Sun, 15 Oct 2023 00:11:20 +0530 Subject: [PATCH 44/77] Add FSTRING_PARSING as a feature --- src/black/__init__.py | 6 +++++- src/black/mode.py | 2 ++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/src/black/__init__.py b/src/black/__init__.py index b1416458ddb..7cb1e3bae05 100644 --- a/src/black/__init__.py +++ b/src/black/__init__.py @@ -1104,7 +1104,11 @@ def _format_str_once(src_contents: str, *, mode: Mode) -> str: elt = EmptyLineTracker(mode=mode) split_line_features = { feature - for feature in {Feature.TRAILING_COMMA_IN_CALL, Feature.TRAILING_COMMA_IN_DEF} + for feature in { + Feature.TRAILING_COMMA_IN_CALL, + Feature.TRAILING_COMMA_IN_DEF, + Feature.FSTRING_PARSING, + } if supports_feature(versions, feature) } block: Optional[LinesBlock] = None diff --git a/src/black/mode.py b/src/black/mode.py index 30c5d2f1b2f..be8db26eca6 100644 --- a/src/black/mode.py +++ b/src/black/mode.py @@ -47,6 +47,7 @@ class Feature(Enum): DEBUG_F_STRINGS = 16 PARENTHESIZED_CONTEXT_MANAGERS = 17 TYPE_PARAMS = 18 + FSTRING_PARSING = 19 FORCE_OPTIONAL_PARENTHESES = 50 # __future__ flags @@ -157,6 +158,7 @@ class Feature(Enum): Feature.EXCEPT_STAR, Feature.VARIADIC_GENERICS, Feature.TYPE_PARAMS, + Feature.FSTRING_PARSING, }, } From 78c1e9c23b769d94916dcd8f92bf16b943773c04 Mon Sep 17 00:00:00 2001 From: 
Tushar Sadhwani Date: Sun, 15 Oct 2023 19:08:34 +0530 Subject: [PATCH 45/77] Add test case --- tests/data/cases/pep_701.py | 157 ++++++++++++++++++++++++++++++++++++ 1 file changed, 157 insertions(+) create mode 100644 tests/data/cases/pep_701.py diff --git a/tests/data/cases/pep_701.py b/tests/data/cases/pep_701.py new file mode 100644 index 00000000000..af5520382af --- /dev/null +++ b/tests/data/cases/pep_701.py @@ -0,0 +1,157 @@ +# flags: --minimum-version=3.12 +x = f"foo" +x = f'foo' +x = f"""foo""" +x = f'''foo''' +x = f"foo {{ bar {{ baz" +x = f"foo {{ {2 + 2}bar {{ baz" +x = f'foo {{ {2 + 2}bar {{ baz' +x = f"""foo {{ {2 + 2}bar {{ baz""" +x = f'''foo {{ {2 + 2}bar {{ baz''' + +# edge case: FSTRING_MIDDLE containing only whitespace should not be stripped +x = f"{a} {b}" + +x = f"foo { + 2 + 2 +} bar baz" + +x = f"foo {{ {"a {2 + 2} b"}bar {{ baz" +x = f"foo {{ {f'a {2 + 2} b'}bar {{ baz" +x = f"foo {{ {f"a {2 + 2} b"}bar {{ baz" + +x = f"foo {{ {f'a {f"a {2 + 2} b"} b'}bar {{ baz" +x = f"foo {{ {f"a {f"a {2 + 2} b"} b"}bar {{ baz" + +x = """foo {{ {2 + 2}bar +baz""" + + +x = f"""foo {{ {2 + 2}bar {{ baz""" + +x = f"""foo {{ { + 2 + 2 +}bar {{ baz""" + + +x = f"""foo {{ { + 2 + 2 +}bar +baz""" + +x = f"""foo {{ a + foo {2 + 2}bar {{ baz + + x = f"foo {{ { + 2 + 2 # comment + }bar" + + {{ baz + + }} buzz + + {print("abc" + "def" +)} +abc""" + +# edge case: end triple quotes at index zero +f"""foo {2+2} bar +""" + +f' \' {f"'"} \' ' +f" \" {f'"'} \" " + +x = f"a{2+2:=^72}b" +x = f"a{2+2:x}b" + +rf'foo' +rf'{foo}' + +x = f"a{2+2:=^{x}}b" +x = f"a{2+2:=^{foo(x+y**2):something else}}b" +f'{(abc:=10)}' + +f"This is a really long string, but just make sure that you reflow fstrings { + 2+2:d +}" +f"This is a really long string, but just make sure that you reflow fstrings correctly {2+2:d}" + +# output + +x = f"foo" +x = f"foo" +x = f"""foo""" +x = f"""foo""" +x = f"foo {{ bar {{ baz" +x = f"foo {{ {2 + 2}bar {{ baz" +x = f"foo {{ {2 + 2}bar {{ baz" +x = f"""foo {{ {2 + 2}bar {{ baz""" +x = f"""foo {{ {2 + 2}bar {{ baz""" + +# edge case: FSTRING_MIDDLE containing only whitespace should not be stripped +x = f"{a} {b}" + +x = f"foo {2 + 2} bar baz" + +x = f"foo {{ {"a {2 + 2} b"}bar {{ baz" +x = f"foo {{ {f"a {2 + 2} b"}bar {{ baz" +x = f"foo {{ {f"a {2 + 2} b"}bar {{ baz" + +x = f"foo {{ {f"a {f"a {2 + 2} b"} b"}bar {{ baz" +x = f"foo {{ {f"a {f"a {2 + 2} b"} b"}bar {{ baz" + +x = """foo {{ {2 + 2}bar +baz""" + + +x = f"""foo {{ {2 + 2}bar {{ baz""" + +x = f"""foo {{ {2 + 2}bar {{ baz""" + + +x = f"""foo {{ { + 2 + 2 +}bar +baz""" + +x = f"""foo {{ a + foo { + 2 + 2 +}bar {{ baz + + x = f"foo {{ { + 2 + 2 # comment +}bar" + + {{ baz + + }} buzz + + { + print("abc" + "def") +} +abc""" + +# edge case: end triple quotes at index zero +f"""foo { + 2 + 2 +} bar +""" + +f" ' {f"'"} ' " +f' " {f'"'} " ' + +x = f"a{2 + 2:=^72}b" +x = f"a{2 + 2:x}b" + +rf"foo" +rf"{foo}" + +x = f"a{2 + 2:=^{x}}b" +x = f"a{2 + 2:=^{foo(x + y**2):something else}}b" +f"{(abc := 10)}" + +f"This is a really long string, but just make sure that you reflow fstrings {2 + 2:d}" +f"This is a really long string, but just make sure that you reflow fstrings correctly { + 2 + 2:d +}" From 6931c9205d557d14d6c30e014b4ab9cc0a630ca7 Mon Sep 17 00:00:00 2001 From: Tushar Sadhwani Date: Sun, 15 Oct 2023 20:40:39 +0530 Subject: [PATCH 46/77] Add two todos in test case --- tests/data/cases/pep_701.py | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/tests/data/cases/pep_701.py b/tests/data/cases/pep_701.py index 
af5520382af..90eb13c35be 100644 --- a/tests/data/cases/pep_701.py +++ b/tests/data/cases/pep_701.py @@ -76,6 +76,18 @@ }" f"This is a really long string, but just make sure that you reflow fstrings correctly {2+2:d}" +# TODO: Edge case: if the fstring replacement ends with a `=` it should not be touched +# f"{2+2=}" +# f"{2+2 = }" +# f"{ 2 + 2 = }" + +# TODO: +# f"""foo { +# datetime.datetime.now():%Y +# %m +# %d +# }""" + # output x = f"foo" @@ -155,3 +167,15 @@ f"This is a really long string, but just make sure that you reflow fstrings correctly { 2 + 2:d }" + +# TODO: Edge case: if the fstring replacement ends with a `=` it should not be touched +# f"{2+2=}" +# f"{2+2 = }" +# f"{ 2 + 2 = }" + +# TODO: +# f"""foo { +# datetime.datetime.now():%Y +# %m +# %d +# }""" From 53ca71c034c5fbb7965fe4a6838fa596d4213339 Mon Sep 17 00:00:00 2001 From: Tushar Sadhwani Date: Mon, 16 Oct 2023 00:42:47 +0530 Subject: [PATCH 47/77] tiny changes --- src/black/linegen.py | 1 - src/blib2to3/pgen2/tokenize.py | 15 +++++---------- tests/data/cases/pep_701.py | 2 +- tests/util.py | 2 +- 4 files changed, 7 insertions(+), 13 deletions(-) diff --git a/src/black/linegen.py b/src/black/linegen.py index c3f9b3e2261..5f346a2f3cb 100644 --- a/src/black/linegen.py +++ b/src/black/linegen.py @@ -501,7 +501,6 @@ def visit_STRING(self, leaf: Leaf) -> Iterator[Line]: yield from self.visit_default(leaf) def visit_fstring(self, node: Node) -> Iterator[Line]: - """Bunch of hacks here. Needs improvement.""" fstring_start = node.children[0] fstring_end = node.children[-1] assert isinstance(fstring_start, Leaf) diff --git a/src/blib2to3/pgen2/tokenize.py b/src/blib2to3/pgen2/tokenize.py index 12266ccfa49..d9985b6650f 100644 --- a/src/blib2to3/pgen2/tokenize.py +++ b/src/blib2to3/pgen2/tokenize.py @@ -538,7 +538,6 @@ def generate_tokens( spos = strstart epos = (lnum, end) tokenline = contline + line - # TODO: better way to detect fstring if fstring_level == 0: yield (STRING, token, spos, epos, tokenline) endprog_stack.pop() @@ -717,7 +716,7 @@ def generate_tokens( # TODO: fstring_level > 0 is redundant in both cases here, # remove it and ensure nothing breaks - if fstring_level > 0 and inside_fstring_colon: + if inside_fstring_colon: match = fstring_middle_after_colon.match(line, pos) if match is None: raise TokenError("unterminated f-string literal", (lnum, pos)) @@ -746,7 +745,7 @@ def generate_tokens( pos = end continue - if fstring_level > 0 and parenlev == 0 and inside_fstring_braces: + if inside_fstring_braces and parenlev == 0: match = bang.match(line, pos) if match: start, end = match.span(1) @@ -818,8 +817,6 @@ def generate_tokens( end = endmatch.end(0) token = line[pos:end] spos, epos = (lnum, pos), (lnum, end) - # TODO: confirm there will be no padding around the tokens - # TODO: don't detect like this perhaps? 
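
# Sketch of the tail-slicing performed just below: once the end pattern
# matches, the matched text is split into the literal FSTRING_MIDDLE part
# and the closing FSTRING_END quotes, whose width is three characters for
# triple-quoted f-strings and one otherwise. Plain strings stand in for the
# real (token, position) bookkeeping; `split_tail` is an invented name.
def split_tail(token: str) -> tuple[str, str]:
    if token.endswith(('"""', "'''")):
        return token[:-3], token[-3:]
    return token[:-1], token[-1]

assert split_tail('hello"""') == ("hello", '"""')
assert split_tail("world'") == ("world", "'")
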
if not token.endswith("{"): fstring_middle, fstring_end = token[:-3], token[-3:] fstring_middle_epos = fstring_end_spos = (lnum, end - 3) @@ -885,7 +882,7 @@ def generate_tokens( contstr, needcont = line[start:], 1 contline = line break - else: # single line string + else: # ordinary string if stashed: yield stashed stashed = None @@ -896,19 +893,17 @@ def generate_tokens( if pseudomatch[20] is not None: fstring_start = pseudomatch[20] offset = pseudomatch.end(20) - pseudomatch.start(1) - start_epos = (lnum, start + offset) elif pseudomatch[22] is not None: fstring_start = pseudomatch[22] offset = pseudomatch.end(22) - pseudomatch.start(1) - start_epos = (lnum, start + offset) elif pseudomatch[24] is not None: fstring_start = pseudomatch[24] offset = pseudomatch.end(24) - pseudomatch.start(1) - start_epos = (lnum, start + offset) else: fstring_start = pseudomatch[26] offset = pseudomatch.end(26) - pseudomatch.start(1) - start_epos = (lnum, start + offset) + + start_epos = (lnum, start + offset) yield (FSTRING_START, fstring_start, spos, start_epos, line) fstring_level += 1 endprog = endprogs[fstring_start] diff --git a/tests/data/cases/pep_701.py b/tests/data/cases/pep_701.py index 90eb13c35be..9f75836610f 100644 --- a/tests/data/cases/pep_701.py +++ b/tests/data/cases/pep_701.py @@ -1,4 +1,4 @@ -# flags: --minimum-version=3.12 +# flags: --target-version=py312 x = f"foo" x = f'foo' x = f"""foo""" diff --git a/tests/util.py b/tests/util.py index a31ae0992c2..3f4669c140f 100644 --- a/tests/util.py +++ b/tests/util.py @@ -214,7 +214,7 @@ def get_flags_parser() -> argparse.ArgumentParser: "--target-version", action="append", type=lambda val: TargetVersion[val.upper()], - default=(), + default=[], ) parser.add_argument("--line-length", default=DEFAULT_LINE_LENGTH, type=int) parser.add_argument( From e97dd01fba29523c176c40b8c55906ecc6ada990 Mon Sep 17 00:00:00 2001 From: hauntsaninja Date: Fri, 5 Jan 2024 22:38:10 -0800 Subject: [PATCH 48/77] fix merge --- src/black/linegen.py | 2 +- src/black/nodes.py | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/black/linegen.py b/src/black/linegen.py index 16c232ffd23..f477d2b12dc 100644 --- a/src/black/linegen.py +++ b/src/black/linegen.py @@ -518,7 +518,7 @@ def visit_fstring(self, node: Node) -> Iterator[Line]: quote_idx = fstring_start.value.index(quote_char) prefix, quote = fstring_start.value[:quote_idx], fstring_start.value[quote_idx:] - if not is_docstring(node): + if not is_docstring(node, self.mode): prefix = normalize_string_prefix(prefix) assert quote == fstring_end.value diff --git a/src/black/nodes.py b/src/black/nodes.py index 19d13816b77..71e9c3b3681 100644 --- a/src/black/nodes.py +++ b/src/black/nodes.py @@ -556,11 +556,11 @@ def is_docstring(node: NL, mode: Mode) -> bool: if ( Preview.unify_docstring_detection in mode - and leaf.parent - and leaf.parent.type == syms.simple_stmt - and not leaf.parent.prev_sibling - and leaf.parent.parent - and leaf.parent.parent.type == syms.file_input + and node.parent + and node.parent.type == syms.simple_stmt + and not node.parent.prev_sibling + and node.parent.parent + and node.parent.parent.type == syms.file_input ): return True From cf9b415461f2549c4eb0b56af66dc456bea88c39 Mon Sep 17 00:00:00 2001 From: Shantanu <12621235+hauntsaninja@users.noreply.github.com> Date: Fri, 5 Jan 2024 22:42:24 -0800 Subject: [PATCH 49/77] Update src/black/strings.py Co-authored-by: Jelle Zijlstra --- src/black/strings.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/src/black/strings.py b/src/black/strings.py index b7107da2d9a..8cb10481bc9 100644 --- a/src/black/strings.py +++ b/src/black/strings.py @@ -291,7 +291,7 @@ def normalize_fstring_quotes( new_segments.append(new_segment) - if new_quote == '"""' and new_segments[-1][-1:] == '"': + if new_quote == '"""' and new_segments[-1].endswith('"'): # edge case: new_segments[-1] = new_segments[-1][:-1] + '\\"' From 9737159679a539e116d5688cca1f22954022c16d Mon Sep 17 00:00:00 2001 From: Jelle Zijlstra Date: Mon, 12 Feb 2024 06:38:30 -0800 Subject: [PATCH 50/77] changelog --- CHANGES.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGES.md b/CHANGES.md index b1a6ae3bc1c..a5f48a62b9c 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -6,6 +6,8 @@ +- Add support for the new Python 3.12 f-string syntax introduced by PEP 701 (#3822) + ### Stable style From e220c10d5e25bd42a208abbf0d06581289491666 Mon Sep 17 00:00:00 2001 From: Jelle Zijlstra Date: Mon, 12 Feb 2024 06:40:01 -0800 Subject: [PATCH 51/77] Lint, remove unused function --- src/black/__init__.py | 2 -- src/black/nodes.py | 5 ----- src/black/strings.py | 2 +- 3 files changed, 1 insertion(+), 8 deletions(-) diff --git a/src/black/__init__.py b/src/black/__init__.py index 3e7de04cad1..a507528a4dc 100644 --- a/src/black/__init__.py +++ b/src/black/__init__.py @@ -73,7 +73,6 @@ STARS, is_number_token, is_simple_decorator_expression, - is_string_token, syms, ) from black.output import color_diff, diff, dump_to_file, err, ipynb_diff, out @@ -81,7 +80,6 @@ from black.parsing import lib2to3_parse, parse_ast, stringify_ast from black.ranges import adjusted_lines, convert_unchanged_lines, parse_line_ranges from black.report import Changed, NothingChanged, Report -from black.trans import iter_fexpr_spans from blib2to3.pgen2 import token from blib2to3.pytree import Leaf, Node diff --git a/src/black/nodes.py b/src/black/nodes.py index 1a17dda2c6d..42051588d29 100644 --- a/src/black/nodes.py +++ b/src/black/nodes.py @@ -544,7 +544,6 @@ def is_arith_like(node: LN) -> bool: } - def is_docstring(node: NL, mode: Mode) -> bool: if isinstance(node, Leaf): if node.type != token.STRING: @@ -958,10 +957,6 @@ def is_rpar_token(nl: NL) -> TypeGuard[Leaf]: return nl.type == token.RPAR -def is_string_token(nl: NL) -> TypeGuard[Leaf]: - return nl.type == token.STRING - - def is_number_token(nl: NL) -> TypeGuard[Leaf]: return nl.type == token.NUMBER diff --git a/src/black/strings.py b/src/black/strings.py index 8cb10481bc9..517be9b9400 100644 --- a/src/black/strings.py +++ b/src/black/strings.py @@ -245,6 +245,7 @@ def normalize_string_quotes(s: str) -> str: return f"{prefix}{new_quote}{new_body}{new_quote}" + def normalize_fstring_quotes( quote: str, middles: List[Leaf], @@ -290,7 +291,6 @@ def normalize_fstring_quotes( new_segment = sub_twice(unescaped_new_quote, rf"\1\\{new_quote}", new_segment) new_segments.append(new_segment) - if new_quote == '"""' and new_segments[-1].endswith('"'): # edge case: new_segments[-1] = new_segments[-1][:-1] + '\\"' From 7ef92dbe8b3f5391b25ce8f4e7b71c9ea45829b1 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 12 Feb 2024 14:41:24 +0000 Subject: [PATCH 52/77] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/black/__init__.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/src/black/__init__.py b/src/black/__init__.py index a507528a4dc..f6602455f2c 100644 --- a/src/black/__init__.py +++ 
b/src/black/__init__.py @@ -69,12 +69,7 @@ from black.mode import FUTURE_FLAG_TO_FEATURE, VERSION_TO_FEATURES, Feature from black.mode import Mode as Mode # re-exported from black.mode import Preview, TargetVersion, supports_feature -from black.nodes import ( - STARS, - is_number_token, - is_simple_decorator_expression, - syms, -) +from black.nodes import STARS, is_number_token, is_simple_decorator_expression, syms from black.output import color_diff, diff, dump_to_file, err, ipynb_diff, out from black.parsing import InvalidInput # noqa F401 from black.parsing import lib2to3_parse, parse_ast, stringify_ast From 62e0b2b597747b882e1308e47dfff81230c57439 Mon Sep 17 00:00:00 2001 From: Tushar Sadhwani Date: Sun, 17 Mar 2024 23:43:56 +0530 Subject: [PATCH 53/77] fix debug visitor test --- src/blib2to3/pgen2/tokenize.py | 8 +- tests/data/miscellaneous/debug_visitor.out | 154 +++++++++++++++++++-- 2 files changed, 146 insertions(+), 16 deletions(-) diff --git a/src/blib2to3/pgen2/tokenize.py b/src/blib2to3/pgen2/tokenize.py index d0843c557eb..64b547949c6 100644 --- a/src/blib2to3/pgen2/tokenize.py +++ b/src/blib2to3/pgen2/tokenize.py @@ -27,6 +27,7 @@ function to which the 5 fields described above are passed as 5 arguments, each time a new token is found.""" +import builtins import sys from typing import ( Callable, @@ -465,12 +466,7 @@ def untokenize(iterable: Iterable[TokenInfo]) -> str: def is_fstring_start(token: str) -> bool: - # TODO: builtins.any is shadowed :( - for prefix in fstring_prefix: - if token.startswith(prefix): - return True - - return False + return builtins.any(token.startswith(prefix) for prefix in fstring_prefix) def generate_tokens( diff --git a/tests/data/miscellaneous/debug_visitor.out b/tests/data/miscellaneous/debug_visitor.out index fa60010d421..24d7ed82472 100644 --- a/tests/data/miscellaneous/debug_visitor.out +++ b/tests/data/miscellaneous/debug_visitor.out @@ -229,8 +229,34 @@ file_input LPAR '(' arglist - STRING - "f'{indent}{_type}'" + fstring + FSTRING_START + "f'" + FSTRING_MIDDLE + '' + fstring_replacement_field + LBRACE + '{' + NAME + 'indent' + RBRACE + '}' + /fstring_replacement_field + FSTRING_MIDDLE + '' + fstring_replacement_field + LBRACE + '{' + NAME + '_type' + RBRACE + '}' + /fstring_replacement_field + FSTRING_MIDDLE + '' + FSTRING_END + "'" + /fstring COMMA ',' argument @@ -370,8 +396,34 @@ file_input LPAR '(' arglist - STRING - "f'{indent}/{_type}'" + fstring + FSTRING_START + "f'" + FSTRING_MIDDLE + '' + fstring_replacement_field + LBRACE + '{' + NAME + 'indent' + RBRACE + '}' + /fstring_replacement_field + FSTRING_MIDDLE + '/' + fstring_replacement_field + LBRACE + '{' + NAME + '_type' + RBRACE + '}' + /fstring_replacement_field + FSTRING_MIDDLE + '' + FSTRING_END + "'" + /fstring COMMA ',' argument @@ -494,8 +546,34 @@ file_input LPAR '(' arglist - STRING - "f'{indent}{_type}'" + fstring + FSTRING_START + "f'" + FSTRING_MIDDLE + '' + fstring_replacement_field + LBRACE + '{' + NAME + 'indent' + RBRACE + '}' + /fstring_replacement_field + FSTRING_MIDDLE + '' + fstring_replacement_field + LBRACE + '{' + NAME + '_type' + RBRACE + '}' + /fstring_replacement_field + FSTRING_MIDDLE + '' + FSTRING_END + "'" + /fstring COMMA ',' argument @@ -557,8 +635,36 @@ file_input LPAR '(' arglist - STRING - "f' {node.prefix!r}'" + fstring + FSTRING_START + "f'" + FSTRING_MIDDLE + ' ' + fstring_replacement_field + LBRACE + '{' + power + NAME + 'node' + trailer + DOT + '.' + NAME + 'prefix' + /trailer + /power + BANG + '!' 
+ NAME + 'r' + RBRACE + '}' + /fstring_replacement_field + FSTRING_MIDDLE + '' + FSTRING_END + "'" + /fstring COMMA ',' argument @@ -613,8 +719,36 @@ file_input LPAR '(' arglist - STRING - "f' {node.value!r}'" + fstring + FSTRING_START + "f'" + FSTRING_MIDDLE + ' ' + fstring_replacement_field + LBRACE + '{' + power + NAME + 'node' + trailer + DOT + '.' + NAME + 'value' + /trailer + /power + BANG + '!' + NAME + 'r' + RBRACE + '}' + /fstring_replacement_field + FSTRING_MIDDLE + '' + FSTRING_END + "'" + /fstring COMMA ',' argument From df38ea05adc7ff4d873fb7a0c1462b453c9aa1ae Mon Sep 17 00:00:00 2001 From: Tushar Sadhwani Date: Mon, 25 Mar 2024 19:40:53 +0530 Subject: [PATCH 54/77] fix most tests --- src/black/__init__.py | 12 +++++++----- src/black/linegen.py | 14 +++++++++++++- src/black/strings.py | 2 +- tests/data/cases/pep_701.py | 11 +++++++++++ 4 files changed, 32 insertions(+), 7 deletions(-) diff --git a/src/black/__init__.py b/src/black/__init__.py index f6602455f2c..ce2603a9b1d 100644 --- a/src/black/__init__.py +++ b/src/black/__init__.py @@ -1230,9 +1230,9 @@ def _format_str_once( future_imports = get_future_imports(src_node) versions = detect_target_versions(src_node, future_imports=future_imports) - context_manager_features = { + line_generator_features = { feature - for feature in {Feature.PARENTHESIZED_CONTEXT_MANAGERS} + for feature in {Feature.PARENTHESIZED_CONTEXT_MANAGERS, Feature.FSTRING_PARSING} if supports_feature(versions, feature) } normalize_fmt_off(src_node, mode, lines) @@ -1240,7 +1240,7 @@ def _format_str_once( # This should be called after normalize_fmt_off. convert_unchanged_lines(src_node, lines) - line_generator = LineGenerator(mode=mode, features=context_manager_features) + line_generator = LineGenerator(mode=mode, features=line_generator_features) elt = EmptyLineTracker(mode=mode) split_line_features = { feature @@ -1322,8 +1322,10 @@ def get_features_used( # noqa: C901 for n in node.pre_order(): if n.type == token.FSTRING_START: features.add(Feature.F_STRINGS) - elif n.type == token.RBRACE and n.parent is not None and any( - child.type == token.EQUAL for child in n.parent.children + elif ( + n.type == token.RBRACE + and n.parent is not None + and any(child.type == token.EQUAL for child in n.parent.children) ): features.add(Feature.DEBUG_F_STRINGS) diff --git a/src/black/linegen.py b/src/black/linegen.py index d94e779e827..58daf3904ab 100644 --- a/src/black/linegen.py +++ b/src/black/linegen.py @@ -502,6 +502,12 @@ def visit_STRING(self, leaf: Leaf) -> Iterator[Line]: yield from self.visit_default(leaf) def visit_fstring(self, node: Node) -> Iterator[Line]: + if Feature.FSTRING_PARSING not in self.features: + string_leaf = _fstring_to_string(node) + node.replace(string_leaf) + yield from self.visit_default(string_leaf) + return + fstring_start = node.children[0] fstring_end = node.children[-1] assert isinstance(fstring_start, Leaf) @@ -516,7 +522,7 @@ def visit_fstring(self, node: Node) -> Iterator[Line]: assert quote == fstring_end.value - is_raw_fstring = 'r' in prefix or 'R' in prefix + is_raw_fstring = "r" in prefix or "R" in prefix middles = [leaf for leaf in node.leaves() if leaf.type == token.FSTRING_MIDDLE] if self.mode.string_normalization: @@ -560,6 +566,12 @@ def __post_init__(self) -> None: self.visit_guard = partial(v, keywords=Ø, parens={"if"}) +def _fstring_to_string(node: Node) -> Leaf: + """Converts an fstring node back to a string node.""" + string_without_prefix = str(node).removeprefix(node.prefix) + return Leaf(token.STRING, 
string_without_prefix, prefix=node.prefix) + + def _hugging_power_ops_line_to_string( line: Line, features: Collection[Feature], diff --git a/src/black/strings.py b/src/black/strings.py index 517be9b9400..a505ef67171 100644 --- a/src/black/strings.py +++ b/src/black/strings.py @@ -249,7 +249,7 @@ def normalize_string_quotes(s: str) -> str: def normalize_fstring_quotes( quote: str, middles: List[Leaf], - is_raw_fstring: bool + is_raw_fstring: bool, ) -> Tuple[List[Leaf], str]: """Prefer double quotes but only if it doesn't cause more escaping. diff --git a/tests/data/cases/pep_701.py b/tests/data/cases/pep_701.py index 9f75836610f..f7efbcaa954 100644 --- a/tests/data/cases/pep_701.py +++ b/tests/data/cases/pep_701.py @@ -88,6 +88,11 @@ # %d # }""" +raise ValueError( + "xxxxxxxxxxxIncorrect --line-ranges format, expect START-END, found" + f" {lines_str!r}" + ) + # output x = f"foo" @@ -179,3 +184,9 @@ # %m # %d # }""" + +raise ValueError( + "xxxxxxxxxxxIncorrect --line-ranges format, expect START-END, found" f" { + lines_str!r + }" +) From 150a4fee086dd2087268a41c58d1c24295c071e0 Mon Sep 17 00:00:00 2001 From: Tushar Sadhwani Date: Mon, 25 Mar 2024 21:35:09 +0530 Subject: [PATCH 55/77] fix whitespace getting removed after fstring colon --- src/blib2to3/pgen2/tokenize.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/src/blib2to3/pgen2/tokenize.py b/src/blib2to3/pgen2/tokenize.py index 64b547949c6..aa2e3db3daf 100644 --- a/src/blib2to3/pgen2/tokenize.py +++ b/src/blib2to3/pgen2/tokenize.py @@ -141,13 +141,13 @@ def _combinations(*l: str) -> Set[str]: Single3Lbrace = r"[^'\\{]*(?:(?:\\.|{{|'(?!''))[^'\\{]*)*{(?!{)" Double3Lbrace = r'[^"\\{]*(?:(?:\\.|{{|"(?!""))[^"\\{]*)*{(?!{)' -# ! format specifier inside an fstring brace -Bang = Whitespace + group("!") +# ! format specifier inside an fstring brace, ensure it's not a `!=` token +Bang = Whitespace + group("!") + r'(?!=)' bang = re.compile(Bang) Colon = Whitespace + group(":") colon = re.compile(Colon) -FstringMiddleAfterColon = Whitespace + group(r".*?") + group("{", "}", "\n") +FstringMiddleAfterColon = group(Whitespace + r".*?") + group("{", "}", "\n") fstring_middle_after_colon = re.compile(FstringMiddleAfterColon) # Because of leftmost-then-longest match semantics, be sure to put the @@ -755,8 +755,6 @@ def generate_tokens( pos = end continue - # TODO: `=` is left, eg. 
f"{abc = }" - pseudomatch = pseudoprog.match(line, pos) if pseudomatch: # scan for tokens start, end = pseudomatch.span(1) From c4487cb50ff48758eb48810f7209e7d1c54c0be6 Mon Sep 17 00:00:00 2001 From: Tushar Sadhwani Date: Mon, 25 Mar 2024 23:11:19 +0530 Subject: [PATCH 56/77] remove unnecessary continue --- src/blib2to3/pgen2/tokenize.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/blib2to3/pgen2/tokenize.py b/src/blib2to3/pgen2/tokenize.py index aa2e3db3daf..6e92b9ed22c 100644 --- a/src/blib2to3/pgen2/tokenize.py +++ b/src/blib2to3/pgen2/tokenize.py @@ -724,7 +724,6 @@ def generate_tokens( # TODO: in a triple quoted string we should infact add the \n here # formatspec += "\n" pos = brace_end - continue yield (FSTRING_MIDDLE, formatspec, formatspec_start, (lnum, end), line) formatspec = "" From ece7452535ea5247e48e8313419985796a09e198 Mon Sep 17 00:00:00 2001 From: Tushar Sadhwani Date: Mon, 25 Mar 2024 23:14:01 +0530 Subject: [PATCH 57/77] don't use removeprefix --- src/black/linegen.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/black/linegen.py b/src/black/linegen.py index ec00345e3b5..298306a38e4 100644 --- a/src/black/linegen.py +++ b/src/black/linegen.py @@ -569,7 +569,7 @@ def __post_init__(self) -> None: def _fstring_to_string(node: Node) -> Leaf: """Converts an fstring node back to a string node.""" - string_without_prefix = str(node).removeprefix(node.prefix) + string_without_prefix = str(node)[len(node.prefix):] return Leaf(token.STRING, string_without_prefix, prefix=node.prefix) From dfd345581c5c9630ac7768194ffbd82c8e987892 Mon Sep 17 00:00:00 2001 From: Tushar Sadhwani Date: Mon, 25 Mar 2024 23:16:14 +0530 Subject: [PATCH 58/77] formatting --- src/black/linegen.py | 2 +- src/blib2to3/pgen2/tokenize.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/black/linegen.py b/src/black/linegen.py index 298306a38e4..e388d32d440 100644 --- a/src/black/linegen.py +++ b/src/black/linegen.py @@ -569,7 +569,7 @@ def __post_init__(self) -> None: def _fstring_to_string(node: Node) -> Leaf: """Converts an fstring node back to a string node.""" - string_without_prefix = str(node)[len(node.prefix):] + string_without_prefix = str(node)[len(node.prefix) :] return Leaf(token.STRING, string_without_prefix, prefix=node.prefix) diff --git a/src/blib2to3/pgen2/tokenize.py b/src/blib2to3/pgen2/tokenize.py index 6e92b9ed22c..c20d7783937 100644 --- a/src/blib2to3/pgen2/tokenize.py +++ b/src/blib2to3/pgen2/tokenize.py @@ -142,7 +142,7 @@ def _combinations(*l: str) -> Set[str]: Double3Lbrace = r'[^"\\{]*(?:(?:\\.|{{|"(?!""))[^"\\{]*)*{(?!{)' # ! 
format specifier inside an fstring brace, ensure it's not a `!=` token -Bang = Whitespace + group("!") + r'(?!=)' +Bang = Whitespace + group("!") + r"(?!=)" bang = re.compile(Bang) Colon = Whitespace + group(":") colon = re.compile(Colon) From a81bae3bdbbcb2eac85d75375095b302218b14f1 Mon Sep 17 00:00:00 2001 From: Tushar Sadhwani Date: Mon, 25 Mar 2024 23:27:41 +0530 Subject: [PATCH 59/77] add minimum version --- tests/data/cases/pep_701.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/data/cases/pep_701.py b/tests/data/cases/pep_701.py index f7efbcaa954..3aa3f258912 100644 --- a/tests/data/cases/pep_701.py +++ b/tests/data/cases/pep_701.py @@ -1,4 +1,4 @@ -# flags: --target-version=py312 +# flags: --minimum-version=3.12 --target-version=py312 x = f"foo" x = f'foo' x = f"""foo""" From 0435144da8586ecb061190a9ab0a5bc00b13b256 Mon Sep 17 00:00:00 2001 From: Tushar Sadhwani Date: Thu, 28 Mar 2024 01:31:00 +0530 Subject: [PATCH 60/77] fix the one failing test --- src/blib2to3/Grammar.txt | 1 - src/blib2to3/pgen2/tokenize.py | 7 ++++--- tests/data/cases/pep_701.py | 6 ++++++ 3 files changed, 10 insertions(+), 4 deletions(-) diff --git a/src/blib2to3/Grammar.txt b/src/blib2to3/Grammar.txt index 9ceebbd3d81..0c8ac99daba 100644 --- a/src/blib2to3/Grammar.txt +++ b/src/blib2to3/Grammar.txt @@ -256,7 +256,6 @@ patterns: pattern (',' pattern)* [','] pattern: (expr|star_expr) ['as' expr] fstring: FSTRING_START fstring_middle* FSTRING_END -# TODO making these FSTRING_MIDDLE makes them unformattable so maybe put a new token here? fstring_middle: fstring_replacement_field | FSTRING_MIDDLE fstring_replacement_field: '{' (yield_expr | testlist_star_expr) ['='] [ "!" NAME ] [ ':' fstring_format_spec* ] '}' fstring_format_spec: FSTRING_MIDDLE | fstring_replacement_field diff --git a/src/blib2to3/pgen2/tokenize.py b/src/blib2to3/pgen2/tokenize.py index c20d7783937..caa76de3b16 100644 --- a/src/blib2to3/pgen2/tokenize.py +++ b/src/blib2to3/pgen2/tokenize.py @@ -665,7 +665,6 @@ def generate_tokens( if endmatch: # all on one line start, end = endmatch.span(0) token = line[start:end] - # TODO: check if the token will ever have any whitespace around? 
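# Aside (a hedged sketch, not a diff line): the nested-format-spec case this
# patch fixes can be checked directly against the patched tokenizer, assuming
# this checkout of blib2to3 is importable:
import io

from blib2to3.pgen2.token import tok_name
from blib2to3.pgen2.tokenize import generate_tokens

src = 'x = f"a{2+2:=^{foo(x):inner}one more}b"\n'
for tok_type, value, start, end, _line in generate_tokens(io.StringIO(src).readline):
    # "inner" and "one more" should both surface as FSTRING_MIDDLE: once the
    # inner replacement field closes, we are back inside the outer format spec.
    print(tok_name[tok_type], repr(value), start, end)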
if token.endswith(('"""', "'''")): middle_token, end_token = token[:-3], token[-3:] middle_epos = end_spos = (lnum, end - 3) @@ -707,8 +706,6 @@ def generate_tokens( contline = line break - # TODO: fstring_level > 0 is redundant in both cases here, - # remove it and ensure nothing breaks if inside_fstring_colon: match = fstring_middle_after_colon.match(line, pos) if match is None: @@ -982,6 +979,10 @@ def generate_tokens( else: if parenlev == 0 and bracelev > 0 and initial == "}": bracelev -= 1 + # if we're still inside fstrings, we're still part of the format spec + if inside_fstring_braces: + inside_fstring_colon = True + formatspec_start = (lnum, pos) elif initial in "([{": parenlev += 1 elif initial in ")]}": diff --git a/tests/data/cases/pep_701.py b/tests/data/cases/pep_701.py index 3aa3f258912..4839e907526 100644 --- a/tests/data/cases/pep_701.py +++ b/tests/data/cases/pep_701.py @@ -67,8 +67,11 @@ rf'foo' rf'{foo}' +f"{x:{y}d}" + x = f"a{2+2:=^{x}}b" x = f"a{2+2:=^{foo(x+y**2):something else}}b" +x = f"a{2+2:=^{foo(x+y**2):something else}one more}b" f'{(abc:=10)}' f"This is a really long string, but just make sure that you reflow fstrings { @@ -164,8 +167,11 @@ rf"foo" rf"{foo}" +f"{x:{y}d}" + x = f"a{2 + 2:=^{x}}b" x = f"a{2 + 2:=^{foo(x + y**2):something else}}b" +x = f"a{2 + 2:=^{foo(x + y**2):something else}one more}b" f"{(abc := 10)}" f"This is a really long string, but just make sure that you reflow fstrings {2 + 2:d}" From 99f8eb7468bd1b1fa0d87362bc01f9da559f947d Mon Sep 17 00:00:00 2001 From: Tushar Sadhwani Date: Thu, 28 Mar 2024 19:38:36 +0530 Subject: [PATCH 61/77] fix couple more bugs --- src/blib2to3/pgen2/tokenize.py | 35 +++++++++++++++++++++++++++++----- tests/data/cases/pep_701.py | 8 ++++++++ 2 files changed, 38 insertions(+), 5 deletions(-) diff --git a/src/blib2to3/pgen2/tokenize.py b/src/blib2to3/pgen2/tokenize.py index caa76de3b16..83885fa9cad 100644 --- a/src/blib2to3/pgen2/tokenize.py +++ b/src/blib2to3/pgen2/tokenize.py @@ -469,6 +469,15 @@ def is_fstring_start(token: str) -> bool: return builtins.any(token.startswith(prefix) for prefix in fstring_prefix) +def _split_fstring_start_and_middle(token: str) -> Tuple[str, str]: + for prefix in fstring_prefix: + _, prefix, rest = token.partition(prefix) + if prefix != "": + return prefix, rest + + raise ValueError(f"Token {token!r} is not a valid f-string start") + + def generate_tokens( readline: Callable[[], str], grammar: Optional[Grammar] = None ) -> Iterator[GoodTokenInfo]: @@ -531,11 +540,25 @@ def generate_tokens( spos = strstart epos = (lnum, end) tokenline = contline + line - if fstring_level == 0: + if fstring_level == 0 and not is_fstring_start(token): yield (STRING, token, spos, epos, tokenline) endprog_stack.pop() parenlev = parenlev_stack.pop() else: + if is_fstring_start(token): + fstring_level += 1 + fstring_start, token = _split_fstring_start_and_middle(token) + fstring_start_epos = (lnum, spos[1] + len(fstring_start)) + yield ( + FSTRING_START, + fstring_start, + spos, + fstring_start_epos, + tokenline, + ) + # increase spos to the end of the fstring start + spos = fstring_start_epos + if token.endswith("{"): fstring_middle, lbrace = token[:-1], token[-1] fstring_middle_epos = lbrace_spos = (lnum, end - 1) @@ -549,9 +572,12 @@ def generate_tokens( yield (LBRACE, lbrace, lbrace_spos, epos, line) inside_fstring_braces = True else: - # TODO: -3 maybe not guaranteed, could be \ separated single line string - fstring_middle, fstring_end = token[:-3], token[-3:] - fstring_middle_epos = end_spos = 
(lnum, end - 3) + if token.endswith(('"""', "'''")): + fstring_middle, fstring_end = token[:-3], token[-3:] + fstring_middle_epos = end_spos = (lnum, end - 3) + else: + fstring_middle, fstring_end = token[:-1], token[-1] + fstring_middle_epos = end_spos = (lnum, end - 1) yield ( FSTRING_MIDDLE, fstring_middle, @@ -792,7 +818,6 @@ def generate_tokens( if stashed: yield stashed stashed = None - # TODO: move this logic to a function if not is_fstring_start(token): pos = endmatch.end(0) token = line[start:pos] diff --git a/tests/data/cases/pep_701.py b/tests/data/cases/pep_701.py index 4839e907526..115f08d4a48 100644 --- a/tests/data/cases/pep_701.py +++ b/tests/data/cases/pep_701.py @@ -96,6 +96,9 @@ f" {lines_str!r}" ) +f"`escape` only permitted in {{'html', 'latex', 'latex-math'}}, \ +got {escape}" + # output x = f"foo" @@ -196,3 +199,8 @@ lines_str!r }" ) + +f"`escape` only permitted in {{'html', 'latex', 'latex-math'}}, \ +got { + escape +}" From 3e56204db7196cc5f9f2115dd0a2f0e1e74be658 Mon Sep 17 00:00:00 2001 From: Tushar Sadhwani Date: Wed, 3 Apr 2024 02:10:06 +0530 Subject: [PATCH 62/77] don't format fstrings at all --- src/black/__init__.py | 7 ++--- src/black/linegen.py | 44 +++++++++++++-------------- tests/data/cases/pep_701.py | 60 +++++++++++++++++-------------------- 3 files changed, 53 insertions(+), 58 deletions(-) diff --git a/src/black/__init__.py b/src/black/__init__.py index fb614ba87a1..6ba49d5ef2d 100644 --- a/src/black/__init__.py +++ b/src/black/__init__.py @@ -1244,9 +1244,9 @@ def _format_str_once( future_imports = get_future_imports(src_node) versions = detect_target_versions(src_node, future_imports=future_imports) - line_generator_features = { + context_manager_features = { feature - for feature in {Feature.PARENTHESIZED_CONTEXT_MANAGERS, Feature.FSTRING_PARSING} + for feature in {Feature.PARENTHESIZED_CONTEXT_MANAGERS} if supports_feature(versions, feature) } normalize_fmt_off(src_node, mode, lines) @@ -1254,14 +1254,13 @@ def _format_str_once( # This should be called after normalize_fmt_off. convert_unchanged_lines(src_node, lines) - line_generator = LineGenerator(mode=mode, features=line_generator_features) + line_generator = LineGenerator(mode=mode, features=context_manager_features) elt = EmptyLineTracker(mode=mode) split_line_features = { feature for feature in { Feature.TRAILING_COMMA_IN_CALL, Feature.TRAILING_COMMA_IN_DEF, - Feature.FSTRING_PARSING, } if supports_feature(versions, feature) } diff --git a/src/black/linegen.py b/src/black/linegen.py index e388d32d440..1caa59207ca 100644 --- a/src/black/linegen.py +++ b/src/black/linegen.py @@ -503,36 +503,36 @@ def visit_STRING(self, leaf: Leaf) -> Iterator[Line]: yield from self.visit_default(leaf) def visit_fstring(self, node: Node) -> Iterator[Line]: - if Feature.FSTRING_PARSING not in self.features: - string_leaf = _fstring_to_string(node) - node.replace(string_leaf) - yield from self.visit_default(string_leaf) - return + # currently we don't want to format and split f-strings at all. 
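# Hedged illustration (assuming `node` is a syms.fstring Node parsed from
# the source f"{x}"):
#
#     string_leaf = _fstring_to_string(node)
#     # -> Leaf(token.STRING, 'f"{x}"'), carrying the node's prefix
#
# so every later stage sees one opaque STRING leaf, exactly as it did
# before f-strings were parsed into their own subtree.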
+ string_leaf = _fstring_to_string(node) + node.replace(string_leaf) + yield from self.visit_default(string_leaf) - fstring_start = node.children[0] - fstring_end = node.children[-1] - assert isinstance(fstring_start, Leaf) - assert isinstance(fstring_end, Leaf) + # TODO: Uncomment Implementation to format f-string children + # fstring_start = node.children[0] + # fstring_end = node.children[-1] + # assert isinstance(fstring_start, Leaf) + # assert isinstance(fstring_end, Leaf) - quote_char = fstring_end.value[0] - quote_idx = fstring_start.value.index(quote_char) - prefix, quote = fstring_start.value[:quote_idx], fstring_start.value[quote_idx:] + # quote_char = fstring_end.value[0] + # quote_idx = fstring_start.value.index(quote_char) + # prefix, quote = fstring_start.value[:quote_idx], fstring_start.value[quote_idx:] - if not is_docstring(node, self.mode): - prefix = normalize_string_prefix(prefix) + # if not is_docstring(node, self.mode): + # prefix = normalize_string_prefix(prefix) - assert quote == fstring_end.value + # assert quote == fstring_end.value - is_raw_fstring = "r" in prefix or "R" in prefix - middles = [leaf for leaf in node.leaves() if leaf.type == token.FSTRING_MIDDLE] + # is_raw_fstring = "r" in prefix or "R" in prefix + # middles = [leaf for leaf in node.leaves() if leaf.type == token.FSTRING_MIDDLE] - if self.mode.string_normalization: - middles, quote = normalize_fstring_quotes(quote, middles, is_raw_fstring) + # if self.mode.string_normalization: + # middles, quote = normalize_fstring_quotes(quote, middles, is_raw_fstring) - fstring_start.value = prefix + quote - fstring_end.value = quote + # fstring_start.value = prefix + quote + # fstring_end.value = quote - yield from self.visit_default(node) + # yield from self.visit_default(node) def __post_init__(self) -> None: """You are in a twisty little maze of passages.""" diff --git a/tests/data/cases/pep_701.py b/tests/data/cases/pep_701.py index 115f08d4a48..927e0c4feca 100644 --- a/tests/data/cases/pep_701.py +++ b/tests/data/cases/pep_701.py @@ -114,13 +114,15 @@ # edge case: FSTRING_MIDDLE containing only whitespace should not be stripped x = f"{a} {b}" -x = f"foo {2 + 2} bar baz" +x = f"foo { + 2 + 2 +} bar baz" x = f"foo {{ {"a {2 + 2} b"}bar {{ baz" -x = f"foo {{ {f"a {2 + 2} b"}bar {{ baz" +x = f"foo {{ {f'a {2 + 2} b'}bar {{ baz" x = f"foo {{ {f"a {2 + 2} b"}bar {{ baz" -x = f"foo {{ {f"a {f"a {2 + 2} b"} b"}bar {{ baz" +x = f"foo {{ {f'a {f"a {2 + 2} b"} b'}bar {{ baz" x = f"foo {{ {f"a {f"a {2 + 2} b"} b"}bar {{ baz" x = """foo {{ {2 + 2}bar @@ -129,7 +131,9 @@ x = f"""foo {{ {2 + 2}bar {{ baz""" -x = f"""foo {{ {2 + 2}bar {{ baz""" +x = f"""foo {{ { + 2 + 2 +}bar {{ baz""" x = f"""foo {{ { @@ -138,49 +142,44 @@ baz""" x = f"""foo {{ a - foo { - 2 + 2 -}bar {{ baz + foo {2 + 2}bar {{ baz x = f"foo {{ { - 2 + 2 # comment -}bar" + 2 + 2 # comment + }bar" {{ baz }} buzz - { - print("abc" + "def") -} + {print("abc" + "def" +)} abc""" # edge case: end triple quotes at index zero -f"""foo { - 2 + 2 -} bar +f"""foo {2+2} bar """ -f" ' {f"'"} ' " -f' " {f'"'} " ' +f' \' {f"'"} \' ' +f" \" {f'"'} \" " -x = f"a{2 + 2:=^72}b" -x = f"a{2 + 2:x}b" +x = f"a{2+2:=^72}b" +x = f"a{2+2:x}b" rf"foo" rf"{foo}" f"{x:{y}d}" -x = f"a{2 + 2:=^{x}}b" -x = f"a{2 + 2:=^{foo(x + y**2):something else}}b" -x = f"a{2 + 2:=^{foo(x + y**2):something else}one more}b" -f"{(abc := 10)}" +x = f"a{2+2:=^{x}}b" +x = f"a{2+2:=^{foo(x+y**2):something else}}b" +x = f"a{2+2:=^{foo(x+y**2):something else}one more}b" +f"{(abc:=10)}" -f"This is a really 
long string, but just make sure that you reflow fstrings {2 + 2:d}" -f"This is a really long string, but just make sure that you reflow fstrings correctly { - 2 + 2:d +f"This is a really long string, but just make sure that you reflow fstrings { + 2+2:d }" +f"This is a really long string, but just make sure that you reflow fstrings correctly {2+2:d}" # TODO: Edge case: if the fstring replacement ends with a `=` it should not be touched # f"{2+2=}" @@ -195,12 +194,9 @@ # }""" raise ValueError( - "xxxxxxxxxxxIncorrect --line-ranges format, expect START-END, found" f" { - lines_str!r - }" + "xxxxxxxxxxxIncorrect --line-ranges format, expect START-END, found" + f" {lines_str!r}" ) f"`escape` only permitted in {{'html', 'latex', 'latex-math'}}, \ -got { - escape -}" +got {escape}" From 9495f5e2eeb7582dc03d8a78d04e1be4b73c78e8 Mon Sep 17 00:00:00 2001 From: Tushar Sadhwani Date: Wed, 3 Apr 2024 02:13:10 +0530 Subject: [PATCH 63/77] address comments --- src/black/linegen.py | 1 - src/blib2to3/pgen2/driver.py | 2 +- tests/util.py | 2 +- 3 files changed, 2 insertions(+), 3 deletions(-) diff --git a/src/black/linegen.py b/src/black/linegen.py index 1caa59207ca..55102c02019 100644 --- a/src/black/linegen.py +++ b/src/black/linegen.py @@ -65,7 +65,6 @@ from black.strings import ( fix_docstring, get_string_prefix, - normalize_fstring_quotes, normalize_string_prefix, normalize_string_quotes, normalize_unicode_escape_sequences, diff --git a/src/blib2to3/pgen2/driver.py b/src/blib2to3/pgen2/driver.py index 01b9dd00aa3..71a147cbcd8 100644 --- a/src/blib2to3/pgen2/driver.py +++ b/src/blib2to3/pgen2/driver.py @@ -167,7 +167,7 @@ def parse_tokens(self, tokens: Iterable[GoodTokenInfo], debug: bool = False) -> if type in {token.INDENT, token.DEDENT}: prefix = _prefix lineno, column = end - # FSTRING_MIDDLE is the only character that can end with a newline, and + # FSTRING_MIDDLE is the only token that can end with a newline, and # `end` will point to the next line. For that case, don't increment lineno. 
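# A rough example of the shape being special-cased here (sketch): in
#
#     f"""abc
#     {x}"""
#
# the first FSTRING_MIDDLE is "abc\n", and its `end` coordinate already
# points at the following line, so bumping `lineno` for the trailing
# newline as well would leave the position tracking off by one line.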
if value.endswith("\n") and type != token.FSTRING_MIDDLE: lineno += 1 diff --git a/tests/util.py b/tests/util.py index 3a6a0f9bd98..d5425f1f743 100644 --- a/tests/util.py +++ b/tests/util.py @@ -237,7 +237,7 @@ def get_flags_parser() -> argparse.ArgumentParser: "--target-version", action="append", type=lambda val: TargetVersion[val.upper()], - default=[], + default=(), ) parser.add_argument("--line-length", default=DEFAULT_LINE_LENGTH, type=int) parser.add_argument( From cf7648296c941be770725e0313f6a732b3556616 Mon Sep 17 00:00:00 2001 From: Tushar Sadhwani Date: Wed, 3 Apr 2024 02:17:00 +0530 Subject: [PATCH 64/77] flake8 --- src/black/linegen.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/src/black/linegen.py b/src/black/linegen.py index 55102c02019..2f2ae431818 100644 --- a/src/black/linegen.py +++ b/src/black/linegen.py @@ -515,7 +515,10 @@ def visit_fstring(self, node: Node) -> Iterator[Line]: # quote_char = fstring_end.value[0] # quote_idx = fstring_start.value.index(quote_char) - # prefix, quote = fstring_start.value[:quote_idx], fstring_start.value[quote_idx:] + # prefix, quote = ( + # fstring_start.value[:quote_idx], + # fstring_start.value[quote_idx:] + # ) # if not is_docstring(node, self.mode): # prefix = normalize_string_prefix(prefix) @@ -523,7 +526,11 @@ def visit_fstring(self, node: Node) -> Iterator[Line]: # assert quote == fstring_end.value # is_raw_fstring = "r" in prefix or "R" in prefix - # middles = [leaf for leaf in node.leaves() if leaf.type == token.FSTRING_MIDDLE] + # middles = [ + # leaf + # for leaf in node.leaves() + # if leaf.type == token.FSTRING_MIDDLE + # ] # if self.mode.string_normalization: # middles, quote = normalize_fstring_quotes(quote, middles, is_raw_fstring) From bbff3dee251bfb7f3b71a14d61dd27509d1e78ab Mon Sep 17 00:00:00 2001 From: Tushar Sadhwani Date: Wed, 3 Apr 2024 02:39:47 +0530 Subject: [PATCH 65/77] fix failing test --- tests/data/cases/pep_701.py | 2 +- tests/util.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/data/cases/pep_701.py b/tests/data/cases/pep_701.py index 927e0c4feca..b7660d13d8c 100644 --- a/tests/data/cases/pep_701.py +++ b/tests/data/cases/pep_701.py @@ -1,4 +1,4 @@ -# flags: --minimum-version=3.12 --target-version=py312 +# flags: --minimum-version=3.12 x = f"foo" x = f'foo' x = f"""foo""" diff --git a/tests/util.py b/tests/util.py index d5425f1f743..3a6a0f9bd98 100644 --- a/tests/util.py +++ b/tests/util.py @@ -237,7 +237,7 @@ def get_flags_parser() -> argparse.ArgumentParser: "--target-version", action="append", type=lambda val: TargetVersion[val.upper()], - default=(), + default=[], ) parser.add_argument("--line-length", default=DEFAULT_LINE_LENGTH, type=int) parser.add_argument( From 0fef83cff324c96019712fa681e4e2c2e09971bd Mon Sep 17 00:00:00 2001 From: Tushar Sadhwani Date: Wed, 3 Apr 2024 03:02:22 +0530 Subject: [PATCH 66/77] undo default change --- tests/util.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/util.py b/tests/util.py index 3a6a0f9bd98..d5425f1f743 100644 --- a/tests/util.py +++ b/tests/util.py @@ -237,7 +237,7 @@ def get_flags_parser() -> argparse.ArgumentParser: "--target-version", action="append", type=lambda val: TargetVersion[val.upper()], - default=[], + default=(), ) parser.add_argument("--line-length", default=DEFAULT_LINE_LENGTH, type=int) parser.add_argument( From c5703609e784b675949ecaf34c35066cf9b80d4b Mon Sep 17 00:00:00 2001 From: Tushar Sadhwani Date: Wed, 3 Apr 2024 03:03:32 +0530 Subject: [PATCH 
67/77] remove todo --- src/black/strings.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/black/strings.py b/src/black/strings.py index 031f3c31f2c..69a8c8002e9 100644 --- a/src/black/strings.py +++ b/src/black/strings.py @@ -210,7 +210,7 @@ def normalize_string_quotes(s: str) -> str: s = f"{prefix}{orig_quote}{body}{orig_quote}" new_body = sub_twice(escaped_orig_quote, rf"\1\2{orig_quote}", new_body) new_body = sub_twice(unescaped_new_quote, rf"\1\\{new_quote}", new_body) - # TODO: can probably be removed + if "f" in prefix.casefold(): matches = re.findall( r""" From 2a697c8d815eb1377246d160631b4cbf22f3fb3d Mon Sep 17 00:00:00 2001 From: Tushar Sadhwani Date: Fri, 5 Apr 2024 04:47:03 +0530 Subject: [PATCH 68/77] fix: \N{} case --- src/blib2to3/pgen2/tokenize.py | 16 +++++++++------- tests/data/cases/pep_701.py | 3 +++ 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/src/blib2to3/pgen2/tokenize.py b/src/blib2to3/pgen2/tokenize.py index 83885fa9cad..5e1676e2bc6 100644 --- a/src/blib2to3/pgen2/tokenize.py +++ b/src/blib2to3/pgen2/tokenize.py @@ -135,11 +135,13 @@ def _combinations(*l: str) -> Set[str]: _fstringlitprefix + "'''", ) -SingleLbrace = r"[^'\\{]*(?:(?:\\.|{{)[^'\\{]*)*{(?!{)" -DoubleLbrace = r'[^"\\{]*(?:(?:\\.|{{)[^"\\{]*)*{(?!{)' +# beginning of a single quoted f-string. must not end with `{{` or `\N{` +SingleLbrace = r"[^'\\{]*(?:(?:\\N{|\\.|{{)[^'\\{]*)*(? Set[str]: _string_middle_single = r"[^\n'\\]*(?:\\.[^\n'\\]*)*" _string_middle_double = r'[^\n"\\]*(?:\\.[^\n"\\]*)*' -# FSTRING_MIDDLE and LBRACE, inside a single quoted fstring -_fstring_middle_single = r"[^\n'\\{]*(?:(?:\\.|{{)[^\n'\\{]*)*({)(?!{)" -_fstring_middle_double = r'[^\n"\\{]*(?:(?:\\.|{{)[^\n"\\{]*)*({)(?!{)' +# FSTRING_MIDDLE and LBRACE, must not end with a `{{` or `\N{` +_fstring_middle_single = r"[^\n'\\{]*(?:(?:\\N{|\\.|{{)[^\n'\\{]*)*(? Date: Mon, 8 Apr 2024 23:44:38 +0530 Subject: [PATCH 69/77] make test a little better --- tests/data/cases/pep_701.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/data/cases/pep_701.py b/tests/data/cases/pep_701.py index 47af44b929e..0b6995bfd7e 100644 --- a/tests/data/cases/pep_701.py +++ b/tests/data/cases/pep_701.py @@ -99,7 +99,7 @@ f"`escape` only permitted in {{'html', 'latex', 'latex-math'}}, \ got {escape}" -x = f'\N{GREEK CAPITAL LETTER DELTA}' +x = f'\N{GREEK CAPITAL LETTER DELTA} \N{SNOWMAN} {x}' # output x = f"foo" @@ -202,4 +202,4 @@ f"`escape` only permitted in {{'html', 'latex', 'latex-math'}}, \ got {escape}" -x = f"\N{GREEK CAPITAL LETTER DELTA}" \ No newline at end of file +x = f"\N{GREEK CAPITAL LETTER DELTA} \N{SNOWMAN} {x}" From 1ab815b6dbe577242fcec4001ccb521182d76a4d Mon Sep 17 00:00:00 2001 From: Tushar Sadhwani Date: Sun, 14 Apr 2024 16:16:08 +0530 Subject: [PATCH 70/77] tweak regex to fix edge cases --- src/blib2to3/pgen2/tokenize.py | 8 ++++---- tests/data/cases/pep_701.py | 17 +++++++++-------- 2 files changed, 13 insertions(+), 12 deletions(-) diff --git a/src/blib2to3/pgen2/tokenize.py b/src/blib2to3/pgen2/tokenize.py index 5e1676e2bc6..565f2cf2835 100644 --- a/src/blib2to3/pgen2/tokenize.py +++ b/src/blib2to3/pgen2/tokenize.py @@ -140,8 +140,8 @@ def _combinations(*l: str) -> Set[str]: DoubleLbrace = r'[^"\\{]*(?:(?:\\N{|\\.|{{)[^"\\{]*)*(? Set[str]: _string_middle_double = r'[^\n"\\]*(?:\\.[^\n"\\]*)*' # FSTRING_MIDDLE and LBRACE, must not end with a `{{` or `\N{` -_fstring_middle_single = r"[^\n'\\{]*(?:(?:\\N{|\\.|{{)[^\n'\\{]*)*(? 
Date: Mon, 22 Apr 2024 11:15:05 +0530 Subject: [PATCH 71/77] fix edge case with nested multiline strings --- src/blib2to3/pgen2/tokenize.py | 2 +- tests/data/cases/pep_701.py | 10 ++++++++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/src/blib2to3/pgen2/tokenize.py b/src/blib2to3/pgen2/tokenize.py index 565f2cf2835..d8690f8bcdd 100644 --- a/src/blib2to3/pgen2/tokenize.py +++ b/src/blib2to3/pgen2/tokenize.py @@ -530,7 +530,7 @@ def generate_tokens( lnum += 1 pos, max = 0, len(line) - if contstr and not inside_fstring_braces: # continued string + if contstr: # continued string assert contline is not None if not line: raise TokenError("EOF in multi-line string", strstart) diff --git a/tests/data/cases/pep_701.py b/tests/data/cases/pep_701.py index a2ce38bd25b..14e0e0abcdc 100644 --- a/tests/data/cases/pep_701.py +++ b/tests/data/cases/pep_701.py @@ -101,6 +101,11 @@ x = f'\N{GREEK CAPITAL LETTER DELTA} \N{SNOWMAN} {x}' fr'\{{\}}' +f""" + WITH {f''' + {1}_cte AS ()'''} +""" + # output x = f"foo" @@ -204,3 +209,8 @@ x = f"\N{GREEK CAPITAL LETTER DELTA} \N{SNOWMAN} {x}" rf"\{{\}}" + +f""" + WITH {f''' + {1}_cte AS ()'''} +""" \ No newline at end of file From a64939d4adf50d1edbd1f579fdf0952b2b9545cf Mon Sep 17 00:00:00 2001 From: Tushar Sadhwani Date: Mon, 22 Apr 2024 11:15:55 +0530 Subject: [PATCH 72/77] whitespace --- tests/data/cases/pep_701.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/data/cases/pep_701.py b/tests/data/cases/pep_701.py index 14e0e0abcdc..ccd3ec40447 100644 --- a/tests/data/cases/pep_701.py +++ b/tests/data/cases/pep_701.py @@ -213,4 +213,4 @@ f""" WITH {f''' {1}_cte AS ()'''} -""" \ No newline at end of file +""" From 7df45fb2b31adc027f02c3a164ba6554066325ee Mon Sep 17 00:00:00 2001 From: Tushar Sadhwani Date: Mon, 22 Apr 2024 16:24:25 +0530 Subject: [PATCH 73/77] fix multiline formatspec todo --- src/blib2to3/pgen2/tokenize.py | 8 ++++---- tests/data/cases/pep_701.py | 22 ++++++++++------------ 2 files changed, 14 insertions(+), 16 deletions(-) diff --git a/src/blib2to3/pgen2/tokenize.py b/src/blib2to3/pgen2/tokenize.py index d8690f8bcdd..d6b684ab1aa 100644 --- a/src/blib2to3/pgen2/tokenize.py +++ b/src/blib2to3/pgen2/tokenize.py @@ -149,7 +149,7 @@ def _combinations(*l: str) -> Set[str]: Colon = Whitespace + group(":") colon = re.compile(Colon) -FstringMiddleAfterColon = group(Whitespace + r".*?") + group("{", "}", "\n") +FstringMiddleAfterColon = group(Whitespace + r".*?") + group("{", "}") fstring_middle_after_colon = re.compile(FstringMiddleAfterColon) # Because of leftmost-then-longest match semantics, be sure to put the @@ -737,7 +737,9 @@ def generate_tokens( if inside_fstring_colon: match = fstring_middle_after_colon.match(line, pos) if match is None: - raise TokenError("unterminated f-string literal", (lnum, pos)) + formatspec += line[pos:] + pos = max + continue start, end = match.span(1) token = line[start:end] @@ -746,8 +748,6 @@ def generate_tokens( brace_start, brace_end = match.span(2) brace_or_nl = line[brace_start:brace_end] if brace_or_nl == "\n": - # TODO: in a triple quoted string we should infact add the \n here - # formatspec += "\n" pos = brace_end yield (FSTRING_MIDDLE, formatspec, formatspec_start, (lnum, end), line) diff --git a/tests/data/cases/pep_701.py b/tests/data/cases/pep_701.py index ccd3ec40447..bfd2a7ad7b7 100644 --- a/tests/data/cases/pep_701.py +++ b/tests/data/cases/pep_701.py @@ -83,12 +83,11 @@ f"{2+2 = }" f"{ 2 + 2 = }" -# TODO: -# f"""foo { -# datetime.datetime.now():%Y -# %m -# %d -# 
}""" +f"""foo { + datetime.datetime.now():%Y +%m +%d +}""" raise ValueError( "xxxxxxxxxxxIncorrect --line-ranges format, expect START-END, found" @@ -192,12 +191,11 @@ f"{2+2 = }" f"{ 2 + 2 = }" -# TODO: -# f"""foo { -# datetime.datetime.now():%Y -# %m -# %d -# }""" +f"""foo { + datetime.datetime.now():%Y +%m +%d +}""" raise ValueError( "xxxxxxxxxxxIncorrect --line-ranges format, expect START-END, found" From 36e04d2e5e1cac0e516aed5955d971cb1a03a842 Mon Sep 17 00:00:00 2001 From: Tushar Sadhwani Date: Mon, 22 Apr 2024 16:25:44 +0530 Subject: [PATCH 74/77] add another test case --- tests/data/cases/pep_701.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/tests/data/cases/pep_701.py b/tests/data/cases/pep_701.py index bfd2a7ad7b7..c5bc48e95f2 100644 --- a/tests/data/cases/pep_701.py +++ b/tests/data/cases/pep_701.py @@ -89,6 +89,11 @@ %d }""" +f"{ +X +!r +}" + raise ValueError( "xxxxxxxxxxxIncorrect --line-ranges format, expect START-END, found" f" {lines_str!r}" @@ -197,6 +202,11 @@ %d }""" +f"{ +X +!r +}" + raise ValueError( "xxxxxxxxxxxIncorrect --line-ranges format, expect START-END, found" f" {lines_str!r}" From eb05cd4cb250fc22657ee47d0ed1c3ae2fa714d2 Mon Sep 17 00:00:00 2001 From: Tushar Sadhwani Date: Mon, 22 Apr 2024 16:51:45 +0530 Subject: [PATCH 75/77] Revert "Remove node-specific logic from visit_default (#4321)" This reverts commit 7134754ef45078b032039ad858bdaaef146233b2. --- src/black/linegen.py | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/src/black/linegen.py b/src/black/linegen.py index 64db7b6208c..2f2ae431818 100644 --- a/src/black/linegen.py +++ b/src/black/linegen.py @@ -152,6 +152,11 @@ def visit_default(self, node: LN) -> Iterator[Line]: if any_open_brackets: node.prefix = "" + if self.mode.string_normalization and node.type == token.STRING: + node.value = normalize_string_prefix(node.value) + node.value = normalize_string_quotes(node.value) + if node.type == token.NUMBER: + normalize_numeric_literal(node) if node.type not in WHITESPACE: self.current_line.append(node) yield from super().visit_default(node) @@ -415,11 +420,12 @@ def visit_STRING(self, leaf: Leaf) -> Iterator[Line]: # indentation of those changes the AST representation of the code. if self.mode.string_normalization: docstring = normalize_string_prefix(leaf.value) - # We handle string normalization at the end of this method, but since - # what we do right now acts differently depending on quote style (ex. + # visit_default() does handle string normalization for us, but + # since this method acts differently depending on quote style (ex. # see padding logic below), there's a possibility for unstable - # formatting. To avoid a situation where this function formats a - # docstring differently on the second pass, normalize it early. + # formatting as visit_default() is called *after*. To avoid a + # situation where this function formats a docstring differently on + # the second pass, normalize it early. 
docstring = normalize_string_quotes(docstring) else: docstring = leaf.value @@ -493,13 +499,6 @@ def visit_STRING(self, leaf: Leaf) -> Iterator[Line]: else: leaf.value = prefix + quote + docstring + quote - if self.mode.string_normalization and leaf.type == token.STRING: - leaf.value = normalize_string_prefix(leaf.value) - leaf.value = normalize_string_quotes(leaf.value) - yield from self.visit_default(leaf) - - def visit_NUMBER(self, leaf: Leaf) -> Iterator[Line]: - normalize_numeric_literal(leaf) yield from self.visit_default(leaf) def visit_fstring(self, node: Node) -> Iterator[Line]: From 5d727ec86639c553187c9a0b976aafddbd00abc1 Mon Sep 17 00:00:00 2001 From: Jelle Zijlstra Date: Mon, 22 Apr 2024 07:59:07 -0700 Subject: [PATCH 76/77] Revert "Revert "Remove node-specific logic from visit_default (#4321)"" This reverts commit eb05cd4cb250fc22657ee47d0ed1c3ae2fa714d2. --- src/black/linegen.py | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/src/black/linegen.py b/src/black/linegen.py index 2f2ae431818..64db7b6208c 100644 --- a/src/black/linegen.py +++ b/src/black/linegen.py @@ -152,11 +152,6 @@ def visit_default(self, node: LN) -> Iterator[Line]: if any_open_brackets: node.prefix = "" - if self.mode.string_normalization and node.type == token.STRING: - node.value = normalize_string_prefix(node.value) - node.value = normalize_string_quotes(node.value) - if node.type == token.NUMBER: - normalize_numeric_literal(node) if node.type not in WHITESPACE: self.current_line.append(node) yield from super().visit_default(node) @@ -420,12 +415,11 @@ def visit_STRING(self, leaf: Leaf) -> Iterator[Line]: # indentation of those changes the AST representation of the code. if self.mode.string_normalization: docstring = normalize_string_prefix(leaf.value) - # visit_default() does handle string normalization for us, but - # since this method acts differently depending on quote style (ex. + # We handle string normalization at the end of this method, but since + # what we do right now acts differently depending on quote style (ex. # see padding logic below), there's a possibility for unstable - # formatting as visit_default() is called *after*. To avoid a - # situation where this function formats a docstring differently on - # the second pass, normalize it early. + # formatting. To avoid a situation where this function formats a + # docstring differently on the second pass, normalize it early. 
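# Concrete case (a sketch of the instability being avoided): quote
# normalization can rewrite '''...''' docstrings to """...""", and the
# padding decisions below key off the final quote character, so running
# normalization only afterwards could yield a different result on a
# second formatting pass.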
docstring = normalize_string_quotes(docstring) else: docstring = leaf.value @@ -499,6 +493,13 @@ def visit_STRING(self, leaf: Leaf) -> Iterator[Line]: else: leaf.value = prefix + quote + docstring + quote + if self.mode.string_normalization and leaf.type == token.STRING: + leaf.value = normalize_string_prefix(leaf.value) + leaf.value = normalize_string_quotes(leaf.value) + yield from self.visit_default(leaf) + + def visit_NUMBER(self, leaf: Leaf) -> Iterator[Line]: + normalize_numeric_literal(leaf) yield from self.visit_default(leaf) def visit_fstring(self, node: Node) -> Iterator[Line]: From ab2f43c51f9385188ae952e5354c1d8955a9b8a0 Mon Sep 17 00:00:00 2001 From: Jelle Zijlstra Date: Mon, 22 Apr 2024 08:00:41 -0700 Subject: [PATCH 77/77] fix --- src/black/linegen.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/black/linegen.py b/src/black/linegen.py index 64db7b6208c..4b29a049dba 100644 --- a/src/black/linegen.py +++ b/src/black/linegen.py @@ -506,7 +506,7 @@ def visit_fstring(self, node: Node) -> Iterator[Line]: # currently we don't want to format and split f-strings at all. string_leaf = _fstring_to_string(node) node.replace(string_leaf) - yield from self.visit_default(string_leaf) + yield from self.visit_STRING(string_leaf) # TODO: Uncomment Implementation to format f-string children # fstring_start = node.children[0]
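A quick end-to-end check of the whole series (a hedged sketch rather than
part of any patch above; it assumes this branch of black is installed and
that format_str/Mode/TargetVersion keep their usual public signatures):

    import black

    # Quote reuse inside a replacement field is only legal under PEP 701.
    src = 'x = f"outer {"inner"} done"\n'
    mode = black.Mode(target_versions={black.TargetVersion.PY312})
    print(black.format_str(src, mode=mode), end="")

With these patches applied, the 3.12-only nesting should parse cleanly, and
the f-string itself should come back unchanged, since visit_fstring()
collapses the whole subtree into a single STRING leaf before any line
splitting happens.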