Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 8 additions & 4 deletions Doc/library/tokenize.rst
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ type can be determined by checking the ``exact_type`` property on the
**undefined** when providing invalid Python code and it can change at any
point.

Tokenizing Input
Tokenizing input
----------------

The primary entry point is a :term:`generator`:
Expand Down Expand Up @@ -146,7 +146,7 @@ function it uses to do this is available:

.. _tokenize-cli:

Command-Line Usage
Command-line usage
------------------

.. versionadded:: 3.3
Expand All @@ -173,8 +173,12 @@ The following options are accepted:
If :file:`filename.py` is specified its contents are tokenized to stdout.
Otherwise, tokenization is performed on stdin.

.. versionadded:: next
Output is in color by default and can be
:ref:`controlled using environment variables <using-on-controlling-color>`.

Examples
------------------
--------

Example of a script rewriter that transforms float literals into Decimal
objects::
Expand Down Expand Up @@ -227,7 +231,7 @@ Example of tokenizing from the command line. The script::

will be tokenized to the following output where the first column is the range
of the line/column coordinates where the token is found, the second column is
the name of the token, and the final column is the value of the token (if any)
the name of the token, and the final column is the value of the token (if any):

.. code-block:: shell-session

Expand Down
9 changes: 9 additions & 0 deletions Doc/whatsnew/3.15.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1217,6 +1217,15 @@ tkinter
(Contributed by Matthias Kievernagel and Serhiy Storchaka in :gh:`47655`.)


tokenize
--------

* The output of the :mod:`tokenize` :ref:`command-line interface
<tokenize-cli>` is colored by default. This can be controlled with
:ref:`environment variables <using-on-controlling-color>`.
(Contributed by Hugo van Kemenade in :gh:`148991`.)


.. _whatsnew315-tomllib-1-1-0:

tomllib
Expand Down
12 changes: 12 additions & 0 deletions Lib/_colorize.py
Original file line number Diff line number Diff line change
Expand Up @@ -375,6 +375,14 @@ class Timeit(ThemeSection):
reset: str = ANSIColors.RESET


@dataclass(frozen=True, kw_only=True)
class Tokenize(ThemeSection):
    """Theme section with the colors used by the ``tokenize`` CLI output."""

    # Structural/whitespace-like tokens (INDENT, DEDENT, NEWLINE, NL,
    # ENCODING, ENDMARKER) — dimmed so real tokens stand out.
    whitespace: str = ANSIColors.GREY
    # Tokens the tokenizer could not classify (ERRORTOKEN).
    error: str = ANSIColors.BOLD_RED
    # The line/column numbers in the "row,col-row,col:" range prefix.
    position: str = ANSIColors.GREY
    # The punctuation between those numbers (commas, dash, colon).
    delimiter: str = ANSIColors.RESET


@dataclass(frozen=True, kw_only=True)
class Traceback(ThemeSection):
type: str = ANSIColors.BOLD_MAGENTA
Expand Down Expand Up @@ -411,6 +419,7 @@ class Theme:
live_profiler: LiveProfiler = field(default_factory=LiveProfiler)
syntax: Syntax = field(default_factory=Syntax)
timeit: Timeit = field(default_factory=Timeit)
tokenize: Tokenize = field(default_factory=Tokenize)
traceback: Traceback = field(default_factory=Traceback)
unittest: Unittest = field(default_factory=Unittest)

Expand All @@ -424,6 +433,7 @@ def copy_with(
live_profiler: LiveProfiler | None = None,
syntax: Syntax | None = None,
timeit: Timeit | None = None,
tokenize: Tokenize | None = None,
traceback: Traceback | None = None,
unittest: Unittest | None = None,
) -> Self:
Expand All @@ -440,6 +450,7 @@ def copy_with(
live_profiler=live_profiler or self.live_profiler,
syntax=syntax or self.syntax,
timeit=timeit or self.timeit,
tokenize=tokenize or self.tokenize,
traceback=traceback or self.traceback,
unittest=unittest or self.unittest,
)
Expand All @@ -460,6 +471,7 @@ def no_colors(cls) -> Self:
live_profiler=LiveProfiler.no_colors(),
syntax=Syntax.no_colors(),
timeit=Timeit.no_colors(),
tokenize=Tokenize.no_colors(),
traceback=Traceback.no_colors(),
unittest=Unittest.no_colors(),
)
Expand Down
1 change: 1 addition & 0 deletions Lib/test/test_tokenize.py
Original file line number Diff line number Diff line change
Expand Up @@ -3326,6 +3326,7 @@ def test_newline_at_the_end_of_buffer(self):
run_test_script(file_name)


@support.force_not_colorized_test_class
class CommandLineTest(unittest.TestCase):
def setUp(self):
self.filename = tempfile.mktemp()
Expand Down
63 changes: 55 additions & 8 deletions Lib/tokenize.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
from token import *
from token import EXACT_TOKEN_TYPES
import _tokenize
lazy import _colorize

cookie_re = re.compile(br'^[ \t\f]*#.*?coding[:=][ \t]*([-\w.]+)', re.ASCII)
blank_re = re.compile(br'^[ \t\f]*(?:[#\r\n]|$)', re.ASCII)
Expand Down Expand Up @@ -505,6 +506,57 @@ def generate_tokens(readline):
"""
return _generate_tokens_from_c_tokenizer(readline, extra_tokens=True)


def _get_token_colors(syntax, tokenize):
"""Map token type numbers to theme colors."""
return frozendict({
COMMENT: syntax.comment,
DEDENT: tokenize.whitespace,
ENCODING: tokenize.whitespace,
ENDMARKER: tokenize.whitespace,
ERRORTOKEN: tokenize.error,
FSTRING_START: syntax.string,
FSTRING_MIDDLE: syntax.string,
FSTRING_END: syntax.string,
INDENT: tokenize.whitespace,
NAME: syntax.reset,
NEWLINE: tokenize.whitespace,
NL: tokenize.whitespace,
NUMBER: syntax.number,
OP: syntax.op,
SOFT_KEYWORD: syntax.soft_keyword,
STRING: syntax.string,
TSTRING_START: syntax.string,
TSTRING_MIDDLE: syntax.string,
TSTRING_END: syntax.string,
})


def _format_tokens(tokens, *, color=False, exact=False):
    """Yield one formatted output line per token.

    Each line is ``range  NAME  'string'`` where the range column is
    padded to 20 visible characters and the name/value columns to 15.
    When *color* is true the active theme's colors are applied; when
    *exact* is true the token's ``exact_type`` is reported instead of
    its ``type``.
    """
    theme = _colorize.get_theme(force_no_color=not color)
    syntax = theme.syntax
    tok_theme = theme.tokenize
    color_map = _get_token_colors(syntax, tok_theme)
    pos = tok_theme.position
    delim = tok_theme.delimiter

    for tok in tokens:
        kind = tok.exact_type if exact else tok.type
        (srow, scol), (erow, ecol) = tok.start, tok.end

        colored_range = (
            f"{pos}{srow}{delim},{pos}{scol}{delim}-"
            f"{pos}{erow}{delim},{pos}{ecol}{delim}:"
        )
        # Pad based on the *visible* width: the ANSI escapes in
        # colored_range occupy no columns on screen.
        visible = f"{srow},{scol}-{erow},{ecol}:"
        padding = " " * (20 - len(visible))

        name = tok_name[kind]
        name_color = color_map.get(kind, syntax.reset)
        yield (
            f"{colored_range}{padding}"
            f"{name_color}{name:<15}"
            f"{syntax.reset}{tok.string!r:<15}"
        )


def _main(args=None):
import argparse

Expand All @@ -524,7 +576,7 @@ def error(message, filename=None, location=None):
sys.exit(1)

# Parse the arguments and options
parser = argparse.ArgumentParser(color=True)
parser = argparse.ArgumentParser()
parser.add_argument(dest='filename', nargs='?',
metavar='filename.py',
help='the file to tokenize; defaults to stdin')
Expand All @@ -545,13 +597,8 @@ def error(message, filename=None, location=None):


# Output the tokenization
for token in tokens:
token_type = token.type
if args.exact:
token_type = token.exact_type
token_range = "%d,%d-%d,%d:" % (token.start + token.end)
print("%-20s%-15s%-15r" %
(token_range, tok_name[token_type], token.string))
for line in _format_tokens(tokens, color=True, exact=args.exact):
print(line)
except IndentationError as err:
line, column = err.args[1][1:3]
error(err.args[0], filename, (line, column))
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Add colour to :mod:`tokenize` CLI output. Patch by Hugo van Kemenade.
Loading