From deee43b3bcd4c067e2356c7dc741d2f2e523fee6 Mon Sep 17 00:00:00 2001 From: Pablo Galindo Date: Tue, 24 Aug 2021 01:08:33 +0100 Subject: [PATCH 1/5] Add tests for the C tokenizer and expose it as a private module --- Lib/test/test_tokenize.py | 889 +++++++++++++++++++++++++++++- Lib/tokenize.py | 9 + Makefile.pre.in | 1 + Modules/config.c.in | 4 + PC/config.c | 5 +- PCbuild/pythoncore.vcxproj | 1 + Python/Python-tokenize.c | 195 +++++++ Python/clinic/Python-tokenize.c.h | 41 ++ Python/stdlib_module_names.h | 1 + 9 files changed, 1141 insertions(+), 5 deletions(-) create mode 100644 Python/Python-tokenize.c create mode 100644 Python/clinic/Python-tokenize.c.h diff --git a/Lib/test/test_tokenize.py b/Lib/test/test_tokenize.py index 4bce1ca9c76f7c..31ad2389b2a250 100644 --- a/Lib/test/test_tokenize.py +++ b/Lib/test/test_tokenize.py @@ -3,7 +3,7 @@ from tokenize import (tokenize, _tokenize, untokenize, NUMBER, NAME, OP, STRING, ENDMARKER, ENCODING, tok_name, detect_encoding, open as tokenize_open, Untokenizer, generate_tokens, - NEWLINE) + NEWLINE, _generate_tokens_from_c_tokenizer) from io import BytesIO, StringIO import unittest from unittest import TestCase, mock @@ -12,7 +12,6 @@ import os import token - # Converts a source string into a list of textual representation # of the tokens such as: # ` NAME 'if' (1, 0) (1, 2)` @@ -1654,5 +1653,891 @@ def test_indentation_semantics_retained(self): self.check_roundtrip(code) +class CTokenizeTest(TestCase): + def check_tokenize(self, s, expected): + # Format the tokens in s in a table format. + # The ENDMARKER and final NEWLINE are omitted. 
+ with self.subTest(source=s): + result = stringify_tokens_from_source( + _generate_tokens_from_c_tokenizer(s), s + ) + self.assertEqual(result, expected.rstrip().splitlines()) + + def test_int(self): + + self.check_tokenize('0xff <= 255', """\ + NUMBER '' (1, 0) (1, 4) + LESSEQUAL '' (1, 5) (1, 7) + NUMBER '' (1, 8) (1, 11) + """) + + self.check_tokenize('0b10 <= 255', """\ + NUMBER '' (1, 0) (1, 4) + LESSEQUAL '' (1, 5) (1, 7) + NUMBER '' (1, 8) (1, 11) + """) + + self.check_tokenize('0o123 <= 0O123', """\ + NUMBER '' (1, 0) (1, 5) + LESSEQUAL '' (1, 6) (1, 8) + NUMBER '' (1, 9) (1, 14) + """) + + self.check_tokenize('1234567 > ~0x15', """\ + NUMBER '' (1, 0) (1, 7) + GREATER '' (1, 8) (1, 9) + TILDE '' (1, 10) (1, 11) + NUMBER '' (1, 11) (1, 15) + """) + + self.check_tokenize('2134568 != 1231515', """\ + NUMBER '' (1, 0) (1, 7) + NOTEQUAL '' (1, 8) (1, 10) + NUMBER '' (1, 11) (1, 18) + """) + + self.check_tokenize('(-124561-1) & 200000000', """\ + LPAR '' (1, 0) (1, 1) + MINUS '' (1, 1) (1, 2) + NUMBER '' (1, 2) (1, 8) + MINUS '' (1, 8) (1, 9) + NUMBER '' (1, 9) (1, 10) + RPAR '' (1, 10) (1, 11) + AMPER '' (1, 12) (1, 13) + NUMBER '' (1, 14) (1, 23) + """) + + self.check_tokenize('0xdeadbeef != -1', """\ + NUMBER '' (1, 0) (1, 10) + NOTEQUAL '' (1, 11) (1, 13) + MINUS '' (1, 14) (1, 15) + NUMBER '' (1, 15) (1, 16) + """) + + self.check_tokenize('0xdeadc0de & 12345', """\ + NUMBER '' (1, 0) (1, 10) + AMPER '' (1, 11) (1, 12) + NUMBER '' (1, 13) (1, 18) + """) + + self.check_tokenize('0xFF & 0x15 | 1234', """\ + NUMBER '' (1, 0) (1, 4) + AMPER '' (1, 5) (1, 6) + NUMBER '' (1, 7) (1, 11) + VBAR '' (1, 12) (1, 13) + NUMBER '' (1, 14) (1, 18) + """) + + def test_long(self): + + self.check_tokenize('x = 0', """\ + NAME '' (1, 0) (1, 1) + EQUAL '' (1, 2) (1, 3) + NUMBER '' (1, 4) (1, 5) + """) + + self.check_tokenize('x = 0xfffffffffff', """\ + NAME '' (1, 0) (1, 1) + EQUAL '' (1, 2) (1, 3) + NUMBER '' (1, 4) (1, 17) + """) + + self.check_tokenize('x = 
123141242151251616110', """\ + NAME '' (1, 0) (1, 1) + EQUAL '' (1, 2) (1, 3) + NUMBER '' (1, 4) (1, 25) + """) + + self.check_tokenize('x = -15921590215012591', """\ + NAME '' (1, 0) (1, 1) + EQUAL '' (1, 2) (1, 3) + MINUS '' (1, 4) (1, 5) + NUMBER '' (1, 5) (1, 22) + """) + + def test_float(self): + + self.check_tokenize('x = 3.14159', """\ + NAME '' (1, 0) (1, 1) + EQUAL '' (1, 2) (1, 3) + NUMBER '' (1, 4) (1, 11) + """) + + self.check_tokenize('x = 314159.', """\ + NAME '' (1, 0) (1, 1) + EQUAL '' (1, 2) (1, 3) + NUMBER '' (1, 4) (1, 11) + """) + + self.check_tokenize('x = .314159', """\ + NAME '' (1, 0) (1, 1) + EQUAL '' (1, 2) (1, 3) + NUMBER '' (1, 4) (1, 11) + """) + + self.check_tokenize('x = 3e14159', """\ + NAME '' (1, 0) (1, 1) + EQUAL '' (1, 2) (1, 3) + NUMBER '' (1, 4) (1, 11) + """) + + self.check_tokenize('x = 3E123', """\ + NAME '' (1, 0) (1, 1) + EQUAL '' (1, 2) (1, 3) + NUMBER '' (1, 4) (1, 9) + """) + + self.check_tokenize('x+y = 3e-1230', """\ + NAME '' (1, 0) (1, 1) + PLUS '' (1, 1) (1, 2) + NAME '' (1, 2) (1, 3) + EQUAL '' (1, 4) (1, 5) + NUMBER '' (1, 6) (1, 13) + """) + + self.check_tokenize('x = 3.14e159', """\ + NAME '' (1, 0) (1, 1) + EQUAL '' (1, 2) (1, 3) + NUMBER '' (1, 4) (1, 12) + """) + + def test_string(self): + + self.check_tokenize('x = \'\'; y = ""', """\ + NAME '' (1, 0) (1, 1) + EQUAL '' (1, 2) (1, 3) + STRING '' (1, 4) (1, 6) + SEMI '' (1, 6) (1, 7) + NAME '' (1, 8) (1, 9) + EQUAL '' (1, 10) (1, 11) + STRING '' (1, 12) (1, 14) + """) + + self.check_tokenize('x = \'"\'; y = "\'"', """\ + NAME '' (1, 0) (1, 1) + EQUAL '' (1, 2) (1, 3) + STRING '' (1, 4) (1, 7) + SEMI '' (1, 7) (1, 8) + NAME '' (1, 9) (1, 10) + EQUAL '' (1, 11) (1, 12) + STRING '' (1, 13) (1, 16) + """) + + self.check_tokenize('x = "doesn\'t "shrink", does it"', """\ + NAME '' (1, 0) (1, 1) + EQUAL '' (1, 2) (1, 3) + STRING '' (1, 4) (1, 14) + NAME '' (1, 14) (1, 20) + STRING '' (1, 20) (1, 31) + """) + + self.check_tokenize("x = 'abc' + 'ABC'", """\ + NAME '' 
(1, 0) (1, 1) + EQUAL '' (1, 2) (1, 3) + STRING '' (1, 4) (1, 9) + PLUS '' (1, 10) (1, 11) + STRING '' (1, 12) (1, 17) + """) + + self.check_tokenize('y = "ABC" + "ABC"', """\ + NAME '' (1, 0) (1, 1) + EQUAL '' (1, 2) (1, 3) + STRING '' (1, 4) (1, 9) + PLUS '' (1, 10) (1, 11) + STRING '' (1, 12) (1, 17) + """) + + self.check_tokenize("x = r'abc' + r'ABC' + R'ABC' + R'ABC'", """\ + NAME '' (1, 0) (1, 1) + EQUAL '' (1, 2) (1, 3) + STRING '' (1, 4) (1, 10) + PLUS '' (1, 11) (1, 12) + STRING '' (1, 13) (1, 19) + PLUS '' (1, 20) (1, 21) + STRING '' (1, 22) (1, 28) + PLUS '' (1, 29) (1, 30) + STRING '' (1, 31) (1, 37) + """) + + self.check_tokenize('y = r"abc" + r"ABC" + R"ABC" + R"ABC"', """\ + NAME '' (1, 0) (1, 1) + EQUAL '' (1, 2) (1, 3) + STRING '' (1, 4) (1, 10) + PLUS '' (1, 11) (1, 12) + STRING '' (1, 13) (1, 19) + PLUS '' (1, 20) (1, 21) + STRING '' (1, 22) (1, 28) + PLUS '' (1, 29) (1, 30) + STRING '' (1, 31) (1, 37) + """) + + self.check_tokenize("u'abc' + U'abc'", """\ + STRING '' (1, 0) (1, 6) + PLUS '' (1, 7) (1, 8) + STRING '' (1, 9) (1, 15) + """) + + self.check_tokenize('u"abc" + U"abc"', """\ + STRING '' (1, 0) (1, 6) + PLUS '' (1, 7) (1, 8) + STRING '' (1, 9) (1, 15) + """) + + self.check_tokenize("b'abc' + B'abc'", """\ + STRING '' (1, 0) (1, 6) + PLUS '' (1, 7) (1, 8) + STRING '' (1, 9) (1, 15) + """) + + self.check_tokenize('b"abc" + B"abc"', """\ + STRING '' (1, 0) (1, 6) + PLUS '' (1, 7) (1, 8) + STRING '' (1, 9) (1, 15) + """) + + self.check_tokenize("br'abc' + bR'abc' + Br'abc' + BR'abc'", """\ + STRING '' (1, 0) (1, 7) + PLUS '' (1, 8) (1, 9) + STRING '' (1, 10) (1, 17) + PLUS '' (1, 18) (1, 19) + STRING '' (1, 20) (1, 27) + PLUS '' (1, 28) (1, 29) + STRING '' (1, 30) (1, 37) + """) + + self.check_tokenize('br"abc" + bR"abc" + Br"abc" + BR"abc"', """\ + STRING '' (1, 0) (1, 7) + PLUS '' (1, 8) (1, 9) + STRING '' (1, 10) (1, 17) + PLUS '' (1, 18) (1, 19) + STRING '' (1, 20) (1, 27) + PLUS '' (1, 28) (1, 29) + STRING '' (1, 30) (1, 37) + """) + + 
self.check_tokenize("rb'abc' + rB'abc' + Rb'abc' + RB'abc'", """\ + STRING '' (1, 0) (1, 7) + PLUS '' (1, 8) (1, 9) + STRING '' (1, 10) (1, 17) + PLUS '' (1, 18) (1, 19) + STRING '' (1, 20) (1, 27) + PLUS '' (1, 28) (1, 29) + STRING '' (1, 30) (1, 37) + """) + + self.check_tokenize('rb"abc" + rB"abc" + Rb"abc" + RB"abc"', """\ + STRING '' (1, 0) (1, 7) + PLUS '' (1, 8) (1, 9) + STRING '' (1, 10) (1, 17) + PLUS '' (1, 18) (1, 19) + STRING '' (1, 20) (1, 27) + PLUS '' (1, 28) (1, 29) + STRING '' (1, 30) (1, 37) + """) + + self.check_tokenize('"a\\\nde\\\nfg"', """\ + STRING '' (1, 0) (3, 3) + """) + + self.check_tokenize('u"a\\\nde"', """\ + STRING '' (1, 0) (2, 3) + """) + + self.check_tokenize('rb"a\\\nd"', """\ + STRING '' (1, 0) (2, 2) + """) + + self.check_tokenize('"""a\\\nb"""', """\ + STRING '' (1, 0) (2, 4) + """) + + self.check_tokenize('u"""a\\\nb"""', """\ + STRING '' (1, 0) (2, 4) + """) + + self.check_tokenize('rb"""a\\\nb\\\nc"""', """\ + STRING '' (1, 0) (3, 4) + """) + + self.check_tokenize('f"abc"', """\ + STRING '' (1, 0) (1, 6) + """) + + self.check_tokenize('fR"a{b}c"', """\ + STRING '' (1, 0) (1, 9) + """) + + self.check_tokenize('f"""abc"""', """\ + STRING '' (1, 0) (1, 10) + """) + + self.check_tokenize('f"abc\\\ndef"', """\ + STRING '' (1, 0) (2, 4) + """) + + self.check_tokenize('Rf"abc\\\ndef"', """\ + STRING '' (1, 0) (2, 4) + """) + + def test_function(self): + + self.check_tokenize('def d22(a, b, c=2, d=2, *k): pass', """\ + NAME '' (1, 0) (1, 3) + NAME '' (1, 4) (1, 7) + LPAR '' (1, 7) (1, 8) + NAME '' (1, 8) (1, 9) + COMMA '' (1, 9) (1, 10) + NAME '' (1, 11) (1, 12) + COMMA '' (1, 12) (1, 13) + NAME '' (1, 14) (1, 15) + EQUAL '' (1, 15) (1, 16) + NUMBER '' (1, 16) (1, 17) + COMMA '' (1, 17) (1, 18) + NAME '' (1, 19) (1, 20) + EQUAL '' (1, 20) (1, 21) + NUMBER '' (1, 21) (1, 22) + COMMA '' (1, 22) (1, 23) + STAR '' (1, 24) (1, 25) + NAME '' (1, 25) (1, 26) + RPAR '' (1, 26) (1, 27) + COLON '' (1, 27) (1, 28) + NAME '' (1, 29) (1, 33) + 
""") + + self.check_tokenize('def d01v_(a=1, *k, **w): pass', """\ + NAME '' (1, 0) (1, 3) + NAME '' (1, 4) (1, 9) + LPAR '' (1, 9) (1, 10) + NAME '' (1, 10) (1, 11) + EQUAL '' (1, 11) (1, 12) + NUMBER '' (1, 12) (1, 13) + COMMA '' (1, 13) (1, 14) + STAR '' (1, 15) (1, 16) + NAME '' (1, 16) (1, 17) + COMMA '' (1, 17) (1, 18) + DOUBLESTAR '' (1, 19) (1, 21) + NAME '' (1, 21) (1, 22) + RPAR '' (1, 22) (1, 23) + COLON '' (1, 23) (1, 24) + NAME '' (1, 25) (1, 29) + """) + + self.check_tokenize('def d23(a: str, b: int=3) -> int: pass', """\ + NAME '' (1, 0) (1, 3) + NAME '' (1, 4) (1, 7) + LPAR '' (1, 7) (1, 8) + NAME '' (1, 8) (1, 9) + COLON '' (1, 9) (1, 10) + NAME '' (1, 11) (1, 14) + COMMA '' (1, 14) (1, 15) + NAME '' (1, 16) (1, 17) + COLON '' (1, 17) (1, 18) + NAME '' (1, 19) (1, 22) + EQUAL '' (1, 22) (1, 23) + NUMBER '' (1, 23) (1, 24) + RPAR '' (1, 24) (1, 25) + RARROW '' (1, 26) (1, 28) + NAME '' (1, 29) (1, 32) + COLON '' (1, 32) (1, 33) + NAME '' (1, 34) (1, 38) + """) + + def test_comparison(self): + + self.check_tokenize("if 1 < 1 > 1 == 1 >= 5 <= 0x15 <= 0x12 != " + "1 and 5 in 1 not in 1 is 1 or 5 is not 1: pass", """\ + NAME '' (1, 0) (1, 2) + NUMBER '' (1, 3) (1, 4) + LESS '' (1, 5) (1, 6) + NUMBER '' (1, 7) (1, 8) + GREATER '' (1, 9) (1, 10) + NUMBER '' (1, 11) (1, 12) + EQEQUAL '' (1, 13) (1, 15) + NUMBER '' (1, 16) (1, 17) + GREATEREQUAL '' (1, 18) (1, 20) + NUMBER '' (1, 21) (1, 22) + LESSEQUAL '' (1, 23) (1, 25) + NUMBER '' (1, 26) (1, 30) + LESSEQUAL '' (1, 31) (1, 33) + NUMBER '' (1, 34) (1, 38) + NOTEQUAL '' (1, 39) (1, 41) + NUMBER '' (1, 42) (1, 43) + NAME '' (1, 44) (1, 47) + NUMBER '' (1, 48) (1, 49) + NAME '' (1, 50) (1, 52) + NUMBER '' (1, 53) (1, 54) + NAME '' (1, 55) (1, 58) + NAME '' (1, 59) (1, 61) + NUMBER '' (1, 62) (1, 63) + NAME '' (1, 64) (1, 66) + NUMBER '' (1, 67) (1, 68) + NAME '' (1, 69) (1, 71) + NUMBER '' (1, 72) (1, 73) + NAME '' (1, 74) (1, 76) + NAME '' (1, 77) (1, 80) + NUMBER '' (1, 81) (1, 82) + COLON '' (1, 82) (1, 
83) + NAME '' (1, 84) (1, 88) + """) + + def test_additive(self): + + self.check_tokenize('x = 1 - y + 15 - 1 + 0x124 + z + a[5]', """\ + NAME '' (1, 0) (1, 1) + EQUAL '' (1, 2) (1, 3) + NUMBER '' (1, 4) (1, 5) + MINUS '' (1, 6) (1, 7) + NAME '' (1, 8) (1, 9) + PLUS '' (1, 10) (1, 11) + NUMBER '' (1, 12) (1, 14) + MINUS '' (1, 15) (1, 16) + NUMBER '' (1, 17) (1, 18) + PLUS '' (1, 19) (1, 20) + NUMBER '' (1, 21) (1, 26) + PLUS '' (1, 27) (1, 28) + NAME '' (1, 29) (1, 30) + PLUS '' (1, 31) (1, 32) + NAME '' (1, 33) (1, 34) + LSQB '' (1, 34) (1, 35) + NUMBER '' (1, 35) (1, 36) + RSQB '' (1, 36) (1, 37) + """) + + def test_multiplicative(self): + + self.check_tokenize('x = 1//1*1/5*12%0x12@42', """\ + NAME '' (1, 0) (1, 1) + EQUAL '' (1, 2) (1, 3) + NUMBER '' (1, 4) (1, 5) + DOUBLESLASH '' (1, 5) (1, 7) + NUMBER '' (1, 7) (1, 8) + STAR '' (1, 8) (1, 9) + NUMBER '' (1, 9) (1, 10) + SLASH '' (1, 10) (1, 11) + NUMBER '' (1, 11) (1, 12) + STAR '' (1, 12) (1, 13) + NUMBER '' (1, 13) (1, 15) + PERCENT '' (1, 15) (1, 16) + NUMBER '' (1, 16) (1, 20) + AT '' (1, 20) (1, 21) + NUMBER '' (1, 21) (1, 23) + """) + + def test_unary(self): + + self.check_tokenize('~1 ^ 1 & 1 |1 ^ -1', """\ + TILDE '' (1, 0) (1, 1) + NUMBER '' (1, 1) (1, 2) + CIRCUMFLEX '' (1, 3) (1, 4) + NUMBER '' (1, 5) (1, 6) + AMPER '' (1, 7) (1, 8) + NUMBER '' (1, 9) (1, 10) + VBAR '' (1, 11) (1, 12) + NUMBER '' (1, 12) (1, 13) + CIRCUMFLEX '' (1, 14) (1, 15) + MINUS '' (1, 16) (1, 17) + NUMBER '' (1, 17) (1, 18) + """) + + self.check_tokenize('-1*1/1+1*1//1 - ---1**1', """\ + MINUS '' (1, 0) (1, 1) + NUMBER '' (1, 1) (1, 2) + STAR '' (1, 2) (1, 3) + NUMBER '' (1, 3) (1, 4) + SLASH '' (1, 4) (1, 5) + NUMBER '' (1, 5) (1, 6) + PLUS '' (1, 6) (1, 7) + NUMBER '' (1, 7) (1, 8) + STAR '' (1, 8) (1, 9) + NUMBER '' (1, 9) (1, 10) + DOUBLESLASH '' (1, 10) (1, 12) + NUMBER '' (1, 12) (1, 13) + MINUS '' (1, 14) (1, 15) + MINUS '' (1, 16) (1, 17) + MINUS '' (1, 17) (1, 18) + MINUS '' (1, 18) (1, 19) + NUMBER '' (1, 19) (1, 
20) + DOUBLESTAR '' (1, 20) (1, 22) + NUMBER '' (1, 22) (1, 23) + """) + + def test_selector(self): + + self.check_tokenize("import sys, time\nx = sys.modules['time'].time()", """\ + NAME '' (1, 0) (1, 6) + NAME '' (1, 7) (1, 10) + COMMA '' (1, 10) (1, 11) + NAME '' (1, 12) (1, 16) + NEWLINE '' (1, 16) (1, 16) + NAME '' (2, 0) (2, 1) + EQUAL '' (2, 2) (2, 3) + NAME '' (2, 4) (2, 7) + DOT '' (2, 7) (2, 8) + NAME '' (2, 8) (2, 15) + LSQB '' (2, 15) (2, 16) + STRING '' (2, 16) (2, 22) + RSQB '' (2, 22) (2, 23) + DOT '' (2, 23) (2, 24) + NAME '' (2, 24) (2, 28) + LPAR '' (2, 28) (2, 29) + RPAR '' (2, 29) (2, 30) + """) + + def test_method(self): + + self.check_tokenize('@staticmethod\ndef foo(x,y): pass', """\ + AT '' (1, 0) (1, 1) + NAME '' (1, 1) (1, 13) + NEWLINE '' (1, 13) (1, 13) + NAME '' (2, 0) (2, 3) + NAME '' (2, 4) (2, 7) + LPAR '' (2, 7) (2, 8) + NAME '' (2, 8) (2, 9) + COMMA '' (2, 9) (2, 10) + NAME '' (2, 10) (2, 11) + RPAR '' (2, 11) (2, 12) + COLON '' (2, 12) (2, 13) + NAME '' (2, 14) (2, 18) + """) + + def test_tabs(self): + + self.check_tokenize('def f():\n\tif x\n \tpass', """\ + NAME '' (1, 0) (1, 3) + NAME '' (1, 4) (1, 5) + LPAR '' (1, 5) (1, 6) + RPAR '' (1, 6) (1, 7) + COLON '' (1, 7) (1, 8) + NEWLINE '' (1, 8) (1, 8) + INDENT '' (2, -1) (2, -1) + NAME '' (2, 1) (2, 3) + NAME '' (2, 4) (2, 5) + NEWLINE '' (2, 5) (2, 5) + INDENT '' (3, -1) (3, -1) + NAME '' (3, 9) (3, 13) + DEDENT '' (3, -1) (3, -1) + DEDENT '' (3, -1) (3, -1) + """) + + def test_async(self): + + self.check_tokenize('async = 1', """\ + ASYNC '' (1, 0) (1, 5) + EQUAL '' (1, 6) (1, 7) + NUMBER '' (1, 8) (1, 9) + """) + + self.check_tokenize('a = (async = 1)', """\ + NAME '' (1, 0) (1, 1) + EQUAL '' (1, 2) (1, 3) + LPAR '' (1, 4) (1, 5) + ASYNC '' (1, 5) (1, 10) + EQUAL '' (1, 11) (1, 12) + NUMBER '' (1, 13) (1, 14) + RPAR '' (1, 14) (1, 15) + """) + + self.check_tokenize('async()', """\ + ASYNC '' (1, 0) (1, 5) + LPAR '' (1, 5) (1, 6) + RPAR '' (1, 6) (1, 7) + """) + + 
self.check_tokenize('class async(Bar):pass', """\ + NAME '' (1, 0) (1, 5) + ASYNC '' (1, 6) (1, 11) + LPAR '' (1, 11) (1, 12) + NAME '' (1, 12) (1, 15) + RPAR '' (1, 15) (1, 16) + COLON '' (1, 16) (1, 17) + NAME '' (1, 17) (1, 21) + """) + + self.check_tokenize('class async:pass', """\ + NAME '' (1, 0) (1, 5) + ASYNC '' (1, 6) (1, 11) + COLON '' (1, 11) (1, 12) + NAME '' (1, 12) (1, 16) + """) + + self.check_tokenize('await = 1', """\ + AWAIT '' (1, 0) (1, 5) + EQUAL '' (1, 6) (1, 7) + NUMBER '' (1, 8) (1, 9) + """) + + self.check_tokenize('foo.async', """\ + NAME '' (1, 0) (1, 3) + DOT '' (1, 3) (1, 4) + ASYNC '' (1, 4) (1, 9) + """) + + self.check_tokenize('async for a in b: pass', """\ + ASYNC '' (1, 0) (1, 5) + NAME '' (1, 6) (1, 9) + NAME '' (1, 10) (1, 11) + NAME '' (1, 12) (1, 14) + NAME '' (1, 15) (1, 16) + COLON '' (1, 16) (1, 17) + NAME '' (1, 18) (1, 22) + """) + + self.check_tokenize('async with a as b: pass', """\ + ASYNC '' (1, 0) (1, 5) + NAME '' (1, 6) (1, 10) + NAME '' (1, 11) (1, 12) + NAME '' (1, 13) (1, 15) + NAME '' (1, 16) (1, 17) + COLON '' (1, 17) (1, 18) + NAME '' (1, 19) (1, 23) + """) + + self.check_tokenize('async.foo', """\ + ASYNC '' (1, 0) (1, 5) + DOT '' (1, 5) (1, 6) + NAME '' (1, 6) (1, 9) + """) + + self.check_tokenize('async', """\ + ASYNC '' (1, 0) (1, 5) + """) + + self.check_tokenize('async\n#comment\nawait', """\ + ASYNC '' (1, 0) (1, 5) + NEWLINE '' (1, 5) (1, 5) + AWAIT '' (3, 0) (3, 5) + """) + + self.check_tokenize('async\n...\nawait', """\ + ASYNC '' (1, 0) (1, 5) + NEWLINE '' (1, 5) (1, 5) + ELLIPSIS '' (2, 0) (2, 3) + NEWLINE '' (2, 3) (2, 3) + AWAIT '' (3, 0) (3, 5) + """) + + self.check_tokenize('async\nawait', """\ + ASYNC '' (1, 0) (1, 5) + NEWLINE '' (1, 5) (1, 5) + AWAIT '' (2, 0) (2, 5) + """) + + self.check_tokenize('foo.async + 1', """\ + NAME '' (1, 0) (1, 3) + DOT '' (1, 3) (1, 4) + ASYNC '' (1, 4) (1, 9) + PLUS '' (1, 10) (1, 11) + NUMBER '' (1, 12) (1, 13) + """) + + self.check_tokenize('async def foo(): 
pass', """\ + ASYNC '' (1, 0) (1, 5) + NAME '' (1, 6) (1, 9) + NAME '' (1, 10) (1, 13) + LPAR '' (1, 13) (1, 14) + RPAR '' (1, 14) (1, 15) + COLON '' (1, 15) (1, 16) + NAME '' (1, 17) (1, 21) + """) + + self.check_tokenize('''\ +async def foo(): + def foo(await): + await = 1 + if 1: + await +async += 1 +''', """\ + ASYNC '' (1, 0) (1, 5) + NAME '' (1, 6) (1, 9) + NAME '' (1, 10) (1, 13) + LPAR '' (1, 13) (1, 14) + RPAR '' (1, 14) (1, 15) + COLON '' (1, 15) (1, 16) + NEWLINE '' (1, 16) (1, 16) + INDENT '' (2, -1) (2, -1) + NAME '' (2, 2) (2, 5) + NAME '' (2, 6) (2, 9) + LPAR '' (2, 9) (2, 10) + AWAIT '' (2, 10) (2, 15) + RPAR '' (2, 15) (2, 16) + COLON '' (2, 16) (2, 17) + NEWLINE '' (2, 17) (2, 17) + INDENT '' (3, -1) (3, -1) + AWAIT '' (3, 4) (3, 9) + EQUAL '' (3, 10) (3, 11) + NUMBER '' (3, 12) (3, 13) + NEWLINE '' (3, 13) (3, 13) + DEDENT '' (4, -1) (4, -1) + NAME '' (4, 2) (4, 4) + NUMBER '' (4, 5) (4, 6) + COLON '' (4, 6) (4, 7) + NEWLINE '' (4, 7) (4, 7) + INDENT '' (5, -1) (5, -1) + AWAIT '' (5, 4) (5, 9) + NEWLINE '' (5, 9) (5, 9) + DEDENT '' (6, -1) (6, -1) + DEDENT '' (6, -1) (6, -1) + ASYNC '' (6, 0) (6, 5) + PLUSEQUAL '' (6, 6) (6, 8) + NUMBER '' (6, 9) (6, 10) + NEWLINE '' (6, 10) (6, 10) + """) + + self.check_tokenize('async def foo():\n async for i in 1: pass', """\ + ASYNC '' (1, 0) (1, 5) + NAME '' (1, 6) (1, 9) + NAME '' (1, 10) (1, 13) + LPAR '' (1, 13) (1, 14) + RPAR '' (1, 14) (1, 15) + COLON '' (1, 15) (1, 16) + NEWLINE '' (1, 16) (1, 16) + INDENT '' (2, -1) (2, -1) + ASYNC '' (2, 2) (2, 7) + NAME '' (2, 8) (2, 11) + NAME '' (2, 12) (2, 13) + NAME '' (2, 14) (2, 16) + NUMBER '' (2, 17) (2, 18) + COLON '' (2, 18) (2, 19) + NAME '' (2, 20) (2, 24) + DEDENT '' (2, -1) (2, -1) + """) + + self.check_tokenize('async def foo(async): await', """\ + ASYNC '' (1, 0) (1, 5) + NAME '' (1, 6) (1, 9) + NAME '' (1, 10) (1, 13) + LPAR '' (1, 13) (1, 14) + ASYNC '' (1, 14) (1, 19) + RPAR '' (1, 19) (1, 20) + COLON '' (1, 20) (1, 21) + AWAIT '' (1, 22) (1, 27) 
+ """) + + self.check_tokenize('''\ +def f(): + + def baz(): pass + async def bar(): pass + + await = 2''', """\ + NAME '' (1, 0) (1, 3) + NAME '' (1, 4) (1, 5) + LPAR '' (1, 5) (1, 6) + RPAR '' (1, 6) (1, 7) + COLON '' (1, 7) (1, 8) + NEWLINE '' (1, 8) (1, 8) + INDENT '' (3, -1) (3, -1) + NAME '' (3, 2) (3, 5) + NAME '' (3, 6) (3, 9) + LPAR '' (3, 9) (3, 10) + RPAR '' (3, 10) (3, 11) + COLON '' (3, 11) (3, 12) + NAME '' (3, 13) (3, 17) + NEWLINE '' (3, 17) (3, 17) + ASYNC '' (4, 2) (4, 7) + NAME '' (4, 8) (4, 11) + NAME '' (4, 12) (4, 15) + LPAR '' (4, 15) (4, 16) + RPAR '' (4, 16) (4, 17) + COLON '' (4, 17) (4, 18) + NAME '' (4, 19) (4, 23) + NEWLINE '' (4, 23) (4, 23) + AWAIT '' (6, 2) (6, 7) + EQUAL '' (6, 8) (6, 9) + NUMBER '' (6, 10) (6, 11) + DEDENT '' (6, -1) (6, -1) + """) + + self.check_tokenize('''\ +async def f(): + + def baz(): pass + async def bar(): pass + + await = 2''', """\ + ASYNC '' (1, 0) (1, 5) + NAME '' (1, 6) (1, 9) + NAME '' (1, 10) (1, 11) + LPAR '' (1, 11) (1, 12) + RPAR '' (1, 12) (1, 13) + COLON '' (1, 13) (1, 14) + NEWLINE '' (1, 14) (1, 14) + INDENT '' (3, -1) (3, -1) + NAME '' (3, 2) (3, 5) + NAME '' (3, 6) (3, 9) + LPAR '' (3, 9) (3, 10) + RPAR '' (3, 10) (3, 11) + COLON '' (3, 11) (3, 12) + NAME '' (3, 13) (3, 17) + NEWLINE '' (3, 17) (3, 17) + ASYNC '' (4, 2) (4, 7) + NAME '' (4, 8) (4, 11) + NAME '' (4, 12) (4, 15) + LPAR '' (4, 15) (4, 16) + RPAR '' (4, 16) (4, 17) + COLON '' (4, 17) (4, 18) + NAME '' (4, 19) (4, 23) + NEWLINE '' (4, 23) (4, 23) + AWAIT '' (6, 2) (6, 7) + EQUAL '' (6, 8) (6, 9) + NUMBER '' (6, 10) (6, 11) + DEDENT '' (6, -1) (6, -1) + """) + + + def test_unicode(self): + + self.check_tokenize("Örter = u'places'\ngrün = U'green'", """\ + NAME '' (1, 0) (1, 6) + EQUAL '' (1, 7) (1, 8) + STRING '' (1, 9) (1, 18) + NEWLINE '' (1, 18) (1, 18) + NAME '' (2, 0) (2, 5) + EQUAL '' (2, 6) (2, 7) + STRING '' (2, 8) (2, 16) + """) + + def test_invalid_syntax(self): + def get_tokens(string): + return 
list(_generate_tokens_from_c_tokenizer(string)) + + self.assertRaises(SyntaxError, get_tokens, "(1+2]") + self.assertRaises(SyntaxError, get_tokens, "(1+2}") + self.assertRaises(SyntaxError, get_tokens, "{1+2]") + + self.assertRaises(SyntaxError, get_tokens, "1_") + self.assertRaises(SyntaxError, get_tokens, "1.2_") + self.assertRaises(SyntaxError, get_tokens, "1e2_") + self.assertRaises(SyntaxError, get_tokens, "1e+") + + self.assertRaises(SyntaxError, get_tokens, "\xa0") + self.assertRaises(SyntaxError, get_tokens, "€") + + self.assertRaises(SyntaxError, get_tokens, "0b12") + self.assertRaises(SyntaxError, get_tokens, "0b1_2") + self.assertRaises(SyntaxError, get_tokens, "0b2") + self.assertRaises(SyntaxError, get_tokens, "0b1_") + self.assertRaises(SyntaxError, get_tokens, "0b") + self.assertRaises(SyntaxError, get_tokens, "0o18") + self.assertRaises(SyntaxError, get_tokens, "0o1_8") + self.assertRaises(SyntaxError, get_tokens, "0o8") + self.assertRaises(SyntaxError, get_tokens, "0o1_") + self.assertRaises(SyntaxError, get_tokens, "0o") + self.assertRaises(SyntaxError, get_tokens, "0x1_") + self.assertRaises(SyntaxError, get_tokens, "0x") + self.assertRaises(SyntaxError, get_tokens, "1_") + self.assertRaises(SyntaxError, get_tokens, "012") + self.assertRaises(SyntaxError, get_tokens, "1.2_") + self.assertRaises(SyntaxError, get_tokens, "1e2_") + self.assertRaises(SyntaxError, get_tokens, "1e+") + + self.assertRaises(SyntaxError, get_tokens, "'sdfsdf") + self.assertRaises(SyntaxError, get_tokens, "'''sdfsdf''") + + self.assertRaises(SyntaxError, get_tokens, "("*1000+"a"+")"*1000) + self.assertRaises(SyntaxError, get_tokens, "]") + + if __name__ == "__main__": unittest.main() diff --git a/Lib/tokenize.py b/Lib/tokenize.py index 7d7736fe985981..5f0a79dff6fc5f 100644 --- a/Lib/tokenize.py +++ b/Lib/tokenize.py @@ -56,6 +56,7 @@ def exact_type(self): else: return self.type + def group(*choices): return '(' + '|'.join(choices) + ')' def any(*choices): return 
group(*choices) + '*' def maybe(*choices): return group(*choices) + '?' @@ -680,5 +681,13 @@ def error(message, filename=None, location=None): perror("unexpected error: %s" % err) raise +def _generate_tokens_from_c_tokenizer(source): + """Tokenize a source reading Python code as unicode strings using the internal C tokenizer""" + import _tokenize as c_tokenizer + for info in c_tokenizer.TokenizerIter(source): + tok, type, lineno, end_lineno, col_off, end_col_off, line = info + yield TokenInfo(type, tok, (lineno, col_off), (end_lineno, end_col_off), line) + + if __name__ == "__main__": main() diff --git a/Makefile.pre.in b/Makefile.pre.in index f503ac4d876726..1007f440759b1a 100644 --- a/Makefile.pre.in +++ b/Makefile.pre.in @@ -339,6 +339,7 @@ PARSER_HEADERS= \ PYTHON_OBJS= \ Python/_warnings.o \ Python/Python-ast.o \ + Python/Python-tokenize.o \ Python/asdl.o \ Python/ast.o \ Python/ast_opt.o \ diff --git a/Modules/config.c.in b/Modules/config.c.in index d69e8e88b0ca45..6081f95759538f 100644 --- a/Modules/config.c.in +++ b/Modules/config.c.in @@ -28,6 +28,7 @@ extern PyObject* PyMarshal_Init(void); extern PyObject* PyInit__imp(void); extern PyObject* PyInit_gc(void); extern PyObject* PyInit__ast(void); +extern PyObject* PyInit__tokenize(void); extern PyObject* _PyWarnings_Init(void); extern PyObject* PyInit__string(void); @@ -44,6 +45,9 @@ struct _inittab _PyImport_Inittab[] = { /* This lives in Python/Python-ast.c */ {"_ast", PyInit__ast}, + /* This lives in Python/Python-tokenize.c */ + {"_tokenize", PyInit__tokenize}, + /* These entries are here for sys.builtin_module_names */ {"builtins", NULL}, {"sys", NULL}, diff --git a/PC/config.c b/PC/config.c index 11743ea45a969d..9d900c78e40d00 100644 --- a/PC/config.c +++ b/PC/config.c @@ -72,9 +72,8 @@ extern PyObject* _PyWarnings_Init(void); extern PyObject* PyInit__string(void); extern PyObject* PyInit__stat(void); extern PyObject* PyInit__opcode(void); - extern PyObject* PyInit__contextvars(void); - +extern 
PyObject* PyInit__tokenize(void); /* tools/freeze/makeconfig.py marker for additional "extern" */ /* -- ADDMODULE MARKER 1 -- */ @@ -83,7 +82,6 @@ extern PyObject* PyMarshal_Init(void); extern PyObject* PyInit__imp(void); struct _inittab _PyImport_Inittab[] = { - {"_abc", PyInit__abc}, {"array", PyInit_array}, {"_ast", PyInit__ast}, @@ -105,6 +103,7 @@ struct _inittab _PyImport_Inittab[] = { {"_blake2", PyInit__blake2}, {"time", PyInit_time}, {"_thread", PyInit__thread}, + {"_tokenize", PyInit__tokenize}, {"_typing", PyInit__typing}, {"_statistics", PyInit__statistics}, #ifdef WIN32 diff --git a/PCbuild/pythoncore.vcxproj b/PCbuild/pythoncore.vcxproj index ebc0f2879f9595..b8cadf469355f4 100644 --- a/PCbuild/pythoncore.vcxproj +++ b/PCbuild/pythoncore.vcxproj @@ -488,6 +488,7 @@ + diff --git a/Python/Python-tokenize.c b/Python/Python-tokenize.c new file mode 100644 index 00000000000000..67782b6670136e --- /dev/null +++ b/Python/Python-tokenize.c @@ -0,0 +1,195 @@ +#include "Python.h" +#include "../Parser/tokenizer.h" + +static struct PyModuleDef _tokenizemodule; + +typedef struct +{ + PyTypeObject* TokenizerIter; +} tokenize_state; + +static tokenize_state* +get_tokenize_state(PyObject* module) +{ + return (tokenize_state*)PyModule_GetState(module); +} + +#define _tokenize_get_state_by_type(type) \ + get_tokenize_state(_PyType_GetModuleByDef(type, &_tokenizemodule)) + +#include "clinic/Python-tokenize.c.h" + +/*[clinic input] +module _tokenizer +class _tokenizer.tokenizeriter "tokenizeriterobject *" "_tokenize_get_state_by_type(type)->TokenizerIter" +[clinic start generated code]*/ +/*[clinic end generated code: output=da39a3ee5e6b4b0d input=96d98ee2fef7a8bc]*/ + +typedef struct +{ + PyObject_HEAD struct tok_state* tok; +} tokenizeriterobject; + +/*[clinic input] +@classmethod +_tokenizer.tokenizeriter.__new__ as tokenizeriter_new + + source: str +[clinic start generated code]*/ + +static PyObject * +tokenizeriter_new_impl(PyTypeObject *type, const char *source) 
+/*[clinic end generated code: output=7fd9f46cf9263cbb input=4384b368407375c6]*/ +{ + tokenizeriterobject* self = (tokenizeriterobject*)type->tp_alloc(type, 0); + if (self == NULL) { + return NULL; + } + PyObject* filename = PyUnicode_FromString(""); + if (filename == NULL) { + return NULL; + } + self->tok = PyTokenizer_FromUTF8(source, 1); + if (self->tok == NULL) { + return NULL; + } + self->tok->filename = filename; + return (PyObject*)self; +} + +static PyObject* +tokenizeriter_next(tokenizeriterobject* it) +{ + const char* start; + const char* end; + int type = PyTokenizer_Get(it->tok, &start, &end); + if (type == ERRORTOKEN && PyErr_Occurred()) { + return NULL; + } + if (type == ERRORTOKEN || type == ENDMARKER) { + PyErr_SetString(PyExc_StopIteration, "EOF"); + return NULL; + } + PyObject* str = NULL; + if (start == NULL || end == NULL) { + str = PyUnicode_FromStringAndSize(start, end - start); + } else { + str = PyUnicode_FromString(""); + } + if (str == NULL) { + return NULL; + } + + Py_ssize_t size = it->tok->inp - it->tok->buf; + PyObject* line = PyUnicode_DecodeUTF8(it->tok->buf, size, "replace"); + if (line == NULL) { + Py_DECREF(str); + return NULL; + } + const char* line_start = type == STRING ? it->tok->multi_line_start : it->tok->line_start; + int lineno = type == STRING ? 
it->tok->first_lineno : it->tok->lineno; + int end_lineno = it->tok->lineno; + int col_offset = -1; + int end_col_offset = -1; + if (start != NULL && start >= line_start) { + col_offset = (int)(start - line_start); + } + if (end != NULL && end >= it->tok->line_start) { + end_col_offset = (int)(end - it->tok->line_start); + } + + return Py_BuildValue("(NiiiiiN)", str, type, lineno, end_lineno, col_offset, end_col_offset, line); +} + +static void +tokenizeriter_dealloc(tokenizeriterobject* it) +{ + PyTypeObject* tp = Py_TYPE(it); + PyTokenizer_Free(it->tok); + tp->tp_free(it); +} + +static PyType_Slot tokenizeriter_slots[] = { + {Py_tp_new, tokenizeriter_new}, + {Py_tp_dealloc, tokenizeriter_dealloc}, + {Py_tp_getattro, PyObject_GenericGetAttr}, + {Py_tp_iter, PyObject_SelfIter}, + {Py_tp_iternext, tokenizeriter_next}, + {0, NULL}, +}; + +static PyType_Spec tokenizeriter_spec = { + .name = "_tokenize.TokenizerIter", + .basicsize = sizeof(tokenizeriterobject), + .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_IMMUTABLETYPE), + .slots = tokenizeriter_slots, +}; + + +static int +tokenizemodule_exec(PyObject* m) +{ + tokenize_state* state = get_tokenize_state(m); + if (state == NULL) { + return -1; + } + + state->TokenizerIter = (PyTypeObject *)PyType_FromModuleAndSpec( + m, &tokenizeriter_spec, NULL); + if (state->TokenizerIter == NULL) { + return -1; + } + if (PyModule_AddType(m, state->TokenizerIter) < 0) { + return -1; + } + + return 0; +} + +static PyMethodDef tokenize_methods[] = { + {NULL, NULL, 0, NULL} /* Sentinel */ +}; + +static PyModuleDef_Slot tokenizemodule_slots[] = { + {Py_mod_exec, tokenizemodule_exec}, + {0, NULL} +}; + +static int +tokenizemodule_traverse(PyObject *m, visitproc visit, void *arg) +{ + tokenize_state *state = get_tokenize_state(m); + Py_VISIT(state->TokenizerIter); + return 0; +} + +static int +tokenizemodule_clear(PyObject *m) +{ + tokenize_state *state = get_tokenize_state(m); + Py_CLEAR(state->TokenizerIter); + return 0; +} + +static void 
+tokenizemodule_free(void *m) +{ + tokenizemodule_clear((PyObject *)m); +} + +static struct PyModuleDef _tokenizemodule = { + PyModuleDef_HEAD_INIT, + .m_name = "_tokenize", + .m_size = sizeof(tokenize_state), + .m_slots = tokenizemodule_slots, + .m_methods = tokenize_methods, + .m_traverse = tokenizemodule_traverse, + .m_clear = tokenizemodule_clear, + .m_free = tokenizemodule_free, +}; + +PyMODINIT_FUNC +PyInit__tokenize(void) +{ + return PyModuleDef_Init(&_tokenizemodule); +} \ No newline at end of file diff --git a/Python/clinic/Python-tokenize.c.h b/Python/clinic/Python-tokenize.c.h new file mode 100644 index 00000000000000..050b4d49448c36 --- /dev/null +++ b/Python/clinic/Python-tokenize.c.h @@ -0,0 +1,41 @@ +/*[clinic input] +preserve +[clinic start generated code]*/ + +static PyObject * +tokenizeriter_new_impl(PyTypeObject *type, const char *source); + +static PyObject * +tokenizeriter_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) +{ + PyObject *return_value = NULL; + static const char * const _keywords[] = {"source", NULL}; + static _PyArg_Parser _parser = {NULL, _keywords, "tokenizeriter", 0}; + PyObject *argsbuf[1]; + PyObject * const *fastargs; + Py_ssize_t nargs = PyTuple_GET_SIZE(args); + const char *source; + + fastargs = _PyArg_UnpackKeywords(_PyTuple_CAST(args)->ob_item, nargs, kwargs, NULL, &_parser, 1, 1, 0, argsbuf); + if (!fastargs) { + goto exit; + } + if (!PyUnicode_Check(fastargs[0])) { + _PyArg_BadArgument("tokenizeriter", "argument 'source'", "str", fastargs[0]); + goto exit; + } + Py_ssize_t source_length; + source = PyUnicode_AsUTF8AndSize(fastargs[0], &source_length); + if (source == NULL) { + goto exit; + } + if (strlen(source) != (size_t)source_length) { + PyErr_SetString(PyExc_ValueError, "embedded null character"); + goto exit; + } + return_value = tokenizeriter_new_impl(type, source); + +exit: + return return_value; +} +/*[clinic end generated code: output=dfcd64774e01bfe6 input=a9049054013a1b77]*/ diff --git 
a/Python/stdlib_module_names.h b/Python/stdlib_module_names.h index 3c5f1768305cb0..2f75c2e54cd5e9 100644 --- a/Python/stdlib_module_names.h +++ b/Python/stdlib_module_names.h @@ -80,6 +80,7 @@ static const char* _Py_stdlib_module_names[] = { "_thread", "_threading_local", "_tkinter", +"_tokenize", "_tracemalloc", "_typing", "_uuid", From 0e4795803d25b9f6f03bee27f0b7bc5f7d63cb1f Mon Sep 17 00:00:00 2001 From: Pablo Galindo Date: Tue, 24 Aug 2021 16:30:30 +0100 Subject: [PATCH 2/5] fixup! Add tests for the C tokenizer and expose it as a private module --- Python/Python-tokenize.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Python/Python-tokenize.c b/Python/Python-tokenize.c index 67782b6670136e..8f7ff0d87926ba 100644 --- a/Python/Python-tokenize.c +++ b/Python/Python-tokenize.c @@ -72,9 +72,9 @@ tokenizeriter_next(tokenizeriterobject* it) } PyObject* str = NULL; if (start == NULL || end == NULL) { - str = PyUnicode_FromStringAndSize(start, end - start); - } else { str = PyUnicode_FromString(""); + } else { + str = PyUnicode_FromStringAndSize(start, end - start); } if (str == NULL) { return NULL; @@ -192,4 +192,4 @@ PyMODINIT_FUNC PyInit__tokenize(void) { return PyModuleDef_Init(&_tokenizemodule); -} \ No newline at end of file +} From 832bf92f5981fb1f438f7c6cc41c37fba13928d6 Mon Sep 17 00:00:00 2001 From: Pablo Galindo Salgado Date: Tue, 24 Aug 2021 16:31:05 +0100 Subject: [PATCH 3/5] Apply suggestions from code review Co-authored-by: Batuhan Taskaya --- Python/Python-tokenize.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/Python/Python-tokenize.c b/Python/Python-tokenize.c index 8f7ff0d87926ba..3c41b05af74a37 100644 --- a/Python/Python-tokenize.c +++ b/Python/Python-tokenize.c @@ -3,8 +3,7 @@ static struct PyModuleDef _tokenizemodule; -typedef struct -{ +typedef struct { PyTypeObject* TokenizerIter; } tokenize_state; @@ -25,9 +24,9 @@ class _tokenizer.tokenizeriter "tokenizeriterobject *" 
"_tokenize_get_state_by_t [clinic start generated code]*/ /*[clinic end generated code: output=da39a3ee5e6b4b0d input=96d98ee2fef7a8bc]*/ -typedef struct -{ - PyObject_HEAD struct tok_state* tok; +typedef struct { + PyObject_HEAD + struct tok_state* tok; } tokenizeriterobject; /*[clinic input] From 0fa7650f83f0d7260bbde7ada8eb4edfd55d0c2a Mon Sep 17 00:00:00 2001 From: Pablo Galindo Date: Tue, 24 Aug 2021 16:31:52 +0100 Subject: [PATCH 4/5] fixup! Apply suggestions from code review --- Lib/tokenize.py | 1 - 1 file changed, 1 deletion(-) diff --git a/Lib/tokenize.py b/Lib/tokenize.py index 5f0a79dff6fc5f..0b9e238310049c 100644 --- a/Lib/tokenize.py +++ b/Lib/tokenize.py @@ -56,7 +56,6 @@ def exact_type(self): else: return self.type - def group(*choices): return '(' + '|'.join(choices) + ')' def any(*choices): return group(*choices) + '*' def maybe(*choices): return group(*choices) + '?' From 7abe25af33db27a2254750aaffbdb5e06141d0bb Mon Sep 17 00:00:00 2001 From: Pablo Galindo Date: Tue, 24 Aug 2021 17:06:41 +0100 Subject: [PATCH 5/5] fixup! fixup! 
Apply suggestions from code review --- Lib/test/test_tokenize.py | 1056 ++++++++++++++++++------------------- Python/Python-tokenize.c | 1 + 2 files changed, 516 insertions(+), 541 deletions(-) diff --git a/Lib/test/test_tokenize.py b/Lib/test/test_tokenize.py index 31ad2389b2a250..f8b16e52976451 100644 --- a/Lib/test/test_tokenize.py +++ b/Lib/test/test_tokenize.py @@ -1666,678 +1666,653 @@ def check_tokenize(self, s, expected): def test_int(self): self.check_tokenize('0xff <= 255', """\ - NUMBER '' (1, 0) (1, 4) - LESSEQUAL '' (1, 5) (1, 7) - NUMBER '' (1, 8) (1, 11) + NUMBER '0xff' (1, 0) (1, 4) + LESSEQUAL '<=' (1, 5) (1, 7) + NUMBER '255' (1, 8) (1, 11) """) self.check_tokenize('0b10 <= 255', """\ - NUMBER '' (1, 0) (1, 4) - LESSEQUAL '' (1, 5) (1, 7) - NUMBER '' (1, 8) (1, 11) + NUMBER '0b10' (1, 0) (1, 4) + LESSEQUAL '<=' (1, 5) (1, 7) + NUMBER '255' (1, 8) (1, 11) """) self.check_tokenize('0o123 <= 0O123', """\ - NUMBER '' (1, 0) (1, 5) - LESSEQUAL '' (1, 6) (1, 8) - NUMBER '' (1, 9) (1, 14) + NUMBER '0o123' (1, 0) (1, 5) + LESSEQUAL '<=' (1, 6) (1, 8) + NUMBER '0O123' (1, 9) (1, 14) """) self.check_tokenize('1234567 > ~0x15', """\ - NUMBER '' (1, 0) (1, 7) - GREATER '' (1, 8) (1, 9) - TILDE '' (1, 10) (1, 11) - NUMBER '' (1, 11) (1, 15) + NUMBER '1234567' (1, 0) (1, 7) + GREATER '>' (1, 8) (1, 9) + TILDE '~' (1, 10) (1, 11) + NUMBER '0x15' (1, 11) (1, 15) """) self.check_tokenize('2134568 != 1231515', """\ - NUMBER '' (1, 0) (1, 7) - NOTEQUAL '' (1, 8) (1, 10) - NUMBER '' (1, 11) (1, 18) + NUMBER '2134568' (1, 0) (1, 7) + NOTEQUAL '!=' (1, 8) (1, 10) + NUMBER '1231515' (1, 11) (1, 18) """) self.check_tokenize('(-124561-1) & 200000000', """\ - LPAR '' (1, 0) (1, 1) - MINUS '' (1, 1) (1, 2) - NUMBER '' (1, 2) (1, 8) - MINUS '' (1, 8) (1, 9) - NUMBER '' (1, 9) (1, 10) - RPAR '' (1, 10) (1, 11) - AMPER '' (1, 12) (1, 13) - NUMBER '' (1, 14) (1, 23) + LPAR '(' (1, 0) (1, 1) + MINUS '-' (1, 1) (1, 2) + NUMBER '124561' (1, 2) (1, 8) + MINUS '-' (1, 8) (1, 9) + 
NUMBER '1' (1, 9) (1, 10) + RPAR ')' (1, 10) (1, 11) + AMPER '&' (1, 12) (1, 13) + NUMBER '200000000' (1, 14) (1, 23) """) self.check_tokenize('0xdeadbeef != -1', """\ - NUMBER '' (1, 0) (1, 10) - NOTEQUAL '' (1, 11) (1, 13) - MINUS '' (1, 14) (1, 15) - NUMBER '' (1, 15) (1, 16) + NUMBER '0xdeadbeef' (1, 0) (1, 10) + NOTEQUAL '!=' (1, 11) (1, 13) + MINUS '-' (1, 14) (1, 15) + NUMBER '1' (1, 15) (1, 16) """) self.check_tokenize('0xdeadc0de & 12345', """\ - NUMBER '' (1, 0) (1, 10) - AMPER '' (1, 11) (1, 12) - NUMBER '' (1, 13) (1, 18) + NUMBER '0xdeadc0de' (1, 0) (1, 10) + AMPER '&' (1, 11) (1, 12) + NUMBER '12345' (1, 13) (1, 18) """) self.check_tokenize('0xFF & 0x15 | 1234', """\ - NUMBER '' (1, 0) (1, 4) - AMPER '' (1, 5) (1, 6) - NUMBER '' (1, 7) (1, 11) - VBAR '' (1, 12) (1, 13) - NUMBER '' (1, 14) (1, 18) - """) - - def test_long(self): - - self.check_tokenize('x = 0', """\ - NAME '' (1, 0) (1, 1) - EQUAL '' (1, 2) (1, 3) - NUMBER '' (1, 4) (1, 5) - """) - - self.check_tokenize('x = 0xfffffffffff', """\ - NAME '' (1, 0) (1, 1) - EQUAL '' (1, 2) (1, 3) - NUMBER '' (1, 4) (1, 17) - """) - - self.check_tokenize('x = 123141242151251616110', """\ - NAME '' (1, 0) (1, 1) - EQUAL '' (1, 2) (1, 3) - NUMBER '' (1, 4) (1, 25) - """) - - self.check_tokenize('x = -15921590215012591', """\ - NAME '' (1, 0) (1, 1) - EQUAL '' (1, 2) (1, 3) - MINUS '' (1, 4) (1, 5) - NUMBER '' (1, 5) (1, 22) + NUMBER '0xFF' (1, 0) (1, 4) + AMPER '&' (1, 5) (1, 6) + NUMBER '0x15' (1, 7) (1, 11) + VBAR '|' (1, 12) (1, 13) + NUMBER '1234' (1, 14) (1, 18) """) def test_float(self): self.check_tokenize('x = 3.14159', """\ - NAME '' (1, 0) (1, 1) - EQUAL '' (1, 2) (1, 3) - NUMBER '' (1, 4) (1, 11) + NAME 'x' (1, 0) (1, 1) + EQUAL '=' (1, 2) (1, 3) + NUMBER '3.14159' (1, 4) (1, 11) """) self.check_tokenize('x = 314159.', """\ - NAME '' (1, 0) (1, 1) - EQUAL '' (1, 2) (1, 3) - NUMBER '' (1, 4) (1, 11) + NAME 'x' (1, 0) (1, 1) + EQUAL '=' (1, 2) (1, 3) + NUMBER '314159.' 
(1, 4) (1, 11) """) self.check_tokenize('x = .314159', """\ - NAME '' (1, 0) (1, 1) - EQUAL '' (1, 2) (1, 3) - NUMBER '' (1, 4) (1, 11) + NAME 'x' (1, 0) (1, 1) + EQUAL '=' (1, 2) (1, 3) + NUMBER '.314159' (1, 4) (1, 11) """) self.check_tokenize('x = 3e14159', """\ - NAME '' (1, 0) (1, 1) - EQUAL '' (1, 2) (1, 3) - NUMBER '' (1, 4) (1, 11) + NAME 'x' (1, 0) (1, 1) + EQUAL '=' (1, 2) (1, 3) + NUMBER '3e14159' (1, 4) (1, 11) """) self.check_tokenize('x = 3E123', """\ - NAME '' (1, 0) (1, 1) - EQUAL '' (1, 2) (1, 3) - NUMBER '' (1, 4) (1, 9) + NAME 'x' (1, 0) (1, 1) + EQUAL '=' (1, 2) (1, 3) + NUMBER '3E123' (1, 4) (1, 9) """) self.check_tokenize('x+y = 3e-1230', """\ - NAME '' (1, 0) (1, 1) - PLUS '' (1, 1) (1, 2) - NAME '' (1, 2) (1, 3) - EQUAL '' (1, 4) (1, 5) - NUMBER '' (1, 6) (1, 13) + NAME 'x' (1, 0) (1, 1) + PLUS '+' (1, 1) (1, 2) + NAME 'y' (1, 2) (1, 3) + EQUAL '=' (1, 4) (1, 5) + NUMBER '3e-1230' (1, 6) (1, 13) """) self.check_tokenize('x = 3.14e159', """\ - NAME '' (1, 0) (1, 1) - EQUAL '' (1, 2) (1, 3) - NUMBER '' (1, 4) (1, 12) + NAME 'x' (1, 0) (1, 1) + EQUAL '=' (1, 2) (1, 3) + NUMBER '3.14e159' (1, 4) (1, 12) """) def test_string(self): self.check_tokenize('x = \'\'; y = ""', """\ - NAME '' (1, 0) (1, 1) - EQUAL '' (1, 2) (1, 3) - STRING '' (1, 4) (1, 6) - SEMI '' (1, 6) (1, 7) - NAME '' (1, 8) (1, 9) - EQUAL '' (1, 10) (1, 11) - STRING '' (1, 12) (1, 14) + NAME 'x' (1, 0) (1, 1) + EQUAL '=' (1, 2) (1, 3) + STRING "''" (1, 4) (1, 6) + SEMI ';' (1, 6) (1, 7) + NAME 'y' (1, 8) (1, 9) + EQUAL '=' (1, 10) (1, 11) + STRING '""' (1, 12) (1, 14) """) self.check_tokenize('x = \'"\'; y = "\'"', """\ - NAME '' (1, 0) (1, 1) - EQUAL '' (1, 2) (1, 3) - STRING '' (1, 4) (1, 7) - SEMI '' (1, 7) (1, 8) - NAME '' (1, 9) (1, 10) - EQUAL '' (1, 11) (1, 12) - STRING '' (1, 13) (1, 16) + NAME 'x' (1, 0) (1, 1) + EQUAL '=' (1, 2) (1, 3) + STRING '\\'"\\'' (1, 4) (1, 7) + SEMI ';' (1, 7) (1, 8) + NAME 'y' (1, 9) (1, 10) + EQUAL '=' (1, 11) (1, 12) + STRING '"\\'"' (1, 13) 
(1, 16) """) self.check_tokenize('x = "doesn\'t "shrink", does it"', """\ - NAME '' (1, 0) (1, 1) - EQUAL '' (1, 2) (1, 3) - STRING '' (1, 4) (1, 14) - NAME '' (1, 14) (1, 20) - STRING '' (1, 20) (1, 31) + NAME 'x' (1, 0) (1, 1) + EQUAL '=' (1, 2) (1, 3) + STRING '"doesn\\'t "' (1, 4) (1, 14) + NAME 'shrink' (1, 14) (1, 20) + STRING '", does it"' (1, 20) (1, 31) """) self.check_tokenize("x = 'abc' + 'ABC'", """\ - NAME '' (1, 0) (1, 1) - EQUAL '' (1, 2) (1, 3) - STRING '' (1, 4) (1, 9) - PLUS '' (1, 10) (1, 11) - STRING '' (1, 12) (1, 17) + NAME 'x' (1, 0) (1, 1) + EQUAL '=' (1, 2) (1, 3) + STRING "'abc'" (1, 4) (1, 9) + PLUS '+' (1, 10) (1, 11) + STRING "'ABC'" (1, 12) (1, 17) """) self.check_tokenize('y = "ABC" + "ABC"', """\ - NAME '' (1, 0) (1, 1) - EQUAL '' (1, 2) (1, 3) - STRING '' (1, 4) (1, 9) - PLUS '' (1, 10) (1, 11) - STRING '' (1, 12) (1, 17) + NAME 'y' (1, 0) (1, 1) + EQUAL '=' (1, 2) (1, 3) + STRING '"ABC"' (1, 4) (1, 9) + PLUS '+' (1, 10) (1, 11) + STRING '"ABC"' (1, 12) (1, 17) """) self.check_tokenize("x = r'abc' + r'ABC' + R'ABC' + R'ABC'", """\ - NAME '' (1, 0) (1, 1) - EQUAL '' (1, 2) (1, 3) - STRING '' (1, 4) (1, 10) - PLUS '' (1, 11) (1, 12) - STRING '' (1, 13) (1, 19) - PLUS '' (1, 20) (1, 21) - STRING '' (1, 22) (1, 28) - PLUS '' (1, 29) (1, 30) - STRING '' (1, 31) (1, 37) + NAME 'x' (1, 0) (1, 1) + EQUAL '=' (1, 2) (1, 3) + STRING "r'abc'" (1, 4) (1, 10) + PLUS '+' (1, 11) (1, 12) + STRING "r'ABC'" (1, 13) (1, 19) + PLUS '+' (1, 20) (1, 21) + STRING "R'ABC'" (1, 22) (1, 28) + PLUS '+' (1, 29) (1, 30) + STRING "R'ABC'" (1, 31) (1, 37) """) self.check_tokenize('y = r"abc" + r"ABC" + R"ABC" + R"ABC"', """\ - NAME '' (1, 0) (1, 1) - EQUAL '' (1, 2) (1, 3) - STRING '' (1, 4) (1, 10) - PLUS '' (1, 11) (1, 12) - STRING '' (1, 13) (1, 19) - PLUS '' (1, 20) (1, 21) - STRING '' (1, 22) (1, 28) - PLUS '' (1, 29) (1, 30) - STRING '' (1, 31) (1, 37) + NAME 'y' (1, 0) (1, 1) + EQUAL '=' (1, 2) (1, 3) + STRING 'r"abc"' (1, 4) (1, 10) + PLUS '+' (1, 11) 
(1, 12) + STRING 'r"ABC"' (1, 13) (1, 19) + PLUS '+' (1, 20) (1, 21) + STRING 'R"ABC"' (1, 22) (1, 28) + PLUS '+' (1, 29) (1, 30) + STRING 'R"ABC"' (1, 31) (1, 37) """) self.check_tokenize("u'abc' + U'abc'", """\ - STRING '' (1, 0) (1, 6) - PLUS '' (1, 7) (1, 8) - STRING '' (1, 9) (1, 15) + STRING "u'abc'" (1, 0) (1, 6) + PLUS '+' (1, 7) (1, 8) + STRING "U'abc'" (1, 9) (1, 15) """) self.check_tokenize('u"abc" + U"abc"', """\ - STRING '' (1, 0) (1, 6) - PLUS '' (1, 7) (1, 8) - STRING '' (1, 9) (1, 15) + STRING 'u"abc"' (1, 0) (1, 6) + PLUS '+' (1, 7) (1, 8) + STRING 'U"abc"' (1, 9) (1, 15) """) self.check_tokenize("b'abc' + B'abc'", """\ - STRING '' (1, 0) (1, 6) - PLUS '' (1, 7) (1, 8) - STRING '' (1, 9) (1, 15) + STRING "b'abc'" (1, 0) (1, 6) + PLUS '+' (1, 7) (1, 8) + STRING "B'abc'" (1, 9) (1, 15) """) self.check_tokenize('b"abc" + B"abc"', """\ - STRING '' (1, 0) (1, 6) - PLUS '' (1, 7) (1, 8) - STRING '' (1, 9) (1, 15) + STRING 'b"abc"' (1, 0) (1, 6) + PLUS '+' (1, 7) (1, 8) + STRING 'B"abc"' (1, 9) (1, 15) """) self.check_tokenize("br'abc' + bR'abc' + Br'abc' + BR'abc'", """\ - STRING '' (1, 0) (1, 7) - PLUS '' (1, 8) (1, 9) - STRING '' (1, 10) (1, 17) - PLUS '' (1, 18) (1, 19) - STRING '' (1, 20) (1, 27) - PLUS '' (1, 28) (1, 29) - STRING '' (1, 30) (1, 37) + STRING "br'abc'" (1, 0) (1, 7) + PLUS '+' (1, 8) (1, 9) + STRING "bR'abc'" (1, 10) (1, 17) + PLUS '+' (1, 18) (1, 19) + STRING "Br'abc'" (1, 20) (1, 27) + PLUS '+' (1, 28) (1, 29) + STRING "BR'abc'" (1, 30) (1, 37) """) self.check_tokenize('br"abc" + bR"abc" + Br"abc" + BR"abc"', """\ - STRING '' (1, 0) (1, 7) - PLUS '' (1, 8) (1, 9) - STRING '' (1, 10) (1, 17) - PLUS '' (1, 18) (1, 19) - STRING '' (1, 20) (1, 27) - PLUS '' (1, 28) (1, 29) - STRING '' (1, 30) (1, 37) + STRING 'br"abc"' (1, 0) (1, 7) + PLUS '+' (1, 8) (1, 9) + STRING 'bR"abc"' (1, 10) (1, 17) + PLUS '+' (1, 18) (1, 19) + STRING 'Br"abc"' (1, 20) (1, 27) + PLUS '+' (1, 28) (1, 29) + STRING 'BR"abc"' (1, 30) (1, 37) """) 
self.check_tokenize("rb'abc' + rB'abc' + Rb'abc' + RB'abc'", """\ - STRING '' (1, 0) (1, 7) - PLUS '' (1, 8) (1, 9) - STRING '' (1, 10) (1, 17) - PLUS '' (1, 18) (1, 19) - STRING '' (1, 20) (1, 27) - PLUS '' (1, 28) (1, 29) - STRING '' (1, 30) (1, 37) + STRING "rb'abc'" (1, 0) (1, 7) + PLUS '+' (1, 8) (1, 9) + STRING "rB'abc'" (1, 10) (1, 17) + PLUS '+' (1, 18) (1, 19) + STRING "Rb'abc'" (1, 20) (1, 27) + PLUS '+' (1, 28) (1, 29) + STRING "RB'abc'" (1, 30) (1, 37) """) self.check_tokenize('rb"abc" + rB"abc" + Rb"abc" + RB"abc"', """\ - STRING '' (1, 0) (1, 7) - PLUS '' (1, 8) (1, 9) - STRING '' (1, 10) (1, 17) - PLUS '' (1, 18) (1, 19) - STRING '' (1, 20) (1, 27) - PLUS '' (1, 28) (1, 29) - STRING '' (1, 30) (1, 37) + STRING 'rb"abc"' (1, 0) (1, 7) + PLUS '+' (1, 8) (1, 9) + STRING 'rB"abc"' (1, 10) (1, 17) + PLUS '+' (1, 18) (1, 19) + STRING 'Rb"abc"' (1, 20) (1, 27) + PLUS '+' (1, 28) (1, 29) + STRING 'RB"abc"' (1, 30) (1, 37) """) self.check_tokenize('"a\\\nde\\\nfg"', """\ - STRING '' (1, 0) (3, 3) + STRING '"a\\\\\\nde\\\\\\nfg"\' (1, 0) (3, 3) """) self.check_tokenize('u"a\\\nde"', """\ - STRING '' (1, 0) (2, 3) + STRING 'u"a\\\\\\nde"\' (1, 0) (2, 3) """) self.check_tokenize('rb"a\\\nd"', """\ - STRING '' (1, 0) (2, 2) + STRING 'rb"a\\\\\\nd"\' (1, 0) (2, 2) """) - self.check_tokenize('"""a\\\nb"""', """\ - STRING '' (1, 0) (2, 4) + self.check_tokenize(r'"""a\ +b"""', """\ + STRING '\"\""a\\\\\\nb\"\""' (1, 0) (2, 4) """) - - self.check_tokenize('u"""a\\\nb"""', """\ - STRING '' (1, 0) (2, 4) + self.check_tokenize(r'u"""a\ +b"""', """\ + STRING 'u\"\""a\\\\\\nb\"\""' (1, 0) (2, 4) """) - - self.check_tokenize('rb"""a\\\nb\\\nc"""', """\ - STRING '' (1, 0) (3, 4) + self.check_tokenize(r'rb"""a\ +b\ +c"""', """\ + STRING 'rb"\""a\\\\\\nb\\\\\\nc"\""' (1, 0) (3, 4) """) - + self.check_tokenize('f"abc"', """\ - STRING '' (1, 0) (1, 6) + STRING 'f"abc"' (1, 0) (1, 6) """) self.check_tokenize('fR"a{b}c"', """\ - STRING '' (1, 0) (1, 9) + STRING 'fR"a{b}c"' (1, 0) 
(1, 9) """) self.check_tokenize('f"""abc"""', """\ - STRING '' (1, 0) (1, 10) + STRING 'f\"\"\"abc\"\"\"' (1, 0) (1, 10) """) - self.check_tokenize('f"abc\\\ndef"', """\ - STRING '' (1, 0) (2, 4) + self.check_tokenize(r'f"abc\ +def"', """\ + STRING 'f"abc\\\\\\ndef"' (1, 0) (2, 4) """) - self.check_tokenize('Rf"abc\\\ndef"', """\ - STRING '' (1, 0) (2, 4) + self.check_tokenize(r'Rf"abc\ +def"', """\ + STRING 'Rf"abc\\\\\\ndef"' (1, 0) (2, 4) """) def test_function(self): self.check_tokenize('def d22(a, b, c=2, d=2, *k): pass', """\ - NAME '' (1, 0) (1, 3) - NAME '' (1, 4) (1, 7) - LPAR '' (1, 7) (1, 8) - NAME '' (1, 8) (1, 9) - COMMA '' (1, 9) (1, 10) - NAME '' (1, 11) (1, 12) - COMMA '' (1, 12) (1, 13) - NAME '' (1, 14) (1, 15) - EQUAL '' (1, 15) (1, 16) - NUMBER '' (1, 16) (1, 17) - COMMA '' (1, 17) (1, 18) - NAME '' (1, 19) (1, 20) - EQUAL '' (1, 20) (1, 21) - NUMBER '' (1, 21) (1, 22) - COMMA '' (1, 22) (1, 23) - STAR '' (1, 24) (1, 25) - NAME '' (1, 25) (1, 26) - RPAR '' (1, 26) (1, 27) - COLON '' (1, 27) (1, 28) - NAME '' (1, 29) (1, 33) + NAME 'def' (1, 0) (1, 3) + NAME 'd22' (1, 4) (1, 7) + LPAR '(' (1, 7) (1, 8) + NAME 'a' (1, 8) (1, 9) + COMMA ',' (1, 9) (1, 10) + NAME 'b' (1, 11) (1, 12) + COMMA ',' (1, 12) (1, 13) + NAME 'c' (1, 14) (1, 15) + EQUAL '=' (1, 15) (1, 16) + NUMBER '2' (1, 16) (1, 17) + COMMA ',' (1, 17) (1, 18) + NAME 'd' (1, 19) (1, 20) + EQUAL '=' (1, 20) (1, 21) + NUMBER '2' (1, 21) (1, 22) + COMMA ',' (1, 22) (1, 23) + STAR '*' (1, 24) (1, 25) + NAME 'k' (1, 25) (1, 26) + RPAR ')' (1, 26) (1, 27) + COLON ':' (1, 27) (1, 28) + NAME 'pass' (1, 29) (1, 33) """) self.check_tokenize('def d01v_(a=1, *k, **w): pass', """\ - NAME '' (1, 0) (1, 3) - NAME '' (1, 4) (1, 9) - LPAR '' (1, 9) (1, 10) - NAME '' (1, 10) (1, 11) - EQUAL '' (1, 11) (1, 12) - NUMBER '' (1, 12) (1, 13) - COMMA '' (1, 13) (1, 14) - STAR '' (1, 15) (1, 16) - NAME '' (1, 16) (1, 17) - COMMA '' (1, 17) (1, 18) - DOUBLESTAR '' (1, 19) (1, 21) - NAME '' (1, 21) (1, 22) - RPAR '' 
(1, 22) (1, 23) - COLON '' (1, 23) (1, 24) - NAME '' (1, 25) (1, 29) + NAME 'def' (1, 0) (1, 3) + NAME 'd01v_' (1, 4) (1, 9) + LPAR '(' (1, 9) (1, 10) + NAME 'a' (1, 10) (1, 11) + EQUAL '=' (1, 11) (1, 12) + NUMBER '1' (1, 12) (1, 13) + COMMA ',' (1, 13) (1, 14) + STAR '*' (1, 15) (1, 16) + NAME 'k' (1, 16) (1, 17) + COMMA ',' (1, 17) (1, 18) + DOUBLESTAR '**' (1, 19) (1, 21) + NAME 'w' (1, 21) (1, 22) + RPAR ')' (1, 22) (1, 23) + COLON ':' (1, 23) (1, 24) + NAME 'pass' (1, 25) (1, 29) """) self.check_tokenize('def d23(a: str, b: int=3) -> int: pass', """\ - NAME '' (1, 0) (1, 3) - NAME '' (1, 4) (1, 7) - LPAR '' (1, 7) (1, 8) - NAME '' (1, 8) (1, 9) - COLON '' (1, 9) (1, 10) - NAME '' (1, 11) (1, 14) - COMMA '' (1, 14) (1, 15) - NAME '' (1, 16) (1, 17) - COLON '' (1, 17) (1, 18) - NAME '' (1, 19) (1, 22) - EQUAL '' (1, 22) (1, 23) - NUMBER '' (1, 23) (1, 24) - RPAR '' (1, 24) (1, 25) - RARROW '' (1, 26) (1, 28) - NAME '' (1, 29) (1, 32) - COLON '' (1, 32) (1, 33) - NAME '' (1, 34) (1, 38) + NAME 'def' (1, 0) (1, 3) + NAME 'd23' (1, 4) (1, 7) + LPAR '(' (1, 7) (1, 8) + NAME 'a' (1, 8) (1, 9) + COLON ':' (1, 9) (1, 10) + NAME 'str' (1, 11) (1, 14) + COMMA ',' (1, 14) (1, 15) + NAME 'b' (1, 16) (1, 17) + COLON ':' (1, 17) (1, 18) + NAME 'int' (1, 19) (1, 22) + EQUAL '=' (1, 22) (1, 23) + NUMBER '3' (1, 23) (1, 24) + RPAR ')' (1, 24) (1, 25) + RARROW '->' (1, 26) (1, 28) + NAME 'int' (1, 29) (1, 32) + COLON ':' (1, 32) (1, 33) + NAME 'pass' (1, 34) (1, 38) """) def test_comparison(self): self.check_tokenize("if 1 < 1 > 1 == 1 >= 5 <= 0x15 <= 0x12 != " "1 and 5 in 1 not in 1 is 1 or 5 is not 1: pass", """\ - NAME '' (1, 0) (1, 2) - NUMBER '' (1, 3) (1, 4) - LESS '' (1, 5) (1, 6) - NUMBER '' (1, 7) (1, 8) - GREATER '' (1, 9) (1, 10) - NUMBER '' (1, 11) (1, 12) - EQEQUAL '' (1, 13) (1, 15) - NUMBER '' (1, 16) (1, 17) - GREATEREQUAL '' (1, 18) (1, 20) - NUMBER '' (1, 21) (1, 22) - LESSEQUAL '' (1, 23) (1, 25) - NUMBER '' (1, 26) (1, 30) - LESSEQUAL '' (1, 31) (1, 33) - 
NUMBER '' (1, 34) (1, 38) - NOTEQUAL '' (1, 39) (1, 41) - NUMBER '' (1, 42) (1, 43) - NAME '' (1, 44) (1, 47) - NUMBER '' (1, 48) (1, 49) - NAME '' (1, 50) (1, 52) - NUMBER '' (1, 53) (1, 54) - NAME '' (1, 55) (1, 58) - NAME '' (1, 59) (1, 61) - NUMBER '' (1, 62) (1, 63) - NAME '' (1, 64) (1, 66) - NUMBER '' (1, 67) (1, 68) - NAME '' (1, 69) (1, 71) - NUMBER '' (1, 72) (1, 73) - NAME '' (1, 74) (1, 76) - NAME '' (1, 77) (1, 80) - NUMBER '' (1, 81) (1, 82) - COLON '' (1, 82) (1, 83) - NAME '' (1, 84) (1, 88) + NAME 'if' (1, 0) (1, 2) + NUMBER '1' (1, 3) (1, 4) + LESS '<' (1, 5) (1, 6) + NUMBER '1' (1, 7) (1, 8) + GREATER '>' (1, 9) (1, 10) + NUMBER '1' (1, 11) (1, 12) + EQEQUAL '==' (1, 13) (1, 15) + NUMBER '1' (1, 16) (1, 17) + GREATEREQUAL '>=' (1, 18) (1, 20) + NUMBER '5' (1, 21) (1, 22) + LESSEQUAL '<=' (1, 23) (1, 25) + NUMBER '0x15' (1, 26) (1, 30) + LESSEQUAL '<=' (1, 31) (1, 33) + NUMBER '0x12' (1, 34) (1, 38) + NOTEQUAL '!=' (1, 39) (1, 41) + NUMBER '1' (1, 42) (1, 43) + NAME 'and' (1, 44) (1, 47) + NUMBER '5' (1, 48) (1, 49) + NAME 'in' (1, 50) (1, 52) + NUMBER '1' (1, 53) (1, 54) + NAME 'not' (1, 55) (1, 58) + NAME 'in' (1, 59) (1, 61) + NUMBER '1' (1, 62) (1, 63) + NAME 'is' (1, 64) (1, 66) + NUMBER '1' (1, 67) (1, 68) + NAME 'or' (1, 69) (1, 71) + NUMBER '5' (1, 72) (1, 73) + NAME 'is' (1, 74) (1, 76) + NAME 'not' (1, 77) (1, 80) + NUMBER '1' (1, 81) (1, 82) + COLON ':' (1, 82) (1, 83) + NAME 'pass' (1, 84) (1, 88) """) def test_additive(self): self.check_tokenize('x = 1 - y + 15 - 1 + 0x124 + z + a[5]', """\ - NAME '' (1, 0) (1, 1) - EQUAL '' (1, 2) (1, 3) - NUMBER '' (1, 4) (1, 5) - MINUS '' (1, 6) (1, 7) - NAME '' (1, 8) (1, 9) - PLUS '' (1, 10) (1, 11) - NUMBER '' (1, 12) (1, 14) - MINUS '' (1, 15) (1, 16) - NUMBER '' (1, 17) (1, 18) - PLUS '' (1, 19) (1, 20) - NUMBER '' (1, 21) (1, 26) - PLUS '' (1, 27) (1, 28) - NAME '' (1, 29) (1, 30) - PLUS '' (1, 31) (1, 32) - NAME '' (1, 33) (1, 34) - LSQB '' (1, 34) (1, 35) - NUMBER '' (1, 35) (1, 36) - RSQB 
'' (1, 36) (1, 37) + NAME 'x' (1, 0) (1, 1) + EQUAL '=' (1, 2) (1, 3) + NUMBER '1' (1, 4) (1, 5) + MINUS '-' (1, 6) (1, 7) + NAME 'y' (1, 8) (1, 9) + PLUS '+' (1, 10) (1, 11) + NUMBER '15' (1, 12) (1, 14) + MINUS '-' (1, 15) (1, 16) + NUMBER '1' (1, 17) (1, 18) + PLUS '+' (1, 19) (1, 20) + NUMBER '0x124' (1, 21) (1, 26) + PLUS '+' (1, 27) (1, 28) + NAME 'z' (1, 29) (1, 30) + PLUS '+' (1, 31) (1, 32) + NAME 'a' (1, 33) (1, 34) + LSQB '[' (1, 34) (1, 35) + NUMBER '5' (1, 35) (1, 36) + RSQB ']' (1, 36) (1, 37) """) def test_multiplicative(self): self.check_tokenize('x = 1//1*1/5*12%0x12@42', """\ - NAME '' (1, 0) (1, 1) - EQUAL '' (1, 2) (1, 3) - NUMBER '' (1, 4) (1, 5) - DOUBLESLASH '' (1, 5) (1, 7) - NUMBER '' (1, 7) (1, 8) - STAR '' (1, 8) (1, 9) - NUMBER '' (1, 9) (1, 10) - SLASH '' (1, 10) (1, 11) - NUMBER '' (1, 11) (1, 12) - STAR '' (1, 12) (1, 13) - NUMBER '' (1, 13) (1, 15) - PERCENT '' (1, 15) (1, 16) - NUMBER '' (1, 16) (1, 20) - AT '' (1, 20) (1, 21) - NUMBER '' (1, 21) (1, 23) + NAME 'x' (1, 0) (1, 1) + EQUAL '=' (1, 2) (1, 3) + NUMBER '1' (1, 4) (1, 5) + DOUBLESLASH '//' (1, 5) (1, 7) + NUMBER '1' (1, 7) (1, 8) + STAR '*' (1, 8) (1, 9) + NUMBER '1' (1, 9) (1, 10) + SLASH '/' (1, 10) (1, 11) + NUMBER '5' (1, 11) (1, 12) + STAR '*' (1, 12) (1, 13) + NUMBER '12' (1, 13) (1, 15) + PERCENT '%' (1, 15) (1, 16) + NUMBER '0x12' (1, 16) (1, 20) + AT '@' (1, 20) (1, 21) + NUMBER '42' (1, 21) (1, 23) """) def test_unary(self): self.check_tokenize('~1 ^ 1 & 1 |1 ^ -1', """\ - TILDE '' (1, 0) (1, 1) - NUMBER '' (1, 1) (1, 2) - CIRCUMFLEX '' (1, 3) (1, 4) - NUMBER '' (1, 5) (1, 6) - AMPER '' (1, 7) (1, 8) - NUMBER '' (1, 9) (1, 10) - VBAR '' (1, 11) (1, 12) - NUMBER '' (1, 12) (1, 13) - CIRCUMFLEX '' (1, 14) (1, 15) - MINUS '' (1, 16) (1, 17) - NUMBER '' (1, 17) (1, 18) + TILDE '~' (1, 0) (1, 1) + NUMBER '1' (1, 1) (1, 2) + CIRCUMFLEX '^' (1, 3) (1, 4) + NUMBER '1' (1, 5) (1, 6) + AMPER '&' (1, 7) (1, 8) + NUMBER '1' (1, 9) (1, 10) + VBAR '|' (1, 11) (1, 12) + NUMBER 
'1' (1, 12) (1, 13) + CIRCUMFLEX '^' (1, 14) (1, 15) + MINUS '-' (1, 16) (1, 17) + NUMBER '1' (1, 17) (1, 18) """) self.check_tokenize('-1*1/1+1*1//1 - ---1**1', """\ - MINUS '' (1, 0) (1, 1) - NUMBER '' (1, 1) (1, 2) - STAR '' (1, 2) (1, 3) - NUMBER '' (1, 3) (1, 4) - SLASH '' (1, 4) (1, 5) - NUMBER '' (1, 5) (1, 6) - PLUS '' (1, 6) (1, 7) - NUMBER '' (1, 7) (1, 8) - STAR '' (1, 8) (1, 9) - NUMBER '' (1, 9) (1, 10) - DOUBLESLASH '' (1, 10) (1, 12) - NUMBER '' (1, 12) (1, 13) - MINUS '' (1, 14) (1, 15) - MINUS '' (1, 16) (1, 17) - MINUS '' (1, 17) (1, 18) - MINUS '' (1, 18) (1, 19) - NUMBER '' (1, 19) (1, 20) - DOUBLESTAR '' (1, 20) (1, 22) - NUMBER '' (1, 22) (1, 23) + MINUS '-' (1, 0) (1, 1) + NUMBER '1' (1, 1) (1, 2) + STAR '*' (1, 2) (1, 3) + NUMBER '1' (1, 3) (1, 4) + SLASH '/' (1, 4) (1, 5) + NUMBER '1' (1, 5) (1, 6) + PLUS '+' (1, 6) (1, 7) + NUMBER '1' (1, 7) (1, 8) + STAR '*' (1, 8) (1, 9) + NUMBER '1' (1, 9) (1, 10) + DOUBLESLASH '//' (1, 10) (1, 12) + NUMBER '1' (1, 12) (1, 13) + MINUS '-' (1, 14) (1, 15) + MINUS '-' (1, 16) (1, 17) + MINUS '-' (1, 17) (1, 18) + MINUS '-' (1, 18) (1, 19) + NUMBER '1' (1, 19) (1, 20) + DOUBLESTAR '**' (1, 20) (1, 22) + NUMBER '1' (1, 22) (1, 23) """) def test_selector(self): self.check_tokenize("import sys, time\nx = sys.modules['time'].time()", """\ - NAME '' (1, 0) (1, 6) - NAME '' (1, 7) (1, 10) - COMMA '' (1, 10) (1, 11) - NAME '' (1, 12) (1, 16) + NAME 'import' (1, 0) (1, 6) + NAME 'sys' (1, 7) (1, 10) + COMMA ',' (1, 10) (1, 11) + NAME 'time' (1, 12) (1, 16) NEWLINE '' (1, 16) (1, 16) - NAME '' (2, 0) (2, 1) - EQUAL '' (2, 2) (2, 3) - NAME '' (2, 4) (2, 7) - DOT '' (2, 7) (2, 8) - NAME '' (2, 8) (2, 15) - LSQB '' (2, 15) (2, 16) - STRING '' (2, 16) (2, 22) - RSQB '' (2, 22) (2, 23) - DOT '' (2, 23) (2, 24) - NAME '' (2, 24) (2, 28) - LPAR '' (2, 28) (2, 29) - RPAR '' (2, 29) (2, 30) + NAME 'x' (2, 0) (2, 1) + EQUAL '=' (2, 2) (2, 3) + NAME 'sys' (2, 4) (2, 7) + DOT '.' 
(2, 7) (2, 8) + NAME 'modules' (2, 8) (2, 15) + LSQB '[' (2, 15) (2, 16) + STRING "'time'" (2, 16) (2, 22) + RSQB ']' (2, 22) (2, 23) + DOT '.' (2, 23) (2, 24) + NAME 'time' (2, 24) (2, 28) + LPAR '(' (2, 28) (2, 29) + RPAR ')' (2, 29) (2, 30) """) def test_method(self): self.check_tokenize('@staticmethod\ndef foo(x,y): pass', """\ - AT '' (1, 0) (1, 1) - NAME '' (1, 1) (1, 13) + AT '@' (1, 0) (1, 1) + NAME 'staticmethod' (1, 1) (1, 13) NEWLINE '' (1, 13) (1, 13) - NAME '' (2, 0) (2, 3) - NAME '' (2, 4) (2, 7) - LPAR '' (2, 7) (2, 8) - NAME '' (2, 8) (2, 9) - COMMA '' (2, 9) (2, 10) - NAME '' (2, 10) (2, 11) - RPAR '' (2, 11) (2, 12) - COLON '' (2, 12) (2, 13) - NAME '' (2, 14) (2, 18) + NAME 'def' (2, 0) (2, 3) + NAME 'foo' (2, 4) (2, 7) + LPAR '(' (2, 7) (2, 8) + NAME 'x' (2, 8) (2, 9) + COMMA ',' (2, 9) (2, 10) + NAME 'y' (2, 10) (2, 11) + RPAR ')' (2, 11) (2, 12) + COLON ':' (2, 12) (2, 13) + NAME 'pass' (2, 14) (2, 18) """) def test_tabs(self): - self.check_tokenize('def f():\n\tif x\n \tpass', """\ - NAME '' (1, 0) (1, 3) - NAME '' (1, 4) (1, 5) - LPAR '' (1, 5) (1, 6) - RPAR '' (1, 6) (1, 7) - COLON '' (1, 7) (1, 8) - NEWLINE '' (1, 8) (1, 8) - INDENT '' (2, -1) (2, -1) - NAME '' (2, 1) (2, 3) - NAME '' (2, 4) (2, 5) - NEWLINE '' (2, 5) (2, 5) - INDENT '' (3, -1) (3, -1) - NAME '' (3, 9) (3, 13) - DEDENT '' (3, -1) (3, -1) - DEDENT '' (3, -1) (3, -1) + self.check_tokenize('@staticmethod\ndef foo(x,y): pass', """\ + AT '@' (1, 0) (1, 1) + NAME 'staticmethod' (1, 1) (1, 13) + NEWLINE '' (1, 13) (1, 13) + NAME 'def' (2, 0) (2, 3) + NAME 'foo' (2, 4) (2, 7) + LPAR '(' (2, 7) (2, 8) + NAME 'x' (2, 8) (2, 9) + COMMA ',' (2, 9) (2, 10) + NAME 'y' (2, 10) (2, 11) + RPAR ')' (2, 11) (2, 12) + COLON ':' (2, 12) (2, 13) + NAME 'pass' (2, 14) (2, 18) """) def test_async(self): self.check_tokenize('async = 1', """\ - ASYNC '' (1, 0) (1, 5) - EQUAL '' (1, 6) (1, 7) - NUMBER '' (1, 8) (1, 9) + ASYNC 'async' (1, 0) (1, 5) + EQUAL '=' (1, 6) (1, 7) + NUMBER '1' (1, 8) (1, 9) 
""") self.check_tokenize('a = (async = 1)', """\ - NAME '' (1, 0) (1, 1) - EQUAL '' (1, 2) (1, 3) - LPAR '' (1, 4) (1, 5) - ASYNC '' (1, 5) (1, 10) - EQUAL '' (1, 11) (1, 12) - NUMBER '' (1, 13) (1, 14) - RPAR '' (1, 14) (1, 15) + NAME 'a' (1, 0) (1, 1) + EQUAL '=' (1, 2) (1, 3) + LPAR '(' (1, 4) (1, 5) + ASYNC 'async' (1, 5) (1, 10) + EQUAL '=' (1, 11) (1, 12) + NUMBER '1' (1, 13) (1, 14) + RPAR ')' (1, 14) (1, 15) """) self.check_tokenize('async()', """\ - ASYNC '' (1, 0) (1, 5) - LPAR '' (1, 5) (1, 6) - RPAR '' (1, 6) (1, 7) + ASYNC 'async' (1, 0) (1, 5) + LPAR '(' (1, 5) (1, 6) + RPAR ')' (1, 6) (1, 7) """) self.check_tokenize('class async(Bar):pass', """\ - NAME '' (1, 0) (1, 5) - ASYNC '' (1, 6) (1, 11) - LPAR '' (1, 11) (1, 12) - NAME '' (1, 12) (1, 15) - RPAR '' (1, 15) (1, 16) - COLON '' (1, 16) (1, 17) - NAME '' (1, 17) (1, 21) + NAME 'class' (1, 0) (1, 5) + ASYNC 'async' (1, 6) (1, 11) + LPAR '(' (1, 11) (1, 12) + NAME 'Bar' (1, 12) (1, 15) + RPAR ')' (1, 15) (1, 16) + COLON ':' (1, 16) (1, 17) + NAME 'pass' (1, 17) (1, 21) """) self.check_tokenize('class async:pass', """\ - NAME '' (1, 0) (1, 5) - ASYNC '' (1, 6) (1, 11) - COLON '' (1, 11) (1, 12) - NAME '' (1, 12) (1, 16) + NAME 'class' (1, 0) (1, 5) + ASYNC 'async' (1, 6) (1, 11) + COLON ':' (1, 11) (1, 12) + NAME 'pass' (1, 12) (1, 16) """) self.check_tokenize('await = 1', """\ - AWAIT '' (1, 0) (1, 5) - EQUAL '' (1, 6) (1, 7) - NUMBER '' (1, 8) (1, 9) + AWAIT 'await' (1, 0) (1, 5) + EQUAL '=' (1, 6) (1, 7) + NUMBER '1' (1, 8) (1, 9) """) self.check_tokenize('foo.async', """\ - NAME '' (1, 0) (1, 3) - DOT '' (1, 3) (1, 4) - ASYNC '' (1, 4) (1, 9) + NAME 'foo' (1, 0) (1, 3) + DOT '.' 
(1, 3) (1, 4) + ASYNC 'async' (1, 4) (1, 9) """) self.check_tokenize('async for a in b: pass', """\ - ASYNC '' (1, 0) (1, 5) - NAME '' (1, 6) (1, 9) - NAME '' (1, 10) (1, 11) - NAME '' (1, 12) (1, 14) - NAME '' (1, 15) (1, 16) - COLON '' (1, 16) (1, 17) - NAME '' (1, 18) (1, 22) + ASYNC 'async' (1, 0) (1, 5) + NAME 'for' (1, 6) (1, 9) + NAME 'a' (1, 10) (1, 11) + NAME 'in' (1, 12) (1, 14) + NAME 'b' (1, 15) (1, 16) + COLON ':' (1, 16) (1, 17) + NAME 'pass' (1, 18) (1, 22) """) self.check_tokenize('async with a as b: pass', """\ - ASYNC '' (1, 0) (1, 5) - NAME '' (1, 6) (1, 10) - NAME '' (1, 11) (1, 12) - NAME '' (1, 13) (1, 15) - NAME '' (1, 16) (1, 17) - COLON '' (1, 17) (1, 18) - NAME '' (1, 19) (1, 23) + ASYNC 'async' (1, 0) (1, 5) + NAME 'with' (1, 6) (1, 10) + NAME 'a' (1, 11) (1, 12) + NAME 'as' (1, 13) (1, 15) + NAME 'b' (1, 16) (1, 17) + COLON ':' (1, 17) (1, 18) + NAME 'pass' (1, 19) (1, 23) """) self.check_tokenize('async.foo', """\ - ASYNC '' (1, 0) (1, 5) - DOT '' (1, 5) (1, 6) - NAME '' (1, 6) (1, 9) + ASYNC 'async' (1, 0) (1, 5) + DOT '.' (1, 5) (1, 6) + NAME 'foo' (1, 6) (1, 9) """) self.check_tokenize('async', """\ - ASYNC '' (1, 0) (1, 5) + ASYNC 'async' (1, 0) (1, 5) """) self.check_tokenize('async\n#comment\nawait', """\ - ASYNC '' (1, 0) (1, 5) + ASYNC 'async' (1, 0) (1, 5) NEWLINE '' (1, 5) (1, 5) - AWAIT '' (3, 0) (3, 5) + AWAIT 'await' (3, 0) (3, 5) """) self.check_tokenize('async\n...\nawait', """\ - ASYNC '' (1, 0) (1, 5) + ASYNC 'async' (1, 0) (1, 5) NEWLINE '' (1, 5) (1, 5) - ELLIPSIS '' (2, 0) (2, 3) + ELLIPSIS '...' 
(2, 0) (2, 3) NEWLINE '' (2, 3) (2, 3) - AWAIT '' (3, 0) (3, 5) + AWAIT 'await' (3, 0) (3, 5) """) self.check_tokenize('async\nawait', """\ - ASYNC '' (1, 0) (1, 5) + ASYNC 'async' (1, 0) (1, 5) NEWLINE '' (1, 5) (1, 5) - AWAIT '' (2, 0) (2, 5) + AWAIT 'await' (2, 0) (2, 5) """) self.check_tokenize('foo.async + 1', """\ - NAME '' (1, 0) (1, 3) - DOT '' (1, 3) (1, 4) - ASYNC '' (1, 4) (1, 9) - PLUS '' (1, 10) (1, 11) - NUMBER '' (1, 12) (1, 13) + NAME 'foo' (1, 0) (1, 3) + DOT '.' (1, 3) (1, 4) + ASYNC 'async' (1, 4) (1, 9) + PLUS '+' (1, 10) (1, 11) + NUMBER '1' (1, 12) (1, 13) """) self.check_tokenize('async def foo(): pass', """\ - ASYNC '' (1, 0) (1, 5) - NAME '' (1, 6) (1, 9) - NAME '' (1, 10) (1, 13) - LPAR '' (1, 13) (1, 14) - RPAR '' (1, 14) (1, 15) - COLON '' (1, 15) (1, 16) - NAME '' (1, 17) (1, 21) + ASYNC 'async' (1, 0) (1, 5) + NAME 'def' (1, 6) (1, 9) + NAME 'foo' (1, 10) (1, 13) + LPAR '(' (1, 13) (1, 14) + RPAR ')' (1, 14) (1, 15) + COLON ':' (1, 15) (1, 16) + NAME 'pass' (1, 17) (1, 21) """) self.check_tokenize('''\ @@ -2348,70 +2323,70 @@ def foo(await): await async += 1 ''', """\ - ASYNC '' (1, 0) (1, 5) - NAME '' (1, 6) (1, 9) - NAME '' (1, 10) (1, 13) - LPAR '' (1, 13) (1, 14) - RPAR '' (1, 14) (1, 15) - COLON '' (1, 15) (1, 16) + ASYNC 'async' (1, 0) (1, 5) + NAME 'def' (1, 6) (1, 9) + NAME 'foo' (1, 10) (1, 13) + LPAR '(' (1, 13) (1, 14) + RPAR ')' (1, 14) (1, 15) + COLON ':' (1, 15) (1, 16) NEWLINE '' (1, 16) (1, 16) INDENT '' (2, -1) (2, -1) - NAME '' (2, 2) (2, 5) - NAME '' (2, 6) (2, 9) - LPAR '' (2, 9) (2, 10) - AWAIT '' (2, 10) (2, 15) - RPAR '' (2, 15) (2, 16) - COLON '' (2, 16) (2, 17) + NAME 'def' (2, 2) (2, 5) + NAME 'foo' (2, 6) (2, 9) + LPAR '(' (2, 9) (2, 10) + AWAIT 'await' (2, 10) (2, 15) + RPAR ')' (2, 15) (2, 16) + COLON ':' (2, 16) (2, 17) NEWLINE '' (2, 17) (2, 17) INDENT '' (3, -1) (3, -1) - AWAIT '' (3, 4) (3, 9) - EQUAL '' (3, 10) (3, 11) - NUMBER '' (3, 12) (3, 13) + AWAIT 'await' (3, 4) (3, 9) + EQUAL '=' (3, 10) (3, 
11) + NUMBER '1' (3, 12) (3, 13) NEWLINE '' (3, 13) (3, 13) DEDENT '' (4, -1) (4, -1) - NAME '' (4, 2) (4, 4) - NUMBER '' (4, 5) (4, 6) - COLON '' (4, 6) (4, 7) + NAME 'if' (4, 2) (4, 4) + NUMBER '1' (4, 5) (4, 6) + COLON ':' (4, 6) (4, 7) NEWLINE '' (4, 7) (4, 7) INDENT '' (5, -1) (5, -1) - AWAIT '' (5, 4) (5, 9) + AWAIT 'await' (5, 4) (5, 9) NEWLINE '' (5, 9) (5, 9) DEDENT '' (6, -1) (6, -1) DEDENT '' (6, -1) (6, -1) - ASYNC '' (6, 0) (6, 5) - PLUSEQUAL '' (6, 6) (6, 8) - NUMBER '' (6, 9) (6, 10) + ASYNC 'async' (6, 0) (6, 5) + PLUSEQUAL '+=' (6, 6) (6, 8) + NUMBER '1' (6, 9) (6, 10) NEWLINE '' (6, 10) (6, 10) """) self.check_tokenize('async def foo():\n async for i in 1: pass', """\ - ASYNC '' (1, 0) (1, 5) - NAME '' (1, 6) (1, 9) - NAME '' (1, 10) (1, 13) - LPAR '' (1, 13) (1, 14) - RPAR '' (1, 14) (1, 15) - COLON '' (1, 15) (1, 16) + ASYNC 'async' (1, 0) (1, 5) + NAME 'def' (1, 6) (1, 9) + NAME 'foo' (1, 10) (1, 13) + LPAR '(' (1, 13) (1, 14) + RPAR ')' (1, 14) (1, 15) + COLON ':' (1, 15) (1, 16) NEWLINE '' (1, 16) (1, 16) INDENT '' (2, -1) (2, -1) - ASYNC '' (2, 2) (2, 7) - NAME '' (2, 8) (2, 11) - NAME '' (2, 12) (2, 13) - NAME '' (2, 14) (2, 16) - NUMBER '' (2, 17) (2, 18) - COLON '' (2, 18) (2, 19) - NAME '' (2, 20) (2, 24) + ASYNC 'async' (2, 2) (2, 7) + NAME 'for' (2, 8) (2, 11) + NAME 'i' (2, 12) (2, 13) + NAME 'in' (2, 14) (2, 16) + NUMBER '1' (2, 17) (2, 18) + COLON ':' (2, 18) (2, 19) + NAME 'pass' (2, 20) (2, 24) DEDENT '' (2, -1) (2, -1) """) self.check_tokenize('async def foo(async): await', """\ - ASYNC '' (1, 0) (1, 5) - NAME '' (1, 6) (1, 9) - NAME '' (1, 10) (1, 13) - LPAR '' (1, 13) (1, 14) - ASYNC '' (1, 14) (1, 19) - RPAR '' (1, 19) (1, 20) - COLON '' (1, 20) (1, 21) - AWAIT '' (1, 22) (1, 27) + ASYNC 'async' (1, 0) (1, 5) + NAME 'def' (1, 6) (1, 9) + NAME 'foo' (1, 10) (1, 13) + LPAR '(' (1, 13) (1, 14) + ASYNC 'async' (1, 14) (1, 19) + RPAR ')' (1, 19) (1, 20) + COLON ':' (1, 20) (1, 21) + AWAIT 'await' (1, 22) (1, 27) """) 
self.check_tokenize('''\ @@ -2421,31 +2396,31 @@ def baz(): pass async def bar(): pass await = 2''', """\ - NAME '' (1, 0) (1, 3) - NAME '' (1, 4) (1, 5) - LPAR '' (1, 5) (1, 6) - RPAR '' (1, 6) (1, 7) - COLON '' (1, 7) (1, 8) + NAME 'def' (1, 0) (1, 3) + NAME 'f' (1, 4) (1, 5) + LPAR '(' (1, 5) (1, 6) + RPAR ')' (1, 6) (1, 7) + COLON ':' (1, 7) (1, 8) NEWLINE '' (1, 8) (1, 8) INDENT '' (3, -1) (3, -1) - NAME '' (3, 2) (3, 5) - NAME '' (3, 6) (3, 9) - LPAR '' (3, 9) (3, 10) - RPAR '' (3, 10) (3, 11) - COLON '' (3, 11) (3, 12) - NAME '' (3, 13) (3, 17) + NAME 'def' (3, 2) (3, 5) + NAME 'baz' (3, 6) (3, 9) + LPAR '(' (3, 9) (3, 10) + RPAR ')' (3, 10) (3, 11) + COLON ':' (3, 11) (3, 12) + NAME 'pass' (3, 13) (3, 17) NEWLINE '' (3, 17) (3, 17) - ASYNC '' (4, 2) (4, 7) - NAME '' (4, 8) (4, 11) - NAME '' (4, 12) (4, 15) - LPAR '' (4, 15) (4, 16) - RPAR '' (4, 16) (4, 17) - COLON '' (4, 17) (4, 18) - NAME '' (4, 19) (4, 23) + ASYNC 'async' (4, 2) (4, 7) + NAME 'def' (4, 8) (4, 11) + NAME 'bar' (4, 12) (4, 15) + LPAR '(' (4, 15) (4, 16) + RPAR ')' (4, 16) (4, 17) + COLON ':' (4, 17) (4, 18) + NAME 'pass' (4, 19) (4, 23) NEWLINE '' (4, 23) (4, 23) - AWAIT '' (6, 2) (6, 7) - EQUAL '' (6, 8) (6, 9) - NUMBER '' (6, 10) (6, 11) + AWAIT 'await' (6, 2) (6, 7) + EQUAL '=' (6, 8) (6, 9) + NUMBER '2' (6, 10) (6, 11) DEDENT '' (6, -1) (6, -1) """) @@ -2456,46 +2431,45 @@ def baz(): pass async def bar(): pass await = 2''', """\ - ASYNC '' (1, 0) (1, 5) - NAME '' (1, 6) (1, 9) - NAME '' (1, 10) (1, 11) - LPAR '' (1, 11) (1, 12) - RPAR '' (1, 12) (1, 13) - COLON '' (1, 13) (1, 14) + ASYNC 'async' (1, 0) (1, 5) + NAME 'def' (1, 6) (1, 9) + NAME 'f' (1, 10) (1, 11) + LPAR '(' (1, 11) (1, 12) + RPAR ')' (1, 12) (1, 13) + COLON ':' (1, 13) (1, 14) NEWLINE '' (1, 14) (1, 14) INDENT '' (3, -1) (3, -1) - NAME '' (3, 2) (3, 5) - NAME '' (3, 6) (3, 9) - LPAR '' (3, 9) (3, 10) - RPAR '' (3, 10) (3, 11) - COLON '' (3, 11) (3, 12) - NAME '' (3, 13) (3, 17) + NAME 'def' (3, 2) (3, 5) + NAME 'baz' 
(3, 6) (3, 9) + LPAR '(' (3, 9) (3, 10) + RPAR ')' (3, 10) (3, 11) + COLON ':' (3, 11) (3, 12) + NAME 'pass' (3, 13) (3, 17) NEWLINE '' (3, 17) (3, 17) - ASYNC '' (4, 2) (4, 7) - NAME '' (4, 8) (4, 11) - NAME '' (4, 12) (4, 15) - LPAR '' (4, 15) (4, 16) - RPAR '' (4, 16) (4, 17) - COLON '' (4, 17) (4, 18) - NAME '' (4, 19) (4, 23) + ASYNC 'async' (4, 2) (4, 7) + NAME 'def' (4, 8) (4, 11) + NAME 'bar' (4, 12) (4, 15) + LPAR '(' (4, 15) (4, 16) + RPAR ')' (4, 16) (4, 17) + COLON ':' (4, 17) (4, 18) + NAME 'pass' (4, 19) (4, 23) NEWLINE '' (4, 23) (4, 23) - AWAIT '' (6, 2) (6, 7) - EQUAL '' (6, 8) (6, 9) - NUMBER '' (6, 10) (6, 11) + AWAIT 'await' (6, 2) (6, 7) + EQUAL '=' (6, 8) (6, 9) + NUMBER '2' (6, 10) (6, 11) DEDENT '' (6, -1) (6, -1) """) - def test_unicode(self): self.check_tokenize("Örter = u'places'\ngrün = U'green'", """\ - NAME '' (1, 0) (1, 6) - EQUAL '' (1, 7) (1, 8) - STRING '' (1, 9) (1, 18) + NAME 'Örter' (1, 0) (1, 6) + EQUAL '=' (1, 7) (1, 8) + STRING "u'places'" (1, 9) (1, 18) NEWLINE '' (1, 18) (1, 18) - NAME '' (2, 0) (2, 5) - EQUAL '' (2, 6) (2, 7) - STRING '' (2, 8) (2, 16) + NAME 'grün' (2, 0) (2, 5) + EQUAL '=' (2, 6) (2, 7) + STRING "U'green'" (2, 8) (2, 16) """) def test_invalid_syntax(self): diff --git a/Python/Python-tokenize.c b/Python/Python-tokenize.c index 3c41b05af74a37..b9fb1693ce117e 100644 --- a/Python/Python-tokenize.c +++ b/Python/Python-tokenize.c @@ -106,6 +106,7 @@ tokenizeriter_dealloc(tokenizeriterobject* it) PyTypeObject* tp = Py_TYPE(it); PyTokenizer_Free(it->tok); tp->tp_free(it); + Py_DECREF(tp); } static PyType_Slot tokenizeriter_slots[] = {