
Commit

Add type hints to parsing API and some other places.
get_tokens, get_model, and their resource and init file variants now
have type hints (#4740). Parsing model itself still needs type hints.
pekkaklarck committed Apr 21, 2023
1 parent bcb0e31 commit 479065d
Showing 8 changed files with 112 additions and 94 deletions.
4 changes: 2 additions & 2 deletions src/robot/model/namepatterns.py
@@ -13,14 +13,14 @@
# See the License for the specific language governing permissions and
# limitations under the License.

from typing import Iterable, Iterator, Sequence
from typing import Iterable, Iterator

from robot.utils import MultiMatcher


class NamePatterns(Iterable[str]):

def __init__(self, patterns: Sequence[str] = ()):
def __init__(self, patterns: Iterator[str] = ()):
self.matcher = MultiMatcher(patterns, ignore='_')

def match(self, name: str, longname: 'str|None' = None) -> bool:
58 changes: 34 additions & 24 deletions src/robot/parsing/lexer/lexer.py
@@ -13,18 +13,23 @@
# See the License for the specific language governing permissions and
# limitations under the License.

from collections.abc import Iterator
from itertools import chain

from robot.conf import LanguagesLike
from robot.errors import DataError
from robot.utils import get_error_message, FileReader
from robot.utils import get_error_message, FileReader, Source

from .blocklexers import FileLexer
from .context import InitFileContext, SuiteFileContext, ResourceFileContext
from .context import (InitFileContext, LexingContext, SuiteFileContext,
ResourceFileContext)
from .tokenizer import Tokenizer
from .tokens import EOS, END, Token


def get_tokens(source, data_only=False, tokenize_variables=False, lang=None):
def get_tokens(source: Source, data_only: bool = False,
tokenize_variables: bool = False,
lang: LanguagesLike = None) -> 'Iterator[Token]':
"""Parses the given source to tokens.
:param source: The source where to read the data. Can be a path to
@@ -40,7 +45,7 @@ def get_tokens(source, data_only=False, tokenize_variables=False, lang=None):
method for details.
:param lang: Additional languages to be supported during parsing.
Can be a string matching any of the supported language codes or names,
an initialized :class:`~robot.conf.languages.Language` subsclass,
an initialized :class:`~robot.conf.languages.Language` subclass,
a list containing such strings or instances, or a
:class:`~robot.conf.languages.Languages` instance.
@@ -52,7 +57,9 @@ def get_tokens(source, data_only=False, tokenize_variables=False, lang=None):
return lexer.get_tokens()


def get_resource_tokens(source, data_only=False, tokenize_variables=False, lang=None):
def get_resource_tokens(source: Source, data_only: bool = False,
tokenize_variables: bool = False,
lang: LanguagesLike = None) -> 'Iterator[Token]':
"""Parses the given source to resource file tokens.
Same as :func:`get_tokens` otherwise, but the source is considered to be
@@ -63,7 +70,9 @@ def get_resource_tokens(source, data_only=False, tokenize_variables=False, lang=
return lexer.get_tokens()


def get_init_tokens(source, data_only=False, tokenize_variables=False, lang=None):
def get_init_tokens(source: Source, data_only: bool = False,
tokenize_variables: bool = False,
lang: LanguagesLike = None) -> 'Iterator[Token]':
"""Parses the given source to init file tokens.
Same as :func:`get_tokens` otherwise, but the source is considered to be
@@ -77,15 +86,15 @@ def get_init_tokens(source, data_only=False, tokenize_variables=False, lang=None

class Lexer:

def __init__(self, ctx, data_only=False, tokenize_variables=False):
def __init__(self, ctx: LexingContext, data_only: bool = False,
tokenize_variables: bool = False):
self.lexer = FileLexer(ctx)
self.data_only = data_only
self.tokenize_variables = tokenize_variables
self.statements = []
self.statements: 'list[list[Token]]' = []

def input(self, source):
for statement in Tokenizer().tokenize(self._read(source),
self.data_only):
def input(self, source: Source):
for statement in Tokenizer().tokenize(self._read(source), self.data_only):
# Store all tokens but pass only data tokens to lexer.
self.statements.append(statement)
if self.data_only:
@@ -96,27 +105,28 @@ def input(self, source):
if data:
self.lexer.input(data)

def _read(self, source):
def _read(self, source: Source) -> str:
try:
with FileReader(source, accept_text=True) as reader:
return reader.read()
except Exception:
raise DataError(get_error_message())

def get_tokens(self):
def get_tokens(self) -> 'Iterator[Token]':
self.lexer.lex()
statements = self.statements
if not self.data_only:
if self.data_only:
statements = self.statements
else:
statements = chain.from_iterable(
self._split_trailing_commented_and_empty_lines(s)
for s in statements
self._split_trailing_commented_and_empty_lines(stmt)
for stmt in self.statements
)
tokens = self._get_tokens(statements)
if self.tokenize_variables:
tokens = self._tokenize_variables(tokens)
return tokens

def _get_tokens(self, statements):
def _get_tokens(self, statements: 'list[list[Token]]') -> 'Iterator[Token]':
if self.data_only:
ignored_types = {None, Token.COMMENT_HEADER, Token.COMMENT}
else:
@@ -143,7 +153,8 @@ def _get_tokens(self, statements):
yield END.from_token(last, virtual=True)
yield EOS.from_token(last)

def _split_trailing_commented_and_empty_lines(self, statement):
def _split_trailing_commented_and_empty_lines(self, statement: 'list[Token]') \
-> 'list[list[Token]]':
lines = self._split_to_lines(statement)
commented_or_empty = []
for line in reversed(lines):
@@ -156,7 +167,7 @@ def _split_trailing_commented_and_empty_lines(self, statement):
statement = list(chain.from_iterable(lines))
return [statement] + list(reversed(commented_or_empty))

def _split_to_lines(self, statement):
def _split_to_lines(self, statement: 'list[Token]') -> 'list[list[Token]]':
lines = []
current = []
for token in statement:
@@ -168,15 +179,14 @@ def _split_to_lines(self, statement):
lines.append(current)
return lines

def _is_commented_or_empty(self, line):
def _is_commented_or_empty(self, line: 'list[Token]') -> bool:
separator_or_ignore = (Token.SEPARATOR, None)
comment_or_eol = (Token.COMMENT, Token.EOL)
for token in line:
if token.type not in separator_or_ignore:
return token.type in comment_or_eol
return False

def _tokenize_variables(self, tokens):
def _tokenize_variables(self, tokens: 'Iterator[Token]') -> 'Iterator[Token]':
for token in tokens:
for t in token.tokenize_variables():
yield t
yield from token.tokenize_variables()
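
For context, a minimal usage sketch of the newly annotated get_tokens() API. This is not part of the commit; it assumes a Robot Framework version that includes these hints, and the inline test data is illustrative only.

from robot.api import get_tokens

data = '''\
*** Test Cases ***
Example
    Log    Hello, world!
'''

# get_tokens() accepts a path, an open file, or a string of data and yields
# Token objects. With data_only=True, separators, comments and empty lines
# are dropped from the output.
for token in get_tokens(data, data_only=True):
    print(token.type, repr(token.value), token.lineno, token.col_offset)
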
9 changes: 5 additions & 4 deletions src/robot/parsing/lexer/tokenizer.py
@@ -14,6 +14,7 @@
# limitations under the License.

import re
from collections.abc import Iterator

from .tokens import Token

@@ -22,7 +23,7 @@ class Tokenizer:
_space_splitter = re.compile(r'(\s{2,}|\t)', re.UNICODE)
_pipe_splitter = re.compile(r'((?:\A|\s+)\|(?:\s+|\Z))', re.UNICODE)

def tokenize(self, data, data_only=False):
def tokenize(self, data: str, data_only: bool = False) -> 'Iterator[list[Token]]':
current = []
for lineno, line in enumerate(data.splitlines(not data_only), start=1):
tokens = self._tokenize_line(line, lineno, not data_only)
@@ -35,7 +36,7 @@ def tokenize(self, data, data_only=False):
current.extend(tokens)
yield current

def _tokenize_line(self, line, lineno, include_separators=True):
def _tokenize_line(self, line: str, lineno: int, include_separators: bool):
# Performance optimized code.
tokens = []
append = tokens.append
@@ -55,13 +56,13 @@ def _tokenize_line(self, line, lineno, include_separators=True):
append(Token(Token.EOL, trailing_whitespace, lineno, offset))
return tokens

def _split_from_spaces(self, line):
def _split_from_spaces(self, line: str) -> 'Iterator[tuple[str, bool]]':
is_data = True
for value in self._space_splitter.split(line):
yield value, is_data
is_data = not is_data

def _split_from_pipes(self, line):
def _split_from_pipes(self, line) -> 'Iterator[tuple[str, bool]]':
splitter = self._pipe_splitter
_, separator, rest = splitter.split(line, 1)
yield separator, False
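
An illustrative sketch of driving the internal Tokenizer directly. This is not part of the commit; the import path follows the file shown above, and the behaviour described in the comments is an assumption that may change between releases.

from robot.parsing.lexer.tokenizer import Tokenizer

data = '*** Settings ***\nLibrary    Collections\n'

# tokenize() yields one list of Token objects per statement. At this stage
# data tokens typically have no type yet (the lexer assigns types later);
# separator and end-of-line tokens are included because data_only is False.
for statement in Tokenizer().tokenize(data, data_only=False):
    print([token.value for token in statement])
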
48 changes: 23 additions & 25 deletions src/robot/parsing/lexer/tokens.py
@@ -13,6 +13,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.

from collections.abc import Iterator

from robot.variables import VariableIterator


@@ -26,16 +28,13 @@ class Token:
Token types are declared as class attributes such as :attr:`SETTING_HEADER`
and :attr:`EOL`. Values of these constants have changed slightly in Robot
Framework 4.0 and they may change again in the future. It is thus safer
Framework 4.0, and they may change again in the future. It is thus safer
to use the constants, not their values, when types are needed. For example,
use ``Token(Token.EOL)`` instead of ``Token('EOL')`` and
``token.type == Token.EOL`` instead of ``token.type == 'EOL'``.
If :attr:`value` is not given when :class:`Token` is initialized and
:attr:`type` is :attr:`IF`, :attr:`ELSE_IF`, :attr:`ELSE`, :attr:`FOR`,
:attr:`END`, :attr:`WITH_NAME` or :attr:`CONTINUATION`, the value is
automatically set to the correct marker value like ``'IF'`` or ``'ELSE IF'``.
If :attr:`type` is :attr:`EOL` in this case, the value is set to ``'\\n'``.
If :attr:`value` is not given and :attr:`type` is a special marker like
:attr:`IF` or `:attr:`EOL`, the value is set automatically.
"""

SETTING_HEADER = 'SETTING HEADER'
@@ -155,11 +154,11 @@ class Token:
TESTCASE_NAME,
KEYWORD_NAME
))

__slots__ = ['type', 'value', 'lineno', 'col_offset', 'error',
'_add_eos_before', '_add_eos_after']

def __init__(self, type=None, value=None, lineno=-1, col_offset=-1, error=None):
def __init__(self, type: 'str|None' = None, value: 'str|None' = None,
lineno: int = -1, col_offset: int = -1, error: 'str|None' = None):
self.type = type
if value is None:
value = {
@@ -179,21 +178,21 @@ def __init__(self, type=None, value=None, lineno=-1, col_offset=-1, error=None):
self._add_eos_after = False

@property
def end_col_offset(self):
def end_col_offset(self) -> int:
if self.col_offset == -1:
return -1
return self.col_offset + len(self.value)

def set_error(self, error):
def set_error(self, error: str):
self.type = Token.ERROR
self.error = error

def tokenize_variables(self):
def tokenize_variables(self) -> 'Iterator[Token]':
"""Tokenizes possible variables in token value.
Yields the token itself if the token does not allow variables (see
:attr:`Token.ALLOW_VARIABLES`) or its value does not contain
variables. Otherwise yields variable tokens as well as tokens
variables. Otherwise, yields variable tokens as well as tokens
before, after, or between variables so that they have the same
type as the original token.
"""
@@ -220,16 +219,15 @@ def _tokenize_variables(self, variables):
if remaining:
yield Token(self.type, remaining, lineno, col_offset)

def __str__(self):
def __str__(self) -> str:
return self.value

def __repr__(self):
type_ = self.type.replace(' ', '_') if self.type else 'None'
error = '' if not self.error else ', %r' % self.error
return 'Token(%s, %r, %s, %s%s)' % (type_, self.value, self.lineno,
self.col_offset, error)
def __repr__(self) -> str:
typ = self.type.replace(' ', '_') if self.type else 'None'
error = '' if not self.error else f', {self.error!r}'
return f'Token({typ}, {self.value!r}, {self.lineno}, {self.col_offset}{error})'

def __eq__(self, other):
def __eq__(self, other) -> bool:
return (isinstance(other, Token)
and self.type == other.type
and self.value == other.value
@@ -242,13 +240,13 @@ class EOS(Token):
"""Token representing end of a statement."""
__slots__ = []

def __init__(self, lineno=-1, col_offset=-1):
def __init__(self, lineno: int = -1, col_offset: int = -1):
super().__init__(Token.EOS, '', lineno, col_offset)

@classmethod
def from_token(cls, token, before=False):
def from_token(cls, token: Token, before: bool = False) -> 'EOS':
col_offset = token.col_offset if before else token.end_col_offset
return EOS(token.lineno, col_offset)
return cls(token.lineno, col_offset)


class END(Token):
@@ -259,10 +257,10 @@ class END(Token):
"""
__slots__ = []

def __init__(self, lineno=-1, col_offset=-1, virtual=False):
def __init__(self, lineno: int = -1, col_offset: int = -1, virtual: bool = False):
value = 'END' if not virtual else ''
super().__init__(Token.END, value, lineno, col_offset)

@classmethod
def from_token(cls, token, virtual=False):
return END(token.lineno, token.end_col_offset, virtual)
def from_token(cls, token: Token, virtual: bool = False) -> 'END':
return cls(token.lineno, token.end_col_offset, virtual)
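
A small sketch of the Token API and tokenize_variables(), based on the docstrings above. This is not part of the commit, and the example values are made up.

from robot.api import Token

token = Token(Token.ARGUMENT, 'Hello, ${name}!', lineno=1, col_offset=4)

# ARGUMENT is in Token.ALLOW_VARIABLES, so the value is split into VARIABLE
# tokens and surrounding tokens that keep the original ARGUMENT type.
for sub in token.tokenize_variables():
    print(sub.type, repr(sub.value), sub.col_offset)
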
30 changes: 18 additions & 12 deletions src/robot/parsing/parser/parser.py
@@ -13,14 +13,18 @@
# See the License for the specific language governing permissions and
# limitations under the License.

from ..lexer import Token, get_tokens, get_resource_tokens, get_init_tokens
from ..model import Statement, ModelVisitor
from robot.conf import LanguagesLike
from robot.utils import Source

from ..lexer import get_init_tokens, get_resource_tokens, get_tokens, Token
from ..model import File, ModelVisitor, Statement

from .fileparser import FileParser


def get_model(source, data_only=False, curdir=None, lang=None):
"""Parses the given source to a model represented as an AST.
def get_model(source: Source, data_only: bool = False, curdir: 'str|None' = None,
lang: LanguagesLike = None) -> File:
"""Parses the given source into a model represented as an AST.
How to use the model is explained more thoroughly in the general
documentation of the :mod:`robot.parsing` module.
@@ -36,11 +40,11 @@ def get_model(source, data_only=False, curdir=None, lang=None):
:param curdir: Directory where the source file exists. This path is used
to set the value of the built-in ``${CURDIR}`` variable during parsing.
When not given, the variable is left as-is. Should only be given
only if the model will be executed afterwards. If the model is saved
only if the model will be executed afterward. If the model is saved
back to disk, resolving ``${CURDIR}`` is typically not a good idea.
:param lang: Additional languages to be supported during parsing.
Can be a string matching any of the supported language codes or names,
an initialized :class:`~robot.conf.languages.Language` subsclass,
an initialized :class:`~robot.conf.languages.Language` subclass,
a list containing such strings or instances, or a
:class:`~robot.conf.languages.Languages` instance.
@@ -50,19 +54,21 @@ def get_model(source, data_only=False, curdir=None, lang=None):
return _get_model(get_tokens, source, data_only, curdir, lang)


def get_resource_model(source, data_only=False, curdir=None, lang=None):
"""Parses the given source to a resource file model.
def get_resource_model(source: Source, data_only: bool = False,
curdir: 'str|None' = None, lang: LanguagesLike = None) -> File:
"""Parses the given source into a resource file model.
Otherwise same as :func:`get_model` but the source is considered to be
Same as :func:`get_model` otherwise, but the source is considered to be
a resource file. This affects, for example, what settings are valid.
"""
return _get_model(get_resource_tokens, source, data_only, curdir, lang)


def get_init_model(source, data_only=False, curdir=None, lang=None):
"""Parses the given source to a init file model.
def get_init_model(source: Source, data_only: bool = False, curdir: 'str|None' = None,
lang: LanguagesLike = None) -> File:
"""Parses the given source into an init file model.
Otherwise same as :func:`get_model` but the source is considered to be
Same as :func:`get_model` otherwise, but the source is considered to be
a suite initialization file. This affects, for example, what settings are
valid.
"""
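
A minimal sketch of the typed get_model() API together with ModelVisitor. This is not part of the commit; 'example.robot' and the visitor class are illustrative only.

from robot.api import get_model
from robot.api.parsing import ModelVisitor


class TestNamePrinter(ModelVisitor):

    # Called for every TestCaseName statement in the parsed model.
    def visit_TestCaseName(self, node):
        print(f'{node.name} (line {node.lineno})')


# get_model() returns the File root node of the AST; a visitor can then
# walk it without executing anything.
model = get_model('example.robot')
TestNamePrinter().visit(model)
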
