
Commit

Add type hints to parsing API and some other places.
get_tokens, get_model, and their resource and init file variants now
have type hints (#4740). Parsing model itself still needs type hints.
pekkaklarck committed Apr 21, 2023
1 parent bcb0e31 commit 479065d
Showing 8 changed files with 112 additions and 94 deletions.
4 changes: 2 additions & 2 deletions src/robot/model/namepatterns.py
@@ -13,14 +13,14 @@
# See the License for the specific language governing permissions and
# limitations under the License.

from typing import Iterable, Iterator, Sequence
from typing import Iterable, Iterator

from robot.utils import MultiMatcher


class NamePatterns(Iterable[str]):

def __init__(self, patterns: Sequence[str] = ()):
def __init__(self, patterns: Iterator[str] = ()):
self.matcher = MultiMatcher(patterns, ignore='_')

def match(self, name: str, longname: 'str|None' = None) -> bool:
58 changes: 34 additions & 24 deletions src/robot/parsing/lexer/lexer.py
@@ -13,18 +13,23 @@
# See the License for the specific language governing permissions and
# limitations under the License.

from collections.abc import Iterator
from itertools import chain

from robot.conf import LanguagesLike
from robot.errors import DataError
from robot.utils import get_error_message, FileReader
from robot.utils import get_error_message, FileReader, Source

from .blocklexers import FileLexer
from .context import InitFileContext, SuiteFileContext, ResourceFileContext
from .context import (InitFileContext, LexingContext, SuiteFileContext,
ResourceFileContext)
from .tokenizer import Tokenizer
from .tokens import EOS, END, Token


def get_tokens(source, data_only=False, tokenize_variables=False, lang=None):
def get_tokens(source: Source, data_only: bool = False,
tokenize_variables: bool = False,
lang: LanguagesLike = None) -> 'Iterator[Token]':
"""Parses the given source to tokens.
:param source: The source where to read the data. Can be a path to
@@ -40,7 +45,7 @@ def get_tokens(source, data_only=False, tokenize_variables=False, lang=None):
method for details.
:param lang: Additional languages to be supported during parsing.
Can be a string matching any of the supported language codes or names,
an initialized :class:`~robot.conf.languages.Language` subsclass,
an initialized :class:`~robot.conf.languages.Language` subclass,
a list containing such strings or instances, or a
:class:`~robot.conf.languages.Languages` instance.
@@ -52,7 +57,9 @@ def get_tokens(source, data_only=False, tokenize_variables=False, lang=None):
return lexer.get_tokens()


def get_resource_tokens(source, data_only=False, tokenize_variables=False, lang=None):
def get_resource_tokens(source: Source, data_only: bool = False,
tokenize_variables: bool = False,
lang: LanguagesLike = None) -> 'Iterator[Token]':
"""Parses the given source to resource file tokens.
Same as :func:`get_tokens` otherwise, but the source is considered to be
@@ -63,7 +70,9 @@ def get_resource_tokens(source, data_only=False, tokenize_variables=False, lang=
return lexer.get_tokens()


def get_init_tokens(source, data_only=False, tokenize_variables=False, lang=None):
def get_init_tokens(source: Source, data_only: bool = False,
tokenize_variables: bool = False,
lang: LanguagesLike = None) -> 'Iterator[Token]':
"""Parses the given source to init file tokens.
Same as :func:`get_tokens` otherwise, but the source is considered to be
@@ -77,15 +86,15 @@ def get_init_tokens(source, data_only=False, tokenize_variables=False, lang=None

class Lexer:

def __init__(self, ctx, data_only=False, tokenize_variables=False):
def __init__(self, ctx: LexingContext, data_only: bool = False,
tokenize_variables: bool = False):
self.lexer = FileLexer(ctx)
self.data_only = data_only
self.tokenize_variables = tokenize_variables
self.statements = []
self.statements: 'list[list[Token]]' = []

def input(self, source):
for statement in Tokenizer().tokenize(self._read(source),
self.data_only):
def input(self, source: Source):
for statement in Tokenizer().tokenize(self._read(source), self.data_only):
# Store all tokens but pass only data tokens to lexer.
self.statements.append(statement)
if self.data_only:
@@ -96,27 +105,28 @@ def input(self, source):
if data:
self.lexer.input(data)

def _read(self, source):
def _read(self, source: Source) -> str:
try:
with FileReader(source, accept_text=True) as reader:
return reader.read()
except Exception:
raise DataError(get_error_message())

def get_tokens(self):
def get_tokens(self) -> 'Iterator[Token]':
self.lexer.lex()
statements = self.statements
if not self.data_only:
if self.data_only:
statements = self.statements
else:
statements = chain.from_iterable(
self._split_trailing_commented_and_empty_lines(s)
for s in statements
self._split_trailing_commented_and_empty_lines(stmt)
for stmt in self.statements
)
tokens = self._get_tokens(statements)
if self.tokenize_variables:
tokens = self._tokenize_variables(tokens)
return tokens

def _get_tokens(self, statements):
def _get_tokens(self, statements: 'list[list[Token]]') -> 'Iterator[Token]':
if self.data_only:
ignored_types = {None, Token.COMMENT_HEADER, Token.COMMENT}
else:
@@ -143,7 +153,8 @@ def _get_tokens(self, statements):
yield END.from_token(last, virtual=True)
yield EOS.from_token(last)

def _split_trailing_commented_and_empty_lines(self, statement):
def _split_trailing_commented_and_empty_lines(self, statement: 'list[Token]') \
-> 'list[list[Token]]':
lines = self._split_to_lines(statement)
commented_or_empty = []
for line in reversed(lines):
@@ -156,7 +167,7 @@ def _split_trailing_commented_and_empty_lines(self, statement):
statement = list(chain.from_iterable(lines))
return [statement] + list(reversed(commented_or_empty))

def _split_to_lines(self, statement):
def _split_to_lines(self, statement: 'list[Token]') -> 'list[list[Token]]':
lines = []
current = []
for token in statement:
@@ -168,15 +179,14 @@ def _split_to_lines(self, statement):
lines.append(current)
return lines

def _is_commented_or_empty(self, line):
def _is_commented_or_empty(self, line: 'list[Token]') -> bool:
separator_or_ignore = (Token.SEPARATOR, None)
comment_or_eol = (Token.COMMENT, Token.EOL)
for token in line:
if token.type not in separator_or_ignore:
return token.type in comment_or_eol
return False

def _tokenize_variables(self, tokens):
def _tokenize_variables(self, tokens: 'Iterator[Token]') -> 'Iterator[Token]':
for token in tokens:
for t in token.tokenize_variables():
yield t
yield from token.tokenize_variables()
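
For context, a minimal usage sketch of the newly annotated get_tokens() API. This is not part of the commit; it assumes a Robot Framework version that includes these hints, and the inline test data is illustrative only.

from robot.api import get_tokens

data = '''\
*** Test Cases ***
Example
    Log    Hello, world!
'''

# get_tokens() accepts a path, an open file, or a string of data and yields
# Token objects. With data_only=True, separators, comments and empty lines
# are dropped from the output.
for token in get_tokens(data, data_only=True):
    print(token.type, repr(token.value), token.lineno, token.col_offset)
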
9 changes: 5 additions & 4 deletions src/robot/parsing/lexer/tokenizer.py
@@ -14,6 +14,7 @@
# limitations under the License.

import re
from collections.abc import Iterator

from .tokens import Token

@@ -22,7 +23,7 @@ class Tokenizer:
_space_splitter = re.compile(r'(\s{2,}|\t)', re.UNICODE)
_pipe_splitter = re.compile(r'((?:\A|\s+)\|(?:\s+|\Z))', re.UNICODE)

def tokenize(self, data, data_only=False):
def tokenize(self, data: str, data_only: bool = False) -> 'Iterator[list[Token]]':
current = []
for lineno, line in enumerate(data.splitlines(not data_only), start=1):
tokens = self._tokenize_line(line, lineno, not data_only)
@@ -35,7 +36,7 @@ def tokenize(self, data, data_only=False):
current.extend(tokens)
yield current

def _tokenize_line(self, line, lineno, include_separators=True):
def _tokenize_line(self, line: str, lineno: int, include_separators: bool):
# Performance optimized code.
tokens = []
append = tokens.append
@@ -55,13 +56,13 @@ def _tokenize_line(self, line, lineno, include_separators=True):
append(Token(Token.EOL, trailing_whitespace, lineno, offset))
return tokens

def _split_from_spaces(self, line):
def _split_from_spaces(self, line: str) -> 'Iterator[tuple[str, bool]]':
is_data = True
for value in self._space_splitter.split(line):
yield value, is_data
is_data = not is_data

def _split_from_pipes(self, line):
def _split_from_pipes(self, line) -> 'Iterator[tuple[str, bool]]':
splitter = self._pipe_splitter
_, separator, rest = splitter.split(line, 1)
yield separator, False
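
An illustrative sketch of driving the internal Tokenizer directly. This is not part of the commit; the import path follows the file shown above, and the behaviour described in the comments is an assumption that may change between releases.

from robot.parsing.lexer.tokenizer import Tokenizer

data = '*** Settings ***\nLibrary    Collections\n'

# tokenize() yields one list of Token objects per statement. At this stage
# data tokens typically have no type yet (the lexer assigns types later);
# separator and end-of-line tokens are included because data_only is False.
for statement in Tokenizer().tokenize(data, data_only=False):
    print([token.value for token in statement])
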
48 changes: 23 additions & 25 deletions src/robot/parsing/lexer/tokens.py
@@ -13,6 +13,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.

from collections.abc import Iterator

from robot.variables import VariableIterator


@@ -26,16 +28,13 @@ class Token:
Token types are declared as class attributes such as :attr:`SETTING_HEADER`
and :attr:`EOL`. Values of these constants have changed slightly in Robot
Framework 4.0 and they may change again in the future. It is thus safer
Framework 4.0, and they may change again in the future. It is thus safer
to use the constants, not their values, when types are needed. For example,
use ``Token(Token.EOL)`` instead of ``Token('EOL')`` and
``token.type == Token.EOL`` instead of ``token.type == 'EOL'``.
If :attr:`value` is not given when :class:`Token` is initialized and
:attr:`type` is :attr:`IF`, :attr:`ELSE_IF`, :attr:`ELSE`, :attr:`FOR`,
:attr:`END`, :attr:`WITH_NAME` or :attr:`CONTINUATION`, the value is
automatically set to the correct marker value like ``'IF'`` or ``'ELSE IF'``.
If :attr:`type` is :attr:`EOL` in this case, the value is set to ``'\\n'``.
If :attr:`value` is not given and :attr:`type` is a special marker like
:attr:`IF` or `:attr:`EOL`, the value is set automatically.
"""

SETTING_HEADER = 'SETTING HEADER'
@@ -155,11 +154,11 @@ class Token:
TESTCASE_NAME,
KEYWORD_NAME
))

__slots__ = ['type', 'value', 'lineno', 'col_offset', 'error',
'_add_eos_before', '_add_eos_after']

def __init__(self, type=None, value=None, lineno=-1, col_offset=-1, error=None):
def __init__(self, type: 'str|None' = None, value: 'str|None' = None,
lineno: int = -1, col_offset: int = -1, error: 'str|None' = None):
self.type = type
if value is None:
value = {
@@ -179,21 +178,21 @@ def __init__(self, type=None, value=None, lineno=-1, col_offset=-1, error=None):
self._add_eos_after = False

@property
def end_col_offset(self):
def end_col_offset(self) -> int:
if self.col_offset == -1:
return -1
return self.col_offset + len(self.value)

def set_error(self, error):
def set_error(self, error: str):
self.type = Token.ERROR
self.error = error

def tokenize_variables(self):
def tokenize_variables(self) -> 'Iterator[Token]':
"""Tokenizes possible variables in token value.
Yields the token itself if the token does not allow variables (see
:attr:`Token.ALLOW_VARIABLES`) or its value does not contain
variables. Otherwise yields variable tokens as well as tokens
variables. Otherwise, yields variable tokens as well as tokens
before, after, or between variables so that they have the same
type as the original token.
"""
@@ -220,16 +219,15 @@ def _tokenize_variables(self, variables):
if remaining:
yield Token(self.type, remaining, lineno, col_offset)

def __str__(self):
def __str__(self) -> str:
return self.value

def __repr__(self):
type_ = self.type.replace(' ', '_') if self.type else 'None'
error = '' if not self.error else ', %r' % self.error
return 'Token(%s, %r, %s, %s%s)' % (type_, self.value, self.lineno,
self.col_offset, error)
def __repr__(self) -> str:
typ = self.type.replace(' ', '_') if self.type else 'None'
error = '' if not self.error else f', {self.error!r}'
return f'Token({typ}, {self.value!r}, {self.lineno}, {self.col_offset}{error})'

def __eq__(self, other):
def __eq__(self, other) -> bool:
return (isinstance(other, Token)
and self.type == other.type
and self.value == other.value
@@ -242,13 +240,13 @@ class EOS(Token):
"""Token representing end of a statement."""
__slots__ = []

def __init__(self, lineno=-1, col_offset=-1):
def __init__(self, lineno: int = -1, col_offset: int = -1):
super().__init__(Token.EOS, '', lineno, col_offset)

@classmethod
def from_token(cls, token, before=False):
def from_token(cls, token: Token, before: bool = False) -> 'EOS':
col_offset = token.col_offset if before else token.end_col_offset
return EOS(token.lineno, col_offset)
return cls(token.lineno, col_offset)


class END(Token):
@@ -259,10 +257,10 @@ class END(Token):
"""
__slots__ = []

def __init__(self, lineno=-1, col_offset=-1, virtual=False):
def __init__(self, lineno: int = -1, col_offset: int = -1, virtual: bool = False):
value = 'END' if not virtual else ''
super().__init__(Token.END, value, lineno, col_offset)

@classmethod
def from_token(cls, token, virtual=False):
return END(token.lineno, token.end_col_offset, virtual)
def from_token(cls, token: Token, virtual: bool = False) -> 'END':
return cls(token.lineno, token.end_col_offset, virtual)
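
A small sketch of the Token API and tokenize_variables(), based on the docstrings above. This is not part of the commit, and the example values are made up.

from robot.api import Token

token = Token(Token.ARGUMENT, 'Hello, ${name}!', lineno=1, col_offset=4)

# ARGUMENT is in Token.ALLOW_VARIABLES, so the value is split into VARIABLE
# tokens and surrounding tokens that keep the original ARGUMENT type.
for sub in token.tokenize_variables():
    print(sub.type, repr(sub.value), sub.col_offset)
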
30 changes: 18 additions & 12 deletions src/robot/parsing/parser/parser.py
@@ -13,14 +13,18 @@
# See the License for the specific language governing permissions and
# limitations under the License.

from ..lexer import Token, get_tokens, get_resource_tokens, get_init_tokens
from ..model import Statement, ModelVisitor
from robot.conf import LanguagesLike
from robot.utils import Source

from ..lexer import get_init_tokens, get_resource_tokens, get_tokens, Token
from ..model import File, ModelVisitor, Statement

from .fileparser import FileParser


def get_model(source, data_only=False, curdir=None, lang=None):
"""Parses the given source to a model represented as an AST.
def get_model(source: Source, data_only: bool = False, curdir: 'str|None' = None,
lang: LanguagesLike = None) -> File:
"""Parses the given source into a model represented as an AST.
How to use the model is explained more thoroughly in the general
documentation of the :mod:`robot.parsing` module.
@@ -36,11 +40,11 @@ def get_model(source, data_only=False, curdir=None, lang=None):
:param curdir: Directory where the source file exists. This path is used
to set the value of the built-in ``${CURDIR}`` variable during parsing.
When not given, the variable is left as-is. Should only be given
only if the model will be executed afterwards. If the model is saved
only if the model will be executed afterward. If the model is saved
back to disk, resolving ``${CURDIR}`` is typically not a good idea.
:param lang: Additional languages to be supported during parsing.
Can be a string matching any of the supported language codes or names,
an initialized :class:`~robot.conf.languages.Language` subsclass,
an initialized :class:`~robot.conf.languages.Language` subclass,
a list containing such strings or instances, or a
:class:`~robot.conf.languages.Languages` instance.
@@ -50,19 +54,21 @@ def get_model(source, data_only=False, curdir=None, lang=None):
return _get_model(get_tokens, source, data_only, curdir, lang)


def get_resource_model(source, data_only=False, curdir=None, lang=None):
"""Parses the given source to a resource file model.
def get_resource_model(source: Source, data_only: bool = False,
curdir: 'str|None' = None, lang: LanguagesLike = None) -> File:
"""Parses the given source into a resource file model.
Otherwise same as :func:`get_model` but the source is considered to be
Same as :func:`get_model` otherwise, but the source is considered to be
a resource file. This affects, for example, what settings are valid.
"""
return _get_model(get_resource_tokens, source, data_only, curdir, lang)


def get_init_model(source, data_only=False, curdir=None, lang=None):
"""Parses the given source to a init file model.
def get_init_model(source: Source, data_only: bool = False, curdir: 'str|None' = None,
lang: LanguagesLike = None) -> File:
"""Parses the given source into an init file model.
Otherwise same as :func:`get_model` but the source is considered to be
Same as :func:`get_model` otherwise, but the source is considered to be
a suite initialization file. This affects, for example, what settings are
valid.
"""
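
A minimal sketch of the typed get_model() API together with ModelVisitor. This is not part of the commit; 'example.robot' and the visitor class are illustrative only.

from robot.api import get_model
from robot.api.parsing import ModelVisitor


class TestNamePrinter(ModelVisitor):

    # Called for every TestCaseName statement in the parsed model.
    def visit_TestCaseName(self, node):
        print(f'{node.name} (line {node.lineno})')


# get_model() returns the File root node of the AST; a visitor can then
# walk it without executing anything.
model = get_model('example.robot')
TestNamePrinter().visit(model)
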
