Commit e82b154

Performance optimization for tokenizing data rows.
Only affects parsing with `data_only=True`.
pekkaklarck committed Apr 21, 2023
1 parent 479065d commit e82b154
Showing 1 changed file with 8 additions and 10 deletions: src/robot/parsing/lexer/tokenizer.py
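
For context, `data_only=True` is the tokenization mode exposed through Robot Framework's public parsing API, so this change affects any caller that asks for data tokens without separators and comments. A minimal usage sketch, assuming Robot Framework 4.0+ where the `robot.api.parsing` module is available:

    # A sketch, not part of this commit: tokenize data in the data_only
    # mode that the commit optimizes.
    from robot.api.parsing import get_tokens

    data = "*** Test Cases ***\nExample\n    Log    Hello    # comment\n"

    # With data_only=True, separator and comment tokens are excluded from
    # the output, so only actual data tokens are yielded.
    for token in get_tokens(data, data_only=True):
        print(repr(token))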
@@ -73,7 +73,8 @@ def _split_from_pipes(self, line) -> 'Iterator[tuple[str, bool]]':
         yield rest, True
 
     def _cleanup_tokens(self, tokens, data_only):
-        has_data, continues = self._handle_comments_and_continuation(tokens)
+        has_data, has_comments, continues \
+            = self._handle_comments_and_continuation(tokens)
         self._remove_trailing_empty(tokens)
         if continues:
             self._remove_leading_empty(tokens)
@@ -82,19 +83,19 @@ def _cleanup_tokens(self, tokens, data_only):
             starts_new = False
         else:
             starts_new = has_data
-        if data_only:
-            tokens = self._remove_non_data(tokens)
+        if data_only and (has_comments or continues):
+            tokens = [t for t in tokens if t.type is None]
         return tokens, starts_new
 
     def _handle_comments_and_continuation(self, tokens):
         has_data = False
-        continues = False
         commented = False
-        for token in tokens:
+        continues = False
+        for index, token in enumerate(tokens):
             if token.type is None:
                 # lstrip needed to strip possible leading space from first token.
                 # Other leading/trailing spaces have been consumed as separators.
-                value = token.value.lstrip()
+                value = token.value if index else token.value.lstrip()
                 if commented:
                     token.type = Token.COMMENT
                 elif value:
@@ -107,7 +108,7 @@ def _handle_comments_and_continuation(self, tokens):
                         continues = True
                     else:
                         has_data = True
-        return has_data, continues
+        return has_data, commented, continues
 
     def _remove_trailing_empty(self, tokens):
         for token in reversed(tokens):
@@ -133,6 +134,3 @@ def _find_continuation(self, tokens):
         for token in tokens:
             if token.type == Token.CONTINUATION:
                 return token
-
-    def _remove_non_data(self, tokens):
-        return [t for t in tokens if t.type is None]
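
Why this helps: in `data_only` mode separator tokens are never added to a row's token list in the first place, so when `_handle_comments_and_continuation` finds neither a comment nor a `...` continuation marker, every remaining token already has `type is None` and the removed `_remove_non_data` call merely produced an identical copy of the list. The new condition skips that copy on plain data rows, the common case, and the `token.value if index else token.value.lstrip()` change likewise limits the `lstrip()` call to the only token that can carry leading whitespace. A standalone sketch of the pattern, with hypothetical names (the real code reuses flags it already computed during its scan instead of the extra `any()` pass):

    # Hypothetical illustration: pay for filtering (an O(n) pass plus a
    # new list) only when the scan saw something that needs removing.
    def cleanup(tokens, data_only):
        saw_non_data = any(t.type is not None for t in tokens)
        if data_only and saw_non_data:
            tokens = [t for t in tokens if t.type is None]  # rare path
        return tokens  # common path: original list, no copy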
