Skip to content

Commit

Permalink
Merge pull request #2582 from jeanas/roundtrip-test
Browse files Browse the repository at this point in the history
tests: Check that golden tokens add up to the input
  • Loading branch information
Anteru committed Nov 14, 2023
2 parents 7536da3 + 2403507 commit f57a2dd
Show file tree
Hide file tree
Showing 2 changed files with 40 additions and 21 deletions.
33 changes: 20 additions & 13 deletions pygments/lexer.py
Original file line number Diff line number Diff line change
Expand Up @@ -199,20 +199,9 @@ def analyse_text(text):
it's the same as if the return values was ``0.0``.
"""

def get_tokens(self, text, unfiltered=False):
"""
This method is the basic interface of a lexer. It is called by
the `highlight()` function. It must process the text and return an
iterable of ``(tokentype, value)`` pairs from `text`.
def _preprocess_lexer_input(self, text):
"""Apply preprocessing such as decoding the input, removing BOM and normalizing newlines."""

Normally, you don't need to override this method. The default
implementation processes the options recognized by all lexers
(`stripnl`, `stripall` and so on), and then yields all tokens
from `get_tokens_unprocessed()`, with the ``index`` dropped.
If `unfiltered` is set to `True`, the filtering mechanism is
bypassed even if filters are defined.
"""
if not isinstance(text, str):
if self.encoding == 'guess':
text, _ = guess_decode(text)
Expand Down Expand Up @@ -255,6 +244,24 @@ def get_tokens(self, text, unfiltered=False):
if self.ensurenl and not text.endswith('\n'):
text += '\n'

return text

def get_tokens(self, text, unfiltered=False):
"""
This method is the basic interface of a lexer. It is called by
the `highlight()` function. It must process the text and return an
iterable of ``(tokentype, value)`` pairs from `text`.
Normally, you don't need to override this method. The default
implementation processes the options recognized by all lexers
(`stripnl`, `stripall` and so on), and then yields all tokens
from `get_tokens_unprocessed()`, with the ``index`` dropped.
If `unfiltered` is set to `True`, the filtering mechanism is
bypassed even if filters are defined.
"""
text = self._preprocess_lexer_input(text)

def streamer():
for _, t, v in self.get_tokens_unprocessed(text):
yield t, v
Expand Down
28 changes: 20 additions & 8 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,9 +46,15 @@ def _prettyprint_tokens(cls, tokens):

def runtest(self):
    """Lex this item's input and check it against the golden output.

    In normal runs, the pretty-printed token stream must match the stored
    golden file. With ``--update-goldens``, instead verify a roundtrip
    invariant: the concatenation of all token values must reproduce the
    (preprocessed) input text, so the new golden output is self-consistent.
    """
    lexer = pygments.lexers.get_lexer_by_name(self.lexer)
    # Materialize the token stream once; it is consumed twice below
    # (pretty-printing and, in update mode, the roundtrip check).
    tokens = list(lexer.get_tokens(self.input))
    self.actual = '\n'.join(self._prettyprint_tokens(tokens)).rstrip('\n') + '\n'
    if self.config.getoption('--update-goldens'):
        # Make sure the new golden output corresponds to the input:
        # joining every token value must give back the preprocessed text.
        output = ''.join(val for (tok, val) in tokens)
        preproc_input = lexer._preprocess_lexer_input(self.input)  # remove BOMs etc.
        assert output == preproc_input
    else:
        # Make sure the output is the expected golden output
        assert self.actual == self.expected

def _test_file_rel_path(self):
Expand All @@ -59,12 +65,18 @@ def _prunetraceback(self, excinfo):

def repr_failure(self, excinfo):
if isinstance(excinfo.value, AssertionError):
rel_path = self._test_file_rel_path()
message = (
'The tokens produced by the "{}" lexer differ from the '
'expected ones in the file "{}".\n'
'Run `pytest {} --update-goldens` to update it.'
).format(self.lexer, rel_path, Path(*rel_path.parts[:2]))
if self.config.getoption('--update-goldens'):
message = (
f'The tokens produced by the "{self.lexer}" lexer '
'do not add up to the input.'
)
else:
rel_path = self._test_file_rel_path()
message = (
'The tokens produced by the "{}" lexer differ from the '
'expected ones in the file "{}".\n'
'Run `tox -- {} --update-goldens` to update it.'
).format(self.lexer, rel_path, Path(*rel_path.parts[:2]))
diff = str(excinfo.value).split('\n', 1)[-1]
return message + '\n\n' + diff
else:
Expand Down

0 comments on commit f57a2dd

Please sign in to comment.