Address Georg's review
jeanas committed Nov 14, 2023
1 parent 3b1878e commit 2403507
Showing 2 changed files with 22 additions and 18 deletions.
33 changes: 19 additions & 14 deletions pygments/lexer.py
@@ -199,20 +199,9 @@ def analyse_text(text):
         it's the same as if the return values was ``0.0``.
         """
 
-    def get_tokens(self, text, unfiltered=False):
-        """
-        This method is the basic interface of a lexer. It is called by
-        the `highlight()` function. It must process the text and return an
-        iterable of ``(tokentype, value)`` pairs from `text`.
-
-        Normally, you don't need to override this method. The default
-        implementation processes the options recognized by all lexers
-        (`stripnl`, `stripall` and so on), and then yields all tokens
-        from `get_tokens_unprocessed()`, with the ``index`` dropped.
-
-        If `unfiltered` is set to `True`, the filtering mechanism is
-        bypassed even if filters are defined.
-        """
+    def _preprocess_lexer_input(self, text):
+        """Apply preprocessing such as decoding the input, removing BOM and normalizing newlines."""
+
         if not isinstance(text, str):
             if self.encoding == 'guess':
                 text, _ = guess_decode(text)
@@ -255,7 +244,23 @@ def get_tokens(self, text, unfiltered=False):
         if self.ensurenl and not text.endswith('\n'):
             text += '\n'
 
-        self._input_for_tests = text
+        return text
+
+    def get_tokens(self, text, unfiltered=False):
+        """
+        This method is the basic interface of a lexer. It is called by
+        the `highlight()` function. It must process the text and return an
+        iterable of ``(tokentype, value)`` pairs from `text`.
+
+        Normally, you don't need to override this method. The default
+        implementation processes the options recognized by all lexers
+        (`stripnl`, `stripall` and so on), and then yields all tokens
+        from `get_tokens_unprocessed()`, with the ``index`` dropped.
+
+        If `unfiltered` is set to `True`, the filtering mechanism is
+        bypassed even if filters are defined.
+        """
+        text = self._preprocess_lexer_input(text)
 
         def streamer():
             for _, t, v in self.get_tokens_unprocessed(text):
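For context, a minimal sketch of the resulting call flow (the sample input and the choice of `PythonLexer` are illustrative, not part of the commit): `get_tokens()` now obtains its cleaned-up input from `_preprocess_lexer_input()` before tokenizing.

    from pygments.lexers import PythonLexer

    lexer = PythonLexer()

    # Illustrative input: bytes with a UTF-8 BOM and a Windows newline.
    # get_tokens() now routes this through _preprocess_lexer_input(),
    # which decodes the bytes, strips the BOM and normalizes newlines
    # before get_tokens_unprocessed() runs.
    source = b'\xef\xbb\xbfprint("hi")\r\n'

    for tokentype, value in lexer.get_tokens(source):
        print(tokentype, repr(value))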
7 changes: 3 additions & 4 deletions tests/conftest.py
@@ -50,10 +50,9 @@ def runtest(self):
         self.actual = '\n'.join(self._prettyprint_tokens(tokens)).rstrip('\n') + '\n'
         if self.config.getoption('--update-goldens'):
             # Make sure the new golden output corresponds to the input.
-            # lexer._input_for_tests is self.input but with a newline possibly
-            # added due to the ensurenl option, BOM possibly removed, ...
             output = ''.join(val for (tok, val) in tokens)
-            assert output == lexer._input_for_tests
+            preproc_input = lexer._preprocess_lexer_input(self.input)  # remove BOMs etc.
+            assert output == preproc_input
         else:
             # Make sure the output is the expected golden output
             assert self.actual == self.expected
@@ -76,7 +75,7 @@ def repr_failure(self, excinfo):
             message = (
                 'The tokens produced by the "{}" lexer differ from the '
                 'expected ones in the file "{}".\n'
-                'Run `pytest {} --update-goldens` to update it.'
+                'Run `tox -- {} --update-goldens` to update it.'
             ).format(self.lexer, rel_path, Path(*rel_path.parts[:2]))
             diff = str(excinfo.value).split('\n', 1)[-1]
             return message + '\n\n' + diff
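To illustrate what the updated golden-file check verifies (a rough sketch, not the test harness itself): with `--update-goldens`, the concatenated token values must round-trip to the preprocessed input rather than the raw file contents, since options like `ensurenl` may alter the text before lexing.

    from pygments.lexers import PythonLexer

    lexer = PythonLexer()
    raw_input = 'print("hi")'  # note: no trailing newline

    output = ''.join(val for (tok, val) in lexer.get_tokens(raw_input))

    # ensurenl appends a final '\n', so comparing against raw_input
    # would fail; the preprocessed input is the right reference.
    assert output != raw_input
    assert output == lexer._preprocess_lexer_input(raw_input)

The failure message now also suggests `tox -- <path> --update-goldens` instead of invoking pytest directly, presumably so goldens are regenerated inside the project's standard test environment.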
