added support for token stream filtering and preprocessing.

--HG--
branch : trunk
commit 9ad96e7af659846abcf45e0c64bf07dacf7881c4 1 parent e3290ea
Armin Ronacher authored
4 CHANGES
@@ -13,6 +13,10 @@ Version 2.0
13 13
14 14 For more information see :ref:`the implementation details <notes-on-subscribing>`.
15 15
  16 +- added support for preprocessing and token stream filtering for extensions.
  17 + This allows extensions to implement features such as simplified gettext
  18 + calls in template data.
  19 +
16 20 Version 2.0rc1
17 21 --------------
18 22 (no codename, released on July 9th 2008)
2  docs/api.rst
@@ -590,6 +590,8 @@ don't recommend using any of those.
590 590
591 591 .. automethod:: Environment.parse
592 592
  593 +.. automethod:: Environment.preprocess
  594 +
593 595 .. automethod:: Template.new_context
594 596
595 597 .. method:: Template.root_render_func(context)
2  docs/extensions.rst
@@ -168,7 +168,7 @@ Extension API
168 168 Extensions always have to extend the :class:`jinja2.ext.Extension` class:
169 169
170 170 .. autoclass:: Extension
171   - :members: parse, attr, call_method
  171 + :members: preprocess, filter_stream, parse, attr, call_method
172 172
173 173 .. attribute:: identifier
174 174
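
For orientation, here is a minimal sketch of an extension that uses the newly documented preprocess hook. The class name and the <<...>> shorthand are invented for illustration; only the preprocess signature comes from this commit, and the code targets the Python 2 era API shown in this diff.

from jinja2 import Environment
from jinja2.ext import Extension


class AngleBracketExtension(Extension):
    """Hypothetical extension: lets templates write <<name>> for {{ name }}."""

    def preprocess(self, source, name, filename=None):
        # called with the raw template source before it is lexed
        return source.replace('<<', '{{ ').replace('>>', ' }}')


env = Environment(extensions=[AngleBracketExtension])
print env.from_string('hello <<name>>!').render(name='world')
# -> hello world!
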
31 jinja2/environment.py
@@ -10,7 +10,7 @@
10 10 """
11 11 import sys
12 12 from jinja2.defaults import *
13   -from jinja2.lexer import Lexer
  13 +from jinja2.lexer import Lexer, TokenStream
14 14 from jinja2.parser import Parser
15 15 from jinja2.optimizer import optimize
16 16 from jinja2.compiler import generate
@@ -339,8 +339,35 @@ def lex(self, source, name=None, filename=None):
339 339 tokens as tuples in the form ``(lineno, token_type, value)``.
340 340 This can be useful for :ref:`extension development <writing-extensions>`
341 341 and debugging templates.
  342 +
  343 + This does not perform preprocessing. If you want the preprocessing
  344 + of the extensions to be applied you have to run the source through
  345 + the :meth:`preprocess` method first.
  346 + """
  347 + return self.lexer.tokeniter(unicode(source), name, filename)
  348 +
  349 + def preprocess(self, source, name=None, filename=None):
  350 + """Preprocesses the source with all extensions. This is automatically
  351 + called for all parsing and compiling methods but *not* for :meth:`lex`
  352 + because there you usually only want the actual source tokenized.
  353 + """
  354 + return reduce(lambda s, e: e.preprocess(s, name, filename),
  355 + self.extensions.itervalues(), unicode(source))
  356 +
  357 + def _tokenize(self, source, name, filename=None):
  358 + """Called by the parser to do the preprocessing and filtering
  359 + for all the extensions. Returns a :class:`~jinja2.lexer.TokenStream`.
342 360 """
343   - return self.lexer.tokeniter(source, name, filename)
  361 + def _stream(iterable):
  362 + if not isinstance(iterable, TokenStream):
  363 + iterable = TokenStream(iterable, name, filename)
  364 + return iterable
  365 + source = self.preprocess(source, name, filename)
  366 + tokeniter = self.lexer.tokeniter(source, name, filename)
  367 + stream = _stream(self.lexer.wrap(tokeniter, name, filename))
  368 + for ext in self.extensions.itervalues():
  369 + stream = _stream(ext.filter_stream(stream))
  370 + return stream
344 371
345 372 def compile(self, source, name=None, filename=None, raw=False):
346 373 """Compile a node or template source code. The `name` parameter is
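
A usage sketch of the Environment changes above, assuming the hypothetical AngleBracketExtension from the previous sketch; the printed output is illustrative only.

from jinja2 import Environment

env = Environment(extensions=[AngleBracketExtension])
source = 'hello <<name>>!'

# lex() tokenizes the raw source; extension preprocessing is *not* applied
for lineno, token_type, value in env.lex(source):
    print lineno, token_type, repr(value)

# preprocess() runs every extension's preprocess() hook over the source,
# so lexing its result shows the tokens the parser will actually see
for lineno, token_type, value in env.lex(env.preprocess(source)):
    print lineno, token_type, repr(value)
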
16 jinja2/ext.py
@@ -16,6 +16,7 @@
16 16 from jinja2.environment import get_spontaneous_environment
17 17 from jinja2.runtime import Undefined, concat
18 18 from jinja2.exceptions import TemplateAssertionError, TemplateSyntaxError
  19 +from jinja2.lexer import Token
19 20 from jinja2.utils import contextfunction, import_string, Markup
20 21
21 22
@@ -67,6 +68,21 @@ def bind(self, environment):
67 68 rv.environment = environment
68 69 return rv
69 70
  71 + def preprocess(self, source, name, filename=None):
  72 + """This method is called before the actual lexing and can be used to
  73 + preprocess the source. The `filename` is optional. The return value
  74 + must be the preprocessed source.
  75 + """
  76 + return source
  77 +
  78 + def filter_stream(self, stream):
  79 + """It's passed a :class:`~jinja2.lexer.TokenStream` that can be used
  80 + to filter tokens returned. This method has to return an iterable of
  81 + :class:`~jinja2.lexer.Token`\s, but it doesn't have to return a
  82 + :class:`~jinja2.lexer.TokenStream`.
  83 + """
  84 + return stream
  85 +
70 86 def parse(self, parser):
71 87 """If any of the :attr:`tags` matched this method is called with the
72 88 parser as first argument. The token the parser stream is pointing at
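
And a sketch of the other hook, filter_stream, using the Token class that jinja2/ext.py now imports. The extension itself is invented for illustration.

from jinja2 import Environment
from jinja2.ext import Extension
from jinja2.lexer import Token


class UpperCaseDataExtension(Extension):
    """Hypothetical extension: upper-cases the static text between tags."""

    def filter_stream(self, stream):
        for token in stream:
            if token.type == 'data':
                # Token behaves like a (lineno, type, value) tuple
                yield Token(token.lineno, 'data', token.value.upper())
            else:
                yield token


env = Environment(extensions=[UpperCaseDataExtension])
print env.from_string('hello {{ name }}!').render(name='world')
# -> HELLO world!
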
110 jinja2/lexer.py
@@ -133,17 +133,17 @@ class TokenStreamIterator(object):
133 133 """
134 134
135 135 def __init__(self, stream):
136   - self._stream = stream
  136 + self.stream = stream
137 137
138 138 def __iter__(self):
139 139 return self
140 140
141 141 def next(self):
142   - token = self._stream.current
  142 + token = self.stream.current
143 143 if token.type == 'eof':
144   - self._stream.close()
  144 + self.stream.close()
145 145 raise StopIteration()
146   - self._stream.next(False)
  146 + self.stream.next()
147 147 return token
148 148
149 149
@@ -154,11 +154,12 @@ class TokenStream(object):
154 154 """
155 155
156 156 def __init__(self, generator, name, filename):
157   - self._next = generator.next
  157 + self._next = iter(generator).next
158 158 self._pushed = deque()
159   - self.current = Token(1, 'initial', '')
160 159 self.name = name
161 160 self.filename = filename
  161 + self.closed = False
  162 + self.current = Token(1, 'initial', '')
162 163 self.next()
163 164
164 165 def __iter__(self):
@@ -214,6 +215,7 @@ def close(self):
214 215 """Close the stream."""
215 216 self.current = Token(self.current.lineno, 'eof', '')
216 217 self._next = None
  218 + self.closed = True
217 219
218 220 def expect(self, expr):
219 221 """Expect a given token type and return it. This accepts the same
@@ -374,60 +376,60 @@ def _normalize_newlines(self, value):
374 376 return newline_re.sub(self.newline_sequence, value)
375 377
376 378 def tokenize(self, source, name=None, filename=None):
377   - """Works like `tokeniter` but returns a tokenstream of tokens and not
378   - a generator or token tuples. Additionally all token values are already
379   - converted into types and postprocessed. For example comments are removed,
380   - integers and floats converted, strings unescaped etc.
  379 + """Calls tokeniter + tokenize and wraps it in a token stream.
  380 + This is currently only used for unittests.
381 381 """
382   - def generate():
383   - for lineno, token, value in self.tokeniter(source, name, filename):
384   - if token in ('comment_begin', 'comment', 'comment_end',
385   - 'whitespace'):
386   - continue
387   - elif token == 'linestatement_begin':
388   - token = 'block_begin'
389   - elif token == 'linestatement_end':
390   - token = 'block_end'
391   - # we are not interested in those tokens in the parser
392   - elif token in ('raw_begin', 'raw_end'):
393   - continue
394   - elif token == 'data':
395   - value = self._normalize_newlines(value)
396   - elif token == 'keyword':
397   - token = value
398   - elif token == 'name':
  382 + stream = self.tokeniter(source, name, filename)
  383 + return TokenStream(self.wrap(stream, name, filename), name, filename)
  384 +
  385 + def wrap(self, stream, name=None, filename=None):
  386 + """This is called with the stream as returned by `tokenize` and wraps
  387 + every token in a :class:`Token` and converts the value.
  388 + """
  389 + for lineno, token, value in stream:
  390 + if token in ('comment_begin', 'comment', 'comment_end',
  391 + 'whitespace'):
  392 + continue
  393 + elif token == 'linestatement_begin':
  394 + token = 'block_begin'
  395 + elif token == 'linestatement_end':
  396 + token = 'block_end'
  397 + # we are not interested in those tokens in the parser
  398 + elif token in ('raw_begin', 'raw_end'):
  399 + continue
  400 + elif token == 'data':
  401 + value = self._normalize_newlines(value)
  402 + elif token == 'keyword':
  403 + token = value
  404 + elif token == 'name':
  405 + value = str(value)
  406 + elif token == 'string':
  407 + # try to unescape string
  408 + try:
  409 + value = self._normalize_newlines(value[1:-1]) \
  410 + .encode('ascii', 'backslashreplace') \
  411 + .decode('unicode-escape')
  412 + except Exception, e:
  413 + msg = str(e).split(':')[-1].strip()
  414 + raise TemplateSyntaxError(msg, lineno, name, filename)
  415 + # if we can express it as bytestring (ascii only)
  416 + # we do that for support of semi broken APIs
  417 + # as datetime.datetime.strftime
  418 + try:
399 419 value = str(value)
400   - elif token == 'string':
401   - # try to unescape string
402   - try:
403   - value = self._normalize_newlines(value[1:-1]) \
404   - .encode('ascii', 'backslashreplace') \
405   - .decode('unicode-escape')
406   - except Exception, e:
407   - msg = str(e).split(':')[-1].strip()
408   - raise TemplateSyntaxError(msg, lineno, name, filename)
409   - # if we can express it as bytestring (ascii only)
410   - # we do that for support of semi broken APIs
411   - # as datetime.datetime.strftime
412   - try:
413   - value = str(value)
414   - except UnicodeError:
415   - pass
416   - elif token == 'integer':
417   - value = int(value)
418   - elif token == 'float':
419   - value = float(value)
420   - elif token == 'operator':
421   - token = operators[value]
422   - yield Token(lineno, token, value)
423   - return TokenStream(generate(), name, filename)
  420 + except UnicodeError:
  421 + pass
  422 + elif token == 'integer':
  423 + value = int(value)
  424 + elif token == 'float':
  425 + value = float(value)
  426 + elif token == 'operator':
  427 + token = operators[value]
  428 + yield Token(lineno, token, value)
424 429
425 430 def tokeniter(self, source, name, filename=None):
426 431 """This method tokenizes the text and returns the tokens in a
427 432 generator. Use this method if you just want to tokenize a template.
428   - The output you get is not compatible with the input the jinja parser
429   - wants. The parser uses the `tokenize` function with returns a
430   - `TokenStream` and postprocessed tokens.
431 433 """
432 434 source = '\n'.join(unicode(source).splitlines())
433 435 pos = 0
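
The relationship between tokeniter, wrap and tokenize after this refactoring, as a rough sketch; the printed output is indicative only.

from jinja2 import Environment

env = Environment()

# tokeniter() yields raw (lineno, type, value) tuples straight from the lexer rules
for lineno, token_type, value in env.lexer.tokeniter(u'{{ 1 + 2 }}', 'example'):
    print lineno, token_type, repr(value)

# tokenize() = tokeniter() + wrap(): postprocessed Token objects in a TokenStream
stream = env.lexer.tokenize(u'{{ 1 + 2 }}', name='example')
for token in stream:
    print token.lineno, token.type, repr(token.value)
print stream.closed   # True -- the new flag is set once the stream reaches 'eof'
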
3  jinja2/parser.py
@@ -25,11 +25,10 @@ class Parser(object):
25 25
26 26 def __init__(self, environment, source, name=None, filename=None):
27 27 self.environment = environment
28   - self.source = unicode(source)
  28 + self.stream = environment._tokenize(source, name, filename)
29 29 self.name = name
30 30 self.filename = filename
31 31 self.closed = False
32   - self.stream = environment.lexer.tokenize(self.source, name, filename)
33 32 self.extensions = {}
34 33 for extension in environment.extensions.itervalues():
35 34 for tag in extension.tags:
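
Because the Parser now pulls its stream from Environment._tokenize(), both hooks run for every parse and compile. A final sketch, again assuming the hypothetical extensions from above:

from jinja2 import Environment

env = Environment(extensions=[AngleBracketExtension, UpperCaseDataExtension])
# parse(), compile() and from_string() all go through Parser, which now calls
# Environment._tokenize(), so preprocess() and filter_stream() both apply
print env.from_string('hello <<name>>!').render(name='world')
# -> HELLO world!
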
