# -*- coding: utf-8 -*-
"""
    pygments.filters
    ~~~~~~~~~~~~~~~~

    Module containing filter lookup functions and default
    filters.

    :copyright: Copyright 2006-2013 by the Pygments team, see AUTHORS.
    :license: BSD, see LICENSE for details.
"""

import re

from pygments.token import String, Comment, Keyword, Name, Error, Whitespace, \
    string_to_tokentype
from pygments.filter import Filter
from pygments.util import get_list_opt, get_int_opt, get_bool_opt, \
    get_choice_opt, ClassNotFound, OptionError
from pygments.plugin import find_plugin_filters

def find_filter_class(filtername):
    """
    Lookup a filter by name. Return None if not found.
    """
    if filtername in FILTERS:
        return FILTERS[filtername]
    for name, cls in find_plugin_filters():
        if name == filtername:
            return cls
    return None

def get_filter_by_name(filtername, **options):
    """
    Return an instantiated filter. Options are passed to the filter
    initializer if wanted. Raise a ClassNotFound if not found.
    """
    cls = find_filter_class(filtername)
    if cls:
        return cls(**options)
    else:
        raise ClassNotFound('filter %r not found' % filtername)

def get_all_filters():
    """
    Return a generator of all filter names.
    """
    for name in FILTERS:
        yield name
    for name, _ in find_plugin_filters():
        yield name
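
# Usage sketch for the lookup helpers above: listing the available filter
# names and instantiating one by name with options.  Wrapped in a helper
# function so nothing runs at import time; 'keywordcase' is one of the
# builtin names registered in FILTERS at the bottom of this module.
def _example_filter_lookup():
    available = list(get_all_filters())        # builtin + plugin filter names
    upper = get_filter_by_name('keywordcase', case='upper')
    return available, upper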

def _replace_special(ttype, value, regex, specialttype,
                     replacefunc=lambda x: x):
    # Split ``value`` around every match of ``regex``: unmatched stretches
    # keep ``ttype``, matched stretches are re-yielded as ``specialttype``,
    # optionally transformed by ``replacefunc``.
    last = 0
    for match in regex.finditer(value):
        start, end = match.start(), match.end()
        if start != last:
            yield ttype, value[last:start]
        yield specialttype, replacefunc(value[start:end])
        last = end
    if last != len(value):
        yield ttype, value[last:]
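
# Usage sketch for _replace_special: splitting a comment token around a code
# tag.  The regex and sample value are made up for illustration.
def _example_replace_special():
    regex = re.compile(r'\bTODO\b')
    return list(_replace_special(Comment, u'# TODO later', regex,
                                 Comment.Special))
    # -> [(Comment, u'# '), (Comment.Special, u'TODO'), (Comment, u' later')]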

class CodeTagFilter(Filter):
    """
    Highlight special code tags in comments and docstrings.

    Options accepted:

    `codetags` : list of strings
       A list of strings that are flagged as code tags.  The default is to
       highlight ``XXX``, ``TODO``, ``BUG`` and ``NOTE``.
    """

    def __init__(self, **options):
        Filter.__init__(self, **options)
        tags = get_list_opt(options, 'codetags',
                            ['XXX', 'TODO', 'BUG', 'NOTE'])
        self.tag_re = re.compile(r'\b(%s)\b' % '|'.join([
            re.escape(tag) for tag in tags if tag
        ]))

    def filter(self, lexer, stream):
        regex = self.tag_re
        for ttype, value in stream:
            if ttype in String.Doc or \
               ttype in Comment and \
               ttype not in Comment.Preproc:
                for sttype, svalue in _replace_special(ttype, value, regex,
                                                       Comment.Special):
                    yield sttype, svalue
            else:
                yield ttype, value
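
# Usage sketch: attaching CodeTagFilter to a lexer by its registered name.
# PythonLexer and the sample snippet are illustrative only; 'codetagify' is
# the key this filter gets in FILTERS below.
def _example_codetagify():
    from pygments import lex
    from pygments.lexers import PythonLexer
    lexer = PythonLexer()
    lexer.add_filter('codetagify', codetags=['TODO', 'FIXME'])
    # The 'TODO' inside the comment comes back as Comment.Special.
    return list(lex(u'# TODO: tighten this up\n', lexer))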

class KeywordCaseFilter(Filter):
    """
    Convert keywords to lowercase or uppercase or capitalize them, which
    means first letter uppercase, rest lowercase.

    This can be useful e.g. if you highlight Pascal code and want to adapt the
    code to your styleguide.

    Options accepted:

    `case` : string
       The casing to convert keywords to.  Must be one of ``'lower'``,
       ``'upper'`` or ``'capitalize'``.  The default is ``'lower'``.
    """

    def __init__(self, **options):
        Filter.__init__(self, **options)
        case = get_choice_opt(options, 'case',
                              ['lower', 'upper', 'capitalize'], 'lower')
        self.convert = getattr(unicode, case)

    def filter(self, lexer, stream):
        for ttype, value in stream:
            if ttype in Keyword:
                yield ttype, self.convert(value)
            else:
                yield ttype, value
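
# Usage sketch: uppercasing keywords in a hand-written token stream.  In real
# use the stream would come from a lexer; the lexer argument of filter() is
# not used by this filter, so None is passed.
def _example_keywordcase():
    filt = KeywordCaseFilter(case='upper')
    stream = [(Keyword, u'begin'), (Name, u'x'), (Keyword, u'end')]
    return list(filt.filter(None, stream))
    # -> [(Keyword, u'BEGIN'), (Name, u'x'), (Keyword, u'END')]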

class NameHighlightFilter(Filter):
    """
    Highlight a normal Name token with a different token type.

    Example::

        filter = NameHighlightFilter(
            names=['foo', 'bar', 'baz'],
            tokentype=Name.Function,
        )

    This would highlight the names "foo", "bar" and "baz"
    as functions. `Name.Function` is the default token type.

    Options accepted:

    `names` : list of strings
      A list of names that should be given the different token type.
      There is no default.
    `tokentype` : TokenType or string
      A token type or a string containing a token type name that is
      used for highlighting the strings in `names`.  The default is
      `Name.Function`.
    """

    def __init__(self, **options):
        Filter.__init__(self, **options)
        self.names = set(get_list_opt(options, 'names', []))
        tokentype = options.get('tokentype')
        if tokentype:
            self.tokentype = string_to_tokentype(tokentype)
        else:
            self.tokentype = Name.Function

    def filter(self, lexer, stream):
        for ttype, value in stream:
            if ttype is Name and value in self.names:
                yield self.tokentype, value
            else:
                yield ttype, value
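
# Usage sketch: the same effect as the docstring example above, but attached
# to a lexer by its registered name ('highlight') and with the token type
# given as a string, which string_to_tokentype resolves.  The lexer choice is
# illustrative.
def _example_name_highlight():
    from pygments import lex
    from pygments.lexers import PythonLexer
    lexer = PythonLexer()
    lexer.add_filter('highlight', names=['foo'], tokentype='Name.Constant')
    return list(lex(u'foo = 1\n', lexer))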

class ErrorToken(Exception):
    pass


class RaiseOnErrorTokenFilter(Filter):
    """
    Raise an exception when the lexer generates an error token.

    Options accepted:

    `excclass` : Exception class
       The exception class to raise.
       The default is `pygments.filters.ErrorToken`.

    *New in Pygments 0.8.*
    """

    def __init__(self, **options):
        Filter.__init__(self, **options)
        self.exception = options.get('excclass', ErrorToken)
        try:
            # issubclass() will raise TypeError if first argument is not a class
            if not issubclass(self.exception, Exception):
                raise TypeError
        except TypeError:
            raise OptionError('excclass option is not an exception class')

    def filter(self, lexer, stream):
        for ttype, value in stream:
            if ttype is Error:
                raise self.exception(value)
            yield ttype, value
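
# Usage sketch: failing fast on lexer errors.  The stream is hand-written and
# a custom exception class is passed via ``excclass`` to show the option.
def _example_raise_on_error():
    class MyLexError(Exception):
        pass
    filt = RaiseOnErrorTokenFilter(excclass=MyLexError)
    stream = [(Name, u'ok'), (Error, u'@!?')]
    try:
        return list(filt.filter(None, stream))
    except MyLexError as exc:
        return exc  # raised as soon as the Error token is seen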

class VisibleWhitespaceFilter(Filter):
    """
    Convert tabs, newlines and/or spaces to visible characters.

    Options accepted:

    `spaces` : string or bool
      If this is a one-character string, spaces will be replaced by this string.
      If it is another true value, spaces will be replaced by ``·`` (unicode
      MIDDLE DOT).  If it is a false value, spaces will not be replaced.  The
      default is ``False``.
    `tabs` : string or bool
      The same as for `spaces`, but the default replacement character is ``»``
      (unicode RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK).  The default value
      is ``False``.  Note: this will not work if the `tabsize` option for the
      lexer is nonzero, as tabs will already have been expanded then.
    `tabsize` : int
      If tabs are to be replaced by this filter (see the `tabs` option), this
      is the total number of characters that a tab should be expanded to.
      The default is ``8``.
    `newlines` : string or bool
      The same as for `spaces`, but the default replacement character is ``¶``
      (unicode PILCROW SIGN).  The default value is ``False``.
    `wstokentype` : bool
      If true, give whitespace the special `Whitespace` token type.  This allows
      styling the visible whitespace differently (e.g. greyed out), but it can
      disrupt background colors.  The default is ``True``.

    *New in Pygments 0.8.*
    """

    def __init__(self, **options):
        Filter.__init__(self, **options)
        for name, default in {'spaces':   u'·',
                              'tabs':     u'»',
                              'newlines': u'¶'}.items():
            opt = options.get(name, False)
            if isinstance(opt, basestring) and len(opt) == 1:
                setattr(self, name, opt)
            else:
                setattr(self, name, (opt and default or ''))
        tabsize = get_int_opt(options, 'tabsize', 8)
        if self.tabs:
            self.tabs += ' ' * (tabsize - 1)
        if self.newlines:
            self.newlines += '\n'
        self.wstt = get_bool_opt(options, 'wstokentype', True)

    def filter(self, lexer, stream):
        if self.wstt:
            # re-split the stream so every whitespace character gets its own
            # Whitespace token
            spaces = self.spaces or ' '
            tabs = self.tabs or '\t'
            newlines = self.newlines or '\n'
            regex = re.compile(r'\s')

            def replacefunc(wschar):
                if wschar == ' ':
                    return spaces
                elif wschar == '\t':
                    return tabs
                elif wschar == '\n':
                    return newlines
                return wschar

            for ttype, value in stream:
                for sttype, svalue in _replace_special(ttype, value, regex,
                                                       Whitespace, replacefunc):
                    yield sttype, svalue
        else:
            spaces, tabs, newlines = self.spaces, self.tabs, self.newlines
            # simpler processing: keep the token types, only rewrite the values
            for ttype, value in stream:
                if spaces:
                    value = value.replace(' ', spaces)
                if tabs:
                    value = value.replace('\t', tabs)
                if newlines:
                    value = value.replace('\n', newlines)
                yield ttype, value
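
# Usage sketch: making spaces and newlines visible.  ``wstokentype=False``
# keeps the original token types and only rewrites the characters, i.e. the
# simpler of the two code paths above.  The stream is hand-written.
def _example_visible_whitespace():
    filt = VisibleWhitespaceFilter(spaces=True, newlines=True,
                                   wstokentype=False)
    stream = [(Name, u'x = 1\n')]
    return list(filt.filter(None, stream))
    # -> [(Name, u'x·=·1¶\n')]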

class GobbleFilter(Filter):
    """
    Gobbles source code lines (eats initial characters).

    This filter drops the first ``n`` characters off every line of code.  This
    may be useful when the source code fed to the lexer is indented by a fixed
    amount of space that isn't desired in the output.

    Options accepted:

    `n` : int
       The number of characters to gobble.

    *New in Pygments 1.2.*
    """

    def __init__(self, **options):
        Filter.__init__(self, **options)
        self.n = get_int_opt(options, 'n', 0)

    def gobble(self, value, left):
        if left < len(value):
            return value[left:], 0
        else:
            return '', left - len(value)

    def filter(self, lexer, stream):
        n = self.n
        left = n  # How many characters are left to gobble.
        for ttype, value in stream:
            # Remove ``left`` characters from the first line, ``n`` from all
            # others.
            parts = value.split('\n')
            (parts[0], left) = self.gobble(parts[0], left)
            for i in range(1, len(parts)):
                (parts[i], left) = self.gobble(parts[i], n)
            value = '\n'.join(parts)

            if value != '':
                yield ttype, value
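
# Usage sketch: stripping a fixed four-space indent from already-indented
# source.  The hand-written stream stands in for lexer output.
def _example_gobble():
    filt = GobbleFilter(n=4)
    stream = [(Name, u'    foo\n    bar\n')]
    return list(filt.filter(None, stream))
    # -> [(Name, u'foo\nbar\n')]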

class TokenMergeFilter(Filter):
    """
    Merges consecutive tokens with the same token type in the output
    stream of a lexer.

    *New in Pygments 1.2.*
    """

    def __init__(self, **options):
        Filter.__init__(self, **options)

    def filter(self, lexer, stream):
        current_type = None
        current_value = None
        for ttype, value in stream:
            if ttype is current_type:
                current_value += value
            else:
                if current_type is not None:
                    yield current_type, current_value
                current_type = ttype
                current_value = value
        if current_type is not None:
            yield current_type, current_value
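
# Usage sketch: collapsing a run of identically-typed tokens into one.
def _example_token_merge():
    filt = TokenMergeFilter()
    stream = [(Whitespace, u' '), (Whitespace, u' '), (Name, u'x')]
    return list(filt.filter(None, stream))
    # -> [(Whitespace, u'  '), (Name, u'x')]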

FILTERS = {
    'codetagify':   CodeTagFilter,
    'keywordcase':  KeywordCaseFilter,
    'highlight':    NameHighlightFilter,
    'raiseonerror': RaiseOnErrorTokenFilter,
    'whitespace':   VisibleWhitespaceFilter,
    'gobble':       GobbleFilter,
    'tokenmerge':   TokenMergeFilter,
}