Permalink
Browse files

unicode + html entities support, and replacing lxml with regex

  • Loading branch information...
1 parent 0212e25 commit dbe69c9a130966d0f747676d83734d12bb465655 @stefantalpalaru stefantalpalaru committed Jan 16, 2011
Showing with 36 additions and 28 deletions.
  1. +0 −1 README
  2. +7 −8 django_pygments/templatetags/pygmentify.py
  3. +24 −17 django_pygments/utils.py
  4. +4 −1 django_pygments/views.py
  5. +1 −1 setup.py
View
@@ -2,7 +2,6 @@ django-pygments is a Django app that provides a template tag and 2 filters for d
Dependencies:
- pygments
-- lxml
Installation:
- add django_pygments to your project directory and to INSTALLED_APPS in your settings.py
@@ -1,30 +1,29 @@
from django import template
from django.template.defaultfilters import stringfilter
-from django_pygments.utils import pygmentify_text
+from django_pygments.utils import pygmentify_html
from django.utils.safestring import mark_safe
register = template.Library()
-# truncate after a certain number of characters
@register.filter
@stringfilter
def pygmentify(value):
try:
- res = pygmentify_text(value)
+ res = pygmentify_html(value)
except Exception, e:
print e
- print 'value="%s"' % value
+ print u'value="%s"' % value
res = value
return mark_safe(res)
@register.filter
@stringfilter
def pygmentify_inline(value):
try:
- res = pygmentify_text(value, noclasses=True)
+ res = pygmentify_html(value, noclasses=True)
except Exception, e:
print e
- print 'value="%s"' % value
+ print u'value="%s"' % value
res = value
return mark_safe(res)
@@ -34,10 +33,10 @@ def __init__(self, nodelist):
def render(self, context):
output = self.nodelist.render(context)
try:
- res = pygmentify_text(output)
+ res = pygmentify_html(output)
except Exception, e:
print e
- print 'value="%s"' % value
+ print u'value="%s"' % value
res = output
return mark_safe(res)
@@ -4,7 +4,9 @@
warnings.resetwarnings()
from pygments import highlight
from pygments.formatters import HtmlFormatter
-from lxml import html
+from pprint import pprint
+import re
+from django.utils.encoding import smart_unicode
class ListHtmlFormatter(HtmlFormatter):
def wrap(self, source, outfile):
@@ -19,26 +21,31 @@ def _wrap_list(self, source):
yield i, t
yield 0, '</ol>'
-def pygmentify_text(text, **kwargs):
- default_lang = 'text'
-
+def pygmentify_html(text, **kwargs):
+ text = smart_unicode(text)
+ lang = default_lang = 'text'
# a tuple of known lexer names
lexer_names = reduce(lambda a,b: a + b[2], LEXERS.itervalues(), ())
-
# custom formatter
formatter = ListHtmlFormatter(encoding='utf-8', **kwargs)
-
- html_node = html.fragment_fromstring(text, create_parent='div')
- new_html_node = html_node
- for code_node in html_node.findall('pre'):
- if not code_node.text:
- continue
- lang = code_node.attrib.get('lang', default_lang)
- if lang not in lexer_names:
- lang = default_lang
+ subs = []
+ pre_re = re.compile(r'(<pre[^>]*>)(.*?)(</pre>)', re.DOTALL | re.UNICODE)
+ br_re = re.compile(r'<br[^>]*?>', re.UNICODE)
+ lang_re = re.compile(r'lang="(.+?)"', re.DOTALL | re.UNICODE)
+ for pre_match in pre_re.findall(text):
+ work_area = pre_match[1]
+ work_area = br_re.sub('\n', work_area)
+ match = lang_re.search (pre_match[0])
+ if match:
+ lang = match.group(1).strip()
+ if lang not in lexer_names:
+ lang = default_lang
lexer = get_lexer_by_name(lang, stripall=True)
- new_code_node = html.fragment_fromstring(highlight(code_node.text, lexer, formatter))
- new_html_node.replace(code_node, new_code_node)
+ work_area = work_area.replace(u'&nbsp;', u' ').replace(u'&amp;', u'&').replace(u'&lt;', u'<').replace(u'&gt;', u'>').replace(u'&quot;', u'"').replace(u'&#39;', u"'")
+ work_area = highlight(work_area, lexer, formatter)
+ subs.append([u''.join(pre_match), smart_unicode(work_area)])
+ for sub in subs:
+ text = text.replace(sub[0], sub[1], 1)
+ return text
- return html.tostring(new_html_node, encoding=unicode, method='xml')[5:-6] # need to strip the enclosing div
@@ -1,6 +1,9 @@
+# vim: set fileencoding=utf-8 :
from django.shortcuts import render_to_response, get_object_or_404
+from django.http import HttpResponseRedirect, HttpResponse, Http404
from django.template import RequestContext
from django.utils.html import escape
+from pygments.lexers import LEXERS
def demo(request):
@@ -17,7 +20,7 @@ def _wrap_list(self, source):
t = '<li><div class="line">%s</div></li>' % t
yield i, t
yield 0, '</ol>'
- # a very long comment that keeps on going and going and going and going and going and going and going and going and going and going and going and going
+ # a unicode comment: âăşţîÂĂŞŢÎ èéòçàù
"""
snippet = '<pre lang="python">' + escape(raw_snippet) + '</pre>'
return render_to_response('django_pygments/demo.html', locals(), context_instance = RequestContext(request))
View
@@ -16,7 +16,7 @@
url='http://github.com/odeoncg/django-pygments/tree/master',
packages=find_packages(),
include_package_data=True,
- install_requires=['pygments', 'lxml'],
+ install_requires=['pygments'],
classifiers=[
'Environment :: Web Environment',
'Intended Audience :: Developers',

0 comments on commit dbe69c9

Please sign in to comment.