Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.
Download ZIP

Loading…

Allow overriding of convertWebIntelligentPlainTextToHtml bits #1

Merged
merged 1 commit into plone:master

2 participants

@dnouri

This allows an easy way to extend the converter through subclassing.
One might want to override the regexps, or modify the HTML that one of
the replace* methods produces.

Fully compatible with old API, all tests passing.

@dnouri dnouri Create a class consisting of the inner functions and variable defs of convertWebIntelligentPlainTextToHtml.

This allows an easy way to extend the converter through subclassing.
One might want to override the regexps, or modify the HTML that one of
the replace* methods produces.
3b1a8e2
@garbas garbas merged commit acd58a8 into plone:master
@garbas
Collaborator

added description to changelog
01f7b8a

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Commits on Feb 10, 2012
  1. @dnouri

    Create a class consisting of the inner functions and variable defs of
    convertWebIntelligentPlainTextToHtml.

    dnouri authored
    
    This allows an easy way to extend the converter through subclassing.
    One might want to override the regexps, or modify the HTML that one of
    the replace* methods produces.
This page is out of date. Refresh to see the latest.
Showing with 53 additions and 41 deletions.
  1. +52 −41 plone/intelligenttext/transforms.py
  2. +1 −0  setup.py
View
93 plone/intelligenttext/transforms.py
@@ -1,16 +1,45 @@
from htmlentitydefs import entitydefs
import re
+class WebIntelligentToHtmlConverter(object):
+ urlRegexp = re.compile(r'((?:ftp|https?)://(localhost|([12]?[0-9]{1,2}.){3}([12]?[0-9]{1,2})|(?:[a-z0-9](?:[-a-z0-9]*[a-z0-9])?\.)+(?:com|edu|biz|org|gov|int|info|mil|net|name|museum|coop|aero|[a-z][a-z]))\b(?::\d+)?(?:\/[^"\'<>()\[\]{}\s\x7f-\xff]*(?:[.,?]+[^"\'<>()\[\]{}\s\x7f-\xff]+)*)?)', re.I|re.S|re.U)
+ emailRegexp = re.compile(r'["=]?(\b[A-Z0-9._%-]+@[A-Z0-9._%-]+\.[A-Z]{2,4}\b)', re.I|re.S|re.U)
+ indentRegexp = re.compile(r'^(\s+)', re.M|re.U)
-def convertWebIntelligentPlainTextToHtml(orig, tab_width=4):
- """Converts text/x-web-intelligent to text/html
- """
- try:
- # tab_width could be a string like '4'
- tab_width = int(tab_width)
- except ValueError:
- tab_width=4
+ def __init__(self, orig, tab_width=4):
+ self.orig = orig
+ self.tab_width = tab_width
+
+ def __call__(self):
+ text = self.orig
+ if text is None:
+ text = ''
+ if not isinstance(text, unicode):
+ text = unicode(text, 'utf-8', 'replace')
+
+ # Do &amp; separately, else, it may replace an already-inserted & from
+ # an entity with &amp;, so < becomes &lt; becomes &amp;lt;
+ text = text.replace('&', '&amp;')
+ # Make funny characters into html entity defs
+ for entity, letter in entitydefs.items():
+ if entity != 'amp':
+ text = text.replace(
+ letter.decode('latin-1'), '&' + entity + ';')
+
+ text = self.urlRegexp.subn(self.replaceURL, text)[0]
+ text = self.emailRegexp.subn(self.replaceEmail, text)[0]
+ text = self.indentRegexp.subn(self.indentWhitespace, text)[0]
+ # convert windows line endings
+ text = text.replace('\r\n', '\n')
+ # Finally, make \n's into br's
+ text = text.replace('\n', '<br />')
+
+ text = text.encode('utf-8')
+
+ return text
+
+ @staticmethod
def abbreviateUrl(url, max = 60, ellipsis = "[&hellip;]"):
"""very long urls are abbreviated to allow nicer layout
"""
@@ -29,29 +58,12 @@ def abbreviateUrl(url, max = 60, ellipsis = "[&hellip;]"):
return protocol + list[0] +"/" +ellipsis + "/" + list[-1]
- urlRegexp = re.compile(r'((?:ftp|https?)://(localhost|([12]?[0-9]{1,2}.){3}([12]?[0-9]{1,2})|(?:[a-z0-9](?:[-a-z0-9]*[a-z0-9])?\.)+(?:com|edu|biz|org|gov|int|info|mil|net|name|museum|coop|aero|[a-z][a-z]))\b(?::\d+)?(?:\/[^"\'<>()\[\]{}\s\x7f-\xff]*(?:[.,?]+[^"\'<>()\[\]{}\s\x7f-\xff]+)*)?)', re.I|re.S|re.U)
- emailRegexp = re.compile(r'["=]?(\b[A-Z0-9._%-]+@[A-Z0-9._%-]+\.[A-Z]{2,4}\b)', re.I|re.S|re.U)
- indentRegexp = re.compile(r'^(\s+)', re.M|re.U)
-
- text = orig
- if text is None:
- text = ''
- if not isinstance(text, unicode):
- text = unicode(text, 'utf-8', 'replace')
-
- # Do &amp; separately, else, it may replace an already-inserted & from
- # an entity with &amp;, so < becomes &lt; becomes &amp;lt;
- text = text.replace('&', '&amp;')
- # Make funny characters into html entity defs
- for entity, letter in entitydefs.items():
- if entity != 'amp':
- text = text.replace(letter.decode('latin-1'), '&' + entity + ';')
-
- def replaceURL(match):
+ @classmethod
+ def replaceURL(cls, match):
"""Replace hyperlinks with clickable <a> tags
"""
url = match.groups()[0]
- linktext = abbreviateUrl(url)
+ linktext = cls.abbreviateUrl(url)
# Also with <some link> we should only link to some link, not
# including the brackets.
end = ''
@@ -63,8 +75,8 @@ def replaceURL(match):
# rel="nofollow" shall avoid spamming
return '<a href="%s" rel="nofollow">%s</a>%s' % (url, linktext, end)
- text = urlRegexp.subn(replaceURL, text)[0]
+ @staticmethod
def replaceEmail(match):
"""Replace email strings with mailto: links
"""
@@ -73,25 +85,24 @@ def replaceEmail(match):
# crawlers to pickup email addresses
url = url.replace('@', '&#0064;')
return '<a href="&#0109;ailto&#0058;%s">%s</a>' % (url, url)
- text = emailRegexp.subn(replaceEmail, text)[0]
- def indentWhitespace(match):
+ def indentWhitespace(self, match):
"""Make leading whitespace on a line into &nbsp; to preserve indents
"""
indent = match.groups()[0]
indent = indent.replace(' ', '&nbsp;')
- return indent.replace('\t', '&nbsp;' * tab_width)
- text = indentRegexp.subn(indentWhitespace, text)[0]
-
- # convert windows line endings
- text = text.replace('\r\n', '\n')
- # Finally, make \n's into br's
- text = text.replace('\n', '<br />')
+ return indent.replace('\t', '&nbsp;' * self.tab_width)
- text = text.encode('utf-8')
-
- return text
+def convertWebIntelligentPlainTextToHtml(orig, tab_width=4):
+ """Converts text/x-web-intelligent to text/html
+ """
+ try:
+ # tab_width could be a string like '4'
+ tab_width = int(tab_width)
+ except ValueError:
+ tab_width=4
+ return WebIntelligentToHtmlConverter(orig, tab_width)()
def convertHtmlToWebIntelligentPlainText(orig):
"""Converts text/html to text/x-web-intelligent.
View
1  setup.py
@@ -25,6 +25,7 @@
namespace_packages=['plone'],
include_package_data=True,
zip_safe=False,
+ test_suite="plone.intelligenttext.tests.test_suite",
install_requires=[
'setuptools',
],
Something went wrong with that request. Please try again.