Permalink
Browse files

[issue #64] Python 3 support!

  • Loading branch information...
trentm committed Apr 20, 2012
1 parent a827c30 commit 08522fd44bd4adc24771610fe3379391c6b59b0a
Showing with 164 additions and 98 deletions.
  1. +2 −0 .gitignore
  2. +3 −2 CHANGES.md
  3. +5 −0 Makefile
  4. +3 −2 README.md
  5. +1 −0 TODO.txt
  6. +61 −48 lib/markdown2.py
  7. +11 −2 setup.py
  8. +6 −7 test/api.doctests
  9. +10 −3 test/test.py
  10. +60 −30 test/test_markdown2.py
  11. +2 −4 test/testall.py
View
@@ -1,10 +1,12 @@
*.pyc
tmp
deps/pygments
+deps/pygments3
dist
MANIFEST
build
googlecode_upload.py
perf/*.prof
perf/tmp-*-cases
sandbox/*.html
+__pycache__
View
@@ -1,8 +1,9 @@
# python-markdown2 Changelog
-## python-markdown2 1.3.2 (not yet released)
+## python-markdown2 1.4.0 (not yet released)
-(nothing yet)
+- [issue #64] Python 3 support! markdown2.py supports Python 2 and 3 in the
+ same file without requiring install-time 2to3 transformation.
## python-markdown2 1.3.1
View
@@ -13,6 +13,11 @@ pygments:
mkdir -p deps && \
hg clone https://bitbucket.org/birkenfeld/pygments-main deps/pygments)
(cd deps/pygments && hg pull && hg update)
+ # And for Python 3 usage:
+ rm -rf deps/pygments3
+ mkdir -p deps/pygments3
+ cp -PR deps/pygments/pygments deps/pygments3/pygments
+ 2to3 -w --no-diffs deps/pygments3/pygments
clean:
rm -rf build dist MANIFEST
View
@@ -12,7 +12,8 @@ This (markdown2) is a fast and complete Python implementation of Markdown. It
was written to closely match the behaviour of the original Perl-implemented
Markdown.pl. Markdown2 also comes with a number of extensions (called
"extras") for things like syntax coloring, tables, header-ids. See the
-"Extra Syntax" section below.
+"Extra Syntax" section below. "markdown2" supports all Python versions
+from 2.4 to 3.3.
There is another [Python
markdown.py](http://www.freewisdom.org/projects/python-markdown/). However, at
@@ -32,8 +33,8 @@ for updates to python-markdown2.
To install it in your Python installation run *one* of the following:
- pypm install markdown2 # if you use ActivePython (activestate.com/activepython)
pip install markdown2
+ pypm install markdown2 # if you use ActivePython (activestate.com/activepython)
python setup.py install
However, everything you need to run this is in "lib/markdown2.py". If it is
View
@@ -1,3 +1,4 @@
+- py3: commit, travis-ci, py2.4 test (broken?)
- add "markdown-in-html" extra to wiki
- add "smarty-pants" extra to wiki
- add "html-classes" extra to wiki
View
@@ -1,6 +1,8 @@
#!/usr/bin/env python
+# Copyright (c) 2012 Trent Mick.
# Copyright (c) 2007-2008 ActiveState Corp.
# License: MIT (http://www.opensource.org/licenses/mit-license.php)
+
from __future__ import generators
r"""A fast and complete Python implementation of Markdown.
@@ -78,7 +80,7 @@
# not yet sure if there are implications with this. Compare 'pydoc sre'
# and 'perldoc perlre'.
-__version_info__ = (1, 3, 2)
+__version_info__ = (1, 4, 0)
__version__ = '.'.join(map(str, __version_info__))
__author__ = "Trent Mick"
@@ -94,22 +96,34 @@
import optparse
from random import random, randint
import codecs
-from urllib import quote
-
#---- Python version compat
+try:
+ from urllib.parse import quote # python3
+except ImportError:
+ from urllib import quote # python2
+
if sys.version_info[:2] < (2,4):
from sets import Set as set
def reversed(sequence):
for i in sequence[::-1]:
yield i
- def _unicode_decode(s, encoding, errors='xmlcharrefreplace'):
- return unicode(s, encoding, errors)
-else:
- def _unicode_decode(s, encoding, errors='strict'):
- return s.decode(encoding, errors)
+
+# Use `bytes` for byte strings and `unicode` for unicode strings (str in Py3).
+if sys.version_info[0] <= 2:
+ py3 = False
+ try:
+ bytes
+ except NameError:
+ bytes = str
+ base_string_type = basestring
+elif sys.version_info[0] >= 3:
+ py3 = True
+ unicode = str
+ base_string_type = str
+
#---- globals
@@ -120,21 +134,13 @@ def _unicode_decode(s, encoding, errors='strict'):
DEFAULT_TAB_WIDTH = 4
-try:
- import uuid
-except ImportError:
- SECRET_SALT = str(randint(0, 1000000))
-else:
- SECRET_SALT = str(uuid.uuid4())
-def _hash_ascii(s):
- #return md5(s).hexdigest() # Markdown.pl effectively does this.
- return 'md5-' + md5(SECRET_SALT + s).hexdigest()
+SECRET_SALT = bytes(randint(0, 1000000))
def _hash_text(s):
return 'md5-' + md5(SECRET_SALT + s.encode("utf-8")).hexdigest()
# Table of hash values for escaped characters:
-g_escape_table = dict([(ch, _hash_ascii(ch))
- for ch in '\\`*_{}[]()>#+-.!'])
+g_escape_table = dict([(ch, _hash_text(ch))
+ for ch in '\\`*_{}[]()>#+-.!'])
@@ -224,8 +230,8 @@ def __init__(self, html4tags=False, tab_width=4, safe_mode=None,
self._escape_table = g_escape_table.copy()
if "smarty-pants" in self.extras:
- self._escape_table['"'] = _hash_ascii('"')
- self._escape_table["'"] = _hash_ascii("'")
+ self._escape_table['"'] = _hash_text('"')
+ self._escape_table["'"] = _hash_text("'")
def reset(self):
self.urls = {}
@@ -471,7 +477,7 @@ def _get_emacs_vars(self, text):
emacs_vars[variable] = value
# Unquote values.
- for var, val in emacs_vars.items():
+ for var, val in list(emacs_vars.items()):
if len(val) > 1 and (val.startswith('"') and val.endswith('"')
or val.startswith('"') and val.endswith('"')):
emacs_vars[var] = val[1:-1]
@@ -611,11 +617,11 @@ def _hash_html_blocks(self, text, raw=False):
# Delimiters for next comment block.
try:
start_idx = text.index("<!--", start)
- except ValueError, ex:
+ except ValueError:
break
try:
end_idx = text.index("-->", start_idx) + 3
- except ValueError, ex:
+ except ValueError:
break
# Start position for next comment block search.
@@ -955,7 +961,7 @@ def _is_auto_link(s):
return ''.join(tokens)
def _unhash_html_spans(self, text):
- for key, sanitized in self.html_spans.items():
+ for key, sanitized in list(self.html_spans.items()):
text = text.replace(key, sanitized)
return text
@@ -1204,7 +1210,7 @@ def header_id_from_text(self, text, prefix, n):
the TOC (if the "toc" extra is specified).
"""
header_id = _slugify(text)
- if prefix and isinstance(prefix, basestring):
+ if prefix and isinstance(prefix, base_string_type):
header_id = prefix + '-' + header_id
if header_id in self._count_from_header_id:
self._count_from_header_id[header_id] += 1
@@ -1772,7 +1778,7 @@ def _encode_amps_and_angles(self, text):
return text
def _encode_backslash_escapes(self, text):
- for ch, escape in self._escape_table.items():
+ for ch, escape in list(self._escape_table.items()):
text = text.replace("\\"+ch, escape)
return text
@@ -1847,13 +1853,13 @@ def _do_link_patterns(self, text):
hash = _hash_text(link)
link_from_hash[hash] = link
text = text[:start] + hash + text[end:]
- for hash, link in link_from_hash.items():
+ for hash, link in list(link_from_hash.items()):
text = text.replace(hash, link)
return text
def _unescape_special_chars(self, text):
# Swap back in all the special characters we've hidden.
- for ch, hash in self._escape_table.items():
+ for ch, hash in list(self._escape_table.items()):
text = text.replace(hash, ch)
return text
@@ -1910,7 +1916,7 @@ def indent():
if not lines[-1].endswith("</li>"):
lines[-1] += "</li>"
lines.append("%s</ul></li>" % indent())
- lines.append(u'%s<li><a href="#%s">%s</a>' % (
+ lines.append('%s<li><a href="#%s">%s</a>' % (
indent(), id, name))
while len(h_stack) > 1:
h_stack.pop()
@@ -1931,8 +1937,8 @@ def _slugify(value):
From Django's "django/template/defaultfilters.py".
"""
import unicodedata
- value = unicodedata.normalize('NFKD', value).encode('ascii', 'ignore')
- value = unicode(_slugify_strip_re.sub('', value).strip().lower())
+ value = unicodedata.normalize('NFKD', value).encode('ascii', 'ignore').decode()
+ value = _slugify_strip_re.sub('', value).strip().lower()
return _slugify_hyphenate_re.sub('-', value)
## end of http://code.activestate.com/recipes/577257/ }}}
@@ -1970,7 +1976,7 @@ def _regex_from_encoded_pattern(s):
except KeyError:
raise ValueError("unsupported regex flag: '%s' in '%s' "
"(must be one of '%s')"
- % (char, s, ''.join(flag_from_char.keys())))
+ % (char, s, ''.join(list(flag_from_char.keys()))))
return re.compile(s[1:idx], flags)
else: # not an encoded regex
return re.compile(re.escape(s))
@@ -1990,8 +1996,8 @@ def _dedentlines(lines, tabsize=8, skip_first_line=False):
"""
DEBUG = False
if DEBUG:
- print "dedent: dedent(..., tabsize=%d, skip_first_line=%r)"\
- % (tabsize, skip_first_line)
+ print("dedent: dedent(..., tabsize=%d, skip_first_line=%r)"\
+ % (tabsize, skip_first_line))
indents = []
margin = None
for i, line in enumerate(lines):
@@ -2008,12 +2014,12 @@ def _dedentlines(lines, tabsize=8, skip_first_line=False):
break
else:
continue # skip all-whitespace lines
- if DEBUG: print "dedent: indent=%d: %r" % (indent, line)
+ if DEBUG: print("dedent: indent=%d: %r" % (indent, line))
if margin is None:
margin = indent
else:
margin = min(margin, indent)
- if DEBUG: print "dedent: margin=%r" % margin
+ if DEBUG: print("dedent: margin=%r" % margin)
if margin is not None and margin > 0:
for i, line in enumerate(lines):
@@ -2025,16 +2031,16 @@ def _dedentlines(lines, tabsize=8, skip_first_line=False):
elif ch == '\t':
removed += tabsize - (removed % tabsize)
elif ch in '\r\n':
- if DEBUG: print "dedent: %r: EOL -> strip up to EOL" % line
+ if DEBUG: print("dedent: %r: EOL -> strip up to EOL" % line)
lines[i] = lines[i][j:]
break
else:
raise ValueError("unexpected non-whitespace char %r in "
"line %r while removing %d-space margin"
% (ch, line, margin))
if DEBUG:
- print "dedent: %r: %r -> removed %d/%d"\
- % (line, ch, removed, margin)
+ print("dedent: %r: %r -> removed %d/%d"\
+ % (line, ch, removed, margin))
if removed == margin:
lines[i] = lines[i][j+1:]
break
@@ -2263,20 +2269,27 @@ def main(argv=None):
fp.close()
if opts.compare:
from subprocess import Popen, PIPE
- print "==== Markdown.pl ===="
+ print("==== Markdown.pl ====")
p = Popen('perl %s' % markdown_pl, shell=True, stdin=PIPE, stdout=PIPE, close_fds=True)
- p.stdin.write(text)
+ p.stdin.write(text.encode('utf-8'))
p.stdin.close()
- perl_html = p.stdout.read()
- sys.stdout.write(perl_html)
- print "==== markdown2.py ===="
+ perl_html = p.stdout.read().decode('utf-8')
+ if py3:
+ sys.stdout.write(perl_html)
+ else:
+ sys.stdout.write(perl_html.encode(
+ sys.stdout.encoding or "utf-8", 'xmlcharrefreplace'))
+ print("==== markdown2.py ====")
html = markdown(text,
html4tags=opts.html4tags,
safe_mode=opts.safe_mode,
extras=extras, link_patterns=link_patterns,
use_file_vars=opts.use_file_vars)
- sys.stdout.write(
- html.encode(sys.stdout.encoding or "utf-8", 'xmlcharrefreplace'))
+ if py3:
+ sys.stdout.write(html)
+ else:
+ sys.stdout.write(html.encode(
+ sys.stdout.encoding or "utf-8", 'xmlcharrefreplace'))
if extras and "toc" in extras:
log.debug("toc_html: " +
html.toc_html.encode(sys.stdout.encoding or "utf-8", 'xmlcharrefreplace'))
@@ -2290,7 +2303,7 @@ def main(argv=None):
else:
norm_html = html
norm_perl_html = perl_html
- print "==== match? %r ====" % (norm_perl_html == norm_html)
+ print("==== match? %r ====" % (norm_perl_html == norm_html))
if __name__ == "__main__":
View
@@ -17,11 +17,21 @@
Intended Audience :: Developers
License :: OSI Approved :: MIT License
Programming Language :: Python
+Programming Language :: Python :: 2
+Programming Language :: Python :: 2.4
+Programming Language :: Python :: 2.5
+Programming Language :: Python :: 2.6
+Programming Language :: Python :: 2.7
+Programming Language :: Python :: 3
+Programming Language :: Python :: 3.0
+Programming Language :: Python :: 3.1
+Programming Language :: Python :: 3.2
+Programming Language :: Python :: 3.3
Operating System :: OS Independent
Topic :: Software Development :: Libraries :: Python Modules
Topic :: Software Development :: Documentation
Topic :: Text Processing :: Filters
-Topic :: Text Processing :: Markup :: HTML
+Topic :: Text Processing :: Markup :: HTML
"""
if sys.version_info < (2, 3):
@@ -61,4 +71,3 @@ def setup(**kwargs):
spec. See http://github.com/trentm/python-markdown2 for more info.
""",
)
-
View
@@ -5,14 +5,13 @@ True
>>> hasattr(markdown2, "__version_info__")
True
->>> markdown2.markdown(u"*boo*")
-u'<p><em>boo</em></p>\n'
+>>> str( markdown2.markdown("*boo*") )
+'<p><em>boo</em></p>\n'
>>> m = markdown2.Markdown()
->>> m.convert(u"*boo*")
-u'<p><em>boo</em></p>\n'
+>>> str( m.convert("*boo*") )
+'<p><em>boo</em></p>\n'
>>> m = markdown2.MarkdownWithExtras()
->>> m.convert(u"*boo*")
-u'<p><em>boo</em></p>\n'
-
+>>> str( m.convert("*boo*") )
+'<p><em>boo</em></p>\n'
Oops, something went wrong.

0 comments on commit 08522fd

Please sign in to comment.