Skip to content
This repository has been archived by the owner on Aug 26, 2022. It is now read-only.

Commit

Permalink
Fix bug 1156936: No longer escape section ids
Browse files Browse the repository at this point in the history
  • Loading branch information
groovecoder committed Jun 10, 2015
1 parent d8d5e09 commit 3ea15af
Show file tree
Hide file tree
Showing 2 changed files with 10 additions and 31 deletions.
21 changes: 1 addition & 20 deletions kuma/wiki/content.py
Expand Up @@ -534,30 +534,11 @@ def slugify(self, text):
non_safe = [c for c in text if c in self.non_url_safe]
if non_safe:
for c in non_safe:
text = text.replace(c, hex(ord(c)).replace('0x', '.').upper())
text = text.replace(c, '')
# Strip leading and trailing whitespace, then replace each remaining run of whitespace with a single underscore
text = u'_'.join(text.split())
non_ascii = [c for c in text if ord(c) > 128]
if non_ascii:
for c in non_ascii:
text = text.replace(c, self.encode_non_ascii(c))
return text

def encode_non_ascii(self, c):
    """Return the dotted-hex escape of ``c``'s UTF-8 encoding.

    MindTouch turns a non-ASCII character into the bytes of its UTF-8
    encoding, each written as a dot followed by two uppercase hex
    digits; for example ``u'\u00e0'`` becomes ``'.C3.A0'``.

    The bytes are formatted directly instead of post-processing
    ``repr()`` of the encoded string. The ``repr()`` output differs
    between Python 2 (``'\\xc3\\xa0'``) and Python 3
    (``b'\\xc3\\xa0'``), which left a stray ``B'`` prefix in the
    result on Python 3.

    ``c`` is expected to be a non-ASCII unicode character, which is
    what ``slugify`` passes in. Every byte of the encoding is escaped,
    so ASCII input would be escaped too rather than passed through.
    """
    # bytearray yields integers on both Python 2 and Python 3, whereas
    # iterating a Python 2 byte string yields one-character strings.
    utf8_bytes = bytearray(c.encode('utf-8'))
    return ''.join('.%02X' % byte for byte in utf8_bytes)

def process_header(self, token, buffer):
# If we get into this code, 'token' will be the start tag of a
# header element. We're going to grab its text contents to
Expand Down
20 changes: 9 additions & 11 deletions kuma/wiki/tests/test_content.py
Expand Up @@ -389,23 +389,21 @@ def test_code_syntax_conversion(self):
def test_non_ascii_section_headers(self):
headers = [
(u'Documentation à propos de HTML',
'Documentation_.C3.A0_propos_de_HTML'),
u'Documentation_à_propos_de_HTML'),
(u'Outils facilitant le développement HTML',
'Outils_facilitant_le_d.C3.A9veloppement_HTML'),
(u'例:\u00a0スキューと平行移動',
'.E4.BE.8B.3A_.E3.82.B9.E3.82.AD.E3.83.A5.E3.83.BC.E3.81.A8.E5.B9.B3.E8.A1.8C.E7.A7.BB.E5.8B.95'),
(u'例:\u00a0回転',
'.E4.BE.8B.3A_.E5.9B.9E.E8.BB.A2'),
u'Outils_facilitant_le_développement_HTML'),
(u'字面值(literals)',
u'字面值(literals)'),
(u'Documentação',
'Documenta.C3.A7.C3.A3o'),
u'Documentação'),
(u'Lektury uzupełniające',
'Lektury_uzupe.C5.82niaj.C4.85ce'),
u'Lektury_uzupełniające'),
(u'Атрибуты',
'.D0.90.D1.82.D1.80.D0.B8.D0.B1.D1.83.D1.82.D1.8B'),
u'Атрибуты'),
(u'HTML5 엘리먼트',
'HTML5_.EC.97.98.EB.A6.AC.EB.A8.BC.ED.8A.B8'),
u'HTML5_엘리먼트'),
(u'Non safe title "#$%&+,/:;=?@[\\]^`{|}~',
u'Non_safe_title_.22.23.24.25.26.2B.2C.2F.3A.3B.3D.3F.40.5B.5C.5D.5E.60.7B.7C.7D.7E'),
u'Non_safe_title'),
]

section_filter = SectionIDFilter('')
Expand Down

0 comments on commit 3ea15af

Please sign in to comment.