Skip to content
This repository has been archived by the owner on Aug 26, 2022. It is now read-only.

Commit

Permalink
Merge pull request #142 from lmorchard/bug-730994-migrate-macros
Browse files Browse the repository at this point in the history
Bug 730994 migrate macros
  • Loading branch information
groovecoder committed Mar 26, 2012
2 parents 9c0c81c + 6d4faa7 commit 3f599fb
Show file tree
Hide file tree
Showing 5 changed files with 140 additions and 10 deletions.
34 changes: 27 additions & 7 deletions apps/dekicompat/management/commands/migrate_to_kuma_wiki.py
Expand Up @@ -34,7 +34,9 @@
from wiki.models import (Document, Revision, CATEGORIES, SIGNIFICANCES)

from wiki.models import REDIRECT_CONTENT
from wiki.content import ContentSectionTool, CodeSyntaxFilter
import wiki.content
from wiki.content import (ContentSectionTool, CodeSyntaxFilter,
DekiscriptMacroFilter)

from dekicompat.backends import DekiUser, DekiUserBackend

Expand Down Expand Up @@ -139,6 +141,8 @@ class Command(BaseCommand):
help="Migrate # of documents in locales other than en-US"),
make_option('--withsyntax', dest="withsyntax", type="int", default=0,
help="Migrate # of documents with syntax blocks"),
make_option('--withscripts', dest="withscripts", type="int", default=0,
help="Migrate # of documents that use scripts"),
make_option('--syntax-metrics', action="store_true",
dest="syntax_metrics", default=False,
help="Measure syntax highlighter usage, skip migration"),
Expand Down Expand Up @@ -458,6 +462,18 @@ def gather_pages(self):
LIMIT %s
""" % (ns_list, '%s'), self.options['withsyntax']))

if self.options['withscripts'] > 0:
log.info("Gathering %s pages that use scripts" %
self.options['withscripts'])
iters.append(self._query("""
SELECT *
FROM pages
WHERE page_namespace IN %s AND
page_text like '%%%%span class="script"%%%%'
ORDER BY page_timestamp DESC
LIMIT %s
""" % (ns_list, '%s'), self.options['withscripts']))

return itertools.chain(*iters)

@transaction.commit_on_success
Expand All @@ -471,7 +487,7 @@ def update_document(self, r):
# Special: namespace (not migrated), or a couple of untitled and empty
# pages under the Template: or User: namespaces.
if not r['page_timestamp']:
log.debug("\t%s / %s (%s) skipped, no timestamp" %
log.debug("\t%s/%s (%s) skipped, no timestamp" %
(locale, slug, r['page_display_name']))
return False

Expand All @@ -481,7 +497,7 @@ def update_document(self, r):
last_mod = self.docs_migrated.get(r['page_id'], (None, None))[1]
if (not self.options['update_documents'] and last_mod is not None
and last_mod >= page_ts):
log.debug("\t%s / %s (%s) up to date" %
log.debug("\t%s/%s (%s) up to date" %
(locale, slug, r['page_display_name']))
return False

Expand All @@ -491,18 +507,18 @@ def update_document(self, r):
content_hash = (hashlib.md5(r['page_text'].encode('utf-8'))
.hexdigest())
if content_hash in USER_NS_EXCLUDED_CONTENT_HASHES:
log.debug("\t%s / %s (%s) matched User: content exclusion list" %
log.debug("\t%s/%s (%s) matched User: content exclusion list" %
(locale, slug, r['page_display_name']))
return False

# Check to see if this page's content is too long, skip if so.
if len(r['page_text']) > self.options['maxlength']:
log.debug("\t%s / %s (%s) skipped, page too long (%s > %s max)" %
log.debug("\t%s/%s (%s) skipped, page too long (%s > %s max)" %
(locale, slug, r['page_display_name'],
len(r['page_text']), self.options['maxlength']))
return False

log.info("\t%s / %s (%s)" % (locale, slug, r['page_display_name']))
log.info("\t%s/%s (%s)" % (locale, slug, r['page_display_name']))

# Ensure that the document exists, and has the MindTouch page ID
doc, created = Document.objects.get_or_create(
Expand Down Expand Up @@ -663,7 +679,7 @@ def convert_page_text(self, pt):
pt = self.convert_redirect(pt)

pt = self.convert_code_blocks(pt)
# TODO: bug 710728 - Convert and normalize template calls
pt = self.convert_dekiscript_template_calls(pt)
# TODO: bug 710726 - Convert intra-wiki links?

return pt
Expand All @@ -683,6 +699,10 @@ def convert_code_blocks(self, pt):
pt = ContentSectionTool(pt).filter(CodeSyntaxFilter).serialize()
return pt

def convert_dekiscript_template_calls(self, pt):
    """Return page text ``pt`` re-serialized after running it through
    DekiscriptMacroFilter."""
    parsed = wiki.content.parse(pt)
    filtered = parsed.filter(DekiscriptMacroFilter)
    return filtered.serialize()

def get_tags_for_page(self, r):
"""For a given page row, get the list of tags from MindTouch and build
a string representation for Kuma revisions."""
Expand Down
5 changes: 4 additions & 1 deletion apps/users/helpers.py
Expand Up @@ -10,7 +10,10 @@
@register.function
def profile_url(user):
    """Return a URL to the user's profile.

    If the profile URL cannot be reversed for any reason, the bare username
    is returned instead so that callers always get something to display.
    """
    try:
        return reverse('devmo_profile_view', args=[user.username])
    except Exception:
        # Deliberate best-effort fallback: a username that cannot be turned
        # into a profile URL must not break the page being rendered.
        return user.username


@register.function
Expand Down
60 changes: 60 additions & 0 deletions apps/wiki/content.py
@@ -1,3 +1,4 @@
import logging
import re
from urllib import urlencode
import bleach
Expand Down Expand Up @@ -308,3 +309,62 @@ def __iter__(self):
del attrs['function']
token['data'] = attrs.items()
yield token


class DekiscriptMacroFilter(html5lib_Filter):
    """Filter to convert Dekiscript template calls into kumascript macros.

    Each ``<span class="script">...</span>`` element in the token stream is
    replaced by a single Characters token of the form ``{{ call }}``, where
    ``call`` is the text of the span after the rewrites performed by
    ``_convert_call``. Every other token is passed through unchanged.
    """

    # Prefixes snipped from the front of a call. Each is tried once, in this
    # order, and compared case-insensitively.
    _STRIP_PREFIXES = ('template.', 'wiki.')

    # name(123   -- an all-digit first argument, which must be followed by
    # "," or ")" so that values such as 1.5 or 123abc are left alone.
    _NUMERIC_ARG_RE = re.compile(r'^([^(]+)\((\d+)(?=\s*[,)])')

    # template("template name", [ "params" ])
    _TEMPLATE_ARGS_RE = re.compile(
        r'''^template\(['"]([^'"]+)['"],\s*\[([^\]]+)]''', re.I)

    # template("template name")
    _TEMPLATE_BARE_RE = re.compile(r'''^template\(['"]([^'"]+)['"]''', re.I)

    def __iter__(self):
        """Yield the filtered token stream."""
        # A single shared iterator lets the inner loop consume the tokens
        # that belong to a script span without any list.pop(0) shuffling.
        tokens = iter(html5lib_Filter.__iter__(self))

        for token in tokens:
            if not ('StartTag' == token['type'] and
                    'span' == token['name']):
                yield token
                continue

            attrs = dict(token['data'])
            if attrs.get('class', '') != 'script':
                yield token
                continue

            # Collect the text of the script span up to its *matching* end
            # tag. Depth is tracked so that markup nested inside the span
            # does not end the span early; the nested tags themselves are
            # dropped and only Characters tokens contribute text. If the
            # stream ends first, whatever was collected is still converted.
            text_parts = []
            depth = 1
            for inner in tokens:
                inner_type = inner['type']
                if 'Characters' == inner_type:
                    text_parts.append(inner['data'])
                elif 'StartTag' == inner_type:
                    depth += 1
                elif 'EndTag' == inner_type:
                    depth -= 1
                    if depth == 0:
                        break

            yield dict(
                type="Characters",
                data='{{ %s }}' % self._convert_call(''.join(text_parts))
            )

    @classmethod
    def _convert_call(cls, ds_call):
        """Return the kumascript form of one Dekiscript call string."""
        ds_call = ds_call.strip()

        # Snip off any "template." / "wiki." prefixes
        for prefix in cls._STRIP_PREFIXES:
            if ds_call.lower().startswith(prefix):
                ds_call = ds_call[len(prefix):]

        # Convert a numeric first arg to quoted. eg. bug(123) -> bug("123")
        # Only the number itself is rewritten, so any further arguments
        # survive: bug(123, 4) -> bug("123", 4)
        ds_call = cls._NUMERIC_ARG_RE.sub(r'\1("\2"', ds_call, 1)

        # template("template name", [ "params" ]) -> template name(params)
        m = cls._TEMPLATE_ARGS_RE.match(ds_call)
        if m:
            ds_call = '%s(%s)' % (m.group(1), m.group(2).strip())

        # template("template name") -> template name()
        m = cls._TEMPLATE_BARE_RE.match(ds_call)
        if m:
            ds_call = '%s()' % (m.group(1))

        return ds_call
3 changes: 2 additions & 1 deletion apps/wiki/templates/wiki/document.html
Expand Up @@ -43,8 +43,9 @@ <h1 class="page-title">{{ document.title }}</h1>
<ul>
{% for error in kumascript_errors %}
<li class="error error-{{ error.level }}">
<span class="level">{{ error.level }}</span>
{# <span class="level">{{ error.level }}</span> #}
{% if error.args %}<span class="type">{{ error.args[0] }}</span>{% endif %}
&#8212;
<span class="message">{{ error.message }}</span>
</li>
{% endfor %}
Expand Down
48 changes: 47 additions & 1 deletion apps/wiki/tests/test_content.py
@@ -1,3 +1,5 @@
# This Python file uses the following encoding: utf-8
# see also: http://www.python.org/dev/peps/pep-0263/
import logging

from datetime import datetime, timedelta
Expand All @@ -12,7 +14,8 @@
from sumo import ProgrammingError
from sumo.tests import TestCase
import wiki.content
from wiki.content import SECTION_EDIT_TAGS, CodeSyntaxFilter
from wiki.content import (SECTION_EDIT_TAGS, CodeSyntaxFilter,
DekiscriptMacroFilter)
from wiki.tests import normalize_html

import html5lib
Expand Down Expand Up @@ -339,6 +342,49 @@ def test_code_syntax_conversion(self):
.filter(CodeSyntaxFilter).serialize())
eq_(normalize_html(expected), normalize_html(result))

@attr('current')
def test_dekiscript_macro_conversion(self):
    """DekiscriptMacroFilter should turn script spans into {{ macro }}
    calls and leave every other span untouched."""
    doc_src = u"""
<span>Just a span</span>
<span class="notascript">Hi there</span>
<li><span class="script">MixedCaseName('parameter1', 'parameter2')</span></li>
<li><span class="script">bug(689641)</span></li>
<li><span class="script">template.lowercasename('border')</span></li>
<li><span class="script">Template.UpperCaseTemplate("foo")</span></li>
<li><span class="script">wiki.template('英語版章題', [ "Reusing tabs" ])</span></li>
<li><span class="script">template("non-standard_inline", ["Reusing tabs", "YAY"])</span></li>
<li><span class="script">wiki.template('英語版章題')</span></li>
<li><span class="script">template("non-standard_inline")</span></li>
"""
    expected = u"""
<span>Just a span</span>
<span class="notascript">Hi there</span>
<li>{{ MixedCaseName('parameter1', 'parameter2') }}</li>
<li>{{ bug("689641") }}</li>
<li>{{ lowercasename('border') }}</li>
<li>{{ UpperCaseTemplate("foo") }}</li>
<li>{{ 英語版章題("Reusing tabs") }}</li>
<li>{{ non-standard_inline("Reusing tabs", "YAY") }}</li>
<li>{{ 英語版章題() }}</li>
<li>{{ non-standard_inline() }}</li>
"""

    # Check line-by-line, to help work out any issues failure-by-failure
    doc_src_lines = doc_src.split("\n")
    expected_lines = expected.split("\n")

    # The two fixtures must pair up one-to-one; fail loudly if an edit to
    # one of them ever leaves lines without a counterpart.
    eq_(len(doc_src_lines), len(expected_lines))

    for src_line, expected_line in zip(doc_src_lines, expected_lines):
        result = (wiki.content
                  .parse(src_line)
                  .filter(DekiscriptMacroFilter).serialize())
        eq_(normalize_html(expected_line), normalize_html(result))

    # But, the whole thing should work in the filter, as well.
    result = (wiki.content
              .parse(doc_src)
              .filter(DekiscriptMacroFilter).serialize())
    eq_(normalize_html(expected), normalize_html(result))


class AllowedHTMLTests(TestCase):
simple_tags = (
'div', 'span', 'p', 'h1', 'h2', 'h3', 'h4', 'h5', 'pre',
Expand Down

0 comments on commit 3f599fb

Please sign in to comment.