Merge pull request #142 from lmorchard/bug-730994-migrate-macros

Bug 730994 migrate macros
mdn · Mar 26, 2012 · 3f599fb · 3f599fb
2 parents 9c0c81c + 6d4faa7
commit 3f599fb
Show file tree

Hide file tree

Showing 5 changed files with 140 additions and 10 deletions.
diff --git a/apps/dekicompat/management/commands/migrate_to_kuma_wiki.py b/apps/dekicompat/management/commands/migrate_to_kuma_wiki.py
@@ -34,7 +34,9 @@
 from wiki.models import (Document, Revision, CATEGORIES, SIGNIFICANCES)
 
 from wiki.models import REDIRECT_CONTENT
-from wiki.content import ContentSectionTool, CodeSyntaxFilter
+import wiki.content
+from wiki.content import (ContentSectionTool, CodeSyntaxFilter,
+                          DekiscriptMacroFilter)
 
 from dekicompat.backends import DekiUser, DekiUserBackend
 
@@ -139,6 +141,8 @@ class Command(BaseCommand):
                     help="Migrate # of documents in locales other than en-US"),
         make_option('--withsyntax', dest="withsyntax", type="int", default=0,
                     help="Migrate # of documents with syntax blocks"),
+        make_option('--withscripts', dest="withscripts", type="int", default=0,
+                    help="Migrate # of documents that use scripts"),
         make_option('--syntax-metrics', action="store_true",
                     dest="syntax_metrics", default=False,
                     help="Measure syntax highlighter usage, skip migration"),
@@ -458,6 +462,18 @@ def gather_pages(self):
                     LIMIT %s
                 """ % (ns_list, '%s'), self.options['withsyntax']))
 
+            if self.options['withscripts'] > 0:
+                log.info("Gathering %s pages that use scripts" %
+                         self.options['withscripts'])
+                iters.append(self._query("""
+                    SELECT *
+                    FROM pages
+                    WHERE page_namespace IN %s AND
+                          page_text like '%%%%span class="script"%%%%'
+                    ORDER BY page_timestamp DESC
+                    LIMIT %s
+                """ % (ns_list, '%s'), self.options['withscripts']))
+
         return itertools.chain(*iters)
 
     @transaction.commit_on_success
@@ -471,7 +487,7 @@ def update_document(self, r):
         # Special: namespace (not migrated), or a couple of untitled and empty
         # pages under the Template: or User: namespaces.
         if not r['page_timestamp']:
-            log.debug("\t%s / %s (%s) skipped, no timestamp" %
+            log.debug("\t%s/%s (%s) skipped, no timestamp" %
                       (locale, slug, r['page_display_name']))
             return False
 
@@ -481,7 +497,7 @@ def update_document(self, r):
         last_mod = self.docs_migrated.get(r['page_id'], (None, None))[1]
         if (not self.options['update_documents'] and last_mod is not None
                 and last_mod >= page_ts):
-            log.debug("\t%s / %s (%s) up to date" %
+            log.debug("\t%s/%s (%s) up to date" %
                       (locale, slug, r['page_display_name']))
             return False
 
@@ -491,18 +507,18 @@ def update_document(self, r):
             content_hash = (hashlib.md5(r['page_text'].encode('utf-8'))
                                    .hexdigest())
             if content_hash in USER_NS_EXCLUDED_CONTENT_HASHES:
-                log.debug("\t%s / %s (%s) matched User: content exclusion list" %
+                log.debug("\t%s/%s (%s) matched User: content exclusion list" %
                           (locale, slug, r['page_display_name']))
                 return False
 
         # Check to see if this page's content is too long, skip if so.
         if len(r['page_text']) > self.options['maxlength']:
-            log.debug("\t%s / %s (%s) skipped, page too long (%s > %s max)" %
+            log.debug("\t%s/%s (%s) skipped, page too long (%s > %s max)" %
                       (locale, slug, r['page_display_name'],
                        len(r['page_text']), self.options['maxlength']))
             return False
 
-        log.info("\t%s / %s (%s)" % (locale, slug, r['page_display_name']))
+        log.info("\t%s/%s (%s)" % (locale, slug, r['page_display_name']))
 
         # Ensure that the document exists, and has the MindTouch page ID
         doc, created = Document.objects.get_or_create(
@@ -663,7 +679,7 @@ def convert_page_text(self, pt):
             pt = self.convert_redirect(pt)
 
         pt = self.convert_code_blocks(pt)
-        # TODO: bug 710728 - Convert and normalize template calls
+        pt = self.convert_dekiscript_template_calls(pt)
         # TODO: bug 710726 - Convert intra-wiki links?
 
         return pt
@@ -683,6 +699,10 @@ def convert_code_blocks(self, pt):
         pt = ContentSectionTool(pt).filter(CodeSyntaxFilter).serialize()
         return pt
 
+    def convert_dekiscript_template_calls(self, pt):
+        """Rewrite Dekiscript template calls in pt as kumascript macros."""
+        return wiki.content.parse(pt).filter(DekiscriptMacroFilter).serialize()
+
     def get_tags_for_page(self, r):
         """For a given page row, get the list of tags from MindTouch and build
         a string representation for Kuma revisions."""

diff --git a/apps/users/helpers.py b/apps/users/helpers.py
@@ -10,7 +10,10 @@
 @register.function
 def profile_url(user):
     """Return a URL to the user's profile."""
-    return reverse('devmo_profile_view', args=[user.username])
+    try:
+        return reverse('devmo_profile_view', args=[user.username])
+    except Exception:  # best effort: a failed lookup must not break the page
+        return user.username  # fallback is the bare username, not a URL
 
 
 @register.function

diff --git a/apps/wiki/content.py b/apps/wiki/content.py
@@ -1,3 +1,4 @@
+import logging
 import re
 from urllib import urlencode
 import bleach
@@ -308,3 +309,62 @@ def __iter__(self):
                             del attrs['function']
                             token['data'] = attrs.items()
             yield token
+
+
+class DekiscriptMacroFilter(html5lib_Filter):
+    """Filter to convert Dekiscript template calls into kumascript macros.
+
+    Every <span class="script">...</span> element in the token stream is
+    replaced by one Characters token of the form "{{ call }}"; all other
+    tokens pass through unchanged.
+    """
+    # Prefixes snipped (case-insensitively) off the front of a call.
+    STRIP_PREFIXES = ('template.', 'wiki.')
+    # Leading numeric argument, eg. bug(123) -> bug("123")
+    NUM_RE = re.compile(r'^([^(]+)\((\d+)(?=\s*[,)])')
+    # template("template name", [ "params" ])
+    TEMPLATE_ARGS_RE = re.compile(
+        r'''^template\(['"]([^'"]+)['"],\s*\[([^\]]+)]''', re.I)
+    # template("template name")
+    TEMPLATE_RE = re.compile(r'''^template\(['"]([^'"]+)['"]''', re.I)
+
+    def __iter__(self):
+        # Shared iterator: the inner loop eats a span, the outer one resumes.
+        tokens = iter(html5lib_Filter.__iter__(self))
+        for token in tokens:
+            if not ('StartTag' == token['type'] and
+                    'span' == token['name'] and
+                    dict(token['data']).get('class', '') == 'script'):
+                yield token
+                continue
+
+            # Gather the span's text up to its *matching* end tag, so markup
+            # nested inside the span cannot end the macro early.
+            ds_call, depth = [], 1
+            for token in tokens:
+                if 'StartTag' == token['type']:
+                    depth += 1
+                elif 'EndTag' == token['type']:
+                    depth -= 1
+                    if not depth:
+                        break
+                elif 'Characters' == token['type']:
+                    ds_call.append(token['data'])
+            yield dict(type="Characters",
+                       data='{{ %s }}' % self._convert_call(''.join(ds_call)))
+
+    def _convert_call(self, ds_call):
+        """Return the kumascript form of one Dekiscript call string."""
+        ds_call = ds_call.strip()
+        for prefix in self.STRIP_PREFIXES:
+            if ds_call.lower().startswith(prefix):
+                ds_call = ds_call[len(prefix):]
+        # Quote a leading numeric arg in place; later arguments are kept.
+        ds_call = self.NUM_RE.sub(r'\1("\2"', ds_call)
+        m = self.TEMPLATE_ARGS_RE.match(ds_call)
+        if m:
+            ds_call = '%s(%s)' % (m.group(1), m.group(2).strip())
+        m = self.TEMPLATE_RE.match(ds_call)
+        if m:
+            ds_call = '%s()' % (m.group(1))
+        return ds_call
diff --git a/apps/wiki/templates/wiki/document.html b/apps/wiki/templates/wiki/document.html
@@ -43,8 +43,9 @@ <h1 class="page-title">{{ document.title }}</h1>
             <ul>
               {% for error in kumascript_errors %}
                 <li class="error error-{{ error.level }}">
-                  <span class="level">{{ error.level }}</span>
+                  {# <span class="level">{{ error.level }}</span> #}
                   {% if error.args %}<span class="type">{{ error.args[0] }}</span>{% endif %}
+                  &#8212;
                   <span class="message">{{ error.message }}</span>
                 </li>
               {% endfor %}

diff --git a/apps/wiki/tests/test_content.py b/apps/wiki/tests/test_content.py
@@ -1,3 +1,5 @@
+# This Python file uses the following encoding: utf-8
+# see also: http://www.python.org/dev/peps/pep-0263/
 import logging
 
 from datetime import datetime, timedelta
@@ -12,7 +14,8 @@
 from sumo import ProgrammingError
 from sumo.tests import TestCase
 import wiki.content
-from wiki.content import SECTION_EDIT_TAGS, CodeSyntaxFilter
+from wiki.content import (SECTION_EDIT_TAGS, CodeSyntaxFilter,
+                          DekiscriptMacroFilter)
 from wiki.tests import normalize_html
 
 import html5lib
@@ -339,6 +342,49 @@ def test_code_syntax_conversion(self):
                   .filter(CodeSyntaxFilter).serialize())
         eq_(normalize_html(expected), normalize_html(result))
 
+    @attr('current')
+    def test_dekiscript_macro_conversion(self):
+        """DekiscriptMacroFilter rewrites script spans as kumascript macros"""
+        doc_src = u"""
+            <span>Just a span</span>
+            <span class="notascript">Hi there</span>
+            <li><span class="script">MixedCaseName('parameter1', 'parameter2')</span></li>
+            <li><span class="script">bug(689641)</span></li>
+            <li><span class="script">template.lowercasename('border')</span></li>
+            <li><span class="script">Template.UpperCaseTemplate("foo")</span></li>
+            <li><span class="script">wiki.template('英語版章題', [ "Reusing tabs" ])</span></li>
+            <li><span class="script">template("non-standard_inline", ["Reusing tabs", "YAY"])</span></li>
+            <li><span class="script">wiki.template('英語版章題')</span></li>
+            <li><span class="script">template("non-standard_inline")</span></li>
+        """
+        expected = u"""
+            <span>Just a span</span>
+            <span class="notascript">Hi there</span>
+            <li>{{ MixedCaseName('parameter1', 'parameter2') }}</li>
+            <li>{{ bug("689641") }}</li>
+            <li>{{ lowercasename('border') }}</li>
+            <li>{{ UpperCaseTemplate("foo") }}</li>
+            <li>{{ 英語版章題("Reusing tabs") }}</li>
+            <li>{{ non-standard_inline("Reusing tabs", "YAY") }}</li>
+            <li>{{ 英語版章題() }}</li>
+            <li>{{ non-standard_inline() }}</li>
+        """
+
+        # Pair each source line with its expected line, so that a failure
+        for src_line, expected_line in zip(doc_src.split("\n"),
+                                           expected.split("\n")):
+            # points at the single conversion that went wrong.
+            converted = (wiki.content.parse(src_line)
+                         .filter(DekiscriptMacroFilter).serialize())
+            eq_(normalize_html(expected_line), normalize_html(converted))
+
+        # The document as a whole must convert the same way in one pass.
+        converted = (wiki.content.parse(doc_src)
+                     .filter(DekiscriptMacroFilter)
+                     .serialize())
+        eq_(normalize_html(expected), normalize_html(converted))
+
+
 class AllowedHTMLTests(TestCase):
     simple_tags = (
         'div', 'span', 'p', 'h1', 'h2', 'h3', 'h4', 'h5', 'pre',