Bug 730707 kumascript #164
Changes from all commits
262ee17
7f2f0d2
24f464d
716dd4a
d37b4c6
395efcc
dfc3188
f3befb6
cf79314
90968e3
e63f3a9
d2504ab
a567d83
04ef5eb
53809c2
cb9588a
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,8 +1,12 @@ | ||
import logging | ||
import re | ||
from urllib import urlencode | ||
|
||
from xml.sax.saxutils import quoteattr | ||
|
||
import html5lib | ||
from html5lib.filters._base import Filter as html5lib_Filter | ||
from pyquery import PyQuery as pq | ||
|
||
from tower import ugettext as _ | ||
|
||
|
@@ -27,6 +31,16 @@ def parse(src): | |
return ContentSectionTool(src) | ||
|
||
|
||
def filter_out_noinclude(src): | ||
"""Quick and dirty filter to remove <div class="noinclude"> blocks""" | ||
# NOTE: This started as an html5lib filter, but it started getting really | ||
# complex. Seems like pyquery works well enough without corrupting | ||
# character encoding. | ||
doc = pq(src) | ||
doc.remove('*[class=noinclude]') | ||
return doc.html() | ||
|
||
|
||
class ContentSectionTool(object): | ||
|
||
def __init__(self, src=None): | ||
|
@@ -58,7 +72,7 @@ def parse(self, src): | |
def serialize(self, stream=None): | ||
if stream is None: | ||
stream = self.stream | ||
return "".join(self.serializer.serialize(stream)) | ||
return u"".join(self.serializer.serialize(stream)) | ||
|
||
def __unicode__(self): | ||
return self.serialize() | ||
|
@@ -102,6 +116,10 @@ def gen_id(self): | |
self.known_ids.add(id) | ||
return id | ||
|
||
def slugify(self, text): | ||
"""Turn the text content of a header into a slug for use in an ID""" | ||
return (text.replace(' ', '_')) | ||
|
||
def __iter__(self): | ||
input = html5lib_Filter.__iter__(self) | ||
|
||
|
@@ -113,17 +131,63 @@ def __iter__(self): | |
attrs = dict(token['data']) | ||
if 'id' in attrs: | ||
self.known_ids.add(attrs['id']) | ||
if 'name' in attrs: | ||
self.known_ids.add(attrs['name']) | ||
|
||
# Pass 2: Sprinkle in IDs where they're missing | ||
for token in buffer: | ||
if ('StartTag' == token['type'] and | ||
# Pass 2: Sprinkle in IDs where they're needed | ||
while len(buffer): | ||
token = buffer.pop(0) | ||
|
||
if not ('StartTag' == token['type'] and | ||
token['name'] in SECTION_TAGS): | ||
yield token | ||
else: | ||
attrs = dict(token['data']) | ||
id = attrs.get('id', None) | ||
if not id: | ||
|
||
# Treat a name attribute as a human-specified ID override | ||
name = attrs.get('name', None) | ||
if name: | ||
attrs['id'] = name | ||
token['data'] = attrs.items() | ||
yield token | ||
continue | ||
|
||
# If this is not a header, then generate a section ID. | ||
if token['name'] not in HEAD_TAGS: | ||
attrs['id'] = self.gen_id() | ||
token['data'] = attrs.items() | ||
yield token | ||
yield token | ||
continue | ||
|
||
# If this is a header, then scoop up the rest of the header and | ||
# gather the text it contains. | ||
start, text, tmp = token, [], [] | ||
while len(buffer): | ||
token = buffer.pop(0) | ||
tmp.append(token) | ||
if token['type'] in ('Characters', 'SpaceCharacters'): | ||
text.append(token['data']) | ||
elif ('EndTag' == token['type'] and | ||
start['name'] == token['name']): | ||
# Note: This is naive, and doesn't track other | ||
# start/end tags nested in the header. Odd things might | ||
# happen in a case like <h1><h1></h1></h1>. But, that's | ||
# invalid markup and the worst case should be a | ||
# truncated ID because all the text wasn't accumulated. | ||
break | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This may be a silly question, but the comment here made me think of it: is there any mechanism enforcing uniqueness of IDs within the document? What happens if IDs end up colliding? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I kind of punted on that... There is a mechanism for uniqueness, but only for auto-generated IDs (e.g. sect1, sect2, etc). For IDs based on element text or the name attribute [sentence truncated in extraction]. This is really a half-baked feature, ugh. :/ There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. FWIW, I just filed bug 747403 to remember to put more work into this feature |
||
|
||
# Slugify the text we found inside the header, generate an ID | ||
# as a last resort. | ||
slug = self.slugify(u''.join(text)) | ||
if not slug: | ||
slug = self.gen_id() | ||
attrs['id'] = slug | ||
start['data'] = attrs.items() | ||
|
||
# Finally, emit the tokens we scooped up for the header. | ||
yield start | ||
for t in tmp: | ||
yield t | ||
|
||
|
||
class SectionEditLinkFilter(html5lib_Filter): | ||
|
@@ -152,17 +216,18 @@ def __iter__(self): | |
'title': _('Edit section'), | ||
'class': 'edit-section', | ||
'data-section-id': id, | ||
'data-section-src-url': '%s?%s' % ( | ||
'data-section-src-url': u'%s?%s' % ( | ||
reverse('wiki.document', | ||
args=[self.full_path], | ||
locale=self.locale), | ||
urlencode({'section': id, 'raw': 'true'}) | ||
urlencode({'section': id.encode('utf-8'), | ||
'raw': 'true'}) | ||
), | ||
'href': '%s?%s' % ( | ||
'href': u'%s?%s' % ( | ||
reverse('wiki.edit_document', | ||
args=[self.full_path], | ||
locale=self.locale), | ||
urlencode({'section': id, | ||
urlencode({'section': id.encode('utf-8'), | ||
'edit_links': 'true'}) | ||
) | ||
}}, | ||
|
@@ -385,12 +450,26 @@ def __iter__(self): | |
continue | ||
|
||
ds_call = [] | ||
while len(buffer) and 'EndTag' != token['type']: | ||
while len(buffer): | ||
token = buffer.pop(0) | ||
if 'Characters' == token['type']: | ||
if token['type'] in ('Characters', 'SpaceCharacters'): | ||
ds_call.append(token['data']) | ||
|
||
ds_call = ''.join(ds_call).strip() | ||
elif 'StartTag' == token['type']: | ||
attrs = token['data'] | ||
if attrs: | ||
a_out = (u' %s' % u' '.join( | ||
(u'%s=%s' % | ||
(name, quoteattr(val)) | ||
for name, val in attrs))) | ||
else: | ||
a_out = u'' | ||
ds_call.append(u'<%s%s>' % (token['name'], a_out)) | ||
elif 'EndTag' == token['type']: | ||
if 'span' == token['name']: | ||
break | ||
ds_call.append('</%s>' % token['name']) | ||
|
||
ds_call = u''.join(ds_call).strip() | ||
|
||
# Snip off any "template." prefixes | ||
strip_prefixes = ('template.', 'wiki.') | ||
|
@@ -417,7 +496,11 @@ def __iter__(self): | |
if m: | ||
ds_call = '%s()' % (m.group(1)) | ||
|
||
yield dict( | ||
type="Characters", | ||
data='{{ %s }}' % ds_call | ||
) | ||
# HACK: This is dirty, but seems like the easiest way to | ||
# reconstitute the token stream, including what gets parsed as | ||
# markup in the middle of macro parameters. | ||
# | ||
# eg. {{ Note("This is <strong>strongly</strong> discouraged") }} | ||
parsed = parse('{{ %s }}' % ds_call) | ||
for token in parsed.stream: | ||
yield token |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is there any chance these IDs ever end up as part of a URL (not just a fragment identifier)? Looks like they do further down and if so, that's a potential Unicode issue -- we might want to do something like Django's own built-in slugify template filter, which has a little Unicode-normalization song-and-dance to produce a readable but URL-safe result.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yeah, these will probably end up in section editing URLs. :/ Need to look at this some more, because I want to make sure it matches up with existing anchor links from MindTouch. I don't think it quite does that all the way, either.