Skip to content
This repository has been archived by the owner on Aug 26, 2022. It is now read-only.

Bug 730707 kumascript #164

Merged
merged 16 commits into from Apr 20, 2012
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
10 changes: 5 additions & 5 deletions apps/dekicompat/management/commands/migrate_to_kuma_wiki.py
Expand Up @@ -741,15 +741,15 @@ def convert_dekiscript_template(self, pt):
This is an incomplete process, but it tries to take care of as much as
it can so that human intervention is minimized."""

# Many templates start with this prefix, which corresponds to {% in EJS
# Many templates start with this prefix, which corresponds to <% in EJS
pre = '<pre class="script">'
if pt.startswith(pre):
pt = "{%%\n%s" % pt[len(pre):]
pt = "<%%\n%s" % pt[len(pre):]

# Many templates end with this postfix, which corresponds to %} in EJS
# Many templates end with this postfix, which corresponds to %> in EJS
post = '</pre>'
if pt.endswith(post):
pt = "%s\n%%}" % pt[:0-len(post)]
pt = "%s\n%%>" % pt[:0-len(post)]

# Template source is usually HTML encoded inside the <pre>
pt = (pt.replace('&amp;', '&')
Expand Down Expand Up @@ -817,7 +817,7 @@ def get_kuma_locale_and_slug_for_page(self, r):
if '/' in title:
# Treat the first part of the slug path as locale and snip it off.
mt_language, new_title = title.split('/', 1)
if mt_language in MT_TO_KUMA_LOCALE_MAP:
if mt_language.lower() in MT_TO_KUMA_LOCALE_MAP:
# If it's a known language, then rebuild the slug
slug = '%s%s' % (ns_name, new_title)
else:
Expand Down
121 changes: 102 additions & 19 deletions apps/wiki/content.py
@@ -1,8 +1,12 @@
import logging
import re
from urllib import urlencode

from xml.sax.saxutils import quoteattr

import html5lib
from html5lib.filters._base import Filter as html5lib_Filter
from pyquery import PyQuery as pq

from tower import ugettext as _

Expand All @@ -27,6 +31,16 @@ def parse(src):
return ContentSectionTool(src)


def filter_out_noinclude(src):
    """Remove every element carrying the ``noinclude`` class from ``src``.

    ``src`` is parsed with pyquery, the matching elements are removed, and
    the remaining markup is returned via ``doc.html()``.
    """
    # NOTE: This started as an html5lib filter, but it started getting really
    # complex. Seems like pyquery works well enough without corrupting
    # character encoding.
    doc = pq(src)
    # Use a class selector rather than the exact-match attribute selector
    # ``*[class=noinclude]``: the latter misses elements that carry additional
    # classes, e.g. <div class="noinclude warning">.
    doc.remove('.noinclude')
    return doc.html()


class ContentSectionTool(object):

def __init__(self, src=None):
Expand Down Expand Up @@ -58,7 +72,7 @@ def parse(self, src):
def serialize(self, stream=None):
if stream is None:
stream = self.stream
return "".join(self.serializer.serialize(stream))
return u"".join(self.serializer.serialize(stream))

def __unicode__(self):
return self.serialize()
Expand Down Expand Up @@ -102,6 +116,10 @@ def gen_id(self):
self.known_ids.add(id)
return id

def slugify(self, text):
    """Turn the text content of a header into a slug for use in an ID.

    Every whitespace character (space, tab, newline, ...) is replaced by a
    single underscore, one for one. Text containing only ordinary single
    spaces therefore maps to the same slug that a plain space-to-underscore
    replacement would give, while the result never contains whitespace,
    which is not permitted in an HTML ``id`` value.
    """
    # Header text is assembled from both Characters and SpaceCharacters
    # tokens, so tabs and line breaks can appear here, not just spaces.
    return ''.join('_' if ch.isspace() else ch for ch in text)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there any chance these IDs ever end up as part of a URL (not just a fragment identifier)? Looks like they do further down and if so, that's a potential Unicode issue -- we might want to do something like Django's own built-in slugify template filter, which has a little Unicode-normalization song-and-dance to produce a readable but URL-safe result.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, these will probably end up in section editing URLs. :/ Need to look at this some more, because I want to make sure it matches up with existing anchor links from MindTouch. I don't think it quite does that all the way, either.


def __iter__(self):
input = html5lib_Filter.__iter__(self)

Expand All @@ -113,17 +131,63 @@ def __iter__(self):
attrs = dict(token['data'])
if 'id' in attrs:
self.known_ids.add(attrs['id'])
if 'name' in attrs:
self.known_ids.add(attrs['name'])

# Pass 2: Sprinkle in IDs where they're missing
for token in buffer:
if ('StartTag' == token['type'] and
# Pass 2: Sprinkle in IDs where they're needed
while len(buffer):
token = buffer.pop(0)

if not ('StartTag' == token['type'] and
token['name'] in SECTION_TAGS):
yield token
else:
attrs = dict(token['data'])
id = attrs.get('id', None)
if not id:

# Treat a name attribute as a human-specified ID override
name = attrs.get('name', None)
if name:
attrs['id'] = name
token['data'] = attrs.items()
yield token
continue

# If this is not a header, then generate a section ID.
if token['name'] not in HEAD_TAGS:
attrs['id'] = self.gen_id()
token['data'] = attrs.items()
yield token
yield token
continue

# If this is a header, then scoop up the rest of the header and
# gather the text it contains.
start, text, tmp = token, [], []
while len(buffer):
token = buffer.pop(0)
tmp.append(token)
if token['type'] in ('Characters', 'SpaceCharacters'):
text.append(token['data'])
elif ('EndTag' == token['type'] and
start['name'] == token['name']):
# Note: This is naive, and doesn't track other
# start/end tags nested in the header. Odd things might
# happen in a case like <h1><h1></h1></h1>. But, that's
# invalid markup and the worst case should be a
# truncated ID because all the text wasn't accumulated.
break
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This may be a silly question, but the comment here made me think of it: is there any mechanism enforcing uniqueness of IDs within the document? What happens if IDs end up colliding?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I kind of punted on that... There is a mechanism for uniqueness, but only for auto-generated IDs (eg. sect1, sect2, etc). For IDs based on element text or the name attribute, no uniqueness is enforced.

This is really a half-baked feature, ugh. :/

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

FWIW, I just filed bug 747403 to remember to put more work into this feature


# Slugify the text we found inside the header, generate an ID
# as a last resort.
slug = self.slugify(u''.join(text))
if not slug:
slug = self.gen_id()
attrs['id'] = slug
start['data'] = attrs.items()

# Finally, emit the tokens we scooped up for the header.
yield start
for t in tmp:
yield t


class SectionEditLinkFilter(html5lib_Filter):
Expand Down Expand Up @@ -152,17 +216,18 @@ def __iter__(self):
'title': _('Edit section'),
'class': 'edit-section',
'data-section-id': id,
'data-section-src-url': '%s?%s' % (
'data-section-src-url': u'%s?%s' % (
reverse('wiki.document',
args=[self.full_path],
locale=self.locale),
urlencode({'section': id, 'raw': 'true'})
urlencode({'section': id.encode('utf-8'),
'raw': 'true'})
),
'href': '%s?%s' % (
'href': u'%s?%s' % (
reverse('wiki.edit_document',
args=[self.full_path],
locale=self.locale),
urlencode({'section': id,
urlencode({'section': id.encode('utf-8'),
'edit_links': 'true'})
)
}},
Expand Down Expand Up @@ -385,12 +450,26 @@ def __iter__(self):
continue

ds_call = []
while len(buffer) and 'EndTag' != token['type']:
while len(buffer):
token = buffer.pop(0)
if 'Characters' == token['type']:
if token['type'] in ('Characters', 'SpaceCharacters'):
ds_call.append(token['data'])

ds_call = ''.join(ds_call).strip()
elif 'StartTag' == token['type']:
attrs = token['data']
if attrs:
a_out = (u' %s' % u' '.join(
(u'%s=%s' %
(name, quoteattr(val))
for name, val in attrs)))
else:
a_out = u''
ds_call.append(u'<%s%s>' % (token['name'], a_out))
elif 'EndTag' == token['type']:
if 'span' == token['name']:
break
ds_call.append('</%s>' % token['name'])

ds_call = u''.join(ds_call).strip()

# Snip off any "template." prefixes
strip_prefixes = ('template.', 'wiki.')
Expand All @@ -417,7 +496,11 @@ def __iter__(self):
if m:
ds_call = '%s()' % (m.group(1))

yield dict(
type="Characters",
data='{{ %s }}' % ds_call
)
# HACK: This is dirty, but seems like the easiest way to
# reconstitute the token stream, including what gets parsed as
# markup in the middle of macro parameters.
#
# eg. {{ Note("This is <strong>strongly</strong> discouraged") }}
parsed = parse('{{ %s }}' % ds_call)
for token in parsed.stream:
yield token
11 changes: 3 additions & 8 deletions apps/wiki/forms.py
Expand Up @@ -49,7 +49,6 @@
COMMENT_LONG = _lazy(u'Please keep the length of the comment to '
u'%(limit_value)s characters or less. It is currently '
u'%(show_value)s characters.')
TITLE_COLLIDES = _lazy(u'Another document with this title already exists.')
SLUG_COLLIDES = _lazy(u'Another document with this slug already exists.')
OTHER_COLLIDES = _lazy(u'Another document with this metadata already exists.')

Expand Down Expand Up @@ -162,7 +161,7 @@ def save(self, parent_doc, **kwargs):
class RevisionForm(forms.ModelForm):
"""Form to create new revisions."""

title = StrippedCharField(min_length=5, max_length=255,
title = StrippedCharField(min_length=2, max_length=255,
required=False,
widget=forms.TextInput(
attrs={'placeholder': TITLE_PLACEHOLDER}),
Expand Down Expand Up @@ -204,7 +203,7 @@ class RevisionForm(forms.ModelForm):
c in GROUPED_FIREFOX_VERSIONS]}

content = StrippedCharField(
min_length=5, max_length=100000,
min_length=5, max_length=300000,
label=_lazy(u'Content:'),
widget=forms.Textarea(attrs={'data-showfor':
json.dumps(showfor_data)}),
Expand Down Expand Up @@ -274,8 +273,7 @@ def _clean_collidable(self, name):
# to them are ignored for an iframe submission
return getattr(self.instance.document, name)

error_message = {'title': TITLE_COLLIDES,
'slug': SLUG_COLLIDES}.get(name, OTHER_COLLIDES)
error_message = {'slug': SLUG_COLLIDES}.get(name, OTHER_COLLIDES)
try:
existing_doc = Document.uncached.get(
locale=self.instance.document.locale,
Expand All @@ -297,9 +295,6 @@ def _clean_collidable(self, name):

return value

def clean_title(self):
return self._clean_collidable('title')

def clean_slug(self):
return self._clean_collidable('slug')

Expand Down
29 changes: 20 additions & 9 deletions apps/wiki/models.py
Expand Up @@ -36,7 +36,7 @@
ALLOWED_TAGS = bleach.ALLOWED_TAGS + [
'div', 'span', 'p', 'br', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6',
'pre', 'code',
'dl', 'dt', 'dd', 'small', 'sup',
'dl', 'dt', 'dd', 'small', 'sup', 'u',
'img',
'input',
'table', 'tbody', 'thead', 'tr', 'th', 'td',
Expand All @@ -46,13 +46,14 @@
'address'
]
ALLOWED_ATTRIBUTES = bleach.ALLOWED_ATTRIBUTES
ALLOWED_ATTRIBUTES['div'] = ['class', 'id']
ALLOWED_ATTRIBUTES['pre'] = ['class', 'id']
ALLOWED_ATTRIBUTES['span'] = ['style', ]
ALLOWED_ATTRIBUTES['div'] = ['style', 'class', 'id']
ALLOWED_ATTRIBUTES['p'] = ['style', 'class', 'id']
ALLOWED_ATTRIBUTES['pre'] = ['style', 'class', 'id']
ALLOWED_ATTRIBUTES['span'] = ['style', 'title', ]
ALLOWED_ATTRIBUTES['img'] = ['src', 'id', 'align', 'alt', 'class', 'is',
'title', 'style']
ALLOWED_ATTRIBUTES['a'] = ['id', 'class', 'href', 'title', ]
ALLOWED_ATTRIBUTES.update(dict((x, ['style', ]) for x in
ALLOWED_ATTRIBUTES['a'] = ['style', 'id', 'class', 'href', 'title', ]
ALLOWED_ATTRIBUTES.update(dict((x, ['style', 'name', ]) for x in
('h1', 'h2', 'h3', 'h4', 'h5', 'h6')))
ALLOWED_ATTRIBUTES.update(dict((x, ['id', ]) for x in (
'p', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'code', 'dl', 'dt', 'dd',
Expand All @@ -61,6 +62,16 @@
'progress', 'audio', 'video', 'details', 'datagrid', 'datalist', 'table',
'address'
)))
# CSS property names allowed inside `style` attributes. This list is handed to
# bleach.clean() as its `styles` argument when revision content is cleaned.
ALLOWED_STYLES = [
'border', 'float', 'overflow', 'min-height', 'vertical-align',
'white-space',
'margin', 'margin-left', 'margin-top', 'margin-bottom', 'margin-right',
'padding', 'padding-left', 'padding-top', 'padding-bottom', 'padding-right',
'background', # TODO: Maybe not this one, it can load URLs
'background-color',
'font', 'font-size', 'font-weight', 'text-align', 'text-transform',
'-moz-column-width', '-webkit-columns', 'columns',
]

# Disruptiveness of edits to translated versions. Numerical magnitude indicates
# the relative severity.
Expand Down Expand Up @@ -549,10 +560,10 @@ def locale_and_slug_from_path(path, request=None):
if '/' in path:
locale, slug = path.split('/', 1)

if locale in settings.MT_TO_KUMA_LOCALE_MAP:
if locale.lower() in settings.MT_TO_KUMA_LOCALE_MAP:
# If this looks like a MindTouch locale, remap it.
old_locale = locale
locale = settings.MT_TO_KUMA_LOCALE_MAP[locale]
locale = settings.MT_TO_KUMA_LOCALE_MAP[locale.lower()]
# But, we only need a redirect if the locale actually changed.
needs_redirect = (locale != old_locale)

Expand Down Expand Up @@ -938,7 +949,7 @@ def content_cleaned(self):
return self.content
return bleach.clean(
self.content, attributes=ALLOWED_ATTRIBUTES, tags=ALLOWED_TAGS,
strip_comments=False
styles=ALLOWED_STYLES, strip_comments=False
)

def get_previous(self):
Expand Down
18 changes: 2 additions & 16 deletions apps/wiki/templates/wiki/document.html
@@ -1,9 +1,7 @@
{# vim: set ts=2 et sts=2 sw=2: #}
{% extends "wiki/base.html" %}
{% from "wiki/includes/sidebar_modules.html" import document_tabs, document_notifications %}
{# L10n: {t} is the title of the document. {c} is the category. #}
{% set title = _('{t} | {c}')|f(t=document.title, c=document.get_category_display()) %}
{% block title %}{{ page_title(title) }}{% endblock %}
{% block title %}{{ page_title(document.title) }}{% endblock %}
{% set classes = 'document' %}
{% block bodyclass %}document{% endblock %}
{% if document.parent %}
Expand Down Expand Up @@ -38,19 +36,7 @@ <h1 class="page-title">{{ document.title }}</h1>
{% endif %}
</ul>
{% if kumascript_errors %}
<div class="warning" id="kumascript-errors">
<p>{{ _("There are scripting errors on this page:") }}</p>
<ul>
{% for error in kumascript_errors %}
<li class="error error-{{ error.level }}">
{# <span class="level">{{ error.level }}</span> #}
{% if error.args %}<span class="type">{{ error.args[0] }}</span>{% endif %}
&#8212;
<span class="message">{{ error.message }}</span>
</li>
{% endfor %}
</ul>
</div>
{% include 'wiki/includes/kumascript_errors.html' %}
{% endif %}
</header>
{% if redirected_from %}
Expand Down
2 changes: 1 addition & 1 deletion apps/wiki/templates/wiki/edit_document.html
Expand Up @@ -2,7 +2,7 @@
{% extends "wiki/base.html" %}
{% from "layout/errorlist.html" import errorlist %}
{% from "wiki/includes/sidebar_modules.html" import document_tabs %}
{% set title = _('Edit Article | {document}')|f(document=document.title) %}
{% set title = _('{document} | Edit Article')|f(document=document.title) %}
{% block title %}{{ page_title(title) }}{% endblock %}
{# TODO: Change KB url to landing page when we have one #}
{% set crumbs = [(url('wiki.category', document.category), document.get_category_display()),
Expand Down