Skip to content

Commit

Permalink
Merge pull request #708 from okfn/708-markdown-fixes
Browse files Browse the repository at this point in the history
Markdown fixes
  • Loading branch information
domoritz committed Apr 17, 2013
2 parents 250233e + 85a3d3e commit cb9849f
Show file tree
Hide file tree
Showing 16 changed files with 138 additions and 265 deletions.
2 changes: 1 addition & 1 deletion ckan/controllers/api.py
Expand Up @@ -571,7 +571,7 @@ def _get_search_params(cls, request_params):

def markdown(self, ver=None):
    '''Render the markdown supplied in the ``q`` request parameter.

    The raw text is read from ``request.params`` (empty string when the
    parameter is missing), rendered with ``h.render_markdown`` and handed
    to ``self._finish_ok``.  ``ver`` is accepted but not used here.
    '''
    raw_markdown = request.params.get('q', '')
    # Render through the shared helper only.  The earlier call to
    # ckan.misc.MarkdownFormat() was immediately overwritten and relied on
    # ckan/misc.py, which this changeset deletes.
    results = h.render_markdown(raw_markdown)

    return self._finish_ok(results)

Expand Down
9 changes: 1 addition & 8 deletions ckan/controllers/group.py
Expand Up @@ -201,14 +201,7 @@ def _read(self, id, limit):
else:
q += ' groups:"%s"' % c.group_dict.get('name')

try:
description_formatted = ckan.misc.MarkdownFormat().to_html(
c.group_dict.get('description', ''))
c.description_formatted = genshi.HTML(description_formatted)
except Exception, e:
error_msg = "<span class='inline-warning'>%s</span>" %\
_("Cannot render description")
c.description_formatted = genshi.HTML(error_msg)
c.description_formatted = h.render_markdown(c.group_dict.get('description'))

context['return_query'] = True

Expand Down
14 changes: 1 addition & 13 deletions ckan/controllers/user.py
Expand Up @@ -2,11 +2,9 @@
from urllib import quote

from pylons import config
import genshi

import ckan.lib.i18n as i18n
import ckan.lib.base as base
import ckan.misc as misc
import ckan.model as model
import ckan.lib.helpers as h
import ckan.new_authz as new_authz
Expand Down Expand Up @@ -73,7 +71,7 @@ def _setup_template_variables(self, context, data_dict):
abort(401, _('Not authorized to see this page'))
c.user_dict = user_dict
c.is_myself = user_dict['name'] == c.user
c.about_formatted = self._format_about(user_dict['about'])
c.about_formatted = h.render_markdown(user_dict['about'])

## end hooks

Expand Down Expand Up @@ -622,13 +620,3 @@ def unfollow(self, id):
or e.error_dict)
h.flash_error(error_message)
h.redirect_to(controller='user', action='read', id=id)

def _format_about(self, about):
    '''Turn a user's "about" text into HTML for display.

    The text is converted with ``misc.MarkdownFormat().to_html`` and the
    result is parsed with ``genshi.HTML``.  When genshi raises
    ``ParseError`` the failure is logged and a translated error message is
    returned instead of the parsed markup.

    NOTE(review): the visible call site now assigns
    ``h.render_markdown(user_dict['about'])`` directly and ckan/misc.py is
    listed as deleted -- confirm whether this method still has callers.
    '''
    rendered = misc.MarkdownFormat().to_html(about)
    try:
        return genshi.HTML(rendered)
    except genshi.ParseError as err:
        log.error('Could not print "about" field Field: %r Error: %r',
                  about, err)
        return _('Error: Could not parse About text')
2 changes: 1 addition & 1 deletion ckan/lib/create_test_data.py
Expand Up @@ -464,7 +464,7 @@ def create(cls, auth_profile="", package_type=None):
model.Session.add_all([
model.User(name=u'tester', apikey=u'tester', password=u'tester'),
model.User(name=u'joeadmin', password=u'joeadmin'),
model.User(name=u'annafan', about=u'I love reading Annakarenina. My site: <a href="http://anna.com">anna.com</a>', password=u'annafan'),
model.User(name=u'annafan', about=u'I love reading Annakarenina. My site: http://anna.com', password=u'annafan'),
model.User(name=u'russianfan', password=u'russianfan'),
sysadmin,
])
Expand Down
3 changes: 1 addition & 2 deletions ckan/lib/dictization/model_dictize.py
Expand Up @@ -4,7 +4,6 @@
from pylons import config
from sqlalchemy.sql import select

import ckan.misc as misc
import ckan.logic as logic
import ckan.plugins as plugins
import ckan.lib.helpers as h
Expand Down Expand Up @@ -530,7 +529,7 @@ def package_to_api(pkg, context):
dictized['license'] = pkg.license.title if pkg.license else None
dictized['ratings_average'] = pkg.get_average_rating()
dictized['ratings_count'] = len(pkg.ratings)
dictized['notes_rendered'] = misc.MarkdownFormat().to_html(pkg.notes)
dictized['notes_rendered'] = h.render_markdown(pkg.notes)

site_url = config.get('ckan.site_url', None)
if site_url:
Expand Down
75 changes: 70 additions & 5 deletions ckan/lib/helpers.py
Expand Up @@ -636,7 +636,7 @@ def markdown_extract(text, extract_length=190):
will not be truncated.'''
if (text is None) or (text.strip() == ''):
return ''
plain = re.sub(r'<.*?>', '', markdown(text))
plain = RE_MD_HTML_TAGS.sub('', markdown(text))
if not extract_length or len(plain) < extract_length:
return literal(plain)
return literal(unicode(truncate(plain, length=extract_length, indicator='...', whole_word=True)))
Expand Down Expand Up @@ -956,7 +956,7 @@ def related_item_link(related_item_dict):

def tag_link(tag):
url = url_for(controller='tag', action='read', id=tag['name'])
return link_to(tag['name'], url)
return link_to(tag.get('title', tag['name']), url)


def group_link(group):
Expand Down Expand Up @@ -1367,12 +1367,77 @@ def get_request_param(parameter_name, default=None):
return request.params.get(parameter_name, default)


# Compiled patterns used by the markdown helpers below.  (A dangling
# ``def render_markdown(data):`` header with no body used to sit here; the
# real definition, with the ``auto_link`` parameter, lives further down.)

# Capture the text that follows the start of the string or any tag other
# than <a ...>, up to the next tag or the end of the string.  For example
# `<b>moo</b>` yields `moo`.  Text directly after an <a> tag is skipped so
# that URLs which are already link text are not linkified a second time.
# Group 1 is the preceding tag (or empty), group 2 is the text.
RE_MD_GET_INNER_HTML = re.compile(
    r'(^|(?:<(?!a\b)[^>]*>))([^<]+)(?=<|$)',
    flags=re.UNICODE
)

# Find `internal links`, e.g. tag:moo, dataset:1234, tag:"my tag".
# Group 1 is the object type, group 2 the name including any surrounding
# quotes; spaces are only accepted inside a quoted name.
RE_MD_INTERNAL_LINK = re.compile(
    r'\b(tag|package|dataset|group):((")?(?(3)[ \w\-.]+|[\w\-.]+)(?(3)"))',
    flags=re.UNICODE
)

# Find external links, e.g. http://foo.com, https://bar.org/foobar.html
RE_MD_EXTERNAL_LINK = re.compile(
    r'(\bhttps?:\/\/[\w\-\.,@?^=%&;:\/~\\+#]*)',
    flags=re.UNICODE
)

# Match `<...>` spans that contain no further `<` or `>`, so a lone `<` in
# running text is not treated as a tag and survives for markdown to handle.
RE_MD_HTML_TAGS = re.compile('<[^><]*>')


def html_auto_link(data):
    '''Linkify the text portions of an HTML string.

    Only the spans matched by RE_MD_GET_INNER_HTML are rewritten; within
    each span:

    tag:...      becomes a tag link
    dataset:...  becomes a dataset link (package:... is treated the same)
    group:...    becomes a group link
    http://...   becomes an external link opened in a new window

    Returns the HTML string with the replacements applied.
    '''
    link_builders = {
        'tag': tag_link,
        'group': group_link,
        'dataset': dataset_link,
        'package': dataset_link,
    }

    def internal_anchor(match):
        # The link text keeps the original `type:name` form (quotes
        # included); the name passed on has its quotes stripped.
        kind, name = match.group(1), match.group(2)
        entity = {'name': name.strip('"'), 'title': '%s:%s' % (kind, name)}
        return link_builders[kind](entity)

    def external_anchor(match):
        url = match.group(1)
        return '<a href="%s" target="_blank" rel="nofollow">%s</a>' \
            % (url, url)

    def linkify_text(match):
        # group(1) is the tag (or empty string) preceding the text and is
        # emitted unchanged; only the text in group(2) is rewritten.
        text = RE_MD_INTERNAL_LINK.sub(internal_anchor, match.group(2))
        text = RE_MD_EXTERNAL_LINK.sub(external_anchor, text)
        return match.group(1) + text

    return RE_MD_GET_INNER_HTML.sub(linkify_text, data)


def render_markdown(data, auto_link=True):
    '''Return ``data`` rendered from markdown to an HTML literal.

    :param data: markdown source; ``None`` or an empty string yields ``''``
    :param auto_link: when true, pass the rendered HTML through
        ``html_auto_link`` so that ``tag:...``, ``dataset:...``,
        ``group:...`` and ``http(s)://`` text become links
    :returns: ``''`` for empty input, otherwise the rendered HTML wrapped
        in ``literal``
    '''
    # cope with data == None
    if not data:
        return ''
    # Strip anything that looks like an HTML tag before rendering.  The
    # stale early return through ckan.misc.MarkdownFormat() was removed:
    # it made everything below unreachable and relied on a module this
    # changeset deletes.
    data = RE_MD_HTML_TAGS.sub('', data.strip())
    # NOTE(review): safe_mode was deprecated in later Python-Markdown
    # releases -- confirm against the pinned version.
    data = markdown(data, safe_mode=True)
    # tags can be added by tag:... or tag:"...." and a link will be made
    # from it
    if auto_link:
        data = html_auto_link(data)
    return literal(data)


def format_resource_items(items):
Expand Down
9 changes: 2 additions & 7 deletions ckan/lib/package_saver.py
@@ -1,4 +1,3 @@
import genshi
from sqlalchemy import orm
import ckan.lib.helpers as h
from ckan.lib.base import *
Expand All @@ -22,12 +21,8 @@ def render_package(cls, pkg, context):
render.
Note that the actual calling of render('package/read') is left
to the caller.'''
try:
notes_formatted = ckan.misc.MarkdownFormat().to_html(pkg.get('notes',''))
c.pkg_notes_formatted = genshi.HTML(notes_formatted)
except Exception, e:
error_msg = "<span class='inline-warning'>%s</span>" % _("Cannot render package description")
c.pkg_notes_formatted = genshi.HTML(error_msg)
c.pkg_notes_formatted = h.render_markdown(pkg.get('notes'))

c.current_rating, c.num_ratings = ckan.rating.get_rating(context['package'])
url = pkg.get('url', '')
c.pkg_url_link = h.link_to(url, url, rel='foaf:homepage', target='_blank') \
Expand Down
95 changes: 0 additions & 95 deletions ckan/misc.py

This file was deleted.

4 changes: 2 additions & 2 deletions ckan/model/package.py
Expand Up @@ -17,7 +17,6 @@
import activity
import extension

import ckan.misc
import ckan.lib.dictization

__all__ = ['Package', 'package_table', 'package_revision_table',
Expand Down Expand Up @@ -216,7 +215,8 @@ def as_dict(self, ref_package_by='name', ref_group_by='name'):
if self.metadata_modified else None
_dict['metadata_created'] = self.metadata_created.isoformat() \
if self.metadata_created else None
_dict['notes_rendered'] = ckan.misc.MarkdownFormat().to_html(self.notes)
import ckan.lib.helpers as h
_dict['notes_rendered'] = h.render_markdown(self.notes)
_dict['type'] = self.type or u'dataset'
#tracking
import ckan.model as model
Expand Down
2 changes: 1 addition & 1 deletion ckan/templates_legacy/package/read_core.html
Expand Up @@ -10,7 +10,7 @@
<div id="dataset-overview">

<!-- Description -->
<div class="notes" py:if="str(c.pkg_notes_formatted).strip()">
<div class="notes" py:if="c.pkg_notes_formatted">
<div id="notes-extract">
${c.pkg_notes_formatted}
</div>
Expand Down
20 changes: 8 additions & 12 deletions ckan/tests/functional/test_package.py
Expand Up @@ -309,10 +309,6 @@ def test_read(self):
assert anna.version in res
assert anna.url in res
assert 'Some test notes' in res
self.check_named_element(res, 'a',
'http://ckan.net/',
'target="_blank"',
'rel="nofollow"')
assert '<strong>Some bolded text.</strong>' in res
self.check_tag_and_data(res, 'left arrow', '&lt;')
self.check_tag_and_data(res, 'umlaut', u'\xfc')
Expand Down Expand Up @@ -350,17 +346,17 @@ def test_read_internal_links(self):
pkg_name = u'link-test',
CreateTestData.create_arbitrary([
{'name':pkg_name,
'notes':'Decoy link here: decoy:decoy, real links here: package:pkg-1, ' \
'notes':'Decoy link here: decoy:decoy, real links here: dataset:pkg-1, ' \
'tag:tag_1 group:test-group-1 and a multi-word tag: tag:"multi word with punctuation."',
}
])
offset = url_for(controller='package', action='read', id=pkg_name)
res = self.app.get(offset)
def check_link(res, controller, id):
id_in_uri = id.strip('"').replace(' ', '%20') # remove quotes and percent-encode spaces
self.check_tag_and_data(res, 'a ', '/%s/%s' % (controller, id_in_uri),
'%s:%s' % (controller, id))
check_link(res, 'package', 'pkg-1')
self.check_tag_and_data(res, 'a ', '%s/%s' % (controller, id_in_uri),
'%s:%s' % (controller, id.replace('"', '&#34;')))
check_link(res, 'dataset', 'pkg-1')
check_link(res, 'tag', 'tag_1')
check_link(res, 'tag', '"multi word with punctuation."')
check_link(res, 'group', 'test-group-1')
Expand Down Expand Up @@ -1557,10 +1553,10 @@ def teardown(self):

def test_markdown_html_whitelist(self):
    '''Raw HTML from the dataset notes must not reach the rendered page.

    Every fragment is passed to ``fail_if_fragment``.  The earlier
    ``assert_fragment`` calls for the same table/anchor markup were
    removed: they contradicted the checks below, so the test could never
    pass.
    '''
    self.body = str(self.res)
    self.fail_if_fragment('<table width="100%" border="1">')
    self.fail_if_fragment('<td rowspan="2"><b>Description</b></td>')
    self.fail_if_fragment('<a href="http://www.nber.org/patents/subcategories.txt" target="_blank" rel="nofollow">subcategory.txt</a>')
    self.fail_if_fragment('<td colspan="2"><center>--</center></td>')
    self.fail_if_fragment('<script>')

def assert_fragment(self, fragment):
Expand Down

0 comments on commit cb9849f

Please sign in to comment.