Skip to content
Browse files

Merge pull request #872 from lmorchard/809523-page-summary-api

fix bug 809523: API access to document summary
  • Loading branch information...
2 parents 0a808ee + cec50d3 commit ad1d34c5648d9f4d7a0b50c5bb1a50e8238fe88b @groovecoder groovecoder committed
Showing with 97 additions and 12 deletions.
  1. +27 −10 apps/wiki/helpers.py
  2. +47 −0 apps/wiki/tests/test_helpers.py
  3. +14 −1 apps/wiki/tests/test_views.py
  4. +9 −1 apps/wiki/views.py
View
37 apps/wiki/helpers.py
@@ -13,16 +13,26 @@
import logging
from sumo.urlresolvers import reverse
+import wiki
from wiki import DIFF_WRAP_COLUMN
-def get_seo_description(content, locale=None):
+def get_seo_description(content, locale=None, strip_markup=True):
# Create an SEO summary
# TODO: Google only takes the first 180 characters, so maybe we find a
# logical way to find the end of sentence before 180?
seo_summary = ''
try:
if content:
+ # Try constraining the search for summary to an explicit "Summary"
+ # section, if any.
+ summary_section = (wiki.content
+ .parse(content)
+ .extractSection('Summary')
+ .serialize())
+ if summary_section:
+ content = summary_section
+
# Need to add a BR to the page content, otherwise pyQuery won't find
# a <p></p> element if it's the only element in the doc_html
seo_analyze_doc_html = content + '<br />'
@@ -31,13 +41,19 @@ def get_seo_description(content, locale=None):
# Look for the SEO summary class first
summaryClasses = page.find('.seoSummary')
if len(summaryClasses):
- seo_summary = summaryClasses.text()
+ if strip_markup:
+ seo_summary = summaryClasses.text()
+ else:
+ seo_summary = summaryClasses.html()
else:
paragraphs = page.find('p')
if paragraphs.length:
for p in range(len(paragraphs)):
item = paragraphs.eq(p)
- text = item.text()
+ if strip_markup:
+ text = item.text()
+ else:
+ text = item.html()
# Checking for a parent length of 2
# because we don't want p's wrapped
# in DIVs ("<div class='warning'>") and pyQuery adds
@@ -52,13 +68,14 @@ def get_seo_description(content, locale=None):
except:
pass
- # Post-found cleanup
- # remove markup chars
- seo_summary = seo_summary.replace('<', '').replace('>', '')
- # remove spaces around some punctuation added by PyQuery
- if locale == 'en-US':
- seo_summary = re.sub(r' ([,\)\.])', r'\1', seo_summary)
- seo_summary = re.sub(r'(\() ', r'\1', seo_summary)
+ if strip_markup:
+ # Post-found cleanup
+ # remove markup chars
+ seo_summary = seo_summary.replace('<', '').replace('>', '')
+ # remove spaces around some punctuation added by PyQuery
+ if locale == 'en-US':
+ seo_summary = re.sub(r' ([,\)\.])', r'\1', seo_summary)
+ seo_summary = re.sub(r'(\() ', r'\1', seo_summary)
return seo_summary
View
47 apps/wiki/tests/test_helpers.py
@@ -1,11 +1,58 @@
from nose.tools import eq_
from test_utils import TestCase
+from wiki.tests import normalize_html
from wiki.helpers import get_seo_description
class GetSEODescriptionTests(TestCase):
+ def test_summary_section(self):
+ content = (u'<h2 id="Summary">Summary</h2><p>The <strong>Document Object '
+ 'Model'
+ '</strong> (<strong>DOM</strong>) is an API for '
+ '<a href="/en-US/docs/HTML" title="en-US/docs/HTML">HTML</a> and '
+ '<a href="/en-US/docs/XML" title="en-US/docs/XML">XML</a> '
+ 'documents. It provides a structural representation of the '
+ 'document, enabling you to modify its content and visual '
+ 'presentation by using a scripting language such as '
+ '<a href="/en-US/docs/JavaScript" '
+ 'title="https://developer.mozilla.org/en-US/docs/JavaScript">'
+ 'JavaScript</a>.</span></p>')
+ expected = ('The Document Object Model (DOM) is an API for HTML and '
+ 'XML documents. It provides a structural representation of the'
+ ' document, enabling you to modify its content and visual'
+ ' presentation by using a scripting language such as'
+ ' JavaScript.')
+ eq_(expected, get_seo_description(content, 'en-US'))
+
+ def test_keep_markup(self):
+ content = """
+ <h2 id="Summary">Summary</h2>
+ <p>The <strong>Document Object Model </strong>
+ (<strong>DOM</strong>) is an API for <a href="/en-US/docs/HTML"
+ title="en-US/docs/HTML">HTML</a> and <a href="/en-US/docs/XML"
+ title="en-US/docs/XML">XML</a> documents. It provides a structural
+ representation of the document, enabling you to modify its content
+ and visual presentation by using a scripting language such as <a
+ href="/en-US/docs/JavaScript"
+ title="https://developer.mozilla.org/en-US/docs/JavaScript">
+ JavaScript</a>.</span></p>
+ """
+ expected = """
+ The <strong>Document Object Model </strong>
+ (<strong>DOM</strong>) is an API for <a href="/en-US/docs/HTML"
+ title="en-US/docs/HTML">HTML</a> and <a href="/en-US/docs/XML"
+ title="en-US/docs/XML">XML</a> documents. It provides a structural
+ representation of the document, enabling you to modify its content
+ and visual presentation by using a scripting language such as <a
+ href="/en-US/docs/JavaScript"
+ title="https://developer.mozilla.org/en-US/docs/JavaScript">
+ JavaScript</a>.</span>
+ """
+ eq_(normalize_html(expected),
+ normalize_html(get_seo_description(content, 'en-US', False)))
+
def test_html_elements_spaces(self):
# No spaces with html tags
content = (u'<p><span class="seoSummary">The <strong>Document Object '
View
15 apps/wiki/tests/test_views.py
@@ -167,11 +167,13 @@ def test_toc_view(self):
'</ol></li></ol>')
def test_children_view(self):
+ test_content = '<p>Test <a href="http://example.com">Summary</a></p>'
def _make_doc(title, slug, parent=None):
doc = document(title=title, slug=slug, save=True)
+ doc.html = test_content
if parent:
doc.parent_topic = parent
- doc.save()
+ doc.save()
return doc
root_doc = _make_doc('Root', 'Root')
@@ -192,6 +194,8 @@ def _make_doc(title, slug, parent=None):
# Basic structure creation testing
eq_(json_obj['slug'], 'Root')
+ eq_(json_obj['summary'],
+ 'Test <a href="http://example.com">Summary</a>')
eq_(len(json_obj['subpages']), 2)
eq_(len(json_obj['subpages'][0]['subpages']), 2)
eq_(json_obj['subpages'][0]['subpages'][1]['title'], 'Grandchild 2')
@@ -219,6 +223,15 @@ def _depth_test(depth, aught):
json_obj = json.loads(resp.content)
eq_(json_obj['subpages'][0]['title'], 'A Child')
+ def test_summary_view(self):
+ """The ?summary option should restrict document view to summary"""
+ d, r = doc_rev("""
+ <p>Foo bar <a href="http://example.com">baz</a></p>
+ <p>Quux xyzzy</p>
+ """)
+ resp = self.client.get('%s?raw&summary' % d.get_absolute_url())
+ eq_(resp.content, 'Foo bar <a href="http://example.com">baz</a>')
+
def test_revision_view_bleached_content(self):
"""Bug 821988: Revision content should be cleaned with bleach"""
d, r = doc_rev("""
View
10 apps/wiki/views.py
@@ -275,8 +275,10 @@ def _get_document_for_json(doc, addLocaleToTitle=False):
title += ' [' + doc.locale + ']'
summary = ''
- if doc.current_revision:
+ if doc.current_revision and doc.current_revision.summary:
summary = doc.current_revision.summary
+ else:
+ summary = get_seo_description(doc.html, doc.locale, False)
# Map out translations
translations = []
@@ -420,6 +422,7 @@ def document(request, document_slug, document_locale):
# Grab some parameters that affect output
section_id = request.GET.get('section', None)
show_raw = request.GET.get('raw', False) is not False
+ show_summary = request.GET.get('summary', False) is not False
is_include = request.GET.get('include', False) is not False
need_edit_links = request.GET.get('edit_links', False) is not False
@@ -503,6 +506,10 @@ def set_common_headers(r):
# If this is an include, filter out the class="noinclude" blocks.
if is_include:
doc_html = (wiki.content.filter_out_noinclude(doc_html))
+
+ # If ?summary is on, just serve up the summary as doc HTML
+ if show_summary:
+ doc_html = get_seo_description(doc_html, doc.locale, False)
# if ?raw parameter is supplied, then we respond with raw page source
# without template wrapping or edit links. This is also permissive for
@@ -1278,6 +1285,7 @@ def _make_doc_structure(d, level):
'slug': d.slug,
'locale': d.locale,
'url': d.get_absolute_url(),
+ 'summary': get_seo_description(d.html, d.locale, False),
'subpages': []
}

0 comments on commit ad1d34c

Please sign in to comment.
Something went wrong with that request. Please try again.