Skip to content
This repository has been archived by the owner on Aug 26, 2022. It is now read-only.

Commit

Permalink
Merge pull request #392 from darkwing/seo-google-impact-772116
Browse files Browse the repository at this point in the history
fix bug 772116 - Adding SEO to documents, preventing Google from taking wrong text
  • Loading branch information
groovecoder committed Jul 18, 2012
2 parents 962b3a4 + 34236a4 commit a990284
Show file tree
Hide file tree
Showing 3 changed files with 86 additions and 7 deletions.
13 changes: 11 additions & 2 deletions apps/wiki/templates/wiki/document.html
Expand Up @@ -16,8 +16,17 @@
{% endif %}

{% block extrahead %}
  {# Machine-readable alternate representation of this document. #}
  <link rel="alternate" type="application/json" href="{{ url('wiki.json_slug', document.full_path) }}" />

  {# Open Graph metadata so social shares get a sensible title/image. #}
  <meta property="og:title" content="{{ document.title }}"/>
  <meta property="og:type" content="website"/>
  <meta property="og:image" content="{{ request.build_absolute_uri('/media/img/mdn-logo-sm.png') }}"/>
  <meta property="og:site_name" content="Mozilla Developer Network"/>

  {# seo_summary is the first meaningful paragraph of the document,
     computed in the view; omit the description tags when empty so
     search engines fall back to their own excerpt. #}
  {% if seo_summary %}
  <meta property="og:description" content="{{ seo_summary }}"/>
  <meta name="description" content="{{ seo_summary }}" />
  {% endif %}
{% endblock %}

{% block content %}
Expand Down
51 changes: 47 additions & 4 deletions apps/wiki/tests/test_views.py
Expand Up @@ -7,6 +7,7 @@
import hashlib
import os
import time
import unicodedata

from django.conf import settings
from django.contrib.auth.models import User
Expand Down Expand Up @@ -596,6 +597,50 @@ class DocumentEditingTests(TestCaseBase):

fixtures = ['test_users.json']

def test_seo_script(self):
    """The document view exposes the first real paragraph as the SEO
    description meta tag, skipping warning boxes, redirects and
    non-top-level paragraphs, and handling non-ASCII content."""
    client = LocalizingClient()
    client.login(username='admin', password='testpass')

    def make_page_and_compare_seo(slug, content, expected_summary):
        """Create a doc with `content`, then assert its rendered
        <meta name="description"> equals `expected_summary` (None
        means the tag must be absent)."""
        # Create the doc.
        data = new_document_data()
        data.update({'title': 'blah', 'slug': slug, 'content': content})
        response = client.post(reverse('wiki.new_document',
                                       locale='en-US'), data)
        eq_(302, response.status_code)

        # Fetch the newly created page with the same (logged-in,
        # localizing) client used to create it.
        response = client.get(reverse('wiki.document', args=[slug],
                                      locale='en-US'))
        page = pq(response.content)
        meta_content = page.find('meta[name=description]').attr('content')
        # pyquery returns unicode (or None); decode the expected bytes
        # instead of str()-ing the unicode, which raises
        # UnicodeEncodeError for non-ASCII summaries under Python 2.
        if isinstance(expected_summary, str):
            expected_summary = expected_summary.decode('utf-8')
        eq_(expected_summary, meta_content)

    # Weird chars for testing.
    complex_html_prefix = ('<div><p>slx<a>jsf</a></p><p>yah</p></div> '
                           '<blockquote><p>yah</p></blockquote>')
    fa_chars = 'ست داشتید هنوز در اینجا هست.ما تنها می ‎ خواستیم نام بهتری برای انجمن توسعه ‎ دهندگان'
    ko_chars = '웹 애플리케이션 특징 최종 단계로 접어들어 거의 완료되어가고 먼트 '
    ru_chars = 'Русский'
    ar_chars = 'عربي'

    # Test pages - very basic
    good = 'This is the content which should be chosen, man.'
    make_page_and_compare_seo('one', '<p>' + good + '</p>', good)
    # No content, no seo
    make_page_and_compare_seo('two', 'blahblahblahblah<br />', None)
    # No summary, no seo
    make_page_and_compare_seo('three',
                              '<div><p>You cant see me</p></div>', None)
    # Warning paragraph ignored
    make_page_and_compare_seo(
        'four',
        '<div class="geckoVersion"><p>No no no</p></div><p>yes yes yes</p>',
        'yes yes yes')
    # Warning paragraph ignored, first one chosen if multiple matches
    make_page_and_compare_seo(
        'five',
        '<div class="geckoVersion"><p>No no no</p></div>'
        '<p>yes yes yes</p><p>ignore ignore ignore</p>',
        'yes yes yes')
    # Weird chars
    make_page_and_compare_seo('fa', complex_html_prefix + '<p>' +
                              fa_chars + '</p>', fa_chars)
    make_page_and_compare_seo('ko', complex_html_prefix + '<p>' +
                              ko_chars + '</p>', ko_chars)
    make_page_and_compare_seo('ru', complex_html_prefix + '<p>' +
                              ru_chars + '</p>', ru_chars)
    make_page_and_compare_seo('ar', complex_html_prefix + '<p>' +
                              ar_chars + '</p>', ar_chars)

def test_create_on_404(self):
client = LocalizingClient()
client.login(username='admin', password='testpass')
Expand Down Expand Up @@ -625,12 +670,10 @@ def test_create_on_404(self):
eq_(404, resp.status_code)

# Ensure root level documents work, not just children
slug = 'noExist'
response = client.get(reverse('wiki.document', args=[slug], locale=locale))
response = client.get(reverse('wiki.document', args=['noExist'], locale=locale))
eq_(302, response.status_code)

slug = 'Template:NoExist'
response = client.get(reverse('wiki.document', args=[slug], locale=locale))
response = client.get(reverse('wiki.document', args=['Template:NoExist'], locale=locale))
eq_(302, response.status_code)

def test_retitling(self):
Expand Down
29 changes: 28 additions & 1 deletion apps/wiki/views.py
Expand Up @@ -67,6 +67,8 @@
import wiki.content
from wiki import kumascript

from pyquery import PyQuery as pq

import logging

log = logging.getLogger('k.wiki')
Expand Down Expand Up @@ -385,12 +387,37 @@ def set_common_headers(r):
# https://github.com/jsocol/kitsune/commit/
# f1ebb241e4b1d746f97686e65f49e478e28d89f2

# Build an SEO summary: the first meaningful top-level paragraph of the
# rendered document, handed to the template for the description/OpenGraph
# meta tags.
# TODO: Google only takes the first 180 characters, so maybe we find a
# logical way to find the end of a sentence before 180?
seo_summary = ''
try:
    if doc_html and not doc.is_template:
        # Need to add a BR to the page content otherwise pyQuery won't
        # find a <p></p> element if it's the only element in the doc_html.
        page = pq(doc_html + '<br />')
        paragraphs = page.find('p')
        for index in range(paragraphs.length):
            item = paragraphs.eq(index)
            text = item.text()
            # Checking for a parent length of 2 because we don't want
            # p's wrapped in DIVs ("<div class='warning'>") and pyQuery
            # adds "<html><div>" wrapping to the entire document.
            if text and 'Redirect' not in text and item.parents().length == 2:
                seo_summary = text.strip()
                break
except XMLSyntaxError:
    # Malformed document HTML: serve the page without a summary rather
    # than failing the view. Use the module logger ('k.wiki'), not the
    # root logger, for consistency with the rest of this file.
    log.debug('Could not generate SEO summary')

data = {'document': doc, 'document_html': doc_html, 'toc_html': toc_html,
        'redirected_from': redirected_from,
        'related': related, 'contributors': contributors,
        'fallback_reason': fallback_reason,
        'kumascript_errors': ks_errors,
        'render_raw_fallback': render_raw_fallback,
        'seo_summary': seo_summary}
data.update(SHOWFOR_DATA)

response = jingo.render(request, 'wiki/document.html', data)
Expand Down

0 comments on commit a990284

Please sign in to comment.