This repository has been archived by the owner on Aug 26, 2022. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 679
/
helpers.py
239 lines (205 loc) · 8.86 KB
/
helpers.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
# coding=utf-8
import difflib
import re
import urllib
import constance.config
from jingo import register
import jinja2
from pyquery import PyQuery as pq
from tidylib import tidy_document
from tower import ugettext as _
import logging
from sumo.urlresolvers import reverse
import wiki
from wiki import DIFF_WRAP_COLUMN
def get_seo_description(content, locale=None, strip_markup=True):
    """Build an SEO summary string from a document's HTML content.

    The summary is looked up, in order of preference, in:

    1. elements carrying the ``seoSummary`` class;
    2. the first acceptable ``<p>`` paragraph (see ``_find_seo_summary``).

    When the content has an explicit "Summary" section, the search is
    restricted to that section.

    :arg content: HTML source to summarize; falsy content yields ``''``.
    :arg locale: locale code; the punctuation spacing cleanup is only
        applied when this is ``'en-US'``.
    :arg strip_markup: when true, return text content and strip any
        remaining ``<``/``>`` characters; when false, return inner HTML
        untouched.
    :returns: the summary, or ``''`` when nothing suitable was found or the
        content could not be processed.
    """
    # TODO: Google only takes the first 180 characters, so maybe we find a
    # logical way to find the end of sentence before 180?
    seo_summary = ''
    if content:
        try:
            seo_summary = _find_seo_summary(content, strip_markup)
        except Exception:
            # Summary extraction is best effort: a document that cannot be
            # parsed simply gets no summary. Unlike a bare ``except`` this
            # lets KeyboardInterrupt/SystemExit through and leaves a trace
            # of what went wrong.
            logging.getLogger(__name__).debug(
                'SEO summary extraction failed', exc_info=True)

    if strip_markup:
        # Post-found cleanup
        # remove markup chars
        seo_summary = seo_summary.replace('<', '').replace('>', '')
        # remove spaces around some punctuation added by PyQuery
        if locale == 'en-US':
            seo_summary = re.sub(r' ([,\)\.])', r'\1', seo_summary)
            seo_summary = re.sub(r'(\() ', r'\1', seo_summary)
    return seo_summary


def _find_seo_summary(content, strip_markup):
    """Return the raw summary found in ``content``, or '' if there is none."""
    # Try constraining the search for summary to an explicit "Summary"
    # section, if any.
    summary_section = (wiki.content
                       .parse(content)
                       .extractSection('Summary')
                       .serialize())
    if summary_section:
        content = summary_section

    # Need to add a BR to the page content otherwise pyQuery wont find
    # a <p></p> element if it's the only element in the doc_html
    page = pq(content + '<br />')

    # Look for the SEO summary class first
    summary_nodes = page.find('.seoSummary')
    if len(summary_nodes):
        if strip_markup:
            return summary_nodes.text()
        return summary_nodes.html()

    paragraphs = page.find('p')
    for index in range(len(paragraphs)):
        item = paragraphs.eq(index)
        text = item.text() if strip_markup else item.html()
        # Checking for a parent length of 2 because we don't want p's
        # wrapped in DIVs ("<div class='warning'>") and pyQuery adds
        # "<html><div>" wrapping to entire document.
        # Paragraphs mentioning 'Redirect' or containing a left-pointing
        # double angle quote (u'\xab') are skipped as well.
        if (len(text) and
                'Redirect' not in text and
                u'\xab' not in text and
                item.parents().length == 2):
            return text.strip()
    return ''
def compare_url(doc, from_id, to_id):
    """Return the URL comparing revisions ``from_id`` and ``to_id`` of ``doc``.

    The path comes from reversing ``wiki.compare_revisions`` for the
    document's full path and locale; the two revision ids are appended as
    the ``from`` and ``to`` query parameters.
    """
    base_url = reverse('wiki.compare_revisions', args=[doc.full_path],
                       locale=doc.locale)
    query_string = urllib.urlencode({'from': from_id, 'to': to_id})
    return base_url + '?' + query_string
# http://stackoverflow.com/q/774316/571420
def show_diff(seqm, lines=None):
    """Render the changes held by a SequenceMatcher as inline HTML.

    Inserted text is wrapped in ``<ins>``, deleted text in ``<del>``, and
    unchanged text is reduced to at most ``lines`` lines of context around
    each change, with ``<p>...</p>`` marking where text was elided.
    Changes consisting only of non-word characters are dropped, together
    with the context that preceded them.

    :arg seqm: a ``difflib.SequenceMatcher`` instance whose ``a`` and ``b``
        are strings.
    :arg lines: number of context lines to keep around each change.
        Defaults to ``constance.config.FEED_DIFF_CONTEXT_LINES``.
    :returns: the diff as a single string.
    :raises RuntimeError: if the matcher yields an unknown opcode.
    """
    if lines is None:
        lines = constance.config.FEED_DIFF_CONTEXT_LINES

    full_output = []
    for opcode, a0, a1, b0, b1 in seqm.get_opcodes():
        if opcode == 'equal':
            full_output.append(seqm.a[a0:a1])
        elif opcode == 'insert':
            full_output.append("<ins>" + seqm.b[b0:b1] + "</ins>")
        elif opcode == 'delete':
            full_output.append("<del>" + seqm.a[a0:a1] + "</del>")
        elif opcode == 'replace':
            full_output.append(" <del>" + seqm.a[a0:a1] + "</del> ")
            full_output.append(" <ins>" + seqm.b[b0:b1] + "</ins> ")
        else:
            raise RuntimeError("unexpected opcode")

    output = []
    whitespace_change = False
    for piece in full_output:
        if '<ins>' in piece or '<del>' in piece:
            # a change
            if re.match(r'<(ins|del)>\W+</(ins|del)>', piece):
                # the change is whitespace,
                # ignore it and remove preceding context
                output = _drop_last(output, lines)
                whitespace_change = True
            else:
                output.append(piece)
            continue

        context_lines = piece.splitlines()
        preceding = ['<p>...</p>'] + _last(context_lines, lines)
        if output == []:
            # first context only shows preceding lines for next change
            context = preceding
        elif whitespace_change:
            # context shows preceding lines for next change
            context = preceding
            whitespace_change = False
        else:
            # context shows subsequent lines
            # and preceding lines for next change
            context = context_lines[:lines] + preceding
        output = output + context

    # remove extra context from the very end, unless its the only context
    if len(output) > lines + 1:  # context lines and the change line
        output = _drop_last(output, lines)
    return ''.join(output)


def _last(items, count):
    """Return the last ``count`` items; plain ``items[-0:]`` would keep all."""
    return items[-count:] if count > 0 else []


def _drop_last(items, count):
    """Return ``items`` minus its last ``count``; ``items[:-0]`` would be []."""
    return items[:-count] if count > 0 else items
def _massage_diff_content(content):
    """Run ``content`` through ``tidy_document`` before it gets diffed.

    Returns the result of ``tidy_document`` unchanged; callers in this
    module unpack it as a ``(document, errors)`` pair.
    """
    return tidy_document(
        content,
        options={'output-xhtml': 0, 'force-output': 1})
@register.filter
def bugize_text(content):
    """Escape ``content`` and link "bug NNN" mentions to Bugzilla.

    A mention is the word ``bug``, whitespace, an optional ``#`` and a run
    of digits; it is replaced by an anchor pointing at that bug id.
    """
    escaped = jinja2.escape(content)
    bug_link = jinja2.Markup(
        '<a href="https://bugzilla.mozilla.org/show_bug.cgi?id=\\1" '
        'target="_blank">bug \\1</a>')
    return re.sub(r'bug\s+#?(\d+)', bug_link, escaped)
@register.function
def format_comment(rev):
    """Return the revision comment, bug-linked and annotated for page moves.

    The comment (or an empty string when there is none) goes through
    ``bugize_text``. If a previous revision exists and its slug differs
    from this revision's slug, a "Moved From ... to ..." note is appended.
    """
    previous = rev.get_previous()
    formatted = bugize_text(rev.comment or "")

    # Only a slug change between consecutive revisions counts as a move.
    if not (previous and previous.slug != rev.slug):
        return formatted

    move_note = jinja2.Markup('<span class="slug-change">'
                              'Moved From <strong>%s</strong> '
                              'to <strong>%s</strong></span>')
    return formatted + move_note % (previous.slug, rev.slug)
@register.function
def diff_table(content_from, content_to, prev_id, curr_id):
    """Return a side-by-side HTML diff table of two pieces of content.

    Both inputs are tidied first, then compared line by line with
    ``difflib.HtmlDiff``. The columns are titled "Revision <prev_id>" and
    "Revision <curr_id>". If building the table raises ``RuntimeError``, a
    warning box with an error message is returned instead.
    """
    old_html, _old_errors = _massage_diff_content(content_from)
    new_html, _new_errors = _massage_diff_content(content_to)
    differ = difflib.HtmlDiff(wrapcolumn=DIFF_WRAP_COLUMN)
    old_lines = old_html.splitlines()
    new_lines = new_html.splitlines()
    try:
        table = differ.make_table(
            old_lines,
            new_lines,
            _("Revision %s") % prev_id,
            _("Revision %s") % curr_id,
            context=True,
            numlines=constance.config.DIFF_CONTEXT_LINES)
    except RuntimeError:
        # some diffs hit a max recursion error
        table = ('<div class="warning"><p>%s</p></div>'
                 % _(u'There was an error generating the content.'))
    return jinja2.Markup(table)
@register.function
def diff_inline(content_from, content_to):
    """Return an inline (ins/del) HTML diff of two pieces of content.

    Both inputs are tidied, compared with ``difflib.SequenceMatcher`` and
    rendered by ``show_diff``; the result is marked safe for templates.
    """
    before, _before_errors = _massage_diff_content(content_from)
    after, _after_errors = _massage_diff_content(content_to)
    matcher = difflib.SequenceMatcher(None, before, after)
    return jinja2.Markup(show_diff(matcher))
@register.function
def tag_diff_table(prev_tags, curr_tags, prev_id, curr_id):
    """Return an HTML diff table comparing the tags of two revisions.

    Each tag value is diffed as a single line; the columns are titled
    "Revision <prev_id>" and "Revision <curr_id>".
    """
    differ = difflib.HtmlDiff(wrapcolumn=DIFF_WRAP_COLUMN)
    table = differ.make_table(
        [prev_tags],
        [curr_tags],
        _("Revision %s") % prev_id,
        _("Revision %s") % curr_id)

    # Simple formatting update: 784877
    # Break the line after each '",' and top-align every table cell.
    for needle, replacement in (('",', '"<br />'),
                                ('<td', '<td valign="top"')):
        table = table.replace(needle, replacement)
    return jinja2.Markup(table)
@register.function
def colorize_diff(diff):
    """Add inline background colors to the diff_add/sub/chg spans of a diff.

    Every opening ``<span class="diff_add"``, ``diff_sub`` or ``diff_chg``
    gets a ``style`` attribute appended, so the colors do not depend on an
    external stylesheet.
    """
    inline_styles = (
        ('<span class="diff_add"',
         '<span class="diff_add" '
         'style="background-color: #afa; text-decoration: none;"'),
        ('<span class="diff_sub"',
         '<span class="diff_sub" '
         'style="background-color: #faa; text-decoration: none;"'),
        ('<span class="diff_chg"',
         '<span class="diff_chg" '
         'style="background-color: #fe0; text-decoration: none;"'),
    )
    for opening_tag, styled_tag in inline_styles:
        diff = diff.replace(opening_tag, styled_tag)
    return diff
@register.filter
def wiki_bleach(val):
    """Clean ``val`` with the Document manager and mark the result as safe."""
    # Imported at call time rather than at module level
    # (presumably to avoid a circular import with wiki.models -- confirm).
    from wiki.models import Document

    cleaned = Document.objects.clean_content(val)
    return jinja2.Markup(cleaned)