Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
15 changed files
with
413 additions
and
102 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,8 @@ | ||
.DS_Store | ||
*.pyc | ||
*.pyo | ||
*.whoosh | ||
*.db | ||
env | ||
dist | ||
_mailinglist/* | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,53 @@ | ||
# -*- coding: utf-8 -*- | ||
import os | ||
import re | ||
from flask import url_for, Markup | ||
from flask_website import app | ||
from flask_website.search import Indexable | ||
|
||
|
||
_doc_body_re = re.compile(r'''(?smx) | ||
<title>(.*?)</title>.*? | ||
<div\s+class="body">(.*?)<div\s+class="sphinxsidebar"> | ||
''') | ||
|
||
|
||
class DocumentationPage(Indexable):
    """A rendered documentation page that can be put into the search index.

    The page is read from ``<DOCUMENTATION_PATH>/<slug>/index.html``; its
    title and body text are extracted with ``_doc_body_re`` and stripped of
    markup.
    """

    search_document_kind = 'documentation'

    def __init__(self, slug):
        """Load and parse the page identified by *slug*.

        Errors from opening the file propagate unchanged.  A ``ValueError``
        is raised when the file does not contain the expected title/body
        structure.
        """
        self.slug = slug
        fn = os.path.join(app.config['DOCUMENTATION_PATH'],
                          slug, 'index.html')
        # Read raw bytes so that the explicit UTF-8 decode is well defined.
        with open(fn, 'rb') as f:
            contents = f.read().decode('utf-8')
        match = _doc_body_re.search(contents)
        if match is None:
            # Fail with a message that names the offending file instead of
            # an AttributeError on ``None.groups()``.
            raise ValueError('%s does not look like a rendered '
                             'documentation page' % fn)
        title, text = match.groups()
        # Keep only the part of the title before the first em dash.
        self.title = Markup(title).striptags().split(u'—')[0].strip()
        # Drop the pilcrow characters left over from heading permalinks.
        self.text = Markup(text).striptags().strip().replace(u'¶', u'')

    def get_search_document(self):
        """Return the index fields (besides url/type) for this page."""
        return dict(
            id=unicode(self.slug),
            title=self.title,
            keywords=[],
            content=self.text
        )

    @property
    def url(self):
        """URL of the page as served by the ``docs.show`` endpoint."""
        return url_for('docs.show', page=self.slug)

    @classmethod
    def describe_search_result(cls, result):
        """Return highlighted excerpts of the page for *result*, or None.

        The page text is not stored in the index, so the page is loaded
        again from disk using the stored ``id`` (the slug).
        """
        rv = cls(result['id'])
        return Markup(result.highlights('content', text=rv.text)) or None

    @classmethod
    def iter_pages(cls):
        """Yield a page object for every ``index.html`` below the docs folder.

        The top-level ``index.html`` (empty slug) is skipped.
        """
        base_folder = os.path.abspath(app.config['DOCUMENTATION_PATH'])
        for dirpath, dirnames, filenames in os.walk(base_folder):
            if 'index.html' in filenames:
                slug = dirpath[len(base_folder) + 1:]
                # skip the index page. useless
                if slug:
                    # Instantiate via ``cls`` so subclasses yield themselves.
                    yield cls(slug)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,83 @@ | ||
from hashlib import md5 | ||
from flask import Markup, url_for, json | ||
from werkzeug import parse_date, http_date | ||
from jinja2.utils import urlize | ||
from flask_website import app | ||
from flask_website.utils import split_lines_wrapping | ||
|
||
|
||
class Mail(object):
    """A single mailing list message together with its nested replies."""

    def __init__(self, d):
        """Build the message, and recursively its replies, from dict *d*.

        *d* must provide the keys ``msgid``, ``author`` (a two-item
        name/address sequence), ``date``, ``subject``, ``children`` and
        ``text``.
        """
        self.msgid = d['msgid']
        self.author_name, self.author_addr = d['author']
        self.date = parse_date(d['date'])
        self.subject = d['subject']
        self.children = [Mail(x) for x in d['children']]
        self.text = d['text']

    def rendered_text(self):
        """Return the message body as markup.

        Lines starting with ``>`` are wrapped in a ``quote`` span.  From the
        ``-- `` signature delimiter onwards every line is wrapped in a
        ``sig`` span.  Each line is passed through ``urlize`` and the lines
        are joined with newlines.
        """
        result = []
        in_sig = False
        for line in split_lines_wrapping(self.text):
            if line == u'-- ':
                in_sig = True
            # the extra space at the end is a simple workaround for
            # urlize not to consume the </span> as part of the URL
            if in_sig:
                line = Markup(u'<span class=sig>%s </span>') % line
            elif line.startswith('>'):
                line = Markup(u'<span class=quote>%s </span>') % line
            result.append(urlize(line))
        return Markup(u'\n'.join(result))

    def to_json(self):
        """Return a JSON-serializable dict for this message and its replies.

        The sender's address is removed and the date is formatted with
        ``http_date``.
        """
        rv = vars(self).copy()
        # The address is stored as ``author_addr``; popping only
        # ``author_email`` left it in the output.  Remove both spellings.
        rv.pop('author_addr', None)
        rv.pop('author_email', None)
        rv['date'] = http_date(rv['date'])
        rv['children'] = [c.to_json() for c in rv['children']]
        return rv

    @property
    def id(self):
        """Hex MD5 digest of the UTF-8 encoded message id."""
        return md5(self.msgid.encode('utf-8')).hexdigest()
|
||
|
||
class Thread(object):
    """Summary of a mailing list thread, optionally with its root message."""

    def __init__(self, d):
        """Populate the thread from dict *d*.

        Only the last path segment of ``d['slug']`` is kept.  The ``root``
        attribute is set only when *d* contains a ``root`` entry.
        """
        self.slug = d['slug'].split('/')[-1]
        self.title = d['title']
        self.reply_count = d['reply_count']
        self.author_name, self.author_email = d['author']
        self.date = parse_date(d['date'])
        try:
            root_data = d['root']
        except KeyError:
            pass
        else:
            self.root = Mail(root_data)

    @staticmethod
    def get(year, month, day, slug):
        """Load one thread from disk; return None if it cannot be read."""
        path = '%s/threads/%s-%02d-%02d/%s' % (
            app.config['MAILINGLIST_PATH'], year, month, day, slug)
        try:
            with open(path) as f:
                return Thread(json.load(f))
        except IOError:
            return None

    @staticmethod
    def get_list():
        """Return all threads listed in the ``threadlist`` file."""
        path = '%s/threads/threadlist' % app.config['MAILINGLIST_PATH']
        with open(path) as f:
            entries = json.load(f)
        return [Thread(entry) for entry in entries]

    @property
    def url(self):
        """URL of the thread's ``mailinglist.show_thread`` view."""
        when = self.date
        return url_for('mailinglist.show_thread', year=when.year,
                       month=when.month, day=when.day,
                       slug=self.slug)

    def to_json(self):
        """Return a JSON-serializable dict of the thread's attributes.

        NOTE(review): unlike ``Mail.to_json``, nothing is stripped here, so
        ``author_email`` is part of the result -- confirm this is intended.
        """
        rv = dict(vars(self))
        rv['date'] = http_date(rv['date'])
        if 'root' in rv:
            root = rv['root']
            rv['root'] = root.to_json()
        return rv
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,152 @@ | ||
# -*- coding: utf-8 -*- | ||
import os | ||
from whoosh import highlight, analysis, qparser | ||
from whoosh.support.charset import accent_map | ||
from flask import Markup | ||
from flask_website import app | ||
from werkzeug import import_string | ||
|
||
|
||
def open_index():
    """Open the Whoosh index in ``WHOOSH_INDEX``, creating it if missing.

    When the configured directory does not exist yet, it is created and a
    new index with the search schema (url, id, title, type, keywords,
    content) is set up in it.
    """
    from whoosh import index, fields as f
    index_dir = app.config['WHOOSH_INDEX']
    if not os.path.isdir(index_dir):
        os.mkdir(index_dir)
        # Stemming plus accent folding for the free-text fields.
        analyzer = (analysis.StemmingAnalyzer() |
                    analysis.CharsetFilter(accent_map))
        schema = f.Schema(
            url=f.ID(stored=True, unique=True),
            id=f.ID(stored=True),
            title=f.TEXT(stored=True, field_boost=2.0, analyzer=analyzer),
            type=f.ID(stored=True),
            keywords=f.KEYWORD(commas=True),
            content=f.TEXT(analyzer=analyzer)
        )
        return index.create_in(index_dir, schema)
    return index.open_dir(index_dir)


# Module-wide index handle, opened (or created) at import time.
index = open_index()
|
||
|
||
class Indexable(object):
    """Base class for objects that can be stored in the search index.

    Subclasses provide a ``url`` attribute and ``get_search_document()``,
    and set ``search_document_kind``.
    """

    search_document_kind = None

    def add_to_search_index(self, writer):
        """Add this object's document to the index through *writer*."""
        url = unicode(self.url)
        doc_type = self.search_document_type
        writer.add_document(url=url, type=doc_type,
                            **self.get_search_document())

    @classmethod
    def describe_search_result(cls, result):
        """Return a description for a search hit; the default has none."""
        return None

    @property
    def search_document_type(self):
        """Dotted ``module.ClassName`` of the object's class."""
        klass = type(self)
        return u'.'.join((klass.__module__, klass.__name__))

    def get_search_document(self):
        """Return the document fields as a dict; subclasses must override."""
        raise NotImplementedError()

    def remove_from_search_index(self, writer):
        """Delete the document stored under this object's URL."""
        url = unicode(self.url)
        writer.delete_by_term('url', url)
|
||
|
||
def highlight_all(result, field):
    """Return the whole stored *field* of *result* with matches marked up.

    The complete text is treated as one fragment and rendered with the
    formatter configured on the result set.  If highlighting produces an
    empty string, the stored text is returned as is.
    """
    text = result[field]
    highlighter = highlight.Highlighter(
        fragmenter=highlight.WholeFragmenter(),
        formatter=result.results.highlighter.formatter)
    marked_up = Markup(highlighter.highlight_hit(result, field, text=text))
    return marked_up or text
|
||
|
||
class SearchResult(object):
    """Presentation wrapper around a single search hit."""

    def __init__(self, result):
        """Copy URL and title from *result* and look up its description.

        The class named by the stored ``type`` field is imported; it
        supplies both the result kind and the description.
        """
        self.url = result['url']
        self.title_text = result['title']
        self.title = highlight_all(result, 'title')
        document_cls = import_string(result['type'])
        self.kind = document_cls.search_document_kind
        self.description = document_cls.describe_search_result(result)
|
||
|
||
class SearchResultPage(object):
    """One page of search results, iterable over its ``SearchResult`` items."""

    def __init__(self, results, page):
        """Wrap *results* for page number *page*.

        Passing ``None`` as *results* gives an empty page with one page in
        total and zero hits.
        """
        self.page = page
        if results is not None:
            self.results = [SearchResult(hit) for hit in results]
            self.pages = results.pagecount
            self.total = results.total
            return
        self.results = []
        self.pages = 1
        self.total = 0

    def __iter__(self):
        """Iterate over the wrapped results of this page."""
        return iter(self.results)
|
||
|
||
def search(query, page=1, per_page=20):
    """Search titles and content for *query* and return one result page.

    When the searcher rejects the page with ``ValueError``, an empty
    ``SearchResultPage`` is returned for page 1 and ``None`` for any other
    page.
    """
    with index.searcher() as searcher:
        parser = qparser.MultifieldParser(['title', 'content'], index.schema)
        parsed_query = parser.parse(unicode(query))
        try:
            result_page = searcher.search_page(parsed_query, page,
                                               pagelen=per_page)
        except ValueError:
            return SearchResultPage(None, page) if page == 1 else None

        # Configure excerpt size and the markup used for matches.
        hits = result_page.results
        fragmenter = hits.highlighter.fragmenter
        fragmenter.maxchars = 512
        fragmenter.surround = 40
        hits.highlighter.formatter = highlight.HtmlFormatter(
            'em',
            classname='search-match',
            termclass='search-term',
            between=u'<span class=ellipsis> … </span>')
        # Built inside the ``with`` so results are wrapped while the
        # searcher is still open.
        return SearchResultPage(result_page, page)
|
||
|
||
def update_model_based_indexes(session, flush_context):
    """Called by a session event, updates the model based documents.

    New models are added to the index, modified models are removed and
    added again, and deleted models are removed.  Only ``Indexable``
    instances are considered.  Nothing is written when no such model is
    affected.
    """
    to_delete = []
    to_add = []
    for model in session.new:
        if isinstance(model, Indexable):
            to_add.append(model)

    for model in session.dirty:
        if isinstance(model, Indexable):
            to_delete.append(model)
            to_add.append(model)

    # This loop used to walk ``session.dirty`` a second time, so deleted
    # models stayed in the index and dirty ones were queued twice.
    for model in session.deleted:
        if isinstance(model, Indexable):
            to_delete.append(model)

    if not (to_delete or to_add):
        return

    writer = index.writer()
    try:
        for model in to_delete:
            model.remove_from_search_index(writer)
        for model in to_add:
            model.add_to_search_index(writer)
    except Exception:
        # Discard the partial changes and release the index write lock.
        writer.cancel()
        raise
    writer.commit()
|
||
|
||
def update_documentation_index():
    """Re-index every documentation page found by ``iter_pages``.

    Each page's existing document is removed before the fresh one is
    added.  All changes are committed together; if loading or indexing a
    page fails, the writer is cancelled and the error is re-raised.
    """
    from flask_website.docs import DocumentationPage
    writer = index.writer()
    try:
        for page in DocumentationPage.iter_pages():
            page.remove_from_search_index(writer)
            page.add_to_search_index(writer)
    except Exception:
        # Do not leave the index write lock held by an abandoned writer.
        writer.cancel()
        raise
    writer.commit()
|
||
|
||
def reindex_snippets():
    """Re-index every snippet returned by ``Snippet.query.all()``.

    Each snippet's existing document is removed before the fresh one is
    added.  All changes are committed together; if indexing fails, the
    writer is cancelled and the error is re-raised.
    """
    from flask_website.database import Snippet
    writer = index.writer()
    try:
        for snippet in Snippet.query.all():
            snippet.remove_from_search_index(writer)
            snippet.add_to_search_index(writer)
    except Exception:
        # Do not leave the index write lock held by an abandoned writer.
        writer.cancel()
        raise
    writer.commit()
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Oops, something went wrong.