
Search providers

Separate search logic into a provider system: move the existing logic into a
cloudsearch implementation of the search provider interface, and add a Solr
implementation.
Commit 15d7e9333f69955380469ddee80dbac40860b6c6 (parent 990a583), committed by @kemitche on May 13, 2015
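
The practical effect at call sites: controllers stop importing the cloudsearch-specific classes from r2.lib.search and instead go through whichever provider the new g.search property resolves. A condensed before/after, paraphrased from the hunks below rather than quoted verbatim:

# Before: cloudsearch was hard-wired at the import site.
from r2.lib.search import SearchQuery

results = SearchQuery(query, sort="relevance", faceting=faceting,
                      num=0, syntax="plain").run()

# After: the provider is resolved at runtime from the search_provider
# ini setting, so the same call works against cloudsearch or Solr.
results = g.search.SearchQuery(query, sort="relevance", faceting=faceting,
                               num=0, syntax="plain").run()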
@@ -737,7 +737,7 @@ function set_consumer_count {
}
set_consumer_count log_q 0
-set_consumer_count cloudsearch_q 0
+set_consumer_count search_q 0
set_consumer_count del_account_q 1
set_consumer_count scraper_q 1
set_consumer_count markread_q 1
@@ -410,6 +410,9 @@ wiki_max_page_separators = 3
############################################ SEARCH
+# search provider name
+search_provider = cloudsearch
+
# endpoint for link search
CLOUDSEARCH_SEARCH_API =
# endpoint for link upload
@@ -419,7 +422,6 @@ CLOUDSEARCH_SUBREDDIT_SEARCH_API =
# endpoint for subreddit upload
CLOUDSEARCH_SUBREDDIT_DOC_API =
-
############################################ MEMCACHE
num_mc_clients = 5
# core memcache cluster, Things and various other stuff
@@ -800,3 +802,4 @@ feature_require_https = off
# as well.
feature_give_hsts_grants = off
feature_multireddit_customizations = off
+
@@ -91,8 +91,8 @@
from r2.lib.menus import CommentSortMenu
from r2.lib.captcha import get_iden
from r2.lib.strings import strings
-from r2.lib.filters import _force_unicode, _force_utf8, websafe_json, websafe, spaceCompress
from r2.lib.template_helpers import format_html, header_url
+from r2.lib.filters import _force_unicode, _force_utf8, websafe_json, websafe, spaceCompress
from r2.lib.db import queries
from r2.lib import media
from r2.lib.db import tdb_cassandra
@@ -103,7 +103,6 @@
from r2.lib.filters import safemarkdown
from r2.lib.media import str_to_image
from r2.controllers.api_docs import api_doc, api_section
-from r2.lib.search import SearchQuery
from r2.controllers.oauth2 import require_oauth2_scope, allow_oauth2_access
from r2.lib.template_helpers import add_sr, get_domain, make_url_protocol_relative
from r2.lib.system_messages import notify_user_added
@@ -4186,8 +4185,9 @@ def GET_subreddits_by_topic(self, responder, query):
        exclude = Subreddit.default_subreddits()
        faceting = {"reddit":{"sort":"-sum(text_relevance)", "count":20}}
-        results = SearchQuery(query, sort="relevance", faceting=faceting, num=0,
-                              syntax="plain").run()
+        results = g.search.SearchQuery(query, sort="relevance",
+                                       faceting=faceting, num=0,
+                                       syntax="plain").run()
        sr_results = []
        for sr, count in results.subreddit_facets:
@@ -53,8 +53,6 @@
from r2.lib.db import queries
from r2.lib.db.tdb_cassandra import MultiColumnQuery
from r2.lib.strings import strings
-from r2.lib.search import (SearchQuery, SubredditSearchQuery, SearchException,
-                           InvalidQuery)
from r2.lib.validator import *
from r2.lib import jsontemplates
from r2.lib import sup
@@ -851,17 +849,14 @@ def GET_related(self, num, article, after, reverse, count):
        query = self.related_replace_regex.sub(self.related_replace_with,
                                               article.title)
-        query = _force_unicode(query)
-        query = query[:1024]
-        query = u"|".join(query.split())
-        query = u"title:'%s'" % query
+
        rel_range = timedelta(days=3)
        start = int(time_module.mktime((article._date - rel_range).utctimetuple()))
        end = int(time_module.mktime((article._date + rel_range).utctimetuple()))
-        nsfw = u"nsfw:0" if not article.is_nsfw else u""
-        query = u"(and %s timestamp:%s..%s %s)" % (query, start, end, nsfw)
-        q = SearchQuery(query, raw_sort="-text_relevance", faceting={},
-                        syntax="cloudsearch")
+        nsfw = article.is_nsfw
+
+        q = g.search.get_related_query(query, article, start, end, nsfw)
+
        content = self._search(q, num=num, after=after, reverse=reverse,
                               count=count)
@@ -932,8 +927,8 @@ def GET_search_reddits(self, query, reverse, after, count, num):
        sort = 'rel1'
        if query:
-            q = SubredditSearchQuery(query, sort=sort, faceting={},
-                                     include_over18=include_over18)
+            q = g.search.SubredditSearchQuery(query, sort=sort, faceting={},
+                                              include_over18=include_over18)
            content = self._search(q, num=num, reverse=reverse,
                                   after=after, count=count,
                                   skip_deleted_authors=False)
@@ -958,7 +953,7 @@ def GET_search_reddits(self, query, reverse, after, count, num):
              recent=VMenu('t', TimeMenu, remember=False),
              restrict_sr=VBoolean('restrict_sr', default=False),
              include_facets=VBoolean('include_facets', default=False),
-              syntax=VOneOf('syntax', options=SearchQuery.known_syntaxes))
+              syntax=VOneOf('syntax', options=g.search_syntaxes))
    @api_doc(api_section.search, supports_rss=True, uses_site=True)
    def GET_search(self, query, num, reverse, after, count, sort, recent,
                   restrict_sr, include_facets, syntax):
@@ -985,7 +980,7 @@ def GET_search(self, query, num, reverse, after, count, sort, recent,
        has_query = query or not isinstance(site, (DefaultSR, AllSR))
        if not syntax:
-            syntax = SearchQuery.default_syntax
+            syntax = g.search.SearchQuery.default_syntax
        # show NSFW to API and RSS users unless obey_over18=true
        is_api_or_rss = (c.render_style in API_TYPES
@@ -1037,15 +1032,16 @@ def GET_search(self, query, num, reverse, after, count, sort, recent,
        if num > 0 and has_query:
            nav_menus = [SearchSortMenu(default=sort), TimeMenu(default=recent)]
            try:
-                q = SearchQuery(query, site, sort=sort, faceting=faceting,
-                                include_over18=include_over18,
-                                recent=recent, syntax=syntax)
+                q = g.search.SearchQuery(query, site, sort=sort,
+                                         faceting=faceting,
+                                         include_over18=include_over18,
+                                         recent=recent, syntax=syntax)
                content = self._search(q, num=num, after=after, reverse=reverse,
                                       count=count)
                converted_data = q.converted_data
                subreddit_facets = content.subreddit_facets
-            except InvalidQuery:
+            except g.search.InvalidQuery:
                g.stats.simple_event('cloudsearch.error.invalidquery')
                # Clean the search of characters that might be causing the
@@ -1055,9 +1051,10 @@ def GET_search(self, query, num, reverse, after, count, sort, recent,
                cleaned = re.sub("[^\w\s]+", " ", query)
                cleaned = cleaned.lower().strip()
-                q = SearchQuery(cleaned, site, sort=sort, faceting=faceting,
-                                include_over18=include_over18,
-                                recent=recent)
+                q = g.search.SearchQuery(cleaned, site, sort=sort,
+                                         faceting=faceting,
+                                         include_over18=include_over18,
+                                         recent=recent)
                content = self._search(q, num=num, after=after, reverse=reverse,
                                       count=count)
                converted_data = q.converted_data
@@ -1076,8 +1073,9 @@ def GET_search(self, query, num, reverse, after, count, sort, recent,
        # extra search request for subreddit results
        if sr_num > 0 and has_query:
-            sr_q = SubredditSearchQuery(query, sort='rel1', faceting={},
-                                        include_over18=include_over18)
+            sr_q = g.search.SubredditSearchQuery(query, sort='rel1',
+                                                 faceting={},
+                                                 include_over18=include_over18)
            subreddits = self._search(sr_q, num=sr_num, reverse=reverse,
                                      after=after, count=count, type='sr',
                                      skip_deleted_authors=False)
@@ -1125,7 +1123,7 @@ def _search(self, query_obj, num, after, reverse, count=0, type=None,
        try:
            res = listing.listing()
-        except SearchException + (socket.error,) as e:
+        except g.search.SearchException + (socket.error,) as e:
            return self.search_fail(e)
        return res
@@ -41,7 +41,6 @@
from r2.lib.db.thing import Query, Merge, Relations
from r2.lib.db import queries
from r2.lib.strings import Score
-import r2.lib.search as search
from r2.lib.template_helpers import add_sr
from r2.lib.admin_utils import check_cheating
from r2.lib.csrf import csrf_exempt
@@ -59,6 +58,7 @@
from api_docs import api_doc, api_section
+from pylons import g
from pylons.i18n import _
from datetime import timedelta
@@ -68,6 +68,7 @@
class ListingController(RedditController):
"""Generalized controller for pages with lists of links."""
+
# toggle skipping of links based on the users' save/hide/vote preferences
skip = True
@@ -165,7 +166,7 @@ def builder(self):
            builder_cls = self.builder_cls
        elif isinstance(self.query_obj, Query):
            builder_cls = QueryBuilder
-        elif isinstance(self.query_obj, search.SearchQuery):
+        elif isinstance(self.query_obj, g.search.SearchQuery):
            builder_cls = SearchBuilder
        elif isinstance(self.query_obj, iters):
            builder_cls = IDBuilder
@@ -1798,8 +1798,7 @@ def abort_if_not_modified(self, last_modified, private=True,
            abort(304, 'not modified')
    def search_fail(self, exception):
-        from r2.lib.search import SearchException
-        if isinstance(exception, SearchException + (socket.error,)):
+        if isinstance(exception, g.search.SearchException + (socket.error,)):
            g.log.error("Search Error: %s" % repr(exception))
        errpage = pages.RedditError(_("search failed"),
@@ -73,6 +73,25 @@
LIVE_CONFIG_NODE = "/config/live"
+
+LINK_SEARCH_SORTS = {
+    'cloudsearch': {'relevance': '-relevance',
+                    'hot': '-hot2',
+                    'top': '-top',
+                    'new': '-timestamp',
+                    'comments': '-num_comments',},
+    'solr': {'relevance': 'score desc',
+             'hot': 'max(hot/45000.0, 1.0) desc',
+             'top': 'top desc',
+             'new': 'timestamp desc',
+             'comments': 'num_comments desc',},
+    }
+
+SEARCH_SYNTAXES = {
+    'cloudsearch': ('cloudsearch', 'lucene', 'plain'),
+    'solr': ('solr', 'plain'),
+    }
+
SECRETS_NODE = "/config/secrets"
@@ -282,6 +301,7 @@ class Globals(object):
            'community_email',
            'smtp_server',
            'events_collector_url',
+            'search_provider',
        ],
        ConfigValue.choice(ONE=CL_ONE, QUORUM=CL_QUORUM): [
@@ -966,3 +986,23 @@ def __del__(self):
        here.
        """
        pass
+
+    @property
+    def search(self):
+        if getattr(self, 'search_provider', None):
+            if type(self.search_provider) == str:
+                self.search_provider = select_provider(self.config,
+                                                       self.pkg_resources_working_set,
+                                                       "r2.provider.search",
+                                                       self.search_provider,
+                                                       )
+            return self.search_provider
+        return None
+
+    @property
+    def search_sorts(self):
+        return LINK_SEARCH_SORTS[self.config.get('search_provider')]
+
+    @property
+    def search_syntaxes(self):
+        return SEARCH_SYNTAXES[self.config.get('search_provider')]
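
The g.search property above passes the pkg_resources working set and the group name "r2.provider.search" to select_provider, which suggests the concrete providers are registered as setuptools entry points. That registration is not part of this diff; a rough sketch of what it might look like in r2's setup.py, with the module paths and class names being guesses:

# setup.py fragment (illustrative: only the "r2.provider.search" group name
# comes from the select_provider() call above; everything else is assumed).
from setuptools import setup, find_packages

setup(
    name="r2",
    packages=find_packages(),
    entry_points={
        "r2.provider.search": [
            "cloudsearch = r2.lib.providers.search.cloudsearch:CloudSearchProvider",
            "solr = r2.lib.providers.search.solr:SolrSearchProvider",
        ],
    },
)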
@@ -20,14 +20,12 @@
# Inc. All Rights Reserved.
###############################################################################
-from pylons import c, request
+from pylons import c, g, request
from pylons.i18n import _, N_
from r2.config import feature
from r2.lib.db import operators
from r2.lib.filters import _force_unicode
-from r2.lib.search import sorts as search_sorts
-from r2.lib.search import sr_sorts as sr_search_sorts
from r2.lib.strings import StringHandler, plurals
from r2.lib.utils import class_property, query_string, timeago
from r2.lib.wrapped import Styled
@@ -610,7 +608,7 @@ def make_title(self, attr):
class SearchSortMenu(SortMenu):
"""Sort menu for search pages."""
_default = 'relevance'
- mapping = search_sorts
+ mapping = g.search_sorts
_options = mapping.keys()
@classmethod
@@ -621,7 +619,7 @@ def operator(cls, sort):
class SubredditSearchSortMenu(SortMenu):
"""Sort menu for subreddit search pages."""
_default = 'relevance'
- mapping = sr_search_sorts
+ mapping = g.search_sorts
_options = mapping.keys()
@classmethod
@@ -20,14 +20,28 @@
# Inc. All Rights Reserved.
###############################################################################
-import r2.lib.cloudsearch as cloudsearch
+class SearchProvider(object):
+ """Provider for search.
+ """
-InvalidQuery = (cloudsearch.InvalidQuery,)
-SearchException = (cloudsearch.CloudSearchHTTPError,)
+    def InvalidQuery(self):
+        raise NotImplementedError
-SearchQuery = cloudsearch.LinkSearchQuery
-SubredditSearchQuery = cloudsearch.SubredditSearchQuery
+    def SearchException(self):
+        raise NotImplementedError
-sorts = cloudsearch.LinkSearchQuery.sorts_menu_mapping
-sr_sorts = cloudsearch.SubredditSearchQuery.sorts_menu_mapping
+    def Query(self):
+        raise NotImplementedError
+
+    def SubredditSearchQuery(self):
+        raise NotImplementedError
+
+    def sorts(self):
+        raise NotImplementedError
+
+    def run_changed(self):
+        raise NotImplementedError
+
+    def get_related_query(self):
+        raise NotImplementedError
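
For reference, the module-level aliases deleted above are essentially what a cloudsearch implementation of this interface needs to expose: thin re-exports of the existing r2.lib.cloudsearch classes. A sketch of such a provider follows; the file path, class name, and base-class import are guesses, and only the right-hand sides appear in this commit. Exposing these as class attributes rather than implementing the NotImplementedError stubs as instance methods matches how the controllers use them, e.g. g.search.SearchException + (socket.error,) in the search_fail hunk above.

# Hypothetical cloudsearch provider, reconstructed from the aliases removed above.
import r2.lib.cloudsearch as cloudsearch

from r2.lib.providers.search import SearchProvider


class CloudSearchProvider(SearchProvider):
    # exception tuples the controllers catch around search requests
    InvalidQuery = (cloudsearch.InvalidQuery,)
    SearchException = (cloudsearch.CloudSearchHTTPError,)

    # query classes used by the search controllers and listing builders
    SearchQuery = cloudsearch.LinkSearchQuery
    SubredditSearchQuery = cloudsearch.SubredditSearchQuery

    # sort mappings formerly exposed as search.sorts / search.sr_sorts
    sorts = cloudsearch.LinkSearchQuery.sorts_menu_mapping
    sr_sorts = cloudsearch.SubredditSearchQuery.sorts_menu_mapping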