Skip to content

Commit

Permalink
xappy/cache_search_results.py,xappy/cachemanager/generic.py,
Browse files
xappy/doctests/searchconnection_doctest2.txt,
xappy/mset_search_results.py,xappy/searchconnection.py,
xappy/searchresults.py,xappy/unittests/cached_searches.py,
xappy/unittests/facets.py: Finish implementing support for facets
and stats served from the cache.

git-svn-id: http://xappy.googlecode.com/svn/trunk@590 eff16abe-6239-0410-8a55-6f1cd6cf4f8f
  • Loading branch information
boulton.rj committed Jan 26, 2010
1 parent 2651bd0 commit a8bc06d
Show file tree
Hide file tree
Showing 9 changed files with 330 additions and 95 deletions.
9 changes: 9 additions & 0 deletions ChangeLog
Original file line number Diff line number Diff line change
@@ -1,3 +1,12 @@
Tue Jan 26 09:43:01 GMT 2010 Richard Boulton <richard@tartarus.org>

* xappy/cache_search_results.py,xappy/cachemanager/generic.py,
xappy/doctests/searchconnection_doctest2.txt,
xappy/mset_search_results.py,xappy/searchconnection.py,
xappy/searchresults.py,xappy/unittests/cached_searches.py,
xappy/unittests/facets.py: Finish implementing support for facets
and stats served from the cache.

Thu Jan 21 16:15:28 GMT 2010 Richard Boulton <richard@tartarus.org>

* xappy/cachemanager/generic.py: Add support for setting facets and
Expand Down
12 changes: 0 additions & 12 deletions xappy/cache_search_results.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,15 +95,3 @@ def get_startrank(self):

def get_endrank(self):
return self.startrank + len(self.xapids)


class CacheFacetResults(object):
"""The result of counting facets.
"""
def __init__(self, facets):
self.facets = facets

def get_suggested_facets(self, maxfacets,
required_facets):
return self.facets
12 changes: 6 additions & 6 deletions xappy/cachemanager/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,15 +43,15 @@ def sort_facets(facets):
"""Sort an iterable of facets.
Returns a tuple, sorted by fieldname. Also sorts the values into
descending frequency order.
descending frequency order (and ascending order of key for equal
frequencies).
"""
if isinstance(facets, dict):
facets = facets.iteritems()
return tuple(sorted((fieldname,
tuple(sorted(valfreqs.iteritems() if isinstance(valfreqs, dict) else valfreqs,
key=operator.itemgetter(1),
reverse=True)))
key=lambda x: (-x[1], x[0]))))
for fieldname, valfreqs in facets))

class CacheManager(object):
Expand Down Expand Up @@ -621,7 +621,7 @@ def remove_hits(self, queryid, ranks_and_docids):

data = self['T' + str(queryid)]
if len(data) != 0:
data = self.decode(data)
data = list(self.decode(data))
if data[0] is not None:
data[0] -= 1
if data[1] is not None:
Expand All @@ -646,7 +646,7 @@ def add_facets(self, queryid, facets):
self.set_facets(queryid, facets)
return
newfacets = dict(self.decode(data))
for fieldname, new_valuefreqs in facets:
for fieldname, new_valfreqs in facets.iteritems():
try:
existing_valfreqs = newfacets[fieldname]
except KeyError:
Expand All @@ -659,7 +659,7 @@ def add_facets(self, queryid, facets):
freq += existing_valfreqs[value]
except KeyError:
pass
existing_valfreqs[value] = tuple(freq.iteritems())
existing_valfreqs[value] = freq
newfacets[fieldname] = tuple(existing_valfreqs.iteritems())

self[key] = self.encode(sort_facets(newfacets))
Expand Down
4 changes: 2 additions & 2 deletions xappy/doctests/searchconnection_doctest2.txt
Original file line number Diff line number Diff line change
Expand Up @@ -222,7 +222,7 @@ restriction):
The suggestions for the facet we've already restricted by are for sub-values
within the range:
>>> results3.get_suggested_facets(maxfacets=5)[3]
('price3', (((0.0, 46.899999999999999), 8), ((53.600000000000001, 93.799999999999997), 7), ((100.5, 147.40000000000001), 8), ((154.09999999999999, 194.30000000000001), 7)))
('price3', (((0.0, 46.899999999999999), 8), ((100.5, 147.40000000000001), 8), ((53.600000000000001, 93.799999999999997), 7), ((154.09999999999999, 194.30000000000001), 7)))


Regression test: this used to give an error
Expand Down Expand Up @@ -251,7 +251,7 @@ will be stored). Therefore, we expect facet8 to _not_ include the 2000 and
... checkatleast=200, getfacets=True,
... allowfacets=('facet7', 'facet8'))
>>> results3.get_suggested_facets()
[('facet8', (((0.0, 0.0), 100), ((1.0, 1.0), 100))), ('facet7', (('0', 100), ('1', 100), ('2000', 200), ('2001', 200)))]
[('facet8', (((0.0, 0.0), 100), ((1.0, 1.0), 100))), ('facet7', (('2000', 200), ('2001', 200), ('0', 100), ('1', 100)))]


Even if the database doesn't contain any facets, getting the list of suggested
Expand Down
51 changes: 33 additions & 18 deletions xappy/mset_search_results.py
Original file line number Diff line number Diff line change
Expand Up @@ -471,32 +471,51 @@ def __len__(self):
return len(self.mset_order)


class MSetFacetResults(object):
class NoFacetResults(object):
"""Stub used when no facet results asked for.
"""
def __init__(self, *args, **kwargs):
pass

def get_facets(self):
raise errors.SearchError("Facet selection wasn't enabled when the search was run")

def get_suggested_facets(self, maxfacets, required_facets):
raise errors.SearchError("Facet selection wasn't enabled when the search was run")


class FacetResults(object):
"""The result of counting facets.
"""
def __init__(self, facetspies, facetfields, facethierarchy, facetassocs,
desired_num_of_categories):
desired_num_of_categories, cache_facets):
self.facetspies = facetspies
self.facetfields = facetfields
self.facethierarchy = facethierarchy
self.facetassocs = facetassocs

self.facetvalues = {}
self.facetscore = {}
for field, slot, facettype in self.facetfields:
values, score = self.calc_facet_value(slot, facettype,
for field, slot, facettype in facetfields:
values, score = self._calc_facet_value(slot, facettype,
desired_num_of_categories)
self.facetvalues[field] = values
self.facetscore[field] = score

def calc_facet_value(self, slot, facettype, desired_num_of_categories):
if cache_facets is not None:
self.facetvalues.update(cache_facets)
self.facetscore.update((fieldname, 0)
for fieldname, _ in cache_facets)

def _calc_facet_value(self, slot, facettype, desired_num_of_categories):
"""Calculate the facet value for a given slot, and return it.
"""
facetspy = self.facetspies.get(slot)
facetspy = self.facetspies.get(slot, None)
if facetspy is None:
return [], 0
return (), 0
else:
if facettype == 'float':
ranges = xapian.NumericRanges(facetspy.get_values(),
Expand All @@ -510,7 +529,7 @@ def calc_facet_value(self, slot, facettype, desired_num_of_categories):
score = xapian.score_evenness(facetspy,
desired_num_of_categories)
values = facetspy.get_values_as_dict()
values = tuple(sorted(values.iteritems()))
values = tuple(sorted(values.iteritems(), key=lambda x: (-x[1], x[0])))
return values, score

def get_facets(self):
Expand All @@ -527,26 +546,22 @@ def get_suggested_facets(self, maxfacets, required_facets):
`SearchResults.get_suggested_facets()`.
"""
if self.facetspies is None:
raise errors.SearchError("Facet selection wasn't enabled when the search was run")
if isinstance(required_facets, basestring):
required_facets = [required_facets]
scores = []
facettypes = {}

for field, slot, facettype in self.facetfields:
facettypes[field] = facettype
for field in self.facetvalues.iterkeys():
score = self.facetscore[field]
scores.append((score, field, slot))
scores.append((score, field))

# Sort on whether facet is top-level ahead of score (use subfacets first),
# and on whether facet is preferred for the query type ahead of anything else
if self.facethierarchy:
# Note, tuple[-2] is the value of 'field' in a scores tuple
scores = [(tuple[-2] not in self.facethierarchy,) + tuple for tuple in scores]
# Note, tuple[-1] is the value of 'field' in a scores tuple
scores = [(tuple[-1] not in self.facethierarchy,) + tuple for tuple in scores]
if self.facetassocs:
preferred = IndexerConnection.FacetQueryType_Preferred
scores = [(self.facetassocs.get(tuple[-2]) != preferred,) + tuple for tuple in scores]
scores = [(self.facetassocs.get(tuple[-1]) != preferred,) + tuple for tuple in scores]
scores.sort()
if self.facethierarchy:
index = 1
Expand All @@ -559,7 +574,7 @@ def get_suggested_facets(self, maxfacets, required_facets):

results = []
required_results = []
for score, field, slot in scores:
for score, field in scores:
# Check if the facet is required
required = False
if required_facets is not None:
Expand Down
75 changes: 43 additions & 32 deletions xappy/searchconnection.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@
import itertools

import xapian
from cache_search_results import CacheResultOrdering, CacheFacetResults
from cache_search_results import CacheResultOrdering
from datastructures import UnprocessedDocument, ProcessedDocument
from fieldactions import ActionContext, FieldActions, \
ActionSet, SortableMarshaller, convert_range_to_term, \
Expand All @@ -43,7 +43,7 @@
DocumentIter, SynonymIter, _allocate_id
from query import Query
from searchresults import SearchResults, SearchResultContext
from mset_search_results import MSetFacetResults, \
from mset_search_results import FacetResults, NoFacetResults, \
MSetResultOrdering, ResultStats, MSetTermWeightGetter

class ExternalWeightSource(object):
Expand Down Expand Up @@ -1957,14 +1957,9 @@ def _field_type_from_kwargslist(kwargslist):
return fieldtype
return 'string'

def _make_facet_matchspies(self, query, allowfacets, denyfacets,
def _calc_facet_fields(self, query, allowfacets, denyfacets,
usesubfacets, query_type):
# Set facetspies to {}, even if no facet fields are found, to
# distinguish from no facet calculation being performed. (This
# will prevent an error being thrown when the list of suggested
# facets is requested - instead, an empty list will be returned.)
facetspies = {}
facetfields = []
facetfieldnames = []

if allowfacets is not None and denyfacets is not None:
raise errors.SearchError("Cannot specify both `allowfacets` and `denyfacets`")
Expand Down Expand Up @@ -2004,6 +1999,24 @@ def _make_facet_matchspies(self, query, allowfacets, denyfacets,
# filter out facets that should never be returned for the query type
if self._facet_query_never(field, query_type):
continue
facetfieldnames.append(field)
return facetfieldnames

def _make_facet_matchspies(self, facetfieldnames):
# Set facetspies to {}, even if no facet fields are found, to
# distinguish from no facet calculation being performed. (This
# will prevent an error being thrown when the list of suggested
# facets is requested - instead, an empty list will be returned.)
facetspies = {}
facetfields = []

for field in facetfieldnames:
try:
actions = self._field_actions[field]._actions
except KeyError:
continue
for action, kwargslist in actions.iteritems():
if action == FieldActions.FACET:
slot = self._field_mappings.get_slot(field, 'facet')
facettype = self._field_type_from_kwargslist(kwargslist)
if facettype == 'string':
Expand Down Expand Up @@ -2135,13 +2148,6 @@ def search(self, query, startrank, endrank,
if self._index is None:
raise errors.SearchError("SearchConnection has been closed")

if 'facets' in _checkxapian.missing_features:
if getfacets is not None or \
allowfacets is not None or \
denyfacets is not None or \
usesubfacets is not None or \
query_type is not None:
raise errors.SearchError("Facets unsupported with this release of xapian")
if checkatleast == -1:
checkatleast = self._index.get_doccount()
if stats_checkatleast == -1:
Expand All @@ -2159,10 +2165,12 @@ def search(self, query, startrank, endrank,

# Prepare the facet spies.
if getfacets:
facetspies, facetfields = self._make_facet_matchspies(uncached_query,
allowfacets, denyfacets, usesubfacets, query_type)
if 'facets' in _checkxapian.missing_features:
raise errors.SearchError("Facets unsupported with this release of xapian")
facetfieldnames = set(self._calc_facet_fields(query,
allowfacets, denyfacets, usesubfacets, query_type))
else:
facetspies, facetfields = None, []
facetfieldnames = set()

# Get whatever information we can from the cache.
cache_hits, cache_stats, cache_facets = None, (None, None, None), None
Expand All @@ -2181,9 +2189,16 @@ def search(self, query, startrank, endrank,
cache_stats = self.cache_manager.get_stats(queryid)

# Get the stored facet values.
if len(facetfields) != 0:
if len(facetfieldnames) != 0:
cache_facets = self.cache_manager.get_facets(queryid)
for fieldname, valfreqs in cache_facets:
facetfieldnames.remove(fieldname)

if getfacets:
facetspies, facetfields = \
self._make_facet_matchspies(facetfieldnames)
else:
facetspies, facetfields = None, []

# Work out how many results we need.
real_maxitems = 0
Expand All @@ -2210,12 +2225,8 @@ def search(self, query, startrank, endrank,
checkatleast = max(checkatleast, stats_checkatleast)

if len(facetfields) != 0:
# FIXME - check if the facets requested were available - if not all
# available, set cache_facets to None.

if cache_facets is None:
checkatleast = max(checkatleast, facet_checkatleast)
need_to_search = True
checkatleast = max(checkatleast, facet_checkatleast)
need_to_search = True

# FIXME - we currently always need to search to get an mset object for
# getting term weights
Expand Down Expand Up @@ -2253,18 +2264,18 @@ def search(self, query, startrank, endrank,
else:
mset = None


# Build the search results:
if cache_facets is None:
if getfacets:
# The facet results don't depend on anything else.
facet_hierarchy = None
if usesubfacets:
facet_hierarchy = self._facet_hierarchy
facets = MSetFacetResults(facetspies, facetfields, facet_hierarchy,
self._facet_query_table.get(query_type),
facet_desired_num_of_categories)
facets = FacetResults(facetspies, facetfields, facet_hierarchy,
self._facet_query_table.get(query_type),
facet_desired_num_of_categories,
cache_facets)
else:
facets = CacheFacetResults(cache_facets)
facets = NoFacetResults()

if need_to_search:
weightgetter = MSetTermWeightGetter(mset)
Expand Down
8 changes: 7 additions & 1 deletion xappy/searchresults.py
Original file line number Diff line number Diff line change
Expand Up @@ -190,7 +190,10 @@ def _relevant_data_simple(self, allow, query, groupnumbers):
slots = {}
for field in allow:
p = []
actions = self._conn._field_actions[field]._actions
try:
actions = self._conn._field_actions[field]._actions
except KeyError:
continue
is_ft = None
for action, kwargslist in actions.iteritems():
if action == FieldActions.INDEX_FREETEXT:
Expand Down Expand Up @@ -754,6 +757,9 @@ def get_suggested_facets(self, maxfacets=5, desired_num_of_categories=None,
required_facets=None):
"""Get a suggested set of facets, to present to the user.
`desired_num_of_categories` is a deprecated parameter: it is ignored, and
will be removed in the near future. FIXME - remove it.
This returns a list, in descending order of the usefulness of the
facet, in which each item is a tuple holding:
Expand Down
Loading

0 comments on commit a8bc06d

Please sign in to comment.