Skip to content

Commit

Permalink
Merge pull request #32 from emory-lits-labs/feature/stats
Browse files Browse the repository at this point in the history
Feature/stats
  • Loading branch information
delijati committed Jun 13, 2016
2 parents 16f0efd + 45dd216 commit d2bf472
Show file tree
Hide file tree
Showing 4 changed files with 101 additions and 2 deletions.
18 changes: 18 additions & 0 deletions docs/query.rst
Original file line number Diff line number Diff line change
Expand Up @@ -852,3 +852,21 @@ You can also pass multiple ids: ::
>>> resp = si.get(["978-0641723445", "978-1423103349"])

The return value is the same as for a normal search

Stats
-----

For background, see https://wiki.apache.org/solr/StatsComponent

Solr can return simple statistics for indexed numeric fields::

>>> resp = si.query().stats('int_field').execute()

You can also pass multiple fields::

>>> resp = si.query().stats(['int_field', 'float_field']).execute()

The resulting statistics are available on the response at
``resp.stats.stats_fields``.


26 changes: 26 additions & 0 deletions scorched/response.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,31 @@ def from_json(cls, response):
return SolrFacetCounts(**facet_counts)


class SolrStats(object):
    """Container for the ``stats`` section of a decoded Solr response.

    Every name listed in ``members`` becomes an instance attribute. A member
    that is not supplied defaults to an empty tuple, except ``stats_fields``,
    which is always stored as a ``dict``.
    """

    members = (
        "stats_fields",
        "facet",
    )

    def __init__(self, **kwargs):
        for member in self.members:
            setattr(self, member, kwargs.get(member, ()))
        # Copy into a plain dict: the empty-tuple default becomes ``{}`` and
        # the instance does not alias the mapping that was passed in.
        self.stats_fields = dict(self.stats_fields)

    @classmethod
    def from_json(cls, response):
        """Build an instance from a decoded Solr JSON response.

        :param response: mapping holding the decoded response.
        :returns: an instance of ``cls``. Its ``stats_fields`` is an empty
            dict when the response has no ``stats`` section, or when that
            section carries no ``stats_fields`` entry.
        """
        try:
            stats_response = response['stats']
        except KeyError:
            # Use ``cls`` rather than the concrete class so that subclasses
            # get instances of their own type.
            return cls()
        # faceted stats, if present, are included within the field, so only
        # ``stats_fields`` has to be carried over.
        stats_fields = (stats_response or {}).get('stats_fields') or {}
        return cls(stats_fields=stats_fields)


class SolrUpdateResponse(object):
@classmethod
def from_json(cls, jsonmsg):
Expand Down Expand Up @@ -97,6 +122,7 @@ def from_json(cls, jsonmsg, datefields=()):
self.term_vectors = self.parse_term_vectors(doc.get('termVectors', []))
# can be computed by MoreLikeThisHandler
self.interesting_terms = doc.get('interestingTerms', None)
self.stats = SolrStats.from_json(doc)
return self

@classmethod
Expand Down
45 changes: 44 additions & 1 deletion scorched/search.py
Original file line number Diff line number Diff line change
Expand Up @@ -383,7 +383,7 @@ class BaseSearch(object):
'faceter', 'grouper', 'sorter', 'facet_querier',
'debugger', 'spellchecker', 'requesthandler',
'field_limiter', 'parser', 'pivoter', 'facet_ranger',
'term_vectors')
'term_vectors', 'stat')

def _init_common_modules(self):
self.query_obj = LuceneQuery(u'q')
Expand All @@ -403,6 +403,7 @@ def _init_common_modules(self):
self.facet_ranger = FacetRangeOptions()
self.facet_querier = FacetQueryOptions()
self.term_vectors = TermVectorOptions()
self.stat = StatOptions()

def clone(self):
    """Return a new object of this instance's class, seeded from ``self``.

    The new object receives this instance's ``interface`` and this instance
    itself as ``original``.
    """
    factory = self.__class__
    return factory(interface=self.interface, original=self)
Expand Down Expand Up @@ -551,6 +552,11 @@ def results_as(self, constructor):
newself = self.clone()
return newself

def stats(self, fields, **kwargs):
    """Return a clone of this search with statistics options applied.

    ``fields`` and any keyword arguments are handed to the clone's ``stat``
    options object through its ``update`` method; ``self`` is not modified
    by this method directly.
    """
    with_stats = self.clone()
    stat_options = with_stats.stat
    stat_options.update(fields, **kwargs)
    return with_stats

def params(self):
    """Return ``params_from_dict`` applied to this object's options.

    The mapping returned by ``self.options()`` is expanded into keyword
    arguments for ``params_from_dict``.
    """
    current_options = self.options()
    return params_from_dict(**current_options)

Expand Down Expand Up @@ -1318,6 +1324,43 @@ def options(self):
return {}


class StatOptions(Options):
    """Query options for Solr statistics (``stats.field`` / ``stats.facet``).

    Field bookkeeping is delegated to the ``Options`` base class; this class
    additionally tracks a single ``facet`` value that is emitted as
    ``stats.facet``.
    """

    option_name = "stats"
    opts = {
        "stats.facet": str,
    }
    # NOTE: solr documentation indicates stats.facet is a legacy parameter,
    # recommends using stats.field with facet.pivot instead

    def __init__(self, original=None):
        """Start empty, or take over the state of ``original`` if given."""
        if original is not None:
            self.stats = original.stats
            self.facet = original.facet
            # Shallow copy: the per-field dicts stay shared with ``original``.
            self.fields = copy.copy(original.fields)
            return
        self.stats = False
        self.facet = None
        self.fields = collections.defaultdict(dict)

    def update(self, fields=None, **kwargs):
        """Record ``fields`` and options, and switch statistics on.

        A ``facet`` keyword is stored on the instance and removed from
        ``kwargs`` before the remaining arguments go to the base class.
        """
        if 'facet' in kwargs:
            self.facet = kwargs.pop('facet')
        super(StatOptions, self).update(fields, **kwargs)
        self.stats = True

    def field_names_in_opts(self, opts, fields):
        """Store the sorted field names in ``opts`` under ``stats.field``.

        Nothing is written when ``fields`` is empty.
        """
        if not fields:
            return
        opts["stats.field"] = sorted(fields)

    def options(self):
        """Return the base-class options plus ``stats.facet`` when set."""
        # The ``stats`` flag itself is expected to come from the base class,
        # keyed on ``option_name``.
        result = super(StatOptions, self).options()
        if self.facet:
            result['stats.facet'] = self.facet
        return result


def params_from_dict(**kwargs):
utf8_params = []
for k, vs in list(kwargs.items()):
Expand Down
14 changes: 13 additions & 1 deletion scorched/tests/test_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
PostingsHighlightOptions, FacetPivotOptions,
RequestHandlerOption, DebugOptions,
params_from_dict, FacetRangeOptions,
TermVectorOptions)
TermVectorOptions, StatOptions)
from scorched.strings import WildcardString
from nose.tools import assert_equal

Expand Down Expand Up @@ -349,6 +349,15 @@ def check_mlt_query_data(method, args, kwargs, output):
({"debug": True},
{'debugQuery': True}),
),
StatOptions: (
({"fields": "int_field"},
{"stats": True, "stats.field": ['int_field']}),
({"fields": ["int_field", "float_field"]},
{"stats": True, "stats.field": ['int_field', 'float_field']}),
({"fields": ["int_field", "float_field"], "facet": "field0"},
{"stats": True, "stats.field": ['int_field', 'float_field'],
"stats.facet": "field0"}),
),
}


Expand Down Expand Up @@ -395,6 +404,9 @@ def check_good_option_data(OptionClass, kwargs, output):
# no float in pf
{"pf": {"text_field": 0.25, "string_field": "ABBS"}},
),
StatOptions: (
{"oops": True}, # undefined option
)
}


Expand Down

0 comments on commit d2bf472

Please sign in to comment.