Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Pivots [WIP] #968

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
140 changes: 140 additions & 0 deletions sunspot/lib/sunspot/date_math_parser.rb
@@ -0,0 +1,140 @@
require 'date'
require 'strscan'

module Sunspot
  module Util
    #
    # Evaluates Solr-style Date Math expressions (for example "+1DAY",
    # "/HOUR" or "+1MONTH/DAY") against a base date.
    #
    # Based on:
    # https://github.com/apache/lucene-solr/blob/master/solr/core/src/java/org/apache/solr/util/DateMathParser.java
    #
    class DateMathParser
      # Literal UTC timestamp that may prefix an expression and replace the
      # base date, e.g. "2010-01-01T00:00:00Z+1YEAR".
      TIMESTAMP = /[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9.]+Z/

      SECONDS_PER_DAY = 86_400

      #
      # ==== Parameters
      #
      # date<DateTime, Time, Date>:: base date the expressions are applied to;
      #                              Time and Date are converted with
      #                              #to_datetime.
      #
      # Raises RuntimeError for any other class.
      #
      def initialize(date)
        @date =
          case date
          when DateTime
            date
          when Time, Date
            date.to_datetime
          else
            raise "DateMathParser expects a DateTime got: #{date.class}"
          end
      end

      #
      # Apply the Date Math expression +gap+ and return the resulting
      # DateTime.
      #
      # Operations are applied strictly left to right, so "+1MONTH/DAY" first
      # adds a month and then rounds down to the start of that day. An
      # optional leading timestamp replaces the base date. An empty string
      # returns the base date unchanged.
      #
      # Raises ArgumentError when the expression cannot be parsed and
      # RuntimeError when a unit is not recognised.
      #
      def evaluate(gap)
        scanner = StringScanner.new(gap)
        current = scanner.scan(TIMESTAMP) ? DateTime.parse(scanner.matched) : @date

        until scanner.eos?
          if scanner.scan(%r{/})
            current = normalize_date(current, scan_token(scanner, /[A-Z]+/, gap))
          elsif (sign = scanner.scan(/[+-]/))
            amount = scan_token(scanner, /[0-9]+/, gap).to_i
            unit = scan_token(scanner, /[A-Z]+/, gap)
            current =
              if sign == '+'
                add_date_time(current, amount, unit)
              else
                sub_date_time(current, amount, unit)
              end
          else
            parse_error(gap)
          end
        end

        current
      end

      #
      # Round +date+ down to the start of +unit+. "DATE" is treated as an
      # alias of "DAY", as in Solr's parser.
      #
      # The YEAR..MINUTE branches build a fresh DateTime from the date's
      # fields (so the result carries DateTime.new's default offset); the
      # SECOND and MILLI branches only strip the sub-unit fraction.
      #
      # Raises RuntimeError for an unknown unit.
      #
      def normalize_date(date, unit)
        case unit
        when "YEAR", "YEARS"
          DateTime.new(date.year, 1, 1, 0, 0, 0)
        when "MONTH", "MONTHS"
          DateTime.new(date.year, date.month, 1, 0, 0, 0)
        when "DAY", "DAYS", "DATE"
          DateTime.new(date.year, date.month, date.day, 0, 0, 0)
        when "HOUR", "HOURS"
          DateTime.new(date.year, date.month, date.mday, date.hour, 0, 0)
        when "MINUTE", "MINUTES"
          DateTime.new(date.year, date.month, date.mday, date.hour, date.minute, 0)
        when "SECOND", "SECONDS"
          drop_sub_second(date, 1)
        when "MILLI", "MILLIS", "MILLISECOND", "MILLISECONDS"
          drop_sub_second(date, Rational(1, 1000))
        else
          raise "Unrecognized Date Time Math unit: #{unit}"
        end
      end

      #
      # Return +date+ moved forward by +value+ units. Sub-day units are added
      # as exact Rational fractions of a day.
      #
      # Raises RuntimeError for an unknown unit.
      #
      def add_date_time(date, value, unit)
        case unit
        when "YEAR", "YEARS"
          date.next_year(value)
        when "MONTH", "MONTHS"
          date.next_month(value)
        when "DAY", "DAYS", "DATE"
          date.next_day(value)
        when "HOUR", "HOURS"
          date + Rational(value, 24)
        when "MINUTE", "MINUTES"
          date + Rational(value, 60 * 24)
        when "SECOND", "SECONDS"
          date + Rational(value, SECONDS_PER_DAY)
        when "MILLI", "MILLIS", "MILLISECOND", "MILLISECONDS"
          date + Rational(value, 1000 * SECONDS_PER_DAY)
        else
          raise "Unrecognized Date Time Math unit: #{unit}"
        end
      end

      #
      # Return +date+ moved backward by +value+ units; the mirror image of
      # #add_date_time (moving forward by a negative amount).
      #
      # Raises RuntimeError for an unknown unit.
      #
      def sub_date_time(date, value, unit)
        add_date_time(date, -value, unit)
      end

      private

      # Consume +pattern+ at the scanner position or fail with a parse error.
      def scan_token(scanner, pattern, gap)
        scanner.scan(pattern) || parse_error(gap)
      end

      # Raise the error used for every malformed expression.
      def parse_error(gap)
        raise ArgumentError, "Error parsing Date Time Math string in range.gap: #{gap}"
      end

      # Strip whatever part of the second fraction is finer than
      # +resolution+ (expressed in seconds). Objects without a second
      # fraction are returned untouched.
      def drop_sub_second(date, resolution)
        return date unless date.respond_to?(:sec_fraction)

        date - Rational(date.sec_fraction % resolution, SECONDS_PER_DAY)
      end
    end
  end
end

8 changes: 8 additions & 0 deletions sunspot/lib/sunspot/dsl/field_query.rb
Expand Up @@ -391,6 +391,14 @@ def order_by_function(*args)
Sunspot::Query::Sort::FunctionSort.new(@setup,args)
)
end

# TODO: Document function
#
# Register a pivot facet over the given fields.
#
# Each name is resolved through @setup.field; the resulting field objects
# are wrapped in a Sunspot::Query::PivotFacet that is added to the query,
# and the same fields/options are registered on the search through
# add_pivot_facet, whose return value is returned.
#
# ==== Parameters
#
# field_names<Symbol>:: names of the fields to pivot on, in pivot order
# options<Hash>:: passed unchanged to both the query facet and the search
#
def pivot(*field_names, **options)
  pivot_fields = field_names.map { |name| @setup.field(name) }
  @query.add_field_facet(Sunspot::Query::PivotFacet.new(pivot_fields, options))
  @search.add_pivot_facet(pivot_fields, options)
end
end
end
end
53 changes: 53 additions & 0 deletions sunspot/lib/sunspot/query/pivot_facet.rb
@@ -0,0 +1,53 @@
module Sunspot
  module Query
    #
    # Query-side facet that pivots over several fields at once. It is built
    # on AbstractFieldFacet but holds a list of fields instead of a single
    # one.
    #
    class PivotFacet < AbstractFieldFacet
      #
      # ==== Parameters
      #
      # fields<Array>:: field objects responding to #indexed_name
      # options<Hash>:: facet options; :range, :stats and :query are emitted
      #                 as local params
      #
      def initialize(fields, options)
        @fields = fields
        # No single field applies here, so the superclass receives nil.
        super(nil, options)
      end

      # Parameter names are built under "facet.pivot." rather than from a
      # single field, because this facet does not rely on @field.
      def qualified_param(param)
        "facet.pivot.#{param}".to_sym
      end

      #
      # Parameters from the superclass plus the "facet.pivot" entry.
      #
      # The value is wrapped in an array so that several facet.pivot entries
      # can all appear in the search string instead of only the last one
      # added to the params. See:
      # * https://github.com/sunspot/sunspot/blob/3328212da79178319e98699d408f14513855d3c0/sunspot/lib/sunspot/query/common_query.rb#L81
      # * https://github.com/sunspot/sunspot/blob/3328212da79178319e98699d408f14513855d3c0/sunspot/lib/sunspot/util.rb#L236
      #
      def to_params
        params = super
        params[:"facet.pivot"] = [field_names_with_local_params]
        params
      end

      private

      # Memoized hash of the :range, :stats and :query options that are set,
      # in that order.
      def local_params
        @local_params ||=
          [:range, :stats, :query].each_with_object({}) do |key, collected|
            collected[key] = @options[key] if @options[key]
          end
      end

      # Comma-joined indexed field names, prefixed with a "{!key=value ...}"
      # section when any local params are present.
      def field_names_with_local_params
        return field_names.join(',') if local_params.empty?

        prefix = local_params.map { |key, value| "#{key}=#{value}" }.join(' ')
        "{!#{prefix}}#{field_names.join(',')}"
      end

      # Indexed names of the pivoted fields, in the order given.
      def field_names
        @fields.map { |field| field.indexed_name }
      end
    end
  end
end
5 changes: 4 additions & 1 deletion sunspot/lib/sunspot/query/range_facet.rb
Expand Up @@ -23,12 +23,15 @@ def to_params

private

def local_params
# Memoized hash of local params for this range facet: :ex from
# @exclude_tag, :key from the :name option and :tag from the :tag option.
# Only values that are set are included.
#
# NOTE: tags should also be available on queries, stats etc.
def local_params
  @local_params ||= {}.tap do |collected|
    collected[:ex] = @exclude_tag if @exclude_tag
    collected[:key] = @options[:name] if @options[:name]
    collected[:tag] = @options[:tag] if @options[:tag]
  end
end
Expand Down
20 changes: 19 additions & 1 deletion sunspot/lib/sunspot/search/abstract_search.rb
Expand Up @@ -26,14 +26,17 @@ def initialize(connection, setup, query, configuration) #:nodoc:
@connection, @setup, @query = connection, setup, query
@query.paginate(1, configuration.pagination.default_per_page)

@facets = []
@facets_by_name = {}
@facets = []

@groups_by_name = {}
@groups = []

@stats_by_name = {}
@stats = []

@pivots_by_name = {}
@pivots = []
end

#
Expand Down Expand Up @@ -268,6 +271,21 @@ def add_json_facet(field, options = {})
add_facet(name, FieldJsonFacet.new(field, self, options))
end

#
# Build a Sunspot::Search::PivotFacet for +fields+, append it to @pivots
# and index it in @pivots_by_name. Returns the new facet.
#
# Pivots are named after their field names joined by commas, see:
# https://lucene.apache.org/solr/guide/6_6/faceting.html#Faceting-CombiningFacetQueriesAndFacetRangesWithPivotFacets
#
def add_pivot_facet(fields, options)
  lookup_key = fields.map { |field| field.name }.join(',')
  Sunspot::Search::PivotFacet.new(fields, @setup, self, options).tap do |facet|
    @pivots.push(facet)
    @pivots_by_name[lookup_key] = facet
  end
end

#
# Look up a registered pivot by its field names. The names are joined with
# commas to form the key, so their order is important. Returns nil when no
# names are given or nothing is stored under the key.
#
def pivot(*names)
  @pivots_by_name[names.join(',')] unless names.empty?
end

def highlights_for(doc) #:nodoc:
if @solr_result['highlighting']
@solr_result['highlighting'][doc['id']]
Expand Down
67 changes: 67 additions & 0 deletions sunspot/lib/sunspot/search/pivot_facet.rb
@@ -0,0 +1,67 @@
module Sunspot
module Search
class PivotFacet
class Row
def initialize(fields, pivot, setup)

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I believe `fields` should be the last parameter, because you are expecting more than one value.
def initialize(pivot, setup, fields).

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hey @anikett-rpx, thanks for the comment!

Could you give me an example of what you mean? fields should be an explicit array of field names, entered like .new([:field1, :field2, ...], ...).

# ordering is important here!
@field, *@deeper_fields = *fields
@pivot = pivot
@setup = setup
end

attr_reader :field, :deeper_fields

# The pivot entry this row was built from (stored as @pivot by the
# constructor); #pivot and #range read their data from it.
def result
@pivot
end

#
# Child rows nested under this row, one per entry of result['pivot'], each
# built with the remaining (deeper) fields.
#
# Returns an empty array when the entry has no 'pivot' key, as is the case
# for the innermost level of a pivot, instead of failing on nil.
#
def pivot
  (result['pivot'] || []).map { |child| Row.new(deeper_fields, child, @setup) }
end

#
# Range data attached to this row for +field_name+, wrapped in a
# PivotRange.
#
# The field is resolved through @setup.field to obtain its indexed name.
# Returns nil when the row has no 'ranges' section at all, or none for
# that field.
#
def range(field_name)
  indexed_name = @setup.field(field_name).indexed_name
  range_data = (result['ranges'] || {})[indexed_name]
  return unless range_data

  PivotRange.new(range_data)
end
end

#
# Read-only wrapper around the range hash attached to a pivot row.
#
class PivotRange
  def initialize(range)
    @range = range
  end

  # The flat 'counts' list paired up two items at a time and turned into a
  # hash.
  #
  # Probably should be just like the RangeFacet returning FacetRows.
  def counts
    pairs = @range['counts'].each_slice(2)
    pairs.to_h
  end

  # #gap, #start and #end each return the entry stored under the key of the
  # same name.
  %w[gap start end].each do |attribute|
    define_method(attribute) { @range[attribute] }
  end
end

#
# ==== Parameters
#
# fields<Array>:: fields being pivoted, in pivot order
# setup:: setup object handed on to every Row
# search:: search whose facet_response supplies the pivot data
# options<Hash>:: options the pivot was requested with
#
def initialize(fields, setup, search, options)
  @fields = fields
  @setup = setup
  @search = search
  @options = options
end

# Top-level rows of this pivot, memoized: one Row per entry found under
# 'facet_pivot' => range_name in the search's facet response.
def rows
  @rows ||= begin
    entries = @search.facet_response['facet_pivot'][range_name]
    entries.map { |entry| Row.new(@fields, entry, @setup) }
  end
end

# Key of this pivot in the facet response: the indexed names of its fields
# joined by commas.
def range_name
  indexed_names = @fields.map { |field| field.indexed_name }
  indexed_names.join(',')
end
end
end
end
31 changes: 17 additions & 14 deletions sunspot/lib/sunspot/search/range_facet.rb
Expand Up @@ -10,27 +10,30 @@ def field_name
end

#
# Facet rows for this range facet, memoized.
#
# The 'counts' list of the facet response is read as consecutive
# (start, count) pairs. A start that looks like a UTC timestamp is cast
# through the field and its end is computed by applying the
# :range_interval option as a Date Math expression; any other start is
# read as a float and its end is start plus the interval, which defaults
# to 10 when the option is not given.
#
# Rows are sorted by descending count when the :sort option is :count,
# otherwise by ascending range start.
#
def rows
  @rows ||= [].tap do |collected|
    data = @search.facet_response['facet_ranges'][@field.indexed_name]
    gap = @options[:range_interval]

    if data['counts']
      data['counts'].each_slice(2) do |start_str, count|
        # \A and \z anchor the whole string; ^ and $ would only anchor a line.
        if start_str =~ /\A\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}Z\z/
          start = @field.cast(start_str)
          finish = Sunspot::Util::DateMathParser.new(start).evaluate(gap)
        else
          start = start_str.to_f
          finish = start + (gap || 10)
        end
        collected << FacetRow.new(start..finish, count, self)
      end
    end

    if @options[:sort] == :count
      collected.sort! { |lrow, rrow| rrow.count <=> lrow.count }
    else
      collected.sort! { |lrow, rrow| lrow.value.first <=> rrow.value.first }
    end
  end
end
end
end
Expand Down