Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Updates indexing to use batch processing with a config variable #135

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
5 changes: 5 additions & 0 deletions sunspot/lib/sunspot/configuration.rb
Expand Up @@ -8,6 +8,8 @@ module Sunspot
# Sunspot.config.pagination.default_per_page:: # Sunspot.config.pagination.default_per_page::
# Solr always paginates its results. This sets Sunspot's default result # Solr always paginates its results. This sets Sunspot's default result
# count per page if it is not explicitly specified in the query. # count per page if it is not explicitly specified in the query.
# Sunspot.config.indexing.default_batch_size::
# This sets the default batch size used for indexing. The default is 50.
# #
module Configuration module Configuration
class <<self class <<self
Expand All @@ -28,6 +30,9 @@ def build #:nodoc:
pagination do pagination do
default_per_page 30 default_per_page 30
end end
indexing do
default_batch_size 50
end
end end
end end


Expand Down
21 changes: 16 additions & 5 deletions sunspot_rails/lib/sunspot/rails/searchable.rb
Expand Up @@ -238,7 +238,7 @@ def solr_reindex(options = {})
# #
def solr_index(opts={}) def solr_index(opts={})
options = { options = {
:batch_size => 50, :batch_size => Sunspot.config.indexing.default_batch_size,
:batch_commit => true, :batch_commit => true,
:include => self.sunspot_options[:include], :include => self.sunspot_options[:include],
:start => opts.delete(:first_id) || 0 :start => opts.delete(:first_id) || 0
Expand Down Expand Up @@ -275,13 +275,19 @@ def solr_index(opts={})
# wrong. Usually you will want to rectify the situation by calling # wrong. Usually you will want to rectify the situation by calling
# #clean_index_orphans or #reindex # #clean_index_orphans or #reindex
# #
# ==== Options (passed as a hash)
#
# batch_size<Integer>:: Batch size with which to load records.
# Default is Sunspot.config.indexing.default_batch_size (50).
#
# ==== Returns # ==== Returns
# #
# Array:: Collection of IDs that exist in Solr but not in the database # Array:: Collection of IDs that exist in Solr but not in the database
def solr_index_orphans def solr_index_orphans(opts={})
batch_size = opts[:batch_size] || Sunspot.config.indexing.default_batch_size
count = self.count count = self.count
indexed_ids = solr_search_ids { paginate(:page => 1, :per_page => count) }.to_set indexed_ids = solr_search_ids { paginate(:page => 1, :per_page => count) }.to_set
all(:select => 'id').each do |object| find_each(:select => 'id', :batch_size => batch_size) do |object|
indexed_ids.delete(object.id) indexed_ids.delete(object.id)
end end
indexed_ids.to_a indexed_ids.to_a
Expand All @@ -293,8 +299,13 @@ def solr_index_orphans
# circumstances, this should not be necessary; this method is provided # circumstances, this should not be necessary; this method is provided
# in case something goes wrong. # in case something goes wrong.
# #
def solr_clean_index_orphans # ==== Options (passed as a hash)
solr_index_orphans.each do |id| #
# batch_size<Integer>:: Batch size with which to load records.
# Default is Sunspot.config.indexing.default_batch_size (50).
#
def solr_clean_index_orphans(opts={})
solr_index_orphans(opts).each do |id|
new do |fake_instance| new do |fake_instance|
fake_instance.id = id fake_instance.id = id
end.solr_remove_from_index end.solr_remove_from_index
Expand Down
6 changes: 6 additions & 0 deletions sunspot_rails/spec/model_spec.rb
Expand Up @@ -211,6 +211,12 @@
it 'should return IDs of objects that are in the index but not the database' do it 'should return IDs of objects that are in the index but not the database' do
Post.index_orphans.should == [@posts.first.id] Post.index_orphans.should == [@posts.first.id]
end end

it 'should find the orphans in batches to improve performance' do
Post.should_receive(:find_each).with(hash_including(:batch_size => 10)).and_return([])
Post.index_orphans(:batch_size => 10)
end

end end


describe 'clean_index_orphans()' do describe 'clean_index_orphans()' do
Expand Down