Permalink
Browse files

Fixed problem where index orphans were still remaining after a call to clean_index_orphans

- If the number of orphans exceeds the number of records in the table, calling clean_index_orphans would leave (orphan count - table count) remaining
  • Loading branch information...
1 parent 33e21b7 commit ce054eed9489956a027dd5e8ac70e6bb6d8ab80d @loren loren committed Jan 25, 2012
Showing with 50 additions and 49 deletions.
  1. +1 −0 .gitignore
  2. +34 −34 sunspot_rails/lib/sunspot/rails/searchable.rb
  3. +15 −15 sunspot_rails/spec/model_spec.rb
View
@@ -9,3 +9,4 @@ sunspot-solr.pid
.yardoc
docs
*.swp
+.idea
@@ -1,6 +1,6 @@
module Sunspot #:nodoc:
module Rails #:nodoc:
- #
+ #
# This module adds Sunspot functionality to ActiveRecord models. As well as
# providing class and instance methods, it optionally adds lifecycle hooks
# to automatically add and remove models from the Solr index as they are
@@ -16,7 +16,7 @@ def included(base) #:nodoc:
end
module ActsAsMethods
- #
+ #
# Makes a class searchable if it is not already, or adds search
# configuration if it is. Note that the options passed in are only used
# the first time this method is called for a particular class; so,
@@ -28,7 +28,7 @@ module ActsAsMethods
# complete information on the functionality provided by that method.
#
# ==== Options (+options+)
- #
+ #
# :auto_index<Boolean>::
# Automatically index models in Solr when they are saved.
# Default: true
@@ -48,9 +48,9 @@ module ActsAsMethods
# object. Usual suspects are updated_at or counters.
# :include<Mixed>::
# Define default ActiveRecord includes, set this to allow ActiveRecord
- # to load required associations when indexing. See ActiveRecord's
+ # to load required associations when indexing. See ActiveRecord's
# documentation on eager-loading for examples on how to set this
- # Default: []
+ # Default: []
# :unless<Mixed>::
# Only index models in Solr if the method, proc or string evaluates
# to false (e.g. <code>:unless => :should_not_index?</code> or <code>
@@ -94,12 +94,12 @@ def searchable(options = {}, &block)
end
end
options[:include] = Util::Array(options[:include])
-
+
self.sunspot_options = options
end
end
- #
+ #
# This method is defined on all ActiveRecord::Base subclasses. It
# is false for classes on which #searchable has not been called, and
# true for classes on which #searchable has been called.
@@ -126,7 +126,7 @@ class <<base
alias_method :clean_index_orphans, :solr_clean_index_orphans unless method_defined? :clean_index_orphans
end
end
- #
+ #
# Search for instances of this class in Solr. The block is delegated to
# the Sunspot.search method - see the Sunspot documentation for the full
# API.
@@ -155,7 +155,7 @@ def solr_search(options = {}, &block)
end
end
- #
+ #
# Get IDs of matching results without loading the result objects from
# the database. This method may be useful if search is used as an
# intermediate step in a larger find operation. The block is the same
@@ -171,14 +171,14 @@ def solr_search_ids(&block)
end
end
- #
+ #
# Remove instances of this class from the Solr index.
#
def solr_remove_all_from_index
  # Hand this class to Sunspot.remove_all, which performs the actual removal.
  searchable_class = self
  Sunspot.remove_all(searchable_class)
end
- #
+ #
# Remove all instances of this class from the Solr index and immediately
# commit.
#
@@ -187,7 +187,7 @@ def solr_remove_all_from_index!
Sunspot.remove_all!(self)
end
- #
+ #
# Completely rebuild the index for this class. First removes all
# instances from the index, then loads records and indexes them.
#
@@ -205,7 +205,7 @@ def solr_reindex(options = {})
# records will not be indexed in batches. By default, a commit is issued
# after each batch; passing +false+ for +batch_commit+ will disable
# this, and only issue a commit at the end of the process. If associated
- # objects need to be indexed also, you can specify +include+ in format
+ # objects need to be indexed also, you can specify +include+ in format
# accepted by ActiveRecord to improve your sql select performance
#
# ==== Options (passed as a hash)
@@ -223,18 +223,18 @@ def solr_reindex(options = {})
# specify something reasonable here.
#
# ==== Examples
- #
+ #
# # index in batches of 50, commit after each
- # Post.index
+ # Post.index
#
# # index all rows at once, then commit
- # Post.index(:batch_size => nil)
+ # Post.index(:batch_size => nil)
#
# # index in batches of 50, commit when all batches complete
- # Post.index(:batch_commit => false)
+ # Post.index(:batch_commit => false)
#
# # include the associated +author+ object when loading to index
- # Post.index(:include => :author)
+ # Post.index(:include => :author)
#
def solr_index(opts={})
options = {
@@ -268,32 +268,32 @@ def solr_index(opts={})
Sunspot.commit unless options[:batch_commit]
end
- #
+ #
# Return the IDs of records of this class that are indexed in Solr but
# do not exist in the database. Under normal circumstances, this should
# never happen, but this method is provided in case something goes
# wrong. Usually you will want to rectify the situation by calling
# #clean_index_orphans or #reindex
- #
+ #
# ==== Options (passed as a hash)
#
# batch_size<Integer>:: Batch size with which to load records.
# Default is Sunspot.config.indexing.default_batch_size.
- #
+ #
# ==== Returns
#
# Array:: Collection of IDs that exist in Solr but not in the database
def solr_index_orphans(opts={})
  # opts[:batch_size] controls how many database rows find_each loads at a
  # time; it falls back to the configured indexing default.
  batch_size = opts[:batch_size] || Sunspot.config.indexing.default_batch_size

  # Collect every indexed ID by walking the Solr results page by page until an
  # empty page comes back. The size of a bare solr_search_ids call is NOT a
  # usable total: it only reflects the first (default-sized) result page, so
  # using it as :per_page silently ignores everything beyond that page and
  # breaks on an empty index (:per_page => 0).
  per_page = batch_size.to_i > 0 ? batch_size.to_i : 1000
  solr_ids = []
  page = 1
  loop do
    page_ids = solr_search_ids { paginate(:page => page, :per_page => per_page) }.to_a
    break if page_ids.empty?
    solr_ids.concat(page_ids)
    page += 1
  end
  indexed_ids = solr_ids.to_set

  # Strike out every ID that still has a database row; whatever remains is
  # indexed in Solr but missing from the database.
  find_each(:select => 'id', :batch_size => batch_size) do |object|
    indexed_ids.delete(object.id)
  end
  indexed_ids.to_a
end
- #
+ #
# Find IDs of records of this class that are indexed in Solr but do not
# exist in the database, and remove them from Solr. Under normal
# circumstances, this should not be necessary; this method is provided
@@ -303,7 +303,7 @@ def solr_index_orphans(opts={})
#
# batch_size<Integer>:: Batch size with which to load records
# Default is Sunspot.config.indexing.default_batch_size
- #
+ #
def solr_clean_index_orphans(opts={})
solr_index_orphans(opts).each do |id|
new do |fake_instance|
@@ -312,7 +312,7 @@ def solr_clean_index_orphans(opts={})
end
end
- #
+ #
# Classes that have been defined as searchable return +true+ for this
# method.
#
@@ -323,7 +323,7 @@ def solr_clean_index_orphans(opts={})
def searchable?
# Constant answer: per the surrounding documentation, classes that have been
# defined as searchable return true from this method.
true
end
-
+
def solr_execute_search(options = {})
options.assert_valid_keys(:include, :select)
search = yield
@@ -344,10 +344,10 @@ def solr_execute_search_ids(options = {})
search = yield
search.raw_results.map { |raw_result| raw_result.primary_key.to_i }
end
-
+
protected
-
- #
+
+ #
# Does some logging for benchmarking indexing performance
#
def solr_benchmark(batch_size, counter, &block)
@@ -371,7 +371,7 @@ def self.included(base) #:nodoc:
alias_method :more_like_this_ids, :solr_more_like_this_ids unless method_defined? :more_like_this_ids
end
end
- #
+ #
# Index the model in Solr. If the model is already indexed, it will be
# updated. Using the defaults, you will usually not need to call this
# method, as models are indexed automatically when they are created or
@@ -383,14 +383,14 @@ def solr_index
Sunspot.index(self)
end
- #
+ #
# Index the model in Solr and immediately commit. See #index
#
def solr_index!
  # Delegates to Sunspot.index! with this model as the sole argument.
  record = self
  Sunspot.index!(record)
end
-
- #
+
+ #
# Remove the model from the Solr index. Using the defaults, this should
# not be necessary, as models will automatically be removed from the
# index when they are destroyed. If you disable automatic removal
@@ -401,7 +401,7 @@ def solr_remove_from_index
Sunspot.remove(self)
end
- #
+ #
# Remove the model from the Solr index and commit immediately. See
# #remove_from_index
#
@@ -119,7 +119,7 @@
with :title, 'Bogus Post'
end.results.should be_empty
end
-
+
it 'should use the include option on the data accessor when specified' do
Post.should_receive(:all).with(hash_including(:include => [:blog])).and_return([@post])
Post.search do
@@ -134,7 +134,7 @@
with :title, 'Test Post'
end.results.should == [@post]
end
-
+
it 'should use the select option from search call to data accessor' do
Post.should_receive(:all).with(hash_including(:select => 'title, published_at')).and_return([@post])
Post.search(:select => 'title, published_at') do
@@ -145,15 +145,15 @@
it 'should not allow bogus options to search' do
lambda { Post.search(:bogus => :option) }.should raise_error(ArgumentError)
end
-
+
it 'should use the select option on the data accessor when specified' do
Post.should_receive(:all).with(hash_including(:select => 'title, published_at')).and_return([@post])
Post.search do
with :title, 'Test Post'
data_accessor_for(Post).select = [:title, :published_at]
end.results.should == [@post]
end
-
+
it 'should not use the select option on the data accessor when not specified' do
Post.should_receive(:all).with(hash_not_including(:select)).and_return([@post])
Post.search do
@@ -190,7 +190,7 @@
Post.search_ids.to_set.should == @posts.map { |post| post.id }.to_set
end
end
-
+
describe 'searchable?()' do
it 'should not be true for models that have not been configured for search' do
Location.should_not be_searchable
@@ -221,15 +221,15 @@
describe 'clean_index_orphans()' do
# Setup: create and index the posts, commit the index, then destroy some of
# them. After this commit: three posts indexed, the first two destroyed.
# NOTE(review): presumably this leaves more orphaned index entries than
# database rows, the case named in the commit message - confirm.
before :each do
- @posts = Array.new(2) { Post.create }.each { |post| post.index }
+ @posts = Array.new(3) { Post.create }.each { |post| post.index }
Sunspot.commit
- @posts.first.destroy
+ @posts.first(2).each { |post| post.destroy }
end
# Cleans orphans, commits, and expects exactly one hit to remain.
# NOTE(review): the new assertion checks only the hit count, not which post
# survived. Presumably hits is used because results are loaded from the
# database and would hide leftover orphans - confirm.
it 'should remove orphans from the index' do
Post.clean_index_orphans
Sunspot.commit
- Post.search.results.should == [@posts.last]
+ Post.search.hits.count.should == 1
end
end
@@ -252,7 +252,7 @@
Sunspot.commit
Post.search.results.to_set.should == @posts.to_set
end
-
+
end
describe 'reindex() with real data' do
@@ -274,7 +274,7 @@
Sunspot.commit
Post.search.results.to_set.should == @posts.to_set
end
-
+
describe "using batch sizes" do
it 'should index with a specified batch size' do
Post.reindex(:batch_size => 1)
@@ -285,15 +285,15 @@
end
-
+
describe "reindex()" do
-
+
before(:each) do
@posts = Array.new(2) { Post.create }
end
describe "when not using batches" do
-
+
it "should select all if the batch_size is nil" do
Post.should_receive(:all).with(:include => []).and_return([])
Post.reindex(:batch_size => nil)
@@ -319,7 +319,7 @@
Post.search.results.should_not include(@posts.first)
end
end
-
+
end
describe "when using batches" do
@@ -350,7 +350,7 @@
end
end
end
-
+
describe "more_like_this()" do
before(:each) do
@posts = [

0 comments on commit ce054ee

Please sign in to comment.