1- require "active_record/relation/batches/batch_enumerator"
1+ require 'active_record/relation/batches/batch_enumerator'
22
33module ActiveRecord
44 module Batches
5- ORDER_OR_LIMIT_IGNORED_MESSAGE = "Scoped order and limit are ignored, it's forced to be batch order and batch size."
5+ ORDER_IGNORE_MESSAGE = "Scoped order is ignored, it's forced to be batch order."
66
77 # Looping through a collection of records from the database
88 # (using the Scoping::Named::ClassMethods.all method, for example)
@@ -34,15 +34,19 @@ module Batches
3434 # * <tt>:start</tt> - Specifies the primary key value to start from, inclusive of the value.
3535 # * <tt>:finish</tt> - Specifies the primary key value to end at, inclusive of the value.
3636 # * <tt>:error_on_ignore</tt> - Overrides the application config to specify if an error should be raised when
37- # the order and limit have to be ignored due to batching.
37+ # the order has to be ignored due to batching.
3838 #
39- # This is especially useful if you want multiple workers dealing with
40- # the same processing queue. You can make worker 1 handle all the records
41- # between id 0 and 10,000 and worker 2 handle from 10,000 and beyond
42- # (by setting the +:start+ and +:finish+ option on each worker).
39+ # Limits are honored, and if present there is no requirement for the batch
40+ # size, it can be less than, equal, or greater than the limit.
4341 #
44- # # Let's process for a batch of 2000 records, skipping the first 2000 rows
45- # Person.find_each(start: 2000, batch_size: 2000) do |person|
42+ # The options +start+ and +finish+ are especially useful if you want
43+ # multiple workers dealing with the same processing queue. You can make
44+ # worker 1 handle all the records between id 1 and 9999 and worker 2
45+ # handle from 10000 and beyond by setting the +:start+ and +:finish+
46+ # option on each worker.
47+ #
48+ # # Let's process from record 10_000 on.
49+ # Person.find_each(start: 10_000) do |person|
4650 # person.party_all_night!
4751 # end
4852 #
@@ -51,8 +55,8 @@ module Batches
5155 # work. This also means that this method only works when the primary key is
5256 # orderable (e.g. an integer or string).
5357 #
54- # NOTE: You can't set the limit either, that's used to control
55- # the batch sizes .
58+ # NOTE: By its nature, batch processing is subject to race conditions if
59+ # other processes are modifying the database.
5660 def find_each ( start : nil , finish : nil , batch_size : 1000 , error_on_ignore : nil )
5761 if block_given?
5862 find_in_batches ( start : start , finish : finish , batch_size : batch_size , error_on_ignore : error_on_ignore ) do |records |
@@ -89,15 +93,19 @@ def find_each(start: nil, finish: nil, batch_size: 1000, error_on_ignore: nil)
8993 # * <tt>:start</tt> - Specifies the primary key value to start from, inclusive of the value.
9094 # * <tt>:finish</tt> - Specifies the primary key value to end at, inclusive of the value.
9195 # * <tt>:error_on_ignore</tt> - Overrides the application config to specify if an error should be raised when
92- # the order and limit have to be ignored due to batching.
96+ # the order has to be ignored due to batching.
97+ #
98+ # Limits are honored, and if present there is no requirement for the batch
99+ # size, it can be less than, equal, or greater than the limit.
93100 #
94- # This is especially useful if you want multiple workers dealing with
95- # the same processing queue. You can make worker 1 handle all the records
96- # between id 0 and 10,000 and worker 2 handle from 10,000 and beyond
97- # (by setting the +:start+ and +:finish+ option on each worker).
101+ # The options +start+ and +finish+ are especially useful if you want
102+ # multiple workers dealing with the same processing queue. You can make
103+ # worker 1 handle all the records between id 1 and 9999 and worker 2
104+ # handle from 10000 and beyond by setting the +:start+ and +:finish+
105+ # option on each worker.
98106 #
99- # # Let's process the next 2000 records
100- # Person.find_in_batches(start: 2000, batch_size: 2000 ) do |group|
107+ # # Let's process from record 10_000 on.
108+ # Person.find_in_batches(start: 10_000 ) do |group|
101109 # group.each { |person| person.party_all_night! }
102110 # end
103111 #
@@ -106,8 +114,8 @@ def find_each(start: nil, finish: nil, batch_size: 1000, error_on_ignore: nil)
106114 # work. This also means that this method only works when the primary key is
107115 # orderable (e.g. an integer or string).
108116 #
109- # NOTE: You can't set the limit either, that's used to control
110- # the batch sizes .
117+ # NOTE: By its nature, batch processing is subject to race conditions if
118+ # other processes are modifying the database.
111119 def find_in_batches ( start : nil , finish : nil , batch_size : 1000 , error_on_ignore : nil )
112120 relation = self
113121 unless block_given?
@@ -149,17 +157,19 @@ def find_in_batches(start: nil, finish: nil, batch_size: 1000, error_on_ignore:
149157 # * <tt>:start</tt> - Specifies the primary key value to start from, inclusive of the value.
150158 # * <tt>:finish</tt> - Specifies the primary key value to end at, inclusive of the value.
151159 # * <tt>:error_on_ignore</tt> - Overrides the application config to specify if an error should be raised when
152- # the order and limit have to be ignored due to batching.
160+ # the order has to be ignored due to batching.
161+ #
162+ # Limits are honored, and if present there is no requirement for the batch
163+ # size, it can be less than, equal, or greater than the limit.
153164 #
154- # This is especially useful if you want to work with the
155- # ActiveRecord::Relation object instead of the array of records, or if
156- # you want multiple workers dealing with the same processing queue. You can
157- # make worker 1 handle all the records between id 0 and 10,000 and worker 2
158- # handle from 10,000 and beyond (by setting the +:start+ and +:finish+
159- # option on each worker).
165+ # The options +start+ and +finish+ are especially useful if you want
166+ # multiple workers dealing with the same processing queue. You can make
167+ # worker 1 handle all the records between id 1 and 9999 and worker 2
168+ # handle from 10000 and beyond by setting the +:start+ and +:finish+
169+ # option on each worker.
160170 #
161- # # Let's process the next 2000 records
162- # Person.in_batches(of: 2000, start: 2000 ).update_all(awesome: true)
171+ # # Let's process from record 10_000 on.
172+ # Person.in_batches(start: 10_000 ).update_all(awesome: true)
163173 #
164174 # An example of calling where query method on the relation:
165175 #
@@ -179,31 +189,38 @@ def find_in_batches(start: nil, finish: nil, batch_size: 1000, error_on_ignore:
179189 # consistent. Therefore the primary key must be orderable, e.g an integer
180190 # or a string.
181191 #
182- # NOTE: You can't set the limit either, that's used to control the batch
183- # sizes .
192+ # NOTE: By its nature, batch processing is subject to race conditions if
193+ # other processes are modifying the database.
184194 def in_batches ( of : 1000 , start : nil , finish : nil , load : false , error_on_ignore : nil )
185195 relation = self
186196 unless block_given?
187197 return BatchEnumerator . new ( of : of , start : start , finish : finish , relation : self )
188198 end
189199
190- if arel . orders . present? || arel . taken . present?
191- act_on_order_or_limit_ignored ( error_on_ignore )
200+ if arel . orders . present?
201+ act_on_ignored_order ( error_on_ignore )
202+ end
203+
204+ batch_limit = of
205+ if limit_value
206+ remaining = limit_value
207+ batch_limit = remaining if remaining < batch_limit
208+ relation = relation . limit ( nil ) # new relation without the limit
192209 end
193210
194- relation = relation . reorder ( batch_order ) . limit ( of )
211+ relation = relation . reorder ( batch_order ) . limit ( batch_limit )
195212 relation = apply_limits ( relation , start , finish )
196213 batch_relation = relation
197214
198215 loop do
199216 if load
200217 records = batch_relation . records
201218 ids = records . map ( &:id )
202- yielded_relation = self . where ( primary_key => ids )
219+ yielded_relation = where ( primary_key => ids )
203220 yielded_relation . load_records ( records )
204221 else
205222 ids = batch_relation . pluck ( primary_key )
206- yielded_relation = self . where ( primary_key => ids )
223+ yielded_relation = where ( primary_key => ids )
207224 end
208225
209226 break if ids . empty?
@@ -213,7 +230,20 @@ def in_batches(of: 1000, start: nil, finish: nil, load: false, error_on_ignore:
213230
214231 yield yielded_relation
215232
216- break if ids . length < of
233+ break if ids . length < batch_limit
234+
235+ if limit_value
236+ remaining -= ids . length
237+
238+ if remaining == 0
239+ # Saves a useless iteration when the limit is a multiple of the
240+ # batch size.
241+ break
242+ elsif remaining < batch_limit
243+ relation = relation . limit ( remaining )
244+ end
245+ end
246+
217247 batch_relation = relation . where ( arel_attribute ( primary_key ) . gt ( primary_key_offset ) )
218248 end
219249 end
@@ -230,13 +260,13 @@ def batch_order
230260 "#{ quoted_table_name } .#{ quoted_primary_key } ASC"
231261 end
232262
233- def act_on_order_or_limit_ignored ( error_on_ignore )
234- raise_error = ( error_on_ignore . nil? ? self . klass . error_on_ignored_order_or_limit : error_on_ignore )
263+ def act_on_ignored_order ( error_on_ignore )
264+ raise_error = ( error_on_ignore . nil? ? self . klass . error_on_ignored_order : error_on_ignore )
235265
236266 if raise_error
237- raise ArgumentError . new ( ORDER_OR_LIMIT_IGNORED_MESSAGE )
267+ raise ArgumentError . new ( ORDER_IGNORE_MESSAGE )
238268 elsif logger
239- logger . warn ( ORDER_OR_LIMIT_IGNORED_MESSAGE )
269+ logger . warn ( ORDER_IGNORE_MESSAGE )
240270 end
241271 end
242272 end
0 commit comments