Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse files

Back-ported changes.

  * Added output
  * All operations are performed on SourceClass and DestinationClass; no class referencing the STI table or destination table needs to exist
  * Chunk cleaving offset is based on the starting id; all operations take place on ascending ids
  • Loading branch information...
commit 7a28b93f00901c4fb2e94dee978f545cdd232960 1 parent 9dfe163
Duncan Beevers duncanbeevers authored
91 lib/single_table_inheritance_cleaver.rb
View
@@ -1,21 +1,29 @@
class SingleTableInheritanceCleaver
- attr_accessor :source, :destinations, :chunk_size, :rejections, :conditions, :excluded_types, :table_name_to_class_hash
+ attr_accessor :source, :destinations, :chunk_size, :rejections, :conditions, :excluded_types, :table_name_to_class_hash,
+ :output
DISALLOWED_COLUMN_NAMES = %w(id type)
+ class SourceClass < ActiveRecord::Base
+ end
+
+ class DestinationClass < ActiveRecord::Base
+ end
+
def initialize(source, options = {})
+ SourceClass.table_name = source.to_s.tableize
self.source = source
self.rejections = options[:rejections] || {}
- self.conditions = options[:conditions] || {}
self.chunk_size = options[:chunk_size] || 500
self.excluded_types = options[:excluded_types] || []
self.destinations = options[:destinations] || {}
+ self.output = options[:output]
conflicting_types = self.destinations.keys & self.excluded_types
raise ArgumentError, "The #{conflicting_types.join(', ')} types were explicitly included and excluded, make up your mind." unless conflicting_types.blank?
- all_types = source.find(:all, :select => 'DISTINCT type').map {|t| t.attributes['type']}
+ all_types = SourceClass.find(:all, :select => 'DISTINCT type AS type_name').map(&:type_name) # bypass AR's type-inference
valid_types = all_types - self.excluded_types
valid_types.each do |type|
self.destinations[type] ||= type.tableize
@@ -23,45 +31,90 @@ def initialize(source, options = {})
self.table_name_to_class_hash = {}
self.destinations.values.each { |table_name| self.table_name_to_class_hash[table_name] = table_name.classify.constantize }
+
+ additional_conditions = options[:conditions] || {}
+ self.conditions = {}
+ self.destinations.each do |source_type, destination_table_name|
+ self.conditions[destination_table_name] = merge_conditions(additional_conditions, :type => source_type)
+ end
end
# Process records from the source table into the destination tables
def cleave!
+ status_update "Beginning cleave on #{source}"
destinations.each do |source_type, destination_table_name|
+ status_update "Cleaving #{source_type} to #{destination_table_name}"
cleave_destination source_type, destination_table_name
end
end
- def cleave_destination source_type, destination_table_name, offset = 0
- while (keep_going = cleave_chunk(source_type, destination_table_name, offset))
- offset += chunk_size
+ def cleave_destination source_type, destination_table_name, starting_id = 0
+ count_conditions = merge_conditions(self.conditions[destination_table_name], [ 'id >= ?', starting_id ] )
+ total_records_to_cleave = SourceClass.count('1', :conditions => count_conditions)
+ return if total_records_to_cleave.zero?
+
+ total_chunks_to_cleave = total_records_to_cleave / chunk_size
+ total_chunks_to_cleave = 1 if total_chunks_to_cleave.zero?
+ output_interval = total_chunks_to_cleave / 100
+ output_interval = 1 if output_interval.zero?
+ status_update "#{total_records_to_cleave} more #{destination_table_name} to cleave"
+ chunks_cleaved = 0
+ while (starting_id = cleave_chunk(source_type, destination_table_name, starting_id))
+ chunks_cleaved += 1
+ percent = chunks_cleaved * 100 / total_chunks_to_cleave
+ status_update "[#{chunks_cleaved}/#{total_chunks_to_cleave} #{percent}%]" if 0 == chunks_cleaved % output_interval
end
end
- def cleave_chunk source_type, destination_table_name, offset = 0
+ def cleave_chunk source_type, destination_table_name, starting_id = 0
return nil unless self.destinations.keys.include?(source_type)
- source_class = source_type.constantize
- previous_max = source_class.maximum('id')
+ DestinationClass.set_table_name destination_table_name
+ previous_max = DestinationClass.maximum('id')
column_names = column_names(destination_table_name)
- conditions = source.send(:merge_conditions, {:type => source_type}, self.conditions[destination_table_name])
-
+ conditions = merge_conditions(self.conditions[destination_table_name], [ 'id >= ?', starting_id ] )
sql_column_names = column_names.join(', ')
- sql = <<-SQL
- INSERT INTO #{destination_table_name}(#{sql_column_names}) SELECT #{sql_column_names} FROM #{source.table_name} WHERE #{conditions} LIMIT #{self.chunk_size} OFFSET #{offset}
- SQL
- latest_insert = source.connection.insert sql
- current_max = source_class.maximum('id')
-
- return current_max.to_i != previous_max.to_i
+
+ sql = [
+ 'INSERT INTO ', destination_table_name,
+ '(', sql_column_names, ') ',
+ job_select(sql_column_names, conditions)
+ ].join
+
+ SourceClass.connection.insert sql
+ current_max = DestinationClass.maximum('id')
+
+ return false unless current_max.to_i != previous_max.to_i
+
+ last_id_processed = SourceClass.connection.execute(job_select('id', conditions)).map do |r| r['id'].to_i end.max
+ last_id_processed + 1
+ end
+
+ def job_select columns, conditions
+ [
+ 'SELECT ', columns,
+ ' FROM ', SourceClass.table_name,
+ ' WHERE ', conditions,
+ ' ORDER BY id',
+ ' LIMIT ', chunk_size
+ ].join
end
def column_names(destination_table_name)
- names = self.source.columns.map(&:name)
+ names = SourceClass.columns.map(&:name)
names.delete_if { |name| DISALLOWED_COLUMN_NAMES.include?(name) || Array(self.rejections[destination_table_name]).include?(name) }
names = names & self.table_name_to_class_hash[destination_table_name].column_names
names
end
+
+ def status_update what
+ puts [ Time.now, ': ', what ].join if output
+ end
+
+ def merge_conditions condition1, condition2
+ SourceClass.send(:merge_conditions, condition1, condition2)
+ end
+
end
26 test/single_table_inheritance_cleaver_test.rb
View
@@ -1,6 +1,15 @@
require File.join(File.dirname(__FILE__), 'test_helper')
class SingleTableInheritanceCleaverTest < Test::Unit::TestCase
+ def test_should_not_output_by_default
+ cleaver = SingleTableInheritanceCleaver.new(HighScore)
+ assert !cleaver.output
+ end
+
+ def test_should_output
+ cleaver = SingleTableInheritanceCleaver.new(HighScore, :output => true)
+ assert cleaver.output
+ end
def test_cleaver_knows_what_the_table_will_be_split_into
HighScore.create!(:type => 'DailyHighScore')
@@ -13,10 +22,11 @@ def test_cleaver_knows_what_the_table_will_be_split_into
def test_cleave_respects_conditions_on_a_destination
generate_some_high_scores_to_cleave
- cleaver = SingleTableInheritanceCleaver.new(HighScore, :conditions => {'daily_high_scores' => 'value between 4 and 10' }) # {:value => 4..10}
+ cleaver = SingleTableInheritanceCleaver.new(HighScore, :conditions => 'value between 10 and 110') # {:value => 4..10}
cleaver.cleave!
- assert_same_elements( (4..10).to_a, DailyHighScore.find(:all).map(&:value) )
+ assert_same_elements( (10..20).to_a, DailyHighScore.find(:all).map(&:value) )
+ assert_same_elements( (101..110).to_a, WeeklyHighScore.find(:all).map(&:value) )
end
def test_specify_destination_table
@@ -105,7 +115,7 @@ def test_cleave_adds_correct_data_with_several_items_per_type
cleaver = SingleTableInheritanceCleaver.new(HighScore, :chunk_size => 7)
cleaver.cleave!
-
+
assert_equal [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20], DailyHighScore.find(:all).map(&:value)
assert_equal [101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120], WeeklyHighScore.find(:all).map(&:value)
@@ -154,14 +164,14 @@ def test_cleave_chunk_returns_nil_if_nothing_added
assert !cleaver.cleave_chunk('DailyHighScore', 'daily_high_scores', 10)
end
- def test_cleave_chunk_adds_rows_at_specified_offset
+ def test_cleave_chunk_adds_rows_at_specified_id
generate_some_high_scores_to_cleave
records_to_move = 5
- offset = 6
+ starting_id = 6
cleaver = SingleTableInheritanceCleaver.new(HighScore, :chunk_size => records_to_move)
- cleaver.cleave_chunk('DailyHighScore', 'daily_high_scores', offset)
-
- expected_values = HighScore.find(:all, :conditions => {:type => 'DailyHighScore'}, :order => 'id', :offset => offset, :limit => records_to_move).map(&:value)
+ cleaver.cleave_chunk('DailyHighScore', 'daily_high_scores', starting_id)
+
+ expected_values = HighScore.find(:all, :conditions => [ 'type = ? AND id > ?', 'DailyHighScore', starting_id ], :order => 'id', :limit => records_to_move).map(&:value)
assert_same_elements expected_values, DailyHighScore.find(:all).map(&:value)
end
Please sign in to comment.
Something went wrong with that request. Please try again.