diff --git a/CHANGELOG.md b/CHANGELOG.md index 8cc5d820..b6d27cfb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,10 @@ - Add official Ruby 3.4 support. [#289](https://github.com/splitwise/super_diff/pull/289) by [@olleolleolle](https://github.com/olleolleolle) +### Bug fixes + +- Fix hash diffing algorithm. [#293](https://github.com/splitwise/super_diff/pull/293) + ### Other changes - Fix bundler gem caching in CI. [#289](https://github.com/splitwise/super_diff/pull/289) by [@olleolleolle](https://github.com/olleolleolle) diff --git a/lib/super_diff/basic/operation_tree_builders/hash.rb b/lib/super_diff/basic/operation_tree_builders/hash.rb index ba030524..9f7064e3 100644 --- a/lib/super_diff/basic/operation_tree_builders/hash.rb +++ b/lib/super_diff/basic/operation_tree_builders/hash.rb @@ -21,195 +21,95 @@ def build_operation_tree private def unary_operations_using_variant_of_patience_algorithm - operations = [] aks = actual.keys eks = expected.keys - previous_ei = nil - ei = 0 - ai = 0 - - # When diffing a hash, we're more interested in the 'actual' version - # than the 'expected' version, because that's the ultimate truth. - # Therefore, the diff is presented from the perspective of the 'actual' - # hash, and we start off by looping over it. - while ai < aks.size - ak = aks[ai] - av = actual[ak] - ev = expected[ak] - # While we iterate over 'actual' in order, we jump all over - # 'expected', trying to match up its keys with the keys in 'actual' as - # much as possible. - ei = eks.index(ak) - - if should_add_noop_operation?(ak) - # (If we're here, it probably means that the key we're pointing to - # in the 'actual' and 'expected' hashes have the same value.) - - if ei && previous_ei && (ei - previous_ei) > 1 - # If we've jumped from one operation in the 'expected' hash to - # another operation later in 'expected' (due to the fact that the - # 'expected' hash is in a different order than 'actual'), collect - # any delete operations in between and add them to our operations - # array as deletes before adding the noop. If we don't do this - # now, then those deletes will disappear. (Again, we are mainly - # iterating over 'actual', so this is the only way to catch all of - # the keys in 'expected'.) - (previous_ei + 1).upto(ei - 1) do |ei2| - ek = eks[ei2] - ev2 = expected[ek] - av2 = actual[ek] - - next unless - (!actual.include?(ek) || ev2 != av2) && - operations.none? do |operation| - %i[delete noop].include?(operation.name) && - operation.key == ek - end - - operations << Core::UnaryOperation.new( - name: :delete, - collection: expected, - key: ek, - value: ev2, - index: ei2 - ) - end - end + operations = [] + lcs_callbacks = LCSCallbacks.new( + operations: operations, + expected: expected, + actual: actual, + should_add_noop_operation: method(:should_add_noop_operation?) + ) + Diff::LCS.traverse_sequences(eks, aks, lcs_callbacks) + + operations + end + + class LCSCallbacks + extend AttrExtras.mixin + + pattr_initialize %i[operations! expected! actual! should_add_noop_operation!] + public :operations + + def discard_a(event) + # This key is in `expected`, but may also be in `actual`. + key = event.old_element + + # We want the diff to match the key order of `actual` as much as + # possible, so if this is also a key in `actual` that's just being + # reordered, don't add any operations now. The change or noop will + # be added later. + return if actual.include?(key) + + operations << Core::UnaryOperation.new( + name: :delete, + collection: expected, + key: key, + value: expected[key], + index: event.old_position + ) + end + + def discard_b(event) + # This key is in `actual`, but may also be in `expected`. + + key = event.new_element + handle_operation_on_key_in_actual(key, event) + end + + def match(event) + # This key is part of the longest common subsequence. + + key = event.old_element + handle_operation_on_key_in_actual(key, event) + end + + private + + def handle_operation_on_key_in_actual(key, event) + if should_add_noop_operation.call(key) operations << Core::UnaryOperation.new( name: :noop, collection: actual, - key: ak, - value: av, - index: ai + key: key, + value: actual[key], + index: event.new_position ) else - # (If we're here, it probably means that the key in 'actual' isn't - # present in 'expected' or the values don't match.) - - if (operations.empty? || operations.last.name == :noop) && - (ai.zero? || eks.include?(aks[ai - 1])) - - # If we go from a match in the last iteration to a missing or - # extra key in this one, or we're at the first key in 'actual' and - # it's missing or extra, look for deletes in the 'expected' hash - # and add them to our list of operations before we add the - # inserts. In most cases we will accomplish this by backtracking a - # bit to the key in 'expected' that matched the key in 'actual' we - # processed in the previous iteration (or just the first key in - # 'expected' if this is the first key in 'actual'), and then - # iterating from there through 'expected' until we reach the end - # or we hit some other condition (see below). - - start_index = - if ai.positive? - eks.index(aks[ai - 1]) + 1 - else - 0 - end - - start_index.upto(eks.size - 1) do |ei2| - ek = eks[ei2] - ev = expected[ek] - av2 = actual[ek] - - if actual.include?(ek) && ev == av2 - # If the key in 'expected' we've landed on happens to be a - # match in 'actual', then stop, because it's going to be - # handled in some future iteration of the 'actual' loop. - break - elsif aks[ai + 1..].any? do |k| # rubocop:disable Lint/DuplicateBranch for clarity - expected.include?(k) && expected[k] != actual[k] - end - - # While we backtracked a bit to iterate over 'expected', we - # now have to look ahead. If we will end up encountering a - # insert that matches this delete later, stop and go back to - # iterating over 'actual'. This is because the delete we would - # have added now will be added later when we encounter the - # associated insert, so we don't want to add it twice. - break - else - operations << Core::UnaryOperation.new( - name: :delete, - collection: expected, - key: ek, - value: ev, - index: ei2 - ) - end - - next unless ek == ak && ev != av - - # If we're pointing to the same key in 'expected' as in - # 'actual', but with different values, go ahead and add an - # insert now to accompany the delete added above. That way - # they appear together, which will be easier to read. - operations << Core::UnaryOperation.new( - name: :insert, - collection: actual, - key: ak, - value: av, - index: ai - ) - end - end - - if expected.include?(ak) && ev != av && - operations.none? do |op| - op.name == :delete && op.key == ak - end - - # If we're here, it means that we didn't encounter any delete - # operations above for whatever reason and so we need to add a - # delete to represent the fact that the value for this key has - # changed. + # If the key is present in both `actual` and `expected` + # but the values don't match, create a `delete` operation + # just before the `insert`. + # (This condition will always be true for `match` events.) + if expected.include?(key) operations << Core::UnaryOperation.new( name: :delete, collection: expected, - key: ak, - value: expected[ak], - index: ei + key: key, + value: expected[key], + index: event.old_position ) end - if operations.none? { |op| op.name == :insert && op.key == ak } - # If we're here, it means that we didn't encounter any insert - # operations above. Since we already handled delete, the only - # alternative is that this key must not exist in 'expected', so - # we need to add an insert. - operations << Core::UnaryOperation.new( - name: :insert, - collection: actual, - key: ak, - value: av, - index: ai - ) - end + operations << Core::UnaryOperation.new( + name: :insert, + collection: actual, + key: key, + value: actual[key], + index: event.new_position + ) end - - ai += 1 - previous_ei = ei - end - - # The last thing to do is this: if there are keys in 'expected' that - # aren't in 'actual', and they aren't associated with any inserts to - # where they would have been added above, tack those deletes onto the - # end of our operations array. - (eks - aks - operations.map(&:key)).each do |ek| - ei = eks.index(ek) - ev = expected[ek] - - operations << Core::UnaryOperation.new( - name: :delete, - collection: expected, - key: ek, - value: ev, - index: ei - ) end - - operations end def should_add_noop_operation?(key) diff --git a/spec/integration/rspec/include_matcher_spec.rb b/spec/integration/rspec/include_matcher_spec.rb index 6501b32c..3996664e 100644 --- a/spec/integration/rspec/include_matcher_spec.rb +++ b/spec/integration/rspec/include_matcher_spec.rb @@ -294,8 +294,8 @@ plain_line ' {' plain_line ' number: 42,' plain_line %( city: "Burbank",) - plain_line %( zip: "90210") expected_line %(- state: "CA") + plain_line %( zip: "90210") plain_line ' }' end ) diff --git a/spec/unit/basic/operation_tree_builders/hash_spec.rb b/spec/unit/basic/operation_tree_builders/hash_spec.rb new file mode 100644 index 00000000..3d9497bb --- /dev/null +++ b/spec/unit/basic/operation_tree_builders/hash_spec.rb @@ -0,0 +1,299 @@ +# frozen_string_literal: true + +require 'securerandom' +require 'spec_helper' + +RSpec.describe SuperDiff::Basic::OperationTreeBuilders::Hash, type: :unit do + describe 'the produced tree' do + subject(:tree) { described_class.call(expected: expected, actual: actual) } + + let(:actual) do + { + a: 99, + b: 2, + c: 99 + } + end + + let(:expected) do + { + c: 3, + b: 2, + a: 1 + } + end + + it 'orders key operations according to actual' do + expect(tree).to match( + [ + having_attributes(key: :a, name: :delete, value: 1), + having_attributes(key: :a, name: :insert, value: 99), + having_attributes(key: :b, name: :noop, value: 2), + having_attributes(key: :c, name: :delete, value: 3), + having_attributes(key: :c, name: :insert, value: 99) + ] + ) + end + + it 'has at most two entries per key' do + groups = tree.group_by(&:key) + + groups.each do |key, value| + expect(value.count).to(be <= 2, "There are > 2 operations for key `#{key}`") + end + end + + context 'with large random hashes' do + let(:operation_tree) { described_class.call(expected: expected, actual: actual) } + let(:actual) do + Array.new(rand(1..50)) do |i| + [i, SecureRandom.alphanumeric(4)] + end.to_h + end + let(:expected) do + entries = actual.entries + + # Remove keys at random + entries = entries.sample(rand(actual.length)) + + # Change keys at random + entries = entries.map do |k, v| + [k, rand(5).zero? ? SecureRandom.alphanumeric(4) : v] + end + + # Add keys at random + max_key = actual.keys.max || 0 + entries += Array.new(rand(10)) do |i| + [max_key + i + 1, SecureRandom.alphanumeric(4)] + end + + entries.shuffle.to_h + end + + it 'is correct' do + (actual.keys | expected.keys).each do |key| + relevant_operations = operation_tree.select { |op| op.key == key } + + if actual.key?(key) + if expected.key?(key) + if actual[key] == expected[key] + # no-op + expect(relevant_operations).to contain_exactly( + having_attributes(name: :noop) + ) + else + # remove, followed by add + expect(relevant_operations).to contain_exactly( + having_attributes(name: :delete), + having_attributes(name: :insert) + ) + end + else + # in actual but not expected, there should be one addition + expect(relevant_operations).to contain_exactly( + having_attributes(name: :insert) + ) + end + else + # in expected but not actual; there should be one removal + expect(relevant_operations).to contain_exactly( + having_attributes(name: :delete) + ) + end + end + end + end + + context 'with unmatched expected keys interleaved with actual' do + let(:operation_tree) { described_class.call(expected: expected, actual: actual) } + + let(:actual) do + { + 'A' => 1, + 'C' => 3 + } + end + let(:expected) do + { + 'A' => 1, + 'B' => 2, + 'C' => 3 + } + end + + it 'interleaves the delete operation' do + operations = operation_tree.to_a + expect(operations.count).to eq(3) + expect(operations[1]).to have_attributes( + key: 'B', + name: :delete + ) + end + end + + context 'with differing expected values interleaved with actual' do + let(:actual) do + { + a: 1, + b: 2, + c: 3 + } + end + let(:expected) do + { + a: 1, + c: 99, + b: 2 + } + end + + it 'colocates the delete and insert operations' do + expect(tree).to match( + [ + having_attributes(key: :a, name: :noop), + having_attributes(key: :b, name: :noop), + having_attributes(key: :c, name: :delete, value: 99), + having_attributes(key: :c, name: :insert, value: 3) + ] + ) + end + end + + context 'with interleaved additions, removals, and changes' do + let(:actual) do + { + a: 1, + b: 2, + c: 3, # added to actual + d: 99 + } + end + let(:expected) do + { + a: 1, + d: 4, # 99 in actual + z: 26, # missing from actual + b: 2 + } + end + + it 'no-ops, removes, and inserts as expected' do + expect(tree).to match( + [ + having_attributes(key: :a, name: :noop), + having_attributes(key: :z, name: :delete, value: 26), # missing from actual + having_attributes(key: :b, name: :noop, value: 2), + having_attributes(key: :c, name: :insert, value: 3), # missing from expected + having_attributes(key: :d, name: :delete, value: 4), + having_attributes(key: :d, name: :insert, value: 99) + ] + ) + end + end + + context 'with unmatched expected keys preceding and proceeding an actual match' do + let(:expected) do + { + z: 26, + a: 1, + b: 2 + } + end + let(:actual) do + { + a: 1 + } + end + + it 'appends all insert operations at the end' do + expect(tree).to match( + [ + having_attributes(key: :z, name: :delete, value: 26), + having_attributes(key: :a, name: :noop, value: 1), + having_attributes(key: :b, name: :delete, value: 2) + ] + ) + end + end + + context 'with unmatched expected keys preceding and proceeding an actual mismatch' do + let(:expected) do + { + z: 26, + a: 1, + b: 2 + } + end + let(:actual) do + { + a: 99 + } + end + + it 'deletes keys in order corresponding to expected' do + expect(tree).to match( + [ + having_attributes(key: :z, name: :delete), + having_attributes(key: :a, name: :delete, value: 1), + having_attributes(key: :a, name: :insert, value: 99), + having_attributes(key: :b, name: :delete) + ] + ) + end + end + + context 'with a changed value whose key in expected precedes a match and whose key in actual proceeds the match' do + let(:expected) do + { + b: 2, + a: 1 + } + end + let(:actual) do + { + a: 1, + b: 99 + } + end + + it 'colocates the insert and delete according to actual key order' do + expect(tree).to match( + [ + having_attributes(key: :a, name: :noop), + having_attributes(key: :b, name: :delete, value: 2), + having_attributes(key: :b, name: :insert, value: 99) + ] + ) + end + end + + context 'when the last actual value mismatches and the expected key precedes the last match' do + let(:actual) do + { + c: 1, + b: 2, + a: 3 + } + end + let(:expected) do + { + a: 1, + b: 2, + c: 3 + } + end + + it 'constructs the correct operation tree' do + expect(tree).to match( + [ + having_attributes(key: :c, name: :delete, value: 3), + having_attributes(key: :c, name: :insert, value: 1), + having_attributes(key: :b, name: :noop, value: 2), + having_attributes(key: :a, name: :delete, value: 1), + having_attributes(key: :a, name: :insert, value: 3) + ] + ) + end + end + end +end diff --git a/spec/unit/equality_matchers/main_spec.rb b/spec/unit/equality_matchers/main_spec.rb index 9ea84dcc..54d45df1 100644 --- a/spec/unit/equality_matchers/main_spec.rb +++ b/spec/unit/equality_matchers/main_spec.rb @@ -1266,8 +1266,8 @@ actual_line '+ is_translator: false,' actual_line '+ is_translation_enabled: false,' plain_line %( profile_background_color: "FFF1E0",) - plain_line %( profile_background_image_url_https: "https://abs.twimg.com/images/themes/theme1/bg.png",) expected_line %(- profile_image_url: "http://pbs.twimg.com/profile_images/931156393108885504/EqEMtLhM_normal.jpg",) + plain_line %( profile_background_image_url_https: "https://abs.twimg.com/images/themes/theme1/bg.png",) plain_line %( profile_banner_url: "https://pbs.twimg.com/profile_banners/18949452/1581526592",) plain_line ' profile_background_tile: false' plain_line ' }'