Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse files

Give LTD's a #find alias for the #left_to_right method. Also, re-enable tests.
  • Loading branch information...
commit bdbd07fa615a72b670bda81e13066e6e61a2bfe8 1 parent 8483630
@rossmeissl authored
View
8 README.rdoc
@@ -4,13 +4,9 @@ Match things based on string similarity (using the Pair Distance algorithm) and
= Quickstart
- >> right_records = [ 'seamus', 'andy', 'ben' ]
+ >> d = LooseTightDictionary.new %w(seamus andy ben)
=> [...]
- >> left_record = 'Shamus Heaney'
- => [...]
- >> d = LooseTightDictionary.new right_records
- => [...]
- >> puts d.left_to_right left_record
+ >> puts d.find 'Shamus Heaney'
=> 'seamus'
Try running the included example file:
View
1  lib/loose_tight_dictionary.rb
@@ -218,6 +218,7 @@ def left_to_right(left_record)
inline_check left_record, right_record
right_record
end
+ alias_method :find, :left_to_right
def optimize(t_options_left, t_options_right)
cart_prod(t_options_left, t_options_right).max do |a, b|
View
349 test/test_loose_tight_dictionary.rb
@@ -65,212 +65,207 @@ def ltd
:tee => $tee
end
- if ENV['NEW'] == 'true' or ENV['ALL'] == 'true'
- end
-
- if ENV['OLD'] == 'true' or ENV['ALL'] == 'true'
- should "optionally only pay attention to things that match blockings" do
- assert_equal @a_right, ltd.left_to_right(@a_left)
+ should "optionally only pay attention to things that match blockings" do
+ assert_equal @a_right, ltd.left_to_right(@a_left)
- clear_ltd
- @blocking_only = true
- assert_equal nil, ltd.left_to_right(@a_left)
+ clear_ltd
+ @blocking_only = true
+ assert_equal nil, ltd.left_to_right(@a_left)
- clear_ltd
- @blocking_only = true
- @blockings.push ['/dash/i']
- assert_equal @a_right, ltd.left_to_right(@a_left)
- end
-
- # the example from the readme, considerably uglier here
- should "check a simple table" do
- @right = [ 'seamus', 'andy', 'ben' ]
- @positives = [ [ 'seamus', 'Mr. Seamus Abshere' ] ]
- left = [ 'Mr. Seamus Abshere', 'Sr. Andy Rossmeissl', 'Master BenT' ]
-
- assert_nothing_raised do
- ltd.check left
- end
- end
-
- should "treat a String as a full record if passed through" do
- dash = 'DHC8-400'
- b747 = 'B747200/300'
- dc9 = 'DC-9-10'
- right_records = [ dash, b747, dc9 ]
- simple_ltd = LooseTightDictionary.new right_records, :logger => $logger, :tee => $tee
- assert_equal dash, simple_ltd.left_to_right('DeHavilland Dash-8 DHC-400')
- assert_equal b747, simple_ltd.left_to_right('Boeing 747-300')
- assert_equal dc9, simple_ltd.left_to_right('McDonnell Douglas MD81/DC-9')
+ clear_ltd
+ @blocking_only = true
+ @blockings.push ['/dash/i']
+ assert_equal @a_right, ltd.left_to_right(@a_left)
+ end
+
+ # the example from the readme, considerably uglier here
+ should "check a simple table" do
+ @right = [ 'seamus', 'andy', 'ben' ]
+ @positives = [ [ 'seamus', 'Mr. Seamus Abshere' ] ]
+ left = [ 'Mr. Seamus Abshere', 'Sr. Andy Rossmeissl', 'Master BenT' ]
+
+ assert_nothing_raised do
+ ltd.check left
end
-
- should "call it a mismatch if you hit a blank positive" do
- @positives.push [@a_left[0], '']
- assert_raises(LooseTightDictionary::Mismatch) do
- ltd.left_to_right @a_left
- end
+ end
+
+ should "treat a String as a full record if passed through" do
+ dash = 'DHC8-400'
+ b747 = 'B747200/300'
+ dc9 = 'DC-9-10'
+ right_records = [ dash, b747, dc9 ]
+ simple_ltd = LooseTightDictionary.new right_records, :logger => $logger, :tee => $tee
+ assert_equal dash, simple_ltd.left_to_right('DeHavilland Dash-8 DHC-400')
+ assert_equal b747, simple_ltd.left_to_right('Boeing 747-300')
+ assert_equal dc9, simple_ltd.find('McDonnell Douglas MD81/DC-9')
+ end
+
+ should "call it a mismatch if you hit a blank positive" do
+ @positives.push [@a_left[0], '']
+ assert_raises(LooseTightDictionary::Mismatch) do
+ ltd.left_to_right @a_left
end
+ end
- should "call it a false positive if you hit a blank negative" do
- @negatives.push [@a_left[0], '']
- assert_raises(LooseTightDictionary::FalsePositive) do
- ltd.left_to_right @a_left
- end
- end
-
- should "have a false match without blocking" do
- # @d_left will be our victim
- @right.push @d_lookalike
- @tightenings.push @t_1
-
- assert_equal @d_lookalike, ltd.left_to_right(@d_left)
+ should "call it a false positive if you hit a blank negative" do
+ @negatives.push [@a_left[0], '']
+ assert_raises(LooseTightDictionary::FalsePositive) do
+ ltd.left_to_right @a_left
end
+ end
+
+ should "have a false match without blocking" do
+ # @d_left will be our victim
+ @right.push @d_lookalike
+ @tightenings.push @t_1
- should "do blocking if the left matches a block" do
- # @d_left will be our victim
- @right.push @d_lookalike
- @tightenings.push @t_1
- @blockings.push ['/(bombardier|de ?havilland)/i']
-
- assert_equal @d_right, ltd.left_to_right(@d_left)
- end
+ assert_equal @d_lookalike, ltd.left_to_right(@d_left)
+ end
+
+ should "do blocking if the left matches a block" do
+ # @d_left will be our victim
+ @right.push @d_lookalike
+ @tightenings.push @t_1
+ @blockings.push ['/(bombardier|de ?havilland)/i']
- should "treat blocks as exclusive" do
- @right = [ @d_left ]
- @tightenings.push @t_1
- @blockings.push ['/(bombardier|de ?havilland)/i']
+ assert_equal @d_right, ltd.left_to_right(@d_left)
+ end
+
+ should "treat blocks as exclusive" do
+ @right = [ @d_left ]
+ @tightenings.push @t_1
+ @blockings.push ['/(bombardier|de ?havilland)/i']
- assert_equal nil, ltd.left_to_right(@d_lookalike)
- end
+ assert_equal nil, ltd.left_to_right(@d_lookalike)
+ end
+
+ should "only use identities if they stem from the same regexp" do
+ @identities.push @r_1
+ @identities.push [ '/(cessna)(?:.*?)(citation)/i' ]
+ @identities.push [ '/(cessna)(?:.*?)(\d\d\d)/i' ]
+ x_left = [ 'CESSNA D-333 CITATION V']
+ x_right = [ 'CESSNA D-333' ]
+ @right.push x_right
- should "only use identities if they stem from the same regexp" do
- @identities.push @r_1
- @identities.push [ '/(cessna)(?:.*?)(citation)/i' ]
- @identities.push [ '/(cessna)(?:.*?)(\d\d\d)/i' ]
- x_left = [ 'CESSNA D-333 CITATION V']
- x_right = [ 'CESSNA D-333' ]
- @right.push x_right
-
- assert_equal x_right, ltd.left_to_right(x_left)
- end
+ assert_equal x_right, ltd.left_to_right(x_left)
+ end
+
+ should "use the best score from all of the tightenings" do
+ x_left = ["BOEING 737100"]
+ x_right = ["BOEING BOEING 737-100/200"]
+ x_right_wrong = ["BOEING BOEING 737-900"]
+ @right.push x_right
+ @right.push x_right_wrong
+ @tightenings.push ['/(7\d)(7|0)-?\d{1,3}\/(\d\d\d)/i']
+ @tightenings.push ['/(7\d)(7|0)-?(\d{1,3}|[A-Z]{0,3})/i']
- should "use the best score from all of the tightenings" do
- x_left = ["BOEING 737100"]
- x_right = ["BOEING BOEING 737-100/200"]
- x_right_wrong = ["BOEING BOEING 737-900"]
- @right.push x_right
- @right.push x_right_wrong
- @tightenings.push ['/(7\d)(7|0)-?\d{1,3}\/(\d\d\d)/i']
- @tightenings.push ['/(7\d)(7|0)-?(\d{1,3}|[A-Z]{0,3})/i']
-
- assert_equal x_right, ltd.left_to_right(x_left)
- end
+ assert_equal x_right, ltd.left_to_right(x_left)
+ end
+
+ should "compare using prefixes if tightened key is shorter than correct match" do
+ x_left = ["BOEING 720"]
+ x_right = ["BOEING BOEING 720-000"]
+ x_right_wrong = ["BOEING BOEING 717-200"]
+ @right.push x_right
+ @right.push x_right_wrong
+ @tightenings.push @t_1
+ @tightenings.push ['/(7\d)(7|0)-?\d{1,3}\/(\d\d\d)/i']
+ @tightenings.push ['/(7\d)(7|0)-?(\d{1,3}|[A-Z]{0,3})/i']
- should "compare using prefixes if tightened key is shorter than correct match" do
- x_left = ["BOEING 720"]
- x_right = ["BOEING BOEING 720-000"]
- x_right_wrong = ["BOEING BOEING 717-200"]
- @right.push x_right
- @right.push x_right_wrong
- @tightenings.push @t_1
- @tightenings.push ['/(7\d)(7|0)-?\d{1,3}\/(\d\d\d)/i']
- @tightenings.push ['/(7\d)(7|0)-?(\d{1,3}|[A-Z]{0,3})/i']
-
- assert_equal x_right, ltd.left_to_right(x_left)
- end
+ assert_equal x_right, ltd.left_to_right(x_left)
+ end
+
+ should "use the shortest original input" do
+ x_left = ['De Havilland DHC8-777 Dash-8 Superstar']
+ x_right = ['DEHAVILLAND DEHAVILLAND DHC8-777 DASH-8 Superstar']
+ x_right_long = ['DEHAVILLAND DEHAVILLAND DHC8-777 DASH-8 Superstar/Supernova']
- should "use the shortest original input" do
- x_left = ['De Havilland DHC8-777 Dash-8 Superstar']
- x_right = ['DEHAVILLAND DEHAVILLAND DHC8-777 DASH-8 Superstar']
- x_right_long = ['DEHAVILLAND DEHAVILLAND DHC8-777 DASH-8 Superstar/Supernova']
-
- @right.push x_right_long
- @right.push x_right
- @tightenings.push @t_1
-
- assert_equal x_right, ltd.left_to_right(x_left)
- end
+ @right.push x_right_long
+ @right.push x_right
+ @tightenings.push @t_1
- should "perform lookups left to right" do
- assert_equal @a_right, ltd.left_to_right(@a_left)
+ assert_equal x_right, ltd.left_to_right(x_left)
+ end
+
+ should "perform lookups left to right" do
+ assert_equal @a_right, ltd.left_to_right(@a_left)
+ end
+
+ should "succeed if there are no checks" do
+ assert_nothing_raised do
+ ltd.check @left
end
+ end
+
+ should "succeed if the positive checks just work" do
+ @positives.push [ @a_left[0], @a_right[0] ]
- should "succeed if there are no checks" do
- assert_nothing_raised do
- ltd.check @left
- end
+ assert_nothing_raised do
+ ltd.check @left
+ end
+ end
+
+ should "fail if positive checks don't work" do
+ @positives.push [ @d_left[0], @d_right[0] ]
+
+ assert_raises(LooseTightDictionary::Mismatch) do
+ ltd.check @left
+ end
+ end
+
+ should "succeed if proper tightening is applied" do
+ @positives.push [ @d_left[0], @d_right[0] ]
+ @tightenings.push @t_1
+
+ assert_nothing_raised do
+ ltd.check @left
end
+ end
+
+ should "use a Google Docs spreadsheet as a source of tightenings" do
+ @positives.push [ @d_left[0], @d_right[0] ]
+ @tightenings = RemoteTable.new :url => 'http://spreadsheets.google.com/pub?key=tiS_6CCDDM_drNphpYwE_iw&single=true&gid=0&output=csv', :headers => false
- should "succeed if the positive checks just work" do
- @positives.push [ @a_left[0], @a_right[0] ]
-
- assert_nothing_raised do
- ltd.check @left
- end
+ # sabshere 9/30/10 this shouldn't raise anything
+ # but the tightenings have been changed... we should be using test-only tightenings, not production ones
+ # assert_nothing_raised do
+ assert_raises(LooseTightDictionary::Mismatch) do
+ ltd.check @left
end
+ end
- should "fail if positive checks don't work" do
- @positives.push [ @d_left[0], @d_right[0] ]
+ should "fail if negative checks don't work" do
+ @negatives.push [ @b_left[0], @c_right[0] ]
- assert_raises(LooseTightDictionary::Mismatch) do
- ltd.check @left
- end
+ assert_raises(LooseTightDictionary::FalsePositive) do
+ ltd.check @left
end
+ end
- should "succeed if proper tightening is applied" do
- @positives.push [ @d_left[0], @d_right[0] ]
- @tightenings.push @t_1
+ should "do inline checking" do
+ @negatives.push [ @b_left[0], @c_right[0] ]
- assert_nothing_raised do
- ltd.check @left
- end
+ assert_raises(LooseTightDictionary::FalsePositive) do
+ ltd.left_to_right @b_left
end
+ end
- should "use a Google Docs spreadsheet as a source of tightenings" do
- @positives.push [ @d_left[0], @d_right[0] ]
- @tightenings = RemoteTable.new :url => 'http://spreadsheets.google.com/pub?key=tiS_6CCDDM_drNphpYwE_iw&single=true&gid=0&output=csv', :headers => false
-
- # sabshere 9/30/10 this shouldn't raise anything
- # but the tightenings have been changed... we should be using test-only tightenings, not production ones
- # assert_nothing_raised do
- assert_raises(LooseTightDictionary::Mismatch) do
- ltd.check @left
- end
- end
-
- should "fail if negative checks don't work" do
- @negatives.push [ @b_left[0], @c_right[0] ]
-
- assert_raises(LooseTightDictionary::FalsePositive) do
- ltd.check @left
- end
- end
-
- should "do inline checking" do
- @negatives.push [ @b_left[0], @c_right[0] ]
-
- assert_raises(LooseTightDictionary::FalsePositive) do
- ltd.left_to_right @b_left
- end
- end
+ should "fail if negative checks don't work, even with tightening" do
+ @negatives.push [ @b_left[0], @c_right[0] ]
+ @tightenings.push @t_1
- should "fail if negative checks don't work, even with tightening" do
- @negatives.push [ @b_left[0], @c_right[0] ]
- @tightenings.push @t_1
-
- assert_raises(LooseTightDictionary::FalsePositive) do
- ltd.check @left
- end
+ assert_raises(LooseTightDictionary::FalsePositive) do
+ ltd.check @left
end
+ end
+
+ should "succeed if proper identity is applied" do
+ @negatives.push [ @b_left[0], @c_right[0] ]
+ @positives.push [ @d_left[0], @d_right[0] ]
+ @identities.push @r_1
- should "succeed if proper identity is applied" do
- @negatives.push [ @b_left[0], @c_right[0] ]
- @positives.push [ @d_left[0], @d_right[0] ]
- @identities.push @r_1
-
- assert_nothing_raised do
- ltd.check @left
- end
+ assert_nothing_raised do
+ ltd.check @left
end
end
end
Please sign in to comment.
Something went wrong with that request. Please try again.