Permalink
Browse files

Perl 6 port of the perl 5 module initial commits

  • Loading branch information...
0 parents commit 46c49de9a8ab4ddb143392b19b5b5f69f090d0ef @thundergnat committed Dec 5, 2010
Showing with 160 additions and 0 deletions.
  1. +46 −0 README
  2. +96 −0 lib/Text/Levenshtein.pm
  3. +18 −0 t/basic.t
46 README
@@ -0,0 +1,46 @@
+=begin pod
+
+This is a direct port of the Perl 5 version and should be close to 100% similar
+other than some Perl 6 idioms. I didn't port the fastdistance routine since the
+logic was buggy and the code to fix it made it the same speed as the regular
+distance routine.
+
+
+=head1 NAME
+
+Text::Levenshtein - An implementation of the Levenshtein edit distance
+
+=head1 SYNOPSIS
+
+ use Text::Levenshtein;
+
+ print distance("foo","four");
+ # prints "2"
+
+ my @words=("four","foo","bar");
+ my @distances=distance("foo",@words);
+
+ print "@distances[]";
+ # prints "2 0 3"
+
+
+=head1 DESCRIPTION
+
+This module implements the Levenshtein edit distance. The Levenshtein edit
+distance is a measure of the degree of proximity between two strings. This
+distance is the number of substitutions, deletions or insertions ("edits")
+needed to transform one string into the other one (and vice versa). When two
+strings have distance 0, they are the same. A good point to start is:
+L< http://www.merriampark.com/ld.htm >
+
+=head1 AUTHOR
+
+Copyright 2002 Dree Mistrut <F<dree@friul.it>>
+perl6 port: 2010 Steve Schulze aka thundergnat
+L< http://perlmonks.org/?node=thundergnat >
+
+This package is free software and is provided "as is" without express or implied
+warranty. You can redistribute it and/or modify it under the same terms as Perl
+itself.
+
+=end pod
@@ -0,0 +1,96 @@
+module Text::Levenshtein;
+
+# Return the numerically smallest of exactly three positional arguments.
+sub _min ($a, $b, $c)
+{
+    # Settle the first pair, then compare the winner against the third value.
+    my $low = $a < $b ?? $a !! $b;
+    return $low < $c ?? $low !! $c;
+}
+
+
+# Compute the Levenshtein edit distance from $s to each string in @t.
+#
+# Parameters:
+#   $s  - the source string.
+#   @t  - one or more target strings (slurpy).
+#
+# Returns a list holding one distance per target, in argument order. Each
+# distance is the minimum number of single-character insertions, deletions
+# and substitutions needed to turn $s into that target.
+sub distance ($s, *@t) is export
+{
+    # Split the source once; it is reused unchanged for every target.
+    my @s_chars = $s.comb;
+    my $n = $s.chars;
+    my @result;
+
+    for @t -> $t {
+        # Identical strings need no edits.
+        if $s eq $t {
+            @result.push(0);
+            next;
+        }
+
+        my $m = $t.chars;
+
+        # If either string is empty, the distance is the length of the other.
+        unless $n {
+            @result.push($m);
+            next;
+        }
+        unless $m {
+            @result.push($n);
+            next;
+        }
+
+        my @t_chars = $t.comb;
+
+        # Only the previous row of the dynamic-programming table is ever
+        # read, so keep two rows instead of the full (n+1) x (m+1) matrix.
+        # Row 0: turning the empty prefix of $s into the first j chars of $t.
+        my @prev = 0 .. $m;
+
+        for 1 .. $n -> $i {
+            my $s_i = @s_chars[$i - 1];
+            my @curr;
+            # Column 0: deleting the first i chars of $s.
+            @curr[0] = $i;
+
+            for 1 .. $m -> $j {
+                my $subst = $s_i eq @t_chars[$j - 1] ?? 0 !! 1;
+                @curr[$j] = min(
+                    @prev[$j] + 1,            # deletion
+                    @curr[$j - 1] + 1,        # insertion
+                    @prev[$j - 1] + $subst,   # substitution or match
+                );
+            }
+            @prev = @curr;
+        }
+
+        @result.push(@prev[$m]);
+    }
+    return @result;
+}
+
+
+=begin pod
+=head1 NAME
+
+Text::Levenshtein - An implementation of the Levenshtein edit distance
+
+=head1 SYNOPSIS
+
+ use Text::Levenshtein;
+
+ print distance("foo","four");
+ # prints "2"
+
+ my @words=("four","foo","bar");
+ my @distances=distance("foo",@words);
+
+ print "@distances[]";
+ # prints "2 0 3"
+
+
+=head1 DESCRIPTION
+
+This module implements the Levenshtein edit distance.
+The Levenshtein edit distance is a measure of the degree of proximity between two strings.
+This distance is the number of substitutions, deletions or insertions ("edits")
+needed to transform one string into the other one (and vice versa).
+When two strings have distance 0, they are the same.
+A good point to start is: <http://www.merriampark.com/ld.htm>
+
+#See also Text::LevenshteinXS on CPAN if you do not require a perl-only implementation. It
+#is much faster in nearly all cases.
+
+#See also Text::WagnerFischer on CPAN for a configurable edit distance, i.e. for
+#configurable costs (weights) for the edits.
+
+
+=head1 AUTHOR
+
+Copyright 2002 Dree Mistrut <F<dree@friul.it>>
+perl6 port: 2010 Steve Schulze
+
+This package is free software and is provided "as is" without express
+or implied warranty. You can redistribute it and/or modify it under
+the same terms as Perl itself.
+
+=end pod
+
@@ -0,0 +1,18 @@
+use Test;
+plan 9;
+
+use Text::Levenshtein;
+
+# Single-target cases: source word, target word, expected edit distance.
+my @cases =
+    ["foo",       "four",      2],
+    ["foo",       "foo",       0],
+    ["cow",       "cat",       2],
+    ["cat",       "moocow",    5],
+    ["cat",       "cowmoo",    5],
+    ["cow",       "moocow",    3],
+    ["sebastian", "sebastien", 1],
+    ["more",      "cowbell",   5];
+
+for @cases -> @case {
+    my $from     = @case[0];
+    my $to       = @case[1];
+    my $expected = @case[2];
+    is(distance($from, $to), $expected, "Correct distance $from $to");
+}
+
+# Multi-target call: one distance per target, in argument order.
+my @expected = (2, 0, 3);
+my @got = distance("foo", "four", "foo", "bar");
+is(@got, @expected, "Array test: Correct distances foo four foo bar");
+
+

0 comments on commit 46c49de

Please sign in to comment.