Skip to content

Commit

Permalink
solved some weight bugs
Browse files Browse the repository at this point in the history
  • Loading branch information
markvanderloo committed Oct 26, 2015
1 parent 93e808d commit 93d0d4c
Show file tree
Hide file tree
Showing 4 changed files with 20 additions and 9 deletions.
8 changes: 4 additions & 4 deletions pkg/src/dl.c
Original file line number Diff line number Diff line change
Expand Up @@ -119,12 +119,12 @@ double dl_dist(

unsigned int swapCount, targetCharCount,i,j;
double delScore, insScore, subScore, swapScore;
unsigned int score_ceil = x + y;
double score_ceil = x + y;

/* initialize matrix start values */
scores[0] = score_ceil;
scores[1 * (y + 2) + 0] = score_ceil;
scores[0 * (y + 2) + 1] = score_ceil;
scores[1 * (y + 2) + 0] = weight[0]; //score_ceil;
scores[0 * (y + 2) + 1] = weight[1]; //score_ceil;
scores[1 * (y + 2) + 1] = 0;

uniquePush(dict,src[0]);
Expand All @@ -142,7 +142,7 @@ double dl_dist(
for(j=1;j<=y;j++){
if(i == 1) {
uniquePush(dict,tgt[j]);
scores[1 * (y + 2) + (j + 1)] = j * weight[0];
scores[1 * (y + 2) + (j + 1)] = j * weight[1];
scores[0 * (y + 2) + (j + 1)] = score_ceil;
}
targetCharCount = dict->value[which(dict, tgt[j-1])];
Expand Down
9 changes: 6 additions & 3 deletions pkg/src/lv.c
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
#include <omp.h>
#endif


/* Levenshtein distance
* Computes Levenshtein distance
* - Simplified from restricted DL pseudocode at http://en.wikipedia.org/wiki/Damerau%E2%80%93Levenshtein_distance
Expand All @@ -44,24 +45,26 @@ double lv_dist(
double sub;

for ( i = 0; i < I; ++i ){
scores[i] = i * weight[1];
scores[i] = i * weight[0];
}
for ( j = 1; j < J; ++j, L += I ){
scores[L] = j * weight[0];
scores[L] = j * weight[1];
}


int M;
for ( i = 1; i <= na; ++i ){
L = I; M= 0;
for ( j = 1; j <= nb; ++j, L += I, M += I ){
sub = (a[i-1] == b[j-1]) ? 0 : weight[2];
scores[i + L] = MIN(MIN(
scores[i + I*j] = MIN(MIN(
scores[i-1 + L] + weight[0], // deletion
scores[i + M] + weight[1]), // insertion
scores[i-1 + M] + sub // substitution
);
}
}

double score = scores[I*J-1];
return score;
}
Expand Down
4 changes: 2 additions & 2 deletions pkg/src/osa.c
Original file line number Diff line number Diff line change
Expand Up @@ -41,10 +41,10 @@ double osa_dist(unsigned int *a, int na, unsigned int *b, int nb, double *weight
double sub, tran;

for ( i = 0; i < I; ++i ){
scores[i] = i * weight[1];
scores[i] = i * weight[0];
}
for ( j = 1; j < J; ++j, L += I ){
scores[L] = j * weight[0];
scores[L] = j * weight[1];
}

for ( i = 1; i <= na; ++i ){
Expand Down
8 changes: 8 additions & 0 deletions pkg/tests/testthat/testStringdist.R
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,14 @@ test_that("weights are handled correctly",{
expect_equal(
stringdist("ABC", "BC", method = "dl", weight = c(i=.1, d=.1, s=1,t=.1)),.1
)
# examples from the paper; Thanks to Nathalia Potocka for reporting.
expect_equal(stringdist("leia","leela",method="lv",weight=c(i=.1,d=1,s=1)),1.1)
expect_equal(stringdist("leia","leela",method="lv",weight=c(i=1,d=.1,s=1)),2)
expect_equal(stringdist("a","b",method="lv",weight=c(i=.1,d=1,s=.3)),.3)
expect_equal(stringdist("a","b",method="osa",weight=c(i=.1,d=1,s=.3,1)),.3)
expect_equal(stringdist("a","b",method="dl",weight=c(i=.1,d=1,s=.3,t=1)),.3)
expect_equal(stringdist("leia","leela",method="dl",weight=c(i=1,d=.1,s=1,t=1)),2)

})

test_that("NA's are handled correctly",{
Expand Down

0 comments on commit 93d0d4c

Please sign in to comment.