Skip to content

Commit

Permalink
bugfix in weighted edit-based distances
Browse files Browse the repository at this point in the history
  • Loading branch information
markvanderloo committed Jul 14, 2015
1 parent 788cf5d commit 0a960c4
Show file tree
Hide file tree
Showing 6 changed files with 29 additions and 7 deletions.
3 changes: 3 additions & 0 deletions pkg/NEWS
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
version 0.9.3
- bugfix in dl, lv, osa distances: weights were not taken into account properly when initializing the boundary cells of the score matrix (thanks to Zach Price)

version 0.9.2
- Update fixing some errors (missing documentation, tests) in the 0.9.1 release.
- Fixed a few possible memory leaks.
Expand Down
1 change: 1 addition & 0 deletions pkg/src/Rstringdist.c
Original file line number Diff line number Diff line change
Expand Up @@ -184,6 +184,7 @@ SEXP R_amatch(SEXP x, SEXP table, SEXP method
, p
, q
);

double d = R_PosInf, d1 = R_PosInf;
int index, len_X, len_T;
unsigned int *str;
Expand Down
4 changes: 2 additions & 2 deletions pkg/src/dl.c
Original file line number Diff line number Diff line change
Expand Up @@ -135,14 +135,14 @@ double dl_dist(
/* j = tgt index */
for(i=1;i<=x;i++){
uniquePush(dict,src[i]);
scores[(i+1) * (y + 2) + 1] = i;
scores[(i+1) * (y + 2) + 1] = i * weight[0];
scores[(i+1) * (y + 2) + 0] = score_ceil;
swapCount = 0;

for(j=1;j<=y;j++){
if(i == 1) {
uniquePush(dict,tgt[j]);
scores[1 * (y + 2) + (j + 1)] = j;
scores[1 * (y + 2) + (j + 1)] = j * weight[0];
scores[0 * (y + 2) + (j + 1)] = score_ceil;
}
targetCharCount = dict->value[which(dict, tgt[j-1])];
Expand Down
5 changes: 2 additions & 3 deletions pkg/src/lv.c
Original file line number Diff line number Diff line change
Expand Up @@ -43,12 +43,11 @@ double lv_dist(
int I = na+1, L = na+1, J = nb+1;
double sub;


for ( i = 0; i < I; ++i ){
scores[i] = i;
scores[i] = i * weight[0];
}
for ( j = 1; j < J; ++j, L += I ){
scores[L] = j;
scores[L] = j * weight[0];
}

int M;
Expand Down
4 changes: 2 additions & 2 deletions pkg/src/osa.c
Original file line number Diff line number Diff line change
Expand Up @@ -41,10 +41,10 @@ double osa_dist(unsigned int *a, int na, unsigned int *b, int nb, double *weight
double sub, tran;

for ( i = 0; i < I; ++i ){
scores[i] = i;
scores[i] = i * weight[0];
}
for ( j = 1; j < J; ++j, L += I ){
scores[L] = j;
scores[L] = j * weight[0];
}

for ( i = 1; i <= na; ++i ){
Expand Down
19 changes: 19 additions & 0 deletions pkg/tests/testthat/testStringdist.R
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,25 @@ test_that("weights are handled correctly",{
stringdist("abc","ac",method='osa',weight=c(0.5,1,1,1)),
stringdist("ac","abc",method='osa',weight=c(1,0.5,1,1))
)
expect_equal(
stringdist("ABC", "BC", method = "lv", weight = c(i=.1, d=.1, s=.1)),.1
)
expect_equal(
stringdist("ABC", "BC", method = "lv", weight = c(i=.1, d=.1, s=1)),.1
)

expect_equal(
stringdist("ABC", "BC", method = "osa", weight = c(i=.1, d=.1, s=.1,t=.1)),.1
)
expect_equal(
stringdist("ABC", "BC", method = "osa", weight = c(i=.1, d=.1, s=1,t=.1)),.1
)
expect_equal(
stringdist("ABC", "BC", method = "dl", weight = c(i=.1, d=.1, s=.1,t=.1)),.1
)
expect_equal(
stringdist("ABC", "BC", method = "dl", weight = c(i=.1, d=.1, s=1,t=.1)),.1
)
})

test_that("NA's are handled correctly",{
Expand Down

0 comments on commit 0a960c4

Please sign in to comment.