Skip to content

Commit

Permalink
fixed #17 which reappeared when I fixed #46
Browse files Browse the repository at this point in the history
  • Loading branch information
markvanderloo committed Sep 9, 2016
1 parent 9978434 commit 10b1be2
Show file tree
Hide file tree
Showing 5 changed files with 28 additions and 9 deletions.
2 changes: 1 addition & 1 deletion pkg/DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ Imports:
parallel
URL: https://github.com/markvanderloo/stringdist
BugReports: https://github.com/markvanderloo/stringdist/issues
Date: 2015-12-30
Date: 2016-09-09
Suggests:
testthat
RoxygenNote: 5.0.1
2 changes: 2 additions & 0 deletions pkg/NEWS
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@ version 0.9.4.2
(thanks to Max Fritsche)
- bugfix in stringdistmatrix(a): would segfault on q-gram with input of more
than ~7k strings and q>1 (thanks to Connor McKay)
- bugfix in jaccard distance: the distance was not always correct when passing
multiple strings (thanks to Robert Carlson)

version 0.9.4.1
- stringdistmatrix(a) now outputs long vectors (issue #45, thanks to Wouter
Expand Down
6 changes: 5 additions & 1 deletion pkg/src/Rstringdist.c
Original file line number Diff line number Diff line change
Expand Up @@ -205,9 +205,13 @@ SEXP R_amatch(SEXP x, SEXP table, SEXP method
len_T = T->str_len[j];
if (len_X != NA_INTEGER && len_T != NA_INTEGER ){ // both are char (usual case)
d = stringdist(sd, str, len_X, *tab, len_T);
//Rprintf("d = %8.4f ",d);
if ( d <= maxDist && d < d1){
index = j + 1;
if ( ABS(d) < 1e-14 ) break; // exact match
if ( fabs(d) < 1e-14 ){
// Rprintf(" helleu!\n");
break; // exact match
}
d1 = d;
}
} else if ( len_X == NA_INTEGER && len_T == NA_INTEGER ) { // both are NA
Expand Down
22 changes: 18 additions & 4 deletions pkg/src/qgram.c
Original file line number Diff line number Diff line change
Expand Up @@ -305,15 +305,28 @@ static void getjaccard(qtree *Q, double *d){
++d[0];
}
// denominator: |x V y|
++d[1];
if ( Q->n[0] > 0 || Q->n[1] > 0){
++d[1];
}
// clean up and continue
Q->n[0] = 0;
Q->n[1] = 0;
getjaccard(Q->left,d);
getjaccard(Q->right,d);
}


/* for testing purposes only
static void print_qtree(qtree *Q, int q){
if (Q==NULL) return;
Rprintf("q=%d ",q);
Rprintf("qgram = {");
for(int i = 0; i < q; i++)
Rprintf("%03d ",Q->qgram[i]);
Rprintf("}");
Rprintf("n = [%2.0f %2.0f]\n", Q->n[0], Q->n[1]);
print_qtree(Q->left,q);
print_qtree(Q->right,q);
}*/

/*Get qgram distances
* Input
Expand Down Expand Up @@ -356,7 +369,8 @@ double qgram_dist(
*Qp = push_string(t, y, q, *Qp, 1, 2);
if (*Qp == NULL) return -2.0;

qtree *Q = *Qp;

qtree *Q = *Qp;
switch ( distance ){
case 0:
getdist(Q,dist);
Expand All @@ -373,7 +387,7 @@ double qgram_dist(
}
break;
case 2:
getjaccard(Q,dist);
getjaccard(*Qp,dist);
dist[0] = 1.0 - dist[0]/dist[1];
break;
default:
Expand Down
5 changes: 2 additions & 3 deletions pkg/src/stringdist.c
Original file line number Diff line number Diff line change
Expand Up @@ -124,12 +124,11 @@ double stringdist(Stringdist *S, unsigned int *str_a, int len_a, unsigned int *s
case lcs :
return lcs_dist(str_a, len_a, str_b, len_b, S->work);
case qgram :
return qgram_dist(str_a, len_a, str_b, len_b, S->q, &(S->tree), 0L);
return qgram_dist(str_a, len_a, str_b, len_b, S->q, &S->tree, 0L);
case cosine :
return qgram_dist(str_a, len_a, str_b, len_b, S->q, &S->tree, 1L);
case jaccard :
d = qgram_dist(str_a, len_a, str_b, len_b, S->q, &S->tree, 2L);
break;
return qgram_dist(str_a, len_a, str_b, len_b, S->q, &S->tree, 2L);
case jw :
return jaro_winkler_dist(str_a, len_a, str_b, len_b, S->p, S->weight, S->work);
case soundex :
Expand Down

0 comments on commit 10b1be2

Please sign in to comment.