Skip to content

Commit

Permalink
added some tests
Browse files Browse the repository at this point in the history
  • Loading branch information
markvanderloo committed Jul 16, 2020
1 parent eb07072 commit 692095c
Show file tree
Hide file tree
Showing 4 changed files with 35 additions and 9 deletions.
13 changes: 8 additions & 5 deletions pkg/R/afind.R
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,10 @@
#' The functions \code{grab} and \code{grabl} are approximate string matching
#' functions that mimic base R's \code{\link[base]{grep}} and
#' \code{\link[base:grep]{grepl}}. They are implemented as convenience wrappers
#' of \code{find}.
#' of \code{find}. For \code{grabl} there is one difference with \code{grepl}.
#' The result of \code{grepl("foo",NA)} is \code{FALSE}, which seems inconsistent
#' with \code{grepl(NA, NA)} and \code{grepl(NA, "foo")} which return \code{NA}.
#' \code{grabl} returns \code{NA} when either \code{pattern} or \code{x} is \code{NA}.
#'
#' @section Running cosine distance:
#' This algorithm gains efficiency by using that two consecutive windows have
Expand Down Expand Up @@ -174,16 +177,16 @@ grab <- function(x, pattern, maxDist=Inf, value=FALSE, ...){
grabl <- function(x, pattern, maxDist=Inf, ...){
stopifnot(is.numeric(maxDist), maxDist >= 0, length(pattern) == 1)
L <- afind(x, pattern, value=FALSE, ...)
L$distance <= maxDist
as.logical(L$distance <= maxDist)
}


#' @rdname afind
#'
#' @return
#' For \code{extract}, a \code{character} vector of \code{length(x)}, with
#' \code{NA} where no match was found and the first matched string if there is
#' a match. (similar to \code{stringr::str_extract}).
#' For \code{extract}, a \code{character} matrix with \code{length(x)} rows and
#' \code{length(pattern)} columns. If match was found, element \eqn{(i,j)}
#' contains the match, otherwise it is set to \code{NA}.
#' @export
extract <- function(x, pattern, maxDist = Inf, ...){
stopifnot(is.numeric(maxDist), maxDist >= 0, length(pattern) == 1)
Expand Down
27 changes: 25 additions & 2 deletions pkg/inst/tinytest/test_afind.R
Original file line number Diff line number Diff line change
Expand Up @@ -51,10 +51,10 @@ expect_equal(length(out2), 2)
expect_equal(grab(texts, "harvester", maxDist=2), 2)
expect_equal(grab(texts, "harvester", value=TRUE, maxDist=2), "harverste")
expect_equal(grabl(texts, "harvester", maxDist=2)
, matrix(c(FALSE,TRUE,FALSE,FALSE),nrow=4))
, c(FALSE,TRUE,FALSE,FALSE))

expect_equal(extract(texts, "harvester", maxDist=2)
, matrix(c(NA, "harverste",NA,NA), nrow=4) )
, matrix(c(NA, "harverste",NA,NA),nrow=4) )

## Test running_cosine
pattern <- c("phish", "want to")
Expand All @@ -76,7 +76,30 @@ for ( method in methods ){
expect_equal(afind(text, pattern, method=method, q=3, p=0.1)$location[1,1], 19, info=method)
}

## test the usual edge cases

# notice: window size = 0.
expect_equal(afind("foo","")$distance[1], 0)

expect_equal(afind("foo",NA)$distance[1], NA_real_)
expect_equal(afind("foo",NA)$location[1], NA_integer_)
expect_equal(afind("foo",NA)$match[1], NA_character_)

expect_equal(afind(NA,"foo")$distance[1], NA_real_)
expect_equal(afind(NA,"foo")$location[1], NA_integer_)
expect_equal(afind(NA,"foo")$match[1], NA_character_)

expect_equal(afind("","foo")$distance[1], 3)
expect_equal(afind("","foo")$location[1], 1)
expect_equal(afind("","foo")$match[1], "")

expect_equal(grab("foo", ""), 1L)
expect_equal(grabl("foo",""), TRUE)
expect_equal(grab("foo",NA), integer(0))

# note that 'grepl' gives FALSE in this case (which is inconsistent with
# grepl(NA, NA), grepl(NA, "foo").
expect_equal(grabl("foo",NA), NA)



Expand Down
2 changes: 1 addition & 1 deletion pkg/src/Rstringdist.c
Original file line number Diff line number Diff line change
Expand Up @@ -418,7 +418,7 @@ SEXP R_afind(SEXP a, SEXP pattern, SEXP width
if (isna_s || isna_t){ // something to search in, or find?
yloc[offset + i] = NA_INTEGER;
ydist[offset + i] = NA_REAL;
} else if ( current_window >= len_s ){ // is the text shorter than the pattern?
} else if ( current_window >= len_s ){ // is the text shorter than the window?
yloc[offset + i] = 1L;
ydist[offset + i] = stringdist(sd, s, len_s, t, len_t);
} else { // slide window over text and compute distances
Expand Down
2 changes: 1 addition & 1 deletion pkg/src/qgram.c
Original file line number Diff line number Diff line change
Expand Up @@ -456,7 +456,7 @@ double running_cosine_dist(
double *store
){

double d, ww, wp, pp;
double d;

unsigned int *first_qgram;
unsigned int *last_qgram;
Expand Down

0 comments on commit 692095c

Please sign in to comment.