Skip to content

Commit

Permalink
Fix tests for ngram/skip_ngram
Browse files: browse the repository at this point in the history
  • Loading branch information
juliasilge committed May 2, 2016
1 parent 986570c commit 66c532b
Showing 1 changed file with 5 additions and 4 deletions.
9 changes: 5 additions & 4 deletions tests/testthat/test-unnest-tokens.R
Original file line number | Diff line number | Diff line change
Expand Up @@ -32,7 +32,7 @@ test_that("tokenizing by sentence works", {
expect_equal(d$sentence[1], "i'm nobody!")
})

test_that("tokenizing by ngram works", {
test_that("tokenizing by ngram and skip ngram works", {
d2 <- data_frame(txt = c("Hope is the thing with feathers -",
"That perches in the soul -",
"And sings the tune without the words -",
Expand All @@ -45,19 +45,20 @@ test_that("tokenizing by ngram works", {
"And on the strangest Sea -",
"Yet - never - in Extremity,",
"It asked a crumb - of me."))

# tokenize by ngram
d <- d2 %>% unnest_tokens(ngram, txt, token = "ngrams", n = 2)
expect_equal(nrow(d), 57)
expect_equal(ncol(d), 1)
expect_equal(d$ngram[1], "hope is")
expect_equal(d$ngram[10], "and sings")
})

test_that("tokenizing by skip ngram works", {
# tokenize by skip_ngram
d <- d2 %>% unnest_tokens(ngram, txt, token = "skip_ngrams", n = 4, k = 2)
expect_equal(nrow(d), 36)
expect_equal(ncol(d), 1)
expect_equal(d$ngram[1], "hope is the thing")
expect_equal(d$ngram[10], "tune without the words")
})

})

0 comments on commit 66c532b

Please sign in to comment.