From a8eeb1e6ee574eccab39883d0c641c095dbe8b4a Mon Sep 17 00:00:00 2001 From: Mark Fairbanks Date: Fri, 23 Jul 2021 16:02:51 -0700 Subject: [PATCH 1/5] Add between to dt_funs --- R/tidyeval.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/tidyeval.R b/R/tidyeval.R index aaf7d9cae..d1bec1952 100644 --- a/R/tidyeval.R +++ b/R/tidyeval.R @@ -14,7 +14,7 @@ dt_eval <- function(x) { # Make sure data.table functions are available so dtplyr still works # even when data.table isn't attached dt_funs <- c( - "CJ", "copy", "dcast", "melt", "nafill", + "between", "CJ", "copy", "dcast", "melt", "nafill", "fcase", "fcoalesce", "fifelse", "fintersect", "frank", "frankv", "fsetdiff", "funion", "setcolorder", "setnames", "tstrsplit" ) From b9626b3d9c44dca63e58dd4636c569e8946460a2 Mon Sep 17 00:00:00 2001 From: Mark Fairbanks Date: Fri, 23 Jul 2021 16:03:45 -0700 Subject: [PATCH 2/5] Don't grab extra rows in slice --- R/step-subset-slice.R | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/R/step-subset-slice.R b/R/step-subset-slice.R index 51cf0ba1b..358eb85f6 100644 --- a/R/step-subset-slice.R +++ b/R/step-subset-slice.R @@ -56,12 +56,15 @@ slice.dtplyr_step <- function(.data, ...) { if (length(dots) == 0) { i <- NULL - } else if (length(dots) == 1) { - i <- dots[[1]] } else { - i <- call2("c", !!!dots) + if (length(dots) == 1) { + .rows <- dots[[1]] + } else { + .rows <- call2("c", !!!dots) + } + between <- call2("between", .rows, quote(-.N), quote(.N)) + i <- call2("[", .rows, between) } - i <- expr(!!i) step_subset_i(.data, i) } From bcd4a1c754791ab210c90b09b2baca78297e9523 Mon Sep 17 00:00:00 2001 From: Mark Fairbanks Date: Fri, 23 Jul 2021 16:04:01 -0700 Subject: [PATCH 3/5] Add test for slice with excess rows --- tests/testthat/test-step-subset-slice.R | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/tests/testthat/test-step-subset-slice.R b/tests/testthat/test-step-subset-slice.R index bd84669ee..c26cc19a3 100644 --- a/tests/testthat/test-step-subset-slice.R +++ b/tests/testthat/test-step-subset-slice.R @@ -7,12 +7,12 @@ test_that("can slice", { expr(DT) ) expect_equal( - dt %>% slice(1:4) %>% show_query(), - expr(DT[1:4]) + dt %>% slice(c(1, 2)) %>% show_query(), + expr(DT[c(1, 2)[between(c(1, 2), -.N, .N)]]) ) expect_equal( dt %>% slice(1, 2, 3) %>% show_query(), - expr(DT[c(1, 2, 3)]) + expr(DT[c(1, 2, 3)[between(c(1, 2, 3), -.N, .N)]]) ) }) @@ -22,7 +22,7 @@ test_that("can slice when grouped", { expect_equal( dt2 %>% show_query(), - expr(DT[DT[, .I[1], by = .(x)]$V1]) + expr(DT[DT[, .I[1[between(1, -.N, .N)]], by = .(x)]$V1]) ) expect_equal(as_tibble(dt2), tibble(x = c(1, 2), y = c(1, 3))) }) @@ -35,6 +35,14 @@ test_that("slicing doesn't sorts groups", { ) }) +test_that("doesn't return excess rows, #10", { + dt <- lazy_dt(data.table(x = 1:2)) + expect_equal( + dt %>% slice(1:3) %>% pull(x), + 1:2 + ) +}) + # variants ---------------------------------------------------------------- test_that("functions silently truncate results", { From 630a94ee92d0ff5494f36dedc2d186e75188e4eb Mon Sep 17 00:00:00 2001 From: Mark Fairbanks Date: Fri, 23 Jul 2021 16:04:08 -0700 Subject: [PATCH 4/5] Add news bullet --- NEWS.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/NEWS.md b/NEWS.md index 1d91d6621..9a4930800 100644 --- a/NEWS.md +++ b/NEWS.md @@ -68,6 +68,8 @@ * Can use `T` to specify the default in `case_when()`, (#272). +* `slice()` no longer returns excess rows (#10). + * More translations for tidyr verbs have been added: * `drop_na()` (@markfairbanks, #194) From 1392c9d6e03aa730880ef19e641e84126e9949ba Mon Sep 17 00:00:00 2001 From: Mark Fairbanks Date: Mon, 26 Jul 2021 08:22:41 -0700 Subject: [PATCH 5/5] Note relevant data.table PR --- R/step-subset-slice.R | 2 ++ 1 file changed, 2 insertions(+) diff --git a/R/step-subset-slice.R b/R/step-subset-slice.R index 358eb85f6..1cf1ce343 100644 --- a/R/step-subset-slice.R +++ b/R/step-subset-slice.R @@ -62,6 +62,8 @@ slice.dtplyr_step <- function(.data, ...) { } else { .rows <- call2("c", !!!dots) } + # Update logic once data.table #4353 is merged + # https://github.com/Rdatatable/data.table/pull/4353 between <- call2("between", .rows, quote(-.N), quote(.N)) i <- call2("[", .rows, between) }