-
Notifications
You must be signed in to change notification settings - Fork 60
Closed
Description
Your use of `keyby` instead of `by` results in an index vector that is sorted by x. That is not what you want, since dplyr doesn't sort by its grouping columns.
library(testthat)
library(dtplyr)
library(dplyr,warn.conflicts=FALSE)
library(data.table,warn.conflicts=FALSE)
test_that("can filter when grouped, x not sorted", { # reprex: grouped filter on a table whose grouping column x is not sorted
dt1 <- lazy_dt(data.table(x = c(3,3,1, 2,1, 2), y = c(3,0,1, 2, 2, 4)), "DT") # x is deliberately unsorted: 3, 3, 1, 2, 1, 2
dt2 <- dt1 %>% group_by(x) %>% filter(sum(y) == 3) %>% as_tibble() # grouped filter applied to the lazy_dt, then collected
dt3 <- dt1 %>% as_tibble() %>% group_by(x) %>% filter(sum(y) == 3) %>% ungroup() # same filter applied after converting to a tibble; serves as the reference
expect_equal(dt3, tibble(x = c(3, 3, 1 ,1), y = c(3, 0, 1, 2))) # reference result: matching rows in their original order
expect_equal(dt2, tibble(x = c(3, 3, 1, 1), y = c(3, 0, 1, 2))) # the expectation reported as failing in the recorded output
})
#> Error: Test failed: 'can filter when grouped'
#> * <text>:10: `dt2` not equal to tibble(x = c(3, 3, 1, 1), y = c(3, 0, 1, 2)).
#> Component "x": Mean relative difference: 1
#> Component "y": Mean relative difference: 1.333333

Consider:
library(dtplyr)
library(dplyr, warn.conflicts = FALSE)
library(data.table, warn.conflicts = FALSE)

# Lazy data.table whose grouping column x is deliberately unsorted.
dt1 <- lazy_dt(
  data.table(
    x = c(3, 3, 1, 2, 1, 2),
    y = c(3, 0, 1, 2, 2, 4)
  ),
  name = "DT"
)

# Grouped filter; show_query() prints the data.table call dtplyr generates.
dt2 <- dt1 %>%
  group_by(x) %>%
  filter(sum(y) == 3)
show_query(dt2)
#> DT[DT[, .I[sum(y) == 3], keyby = .(x)]$V1]

# Run the same row-index computation by hand on the underlying data.table,
# once with keyby (as generated above) and once with by, to compare the
# order of the returned row indices.
dt1 <- as.data.table(dt1)
dt1[, .I[sum(y) == 3], keyby = "x"]$V1
#> [1] 3 5 1 2
dt1[, .I[sum(y) == 3], by = "x"]$V1
#> [1] 1 2 3 5

Originally posted by @myoung3 in #177 (comment)
Reactions are currently unavailable
Metadata
Metadata
Assignees
Labels
No labels