Skip to content

count() drops last group #356

@jmbarbone

Description

@jmbarbone

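When using count(), only the last grouping variable is dropped from the result, instead of the grouping matching dplyr: ungrouped input comes back grouped by every counted variable except the last, and grouped input loses its last grouping variable rather than keeping all of them. I think the cause could be in tally.dtplyr_step(), where summarise() is called.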

library(dplyr, warn.conflicts = FALSE)
library(dtplyr, warn.conflicts = FALSE)

x <- tibble(
  x = c(1, 1, 2, 2),
  y = c(1, 1, 1, 2)
)

# dplyr behavior ----------------------------------------------------------

# no groups
x %>% 
  count(x, y) %>% 
  mutate(z = mean(n))
#> # A tibble: 3 × 4
#>       x     y     n     z
#>   <dbl> <dbl> <int> <dbl>
#> 1     1     1     2  1.33
#> 2     2     1     1  1.33
#> 3     2     2     1  1.33

# groups retained
x %>% 
  group_by(x, y) %>% 
  count() %>% 
  mutate(z = mean(n))
#> # A tibble: 3 × 4
#> # Groups:   x, y [3]
#>       x     y     n     z
#>   <dbl> <dbl> <int> <dbl>
#> 1     1     1     2     2
#> 2     2     1     1     1
#> 3     2     2     1     1

# dtplyr behavior ---------------------------------------------------------

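# only the last variable (y) is dropped from the groups; x (and any other
# earlier variables, if present) stays grouped, unlike the ungrouped dplyr result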
x %>% 
  lazy_dt() %>% 
  count(x, y) %>% 
  mutate(z = mean(n))
#> Source: local data table [3 x 4]
#> Groups: x
#> Call:   `_DT1`[, .(n = .N), keyby = .(x, y)][, `:=`(z = mean(n)), by = .(x)]
#> 
#>       x     y     n     z
#>   <dbl> <dbl> <int> <dbl>
#> 1     1     1     2     2
#> 2     2     1     1     1
#> 3     2     2     1     1
#> 
#> # Use as.data.table()/as.data.frame()/as_tibble() to access results

# need explicit ungroup() to fix previous
x %>% 
  lazy_dt() %>% 
  count(x, y) %>% 
  ungroup() %>% 
  mutate(z = mean(n))
#> Source: local data table [3 x 4]
#> Call:   `_DT2`[, .(n = .N), keyby = .(x, y)][, `:=`(z = mean(n))]
#> 
#>       x     y     n     z
#>   <dbl> <dbl> <int> <dbl>
#> 1     1     1     2  1.33
#> 2     2     1     1  1.33
#> 3     2     2     1  1.33
#> 
#> # Use as.data.table()/as.data.frame()/as_tibble() to access results

# y is dropped rather than retained
x %>% 
  lazy_dt() %>% 
  group_by(x, y) %>% 
  count() %>% 
  mutate(z = mean(n))
#> Source: local data table [3 x 4]
#> Groups: x
#> Call:   `_DT3`[, .(n = .N), keyby = .(x, y)][, `:=`(z = mean(n)), by = .(x)]
#> 
#>       x     y     n     z
#>   <dbl> <dbl> <int> <dbl>
#> 1     1     1     2     2
#> 2     2     1     1     1
#> 3     2     2     1     1
#> 
#> # Use as.data.table()/as.data.frame()/as_tibble() to access results

Created on 2022-05-11 by the reprex package (v2.0.1)

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type
    No fields configured for issues without a type.

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions