Skip to content

Commit

Permalink
- New add_count() and add_tally() for adding an n column within…
Browse files Browse the repository at this point in the history
… groups (#2078, @dgrtwo).

* Added draft of add_tally and add_count functions, which add an "n" column based on counting within groups.

* Added simple test cases for add_count and add_tally
  • Loading branch information
dgrtwo authored and krlmlr committed Dec 6, 2016
1 parent 5baf524 commit dabc82b
Show file tree
Hide file tree
Showing 6 changed files with 233 additions and 0 deletions.
1 change: 1 addition & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ LazyData: yes
License: MIT + file LICENSE
Collate:
'RcppExports.R'
'add-tally.r'
'all-equal.r'
'bench-compare.r'
'bind.r'
Expand Down
4 changes: 4 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -288,9 +288,13 @@ S3method(union_all,default)
S3method(union_all,tbl_lazy)
S3method(unique,sql)
export("%>%")
export(add_count)
export(add_count_)
export(add_op_single)
export(add_row)
export(add_rownames)
export(add_tally)
export(add_tally_)
export(all_equal)
export(anti_join)
export(arrange)
Expand Down
102 changes: 102 additions & 0 deletions R/add-tally.r
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
#' Add a column counting or tallying observations within groups
#'
#' \code{add_tally} adds a column "n" to a table based on the number
#' of items within each existing group, while \code{add_count} is a shortcut that
#' does the grouping as well. These functions are to \code{\link{tally}}
#' and \code{\link{count}} as \code{\link{mutate}} is to \code{\link{summarise}}:
#' they add an additional column rather than collapsing each group.
#'
#' @param x a \code{tbl}.
#' @param wt (Optional) If omitted, will count the number of rows. Otherwise,
#' performs a weighted tally by summing the (non-missing) values of variable wt
#' @param sort Whether to sort the result in descending order of n
#' @param ...,vars Variables to group by.
#'
#' @details \code{add_count} counts within the current groups of the data when present,
#' and preserves those groups (it does not add the new ones).
#'
#' @examples
#'
#' add_tally(mtcars)
#' add_tally(group_by(mtcars, cyl))
#' add_tally(group_by(mtcars, cyl), sort = TRUE)
#'
#' add_count(mtcars, cyl)
#' add_count(mtcars, cyl, am)
#'
#' if (require("Lahman")) {
#' batting_tbl <- tbl_df(Batting)
#'
#' # get records of players who played in multiple stints in the same year
#' batting_tbl %>%
#' add_count(playerID, yearID) %>%
#' filter(n > 1)
#'
#' # get only players who played in more than three stints total
#' batting_tbl %>%
#' add_count(playerID) %>%
#' filter(n > 3)
#'
#' # get only players with at least 1000 ABs
#' batting_tbl %>%
#' add_count(playerID, wt = AB) %>%
#' filter(n >= 1000)
#' }
#'
#' @export
add_tally <- function(x, wt, sort = FALSE) {
if (missing(wt)) {
if ("n" %in% names(x)) {
message("Using n as weighting variable")
wt <- quote(n)
}
else {
wt <- NULL
}
}
else {
wt <- substitute(wt)
}
add_tally_(x, wt, sort = sort)
}


#' @rdname add_tally
#' @export
add_tally_ <- function(x, wt = NULL, sort = FALSE) {
g <- groups(x)
if (is.null(wt)) {
n <- quote(n())
}
else {
n <- lazyeval::interp(quote(sum(wt, na.rm = TRUE)), wt = wt)
}
n_name <- n_name(tbl_vars(x))
out <- mutate_(x, .dots = setNames(list(n), n_name))

if (sort) {
desc_n <- lazyeval::interp(quote(desc(n)), n = as.name(n_name))
out <- arrange_(out, desc_n)
}
group_by_(out, .dots = g)
}


#' @rdname add_tally
#' @export
add_count <- function(x, ..., wt = NULL, sort = FALSE) {
vars <- lazyeval::lazy_dots(...)
wt <- substitute(wt)
add_count_(x, vars, wt, sort = sort)
}


#' @rdname add_tally
#' @export
add_count_ <- function(x, vars, wt = NULL, sort = FALSE) {
g <- groups(x)
grouped <- group_by_(x, .dots = vars, add = TRUE)

ret <- add_tally_(grouped, wt = wt, sort = sort)
group_by_(ret, .dots = g)
}
68 changes: 68 additions & 0 deletions man/add_tally.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

24 changes: 24 additions & 0 deletions tests/testthat/test-add-count.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
context("add_count")

test_that("can add counts of a variable called n", {
df <- data.frame(n = c(1, 1, 2, 2, 2))

out <- df %>% add_count(n)
expect_equal(names(out), c("n", "nn"))
expect_equal(out$n, df$n)
expect_equal(out$nn, c(2, 2, 3, 3, 3))

out <- df %>% add_count(n, sort = TRUE)
expect_equal(out$nn, c(3, 3, 3, 2, 2))
})

test_that("add_count respects and preserves existing groups", {
df <- data.frame(g = c(1, 2, 2, 2), val = c("b", "b", "b", "c"))
res <- df %>% add_count(val)
expect_equal(res$n, c(3, 3, 3, 1))
expect_null(groups(res))

res <- df %>% group_by(g) %>% add_count(val)
expect_equal(res$n, c(1, 2, 2, 1))
expect_equal(as.character(groups(res)), "g")
})
34 changes: 34 additions & 0 deletions tests/testthat/test-add-tally.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
context("add_tally")

test_that("can add tallies of a variable", {
df <- data.frame(a = c(1, 1, 2, 2, 2))

out <- df %>% group_by(a) %>% add_tally()
expect_equal(names(out), c("a", "n"))
expect_equal(out$a, df$a)
expect_equal(out$n, c(2, 2, 3, 3, 3))

out <- df %>% group_by(a) %>% add_tally(sort = TRUE)
expect_equal(out$n, c(3, 3, 3, 2, 2))
})

test_that("add_tally respects and preserves existing groups", {
df <- data.frame(g = c(1, 2, 2, 2), val = c("b", "b", "b", "c"))
res <- df %>% group_by(val) %>% add_tally()
expect_equal(res$n, c(3, 3, 3, 1))
expect_equal(as.character(groups(res)), "val")

res <- df %>% group_by(g, val) %>% add_tally()
expect_equal(res$n, c(1, 2, 2, 1))
expect_equal(as.character(groups(res)), c("g", "val"))
})

test_that("add_tally can be given a weighting variable", {
df <- data.frame(a = c(1, 1, 2, 2, 2), w = c(1, 1, 2, 3, 4))

out <- df %>% group_by(a) %>% add_tally(wt = w)
expect_equal(out$n, c(2, 2, 9, 9, 9))

out <- df %>% group_by(a) %>% add_tally(wt = w + 1)
expect_equal(out$n, c(4, 4, 12, 12, 12))
})

0 comments on commit dabc82b

Please sign in to comment.