Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

New group_names() to return group names as character vector #2384

Merged
merged 11 commits into from
Feb 10, 2017
Merged
Show file tree
Hide file tree
Changes from 9 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,9 @@ S3method(group_size,data.frame)
S3method(group_size,grouped_df)
S3method(group_size,rowwise_df)
S3method(group_size,tbl_sql)
S3method(group_vars,default)
S3method(group_vars,tbl_cube)
S3method(group_vars,tbl_lazy)
S3method(groups,data.frame)
S3method(groups,grouped_df)
S3method(groups,tbl_cube)
Expand Down Expand Up @@ -382,6 +385,7 @@ export(group_by_prepare)
export(group_indices)
export(group_indices_)
export(group_size)
export(group_vars)
export(grouped_df)
export(groups)
export(has_lahman)
Expand Down
2 changes: 2 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@

* Fix `group_by()` for data frames that have UTF-8 encoded names (#2284, #2382).

* New `group_vars()` generic that returns the grouping as character vector, to avoid the potentially lossy conversion to language symbol. The list returned by `group_by_prepare()` now has a new `group_names` component (#1950).

* Fix `copy_to()` for MySQL if a character column contains `NA` (#1975, #2256, #2263, #2381, @demorenoc, @eduardgrebe).

* Fix `group_size()` and `n_groups()` for MySQL (#2381).
Expand Down
10 changes: 5 additions & 5 deletions R/add-tally.r
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ add_tally <- function(x, wt, sort = FALSE) {
#' @rdname add_tally
#' @export
add_tally_ <- function(x, wt = NULL, sort = FALSE) {
g <- groups(x)
g <- group_vars(x)
if (is.null(wt)) {
n <- quote(n())
} else {
Expand All @@ -75,7 +75,7 @@ add_tally_ <- function(x, wt = NULL, sort = FALSE) {
desc_n <- lazyeval::interp(quote(desc(n)), n = as.name(n_name))
out <- arrange_(out, desc_n)
}
group_by_(out, .dots = g)
grouped_df(out, g)
}


Expand All @@ -91,9 +91,9 @@ add_count <- function(x, ..., wt = NULL, sort = FALSE) {
#' @rdname add_tally
#' @export
add_count_ <- function(x, vars, wt = NULL, sort = FALSE) {
g <- groups(x)
g <- group_vars(x)
grouped <- group_by_(x, .dots = vars, add = TRUE)

ret <- add_tally_(grouped, wt = wt, sort = sort)
group_by_(ret, .dots = g)
out <- add_tally_(grouped, wt = wt, sort = sort)
grouped_df(out, g)
}
2 changes: 1 addition & 1 deletion R/colwise.R
Original file line number Diff line number Diff line change
Expand Up @@ -201,7 +201,7 @@ colwise_ <- function(tbl, calls, vars) {
named_calls <- attr(calls, "has_names")
named_vars <- any(has_names(vars))

vars <- select_vars_(tbl_vars(tbl), vars, exclude = as.character(groups(tbl)))
vars <- select_vars_(tbl_vars(tbl), vars, exclude = group_vars(tbl))

out <- vector("list", length(vars) * length(calls))
dim(out) <- c(length(vars), length(calls))
Expand Down
2 changes: 1 addition & 1 deletion R/dataframe.R
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ add_rownames <- function(df, var = "rowname") {
#' @export
group_by_.data.frame <- function(.data, ..., .dots, add = FALSE) {
groups <- group_by_prepare(.data, ..., .dots = .dots, add = add)
grouped_df(groups$data, groups$groups)
grouped_df(groups$data, groups$group_names)
}

#' @export
Expand Down
33 changes: 22 additions & 11 deletions R/group-by.r
Original file line number Diff line number Diff line change
Expand Up @@ -95,21 +95,18 @@ group_by_prepare <- function(.data, ..., .dots, add = FALSE) {
# Once we've done the mutate, we no longer need lazy objects, and
# can instead just use symbols
new_groups <- lazyeval::auto_name(new_groups)
groups <- lapply(names(new_groups), as.name)
group_names <- names(new_groups)
if (add) {
groups <- c(groups(.data), groups)
group_names <- c(group_vars(.data), group_names)
}
groups <- groups[!duplicated(groups)]
group_names <- unique(group_names)

list(data = .data, groups = groups)
list(data = .data, groups = lapply(group_names, as.name), group_names = group_names)
}

#' Get/set the grouping variables for tbl.
#'
#' These functions do not perform non-standard evaluation, and so are useful
#' when programming against `tbl` objects. `ungroup()` is a convenient
#' inline way of removing existing grouping.
#'
#' @rdname group_by
#' @description `groups()` returns the current grouping
#' as a list of [name()].
#' @param x data [tbl()]
#' @param ... Additional arguments that maybe used by methods.
#' @export
Expand All @@ -121,14 +118,28 @@ groups <- function(x) {
UseMethod("groups")
}

#' @rdname group_by
#' @description `group_vars()` returns the current grouping
#' as a character vector.
#' @export
group_vars <- function(x) {
UseMethod("group_vars")
}

#' @export
group_vars.default <- function(x) {
deparse_names(groups(x))
}

#' @export
regroup <- function(x, value) {
.Deprecated("group_by_")
group_by_(x, .dots = value)
}

#' @rdname group_by
#' @description `ungroup()` removes an existing grouping.
#' @export
#' @rdname groups
ungroup <- function(x, ...) {
UseMethod("ungroup")
}
18 changes: 11 additions & 7 deletions R/grouped-df.r
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
#'
#' @keywords internal
#' @param data a tbl or data frame.
#' @param vars a list of quoted variables.
#' @param vars a character vector or a list of [name()]
#' @param drop if `TRUE` preserve all factor levels, even those without
#' data.
#' @export
Expand All @@ -16,10 +16,12 @@ grouped_df <- function(data, vars, drop = TRUE) {
}
assert_that(
is.data.frame(data),
is.list(vars),
all(sapply(vars, is.name)),
(is.list(vars) && all(sapply(vars,is.name))) || is.character(vars),
is.flag(drop)
)
if (is.list(vars)) {
vars <- deparse_names(vars)
}
grouped_df_impl(data, unname(vars), drop)
}

Expand Down Expand Up @@ -53,6 +55,8 @@ n_groups.grouped_df <- function(x) {

#' @export
groups.grouped_df <- function(x) {
# Implement group_vars.grouped_df() instead if this assertion fails
stopifnot(is.list(attr(x, "vars")))
attr(x, "vars")
}

Expand All @@ -73,12 +77,12 @@ ungroup.grouped_df <- function(x, ...) {
`[.grouped_df` <- function(x, i, j, ...) {
y <- NextMethod()

group_vars <- vapply(groups(x), as.character, character(1))
group_names <- group_vars(x)

if (!all(group_vars %in% names(y))) {
if (!all(group_names %in% names(y))) {
tbl_df(y)
} else {
grouped_df(y, groups(x))
grouped_df(y, group_names)
}

}
Expand Down Expand Up @@ -107,7 +111,7 @@ select_.grouped_df <- function(.data, ..., .dots) {
}

ensure_grouped_vars <- function(vars, data, notify = TRUE) {
group_names <- vapply(groups(data), as.character, character(1))
group_names <- group_vars(data)
missing <- setdiff(group_names, vars)

if (length(missing) > 0) {
Expand Down
2 changes: 1 addition & 1 deletion R/rowwise.r
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ group_by_.rowwise_df <- function(.data, ..., .dots, add = FALSE) {
.data <- ungroup(.data)

groups <- group_by_prepare(.data, ..., .dots = .dots, add = add)
grouped_df(groups$data, groups$groups)
grouped_df(groups$data, groups$group_names)
}


Expand Down
12 changes: 7 additions & 5 deletions R/tbl-cube.r
Original file line number Diff line number Diff line change
Expand Up @@ -354,16 +354,18 @@ group_by_.tbl_cube <- function(.data, ..., .dots, add = FALSE) {
groups <- group_by_prepare(.data, ..., .dots = .dots, add = add)

# Convert symbols to indices
nms <- names(groups$data$dims)
nms_list <- as.list(setNames(seq_along(nms), nms))

groups$data$groups <- unlist(lapply(groups$groups, eval, nms_list))
groups$data$groups <- match(groups$group_names, names(groups$data$dims))
groups$data
}

#' @export
groups.tbl_cube <- function(x) {
lapply(x$dims, as.name)[x$group]
lapply(group_vars(x), as.name)
}

#' @export
group_vars.tbl_cube <- function(x) {
x$dims[x$group]
}

# mutate and summarise operate similarly need to evaluate variables in special
Expand Down
7 changes: 6 additions & 1 deletion R/tbl-lazy.R
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,12 @@ tbl_vars.tbl_lazy <- function(x) {

#' @export
groups.tbl_lazy <- function(x) {
lapply(op_grps(x$ops), as.name)
lapply(group_vars(x), as.name)
}

#' @export
group_vars.tbl_lazy <- function(x) {
op_grps(x$ops)
}

#' @export
Expand Down
6 changes: 5 additions & 1 deletion R/utils.r
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,11 @@ is.wholenumber <- function(x, tol = .Machine$double.eps ^ 0.5) {

deparse_all <- function(x) {
deparse2 <- function(x) paste(deparse(x, width.cutoff = 500L), collapse = "")
vapply(x, deparse2, FUN.VALUE = character(1))
vapply(x, deparse2, FUN.VALUE = character(1L))
}

deparse_names <- function(x) {
vapply(x, deparse, FUN.VALUE = character(1L))
}

#' Provides comma-separated string out ot the parameters
Expand Down
24 changes: 24 additions & 0 deletions man/group_by.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion man/grouped_df.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

26 changes: 0 additions & 26 deletions man/groups.Rd

This file was deleted.

13 changes: 13 additions & 0 deletions tests/testthat/helper-groups.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
expect_groups <- function(df, groups, info = NULL) {
if (length(groups) == 0L) {
expect_null(groups(df), info = info)
expect_identical(group_vars(df), character(), info = info)
} else {
expect_identical(groups(df), lapply(enc2native(groups), as.name), info = info)
expect_identical(group_vars(df), groups, info = info)
}
}

expect_no_groups <- function(df) {
expect_groups(df, NULL)
}
4 changes: 2 additions & 2 deletions tests/testthat/test-add-count.R
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,9 @@ test_that("add_count respects and preserves existing groups", {
df <- data.frame(g = c(1, 2, 2, 2), val = c("b", "b", "b", "c"))
res <- df %>% add_count(val)
expect_equal(res$n, c(3, 3, 3, 1))
expect_null(groups(res))
expect_no_groups(res)

res <- df %>% group_by(g) %>% add_count(val)
expect_equal(res$n, c(1, 2, 2, 1))
expect_equal(as.character(groups(res)), "g")
expect_groups(res, "g")
})
4 changes: 2 additions & 2 deletions tests/testthat/test-add-tally.R
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,11 @@ test_that("add_tally respects and preserves existing groups", {
df <- data.frame(g = c(1, 2, 2, 2), val = c("b", "b", "b", "c"))
res <- df %>% group_by(val) %>% add_tally()
expect_equal(res$n, c(3, 3, 3, 1))
expect_equal(as.character(groups(res)), "val")
expect_groups(res, "val")

res <- df %>% group_by(g, val) %>% add_tally()
expect_equal(res$n, c(1, 2, 2, 1))
expect_equal(as.character(groups(res)), c("g", "val"))
expect_groups(res, c("g", "val"))
})

test_that("add_tally can be given a weighting variable", {
Expand Down
Loading