tidyverse · krlmlr · Feb 10, 2017 · Jan 27, 2017 · Jan 27, 2017 · Jan 27, 2017
diff --git a/NAMESPACE b/NAMESPACE
@@ -119,6 +119,9 @@ S3method(group_size,data.frame)
 S3method(group_size,grouped_df)
 S3method(group_size,rowwise_df)
 S3method(group_size,tbl_sql)
+S3method(group_vars,default)
+S3method(group_vars,tbl_cube)
+S3method(group_vars,tbl_lazy)
 S3method(groups,data.frame)
 S3method(groups,grouped_df)
 S3method(groups,tbl_cube)
@@ -382,6 +385,7 @@ export(group_by_prepare)
 export(group_indices)
 export(group_indices_)
 export(group_size)
+export(group_vars)
 export(grouped_df)
 export(groups)
 export(has_lahman)

diff --git a/NEWS.md b/NEWS.md
@@ -6,6 +6,8 @@
 
 * Fix `group_by()` for data frames that have UTF-8 encoded names (#2284, #2382).
 
+* New `group_vars()` generic that returns the grouping as a character vector, to avoid the potentially lossy conversion to language symbols. The list returned by `group_by_prepare()` now has a new `group_names` component (#1950).
+
 * Fix `copy_to()` for MySQL if a character column contains `NA` (#1975, #2256, #2263, #2381, @demorenoc, @eduardgrebe).
 
 * Fix `group_size()` and `n_groups()` for MySQL (#2381).

diff --git a/R/add-tally.r b/R/add-tally.r
@@ -62,7 +62,7 @@ add_tally <- function(x, wt, sort = FALSE) {
 #' @rdname add_tally
 #' @export
 add_tally_ <- function(x, wt = NULL, sort = FALSE) {
-  g <- groups(x)
+  g <- group_vars(x)
   if (is.null(wt)) {
     n <- quote(n())
   } else {
@@ -75,7 +75,7 @@ add_tally_ <- function(x, wt = NULL, sort = FALSE) {
     desc_n <- lazyeval::interp(quote(desc(n)), n = as.name(n_name))
     out <- arrange_(out, desc_n)
   }
-  group_by_(out, .dots = g)
+  grouped_df(out, g)
 }
 
 
@@ -91,9 +91,9 @@ add_count <- function(x, ..., wt = NULL, sort = FALSE) {
 #' @rdname add_tally
 #' @export
 add_count_ <- function(x, vars, wt = NULL, sort = FALSE) {
-  g <- groups(x)
+  g <- group_vars(x)
   grouped <- group_by_(x, .dots = vars, add = TRUE)
 
-  ret <- add_tally_(grouped, wt = wt, sort = sort)
-  group_by_(ret, .dots = g)
+  out <- add_tally_(grouped, wt = wt, sort = sort)
+  grouped_df(out, g)
 }
diff --git a/R/colwise.R b/R/colwise.R
@@ -201,7 +201,7 @@ colwise_ <- function(tbl, calls, vars) {
   named_calls <- attr(calls, "has_names")
   named_vars <- any(has_names(vars))
 
-  vars <- select_vars_(tbl_vars(tbl), vars, exclude = as.character(groups(tbl)))
+  vars <- select_vars_(tbl_vars(tbl), vars, exclude = group_vars(tbl))
 
   out <- vector("list", length(vars) * length(calls))
   dim(out) <- c(length(vars), length(calls))

diff --git a/R/dataframe.R b/R/dataframe.R
@@ -48,7 +48,7 @@ add_rownames <- function(df, var = "rowname") {
 #' @export
 group_by_.data.frame <- function(.data, ..., .dots, add = FALSE) {
   groups <- group_by_prepare(.data, ..., .dots = .dots, add = add)
-  grouped_df(groups$data, groups$groups)
+  grouped_df(groups$data, groups$group_names)
 }
 
 #' @export

diff --git a/R/group-by.r b/R/group-by.r
@@ -95,21 +95,18 @@ group_by_prepare <- function(.data, ..., .dots, add = FALSE) {
   # Once we've done the mutate, we no longer need lazy objects, and
   # can instead just use symbols
   new_groups <- lazyeval::auto_name(new_groups)
-  groups <- lapply(names(new_groups), as.name)
+  group_names <- names(new_groups)
   if (add) {
-    groups <- c(groups(.data), groups)
+    group_names <- c(group_vars(.data), group_names)
   }
-  groups <- groups[!duplicated(groups)]
+  group_names <- unique(group_names)
 
-  list(data = .data, groups = groups)
+  list(data = .data, groups = lapply(group_names, as.name), group_names = group_names)
 }
 
-#' Get/set the grouping variables for tbl.
-#'
-#' These functions do not perform non-standard evaluation, and so are useful
-#' when programming against `tbl` objects. `ungroup()` is a convenient
-#' inline way of removing existing grouping.
-#'
+#' @rdname group_by
+#' @description `groups()` returns the current grouping
+#'   as a list of [name()].
 #' @param x data [tbl()]
 #' @param ... Additional arguments that maybe used by methods.
 #' @export
@@ -121,14 +118,28 @@ groups <- function(x) {
   UseMethod("groups")
 }
 
+#' @rdname group_by
+#' @description `group_vars()` returns the current grouping
+#'   as a character vector.
+#' @export
+group_vars <- function(x) {
+  UseMethod("group_vars", x)  # S3 generic: dispatch on the class of `x`
+}
+
+#' @export
+group_vars.default <- function(x) {
+  deparse_names(groups(x))  # fallback: deparse each element of groups(x) into a string
+}
+
 #' @export
 regroup <- function(x, value) {
   .Deprecated("group_by_")
   group_by_(x, .dots = value)
 }
 
+#' @rdname group_by
+#' @description `ungroup()` removes an existing grouping.
 #' @export
-#' @rdname groups
 ungroup <- function(x, ...) {
   UseMethod("ungroup")
 }
diff --git a/R/grouped-df.r b/R/grouped-df.r
@@ -6,7 +6,7 @@
 #'
 #' @keywords internal
 #' @param data a tbl or data frame.
-#' @param vars a list of quoted variables.
+#' @param vars a character vector or a list of [name()]
 #' @param drop if `TRUE` preserve all factor levels, even those without
 #'   data.
 #' @export
@@ -16,10 +16,12 @@ grouped_df <- function(data, vars, drop = TRUE) {
   }
   assert_that(
     is.data.frame(data),
-    is.list(vars),
-    all(sapply(vars, is.name)),
+    (is.list(vars) && all(sapply(vars,is.name))) || is.character(vars),
     is.flag(drop)
   )
+  if (is.list(vars)) {
+    vars <- deparse_names(vars)
+  }
   grouped_df_impl(data, unname(vars), drop)
 }
 
@@ -53,6 +55,8 @@ n_groups.grouped_df <- function(x) {
 
 #' @export
 groups.grouped_df <- function(x) {
+  # Implement group_vars.grouped_df() instead if this assertion fails
+  stopifnot(is.list(attr(x, "vars")))
   attr(x, "vars")
 }
 
@@ -73,12 +77,12 @@ ungroup.grouped_df <- function(x, ...) {
 `[.grouped_df` <- function(x, i, j, ...) {
   y <- NextMethod()
 
-  group_vars <- vapply(groups(x), as.character, character(1))
+  group_names <- group_vars(x)
 
-  if (!all(group_vars %in% names(y))) {
+  if (!all(group_names %in% names(y))) {
     tbl_df(y)
   } else {
-    grouped_df(y, groups(x))
+    grouped_df(y, group_names)
   }
 
 }
@@ -107,7 +111,7 @@ select_.grouped_df <- function(.data, ..., .dots) {
 }
 
 ensure_grouped_vars <- function(vars, data, notify = TRUE) {
-  group_names <- vapply(groups(data), as.character, character(1))
+  group_names <- group_vars(data)
   missing <- setdiff(group_names, vars)
 
   if (length(missing) > 0) {

diff --git a/R/rowwise.r b/R/rowwise.r
@@ -59,7 +59,7 @@ group_by_.rowwise_df <- function(.data, ..., .dots, add = FALSE) {
   .data <- ungroup(.data)
 
   groups <- group_by_prepare(.data, ..., .dots = .dots, add = add)
-  grouped_df(groups$data, groups$groups)
+  grouped_df(groups$data, groups$group_names)
 }
 
 

diff --git a/R/tbl-cube.r b/R/tbl-cube.r
@@ -354,16 +354,18 @@ group_by_.tbl_cube <- function(.data, ..., .dots, add = FALSE) {
   groups <- group_by_prepare(.data, ..., .dots = .dots, add = add)
 
   # Convert symbols to indices
-  nms <- names(groups$data$dims)
-  nms_list <- as.list(setNames(seq_along(nms), nms))
-
-  groups$data$groups <- unlist(lapply(groups$groups, eval, nms_list))
+  groups$data$groups <- match(groups$group_names, names(groups$data$dims))
   groups$data
 }
 
 #' @export
 groups.tbl_cube <- function(x) {
-  lapply(x$dims, as.name)[x$group]
+  lapply(group_vars(x), as.name)
+}
+
+#' @export
+group_vars.tbl_cube <- function(x) {
+  names(x$dims)[x$groups]  # `groups` stores positions in names(dims); return those names as character
+}
 
 # mutate and summarise operate similarly need to evaluate variables in special

diff --git a/R/tbl-lazy.R b/R/tbl-lazy.R
@@ -18,7 +18,12 @@ tbl_vars.tbl_lazy <- function(x) {
 
 #' @export
 groups.tbl_lazy <- function(x) {
-  lapply(op_grps(x$ops), as.name)
+  lapply(group_vars(x), as.name)
+}
+
+#' @export
+group_vars.tbl_lazy <- function(x) {
+  op_grps(x$ops)  # returns op_grps() output unchanged (presumably a character vector; confirm)
+}
 
 #' @export

diff --git a/R/utils.r b/R/utils.r
@@ -68,7 +68,11 @@ is.wholenumber <- function(x, tol = .Machine$double.eps ^ 0.5) {
 
 deparse_all <- function(x) {
   deparse2 <- function(x) paste(deparse(x, width.cutoff = 500L), collapse = "")
-  vapply(x, deparse2, FUN.VALUE = character(1))
+  vapply(x, deparse2, FUN.VALUE = character(1L))
+}
+
+deparse_names <- function(x) {
+  vapply(X = x, FUN = function(sym) deparse(sym), FUN.VALUE = character(1L))  # one string per element
+}
 
 #' Provides comma-separated string out ot the parameters

diff --git a/man/group_by.Rd b/man/group_by.Rd
diff --git a/man/grouped_df.Rd b/man/grouped_df.Rd
diff --git a/man/groups.Rd b/man/groups.Rd
diff --git a/tests/testthat/helper-groups.R b/tests/testthat/helper-groups.R
@@ -0,0 +1,13 @@
+expect_groups <- function(df, groups, info = NULL) {
+  if (length(groups) > 0L) {  # grouped case: check both the symbol and the character view
+    expect_identical(groups(df), lapply(enc2native(groups), as.name), info = info)
+    expect_identical(group_vars(df), groups, info = info)
+  } else {  # no grouping expected: groups() is NULL, group_vars() is empty
+    expect_null(groups(df), info = info)
+    expect_identical(group_vars(df), character(), info = info)
+  }
+}
+
+expect_no_groups <- function(df) {
+  expect_groups(df, groups = NULL)  # NULL has length 0, so the "no groups" branch runs
+}
diff --git a/tests/testthat/test-add-count.R b/tests/testthat/test-add-count.R
@@ -16,9 +16,9 @@ test_that("add_count respects and preserves existing groups", {
   df <- data.frame(g = c(1, 2, 2, 2), val = c("b", "b", "b", "c"))
   res <- df %>% add_count(val)
   expect_equal(res$n, c(3, 3, 3, 1))
-  expect_null(groups(res))
+  expect_no_groups(res)
 
   res <- df %>% group_by(g) %>% add_count(val)
   expect_equal(res$n, c(1, 2, 2, 1))
-  expect_equal(as.character(groups(res)), "g")
+  expect_groups(res, "g")
 })
diff --git a/tests/testthat/test-add-tally.R b/tests/testthat/test-add-tally.R
@@ -16,11 +16,11 @@ test_that("add_tally respects and preserves existing groups", {
   df <- data.frame(g = c(1, 2, 2, 2), val = c("b", "b", "b", "c"))
   res <- df %>% group_by(val) %>% add_tally()
   expect_equal(res$n, c(3, 3, 3, 1))
-  expect_equal(as.character(groups(res)), "val")
+  expect_groups(res, "val")
 
   res <- df %>% group_by(g, val) %>% add_tally()
   expect_equal(res$n, c(1, 2, 2, 1))
-  expect_equal(as.character(groups(res)), c("g", "val"))
+  expect_groups(res, c("g", "val"))
 })
 
 test_that("add_tally can be given a weighting variable", {