Minimise regrouping work #4751

Merged: 30 commits, Jan 15, 2020

Commits
f2ecf6e
Make it possible to count number of expensive regrouping operations
hadley Jan 13, 2020
d561372
Use row_slice in all slicing functions
hadley Jan 13, 2020
93ea47c
Simplify slice implementation
hadley Jan 13, 2020
220930e
Add test for empty slice
hadley Jan 13, 2020
99cf6a8
Re-document
hadley Jan 13, 2020
93849a8
Standardise as_tibble() and use from ungroup()
hadley Jan 14, 2020
1a4d345
Preserve types count() and add_count()
hadley Jan 14, 2020
1fddb86
Simplify rename() and select() implementations
hadley Jan 14, 2020
bbd4f11
Simplify mutate and transmute implementations
hadley Jan 14, 2020
d2ceb4e
Slight simplification of summarise
hadley Jan 14, 2020
9b0ba50
More efficiently preserve grouping in mutate joins
hadley Jan 14, 2020
4974232
Export genericity methods
hadley Jan 14, 2020
c5b7ea4
Start documenting extension points
hadley Jan 14, 2020
9bacece
Ensure distinct() does minimal regrouping
hadley Jan 14, 2020
156fdea
Safer as.data.frame method
hadley Jan 14, 2020
2b11a47
More efficient nest_join implementation
hadley Jan 14, 2020
4c7f851
Work on documentation
hadley Jan 14, 2020
3fb322d
Correct variable name
hadley Jan 14, 2020
8ac4b21
Row names are now preserved
hadley Jan 14, 2020
0a8a341
Merge remote-tracking branch 'refs/remotes/origin/master'
hadley Jan 14, 2020
70a4930
Consistently document return types
hadley Jan 14, 2020
42b6aa4
Add news bullets
hadley Jan 14, 2020
0ad82c5
Fixes for dev vectrs
hadley Jan 14, 2020
d0555d6
Supress warning in tests
hadley Jan 14, 2020
2d2b65c
Update rlang for new test parsing
hadley Jan 14, 2020
ba9cb92
Re-document
hadley Jan 14, 2020
91c4d6e
Implement with_groups()
hadley Jan 14, 2020
f4e170f
Clarify changes to groups in row indexing verbs
hadley Jan 15, 2020
3b06a26
Add ~ example
hadley Jan 15, 2020
1b0f176
Update vctrs errors
hadley Jan 15, 2020
18 changes: 10 additions & 8 deletions NAMESPACE
@@ -9,7 +9,6 @@ S3method("names<-",grouped_df)
S3method(anti_join,data.frame)
S3method(arrange,data.frame)
S3method(arrange,default)
S3method(arrange,grouped_df)
S3method(arrange_,data.frame)
S3method(arrange_,tbl_df)
S3method(as.data.frame,grouped_df)
@@ -32,7 +31,6 @@ S3method(default_missing,data.frame)
S3method(default_missing,default)
S3method(distinct,data.frame)
S3method(distinct,default)
S3method(distinct,grouped_df)
S3method(distinct_,data.frame)
S3method(distinct_,grouped_df)
S3method(distinct_,tbl_df)
@@ -45,9 +43,14 @@ S3method(do_,"NULL")
S3method(do_,data.frame)
S3method(do_,grouped_df)
S3method(do_,rowwise_df)
S3method(dplyr_col_modify,data.frame)
S3method(dplyr_col_modify,grouped_df)
S3method(dplyr_reconstruct,data.frame)
S3method(dplyr_reconstruct,grouped_df)
S3method(dplyr_row_slice,data.frame)
S3method(dplyr_row_slice,grouped_df)
S3method(filter,data.frame)
S3method(filter,default)
S3method(filter,grouped_df)
S3method(filter,ts)
S3method(filter_,data.frame)
S3method(filter_,tbl_df)
@@ -88,7 +91,6 @@ S3method(intersect,default)
S3method(left_join,data.frame)
S3method(mutate,data.frame)
S3method(mutate,default)
S3method(mutate,grouped_df)
S3method(mutate_,data.frame)
S3method(mutate_,tbl_df)
S3method(n_groups,data.frame)
@@ -108,7 +110,6 @@ S3method(recode,factor)
S3method(recode,numeric)
S3method(rename,data.frame)
S3method(rename,default)
S3method(rename,grouped_df)
S3method(rename_,data.frame)
S3method(rename_,grouped_df)
S3method(right_join,data.frame)
@@ -119,7 +120,6 @@ S3method(sample_n,data.frame)
S3method(sample_n,default)
S3method(select,data.frame)
S3method(select,default)
S3method(select,grouped_df)
S3method(select,list)
S3method(select_,data.frame)
S3method(select_,grouped_df)
@@ -130,7 +130,6 @@ S3method(setequal,data.frame)
S3method(setequal,default)
S3method(slice,data.frame)
S3method(slice,default)
S3method(slice,grouped_df)
S3method(slice_,data.frame)
S3method(slice_,tbl_df)
S3method(slice_head,data.frame)
@@ -150,7 +149,6 @@ S3method(tbl_ptype,default)
S3method(tbl_sum,grouped_df)
S3method(tbl_vars,data.frame)
S3method(transmute,data.frame)
S3method(transmute,grouped_df)
S3method(transmute_,default)
S3method(transmute_,grouped_df)
S3method(ungroup,data.frame)
@@ -241,6 +239,9 @@ export(distinct_if)
export(distinct_prepare)
export(do)
export(do_)
export(dplyr_col_modify)
export(dplyr_reconstruct)
export(dplyr_row_slice)
export(ends_with)
export(enexpr)
export(enexprs)
@@ -424,6 +425,7 @@ export(union)
export(union_all)
export(validate_grouped_df)
export(vars)
export(with_groups)
export(with_order)
export(wrap_dbplyr_obj)
import(rlang)
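The NAMESPACE changes above are the core of this PR: the per-verb grouped_df methods (arrange, distinct, filter, mutate, rename, select, slice, transmute) are removed, and three new generics, dplyr_row_slice(), dplyr_col_modify() and dplyr_reconstruct(), plus with_groups(), are exported instead. A rough sketch of calling the new generics directly; the argument roles (row locations for dplyr_row_slice(), a template object for dplyr_reconstruct()) are inferred from the R changes further down, so treat them as assumptions rather than part of this diff:

library(dplyr)

gdf <- group_by(mtcars, cyl)

# keep the first five rows; the grouped_df method recomputes the
# grouping metadata, so individual verbs no longer have to
dplyr_row_slice(gdf, 1:5)

# rebuild dplyr-specific attributes (class, grouping) of a plain
# data frame from a template object
dplyr_reconstruct(as.data.frame(gdf), gdf)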
20 changes: 20 additions & 0 deletions NEWS.md
@@ -1,5 +1,25 @@
# dplyr 0.9.0 (in development)

* New, experimental `with_groups()` makes it easy to temporarily group or
ungroup (#4711).

* dplyr now has a rudimentary, experimental, stop-gap extension mechanism,
documented in `?dplyr_extending`.

* The implementation of all verbs has been carefully thought through. This
mostly makes the implementations simpler, but should hopefully increase
consistency and also make it easier to adapt dplyr to new data structures
in the near future. Pragmatically, the biggest difference for most people
will be that each verb now documents its return value in terms of rows,
columns, groups, and data frame attributes.

* Row names are now preserved when working with data frames.

* `count()` and `add_count()` now preserve the type of the input (#4086).

* `add_count(.drop = )` is deprecated because it didn't actually affect
the output.

* `full_join()` gains a `keep` argument so that you can optionally choose to
keep both sets of join keys (#4589). This is useful when you want to
figure out which rows were missing from either side.
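To illustrate the first NEWS bullet, here is a minimal sketch (not taken from the PR itself) of how with_groups() is meant to be used: it applies one grouping, or NULL to ungroup, only for the duration of a single operation and then restores the input's grouping:

library(dplyr)

df <- tibble(g = c(1, 1, 2, 2), x = c(4, 3, 2, 1))

# group by g just for this mutate(); the result is ungrouped again
df %>% with_groups(g, mutate, x_mean = mean(x))

# .groups = NULL temporarily ungroups a grouped data frame instead
df %>% group_by(g) %>% with_groups(NULL, mutate, x_rank = rank(x))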
10 changes: 5 additions & 5 deletions R/all-equal.r
@@ -66,29 +66,29 @@ equal_data_frame <- function(x, y, ignore_col_order = TRUE, ignore_row_order = T
# keys must be identical
msg <- ""
if (any(wrong <- !vec_in(x_split$key, y_split$key))) {
rows <- sort(map_int(x_split$pos[which(wrong)], function(.x) .x[1L]))
rows <- sort(map_int(x_split$loc[which(wrong)], function(.x) .x[1L]))
msg <- paste0(msg, "- Rows in x but not in y: ", glue_collapse(rows, sep = ", "), "\n")
}

if (any(wrong <- !vec_in(y_split$key, x_split$key))) {
rows <- sort(map_int(y_split$pos[which(wrong)], function(.x) .x[1L]))
rows <- sort(map_int(y_split$loc[which(wrong)], function(.x) .x[1L]))
msg <- paste0(msg, "- Rows in y but not in x: ", glue_collapse(rows, sep = ", "), "\n")
}
if (msg != "") {
return(msg)
}

# keys are identical, check that rows occur the same number of times
if (any(wrong <- lengths(x_split$pos) != lengths(y_split$pos))) {
rows <- sort(map_int(x_split$pos[which(wrong)], function(.x) .x[1L]))
if (any(wrong <- lengths(x_split$loc) != lengths(y_split$loc))) {
rows <- sort(map_int(x_split$loc[which(wrong)], function(.x) .x[1L]))
return(paste0("- Rows with difference occurences in x and y: ",
glue_collapse(rows, sep = ", "),
"\n"
))
}

# then if we care about row order, the id need to be identical
if (!ignore_row_order && !all(vec_equal(x_split$pos, y_split$pos))) {
if (!ignore_row_order && !all(vec_equal(x_split$loc, y_split$loc))) {
return("Same row values, but different order")
}

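The only substantive change in R/all-equal.r is the rename of the pos field to loc, which (going by the "Fixes for dev vectrs" commit) appears to follow the column names returned by the grouping helpers in development vctrs. For reference, a small sketch of vctrs::vec_group_loc(), which produces exactly that key/loc shape:

library(vctrs)

# a data frame with a `key` column of unique values and a `loc`
# list-column holding the integer locations of each key
vec_group_loc(c("a", "b", "a"))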
24 changes: 8 additions & 16 deletions R/arrange.R
@@ -16,11 +16,14 @@
#' * treated differently for remote data, depending on the backend.
#'
#' @return
#' An object of the same type as `.data`. The columns will be left as is;
#' the rows will be in different order.
#' An object of the same type as `.data`.
#'
#' * All rows appear in the output, but (usually) in a different place.
#' * Columns are not modified.
#' * Groups are not modified.
#' * Data frame attributes are preserved.
#' @export
#' @inheritParams filter
#' @inheritSection filter Tidy data
#' @param ... <[`tidy-eval`][dplyr_tidy_eval]> Variables, or functions of
#' variables. Use [desc()] to sort a variable in descending order.
#' @family single table verbs
Expand All @@ -46,19 +49,8 @@ arrange.data.frame <- function(.data, ..., .by_group = FALSE) {
return(.data)
}

idx <- arrange_rows(.data, ...)
.data[idx, , drop = FALSE]
}

#' @export
arrange.grouped_df <- function(.data, ..., .by_group = FALSE) {
if (missing(...)) {
return(.data)
}

# TODO: figure out how to update group_indices more efficiently
idx <- arrange_rows(.data, ..., .by_group = .by_group)
.data[idx, , drop = FALSE]
loc <- arrange_rows(.data, ..., .by_group = .by_group)
dplyr_row_slice(.data, loc)
}

# Helpers -----------------------------------------------------------------
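With arrange.grouped_df() gone, the single arrange.data.frame() method computes the row order and hands it to dplyr_row_slice(), which now keeps the grouping metadata in sync. A small sanity-check sketch of the behaviour promised by the new @return block (not copied from the PR's tests):

library(dplyr)

by_cyl <- group_by(mtcars, cyl)
out <- arrange(by_cyl, desc(mpg))

group_vars(out)           # still "cyl": groups are not modified
nrow(out) == nrow(by_cyl) # all rows appear in the output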
35 changes: 23 additions & 12 deletions R/count-tally.R
@@ -25,8 +25,12 @@
#'
#' If omitted, it will default to `n`. If there's already a column called `n`,
#' it will error, and require you to specify the name.
#' @param .drop see [group_by()]
#' @return A tbl, grouped the same way as the input.
#' @param .drop For `count()`: if `FALSE` will include counts for empty groups
#' (i.e. for levels of factors that don't exist in the data). Deprecated for
#' `add_count()` since it didn't actually affect the output.
#' @return
#' An object of the same type as `.data`. `count()` and `add_count()`
#' group transiently, so the output has the same groups as the input.
#' @export
#' @examples
#' # count() is a convenient way to get a sense of the distribution of
@@ -84,26 +88,33 @@ add_tally <- function(x, wt = NULL, sort = FALSE, name = NULL) {
#' @export
#' @rdname tally
count <- function(x, ..., wt = NULL, sort = FALSE, name = NULL, .drop = group_by_drop_default(x)) {
groups <- group_vars(x)

if (!missing(...)) {
x <- .group_by_static_drop(x, ..., .add = TRUE, .drop = .drop)
out <- group_by(x, ..., .add = TRUE, .drop = .drop)
} else {
out <- x
}

x <- tally(x, wt = !!enquo(wt), sort = sort, name = name)
x <- .group_by_static_drop(x, !!!syms(groups), .add = FALSE, .drop = .drop)
x
out <- tally(out, wt = !!enquo(wt), sort = sort, name = name)
dplyr_reconstruct(out, x)
}

#' @rdname tally
#' @export
add_count <- function(x, ..., wt = NULL, sort = FALSE, name = NULL, .drop = group_by_drop_default(x)) {
groups <- group_vars(x)
add_count <- function(x, ..., wt = NULL, sort = FALSE, name = NULL, .drop = deprecated()) {
if (!missing(.drop)) {
lifecycle::deprecate_warn("1.0.0", "add_count(.drop = )")
}

if (!missing(...)) {
x <- .group_by_static_drop(x, ..., .add = TRUE, .drop = .drop)
out <- group_by(x, ..., .add = TRUE)
} else {
out <- x
}
out <- add_tally(out, wt = !!enquo(wt), sort = sort, name = name)

x <- add_tally(x, wt = !!enquo(wt), sort = sort, name = name)
x <- .group_by_static_drop(x, !!!syms(groups), .add = FALSE, .drop = .drop)
name <- check_name(x, name)
x[[name]] <- out[[name]]
x
}

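Two user-visible changes fall out of the count-tally.R diff: count() now groups transiently and finishes with dplyr_reconstruct(out, x), so the output keeps the class and grouping of the input, and add_count() deprecates .drop through lifecycle instead of silently ignoring it. A rough sketch of both, based on the code above:

library(dplyr)

by_cyl <- group_by(mtcars, cyl)

# the extra grouping by gear is transient; the result is still
# grouped by cyl, like the input
count(by_cyl, gear) %>% group_vars()

# passing .drop now triggers a lifecycle deprecation warning
add_count(by_cyl, gear, .drop = TRUE)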
2 changes: 1 addition & 1 deletion R/deprec-lazyeval.R
@@ -325,7 +325,7 @@ select_.data.frame <- function(.data, ..., .dots = list()) {
#' @export
select_.grouped_df <- function(.data, ..., .dots = list()) {
dots <- compat_lazy_dots(.dots, caller_env(), ...)
select.grouped_df(.data, !!!dots)
select(.data, !!!dots)
}


30 changes: 13 additions & 17 deletions R/distinct.R
@@ -12,10 +12,13 @@
#' If a combination of `...` is not distinct, this keeps the
#' first row of values.
#' @return
#' An object the same type as `.data`. If `...` is empty or `.keep_all` is
#' `TRUE`, the columns will be unchanged. Otherwise, it will first perform a
#' `mutate()`. The rows will be in the same order as the input, but only
#' distinct elements will be preserved.
#' An object of the same type as `.data`.
#'
#' * Rows are a subset of the input, but appear in the same order.
#' * Columns are not modified if `...` is empty or `.keep_all` is `TRUE`.
#' Otherwise, `distinct()` first calls `mutate()` to create new columns.
#' * Groups are not modified.
#' * Data frame attributes are preserved.
#' @export
#' @examples
#' df <- tibble(
@@ -91,24 +94,17 @@ distinct_prepare <- function(.data, vars, group_vars = character(), .keep_all =

#' @export
distinct.data.frame <- function(.data, ..., .keep_all = FALSE) {
prep <- distinct_prepare(.data, enquos(...), .keep_all = .keep_all)

idx <- vec_unique_loc(prep$data[, prep$vars, drop = FALSE])
prep$data[idx, prep$keep, drop = FALSE]
}

#' @export
distinct.grouped_df <- function(.data, ..., .keep_all = FALSE) {
prep <- distinct_prepare(
.data,
prep <- distinct_prepare(.data,
vars = enquos(...),
group_vars = group_vars(.data),
.keep_all = .keep_all
)

# TODO: figure out how to update group indices more efficiently
idx <- vec_unique_loc(prep$data[, prep$vars, drop = FALSE])
prep$data[idx, prep$keep, drop = FALSE]
# out <- as_tibble(prep$data)
out <- prep$data
loc <- vec_unique_loc(as_tibble(out)[prep$vars])

dplyr_row_slice(out[prep$keep], loc)
}


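The consolidated distinct.data.frame() still goes through distinct_prepare(), so grouping variables are always retained and ... can create new columns mutate()-style, but the actual row subsetting is now delegated to dplyr_row_slice(). A short sketch of that documented behaviour:

library(dplyr)

df <- tibble(g = c(1, 1, 2), x = c(1, 1, 5), y = c(3, 4, 6))

# grouping variables are kept even when not named in ...
df %>% group_by(g) %>% distinct(x)

# ... can compute new columns before deduplicating, like mutate()
df %>% distinct(total = x + y)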
2 changes: 1 addition & 1 deletion R/dplyr.r
@@ -43,4 +43,4 @@
#' @importFrom lifecycle deprecated
"_PACKAGE"

utils::globalVariables(c("old_keys", "old_rows", ".rows", "new_indices", "new_rows", "new_rows_sizes", "needs_recycle", "distinct_vars"))
utils::globalVariables(c("old_keys", "old_rows", ".rows", "new_indices", "new_rows", "new_rows_sizes", "needs_recycle", "distinct_vars", "out"))
37 changes: 9 additions & 28 deletions R/filter.R
@@ -41,10 +41,6 @@
#' When applied on a grouped tibble, `filter()` automatically [rearranges][arrange]
#' the tibble by groups for performance reasons.
#'
#' @section Tidy data:
#' When applied to a data frame, row names are silently dropped. To preserve,
#' convert to an explicit variable with [tibble::rownames_to_column()].
#'
#' @section Scoped filtering:
#' The three [scoped] variants ([filter_all()], [filter_if()] and
#' [filter_at()]) make it easy to apply a filtering condition to a
@@ -59,7 +55,13 @@
#' condition evaluates to `TRUE` are kept.
#' @param .preserve when `FALSE` (the default), the grouping structure
#' is recalculated based on the resulting data, otherwise it is kept as is.
#' @inherit arrange return
#' @return
#' An object of the same type as `.data`.
#'
#' * Rows are a subset of the input, but appear in the same order.
#' * Columns are not modified.
#' * The number of groups may be reduced (if `.preserve` is not `TRUE`).
#' * Data frame attributes are preserved.
#' @seealso [filter_all()], [filter_if()] and [filter_at()].
#' @export
#' @examples
@@ -105,24 +107,8 @@ filter.data.frame <- function(.data, ..., .preserve = FALSE) {
return(.data)
}

idx <- filter_rows(.data, ...)
.data[idx[[1]], , drop = FALSE]
}

#' @export
filter.grouped_df <- function(.data, ..., .preserve = !group_by_drop_default(.data)) {
if (missing(...)) {
return(.data)
}

idx <- filter_rows(.data, ...)
data <- as.data.frame(.data)[idx[[1]], , drop = FALSE]

groups <- group_data(.data)
groups$.rows <- filter_update_rows(nrow(.data), idx[[3]], idx[[1]], idx[[2]])
groups <- group_data_trim(groups, .preserve)

new_grouped_df(data, groups)
loc <- filter_rows(.data, ...)[[1]]
dplyr_row_slice(.data, loc, preserve = .preserve)
}

filter_rows <- function(.data, ...) {
@@ -160,8 +146,3 @@ check_filter <- function(dots) {

}
}


filter_update_rows <- function(n_rows, group_indices, keep, new_rows_sizes) {
.Call(`dplyr_filter_update_rows`, n_rows, group_indices, keep, new_rows_sizes)
}
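filter.grouped_df() and the filter_update_rows() C hook are gone; filter.data.frame() now just computes the surviving row locations and calls dplyr_row_slice(.data, loc, preserve = .preserve). A rough sketch of what .preserve controls, as described in the @param entry above (not taken from this PR's tests):

library(dplyr)

gdf <- group_by(tibble(g = c(1, 1, 2), x = 1:3), g)

# default .preserve = FALSE: the grouping structure is recomputed
# from the surviving rows, so the now-empty g == 1 group disappears
gdf %>% filter(x > 2) %>% group_data()

# .preserve = TRUE keeps the original groups, including the empty one
gdf %>% filter(x > 2, .preserve = TRUE) %>% group_data()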