tidyverse · krlmlr · Dec 30, 2017 · Nov 6, 2017 · Dec 30, 2017 · lionel-
diff --git a/NEWS.md b/NEWS.md
@@ -45,6 +45,8 @@
 
 * Add error for `distinct()` if any of the selected columns are of type `list` (#3088, @foo-bar-baz-qux).
 
+* `sample_n()` and `sample_frac()` on grouped data frames are now faster, especially for those with a large number of groups (#3193, @saurfang).
+
 * Better error message if dbplyr is not installed when accessing database backends (#3225).
 
 *  Fix `row_number()` and `ntile()` ordering to use the locale-dependent ordering functions in R when dealing with character vectors, rather than always using the C-locale ordering function in C (#2792, @foo-bar-baz-qux).

diff --git a/R/grouped-df.r b/R/grouped-df.r
@@ -265,11 +265,11 @@ sample_n.grouped_df <- function(tbl, size, replace = FALSE,
     inform("`.env` is deprecated and no longer has any effect")
   }
   weight <- enquo(weight)
+  weight <- mutate(tbl, w = !!weight)[["w"]]
 
   index <- attr(tbl, "indices")
   sampled <- lapply(index, sample_group,
     frac = FALSE,
-    tbl = tbl,
     size = size,
     replace = replace,
     weight = weight
@@ -292,11 +292,11 @@ sample_frac.grouped_df <- function(tbl, size = 1, replace = FALSE,
     )
   }
   weight <- enquo(weight)
+  weight <- mutate(tbl, w = !!weight)[["w"]]
 
   index <- attr(tbl, "indices")
   sampled <- lapply(index, sample_group,
     frac = TRUE,
-    tbl = tbl,
     size = size,
     replace = replace,
     weight = weight
@@ -306,7 +306,7 @@ sample_frac.grouped_df <- function(tbl, size = 1, replace = FALSE,
   grouped_df(tbl[idx, , drop = FALSE], vars = groups(tbl))
 }
 
-sample_group <- function(tbl, i, frac, size, replace, weight) {
+sample_group <- function(i, frac, size, replace, weight) {
   n <- length(i)
   if (frac) {
     check_frac(size, replace)
@@ -315,9 +315,8 @@ sample_group <- function(tbl, i, frac, size, replace, weight) {
     check_size(size, n, replace)
   }
 
-  weight <- eval_tidy(weight, tbl[i + 1, , drop = FALSE])
   if (!is_null(weight)) {
-    weight <- check_weight(weight, n)
+    weight <- check_weight(weight[i + 1], n)
   }
 
   i[sample.int(n, size, replace = replace, prob = weight)]