Ranking functions should preserve missing values #774

tidyverse · Nov 18, 2014 · bc02fe0 · bc02fe0
1 parent 538fac6
commit bc02fe0
Show file tree

Hide file tree

Showing 3 changed files with 14 additions and 8 deletions.
diff --git a/NEWS.md b/NEWS.md
@@ -1,5 +1,7 @@
 # dplyr 0.3.1.9000
 
+* Ranking functions now preserve missing values (#774).
+
 * dplyr now requires RSQLite >= 1.0. This shouldn't affect your code
   in any way (except that RSQLite now doesn't need to be loaded) 
   but does simplify the internals (#622).

diff --git a/R/rank.R b/R/rank.R
@@ -26,9 +26,11 @@
 #' }
 #'
 #' @name ranking
-#' @param x a vector of values to rank
+#' @param x a vector of values to rank. Missing values are left as is.
+#'   If you want to treat them as the smallest or largest values, replace
+#'   with -Inf or Inf, respectively, before ranking.
 #' @examples
-#' x <- c(5, 1, 3, 2, 2)
+#' x <- c(5, 1, 3, 2, 2, NA)
 #' row_number(x)
 #' min_rank(x)
 #' dense_rank(x)
@@ -41,7 +43,7 @@ NULL
 
 #' @export
 #' @rdname ranking
-row_number <- function(x) rank(x, ties.method = "first")
+row_number <- function(x) rank(x, ties.method = "first", na.last = "keep")
 
 # Definition from
 # http://blogs.msdn.com/b/craigfr/archive/2008/03/31/ranking-functions-rank-dense-rank-and-ntile.aspx
@@ -54,12 +56,12 @@ ntile <- function(x, n) {
 
 #' @export
 #' @rdname ranking
-min_rank <- function(x) rank(x, ties.method = "min")
+min_rank <- function(x) rank(x, ties.method = "min", na.last = "keep")
 
 #' @export
 #' @rdname ranking
 dense_rank <- function(x) {
-  r <- rank(x)
+  r <- rank(x, na.last = "keep")
   match(r, sort(unique(r)))
 }
 
@@ -72,6 +74,6 @@ percent_rank <- function(x) {
 #' @export
 #' @rdname ranking
 cume_dist <- function(x) {
-  rank(x, ties.method = "max") / length(x)
+  rank(x, ties.method = "max", na.last = "keep") / length(x)
 }
 
diff --git a/man/ranking.Rd b/man/ranking.Rd
@@ -22,7 +22,9 @@ percent_rank(x)
 cume_dist(x)
 }
 \arguments{
-\item{x}{a vector of values to rank}
+\item{x}{a vector of values to rank. Missing values are left as is.
+If you want to treat them as the smallest or largest values, replace
+with -Inf or Inf, respectively, before ranking.}
 
 \item{n}{number of groups to split up into.}
 }
@@ -54,7 +56,7 @@ to smallest outputs. Use \code{\link{desc}} to reverse the direction.
 }
 }
 \examples{
-x <- c(5, 1, 3, 2, 2)
+x <- c(5, 1, 3, 2, 2, NA)
 row_number(x)
 min_rank(x)
 dense_rank(x)