Skip to content

Commit

Permalink
Ranking functions should preserve missing values #774
Browse files Browse the repository at this point in the history
  • Loading branch information
hadley committed Nov 18, 2014
1 parent 538fac6 commit bc02fe0
Show file tree
Hide file tree
Showing 3 changed files with 14 additions and 8 deletions.
2 changes: 2 additions & 0 deletions NEWS.md
@@ -1,5 +1,7 @@
# dplyr 0.3.1.9000

* Ranking functions now preserve missing values (#774).

* dplyr now requires RSQLite >= 1.0. This shouldn't affect your code
in any way (except that RSQLite now doesn't need to be loaded)
but does simplify the internals (#622).
Expand Down
14 changes: 8 additions & 6 deletions R/rank.R
Expand Up @@ -26,9 +26,11 @@
#' }
#'
#' @name ranking
#' @param x a vector of values to rank
#' @param x a vector of values to rank. Missing values are left as is.
#' If you want to treat them as the smallest or largest values, replace
#' them with -Inf or Inf (respectively) before ranking.
#' @examples
#' x <- c(5, 1, 3, 2, 2)
#' x <- c(5, 1, 3, 2, 2, NA)
#' row_number(x)
#' min_rank(x)
#' dense_rank(x)
Expand All @@ -41,7 +43,7 @@ NULL

#' @export
#' @rdname ranking
row_number <- function(x) {
  # Ties are broken by order of appearance ("first"); missing inputs are
  # kept in place and receive rank NA instead of being ranked last.
  rank(x, na.last = "keep", ties.method = "first")
}

# Definition from
# http://blogs.msdn.com/b/craigfr/archive/2008/03/31/ranking-functions-rank-dense-rank-and-ntile.aspx
Expand All @@ -54,12 +56,12 @@ ntile <- function(x, n) {

#' @export
#' @rdname ranking
min_rank <- function(x) {
  # Tied values all share the smallest rank of their group, leaving gaps
  # after ties; missing inputs are kept in place with rank NA.
  rank(x, na.last = "keep", ties.method = "min")
}

#' @export
#' @rdname ranking
dense_rank <- function(x) {
  # Rank with NAs kept in place; ties get the default (average) rank, so
  # equal inputs always map to the same rank value.
  ranks <- rank(x, na.last = "keep")

  # sort() drops NA, so the lookup table holds only the distinct ranks of
  # the non-missing values, in increasing order.
  distinct_ranks <- sort(unique(ranks))

  # The position in the lookup table is the gap-free rank; an NA rank has
  # no match and therefore stays NA.
  match(ranks, distinct_ranks)
}

Expand All @@ -72,6 +74,6 @@ percent_rank <- function(x) {
#' @export
#' @rdname ranking
cume_dist <- function(x) {
  # Cumulative distribution: the proportion of non-missing values that are
  # less than or equal to each value. Ties take the largest rank of their
  # group ("max"), and missing inputs stay NA in the result.
  ranks <- rank(x, ties.method = "max", na.last = "keep")

  # Divide by the number of non-missing values rather than length(x).
  # NAs are excluded from the ranking, so counting them in the denominator
  # would stop the largest observed value from reaching 1.
  ranks / sum(!is.na(x))
}

6 changes: 4 additions & 2 deletions man/ranking.Rd
Expand Up @@ -22,7 +22,9 @@ percent_rank(x)
cume_dist(x)
}
\arguments{
\item{x}{a vector of values to rank}
\item{x}{a vector of values to rank. Missing values are left as is.
If you want to treat them as the smallest or largest values, replace
them with -Inf or Inf (respectively) before ranking.}

\item{n}{number of groups to split up into.}
}
Expand Down Expand Up @@ -54,7 +56,7 @@ to smallest outputs. Use \code{\link{desc}} to reverse the direction.
}
}
\examples{
x <- c(5, 1, 3, 2, 2)
x <- c(5, 1, 3, 2, 2, NA)
row_number(x)
min_rank(x)
dense_rank(x)
Expand Down

0 comments on commit bc02fe0

Please sign in to comment.