-
Notifications
You must be signed in to change notification settings - Fork 2.1k
/
nth-value.R
184 lines (160 loc) · 4.68 KB
/
nth-value.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
#' Extract the first, last, or nth value from a vector
#'
#' These are useful helpers for extracting a single value from a vector. They
#' are guaranteed to return a meaningful value, even when the input is shorter
#' than expected. You can also provide an optional secondary vector that defines
#' the ordering.
#'
#' @details
#' For most vector types, `first(x)`, `last(x)`, and `nth(x, n)` work like
#' `x[[1]]`, `x[[length(x)]`, and `x[[n]]`, respectively. The primary exception
#' is data frames, where they instead retrieve rows, i.e. `x[1, ]`, `x[nrow(x),
#' ]`, and `x[n, ]`. This is consistent with the tidyverse/vctrs principle which
#' treats data frames as a vector of rows, rather than a vector of columns.
#'
#' @param x A vector
#' @param n For `nth()`, a single integer specifying the position.
#' Negative integers index from the end (i.e. `-1L` will return the
#' last value in the vector).
#' @param order_by An optional vector the same size as `x` used to determine the
#' order.
#' @param default A default value to use if the position does not exist in `x`.
#'
#' If `NULL`, the default, a missing value is used.
#'
#' If supplied, this must be a single value, which will be cast to the type of
#' `x`.
#'
#' When `x` is a list , `default` is allowed to be any value. There are no
#' type or size restrictions in this case.
#' @param na_rm Should missing values in `x` be removed before extracting the
#' value?
#'
#' @return
#' If `x` is a list, a single element from that list. Otherwise, a vector the
#' same type as `x` with size 1.
#'
#' @export
#' @examples
#' x <- 1:10
#' y <- 10:1
#'
#' first(x)
#' last(y)
#'
#' nth(x, 1)
#' nth(x, 5)
#' nth(x, -2)
#'
#' # `first()` and `last()` are often useful in `summarise()`
#' df <- tibble(x = x, y = y)
#' df %>%
#' summarise(
#' across(x:y, first, .names = "{col}_first"),
#' y_last = last(y)
#' )
#'
#' # Selecting a position that is out of bounds returns a default value
#' nth(x, 11)
#' nth(x, 0)
#'
#' # This out of bounds behavior also applies to empty vectors
#' first(integer())
#'
#' # You can customize the default value with `default`
#' nth(x, 11, default = -1L)
#' first(integer(), default = 0L)
#'
#' # `order_by` provides optional ordering
#' last(x)
#' last(x, order_by = y)
#'
#' # `na_rm` removes missing values before extracting the value
#' z <- c(NA, NA, 1, 3, NA, 5, NA)
#' first(z)
#' first(z, na_rm = TRUE)
#' last(z, na_rm = TRUE)
#' nth(z, 3, na_rm = TRUE)
#'
#' # For data frames, these select entire rows
#' df <- tibble(a = 1:5, b = 6:10)
#' first(df)
#' nth(df, 4)
nth <- function(x, n, order_by = NULL, default = NULL, na_rm = FALSE) {
size <- vec_size(x)
vec_check_size(n, size = 1L)
n <- vec_cast(n, to = integer())
if (!is.null(order_by)) {
vec_check_size(order_by, size = size)
}
default <- check_nth_default(default, x = x)
check_bool(na_rm)
if (na_rm && vec_any_missing(x)) {
not_missing <- !vec_detect_missing(x)
x <- vec_slice(x, not_missing)
size <- vec_size(x)
if (!is.null(order_by)) {
order_by <- vec_slice(order_by, not_missing)
}
}
if (is.na(n)) {
abort("`n` can't be `NA`.")
}
if (n < 0L) {
# Negative values index from RHS
n <- size + n + 1L
}
if (n <= 0L || n > size) {
return(default)
}
if (!is.null(order_by)) {
order <- vec_order_radix(order_by)
n <- order[[n]]
}
vec_slice2(x, n)
}
#' @export
#' @rdname nth
first <- function(x, order_by = NULL, default = NULL, na_rm = FALSE) {
nth(x, 1L, order_by = order_by, default = default, na_rm = na_rm)
}
#' @export
#' @rdname nth
last <- function(x, order_by = NULL, default = NULL, na_rm = FALSE) {
nth(x, -1L, order_by = order_by, default = default, na_rm = na_rm)
}
check_nth_default <- function(default, x, ..., error_call = caller_env()) {
check_dots_empty0(...)
if (obj_is_list(x)) {
# Very special behavior for lists, since we use `[[` on them.
# Valid to use any `default` here (even non-vectors).
# And `default = NULL` is the correct default `default` for lists.
return(default)
}
if (is.null(default)) {
return(vec_init(x))
}
vec_check_size(default, size = 1L, call = error_call)
default <- vec_cast(
x = default,
to = x,
x_arg = "default",
to_arg = "x",
call = error_call
)
default
}
vec_slice2 <- function(x, i) {
# Our unimplemented vctrs equivalent of `[[`
# https://github.com/r-lib/vctrs/pull/1228/
# A real implementation would use this, but it is too slow right now
# and we know `i` is a valid integer index (#6682)
# i <- vec_as_location2(i, vec_size(x))
if (obj_is_list(x)) {
out <- .subset2(x, i)
} else {
out <- vec_slice(x, i)
out <- vec_set_names(out, NULL)
}
out
}