-
-
Notifications
You must be signed in to change notification settings - Fork 15
/
pivot_longer.R
138 lines (120 loc) · 4.31 KB
/
pivot_longer.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
#' Pivot data from wide to long
#'
#' `pivot_longer()` "lengthens" data, increasing the number of rows and decreasing the number of columns. The inverse
#' transformation is [pivot_wider()].
#'
#' @param data `data.frame`. The data to pivot.
#' @param cols <[`poor-select`][select_helpers]>. Columns to pivot into longer format.
#' @param names_to `character(n)`. The name of the new column(s) that will contain the column names.
#' @param names_prefix `character(1)`. A regular expression used to remove matching text from the start of each variable
#' name.
#' @param names_sep,names_pattern `character(1)`. If `names_to` contains multiple values, this argument controls how the
#' column name is broken up. `names_pattern` takes a regular expression containing matching groups (`()`).
#' @param values_to `character(n)`. The name of the new column(s) that will contain the values of the pivoted variables.
#' @param values_drop_na `logical(1)`. If `TRUE`, will drop rows that contain only `NA` in the `values_to` column. This
#' effectively converts explicit missing values to implicit missing values, and should generally be used only when
#' missing values in data were created by its structure.
#' @param ... Additional arguments passed on to methods.
#'
#' @return A `data.frame`.
#'
#' @examples
#' wide_data <- data.frame(replicate(5, rnorm(10)))
#' # Customizing the names
#' pivot_longer(
#' data = wide_data,
#' cols = c(1, 2),
#' names_to = "Column",
#' values_to = "Numbers"
#' )
#'
#' @export
pivot_longer <- function(
data,
cols,
names_to = "name",
names_prefix = NULL,
names_sep = NULL,
names_pattern = NULL,
values_to = "value",
values_drop_na = FALSE,
...
) {
if (missing(cols)) {
stop("`cols` must select at least one column.")
}
cols <- names(eval_select_pos(data, substitute(cols)))
if (any(names_to %in% setdiff(names(data), cols))) {
stop(
paste0(
"Some values of the columns specified in 'names_to' are already present
as column names. Either use another value in `names_to` or rename the
following columns: ",
paste(names_to[which(names_to %in% setdiff(names(data), cols))], sep = ", ")
),
call. = FALSE)
}
# Sanity checks ----------------
# nothing to select?
if (length(cols) == 0L) {
stop("No columns found for reshaping data.", call. = FALSE)
}
# Reshaping ---------------------
# Create Index column as needed by reshape
data[["_Row"]] <- as.numeric(rownames(data))
# Create a new index for cases with length(names_to) > 1
names_to_2 <- paste(names_to, collapse = "_")
# Reshape
long <- stats::reshape(
data,
varying = cols,
idvar = "_Row",
v.names = values_to,
timevar = names_to_2,
direction = "long"
)
# Cleaning --------------------------
# Sort the dataframe (to match pivot_longer's output)
long <- long[do.call(order, long[, c("_Row", names_to_2)]), ]
long[["_Row"]] <- NULL
# Re-insert col names as levels
long[[names_to_2]] <- cols[long[[names_to_2]]]
# if several variable in names_to, split the names either with names_sep or with names_pattern
if (length(names_to) > 1) {
for (i in seq_along(names_to)) {
if (is.null(names_pattern)) {
new_vals <- unlist(lapply(
strsplit(unique(long[[names_to_2]]), names_sep, fixed = TRUE),
function(x) x[i]
))
long[[names_to[i]]] <- new_vals
} else {
colPattern <- regmatches(
x = unique(long[[names_to_2]]),
m = regexec(names_pattern, unique(long[[names_to_2]]))
)
colPattern <- as.data.frame(do.call(rbind, colPattern))[, c(1, i + 1)]
names(colPattern) <- c(names_to_2, names_to[i])
long <- left_join(x = long, y = colPattern, by = names_to_2)
}
}
long[[names_to_2]] <- NULL
}
# reorder
long <- relocate(.data = long, values_to, .after = -1)
# remove names prefix if specified
if (!is.null(names_prefix)) {
if (length(names_to) > 1) {
stop("`names_prefix` only works when `names_to` is of length 1.", call. = FALSE)
}
long[[names_to]] <- gsub(paste0("^", names_prefix), "", long[[names_to]])
}
if (values_drop_na) {
long <- long[!is.na(long[, values_to]), ]
}
# Reset row names
rownames(long) <- NULL
# Remove reshape attributes
attributes(long)$reshapeLong <- NULL
long
}