/
pivot_wider.R
204 lines (179 loc) · 7.27 KB
/
pivot_wider.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
#' Pivot data from long to wide
#'
#' @description "Widens" data, increasing the number of columns and
#' decreasing the number of rows.
#'
#' @param .df A data.frame or data.table
#' @param id_cols A set of columns that uniquely identifies each observation.
#' Defaults to all columns in the data table except for the columns specified in `names_from` and `values_from`.
#' Typically used when you have additional variables that is directly related.
#' `tidyselect` compatible.
#' @param names_from A pair of arguments describing which column (or columns) to get the name of the output column `name_from`,
#' and which column (or columns) to get the cell values from `values_from`).
#' `tidyselect` compatible.
#' @param values_from A pair of arguments describing which column (or columns) to get the name of the output column `name_from`,
#' and which column (or columns) to get the cell values from `values_from`.
#' `tidyselect` compatible.
#' @param names_sep the separator between the names of the columns
#' @param names_prefix prefix to add to the names of the new columns
#' @param names_glue Instead of using `names_sep` and `names_prefix`, you can supply a
#' glue specification that uses the `names_from` columns (and special `.value`) to create custom column names
#' @param names_sort Should the resulting new columns be sorted.
#' @param names_repair Treatment of duplicate names. See `?vctrs::vec_as_names` for options/details.
#' @param values_fn Should the data be aggregated before casting? If the formula doesn't identify a single observation for each cell, then aggregation defaults to length with a message.
#' @param values_fill If values are missing, what value should be filled in
#' @param unused_fn Aggregation function to be applied to unused columns.
#' Default is to ignore unused columns.
#'
#' @examples
#' df <- tidytable(
#' id = 1,
#' names = c("a", "b", "c"),
#' vals = 1:3
#' )
#'
#' df %>%
#' pivot_wider(names_from = names, values_from = vals)
#'
#' df %>%
#' pivot_wider(
#' names_from = names, values_from = vals, names_prefix = "new_"
#' )
#' @export
pivot_wider <- function(.df,
names_from = name,
values_from = value,
id_cols = NULL,
names_sep = "_",
names_prefix = "",
names_glue = NULL,
names_sort = FALSE,
names_repair = "unique",
values_fill = NULL,
values_fn = NULL,
unused_fn = NULL) {
UseMethod("pivot_wider")
}
#' @export
pivot_wider.tidytable <- function(.df,
names_from = name,
values_from = value,
id_cols = NULL,
names_sep = "_",
names_prefix = "",
names_glue = NULL,
names_sort = FALSE,
names_repair = "unique",
values_fill = NULL,
values_fn = NULL,
unused_fn = NULL) {
names_from <- tidyselect_names(.df, {{ names_from }})
values_from <- tidyselect_names(.df, {{ values_from }})
id_cols <- enquo(id_cols)
id_cols_is_null <- quo_is_null(id_cols)
if (id_cols_is_null) {
id_cols <- setdiff(names(.df), c(names_from, values_from))
} else {
id_cols <- tidyselect_names(.df, !!id_cols)
unused_cols <- setdiff(names(.df), c(names_from, values_from, id_cols))
}
values_fn <- quo_squash(enquo(values_fn))
uses_dot_value <- !is.null(names_glue) && str_detect(names_glue, "{.value}", fixed = TRUE)
# Prepare output column names
if (names_prefix != "" && is.null(names_glue)) {
first_name <- sym(names_from[[1]])
.df <- mutate(.df, !!first_name := paste0(.env$names_prefix, !!first_name))
} else if (uses_dot_value) {
glue_df <- distinct(.df, all_of(names_from))
values_from_reps <- nrow(glue_df)
glue_df <- vec_rep(glue_df, length(values_from))
glue_df <- mutate(glue_df,
.value = vec_rep_each(.env$values_from, .env$values_from_reps),
.before = 1)
glue_vars <- glue_data(glue_df, names_glue)
# mimic column names assigned by data.table::dcast()
if (length(values_from) <= 1) {
glue_df <- dt_j(glue_df, .value := NULL)
}
names(glue_vars) <- exec(paste, !!!glue_df, sep = names_sep)
} else if (!is.null(names_glue)) {
.df <- mutate(.df,
.names_from = glue(.env$names_glue),
.before = all_of(names_from[1]))
.df <- dt_j(.df, (names_from) := NULL)
names_from <- ".names_from"
}
if (is_false(names_sort)) {
.df <- mutate(.df, across(all_of(names_from), ~ safe_as_factor(.x)))
}
no_id <- length(id_cols) == 0
if (no_id) {
lhs <- "..."
} else {
lhs <- str_flatten(glue("`{id_cols}`"), " + ")
}
rhs <- str_flatten(glue("`{names_from}`"), " + ")
formula <- glue("{lhs} ~ {rhs}")
.dcast_df <- select(.df, any_of(c(id_cols, names_from, values_from)))
out <- eval_tidy(call2(
"dcast",
quo(.dcast_df),
formula = formula,
value.var = values_from,
fun.aggregate = expr(!!values_fn),
sep = names_sep,
fill = values_fill,
.ns = "data.table"
))
out <- remove_key(out)
if (no_id) {
out <- select(out, -any_of("."))
}
if (uses_dot_value) {
new_vars <- setdiff(names(out), id_cols)
out <- set_col_names(out, glue_vars[new_vars], new_vars)
}
out <- df_name_repair(out, names_repair)
if (!id_cols_is_null && !is.null(unused_fn)) {
unused_fn <- as_function(unused_fn)
unused_df <- select(.df, all_of(id_cols), all_of(unused_cols))
unused_df <- summarize(unused_df,
across(any_of(unused_cols), unused_fn),
.by = any_of(id_cols))
out <- left_join(out, unused_df, by = id_cols)
}
as_tidytable(out)
}
#' @export
pivot_wider.data.frame <- function(.df,
names_from = name,
values_from = value,
id_cols = NULL,
names_sep = "_",
names_prefix = "",
names_glue = NULL,
names_sort = FALSE,
names_repair = "unique",
values_fill = NULL,
values_fn = NULL,
unused_fn = NULL) {
.df <- as_tidytable(.df)
pivot_wider(
.df, names_from = {{ names_from }}, values_from = {{ values_from }},
id_cols = {{ id_cols }}, names_sep = names_sep,
names_prefix = names_prefix, names_glue = names_glue,
names_sort = names_sort, names_repair = names_repair,
values_fill = values_fill, values_fn = {{ values_fn }},
unused_fn = unused_fn
)
}
safe_as_factor <- function(x) {
if (vec_ptype_compatible(x, factor())) {
vec_cast(x, factor())
} else if (is.numeric(x)) {
factor(x, vec_unique(x))
} else {
x
}
}
globalVariables(c(".", ".names_from", "name", "value", ".value"))