-
Notifications
You must be signed in to change notification settings - Fork 2
/
tidy-stats.R
120 lines (107 loc) · 3.17 KB
/
tidy-stats.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
#' Generate tidy quantiles for a dataframe column
#'
#' This function respects groupings from `dplyr::group_by()`. When the dataframe
#' contains grouped data, the quantiles are computed within each subgroup of
#' data.
#'
#' @param data a dataframe
#' @param var a column in the dataframe
#' @param probs quantiles to return. Defaults to `c(.1, .3, .5, .7, .9)`
#' @return a long dataframe (a tibble) with quantiles for the variable.
#' @export
#' @examples
#' tidy_quantile(sleep, extra)
#'
#' sleep %>%
#' dplyr::group_by(group) %>%
#' tidy_quantile(extra)
tidy_quantile <- function(data, var, probs = seq(.1, .9, .2)) {
UseMethod("tidy_quantile")
}
#' @export
tidy_quantile.default <- function(data, var, probs = seq(.1, .9, .2)) {
q <- enquo(var)
rlang::eval_tidy(q, data = data) %>%
stats::quantile(probs, na.rm = TRUE) %>%
tibble::enframe("quantile", value = rlang::quo_name(q))
}
#' @export
tidy_quantile.grouped_df <- function(data, var, probs = seq(.1, .9, .2)) {
q <- enquo(var)
groups <- split(data, group_indices(data)) %>%
lapply(select, !!! group_vars(data)) %>%
lapply(distinct) %>%
lapply(ungroup) %>%
bind_rows(.id = "....id")
quantiles <- split(data, group_indices(data)) %>%
lapply(ungroup) %>%
lapply(tidy_quantile.default, !! q, probs) %>%
bind_rows(.id = "....id")
groups %>%
left_join(quantiles, by = "....id", multiple = "all") %>%
select(-one_of("....id"))
}
#' Generate tidy correlations
#'
#' This function respects groupings from `dplyr::group_by()`. When the dataframe
#' contains grouped data, the correlations are computed within each subgroup of
#' data.
#'
#' @param data a dataframe
#' @param ... columns to select, using `dplyr::select()` semantics.
#' @param type type of correlation, either `"pearson"` (the default) or
#' `"spearman"`.
#' @return a long dataframe (a tibble) with correlations calculated for each
#' pair of columns.
#' @export
#' @examples
#' tidy_correlation(ChickWeight, -Chick, -Diet)
#'
#' tidy_correlation(ChickWeight, weight, Time)
#'
#' ChickWeight %>%
#' dplyr::group_by(Diet) %>%
#' tidy_correlation(weight, Time)
tidy_correlation <- function(data, ..., type = c("pearson", "spearman")) {
UseMethod("tidy_correlation")
}
#' @export
tidy_correlation.grouped_df <- function(
data,
...,
type = c("pearson", "spearman")
) {
# We need two sets of columns selected and returned.
#
# 1. user's variable selection: ...
# 2. the grouping columns
#
# Before reframe(), we select() these columns.
#
# Inside of reframe(), pick(everything()) selects all the non-grouping
# columns, and reframe() returns the grouping columns for us.
data |>
select(..., group_cols()) |>
reframe(
tidy_correlation.default(pick(everything()), everything(), type = type)
)
}
#' @export
tidy_correlation.default <- function(
data,
...,
type = c("pearson", "spearman")
) {
select(data, ...) |>
as.matrix() |>
Hmisc::rcorr(type = type) |>
broom::tidy() |>
tibble::remove_rownames() |>
tibble::as_tibble() |>
mutate(
across(
.cols = where(is.numeric) & !one_of("column1", "column2"),
.fns = function(x) round(x, 4)
)
)
}