Join GitHub today
GitHub is home to over 50 million developers working together to host and review code, manage projects, and build software together.
Sign up| #' Groupwise descriptive statistics | |
| #' | |
| #' Descriptive statistics of a continuous variable for the different levels of | |
| #' a categorical variable. \code{boxplot.group_summary()} creates boxplots of | |
| #' the continuous variable for the different levels of the categorical variable. | |
| #' | |
| #' @param data A \code{data.frame} or a \code{tibble}. | |
| #' @param gvar Column in \code{data}. | |
| #' @param cvar Column in \code{data}. | |
| #' @param x An object of the class \code{ds_group_summary}. | |
| #' @param print_plot logical; if \code{TRUE}, prints the plot else returns a plot object. | |
| #' @param ... Further arguments to be passed to or from methods. | |
| #' | |
| #' @return \code{ds_group_summary()} returns an object of class \code{"ds_group_summary"}. | |
| #' An object of class \code{"ds_group_summary"} is a list containing the | |
| #' following components: | |
| #' | |
| #' \item{stats}{A data frame containing descriptive statistics for the different | |
| #' levels of the factor variable.} | |
| #' \item{tidy_stats}{A tibble containing descriptive statistics for the different | |
| #' levels of the factor variable.} | |
| #' \item{plotdata}{Data for boxplot method.} | |
| #' | |
| #' @examples | |
| #' # ds_group summary | |
| #' ds_group_summary(mtcarz, cyl, mpg) | |
| #' | |
| #' # boxplot | |
| #' k <- ds_group_summary(mtcarz, cyl, mpg) | |
| #' plot(k) | |
| #' | |
| #' # tibble | |
| #' k$tidy_stats | |
| #' | |
| #' @seealso \code{\link{ds_summary_stats}} | |
| #' | |
| #' @export | |
| #' | |
| ds_group_summary <- function(data, gvar, cvar) UseMethod("ds_group_summary") | |
| #' @export | |
| #' | |
| ds_group_summary.default <- function(data, gvar, cvar) { | |
| check_df(data) | |
| gvar_name <- deparse(substitute(gvar)) | |
| cvar_name <- deparse(substitute(cvar)) | |
| g_var <- rlang::enquo(gvar) | |
| c_var <- rlang::enquo(cvar) | |
| check_numeric(data, !! c_var, cvar_name) | |
| check_factor(data, !! g_var, gvar_name) | |
| gvar <- dplyr::pull(data, !! g_var) | |
| cvar <- dplyr::pull(data, !! c_var) | |
| xname <- | |
| data %>% | |
| dplyr::select(!! g_var) %>% | |
| names() | |
| yname <- | |
| data %>% | |
| dplyr::select(!! c_var) %>% | |
| names() | |
| split_dat <- tapply(cvar, list(gvar), function(gvar) { | |
| c( | |
| length(gvar), min(gvar), max(gvar), mean(gvar), | |
| stats::median(gvar), ds_mode(gvar), stats::sd(gvar), stats::var(gvar), | |
| ds_skewness(gvar), ds_kurtosis(gvar), stat_uss(gvar), | |
| ds_css(gvar), ds_cvar(gvar), ds_std_error(gvar), | |
| ds_range(gvar), stats::IQR(gvar) | |
| ) | |
| }) | |
| splito <- sapply(split_dat, round, 2) | |
| rnames <- c( | |
| "Obs", "Minimum", "Maximum", "Mean", "Median", "Mode", | |
| "Std. Deviation", "Variance", "Skewness", "Kurtosis", | |
| "Uncorrected SS", "Corrected SS", "Coeff Variation", | |
| "Std. Error Mean", "Range", "Interquartile Range" | |
| ) | |
| out <- data.frame(rnames, splito) | |
| names(out) <- c("Statistic/Levels", levels(gvar)) | |
| plot_data <- data.frame(gvar, cvar) | |
| names(plot_data) <- c(xname, yname) | |
| tidystats <- | |
| data %>% | |
| dplyr::select(!! g_var, !! c_var) %>% | |
| na.omit() %>% | |
| dplyr::group_by(!! g_var) %>% | |
| dplyr::summarise(length = length(!! c_var), min = min(!! c_var), | |
| max = max(!! c_var), mean = mean(!! c_var), | |
| median= stats::median(!! c_var), mode = ds_mode(!! c_var), | |
| sd = stats::sd(!! c_var), variance = stats::var(!! c_var), | |
| skewness = ds_skewness(!! c_var), kurtosis = ds_kurtosis(!! c_var), | |
| coeff_var = ds_cvar(!! c_var), std_error = ds_std_error(!! c_var), | |
| range = ds_range(!! c_var), iqr = stats::IQR(!! c_var)) | |
| result <- list(stats = out, | |
| tidy_stats = tidystats, | |
| plotdata = plot_data, | |
| xvar = xname, | |
| yvar = yname, | |
| data = data | |
| ) | |
| class(result) <- "ds_group_summary" | |
| return(result) | |
| } | |
| #' @export | |
| print.ds_group_summary <- function(x, ...) { | |
| print_group(x) | |
| } | |
| #' @rdname ds_group_summary | |
| #' @export | |
| #' | |
| plot.ds_group_summary <- function(x, print_plot = TRUE, ...) { | |
| x_lab <- magrittr::use_series(x, xvar) | |
| y_lab <- magrittr::use_series(x, yvar) | |
| k <- | |
| x %>% | |
| magrittr::use_series(xvar) %>% | |
| rlang::sym() | |
| j <- | |
| x %>% | |
| magrittr::use_series(yvar) %>% | |
| rlang::sym() | |
| p <- | |
| x %>% | |
| magrittr::use_series(data) %>% | |
| dplyr::select(x = !! k, y = !! j) %>% | |
| ggplot2::ggplot() + | |
| ggplot2::geom_boxplot(ggplot2::aes(x = x, y = y), fill = "blue") + | |
| ggplot2::xlab(x_lab) + ggplot2::ylab(y_lab) + | |
| ggplot2::ggtitle(paste(y_lab, "by", x_lab)) | |
| if (print_plot) { | |
| print(p) | |
| } else { | |
| return(p) | |
| } | |
| } |