Skip to content

Commit

Permalink
Fix for possible column name duplication when generating summary rows (
Browse files Browse the repository at this point in the history
…#792)

* Use improved summary generation scheme

* Update man_extract_summary_1.png

* Update extract_summary.Rd

* Update test-summary_rows.R

* Add several testthat snapshot tests

* Refactor `dt_summary_build()`

* Refactor `dt_summary_build()`

* Refactor `summary_row_tags()`

* Refactor functions that generate summary rows

* Replace col index with colname

* Modify `filter()` stmts

* Replace use of `across()`

* Rename columns in extracted summary

* Make corrections to `extract_summary()`
  • Loading branch information
rich-iannone committed Jun 11, 2021
1 parent 3eb07d5 commit bf1fba0
Show file tree
Hide file tree
Showing 11 changed files with 3,550 additions and 88 deletions.
111 changes: 60 additions & 51 deletions R/dt_summary.R
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ dt_summary_df_data_get <- function(data) {

dt <- dt_summary_df_get(data)

dt["summary_df_data_list"] %>% as.list()
as.list(dt["summary_df_data_list"])
}

dt_summary_df_display_get <- function(data) {
Expand All @@ -27,7 +27,7 @@ dt_summary_df_display_get <- function(data) {

dt <- dt_summary_df_get(data)

dt["summary_df_display_list"] %>% as.list()
as.list(dt["summary_df_display_list"])
}

dt_summary_set <- function(data, summary) {
Expand All @@ -42,17 +42,14 @@ dt_summary_data_set <- function(data, summary) {

dt_summary_init <- function(data) {

list() %>%
dt_summary_set(summary = ., data = data)
dt_summary_set(summary = list(), data = data)
}

dt_summary_add <- function(data, summary) {

data %>%
dt_summary_get() %>%
append(
list(summary)
) %>%
append(list(summary)) %>%
dt_summary_set(summary = ., data = data)
}

Expand All @@ -65,7 +62,6 @@ dt_summary_build <- function(data,
context) {

# TODO: is `dt_body_get()` necessary here? `dt_boxh_vars_default()` could be used

summary_list <- dt_summary_get(data = data)
body <- dt_body_get(data = data)
data_tbl <- dt_data_get(data = data)
Expand Down Expand Up @@ -99,11 +95,13 @@ dt_summary_build <- function(data,

if (length(labels) != length(unique(labels))) {

stop("All summary labels must be unique:\n",
" * Review the names provided in `fns`\n",
" * These labels are in conflict: ",
paste0(labels, collapse = ", "), ".",
call. = FALSE)
stop(
"All summary labels must be unique:\n",
" * Review the names provided in `fns`\n",
" * These labels are in conflict: ",
paste0(labels, collapse = ", "), ".",
call. = FALSE
)
}

# Resolve the `missing_text`
Expand All @@ -113,10 +111,13 @@ dt_summary_build <- function(data,
assert_rowgroups <- function() {

if (all(is.na(stub_df$group_id))) {
stop("There are no row groups in the gt object:\n",
" * Use `groups = NULL` to create a grand summary\n",
" * Define row groups using `gt()` or `tab_row_group()`",
call. = FALSE)

stop(
"There are no row groups in the gt object:\n",
" * Use `groups = NULL` to create a grand summary\n",
" * Define row groups using `gt()` or `tab_row_group()`",
call. = FALSE
)
}
}

Expand Down Expand Up @@ -162,7 +163,7 @@ dt_summary_build <- function(data,
base::setdiff(
base::setdiff(
colnames(body),
c("groupname", "rowname")
c("groupname", rowname_col_private)
),
columns
)
Expand All @@ -172,38 +173,40 @@ dt_summary_build <- function(data,
if (identical(groups, grand_summary_col)) {

select_data_tbl <-
dplyr::select(data_tbl, !!columns) %>%
dplyr::mutate(group_id = !!grand_summary_col) %>%
dplyr::select(group_id, !!columns) %>%
as.data.frame(stringsAsFactors = FALSE)
dplyr::select(data_tbl, .env$columns) %>%
dplyr::mutate(!!group_id_col_private := .env$grand_summary_col) %>%
dplyr::select(.env$group_id_col_private, .env$columns)

} else {

select_data_tbl <-
as.data.frame(
dplyr::bind_cols(
dplyr::select(stub_df, group_id),
data_tbl[stub_df$rownum_i, columns]
),
stringsAsFactors = FALSE
dplyr::bind_cols(
dplyr::select(stub_df, !!group_id_col_private := .data$group_id),
data_tbl[stub_df$rownum_i, columns]
)
}

# Get the registered function calls
agg_funs <- lapply(fns, rlang::as_closure)

summary_dfs_data <-
dplyr::bind_rows(
lapply(
seq_along(agg_funs),
FUN = function(j) {

group_label <- labels[j]

select_data_tbl %>%
dplyr::filter(group_id %in% !!groups) %>%
dplyr::group_by(group_id) %>%
dplyr::filter(.data[[group_id_col_private]] %in% .env$groups) %>%
dplyr::group_by(.data[[group_id_col_private]]) %>%
dplyr::summarize_all(.funs = agg_funs[[j]]) %>%
dplyr::ungroup() %>%
dplyr::mutate(rowname = !!labels[j]) %>%
dplyr::select(group_id, rowname, dplyr::everything())
dplyr::mutate(!!rowname_col_private := .env$group_label) %>%
dplyr::select(
.env$group_id_col_private, .env$rowname_col_private,
dplyr::everything()
)
}
)
)
Expand All @@ -213,7 +216,10 @@ dt_summary_build <- function(data,
summary_dfs_data[, columns_excl] <- NA_real_

summary_dfs_data <-
dplyr::select(summary_dfs_data, group_id, rowname, colnames(body))
dplyr::select(
summary_dfs_data, .env$group_id_col_private, .env$rowname_col_private,
colnames(body)
)

# Format the displayed summary lines
summary_dfs_display <-
Expand Down Expand Up @@ -255,24 +261,23 @@ dt_summary_build <- function(data,

for (group in groups) {

# Place data frame in separate list component by `group`
group_sym <- rlang::enquo(group)

group_summary_data_df <-
summary_dfs_data %>%
dplyr::filter(group_id == !!group_sym)
dplyr::filter(summary_dfs_data, .data[[group_id_col_private]] == .env$group)

group_summary_display_df <-
summary_dfs_display %>%
dplyr::filter(group_id == !!group_sym)
dplyr::filter(summary_dfs_display, .data[[group_id_col_private]] == .env$group)

summary_df_data_list <-
c(summary_df_data_list,
stats::setNames(list(group_summary_data_df), group))
c(
summary_df_data_list,
stats::setNames(list(group_summary_data_df), group)
)

summary_df_display_list <-
c(summary_df_display_list,
stats::setNames(list(group_summary_display_df), group))
c(
summary_df_display_list,
stats::setNames(list(group_summary_display_df), group)
)
}
}

Expand All @@ -286,23 +291,23 @@ dt_summary_build <- function(data,

for (i in seq(summary_df_display_list)) {

arrangement <- unique(summary_df_display_list[[i]]$rowname)
arrangement <-
unique(summary_df_display_list[[i]][, rowname_col_private, drop = TRUE])

summary_df_display_list[[i]] <-
summary_df_display_list[[i]] %>%
dplyr::select(-group_id) %>%
dplyr::group_by(rowname) %>%
dplyr::select(-.env$group_id_col_private) %>%
dplyr::group_by(.data[[rowname_col_private]]) %>%
dplyr::summarize_all(last_non_na)

summary_df_display_list[[i]] <-
summary_df_display_list[[i]][
match(arrangement, summary_df_display_list[[i]]$rowname), ] %>%
match(arrangement, summary_df_display_list[[i]][[rowname_col_private]]), ] %>%
replace(is.na(.), missing_text)
}

# Return a list of lists, each of which have
# summary data frames for display and for data
# collection purposes
# Return a list of lists, each of which have summary data frames for
# display and for data collection purposes
list_of_summaries <-
list(
summary_df_data_list = summary_df_data_list,
Expand All @@ -311,3 +316,7 @@ dt_summary_build <- function(data,

dt_summary_data_set(data = data, summary = list_of_summaries)
}

grand_summary_col <- "::GRAND_SUMMARY"
rowname_col_private <- "::rowname::"
group_id_col_private <- "::group_id::"
18 changes: 15 additions & 3 deletions R/export.R
Original file line number Diff line number Diff line change
Expand Up @@ -586,7 +586,7 @@ as_rtf <- function(data,
#' summary_extracted %>%
#' unlist(recursive = FALSE) %>%
#' dplyr::bind_rows() %>%
#' gt()
#' gt(groupname_col = "group_id")
#'
#' @section Figures:
#' \if{html}{\figure{man_extract_summary_1.png}{options: width=100\%}}
Expand All @@ -607,7 +607,7 @@ extract_summary <- function(data) {

stop(
"There is no summary list to extract.\n",
"* Use the `summary_rows()` function to generate summaries.",
"Use the `summary_rows()`/`grand_summary_rows()` functions to generate summaries.",
call. = FALSE
)
}
Expand All @@ -618,5 +618,17 @@ extract_summary <- function(data) {

# Extract the list of summary data frames
# that contains tidy, unformatted data
as.list(dt_summary_df_data_get(data = built_data))
summary_tbl <-
dt_summary_df_data_get(data = built_data) %>%
lapply(FUN = function(x) {
lapply(x, function(y) {
dplyr::rename(
y,
rowname = .env$rowname_col_private,
group_id = .env$group_id_col_private
)
})
})

as.list(summary_tbl)
}
15 changes: 7 additions & 8 deletions R/summary_rows.R
Original file line number Diff line number Diff line change
Expand Up @@ -126,24 +126,23 @@ summary_rows <- function(data,
data <-
dt_boxhead_add_var(
data = data,
var = "rowname",
var = rowname_col_private,
type = "stub",
column_label = list("rowname"),
column_label = list(rowname_col_private),
column_align = "left",
column_width = list(NULL),
hidden_px = list(NULL),
add_where = "bottom"
)

nrow_data <- nrow(data$`_data`)

# Add the `"rowname"` column into `_data`
# Add the `"::rowname::"` column into `_data`
data$`_data` <-
data$`_data` %>%
dplyr::mutate(rowname = rep("", .env$nrow_data)) %>%
dplyr::select(dplyr::everything(), .data$rowname)
dplyr::mutate(!!rowname_col_private := rep("", nrow(data$`_data`))) %>%
dplyr::select(dplyr::everything(), .env$rowname_col_private)


# Place the `rowname` values into `stub_df$rowname`; these are
# Place the `::rowname::` values into `stub_df$rowname`; these are
# empty strings which will provide an empty stub for locations
# adjacent to the body rows
stub_df[["rowname"]] <- ""
Expand Down
3 changes: 0 additions & 3 deletions R/utils_render_common.R
Original file line number Diff line number Diff line change
@@ -1,6 +1,3 @@

grand_summary_col <- "::GRAND_SUMMARY"

# Define the contexts
all_contexts <- c("html", "latex", "rtf", "default")

Expand Down
6 changes: 3 additions & 3 deletions R/utils_render_html.R
Original file line number Diff line number Diff line change
Expand Up @@ -1102,7 +1102,7 @@ summary_row_tags <- function(list_of_summaries,
# select the column named `rowname` and all of the visible columns
summary_df <-
list_of_summaries$summary_df_display_list[[group_id]] %>%
dplyr::select(.data$rowname, .env$default_vars)
dplyr::select(.env$rowname_col_private, .env$default_vars)

n_cols <- ncol(summary_df)

Expand All @@ -1127,8 +1127,8 @@ summary_row_tags <- function(list_of_summaries,

if (group_id == grand_summary_col) {

# In this case, `grand_summary_col` is a global variable
# (`"::GRAND_SUMMARY"`) assigned in `utils_render_common.R`)
# In the above condition, `grand_summary_col` is a global variable
# (`"::GRAND_SUMMARY"`) assigned in `dt_summary.R`)

styles_resolved_row <-
styles_resolved_group[styles_resolved_group$rownum == j, , drop = FALSE]
Expand Down
18 changes: 9 additions & 9 deletions R/utils_render_latex.R
Original file line number Diff line number Diff line change
Expand Up @@ -456,21 +456,21 @@ create_summary_rows_l <- function(list_of_summaries,
}

# Obtain the group ID for the group of rows that ends at row `x`;
group <-
group_id <-
groups_rows_df[groups_rows_df$row_end == x, "group_id", drop = TRUE]

# Check whether this group has a corresponding entry in
# `list_of_summaries$summary_df_display_list` (i.e., are there
# summary rows for this group?); if not, return an empty string
if (!(group %in% names(list_of_summaries$summary_df_display_list))) {
if (!(group_id %in% names(list_of_summaries$summary_df_display_list))) {
return("")
}

# Obtain the summary data table specific to the group ID and
# select the column named `rowname` and all of the visible columns
# select the column named `::rowname::` and all of the visible columns
summary_df <-
list_of_summaries$summary_df_display_list[[group]] %>%
dplyr::select(.data$rowname, .env$default_vars)
list_of_summaries$summary_df_display_list[[group_id]] %>%
dplyr::select(.env$rowname_col_private, .env$default_vars)

row_splits_summary <- split_row_content(summary_df)

Expand All @@ -497,8 +497,8 @@ create_grand_summary_rows_l <- function(list_of_summaries,

if (
length(list_of_summaries) < 1 ||
is.null(list_of_summaries$summary_df_display_list$`::GRAND_SUMMARY`) ||
nrow(list_of_summaries$summary_df_display_list$`::GRAND_SUMMARY`) < 1
is.null(list_of_summaries$summary_df_display_list[[grand_summary_col]]) ||
nrow(list_of_summaries$summary_df_display_list[[grand_summary_col]]) < 1
) {
return("")
}
Expand All @@ -508,8 +508,8 @@ create_grand_summary_rows_l <- function(list_of_summaries,
default_vars <- boxh[boxh$type == "default", "var", drop = TRUE]

grand_summary_df <-
list_of_summaries$summary_df_display_list$`::GRAND_SUMMARY` %>%
dplyr::select(.data$rowname, .env$default_vars)
list_of_summaries$summary_df_display_list[[grand_summary_col]] %>%
dplyr::select(.env$rowname_col_private, .env$default_vars)

row_splits_summary <- split_row_content(grand_summary_df)

Expand Down
Loading

0 comments on commit bf1fba0

Please sign in to comment.