Fix for possible column name duplication when generating summary rows (…

…#792) * Use improved summary generation scheme * Update man_extract_summary_1.png * Update extract_summary.Rd * Update test-summary_rows.R * Add several testthat snapshot tests * Refactor `dt_summary_build()` * Refactor `dt_summary_build()` * Refactor `summary_row_tags()` * Refactor functions that generate summary rows * Replace col index with colname * Modify `filter()` stmts * Replace use of `across()` * Rename columns in extracted summary * Make corrections to `extract_summary()`
rstudio · Jun 11, 2021 · bf1fba0 · bf1fba0
1 parent 3eb07d5
commit bf1fba0
Show file tree

Hide file tree

Showing 11 changed files with 3,550 additions and 88 deletions.
diff --git a/R/dt_summary.R b/R/dt_summary.R
@@ -18,7 +18,7 @@ dt_summary_df_data_get <- function(data) {
 
   dt <- dt_summary_df_get(data)
 
-  dt["summary_df_data_list"] %>% as.list()
+  as.list(dt["summary_df_data_list"])
 }
 
 dt_summary_df_display_get <- function(data) {
@@ -27,7 +27,7 @@ dt_summary_df_display_get <- function(data) {
 
   dt <- dt_summary_df_get(data)
 
-  dt["summary_df_display_list"] %>% as.list()
+  as.list(dt["summary_df_display_list"])
 }
 
 dt_summary_set <- function(data, summary) {
@@ -42,17 +42,14 @@ dt_summary_data_set <- function(data, summary) {
 
 dt_summary_init <- function(data) {
 
-  list() %>%
-    dt_summary_set(summary = ., data = data)
+  dt_summary_set(summary = list(), data = data)
 }
 
 dt_summary_add <- function(data, summary) {
 
   data %>%
     dt_summary_get() %>%
-    append(
-      list(summary)
-    ) %>%
+    append(list(summary)) %>%
     dt_summary_set(summary = ., data = data)
 }
 
@@ -65,7 +62,6 @@ dt_summary_build <- function(data,
                              context) {
 
   # TODO: is `dt_body_get()` necessary here? `dt_boxh_vars_default()` could be used
-
   summary_list <- dt_summary_get(data = data)
   body <- dt_body_get(data = data)
   data_tbl <- dt_data_get(data = data)
@@ -99,11 +95,13 @@ dt_summary_build <- function(data,
 
     if (length(labels) != length(unique(labels))) {
 
-      stop("All summary labels must be unique:\n",
-           " * Review the names provided in `fns`\n",
-           " * These labels are in conflict: ",
-           paste0(labels, collapse = ", "), ".",
-           call. = FALSE)
+      stop(
+        "All summary labels must be unique:\n",
+        " * Review the names provided in `fns`\n",
+        " * These labels are in conflict: ",
+        paste0(labels, collapse = ", "), ".",
+        call. = FALSE
+      )
     }
 
     # Resolve the `missing_text`
@@ -113,10 +111,13 @@ dt_summary_build <- function(data,
     assert_rowgroups <- function() {
 
       if (all(is.na(stub_df$group_id))) {
-        stop("There are no row groups in the gt object:\n",
-             " * Use `groups = NULL` to create a grand summary\n",
-             " * Define row groups using `gt()` or `tab_row_group()`",
-             call. = FALSE)
+
+        stop(
+          "There are no row groups in the gt object:\n",
+          " * Use `groups = NULL` to create a grand summary\n",
+          " * Define row groups using `gt()` or `tab_row_group()`",
+          call. = FALSE
+        )
       }
     }
 
@@ -162,7 +163,7 @@ dt_summary_build <- function(data,
       base::setdiff(
         base::setdiff(
           colnames(body),
-          c("groupname", "rowname")
+          c("groupname", rowname_col_private)
         ),
         columns
       )
@@ -172,38 +173,40 @@ dt_summary_build <- function(data,
     if (identical(groups, grand_summary_col)) {
 
       select_data_tbl <-
-        dplyr::select(data_tbl, !!columns) %>%
-        dplyr::mutate(group_id = !!grand_summary_col) %>%
-        dplyr::select(group_id, !!columns) %>%
-        as.data.frame(stringsAsFactors = FALSE)
+        dplyr::select(data_tbl, .env$columns) %>%
+        dplyr::mutate(!!group_id_col_private := .env$grand_summary_col) %>%
+        dplyr::select(.env$group_id_col_private, .env$columns)
 
     } else {
 
       select_data_tbl <-
-        as.data.frame(
-          dplyr::bind_cols(
-            dplyr::select(stub_df, group_id),
-            data_tbl[stub_df$rownum_i, columns]
-          ),
-          stringsAsFactors = FALSE
+        dplyr::bind_cols(
+          dplyr::select(stub_df, !!group_id_col_private := .data$group_id),
+          data_tbl[stub_df$rownum_i, columns]
         )
     }
 
     # Get the registered function calls
     agg_funs <- lapply(fns, rlang::as_closure)
+
     summary_dfs_data <-
       dplyr::bind_rows(
         lapply(
           seq_along(agg_funs),
           FUN = function(j) {
 
+            group_label <- labels[j]
+
             select_data_tbl %>%
-              dplyr::filter(group_id %in% !!groups) %>%
-              dplyr::group_by(group_id) %>%
+              dplyr::filter(.data[[group_id_col_private]] %in% .env$groups) %>%
+              dplyr::group_by(.data[[group_id_col_private]]) %>%
               dplyr::summarize_all(.funs = agg_funs[[j]]) %>%
               dplyr::ungroup() %>%
-              dplyr::mutate(rowname = !!labels[j]) %>%
-              dplyr::select(group_id, rowname, dplyr::everything())
+              dplyr::mutate(!!rowname_col_private := .env$group_label) %>%
+              dplyr::select(
+                .env$group_id_col_private, .env$rowname_col_private,
+                dplyr::everything()
+              )
           }
         )
       )
@@ -213,7 +216,10 @@ dt_summary_build <- function(data,
     summary_dfs_data[, columns_excl] <- NA_real_
 
     summary_dfs_data <-
-      dplyr::select(summary_dfs_data, group_id, rowname, colnames(body))
+      dplyr::select(
+        summary_dfs_data, .env$group_id_col_private, .env$rowname_col_private,
+        colnames(body)
+      )
 
     # Format the displayed summary lines
     summary_dfs_display <-
@@ -255,24 +261,23 @@ dt_summary_build <- function(data,
 
     for (group in groups) {
 
-      # Place data frame in separate list component by `group`
-      group_sym <- rlang::enquo(group)
-
       group_summary_data_df <-
-        summary_dfs_data %>%
-        dplyr::filter(group_id == !!group_sym)
+        dplyr::filter(summary_dfs_data, .data[[group_id_col_private]] == .env$group)
 
       group_summary_display_df <-
-        summary_dfs_display %>%
-        dplyr::filter(group_id == !!group_sym)
+        dplyr::filter(summary_dfs_display, .data[[group_id_col_private]] == .env$group)
 
       summary_df_data_list <-
-        c(summary_df_data_list,
-          stats::setNames(list(group_summary_data_df), group))
+        c(
+          summary_df_data_list,
+          stats::setNames(list(group_summary_data_df), group)
+        )
 
       summary_df_display_list <-
-        c(summary_df_display_list,
-          stats::setNames(list(group_summary_display_df), group))
+        c(
+          summary_df_display_list,
+          stats::setNames(list(group_summary_display_df), group)
+        )
     }
   }
 
@@ -286,23 +291,23 @@ dt_summary_build <- function(data,
 
   for (i in seq(summary_df_display_list)) {
 
-    arrangement <- unique(summary_df_display_list[[i]]$rowname)
+    arrangement <-
+      unique(summary_df_display_list[[i]][, rowname_col_private, drop = TRUE])
 
     summary_df_display_list[[i]] <-
       summary_df_display_list[[i]] %>%
-      dplyr::select(-group_id) %>%
-      dplyr::group_by(rowname) %>%
+      dplyr::select(-.env$group_id_col_private) %>%
+      dplyr::group_by(.data[[rowname_col_private]]) %>%
       dplyr::summarize_all(last_non_na)
 
     summary_df_display_list[[i]] <-
       summary_df_display_list[[i]][
-        match(arrangement, summary_df_display_list[[i]]$rowname), ] %>%
+        match(arrangement, summary_df_display_list[[i]][[rowname_col_private]]), ] %>%
       replace(is.na(.), missing_text)
   }
 
-  # Return a list of lists, each of which have
-  # summary data frames for display and for data
-  # collection purposes
+  # Return a list of lists, each of which have summary data frames for
+  # display and for data collection purposes
   list_of_summaries <-
     list(
       summary_df_data_list = summary_df_data_list,
@@ -311,3 +316,7 @@ dt_summary_build <- function(data,
 
   dt_summary_data_set(data = data, summary = list_of_summaries)
 }
+
+grand_summary_col <- "::GRAND_SUMMARY"
+rowname_col_private <- "::rowname::"
+group_id_col_private <- "::group_id::"
diff --git a/R/export.R b/R/export.R
@@ -586,7 +586,7 @@ as_rtf <- function(data,
 #'   summary_extracted %>%
 #'   unlist(recursive = FALSE) %>%
 #'   dplyr::bind_rows() %>%
-#'   gt()
+#'   gt(groupname_col = "group_id")
 #'
 #' @section Figures:
 #' \if{html}{\figure{man_extract_summary_1.png}{options: width=100\%}}
@@ -607,7 +607,7 @@ extract_summary <- function(data) {
 
     stop(
       "There is no summary list to extract.\n",
-      "* Use the `summary_rows()` function to generate summaries.",
+      "Use the `summary_rows()`/`grand_summary_rows()` functions to generate summaries.",
       call. = FALSE
     )
   }
@@ -618,5 +618,17 @@ extract_summary <- function(data) {
 
   # Extract the list of summary data frames
   # that contains tidy, unformatted data
-  as.list(dt_summary_df_data_get(data = built_data))
+  summary_tbl <-
+    dt_summary_df_data_get(data = built_data) %>%
+    lapply(FUN = function(x) {
+      lapply(x, function(y) {
+        dplyr::rename(
+          y,
+          rowname = .env$rowname_col_private,
+          group_id = .env$group_id_col_private
+        )
+      })
+    })
+
+  as.list(summary_tbl)
 }
diff --git a/R/summary_rows.R b/R/summary_rows.R
@@ -126,24 +126,23 @@ summary_rows <- function(data,
     data <-
       dt_boxhead_add_var(
         data = data,
-        var = "rowname",
+        var = rowname_col_private,
         type = "stub",
-        column_label = list("rowname"),
+        column_label = list(rowname_col_private),
         column_align = "left",
         column_width = list(NULL),
         hidden_px = list(NULL),
         add_where = "bottom"
       )
 
-    nrow_data <- nrow(data$`_data`)
-
-    # Add the `"rowname"` column into `_data`
+    # Add the `"::rowname::"` column into `_data`
     data$`_data` <-
       data$`_data` %>%
-      dplyr::mutate(rowname = rep("", .env$nrow_data)) %>%
-      dplyr::select(dplyr::everything(), .data$rowname)
+      dplyr::mutate(!!rowname_col_private := rep("", nrow(data$`_data`))) %>%
+      dplyr::select(dplyr::everything(), .env$rowname_col_private)
+
 
-    # Place the `rowname` values into `stub_df$rowname`; these are
+    # Place the `::rowname::` values into `stub_df$rowname`; these are
     # empty strings which will provide an empty stub for locations
     # adjacent to the body rows
     stub_df[["rowname"]] <- ""

diff --git a/R/utils_render_common.R b/R/utils_render_common.R
@@ -1,6 +1,3 @@
-
-grand_summary_col <- "::GRAND_SUMMARY"
-
 # Define the contexts
 all_contexts <- c("html", "latex", "rtf", "default")
 

diff --git a/R/utils_render_html.R b/R/utils_render_html.R
@@ -1102,7 +1102,7 @@ summary_row_tags <- function(list_of_summaries,
     # select the column named `rowname` and all of the visible columns
     summary_df <-
       list_of_summaries$summary_df_display_list[[group_id]] %>%
-      dplyr::select(.data$rowname, .env$default_vars)
+      dplyr::select(.env$rowname_col_private, .env$default_vars)
 
     n_cols <- ncol(summary_df)
 
@@ -1127,8 +1127,8 @@ summary_row_tags <- function(list_of_summaries,
 
       if (group_id == grand_summary_col) {
 
-        # In this case, `grand_summary_col` is a global variable
-        # (`"::GRAND_SUMMARY"`) assigned in `utils_render_common.R`)
+        # In the above condition, `grand_summary_col` is a global variable
+        # (`"::GRAND_SUMMARY"`) assigned in `dt_summary.R`)
 
         styles_resolved_row <-
           styles_resolved_group[styles_resolved_group$rownum == j, , drop = FALSE]

diff --git a/R/utils_render_latex.R b/R/utils_render_latex.R
@@ -456,21 +456,21 @@ create_summary_rows_l <- function(list_of_summaries,
           }
 
           # Obtain the group ID for the group of rows that ends at row `x`;
-          group <-
+          group_id <-
             groups_rows_df[groups_rows_df$row_end == x, "group_id", drop = TRUE]
 
           # Check whether this group has a corresponding entry in
           # `list_of_summaries$summary_df_display_list` (i.e., are there
           # summary rows for this group?); if not, return an empty string
-          if (!(group %in% names(list_of_summaries$summary_df_display_list))) {
+          if (!(group_id %in% names(list_of_summaries$summary_df_display_list))) {
             return("")
           }
 
           # Obtain the summary data table specific to the group ID and
-          # select the column named `rowname` and all of the visible columns
+          # select the column named `::rowname::` and all of the visible columns
           summary_df <-
-            list_of_summaries$summary_df_display_list[[group]] %>%
-            dplyr::select(.data$rowname, .env$default_vars)
+            list_of_summaries$summary_df_display_list[[group_id]] %>%
+            dplyr::select(.env$rowname_col_private, .env$default_vars)
 
           row_splits_summary <- split_row_content(summary_df)
 
@@ -497,8 +497,8 @@ create_grand_summary_rows_l <- function(list_of_summaries,
 
   if (
     length(list_of_summaries) < 1 ||
-    is.null(list_of_summaries$summary_df_display_list$`::GRAND_SUMMARY`) ||
-    nrow(list_of_summaries$summary_df_display_list$`::GRAND_SUMMARY`) < 1
+    is.null(list_of_summaries$summary_df_display_list[[grand_summary_col]]) ||
+    nrow(list_of_summaries$summary_df_display_list[[grand_summary_col]]) < 1
   ) {
     return("")
   }
@@ -508,8 +508,8 @@ create_grand_summary_rows_l <- function(list_of_summaries,
   default_vars <- boxh[boxh$type == "default", "var", drop = TRUE]
 
   grand_summary_df <-
-    list_of_summaries$summary_df_display_list$`::GRAND_SUMMARY` %>%
-    dplyr::select(.data$rowname, .env$default_vars)
+    list_of_summaries$summary_df_display_list[[grand_summary_col]] %>%
+    dplyr::select(.env$rowname_col_private, .env$default_vars)
 
   row_splits_summary <- split_row_content(grand_summary_df)