Skip to content

Commit

Permalink
Merge pull request #13 from lwjohnst86/retain_factor_order
Browse files Browse the repository at this point in the history
Factor ordering as set in `add_rows` was not as shown in table
  • Loading branch information
lwjohnst86 committed Jun 1, 2017
2 parents 28baca0 + 6038bd1 commit 1c514ae
Show file tree
Hide file tree
Showing 3 changed files with 68 additions and 26 deletions.
2 changes: 1 addition & 1 deletion NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

## Bug fixes

* Fixed issue where factor ordering changes (#10; #11)
* Fixed issue where factor ordering changes (#11; #13)

# carpenter 0.2.1

Expand Down
49 changes: 32 additions & 17 deletions R/build_table.R
Original file line number Diff line number Diff line change
Expand Up @@ -101,34 +101,49 @@ make_numeric_row <-

make_factor_row <-
function(data, header, rows, stat, digits) {
data <- tidyr::gather_(data, 'Variables', 'Values', rows) %>%
dplyr::mutate_(Values = "factor(Values, levels = unique(Values),
labels = unique(Values))") %>%
dplyr::group_by_(header, 'Variables', 'Values') %>%
factor_levels <- unlist(lapply(data[rows], levels))
variable_names_pattern <- paste0("(", paste(rows, collapse = "|"), ").*")
variable_names <- gsub(variable_names_pattern, "\\1", names(factor_levels))

factor_levels_df <-
dplyr::data_frame(Variables = variable_names,
Values = factor_levels) %>%
dplyr::group_by_("Variables") %>%
dplyr::mutate(ValOrder = 1:n())

variable_names_order <- dplyr::data_frame(Variables = rows,
VarOrder = 1:length(rows))

factor_summary <- data %>%
dplyr::mutate_at(rows, as.numeric) %>%
tidyr::gather_('Variables', 'ValOrder', rows) %>%
dplyr::group_by_(header, 'Variables', 'ValOrder') %>%
dplyr::tally() %>%
stats::na.omit()
stats::na.omit() %>%
dplyr::ungroup() %>%
dplyr::full_join(factor_levels_df, by = c('Variables', "ValOrder"))

data <- dplyr::group_by_(data, header, 'Variables') %>%
factor_summary <- factor_summary %>%
dplyr::group_by_(header, 'Variables') %>%
dplyr::mutate_(n = lazyeval::interp('stat(n, digits)',
stat = stat, digits = digits)) %>%
tidyr::spread_(header, 'n') %>%
dplyr::ungroup() %>%
dplyr::mutate_(id = lazyeval::interp('1:n()'))
dplyr::full_join(variable_names_order, by = "Variables") %>%
dplyr::arrange_("VarOrder", "ValOrder")

data <- dplyr::full_join(
data,
data %>%
dplyr::group_by_('Variables') %>%
dplyr::tally() %>%
dplyr::mutate_(id = 'cumsum(n) - n + 0.5') %>%
dplyr::select_('-n'),
by = c('Variables', 'id')
factor_pretable <- dplyr::full_join(
factor_summary,
dplyr::data_frame(VarOrder = 1:length(rows) - 0.5,
Variables = rows),
by = c('Variables', 'VarOrder')
)
dplyr::arrange_(data, 'id') %>%

dplyr::arrange_(factor_pretable, 'VarOrder', "ValOrder") %>%
dplyr::mutate_(
Values = "ifelse(is.na(Values), '', as.character(Values))",
Variables = "ifelse(Values != '', '- ', as.character(Variables))",
Variables = "paste0(Variables, Values)"
) %>%
dplyr::select_('-Values', '-id')
dplyr::select_('-Values', "-ValOrder", "-VarOrder")
}
43 changes: 35 additions & 8 deletions tests/testthat/test-build_tables.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,27 +5,54 @@ context("Building the final table")

test_that("build_table outputs correct information", {
ds <- data.frame(
Groups = rep(c('yes', 'no'), each = 4),
Groups = factor(rep(c('yes', 'no'), each = 4)),
Var1 = c(1, 1, 1, 1, 2, 2, 2, 2),
Var2 = c(1, 1, 2, 2, 1, 1, 2, 2),
Var3 = rep(c('no', 'yes'), 2, each = 2)
Var3 = factor(rep(c('no', 'yes'), 2, each = 2)),
Var4 = factor(rep(c('no', 'yes', "huh", "def"), 1, each = 2),
levels = c("yes", "no", "def", "huh"))
)

draft <- outline_table(ds, 'Groups')
draft <- add_rows(draft, 'Var1', stat_meanSD)
draft <- add_rows(draft, 'Var2', stat_medianIQR)
draft <- add_rows(draft, 'Var3', stat_nPct)
draft <- add_rows(draft, 'Var4', stat_nPct)
draft <- renaming(draft, 'rows', function(x)
gsub('Var', 'V', x))
draft <- renaming(draft, 'header', c('Rows', 'Group1', 'Group2'))
output_table <- build_table(draft, finish = FALSE)

expected_table <- data.frame(
Rows = c('V1', 'V2', 'V3', '- no', '- yes'),
Group1 = c('2.0 (0.0)', '1.5 (1.0-2.0)', NA, '2 (50%)', '2 (50%)'),
Group2 = c('1.0 (0.0)', '1.5 (1.0-2.0)', NA, '2 (50%)', '2 (50%)'),
stringsAsFactors = FALSE
expected_table <- dplyr::data_frame(
Rows = c('V1', 'V2', 'V3', '- no', '- yes', "V4", "- yes", "- no", "- def", "- huh"),
Group1 = c('2.0 (0.0)', '1.5 (1.0-2.0)', NA, '2 (50%)', '2 (50%)', NA, NA, NA, "2 (50%)", "2 (50%)"),
Group2 = c('1.0 (0.0)', '1.5 (1.0-2.0)', NA, '2 (50%)', '2 (50%)', NA, "2 (50%)", "2 (50%)", NA, NA)
)

expect_identical(output_table, dplyr::tbl_df(expected_table))
expect_identical(output_table, expected_table)
})

test_that("build_table outputs in order as determined in data", {
ds <- dplyr::data_frame(
Group = as.factor(mtcars$vs),
Z = factor(
rep(c(0, 1), 16),
levels = c(1, 0),
labels = c("yes", "no")
),
A = factor(mtcars$gear, levels = c(5, 4, 3))
)

draft <- outline_table(ds, 'Group')
draft <- add_rows(draft, 'Z', stat_nPct)
draft <- add_rows(draft, 'A', stat_nPct)
output_table <- build_table(draft, finish = FALSE)

expected_table <- dplyr::data_frame(
Variables = c('Z', "- yes", "- no", "A", '- 5', '- 4', '- 3'),
`0` = c(NA, '7 (38.9%)', '11 (61.1%)', NA, "4 (22.2%)", "2 (11.1%)", "12 (66.7%)"),
`1` = c(NA, "9 (64.3%)", "5 (35.7%)", NA, "1 (7.1%)", "10 (71.4%)", "3 (21.4%)")
)

expect_identical(output_table, expected_table)
})

0 comments on commit 1c514ae

Please sign in to comment.