diff --git a/NAMESPACE b/NAMESPACE index 0fbe7ffb..956d1a9e 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -37,6 +37,7 @@ export(brm_plot_draws) export(brm_prior_archetype) export(brm_prior_label) export(brm_prior_simple) +export(brm_recenter_nuisance) export(brm_simulate) export(brm_simulate_categorical) export(brm_simulate_continuous) diff --git a/R/brm_recenter_nuisance.R b/R/brm_recenter_nuisance.R new file mode 100644 index 00000000..f223687f --- /dev/null +++ b/R/brm_recenter_nuisance.R @@ -0,0 +1,80 @@ +#' @title Recenter nuisance variables +#' @export +#' @family archetype utilities +#' @description Change the center of a nuisance variable of an +#' informative prior archetype. +#' @details By "centering vector y at scalar x", we mean taking +#' the difference `z = y - x`. If `x` is the mean, then `mean(z)` is +#' 0. Informative prior archetypes center nuisance variables +#' at their means so the parameters can be interpreted correctly +#' for setting informative priors. This is appropriate most of the time, +#' but sometimes it is better to center a column at a pre-specified +#' scientifically meaningful fixed number. If you want a nuisance column +#' to be centered at a fixed value other than its mean, +#' use [brm_recenter_nuisance()] to shift the center. This function +#' can handle any nuisance variable +#' @return An informative prior archetype data frame with one of the +#' variables re-centered. +#' @param data An informative prior archetype data frame output from +#' [brm_archetype_cells()] or similar. +#' @param nuisance Character of length 1, name of the nuisance column +#' in the data to shift the center. +#' @param center Numeric of length 1, value of the center to shift +#' the column in `nuisance`. The affected column in the returned +#' archetype data frame will look as if it were centered by this +#' value to begin with. +#' @examples +#' set.seed(0L) +#' data <- brm_simulate_outline( +#' n_group = 2, +#' n_patient = 100, +#' n_time = 4, +#' rate_dropout = 0, +#' rate_lapse = 0 +#' ) |> +#' dplyr::mutate(response = rnorm(n = dplyr::n())) |> +#' brm_data_change() |> +#' brm_simulate_continuous(names = c("biomarker1", "biomarker2")) |> +#' brm_simulate_categorical( +#' names = c("status1", "status2"), +#' levels = c("present", "absent") +#' ) +#' archetype <- brm_archetype_cells(data) +#' mean(archetype$nuisance_biomarker1) # after original centering +#' center <- mean(data$biomarker1) +#' center # original center, before the centering from brm_archetype_cells() +#' attr(archetype$nuisance_biomarker1, "brm_center") # original center +#' max(abs((data$biomarker1 - center) - archetype$nuisance_biomarker1)) +#' # Re-center nuisance_biomarker1 at 0.75. +#' archetype <- brm_recenter_nuisance( +#' data = archetype, +#' nuisance = "nuisance_biomarker1", +#' center = 0.75 +#' ) +#' attr(archetype$nuisance_biomarker1, "brm_center") # new center +#' mean(archetype$nuisance_biomarker1) # no longer equal to the center +#' # nuisance_biomarker1 is now as though we centered it at 0.75. +#' max(abs((data$biomarker1 - 0.75) - archetype$nuisance_biomarker1)) +brm_recenter_nuisance <- function(data, nuisance, center) { + brm_data_validate(data = data) + assert( + inherits(data, "brms_mmrm_archetype"), + message = "data must be a brms.mmrm informative prior archetype" + ) + assert_chr(nuisance, "nuisance must be a character string") + assert( + nuisance %in% attr(data, "brm_archetype_nuisance"), + message = c( + "nuisance must be the name of a nuisance column in the archetype data" + ) + ) + assert_num(center, "center must be a valid non-missing numeric scalar") + original <- attr(data[[nuisance]], "brm_center") + assert_num( + original, + paste("original center of", nuisance, "could not be found") + ) + data[[nuisance]] <- data[[nuisance]] + original - center + attr(data[[nuisance]], "brm_center") <- center + data +} diff --git a/R/utils_archetype.R b/R/utils_archetype.R index 775b5b12..4809e3cd 100644 --- a/R/utils_archetype.R +++ b/R/utils_archetype.R @@ -49,7 +49,7 @@ archetype_nuisance <- function( nuisance = out ) } - out + nuisance_center(out) } nuisance_covariates <- function(data) { @@ -62,16 +62,11 @@ nuisance_covariates <- function(data) { colnames(categorical) <- paste0(colnames(categorical), "_") out <- dplyr::bind_cols(out, model.matrix(~ 0 + ., categorical)) } - for (name in colnames(out)) { - out[[name]] <- out[[name]] - mean(out[[name]]) - } out } nuisance_baseline <- function(data) { - baseline <- attr(data, "brm_baseline") - data[[baseline]] <- data[[baseline]] - mean(data[[baseline]]) - data[, baseline, drop = FALSE] + data[, attr(data, "brm_baseline"), drop = FALSE] } nuisance_baseline_subgroup <- function(data) { @@ -98,7 +93,6 @@ nuisance_baseline_time <- function(data) { nuisance_baseline_terms <- function(data, baseline, formula) { matrix <- model.matrix(object = formula, data = data) - matrix <- sweep(matrix, MARGIN = 2L, STATS = colMeans(matrix), FUN = "-") tibble::as_tibble(as.data.frame(matrix)) } @@ -136,3 +130,12 @@ drop_zero_columns <- function(x) { sums <- colSums(abs(x)) x[, sums > .Machine$double.eps, drop = FALSE] } + +nuisance_center <- function(data) { + for (name in colnames(data)) { + mean <- mean(data[[name]]) + attr(data[[name]], "brm_center") <- mean + data[[name]] <- data[[name]] - mean + } + data +} diff --git a/man/brm_prior_archetype.Rd b/man/brm_prior_archetype.Rd index 340f217c..5df36248 100644 --- a/man/brm_prior_archetype.Rd +++ b/man/brm_prior_archetype.Rd @@ -61,18 +61,14 @@ data <- brm_simulate_outline( ) archetype <- brm_archetype_successive_cells(data) dplyr::distinct(data, group, time) -label <- brm_prior_label( - code = "normal(1, 1)", - group = "group_1", - time = "time_1" -) |> +prior <- NULL |> + brm_prior_label("normal(1, 1)", group = "group_1", time = "time_1") |> brm_prior_label("normal(1, 2)", group = "group_1", time = "time_2") |> brm_prior_label("normal(1, 3)", group = "group_1", time = "time_3") |> brm_prior_label("normal(2, 1)", group = "group_2", time = "time_1") |> brm_prior_label("normal(2, 2)", group = "group_2", time = "time_2") |> - brm_prior_label("normal(2, 3)", group = "group_2", time = "time_3") -label -prior <- brm_prior_archetype(label = label, archetype = archetype) + brm_prior_label("normal(2, 3)", group = "group_2", time = "time_3") |> + brm_prior_archetype(archetype = archetype) prior class(prior) } diff --git a/man/brm_prior_label.Rd b/man/brm_prior_label.Rd index ddde3552..7f711970 100644 --- a/man/brm_prior_label.Rd +++ b/man/brm_prior_label.Rd @@ -80,11 +80,8 @@ data <- brm_simulate_outline( ) archetype <- brm_archetype_successive_cells(data) dplyr::distinct(data, group, time) -label <- brm_prior_label( - code = "normal(1, 1)", - group = "group_1", - time = "time_1" -) |> +label <- NULL |> + brm_prior_label("normal(1, 1)", group = "group_1", time = "time_1") |> brm_prior_label("normal(1, 2)", group = "group_1", time = "time_2") |> brm_prior_label("normal(1, 3)", group = "group_1", time = "time_3") |> brm_prior_label("normal(2, 1)", group = "group_2", time = "time_1") |> diff --git a/man/brm_recenter_nuisance.Rd b/man/brm_recenter_nuisance.Rd new file mode 100644 index 00000000..c7f87429 --- /dev/null +++ b/man/brm_recenter_nuisance.Rd @@ -0,0 +1,74 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/brm_recenter_nuisance.R +\name{brm_recenter_nuisance} +\alias{brm_recenter_nuisance} +\title{Recenter nuisance variables} +\usage{ +brm_recenter_nuisance(data, nuisance, center) +} +\arguments{ +\item{data}{An informative prior archetype data frame output from +\code{\link[=brm_archetype_cells]{brm_archetype_cells()}} or similar.} + +\item{nuisance}{Character of length 1, name of the nuisance column +in the data to shift the center.} + +\item{center}{Numeric of length 1, value of the center to shift +the column in \code{nuisance}. The affected column in the returned +archetype data frame will look as if it were centered by this +value to begin with.} +} +\value{ +An informative prior archetype data frame with one of the +variables re-centered. +} +\description{ +Change the center of a nuisance variable of an +informative prior archetype. +} +\details{ +By "centering vector y at scalar x", we mean taking +the difference \code{z = y - x}. If \code{x} is the mean, then \code{mean(z)} is +0. Informative prior archetypes center nuisance variables +at their means so the parameters can be interpreted correctly +for setting informative priors. This is appropriate most of the time, +but sometimes it is better to center a column at a pre-specified +scientifically meaningful fixed number. If you want a nuisance column +to be centered at a fixed value other than its mean, +use \code{\link[=brm_recenter_nuisance]{brm_recenter_nuisance()}} to shift the center. This function +can handle any nuisance variable +} +\examples{ +set.seed(0L) +data <- brm_simulate_outline( + n_group = 2, + n_patient = 100, + n_time = 4, + rate_dropout = 0, + rate_lapse = 0 +) |> + dplyr::mutate(response = rnorm(n = dplyr::n())) |> + brm_data_change() |> + brm_simulate_continuous(names = c("biomarker1", "biomarker2")) |> + brm_simulate_categorical( + names = c("status1", "status2"), + levels = c("present", "absent") + ) +archetype <- brm_archetype_cells(data) +mean(archetype$nuisance_biomarker1) # after original centering +center <- mean(data$biomarker1) +center # original center, before the centering from brm_archetype_cells() +attr(archetype$nuisance_biomarker1, "brm_center") # original center +max(abs((data$biomarker1 - center) - archetype$nuisance_biomarker1)) +# Re-center nuisance_biomarker1 at 0.75. +archetype <- brm_recenter_nuisance( + data = archetype, + nuisance = "nuisance_biomarker1", + center = 0.75 +) +attr(archetype$nuisance_biomarker1, "brm_center") # new center +mean(archetype$nuisance_biomarker1) # no longer equal to the center +# nuisance_biomarker1 is now as though we centered it at 0.75. +max(abs((data$biomarker1 - 0.75) - archetype$nuisance_biomarker1)) +} +\concept{archetype utilities} diff --git a/tests/testthat/test-brm_recenter_nuisance.R b/tests/testthat/test-brm_recenter_nuisance.R new file mode 100644 index 00000000..21a8094b --- /dev/null +++ b/tests/testthat/test-brm_recenter_nuisance.R @@ -0,0 +1,37 @@ +test_that("multiplication works", { + set.seed(0L) + data <- brm_simulate_outline( + n_group = 2, + n_patient = 100, + n_time = 4, + rate_dropout = 0, + rate_lapse = 0 + ) |> + dplyr::mutate(response = rnorm(n = dplyr::n())) |> + brm_data_change() |> + brm_simulate_continuous(names = c("biomarker1", "biomarker2")) |> + brm_simulate_categorical( + names = c("status1", "status2"), + levels = c("present", "absent") + ) + archetype <- brm_archetype_cells(data) + expect_equal(mean(archetype$nuisance_biomarker1), 0) + expect_equal( + mean(data$biomarker1), + attr(archetype$nuisance_biomarker1, "brm_center") + ) + expect_equal( + max(abs((data$biomarker1 - center) - archetype$nuisance_biomarker1)), + 0 + ) + archetype <- brm_recenter_nuisance( + data = archetype, + nuisance = "nuisance_biomarker1", + center = 0.75 + ) + expect_equal(attr(archetype$nuisance_biomarker1, "brm_center"), 0.75) + expect_equal( + max(abs((data$biomarker1 - 0.75) - archetype$nuisance_biomarker1)), + 0 + ) +}) diff --git a/vignettes/archetypes.Rmd b/vignettes/archetypes.Rmd index 7cf0f529..b44b1afd 100644 --- a/vignettes/archetypes.Rmd +++ b/vignettes/archetypes.Rmd @@ -100,7 +100,7 @@ attr(archetype, "brm_archetype_interest") #> [5] "x_group_2_time_3" "x_group_2_time_4" ``` -and we have nuisance variables. Some nuisance variables are continuous covariates, while others are levels of one-hot-encoded concomitant factors or interactions of those concomitant factors with baseline and/or subgroup. All nuisance variables are centered at their means so the reference level of the model is at the "center" of the data and not implicitly conditional on a subset of the data. In addition, some nuisance variables are automatically dropped in order to ensure the model matrix is full-rank. This is critically important to preserve the interpretation of the columns of interest and make sure the informative priors behave as expected. +and we have nuisance variables. Some nuisance variables are continuous covariates, while others are levels of one-hot-encoded concomitant factors or interactions of those concomitant factors with baseline and/or subgroup. All nuisance variables are centered at their means so the reference level of the model is at the "center" of the data and not implicitly conditional on a subset of the data.^[`brm_recenter_nuisance()` can retroactively recenter a nuisance column to a fixed value other than its mean.] In addition, some nuisance variables are automatically dropped in order to ensure the model matrix is full-rank. This is critically important to preserve the interpretation of the columns of interest and make sure the informative priors behave as expected. ```r @@ -293,46 +293,46 @@ model <- brm_model( #> Compiling Stan program... #> Start sampling brms::prior_summary(model) -#> prior class coef group resp -#> (flat) b -#> student_t(4, 2.17, 4.86) b nuisance_baseline.timetime_2 -#> student_t(4, 3.22, 5.25) b nuisance_baseline.timetime_3 -#> student_t(4, 2.53, 5.75) b nuisance_baseline.timetime_4 -#> (flat) b nuisance_biomarker1 -#> (flat) b nuisance_biomarker2 -#> (flat) b nuisance_status1_absent -#> (flat) b nuisance_status2_present -#> student_t(4, 0.98, 2.37) b x_group_1_time_2 -#> student_t(4, 1.82, 3.32) b x_group_1_time_3 -#> student_t(4, 2.35, 4.41) b x_group_1_time_4 -#> student_t(4, 0.31, 2.22) b x_group_2_time_2 -#> student_t(4, 1.94, 2.85) b x_group_2_time_3 -#> student_t(4, 2.33, 3.41) b x_group_2_time_4 -#> (flat) b -#> (flat) b timetime_2 -#> (flat) b timetime_3 -#> (flat) b timetime_4 -#> lkj_corr_cholesky(1) Lcortime -#> dpar nlpar lb ub source -#> default -#> user -#> user -#> user -#> (vectorized) -#> (vectorized) -#> (vectorized) -#> (vectorized) -#> user -#> user -#> user -#> user -#> user -#> user -#> sigma default -#> sigma (vectorized) -#> sigma (vectorized) -#> sigma (vectorized) -#> default +#> prior class coef group resp dpar nlpar +#> (flat) b +#> student_t(4, 2.17, 4.86) b nuisance_baseline.timetime_2 +#> student_t(4, 3.22, 5.25) b nuisance_baseline.timetime_3 +#> student_t(4, 2.53, 5.75) b nuisance_baseline.timetime_4 +#> (flat) b nuisance_biomarker1 +#> (flat) b nuisance_biomarker2 +#> (flat) b nuisance_status1_absent +#> (flat) b nuisance_status2_present +#> student_t(4, 0.98, 2.37) b x_group_1_time_2 +#> student_t(4, 1.82, 3.32) b x_group_1_time_3 +#> student_t(4, 2.35, 4.41) b x_group_1_time_4 +#> student_t(4, 0.31, 2.22) b x_group_2_time_2 +#> student_t(4, 1.94, 2.85) b x_group_2_time_3 +#> student_t(4, 2.33, 3.41) b x_group_2_time_4 +#> (flat) b sigma +#> (flat) b timetime_2 sigma +#> (flat) b timetime_3 sigma +#> (flat) b timetime_4 sigma +#> lkj_corr_cholesky(1) Lcortime +#> lb ub source +#> default +#> user +#> user +#> user +#> (vectorized) +#> (vectorized) +#> (vectorized) +#> (vectorized) +#> user +#> user +#> user +#> user +#> user +#> user +#> default +#> (vectorized) +#> (vectorized) +#> (vectorized) +#> default ``` Marginal mean estimation, post-processing, and visualization automatically understand the archetype without any user intervention. diff --git a/vignettes/archetypes.Rmd.upstream b/vignettes/archetypes.Rmd.upstream index 6da23f10..09f98f97 100644 --- a/vignettes/archetypes.Rmd.upstream +++ b/vignettes/archetypes.Rmd.upstream @@ -70,7 +70,7 @@ Those new columns constitute a custom model matrix to describe the desired param attr(archetype, "brm_archetype_interest") ``` -and we have nuisance variables. Some nuisance variables are continuous covariates, while others are levels of one-hot-encoded concomitant factors or interactions of those concomitant factors with baseline and/or subgroup. All nuisance variables are centered at their means so the reference level of the model is at the "center" of the data and not implicitly conditional on a subset of the data. In addition, some nuisance variables are automatically dropped in order to ensure the model matrix is full-rank. This is critically important to preserve the interpretation of the columns of interest and make sure the informative priors behave as expected. +and we have nuisance variables. Some nuisance variables are continuous covariates, while others are levels of one-hot-encoded concomitant factors or interactions of those concomitant factors with baseline and/or subgroup. All nuisance variables are centered at their means so the reference level of the model is at the "center" of the data and not implicitly conditional on a subset of the data.^[`brm_recenter_nuisance()` can retroactively recenter a nuisance column to a fixed value other than its mean.] In addition, some nuisance variables are automatically dropped in order to ensure the model matrix is full-rank. This is critically important to preserve the interpretation of the columns of interest and make sure the informative priors behave as expected. ```{r} attr(archetype, "brm_archetype_nuisance") diff --git a/vignettes/archetypes_figures/archetype_compare_data-1.png b/vignettes/archetypes_figures/archetype_compare_data-1.png index 6f271201..c60a954b 100644 Binary files a/vignettes/archetypes_figures/archetype_compare_data-1.png and b/vignettes/archetypes_figures/archetype_compare_data-1.png differ