openpharma · wlandau · Jun 8, 2023 · Jun 8, 2023 · Jun 8, 2023 · Jun 8, 2023
diff --git a/.github/workflows/check.yaml b/.github/workflows/check.yaml
@@ -19,6 +19,7 @@ jobs:
     env:
       GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
       R_KEEP_PKG_SOURCE: yes
+      NOT_CRAN: true
 
     steps:
       - uses: actions/checkout@v3

diff --git a/.github/workflows/cover.yaml b/.github/workflows/cover.yaml
@@ -9,6 +9,7 @@ jobs:
     runs-on: ubuntu-latest
     env:
       GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
+      NOT_CRAN: true
 
     steps:
       - uses: actions/checkout@v3

diff --git a/DESCRIPTION b/DESCRIPTION
@@ -9,7 +9,7 @@ Description: The mixed model for repeated measures (MMRM) is a
   and align with best practices for the life sciences.
   References: Bürkner (2017) <10.18637/jss.v080.i01>,
   Mallinckrodt (2008) <doi:10.1177/009286150804200402>.
-Version: 0.0.0.9000
+Version: 0.0.0.9001
 License: MIT + file LICENSE
 URL: https://github.com/RConsortium/brms.mmrm
 BugReports: https://github.com/RConsortium/brms.mmrm/issues
@@ -67,7 +67,8 @@ Imports:
   tidyr,
   tidyselect,
   trialr,
-  utils
+  utils,
+  zoo
 Suggests:
   BH,
   knitr (>= 1.30),

diff --git a/NAMESPACE b/NAMESPACE
@@ -1,5 +1,6 @@
 # Generated by roxygen2: do not edit by hand
 
+export(brm_data)
 export(brm_formula)
 export(brm_marginal_data)
 export(brm_marginal_draws)
@@ -53,3 +54,4 @@ importFrom(tidyselect,everything)
 importFrom(trialr,rlkjcorr)
 importFrom(utils,capture.output)
 importFrom(utils,globalVariables)
+importFrom(zoo,na.locf)
diff --git a/NEWS.md b/NEWS.md
@@ -1,3 +1,7 @@
+# brms.mmrm 0.0.0.9001
+
+* Encapsulate data with roles as attributes (#31).
+
 # brms.mmrm 0.0.0.9000
 
 * First version.
diff --git a/R/brm_data.R b/R/brm_data.R
@@ -0,0 +1,204 @@
+#' @title Create an MMRM dataset.
+#' @export
+#' @family data
+#' @description Create a dataset to analyze with an MMRM.
+#' @return A classed tibble with attributes which denote features of
+#'   the data such as the treatment group and discrete time variables.
+#' @param data Data frame or tibble with longitudinal data.
+#' @param outcome Character of length 1, name of the outcome variable.
+#' @param role Character of length 1. Either `"response"` if `outcome`
+#'   is the raw response variable (e.g. AVAL) or `"change"` if `outcome`
+#'   is change from baseline (e.g. CHG).
+#' @param group Character of length 1, name of the treatment group variable.
+#' @param base Character of length 1, name of the baseline response variable.
+#'   Supply `NULL` to ignore or omit.
+#' @param time Character of length 1, name of the discrete time variable.
+#' @param patient Character of length 1, name of the patient ID variable.
+#' @param covariates Character vector of names of other covariates.
+#' @examples
+#' set.seed(0)
+#' sim <- brm_simulate()
+#' data <- tibble::as_tibble(sim$data)
+#' colnames(data) <- paste0("col_", colnames(data))
+#' data
+#' brm_data(
+#'   data = data,
+#'   outcome = "col_response",
+#'   role = "response",
+#'   group = "col_group",
+#'   time = "col_time",
+#'   patient = "col_patient"
+#' )
+brm_data <- function(
+  data,
+  outcome = "CHG",
+  role = "change",
+  base = NULL,
+  group = "TRT01P",
+  time = "AVISIT",
+  patient = "USUBJID",
+  covariates = character(0)
+) {
+  assert(is.data.frame(data), message = "data arg must be a data frame.")
+  out <- brm_data_new(
+    data = data,
+    outcome = as.character(outcome),
+    role = as.character(role),
+    base = base,
+    group = as.character(group),
+    time = as.character(time),
+    patient = as.character(patient),
+    covariates = as.character(covariates)
+  )
+  brm_data_validate(data = out)
+  out <- brm_data_fill(out)
+  out <- brm_data_select(data = out)
+  out
+}
+
+brm_data_new <- function(
+  data,
+  outcome,
+  role,
+  base,
+  group,
+  time,
+  patient,
+  covariates
+) {
+  out <- tibble::new_tibble(x = data, class = "brm_data")
+  structure(
+    out,
+    outcome = outcome,
+    role = role,
+    base = base,
+    group = group,
+    time = time,
+    patient = patient,
+    covariates = covariates
+  )
+}
+
+brm_data_validate <- function(data) {
+  outcome <- attr(data, "outcome")
+  role <- attr(data, "role")
+  base <- attr(data, "base")
+  group <- attr(data, "group")
+  time <- attr(data, "time")
+  patient <- attr(data, "patient")
+  covariates <- attr(data, "covariates")
+  assert(is.data.frame(data), message = "data must be a data frame")
+  assert(inherits(data, "brm_data"), message = "data not from brm_data()")
+  assert_chr(outcome, "outcome of data must be a nonempty character string")
+  assert_chr(role, "role of data must be a nonempty character string")
+  assert_chr(base %|||% "base", "base of data must NULL or character")
+  assert_chr(group, "group of data must be a nonempty character string")
+  assert_chr(time, "time of data must be a nonempty character string")
+  assert_chr(patient, "patient of data must be a nonempty character string")
+  assert_chr_vec(covariates, "covariates of data must be a character vector")
+  assert_col(outcome, data)
+  assert(
+    role %in% c("response", "change"),
+    message = "role must be either \"response\" or \"change\""
+  )
+  assert_col(base, data)
+  assert_col(group, data)
+  assert_col(time, data)
+  assert_col(patient, data)
+  assert_col(covariates, data)
+  assert(
+    !any(grepl(",", as.character(data[[group]]))),
+    message = "group variable cannot contain commas"
+  )
+  assert(
+    !any(grepl(",", as.character(data[[time]]))),
+    message = "time variable cannot contain commas"
+  )
+  assert(
+    is.numeric(data[[outcome]]),
+    message = "outcome variable in the data must be numeric."
+  )
+  for (column in c(base, group, time, patient, covariates)) {
+    assert(
+      !anyNA(data[[column]]),
+      message = sprintf(
+        "no missing values allowed in column \"%s\"",
+        column
+      )
+    )
+  }
+  for (column in c(group, time)) {
+    assert(
+      !is.numeric(data[[column]]),
+      message = sprintf(
+        paste(
+          "%s column in the data must not be numeric.",
+          "Should be character or factor."
+        ),
+        column
+      )
+    )
+  }
+}
+
+brm_data_select <- function(data) {
+  columns <- c(
+    attr(data, "outcome"),
+    attr(data, "base"),
+    attr(data, "group"),
+    attr(data, "time"),
+    attr(data, "patient"),
+    attr(data, "covariates")
+  )
+  columns <- as.character(columns)
+  data[, columns, drop = FALSE]
+}
+
+brm_data_fill <- function(data) {
+  outcome <- attr(data, "outcome")
+  role <- attr(data, "role")
+  base <- attr(data, "base")
+  group <- attr(data, "group")
+  time <- attr(data, "time")
+  patient <- attr(data, "patient")
+  covariates <- attr(data, "covariates")
+  args <- list(data = data, as.symbol(patient), as.symbol(time))
+  data <- do.call(what = tidyr::complete, args = args)
+  args <- list(.data = data, as.symbol(patient), as.symbol(time))
+  data <- do.call(what = dplyr::arrange, args = args)
+  for (column in c(base, group, covariates)) {
+    data[[column]] <- brm_data_fill_column(data[[column]], data[[patient]])
+  }
+  args <- list(
+    .data = data,
+    as.symbol(group),
+    as.symbol(patient),
+    as.symbol(time)
+  )
+  data <- do.call(what = dplyr::arrange, args = args)
+  brm_data_new(
+    data = data,
+    outcome = outcome,
+    role = role,
+    base = base,
+    group = group,
+    time = time,
+    patient = patient,
+    covariates = covariates
+  )
+}
+
+brm_data_fill_column <- function(x, index) {
+  out <- tapply(
+    X = x,
+    INDEX = index,
+    FUN = brm_data_locf
+  )
+  unlist(out, use.names = FALSE)
+}
+
+brm_data_locf <- function(x) {
+  x <- zoo::na.locf(x, fromLast = FALSE, na.rm = FALSE)
+  x <- zoo::na.locf(x, fromLast = TRUE, na.rm = FALSE)
+  x
+}
diff --git a/R/brm_formula.R b/R/brm_formula.R
@@ -5,18 +5,7 @@
 #' @return An object of class `"brmsformula"` returned from
 #'   `brms::brmsformula()`. It contains the fixed effect parameterization,
 #'   correlation structure, and residual variance structure.
-#' @param response Character of length 1, name of the response variable
-#'   in the data.
-#' @param group Character of length 1, name of the treatment group
-#'   variable in the data.
-#' @param base Character of length 1, name of the baseline response variable
-#'   in the data.
-#' @param time Character of length 1, name of the discrete time variable
-#'   in the data.
-#' @param patient Character of length 1, name of the patient ID variable
-#'   in the data.
-#' @param covariates Character vector of names of other covariates
-#'   in the data.
+#' @param data A classed data frame from [brm_data()].
 #' @param correlation Character of length 1, name of the correlation
 #'   structure. Only `"unstructured"` is currently supported.
 #' @param intercept `TRUE` to include an intercept, `FALSE` to omit.
@@ -31,20 +20,25 @@
 #' @param interaction_group `TRUE` to include treatment-group-by-time
 #'   interaction, `FALSE` to omit.
 #' @examples
-#' brm_formula()
-#' brm_formula(intercept = FALSE, effect_base = FALSE)
+#' set.seed(0)
+#' data <- brm_data(
+#'   data = tibble::as_tibble(brm_simulate()$data),
+#'   outcome = "response",
+#'   role = "response",
+#'   group = "group",
+#'   time = "time",
+#'   patient = "patient"
+#' )
+#' brm_formula(data)
+#' brm_formula(data = data, intercept = FALSE, effect_base = FALSE)
 #' brm_formula(
+#'   data = data,
 #'   intercept = FALSE,
 #'   effect_base = FALSE,
 #'   interaction_group = FALSE
 #' )
 brm_formula <- function(
-  response = "CHG",
-  base = "BASE",
-  group = "TRT01P",
-  time = "AVISIT",
-  patient = "USUBJID",
-  covariates = character(0),
+  data,
   intercept = TRUE,
   effect_base = TRUE,
   effect_group = TRUE,
@@ -53,12 +47,7 @@ brm_formula <- function(
   interaction_group = TRUE,
   correlation = "unstructured"
 ) {
-  assert_chr(response, "response arg must be a nonempty character string")
-  assert_chr(base, "base arg must be a nonempty character string")
-  assert_chr(group, "group arg must be a nonempty character string")
-  assert_chr(time, "time arg must be a nonempty character string")
-  assert_chr(patient, "patient arg must be a nonempty character string")
-  assert_chr_vec(covariates, "covariates arg must be a character vector")
+  brm_data_validate(data)
   assert_lgl(intercept)
   assert_lgl(effect_group)
   assert_lgl(effect_time)
@@ -77,18 +66,25 @@ brm_formula <- function(
       paste(correlations, collapse = ", ")
     )
   )
+  outcome <- attr(data, "outcome")
+  role <- attr(data, "role")
+  base <- attr(data, "base")
+  group <- attr(data, "group")
+  time <- attr(data, "time")
+  patient <- attr(data, "patient")
+  covariates <- attr(data, "covariates")
   terms <- c(
     term("0", !intercept),
     term(time, effect_time),
-    term(base, effect_base),
-    term(paste0(base, ":", time), interaction_base),
+    term(base, effect_base && !is.null(base)),
+    term(paste0(base, ":", time), interaction_base && !is.null(base)),
     term(group, effect_group),
     term(paste0(group, ":", time), interaction_group),
     covariates,
     term_correlation(correlation, time, patient)
   )
   right <- paste(terms, collapse = " + ")
-  formula <- stats::as.formula(paste(response, "~", right))
+  formula <- stats::as.formula(paste(outcome, "~", right))
   formula_sigma <- stats::as.formula(paste("sigma ~ 0 +", time))
   brms::brmsformula(formula = formula, formula_sigma)
 }