Skip to content

Commit

Permalink
Add new merge_clin() function
Browse files Browse the repository at this point in the history
- new function wrapper to allow users
  to merge in clinical variables to `soma_adat`
  objects easily
- closes SomaLogic#80
  • Loading branch information
stufield committed Mar 12, 2024
1 parent e433267 commit efad5e7
Show file tree
Hide file tree
Showing 6 changed files with 227 additions and 0 deletions.
2 changes: 2 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,7 @@ export(loadAdatsAsList)
export(locateSeqId)
export(matchSeqIds)
export(meltExpressionSet)
export(merge_clin)
export(mutate)
export(parseHeader)
export(pivotExpressionSet)
Expand Down Expand Up @@ -172,6 +173,7 @@ importFrom(tidyr,unite)
importFrom(tools,md5sum)
importFrom(utils,capture.output)
importFrom(utils,head)
importFrom(utils,read.csv)
importFrom(utils,read.delim)
importFrom(utils,tail)
importFrom(utils,write.table)
95 changes: 95 additions & 0 deletions R/merge-clin.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
#' Merge Clinical Data into SomaScan
#'
#' Occasionally, additional clinical data is obtained _after_ samples
#' have been submitted to SomaLogic, or even after 'SomaScan'
#' results have been delivered.
#' This requires the new clinical variables, i.e. non-proteomic, data to be
#' merged with 'SomaScan' data into a "new" ADAT prior to analysis.
#' [merge_clin()] easily merges such clinical variables into an
#' existing `soma_adat` object and is a simple wrapper around [dplyr::left_join()].
#'
#' This functionality also exists as a command-line tool (R script) contained
#' in `merge_clin.R` that lives in the `cli/merge` system file directory.
#' Please see:
#' \itemize{
#' \item `dir(system.file("cli/merge", package = "SomaDataIO"), full.names = TRUE)`
#' \item `vignette("cli-merge-tool", package = "SomaDataIO")`
#' }
#'
#' @inheritParams params
#' @param clin_data One of 2 options:
#' \itemize{
#' \item A data frame containing clinical variables to merge into `x`, or
#' \item A path to a file, typically a `*.csv`,
#' containing clinical variables to merge into `x`.
#' }
#' @param by A character vector of variables to join by.
#' See [dplyr::left_join()] for more details.
#' @param by_class If `clin_data` is a file path, a named character vector
#' of the variable and its class. This ensures the `by-key` is compatible
#' for the join. For example, `c(SampleId = "character")`.
#' See [read.table()] for details about the `colClasses` argument, and
#' the examples below.
#' @param ... Additional parameters passed to [dplyr::left_join()].
#' @return An object of the same class as `x` with new clinical
#' variables merged.
#' @author Stu Field
#' @seealso [dplyr::left_join()]
#' @examples
#' # retrieve clinical data
#' clin_file <- system.file("cli/merge", "meta.csv",
#' package = "SomaDataIO",
#' mustWork = TRUE)
#' clin_file
#'
#' # view clinical data to be merged:
#' # 1) `group`
#' # 2) `newvar`
#' clin_df <- read.csv(clin_file, colClasses = c(SampleId = "character"))
#' clin_df
#'
#' # create mini-adat
#' apts <- withr::with_seed(123, sample(getAnalytes(example_data), 2L))
#' adat <- head(example_data, 9L) |> # 9 x 2
#' dplyr::select(SampleId, all_of(apts))
#'
#' # merge clinical variables
#' merged <- merge_clin(adat, clin_df, by = "SampleId")
#' merged
#'
#' # Alternative syntax:
#' # 1) pass file path
#' # 2) merge on different variable names
#' # 3) convert join type on-the-fly
#' clin_file2 <- system.file("cli/merge", "meta2.csv",
#' package = "SomaDataIO",
#' mustWork = TRUE)
#'
#' id_type <- typeof(adat$SampleId)
#' merged2 <- merge_clin(adat, clin_file2, # file path
#' by = c(SampleId = "ClinKey"), # join on different variable names
#' by_class = c(ClinKey = id_type)) # match types
#' merged2
#' @importFrom utils read.csv
#' @importFrom dplyr left_join
#' @export
merge_clin <- function(x, clin_data, by = NULL, by_class = NULL, ...) {

  stopifnot("`x` must be a `soma_adat`." = is.soma_adat(x))

  # `clin_data` is treated as a path only when it is a single string naming
  # an existing file. `file.exists()` alone is also TRUE for directories,
  # which `read.csv()` cannot open, so directories are excluded here and
  # fall through to the informative error below.
  is_file <- is.character(clin_data) &&
    length(clin_data) == 1L &&
    file.exists(clin_data) &&
    !dir.exists(clin_data)

  if ( inherits(clin_data, "data.frame") ) {
    # already in memory; `by_class` is not consulted on this path
    clin_df <- clin_data
  } else if ( is_file ) {
    # `NA` is the documented "infer the column types" value of `colClasses`;
    # translate the `NULL` default so `read.csv()` never receives `NULL`
    col_classes <- if ( is.null(by_class) ) NA else by_class
    clin_df <- normalizePath(clin_data, mustWork = TRUE) |>
      utils::read.csv(header = TRUE, colClasses = col_classes,
                      row.names = NULL, stringsAsFactors = FALSE)
  } else {
    stop(
      "Invalid `clin_data` argument: ", .value(class(clin_data)),
      "\n`clin_data` must be either a `data.frame` or file path.", call. = FALSE)
  }

  # keep every row of `x`; clinical variables are added where `by` matches
  dplyr::left_join(x, clin_df, by = by, ...)
}
1 change: 1 addition & 0 deletions _pkgdown.yml
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,7 @@ reference:
- starts_with("getAnalyte")
- getMeta
- diffAdats
- merge_clin

- title: Transform Between SomaScan Versions
desc: >
Expand Down
94 changes: 94 additions & 0 deletions man/merge_clin.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

32 changes: 32 additions & 0 deletions tests/testthat/test-merge-clin.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@

# Setup ----
# clinical metadata file from the installed package; join key read as character
clin_file <- system.file("cli/merge", "meta.csv",
                         package = "SomaDataIO", mustWork = TRUE)
clin_df   <- read.csv(clin_file, header = TRUE,
                      colClasses = c(SampleId = "character"))

# mini-adat: first 9 rows, `SampleId` plus 2 analytes sampled under a fixed seed
apts <- withr::with_seed(123, sample(getAnalytes(example_data), 2L))
adat <- dplyr::select(head(example_data, 9L), SampleId, all_of(apts))

test_that("merge_clin() merges clinical variables from a data frame", {
  merged <- merge_clin(adat, clin_df, by = "SampleId")
  # every original column is retained and exactly 2 new variables are added
  expect_true(all(names(adat) %in% names(merged)))
  expect_equal(setdiff(names(merged), names(adat)), c("group", "newvar"))
  expect_equal(dim(merged), c(9L, 5L))
  expect_equal(sum(is.na(merged)), 8L)
  # expected value is written to 7 significant digits; compare accordingly
  expect_equal(sum(merged$newvar, na.rm = TRUE), -1.779255, tolerance = 1e-6)
})

test_that("merge_clin() generates same result on `clin_data` argument", {
  # a data frame and the file it was read from must merge identically
  from_df   <- merge_clin(adat, clin_df, by = "SampleId")
  from_file <- merge_clin(adat, clin_file, by = "SampleId",
                          by_class = c(SampleId = "character"))
  expect_equal(from_df, from_file)
})

test_that("merge_clin() errors on bad `clin_data` argument", {
  # pin the message so an unrelated failure cannot satisfy the expectation
  msg <- "Invalid `clin_data` argument"
  expect_error( merge_clin(adat, letters), msg, fixed = TRUE )        # length > 1
  expect_error( merge_clin(adat, 1:10L), msg, fixed = TRUE )          # not character
  expect_error( merge_clin(adat, "Samples"), msg, fixed = TRUE )      # no such file
  expect_error( merge_clin(adat, NA), msg, fixed = TRUE )             # logical NA
  expect_error( merge_clin(adat, NA_character_), msg, fixed = TRUE )  # NA path
})

test_that("merge_clin() errors on bad `x` argument", {
  # a plain data frame is not a `soma_adat`, even with valid `clin_data`
  expect_error(
    merge_clin(data.frame(adat), clin_df),
    "`x` must be a `soma_adat`.", fixed = TRUE
  )
})
3 changes: 3 additions & 0 deletions vignettes/cli-merge-tool.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,9 @@ in the `cli/merge/` directory, which allows one to
generate an updated `*.adat` file via the command-line without
having to launch an integrated development environment ("IDE"), e.g. `RStudio`.

To use `SomaDataIO`'s exported functionality from _within_ an R session,
please see `merge_clin()`.


----------------

Expand Down

0 comments on commit efad5e7

Please sign in to comment.