Skip to content

Commit

Permalink
Merge pull request #338 from metrumresearchgroup/feat/check_up_to_date
Browse files Browse the repository at this point in the history
Feat/check up to date
  • Loading branch information
seth127 committed Mar 4, 2021
2 parents fe173ec + 80c51cb commit 5390d08
Show file tree
Hide file tree
Showing 15 changed files with 489 additions and 64 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Type: Package
Package: bbr
Title: R package for bbi
Version: 1.0.0.8000
Version: 1.0.0.8001
Authors@R:
c(person(given = "Devin",
family = "Pastoor",
Expand Down
7 changes: 7 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,9 @@ S3method(check_grd,bbi_nonmem_summary)
S3method(check_grd,character)
S3method(check_output_dir,bbi_model)
S3method(check_output_dir,character)
S3method(check_up_to_date,bbi_log_df)
S3method(check_up_to_date,bbi_nonmem_model)
S3method(check_up_to_date,bbi_nonmem_summary)
S3method(copy_model_from,bbi_nonmem_model)
S3method(get_based_on,bbi_run_log_df)
S3method(get_based_on,character)
Expand Down Expand Up @@ -69,6 +72,7 @@ export(check_grd)
export(check_nonmem_table_output)
export(check_output_dir)
export(check_status_code)
export(check_up_to_date)
export(check_yaml_in_sync)
export(collapse_to_string)
export(config_log)
Expand Down Expand Up @@ -123,10 +127,12 @@ export(tail_output)
export(use_bbi)
export(yaml_ext)
import(fs)
importFrom(checkmate,assert_file_exists)
importFrom(checkmate,assert_list)
importFrom(checkmate,assert_number)
importFrom(checkmate,assert_scalar)
importFrom(checkmate,assert_string)
importFrom(checkmate,assert_true)
importFrom(cli,cat_bullet)
importFrom(cli,cat_line)
importFrom(cli,cat_rule)
Expand Down Expand Up @@ -160,6 +166,7 @@ importFrom(fs,file_copy)
importFrom(fs,file_delete)
importFrom(fs,file_exists)
importFrom(fs,is_absolute_path)
importFrom(fs,path_ext_set)
importFrom(fs,path_norm)
importFrom(fs,path_rel)
importFrom(ggplot2,aes)
Expand Down
9 changes: 8 additions & 1 deletion NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,14 @@

## New features and changes

* Added `tags_diff` function for comparing the tags between different models. (#337)
* Added `tags_diff()` function for comparing the tags between different models. (#337)

* Added `check_up_to_date()` function for checking whether the model file(s) and data file(s) associated with a model have changed since the model was run. (#338)

## Developer-facing changes

* Added a `bbi_model` parent class to `bbi_nonmem_model` and `bbi_nonmem_summary` objects. Many of the helpers in `get-path-from-object.R` now dispatch on this class. This had been discussed in the past but was primarily done now in preparation for beginning development for Stan modeling, which will create `bbi_stan_model` and `bbi_stan_summary` objects that will also inherit from this parent class. (#332)


# bbr 1.0.0

Expand Down
5 changes: 5 additions & 0 deletions R/aaa.R
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,11 @@ SUMMARY_PARAM_DIAG <- "diag"
SUMMARY_PARAM_SHRINKAGE <- "shrinkage"
SUMMARY_SHRINKAGE_OMEGA <- "eta_sd"
SUMMARY_SHRINKAGE_SIGMA <- "eps_sd"

CONFIG_MODEL_PATH <- "model_path"
CONFIG_MODEL_MD5 <- "model_md5"
CONFIG_DATA_PATH <- "data_path"
CONFIG_DATA_MD5 <- "data_md5"

# keys required for a summary object to have
SUMMARY_REQ_KEYS <- c(
Expand Down Expand Up @@ -225,3 +229,4 @@ NO_NONMEM_ERR_MSG <- "No version was supplied and no default value exists in the
MOD_ALREADY_EXISTS_ERR_MSG <- "already exist, but we are configured not to overwrite"
NO_STAN_ERR_MSG <- "stan support not yet implemented."
PARAM_BAYES_ERR_MSG <- "param_estimates() is not currently implemented for Bayesian methods."
CHECK_UP_TO_DATE_ERR_MSG <- "Cannot check if up-to-date because model has not been run yet."
146 changes: 146 additions & 0 deletions R/check-up-to-date.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,146 @@
#' Check model and data up to date with outputs
#'
#' Functions for checking that the model outputs on disk match the md5 hashes
#' stored in `bbi_config.json` at run time. In other words, checking that the
#' model and data files have _not_ changed since the model was last run. See
#' Details section for what specific files are checked.
#'
#' @details
#' Different files are checked depending on what type of model is being checked.
#'
#' **For NONMEM models**
#'
#' * The model file (control stream)
#'
#' * The data file (referenced in `$DATA` within the control stream)
#'
#' **Currently only NONMEM implemented.**
#'
#' @param .bbi_object the object to check. Could be
#' a `bbi_{.model_type}_model` object,
#' a `bbi_{.model_type}_summary` object,
#' or a `bbi_log_df` tibble.
#' @param ... Arguments passed through (currently none).
#'
#' @return
#'
#' **`bbi_model`** method invisibly returns a logical vector of length 2. The
#' first element (named `"model"`) refers to the model files mentioned above.
#' The second element (named `"data"`) refers to the data files mentioned above.
#' For both elements, they will be `TRUE` if nothing has changed, `FALSE` if
#' anything has changed. Note: _if no file exists_ at the specified path,
#' `FALSE` will be returned because that is technically a "change." The file
#' used to exist and now it does not.
#'
#' **`bbi_log_df`** method invisibly returns a named list of lists, with one
#' element for each row in the input tibble, with the name corresponding to the
#' value in the `run` column for that row. Each element of the list will contain
#' the two-element list returned from the `bbi_model` method (described above)
#' for the relevant model.
#'
#' There is no `add_up_to_date()` function because **if you would like to add
#' these columns to a `bbi_log_df` tibble** you can use [add_config()], which
#' contains `model_has_changed` and `data_has_changed` columns. Please note:
#' these contain the opposite boolean values (`check_up_to_date()` returns
#' `TRUE` if up to date, `*_has_changed` returns `TRUE` if _changed_).
#'
#' **The returned value is invisible because a message is printed** alerting the
#' user of the specific files that have changed, if any. This facilitates
#' calling the function for this side effect without explicitly handling the
#' returned value.
#'
#' @export
check_up_to_date <- function(.bbi_object, ...) {
UseMethod("check_up_to_date")
}

#' @export
check_up_to_date.bbi_nonmem_model <- function(.bbi_object, ...) {
check_up_to_date_nonmem(.bbi_object)
}

#' @export
check_up_to_date.bbi_nonmem_summary <- function(.bbi_object, ...) {
check_up_to_date_nonmem(.bbi_object)
}

#' @export
check_up_to_date.bbi_log_df <- function(.bbi_object, ...) {

check_list <- map(.bbi_object[[ABS_MOD_PATH]], function(.p) {
tryCatch(
check_up_to_date(read_model(.p)),
error = function(.e) {
.error_msg <- paste(as.character(.e$message), collapse = " -- ")
if (grepl(CHECK_UP_TO_DATE_ERR_MSG, .error_msg, fixed = TRUE)) {
message(.error_msg)
return(as.logical(c(model = NA, data = NA)))
} else {
stop(.e)
}
}
)
})

names(check_list) <- .bbi_object[[RUN_ID_COL]]
return(invisible(check_list))
}

####################################
# PRIVATE implementation functions
####################################


#' Private implementation to check that NONMEM model is up-to-date
#'
#' Specifically, check that control stream and data file on disk have not
#' changed since the model was run. This is accomplished by taking their md5
#' hashes and comparing it to the hashes stored in `bbi_config.json`.
#'
#' @importFrom jsonlite fromJSON
#' @importFrom fs file_exists
#' @importFrom tidyr replace_na
#'
#' @inheritParams check_up_to_date
#'
#' @keywords internal
check_up_to_date_nonmem <- function(.mod) {
config_path <- file.path(get_output_dir(.mod, .check_exists = FALSE), "bbi_config.json")
if (!fs::file_exists(config_path)) {
stop(paste(glue("Model {get_model_id(.mod)}:"), CHECK_UP_TO_DATE_ERR_MSG))
}
config <- jsonlite::fromJSON(config_path)

# check necessary files for changes
model_file <- get_model_path(.mod)
data_file <- get_data_path(.mod)

changed_files <- c(
config[[CONFIG_MODEL_MD5]] != tools::md5sum(model_file),
config[[CONFIG_DATA_MD5]] != tools::md5sum(data_file)
)

any_changes <- any(changed_files)
if(isTRUE(any_changes)) {
message(paste(
glue("The following files have changed in {get_model_id(.mod)}"),
paste("*", names(which(changed_files)), collapse = "\n"),
sep = "\n"
))
}

na_files <- is.na(changed_files)
if(isTRUE(any(na_files))) {
message(paste(
glue("The following files in {get_model_id(.mod)} ARE NO LONGER PRESENT"),
paste("*", names(changed_files[na_files]), collapse = "\n"),
sep = "\n"
))
}

# build return value
res <- replace_na(!changed_files, FALSE)
names(res) <- c("model", "data")

return(invisible(res))
}
101 changes: 50 additions & 51 deletions R/config-log.R
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,6 @@ config_log_impl <- function(.mods) {
#'
#' @param path A string giving the path to `bbi_config.json`.
#' @param fields A character vector of fields to include.
#' @param model_path_field A string giving the field name for the model path.
#'
#' @return A list whose elements include
#'
Expand All @@ -137,28 +136,27 @@ config_log_impl <- function(.mods) {
#'
#' @keywords internal
config_log_entry <- function(path,
fields = CONFIG_KEEPERS,
model_path_field = ABS_MOD_PATH) {
fields = CONFIG_KEEPERS) {
checkmate::assert_file_exists(path)
checkmate::assert_character(fields)
checkmate::assert_string(model_path_field)

cfg_mod <- read_model_from_config(path)
config <- jsonlite::fromJSON(path)

if (!all(fields %in% names(config))) {
msg <- glue::glue(
glue::glue(
msg <- paste(
glue(
"{path} is missing the required keys:",
"`{paste(fields[!(fields %in% names(config))], collapse = ', ')}`",
"and will be skipped.",
.sep = " "
),
glue::glue(
glue(
"This is likely because it was run with an old version of bbi.",
"Model was run on version {config[['bbi_version']]}",
.sep = " "
),
glue::glue(
glue(
"User can call `bbi_current_release()` to see the most recent release",
"version, and call `use_bbi(options('bbr.bbi_exe_path'))` to",
"upgrade to the version.",
Expand All @@ -171,32 +169,16 @@ config_log_entry <- function(path,
return(NULL)
}

# the model_path field may not exist, e.g., it could be the home directory of
# the user who ran the model, so we cannot use it directly
output_dir <- dirname(path)
model_path <- fs::path_ext_set(output_dir, config[["model_extension"]])
matches <- suppressMessages(check_up_to_date(cfg_mod))

data_path <- fs::path_norm(
file.path(
output_dir,
config[["data_path"]]
)
)

matches <- purrr::map2_lgl(
c(model_path, data_path),
c(config[["model_md5"]], config[["data_md5"]]),
file_matches
)

config[["model_has_changed"]] <- !matches[1]
config[["data_has_changed"]] <- !matches[2]
config[["nm_version"]] <- resolve_nonmem_version(config)
config[[model_path_field]] <- output_dir
config[["model_has_changed"]] <- as.logical(!matches["model"]) # use as.logical to strip off names
config[["data_has_changed"]] <- as.logical(!matches["data"]) # use as.logical to strip off names
config[["nm_version"]] <- resolve_nonmem_version(config) %||% NA_character_
config[[ABS_MOD_PATH]] <- cfg_mod[[ABS_MOD_PATH]]

out_fields <- c(
ABS_MOD_PATH,
fields,
model_path_field,
"nm_version",
"model_has_changed",
"data_has_changed"
Expand All @@ -205,27 +187,6 @@ config_log_entry <- function(path,
config[out_fields]
}

#' Compare a file to an MD5 sum
#'
#' @param path String giving the path to the file.
#' @param md5 String giving expected MD5 sum.
#'
#' @return `TRUE` if `path` matches `md5`, otherwise `FALSE` (including if
#' `path` doesn't exist).
#'
#' @keywords internal
file_matches <- function(path, md5) {
checkmate::assert_string(path)
checkmate::assert_string(md5)

if (file.exists(path)) {
res <- tools::md5sum(path) == md5
} else {
res <- FALSE
}

res
}

#' Determine the NONMEM version used
#'
Expand All @@ -247,3 +208,41 @@ resolve_nonmem_version <- function(x) {
}
ver
}

#' Create a model object from a bbi_config.json path
#'
#' This is non-trivial because, while configs
#' always sit in the ouput directory, for NONMEM models
#' this is one dir deeper than the YAML and for Stan
#' models this is two dirs deeper than the YAML.
#' Hence, this is abstracted into an obnoxious private
#' helper function.
#' @importFrom fs path_norm path_ext_set file_exists
#' @importFrom stringr str_detect
#' @importFrom checkmate assert_file_exists assert_true
#' @param .config_path The absolute path a `bbi_config.json` file
#' @return a `bbi_{.model_type}_model` object
#' @keywords internal
read_model_from_config <- function(.config_path) {
checkmate::assert_true(stringr::str_detect(.config_path, "bbi_config\\.json$"))
checkmate::assert_file_exists(.config_path)

potential_nm_path <- file.path(.config_path, "..") %>%
fs::path_norm() %>%
fs::path_ext_set("yaml")

potential_stan_path <- file.path(.config_path, "..", "..") %>%
fs::path_norm() %>%
fs::path_ext_set("yaml")

winner <- names(which(fs::file_exists(c(potential_nm_path, potential_stan_path))))

if (length(winner) != 1) {
dev_error(glue("read_model_from_config() checked {potential_nm_path} and {potential_stan_path} and the following exist: {paste(winner, collapse = ', ')}"))
}

mod <- suppressMessages(
read_model(tools::file_path_sans_ext(winner))
)
return(mod)
}

0 comments on commit 5390d08

Please sign in to comment.