Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closes #34 function for growth parameters for height/length #45

Merged
merged 22 commits into from
Jun 17, 2024
Merged
Show file tree
Hide file tree
Changes from 11 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,14 +1,18 @@
# Generated by roxygen2: do not edit by hand

export(derive_params_growth_height)
importFrom(admiral,derive_vars_dy)
importFrom(admiraldev,assert_character_scalar)
importFrom(admiraldev,assert_character_vector)
importFrom(admiraldev,assert_data_frame)
importFrom(admiraldev,assert_expr)
importFrom(admiraldev,assert_filter_cond)
importFrom(admiraldev,assert_logical_scalar)
importFrom(admiraldev,assert_numeric_vector)
importFrom(admiraldev,assert_symbol)
importFrom(admiraldev,assert_vars)
importFrom(admiraldev,assert_varval_list)
importFrom(admiraldev,expr_c)
importFrom(dplyr,arrange)
importFrom(dplyr,bind_cols)
importFrom(dplyr,bind_rows)
Expand All @@ -21,6 +25,8 @@ importFrom(dplyr,filter)
importFrom(dplyr,full_join)
importFrom(dplyr,group_by)
importFrom(dplyr,if_else)
importFrom(dplyr,lead)
importFrom(dplyr,left_join)
importFrom(dplyr,mutate)
importFrom(dplyr,mutate_at)
importFrom(dplyr,mutate_if)
Expand Down Expand Up @@ -93,12 +99,14 @@ importFrom(rlang,eval_tidy)
importFrom(rlang,expr)
importFrom(rlang,expr_interp)
importFrom(rlang,expr_label)
importFrom(rlang,exprs)
importFrom(rlang,f_lhs)
importFrom(rlang,f_rhs)
importFrom(rlang,inform)
importFrom(rlang,is_bare_formula)
importFrom(rlang,is_call)
importFrom(rlang,is_character)
importFrom(rlang,is_empty)
importFrom(rlang,is_formula)
importFrom(rlang,is_integerish)
importFrom(rlang,is_logical)
Expand All @@ -124,6 +132,8 @@ importFrom(rlang,sym)
importFrom(rlang,syms)
importFrom(rlang,type_of)
importFrom(rlang,warn)
importFrom(stats,pnorm)
importFrom(stats,qnorm)
importFrom(stringr,str_c)
importFrom(stringr,str_detect)
importFrom(stringr,str_extract)
Expand Down
11 changes: 6 additions & 5 deletions R/admiralpeds-package.R
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
#' @keywords internal
#' @family internal
#' @importFrom dplyr arrange bind_rows case_when desc ends_with filter full_join group_by
#' if_else mutate mutate_at mutate_if n pull rename rename_at row_number select slice
#' starts_with transmute ungroup vars n_distinct union distinct
#' if_else lead left_join mutate mutate_at mutate_if n pull rename rename_at row_number
#' select slice starts_with transmute ungroup vars n_distinct union distinct
#' summarise_at summarise coalesce bind_cols na_if tibble
#' @importFrom magrittr %>%
#' @importFrom rlang := abort arg_match as_function as_string call2 caller_env
#' call_name current_env .data enexpr enquo eval_bare eval_tidy expr
#' call_name current_env .data enexpr enquo eval_bare eval_tidy expr exprs
#' expr_interp expr_label f_lhs f_rhs inform
#' is_bare_formula is_call is_character is_formula is_integerish
#' is_bare_formula is_call is_character is_formula is_empty is_integerish
#' is_logical is_quosure is_quosures is_symbol new_formula
#' parse_expr parse_exprs quo quo_get_expr quo_is_call
#' quo_is_missing quo_is_null quo_is_symbol quos quo_squash quo_text
Expand All @@ -27,5 +27,6 @@
#' @importFrom admiral derive_vars_dy
#' @importFrom admiraldev assert_logical_scalar assert_character_vector assert_vars
#' assert_data_frame assert_character_scalar assert_numeric_vector assert_filter_cond
#' assert_symbol
#' assert_symbol assert_expr assert_varval_list expr_c
#' @importFrom stats pnorm qnorm
"_PACKAGE"
282 changes: 282 additions & 0 deletions R/derive_params_growth_height.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,282 @@
#' Derive Anthropometric indicators (Z-Scores/Percentiles-for-Height) based on Standard Growth Charts
#'
#' Derive Anthropometric indicators (Z-Scores/Percentiles-for-Height) based on Standard Growth Charts
#' for Weight by Height/Length
#'
#' @param dataset Input dataset
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Missing parameters from the issue:
age,
age_unit,
height_age,
These are all needed as we need to know at which age to assume height instead of body length but they're optional as: If only ever length or height is used then leave this NULL and just feed in only the corresponding by length or height metadata (instead of the combined version which has both)

I agree measure argument in the issue not needed - as we can add the height temp var to the input dataset

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is so that you use the right metadata from WHO depending on patient age and which way they were likely measuring (height or body length). it'd be good in the examples to use height_age = 730.5 days i.e. 2 years (even as default from what David explained to us?). See https://github.com/pharmaverse/admiralpeds/blob/35_advs_vignette/vignettes/advs.Rmd from line 229 for further explanation.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@rossfarrugia I think that's why in the example I actually split the dataset into over 2 and under 2 and just ran the function twice, otherwise you would need to create some sort of additional joining variable on both sides, dataset and metadata, which involves additional pre-processing to both datasets, while adding complexity to the function too, the "modularity" of running it twice felt more intuitive to me

I'm open to this adoption with additional arguments, but I wonder what other programmers would think

Copy link
Collaborator

@rossfarrugia rossfarrugia Jun 6, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@zdz2101 i'm open to your approach - it does give the user complete control still. we'll just need to well comment this to explain our approach in your roxygen2 function documentation example (e.g. at the end you should add a comment to explain that the 2 resulting dataframes would need to be set back together to get the complete ADVS for this parameter) and also we'll need to explain well in our template comments and our vignette.

@Fanny-Gautier @Lina2689 what do you think? as the template authors i would trust your advice here as you'll have a good read on what makes this least complex for users.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In the template we selected the records from the metadata where MEASURE ="LENGTH" for patients <730.5 days, and MEASURE="HEIGHT" for patients >=730.5 days. As Zelos mentioned, it will require additional variables to merge the right data depending on the age.
We also added message in the ADVS peds template for the same message("To derive height/length parameters, below function needs to call separately for Height and Length based on the input data and current age of the patient, as it depends on your CRF guidelines.").
I think it is easier to split it because if the user has only HEIGHT then there is only one call, similarly for LENGTH. But if the user has both LENGTH and HEIGHT in the data, it will complexify the merge. I think the user has more flexibility while splitting the derivation.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

makes sense Fanny, sounds like we have a plan then! and thanks for the other comments here too - looks like me and you picked up similar spots.

#'
#' The variables specified in `sex`, `height`, `height_unit`, `parameter`, `analysis_var`
#' are expected to be in the dataset.
rossfarrugia marked this conversation as resolved.
Show resolved Hide resolved
#'
#' @param sex Sex
#'
#' A character vector is expected.
#'
#' Expected Values: `M`, `F`
#'
#' @param height Current Height/length
#'
#' A numeric vector is expected. Note that this is the actual height at the current visit.
#'
#' @param height_unit Lenght/Height Unit
#
#' A character vector is expected.
#'
#' Expected values: 'cm'
#'
#' @param meta_criteria Metadata dataset
#'
#' A metadata dataset with the following expected variables:
#' `HEIGHT_LENGTH`, `HEIGHT_LENGTHU`, `SEX`, `L`, `M`, `S`
#'
#' The dataset can be derived from CDC/WHO or user-defined datasets.
#' The CDC/WHO growth chart metadata datasets are available in the package and will
#' require small modifications.
rossfarrugia marked this conversation as resolved.
Show resolved Hide resolved

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The original WHO metadata table has height/length increment of 0.5, but the metadata in the admiralpeds packages has increment of 0.1. How was it extrapolated? Do we have any documentation on that?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

https://www.who.int/tools/child-growth-standards/standards/weight-for-length-height

scroll to the bottom on the expanded tables section you will find the increments were by 0.1

#' * `HEIGHT_LENGTH` - Height/Length
#' * `HEIGHT_LENGTHU` - Height Unit
#' * `SEX` - Sex
#' * `L` - Power in the Box-Cox transformation to normality
#' * `M` - Median
#' * `S` - Coefficient of variation
#'
#' @param parameter Anthropometric measurement parameter to calculate z-score or percentile

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Shall we include a reminder that the expected unit of weight is kg?

#'
#' A condition is expected with the input dataset `VSTESTCD`/`PARAMCD`
#' for which we want growth derivations:
#'
#' e.g. `parameter = VSTESTCD == "WEIGHT"`.
#'
#' There is WHO metadata available for Weight available in the `admiralpeds` package.
#'
#' @param analysis_var Variable containing anthropometric measurement
#'
#' A numeric vector is expected, e.g. `AVAL`, `VSSTRESN`
#'
#' @param set_values_to_sds Variables to be set for Z-Scores
#'
#' The specified variables are set to the specified values for the new
#' observations. For example,
#' `set_values_to_sds(exprs(PARAMCD = “WTASDS”, PARAM = “Weight-for-height z-score”))`
rossfarrugia marked this conversation as resolved.
Show resolved Hide resolved
#' defines the parameter code and parameter.
#'
#' *Permitted Values*: List of variable-value pairs
#'
#' If left as default value, `NULL`, then parameter not derived in output dataset
#'
#' @param set_values_to_pctl Variables to be set for Percentile
#'
#' The specified variables are set to the specified values for the new
#' observations. For example,
#' `set_values_to_pctl(exprs(PARAMCD = “WTAPCTL”, PARAM = “Weight-for-height percentile”))`
#' defines the parameter code and parameter.
#'
#' *Permitted Values*: List of variable-value pair
#'
#' If left as default value, `NULL`, then parameter not derived in output dataset
#'
#' @return The input dataset additional records with the new parameter added.
#'
#'
#' @family der_prm_bds_vs
#'
#' @keywords der_prm_bds_vs
#'
#' @export
#'
#' @examples
#' library(dplyr)
#' library(lubridate)
#' library(rlang)
#' library(admiral)
#'
#' advs <- dm_peds %>%
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can we add comments to this example please? as its quite a lot for users to follow with so much pre-processing before we even get to the example function call

#' select(USUBJID, BRTHDTC, SEX) %>%
#' right_join(., vs_peds, by = "USUBJID") %>%
#' mutate(
#' VSDT = ymd(VSDTC),
#' BRTHDT = ymd(BRTHDTC)
#' ) %>%
#' derive_vars_duration(
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

trunc_out = FALSE can be removed as its default anyway for this function - to simplify the example

#' new_var = AAGECUR,
#' new_var_unit = AAGECURU,
#' start_date = BRTHDT,
#' end_date = VSDT,
#' out_unit = "months",
#' trunc_out = FALSE
#' )
#'
#' heights <- vs_peds %>%
#' filter(VSTESTCD == "HEIGHT") %>%
#' select(USUBJID, VSSTRESN, VSSTRESU, VSDTC) %>%
#' rename(
#' HGTTMP = VSSTRESN,
#' HGTTMPU = VSSTRESU
#' )
#'
#' advs <- advs %>%
#' right_join(., heights, by = c("USUBJID", "VSDTC"))
#'
#' advs_under2 <- advs %>%
#' filter(AAGECUR <= 24)
#'
#' advs_over2 <- advs %>%
#' filter(AAGECUR > 24)
#'
#' who_under2 <- bind_rows(
#' (admiralpeds::who_wt_for_lgth_boys %>%
#' mutate(
#' SEX = "M",
#' height_unit = "cm"
#' )
#' ),
#' (admiralpeds::who_wt_for_lgth_girls %>%
#' mutate(
#' SEX = "F",
#' height_unit = "cm"
#' )
#' )
#' ) %>%
#' rename(
#' HEIGHT_LENGTH = Length,
#' HEIGHT_LENGTHU = height_unit
#' )
#'
#' who_over2 <- bind_rows(
#' (admiralpeds::who_wt_for_ht_boys %>%
#' mutate(
#' SEX = "M",
#' height_unit = "cm"
#' )
#' ),
#' (admiralpeds::who_wt_for_ht_girls %>%
#' mutate(
#' SEX = "F",
#' height_unit = "cm"
#' )
#' )
#' ) %>%
#' rename(
#' HEIGHT_LENGTH = Height,
#' HEIGHT_LENGTHU = height_unit
#' )
#'
#'
#' derive_params_growth_height(
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Where is patient 01-701-1033 gone ? While running this code I don't have any records created for patient 01-701-1033. Could you please clarify the reason ? thanks

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

patient should be in there, at least showing on my end

#' advs_under2,
#' sex = SEX,
#' height = HGTTMP,
#' height_unit = HGTTMPU,
#' meta_criteria = who_under2,
#' parameter = VSTESTCD == "WEIGHT",
#' analysis_var = VSSTRESN,
#' set_values_to_sds = exprs(
#' PARAMCD = "WTASDS",
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should be PARAMCD = "WTHSDS"?

#' PARAM = "Weight-for-height z-score"
#' ),
#' set_values_to_pctl = exprs(
#' PARAMCD = "WTAPCTL",
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should be PARAMCD = "WTHPCTL"?

#' PARAM = "Weight-for-height percentile"
#' )
#' )
#'
#' derive_params_growth_height(
#' advs_over2,
#' sex = SEX,
#' height = HGTTMP,
#' height_unit = HGTTMPU,
#' meta_criteria = who_over2,
#' parameter = VSTESTCD == "WEIGHT",
#' analysis_var = VSSTRESN,
#' set_values_to_sds = exprs(
#' PARAMCD = "WTASDS",
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should be PARAMCD = "WTHSDS"?

#' PARAM = "Weight-for-height z-score"
#' ),
#' set_values_to_pctl = exprs(
#' PARAMCD = "WTAPCTL",
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should be PARAMCD = "WTHPCTL"?

#' PARAM = "Weight-for-height percentile"
#' )
#' )

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Shall we include codes to bind rows of advs_under2 and advs_over2?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Makes sense just for the sake of completing the example

derive_params_growth_height <- function(dataset,
sex,
height,
height_unit,
meta_criteria,
parameter,
Fanny-Gautier marked this conversation as resolved.
Show resolved Hide resolved
analysis_var,
set_values_to_sds = NULL,
set_values_to_pctl = NULL) {
# Apply assertions to each argument to ensure each object is appropriate class
sex <- assert_symbol(enexpr(sex))
height <- assert_symbol(enexpr(height))
height_unit <- assert_symbol(enexpr(height_unit))
analysis_var <- assert_symbol(enexpr(analysis_var))
assert_data_frame(dataset, required_vars = expr_c(sex, height, height_unit, analysis_var))
assert_data_frame(meta_criteria, required_vars = exprs(SEX, HEIGHT_LENGTH, HEIGHT_LENGTHU, L, M, S)) # nolint

assert_expr(enexpr(parameter))
assert_varval_list(set_values_to_sds, optional = TRUE)
assert_varval_list(set_values_to_pctl, optional = TRUE)

if (is.null(set_values_to_sds) && is.null(set_values_to_pctl)) {
abort("One of `set_values_to_sds`/`set_values_to_pctl` has to be specified.")
}

# create a unified join naming convention, hard to figure out in by argument
dataset <- dataset %>%
mutate(
sex_join := {{ sex }},
heightu_join := {{ height_unit }}
)

# Process metadata
# Metadata should contain SEX, HEIGHT_LENGTH, HEIGHT_LENGTHU, L, M, S
# Processing the data to be compatible with the dataset object
processed_md <- meta_criteria %>%
arrange(SEX, HEIGHT_LENGTHU, HEIGHT_LENGTH) %>%
group_by(SEX, HEIGHT_LENGTHU) %>%
mutate(next_height = lead(HEIGHT_LENGTH)) %>%
rename(
sex_join = SEX,
prev_height = HEIGHT_LENGTH,
heightu_join = HEIGHT_LENGTHU
)

# Merge the dataset that contains the vs records and filter the L/M/S that match height
# To parse out the appropriate age, create [x, y) using prev_height <= height < next_height
added_records <- dataset %>%
filter(!!enexpr(parameter)) %>%
left_join(.,
processed_md,
by = c("sex_join", "heightu_join"),
relationship = "many-to-many"
) %>%
filter(prev_height <= {{ height }} & {{ height }} < next_height)

dataset_final <- dataset

# create separate records objects as appropriate depending if user specific sds and/or pctl
if (!is_empty(set_values_to_sds)) {
add_sds <- added_records %>%
mutate(
AVAL := (({{ analysis_var }} / M)^L - 1) / (L * S), # nolint
!!!set_values_to_sds
)

dataset_final <- bind_rows(dataset, add_sds) %>%
select(-c(L, M, S, sex_join, heightu_join, prev_height, next_height))
}

if (!is_empty(set_values_to_pctl)) {
add_pctl <- added_records %>%
mutate(
AVAL := (({{ analysis_var }} / M)^L - 1) / (L * S), # nolint
AVAL = pnorm(AVAL) * 100,
!!!set_values_to_pctl
)

dataset_final <- bind_rows(dataset_final, add_pctl) %>%
select(-c(L, M, S, sex_join, heightu_join, prev_height, next_height))
}

return(dataset_final)
}
12 changes: 6 additions & 6 deletions inst/WORDLIST
Original file line number Diff line number Diff line change
@@ -1,15 +1,15 @@
ADSL
ADaM
ADaMs
ADSL
Anthropometric
BMI
Biologics
CDISC
DM
Lenght
SDTM
anthropometric
bds
der
pharmaverse
renv
repo
roxygen
Template
bmi
prm
Loading
Loading