Skip to content

Latest commit

 

History

History
405 lines (364 loc) · 9.88 KB

_targets.md

File metadata and controls

405 lines (364 loc) · 9.88 KB

Analysis pipeline: Joint models quantify associations between T-cell kinetics and allo-immunological events after allogeneic stem cell transplantation and subsequent donor lymphocyte infusion

Edouard Bonneville and Eva Koster

Pipeline

The analysis pipeline for this work can be regenerated by rendering this file,

rmarkdown::render("_targets.Rmd")

The pipeline can then be run using,

tar_make()

The complete pipeline can be visualised using,

tar_visnetwork()

Setup

We first load the targets package and remove the potentially outdated workflow.

library("targets")
library("tarchetypes")
library("future")
library("future.callr")
library("here")
tar_unscript()

We now define shared global options across our workflow and load R functions from the R folder.

# Use capsule/renv eventually docker (!!)
options(tidyverse.quiet = TRUE)

# Global objects
cell_lines <- list("cell_line" = paste0("CD", c(3, 4, 8)))

# Load packages and functions necessary for pipeline
source(here::here("packages.R"))
source(here::here("data-raw/prepare-raw-data.R"))
functions <- list.files(here("R"), full.names = TRUE)
invisible(lapply(functions, source))
rm("functions")

# Run whole pipeline in parallel
plan(callr)

# Other pipeline options
tar_option_set(error = "continue")
#> Establish _targets.R and _targets_r/globals/globals.R.

Data preparation

  • First, we pre-process the raw data
list(
  tar_target(
    lymphocytes_raw,
    data.table(readRDS(here("data-raw/2022-02-24_v9/lymphocytes.rds")))
  ),
  tar_target(
    variables_raw,
    data.table(readRDS(here("data-raw/2022-02-24_v9/variables.rds")))
  ),
  tar_target(dat_merged, prepare_raw_data(lymphocytes_raw, variables_raw))
) 
#> Establish _targets.R and _targets_r/targets/process-raw-data.R.
  • We add reference values for the different cell lines, for further plotting later in the pipeline
tar_target(reference_values, {
  cbind.data.frame(
    "cell_type" = c("CD3", "CD4", "CD8", "NK", "CD19"),
    "lower_limit" = c(860, 560, 260, 40, 60),
    "upper_limit" = c(2490, 1490, 990, 390, 1000)
  )
})
#> Define target reference_values from chunk code.
#> Establish _targets.R and _targets_r/targets/reference_values.R.
  • Next, we prepare the long and wide datasets for the alloSCT and post DLI models
list(
  tar_target(
    NMA_preDLI_datasets,
    get_preDLI_datasets(
      dat_merged[TCD2 %in% c("NMA RD: ALT", "UD: ALT + ATG")], 
      admin_cens = 6
    )
  ),
  tar_target(
    NMA_postDLI_datasets,
    get_postDLI_datasets(
      dat_merged = dat_merged[TCD2 %in% c("NMA RD: ALT", "UD: ALT + ATG")],
      admin_cens_dli = 3
    )
  )
)
#> Establish _targets.R and _targets_r/targets/prepare-data.R.

Post alloSCT models (model I)

  • Current value
list(
  # -- Pre-DLI Cox model
  tar_target(
    preDLI_cox,
    run_preDLI_cox(
      form = Surv(time, status) ~
        ATG.1 + hirisk.1 + # GVHD
        ATG.2 + hirisk.2 + # Relapse
        ATG.3 + # NRF other
        strata(trans),
      dat_wide = NMA_preDLI_datasets$wide
    )
  ),

  # -- Longitudinal models - correlated reffs, fixed intercept
  tar_map(
    values = cell_lines,
    tar_target(
      preDLI_long_corr,
      run_preDLI_longitudinal(
        cell_line = paste0(cell_line, "_abs_log"),
        form_fixed = "ns(intSCT2_7, 3) * hirisk * ATG + CMV_PD",
        form_random = ~ 0 + ns(intSCT2_7, 3) | IDAA,
        dat = NMA_preDLI_datasets$long
      )
    )
  ),

  # Not many events for we lower default flexibility of baseline hazard (from 5 to 3 internal knots)
  tar_target(preDLI_basehaz_knots, 3L),

  # -- Joint models, correlated reffs
  tar_target(
    preDLI_JM_value_corr_CD3,
    jointModel(
      lmeObject = preDLI_long_corr_CD3,
      survObject = preDLI_cox,
      CompRisk = TRUE,
      method = "spline-PH-aGH",
      timeVar = "intSCT2_7",
      parameterization = "value",
      interFact = list("value" = ~ strata(trans) - 1),
      iter.qN = 1000L,
      lng.in.kn = preDLI_basehaz_knots,
      numeriDeriv = "cd",
      eps.Hes = 1e-04
    )
  ),
  tar_target(
    preDLI_JM_value_corr_CD4,
    jointModel(
      lmeObject = preDLI_long_corr_CD4,
      survObject = preDLI_cox,
      CompRisk = TRUE,
      method = "spline-PH-aGH",
      timeVar = "intSCT2_7",
      parameterization = "value",
      interFact = list("value" = ~ strata(trans) - 1),
      iter.qN = 1000L,
      lng.in.kn = preDLI_basehaz_knots,
      numeriDeriv = "cd",
      eps.Hes = 1e-04
    )
  ),
  tar_target(
    preDLI_JM_value_corr_CD8,
    jointModel(
      lmeObject = preDLI_long_corr_CD8,
      survObject = preDLI_cox,
      CompRisk = TRUE,
      method = "spline-PH-aGH",
      timeVar = "intSCT2_7",
      parameterization = "value",
      interFact = list("value" = ~ strata(trans) - 1),
      iter.qN = 1000L,
      lng.in.kn = preDLI_basehaz_knots,
      numeriDeriv = "cd",
      eps.Hes = 1e-04,
      verbose = TRUE
    )
  )
)
#> Establish _targets.R and _targets_r/targets/preDLI-current-value.R.
  • Time-dependent slopes
list(
  # Store the deriv form (since same for all mods)
  tar_target(
    derivForm_preDLI,
    list(
      fixed = ~ 0 + dns(intSCT2_7, 3) +
        dns(intSCT2_7, 3):as.numeric(hirisk == "yes") +
        dns(intSCT2_7, 3):as.numeric(ATG == "UD(+ATG)") +
        dns(intSCT2_7, 3):as.numeric(hirisk == "yes"):as.numeric(ATG == "UD(+ATG)"),
      random = ~ 0 + dns(intSCT2_7, 3),
      indFixed = c(2:4, 8:13, 15:17),
      indRandom = c(1:3)
    )
  ),
  tar_target(
    preDLI_JM_both_corr_CD3,
    update(
      preDLI_JM_value_corr_CD3,
      lmeObject = preDLI_long_corr_CD3,
      survObject = preDLI_cox,
      parameterization = "both",
      interFact = list("value" = ~ strata(trans) - 1, "slope" = ~ strata(trans) - 1),
      derivForm = derivForm_preDLI,
      lng.in.kn = preDLI_basehaz_knots
    )
  ),
  # This one gets stuck local minima.. update iter.EM to 500
  tar_target(
    preDLI_JM_both_corr_CD4,
    update(
      preDLI_JM_value_corr_CD4,
      lmeObject = preDLI_long_corr_CD4,
      survObject = preDLI_cox,
      parameterization = "both",
      interFact = list("value" = ~ strata(trans) - 1, "slope" = ~ strata(trans) - 1),
      derivForm = derivForm_preDLI,
      lng.in.kn = preDLI_basehaz_knots,
      iter.EM = 500L,
      verbose = TRUE
    )
  ),
  tar_target(
    preDLI_JM_both_corr_CD8,
    update(
      preDLI_JM_value_corr_CD8,
      lmeObject = preDLI_long_corr_CD8,
      survObject = preDLI_cox,
      parameterization = "both",
      interFact = list("value" = ~ strata(trans) - 1, "slope" = ~ strata(trans) - 1),
      derivForm = derivForm_preDLI,
      lng.in.kn = preDLI_basehaz_knots
    )
  )
)
#> Establish _targets.R and _targets_r/targets/preDLI-slopes.R.

Post DLI models (model II)

list(
  # -- Longitudinal models - correlated reffs, random intercept + slope
  tar_map(
    values = cell_lines,
    tar_target(
      postDLI_long_corr,
      run_preDLI_longitudinal(
        cell_line = paste0(cell_line, "_abs_log"),
        form_fixed = "ns(intDLI1, 2) * ATG + CMV_PD",
        form_random = ~ ns(intDLI1, 2) | IDAA,
        dat = NMA_postDLI_datasets$long
      )
    )
  ),

  # -- Post-DLI cox:
  tar_target(
    postDLI_cox,
    run_postDLI_cox(
      Surv(time, status) ~ ATG.1 + strata(trans), # ATG.2 wanted but not enough events
      dat_wide = NMA_postDLI_datasets$wide
    )
  ),

  # 3L used pre-DLI, consider now 2L since only on portion of follow-up
  tar_target(postDLI_basehaz_knots, 2L),

  # -- Joint models, correlated reffs
  tar_target(
    postDLI_JM_corr_CD3,
    jointModel(
      lmeObject = postDLI_long_corr_CD3,
      survObject = postDLI_cox,
      CompRisk = TRUE,
      parameterization = "value",
      interFact = list("value" = ~ strata(trans) - 1),
      method = "spline-PH-aGH",
      timeVar = "intDLI1",
      lng.in.kn = postDLI_basehaz_knots,
      iter.EM = 1000L,
      numeriDeriv = "cd",
      eps.Hes = 1e-04,
      verbose = TRUE
    )
  ),
  tar_target(
    postDLI_JM_corr_CD4,
    jointModel(
      lmeObject = postDLI_long_corr_CD4,
      survObject = postDLI_cox,
      CompRisk = TRUE,
      parameterization = "value",
      interFact = list("value" = ~ strata(trans) - 1),
      method = "spline-PH-aGH",
      timeVar = "intDLI1",
      lng.in.kn = postDLI_basehaz_knots,
      iter.EM = 1000L,
      numeriDeriv = "cd",
      eps.Hes = 1e-04,
      verbose = TRUE
    )
  ),
  tar_target(
    postDLI_JM_corr_CD8,
    jointModel(
      lmeObject = postDLI_long_corr_CD8,
      survObject = postDLI_cox,
      CompRisk = TRUE,
      parameterization = "value",
      interFact = list("value" = ~ strata(trans) - 1),
      method = "spline-PH-aGH",
      timeVar = "intDLI1",
      lng.in.kn = postDLI_basehaz_knots,
      iter.EM = 1000L,
      numeriDeriv = "cd",
      eps.Hes = 1e-04,
      verbose = TRUE
    )
  )
)
#> Establish _targets.R and _targets_r/targets/postDLI-value.R.

Manuscript figures

Add here targets for:

  • Statistical supplement rmd (with corresponding figures)
  • Manuscript figures rmd

Additional NK analyses

list(
  tar_target(
    preDLI_long_corr_NK,
    run_preDLI_longitudinal(
      cell_line = "NK_abs_log",
      form_fixed = "ns(intSCT2_7, 3) * hirisk * ATG + CMV_PD",
      form_random = ~ 0 + ns(intSCT2_7, 3) | IDAA,
      dat = NMA_preDLI_datasets$long
    )
  ),
  tar_target(
    preDLI_JM_value_corr_NK,
    jointModel(
      lmeObject = preDLI_long_corr_NK,
      survObject = preDLI_cox,
      CompRisk = TRUE,
      method = "spline-PH-aGH",
      timeVar = "intSCT2_7",
      parameterization = "value",
      interFact = list("value" = ~ strata(trans) - 1),
      iter.qN = 1000L,
      lng.in.kn = preDLI_basehaz_knots,
      numeriDeriv = "cd",
      eps.Hes = 1e-04
    )
  ),
  tar_target(
    preDLI_tdc_dataset,
    make_tdc_dataset(
      dat_wide = NMA_preDLI_datasets$wide,
      dat_long = NMA_preDLI_datasets$long
    )
  )
)
#> Establish _targets.R and _targets_r/targets/NK-analyses.R.