diff --git a/.Rbuildignore b/.Rbuildignore
index 15a6e96..c6c52e2 100644
--- a/.Rbuildignore
+++ b/.Rbuildignore
@@ -8,3 +8,6 @@
^README\.Rmd$
^\.github$
^.\.sas7bdat$
+^Dockerfile$
+^docker-compose.yml$
+^.dockerignore$
diff --git a/.dockerignore b/.dockerignore
new file mode 100644
index 0000000..100bedb
--- /dev/null
+++ b/.dockerignore
@@ -0,0 +1,2 @@
+.vscode
+.git
\ No newline at end of file
diff --git a/.gitignore b/.gitignore
index b5867af..bec7be1 100644
--- a/.gitignore
+++ b/.gitignore
@@ -11,3 +11,11 @@
*.sas7bdat
vignettes/do_not_commit.Rmd
+vignettes/ae_profile.csv
+vignettes/scen.csv
+vignettes/scen_old.csv
+vignettes/scen_days.csv
+vignettes/*.html
+run_remote.md
+.vscode
+
diff --git a/DESCRIPTION b/DESCRIPTION
index 2ac6b4c..41de51a 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,6 +1,6 @@
Package: simaerep
Title: Simulate adverse event reporting in clinical trials with the goal of detecting under-reporting sites
-Version: 0.3.1
+Version: 0.3.2
Authors@R:
person(given = "Bjoern",
family = "Koneswarakantha",
@@ -16,9 +16,9 @@ License: MIT + file LICENSE
Encoding: UTF-8
LazyData: true
Depends:
- ggplot2,
- dplyr
+ ggplot2
Imports:
+ dplyr,
lintr,
tidyr,
magrittr,
@@ -28,18 +28,16 @@ Imports:
forcats,
cowplot,
RColorBrewer,
- feather,
furrr (>= 0.2.1),
- future
+ progressr,
+ knitr,
+ tibble
Suggests:
testthat,
devtools,
pkgdown,
- knitr,
- tibble,
spelling,
- haven,
- sqldf
+ haven
Roxygen: list(markdown = TRUE)
RoxygenNote: 7.1.1
Language: en-US
diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..c1ab957
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,4 @@
+FROM rocker/verse:4.1
+COPY . /simaerep
+RUN R -e "devtools::install('/simaerep/.', upgrade = 'never', dependencies = TRUE, repos = 'http://cran.us.r-project.org')"
+RUN rm /simaerep -r -f
\ No newline at end of file
diff --git a/NAMESPACE b/NAMESPACE
index 6f87f69..0269c92 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -6,8 +6,10 @@ export(check_df_visit)
export(eval_sites)
export(eval_sites_deprecated)
export(exp_implicit_missing_visits)
+export(get_config)
export(get_ecd_values)
export(get_pat_pool_config)
+export(get_portf_perf)
export(lint_package)
export(pat_aggr)
export(pat_pool)
@@ -17,30 +19,79 @@ export(plot_sim_examples)
export(plot_study)
export(plot_visit_med75)
export(poiss_test_site_ae_vs_study_ae)
+export(prep_for_sim)
export(prob_lower_site_ae_vs_study_ae)
+export(purrr_bar)
+export(sim_after_prep)
+export(sim_scenario)
export(sim_sites)
export(sim_studies)
export(sim_test_data_patient)
+export(sim_test_data_portfolio)
export(sim_test_data_study)
+export(sim_ur_scenarios)
export(site_aggr)
-import(dplyr)
-import(furrr)
-import(future)
+export(with_progress_cnd)
import(ggplot2)
-import(purrr)
-import(tidyr)
importFrom(RColorBrewer,brewer.pal)
importFrom(cowplot,draw_label)
importFrom(cowplot,get_legend)
importFrom(cowplot,ggdraw)
importFrom(cowplot,plot_grid)
-importFrom(feather,read_feather)
-importFrom(feather,write_feather)
+importFrom(dplyr,arrange)
+importFrom(dplyr,between)
+importFrom(dplyr,bind_cols)
+importFrom(dplyr,bind_rows)
+importFrom(dplyr,case_when)
+importFrom(dplyr,dense_rank)
+importFrom(dplyr,desc)
+importFrom(dplyr,distinct)
+importFrom(dplyr,everything)
+importFrom(dplyr,filter)
+importFrom(dplyr,group_by)
+importFrom(dplyr,group_by_at)
+importFrom(dplyr,inner_join)
+importFrom(dplyr,is_grouped_df)
+importFrom(dplyr,lag)
+importFrom(dplyr,left_join)
+importFrom(dplyr,mutate)
+importFrom(dplyr,mutate_all)
+importFrom(dplyr,mutate_at)
+importFrom(dplyr,n)
+importFrom(dplyr,n_distinct)
+importFrom(dplyr,one_of)
+importFrom(dplyr,pull)
+importFrom(dplyr,rename)
+importFrom(dplyr,right_join)
+importFrom(dplyr,row_number)
+importFrom(dplyr,sample_n)
+importFrom(dplyr,select)
+importFrom(dplyr,summarise)
+importFrom(dplyr,summarise_all)
+importFrom(dplyr,summarise_at)
+importFrom(dplyr,ungroup)
+importFrom(dplyr,vars)
importFrom(forcats,fct_relevel)
+importFrom(furrr,furrr_options)
+importFrom(furrr,future_map)
+importFrom(furrr,future_pmap)
+importFrom(knitr,kable)
importFrom(lintr,lint_package)
importFrom(magrittr,"%>%")
+importFrom(progressr,progressor)
+importFrom(progressr,with_progress)
+importFrom(purrr,map)
+importFrom(purrr,map2)
+importFrom(purrr,map2_dbl)
+importFrom(purrr,map_chr)
+importFrom(purrr,map_dbl)
+importFrom(purrr,map_int)
+importFrom(purrr,pmap)
+importFrom(purrr,pmap_dbl)
+importFrom(purrr,possibly)
importFrom(purrr,safely)
importFrom(rlang,":=")
+importFrom(rlang,.data)
importFrom(stats,ecdf)
importFrom(stats,median)
importFrom(stats,p.adjust)
@@ -49,6 +100,12 @@ importFrom(stats,quantile)
importFrom(stats,rnorm)
importFrom(stats,rpois)
importFrom(stats,runif)
+importFrom(stats,sd)
importFrom(stringr,str_count)
importFrom(stringr,str_pad)
+importFrom(tibble,tibble)
+importFrom(tidyr,fill)
+importFrom(tidyr,nest)
+importFrom(tidyr,tibble)
+importFrom(tidyr,unnest)
importFrom(utils,head)
diff --git a/NEWS.md b/NEWS.md
index d26446a..463066f 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,3 +1,6 @@
+# simaerep 0.3.2
+- added portfolio performance assessment
+
# simaerep 0.3.1
- changed MIT License holder from openpharma to F. Hoffmann-La Roche Ltd and simaerep authors
@@ -8,4 +11,4 @@
# simaerep 0.2.0
- use Benjamin Hochberg procedure for alpha error correction
- fix warnings around parallel processing
-- improved SAS files vignette
\ No newline at end of file
+- improved SAS files vignette
diff --git a/R/0_imports.R b/R/0_imports.R
new file mode 100644
index 0000000..96ec556
--- /dev/null
+++ b/R/0_imports.R
@@ -0,0 +1,32 @@
+# satisfy lintr
+# lintr falsely flags possibly_ecdf as unused variable
+# using rlang::.data here causes error with furrr in sim_test_data_portfolio
+# patnum, n_ae, visit
+
+if (getRversion() >= "2.15.1") {
+ utils::globalVariables(c("possibly_ecdf", "patnum", "n_ae", "visit"))
+}
+
+#' @importFrom progressr progressor
+#' @importFrom cowplot get_legend plot_grid ggdraw draw_label plot_grid plot_grid
+#' @importFrom cowplot ggdraw draw_label
+#' @importFrom forcats fct_relevel
+#' @importFrom RColorBrewer brewer.pal
+#' @importFrom utils head
+#' @importFrom stats p.adjust quantile median runif poisson.test ecdf rnorm rpois sd
+#' @importFrom purrr safely possibly pmap map map2 pmap_dbl map2_dbl map_dbl
+#' @importFrom purrr map_int map_chr
+#' @importFrom furrr future_map future_pmap furrr_options
+#' @importFrom progressr with_progress
+#' @importFrom stringr str_count str_pad
+#' @importFrom rlang := .data
+#' @importFrom dplyr select mutate filter summarise group_by summarise_all summarise_at
+#' @importFrom dplyr mutate_all mutate_at ungroup vars bind_cols bind_rows pull
+#' @importFrom dplyr n_distinct distinct arrange right_join left_join inner_join
+#' @importFrom dplyr rename sample_n between row_number dense_rank desc case_when
+#' @importFrom dplyr group_by_at n is_grouped_df everything one_of lag
+#' @importFrom tidyr tibble unnest nest fill
+#' @importFrom lintr lint_package
+#' @importFrom knitr kable
+#' @importFrom tibble tibble
+NULL
diff --git a/R/lint.R b/R/lint.R
index 4be7e85..196c51f 100644
--- a/R/lint.R
+++ b/R/lint.R
@@ -11,7 +11,6 @@
#' @seealso \code{\link[lintr]{lint_package}}
#' @rdname lint_package
#' @export
-#' @importFrom lintr lint_package
lint_package <- function(path = ".", ...) {
lint_results <- lintr::lint_package(path = path,
diff --git a/R/progress.R b/R/progress.R
new file mode 100644
index 0000000..36b2c55
--- /dev/null
+++ b/R/progress.R
@@ -0,0 +1,164 @@
+#' @title execute a purrr or furrr function with a progress
+#' bar
+#' @description call still needs to be wrapped in with_progress()
+#' @param .purrr purrr or furrr function
+#' @param ... iterable arguments passed to .purrr
+#' @param .f function to be executed over iterables
+#' @param .f_args list of arguments passed to .f, Default: list()
+#' @param .purrr_args list of arguments passed to .purrr, Default: list()
+#' @param .steps integer number of iterations
+#' @param .slow logical slows down execution, Default: FALSE
+#' @param .progress logical, show progress bar, Default: TRUE
+#' @examples
+#' # purrr::map
+#' progressr::with_progress(
+#' purrr_bar(rep(0.25, 5), .purrr = purrr::map, .f = Sys.sleep, .steps = 5)
+#' )
+#'
+#'\dontrun{
+#' # purrr::walk
+#' progressr::with_progress(
+#' purrr_bar(rep(0.25, 5), .purrr = purrr::walk,.f = Sys.sleep, .steps = 5)
+#' )
+#'
+#' # progress bar off
+#' progressr::with_progress(
+#' purrr_bar(
+#' rep(0.25, 5), .purrr = purrr::walk,.f = Sys.sleep, .steps = 5, .progress = FALSE
+#' )
+#' )
+#'
+#' # purrr::map2
+#' progressr::with_progress(
+#' purrr_bar(
+#' rep(1, 5), rep(2, 5),
+#' .purrr = purrr::map2,
+#' .f = `+`,
+#' .steps = 5,
+#' .slow = TRUE
+#' )
+#')
+#'
+#' # purrr::pmap
+#' progressr::with_progress(
+#' purrr_bar(
+#' list(rep(1, 5), rep(2, 5)),
+#' .purrr = purrr::pmap,
+#' .f = `+`,
+#' .steps = 5,
+#' .slow = TRUE
+#' )
+#')
+#'
+#' # define function within purr_bar() call
+#' progressr::with_progress(
+#' purrr_bar(
+#' list(rep(1, 5), rep(2, 5)),
+#' .purrr = purrr::pmap,
+#' .f = function(x, y) {
+#' paste0(x, y)
+#' },
+#' .steps = 5,
+#' .slow = TRUE
+#' )
+#')
+#'
+#' # with mutate
+#' progressr::with_progress(
+#' tibble(x = rep(0.25, 5)) %>%
+#' mutate(x = purrr_bar(x, .purrr = purrr::map, .f = Sys.sleep, .steps = 5))
+#' )
+#'}
+#' @rdname purrr_bar
+#' @export
+purrr_bar <- function(...,
+ .purrr,
+ .f,
+ .f_args = list(),
+ .purrr_args = list(),
+ .steps,
+ .slow = FALSE,
+ .progress = TRUE) {
+
+ stopifnot("all .f_args list items must be named" = all(names(.f_args) != ""))
+ stopifnot("all .purrr_args list items must be named" = all(names(.purrr_args) != ""))
+
+ if (.progress) {
+ p <- progressr::progressor(steps = .steps)
+ } else {
+ p <- NULL
+ }
+
+ f <- function(..., .f_args, .p = p) {
+ if (.progress) p()
+ if (.slow) Sys.sleep(0.25)
+ .f_args <- c(list(...), .f_args)
+ do.call(.f, .f_args)
+ }
+
+ do.call(
+ .purrr,
+ c(
+ list(...),
+ list(f, .f_args = .f_args, .p = p),
+ .purrr_args
+ )
+ )
+}
+
+
+#'@title conditional \code{\link[progressr]{with_progress}}
+#'@description internal function. Use instead of
+#' \code{\link[progressr]{with_progress}} within custom functions with progress
+#' bars.
+#'@param ex expression
+#'@param progress logical, Default: TRUE
+#'@details DETAILS
+#' @examples
+#' if (interactive()) {
+#'
+#' with_progress_cnd(
+#' purrr_bar(rep(0.25, 5), .purrr = purrr::map, .f = Sys.sleep, .steps = 5),
+#' progress = TRUE
+#' )
+#'
+#' with_progress_cnd(
+#' purrr_bar(rep(0.25, 5), .purrr = purrr::map, .f = Sys.sleep, .steps = 5),
+#' progress = FALSE
+#' )
+#'
+#' # wrap a function with progress bar with another call with progress bar
+#'
+#' f1 <- function(x, progress = TRUE) {
+#' with_progress_cnd(
+#' purrr_bar(x, .purrr = purrr::walk, .f = Sys.sleep, .steps = length(x), .progress = progress),
+#' progress = progress
+#' )
+#' }
+#'
+#' # inner progress bar blocks outer progress bar
+#' progressr::with_progress(
+#' purrr_bar(
+#' rep(rep(1, 3),3), .purrr = purrr::walk, .f = f1, .steps = 3,
+#' .f_args = list(progress = TRUE)
+#' )
+#' )
+#'
+#' # inner progress bar turned off
+#' progressr::with_progress(
+#' purrr_bar(
+#' rep(list(rep(0.25, 3)), 5), .purrr = purrr::walk, .f = f1, .steps = 5,
+#' .f_args = list(progress = FALSE)
+#' )
+#' )
+#'}
+#'@seealso \code{\link[progressr]{with_progress}}
+#'@rdname with_progress_cnd
+#'@export
+with_progress_cnd <- function(ex, progress = TRUE) {
+ if (progress) {
+ progressr::with_progress(eval(ex))
+ } else {
+ eval(ex)
+ }
+}
diff --git a/R/simaerep.R b/R/simaerep.R
index 583b267..93fc963 100644
--- a/R/simaerep.R
+++ b/R/simaerep.R
@@ -22,32 +22,36 @@ if (getRversion() >= "2.15.1") {
#' df_visit$study_id <- "A"
#'
#' df_visit_filt <- df_visit %>%
-#' filter(visit != 3)
+#' dplyr::filter(visit != 3)
#'
#' df_visit_corr <- check_df_visit(df_visit_filt)
#' 3 %in% df_visit_corr$visit
#' nrow(df_visit_corr) == nrow(df_visit)
#'
-#' df_visit_corr <- check_df_visit(bind_rows(df_visit, df_visit))
+#' df_visit_corr <- check_df_visit(dplyr::bind_rows(df_visit, df_visit))
#' nrow(df_visit_corr) == nrow(df_visit)
#'
#' @rdname check_df_visit
#' @export
check_df_visit <- function(df_visit) {
+
+ df_visit <- ungroup(df_visit)
+
stopifnot(
all(c("study_id", "site_number", "patnum", "n_ae", "visit") %in% names(df_visit))
)
cols_na <- df_visit %>%
summarise_at(
- vars(
- .data$study_id,
- .data$site_number,
- .data$patnum,
- .data$n_ae,
- .data$visit),
- ~ any(is.na(.))
- ) %>%
+ vars(
+ .data$study_id,
+ .data$site_number,
+ .data$patnum,
+ .data$n_ae,
+ .data$visit
+ ),
+ anyNA
+ ) %>%
unlist()
if (any(cols_na)) {
@@ -67,6 +71,15 @@ check_df_visit <- function(df_visit) {
all()
)
+ df_visit %>%
+ group_by(.data$study_id, .data$patnum) %>%
+ summarise(n_sites = n_distinct(.data$site_number), .groups = "drop") %>%
+ mutate(check = .data$n_sites == 1) %>%
+ pull(.data$check) %>%
+ unlist() %>%
+ all() %>%
+ stopifnot("patient ids must be site exclusive" = .)
+
df_visit <- exp_implicit_missing_visits(df_visit)
df_visit <- aggr_duplicated_visits(df_visit)
@@ -324,7 +337,6 @@ get_visit_med75 <- function(df_pat,
#' @seealso \code{\link[simaerep]{site_aggr}},
#' \code{\link[simaerep]{sim_sites}},
#' \code{\link[stats]{p.adjust}}
-#' @importFrom stats p.adjust
#' @export
eval_sites <- function(df_sim_sites,
method = "BH",
@@ -356,7 +368,7 @@ eval_sites <- function(df_sim_sites,
if ("prob_low" %in% names(df_out)) {
- if (anyNA(df_out$pval)) {
+ if (anyNA(df_out$prob_low)) {
warning("prob_lower column contains NA")
}
@@ -616,11 +628,10 @@ pat_pool <- function(df_visit, df_site) {
#' @seealso \code{\link[purrr]{safely}}
#' @rdname prob_lower_site_ae_vs_study_ae
#' @export
-#' @importFrom purrr safely
prob_lower_site_ae_vs_study_ae <- function(site_ae, study_ae, r = 1000, parallel = FALSE) {
# if there is only one site
- if (is_null(study_ae)) {
+ if (is.null(study_ae)) {
prob_lower <- 1
return(prob_lower)
}
@@ -637,8 +648,6 @@ prob_lower_site_ae_vs_study_ae <- function(site_ae, study_ae, r = 1000, parallel
# set-up multiprocessing
# multiprocessing currently not used by sim_sites()
if (parallel) {
- requireNamespace("furrr")
- suppressWarnings(future::plan(multiprocess))
.f_map_int <- function(...) {
furrr::future_map_int(..., .options = furrr_options(seed = TRUE))
}
@@ -673,13 +682,17 @@ prob_lower_site_ae_vs_study_ae <- function(site_ae, study_ae, r = 1000, parallel
#' @param r integer, denotes number of simulations, default = 1000
#' @param poisson_test logical, calculates poisson.test pvalue
#' @param prob_lower logical, calculates probability for getting a lower value
+#' @param progress logical, display progress bar, Default = TRUE
#' @return dataframe with the following columns:
#' \describe{
#' \item{**study_id**}{study identification}
#' \item{**site_number**}{site identification}
+#' \item{**n_pat**}{number of patients at site}
#' \item{**visit_med75**}{median(max(visit)) * 0.75}
+#' \item{**n_pat_with_med75**}{number of patients at site with med75}
#' \item{**mean_ae_site_med75**}{mean AE at visit_med75 site level}
#' \item{**mean_ae_study_med75**}{mean AE at visit_med75 study level}
+#' \item{**n_pat_with_med75_study**}{number of patients at study with med75 excl. site}
#' \item{**pval**}{p-value as returned by \code{\link[stats]{poisson.test}}}
#' \item{**prob_low**}{bootstrapped probability for having mean_ae_site_med75 or lower}
#' }
@@ -705,18 +718,55 @@ prob_lower_site_ae_vs_study_ae <- function(site_ae, study_ae, r = 1000, parallel
#' \code{\link[simaerep]{pat_pool}},
#' \code{\link[simaerep]{prob_lower_site_ae_vs_study_ae}},
#' \code{\link[simaerep]{poiss_test_site_ae_vs_study_ae}},
+#' \code{\link[simaerep]{sim_sites}},
+#' \code{\link[simaerep]{prep_for_sim}}
#' @export
-#' @import dplyr
-#' @import purrr
-#' @import tidyr
sim_sites <- function(df_site,
df_visit,
r = 1000,
poisson_test = TRUE,
- prob_lower = TRUE) {
+ prob_lower = TRUE,
+ progress = TRUE) {
df_visit <- check_df_visit(df_visit)
+ df_sim_prep <- prep_for_sim(df_site, df_visit)
+
+ df_sim <- sim_after_prep(df_sim_prep,
+ r = r,
+ poisson_test = poisson_test,
+ prob_lower = prob_lower,
+ progress = progress)
+
+ return(df_sim)
+}
+
+#'@title prepare data for simulation
+#'@description Internal function called by \code{\link[simaerep]{sim_sites}}.
+#' Collect AEs per patient at visit_med75 for site and study as a vector of
+#' integers.
+#'@param df_visit dataframe, created by \code{\link[simaerep]{sim_sites}}
+#'@param df_site dataframe created by \code{\link[simaerep]{site_aggr}}
+#'@return dataframe
+#' @examples
+#' df_visit <- sim_test_data_study(
+#' n_pat = 100,
+#' n_sites = 5,
+#' frac_site_with_ur = 0.4,
+#' ur_rate = 0.2
+#')
+#'
+#' df_visit$study_id <- "A"
+#'
+#' df_site <- site_aggr(df_visit)
+#'
+#' df_prep <- prep_for_sim(df_site, df_visit)
+#' df_prep
+#'@rdname prep_for_sim
+#' @seealso \code{\link[simaerep]{sim_sites}}, \code{\link[simaerep]{sim_after_prep}}
+#'@export
+prep_for_sim <- function(df_site, df_visit) {
+
df_pat_pool <- pat_pool(df_visit, df_site)
df_sim_prep <- df_visit %>%
@@ -727,20 +777,67 @@ sim_sites <- function(df_site,
.data$n_pat,
.data$n_pat_with_med75,
.data$visit_med75) %>%
- summarise(patients = list(unique(.data$patnum)))
+ summarise(patients = list(unique(.data$patnum)), .groups = "drop")
df_sim_prep <- df_sim_prep %>%
left_join(df_pat_pool, "study_id") %>%
mutate(
- pat_pool = map2(.data$pat_pool, .data$visit_med75, function(x, y) filter(x, .data$visit == y)),
- n_ae_site = map2(.data$pat_pool, .data$patients, function(x, y) filter(x, .data$patnum %in% y)),
- n_ae_study = map2(.data$pat_pool, .data$patients, function(x, y) filter(x, ! .data$patnum %in% y)),
+ pat_pool = map2(
+ .data$pat_pool, .data$visit_med75,
+ function(x, y) filter(x, .data$visit == y)
+ ),
+ n_ae_site = map2(
+ .data$pat_pool, .data$patients,
+ function(x, y) filter(x, .data$patnum %in% y)
+ ),
+ n_ae_study = map2(
+ .data$pat_pool, .data$patients,
+ function(x, y) filter(x, ! .data$patnum %in% y)
+ ),
n_ae_site = map(.data$n_ae_site, "n_ae"),
n_ae_study = map(.data$n_ae_study, "n_ae")
) %>%
select(- .data$patients,
- .data$pat_pool)
+ return(df_sim_prep)
+
+}
+
+#'@title start simulation after preparation
+#'@description Internal function called by \code{\link[simaerep]{sim_sites}}
+#' after \code{\link[simaerep]{prep_for_sim}}
+#'@param df_sim_prep dataframe as returned by
+#' \code{\link[simaerep]{prep_for_sim}}
+#'@inheritParams sim_sites
+#'@return dataframe
+#' @examples
+#' df_visit <- sim_test_data_study(
+#' n_pat = 100,
+#' n_sites = 5,
+#' frac_site_with_ur = 0.4,
+#' ur_rate = 0.2
+#')
+#'
+#' df_visit$study_id <- "A"
+#'
+#' df_site <- site_aggr(df_visit)
+#'
+#' df_prep <- prep_for_sim(df_site, df_visit)
+#'
+#' df_sim <- sim_after_prep(df_prep)
+#'
+#' df_sim
+#'@rdname sim_after_prep
+#'@seealso \code{\link[simaerep]{sim_sites}},
+#' \code{\link[simaerep]{prep_for_sim}}
+#'@export
+sim_after_prep <- function(df_sim_prep,
+ r = 1000,
+ poisson_test = FALSE,
+ prob_lower = TRUE,
+ progress = FALSE) {
+
df_sim <- df_sim_prep
if (poisson_test) {
@@ -750,17 +847,29 @@ sim_sites <- function(df_site,
}
if (prob_lower) {
- df_sim <- df_sim %>%
- mutate(prob_low = map2_dbl(.data$n_ae_site, .data$n_ae_study,
- prob_lower_site_ae_vs_study_ae,
- r = r
- ))
+ with_progress_cnd(
+ df_sim <- df_sim %>%
+ mutate(
+ prob_low = purrr_bar(
+ .data$n_ae_site, .data$n_ae_study,
+ .purrr = map2_dbl,
+ .f = prob_lower_site_ae_vs_study_ae,
+ .f_args = list(r = r),
+ .steps = nrow(df_sim),
+ .progress = progress
+ )
+ ),
+ progress = progress
+ )
}
- df_sim %>%
- mutate(mean_ae_site_med75 = map_dbl(n_ae_site, mean),
- mean_ae_study_med75 = map_dbl(n_ae_study, mean)) %>%
- select(- n_ae_site, - n_ae_study) %>%
+ # clean
+
+ df_sim <- df_sim %>%
+ mutate(mean_ae_site_med75 = map_dbl(.data$n_ae_site, mean),
+ mean_ae_study_med75 = map_dbl(.data$n_ae_study, mean),
+ n_pat_with_med75_study = map_int(.data$n_ae_study, length)) %>%
+ select(- .data$n_ae_site, - .data$n_ae_study) %>%
select(.data$study_id,
.data$site_number,
.data$n_pat,
@@ -768,9 +877,12 @@ sim_sites <- function(df_site,
.data$visit_med75,
.data$mean_ae_site_med75,
.data$mean_ae_study_med75,
- everything()) %>%
- ungroup() %>%
- return()
+ .data$n_pat_with_med75_study,
+ dplyr::everything()) %>%
+ ungroup()
+
+ return(df_sim)
+
}
#' @title configure study patient pool by site parameters
@@ -793,7 +905,7 @@ sim_sites <- function(df_site,
#'
#' df_visit2$study_id <- "B"
#'
-#' df_visit <- bind_rows(df_visit1, df_visit2)
+#' df_visit <- dplyr::bind_rows(df_visit1, df_visit2)
#'
#' df_site <- site_aggr(df_visit)
#'
@@ -869,7 +981,7 @@ get_pat_pool_config <- function(df_visit, df_site, min_n_pat_with_med75 = 1) {
#'
#' df_visit2$study_id <- "B"
#'
-#' df_visit <- bind_rows(df_visit1, df_visit2)
+#' df_visit <- dplyr::bind_rows(df_visit1, df_visit2)
#'
#' df_site <- site_aggr(df_visit)
#'
@@ -885,14 +997,6 @@ get_pat_pool_config <- function(df_visit, df_site, min_n_pat_with_med75 = 1) {
#' mean_ae at visit_med75
#' @rdname sim_studies
#' @export
-#' @importFrom feather read_feather write_feather
-#' @import dplyr
-#' @import purrr
-#' @import tidyr
-#' @import furrr
-#' @import future
-#' @importFrom stringr str_count str_pad
-#' @importFrom rlang :=
sim_studies <- function(df_visit,
df_site,
r = 100,
@@ -913,7 +1017,7 @@ sim_studies <- function(df_visit,
min_n_pat_with_med75 = min_n_pat_with_med75)
# filter studies --------------------------------------------
- if (!is_null(studies)) {
+ if (!is.null(studies)) {
if (!all(studies %in% unique(df_visit$study_id))) {
stop("not all passed studies can be found in input data")
}
@@ -925,8 +1029,6 @@ sim_studies <- function(df_visit,
# set-up multiprocessing -------------------------------------
if (parallel) {
- requireNamespace("furrr")
- suppressWarnings(future::plan(multiprocess))
.f_map <- function(...) {
furrr::future_map(
...,
@@ -1027,7 +1129,6 @@ sim_studies <- function(df_visit,
#' knitr::kable(digits = 2)
#'@rdname site_aggr
#'@export
-#'@importFrom stats quantile
site_aggr <- function(df_visit,
method = "med75_adj",
min_pat_pool = 0.2) {
@@ -1091,15 +1192,13 @@ site_aggr <- function(df_visit,
#' @seealso [sim_sites()][sim_sites()]
#' @rdname poiss_test_site_ae_vs_study_ae
#' @export
-#' @importFrom purrr safely
-#' @importFrom stats median runif poisson.test
poiss_test_site_ae_vs_study_ae <- function(site_ae,
- study_ae,
- visit_med75) {
+ study_ae,
+ visit_med75) {
# if there is only one site
- if (is_null(study_ae)) {
+ if (is.null(study_ae)) {
pval <- 1
return(pval)
}
@@ -1130,99 +1229,9 @@ poiss_test_site_ae_vs_study_ae <- function(site_ae,
pval <- poisson_test$result$p.value
# this controls for cases when poisson.test fails for some reason
- if (is_null(pval)) {
+ if (is.null(pval)) {
pval <- 1
}
return(pval)
}
-
-
-#' @title simulate study test data
-#' @description evenly distributes a number of given patients across a number of
-#' given sites. Then simulates ae development of each patient reducing the
-#' number of reported AEs for patients distributed to AE-under-reporting sites.
-#' @param n_pat integer, number of patients, Default: 1000
-#' @param n_sites integer, number of sites, Default: 20
-#' @param frac_site_with_ur fraction of AE under-reporting sites, Default: 0
-#' @param ur_rate AE under-reporting rate, will lower mean ae per visit used to
-#' simulate patients at sites flagged as AE-under-reporting., Default: 0
-#' @param max_visit_mean mean of the maximum number of visits of each patient,
-#' Default: 20
-#' @param max_visit_sd standard deviation of maximum number of visits of each
-#' patient, Default: 4
-#' @param ae_per_visit_mean mean ae per visit per patient, Default: 0.5
-#' @return tibble with columns site_number, patnum, is_ur, max_visit_mean,
-#' max_visit_sd, ae_per_visit_mean, visit, n_ae
-#' @details maximum visit number will be sampled from normal distribution with
-#' characteristics derived from max_visit_mean and max_visit_sd, while the ae
-#' per visit will be sampled from a poisson distribution described by
-#' ae_per_visit_mean.
-#' @examples
-#' set.seed(1)
-#' df_visit <- sim_test_data_study(n_pat = 100, n_sites = 5)
-#' df_visit[which(df_visit$patnum == "P000001"),]
-#' df_visit <- sim_test_data_study(n_pat = 100, n_sites = 5,
-#' frac_site_with_ur = 0.2, ur_rate = 0.5)
-#' df_visit[which(df_visit$patnum == "P000001"),]
-#' @rdname sim_test_data_study
-#' @export
-sim_test_data_study <- function(n_pat = 1000,
- n_sites = 20,
- frac_site_with_ur = 0,
- ur_rate = 0,
- max_visit_mean = 20,
- max_visit_sd = 4,
- ae_per_visit_mean = 0.5
- ) {
- tibble(patnum = seq(1, n_pat)) %>%
- mutate(patnum = str_pad(patnum, width = 6, side = "left", pad = "0"),
- patnum = paste0("P", patnum),
- site_number = seq(1, n_pat),
- site_number = if (n_sites > 1) cut(.data$site_number, n_sites, labels = FALSE) else 1,
- is_ur = ifelse(.data$site_number <= (max(.data$site_number) * frac_site_with_ur), TRUE, FALSE),
- site_number = str_pad(.data$site_number, width = 4, side = "left", pad = "0"),
- site_number = paste0("S", .data$site_number),
- max_visit_mean = max_visit_mean,
- max_visit_sd = max_visit_sd,
- ae_per_visit_mean = ifelse(.data$is_ur, ae_per_visit_mean * (1 - ur_rate), ae_per_visit_mean),
- aes = pmap(list(vm = max_visit_mean,
- vs = max_visit_sd,
- am = ae_per_visit_mean),
- function(vm, vs, am) sim_test_data_patient(
- .f_sample_max_visit = function() rnorm(1, mean = vm, sd = vs),
- .f_sample_ae_per_visit = function(max_visit) rpois(max_visit, am))),
- aes = map(aes, ~ tibble(visit = seq(1, length(.)), n_ae = .))) %>%
- unnest(aes)
-
-}
-
-#' @title simulate patient ae reporting test data
-#' @description helper function for [sim_test_data_study()][sim_test_data_study()]
-#' @param .f_sample_ae_per_visit function used to sample the aes for each visit,
-#' Default: function(x) rpois(x, 0.5)
-#' @param .f_sample_max_visit function used to sample the maximum number of aes,
-#' Default: function() rnorm(1, mean = 20, sd = 4)
-#' @return vector containing cumulative aes
-#' @details ""
-#' @examples
-#' replicate(5, sim_test_data_patient())
-#' replicate(5, sim_test_data_patient(
-#' .f_sample_ae_per_visit = function(x) rpois(x, 1.2))
-#' )
-#' replicate(5, sim_test_data_patient(
-#' .f_sample_max_visit = function() rnorm(1, mean = 5, sd = 5))
-#' )
-#' @importFrom stats ecdf rnorm rpois
-#' @rdname sim_test_data_patient
-#' @export
-sim_test_data_patient <- function(.f_sample_max_visit = function() rnorm(1, mean = 20, sd = 4),
- .f_sample_ae_per_visit = function(max_visit) rpois(max_visit, 0.5)) {
-
- max_visit <- as.integer(.f_sample_max_visit())
- max_visit <- ifelse(max_visit < 1, 1, max_visit)
- aes <- .f_sample_ae_per_visit(max_visit)
- cum_aes <- cumsum(aes)
-
- return(cum_aes)
-}
diff --git a/R/simaerep_plot.R b/R/simaerep_plot.R
index 5121bb6..42b1042 100644
--- a/R/simaerep_plot.R
+++ b/R/simaerep_plot.R
@@ -22,15 +22,14 @@ if (getRversion() >= "2.15.1") {
#' @param color_low character, hex color value, Default: '#25A69A'
#' @param size_dots integer, Default: 10
#' @return ggplot object
-#' @importFrom magrittr %>%
#' @details ' '
#' @examples
-#' study <- tibble(
+#' study <- tibble::tibble(
#' site = LETTERS[1:3],
#' patients = c(list(seq(1, 50, 1)), list(seq(1, 40, 1)), list(seq(1, 10, 1)))
#' ) %>%
#' tidyr::unnest(patients) %>%
-#' mutate(n_ae = as.integer(runif(min = 0, max = 10, n = nrow(.))))
+#' dplyr::mutate(n_ae = as.integer(runif(min = 0, max = 10, n = nrow(.))))
#'
#' plot_dots(study)
#' @rdname plot_dots
@@ -73,14 +72,14 @@ plot_dots <- function(df,
df_label <- df %>%
group_by_at(vars(one_of(col_group))) %>%
- summarize(
+ summarise(
x = max(.data$x),
y = max(.data$y),
mean_ae = mean(.data$n_ae),
label = paste("\u00d8:", round(mean(.data$n_ae), 1))
)
- if (!is_null(thresh)) {
+ if (!is.null(thresh)) {
df_label <- df_label %>%
mutate(color = ifelse(.data$mean_ae >= thresh, color_high, color_low))
} else {
@@ -155,7 +154,6 @@ plot_dots <- function(df,
#' \code{\link[cowplot]{get_legend}},\code{\link[cowplot]{plot_grid}}
#' @rdname plot_sim_example
#' @export
-#' @importFrom cowplot get_legend plot_grid
plot_sim_example <- function(substract_ae_per_pat = 0,
size_dots = 10,
size_raster_label = 12,
@@ -306,7 +304,6 @@ plot_sim_example <- function(substract_ae_per_pat = 0,
#' \code{\link[cowplot]{ggdraw}},\code{\link[cowplot]{draw_label}},\code{\link[cowplot]{plot_grid}}
#' @rdname plot_sim_examples
#' @export
-#' @importFrom cowplot ggdraw draw_label plot_grid
plot_sim_examples <- function(substract_ae_per_pat = c(0, 1, 3), ...) {
make_title <- function(x, fontface = "bold", size = 14, angle = 0) {
@@ -424,10 +421,6 @@ plot_sim_examples <- function(substract_ae_per_pat = c(0, 1, 3), ...) {
#' plot_study(df_visit, df_site, df_eval, study = "A")
#' @rdname plot_study
#' @export
-#' @importFrom cowplot plot_grid ggdraw draw_label
-#' @importFrom forcats fct_relevel
-#' @importFrom RColorBrewer brewer.pal
-#' @importFrom utils head
#' @import ggplot2
plot_study <- function(df_visit,
df_site,
@@ -442,7 +435,7 @@ plot_study <- function(df_visit,
# alert level -------------------------------------------------------------
- if (is_null(df_al)) {
+ if (is.null(df_al)) {
df_visit <- df_visit %>%
mutate(alert_level_site = NA,
alert_level_study = NA)
@@ -821,7 +814,7 @@ plot_visit_med75 <- function(df_visit,
study_possible_max_visit <- df_pat %>%
filter(.data$study_id == study_id_str) %>%
- summarize(study_possible_max_visit = quantile(.data$max_visit_per_pat, 1 - min_pat_pool)) %>%
+ summarise(study_possible_max_visit = quantile(.data$max_visit_per_pat, 1 - min_pat_pool)) %>%
pull(.data$study_possible_max_visit) %>%
round(0)
diff --git a/R/simulate_test_data.R b/R/simulate_test_data.R
new file mode 100644
index 0000000..2f0e88d
--- /dev/null
+++ b/R/simulate_test_data.R
@@ -0,0 +1,772 @@
+#' @title simulate study test data
+#' @description evenly distributes a number of given patients across a number of
+#' given sites. Then simulates ae development of each patient reducing the
+#' number of reported AEs for patients distributed to AE-under-reporting sites.
+#' @param n_pat integer, number of patients, Default: 1000
+#' @param n_sites integer, number of sites, Default: 20
+#' @param frac_site_with_ur fraction of AE under-reporting sites, Default: 0
+#' @param ur_rate AE under-reporting rate, will lower mean ae per visit used to
+#' simulate patients at sites flagged as AE-under-reporting., Default: 0
+#' @param max_visit_mean mean of the maximum number of visits of each patient,
+#' Default: 20
+#' @param max_visit_sd standard deviation of maximum number of visits of each
+#' patient, Default: 4
+#' @param ae_per_visit_mean mean ae per visit per patient, Default: 0.5
+#' @return tibble with columns site_number, patnum, is_ur, max_visit_mean,
+#' max_visit_sd, ae_per_visit_mean, visit, n_ae
+#' @details maximum visit number will be sampled from normal distribution with
+#' characteristics derived from max_visit_mean and max_visit_sd, while the ae
+#' per visit will be sampled from a poisson distribution described by
+#' ae_per_visit_mean.
+#' @examples
+#' set.seed(1)
+#' df_visit <- sim_test_data_study(n_pat = 100, n_sites = 5)
+#' df_visit[which(df_visit$patnum == "P000001"),]
+#' df_visit <- sim_test_data_study(n_pat = 100, n_sites = 5,
+#' frac_site_with_ur = 0.2, ur_rate = 0.5)
+#' df_visit[which(df_visit$patnum == "P000001"),]
+#' @rdname sim_test_data_study
+#' @export
+sim_test_data_study <- function(n_pat = 1000,
+ n_sites = 20,
+ frac_site_with_ur = 0,
+ ur_rate = 0,
+ max_visit_mean = 20,
+ max_visit_sd = 4,
+ ae_per_visit_mean = 0.5
+) {
+ tibble(patnum = seq(1, n_pat)) %>%
+ mutate(patnum = str_pad(patnum, width = 6, side = "left", pad = "0"),
+ patnum = paste0("P", patnum),
+ site_number = seq(1, n_pat),
+ site_number = if (n_sites > 1) cut(.data$site_number, n_sites, labels = FALSE) else 1,
+ is_ur = ifelse(.data$site_number <= (max(.data$site_number) * frac_site_with_ur), TRUE, FALSE),
+ site_number = str_pad(.data$site_number, width = 4, side = "left", pad = "0"),
+ site_number = paste0("S", .data$site_number),
+ max_visit_mean = max_visit_mean,
+ max_visit_sd = max_visit_sd,
+ ae_per_visit_mean = ifelse(.data$is_ur, ae_per_visit_mean * (1 - ur_rate), ae_per_visit_mean),
+ aes = pmap(list(vm = max_visit_mean,
+ vs = max_visit_sd,
+ am = ae_per_visit_mean),
+ function(vm, vs, am) sim_test_data_patient(
+ .f_sample_max_visit = function() rnorm(1, mean = vm, sd = vs),
+ .f_sample_ae_per_visit = function(max_visit) rpois(max_visit, am))),
+ aes = map(aes, ~ tibble(visit = seq(1, length(.)), n_ae = .))) %>%
+ unnest(aes)
+
+}
+
+#' @title simulate patient ae reporting test data
+#' @description helper function for [sim_test_data_study()][sim_test_data_study()]
+#' @param .f_sample_ae_per_visit function used to sample the aes for each visit,
+#' Default: function(x) rpois(x, 0.5)
+#' @param .f_sample_max_visit function used to sample the maximum number of aes,
+#' Default: function() rnorm(1, mean = 20, sd = 4)
+#' @return vector containing cumulative aes
+#' @details ""
+#' @examples
+#' replicate(5, sim_test_data_patient())
+#' replicate(5, sim_test_data_patient(
+#' .f_sample_ae_per_visit = function(x) rpois(x, 1.2))
+#' )
+#' replicate(5, sim_test_data_patient(
+#' .f_sample_max_visit = function() rnorm(1, mean = 5, sd = 5))
+#' )
+#' @rdname sim_test_data_patient
+#' @export
+sim_test_data_patient <- function(.f_sample_max_visit = function() rnorm(1, mean = 20, sd = 4),
+ .f_sample_ae_per_visit = function(max_visit) rpois(max_visit, 0.5)) {
+
+ max_visit <- as.integer(.f_sample_max_visit())
+ max_visit <- ifelse(max_visit < 1, 1, max_visit)
+ aes <- .f_sample_ae_per_visit(max_visit)
+ cum_aes <- cumsum(aes)
+
+ return(cum_aes)
+}
+
+#' @title simulate single scenario
+#' @description internal function called by simulate_scenarios()
+#' @param n_ae_site integer vector
+#' @param n_ae_study integer vector
+#' @param frac_pat_with_ur double
+#' @param ur_rate double
+#' @return list
+#' @examples
+#' sim_scenario(c(5,5,5,5), c(8,8,8,8), 0.2, 0.5)
+#' sim_scenario(c(5,5,5,5), c(8,8,8,8), 0.75, 0.5)
+#' sim_scenario(c(5,5,5,5), c(8,8,8,8), 1, 0.5)
+#' sim_scenario(c(5,5,5,5), c(8,8,8,8), 1, 1)
+#' sim_scenario(c(5,5,5,5), c(8,8,8,8), 0, 0.5)
+#' sim_scenario(c(5,5,5,5), c(8,8,8,8), 2, 0.5)
+#' @rdname sim_scenario
+#' @export
+sim_scenario <- function(n_ae_site, n_ae_study, frac_pat_with_ur, ur_rate) {
+
+ if (frac_pat_with_ur == 0 | ur_rate == 0) {
+ return(list(n_ae_site = n_ae_site, n_ae_study = n_ae_study))
+ }
+
+ if (frac_pat_with_ur > 1) frac_pat_with_ur <- 1
+
+ n_pat_site <- length(n_ae_site)
+ n_pat_study <- length(n_ae_study)
+ n_pat_tot <- n_pat_site + n_pat_study
+ n_pat_ur <- round(n_pat_tot * frac_pat_with_ur, 0)
+
+ max_ix_site <- min(c(n_pat_ur, n_pat_site))
+
+ n_ae_site[1:max_ix_site] <- n_ae_site[1:max_ix_site] * (1 - ur_rate)
+
+ if (n_pat_ur > n_pat_site) {
+ max_ix_study <- n_pat_ur - n_pat_site
+
+ n_ae_study[1:max_ix_study] <- n_ae_study[1:max_ix_study] * (1 - ur_rate)
+ }
+
+ return(list(n_ae_site = n_ae_site, n_ae_study = n_ae_study))
+}
+
+#' @title Simulate Under-Reporting Scenarios
+#' @description Use with simulated portfolio data to generate under-reporting
+#' stats for specified scenarios.
+#' @param df_portf dataframe as returned by \code{\link{sim_test_data_portfolio}}
+#' @param extra_ur_sites numeric, set maximum number of additional
+#' under-reporting sites, see details Default: 3
+#' @param ur_rate numeric vector, set under-reporting rates for scenarios
+#' Default: c(0.25, 0.5)
+#' @inheritParams sim_sites
+#' @param parallel logical, use parallel processing see details, Default: FALSE
+#' @param progress logical, show progress bar, Default: TRUE
+#' @param site_aggr_args named list of parameters passed to
+#' \code{\link{site_aggr}}, Default: list()
+#' @param eval_sites_args named list of parameters passed to
+#' \code{\link{eval_sites}}, Default: list()
+#' @return dataframe with the following columns:
+#' \describe{
+#' \item{**study_id**}{study identification}
+#' \item{**site_number**}{site identification}
+#' \item{**n_pat**}{number of patients at site}
+#' \item{**n_pat_with_med75**}{number of patients at site with visit_med75}
+#' \item{**visit_med75**}{median(max(visit)) * 0.75}
+#' \item{**mean_ae_site_med75**}{mean AE at visit_med75 site level}
+#' \item{**mean_ae_study_med75**}{mean AE at visit_med75 study level}
+#' \item{**n_pat_with_med75_study**}{number of patients at site with
+#' visit_med75 at study excl site}
+#' \item{**extra_ur_sites**}{additional sites
+#' with under-reporting patients}
+#' \item{**frac_pat_with_ur**}{ratio of
+#' patients in study that are under-reporting}
+#' \item{**ur_rate**}{under-reporting rate}
+#' \item{**pval**}{p-value as
+#' returned by \code{\link[stats]{poisson.test}}}
+#' \item{**prob_low**}{bootstrapped probability for having mean_ae_site_med75
+#' or lower} \item{**pval_adj**}{adjusted p-values}
+#' \item{**prob_low_adj**}{adjusted bootstrapped probability for having
+#' mean_ae_site_med75 or lower} \item{**pval_prob_ur**}{probability
+#' under-reporting as 1 - pval_adj, poisson.test (use as benchmark)}
+#' \item{**prob_low_prob_ur**}{probability under-reporting as 1 -
+#' prob_low_adj, bootstrapped (use)}
+#'}
+#' @details The function will apply under-reporting scenarios to each site.
+#' Reducing the number of AEs by a given under-reporting (ur_rate) for all
+#' patients at the site and add the corresponding under-reporting statistics.
+#' Since the under-reporting probability is also affected by the number of
+#' other sites that are under-reporting we additionally calculate
+#' under-reporting statistics in a scenario where additional under reporting
+#' sites are present. For this we use the median number of patients per site
+#' at the study to calculate the final number of patients for which we lower
+#' the AEs in a given under-reporting scenario. We use the furrr package to
+#' implement parallel processing as these simulations can take a long time to
+#' run. For this to work we need to specify the plan for how the code should
+#' run, e.g. plan(multisession, workers = 18)
+#' @examples
+#' if (interactive()) {
+#' df_visit1 <- sim_test_data_study(n_pat = 100, n_sites = 10,
+#' frac_site_with_ur = 0.4, ur_rate = 0.6)
+#'
+#' df_visit1$study_id <- "A"
+#'
+#' df_visit2 <- sim_test_data_study(n_pat = 100, n_sites = 10,
+#' frac_site_with_ur = 0.2, ur_rate = 0.1)
+#'
+#' df_visit2$study_id <- "B"
+#'
+#' df_visit <- dplyr::bind_rows(df_visit1, df_visit2)
+#'
+#' df_site_max <- df_visit %>%
+#' group_by(study_id, site_number, patnum) %>%
+#' summarise(max_visit = max(visit),
+#' max_ae = max(n_ae),
+#' .groups = "drop")
+#'
+#' df_config <- get_config(df_site_max)
+#'
+#' df_config
+#'
+#' df_portf <- sim_test_data_portfolio(df_config)
+#'
+#' df_portf
+#'
+#' df_scen <- sim_ur_scenarios(df_portf,
+#' extra_ur_sites = 2,
+#' ur_rate = c(0.5, 1))
+#'
+#'
+#' df_scen
+#'
+#' df_perf <- get_portf_perf(df_scen)
+#'
+#' df_perf
+#' }
+#' @seealso
+#' \code{\link{sim_test_data_study}}
+#' \code{\link{get_config}}
+#' \code{\link{sim_test_data_portfolio}}
+#' \code{\link{sim_ur_scenarios}}
+#' \code{\link{get_portf_perf}}
+#' @rdname sim_ur_scenarios
+#' @export
+sim_ur_scenarios <- function(df_portf,
+ extra_ur_sites = 3,
+ ur_rate = c(0.25, 0.5),
+ r = 1000,
+ poisson_test = FALSE,
+ prob_lower = TRUE,
+ parallel = FALSE,
+ progress = TRUE,
+ site_aggr_args = list(),
+ eval_sites_args = list()) {
+ # checks
+
+ stopifnot("all site_aggr_args list items must be named" = all(names(site_aggr_args) != ""))
+ stopifnot("all eval_sites_args list items must be named" = all(names(eval_sites_args) != ""))
+ stopifnot(is.numeric(extra_ur_sites))
+ stopifnot(length(extra_ur_sites) == 1)
+ extra_ur_sites <- as.integer(extra_ur_sites)
+
+
+ if (progress) {
+ message("aggregating site level")
+ }
+
+ df_visit <- check_df_visit(df_portf)
+
+ df_site <- do.call(site_aggr, c(list(df_visit = df_visit), site_aggr_args))
+
+ if (progress) {
+ message("prepping for simulation")
+ }
+
+ df_sim_prep <- prep_for_sim(df_site = df_site, df_visit = df_visit)
+
+ if (progress) {
+ message("generating scenarios")
+ }
+
+ # create scenario grid
+
+ df_mean_pat <- df_visit %>%
+ group_by(.data$study_id, .data$site_number, .data$visit) %>%
+ summarise(n_pat = n_distinct(.data$patnum),
+ .groups = "drop") %>%
+ group_by(.data$study_id, .data$visit) %>%
+ summarise(mean_n_pat = mean(.data$n_pat),
+ sum_n_pat = sum(.data$n_pat),
+ n_sites = n_distinct(.data$site_number),
+ .groups = "drop")
+
+ ur_rate <- ur_rate[ur_rate > 0]
+
+ df_grid_gr0 <- df_site %>%
+ select(.data$study_id, .data$site_number, .data$n_pat_with_med75, .data$visit_med75) %>%
+ left_join(df_mean_pat, by = c(study_id = "study_id", visit_med75 = "visit")) %>%
+ mutate(extra_ur_sites = list(0:extra_ur_sites)) %>%
+ unnest(.data$extra_ur_sites) %>%
+ mutate(
+ frac_pat_with_ur = (.data$n_pat_with_med75 + .data$extra_ur_sites * .data$mean_n_pat) /
+ .data$sum_n_pat,
+ ur_rate = list(ur_rate)
+ ) %>%
+ unnest(.data$ur_rate) %>%
+ select(
+ .data$study_id,
+ .data$site_number,
+ .data$extra_ur_sites,
+ .data$frac_pat_with_ur,
+ .data$ur_rate
+ )
+
+ df_grid_0 <- df_grid_gr0 %>%
+ select(.data$study_id, .data$site_number) %>%
+ distinct() %>%
+ mutate(extra_ur_sites = 0,
+ frac_pat_with_ur = 0,
+ ur_rate = 0)
+
+ df_grid <- bind_rows(df_grid_0, df_grid_gr0)
+
+ df_scen_prep <- df_sim_prep %>%
+ left_join(df_grid, by = c("study_id", "site_number"))
+
+ # generating scenarios
+
+ df_scen <- df_scen_prep %>%
+ mutate(scenarios = purrr::pmap(
+ list(.data$n_ae_site, .data$n_ae_study, .data$frac_pat_with_ur, .data$ur_rate),
+ sim_scenario
+ )
+ ) %>%
+ select(- .data$n_ae_site, - .data$n_ae_study) %>%
+ mutate(n_ae_site = map(.data$scenarios, "n_ae_site"),
+ n_ae_study = map(.data$scenarios, "n_ae_study")) %>%
+ select(- .data$scenarios)
+
+ if (progress) {
+ message("getting under-reporting stats")
+ }
+
+ if (parallel) {
+ .purrr <- furrr::future_map
+ .purrr_args <- list(.options = furrr_options(seed = TRUE))
+ } else {
+ .purrr <- purrr::map
+ .purrr_args <- list()
+ }
+
+ df_sim_sites <- df_scen %>%
+ mutate(study_id_gr = .data$study_id,
+ site_number_gr = .data$site_number) %>%
+ group_by(.data$study_id_gr, .data$site_number_gr) %>%
+ nest() %>%
+ ungroup() %>%
+ select(- .data$study_id_gr, - .data$site_number_gr)
+
+ with_progress_cnd(
+ ls_df_sim_sites <- purrr_bar(
+ df_sim_sites$data,
+ .purrr = .purrr,
+ .f = sim_after_prep,
+ .f_args = list(
+ r = r,
+ poisson_test = poisson_test,
+ prob_lower = prob_lower,
+ progress = FALSE
+ ),
+ .purrr_args = .purrr_args,
+ .steps = nrow(df_sim_sites),
+ .progress = progress
+ ),
+ progress = progress
+ )
+
+ if (progress) {
+ message("evaluating stats")
+ }
+
+ df_sim_sites <- bind_rows(ls_df_sim_sites)
+
+ df_eval <- do.call(eval_sites, c(list(df_sim_sites = df_sim_sites), eval_sites_args)) %>%
+ arrange(
+ .data$study_id,
+ .data$site_number,
+ .data$extra_ur_sites,
+ .data$frac_pat_with_ur,
+ .data$ur_rate
+ )
+
+}
+
+
+#' @title Simulate Portfolio Test Data
+#' @description Simulate visit level data from a portfolio configuration.
+#' @param df_config dataframe as returned by \code{\link{get_config}}
+#' @param parallel logical activate parallel processing, see details, Default: FALSE
+#' @param progress logical, Default: TRUE
+#'@return dataframe with the following columns: \describe{
+#' \item{**study_id**}{study identification} \item{**ae_per_visit_mean**}{mean
+#' AE per visit per study} \item{**site_number**}{site}
+#' \item{**max_visit_sd**}{standard deviation of maximum patient visits per
+#' site} \item{**max_visit_mean**}{mean of maximum patient visits per site}
+#' \item{**patnum**}{number of patients}
+#' \item{**visit**}{visit number}
+#' \item{**n_ae**}{cumulative sum of AEs}
+#'}
+#' @details uses \code{\link{sim_test_data_study}}.
+#' We use the `furrr` package to
+#' implement parallel processing as these simulations can take a long time to
+#' run. For this to work we need to specify the plan for how the code should
+#' run, e.g. `plan(multisession, workers = 3)
+#' @examples
+#' if (interactive()) {
+#' df_visit1 <- sim_test_data_study(n_pat = 100, n_sites = 10,
+#' frac_site_with_ur = 0.4, ur_rate = 0.6)
+#'
+#' df_visit1$study_id <- "A"
+#'
+#' df_visit2 <- sim_test_data_study(n_pat = 100, n_sites = 10,
+#' frac_site_with_ur = 0.2, ur_rate = 0.1)
+#'
+#' df_visit2$study_id <- "B"
+#'
+#' df_visit <- dplyr::bind_rows(df_visit1, df_visit2)
+#'
+#' df_site_max <- df_visit %>%
+#' group_by(study_id, site_number, patnum) %>%
+#' summarise(max_visit = max(visit),
+#' max_ae = max(n_ae),
+#' .groups = "drop")
+#'
+#' df_config <- get_config(df_site_max)
+#'
+#' df_config
+#'
+#' df_portf <- sim_test_data_portfolio(df_config)
+#'
+#' df_portf
+#'
+#' df_scen <- sim_ur_scenarios(df_portf,
+#' extra_ur_sites = 2,
+#' ur_rate = c(0.5, 1))
+#'
+#'
+#' df_scen
+#'
+#' df_perf <- get_portf_perf(df_scen)
+#'
+#' df_perf
+#' }
+#' @seealso
+#' \code{\link{sim_test_data_study}}
+#' \code{\link{get_config}}
+#' \code{\link{sim_test_data_portfolio}}
+#' \code{\link{sim_ur_scenarios}}
+#' \code{\link{get_portf_perf}}
+#' @rdname sim_test_data_portfolio
+#' @export
+sim_test_data_portfolio <- function(df_config, parallel = FALSE, progress = TRUE) {
+
+ # checks --------------------------
+ df_config <- ungroup(df_config)
+
+ stopifnot(
+ df_config %>%
+ summarise_all(~ ! anyNA(.)) %>%
+ unlist() %>%
+ all()
+ )
+
+ stopifnot(is.data.frame(df_config))
+
+ stopifnot(
+ all(
+ c("study_id",
+ "ae_per_visit_mean",
+ "site_number",
+ "max_visit_sd",
+ "max_visit_mean",
+ "n_pat"
+ ) %in% colnames(df_config)
+ )
+ )
+
+ # exec --------------------------
+
+ if (parallel) {
+ .purrr <- furrr::future_pmap
+ .purrr_args <- list(.options = furrr_options(seed = TRUE))
+ } else {
+ .purrr <- purrr::pmap
+ .purrr_args <- list()
+ }
+
+ with_progress_cnd(
+ df_config_sim <- df_config %>%
+ mutate(
+ sim = purrr_bar(
+ list(
+ .data$ae_per_visit_mean,
+ .data$max_visit_sd,
+ .data$max_visit_mean,
+ .data$n_pat
+ ),
+ .purrr = .purrr,
+ .f = function(ae_per_visit_mean,
+ max_visit_sd,
+ max_visit_mean,
+ n_pat) {
+ sim_test_data_study(
+ n_pat = n_pat,
+ n_sites = 1,
+ max_visit_mean = max_visit_mean,
+ max_visit_sd = max_visit_sd,
+ ae_per_visit_mean = ae_per_visit_mean
+ ) %>%
+ select(
+ # using rlang::.data here causes error with furrr
+ patnum, visit, n_ae
+ )
+ },
+ .progress = progress,
+ .purrr_args = .purrr_args,
+ .steps = nrow(.)
+ )
+ ),
+ progress = progress
+ )
+
+ df_portf <- df_config_sim %>%
+ unnest(.data$sim) %>%
+ select(- .data$n_pat) %>%
+ group_by(.data$study_id) %>%
+ mutate(
+ # patnums need to be made site exclusive
+ patnum = str_pad(
+ dense_rank(paste0(.data$site_number, .data$patnum)),
+ width = 4,
+ side = "left",
+ pad = "0"
+ )
+ ) %>%
+ ungroup()
+
+ return(df_portf)
+}
+
+#'@title Get Portfolio Configuration
+#'@description Get Portfolio configuration from a dataframe aggregated on
+#' patient level with max_ae and max_visit. Will filter studies with only a few
+#' sites and patients and will anonymize IDs. Portfolio configuration can be
+#' used by \code{\link{sim_test_data_portfolio}} to generate data for an
+#' artificial portfolio.
+#'@param df_site dataframe aggregated on patient level with max_ae and max_visit
+#'@param min_pat_per_study minimum number of patients per study, Default: 100
+#'@param min_sites_per_study minimum number of sites per study, Default: 10
+#'@param anonymize logical, Default: TRUE
+#'@param pad_width padding width for newly created IDs, Default: 4
+#'@return dataframe with the following columns: \describe{
+#' \item{**study_id**}{study identification} \item{**ae_per_visit_mean**}{mean
+#' AE per visit per study} \item{**site_number**}{site}
+#' \item{**max_visit_sd**}{standard deviation of maximum patient visits per
+#' site} \item{**max_visit_mean**}{mean of maximum patient visits per site}
+#' \item{**n_pat**}{number of patients} }
+#' @examples
+#' if (interactive()) {
+#' df_visit1 <- sim_test_data_study(n_pat = 100, n_sites = 10,
+#' frac_site_with_ur = 0.4, ur_rate = 0.6)
+#'.
+#' df_visit1$study_id <- "A"
+#'.
+#' df_visit2 <- sim_test_data_study(n_pat = 100, n_sites = 10,
+#' frac_site_with_ur = 0.2, ur_rate = 0.1)
+#'.
+#' df_visit2$study_id <- "B"
+#'.
+#' df_visit <- dplyr::bind_rows(df_visit1, df_visit2)
+#'
+#' df_site_max <- df_visit %>%
+#' group_by(study_id, site_number, patnum) %>%
+#' summarise(max_visit = max(visit),
+#' max_ae = max(n_ae),
+#' .groups = "drop")
+#'
+#' df_config <- get_config(df_site_max)
+#'
+#' df_config
+#'
+#' df_portf <- sim_test_data_portfolio(df_config)
+#'
+#' df_portf
+#'
+#' df_scen <- sim_ur_scenarios(df_portf,
+#' extra_ur_sites = 2,
+#' ur_rate = c(0.5, 1))
+#'
+#'
+#' df_scen
+#'
+#' df_perf <- get_portf_perf(df_scen)
+#'
+#' df_perf
+#' }
+#' @seealso
+#' \code{\link{sim_test_data_study}}
+#' \code{\link{get_config}}
+#' \code{\link{sim_test_data_portfolio}}
+#' \code{\link{sim_ur_scenarios}}
+#' \code{\link{get_portf_perf}}
+#'@rdname get_config
+#'@export
+get_config <- function(df_site,
+ min_pat_per_study = 100,
+ min_sites_per_study = 10,
+ anonymize = TRUE,
+ pad_width = 4) {
+
+ stopifnot(c("study_id", "site_number", "patnum", "max_visit", "max_ae") %in% colnames(df_site))
+ stopifnot(nrow(df_site) == nrow(distinct(select(df_site, .data$study_id, .data$site_number, .data$patnum))))
+
+ df_site %>%
+ summarise_all(~ ! anyNA(.)) %>%
+ unlist() %>%
+ all() %>%
+ stopifnot("NA detected" = .)
+
+ df_site %>%
+ group_by(.data$study_id, .data$patnum) %>%
+ summarise(n_sites = n_distinct(.data$site_number), .groups = "drop") %>%
+ mutate(check = .data$n_sites == 1) %>%
+ pull(.data$check) %>%
+ unlist() %>%
+ all() %>%
+ stopifnot("patient ids must be site exclusive" = .)
+
+ df_config <- df_site %>%
+ filter(.data$max_visit > 0) %>%
+ group_by(.data$study_id) %>%
+ mutate(ae_per_visit_mean = sum(.data$max_ae) / sum(.data$max_visit)) %>%
+ filter(
+ n_distinct(.data$patnum) >= min_pat_per_study,
+ n_distinct(.data$site_number) >= min_sites_per_study
+ ) %>%
+ group_by(.data$study_id, .data$ae_per_visit_mean, .data$site_number) %>%
+ summarise(max_visit_sd = sd(.data$max_visit),
+ max_visit_mean = mean(.data$max_visit),
+ n_pat = n_distinct(.data$patnum),
+ .groups = "drop") %>%
+ mutate(max_visit_sd = ifelse(is.na(.data$max_visit_sd), 0, .data$max_visit_sd))
+
+ if (anonymize) {
+ df_config <- df_config %>%
+ mutate(
+ study_id = dense_rank(.data$study_id),
+ study_id = str_pad(.data$study_id, pad_width, side = "left", "0")
+ ) %>%
+ group_by(.data$study_id) %>%
+ mutate(
+ site_number = dense_rank(.data$site_number),
+ site_number = str_pad(.data$site_number, pad_width, side = "left", "0")
+ ) %>%
+ ungroup()
+ }
+
+ stopifnot("nrows(df_config) > 0, relax filter settings!" = nrow(df_config) > 0)
+
+ return(df_config)
+}
+
+#' @title Get Portfolio Performance
+#' @description Performance as true positive rate (tpr as tp/P) on the basis of
+#' desired false positive rates (fpr as fp/P).
+#' @param df_scen dataframe as returned by \code{\link{sim_ur_scenarios}}
+#' @param stat character denoting the column name of the under-reporting
+#' statistic, Default: 'prob_low_prob_ur'
+#' @param fpr numeric vector specifying false positive rates, Default: c(0.001,
+#' 0.01, 0.05)
+#' @return dataframe
+#' @details DETAILS
+#' @examples
+#' df_visit1 <- sim_test_data_study(n_pat = 100, n_sites = 10,
+#' frac_site_with_ur = 0.4, ur_rate = 0.6)
+#'
+#' df_visit1$study_id <- "A"
+#'
+#' df_visit2 <- sim_test_data_study(n_pat = 100, n_sites = 10,
+#' frac_site_with_ur = 0.2, ur_rate = 0.1)
+#'
+#' df_visit2$study_id <- "B"
+#'
+#' df_visit <- dplyr::bind_rows(df_visit1, df_visit2)
+#'
+#' df_site_max <- df_visit %>%
+#' dplyr::group_by(study_id, site_number, patnum) %>%
+#' dplyr::summarise(max_visit = max(visit),
+#' max_ae = max(n_ae),
+#' .groups = "drop")
+#'
+#' df_config <- get_config(df_site_max)
+#'
+#' df_config
+#'
+#' df_portf <- sim_test_data_portfolio(df_config)
+#'
+#' df_portf
+#'
+#' df_scen <- sim_ur_scenarios(df_portf,
+#' extra_ur_sites = 2,
+#' ur_rate = c(0.5, 1))
+#'
+#'
+#' df_scen
+#'
+#' df_perf <- get_portf_perf(df_scen)
+#'
+#' df_perf
+#' @seealso \code{\link{sim_test_data_study}} \code{\link{get_config}}
+#' \code{\link{sim_test_data_portfolio}} \code{\link{sim_ur_scenarios}}
+#' \code{\link{get_portf_perf}}
+#' @rdname get_portf_perf
+#' @export
+get_portf_perf <- function(df_scen, stat = "prob_low_prob_ur", fpr = c(0.001, 0.01, 0.05)) {
+
+ if (anyNA(df_scen[[stat]])) {
+ mes <- df_scen %>%
+ mutate(extra_ur_sites = as.factor(.data$extra_ur_sites),
+ ur_rate = as.factor(.data$ur_rate)) %>%
+ group_by(.data$extra_ur_sites, .data$ur_rate, .drop = FALSE) %>%
+ mutate(n_sites_total = n_distinct(.data$site_number)) %>%
+ group_by(.data$extra_ur_sites, .data$ur_rate, .data$n_sites_total) %>%
+ filter(is.na(.data[[stat]])) %>%
+ summarise(n = n_distinct(.data$site_number), .groups = "drop") %>%
+ mutate(
+ ratio_sites_with_na = .data$n /
+ ifelse(is.na(.data$n_sites_total),
+ 0,
+ .data$n_sites_total)
+ ) %>%
+ select(.data$extra_ur_sites, .data$ur_rate, .data$ratio_sites_with_na) %>%
+ knitr::kable() %>%
+ paste(collapse = "\n")
+
+ warning(
+ paste("Some Simulation Scenarios have returned NA stat values.\n", mes))
+
+ }
+
+ stat_at_0 <- df_scen %>%
+ filter(.data$ur_rate == 0, .data$frac_pat_with_ur == 0) %>%
+ pull(.data[[stat]])
+
+ df_thresh <- tibble(
+ fpr = fpr
+ ) %>%
+ mutate(
+ thresh = map_dbl(
+ .data$fpr,
+ ~ quantile(stat_at_0, probs = 1 - ., na.rm = TRUE)
+ )
+ )
+
+
+ df_prep <- df_scen %>%
+ mutate(data = list(df_thresh)) %>%
+ unnest(.data$data) %>%
+ mutate(stat = .data[[stat]]) %>%
+ group_by(.data$fpr, .data$thresh, .data$extra_ur_sites, .data$ur_rate) %>%
+ summarise(
+ tpr = sum(ifelse(.data$stat >= .data$thresh, 1, 0), na.rm = TRUE) /
+ n_distinct(paste(.data$study_id, .data$site_number)),
+ .groups = "drop")
+
+ df_prep_0 <- df_prep %>%
+ filter(.data$ur_rate == 0) %>%
+ mutate(extra_ur_sites = list(unique(df_prep$extra_ur_sites))) %>%
+ unnest(.data$extra_ur_sites)
+
+ df_prep_gr0 <- df_prep %>%
+ filter(.data$ur_rate > 0)
+
+ bind_rows(df_prep_0, df_prep_gr0) %>%
+ arrange(.data$fpr, .data$ur_rate)
+}
diff --git a/_pkgdown.yml b/_pkgdown.yml
index 78b0068..5dc7540 100644
--- a/_pkgdown.yml
+++ b/_pkgdown.yml
@@ -34,6 +34,7 @@ articles:
- sas_files
- visit_med75
- visits_or_days
+ - portfolio_perf
reference:
- title: Main Functions
@@ -48,11 +49,14 @@ reference:
desc: ~
contents:
- sim_test_data_study
+ - get_config
+ - sim_test_data_portfolio
- title: Test Functions
- desc: These functions are intented to check Poisson test applicability.
+ desc: These functions are intented to check simaerep performance.
contents:
- sim_studies
- - get_ecd_values
+ - sim_ur_scenarios
+ - get_portf_perf
- title: Helper Functions
desc: These functions are intended to be called by other functions.
contents:
@@ -66,11 +70,17 @@ reference:
- get_site_mean_ae_dev
- aggr_duplicated_visits
- exp_implicit_missing_visits
+ - prep_for_sim
+ - sim_after_prep
+ - get_ecd_values
+ - purrr_bar
+ - with_progress_cnd
+ - sim_scenario
- title: Deprecated
contents:
- eval_sites_deprecated
- title: Additional Plot Functions
- desc: These functions were intended for creating plots for the documentation.
+ desc: These functions are intended for creating plots for the documentation.
contents:
- plot_visit_med75
- plot_sim_examples
diff --git a/docker-compose.yml b/docker-compose.yml
new file mode 100644
index 0000000..8ae9b06
--- /dev/null
+++ b/docker-compose.yml
@@ -0,0 +1,24 @@
+version: '3.7'
+services:
+ build_image:
+ build: .
+ image: 'simaerep'
+ command: /bin/bash
+
+ shell:
+ image: 'simaerep'
+ working_dir: /app
+ # make container wait
+ command: tail -F anything
+ volumes:
+ - '.:/app'
+ rstudio:
+ image: 'simaerep'
+ ports:
+ - '8787:8787'
+ volumes:
+ - '.:/home/rstudio/app'
+ command: /init
+ environment:
+ PASSWORD: '123'
+ USER: 'rstudio'
diff --git a/docs/404.html b/docs/404.html
index 5fa5c52..fb089e4 100644
--- a/docs/404.html
+++ b/docs/404.html
@@ -89,7 +89,7 @@
simaerep
- 0.3.0
+ 0.3.2
@@ -97,7 +97,7 @@
@@ -141,7 +144,7 @@
-
+
diff --git a/docs/LICENSE-text.html b/docs/LICENSE-text.html
index 0743477..8e6daa4 100644
--- a/docs/LICENSE-text.html
+++ b/docs/LICENSE-text.html
@@ -89,7 +89,7 @@
simaerep
- 0.3.1
+ 0.3.2
@@ -97,7 +97,7 @@
@@ -141,7 +144,7 @@
-
+
diff --git a/docs/LICENSE.html b/docs/LICENSE.html
index 2afe671..0123337 100644
--- a/docs/LICENSE.html
+++ b/docs/LICENSE.html
@@ -89,7 +89,7 @@
simaerep
- 0.3.1
+ 0.3.2
@@ -97,7 +97,7 @@
@@ -141,7 +144,7 @@
-
+
diff --git a/docs/articles/check_poisson.html b/docs/articles/check_poisson.html
index 018b036..8616bb6 100644
--- a/docs/articles/check_poisson.html
+++ b/docs/articles/check_poisson.html
@@ -44,7 +44,7 @@
simaerep
- 0.3.0
+ 0.3.2
@@ -52,7 +52,7 @@
@@ -96,7 +99,7 @@
-
+
@@ -110,8 +113,7 @@
-
-
+
@@ -187,7 +189,7 @@
df_sim_sites <- sim_sites ( df_site , df_visit )
df_visit
-
## # A tibble: 58,397 x 9
+## # A tibble: 58,578 x 9
## patnum site_number is_ur max_visit_mean max_visit_sd ae_per_visit_me… visit
## <chr> <chr> <lgl> <dbl> <dbl> <dbl> <int>
## 1 P0000… S0001 TRUE 20 4 0.25 1
@@ -200,40 +202,40 @@
## 8 P0000… S0001 TRUE 20 4 0.25 8
## 9 P0000… S0001 TRUE 20 4 0.25 9
## 10 P0000… S0001 TRUE 20 4 0.25 10
-## # … with 58,387 more rows, and 2 more variables: n_ae <int>, study_id <chr>
+## # … with 58,568 more rows, and 2 more variables: n_ae <int>, study_id <chr>
## # A tibble: 300 x 6
## study_id site_number n_pat n_pat_with_med75 visit_med75 mean_ae_site_me…
## <chr> <chr> <int> <int> <dbl> <dbl>
-## 1 ae_per_visit… S0001 10 7 15 0.429
-## 2 ae_per_visit… S0002 10 9 17 0.222
-## 3 ae_per_visit… S0003 10 9 19 0.444
-## 4 ae_per_visit… S0004 10 10 17 0.6
-## 5 ae_per_visit… S0005 10 9 14 0.222
-## 6 ae_per_visit… S0006 10 10 14 0.2
-## 7 ae_per_visit… S0007 10 10 15 0.3
-## 8 ae_per_visit… S0008 10 10 14 0.5
-## 9 ae_per_visit… S0009 10 8 18 0.25
-## 10 ae_per_visit… S0010 10 10 15 0.4
+## 1 ae_per_visit… S0001 10 8 15 0.375
+## 2 ae_per_visit… S0002 10 10 13 0.5
+## 3 ae_per_visit… S0003 10 9 16 0.333
+## 4 ae_per_visit… S0004 10 9 15 0.333
+## 5 ae_per_visit… S0005 10 10 18 0.6
+## 6 ae_per_visit… S0006 10 9 15 0.111
+## 7 ae_per_visit… S0007 10 8 18 0.25
+## 8 ae_per_visit… S0008 10 10 16 0.5
+## 9 ae_per_visit… S0009 10 9 17 0.889
+## 10 ae_per_visit… S0010 10 9 16 1
## # … with 290 more rows
-
## # A tibble: 300 x 9
+## # A tibble: 300 x 10
## study_id site_number n_pat n_pat_with_med75 visit_med75 mean_ae_site_me…
## <chr> <chr> <int> <int> <dbl> <dbl>
-## 1 ae_per_… S0001 10 7 15 0.429
-## 2 ae_per_… S0002 10 9 17 0.222
-## 3 ae_per_… S0003 10 9 19 0.444
-## 4 ae_per_… S0004 10 10 17 0.6
-## 5 ae_per_… S0005 10 9 14 0.222
-## 6 ae_per_… S0006 10 10 14 0.2
-## 7 ae_per_… S0007 10 10 15 0.3
-## 8 ae_per_… S0008 10 10 14 0.5
-## 9 ae_per_… S0009 10 8 18 0.25
-## 10 ae_per_… S0010 10 10 15 0.4
-## # … with 290 more rows, and 3 more variables: mean_ae_study_med75 <dbl>,
-## # pval <dbl>, prob_low <dbl>
+## 1 ae_per_… S0001 10 8 15 0.375
+## 2 ae_per_… S0002 10 10 13 0.5
+## 3 ae_per_… S0003 10 9 16 0.333
+## 4 ae_per_… S0004 10 9 15 0.333
+## 5 ae_per_… S0005 10 10 18 0.6
+## 6 ae_per_… S0006 10 9 15 0.111
+## 7 ae_per_… S0007 10 8 18 0.25
+## 8 ae_per_… S0008 10 10 16 0.5
+## 9 ae_per_… S0009 10 9 17 0.889
+## 10 ae_per_… S0010 10 9 16 1
+## # … with 290 more rows, and 4 more variables: mean_ae_study_med75 <dbl>,
+## # n_pat_with_med75_study <int>, pval <dbl>, prob_low <dbl>
@@ -242,7 +244,7 @@
+future :: plan ( multiprocess )
## Warning: Strategy 'multiprocess' is deprecated in future (>= 1.20.0). Instead,
## explicitly specify either 'multisession' or 'multicore'. In the current R
## session, 'multiprocess' equals 'multisession'.
@@ -261,18 +263,18 @@
df_sim_studies
## # A tibble: 30,000 x 8
-## r study_id site_number visit_med75 n_pat_with_med75 n_pat_study pval
-## <dbl> <chr> <chr> <dbl> <int> <dbl> <dbl>
-## 1 1 ae_per_… S0001 15 7 885 0.342
-## 2 1 ae_per_… S0002 17 9 766 0.0772
-## 3 1 ae_per_… S0003 19 9 580 1
-## 4 1 ae_per_… S0004 17 10 765 1
-## 5 1 ae_per_… S0005 14 9 927 1
-## 6 1 ae_per_… S0006 14 10 926 0.300
-## 7 1 ae_per_… S0007 15 10 882 1
-## 8 1 ae_per_… S0008 14 10 926 1
-## 9 1 ae_per_… S0009 18 8 670 1
-## 10 1 ae_per_… S0010 15 10 882 1
+## r study_id site_number visit_med75 n_pat_with_med75 n_pat_study pval
+## <dbl> <chr> <chr> <dbl> <int> <dbl> <dbl>
+## 1 1 ae_per_… S0001 15 8 884 1
+## 2 1 ae_per_… S0002 13 10 946 0.140
+## 3 1 ae_per_… S0003 16 9 832 1
+## 4 1 ae_per_… S0004 15 9 883 1
+## 5 1 ae_per_… S0005 18 10 671 1
+## 6 1 ae_per_… S0006 15 9 883 0.396
+## 7 1 ae_per_… S0007 18 8 673 1
+## 8 1 ae_per_… S0008 16 10 831 1
+## 9 1 ae_per_… S0009 17 9 753 1
+## 10 1 ae_per_… S0010 16 9 832 0.429
## # … with 29,990 more rows, and 1 more variable: prob_low <dbl>
@@ -284,21 +286,21 @@
df_check_pval <- get_ecd_values ( df_sim_studies , df_sim_sites , val_str = "pval" )
df_check_pval
-## # A tibble: 300 x 10
+## # A tibble: 300 x 11
## study_id site_number n_pat n_pat_with_med75 visit_med75 mean_ae_site_me…
## <chr> <chr> <int> <int> <dbl> <dbl>
-## 1 ae_per_… S0001 10 7 15 0.429
-## 2 ae_per_… S0002 10 9 17 0.222
-## 3 ae_per_… S0003 10 9 19 0.444
-## 4 ae_per_… S0004 10 10 17 0.6
-## 5 ae_per_… S0005 10 9 14 0.222
-## 6 ae_per_… S0006 10 10 14 0.2
-## 7 ae_per_… S0007 10 10 15 0.3
-## 8 ae_per_… S0008 10 10 14 0.5
-## 9 ae_per_… S0009 10 8 18 0.25
-## 10 ae_per_… S0010 10 10 15 0.4
-## # … with 290 more rows, and 4 more variables: mean_ae_study_med75 <dbl>,
-## # pval <dbl>, prob_low <dbl>, pval_ecd <dbl>
+## 1 ae_per_… S0001 10 8 15 0.375
+## 2 ae_per_… S0002 10 10 13 0.5
+## 3 ae_per_… S0003 10 9 16 0.333
+## 4 ae_per_… S0004 10 9 15 0.333
+## 5 ae_per_… S0005 10 10 18 0.6
+## 6 ae_per_… S0006 10 9 15 0.111
+## 7 ae_per_… S0007 10 8 18 0.25
+## 8 ae_per_… S0008 10 10 16 0.5
+## 9 ae_per_… S0009 10 9 17 0.889
+## 10 ae_per_… S0010 10 9 16 1
+## # … with 290 more rows, and 5 more variables: mean_ae_study_med75 <dbl>,
+## # n_pat_with_med75_study <int>, pval <dbl>, prob_low <dbl>, pval_ecd <dbl>
df_check_pval %>%
ggplot ( aes ( log ( pval , base = 10 ) , log ( pval_ecd , base = 10 ) ) ) +
@@ -318,21 +320,22 @@
df_check_prob <- get_ecd_values ( df_sim_studies , df_sim_sites , val_str = "prob_low" )
df_check_prob
-## # A tibble: 300 x 10
+## # A tibble: 300 x 11
## study_id site_number n_pat n_pat_with_med75 visit_med75 mean_ae_site_me…
## <chr> <chr> <int> <int> <dbl> <dbl>
-## 1 ae_per_… S0001 10 7 15 0.429
-## 2 ae_per_… S0002 10 9 17 0.222
-## 3 ae_per_… S0003 10 9 19 0.444
-## 4 ae_per_… S0004 10 10 17 0.6
-## 5 ae_per_… S0005 10 9 14 0.222
-## 6 ae_per_… S0006 10 10 14 0.2
-## 7 ae_per_… S0007 10 10 15 0.3
-## 8 ae_per_… S0008 10 10 14 0.5
-## 9 ae_per_… S0009 10 8 18 0.25
-## 10 ae_per_… S0010 10 10 15 0.4
-## # … with 290 more rows, and 4 more variables: mean_ae_study_med75 <dbl>,
-## # pval <dbl>, prob_low <dbl>, prob_low_ecd <dbl>
+## 1 ae_per_… S0001 10 8 15 0.375
+## 2 ae_per_… S0002 10 10 13 0.5
+## 3 ae_per_… S0003 10 9 16 0.333
+## 4 ae_per_… S0004 10 9 15 0.333
+## 5 ae_per_… S0005 10 10 18 0.6
+## 6 ae_per_… S0006 10 9 15 0.111
+## 7 ae_per_… S0007 10 8 18 0.25
+## 8 ae_per_… S0008 10 10 16 0.5
+## 9 ae_per_… S0009 10 9 17 0.889
+## 10 ae_per_… S0010 10 9 16 1
+## # … with 290 more rows, and 5 more variables: mean_ae_study_med75 <dbl>,
+## # n_pat_with_med75_study <int>, pval <dbl>, prob_low <dbl>,
+## # prob_low_ecd <dbl>
df_check_prob %>%
ggplot ( aes ( log ( prob_low , base = 10 ) , log ( prob_low_ecd , base = 10 ) ) ) +
diff --git a/docs/articles/check_poisson_files/figure-html/unnamed-chunk-4-1.png b/docs/articles/check_poisson_files/figure-html/unnamed-chunk-4-1.png
index addcbf9..446586f 100644
Binary files a/docs/articles/check_poisson_files/figure-html/unnamed-chunk-4-1.png and b/docs/articles/check_poisson_files/figure-html/unnamed-chunk-4-1.png differ
diff --git a/docs/articles/check_poisson_files/figure-html/unnamed-chunk-5-1.png b/docs/articles/check_poisson_files/figure-html/unnamed-chunk-5-1.png
index 203dd12..bda3655 100644
Binary files a/docs/articles/check_poisson_files/figure-html/unnamed-chunk-5-1.png and b/docs/articles/check_poisson_files/figure-html/unnamed-chunk-5-1.png differ
diff --git a/docs/articles/check_poisson_files/header-attrs-2.6/header-attrs.js b/docs/articles/check_poisson_files/header-attrs-2.6/header-attrs.js
new file mode 100644
index 0000000..dd57d92
--- /dev/null
+++ b/docs/articles/check_poisson_files/header-attrs-2.6/header-attrs.js
@@ -0,0 +1,12 @@
+// Pandoc 2.9 adds attributes on both header and div. We remove the former (to
+// be compatible with the behavior of Pandoc < 2.8).
+document.addEventListener('DOMContentLoaded', function(e) {
+ var hs = document.querySelectorAll("div.section[class*='level'] > :first-child");
+ var i, h, a;
+ for (i = 0; i < hs.length; i++) {
+ h = hs[i];
+ if (!/^h[1-6]$/i.test(h.tagName)) continue; // it should be a header h1-h6
+ a = h.attributes;
+ while (a.length > 0) h.removeAttribute(a[0].name);
+ }
+});
diff --git a/docs/articles/index.html b/docs/articles/index.html
index ba94d0f..6bc9636 100644
--- a/docs/articles/index.html
+++ b/docs/articles/index.html
@@ -89,7 +89,7 @@
simaerep
- 0.3.0
+ 0.3.2
@@ -97,7 +97,7 @@
@@ -141,7 +144,7 @@
@@ -96,7 +99,7 @@
-
+
@@ -110,8 +113,7 @@
-
-
+
+
-
-
-
-
-
-
-
+
+
+
+
+
+
+
@@ -530,6 +534,7 @@
15
3.000000
6.712644
+87
0.0e+00
0
@@ -541,6 +546,7 @@
14
3.352941
6.021739
+92
7.5e-06
0
@@ -552,6 +558,7 @@
17
8.500000
6.513514
+74
1.0e+00
1
@@ -563,6 +570,7 @@
16
8.058824
6.261905
+84
1.0e+00
1
@@ -574,6 +582,7 @@
16
7.555556
6.349398
+83
1.0e+00
1
@@ -585,6 +594,7 @@
15
7.850000
5.690476
+84
1.0e+00
1
@@ -604,19 +614,20 @@
-
-
+
+
+
+
+
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+ get_config()
+
+ Get Portfolio Configuration
+
+
+
+ sim_test_data_portfolio()
+
+ Simulate Portfolio Test Data
Test Functions
-
These functions are intented to check Poisson test applicability.
+
These functions are intented to check simaerep performance.
@@ -252,9 +267,15 @@ sim_ur_scenarios()
- get empirical cumulative distribution values of pval or prob_lower
+ Simulate Under-Reporting Scenarios
+
+
+
+ get_portf_perf()
+
+ Get Portfolio Performance
@@ -328,6 +349,43 @@
+
+
+ prep_for_sim()
+
+ prepare data for simulation
+
+
+
+ sim_after_prep()
+
+ start simulation after preparation
+
+
+
+ get_ecd_values()
+
+ get empirical cumulative distribution values of pval or prob_lower
+
+
+
+ purrr_bar()
+
+ execute a purrr or furrr function with a progress
+bar
+
+
+
+ with_progress_cnd()
+
+ conditional with_progress
+
+
+
+ sim_scenario()
+
+ simulate single scenario
@@ -352,7 +410,7 @@
Additional Plot Functions
-
These functions were intended for creating plots for the documentation.
+
These functions are intended for creating plots for the documentation.
diff --git a/docs/reference/lint_package.html b/docs/reference/lint_package.html
index 586aadb..2cbe694 100644
--- a/docs/reference/lint_package.html
+++ b/docs/reference/lint_package.html
@@ -91,7 +91,7 @@
simaerep
- 0.3.0
+ 0.3.2
@@ -99,7 +99,7 @@
@@ -143,7 +146,7 @@
-
+
diff --git a/docs/reference/pat_aggr.html b/docs/reference/pat_aggr.html
index 6073312..f131ffe 100644
--- a/docs/reference/pat_aggr.html
+++ b/docs/reference/pat_aggr.html
@@ -90,7 +90,7 @@
simaerep
- 0.3.0
+ 0.3.2
@@ -98,7 +98,7 @@
@@ -142,7 +145,7 @@
-
+
diff --git a/docs/reference/pat_pool.html b/docs/reference/pat_pool.html
index c722d07..3061eb3 100644
--- a/docs/reference/pat_pool.html
+++ b/docs/reference/pat_pool.html
@@ -93,7 +93,7 @@
simaerep
- 0.3.0
+ 0.3.2
@@ -101,7 +101,7 @@
@@ -145,7 +148,7 @@
@@ -142,7 +145,7 @@
-
+
diff --git a/docs/reference/plot_dots-1.png b/docs/reference/plot_dots-1.png
index 9926d4c..36ad166 100644
Binary files a/docs/reference/plot_dots-1.png and b/docs/reference/plot_dots-1.png differ
diff --git a/docs/reference/plot_dots.html b/docs/reference/plot_dots.html
index ad26157..c38f794 100644
--- a/docs/reference/plot_dots.html
+++ b/docs/reference/plot_dots.html
@@ -90,7 +90,7 @@
simaerep
- 0.3.0
+ 0.3.2
@@ -98,7 +98,7 @@
@@ -142,7 +145,7 @@
-
+
@@ -239,12 +242,12 @@ Details
' '
Examples
-
@@ -99,7 +99,7 @@
@@ -143,7 +146,7 @@
-
+
diff --git a/docs/reference/plot_sim_examples-1.png b/docs/reference/plot_sim_examples-1.png
index eb0ad0c..bb5a022 100644
Binary files a/docs/reference/plot_sim_examples-1.png and b/docs/reference/plot_sim_examples-1.png differ
diff --git a/docs/reference/plot_sim_examples-2.png b/docs/reference/plot_sim_examples-2.png
index aaa6080..6c8f0fd 100644
Binary files a/docs/reference/plot_sim_examples-2.png and b/docs/reference/plot_sim_examples-2.png differ
diff --git a/docs/reference/plot_sim_examples.html b/docs/reference/plot_sim_examples.html
index 4f0df34..0b15aa5 100644
--- a/docs/reference/plot_sim_examples.html
+++ b/docs/reference/plot_sim_examples.html
@@ -90,7 +90,7 @@
simaerep
- 0.3.0
+ 0.3.2
@@ -98,7 +98,7 @@
@@ -142,7 +145,7 @@
-
+
diff --git a/docs/reference/plot_study-1.png b/docs/reference/plot_study-1.png
index 7ef4f6c..20380e3 100644
Binary files a/docs/reference/plot_study-1.png and b/docs/reference/plot_study-1.png differ
diff --git a/docs/reference/plot_study.html b/docs/reference/plot_study.html
index 330f846..5d669d4 100644
--- a/docs/reference/plot_study.html
+++ b/docs/reference/plot_study.html
@@ -90,7 +90,7 @@
simaerep
- 0.3.0
+ 0.3.2
@@ -98,7 +98,7 @@
@@ -142,7 +145,7 @@
-
+
diff --git a/docs/reference/plot_visit_med75-1.png b/docs/reference/plot_visit_med75-1.png
index 17262ce..f949942 100644
Binary files a/docs/reference/plot_visit_med75-1.png and b/docs/reference/plot_visit_med75-1.png differ
diff --git a/docs/reference/plot_visit_med75.html b/docs/reference/plot_visit_med75.html
index 1bb510f..82a1015 100644
--- a/docs/reference/plot_visit_med75.html
+++ b/docs/reference/plot_visit_med75.html
@@ -91,7 +91,7 @@
simaerep
- 0.3.0
+ 0.3.2
@@ -99,7 +99,7 @@
@@ -143,7 +146,7 @@
-
+
diff --git a/docs/reference/poiss_test_site_ae_vs_study_ae.html b/docs/reference/poiss_test_site_ae_vs_study_ae.html
index f55c3d3..a6be697 100644
--- a/docs/reference/poiss_test_site_ae_vs_study_ae.html
+++ b/docs/reference/poiss_test_site_ae_vs_study_ae.html
@@ -90,7 +90,7 @@
simaerep
- 0.3.0
+ 0.3.2
@@ -98,7 +98,7 @@
@@ -142,7 +145,7 @@
-
+
diff --git a/docs/reference/prep_for_sim.html b/docs/reference/prep_for_sim.html
new file mode 100644
index 0000000..7b789e2
--- /dev/null
+++ b/docs/reference/prep_for_sim.html
@@ -0,0 +1,249 @@
+
+
+
+
+
+
+
+
+prepare data for simulation — prep_for_sim • simaerep
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
Internal function called by sim_sites
.
+Collect AEs per patient at visit_med75 for site and study as a vector of
+integers.
+
+
+
prep_for_sim ( df_site , df_visit )
+
+
Arguments
+
+
+
+ df_site
+ dataframe created by site_aggr
+
+
+ df_visit
+ dataframe, created by sim_sites
+
+
+
+
Value
+
+
dataframe
+
See also
+
+
+
+
Examples
+
#> # A tibble: 5 x 7
+#> study_id site_number n_pat n_pat_with_med75 visit_med75 n_ae_site n_ae_study
+#> <chr> <chr> <int> <int> <dbl> <list> <list>
+#> 1 A S0001 20 18 15 <int [18]> <int [71]>
+#> 2 A S0002 20 16 16 <int [16]> <int [64]>
+#> 3 A S0003 20 19 15 <int [19]> <int [70]>
+#> 4 A S0004 20 18 14 <int [18]> <int [73]>
+#> 5 A S0005 20 17 15 <int [17]> <int [72]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/docs/reference/prob_lower_site_ae_vs_study_ae.html b/docs/reference/prob_lower_site_ae_vs_study_ae.html
index d2e20a0..1f7bd3e 100644
--- a/docs/reference/prob_lower_site_ae_vs_study_ae.html
+++ b/docs/reference/prob_lower_site_ae_vs_study_ae.html
@@ -90,7 +90,7 @@
simaerep
- 0.3.0
+ 0.3.2
@@ -98,7 +98,7 @@
@@ -142,7 +145,7 @@
-
+
diff --git a/docs/reference/purrr_bar.html b/docs/reference/purrr_bar.html
new file mode 100644
index 0000000..c15f174
--- /dev/null
+++ b/docs/reference/purrr_bar.html
@@ -0,0 +1,328 @@
+
+
+
+
+
+
+
+
+execute a purrr or furrr function with a progress
+bar — purrr_bar • simaerep
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
call still needs to be wrapped in with_progress()
+
+
+
purrr_bar (
+ ... ,
+ .purrr ,
+ .f ,
+ .f_args = list ( ) ,
+ .purrr_args = list ( ) ,
+ .steps ,
+ .slow = FALSE ,
+ .progress = TRUE
+)
+
+
Arguments
+
+
+
+ ...
+ iterable arguments passed to .purrr
+
+
+ .purrr
+ purrr or furrr function
+
+
+ .f
+ function to be executed over iterables
+
+
+ .f_args
+ list of arguments passed to .f, Default: list()
+
+
+ .purrr_args
+ list of arguments passed to .purrr, Default: list()
+
+
+ .steps
+ integer number of iterations
+
+
+ .slow
+ logical slows down execution, Default: FALSE
+
+
+ .progress
+ logical, show progress bar, Default: TRUE
+
+
+
+
+
Examples
+
#> [[1]]
+#> NULL
+#>
+#> [[2]]
+#> NULL
+#>
+#> [[3]]
+#> NULL
+#>
+#> [[4]]
+#> NULL
+#>
+#> [[5]]
+#> NULL
+#>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/docs/reference/sim_after_prep.html b/docs/reference/sim_after_prep.html
new file mode 100644
index 0000000..78d94ba
--- /dev/null
+++ b/docs/reference/sim_after_prep.html
@@ -0,0 +1,272 @@
+
+
+
+
+
+
+
+
+start simulation after preparation — sim_after_prep • simaerep
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
sim_after_prep (
+ df_sim_prep ,
+ r = 1000 ,
+ poisson_test = FALSE ,
+ prob_lower = TRUE ,
+ progress = FALSE
+)
+
+
Arguments
+
+
+
+ df_sim_prep
+ dataframe as returned by
+prep_for_sim
+
+
+ r
+ integer, denotes number of simulations, default = 1000
+
+
+ poisson_test
+ logical, calculates poisson.test pvalue
+
+
+ prob_lower
+ logical, calculates probability for getting a lower value
+
+
+ progress
+ logical, display progress bar, Default = TRUE
+
+
+
+
Value
+
+
dataframe
+
See also
+
+
+
+
Examples
+
#> # A tibble: 5 x 9
+#> study_id site_number n_pat n_pat_with_med75 visit_med75 mean_ae_site_me…
+#> <chr> <chr> <int> <int> <dbl> <dbl>
+#> 1 A S0001 20 20 15 6.1
+#> 2 A S0002 20 17 16 6.53
+#> 3 A S0003 20 17 16 8.35
+#> 4 A S0004 20 20 15 6.55
+#> 5 A S0005 20 16 15 7.62
+#> # … with 3 more variables: mean_ae_study_med75 <dbl> ,
+#> # n_pat_with_med75_study <int> , prob_low <dbl>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/docs/reference/sim_scenario.html b/docs/reference/sim_scenario.html
new file mode 100644
index 0000000..b0e36a3
--- /dev/null
+++ b/docs/reference/sim_scenario.html
@@ -0,0 +1,266 @@
+
+
+
+
+
+
+
+
+simulate single scenario — sim_scenario • simaerep
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
internal function called by simulate_scenarios()
+
+
+
sim_scenario ( n_ae_site , n_ae_study , frac_pat_with_ur , ur_rate )
+
+
Arguments
+
+
+
+ n_ae_site
+ integer vector
+
+
+ n_ae_study
+ integer vector
+
+
+ frac_pat_with_ur
+ double
+
+
+ ur_rate
+ double
+
+
+
+
Value
+
+
list
+
+
Examples
+
#> $n_ae_site
+#> [1] 2.5 2.5 5.0 5.0
+#>
+#> $n_ae_study
+#> [1] 8 8 8 8
+#>
#> $n_ae_site
+#> [1] 2.5 2.5 2.5 2.5
+#>
+#> $n_ae_study
+#> [1] 4 4 8 8
+#>
#> $n_ae_site
+#> [1] 2.5 2.5 2.5 2.5
+#>
+#> $n_ae_study
+#> [1] 4 4 4 4
+#>
#> $n_ae_site
+#> [1] 0 0 0 0
+#>
+#> $n_ae_study
+#> [1] 0 0 0 0
+#>
#> $n_ae_site
+#> [1] 5 5 5 5
+#>
+#> $n_ae_study
+#> [1] 8 8 8 8
+#>
#> $n_ae_site
+#> [1] 2.5 2.5 2.5 2.5
+#>
+#> $n_ae_study
+#> [1] 4 4 4 4
+#>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/docs/reference/sim_sites.html b/docs/reference/sim_sites.html
index 7152cf0..3674926 100644
--- a/docs/reference/sim_sites.html
+++ b/docs/reference/sim_sites.html
@@ -92,7 +92,7 @@
simaerep
- 0.3.0
+ 0.3.2
@@ -100,7 +100,7 @@
@@ -144,7 +147,7 @@
-
+
@@ -172,7 +175,14 @@ calculate prob_lower and poisson.test pvalue for study sites
bootstrapped probability of having a lower mean value.
- sim_sites ( df_site , df_visit , r = 1000 , poisson_test = TRUE , prob_lower = TRUE )
+ sim_sites (
+ df_site ,
+ df_visit ,
+ r = 1000 ,
+ poisson_test = TRUE ,
+ prob_lower = TRUE ,
+ progress = TRUE
+)
Arguments
@@ -197,6 +207,10 @@ Arg
prob_lower
logical, calculates probability for getting a lower value
+
+ progress
+ logical, display progress bar, Default = TRUE
+
Value
@@ -204,9 +218,12 @@ Value
dataframe with the following columns:
study_id study identification
site_number site identification
+n_pat number of patients at site
visit_med75 median(max(visit)) * 0.75
+n_pat_with_med75 number of patients at site with med75
mean_ae_site_med75 mean AE at visit_med75 site level
mean_ae_study_med75 mean AE at visit_med75 study level
+n_pat_with_med75_study number of patients at study with med75 excl. site
pval p-value as returned by poisson.test
prob_low bootstrapped probability for having mean_ae_site_med75 or lower
@@ -218,7 +235,9 @@ See a
site_aggr
,
pat_pool
,
prob_lower_site_ae_vs_study_ae
,
-poiss_test_site_ae_vs_study_ae
,
+poiss_test_site_ae_vs_study_ae
,
+sim_sites
,
+prep_for_sim
Examples
#>
#>
-#> |study_id |site_number | n_pat| n_pat_with_med75| visit_med75| mean_ae_site_med75| mean_ae_study_med75| pval| prob_low|
-#> |:--------|:-----------|-----:|----------------:|-----------:|------------------:|-------------------:|----:|--------:|
-#> |A |S0001 | 20| 20| 15| 6.10| 6.96| 0.21| 0.07|
-#> |A |S0002 | 20| 17| 16| 6.53| 7.43| 0.23| 0.07|
-#> |A |S0003 | 20| 17| 16| 8.35| 6.97| 1.00| 1.00|
-#> |A |S0004 | 20| 20| 15| 6.55| 6.83| 0.70| 0.32|
-#> |A |S0005 | 20| 16| 15| 7.62| 6.59| 1.00| 1.00|
+#> |study_id |site_number | n_pat| n_pat_with_med75| visit_med75| mean_ae_site_med75| mean_ae_study_med75| n_pat_with_med75_study| pval| prob_low|
+#> |:--------|:-----------|-----:|----------------:|-----------:|------------------:|-------------------:|----------------------:|----:|--------:|
+#> |A |S0001 | 20| 19| 15| 6.63| 7.07| 71| 0.56| 0.26|
+#> |A |S0002 | 20| 18| 15| 6.11| 7.19| 72| 0.12| 0.10|
+#> |A |S0003 | 20| 17| 16| 8.53| 7.17| 66| 1.00| 1.00|
+#> |A |S0004 | 20| 20| 15| 6.40| 7.14| 70| 0.29| 0.14|
+#> |A |S0005 | 20| 16| 16| 8.38| 7.22| 67| 1.00| 1.00|
@@ -100,7 +100,7 @@
@@ -144,7 +147,7 @@
-
+
@@ -263,30 +266,30 @@ Examp
df_visit2 $ study_id <- "B"
-df_visit <- bind_rows ( df_visit1 , df_visit2 )
+df_visit <- dplyr :: bind_rows ( df_visit1 , df_visit2 )
df_site <- site_aggr ( df_visit )
sim_studies ( df_visit , df_site , r = 3 , keep_ae = TRUE )
-#> # A tibble: 24 x 10
+
#> # A tibble: 24 x 10
#> r study_id site_number visit_med75 n_pat_with_med75 n_pat_study n_ae_site
-#> <dbl> <chr> <chr> <dbl> <int> <dbl> <chr>
-#> 1 1 A S0001 16 20 68 7,13,7,4…
-#> 2 1 A S0002 18 17 57 1,10,4,4…
-#> 3 1 A S0003 17 18 63 13,11,13…
-#> 4 1 A S0004 14 18 76 3,9,9,9,…
-#> 5 1 A S0005 15 18 72 9,4,7,6,…
-#> 6 1 B S0001 15 297 597 9,8,5,7,…
-#> 7 1 B S0002 15 296 598 14,8,7,1…
-#> 8 1 B S0003 15 301 593 9,5,8,6,…
-#> 9 2 A S0001 16 20 68 10,7,5,3…
-#> 10 2 A S0002 18 17 57 12,12,6,…
-#> # … with 14 more rows, and 3 more variables: n_ae_study <chr> , pval <dbl> ,
-#> # prob_low <dbl>
+#> <dbl> <chr> <chr> <dbl> <int> <dbl> <chr>
+#> 1 1 A S0001 16 20 68 7,13,7,4…
+#> 2 1 A S0002 18 17 57 1,10,4,4…
+#> 3 1 A S0003 17 18 63 13,11,13…
+#> 4 1 A S0004 14 18 76 3,9,9,9,…
+#> 5 1 A S0005 15 18 72 9,4,7,6,…
+#> 6 1 B S0001 15 297 597 9,8,5,7,…
+#> 7 1 B S0002 15 296 598 14,8,7,1…
+#> 8 1 B S0003 15 301 593 9,5,8,6,…
+#> 9 2 A S0001 16 20 68 10,7,5,3…
+#> 10 2 A S0002 18 17 57 12,12,6,…
+#> # … with 14 more rows, and 3 more variables: n_ae_study <chr> , pval <dbl> ,
+#> # prob_low <dbl>
diff --git a/docs/reference/sim_test_data_patient.html b/docs/reference/sim_test_data_patient.html
index 3f6fd2a..1f778eb 100644
--- a/docs/reference/sim_test_data_patient.html
+++ b/docs/reference/sim_test_data_patient.html
@@ -90,7 +90,7 @@
simaerep
- 0.3.0
+ 0.3.2
@@ -98,7 +98,7 @@
@@ -142,7 +145,7 @@
-
+
@@ -160,7 +163,7 @@
diff --git a/docs/reference/sim_test_data_portfolio.html b/docs/reference/sim_test_data_portfolio.html
new file mode 100644
index 0000000..5908af4
--- /dev/null
+++ b/docs/reference/sim_test_data_portfolio.html
@@ -0,0 +1,288 @@
+
+
+
+
+
+
+
+
+
Simulate Portfolio Test Data — sim_test_data_portfolio • simaerep
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
Simulate visit level data from a portfolio configuration.
+
+
+
sim_test_data_portfolio ( df_config , parallel = FALSE , progress = TRUE )
+
+
Arguments
+
+
+
+ df_config
+ dataframe as returned by get_config
+
+
+ parallel
+ logical activate parallel processing, see details, Default: FALSE
+
+
+ progress
+ logical, Default: TRUE
+
+
+
+
Value
+
+
dataframe with the following columns:
+study_id study identification
ae_per_visit_mean mean
+AE per visit per study
site_number site
+max_visit_sd standard deviation of maximum patient visits per
+site
max_visit_mean mean of maximum patient visits per site
+patnum number of patients
+visit visit number
+n_ae cumulative sum of AEs
+
+
+
+
Details
+
+
uses sim_test_data_study
.
+We use the furrr
package to
+implement parallel processing as these simulations can take a long time to
+run. For this to work we need to specify the plan for how the code should
+run, e.g. `plan(multisession, workers = 3)
+
See also
+
+
+
+
Examples
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/docs/reference/sim_test_data_study.html b/docs/reference/sim_test_data_study.html
index d57814a..cd706c7 100644
--- a/docs/reference/sim_test_data_study.html
+++ b/docs/reference/sim_test_data_study.html
@@ -92,7 +92,7 @@
simaerep
- 0.3.0
+ 0.3.2
@@ -100,7 +100,7 @@
@@ -144,7 +147,7 @@
-
+
@@ -162,7 +165,7 @@
@@ -233,43 +236,43 @@
Examp
#> # A tibble: 17 x 8
+
#> # A tibble: 17 x 8
#> patnum site_number is_ur max_visit_mean max_visit_sd ae_per_visit_me… visit
-#> <chr> <chr> <lgl> <dbl> <dbl> <dbl> <int>
-#> 1 P0000… S0001 FALSE 20 4 0.5 1
-#> 2 P0000… S0001 FALSE 20 4 0.5 2
-#> 3 P0000… S0001 FALSE 20 4 0.5 3
-#> 4 P0000… S0001 FALSE 20 4 0.5 4
-#> 5 P0000… S0001 FALSE 20 4 0.5 5
-#> 6 P0000… S0001 FALSE 20 4 0.5 6
-#> 7 P0000… S0001 FALSE 20 4 0.5 7
-#> 8 P0000… S0001 FALSE 20 4 0.5 8
-#> 9 P0000… S0001 FALSE 20 4 0.5 9
-#> 10 P0000… S0001 FALSE 20 4 0.5 10
-#> 11 P0000… S0001 FALSE 20 4 0.5 11
-#> 12 P0000… S0001 FALSE 20 4 0.5 12
-#> 13 P0000… S0001 FALSE 20 4 0.5 13
-#> 14 P0000… S0001 FALSE 20 4 0.5 14
-#> 15 P0000… S0001 FALSE 20 4 0.5 15
-#> 16 P0000… S0001 FALSE 20 4 0.5 16
-#> 17 P0000… S0001 FALSE 20 4 0.5 17
-#> # … with 1 more variable: n_ae <int>
df_visit <- sim_test_data_study ( n_pat = 100 , n_sites = 5 ,
+#> <chr> <chr> <lgl> <dbl> <dbl> <dbl> <int>
+#> 1 P0000… S0001 FALSE 20 4 0.5 1
+#> 2 P0000… S0001 FALSE 20 4 0.5 2
+#> 3 P0000… S0001 FALSE 20 4 0.5 3
+#> 4 P0000… S0001 FALSE 20 4 0.5 4
+#> 5 P0000… S0001 FALSE 20 4 0.5 5
+#> 6 P0000… S0001 FALSE 20 4 0.5 6
+#> 7 P0000… S0001 FALSE 20 4 0.5 7
+#> 8 P0000… S0001 FALSE 20 4 0.5 8
+#> 9 P0000… S0001 FALSE 20 4 0.5 9
+#> 10 P0000… S0001 FALSE 20 4 0.5 10
+#> 11 P0000… S0001 FALSE 20 4 0.5 11
+#> 12 P0000… S0001 FALSE 20 4 0.5 12
+#> 13 P0000… S0001 FALSE 20 4 0.5 13
+#> 14 P0000… S0001 FALSE 20 4 0.5 14
+#> 15 P0000… S0001 FALSE 20 4 0.5 15
+#> 16 P0000… S0001 FALSE 20 4 0.5 16
+#> 17 P0000… S0001 FALSE 20 4 0.5 17
+#> # … with 1 more variable: n_ae <int>
#> # A tibble: 23 x 8
+
#> # A tibble: 23 x 8
#> patnum site_number is_ur max_visit_mean max_visit_sd ae_per_visit_me… visit
-#> <chr> <chr> <lgl> <dbl> <dbl> <dbl> <int>
-#> 1 P0000… S0001 TRUE 20 4 0.25 1
-#> 2 P0000… S0001 TRUE 20 4 0.25 2
-#> 3 P0000… S0001 TRUE 20 4 0.25 3
-#> 4 P0000… S0001 TRUE 20 4 0.25 4
-#> 5 P0000… S0001 TRUE 20 4 0.25 5
-#> 6 P0000… S0001 TRUE 20 4 0.25 6
-#> 7 P0000… S0001 TRUE 20 4 0.25 7
-#> 8 P0000… S0001 TRUE 20 4 0.25 8
-#> 9 P0000… S0001 TRUE 20 4 0.25 9
-#> 10 P0000… S0001 TRUE 20 4 0.25 10
-#> # … with 13 more rows, and 1 more variable: n_ae <int>
+#> <chr> <chr> <lgl> <dbl> <dbl> <dbl> <int>
+#> 1 P0000… S0001 TRUE 20 4 0.25 1
+#> 2 P0000… S0001 TRUE 20 4 0.25 2
+#> 3 P0000… S0001 TRUE 20 4 0.25 3
+#> 4 P0000… S0001 TRUE 20 4 0.25 4
+#> 5 P0000… S0001 TRUE 20 4 0.25 5
+#> 6 P0000… S0001 TRUE 20 4 0.25 6
+#> 7 P0000… S0001 TRUE 20 4 0.25 7
+#> 8 P0000… S0001 TRUE 20 4 0.25 8
+#> 9 P0000… S0001 TRUE 20 4 0.25 9
+#> 10 P0000… S0001 TRUE 20 4 0.25 10
+#> # … with 13 more rows, and 1 more variable: n_ae <int>
@@ -98,7 +98,7 @@
@@ -142,7 +145,7 @@
-
+
diff --git a/docs/reference/with_progress_cnd.html b/docs/reference/with_progress_cnd.html
new file mode 100644
index 0000000..6819f88
--- /dev/null
+++ b/docs/reference/with_progress_cnd.html
@@ -0,0 +1,266 @@
+
+
+
+
+
+
+
+
+conditional with_progress — with_progress_cnd • simaerep
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
internal function. Use instead of
+with_progress
within custom functions with progress
+bars.
+
+
+
with_progress_cnd ( ex , progress = TRUE )
+
+
Arguments
+
+
+
+ ex
+ expression
+
+
+ progress
+ logical, Default: TRUE
+
+
+
+
Details
+
+
DETAILS
+
See also
+
+
+
+
Examples
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/docs/sitemap.xml b/docs/sitemap.xml
index fa39bed..dadfbfb 100644
--- a/docs/sitemap.xml
+++ b/docs/sitemap.xml
@@ -18,12 +18,18 @@
https://openpharma.github.io/simaerep//reference/exp_implicit_missing_visits.html
+
+ https://openpharma.github.io/simaerep//reference/get_config.html
+
https://openpharma.github.io/simaerep//reference/get_ecd_values.html
https://openpharma.github.io/simaerep//reference/get_pat_pool_config.html
+
+ https://openpharma.github.io/simaerep//reference/get_portf_perf.html
+
https://openpharma.github.io/simaerep//reference/get_site_mean_ae_dev.html
@@ -60,9 +66,21 @@
https://openpharma.github.io/simaerep//reference/poiss_test_site_ae_vs_study_ae.html
+
+ https://openpharma.github.io/simaerep//reference/prep_for_sim.html
+
https://openpharma.github.io/simaerep//reference/prob_lower_site_ae_vs_study_ae.html
+
+ https://openpharma.github.io/simaerep//reference/purrr_bar.html
+
+
+ https://openpharma.github.io/simaerep//reference/sim_after_prep.html
+
+
+ https://openpharma.github.io/simaerep//reference/sim_scenario.html
+
https://openpharma.github.io/simaerep//reference/sim_sites.html
@@ -72,21 +90,30 @@
https://openpharma.github.io/simaerep//reference/sim_test_data_patient.html
+
+ https://openpharma.github.io/simaerep//reference/sim_test_data_portfolio.html
+
https://openpharma.github.io/simaerep//reference/sim_test_data_study.html
+
+ https://openpharma.github.io/simaerep//reference/sim_ur_scenarios.html
+
https://openpharma.github.io/simaerep//reference/site_aggr.html
- https://openpharma.github.io/simaerep//articles/check_poisson.html
+ https://openpharma.github.io/simaerep//reference/with_progress_cnd.html
- https://openpharma.github.io/simaerep//articles/do_not_commit.html
+ https://openpharma.github.io/simaerep//articles/check_poisson.html
https://openpharma.github.io/simaerep//articles/intro.html
+
+ https://openpharma.github.io/simaerep//articles/portfolio_perf.html
+
https://openpharma.github.io/simaerep//articles/sas_files.html
diff --git a/inst/WORDLIST b/inst/WORDLIST
index 68040f6..a7eca6e 100644
--- a/inst/WORDLIST
+++ b/inst/WORDLIST
@@ -8,6 +8,7 @@ Downey's
Downey’s
FP
Hochberg
+Hoffmann
Lifecycle
Modelling
Ménard
@@ -20,7 +21,7 @@ aes
aggr
al
allendowney
-analytics
+anonymize
anonymized
blogspot
codecov
@@ -34,24 +35,33 @@ ecdf
et
eval
fp
+fpr
frac
+furrr
ggplot
grey
http
https
infer’
+iterable
+iterables
iteratively
lightblue
linter
lintr
+multisession
+openpharma
patnum
poisson
+purrr
pval
pvalue
rnorm
rpois
sd
tibble
+tp
+tpr
ttest
unadjusted
uncompliant
diff --git a/man/check_df_visit.Rd b/man/check_df_visit.Rd
index 2ef4e10..720e9da 100644
--- a/man/check_df_visit.Rd
+++ b/man/check_df_visit.Rd
@@ -30,13 +30,13 @@ df_visit <- sim_test_data_study(
df_visit$study_id <- "A"
df_visit_filt <- df_visit \%>\%
- filter(visit != 3)
+ dplyr::filter(visit != 3)
df_visit_corr <- check_df_visit(df_visit_filt)
3 \%in\% df_visit_corr$visit
nrow(df_visit_corr) == nrow(df_visit)
-df_visit_corr <- check_df_visit(bind_rows(df_visit, df_visit))
+df_visit_corr <- check_df_visit(dplyr::bind_rows(df_visit, df_visit))
nrow(df_visit_corr) == nrow(df_visit)
}
diff --git a/man/figures/README-unnamed-chunk-2-1.png b/man/figures/README-unnamed-chunk-2-1.png
index 5814600..b86c61b 100644
Binary files a/man/figures/README-unnamed-chunk-2-1.png and b/man/figures/README-unnamed-chunk-2-1.png differ
diff --git a/man/figures/logo.png b/man/figures/logo.png
index 495c6cf..548b3ae 100644
Binary files a/man/figures/logo.png and b/man/figures/logo.png differ
diff --git a/man/get_config.Rd b/man/get_config.Rd
new file mode 100644
index 0000000..215cfd2
--- /dev/null
+++ b/man/get_config.Rd
@@ -0,0 +1,87 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/simulate_test_data.R
+\name{get_config}
+\alias{get_config}
+\title{Get Portfolio Configuration}
+\usage{
+get_config(
+ df_site,
+ min_pat_per_study = 100,
+ min_sites_per_study = 10,
+ anonymize = TRUE,
+ pad_width = 4
+)
+}
+\arguments{
+\item{df_site}{dataframe aggregated on patient level with max_ae and max_visit}
+
+\item{min_pat_per_study}{minimum number of patients per study, Default: 100}
+
+\item{min_sites_per_study}{minimum number of sites per study, Default: 10}
+
+\item{anonymize}{logical, Default: TRUE}
+
+\item{pad_width}{padding width for newly created IDs, Default: 4}
+}
+\value{
+dataframe with the following columns: \describe{
+\item{\strong{study_id}}{study identification} \item{\strong{ae_per_visit_mean}}{mean
+AE per visit per study} \item{\strong{site_number}}{site}
+\item{\strong{max_visit_sd}}{standard deviation of maximum patient visits per
+site} \item{\strong{max_visit_mean}}{mean of maximum patient visits per site}
+\item{\strong{n_pat}}{number of patients} }
+}
+\description{
+Get Portfolio configuration from a dataframe aggregated on
+patient level with max_ae and max_visit. Will filter studies with only a few
+sites and patients and will anonymize IDs. Portfolio configuration can be
+used by \code{\link{sim_test_data_portfolio}} to generate data for an
+artificial portfolio.
+}
+\examples{
+if (interactive()) {
+df_visit1 <- sim_test_data_study(n_pat = 100, n_sites = 10,
+ frac_site_with_ur = 0.4, ur_rate = 0.6)
+.
+df_visit1$study_id <- "A"
+.
+df_visit2 <- sim_test_data_study(n_pat = 100, n_sites = 10,
+ frac_site_with_ur = 0.2, ur_rate = 0.1)
+.
+df_visit2$study_id <- "B"
+.
+df_visit <- dplyr::bind_rows(df_visit1, df_visit2)
+
+df_site_max <- df_visit \%>\%
+ group_by(study_id, site_number, patnum) \%>\%
+ summarise(max_visit = max(visit),
+ max_ae = max(n_ae),
+ .groups = "drop")
+
+df_config <- get_config(df_site_max)
+
+df_config
+
+df_portf <- sim_test_data_portfolio(df_config)
+
+df_portf
+
+df_scen <- sim_ur_scenarios(df_portf,
+ extra_ur_sites = 2,
+ ur_rate = c(0.5, 1))
+
+
+df_scen
+
+df_perf <- get_portf_perf(df_scen)
+
+df_perf
+}
+}
+\seealso{
+\code{\link{sim_test_data_study}}
+\code{\link{get_config}}
+\code{\link{sim_test_data_portfolio}}
+\code{\link{sim_ur_scenarios}}
+\code{\link{get_portf_perf}}
+}
diff --git a/man/get_pat_pool_config.Rd b/man/get_pat_pool_config.Rd
index 00b98d7..f6e8dfc 100644
--- a/man/get_pat_pool_config.Rd
+++ b/man/get_pat_pool_config.Rd
@@ -35,7 +35,7 @@ df_visit2 <- sim_test_data_study(n_pat = 1000, n_sites = 3,
df_visit2$study_id <- "B"
-df_visit <- bind_rows(df_visit1, df_visit2)
+df_visit <- dplyr::bind_rows(df_visit1, df_visit2)
df_site <- site_aggr(df_visit)
diff --git a/man/get_portf_perf.Rd b/man/get_portf_perf.Rd
new file mode 100644
index 0000000..ae761cf
--- /dev/null
+++ b/man/get_portf_perf.Rd
@@ -0,0 +1,70 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/simulate_test_data.R
+\name{get_portf_perf}
+\alias{get_portf_perf}
+\title{Get Portfolio Performance}
+\usage{
+get_portf_perf(df_scen, stat = "prob_low_prob_ur", fpr = c(0.001, 0.01, 0.05))
+}
+\arguments{
+\item{df_scen}{dataframe as returned by \code{\link{sim_ur_scenarios}}}
+
+\item{stat}{character denoting the column name of the under-reporting
+statistic, Default: 'prob_low_prob_ur'}
+
+\item{fpr}{numeric vector specifying false positive rates, Default: c(0.001,
+0.01, 0.05)}
+}
+\value{
+dataframe
+}
+\description{
+Performance as true positive rate (tpr as tp/P) on the basis of
+desired false positive rates (fpr as fp/P).
+}
+\details{
+DETAILS
+}
+\examples{
+df_visit1 <- sim_test_data_study(n_pat = 100, n_sites = 10,
+ frac_site_with_ur = 0.4, ur_rate = 0.6)
+
+df_visit1$study_id <- "A"
+
+df_visit2 <- sim_test_data_study(n_pat = 100, n_sites = 10,
+ frac_site_with_ur = 0.2, ur_rate = 0.1)
+
+df_visit2$study_id <- "B"
+
+df_visit <- dplyr::bind_rows(df_visit1, df_visit2)
+
+df_site_max <- df_visit \%>\%
+ dplyr::group_by(study_id, site_number, patnum) \%>\%
+ dplyr::summarise(max_visit = max(visit),
+ max_ae = max(n_ae),
+ .groups = "drop")
+
+df_config <- get_config(df_site_max)
+
+df_config
+
+df_portf <- sim_test_data_portfolio(df_config)
+
+df_portf
+
+df_scen <- sim_ur_scenarios(df_portf,
+ extra_ur_sites = 2,
+ ur_rate = c(0.5, 1))
+
+
+df_scen
+
+df_perf <- get_portf_perf(df_scen)
+
+df_perf
+}
+\seealso{
+\code{\link{sim_test_data_study}} \code{\link{get_config}}
+\code{\link{sim_test_data_portfolio}} \code{\link{sim_ur_scenarios}}
+\code{\link{get_portf_perf}}
+}
diff --git a/man/plot_dots.Rd b/man/plot_dots.Rd
index fc0c943..8cd79bd 100644
--- a/man/plot_dots.Rd
+++ b/man/plot_dots.Rd
@@ -51,12 +51,12 @@ different groupings can be applied
' '
}
\examples{
-study <- tibble(
+study <- tibble::tibble(
site = LETTERS[1:3],
patients = c(list(seq(1, 50, 1)), list(seq(1, 40, 1)), list(seq(1, 10, 1)))
) \%>\%
tidyr::unnest(patients) \%>\%
- mutate(n_ae = as.integer(runif(min = 0, max = 10, n = nrow(.))))
+ dplyr::mutate(n_ae = as.integer(runif(min = 0, max = 10, n = nrow(.))))
plot_dots(study)
}
diff --git a/man/prep_for_sim.Rd b/man/prep_for_sim.Rd
new file mode 100644
index 0000000..489a745
--- /dev/null
+++ b/man/prep_for_sim.Rd
@@ -0,0 +1,39 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/simaerep.R
+\name{prep_for_sim}
+\alias{prep_for_sim}
+\title{prepare data for simulation}
+\usage{
+prep_for_sim(df_site, df_visit)
+}
+\arguments{
+\item{df_site}{dataframe created by \code{\link[simaerep]{site_aggr}}}
+
+\item{df_visit}{dataframe, created by \code{\link[simaerep]{sim_sites}}}
+}
+\value{
+dataframe
+}
+\description{
+Internal function called by \code{\link[simaerep]{sim_sites}}.
+Collect AEs per patient at visit_med75 for site and study as a vector of
+integers.
+}
+\examples{
+df_visit <- sim_test_data_study(
+ n_pat = 100,
+ n_sites = 5,
+ frac_site_with_ur = 0.4,
+ ur_rate = 0.2
+)
+
+df_visit$study_id <- "A"
+
+df_site <- site_aggr(df_visit)
+
+df_prep <- prep_for_sim(df_site, df_visit)
+df_prep
+}
+\seealso{
+\code{\link[simaerep]{sim_sites}}, \code{\link[simaerep]{sim_after_prep}}
+}
diff --git a/man/purrr_bar.Rd b/man/purrr_bar.Rd
new file mode 100644
index 0000000..0a3b2f4
--- /dev/null
+++ b/man/purrr_bar.Rd
@@ -0,0 +1,99 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/progress.R
+\name{purrr_bar}
+\alias{purrr_bar}
+\title{execute a purrr or furrr function with a progress
+bar}
+\usage{
+purrr_bar(
+ ...,
+ .purrr,
+ .f,
+ .f_args = list(),
+ .purrr_args = list(),
+ .steps,
+ .slow = FALSE,
+ .progress = TRUE
+)
+}
+\arguments{
+\item{...}{iterable arguments passed to .purrr}
+
+\item{.purrr}{purrr or furrr function}
+
+\item{.f}{function to be executed over iterables}
+
+\item{.f_args}{list of arguments passed to .f, Default: list()}
+
+\item{.purrr_args}{list of arguments passed to .purrr, Default: list()}
+
+\item{.steps}{integer number of iterations}
+
+\item{.slow}{logical slows down execution, Default: FALSE}
+
+\item{.progress}{logical, show progress bar, Default: TRUE}
+}
+\description{
+call still needs to be wrapped in with_progress()
+}
+\examples{
+# purrr::map
+progressr::with_progress(
+ purrr_bar(rep(0.25, 5), .purrr = purrr::map, .f = Sys.sleep, .steps = 5)
+)
+
+\dontrun{
+# purrr::walk
+progressr::with_progress(
+ purrr_bar(rep(0.25, 5), .purrr = purrr::walk,.f = Sys.sleep, .steps = 5)
+)
+
+# progress bar off
+progressr::with_progress(
+ purrr_bar(
+ rep(0.25, 5), .purrr = purrr::walk,.f = Sys.sleep, .steps = 5, .progress = FALSE
+ )
+)
+
+# purrr::map2
+progressr::with_progress(
+ purrr_bar(
+ rep(1, 5), rep(2, 5),
+ .purrr = purrr::map2,
+ .f = `+`,
+ .steps = 5,
+ .slow = TRUE
+ )
+)
+
+# purrr::pmap
+progressr::with_progress(
+ purrr_bar(
+ list(rep(1, 5), rep(2, 5)),
+ .purrr = purrr::pmap,
+ .f = `+`,
+ .steps = 5,
+ .slow = TRUE
+ )
+)
+
+# define function within purr_bar() call
+progressr::with_progress(
+ purrr_bar(
+ list(rep(1, 5), rep(2, 5)),
+ .purrr = purrr::pmap,
+ .f = function(x, y) {
+ paste0(x, y)
+ },
+ .steps = 5,
+ .slow = TRUE
+ )
+)
+
+# with mutate
+progressr::with_progress(
+ tibble(x = rep(0.25, 5)) \%>\%
+ mutate(x = purrr_bar(x, .purrr = purrr::map, .f = Sys.sleep, .steps = 5))
+)
+}
+}
diff --git a/man/sim_after_prep.Rd b/man/sim_after_prep.Rd
new file mode 100644
index 0000000..7217ac1
--- /dev/null
+++ b/man/sim_after_prep.Rd
@@ -0,0 +1,55 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/simaerep.R
+\name{sim_after_prep}
+\alias{sim_after_prep}
+\title{start simulation after preparation}
+\usage{
+sim_after_prep(
+ df_sim_prep,
+ r = 1000,
+ poisson_test = FALSE,
+ prob_lower = TRUE,
+ progress = FALSE
+)
+}
+\arguments{
+\item{df_sim_prep}{dataframe as returned by
+\code{\link[simaerep]{prep_for_sim}}}
+
+\item{r}{integer, denotes number of simulations, default = 1000}
+
+\item{poisson_test}{logical, calculates poisson.test pvalue}
+
+\item{prob_lower}{logical, calculates probability for getting a lower value}
+
+\item{progress}{logical, display progress bar, Default = TRUE}
+}
+\value{
+dataframe
+}
+\description{
+Internal function called by \code{\link[simaerep]{sim_sites}}
+after \code{\link[simaerep]{prep_for_sim}}
+}
+\examples{
+df_visit <- sim_test_data_study(
+ n_pat = 100,
+ n_sites = 5,
+ frac_site_with_ur = 0.4,
+ ur_rate = 0.2
+)
+
+df_visit$study_id <- "A"
+
+df_site <- site_aggr(df_visit)
+
+df_prep <- prep_for_sim(df_site, df_visit)
+
+df_sim <- sim_after_prep(df_prep)
+
+df_sim
+}
+\seealso{
+\code{\link[simaerep]{sim_sites}},
+\code{\link[simaerep]{prep_for_sim}}
+}
diff --git a/man/sim_scenario.Rd b/man/sim_scenario.Rd
new file mode 100644
index 0000000..59cc81a
--- /dev/null
+++ b/man/sim_scenario.Rd
@@ -0,0 +1,31 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/simulate_test_data.R
+\name{sim_scenario}
+\alias{sim_scenario}
+\title{simulate single scenario}
+\usage{
+sim_scenario(n_ae_site, n_ae_study, frac_pat_with_ur, ur_rate)
+}
+\arguments{
+\item{n_ae_site}{integer vector}
+
+\item{n_ae_study}{integer vector}
+
+\item{frac_pat_with_ur}{double}
+
+\item{ur_rate}{double}
+}
+\value{
+list
+}
+\description{
+internal function called by simulate_scenarios()
+}
+\examples{
+sim_scenario(c(5,5,5,5), c(8,8,8,8), 0.2, 0.5)
+sim_scenario(c(5,5,5,5), c(8,8,8,8), 0.75, 0.5)
+sim_scenario(c(5,5,5,5), c(8,8,8,8), 1, 0.5)
+sim_scenario(c(5,5,5,5), c(8,8,8,8), 1, 1)
+sim_scenario(c(5,5,5,5), c(8,8,8,8), 0, 0.5)
+sim_scenario(c(5,5,5,5), c(8,8,8,8), 2, 0.5)
+}
diff --git a/man/sim_sites.Rd b/man/sim_sites.Rd
index 941b2bc..431ab43 100644
--- a/man/sim_sites.Rd
+++ b/man/sim_sites.Rd
@@ -4,7 +4,14 @@
\alias{sim_sites}
\title{calculate prob_lower and poisson.test pvalue for study sites}
\usage{
-sim_sites(df_site, df_visit, r = 1000, poisson_test = TRUE, prob_lower = TRUE)
+sim_sites(
+ df_site,
+ df_visit,
+ r = 1000,
+ poisson_test = TRUE,
+ prob_lower = TRUE,
+ progress = TRUE
+)
}
\arguments{
\item{df_site}{dataframe created by \code{\link[simaerep]{site_aggr}}}
@@ -16,15 +23,20 @@ sim_sites(df_site, df_visit, r = 1000, poisson_test = TRUE, prob_lower = TRUE)
\item{poisson_test}{logical, calculates poisson.test pvalue}
\item{prob_lower}{logical, calculates probability for getting a lower value}
+
+\item{progress}{logical, display progress bar, Default = TRUE}
}
\value{
dataframe with the following columns:
\describe{
\item{\strong{study_id}}{study identification}
\item{\strong{site_number}}{site identification}
+\item{\strong{n_pat}}{number of patients at site}
\item{\strong{visit_med75}}{median(max(visit)) * 0.75}
+\item{\strong{n_pat_with_med75}}{number of patients at site with med75}
\item{\strong{mean_ae_site_med75}}{mean AE at visit_med75 site level}
\item{\strong{mean_ae_study_med75}}{mean AE at visit_med75 study level}
+\item{\strong{n_pat_with_med75_study}}{number of patients at study with med75 excl. site}
\item{\strong{pval}}{p-value as returned by \code{\link[stats]{poisson.test}}}
\item{\strong{prob_low}}{bootstrapped probability for having mean_ae_site_med75 or lower}
}
@@ -57,4 +69,6 @@ df_sim_sites \%>\%
\code{\link[simaerep]{pat_pool}},
\code{\link[simaerep]{prob_lower_site_ae_vs_study_ae}},
\code{\link[simaerep]{poiss_test_site_ae_vs_study_ae}},
+\code{\link[simaerep]{sim_sites}},
+\code{\link[simaerep]{prep_for_sim}}
}
diff --git a/man/sim_studies.Rd b/man/sim_studies.Rd
index c5599e1..909ad6c 100644
--- a/man/sim_studies.Rd
+++ b/man/sim_studies.Rd
@@ -74,7 +74,7 @@ df_visit2 <- sim_test_data_study(n_pat = 1000, n_sites = 3,
df_visit2$study_id <- "B"
-df_visit <- bind_rows(df_visit1, df_visit2)
+df_visit <- dplyr::bind_rows(df_visit1, df_visit2)
df_site <- site_aggr(df_visit)
diff --git a/man/sim_test_data_patient.Rd b/man/sim_test_data_patient.Rd
index 5cefa8d..42ac341 100644
--- a/man/sim_test_data_patient.Rd
+++ b/man/sim_test_data_patient.Rd
@@ -1,5 +1,5 @@
% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/simaerep.R
+% Please edit documentation in R/simulate_test_data.R
\name{sim_test_data_patient}
\alias{sim_test_data_patient}
\title{simulate patient ae reporting test data}
diff --git a/man/sim_test_data_portfolio.Rd b/man/sim_test_data_portfolio.Rd
new file mode 100644
index 0000000..46deb6b
--- /dev/null
+++ b/man/sim_test_data_portfolio.Rd
@@ -0,0 +1,83 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/simulate_test_data.R
+\name{sim_test_data_portfolio}
+\alias{sim_test_data_portfolio}
+\title{Simulate Portfolio Test Data}
+\usage{
+sim_test_data_portfolio(df_config, parallel = FALSE, progress = TRUE)
+}
+\arguments{
+\item{df_config}{dataframe as returned by \code{\link{get_config}}}
+
+\item{parallel}{logical activate parallel processing, see details, Default: FALSE}
+
+\item{progress}{logical, Default: TRUE}
+}
+\value{
+dataframe with the following columns: \describe{
+\item{\strong{study_id}}{study identification} \item{\strong{ae_per_visit_mean}}{mean
+AE per visit per study} \item{\strong{site_number}}{site}
+\item{\strong{max_visit_sd}}{standard deviation of maximum patient visits per
+site} \item{\strong{max_visit_mean}}{mean of maximum patient visits per site}
+\item{\strong{patnum}}{number of patients}
+\item{\strong{visit}}{visit number}
+\item{\strong{n_ae}}{cumulative sum of AEs}
+}
+}
+\description{
+Simulate visit level data from a portfolio configuration.
+}
+\details{
+uses \code{\link{sim_test_data_study}}.
+We use the \code{furrr} package to
+implement parallel processing as these simulations can take a long time to
+run. For this to work we need to specify the plan for how the code should
+run, e.g. `plan(multisession, workers = 3)
+}
+\examples{
+if (interactive()) {
+df_visit1 <- sim_test_data_study(n_pat = 100, n_sites = 10,
+ frac_site_with_ur = 0.4, ur_rate = 0.6)
+
+df_visit1$study_id <- "A"
+
+df_visit2 <- sim_test_data_study(n_pat = 100, n_sites = 10,
+ frac_site_with_ur = 0.2, ur_rate = 0.1)
+
+df_visit2$study_id <- "B"
+
+df_visit <- dplyr::bind_rows(df_visit1, df_visit2)
+
+df_site_max <- df_visit \%>\%
+ group_by(study_id, site_number, patnum) \%>\%
+ summarise(max_visit = max(visit),
+ max_ae = max(n_ae),
+ .groups = "drop")
+
+df_config <- get_config(df_site_max)
+
+df_config
+
+df_portf <- sim_test_data_portfolio(df_config)
+
+df_portf
+
+df_scen <- sim_ur_scenarios(df_portf,
+ extra_ur_sites = 2,
+ ur_rate = c(0.5, 1))
+
+
+df_scen
+
+df_perf <- get_portf_perf(df_scen)
+
+df_perf
+}
+}
+\seealso{
+\code{\link{sim_test_data_study}}
+\code{\link{get_config}}
+\code{\link{sim_test_data_portfolio}}
+\code{\link{sim_ur_scenarios}}
+\code{\link{get_portf_perf}}
+}
diff --git a/man/sim_test_data_study.Rd b/man/sim_test_data_study.Rd
index 0d8e861..406b615 100644
--- a/man/sim_test_data_study.Rd
+++ b/man/sim_test_data_study.Rd
@@ -1,5 +1,5 @@
% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/simaerep.R
+% Please edit documentation in R/simulate_test_data.R
\name{sim_test_data_study}
\alias{sim_test_data_study}
\title{simulate study test data}
diff --git a/man/sim_ur_scenarios.Rd b/man/sim_ur_scenarios.Rd
new file mode 100644
index 0000000..27266f1
--- /dev/null
+++ b/man/sim_ur_scenarios.Rd
@@ -0,0 +1,137 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/simulate_test_data.R
+\name{sim_ur_scenarios}
+\alias{sim_ur_scenarios}
+\title{Simulate Under-Reporting Scenarios}
+\usage{
+sim_ur_scenarios(
+ df_portf,
+ extra_ur_sites = 3,
+ ur_rate = c(0.25, 0.5),
+ r = 1000,
+ poisson_test = FALSE,
+ prob_lower = TRUE,
+ parallel = FALSE,
+ progress = TRUE,
+ site_aggr_args = list(),
+ eval_sites_args = list()
+)
+}
+\arguments{
+\item{df_portf}{dataframe as returned by \code{\link{sim_test_data_portfolio}}}
+
+\item{extra_ur_sites}{numeric, set maximum number of additional
+under-reporting sites, see details Default: 3}
+
+\item{ur_rate}{numeric vector, set under-reporting rates for scenarios
+Default: c(0.25, 0.5)}
+
+\item{r}{integer, denotes number of simulations, default = 1000}
+
+\item{poisson_test}{logical, calculates poisson.test pvalue}
+
+\item{prob_lower}{logical, calculates probability for getting a lower value}
+
+\item{parallel}{logical, use parallel processing see details, Default: FALSE}
+
+\item{progress}{logical, show progress bar, Default: TRUE}
+
+\item{site_aggr_args}{named list of parameters passed to
+\code{\link{site_aggr}}, Default: list()}
+
+\item{eval_sites_args}{named list of parameters passed to
+\code{\link{eval_sites}}, Default: list()}
+}
+\value{
+dataframe with the following columns:
+\describe{
+\item{\strong{study_id}}{study identification}
+\item{\strong{site_number}}{site identification}
+\item{\strong{n_pat}}{number of patients at site}
+\item{\strong{n_pat_with_med75}}{number of patients at site with visit_med75}
+\item{\strong{visit_med75}}{median(max(visit)) * 0.75}
+\item{\strong{mean_ae_site_med75}}{mean AE at visit_med75 site level}
+\item{\strong{mean_ae_study_med75}}{mean AE at visit_med75 study level}
+\item{\strong{n_pat_with_med75_study}}{number of patients at site with
+visit_med75 at study excl site}
+\item{\strong{extra_ur_sites}}{additional sites
+with under-reporting patients}
+\item{\strong{frac_pat_with_ur}}{ratio of
+patients in study that are under-reporting}
+\item{\strong{ur_rate}}{under-reporting rate}
+\item{\strong{pval}}{p-value as
+returned by \code{\link[stats]{poisson.test}}}
+\item{\strong{prob_low}}{bootstrapped probability for having mean_ae_site_med75
+or lower} \item{\strong{pval_adj}}{adjusted p-values}
+\item{\strong{prob_low_adj}}{adjusted bootstrapped probability for having
+mean_ae_site_med75 or lower} \item{\strong{pval_prob_ur}}{probability
+under-reporting as 1 - pval_adj, poisson.test (use as benchmark)}
+\item{\strong{prob_low_prob_ur}}{probability under-reporting as 1 -
+prob_low_adj, bootstrapped (use)}
+}
+}
+\description{
+Use with simulated portfolio data to generate under-reporting
+stats for specified scenarios.
+}
+\details{
+The function will apply under-reporting scenarios to each site.
+Reducing the number of AEs by a given under-reporting (ur_rate) for all
+patients at the site and add the corresponding under-reporting statistics.
+Since the under-reporting probability is also affected by the number of
+other sites that are under-reporting we additionally calculate
+under-reporting statistics in a scenario where additional under reporting
+sites are present. For this we use the median number of patients per site
+at the study to calculate the final number of patients for which we lower
+the AEs in a given under-reporting scenario. We use the furrr package to
+implement parallel processing as these simulations can take a long time to
+run. For this to work we need to specify the plan for how the code should
+run, e.g. plan(multisession, workers = 18)
+}
+\examples{
+if (interactive()) {
+df_visit1 <- sim_test_data_study(n_pat = 100, n_sites = 10,
+ frac_site_with_ur = 0.4, ur_rate = 0.6)
+
+df_visit1$study_id <- "A"
+
+df_visit2 <- sim_test_data_study(n_pat = 100, n_sites = 10,
+ frac_site_with_ur = 0.2, ur_rate = 0.1)
+
+df_visit2$study_id <- "B"
+
+df_visit <- dplyr::bind_rows(df_visit1, df_visit2)
+
+df_site_max <- df_visit \%>\%
+ group_by(study_id, site_number, patnum) \%>\%
+ summarise(max_visit = max(visit),
+ max_ae = max(n_ae),
+ .groups = "drop")
+
+df_config <- get_config(df_site_max)
+
+df_config
+
+df_portf <- sim_test_data_portfolio(df_config)
+
+df_portf
+
+df_scen <- sim_ur_scenarios(df_portf,
+ extra_ur_sites = 2,
+ ur_rate = c(0.5, 1))
+
+
+df_scen
+
+df_perf <- get_portf_perf(df_scen)
+
+df_perf
+}
+}
+\seealso{
+\code{\link{sim_test_data_study}}
+\code{\link{get_config}}
+\code{\link{sim_test_data_portfolio}}
+\code{\link{sim_ur_scenarios}}
+\code{\link{get_portf_perf}}
+}
diff --git a/man/with_progress_cnd.Rd b/man/with_progress_cnd.Rd
new file mode 100644
index 0000000..54591de
--- /dev/null
+++ b/man/with_progress_cnd.Rd
@@ -0,0 +1,63 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/progress.R
+\name{with_progress_cnd}
+\alias{with_progress_cnd}
+\title{conditional \code{\link[progressr]{with_progress}}}
+\usage{
+with_progress_cnd(ex, progress = TRUE)
+}
+\arguments{
+\item{ex}{expression}
+
+\item{progress}{logical, Default: TRUE}
+}
+\description{
+internal function. Use instead of
+\code{\link[progressr]{with_progress}} within custom functions with progress
+bars.
+}
+\details{
+DETAILS
+}
+\examples{
+if (interactive()) {
+
+ with_progress_cnd(
+ purrr_bar(rep(0.25, 5), .purrr = purrr::map, .f = Sys.sleep, .steps = 5),
+ progress = TRUE
+ )
+
+ with_progress_cnd(
+ purrr_bar(rep(0.25, 5), .purrr = purrr::map, .f = Sys.sleep, .steps = 5),
+ progress = FALSE
+ )
+
+# wrap a function with progress bar with another call with progress bar
+
+f1 <- function(x, progress = TRUE) {
+ with_progress_cnd(
+ purrr_bar(x, .purrr = purrr::walk, .f = Sys.sleep, .steps = length(x), .progress = progress),
+ progress = progress
+ )
+}
+
+# inner progress bar blocks outer progress bar
+progressr::with_progress(
+ purrr_bar(
+ rep(rep(1, 3),3), .purrr = purrr::walk, .f = f1, .steps = 3,
+ .f_args = list(progress = TRUE)
+ )
+)
+
+# inner progress bar turned off
+progressr::with_progress(
+ purrr_bar(
+ rep(list(rep(0.25, 3)), 5), .purrr = purrr::walk, .f = f1, .steps = 5,
+ .f_args = list(progress = FALSE)
+ )
+)
+}
+}
+\seealso{
+\code{\link[progressr]{with_progress}}
+}
diff --git a/tests/testthat/test_simaerep.R b/tests/testthat/test_simaerep.R
index 7f3dd56..92c9ae0 100644
--- a/tests/testthat/test_simaerep.R
+++ b/tests/testthat/test_simaerep.R
@@ -79,7 +79,7 @@ test_that("eval_sites_with_all_NA", {
ungroup() %>%
filter(study_id == "C") %>%
select(- study_id, - site_number, - visit_med75) %>%
- summarize_all(~ all(is.na(.))) %>%
+ summarise_all(~ all(is.na(.))) %>%
as.matrix() %>%
.[1, ] %>%
all()
@@ -421,3 +421,92 @@ test_that("check_visit_med75_qup8_maximum", {
# nolint end
})
+
+test_that("prep_for_sim", {
+
+ df_prep <- prep_for_sim(df_site, df_visit)
+
+ # ae vector for site must match number of patients at site.
+ df_prep %>%
+ mutate(check = map2(
+ n_pat_with_med75, n_ae_site,
+ function(x, y) x == length(y))
+ ) %>%
+ pull(check) %>%
+ unlist() %>%
+ all() %>%
+ expect_true()
+})
+
+test_that("portfolio_sim", {
+
+ df_site_max <- df_visit %>%
+ group_by(study_id, site_number, patnum) %>%
+ summarise(max_visit = max(visit),
+ max_ae = max(n_ae),
+ .groups = "drop")
+
+ df_config <- simaerep::get_config(
+ df_site_max,
+ anonymize = TRUE,
+ min_pat_per_study = 100,
+ min_sites_per_study = 5
+ )
+
+ expect_true(
+ all(
+ c("study_id", "ae_per_visit_mean", "site_number", "max_visit_sd",
+ "max_visit_mean", "n_pat") %in% colnames(df_config)
+ )
+ )
+
+ df_portf <- sim_test_data_portfolio(df_config)
+
+ expect_true(
+ all(
+ c("study_id", "ae_per_visit_mean", "site_number", "max_visit_sd",
+ "max_visit_mean", "patnum", "visit", "n_ae") %in% colnames(df_portf)
+ )
+ )
+
+ df_scen_adj <- sim_ur_scenarios(df_portf,
+ extra_ur_sites = 2,
+ ur_rate = c(0.5, 1),
+ parallel = FALSE,
+ poisson = FALSE,
+ prob_lower = TRUE,
+ progress = TRUE,
+ site_aggr_args = list(method = "med75_adj"))
+
+
+ df_scen_old <- sim_ur_scenarios(df_portf,
+ extra_ur_sites = 2,
+ ur_rate = c(0.5, 1),
+ parallel = FALSE,
+ poisson = FALSE,
+ prob_lower = TRUE,
+ progress = TRUE,
+ site_aggr_args = list(method = "med75"))
+
+ expect_false(identical(df_scen_adj, df_scen_old))
+
+ df_perf <- get_portf_perf(df_scen_adj)
+
+ expect_true(
+ all(
+ c("fpr", "thresh", "extra_ur_sites", "ur_rate",
+ "tpr") %in% colnames(df_perf)
+ )
+ )
+
+ # warning should be given when there are stat values with NA
+
+ df_scen_na <- df_scen_adj %>%
+ bind_rows(
+ df_scen_adj %>%
+ mutate(prob_low_prob_ur = NA,
+ site_number = paste("A", site_number))
+ )
+
+ expect_warning(get_portf_perf(df_scen_na))
+})
diff --git a/vignettes/_portfolio_perf.Rmd b/vignettes/_portfolio_perf.Rmd
new file mode 100644
index 0000000..661ce49
--- /dev/null
+++ b/vignettes/_portfolio_perf.Rmd
@@ -0,0 +1,367 @@
+---
+title: "simaerep Portfolio Performance"
+output:
+ html_document:
+ toc: true
+ toc_depth: 3
+ toc_float: true
+ number_sections: true
+ code_folding: show
+ collapse: false
+editor_options:
+ chunk_output_type: console
+---
+
+```{r setup, include=FALSE}
+knitr::opts_chunk$set(echo = TRUE, cache = FALSE)
+```
+
+# Load
+```{r load}
+suppressPackageStartupMessages(library(tidyverse))
+suppressPackageStartupMessages(library(knitr))
+suppressPackageStartupMessages(library(furrr))
+suppressPackageStartupMessages(library(future))
+suppressPackageStartupMessages(library(simaerep))
+
+# RAM ~26 GB
+# plan 2GB per core
+plan(multisession, workers = 13)
+```
+
+# Introduction
+
+We want to define minimal requirements for simulating test data that reflects realistic portfolio data which we then want to use to benchmark overall {simaerep} performance.
+
+# Performance
+
+These simulations take some time to run and require multiple cores and appropriate memory. Rendering articles in {pkgdown} can be a bit unstable so we recommend to render first using pure {rmarkdown} to generate the intermediate csv files.
+
+```{r perf, eval = FALSE}
+rmarkdown::render("vignettes/_portfolio_perf.Rmd", knit_root_dir = "/home/koneswab/simaerep")
+```
+
+
+# Portfolio Configuration
+
+The portfolio configuration should give a minimal description of a portfolio without violating data privacy laws or competitive intellectual property. We propose to include the following metrics into the portfolio configuration:
+
+**site parameters:**
+
+- mean of all maximum patient visits
+- sd of of all maximum patient visits
+- total number patients
+
+**study parameters:**
+
+- mean AE per visit
+
+
+The information contained in a portfolio configuration is very scarce and thus can be shared more easily within the industry. We can use those parameters to simulate test data for assessing {simaerep} performance on a given portfolio.
+
+We can start with a maximum aggregation of visit and n_ae on patient level starting with df_visit as we would use it for `simaerep::site_aggr()`. We can use `simaerep::get_config` to generate a valid portfolio configuration, which will automatically apply a few filters:
+
+- remove patients with 0 visits
+- minimum number of patients per study
+- minimum number of sites per study
+- anonymize study and site IDs
+
+```{r ex_config}
+df_visit1 <- sim_test_data_study(n_pat = 100, n_sites = 10,
+ frac_site_with_ur = 0.4, ur_rate = 0.6)
+
+df_visit1$study_id <- "A"
+
+df_visit2 <- sim_test_data_study(n_pat = 100, n_sites = 10,
+ frac_site_with_ur = 0.2, ur_rate = 0.1)
+
+df_visit2$study_id <- "B"
+
+df_visit <- bind_rows(df_visit1, df_visit2)
+
+df_site_max <- df_visit %>%
+ group_by(study_id, site_number, patnum) %>%
+ summarise(max_visit = max(visit),
+ max_ae = max(n_ae),
+ .groups = "drop")
+
+df_config <- simaerep::get_config(
+ df_site_max, anonymize = TRUE,
+ min_pat_per_study = 100,
+ min_sites_per_study = 10
+)
+
+df_config
+```
+
+## Simulate Portfolio from Configuration
+
+We can now apply sim_test_data_portfolio which uses `sim_test_data_study()` to generate artificial data on visit level.
+
+```{r ex_portf}
+df_portf <- sim_test_data_portfolio(df_config)
+df_portf
+```
+
+
+## Load Realistic Configuration
+
+Here we load a realistic portfolio configuration.
+
+```{r real_config}
+df_config <- readr::read_csv("ae_profile.csv")
+df_config
+```
+
+# Simulate Portfolio
+
+And again simulate artificial visit level data. Using parallel processing.
+
+```{r sim_portf}
+
+df_portf <- sim_test_data_portfolio(df_config, parallel = TRUE, progress = TRUE)
+df_portf
+```
+
+## Confirm that Portfolio Simulation results in Similar Configuration
+
+```{r check_portf}
+df_site_max_portf <- df_portf %>%
+ group_by(study_id, site_number, patnum) %>%
+ summarise(max_visit = max(visit),
+ max_ae = max(n_ae),
+ .groups = "drop")
+
+df_config_portf <- simaerep::get_config(df_site_max_portf, anonymize = TRUE, min_pat_per_study = 100, min_sites_per_study = 10)
+
+df_comp <- df_config %>%
+ left_join(
+ df_config_portf,
+ by = c("study_id", "site_number"),
+ suffix = c(".ori", ".sim")
+ ) %>%
+ select(
+ study_id,
+ starts_with("ae"),
+ site_number,
+ contains("max_visit_sd"),
+ contains("max_visit_mean"),
+ contains("n_pat")
+ )
+
+df_comp %>%
+ select(study_id, starts_with("ae")) %>%
+ distinct() %>%
+ ggplot(aes(ae_per_visit_mean.ori, ae_per_visit_mean.sim)) +
+ geom_point() +
+ geom_smooth() +
+ labs(title = "simulated vs original AE per visit study mean") +
+ theme(aspect.ratio = 1)
+
+df_comp %>%
+ ggplot(aes(max_visit_sd.ori, max_visit_sd.sim)) +
+ geom_point() +
+ geom_smooth() +
+ geom_abline(slope = 1, color = "red") +
+ labs(title = "simulated vs original max visit sd site") +
+ theme(aspect.ratio = 1)
+```
+
+In our portfolio simulation we sample the patient maximum visit values from a normal distribution. If that returns values smaller than 1 we replace it with one. The larger the SD values compared to the mean values the more likely we will sample a patient maximum visit smaller than one. Every time we have to do that correction we are lowering the patient maximum visit SD in our simulation, which we can see in the graph above.
+
+```{r check_portf_2}
+df_comp %>%
+ ggplot(aes(max_visit_mean.ori, max_visit_mean.sim)) +
+ geom_point() +
+ geom_smooth() +
+ geom_abline(slope = 1, color = "red") +
+ labs(title = "simulated vs original max visit mean site") +
+ theme(aspect.ratio = 1)
+
+df_comp %>%
+ ggplot(aes(n_pat.ori, n_pat.sim)) +
+ geom_point() +
+ geom_smooth() +
+ labs(title = "simulated vs original n_pat site") +
+ theme(aspect.ratio = 1)
+
+```
+
+
+
+# Get Under-Reporting Probability for Different Under Reporting Scenarios
+
+The performance of detecting AE under-reporting is dependent on three things:
+
+- the higher the mean AE per visit on study level the better
+- the higher the number of patients at an under-reporting site the better
+- the higher the number of under-reporting sites in a study the worse
+
+In our initial usability assessment we have fixed those parameters. Here we are going leave them as they are in the portfolio. The vanilla version of our artificial portfolio data does not contain any under-reporting sites yet. However `simaerep::sim_ur_scenarios()` will apply under-reporting scenarios to each site. Reducing the number of AEs by a given under-reporting rate (ur_rate) for all patients at the site and add the corresponding under-reporting statistics. Since the under-reporting probability is also affected by the number of other sites that are under-reporting we additionally calculate under-reporting statistics in a scenario where additional under reporting sites are present. For this we use the mean number of patients per site at the study to calculate the final number of patients for which we lower the AEs in a given under-reporting scenario.
+
+```{r sim_ur_scen, eval = TRUE}
+
+df_scen <- sim_ur_scenarios(df_portf,
+ extra_ur_sites = 5,
+ ur_rate = c(0.1, 0.25, 0.5, 0.75, 1),
+ parallel = TRUE,
+ poisson = TRUE,
+ prob_lower = TRUE,
+ progress = TRUE)
+
+df_scen
+
+readr::write_csv(df_scen, file = "scen.csv")
+```
+
+
+# Portfolio Performance
+
+We can calculate the portfolio performance as the overall true positive rate (tpr as tp/P) on the basis of desired false positive rates (fpr as fp/N).
+We calculate a threshold based on the desired fpr using the vanilla scenario with no under-reporting sites. Then we check how many sites with known under-reporting get flagged to calculate tpr.
+
+```{r portf_scen}
+
+df_scen <- readr::read_csv("scen.csv")
+
+df_perf <- get_portf_perf(df_scen)
+
+df_perf %>%
+ pivot_wider(
+ names_from = extra_ur_sites,
+ values_from = tpr,
+ names_prefix = "extra_ur_sites_"
+ ) %>%
+ knitr::kable(digits = 3)
+
+```
+
+
+# Benchmark simaerep Using Portfolio Performance
+
+## Effect of Adjusting visit_med75
+
+One of the latest update to simaerep was an improvement to the visit_med75 calculation. We can check how this has affected portfolio performance. We find that we have most likely slightly increased performance.
+
+```{r scen_old, warning = FALSE, eval = TRUE}
+
+df_scen_old_visit_med75 <- sim_ur_scenarios(df_portf,
+ extra_ur_sites = 5,
+ ur_rate = c(0.1, 0.25, 0.5, 0.75, 1),
+ parallel = TRUE,
+ poisson = TRUE,
+ prob_lower = TRUE,
+ progress = TRUE,
+ site_aggr_args = list(method = "med75")) # default is "med75_adj"
+
+readr::write_csv(df_scen_old_visit_med75, file = "scen_old.csv")
+
+```
+
+```{r scen_old_perf}
+df_scen_old_visit_med75 <- readr::read_csv("scen_old.csv")
+
+df_perf_old <- get_portf_perf(df_scen_old_visit_med75)
+
+df_perf_old %>%
+ pivot_wider(
+ names_from = extra_ur_sites,
+ values_from = tpr,
+ names_prefix = "extra_ur_sites_"
+ ) %>%
+ knitr::kable(digits = 3)
+
+```
+
+## Days vs. Visits
+
+The maximum number of days per patient can be up to several years, so > 1000 days. simaerep exposes implicitly missing entries which can lead to single patients having 1000 entries or more, one entry for each day on the study. In order to avoid to generate a huge portfolio data frame we preserve memory by wrapping `sim_test_data_portfolio()` and `sim_ur_scenarios()` into a single call and apply it per study.
+
+```{r scen_days, eval = TRUE}
+
+wr <- function(df) {
+ df_portf <- sim_test_data_portfolio(df, parallel = FALSE, progress = FALSE)
+ df_scen <- sim_ur_scenarios(df_portf,
+ extra_ur_sites = 5,
+ ur_rate = c(0.1, 0.25, 0.5, 0.75, 1),
+ parallel = FALSE,
+ poisson = TRUE,
+ prob_lower = TRUE,
+ progress = FALSE)
+ return(df_scen)
+}
+
+df_prep <- df_config %>%
+ select(- max_visit_sd, - max_visit_mean, - ae_per_visit_mean) %>%
+ rename(max_visit_sd = max_days_sd,
+ max_visit_mean = max_days_mean,
+ ae_per_visit_mean = ae_per_day_mean) %>%
+ group_by(study_id_gr = study_id) %>%
+ nest() %>%
+ ungroup()
+
+progressr::with_progress(
+ df_scen_days <- df_prep %>%
+ mutate(data = purrr_bar(
+ .data$data,
+ .purrr = furrr::future_map,
+ .f = wr,
+ .progress = TRUE,
+ .steps = nrow(.),
+ .purrr_args = list(.options = furrr_options(seed = TRUE))
+ )
+ )
+)
+
+df_scen_days <- df_scen_days %>%
+ unnest(data) %>%
+ select(- study_id_gr)
+
+readr::write_csv(df_scen_days, file = "scen_days.csv")
+
+```
+
+```{r scen_days_perf}
+df_scen_days <- readr::read_csv("scen_days.csv")
+
+df_perf_days <- get_portf_perf(df_scen_days)
+
+df_perf_days %>%
+ pivot_wider(
+ names_from = extra_ur_sites,
+ values_from = tpr,
+ names_prefix = "extra_ur_sites_"
+ ) %>%
+ knitr::kable(digits = 3)
+
+```
+
+
+
+## Plot
+
+
+```{r, plot, fig.width=12, fig.height = 10}
+df_perf %>%
+ mutate(type = "med75_adj") %>%
+ bind_rows(
+ df_perf_old %>%
+ mutate(type = "med75")
+ ) %>%
+ bind_rows(
+ df_perf_days %>%
+ mutate(type = "days")
+ ) %>%
+ ggplot(aes(x = fpr, y = tpr, color = type)) +
+ geom_line() +
+ geom_point() +
+ facet_grid(ur_rate ~ extra_ur_sites) +
+ theme(legend.position = "bottom")
+```
+
+Using days instead of visits does not provide a clear advantage while the adjusted method for determining the evaluation point visit_med75 seems to be advantageous.
+
+```{r close}
+plan(sequential)
+```
+
diff --git a/vignettes/portfolio_perf.Rmd b/vignettes/portfolio_perf.Rmd
new file mode 100644
index 0000000..b880acc
--- /dev/null
+++ b/vignettes/portfolio_perf.Rmd
@@ -0,0 +1,367 @@
+---
+title: "simaerep Portfolio Performance"
+output:
+ html_document:
+ toc: true
+ toc_depth: 3
+ toc_float: true
+ number_sections: true
+ code_folding: show
+ collapse: false
+editor_options:
+ chunk_output_type: console
+---
+
+```{r setup, include=FALSE}
+knitr::opts_chunk$set(echo = TRUE, cache = FALSE)
+```
+
+# Load
+```{r load}
+suppressPackageStartupMessages(library(tidyverse))
+suppressPackageStartupMessages(library(knitr))
+suppressPackageStartupMessages(library(furrr))
+suppressPackageStartupMessages(library(future))
+suppressPackageStartupMessages(library(simaerep))
+
+# RAM ~26 GB
+# plan 2GB per core
+plan(multisession, workers = 13)
+```
+
+# Introduction
+
+We want to define minimal requirements for simulating test data that reflects realistic portfolio data which we then want to use to benchmark overall {simaerep} performance.
+
+# Performance
+
+These simulations take some time to run and require multiple cores and appropriate memory. Rendering articles in {pkgdown} can be a bit unstable so we recommend to render first using pure {rmarkdown} to generate the intermediate csv files.
+
+```{r perf, eval = FALSE}
+rmarkdown::render("vignettes/_portfolio_perf.Rmd", knit_root_dir = "/home/koneswab/simaerep")
+```
+
+
+# Portfolio Configuration
+
+The portfolio configuration should give a minimal description of a portfolio without violating data privacy laws or competitive intellectual property. We propose to include the following metrics into the portfolio configuration:
+
+**site parameters:**
+
+- mean of all maximum patient visits
+- sd of of all maximum patient visits
+- total number patients
+
+**study parameters:**
+
+- mean AE per visit
+
+
+The information contained in a portfolio configuration is very scarce and thus can be shared more easily within the industry. We can use those parameters to simulate test data for assessing {simaerep} performance on a given portfolio.
+
+We can start with a maximum aggregation of visit and n_ae on patient level starting with df_visit as we would use it for `simaerep::site_aggr()`. We can use `simaerep::get_config` to generate a valid portfolio configuration, which will automatically apply a few filters:
+
+- remove patients with 0 visits
+- minimum number of patients per study
+- minimum number of sites per study
+- anonymize study and site IDs
+
+```{r ex_config}
+df_visit1 <- sim_test_data_study(n_pat = 100, n_sites = 10,
+ frac_site_with_ur = 0.4, ur_rate = 0.6)
+
+df_visit1$study_id <- "A"
+
+df_visit2 <- sim_test_data_study(n_pat = 100, n_sites = 10,
+ frac_site_with_ur = 0.2, ur_rate = 0.1)
+
+df_visit2$study_id <- "B"
+
+df_visit <- bind_rows(df_visit1, df_visit2)
+
+df_site_max <- df_visit %>%
+ group_by(study_id, site_number, patnum) %>%
+ summarise(max_visit = max(visit),
+ max_ae = max(n_ae),
+ .groups = "drop")
+
+df_config <- simaerep::get_config(
+ df_site_max, anonymize = TRUE,
+ min_pat_per_study = 100,
+ min_sites_per_study = 10
+)
+
+df_config
+```
+
+## Simulate Portfolio from Configuration
+
+We can now apply sim_test_data_portfolio which uses `sim_test_data_study()` to generate artificial data on visit level.
+
+```{r ex_portf}
+df_portf <- sim_test_data_portfolio(df_config)
+df_portf
+```
+
+
+## Load Realistic Configuration
+
+Here we load a realistic portfolio configuration.
+
+```{r real_config}
+df_config <- readr::read_csv("ae_profile.csv")
+df_config
+```
+
+# Simulate Portfolio
+
+And again simulate artificial visit level data. Using parallel processing.
+
+```{r sim_portf}
+
+df_portf <- sim_test_data_portfolio(df_config, parallel = TRUE, progress = TRUE)
+df_portf
+```
+
+## Confirm that Portfolio Simulation results in Similar Configuration
+
+```{r check_portf}
+df_site_max_portf <- df_portf %>%
+ group_by(study_id, site_number, patnum) %>%
+ summarise(max_visit = max(visit),
+ max_ae = max(n_ae),
+ .groups = "drop")
+
+df_config_portf <- simaerep::get_config(df_site_max_portf, anonymize = TRUE, min_pat_per_study = 100, min_sites_per_study = 10)
+
+df_comp <- df_config %>%
+ left_join(
+ df_config_portf,
+ by = c("study_id", "site_number"),
+ suffix = c(".ori", ".sim")
+ ) %>%
+ select(
+ study_id,
+ starts_with("ae"),
+ site_number,
+ contains("max_visit_sd"),
+ contains("max_visit_mean"),
+ contains("n_pat")
+ )
+
+df_comp %>%
+ select(study_id, starts_with("ae")) %>%
+ distinct() %>%
+ ggplot(aes(ae_per_visit_mean.ori, ae_per_visit_mean.sim)) +
+ geom_point() +
+ geom_smooth() +
+ labs(title = "simulated vs original AE per visit study mean") +
+ theme(aspect.ratio = 1)
+
+df_comp %>%
+ ggplot(aes(max_visit_sd.ori, max_visit_sd.sim)) +
+ geom_point() +
+ geom_smooth() +
+ geom_abline(slope = 1, color = "red") +
+ labs(title = "simulated vs original max visit sd site") +
+ theme(aspect.ratio = 1)
+```
+
+In our portfolio simulation we sample the patient maximum visit values from a normal distribution. If that returns values smaller than 1 we replace it with one. The larger the SD values compared to the mean values the more likely we will sample a patient maximum visit smaller than one. Every time we have to do that correction we are lowering the patient maximum visit SD in our simulation, which we can see in the graph above.
+
+```{r check_portf_2}
+df_comp %>%
+ ggplot(aes(max_visit_mean.ori, max_visit_mean.sim)) +
+ geom_point() +
+ geom_smooth() +
+ geom_abline(slope = 1, color = "red") +
+ labs(title = "simulated vs original max visit mean site") +
+ theme(aspect.ratio = 1)
+
+df_comp %>%
+ ggplot(aes(n_pat.ori, n_pat.sim)) +
+ geom_point() +
+ geom_smooth() +
+ labs(title = "simulated vs original n_pat site") +
+ theme(aspect.ratio = 1)
+
+```
+
+
+
+# Get Under-Reporting Probability for Different Under Reporting Scenarios
+
+The performance of detecting AE under-reporting is dependent on three things:
+
+- the higher the mean AE per visit on study level the better
+- the higher the number of patients at an under-reporting site the better
+- the higher the number of under-reporting sites in a study the worse
+
+In our initial usability assessment we have fixed those parameters. Here we are going leave them as they are in the portfolio. The vanilla version of our artificial portfolio data does not contain any under-reporting sites yet. However `simaerep::sim_ur_scenarios()` will apply under-reporting scenarios to each site. Reducing the number of AEs by a given under-reporting rate (ur_rate) for all patients at the site and add the corresponding under-reporting statistics. Since the under-reporting probability is also affected by the number of other sites that are under-reporting we additionally calculate under-reporting statistics in a scenario where additional under reporting sites are present. For this we use the mean number of patients per site at the study to calculate the final number of patients for which we lower the AEs in a given under-reporting scenario.
+
+```{r sim_ur_scen, eval = FALSE}
+
+df_scen <- sim_ur_scenarios(df_portf,
+ extra_ur_sites = 5,
+ ur_rate = c(0.1, 0.25, 0.5, 0.75, 1),
+ parallel = TRUE,
+ poisson = TRUE,
+ prob_lower = TRUE,
+ progress = TRUE)
+
+df_scen
+
+readr::write_csv(df_scen, file = "scen.csv")
+```
+
+
+# Portfolio Performance
+
+We can calculate the portfolio performance as the overall true positive rate (tpr as tp/P) on the basis of desired false positive rates (fpr as fp/N).
+We calculate a threshold based on the desired fpr using the vanilla scenario with no under-reporting sites. Then we check how many sites with known under-reporting get flagged to calculate tpr.
+
+```{r portf_scen}
+
+df_scen <- readr::read_csv("scen.csv")
+
+df_perf <- get_portf_perf(df_scen)
+
+df_perf %>%
+ pivot_wider(
+ names_from = extra_ur_sites,
+ values_from = tpr,
+ names_prefix = "extra_ur_sites_"
+ ) %>%
+ knitr::kable(digits = 3)
+
+```
+
+
+# Benchmark simaerep Using Portfolio Performance
+
+## Effect of Adjusting visit_med75
+
+One of the latest update to simaerep was an improvement to the visit_med75 calculation. We can check how this has affected portfolio performance. We find that we have most likely slightly increased performance.
+
+```{r scen_old, warning = FALSE, eval = FALSE}
+
+df_scen_old_visit_med75 <- sim_ur_scenarios(df_portf,
+ extra_ur_sites = 5,
+ ur_rate = c(0.1, 0.25, 0.5, 0.75, 1),
+ parallel = TRUE,
+ poisson = TRUE,
+ prob_lower = TRUE,
+ progress = TRUE,
+ site_aggr_args = list(method = "med75")) # default is "med75_adj"
+
+readr::write_csv(df_scen_old_visit_med75, file = "scen_old.csv")
+
+```
+
+```{r scen_old_perf}
+df_scen_old_visit_med75 <- readr::read_csv("scen_old.csv")
+
+df_perf_old <- get_portf_perf(df_scen_old_visit_med75)
+
+df_perf_old %>%
+ pivot_wider(
+ names_from = extra_ur_sites,
+ values_from = tpr,
+ names_prefix = "extra_ur_sites_"
+ ) %>%
+ knitr::kable(digits = 3)
+
+```
+
+## Days vs. Visits
+
+The maximum number of days per patient can be up to several years, so > 1000 days. simaerep exposes implicitly missing entries which can lead to single patients having 1000 entries or more, one entry for each day on the study. In order to avoid to generate a huge portfolio data frame we preserve memory by wrapping `sim_test_data_portfolio()` and `sim_ur_scenarios()` into a single call and apply it per study.
+
+```{r scen_days, eval = FALSE}
+
+wr <- function(df) {
+ df_portf <- sim_test_data_portfolio(df, parallel = FALSE, progress = FALSE)
+ df_scen <- sim_ur_scenarios(df_portf,
+ extra_ur_sites = 5,
+ ur_rate = c(0.1, 0.25, 0.5, 0.75, 1),
+ parallel = FALSE,
+ poisson = TRUE,
+ prob_lower = TRUE,
+ progress = FALSE)
+ return(df_scen)
+}
+
+df_prep <- df_config %>%
+ select(- max_visit_sd, - max_visit_mean, - ae_per_visit_mean) %>%
+ rename(max_visit_sd = max_days_sd,
+ max_visit_mean = max_days_mean,
+ ae_per_visit_mean = ae_per_day_mean) %>%
+ group_by(study_id_gr = study_id) %>%
+ nest() %>%
+ ungroup()
+
+progressr::with_progress(
+ df_scen_days <- df_prep %>%
+ mutate(data = purrr_bar(
+ .data$data,
+ .purrr = furrr::future_map,
+ .f = wr,
+ .progress = TRUE,
+ .steps = nrow(.),
+ .purrr_args = list(.options = furrr_options(seed = TRUE))
+ )
+ )
+)
+
+df_scen_days <- df_scen_days %>%
+ unnest(data) %>%
+ select(- study_id_gr)
+
+readr::write_csv(df_scen_days, file = "scen_days.csv")
+
+```
+
+```{r scen_days_perf}
+df_scen_days <- readr::read_csv("scen_days.csv")
+
+df_perf_days <- get_portf_perf(df_scen_days)
+
+df_perf_days %>%
+ pivot_wider(
+ names_from = extra_ur_sites,
+ values_from = tpr,
+ names_prefix = "extra_ur_sites_"
+ ) %>%
+ knitr::kable(digits = 3)
+
+```
+
+
+
+## Plot
+
+
+```{r, plot, fig.width=12, fig.height = 10}
+df_perf %>%
+ mutate(type = "med75_adj") %>%
+ bind_rows(
+ df_perf_old %>%
+ mutate(type = "med75")
+ ) %>%
+ bind_rows(
+ df_perf_days %>%
+ mutate(type = "days")
+ ) %>%
+ ggplot(aes(x = fpr, y = tpr, color = type)) +
+ geom_line() +
+ geom_point() +
+ facet_grid(ur_rate ~ extra_ur_sites) +
+ theme(legend.position = "bottom")
+```
+
+Using days instead of visits does not provide a clear advantage while the adjusted method for determining the evaluation point visit_med75 seems to be advantageous.
+
+```{r close}
+plan(sequential)
+```
+
diff --git a/vignettes/usability_limits.Rmd b/vignettes/usability_limits.Rmd
index 37ccce9..15f3c39 100644
--- a/vignettes/usability_limits.Rmd
+++ b/vignettes/usability_limits.Rmd
@@ -13,16 +13,16 @@ editor_options:
---
```{r setup, include=FALSE}
-knitr::opts_chunk$set(echo = TRUE, cache = FALSE)
+knitr::opts_chunk$set(echo = TRUE, cache = TRUE)
```
# Load
```{r}
-suppressPackageStartupMessages( library(tidyverse) )
-suppressPackageStartupMessages( library(knitr) )
-suppressPackageStartupMessages( library(furrr) )
-suppressPackageStartupMessages( library(future) )
-suppressPackageStartupMessages( library(simaerep) )
+suppressPackageStartupMessages(library(tidyverse))
+suppressPackageStartupMessages(library(knitr))
+suppressPackageStartupMessages(library(furrr))
+suppressPackageStartupMessages(library(future))
+suppressPackageStartupMessages(library(simaerep))
```
# Introduction
diff --git a/vignettes/visit_med75.Rmd b/vignettes/visit_med75.Rmd
index f55a6ce..65c8bf5 100644
--- a/vignettes/visit_med75.Rmd
+++ b/vignettes/visit_med75.Rmd
@@ -18,9 +18,9 @@ knitr::opts_chunk$set(echo = TRUE, cache = FALSE)
# Load
```{r}
-suppressPackageStartupMessages( library(tidyverse) )
-suppressPackageStartupMessages( library(knitr) )
-suppressPackageStartupMessages( library(simaerep) )
+suppressPackageStartupMessages(library(tidyverse))
+suppressPackageStartupMessages(library(knitr))
+suppressPackageStartupMessages(library(simaerep))
```
# Introduction
@@ -89,6 +89,9 @@ df_visit2 <- sim_test_data_study(
df_visit1$site_number <- paste0("A", df_visit1$site_number)
df_visit2$site_number <- paste0("B", df_visit2$site_number)
+df_visit1$patnum <- paste0("A", df_visit1$patnum)
+df_visit2$patnum <- paste0("B", df_visit2$patnum)
+
df_visit <- bind_rows(df_visit1, df_visit2)