Skip to content

Commit

Permalink
Merge pull request #40 from sempwn/20-create-validation-of-model-from…
Browse files Browse the repository at this point in the history
…-synthetic-data

20 create validation of model from synthetic data
  • Loading branch information
sempwn authored Apr 19, 2024
2 parents 60a3f8b + 3ea9f28 commit 8b7fd8a
Show file tree
Hide file tree
Showing 13 changed files with 467 additions and 2 deletions.
1 change: 1 addition & 0 deletions .Rbuildignore
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,4 @@
^cran-comments\.md$
^CRAN-RELEASE$
^CRAN-SUBMISSION$
^data-raw$
2 changes: 2 additions & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,9 @@ Suggests:
bayesplot,
covr,
knitr,
latex2exp,
posterior,
progress,
rmarkdown,
stringr,
testthat (>= 3.0.0)
Expand Down
33 changes: 33 additions & 0 deletions R/data.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
#' Experimental validation results
#'
#' Generated data from validation experiments of simulated data
#'
#' @format ## `experimental_validation_data`
#' A data frame with 200 rows and 8 columns:
#' \describe{
#' \item{.variable}{Model variable}
#' \item{p50}{Median of the posterior}
#' \item{p25, p75}{2nd and 3rd quartiles of the posterior}
#' \item{p05, p95}{1st and 19th ventiles of the posterior}
#' \item{true_value}{The value used to generate the simulation}
#' \item{experiment}{Experiment number index}
#' }
#' @backref data-raw/create_validation_data.R
#' @family validation data
"experimental_validation_data"

#' Missing data experimental validation results
#'
#' Generated data from validation experiments of simulated data
#'
#' @format ## `missing_data_validation`
#' A data frame with 10 rows and 6 columns:
#' \describe{
#' \item{p50}{Median of the posterior}
#' \item{p25, p75}{2nd and 3rd quartiles of the posterior}
#' \item{p05, p95}{1st and 19th ventiles of the posterior}
#' \item{reporting_freq}{The reporting frequency in months}
#' }
#' @backref data-raw/create_missing_data_validation.R
#' @family validation data
"missing_data_validation"
3 changes: 2 additions & 1 deletion R/utils-plot.R
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ plot_kit_use <- function(...,
reported = reported)

region <- model <- times <- data_var_name <- sim_reported_used <- p_use <- NULL
region_name <- NULL
region_name <- .data <- NULL
p50 <- p05 <- p95 <- p25 <- p75 <- NULL

# data variables that can be plotted if they exist
Expand Down Expand Up @@ -107,6 +107,7 @@ plot_kit_use <- function(...,
#' @noRd
combine_model_fits <- function(..., data = NULL, reported = FALSE) {
sim_p <- i <- sim_reported_used <- NULL
Reported_Used <- Reported_Distributed <- NULL

fit_list <- list(...)

Expand Down
4 changes: 4 additions & 0 deletions _pkgdown.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,5 +14,9 @@ reference:
contents:
- starts_with("plot_")
- kit_summary_table
- title: Validation Data
contents:
- has_concept("validation data")
- has_keyword("datasets")


83 changes: 83 additions & 0 deletions data-raw/create_missing_data_validation.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
library(bennu)
library(rstan)
library(tidybayes)
library(dplyr)
library(magrittr)
library(ggplot2)
library(tidyr)
library(progress)
library(here)

set.seed(43)

rstan::rstan_options(auto_write = TRUE)
options(mc.cores = 4)


experiments <- expand_grid(reporting_freq = seq(1, 10, by = 1))

missing_data_validation <- tibble()

pb <- progress_bar$new(
format = " running [:bar] :percent eta: :eta",
total = nrow(experiments), clear = FALSE, width = 60
)

for (ind in 1:nrow(experiments)) {
reporting_freq <- as.numeric(experiments[ind, "reporting_freq"])

# generate complete data
d_complete <- bennu::model_random_walk_data(
zeta = 0.25, sigma = 0.25,
region_coeffs = c(5, 0.5, 1, 2),
c_region = c(-1, 2, 0, 1), N_t = 48
)

# generate partial data based on reporting frequency
d_reported <- d_complete %>%
mutate(
nonreporting_times = times %% reporting_freq != 1,
Reported_Distributed = if_else(
times %% reporting_freq != 1,
NA_real_, Reported_Distributed
),
Reported_Used = if_else(
times %% reporting_freq != 1,
NA_real_, Reported_Used
)
)

if (reporting_freq == 1) {
d_reported <- d_complete
}

fit <- est_naloxone(d_reported)

row_used <- fit %>%
tidybayes::spread_draws(sim_used[i]) %>%
dplyr::left_join(
dplyr::mutate(d_complete, i = dplyr::row_number()),
by = "i"
) %>%
group_by(.chain, .iteration, .draw) %>%
dplyr::summarise(
Actual_Used = sum(Orders * p_use),
Sim_Used = sum(sim_used)
) %>%
ungroup() %>%
dplyr::mutate(p_diff = (Actual_Used - Sim_Used) / Actual_Used) %>%
dplyr::summarise(
p50 = stats::quantile(p_diff, 0.5),
p25 = stats::quantile(p_diff, 0.25),
p75 = stats::quantile(p_diff, 0.75),
p05 = stats::quantile(p_diff, 0.05),
p95 = stats::quantile(p_diff, 0.95)
) %>%
mutate(reporting_freq = reporting_freq)

missing_data_validation <- missing_data_validation %>% bind_rows(row_used)
pb$tick()
}

usethis::use_data(missing_data_validation)

73 changes: 73 additions & 0 deletions data-raw/create_validation_data.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
##########################################
## ###
## ###
## Create a series of simulations to ###
## Assess ability of model to ###
## recover main parameters ###
## ###
## ###
##########################################


library(bennu)
library(rstan)
library(tidybayes)
library(dplyr)
library(magrittr)
library(ggplot2)
library(tidyr)
library(progress)
library(here)

rstan::rstan_options(auto_write = TRUE)
options(mc.cores = 4)


plot_data <- function(d){
d %>%
ggplot(aes(x=times,y=p_use,color=as.factor(regions))) +
geom_line() +
geom_point()
}

experiments <- expand_grid(sigma = seq(0.05,0.5,by=0.05),
zeta = seq(0.05, 0.5, by=0.05))

experimental_validation_data <- tibble()

pb <- progress_bar$new(
format = " running [:bar] :percent eta: :eta",
total = nrow(experiments), clear = FALSE, width= 60)

for(i in 1:nrow(experiments)){
sigma <- as.numeric(experiments[i,"sigma"])
zeta <- as.numeric(experiments[i,"zeta"])

d <- bennu::model_random_walk_data(zeta=zeta,sigma=sigma,
region_coeffs = c(5, 0.5, 1, 2),
c_region = c(-1, 2, 0, 1), N_t = 48)
fit <- est_naloxone(d)

row_results <- fit %>%
tidybayes::gather_draws(sigma,zeta) %>%
group_by(.variable) %>%
dplyr::summarise(
p50 = stats::quantile(.value, 0.5),
p25 = stats::quantile(.value, 0.25),
p75 = stats::quantile(.value, 0.75),
p05 = stats::quantile(.value, 0.05),
p95 = stats::quantile(.value, 0.95)
) %>%
left_join(
tibble(.variable = c("sigma", "zeta"),
true_value = c(sigma,zeta))
) %>%
mutate(experiment = i)

experimental_validation_data <- experimental_validation_data %>%
bind_rows(row_results)
pb$tick()
}

usethis::use_data(experimental_validation_data)

Binary file added data/experimental_validation_data.rda
Binary file not shown.
Binary file added data/missing_data_validation.rda
Binary file not shown.
9 changes: 8 additions & 1 deletion man/est_naloxone.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

27 changes: 27 additions & 0 deletions man/experimental_validation_data.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

25 changes: 25 additions & 0 deletions man/missing_data_validation.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit 8b7fd8a

Please sign in to comment.