Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

In estimate_joint, fix tmin to be the 95th percentile of the SI distribution #127

Merged
merged 11 commits into from
Jul 20, 2021
3 changes: 3 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ export(WT)
export(check_cdt_samples_convergence)
export(coarse2estim)
export(compute_lambda)
export(compute_si_cutoff)
export(compute_t_min)
export(default_mcmc_controls)
export(default_priors)
export(discr_si)
Expand All @@ -16,6 +18,7 @@ export(draw_epsilon)
export(estimate_R)
export(estimate_R_plots)
export(estimate_joint)
export(first_nonzero_incid)
export(get_shape_R_flat)
export(get_shape_epsilon)
export(init_mcmc_params)
Expand Down
151 changes: 116 additions & 35 deletions R/gibbs_draws.R
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,8 @@
#' @param t_max an integer >`t_min` and <=`nrow(incid)` giving the maximum time
#' step to consider in the estimation. Default value is `nrow(incid)`.
#'
#' @return a vector of the shape of the posterior distribution of R for
#' each time step t and each location l
#' @return a vector of the shape of the posterior distribution of R for
#' each time step t and each location l
#' (stored in element (l-1)*(t_max - t_min + 1) + t of the vector)
#'
#' @export
Expand All @@ -35,7 +35,7 @@
#'
get_shape_R_flat <- function(incid, priors, t_min = 2L, t_max = nrow(incid)) {
t <- seq(t_min, t_max, 1)
shape <- apply(incid[t, , , drop = FALSE], c(1, 2), sum) + priors$R$shape
shape <- apply(incid[t, , , drop = FALSE], c(1, 2), sum) + priors$R$shape
as.numeric(shape)
}

Expand Down Expand Up @@ -67,7 +67,7 @@ get_shape_R_flat <- function(incid, priors, t_min = 2L, t_max = nrow(incid)) {
#' @param t_max an integer >`t_min` and <=`nrow(incid)` giving the maximum time
#' step to consider in the estimation. Default value is `nrow(incid)`.
#'
#' @return a value or vector of values of the shape of the posterior
#' @return a value or vector of values of the shape of the posterior
#' distribution of epsilon for each of the non reference variants
#'
#' @export
Expand Down Expand Up @@ -236,8 +236,8 @@ compute_lambda <- function(incid, si_distr) {
#' distribution) for epsilon and R; can be obtained from the function
#' `default_priors`. The prior for R is assumed to be the same for all
#' time steps and all locations
#'
#' @param shape_epsilon a value or vector of values of the shape of the posterior
#'
#' @param shape_epsilon a value or vector of values of the shape of the posterior
#' distribution of epsilon for each of the non reference variants, as returned
#' by function `get_shape_epsilon`
#'
Expand Down Expand Up @@ -329,10 +329,10 @@ draw_epsilon <- function(R, incid, lambda, priors,
#' distribution) for epsilon and R; can be obtained from the function
#' `default_priors`. The prior for R is assumed to be the same for all
#' time steps and all locations
#'
#' @param shape_R_flat a vector of the shape of the posterior distribution of R
#' for each time step t and each location l
#' (stored in element (l-1)*(t_max - t_min + 1) + t of the vector),
#'
#' @param shape_R_flat a vector of the shape of the posterior distribution of R
#' for each time step t and each location l
#' (stored in element (l-1)*(t_max - t_min + 1) + t of the vector),
#' as obtained from function `get_shape_R_flat`
#'
#' @param t_min an integer >1 giving the minimum time step to consider in the
Expand Down Expand Up @@ -367,11 +367,11 @@ draw_epsilon <- function(R, incid, lambda, priors,
#' lambda <- compute_lambda(incid, si_distr)
#' # Epsilon = 1 i.e. no transmission advantage
#' epsilon <- 1
#' draw_R(epsilon, incid$local, lambda, priors, seed = 1)
#' draw_R(epsilon, incid$local, lambda, priors, seed = 1, t_min = 2L)
#'
draw_R <- function(epsilon, incid, lambda, priors,
shape_R_flat = NULL,
t_min = 2L, t_max = nrow(incid),
t_min = NULL, t_max = nrow(incid),
seed = NULL) {
if (!is.integer(t_min) | !is.integer(t_max)){
stop("t_min and t_max must be integers")
Expand Down Expand Up @@ -409,7 +409,78 @@ draw_R <- function(epsilon, incid, lambda, priors,
R[t, ] <- R_fill
R
}
##' Index before which at most a given probability
##' mass is captured
##'
##' Across a matrix of discretised probability distributions
##' (see \code{estimate_joint}
##' this function returns the largest index
##' (across all columns) such that the
##' cumulative probability mass before index is
##' 1 - \code{miss_at_most}.
##'
##'
##' @inheritParams estimate_joint
##' @param miss_at_most numeric. probability mass in the tail of the SI distribution
##' @return integer
##' @author Sangeeta Bhatia
##' @export
compute_si_cutoff <- function(si_distr, miss_at_most = 0.05) {
if (any(colSums(si_distr) != 1)) {
warning("Input SI distributions should sum to 1. Normalising now")
si_distr <- si_distr / colSums(si_distr)
}
cutoff <- 1 - miss_at_most
cdf <- apply(si_distr, 2, cumsum)
idx <- apply(
cdf, 2,
function(col) Position(function(x) x > cutoff, col)
)
as.integer(max(idx))
}

##' Get the first day of non-zero incidence across
##' all variants and locations.
##' @details For each variant, find the first day of
##' non-zero incidence. The maximum of these
##' is the smallest possible point at which
##' estimation can begin.
##' @inheritParams estimate_joint
##' @return integer
##' @author Sangeeta Bhatia
##' @export
first_nonzero_incid <- function(incid) {
t_min_incid <- apply(
incid, c(2, 3),
function(vec) Position(function(x) x > 0, vec)
)
if (any(is.na(t_min_incid))) {
warning(
"For some variants/locations, incidence is
always zero. This will cause estimate_joint to fail."
)
}
max(t_min_incid)
}
##' Compute the smallest index at which joint estimation
##' should start
##'
##' Unless specified by the user, t_min in \code{estimate_joint}
##' is computed as the sum of two indices:
##' (i) the first day of non-zero incidence across all locations,
##' computed using \code{first_nonzero_incid}
##' and (ii) the 95th percentile of the probability mass function of the
##' SI distribution across all variants computed using \code{compute_si_cutoff}
##' @inheritParams compute_si_cutoff
##' @inheritParams estimate_joint
##' @return integer
##' @author Sangeeta Bhatia
##' @export
compute_t_min <- function(incid, si_distr, miss_at_most) {
t_min_si <- compute_si_cutoff(si_distr, 0.05)
t_min_incid <- first_nonzero_incid(incid)
as.integer(t_min_incid + t_min_si)
}

#' Jointly estimate the instantaneous reproduction number for a reference
#' pathogen/strain/variant and the relative transmissibility of a
Expand All @@ -433,9 +504,12 @@ draw_R <- function(epsilon, incid, lambda, priors,
#' @param mcmc_control a list of default MCMC control parameters, as obtained
#' for example from function `default_mcmc_controls`
#'
#' @param t_min an integer >1 giving the minimum time step to consider in the
#' estimation. Default value is 2 (as the estimation is conditional on
#' observations at time step 1 and can therefore only start at time step 2).
#' @param t_min an integer > 1 giving the minimum time step to consider in the
#' estimation.
#' The NULL, t_min is calculated using the function \code{compute_si_cutoff}
#' which gets the maximum (across all variants) of the 95th percentile of the
#' SI distribution.
#'
#'
#' @param t_max an integer >`t_min` and <=`nrow(incid)` giving the maximum time
#' step to consider in the estimation. Default value is `nrow(incid)`.
Expand All @@ -450,8 +524,8 @@ draw_R <- function(epsilon, incid, lambda, priors,
#' NULL this means there are no
#' known imported cases and all cases other than on those from the first
#' time step will be considered locally infected.
#'
#' @param precompute a boolean (defaulting to TRUE) deciding whether to
#'
#' @param precompute a boolean (defaulting to TRUE) deciding whether to
#' precompute quantities or not. Using TRUE will make the algorithm faster
#'
#' @return a list with two elements.
Expand Down Expand Up @@ -504,12 +578,15 @@ draw_R <- function(epsilon, incid, lambda, priors,
#'
estimate_joint <- function(incid, si_distr, priors,
mcmc_control = default_mcmc_controls(),
t_min = 2L, t_max = nrow(incid),
t_min = NULL, t_max = nrow(incid),
seed = NULL,
incid_imported = NULL,
precompute = TRUE
) {
if (!is.integer(t_min) | !is.integer(t_max)){
precompute = TRUE) {

if (is.null(t_min)) {
t_min <- compute_t_min(incid, si_distr)
}
if (!is.integer(t_min) | !is.integer(t_max)) {
stop("t_min and t_max must be integers")
}
if (t_min < 2 | t_max < 2){
Expand Down Expand Up @@ -543,18 +620,22 @@ estimate_joint <- function(incid, si_distr, priors,
stop("seed must be numeric")
}
if (!is.null(seed)) set.seed(seed)

if (t_min > t_max) {
stop("t_min is greater than t_max. You can specify a smaller t_min or increase t_max.")
}
t <- seq(t_min, t_max, 1)

T <- nrow(incid)
n_loc <- ncol(incid)

incid <- process_I_multivariant(incid, incid_imported)

lambda <- compute_lambda(incid, si_distr)

## find clever initial values, based on ratio of reproduction numbers
## over the whole time period, across all locations together

R_init <- sapply(seq_len(dim(incid$local)[3]), function(i) {
tmp_df <- data.frame(local = apply(incid$local[, , i, drop = FALSE],
c(1, 3), sum)[,1],
Expand All @@ -568,27 +649,27 @@ estimate_joint <- function(incid, si_distr, priors,
t_start = t_min,
t_end = t_max)))$R$'Mean(R)'
})

max_transmiss <- which.max(R_init)
# reorder variants so most transmissible is first
incid_reordered <- array(NA, dim = dim(incid$local))
incid_reordered[,,1] <- incid$local[,,max_transmiss]
incid_reordered[,,-1] <- incid$local[,, -max_transmiss]

incid$local <- incid_reordered
## Re-order R_init so that they are in the same
## order as the incidence
R_init_reord <- R_init
R_init_reord[1] <- R_init[max_transmiss]
R_init_reord[-1] <- R_init[-max_transmiss]
R_init <- R_init_reord

## Re-order lambda
lambda_reordered <- lambda
lambda_reordered[, , 1] <- lambda[, , max_transmiss]
lambda_reordered[, , -1] <- lambda[, , -max_transmiss]
lambda <- lambda_reordered

## Precalculate quantities of interest
if (precompute) {
shape_R_flat <- get_shape_R_flat(incid$local, priors, t_min, t_max)
Expand All @@ -597,7 +678,7 @@ estimate_joint <- function(incid, si_distr, priors,
shape_R_flat <- NULL
shape_epsilon <- NULL
}

epsilon_init <- unlist(lapply(seq(2, length(R_init)), function(i)
median(R_init[[i]] / R_init[[1]], na.rm = TRUE)))
epsilon_out <- matrix(NA, nrow = length(epsilon_init),
Expand All @@ -607,21 +688,21 @@ estimate_joint <- function(incid, si_distr, priors,
epsilon = epsilon_init, incid = incid$local, lambda = lambda,
priors = priors, shape_R_flat = shape_R_flat, t_min = t_min, t_max = t_max
)

R_out <- array(NA, dim= c(T, n_loc, mcmc_control$n_iter + 1))
R_out[, , 1] <- R_init
for (i in seq_len(mcmc_control$n_iter)) {
R_out[, , i + 1] <- draw_R(epsilon_out[, i], incid$local, lambda, priors,
shape_R_flat = shape_R_flat,
shape_R_flat = shape_R_flat,
t_min = t_min, t_max = t_max)
epsilon_out[, i + 1] <- draw_epsilon(
abind::adrop(R_out[, , i + 1, drop = FALSE], drop = 3),
incid$local, lambda, priors,
shape_epsilon = shape_epsilon,
t_min = t_min, t_max = t_max)

}

# remove burnin and thin
keep <- seq(mcmc_control$burnin, mcmc_control$n_iter, mcmc_control$thin)
epsilon_out <- epsilon_out[, keep, drop = FALSE]
Expand Down
32 changes: 32 additions & 0 deletions man/compute_si_cutoff.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

36 changes: 36 additions & 0 deletions man/compute_t_min.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading