Skip to content

Commit

Permalink
BEAST and sampling-intensity helper functions.
Browse files Browse the repository at this point in the history
  • Loading branch information
mdkarcher committed Jul 25, 2018
1 parent 4969512 commit 85c5304
Show file tree
Hide file tree
Showing 13 changed files with 815 additions and 34 deletions.
6 changes: 6 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

export(BNPR)
export(BNPR_PS)
export(BNPR_samp_only)
export(boombust_traj)
export(bottleneck_traj)
export(coalsim)
Expand All @@ -12,10 +13,14 @@ export(find_info2)
export(generate_newick)
export(hist2heat)
export(inla.models)
export(log_samp_mat)
export(logistic_traj)
export(make_alignment)
export(make_taxa)
export(mcmc_sampling)
export(mesa_traj)
export(ml_exp_coal)
export(plot_BEAST)
export(plot_BNPR)
export(plot_mrw)
export(plot_res)
Expand All @@ -24,6 +29,7 @@ export(posterior_coal_check)
export(posterior_samp_check)
export(pref_sample)
export(read_times)
export(sample_tree)
export(sloped_traj)
export(smcp_sampling)
export(steep_cyc_traj)
Expand Down
174 changes: 149 additions & 25 deletions R/calculate.R
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ gen_summary = function(coal_times, samp_times, n_sampled)
#' @param lengthout numeric specifying number of grid points.
#' @param pref logical. Should the preferential sampling model be used?
#' @param prec_alpha,prec_beta numerics specifying gamma prior for precision
#' \eqn{\tau}.
#' \eqn{\kappa}.
#' @param beta1_prec numeric specifying precision for normal prior on
#' \eqn{\beta_1}.
#' @param fns list containing functions of covariates.
Expand Down Expand Up @@ -264,29 +264,6 @@ samp_stats <- function(grid, samp_times, n_sampled = NULL, trim_end = FALSE)
return(result)
}

infer_samp <- function(samp_times, n_sampled = NULL, lengthout = 100,
prec_alpha = 0.01, prec_beta = 0.01)
{
if (!requireNamespace("INLA", quietly = TRUE)) {
stop('INLA needed for this function to work. Use install.packages("INLA", repos="https://www.math.ntnu.no/inla/R/stable").',
call. = FALSE)
}

grid <- seq(min(samp_times),max(samp_times),length.out=lengthout+1)

samp_data <- samp_stats(grid = grid, samp_times = samp_times,
n_sampled = n_sampled)

data <- with(samp_data, data.frame(y = count, time = time, E_log = E_log))
hyper <- list(prec = list(param = c(prec_alpha, prec_beta)))
formula_sampling <- y ~ 1 + f(time, model="rw1", hyper = hyper, constr=FALSE)

mod <- INLA::inla(formula_sampling, family="poisson", data=data,
offset=data$E_log, control.predictor=list(compute=TRUE))

return(list(result = mod, data = data, grid = grid, x = samp_data$time))
}

joint_stats <- function(coal_data, samp_data)
{
n1 <- length(coal_data$time)
Expand Down Expand Up @@ -532,4 +509,151 @@ infer_coal_samp_exper <- function(samp_times, coal_times, n_sampled=NULL, fns =
control.inla = list(lincomb.derived.only=FALSE))

return(list(result = mod, data = data, grid = grid, x = coal_data$time))
}
}

infer_samp <- function(samp_times, n_sampled = NULL, lengthout = 100, grid = NULL,
prec_alpha = 0.01, prec_beta = 0.01)
{
if (!requireNamespace("INLA", quietly = TRUE)) {
stop('INLA needed for this function to work. Use install.packages("INLA", repos="https://www.math.ntnu.no/inla/R/stable").',
call. = FALSE)
}

if (is.null(grid)) {
grid <- seq(min(samp_times),max(samp_times),length.out=lengthout+1)
}

samp_data <- samp_stats(grid = grid, samp_times = samp_times,
n_sampled = n_sampled)

data <- with(samp_data, data.frame(y = count, time = time, E_log = E_log))
hyper <- list(prec = list(param = c(prec_alpha, prec_beta)))
formula_sampling <- y ~ 1 + f(time, model="rw1", hyper = hyper, constr=FALSE)

mod <- INLA::inla(formula_sampling, family="poisson", data=data,
offset=data$E_log, control.predictor=list(compute=TRUE))

return(list(result = mod, data = data, grid = grid, x = samp_data$time))
}

#' Nonparametric estimate of sampling intensity
#'
#' @param data \code{phylo} object or list containing vectors of sampling times
#' \code{samp_times} and number sampled per sampling time \code{n_sampled}.
#' @param lengthout numeric specifying number of grid points.
#' @param grid numeric vector of endpoints of the latent field.
#' @param prec_alpha,prec_beta numerics specifying gamma prior for precision
#' \eqn{\kappa}.
#'
#' @return
#' @export
#'
#' @examples
BNPR_samp_only <- function(data, lengthout = 100, grid = NULL, tmrca = NULL,
prec_alpha = 0.01, prec_beta = 0.01)
{
if (class(data) == "phylo")
{
phy <- summarize_phylo(data)
}
else if (all(c("coal_times", "samp_times", "n_sampled") %in% names(data)))
{
phy <- with(data, list(samp_times = samp_times, coal_times = coal_times,
n_sampled = n_sampled))
}

if (is.null(grid)) {
if (is.null(tmrca)) {
grid <- seq(min(phy$samp_times), max(phy$samp_times), length.out=lengthout+1)
} else {
grid <- seq(min(phy$samp_times), tmrca, length.out=lengthout+1)
}
}
midpts <- grid[-1] - diff(grid)/2

foo <- infer_samp(samp_times = phy$samp_times, n_sampled = phy$n_sampled,
lengthout = lengthout, grid = grid,
prec_alpha = prec_alpha, prec_beta = prec_beta)

quants <- foo$result$summary.random$time
beta0 <- foo$result$summary.fixed$`0.5quant`

result <- list(q025Samp = exp(quants$`0.025quant` + beta0),
medianSamp = exp(quants$`0.5quant` + beta0),
q975Samp = exp(quants$`0.975quant` + beta0),
beta0 = beta0, grid = grid, midpts = midpts,
samp_times = gene$samp_times, n_sampled = gene$n_sampled, internals = foo)

return(result)
}

log_samp_int = function(logpop, betas,
covariates = NULL, cov_betas = NULL,
pow_covariates = NULL, pow_cov_betas = NULL)
{
result = betas[1] + logpop * betas[2]

if (!is.null(covariates))
{
for (i in 1:length(covariates))
{
result = result + covariates[[i]] * cov_betas[i]
}
}

if (!is.null(pow_covariates))
{
for (i in 1:length(pow_covariates))
{
result = result + pow_covariates[[i]] * logpop * pow_cov_betas[i]
}
}

return(result)
}

#' Calculate posterior sampling intensities
#'
#' @param popTbl table of log-effective population sizes.
#' @param betaTbl table of log-linear sampling model coefficients.
#' @param covariates list of covariates
#' @param powBetaTbl table of log-linear sampling model coefficients for terms
#' crossed with the log-effective population size.
#' @param pow_covariates list of covariates to be crossed with the log-effective
#' population size.
#'
#' @return a matrix of posterior sampling intensities.
#' @export
#'
#' @examples 2 + 2
log_samp_mat <- function(popTbl, betaTbl, covariates = NULL,
powBetaTbl = NULL, pow_covariates = NULL)
{
n <- nrow(popTbl)
m <- ncol(popTbl)
#m <- length(popTbl[1,])
#grid <- epoch_width * 0:m

samp_mat<- matrix(0, nrow = n, ncol = m)
for (i in 1:n) {
logpop <- as.numeric(popTbl[i, ])
betas <- as.numeric(betaTbl[i, ])

cov_betas <- NULL
if (length(betas) > 2) {
cov_betas <- tail(betas, -2)
betas <- betas[1:2]
}

pow_cov_betas <- NULL
if (ncol(powBetaTbl) > 0) {
pow_cov_betas <- as.numeric(powBetaTbl[i, ])
}

samp_mat[i, ] <- log_samp_int(logpop = logpop, betas = betas,
covariates = covariates, cov_betas = cov_betas,
pow_covariates = pow_covariates, pow_cov_betas = pow_cov_betas)
}

return(samp_mat)
}
68 changes: 68 additions & 0 deletions R/coalsieve.R
Original file line number Diff line number Diff line change
Expand Up @@ -188,3 +188,71 @@ coalsim_thin <- function(samp_times, n_sampled, traj, lower_bound, ...)
intercoal_times = c(coal_times[1], diff(coal_times)),
samp_times = samp_times, n_sampled = n_sampled))
}

#' Sample a phylo tree
#'
#' @param gene a list containing
#'
#' @return a phylo tree consisent with `gene`.
#' @export
#'
#' @examples
#' gene = coalsim(samp_times = c(0,1,2), n_sampled = c(2,1,1), traj = unif_traj)
#' tree = sample_tree(gene)
#' plot(tree)
sample_tree <- function(gene)
{
n <- sum(gene$n_sampled)
labels <- paste0(rep("t",n), seq(1,n,1))
Nnode <- n - 1

tb <- gene$n_sampled[1] #Total branches (initial)
s <- 0 #time for branch lengths
temp_labels <- labels[1:tb]
temp_times <- rep(gene$samp_times[1], gene$n_sampled[1])
initial.row <- 2
args2 <- gen_INLA_args(gene$samp_times, gene$n_sampled, gene$coal_times)

for (j in 2:length(args2$event))
{
if (args2$event[j] == 1)
{
s <- args2$s[j];
ra <- sort(sample.int(tb, 2))
new_label <- paste0("(",temp_labels[ra[1]],":",s-temp_times[ra[1]],",",
temp_labels[ra[2]],":",s-temp_times[ra[2]],")")
temp_labels[ra[1]] <- new_label
temp_labels <- temp_labels[-ra[2]]
temp_times[ra[1]] <- s
temp_times <- temp_times[-ra[2]]
tb <- tb - 1
}
else
{ #I will be adding samples at
s <- args2$s[j];
if (gene$n_sample[initial.row] == 1)
{
temp_labels <- c(temp_labels, labels[cumsum(gene$n_sampled)[initial.row]])
initial.row <- initial.row + 1
tb <- tb + 1
temp_times <- c(temp_times, s)
}
else
{
end <- cumsum(gene$n_sampled)[initial.row]
ini <- cumsum(gene$n_sampled)[initial.row - 1] + 1
for (k in ini:end)
{
temp_labels <- c(temp_labels, labels[k])
tb <- tb + 1
temp_times <- c(temp_times, s)
}
initial.row <- initial.row + 1
}
}
}

out.tree <- ape::read.tree(text=paste0(temp_labels, ";"))
return(out.tree)
}

Loading

0 comments on commit 85c5304

Please sign in to comment.