Permalink
Browse files

Change default initialization of size factors as per reviewer suggestion

  • Loading branch information...
kieranrcampbell committed Dec 17, 2018
1 parent b5424e9 commit b0888595acc01c13153009809565ac8501f5f5b5
Showing with 73 additions and 16 deletions.
  1. +13 −4 R/clonealign.R
  2. +24 −6 R/inference-tflow.R
  3. +11 −3 man/clonealign.Rd
  4. +3 −3 man/inference_tflow.Rd
  5. +22 −0 tests/testthat/test_clonealign.R
@@ -17,8 +17,6 @@
#' @param learning_rate The learning rate to be passed to the Adam optimizer
#' @param fix_alpha Should the underlying priors for clone frequencies be fixed? Default TRUE
#' (values are inferred from the data)
#' @param fix_s Should the size factors be fixed? If \code{NULL} they are jointly inferred from the data, otherwise
#' a vector corresponding to the number of cells should be specified.
#' @param dtype The dtype for tensorflow useage, either "float32" or "float64"
#' @param saturate Should the CNV-expression relationship saturate above copy number = \code{saturation_threshold}? Default TRUE
#' @param saturation_threshold If \code{saturate} is true, copy numbers above this will be reduced to the threshold
@@ -28,9 +26,11 @@
#' @param K The dimensionality of the expression latent space. If left \code{NULL}, K is set to 1 if fewer than 100 genes
#' are present and 6 otherwise.
#' @param B Number of basis functions for spline fitting
#' @param size_factors Either "fixed", "infer", or a numeric vector of size factors. See \code{details}.
#'
#'
#' @details
#'
#' \strong{Input format}
#'
#' \code{gene_expression_data} must either be a \code{SingleCellExperiment} or \code{SummarizedExperiment}
@@ -42,6 +42,15 @@
#' If \code{colnames(copy_number_data)} is not \code{NULL} then these names will be used for each of
#' the clones in the final output.
#'
#'
#' \strong{Size factors}
#'
#' If \code{size_factors == "fixed"}, the size factors will be set to the overall library size per cell
#' (total number of reads mapping to the cell).
#' If \code{size_factors == "infer"}, the size factors will be treated as a model paramter and jointly
#' optimized during inference.
#' Otherwise, \code{size_factors} can be a numeric vector of precomputed, custom size factors.
#'
#' \strong{Recommended parameter settings}
#'
#' As with any probabilistic model there are many parameters to set. Through comprehensive simulations regarding
@@ -96,7 +105,7 @@ clonealign <- function(gene_expression_data,
cov = NULL,
ref = NULL,
fix_alpha = FALSE,
fix_s = NULL,
size_factors = "fixed",
dtype = "float64",
saturate = TRUE,
saturation_threshold = 6,
@@ -162,7 +171,7 @@ clonealign <- function(gene_expression_data,
cov = cov,
ref = cov,
fix_alpha = fix_alpha,
fix_s = fix_s,
size_factors = "fixed",
dtype = dtype,
saturate = saturate,
saturation_threshold = saturation_threshold,
@@ -41,7 +41,7 @@ inference_tflow <- function(Y_dat,
cov = NULL,
ref = NULL,
fix_alpha = FALSE,
fix_s = NULL,
size_factors = "fixed",
dtype = c("float32", "float64"),
saturate = TRUE,
saturation_threshold = 6,
@@ -167,7 +167,7 @@ inference_tflow <- function(Y_dat,
pcs <- pca$x[,seq_len(K),drop=FALSE]
pcs <- scale(pcs)

s_init = rowSums(data$Y)
s_init <- rowSums(data$Y)

mu_guess <- colMeans(data$Y / rowMeans(data$Y)) / rowMeans(data$L)
mu_guess <- mu_guess[-1] / mu_guess[1]
@@ -192,12 +192,30 @@ inference_tflow <- function(Y_dat,
}

s <- NULL
if(!is.null(fix_s)) {
s <- tf$constant(s_init, dtype = dtype)
} else {
s <- tf$exp(tf$Variable(tf$constant(log(s_init), dtype = dtype))) + LOWER_BOUND

# Sort out size factors
if(length(size_factors == 1) && N > 1) {
if(!is.character(size_factors)) {
stop("If size factors does not contain sizes per cell it must be either 'fixed' or 'infer'. See ?clonealign")
}
if(!(size_factors %in% c("fixed", "infer"))) {
stop("Size factors must be either 'fixed' or 'infer'. See ?clonealign")
}

if(size_factors == "fixed") {
s <- tf$constant(s_init, dtype = dtype)
} else {
s <- tf$exp(tf$Variable(tf$constant(log(s_init), dtype = dtype))) + LOWER_BOUND
}

} else if(length(size_factors) > 1) {
if(length(size_factors) != N || !is.numeric(size_factors) || all(size_factors > 0)) {
stop("If size factors is not specified as 'fixed' or 'infer' a positive numeric vector of length N_cells must be supplied. See ?clonealign")
}
s <- tf$constant(size_factors, dtype = dtype)
}


log_alpha <- NULL

if(!fix_alpha) {

Some generated files are not rendered by default. Learn more.

Oops, something went wrong.

Some generated files are not rendered by default. Learn more.

Oops, something went wrong.
@@ -31,3 +31,25 @@ test_that("clonealign(...) returns a valid object", {
expect_true(all(c("mu", "s", "a", "b") %in% names(cal$ml_params)))

})



test_that("clonealign(...) works with non-default size factors", {
data(example_sce)
library(SummarizedExperiment)
N <- ncol(example_sce)

L <- rowData(example_sce)[, c("A", "B", "C")]

cal <- clonealign(example_sce, L, max_iter = 5, size_factors = "infer")

expect_is(cal, "clonealign_fit")

sf <- colSums(counts(example_sce))

cal2 <- clonealign(example_sce, L, max_iter = 5, size_factors = sf)
expect_is(cal2, "clonealign_fit")


})

0 comments on commit b088859

Please sign in to comment.