Skip to content

Commit

Permalink
adding filterBarcodes argument
Browse files Browse the repository at this point in the history
  • Loading branch information
mikelove committed Mar 21, 2020
1 parent 918e03a commit 45dfea9
Show file tree
Hide file tree
Showing 8 changed files with 101 additions and 26 deletions.
4 changes: 2 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
Package: tximport
Version: 1.15.9
Version: 1.15.10
Title: Import and summarize transcript-level estimates for transcript- and
gene-level analysis
Description: Imports transcript-level abundance, estimated counts and
Expand All @@ -23,6 +23,6 @@ URL: https://github.com/mikelove/tximport
biocViews: DataImport, Preprocessing,
RNASeq, Transcriptomics, Transcription, GeneExpression,
ImmunoOncology
RoxygenNote: 6.1.1
RoxygenNote: 7.1.0
NeedsCompilation: no
Encoding: UTF-8
6 changes: 6 additions & 0 deletions NEWS
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
CHANGES IN VERSION 1.15.10
--------------------------

o Add argument `filterBarcodes`, which will only import cells
with barcodes in the `whitelist.txt` file.

CHANGES IN VERSION 1.15.9
-------------------------

Expand Down
41 changes: 37 additions & 4 deletions R/alevin.R
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
readAlevinPreV014 <- function(files) {
readAlevinPreV014 <- function(files, filterBarcodes) {
message("using importer for pre-v0.14.0 Alevin files")
message("pre-v0.14.0 Alevin import will be deprecated in October 2020")
warning("pre-v0.14.0 Alevin import will be deprecated in October 2020")
dir <- sub("/alevin$","",dirname(files))
barcode.file <- file.path(dir, "alevin/quants_mat_rows.txt")
gene.file <- file.path(dir, "alevin/quants_mat_cols.txt")
matrix.file <- file.path(dir, "alevin/quants_mat.gz")
boot.barcode.file <- file.path(dir, "alevin/quants_boot_rows.txt")
var.file <- file.path(dir, "alevin/quants_var_mat.gz")
whitelist.file <- file.path(dir, "alevin/whitelist.txt")
for (f in c(barcode.file, gene.file, matrix.file)) {
if (!file.exists(f)) {
stop("expecting 'files' to point to 'quants_mat.gz' file in a directory 'alevin'
Expand Down Expand Up @@ -44,18 +45,32 @@ readAlevinPreV014 <- function(files) {
close(con)
mat <- list(counts=counts.mat, variance=var.mat)
}
# cell barcode filtering
if (filterBarcodes & file.exists(whitelist.file)) {
filter <- readLines(whitelist.file)
if (is.list(mat)) {
keep <- colnames(mat$counts) %in% filter
mat$counts <- mat$counts[,keep]
mat$variance <- mat$variance[,keep]
} else {
keep <- colnames(mat) %in% filter
mat <- mat[,keep]
}
message(paste("filtering down to",sum(keep),"cell barcodes"))
}
mat
}

readAlevin <- function(files, dropInfReps, forceSlow) {
dir <- sub("/alevin$","",dirname(files))
readAlevin <- function(files, dropInfReps, forceSlow, filterBarcodes) {
dir <- sub("/alevin$","",dirname(files))
barcode.file <- file.path(dir, "alevin/quants_mat_rows.txt")
gene.file <- file.path(dir, "alevin/quants_mat_cols.txt")
matrix.file <- file.path(dir, "alevin/quants_mat.gz")
mean.file <- file.path(dir, "alevin/quants_mean_mat.gz")
var.file <- file.path(dir, "alevin/quants_var_mat.gz")
boot.file <- file.path(dir, "alevin/quants_boot_mat.gz")
boot.barcode.file <- file.path(dir, "alevin/quants_boot_rows.txt")
whitelist.file <- file.path(dir, "alevin/whitelist.txt")
for (f in c(barcode.file, gene.file, matrix.file)) {
if (!file.exists(f)) {
stop("expecting 'files' to point to 'quants_mat.gz' file in a directory 'alevin'
Expand Down Expand Up @@ -110,6 +125,14 @@ readAlevin <- function(files, dropInfReps, forceSlow) {
# slow in R, because requires looping over cells to read positions and expression
mat <- readAlevinBits(matrix.file, gene.names, cell.names)
}

# cell barcode filtering
if (filterBarcodes & file.exists(whitelist.file)) {
filter <- readLines(whitelist.file)
keep <- colnames(mat) %in% filter
message(paste("filtering down to",sum(keep),"cell barcodes"))
mat <- mat[,keep]
}

if (num.boot > 0) {

Expand All @@ -133,13 +156,23 @@ readAlevin <- function(files, dropInfReps, forceSlow) {
# need to re-arrange to match the counts matrix
mean.mat <- mean.mat[,cell.names]
var.mat <- var.mat[,cell.names]

# cell barcode filtering of bootstrap mean and variance matrices
if (filterBarcodes & file.exists(whitelist.file)) {
mean.mat <- mean.mat[,keep]
var.mat <- var.mat[,keep]
}

if (boot.exists & !dropInfReps) {
# read in bootstrap inferential replicates
message("reading in alevin inferential replicates (set dropInfReps=TRUE to skip)")
infReps <- readAlevinInfReps(boot.file, gene.names, boot.cell.names, num.boot)
# need to re-arrange to match the counts matrix
infReps <- lapply(infReps, function(z) z[,cell.names])
# cell barcode filtering of infReps
if (filterBarcodes & file.exists(whitelist.file)) {
infReps <- lapply(infReps, function(z) z[,keep])
}
return(list(counts=mat, mean=mean.mat, variance=var.mat, infReps=infReps))
} else {
# return counts, mean and variance
Expand Down
12 changes: 8 additions & 4 deletions R/tximport.R
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,8 @@ NULL
#' C++ importer for alevin's EDS format.
#' For alevin, \code{tximport} is importing the gene-by-cell matrix of counts,
#' as \code{txi$counts}, and effective lengths are not estimated.
#' \code{txi$variance} may also be imported if inferential replicates were
#' \code{txi$mean} and \code{txi$variance}
#' may also be imported if inferential replicates were
#' used, as well as inferential replicates if these were output by alevin.
#' Length correction should not be applied to datasets where there
#' is not an expected correlation of counts and feature length.
Expand Down Expand Up @@ -183,6 +184,8 @@ NULL
#' @param forceSlow logical, argument used for testing. Will force the use of
#' the slower R code for importing alevin, even if \code{fishpond}
#' library is installed. Default is FALSE
#' @param filterBarcodes option for alevin, to import only cell barcodes
#' listed in \code{whitelist.txt}. Default is FALSE
#'
#' @return A simple list containing matrices: abundance, counts, length.
#' Another list element 'countsFromAbundance' carries through
Expand Down Expand Up @@ -251,7 +254,8 @@ tximport <- function(files,
sparse=FALSE,
sparseThreshold=1,
readLength=75,
forceSlow=FALSE) {
forceSlow=FALSE,
filterBarcodes=FALSE) {

# inferential replicate importer
infRepImporter <- NULL
Expand Down Expand Up @@ -290,9 +294,9 @@ tximport <- function(files,
vrsn <- getAlevinVersion(files)
compareToV014 <- compareVersion(vrsn, "0.14.0")
if (compareToV014 == -1) {
mat <- readAlevinPreV014(files)
mat <- readAlevinPreV014(files, filterBarcodes)
} else {
mat <- readAlevin(files, dropInfReps, forceSlow)
mat <- readAlevin(files, dropInfReps, forceSlow, filterBarcodes)
}
if (!is.list(mat)) {
# only counts
Expand Down
8 changes: 6 additions & 2 deletions man/makeCountsFromAbundance.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

11 changes: 8 additions & 3 deletions man/summarizeToGene.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

40 changes: 30 additions & 10 deletions man/tximport.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 4 additions & 1 deletion tests/testthat/test_alevin.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ test_that("import alevin works", {
files <- file.path(dir,"alevin/neurons_900_v012/alevin/quants_mat.gz")
file.exists(files)

txi <- tximport(files, type="alevin")
expect_warning({txi <- tximport(files, type="alevin")}, "deprecated")

dir <- system.file("extdata", package="tximportData")
files <- file.path(dir,"alevin/neurons_900_v014/alevin/quants_mat.gz")
Expand Down Expand Up @@ -35,5 +35,8 @@ test_that("import alevin works", {
idx <- 1:1000
cts <- unname(as.matrix(txi$counts[idx,]))
expect_true(max(abs(mat - unname(cts))) < 1e-6)

# again import with cell barcode filtering
txi <- tximport(files, type="alevin", dropInfReps=TRUE, filterBarcodes=TRUE)

})

0 comments on commit 45dfea9

Please sign in to comment.