Skip to content

Commit

Permalink
Add [ method for xcmsRaw objects
Browse files Browse the repository at this point in the history
o Implement a [ method for xcmsRaw objects allowing to sub-set the
  object to specified scans/spectra.
o Implement the do_detectFeatures_massifquant function.
o Unit tests for the above method/function.
  • Loading branch information
jorainer committed Sep 26, 2016
1 parent 9f35b91 commit f82432a
Show file tree
Hide file tree
Showing 14 changed files with 810 additions and 230 deletions.
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,7 @@ export(
"breaks_on_binSize",
"breaks_on_nBins",
"do_detectFeatures_centWave",
"do_detectFeatures_massifquant",
"do_detectFeatures_matchedFilter",
"do_detectFeatures_MSW",
"imputeLinInterpol",
Expand Down
1 change: 1 addition & 0 deletions R/DataClasses.R
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
############################################################
## Class unions
setClassUnion("characterOrNULL", c("character", "NULL"))
setClassUnion("logicalOrNumeric", c("logical", "numeric"))
##setClassUnion("ANYorNULL", c("ANY", "NULL"))


Expand Down
286 changes: 186 additions & 100 deletions R/do_detectFeatures-functions.R

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions R/functions-xcmsRaw.R
Original file line number Diff line number Diff line change
Expand Up @@ -347,3 +347,4 @@ remakeTIC<-function(object){
return(object)
}


160 changes: 158 additions & 2 deletions R/methods-xcmsRaw.R
Original file line number Diff line number Diff line change
Expand Up @@ -1904,8 +1904,10 @@ setMethod("findKalmanROI", "xcmsRaw", function(object, mzrange=c(0.0,0.0),
})

############################################################
## findPeaks.massifquant
setMethod("findPeaks.massifquant", "xcmsRaw",
## This should be replaced soon; is only for testing purposes.
setGeneric("findPeaks.massifquant_orig", function(object, ...)
standardGeneric("findPeaks.massifquant_orig"))
setMethod("findPeaks.massifquant_orig", "xcmsRaw",
function(object,
ppm=10,
peakwidth=c(20,50),
Expand Down Expand Up @@ -1986,6 +1988,76 @@ setMethod("findPeaks.massifquant", "xcmsRaw",
return(invisible(featlist));
})

############################################################
## findPeaks.massifquant: Note the original code returned, if withWave = 1,
## a Peaks object otherwise a matrix!
setMethod("findPeaks.massifquant", "xcmsRaw", function(object,
ppm=10,
peakwidth = c(20,50),
snthresh = 10,
prefilter = c(3,100),
mzCenterFun = "wMean",
integrate = 1,
mzdiff = -0.001,
fitgauss = FALSE,
scanrange = numeric(),
noise = 0,
sleep = 0,
verbose.columns = FALSE,
criticalValue = 1.125,
consecMissedLimit = 2,
unions = 1,
checkBack = 0,
withWave = 0) {

if (sleep > 0)
cat("'sleep' argument is defunct and will be ignored.")

## Sub-set the xcmsRaw baesd on scanrange
scanrange.old <- scanrange
## sanitize if too few or too many scanrange is given
if (length(scanrange) < 2)
scanrange <- c(1, length(object@scantime))
else
scanrange <- range(scanrange)
## restrict and sanitize scanrange to maximally cover all scans
scanrange[1] <- max(1,scanrange[1])
scanrange[2] <- min(length(object@scantime),scanrange[2])
## Mild warning if the actual scanrange doesn't match the scanrange
## argument
if (!(identical(scanrange.old, scanrange)) &&
(length(scanrange.old) > 0)) {
cat("Warning: scanrange was adjusted to ",scanrange,"\n")
## Scanrange filtering
keepidx <- seq.int(1, length(object@scantime)) %in% seq.int(scanrange[1], scanrange[2])
object <- split(object, f=keepidx)[["TRUE"]]
}
if (!isCentroided(object))
warning("It looks like this file is in profile mode.",
" Massifquant can process only centroid mode data !\n")
vps <- diff(c(object@scanindex, length(object@env$mz)))
res <- do_detectFeatures_massifquant(mz = object@env$mz,
int = object@env$intensity,
scantime = object@scantime,
valsPerSpect = vps,
ppm = ppm, peakwidth = peakwidth,
snthresh = snthresh,
prefilter = prefilter,
mzCenterFun = mzCenterFun,
integrate = integrate,
mzdiff = mzdiff, fitgauss = fitgauss,
noise = noise,
verboseColumns = verbose.columns,
criticalValue = criticalValue,
consecMissedLimit = consecMissedLimit,
unions = unions, checkBack = checkBack,
withWave = as.logical(withWave))
## Compatibility with "old" code, i.e. return a matrix if withWave is 0
if (withWave == 0)
return(res)
invisible(new("xcmsPeaks", res));
})

############################################################
## isCentroided
setMethod("isCentroided", "xcmsRaw", function(object){
Expand Down Expand Up @@ -2645,3 +2717,87 @@ setMethod("stitch.netCDF.new", "xcmsRaw", function(object, lockMass) {
return(ob)
})

############################################################
## [
## Subset by scan.
##' @title Subset an xcmsRaw object by scans
##'
##' @description Subset an \code{\linkS4class{xcmsRaw}} object by scans. The
##' returned \code{\linkS4class{xcmsRaw}} object contains values for all scans
##' specified with argument \code{i}.
##'
##' @details Only subsetting by scan index in increasing order or by a logical
##' vector are supported. If not ordered, argument \code{i} is sorted
##' automatically. Indices which are larger than the total number of scans
##' are discarded.
##' @param x The \code{\linkS4class{xcmsRaw}} object that should be sub-setted.
##' @param i Integer or logical vector specifying the scans/spectra to which \code{x} should be sub-setted.
##' @param j Not supported.
##' @param drop Not supported.
##' @return The sub-setted \code{\linkS4class{xcmsRaw}} object.
##' @author Johannes Rainer
##' @seealso \code{\link{split.xcmsRaw}}
##' @examples
##' ## Load a test file
##' file <- system.file('cdf/KO/ko15.CDF', package = "faahKO")
##' xraw <- xcmsRaw(file)
##' ## The number of scans/spectra:
##' length(xraw@scantime)
##'
##' ## Subset the object to scans with a scan time from 3500 to 4000.
##' xsub <- xraw[xraw@scantime >= 3500 & xraw@scantime <= 4000]
##' range(xsub@scantime)
##' ## The number of scans:
##' length(xsub@scantime)
##' ## The number of values of the subset:
##' length(xsub@env$mz)
setMethod("[", signature(x = "xcmsRaw",
i = "logicalOrNumeric",
j = "missing",
drop = "missing"),
function(x, i, j, drop) {
nScans <- length(x@scantime)
if (is.logical(i))
i <- which(i)
if (length(i) == 0)
return(new("xcmsRaw"))
## Ensure i is sorted.
if (is.unsorted(i)) {
warning("'i' has been sorted as subsetting supports",
" only sorted indices.")
i <- sort(i)
}
## Ensure that we're not supposed to return duplicated scans!
i <- unique(i)
## Check that i is within the number of scans
outsideRange <- !(i %in% 1:nScans)
if (any(outsideRange)) {
## Throw an error or just a warning?
iOut <- i[outsideRange]
i <- i[!outsideRange]
warning("Some of the indices in 'i' are larger than the",
" total number of scans which is ", nScans)
}
valsPerSpect <- diff(c(x@scanindex, length(x@env$mz)))
## Subset:
## 1) scantime
x@scantime <- x@scantime[i]
## 2) scanindex
x@scanindex <- x@scanindex[i]
## 3) @env$mz
newE <- new.env()
valsInScn <- rep(1:nScans, valsPerSpect) %in% i
newE$mz <- x@env$mz[valsInScn]
## 4) @env$intensity
newE$intensity <- x@env$intensity[valsInScn]
x@env <- newE
## 5) The remaining slots
x@tic <- x@tic[i]
if (length(x@polarity) == nScans)
x@polarity <- x@polarity[i]
if (length(x@acquisitionNum) == nScans)
x@acquisitionNum <- x@acquisitionNum[i]
x@mzrange <- range(x@env$mz)
## TODO: what with the MSn data?
return(x)
})
1 change: 1 addition & 0 deletions inst/NEWS
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ NEW FEATURES:
detection on all files without stopping on errors.
+ Method showError for xcmsSet objects that list all errors during feature
detection (if stopOnError = FALSE in the xcmsSet function).
+ [ method to subset xcmsRaw objects by scans.

BUG FIXES:
+ Fixed bug in rawMat (issue #58).
Expand Down
72 changes: 49 additions & 23 deletions inst/unitTests/runit.splitRaw.R
Original file line number Diff line number Diff line change
@@ -1,50 +1,40 @@
testSplitRawEven <- function() {
file <- system.file('cdf/KO/ko15.CDF', package = "faahKO")
xraw <- xcmsRaw(file)
file <- system.file('cdf/KO/ko15.CDF', package = "faahKO")
xraw <- xcmsRaw(file)

xrl <- split(xraw, f=xraw@scanindex%%2)
testSplitRawEven <- function() {
xrl <- split(xraw, f = xraw@scanindex%%2)
checkEqualsNumeric(length(xrl), 2)

checkTrue(length(xraw@scanindex) == length(xrl[[1]]@scanindex) + length(xrl[[2]]@scanindex))
checkTrue(length(xraw@env$mz) == length(xrl[[1]]@env$mz) + length(xrl[[2]]@env$mz))
checkTrue(length(xraw@scanindex) == length(xrl[[1]]@scanindex)
+ length(xrl[[2]]@scanindex))
checkTrue(length(xraw@env$mz) == length(xrl[[1]]@env$mz)
+ length(xrl[[2]]@env$mz))
}

testSplitRawOdd <- function() {
file <- system.file('cdf/KO/ko15.CDF', package = "faahKO")
xraw <- xcmsRaw(file)

xrl <- split(xraw, f=(xraw@scanindex+1)%%2)
xrl <- split(xraw, f = (xraw@scanindex+1)%%2)
checkEqualsNumeric(length(xrl), 2)

checkTrue(length(xraw@scanindex) == length(xrl[[1]]@scanindex) + length(xrl[[2]]@scanindex))
checkTrue(length(xraw@env$mz) == length(xrl[[1]]@env$mz) + length(xrl[[2]]@env$mz))
checkTrue(length(xraw@scanindex) == length(xrl[[1]]@scanindex) +
length(xrl[[2]]@scanindex))
checkTrue(length(xraw@env$mz) == length(xrl[[1]]@env$mz) +
length(xrl[[2]]@env$mz))
}


testSplitRawFirst <- function() {
file <- system.file('cdf/KO/ko15.CDF', package = "faahKO")
xraw <- xcmsRaw(file)

xrl <- split(xraw, f=c(1,rep(2,length(xraw@scanindex)-1)))

checkEqualsNumeric(length(xrl), 2)

}

testSplitRawLast <- function() {
file <- system.file('cdf/KO/ko15.CDF', package = "faahKO")
xraw <- xcmsRaw(file)

xrl <- split(xraw, f=c(rep(1,length(xraw@scanindex)-1), 2))

checkEqualsNumeric(length(xrl), 2)
}

testSplitRawNone <- function() {

file <- system.file('cdf/KO/ko15.CDF', package = "faahKO")
xraw <- xcmsRaw(file)

xrl <- split(xraw, f=rep(1,length(xraw@scanindex)))

checkEqualsNumeric(length(xrl), 1)
Expand All @@ -54,3 +44,39 @@ testSplitRawNone <- function() {
checkTrue(all(xraw@scanindex == xraw2@scanindex))

}

############################################################
## Subset an xcmsRaw object by scan index.
test_bracked_subset_xcmsRaw <- function() {
## Get scans 1:10
xsub <- xraw[1:10]
checkIdentical(xraw@scantime[1:10], xsub@scantime)
checkIdentical(xraw@scanindex[1:10], xsub@scanindex[1:10])
checkIdentical(xraw@env$mz[1:xraw@scanindex[11]], xsub@env$mz)
checkIdentical(xraw@env$intensity[1:xraw@scanindex[11]], xsub@env$intensity)

## Check using logical
bm <- rep(FALSE, length(xraw@scanindex))
bm[1:10] <- TRUE
xsub_2 <- xraw[bm]
checkEquals(xsub, xsub_2)

## Get none.
xempty <- xraw[rep(FALSE, length(xraw@scanindex))]
checkTrue(length(xempty@scanindex) == 0)

## Get the full one:
xsub <- xraw[1:length(xraw@scanindex)]
checkEquals(xsub@env$mz, xraw@env$mz)
checkEquals(xsub@env$intensity, xraw@env$intensity)

## Get some scans in the middle somewhere
i <- c(5, 99, 317)
xsub <- xraw[i]
checkIdentical(xsub@scantime, xraw@scantime[i])
whichIdx <- c(((xraw@scanindex[5] + 1):xraw@scanindex[6]),
((xraw@scanindex[99] + 1):xraw@scanindex[100]),
((xraw@scanindex[317] + 1):xraw@scanindex[318]))
checkIdentical(xsub@env$mz, xraw@env$mz[whichIdx])
checkIdentical(xsub@env$intensity, xraw@env$intensity[whichIdx])
}

0 comments on commit f82432a

Please sign in to comment.