-
Notifications
You must be signed in to change notification settings - Fork 81
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
xcms3 blueprint proposal #30
Comments
For completeness, there are also plans to to have a low level MS processing package, focusing on C-level implementations and using basic data structures as inputs/outputs. |
You're right. Some/all of the above mentioned base functions might go into that package. |
The base parameter class could be implemented like: ####============================================================
## Basic Param class.
##
## Doesn't do much (yet), but we can define common methods for
## all classes extending this one.
setClass("Param",
contains=c("Versioned", "VIRTUAL"),
prototype=prototype(
new("Versioned", versions=c(Param="0.0.1"))
)
)
####============================================================
## as.list
##
## Method to extract all slot values and returm them in a (named) list
## with the names corresponding to the slot names.
## That way we can easily extract the parameters and pass them e.g to a do.call call.
## The method returns a named list.
setMethod("as.list", signature(x="Param"), function(x, ...){
return(.param2list(x))
})
## The 'setAs' method.
setAs("Param" ,"list", function(from){
return(.param2list(from))
})
.param2list <- function(x){
## Get all slot names, skip those matching the provided pattern.
sNames <- slotNames(x)
skipSome <- grep(sNames, pattern="^\\.")
if(length(skipSome) > 0)
sNames <- sNames[-skipSome]
if(length(sNames) > 0){
resL <- vector("list", length(sNames))
for(i in 1:length(sNames))
resL[[i]] <- slot(x, name=sNames[i])
names(resL) <- sNames
return(resL)
}else{
return(list())
}
}
With a parameter class for the matchedFilter algorithm from ####============================================================
## MatchedFilterParam
##
## Parameter class containing all parameters for the 'findPeaks.matchedFilter'
## method from the xcms package.
setClass("MatchedFilterParam",
contains="Param",
slots=list(
fwhm="numeric",
sigma="numeric",
max="integer",
snthresh="numeric",
step="numeric",
steps="numeric",
mzdiff="numeric",
index="logical",
verbose.columns="logical"
),
prototype=prototype(
fwhm=30,
sigma=numeric(),
max=5L,
snthresh=10,
step=0.1,
steps=2,
mzdiff=numeric(),
index=FALSE,
verbose.columns=FALSE,
new("Versioned",
versions=c(classVersion("Param"),
MatchedFilterParam="0.0.1"))
),
validity=function(object){
msg <- validMsg(NULL, NULL)
## TODO: implement.
if(is.null(msg))
return(TRUE)
return(msg)
})
## Initialize method
setMethod("initialize", "MatchedFilterParam", function(.Object, fwhm, sigma,
max, snthresh,
step, steps, mzdiff,
index, verbose.columns){
callNextMethod()
## This just ensures that the expected default values are propagated.
## Set all the values for which we do have defaults in the prototype.
if(!missing(fwhm))
.Object@fwhm <- fwhm
if(!missing(max))
.Object@max <- max
if(!missing(snthresh))
.Object@snthresh <- snthresh
if(!missing(step))
.Object@step <- step
if(!missing(steps))
.Object@steps <- steps
if(!missing(index))
.Object@index <- index
if(!missing(verbose.columns))
.Object@verbose.columns <- verbose.columns
## Processing slots that depend on other slots, if not provided.
if(missing(sigma)){
.Object@sigma <- .Object@fwhm/2.3548
}else{
.Object@sigma <- sigma
}
if(missing(mzdiff)){
.Object@mzdiff <- 0.8 - .Object@step * .Object@steps
}else{
.Object@mzdiff <- mzdiff
}
validObject(.Object)
return(.Object)
})
## Constructor function inclusive standard parameter values.
MatchedFilterParam <- function(fwhm = 30,
sigma = fwhm/2.3548,
max = 5L,
snthresh = 10,
step = 0.1,
steps = 2,
mzdiff = 0.8 - step*steps,
index = FALSE,
verbose.columns = FALSE){
return(new("MatchedFilterParam", fwhm=fwhm, sigma=sigma, max=max,
snthresh=snthresh, step=step, steps=steps, mzdiff=mzdiff,
index=index, verbose.columns=verbose.columns))
}
|
The main user interface method could be (in this case for ####============================================================
## detectFeatures
##
## The main method for the feature detection step of the (metabolomics) data analysis.
##
####------------------------------------------------------------
setGeneric("detectFeatures", function(object, params, ...)
standardGeneric("detectFeatures"))
####============================================================
## detectFeatures signature: xcmsRaw, MatchedFilterParam
##
## Method implementation for xcmsRaw objects and MatchedFilterParam.
## This method returns, same as the findFeautes.matchedFilter, a
## 'xcmsPeaks' object.
##
####------------------------------------------------------------
setMethod("detectFeatures", signature(object="xcmsRaw", params="MatchedFilterParam"),
function(object, params, scanrange=numeric(), ...){
## Preparing data.
profFun <- match.profFun(object)
## scanrange handling.
scanrange.old <- scanrange
## sanitize if too few or too many scanrange is given
if (length(scanrange) < 2)
scanrange <- c(1, length(object@scantime))
else
scanrange <- range(scanrange)
## restrict and sanitize scanrange to maximally cover all scans
scanrange[1] <- max(1,scanrange[1])
scanrange[2] <- min(length(object@scantime),scanrange[2])
## Mild warning if the actual scanrange doesn't match the scanrange argument
if(!(identical(scanrange.old,scanrange)) && (length(scanrange.old) >0)){
cat("Warning: scanrange was adjusted to ",scanrange,"\n")
## Scanrange filtering
keepidx <- seq.int(1, length(object@scantime)) %in% seq.int(scanrange[1],
scanrange[2])
object <- split(object, f=keepidx)[["TRUE"]]
}
## Extract all required data values from the object and all parameters
## from the Param class and run the do.call
Res <- do.call("do_detectFeatures_matchedFilter",
c(list(mz=object@env$mz,
int=object@env$intensity,
scantime=object@scantime,
valsPerSpect=diff(c(object@scanindex,
length(object@env$mz))),
profFun=profFun),
as(params, "list")
))
return(new("xcmsPeaks", Res))
})
####============================================================
## valueCount2scanIndex
##
## Simple (internal) helper function to convert the number of data values per
## scan/spectrum to the integer vector expected by base xcms functions
## which are internally passed to the downstream C functions.
## Arguments:
## o valCount: numeric vector with the number of values per spectrum.
## Returns:
## o a numeric vector with the index (0-based) in the mz or intensity vectors
## indicating the start of a spectrum.
####------------------------------------------------------------
valueCount2scanIndex <- function(valCount){
## Convert into 0 based.
valCount <- cumsum(valCount)
return(as.integer(c(0, valCount[-length(valCount)])))
}
|
For backward compatibility, the ####============================================================
## Proposed replacement method for the findPeaks.matchedFilter method in xcms.
## This basically just prepares all parameters to be passed down to the actual
## analysis function.
## Also, this is supposed to be deprecated in the long run in favour of a
## 'detectFeatures' method with signature(object, params)
setGeneric("findPeaks.matchedFilter", function(object, ...)
standardGeneric("findPeaks.matchedFilter"))
setMethod("findPeaks.matchedFilter", "xcmsRaw", function(object, fwhm = 30, sigma = fwhm/2.3548,
max = 5, snthresh = 10, step = 0.1,
steps = 2, mzdiff = 0.8 - step*steps,
index = FALSE, sleep = 0,
verbose.columns = FALSE,
scanrange= numeric()){
if(sleep != 0)
warning("Argument sleep is no longer supported and thus ignored.")
profFun <- xcms:::match.profFun(object)
## scanrange handling.
scanrange.old <- scanrange
## sanitize if too few or too many scanrange is given
if (length(scanrange) < 2)
scanrange <- c(1, length(object@scantime))
else
scanrange <- range(scanrange)
## restrict and sanitize scanrange to maximally cover all scans
scanrange[1] <- max(1,scanrange[1])
scanrange[2] <- min(length(object@scantime),scanrange[2])
## Mild warning if the actual scanrange doesn't match the scanrange argument
if(!(identical(scanrange.old,scanrange)) && (length(scanrange.old) >0)){
cat("Warning: scanrange was adjusted to ",scanrange,"\n")
## Scanrange filtering
keepidx <- seq.int(1, length(object@scantime)) %in% seq.int(scanrange[1], scanrange[2])
object <- split(object, f=keepidx)[["TRUE"]]
}
## Call the function representing the 'original' code from the findPeaks.matchedFilter
## method.
Res <- do_detectFeatures_matchedFilter(object@env$mz,
object@env$intensity,
scantime=object@scantime,
valsPerSpect=diff(c(object@scanindex,
length(object@env$mz))),
profFun=profFun,
profparam=object@profparam,
fwhm=fwhm,
sigma=sigma,
max=max,
snthresh=snthresh,
step=step,
steps=steps,
mzdiff=mzdiff,
index=index,
verbose.columns=verbose.columns)
return(new("xcmsPeaks", Res))
}) I think that way we could nicely separate the user interface (i.e. the methods) from the actual analysis functions. |
Closing because outdated by now. |
Goal is to provide a high modularity and hence easier maintenance and better integration in other packages.
Two tier implementation:
BPPARAM
argument to enable parallel processing setup.@sneumann @lgatto: discussion, notes, ideas etc welcome.
Will add below a reference implementation for the
findPeaks.matchedFilter
method fromxcms
to illustrate how it might look like.The text was updated successfully, but these errors were encountered: