Merge pull request #33 from statOmics/MSqRob0.7.5

MSqRob0.7.5
statOmics · Oct 22, 2017 · 8d2787d · 8d2787d
2 parents dcc1d3f + 69b2b22
commit 8d2787d
Show file tree

Hide file tree

Showing 20 changed files with 1,390 additions and 1,030 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,6 +1,21 @@
 # Change Log
 All notable changes to this project will be documented in this file.
 
+## 0.7.5 - 2017-10-20
+
+### Added
+
+- Added support for mzTab and Progenesis file formats.
+- Changed to default testing against a fold change of 0.5 in the GUI.
+
+### Fixed
+
+- Made estimations more stable when testing multiple fixed effects simultanously.
+
+### Removed
+
+- Removed the preprocess_moFF function and replaced it by preprocess_generic.
+
 ## 0.7.1 - 2017-09-06
 
 ### Added

diff --git a/DESCRIPTION b/DESCRIPTION
@@ -8,7 +8,7 @@ Authors@R: c(
     "lieven.clement@UGent.be"),
     person("Lieven", "Clement", role = c("aut", "cre"), email =
     "lieven.clement@UGent.be"))
-Version: 0.7.0
+Version: 0.7.5
 Date: 2015-07-07
 Author: Ludger Goeminne [aut],
     Kris Gevaert [ctb],

diff --git a/NAMESPACE b/NAMESPACE
@@ -5,6 +5,8 @@ S3method(predict,dummyVars_MSqRob)
 export(MSnSet2protdata)
 export(addFactorInteractions)
 export(addVarFromVar)
+export(adjustNames)
+export(aggregateMSnSet)
 export(contr.ltfr_MSqRob)
 export(countPeptides)
 export(cutOffPval)
@@ -15,6 +17,7 @@ export(fit.model)
 export(getBetaVcovDfList)
 export(getThetaVars)
 export(get_qvals)
+export(import2MSnSet)
 export(init_ann_MQ_Excel)
 export(inspect_loads_MSqRob)
 export(loads_MSqRob)
@@ -24,8 +27,8 @@ export(pasteAnnotation)
 export(plot_fdrtool)
 export(preprocess_MSnSet)
 export(preprocess_MaxQuant)
+export(preprocess_generic)
 export(preprocess_long)
-export(preprocess_moFF)
 export(preprocess_wide)
 export(prot.p.adjust)
 export(prot.p.adjust_protwise)

diff --git a/R/fit_model.R b/R/fit_model.R
@@ -74,7 +74,7 @@ fit.model=function(protdata, response=NULL, fixed=NULL, random=NULL, add.interce
     #Adjust names of predictors to make them similar to those of "lm"
     #In case of factors, the name of the factor is concatenated with the level of the factor
     #Also takes a bit of time with big datasets:
-    datalist <- .adjustNames(datalist, random)
+    datalist <- adjustNames(datalist, random)
 
     #Fit the list of ridge models
     modellist <- .createRidgeList(datalist = datalist, weights = weights, response = response, fixed = fixed, shrinkage.fixed = shrinkage.fixed, formula_fix = formula_fix, random = random, formula_ran = formula_ran, add.intercept = add.intercept, intercept = intercept, intercept_name = "(Intercept)", k = k, robustM = robustM, scaleUnshrFix = scaleUnshrFix, modfiedGS = modfiedGS, tolPwrss = tolPwrss, verbose = verbose, printProgress=printProgress, shiny=shiny, message_fitting=message_fitting, ...)
@@ -103,10 +103,14 @@ fit.model=function(protdata, response=NULL, fixed=NULL, random=NULL, add.interce
 
   errorMsg <- NULL
 
-  #Control: fixed en random connot be NULL at the same time!
+  #Control: fixed and random connot be NULL at the same time!
   if(is.null(response)){errorMsg <- paste0(errorMsg, "\n\n", "Please specify a response variable.")}
   if(is.null(fixed) && is.null(random)){errorMsg <- paste0(errorMsg, "\n\n", "Please specify appropriate fixed and/or random effects.")}
 
+  #Control: fixed and random effects must be completely different: no overlaps, otherwise problems with "adjustNames" function!
+  #If you really want to try something this crazy, just duplicate the effect.
+  if(any(fixed %in% random)){errorMsg <- paste0(errorMsg, "\n\n", "Fixed and random effects must be different from each other.")}
+
   #Correct for possible interactions (only used in error control that follows)
   errorMsg <- .checkPredictors(errorMsg, fixed, possible_vars, "fixed")
   errorMsg <- .checkPredictors(errorMsg, random, possible_vars, "random")
@@ -384,7 +388,7 @@ fit.model=function(protdata, response=NULL, fixed=NULL, random=NULL, add.interce
 
 
 
-.createParsedFormula=function(x,y,response,fixed,shrinkage.fixed,random,formula_ran,add.intercept,formula_fix,scaleUnshrFix,modfiedGS,nObsPep=NULL,nNonObsPep=NULL,v=NULL){
+.createParsedFormula=function(x,y,response,fixed,shrinkage.fixed,random,formula_ran,add.intercept,formula_fix,scaleUnshrFix,modfiedGS,nObsPep=NULL,nNonObsPep=NULL,v=NULL,familyfun=NULL){
 
   #,nObsPep=NULL,nNonObsPep=NULL,v=NULL need to be given if a count formula needs to be made, even though these arguments are never used,
   #they need to be set in order for the formula not to fail!
@@ -395,6 +399,8 @@ fit.model=function(protdata, response=NULL, fixed=NULL, random=NULL, add.interce
 
   error <- FALSE
 
+  if(n==0){error <- TRUE}
+
   # if(!is.null(fixed) && any(sapply(strsplit(fixed, ":"), function(z){return(length(levels(factor(do.call(paste, x[,z, drop=FALSE])))))})==1)){
   #   warning("Fixed effects should have at least 2 levels before a model can be fit.")
   #   error <- TRUE
@@ -431,9 +437,65 @@ fit.model=function(protdata, response=NULL, fixed=NULL, random=NULL, add.interce
     #If there is at least one shrunken fixed effect
   } else if(any(shrinkage.fixed!=0)){
 
+    #We don't want any singularities in our fixed effects!!!
+    #Remove those singularities from the data!
+    oldContr <- options("contrasts")$contrasts
+    newContr <- oldContr
+    newContr["unordered"] <- "contr.ltfr_MSqRob"
+    options(contrasts = newContr)
+
+    MMFull <- model.matrix(formula_fix, data=x)
+
+    options(contrasts = oldContr)
+
+    if(any(duplicated(cov(MMFull), MARGIN = 2))){
+    lol <- lapply(adjustNames(list(x), fixed), function(z) {
+      colnames(MMFull)[duplicated(cov(MMFull), MARGIN = 2)]==z}
+      )
+
+    x <- x[which(rowSums(lol[[1]])==0),]
+    x <- droplevels(x)
+    n <- nrow(x)
+    y <- y[which(rowSums(lol[[1]])==0)]
+
+    ###COPY PASTE VAN HIERBOVEN!!!####
+    ###Recall functie???###
+
+    #Remove fixed effects with only one level so that these models can be fit (interpretation is done when fitting contrasts!)
+    templist <- .removeSingleLevels(fixed, shrinkage.fixed, formula_fix, x, add.intercept)
+    fixed <- templist[[1]]
+    shrinkage.fixed <- templist[[2]]
+    formula_fix <- templist[[3]]
+
+    #Default: shrink all fixed effects together except the intercept
+    if(!is.null(fixed) && is.null(shrinkage.fixed)){
+      shrinkage.fixed <- rep(1, length(fixed))
+      if(isTRUE(add.intercept)){ #If shrinkage.fixed is NULL AND there is an intercept!!!
+        shrinkage.fixed <- c(0,shrinkage.fixed)
+      }
+    }
+
+    fixed2 <- fixed
+
+    if(isTRUE(add.intercept)){
+      fixed2 <- c(1,fixed)
+      #Always add 0 to fixed2, needed for the formula
+    } else{fixed2 <- c(0,fixed)}
+
+    ############################
+    ############################
+    ############################
+
+    }
+
     #Make fixed design model matrix MM, this step is only because we need its attributes
     #If you find a bug here, check options("contrasts")$contrasts: unordered should be "contr.treatment" and definitely not "contr.ltfr_MSqRob"!
-    MM <- model.matrix(formula_fix, data=x)
+
+    #Catch error in case x would have 0 rows after removing singularities
+    if(n==0){
+      MM <- x
+    #Default model matrix for the fixed effects
+    } else{MM <- model.matrix(formula_fix, data=x)}
 
     #Make fixed design matrix
     #design_fix <- Matrix::Matrix(MM)
@@ -473,6 +535,24 @@ fit.model=function(protdata, response=NULL, fixed=NULL, random=NULL, add.interce
     Q_fix <- QR_fix[["Q"]]
     R_fix <- QR_fix[["R"]]
 
+    # R_fix2 <- R_fix
+    # Q_fix2 <- Q_fix
+
+    #Intercept will always be first in the design matrix!!!!
+    #Try e.g.: model.matrix(~ -1 + lab + 1 + condition, data=x)
+    #Check if there is an intercept!!!
+    # if(isTRUE(add.intercept)){
+    #
+    # R_fix2[1,] <- R_fix2[1,]/R_fix2[1,1]
+    # Q_fix2[,1] <- 1 #Q_fix2[,1]/Q_fix2[1,1]
+    #
+    # R_fix <- R_fix2
+    # Q_fix <- Q_fix2
+    # }
+
+    # R_fix <- NULL
+    # Q_fix <- MM
+
     ridgeGroupsForm <- NULL
     if(length(groups)!=0){
       ridgeGroups <- vector()
@@ -498,7 +578,7 @@ fit.model=function(protdata, response=NULL, fixed=NULL, random=NULL, add.interce
 
     #If there are also unshrunken fixed effects next to shrunken fixed effects
     #position of unshrunken effects in Q_fix:
-    if(any(shrinkage.fixed==0)){
+    if(any(shrinkage.fixed==0) && nrow(x)>0){
       fixedUnshrGroups <- vector()
 
       for(i in 1:length(unshr_pos)){
@@ -764,8 +844,15 @@ addZerosQR <- function(Q=NULL, R){
   return(emptylmerMod)
 }
 
-#This function adjusts the names for the factors in a data object
-.adjustNames=function(datalist, predictors){
+#' Adjust the names of the elements in a list of dataframes
+#'
+#' @description Given a list of dataframes, this function pastes the colnames of all factors and characters to the elements in these columns.
+#' This is necessary to produces consisten results when fitting with lm or lmer. This function is borderline internal.
+#' @param datalist A list of dataframes.
+#' @param predictors A vector of predictors corresponding to some of the columnames in the list of dataframes for which the given adjustment will be performed if the columns corresponding to these predictors are characters or factors.
+#' @return The list of dataframes with adusted elements.
+#' @export
+adjustNames=function(datalist, predictors){
 
   if(!is.null(predictors)){
   datalist_adj <- lapply(datalist, function(x){
@@ -807,7 +894,7 @@ addZerosQR <- function(Q=NULL, R){
   devianceFunction <- tryCatch(do.call(lme4::mkLmerDevfun, parsedFormula), error=function(e){
     return(NULL)})
 
-  optimizerOutput <- tryCatch(lme4::optimizeLmer(devianceFunction), error=function(e){return(NULL)})
+  optimizerOutput <- suppressWarnings(tryCatch(lme4::optimizeLmer(devianceFunction), error=function(e){return(NULL)}))
 
   #ridgeFit=lme4::lmer(formula, data=data,weights=weights,...)
   ridgeFit=lme4::mkMerMod(
@@ -857,7 +944,7 @@ addZerosQR <- function(Q=NULL, R){
 
     #devianceFunction(environment(devianceFunction)$pp$theta) # one evaluation to ensure all values are set
 
-    optimizerOutput <- tryCatch(lme4::optimizeLmer(devianceFunction), error=function(e){return(NULL)}) #, optimizer = "nloptwrap"
+    optimizerOutput <- suppressWarnings(tryCatch(lme4::optimizeLmer(devianceFunction), error=function(e){return(NULL)})) #, optimizer = "nloptwrap"
 
     #ridgeFit=lme4::lmer(formula, data=data,weights=weights,...)
     # ridgeFit=lme4::mkMerMod(

diff --git a/R/getBetaVcovDf.R b/R/getBetaVcovDf.R
@@ -52,6 +52,7 @@ setGeneric (
 #' @export
 setMethod("getBetaVcovDf", "lm", .getBetaVcovDfLm)
 
+
 .getBetaVcovDfMermod <- function(model, exp_unit=NULL, pars_between=NULL, Ginvoffset = 1e-18){
   # condVar <- function(model) {
   #   s2 <- sigma(model)^2
@@ -82,7 +83,7 @@ setMethod("getBetaVcovDf", "lm", .getBetaVcovDfLm)
 
   #Design matrix for the fixed effects:
   X <- lme4::getME(model,"X")
-  
+
   if(!is.null(attr(X,"scaled:center")) & !is.null(attr(X,"scaled:scale"))){
   X <- X * attr(X, 'scaled:scale') + attr(X, 'scaled:center')
   }
@@ -234,3 +235,4 @@ setMethod("getBetaVcovDf", "lm", .getBetaVcovDfLm)
 #' @include protLM.R
 #' @export
 setMethod("getBetaVcovDf", "lmerMod", .getBetaVcovDfMermod)
+