first upload

nghiavtr · Jan 5, 2015 · 163e26a · 163e26a
1 parent 5ebb505
commit 163e26a
Show file tree

Hide file tree

Showing 32 changed files with 1,317 additions and 0 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -0,0 +1,14 @@
+Package: speaq
+Type: Package
+Title: An R-package for NMR spectrum alignment and quantitation
+Version: 1.2.0
+Date: 2015-05-01
+Author: Trung Nghia VU, Kris Laukens and Dirk Valkenborg
+Maintainer: Trung Nghia VU <nghiavtr@gmail.com>
+Description: Integrated NMR spectrum alignment and quantitation based on Hierarchial Clustering Based Peak Alignment (CluPA) 
+Depends: R (>= 3.0.0), MassSpecWavelet
+License: Apache License 2.0
+biocViews: NuclearMagneticResonance, Bioinformatics, Spectrum,
+        Alignment, Quantitation,Proteomics, Clustering,
+        MassSpectrometry, NMR
+Packaged: 2015-01-04 20:26:17 UTC; trungvu
diff --git a/NAMESPACE b/NAMESPACE
@@ -0,0 +1,6 @@
+importFrom(graphics, abline, contour, hist, legend, lines, par, plot,
+           points, segments, text, title)
+importFrom(stats, fft)
+importFrom(MassSpecWavelet, peakDetectionCWT)
+
+export(detectSpecPeaks,dohCluster,dohClusterCustommedSegments,doShift,drawSpec,findRef,returnLocalMaxima,export2file,findSegPeakList,findShiftStepFFT,hClustAlign,BWR,drawBW,createNullSampling)
diff --git a/R/BWR.R b/R/BWR.R
@@ -0,0 +1,34 @@
+BWR <-function(X, groupLabel)
+{
+  groupNum=length(levels(groupLabel));
+  m=colMeans(X);
+  groupMean=list();
+  groupW=list();
+  B=X;
+  B[,]=0;
+  for (i in 1:groupNum){
+    groupLabeli=which(groupLabel==levels(groupLabel)[i]);
+    Xi=X[groupLabeli,]
+    mi=colMeans(Xi);
+    groupMean[[i]]=mi;
+    tempi=matrix(data=rep(mi,nrow(Xi)),nrow=nrow(Xi),
+        ncol=length(mi),byrow=TRUE);
+
+    Wi=(Xi-tempi)^2; 
+    groupW[[i]]=Wi;
+    temp=matrix(data=rep(m,nrow(Xi)),nrow=nrow(Xi),
+        ncol=length(m),byrow=TRUE);
+    B[groupLabeli,]=(tempi-temp)^2;    
+  }  
+
+  BW=double(ncol(B));
+  for (i in 1:length(BW)){
+    BW_denominator=0;
+    for (j in 1:groupNum){
+      BW_denominator=BW_denominator+sum(groupW[[j]][,i]);
+    }
+    Bw_numerator=sum(B[,i]);
+    BW[i]=Bw_numerator/BW_denominator;    
+  }
+  return(BW); 
+}
diff --git a/R/createNullSampling.R b/R/createNullSampling.R
@@ -0,0 +1,32 @@
+createNullSampling <-function(X, groupLabel, N=1000, 
+      filename=NULL, verbose=TRUE){
+
+  groupNum=length(levels(groupLabel));
+  samplePool=X;  
+  groupMean=list();
+
+  for (i in 1:groupNum){
+    groupLabeli=which(groupLabel==levels(groupLabel)[i]);
+    Xi=X[groupLabeli,]
+    mi=colMeans(Xi);
+    groupMean[[i]]=mi;    
+  }  
+
+  for (i in 1:nrow(samplePool)){
+    samplePool[i,]=
+      X[i,]-groupMean[[which(levels(groupLabel)==groupLabel[i])]];
+  }
+
+  L=nrow(X);    
+  H0=matrix(data=0,nrow=N,ncol=ncol(X));
+
+  for(i in 1 : N){
+    if (verbose) cat(i,"\n");
+    index=sample(L);    
+    H0[i,]=BWR(samplePool[index,],groupLabel);
+  }
+
+  if (!is.null(filename))
+     write.csv(H0,filename,row.names=FALSE,col.names=FALSE);
+  return(H0);
+}
diff --git a/R/detectSpecPeaks.R b/R/detectSpecPeaks.R
@@ -0,0 +1,43 @@
+detectSpecPeaks <-function(X, nDivRange=128, scales=seq(1,16,2), 
+    baselineThresh=50000, SNR.Th=-1, verbose=TRUE){
+nFea=ncol(X);
+nSamp=nrow(X);
+noiseEsp=0.005;
+if (SNR.Th<0) SNR.Th=max(scales) * 0.05; 
+pList=NULL;
+for (i in 1:nSamp){
+  myPeakRes=NULL;
+  mySpec=X[i,];
+  for (k in 1:length(nDivRange)){
+    divR=nDivRange[k];
+    for (j in 1:(trunc(nFea/divR)-3)){
+    startR=(j-1)*divR+1;
+    if (startR>=nFea) startR=nFea;
+    endR=(j+3)*divR;
+    if (endR>nFea) endR=nFea;
+
+    xRange=mySpec[startR:endR];
+    xMean=mean(xRange);
+    xMedian=median(xRange);
+    if ((xMean==xMedian)||abs(xMean-xMedian)/((xMean+xMedian)*2)<noiseEsp){
+      next;
+    }
+    else{
+      peakInfo=
+        peakDetectionCWT(mySpec[startR:endR],scales=scales,SNR.Th=SNR.Th)
+      majorPeakInfo=peakInfo$majorPeakInfo
+      if (length(majorPeakInfo$peakIndex)>0){
+          myPeakRes=c(myPeakRes,majorPeakInfo$peakIndex+startR-1);
+      }
+    }
+    }
+  }
+  pList[i]=list(myPeakRes);
+  pList[[i]]=sort(unique(pList[[i]]));
+  pList[[i]]=pList[[i]][which(X[i,pList[[i]]]>baselineThresh)]
+  pList[[i]]=sort(pList[[i]]);
+  if (verbose) cat("\n Spectrum ",i," has ",length(pList[[i]])," peaks");
+}
+
+return (pList)
+}
diff --git a/R/doShift.R b/R/doShift.R
@@ -0,0 +1,15 @@
+doShift <-function(specSeg, shiftStep){
+	nFea=length(specSeg);
+	newSegment=double(nFea);
+	for (j in 1:nFea)
+	if (shiftStep+j>0&&shiftStep+j<=nFea) 
+		newSegment[shiftStep+j]=specSeg[j];
+	if (shiftStep>0){
+		for (j in 1: shiftStep) newSegment[j]=newSegment[shiftStep+1];
+	}
+	else{
+		for (j in (nFea+shiftStep): nFea) 
+			newSegment[j]=newSegment[(nFea+shiftStep-1)];
+	}
+	return (newSegment);
+}
diff --git a/R/dohCluster.R b/R/dohCluster.R
@@ -0,0 +1,31 @@
+dohCluster <-function(X, peakList, refInd=0, 
+    maxShift =100, acceptLostPeak=TRUE, verbose=TRUE){
+  Y=X;
+  peakListNew=peakList;
+
+  if (verbose) startTime=proc.time();  
+  refSpec=Y[refInd,];		
+  for (tarInd in 1: nrow(X))
+  if (tarInd!=refInd)
+  {
+      if (verbose) cat("\n aligning spectrum ",tarInd);
+      targetSpec=Y[tarInd,];
+      myPeakList=c(peakList[[refInd]],peakList[[tarInd]]);
+      myPeakLabel=double(length(myPeakList));
+      for (i in 1:length(peakList[[refInd]]) ) myPeakLabel[i]=1;
+      startP=1;
+      endP=length(targetSpec);
+      res=hClustAlign(refSpec,targetSpec,myPeakList,myPeakLabel,startP,endP,
+        maxShift=maxShift,acceptLostPeak=acceptLostPeak)
+      Y[tarInd,]=res$tarSpec;
+
+      peakListNew[[tarInd]]=
+        res$PeakList[(length(peakList[[refInd]])+1):length(myPeakList)]
+  }
+  peakList=peakListNew;
+  if (verbose){
+    endTime=proc.time();
+    cat("\n Alignment time: ",(endTime[3]-startTime[3])/60," minutes");
+  }
+  return(Y);
+}
diff --git a/R/dohClusterCustommedSegments.R b/R/dohClusterCustommedSegments.R
@@ -0,0 +1,58 @@
+dohClusterCustommedSegments <-function(X, peakList, refInd, maxShift,
+     acceptLostPeak=TRUE, infoFilename, minSegSize=128,verbose=TRUE){
+
+  myinfo=read.csv(infoFilename,header=TRUE,sep=",");
+  myinfo=as.matrix(myinfo);
+
+  if (myinfo[1,1]>minSegSize) 
+    mysegments=c(1,myinfo[1,1]-1,0,0,0) else mysegments=c();
+  i = 0;
+  if (nrow(myinfo)>1){   
+   for (i in 1:(nrow(myinfo)-1)){
+    mysegments=c(mysegments,c(myinfo[i,]))
+    if (myinfo[i+1,1]-myinfo[i,2]>minSegSize)
+      mysegments=c(mysegments,c(myinfo[i,2]+1,myinfo[i+1,1]-1,0,0,0))
+   }
+  }
+  mysegments=c(mysegments,c(myinfo[i+1,]))  
+
+  if (ncol(X)-myinfo[i+1,2]-1>minSegSize) 
+    mysegments=c(mysegments,c(myinfo[i+1,2]+1,ncol(X),0,0,0))
+
+  mysegments=matrix(data=mysegments,nrow=length(mysegments)/5,
+              ncol=5,byrow=TRUE,dimnames=NULL)
+
+  mysegments[which(mysegments[,4]==0),4]=refInd;
+  mysegments[which(mysegments[,5]==0),5]=maxShift;
+
+  if (sum(mysegments[,3]!=0)==0){
+    cat("\n No segments are set for alignment! Please put 
+        at least 1 row in ",infoFilename, " containing forAlign=1.")
+    return;
+  }
+
+  Y=X;
+
+  for (i in 1:nrow(mysegments))
+  if (mysegments[i,3]!=0)
+  {
+    if (verbose)
+    cat("\n Doing alignment a segment from ",
+        mysegments[i,1]," to ",mysegments[i,2]," ...");
+
+    segmentpeakList=peakList;
+    for (j in 1:length(peakList)){
+      segmentpeakList[[j]]=
+          findSegPeakList(peakList[[j]],mysegments[i,1],mysegments[i,2]);
+    }    
+    Y[,c(mysegments[i,1]:mysegments[i,2])]=
+    dohCluster(X[,c(mysegments[i,1]:mysegments[i,2])],peakList=segmentpeakList,
+        refInd=mysegments[i,4],maxShift =mysegments[i,5],
+        acceptLostPeak=acceptLostPeak, verbose=verbose);    
+  }else{
+    if (verbose)
+      cat("\n The segment ",
+      mysegments[i,1],"-",mysegments[i,2], " is not aligned");
+  }  
+ return(Y)
+}
diff --git a/R/drawBW.R b/R/drawBW.R
@@ -0,0 +1,18 @@
+drawBW <-function(BW, perc, X, startP=-1, endP=-1, groupLabel=NULL,
+	 highBound=-1, lowBound=-1, nAxisPos=4, offside=0){
+  if (startP==-1) startP=1;
+  if (endP==-1) endP=ncol(X);
+  GraphRange<-c(startP:endP); 
+  op=par(mfrow=c(2,1))
+
+  plot(BW[GraphRange],ylim=c(min(c(BW[GraphRange]),perc[GraphRange]),
+    max(c(BW[GraphRange]),perc[GraphRange])),type="n", ylab="BW",
+  xlab="index", xaxt="n")
+  tempVal =trunc(length(GraphRange)/nAxisPos);
+  xPos=c(0:nAxisPos) * tempVal; 
+  axis(1,at=xPos,groupLabel=xPos+startP+offside);
+  lines(BW[GraphRange],col= "blue")
+  lines(perc[GraphRange],col= "black")
+  drawSpec(X,xlab="index",ylab="intensity",startP=startP,endP=endP,groupLabel=groupLabel,
+    offside=offside,highBound=highBound,lowBound=lowBound)
+}
diff --git a/R/drawSpec.R b/R/drawSpec.R
@@ -0,0 +1,64 @@
+drawSpec <-function(X, startP=-1, endP=-1, groupLabel=NULL, useLog=-1,
+	 highBound=-1, lowBound=-1, xlab=NULL, ylab=NULL, main=NULL,
+	 nAxisPos=4, offside=0)
+{
+    groupLabel_name=groupLabel;
+	X=as.data.frame(X);
+	colnames(X)=c(1:ncol(X));
+	X=as.matrix(X);
+	if (highBound!=-1){
+		for (i in 1:nrow(X)){
+			myIndex=which(X[i,]>highBound);
+			X[i,myIndex]=highBound;
+		}		
+	}
+
+	if (lowBound!=-1){
+		for (i in 1:nrow(X)){
+			myIndex=which(X[i,]<lowBound);
+			X[i,myIndex]=lowBound;
+		}		
+	}
+
+	if (is.null(groupLabel)){
+		groupLabel=c(1:nrow(X));
+		groupLabel=as.factor(groupLabel);
+	}else{
+		levels(groupLabel) =c(1:length(levels(groupLabel)))
+	}
+
+	if (startP==-1) startP=1;
+	if (endP==-1) endP=ncol(X);
+
+	if (is.null(xlab)){xlab='index' }
+	if (is.null(ylab)){ylab='intensity' }
+    if (is.null(main)){main=paste(' ',startP+offside,'-',endP+offside)}
+
+	GraphRange<-c(startP:endP); 
+
+	yn<-X[,GraphRange];
+	if (useLog!=-1) yn=log(yn);
+
+	plot(yn[1,],ylim=c(min(yn),max(yn)),type="n",
+	ylab=ylab,
+	xlab=xlab,
+	main=main,
+	xaxt="n" 
+	)
+	tempVal =trunc(length(GraphRange)/nAxisPos);
+	xPos=c(0:nAxisPos) * tempVal; 
+	axis(1,at=xPos,groupLabel=xPos+startP+offside);
+
+	for(i in 1:length(levels(groupLabel))){
+	  groupLabelIdx=which(groupLabel==levels(groupLabel)[i]);
+	  for (j in 1:length(groupLabelIdx)){
+		lines(yn[groupLabelIdx[j],],col=as.integer(levels(groupLabel)[i]))		
+	  }
+	}
+
+	 if (!is.null(groupLabel_name)){
+		 legendPos="topleft";
+		 legend(legendPos,levels(groupLabel_name),col=as.integer(levels(groupLabel)),
+		 text.col="black",pch=c(19,19),bg='gray90')
+	 }
+}
diff --git a/R/export2file.R b/R/export2file.R
@@ -0,0 +1,13 @@
+export2file <-function(X, dirPath="./", fileList="InputFiles.csv",
+	 		saveDirPath="outDir"){
+	dir.create(saveDirPath)
+	FileList=list.files(dirPath);
+	inputFileName=paste(dirPath,fileList,sep="");
+	inputFile=read.csv(inputFileName);
+	outputFile=inputFile[,3];
+	for (i in 1:length(outputFile)){
+		fn=paste(saveDirPath,"/",as.character(outputFile[i]),sep="");
+		write.table(X[i,],fn,append= FALSE,sep=",",
+			col.names=FALSE,row.names=FALSE);
+	}
+}
diff --git a/R/findRef.R b/R/findRef.R
@@ -0,0 +1,22 @@
+findRef <-function(peakList)
+{
+disS=matrix(data=NA,ncol=length(peakList),nrow=length(peakList));
+sumDis=double(length(peakList));
+for(refInd in 1:length(peakList)){
+	for(tarInd in 1:length(peakList))
+	if (refInd!=tarInd)
+	{
+		disS[refInd,tarInd]=0;
+		for (i in 1:length(peakList[[tarInd]]))
+			disS[refInd,tarInd]=disS[refInd,tarInd]+
+				min(abs(peakList[[tarInd]][i]-peakList[[refInd]]));
+	}
+}
+
+for(refInd in 1:length(peakList)){
+	disS[refInd,refInd]=0;
+	sumDis[refInd]=sum(disS[refInd,]);
+}
+orderSumdis=order(sumDis);
+return(list(refInd=orderSumdis[1],orderSpec=orderSumdis));
+}
diff --git a/R/findSegPeakList.R b/R/findSegPeakList.R
@@ -0,0 +1,9 @@
+findSegPeakList <-function(peakList, startP, endP){
+  res=0;  
+  for (i in 1:length(peakList)){    
+    if (peakList[i]>startP&&peakList[i]<endP){    
+      res=c(res,peakList[i]-startP+1);
+    }    
+  }
+  return(res[-1])
+}