-
Notifications
You must be signed in to change notification settings - Fork 2
/
fastSVD.R
45 lines (40 loc) · 1.57 KB
/
fastSVD.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
#' fastSVD
#'
#' Perform SVD. The input datasets are the already preprocessed (filtering) and global-scale-nomalized datasets from Seurat.
#'
#'
#' @author Mengjie Chen, Qi Zhan
#' @param samples.list A list of input datasets preprocessed by Seurat.
#' @param nPC Total number of PCs to compute and store (30 by default).
#' @return A list consists of: PCs; Loadings; Centers (centers of samples before SVD); batch.id.forPC (batch id of cells in the samples for SVD); Raw representing the input for SVD; cells are the cell names for cells that are included and used for SVD; genes are the gene names for genes that are included and used for SVD.
#' @export
fastSVD <- function(samples.list, nPC = 30){
samples <- samples.list[[1]]
batch.id <- rep(1,ncol(samples))
for (i in 2:length(samples.list)) {
sample <- samples.list[[i]]
samples <- cbind(samples, sample)
batch.id <- c(batch.id, rep(i, ncol(sample)))
}
names(batch.id)<-colnames(samples)
cells<-colnames(samples)
genes<-rownames(samples)
require(irlba)
# center samples before SVD
xx<-samples
yy <- apply(xx, 1, function(x){
x - mean(x)
})
svd.base <- irlba(yy, nv = nPC)
centers <- apply(xx, 1, function(x){
mean(x)
})
PCs <- svd.base$u%*%diag(svd.base$d[1:nPC])
Loadings <- svd.base$v
rownames(PCs)<-cells
colnames(PCs)<-paste0("PC", seq_along(1:nPC))
rownames(Loadings)<-genes
colnames(Loadings)<-paste0("PC", seq_along(1:nPC))
PCA <- list(PCs = PCs, Loadings = Loadings, Centers = centers, batch.id.forPC=batch.id, Raw=yy, cells=cells, genes=genes)
return(PCA)
}