Skip to content

Commit

Permalink
Add corSparse
Browse files Browse the repository at this point in the history
  • Loading branch information
timoast committed Dec 22, 2023
1 parent 67ef04a commit ba9cf7e
Show file tree
Hide file tree
Showing 2 changed files with 55 additions and 9 deletions.
5 changes: 1 addition & 4 deletions R/links.R
Original file line number Diff line number Diff line change
Expand Up @@ -240,9 +240,6 @@ LinkPeaks <- function(
gene.id = FALSE,
verbose = TRUE
) {
if (!requireNamespace(package = "qlcMatrix", quietly = TRUE)) {
stop("Please install qlcMatrix: install.packages('qlcMatrix')")
}
if (!inherits(x = object[[peak.assay]], what = "ChromatinAssay")) {
stop("The requested assay is not a ChromatinAssay")
}
Expand All @@ -259,7 +256,7 @@ LinkPeaks <- function(
}
features.match <- c("GC.percent", "count", "sequence.length")
if (method == "pearson") {
cor_method <- qlcMatrix::corSparse
cor_method <- corSparse
} else if (method == "spearman") {
cor_method <- SparseSpearmanCor
} else {
Expand Down
59 changes: 54 additions & 5 deletions R/utilities.R
Original file line number Diff line number Diff line change
Expand Up @@ -254,6 +254,58 @@ ClosestFeature <- function(
return(df)
}


#' Sparse matrix correlation
#'
#' Compute the Pearson correlation matrix between
#' columns of two sparse matrices.
#'
#' Originally from
#' \url{http://stackoverflow.com/questions/5888287/running-cor-or-any-variant-over-a-sparse-matrix-in-r}
#' and the qlcMatrix package.
#'
#' @param X A matrix
#' @param Y A matrix
#' @param cov return covariance matrix
#' @export
#' @author Michael Cysouw, Karsten Looschen
#' @importMethodsFrom Matrix colMeans
# covmat uses E[(X-muX)'(Y-muY)] = E[X'Y] - muX'muY
# with sample correction n/(n-1) this leads to cov = ( X'Y - n*muX'muY ) / (n-1)
#
# the sd in the case Y!=NULL uses E[X-mu]^2 = E[X^2]-mu^2
# with sample correction n/(n-1) this leads to sd^2 = ( X^2 - n*mu^2 ) / (n-1)
#
# Note that results larger than 1e4 x 1e4 will become very slow, because the resulting matrix is not sparse anymore.
corSparse <- function(X, Y = NULL, cov = FALSE) {
X <- as(object = X, Class = "CsparseMatrix")
n <- nrow(x = X)
muX <- colMeans(x = X)

if (!is.null(x = Y)) {
if (nrow(x = X) != nrow(x = Y)) {
stop("Matrices must contain the same number of rows")
}
Y <- as(object = Y, Class = "CsparseMatrix")
muY <- colMeans(x = Y)
covmat <- ( as.matrix(x = crossprod(x = X, y = Y)) - n * tcrossprod(x = muX, y = muY) ) / (n-1)
sdvecX <- sqrt( (colSums(x = X^2) - n*muX^2) / (n-1) )
sdvecY <- sqrt( (colSums(x = Y^2) - n*muY^2) / (n-1) )
cormat <- covmat / tcrossprod(x = sdvecX, y = sdvecY)
} else {
covmat <- ( as.matrix(crossprod(x = X)) - n * tcrossprod(x = muX) ) / (n-1)
sdvec <- sqrt(x = diag(x = covmat))
cormat <- covmat / tcrossprod(x = sdvec)
}
if (cov) {
dimnames(x = covmat) <- NULL
return(covmat)
} else {
dimnames(x = cormat) <- NULL
return(cormat)
}
}

#' Create gene activity matrix
#'
#' Compute counts per cell in gene body and promoter region.
Expand Down Expand Up @@ -2263,15 +2315,12 @@ SparsifiedRanks <- function(X){
}

SparseSpearmanCor <- function(X, Y = NULL, cov = FALSE) {
if (!requireNamespace(package = "qlcMatrix", quietly = TRUE)) {
stop("Please install qlcMatrix: install.packages('qlcMatrix')")
}
# Get sparsified ranks
rankX <- SparsifiedRanks(X = X)
if (is.null(Y)){
# Calculate pearson correlation on rank matrices
return (qlcMatrix::corSparse(X = rankX, cov = cov))
return (corSparse(X = rankX, cov = cov))
}
rankY <- SparsifiedRanks(X = Y)
return(qlcMatrix::corSparse(X = rankX, Y = rankY, cov = cov))
return(corSparse(X = rankX, Y = rankY, cov = cov))
}

0 comments on commit ba9cf7e

Please sign in to comment.