In [None]:
# use image: scRNAscATAC_20240724

In [1]:
# Find the "elbow" of a sequence by scanning successive drops.
#
# Walks through `x` from the second element on. At each position `i` the drop
# `x[i - 1] - x[i]` is compared against the total drop accumulated so far
# (including the current one); the first position whose drop is smaller than
# `saturation` times that accumulated drop is returned.
#
# Args:
#   x:          Numeric vector (the callers in this notebook pass the
#               per-component standard deviations of an SVD).
#   saturation: Fraction of the accumulated drop below which a single drop is
#               considered negligible. Defaults to 0.01.
#
# Returns:
#   The 1-based index of the first element whose drop is below the threshold,
#   or `length(x)` when no such element exists. Vectors with fewer than two
#   elements have no drops to inspect, so `length(x)` is returned directly.
find_elbow <- function(x, saturation = 0.01) {
    n <- length(x)
    # `2:length(x)` would count downwards (2, 1) for short vectors and index
    # past the end of `x`, so bail out before entering the loop.
    if (n < 2) {
        return(n)
    }

    accum_gap <- 0
    for (i in seq_len(n)[-1]) {
        gap <- x[i - 1] - x[i]
        accum_gap <- accum_gap + gap
        if (gap < saturation * accum_gap) {
            return(i)
        }
    }
    # No drop was small enough: keep every element.
    n
}

In [2]:
library("ArchR")
library("Matrix")
library("rhdf5")

 
# ---- Configuration ----

# Directory that holds the input .h5ad files.
path <- '/data/work/test_data'

# Full list of benchmark datasets and dimensionality-reduction methods.
files <- c('10x_Brain5k.h5ad','10x_PBMC10k.h5ad','Buenrostro_2018.h5ad','Chen_NBT_2019.h5ad','GSE194122_subset.h5ad','Ma_Cell_2020.h5ad','Trevino_Cell_2021.h5ad','Yao_Nature_2021.h5ad', 'Zemke_2023_human_subset.h5ad', 'Zemke_2023_mouse.h5ad')
methods_all <- c('ArchR (TF-logIDF)','ArchR (log(TF-IDF))','ArchR (logTF-logIDF)','Signac (log(TF-IDF))','Signac (TF-logIDF)','Signac (logTF-logIDF)','Signac (IDF)','SnapATAC')

# Restrict this run to a single dataset (overrides the full list above).
files <- c('Zemke_2023_mouse.h5ad')

# `LSIMethod` code passed to ArchR for each ArchR-based method name.
archr_lsi_method <- c(
    'ArchR (TF-logIDF)' = 1,
    'ArchR (log(TF-IDF))' = 2,
    'ArchR (logTF-logIDF)' = 3
)

# `method` code passed to Signac's TF-IDF for each Signac-based method name.
signac_tfidf_method <- c(
    'Signac (log(TF-IDF))' = 1,
    'Signac (TF-logIDF)' = 2,
    'Signac (logTF-logIDF)' = 3,
    'Signac (IDF)' = 4
)

# ---- Helpers ----

# Read the sparse matrix stored in the "X" group of an .h5ad file.
# The `indices`/`indptr`/`data` datasets are used as the row indices, column
# pointers and values of a column-compressed matrix whose dimensions are the
# reversed "shape" attribute. All HDF5 handles are closed on exit, including
# when reading fails.
read_h5ad_matrix <- function(h5ad_path) {
    h5_file <- H5Fopen(h5ad_path, flags = "H5F_ACC_RDONLY")
    on.exit(H5Fclose(h5_file), add = TRUE)
    x_group <- H5Gopen(h5_file, "X")
    # after = FALSE: close children before the file handle.
    on.exit(H5Gclose(x_group), add = TRUE, after = FALSE)
    shape_attr <- H5Aopen(x_group, "shape")
    on.exit(H5Aclose(shape_attr), add = TRUE, after = FALSE)

    sparseMatrix(
        i = x_group$indices, p = x_group$indptr, x = c(x_group$data),
        index1 = FALSE, repr = "C", dims = rev(H5Aread(shape_attr))
    )
}

# Run ArchR's internal LSI with the given `LSIMethod` code and return the
# `matSVD` component of the result.
run_archr_lsi <- function(mat, lsi_method) {
    lsi <- ArchR:::.computeLSI(mat = mat,
        LSIMethod = lsi_method,
        scaleTo = 10^4,
        nDimensions = 30,
        binarize = TRUE,
        outlierQuantiles = NULL,
        seed = 1
    )
    lsi$matSVD
}

# Run Signac TF-IDF (with the given `method` code) followed by a 30-component
# SVD, and keep the leading components up to the elbow of the component
# standard deviations.
run_signac_lsi <- function(mat, tfidf_method) {
    tfidf <- Signac:::RunTFIDF.default(mat, method = tfidf_method)
    svd <- Signac:::RunSVD.default(tfidf, n = 30)
    n_keep <- find_elbow(svd@stdev)
    svd@cell.embeddings[, seq_len(n_keep), drop = FALSE]
}

# Build a SnapATAC object whose `bmat` slot holds the binarized `mat`.
new_binary_snap <- function(mat) {
    snap <- SnapATAC::newSnap()
    snap@bmat <- mat
    SnapATAC::makeBinary(snap, mat = "bmat")
}

# Run SnapATAC diffusion maps on `mat` and return the weighted reduction.
# Matrices with more than 20000 rows are handled by fitting on a random
# 20000-row reference and extending the result to the full matrix.
# NOTE(review): the same matrix is passed unchanged to ArchR/Signac and to
# SnapATAC, and nrow(mat) is treated as the sample count here -- confirm that
# this is the orientation SnapATAC's `bmat` expects.
run_snapatac <- function(mat) {
    if (nrow(mat) <= 20000) {
        x.sp <- new_binary_snap(mat)
        x.sp <- SnapATAC::runDiffusionMaps(
            obj = x.sp,
            input.mat = "bmat",
            num.eigs = 30
        )
        return(SnapATAC:::weightDimReduct(x.sp@smat, 1:30, weight.by.sd = TRUE))
    }

    sample_size <- 20000
    n_dim <- min(sample_size - 2, 30)
    reference <- mat[sample(nrow(mat), size = sample_size, replace = FALSE), ]

    x.ref <- new_binary_snap(reference)
    x.ref <- SnapATAC::runDiffusionMaps(
        obj = x.ref,
        input.mat = "bmat",
        num.eigs = n_dim
    )

    x.sp <- new_binary_snap(mat)
    x.sp <- SnapATAC::runDiffusionMapsExtension(
        obj1 = x.ref,
        obj2 = x.sp,
        input.mat = "bmat"
    )

    SnapATAC:::weightDimReduct(x.sp@smat, seq_len(n_dim), weight.by.sd = TRUE)
}

# ---- Main loop ----

# Each dataset is read once and then pushed through every method. Every
# method re-seeds the RNG before running, so results do not depend on the
# order in which (dataset, method) pairs are processed.
for (dataName in files) {
    # file.path inserts the separator that paste0(path, dataName) omitted.
    data <- read_h5ad_matrix(file.path(path, dataName))

    for (method in methods_all) {
        # Output naming is kept exactly as before so existing consumers of
        # these files keep working.
        output <- paste0('/data/work/embedding_results/',method,dataName,"reduced_dim.tsv")

        set.seed(2022)
        if (method %in% names(archr_lsi_method)) {
            result <- run_archr_lsi(data, archr_lsi_method[[method]])
        } else if (method %in% names(signac_tfidf_method)) {
            result <- run_signac_lsi(data, signac_tfidf_method[[method]])
        } else if (method == 'SnapATAC') {
            result <- run_snapatac(data)
        } else {
            # Previously an unrecognised name was skipped silently.
            stop("Unknown method: ", method, call. = FALSE)
        }

        write.table(result, file = output, row.names = FALSE, col.names = FALSE, sep = "\t")
    }
}


                                                   / |
                                                 /    \
            .                                  /      |.
            \\\                              /        |.
              \\\                          /           `|.
                \\\                      /              |.
                  \                    /                |\
                  \\#####\           /                  ||
                ==###########>      /                   ||
                 \\##==......\    /                     ||
            ______ =       =|__ /__                     ||      \\\
       \               '        ##_______ _____ ,--,__,=##,__   ///
        ,    __==    ___,-,__,--'#'  ==='      `-'    | ##,-/
        -,____,---'       \\####\\________________,--\\_##,/
           ___      .______        ______  __    __  .______      
          /   \     |   _  \      /      ||  |  |  | |   _  \     
         /  ^  \    |  |_) 