In [1]:
suppressMessages(library(ArchR))
suppressMessages(library(Seurat))
suppressMessages(library(Signac))
suppressMessages(library(harmony))
suppressMessages(library(dplyr))
suppressMessages(library(cowplot))
suppressMessages(library(harmony))
suppressMessages(library(Nebulosa))
suppressMessages(library(ggpubr))
suppressMessages(library(Ipaper))
suppressMessages(library(phateR))
suppressMessages(library(parallel))

In [2]:
## Fix the RNG seed so stochastic steps are repeatable, and print the working
## directory because every path used below is relative to it.
set.seed(42)
getwd()

## Leave two cores free for the rest of the system, but never ask ArchR for
## fewer than one thread: detectCores() can return NA, and on machines with
## one or two cores the subtraction would give zero or a negative number.
n.cores <- parallel::detectCores()
n.threads <- if (is.na(n.cores)) 1 else max(1, n.cores - 2)

addArchRThreads(threads = n.threads)
addArchRGenome("hg38")

Setting default number of Parallel threads to 126.

Setting default genome to Hg38.



In [3]:
## Load the table that pairs each ATAC barcode with an RNA barcode and
## preview its first rows.
df.matching <- readRDS(file = "../data/coembed/ATAC_RNA_matching.rds")
head(x = df.matching)

Unnamed: 0_level_0,ATAC,RNA,cell_name
Unnamed: 0_level_1,<chr>,<chr>,<chr>
1,CK171#ATGTTTCTCACTTACT-1,ATTTCTGAGCGTTAGG-1_1_1_1_1_1_1_1_1_1_1_1_1,cell-1
2,CK171#CCAGAATAGTCGCCTG-1,CAATACGGTCAAATCC-1_2_1_1_1_1,cell-2
3,CK171#CCCAGAGCATTTAGGC-1,AGGAAATTCTCCGATC-1_2_1_1_1_1_1,cell-3
4,CK171#ACTTCCGGTTAGTAGA-1,ACCACAATCTCGCGTT-1_2_1_1_1_1_1_1_1_1_1_1_1_1_1,cell-4
5,CK171#TACATGGGTTCAGTTG-1,TGATCAGTCCGCTTAC-1_2,cell-5
6,CK171#CTTCTAATCTATCCTA-1,CTCTGGTTCCACGTAA-1_2_1_1,cell-6


In [4]:
## Load the annotated co-embedding object that the subsets below are cut from.
coembed <- readRDS(file = "../data/coembed/coembed.annotation.Rds")

In [8]:
## Keep only the columns of the co-embedding named in the ATAC column of the
## matching table, then display the resulting object.
obj.atac <- coembed[, df.matching[["ATAC"]]]
obj.atac

An object of class Seurat 
135323 features across 4937 samples within 3 assays 
Active assay: RNA (28933 features, 2000 variable features)
 2 other assays present: peaks, GeneActivity
 5 dimensional reductions calculated: pca, umap, harmony, umap_harmony, umap_harmony_v2

In [None]:
## Subset the ATAC project: load the full ArchR project, then write a copy
## restricted to the cells of obj.atac into its own output directory.
proj <- loadArchRProject(path = "../data/snATAC", showLogo = FALSE)

proj <- subsetArchRProject(
    ArchRProj = proj,
    cells = colnames(obj.atac),
    outputDirectory = "../data/snATAC_subset",
    force = TRUE
)

In [None]:
## Display the subset project as a quick check after subsetting.
proj

In [None]:
## add cell states information
## Copy the `annotation` column of the Seurat metadata into the ArchR project.
## The metadata is re-ordered to the project's cell order so that each label
## stays paired with the cell name passed to addCellColData().
cell.names <- getCellNames(proj)
meta.data <- as.data.frame(obj.atac@meta.data)

## Indexing a data frame by an unknown row name returns an all-NA row instead
## of failing, which would silently write NA labels into the project.
missing.cells <- setdiff(cell.names, rownames(meta.data))
if (length(missing.cells) > 0) {
    stop(length(missing.cells),
         " cells of the ArchR project are absent from obj.atac metadata",
         call. = FALSE)
}
if (!("annotation" %in% colnames(meta.data))) {
    stop("obj.atac metadata has no 'annotation' column", call. = FALSE)
}

meta.data <- meta.data[cell.names, , drop = FALSE]
annotation <- meta.data$annotation

proj <- addCellColData(proj,
                       data = as.character(annotation),
                       cells = cell.names,
                       name = "annotation",
                       force = TRUE)

In [None]:
## add harmony
## Store the Harmony embedding computed on the Seurat object as a reduced
## dimension of the ArchR project, with rows in the project's cell order.
## getCellNames() is the documented accessor for the project's cell names;
## indexing by name errors out if a cell is missing from the embedding.
embedding <- obj.atac@reductions$harmony@cell.embeddings
embedding <- embedding[getCellNames(proj), , drop = FALSE]

proj@reducedDims[["Harmony"]] <- SimpleList(
    matDR = as.data.frame(embedding),
    params = NULL,
    date = Sys.time(),
    scaleDims = NA, # Do not scale dims after
    corToDepth = NA
)

In [None]:
## add umap
## Store the Harmony-based UMAP from the Seurat object as the project's
## "umap" embedding, with rows in the project's cell order.
## getCellNames() is the documented accessor for the project's cell names;
## indexing by name errors out if a cell is missing from the embedding.
embedding <- obj.atac@reductions$umap_harmony_v2@cell.embeddings
embedding <- embedding[getCellNames(proj), , drop = FALSE]

## Column names follow the "<reducedDims>#UMAP_Dimension_<i>" pattern
## (presumably what ArchR plotting helpers expect -- confirm if renaming).
colnames(embedding) <- c("Harmony#UMAP_Dimension_1",
                         "Harmony#UMAP_Dimension_2")

proj@embeddings[["umap"]] <- SimpleList(df = as.data.frame(embedding),
                                        params = NULL)

In [None]:
## Peak calling per annotated cell state: build group coverages keyed on
## `annotation`, derive a reproducible peak set with MACS2, then add a
## binarized peak matrix to the project.
pathToMacs2 <- findMacs2()

proj <- addGroupCoverages(
    ArchRProj = proj,
    groupBy = "annotation",
    force = TRUE
)

proj <- addReproduciblePeakSet(ArchRProj = proj,
                               groupBy = "annotation",
                               pathToMacs2 = pathToMacs2)

proj <- addPeakMatrix(ArchRProj = proj, binarize = TRUE, force = TRUE)

In [None]:
## chromVAR deviations: annotate peaks with JASPAR2020 motifs under the name
## "Motif", add background peaks, then compute the deviations matrix for that
## annotation on binarized data.
proj <- addMotifAnnotations(
    ArchRProj = proj,
    motifSet = "JASPAR2020",
    name = "Motif",
    force = TRUE
)

proj <- addBgdPeaks(ArchRProj = proj, force = TRUE)

proj <- addDeviationsMatrix(ArchRProj = proj,
                            peakAnnotation = "Motif",
                            binarize = TRUE,
                            force = TRUE)

In [None]:
## add gene expression
## Attach the RNA counts of each matched RNA cell to its paired ATAC cell and
## store them in the ArchR project as a gene expression matrix.

## Keep only the ATAC/RNA pairs whose ATAC cell is present in obj.atac.
df.matching.sub <- df.matching %>%
    dplyr::filter(ATAC %in% colnames(obj.atac))

obj.rna <- coembed[, df.matching.sub$RNA]

obj.rna

geneMatrix <- getMatrixFromProject(ArchRProj = proj,
                                   useMatrix = "GeneScoreMatrix")

## The ATAC barcodes are assigned to the columns by position, so the columns
## must be in exactly the order of the matching table. Index the counts by RNA
## barcode rather than relying on the column order of the Seurat subset
## (NOTE(review): Seurat subsetting may keep the parent object's cell order --
## confirm). An unknown barcode raises a subscript error here.
gex.mat <- as.matrix(
    obj.rna@assays$RNA@counts[, df.matching.sub$RNA, drop = FALSE]
)
colnames(gex.mat) <- df.matching.sub$ATAC

## Restrict to genes present in both the RNA counts and the gene score matrix.
rowRanges <- rowData(geneMatrix)
sel_genes <- intersect(rownames(gex.mat), rowRanges$name)

length(sel_genes)

gex.mat <- gex.mat[sel_genes, ]
rownames(rowRanges) <- rowRanges$name
rowRanges <- rowRanges[sel_genes, ]

## Build one short range per gene, anchored at the start coordinate recorded
## in the gene score matrix row data.
rowRanges <- GRanges(rowRanges$seqnames,
                     IRanges(start = as.numeric(rowRanges$start),
                             end = as.numeric(rowRanges$start) + 1))

seRNA <- SummarizedExperiment(assays = SimpleList(counts = gex.mat),
                              rowRanges = rowRanges)

proj <- addGeneExpressionMatrix(proj,
                                seRNA = seRNA,
                                force = TRUE)

In [None]:
## Persist the results: the ATAC Seurat object goes next to the subset ArchR
## project, and the project itself is saved without being reloaded.
saveRDS(object = obj.atac, file = "../data/snATAC_subset/snATAC.Rds")

saveArchRProject(ArchRProj = proj, load = FALSE)

In [None]:
## Record the R version and the loaded package versions for reproducibility.
sessionInfo()