In [1]:
suppressMessages(library(ArchR))
suppressMessages(library(Seurat))
suppressMessages(library(Signac))
suppressMessages(library(cowplot))
suppressMessages(library(ggplot2))
suppressMessages(library(dplyr))

In [2]:
# Global run parameters: fixed RNG seed, ArchR worker threads, reference genome.
set.seed(42)

# Leave two cores free for the rest of the system. detectCores() may return NA
# when the core count is unknown, and on machines with one or two cores the
# subtraction would give a non-positive value, so always keep at least 1 thread.
n_cores <- parallel::detectCores()
n_threads <- if (is.na(n_cores)) 1L else max(1L, n_cores - 2L)
addArchRThreads(threads = n_threads)
addArchRGenome("hg38")

Setting default number of Parallel threads to 126.

Setting default genome to Hg38.



In [3]:
# Load the saved ArchR project from the data-integration step (logo suppressed).
proj <- loadArchRProject(
    path = "../../../snATAC/DataIntegration/data/VisiumHeart",
    showLogo = FALSE
)

Successfully loaded ArchRProject!



In [4]:
# Read the metadata table; the first CSV column is used as the row names.
df <- read.csv(
    file = "../../../snATAC/DataIntegration/data/VisiumHeart/metadata.csv",
    row.names = 1
)

# Preview the first six rows.
head(df, n = 6L)

Unnamed: 0_level_0,orig.ident,nCount_peaks,nFeature_peaks,Sample,TSSEnrichment,ReadsInTSS,ReadsInPromoter,ReadsInBlacklist,PromoterRatio,PassQC,⋯,seurat_clusters,cell_type,condition,region,patient_group,global_id,rep,patient,region_novel,patient_id
Unnamed: 0_level_1,<chr>,<int>,<int>,<chr>,<dbl>,<int>,<int>,<int>,<dbl>,<int>,⋯,<int>,<chr>,<chr>,<chr>,<chr>,<chr>,<int>,<chr>,<chr>,<chr>
CK171#AGGCGTCCACCATTCC-1,CK171,62095,34713,CK171,6.245,14669,18011,1136,0.09025808,1,⋯,15,CM,not_defined,BZ,group_1,AKK003 No4 borderzone,1,P2,RZ/BZ,RZ/BZ_P2
CK171#TGATCAGAGGTAAGTT-1,CK171,90454,46242,CK171,8.331,24600,26536,912,0.13673276,1,⋯,3,CM,not_defined,BZ,group_1,AKK003 No4 borderzone,1,P2,RZ/BZ,RZ/BZ_P2
CK171#TAGCATGCAAGTCTCA-1,CK171,73832,39097,CK171,9.02,21378,23169,1163,0.1227887,1,⋯,15,CM,not_defined,BZ,group_1,AKK003 No4 borderzone,1,P2,RZ/BZ,RZ/BZ_P2
CK171#TACATTCCAAACCTAC-1,CK171,90941,46789,CK171,8.8,23434,25827,912,0.13702053,1,⋯,3,CM,not_defined,BZ,group_1,AKK003 No4 borderzone,1,P2,RZ/BZ,RZ/BZ_P2
CK171#CACCACTGTCGCTAGC-1,CK171,76195,40207,CK171,9.127,21954,23478,1032,0.12531759,1,⋯,15,CM,not_defined,BZ,group_1,AKK003 No4 borderzone,1,P2,RZ/BZ,RZ/BZ_P2
CK171#GTCACCTAGGAAGGTA-1,CK171,79884,42256,CK171,8.614,22565,25028,956,0.13525286,1,⋯,15,CM,not_defined,BZ,group_1,AKK003 No4 borderzone,1,P2,RZ/BZ,RZ/BZ_P2


In [5]:
# Distinct cell-type labels present in the metadata.
unique(df[["cell_type"]])

In [6]:
# Keep only rows labelled "Lymphoid"; which() skips rows with a missing
# cell_type, and the original row names are carried over.
df_sub <- df[which(df[["cell_type"]] == "Lymphoid"), , drop = FALSE]

In [8]:
# Number of rows left after the cell-type filter.
dim(df_sub)[1L]

In [9]:
# Count the rows of df_sub per Sample, order samples from most to fewest,
# and keep only samples with more than 10 rows.
df_cell_count <- summarise(group_by(df_sub, Sample), count = n())
df_cell_count <- arrange(df_cell_count, desc(count))
df_cell_count <- dplyr::filter(df_cell_count, count > 10)

df_cell_count

Sample,count
<chr>,<int>
CK382,197
CK383,99
CK340,92
CK351,86
CK354,75
CK339,46
CK343,42
CK349,42
CK350,39
CK380,37


In [10]:
# We don't want to include samples with too few cells: keep only the rows
# whose Sample is listed in df_cell_count.
df_sub <- df_sub[df_sub[["Sample"]] %in% df_cell_count[["Sample"]], , drop = FALSE]

In [9]:
# Restrict the project to the cells named by the row names of df_sub and
# write the subset project to ../data/snATAC.
proj <- subsetArchRProject(
    ArchRProj = proj,
    cells = row.names(df_sub),
    outputDirectory = "../data/snATAC",
    force = TRUE
)

# Add the tile and gene-score matrices to the subset project; both calls pass
# force = TRUE.
proj <- addTileMatrix(input = proj, force = TRUE)
proj <- addGeneScoreMatrix(input = proj, force = TRUE)

Copying ArchRProject to new outputDirectory : /data/scATA/SingleCellOpenChromatin/local/VisiumHeartRevision/IntegrativeAnalysis/Fibroblast/data/snATAC

Copying Arrow Files...



In [None]:
## save peak matrix
# Pull the peak-by-cell matrix out of the project as a SummarizedExperiment.
peakMatrix <- getMatrixFromProject(proj,
                                   useMatrix = "PeakMatrix")

# Use the public accessors rather than reaching into S4 slots; assay() also
# attaches the experiment's column names to the returned matrix.
peak_counts <- SummarizedExperiment::assay(peakMatrix, "PeakMatrix")
peak_ranges <- SummarizedExperiment::rowRanges(peakMatrix)

# Name each peak "<seqname>_<start>_<end>".
rownames(peak_counts) <- paste(as.character(GenomeInfoDb::seqnames(peak_ranges)),
                               BiocGenerics::start(peak_ranges),
                               BiocGenerics::end(peak_ranges),
                               sep = "_")

saveRDS(peak_counts, file = "../data/snATAC/PeakMatrix.Rds")

## save gene matrix
# Same export for the gene-score matrix; rows are labelled with the `name`
# column of the row metadata.
atac <- getMatrixFromProject(ArchRProj = proj,
                             useMatrix = "GeneScoreMatrix")

gene_counts <- SummarizedExperiment::assay(atac, "GeneScoreMatrix")
rownames(gene_counts) <- SummarizedExperiment::rowData(atac)$name

saveRDS(gene_counts, file = "../data/snATAC/GeneScoreMatrix.Rds")

In [None]:
# Save the project in place, in its current output directory, without
# reloading it afterwards.
saveArchRProject(
    ArchRProj = proj,
    outputDirectory = getOutputDirectory(proj),
    load = FALSE
)

In [None]:
# Print the R version and the loaded package versions for this run.
sessionInfo()