In [1]:
suppressMessages(library(GenomicRanges))
suppressMessages(library(GenomeInfoDb))
suppressMessages(library(data.table))
suppressMessages(library(ggplot2))
suppressMessages(library(ArchR))
suppressMessages(library(Seurat))
suppressMessages(library(Signac))

In [2]:
# Fix the RNG seed so that any randomised step in this session is repeatable.
set.seed(seed = 42)

# Set ArchR's default number of parallel threads.
addArchRThreads(threads = 64)

Setting default number of Parallel threads to 64.



In [3]:
# Load the previously saved ArchR project from disk (logo banner suppressed).
proj <- loadArchRProject(
    path = "./ArchRProject_T",
    showLogo = FALSE
)

Successfully loaded ArchRProject!



In [4]:
# Root directory holding one sub-directory per sample; each sub-directory is
# expected to contain that sample's fragments.tsv.gz.
dataPath <- "/data/hanxue/BCY_ATAC/scATAC/fragments"

# Sample identifiers to process (also used as sub-directory names).
batches <- c(
    "10T", "10N", "11T", "12N", "12T", "13T",
    "18T", "19T", "20T", "21T", "22T"
)

In [5]:
# Build per-sample fragment tables and GRanges for the cells kept in `proj`.
#
# For every sample in `batches`: read its fragments file, keep only the
# fragments whose barcode belongs to a cell of that sample in the ArchR
# project, and prefix the barcode with "<sample>#" so it matches the
# project's cell names.
#
# Results:
#   gr_list       - named list with one GRanges per sample
#   all_fragments - one data.table with the retained fragments of all samples

# The cell metadata does not depend on the sample, so extract it once instead
# of once per loop iteration.
metadata <- as.data.frame(proj@cellColData)

# Empty template that pins the column names/types of `all_fragments`, even
# when no fragment is retained at all.
fragment_template <- data.table(
    chr = character(), start = integer(), end = integer(),
    barcode = character(), read_count = integer()
)

gr_list <- list()
# Collect the per-sample tables and bind them once after the loop; calling
# rbind() inside the loop re-copies every accumulated row on each iteration.
fragment_tables <- vector("list", length(batches))

for (i in seq_along(batches)) {
    batch <- batches[[i]]
    fragment_file <- file.path(dataPath, batch, "fragments.tsv.gz")
    print(fragment_file)

    df <- fread(fragment_file, sep = "\t", header = FALSE)
    setnames(df, c("chr", "start", "end", "barcode", "read_count"))

    # Project cell names look like "<sample>#<barcode>"; strip the prefix to
    # compare against the raw barcodes in the fragments file.
    cell_barcodes <- rownames(metadata)[metadata$Sample %in% batch]
    cell_barcodes <- sub("^[^#]+#", "", cell_barcodes)
    if (length(cell_barcodes) == 0L) {
        warning("No cells found in the project for sample ", batch, call. = FALSE)
    }

    cells <- df[barcode %in% cell_barcodes]
    # Guard the zero-row case: pasting onto a zero-length vector yields a
    # length-1 string, which does not fit an empty table.
    if (nrow(cells) > 0L) {
        cells[, barcode := paste0(batch, "#", barcode)]
    }
    fragment_tables[[i]] <- cells

    gr <- GRanges(
        seqnames = cells$chr,
        ranges = IRanges(start = cells$start, end = cells$end),
        RG = cells$barcode,
        read_count = cells$read_count
    )
    gr_list[[batch]] <- gr
}

all_fragments <- rbindlist(
    c(list(fragment_template), fragment_tables),
    use.names = TRUE
)
gr_list

[1] "/data/hanxue/BCY_ATAC/scATAC/fragments/10T/fragments.tsv.gz"
[1] "/data/hanxue/BCY_ATAC/scATAC/fragments/10N/fragments.tsv.gz"
[1] "/data/hanxue/BCY_ATAC/scATAC/fragments/11T/fragments.tsv.gz"
[1] "/data/hanxue/BCY_ATAC/scATAC/fragments/12N/fragments.tsv.gz"
[1] "/data/hanxue/BCY_ATAC/scATAC/fragments/12T/fragments.tsv.gz"
[1] "/data/hanxue/BCY_ATAC/scATAC/fragments/13T/fragments.tsv.gz"
[1] "/data/hanxue/BCY_ATAC/scATAC/fragments/18T/fragments.tsv.gz"
[1] "/data/hanxue/BCY_ATAC/scATAC/fragments/19T/fragments.tsv.gz"
[1] "/data/hanxue/BCY_ATAC/scATAC/fragments/20T/fragments.tsv.gz"
[1] "/data/hanxue/BCY_ATAC/scATAC/fragments/21T/fragments.tsv.gz"
[1] "/data/hanxue/BCY_ATAC/scATAC/fragments/22T/fragments.tsv.gz"


$`10T`
GRanges object with 770403 ranges and 2 metadata columns:
             seqnames        ranges strand |                     RG read_count
                <Rle>     <IRanges>  <Rle> |            <character>  <integer>
       [1]       chr1 180885-180948      * | 10T#GTCCATCAGTGCTCGC-1          3
       [2]       chr1 267965-268016      * | 10T#GTCTACCCATTACACG-1          3
       [3]       chr1 267971-268031      * | 10T#TAGTCCCGTCTGTTGA-1          3
       [4]       chr1 633996-634061      * | 10T#ACTGCGGCATTATGGC-1          1
       [5]       chr1 634016-634068      * | 10T#AGGCCTGAGAAGACTT-1          3
       ...        ...           ...    ... .                    ...        ...
  [770399] KI270713.1   22160-22221      * | 10T#CCCACATTCACGATTG-1          1
  [770400] KI270713.1   26721-26766      * | 10T#TAACAGCCAGGTGTGA-1          1
  [770401] KI270713.1   28124-28330      * | 10T#TGGAAGGAGACTTGAA-1          1
  [770402] KI270713.1   29096-29211      * | 10T#TGGACATGTTTCACCC-

In [6]:
# Concatenate the GRanges of the selected samples, in this order, into a
# single GRanges object and display it.
selected_batches <- c("11T", "12N", "12T", "13T", "19T")
all_gr <- do.call(c, unname(gr_list[selected_batches]))
all_gr

GRanges object with 57059141 ranges and 2 metadata columns:
               seqnames      ranges strand |                     RG read_count
                  <Rle>   <IRanges>  <Rle> |            <character>  <integer>
         [1]       chr1 10132-10327      * | 11T#TATCGAGGTTGCCTGG-1          1
         [2]       chr1 10151-10192      * | 11T#ACAAGCTGTTCCTATT-1          3
         [3]       chr1 10151-10196      * | 11T#CCAGAATAGAATATCG-1          1
         [4]       chr1 10163-10192      * | 11T#GTGACATGTGAGTCGA-1          1
         [5]       chr1 10169-10209      * | 11T#GCTTTCGGTTCCTATT-1          1
         ...        ...         ...    ... .                    ...        ...
  [57059137] KI270713.1 31656-31700      * | 19T#CCTATTATCTAAACGC-1          4
  [57059138] KI270713.1 32364-32391      * | 19T#ACAGCGCTCTTATCAC-1          1
  [57059139] KI270713.1 32390-32584      * | 19T#TCCGACTAGTCACGCC-1          1
  [57059140] KI270713.1 32719-32888      * | 19T#AAATGAGAGAGAGTTT-1    

In [7]:
# Write the merged fragments of all retained cells to a single tab-separated
# file, without a header row.
fwrite(
    x = all_fragments,
    file = "./ArchRProject_T/T_fragments.tsv",
    sep = "\t",
    col.names = FALSE
)

In [8]:
# Save the merged GRanges object (`all_gr`) as an RDS file.
# NOTE(review): the original comment described this as the epithelial cells of
# all samples, but `all_gr` is built from an explicit subset of samples;
# confirm which samples are meant to be included.
saveRDS(
    object = all_gr,
    file = "./ArchRProject_T/T_gr.rds"
)