In [7]:
suppressMessages(library(GenomicRanges))
suppressMessages(library(GenomeInfoDb))
suppressMessages(library(data.table))
suppressMessages(library(ggplot2))
suppressMessages(library(ArchR))
suppressMessages(library(Seurat))
suppressMessages(library(Signac))

In [8]:
# Fix the RNG seed so that any random draws made later in this session
# are repeatable.
set.seed(seed = 42)

# Set ArchR's default number of parallel threads.
addArchRThreads(threads = 64)

Setting default number of Parallel threads to 64.



In [9]:
# Load the saved ArchR project from its directory; the ArchR logo banner
# is suppressed.
proj <- loadArchRProject(
    path = "./ArchRProject_epithelial",
    showLogo = FALSE
)

Successfully loaded ArchRProject!



In [10]:
# Root directory holding one sub-directory per sample; each sub-directory
# is expected to contain a `fragments.tsv.gz` file.
dataPath <- "/data/hanxue/BCY_ATAC/scATAC/fragments"

# Sample identifiers to process (also used as sub-directory names).
batches <- c(
    "10T", "10N", "11T", "12N", "12T", "13T",
    "18T", "19T", "20T", "21T", "22T"
)

In [11]:
# Build one GRanges of fragments per sample, keeping only fragments whose
# barcode belongs to a cell present in the ArchR project `proj`.
#
# Inputs (defined in earlier cells): `proj`, `dataPath`, `batches`.
# Result: `gr_list`, a list named by sample; each element is a GRanges with
# metadata columns `RG` (cell barcode) and `read_count`.

# The cell metadata does not depend on the sample, so convert it once here
# instead of once per loop iteration.
metadata <- as.data.frame(proj@cellColData)

# Preallocate one named slot per sample rather than growing the list.
gr_list <- setNames(vector("list", length(batches)), batches)

for (batch in batches) {
    # Build the fragment file path once and reuse it for logging and reading.
    fragment_file <- file.path(dataPath, batch, "fragments.tsv.gz")
    print(fragment_file)

    df <- fread(
        fragment_file,
        sep = "\t",
        header = FALSE,
        col.names = c("chr", "start", "end", "barcode", "read_count")
    )

    # Cell names in the project metadata carry a "<prefix>#" in front of the
    # barcode; strip everything up to and including the first "#" so they
    # can be matched against the barcode column of the fragment file.
    cell_barcodes <- rownames(metadata)[metadata$Sample == batch]
    cell_barcodes <- sub("^[^#]+#", "", cell_barcodes)

    # Keep only fragments that belong to cells retained in the project.
    cells <- df[barcode %in% cell_barcodes]

    # NOTE(review): the start column is used as-is. If these fragment files
    # use BED-style 0-based starts, the resulting 1-based GRanges begin one
    # base early -- confirm against the file format and downstream usage.
    gr <- GRanges(
        seqnames = cells$chr,
        ranges = IRanges(start = cells$start, end = cells$end),
        RG = cells$barcode,
        read_count = cells$read_count
    )
    gr_list[[batch]] <- gr
}

# Display the per-sample GRanges.
gr_list

[1] "/data/hanxue/BCY_ATAC/scATAC/fragments/10T/fragments.tsv.gz"
[1] "/data/hanxue/BCY_ATAC/scATAC/fragments/10N/fragments.tsv.gz"
[1] "/data/hanxue/BCY_ATAC/scATAC/fragments/11T/fragments.tsv.gz"
[1] "/data/hanxue/BCY_ATAC/scATAC/fragments/12N/fragments.tsv.gz"
[1] "/data/hanxue/BCY_ATAC/scATAC/fragments/12T/fragments.tsv.gz"
[1] "/data/hanxue/BCY_ATAC/scATAC/fragments/13T/fragments.tsv.gz"
[1] "/data/hanxue/BCY_ATAC/scATAC/fragments/18T/fragments.tsv.gz"
[1] "/data/hanxue/BCY_ATAC/scATAC/fragments/19T/fragments.tsv.gz"
[1] "/data/hanxue/BCY_ATAC/scATAC/fragments/20T/fragments.tsv.gz"
[1] "/data/hanxue/BCY_ATAC/scATAC/fragments/21T/fragments.tsv.gz"
[1] "/data/hanxue/BCY_ATAC/scATAC/fragments/22T/fragments.tsv.gz"


$`10T`
GRanges object with 175311175 ranges and 2 metadata columns:
                seqnames      ranges strand |                 RG read_count
                   <Rle>   <IRanges>  <Rle> |        <character>  <integer>
          [1]       chr1  9989-10012      * | TTCTGTACATGGTTTG-1          1
          [2]       chr1 10103-10277      * | CCAGATATCTTGGCCT-1          2
          [3]       chr1 10151-10204      * | TAAACCGGTTGGAGGT-1          1
          [4]       chr1 10163-10210      * | GTGATCAGTGGAAAGA-1          2
          [5]       chr1 10221-10307      * | TGAGTCAAGACTAGGC-1          2
          ...        ...         ...    ... .                ...        ...
  [175311171] KI270713.1 34589-34746      * | CTTGAAGCATTGTGAC-1          1
  [175311172] KI270713.1 34608-34980      * | ATGTCGAGTCATGAGG-1          1
  [175311173] KI270713.1 36307-36360      * | TGTAAGCAGTCGCCTG-1          2
  [175311174] KI270713.1 37672-37840      * | AACAAAGAGAGCTCCC-1          1
  [175311175] KI2707

In [12]:
# Concatenate the per-sample fragment GRanges for the selected samples into
# a single GRanges, in the order listed. `unname()` keeps the sample names
# from being passed to `c()` as argument names.
all_gr <- do.call(
    c,
    unname(gr_list[c("11T", "12N", "12T", "13T", "19T")])
)

# Display the combined object.
all_gr

GRanges object with 103223445 ranges and 2 metadata columns:
                seqnames      ranges strand |                 RG read_count
                   <Rle>   <IRanges>  <Rle> |        <character>  <integer>
          [1]       chr1 10086-10346      * | CGTACAAAGTGTTCCA-1          1
          [2]       chr1 13283-13362      * | GAGTGAGGTGGCATAG-1          3
          [3]       chr1 47091-47138      * | GCTGAGCAGGCCTAAG-1          1
          [4]       chr1 55970-56010      * | GAGGATGTCGGGAAAC-1          2
          [5]       chr1 64822-64979      * | CAAAGCTCAGGCATTT-1          1
          ...        ...         ...    ... .                ...        ...
  [103223441] KI270713.1 34128-34171      * | GACTAGTAGGTGTTGG-1          3
  [103223442] KI270713.1 36867-36905      * | TAAGCCACAACGGGTA-1          6
  [103223443] KI270713.1 36872-36916      * | CAACCAAGTACGAGAC-1          1
  [103223444] KI270713.1 36877-36928      * | GAGCGCTGTTATCGAC-1          1
  [103223445] KI270713.1 37

In [13]:
# Serialize the combined GRanges to an RDS file inside the ArchR project
# directory.
saveRDS(
    object = all_gr,
    file = "./ArchRProject_epithelial/epithelial_gr.rds"
)