In [10]:
suppressMessages(library(ggplot2))
suppressMessages(library(dplyr))
suppressMessages(library(ArchR))
suppressMessages(library(Seurat))
suppressMessages(library(Signac))
suppressMessages(library(writexl))

In [2]:
# Fix the RNG seed so that any stochastic steps in this session are repeatable.
set.seed(42)
# Set ArchR's default number of parallel threads to 64.
addArchRThreads(threads = 64)

Setting default number of Parallel threads to 64.



In [3]:
# Load the saved ArchR project from ./ArchRProject (logo printing disabled).
proj <- loadArchRProject("./ArchRProject", showLogo = FALSE)

Successfully loaded ArchRProject!



### 提取主要细胞类型差异峰

In [3]:
# Load the previously saved marker-peak results for the major cell types.
# NOTE(review): presumably the output of getMarkerFeatures() on the peak
# matrix grouped by major cell type - confirm against the cell that saved it.
markers <- readRDS(file = "./ArchRProject/markersPK_majorType.rds")

In [5]:
# Keep only significant marker peaks per cell type: FDR <= 0.05 and Log2FC >= 1.
markerList <- getMarkers(markers, cutOff = "FDR <= 0.05 & Log2FC >=1")

In [17]:
# Print a summary of the filtered marker-peak list (one element per cell type).
markerList

List of length 9
names(9): B_cell Endothelial Epithelial ... PeriVascular Plasma T_cell

In [None]:
# Export the significant marker peaks (DARs) of every cell type as BED files.
#
# Expects `markerList` as produced by getMarkers(): a named list with one
# table of regions per cell type. With getMarkers()' default settings each
# element is a DataFrame (not a GRanges), so emptiness must be tested on the
# number of rows: length() on a DataFrame counts columns and is never 0.
# NROW() works for both DataFrame (rows) and GRanges (ranges).
#
# Output: one tab-separated, header-less 3-column BED file (chr, start, end)
# per cell type that has at least one significant region.

# Make sure the target directory exists; write.table() does not create it.
bed_dir <- "./output/output_bed"
dir.create(bed_dir, recursive = TRUE, showWarnings = FALSE)

for (cell_type in names(markerList)) {
  dar_regions <- markerList[[cell_type]]

  # Skip cell types without any significant region.
  if (NROW(dar_regions) == 0) {
    cat(paste("No significant regions found for", cell_type, "\n"))
    next
  }

  # Convert to a plain data.frame and keep only the BED coordinate columns.
  dar_df <- as.data.frame(dar_regions)
  bed_df <- dar_df[, c("seqnames", "start", "end")]
  colnames(bed_df) <- c("chr", "start", "end")

  # BED uses 0-based starts. Stay in integer arithmetic: `start - 1` would
  # promote the column to double, and write.table() may then emit values such
  # as 1e+05, which BED consumers reject.
  bed_df$start <- as.integer(bed_df$start) - 1L

  # NOTE(review): the prefix "celltype_DAR" is glued directly to the cell type
  # (e.g. celltype_DARB_cell_DARs.bed); a "/" or "_" separator was probably
  # intended. Kept unchanged so existing downstream paths keep working.
  output_file <- paste0(bed_dir, "/celltype_DAR", cell_type, "_DARs.bed")

  write.table(
    bed_df,
    file = output_file,
    quote = FALSE,
    sep = "\t",
    row.names = FALSE,
    col.names = FALSE
  )

  cat(paste("BED file saved as:", output_file, "\n"))
}

BED file saved as: ./output/output_bed/celltype_DARB_cell_DARs.bed 
BED file saved as: ./output/output_bed/celltype_DAREndothelial_DARs.bed 
BED file saved as: ./output/output_bed/celltype_DAREpithelial_DARs.bed 
BED file saved as: ./output/output_bed/celltype_DARFibroblast_DARs.bed 
BED file saved as: ./output/output_bed/celltype_DARMast_DARs.bed 
BED file saved as: ./output/output_bed/celltype_DARMyeloid_DARs.bed 
BED file saved as: ./output/output_bed/celltype_DARPeriVascular_DARs.bed 
BED file saved as: ./output/output_bed/celltype_DARPlasma_DARs.bed 
BED file saved as: ./output/output_bed/celltype_DART_cell_DARs.bed 


### 提取主要细胞类型差异基因

In [11]:
# Load the previously saved marker-gene results for the major cell types.
# NOTE(review): presumably the output of getMarkerFeatures() on the gene score
# matrix grouped by major cell type - confirm against the cell that saved it.
markersGS <- readRDS(file = "./ArchRProject/markersGS_majorType.rds")

In [19]:
# Keep only significant marker genes per cell type: FDR <= 0.05 and
# Log2FC >= 1.25 (a stricter fold-change cutoff than the one used for peaks).
markerGSList <- getMarkers(markersGS, cutOff = "FDR <= 0.05 & Log2FC >=1.25")

In [20]:
# Print a summary of the filtered marker-gene list (one element per cell type).
markerGSList

List of length 9
names(9): B_cell Endothelial Epithelial ... PeriVascular Plasma T_cell

In [25]:
# Show the marker-gene table of a single cell type (B_cell) to check its columns.
markerGSList[["B_cell"]]

DataFrame with 563 rows and 9 columns
      seqnames     start       end    strand        name       idx    Log2FC
         <Rle> <integer> <integer> <integer> <character> <integer> <numeric>
20904     chr6 167149480 167196913         1     TCP10L2      1249   2.66264
22435     chr8  55879813  56014168         1         LYN       355   2.05615
1702      chr1 160796074 160828261         1         LY9      1702   2.90187
20902     chr6 166999405 167139696         1        CCR6      1247   1.88617
8864     chr16  23836001  24220611         1       PRKCB       379   1.90599
...        ...       ...       ...       ...         ...       ...       ...
8743     chr16  11273641  11273218         2        PRM3       258   1.72193
15299    chr21  25583326  25582770         2   LINC00515        55   1.28698
8900     chr16  28637654  28658682         1      NPIPB8       415   1.82882
5175     chr12  10826358  10825317         2     TAS2R10       190   2.34049
4580     chr11  89968502  89975228    

In [36]:
# For every cell type, write the names of its top 100 marker genes (ranked by
# descending Log2FC) to a text file: a "Gene" header line followed by one gene
# name per line.
#
# Expects `markerGSList` as produced by getMarkers(): a named list of tables
# with at least the columns `name` and `Log2FC`.

# Make sure the target directory exists; writeLines() does not create it.
marker_dir <- "./output/output_marker/celltype_marker"
dir.create(marker_dir, recursive = TRUE, showWarnings = FALSE)

for (cell_type in names(markerGSList)) {
  gene_table <- markerGSList[[cell_type]]

  # Rank genes from the largest to the smallest Log2FC.
  ranked <- gene_table[order(-gene_table$Log2FC), ]

  # head() copes with fewer than 100 rows and with an empty table, whereas
  # `1:min(100, nrow(x))` degenerates to c(1, 0) for zero rows and would write
  # a spurious "NA" gene.
  top_genes <- head(ranked$name, 100)

  if (length(top_genes) == 0) {
    # Still write the header-only file so the output reflects the current run.
    cat(paste("No significant genes found for", cell_type, "\n"))
  }

  output_file <- file.path(marker_dir, paste0(cell_type, "_top_100_genes.txt"))

  # Write header and gene names in a single call; passing a path lets
  # writeLines() open and close the file itself, so no connection can leak.
  writeLines(c("Gene", top_genes), output_file)
}

# Report that all files have been written.
cat("所有聚类的基因列表文件已生成，可上传至 Metascape 进行分析。\n")

所有聚类的基因列表文件已生成，可上传至 Metascape 进行分析。
