In [11]:
library(Signac)
library(Seurat)
library(EnsDb.Hsapiens.v86)
library(SeuratDisk)
library(dplyr)
library(BSgenome.Hsapiens.UCSC.hg38)

In [None]:
# Load the previously processed RNA + ATAC object from disk.
ifnb <- readRDS(file = "pd_processed_rna+atac.rds")

In [13]:
# Keep only the cells of one cell type. Alternatives used for other runs:
#   celltype == "microglia"
#   celltype == "excitatory neurons"
ifnb <- subset(x = ifnb, subset = celltype == "inhibitory neurons")

In [14]:
# Show the sequence (chromosome/contig) names used by the gene annotation.
Annotation(ifnb) |> seqlevels()

In [15]:
# Switch to the ATAC assay, then compute per-peak sequence statistics
# (including GC content) against the hg38 genome.
DefaultAssay(ifnb) <- "ATAC"
ifnb <- RegionStats(
  object = ifnb,
  genome = BSgenome.Hsapiens.UCSC.hg38
)

"Not all seqlevels present in supplied genome"


In [None]:
# Identity label for the population analysed in this run. Labels used for
# other runs: "microglia", "excitatory neurons".
idents.plot <- "inhibitory neurons"
Idents(ifnb) <- idents.plot

In [7]:
# Display a summary of the object (assays, feature/cell counts, reductions).
ifnb

An object of class Seurat 
283535 features across 3667 samples within 3 assays 
Active assay: ATAC (216774 features, 207802 variable features)
 2 layers present: counts, data
 2 other assays present: RNA, SCT
 2 dimensional reductions calculated: pca, lsi

In [None]:
# Link ATAC peaks to genes, export the linked peaks (TSV of peak-gene links and
# a BED file of peaks), and save the SCT expression of variable genes plus
# transcription factors as a loom file.
library(readxl)
library(dplyr)
library(tidyverse)

# Tag that names every input/output file of this run. Tags used so far:
# "mic", "exn_rorb", "inh_subset". It has to agree with the cell type the
# object was subset to earlier in the notebook.
run_tag <- "inh_subset"
out_dir <- "../../Signac_plots/saved_files"

# Genes to test for peak-gene links, one gene name per line.
total_genes <- readLines(
  file.path(out_dir, paste0(run_tag, "_total_genes.txt"))
)

# 1.1 Compute the peak-gene links and store them on the ATAC assay.
ifnb <- LinkPeaks(
  object = ifnb,
  peak.assay = "ATAC",
  expression.assay = "SCT",
  genes.use = total_genes
)

# 1.2 Pull out the links that LinkPeaks() created.
## Each element of the GRanges object is one peak-gene link. Its range spans
## from the peak midpoint to the gene TSS, so it is NOT the peak interval; the
## linked peak and gene live in the `peak` and `gene` metadata columns.
peak_gene_links <- Links(ifnb[["ATAC"]])
link_meta <- as_tibble(as.data.frame(peak_gene_links))

if (nrow(link_meta) == 0L) {
  stop("LinkPeaks() returned no peak-gene links", call. = FALSE)
}

# Recover the real peak coordinates from the peak names ("chr-start-end").
peak_coords <- as.data.frame(
  StringToGRanges(regions = link_meta$peak, sep = c("-", "-"))
)

links_df <- tibble(
  chrom      = as.character(peak_coords$seqnames),  # chromosome
  chromStart = peak_coords$start - 1L,  # 1-based -> 0-based half-open (BED)
  chromEnd   = peak_coords$end,
  gene       = link_meta$gene,
  score      = link_meta$score          # optional
) |>
  mutate(peak_id = paste0(chrom, ":", chromStart, "-", chromEnd))

write_tsv(
  links_df,
  file.path(out_dir, paste0("PD_gene_peak_links_", run_tag, ".tsv"))
)

# A peak linked to several genes occurs once per link above; the BED file
# lists every peak only once.
peaks_df <- links_df |>
  distinct(chrom, chromStart, chromEnd, peak_id)

readr::write_tsv(
  peaks_df,
  file.path(out_dir, paste0("PD_peaks_", run_tag, ".bed")),
  col_names = FALSE
)

# Transcription factor names are taken from the first column of the file.
tfs <- read.table(
  file.path(out_dir, "all_tfs.txt"),
  stringsAsFactors = FALSE
)[, 1]

# Restrict the object to the SCT variable features plus all listed TFs.
DefaultAssay(ifnb) <- "SCT"
keep_genes <- union(VariableFeatures(ifnb), tfs)
ifnb_subset <- subset(ifnb, features = keep_genes)

# Loom file name pattern: pd_<first tag word>_all_tfs_rna_<TAG IN UPPER CASE>,
# e.g. "pd_inh_all_tfs_rna_INH_SUBSET.loom" for run_tag "inh_subset".
loom_name <- paste0(
  "pd_", sub("_.*$", "", run_tag), "_all_tfs_rna_", toupper(run_tag), ".loom"
)

# NOTE(review): confirm that SaveLoom() honours `assay` and `slot`; the
# default assay is set to SCT above in case it only uses the default assay.
SaveLoom(
  object   = ifnb_subset,
  filename = file.path(out_dir, loom_name),
  assay    = "SCT",
  slot     = "data"
)

Testing 1540 genes and 116887 peaks

Found gene coordinates for 1102 genes

"Each of the 2 combined objects has sequence levels not in the other:
  - in 'x': GL000009.2, GL000194.1, GL000195.1, GL000205.2, GL000218.1, GL000219.1, KI270711.1, KI270713.1, KI270727.1, KI270728.1, KI270731.1, KI270734.1, KI270721.1, KI270726.1
  - in 'y': chrMT
  Make sure to always combine/compare objects based on the same reference
"package 'tidyverse' was built under R version 4.3.3"
"package 'ggplot2' was built under R version 4.3.3"
"package 'tibble' was built under R version 4.3.3"
"package 'tidyr' was built under R version 4.3.3"
"package 'readr' was built under R version 4.3.3"
"package 'purrr' was built under R version 4.3.3"
"package 'stringr' was built under R version 4.3.3"
"package 'forcats' was built under R version 4.3.3"
"package 'lubridate' was built under R version 4.3.3"
── [1mAttaching core tidyverse packages[22m ──────────────────────── tidyverse 2.0.0 ──
[32m✔[39m [34mforcats  [