In [1]:
library(Signac)
library(Seurat)
library(EnsDb.Hsapiens.v86)
library(SeuratDisk)
library(dplyr)
library(BSgenome.Hsapiens.UCSC.hg38)

“package ‘Signac’ was built under R version 4.3.3”
“package ‘Seurat’ was built under R version 4.3.3”
Loading required package: SeuratObject

“package ‘SeuratObject’ was built under R version 4.3.3”
Loading required package: sp

“package ‘sp’ was built under R version 4.3.3”

Attaching package: ‘SeuratObject’


The following objects are masked from ‘package:base’:

    intersect, t


Loading required package: ensembldb

“package ‘ensembldb’ was built under R version 4.3.2”
Loading required package: BiocGenerics

“package ‘BiocGenerics’ was built under R version 4.3.2”

Attaching package: ‘BiocGenerics’


The following object is masked from ‘package:SeuratObject’:

    intersect


The following objects are masked from ‘package:stats’:

    IQR, mad, sd, var, xtabs


The following objects are masked from ‘package:base’:

    anyDuplicated, aperm, append, as.data.frame, basename, cbind,
    colnames, dirname, do.call, duplicated, eval, evalq, Filter, Find,
    get, grep, grepl, intersect,

In [None]:
# Load the preprocessed Seurat object from disk. The file name suggests it
# holds the HC samples' combined RNA + ATAC data (TODO confirm).
ifnb <- readRDS("hc_processed_rna+atac.rds")

In [3]:
# Keep only the cells whose `celltype` value is "excitatory neurons"; every
# later step in this notebook runs on this subset.
ifnb <- subset(ifnb, celltype == "excitatory neurons")

In [4]:
# Make the chromatin-accessibility assay the default for the calls below.
DefaultAssay(ifnb) <- "ATAC"
# First compute the GC content for each peak against the hg38 reference
# (presumably needed by LinkPeaks() later for background-peak matching).
# NOTE(review): the recorded run warns "Not all seqlevels present in supplied
# genome" -- confirm that peaks on those contigs can safely be ignored.
ifnb <- RegionStats(ifnb, genome = BSgenome.Hsapiens.UCSC.hg38)

"Not all seqlevels present in supplied genome"


In [None]:
# Give every cell the same identity label so the object is treated as a single
# group.
# NOTE(review): the plot_coverage() calls visible in this notebook do not pass
# `idents.plot`, so this global is only used for the Idents() assignment here.
idents.plot <- c('excitatory neurons')
Idents(ifnb) <- idents.plot

In [7]:
# Print the object summary (assays, feature and cell counts) as a sanity check
# after subsetting.
ifnb

An object of class Seurat 
333440 features across 7739 samples within 3 assays 
Active assay: ATAC (265980 features, 247295 variable features)
 2 layers present: counts, data
 2 other assays present: RNA, SCT
 2 dimensional reductions calculated: pca, lsi

### Helper

In [6]:
library(ggplot2)
#' Link ATAC peaks to genes and tabulate the significant links per gene.
#'
#' Runs `LinkPeaks()` between the "ATAC" and "SCT" assays for `gene_list`,
#' counts the links with p-value < 0.05 for each gene, and writes those counts
#' to `<file_prefix>gene_link_counts.csv`.
#'
#' @param ifnb Seurat object that contains "ATAC" and "SCT" assays.
#' @param gene_list Character vector of gene names, forwarded to `genes.use`.
#' @param file_prefix Output path prefix. It is pasted directly in front of the
#'   file name, so a directory prefix must end with "/".
#' @return A list with `df` (the links as a data frame) and `gene_link_counts`
#'   (one row per gene that has at least one link with p-value < 0.05).
save_link_counts <- function(ifnb, gene_list, file_prefix){
  # The linked object stays local to this function; the caller's object is not
  # modified.
  ifnb <- LinkPeaks(
    object = ifnb,
    peak.assay = "ATAC",
    expression.assay = "SCT",
    genes.use = gene_list
  )

  df <- as.data.frame(Links(ifnb[["ATAC"]]))

  if (all(c("gene", "pvalue") %in% colnames(df))) {
    # dplyr verbs are namespace-qualified because this session also attaches
    # Bioconductor packages that export functions of the same name (filter).
    gene_link_counts <- df %>%
      dplyr::filter(pvalue < 0.05) %>%
      dplyr::group_by(gene) %>%
      dplyr::summarise(link_count = dplyr::n())
  } else {
    # An empty links slot converts to a data frame without the `gene` and
    # `pvalue` columns (presumably what happens when LinkPeaks() finds nothing
    # for `gene_list`); report zero links instead of failing in filter().
    gene_link_counts <- dplyr::tibble(
      gene = character(),
      link_count = integer()
    )
  }

  write.csv(
    gene_link_counts,
    paste0(file_prefix, "gene_link_counts.csv"),
    row.names = FALSE
  )

  # Hand back both the raw link table and the per-gene summary.
  return(list(df = df, gene_link_counts = gene_link_counts))
}

"package 'ggplot2' was built under R version 4.3.3"


In [13]:
library(Signac)
library(patchwork)   # lets us manipulate the assembled plot
library(ggplot2)

#' Save a coverage + peak-gene link figure (PDF) for each gene.
#'
#' Runs `LinkPeaks()` between the "ATAC" and "SCT" assays for `gene_list`, then
#' for every gene stacks a `CoveragePlot()` on top of a `LinkPlot()` and saves
#' the result as `<file_prefix><gene>.pdf`. A gene whose plotting raises an
#' error is reported with a message and skipped.
#'
#' @param ifnb Seurat object that contains "ATAC" and "SCT" assays.
#' @param gene_list Character vector of gene names to link and plot.
#' @param file_prefix Output path prefix. It is pasted directly in front of the
#'   gene name, so a directory prefix must end with "/".
#' @param idents.plot Identities forwarded to `CoveragePlot(idents = )`;
#'   `NULL` leaves the choice to `CoveragePlot()`.
#' @param extend.up,extend.down Bases added upstream / downstream of the gene.
#'   Both the coverage panel and the link panel use these values.
#' @return `NULL`, invisibly; called for its side effect of writing PDFs.
plot_coverage <- function(ifnb, gene_list, file_prefix,
                          idents.plot = NULL,
                          extend.up = 5e4, extend.down = 5e5) {

  DefaultAssay(ifnb) <- "ATAC"
  ifnb <- LinkPeaks(ifnb,
                    peak.assay       = "ATAC",
                    expression.assay = "SCT",
                    genes.use        = gene_list)

  # Out-of-bounds handler for the colour scale: clamp link scores to the scale
  # limits so that scores beyond +/-0.1 take the end colours instead of being
  # censored to NA (which would draw the strongest links in the NA colour).
  clamp_to_limits <- function(x, range = c(0, 1), ...) {
    pmax(pmin(x, range[2]), range[1])
  }

  for (g in gene_list) {
    message("Plotting ", g)

    tryCatch({

      # The link track is drawn separately below (links = FALSE here) so that
      # its colour scale can be customised.
      cov <- CoveragePlot(
        ifnb,
        region            = g,
        features          = g,
        expression.assay  = "SCT",
        links             = FALSE,
        link.assay        = "ATAC",
        idents            = idents.plot,
        extend.upstream   = extend.up,
        extend.downstream = extend.down
      )

      # Use the same window as the coverage panel so that the two stacked
      # panels describe the same genomic region.
      arc <- LinkPlot(ifnb, region = g, assay = "ATAC",
                      extend.upstream   = extend.up,
                      extend.downstream = extend.down) +
        scale_colour_gradient2(low = "blue", mid = "grey",
                               high = "#ca0020", midpoint = 0,
                               limits = c(-0.1, 0.1),
                               oob = clamp_to_limits,
                               name = "Link\nscore") +
        guides(colour = guide_colourbar(title.position = "top")) +
        theme(legend.position = "right")

      # patchwork: coverage panel on top, link arcs underneath
      final <- cov / arc

      ggsave(paste0(file_prefix, g, ".pdf"), final, width = 15, height = 6)

    }, error = function(e) {
      # Best effort: one failing gene must not abort the remaining ones.
      message("Skipping ", g, ": ", conditionMessage(e))
    })
  }

  invisible(NULL)
}

### RORB Marker Genes

In [None]:
library(readxl)
library(dplyr)

# Workbook of RORB subtype marker genes; each sheet is read and filtered
# separately (the sheet names printed below look like one subtype per sheet).
file_path <- "../../Annotate_Exc_Subtype/RORB_cells/Marker_Genes.xlsx"

sheet_names <- excel_sheets(file_path)
all_sheets <- lapply(sheet_names, function(sheet) {
  read_excel(file_path, sheet = sheet)
})
names(all_sheets) <- sheet_names

# Column names imposed on every sheet. The first column arrives unnamed
# (readxl reports it as `...1`) and is treated as the gene name.
marker_cols <- c("gene", "p_val", "avg_log2FC", "pct.1", "pct.2", "p_val_adj")

# Collect the selected genes sheet by sheet, then flatten once instead of
# growing a vector inside a loop.
genes_per_sheet <- lapply(sheet_names, function(name) {
  print(name)
  gene_data <- all_sheets[[name]]
  colnames(gene_data) <- marker_cols

  # Keep strongly enriched markers: p_val_adj < 0.001 and avg_log2FC > 2
  filtered_genes <- gene_data %>%
    filter(p_val_adj < 0.001, avg_log2FC > 2)

  as.vector(filtered_genes$gene)
})

# Genes are not de-duplicated, so one selected in several sheets appears
# more than once.
top_genes <- unlist(genes_per_sheet, use.names = FALSE)

"package 'readxl' was built under R version 4.3.3"
[1m[22mNew names:
[36m•[39m `` -> `...1`
[1m[22mNew names:
[36m•[39m `` -> `...1`
[1m[22mNew names:
[36m•[39m `` -> `...1`
[1m[22mNew names:
[36m•[39m `` -> `...1`
[1m[22mNew names:
[36m•[39m `` -> `...1`
[1m[22mNew names:
[36m•[39m `` -> `...1`
[1m[22mNew names:
[36m•[39m `` -> `...1`
[1m[22mNew names:
[36m•[39m `` -> `...1`
[1m[22mNew names:
[36m•[39m `` -> `...1`
[1m[22mNew names:
[36m•[39m `` -> `...1`
[1m[22mNew names:
[36m•[39m `` -> `...1`
[1m[22mNew names:
[36m•[39m `` -> `...1`
[1m[22mNew names:
[36m•[39m `` -> `...1`
[1m[22mNew names:
[36m•[39m `` -> `...1`
[1m[22mNew names:
[36m•[39m `` -> `...1`
[1m[22mNew names:
[36m•[39m `` -> `...1`


[1] "Exc L4-5 RORB LCN15"
[1] "Exc L3-4 RORB SEMA6D"
[1] "Exc L3-5 RORB HSPB3"
[1] "Exc L4-5 RORB HNRNPA1P46"
[1] "Exc L3-4 RORB FOLH1B"
[1] "Exc L2-4 RORB GRIK1"
[1] "Exc L4-5 RORB RPL31P31"
[1] "Exc L4-5 RORB LINC01474"
[1] "Exc L3-5 RORB CMAHP"
[1] "Exc L3-4 RORB PRSS12"
[1] "Exc L5 RORB LINC01202"
[1] "Exc L5 RORB SNHG7"
[1] "Exc L3 RORB CARTPT"
[1] "Exc L3-5 RORB CD24"
[1] "Exc L5-6 RORB LINC00320"
[1] "Exc L4-6 RORB HPCA"


In [9]:
# Number of marker genes collected across all sheets (not de-duplicated).
length(top_genes)

In [None]:
file_prefix <- "../../Signac_plots/HC/Marker_Genes_RORB_exn/"

# Make sure the output folder is in place before anything is written to it.
if (dir.exists(file_prefix)) {
  message("Directory already exists: ", file_prefix)
} else {
  # recursive = TRUE also creates any missing parent folders
  dir.create(file_prefix, recursive = TRUE)
  message("Directory created: ", file_prefix)
}

# Link peaks to the marker genes and write the per-gene link counts; the
# result is assigned only to keep it from being printed.
omit <- save_link_counts(ifnb, top_genes, file_prefix)

Directory already exists: ../Signac_plots/HC/Marker_Genes_RORB_exn/

Testing 577 genes and 148617 peaks

Found gene coordinates for 354 genes

"Each of the 2 combined objects has sequence levels not in the other:
  - in 'x': GL000194.1, GL000195.1, GL000205.2, GL000218.1, GL000219.1, KI270711.1, KI270713.1, KI270721.1, KI270726.1, KI270727.1, KI270728.1, KI270731.1, KI270734.1, GL000009.2, GL000213.1
  - in 'y': chrMT
  Make sure to always combine/compare objects based on the same reference


In [None]:
file_prefix <- "../../Signac_plots/HC/Marker_Genes_RORB_exn/"

# Make sure the output folder is in place before anything is written to it.
if (dir.exists(file_prefix)) {
  message("Directory already exists: ", file_prefix)
} else {
  # recursive = TRUE also creates any missing parent folders
  dir.create(file_prefix, recursive = TRUE)
  message("Directory created: ", file_prefix)
}

# One coverage + link PDF per marker gene.
plot_coverage(ifnb, top_genes, file_prefix)

Directory already exists: ../Signac_plots/HC/Marker_Genes_RORB_exn/

Testing 577 genes and 148617 peaks

Found gene coordinates for 354 genes

"Each of the 2 combined objects has sequence levels not in the other:
  - in 'x': GL000194.1, GL000195.1, GL000205.2, GL000218.1, GL000219.1, KI270711.1, KI270713.1, KI270721.1, KI270726.1, KI270727.1, KI270728.1, KI270731.1, KI270734.1, GL000009.2, GL000213.1
  - in 'y': chrMT
  Make sure to always combine/compare objects based on the same reference
Plotting TLL1

Plotting GRIN3A

"[1m[22mRemoved 7 rows containing missing values or values outside the scale range
(`geom_segment()`)."
Plotting COL21A1

[1m[22mScale for [32mcolour[39m is already present.
Adding another scale for [32mcolour[39m, which will replace the existing scale.
"[1m[22mRemoved 3 rows containing missing values or values outside the scale range
(`geom_segment()`)."
Plotting RBM20

[1m[22mScale for [32mcolour[39m is already present.
Adding another scale for [32mco

### DEG RORB (both RBD vs HC and PD vs HC)

In [None]:
# Gene names taken from the link-count table written for the PD analysis.
# NOTE(review): this file only lists genes that had at least one significant
# link in that run, not necessarily the full DEG list -- confirm this is the
# intended gene set for the "RBD vs HC and PD vs HC" comparison.
deg <- read.csv(
  "../../Signac_plots/PD/DEGs_PD_vs_HC_RORB_exn/gene_link_counts.csv"
)[["gene"]]

In [None]:
file_prefix <- "../../Signac_plots/HC/DEGs_RBD_PD_vs_HC_RORB_exn/"

# Make sure the output folder is in place before anything is written to it.
if (dir.exists(file_prefix)) {
  message("Directory already exists: ", file_prefix)
} else {
  # recursive = TRUE also creates any missing parent folders
  dir.create(file_prefix, recursive = TRUE)
  message("Directory created: ", file_prefix)
}

# Left unassigned on purpose so the notebook prints the link table and counts.
save_link_counts(ifnb, deg, file_prefix)

Directory created: ../Signac_plots/HC/DEGs_RBD_PD_vs_HC_RORB_exn/

Testing 117 genes and 148617 peaks

"Each of the 2 combined objects has sequence levels not in the other:
  - in 'x': GL000194.1, GL000195.1, GL000205.2, GL000218.1, GL000219.1, KI270711.1, KI270713.1, KI270721.1, KI270726.1, KI270727.1, KI270728.1, KI270731.1, KI270734.1, GL000009.2, GL000213.1
  - in 'y': chrMT
  Make sure to always combine/compare objects based on the same reference


seqnames,start,end,width,strand,score,gene,peak,zscore,pvalue
<fct>,<int>,<int>,<int>,<fct>,<dbl>,<chr>,<chr>,<dbl>,<dbl>
chr1,812991,1309609,496619,*,0.05285895,ACAP3,chr1-812528-813454,3.980434,3.439474e-05
chr1,13583913,13892792,308880,*,0.05089589,KAZN,chr1-13583110-13584716,2.552734,5.344051e-03
chr1,13623127,13892792,269666,*,0.06969446,KAZN,chr1-13622160-13624094,2.889993,1.926253e-03
chr1,13892792,13922610,29819,*,0.05151530,KAZN,chr1-13922099-13923121,2.663100,3.871225e-03
chr1,13892792,14292751,399960,*,0.05622319,KAZN,chr1-14292145-14293357,2.942887,1.625834e-03
chr1,13892792,14366530,473739,*,0.05224207,KAZN,chr1-14366037-14367022,2.680479,3.675840e-03
chr1,36846076,37034129,188054,*,0.07663148,GRIK3,chr1-36845258-36846894,4.817343,7.274122e-07
chr1,36919755,37034129,114375,*,0.05923475,GRIK3,chr1-36919130-36920380,4.091295,2.144852e-05
chr1,36982494,37034129,51636,*,0.05728427,GRIK3,chr1-36981503-36983485,4.368677,6.250069e-06
chr1,37034129,37363087,328959,*,0.10602847,GRIK3,chr1-37362515-37363658,7.351313,9.813450e-14

gene,link_count
<chr>,<int>
ACAP3,1
ARL17B,7
ATP8B1,1
CAMK2D,1
COBLL1,2
CPNE4,1
CUX2,3
DAB1,3
DCC,6
EFR3B,1


In [None]:
file_prefix <- "../../Signac_plots/HC/DEGs_RBD_PD_vs_HC_RORB_exn/"

# Make sure the output folder is in place before anything is written to it.
if (dir.exists(file_prefix)) {
  message("Directory already exists: ", file_prefix)
} else {
  # recursive = TRUE also creates any missing parent folders
  dir.create(file_prefix, recursive = TRUE)
  message("Directory created: ", file_prefix)
}

# One coverage + link PDF per gene in `deg`.
plot_coverage(ifnb, deg, file_prefix)

Directory already exists: ../Signac_plots/HC/DEGs_RBD_PD_vs_HC_RORB_exn/

Testing 117 genes and 148617 peaks

"Each of the 2 combined objects has sequence levels not in the other:
  - in 'x': GL000194.1, GL000195.1, GL000205.2, GL000218.1, GL000219.1, KI270711.1, KI270713.1, KI270721.1, KI270726.1, KI270727.1, KI270728.1, KI270731.1, KI270734.1, GL000009.2, GL000213.1
  - in 'y': chrMT
  Make sure to always combine/compare objects based on the same reference
Plotting ABLIM2

"[1m[22mRemoved 90 rows containing missing values or values outside the scale range
(`geom_segment()`)."
Plotting ABR

"[1m[22mRemoved 122 rows containing missing values or values outside the scale range
(`geom_segment()`)."
"[1m[22mRemoved 1 row containing missing values or values outside the scale range
(`geom_segment()`)."
Plotting ACAP3

"[1m[22mRemoved 32 rows containing missing values or values outside the scale range
(`geom_segment()`)."
Plotting ADCY9

Plotting AGAP1

Plotting ANKRD36

"[1m[22mRem

### Save plots for individual genes (for visualization)

In [None]:
file_prefix <- "../../Signac_plots/HC/DEGs_RBD_PD_vs_HC_RORB_exn/"

# Make sure the output folder is in place before anything is written to it.
if (dir.exists(file_prefix)) {
  message("Directory already exists: ", file_prefix)
} else {
  # recursive = TRUE also creates any missing parent folders
  dir.create(file_prefix, recursive = TRUE)
  message("Directory created: ", file_prefix)
}

# Re-plot a single gene of interest on its own.
one <- c("CUX2")
plot_coverage(ifnb, one, file_prefix)

Directory already exists: ../Signac_plots/HC/DEGs_RBD_PD_vs_HC_RORB_exn/

Testing 1 genes and 148617 peaks

"Each of the 2 combined objects has sequence levels not in the other:
  - in 'x': GL000194.1, GL000195.1, GL000205.2, GL000218.1, GL000219.1, KI270711.1, KI270713.1, KI270721.1, KI270726.1, KI270727.1, KI270728.1, KI270731.1, KI270734.1, GL000009.2, GL000213.1
  - in 'y': chrMT
  Make sure to always combine/compare objects based on the same reference
Plotting CUX2

[1m[22mScale for [32mcolour[39m is already present.
Adding another scale for [32mcolour[39m, which will replace the existing scale.
"[1m[22mRemoved 56 rows containing missing values or values outside the scale range
(`geom_segment()`)."
"[1m[22mRemoved 1 row containing missing values or values outside the scale range
(`geom_segment()`)."
