In [1]:
library(Signac)
library(Seurat)
library(EnsDb.Hsapiens.v86)
library(SeuratDisk)
library(dplyr)
library(BSgenome.Hsapiens.UCSC.hg38)

“package ‘Signac’ was built under R version 4.3.3”
“package ‘Seurat’ was built under R version 4.3.3”
Loading required package: SeuratObject

“package ‘SeuratObject’ was built under R version 4.3.3”
Loading required package: sp

“package ‘sp’ was built under R version 4.3.3”

Attaching package: ‘SeuratObject’


The following objects are masked from ‘package:base’:

    intersect, t


Loading required package: ensembldb

“package ‘ensembldb’ was built under R version 4.3.2”
Loading required package: BiocGenerics

“package ‘BiocGenerics’ was built under R version 4.3.2”

Attaching package: ‘BiocGenerics’


The following object is masked from ‘package:SeuratObject’:

    intersect


The following objects are masked from ‘package:stats’:

    IQR, mad, sd, var, xtabs


The following objects are masked from ‘package:base’:

    anyDuplicated, aperm, append, as.data.frame, basename, cbind,
    colnames, dirname, do.call, duplicated, eval, evalq, Filter, Find,
    get, grep, grepl, intersect,

To store RNA expression for the relevant genes in each cell type of interest (Mic, Exc, Inh), run this notebook once per cell type. The current version shows the Inh example. To run it for another cell type, uncomment the filtering and saving lines for that cell type and comment out the Inh ones.

# Get Peak - Gene Links

We only store expression for relevant genes for HC samples in this script.

In [None]:
# Load the serialized object from disk. The cells below use it as a Seurat
# object with ATAC and SCT assays (presumably the HC samples — confirm).
hc_object_path <- "hc_processed_rna+atac.rds"
ifnb <- readRDS(file = hc_object_path)

In [None]:
# Display the per-cell metadata table (includes the `celltype` column used
# for subsetting in the next cell).
ifnb@meta.data

In [None]:
# Keep only the cells of one cell type. Run the notebook once per cell type,
# enabling exactly one of the lines below.
# ifnb <- subset(ifnb, celltype == "microglia")
# ifnb <- subset(ifnb, celltype == "excitatory neurons")
ifnb <- subset(x = ifnb, subset = celltype == "inhibitory neurons")

In [4]:
# List the sequence (chromosome/contig) names of the gene annotation attached
# to the object.
# NOTE(review): this runs before DefaultAssay is set to "ATAC" below — confirm
# the active assay at this point carries an annotation.
seqlevels(Annotation(ifnb))

In [4]:
# Switch to the chromatin-accessibility assay for the peak-level steps.
DefaultAssay(ifnb) <- "ATAC"
# Annotate every peak with sequence statistics (including GC content) taken
# from the hg38 reference genome.
ifnb <- RegionStats(
  object = ifnb,
  genome = BSgenome.Hsapiens.UCSC.hg38
)

"Not all seqlevels present in supplied genome"


In [None]:
# Give every cell of the (already subset) object the same identity label.
# Enable the line that matches the cell type being processed.
# idents.plot <- c('microglia')
# idents.plot <- c('excitatory neurons')
idents.plot <- "inhibitory neurons"
Idents(ifnb) <- idents.plot

In [7]:
# Print a summary of the subset object (assays, feature and cell counts).
ifnb

An object of class Seurat 
333440 features across 7739 samples within 3 assays 
Active assay: ATAC (265980 features, 247295 variable features)
 2 layers present: counts, data
 2 other assays present: RNA, SCT
 2 dimensional reductions calculated: pca, lsi

# Save Gene-Peak Links for relevant genes

## Curate list of relevant genes

In [12]:
# Accumulator for the curated gene symbols; later cells append to it.
# A typed empty character vector is used instead of c() (which is NULL) so the
# later unique()/writeLines() calls still work if no gene passes any filter.
total_genes <- character(0)

### MG0 Marker Genes

In [None]:
library(readxl)
library(dplyr)

# Workbook with one sheet of marker-gene statistics per cluster
file_path <- "../../Annotate_Mic_Subtype/11_clusters/Marker_Genes.xlsx"

# Load the "Cluster_0" sheet (MG0)
gene_data <- read_excel(path = file_path, sheet = "Cluster_0")

# Assign column names positionally. readxl reports the first header as empty
# (renamed `...1`); it is relabelled `gene` here — presumably it holds the gene
# symbols, confirm against the workbook.
names(gene_data) <- c("gene", "p_val", "avg_log2FC", "pct.1", "pct.2", "p_val_adj")

# Keep rows with p_val_adj < 0.001 and avg_log2FC > 0.25
filtered_genes <- dplyr::filter(
  gene_data,
  p_val_adj < 0.001 & avg_log2FC > 0.25
)

# Append the surviving gene symbols to the running list
total_genes <- c(total_genes, as.vector(filtered_genes[["gene"]]))

"package 'readxl' was built under R version 4.3.3"
[1m[22mNew names:
[36m•[39m `` -> `...1`


### MG2 Marker Genes

In [None]:
library(readxl)
library(dplyr)

# Workbook with one sheet of marker-gene statistics per cluster
file_path <- "../../Annotate_Mic_Subtype/11_clusters/Marker_Genes.xlsx"

# Load the "Cluster_2" sheet (MG2)
gene_data <- read_excel(path = file_path, sheet = "Cluster_2")

# Assign column names positionally; the first (unnamed) column becomes `gene`
names(gene_data) <- c("gene", "p_val", "avg_log2FC", "pct.1", "pct.2", "p_val_adj")

# Keep rows with p_val_adj < 0.001 and avg_log2FC > 0.25
filtered_genes <- dplyr::filter(
  gene_data,
  p_val_adj < 0.001 & avg_log2FC > 0.25
)

# Append the surviving gene symbols to the running list
total_genes <- c(total_genes, as.vector(filtered_genes[["gene"]]))

[1m[22mNew names:
[36m•[39m `` -> `...1`


### MG3 Marker Genes

In [None]:
library(readxl)
library(dplyr)

# Workbook with one sheet of marker-gene statistics per cluster
file_path <- "../../Annotate_Mic_Subtype/11_clusters/Marker_Genes.xlsx"

# Load the "Cluster_3" sheet (MG3)
gene_data <- read_excel(path = file_path, sheet = "Cluster_3")

# Assign column names positionally; the first (unnamed) column becomes `gene`
names(gene_data) <- c("gene", "p_val", "avg_log2FC", "pct.1", "pct.2", "p_val_adj")

# Keep rows with p_val_adj < 0.001 and avg_log2FC > 0.25
filtered_genes <- dplyr::filter(
  gene_data,
  p_val_adj < 0.001 & avg_log2FC > 0.25
)

# Append the surviving gene symbols to the running list
total_genes <- c(total_genes, as.vector(filtered_genes[["gene"]]))

[1m[22mNew names:
[36m•[39m `` -> `...1`


### DEGs for MG0,2,3 and DAM

In [None]:
# Workbook of significant RBD-vs-HC DEGs, one sheet per cluster
file_path <- "../../DEG_GO_Analysis/DEGs_RBD_vs_HC/Significant_DEGs_RBD_vs_HC.xlsx"

# Process the Cluster_0, Cluster_2 and Cluster_3 sheets in turn:
# read the sheet, name its columns positionally, filter, append.
# NOTE(review): `avg_log2FC > 0.25` keeps only positive fold changes — confirm
# that DEGs with negative fold change are meant to be excluded.
for (cluster_sheet in c("Cluster_0", "Cluster_2", "Cluster_3")) {
  gene_data <- read_excel(file_path, sheet = cluster_sheet)
  colnames(gene_data) <- c("gene", "p_val", "avg_log2FC", "pct.1", "pct.2", "p_val_adj")

  # Keep rows with p_val_adj < 0.001 and avg_log2FC > 0.25
  filtered_genes <- gene_data %>%
    filter(p_val_adj < 0.001, avg_log2FC > 0.25)

  total_genes <- c(total_genes, as.vector(filtered_genes$gene))
}

[1m[22mNew names:
[36m•[39m `` -> `...1`
[1m[22mNew names:
[36m•[39m `` -> `...1`
[1m[22mNew names:
[36m•[39m `` -> `...1`


In [None]:
# Workbook of significant PD-vs-HC DEGs, one sheet per cluster
file_path <- "../../DEG_GO_Analysis/DEGs_PD_vs_HC/Significant_DEGs_PD_vs_HC.xlsx"

# Process the Cluster_0, Cluster_2 and Cluster_3 sheets in turn:
# read the sheet, name its columns positionally, filter, append.
# NOTE(review): `avg_log2FC > 0.25` keeps only positive fold changes — confirm
# that DEGs with negative fold change are meant to be excluded.
for (cluster_sheet in c("Cluster_0", "Cluster_2", "Cluster_3")) {
  gene_data <- read_excel(file_path, sheet = cluster_sheet)
  colnames(gene_data) <- c("gene", "p_val", "avg_log2FC", "pct.1", "pct.2", "p_val_adj")

  # Keep rows with p_val_adj < 0.001 and avg_log2FC > 0.25
  filtered_genes <- gene_data %>%
    filter(p_val_adj < 0.001, avg_log2FC > 0.25)

  total_genes <- c(total_genes, as.vector(filtered_genes$gene))
}

[1m[22mNew names:
[36m•[39m `` -> `...1`
[1m[22mNew names:
[36m•[39m `` -> `...1`
[1m[22mNew names:
[36m•[39m `` -> `...1`


In [15]:
# Hand-curated gene list (named for DAM / AD per the variable and section
# heading; the literature source is not recorded here — TODO add reference).
# The whole list is appended to the running gene list without any filtering.
dam_ad_genes <- c("CD83", "NAMPT", "HIF1A", "SRGN", "FTH1", "FTL", "RPL32", "TPT1", "RPLP1", 
               "RPS4X", "RPS24", "RPS11", "RPS15", "PLEK", "CCL3", "IL1B", "IFIT2", "B2M", 
               "CD74", "MAMDC2", "EEF1B2", "PDPN", "RPL10A", "RPL34", "RPL35A", "RPL37", 
               "RPL7", "RPL38", "RPS16", "RPL6", "RPS14", "RPS9", "RPL30", "APOE", "RPS25", 
               "RPS20", "EEF1A1", "RPS8", "RPL13", "RPL14", "RPL8", "RPL35", "RPL23", "FAU", 
               "RPS19", "RPL27A", "RPS3", "RPL37A", "ELOVL5", "SOCS6", "CADM1", 
               "ITGAX", "NAP1L1", "CEBPA", "TREM2", "PLXDC2", "SOAT1", "LIPA", "CD84", "CREG1", 
               "FAM20C", "SLC16A3", "ARHGEF7", "GPI", "CUX1", "DHRS3", "CSF2RA", "PSAP", 
               "RASSF3", "RHEB", "BCL2", "NFE2L2", "TLR2", "CHPT1", "FGL2", "EFR3A", "ARAP2", 
               "CLEC7A", "CYBA", "ITM2B", "CD37", "ITGB2", "ARPC3", "HLA-E", "EEF2", "PABPC1", 
               "NPC2", "FCER1G", "RPSA", "TYROBP", "RPL31", "EEF1D", "RPL12", "CHST11", "FCGR3A", 
               "AZIN1", "MFSD1", "GNAS", "NOP58", "LAT2", "CTSZ", "SNX3", "ADAR", "AXL", "BST2", 
               "OXCT1", "SREBF2", "NRP1", "USP12", "GPNMB", "MYO1E", "SPP1", "ELL2", "PADI2", 
               "ATP1B3", "RNF149", "SLC11A1", "KLHL6", "PGK1", "CTSB", "SDCBP")
total_genes <- c(total_genes, as.vector(dam_ad_genes))

### Exc RORB genes

In [None]:
library(readxl)
library(dplyr)

# Workbook with one sheet of marker-gene statistics per Exc RORB subtype
file_path <- "../../Annotate_Exc_Subtype/RORB_cells/Marker_Genes.xlsx"

# Read every sheet up front into a list keyed by sheet name
sheet_names <- excel_sheets(file_path)
all_sheets <- setNames(
  lapply(sheet_names, function(sheet) read_excel(file_path, sheet = sheet)),
  sheet_names
)

for (name in sheet_names) {
  print(name)
  gene_data <- all_sheets[[name]]
  # Name the columns positionally; the first (unnamed) column becomes `gene`
  colnames(gene_data) <- c("gene", "p_val", "avg_log2FC", "pct.1", "pct.2", "p_val_adj")

  # Keep rows with p_val_adj < 0.001 and avg_log2FC > 0.25
  filtered_genes <- dplyr::filter(gene_data, p_val_adj < 0.001, avg_log2FC > 0.25)

  total_genes <- c(total_genes, as.vector(filtered_genes$gene))
}

[1m[22mNew names:
[36m•[39m `` -> `...1`
[1m[22mNew names:
[36m•[39m `` -> `...1`
[1m[22mNew names:
[36m•[39m `` -> `...1`
[1m[22mNew names:
[36m•[39m `` -> `...1`
[1m[22mNew names:
[36m•[39m `` -> `...1`
[1m[22mNew names:
[36m•[39m `` -> `...1`
[1m[22mNew names:
[36m•[39m `` -> `...1`
[1m[22mNew names:
[36m•[39m `` -> `...1`
[1m[22mNew names:
[36m•[39m `` -> `...1`
[1m[22mNew names:
[36m•[39m `` -> `...1`
[1m[22mNew names:
[36m•[39m `` -> `...1`
[1m[22mNew names:
[36m•[39m `` -> `...1`
[1m[22mNew names:
[36m•[39m `` -> `...1`
[1m[22mNew names:
[36m•[39m `` -> `...1`
[1m[22mNew names:
[36m•[39m `` -> `...1`
[1m[22mNew names:
[36m•[39m `` -> `...1`


[1] "Exc L4-5 RORB LCN15"
[1] "Exc L3-4 RORB SEMA6D"
[1] "Exc L3-5 RORB HSPB3"
[1] "Exc L4-5 RORB HNRNPA1P46"
[1] "Exc L3-4 RORB FOLH1B"
[1] "Exc L2-4 RORB GRIK1"
[1] "Exc L4-5 RORB RPL31P31"
[1] "Exc L4-5 RORB LINC01474"
[1] "Exc L3-5 RORB CMAHP"
[1] "Exc L3-4 RORB PRSS12"
[1] "Exc L5 RORB LINC01202"
[1] "Exc L5 RORB SNHG7"
[1] "Exc L3 RORB CARTPT"
[1] "Exc L3-5 RORB CD24"
[1] "Exc L5-6 RORB LINC00320"
[1] "Exc L4-6 RORB HPCA"


In [None]:
# Workbook of significant PD-vs-HC DEGs, one sheet per Exc RORB subtype
file_path <- "../../DEG_GO_Analysis/DEGs_PD_vs_HC/Significant_DEGs_PD_vs_HC_RORB.xlsx"

# Read every sheet up front into a list keyed by sheet name
sheet_names <- excel_sheets(file_path)
all_sheets <- setNames(
  lapply(sheet_names, function(sheet) read_excel(file_path, sheet = sheet)),
  sheet_names
)

# NOTE(review): `avg_log2FC > 0.25` keeps only positive fold changes — confirm
# that DEGs with negative fold change are meant to be excluded.
for (name in sheet_names) {
  print(name)
  gene_data <- all_sheets[[name]]
  # Name the columns positionally; the first (unnamed) column becomes `gene`
  colnames(gene_data) <- c("gene", "p_val", "avg_log2FC", "pct.1", "pct.2", "p_val_adj")

  # Keep rows with p_val_adj < 0.001 and avg_log2FC > 0.25
  filtered_genes <- dplyr::filter(gene_data, p_val_adj < 0.001, avg_log2FC > 0.25)

  total_genes <- c(total_genes, as.vector(filtered_genes$gene))
}

[1m[22mNew names:
[36m•[39m `` -> `...1`
[1m[22mNew names:
[36m•[39m `` -> `...1`
[1m[22mNew names:
[36m•[39m `` -> `...1`
[1m[22mNew names:
[36m•[39m `` -> `...1`
[1m[22mNew names:
[36m•[39m `` -> `...1`
[1m[22mNew names:
[36m•[39m `` -> `...1`
[1m[22mNew names:
[36m•[39m `` -> `...1`
[1m[22mNew names:
[36m•[39m `` -> `...1`
[1m[22mNew names:
[36m•[39m `` -> `...1`
[1m[22mNew names:
[36m•[39m `` -> `...1`
[1m[22mNew names:
[36m•[39m `` -> `...1`
[1m[22mNew names:
[36m•[39m `` -> `...1`
[1m[22mNew names:
[36m•[39m `` -> `...1`
[1m[22mNew names:
[36m•[39m `` -> `...1`
[1m[22mNew names:
[36m•[39m `` -> `...1`


[1] "Exc L4-5 RORB LCN15"
[1] "Exc L3-4 RORB SEMA6D"
[1] "Exc L3-5 RORB HSPB3"
[1] "Exc L4-5 RORB HNRNPA1P46"
[1] "Exc L3-4 RORB FOLH1B"
[1] "Exc L2-4 RORB GRIK1"
[1] "Exc L4-5 RORB RPL31P31"
[1] "Exc L4-5 RORB LINC01474"
[1] "Exc L3-5 RORB CMAHP"
[1] "Exc L3-4 RORB PRSS12"
[1] "Exc L5 RORB LINC01202"
[1] "Exc L5 RORB SNHG7"
[1] "Exc L3 RORB CARTPT"
[1] "Exc L3-5 RORB CD24"
[1] "Exc L5-6 RORB LINC00320"


In [None]:
# Workbook of significant RBD-vs-HC DEGs, one sheet per Exc RORB subtype
file_path <- "../../DEG_GO_Analysis/DEGs_RBD_vs_HC/Significant_DEGs_RBD_vs_HC_RORB.xlsx"

# Read every sheet up front into a list keyed by sheet name
sheet_names <- excel_sheets(file_path)
all_sheets <- setNames(
  lapply(sheet_names, function(sheet) read_excel(file_path, sheet = sheet)),
  sheet_names
)

# NOTE(review): `avg_log2FC > 0.25` keeps only positive fold changes — confirm
# that DEGs with negative fold change are meant to be excluded.
for (name in sheet_names) {
  print(name)
  gene_data <- all_sheets[[name]]
  # Name the columns positionally; the first (unnamed) column becomes `gene`
  colnames(gene_data) <- c("gene", "p_val", "avg_log2FC", "pct.1", "pct.2", "p_val_adj")

  # Keep rows with p_val_adj < 0.001 and avg_log2FC > 0.25
  filtered_genes <- dplyr::filter(gene_data, p_val_adj < 0.001, avg_log2FC > 0.25)

  total_genes <- c(total_genes, as.vector(filtered_genes$gene))
}

[1m[22mNew names:
[36m•[39m `` -> `...1`
[1m[22mNew names:
[36m•[39m `` -> `...1`
[1m[22mNew names:
[36m•[39m `` -> `...1`
[1m[22mNew names:
[36m•[39m `` -> `...1`
[1m[22mNew names:
[36m•[39m `` -> `...1`
[1m[22mNew names:
[36m•[39m `` -> `...1`
[1m[22mNew names:
[36m•[39m `` -> `...1`
[1m[22mNew names:
[36m•[39m `` -> `...1`
[1m[22mNew names:
[36m•[39m `` -> `...1`
[1m[22mNew names:
[36m•[39m `` -> `...1`
[1m[22mNew names:
[36m•[39m `` -> `...1`
[1m[22mNew names:
[36m•[39m `` -> `...1`
[1m[22mNew names:
[36m•[39m `` -> `...1`
[1m[22mNew names:
[36m•[39m `` -> `...1`
[1m[22mNew names:
[36m•[39m `` -> `...1`


[1] "Exc L4-5 RORB LCN15"
[1] "Exc L3-4 RORB SEMA6D"
[1] "Exc L3-5 RORB HSPB3"
[1] "Exc L4-5 RORB HNRNPA1P46"
[1] "Exc L3-4 RORB FOLH1B"
[1] "Exc L2-4 RORB GRIK1"
[1] "Exc L4-5 RORB RPL31P31"
[1] "Exc L4-5 RORB LINC01474"
[1] "Exc L3-5 RORB CMAHP"
[1] "Exc L3-4 RORB PRSS12"
[1] "Exc L5 RORB LINC01202"
[1] "Exc L5 RORB SNHG7"
[1] "Exc L3 RORB CARTPT"
[1] "Exc L3-5 RORB CD24"
[1] "Exc L5-6 RORB LINC00320"


### Inh Subset Genes

In [None]:
library(readxl)
library(dplyr)

# Workbook with one sheet of marker-gene statistics per Inh subtype
file_path <- "../../Annotate_Inh_Subtype/Inh_subset_cells/Marker_Genes.xlsx"

# Read every sheet up front into a list keyed by sheet name
sheet_names <- excel_sheets(file_path)
all_sheets <- setNames(
  lapply(sheet_names, function(sheet) read_excel(file_path, sheet = sheet)),
  sheet_names
)

for (name in sheet_names) {
  print(name)
  gene_data <- all_sheets[[name]]
  # Name the columns positionally; the first (unnamed) column becomes `gene`
  colnames(gene_data) <- c("gene", "p_val", "avg_log2FC", "pct.1", "pct.2", "p_val_adj")

  # Keep rows with p_val_adj < 0.001 and avg_log2FC > 2.
  # NOTE(review): the Mic and Exc sections of this notebook use 0.25 as the
  # fold-change cutoff — confirm the stricter value of 2 is intended here.
  filtered_genes <- dplyr::filter(gene_data, p_val_adj < 0.001, avg_log2FC > 2)

  total_genes <- c(total_genes, as.vector(filtered_genes$gene))
}

[1m[22mNew names:
[36m•[39m `` -> `...1`
[1m[22mNew names:
[36m•[39m `` -> `...1`
[1m[22mNew names:
[36m•[39m `` -> `...1`
[1m[22mNew names:
[36m•[39m `` -> `...1`
[1m[22mNew names:
[36m•[39m `` -> `...1`
[1m[22mNew names:
[36m•[39m `` -> `...1`
[1m[22mNew names:
[36m•[39m `` -> `...1`
[1m[22mNew names:
[36m•[39m `` -> `...1`
[1m[22mNew names:
[36m•[39m `` -> `...1`
[1m[22mNew names:
[36m•[39m `` -> `...1`
[1m[22mNew names:
[36m•[39m `` -> `...1`
[1m[22mNew names:
[36m•[39m `` -> `...1`
[1m[22mNew names:
[36m•[39m `` -> `...1`
[1m[22mNew names:
[36m•[39m `` -> `...1`
[1m[22mNew names:
[36m•[39m `` -> `...1`
[1m[22mNew names:
[36m•[39m `` -> `...1`
[1m[22mNew names:
[36m•[39m `` -> `...1`
[1m[22mNew names:
[36m•[39m `` -> `...1`
[1m[22mNew names:
[36m•[39m `` -> `...1`
[1m[22mNew names:
[36m•[39m `` -> `...1`
[1m[22mNew names:
[36m•[39m `` -> `...1`
[1m[22mNew names:
[36m•[39m `` -> `...1`


[1] "Inh L1 SST CXCL14"
[1] "Inh L1-4 LAMP5 DUSP4"
[1] "Inh L3-5 SST MAFB"
[1] "Inh L1 PAX6 CA4"
[1] "Inh L2-4 PVALB C8orf4"
[1] "Inh L5-6 PVALB STON2"
[1] "Inh L1-6 LAMP5 CA13"
[1] "Inh L6 LAMP5 ANKRD20A11P"
[1] "Inh L2-4 SST AHR"
[1] "Inh L5 PVALB CNTNAP3P2"
[1] "Inh L1 LAMP5 NDNF"
[1] "Inh L5-6 PVALB FAM150B"
[1] "Inh L1-3 VIP SSTR1"
[1] "Inh L1 ADARB2 ADAM33"
[1] "Inh L4-6 SST MTHFD2P6"
[1] "Inh L5-6 LAMP5 SFTA3"
[1] "Inh L1-3 VIP GGH"
[1] "Inh L1-3 PVALB WFDC2"
[1] "Inh L5-6 SST ISOC1"
[1] "Inh L5-6 SST TH"
[1] "Inh L5-6 SST KLHL14"
[1] "Inh L1 LAMP5 GGT8P"


In [None]:
# Workbook of significant PD-vs-HC DEGs, one sheet per Inh subtype
file_path <- "../../DEG_GO_Analysis/DEGs_PD_vs_HC/Significant_DEGs_PD_vs_HC_Inh_subset.xlsx"

# Read every sheet up front into a list keyed by sheet name
sheet_names <- excel_sheets(file_path)
all_sheets <- setNames(
  lapply(sheet_names, function(sheet) read_excel(file_path, sheet = sheet)),
  sheet_names
)

for (name in sheet_names) {
  print(name)
  gene_data <- all_sheets[[name]]
  # Name the columns positionally; the first (unnamed) column becomes `gene`
  colnames(gene_data) <- c("gene", "p_val", "avg_log2FC", "pct.1", "pct.2", "p_val_adj")

  # Keep rows with p_val_adj < 0.001 and avg_log2FC > 2.
  # NOTE(review): only positive fold changes pass, and the cutoff (2) differs
  # from the 0.25 used in the Mic and Exc sections — confirm both are intended.
  filtered_genes <- dplyr::filter(gene_data, p_val_adj < 0.001, avg_log2FC > 2)

  total_genes <- c(total_genes, as.vector(filtered_genes$gene))
}

[1m[22mNew names:
[36m•[39m `` -> `...1`
[1m[22mNew names:
[36m•[39m `` -> `...1`
[1m[22mNew names:
[36m•[39m `` -> `...1`
[1m[22mNew names:
[36m•[39m `` -> `...1`
[1m[22mNew names:
[36m•[39m `` -> `...1`
[1m[22mNew names:
[36m•[39m `` -> `...1`
[1m[22mNew names:
[36m•[39m `` -> `...1`
[1m[22mNew names:
[36m•[39m `` -> `...1`
[1m[22mNew names:
[36m•[39m `` -> `...1`
[1m[22mNew names:
[36m•[39m `` -> `...1`
[1m[22mNew names:
[36m•[39m `` -> `...1`
[1m[22mNew names:
[36m•[39m `` -> `...1`
[1m[22mNew names:
[36m•[39m `` -> `...1`
[1m[22mNew names:
[36m•[39m `` -> `...1`
[1m[22mNew names:
[36m•[39m `` -> `...1`
[1m[22mNew names:
[36m•[39m `` -> `...1`
[1m[22mNew names:
[36m•[39m `` -> `...1`
[1m[22mNew names:
[36m•[39m `` -> `...1`
[1m[22mNew names:
[36m•[39m `` -> `...1`
[1m[22mNew names:
[36m•[39m `` -> `...1`
[1m[22mNew names:
[36m•[39m `` -> `...1`


[1] "Inh L1 SST CXCL14"
[1] "Inh L1-4 LAMP5 DUSP4"
[1] "Inh L3-5 SST MAFB"
[1] "Inh L1 PAX6 CA4"
[1] "Inh L2-4 PVALB C8orf4"
[1] "Inh L5-6 PVALB STON2"
[1] "Inh L1-6 LAMP5 CA13"
[1] "Inh L6 LAMP5 ANKRD20A11P"
[1] "Inh L2-4 SST AHR"
[1] "Inh L5 PVALB CNTNAP3P2"
[1] "Inh L1 LAMP5 NDNF"
[1] "Inh L5-6 PVALB FAM150B"
[1] "Inh L1-3 VIP SSTR1"
[1] "Inh L1 ADARB2 ADAM33"
[1] "Inh L4-6 SST MTHFD2P6"
[1] "Inh L5-6 LAMP5 SFTA3"
[1] "Inh L1-3 VIP GGH"
[1] "Inh L1-3 PVALB WFDC2"
[1] "Inh L5-6 SST ISOC1"
[1] "Inh L5-6 SST TH"
[1] "Inh L5-6 SST KLHL14"


In [None]:
# Workbook of significant RBD-vs-HC DEGs, one sheet per Inh subtype
file_path <- "../../DEG_GO_Analysis/DEGs_RBD_vs_HC/Significant_DEGs_RBD_vs_HC_Inh_subset.xlsx"

# Read every sheet up front into a list keyed by sheet name
sheet_names <- excel_sheets(file_path)
all_sheets <- setNames(
  lapply(sheet_names, function(sheet) read_excel(file_path, sheet = sheet)),
  sheet_names
)

for (name in sheet_names) {
  print(name)
  gene_data <- all_sheets[[name]]
  # Name the columns positionally; the first (unnamed) column becomes `gene`
  colnames(gene_data) <- c("gene", "p_val", "avg_log2FC", "pct.1", "pct.2", "p_val_adj")

  # Keep rows with p_val_adj < 0.001 and avg_log2FC > 2.
  # NOTE(review): only positive fold changes pass, and the cutoff (2) differs
  # from the 0.25 used in the Mic and Exc sections — confirm both are intended.
  filtered_genes <- dplyr::filter(gene_data, p_val_adj < 0.001, avg_log2FC > 2)

  total_genes <- c(total_genes, as.vector(filtered_genes$gene))
}

[1m[22mNew names:
[36m•[39m `` -> `...1`
[1m[22mNew names:
[36m•[39m `` -> `...1`
[1m[22mNew names:
[36m•[39m `` -> `...1`
[1m[22mNew names:
[36m•[39m `` -> `...1`
[1m[22mNew names:
[36m•[39m `` -> `...1`
[1m[22mNew names:
[36m•[39m `` -> `...1`
[1m[22mNew names:
[36m•[39m `` -> `...1`
[1m[22mNew names:
[36m•[39m `` -> `...1`
[1m[22mNew names:
[36m•[39m `` -> `...1`
[1m[22mNew names:
[36m•[39m `` -> `...1`
[1m[22mNew names:
[36m•[39m `` -> `...1`
[1m[22mNew names:
[36m•[39m `` -> `...1`
[1m[22mNew names:
[36m•[39m `` -> `...1`
[1m[22mNew names:
[36m•[39m `` -> `...1`
[1m[22mNew names:
[36m•[39m `` -> `...1`
[1m[22mNew names:
[36m•[39m `` -> `...1`
[1m[22mNew names:
[36m•[39m `` -> `...1`
[1m[22mNew names:
[36m•[39m `` -> `...1`
[1m[22mNew names:
[36m•[39m `` -> `...1`
[1m[22mNew names:
[36m•[39m `` -> `...1`
[1m[22mNew names:
[36m•[39m `` -> `...1`


[1] "Inh L1 SST CXCL14"
[1] "Inh L1-4 LAMP5 DUSP4"
[1] "Inh L3-5 SST MAFB"
[1] "Inh L1 PAX6 CA4"
[1] "Inh L2-4 PVALB C8orf4"
[1] "Inh L5-6 PVALB STON2"
[1] "Inh L1-6 LAMP5 CA13"
[1] "Inh L6 LAMP5 ANKRD20A11P"
[1] "Inh L2-4 SST AHR"
[1] "Inh L5 PVALB CNTNAP3P2"
[1] "Inh L1 LAMP5 NDNF"
[1] "Inh L5-6 PVALB FAM150B"
[1] "Inh L1-3 VIP SSTR1"
[1] "Inh L1 ADARB2 ADAM33"
[1] "Inh L4-6 SST MTHFD2P6"
[1] "Inh L5-6 LAMP5 SFTA3"
[1] "Inh L1-3 VIP GGH"
[1] "Inh L1-3 PVALB WFDC2"
[1] "Inh L5-6 SST ISOC1"
[1] "Inh L5-6 SST TH"
[1] "Inh L5-6 SST KLHL14"


## Save expression for curated list of genes

In [16]:
# Collapse genes that were collected from more than one source into a single
# entry each.
total_genes <- unique(total_genes)

In [17]:
# Report how many distinct genes were collected.
length(unique(total_genes))

In [None]:
# Persist the curated gene list, one symbol per line. Enable the line that
# matches the cell type being processed.
# writeLines(total_genes, "../../Signac_plots/saved_files/mic_total_genes.txt")
# writeLines(total_genes, "../../Signac_plots/saved_files/exn_rorb_total_genes.txt")
writeLines(
  text = total_genes,
  con = "../../Signac_plots/saved_files/inh_subset_total_genes.txt"
)

In [None]:
# Reload the curated gene list from disk (lets the notebook resume from here
# without re-running the curation cells). Enable the matching line.
# total_genes <- readLines("../../Signac_plots/saved_files/mic_total_genes.txt")
# total_genes <- readLines("../../Signac_plots/saved_files/exn_rorb_total_genes.txt")
total_genes <- readLines(
  con = "../../Signac_plots/saved_files/inh_subset_total_genes.txt"
)

In [None]:
# Compute peak-gene links for the curated genes (ATAC peaks vs. SCT
# expression), then pull the stored links out as a data frame.
ifnb <- LinkPeaks(
  ifnb,
  peak.assay = "ATAC",
  expression.assay = "SCT",
  genes.use = total_genes
)

# Each element of the returned GRanges is one peak-gene link. Judging by the
# head(df) output in this notebook, the range (start/end/width) spans the whole
# link and is much wider than a peak; the peak's own coordinates are in the
# `peak` metadata column and the linked gene in `gene`.
peak_gene_links <- Links(object = ifnb[["ATAC"]])
df <- as.data.frame(x = peak_gene_links)

Testing 1679 genes and 138047 peaks

Found gene coordinates for 1212 genes

"Each of the 2 combined objects has sequence levels not in the other:
  - in 'x': GL000194.1, GL000195.1, GL000205.2, GL000218.1, GL000219.1, KI270711.1, KI270713.1, KI270721.1, KI270726.1, KI270727.1, KI270728.1, KI270731.1, KI270734.1, GL000009.2, GL000213.1
  - in 'y': chrMT
  Make sure to always combine/compare objects based on the same reference


In [21]:
# Preview the first peak-gene links (range, score, gene, peak, zscore, pvalue).
head(df)

Unnamed: 0_level_0,seqnames,start,end,width,strand,score,gene,peak,zscore,pvalue
Unnamed: 0_level_1,<fct>,<int>,<int>,<int>,<fct>,<dbl>,<chr>,<chr>,<dbl>,<dbl>
1,chr1,812991,1309609,496619,*,0.05285895,ACAP3,chr1-812528-813454,4.369249,6.233726e-06
2,chr1,1000172,1250407,250236,*,0.05096074,HES4,chr1-1249805-1251009,4.138091,1.751036e-05
3,chr1,9088478,9250223,161746,*,0.05143187,SLC2A5,chr1-9249751-9250695,4.781409,8.703533e-07
4,chr1,9088478,9438513,350036,*,0.0781318,SLC2A5,chr1-9437929-9439097,7.460428,4.312092e-14
5,chr1,9651732,9689242,37511,*,0.0512915,PIK3CD,chr1-9688541-9689942,4.337663,7.200301e-06
6,chr1,13583913,13892792,308880,*,0.05089589,KAZN,chr1-13583110-13584716,2.885514,0.001953872


In [None]:
# Build a BED-style table with one row per peak-gene link, using the
# coordinates of the linked peak itself.
#
# The ranges in `df` (seqnames/start/end) span the whole link — compare the
# `width` and `peak` columns in head(df) — so using them as peak coordinates
# would write link spans of hundreds of kb into the "peaks" outputs. The peak
# coordinates are therefore parsed from the "chrom-start-end" strings in the
# `peak` column.
peak_pattern <- "^(.+)-([0-9]+)-([0-9]+)$"
if (!all(grepl(peak_pattern, df$peak))) {
  stop("Unexpected peak name format; expected 'chrom-start-end'", call. = FALSE)
}

links_df <- as_tibble(df) |>
  transmute(
    chrom      = sub(peak_pattern, "\\1", peak),   # chromosome of the peak
    # Peak names are treated as 1-based (the convention this cell already
    # assumed for Signac ranges); BED wants 0-based half-open — TODO confirm.
    chromStart = as.integer(sub(peak_pattern, "\\2", peak)) - 1L,
    chromEnd   = as.integer(sub(peak_pattern, "\\3", peak)),
    gene       = gene,                             # linked gene symbol
    score      = score                             # link score (optional)
  )

In [None]:
# Add an identifier column of the form "<chrom>:<chromStart>-<chromEnd>",
# built from the coordinate columns of links_df.
links_df <- mutate(
  links_df,
  peak_id = paste0(chrom, ":", chromStart, "-", chromEnd)
)

In [None]:
library(tidyverse)
# Save the link table as a tab-separated file with a header row. Enable the
# line that matches the cell type being processed.
# write_tsv(links_df,  "../../Signac_plots/saved_files/HC_gene_peak_links_mic.tsv")
# write_tsv(links_df,  "../../Signac_plots/saved_files/HC_gene_peak_links_exn_rorb.tsv")
readr::write_tsv(
  links_df,
  "../../Signac_plots/saved_files/HC_gene_peak_links_inh_subset.tsv"
)

In [None]:
# Reduce the link table to the four BED columns: chrom, start, end, name.
# NOTE(review): rows are not de-duplicated, so an interval linked to several
# genes is written several times — confirm downstream tools expect that.
peaks_df <- dplyr::select(links_df, chrom, chromStart, chromEnd, peak_id)

# Write the table without a header row. Enable the call that matches the cell
# type being processed.

# readr::write_tsv(
#   peaks_df, 
#   "../../Signac_plots/saved_files/HC_peaks_mic.bed", 
#   col_names = FALSE
# )

# readr::write_tsv(
#   peaks_df, 
#   "../../Signac_plots/saved_files/HC_peaks_exn_rorb.bed", 
#   col_names = FALSE
# )

readr::write_tsv(
  peaks_df,
  "../../Signac_plots/saved_files/HC_peaks_inh_subset.bed",
  col_names = FALSE
)

In [None]:
# Load the transcription-factor list; only the first column of the file is
# kept, as a plain vector.
tf_table <- read.table(
  file = "../../Signac_plots/saved_files/all_tfs.txt",
  stringsAsFactors = FALSE
)
tfs <- tf_table[[1L]]

In [None]:
# Make the SCT expression assay the active one, then print the object summary
# to confirm the switch.
DefaultAssay(ifnb) <- "SCT"
ifnb

In [None]:
# Restrict the object to the genes whose expression will be exported: the SCT
# variable features plus the transcription factors loaded into `tfs` (the TF
# list is defined in Python; read it back or compute it the same way in R).
# NOTE(review): the curated `total_genes` list is not part of `keep_genes`, so
# linked genes that are neither variable features nor TFs are dropped here —
# confirm this is intended.
DefaultAssay(ifnb) <- "SCT"
keep_genes <- union(VariableFeatures(object = ifnb), tfs)
ifnb_subset <- subset(x = ifnb, features = keep_genes)

In [None]:
# Report how many distinct genes were requested for export.
length(unique(keep_genes))

In [None]:
# Print a summary of the gene-restricted object.
ifnb_subset

In [None]:
# Export the gene-restricted object to a loom file. Enable the call that
# matches the cell type being processed.
# NOTE(review): whether SaveLoom honours `slot = "data"` was not verified —
# check the SeuratDisk documentation.

# SaveLoom(
#   object   = ifnb_subset,
#   filename = "../../Signac_plots/saved_files/hc_mic_all_tfs_rna_MIC.loom",
#   assay    = "SCT",
#   slot     = "data"
# )

# SaveLoom(
#   object   = ifnb_subset,
#   filename = "../../Signac_plots/saved_files/hc_exn_all_tfs_rna_EXN_RORB.loom",
#   assay    = "SCT",
#   slot     = "data"
# )

inh_loom_path <- "../../Signac_plots/saved_files/hc_inh_all_tfs_rna_INH_SUBSET.loom"
SaveLoom(ifnb_subset, filename = inh_loom_path, assay = "SCT", slot = "data")