In [3]:
suppressMessages(library(ArchR))
suppressMessages(library(Seurat))
suppressMessages(library(Signac))
suppressMessages(library(dplyr))
suppressMessages(library(cowplot))
suppressMessages(library(Nebulosa))
suppressMessages(library(ggpubr))
suppressMessages(library(BSgenome.Hsapiens.UCSC.hg38))
suppressMessages(library(EnsDb.Hsapiens.v86))
suppressMessages(library(Pando))

In [4]:
# Fix the RNG seed so that any stochastic steps below are reproducible.
set.seed(42)
# Show the working directory; the relative "../data/..." paths used later
# are resolved against it.
getwd()

In [5]:
# Load the annotated co-embedding object from disk, then display its summary.
coembed <- readRDS(file = "../data/coembed/coembed.annotation.Rds")
coembed

An object of class Seurat 
159644 features across 3473 samples within 3 assays 
Active assay: RNA (28933 features, 2000 variable features)
 2 other assays present: peaks, GeneActivity
 5 dimensional reductions calculated: pca, umap, harmony, umap_harmony, umap_harmony_v2

In [6]:
# Partition the co-embedded object by the value of `tech`.
obj.rna <- subset(x = coembed, subset = tech == "RNA")
obj.atac <- subset(x = coembed, subset = tech == "ATAC")

# Display both subsets (ATAC first, then RNA).
obj.atac
obj.rna

An object of class Seurat 
159644 features across 858 samples within 3 assays 
Active assay: RNA (28933 features, 2000 variable features)
 2 other assays present: peaks, GeneActivity
 5 dimensional reductions calculated: pca, umap, harmony, umap_harmony, umap_harmony_v2

An object of class Seurat 
159644 features across 2615 samples within 3 assays 
Active assay: RNA (28933 features, 2000 variable features)
 2 other assays present: peaks, GeneActivity
 5 dimensional reductions calculated: pca, umap, harmony, umap_harmony, umap_harmony_v2

In [7]:
# Load the ATAC <-> RNA matching table (one ATAC barcode and one RNA barcode
# per row).
df_cell_pairing <- readRDS("../data/coembed/ATAC_RNA_matching.rds")

# The ATAC and RNA barcode columns are used later to pull count matrices,
# so fail early if either one is missing.
stopifnot(all(c("ATAC", "RNA") %in% colnames(df_cell_pairing)))

# Give every pair a shared synthetic name ("cell-1", "cell-2", ...).
# sprintf() with seq_len() yields a zero-length vector for an empty table,
# whereas 1:nrow() would count down from 1 to 0 and paste0() would recycle
# a zero-length input to "cell-".
df_cell_pairing$cell_name <- sprintf(
    "cell-%d",
    seq_len(nrow(df_cell_pairing))
)

head(df_cell_pairing)

Unnamed: 0_level_0,ATAC,RNA,cell_name
Unnamed: 0_level_1,<chr>,<chr>,<chr>
1,CK171#TCACCACAGCCATCAT-1,CAACCTCTCGGTTCAA-1_1_1_1,cell-1
2,CK171#GAGACTTGTAAGCCGA-1,TCACGCTAGCGATGCA-1_1_1_1_1,cell-2
3,CK171#GACCCAGCATTTAGGC-1,CGTTCTGTCACTCGAA-1_2_1_1_1_1,cell-3
4,CK171#AAGGTTCGTCGCTACG-1,AGGACTTTCCCATAGA-1_1_1_1_1,cell-4
5,CK171#AATGGCTGTTACGAAA-1,GGTTAACTCGCTCTAC-1_2_1,cell-5
6,CK171#AAAGATGAGCCTGTAT-1,TGTCCTGTCTCCGAGG-1_2_1_1_1_1,cell-6


In [8]:
## Fail early, with a readable message, if the pairing table references
## barcodes that are not present in the corresponding subset.
stopifnot(
    "paired ATAC barcodes missing from obj.atac" =
        all(df_cell_pairing$ATAC %in% colnames(obj.atac)),
    "paired RNA barcodes missing from obj.rna" =
        all(df_cell_pairing$RNA %in% colnames(obj.rna))
)

## Pull the raw count matrices through the Seurat accessor instead of
## hard-coding the object's internal slot layout, then order the columns by
## the pairing table. drop = FALSE keeps a matrix even for a single pair.
atac_counts <- GetAssayData(obj.atac, assay = "peaks", slot = "counts")
atac_counts <- atac_counts[, df_cell_pairing$ATAC, drop = FALSE]

rna_counts <- GetAssayData(obj.rna, assay = "RNA", slot = "counts")
rna_counts <- rna_counts[, df_cell_pairing$RNA, drop = FALSE]

## Unify the cell names in ATAC and RNA so that column i of both matrices
## carries the same synthetic name.
colnames(atac_counts) <- df_cell_pairing$cell_name
colnames(rna_counts) <- df_cell_pairing$cell_name

## Create the Seurat object for Pando: RNA counts as the base assay, with
## the ATAC counts attached as a chromatin assay named "peaks".
obj <- CreateSeuratObject(counts = rna_counts, assay = "RNA")

obj[["peaks"]] <- CreateChromatinAssay(
    counts = atac_counts,
    genome = "hg38"
)

DefaultAssay(obj) <- "peaks"

obj

An object of class Seurat 
140618 features across 858 samples within 2 assays 
Active assay: peaks (111685 features, 0 variable features)
 1 other assay present: RNA

In [9]:
## Attach gene annotations to the peaks assay.
# Build the annotation ranges from the Ensembl v86 database.
annotations <- GetGRangesFromEnsDb(
    ensdb = EnsDb.Hsapiens.v86,
    verbose = FALSE
)

# Use UCSC-style sequence names and label the genome build as hg38, the
# same build passed to CreateChromatinAssay() for the peaks assay.
seqlevelsStyle(annotations) <- "UCSC"
genome(annotations) <- "hg38"

# Store the gene annotation on the peaks assay.
Annotation(obj[["peaks"]]) <- annotations

"The 2 combined objects have no sequence levels in common. (Use
"The 2 combined objects have no sequence levels in common. (Use
"The 2 combined objects have no sequence levels in common. (Use
"The 2 combined objects have no sequence levels in common. (Use
"The 2 combined objects have no sequence levels in common. (Use
"The 2 combined objects have no sequence levels in common. (Use
"The 2 combined objects have no sequence levels in common. (Use
"The 2 combined objects have no sequence levels in common. (Use
"The 2 combined objects have no sequence levels in common. (Use
"The 2 combined objects have no sequence levels in common. (Use
"The 2 combined objects have no sequence levels in common. (Use
"The 2 combined objects have no sequence levels in common. (Use
"The 2 combined objects have no sequence levels in common. (Use
"The 2 combined objects have no sequence levels in common. (Use
"The 2 combined objects have no sequence levels in common. (Use
"The 2 combined objects have no sequence

In [10]:
# Switch to the RNA assay, then normalize it and pick variable features
# using the default settings of both functions.
DefaultAssay(obj) <- "RNA"

obj <- NormalizeData(obj)
obj <- FindVariableFeatures(obj)

In [11]:
# Set up the GRN object and choose candidate regions; exclude_exons is
# switched off for this run.
obj <- initiate_grn(
    obj,
    exclude_exons = FALSE
)

In [13]:
## Collect TF binding motifs from JASPAR2020
suppressMessages(library(JASPAR2020))
suppressMessages(library(TFBSTools))

# Query options: "Homo sapiens" motifs from the "CORE" collection.
opts <- list(species = "Homo sapiens", collection = "CORE")
motifs <- getMatrixSet(JASPAR2020, opts)

# Build the motif -> TF lookup table. name(motifs) supplies the TF names;
# the row names of the resulting data frame supply the motif IDs.
motif_tfs <- as.data.frame(name(motifs))
colnames(motif_tfs) <- "tf"
motif_tfs$motif <- rownames(motif_tfs)
motif_tfs <- motif_tfs[, c("motif", "tf")]

head(motif_tfs)

nrow(motif_tfs)

Unnamed: 0_level_0,motif,tf
Unnamed: 0_level_1,<chr>,<chr>
MA0030.1,MA0030.1,FOXF2
MA0031.1,MA0031.1,FOXD1
MA0051.1,MA0051.1,IRF2
MA0057.1,MA0057.1,MZF1(var.2)
MA0059.1,MA0059.1,MAX::MYC
MA0066.1,MA0066.1,PPARG


In [15]:
# Scan the candidate regions for TF binding motifs, using the hg38 genome
# sequence and the motif -> TF table built above.
obj <- find_motifs(
    obj,
    genome = BSgenome.Hsapiens.UCSC.hg38,
    pfm = motifs,
    motif_tfs = motif_tfs
)

Adding TF info

Building motif matrix

Finding motif positions

Creating Motif object



In [16]:
# Infer the gene regulatory network: regions are linked to genes with the
# "Signac" method and the models are fitted with "glm".
obj <- infer_grn(
    obj,
    method = "glm",
    peak_to_gene_method = "Signac"
)

Selecting candidate regulatory regions near genes

Preparing model input

Fitting models for 1462 target genes



In [17]:
# Keep only coefficients with adjusted p-value below 0.05. Rows whose padj
# is NA are dropped as well, and the original row names are preserved.
df <- as.data.frame(obj@grn@networks$glm_network@coefs)
df <- df[!is.na(df$padj) & df$padj < 0.05, , drop = FALSE]

head(df)

Unnamed: 0_level_0,tf,target,region,term,estimate,std_err,statistic,pval,padj
Unnamed: 0_level_1,<chr>,<chr>,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
16,MEIS3,BCOR,chrX-40098312-40098812,chrX_40098312_40098812:MEIS3,0.6205188,0.09470899,6.551847,9.801397e-11,1.108234e-09
34,USF1,MTMR8,chrX-64395225-64395725,chrX_64395225_64395725:USF1,0.4564577,0.12448931,3.666642,0.0002609154,0.00164004
41,BATF,MSN,chrX-65723649-65724149,BATF:chrX_65723649_65724149,0.5986432,0.22571948,2.652156,0.008147016,0.04055076
59,SPI1,PAK3,chrX-111098751-111099251,chrX_111098751_111099251:SPI1,0.325102,0.06202882,5.241145,2.011751e-07,1.752317e-06
60,MGA,PAK3,chrX-111098751-111099251,chrX_111098751_111099251:MGA,0.1698445,0.03296069,5.152942,3.185931e-07,2.731812e-06
66,KLF11,CXorf40A,chrX-149540298-149540798,chrX_149540298_149540798:KLF11,0.5064376,0.04789442,10.574042,1.209418e-24,2.6381080000000002e-23


In [18]:
# Count the distinct values in the `target` column of the fitted coefficients.
dplyr::n_distinct(obj@grn@networks$glm_network@coefs$target)

In [19]:
# Derive modules from the inferred network, using find_modules() defaults.
obj <- find_modules(obj)

In [20]:
# Pull the modules object out of `obj`. The commented-out lines below show
# the two slots inspected interactively (`meta` and `features`).
modules <- NetworkModules(obj)
#modules@meta
#modules@features

In [22]:
# Display the module metadata table (per the output: target, tf, estimate,
# region/gene/TF counts, regions, pval, padj).
modules@meta

target,tf,estimate,n_regions,n_genes,n_tfs,regions,pval,padj
<chr>,<chr>,<dbl>,<int>,<int>,<int>,<chr>,<dbl>,<dbl>
HMBS,CREB3,0.502456918,1,1,1,chr11-119057030-119057530,1.413769e-16,2.239922e-15
TNNT3,E2F6,0.150808737,2,1,2,chr11-1839764-1840264,7.670730e-35,2.391280e-33
BBC3,EGR2,0.221948001,3,1,3,chr19-47274053-47274553,2.391756e-05,1.727628e-04
KEAP1,EGR3,1.008195189,3,1,2,chr19-10478848-10479348;chr19-10602148-10602648,9.042581e-10,9.564268e-09
GZMB,EHF,0.644146878,4,1,3,chr14-24695188-24695688,2.421297e-10,2.663427e-09
TOP1MT,ELF3,0.415747555,3,1,3,chr8-143281493-143281993,3.595062e-18,6.179013e-17
LINC01569,ELK3,0.013216023,3,2,2,chr16-4307497-4307997,8.634882e-04,5.052324e-03
SPSB2,ELK3,0.003256462,3,2,2,chr12-6943749-6944249;chr12-6970506-6971006,6.425397e-08,5.882944e-07
FCN3,ELK4,0.061526038,2,3,2,chr1-27321907-27322407,3.623591e-05,2.557763e-04
SNHG15,ELK4,0.256699658,4,3,3,chr7-44999794-45000294;chr7-45000351-45000851,5.909617e-04,3.546691e-03


In [23]:
# Record the R version, platform and attached package versions used for
# this run.
sessionInfo()

R version 4.1.1 (2021-08-10)
Platform: x86_64-conda-linux-gnu (64-bit)
Running under: CentOS Linux 8

Matrix products: default
BLAS/LAPACK: /home/rs619065/miniconda3/envs/r-4.1/lib/libopenblasp-r0.3.18.so

locale:
 [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
 [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
 [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
 [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
 [9] LC_ADDRESS=C               LC_TELEPHONE=C            
[11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       

attached base packages:
[1] stats4    stats     graphics  grDevices utils     datasets  methods  
[8] base     

other attached packages:
 [1] TFBSTools_1.32.0                  JASPAR2020_0.99.10               
 [3] Pando_0.4.2                       EnsDb.Hsapiens.v86_2.99.0        
 [5] ensembldb_2.18.2                  AnnotationFilter_1.18.0          
 [7] GenomicFeatures_1.46.1            AnnotationDbi_1.56.2             
 