In [2]:
library(tidyverse)
library(Seurat)
library(sctransform)
library(SingleCellExperiment)
library(scales)
require(patchwork)
library(Signac)
library(BSgenome.Hsapiens.UCSC.hg38)
library(SeuratData)
library(SeuratDisk)
library(data.table)
library(gridExtra)



In [3]:
sessionInfo()

R version 4.1.1 (2021-08-10)
Platform: x86_64-conda-linux-gnu (64-bit)
Running under: CentOS Linux 7 (Core)

Matrix products: default
BLAS/LAPACK: /home/myylee/anaconda3/envs/r_py/lib/libopenblasp-r0.3.17.so

locale:
 [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
 [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
 [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
 [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
 [9] LC_ADDRESS=C               LC_TELEPHONE=C            
[11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       

attached base packages:
[1] stats4    stats     graphics  grDevices utils     datasets  methods  
[8] base     

other attached packages:
 [1] data.table_1.14.6                 SeuratDisk_0.0.0.9020            
 [3] SeuratData_0.2.2                  BSgenome.Hsapiens.UCSC.hg38_1.4.3
 [5] BSgenome_1.62.0                   rtracklayer_1.54.0               
 [7] Biostrings_2.62.0                 XVector_0.34.0                  

# function

In [10]:
# Attach per-method integration results to a Seurat object.
#
# For every entry of `methods`, the rows of `df_result_sel` that belong to that
# method are aligned to the cells of `seurat` and used to add
#   * a metadata column "<method>_annot" holding `predicted_ct`, and
#   * a two-column DimReduc named "<method>" built from columns `X0` and `X1`.
#
# Args:
#   seurat:        Seurat object to annotate.
#   methods:       character vector of method names (values of `method`).
#   df_result_sel: data frame with columns `X` (cell barcode; must be unique
#                  within a method because it becomes the row names),
#                  `method`, `predicted_ct`, `X0` and `X1`.
#
# Returns:
#   The Seurat object with the added metadata columns and reductions.
add_annot_res <- function(seurat, methods, df_result_sel) {
    cells <- rownames(seurat@meta.data)
    for (method_i in methods) {
        res_i <- df_result_sel %>%
            dplyr::filter(method == method_i) %>%
            column_to_rownames("X")

        n_missing <- sum(!cells %in% rownames(res_i))
        if (n_missing > 0) {
            warning(
                n_missing, " cell(s) have no '", method_i,
                "' result; their annotation and embedding are set to NA",
                call. = FALSE
            )
        }

        # Align to the object's cell order. Indexing by a barcode that is
        # absent yields an NA row whose row name is "NA", "NA.1", ..., so the
        # real cell names are restored afterwards to keep everything keyed by
        # cell.
        res_i <- res_i[cells, , drop = FALSE]
        rownames(res_i) <- cells

        annot_i <- res_i[["predicted_ct"]]
        names(annot_i) <- cells
        seurat <- AddMetaData(
            object = seurat,
            metadata = annot_i,
            col.name = paste0(method_i, "_annot")
        )

        embed_i <- cbind(dim1 = res_i[["X0"]], dim2 = res_i[["X1"]])
        rownames(embed_i) <- cells

        # Seurat expects reduction keys to end with an underscore; passing the
        # bare method name only triggers a warning and an automatic rename.
        reduc_i <- CreateDimReducObject(
            embeddings = embed_i,
            key = paste0(method_i, "_"),
            assay = DefaultAssay(seurat)
        )

        seurat[[method_i]] <- reduc_i
    }
    return(seurat)
}




#  RNA data

In [3]:
# Locations of the RNA inputs.
scrna_path <- "dataset/hpap/scRNA/"
multiome_path <- "dataset/hpap/multiome/"

# Load the scRNA-seq object and the RNA half of the multiome object.
scrna <- LoadH5Seurat(
    paste0(scrna_path, "hpap_scRNA_10donors.h5seurat")
)
multiome <- LoadH5Seurat(
    paste0(multiome_path, "hpap_multiomeRNA_4donors.h5seurat")
)

In [536]:
# Tag every cell with the assay it came from, then pool both RNA objects.
scrna$technology <- "scRNA"
multiome$technology <- "multiomeRNA"
rna_comb <- merge(x = scrna, y = multiome)


In [537]:
# Log-normalize the pooled RNA counts (scaled to 10,000 per cell).
rna_comb <- NormalizeData(
    object = rna_comb,
    normalization.method = "LogNormalize",
    scale.factor = 10000
)

# ATAC data

In [None]:
# Load the snATAC object and the ATAC half of the multiome object.
snatac_path <- "dataset/hpap/snATAC/"
snatac <- LoadH5Seurat(paste0(snatac_path, "hpap_snATAC_8donors.h5seurat"))
multiome_path <- "dataset/hpap/multiome/"
multiomeatac <- LoadH5Seurat(paste0(multiome_path, "hpap_multiomeATAC_4donors.h5seurat"))

#Fragments(multiomeatac@assays$ATAC)

# Both objects are linked to the same combined fragment file. It is resolved
# once from snatac_path so the two assays cannot point at different locations
# (the snatac call previously used a path without the "dataset/" prefix).
fragments_file <- paste0(snatac_path, "hpap_snATAC_multiomeATAC_fragments.tsv.gz")

# Rebuild each ATAC assay as a ChromatinAssay carrying the fragment file, make
# it the default assay and drop the original "ATAC" assay.
multiomeatac[["ATAC2"]] <- CreateChromatinAssay(
    counts = multiomeatac@assays$ATAC@counts,
    sep = c("-", "-"),
    fragments = fragments_file
)

DefaultAssay(multiomeatac) <- "ATAC2"
multiomeatac[["ATAC"]] <- NULL

snatac[["ATAC2"]] <- CreateChromatinAssay(
    counts = snatac@assays$ATAC@counts,
    sep = c("-", "-"),
    fragments = fragments_file
)

DefaultAssay(snatac) <- "ATAC2"
snatac[["ATAC"]] <- NULL

# Tag every cell with the assay it came from.
snatac$technology <- "snATAC"
multiomeatac$technology <- "multiomeATAC"


# Combine RNA and ATAC data

In [None]:
# Output folder for the combined object and the figures. It is defined here
# because this cell runs before the plotting cells that also set it.
folder_path <- "figures/metric_plots/hpap/"

# Pool the RNA cells (scRNA + multiome RNA) with the snATAC cells and cache the
# result on disk for the plotting section.
# NOTE(review): multiomeatac is prepared above but not merged here - confirm
# that this is intentional.
all_combined <- merge(rna_comb, snatac)
saveRDS(all_combined, file.path(folder_path, "all_combined.rds"))

# plots

## Functions

In [11]:
# functions
# Labelled embedding plot for one integration method.
#
# Args:
#   seurat: Seurat object holding a reduction named `m` and a metadata column
#           "<m>_annot".
#   m:      method name; selects both the reduction and the grouping column.
#
# Returns:
#   A rasterized DimPlot coloured and labelled by "<m>_annot", with axis text,
#   ticks, axis titles and the plot title removed.
dimplot_custom <- function(seurat, m) {
    p <- DimPlot(seurat,
                 group.by = paste0(m, "_annot"),
                 reduction = m,
                 raster = TRUE,
                 label = TRUE
                ) +
    #NoLegend()+
    theme(axis.text = element_blank(),
          axis.ticks = element_blank(),
          axis.title = element_blank(),
          plot.title = element_blank())
    return(p)
}
# Marker-gene dot plot for one integration method.
#
# Args:
#   seurat: Seurat object holding the metadata column "<m>_annot".
#   m:      method name; cells are grouped by "<m>_annot".
#   genes:  features to display.
#
# Returns:
#   A DotPlot with the dot-size scale fixed to 0-80, axis titles removed and
#   the coordinates flipped.
dotplot_custom <- function(seurat, m, genes) {
    annot_col <- paste0(m, "_annot")
    dots <- DotPlot(
        seurat,
        features = genes,
        group.by = annot_col,
        scale.min = 0,
        scale.max = 80
    )
    #NoLegend()+
    axis_theme <- theme(
        axis.text = element_text(size = 10),
        axis.title = element_blank()
    )
    dots + axis_theme + coord_flip()
}

# Embedding plot for one integration method, coloured by technology.
#
# Args:
#   seurat: Seurat object holding a reduction named `m` and a `technology`
#           metadata column.
#   m:      name of the reduction to draw.
#
# Returns:
#   A rasterized DimPlot grouped by `technology`, with the cell plotting order
#   shuffled, and with axis text, ticks, axis titles and the plot title
#   removed.
dimplot_custom_technology <- function(seurat, m) {
    p <- DimPlot(seurat,
                 group.by = "technology",
                 reduction = m,
                 shuffle = TRUE,
                 pt.size = 0.1,
                 raster = TRUE
                ) +
    #NoLegend()+
    theme(axis.text = element_blank(),
          axis.ticks = element_blank(),
          axis.title = element_blank(),
          plot.title = element_blank())
    return(p)
}

# Build the panel list for a methods-by-3 figure grid.
#
# For each method, three panels are produced in this order: annotation
# embedding, technology embedding, marker dot plot.
#
# Args:
#   seurat:   Seurat object passed to the three plotting helpers.
#   md:       not used by this function; kept so existing calls keep working.
#   methods:  method names, one row of three panels each.
#   genes:    features for the dot plots.
#   noLegend: if TRUE, every panel gets `legend.position = "none"`.
#
# Returns:
#   An unnamed list of 3 * length(methods) plots, ordered method by method.
compile_plot2 <- function(seurat, md, methods, genes, noLegend = TRUE) {
    panels <- vector("list", 3L * length(methods))
    for (idx in seq_along(methods)) {
        method_name <- methods[[idx]]
        offset <- 3L * (idx - 1L)
        panels[[offset + 1L]] <- dimplot_custom(seurat, method_name)
        panels[[offset + 2L]] <- dimplot_custom_technology(seurat, method_name)
        panels[[offset + 3L]] <- dotplot_custom(seurat, method_name, genes)
    }
    if (!noLegend) {
        return(panels)
    }
    lapply(panels, function(panel) {
        panel + theme(legend.position = "none")
    })
}


In [4]:
# Folder that holds the cached combined object and receives the figures.
folder_path <- "figures/metric_plots/hpap/"

# Reload the combined object saved earlier.
all_combined <- readRDS(file = file.path(folder_path, "all_combined.rds"))


In [4]:
# Per-cell integration results: one row per cell and method run.
df_result <- read.csv("figures/umap/hpap/df_to_plot.csv")

# Derive the method name from the run key by stripping the "all70000_1_"
# prefix and any "_2".
df_result <- df_result %>%
    mutate(method = gsub("_2", "", gsub("all70000_1_", "", key)))

methods <- unique(df_result$method)
table(df_result$method)

# Strip the "prna_" / "patac_" prefixes from the cell names of paired cells,
# method by method. The per-method pieces are collected and bound once instead
# of growing a data frame with rbind() inside a loop.
# NOTE(review): despite the name, no rows are dropped here; add_annot_res needs
# barcodes to be unique within a method - confirm that holds for paired cells.
df_result_dedup <- do.call(rbind, lapply(methods, function(i) {
    print(i)
    df_result[df_result$method == i, ] %>%
        mutate(bc = gsub("patac_", "", gsub("prna_", "", X))) %>%
        dplyr::select(-X) %>%
        mutate(X = bc)
}))
table(df_result_dedup$method)

# add annotation and reduction
all_combined2 <- add_annot_res(all_combined, methods, df_result_dedup)

# Relabel the LIGER clusters so the IDs start at 0 (0 - 9 for ten clusters).
# Levels are renamed in their sorted order.
# NOTE(review): if the original labels are strings such as "1".."10", the
# sorted order is alphabetical - confirm the mapping is the intended one.
all_combined2$rliger_annot <- as.factor(all_combined2$rliger_annot)
levels(all_combined2$rliger_annot)
levels(all_combined2$rliger_annot) <-
    as.character(seq_along(levels(all_combined2$rliger_annot)) - 1)
levels(all_combined2$rliger_annot)

# The objects are tagged "multiomeRNA" / "multiomeATAC" when they are built,
# while the figures use a single "Multiome" level. Map those tags first so the
# multiome cells do not silently become NA when the factor is created.
technology_label <- as.character(all_combined2$technology)
technology_label[technology_label %in% c("multiomeRNA", "multiomeATAC")] <- "Multiome"
all_combined2$technology <- factor(technology_label,
                                   levels = c("scRNA", "snATAC", "Multiome"))
table(all_combined2$technology)

## methods - figure 8

In [5]:
methods_sel <- c("seurat4int", "multivi", "glue", "rbindsc")
# Marker genes for the dot plots; defined here so this cell does not depend on
# the supplementary-figure cell having been run first.
markers <- c("INS", "GCG", "SST", "PPY", "GHRL", "CPA1", "KRT19", "PDGFRB", "VWF", "PTPRC")

# The "<method>_annot" columns and the per-method reductions are added to
# all_combined2 by add_annot_res(); all_combined does not carry them.
p_sel <- compile_plot2(all_combined2, all_combined2@meta.data, methods_sel, markers, noLegend = TRUE)
p_sel

file_name <- "all_compiled2_wScale_noLegend_noTitle.pdf"

# One row of three panels per method.
pdf(file.path(folder_path, file_name), width = 7, height = 8.5)
grid.arrange(grobs = p_sel, ncol = 3, nrow = 4, widths = c(2, 2, 3))
dev.off()


## methods - supplementary figure

In [6]:
# Methods and marker genes shown in the supplementary figure.
markers <- c("INS", "GCG", "SST", "PPY", "GHRL", "CPA1", "KRT19", "PDGFRB", "VWF", "PTPRC")
methods_alt <- c("rfigr", "rliger", "seurat3", "cobolt", "scmomat")

# Version 1: legends removed.
p_sel <- compile_plot2(
    all_combined2, all_combined2@meta.data, methods_alt, markers,
    noLegend = TRUE
)
p_sel

file_name <- "all_compiled2_wScale_noLegend_noTitle_methods_alt.pdf"

pdf(file.path(folder_path, file_name), width = 7, height = 11)
grid.arrange(grobs = p_sel, ncol = 3, nrow = 5, widths = c(2, 2, 3))
dev.off()


# Version 2: same panels with legends kept.
p_sel <- compile_plot2(
    all_combined2, all_combined2@meta.data, methods_alt, markers,
    noLegend = FALSE
)
p_sel

file_name <- "all_compiled2_wScale_wLegend_noTitle_methods_alt.pdf"

pdf(file.path(folder_path, file_name), width = 7, height = 11)
grid.arrange(grobs = p_sel, ncol = 3, nrow = 5, widths = c(2, 2, 3))
dev.off()


## Venn diagram: donor overlap

In [1000]:
# One row per (donor, technology) combination present in the combined object.
md_sel <- distinct(
    dplyr::select(all_combined@meta.data, donor, technology)
)

In [1031]:
# Donor sets per technology for the Venn diagram.
# The objects are tagged "multiomeRNA" / "multiomeATAC" when they are built, so
# those tags are accepted alongside "Multiome"; filtering on "Multiome" alone
# would leave that set empty. %in% (unlike ==) never yields NA, so cells with a
# missing technology cannot add NA donors, and unique() keeps each donor once
# even when it appears under several multiome tags.
x <- list(
    `scRNA` = unique(md_sel$donor[md_sel$technology %in% "scRNA"]),
    `snATAC` = unique(md_sel$donor[md_sel$technology %in% "snATAC"]),
    `Multiome` = unique(md_sel$donor[
        md_sel$technology %in% c("Multiome", "multiomeRNA", "multiomeATAC")
    ])
)


In [1038]:
library(ggvenn)

# Draw the donor-overlap Venn diagram into a 3 x 3 inch PDF, one default ggplot
# hue per technology.
file_name <- "venndiagram.pdf"

pdf(file = file.path(folder_path, file_name), width = 3, height = 3)
print(
    ggvenn(
        x,
        c("scRNA", "snATAC", "Multiome"),
        fill_color = scales::hue_pal()(3)
    )
)
dev.off()

