In [1]:
library(Seurat)
library(CellChat) 
library(patchwork) 
library(ggpubr)
library(circlize)
options(stringsAsFactors = FALSE)
library(tidyr) ## gather
library(tibble)
library(ggsankey)
source("./chat.fun.R")

Registered S3 methods overwritten by 'tibble':
  method     from  
  format.tbl pillar
  print.tbl  pillar

Loading required package: dplyr


Attaching package: ‘dplyr’


The following objects are masked from ‘package:stats’:

    filter, lag


The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union


Loading required package: igraph


Attaching package: ‘igraph’


The following objects are masked from ‘package:dplyr’:

    as_data_frame, groups, union


The following objects are masked from ‘package:stats’:

    decompose, spectrum


The following object is masked from ‘package:base’:

    union


Loading required package: ggplot2

circlize version 0.4.13
CRAN page: https://cran.r-project.org/package=circlize
Github page: https://github.com/jokergoo/circlize
Documentation: https://jokergoo.github.io/circlize_book/book/

If you use it in published research, please cite:
Gu, Z. circlize implements and enhances circular visualization
  in R. Bioinfo

In [2]:
## Read the saved cellchat result object and the custom CellChatDB
## interaction table (the first CSV column is used as row names)
res <- readRDS("./load_files/Cellchat_res_custom.rds")

interactions <- read.csv(
    "./load_files/cellchat_custom/interaction_input_CellChatDB.csv",
    row.names = 1
)

In [3]:
## There is a wrong annotation in the database: the `interaction_name` stored
## in row "WNT5A_ROR2" is overwritten so that it equals the row name.
## Show the stored value before the correction ...
interactions["WNT5A_ROR2", "interaction_name"]
interactions["WNT5A_ROR2", "interaction_name"] <- "WNT5A_ROR2"
## ... and show it again afterwards to confirm the correction.
interactions["WNT5A_ROR2", "interaction_name"]

In [4]:
## Extract cellchat results
## Keep the significant ligand-receptor pairs that are also present in the
## curated interaction table.
inter_pre <- intersect(rownames(res@LR$LRsig), interactions$interaction_name)


## Set interaction pairs
## Every PC cluster (row of the p-value array) is paired with every early NERG
## cluster (column of the p-value array); pairs are labelled "row|column".
## NOTE(review): rows are presumably the signal senders and columns the
## receivers -- confirm against the CellChat documentation.
pc_clusters <- c("PC FGF17", "PC NKX2-1", "PC RSPO3", "PC TTR")
rgc_clusters <- c("FC NERG-early", "GE NERG-early", "OcC NERG-early")
pair_grid <- expand.grid(colcls = rgc_clusters, rowcls = pc_clusters,
                         stringsAsFactors = FALSE)
pair_grid <- pair_grid[pair_grid$rowcls != pair_grid$colcls, , drop = FALSE]
cls_pairs <- paste0(pair_grid$rowcls, "|", pair_grid$colcls)


## Get the interaction matrix (rows: interactions, columns: cluster pairs)
## Entries are p-values; a p-value is set to 1 (not significant) when
## p > 0.05 or when the communication probability is < 1e-6.
probs <- res@net$prob
pvals <- res@net$pval

## Positions of the requested cluster pairs inside each [row, column] slice.
## A cluster that is absent from the array gives NA here, and an NA index row
## yields an NA p-value (same outcome as looking up a missing pair by name).
pair_idx <- cbind(match(pair_grid$rowcls, dimnames(pvals)[[1]]),
                  match(pair_grid$colcls, dimnames(pvals)[[2]]))

pval_list <- lapply(inter_pre, function(ii) {
    pmat <- pvals[, , ii]
    pmat[pmat > 0.05] <- 1
    pmat[probs[, , ii] < 1e-6] <- 1
    pmat[pair_idx]
})

## One row per interaction; built with matrix() so that the result is always a
## matrix, even with a single interaction or a single cluster pair.
mats <- matrix(as.numeric(unlist(pval_list, use.names = FALSE)),
               nrow = length(inter_pre), ncol = length(cls_pairs),
               byrow = TRUE, dimnames = list(inter_pre, cls_pairs))

## Keep interactions that are significant in at least one cluster pair.
## na.rm = TRUE: a missing pair must not turn the row selector into NA;
## drop = FALSE: keep a matrix when only one interaction survives.
mats <- mats[rowSums(mats <= 0.05, na.rm = TRUE) > 0, , drop = FALSE]


## Annotation of the retained interactions (ECM-Receptor pairs are excluded)
## NOTE(review): the original column list named `pathway_name` twice; maybe
## `annotation` was intended as one of them -- confirm before adding it.
inter_final <- rownames(mats)
sub_anno <- interactions %>%
            filter(interaction_name %in% inter_final,
                   annotation != "ECM-Receptor") %>%
            select(interaction_name, pathway_name, ligand, receptor, evidence)
mats <- mats[rownames(sub_anno), , drop = FALSE]
dim(mats)
head(mats)

Unnamed: 0,PC FGF17|FC NERG-early,PC FGF17|GE NERG-early,PC FGF17|OcC NERG-early,PC NKX2-1|FC NERG-early,PC NKX2-1|GE NERG-early,PC NKX2-1|OcC NERG-early,PC RSPO3|FC NERG-early,PC RSPO3|GE NERG-early,PC RSPO3|OcC NERG-early,PC TTR|FC NERG-early,PC TTR|GE NERG-early,PC TTR|OcC NERG-early
BMP2_SMO,1,1,1,1.0,1,1,0,0,0.0,1,1,1
BMP7_PTPRK,0,1,1,0.0,1,1,0,1,1.0,0,1,1
GDF11_TGFBR1_ACVR2B,0,1,0,0.03,1,1,0,1,0.05,0,1,0
WNT3A_FZD1,1,1,1,1.0,1,1,0,1,0.0,1,1,1
WNT3A_FZD2,1,1,1,1.0,1,1,0,0,0.0,1,1,1
WNT3A_FZD8,1,1,1,1.0,1,1,0,0,0.0,1,1,1


In [5]:
## Bring in the cellphoneDB results and append the pairs that are exclusive to
## cellphoneDB to the cellchat matrix / annotation built above.
## According to the original note, the loaded file provides
## pval_cpb, mean_cpb and meta_cpb.
load(paste0("./load_files/", "Cellphone_filtered_res.rds"))


## The exclusive pairs can be obtained with
## setdiff(meta_cpb$interacting_pair, sub_anno$interaction_name)
sigmeta <- read.table(paste0("./load_files/", "cpb.exclusive.pairs.txt"),
                      header = TRUE)
rownames(sigmeta) <- sigmeta[["interaction_name"]]

## Ligand / receptor are split out of the pair name by the project helper
## extract_field() (sourced from chat.fun.R)
sigmeta[["ligand"]] <- extract_field(sigmeta[["interaction_name"]], 1, "_")
sigmeta[["receptor"]] <- extract_field(sigmeta[["interaction_name"]], "rm_start", "_")

## Pairs flagged "True" in the `secreted` column of meta_cpb are labelled as
## secreted signaling, all others as cell-cell contact
sigmeta[["annotation"]] <- ifelse(
    meta_cpb[rownames(sigmeta), "secreted"] == "True",
    "Secreted Signaling",
    "Cell-Cell Contact"
)
sigmeta[["evidence"]] <- "SigCellphoneDB"
message("Pairs unique to the cellphoneDB results")
sigmeta

## Combine
## cellphoneDB p-values above 0.05 are set to 1, as done for the cellchat values
pval_cpb[pval_cpb > 0.05] <- 1
newmats <- rbind(
    mats,
    pval_cpb[sigmeta[["interaction_name"]], colnames(mats), drop = FALSE]
)
newmeta <- rbind(sub_anno, sigmeta[, colnames(sub_anno)])
## Put the annotation rows in the same order as the matrix rows
newmeta <- newmeta[rownames(newmats), ]
message(sprintf("the number of pairs in the combine data: %s", nrow(newmeta)))

Pairs unique to the cellphoneDB results



Unnamed: 0_level_0,interaction_name,pathway_name,ligand,receptor,annotation,evidence
Unnamed: 0_level_1,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>
COPA_SORT1,COPA_SORT1,COPA,COPA,SORT1,Secreted Signaling,SigCellphoneDB
EFNA2_EPHA2,EFNA2_EPHA2,EPHA,EFNA2,EPHA2,Cell-Cell Contact,SigCellphoneDB
FGF19_FGFR1,FGF19_FGFR1,FGF,FGF19,FGFR1,Secreted Signaling,SigCellphoneDB
FGF2_FGFRL1,FGF2_FGFRL1,FGF,FGF2,FGFRL1,Secreted Signaling,SigCellphoneDB
FGF3_FGFR3,FGF3_FGFR3,FGF,FGF3,FGFR3,Secreted Signaling,SigCellphoneDB
GDF11_ANTXR1,GDF11_ANTXR1,GDF,GDF11,ANTXR1,Secreted Signaling,SigCellphoneDB
GDF11_TGFR_AVR2A,GDF11_TGFR_AVR2A,GDF,GDF11,TGFR_AVR2A,Secreted Signaling,SigCellphoneDB
GDF11_TGFR_AVR2B,GDF11_TGFR_AVR2B,GDF,GDF11,TGFR_AVR2B,Secreted Signaling,SigCellphoneDB
MDK_LRP1,MDK_LRP1,MK,MDK,LRP1,Secreted Signaling,SigCellphoneDB
PROS1_AXL,PROS1_AXL,PROS,PROS1,AXL,Secreted Signaling,SigCellphoneDB


the number of pairs in the combine data: 121



In [6]:
## Cluster the interaction pairs
## smat: transposed matrix (one column per interaction pair), scaled column-wise
smat <- scale(t(newmats))

## Pathway label of every pair; pathways with at most 3 pairs are pooled into
## "rare", except "BMP" which always keeps its own label
new_path <- newmeta[rownames(newmats), "pathway_name"]
rare_path <- setdiff(
    names(table(new_path))[as.vector(table(new_path) <= 3)],
    "BMP"
)
new_path[new_path %in% rare_path] <- "rare"

## RSKC clustering into 10 groups with a fixed seed
## NOTE(review): the clustering input is newmats, not the scaled smat
set.seed(42)
cc <- RSKC::RSKC(d = newmats, ncl = 10, alpha = 0)
cc


Input: 
#obs= 121  #feature= 12 
L1= 12  alpha= 0

Result:
wbss: 66.40512
trimmed cases:
#non-zero weights: 12 
 10 clusters of sizes 17, 8, 8, 13, 12, 14, 10, 19, 14, 6 

In [7]:
## tSNE embedding of the interaction pairs (one row of t(smat) per pair)
tsneres <- Seurat::RunTSNE(t(smat), check_duplicates = FALSE)

## Per-pair table: pair name, original pathway, pooled pathway label and the
## tSNE coordinates
tsnedata <- cbind(
    data.frame(
        pairs = colnames(smat),
        rawpath = newmeta[rownames(newmats), "pathway_name"],
        pathway = new_path,
        stringsAsFactors = FALSE
    ),
    tsneres@cell.embeddings
)
tsnedata[["cluster"]] <- as.character(cc$labels)

## Jitter the coordinates slightly: pairs with exactly the same interaction
## pattern are duplicates and would otherwise sit on top of each other.
## The seed is fixed and tSNE_1 is drawn before tSNE_2.
set.seed(42)
tsnedata[["tSNE_1"]] <- tsnedata[["tSNE_1"]] + rnorm(nrow(tsnedata), mean = 0, sd = 0.2)
tsnedata[["tSNE_2"]] <- tsnedata[["tSNE_2"]] + rnorm(nrow(tsnedata), mean = 0, sd = 0.2)

“No assay specified, setting assay as RNA by default.”


In [8]:
## Three interaction pairs were duplicated by mistake during manual curation;
## drop them from the tSNE table and keep the matrix rows in sync.
## Dimensions are shown before and after the removal.
dim(tsnedata)
dim(newmats)
tsnedata <- tsnedata[
    setdiff(rownames(tsnedata), c("FGF2_FGFRL11", "WNT3A_LRP11", "WNT5A_ROR21")),
    ,
    drop = FALSE
]
newmats <- newmats[rownames(tsnedata), , drop = FALSE]
dim(tsnedata)
dim(newmats)

In [9]:
## Write the tSNE table and the combined p-value matrix to one .Rdata file
save(list = c("tsnedata", "newmats"),
     file = "./load_files/LR_combined_results.Rdata")

In [19]:
## Print the R version, platform and attached packages used for this run
sessionInfo()

R version 3.6.1 (2019-07-05)
Platform: x86_64-pc-linux-gnu (64-bit)
Running under: Red Hat Enterprise Linux Server 7.9 (Maipo)

Matrix products: default
BLAS:   /gpfs/ycga/home/sm2726/packages/R_versions/R-3.6.1/lib64/R/lib/libRblas.so
LAPACK: /gpfs/ycga/home/sm2726/packages/R_versions/R-3.6.1/lib64/R/lib/libRlapack.so

locale:
 [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
 [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
 [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
 [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
 [9] LC_ADDRESS=C               LC_TELEPHONE=C            
[11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       

attached base packages:
[1] parallel  stats     graphics  grDevices utils     datasets  methods  
[8] base     

other attached packages:
 [1] ggsankey_0.0.99999  tibble_3.0.1        tidyr_1.1.3        
 [4] circlize_0.4.13     ggpubr_0.3.0        patchwork_1.0.0    
 [7] CellChat_1.1.3      bigmemory_4.6.1     Bi