# In [None]:
### Cluster the subnetwork distance matrix (UMAP embedding + DBSCAN) and plot the result


# Install the packages this script installs, but only when they are missing,
# and do so *before* attaching them. Previously install.packages() ran
# unconditionally after the library() calls, so a machine without the packages
# failed at the first library() and every other run re-installed them.
required_pkgs <- c("dbscan", "fpc", "umap")
missing_pkgs <- required_pkgs[
  !vapply(required_pkgs, requireNamespace, logical(1), quietly = TRUE)
]
if (length(missing_pkgs) > 0) {
  install.packages(missing_pkgs)
}

library(dbscan)
library(fpc)
library(plotly)
library(RColorBrewer)
library(tidyverse)
library(umap)

# Seed R's random number generator.
set.seed(123)

# Relative file names below are resolved against this directory.
setwd('G:\\课题\\tissue_specificity_manuscript\\素材\\network_distance_umap')

# Per-subnetwork annotation table (tab-separated, header row).
# comment.char = '' turns off comment handling, so '#' stays part of a field.
dis_tsne <- read.table(
  'tsne_DBSCAN_cluster.txt',
  header = TRUE, sep = '\t', comment.char = ''
)

# Subnetwork distance table; its first column supplies the row names.
dat <- read.table(
  "distance_subnetwork_matrix.txt",
  header = TRUE, sep = "\t", row.names = 1, comment.char = ''
)

# Matrix form of the table, with rows AND columns labelled by the row names
# of dat (this replaces the column names that read.table produced).
subnetwork_dist_matrix <- as.matrix(dat)
dimnames(subnetwork_dist_matrix) <- list(rownames(dat), rownames(dat))

# Normalize the distances with min-max scaling.
# Self-distances are forced to 0 first, so the diagonal takes part in the
# min/max computation below.
diag(subnetwork_dist_matrix) <- 0

# Fail early on inputs that would otherwise silently yield an all-NA/NaN
# matrix: missing or infinite values propagate through max()/min(), and a
# constant matrix makes the scaling denominator zero.
if (!is.numeric(subnetwork_dist_matrix)) {
  stop("subnetwork_dist_matrix must be numeric", call. = FALSE)
}
if (!all(is.finite(subnetwork_dist_matrix))) {
  stop("subnetwork_dist_matrix contains NA, NaN or infinite values",
       call. = FALSE)
}

max_dist <- max(subnetwork_dist_matrix)
min_dist <- min(subnetwork_dist_matrix)

if (max_dist == min_dist) {
  stop("all distances are identical; min-max normalization is undefined",
       call. = FALSE)
}

normalized_subnetwork_dist_matrix <-
  (subnetwork_dist_matrix - min_dist) / (max_dist - min_dist)
# The rescaling shifts the diagonal away from 0 whenever min_dist is not 0,
# so reset the self-distances afterwards.
diag(normalized_subnetwork_dist_matrix) <- 0


# UMAP
# input = "dist" tells umap() that the matrix holds precomputed distances
# rather than feature vectors.
subnetwork.umap <- umap(normalized_subnetwork_dist_matrix, input = "dist",
                        min_dist = 0.1, n_neighbors = 15)
dot.positions <- tibble(x = subnetwork.umap$layout[, 1],
                        y = subnetwork.umap$layout[, 2])

# DBSCAN clustering on the two embedding coordinates.
# Must run before any non-numeric column is added to dot.positions.
db <- fpc::dbscan(dot.positions, eps = 1, MinPts = 5)

dot.positions$label <- rownames(dat)
# Keep the fixed levels 0..19, but extend them when DBSCAN returns a larger
# cluster id; with the previous hard-coded list such ids became NA.
cluster_levels <- 0:max(19, db$cluster)
dot.positions$cluster <- factor(db$cluster, levels = cluster_levels)

# 20-colour palette: grey first, then 19 colours interpolated from Set1.
my.colors <- c("#BEBEBE", colorRampPalette(brewer.pal(8, "Set1"))(19))

# Plain data.frame copy with export-friendly column names, joined to the
# annotation table on the subnetwork identifier.
dot.positions_df <- data.frame(dot.positions)
colnames(dot.positions_df) <- c('umap 1', 'umap 2', 'label', 'umap_DBSCAN_id')
dot.positions_df_color <- merge(dot.positions_df, dis_tsne,
                                by.x = 'label', by.y = 'subnetwork_id')

# merge() is an inner join: unmatched labels are dropped and duplicated
# subnetwork_id values multiply rows. Surface that instead of hiding it.
if (nrow(dot.positions_df_color) != nrow(dot.positions_df)) {
  warning("merge with dis_tsne changed the row count from ",
          nrow(dot.positions_df), " to ", nrow(dot.positions_df_color),
          "; check 'subnetwork_id' for missing or duplicated entries",
          call. = FALSE)
}

# Interactive scatter of the embedding, coloured and shaped by DBSCAN cluster.
# The cluster factor has levels "0".."19". ggplot2's default shape scale
# handles at most 6 levels and the Set1 brewer palette at most 9, so larger
# cluster ids lost their points/colours. Use manual scales built from the
# 20-entry my.colors palette instead; naming the values pins each cluster id
# to a fixed colour/shape regardless of which ids are present.
cluster_ids <- as.character(seq_along(my.colors) - 1)
cluster_shapes <- setNames(c(2, rep(19, length(my.colors) - 1)), cluster_ids)
cluster_colors <- setNames(my.colors, cluster_ids)

p <- ggplot(dot.positions,
            aes(x = x, y = y, color = cluster, label = label, shape = cluster)) +
  geom_point(size = 2) +
  scale_shape_manual(values = cluster_shapes) +
  scale_color_manual(values = cluster_colors) +
  theme_bw()

ggplotly(p)
unique(dot.positions$cluster)

# k-nearest-neighbour distance plot with k = 5 and a reference line at 1,
# i.e. the MinPts and eps values used in the fpc::dbscan() call.
# Only the numeric coordinates are passed: dot.positions also carries the
# character `label` and factor `cluster` columns by now, and the kNN routine
# needs purely numeric input.
dbscan::kNNdistplot(as.matrix(dot.positions[, c("x", "y")]), k = 5)
abline(h = 1, lty = 2)



# Table used for the publication figures: embedding coordinates, DBSCAN
# cluster id and the tissue annotation columns from the merged table.
# NOTE(review): the earlier version first read two files into tsne_plot and
# derived a subnetwork_id_process column, but all of that was overwritten by
# this assignment before being used, so those dead reads were removed.
tsne_plot <- dot.positions_df_color[c('umap 1', 'umap 2', 'umap_DBSCAN_id',
                                      'tissue', 'seed', 'tissue_cor_v2',
                                      'label')]

# Subnetworks whose label is printed on the figure; every other point gets
# an empty label.
highlighted_labels <- c(
  'Adrenal gland-FGFR1', 'Biliary tract-TP53',
  'Bladder/Urinary tract-RXRA',
  'Bladder/Urinary tract-PIK3CA',
  'Bone-IDH1',
  'Bowel-APC',
  'Breast-ERBB2',
  'Breast-PIK3CA',
  'CNS/Brain-EGFR',
  'Esophagus/Stomach-CDH1',
  'Head and neck-NOTCH1',
  'Head and neck-PIK3CA',
  'Kidney-VHL',
  'Liver-CTNNB1', 'Liver-DMD', 'Lung-EGFR', 'Lung-BRAF', 'Lung-KRAS',
  'Lymphoid-MYD88',
  'Myeloid-FLT3', 'Myeloid-JAK2', 'Myeloid-KIT',
  'Ovary/Fallopian tube-TP53', 'Pancreas-KRAS', 'Pancreas-TP53',
  'Prostate-SPOP',
  'Skin-BRAF',
  'Skin-NRAS',
  'Skin-KIT', 'Thyroid-BRAF', 'Uterus-PIK3CA'
)

is_highlighted <- tsne_plot$label %in% highlighted_labels
tsne_plot$plot_label <- ''
tsne_plot$plot_label[is_highlighted] <- tsne_plot$label[is_highlighted]

# Export the plotting table (tab-separated, no row names).
write.table(tsne_plot, 'tsne_output_plot_color_umap7_DBSCAN.txt',
            sep = '\t', row.names = FALSE)
# Build the base plot: embedding coloured by the per-tissue colour column,
# with the selected subnetworks labelled.
library(ggthemes)
setwd('G:\\课题\\tissue_specificity_manuscript\\素材\\network_distance_umap')

# Line width shared by the axis lines, ticks and panel background.
# NOTE(review): ggplot2 >= 3.4.0 deprecates `size` for line elements in
# favour of `linewidth`; `size` is kept here so older versions still work.
axis_line_size <- (0.5 / 1.07) * 0.5

# The colour aesthetic is mapped to tissue_cor_v2 itself (presumably colour
# codes -- confirm against the annotation file), so each distinct value must
# be drawn in exactly that colour. An unnamed palette is matched to the
# sorted levels by position, which permutes the colours whenever the
# first-appearance order differs from the sorted order; a named palette
# (name == value) makes the mapping explicit.
tissue_colors <- unique(as.character(tsne_plot$tissue_cor_v2))
tissue_palette <- setNames(tissue_colors, tissue_colors)

# ggscatter() lives in ggpubr, which this script never attaches, so call it
# through its namespace.
p <- ggpubr::ggscatter(tsne_plot, x = 'umap 1', y = 'umap 2',
                       color = 'tissue_cor_v2',
                       palette = tissue_palette,
                       label = tsne_plot$plot_label,
                       font.label = c(8, "plain"), show.legend = FALSE,
                       repel = TRUE) +
  theme(
    legend.position = "none", panel.border = element_blank(),
    axis.line.y = element_line(size = axis_line_size),
    axis.line.x = element_line(size = axis_line_size),
    axis.ticks.y = element_line(size = axis_line_size),
    axis.ticks.x = element_line(size = axis_line_size),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    axis.line = element_line(colour = "black"),
    panel.background = element_rect(fill = "transparent", color = NA,
                                    size = axis_line_size),
    axis.text = element_text(size = 8),
    axis.title = element_text(size = 8),
    legend.text = element_text(size = 8),  # Modify legend labels font and size
    legend.title = element_text(size = 8),  # Modify legend title font and size
    plot.title = element_text(size = 8, hjust = 0.5),
    axis.text.x = element_text(size = 8, family = "sans", color = "black"),
    axis.text.y = element_text(size = 8, family = "sans", color = "black")
  )
ggsave("tsne_output_plot_color_umap7_DBSCAN.pdf", p, width = 8, height = 8.5/2)

# Second figure: the same embedding coloured by DBSCAN cluster id.
custom_colors <- c('#e6194B', '#3cb44b', '#ffe119', '#4363d8', '#f58231',
                   '#911eb4', '#42d4f4')
tsne_plot$umap_DBSCAN_id <- factor(tsne_plot$umap_DBSCAN_id)

# The fixed palette has 7 colours. If the clustering yields more levels,
# interpolate additional colours so the palette is never too short; with 7 or
# fewer levels the original colours are used unchanged.
n_clusters <- nlevels(tsne_plot$umap_DBSCAN_id)
cluster_palette <- if (n_clusters > length(custom_colors)) {
  grDevices::colorRampPalette(custom_colors)(n_clusters)
} else {
  custom_colors
}

# ggscatter() lives in ggpubr, which this script never attaches, so call it
# through its namespace.
p <- ggpubr::ggscatter(tsne_plot, x = 'umap 1', y = 'umap 2',
                       color = 'umap_DBSCAN_id', palette = cluster_palette)
ggsave("tsne_output_plot_color_umap8_DBSCAN_cluster_number.pdf", p, width = 8, height = 8.5/2)


