----
# Visualize results from the batch correction bake-off
---

In [21]:
options(repos='http://cran.rstudio.com/')
#install.packages("ggExtra")
library(Seurat)
library(ggplot2)
library(ggpubr)
library(ggExtra)
library(ggrepel)
library(dplyr)


Attaching package: ‘dplyr’


The following objects are masked from ‘package:stats’:

    filter, lag


The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union




In [3]:
# NOTE(review): hard-coded, machine-specific working directory. Relative file
# names used later in this notebook (e.g. in readRDS()) resolve against it.
setwd("~/Desktop/H4H/pughlab/projects/BTSCs_scRNAseq/Manuscript_G607removed/NatCan_Rebuttal/BatchCorrection/")

----
## 1.0 Plot UMAPs
----

In [4]:
# Load the metadata table that every plot below reads its UMAP coordinates,
# cluster labels and SampleID from. The file name is relative, so it is
# resolved against the current working directory.
meta <- readRDS("Global_GSC_BatchCorrection_metadata.rds")

In [7]:
# List the columns available in the metadata table.
colnames(meta)

In [8]:
## Sample colour palette: 8 warm shades followed by 21 cool shades.

# Cool ramp (purples -> blues -> greens), interpolated to 21 colours.
colfunc <- grDevices::colorRampPalette(
  c("#54278f", "#bcbddc", "#084081", "#4eb3d3", "#238b45", "#ccebc5")
)
dirks <- colfunc(21)

# Warm ramp (dark red -> orange -> pale yellow), interpolated to 8 colours.
# `colfunc` is rebound here, so afterwards it refers to the warm ramp.
colfunc <- grDevices::colorRampPalette(
  c("#800026", "#fc4e2a", "#feb24c", "#ffeda0")
)
weiss <- colfunc(8)

# The warm shades come first in the combined vector used by the sample plots.
cols <- c(weiss, dirks)
length(cols)

In [43]:
#### ORIGINAL CLUSTERING
# Draws the original (uncorrected) UMAP twice -- coloured by sample and by
# cluster -- and writes each plot to an 8 x 8 inch PDF on the Desktop.
# Reads `meta` and `cols`, which are defined in earlier cells.

# Label anchors: median UMAP coordinate of the cells in each cluster.
# The coordinates are summarised explicitly so the grouping column is kept
# without dplyr's "Adding missing grouping variables" message.
hc.norm.cent <- meta %>%
  group_by(Original_clusters) %>%
  summarise(
    Original_UMAP1 = median(Original_UMAP1),
    Original_UMAP2 = median(Original_UMAP2)
  ) %>%
  ungroup()
#hc.norm.cent

# Theme shared by both panels: no axis text, ticks or grid, a solid panel
# border, and no legend.
# NOTE(review): ggplot2 >= 3.4.0 deprecates `size` for line widths in favour
# of `linewidth`; `size` is kept so older ggplot2 installs still work.
umap_theme <- theme_bw() +
  theme(
    axis.text.x = element_blank(),
    axis.text.y = element_blank(),
    axis.ticks = element_blank(),
    panel.border = element_rect(linetype = "solid", fill = NA, size = 1),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    legend.position = "none"
  )

# Cells coloured by sample, using the manual sample palette.
original_sample <- ggplot(
  meta,
  aes(x = Original_UMAP1, y = Original_UMAP2, color = SampleID)
) +
  geom_point(alpha = 0.3, size = 0.8, shape = 16) +
  labs(x = "UMAP 1", y = "UMAP 2") +
  scale_colour_manual(values = cols) +
  guides(colour = guide_legend(override.aes = list(size = 4, alpha = 1))) +
  umap_theme

# Cells coloured by cluster (default discrete palette), with one repelled
# label per cluster placed at the cluster's median coordinate.
original_clusters <- ggplot(
  meta,
  aes(x = Original_UMAP1, y = Original_UMAP2, color = Original_clusters)
) +
  geom_point(alpha = 0.3, size = 0.8, shape = 16) +
  labs(x = "UMAP 1", y = "UMAP 2") +
  guides(colour = guide_legend(override.aes = list(size = 4, alpha = 1))) +
  umap_theme +
  geom_label_repel(
    aes(label = Original_clusters),
    data = hc.norm.cent,
    label.size = 0.05,
    parse = TRUE,
    size = 3
  )

# print() is called explicitly: auto-printing only happens at top level, so
# without it the PDFs would be empty when this code is run via source().
pdf("~/Desktop/OriginalClustering_NatCan_Cluster.pdf", width = 8, height = 8)
print(original_clusters)
dev.off()

pdf("~/Desktop/OriginalClustering_NatCan_Sample.pdf", width = 8, height = 8)
print(original_sample)
dev.off()

Adding missing grouping variables: `Original_clusters`



In [40]:
#### CONOS Clustering
# Draws the Conos UMAP twice -- coloured by sample and by cluster -- and
# writes each plot to an 8 x 8 inch PDF on the Desktop.
# Reads `meta` and `cols`, which are defined in earlier cells.

# Label anchors: median UMAP coordinate of the cells in each cluster.
# The coordinates are summarised explicitly so the grouping column is kept
# without dplyr's "Adding missing grouping variables" message.
hc.norm.cent <- meta %>%
  group_by(Conos_clusters) %>%
  summarise(
    Conos_UMAP1 = median(Conos_UMAP1),
    Conos_UMAP2 = median(Conos_UMAP2)
  ) %>%
  ungroup()
#hc.norm.cent

# Theme shared by both panels: no axis text, ticks or grid, a solid panel
# border, and no legend (the earlier legend.position = "bottom" was always
# overridden by "none", so only "none" is kept).
# NOTE(review): ggplot2 >= 3.4.0 deprecates `size` for line widths in favour
# of `linewidth`; `size` is kept so older ggplot2 installs still work.
umap_theme <- theme_bw() +
  theme(
    axis.text.x = element_blank(),
    axis.text.y = element_blank(),
    axis.ticks = element_blank(),
    panel.border = element_rect(linetype = "solid", fill = NA, size = 1),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    legend.position = "none"
  )

# Cells coloured by sample, using the manual sample palette.
Conos_sample <- ggplot(
  meta,
  aes(x = Conos_UMAP1, y = Conos_UMAP2, color = SampleID)
) +
  geom_point(alpha = 0.3, size = 0.8, shape = 16) +
  labs(x = "UMAP 1", y = "UMAP 2") +
  scale_colour_manual(values = cols) +
  guides(colour = guide_legend(override.aes = list(size = 4, alpha = 1))) +
  umap_theme

# Cells coloured by cluster (default discrete palette), with one repelled
# label per cluster placed at the cluster's median coordinate.
# The plot object shares its name with the `Conos_clusters` column; inside
# aes() the data column takes precedence, so the mappings still refer to the
# column.
Conos_clusters <- ggplot(
  meta,
  aes(x = Conos_UMAP1, y = Conos_UMAP2, color = Conos_clusters)
) +
  geom_point(alpha = 0.3, size = 0.8, shape = 16) +
  labs(x = "UMAP 1", y = "UMAP 2") +
  guides(colour = guide_legend(override.aes = list(size = 4, alpha = 1))) +
  umap_theme +
  geom_label_repel(
    aes(label = Conos_clusters),
    data = hc.norm.cent,
    label.size = 0.05,
    parse = TRUE,
    size = 3
  )

# print() is called explicitly: auto-printing only happens at top level, so
# without it the PDFs would be empty when this code is run via source().
pdf("~/Desktop/ConosClustering_NatCan_Cluster.pdf", width = 8, height = 8)
print(Conos_clusters)
dev.off()

pdf("~/Desktop/ConosClustering_NatCan_Sample.pdf", width = 8, height = 8)
print(Conos_sample)
dev.off()

Adding missing grouping variables: `Conos_clusters`



In [41]:
#### Liger Clustering
# Draws the Liger UMAP twice -- coloured by sample and by cluster -- and
# writes each plot to an 8 x 8 inch PDF on the Desktop.
# Reads `meta` and `cols`, which are defined in earlier cells.

# Label anchors: median UMAP coordinate of the cells in each cluster.
# The coordinates are summarised explicitly so the grouping column is kept
# without dplyr's "Adding missing grouping variables" message.
hc.norm.cent <- meta %>%
  group_by(Liger_clusters) %>%
  summarise(
    Liger_UMAP1 = median(Liger_UMAP1),
    Liger_UMAP2 = median(Liger_UMAP2)
  ) %>%
  ungroup()
#hc.norm.cent

# Theme shared by both panels: no axis text, ticks or grid, a solid panel
# border, and no legend (the earlier legend.position = "bottom" was always
# overridden by "none", so only "none" is kept).
# NOTE(review): ggplot2 >= 3.4.0 deprecates `size` for line widths in favour
# of `linewidth`; `size` is kept so older ggplot2 installs still work.
umap_theme <- theme_bw() +
  theme(
    axis.text.x = element_blank(),
    axis.text.y = element_blank(),
    axis.ticks = element_blank(),
    panel.border = element_rect(linetype = "solid", fill = NA, size = 1),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    legend.position = "none"
  )

# Cells coloured by sample, using the manual sample palette.
Liger_sample <- ggplot(
  meta,
  aes(x = Liger_UMAP1, y = Liger_UMAP2, color = SampleID)
) +
  geom_point(alpha = 0.3, size = 0.8, shape = 16) +
  labs(x = "UMAP 1", y = "UMAP 2") +
  scale_colour_manual(values = cols) +
  guides(colour = guide_legend(override.aes = list(size = 4, alpha = 1))) +
  umap_theme

# Cells coloured by cluster (default discrete palette), with one repelled
# label per cluster placed at the cluster's median coordinate.
# The plot object shares its name with the `Liger_clusters` column; inside
# aes() the data column takes precedence, so the mappings still refer to the
# column.
Liger_clusters <- ggplot(
  meta,
  aes(x = Liger_UMAP1, y = Liger_UMAP2, color = Liger_clusters)
) +
  geom_point(alpha = 0.3, size = 0.8, shape = 16) +
  labs(x = "UMAP 1", y = "UMAP 2") +
  guides(colour = guide_legend(override.aes = list(size = 4, alpha = 1))) +
  umap_theme +
  geom_label_repel(
    aes(label = Liger_clusters),
    data = hc.norm.cent,
    label.size = 0.05,
    parse = TRUE,
    size = 3
  )

# print() is called explicitly: auto-printing only happens at top level, so
# without it the PDFs would be empty when this code is run via source().
pdf("~/Desktop/LigerClustering_NatCan_Cluster.pdf", width = 8, height = 8)
print(Liger_clusters)
dev.off()

pdf("~/Desktop/LigerClustering_NatCan_Sample.pdf", width = 8, height = 8)
print(Liger_sample)
dev.off()

Adding missing grouping variables: `Liger_clusters`



In [44]:
#### fastMNN Clustering
# Draws the fastMNN UMAP twice -- coloured by sample and by cluster -- and
# writes each plot to an 8 x 8 inch PDF on the Desktop.
# Reads `meta` and `cols`, which are defined in earlier cells.

# Label anchors: median UMAP coordinate of the cells in each cluster.
# The coordinates are summarised explicitly so the grouping column is kept
# without dplyr's "Adding missing grouping variables" message.
hc.norm.cent <- meta %>%
  group_by(fastMNN_clusters) %>%
  summarise(
    fastMNN_UMAP1 = median(fastMNN_UMAP1),
    fastMNN_UMAP2 = median(fastMNN_UMAP2)
  ) %>%
  ungroup()
#hc.norm.cent

# Theme shared by both panels: no axis text, ticks or grid, a solid panel
# border, and no legend (the earlier legend.position = "bottom" was always
# overridden by "none", so only "none" is kept).
# NOTE(review): ggplot2 >= 3.4.0 deprecates `size` for line widths in favour
# of `linewidth`; `size` is kept so older ggplot2 installs still work.
umap_theme <- theme_bw() +
  theme(
    axis.text.x = element_blank(),
    axis.text.y = element_blank(),
    axis.ticks = element_blank(),
    panel.border = element_rect(linetype = "solid", fill = NA, size = 1),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    legend.position = "none"
  )

# Cells coloured by sample, using the manual sample palette.
fastMNN_sample <- ggplot(
  meta,
  aes(x = fastMNN_UMAP1, y = fastMNN_UMAP2, color = SampleID)
) +
  geom_point(alpha = 0.3, size = 0.8, shape = 16) +
  labs(x = "UMAP 1", y = "UMAP 2") +
  scale_colour_manual(values = cols) +
  guides(colour = guide_legend(override.aes = list(size = 4, alpha = 1))) +
  umap_theme

# Cells coloured by cluster (default discrete palette), with one repelled
# label per cluster placed at the cluster's median coordinate.
# The plot object shares its name with the `fastMNN_clusters` column; inside
# aes() the data column takes precedence, so the mappings still refer to the
# column.
fastMNN_clusters <- ggplot(
  meta,
  aes(x = fastMNN_UMAP1, y = fastMNN_UMAP2, color = fastMNN_clusters)
) +
  geom_point(alpha = 0.3, size = 0.8, shape = 16) +
  labs(x = "UMAP 1", y = "UMAP 2") +
  guides(colour = guide_legend(override.aes = list(size = 4, alpha = 1))) +
  umap_theme +
  geom_label_repel(
    aes(label = fastMNN_clusters),
    data = hc.norm.cent,
    label.size = 0.05,
    parse = TRUE,
    size = 3
  )

# print() is called explicitly: auto-printing only happens at top level, so
# without it the PDFs would be empty when this code is run via source().
pdf("~/Desktop/fastMNNClustering_NatCan_Cluster.pdf", width = 8, height = 8)
print(fastMNN_clusters)
dev.off()

pdf("~/Desktop/fastMNNClustering_NatCan_Sample.pdf", width = 8, height = 8)
print(fastMNN_sample)
dev.off()

Adding missing grouping variables: `fastMNN_clusters`

