----
# Nuclei Integration with fastMNN
----

Ran fastMNN on nuclei and live cell cohorts using live/nuclei as batches to correct for. 

In [5]:
library(ggplot2)
library(ggpubr)
library(gridExtra)

---
## 1.0 Plot Gradient with fastMNN correction 
---

Do we see Developmental-Injury Response transcriptional gradient?

In [None]:
# Load the per-cell metadata table saved from the fastMNN integration run.
mnn <- readRDS(
  file = "~/Desktop/H4H/pughlab/projects/BTSCs_scRNAseq/Manuscript_G607removed/NatCan_Rebuttal/Nuclei_integration/fastmnn/GSCs_Tumour_LiveNuclei_fastMNN_meta.rds"
)

In [None]:
# Cohort label: sample type and technology joined with an underscore
# (e.g. "GSC_LiveCell", "Tumour_Nuclei").
mnn$SampleTech <- paste0(mnn$SampleType, "_", mnn$Technology)

In [None]:
# Scatter of the two GSC AUC scores, coloured by cohort, with per-cohort
# marginal distributions.
# Options left disabled for reference: ellipse = TRUE,
# ellipse.type = "confidence", ellipse.alpha = 0, ylim = c(0.1, 0.45),
# xlim = c(0.05, 0.4).
ggscatterhist(
  data = mnn,
  x = "Developmental_GSC_AUC",
  y = "InjuryResponse_GSC_AUC",
  color = "SampleTech",
  palette = c("#225ea8", "black", "darkred"),
  size = 0.5,
  alpha = 0.1,
  margin.params = list(fill = "SampleTech", color = "black", size = 0.2),
  ggtheme = theme_classic()
)

### Scale the scores within each cohort (GSCs, tumour, nuclei)

In [None]:
# Z-score the Developmental score within each cohort (SampleTech group).
# ave() returns every scaled value at the position of the row it came from, so
# the new column stays aligned with `mnn` whatever the row order is.
# Concatenating the per-cohort vectors only lines up when the rows are already
# sorted GSC_LiveCell -> Tumour_LiveCell -> Tumour_Nuclei.
mnn$Developmental_cohort_z <- ave(
  mnn$Developmental_GSC_AUC,
  mnn$SampleTech,
  # scale() returns a one-column matrix; flatten it back to a plain vector
  FUN = function(auc) as.numeric(scale(auc))
)

In [None]:
# Z-score the Injury Response score within each cohort (SampleTech group).
# ave() returns every scaled value at the position of the row it came from, so
# the new column stays aligned with `mnn` whatever the row order is.
# Concatenating the per-cohort vectors only lines up when the rows are already
# sorted GSC_LiveCell -> Tumour_LiveCell -> Tumour_Nuclei.
mnn$InjuryResponse_cohort_z <- ave(
  mnn$InjuryResponse_GSC_AUC,
  mnn$SampleTech,
  # scale() returns a one-column matrix; flatten it back to a plain vector
  FUN = function(auc) as.numeric(scale(auc))
)

In [None]:
# Same gradient plot as before, but on the within-cohort z-scores.
# Options left disabled for reference: ellipse = TRUE,
# ellipse.type = "confidence", ellipse.alpha = 0, ylim = c(0.1, 0.45),
# xlim = c(0.05, 0.4).
ggscatterhist(
  data = mnn,
  x = "Developmental_cohort_z",
  y = "InjuryResponse_cohort_z",
  color = "SampleTech",
  palette = c("#225ea8", "black", "darkred"),
  size = 0.5,
  alpha = 0.1,
  margin.params = list(fill = "SampleTech", color = "black", size = 0.2),
  ggtheme = theme_classic()
)

---
## 2.0 Plot PCA (but really mnn components)
---

Do we see better overlap between samples

In [None]:
# Cells in the corrected mnn component space, coloured by cohort.
# Options left disabled for reference (both plots): ellipse = TRUE,
# ellipse.type = "confidence", ellipse.alpha = 0, ylim = c(0.1, 0.45),
# xlim = c(0.05, 0.4).

# Components 1 vs 2
ggscatterhist(
  data = mnn,
  x = "mnn_1",
  y = "mnn_2",
  color = "SampleTech",
  palette = c("#225ea8", "black", "darkred"),
  size = 0.5,
  alpha = 0.1,
  margin.params = list(fill = "SampleTech", color = "black", size = 0.2),
  ggtheme = theme_classic()
)

# Components 2 vs 3
ggscatterhist(
  data = mnn,
  x = "mnn_2",
  y = "mnn_3",
  color = "SampleTech",
  palette = c("#225ea8", "black", "darkred"),
  size = 0.5,
  alpha = 0.1,
  margin.params = list(fill = "SampleTech", color = "black", size = 0.2),
  ggtheme = theme_classic()
)

In [None]:
# mnn components 2 vs 3 with points coloured by the Developmental GSC AUC
# score; the marginal plots are still filled by cohort.
# Options left disabled for reference: palette = "RdBu", ellipse = TRUE,
# ellipse.type = "confidence", ellipse.alpha = 0, ylim = c(0.1, 0.45),
# xlim = c(0.05, 0.4).
ggscatterhist(
  data = mnn,
  x = "mnn_2",
  y = "mnn_3",
  color = "Developmental_GSC_AUC",
  size = 0.5,
  alpha = 0.1,
  margin.params = list(
    fill = "SampleTech",
    color = "black",
    palette = c("#225ea8", "black", "darkred"),
    size = 0.2
  ),
  ggtheme = theme_classic()
)

---
## 4.0 Plot gradient WITH scaled but WITHOUT fastMNN correction
---

In [None]:
# Start again from the saved metadata so columns added in earlier sections
# are discarded, then rebuild the cohort label (sample type + technology).
mnn <- readRDS(
  file = "~/Desktop/H4H/pughlab/projects/BTSCs_scRNAseq/Manuscript_G607removed/NatCan_Rebuttal/Nuclei_integration/fastmnn/GSCs_Tumour_LiveNuclei_fastMNN_meta.rds"
)
mnn$SampleTech <- paste0(mnn$SampleType, "_", mnn$Technology)

In [None]:
##scale scores within cohorts
GSCs <- scale(mnn[mnn$SampleTech == "GSC_LiveCell", ]$Developmental_AUC)
Tumours <- scale(mnn[mnn$SampleTech == "Tumour_LiveCell", ]$Developmental_AUC)
Nuclei <- scale(mnn[mnn$SampleTech == "Tumour_Nuclei", ]$Developmental_AUC)
mnn$Developmental_cohort_z <- c(GSCs, Tumours, Nuclei)

GSCs <- scale(mnn[mnn$SampleTech == "GSC_LiveCell", ]$InjuryResponse_AUC)
Tumours <- scale(mnn[mnn$SampleTech == "Tumour_LiveCell", ]$InjuryResponse_AUC)
Nuclei <- scale(mnn[mnn$SampleTech == "Tumour_Nuclei", ]$InjuryResponse_AUC)
mnn$InjuryResponse_cohort_z <- c(GSCs, Tumours, Nuclei)

In [None]:
# One data frame per cohort, used for the single-cohort panels below.
gsc <- mnn[mnn[["SampleTech"]] == "GSC_LiveCell", ]
tum <- mnn[mnn[["SampleTech"]] == "Tumour_LiveCell", ]
nuc <- mnn[mnn[["SampleTech"]] == "Tumour_Nuclei", ]

# All four panels share axis labels and limits so they can be compared
# page by page. Each is stored (a, b, c, d) for the PDF export cell.

## a: all samples, one colour per cohort
a <- ggscatterhist(
  data = mnn,
  x = "Developmental_cohort_z",
  y = "InjuryResponse_cohort_z",
  color = "SampleTech",
  palette = c("#225ea8", "black", "darkred"),
  size = 0.5,
  alpha = 0.1,
  xlab = "Scaled Developmental Program Score (AUC; z-score)",
  ylab = "Scaled Injury Response Program Score (AUC; z-score)",
  xlim = c(-5, 5),
  ylim = c(-2.5, 6),
  legend = "none",
  margin.params = list(fill = "SampleTech", color = "black", size = 0.2),
  ggtheme = theme_classic()
)

## b: GSCs only
# (font.label = c(14, "bold", "black") was tried here and left disabled)
b <- ggscatterhist(
  data = gsc,
  x = "Developmental_cohort_z",
  y = "InjuryResponse_cohort_z",
  color = "SampleTech",
  palette = c("#225ea8"),
  size = 0.5,
  alpha = 0.2,
  xlab = "Scaled Developmental Program Score (AUC; z-score)",
  ylab = "Scaled Injury Response Program Score (AUC; z-score)",
  xlim = c(-5, 5),
  ylim = c(-2.5, 6),
  legend = "none",
  margin.params = list(fill = "SampleTech", color = "black", size = 0.2),
  ggtheme = theme_classic()
)

## c: live-cell tumours only
# NOTE: this object is named `c`; calls such as c(-5, 5) still resolve to the
# base function because R skips non-function objects when looking up a call.
c <- ggscatterhist(
  data = tum,
  x = "Developmental_cohort_z",
  y = "InjuryResponse_cohort_z",
  color = "SampleTech",
  palette = c("black"),
  size = 0.5,
  alpha = 0.2,
  xlab = "Scaled Developmental Program Score (AUC; z-score)",
  ylab = "Scaled Injury Response Program Score (AUC; z-score)",
  xlim = c(-5, 5),
  ylim = c(-2.5, 6),
  legend = "none",
  font.label = c(14, "bold", "black"),
  margin.params = list(fill = "SampleTech", color = "black", size = 0.2),
  ggtheme = theme_classic()
)

## d: tumour nuclei only
d <- ggscatterhist(
  data = nuc,
  x = "Developmental_cohort_z",
  y = "InjuryResponse_cohort_z",
  color = "SampleTech",
  palette = c("darkred"),
  size = 0.5,
  alpha = 0.2,
  xlab = "Scaled Developmental Program Score (AUC; z-score)",
  ylab = "Scaled Injury Response Program Score (AUC; z-score)",
  xlim = c(-5, 5),
  ylim = c(-2.5, 6),
  legend = "none",
  margin.params = list(fill = "SampleTech", color = "black", size = 0.2),
  ggtheme = theme_classic()
)

In [None]:
# Export the four stored gradient panels to a 5 x 5 inch PDF on the Desktop.
# `a` is the all-samples panel; `b`, `c` and `d` are the GSC, live-cell tumour
# and tumour-nuclei panels built in the previous cell.
pdf(file = "~/Desktop/Nuclei_Gradient_Scaled.pdf", width = 5, height = 5)
# Each bare name relies on top-level auto-printing to draw the object on the
# open PDF device.
# NOTE(review): auto-printing only happens at top level; if this is ever moved
# into a function or loop the objects must be print()ed explicitly.
a
b
c
d
# Close the device so the file is flushed to disk.
dev.off()

---
## 5.0 Correlate rank between z-score and unscaled
---

In [None]:
# Pearson correlation between the raw and within-cohort-scaled Developmental
# scores, GSC cohort only.
# NOTE(review): the section heading mentions rank; Pearson is what is computed
# here - confirm whether a rank (Spearman) correlation was intended.
cor(
  x = gsc$Developmental_AUC,
  y = gsc$Developmental_cohort_z,
  method = "pearson"
)

In [None]:
# Same comparison across all cohorts together. cor() defaults to Pearson;
# it is spelled out here to match the GSC-only call.
cor(
  x = mnn$Developmental_AUC,
  y = mnn$Developmental_cohort_z,
  method = "pearson"
)

---
## 6.0 Plot each tumour on own
---

In [1]:
# Start again from the saved metadata so columns added in earlier sections
# are discarded, then rebuild the cohort label (sample type + technology).
mnn <- readRDS(
  file = "~/Desktop/H4H/pughlab/projects/BTSCs_scRNAseq/Manuscript_G607removed/NatCan_Rebuttal/Nuclei_integration/fastmnn/GSCs_Tumour_LiveNuclei_fastMNN_meta.rds"
)
mnn$SampleTech <- paste0(mnn$SampleType, "_", mnn$Technology)

In [2]:
##scale scores within cohorts
GSCs <- scale(mnn[mnn$SampleTech == "GSC_LiveCell", ]$Developmental_AUC)
Tumours <- scale(mnn[mnn$SampleTech == "Tumour_LiveCell", ]$Developmental_AUC)
Nuclei <- scale(mnn[mnn$SampleTech == "Tumour_Nuclei", ]$Developmental_AUC)
mnn$Developmental_cohort_z <- c(GSCs, Tumours, Nuclei)

GSCs <- scale(mnn[mnn$SampleTech == "GSC_LiveCell", ]$InjuryResponse_AUC)
Tumours <- scale(mnn[mnn$SampleTech == "Tumour_LiveCell", ]$InjuryResponse_AUC)
Nuclei <- scale(mnn[mnn$SampleTech == "Tumour_Nuclei", ]$InjuryResponse_AUC)
mnn$InjuryResponse_cohort_z <- c(GSCs, Tumours, Nuclei)

In [3]:
# Per-cell plotting colour derived from the cohort label. The substitutions
# are applied in the order listed; labels matching none of them pass through
# unchanged.
tech_colours <- c(
  GSC_LiveCell = "#225ea8",
  Tumour_LiveCell = "black",
  Tumour_Nuclei = "darkred"
)
mnn$Color <- Reduce(
  function(labels, tech) gsub(tech, tech_colours[[tech]], labels),
  names(tech_colours),
  mnn$SampleTech
)
# Cell counts per colour, as a quick check of the mapping.
table(mnn$Color)


#225ea8   black darkred 
  65655   14207   42334 

In [8]:
# Draw the scaled gradient separately for every sample and keep each plot in
# `plots`, keyed by sample ID, for the grid layouts in the following cells.
plots <- list()
# as.character() so that factor IDs name the list entries instead of indexing
# `plots` by their integer level codes.
samples <- as.character(unique(mnn$SampleID))

#par(mfrow=c(2,2))
# NOTE(review): nothing below print()s the plots explicitly; the PDF pages
# presumably come from ggscatterhist() drawing each plot as it is built -
# confirm with the installed ggpubr version.
pdf("~/Desktop/Gradient_BySample.pdf", width = 5, height = 5)

# Iterating over the IDs directly (instead of 1:length(samples)) makes the
# loop a no-op when there are no samples.
for (sample_id in samples) {

    print(sample_id)
    sample_cells <- mnn[mnn$SampleID == sample_id, ]
    # Collapse so the title stays a single string even if one sample ID
    # carries more than one cohort label.
    sample_title <- paste0(
        sample_id,
        "   (",
        paste(unique(sample_cells$SampleTech), collapse = "/"),
        ")"
    )

    plots[[sample_id]] <- ggscatterhist(sample_cells,
                      x = "Developmental_cohort_z",
                      y = "InjuryResponse_cohort_z",
                      #palette = c("#225ea8", "black", "darkred"),
                      # colour(s) assigned to this sample's cohort in mnn$Color
                      palette = unique(sample_cells$Color),
                      color = "SampleTech",
                      xlab = "Developmental Program Score (AUC; z-score)",
                      ylab = "Injury Response Program Score (AUC; z-score)",
                      size = 1,
                      alpha = 0.5,
                      margin.params = list(fill = unique(sample_cells$Color), color = "black", size = 0.2),
                      ggtheme = theme_classic(),
                      title = sample_title,
                      legend = "none",
                      # same limits for every sample so pages are comparable
                      xlim = c(-5,5),
                      ylim = c(-2.5,6)
                     )
}

dev.off()

[1] "BT127_L"
[1] "BT147_L"
[1] "BT48_L"
[1] "BT67_L"
[1] "BT73_L"
[1] "BT84_L"
[1] "BT89_L"
[1] "BT94_L"
[1] "G523_L"
[1] "G549_L"
[1] "G564_L"
[1] "G566_L"
[1] "G583_L"
[1] "G620_L"
[1] "G637_L"
[1] "G729_L"
[1] "G797_L"
[1] "G799_L"
[1] "G837_L"
[1] "G851_L"
[1] "G876_L"
[1] "G885_L"
[1] "G895_L"
[1] "G945-I_L"
[1] "G945-J_L"
[1] "G945-K_L"
[1] "G946-J_L"
[1] "G946-K_L"
[1] "G1003-A_T"
[1] "G1003-B_T"
[1] "G1003-C_T"
[1] "G1003-D_T"
[1] "G620_T"
[1] "G910-A_T"
[1] "G910-B_T"
[1] "G910-C_T"
[1] "G910-D_T"
[1] "G910-E_T"
[1] "G945-I_T"
[1] "G945-J_T"
[1] "G945-K_T"
[1] "G946-I_T"
[1] "G946-J_T"
[1] "G946-K_T"
[1] "G967-A_T"
[1] "G967-B_T"
[1] "G967-C_T"
[1] "G967-D_T"
[1] "G983-A_T"
[1] "G983-B_T"
[1] "G983-C_T"
[1] "G523_T"
[1] "G549_T"
[1] "G566_T"
[1] "G583_T"
[1] "G637_T"
[1] "G729_T"
[1] "G797_T"
[1] "G799_T"
[1] "G837_T"
[1] "G851_T"


In [None]:
# Lay out the per-sample plots in a grid four columns wide.
gridExtra::grid.arrange(
  grobs = plots,
  ncol = 4
)

In [None]:
# Alternative layout: pass each stored plot as its own argument to
# grid.arrange(), five columns wide.
arrange_args <- c(plots, list(ncol = 5))
do.call(grid.arrange, arrange_args)