---
# Visualize LISI metric and runtime
*L.Richards*  
*2021-06-09*    
*/cluster/projects/pughlab/projects/cancer_scrna_integration/figures*    

---

https://davemcg.github.io/post/lets-plot-scrna-dotplots/

In [53]:
# Attach packages for this notebook. In the LISI cells below, data.table
# supplies fread() and ggplot2 supplies the plotting functions (including
# scale_fill_viridis_c()). cowplot is called with `cowplot::`, so it must be
# installed even though it is not attached here.
# NOTE(review): ggpubr, ComplexHeatmap, tidyr and viridis are not obviously
# used by the LISI cells — confirm whether other sections need them.
library(ggplot2)
library(ggpubr)
library(data.table)
library(ComplexHeatmap)
library(tidyr)
library(viridis)

# Relative output paths (e.g. the PDF written by the dot-plot cell) resolve
# against this directory; the path is machine-specific.
setwd("~/Desktop/H4H/pughlab/projects/cancer_scrna_integration/figures")

---
## 1.0 Calculate median LISI scores
---

Make a big matrix of metadata across all samples; this will require formatting the header columns to be the same. 

In [41]:
# load lisi scores (0 - 1)
# The commented-out fread() is an alternative input path kept for reference;
# the active call reads lisi_integration.csv instead.
#lisi <- fread("~/Desktop/H4H/pughlab/projects/cancer_scrna_integration/evalutation/lisi/LISI_calculations.csv")
lisi <- fread("~/Desktop/lisi_integration.csv")
# fread() returns a data.table, which does not keep row names; convert to a
# base data.frame before assigning them below.
lisi <- data.frame(lisi)
# Drop column V1, then use column V1.1 as the row names and remove it.
# NOTE(review): V1 / V1.1 look like auto-generated names for two unnamed
# index columns in the CSV — confirm against the file header.
lisi$V1 <- NULL
rownames(lisi) <- lisi$V1.1
lisi$V1.1 <- NULL

---
### 1.1 Batch LISI for Samples and PatientIDs
---


In [58]:
# Composite "<Study>_<Method>" key identifying each study/method pair; the
# aggregation cells below group on this column.
lisi$Study_Method <- local({
  study_id <- lisi$Study
  method_id <- lisi$Method
  paste0(study_id, "_", method_id)
})

In [59]:
# Median batch LISI per Study_Method, computed once per batch variable.
# Each result has the columns Study_Method, Median_LISI and Category.

# Batch variable: SampleID
sample <- aggregate(
  LISI_SampleID_Norm ~ Study_Method,
  data = lisi,
  FUN = median
)
names(sample)[2] <- "Median_LISI"
sample$Category <- "SampleID"

# Batch variable: PatientID
patient <- aggregate(
  LISI_PatientID_Norm ~ Study_Method,
  data = lisi,
  FUN = median
)
names(patient)[2] <- "Median_LISI"
patient$Category <- "PatientID"

In [60]:
# Stack the SampleID and PatientID summaries into a single long table.
lisi_batch <- do.call(rbind, list(sample, patient))

---
### 1.2 Cell Type LISI for SampleID
---

In [137]:
# CELL TYPE

# Key every row by "<Study_Method>_<CellType>", then take the median of
# LISI_CellType_SampleID_Norm within each key.
lisi$Study_Method_CellType <- paste0(lisi$Study_Method, "_", lisi$CellType)
celltype <- aggregate(
  LISI_CellType_SampleID_Norm ~ Study_Method_CellType,
  data = lisi,
  FUN = median
)
# Placeholder value; the following cell overwrites Category with the
# cell-type label.
celltype$Category <- "SampleID"


In [138]:
# Split each "<Study>_<Method>_<CellType>" key back into its parts.
# The key is tokenised once on "_"; the first two tokens are taken as Study
# and Method, and everything after them is the cell type (which may itself
# contain underscores, so the remaining tokens are re-joined).
#
# vapply(..., character(1)) is used instead of sapply() because sapply()
# simplifies to a matrix whenever every key yields the same number (> 1) of
# cell-type tokens, which made the subsequent per-element paste() produce the
# wrong number of labels. vapply() always returns one string per key and also
# works for zero rows.
key_parts <- strsplit(celltype$Study_Method_CellType, "_")

# extract cell types (all tokens after Study and Method)
cell_type <- vapply(
  key_parts,
  function(tokens) paste(tokens[-c(1:2)], collapse = "_"),
  character(1)
)

# reformat studyID ("<Study>_<Method>")
study <- vapply(
  key_parts,
  function(tokens) paste(tokens[1], tokens[2], sep = "_"),
  character(1)
)

# add celltypes to dataframe
celltype$Category <- cell_type
celltype$Study_Method_CellType <- study

# rename columns so they match the batch-level table (lisi_batch)
colnames(celltype) <- c("Study_Method", "Median_LISI", "Category")

---
## 2.0 Visualize LISI scores with dotplot
---

In [160]:
# Long table for the dot plot: batch-level rows followed by cell-type rows.
res <- rbind(lisi_batch, celltype)

# Fix the category order: cell types first (in order of appearance), then the
# two batch categories.
res$Category <- factor(
  res$Category,
  levels = c(unique(celltype$Category), "PatientID", "SampleID")
)

# Drop the PatientID batch rows, since not all cohorts have multiple samples
# from the same patient.
res <- res[res$Category != "PatientID", ]


In [173]:
# plot dotplot
# One point per (Study_Method, Category) pair; both the fill colour and the
# point size encode Median_LISI. Output goes to LISI_Dotplot.pdf in the
# current working directory.

pdf("LISI_Dotplot.pdf", width = 15, height = 9)
# print() explicitly: a bare ggplot object is only drawn by top-level
# auto-printing, which does not happen when the script is run via source(),
# and would leave the PDF without a plot.
print(
    ggplot(res, aes(x = Study_Method, y = Category, fill = Median_LISI, size = Median_LISI)) +
        geom_point(shape = 21) +
        scale_fill_viridis_c(option = "plasma", name = "Median LISI") +
        cowplot::theme_cowplot() +
        ylab("") + xlab("") +
        theme(
            axis.text.x = element_text(angle = 90, hjust = 0.95, size = 12),
            axis.text.y = element_text(size = 15)
        )
)
dev.off()