---
# Plot cell type composition across samples and datasets 
L.Richards  
2021-06-22  
*/cluster/projects/pughlab/projects/cancer_scrna_integration/figures*   

---

In [None]:
library(ggplot2)
library(ggpubr)
library(data.table)
library(ComplexHeatmap)
library(tidyr)
library(viridis)
library(dplyr)

# Work from the figures directory: the relative pdf() output paths used further
# down in this file are resolved against this working directory.
# NOTE(review): hard-coded, machine-specific path -- confirm before running elsewhere.
setwd("~/Desktop/H4H/pughlab/projects/cancer_scrna_integration/figures")

---
## 1.0 Read and merge metadata
---


In [17]:
# read in study metadata: one "<dataset>/<dataset>_meta.csv" file per dataset
# directory found under file.path; results are stored in `meta`, keyed by dataset
file.path <- "~/Desktop/H4H/pughlab/projects/cancer_scrna_integration/data"
datasets <- list.files(file.path)

# fail early with a clear message instead of erroring later on a bogus path
if (length(datasets) == 0) {
    stop("No dataset directories found in: ", file.path, call. = FALSE)
}

# remove Griffiths (dropped by position: the 3rd entry of the listing)
# NOTE(review): the original comment read "we did end up using this dataset",
# which contradicts removing it -- presumably "did not"; confirm.
# NOTE(review): the positional index assumes Griffiths is listed third; verify
# if directories are added to or removed from the data folder.
datasets <- datasets[-3]
meta <- list()

# seq_along() gives an empty sequence for an empty vector, whereas
# 1:length(datasets) would iterate over c(1, 0)
for (i in seq_along(datasets)){

    file <- paste0(file.path, "/", datasets[i], "/", datasets[i], "_meta.csv")
    dat <- read.csv(file)
    # use the "X" column as row names
    # (presumably the unnamed index column of the CSV -- TODO confirm)
    rownames(dat) <- dat$X
    # tag every row with the dataset it came from
    dat$Study <- datasets[i]
    meta[[datasets[i]]] <- dat

}


In [21]:
# harmonise column names so the shared columns can be selected from every study
# (columns are addressed by position: 5th in Ma-LIHC, 15th in Richards-GBM-LGG)
names(meta[["Ma-LIHC"]])[5] <- "SampleID"
names(meta[["Richards-GBM-LGG"]])[15] <- "CellType"

# keep only the shared columns of each study, then stack them into one data frame
dat <- do.call(
    rbind,
    lapply(meta, function(study) study[, c("Study", "SampleID", "CellType")])
)

---
## 2.0 Plot histogram of cell counts across samples
---

In [28]:
# tabulate the number of rows per sample id
# (resulting columns: Var1 = sample id, Freq = count)
counts <- data.frame(table(dat[["SampleID"]]))
head(counts, n = 6L)

Unnamed: 0_level_0,Var1,Freq
Unnamed: 0_level_1,<fct>,<int>
1,P55_ICB_TKI,4621
2,P76_NoICB_NoTKI,7898
3,P90_NoICB_NoTKI,8421
4,P906_ICB_TKI,2449
5,P912_ICB_TKI,252
6,P913_ICB_TKI,3570


In [81]:
# bar plot of the per-sample counts (Freq) for every sample id (Var1);
# geom_col() is the idiomatic equivalent of geom_bar(stat = "identity")
cells <- ggplot(counts, aes(x = Var1, y = Freq)) +
             geom_col(fill = "grey", color = "black") +
             theme_classic() +
             # rotate the sample labels so they do not overlap
             theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)) +
             xlab("") + ylab("")

# print() explicitly: a ggplot object is only drawn by top-level autoprint, so a
# bare `cells` writes an empty PDF when this code is run through source() or
# from inside a function
pdf("FigureS1_Cell_Histogram.pdf", width = 13, height = 5)
print(cells)
dev.off()

---
## 3.0 Plot dotplot of per sample cell type proportions
---

Plot the proportion of each cell type per sample (dot size), coloured by the number of cells (fill).

In [70]:
# get cell counts per sample for each cell type
cellno <- dat %>% count(Study, SampleID, CellType, sort = FALSE)

# get proportion each cell type contributes to total cells within each sample;
# Study is part of the grouping so that identical SampleIDs coming from different
# studies are not pooled into one denominator, and the result is ungrouped so
# that later verbs do not silently keep operating per sample
cellno <- cellno %>%
    group_by(Study, SampleID) %>%
    mutate(percent = n / sum(n)) %>%
    ungroup()

In [71]:
# preview the first rows of the per-sample cell type table
cellno %>% head(n = 6L)

Study,SampleID,CellType,n,percent
<chr>,<fct>,<fct>,<int>,<dbl>
Bi-RCC,P55_ICB_TKI,B_cells,11,0.0023804371
Bi-RCC,P55_ICB_TKI,DCs,10,0.0021640338
Bi-RCC,P55_ICB_TKI,Endothelial,1,0.0002164034
Bi-RCC,P55_ICB_TKI,Macrophages,663,0.1434754382
Bi-RCC,P55_ICB_TKI,Malignant,92,0.0199091106
Bi-RCC,P55_ICB_TKI,Mast_cells,4,0.0008656135


In [79]:
# dot plot with one point per sample x cell type combination:
# fill = number of cells (n), size = proportion within the sample (percent).
# The plot is built before the device is opened so that a failure while
# constructing it does not leave an open PDF device behind.
celltype_dotplot <- ggplot(cellno, aes(x = SampleID, y = CellType, fill = n, size = percent)) +
    geom_point(shape = 21) +  # shape 21 is a circle that honours the fill aesthetic
    scale_fill_viridis_c(option = "plasma", name = 'Cell Count') +
    cowplot::theme_cowplot() +
    ylab('') + xlab('') +
    theme(axis.text.x = element_text(angle = 90, hjust = 0.95, size = 12),
          axis.text.y = element_text(size = 15))

# print() explicitly: a bare ggplot expression is only drawn by top-level
# autoprint, so the PDF would be empty when run through source() or a function
pdf("FigureS1_CellType_Dotplot.pdf", width = 16, height = 8.5)
print(celltype_dotplot)
dev.off()

In [83]:
# preview the rows that belong to the Ma-LIHC study
cellno %>%
    filter(Study == "Ma-LIHC") %>%
    head()

Study,SampleID,CellType,n,percent
<chr>,<fct>,<fct>,<int>,<dbl>
Ma-LIHC,S02_P01_LCP21,B_cells,6,0.00867052
Ma-LIHC,S02_P01_LCP21,Endothelial,219,0.31647399
Ma-LIHC,S02_P01_LCP21,Macrophages,9,0.01300578
Ma-LIHC,S02_P01_LCP21,T_cells,171,0.24710983
Ma-LIHC,S02_P01_LCP21,CAFs,284,0.41040462
Ma-LIHC,S02_P01_LCP21,HPCs,3,0.00433526
