# Mast cells: process reference for integration with MERFISH

In [None]:
require(Seurat)
require(tidyverse)
require(readxl)
require(patchwork)
require(sf)
require(ggpubr)
require(ggthemes)
require(harmony)
require(presto)
require(ComplexHeatmap)
require(circlize)
require(glue)
require(e1071) 
require(caTools) 
require(class) 
require(gghighlight)
require(UpSetR)
# Notebook display limits (repr.* options) and a fixed RNG seed for the session.
options(
  repr.matrix.max.cols = 100,
  repr.matrix.max.rows = 100,
  repr.plot.res = 300
)
set.seed(1)

# load processed scRNA reference dataset

In [None]:
# Load the processed scRNA reference object from its cached .rds file.
completeReference <- readr::read_rds(
  file = "/n/scratch3/users/m/mup728/Cell_Typing_CRC_MERFISH/Pelka_reference_cleaning/pelka_dataset_with_merfish_genes.rds"
)

In [None]:
# Label every reference cell as scRNA and copy batchID into biosample_id.
completeReference@meta.data[["technology"]] <- "scRNA"
completeReference@meta.data[["biosample_id"]] <- completeReference@meta.data$batchID

In [None]:
# Keep the object's column names (one per cell) as a metadata column, then
# report how many distinct names there are.
completeReference@meta.data[["combined_cell_names"]] <- colnames(completeReference)
n_distinct(completeReference@meta.data[["combined_cell_names"]])

In [None]:
# Five-number summaries of the nCount_RNA and nFeature_RNA metadata columns.
summary(completeReference@meta.data[["nCount_RNA"]])
summary(completeReference@meta.data[["nFeature_RNA"]])

## select Mast cells for finetyping

In [None]:
# Build a Mast-only Seurat object from the reference:
#   1. take the metadata rows whose ClusterTop is 'Mast', keyed by cell name;
#   2. subset the counts matrix to those cells and create a fresh object.
mastMeta <- completeReference@meta.data %>% filter(ClusterTop == "Mast")
rownames(mastMeta) <- mastMeta$combined_cell_names
mastCells <- mastMeta$combined_cell_names

# Request the counts by argument name: a bare positional 'counts' is taken as
# `slot` by SeuratObject v4 but as `assay` by v5. drop = FALSE keeps a matrix
# even if only a single cell matches.
mastCounts <- GetAssayData(completeReference, slot = "counts")[, mastCells, drop = FALSE]

scRNA_Mast <- CreateSeuratObject(mastCounts, meta.data = mastMeta)
scRNA_Mast
rm(mastMeta, mastCells, mastCounts)

In [None]:
# Cross-tabulate orig.ident against ClusterFull for the Mast subset.
as.matrix(
  table(
    scRNA_Mast@meta.data[["orig.ident"]],
    scRNA_Mast@meta.data[["ClusterFull"]]
  )
)

In [None]:
# Number of Mast cells per orig.ident, shown as a one-column matrix.
as.matrix(table(scRNA_Mast@meta.data[["orig.ident"]]))

In [None]:
# Histogram of the per-orig.ident Mast cell counts, followed by their summary.
options(repr.plot.width = 5, repr.plot.height = 5)
as.data.frame(table(scRNA_Mast@meta.data[["orig.ident"]])) %>%
  ggplot(aes(x = Freq)) +
  geom_histogram()
summary(as.vector(table(scRNA_Mast@meta.data[["orig.ident"]])))

#### filter out donors with <= 30 Mast cells (keep donors with > 30)

In [None]:
# Names of the orig.ident values that contribute strictly more than 30 cells,
# then the number of orig.ident values before and after the cut.
donorsToRetain <- as.data.frame(table(scRNA_Mast@meta.data[["orig.ident"]])) %>%
  filter(Freq > 30) %>%
  pull(Var1) %>%
  as.character()
n_distinct(scRNA_Mast@meta.data[["orig.ident"]])
length(donorsToRetain)

In [None]:
# Keep only the cells whose orig.ident passed the per-donor count filter.
scRNA_Mast <- subset(
  x = scRNA_Mast,
  subset = orig.ident %in% donorsToRetain
)
scRNA_Mast

## scale/normalize/pca

In [None]:
# Scale factor for LogNormalize: the mean, across technologies, of the
# per-technology median nCount_RNA. (technology was set to a single value,
# 'scRNA', on the reference, so mean and median of the medians coincide here.)
normFactor <- scRNA_Mast@meta.data %>%
  group_by(technology) %>%
  summarize(medianCounts = median(nCount_RNA)) %>%
  pull(medianCounts) %>%
  mean()
normFactor

# Normalize, scale every gene, then run a PCA weighted by orig.ident (15 PCs).
scRNA_Mast <- NormalizeData(
  scRNA_Mast,
  normalization.method = "LogNormalize",
  scale.factor = normFactor
)
scRNA_Mast <- ScaleData(scRNA_Mast, features = rownames(scRNA_Mast))
scRNA_Mast <- singlecellmethods::RunBalancedPCA(
  scRNA_Mast,
  weight.by = "orig.ident",
  npcs = 15
)
scRNA_Mast

### in scRNA, are the Mast cell subtypes distinguishable?

#### qualitative look - umap

In [None]:
# Pre-harmony UMAP on the first 15 PCs. ret_extra = 'fgraph' also returns the
# graph that is stored below as 'pcaumap_fgraph'.
U <- uwot::umap(
  Embeddings(scRNA_Mast, "pca")[, 1:15],
  min_dist = 0.05,
  spread = 0.30,
  ret_extra = "fgraph",
  # nbrOfWorkers() is exported by the future package, which the require()
  # calls at the top of this notebook never attach, so qualify it explicitly.
  n_sgd_threads = future::nbrOfWorkers(),
  fast_sgd = TRUE
)

# Name the columns with the reduction key so they already match the
# PCAUMAP_1 / PCAUMAP_2 names the plotting cells refer to.
colnames(U$embedding) <- c("PCAUMAP_1", "PCAUMAP_2")
rownames(U$fgraph) <- colnames(U$fgraph) <- Cells(scRNA_Mast)

scRNA_Mast[["pcaumap"]] <- Seurat::CreateDimReducObject(
  embeddings = U$embedding,
  assay = "RNA",
  key = "PCAUMAP_",
  global = TRUE
)

# Store the UMAP graph on the object under the default assay.
new_graph <- Seurat::as.Graph(U$fgraph)
DefaultAssay(new_graph) <- DefaultAssay(scRNA_Mast)
scRNA_Mast[["pcaumap_fgraph"]] <- new_graph

In [None]:
# Attach the reference labels to the pre-harmony UMAP coordinates.
# The join key is stated explicitly so the join can never silently pick up
# additional shared columns; no scratch `temp` variable is left behind.
pcaumap_embeddings <- right_join(
  scRNA_Mast@meta.data %>%
    select(combined_cell_names, ClusterTop, ClusterFull, orig.ident),
  Embeddings(scRNA_Mast, "pcaumap") %>%
    as.data.frame() %>%
    mutate(combined_cell_names = rownames(.)),
  by = "combined_cell_names"
)
head(pcaumap_embeddings)
tail(pcaumap_embeddings)

In [None]:
# Pre-harmony UMAP, one colour per orig.ident, legend hidden.
options(repr.plot.width = 5, repr.plot.height = 5)
pcaumap_embeddings %>%
  ggplot(aes(PCAUMAP_1, PCAUMAP_2, color = orig.ident)) +
  geom_point(shape = ".", alpha = 0.5) +
  ggtitle("Pre-harmony UMAP - Mast cells") +
  guides(
    color = guide_legend(override.aes = list(size = 10, shape = 16))
  ) +
  ggpubr::theme_pubr(base_size = 10) +
  theme(legend.position = "none")

In [None]:
# Pre-harmony UMAP faceted by orig.ident: every donor is drawn in red in its
# own panel, with gghighlight applied to the faceted plot.
options(repr.plot.width = 20, repr.plot.height = 20)
constantPalette <- setNames(
  rep("red", length(unique(pcaumap_embeddings$orig.ident))),
  unique(pcaumap_embeddings$orig.ident)
)
pcaumap_embeddings %>%
  ggplot(aes(PCAUMAP_1, PCAUMAP_2, color = orig.ident)) +
  geom_point(shape = ".", alpha = 0.5) +
  ggtitle("Pre-harmony UMAP - Mast cells") +
  scale_color_manual(values = constantPalette) +
  ggpubr::theme_pubr(base_size = 10) +
  theme(legend.position = "none") +
  facet_wrap(~orig.ident) +
  gghighlight::gghighlight()

In [None]:
# Pre-harmony UMAP coloured by the reference ClusterFull label.
options(repr.plot.width = 10, repr.plot.height = 10)
pcaumap_embeddings %>%
  ggplot(aes(PCAUMAP_1, PCAUMAP_2, color = ClusterFull)) +
  geom_point(shape = ".", alpha = 0.5) +
  ggtitle("Pre-harmony UMAP - Mast cells") +
  ggthemes::scale_color_tableau("Tableau 20", name = "") +
  guides(
    color = guide_legend(override.aes = list(size = 10, shape = 16))
  ) +
  ggpubr::theme_pubr(base_size = 10) +
  theme(legend.position = "top")

In [None]:
# Pre-harmony UMAP coloured by ClusterFull, one facet per label, with
# gghighlight applied to the faceted plot.
options(repr.plot.width = 10, repr.plot.height = 10)
pcaumap_embeddings %>%
  ggplot(aes(PCAUMAP_1, PCAUMAP_2, color = ClusterFull)) +
  geom_point(shape = ".", alpha = 0.5) +
  ggtitle("Pre-harmony UMAP - Mast cells") +
  ggthemes::scale_color_tableau("Tableau 20", name = "") +
  guides(
    color = guide_legend(override.aes = list(size = 10, shape = 16))
  ) +
  ggpubr::theme_pubr(base_size = 10) +
  theme(legend.position = "top") +
  facet_wrap(~ClusterFull) +
  gghighlight::gghighlight()

### harmonize over donor

In [None]:
# Harmonize over orig.ident and show the convergence plot.
# NOTE(review): the accepted argument names (max.iter, early_stop, lambda)
# depend on the installed harmony version — confirm they are honoured.
options(repr.plot.width = 5, repr.plot.height = 5)
scRNA_Mast <- harmony::RunHarmony(
  scRNA_Mast,
  "orig.ident",
  plot_convergence = TRUE,
  lambda = NULL,
  max.iter = 10,
  # FALSE rather than F: F is an ordinary variable that can be rebound.
  early_stop = FALSE
)

### post harmony umap

In [None]:
# Post-harmony UMAP on the first 15 harmony dimensions. ret_extra = 'fgraph'
# also returns the graph that is stored below as 'humap_fgraph'.
U <- uwot::umap(
  Embeddings(scRNA_Mast, "harmony")[, 1:15],
  min_dist = 0.05,
  spread = 0.30,
  ret_extra = "fgraph",
  # nbrOfWorkers() is exported by the future package, which the require()
  # calls at the top of this notebook never attach, so qualify it explicitly.
  n_sgd_threads = future::nbrOfWorkers(),
  fast_sgd = TRUE
)

# Name the columns with the reduction key so they already match the
# HUMAP_1 / HUMAP_2 names the plotting cells refer to.
colnames(U$embedding) <- c("HUMAP_1", "HUMAP_2")
rownames(U$fgraph) <- colnames(U$fgraph) <- Cells(scRNA_Mast)

scRNA_Mast[["humap"]] <- Seurat::CreateDimReducObject(
  embeddings = U$embedding,
  assay = "RNA",
  key = "HUMAP_",
  global = TRUE
)

# Store the UMAP graph on the object under the default assay.
new_graph <- Seurat::as.Graph(U$fgraph)
DefaultAssay(new_graph) <- DefaultAssay(scRNA_Mast)
scRNA_Mast[["humap_fgraph"]] <- new_graph

In [None]:
# Attach the reference labels to the post-harmony UMAP coordinates.
# The join key is stated explicitly so the join can never silently pick up
# additional shared columns; no scratch `temp` variable is left behind.
humap_embeddings <- right_join(
  scRNA_Mast@meta.data %>%
    select(combined_cell_names, ClusterTop, ClusterFull, orig.ident),
  Embeddings(scRNA_Mast, "humap") %>%
    as.data.frame() %>%
    mutate(combined_cell_names = rownames(.)),
  by = "combined_cell_names"
)
head(humap_embeddings)
tail(humap_embeddings)

In [None]:
# Row and column counts of the joined embedding table.
c(nrow(humap_embeddings), ncol(humap_embeddings))

In [None]:
# Post-harmony UMAP, one colour per orig.ident, legend hidden.
options(repr.plot.width = 5, repr.plot.height = 5)
humap_embeddings %>%
  ggplot(aes(HUMAP_1, HUMAP_2, color = orig.ident)) +
  geom_point(shape = ".", alpha = 0.5) +
  ggtitle("Post-harmony UMAP - Mast cells") +
  ggpubr::theme_pubr(base_size = 10) +
  theme(legend.position = "none")

In [None]:
# Post-harmony UMAP faceted by orig.ident: every donor is drawn in red in its
# own panel, with gghighlight applied to the faceted plot.
options(repr.plot.width = 20, repr.plot.height = 20)
constantPalette <- setNames(
  rep("red", length(unique(humap_embeddings$orig.ident))),
  unique(humap_embeddings$orig.ident)
)
humap_embeddings %>%
  ggplot(aes(HUMAP_1, HUMAP_2, color = orig.ident)) +
  geom_point(shape = ".", alpha = 0.5) +
  ggtitle("Post-harmony UMAP - Mast cells") +
  ggpubr::theme_pubr(base_size = 10) +
  theme(legend.position = "none") +
  scale_color_manual(values = constantPalette) +
  facet_wrap(~orig.ident) +
  gghighlight::gghighlight()

In [None]:
# Post-harmony UMAP coloured by the reference ClusterFull label.
options(repr.plot.width = 10, repr.plot.height = 10)
humap_embeddings %>%
  ggplot(aes(HUMAP_1, HUMAP_2, color = ClusterFull)) +
  geom_point(shape = ".", alpha = 0.5) +
  ggtitle("Post-harmony UMAP - Mast cells") +
  ggthemes::scale_color_tableau("Tableau 20", name = "") +
  guides(
    color = guide_legend(
      override.aes = list(size = 10, shape = 16, alpha = 1)
    )
  ) +
  ggpubr::theme_pubr(base_size = 10) +
  theme(legend.position = "top")

In [None]:
# Post-harmony UMAP coloured by ClusterFull, one facet per label, with
# gghighlight applied to the faceted plot.
options(repr.plot.width = 15, repr.plot.height = 15)
humap_embeddings %>%
  ggplot(aes(HUMAP_1, HUMAP_2, color = ClusterFull)) +
  geom_point(shape = ".", alpha = 0.5) +
  ggtitle("Post-harmony UMAP - Mast cells") +
  ggthemes::scale_color_tableau("Tableau 20", name = "") +
  guides(
    color = guide_legend(
      override.aes = list(size = 10, shape = 16, alpha = 1)
    )
  ) +
  ggpubr::theme_pubr(base_size = 10) +
  theme(legend.position = "top") +
  facet_wrap(~ClusterFull) +
  gghighlight::gghighlight()

In [None]:
# Distinct ClusterFull labels present in the Mast subset.
unique(scRNA_Mast@meta.data[["ClusterFull"]])

## Cluster at a few resolutions to remove non Mast cells

In [None]:
# Cluster on the post-harmony UMAP graph at resolutions 0.1, 0.2, ..., 1.5.
# The resolution-0.1 result is read back later from the metadata column
# humap_fgraph_res.0.1.
set.seed(1)
resolutions_test <- seq(from = 0.1, to = 1.5, by = 0.1)
resolutions_test
scRNA_Mast <- Seurat::FindClusters(
  scRNA_Mast,
  graph.name = "humap_fgraph",
  resolution = resolutions_test,
  verbose = TRUE
)
scRNA_Mast

### stash the results that you used to filter out non Mast cells

In [None]:
# Keep the resolution-0.1 clustering (unused levels dropped) under a stable
# column name, then print the labels and the object.
scRNA_Mast@meta.data[["Louvain_for_filtering"]] <-
  droplevels(scRNA_Mast@meta.data[["humap_fgraph_res.0.1"]])
scRNA_Mast@meta.data[["Louvain_for_filtering"]]
scRNA_Mast

## DE with wilcox to figure out which of the ~30 clusters are non Mast cells

In [None]:
# Per-cluster differential expression with presto, using the stashed
# Louvain_for_filtering labels as the identities.
Idents(scRNA_Mast) <- "Louvain_for_filtering"

# Table of all markers (n = Inf) with adjusted p <= 0.05 and AUC >= 0.6.
# The adjusted-p cutoff is passed by its full name, padj_max; the previous
# `padj =` only worked through partial argument matching.
presto::top_markers(
  presto::wilcoxauc(scRNA_Mast),
  n = Inf,
  padj_max = 0.05,
  auc_min = 0.6
)

In [None]:
# Post-harmony UMAP coordinates joined with the filtering clusters and donor.
# The join key is stated explicitly so the join can never silently pick up
# additional shared columns.
humap_embeddings <- Embeddings(scRNA_Mast, "humap") %>%
  as.data.frame() %>%
  mutate(combined_cell_names = rownames(.)) %>%
  left_join(
    scRNA_Mast@meta.data %>%
      select(combined_cell_names, Louvain_for_filtering, orig.ident),
    by = "combined_cell_names"
  )
rbind(head(humap_embeddings), tail(humap_embeddings))

In [None]:
# Post-harmony UMAP faceted by Louvain_for_filtering: each cluster is drawn in
# red in its own panel, with gghighlight applied to the faceted plot.
options(repr.plot.width = 20, repr.plot.height = 20)
constantPalette <- setNames(
  rep("red", length(unique(humap_embeddings$Louvain_for_filtering))),
  unique(humap_embeddings$Louvain_for_filtering)
)
humap_embeddings %>%
  ggplot(aes(HUMAP_1, HUMAP_2, color = Louvain_for_filtering)) +
  geom_point(shape = ".", alpha = 1) +
  ggtitle("For filtering non Mast cells:\nOver-clustering of Mast cells UMAP") +
  ggpubr::theme_pubr(base_size = 20) +
  theme(legend.position = "none") +
  scale_color_manual(values = constantPalette) +
  facet_wrap(~Louvain_for_filtering) +
  gghighlight::gghighlight()

In [None]:
# Checkpoint the clustered object to disk (relative to the working directory).
readr::write_rds(
  x = scRNA_Mast,
  file = "Mast_fineTyping_all_genes.rds"
)

## Redo scale/normalize/PCA/harmony/UMAP after removing the non-Mast cells

In [None]:
# Scale factor for LogNormalize: the MEAN of the per-orig.ident median
# nCount_RNA values.
# NOTE(review): an earlier comment called this "median of medians", but the
# code takes mean() — confirm which statistic is intended.
# NOTE(review): no cell-removal step is visible between the clustering cells
# and this one, so this re-runs on the same cells — confirm that is intended.
normFactor <- scRNA_Mast@meta.data %>%
  group_by(orig.ident) %>%
  summarize(medianCounts = median(nCount_RNA)) %>%
  pull(medianCounts) %>%
  mean()
normFactor

# Normalize, scale every gene, then run a PCA weighted by orig.ident (15 PCs).
scRNA_Mast <- NormalizeData(
  scRNA_Mast,
  normalization.method = "LogNormalize",
  scale.factor = normFactor
)
scRNA_Mast <- ScaleData(scRNA_Mast, features = rownames(scRNA_Mast))
scRNA_Mast <- singlecellmethods::RunBalancedPCA(
  scRNA_Mast,
  weight.by = "orig.ident",
  npcs = 15
)
scRNA_Mast

In [None]:
# Re-run harmony over orig.ident on the recomputed PCA and show convergence.
# NOTE(review): the accepted argument names (max.iter, early_stop, lambda)
# depend on the installed harmony version — confirm they are honoured.
options(repr.plot.width = 5, repr.plot.height = 5)
scRNA_Mast <- harmony::RunHarmony(
  scRNA_Mast,
  "orig.ident",
  plot_convergence = TRUE,
  lambda = NULL,
  max.iter = 10,
  # FALSE rather than F: F is an ordinary variable that can be rebound.
  early_stop = FALSE
)

In [None]:
# Recompute the post-harmony UMAP on the first 15 harmony dimensions and
# replace the stored 'humap' reduction and 'humap_fgraph' graph.
U <- uwot::umap(
  Embeddings(scRNA_Mast, "harmony")[, 1:15],
  min_dist = 0.05,
  spread = 0.30,
  ret_extra = "fgraph",
  # nbrOfWorkers() is exported by the future package, which the require()
  # calls at the top of this notebook never attach, so qualify it explicitly.
  n_sgd_threads = future::nbrOfWorkers(),
  fast_sgd = TRUE
)

# Name the columns with the reduction key so they already match the
# HUMAP_1 / HUMAP_2 names the plotting cells refer to.
colnames(U$embedding) <- c("HUMAP_1", "HUMAP_2")
rownames(U$fgraph) <- colnames(U$fgraph) <- Cells(scRNA_Mast)

scRNA_Mast[["humap"]] <- Seurat::CreateDimReducObject(
  embeddings = U$embedding,
  assay = "RNA",
  key = "HUMAP_",
  global = TRUE
)

# Store the UMAP graph on the object under the default assay.
new_graph <- Seurat::as.Graph(U$fgraph)
DefaultAssay(new_graph) <- DefaultAssay(scRNA_Mast)
scRNA_Mast[["humap_fgraph"]] <- new_graph

In [None]:
# Recomputed UMAP coordinates joined with the reference labels and donor.
# The join key is stated explicitly so the join can never silently pick up
# additional shared columns.
humap_embeddings <- Embeddings(scRNA_Mast, "humap") %>%
  as.data.frame() %>%
  mutate(combined_cell_names = rownames(.)) %>%
  left_join(
    scRNA_Mast@meta.data %>%
      select(combined_cell_names, ClusterFull, orig.ident),
    by = "combined_cell_names"
  )
rbind(head(humap_embeddings), tail(humap_embeddings))

In [None]:
# Recomputed UMAP faceted by orig.ident: every donor is drawn in red in its
# own panel, with gghighlight applied to the faceted plot.
options(repr.plot.width = 20, repr.plot.height = 20)
constantPalette <- setNames(
  rep("red", length(unique(humap_embeddings$orig.ident))),
  unique(humap_embeddings$orig.ident)
)
humap_embeddings %>%
  ggplot(aes(HUMAP_1, HUMAP_2, color = orig.ident)) +
  geom_point(shape = ".", alpha = 0.5) +
  ggtitle("Post-harmony, post-filter (step 1) UMAP - Mast cells") +
  ggpubr::theme_pubr(base_size = 10) +
  theme(legend.position = "none") +
  scale_color_manual(values = constantPalette) +
  facet_wrap(~orig.ident) +
  gghighlight::gghighlight()

In [None]:
# Recomputed UMAP faceted by ClusterFull: every label is drawn in red in its
# own panel, with gghighlight applied to the faceted plot.
options(repr.plot.width = 10, repr.plot.height = 7)
constantPalette <- setNames(
  rep("red", length(unique(humap_embeddings$ClusterFull))),
  unique(humap_embeddings$ClusterFull)
)
humap_embeddings %>%
  ggplot(aes(HUMAP_1, HUMAP_2, color = ClusterFull)) +
  geom_point(shape = ".", alpha = 0.5) +
  ggtitle("Post-harmony, post-filter (step 1) UMAP - Mast cells") +
  ggpubr::theme_pubr(base_size = 10) +
  theme(legend.position = "none") +
  scale_color_manual(values = constantPalette) +
  facet_wrap(~ClusterFull) +
  gghighlight::gghighlight()

## stash clusters used for mapping fine cell types to clean louvain clusters

In [None]:
# Copy ClusterFull into cleaned_fine_types as a factor without unused levels,
# then print the resulting labels.
scRNA_Mast@meta.data[["cleaned_fine_types"]] <-
  scRNA_Mast@meta.data[["ClusterFull"]] %>%
  as.factor() %>%
  droplevels()
scRNA_Mast@meta.data[["cleaned_fine_types"]]

## Cache

### Mast cells with all genes

In [None]:
# Cache the final object; this writes to the same file name as the earlier
# checkpoint and therefore replaces it.
readr::write_rds(
  x = scRNA_Mast,
  file = "Mast_fineTyping_all_genes.rds"
)

### Mast cells with selected genes

In [None]:
# Cache the object under the "selected genes" file name.
# NOTE(review): this is the same scRNA_Mast object that is written to
# 'Mast_fineTyping_all_genes.rds'; no gene subsetting is visible in this
# notebook — confirm the object is already restricted to the selected genes.
readr::write_rds(
  x = scRNA_Mast,
  file = "Mast_fineTyping_selected_genes.rds"
)